v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
asm-scanner.h
Go to the documentation of this file.
1// Copyright 2017 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_ASMJS_ASM_SCANNER_H_
6#define V8_ASMJS_ASM_SCANNER_H_
7
8#include <memory>
9#include <string>
10#include <unordered_map>
11
12#include "src/asmjs/asm-names.h"
13#include "src/base/logging.h"
14#include "src/base/strings.h"
15
16namespace v8 {
17namespace internal {
18
19class Utf16CharacterStream;
20
21// A custom scanner to extract the token stream needed to parse valid
22// asm.js: http://asmjs.org/spec/latest/
23// This scanner intentionally avoids the portions of JavaScript lexing
24// that are not required to determine if code is valid asm.js code.
25// * Strings are disallowed except for 'use asm'.
26// * Only the subset of keywords needed to check asm.js invariants is
27// included.
28// * Identifiers are accumulated into local + global string tables
29// (for performance).
31 public:
32 using token_t = int32_t;
33
34 explicit AsmJsScanner(Utf16CharacterStream* stream);
35
36 // Get the current token (returns the value of {token_}).
37 token_t Token() const { return token_; }
38 // Get the position of the current token (returns the value of {position_}).
39 size_t Position() const { return position_; }
40 // Advance to the next token.
41 void Next();
42 // Back up by one token.
43 void Rewind();
44
45 // Get the raw string for the current identifier, by const reference. The
46 // referenced string becomes invalid when the scanner advances; copy it to preserve it.
47 const std::string& GetIdentifierString() const {
48 // Must not be called in the rewound state: identifier strings don't work after a rewind.
49 DCHECK(!rewind_);
50 return identifier_string_;
51 }
52
53 // Check if we just passed a newline (returns the value of {preceded_by_newline_}).
54 bool IsPrecededByNewline() const {
55 // Must not be called in the rewound state: newline tracking doesn't work if you back up.
56 DCHECK(!rewind_);
57 return preceded_by_newline_;
58 }
59
60#if DEBUG
61 // Debug only method to go from a token back to its name.
62 // Slow, only use for debugging.
63 std::string Name(token_t token) const;
64#endif
65
66 // Restores old position (token after that position). Note that it is not
67 // allowed to rewind right after a seek, because previous tokens are unknown.
68 void Seek(size_t pos);
69
70 // Select whether identifiers are resolved in global or local scope, and which
71 // scope new identifiers are added to, by setting or clearing {in_local_scope_}.
72 void EnterLocalScope() { in_local_scope_ = true; }
73 void EnterGlobalScope() { in_local_scope_ = false; }
74 // Drop all current local identifiers.
75 void ResetLocals();
76
77 // Methods to check if a token is an identifier and which scope: local tokens are <= kLocalsStart, global tokens are >= kGlobalsStart. The argument-less overloads test the current token.
78 bool IsLocal() const { return IsLocal(Token()); }
79 bool IsGlobal() const { return IsGlobal(Token()); }
80 static bool IsLocal(token_t token) { return token <= kLocalsStart; }
81 static bool IsGlobal(token_t token) { return token >= kGlobalsStart; }
82 // Methods to find the index position of an identifier (count starting from
83 // 0 for each scope separately). Local indices grow as tokens decrease from kLocalsStart; global indices grow as tokens increase from kGlobalsStart.
84 static size_t LocalIndex(token_t token) {
85 DCHECK(IsLocal(token));
86 return -(token - kLocalsStart);
87 }
88 static size_t GlobalIndex(token_t token) {
89 DCHECK(IsGlobal(token));
90 return token - kGlobalsStart;
91 }
92
93 // Methods to check if the current token is a numeric literal considered an
94 // asm.js "double" (contains a dot) or an "unsigned" (without a dot). Note
95 // that numbers without a dot outside the [0 .. 2^32) range are errors. The As*() accessors DCHECK that the matching Is*() check holds for the current token.
96 bool IsUnsigned() const { return Token() == kUnsigned; }
97 uint32_t AsUnsigned() const {
98 DCHECK(IsUnsigned());
99 return unsigned_value_;
100 }
101 bool IsDouble() const { return Token() == kDouble; }
102 double AsDouble() const {
103 DCHECK(IsDouble());
104 return double_value_;
105 }
106
107 // clang-format off
108 enum {
109 // (-10000-kMaxIdentifierCount, -10000] :: Local identifiers (counting
110 // backwards from kLocalsStart)
111 // (-10000 .. -1) :: Builtin tokens like keywords
112 // (also includes some special
113 // ones like end of input)
114 // 0 .. 255 :: Single char tokens
115 // 256 .. 256+kMaxIdentifierCount :: Global identifiers
116 kLocalsStart = -10000,
117#define V(name, _junk1, _junk2, _junk3) kToken_##name,
120#undef V
121#define V(name, _junk1) kToken_##name,
123#undef V
124#define V(name) kToken_##name,
127#undef V
128#define V(rawname, name) kToken_##name,
130#undef V
131#define V(name, value, string_name) name = value,
133#undef V
134 kGlobalsStart = 256,
136 // clang-format on
137
138 static constexpr base::uc32 kEndOfInputU =
139 static_cast<base::uc32>(kEndOfInput);
140
141 private:
145 token_t next_token_; // Only set when in {rewind} state.
146 size_t position_; // Corresponds to {token} position.
147 size_t preceding_position_; // Corresponds to {preceding_token} position.
148 size_t next_position_; // Only set when in {rewind} state.
152 std::unordered_map<std::string, token_t> local_names_;
153 std::unordered_map<std::string, token_t> global_names_;
154 std::unordered_map<std::string, token_t> property_names_;
159
160 // Consume multiple characters.
161 void ConsumeIdentifier(base::uc32 ch);
162 void ConsumeNumber(base::uc32 ch);
163 bool ConsumeCComment();
164 void ConsumeCPPComment();
165 void ConsumeString(base::uc32 quote);
166 void ConsumeCompareOrShift(base::uc32 ch);
167
168 // Classify character categories.
171 bool IsNumberStart(base::uc32 ch);
172};
173
174} // namespace internal
175} // namespace v8
176
177#endif // V8_ASMJS_ASM_SCANNER_H_
#define V(Name)
#define STDLIB_ARRAY_TYPE_LIST(V)
Definition asm-names.h:50
#define STDLIB_MATH_VALUE_LIST(V)
Definition asm-names.h:9
#define STDLIB_MATH_FUNCTION_LIST(V)
Definition asm-names.h:41
#define KEYWORD_NAME_LIST(V)
Definition asm-names.h:66
#define LONG_SYMBOL_NAME_LIST(V)
Definition asm-names.h:87
#define SPECIAL_TOKEN_LIST(V)
Definition asm-names.h:104
#define STDLIB_OTHER_LIST(V)
Definition asm-names.h:60
SourcePosition pos
static size_t GlobalIndex(token_t token)
Definition asm-scanner.h:88
static size_t LocalIndex(token_t token)
Definition asm-scanner.h:84
std::unordered_map< std::string, token_t > local_names_
const std::string & GetIdentifierString() const
Definition asm-scanner.h:47
static bool IsLocal(token_t token)
Definition asm-scanner.h:80
std::unordered_map< std::string, token_t > property_names_
Utf16CharacterStream * stream_
static bool IsGlobal(token_t token)
Definition asm-scanner.h:81
std::unordered_map< std::string, token_t > global_names_
uint32_t AsUnsigned() const
Definition asm-scanner.h:97
bool IsPrecededByNewline() const
Definition asm-scanner.h:54
const int position_
uint32_t uc32
Definition strings.h:19
bool IsIdentifierStart(base::uc32 c)
bool IsIdentifierPart(base::uc32 c)
#define DCHECK(condition)
Definition logging.h:482
#define V8_EXPORT_PRIVATE
Definition macros.h:460