v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
asm-scanner.cc
Go to the documentation of this file.
1// Copyright 2017 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
6
7#include <cinttypes>
8
9#include "src/base/iterator.h"
10#include "src/flags/flags.h"
12#include "src/parsing/scanner.h"
14
15namespace v8 {
16namespace internal {
17
namespace {
// Cap number of identifiers to ensure we can assign both global and
// local ones a token id in the range of an int32_t.
// The anonymous namespace already gives this constant internal linkage, so
// no extra `static` is needed; `constexpr` makes it a compile-time constant.
constexpr int kMaxIdentifierCount = 0xF000000;
}  // namespace
23
// Constructor member-initializer list and body. The signature line is not
// present in this listing (presumably AsmJsScanner(Utf16CharacterStream*
// stream), as named in the declaration index at the end of the page -
// confirm against the header).
// All token slots start as kUninitialized, positions and counters start at
// zero, and the boolean state flags start out false.
25 : stream_(stream),
26 token_(kUninitialized),
27 preceding_token_(kUninitialized),
28 next_token_(kUninitialized),
29 position_(0),
30 preceding_position_(0),
31 next_position_(0),
32 rewind_(false),
33 in_local_scope_(false),
34 global_count_(0),
35 double_value_(0.0),
36 unsigned_value_(0),
37 preceded_by_newline_(false) {
// Each V definition below maps the stringified name to its kToken_<name> id
// in property_names_ or global_names_.
// NOTE(review): the list-macro invocations that expand each V (the listing
// lines between every #define / #undef pair) are missing from this extract.
38#define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name;
41#undef V
42#define V(name, _junk1) property_names_[#name] = kToken_##name;
44#undef V
45#define V(name) property_names_[#name] = kToken_##name;
47#undef V
48#define V(name) global_names_[#name] = kToken_##name;
50#undef V
// Prime the scanner so that a current token is available right away.
51 Next();
52}
53
// Body of the token-advance routine. Its signature line is missing from this
// listing (presumably AsmJsScanner::Next(), which the constructor calls -
// confirm against the header). On return token_ holds the next token, or
// kEndOfInput / kParseError.
55 if (rewind_) {
// NOTE(review): listing lines 56-61 are absent from this extract; only the
// flag reset and the early return of the rewind path are visible here.
62 rewind_ = false;
63 return;
64 }
65
// End of input and parse errors are sticky: further calls do nothing.
66 if (token_ == kEndOfInput || token_ == kParseError) {
67 return;
68 }
69
70#if DEBUG
// Debug-only trace of the current token, gated by the trace_asm_scanner flag.
71 if (v8_flags.trace_asm_scanner) {
72 if (Token() == kDouble) {
73 PrintF("%lf ", AsDouble());
74 } else if (Token() == kUnsigned) {
75 PrintF("%" PRIu32 " ", AsUnsigned());
76 } else {
77 std::string name = Name(Token());
78 PrintF("%s ", name.c_str());
79 }
80 }
81#endif
82
86
// Scan loop: whitespace, newlines and comments break out of the switch and
// iterate again; every other case sets token_ and returns.
// NOTE(review): the statements that run before the loop (listing lines
// 83-85) and the one that reads ch (lines 88-89) are missing from this extract.
87 for (;;) {
90 switch (ch) {
91 case ' ':
92 case '\t':
93 case '\r':
94 // Ignore whitespace.
95 break;
96
97 case '\n':
98 // Track when we've passed a newline for optional semicolon support,
99 // but keep scanning.
// NOTE(review): the statement recording the newline (listing line 100) is
// missing from this extract.
101 break;
102
103 case kEndOfInputU:
104 token_ = kEndOfInput;
105 return;
106
// Either quote character starts a string; the opening quote is passed along
// so the matching closing quote can be required.
107 case '\'':
108 case '"':
109 ConsumeString(ch);
110 return;
111
// A slash may start a line comment, a block comment, or be the division
// token; one character of lookahead decides which.
112 case '/':
113 ch = stream_->Advance();
114 if (ch == '/') {
// NOTE(review): the line-comment handling call (listing line 115) is missing
// from this extract.
116 } else if (ch == '*') {
117 if (!ConsumeCComment()) {
118 token_ = kParseError;
119 return;
120 }
121 } else {
122 stream_->Back();
123 token_ = '/';
124 return;
125 }
126 // Breaks out of switch, but loops again (i.e. the case when we parsed
127 // a comment, but need to continue to look for the next token).
128 break;
129
130 case '<':
131 case '>':
132 case '=':
133 case '!':
// NOTE(review): the call that handles these characters (listing line 134) is
// missing; presumably ConsumeCompareOrShift(ch) - confirm.
135 return;
136
// V expands each listed single-character token into a case label.
// NOTE(review): the list-macro invocation (listing line 138) is missing.
137#define V(single_char_token) case single_char_token:
139#undef V
140 // Use fixed token IDs for ASCII.
141 token_ = ch;
142 return;
143
144 default:
145 if (IsIdentifierStart(ch)) {
// NOTE(review): the identifier handling call (listing line 146) is missing;
// presumably ConsumeIdentifier(ch) - confirm.
147 } else if (IsNumberStart(ch)) {
148 ConsumeNumber(ch);
149 } else {
150 // TODO(bradnelson): Support unicode (probably via UnicodeCache).
151 token_ = kParseError;
152 }
153 return;
154 }
155 }
156}
157
// Tail of the rewind routine. Its signature line and listing lines 164-169
// are missing from this extract (presumably AsmJsScanner::Rewind() - confirm
// against the header). The visible part asserts that no rewind is already
// pending, marks one as pending, and clears the identifier buffer.
160 // TODO(bradnelson): Currently rewinding needs to leave in place the
161 // preceding newline state (in case a |0 ends a line).
162 // This is weird and stateful, fix me.
// A rewind must not already be pending when this is called.
163 DCHECK(!rewind_);
// The token-advance routine checks this flag on entry and resets it.
170 rewind_ = true;
171 identifier_string_.clear();
172}
173
175
176#if DEBUG
177// Only used for debugging.
// Maps a token id to a printable string. Lookup order: printable ASCII ids,
// then local_names_, global_names_ and property_names_ (linear scans over the
// mapped values), then the macro-generated switch cases. Hits UNREACHABLE()
// if nothing matches.
178std::string AsmJsScanner::Name(token_t token) const {
// Token ids 32..126 are returned as the single character with that code.
179 if (token >= 32 && token < 127) {
180 return std::string(1, static_cast<char>(token));
181 }
// The maps are keyed by name, so finding a name by token id needs a scan.
182 for (auto& i : local_names_) {
183 if (i.second == token) {
184 return i.first;
185 }
186 }
187 for (auto& i : global_names_) {
188 if (i.second == token) {
189 return i.first;
190 }
191 }
192 for (auto& i : property_names_) {
193 if (i.second == token) {
194 return i.first;
195 }
196 }
// NOTE(review): the list-macro invocations that expand the two V definitions
// below (listing lines 201 and 206) are missing from this extract.
197 switch (token) {
198#define V(rawname, name) \
199 case kToken_##name: \
200 return rawname;
202#undef V
203#define V(name, value, string_name) \
204 case name: \
205 return string_name;
207 default:
208 break;
209#undef V
210 }
211 UNREACHABLE();
212}
213#endif
214
226
// Body of the identifier consumer. Its signature line is missing from this
// listing (presumably void ConsumeIdentifier(base::uc32 ch), as called for
// identifier-start characters - confirm against the header).
// Collects the identifier text into identifier_string_, then resolves it to
// a token id: an existing property / local / global entry if one is found,
// otherwise a newly assigned id.
228 // Consume characters while still part of the identifier.
229 identifier_string_.clear();
230 while (IsIdentifierPart(ch)) {
231 identifier_string_ += ch;
232 ch = stream_->Advance();
233 }
234 // Go back one for next time.
235 stream_->Back();
236
237 // Decode what the identifier means.
// An identifier that follows a dot is looked up only among property names.
238 if (preceding_token_ == '.') {
// NOTE(review): the lookup that declares i (listing line 239) is missing from
// this extract.
240 if (i != property_names_.end()) {
241 token_ = i->second;
242 return;
243 }
244 } else {
// Local names are consulted first, so they take precedence over globals.
245 {
246 auto i = local_names_.find(identifier_string_);
247 if (i != local_names_.end()) {
248 token_ = i->second;
249 return;
250 }
251 }
// Global names are only consulted outside of a local scope.
252 if (!in_local_scope_) {
// NOTE(review): the lookup that declares i (listing line 253) is missing from
// this extract.
254 if (i != global_names_.end()) {
255 token_ = i->second;
256 return;
257 }
258 }
259 }
// Not found: assign a fresh token id. Each branch first checks that the
// relevant count is still below kMaxIdentifierCount.
// NOTE(review): the statements that assign and record the new id in the
// property and global branches (listing lines 262-263, 267, 270-271) are
// missing from this extract.
260 if (preceding_token_ == '.') {
261 CHECK_LT(global_count_, kMaxIdentifierCount);
264 } else if (in_local_scope_) {
265 CHECK_LT(local_names_.size(), kMaxIdentifierCount);
// Local ids count downward from kLocalsStart as more locals are added.
266 token_ = kLocalsStart - static_cast<token_t>(local_names_.size());
268 } else {
269 CHECK_LT(global_count_, kMaxIdentifierCount);
272 }
273}
274
275namespace {
276bool IsValidImplicitOctal(std::string_view number) {
277 DCHECK_EQ(number[0], '0');
278 return std::all_of(number.begin() + 1, number.end(), IsOctalDigit);
279}
280} // namespace
281
// Body of the numeric-literal consumer. Its signature line is missing from
// this listing (presumably void ConsumeNumber(base::uc32 ch), as called for
// number-start characters - confirm against the header).
// Collects a run of characters that could belong to a number, then sets
// token_ to kUnsigned, kDouble, the dot token, or kParseError.
283 std::string number;
284 number.assign(1, ch);
285 bool has_dot = ch == '.';
286 bool has_prefix = false;
// The filter below is deliberately permissive: it accepts decimal and hex
// digits, dots, the prefix letters b / o / x, and a sign directly after an
// exponent marker when no prefix letter has been seen. Invalid combinations
// are rejected by the conversion step further down.
287 for (;;) {
288 ch = stream_->Advance();
289 if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') ||
290 (ch >= 'A' && ch <= 'F') || ch == '.' || ch == 'b' || ch == 'o' ||
291 ch == 'x' ||
292 ((ch == '-' || ch == '+') && !has_prefix &&
293 (number[number.size() - 1] == 'e' ||
294 number[number.size() - 1] == 'E'))) {
295 // TODO(bradnelson): Test weird cases ending in -.
296 if (ch == '.') {
297 has_dot = true;
298 }
299 if (ch == 'b' || ch == 'o' || ch == 'x') {
300 has_prefix = true;
301 }
302 number.push_back(ch);
303 } else {
304 break;
305 }
306 }
// Un-read the first character that did not belong to the number.
307 stream_->Back();
308 // Special case the most common number.
309 if (number.size() == 1 && number[0] == '0') {
310 unsigned_value_ = 0;
311 token_ = kUnsigned;
312 return;
313 }
314 // Pick out dot.
315 if (number.size() == 1 && number[0] == '.') {
316 token_ = '.';
317 return;
318 }
319 // Decode numbers, with separate paths for prefixes and implicit octals.
// NOTE(review): the conversion calls that presumably assign double_value_ in
// each branch below (listing lines 328-329, 332-333, 336-337, 346-347 and
// 349-351) are missing from this extract.
320 if (has_prefix && number[0] == '0') {
321 // "0[xob]" by itself is a parse error.
322 if (number.size() <= 2) {
323 token_ = kParseError;
324 return;
325 }
326 switch (number[1]) {
327 case 'b':
330 break;
331 case 'o':
334 break;
335 case 'x':
338 break;
339 default:
340 // If there is a prefix character, but it's not the second character,
341 // then there's a parse error somewhere.
342 token_ = kParseError;
343 break;
344 }
345 } else if (number[0] == '0' && !has_prefix && IsValidImplicitOctal(number)) {
348 } else {
352 }
353 if (std::isnan(double_value_)) {
354 // Check if string to number conversion didn't consume all the characters.
355 // This happens if the character filter let through something invalid
356 // like: 0123ef for example.
357 // TODO(bradnelson): Check if this happens often enough to be a perf
358 // problem.
// A leading dot followed by text that is not a number is treated as a plain
// dot token: every character after the dot is pushed back onto the stream.
359 if (number[0] == '.') {
360 for (size_t k = 1; k < number.size(); ++k) {
361 stream_->Back();
362 }
363 token_ = '.';
364 return;
365 }
366 // Anything else that doesn't parse is an error.
367 token_ = kParseError;
368 return;
369 }
// Any literal containing a dot, or whose value has a fractional part, is a
// double; everything else must fit into an unsigned 32-bit value.
370 if (has_dot || trunc(double_value_) != double_value_) {
371 token_ = kDouble;
372 } else {
373 // Exceeding safe integer range is an error.
374 if (double_value_ > static_cast<double>(kMaxUInt32)) {
375 token_ = kParseError;
376 return;
377 }
378 unsigned_value_ = static_cast<uint32_t>(double_value_);
379 token_ = kUnsigned;
380 }
381}
382
// Body of the block-comment consumer. Its signature line is missing from
// this listing (presumably bool AsmJsScanner::ConsumeCComment(), which the
// slash-star branch of the scan loop calls - confirm against the header).
// Returns true once the closing star-slash has been read, false if the input
// ends first.
384 for (;;) {
385 base::uc32 ch = stream_->Advance();
// Looping while the character is a star lets a run of stars directly before
// the closing slash still terminate the comment.
386 while (ch == '*') {
387 ch = stream_->Advance();
388 if (ch == '/') {
389 return true;
390 }
391 }
392 if (ch == '\n') {
// NOTE(review): the statement inside this branch (listing line 393) is
// missing from this extract.
394 }
395 if (ch == kEndOfInputU) {
396 return false;
397 }
398 }
399}
400
// Body of a routine that skips characters up to and including the next
// newline, or up to the end of input. Its signature line is missing from this
// listing (presumably the line-comment consumer used by the double-slash
// branch of the scan loop - confirm against the header).
402 for (;;) {
403 base::uc32 ch = stream_->Advance();
404 if (ch == '\n') {
// NOTE(review): the statement before this return (listing line 405) is
// missing from this extract.
406 return;
407 }
408 if (ch == kEndOfInputU) {
409 return;
410 }
411 }
412}
413
// Body of the string-literal consumer. Its signature line is missing from
// this listing (presumably void ConsumeString(base::uc32 quote), called with
// the opening quote character - confirm against the header).
// Sets token_ to kToken_UseAsm on an exact match and to kParseError otherwise.
415 // Only string allowed is 'use asm' / "use asm".
416 const char* expected = "use asm";
// Compare the stream against the expected text one character at a time and
// stop at the first mismatch.
417 for (; *expected != '\0'; ++expected) {
418 if (stream_->Advance() != static_cast<base::uc32>(*expected)) {
419 token_ = kParseError;
420 return;
421 }
422 }
// The closing quote must be the same character as the opening one.
423 if (stream_->Advance() != quote) {
424 token_ = kParseError;
425 return;
426 }
427 token_ = kToken_UseAsm;
428}
429
// Body of the comparison / shift operator consumer. Its signature line is
// missing from this listing (presumably void ConsumeCompareOrShift(base::uc32
// ch), where ch is the operator character already read - confirm against the
// header).
// Uses up to two characters of lookahead to choose between the one-character
// token and the longer comparison and shift tokens.
431 base::uc32 next_ch = stream_->Advance();
// Any of the four operator characters followed by an equals sign forms a
// two-character comparison token.
432 if (next_ch == '=') {
433 switch (ch) {
434 case '<':
435 token_ = kToken_LE;
436 break;
437 case '>':
438 token_ = kToken_GE;
439 break;
440 case '=':
441 token_ = kToken_EQ;
442 break;
443 case '!':
444 token_ = kToken_NE;
445 break;
446 default:
447 UNREACHABLE();
448 }
449 } else if (ch == '<' && next_ch == '<') {
450 token_ = kToken_SHL;
451 } else if (ch == '>' && next_ch == '>') {
// A third greater-than sign selects kToken_SHR; otherwise the extra character
// is pushed back and the token is kToken_SAR.
452 if (stream_->Advance() == '>') {
453 token_ = kToken_SHR;
454 } else {
455 token_ = kToken_SAR;
456 stream_->Back();
457 }
458 } else {
// No multi-character operator: push the lookahead back and use the character
// itself as the token id.
459 stream_->Back();
460 token_ = ch;
461 }
462}
463
// Body of a character predicate whose signature line is missing from this
// listing (presumably bool IsIdentifierStart(base::uc32 ch) - confirm against
// the header). True for ASCII letters of either case, underscore and dollar.
465 return base::IsInRange(AsciiAlphaToLower(ch), 'a', 'z') || ch == '_' ||
466 ch == '$';
467}
468
472
// Body of a character predicate whose signature line is missing from this
// listing (presumably bool IsNumberStart(base::uc32 ch) - confirm against the
// header). True for a dot or a decimal digit.
474 return ch == '.' || IsDecimalDigit(ch);
475}
476
477} // namespace internal
478} // namespace v8
#define STDLIB_ARRAY_TYPE_LIST(V)
Definition asm-names.h:50
#define STDLIB_MATH_VALUE_LIST(V)
Definition asm-names.h:9
#define SIMPLE_SINGLE_TOKEN_LIST(V)
Definition asm-names.h:98
#define STDLIB_MATH_FUNCTION_LIST(V)
Definition asm-names.h:41
#define KEYWORD_NAME_LIST(V)
Definition asm-names.h:66
#define LONG_SYMBOL_NAME_LIST(V)
Definition asm-names.h:87
#define SPECIAL_TOKEN_LIST(V)
Definition asm-names.h:104
#define STDLIB_OTHER_LIST(V)
Definition asm-names.h:60
SourcePosition pos
void ConsumeNumber(base::uc32 ch)
std::unordered_map< std::string, token_t > local_names_
void ConsumeIdentifier(base::uc32 ch)
static constexpr base::uc32 kEndOfInputU
std::unordered_map< std::string, token_t > property_names_
void ConsumeCompareOrShift(base::uc32 ch)
Utf16CharacterStream * stream_
bool IsIdentifierPart(base::uc32 ch)
std::unordered_map< std::string, token_t > global_names_
uint32_t AsUnsigned() const
Definition asm-scanner.h:97
bool IsNumberStart(base::uc32 ch)
void ConsumeString(base::uc32 quote)
AsmJsScanner(Utf16CharacterStream *stream)
bool IsIdentifierStart(base::uc32 ch)
const int position_
uint32_t uc32
Definition strings.h:19
constexpr bool IsInRange(T value, U lower_limit, U higher_limit)
Definition bounds.h:20
constexpr Vector< T > VectorOf(T *start, size_t size)
Definition vector.h:360
double OctalStringToDouble(base::Vector< const uint8_t > str)
void PrintF(const char *format,...)
Definition utils.cc:39
double HexStringToDouble(base::Vector< const uint8_t > str)
constexpr bool IsAsciiIdentifier(base::uc32 c)
double ImplicitOctalStringToDouble(base::Vector< const uint8_t > str)
constexpr bool IsOctalDigit(base::uc32 c)
double BinaryStringToDouble(base::Vector< const uint8_t > str)
constexpr bool IsDecimalDigit(base::uc32 c)
V8_EXPORT_PRIVATE FlagValues v8_flags
constexpr int AsciiAlphaToLower(base::uc32 c)
constexpr uint32_t kMaxUInt32
Definition globals.h:387
double StringToDouble(const char *str, ConversionFlag flags, double empty_string_val)
#define CHECK_LT(lhs, rhs)
#define DCHECK_NE(v1, v2)
Definition logging.h:486
#define DCHECK(condition)
Definition logging.h:482
#define DCHECK_EQ(v1, v2)
Definition logging.h:485