v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
scanner.cc
Go to the documentation of this file.
1// Copyright 2011 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Features shared by parsing and pre-parsing scanners.
6
8
9#include <stdint.h>
10
11#include <cmath>
12#include <optional>
13
15#include "src/base/strings.h"
16#include "src/base/vlq-base64.h"
19#include "src/objects/bigint.h"
22#include "src/zone/zone.h"
23
24namespace v8::internal {
25
27 public:
28 ErrorState(MessageTemplate* message_stack, Scanner::Location* location_stack)
29 : message_stack_(message_stack),
30 old_message_(*message_stack),
31 location_stack_(location_stack),
32 old_location_(*location_stack) {
33 *message_stack_ = MessageTemplate::kNone;
35 }
36
41
42 void MoveErrorTo(TokenDesc* dest) {
43 if (*message_stack_ == MessageTemplate::kNone) {
44 return;
45 }
46 if (dest->invalid_template_escape_message == MessageTemplate::kNone) {
49 }
50 *message_stack_ = MessageTemplate::kNone;
52 }
53
54 private:
59};
60
61// ----------------------------------------------------------------------------
62// Scanner::BookmarkScope
63
65 std::numeric_limits<size_t>::max() - 1;
67 std::numeric_limits<size_t>::max();
68
73
75 DCHECK(HasBeenSet()); // Caller hasn't called SetBookmark.
76 if (had_parser_error_) {
77 scanner_->set_parser_error();
78 } else {
79 scanner_->reset_parser_error_flag();
80 scanner_->SeekNext(bookmark_);
81 }
82 bookmark_ = kBookmarkWasApplied;
83}
84
86 return bookmark_ != kNoBookmark && bookmark_ != kBookmarkWasApplied;
87}
88
90 return bookmark_ == kBookmarkWasApplied;
91}
92
93// ----------------------------------------------------------------------------
94// Scanner
95
104
106 // Need to capture identifiers in order to recognize "get" and "set"
107 // in object literals.
108 Init();
110 Scan();
111}
112
113// static
116 return c == Scanner::Invalid();
117}
118
119template <bool capture_raw, bool unicode>
120base::uc32 Scanner::ScanHexNumber(int expected_length) {
121 DCHECK_LE(expected_length, 4); // prevent overflow
122
123 int begin = source_pos() - 2;
124 base::uc32 x = 0;
125 for (int i = 0; i < expected_length; i++) {
126 int d = base::HexValue(c0_);
127 if (d < 0) {
128 ReportScannerError(Location(begin, begin + expected_length + 2),
129 unicode
130 ? MessageTemplate::kInvalidUnicodeEscapeSequence
131 : MessageTemplate::kInvalidHexEscapeSequence);
132 return Invalid();
133 }
134 x = x * 16 + d;
136 }
137
138 return x;
139}
140
141template <bool capture_raw>
143 int beg_pos) {
144 base::uc32 x = 0;
145 int d = base::HexValue(c0_);
146 if (d < 0) return Invalid();
147
148 while (d >= 0) {
149 x = x * 16 + d;
150 if (x > max_value) {
151 ReportScannerError(Location(beg_pos, source_pos() + 1),
152 MessageTemplate::kUndefinedUnicodeCodePoint);
153 return Invalid();
154 }
156 d = base::HexValue(c0_);
157 }
158
159 return x;
160}
161
163 // Rotate through tokens.
165 current_ = next_;
166 // Either we already have the next token lined up, in which case next_next_
167 // simply becomes next_. In that case we use current_ as new next_next_ and
168 // clear its token to indicate that it wasn't scanned yet. Otherwise we use
169 // current_ as next_ and scan into it, leaving next_next_ uninitialized.
170 if (V8_LIKELY(next_next().token == Token::kUninitialized)) {
171 DCHECK(next_next_next().token == Token::kUninitialized);
172 next_ = previous;
173 // Use 'previous' instead of 'next_' because for some reason the compiler
174 // thinks 'next_' could be modified before the entry into Scan.
176 Scan(previous);
177 } else {
179
180 if (V8_LIKELY(next_next_next().token == Token::kUninitialized)) {
182 } else {
185 }
186
187 previous->token = Token::kUninitialized;
188 DCHECK_NE(Token::kUninitialized, current().token);
189 }
190 return current().token;
191}
192
194 DCHECK(next().token != Token::kDiv);
195 DCHECK(next().token != Token::kAssignDiv);
196
197 if (next_next().token != Token::kUninitialized) {
198 return next_next().token;
199 }
200 TokenDesc* temp = next_;
202 next().after_line_terminator = false;
203 Scan();
205 next_ = temp;
206 return next_next().token;
207}
208
210 if (next_next_next().token != Token::kUninitialized) {
211 return next_next_next().token;
212 }
213 // PeekAhead() must be called first in order to call PeekAheadAhead().
214 DCHECK(next_next().token != Token::kUninitialized);
215 TokenDesc* temp = next_;
216 TokenDesc* temp_next = next_next_;
218 next().after_line_terminator = false;
219 Scan();
221 next_next_ = temp_next;
222 next_ = temp;
223 return next_next_next().token;
224}
225
227 if (flags_.is_module()) {
228 ReportScannerError(source_pos(), MessageTemplate::kHtmlCommentInModule);
229 return Token::kIllegal;
230 }
231 return SkipSingleLineComment();
232}
233
235 // The line terminator at the end of the line is not considered
236 // to be part of the single-line comment; it is recognized
237 // separately by the lexical grammar and becomes part of the
238 // stream of input elements for the syntactic grammar (see
239 // ECMA-262, section 7.4).
240 AdvanceUntil([](base::uc32 c0) { return unibrow::IsLineTerminator(c0); });
241
242 return Token::kWhitespace;
243}
244
246 TryToParseMagicComment(hash_or_at_sign);
248 return Token::kWhitespace;
249 }
250 return SkipSingleLineComment();
251}
252
253namespace {
254
255void ProcessPerFunctionCompileHints(const base::Vector<const uint8_t>& data,
256 int current_position,
257 std::vector<int>& positions) {
258 // Compile hints are relative to the position of the comment end.
259 int last_position = current_position;
260 size_t pos = 0;
261 const char* char_data = reinterpret_cast<const char*>(data.data());
262 while (pos < static_cast<size_t>(data.length())) {
263 int32_t delta = base::VLQBase64Decode(char_data, data.length(), &pos);
264 if (delta == std::numeric_limits<int32_t>::min()) {
265 // Invalid data, bail out and clear the data we read so far. (Not using
266 // the data until the invalid portion is consistent with 2-byte data not
267 // being handled at all.)
268 positions.clear();
269 return;
270 }
271 last_position += delta;
272 positions.push_back(last_position);
273 }
274 positions.shrink_to_fit();
275}
276
277} // namespace
278
280 // Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this
281 // function will just return if it cannot parse a magic comment.
283 if (!IsWhiteSpace(c0_)) return;
284 Advance();
286 name.Start();
287
289 c0_ != '=') {
290 name.AddChar(c0_);
291 Advance();
292 }
293 if (!name.is_one_byte()) return;
294 base::Vector<const uint8_t> name_literal = name.one_byte_literal();
296 LiteralBuffer per_function_compile_hints_value;
297 if (name_literal == base::StaticOneByteVector("sourceURL")) {
298 value = &source_url_;
299 } else if (name_literal == base::StaticOneByteVector("sourceMappingURL")) {
300 value = &source_mapping_url_;
301 DCHECK(hash_or_at_sign == '#' || hash_or_at_sign == '@');
302 saw_source_mapping_url_magic_comment_at_sign_ = hash_or_at_sign == '@';
303 } else if (!saw_non_comment_ &&
304 name_literal ==
305 base::StaticOneByteVector("allFunctionsCalledOnLoad") &&
306 hash_or_at_sign == '#' && c0_ != '=') {
308 } else if (name_literal ==
309 base::StaticOneByteVector("functionsCalledOnLoad") &&
310 hash_or_at_sign == '#') {
311 value = &per_function_compile_hints_value;
312 } else {
313 return;
314 }
315 if (c0_ != '=')
316 return;
317 value->Start();
318 Advance();
319 while (IsWhiteSpace(c0_)) {
320 Advance();
321 }
323 if (IsWhiteSpace(c0_)) {
324 break;
325 }
326 value->AddChar(c0_);
327 Advance();
328 }
329 // Allow whitespace at the end.
331 if (!IsWhiteSpace(c0_)) {
332 value->Start();
333 break;
334 }
335 Advance();
336 }
337 if (value == &per_function_compile_hints_value &&
338 per_function_compile_hints_value.is_one_byte()) {
339 base::Vector<const uint8_t> value_literal =
340 per_function_compile_hints_value.one_byte_literal();
343 ProcessPerFunctionCompileHints(value_literal, source_pos(),
345 }
346}
347
349 // Allow off-by-<slack> in the compile hints positions, to account for adding
350 // newlines at the end of the comment, function positions being off-by-one,
351 // etc.
352 const int kSlack = 3;
357 position - kSlack) {
359 }
362 return false;
363 }
364 int hint_position = per_function_compile_hint_positions_
366 return hint_position >= position - kSlack &&
367 hint_position <= position + kSlack;
368}
369
371 DCHECK_EQ(c0_, '*');
372
373 // Until we see the first newline, check for * and newline characters.
374 if (!next().after_line_terminator) {
375 do {
376 AdvanceUntil([](base::uc32 c0) {
377 if (V8_UNLIKELY(static_cast<uint32_t>(c0) > kMaxAscii)) {
378 return unibrow::IsLineTerminator(c0);
379 }
380 uint8_t char_flags = character_scan_flags[c0];
382 });
383
384 while (c0_ == '*') {
385 Advance();
386 if (c0_ == '/') {
387 Advance();
388 return Token::kWhitespace;
389 }
390 }
391
394 break;
395 }
396 } while (c0_ != kEndOfInput);
397 }
398
399 // After we've seen newline, simply try to find '*/'.
400 while (c0_ != kEndOfInput) {
401 AdvanceUntil([](base::uc32 c0) { return c0 == '*'; });
402
403 while (c0_ == '*') {
404 Advance();
405 if (c0_ == '/') {
406 Advance();
407 return Token::kWhitespace;
408 }
409 }
410 }
411
412 return Token::kIllegal;
413}
414
416 // Check for <!-- comments.
417 DCHECK_EQ(c0_, '!');
418 Advance();
419 if (c0_ != '-' || Peek() != '-') {
420 PushBack('!'); // undo Advance()
421 return Token::kLessThan;
422 }
423 Advance();
424
425 found_html_comment_ = true;
426 return SkipSingleHTMLComment();
427}
428
429#ifdef DEBUG
430void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {
431 // Only TEMPLATE_* tokens can have an invalid_template_escape_message.
432 // kIllegal and kUninitialized can have garbage for the field.
433
434 switch (token.token) {
435 case Token::kUninitialized:
436 case Token::kIllegal:
437 // token.literal_chars & other members might be garbage. That's ok.
438 case Token::kTemplateSpan:
439 case Token::kTemplateTail:
440 break;
441 default:
442 DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone);
443 break;
444 }
445}
446#endif // DEBUG
447
449 // After this call, we will have the token at the given position as
450 // the "next" token. The "current" token will be invalid.
451 if (pos == next().location.beg_pos) return;
452 int current_pos = source_pos();
453 DCHECK_EQ(next().location.end_pos, current_pos);
454 // Positions inside the lookahead token aren't supported.
455 DCHECK(pos >= current_pos);
456 if (pos != current_pos) {
457 source_->Seek(pos);
458 Advance();
459 // This function is only called to seek to the location
460 // of the end of a function (at the "}" token). It doesn't matter
461 // whether there was a line terminator in the part we skip.
462 next().after_line_terminator = false;
463 }
464 Scan();
465}
466
467template <bool capture_raw>
469 base::uc32 c = c0_;
471
472 // Skip escaped newlines.
474 if (!capture_raw && unibrow::IsLineTerminator(c)) {
475 // Allow escaped CR+LF newlines in multiline string literals.
477 return true;
478 }
479
480 switch (c) {
481 case 'b' : c = '\b'; break;
482 case 'f' : c = '\f'; break;
483 case 'n' : c = '\n'; break;
484 case 'r' : c = '\r'; break;
485 case 't' : c = '\t'; break;
486 case 'u' : {
488 if (IsInvalid(c)) return false;
489 break;
490 }
491 case 'v':
492 c = '\v';
493 break;
494 case 'x': {
496 if (IsInvalid(c)) return false;
497 break;
498 }
499 case '0':
500 case '1':
501 case '2':
502 case '3':
503 case '4':
504 case '5':
505 case '6':
506 case '7':
508 break;
509 case '8':
510 case '9':
511 // '\8' and '\9' are disallowed in strict mode.
512 // Reuse the octal error state to propagate the error.
514 octal_message_ = capture_raw ? MessageTemplate::kTemplate8Or9Escape
515 : MessageTemplate::kStrict8Or9Escape;
516 break;
517 }
518
519 // Other escaped characters are interpreted as their non-escaped version.
521 return true;
522}
523
524template <bool capture_raw>
526 DCHECK('0' <= c && c <= '7');
527 base::uc32 x = c - '0';
528 int i = 0;
529 for (; i < length; i++) {
530 int d = c0_ - '0';
531 if (d < 0 || d > 7) break;
532 int nx = x * 8 + d;
533 if (nx >= 256) break;
534 x = nx;
536 }
537 // Anything except '\0' is an octal escape sequence, illegal in strict mode.
538 // Remember the position of octal escape sequences so that an error
539 // can be reported later (in strict mode).
540 // We don't report the error immediately, because the octal escape can
541 // occur before the "use strict" directive.
542 if (c != '0' || i > 0 || IsNonOctalDecimalDigit(c0_)) {
543 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);
544 octal_message_ = capture_raw ? MessageTemplate::kTemplateOctalLiteral
545 : MessageTemplate::kStrictOctalEscape;
546 }
547 return x;
548}
549
551 base::uc32 quote = c0_;
552
554 while (true) {
555 AdvanceUntil([this](base::uc32 c0) {
556 if (V8_UNLIKELY(static_cast<uint32_t>(c0) > kMaxAscii)) {
558 return true;
559 }
560 AddLiteralChar(c0);
561 return false;
562 }
563 uint8_t char_flags = character_scan_flags[c0];
564 if (MayTerminateString(char_flags)) return true;
565 AddLiteralChar(c0);
566 return false;
567 });
568
569 while (c0_ == '\\') {
570 Advance();
571 // TODO(verwaest): Check whether we can remove the additional check.
573 return Token::kIllegal;
574 }
575 }
576
577 if (c0_ == quote) {
578 Advance();
579 return Token::kString;
580 }
581
582 if (V8_UNLIKELY(c0_ == kEndOfInput ||
584 return Token::kIllegal;
585 }
586
588 }
589}
590
593 DCHECK_EQ(c0_, '#');
595 int pos = source_pos();
596 Advance();
597 if (IsIdentifierStart(c0_) ||
599 AddLiteralChar('#');
601 return token == Token::kIllegal ? Token::kIllegal : Token::kPrivateName;
602 }
603
604 ReportScannerError(pos, MessageTemplate::kInvalidOrUnexpectedToken);
605 return Token::kIllegal;
606}
607
609 // When scanning a TemplateSpan, we are looking for the following construct:
610 // kTemplateSpan ::
611 // ` LiteralChars* ${
612 // | } LiteralChars* ${
613 //
614 // kTemplateTail ::
615 // ` LiteralChars* `
616 // | } LiteralChar* `
617 //
618 // A kTemplateSpan should always be followed by an Expression, while a
619 // kTemplateTail terminates a TemplateLiteral and does not need to be
620 // followed by an Expression.
621
622 // These scoped helpers save and restore the original error state, so that we
623 // can specially treat invalid escape sequences in templates (which are
624 // handled by the parser).
625 ErrorState scanner_error_state(&scanner_error_, &scanner_error_location_);
626 ErrorState octal_error_state(&octal_message_, &octal_pos_);
627
628 Token::Value result = Token::kTemplateSpan;
631 const bool capture_raw = true;
632 while (true) {
633 base::uc32 c = c0_;
634 if (c == '`') {
635 Advance(); // Consume '`'
636 result = Token::kTemplateTail;
637 break;
638 } else if (c == '$' && Peek() == '{') {
639 Advance(); // Consume '$'
640 Advance(); // Consume '{'
641 break;
642 } else if (c == '\\') {
643 Advance(); // Consume '\\'
645 if (capture_raw) AddRawLiteralChar('\\');
647 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty
648 // code unit sequence.
649 base::uc32 lastChar = c0_;
650 Advance();
651 if (lastChar == '\r') {
652 // Also skip \n.
653 if (c0_ == '\n') Advance();
654 lastChar = '\n';
655 }
656 if (capture_raw) AddRawLiteralChar(lastChar);
657 } else {
658 bool success = ScanEscape<capture_raw>();
659 USE(success);
660 DCHECK_EQ(!success, has_error());
661 // For templates, invalid escape sequence checking is handled in the
662 // parser.
663 scanner_error_state.MoveErrorTo(next_);
664 octal_error_state.MoveErrorTo(next_);
665 }
666 } else if (c == kEndOfInput) {
667 // Unterminated template literal
668 break;
669 } else {
670 Advance(); // Consume c.
671 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.
672 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence
673 // consisting of the CV 0x000A.
674 if (c == '\r') {
675 if (c0_ == '\n') Advance(); // Consume '\n'
676 c = '\n';
677 }
678 if (capture_raw) AddRawLiteralChar(c);
680 }
681 }
683 next().token = result;
684
685 return result;
686}
687
688template <typename IsolateT>
689DirectHandle<String> Scanner::SourceUrl(IsolateT* isolate) const {
691 if (source_url_.length() > 0) {
692 tmp = source_url_.Internalize(isolate);
693 }
694 return tmp;
695}
696
697template DirectHandle<String> Scanner::SourceUrl(Isolate* isolate) const;
699
700template <typename IsolateT>
703 if (source_mapping_url_.length() > 0) {
704 tmp = source_mapping_url_.Internalize(isolate);
705 }
706 return tmp;
707}
708
711 LocalIsolate* isolate) const;
712
714 bool is_check_first_digit) {
715 // we must have at least one digit after 'x'/'b'/'o'
716 if (is_check_first_digit && !predicate(c0_)) return false;
717
718 bool separator_seen = false;
719 while (predicate(c0_) || c0_ == '_') {
720 if (c0_ == '_') {
721 Advance();
722 if (c0_ == '_') {
724 MessageTemplate::kContinuousNumericSeparator);
725 return false;
726 }
727 separator_seen = true;
728 continue;
729 }
730 separator_seen = false;
732 }
733
734 if (separator_seen) {
736 MessageTemplate::kTrailingNumericSeparator);
737 return false;
738 }
739
740 return true;
741}
742
743bool Scanner::ScanDecimalDigits(bool allow_numeric_separator) {
744 if (allow_numeric_separator) {
746 }
747 while (IsDecimalDigit(c0_)) {
749 }
750 if (c0_ == '_') {
752 MessageTemplate::kInvalidOrUnexpectedToken);
753 return false;
754 }
755 return true;
756}
757
759 bool separator_seen = false;
760 while (IsDecimalDigit(c0_) || c0_ == '_') {
761 if (c0_ == '_') {
762 Advance();
763 if (c0_ == '_') {
765 MessageTemplate::kContinuousNumericSeparator);
766 return false;
767 }
768 separator_seen = true;
769 continue;
770 }
771 separator_seen = false;
772 *value = 10 * *value + (c0_ - '0');
773 base::uc32 first_char = c0_;
774 Advance();
775 AddLiteralChar(first_char);
776 }
777
778 if (separator_seen) {
780 MessageTemplate::kTrailingNumericSeparator);
781 return false;
782 }
783
784 return true;
785}
786
787bool Scanner::ScanDecimalAsSmi(uint64_t* value, bool allow_numeric_separator) {
788 if (allow_numeric_separator) {
790 }
791
792 while (IsDecimalDigit(c0_)) {
793 *value = 10 * *value + (c0_ - '0');
794 base::uc32 first_char = c0_;
795 Advance();
796 AddLiteralChar(first_char);
797 }
798 return true;
799}
800
804
808
812
813 while (true) {
814 // (possible) octal number
817 return true;
818 }
819 if (!IsOctalDigit(c0_)) {
820 // Octal literal finished.
821 octal_pos_ = Location(start_pos, source_pos());
822 octal_message_ = MessageTemplate::kStrictOctalLiteral;
823 return true;
824 }
826 }
827}
828
832
834 if (c0_ == '+' || c0_ == '-') AddLiteralCharAdvance();
835 // we must have at least one decimal digit after 'e'/'E'
836 if (!IsDecimalDigit(c0_)) return false;
837 return ScanDecimalDigits(true);
838}
839
841 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
842
844
846 bool at_start = !seen_period;
847 int start_pos = source_pos(); // For reporting octal positions.
848 if (seen_period) {
849 // we have already seen a decimal point of the float
850 AddLiteralChar('.');
851 if (c0_ == '_') {
852 return Token::kIllegal;
853 }
854 // we know we have at least one digit
855 if (!ScanDecimalDigits(true)) return Token::kIllegal;
856 } else {
857 // if the first character is '0' we must check for octals and hex
858 if (c0_ == '0') {
860
861 // either 0, 0exxx, 0Exxx, 0.xxx, a hex number, a binary number or
862 // an octal number.
863 if (AsciiAlphaToLower(c0_) == 'x') {
865 kind = HEX;
866 if (!ScanHexDigits()) return Token::kIllegal;
867 } else if (AsciiAlphaToLower(c0_) == 'o') {
869 kind = OCTAL;
870 if (!ScanOctalDigits()) return Token::kIllegal;
871 } else if (AsciiAlphaToLower(c0_) == 'b') {
873 kind = BINARY;
874 if (!ScanBinaryDigits()) return Token::kIllegal;
875 } else if (IsOctalDigit(c0_)) {
877 if (!ScanImplicitOctalDigits(start_pos, &kind)) {
878 return Token::kIllegal;
879 }
881 at_start = false;
882 }
883 } else if (IsNonOctalDecimalDigit(c0_)) {
885 } else if (c0_ == '_') {
887 MessageTemplate::kZeroDigitNumericSeparator);
888 return Token::kIllegal;
889 }
890 }
891
892 // Parse decimal digits and allow trailing fractional part.
894 bool allow_numeric_separator = kind != DECIMAL_WITH_LEADING_ZERO;
895 // This is an optimization for parsing Decimal numbers as Smi's.
896 if (at_start) {
897 uint64_t value = 0;
898 // scan subsequent decimal digits
899 if (!ScanDecimalAsSmi(&value, allow_numeric_separator)) {
900 return Token::kIllegal;
901 }
902
903 if (next().literal_chars.one_byte_literal().length() <= 10 &&
904 value <= Smi::kMaxValue && c0_ != '.' && !IsIdentifierStart(c0_)) {
905 next().smi_value = static_cast<uint32_t>(value);
906
908 octal_pos_ = Location(start_pos, source_pos());
909 octal_message_ = MessageTemplate::kStrictDecimalWithLeadingZero;
910 }
911 return Token::kSmi;
912 }
913 }
914
915 if (!ScanDecimalDigits(allow_numeric_separator)) {
916 return Token::kIllegal;
917 }
918 if (c0_ == '.') {
919 seen_period = true;
921 if (c0_ == '_') {
922 return Token::kIllegal;
923 }
924 if (!ScanDecimalDigits(true)) return Token::kIllegal;
925 }
926 }
927 }
928
929 bool is_bigint = false;
930 if (c0_ == 'n' && !seen_period && IsValidBigIntKind(kind)) {
931 // Check that the literal is within our limits for BigInt length.
932 // For simplicity, use 4 bits per character to calculate the maximum
933 // allowed literal length.
934 static const int kMaxBigIntCharacters = BigInt::kMaxLengthBits / 4;
935 int length = source_pos() - start_pos - (kind != DECIMAL ? 2 : 0);
936 if (length > kMaxBigIntCharacters) {
938 MessageTemplate::kBigIntTooBig);
939 return Token::kIllegal;
940 }
941
942 is_bigint = true;
943 Advance();
944 } else if (AsciiAlphaToLower(c0_) == 'e') {
945 // scan exponent, if any
946 DCHECK_NE(kind, HEX); // 'e'/'E' must be scanned as part of the hex number
947
948 if (!IsDecimalNumberKind(kind)) return Token::kIllegal;
949
950 // scan exponent
952
953 if (!ScanSignedInteger()) return Token::kIllegal;
954 }
955
956 // The source character immediately following a numeric literal must
957 // not be an identifier start or a decimal digit; see ECMA-262
958 // section 7.8.3, page 17 (note that we read only one decimal digit
959 // if the value is 0).
961 return Token::kIllegal;
962 }
963
965 octal_pos_ = Location(start_pos, source_pos());
966 octal_message_ = MessageTemplate::kStrictDecimalWithLeadingZero;
967 }
968
970 return is_bigint ? Token::kBigInt : Token::kNumber;
971}
972
974 Advance();
975 if (c0_ != 'u') return Invalid();
976 Advance();
978}
979
980template <bool capture_raw>
982 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of
983 // hex digits between { } is arbitrary. \ and u have already been read.
984 if (c0_ == '{') {
985 int begin = source_pos() - 2;
987 base::uc32 cp =
989 if (cp == kInvalidSequence || c0_ != '}') {
991 MessageTemplate::kInvalidUnicodeEscapeSequence);
992 return Invalid();
993 }
995 return cp;
996 }
997 const bool unicode = true;
999}
1000
1002 bool can_be_keyword) {
1003 while (true) {
1004 if (c0_ == '\\') {
1005 escaped = true;
1007 // Only allow legal identifier part characters.
1008 // TODO(verwaest): Make this true.
1009 // DCHECK(!IsIdentifierPart('\'));
1011 if (c == '\\' || !IsIdentifierPart(c)) {
1012 return Token::kIllegal;
1013 }
1014 can_be_keyword = can_be_keyword && CharCanBeKeyword(c);
1015 AddLiteralChar(c);
1016 } else if (IsIdentifierPart(c0_) ||
1018 can_be_keyword = can_be_keyword && CharCanBeKeyword(c0_);
1020 } else {
1021 break;
1022 }
1023 }
1024
1025 if (can_be_keyword && next().literal_chars.is_one_byte()) {
1027 Token::Value token =
1028 KeywordOrIdentifierToken(chars.begin(), chars.length());
1029 if (base::IsInRange(token, Token::kIdentifier, Token::kYield)) return token;
1030
1031 if (token == Token::kFutureStrictReservedWord) {
1032 if (escaped) return Token::kEscapedStrictReservedWord;
1033 return token;
1034 }
1035
1036 if (!escaped) return token;
1037
1038 static_assert(Token::kLet + 1 == Token::kStatic);
1039 if (base::IsInRange(token, Token::kLet, Token::kStatic)) {
1040 return Token::kEscapedStrictReservedWord;
1041 }
1042 return Token::kEscapedKeyword;
1043 }
1044
1045 return Token::kIdentifier;
1046}
1047
1049 DCHECK_EQ(Token::kUninitialized, next_next().token);
1050 DCHECK(next().token == Token::kDiv || next().token == Token::kAssignDiv);
1051
1052 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
1053 bool in_character_class = false;
1054
1055 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
1056 // the scanner should pass uninterpreted bodies to the RegExp
1057 // constructor.
1059 if (next().token == Token::kAssignDiv) {
1060 AddLiteralChar('=');
1061 }
1062
1063 while (c0_ != '/' || in_character_class) {
1065 return false;
1066 }
1067 if (c0_ == '\\') { // Escape sequence.
1070 return false;
1071 }
1073 // If the escape allows more characters, i.e., \x??, \u????, or \c?,
1074 // only "safe" characters are allowed (letters, digits, underscore),
1075 // otherwise the escape isn't valid and the invalid character has
1076 // its normal meaning. I.e., we can just continue scanning without
1077 // worrying whether the following characters are part of the escape
1078 // or not, since any '/', '\\' or '[' is guaranteed to not be part
1079 // of the escape sequence.
1080 } else { // Unescaped character.
1081 if (c0_ == '[') in_character_class = true;
1082 if (c0_ == ']') in_character_class = false;
1084 }
1085 }
1086 Advance(); // consume '/'
1087
1088 next().token = Token::kRegExpLiteral;
1089 return true;
1090}
1091
1092std::optional<RegExpFlags> Scanner::ScanRegExpFlags() {
1093 DCHECK_EQ(Token::kRegExpLiteral, next().token);
1094
1097 while (IsIdentifierPart(c0_)) {
1098 std::optional<RegExpFlag> maybe_flag = JSRegExp::FlagFromChar(c0_);
1099 if (!maybe_flag.has_value()) return {};
1100 RegExpFlag flag = maybe_flag.value();
1101 if (flags & flag) return {};
1103 flags |= flag;
1104 }
1105
1107 return flags;
1108}
1109
1111 AstValueFactory* ast_value_factory) const {
1112 if (is_literal_one_byte()) {
1113 return ast_value_factory->GetOneByteString(literal_one_byte_string());
1114 }
1115 return ast_value_factory->GetTwoByteString(literal_two_byte_string());
1116}
1117
1119 AstValueFactory* ast_value_factory) const {
1121 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());
1122 }
1123 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());
1124}
1125
1127 AstValueFactory* ast_value_factory) const {
1129 return ast_value_factory->GetOneByteString(raw_literal_one_byte_string());
1130 }
1131 return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string());
1132}
1133
1134
1151
1152const char* Scanner::CurrentLiteralAsCString(Zone* zone) const {
1155 int length = vector.length();
1156 char* buffer = zone->AllocateArray<char>(length + 1);
1157 memcpy(buffer, vector.begin(), length);
1158 buffer[length] = '\0';
1159 return buffer;
1160}
1161
1163 // Use with care: This cleanly resets most, but not all scanner state.
1164 // TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions.
1165
1166 // To re-scan from a given character position, we need to:
1167 // 1, Reset the current_, next_ and next_next_ tokens
1168 // (next_ + next_next_ will be overwritten by Next(),
1169 // current_ will remain unchanged, so overwrite it fully.)
1170 for (TokenDesc& token : token_storage_) {
1171 token.token = Token::kUninitialized;
1172 token.invalid_template_escape_message = MessageTemplate::kNone;
1173 }
1174 // 2, reset the source to the desired position,
1176 // 3, re-scan, by scanning the look-ahead char + 1 token (next_).
1177 c0_ = source_->Advance();
1178 next().after_line_terminator = false;
1179 Scan();
1180 DCHECK_EQ(next().location.beg_pos, static_cast<int>(position));
1181}
1182
1183} // namespace v8::internal
Builtins::Kind kind
Definition builtins.cc:40
SourcePosition pos
int length() const
Definition vector.h:64
constexpr T * begin() const
Definition vector.h:96
const AstRawString * GetOneByteString(base::Vector< const uint8_t > literal)
const AstRawString * GetTwoByteString(base::Vector< const uint16_t > literal)
static const uint32_t kMaxLengthBits
Definition bigint.h:105
static std::optional< RegExpFlag > FlagFromChar(char c)
Definition js-regexp.h:61
DirectHandle< String > Internalize(IsolateT *isolate) const
base::Vector< const uint8_t > one_byte_literal() const
static const size_t kNoBookmark
Definition scanner.h:236
static const size_t kBookmarkWasApplied
Definition scanner.h:237
void Set(size_t bookmark)
Definition scanner.cc:69
ErrorState(MessageTemplate *message_stack, Scanner::Location *location_stack)
Definition scanner.cc:28
MessageTemplate *const message_stack_
Definition scanner.cc:55
void MoveErrorTo(TokenDesc *dest)
Definition scanner.cc:42
Scanner::Location *const location_stack_
Definition scanner.cc:57
MessageTemplate const old_message_
Definition scanner.cc:56
Scanner::Location const old_location_
Definition scanner.cc:58
V8_INLINE void AddLiteralCharAdvance()
Definition scanner.h:545
void SeekForward(int pos)
Definition scanner.cc:448
Token::Value Next()
Definition scanner.cc:162
MessageTemplate octal_message_
Definition scanner.h:775
base::Vector< const uint16_t > literal_two_byte_string() const
Definition scanner.h:620
V8_INLINE Token::Value ScanIdentifierOrKeywordInner()
bool IsValidBigIntKind(NumberKind kind)
Definition scanner.h:490
base::Vector< const uint16_t > raw_literal_two_byte_string() const
Definition scanner.h:648
TokenDesc * current_
Definition scanner.h:747
TokenDesc * next_next_next_
Definition scanner.h:750
Token::Value SkipMagicComment(base::uc32 hash_or_at_sign)
Definition scanner.cc:245
static bool IsInvalid(base::uc32 c)
Definition scanner.cc:114
static constexpr base::uc32 kInvalidSequence
Definition scanner.h:278
const TokenDesc & next_next() const
Definition scanner.h:742
void PushBack(base::uc32 ch)
Definition scanner.h:578
V8_INLINE void AddLiteralChar(base::uc32 c)
Definition scanner.h:535
Token::Value ScanIdentifierOrKeywordInnerSlow(bool escaped, bool can_be_keyword)
Definition scanner.cc:1001
bool IsDecimalNumberKind(NumberKind kind)
Definition scanner.h:494
base::uc32 ScanUnicodeEscape()
Definition scanner.cc:981
bool CombineSurrogatePair()
Definition scanner.h:564
base::uc32 ScanOctalEscape(base::uc32 c, int length)
Definition scanner.cc:525
Token::Value ScanString()
Definition scanner.cc:550
Token::Value ScanTemplateSpan()
Definition scanner.cc:608
const AstRawString * CurrentSymbol(AstValueFactory *ast_value_factory) const
Definition scanner.cc:1110
base::Vector< const uint8_t > literal_one_byte_string() const
Definition scanner.h:615
Token::Value ScanNumber(bool seen_period)
Definition scanner.cc:840
LiteralBuffer source_mapping_url_
Definition scanner.h:766
const Location & location() const
Definition scanner.h:298
const char * CurrentLiteralAsCString(Zone *zone) const
Definition scanner.cc:1152
base::Vector< const uint8_t > next_literal_one_byte_string() const
Definition scanner.h:632
Token::Value SkipMultiLineComment()
Definition scanner.cc:370
V8_INLINE void Scan()
DirectHandle< String > SourceMappingUrl(IsolateT *isolate) const
Definition scanner.cc:701
Token::Value PeekAhead()
Definition scanner.cc:193
Token::Value SkipSingleHTMLComment()
Definition scanner.cc:226
V8_INLINE void AddRawLiteralChar(base::uc32 c)
Definition scanner.h:541
Utf16CharacterStream *const source_
Definition scanner.h:754
const AstRawString * CurrentRawSymbol(AstValueFactory *ast_value_factory) const
Definition scanner.cc:1126
TokenDesc * next_
Definition scanner.h:748
base::Vector< const uint16_t > next_literal_two_byte_string() const
Definition scanner.h:636
bool ScanDecimalAsSmiWithNumericSeparators(uint64_t *value)
Definition scanner.cc:758
base::Vector< const uint8_t > raw_literal_one_byte_string() const
Definition scanner.h:644
V8_INLINE void AdvanceUntil(FunctionType check)
Definition scanner.h:560
Token::Value PeekAheadAhead()
Definition scanner.cc:209
bool is_next_literal_one_byte() const
Definition scanner.h:640
base::uc32 ScanUnlimitedLengthHexNumber(base::uc32 max_value, int beg_pos)
Definition scanner.cc:142
TokenDesc token_storage_[4]
Definition scanner.h:759
UnoptimizedCompileFlags flags_
Definition scanner.h:745
Scanner(Utf16CharacterStream *source, UnoptimizedCompileFlags flags)
Definition scanner.cc:96
bool has_error() const
Definition scanner.h:301
LiteralBuffer source_url_
Definition scanner.h:765
DirectHandle< String > SourceUrl(IsolateT *isolate) const
Definition scanner.cc:689
TokenDesc & next()
Definition scanner.h:738
base::uc32 Peek() const
Definition scanner.h:585
base::uc32 ScanIdentifierUnicodeEscape()
Definition scanner.cc:973
MessageTemplate scanner_error_
Definition scanner.h:777
bool is_raw_literal_one_byte() const
Definition scanner.h:652
bool ScanDigitsWithNumericSeparators(bool(*predicate)(base::uc32 ch), bool is_check_first_digit)
Definition scanner.cc:713
static constexpr base::uc32 kEndOfInput
Definition scanner.h:277
std::optional< RegExpFlags > ScanRegExpFlags()
Definition scanner.cc:1092
const TokenDesc & next_next_next() const
Definition scanner.h:743
std::vector< int > per_function_compile_hint_positions_
Definition scanner.h:770
const AstRawString * NextSymbol(AstValueFactory *ast_value_factory) const
Definition scanner.cc:1118
Location scanner_error_location_
Definition scanner.h:778
bool ScanImplicitOctalDigits(int start_pos, NumberKind *kind)
Definition scanner.cc:809
Token::Value SkipSingleLineComment()
Definition scanner.cc:234
static const int kMaxAscii
Definition scanner.h:499
bool saw_source_mapping_url_magic_comment_at_sign_
Definition scanner.h:767
bool HasPerFunctionCompileHint(int position)
Definition scanner.cc:348
base::uc32 ScanHexNumber(int expected_length)
Definition scanner.cc:120
bool is_literal_one_byte() const
Definition scanner.h:625
size_t per_function_compile_hint_positions_idx_
Definition scanner.h:771
bool saw_magic_comment_compile_hints_all_
Definition scanner.h:768
TokenDesc * next_next_
Definition scanner.h:749
void SeekNext(size_t position)
Definition scanner.cc:1162
bool ScanDecimalAsSmi(uint64_t *value, bool allow_numeric_separator)
Definition scanner.cc:787
static constexpr base::uc32 Invalid()
Definition scanner.h:280
Token::Value ScanHtmlComment()
Definition scanner.cc:415
Token::Value ScanPrivateName()
Definition scanner.cc:591
void TryToParseMagicComment(base::uc32 hash_or_at_sign)
Definition scanner.cc:279
void ReportScannerError(const Location &location, MessageTemplate error)
Definition scanner.h:520
bool ScanDecimalDigits(bool allow_numeric_separator)
Definition scanner.cc:743
const TokenDesc & current() const
Definition scanner.h:740
static constexpr int kMaxValue
Definition smi.h:101
static const base::uc32 kMaxCodePoint
Definition string.h:504
T * AllocateArray(size_t length)
Definition zone.h:127
LineAndColumn previous
ZoneVector< RpoNumber > & result
Register tmp
int x
int position
Definition liveedit.cc:290
V8_INLINE bool IsStringLiteralLineTerminator(uchar c)
Definition unicode.h:271
V8_INLINE bool IsLineTerminator(uchar c)
Definition unicode.h:267
uint32_t uc32
Definition strings.h:19
int HexValue(uc32 c)
Definition strings.h:34
Vector< const uint8_t > StaticOneByteVector(const char(&array)[N])
Definition vector.h:346
constexpr bool IsInRange(T value, U lower_limit, U higher_limit)
Definition bounds.h:20
int32_t VLQBase64Decode(const char *start, size_t sz, size_t *pos)
Definition vlq-base64.cc:37
bool IsIdentifierStart(base::uc32 c)
constexpr bool IsHexDigit(base::uc32 c)
double OctalStringToDouble(base::Vector< const uint8_t > str)
double HexStringToDouble(base::Vector< const uint8_t > str)
constexpr bool IsCarriageReturn(base::uc32 c)
bool IsWhiteSpaceOrLineTerminator(base::uc32 c)
bool CharCanBeKeyword(base::uc32 c)
too high values may cause the compiler to set high thresholds for inlining to as much as possible avoid inlined allocation of objects that cannot escape trace load stores from virtual maglev objects use TurboFan fast string builder analyze liveness of environment slots and zap dead values trace TurboFan load elimination emit data about basic block usage in builtins to this enable builtin reordering when run mksnapshot flag for emit warnings when applying builtin profile data verify register allocation in TurboFan randomly schedule instructions to stress dependency tracking enable store store elimination in TurboFan rewrite far to near simulate GC compiler thread race related to allow float parameters to be passed in simulator mode JS Wasm Run additional turbo_optimize_inlined_js_wasm_wrappers enable experimental feedback collection in generic lowering enable Turboshaft s WasmLoadElimination enable Turboshaft s low level load elimination for JS enable Turboshaft s escape analysis for string concatenation use enable Turbolev features that we want to ship in the not too far future trace individual Turboshaft reduction steps trace intermediate Turboshaft reduction steps invocation count threshold for early optimization Enables optimizations which favor memory size over execution speed Enables sampling allocation profiler with X as a sample interval min size of a semi the new space consists of two semi spaces max size of the Collect garbage after Collect garbage after keeps maps alive for< n > old space garbage collections print one detailed trace line in name
Definition flags.cc:2086
Flag flags[]
Definition flags.cc:3797
double ImplicitOctalStringToDouble(base::Vector< const uint8_t > str)
static constexpr const uint8_t character_scan_flags[128]
constexpr bool IsOctalDigit(base::uc32 c)
double BinaryStringToDouble(base::Vector< const uint8_t > str)
constexpr bool IsNonOctalDecimalDigit(base::uc32 c)
bool MayTerminateString(uint8_t scan_flags)
bool IsWhiteSpace(base::uc32 c)
constexpr bool IsDecimalDigit(base::uc32 c)
V8_INLINE Token::Value KeywordOrIdentifierToken(const uint8_t *input, int input_length)
bool MultilineCommentCharacterNeedsSlowPath(uint8_t scan_flags)
constexpr bool IsBinaryDigit(base::uc32 c)
constexpr bool IsLineFeed(base::uc32 c)
return value
Definition map-inl.h:893
constexpr int AsciiAlphaToLower(base::uc32 c)
constexpr Register cp
bool IsIdentifierPart(base::uc32 c)
double StringToDouble(const char *str, ConversionFlag flags, double empty_string_val)
#define DCHECK_LE(v1, v2)
Definition logging.h:490
#define DCHECK_NOT_NULL(val)
Definition logging.h:492
#define DCHECK_NE(v1, v2)
Definition logging.h:486
#define DCHECK(condition)
Definition logging.h:482
#define DCHECK_EQ(v1, v2)
Definition logging.h:485
#define USE(...)
Definition macros.h:293
static Location invalid()
Definition scanner.h:270
MessageTemplate invalid_template_escape_message
Definition scanner.h:468
#define V8_LIKELY(condition)
Definition v8config.h:661
#define V8_UNLIKELY(condition)
Definition v8config.h:660