18#include "unicode/locid.h"
19#include "unicode/uniset.h"
20#include "unicode/utypes.h"
25using namespace regexp_compiler_constants;
178constexpr base::uc32 MaxCodeUnit(
const bool one_byte) {
180 std::numeric_limits<uint16_t>::max());
182 std::numeric_limits<uint16_t>::max());
186constexpr uint32_t CharMask(
const bool one_byte) {
189 return MaxCodeUnit(one_byte);
206 text->AddElement(
elements()->at(
i), zone);
243 : next_register_(
JSRegExp::RegistersForCaptureCount(capture_count)),
245 unicode_lookaround_position_register_(
kNoRegister),
250 reg_exp_too_big_(false),
251 limiting_recursion_(false),
252 optimize_(
v8_flags.regexp_optimization),
253 read_backward_(false),
254 current_expansion_factor_(1),
255 frequency_collator_(),
272 start->Emit(
this, &new_trace);
275 while (!work_list.
empty()) {
278 node->set_on_work_list(
false);
279 if (!node->label()->is_bound()) node->Emit(
this, &new_trace);
282 if (
v8_flags.correctness_fuzzer_suppressions) {
283 FATAL(
"Aborting on excess zone allocation");
290 isolate->IncreaseTotalRegexpCodeGenerated(code);
299 return range.Contains(that);
301 return reg() == that;
307 action = action->
next()) {
308 if (action->Mentions(
reg))
return true;
316 action = action->
next()) {
317 if (action->Mentions(
reg)) {
334 if (value < kFirstLimit) {
335 return (first_ & (1 << value)) != 0;
336 }
else if (remaining_ ==
nullptr) {
339 return remaining_->Contains(value);
345 if (value < kFirstLimit) {
346 first_ |= (1 <<
value);
348 if (remaining_ ==
nullptr)
350 if (remaining_->is_empty() || !remaining_->Contains(value))
351 remaining_->
Add(value, zone);
356 static constexpr unsigned kFirstLimit = 32;
366 action = action->
next()) {
369 for (
int i = range.
from();
i <= range.
to();
i++)
370 affected_registers->
Set(
i, zone);
371 if (range.to() > max_register) max_register = range.to();
373 affected_registers->
Set(action->reg(), zone);
374 if (action->reg() > max_register) max_register = action->reg();
384 for (
int reg = max_register;
reg >= 0;
reg--) {
385 if (registers_to_pop.
Get(
reg)) {
386 assembler->PopRegister(
reg);
387 }
else if (registers_to_clear.
Get(
reg)) {
389 while (
reg > 0 && registers_to_clear.
Get(
reg - 1)) {
392 assembler->ClearRegisters(
reg, clear_to);
406 for (
int reg = 0;
reg <= max_register;
reg++) {
407 if (!affected_registers.
Get(
reg))
continue;
412 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
413 DeferredActionUndoType undo_action = IGNORE;
416 bool absolute =
false;
418 static const int kNoStore =
kMinInt;
419 int store_position = kNoStore;
423 action = action->
next()) {
424 if (action->Mentions(
reg)) {
425 switch (action->action_type()) {
430 value += psr->
value();
438 undo_action = RESTORE;
449 undo_action = RESTORE;
454 if (!clear && store_position == kNoStore) {
455 store_position =
pc->cp_offset();
466 undo_action = IGNORE;
468 undo_action =
pc->is_capture() ? CLEAR : RESTORE;
478 if (store_position == kNoStore) {
481 undo_action = RESTORE;
492 if (undo_action == RESTORE) {
496 DCHECK_GT(assembler->stack_limit_slack_slot_count(), 0);
497 if (pushes == assembler->stack_limit_slack_slot_count()) {
502 assembler->PushRegister(
reg, stack_check);
503 registers_to_pop->
Set(
reg, zone);
504 }
else if (undo_action == CLEAR) {
505 registers_to_clear->
Set(
reg, zone);
509 if (store_position != kNoStore) {
510 assembler->WriteCurrentPositionToRegister(
reg, store_position);
512 assembler->ClearRegisters(
reg,
reg);
513 }
else if (absolute) {
514 assembler->SetRegister(
reg, value);
515 }
else if (value != 0) {
516 assembler->AdvanceRegister(
reg, value);
536 successor->
Emit(compiler, &new_state);
547 assembler->PushCurrentPosition();
555 ®isters_to_pop, ®isters_to_clear,
558 assembler->AdvanceCurrentPosition(
cp_offset_);
563 assembler->PushBacktrack(&undo);
566 successor->
Emit(compiler, &new_state);
568 compiler->AddWork(successor);
569 assembler->GoTo(successor->
label());
573 assembler->BindJumpTarget(&undo);
577 assembler->Backtrack();
579 assembler->PopCurrentPosition();
589 if (!
label()->is_bound()) {
592 assembler->Bind(
label());
598 assembler->ReadStackPointerFromRegister(stack_pointer_register_);
599 if (clear_capture_count_ > 0) {
602 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
603 assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
607 assembler->Backtrack();
611 if (!trace->is_trivial()) {
612 trace->Flush(compiler,
this);
616 if (!
label()->is_bound()) {
617 assembler->Bind(
label());
621 assembler->Succeed();
624 assembler->GoTo(trace->backtrack());
626 case NEGATIVE_SUBMATCH_SUCCESS:
635 guards_->
Add(guard, zone);
643 result->data_.u_store_register.value = val;
659 result->data_.u_position_register.is_capture = is_capture;
667 result->data_.u_clear_captures.range_to = range.to();
677 result->data_.u_submatch.current_position_register = position_reg;
678 result->data_.u_submatch.success_node = success_node;
687 result->data_.u_submatch.current_position_register = position_reg;
692 int clear_register_count,
693 int clear_register_from,
696 POSITIVE_SUBMATCH_SUCCESS, on_success);
698 result->data_.u_submatch.current_position_register = position_reg;
699 result->data_.u_submatch.clear_register_count = clear_register_count;
700 result->data_.u_submatch.clear_register_from = clear_register_from;
705 int repetition_register,
706 int repetition_limit,
711 result->data_.u_empty_match_check.repetition_register = repetition_register;
712 result->data_.u_empty_match_check.repetition_limit = repetition_limit;
723#define DEFINE_ACCEPT(Type) \
724 void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); }
733 switch (guard->
op()) {
735 DCHECK(!trace->mentions_reg(guard->
reg()));
740 DCHECK(!trace->mentions_reg(guard->
reg()));
750bool ContainsOnlyUtf16CodeUnits(
unibrow::uchar* chars,
int length) {
767 bool one_byte_subject = compiler->one_byte();
770 if (!unicode &&
character <= kMaxAscii) {
773 if (
'A' <= upper && upper <=
'Z') {
775 letters[1] = upper | 0x20;
781#ifdef V8_INTL_SUPPORT
790 DCHECK(ContainsOnlyUtf16CodeUnits(letters, 1));
793 bool in_special_add_set =
794 RegExpCaseFolding::SpecialAddSet().contains(
character);
798 set = set.closeOver(unicode ? USET_SIMPLE_CASE_INSENSITIVE
799 : USET_CASE_INSENSITIVE);
802 if (in_special_add_set && !unicode) {
803 canon = RegExpCaseFolding::Canonicalize(
character);
806 int32_t range_count = set.getRangeCount();
808 for (int32_t
i = 0;
i < range_count;
i++) {
809 UChar32
start = set.getRangeStart(
i);
810 UChar32
end = set.getRangeEnd(
i);
812 for (UChar32 cu =
start; cu <=
end; cu++) {
814 if (!unicode && in_special_add_set &&
815 RegExpCaseFolding::Canonicalize(cu) != canon) {
821 DCHECK(ContainsOnlyUtf16CodeUnits(letters, items));
825 isolate->jsregexp_uncanonicalize()->get(
character,
'\0', letters);
833 if (one_byte_subject) {
843 DCHECK(ContainsOnlyUtf16CodeUnits(letters, length));
848inline bool EmitSimpleCharacter(Isolate* isolate, RegExpCompiler* compiler,
850 bool check,
bool preloaded) {
851 RegExpMacroAssembler* assembler = compiler->macro_assembler();
852 bool bound_checked =
false;
854 assembler->LoadCurrentCharacter(
cp_offset, on_failure, check);
855 bound_checked =
true;
857 assembler->CheckNotCharacter(c, on_failure);
858 return bound_checked;
863inline bool EmitAtomNonLetter(Isolate* isolate, RegExpCompiler* compiler,
865 bool check,
bool preloaded) {
866 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
867 bool one_byte = compiler->one_byte();
869 int length = GetCaseIndependentLetters(isolate, c, compiler, chars, 4);
877 bool checked =
false;
885 macro_assembler->LoadCurrentCharacter(
cp_offset, on_failure, check);
888 macro_assembler->CheckNotCharacter(chars[0], on_failure);
893bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
896 const uint32_t char_mask = CharMask(one_byte);
899 if (((exor - 1) & exor) == 0) {
904 macro_assembler->CheckNotCharacterAfterAnd(c1,
mask, on_failure);
909 if (((diff - 1) & diff) == 0 && c1 >= diff) {
915 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, diff,
mask,
924inline bool EmitAtomLetter(Isolate* isolate, RegExpCompiler* compiler,
926 bool check,
bool preloaded) {
927 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
928 bool one_byte = compiler->one_byte();
930 int length = GetCaseIndependentLetters(isolate, c, compiler, chars, 4);
932 if (length <= 1)
return false;
936 macro_assembler->LoadCurrentCharacter(
cp_offset, on_failure, check);
941 if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],
942 chars[1], on_failure)) {
944 macro_assembler->CheckCharacter(chars[0], &ok);
945 macro_assembler->CheckNotCharacter(chars[1], on_failure);
946 macro_assembler->Bind(&ok);
951 macro_assembler->CheckCharacter(chars[3], &ok);
954 macro_assembler->CheckCharacter(chars[0], &ok);
955 macro_assembler->CheckCharacter(chars[1], &ok);
956 macro_assembler->CheckNotCharacter(chars[2], on_failure);
957 macro_assembler->Bind(&ok);
965void EmitBoundaryTest(RegExpMacroAssembler* masm,
int border,
966 Label* fall_through, Label* above_or_equal,
968 if (
below != fall_through) {
969 masm->CheckCharacterLT(border,
below);
970 if (above_or_equal != fall_through) masm->GoTo(above_or_equal);
972 masm->CheckCharacterGT(border - 1, above_or_equal);
976void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm,
int first,
int last,
977 Label* fall_through, Label* in_range,
978 Label* out_of_range) {
979 if (in_range == fall_through) {
981 masm->CheckNotCharacter(first, out_of_range);
983 masm->CheckCharacterNotInRange(first, last, out_of_range);
987 masm->CheckCharacter(first, in_range);
989 masm->CheckCharacterInRange(first, last, in_range);
991 if (out_of_range != fall_through) masm->GoTo(out_of_range);
997void EmitUseLookupTable(RegExpMacroAssembler* masm,
998 ZoneList<base::uc32>* ranges, uint32_t start_index,
1000 Label* fall_through, Label* even_label,
1009 for (uint32_t
i = start_index;
i <= end_index;
i++) {
1012 DCHECK(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base);
1016 Label* on_bit_clear;
1018 if (even_label == fall_through) {
1019 on_bit_set = odd_label;
1020 on_bit_clear = even_label;
1023 on_bit_set = even_label;
1024 on_bit_clear = odd_label;
1027 for (uint32_t
i = 0;
i < (ranges->at(start_index) & kMask) &&
i < kSize;
1033 for (uint32_t
i = start_index;
i < end_index;
i++) {
1034 for (j = (ranges->at(
i) & kMask); j < (ranges->at(
i + 1) & kMask); j++) {
1039 for (uint32_t
i = j;
i < kSize;
i++) {
1042 Factory* factory = masm->isolate()->factory();
1045 for (uint32_t
i = 0;
i < kSize;
i++) {
1046 ba->set(
i, templ[
i]);
1048 masm->CheckBitInTable(ba, on_bit_set);
1049 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);
1052void CutOutRange(RegExpMacroAssembler* masm, ZoneList<base::uc32>* ranges,
1053 uint32_t start_index, uint32_t end_index, uint32_t cut_index,
1054 Label* even_label, Label* odd_label) {
1055 bool odd = (((cut_index - start_index) & 1) == 1);
1056 Label* in_range_label = odd ? odd_label : even_label;
1058 EmitDoubleBoundaryTest(masm, ranges->at(cut_index),
1059 ranges->at(cut_index + 1) - 1, &dummy, in_range_label,
1061 DCHECK(!dummy.is_linked());
1065 for (uint32_t j = cut_index; j > start_index; j--) {
1066 ranges->at(j) = ranges->at(j - 1);
1068 for (uint32_t j = cut_index + 1; j < end_index; j++) {
1069 ranges->at(j) = ranges->at(j + 1);
1075void SplitSearchSpace(ZoneList<base::uc32>* ranges, uint32_t start_index,
1076 uint32_t end_index, uint32_t* new_start_index,
1077 uint32_t* new_end_index,
base::uc32* border) {
1084 *new_start_index = start_index;
1085 *border = (ranges->at(start_index) & ~kMask) + kSize;
1086 while (*new_start_index < end_index) {
1087 if (ranges->at(*new_start_index) > *border)
break;
1088 (*new_start_index)++;
1101 uint32_t binary_chop_index = (end_index + start_index) / 2;
1107 end_index - start_index > (*new_start_index - start_index) * 2 &&
1108 last - first > kSize * 2 && binary_chop_index > *new_start_index &&
1109 ranges->at(binary_chop_index) >= first + 2 * kSize) {
1110 uint32_t scan_forward_for_section_border = binary_chop_index;
1111 uint32_t new_border = (ranges->at(binary_chop_index) | kMask) + 1;
1113 while (scan_forward_for_section_border < end_index) {
1114 if (ranges->at(scan_forward_for_section_border) > new_border) {
1115 *new_start_index = scan_forward_for_section_border;
1116 *border = new_border;
1119 scan_forward_for_section_border++;
1123 DCHECK(*new_start_index > start_index);
1124 *new_end_index = *new_start_index - 1;
1125 if (ranges->at(*new_end_index) == *border) {
1128 if (*border >= ranges->at(end_index)) {
1129 *border = ranges->at(end_index);
1130 *new_start_index = end_index;
1131 *new_end_index = end_index - 1;
1141void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<base::uc32>* ranges,
1142 uint32_t start_index, uint32_t end_index,
1144 Label* fall_through, Label* even_label,
1156 if (start_index == end_index) {
1157 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
1163 if (start_index + 1 == end_index) {
1164 EmitDoubleBoundaryTest(masm, first, last, fall_through, even_label,
1171 if (end_index - start_index <= 6) {
1174 static uint32_t kNoCutIndex = -1;
1175 uint32_t cut = kNoCutIndex;
1176 for (uint32_t
i = start_index;
i < end_index;
i++) {
1177 if (ranges->at(
i) == ranges->at(
i + 1) - 1) {
1182 if (cut == kNoCutIndex) cut = start_index;
1183 CutOutRange(masm, ranges, start_index, end_index, cut, even_label,
1186 GenerateBranches(masm, ranges, start_index + 1, end_index - 1, min_char,
1187 max_char, fall_through, even_label, odd_label);
1195 if ((max_char >> kBits) == (min_char >> kBits)) {
1196 EmitUseLookupTable(masm, ranges, start_index, end_index, min_char,
1197 fall_through, even_label, odd_label);
1201 if ((min_char >> kBits) != first >> kBits) {
1202 masm->CheckCharacterLT(first, odd_label);
1203 GenerateBranches(masm, ranges, start_index + 1, end_index, first, max_char,
1204 fall_through, odd_label, even_label);
1208 uint32_t new_start_index = 0;
1209 uint32_t new_end_index = 0;
1212 SplitSearchSpace(ranges, start_index, end_index, &new_start_index,
1213 &new_end_index, &border);
1216 Label*
above = &handle_rest;
1217 if (border == last + 1) {
1220 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;
1221 DCHECK(new_end_index == end_index - 1);
1226 DCHECK_LT(start_index, new_start_index);
1228 DCHECK(new_end_index + 1 == new_start_index ||
1229 (new_end_index + 2 == new_start_index &&
1230 border == ranges->at(new_end_index + 1)));
1233 DCHECK_LT(ranges->at(new_end_index), border);
1234 DCHECK(border < ranges->at(new_start_index) ||
1235 (border == ranges->at(new_start_index) &&
1236 new_start_index == end_index && new_end_index == end_index - 1 &&
1237 border == last + 1));
1238 DCHECK(new_start_index == 0 || border >= ranges->at(new_start_index - 1));
1240 masm->CheckCharacterGT(border - 1,
above);
1242 GenerateBranches(masm, ranges, start_index, new_end_index, min_char,
1243 border - 1, &dummy, even_label, odd_label);
1244 if (handle_rest.is_linked()) {
1245 masm->Bind(&handle_rest);
1246 bool flip = (new_start_index & 1) != (start_index & 1);
1247 GenerateBranches(masm, ranges, new_start_index, end_index, border, max_char,
1248 &dummy, flip ? odd_label : even_label,
1249 flip ? even_label : odd_label);
1253void EmitClassRanges(RegExpMacroAssembler* macro_assembler,
1254 RegExpClassRanges* cr,
bool one_byte, Label* on_failure,
1257 ZoneList<CharacterRange>* ranges = cr->ranges(zone);
1264 const int ranges_length = ranges->length();
1265 if (ranges_length == 0) {
1266 if (!cr->is_negated()) {
1267 macro_assembler->GoTo(on_failure);
1270 macro_assembler->CheckPosition(
cp_offset, on_failure);
1275 const base::uc32 max_char = MaxCodeUnit(one_byte);
1276 if (ranges_length == 1 && ranges->at(0).IsEverything(max_char)) {
1277 if (cr->is_negated()) {
1278 macro_assembler->GoTo(on_failure);
1282 macro_assembler->CheckPosition(
cp_offset, on_failure);
1292 if (cr->is_standard(zone) && macro_assembler->CheckSpecialClassRanges(
1293 cr->standard_type(), on_failure)) {
1297 static constexpr int kMaxRangesForInlineBranchGeneration = 16;
1298 if (ranges_length > kMaxRangesForInlineBranchGeneration) {
1304 if (cr->is_negated()) {
1305 if (macro_assembler->CheckCharacterInRangeArray(ranges, on_failure)) {
1309 if (macro_assembler->CheckCharacterNotInRangeArray(ranges, on_failure)) {
1318 ZoneList<base::uc32>* range_boundaries =
1319 zone->New<ZoneList<base::uc32>>(ranges_length * 2, zone);
1321 bool zeroth_entry_is_failure = !cr->is_negated();
1323 for (
int i = 0;
i < ranges_length;
i++) {
1324 CharacterRange& range = ranges->at(
i);
1325 if (range.from() == 0) {
1327 zeroth_entry_is_failure = !zeroth_entry_is_failure;
1329 range_boundaries->Add(range.from(), zone);
1333 range_boundaries->Add(range.to() + 1, zone);
1335 int end_index = range_boundaries->length() - 1;
1336 if (range_boundaries->at(end_index) > max_char) {
1341 GenerateBranches(macro_assembler, range_boundaries,
1345 max_char, &fall_through,
1346 zeroth_entry_is_failure ? &fall_through : on_failure,
1347 zeroth_entry_is_failure ? on_failure : &fall_through);
1348 macro_assembler->Bind(&fall_through);
1358 if (trace->stop_node() !=
nullptr) {
1363 if (trace->is_trivial()) {
1364 if (
label_.is_bound() || on_work_list() || !KeepRecursing(compiler)) {
1370 compiler->AddWork(
this);
1381 if (KeepRecursing(compiler) && compiler->optimize() &&
1382 trace_count_ < kMaxCopiesCodeGenerated) {
1389 bool was_limiting = compiler->limiting_recursion();
1390 compiler->set_limiting_recursion(
true);
1391 trace->Flush(compiler,
this);
1392 compiler->set_limiting_recursion(was_limiting);
1397 return !compiler->limiting_recursion() &&
1403 std::optional<RegExpFlags> old_flags;
1404 if (action_type_ == MODIFY_FLAGS) {
1411 if (action_type_ == BEGIN_POSITIVE_SUBMATCH) {
1414 success_node()->on_success()->FillInBMInfo(isolate,
offset, budget - 1, bm,
1416 }
else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {
1420 on_success()->FillInBMInfo(isolate,
offset, budget - 1, bm, not_at_start);
1422 SaveBMInfo(bm, not_at_start,
offset);
1423 if (old_flags.has_value()) {
1430 bool not_at_start) {
1431 if (action_type_ == SET_REGISTER_FOR_LOOP) {
1432 on_success()->GetQuickCheckDetailsFromLoopEntry(details, compiler,
1433 filled_in, not_at_start);
1434 }
else if (action_type_ == BEGIN_POSITIVE_SUBMATCH) {
1437 success_node()->on_success()->GetQuickCheckDetails(details, compiler,
1438 filled_in, not_at_start);
1439 }
else if (action_type() != POSITIVE_SUBMATCH_SUCCESS) {
1443 std::optional<RegExpFlags> old_flags;
1444 if (action_type() == MODIFY_FLAGS) {
1448 old_flags = compiler->flags();
1449 compiler->set_flags(
flags());
1451 on_success()->GetQuickCheckDetails(details, compiler, filled_in,
1453 if (old_flags.has_value()) {
1454 compiler->set_flags(*old_flags);
1462 if (assertion_type() == AT_START && not_at_start)
return;
1463 on_success()->FillInBMInfo(isolate,
offset, budget - 1, bm, not_at_start);
1464 SaveBMInfo(bm, not_at_start,
offset);
1469 bool not_at_start) {
1471 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
1477inline uint32_t SmearBitsRight(uint32_t v) {
1489 bool found_useful_op =
false;
1490 const uint32_t char_mask = CharMask(asc);
1497 found_useful_op =
true;
1499 mask_ |= (
pos->mask & char_mask) << char_shift;
1500 value_ |= (
pos->value & char_mask) << char_shift;
1501 char_shift += asc ? 8 : 16;
1503 return found_useful_op;
1507 return not_at_start ? eats_at_least_.eats_at_least_from_not_start
1508 : eats_at_least_.eats_at_least_from_possibly_start;
1522 int characters_filled_in,
1523 bool not_at_start) {
1531 if (read_backward()) {
1534 DCHECK_EQ(eats_at_least_info()->eats_at_least_from_possibly_start, 0);
1535 DCHECK_EQ(eats_at_least_info()->eats_at_least_from_not_start, 0);
1546 uint8_t loop_body_from_not_start = base::saturated_cast<uint8_t>(
1547 static_cast<int>(loop_node_->EatsAtLeast(
true)) -
1548 static_cast<int>(continue_node_->EatsAtLeast(
true)));
1549 uint8_t loop_body_from_possibly_start = base::saturated_cast<uint8_t>(
1550 static_cast<int>(loop_node_->EatsAtLeast(
false)) -
1551 static_cast<int>(continue_node_->EatsAtLeast(
true)));
1554 int loop_iterations = base::saturated_cast<uint8_t>(min_loop_iterations());
1558 base::saturated_cast<uint8_t>(loop_iterations * loop_body_from_not_start +
1559 continue_node_->EatsAtLeast(
true));
1560 if (loop_iterations > 0 && loop_body_from_possibly_start > 0) {
1563 result.eats_at_least_from_possibly_start = base::saturated_cast<uint8_t>(
1564 loop_body_from_possibly_start +
1565 (loop_iterations - 1) * loop_body_from_not_start +
1566 continue_node_->EatsAtLeast(
true));
1569 result.eats_at_least_from_possibly_start =
1570 continue_node_->EatsAtLeast(
false);
1577 bool preload_has_checked_bounds,
1578 Label* on_possible_success,
1580 bool fall_through_on_failure,
1583 if (details->
characters() == 0)
return false;
1584 GetQuickCheckDetails(details, compiler, 0,
1587 if (!details->
Rationalize(compiler->one_byte()))
return false;
1589 compiler->macro_assembler()->CanReadUnaligned());
1591 uint32_t value = details->
value();
1595 if (trace->characters_preloaded() != details->
characters()) {
1605 assembler->LoadCurrentCharacter(
1606 trace->cp_offset(), bounds_check_trace->
backtrack(),
1607 !preload_has_checked_bounds, details->
characters(), eats_at_least);
1610 bool need_mask =
true;
1615 const uint32_t char_mask = CharMask(compiler->one_byte());
1616 if ((
mask & char_mask) == char_mask) need_mask =
false;
1621 static const uint32_t kTwoByteMask = 0xFFFF;
1622 static const uint32_t kFourByteMask = 0xFFFFFFFF;
1623 if (details->
characters() == 2 && compiler->one_byte()) {
1624 if ((
mask & kTwoByteMask) == kTwoByteMask) need_mask =
false;
1625 }
else if (details->
characters() == 1 && !compiler->one_byte()) {
1626 if ((
mask & kTwoByteMask) == kTwoByteMask) need_mask =
false;
1628 if (
mask == kFourByteMask) need_mask =
false;
1632 if (fall_through_on_failure) {
1634 assembler->CheckCharacterAfterAnd(value,
mask, on_possible_success);
1636 assembler->CheckCharacter(value, on_possible_success);
1640 assembler->CheckNotCharacterAfterAnd(value,
mask, trace->backtrack());
1642 assembler->CheckNotCharacter(value, trace->backtrack());
1658 int characters_filled_in,
1659 bool not_at_start) {
1662 if (read_backward())
return;
1664 DCHECK(characters_filled_in < details->characters());
1666 const uint32_t char_mask = CharMask(compiler->one_byte());
1667 for (
int k = 0; k < elements()->length(); k++) {
1671 for (
int i = 0;
i < characters &&
i < quarks.
length();
i++) {
1673 details->
positions(characters_filled_in);
1675 if (IsIgnoreCase(compiler->flags())) {
1678 GetCaseIndependentLetters(isolate, c, compiler, chars, 4);
1683 pos->determines_perfectly =
false;
1690 pos->mask = char_mask;
1691 pos->value = chars[0];
1692 pos->determines_perfectly =
true;
1694 uint32_t common_bits = char_mask;
1695 uint32_t bits = chars[0];
1696 for (
int j = 1; j <
length; j++) {
1697 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
1698 common_bits ^= differing_bits;
1699 bits &= common_bits;
1705 uint32_t one_zero = (common_bits | ~char_mask);
1706 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
1707 pos->determines_perfectly =
true;
1709 pos->mask = common_bits;
1716 if (c > char_mask) {
1718 pos->determines_perfectly =
false;
1721 pos->mask = char_mask;
1723 pos->determines_perfectly =
true;
1725 characters_filled_in++;
1726 DCHECK(characters_filled_in <= details->characters());
1727 if (characters_filled_in == details->
characters()) {
1733 details->
positions(characters_filled_in);
1736 if (tree->is_negated() || ranges->is_empty()) {
1747 int first_range = 0;
1748 while (ranges->at(first_range).from() > char_mask) {
1750 if (first_range == ranges->length()) {
1752 pos->determines_perfectly =
false;
1759 (range.to() > char_mask) ? char_mask : range.to();
1760 const uint32_t differing_bits = (first_from ^ first_to);
1763 if ((differing_bits & (differing_bits + 1)) == 0 &&
1764 first_from + differing_bits == first_to) {
1765 pos->determines_perfectly =
true;
1767 uint32_t common_bits = ~SmearBitsRight(differing_bits);
1768 uint32_t bits = (first_from & common_bits);
1769 for (
int i = first_range + 1;
i < ranges->
length();
i++) {
1770 range = ranges->at(
i);
1772 if (from > char_mask)
continue;
1774 (range.to() > char_mask) ? char_mask : range.to();
1780 pos->determines_perfectly =
false;
1781 uint32_t new_common_bits = (from ^
to);
1782 new_common_bits = ~SmearBitsRight(new_common_bits);
1783 common_bits &= new_common_bits;
1784 bits &= new_common_bits;
1785 uint32_t new_differing_bits = (from & common_bits) ^ bits;
1786 common_bits ^= new_differing_bits;
1787 bits &= common_bits;
1789 pos->mask = common_bits;
1792 characters_filled_in++;
1793 DCHECK(characters_filled_in <= details->characters());
1794 if (characters_filled_in == details->
characters())
return;
1799 on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in,
1806 positions_[
i].mask = 0;
1807 positions_[
i].value = 0;
1808 positions_[
i].determines_perfectly =
false;
1822 positions_[
i] = positions_[by +
i];
1825 positions_[
i].mask = 0;
1826 positions_[
i].value = 0;
1827 positions_[
i].determines_perfectly =
false;
1837 if (other->cannot_match_) {
1840 if (cannot_match_) {
1851 pos->determines_perfectly =
false;
1856 uint32_t differing_bits = (
pos->value ^ other_pos->
value);
1857 pos->mask &= ~differing_bits;
1866 info->visited =
true;
1878 DCHECK(!
node_->traversed_loop_initialization_node_);
1879 node_->traversed_loop_initialization_node_ =
true;
1882 DCHECK(
node_->traversed_loop_initialization_node_);
1883 node_->traversed_loop_initialization_node_ =
false;
1897 --
node_->min_loop_iterations_;
1908 if (
info()->replacement_calculated)
return replacement();
1909 if (depth < 0)
return this;
1912 return FilterSuccessor(depth - 1, compiler);
1918 if (next ==
nullptr)
return set_replacement(
nullptr);
1920 return set_replacement(
this);
1926 return range.Contains(0x039C) || range.Contains(0x03BC) ||
1927 range.Contains(0x0178);
1932bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
1933 for (
int i = 0;
i < ranges->
length();
i++) {
1944 if (
info()->replacement_calculated)
return replacement();
1945 if (depth < 0)
return this;
1948 int element_count = elements()->length();
1949 for (
int i = 0;
i < element_count;
i++) {
1953 for (
int j = 0; j < quarks.
length(); j++) {
1955 if (!IsIgnoreCase(flags)) {
1959 int length = GetCaseIndependentLetters(compiler->isolate(), c,
1960 compiler, chars, 4);
1962 return set_replacement(
nullptr);
1977 int range_count = ranges->length();
1979 if (range_count != 0 && ranges->at(0).from() == 0 &&
1982 IsIgnoreCase(flags) &&
1983 RangesContainLatin1Equivalents(ranges);
1984 if (!case_complications) {
1985 return set_replacement(
nullptr);
1989 if (range_count == 0 ||
1992 IsIgnoreCase(flags) &&
1993 RangesContainLatin1Equivalents(ranges);
1994 if (!case_complications) {
1995 return set_replacement(
nullptr);
2001 return FilterSuccessor(depth - 1, compiler);
2005 if (
info()->replacement_calculated)
return replacement();
2006 if (depth < 0)
return this;
2007 if (
info()->visited)
return this;
2015 if (continue_replacement ==
nullptr)
return set_replacement(
nullptr);
2022 if (
info()->replacement_calculated)
return replacement();
2023 if (depth < 0)
return this;
2024 if (
info()->visited)
return this;
2028 for (
int i = 0;
i < choice_count;
i++) {
2030 if (alternative.
guards() !=
nullptr &&
2031 alternative.
guards()->length() != 0) {
2032 set_replacement(
this);
2039 for (
int i = 0;
i < choice_count;
i++) {
2043 DCHECK(replacement !=
this);
2044 if (replacement !=
nullptr) {
2047 survivor = replacement;
2050 if (surviving < 2)
return set_replacement(survivor);
2052 set_replacement(
this);
2053 if (surviving == choice_count) {
2060 for (
int i = 0;
i < choice_count;
i++) {
2063 if (replacement !=
nullptr) {
2074 if (
info()->replacement_calculated)
return replacement();
2075 if (depth < 0)
return this;
2076 if (
info()->visited)
return this;
2082 if (replacement ==
nullptr)
return set_replacement(
nullptr);
2089 if (neg_replacement ==
nullptr)
return set_replacement(replacement);
2090 alternatives_->at(kLookaroundIndex).set_node(neg_replacement);
2091 return set_replacement(
this);
2096 int characters_filled_in,
2097 bool not_at_start) {
2098 if (body_can_be_zero_length_ ||
info()->visited)
return;
2099 not_at_start = not_at_start || this->not_at_start();
2101 if (traversed_loop_initialization_node_ && min_loop_iterations_ > 0 &&
2102 loop_node_->EatsAtLeast(not_at_start) >
2103 continue_node_->EatsAtLeast(
true)) {
2110 loop_node_->GetQuickCheckDetails(details, compiler, characters_filled_in,
2123 int characters_filled_in,
bool not_at_start) {
2124 if (traversed_loop_initialization_node_) {
2131 GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start);
2139 GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start);
2145 if (body_can_be_zero_length_ || budget <= 0) {
2147 SaveBMInfo(bm, not_at_start,
offset);
2151 SaveBMInfo(bm, not_at_start,
offset);
2156 int characters_filled_in,
2157 bool not_at_start) {
2158 not_at_start = (not_at_start || not_at_start_);
2162 details, compiler, characters_filled_in, not_at_start);
2163 for (
int i = 1;
i < choice_count;
i++) {
2169 details->
Merge(&new_details, characters_filled_in);
2177 Label* non_word,
bool fall_through_on_word) {
2178 if (assembler->CheckSpecialClassRanges(
2181 fall_through_on_word ? non_word : word)) {
2185 assembler->CheckCharacterGT(
'z', non_word);
2186 assembler->CheckCharacterLT(
'0', non_word);
2187 assembler->CheckCharacterGT(
'a' - 1, word);
2188 assembler->CheckCharacterLT(
'9' + 1, word);
2189 assembler->CheckCharacterLT(
'A', non_word);
2190 assembler->CheckCharacterLT(
'Z' + 1, word);
2191 if (fall_through_on_word) {
2192 assembler->CheckNotCharacter(
'_', non_word);
2194 assembler->CheckCharacter(
'_', word);
2200void EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace) {
2201 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2204 Trace new_trace(*trace);
2205 new_trace.InvalidateCurrentCharacter();
2211 const bool may_be_at_or_before_subject_string_start =
2212 new_trace.cp_offset() <= 0;
2215 if (may_be_at_or_before_subject_string_start) {
2218 assembler->CheckAtStart(new_trace.cp_offset(), &ok);
2223 const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
2224 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
2225 new_trace.backtrack(), can_skip_bounds_check);
2227 new_trace.backtrack())) {
2229 if (!compiler->one_byte()) {
2230 assembler->CheckCharacterAfterAnd(0x2028, 0xFFFE, &ok);
2232 assembler->CheckCharacter(
'\n', &ok);
2233 assembler->CheckNotCharacter(
'\r', new_trace.backtrack());
2235 assembler->Bind(&ok);
2236 on_success->Emit(compiler, &new_trace);
2248 if (lookahead ==
nullptr) {
2251 if (eats_at_least >= 1) {
2254 FillInBMInfo(isolate, 0, kRecursionBudget, bm, not_at_start);
2265 Label before_non_word;
2267 if (trace->characters_preloaded() != 1) {
2268 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
2271 EmitWordCheck(assembler, &before_word, &before_non_word,
false);
2273 assembler->Bind(&before_non_word);
2275 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
2276 assembler->GoTo(&ok);
2278 assembler->Bind(&before_word);
2279 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
2280 assembler->Bind(&ok);
2282 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
2285 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
2293 Trace new_trace(*trace);
2297 Label* non_word = backtrack_if_previous == kIsNonWord ? new_trace.
backtrack()
2299 Label* word = backtrack_if_previous == kIsNonWord ? &fall_through
2306 const bool may_be_at_or_before_subject_string_start =
2309 if (may_be_at_or_before_subject_string_start) {
2312 assembler->CheckAtStart(new_trace.
cp_offset(), non_word);
2317 const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
2318 assembler->LoadCurrentCharacter(new_trace.
cp_offset() - 1, non_word,
2319 can_skip_bounds_check);
2320 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
2322 assembler->Bind(&fall_through);
2323 on_success()->Emit(compiler, &new_trace);
2328 int filled_in,
bool not_at_start) {
2329 if (assertion_type_ == AT_START && not_at_start) {
2333 return on_success()->GetQuickCheckDetails(details, compiler, filled_in,
2339 switch (assertion_type_) {
2342 assembler->CheckPosition(trace->cp_offset(), &ok);
2343 assembler->GoTo(trace->backtrack());
2344 assembler->Bind(&ok);
2349 assembler->GoTo(trace->backtrack());
2353 assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack());
2354 Trace at_start_trace = *trace;
2356 on_success()->Emit(compiler, &at_start_trace);
2361 EmitHat(compiler, on_success(), trace);
2364 case AT_NON_BOUNDARY: {
2365 EmitBoundaryCheck(compiler, trace);
2369 on_success()->Emit(compiler, trace);
2375 if (quick_check ==
nullptr)
return false;
2380void UpdateBoundsCheck(
int index,
int* checked_up_to) {
2381 if (index > *checked_up_to) {
2382 *checked_up_to =
index;
2418 bool preloaded,
Trace* trace,
2419 bool first_element_checked,
int* checked_up_to) {
2422 bool one_byte = compiler->one_byte();
2425 int element_count = elements()->length();
2426 int backward_offset = read_backward() ? -Length() : 0;
2427 for (
int i = preloaded ? 0 : element_count - 1;
i >= 0;
i--) {
2432 for (
int j = preloaded ? 0 : quarks.
length() - 1; j >= 0; j--) {
2433 if (first_element_checked &&
i == 0 && j == 0)
continue;
2434 if (DeterminedAlready(quick_check, elm.
cp_offset() + j))
continue;
2436 bool needs_bounds_check =
2437 *checked_up_to <
cp_offset + j || read_backward();
2438 bool bounds_checked =
false;
2440 case NON_LATIN1_MATCH: {
2442 if (IsIgnoreCase(compiler->flags())) {
2449 GetCaseIndependentLetters(isolate, quark, compiler, chars, 4);
2463 case NON_LETTER_CHARACTER_MATCH:
2465 EmitAtomNonLetter(isolate, compiler, quark,
backtrack,
2466 cp_offset + j, needs_bounds_check, preloaded);
2468 case SIMPLE_CHARACTER_MATCH:
2469 bounds_checked = EmitSimpleCharacter(isolate, compiler, quark,
2471 needs_bounds_check, preloaded);
2473 case CASE_CHARACTER_MATCH:
2475 EmitAtomLetter(isolate, compiler, quark,
backtrack,
2476 cp_offset + j, needs_bounds_check, preloaded);
2481 if (bounds_checked) UpdateBoundsCheck(
cp_offset + j, checked_up_to);
2485 if (pass == CHARACTER_CLASS_MATCH) {
2486 if (first_element_checked &&
i == 0)
continue;
2487 if (DeterminedAlready(quick_check, elm.
cp_offset()))
continue;
2489 bool bounds_check = *checked_up_to <
cp_offset || read_backward();
2491 bounds_check, preloaded, zone());
2492 UpdateBoundsCheck(
cp_offset, checked_up_to);
2512 read_backward, on_success);
2517 bool read_backward,
RegExpNode* on_success) {
2519 if (lead.
from() == lead.
to()) {
2521 lead_surrogate.
Add(lead.
from(), zone);
2533 return zone->
New<
TextNode>(elms, read_backward, on_success);
2538 bool read_backward,
RegExpNode* on_success) {
2547 return zone->
New<
TextNode>(elms, read_backward, on_success);
2557 LimitResult limit_result = LimitVersions(compiler, trace);
2558 if (limit_result == DONE)
return;
2562 compiler->SetRegExpTooBig();
2566 if (compiler->one_byte()) {
2568 TextEmitPass(compiler, NON_LATIN1_MATCH,
false, trace,
false, &dummy);
2571 bool first_elt_done =
false;
2572 int bound_checked_to = trace->cp_offset() - 1;
2573 bound_checked_to += trace->bound_checked_up_to();
2577 for (
int twice = 0; twice < 2; twice++) {
2578 bool is_preloaded_pass = twice == 0;
2579 if (is_preloaded_pass && trace->characters_preloaded() != 1)
continue;
2580 if (IsIgnoreCase(compiler->flags())) {
2581 TextEmitPass(compiler, NON_LETTER_CHARACTER_MATCH, is_preloaded_pass,
2582 trace, first_elt_done, &bound_checked_to);
2583 TextEmitPass(compiler, CASE_CHARACTER_MATCH, is_preloaded_pass, trace,
2584 first_elt_done, &bound_checked_to);
2586 TextEmitPass(compiler, SIMPLE_CHARACTER_MATCH, is_preloaded_pass, trace,
2587 first_elt_done, &bound_checked_to);
2589 TextEmitPass(compiler, CHARACTER_CLASS_MATCH, is_preloaded_pass, trace,
2590 first_elt_done, &bound_checked_to);
2591 first_elt_done =
true;
2594 Trace successor_trace(*trace);
2597 read_backward() ? -Length() : Length(), compiler);
2601 on_success()->Emit(compiler, &successor_trace);
2617 compiler->SetRegExpTooBig();
2625 if (!IsIgnoreCase(flags))
return;
2626#ifdef V8_INTL_SUPPORT
2632 int element_count = elements()->length();
2633 for (
int i = 0;
i < element_count;
i++) {
2650 if (read_backward())
return nullptr;
2651 if (elements()->
length() != 1)
return nullptr;
2657 if (node->is_negated()) {
2658 return ranges->length() == 0 ? on_success() :
nullptr;
2660 if (ranges->length() != 1)
return nullptr;
2661 const base::uc32 max_char = MaxCodeUnit(compiler->one_byte());
2662 return ranges->at(0).IsEverything(max_char) ? on_success() :
nullptr;
2676 while (node !=
this) {
2678 return kNodeIsTooComplexForGreedyLoops;
2680 int node_length = node->GreedyLoopTextLength();
2681 if (node_length == kNodeIsTooComplexForGreedyLoops) {
2682 return kNodeIsTooComplexForGreedyLoops;
2684 length += node_length;
2685 node = node->AsSeqRegExpNode()->on_success();
2687 if (read_backward()) {
2694 return kNodeIsTooComplexForGreedyLoops;
2701 AddAlternative(alt);
2702 loop_node_ = alt.
node();
2707 AddAlternative(alt);
2708 continue_node_ = alt.
node();
2713 if (trace->stop_node() ==
this) {
2716 GreedyLoopTextLengthForAlternative(&(
alternatives_->at(0)));
2717 DCHECK_NE(kNodeIsTooComplexForGreedyLoops, text_length);
2720 DCHECK(trace->cp_offset() == text_length);
2722 macro_assembler->
GoTo(trace->loop_label());
2726 if (!trace->is_trivial()) {
2727 trace->Flush(compiler,
this);
2734 int eats_at_least) {
2735 int preload_characters = std::min(4, eats_at_least);
2737 if (compiler->macro_assembler()->CanReadUnaligned()) {
2738 bool one_byte = compiler->one_byte();
2743 if (preload_characters == 3) preload_characters = 2;
2745 if (preload_characters > 2) preload_characters = 2;
2748 if (preload_characters > 1) preload_characters = 1;
2750 return preload_characters;
2758 : possible_success(),
2759 expects_preload(false),
2761 quick_check_details() {}
2773 for (
int i = 0;
i <
count &&
i < kAFew;
i++) {
2774 alt_gens_.Add(a_few_alt_gens_ +
i, zone);
2776 for (
int i = kAFew;
i <
count;
i++) {
2781 for (
int i = kAFew;
i < alt_gens_.
length();
i++) {
2782 delete alt_gens_[
i];
2783 alt_gens_[
i] =
nullptr;
2790 static const int kAFew = 10;
2802 int ranges_length,
Interval new_range) {
2806 bool inside =
false;
2808 for (
int i = 0;
i < ranges_length; inside = !inside, last = ranges[
i],
i++) {
2811 if (ranges[
i] <= new_range.
from())
continue;
2814 if (last <= new_range.
from() && new_range.
to() < ranges[
i]) {
2834 static_assert(
kInt64Size >=
sizeof(
decltype(masked_bitset.to_ullong())));
2835 uint64_t lsb = masked_bitset.to_ullong();
2841 uint64_t msb = masked_bitset.to_ullong();
2853 if (interval.size() >= kMapSize) {
2854 map_count_ = kMapSize;
2859 for (
int i = interval.
from();
i <= interval.
to();
i++) {
2860 int mod_character = (
i & kMask);
2861 if (!
map_[mod_character]) {
2863 map_.set(mod_character);
2865 if (map_count_ == kMapSize)
return;
2871 if (map_count_ != kMapSize) {
2872 map_count_ = kMapSize;
2881 max_char_(MaxCodeUnit(compiler->one_byte())) {
2892 int biggest_points = 0;
2895 const int kMaxMax = 32;
2896 for (
int max_number_of_chars = 4; max_number_of_chars < kMaxMax;
2897 max_number_of_chars *= 2) {
2901 if (biggest_points == 0)
return false;
2912 int old_biggest_points,
int* from,
2914 int biggest_points = old_biggest_points;
2919 int remembered_from =
i;
2923 union_bitset |=
bitmaps_->at(
i)->raw_bitset();
2930 while ((j = BitsetFirstSetBit(union_bitset)) != -1) {
2938 union_bitset.reset(j);
2946 bool in_quickcheck_range =
2947 ((
i - remembered_from < 4) ||
2951 int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
2952 int points = (
i - remembered_from) * probability;
2953 if (
points > biggest_points) {
2954 *from = remembered_from;
2959 return biggest_points;
2973 int min_lookahead,
int max_lookahead,
2976 const int kSkipArrayEntry = 0;
2977 const int kDontSkipArrayEntry = 1;
2979 std::memset(boolean_skip_table->begin(), kSkipArrayEntry,
2980 boolean_skip_table->length());
2981 const bool fill_nibble_table = !nibble_table.
is_null();
2982 if (fill_nibble_table) {
2983 std::memset(nibble_table->begin(), 0, nibble_table->length());
2986 for (
int i = max_lookahead;
i >= min_lookahead;
i--) {
2991 while ((j = BitsetFirstSetBit(bitset)) != -1) {
2993 boolean_skip_table->set(j, kDontSkipArrayEntry);
2994 if (fill_nibble_table) {
2995 int lo_nibble = j & 0x0f;
2996 int hi_nibble = (j >> 4) & 0x07;
2997 int row = nibble_table->get(lo_nibble);
2998 row |= 1 << hi_nibble;
2999 nibble_table->set(lo_nibble, row);
3005 const int skip = max_lookahead + 1 - min_lookahead;
3013 int min_lookahead = 0;
3014 int max_lookahead = 0;
3020 bool found_single_character =
false;
3021 int single_character = 0;
3022 for (
int i = max_lookahead;
i >= min_lookahead;
i--) {
3024 if (map->map_count() == 0)
continue;
3026 if (found_single_character || map->map_count() > 1) {
3027 found_single_character =
false;
3031 DCHECK(!found_single_character);
3034 found_single_character =
true;
3035 single_character = BitsetFirstSetBit(map->raw_bitset());
3040 int lookahead_width = max_lookahead + 1 - min_lookahead;
3042 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
3047 if (found_single_character) {
3068 const int skip_distance = max_lookahead + 1 - min_lookahead;
3071 static_assert(kSize == 128);
3075 GetSkipTable(min_lookahead, max_lookahead, boolean_skip_table, nibble_table);
3164 for (
int i = 0;
i < choice_count - 1;
i++) {
3167 int guard_count = (guards ==
nullptr) ? 0 : guards->
length();
3168 for (
int j = 0; j < guard_count; j++) {
3169 DCHECK(!trace->mentions_reg(guards->
at(j)->reg()));
3179 state->eats_at_least_ =
3182 state->preload_characters_ =
3185 state->preload_is_current_ =
3187 state->preload_has_checked_bounds_ = state->preload_is_current_;
3193 if (choice_count == 1 &&
alternatives_->at(0).guards() ==
nullptr) {
3201 if (limit_result ==
DONE)
return;
3206 if (trace->flush_budget() == 0 && trace->actions() !=
nullptr) {
3207 trace->Flush(compiler,
this);
3222 &greedy_loop_state, text_length);
3226 EmitChoices(compiler, &alt_gens, 0, trace, &preload);
3232 int new_flush_budget = trace->flush_budget() / choice_count;
3233 for (
int i = 0;
i < choice_count;
i++) {
3235 Trace new_trace(*trace);
3239 if (new_trace.
actions() !=
nullptr) {
3242 bool next_expects_preload =
3246 next_expects_preload);
3263 DCHECK(trace->stop_node() ==
nullptr);
3265 Label greedy_match_failed;
3266 Trace greedy_match_trace;
3270 macro_assembler->
Bind(&loop_label);
3273 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
3274 macro_assembler->
Bind(&greedy_match_failed);
3276 Label second_choice;
3277 macro_assembler->
Bind(&second_choice);
3281 EmitChoices(compiler, alt_gens, 1, new_trace, preload);
3283 macro_assembler->
Bind(greedy_loop_state->
label());
3288 macro_assembler->
GoTo(&second_choice);
3298 if (alt1.
guards() !=
nullptr && alt1.
guards()->length() != 0) {
3299 return eats_at_least;
3303 return eats_at_least;
3312 DCHECK(trace->is_trivial());
3324 if (bm ==
nullptr) {
3326 if (eats_at_least >= 1) {
3332 if (bm !=
nullptr) {
3335 return eats_at_least;
3340 int first_choice,
Trace* trace,
3349 int new_flush_budget = trace->flush_budget() / choice_count;
3351 for (
int i = first_choice;
i < choice_count;
i++) {
3352 bool is_last =
i == choice_count - 1;
3353 bool fall_through_on_failure = !is_last;
3358 int guard_count = (guards ==
nullptr) ? 0 : guards->
length();
3359 Trace new_trace(*trace);
3371 bool generate_full_check_inline =
false;
3372 if (
v8_flags.regexp_optimization &&
3377 fall_through_on_failure,
this)) {
3383 if (!fall_through_on_failure) {
3388 generate_full_check_inline =
true;
3391 if (!fall_through_on_failure) {
3392 macro_assembler->
GoTo(trace->backtrack());
3401 if (
i != first_choice) {
3405 generate_full_check_inline =
true;
3407 if (generate_full_check_inline) {
3408 if (new_trace.
actions() !=
nullptr) {
3411 for (
int j = 0; j < guard_count; j++) {
3414 alternative.
node()->
Emit(compiler, &new_trace);
3425 int preload_characters,
3426 bool next_expects_preload) {
3431 Trace out_of_line_trace(*trace);
3436 int guard_count = (guards ==
nullptr) ? 0 : guards->
length();
3437 if (next_expects_preload) {
3438 Label reload_current_char;
3440 for (
int j = 0; j < guard_count; j++) {
3443 alternative.
node()->
Emit(compiler, &out_of_line_trace);
3444 macro_assembler->
Bind(&reload_current_char);
3449 preload_characters);
3450 macro_assembler->
GoTo(&(alt_gen->
after));
3453 for (
int j = 0; j < guard_count; j++) {
3456 alternative.
node()->
Emit(compiler, &out_of_line_trace);
3463 if (limit_result ==
DONE)
return;
3471 data_.u_position_register.is_capture,
3473 Trace new_trace = *trace;
3480 data_.u_increment_register.reg);
3481 Trace new_trace = *trace;
3488 data_.u_store_register.value);
3489 Trace new_trace = *trace;
3496 data_.u_clear_captures.range_from,
data_.u_clear_captures.range_to));
3497 Trace new_trace = *trace;
3504 if (!trace->is_trivial()) {
3505 trace->Flush(compiler,
this);
3507 assembler->WriteCurrentPositionToRegister(
3508 data_.u_submatch.current_position_register, 0);
3509 assembler->WriteStackPointerToRegister(
3510 data_.u_submatch.stack_pointer_register);
3515 int start_pos_reg =
data_.u_empty_match_check.start_register;
3517 int rep_reg =
data_.u_empty_match_check.repetition_register;
3519 bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);
3520 if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
3523 assembler->GoTo(trace->backtrack());
3524 }
else if (know_dist && stored_pos < trace->cp_offset()) {
3528 }
else if (!trace->is_trivial()) {
3529 trace->Flush(compiler,
this);
3531 Label skip_empty_check;
3535 int limit =
data_.u_empty_match_check.repetition_limit;
3536 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
3540 assembler->IfRegisterEqPos(
data_.u_empty_match_check.start_register,
3541 trace->backtrack());
3542 assembler->Bind(&skip_empty_check);
3548 if (!trace->is_trivial()) {
3549 trace->Flush(compiler,
this);
3552 assembler->ReadCurrentPositionFromRegister(
3553 data_.u_submatch.current_position_register);
3554 assembler->ReadStackPointerFromRegister(
3555 data_.u_submatch.stack_pointer_register);
3561 int clear_registers_from =
data_.u_submatch.clear_register_from;
3562 Label clear_registers_backtrack;
3563 Trace new_trace = *trace;
3567 assembler->Bind(&clear_registers_backtrack);
3569 assembler->ClearRegisters(clear_registers_from, clear_registers_to);
3571 DCHECK(trace->backtrack() ==
nullptr);
3572 assembler->Backtrack();
3576 compiler->set_flags(
flags());
3587 if (!trace->is_trivial()) {
3588 trace->Flush(compiler,
this);
3593 if (limit_result ==
DONE)
return;
3599 if (IsIgnoreCase(compiler->flags())) {
3602 unicode, trace->backtrack());
3605 trace->backtrack());
3612 assembler->CheckNotInSurrogatePair(trace->cp_offset(), trace->backtrack());
3618 int element_count =
elements()->length();
3622 for (
int i = 0;
i < element_count;
i++) {
3625 cp_offset += elm.
length();
3643class AssertionPropagator :
public AllStatic {
3645 static void VisitText(
TextNode* that) {}
3647 static void VisitAction(ActionNode* that) {
3650 that->info()->AddFromFollowing(that->on_success()->info());
3653 static void VisitChoice(ChoiceNode* that,
int i) {
3656 that->info()->AddFromFollowing(that->alternatives()->at(
i).node()->info());
3659 static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) {
3660 that->info()->AddFromFollowing(that->continue_node()->info());
3663 static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {
3664 that->info()->AddFromFollowing(that->loop_node()->info());
3667 static void VisitNegativeLookaroundChoiceLookaroundNode(
3668 NegativeLookaroundChoiceNode* that) {
3672 static void VisitNegativeLookaroundChoiceContinueNode(
3673 NegativeLookaroundChoiceNode* that) {
3677 static void VisitBackReference(BackReferenceNode* that) {}
3679 static void VisitAssertion(AssertionNode* that) {}
3685class EatsAtLeastPropagator :
public AllStatic {
3687 static void VisitText(TextNode* that) {
3689 if (!that->read_backward()) {
3692 uint8_t eats_at_least = base::saturated_cast<uint8_t>(
3693 that->Length() + that->on_success()
3694 ->eats_at_least_info()
3695 ->eats_at_least_from_not_start);
3696 that->set_eats_at_least_info(EatsAtLeastInfo(eats_at_least));
3700 static void VisitAction(ActionNode* that) {
3701 switch (that->action_type()) {
3712 that->set_eats_at_least_info(
3713 *that->success_node()->on_success()->eats_at_least_info());
3719 DCHECK(that->eats_at_least_info()->IsZero());
3725 that->set_eats_at_least_info(
3726 that->on_success()->EatsAtLeastFromLoopEntry());
3734 that->set_eats_at_least_info(*that->on_success()->eats_at_least_info());
3739 static void VisitChoice(ChoiceNode* that,
int i) {
3742 EatsAtLeastInfo eats_at_least =
3743 i == 0 ? EatsAtLeastInfo(UINT8_MAX) : *that->eats_at_least_info();
3744 eats_at_least.SetMin(
3745 *that->alternatives()->at(
i).node()->eats_at_least_info());
3746 that->set_eats_at_least_info(eats_at_least);
3749 static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) {
3750 if (!that->read_backward()) {
3751 that->set_eats_at_least_info(
3752 *that->continue_node()->eats_at_least_info());
3756 static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {}
3758 static void VisitNegativeLookaroundChoiceLookaroundNode(
3759 NegativeLookaroundChoiceNode* that) {}
3761 static void VisitNegativeLookaroundChoiceContinueNode(
3762 NegativeLookaroundChoiceNode* that) {
3763 that->set_eats_at_least_info(*that->continue_node()->eats_at_least_info());
3766 static void VisitBackReference(BackReferenceNode* that) {
3767 if (!that->read_backward()) {
3768 that->set_eats_at_least_info(*that->on_success()->eats_at_least_info());
3772 static void VisitAssertion(AssertionNode* that) {
3773 EatsAtLeastInfo eats_at_least = *that->on_success()->eats_at_least_info();
3780 eats_at_least.eats_at_least_from_not_start = UINT8_MAX;
3782 that->set_eats_at_least_info(eats_at_least);
3793template <
typename... Propagators>
3804 if (check.HasOverflowed()) {
3805 if (
v8_flags.correctness_fuzzer_suppressions) {
3806 FATAL(
"Analysis: Aborting on stack overflow");
3808 fail(RegExpError::kAnalysisStackOverflow);
3811 if (that->info()->been_analyzed || that->info()->being_analyzed)
return;
3812 that->info()->being_analyzed =
true;
3814 that->info()->being_analyzed =
false;
3815 that->info()->been_analyzed =
true;
3833#define STATIC_FOR_EACH(expr) \
3835 int dummy[] = {((expr), 0)...}; \
3843 that->CalculateOffsets();
3857 for (
int i = 0;
i < that->alternatives()->
length();
i++) {
3865 DCHECK_EQ(that->alternatives()->length(), 2);
3894 DCHECK_EQ(that->alternatives()->length(), 2);
3899 Propagators::VisitNegativeLookaroundChoiceLookaroundNode(that));
3904 Propagators::VisitNegativeLookaroundChoiceContinueNode(that));
3919#undef STATIC_FOR_EACH
3936 isolate, is_one_byte, flags);
3937 DCHECK_EQ(node->info()->been_analyzed,
false);
3945 bool not_at_start) {
3958 budget = (budget - 1) / alts->
length();
3959 for (
int i = 0;
i < alts->
length();
i++) {
3961 if (alt.
guards() !=
nullptr && alt.
guards()->length() != 0) {
3973 if (initial_offset >= bm->
length())
return;
3975 int offset = initial_offset;
3979 if (initial_offset == 0)
set_bm_info(not_at_start, bm);
3987 if (initial_offset == 0)
set_bm_info(not_at_start, bm);
3993 int length = GetCaseIndependentLetters(isolate,
character,
3995 for (
int k = 0; k <
length; k++) {
4009 for (
int k = 0; k < ranges->length(); k++) {
4011 if (
static_cast<int>(range.from()) > max_char)
continue;
4012 int to = std::min(max_char,
static_cast<int>(range.to()));
4020 if (initial_offset == 0)
set_bm_info(not_at_start, bm);
4025 if (initial_offset == 0)
set_bm_info(not_at_start, bm);
4041 zone(), lead_surrogates,
true, on_success);
4051 return optional_step_back;
4060 if (!data->tree->IsAnchoredAtStart() && !IsSticky(
flags())) {
4066 captured_body, data->contains_anchor);
4068 if (data->contains_anchor) {
4075 false, loop_node)));
4076 node = first_step_node;
4085 if (node !=
nullptr) {
4097 data->error = RegExpError::kTooLarge;
static ActionNode * EmptyMatchCheck(int start_register, int repetition_register, int repetition_limit, RegExpNode *on_success)
int stack_pointer_register
static ActionNode * ModifyFlags(RegExpFlags flags, RegExpNode *on_success)
struct v8::internal::ActionNode::@137::@143 u_clear_captures
static ActionNode * StorePosition(int reg, bool is_capture, RegExpNode *on_success)
static ActionNode * PositiveSubmatchSuccess(int stack_pointer_reg, int restore_reg, int clear_capture_count, int clear_capture_from, RegExpNode *on_success)
union v8::internal::ActionNode::@137 data_
static ActionNode * BeginNegativeSubmatch(int stack_pointer_reg, int position_reg, RegExpNode *on_success)
static ActionNode * SetRegisterForLoop(int reg, int val, RegExpNode *on_success)
struct v8::internal::ActionNode::@137::@140 u_position_register
void FillInBMInfo(Isolate *isolate, int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start) override
struct v8::internal::ActionNode::@137::@144 u_modify_flags
@ POSITIVE_SUBMATCH_SUCCESS
@ BEGIN_POSITIVE_SUBMATCH
@ BEGIN_NEGATIVE_SUBMATCH
static ActionNode * IncrementRegister(int reg, RegExpNode *on_success)
struct v8::internal::ActionNode::@137::@138 u_store_register
void Emit(RegExpCompiler *compiler, Trace *trace) override
struct v8::internal::ActionNode::@137::@139 u_increment_register
void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int filled_in, bool not_at_start) override
static ActionNode * BeginPositiveSubmatch(int stack_pointer_reg, int position_reg, RegExpNode *body, ActionNode *success_node)
struct v8::internal::ActionNode::@137::@141 u_submatch
struct v8::internal::ActionNode::@137::@142 u_empty_match_check
static ActionNode * ClearCaptures(Interval range, RegExpNode *on_success)
AlternativeGenerationList(int count, Zone *zone)
~AlternativeGenerationList()
AlternativeGeneration * at(int i)
ZoneList< AlternativeGeneration * > alt_gens_
QuickCheckDetails quick_check_details
void EnsureAnalyzed(RegExpNode *that)
Analysis(Isolate *isolate, bool is_one_byte, RegExpFlags flags)
void VisitAssertion(AssertionNode *that) override
void VisitLoopChoice(LoopChoiceNode *that) override
DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis)
void VisitText(TextNode *that) override
void VisitChoice(ChoiceNode *that) override
void fail(RegExpError error)
void set_flags(RegExpFlags flags)
void VisitEnd(EndNode *that) override
void VisitAction(ActionNode *that) override
RegExpFlags flags() const
void VisitBackReference(BackReferenceNode *that) override
void VisitNegativeLookaroundChoice(NegativeLookaroundChoiceNode *that) override
Isolate * isolate() const
void EmitBoundaryCheck(RegExpCompiler *compiler, Trace *trace)
void FillInBMInfo(Isolate *isolate, int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start) override
void BacktrackIfPrevious(RegExpCompiler *compiler, Trace *trace, IfPrevious backtrack_if_previous)
void Emit(RegExpCompiler *compiler, Trace *trace) override
void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int filled_in, bool not_at_start) override
void FillInBMInfo(Isolate *isolate, int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start) override
void Emit(RegExpCompiler *compiler, Trace *trace) override
RegExpCompiler * compiler()
void SetInterval(int map_number, const Interval &interval)
ZoneList< BoyerMoorePositionInfo * > * bitmaps_
void SetAll(int map_number)
int GetSkipTable(int min_lookahead, int max_lookahead, DirectHandle< ByteArray > boolean_skip_table, DirectHandle< ByteArray > nibble_table=DirectHandle< ByteArray >{})
int Count(int map_number)
RegExpCompiler * compiler_
void SetRest(int from_map)
BoyerMoorePositionInfo * at(int i)
BoyerMooreLookahead(int length, RegExpCompiler *compiler, Zone *zone)
bool FindWorthwhileInterval(int *from, int *to)
int FindBestInterval(int max_number_of_chars, int old_biggest_points, int *from, int *to)
void EmitSkipInstructions(RegExpMacroAssembler *masm)
void Set(int map_number, int character)
static constexpr int kMapSize
void SetInterval(const Interval &interval)
std::bitset< kMapSize > Bitset
static void Canonicalize(ZoneList< CharacterRange > *ranges)
static V8_EXPORT_PRIVATE void AddCaseEquivalents(Isolate *isolate, Zone *zone, ZoneList< CharacterRange > *ranges, bool is_one_byte)
static void ClampToOneByte(ZoneList< CharacterRange > *ranges)
static ZoneList< CharacterRange > * List(Zone *zone, CharacterRange range)
static CharacterRange Range(base::uc32 from, base::uc32 to)
int EmitOptimizedUnanchoredSearch(RegExpCompiler *compiler, Trace *trace)
ZoneList< GuardedAlternative > * alternatives_
void AddAlternative(GuardedAlternative node)
int GreedyLoopTextLengthForAlternative(GuardedAlternative *alternative)
ZoneList< GuardedAlternative > * alternatives()
int CalculatePreloadCharacters(RegExpCompiler *compiler, int eats_at_least)
Trace * EmitGreedyLoop(RegExpCompiler *compiler, Trace *trace, AlternativeGenerationList *alt_gens, PreloadState *preloads, GreedyLoopState *greedy_loop_state, int text_length)
void FillInBMInfo(Isolate *isolate, int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start) override
void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start) override
virtual bool try_to_emit_quick_check_for_alternative(bool is_first)
void EmitChoices(RegExpCompiler *compiler, AlternativeGenerationList *alt_gens, int first_choice, Trace *trace, PreloadState *preloads)
RegExpNode * FilterOneByte(int depth, RegExpCompiler *compiler) override
void EmitOutOfLineContinuation(RegExpCompiler *compiler, Trace *trace, GuardedAlternative alternative, AlternativeGeneration *alt_gen, int preload_characters, bool next_expects_preload)
void SetUpPreLoad(RegExpCompiler *compiler, Trace *current_trace, PreloadState *preloads)
void AssertGuardsMentionRegisters(Trace *trace)
void GenerateGuard(RegExpMacroAssembler *macro_assembler, Guard *guard, Trace *trace)
void Emit(RegExpCompiler *compiler, Trace *trace) override
V8_INLINE bool is_null() const
V8_EXPORT_PRIVATE bool Get(unsigned value) const
void Set(unsigned value, Zone *zone)
void Emit(RegExpCompiler *compiler, Trace *trace) override
Handle< ByteArray > NewByteArray(int length, AllocationType allocation=AllocationType::kYoung)
Isolate * isolate() const
int Frequency(int in_character)
Trace counter_backtrack_trace_
Trace * counter_backtrack_trace()
GreedyLoopState(bool not_at_start)
void AddGuard(Guard *guard, Zone *zone)
ZoneList< Guard * > * guards()
v8::internal::Factory * factory()
IterationDecrementer(const IterationDecrementer &)=delete
IterationDecrementer(LoopChoiceNode *node)
IterationDecrementer & operator=(const IterationDecrementer &)=delete
V8_INLINE bool is_linked() const
void AddContinueAlternative(GuardedAlternative alt)
void AddLoopAlternative(GuardedAlternative alt)
RegExpNode * FilterOneByte(int depth, RegExpCompiler *compiler) override
void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start) override
void Emit(RegExpCompiler *compiler, Trace *trace) override
void FillInBMInfo(Isolate *isolate, int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start) override
void GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start) override
EatsAtLeastInfo EatsAtLeastFromLoopEntry() override
LoopInitializationMarker & operator=(const LoopInitializationMarker &)=delete
~LoopInitializationMarker()
LoopInitializationMarker(LoopChoiceNode *node)
LoopInitializationMarker(const LoopInitializationMarker &)=delete
static constexpr int kLookaroundIndex
static constexpr int kContinueIndex
void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start) override
RegExpNode * FilterOneByte(int depth, RegExpCompiler *compiler) override
void Emit(RegExpCompiler *compiler, Trace *trace) override
void Merge(QuickCheckDetails *other, int from_index)
void set_characters(int characters)
Position * positions(int index)
bool Rationalize(bool one_byte)
void Advance(int by, bool one_byte)
RecursionCheck(RegExpCompiler *compiler)
RegExpCompiler * compiler_
base::Vector< const base::uc16 > data() const
void AppendToText(RegExpText *text, Zone *zone) override
static RegExpNode * ToNode(RegExpTree *body, int index, RegExpCompiler *compiler, RegExpNode *on_success)
bool is_standard(Zone *zone)
void AppendToText(RegExpText *text, Zone *zone) override
ZoneList< CharacterRange > * ranges(Zone *zone)
static const int kMaxRecursion
int UnicodeLookaroundStackRegister()
void IncrementRecursionDepth()
void set_flags(RegExpFlags flags)
ZoneVector< RegExpNode * > * work_list_
RegExpMacroAssembler * macro_assembler()
static const int kNoRegister
RegExpNode * OptionallyStepBackToLeadSurrogate(RegExpNode *on_success)
RegExpMacroAssembler * macro_assembler_
int UnicodeLookaroundPositionRegister()
RegExpNode * PreprocessRegExp(RegExpCompileData *data, bool is_one_byte)
RegExpFlags flags() const
RegExpCompiler(Isolate *isolate, Zone *zone, int capture_count, RegExpFlags flags, bool is_one_byte)
Isolate * isolate() const
void ToNodeCheckForStackOverflow()
FrequencyCollator * frequency_collator()
CompilationResult Assemble(Isolate *isolate, RegExpMacroAssembler *assembler, RegExpNode *start, int capture_count, DirectHandle< String > pattern)
void DecrementRecursionDepth()
RegExpNode * on_match_success() const
RegExpNode * ForMatch(RegExpNode *match)
virtual void SkipUntilBitInTable(int cp_offset, Handle< ByteArray > table, Handle< ByteArray > nibble_table, int advance_by)=0
static constexpr int kTableSize
static constexpr int kMaxCPOffset
virtual void Bind(Label *label)=0
virtual void PushBacktrack(Label *label)=0
virtual void AbortedCodeGeneration()
static constexpr int kTableSizeBits
static constexpr int kMinCPOffset
static constexpr int kTableMask
virtual void CheckCharacterAfterAnd(unsigned c, unsigned and_with, Label *on_equal)=0
virtual void CheckCharacter(unsigned c, Label *on_equal)=0
virtual void ReadCurrentPositionFromRegister(int reg)=0
virtual DirectHandle< HeapObject > GetCode(DirectHandle< String > source, RegExpFlags flags)=0
virtual void IfRegisterLT(int reg, int comparand, Label *if_lt)=0
virtual void PushCurrentPosition()=0
virtual void CheckGreedyLoop(Label *on_tos_equals_current_position)=0
virtual void AdvanceCurrentPosition(int by)=0
V8_EXPORT_PRIVATE void LoadCurrentCharacter(int cp_offset, Label *on_end_of_input, bool check_bounds=true, int characters=1, int eats_at_least=kUseCharactersValue)
static constexpr int kMaxRegister
virtual void BindJumpTarget(Label *label)
Isolate * isolate() const
virtual void GoTo(Label *label)=0
virtual bool SkipUntilBitInTableUseSimd(int advance_by)
virtual void IfRegisterGE(int reg, int comparand, Label *if_ge)=0
void set_bm_info(bool not_at_start, BoyerMooreLookahead *bm)
virtual EatsAtLeastInfo EatsAtLeastFromLoopEntry()
virtual void FillInBMInfo(Isolate *isolate, int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
virtual RegExpNode * GetSuccessorOfOmnivorousTextNode(RegExpCompiler *compiler)
BoyerMooreLookahead * bm_info(bool not_at_start)
bool KeepRecursing(RegExpCompiler *compiler)
LimitResult LimitVersions(RegExpCompiler *compiler, Trace *trace)
void SaveBMInfo(BoyerMooreLookahead *bm, bool not_at_start, int offset)
uint32_t EatsAtLeast(bool not_at_start)
bool EmitQuickCheck(RegExpCompiler *compiler, Trace *bounds_check_trace, Trace *trace, bool preload_has_checked_bounds, Label *on_possible_success, QuickCheckDetails *details_return, bool fall_through_on_failure, ChoiceNode *predecessor)
virtual RegExpNode * FilterOneByte(int depth, RegExpCompiler *compiler)
virtual void GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
virtual void Emit(RegExpCompiler *compiler, Trace *trace)=0
static const int kRecursionBudget
static const int kNodeIsTooComplexForGreedyLoops
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)=0
static RegExpNode * ToNode(int min, int max, bool is_greedy, RegExpTree *body, RegExpCompiler *compiler, RegExpNode *on_success, bool not_at_start=false)
void AppendToText(RegExpText *text, Zone *zone) override
ZoneList< TextElement > * elements()
virtual void AppendToText(RegExpText *text, Zone *zone)
static const int kInfinity
RegExpNode * FilterSuccessor(int depth, RegExpCompiler *compiler)
RegExpNode * on_success()
RegExpNode * FilterOneByte(int depth, RegExpCompiler *compiler) override
bool HasOverflowed() const
static const uint32_t kMaxOneByteCharCodeU
static const int32_t kMaxOneByteCharCode
static const uint32_t kMaxUtf16CodeUnitU
static const base::uc32 kMaxCodePoint
static const int kMaxUtf16CodeUnit
TextType text_type() const
TextElement(TextType text_type, RegExpTree *tree)
RegExpClassRanges * class_ranges() const
static TextElement ClassRanges(RegExpClassRanges *class_ranges)
void set_cp_offset(int cp_offset)
static TextElement Atom(RegExpAtom *atom)
RegExpAtom * atom() const
static TextNode * CreateForSurrogatePair(Zone *zone, CharacterRange lead, ZoneList< CharacterRange > *trail_ranges, bool read_backward, RegExpNode *on_success)
static TextNode * CreateForCharacterRanges(Zone *zone, ZoneList< CharacterRange > *ranges, bool read_backward, RegExpNode *on_success)
RegExpNode * GetSuccessorOfOmnivorousTextNode(RegExpCompiler *compiler) override
ZoneList< TextElement > * elements()
void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start) override
int GreedyLoopTextLength() override
RegExpNode * FilterOneByte(int depth, RegExpCompiler *compiler) override
void Emit(RegExpCompiler *compiler, Trace *trace) override
void TextEmitPass(RegExpCompiler *compiler, TextEmitPassType pass, bool preloaded, Trace *trace, bool first_element_checked, int *checked_up_to)
void FillInBMInfo(Isolate *isolate, int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start) override
void MakeCaseIndependent(Isolate *isolate, bool is_one_byte, RegExpFlags flags)
ActionNode::ActionType action_type()
QuickCheckDetails * quick_check_performed()
void PerformDeferredActions(RegExpMacroAssembler *macro, int max_register, const DynamicBitSet &affected_registers, DynamicBitSet *registers_to_pop, DynamicBitSet *registers_to_clear, Zone *zone)
void set_stop_node(RegExpNode *node)
int characters_preloaded()
void Flush(RegExpCompiler *compiler, RegExpNode *successor)
void set_at_start(TriBool at_start)
int characters_preloaded_
DeferredAction * actions_
void AdvanceCurrentPositionInTrace(int by, RegExpCompiler *compiler)
void set_characters_preloaded(int count)
void add_action(DeferredAction *new_action)
int FindAffectedRegisters(DynamicBitSet *affected_registers, Zone *zone)
void set_quick_check_performed(QuickCheckDetails *d)
DeferredAction * actions()
void RestoreAffectedRegisters(RegExpMacroAssembler *macro, int max_register, const DynamicBitSet ®isters_to_pop, const DynamicBitSet ®isters_to_clear)
bool mentions_reg(int reg)
bool GetStoredPosition(int reg, int *cp_offset)
void set_backtrack(Label *backtrack)
void set_bound_checked_up_to(int to)
QuickCheckDetails quick_check_performed_
void set_loop_label(Label *label)
void set_flush_budget(int to)
void InvalidateCurrentCharacter()
static V8_EXPORT_PRIVATE void FatalProcessOutOfMemory(Isolate *isolate, const char *location, const OOMDetails &details=kNoOOMDetails)
VisitMarker(NodeInfo *info)
V8_INLINE int length() const
base::Vector< const T > ToConstVector() const
void Add(const T &element, Zone *zone)
Handle< SharedFunctionInfo > info
ZoneVector< RpoNumber > & result
std::vector< Point > points
constexpr unsigned CountTrailingZeros(T value)
constexpr bool IsPowerOfTwo(T value)
bool contains(const C &container, const T &element)
constexpr int kWordRangeCount
constexpr int kWordRanges[]
constexpr uint32_t kMaxLookaheadForBoyerMoore
constexpr int kBitsPerByte
static const base::uc32 kTrailSurrogateStart
constexpr bool IsEitherUnicode(RegExpFlags f)
ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b)
bool NeedsUnicodeCaseEquivalents(RegExpFlags flags)
BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL int character
constexpr int kNoRegister
static const base::uc32 kTrailSurrogateEnd
V8_EXPORT_PRIVATE FlagValues v8_flags
RegExpError AnalyzeRegExp(Isolate *isolate, bool is_one_byte, RegExpFlags flags, RegExpNode *node)
static const base::uc32 kLeadSurrogateStart
static const base::uc32 kLeadSurrogateEnd
bool RangeContainsLatin1Equivalents(CharacterRange range)
OptimizedCompilationInfo * info_
RegExpCompiler * compiler_
#define STATIC_FOR_EACH(expr)
#define DEFINE_ACCEPT(Type)
#define FOR_EACH_NODE_TYPE(VISIT)
ZoneList< base::uc16 > * characters_
SmallRegExpTreeVector alternatives_
#define DCHECK_LE(v1, v2)
#define CHECK_IMPLIES(lhs, rhs)
#define DCHECK_NOT_NULL(val)
#define DCHECK_IMPLIES(v1, v2)
#define DCHECK_NE(v1, v2)
#define DCHECK_GE(v1, v2)
#define DCHECK(condition)
#define DCHECK_LT(v1, v2)
#define DCHECK_EQ(v1, v2)
#define DCHECK_GT(v1, v2)
#define V8_EXPORT_PRIVATE
uint8_t eats_at_least_from_not_start
static const int kEatsAtLeastNotYetInitialized
bool preload_has_checked_bounds_
bool determines_perfectly
static CompilationResult RegExpTooBig()
std::unique_ptr< ValueMirror > value