15constexpr int kUndefinedRegisterValue = -1;
16constexpr int kUndefinedMatchIndexValue = -1;
17constexpr uint64_t kUndefinedClockValue = -1;
19template <
class Character>
21 base::Vector<const Character> context,
int position) {
34 if (
position == context.length())
return true;
37 if (context.length() == 0) {
41 }
else if (
position == context.length()) {
53base::Vector<RegExpInstruction> ToInstructionVector(
56 RegExpInstruction* inst_begin =
57 reinterpret_cast<RegExpInstruction*
>(raw_bytes->begin());
58 int inst_num = raw_bytes->length() /
sizeof(RegExpInstruction);
59 DCHECK_EQ(
sizeof(RegExpInstruction) * inst_num, raw_bytes->length());
60 return base::Vector<RegExpInstruction>(inst_begin, inst_num);
63template <
class Character>
64base::Vector<const Character> ToCharacterVector(
68base::Vector<const uint8_t> ToCharacterVector<uint8_t>(
71 String::FlatContent content = str->GetFlatContent(no_gc);
72 DCHECK(content.IsOneByte());
73 return content.ToOneByteVector();
77base::Vector<const base::uc16> ToCharacterVector<base::uc16>(
80 String::FlatContent content = str->GetFlatContent(no_gc);
81 DCHECK(content.IsTwoByte());
82 return content.ToUC16Vector();
87 static base::Vector<int> Filter(
89 base::Vector<uint64_t> quantifiers_clocks,
90 base::Vector<uint64_t> capture_clocks,
91 std::optional<base::Vector<uint64_t>> lookaround_clocks,
92 base::Vector<int> filtered_registers,
93 base::Vector<const RegExpInstruction> bytecode,
Zone* zone) {
106 return FilterGroups(
pc, bytecode, zone)
107 .Run(
registers, quantifiers_clocks, capture_clocks, lookaround_clocks,
112 FilterGroups(
int pc, base::Vector<const RegExpInstruction> bytecode,
145 base::Vector<int> Run(base::Vector<int>
registers_,
146 base::Vector<uint64_t> quantifiers_clocks_,
147 base::Vector<uint64_t> capture_clocks_,
148 std::optional<base::Vector<uint64_t>> lookaround_clocks,
149 base::Vector<int> filtered_registers_) {
155 switch (
instr.opcode) {
159 if (!IsAtNodeEnd()) {
169 int group_id =
instr.payload.group_id;
172 int register_id = 2 * group_id;
173 if (capture_clocks_[register_id] >= max_clock_ &&
174 capture_clocks_[register_id] != kUndefinedClockValue) {
175 filtered_registers_[register_id] =
registers_[register_id];
176 filtered_registers_[register_id + 1] =
registers_[register_id + 1];
188 int quantifier_id =
instr.payload.quantifier_id;
191 if (quantifiers_clocks_[quantifier_id] >= max_clock_) {
192 max_clock_ = quantifiers_clocks_[quantifier_id];
205 if (!lookaround_clocks.has_value() ||
206 lookaround_clocks->at(
instr.payload.lookaround_id) >=
224 return filtered_registers_;
240template <
class Character>
241class NfaInterpreter {
351 zone->AllocateArray<LastInputIndex>(bytecode->length()),
375 std::optional<struct Lookaround> lookaround;
376 bool in_lookaround =
false;
377 int lookaround_index;
382 DCHECK(!lookaround.has_value());
383 in_lookaround =
true;
387 lookaround_index = inst.payload.lookaround.index();
388 lookaround = Lookaround{.match_pc =
i,
390 .
type = inst.payload.lookaround.type()};
392 if (inst.payload.lookaround.type() ==
404 DCHECK(lookaround.has_value());
407 lookaround->capture_pc =
i + 1;
419 in_lookaround =
false;
454 if (
v8_flags.experimental_regexp_engine_capture_group_opt) {
464 sizeof(InterpreterThread);
476 int FindMatches(int32_t* output_registers,
int output_register_count) {
479 if (!only_captureless_lookbehinds_) {
480 int err_code = FillLookaroundTable();
487 while (match_num != max_match_num) {
488 int err_code = FindNextMatch();
491 if (!FoundMatch())
break;
495 err_code = GetFilteredRegisters(*best_match_thread_,
registers);
508 const int match_length = match_end - match_begin;
509 if (match_length != 0) {
510 SetInputIndex(match_end);
511 }
else if (match_end ==
input_.length()) {
513 SetInputIndex(match_end);
518 SetInputIndex(match_end + 1);
533 class InterpreterThread {
535 enum class ConsumedCharacter { DidConsume, DidNotConsume };
537 InterpreterThread(
int pc,
int* register_array_begin,
538 int* lookaround_match_index_array_begin,
539 uint64_t* quantifier_clock_array_begin,
540 uint64_t* capture_clock_array_begin,
541 uint64_t* lookaround_clock_array_begin,
542 ConsumedCharacter consumed_since_last_quantifier)
595 for (InterpreterThread t : blocked_threads_) {
600 for (InterpreterThread t : active_threads_) {
611 int err_code = RunActiveThreadsToEnd();
626 InterpreterThread& main_thread) {
629 DCHECK(!only_captureless_lookbehinds_);
639 if (GetLookaroundMatchIndexArray(main_thread)[
i] ==
640 kUndefinedMatchIndexValue) {
650 for (InterpreterThread t : blocked_threads_) {
655 for (InterpreterThread t : active_threads_) {
667 main_thread.pc = lookaround.capture_pc;
668 main_thread.consumed_since_last_quantifier =
669 InterpreterThread::ConsumedCharacter::DidConsume;
672 int err_code = RunActiveThreadsToEnd();
698 while ((reverse_ ? ((0 < input_index_ && input_index_ <=
input_.length()))
699 : (0 <= input_index_ && input_index_ <
input_.length())) &&
717 static constexpr int kTicksBetweenInterruptHandling = 64;
718 if (input_index_ % kTicksBetweenInterruptHandling == 0) {
719 int err_code = HandleInterrupts();
724 FlushBlockedThreads(input_char);
737 StackLimitCheck check(isolate_);
744 if (check.JsHasOverflowed()) {
746 }
else if (check.InterruptRequested()) {
751 HandleScope handles(isolate_);
752 DirectHandle<TrustedByteArray> bytecode_handle(bytecode_object_,
754 DirectHandle<String> input_handle(input_object_, isolate_);
756 if (check.JsHasOverflowed()) {
762 }
else if (check.InterruptRequested()) {
767 const bool was_one_byte =
775 if (IsException(
result, isolate_)) {
789 bytecode_ = ToInstructionVector(bytecode_object_, no_gc_);
791 input_ = ToCharacterVector<Character>(input_object_, no_gc_);
798 void SetInputIndex(
int new_input_index) {
810 int FindNextMatch() {
828 for (InterpreterThread t : blocked_threads_) {
833 for (InterpreterThread t : active_threads_) {
839 DestroyThread(*best_match_thread_);
845 if (only_captureless_lookbehinds_) {
851 int err_code = RunActiveThreadsToEnd();
865 int RunActiveThread(InterpreterThread t) {
878 if (IsPcProcessed(t.pc, t.consumed_since_last_quantifier)) {
882 MarkPcProcessed(t.pc, t.consumed_since_last_quantifier);
884 RegExpInstruction inst =
bytecode_[t.pc];
886 switch (inst.opcode) {
893 if (!SatisfiesAssertion(inst.payload.assertion_type, input_,
901 InterpreterThread fork = NewUninitializedThread(inst.payload.pc);
902 fork.consumed_since_last_quantifier =
903 t.consumed_since_last_quantifier;
905 base::Vector<int> fork_registers = GetRegisterArray(fork);
906 base::Vector<int> t_registers = GetRegisterArray(t);
907 DCHECK_EQ(fork_registers.length(), t_registers.length());
908 std::copy(t_registers.begin(), t_registers.end(),
909 fork_registers.begin());
911 if (
v8_flags.experimental_regexp_engine_capture_group_opt) {
912 base::Vector<uint64_t> fork_quantifier_clocks =
913 GetQuantifierClockArray(fork);
914 base::Vector<uint64_t> t_fork_quantifier_clocks =
915 GetQuantifierClockArray(t);
916 DCHECK_EQ(fork_quantifier_clocks.length(),
917 t_fork_quantifier_clocks.length());
918 std::copy(t_fork_quantifier_clocks.begin(),
919 t_fork_quantifier_clocks.end(),
920 fork_quantifier_clocks.begin());
922 base::Vector<uint64_t> fork_capture_clocks =
923 GetCaptureClockArray(fork);
924 base::Vector<uint64_t> t_fork_capture_clocks =
925 GetCaptureClockArray(t);
927 t_fork_capture_clocks.length());
928 std::copy(t_fork_capture_clocks.begin(),
929 t_fork_capture_clocks.end(), fork_capture_clocks.begin());
931 if (!only_captureless_lookbehinds_) {
932 base::Vector<int> fork_lookaround_match_index =
933 GetLookaroundMatchIndexArray(fork);
934 base::Vector<int> t_fork_lookaround_match_index =
935 GetLookaroundMatchIndexArray(t);
936 DCHECK_EQ(fork_lookaround_match_index.length(),
937 t_fork_lookaround_match_index.length());
938 std::copy(t_fork_lookaround_match_index.begin(),
939 t_fork_lookaround_match_index.end(),
940 fork_lookaround_match_index.begin());
942 base::Vector<uint64_t> fork_lookaround_clocks =
943 GetLookaroundClockArray(fork);
944 base::Vector<uint64_t> t_fork_lookaround_clocks =
945 GetLookaroundClockArray(t);
946 DCHECK_EQ(fork_lookaround_clocks.length(),
947 t_fork_lookaround_clocks.length());
948 std::copy(t_fork_lookaround_clocks.begin(),
949 t_fork_lookaround_clocks.end(),
950 fork_lookaround_clocks.begin());
956 if (
v8_flags.experimental_regexp_engine_capture_group_opt) {
957 int err_code = CheckMemoryConsumption();
965 t.pc = inst.payload.pc;
969 DestroyThread(*best_match_thread_);
973 for (InterpreterThread s : active_threads_) {
979 GetQuantifierClockArray(t)[inst.payload.quantifier_id] =
clock;
985 register_count_per_match_);
986 GetRegisterArray(t)[inst.payload.register_index] =
987 kUndefinedRegisterValue;
992 register_count_per_match_);
993 GetRegisterArray(t)[inst.payload.register_index] =
input_index_;
994 if (
v8_flags.experimental_regexp_engine_capture_group_opt) {
995 GetCaptureClockArray(t)[inst.payload.register_index] =
clock;
1005 t.consumed_since_last_quantifier =
1006 InterpreterThread::ConsumedCharacter::DidNotConsume;
1013 if (t.consumed_since_last_quantifier ==
1014 InterpreterThread::ConsumedCharacter::DidNotConsume) {
1026 DestroyThread(*best_match_thread_);
1030 for (InterpreterThread s : active_threads_) {
1040 if (!only_captureless_lookbehinds_) {
1057 if (!only_captureless_lookbehinds_) {
1061 if ((*lookaround_table_)[inst.payload.lookaround.index()]
1063 inst.payload.lookaround.is_positive()) {
1069 if (inst.payload.lookaround.is_positive() &&
1070 !only_captureless_lookbehinds_) {
1071 GetLookaroundClockArray(t)[inst.payload.lookaround.index()] =
1073 GetLookaroundMatchIndexArray(t)[inst.payload.lookaround.index()] =
1080 const int32_t lookbehind_index = inst.payload.lookaround.index();
1084 inst.payload.lookaround.is_positive()) {
1099 int RunActiveThreads() {
1111 void FlushBlockedThreads(
base::uc16 input_char) {
1120 bool has_matched =
false;
1123 ranges =
bytecode_[t.pc].payload.num_ranges;
1128 int next_pc = t.pc + ranges;
1131 for (
int pc = t.pc;
pc < next_pc; ++
pc) {
1134 RegExpInstruction::Uc16Range range = inst.payload.consume_range;
1135 if (input_char >= range.min && input_char <= range.max) {
1138 t.consumed_since_last_quantifier =
1139 InterpreterThread::ConsumedCharacter::DidConsume;
1155 size_t ApproximateTotalMemoryUsage() {
1162 int CheckMemoryConsumption() {
1172 v8_flags.experimental_regexp_engine_capture_group_opt_max_memory_usage *
1178 base::Vector<int> GetRegisterArray(InterpreterThread t) {
1179 return base::Vector<int>(t.register_array_begin, register_count_per_match_);
1181 base::Vector<int> GetLookaroundMatchIndexArray(InterpreterThread t) {
1185 return base::Vector<int>(t.lookaround_match_index_array_begin,
1189 base::Vector<uint64_t> GetQuantifierClockArray(InterpreterThread t) {
1193 return base::Vector<uint64_t>(t.quantifier_clock_array_begin,
1196 base::Vector<uint64_t> GetCaptureClockArray(InterpreterThread t) {
1200 return base::Vector<uint64_t>(t.captures_clock_array_begin,
1201 register_count_per_match_);
1203 base::Vector<uint64_t> GetLookaroundClockArray(InterpreterThread t) {
1207 return base::Vector<uint64_t>(t.lookaround_clock_array_begin,
1211 int* NewRegisterArrayUninitialized() {
1215 int* NewRegisterArray(
int fill_value) {
1216 int* array_begin = NewRegisterArrayUninitialized();
1218 std::fill(array_begin, array_end, fill_value);
1224 register_count_per_match_);
1227 int* NewLookaroundMatchIndexArrayUninitialized() {
1233 int* NewLookaroundMatchIndexArray(
int fill_value) {
1234 int* array_begin = NewLookaroundMatchIndexArrayUninitialized();
1236 std::fill(array_begin, array_end, fill_value);
1246 uint64_t* NewQuantifierClockArrayUninitialized() {
1251 uint64_t* NewQuantifierClockArray(uint64_t fill_value) {
1254 uint64_t* array_begin = NewQuantifierClockArrayUninitialized();
1256 std::fill(array_begin, array_end, fill_value);
1266 uint64_t* NewCaptureClockArrayUninitialized() {
1271 uint64_t* NewCaptureClockArray(uint64_t fill_value) {
1273 uint64_t* array_begin = NewCaptureClockArrayUninitialized();
1275 std::fill(array_begin, array_end, fill_value);
1279 void FreeCaptureClockArray(uint64_t* capture_clock_array_begin) {
1282 register_count_per_match_);
1285 uint64_t* NewLookaroundClockArrayUninitialized() {
1291 uint64_t* NewLookaroundClockArray(uint64_t fill_value) {
1293 uint64_t* array_begin = NewLookaroundClockArrayUninitialized();
1295 std::fill(array_begin, array_end, fill_value);
1308 InterpreterThread NewEmptyThread(
int pc) {
1309 if (
v8_flags.experimental_regexp_engine_capture_group_opt) {
1310 return InterpreterThread(
1311 pc, NewRegisterArray(kUndefinedRegisterValue),
1312 only_captureless_lookbehinds_
1314 : NewLookaroundMatchIndexArray(kUndefinedMatchIndexValue),
1315 NewQuantifierClockArray(0), NewCaptureClockArray(0),
1316 only_captureless_lookbehinds_ ?
nullptr : NewLookaroundClockArray(0),
1317 InterpreterThread::ConsumedCharacter::DidConsume);
1319 return InterpreterThread(
1320 pc, NewRegisterArray(kUndefinedRegisterValue),
nullptr,
nullptr,
1321 nullptr,
nullptr, InterpreterThread::ConsumedCharacter::DidConsume);
1328 InterpreterThread NewUninitializedThread(
int pc) {
1329 if (
v8_flags.experimental_regexp_engine_capture_group_opt) {
1330 return InterpreterThread(
1331 pc, NewRegisterArrayUninitialized(),
1332 only_captureless_lookbehinds_
1334 : NewLookaroundMatchIndexArrayUninitialized(),
1335 NewQuantifierClockArrayUninitialized(),
1336 NewCaptureClockArrayUninitialized(),
1337 only_captureless_lookbehinds_
1339 : NewLookaroundClockArrayUninitialized(),
1340 InterpreterThread::ConsumedCharacter::DidConsume);
1342 return InterpreterThread(
1343 pc, NewRegisterArrayUninitialized(),
nullptr,
nullptr,
nullptr,
1344 nullptr, InterpreterThread::ConsumedCharacter::DidConsume);
1349 InterpreterThread main_thread, base::Vector<int>& filtered_registers) {
1350 if (!only_captureless_lookbehinds_) {
1351 int err_code = FillLookaroundCaptures(main_thread);
1357 base::Vector<int>
registers = GetRegisterArray(main_thread);
1360 filtered_registers = base::Vector<int>(
1361 NewRegisterArray(kUndefinedRegisterValue), register_count_per_match_);
1366 filtered_registers = FilterGroups::Filter(
1367 *filter_groups_pc_, GetRegisterArray(main_thread),
1368 GetQuantifierClockArray(main_thread),
1369 GetCaptureClockArray(main_thread),
1370 only_captureless_lookbehinds_
1372 : std::optional(GetLookaroundClockArray(main_thread)),
1373 filtered_registers, bytecode_, zone_);
1381 void DestroyThread(InterpreterThread t) {
1382 FreeRegisterArray(t.register_array_begin);
1384 if (
v8_flags.experimental_regexp_engine_capture_group_opt) {
1385 FreeQuantifierClockArray(t.quantifier_clock_array_begin);
1386 FreeCaptureClockArray(t.captures_clock_array_begin);
1388 if (!only_captureless_lookbehinds_) {
1389 FreeLookaroundClockArray(t.lookaround_clock_array_begin);
1390 FreeLookaroundMatchIndexArray(t.lookaround_match_index_array_begin);
1408 bool IsPcProcessed(
int pc,
typename InterpreterThread::ConsumedCharacter
1411 case InterpreterThread::ConsumedCharacter::DidConsume:
1414 case InterpreterThread::ConsumedCharacter::DidNotConsume:
1422 void MarkPcProcessed(
int pc,
typename InterpreterThread::ConsumedCharacter
1425 case InterpreterThread::ConsumedCharacter::DidConsume:
1428 case InterpreterThread::ConsumedCharacter::DidNotConsume:
1441 base::Vector<const RegExpInstruction>
bytecode_;
1459 class LastInputIndex {
1461 LastInputIndex() : LastInputIndex(-1, -1) {}
1489 std::optional<RecyclingZoneAllocator<int>>
1491 std::optional<RecyclingZoneAllocator<uint64_t>>
1494 std::optional<RecyclingZoneAllocator<uint64_t>>
1553 Tagged<String> input,
int start_index, int32_t* output_registers,
1554 int output_register_count,
Zone* zone) {
1558 if (input->GetFlatContent(no_gc).IsOneByte()) {
1559 NfaInterpreter<uint8_t> interpreter(isolate,
call_origin, bytecode,
1560 register_count_per_match, input,
1562 return interpreter.FindMatches(output_registers, output_register_count);
1564 DCHECK(input->GetFlatContent(no_gc).IsTwoByte());
1565 NfaInterpreter<base::uc16> interpreter(isolate,
call_origin, bytecode,
1566 register_count_per_match, input,
1568 return interpreter.FindMatches(output_registers, output_register_count);
#define SBXCHECK_LT(lhs, rhs)
#define SBXCHECK_BOUNDS(index, limit)
#define SBXCHECK_GE(lhs, rhs)
static int FindMatches(Isolate *isolate, RegExp::CallOrigin call_origin, Tagged< TrustedByteArray > bytecode, int capture_count, Tagged< String > input, int start_index, int32_t *output_registers, int output_register_count, Zone *zone)
static constexpr bool kSupportsUnicode
static constexpr int kInternalRegExpException
static constexpr int kInternalRegExpSuccess
static constexpr int kInternalRegExpRetry
static bool IsOneByteRepresentationUnderneath(Tagged< String > string)
ZoneLinkedList< RegExpLookaround * > lookarounds_
ZoneStack< uint64_t > max_clock_stack_
int * lookaround_match_index_array_begin
RecyclingZoneAllocator< int > register_array_allocator_
uint64_t memory_consumption_per_thread_
const int register_count_per_match_
int not_having_consumed_character
base::Vector< LastInputIndex > pc_last_input_index_
bool only_captureless_lookbehinds_
Tagged< TrustedByteArray > bytecode_object_
ZoneStack< int > pc_stack_
std::optional< ZoneVector< ZoneVector< bool > > > lookaround_table_
uint64_t * captures_clock_array_begin
std::optional< int > filter_groups_pc_
std::optional< RecyclingZoneAllocator< uint64_t > > capture_clock_array_allocator_
std::optional< RecyclingZoneAllocator< int > > lookaround_match_index_array_allocator_
std::optional< ZoneList< bool > > lookbehind_table_
const RegExp::CallOrigin call_origin_
uint64_t * quantifier_clock_array_begin
ZoneList< InterpreterThread > active_threads_
int having_consumed_character
ZoneList< InterpreterThread > blocked_threads_
DisallowGarbageCollection no_gc_
ConsumedCharacter consumed_since_last_quantifier
std::optional< RecyclingZoneAllocator< uint64_t > > lookaround_clock_array_allocator_
int * register_array_begin
Tagged< String > input_object_
uint64_t * lookaround_clock_array_begin
std::optional< InterpreterThread > best_match_thread_
base::Vector< const RegExpInstruction > bytecode_
std::optional< RecyclingZoneAllocator< uint64_t > > quantifier_array_allocator_
ZoneVector< RpoNumber > & result
RegListBase< RegisterT > registers
V8_INLINE bool IsLineTerminator(uchar c)
PerThreadAssertScopeDebugOnly< false, SAFEPOINTS_ASSERT, HEAP_ALLOCATION_ASSERT > DisallowGarbageCollection
Tagged(T object) -> Tagged< T >
PerThreadAssertScopeDebugOnly< true, SAFEPOINTS_ASSERT, HEAP_ALLOCATION_ASSERT > AllowGarbageCollection
constexpr bool IsRegExpWord(base::uc32 c)
V8_EXPORT_PRIVATE FlagValues v8_flags
base::SmallVector< RegisterT, kStaticCapacity > registers_
#define DCHECK_LE(v1, v2)
#define DCHECK_NOT_NULL(val)
#define DCHECK_GE(v1, v2)
#define DCHECK(condition)
#define DCHECK_EQ(v1, v2)
#define DCHECK_GT(v1, v2)
@ SET_QUANTIFIER_TO_CLOCK
static bool IsFilter(const RegExpInstruction &instruction)
#define V8_WARN_UNUSED_RESULT