20constexpr base::uc32 kMaxSupportedCodepoint = 0xFFFFu;
25class CanBeHandledVisitor final :
private RegExpVisitor {
28 static bool Check(RegExpTree* tree,
RegExpFlags flags,
int capture_count) {
29 if (!AreSuitableFlags(flags))
return false;
30 CanBeHandledVisitor visitor{flags};
31 tree->Accept(&visitor,
nullptr);
32 return visitor.result_;
42 RegExpFlag::kGlobal | RegExpFlag::kSticky | RegExpFlag::kMultiline |
43 RegExpFlag::kDotAll | RegExpFlag::kLinear;
46 IsUnicode(kAllowedFlags));
47 return (flags & ~kAllowedFlags) == 0;
50 void* VisitDisjunction(RegExpDisjunction* node,
void*)
override {
51 for (RegExpTree* alt : *node->alternatives()) {
52 alt->Accept(
this,
nullptr);
60 void* VisitAlternative(RegExpAlternative* node,
void*)
override {
61 for (RegExpTree* child : *node->nodes()) {
62 child->Accept(
this,
nullptr);
70 void* VisitClassRanges(RegExpClassRanges* node,
void*)
override {
74 void* VisitClassSetOperand(RegExpClassSetOperand* node,
void*)
override {
79 void* VisitClassSetExpression(RegExpClassSetExpression* node,
85 void* VisitAssertion(RegExpAssertion* node,
void*)
override {
89 void* VisitAtom(RegExpAtom* node,
void*)
override {
return nullptr; }
91 void* VisitText(RegExpText* node,
void*)
override {
92 for (TextElement& el : *node->elements()) {
93 el.tree()->Accept(
this,
nullptr);
101 void* VisitQuantifier(RegExpQuantifier* node,
void*)
override {
109 static constexpr int kMaxReplicationFactor = 16;
114 if (node->min() > kMaxReplicationFactor ||
116 node->max() > kMaxReplicationFactor)) {
125 int local_replication;
127 if (node->min() > 0 && node->min_match() > 0) {
129 local_replication = std::max(node->min(), 1);
131 local_replication = node->min() + 1;
134 local_replication = node->max();
138 if (replication_factor_ > kMaxReplicationFactor) {
143 switch (node->quantifier_type()) {
154 node->body()->Accept(
this,
nullptr);
159 void* VisitCapture(RegExpCapture* node,
void*)
override {
160 node->body()->Accept(
this,
nullptr);
164 void* VisitGroup(RegExpGroup* node,
void*)
override {
165 if (
flags() != node->flags()) {
173 if (!AreSuitableFlags(node->flags())) {
178 node->body()->Accept(
this,
nullptr);
182 void* VisitLookaround(RegExpLookaround* node,
void*)
override {
190 if (!
v8_flags.experimental_regexp_engine_capture_group_opt &&
192 node->capture_count() > 0)) {
197 node->body()->Accept(
this,
nullptr);
201 void* VisitBackReference(RegExpBackReference* node,
void*)
override {
207 void* VisitEmpty(RegExpEmpty* node,
void*)
override {
return nullptr; }
224 return CanBeHandledVisitor::Check(tree, flags, capture_count);
243 Label(
const Label&) =
delete;
247 friend class BytecodeAssembler;
251 enum { UNBOUND, BOUND }
state_ = UNBOUND;
258class BytecodeAssembler {
262 explicit BytecodeAssembler(
Zone* zone) :
zone_(zone),
code_(0, zone) {}
264 ZoneList<RegExpInstruction> IntoCode() && {
return std::move(code_); }
272 void ClearRegister(int32_t register_index) {
280 void ConsumeAnyChar() {
284 void RangeCount(int32_t num_ranges) {
288 void Fork(Label& target) {
289 LabelledInstrImpl(RegExpInstruction::Opcode::FORK, target);
292 void Jmp(Label& target) {
293 LabelledInstrImpl(RegExpInstruction::Opcode::JMP, target);
296 void SetRegisterToCp(int32_t register_index) {
304 void StartLookaround(
int lookaround_index,
bool is_positive,
313 void WriteLookaroundTable(
int index) {
317 void ReadLookaroundTable(
int index,
bool is_positive,
323 void SetQuantifierToClock(int32_t quantifier_id) {
327 void FilterQuantifier(int32_t quantifier_id) {
331 void FilterGroup(int32_t group_id) {
335 void FilterLookaround(int32_t lookaround_id) {
339 void FilterChild(Label& target) {
340 LabelledInstrImpl(RegExpInstruction::Opcode::FILTER_CHILD, target);
343 void Bind(Label& target) {
344 DCHECK_EQ(target.state_, Label::UNBOUND);
346 int index =
code_.length();
348 while (target.unbound_patch_list_begin_ != -1) {
349 RegExpInstruction& inst =
code_[target.unbound_patch_list_begin_];
354 target.unbound_patch_list_begin_ = inst.payload.pc;
355 inst.payload.pc =
index;
358 target.state_ = Label::BOUND;
359 target.bound_index_ =
index;
369 if (target.state_ == Label::BOUND) {
370 result.payload.pc = target.bound_index_;
372 DCHECK_EQ(target.state_, Label::UNBOUND);
373 int new_list_begin =
code_.length();
376 result.payload.pc = target.unbound_patch_list_begin_;
378 target.unbound_patch_list_begin_ = new_list_begin;
388class FilterGroupsCompileVisitor final :
private RegExpVisitor {
390 static void CompileFilter(
Zone* zone, RegExpTree* tree,
391 BytecodeAssembler& assembler,
392 const ZoneMap<int, int>& quantifier_id_remapping,
393 const ZoneMap<int, int>& lookaround_id_remapping) {
407 FilterGroupsCompileVisitor visitor(assembler, zone, quantifier_id_remapping,
408 lookaround_id_remapping);
410 tree->Accept(&visitor,
nullptr);
412 while (!visitor.nodes_.empty()) {
413 auto& entry = visitor.nodes_.front();
415 visitor.assembler_.Bind(entry.label);
416 visitor.can_compile_node_ =
true;
417 entry.node->Accept(&visitor,
nullptr);
419 visitor.nodes_.pop_front();
424 FilterGroupsCompileVisitor(BytecodeAssembler& assembler,
Zone* zone,
425 const ZoneMap<int, int>& quantifier_id_remapping,
426 const ZoneMap<int, int>& lookaround_id_remapping)
434 void* VisitDisjunction(RegExpDisjunction* node,
void*)
override {
435 for (RegExpTree* alt : *node->alternatives()) {
436 alt->Accept(
this,
nullptr);
441 void* VisitAlternative(RegExpAlternative* node,
void*)
override {
442 for (RegExpTree* alt : *node->nodes()) {
443 alt->Accept(
this,
nullptr);
448 void* VisitClassRanges(RegExpClassRanges* node,
void*)
override {
452 void* VisitClassSetOperand(RegExpClassSetOperand* node,
void*)
override {
456 void* VisitClassSetExpression(RegExpClassSetExpression* node,
461 void* VisitAssertion(RegExpAssertion* node,
void*)
override {
465 void* VisitAtom(RegExpAtom* node,
void*)
override {
return nullptr; }
467 void* VisitText(RegExpText* node,
void*)
override {
return nullptr; }
469 void* VisitQuantifier(RegExpQuantifier* node,
void*)
override {
470 if (can_compile_node_) {
473 node->body()->Accept(
this,
nullptr);
475 if (node->CaptureRegisters().is_empty()) {
479 nodes_.emplace_back(node);
486 void* VisitCapture(RegExpCapture* node,
void*)
override {
487 if (can_compile_node_) {
490 node->body()->Accept(
this,
nullptr);
492 nodes_.emplace_back(node);
499 void* VisitGroup(RegExpGroup* node,
void*)
override {
500 node->body()->Accept(
this,
nullptr);
504 void* VisitLookaround(RegExpLookaround* node,
void*)
override {
505 if (can_compile_node_) {
508 node->body()->Accept(
this,
nullptr);
510 if (node->CaptureRegisters().is_empty()) {
514 nodes_.emplace_back(node);
521 void* VisitBackReference(RegExpBackReference* node,
void*)
override {
525 void* VisitEmpty(RegExpEmpty* node,
void*)
override {
return nullptr; }
530 explicit BFEntry(RegExpTree* node) :
label(), node(node) {}
553class CompileVisitor :
private RegExpVisitor {
555 static ZoneList<RegExpInstruction> Compile(RegExpTree* tree,
557 CompileVisitor compiler(zone);
559 if (!IsSticky(flags) && !tree->IsAnchoredAtStart()) {
563 compiler.CompileNonGreedyStar(
564 [&]() { compiler.assembler_.ConsumeAnyChar(); });
567 compiler.assembler_.SetRegisterToCp(0);
568 tree->Accept(&compiler,
nullptr);
569 compiler.assembler_.SetRegisterToCp(1);
570 compiler.assembler_.Accept();
602 while (!compiler.lookarounds_.empty()) {
603 auto node = compiler.lookarounds_.front();
604 compiler.CompileLookaround(node);
605 compiler.lookarounds_.pop_front();
608 if (
v8_flags.experimental_regexp_engine_capture_group_opt) {
609 FilterGroupsCompileVisitor::CompileFilter(
610 zone, tree, compiler.assembler_,
611 compiler.quantifier_id_remapping_.value(),
612 compiler.lookaround_id_remapping_);
615 return std::move(compiler.assembler_).IntoCode();
619 explicit CompileVisitor(
Zone* zone)
628 if (
v8_flags.experimental_regexp_engine_capture_group_opt) {
634 void CompileLookaround(RegExpLookaround* lookaround) {
636 assembler_.StartLookaround(RemapLookaround(lookaround->index()),
637 lookaround->is_positive(), lookaround->type());
642 !lookaround->body()->IsAnchoredAtEnd()) ||
644 !lookaround->body()->IsAnchoredAtStart())) {
645 CompileNonGreedyStar([&]() {
assembler_.ConsumeAnyChar(); });
651 lookaround->body()->Accept(
this,
nullptr);
654 assembler_.WriteLookaroundTable(RemapLookaround(lookaround->index()));
657 if (lookaround->capture_count() > 0 && lookaround->is_positive()) {
661 lookaround->body()->Accept(
this,
nullptr);
673 void CompileDisjunction(
int alt_num,
F&& gen_alt) {
704 for (
int i = 0;
i != alt_num - 1; ++
i) {
712 gen_alt(alt_num - 1);
717 void* VisitDisjunction(RegExpDisjunction* node,
void*)
override {
718 ZoneList<RegExpTree*>& alts = *node->alternatives();
719 CompileDisjunction(alts.length(),
720 [&](
int i) { alts[i]->Accept(this, nullptr); });
724 void* VisitAlternative(RegExpAlternative* node,
void*)
override {
726 ZoneList<RegExpTree*>*
children = node->nodes();
731 for (RegExpTree* child : *node->nodes()) {
732 child->Accept(
this,
nullptr);
738 void* VisitAssertion(RegExpAssertion* node,
void*)
override {
743 void CompileCharacterRanges(ZoneList<CharacterRange>* ranges,
bool negated) {
752 ZoneList<CharacterRange>* negated_ranges =
753 zone_->New<ZoneList<CharacterRange>>(ranges->length() + 1, zone_);
755 DCHECK_LE(negated_ranges->length(), ranges->length() + 1);
756 ranges = negated_ranges;
759 if (ranges->length() == 0) {
764 if (ranges->length() > 1) {
769 for (
int i = 0;
i < ranges->
length(); ++
i) {
772 static_assert(kMaxSupportedCodepoint <=
773 std::numeric_limits<base::uc16>::max());
782 static_cast<base::uc16>(std::min(to, kMaxSupportedCodepoint));
788 void* VisitClassRanges(RegExpClassRanges* node,
void*)
override {
789 CompileCharacterRanges(node->ranges(zone_), node->is_negated());
793 void* VisitClassSetOperand(RegExpClassSetOperand* node,
void*)
override {
795 DCHECK(!node->has_strings());
796 CompileCharacterRanges(node->ranges(),
false);
800 void* VisitClassSetExpression(RegExpClassSetExpression* node,
806 void* VisitAtom(RegExpAtom* node,
void*)
override {
808 base::Vector<const base::uc16> data = node->data();
809 for (
int i = data.
length() - 1;
i >= 0; --
i) {
820 void ClearRegisters(Interval indices) {
821 if (indices.is_empty())
return;
824 for (
int i = indices.from();
i <= indices.to();
i += 2) {
834 void CompileGreedyStar(
F&& emit_body) {
863 void CompileNonGreedyStar(
F&& emit_body) {
893 void CompileGreedyRepetition(
F&& emit_body,
int max_repetition_num) {
915 for (
int i = 0;
i != max_repetition_num; ++
i) {
926 void CompileNonGreedyRepetition(
F&& emit_body,
int max_repetition_num) {
955 for (
int i = 0;
i != max_repetition_num; ++
i) {
985 void CompileNonNullableGreedyPlus(
F&& emit_body) {
1008 void CompileNonNullableNonGreedyPlus(
F&& emit_body) {
1024 void* VisitQuantifier(RegExpQuantifier* node,
void*)
override {
1028 if (
v8_flags.experimental_regexp_engine_capture_group_opt &&
1030 if (!node->CaptureRegisters().is_empty()) {
1031 assembler_.SetQuantifierToClock(RemapQuantifier(node->index()));
1045 Interval body_registers = node->body()->CaptureRegisters();
1046 auto emit_body = [&]() {
1047 if (
v8_flags.experimental_regexp_engine_capture_group_opt) {
1048 assembler_.SetQuantifierToClock(RemapQuantifier(node->index()));
1050 ClearRegisters(body_registers);
1053 node->body()->Accept(
this,
nullptr);
1056 bool can_be_reduced_to_non_nullable_plus =
1058 node->min_match() > 0;
1060 if (can_be_reduced_to_non_nullable_plus) {
1068 for (
int i = 0;
i < node->
min() - 1; ++
i) {
1074 switch (node->quantifier_type()) {
1079 CompileNonNullableGreedyPlus(emit_body);
1084 CompileNonNullableNonGreedyPlus(emit_body);
1093 for (
int i = 0;
i < node->
min(); ++
i) {
1098 switch (node->quantifier_type()) {
1103 CompileGreedyStar(emit_body);
1106 CompileGreedyRepetition(emit_body, node->max() - node->min());
1112 CompileNonGreedyStar(emit_body);
1115 CompileNonGreedyRepetition(emit_body, node->max() - node->min());
1125 void* VisitCapture(RegExpCapture* node,
void*)
override {
1126 if (ignore_captures_) {
1128 node->body()->Accept(
this,
nullptr);
1130 int index = node->index();
1134 assembler_.SetRegisterToCp(reverse_ ? end_register : start_register);
1135 node->body()->Accept(
this,
nullptr);
1136 assembler_.SetRegisterToCp(reverse_ ? start_register : end_register);
1142 void* VisitGroup(RegExpGroup* node,
void*)
override {
1143 node->body()->Accept(
this,
nullptr);
1147 void* VisitLookaround(RegExpLookaround* node,
void*)
override {
1148 assembler_.ReadLookaroundTable(RemapLookaround(node->index()),
1149 node->is_positive(), node->type());
1152 if (!ignore_lookarounds_) {
1159 void* VisitBackReference(RegExpBackReference* node,
void*)
override {
1163 void* VisitEmpty(RegExpEmpty* node,
void*)
override {
return nullptr; }
1165 void* VisitText(RegExpText* node,
void*)
override {
1167 ZoneList<TextElement>* elements = node->elements();
1168 for (
int i = elements->
length() - 1;
i >= 0; --
i) {
1169 elements->at(
i).tree()->Accept(
this,
nullptr);
1172 for (TextElement& text_el : *node->elements()) {
1173 text_el.tree()->Accept(
this,
nullptr);
1179 int RemapQuantifier(
int id) {
1184 if (!map.contains(
id)) {
1185 map[id] =
static_cast<int>(map.size());
1191 int RemapLookaround(
int id) {
1229 return CompileVisitor::Compile(tree, flags, zone);
static void Canonicalize(ZoneList< CharacterRange > *ranges)
static void Negate(const ZoneList< CharacterRange > *src, ZoneList< CharacterRange > *dst, Zone *zone)
static ZoneList< RegExpInstruction > Compile(RegExpTree *tree, RegExpFlags flags, Zone *zone)
static bool CanBeHandled(RegExpTree *tree, RegExpFlags flags, int capture_count)
static constexpr bool kSupportsUnicode
Label & operator=(const Label &)=delete
static int StartRegister(int index)
static int EndRegister(int index)
static const int kInfinity
enum v8::internal::@1270::DeoptimizableCodeIterator::@67 state_
int unbound_patch_list_begin_
ZoneLinkedList< RegExpLookaround * > lookarounds_
const ZoneMap< int, int > & lookaround_id_remapping_
BytecodeAssembler & assembler_
const ZoneMap< int, int > & quantifier_id_remapping_
ZoneList< RegExpInstruction > code_
ZoneLinkedList< BFEntry > nodes_
std::vector< std::unique_ptr< InstanceTypeTree > > children
ZoneVector< RpoNumber > & result
Node::Uses::const_iterator begin(const Node::Uses &uses)
constexpr base::uc32 kMaxCodePoint
base::Flags< RegExpFlag > RegExpFlags
V8_EXPORT_PRIVATE FlagValues v8_flags
#define DCHECK_LE(v1, v2)
#define DCHECK_IMPLIES(v1, v2)
#define DCHECK_NE(v1, v2)
#define DCHECK_GE(v1, v2)
#define DCHECK(condition)
#define DCHECK_EQ(v1, v2)
static RegExpInstruction Accept()
static RegExpInstruction EndLoop()
static RegExpInstruction SetQuantifierToClock(int32_t quantifier_id)
static RegExpInstruction ConsumeAnyChar()
static RegExpInstruction BeginLoop()
static RegExpInstruction FilterGroup(int32_t group_id)
static RegExpInstruction ConsumeRange(base::uc16 min, base::uc16 max)
static RegExpInstruction SetRegisterToCp(int32_t register_index)
static RegExpInstruction FilterLookaround(int32_t lookaround_id)
static RegExpInstruction FilterQuantifier(int32_t quantifier_id)
static RegExpInstruction Fail()
static RegExpInstruction StartLookaround(int lookaround_index, bool is_positive, RegExpLookaround::Type type)
static RegExpInstruction ClearRegister(int32_t register_index)
static RegExpInstruction ReadLookTable(int32_t index, bool is_positive, RegExpLookaround::Type type)
static RegExpInstruction WriteLookTable(int32_t index)
static RegExpInstruction RangeCount(int32_t num_ranges)
static RegExpInstruction EndLookaround()
static RegExpInstruction Assertion(RegExpAssertion::Type t)