v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
regexp-ast.h
Go to the documentation of this file.
1// Copyright 2016 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_REGEXP_REGEXP_AST_H_
6#define V8_REGEXP_REGEXP_AST_H_
7
8#include <optional>
9
10#include "src/base/strings.h"
13#include "src/zone/zone-list.h"
14#include "src/zone/zone.h"
15
16#ifdef V8_INTL_SUPPORT
17#include "unicode/uniset.h"
18#endif // V8_INTL_SUPPORT
19
20namespace v8::internal {
21
// X-macro enumerating every concrete regexp syntax-tree node kind.
// VISIT is expanded once per entry with the bare kind name (e.g. Disjunction);
// users in this file paste that name onto prefixes such as RegExp, Visit, As
// and Is to generate declarations for each kind.
#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
  VISIT(Disjunction)                      \
  VISIT(Alternative)                      \
  VISIT(Assertion)                        \
  VISIT(ClassRanges)                      \
  VISIT(ClassSetOperand)                  \
  VISIT(ClassSetExpression)               \
  VISIT(Atom)                             \
  VISIT(Quantifier)                       \
  VISIT(Capture)                          \
  VISIT(Group)                            \
  VISIT(Lookaround)                       \
  VISIT(BackReference)                    \
  VISIT(Empty)                            \
  VISIT(Text)
37
// Helper that turns a node-kind name into a forward declaration of the
// matching RegExp<Name> class. It is undefined again right after use.
// NOTE(review): the embedded numbering jumps from 38 to 40, so the line that
// expands this macro is not visible in this listing.
38#define FORWARD_DECLARE(Name) class RegExp##Name;
40#undef FORWARD_DECLARE
41
42class RegExpCompiler;
43class RegExpNode;
44class RegExpTree;
45
47 public:
48 virtual ~RegExpVisitor() = default;
49#define MAKE_CASE(Name) \
50 virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
52#undef MAKE_CASE
53};
54
// A simple closed interval of ints, [from, to]. The empty interval is the one
// whose from() equals kNone.
class Interval {
 public:
  // Constructs the empty interval. to_ is kNone - 1 so that size() evaluates
  // to 0 without needing a branch.
  Interval() : from_(kNone), to_(kNone - 1) {}
  Interval(int from, int to) : from_(from), to_(to) {}

  // Returns the smallest interval covering both this interval and |that|.
  // An empty operand is ignored, so the union with Empty() is the other
  // operand unchanged.
  Interval Union(Interval that) const {
    if (that.from_ == kNone) return *this;
    if (from_ == kNone) return that;
    return Interval(std::min(from_, that.from_), std::max(to_, that.to_));
  }

  static Interval Empty() { return Interval(); }

  // Both bounds are inclusive.
  bool Contains(int value) const { return (from_ <= value) && (value <= to_); }
  bool is_empty() const { return from_ == kNone; }
  int from() const { return from_; }
  int to() const { return to_; }
  // Number of values in the interval; 0 for the empty interval.
  int size() const { return to_ - from_ + 1; }

  // Value of from() that marks the empty interval.
  static constexpr int kNone = -1;

 private:
  int from_;
  int to_;
};
80
// Named standard character sets. The underlying char of each enumerator is
// the character used to identify the set; for the class escapes it is the
// escape letter itself.
enum class StandardCharacterSet : char {
  kWhitespace = 's',         // Like /\s/.
  kNotWhitespace = 'S',      // Like /\S/.
  kWord = 'w',               // Like /\w/.
  kNotWord = 'W',            // Like /\W/.
  kDigit = 'd',              // Like /\d/.
  kNotDigit = 'D',           // Like /\D/.
  kLineTerminator = 'n',     // The inverse of /./.
  kNotLineTerminator = '.',  // Like /./.
  kEverything = '*',         // Matches every character, like /./s.
};
93
94// Represents code points (with values up to 0x10FFFF) in the range from from_
95// to to_, both ends are inclusive.
97 public:
98 CharacterRange() = default;
99 // For compatibility with the CHECK_OK macro.
100 CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT
101
102 static inline CharacterRange Singleton(base::uc32 value) {
103 return CharacterRange(value, value);
104 }
105 static inline CharacterRange Range(base::uc32 from, base::uc32 to) {
106 DCHECK(0 <= from && to <= kMaxCodePoint);
107 DCHECK(static_cast<uint32_t>(from) <= static_cast<uint32_t>(to));
108 return CharacterRange(from, to);
109 }
110 static inline CharacterRange Everything() {
111 return CharacterRange(0, kMaxCodePoint);
112 }
113
114 static inline ZoneList<CharacterRange>* List(Zone* zone,
115 CharacterRange range) {
117 zone->New<ZoneList<CharacterRange>>(1, zone);
118 list->Add(range, zone);
119 return list;
120 }
121
122 // Add class escapes. Add case equivalent closure for \w and \W if necessary.
124 StandardCharacterSet standard_character_set,
125 ZoneList<CharacterRange>* ranges, bool add_unicode_case_equivalents,
126 Zone* zone);
127 // Add case equivalents to ranges. Only used for /i, not for /ui or /vi, as
128 // the semantics for unicode mode are slightly different.
129 // See https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch Note 4.
131 Isolate* isolate, Zone* zone, ZoneList<CharacterRange>* ranges,
132 bool is_one_byte);
133 // Add case equivalent code points to ranges. Only used for /ui and /vi, not
134 // for /i, as the semantics for non-unicode mode are slightly different.
135 // See https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch Note 4.
137 Zone* zone);
138
139 bool Contains(base::uc32 i) const { return from_ <= i && i <= to_; }
140 base::uc32 from() const { return from_; }
141 base::uc32 to() const { return to_; }
142 bool IsEverything(base::uc32 max) const { return from_ == 0 && to_ >= max; }
143 bool IsSingleton() const { return from_ == to_; }
144
145 // Whether a range list is in canonical form: Ranges ordered by from value,
146 // and ranges non-overlapping and non-adjacent.
147 V8_EXPORT_PRIVATE static bool IsCanonical(
148 const ZoneList<CharacterRange>* ranges);
149 // Convert range list to canonical form. The characters covered by the ranges
150 // will still be the same, but no character is in more than one range, and
151 // adjacent ranges are merged. The resulting list may be shorter than the
152 // original, but cannot be longer.
153 static void Canonicalize(ZoneList<CharacterRange>* ranges);
154 // Negate the contents of a character range in canonical form.
155 static void Negate(const ZoneList<CharacterRange>* src,
156 ZoneList<CharacterRange>* dst, Zone* zone);
157 // Intersect the contents of two character ranges in canonical form.
158 static void Intersect(const ZoneList<CharacterRange>* lhs,
159 const ZoneList<CharacterRange>* rhs,
160 ZoneList<CharacterRange>* dst, Zone* zone);
161 // Subtract the contents of |to_remove| from the contents of |src|.
162 static void Subtract(const ZoneList<CharacterRange>* src,
163 const ZoneList<CharacterRange>* to_remove,
164 ZoneList<CharacterRange>* dst, Zone* zone);
165 // Remove all ranges outside the one-byte range.
166 static void ClampToOneByte(ZoneList<CharacterRange>* ranges);
167 // Checks if two ranges (both need to be canonical) are equal.
168 static bool Equals(const ZoneList<CharacterRange>* lhs,
169 const ZoneList<CharacterRange>* rhs);
170
171 private:
172 CharacterRange(base::uc32 from, base::uc32 to) : from_(from), to_(to) {}
173
174 static constexpr int kMaxCodePoint = 0x10ffff;
175
178};
179
180inline bool operator==(const CharacterRange& lhs, const CharacterRange& rhs) {
181 return lhs.from() == rhs.from() && lhs.to() == rhs.to();
182}
183inline bool operator!=(const CharacterRange& lhs, const CharacterRange& rhs) {
184 return !operator==(lhs, rhs);
185}
186
// Declares the overrides that a concrete RegExpTree subclass of kind |Name|
// provides: Accept(), ToNode(), and the As<Name>() / Is<Name>() pair for its
// own kind. The last declaration deliberately has no trailing semicolon; the
// use site writes DECL_BOILERPLATE(Name); and supplies it.
#define DECL_BOILERPLATE(Name)                                         \
  void* Accept(RegExpVisitor* visitor, void* data) override;           \
  RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) \
      override;                                                        \
  RegExp##Name* As##Name() override;                                   \
  bool Is##Name() override
193
// Abstract base class of the regexp syntax-tree nodes. Subclasses must
// implement Accept(), ToNode(), min_match() and max_match(); the
// IsTextElement() / IsAnchoredAtStart() / IsAnchoredAtEnd() queries default to
// false.
194class RegExpTree : public ZoneObject {
195 public:
// Largest match length a node can report (kMaxInt).
196 static const int kInfinity = kMaxInt;
197 virtual ~RegExpTree() = default;
198 virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
199 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
200 RegExpNode* on_success) = 0;
201 virtual bool IsTextElement() { return false; }
202 virtual bool IsAnchoredAtStart() { return false; }
203 virtual bool IsAnchoredAtEnd() { return false; }
204 virtual int min_match() = 0;
205 virtual int max_match() = 0;
206 // Returns the interval of registers used for captures within this
207 // expression.
// NOTE(review): the declaration described by the comment above (embedded line
// 208) is missing from this listing.
209 virtual void AppendToText(RegExpText* text, Zone* zone);
210 V8_EXPORT_PRIVATE std::ostream& Print(std::ostream& os, Zone* zone);
// Per-kind As<Name>() / Is<Name>() virtuals, generated from a macro.
// NOTE(review): the line that expands MAKE_ASTYPE (embedded line 214) is
// missing from this listing.
211#define MAKE_ASTYPE(Name) \
212 virtual RegExp##Name* As##Name(); \
213 virtual bool Is##Name();
215#undef MAKE_ASTYPE
216};
217
// Tree node for a disjunction. min_match() and max_match() return values
// stored in min_match_ / max_match_ rather than recomputing them.
// NOTE(review): several numbered lines of this class are absent from this
// listing, so the declarations on them are not visible here.
218class RegExpDisjunction final : public RegExpTree {
219 public:
221
222 DECL_BOILERPLATE(Disjunction);
223
224 Interval CaptureRegisters() override;
225 bool IsAnchoredAtStart() override;
226 bool IsAnchoredAtEnd() override;
227 int min_match() override { return min_match_; }
228 int max_match() override { return max_match_; }
230
231 private:
232 bool SortConsecutiveAtoms(RegExpCompiler* compiler);
238};
239
// Tree node holding a list of child nodes (nodes()). min_match() and
// max_match() return values stored in min_match_ / max_match_.
// NOTE(review): several numbered lines of this class are absent from this
// listing, so the declarations on them are not visible here.
240class RegExpAlternative final : public RegExpTree {
241 public:
243
244 DECL_BOILERPLATE(Alternative);
245
246 Interval CaptureRegisters() override;
247 bool IsAnchoredAtStart() override;
248 bool IsAnchoredAtEnd() override;
249 int min_match() override { return min_match_; }
250 int max_match() override { return max_match_; }
251 ZoneList<RegExpTree*>* nodes() const { return nodes_; }
252
253 private:
257};
258
// Tree node for an assertion of one of the kinds listed in Type. It reports a
// match length of exactly 0 (min_match() == max_match() == 0).
// NOTE(review): several numbered lines of this class are absent from this
// listing, so the declarations on them are not visible here.
259class RegExpAssertion final : public RegExpTree {
260 public:
261 enum class Type {
262 START_OF_LINE = 0,
263 START_OF_INPUT = 1,
264 END_OF_LINE = 2,
265 END_OF_INPUT = 3,
266 BOUNDARY = 4,
267 NON_BOUNDARY = 5,
269 };
270 explicit RegExpAssertion(Type type) : assertion_type_(type) {}
271
273
274 bool IsAnchoredAtStart() override;
275 bool IsAnchoredAtEnd() override;
276 int min_match() override { return 0; }
277 int max_match() override { return 0; }
279
280 private:
282};
283
304
// Tree node for a character class, built either from an explicit range list
// plus flags or from a StandardCharacterSet. It is a text element that reports
// a match length between 1 and 2.
// NOTE(review): several numbered lines of this class are absent from this
// listing, so the declarations on them are not visible here.
305class RegExpClassRanges final : public RegExpTree {
306 public:
307 // NEGATED: The character class is negated and should match everything but
308 // the specified ranges.
309 // CONTAINS_SPLIT_SURROGATE: The character class contains part of a split
310 // surrogate and should not be unicode-desugared (crbug.com/641091).
311 // IS_CASE_FOLDED: If case folding is required (/i), it was already
312 // performed on individual ranges and should not be applied again.
313 enum Flag {
314 NEGATED = 1 << 0,
317 };
319
// Range-list constructor (its first signature line is not visible here). Note
// that it may append to the caller's |ranges| list, see below.
321 ClassRangesFlags class_ranges_flags = ClassRangesFlags())
322 : set_(ranges), class_ranges_flags_(class_ranges_flags) {
323 // Convert the empty set of ranges to the negated Everything() range.
// The XOR toggles NEGATED, so an empty class that was already negated ends up
// as a non-negated Everything().
324 if (ranges->is_empty()) {
325 ranges->Add(CharacterRange::Everything(), zone);
326 class_ranges_flags_ ^= NEGATED;
327 }
328 }
// Standard-set constructor: starts with no flags set.
329 explicit RegExpClassRanges(StandardCharacterSet standard_set_type)
330 : set_(standard_set_type), class_ranges_flags_() {}
331
332 DECL_BOILERPLATE(ClassRanges);
333
334 bool IsTextElement() override { return true; }
335 int min_match() override { return 1; }
336 // The character class may match two code units for unicode regexps.
337 // TODO(yangguo): we should split this class for usage in TextElement, and
338 // make max_match() dependent on the character class content.
339 int max_match() override { return 2; }
340
341 void AppendToText(RegExpText* text, Zone* zone) override;
342
343 // TODO(lrn): Remove need for complex version if is_standard that
344 // recognizes a mangled standard set and just do { return set_.is_special(); }
345 bool is_standard(Zone* zone);
346 // Returns a value representing the standard character set if is_standard()
347 // returns true.
351
// Returns the character set by value.
352 CharacterSet character_set() const { return set_; }
354
// Flag queries: each tests a single bit of class_ranges_flags_.
355 bool is_negated() const { return (class_ranges_flags_ & NEGATED) != 0; }
359 bool is_case_folded() const {
360 return (class_ranges_flags_ & IS_CASE_FOLDED) != 0;
361 }
362
363 private:
366};
367
371 // Longer strings first so we generate matches for the largest string
372 // possible.
373 if (lhs.length() != rhs.length()) {
374 return lhs.length() > rhs.length();
375 }
376 for (int i = 0; i < lhs.length(); i++) {
377 if (lhs[i] != rhs[i]) {
378 return lhs[i] < rhs[i];
379 }
380 }
381 return false;
382 }
383};
384
385// A type used for strings as part of character classes (only possible in
386// unicode sets mode).
387// We use a ZoneMap instead of an UnorderedZoneMap because we need to match
388// the longest alternatives first. By using a ZoneMap with the custom comparator
389// we can avoid sorting before assembling the code.
390// Strings are likely short (the largest string in current unicode properties
391// consists of 10 code points).
394
395// TODO(pthier): If we are sure we don't want to use icu::UnicodeSets
396// (performance evaluation pending), this class can be merged with
397// RegExpClassRanges.
// Tree node for a class set operand. It is a text element whose match-length
// bounds are stored in min_match_ / max_match_.
// NOTE(review): several numbered lines of this class are absent from this
// listing, including the first line of the constructor and of two of the set
// operations below.
398class RegExpClassSetOperand final : public RegExpTree {
399 public:
401 CharacterClassStrings* strings);
402
403 DECL_BOILERPLATE(ClassSetOperand);
404
405 bool IsTextElement() override { return true; }
406 int min_match() override { return min_match_; }
407 int max_match() override { return max_match_; }
408
409 void Union(RegExpClassSetOperand* other, Zone* zone);
411 ZoneList<CharacterRange>* temp_ranges, Zone* zone);
413 ZoneList<CharacterRange>* temp_ranges, Zone* zone);
414
// True only when a string collection is present and non-empty; a null
// strings_ pointer counts as having no strings.
415 bool has_strings() const { return strings_ != nullptr && !strings_->empty(); }
421
422 private:
427};
428
430 public:
432
436
437 DECL_BOILERPLATE(ClassSetExpression);
438
439 // Create an empty class set expression (matches everything if |is_negated|,
440 // nothing otherwise).
441 static RegExpClassSetExpression* Empty(Zone* zone, bool is_negated);
442
443 bool IsTextElement() override { return true; }
444 int min_match() override { return 0; }
445 int max_match() override { return max_match_; }
446
448 bool is_negated() const { return is_negated_; }
450 const ZoneList<RegExpTree*>* operands() const { return operands_; }
452
453 private:
454 // Recursively evaluates the tree rooted at |root|, computing the valid
455 // CharacterRanges and strings after applying all set operations.
456 // The original tree will be modified by this method, so don't store pointers
457 // to inner nodes of the tree somewhere else!
458 // Modifying the tree in-place saves memory and speeds up multiple calls of
459 // the method (e.g. when unrolling quantifiers).
460 // |temp_ranges| is used for intermediate results, passed as parameter to
461 // avoid allocating new lists all the time.
463 RegExpTree* root, ZoneList<CharacterRange>* temp_ranges, Zone* zone);
464
470};
471
// Tree node whose content is held in data_. It is a text element whose
// minimum and maximum match length are both data_.length().
// NOTE(review): several numbered lines of this class (embedded 474, 476, 483,
// 487) are absent from this listing.
472class RegExpAtom final : public RegExpTree {
473 public:
475
477
478 bool IsTextElement() override { return true; }
479 int min_match() override { return data_.length(); }
480 int max_match() override { return data_.length(); }
481 void AppendToText(RegExpText* text, Zone* zone) override;
482
484 int length() const { return data_.length(); }
485
486 private:
488};
489
// Value type that pairs a RegExpTree pointer (tree()) with a TextType tag
// (text_type()) and a cp_offset. It does not derive from RegExpTree.
// NOTE(review): several numbered lines of this class are absent from this
// listing, including the signature of the second cast accessor below.
490class TextElement final {
491 public:
493
496
497 int cp_offset() const { return cp_offset_; }
499 int length() const;
500
501 TextType text_type() const { return text_type_; }
502
503 RegExpTree* tree() const { return tree_; }
504
// Returns tree() cast to RegExpAtom*. Only valid when text_type() is ATOM,
// which is checked by the DCHECK.
505 RegExpAtom* atom() const {
506 DCHECK(text_type() == ATOM);
507 return reinterpret_cast<RegExpAtom*>(tree());
508 }
509
// Tail of the accessor that returns tree() cast to RegExpClassRanges*.
512 return reinterpret_cast<RegExpClassRanges*>(tree());
513 }
514
515 private:
518
522};
523
// Tree node holding a list of TextElements. length_ is the running sum of the
// lengths of all added elements and is reported as both the minimum and the
// maximum match length.
// NOTE(review): embedded lines 528, 538 and 541 are absent from this listing.
524class RegExpText final : public RegExpTree {
525 public:
// The element list is created with the arguments (2, zone).
526 explicit RegExpText(Zone* zone) : elements_(2, zone) {}
527
529
530 bool IsTextElement() override { return true; }
531 int min_match() override { return length_; }
532 int max_match() override { return length_; }
533 void AppendToText(RegExpText* text, Zone* zone) override;
// Appends |elm| and adds its length() to the cached total length.
534 void AddElement(TextElement elm, Zone* zone) {
535 elements_.Add(elm, zone);
536 length_ += elm.length();
537 }
539
540 private:
542 int length_ = 0;
543};
544
// Tree node that applies a repetition count range [min, max] to a body node.
// The match-length bounds are computed once in the constructor.
// NOTE(review): several numbered lines of this class are absent from this
// listing, including the QuantifierType declaration and the last line of the
// constructor's parameter list.
545class RegExpQuantifier final : public RegExpTree {
546 public:
548 RegExpQuantifier(int min, int max, QuantifierType type, int index,
550 : body_(body),
551 min_(min),
552 max_(max),
553 quantifier_type_(type),
554 index_(index) {
// min_match_ = min * body->min_match(), saturated at kInfinity: the division
// test detects when the product would exceed kInfinity. A min of 0 takes the
// else branch and yields 0.
555 if (min > 0 && body->min_match() > kInfinity / min) {
556 min_match_ = kInfinity;
557 } else {
558 min_match_ = min * body->min_match();
559 }
// Same saturating product for the upper bound.
560 if (max > 0 && body->max_match() > kInfinity / max) {
561 max_match_ = kInfinity;
562 } else {
563 max_match_ = max * body->max_match();
564 }
565 }
566
567 DECL_BOILERPLATE(Quantifier);
568
// Static overload that takes the quantifier parameters explicitly instead of
// reading them from an instance.
569 static RegExpNode* ToNode(int min, int max, bool is_greedy, RegExpTree* body,
570 RegExpCompiler* compiler, RegExpNode* on_success,
571 bool not_at_start = false);
572 Interval CaptureRegisters() override;
573 int min_match() override { return min_match_; }
574 int max_match() override { return max_match_; }
575 int min() const { return min_; }
576 int max() const { return max_; }
578 int index() const { return index_; }
// Exactly one of the three predicates below is true for a given
// quantifier_type_ value of POSSESSIVE, NON_GREEDY or GREEDY.
579 bool is_possessive() const { return quantifier_type_ == POSSESSIVE; }
580 bool is_non_greedy() const { return quantifier_type_ == NON_GREEDY; }
581 bool is_greedy() const { return quantifier_type_ == GREEDY; }
582 RegExpTree* body() const { return body_; }
583
584 private:
586 int min_;
587 int max_;
592};
593
// Tree node for a capture identified by an integer index. It is constructed
// without a body; body, match-length bounds and name start out as
// nullptr / 0 / 0 / nullptr.
// NOTE(review): several numbered lines of this class are absent from this
// listing, including the signature and part of the body of the setter whose
// tail appears below.
594class RegExpCapture final : public RegExpTree {
595 public:
596 explicit RegExpCapture(int index)
597 : body_(nullptr),
598 index_(index),
599 min_match_(0),
600 max_match_(0),
601 name_(nullptr) {}
602
604
605 static RegExpNode* ToNode(RegExpTree* body, int index,
606 RegExpCompiler* compiler, RegExpNode* on_success);
607 bool IsAnchoredAtStart() override;
608 bool IsAnchoredAtEnd() override;
609 Interval CaptureRegisters() override;
610 int min_match() override { return min_match_; }
611 int max_match() override { return max_match_; }
612 RegExpTree* body() { return body_; }
614 body_ = body;
617 }
618 int index() const { return index_; }
619 const ZoneVector<base::uc16>* name() const { return name_; }
620 void set_name(const ZoneVector<base::uc16>* name) { name_ = name; }
// Each capture index maps to a pair of consecutive register numbers:
// 2 * index for the start and 2 * index + 1 for the end.
621 static int StartRegister(int index) { return index * 2; }
622 static int EndRegister(int index) { return index * 2 + 1; }
623
624 private:
625 RegExpTree* body_ = nullptr;
627 int min_match_ = 0;
628 int max_match_ = 0;
630};
631
// Tree node wrapping a body node together with a RegExpFlags value. The
// anchoring queries are forwarded to the body; the match-length bounds are
// returned from min_match_ / max_match_.
// NOTE(review): several numbered lines of this class are absent from this
// listing, including the first line of the constructor.
632class RegExpGroup final : public RegExpTree {
633 public:
635 : body_(body),
636 flags_(flags),
639
641
642 bool IsAnchoredAtStart() override { return body_->IsAnchoredAtStart(); }
643 bool IsAnchoredAtEnd() override { return body_->IsAnchoredAtEnd(); }
644 int min_match() override { return min_match_; }
645 int max_match() override { return max_match_; }
647 RegExpTree* body() const { return body_; }
648 RegExpFlags flags() const { return flags_; }
649
650 private:
655};
656
// Tree node for a lookaround over a body node. It reports a match length of
// exactly 0 (min_match() == max_match() == 0) and exposes its construction
// parameters through simple accessors.
// NOTE(review): many numbered lines of this class are absent from this
// listing (the Type declaration, the constructor, the data members and most
// of Builder), so only the visible declarations are described.
657class RegExpLookaround final : public RegExpTree {
658 public:
660
669
670 DECL_BOILERPLATE(Lookaround);
671
672 Interval CaptureRegisters() override;
673 bool IsAnchoredAtStart() override;
674 int min_match() override { return 0; }
675 int max_match() override { return 0; }
676 RegExpTree* body() const { return body_; }
677 bool is_positive() const { return is_positive_; }
678 int capture_count() const { return capture_count_; }
679 int capture_from() const { return capture_from_; }
680 Type type() const { return type_; }
681 int index() const { return index_; }
682
// Nested helper; only its constructor declaration is visible here. The two
// capture register parameters default to 0.
683 class Builder {
684 public:
685 Builder(bool is_positive, RegExpNode* on_success,
686 int stack_pointer_register, int position_register,
687 int capture_register_count = 0, int capture_register_start = 0);
690
691 private:
697 };
698
699 private:
706};
707
708class RegExpBackReference final : public RegExpTree {
709 public:
710 explicit RegExpBackReference(Zone* zone) : captures_(1, zone) {}
711 explicit RegExpBackReference(RegExpCapture* capture, Zone* zone)
712 : captures_(1, zone) {
713 captures_.Add(capture, zone);
714 }
715
716 DECL_BOILERPLATE(BackReference);
717
718 int min_match() override { return 0; }
719 // The back reference may be recursive, e.g. /(\2)(\1)/. To avoid infinite
720 // recursion, we give up. Ignorance is bliss.
721 int max_match() override { return kInfinity; }
722 const ZoneList<RegExpCapture*>* captures() const { return &captures_; }
723 void add_capture(RegExpCapture* capture, Zone* zone) {
724 captures_.Add(capture, zone);
725 }
726 const ZoneVector<base::uc16>* name() const { return name_; }
727 void set_name(const ZoneVector<base::uc16>* name) { name_ = name; }
728
729 private:
732};
733
// Tree node that reports a match length of exactly 0 (min_match() and
// max_match() both return 0).
// NOTE(review): embedded line 736 is absent from this listing, so the
// declaration on it is not visible here.
734class RegExpEmpty final : public RegExpTree {
735 public:
737 int min_match() override { return 0; }
738 int max_match() override { return 0; }
739};
740
741} // namespace v8::internal
742
743#undef DECL_BOILERPLATE
744
745#endif // V8_REGEXP_REGEXP_AST_H_
#define FORWARD_DECLARE(Name, Argc)
Definition builtins.cc:30
int length() const
Definition vector.h:64
static bool Equals(const ZoneList< CharacterRange > *lhs, const ZoneList< CharacterRange > *rhs)
static void Canonicalize(ZoneList< CharacterRange > *ranges)
static void AddUnicodeCaseEquivalents(ZoneList< CharacterRange > *ranges, Zone *zone)
base::uc32 from() const
Definition regexp-ast.h:140
static void Subtract(const ZoneList< CharacterRange > *src, const ZoneList< CharacterRange > *to_remove, ZoneList< CharacterRange > *dst, Zone *zone)
bool IsEverything(base::uc32 max) const
Definition regexp-ast.h:142
static void Negate(const ZoneList< CharacterRange > *src, ZoneList< CharacterRange > *dst, Zone *zone)
base::uc32 to() const
Definition regexp-ast.h:141
CharacterRange(base::uc32 from, base::uc32 to)
Definition regexp-ast.h:172
static V8_EXPORT_PRIVATE void AddCaseEquivalents(Isolate *isolate, Zone *zone, ZoneList< CharacterRange > *ranges, bool is_one_byte)
static constexpr int kMaxCodePoint
Definition regexp-ast.h:174
static void ClampToOneByte(ZoneList< CharacterRange > *ranges)
bool Contains(base::uc32 i) const
Definition regexp-ast.h:139
static CharacterRange Singleton(base::uc32 value)
Definition regexp-ast.h:102
static V8_EXPORT_PRIVATE bool IsCanonical(const ZoneList< CharacterRange > *ranges)
static ZoneList< CharacterRange > * List(Zone *zone, CharacterRange range)
Definition regexp-ast.h:114
static void Intersect(const ZoneList< CharacterRange > *lhs, const ZoneList< CharacterRange > *rhs, ZoneList< CharacterRange > *dst, Zone *zone)
static CharacterRange Range(base::uc32 from, base::uc32 to)
Definition regexp-ast.h:105
static CharacterRange Everything()
Definition regexp-ast.h:110
static V8_EXPORT_PRIVATE void AddClassEscape(StandardCharacterSet standard_character_set, ZoneList< CharacterRange > *ranges, bool add_unicode_case_equivalents, Zone *zone)
CharacterSet(StandardCharacterSet standard_set_type)
Definition regexp-ast.h:286
void set_standard_set_type(StandardCharacterSet standard_set_type)
Definition regexp-ast.h:294
V8_EXPORT_PRIVATE void Canonicalize()
CharacterSet(ZoneList< CharacterRange > *ranges)
Definition regexp-ast.h:288
std::optional< StandardCharacterSet > standard_set_type_
Definition regexp-ast.h:302
StandardCharacterSet standard_set_type() const
Definition regexp-ast.h:291
ZoneList< CharacterRange > * ranges_
Definition regexp-ast.h:301
ZoneList< CharacterRange > * ranges(Zone *zone)
Interval(int from, int to)
Definition regexp-ast.h:59
bool is_empty() const
Definition regexp-ast.h:69
static Interval Empty()
Definition regexp-ast.h:66
Interval Union(Interval that)
Definition regexp-ast.h:60
static constexpr int kNone
Definition regexp-ast.h:74
bool Contains(int value) const
Definition regexp-ast.h:68
bool IsAnchoredAtStart() override
Definition regexp-ast.cc:79
Interval CaptureRegisters() override
Definition regexp-ast.cc:43
ZoneList< RegExpTree * > * nodes_
Definition regexp-ast.h:254
RegExpAlternative(ZoneList< RegExpTree * > *nodes)
ZoneList< RegExpTree * > * nodes() const
Definition regexp-ast.h:251
bool IsAnchoredAtEnd() override
Definition regexp-ast.cc:74
bool IsAnchoredAtStart() override
Definition regexp-ast.cc:69
RegExpAtom(base::Vector< const base::uc16 > data)
Definition regexp-ast.h:474
int min_match() override
Definition regexp-ast.h:479
base::Vector< const base::uc16 > data_
Definition regexp-ast.h:487
base::Vector< const base::uc16 > data() const
Definition regexp-ast.h:483
void AppendToText(RegExpText *text, Zone *zone) override
int max_match() override
Definition regexp-ast.h:480
bool IsTextElement() override
Definition regexp-ast.h:478
RegExpBackReference(RegExpCapture *capture, Zone *zone)
Definition regexp-ast.h:711
const ZoneVector< base::uc16 > * name() const
Definition regexp-ast.h:726
const ZoneVector< base::uc16 > * name_
Definition regexp-ast.h:731
void add_capture(RegExpCapture *capture, Zone *zone)
Definition regexp-ast.h:723
void set_name(const ZoneVector< base::uc16 > *name)
Definition regexp-ast.h:727
const ZoneList< RegExpCapture * > * captures() const
Definition regexp-ast.h:722
ZoneList< RegExpCapture * > captures_
Definition regexp-ast.h:730
static int StartRegister(int index)
Definition regexp-ast.h:621
const ZoneVector< base::uc16 > * name_
Definition regexp-ast.h:629
static RegExpNode * ToNode(RegExpTree *body, int index, RegExpCompiler *compiler, RegExpNode *on_success)
void set_name(const ZoneVector< base::uc16 > *name)
Definition regexp-ast.h:620
static int EndRegister(int index)
Definition regexp-ast.h:622
bool IsAnchoredAtStart() override
Interval CaptureRegisters() override
Definition regexp-ast.cc:58
void set_body(RegExpTree *body)
Definition regexp-ast.h:613
bool IsAnchoredAtEnd() override
const ZoneVector< base::uc16 > * name() const
Definition regexp-ast.h:619
RegExpClassRanges(StandardCharacterSet standard_set_type)
Definition regexp-ast.h:329
StandardCharacterSet standard_type() const
Definition regexp-ast.h:348
base::Flags< Flag > ClassRangesFlags
Definition regexp-ast.h:318
void AppendToText(RegExpText *text, Zone *zone) override
ZoneList< CharacterRange > * ranges(Zone *zone)
Definition regexp-ast.h:353
ClassRangesFlags class_ranges_flags_
Definition regexp-ast.h:365
CharacterSet character_set() const
Definition regexp-ast.h:352
RegExpClassRanges(Zone *zone, ZoneList< CharacterRange > *ranges, ClassRangesFlags class_ranges_flags=ClassRangesFlags())
Definition regexp-ast.h:320
static RegExpClassSetOperand * ComputeExpression(RegExpTree *root, ZoneList< CharacterRange > *temp_ranges, Zone *zone)
ZoneList< RegExpTree * > * operands()
Definition regexp-ast.h:451
const ZoneList< RegExpTree * > * operands() const
Definition regexp-ast.h:450
static RegExpClassSetExpression * Empty(Zone *zone, bool is_negated)
RegExpClassSetExpression(OperationType op, bool is_negated, bool may_contain_strings, ZoneList< RegExpTree * > *operands)
ZoneList< RegExpTree * > * operands_
Definition regexp-ast.h:468
ZoneList< CharacterRange > * ranges_
Definition regexp-ast.h:423
CharacterClassStrings * strings_
Definition regexp-ast.h:424
ZoneList< CharacterRange > * ranges()
Definition regexp-ast.h:416
void Intersect(RegExpClassSetOperand *other, ZoneList< CharacterRange > *temp_ranges, Zone *zone)
RegExpClassSetOperand(ZoneList< CharacterRange > *ranges, CharacterClassStrings *strings)
CharacterClassStrings * strings()
Definition regexp-ast.h:417
void Union(RegExpClassSetOperand *other, Zone *zone)
void Subtract(RegExpClassSetOperand *other, ZoneList< CharacterRange > *temp_ranges, Zone *zone)
RegExpDisjunction(ZoneList< RegExpTree * > *alternatives)
ZoneList< RegExpTree * > * alternatives() const
Definition regexp-ast.h:229
ZoneList< RegExpTree * > * alternatives_
Definition regexp-ast.h:235
Interval CaptureRegisters() override
Definition regexp-ast.cc:48
void RationalizeConsecutiveAtoms(RegExpCompiler *compiler)
void FixSingleCharacterDisjunctions(RegExpCompiler *compiler)
bool SortConsecutiveAtoms(RegExpCompiler *compiler)
int min_match() override
Definition regexp-ast.h:737
int max_match() override
Definition regexp-ast.h:738
RegExpTree * body() const
Definition regexp-ast.h:647
bool IsAnchoredAtStart() override
Definition regexp-ast.h:642
int min_match() override
Definition regexp-ast.h:644
int max_match() override
Definition regexp-ast.h:645
Interval CaptureRegisters() override
Definition regexp-ast.h:646
RegExpFlags flags() const
Definition regexp-ast.h:648
const RegExpFlags flags_
Definition regexp-ast.h:652
bool IsAnchoredAtEnd() override
Definition regexp-ast.h:643
RegExpGroup(RegExpTree *body, RegExpFlags flags)
Definition regexp-ast.h:634
Builder(bool is_positive, RegExpNode *on_success, int stack_pointer_register, int position_register, int capture_register_count=0, int capture_register_start=0)
bool IsAnchoredAtStart() override
RegExpTree * body() const
Definition regexp-ast.h:676
RegExpLookaround(RegExpTree *body, bool is_positive, int capture_count, int capture_from, Type type, int index)
Definition regexp-ast.h:661
Interval CaptureRegisters() override
Definition regexp-ast.cc:53
QuantifierType quantifier_type() const
Definition regexp-ast.h:577
Interval CaptureRegisters() override
Definition regexp-ast.cc:64
RegExpQuantifier(int min, int max, QuantifierType type, int index, RegExpTree *body)
Definition regexp-ast.h:548
RegExpTree * body() const
Definition regexp-ast.h:582
static RegExpNode * ToNode(int min, int max, bool is_greedy, RegExpTree *body, RegExpCompiler *compiler, RegExpNode *on_success, bool not_at_start=false)
void AppendToText(RegExpText *text, Zone *zone) override
ZoneList< TextElement > elements_
Definition regexp-ast.h:541
ZoneList< TextElement > * elements()
Definition regexp-ast.h:538
void AddElement(TextElement elm, Zone *zone)
Definition regexp-ast.h:534
bool IsTextElement() override
Definition regexp-ast.h:530
int min_match() override
Definition regexp-ast.h:531
int max_match() override
Definition regexp-ast.h:532
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)=0
virtual bool IsAnchoredAtStart()
Definition regexp-ast.h:202
virtual bool IsAnchoredAtEnd()
Definition regexp-ast.h:203
virtual int min_match()=0
virtual void AppendToText(RegExpText *text, Zone *zone)
static const int kInfinity
Definition regexp-ast.h:196
virtual ~RegExpTree()=default
virtual Interval CaptureRegisters()
Definition regexp-ast.h:208
virtual int max_match()=0
virtual bool IsTextElement()
Definition regexp-ast.h:201
virtual void * Accept(RegExpVisitor *visitor, void *data)=0
V8_EXPORT_PRIVATE std::ostream & Print(std::ostream &os, Zone *zone)
virtual ~RegExpVisitor()=default
RegExpTree * tree() const
Definition regexp-ast.h:503
TextType text_type() const
Definition regexp-ast.h:501
TextElement(TextType text_type, RegExpTree *tree)
Definition regexp-ast.h:516
RegExpClassRanges * class_ranges() const
Definition regexp-ast.h:510
static TextElement ClassRanges(RegExpClassRanges *class_ranges)
void set_cp_offset(int cp_offset)
Definition regexp-ast.h:498
static TextElement Atom(RegExpAtom *atom)
RegExpAtom * atom() const
Definition regexp-ast.h:505
void Add(const T &element, Zone *zone)
T * New(Args &&... args)
Definition zone.h:114
#define MAKE_CASE(TYPE, Name, name)
uint32_t uc32
Definition strings.h:19
bool operator!=(ExternalReference lhs, ExternalReference rhs)
bool operator==(ExternalReference lhs, ExternalReference rhs)
constexpr int kMaxInt
Definition globals.h:374
#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT)
Definition regexp-ast.h:22
#define MAKE_ASTYPE(Name)
Definition regexp-ast.h:211
#define DCHECK_NULL(val)
Definition logging.h:491
#define DCHECK_NOT_NULL(val)
Definition logging.h:492
#define DCHECK(condition)
Definition logging.h:482
#define V8_EXPORT_PRIVATE
Definition macros.h:460
bool operator()(base::Vector< const base::uc32 > lhs, base::Vector< const base::uc32 > rhs) const
Definition regexp-ast.h:369