v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
regexp-ast.cc
Go to the documentation of this file.
1// Copyright 2016 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
6
9
10namespace v8 {
11namespace internal {
12
13#define MAKE_ACCEPT(Name) \
14 void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \
15 return visitor->Visit##Name(this, data); \
16 }
18#undef MAKE_ACCEPT
19
20#define MAKE_TYPE_CASE(Name) \
21 RegExp##Name* RegExpTree::As##Name() { return nullptr; } \
22 bool RegExpTree::Is##Name() { return false; }
24#undef MAKE_TYPE_CASE
25
26#define MAKE_TYPE_CASE(Name) \
27 RegExp##Name* RegExp##Name::As##Name() { return this; } \
28 bool RegExp##Name::Is##Name() { return true; }
30#undef MAKE_TYPE_CASE
31
32namespace {
33
34Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) {
35 Interval result = Interval::Empty();
36 for (int i = 0; i < children->length(); i++)
37 result = result.Union(children->at(i)->CaptureRegisters());
38 return result;
39}
40
41} // namespace
42
44 return ListCaptureRegisters(nodes());
45}
46
47
49 return ListCaptureRegisters(alternatives());
50}
51
52
56
57
62
63
67
68
72
73
77
78
80 ZoneList<RegExpTree*>* nodes = this->nodes();
81 for (int i = 0; i < nodes->length(); i++) {
82 RegExpTree* node = nodes->at(i);
83 if (node->IsAnchoredAtStart()) {
84 return true;
85 }
86 if (node->max_match() > 0) {
87 return false;
88 }
89 }
90 return false;
91}
92
93
95 ZoneList<RegExpTree*>* nodes = this->nodes();
96 for (int i = nodes->length() - 1; i >= 0; i--) {
97 RegExpTree* node = nodes->at(i);
98 if (node->IsAnchoredAtEnd()) {
99 return true;
100 }
101 if (node->max_match() > 0) {
102 return false;
103 }
104 }
105 return false;
106}
107
108
111 for (int i = 0; i < alternatives->length(); i++) {
112 if (!alternatives->at(i)->IsAnchoredAtStart()) return false;
113 }
114 return true;
115}
116
117
120 for (int i = 0; i < alternatives->length(); i++) {
121 if (!alternatives->at(i)->IsAnchoredAtEnd()) return false;
122 }
123 return true;
124}
125
126
130
131
133
134
136
137namespace {
138
139// Convert regular expression trees to a simple sexp representation.
140// This representation should be different from the input grammar
141// in as many cases as possible, to make it more difficult for incorrect
142// parses to look as correct ones which is likely if the input and
143// output formats are alike.
144class RegExpUnparser final : public RegExpVisitor {
145 public:
146 RegExpUnparser(std::ostream& os, Zone* zone) : os_(os), zone_(zone) {}
147 void VisitCharacterRange(CharacterRange that);
148#define MAKE_CASE(Name) void* Visit##Name(RegExp##Name*, void* data) override;
150#undef MAKE_CASE
151 private:
152 std::ostream& os_;
154};
155
156} // namespace
157
158void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
159 os_ << "(|";
160 for (int i = 0; i < that->alternatives()->length(); i++) {
161 os_ << " ";
162 that->alternatives()->at(i)->Accept(this, data);
163 }
164 os_ << ")";
165 return nullptr;
166}
167
168
169void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) {
170 os_ << "(:";
171 for (int i = 0; i < that->nodes()->length(); i++) {
172 os_ << " ";
173 that->nodes()->at(i)->Accept(this, data);
174 }
175 os_ << ")";
176 return nullptr;
177}
178
179
180void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
181 os_ << AsUC32(that.from());
182 if (!that.IsSingleton()) {
183 os_ << "-" << AsUC32(that.to());
184 }
185}
186
187void* RegExpUnparser::VisitClassRanges(RegExpClassRanges* that, void* data) {
188 if (that->is_negated()) os_ << "^";
189 os_ << "[";
190 for (int i = 0; i < that->ranges(zone_)->length(); i++) {
191 if (i > 0) os_ << " ";
192 VisitCharacterRange(that->ranges(zone_)->at(i));
193 }
194 os_ << "]";
195 return nullptr;
196}
197
198void* RegExpUnparser::VisitClassSetOperand(RegExpClassSetOperand* that,
199 void* data) {
200 os_ << "![";
201 for (int i = 0; i < that->ranges()->length(); i++) {
202 if (i > 0) os_ << " ";
203 VisitCharacterRange(that->ranges()->at(i));
204 }
205 if (that->has_strings()) {
206 for (auto iter : *that->strings()) {
207 os_ << " '";
208 os_ << std::string(iter.first.begin(), iter.first.end());
209 os_ << "'";
210 }
211 }
212 os_ << "]";
213 return nullptr;
214}
215
216void* RegExpUnparser::VisitClassSetExpression(RegExpClassSetExpression* that,
217 void* data) {
218 switch (that->operation()) {
219 case RegExpClassSetExpression::OperationType::kUnion:
220 os_ << "++";
221 break;
222 case RegExpClassSetExpression::OperationType::kIntersection:
223 os_ << "&&";
224 break;
225 case RegExpClassSetExpression::OperationType::kSubtraction:
226 os_ << "--";
227 break;
228 }
229 if (that->is_negated()) os_ << "^";
230 os_ << "[";
231 for (int i = 0; i < that->operands()->length(); i++) {
232 if (i > 0) os_ << " ";
233 that->operands()->at(i)->Accept(this, data);
234 }
235 os_ << "]";
236 return nullptr;
237}
238
239void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) {
240 switch (that->assertion_type()) {
241 case RegExpAssertion::Type::START_OF_INPUT:
242 os_ << "@^i";
243 break;
244 case RegExpAssertion::Type::END_OF_INPUT:
245 os_ << "@$i";
246 break;
247 case RegExpAssertion::Type::START_OF_LINE:
248 os_ << "@^l";
249 break;
250 case RegExpAssertion::Type::END_OF_LINE:
251 os_ << "@$l";
252 break;
253 case RegExpAssertion::Type::BOUNDARY:
254 os_ << "@b";
255 break;
256 case RegExpAssertion::Type::NON_BOUNDARY:
257 os_ << "@B";
258 break;
259 }
260 return nullptr;
261}
262
263
264void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
265 os_ << "'";
266 base::Vector<const base::uc16> chardata = that->data();
267 for (int i = 0; i < chardata.length(); i++) {
268 os_ << AsUC16(chardata[i]);
269 }
270 os_ << "'";
271 return nullptr;
272}
273
274
275void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
276 if (that->elements()->length() == 1) {
277 that->elements()->at(0).tree()->Accept(this, data);
278 } else {
279 os_ << "(!";
280 for (int i = 0; i < that->elements()->length(); i++) {
281 os_ << " ";
282 that->elements()->at(i).tree()->Accept(this, data);
283 }
284 os_ << ")";
285 }
286 return nullptr;
287}
288
289
290void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
291 os_ << "(# " << that->min() << " ";
292 if (that->max() == RegExpTree::kInfinity) {
293 os_ << "- ";
294 } else {
295 os_ << that->max() << " ";
296 }
297 os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n ");
298 that->body()->Accept(this, data);
299 os_ << ")";
300 return nullptr;
301}
302
303
304void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
305 os_ << "(^ ";
306 that->body()->Accept(this, data);
307 os_ << ")";
308 return nullptr;
309}
310
311void* RegExpUnparser::VisitGroup(RegExpGroup* that, void* data) {
312 os_ << "(?" << that->flags() << ": ";
313 that->body()->Accept(this, data);
314 os_ << ")";
315 return nullptr;
316}
317
318void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) {
319 os_ << "(";
320 os_ << (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-");
321 os_ << (that->is_positive() ? " + " : " - ");
322 that->body()->Accept(this, data);
323 os_ << ")";
324 return nullptr;
325}
326
327
328void* RegExpUnparser::VisitBackReference(RegExpBackReference* that,
329 void* data) {
330 os_ << "(<- " << that->captures()->first()->index();
331 for (int i = 1; i < that->captures()->length(); ++i) {
332 os_ << "," << that->captures()->at(i)->index();
333 }
334 os_ << ")";
335 return nullptr;
336}
337
338
339void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) {
340 os_ << '%';
341 return nullptr;
342}
343
344std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) {
345 RegExpUnparser unparser(os, zone);
346 Accept(&unparser, nullptr);
347 return os;
348}
349
351 : alternatives_(alternatives) {
352 DCHECK_LT(1, alternatives->length());
353 RegExpTree* first_alternative = alternatives->at(0);
354 min_match_ = first_alternative->min_match();
355 max_match_ = first_alternative->max_match();
356 for (int i = 1; i < alternatives->length(); i++) {
357 RegExpTree* alternative = alternatives->at(i);
358 min_match_ = std::min(min_match_, alternative->min_match());
359 max_match_ = std::max(max_match_, alternative->max_match());
360 }
361}
362
363namespace {
364
365int IncreaseBy(int previous, int increase) {
366 if (RegExpTree::kInfinity - previous < increase) {
368 } else {
369 return previous + increase;
370 }
371}
372
373} // namespace
374
376 : nodes_(nodes) {
377 DCHECK_LT(1, nodes->length());
378 min_match_ = 0;
379 max_match_ = 0;
380 for (int i = 0; i < nodes->length(); i++) {
381 RegExpTree* node = nodes->at(i);
382 int node_min_match = node->min_match();
383 min_match_ = IncreaseBy(min_match_, node_min_match);
384 int node_max_match = node->max_match();
385 max_match_ = IncreaseBy(max_match_, node_max_match);
386 }
387}
388
390 CharacterClassStrings* strings)
391 : ranges_(ranges), strings_(strings) {
392 DCHECK_NOT_NULL(ranges);
393 min_match_ = 0;
394 max_match_ = 0;
395 if (!ranges->is_empty()) {
396 min_match_ = 1;
397 max_match_ = 2;
398 }
399 if (has_strings()) {
400 for (auto string : *strings) {
401 min_match_ = std::min(min_match_, string.second->min_match());
402 max_match_ = std::max(max_match_, string.second->max_match());
403 }
404 }
405}
406
408 OperationType op, bool is_negated, bool may_contain_strings,
409 ZoneList<RegExpTree*>* operands)
410 : operation_(op),
411 is_negated_(is_negated),
412 may_contain_strings_(may_contain_strings),
413 operands_(operands) {
415 if (is_negated) {
417 // We don't know anything about max matches for negated classes.
418 // As there are no strings involved, assume that we can match a unicode
419 // character (2 code points).
420 max_match_ = 2;
421 } else {
422 max_match_ = 0;
423 for (auto operand : *operands) {
424 max_match_ = std::max(max_match_, operand->max_match());
425 }
426 }
427}
428
429// static
431 bool is_negated) {
433 zone->template New<ZoneList<CharacterRange>>(0, zone);
435 zone->template New<RegExpClassSetOperand>(ranges, nullptr);
437 zone->template New<ZoneList<RegExpTree*>>(1, zone);
438 operands->Add(op, zone);
439 return zone->template New<RegExpClassSetExpression>(
441 operands);
442}
443
444} // namespace internal
445} // namespace v8
friend Zone
Definition asm-types.cc:195
static Interval Empty()
Definition regexp-ast.h:66
Interval Union(Interval that)
Definition regexp-ast.h:60
bool IsAnchoredAtStart() override
Definition regexp-ast.cc:79
Interval CaptureRegisters() override
Definition regexp-ast.cc:43
RegExpAlternative(ZoneList< RegExpTree * > *nodes)
ZoneList< RegExpTree * > * nodes() const
Definition regexp-ast.h:251
bool IsAnchoredAtEnd() override
Definition regexp-ast.cc:74
bool IsAnchoredAtStart() override
Definition regexp-ast.cc:69
static int StartRegister(int index)
Definition regexp-ast.h:621
static int EndRegister(int index)
Definition regexp-ast.h:622
bool IsAnchoredAtStart() override
Interval CaptureRegisters() override
Definition regexp-ast.cc:58
bool IsAnchoredAtEnd() override
const ZoneList< RegExpTree * > * operands() const
Definition regexp-ast.h:450
static RegExpClassSetExpression * Empty(Zone *zone, bool is_negated)
RegExpClassSetExpression(OperationType op, bool is_negated, bool may_contain_strings, ZoneList< RegExpTree * > *operands)
RegExpClassSetOperand(ZoneList< CharacterRange > *ranges, CharacterClassStrings *strings)
RegExpDisjunction(ZoneList< RegExpTree * > *alternatives)
ZoneList< RegExpTree * > * alternatives() const
Definition regexp-ast.h:229
Interval CaptureRegisters() override
Definition regexp-ast.cc:48
bool IsAnchoredAtStart() override
RegExpTree * body() const
Definition regexp-ast.h:676
Interval CaptureRegisters() override
Definition regexp-ast.cc:53
Interval CaptureRegisters() override
Definition regexp-ast.cc:64
RegExpTree * body() const
Definition regexp-ast.h:582
virtual bool IsAnchoredAtStart()
Definition regexp-ast.h:202
virtual bool IsAnchoredAtEnd()
Definition regexp-ast.h:203
virtual int min_match()=0
static const int kInfinity
Definition regexp-ast.h:196
virtual Interval CaptureRegisters()
Definition regexp-ast.h:208
virtual int max_match()=0
V8_EXPORT_PRIVATE std::ostream & Print(std::ostream &os, Zone *zone)
V8_INLINE int length() const
Definition zone-list.h:101
Zone * zone_
LineAndColumn previous
ZoneLinkedList< BFEntry > nodes_
std::vector< std::unique_ptr< InstanceTypeTree > > children
double second
ZoneVector< RpoNumber > & result
#define MAKE_CASE(TYPE, Name, name)
#define MAKE_ACCEPT(Name)
Definition regexp-ast.cc:13
#define MAKE_TYPE_CASE(Name)
Definition regexp-ast.cc:20
#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT)
Definition regexp-ast.h:22
SmallRegExpTreeVector alternatives_
#define DCHECK_NOT_NULL(val)
Definition logging.h:492
#define DCHECK(condition)
Definition logging.h:482
#define DCHECK_LT(v1, v2)
Definition logging.h:489
std::ostream * os_