v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
runtime-regexp.cc
Go to the documentation of this file.
1// Copyright 2014 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <functional>
6
8#include "src/base/strings.h"
12#include "src/heap/heap-inl.h" // For ToBoolean. TODO(jkummerow): Drop.
18#include "src/regexp/regexp.h"
21
22namespace v8 {
23namespace internal {
24
25namespace {
26
27// Fairly arbitrary, but intended to fit:
28//
29// - captures
30// - results
31// - parsed replacement pattern parts
32//
33// for small, common cases.
34constexpr int kStaticVectorSlots = 8;
35
36// Returns -1 for failure.
37uint32_t GetArgcForReplaceCallable(uint32_t num_captures,
38 bool has_named_captures) {
39 const uint32_t kAdditionalArgsWithoutNamedCaptures = 2;
40 const uint32_t kAdditionalArgsWithNamedCaptures = 3;
41 if (num_captures > Code::kMaxArguments) return -1;
42 uint32_t argc = has_named_captures
43 ? num_captures + kAdditionalArgsWithNamedCaptures
44 : num_captures + kAdditionalArgsWithoutNamedCaptures;
45 static_assert(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() -
46 kAdditionalArgsWithNamedCaptures);
47 return (argc > Code::kMaxArguments) ? -1 : argc;
48}
49
50// Looks up the capture of the given name. Returns the (1-based) numbered
51// capture index or -1 on failure.
52// The lookup starts at index |index_in_out|. On success |index_in_out| is set
53// to the index after the entry was found (i.e. the start index to continue the
54// search in the presence of duplicate group names).
55template <typename Matcher, typename = std::enable_if<std::is_invocable_r_v<
56 bool, Matcher, Tagged<String>>>>
57int LookupNamedCapture(Matcher name_matches,
58 Tagged<FixedArray> capture_name_map, int* index_in_out) {
59 DCHECK_GE(*index_in_out, 0);
60 // TODO(jgruber): Sort capture_name_map and do binary search via
61 // internalized strings.
62
63 int maybe_capture_index = -1;
64 const int named_capture_count = capture_name_map->length() >> 1;
65 DCHECK_LE(*index_in_out, named_capture_count);
66 for (int j = *index_in_out; j < named_capture_count; j++) {
67 // The format of {capture_name_map} is documented at
68 // JSRegExp::kIrregexpCaptureNameMapIndex.
69 const int name_ix = j * 2;
70 const int index_ix = j * 2 + 1;
71
72 Tagged<String> capture_name = Cast<String>(capture_name_map->get(name_ix));
73 if (!name_matches(capture_name)) continue;
74
75 maybe_capture_index = Smi::ToInt(capture_name_map->get(index_ix));
76 *index_in_out = j + 1;
77 break;
78 }
79
80 return maybe_capture_index;
81}
82
83} // namespace
84
86 public:
87 explicit CompiledReplacement(Isolate* isolate)
88 : replacement_substrings_(isolate) {}
89
90 // Return whether the replacement is simple.
91 bool Compile(Isolate* isolate, DirectHandle<JSRegExp> regexp,
92 DirectHandle<RegExpData> regexp_data,
93 DirectHandle<String> replacement, int capture_count,
94 int subject_length);
95
96 // Use Apply only if Compile returned false.
97 void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
98 int32_t* match);
99
100 // Number of distinct parts of the replacement pattern.
101 int parts() { return static_cast<int>(parts_.size()); }
102
103 private:
113
117 }
118 static inline ReplacementPart SubjectCapture(int capture_index) {
119 return ReplacementPart(SUBJECT_CAPTURE, capture_index);
120 }
123 }
124 static inline ReplacementPart SubjectSuffix(int subject_length) {
125 return ReplacementPart(SUBJECT_SUFFIX, subject_length);
126 }
132 }
133 static inline ReplacementPart ReplacementSubString(int from, int to) {
134 DCHECK_LE(0, from);
135 DCHECK_GT(to, from);
136 return ReplacementPart(-from, to);
137 }
138
139 // If tag <= 0 then it is the negation of a start index of a substring of
140 // the replacement pattern, otherwise it's a value from PartType.
141 ReplacementPart(int tag, int data) : tag(tag), data(data) {
142 // Must be non-positive or a PartType value.
144 }
145 // Either a value of PartType or a non-positive number that is
146 // the negation of an index into the replacement string.
147 int tag;
148 // The data value's interpretation depends on the value of tag:
149 // tag == SUBJECT_PREFIX ||
150 // tag == SUBJECT_SUFFIX: data is unused.
151 // tag == SUBJECT_CAPTURE: data is the number of the capture.
152 // tag == REPLACEMENT_SUBSTRING ||
153 // tag == REPLACEMENT_STRING: data is index into array of substrings
154 // of the replacement string.
155 // tag == EMPTY_REPLACEMENT: data is unused.
156 // tag <= 0: Temporary representation of the substring of the replacement
157 // string ranging over -tag .. data.
158 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
159 // substring objects.
160 int data;
161 };
162
163 template <typename Char>
166 int capture_count, int subject_length) {
167 // Equivalent to String::GetSubstitution, except that this method converts
168 // the replacement string into an internal representation that avoids
169 // repeated parsing when used repeatedly.
170 int length = characters.length();
171 int last = 0;
172 for (int i = 0; i < length; i++) {
173 Char c = characters[i];
174 if (c == '$') {
175 int next_index = i + 1;
176 if (next_index == length) { // No next character!
177 break;
178 }
179 Char c2 = characters[next_index];
180 switch (c2) {
181 case '$':
182 if (i > last) {
183 // There is a substring before. Include the first "$".
184 parts_.emplace_back(
185 ReplacementPart::ReplacementSubString(last, next_index));
186 last = next_index + 1; // Continue after the second "$".
187 } else {
188 // Let the next substring start with the second "$".
189 last = next_index;
190 }
191 i = next_index;
192 break;
193 case '`':
194 if (i > last) {
195 parts_.emplace_back(
197 }
199 i = next_index;
200 last = i + 1;
201 break;
202 case '\'':
203 if (i > last) {
204 parts_.emplace_back(
206 }
207 parts_.emplace_back(ReplacementPart::SubjectSuffix(subject_length));
208 i = next_index;
209 last = i + 1;
210 break;
211 case '&':
212 if (i > last) {
213 parts_.emplace_back(
215 }
217 i = next_index;
218 last = i + 1;
219 break;
220 case '0':
221 case '1':
222 case '2':
223 case '3':
224 case '4':
225 case '5':
226 case '6':
227 case '7':
228 case '8':
229 case '9': {
230 int capture_ref = c2 - '0';
231 if (capture_ref > capture_count) {
232 i = next_index;
233 continue;
234 }
235 int second_digit_index = next_index + 1;
236 if (second_digit_index < length) {
237 // Peek ahead to see if we have two digits.
238 Char c3 = characters[second_digit_index];
239 if ('0' <= c3 && c3 <= '9') { // Double digits.
240 int double_digit_ref = capture_ref * 10 + c3 - '0';
241 if (double_digit_ref <= capture_count) {
242 next_index = second_digit_index;
243 capture_ref = double_digit_ref;
244 }
245 }
246 }
247 if (capture_ref > 0) {
248 if (i > last) {
249 parts_.emplace_back(
251 }
252 DCHECK(capture_ref <= capture_count);
253 parts_.emplace_back(ReplacementPart::SubjectCapture(capture_ref));
254 last = next_index + 1;
255 }
256 i = next_index;
257 break;
258 }
259 case '<': {
260 if (capture_name_map.is_null()) {
261 i = next_index;
262 break;
263 }
264
265 // Scan until the next '>', and let the enclosed substring be the
266 // groupName.
267
268 const int name_start_index = next_index + 1;
269 int closing_bracket_index = -1;
270 for (int j = name_start_index; j < length; j++) {
271 if (characters[j] == '>') {
272 closing_bracket_index = j;
273 break;
274 }
275 }
276
277 // If no closing bracket is found, '$<' is treated as a string
278 // literal.
279 if (closing_bracket_index == -1) {
280 i = next_index;
281 break;
282 }
283
284 if (i > last) {
285 parts_.emplace_back(
287 }
288
289 base::Vector<Char> requested_name =
290 characters.SubVector(name_start_index, closing_bracket_index);
291
292 // If capture is undefined or does not exist, replace the text
293 // through the following '>' with the empty string.
294 // Otherwise, replace the text through the following '>' with
295 // ? ToString(capture).
296 // For duplicated capture group names we don't know which of them
297 // matches at this point in time, so we create a separate
298 // replacement for each possible match. When applying the
299 // replacement unmatched groups will be skipped.
300
301 int capture_index = 0;
302 int capture_name_map_index = 0;
303 while (capture_index != -1) {
304 capture_index = LookupNamedCapture(
305 [=](Tagged<String> capture_name) {
306 return capture_name->IsEqualTo(requested_name);
307 },
308 capture_name_map, &capture_name_map_index);
309 DCHECK(capture_index == -1 ||
310 (1 <= capture_index && capture_index <= capture_count));
311
312 parts_.emplace_back(
313 capture_index == -1
315 : ReplacementPart::SubjectCapture(capture_index));
316 }
317
318 last = closing_bracket_index + 1;
319 i = closing_bracket_index;
320 break;
321 }
322 default:
323 i = next_index;
324 break;
325 }
326 }
327 }
328 if (length > last) {
329 if (last == 0) {
330 // Replacement is simple. Do not use Apply to do the replacement.
331 return true;
332 } else {
333 parts_.emplace_back(
335 }
336 }
337 return false;
338 }
339
342};
343
346 DirectHandle<RegExpData> regexp_data,
347 DirectHandle<String> replacement,
348 int capture_count, int subject_length) {
349 {
351 String::FlatContent content = replacement->GetFlatContent(no_gc);
352 DCHECK(content.IsFlat());
353
355 if (capture_count > 0) {
356 // capture_count > 0 implies IrRegExpData. Since capture_count is in
357 // trusted space, this is not a SBXCHECK.
358 DCHECK(Is<IrRegExpData>(*regexp_data));
359 Tagged<IrRegExpData> re_data = Cast<IrRegExpData>(*regexp_data);
360
361 Tagged<Object> maybe_capture_name_map = re_data->capture_name_map();
362 if (IsFixedArray(maybe_capture_name_map)) {
363 capture_name_map = Cast<FixedArray>(maybe_capture_name_map);
364 }
365 }
366
367 bool simple;
368 if (content.IsOneByte()) {
369 simple =
371 capture_count, subject_length);
372 } else {
373 DCHECK(content.IsTwoByte());
375 capture_count, subject_length);
376 }
377 if (simple) return true;
378 }
379
380 // Find substrings of replacement string and create them as String objects.
382 int substring_index = 0;
383 for (ReplacementPart& part : parts_) {
384 int tag = part.tag;
385 if (tag <= 0) { // A replacement string slice.
386 int from = -tag;
387 int to = part.data;
388 replacement_substrings_.emplace_back(
389 isolate->factory()->NewSubString(replacement, from, to));
390 part.tag = REPLACEMENT_SUBSTRING;
391 part.data = substring_index;
392 substring_index++;
393 } else if (tag == REPLACEMENT_STRING) {
394 replacement_substrings_.emplace_back(replacement);
395 part.data = substring_index;
396 substring_index++;
397 }
398 }
399 return false;
400}
401
403 int match_from, int match_to, int32_t* match) {
404 DCHECK_LT(0, parts_.size());
405 for (ReplacementPart& part : parts_) {
406 switch (part.tag) {
407 case SUBJECT_PREFIX:
408 if (match_from > 0) builder->AddSubjectSlice(0, match_from);
409 break;
410 case SUBJECT_SUFFIX: {
411 int subject_length = part.data;
412 if (match_to < subject_length) {
413 builder->AddSubjectSlice(match_to, subject_length);
414 }
415 break;
416 }
417 case SUBJECT_CAPTURE: {
418 int capture = part.data;
419 int from = match[capture * 2];
420 int to = match[capture * 2 + 1];
421 if (from >= 0 && to > from) {
422 builder->AddSubjectSlice(from, to);
423 }
424 break;
425 }
428 builder->AddString(replacement_substrings_[part.data]);
429 break;
431 break;
432 default:
433 UNREACHABLE();
434 }
435 }
436}
437
439 uint8_t pattern, std::vector<int>* indices,
440 unsigned int limit) {
441 DCHECK_LT(0, limit);
442 // Collect indices of pattern in subject using memchr.
443 // Stop after finding at most limit values.
444 const uint8_t* subject_start = subject.begin();
445 const uint8_t* subject_end = subject_start + subject.length();
446 const uint8_t* pos = subject_start;
447 while (limit > 0) {
448 pos = reinterpret_cast<const uint8_t*>(
449 memchr(pos, pattern, subject_end - pos));
450 if (pos == nullptr) return;
451 indices->push_back(static_cast<int>(pos - subject_start));
452 pos++;
453 limit--;
454 }
455}
456
458 base::uc16 pattern, std::vector<int>* indices,
459 unsigned int limit) {
460 DCHECK_LT(0, limit);
461 const base::uc16* subject_start = subject.begin();
462 const base::uc16* subject_end = subject_start + subject.length();
463 for (const base::uc16* pos = subject_start; pos < subject_end && limit > 0;
464 pos++) {
465 if (*pos == pattern) {
466 indices->push_back(static_cast<int>(pos - subject_start));
467 limit--;
468 }
469 }
470}
471
472template <typename SubjectChar, typename PatternChar>
476 std::vector<int>* indices, unsigned int limit) {
477 DCHECK_LT(0, limit);
478 // Collect indices of pattern in subject.
479 // Stop after finding at most limit values.
480 int pattern_length = pattern.length();
481 int index = 0;
483 while (limit > 0) {
484 index = search.Search(subject, index);
485 if (index < 0) return;
486 indices->push_back(index);
487 index += pattern_length;
488 limit--;
489 }
490}
491
494 std::vector<int>* indices, unsigned int limit) {
495 {
497 String::FlatContent subject_content = subject->GetFlatContent(no_gc);
498 String::FlatContent pattern_content = pattern->GetFlatContent(no_gc);
499 DCHECK(subject_content.IsFlat());
500 DCHECK(pattern_content.IsFlat());
501 if (subject_content.IsOneByte()) {
502 base::Vector<const uint8_t> subject_vector =
503 subject_content.ToOneByteVector();
504 if (pattern_content.IsOneByte()) {
505 base::Vector<const uint8_t> pattern_vector =
506 pattern_content.ToOneByteVector();
507 if (pattern_vector.length() == 1) {
508 FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
509 limit);
510 } else {
511 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
512 limit);
513 }
514 } else {
515 FindStringIndices(isolate, subject_vector,
516 pattern_content.ToUC16Vector(), indices, limit);
517 }
518 } else {
519 base::Vector<const base::uc16> subject_vector =
520 subject_content.ToUC16Vector();
521 if (pattern_content.IsOneByte()) {
522 base::Vector<const uint8_t> pattern_vector =
523 pattern_content.ToOneByteVector();
524 if (pattern_vector.length() == 1) {
525 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
526 limit);
527 } else {
528 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
529 limit);
530 }
531 } else {
532 base::Vector<const base::uc16> pattern_vector =
533 pattern_content.ToUC16Vector();
534 if (pattern_vector.length() == 1) {
535 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
536 limit);
537 } else {
538 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
539 limit);
540 }
541 }
542 }
543 }
544}
545
546namespace {
547std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
548 std::vector<int>* list = isolate->regexp_indices();
549 list->clear();
550 return list;
551}
552
553void TruncateRegexpIndicesList(Isolate* isolate) {
554 // Same size as smallest zone segment, preserving behavior from the
555 // runtime zone.
556 // TODO(jgruber): Consider removing the reusable regexp_indices list and
557 // simply allocating a new list each time. It feels like we're needlessly
558 // optimizing an edge case.
559 static const int kMaxRegexpIndicesListCapacity = 8 * KB / kIntSize;
560 std::vector<int>* indices = isolate->regexp_indices();
561 if (indices->capacity() > kMaxRegexpIndicesListCapacity) {
562 // Throw away backing storage.
563 indices->clear();
564 indices->shrink_to_fit();
565 }
566}
567} // namespace
568
569template <typename ResultSeqString>
572 Isolate* isolate, DirectHandle<String> subject,
573 DirectHandle<JSRegExp> pattern_regexp, DirectHandle<String> replacement,
574 DirectHandle<RegExpMatchInfo> last_match_info,
575 DirectHandle<AtomRegExpData> regexp_data) {
576 DCHECK(subject->IsFlat());
577 DCHECK(replacement->IsFlat());
578
579 std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
580
581 Tagged<String> pattern = regexp_data->pattern();
582 int subject_len = subject->length();
583 int pattern_len = pattern->length();
584 int replacement_len = replacement->length();
585
586 FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF);
587
588 if (indices->empty()) return *subject;
589
590 // Detect integer overflow.
591 int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
592 static_cast<int64_t>(pattern_len)) *
593 static_cast<int64_t>(indices->size()) +
594 static_cast<int64_t>(subject_len);
595 int result_len;
596 if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
597 static_assert(String::kMaxLength < kMaxInt);
598 result_len = kMaxInt; // Provoke exception.
599 } else {
600 result_len = static_cast<int>(result_len_64);
601 }
602 if (result_len == 0) {
603 return ReadOnlyRoots(isolate).empty_string();
604 }
605
606 int subject_pos = 0;
607 int result_pos = 0;
608
610 if (ResultSeqString::kHasOneByteEncoding) {
611 maybe_res = isolate->factory()->NewRawOneByteString(result_len);
612 } else {
613 maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
614 }
615 DirectHandle<SeqString> untyped_res;
616 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
618
620 for (int index : *indices) {
621 // Copy non-matched subject content.
622 if (subject_pos < index) {
623 String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
624 subject_pos, index - subject_pos);
625 result_pos += index - subject_pos;
626 }
627
628 // Replace match.
629 if (replacement_len > 0) {
630 String::WriteToFlat(*replacement, result->GetChars(no_gc) + result_pos, 0,
631 replacement_len);
632 result_pos += replacement_len;
633 }
634
635 subject_pos = index + pattern_len;
636 }
637 // Add remaining subject content at the end.
638 if (subject_pos < subject_len) {
639 String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
640 subject_pos, subject_len - subject_pos);
641 }
642
643 int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
644 RegExp::SetLastMatchInfo(isolate, last_match_info, subject, 0, match_indices);
645
646 TruncateRegexpIndicesList(isolate);
647
648 return *result;
649}
650
652 Isolate* isolate, DirectHandle<String> subject,
654 DirectHandle<String> replacement,
655 DirectHandle<RegExpMatchInfo> last_match_info) {
656 DCHECK(subject->IsFlat());
657 DCHECK(replacement->IsFlat());
658
659 int capture_count = regexp_data->capture_count();
660 int subject_length = subject->length();
661
662 // Ensure the RegExp is compiled so we can access the capture-name map.
663 if (!RegExp::EnsureFullyCompiled(isolate, regexp_data, subject)) {
664 return ReadOnlyRoots(isolate).exception();
665 }
666
667 CompiledReplacement compiled_replacement(isolate);
668 const bool simple_replace = compiled_replacement.Compile(
669 isolate, regexp, regexp_data, replacement, capture_count, subject_length);
670
671 // Shortcut for simple non-regexp global replacements.
672 if (regexp_data->type_tag() == RegExpData::Type::ATOM && simple_replace) {
673 if (subject->IsOneByteRepresentation() &&
674 replacement->IsOneByteRepresentation()) {
676 isolate, subject, regexp, replacement, last_match_info,
677 Cast<AtomRegExpData>(regexp_data));
678 } else {
680 isolate, subject, regexp, replacement, last_match_info,
681 Cast<AtomRegExpData>(regexp_data));
682 }
683 }
684
685 RegExpGlobalExecRunner runner(direct_handle(*regexp_data, isolate), subject,
686 isolate);
687 if (runner.HasException()) return ReadOnlyRoots(isolate).exception();
688
689 int32_t* current_match = runner.FetchNext();
690 if (current_match == nullptr) {
691 if (runner.HasException()) return ReadOnlyRoots(isolate).exception();
692 return *subject;
693 }
694
695 // Guessing the number of parts that the final result string is built
696 // from. Global regexps can match any number of times, so we guess
697 // conservatively.
698 int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
699 // TODO(v8:12843): improve the situation where the expected_parts exceeds
700 // the maximum size of the backing store.
701 ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
702
703 int prev = 0;
704
705 do {
706 int start = current_match[0];
707 int end = current_match[1];
708
709 if (prev < start) {
710 builder.AddSubjectSlice(prev, start);
711 }
712
713 if (simple_replace) {
714 builder.AddString(replacement);
715 } else {
716 compiled_replacement.Apply(&builder, start, end, current_match);
717 }
718 prev = end;
719
720 current_match = runner.FetchNext();
721 } while (current_match != nullptr);
722
723 if (runner.HasException()) return ReadOnlyRoots(isolate).exception();
724
725 if (prev < subject_length) {
726 builder.AddSubjectSlice(prev, subject_length);
727 }
728
729 RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
730 runner.LastSuccessfulMatch());
731
732 RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
733}
734
735template <typename ResultSeqString>
738 Isolate* isolate, DirectHandle<String> subject,
740 DirectHandle<RegExpMatchInfo> last_match_info) {
741 DCHECK(subject->IsFlat());
742
743 // Shortcut for simple non-regexp global replacements.
744 if (regexp_data->type_tag() == RegExpData::Type::ATOM) {
745 DirectHandle<String> empty_string = isolate->factory()->empty_string();
746 if (subject->IsOneByteRepresentation()) {
748 isolate, subject, regexp, empty_string, last_match_info,
749 Cast<AtomRegExpData>(regexp_data));
750 } else {
752 isolate, subject, regexp, empty_string, last_match_info,
753 Cast<AtomRegExpData>(regexp_data));
754 }
755 }
756
757 RegExpGlobalExecRunner runner(direct_handle(*regexp_data, isolate), subject,
758 isolate);
759 if (runner.HasException()) return ReadOnlyRoots(isolate).exception();
760
761 int32_t* current_match = runner.FetchNext();
762 if (current_match == nullptr) {
763 if (runner.HasException()) return ReadOnlyRoots(isolate).exception();
764 return *subject;
765 }
766
767 int start = current_match[0];
768 int end = current_match[1];
769 int capture_count = regexp_data->capture_count();
770 int subject_length = subject->length();
771
772 int new_length = subject_length - (end - start);
773 if (new_length == 0) return ReadOnlyRoots(isolate).empty_string();
774
776 if (ResultSeqString::kHasOneByteEncoding) {
777 answer = Cast<ResultSeqString>(
778 isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
779 } else {
780 answer = Cast<ResultSeqString>(
781 isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
782 }
783
784 int prev = 0;
785 int position = 0;
786
788 do {
789 start = current_match[0];
790 end = current_match[1];
791 if (prev < start) {
792 // Add substring subject[prev;start] to answer string.
793 String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
794 start - prev);
795 position += start - prev;
796 }
797 prev = end;
798
799 current_match = runner.FetchNext();
800 } while (current_match != nullptr);
801
802 if (runner.HasException()) return ReadOnlyRoots(isolate).exception();
803
804 RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
805 runner.LastSuccessfulMatch());
806
807 if (prev < subject_length) {
808 // Add substring subject[prev;length] to answer string.
809 String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
810 subject_length - prev);
811 position += subject_length - prev;
812 }
813
814 if (position == 0) return ReadOnlyRoots(isolate).empty_string();
815
816 // Shorten string and fill
817 int string_size = ResultSeqString::SizeFor(position);
818 int allocated_string_size = ResultSeqString::SizeFor(new_length);
819 int delta = allocated_string_size - string_size;
820
821 answer->set_length(position);
822 if (delta == 0) return *answer;
823
824 Address end_of_string = answer->address() + string_size;
825 Heap* heap = isolate->heap();
826
827 // The trimming is performed on a newly allocated object, which is on a
828 // freshly allocated page or on an already swept page. Hence, the sweeper
829 // thread can not get confused with the filler creation. No synchronization
830 // needed.
831 // TODO(hpayer): We should shrink the large object page if the size
832 // of the object changed significantly.
833 if (!heap->IsLargeObject(*answer)) {
834 heap->CreateFillerObjectAt(end_of_string, delta);
835 }
836 return *answer;
837}
838
839RUNTIME_FUNCTION(Runtime_StringSplit) {
840 HandleScope handle_scope(isolate);
841 DCHECK_EQ(3, args.length());
842 DirectHandle<String> subject = args.at<String>(0);
844 uint32_t limit = NumberToUint32(args[2]);
845 CHECK_LT(0, limit);
846
847 int subject_length = subject->length();
848 int pattern_length = pattern->length();
849 CHECK_LT(0, pattern_length);
850
851 if (limit == 0xFFFFFFFFu) {
852 Tagged<FixedArray> last_match_cache_unused;
853 DirectHandle<Object> cached_answer(
854 RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
855 &last_match_cache_unused,
857 isolate);
858 if (*cached_answer != Smi::zero()) {
859 // The cache FixedArray is a COW-array and can therefore be reused.
860 DirectHandle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
861 Cast<FixedArray>(cached_answer));
862 return *result;
863 }
864 }
865
866 // The limit can be very large (0xFFFFFFFFu), but since the pattern
867 // isn't empty, we can never create more parts than ~half the length
868 // of the subject.
869
870 subject = String::Flatten(isolate, subject);
871 pattern = String::Flatten(isolate, pattern);
872
873 std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
874
875 FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
876
877 if (static_cast<uint32_t>(indices->size()) < limit) {
878 indices->push_back(subject_length);
879 }
880
881 // The list indices now contains the end of each part to create.
882
883 // Create JSArray of substrings separated by separator.
884 int part_count = static_cast<int>(indices->size());
885
886 DirectHandle<JSArray> result = isolate->factory()->NewJSArray(
887 PACKED_ELEMENTS, part_count, part_count,
889
890 DCHECK(result->HasObjectElements());
891
893 isolate);
894
895 if (part_count == 1 && indices->at(0) == subject_length) {
896 elements->set(0, *subject);
897 } else {
898 int part_start = 0;
899 FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
900 int part_end = indices->at(i);
901 DirectHandle<String> substring =
902 isolate->factory()->NewProperSubString(subject, part_start, part_end);
903 elements->set(i, *substring);
904 part_start = part_end + pattern_length;
905 });
906 }
907
908 if (limit == 0xFFFFFFFFu) {
909 if (result->HasObjectElements()) {
910 RegExpResultsCache::Enter(isolate, subject, pattern, elements,
911 isolate->factory()->empty_fixed_array(),
913 }
914 }
915
916 TruncateRegexpIndicesList(isolate);
917
918 return *result;
919}
920
921namespace {
922
923std::optional<int> RegExpExec(Isolate* isolate, DirectHandle<JSRegExp> regexp,
924 DirectHandle<String> subject, int32_t index,
925 int32_t* result_offsets_vector,
926 uint32_t result_offsets_vector_length) {
927 // Due to the way the JS calls are constructed this must be less than the
928 // length of a string, i.e. it is always a Smi. We check anyway for security.
929 CHECK_LE(0, index);
930 CHECK_GE(subject->length(), index);
931 isolate->counters()->regexp_entry_runtime()->Increment();
932 return RegExp::Exec(isolate, regexp, subject, index, result_offsets_vector,
933 result_offsets_vector_length);
934}
935
936std::optional<int> ExperimentalOneshotExec(
937 Isolate* isolate, DirectHandle<JSRegExp> regexp,
938 DirectHandle<String> subject, int32_t index, int32_t* result_offsets_vector,
939 uint32_t result_offsets_vector_length) {
940 CHECK_GE(result_offsets_vector_length,
942 regexp->data(isolate)->capture_count()));
943 // Due to the way the JS calls are constructed this must be less than the
944 // length of a string, i.e. it is always a Smi. We check anyway for security.
945 CHECK_LE(0, index);
946 CHECK_GE(subject->length(), index);
947 isolate->counters()->regexp_entry_runtime()->Increment();
948 return RegExp::ExperimentalOneshotExec(isolate, regexp, subject, index,
949 result_offsets_vector,
950 result_offsets_vector_length);
951}
952
953} // namespace
954
955RUNTIME_FUNCTION(Runtime_RegExpExec) {
956 HandleScope scope(isolate);
957 DCHECK_EQ(4, args.length());
958 DirectHandle<JSRegExp> regexp = args.at<JSRegExp>(0);
959 DirectHandle<String> subject = args.at<String>(1);
960 int32_t index = 0;
961 CHECK(Object::ToInt32(args[2], &index));
962 uint32_t result_offsets_vector_length = 0;
963 CHECK(Object::ToUint32(args[3], &result_offsets_vector_length));
964
965 // This untagged arg must be passed as an implicit arg.
966 int32_t* result_offsets_vector = reinterpret_cast<int32_t*>(
967 isolate->isolate_data()->regexp_exec_vector_argument());
968 DCHECK_NOT_NULL(result_offsets_vector);
969
970 std::optional<int> result =
971 RegExpExec(isolate, regexp, subject, index, result_offsets_vector,
972 result_offsets_vector_length);
973 DCHECK_EQ(!result, isolate->has_exception());
974 if (!result) return ReadOnlyRoots(isolate).exception();
975 return Smi::FromInt(result.value());
976}
977
978RUNTIME_FUNCTION(Runtime_RegExpGrowRegExpMatchInfo) {
979 HandleScope scope(isolate);
980 DCHECK_EQ(2, args.length());
982 int32_t register_count;
983 CHECK(Object::ToInt32(args[1], &register_count));
984
985 // We never pass anything besides the global last_match_info.
986 DCHECK_EQ(*match_info, *isolate->regexp_last_match_info());
987
989 isolate, match_info, JSRegExp::CaptureCountForRegisters(register_count));
990 if (*result != *match_info) {
991 isolate->native_context()->set_regexp_last_match_info(*result);
992 }
993
994 return *result;
995}
996
997RUNTIME_FUNCTION(Runtime_RegExpExperimentalOneshotExec) {
998 HandleScope scope(isolate);
999 DCHECK_EQ(4, args.length());
1000 DirectHandle<JSRegExp> regexp = args.at<JSRegExp>(0);
1001 DirectHandle<String> subject = args.at<String>(1);
1002 int32_t index = 0;
1003 CHECK(Object::ToInt32(args[2], &index));
1004 uint32_t result_offsets_vector_length = 0;
1005 CHECK(Object::ToUint32(args[3], &result_offsets_vector_length));
1006
1007 // This untagged arg must be passed as an implicit arg.
1008 int32_t* result_offsets_vector = reinterpret_cast<int32_t*>(
1009 isolate->isolate_data()->regexp_exec_vector_argument());
1010 DCHECK_NOT_NULL(result_offsets_vector);
1011
1012 std::optional<int> result = ExperimentalOneshotExec(
1013 isolate, regexp, subject, index, result_offsets_vector,
1014 result_offsets_vector_length);
1015 DCHECK_EQ(!result, isolate->has_exception());
1016 if (!result) return ReadOnlyRoots(isolate).exception();
1017 return Smi::FromInt(result.value());
1018}
1019
1020RUNTIME_FUNCTION(Runtime_RegExpBuildIndices) {
1021 HandleScope scope(isolate);
1022 DCHECK_EQ(3, args.length());
1024 DirectHandle<Object> maybe_names = args.at(2);
1025#ifdef DEBUG
1026 DirectHandle<JSRegExp> regexp = args.at<JSRegExp>(0);
1027 DCHECK(regexp->flags() & JSRegExp::kHasIndices);
1028#endif
1029
1030 return *JSRegExpResultIndices::BuildIndices(isolate, match_info, maybe_names);
1031}
1032
1033namespace {
1034
1035class MatchInfoBackedMatch : public String::Match {
1036 public:
1037 MatchInfoBackedMatch(Isolate* isolate, DirectHandle<JSRegExp> regexp,
1038 DirectHandle<RegExpData> regexp_data,
1039 DirectHandle<String> subject,
1040 DirectHandle<RegExpMatchInfo> match_info)
1041 : isolate_(isolate), match_info_(match_info) {
1042 subject_ = String::Flatten(isolate, subject);
1043
1044 if (RegExpData::TypeSupportsCaptures(regexp_data->type_tag())) {
1045 DCHECK(Is<IrRegExpData>(*regexp_data));
1046 Tagged<Object> o = Cast<IrRegExpData>(regexp_data)->capture_name_map();
1047 has_named_captures_ = IsFixedArray(o);
1048 if (has_named_captures_) {
1050 }
1051 } else {
1052 has_named_captures_ = false;
1053 }
1054 }
1055
1056 DirectHandle<String> GetMatch() override {
1057 return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
1058 }
1059
1060 DirectHandle<String> GetPrefix() override {
1061 const int match_start = match_info_->capture(0);
1062 return isolate_->factory()->NewSubString(subject_, 0, match_start);
1063 }
1064
1065 DirectHandle<String> GetSuffix() override {
1066 const int match_end = match_info_->capture(1);
1067 return isolate_->factory()->NewSubString(subject_, match_end,
1068 subject_->length());
1069 }
1070
1071 bool HasNamedCaptures() override { return has_named_captures_; }
1072
1073 int CaptureCount() override {
1074 return match_info_->number_of_capture_registers() / 2;
1075 }
1076
1077 MaybeDirectHandle<String> GetCapture(int i, bool* capture_exists) override {
1078 DirectHandle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
1079 isolate_, match_info_, i, capture_exists);
1080 return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
1081 : isolate_->factory()->empty_string();
1082 }
1083
1084 MaybeDirectHandle<String> GetNamedCapture(DirectHandle<String> name,
1085 CaptureState* state) override {
1086 DCHECK(has_named_captures_);
1087 int capture_index = 0;
1088 int capture_name_map_index = 0;
1089 while (true) {
1090 capture_index = LookupNamedCapture(
1091 [=](Tagged<String> capture_name) {
1092 return capture_name->Equals(*name);
1093 },
1094 *capture_name_map_, &capture_name_map_index);
1095 if (capture_index == -1) {
1096 *state = UNMATCHED;
1097 return isolate_->factory()->empty_string();
1098 }
1099 if (RegExpUtils::IsMatchedCapture(*match_info_, capture_index)) {
1100 DirectHandle<String> capture_value;
1102 isolate_, capture_value,
1103 Object::ToString(isolate_,
1105 isolate_, match_info_, capture_index)));
1106 *state = MATCHED;
1107 return capture_value;
1108 }
1109 }
1110 }
1111
1112 private:
1113 Isolate* isolate_;
1114 DirectHandle<String> subject_;
1115 DirectHandle<RegExpMatchInfo> match_info_;
1116
1118 DirectHandle<FixedArray> capture_name_map_;
1119};
1120
1121class VectorBackedMatch : public String::Match {
1122 public:
1123 VectorBackedMatch(Isolate* isolate, DirectHandle<String> subject,
1124 DirectHandle<String> match, uint32_t match_position,
1125 base::Vector<DirectHandle<Object>> captures,
1126 DirectHandle<Object> groups_obj)
1127 : isolate_(isolate),
1128 match_(match),
1129 match_position_(match_position),
1130 captures_(captures) {
1131 subject_ = String::Flatten(isolate, subject);
1132
1133 DCHECK(IsUndefined(*groups_obj, isolate) || IsJSReceiver(*groups_obj));
1134 has_named_captures_ = !IsUndefined(*groups_obj, isolate);
1136 }
1137
1138 DirectHandle<String> GetMatch() override { return match_; }
1139
1140 DirectHandle<String> GetPrefix() override {
1141 // match_position_ and match_ are user-controlled, hence we manually clamp
1142 // the index here.
1143 uint32_t end = std::min(subject_->length(), match_position_);
1144 return isolate_->factory()->NewSubString(subject_, 0, end);
1145 }
1146
1147 DirectHandle<String> GetSuffix() override {
1148 // match_position_ and match_ are user-controlled, hence we manually clamp
1149 // the index here.
1150 uint32_t start =
1151 std::min(subject_->length(), match_position_ + match_->length());
1152 return isolate_->factory()->NewSubString(subject_, start,
1153 subject_->length());
1154 }
1155
1156 bool HasNamedCaptures() override { return has_named_captures_; }
1157
1158 int CaptureCount() override { return captures_.length(); }
1159
1160 MaybeDirectHandle<String> GetCapture(int i, bool* capture_exists) override {
1161 DirectHandle<Object> capture_obj = captures_[i];
1162 if (IsUndefined(*capture_obj, isolate_)) {
1163 *capture_exists = false;
1164 return isolate_->factory()->empty_string();
1165 }
1166 *capture_exists = true;
1167 return Object::ToString(isolate_, capture_obj);
1168 }
1169
1170 MaybeDirectHandle<String> GetNamedCapture(DirectHandle<String> name,
1171 CaptureState* state) override {
1172 DCHECK(has_named_captures_);
1173
1174 // Strings representing integer indices are not valid identifiers (and
1175 // therefore not valid capture names).
1176 {
1177 size_t unused;
1178 if (name->AsIntegerIndex(&unused)) {
1179 *state = UNMATCHED;
1180 return isolate_->factory()->empty_string();
1181 }
1182 }
1183 DirectHandle<Object> capture_obj;
1185 isolate_, capture_obj,
1186 Object::GetProperty(isolate_, groups_obj_, name));
1187 if (IsUndefined(*capture_obj, isolate_)) {
1188 *state = UNMATCHED;
1189 return isolate_->factory()->empty_string();
1190 } else {
1191 *state = MATCHED;
1192 return Object::ToString(isolate_, capture_obj);
1193 }
1194 }
1195
1196 private:
1197 Isolate* isolate_;
1198 DirectHandle<String> subject_;
1199 DirectHandle<String> match_;
1200 const uint32_t match_position_;
1201 base::Vector<DirectHandle<Object>> captures_;
1202
1204 DirectHandle<JSReceiver> groups_obj_;
1205};
1206
// NOTE(review): this block was taken from a rendered source listing. Every
// line still starts with its listing line number, and the numbering skips
// 1233, 1242 and 1243, so the end of the LookupIterator constructor call and
// the middle of the Object::AddDataProperty call are not present here.
// Restore them from the pristine file before compiling.
1207// Create the groups object (see also the RegExp result creation in
1208// RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
//
// |capture_map| is read as consecutive (name, index) pairs: even slots hold
// the capture name as a String, odd slots hold the capture index as a Smi.
// |f_get_capture| is invoked with that capture index and yields the capture's
// value, which is expected to be either undefined or a String.
// Returns a new null-prototype JSObject populated from those pairs.
1209// TODO(42203211): We cannot simply pass a std::function here, as the closure
1210// may contain direct handles and they cannot be stored off-stack.
// NOTE(review): the enable_if condition below does not mention FunctionType,
// so as written it does not constrain the type of the callable.
1211template <typename FunctionType,
1212 typename = std::enable_if_t<std::is_function_v<Tagged<Object>(int)>>>
1213DirectHandle<JSObject> ConstructNamedCaptureGroupsObject(
1214 Isolate* isolate, DirectHandle<FixedArray> capture_map,
1215 const FunctionType& f_get_capture) {
1216 DirectHandle<JSObject> groups =
1217 isolate->factory()->NewJSObjectWithNullProto();
1218
// Two slots per named capture, hence half the array length.
1219 const int named_capture_count = capture_map->length() >> 1;
1220 for (int i = 0; i < named_capture_count; i++) {
1221 const int name_ix = i * 2;
1222 const int index_ix = i * 2 + 1;
1223
1224 DirectHandle<String> capture_name(Cast<String>(capture_map->get(name_ix)),
1225 isolate);
1226 const int capture_ix = Smi::ToInt(capture_map->get(index_ix));
1227 DCHECK_GE(capture_ix, 1); // Explicit groups start at index 1.
1228
1229 DirectHandle<Object> capture_value(f_get_capture(capture_ix), isolate);
1230 DCHECK(IsUndefined(*capture_value, isolate) || IsString(*capture_value));
1231
// NOTE(review): the final constructor argument (listing line 1233) is missing.
1232 LookupIterator it(isolate, groups, capture_name, groups,
1234 if (it.IsFound()) {
// The name was already added by an earlier pair, which is only expected with
// duplicate named groups enabled. The existing value is overwritten only when
// this group captured something, and it must still be undefined at that
// point.
1235 DCHECK(v8_flags.js_regexp_duplicate_named_groups);
1236 if (!IsUndefined(*capture_value, isolate)) {
1237 DCHECK(IsUndefined(*it.GetDataValue(), isolate));
1238 CHECK(Object::SetDataProperty(&it, capture_value).ToChecked());
1239 }
1240 } else {
// First occurrence of this name: add it as a new property.
// NOTE(review): the remaining arguments (listing lines 1242-1243) are missing.
1241 CHECK(Object::AddDataProperty(&it, capture_value, NONE,
1244 .IsJust());
1245 }
1246 }
1247
1248 return groups;
1249}
1250
// NOTE(review): this block was taken from a rendered source listing. Every
// line still starts with its listing line number, and the numbering skips
// 1284, 1286, 1326, 1400, 1423 and 1427. The statements that span those lines
// (the declaration of cached_answer, the two calls inside the
// `match_end < ...` branches, and the call that stores into the cache) are
// therefore incomplete here. Restore them from the pristine file before
// compiling.
1251// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
1252// separate last match info. See comment on that function.
//
// Runs |regexp_data| repeatedly over the flat |subject| using a
// RegExpGlobalExecRunner and collects one entry per match in a
// FixedArrayBuilder. With has_capture == false the entry is the matched
// substring. With has_capture == true the entry is a JSArray holding the
// match, each capture (or undefined), the match start, the subject and, if the
// pattern has named captures, a groups object.
//
// Returns the exception sentinel if the runner reports an exception, null if
// there was no match at all, and the builder's array otherwise.
// |last_match_array| is updated through RegExp::SetLastMatchInfo whenever a
// result array is returned.
1253template <bool has_capture>
1254static Tagged<Object> SearchRegExpMultiple(
1255 Isolate* isolate, DirectHandle<String> subject,
1256 DirectHandle<JSRegExp> regexp, DirectHandle<RegExpData> regexp_data,
1257 DirectHandle<RegExpMatchInfo> last_match_array) {
1258 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1259 DCHECK_NE(has_capture, regexp_data->capture_count() == 0);
1260 DCHECK_IMPLIES(has_capture, Is<IrRegExpData>(*regexp_data));
1261 DCHECK(subject->IsFlat());
1262
1263 // Force tier up to native code for global replaces. The global replace is
1264 // implemented differently for native code and bytecode execution, where the
1265 // native code expects an array to store all the matches, and the bytecode
1266 // matches one at a time, so it's easier to tier-up to native code from the
1267 // start.
1268 if (v8_flags.regexp_tier_up &&
1269 regexp_data->type_tag() == RegExpData::Type::IRREGEXP) {
1270 Cast<IrRegExpData>(regexp_data)->MarkTierUpForNextExec();
1271 if (v8_flags.trace_regexp_tier_up) {
1272 PrintF("Forcing tier-up of JSRegExp object %p in SearchRegExpMultiple\n",
1273 reinterpret_cast<void*>(regexp->ptr()));
1274 }
1275 }
1276
1277 int capture_count = regexp_data->capture_count();
1278 int subject_length = subject->length();
1279
// Only subjects strictly longer than this take the caching paths below.
1280 static const int kMinLengthToCache = 0x1000;
1281
1282 if (subject_length > kMinLengthToCache) {
1283 Tagged<FixedArray> last_match_cache;
// NOTE(review): the start of this statement (listing line 1284) and its last
// argument (1286) are missing; it presumably declares cached_answer as the
// result of a cache lookup -- confirm against the pristine file.
1285 isolate->heap(), *subject, regexp_data->wrapper(), &last_match_cache,
1287 if (IsFixedArray(cached_answer)) {
// Cache hit: rebuild the raw last-match registers from the cached Smis so they
// can be handed to SetLastMatchInfo.
1288 int capture_registers = JSRegExp::RegistersForCaptureCount(capture_count);
1289 std::unique_ptr<int32_t[]> last_match(new int32_t[capture_registers]);
1290 int32_t* raw_last_match = last_match.get();
1291 for (int i = 0; i < capture_registers; i++) {
1292 raw_last_match[i] = Smi::ToInt(last_match_cache->get(i));
1293 }
1294 DirectHandle<FixedArray> cached_fixed_array(
1295 Cast<FixedArray>(cached_answer), isolate);
1296 // The cache FixedArray is a COW-array and we need to return a copy.
1297 DirectHandle<FixedArray> copied_fixed_array =
1298 isolate->factory()->CopyFixedArrayWithMap(
1299 cached_fixed_array, isolate->factory()->fixed_array_map());
1300 RegExp::SetLastMatchInfo(isolate, last_match_array, subject,
1301 capture_count, raw_last_match);
1302 return *copied_fixed_array;
1303 }
1304 }
1305
1306 RegExpGlobalExecRunner runner(direct_handle(*regexp_data, isolate), subject,
1307 isolate);
1308 if (runner.HasException()) return ReadOnlyRoots(isolate).exception();
1309
1310 FixedArrayBuilder builder = FixedArrayBuilder::Lazy(isolate);
1311
1312 // Position to search from.
// match_start stays at -1 until the first match, which is how the code after
// the loop detects "no matches at all".
1313 int match_start = -1;
1314 int match_end = 0;
1315 bool first = true;
1316
1317 // Two smis before and after the match, for very long strings.
1318 static const int kMaxBuilderEntriesPerRegExpMatch = 5;
1319
1320 while (true) {
// FetchNext returns nullptr once there are no further matches.
1321 int32_t* current_match = runner.FetchNext();
1322 if (current_match == nullptr) break;
1323 match_start = current_match[0];
1324 builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch);
1325 if (match_end < match_start) {
// There is a gap between the previous match end and this match start.
// NOTE(review): the start of this call (listing line 1326) is missing.
1327 match_start);
1328 }
1329 match_end = current_match[1];
1330 {
1331 // Avoid accumulating new handles inside loop.
1332 HandleScope temp_scope(isolate);
1333 DirectHandle<String> match;
// The first match is created with NewSubString, all later ones with
// NewProperSubString.
1334 if (!first) {
1335 match = isolate->factory()->NewProperSubString(subject, match_start,
1336 match_end);
1337 } else {
1338 match =
1339 isolate->factory()->NewSubString(subject, match_start, match_end);
1340 first = false;
1341 }
1342
1343 if (has_capture) {
1344 // Arguments array to replace function is match, captures, index and
1345 // subject, i.e., 3 + capture count in total. If the RegExp contains
1346 // named captures, they are also passed as the last argument.
1347
1348 // has_capture can only be true for IrRegExp.
1349 Tagged<IrRegExpData> re_data = Cast<IrRegExpData>(*regexp_data);
1350 DirectHandle<Object> maybe_capture_map(re_data->capture_name_map(),
1351 isolate);
1352 const bool has_named_captures = IsFixedArray(*maybe_capture_map);
1353
1354 const int argc =
1355 has_named_captures ? 4 + capture_count : 3 + capture_count;
1356
1357 DirectHandle<FixedArray> elements =
1358 isolate->factory()->NewFixedArray(argc);
1359 int cursor = 0;
1360
1361 elements->set(cursor++, *match);
// Capture i occupies registers 2*i (start) and 2*i + 1 (end); a negative start
// marks a capture that did not participate in the match.
1362 for (int i = 1; i <= capture_count; i++) {
1363 int start = current_match[i * 2];
1364 if (start >= 0) {
1365 int end = current_match[i * 2 + 1];
1366 DCHECK(start <= end);
1367 DirectHandle<String> substring =
1368 isolate->factory()->NewSubString(subject, start, end);
1369 elements->set(cursor++, *substring);
1370 } else {
1371 DCHECK_GT(0, current_match[i * 2 + 1]);
1372 elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value());
1373 }
1374 }
1375
1376 elements->set(cursor++, Smi::FromInt(match_start));
1377 elements->set(cursor++, *subject);
1378
1379 if (has_named_captures) {
// The groups object is built from the capture values already stored in
// |elements|: slot ix of |elements| holds capture ix.
1380 DirectHandle<FixedArray> capture_map =
1381 Cast<FixedArray>(maybe_capture_map);
1382 DirectHandle<JSObject> groups = ConstructNamedCaptureGroupsObject(
1383 isolate, capture_map, [=](int ix) { return elements->get(ix); });
1384 elements->set(cursor++, *groups);
1385 }
1386
1387 DCHECK_EQ(cursor, argc);
1388 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1389 } else {
1390 builder.Add(*match);
1391 }
1392 }
1393 }
1394
1395 if (runner.HasException()) return ReadOnlyRoots(isolate).exception();
1396
1397 if (match_start >= 0) {
1398 // Finished matching, with at least one match.
1399 if (match_end < subject_length) {
// NOTE(review): the start of this call (listing line 1400) is missing.
1401 subject_length);
1402 }
1403
1404 RegExp::SetLastMatchInfo(isolate, last_match_array, subject, capture_count,
1405 runner.LastSuccessfulMatch());
1406
1407 if (subject_length > kMinLengthToCache) {
1408 // Store the last successful match into the array for caching.
1409 int capture_registers = JSRegExp::RegistersForCaptureCount(capture_count);
1410 DirectHandle<FixedArray> last_match_cache =
1411 isolate->factory()->NewFixedArray(capture_registers);
1412 int32_t* last_match = runner.LastSuccessfulMatch();
1413 for (int i = 0; i < capture_registers; i++) {
1414 last_match_cache->set(i, Smi::FromInt(last_match[i]));
1415 }
1416 DirectHandle<FixedArray> result_fixed_array =
1417 FixedArray::RightTrimOrEmpty(isolate, builder.array(),
1418 builder.length());
1419 // Cache the result and copy the FixedArray into a COW array.
1420 DirectHandle<FixedArray> copied_fixed_array =
1421 isolate->factory()->CopyFixedArrayWithMap(
1422 result_fixed_array, isolate->factory()->fixed_array_map());
// NOTE(review): the callee name (listing line 1423) and the final argument
// (1427) of this call are missing.
1424 isolate, subject,
1425 direct_handle(regexp->data(isolate)->wrapper(), isolate),
1426 copied_fixed_array, last_match_cache,
1428 }
1429 return *builder.array();
1430 } else {
1431 return ReadOnlyRoots(isolate).null_value(); // No matches at all.
1432 }
1433}
1434
// NOTE(review): this block was taken from a rendered source listing. Every
// line still starts with its listing line number, and the numbering skips
// 1472, 1527-1528, 1532-1533 and 1539. The statements spanning those lines
// (the macro wrapping RegExp::Exec_Single and the three statements that
// declare `result` in the global branch) are therefore incomplete here.
// Restore them from the pristine file before compiling.
1435// Legacy implementation of RegExp.prototype[Symbol.replace] which
1436// doesn't properly call the underlying exec method.
//
// Replaces matches of the unmodified |regexp| in |string| using the
// replacement pattern |replace| (flattened on entry).
//
// Non-global regexps: executes once, starting at lastIndex for sticky regexps
// and at 0 otherwise. On failure returns |string| unchanged (resetting
// lastIndex to 0 if sticky); on success returns prefix + substitution +
// suffix and, if sticky, sets lastIndex to the end of the match.
//
// Global regexps: resets lastIndex to 0 and delegates to helper routines, one
// path for an empty replacement and one for a non-empty replacement. Returns
// an empty MaybeDirectHandle when the non-empty path does not produce a
// String.
1437V8_WARN_UNUSED_RESULT MaybeDirectHandle<String> RegExpReplace(
1438 Isolate* isolate, DirectHandle<JSRegExp> regexp,
1439 DirectHandle<String> string, DirectHandle<String> replace) {
1440 // Functional fast-paths are dispatched directly by replace builtin.
1441 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1442
1443 Factory* factory = isolate->factory();
1444
1445 const int flags = regexp->flags();
1446 const bool global = (flags & JSRegExp::kGlobal) != 0;
1447 const bool sticky = (flags & JSRegExp::kSticky) != 0;
1448
1449 replace = String::Flatten(isolate, replace);
1450
1451 DirectHandle<RegExpMatchInfo> last_match_info =
1452 isolate->regexp_last_match_info();
1453 DirectHandle<RegExpData> data(regexp->data(isolate), isolate);
1454
1455 if (!global) {
1456 // Non-global regexp search, string replace.
1457
// lastIndex is only consulted for sticky regexps.
1458 uint32_t last_index = 0;
1459 if (sticky) {
1460 DirectHandle<Object> last_index_obj(regexp->last_index(), isolate);
1461 ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1462 Object::ToLength(isolate, last_index_obj));
1463 last_index = PositiveNumberToUint32(*last_index_obj);
1464 }
1465
// Stays null (meaning "no match") when the exec call below is skipped.
1466 DirectHandle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1467 isolate);
1468
1469 // A lastIndex exceeding the string length always returns null (signalling
1470 // failure) in RegExpBuiltinExec, thus we can skip the call.
1471 if (last_index <= static_cast<uint32_t>(string->length())) {
// NOTE(review): the macro name opening this statement (listing line 1472) is
// missing.
1473 isolate, match_indices_obj,
1474 RegExp::Exec_Single(isolate, regexp, string, last_index,
1475 last_match_info));
1476 }
1477
1478 if (IsNull(*match_indices_obj, isolate)) {
1479 if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER);
1480 return string;
1481 }
1482
1483 auto match_indices = Cast<RegExpMatchInfo>(match_indices_obj);
1484
1485 const int start_index = match_indices->capture(0);
1486 const int end_index = match_indices->capture(1);
1487
1488 if (sticky) {
1489 regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER);
1490 }
1491
// Result = text before the match + substitution + text after the match.
1492 IncrementalStringBuilder builder(isolate);
1493 builder.AppendString(factory->NewSubString(string, 0, start_index));
1494
// An empty replacement contributes nothing, so the substitution is skipped.
1495 if (replace->length() > 0) {
1496 MatchInfoBackedMatch m(isolate, regexp, data, string, match_indices);
1497 DirectHandle<String> replacement;
1498 ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1499 String::GetSubstitution(isolate, &m, replace));
1500 builder.AppendString(replacement);
1501 }
1502
1503 builder.AppendString(
1504 factory->NewSubString(string, end_index, string->length()));
1505 return builder.Finish();
1506 } else {
1507 // Global regexp search, string replace.
1508 DCHECK(global);
1509 RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0));
1510
1511 // Force tier up to native code for global replaces. The global replace is
1512 // implemented differently for native code and bytecode execution, where the
1513 // native code expects an array to store all the matches, and the bytecode
1514 // matches one at a time, so it's easier to tier-up to native code from the
1515 // start.
1516 if (v8_flags.regexp_tier_up &&
1517 data->type_tag() == RegExpData::Type::IRREGEXP) {
1518 Cast<IrRegExpData>(data)->MarkTierUpForNextExec();
1519 if (v8_flags.trace_regexp_tier_up) {
1520 PrintF("Forcing tier-up of JSRegExp object %p in RegExpReplace\n",
1521 reinterpret_cast<void*>(regexp->ptr()));
1522 }
1523 }
1524
// Empty replacement: dispatch on the subject's string representation.
1525 if (replace->length() == 0) {
1526 if (string->IsOneByteRepresentation()) {
// NOTE(review): the declaration of `result` and the callee name (listing lines
// 1527-1528) are missing.
1529 isolate, string, regexp, data, last_match_info);
1530 return direct_handle(Cast<String>(result), isolate);
1531 } else {
// NOTE(review): the declaration of `result` and the callee name (listing lines
// 1532-1533) are missing.
1534 isolate, string, regexp, data, last_match_info);
1535 return direct_handle(Cast<String>(result), isolate);
1536 }
1537 }
1538
// NOTE(review): the declaration of `result` and the callee name (listing line
// 1539) are missing.
1540 isolate, string, regexp, data, replace, last_match_info);
1541 if (IsString(result)) {
1542 return direct_handle(Cast<String>(result), isolate);
1543 } else {
// A non-String result is reported to the caller as an empty handle.
1544 return MaybeDirectHandle<String>();
1545 }
1546 }
1547
1548 UNREACHABLE();
1549}
1550
1551} // namespace
1552
1553// This is only called for StringReplaceGlobalRegExpWithFunction.
1554RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1555 HandleScope handles(isolate);
1556 DCHECK_EQ(3, args.length());
1557
1558 DirectHandle<JSRegExp> regexp = args.at<JSRegExp>(0);
1559 DirectHandle<String> subject = args.at<String>(1);
1560 DirectHandle<RegExpMatchInfo> last_match_info = args.at<RegExpMatchInfo>(2);
1561
1562 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1563 DirectHandle<RegExpData> regexp_data(regexp->data(isolate), isolate);
1564
1565 subject = String::Flatten(isolate, subject);
1566 CHECK(regexp->flags() & JSRegExp::kGlobal);
1567
1569 if (regexp_data->capture_count() == 0) {
1570 result = SearchRegExpMultiple<false>(isolate, subject, regexp, regexp_data,
1571 last_match_info);
1572 } else {
1573 result = SearchRegExpMultiple<true>(isolate, subject, regexp, regexp_data,
1574 last_match_info);
1575 }
1576 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1577 return result;
1578}
1579
1580RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
1581 HandleScope scope(isolate);
1582 DCHECK_EQ(3, args.length());
1583 DirectHandle<String> subject = args.at<String>(0);
1584 DirectHandle<JSRegExp> regexp = args.at<JSRegExp>(1);
1585 DirectHandle<JSReceiver> replace_obj = args.at<JSReceiver>(2);
1586
1587 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1588 DCHECK(replace_obj->map()->is_callable());
1589
1590 Factory* factory = isolate->factory();
1591 DirectHandle<RegExpMatchInfo> last_match_info =
1592 isolate->regexp_last_match_info();
1593 DirectHandle<RegExpData> data(regexp->data(isolate), isolate);
1594
1595 const int flags = regexp->flags();
1596 DCHECK_EQ(flags & JSRegExp::kGlobal, 0);
1597
1598 // TODO(jgruber): This should be an easy port to CSA with massive payback.
1599
1600 const bool sticky = (flags & JSRegExp::kSticky) != 0;
1601 uint32_t last_index = 0;
1602 if (sticky) {
1603 DirectHandle<Object> last_index_obj(regexp->last_index(), isolate);
1605 isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1606 last_index = PositiveNumberToUint32(*last_index_obj);
1607 }
1608
1609 DirectHandle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1610 isolate);
1611
1612 // A lastIndex exceeding the string length always returns null (signalling
1613 // failure) in RegExpBuiltinExec, thus we can skip the call.
1614 if (last_index <= static_cast<uint32_t>(subject->length())) {
1616 isolate, match_indices_obj,
1617 RegExp::Exec_Single(isolate, regexp, subject, last_index,
1618 last_match_info));
1619 }
1620
1621 if (IsNull(*match_indices_obj, isolate)) {
1622 if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER);
1623 return *subject;
1624 }
1625
1626 auto match_indices = Cast<RegExpMatchInfo>(match_indices_obj);
1627
1628 const int index = match_indices->capture(0);
1629 const int end_of_match = match_indices->capture(1);
1630
1631 if (sticky) {
1632 regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER);
1633 }
1634
1635 IncrementalStringBuilder builder(isolate);
1636 builder.AppendString(factory->NewSubString(subject, 0, index));
1637
1638 // Compute the parameter list consisting of the match, captures, index,
1639 // and subject for the replace function invocation. If the RegExp contains
1640 // named captures, they are also passed as the last argument.
1641
1642 // The number of captures plus one for the match.
1643 const int m = match_indices->number_of_capture_registers() / 2;
1644
1645 bool has_named_captures = false;
1646 DirectHandle<FixedArray> capture_map;
1647 if (m > 1) {
1648 SBXCHECK(Is<IrRegExpData>(*data));
1649
1650 Tagged<Object> maybe_capture_map =
1651 Cast<IrRegExpData>(data)->capture_name_map();
1652 if (IsFixedArray(maybe_capture_map)) {
1653 has_named_captures = true;
1654 capture_map = direct_handle(Cast<FixedArray>(maybe_capture_map), isolate);
1655 }
1656 }
1657
1658 const uint32_t argc = GetArgcForReplaceCallable(m, has_named_captures);
1659 if (argc == static_cast<uint32_t>(-1)) {
1661 isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1662 }
1663 DirectHandleVector<Object> arguments(isolate, argc);
1664
1665 int cursor = 0;
1666 for (int j = 0; j < m; j++) {
1667 bool ok;
1668 DirectHandle<String> capture =
1669 RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
1670 if (ok) {
1671 arguments[cursor++] = capture;
1672 } else {
1673 arguments[cursor++] = factory->undefined_value();
1674 }
1675 }
1676
1677 arguments[cursor++] = direct_handle(Smi::FromInt(index), isolate);
1678 arguments[cursor++] = subject;
1679
1680 if (has_named_captures) {
1681 arguments[cursor++] = ConstructNamedCaptureGroupsObject(
1682 isolate, capture_map, [&arguments](int ix) { return *arguments[ix]; });
1683 }
1684
1685 DCHECK_EQ(cursor, argc);
1686
1687 DirectHandle<Object> replacement_obj;
1689 isolate, replacement_obj,
1690 Execution::Call(isolate, replace_obj, factory->undefined_value(),
1691 base::VectorOf(arguments)));
1692
1693 DirectHandle<String> replacement;
1695 isolate, replacement, Object::ToString(isolate, replacement_obj));
1696
1697 builder.AppendString(replacement);
1698 builder.AppendString(
1699 factory->NewSubString(subject, end_of_match, subject->length()));
1700
1701 RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1702}
1703
1704namespace {
1705
1706V8_WARN_UNUSED_RESULT MaybeDirectHandle<Object> ToUint32(
1707 Isolate* isolate, DirectHandle<Object> object, uint32_t* out) {
1708 if (IsUndefined(*object, isolate)) {
1709 *out = kMaxUInt32;
1710 return object;
1711 }
1712
1713 DirectHandle<Object> number;
1714 ASSIGN_RETURN_ON_EXCEPTION(isolate, number,
1715 Object::ToNumber(isolate, object));
1716 *out = NumberToUint32(*number);
1717 return object;
1718}
1719
1720DirectHandle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1721 DirectHandle<FixedArray> elems,
1722 int num_elems) {
1723 return isolate->factory()->NewJSArrayWithElements(
1724 FixedArray::RightTrimOrEmpty(isolate, elems, num_elems));
1725}
1726
1727} // namespace
1728
// NOTE(review): this block was taken from a rendered source listing. Every
// line still starts with its listing line number, and the numbering has many
// gaps (the first one at 1736). The declarations of recv, ctor, flags, result
// and elems, the initialisers of u_str and y_str, and the macro names that
// open most multi-line statements are therefore not present here. Restore
// them from the pristine file before compiling.
1729// Slow path for:
1730// ES#sec-regexp.prototype-@@split
1731// RegExp.prototype [ @@split ] ( string, limit )
//
// Arguments:
//   args[0]: the receiver (its declaration is among the missing lines).
//   args[1]: the String to split.
//   args[2]: the limit; undefined means kMaxUInt32 (see the ToUint32 helper
//            in this file).
//
// Returns a JSArray with the pieces of |string| and the captures of each
// match, truncated to at most |limit| elements.
1732RUNTIME_FUNCTION(Runtime_RegExpSplit) {
1733 HandleScope scope(isolate);
1734 DCHECK_EQ(3, args.length());
1735
1737 DirectHandle<String> string = args.at<String>(1);
1738 DirectHandle<Object> limit_obj = args.at(2);
1739
1740 Factory* factory = isolate->factory();
1741
// Determine the constructor used to create the splitter, defaulting to the
// built-in RegExp function.
1742 DirectHandle<JSFunction> regexp_fun = isolate->regexp_function();
1745 isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun));
1746
// Read the receiver's "flags" property and convert it to a string.
1747 DirectHandle<Object> flags_obj;
1749 isolate, flags_obj,
1750 JSObject::GetProperty(isolate, recv, factory->flags_string()));
1751
1754 Object::ToString(isolate, flags_obj));
1755
// NOTE(review): the initialisers of u_str and y_str are missing; judging by
// the names they are presumably the single-character strings "u" and "y" --
// confirm against the pristine file.
1756 DirectHandle<String> u_str =
1758 const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
1759
1760 DirectHandle<String> y_str =
1762 const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
1763
// The splitter is always constructed with y_str among its flags: append it
// unless it is already present.
1764 DirectHandle<String> new_flags = flags;
1765 if (!sticky) {
1766 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
1767 factory->NewConsString(flags, y_str));
1768 }
1769
1770 DirectHandle<JSReceiver> splitter;
1771 {
// splitter = new ctor(recv, new_flags)
1772 constexpr int argc = 2;
1773 std::array<DirectHandle<Object>, argc> ctor_args = {recv, new_flags};
1774
1775 DirectHandle<Object> splitter_obj;
1777 isolate, splitter_obj,
1778 Execution::New(isolate, ctor, base::VectorOf(ctor_args)));
1779
1780 splitter = Cast<JSReceiver>(splitter_obj);
1781 }
1782
1783 uint32_t limit;
1784 RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
1785
1786 const uint32_t length = string->length();
1787
1788 if (limit == 0) return *factory->NewJSArray(0);
1789
// Empty subject: the result is [] if the splitter matches the empty string,
// and [string] otherwise.
1790 if (length == 0) {
1793 isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1794 factory->undefined_value()));
1795
1796 if (!IsNull(*result, isolate)) return *factory->NewJSArray(0);
1797
1798 DirectHandle<FixedArray> elems = factory->NewFixedArray(1);
1799 elems->set(0, *string);
1800 return *factory->NewJSArrayWithElements(elems);
1801 }
1802
// Initial capacity of the output backing store; it is grown on demand by
// FixedArray::SetAndGrow.
1803 static const int kInitialArraySize = 8;
1805 factory->NewFixedArrayWithHoles(kInitialArraySize);
1806 uint32_t num_elems = 0;
1807
// string_index is the position at which the next match is attempted;
// prev_string_index is where the piece currently being accumulated starts.
1808 uint32_t string_index = 0;
1809 uint32_t prev_string_index = 0;
1810 while (string_index < length) {
// Try to match exactly at string_index by setting the splitter's lastIndex.
1812 isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));
1813
1816 isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1817 factory->undefined_value()));
1818
// No match at this position: advance by one position (AdvanceStringIndex
// takes the unicode flag into account) and retry.
1819 if (IsNull(*result, isolate)) {
1820 string_index = static_cast<uint32_t>(
1821 RegExpUtils::AdvanceStringIndex(*string, string_index, unicode));
1822 continue;
1823 }
1824
1825 DirectHandle<Object> last_index_obj;
1827 isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));
1828
1830 isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1831
// The match end is the splitter's lastIndex, clamped to the string length.
1832 const uint32_t end =
1833 std::min(PositiveNumberToUint32(*last_index_obj), length);
// A match ending where the current piece starts is skipped; advance and retry.
1834 if (end == prev_string_index) {
1835 string_index = static_cast<uint32_t>(
1836 RegExpUtils::AdvanceStringIndex(*string, string_index, unicode));
1837 continue;
1838 }
1839
1840 {
// Emit the piece between the previous match end and this match start.
1841 DirectHandle<String> substr =
1842 factory->NewSubString(string, prev_string_index, string_index);
1843 elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1844 if (num_elems == limit) {
1845 return *NewJSArrayWithElements(isolate, elems, num_elems);
1846 }
1847 }
1848
1849 prev_string_index = end;
1850
1851 DirectHandle<Object> num_captures_obj;
1853 isolate, num_captures_obj,
1854 Object::GetProperty(isolate, result,
1855 isolate->factory()->length_string()));
1856
1858 isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
1859 const uint32_t num_captures = PositiveNumberToUint32(*num_captures_obj);
1860
// Append elements 1..length-1 of the exec result (element 0 is skipped),
// stopping as soon as the limit is reached.
1861 for (uint32_t i = 1; i < num_captures; i++) {
1862 DirectHandle<Object> capture;
1864 isolate, capture, Object::GetElement(isolate, result, i));
1865 elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, capture);
1866 if (num_elems == limit) {
1867 return *NewJSArrayWithElements(isolate, elems, num_elems);
1868 }
1869 }
1870
1871 string_index = prev_string_index;
1872 }
1873
1874 {
// Emit the trailing piece after the last match.
1875 DirectHandle<String> substr =
1876 factory->NewSubString(string, prev_string_index, length);
1877 elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1878 }
1879
1880 return *NewJSArrayWithElements(isolate, elems, num_elems);
1881}
1882
1883namespace {
1884
// NOTE(review): this block was taken from a rendered source listing. Every
// line still starts with its listing line number, and listing line 1888 (the
// last parameter and the opening brace) is missing. Callers pass a fourth
// argument named no_gc, so the missing parameter is presumably a reference to
// a GC-disallowing scope object -- confirm against the pristine file.
//
// Returns true if the one-byte C string |target| occurs anywhere in |flags|,
// searching from position 0. Char is the character type of |flags|.
1885template <typename Char>
1886inline bool IsContainFlagImpl(Isolate* isolate, base::Vector<const Char> flags,
1887 const char* target,
1889 StringSearch<uint8_t, Char> search(isolate, base::OneByteVector(target));
1890 return search.Search(flags, 0) >= 0;
1891}
1892
// NOTE(review): this block was taken from a rendered source listing. Every
// line still starts with its listing line number, and listing line 1895 (the
// last parameter and the opening brace) is missing; the body forwards a value
// named no_gc, so that is presumably the missing parameter -- confirm against
// the pristine file.
//
// Returns true if |target| occurs in the flat string content |flags|,
// dispatching to the one-byte or two-byte instantiation of IsContainFlagImpl
// according to the representation of |flags|.
1893inline bool IsContainFlag(Isolate* isolate, String::FlatContent& flags,
1894 const char* target,
1896 return flags.IsOneByte()
1897 ? IsContainFlagImpl<uint8_t>(isolate, flags.ToOneByteVector(),
1898 target, no_gc)
1899 : IsContainFlagImpl<base::uc16>(isolate, flags.ToUC16Vector(),
1900 target, no_gc);
1901}
1902
1903} // namespace
1904
1905// Slow path for:
1906// ES#sec-regexp.prototype-@@replace
1907// RegExp.prototype [ @@replace ] ( string, replaceValue )
1908RUNTIME_FUNCTION(Runtime_RegExpReplaceRT) {
1909 HandleScope scope(isolate);
1910 DCHECK_EQ(3, args.length());
1911
1913 DirectHandle<String> string = args.at<String>(1);
1914 DirectHandle<Object> replace_obj = args.at(2);
1915
1916 Factory* factory = isolate->factory();
1917
1918 string = String::Flatten(isolate, string);
1919
1920 const bool functional_replace = IsCallable(*replace_obj);
1921
1922 DirectHandle<String> replace;
1923 if (!functional_replace) {
1924 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
1925 Object::ToString(isolate, replace_obj));
1926 }
1927
1928 // Fast-path for unmodified JSRegExps (and non-functional replace).
1929 if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1930 // We should never get here with functional replace because unmodified
1931 // regexp and functional replace should be fully handled in CSA code.
1932 CHECK(!functional_replace);
1935 isolate, result,
1936 RegExpReplace(isolate, Cast<JSRegExp>(recv), string, replace));
1938 return *result;
1939 }
1940
1941 const uint32_t length = string->length();
1942 bool global = false;
1943 bool fullUnicode = false;
1944
1945 DirectHandle<Object> flags_obj;
1946 DirectHandle<String> flag_str;
1947
1949 isolate, flags_obj,
1950 JSReceiver::GetProperty(isolate, recv, factory->flags_string()));
1951
1952 // 7. Let flags be ? ToString(? Get(rx, "flags")).
1953 // 8. If flags contains "g", let global be true. Otherwise, let global be
1954 // false.
1955 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flag_str,
1956 Object::ToString(isolate, flags_obj));
1957 flag_str = String::Flatten(isolate, flag_str);
1958 {
1960 String::FlatContent flat_flag = flag_str->GetFlatContent(no_gc);
1961
1962 global = IsContainFlag(isolate, flat_flag, "g", no_gc);
1963
1964 if (global) {
1965 // b. If flags contains "u" or flags contains "v", let fullUnicode be
1966 // true. Otherwise, let fullUnicode be false.
1967 fullUnicode = IsContainFlag(isolate, flat_flag, "u", no_gc) ||
1968 IsContainFlag(isolate, flat_flag, "v", no_gc);
1969 }
1970 }
1971
1972 if (global) {
1974 RegExpUtils::SetLastIndex(isolate, recv, 0));
1975 }
1976
1978
1979 while (true) {
1981 {
1982 HandleScope inner_scope(isolate);
1984 isolate, result,
1985 RegExpUtils::RegExpExec(isolate, recv, string,
1986 factory->undefined_value()));
1987 result = inner_scope.CloseAndEscape(result);
1988 }
1989
1990 if (IsNull(*result, isolate)) break;
1991
1992 results.emplace_back(result);
1993 if (!global) break;
1994
1995 {
1996 HandleScope inner_scope(isolate);
1997
1998 DirectHandle<Object> match_obj;
2000 isolate, match_obj, Object::GetElement(isolate, result, 0));
2001
2004 Object::ToString(isolate, match_obj));
2005
2006 if (match->length() == 0) {
2008 isolate, RegExpUtils::SetAdvancedStringIndex(isolate, recv, string,
2009 fullUnicode));
2010 }
2011 }
2012 }
2013
2014 // TODO(jgruber): Look into ReplacementStringBuilder instead.
2015 IncrementalStringBuilder builder(isolate);
2016 uint32_t next_source_position = 0;
2017
2018 for (const auto& result : results) {
2019 HandleScope handle_scope(isolate);
2020 DirectHandle<Object> captures_length_obj;
2022 isolate, captures_length_obj,
2023 Object::GetProperty(isolate, result, factory->length_string()));
2024
2026 isolate, captures_length_obj,
2027 Object::ToLength(isolate, captures_length_obj));
2028 const uint32_t captures_length =
2029 PositiveNumberToUint32(*captures_length_obj);
2030
2031 DirectHandle<Object> match_obj;
2032 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
2033 Object::GetElement(isolate, result, 0));
2034
2037 Object::ToString(isolate, match_obj));
2038
2039 const int match_length = match->length();
2040
2041 DirectHandle<Object> position_obj;
2043 isolate, position_obj,
2044 Object::GetProperty(isolate, result, factory->index_string()));
2045
2047 isolate, position_obj, Object::ToInteger(isolate, position_obj));
2048 const uint32_t position =
2049 std::min(PositiveNumberToUint32(*position_obj), length);
2050
2051 // Do not reserve capacity since captures_length is user-controlled.
2053
2054 captures.emplace_back(match);
2055 for (uint32_t n = 1; n < captures_length; n++) {
2056 DirectHandle<Object> capture;
2058 isolate, capture, Object::GetElement(isolate, result, n));
2059
2060 if (!IsUndefined(*capture, isolate)) {
2061 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
2062 Object::ToString(isolate, capture));
2063 }
2064 captures.emplace_back(capture);
2065 }
2066
2067 DirectHandle<Object> groups_obj = isolate->factory()->undefined_value();
2069 isolate, groups_obj,
2070 Object::GetProperty(isolate, result, factory->groups_string()));
2071
2072 const bool has_named_captures = !IsUndefined(*groups_obj, isolate);
2073
2074 DirectHandle<String> replacement;
2075 if (functional_replace) {
2076 // The first argument is always match string itself. So min argc value
2077 // should be 1.
2078 const uint32_t argc = GetArgcForReplaceCallable(
2079 static_cast<uint32_t>(captures.size()), has_named_captures);
2080 if (argc == static_cast<uint32_t>(-1)) {
2082 isolate, NewRangeError(MessageTemplate::kTooManyArguments));
2083 }
2084
2085 DirectHandleVector<Object> call_args(isolate, argc);
2086
2087 int cursor = 0;
2088 for (uint32_t j = 0; j < captures.size(); j++) {
2089 call_args[cursor++] = captures[j];
2090 }
2091
2092 call_args[cursor++] = direct_handle(Smi::FromInt(position), isolate);
2093 call_args[cursor++] = string;
2094 if (has_named_captures) call_args[cursor++] = groups_obj;
2095
2096 DCHECK_EQ(cursor, argc);
2097
2098 DirectHandle<Object> replacement_obj;
2100 isolate, replacement_obj,
2101 Execution::Call(isolate, replace_obj, factory->undefined_value(),
2102 base::VectorOf(call_args)));
2103
2105 isolate, replacement, Object::ToString(isolate, replacement_obj));
2106 } else {
2107 DCHECK(!functional_replace);
2108 if (!IsUndefined(*groups_obj, isolate)) {
2110 isolate, groups_obj, Object::ToObject(isolate, groups_obj));
2111 }
2112 VectorBackedMatch m(isolate, string, match, position,
2113 base::VectorOf(captures), groups_obj);
2115 isolate, replacement, String::GetSubstitution(isolate, &m, replace));
2116 }
2117
2118 if (position >= next_source_position) {
2119 builder.AppendString(
2120 factory->NewSubString(string, next_source_position, position));
2121 builder.AppendString(replacement);
2122
2123 next_source_position = position + match_length;
2124 }
2125 }
2126
2127 if (next_source_position < length) {
2128 builder.AppendString(
2129 factory->NewSubString(string, next_source_position, length));
2130 }
2131
2132 RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
2133}
2134
2135RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
2136 HandleScope scope(isolate);
2137 DCHECK_EQ(3, args.length());
2138 // TODO(pwong): To follow the spec more closely and simplify calling code,
2139 // this could handle the canonicalization of pattern and flags. See
2140 // https://tc39.github.io/ecma262/#sec-regexpinitialize
2141 DirectHandle<JSRegExp> regexp = args.at<JSRegExp>(0);
2142 DirectHandle<String> source = args.at<String>(1);
2143 DirectHandle<String> flags = args.at<String>(2);
2144
2146 JSRegExp::Initialize(regexp, source, flags));
2147
2148 return *regexp;
2149}
2150
2151RUNTIME_FUNCTION(Runtime_RegExpStringFromFlags) {
2152 HandleScope scope(isolate);
2153 DCHECK_EQ(1, args.length());
2154 auto regexp = Cast<JSRegExp>(args[0]);
2155 DirectHandle<String> flags =
2156 JSRegExp::StringFromFlags(isolate, regexp->flags());
2157 return *flags;
2158}
2159
2160namespace {
2161
2162template <typename SChar, typename PChar>
2163inline void RegExpMatchGlobalAtom_OneCharPattern(
2164 Isolate* isolate, base::Vector<const SChar> subject, const PChar pattern,
2165 int start_index, int* number_of_matches, int* last_match_index,
2166 const DisallowGarbageCollection& no_gc) {
2167 for (int i = start_index; i < subject.length(); i++) {
2168 // Subtle: the valid variants are {SChar,PChar} in:
2169 // {uint8_t,uint8_t}, {uc16,uc16}, {uc16,uint8_t}. In the latter case,
2170 // we cast the uint8_t pattern to uc16 for the comparison.
2171 if (subject[i] != static_cast<const SChar>(pattern)) continue;
2172 (*number_of_matches)++;
2173 (*last_match_index) = i;
2174 }
2175}
2176
2177// Unimplemented: a two-byte (base::uc16) pattern character combined with a
// one-byte (uint8_t) subject. The specialization is explicitly deleted, so
// any call that selects this combination is rejected at compile time.
2178template <>
2179inline void RegExpMatchGlobalAtom_OneCharPattern(
2180 Isolate* isolate, base::Vector<const uint8_t> subject,
2181 const base::uc16 pattern, int start_index, int* number_of_matches,
2182 int* last_match_index, const DisallowGarbageCollection& no_gc) = delete;
2183
2184template <typename Char>
2185inline int AdvanceStringIndex(base::Vector<const Char> subject, int index,
2186 bool is_unicode) {
2187 // Taken from RegExpUtils::AdvanceStringIndex:
2188
2189 const int subject_length = subject.length();
2190 if (is_unicode && index < subject_length) {
2191 const uint16_t first = subject[index];
2192 if (first >= 0xD800 && first <= 0xDBFF && index + 1 < subject_length) {
2193 DCHECK_LT(index, std::numeric_limits<int>::max());
2194 const uint16_t second = subject[index + 1];
2195 if (second >= 0xDC00 && second <= 0xDFFF) {
2196 return index + 2;
2197 }
2198 }
2199 }
2200
2201 return index + 1;
2202}
2203
2204template <typename SChar, typename PChar>
2205inline void RegExpMatchGlobalAtom_Generic(
2206 Isolate* isolate, base::Vector<const SChar> subject,
2207 base::Vector<const PChar> pattern, bool is_unicode, int start_index,
2208 int* number_of_matches, int* last_match_index,
2209 const DisallowGarbageCollection& no_gc) {
2210 const int pattern_length = pattern.length();
2211 StringSearch<PChar, SChar> search(isolate, pattern);
2212 int found_at_index;
2213
2214 while (true) {
2215 found_at_index = search.Search(subject, start_index);
2216 if (found_at_index == -1) return;
2217
2218 (*number_of_matches)++;
2219 (*last_match_index) = found_at_index;
2220 start_index = pattern_length > 0
2221 ? found_at_index + pattern_length
2222 : AdvanceStringIndex(subject, start_index, is_unicode);
2223 }
2224}
2225
2226inline void RegExpMatchGlobalAtom_Dispatch(
2227 Isolate* isolate, const String::FlatContent& subject,
2228 const String::FlatContent& pattern, bool is_unicode, int start_index,
2229 int* number_of_matches, int* last_match_index,
2230 const DisallowGarbageCollection& no_gc) {
2231#define CALL_Generic() \
2232 RegExpMatchGlobalAtom_Generic(isolate, sv, pv, is_unicode, start_index, \
2233 number_of_matches, last_match_index, no_gc);
2234#define CALL_OneCharPattern() \
2235 RegExpMatchGlobalAtom_OneCharPattern(isolate, sv, pv[0], start_index, \
2236 number_of_matches, last_match_index, \
2237 no_gc);
2238 DCHECK_NOT_NULL(number_of_matches);
2239 DCHECK_NOT_NULL(last_match_index);
2240 if (pattern.IsOneByte()) {
2241 auto pv = pattern.ToOneByteVector();
2242 if (subject.IsOneByte()) {
2243 auto sv = subject.ToOneByteVector();
2244 if (pattern.length() == 1) {
2246 } else {
2247 CALL_Generic();
2248 }
2249 } else {
2250 auto sv = subject.ToUC16Vector();
2251 if (pattern.length() == 1) {
2253 } else {
2254 CALL_Generic();
2255 }
2256 }
2257 } else {
2258 auto pv = pattern.ToUC16Vector();
2259 if (subject.IsOneByte()) {
2260 auto sv = subject.ToOneByteVector();
2261 CALL_Generic();
2262 } else {
2263 auto sv = subject.ToUC16Vector();
2264 if (pattern.length() == 1) {
2266 } else {
2267 CALL_Generic();
2268 }
2269 }
2270 }
2271#undef CALL_OneCharPattern
2272#undef CALL_Generic
2273}
2274
2275} // namespace
2276
// Runtime entry point for a global match with an ATOM-type regexp.
//
// Counts the occurrences of the regexp's atom pattern in the flattened
// subject, resets the regexp's lastIndex to 0, records the final match in the
// isolate's last-match info, and returns a JSArray whose elements all refer to
// the pattern string (one per match). Returns null when nothing matched.
//
// Arguments (three are expected):
//   args[0]: the JSRegExp; asserted to be unmodified and to have the global
//            flag.
//   args[1]: the subject String.
//   args[2]: not read in the visible lines.
//
// NOTE(review): `data_handle` and `no_gc` are used below but their
// declarations do not appear in this listing, and the two statements under
// the "Caching." comments start mid-argument-list. Lines appear to be missing
// from this listing; confirm against the original file before editing.
2277RUNTIME_FUNCTION(Runtime_RegExpMatchGlobalAtom) {
2278 HandleScope scope(isolate);
2279 DCHECK_EQ(3, args.length());
2280
2281 DirectHandle<JSRegExp> regexp_handle = args.at<JSRegExp>(0);
2282 DirectHandle<String> subject_handle =
2283 String::Flatten(isolate, args.at<String>(1));
2285
2286 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp_handle));
2287 DCHECK(regexp_handle->flags() & JSRegExp::kGlobal);
2288 DCHECK_EQ(data_handle->type_tag(), RegExpData::Type::ATOM);
2289
2290 // Initialized below.
2291 DirectHandle<String> pattern_handle;
2292 int pattern_length;
2293
 // Match statistics; -1 in last_match_index means "no match found".
2294 int number_of_matches = 0;
2295 int last_match_index = -1;
2296
 // The inner scope works on raw Tagged<> references and passes `no_gc` to
 // the flat-content accessors and the dispatch helper.
2297 {
2299
2300 Tagged<JSRegExp> regexp = *regexp_handle;
2301 Tagged<String> subject = *subject_handle;
2302 Tagged<String> pattern = data_handle->pattern();
2303
2304 DCHECK(pattern->IsFlat());
2305 pattern_handle = direct_handle(pattern, isolate);
2306 pattern_length = pattern->length();
2307
2308 // Reset lastIndex (the final state after this call is always 0).
2309 regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER);
2310
2311 // Caching.
2312 int start_index = 0; // Start matching at the beginning.
 // NOTE(review): presumably a cache lookup that fills in the two counters
 // on a hit; the head of the condition is not visible here.
2314 isolate, subject, pattern, &number_of_matches, &last_match_index)) {
2315 DCHECK_GT(number_of_matches, 0);
2316 DCHECK_NE(last_match_index, -1);
 // Continue searching right behind the last match already known.
2317 start_index = last_match_index + pattern_length;
2318 }
2319
2320 const bool is_unicode = (regexp->flags() & JSRegExp::kUnicode) != 0;
2321 String::FlatContent subject_content = subject->GetFlatContent(no_gc);
2322 String::FlatContent pattern_content = pattern->GetFlatContent(no_gc);
2323 RegExpMatchGlobalAtom_Dispatch(isolate, subject_content, pattern_content,
2324 is_unicode, start_index, &number_of_matches,
2325 &last_match_index, no_gc);
2326
2327 if (last_match_index == -1) {
2328 // Not matched.
2329 return ReadOnlyRoots(isolate).null_value();
2330 }
2331
2332 // Successfully matched at least once:
2333 DCHECK_GE(last_match_index, 0);
2334
2335 // Caching.
 // NOTE(review): presumably stores the final counters in the same cache;
 // the head of the call is not visible here.
2337 isolate, subject, pattern, number_of_matches, last_match_index);
2338 }
2339
2340 // Update the LastMatchInfo.
2341 static constexpr int kNumberOfCaptures = 0; // ATOM.
 // Start and end offsets of the last match within the subject.
2342 int32_t match_indices[] = {last_match_index,
2343 last_match_index + pattern_length};
2344 DirectHandle<RegExpMatchInfo> last_match_info =
2345 isolate->regexp_last_match_info();
2346 RegExp::SetLastMatchInfo(isolate, last_match_info, subject_handle,
2347 kNumberOfCaptures, match_indices);
2348
2349 // Create the result array.
2350 auto elems = isolate->factory()->NewFixedArray(number_of_matches);
2351 ObjectSlot dst_slot = elems->RawFieldOfFirstElement();
 // Every element is the same pattern string, so the backing store is filled
 // in one pass; the range write barrier is only issued when the pattern
 // string is not in read-only space.
2352 MemsetTagged(dst_slot, *pattern_handle, number_of_matches);
2353 if (!HeapLayout::InReadOnlySpace(*pattern_handle)) {
2354 WriteBarrier::ForRange(isolate->heap(), *elems, dst_slot,
2355 dst_slot + number_of_matches);
2356 }
2357 DirectHandle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
2358 elems, TERMINAL_FAST_ELEMENTS_KIND, number_of_matches);
2359 return *result;
2360}
2361
2362} // namespace internal
2363} // namespace v8
Isolate * isolate_
union v8::internal::@341::BuiltinMetadata::KindSpecificData data
#define SBXCHECK(condition)
Definition check.h:61
SourcePosition pos
int length() const
Definition vector.h:64
Vector< T > SubVector(size_t from, size_t to) const
Definition vector.h:41
constexpr T * begin() const
Definition vector.h:96
static const int kMaxArguments
Definition code.h:463
base::SmallVector< ReplacementPart, kStaticVectorSlots > parts_
bool Compile(Isolate *isolate, DirectHandle< JSRegExp > regexp, DirectHandle< RegExpData > regexp_data, DirectHandle< String > replacement, int capture_count, int subject_length)
bool ParseReplacementPattern(base::Vector< Char > characters, Tagged< FixedArray > capture_name_map, int capture_count, int subject_length)
DirectHandleSmallVector< String, kStaticVectorSlots > replacement_substrings_
void Apply(ReplacementStringBuilder *builder, int match_from, int match_to, int32_t *match)
void emplace_back(Args &&... args)
Definition handles.h:1088
size_t size() const noexcept
Definition handles.h:1072
V8_INLINE Address address() const
Definition handles.h:695
V8_EXPORT_PRIVATE static V8_WARN_UNUSED_RESULT MaybeHandle< Object > Call(Isolate *isolate, DirectHandle< Object > callable, DirectHandle< Object > receiver, base::Vector< const DirectHandle< Object > > args)
Definition execution.cc:523
static V8_WARN_UNUSED_RESULT MaybeDirectHandle< JSReceiver > New(Isolate *isolate, DirectHandle< Object > constructor, base::Vector< const DirectHandle< Object > > args)
Definition execution.cc:556
V8_WARN_UNUSED_RESULT HandleType< String >::MaybeType NewConsString(HandleType< String > left, HandleType< String > right, AllocationType allocation=AllocationType::kYoung)
Handle< String > LookupSingleCharacterStringFromCode(uint16_t code)
Handle< FixedArray > NewFixedArrayWithHoles(int length, AllocationType allocation=AllocationType::kYoung)
Handle< FixedArray > NewFixedArray(int length, AllocationType allocation=AllocationType::kYoung)
Handle< JSArray > NewJSArray(ElementsKind elements_kind, int length, int capacity, ArrayStorageAllocationMode mode=ArrayStorageAllocationMode::DONT_INITIALIZE_ARRAY_ELEMENTS, AllocationType allocation=AllocationType::kYoung)
Definition factory.cc:3211
Handle< JSArray > NewJSArrayWithElements(DirectHandle< FixedArrayBase > elements, ElementsKind elements_kind, int length, AllocationType allocation=AllocationType::kYoung)
Definition factory.cc:3228
HandleType< String > NewSubString(HandleType< T > str, uint32_t begin, uint32_t end)
Definition factory-inl.h:88
static FixedArrayBuilder Lazy(Isolate *isolate)
static HandleType< FixedArray > RightTrimOrEmpty(Isolate *isolate, HandleType< FixedArray > array, int new_length)
static V8_EXPORT_PRIVATE HandleType< FixedArray > SetAndGrow(Isolate *isolate, HandleType< FixedArray > array, int index, DirectHandle< Object > value)
HandleType< T > CloseAndEscape(HandleType< T > handle_value)
static V8_INLINE bool InReadOnlySpace(Tagged< HeapObject > object)
MaybeDirectHandle< String > Finish()
V8_INLINE void AppendString(std::string_view str)
static V8_WARN_UNUSED_RESULT MaybeHandle< Object > GetProperty(Isolate *isolate, DirectHandle< JSReceiver > receiver, const char *key)
static DirectHandle< JSRegExpResultIndices > BuildIndices(Isolate *isolate, DirectHandle< RegExpMatchInfo > match_info, DirectHandle< Object > maybe_names)
Definition js-regexp.cc:18
static MaybeDirectHandle< JSRegExp > Initialize(DirectHandle< JSRegExp > regexp, DirectHandle< String > source, Flags flags, uint32_t backtrack_limit=kNoBacktrackLimit)
Definition js-regexp.cc:328
static V8_EXPORT_PRIVATE DirectHandle< String > StringFromFlags(Isolate *isolate, Flags flags)
Definition js-regexp.cc:144
static constexpr int CaptureCountForRegisters(int register_count)
Definition js-regexp.h:93
static constexpr int RegistersForCaptureCount(int count)
Definition js-regexp.h:90
static V8_WARN_UNUSED_RESULT MaybeHandle< Object > ToLength(Isolate *isolate, DirectHandle< Object > input)
static V8_WARN_UNUSED_RESULT HandleType< String >::MaybeType ToString(Isolate *isolate, HandleType< T > input)
V8_EXPORT_PRIVATE static V8_WARN_UNUSED_RESULT Maybe< bool > AddDataProperty(LookupIterator *it, DirectHandle< Object > value, PropertyAttributes attributes, Maybe< ShouldThrow > should_throw, StoreOrigin store_origin, EnforceDefineSemantics semantics=EnforceDefineSemantics::kSet)
Definition objects.cc:2667
static V8_WARN_UNUSED_RESULT HandleType< Number >::MaybeType ToNumber(Isolate *isolate, HandleType< T > input)
static V8_WARN_UNUSED_RESULT HandleType< Number >::MaybeType ToInteger(Isolate *isolate, HandleType< T > input)
static V8_WARN_UNUSED_RESULT HandleType< JSReceiver >::MaybeType ToObject(Isolate *isolate, HandleType< T > object, const char *method_name=nullptr)
static V8_EXPORT_PRIVATE bool ToInt32(Tagged< Object > obj, int32_t *value)
Definition objects.cc:1438
static V8_WARN_UNUSED_RESULT MaybeDirectHandle< Object > SpeciesConstructor(Isolate *isolate, DirectHandle< JSReceiver > recv, DirectHandle< JSFunction > default_ctor)
Definition objects.cc:1791
V8_EXPORT_PRIVATE static V8_WARN_UNUSED_RESULT MaybeHandle< Object > GetProperty(LookupIterator *it, bool is_global_reference=false)
Definition objects.cc:1248
static bool ToUint32(Tagged< Object > obj, uint32_t *value)
static V8_WARN_UNUSED_RESULT Maybe< bool > SetDataProperty(LookupIterator *it, DirectHandle< Object > value)
Definition objects.cc:2604
static V8_WARN_UNUSED_RESULT MaybeHandle< Object > GetElement(Isolate *isolate, DirectHandle< JSAny > object, uint32_t index)
static constexpr bool TypeSupportsCaptures(Type t)
Definition js-regexp.h:177
static DirectHandle< RegExpMatchInfo > ReserveCaptures(Isolate *isolate, DirectHandle< RegExpMatchInfo > match_info, int capture_count)
static bool TryGet(Isolate *isolate, Tagged< String > subject, Tagged< String > pattern, int *number_of_matches_out, int *last_match_index_out)
Definition regexp.cc:1421
static void TryInsert(Isolate *isolate, Tagged< String > subject, Tagged< String > pattern, int number_of_matches, int last_match_index)
Definition regexp.cc:1403
static void Enter(Isolate *isolate, DirectHandle< String > key_string, DirectHandle< Object > key_pattern, DirectHandle< FixedArray > value_array, DirectHandle< FixedArray > last_match_cache, ResultsCacheType type)
Definition regexp.cc:1335
static Tagged< Object > Lookup(Heap *heap, Tagged< String > key_string, Tagged< Object > key_pattern, Tagged< FixedArray > *last_match_out, ResultsCacheType type)
Definition regexp.cc:1301
static Handle< String > GenericCaptureGetter(Isolate *isolate, DirectHandle< RegExpMatchInfo > match_info, int capture, bool *ok=nullptr)
static V8_WARN_UNUSED_RESULT MaybeDirectHandle< Object > GetLastIndex(Isolate *isolate, DirectHandle< JSReceiver > recv)
static V8_WARN_UNUSED_RESULT MaybeDirectHandle< Object > SetAdvancedStringIndex(Isolate *isolate, DirectHandle< JSReceiver > regexp, DirectHandle< String > string, bool unicode)
static V8_WARN_UNUSED_RESULT MaybeDirectHandle< Object > SetLastIndex(Isolate *isolate, DirectHandle< JSReceiver > regexp, uint64_t value)
static V8_WARN_UNUSED_RESULT MaybeDirectHandle< JSAny > RegExpExec(Isolate *isolate, DirectHandle< JSReceiver > regexp, DirectHandle< String > string, DirectHandle< Object > exec)
static uint64_t AdvanceStringIndex(Tagged< String > string, uint64_t index, bool unicode)
static bool IsMatchedCapture(Tagged< RegExpMatchInfo > match_info, int capture)
static bool IsUnmodifiedRegExp(Isolate *isolate, DirectHandle< Object > obj)
V8_EXPORT_PRIVATE static V8_WARN_UNUSED_RESULT std::optional< int > ExperimentalOneshotExec(Isolate *isolate, DirectHandle< JSRegExp > regexp, DirectHandle< String > subject, int index, int32_t *result_offsets_vector, uint32_t result_offsets_vector_length)
Definition regexp.cc:320
static DirectHandle< RegExpMatchInfo > SetLastMatchInfo(Isolate *isolate, DirectHandle< RegExpMatchInfo > last_match_info, DirectHandle< String > subject, int capture_count, int32_t *match)
Definition regexp.cc:896
V8_EXPORT_PRIVATE static V8_WARN_UNUSED_RESULT std::optional< int > Exec(Isolate *isolate, DirectHandle< JSRegExp > regexp, DirectHandle< String > subject, int index, int32_t *result_offsets_vector, uint32_t result_offsets_vector_length)
Definition regexp.cc:332
V8_EXPORT_PRIVATE static V8_WARN_UNUSED_RESULT MaybeDirectHandle< Object > Exec_Single(Isolate *isolate, DirectHandle< JSRegExp > regexp, DirectHandle< String > subject, int index, DirectHandle< RegExpMatchInfo > last_match_info)
Definition regexp.cc:358
static V8_WARN_UNUSED_RESULT bool EnsureFullyCompiled(Isolate *isolate, DirectHandle< RegExpData > re_data, DirectHandle< String > subject)
Definition regexp.cc:294
static void AddSubjectSlice(FixedArrayBuilder *builder, int from, int to)
MaybeDirectHandle< String > ToString()
void AddString(DirectHandle< String > string)
static constexpr int ToInt(const Tagged< Object > object)
Definition smi.h:33
static constexpr Tagged< Smi > FromInt(int value)
Definition smi.h:38
static constexpr Tagged< Smi > zero()
Definition smi.h:99
int Search(base::Vector< const SubjectChar > subject, int index)
base::Vector< const uint8_t > ToOneByteVector() const
Definition string.h:139
base::Vector< const base::uc16 > ToUC16Vector() const
Definition string.h:145
static void WriteToFlat(Tagged< String > source, SinkCharT *sink, uint32_t start, uint32_t length)
Definition string.cc:772
static const uint32_t kMaxLength
Definition string.h:511
static V8_INLINE HandleType< String > Flatten(Isolate *isolate, HandleType< T > string, AllocationType allocation=AllocationType::kYoung)
static Tagged< Object > IndexOf(Isolate *isolate, DirectHandle< Object > receiver, DirectHandle< Object > search, DirectHandle< Object > position)
Definition string.cc:1426
static V8_WARN_UNUSED_RESULT MaybeDirectHandle< String > GetSubstitution(Isolate *isolate, Match *match, DirectHandle< String > replacement, uint32_t start_index=0)
Definition string.cc:1495
static void ForRange(Heap *heap, Tagged< HeapObject > object, TSlot start, TSlot end)
int start
int end
#define RUNTIME_FUNCTION(Name)
Definition arguments.h:162
#define RETURN_ON_EXCEPTION(isolate, call)
Definition isolate.h:395
#define ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, dst, call)
Definition isolate.h:284
#define ASSIGN_RETURN_ON_EXCEPTION(isolate, dst, call)
Definition isolate.h:291
#define THROW_NEW_ERROR_RETURN_FAILURE(isolate, call)
Definition isolate.h:294
#define RETURN_FAILURE_ON_EXCEPTION(isolate, call)
Definition isolate.h:368
#define FOR_WITH_HANDLE_SCOPE(isolate, loop_var_type, init, loop_var, limit_check, increment, body)
Definition isolate.h:457
#define RETURN_RESULT_OR_FAILURE(isolate, call)
Definition isolate.h:264
base::Vector< const DirectHandle< Object > > args
Definition execution.cc:74
DirectHandle< FixedArray > capture_name_map
std::string pattern
double second
ZoneVector< RpoNumber > & result
int position
Definition liveedit.cc:290
int m
Definition mul-fft.cc:294
int int32_t
Definition unicode.cc:40
unsigned short uint16_t
Definition unicode.cc:39
uint16_t uc16
Definition strings.h:18
constexpr Vector< T > VectorOf(T *start, size_t size)
Definition vector.h:360
Vector< const uint8_t > OneByteVector(const char *data, size_t length)
Definition vector.h:337
bool(* FunctionType)(const Operation &op, Zone *zone)
Definition use-map.h:12
void FindOneByteStringIndices(base::Vector< const uint8_t > subject, uint8_t pattern, std::vector< int > *indices, unsigned int limit)
constexpr int kIntSize
Definition globals.h:400
PerThreadAssertScopeDebugOnly< false, SAFEPOINTS_ASSERT, HEAP_ALLOCATION_ASSERT > DisallowGarbageCollection
@ SKIP_WRITE_BARRIER
Definition objects.h:52
uint32_t PositiveNumberToUint32(Tagged< Object > number)
bool Is(IndirectHandle< U > value)
Definition handles-inl.h:51
void PrintF(const char *format,...)
Definition utils.cc:39
void FindStringIndicesDispatch(Isolate *isolate, Tagged< String > subject, Tagged< String > pattern, std::vector< int > *indices, unsigned int limit)
Tagged(T object) -> Tagged< T >
static V8_WARN_UNUSED_RESULT Tagged< Object > StringReplaceGlobalRegExpWithEmptyString(Isolate *isolate, DirectHandle< String > subject, DirectHandle< JSRegExp > regexp, DirectHandle< RegExpData > regexp_data, DirectHandle< RegExpMatchInfo > last_match_info)
kStaticElementsTemplateOffset kInstancePropertiesTemplateOffset Tagged< FixedArray >
void MemsetTagged(Tagged_t *start, Tagged< MaybeObject > value, size_t counter)
Definition slots-inl.h:486
@ TERMINAL_FAST_ELEMENTS_KIND
V8_INLINE DirectHandle< T > direct_handle(Tagged< T > object, Isolate *isolate)
Flag flags[]
Definition flags.cc:3797
static V8_WARN_UNUSED_RESULT Tagged< Object > StringReplaceGlobalAtomRegExpWithString(Isolate *isolate, DirectHandle< String > subject, DirectHandle< JSRegExp > pattern_regexp, DirectHandle< String > replacement, DirectHandle< RegExpMatchInfo > last_match_info, DirectHandle< AtomRegExpData > regexp_data)
uint32_t NumberToUint32(Tagged< Object > number)
V8_EXPORT_PRIVATE FlagValues v8_flags
void FindStringIndices(Isolate *isolate, base::Vector< const SubjectChar > subject, base::Vector< const PatternChar > pattern, std::vector< int > *indices, unsigned int limit)
constexpr int kMaxInt
Definition globals.h:374
void FindTwoByteStringIndices(const base::Vector< const base::uc16 > subject, base::uc16 pattern, std::vector< int > *indices, unsigned int limit)
constexpr uint32_t kMaxUInt32
Definition globals.h:387
kInstanceDescriptorsOffset kTransitionsOrPrototypeInfoOffset IsNull(value)||IsJSProxy(value)||IsWasmObject(value)||(IsJSObject(value) &&(HeapLayout
Definition map-inl.h:70
template const char * string
static V8_WARN_UNUSED_RESULT Tagged< Object > StringReplaceGlobalRegExpWithString(Isolate *isolate, DirectHandle< String > subject, DirectHandle< JSRegExp > regexp, DirectHandle< RegExpData > regexp_data, DirectHandle< String > replacement, DirectHandle< RegExpMatchInfo > last_match_info)
Tagged< To > Cast(Tagged< From > value, const v8::SourceLocation &loc=INIT_SOURCE_LOCATION_IN_DEBUG)
Definition casting.h:150
Maybe< T > Just(const T &t)
Definition v8-maybe.h:117
bool has_named_captures_
ZoneList< RegExpCapture * > * captures_
#define CALL_Generic()
#define CALL_OneCharPattern()
DirectHandle< String > subject_
DirectHandle< String > match_
DirectHandle< FixedArray > capture_name_map_
DirectHandle< JSReceiver > groups_obj_
const uint32_t match_position_
DirectHandle< RegExpMatchInfo > match_info_
#define DCHECK_LE(v1, v2)
Definition logging.h:490
#define CHECK_GE(lhs, rhs)
#define CHECK(condition)
Definition logging.h:124
#define CHECK_LT(lhs, rhs)
#define CHECK_LE(lhs, rhs)
#define DCHECK_NOT_NULL(val)
Definition logging.h:492
#define DCHECK_IMPLIES(v1, v2)
Definition logging.h:493
#define DCHECK_NE(v1, v2)
Definition logging.h:486
#define DCHECK_GE(v1, v2)
Definition logging.h:488
#define DCHECK(condition)
Definition logging.h:482
#define DCHECK_LT(v1, v2)
Definition logging.h:489
#define DCHECK_EQ(v1, v2)
Definition logging.h:485
#define DCHECK_GT(v1, v2)
Definition logging.h:487
static ReplacementPart ReplacementSubString(int from, int to)
static ReplacementPart SubjectSuffix(int subject_length)
static ReplacementPart SubjectCapture(int capture_index)
#define V8_WARN_UNUSED_RESULT
Definition v8config.h:671