17#include "unicode/uchar.h"
18#include "unicode/unistr.h"
25 : slow_safe_compiler_(false),
26 backtrack_limit_(
JSRegExp::kNoBacktrackLimit),
27 global_mode_(NOT_GLOBAL),
46 size_t length = byte_length / 2;
51 UChar32 c1 = RegExpCaseFolding::Canonicalize(substring1[
i]);
52 UChar32 c2 = RegExpCaseFolding::Canonicalize(substring2[
i]);
76 int32_t length =
static_cast<int32_t
>(byte_length >> 1);
77 icu::UnicodeString uni_str_1(
reinterpret_cast<const char16_t*
>(byte_offset1),
79 return uni_str_1.caseCompare(
reinterpret_cast<const char16_t*
>(byte_offset2),
80 length, U_FOLD_CASE_DEFAULT) == 0;
84 size_t length = byte_length >> 1;
87 isolate->regexp_macro_assembler_canonicalize();
93 canonicalize->
get(c1,
'\0', s1);
96 canonicalize->
get(c2,
'\0', s2);
111 for (
int i = 0;
i < ranges->
length();
i++) {
115 return static_cast<uint32_t
>(seed);
125int RangeArrayLengthFor(
const ZoneList<CharacterRange>* ranges) {
126 const int ranges_length = ranges->length();
127 return MaskEndOfRangeMarker(ranges->at(ranges_length - 1).to()) ==
kMaxUInt16
128 ? ranges_length * 2 - 1
132bool Equals(
const ZoneList<CharacterRange>* lhs,
133 const DirectHandle<FixedUInt16Array>& rhs) {
134 const int rhs_length = rhs->length();
135 if (rhs_length != RangeArrayLengthFor(lhs))
return false;
136 for (
int i = 0;
i < lhs->
length();
i++) {
137 const CharacterRange&
r = lhs->at(
i);
138 if (rhs->get(
i * 2 + 0) !=
r.from())
return false;
139 if (
i * 2 + 1 == rhs_length)
break;
140 if (rhs->get(
i * 2 + 1) !=
r.to() + 1)
return false;
146 Isolate* isolate,
const ZoneList<CharacterRange>* ranges) {
147 const int ranges_length = ranges->length();
148 const int range_array_length = RangeArrayLengthFor(ranges);
151 for (
int i = 0;
i < ranges_length;
i++) {
152 const CharacterRange&
r = ranges->at(
i);
154 range_array->set(
i * 2 + 0,
r.
from());
155 const base::uc32 to = MaskEndOfRangeMarker(
r.to());
157 DCHECK_EQ(range_array_length, ranges_length * 2 - 1);
161 range_array->set(
i * 2 + 1, to + 1);
170 const uint32_t hash =
Hash(ranges);
174 if (Equals(ranges, range_array))
return range_array;
187 static constexpr uint32_t
kTrue = 1;
188 static constexpr uint32_t
kFalse = 0;
195 if (current_char < ranges->get(0))
return kFalse;
196 if (current_char >= ranges->get(ranges->length() - 1)) {
198 return (ranges->length() % 2) == 0 ?
kFalse :
kTrue;
205 int mid, lower = 0, upper = ranges->length();
207 mid = lower + (upper - lower) / 2;
209 if (current_char < elem) {
211 }
else if (current_char > elem) {
217 }
while (lower < upper);
219 const bool current_char_ge_last_elem = current_char >= ranges->get(mid);
220 const int current_range_start_index =
221 current_char_ge_last_elem ? mid : mid - 1;
224 return (current_range_start_index % 2) == 0 ?
kTrue :
kFalse;
240 Label* on_outside_input) {
245 Label* on_end_of_input,
251 eats_at_least = characters;
259 int cp_offset,
Label* on_end_of_input,
bool check_bounds,
int characters,
267 if (cp_offset >= 0) {
268 CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input);
280#ifndef COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
287 Address* subject,
const uint8_t** input_start,
const uint8_t** input_end,
291 DCHECK_LE(re_code->instruction_start(), old_pc);
295 bool js_has_overflowed = check.JsHasOverflowed(gap);
307 if (js_has_overflowed) {
309 }
else if (check.InterruptRequested()) {
323 int return_value = 0;
327 if (js_has_overflowed) {
329 isolate->StackOverflow();
331 }
else if (check.InterruptRequested()) {
340 if (!code_handle->SafeEquals(re_code)) {
343 Address new_pc = old_pc + delta;
350 if (return_value == 0) {
357 return_value =
RETRY;
359 *subject = subject_handle->ptr();
360 intptr_t byte_length = *input_end - *input_start;
361 *input_start = subject_handle->AddressOfCharacterAt(start_index, no_gc);
362 *input_end = *input_start + byte_length;
372 int offsets_vector_length,
373 int previous_index,
Isolate* isolate) {
374 DCHECK(subject->IsFlat());
376 DCHECK_LE(previous_index, subject->length());
384 int start_offset = previous_index;
385 int char_length = subject_ptr->length() - start_offset;
386 int slice_offset = 0;
395 subject_ptr = slice->parent();
396 slice_offset = slice->offset();
402 bool is_one_byte = subject_ptr->IsOneByteRepresentation();
403 DCHECK(IsExternalString(subject_ptr) || IsSeqString(subject_ptr));
405 int char_size_shift = is_one_byte ? 0 : 1;
408 const uint8_t* input_start =
409 subject_ptr->AddressOfCharacterAt(start_offset + slice_offset, no_gc);
410 int byte_length = char_length << char_size_shift;
411 const uint8_t* input_end = input_start + byte_length;
412 return Execute(*subject, start_offset, input_start, input_end, offsets_vector,
413 offsets_vector_length, isolate, *regexp_data);
418 Tagged<String> input,
int start_offset,
const uint8_t* input_start,
419 const uint8_t* input_end,
int* output,
int output_size,
Isolate* isolate,
423 return Execute(input, start_offset, input_start, input_end, output,
431 int start_offset,
const uint8_t* input_start,
const uint8_t* input_end,
432 int* output,
int output_size,
Isolate* isolate,
435 Tagged<Code> code = regexp_data->code(isolate, is_one_byte);
438 using RegexpMatcherSig =
440 int(
Address input_string,
int start_offset,
const uint8_t* input_start,
441 const uint8_t* input_end,
int* output,
int output_size,
446 fn.Call(input.ptr(), start_offset, input_start, input_end, output,
456 isolate->StackOverflow();
465 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
466 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
467 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
468 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
470 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
471 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
472 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,
473 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
475 0x00u, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,
476 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,
477 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,
478 0xFFu, 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0xFFu,
480 0x00u, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,
481 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,
482 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,
483 0xFFu, 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
485 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
486 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
487 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
488 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
490 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
491 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
492 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
493 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
495 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
496 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
497 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
498 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
500 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
501 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
502 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
503 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
511 RegExpStack* regexp_stack = isolate->regexp_stack();
512 const size_t old_size = regexp_stack->
memory_size();
517 CHECK_LE(old_stack_pointer, old_stack_top);
518 CHECK_LE(
static_cast<size_t>(old_stack_top - old_stack_pointer), old_size);
#define SBXCHECK(condition)
int get(uchar c, uchar n, uchar *result)
V8_INLINE Address address() const
static Handle< FixedIntegerArrayBase< T, Base > > New(Isolate *isolate, int length, MoreArgs &&... more_args)
static GeneratedCode FromCode(Isolate *isolate, Tagged< Code > code)
static constexpr uint32_t kNoBacktrackLimit
static Address GrowStack(Isolate *isolate)
Handle< ByteArray > GetOrAddRangeArray(const ZoneList< CharacterRange > *ranges)
ZoneUnorderedMap< uint32_t, IndirectHandle< FixedUInt16Array > > range_array_cache_
static int Match(DirectHandle< IrRegExpData > regexp_data, DirectHandle< String > subject, int *offsets_vector, int offsets_vector_length, int previous_index, Isolate *isolate)
virtual void LoadCurrentCharacterUnchecked(int cp_offset, int character_count)=0
static const uint8_t word_character_map[256]
bool CanReadUnaligned() const override
static int Execute(Tagged< String > input, int start_offset, const uint8_t *input_start, const uint8_t *input_end, int *output, int output_size, Isolate *isolate, Tagged< IrRegExpData > regexp_data)
void LoadCurrentCharacterImpl(int cp_offset, Label *on_end_of_input, bool check_bounds, int characters, int eats_at_least) override
static int CheckStackGuardState(Isolate *isolate, int start_index, RegExp::CallOrigin call_origin, Address *return_address, Tagged< InstructionStream > re_code, Address *subject, const uint8_t **input_start, const uint8_t **input_end, uintptr_t gap)
static V8_EXPORT_PRIVATE int ExecuteForTesting(Tagged< String > input, int start_offset, const uint8_t *input_start, const uint8_t *input_end, int *output, int output_size, Isolate *isolate, Tagged< JSRegExp > regexp)
static V8_INLINE void ReplacePC(Address *pc_address, Address new_pc, int offset_from_sp)
static V8_INLINE Address AuthenticatePC(Address *pc_address, unsigned offset_from_sp)
bool has_backtrack_limit() const
void CheckNotInSurrogatePair(int cp_offset, Label *on_failure)
virtual void CheckCharacterInRange(base::uc16 from, base::uc16 to, Label *on_in_range)=0
virtual void LoadCurrentCharacterImpl(int cp_offset, Label *on_end_of_input, bool check_bounds, int characters, int eats_at_least)=0
static int CaseInsensitiveCompareUnicode(Address byte_offset1, Address byte_offset2, size_t byte_length, Isolate *isolate)
static constexpr int kMaxCPOffset
virtual void Bind(Label *label)=0
static uint32_t IsCharacterInRangeArray(uint32_t current_char, Address raw_byte_array)
static constexpr int kUseCharactersValue
static constexpr int kMinCPOffset
static int CaseInsensitiveCompareNonUnicode(Address byte_offset1, Address byte_offset2, size_t byte_length, Isolate *isolate)
uint32_t backtrack_limit_
RegExpMacroAssembler(Isolate *isolate, Zone *zone)
virtual void CheckPosition(int cp_offset, Label *on_outside_input)
V8_EXPORT_PRIVATE void LoadCurrentCharacter(int cp_offset, Label *on_end_of_input, bool check_bounds=true, int characters=1, int eats_at_least=kUseCharactersValue)
Isolate * isolate() const
virtual void CheckCharacterNotInRange(base::uc16 from, base::uc16 to, Label *on_not_in_range)=0
Address memory_top() const
size_t memory_size() const
Address EnsureCapacity(size_t size)
Address stack_pointer() const
static bool IsOneByteRepresentationUnderneath(Tagged< String > string)
static const base::uc32 kMaxCodePoint
V8_INLINE constexpr StorageType ptr() const
ZoneVector< RpoNumber > & result
V8_INLINE size_t hash_combine(size_t seed, size_t hash)
constexpr bool IsInRange(T value, U lower_limit, U higher_limit)
static const base::uc32 kTrailSurrogateStart
static constexpr uint32_t kFalse
bool Is(IndirectHandle< U > value)
static const base::uc32 kTrailSurrogateEnd
static uint32_t Hash(RegisteredExtension *extension)
V8_EXPORT_PRIVATE FlagValues v8_flags
static const base::uc32 kLeadSurrogateStart
static constexpr Address kNullAddress
static const base::uc32 kLeadSurrogateEnd
Tagged< To > Cast(Tagged< From > value, const v8::SourceLocation &loc=INIT_SOURCE_LOCATION_IN_DEBUG)
static constexpr AcquireLoadTag kAcquireLoad
#define DCHECK_LE(v1, v2)
#define CHECK_LE(lhs, rhs)
#define DCHECK_NOT_NULL(val)
#define DCHECK_IMPLIES(v1, v2)
#define DCHECK_GE(v1, v2)
#define DCHECK(condition)
#define DCHECK_LT(v1, v2)
#define DCHECK_EQ(v1, v2)