v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
regexp-macro-assembler.h
Go to the documentation of this file.
1// Copyright 2012 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_
6#define V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_
7
8#include "src/base/strings.h"
12#include "src/regexp/regexp.h"
13
14namespace v8 {
15namespace internal {
16
17class ByteArray;
18class JSRegExp;
19class Label;
20class String;
21
22static const base::uc32 kLeadSurrogateStart = 0xd800;
23static const base::uc32 kLeadSurrogateEnd = 0xdbff;
24static const base::uc32 kTrailSurrogateStart = 0xdc00;
25static const base::uc32 kTrailSurrogateEnd = 0xdfff;
26static const base::uc32 kNonBmpStart = 0x10000;
27static const base::uc32 kNonBmpEnd = 0x10ffff;
28
30 public:
31 // The implementation must be able to handle at least:
32 static constexpr int kMaxRegisterCount = (1 << 16);
33 static constexpr int kMaxRegister = kMaxRegisterCount - 1;
34 static constexpr int kMaxCaptures = (kMaxRegister - 1) / 2;
35 static constexpr int kMaxCPOffset = (1 << 15) - 1;
36 static constexpr int kMinCPOffset = -(1 << 15);
37
38 static constexpr int kTableSizeBits = 7;
39 static constexpr int kTableSize = 1 << kTableSizeBits;
40 static constexpr int kTableMask = kTableSize - 1;
41
42 static constexpr int kUseCharactersValue = -1;
43
45 virtual ~RegExpMacroAssembler() = default;
46
48 RegExpFlags flags) = 0;
49
50 // This function is called when code generation is aborted, so that
51 // the assembler could clean up internal data structures.
52 virtual void AbortedCodeGeneration() {}
53 // The maximal number of pushes between stack checks. Users must supply
54 // kCheckStackLimit flag to push operations (instead of kNoStackLimitCheck)
55 // at least once for every stack_limit() pushes that are executed.
57 virtual bool CanReadUnaligned() const = 0;
58
59 virtual void AdvanceCurrentPosition(int by) = 0; // Signed cp change.
60 virtual void AdvanceRegister(int reg, int by) = 0; // r[reg] += by.
61 // Continues execution from the position pushed on the top of the backtrack
62 // stack by an earlier PushBacktrack(Label*).
63 virtual void Backtrack() = 0;
64 virtual void Bind(Label* label) = 0;
65 // Dispatch after looking the current character up in a 2-bits-per-entry
66 // map. The destinations vector has up to 4 labels.
67 virtual void CheckCharacter(unsigned c, Label* on_equal) = 0;
68 // Bitwise and the current character with the given constant and then
69 // check for a match with c.
70 virtual void CheckCharacterAfterAnd(unsigned c,
71 unsigned and_with,
72 Label* on_equal) = 0;
73 virtual void CheckCharacterGT(base::uc16 limit, Label* on_greater) = 0;
74 virtual void CheckCharacterLT(base::uc16 limit, Label* on_less) = 0;
75 virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0;
76 virtual void CheckAtStart(int cp_offset, Label* on_at_start) = 0;
77 virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start) = 0;
78 virtual void CheckNotBackReference(int start_reg, bool read_backward,
79 Label* on_no_match) = 0;
80 virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
81 bool read_backward, bool unicode,
82 Label* on_no_match) = 0;
83 // Check the current character for a match with a literal character. If we
84 // fail to match then goto the on_failure label. End of input always
85 // matches. If the label is nullptr then we should pop a backtrack address
86 // off the stack and go to that.
87 virtual void CheckNotCharacter(unsigned c, Label* on_not_equal) = 0;
88 virtual void CheckNotCharacterAfterAnd(unsigned c,
89 unsigned and_with,
90 Label* on_not_equal) = 0;
91 // Subtract a constant from the current character, then and with the given
92 // constant and then check for a match with c.
94 base::uc16 and_with,
95 Label* on_not_equal) = 0;
97 base::uc16 to, // Both inclusive.
98 Label* on_in_range) = 0;
100 base::uc16 to, // Both inclusive.
101 Label* on_not_in_range) = 0;
102 // Returns true if the check was emitted, false otherwise.
104 const ZoneList<CharacterRange>* ranges, Label* on_in_range) = 0;
106 const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) = 0;
107
108 // The current character (modulus the kTableSize) is looked up in the byte
109 // array, and if the found byte is non-zero, we jump to the on_bit_set label.
110 virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) = 0;
111
112 virtual void SkipUntilBitInTable(int cp_offset, Handle<ByteArray> table,
113 Handle<ByteArray> nibble_table,
114 int advance_by) = 0;
115 virtual bool SkipUntilBitInTableUseSimd(int advance_by) { return false; }
116
117 // Checks whether the given offset from the current position is before
118 // the end of the string. May overwrite the current character.
119 virtual void CheckPosition(int cp_offset, Label* on_outside_input);
120 // Check whether a standard/default character class matches the current
121 // character. Returns false if the type of special character class does
122 // not have custom support.
123 // May clobber the current loaded character.
125 Label* on_no_match) {
126 return false;
127 }
128
129 // Control-flow integrity:
130 // Define a jump target and bind a label.
131 virtual void BindJumpTarget(Label* label) { Bind(label); }
132
133 virtual void Fail() = 0;
134 virtual void GoTo(Label* label) = 0;
135 // Check whether a register is >= a given constant and go to a label if it
136 // is. Backtracks instead if the label is nullptr.
137 virtual void IfRegisterGE(int reg, int comparand, Label* if_ge) = 0;
138 // Check whether a register is < a given constant and go to a label if it is.
139 // Backtracks instead if the label is nullptr.
140 virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
141 // Check whether a register is == to the current position and go to a
142 // label if it is.
143 virtual void IfRegisterEqPos(int reg, Label* if_eq) = 0;
145 int cp_offset, Label* on_end_of_input, bool check_bounds = true,
146 int characters = 1, int eats_at_least = kUseCharactersValue);
147 virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
148 bool check_bounds, int characters,
149 int eats_at_least) = 0;
150 virtual void PopCurrentPosition() = 0;
151 virtual void PopRegister(int register_index) = 0;
152 // Pushes the label on the backtrack stack, so that a following Backtrack
153 // will go to this label. Always checks the backtrack stack limit.
154 virtual void PushBacktrack(Label* label) = 0;
155 virtual void PushCurrentPosition() = 0;
157 virtual void PushRegister(int register_index,
158 StackCheckFlag check_stack_limit) = 0;
160 virtual void ReadStackPointerFromRegister(int reg) = 0;
161 virtual void SetCurrentPositionFromEnd(int by) = 0;
162 virtual void SetRegister(int register_index, int to) = 0;
163 // Return whether the matching (with a global regexp) will be restarted.
164 virtual bool Succeed() = 0;
165 virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
166 virtual void ClearRegisters(int reg_from, int reg_to) = 0;
167 virtual void WriteStackPointerToRegister(int reg) = 0;
168
169 // Check that we are not in the middle of a surrogate pair.
170 void CheckNotInSurrogatePair(int cp_offset, Label* on_failure);
171
172#define IMPLEMENTATIONS_LIST(V) \
173 V(IA32) \
174 V(ARM) \
175 V(ARM64) \
176 V(MIPS) \
177 V(LOONG64) \
178 V(RISCV) \
179 V(RISCV32) \
180 V(S390) \
181 V(PPC) \
182 V(X64) \
183 V(Bytecode)
184
186#define V(Name) k##Name##Implementation,
188#undef V
189 };
190
192 static const char* const kNames[] = {
193#define V(Name) #Name,
195#undef V
196 };
197 return kNames[impl];
198 }
199#undef IMPLEMENTATIONS_LIST
201
202 // Compare two-byte strings case insensitively.
203 //
204 // Called from generated code.
205 static int CaseInsensitiveCompareNonUnicode(Address byte_offset1,
206 Address byte_offset2,
207 size_t byte_length,
208 Isolate* isolate);
209 static int CaseInsensitiveCompareUnicode(Address byte_offset1,
210 Address byte_offset2,
211 size_t byte_length,
212 Isolate* isolate);
213
214 // `raw_byte_array` is a ByteArray containing a set of character ranges,
215 // where ranges are encoded as uint16_t elements:
216 //
217 // [from0, to0, from1, to1, ..., fromN, toN], or
218 // [from0, to0, from1, to1, ..., fromN] (open-ended last interval).
219 //
220 // fromN is inclusive, toN is exclusive. Returns zero if not in a range,
221 // non-zero otherwise.
222 //
223 // Called from generated code.
224 static uint32_t IsCharacterInRangeArray(uint32_t current_char,
225 Address raw_byte_array);
226
227 // Controls the generation of large inlined constants in the code.
228 void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
229 bool slow_safe() const { return slow_safe_compiler_; }
230
231 // Controls after how many backtracks irregexp should abort execution. If it
232 // can fall back to the experimental engine (see `set_can_fallback`), it will
233 // return the appropriate error code, otherwise it will return the number of
234 // matches found so far (perhaps none).
238
239 // Set whether or not irregexp can fall back to the experimental engine on
240 // excessive backtracking. The number of backtracks considered excessive can
241 // be controlled with set_backtrack_limit.
242 void set_can_fallback(bool val) { can_fallback_ = val; }
243
250 // Set whether the regular expression has the global flag. Exiting due to
251 // a failure in a global regexp may still mean success overall.
253 inline bool global() const { return global_mode_ != NOT_GLOBAL; }
254 inline bool global_with_zero_length_check() const {
256 }
257 inline bool global_unicode() const { return global_mode_ == GLOBAL_UNICODE; }
258
259 Isolate* isolate() const { return isolate_; }
260 Zone* zone() const { return zone_; }
261
262 protected:
263 bool has_backtrack_limit() const;
264 uint32_t backtrack_limit() const { return backtrack_limit_; }
265
266 bool can_fallback() const { return can_fallback_; }
267
268 private:
271 bool can_fallback_ = false;
274 Zone* const zone_;
275};
276
278 public:
279 // Type of input string to generate code for.
280 enum Mode { LATIN1 = 1, UC16 = 2 };
281
282 // Result of calling generated native RegExp code.
283 // RETRY: Something significant changed during execution, and the matching
284 // should be retried from scratch.
285 // EXCEPTION: Something failed during execution. If no exception has been
286 // thrown, it's an internal out-of-memory, and the caller should
287 // throw the exception.
288 // FAILURE: Matching failed.
289 // SUCCESS: Matching succeeded, and the output array has been filled with
290 // capture positions.
291 // FALLBACK_TO_EXPERIMENTAL: Execute the regexp on this subject using the
292 // experimental engine instead.
301
304 ~NativeRegExpMacroAssembler() override = default;
305
306 // Returns a {Result} sentinel, or the number of successful matches.
307 static int Match(DirectHandle<IrRegExpData> regexp_data,
308 DirectHandle<String> subject, int* offsets_vector,
309 int offsets_vector_length, int previous_index,
310 Isolate* isolate);
311
313 Tagged<String> input, int start_offset, const uint8_t* input_start,
314 const uint8_t* input_end, int* output, int output_size, Isolate* isolate,
315 Tagged<JSRegExp> regexp);
316
317 bool CanReadUnaligned() const override;
318
319 void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
320 bool check_bounds, int characters,
321 int eats_at_least) override;
322 // Load a number of characters at the given offset from the
323 // current position, into the current-character register.
324 virtual void LoadCurrentCharacterUnchecked(int cp_offset,
325 int character_count) = 0;
326
327 // Called from RegExp if the backtrack stack limit is hit. Tries to expand
328 // the stack. Returns the new stack-pointer if successful, or returns 0 if
329 // unable to grow the stack.
330 // This function must not trigger a garbage collection.
331 //
332 // Called from generated code.
333 static Address GrowStack(Isolate* isolate);
334
335 // Called from generated code.
336 static int CheckStackGuardState(Isolate* isolate, int start_index,
338 Address* return_address,
340 Address* subject, const uint8_t** input_start,
341 const uint8_t** input_end, uintptr_t gap);
342
344 return reinterpret_cast<Address>(&word_character_map[0]);
345 }
346
347 protected:
348 // Byte map of one byte characters with a 0xff if the character is a word
349 // character (digit, letter or underscore) and 0x00 otherwise.
350 // Used by generated RegExp code.
351 static const uint8_t word_character_map[256];
352
354
355 private:
356 // Returns a {Result} sentinel, or the number of successful matches.
357 static int Execute(Tagged<String> input, int start_offset,
358 const uint8_t* input_start, const uint8_t* input_end,
359 int* output, int output_size, Isolate* isolate,
360 Tagged<IrRegExpData> regexp_data);
361
364};
365
366} // namespace internal
367} // namespace v8
368
369#endif // V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_
~NativeRegExpMacroAssembler() override=default
Handle< ByteArray > GetOrAddRangeArray(const ZoneList< CharacterRange > *ranges)
ZoneUnorderedMap< uint32_t, IndirectHandle< FixedUInt16Array > > range_array_cache_
static int Match(DirectHandle< IrRegExpData > regexp_data, DirectHandle< String > subject, int *offsets_vector, int offsets_vector_length, int previous_index, Isolate *isolate)
virtual void LoadCurrentCharacterUnchecked(int cp_offset, int character_count)=0
static int Execute(Tagged< String > input, int start_offset, const uint8_t *input_start, const uint8_t *input_end, int *output, int output_size, Isolate *isolate, Tagged< IrRegExpData > regexp_data)
void LoadCurrentCharacterImpl(int cp_offset, Label *on_end_of_input, bool check_bounds, int characters, int eats_at_least) override
NativeRegExpMacroAssembler(Isolate *isolate, Zone *zone)
static int CheckStackGuardState(Isolate *isolate, int start_index, RegExp::CallOrigin call_origin, Address *return_address, Tagged< InstructionStream > re_code, Address *subject, const uint8_t **input_start, const uint8_t **input_end, uintptr_t gap)
static V8_EXPORT_PRIVATE int ExecuteForTesting(Tagged< String > input, int start_offset, const uint8_t *input_start, const uint8_t *input_end, int *output, int output_size, Isolate *isolate, Tagged< JSRegExp > regexp)
virtual void SkipUntilBitInTable(int cp_offset, Handle< ByteArray > table, Handle< ByteArray > nibble_table, int advance_by)=0
virtual bool CheckCharacterInRangeArray(const ZoneList< CharacterRange > *ranges, Label *on_in_range)=0
virtual void CheckNotAtStart(int cp_offset, Label *on_not_at_start)=0
virtual void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward, bool unicode, Label *on_no_match)=0
void CheckNotInSurrogatePair(int cp_offset, Label *on_failure)
void set_backtrack_limit(uint32_t backtrack_limit)
virtual void CheckCharacterInRange(base::uc16 from, base::uc16 to, Label *on_in_range)=0
virtual void CheckBitInTable(Handle< ByteArray > table, Label *on_bit_set)=0
virtual void LoadCurrentCharacterImpl(int cp_offset, Label *on_end_of_input, bool check_bounds, int characters, int eats_at_least)=0
virtual void PushRegister(int register_index, StackCheckFlag check_stack_limit)=0
static int CaseInsensitiveCompareUnicode(Address byte_offset1, Address byte_offset2, size_t byte_length, Isolate *isolate)
virtual void Bind(Label *label)=0
virtual void PushBacktrack(Label *label)=0
virtual int stack_limit_slack_slot_count()=0
virtual void CheckNotBackReference(int start_reg, bool read_backward, Label *on_no_match)=0
virtual void IfRegisterEqPos(int reg, Label *if_eq)=0
static uint32_t IsCharacterInRangeArray(uint32_t current_char, Address raw_byte_array)
virtual bool CheckSpecialClassRanges(StandardCharacterSet type, Label *on_no_match)
virtual void CheckCharacterLT(base::uc16 limit, Label *on_less)=0
virtual void SetRegister(int register_index, int to)=0
virtual void CheckCharacterAfterAnd(unsigned c, unsigned and_with, Label *on_equal)=0
virtual bool CanReadUnaligned() const =0
static int CaseInsensitiveCompareNonUnicode(Address byte_offset1, Address byte_offset2, size_t byte_length, Isolate *isolate)
virtual void SetCurrentPositionFromEnd(int by)=0
virtual void CheckCharacter(unsigned c, Label *on_equal)=0
virtual void ReadCurrentPositionFromRegister(int reg)=0
virtual DirectHandle< HeapObject > GetCode(DirectHandle< String > source, RegExpFlags flags)=0
virtual void IfRegisterLT(int reg, int comparand, Label *if_lt)=0
virtual IrregexpImplementation Implementation()=0
virtual void ReadStackPointerFromRegister(int reg)=0
virtual void CheckNotCharacterAfterMinusAnd(base::uc16 c, base::uc16 minus, base::uc16 and_with, Label *on_not_equal)=0
RegExpMacroAssembler(Isolate *isolate, Zone *zone)
virtual void CheckPosition(int cp_offset, Label *on_outside_input)
virtual void CheckAtStart(int cp_offset, Label *on_at_start)=0
virtual void WriteStackPointerToRegister(int reg)=0
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset)=0
virtual void CheckGreedyLoop(Label *on_tos_equals_current_position)=0
virtual void AdvanceCurrentPosition(int by)=0
V8_EXPORT_PRIVATE void LoadCurrentCharacter(int cp_offset, Label *on_end_of_input, bool check_bounds=true, int characters=1, int eats_at_least=kUseCharactersValue)
virtual void PopRegister(int register_index)=0
virtual ~RegExpMacroAssembler()=default
virtual void BindJumpTarget(Label *label)
virtual void CheckCharacterNotInRange(base::uc16 from, base::uc16 to, Label *on_not_in_range)=0
const char * ImplementationToString(IrregexpImplementation impl)
virtual void CheckNotCharacter(unsigned c, Label *on_not_equal)=0
virtual void GoTo(Label *label)=0
virtual bool CheckCharacterNotInRangeArray(const ZoneList< CharacterRange > *ranges, Label *on_not_in_range)=0
virtual void AdvanceRegister(int reg, int by)=0
virtual void ClearRegisters(int reg_from, int reg_to)=0
virtual void CheckCharacterGT(base::uc16 limit, Label *on_greater)=0
virtual bool SkipUntilBitInTableUseSimd(int advance_by)
virtual void IfRegisterGE(int reg, int comparand, Label *if_ge)=0
virtual void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, Label *on_not_equal)=0
static constexpr int kInternalRegExpException
Definition regexp.h:136
static constexpr int kInternalRegExpSmallestResult
Definition regexp.h:139
static constexpr int kInternalRegExpSuccess
Definition regexp.h:135
static constexpr int kInternalRegExpRetry
Definition regexp.h:137
static constexpr int kInternalRegExpFallbackToExperimental
Definition regexp.h:138
static constexpr int kInternalRegExpFailure
Definition regexp.h:134
Label label
LiftoffRegister reg
uint32_t uc32
Definition strings.h:19
uint16_t uc16
Definition strings.h:18
static const base::uc32 kNonBmpEnd
static const base::uc32 kNonBmpStart
static const base::uc32 kTrailSurrogateStart
too high values may cause the compiler to set high thresholds for inlining to as much as possible avoid inlined allocation of objects that cannot escape trace load stores from virtual maglev objects use TurboFan fast string builder analyze liveness of environment slots and zap dead values trace TurboFan load elimination emit data about basic block usage in builtins to this enable builtin reordering when run mksnapshot flag for emit warnings when applying builtin profile data verify register allocation in TurboFan randomly schedule instructions to stress dependency tracking enable store store elimination in TurboFan rewrite far to near simulate GC compiler thread race related to allow float parameters to be passed in simulator mode JS Wasm Run additional turbo_optimize_inlined_js_wasm_wrappers enable experimental feedback collection in generic lowering enable Turboshaft s WasmLoadElimination enable Turboshaft s low level load elimination for JS enable Turboshaft s escape analysis for string concatenation use enable Turbolev features that we want to ship in the not too far future trace individual Turboshaft reduction steps trace intermediate Turboshaft reduction steps invocation count threshold for early optimization Enables optimizations which favor memory size over execution speed Enables sampling allocation profiler with X as a sample interval min size of a semi the new space consists of two semi spaces max size of the Collect garbage after Collect garbage after keeps maps alive for< n > old space garbage collections print one detailed trace line in allocation gc speed threshold for starting incremental marking via a task in percent of available threshold for starting incremental marking immediately in percent of available Use a single schedule for determining a marking schedule between JS and C objects schedules the minor GC task with kUserVisible priority max worker number of concurrent for NumberOfWorkerThreads start background threads that allocate memory concurrent_array_buffer_sweeping use parallel threads to clear weak refs in the atomic pause trace progress of the incremental marking trace object counts and memory usage report a tick only when allocated zone memory changes by this amount TracingFlags::gc_stats TracingFlags::gc_stats track native contexts that are expected to be garbage collected verify heap pointers before and after GC memory reducer runs GC with ReduceMemoryFootprint flag Maximum number of memory reducer GCs scheduled Old gen GC speed is computed directly from gc tracer counters Perform compaction on full GCs based on V8 s default heuristics Perform compaction on every full GC Perform code space compaction when finalizing a full GC with stack Stress GC compaction to flush out bugs with moving objects flush of baseline code when it has not been executed recently Use time base code flushing instead of age Use a progress bar to scan large objects in increments when incremental marking is active force incremental marking for small heaps and run it more often force marking at random points between and force scavenge at random points between and reclaim otherwise unreachable unmodified wrapper objects when possible less compaction in non memory reducing mode use high priority threads for concurrent Marking Test mode only flag It allows an unit test to select evacuation candidates use incremental marking for CppHeap cppheap_concurrent_marking c value for membalancer A special constant to balance between memory and space tradeoff The smaller the more memory it uses enable use of SSE4 instructions if available enable use of AVX VNNI instructions if available enable use of POPCNT instruction if available force all emitted branches to be in long mode(MIPS/PPC only)") DEFINE_BOOL(partial_constant_pool
static const base::uc32 kTrailSurrogateEnd
static const base::uc32 kLeadSurrogateStart
static const base::uc32 kLeadSurrogateEnd
v8_inspector::String16 String
Definition string-util.h:26
#define IMPLEMENTATIONS_LIST(V)
#define V8_EXPORT_PRIVATE
Definition macros.h:460