v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
regexp.h
Go to the documentation of this file.
1// Copyright 2012 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_REGEXP_REGEXP_H_
6#define V8_REGEXP_REGEXP_H_
7
14
15namespace v8 {
16namespace internal {
17
18class JSRegExp;
19class RegExpCapture;
20class RegExpData;
21class IrRegExpData;
22class AtomRegExpData;
23class RegExpMatchInfo;
24class RegExpNode;
25class RegExpTree;
26
28
29// TODO(jgruber): Do not expose in regexp.h.
30// TODO(jgruber): Consider splitting between ParseData and CompileData.
32 // The parsed AST as produced by the RegExpParser.
33 RegExpTree* tree = nullptr;
34
35 // The compiled Node graph as produced by RegExpTree::ToNode methods.
36 RegExpNode* node = nullptr;
37
38 // Either the generated code as produced by the compiler or a trampoline
39 // to the interpreter.
41
42 // True, iff the pattern is a 'simple' atom with zero captures. In other
43 // words, the pattern consists of a string with no metacharacters and special
44 // regexp features, and can be implemented as a standard string search.
45 bool simple = true;
46
47 // True, iff the pattern is anchored at the start of the string with '^'.
48 bool contains_anchor = false;
49
50 // Only set if the pattern contains named captures.
51 // Note: the lifetime equals that of the parse/compile zone.
53
54 // The error message. Only used if an error occurred during parsing or
55 // compilation.
56 RegExpError error = RegExpError::kNone;
57
58 // The position at which the error was detected. Only used if an
59 // error occurred.
60 int error_pos = 0;
61
62 // The number of capture groups, without the global capture \0.
64
65 // The number of registers used by the generated code.
67
68 // The compilation target (bytecode or native code).
70};
71
72class RegExp final : public AllStatic {
73 public:
74 // Whether the irregexp engine generates interpreter bytecode.
75 static bool CanGenerateBytecode();
76
77 // Verify that the given flags combination is valid.
78 V8_EXPORT_PRIVATE static bool VerifyFlags(RegExpFlags flags);
79
80 // Verify the given pattern, i.e. check that parsing succeeds. If
81 // verification fails, `regexp_error_out` is set.
82 template <class CharT>
83 static bool VerifySyntax(Zone* zone, uintptr_t stack_limit,
84 const CharT* input, int input_length,
85 RegExpFlags flags, RegExpError* regexp_error_out,
86 const DisallowGarbageCollection& no_gc);
87
88 // Parses the RegExp pattern and prepares the JSRegExp object with
89 // generic data and choice of implementation - as well as what
90 // the implementation wants to store in the data field.
91 // Returns false if compilation fails.
94 RegExpFlags flags, uint32_t backtrack_limit);
95
96 // Ensures that a regexp is fully compiled and ready to be executed on a
97 // subject string. Returns true on success. Throws and returns false on
98 // failure.
100 Isolate* isolate, DirectHandle<RegExpData> re_data,
101 DirectHandle<String> subject);
102
103 enum CallOrigin : int {
106 };
107
108 // See ECMA-262 section 15.10.6.2.
109 // This function calls the garbage collector if necessary.
110 V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static std::optional<int> Exec(
111 Isolate* isolate, DirectHandle<JSRegExp> regexp,
112 DirectHandle<String> subject, int index, int32_t* result_offsets_vector,
113 uint32_t result_offsets_vector_length);
114 // As above, but passes the result through the old-style RegExpMatchInfo|Null
115 // interface. At most one match is returned.
118 DirectHandle<String> subject, int index,
119 DirectHandle<RegExpMatchInfo> last_match_info);
120
121 V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static std::optional<int>
123 DirectHandle<String> subject, int index,
124 int32_t* result_offsets_vector,
125 uint32_t result_offsets_vector_length);
126
127 // Called directly from generated code through ExternalReference.
128 V8_EXPORT_PRIVATE static intptr_t AtomExecRaw(
129 Isolate* isolate, Address /* AtomRegExpData */ data_address,
130 Address /* String */ subject_address, int32_t index,
131 int32_t* result_offsets_vector, int32_t result_offsets_vector_length);
132
133 // Integral return values used throughout regexp code layers.
134 static constexpr int kInternalRegExpFailure = 0;
135 static constexpr int kInternalRegExpSuccess = 1;
136 static constexpr int kInternalRegExpException = -1;
137 static constexpr int kInternalRegExpRetry = -2;
138 static constexpr int kInternalRegExpFallbackToExperimental = -3;
139 static constexpr int kInternalRegExpSmallestResult = -3;
140
148
149 // Set last match info. If match is nullptr, then setting captures is
150 // omitted.
152 Isolate* isolate, DirectHandle<RegExpMatchInfo> last_match_info,
153 DirectHandle<String> subject, int capture_count, int32_t* match);
154
156 Isolate* isolate, Zone* zone, RegExpCompileData* input, RegExpFlags flags,
158 bool is_one_byte);
159
160 V8_EXPORT_PRIVATE static void DotPrintForTesting(const char* label,
161 RegExpNode* node);
162
163 static const int kRegExpTooLargeToOptimize = 20 * KB;
164
168 RegExpError error);
169 static void ThrowRegExpException(Isolate* isolate,
171 RegExpError error_text);
172
173 static bool IsUnmodifiedRegExp(Isolate* isolate,
175
177 Isolate* isolate, ZoneVector<RegExpCapture*>* named_captures);
178};
179
180// Uses a special global mode of irregexp-generated code to perform a global
181// search and return multiple results at once. As such, this is essentially an
182// iterator over multiple results (retrieved batch-wise in advance).
184 public:
186 DirectHandle<String> subject, Isolate* isolate);
187
188 // Fetch the next entry in the cache for global regexp match results.
189 // This does not set the last match info. Upon failure, nullptr is
190 // returned. The cause can be checked with HasException(). The previous result is
191 // still available in memory when a failure happens.
192 int32_t* FetchNext();
193
194 int32_t* LastSuccessfulMatch() const;
195
196 bool HasException() const { return num_matches_ < 0; }
197
198 private:
199 int AdvanceZeroLength(int last_index) const;
200
205
210 // Pointer to the last set of captures.
211 int32_t* register_array_ = nullptr;
216};
217
218// Caches results for specific regexp queries on the isolate. At the time of
219// writing, this is used during global calls to RegExp.prototype.exec and
220// @@split.
221class RegExpResultsCache final : public AllStatic {
222 public:
224
225 // Attempt to retrieve a cached result. On failure, 0 is returned as a Smi.
226 // On success, the returned result is guaranteed to be a COW-array.
227 static Tagged<Object> Lookup(Heap* heap, Tagged<String> key_string,
228 Tagged<Object> key_pattern,
229 Tagged<FixedArray>* last_match_out,
230 ResultsCacheType type);
231 // Attempt to add value_array to the cache specified by type. On success,
232 // value_array is turned into a COW-array.
233 static void Enter(Isolate* isolate, DirectHandle<String> key_string,
234 DirectHandle<Object> key_pattern,
235 DirectHandle<FixedArray> value_array,
236 DirectHandle<FixedArray> last_match_cache,
237 ResultsCacheType type);
238 static void Clear(Tagged<FixedArray> cache);
239
240 static constexpr int kRegExpResultsCacheSize = 0x100;
241
242 private:
243 static constexpr int kStringOffset = 0;
244 static constexpr int kPatternOffset = 1;
245 static constexpr int kArrayOffset = 2;
246 static constexpr int kLastMatchOffset = 3;
247 static constexpr int kArrayEntriesPerCacheEntry = 4;
248};
249
250// Caches results of RegExpPrototypeMatch when:
251// - the subject is a SlicedString
252// - the pattern is an ATOM type regexp.
253//
254// This is intended for usage patterns where we search ever-growing slices of
255// some large string. After a cache hit, RegExpMatchGlobalAtom only needs to
256// process the trailing part of the subject string that was *not* part of the
257// cached SlicedString.
258//
259// For example:
260//
261// long_string.substring(0, 100).match(pattern);
262// long_string.substring(0, 200).match(pattern);
263//
264// The second call hits the cache for the slice [0, 100[ and only has to search
265// the slice [100, 200[.
267 public:
268 static void TryInsert(Isolate* isolate, Tagged<String> subject,
269 Tagged<String> pattern, int number_of_matches,
270 int last_match_index);
271 static bool TryGet(Isolate* isolate, Tagged<String> subject,
272 Tagged<String> pattern, int* number_of_matches_out,
273 int* last_match_index_out);
274 static void Clear(Heap* heap);
275
276 private:
277 static constexpr int kSubjectIndex = 0; // SlicedString.
278 static constexpr int kPatternIndex = 1; // String.
279 static constexpr int kNumberOfMatchesIndex = 2; // Smi.
280 static constexpr int kLastMatchIndexIndex = 3; // Smi.
281 static constexpr int kEntrySize = 4;
282
283 public:
284 static constexpr int kSize = kEntrySize; // Single-entry cache.
285};
286
287} // namespace internal
288} // namespace v8
289
290#endif // V8_REGEXP_REGEXP_H_
int32_t * LastSuccessfulMatch() const
Definition regexp.cc:1292
RegExpGlobalExecRunner(DirectHandle< RegExpData > regexp_data, DirectHandle< String > subject, Isolate *isolate)
Definition regexp.cc:1143
DirectHandle< String > subject_
Definition regexp.h:214
DirectHandle< RegExpData > regexp_data_
Definition regexp.h:213
int AdvanceZeroLength(int last_index) const
Definition regexp.cc:1208
RegExpResultVectorScope result_vector_scope_
Definition regexp.h:206
static bool TryGet(Isolate *isolate, Tagged< String > subject, Tagged< String > pattern, int *number_of_matches_out, int *last_match_index_out)
Definition regexp.cc:1421
static void TryInsert(Isolate *isolate, Tagged< String > subject, Tagged< String > pattern, int number_of_matches, int last_match_index)
Definition regexp.cc:1403
static constexpr int kPatternOffset
Definition regexp.h:244
static void Clear(Tagged< FixedArray > cache)
Definition regexp.cc:1396
static constexpr int kLastMatchOffset
Definition regexp.h:246
static void Enter(Isolate *isolate, DirectHandle< String > key_string, DirectHandle< Object > key_pattern, DirectHandle< FixedArray > value_array, DirectHandle< FixedArray > last_match_cache, ResultsCacheType type)
Definition regexp.cc:1335
static constexpr int kRegExpResultsCacheSize
Definition regexp.h:240
static constexpr int kStringOffset
Definition regexp.h:243
static constexpr int kArrayEntriesPerCacheEntry
Definition regexp.h:247
static constexpr int kArrayOffset
Definition regexp.h:245
static Tagged< Object > Lookup(Heap *heap, Tagged< String > key_string, Tagged< Object > key_pattern, Tagged< FixedArray > *last_match_out, ResultsCacheType type)
Definition regexp.cc:1301
static constexpr int kInternalRegExpException
Definition regexp.h:136
static V8_EXPORT_PRIVATE bool CompileForTesting(Isolate *isolate, Zone *zone, RegExpCompileData *input, RegExpFlags flags, DirectHandle< String > pattern, DirectHandle< String > sample_subject, bool is_one_byte)
Definition regexp.cc:952
V8_EXPORT_PRIVATE static V8_WARN_UNUSED_RESULT std::optional< int > ExperimentalOneshotExec(Isolate *isolate, DirectHandle< JSRegExp > regexp, DirectHandle< String > subject, int index, int32_t *result_offsets_vector, uint32_t result_offsets_vector_length)
Definition regexp.cc:320
static DirectHandle< RegExpMatchInfo > SetLastMatchInfo(Isolate *isolate, DirectHandle< RegExpMatchInfo > last_match_info, DirectHandle< String > subject, int capture_count, int32_t *match)
Definition regexp.cc:896
static V8_EXPORT_PRIVATE intptr_t AtomExecRaw(Isolate *isolate, Address data_address, Address subject_address, int32_t index, int32_t *result_offsets_vector, int32_t result_offsets_vector_length)
Definition regexp.cc:492
static bool VerifySyntax(Zone *zone, uintptr_t stack_limit, const CharT *input, int input_length, RegExpFlags flags, RegExpError *regexp_error_out, const DisallowGarbageCollection &no_gc)
Definition regexp.cc:124
V8_EXPORT_PRIVATE static V8_WARN_UNUSED_RESULT std::optional< int > Exec(Isolate *isolate, DirectHandle< JSRegExp > regexp, DirectHandle< String > subject, int index, int32_t *result_offsets_vector, uint32_t result_offsets_vector_length)
Definition regexp.cc:332
V8_EXPORT_PRIVATE static V8_WARN_UNUSED_RESULT MaybeDirectHandle< Object > Exec_Single(Isolate *isolate, DirectHandle< JSRegExp > regexp, DirectHandle< String > subject, int index, DirectHandle< RegExpMatchInfo > last_match_info)
Definition regexp.cc:358
static V8_EXPORT_PRIVATE void DotPrintForTesting(const char *label, RegExpNode *node)
Definition regexp.cc:926
static constexpr int kInternalRegExpSmallestResult
Definition regexp.h:139
static bool CanGenerateBytecode()
Definition regexp.cc:112
static DirectHandle< FixedArray > CreateCaptureNameMap(Isolate *isolate, ZoneVector< RegExpCapture * > *named_captures)
Definition regexp.cc:588
static constexpr int kInternalRegExpSuccess
Definition regexp.h:135
static V8_EXPORT_PRIVATE bool VerifyFlags(RegExpFlags flags)
Definition regexp.cc:117
static V8_WARN_UNUSED_RESULT MaybeDirectHandle< Object > ThrowRegExpException(Isolate *isolate, RegExpFlags flags, DirectHandle< String > pattern, RegExpError error)
Definition regexp.cc:143
static bool IsUnmodifiedRegExp(Isolate *isolate, DirectHandle< JSRegExp > regexp)
Definition regexp.cc:166
static const int kRegExpTooLargeToOptimize
Definition regexp.h:163
static V8_WARN_UNUSED_RESULT bool EnsureFullyCompiled(Isolate *isolate, DirectHandle< RegExpData > re_data, DirectHandle< String > subject)
Definition regexp.cc:294
static constexpr int kInternalRegExpRetry
Definition regexp.h:137
static constexpr int kInternalRegExpFallbackToExperimental
Definition regexp.h:138
static constexpr int kInternalRegExpFailure
Definition regexp.h:134
static V8_WARN_UNUSED_RESULT MaybeDirectHandle< Object > Compile(Isolate *isolate, DirectHandle< JSRegExp > re, DirectHandle< String > pattern, RegExpFlags flags, uint32_t backtrack_limit)
Definition regexp.cc:200
Label label
std::string pattern
refactor address components for immediate indexing make OptimizeMaglevOnNextCall optimize to turbofan instead of maglev filter for tracing turbofan compilation trace turbo cfg trace TurboFan s graph trimmer trace TurboFan s control equivalence trace TurboFan s register allocator trace stack load store counters for optimized code in run fuzzing &&concurrent_recompilation trace_turbo trace_turbo_scheduled trace_turbo_stack_accesses verify TurboFan machine graph of code stubs enable FixedArray bounds checks print TurboFan statistics of wasm compilations maximum cumulative size of bytecode considered for inlining scale factor of bytecode size used to calculate the inlining budget * KB
Definition flags.cc:1366
RegExpCompilationTarget
Definition regexp.h:27
#define DCHECK_NE(v1, v2)
Definition logging.h:486
#define V8_EXPORT_PRIVATE
Definition macros.h:460
ZoneVector< RegExpCapture * > * named_captures
Definition regexp.h:52
DirectHandle< Object > code
Definition regexp.h:40
RegExpCompilationTarget compilation_target
Definition regexp.h:69
#define V8_WARN_UNUSED_RESULT
Definition v8config.h:671