v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
experimental.cc
Go to the documentation of this file.
1// Copyright 2020 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
6
7#include <optional>
8
15#include "src/utils/ostreams.h"
16
17namespace v8::internal {
18
21 RegExpFlags flags, int capture_count) {
22 DCHECK(v8_flags.enable_experimental_regexp_engine ||
23 v8_flags.enable_experimental_regexp_engine_on_excessive_backtracks);
24 bool can_be_handled =
25 ExperimentalRegExpCompiler::CanBeHandled(tree, flags, capture_count);
26 if (!can_be_handled && v8_flags.trace_experimental_regexp_engine) {
27 StdoutStream{} << "Pattern not supported by experimental engine: "
28 << pattern << std::endl;
29 }
30 return can_be_handled;
31}
32
35 RegExpFlags flags, int capture_count) {
36 DCHECK(v8_flags.enable_experimental_regexp_engine);
37 if (v8_flags.trace_experimental_regexp_engine) {
38 StdoutStream{} << "Initializing experimental regexp " << *source
39 << std::endl;
40 }
41
42 isolate->factory()->SetRegExpExperimentalData(
43 re, source, JSRegExp::AsJSRegExpFlags(flags), capture_count);
44}
45
47 Isolate* isolate) {
48 DCHECK(v8_flags.enable_experimental_regexp_engine);
49 DCHECK_EQ(re_data->type_tag(), RegExpData::Type::EXPERIMENTAL);
50#ifdef VERIFY_HEAP
51 if (v8_flags.verify_heap) re_data->IrRegExpDataVerify(isolate);
52#endif
53
54 static constexpr bool kIsLatin1 = true;
55 return re_data->has_bytecode(kIsLatin1);
56}
57
58template <class T>
60 base::Vector<T> data) {
61 static_assert(std::is_trivial_v<T>);
62
63 int byte_length = sizeof(T) * data.length();
65 isolate->factory()->NewTrustedByteArray(byte_length);
67 MemCopy(byte_array->begin(), data.begin(), byte_length);
68 return byte_array;
69}
70
71namespace {
72
73struct CompilationResult {
74 DirectHandle<TrustedByteArray> bytecode;
75 DirectHandle<FixedArray> capture_name_map;
76};
77
78// Compiles source pattern, but doesn't change the regexp object.
79std::optional<CompilationResult> CompileImpl(
80 Isolate* isolate, DirectHandle<IrRegExpData> re_data) {
81 Zone zone(isolate->allocator(), ZONE_NAME);
82
83 DirectHandle<String> source(re_data->source(), isolate);
84
85 // Parse and compile the regexp source.
86 RegExpCompileData parse_result;
87 DCHECK(!isolate->has_exception());
88
89 RegExpFlags flags = JSRegExp::AsRegExpFlags(re_data->flags());
90 bool parse_success = RegExpParser::ParseRegExpFromHeapString(
91 isolate, &zone, source, flags, &parse_result);
92 if (!parse_success) {
93 // The pattern was already parsed successfully during initialization, so
94 // the only way parsing can fail now is because of stack overflow.
95 DCHECK_EQ(parse_result.error, RegExpError::kStackOverflow);
96 USE(RegExp::ThrowRegExpException(isolate, flags, source,
97 parse_result.error));
98 return std::nullopt;
99 }
100
101 ZoneList<RegExpInstruction> bytecode = ExperimentalRegExpCompiler::Compile(
102 parse_result.tree, JSRegExp::AsRegExpFlags(re_data->flags()), &zone);
103
104 CompilationResult result;
105 result.bytecode = VectorToByteArray(isolate, bytecode.ToVector());
106 result.capture_name_map =
107 RegExp::CreateCaptureNameMap(isolate, parse_result.named_captures);
108 return result;
109}
110
111} // namespace
112
115 DCHECK(v8_flags.enable_experimental_regexp_engine);
116 DCHECK_EQ(re_data->type_tag(), RegExpData::Type::EXPERIMENTAL);
117#ifdef VERIFY_HEAP
118 if (v8_flags.verify_heap) re_data->IrRegExpDataVerify(isolate);
119#endif
120
121 DirectHandle<String> source(re_data->source(), isolate);
122 if (v8_flags.trace_experimental_regexp_engine) {
123 StdoutStream{} << "Compiling experimental regexp " << *source << std::endl;
124 }
125
126 std::optional<CompilationResult> compilation_result =
127 CompileImpl(isolate, re_data);
128 if (!compilation_result.has_value()) {
129 DCHECK(isolate->has_exception());
130 return false;
131 }
132
133 re_data->SetBytecodeForExperimental(isolate, *compilation_result->bytecode);
134 re_data->set_capture_name_map(compilation_result->capture_name_map);
135
136 return true;
137}
138
140 Tagged<TrustedByteArray> raw_bytes) {
141 RegExpInstruction* inst_begin =
142 reinterpret_cast<RegExpInstruction*>(raw_bytes->begin());
143 int inst_num = raw_bytes->length() / sizeof(RegExpInstruction);
144 DCHECK_EQ(sizeof(RegExpInstruction) * inst_num, raw_bytes->length());
145 return base::Vector<RegExpInstruction>(inst_begin, inst_num);
146}
147
148namespace {
149
150int32_t ExecRawImpl(Isolate* isolate, RegExp::CallOrigin call_origin,
152 int capture_count, int32_t* output_registers,
153 int32_t output_register_count, int32_t subject_index) {
155 // TODO(cbruni): remove once gcmole is fixed.
156 DisableGCMole no_gc_mole;
157
158 int register_count_per_match =
160
161 int32_t result;
162 DCHECK(subject->IsFlat());
163 Zone zone(isolate->allocator(), ZONE_NAME);
165 isolate, call_origin, bytecode, register_count_per_match, subject,
166 subject_index, output_registers, output_register_count, &zone);
167 return result;
168}
169
170} // namespace
171
172// Returns the number of matches.
175 Tagged<IrRegExpData> regexp_data,
176 Tagged<String> subject,
177 int32_t* output_registers,
178 int32_t output_register_count,
179 int32_t subject_index) {
180 CHECK(v8_flags.enable_experimental_regexp_engine);
182
183 if (v8_flags.trace_experimental_regexp_engine) {
184 StdoutStream{} << "Executing experimental regexp " << regexp_data->source()
185 << std::endl;
186 }
187
188 static constexpr bool kIsLatin1 = true;
189 Tagged<TrustedByteArray> bytecode = regexp_data->bytecode(kIsLatin1);
190
191 return ExecRawImpl(isolate, call_origin, bytecode, subject,
192 regexp_data->capture_count(), output_registers,
193 output_register_count, subject_index);
194}
195
197 Address subject, int32_t start_position, Address input_start,
198 Address input_end, int* output_registers, int32_t output_register_count,
199 RegExp::CallOrigin call_origin, Isolate* isolate, Address regexp_data) {
200 DCHECK(v8_flags.enable_experimental_regexp_engine);
201 DCHECK_NOT_NULL(isolate);
202 DCHECK_NOT_NULL(output_registers);
204
206 DisallowJavascriptExecution no_js(isolate);
207 DisallowHandleAllocation no_handles;
209
210 Tagged<String> subject_string = Cast<String>(Tagged<Object>(subject));
211
212 Tagged<IrRegExpData> regexp_data_obj =
214
215 return ExecRaw(isolate, RegExp::kFromJs, regexp_data_obj, subject_string,
216 output_registers, output_register_count, start_position);
217}
218
219// static
220std::optional<int> ExperimentalRegExp::Exec(
221 Isolate* isolate, DirectHandle<IrRegExpData> regexp_data,
222 DirectHandle<String> subject, int index, int32_t* result_offsets_vector,
223 uint32_t result_offsets_vector_length) {
224 DCHECK(v8_flags.enable_experimental_regexp_engine);
225 DCHECK_EQ(regexp_data->type_tag(), RegExpData::Type::EXPERIMENTAL);
226#ifdef VERIFY_HEAP
227 if (v8_flags.verify_heap) regexp_data->IrRegExpDataVerify(isolate);
228#endif
229
230 if (!IsCompiled(regexp_data, isolate) && !Compile(isolate, regexp_data)) {
231 DCHECK(isolate->has_exception());
232 return {};
233 }
234
235 DCHECK(IsCompiled(regexp_data, isolate));
236
237 subject = String::Flatten(isolate, subject);
238
239 DCHECK_GE(result_offsets_vector_length,
240 JSRegExp::RegistersForCaptureCount(regexp_data->capture_count()));
241
242 do {
243 int num_matches =
244 ExecRaw(isolate, RegExp::kFromRuntime, *regexp_data, *subject,
245 result_offsets_vector, result_offsets_vector_length, index);
246
247 if (num_matches > 0) {
249 regexp_data->capture_count()),
250 result_offsets_vector_length);
251 return num_matches;
252 } else if (num_matches == 0) {
253 return num_matches;
254 } else {
255 DCHECK_LT(num_matches, 0);
256 if (num_matches == RegExp::kInternalRegExpRetry) {
257 // Re-run execution.
258 continue;
259 }
260 DCHECK(isolate->has_exception());
261 return {};
262 }
263 } while (true);
264 UNREACHABLE();
265}
266
268 Isolate* isolate, DirectHandle<IrRegExpData> regexp_data,
269 DirectHandle<String> subject, int32_t* output_registers,
270 int32_t output_register_count, int32_t subject_index) {
271 CHECK(v8_flags.enable_experimental_regexp_engine_on_excessive_backtracks);
272
273 if (v8_flags.trace_experimental_regexp_engine) {
274 StdoutStream{} << "Experimental execution (oneshot) of regexp "
275 << regexp_data->source() << std::endl;
276 }
277
278 std::optional<CompilationResult> compilation_result =
279 CompileImpl(isolate, regexp_data);
280 if (!compilation_result.has_value()) return RegExp::kInternalRegExpException;
281
283 return ExecRawImpl(isolate, RegExp::kFromRuntime,
284 *compilation_result->bytecode, *subject,
285 regexp_data->capture_count(), output_registers,
286 output_register_count, subject_index);
287}
288
290 Isolate* isolate, DirectHandle<IrRegExpData> regexp_data,
291 DirectHandle<String> subject, int subject_index,
292 int32_t* result_offsets_vector, uint32_t result_offsets_vector_length) {
293 DCHECK(v8_flags.enable_experimental_regexp_engine_on_excessive_backtracks);
294
295 do {
296 int num_matches =
297 OneshotExecRaw(isolate, regexp_data, subject, result_offsets_vector,
298 result_offsets_vector_length, subject_index);
299
300 if (num_matches > 0) {
302 regexp_data->capture_count()),
303 result_offsets_vector_length);
304 return num_matches;
305 } else if (num_matches == 0) {
306 return num_matches;
307 } else {
308 DCHECK_LT(num_matches, 0);
309 if (num_matches == RegExp::kInternalRegExpRetry) {
310 // Re-run execution.
311 continue;
312 }
313 DCHECK(isolate->has_exception());
314 return {};
315 }
316 } while (true);
317 UNREACHABLE();
318}
319
320} // namespace v8::internal
friend Zone
Definition asm-types.cc:195
#define T
static ZoneList< RegExpInstruction > Compile(RegExpTree *tree, RegExpFlags flags, Zone *zone)
static bool CanBeHandled(RegExpTree *tree, RegExpFlags flags, int capture_count)
static int FindMatches(Isolate *isolate, RegExp::CallOrigin call_origin, Tagged< TrustedByteArray > bytecode, int capture_count, Tagged< String > input, int start_index, int32_t *output_registers, int output_register_count, Zone *zone)
static int32_t ExecRaw(Isolate *isolate, RegExp::CallOrigin call_origin, Tagged< IrRegExpData > regexp_data, Tagged< String > subject, int32_t *output_registers, int32_t output_register_count, int32_t subject_index)
static void Initialize(Isolate *isolate, DirectHandle< JSRegExp > re, DirectHandle< String > pattern, RegExpFlags flags, int capture_count)
static int32_t MatchForCallFromJs(Address subject, int32_t start_position, Address input_start, Address input_end, int *output_registers, int32_t output_register_count, RegExp::CallOrigin call_origin, Isolate *isolate, Address regexp_data)
static bool CanBeHandled(RegExpTree *tree, DirectHandle< String > pattern, RegExpFlags flags, int capture_count)
static bool IsCompiled(DirectHandle< IrRegExpData > re_data, Isolate *isolate)
static V8_WARN_UNUSED_RESULT bool Compile(Isolate *isolate, DirectHandle< IrRegExpData > re_data)
static std::optional< int > Exec(Isolate *isolate, DirectHandle< IrRegExpData > regexp_data, DirectHandle< String > subject, int index, int32_t *result_offsets_vector, uint32_t result_offsets_vector_length)
static std::optional< int > OneshotExec(Isolate *isolate, DirectHandle< IrRegExpData > regexp_data, DirectHandle< String > subject, int index, int32_t *result_offsets_vector, uint32_t result_offsets_vector_length)
static int32_t OneshotExecRaw(Isolate *isolate, DirectHandle< IrRegExpData > regexp_data, DirectHandle< String > subject, int32_t *output_registers, int32_t output_register_count, int32_t subject_index)
static constexpr RegExpFlags AsRegExpFlags(Flags f)
Definition js-regexp.h:57
static constexpr Flags AsJSRegExpFlags(RegExpFlags f)
Definition js-regexp.h:54
static constexpr int RegistersForCaptureCount(int count)
Definition js-regexp.h:90
static bool ParseRegExpFromHeapString(Isolate *isolate, Zone *zone, DirectHandle< String > input, RegExpFlags flags, RegExpCompileData *result)
static constexpr int kInternalRegExpException
Definition regexp.h:136
static DirectHandle< FixedArray > CreateCaptureNameMap(Isolate *isolate, ZoneVector< RegExpCapture * > *named_captures)
Definition regexp.cc:588
static V8_WARN_UNUSED_RESULT MaybeDirectHandle< Object > ThrowRegExpException(Isolate *isolate, RegExpFlags flags, DirectHandle< String > pattern, RegExpError error)
Definition regexp.cc:143
static constexpr int kInternalRegExpRetry
Definition regexp.h:137
static V8_INLINE HandleType< String > Flatten(Isolate *isolate, HandleType< T > string, AllocationType allocation=AllocationType::kYoung)
DirectHandle< FixedArray > capture_name_map
std::string pattern
ZoneVector< RpoNumber > & result
InstructionOperand source
PerThreadAssertScopeDebugOnly< false, SAFEPOINTS_ASSERT, HEAP_ALLOCATION_ASSERT > DisallowGarbageCollection
Tagged(T object) -> Tagged< T >
PerThreadAssertScopeDebugOnly< false, GC_MOLE > DisableGCMole
base::Flags< RegExpFlag > RegExpFlags
DirectHandle< TrustedByteArray > VectorToByteArray(Isolate *isolate, base::Vector< T > data)
V8_EXPORT_PRIVATE FlagValues v8_flags
base::Vector< RegExpInstruction > AsInstructionSequence(Tagged< TrustedByteArray > raw_bytes)
void MemCopy(void *dest, const void *src, size_t size)
Definition memcopy.h:124
Tagged< To > Cast(Tagged< From > value, const v8::SourceLocation &loc=INIT_SOURCE_LOCATION_IN_DEBUG)
Definition casting.h:150
#define DCHECK_LE(v1, v2)
Definition logging.h:490
#define CHECK(condition)
Definition logging.h:124
#define DCHECK_NOT_NULL(val)
Definition logging.h:492
#define DCHECK_GE(v1, v2)
Definition logging.h:488
#define DCHECK(condition)
Definition logging.h:482
#define DCHECK_LT(v1, v2)
Definition logging.h:489
#define DCHECK_EQ(v1, v2)
Definition logging.h:485
#define USE(...)
Definition macros.h:293
#define ZONE_NAME
Definition zone.h:22