v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
builtins-regexp-gen.h
Go to the documentation of this file.
1// Copyright 2017 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_BUILTINS_BUILTINS_REGEXP_GEN_H_
6#define V8_BUILTINS_BUILTINS_REGEXP_GEN_H_
7
8#include <optional>
9
12#include "src/objects/string.h"
13#include "src/regexp/regexp.h"
14
15namespace v8 {
16namespace internal {
17
19 public:
22
25
26 // Allocates either a JSRegExpResult or a JSRegExpResultWithIndices (depending
27 // on has_indices) with the given length (the number of captures, including
28 // the match itself), index (the index where the match starts), and input
29 // string.
31 TNode<Context> context, TNode<Smi> length, TNode<Smi> index,
32 TNode<String> input, TNode<JSRegExp> regexp, TNode<Number> last_index,
33 TNode<BoolT> has_indices, TNode<FixedArray>* elements_out = nullptr);
34
40
43 TNode<Object> value);
44
47
48 // Loads {var_string_start} and {var_string_end} with the corresponding
49 // offsets into the given {string_data}.
51 TNode<IntPtrT> last_index,
52 TNode<IntPtrT> string_length,
53 String::Encoding encoding,
54 TVariable<RawPtrT>* var_string_start,
55 TVariable<RawPtrT>* var_string_end);
56
57 // Returns the result vector together with a flag indicating whether it was
58 // dynamically allocated. Both must be passed to FreeRegExpResultVector when
59 // done, even for exceptional control flow.
60 std::pair<TNode<RawPtrT>, TNode<BoolT>> LoadOrAllocateRegExpResultVector(
61 TNode<Smi> register_count);
62 void FreeRegExpResultVector(TNode<RawPtrT> result_vector,
63 TNode<BoolT> is_dynamic);
64
66 TNode<Context> context, TNode<RegExpMatchInfo> match_info,
67 TNode<Smi> register_count, TNode<String> subject,
68 TNode<RawPtrT> result_offsets_vector);
69
70 // Low level logic around the actual call into pattern matching code.
71 //
72 // TODO(jgruber): Callers that either 1. don't need the RegExpMatchInfo, or
73 // 2. need multiple matches, should switch to the new API which passes
74 // results via an offsets vector and allows returning multiple matches per
75 // call. See RegExpExecInternal_Batched.
77 TNode<JSRegExp> regexp,
78 TNode<String> string,
79 TNode<Number> last_index,
80 Label* if_not_matched);
81
82 // This is the new API which makes it possible to use the global irregexp
83 // execution mode from within CSA.
84 //
85 // - The result_offsets_vector must be managed by callers.
86 // - This returns the number of matches. Callers must initialize the
87 // RegExpMatchInfo as needed.
88 // - Subtle: The engine signals 'end of matches' (i.e. there is no further
89 // match in the string past the last match contained in the
90 // result_offsets_vector) by returning fewer matches than the
91 // result_offsets_vector capacity. For example, if the vector could fit 10
92 // matches, but we return '9', then all matches have been found.
93 // - Subtle: The above point requires that all implementations ALWAYS return
94 // the maximum number of matches they can.
97 TNode<String> string, TNode<Number> last_index,
98 TNode<RawPtrT> result_offsets_vector,
99 TNode<Int32T> result_offsets_vector_length);
100
103 TNode<String> string, TNode<Smi> last_index,
104 TNode<RawPtrT> result_offsets_vector,
105 TNode<Int32T> result_offsets_vector_length);
106
107 // This is a wrapper around the global irregexp mode, i.e. the mode in
108 // which a single call into irregexp may return multiple matches. The
109 // once_per_batch function is called once after each irregexp call, and
110 // once_per_match is called once for each individual match.
111 using OncePerBatchFunction = std::function<void(TNode<IntPtrT>)>;
113 std::function<void(TNode<RawPtrT>, TNode<Int32T>, TNode<Int32T>)>;
115 TNode<Context> context, TNode<JSRegExp> regexp, TNode<String> subject,
116 TNode<RegExpData> data, const VariableList& merge_vars,
117 OncePerBatchFunction once_per_batch, OncePerMatchFunction once_per_match);
118
120 TNode<Context> context, TNode<JSRegExp> regexp,
121 TNode<RegExpMatchInfo> match_info, TNode<String> string,
122 TNode<Number> last_index);
123
124 // Fast path check logic.
125 //
126 // Are you afraid? If not, you should be.
127 //
128 // It's complicated. Fast path checks protect certain assumptions, e.g. that
129 // relevant properties on the regexp prototype (such as exec, @@split, global)
130 // are unmodified.
131 //
132 // These assumptions differ by callsite. For example, RegExpPrototypeExec
133 // cares whether the exec property has been modified; but it's totally fine
134 // to modify other prototype properties. On the other hand,
135 // StringPrototypeSplit does care very much whether @@split has been changed.
136 //
137 // We want to keep regexp execution on the fast path as much as possible.
138 // Ideally, we could simply check if the regexp prototype has been modified;
139 // yet common web frameworks routinely mutate it for various reasons. But most
140 // of these mutations should happen in a way that still allows us to remain
141 // on the fast path. To support this, the fast path check logic necessarily
142 // becomes more involved.
143 //
144 // There are multiple knobs to twiddle for regexp fast path checks. We support
145 // checks that completely ignore the prototype, checks that verify specific
146 // properties on the prototype (the caller must ensure it passes in the right
147 // ones), and strict checks that additionally ensure the prototype is
148 // unchanged (we use these when we'd have to check multiple properties we
149 // don't care too much about, e.g. all individual flag getters).
150
153
155 TNode<Context> context, TNode<HeapObject> object, TNode<Map> map,
156 PrototypeCheckAssembler::Flags prototype_check_flags,
157 std::optional<DescriptorIndexNameValue> additional_property_to_check,
158 Label* if_isunmodified, Label* if_ismodified);
159
161 TNode<HeapObject> object,
162 Label* if_isunmodified,
163 Label* if_ismodified);
165 TNode<HeapObject> object,
166 Label* if_isunmodified, Label* if_ismodified);
167
168 // Strict: Does not tolerate any changes to the prototype map.
169 // Permissive: Allows changes to the prototype map except for the exec
170 // property.
172 TNode<HeapObject> object,
173 Label* if_isunmodified, Label* if_ismodified);
175 TNode<HeapObject> object,
176 Label* if_isunmodified,
177 Label* if_ismodified);
178
179 // Performs fast path checks on the given object itself, but omits prototype
180 // checks.
182 TNode<Object> object);
184 TNode<Object> object, TNode<Map> map);
185
186 void BranchIfRegExpResult(const TNode<Context> context,
187 const TNode<Object> object, Label* if_isunmodified,
188 Label* if_ismodified);
189
191 const bool is_fastpath);
192
193 TNode<BoolT> FastFlagGetter(TNode<JSRegExp> regexp, JSRegExp::Flag flag);
195 return FastFlagGetter(regexp, JSRegExp::kGlobal);
196 }
198 return FastFlagGetter(regexp, JSRegExp::kUnicode);
199 }
201 return FastFlagGetter(regexp, JSRegExp::kUnicodeSets);
202 }
204 JSRegExp::Flag flag);
205
207 const TNode<JSRegExp> regexp,
208 const TNode<Object> maybe_pattern,
209 const TNode<Object> maybe_flags);
210
212 TNode<BoolT> is_unicode, bool is_fastpath);
213
215 TNode<BoolT> is_unicode) {
216 return CAST(AdvanceStringIndex(string, index, is_unicode, true));
217 }
218
220 TNode<BoolT> is_unicode) {
221 return CAST(AdvanceStringIndex(string, index, is_unicode, false));
222 }
223
225 TNode<JSRegExp> regexp,
226 TNode<String> string,
227 TNode<Smi> limit);
228
230 TNode<JSRegExp> regexp,
231 TNode<String> subject,
232 TNode<RegExpData> data);
234 TNode<String> to_string,
235 TNode<String> from_string,
236 TNode<Smi> slice_start, TNode<Smi> slice_end);
238 TNode<JSRegExp> regexp,
239 TNode<String> subject,
241 TNode<String> replace_string);
242};
243
255
256} // namespace internal
257} // namespace v8
258
259#endif // V8_BUILTINS_BUILTINS_REGEXP_GEN_H_
TNode< String > RegExpReplaceGlobalSimpleString(TNode< Context > context, TNode< JSRegExp > regexp, TNode< String > subject, TNode< RegExpData > data, TNode< String > replace_string)
TNode< Object > FastLoadLastIndexBeforeSmiCheck(TNode< JSRegExp > regexp)
TNode< UintPtrT > RegExpExecAtom(TNode< Context > context, TNode< AtomRegExpData > data, TNode< String > string, TNode< Smi > last_index, TNode< RawPtrT > result_offsets_vector, TNode< Int32T > result_offsets_vector_length)
std::pair< TNode< RawPtrT >, TNode< BoolT > > LoadOrAllocateRegExpResultVector(TNode< Smi > register_count)
void BranchIfRegExpResult(const TNode< Context > context, const TNode< Object > object, Label *if_isunmodified, Label *if_ismodified)
TNode< Union< Null, JSArray > > RegExpMatchGlobal(TNode< Context > context, TNode< JSRegExp > regexp, TNode< String > subject, TNode< RegExpData > data)
TNode< UintPtrT > RegExpExecInternal(TNode< Context > context, TNode< JSRegExp > regexp, TNode< RegExpData > data, TNode< String > string, TNode< Number > last_index, TNode< RawPtrT > result_offsets_vector, TNode< Int32T > result_offsets_vector_length)
void BranchIfFastRegExpForMatch(TNode< Context > context, TNode< HeapObject > object, Label *if_isunmodified, Label *if_ismodified)
TNode< JSRegExpResult > ConstructNewResultFromMatchInfo(TNode< Context > context, TNode< JSRegExp > regexp, TNode< RegExpMatchInfo > match_info, TNode< String > string, TNode< Number > last_index)
void FastStoreLastIndex(TNode< JSRegExp > regexp, TNode< Smi > value)
TNode< BoolT > SlowFlagGetter(TNode< Context > context, TNode< JSAny > regexp, JSRegExp::Flag flag)
TNode< Smi > FastLoadLastIndex(TNode< JSRegExp > regexp)
TNode< Object > RegExpInitialize(const TNode< Context > context, const TNode< JSRegExp > regexp, const TNode< Object > maybe_pattern, const TNode< Object > maybe_flags)
TNode< BoolT > FastFlagGetterGlobal(TNode< JSRegExp > regexp)
TNode< Smi > AdvanceStringIndexSlow(TNode< String > string, TNode< Number > index, TNode< BoolT > is_unicode)
std::function< void(TNode< IntPtrT >)> OncePerBatchFunction
TNode< JSAny > SlowLoadLastIndex(TNode< Context > context, TNode< JSAny > regexp)
TNode< BoolT > FastFlagGetterUnicode(TNode< JSRegExp > regexp)
TNode< RegExpMatchInfo > RegExpExecInternal_Single(TNode< Context > context, TNode< JSRegExp > regexp, TNode< String > string, TNode< Number > last_index, Label *if_not_matched)
void FreeRegExpResultVector(TNode< RawPtrT > result_vector, TNode< BoolT > is_dynamic)
TNode< Smi > LoadCaptureCount(TNode< RegExpData > data)
TNode< String > FlagsGetter(TNode< Context > context, TNode< JSAny > regexp, const bool is_fastpath)
void BranchIfFastRegExp_Strict(TNode< Context > context, TNode< HeapObject > object, Label *if_isunmodified, Label *if_ismodified)
TNode< BoolT > IsFastRegExpNoPrototype(TNode< Context > context, TNode< Object > object)
void BranchIfFastRegExp(TNode< Context > context, TNode< HeapObject > object, TNode< Map > map, PrototypeCheckAssembler::Flags prototype_check_flags, std::optional< DescriptorIndexNameValue > additional_property_to_check, Label *if_isunmodified, Label *if_ismodified)
TNode< JSRegExpResult > AllocateRegExpResult(TNode< Context > context, TNode< Smi > length, TNode< Smi > index, TNode< String > input, TNode< JSRegExp > regexp, TNode< Number > last_index, TNode< BoolT > has_indices, TNode< FixedArray > *elements_out=nullptr)
std::function< void(TNode< RawPtrT >, TNode< Int32T >, TNode< Int32T >)> OncePerMatchFunction
TNode< RegExpMatchInfo > InitializeMatchInfoFromRegisters(TNode< Context > context, TNode< RegExpMatchInfo > match_info, TNode< Smi > register_count, TNode< String > subject, TNode< RawPtrT > result_offsets_vector)
TNode< BoolT > FastFlagGetter(TNode< JSRegExp > regexp, JSRegExp::Flag flag)
TNode< Smi > RegistersForCaptureCount(TNode< Smi > capture_count)
void BranchIfFastRegExp_Permissive(TNode< Context > context, TNode< HeapObject > object, Label *if_isunmodified, Label *if_ismodified)
TNode< String > AppendStringSlice(TNode< Context > context, TNode< String > to_string, TNode< String > from_string, TNode< Smi > slice_start, TNode< Smi > slice_end)
TNode< BoolT > FastFlagGetterUnicodeSets(TNode< JSRegExp > regexp)
void SlowStoreLastIndex(TNode< Context > context, TNode< JSAny > regexp, TNode< Object > value)
TNode< Number > AdvanceStringIndex(TNode< String > string, TNode< Number > index, TNode< BoolT > is_unicode, bool is_fastpath)
TNode< IntPtrT > RegExpExecInternal_Batched(TNode< Context > context, TNode< JSRegExp > regexp, TNode< String > subject, TNode< RegExpData > data, const VariableList &merge_vars, OncePerBatchFunction once_per_batch, OncePerMatchFunction once_per_match)
RegExpBuiltinsAssembler(compiler::CodeAssemblerState *state)
TNode< Smi > AdvanceStringIndexFast(TNode< String > string, TNode< Smi > index, TNode< BoolT > is_unicode)
TNode< JSArray > RegExpPrototypeSplitBody(TNode< Context > context, TNode< JSRegExp > regexp, TNode< String > string, TNode< Smi > limit)
void BranchIfFastRegExpForSearch(TNode< Context > context, TNode< HeapObject > object, Label *if_isunmodified, Label *if_ismodified)
void GetStringPointers(TNode< RawPtrT > string_data, TNode< IntPtrT > offset, TNode< IntPtrT > last_index, TNode< IntPtrT > string_length, String::Encoding encoding, TVariable< RawPtrT > *var_string_start, TVariable< RawPtrT > *var_string_end)
RegExpMatchAllAssembler(compiler::CodeAssemblerState *state)
TNode< JSAny > CreateRegExpStringIterator(TNode< NativeContext > native_context, TNode< JSAny > regexp, TNode< String > string, TNode< BoolT > global, TNode< BoolT > full_unicode)
#define CAST(x)
int32_t offset
!IsContextMap !IsContextMap native_context
Definition map-inl.h:877