v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
string-builder-optimizer.h
Go to the documentation of this file.
1// Copyright 2022 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_COMPILER_STRING_BUILDER_OPTIMIZER_H_
6#define V8_COMPILER_STRING_BUILDER_OPTIMIZER_H_
7
8#include <cstdint>
9#include <optional>
10#include <unordered_map>
11#include <vector>
12
13#include "src/base/macros.h"
19#include "src/compiler/node.h"
22
23namespace v8 {
24namespace internal {
25
26class Zone;
27
28namespace compiler {
29
30class JSGraphAssembler;
31class NodeOriginTable;
32class Schedule;
33class SourcePositionTable;
34class JSHeapBroker;
35
36// StringBuilderOptimizer aims at avoid ConsString for some loops that build
37// strings, and instead update a mutable over-allocated backing store, while
38// keeping a (mutable) SlicedString to the valid part of the backing store.
39//
40// StringBuilderOptimizer only does the analysis: it finds out which nodes could
41// benefit from this optimization. Then, EffectControlLinearizer actually
42// applies the optimization to the graph.
43//
44// A typical example of what the StringBuilderOptimizer can optimize is:
45//
46// let s = "";
47// for (...) {
48// s += "...";
49// }
50//
51// In general, for a series of concatenations to be optimized, they need:
52// - To start on a single initial concatenation
53// - All the concatenations in the string builder must have constant strings
54// or String.FromCharCode on their right-hand side.
55// - At least one of the concatenation must be in a loop.
56//
57// Because everything is nicer with a picture, here is one of what kind of
58// patterns the StringBuilderOptimizer tries to optimize:
59//
60// +--------+
61// |kLiteral|
62// +--------+
63// |
64// |
65// v
66// +-------------+ +--------+
67// |kStringConcat| <------- |kLiteral|
68// +-------------+ +--------+
69// |
70// |
71// v
72// optionally,
73// more kStringConcat
74// |
75// |
76// v
77// +----+
78// -------->|kPhi|------------------------------------------
79// | +----+ |
80// | | |
81// | | |
82// | | |
83// | | |
84// | | |
85// | | |
86// | | |
87// | v |
88// | +-------------+ +--------+ |
89// | |kStringConcat| <------- |kLiteral| |
90// | +-------------+ +--------+ |
91// | | |
92// | | |
93// | v |
94// | optionally, v
95// | more kStringConcat optionally,
96// | | more kStringConcat
97// | | or more kPhi/loops
98// | | |
99// ------------| |
100// |
101// |
102// v
103//
104// Where "kLiteral" actually means "either a string literal (HeapConstant) or a
105// StringFromSingleCharCode". And kStringConcat can also be kNewConsString (when
106// the size of the concatenation is known to be more than 13 bytes, Turbofan's
107// front-end generates kNewConsString opcodes rather than kStringConcat).
108// The StringBuilder also supports merge phis. For instance:
109//
110// +--------+
111// |kLiteral|
112// +--------+
113// |
114// |
115// v
116// +-------------+ +--------+
117// |kStringConcat| <------- |kLiteral|
118// +-------------+ +--------+
119// | |
120// | |
121// | |
122// +---------------+ +---------------+
123// | |
124// | |
125// v v
126// +-------------+ +-------------+
127// |kStringConcat| |kStringConcat|
128// +-------------+ +-------------+
129// | |
130// | |
131// | |
132// +---------------+ +---------------+
133// | |
134// | |
135// v v
136// +-------------+
137// | kPhi |
138// | (merge) |
139// +-------------+
140// |
141// |
142// v
143//
144// (and, of course, loops and merge can be mixed).
145
147 // The class OneOrTwoByteAnalysis is used to try to statically determine
148 // whether a string constant or StringFromSingleCharCode is a 1-byte or a
149 // 2-byte string.
150 // If we succeed to do this analysis for all of the nodes in a string builder,
151 // then we know statically whether this string builder is building a 1-byte or
152 // a 2-byte string, and we can optimize the generated code to remove all
153 // 1-byte/2-byte checks.
154 public:
156 : states_(graph->NodeCount(), State::kUnknown, zone), broker_(broker) {}
157
158 enum class State : uint8_t {
159 kUnknown, // Not yet determined if the string is 1 or 2-bytes
160 kOneByte, // Only 1-byte strings in the string builder
161 kTwoByte, // At least one 2-byte string in the string builder
162 kCantKnow // Cannot determine statically if the string will be 1 or 2-bytes
163
164 // Lattice of possible transitions:
165 //
166 // kUnknown
167 // / | \
168 // / | \
169 // v | \
170 // kOneByte | |
171 // | | | |
172 // | | | |
173 // | v v |
174 // | kTwoByte |
175 // | | /
176 // \ | /
177 // v v v
178 // kCantKnow
179 //
180 // Which means that for instance it's possible to realize that a kUnknown
181 // string builder will produce a 1-byte string, and we can later realize
182 // that it will instead be a 2-byte string. Or, we could be in kOneByte
183 // state, and then realize that the string may or may not end up being
184 // 2-byte, so we'll move to kCantKnow state.
185 };
186
187 // Computes and returns a State reflecting whether {node} is a 1-byte or
188 // 2-byte string.
189 State OneOrTwoByte(Node* node);
190
191 // Computes whether the string builder will be on 1-byte or 2-byte if it
192 // contains two nodes that have states {a} and {b}. For instance, if both {a}
193 // and {b} are kOneByte, ConcatResultIsOneOrTwoByte returns kOneByte.
195
196 private:
197 // Returns the positive integral range that {node} can take. If {node} can be
198 // negative or is not a number, returns nullopt. If the range exceeds 2**32,
199 // returns nullopt as well. The analysis of TryGetRange is not complete (some
200 // operators are ignored), so if {node} isn't handled, then nullopt is
201 // returned. If this function returns a range between 0 and 255, then we
202 // assume that calling StringFromSingleCharCode on {node} will produce a
203 // 1-byte string. The analysis is sound (it doesn't make mistake), but is not
204 // complete (it bails out (returns nullopt) on operators that are not
205 // handled).
206 std::optional<std::pair<int64_t, int64_t>> TryGetRange(Node* node);
207
209
212};
213
215 public:
218
219 // Returns true if some trimming code should be inserted at the beginning of
220 // {block} to finalize some string builders.
221 bool BlockShouldFinalizeStringBuilders(BasicBlock* block);
222 // Returns which nodes should be trimmed at the beginning of {block} to
223 // finalize some string builders.
224 ZoneVector<Node*> GetStringBuildersToFinalize(BasicBlock* block);
225
226 // Returns true if {node} is the last node of a StringBuilder (which means
227 // that trimming code should be inserted after {node}).
228 // Note that string builders cannot end in the middle of a loop (unless it was
229 // started in the same loop). The way it's enforced is that when we first
230 // visit a loop Phi that could be part of a String Builder, we set its status
231 // to State::kPendingPhi. Only once we've visited the whole loop and the
232 // backedge and that the use chain following the loop phi up to and including
233 // the backedge are valid as part of a String Builder, then the loop phi
234 // status is siwtched to State::kInStringBuilder. Then, in the final step
235 // where we switch the status to State::kConfirmedInStringBuilder, we ignore
236 // nodes that have a status that isn't kInStringBuilder, which means that we
237 // ignore loop phis that still have the kPendingPhi status (and their
238 // successors). The String Builders thus cannot end inside loops.
239 bool IsStringBuilderEnd(Node* node);
240 // Returns true if {node} is a the last node of a StringBuilder and is not a
241 // loop phi. The "loop phi" distinction matters, because trimming for loop
242 // phis is trickier (because we don't want to trim at every iteration of the
243 // loop, but only once after the loop).
244 bool IsNonLoopPhiStringBuilderEnd(Node* node);
245 // Returns true if {node} is the input of a concatenation that is part of a
246 // StringBuilder.
247 bool IsStringBuilderConcatInput(Node* node);
248 // Returns true if {node} is part of a StringBuilder.
249 bool ConcatIsInStringBuilder(Node* node);
250 // Returns true if {node} is the 1st node of a StringBuilder (which means that
251 // when lowering {node}, we should allocate and initialize everything for this
252 // particular StringBuilder).
253 bool IsFirstConcatInStringBuilder(Node* node);
254
255 // Returns an OneOrTwoByteAnalysis::State representing whether the
256 // StringBuilder that contains {node} is building a 1-byte or a 2-byte.
257 OneOrTwoByteAnalysis::State GetOneOrTwoByte(Node* node);
258
259 void Run();
260
261 JSGraph* jsgraph() const { return jsgraph_; }
262 TFGraph* graph() const { return jsgraph_->graph(); }
263 Schedule* schedule() const { return schedule_; }
264 Zone* temp_zone() const { return temp_zone_; }
265 JSHeapBroker* broker() const { return broker_; }
266
267 private:
268 enum class State : uint8_t {
269 kUnvisited = 0,
270 kBeginStringBuilder, // A (potential) beginning of a StringBuilder
271 kInStringBuilder, // A node that could be in a StringBuilder
272 kPendingPhi, // A phi that could be in a StringBuilder
273 kConfirmedInStringBuilder, // A node that is definitely in a StringBuilder
274 kEndStringBuilder, // A node that ends definitely a StringBuilder, and that
275 // can be trimmed right away
276 kEndStringBuilderLoopPhi, // A phi that ends a StringBuilder, and whose
277 // trimming need to be done at the beginning of
278 // the following blocks.
279 kInvalid, // A node that we visited and that we can't optimize.
280 kNumberOfState
281 };
282
283 struct Status {
284 int id; // The id of the StringBuilder that the node belongs to (or
285 // kInvalidId).
286 State state; // The state of the node.
287 };
288 static constexpr int kInvalidId = -1;
289
290 Status GetStatus(Node* node) const {
291 if (node->id() > status_.size()) {
292 return Status{kInvalidId, State::kInvalid};
293 } else {
294 return status_[node->id()];
295 }
296 }
297 void SetStatus(Node* node, State state, int id = kInvalidId) {
298 DCHECK_NE(state, State::kUnvisited);
299 DCHECK_IMPLIES(id != kInvalidId, state != State::kInvalid);
300 if (node->id() >= status_.size()) {
301 // We should really not allocate too many new nodes: the only new nodes we
302 // allocate are constant inputs of nodes in the string builder that have
303 // multiple uses. Thus, we use a slow exponential growth for {status_}.
304 constexpr double growth_factor = 1.1;
305 status_.resize(node->id() * growth_factor,
306 Status{kInvalidId, State::kUnvisited});
307 }
308 status_[node->id()] = Status{id, state};
309 }
310 void UpdateStatus(Node* node, State state) {
311 int id = state == State::kInvalid ? kInvalidId : GetStatus(node).id;
312 status_[node->id()] = Status{id, state};
313 }
314
321 const StringBuilder kInvalidStringBuilder = {
322 nullptr, kInvalidId, false, OneOrTwoByteAnalysis::State::kUnknown};
323
324#ifdef DEBUG
325 bool StringBuilderIsValid(StringBuilder string_builder) {
326 return string_builder.start != nullptr && string_builder.id != kInvalidId &&
327 string_builder.has_loop_phi;
328 }
329#endif
330
331 bool IsLoopPhi(Node* node) const {
332 return node->opcode() == IrOpcode::kPhi &&
333 schedule()->block(node)->IsLoopHeader();
334 }
335 bool LoopContains(Node* loop_phi, Node* node) {
336 DCHECK(IsLoopPhi(loop_phi));
337 return schedule()->block(loop_phi)->LoopContains(schedule()->block(node));
338 }
339
340 int GetStringBuilderIdForConcat(Node* node);
341 void ReplaceConcatInputIfNeeded(Node* node, int input_idx);
342 bool CheckNodeUses(Node* node, Node* concat_child, Status status);
343 bool CheckPreviousNodeUses(Node* child, Status status,
344 int input_if_loop_phi = 0);
345 int GetPhiPredecessorsCommonId(Node* node);
346
347 void FinalizeStringBuilders();
348 void VisitNode(Node* node, BasicBlock* block);
349 void VisitGraph();
350
351 static constexpr bool kAllowAnyStringOnTheRhs = false;
352
357 unsigned int string_builder_count_ = 0;
358 // {blocks_to_trimmings_map_} is a map from block IDs to loop phi nodes that
359 // end string builders. For each such node, a trimming should be inserted at
360 // the beginning of the block (in EffectControlLinearizer) in order to
361 // properly finish the string builder (well, most things will work if the
362 // trimming is omitted, but adding this trimming save memory and removes the
363 // SlicedString indirection; the only thing that would be an issue is that the
364 // rest of the VM could have access to a SlicedString that is less than
365 // SlicedString::kMinLength characters, which may or may not break things).
369 // {loop_headers_} is used to keep track ot the start of each loop that the
370 // block currently being visited is part of.
372};
373
374} // namespace compiler
375} // namespace internal
376} // namespace v8
377
378#endif // V8_COMPILER_STRING_BUILDER_OPTIMIZER_H_
Schedule * schedule
JSGraph * jsgraph
friend Zone
Definition asm-types.cc:195
NodeId id() const
Definition node.h:57
static State ConcatResultIsOneOrTwoByte(State a, State b)
OneOrTwoByteAnalysis(TFGraph *graph, Zone *zone, JSHeapBroker *broker)
std::optional< std::pair< int64_t, int64_t > > TryGetRange(Node *node)
void SetStatus(Node *node, State state, int id=kInvalidId)
ZoneVector< std::optional< ZoneVector< Node * > > > blocks_to_trimmings_map_
JSHeapBroker *const broker_
JSHeapBroker * broker
RpoNumber block
Schedule const *const schedule_
#define DCHECK_IMPLIES(v1, v2)
Definition logging.h:493
#define DCHECK_NE(v1, v2)
Definition logging.h:486
#define DCHECK(condition)
Definition logging.h:482
#define V8_EXPORT_PRIVATE
Definition macros.h:460