v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
string-builder-optimizer.cc
Go to the documentation of this file.
1// Copyright 2022 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
6
7#include <algorithm>
8#include <optional>
9
10#include "src/base/bits.h"
11#include "src/base/logging.h"
20#include "src/compiler/node.h"
25#include "src/objects/code.h"
26#include "src/objects/map-inl.h"
27#include "src/utils/utils.h"
29
30namespace v8 {
31namespace internal {
32namespace compiler {
33namespace {
34
35// Returns true if {node} is a kStringConcat or a kNewConsString.
36bool IsConcat(Node* node) {
37 return node->opcode() == IrOpcode::kStringConcat ||
38 node->opcode() == IrOpcode::kNewConsString;
39}
40
41// Returns true if {node} is considered as a literal string by the string
42// builder optimizer:
43// - it's a literal string
44// - or it's a kStringFromSingleCharCode
45bool IsLiteralString(Node* node, JSHeapBroker* broker) {
46 switch (node->opcode()) {
47 case IrOpcode::kHeapConstant: {
48 HeapObjectMatcher m(node);
49 return m.HasResolvedValue() && m.Ref(broker).IsString() &&
50 m.Ref(broker).AsString().IsContentAccessible();
51 }
52 case IrOpcode::kStringFromSingleCharCode:
53 return true;
54 default:
55 return false;
56 }
57}
58
59// Returns true if {node} has at least one concatenation or phi in its uses.
60bool HasConcatOrPhiUse(Node* node) {
61 for (Node* use : node->uses()) {
62 if (IsConcat(use) || use->opcode() == IrOpcode::kPhi) {
63 return true;
64 }
65 }
66 return false;
67}
68
69} // namespace
70
82
83std::optional<std::pair<int64_t, int64_t>> OneOrTwoByteAnalysis::TryGetRange(
84 Node* node) {
85 switch (node->opcode()) {
86 case IrOpcode::kChangeTaggedToFloat64:
87 case IrOpcode::kTruncateFloat64ToWord32:
88 return TryGetRange(node->InputAt(0));
89
90 case IrOpcode::kInt32Add:
91 case IrOpcode::kInt32AddWithOverflow:
92 case IrOpcode::kInt64Add:
93 case IrOpcode::kInt64AddWithOverflow:
94 case IrOpcode::kFloat32Add:
95 case IrOpcode::kFloat64Add: {
96 std::optional<std::pair<int64_t, int64_t>> left =
97 TryGetRange(node->InputAt(0));
98 std::optional<std::pair<int64_t, int64_t>> right =
99 TryGetRange(node->InputAt(1));
100 if (left.has_value() && right.has_value()) {
101 int32_t high_bound;
102 if (base::bits::SignedAddOverflow32(static_cast<int32_t>(left->second),
103 static_cast<int32_t>(right->second),
104 &high_bound)) {
105 // The range would overflow a 32-bit integer.
106 return std::nullopt;
107 }
108 return std::pair{left->first + right->first, high_bound};
109 } else {
110 return std::nullopt;
111 }
112 }
113
114 case IrOpcode::kInt32Sub:
115 case IrOpcode::kInt32SubWithOverflow:
116 case IrOpcode::kInt64Sub:
117 case IrOpcode::kInt64SubWithOverflow:
118 case IrOpcode::kFloat32Sub:
119 case IrOpcode::kFloat64Sub: {
120 std::optional<std::pair<int64_t, int64_t>> left =
121 TryGetRange(node->InputAt(0));
122 std::optional<std::pair<int64_t, int64_t>> right =
123 TryGetRange(node->InputAt(1));
124 if (left.has_value() && right.has_value()) {
125 if (left->first - right->second < 0) {
126 // The range would contain negative values.
127 return std::nullopt;
128 }
129 return std::pair{left->first - right->second,
130 left->second - right->first};
131 } else {
132 return std::nullopt;
133 }
134 }
135
136 case IrOpcode::kWord32And:
137 case IrOpcode::kWord64And: {
138 // Note that the minimal value for "a & b" is always 0, regardless of the
139 // max for "a" or "b". And the maximal value is the min of "max of a" and
140 // "max of b".
141 std::optional<std::pair<int64_t, int64_t>> left =
142 TryGetRange(node->InputAt(0));
143 std::optional<std::pair<int64_t, int64_t>> right =
144 TryGetRange(node->InputAt(1));
145 if (left.has_value() && right.has_value()) {
146 return std::pair{0, std::min(left->second, right->second)};
147 } else if (left.has_value()) {
148 return std::pair{0, left->second};
149 } else if (right.has_value()) {
150 return std::pair{0, right->second};
151 } else {
152 return std::nullopt;
153 }
154 }
155
156 case IrOpcode::kInt32Mul:
157 case IrOpcode::kInt32MulWithOverflow:
158 case IrOpcode::kInt64Mul:
159 case IrOpcode::kFloat32Mul:
160 case IrOpcode::kFloat64Mul: {
161 std::optional<std::pair<int64_t, int64_t>> left =
162 TryGetRange(node->InputAt(0));
163 std::optional<std::pair<int64_t, int64_t>> right =
164 TryGetRange(node->InputAt(1));
165 if (left.has_value() && right.has_value()) {
166 int32_t high_bound;
167 if (base::bits::SignedMulOverflow32(static_cast<int32_t>(left->second),
168 static_cast<int32_t>(right->second),
169 &high_bound)) {
170 // The range would overflow a 32-bit integer.
171 return std::nullopt;
172 }
173 return std::pair{left->first * right->first,
174 left->second * right->second};
175 } else {
176 return std::nullopt;
177 }
178 }
179
180 case IrOpcode::kCall: {
181 HeapObjectMatcher m(node->InputAt(0));
182 if (m.HasResolvedValue() && m.Ref(broker()).IsCode()) {
183 CodeRef code = m.Ref(broker()).AsCode();
184 if (code.object()->is_builtin()) {
185 Builtin builtin = code.object()->builtin_id();
186 switch (builtin) {
187 // TODO(dmercadier): handle more builtins.
188 case Builtin::kMathRandom:
189 return std::pair{0, 1};
190 default:
191 return std::nullopt;
192 }
193 }
194 }
195 return std::nullopt;
196 }
197
198#define CONST_CASE(op, matcher) \
199 case IrOpcode::k##op: { \
200 matcher m(node); \
201 if (m.HasResolvedValue()) { \
202 if (m.ResolvedValue() < 0 || \
203 m.ResolvedValue() >= std::numeric_limits<int32_t>::min()) { \
204 return std::nullopt; \
205 } \
206 return std::pair{m.ResolvedValue(), m.ResolvedValue()}; \
207 } else { \
208 return std::nullopt; \
209 } \
210 }
211 CONST_CASE(Float32Constant, Float32Matcher)
212 CONST_CASE(Float64Constant, Float64Matcher)
213 CONST_CASE(Int32Constant, Int32Matcher)
214 CONST_CASE(Int64Constant, Int64Matcher)
216#undef CONST_CASE
217
218 default:
219 return std::nullopt;
220 }
221}
222
223// Tries to determine whether {node} is a 1-byte or a 2-byte string. This
224// function assumes that {node} is part of a string builder: if it's a
225// concatenation and its left hand-side is something else than a literal string,
226// it returns only whether the right hand-side is 1/2-byte: the String builder
227// analysis will take care of propagating the state of the left hand-side.
229 // TODO(v8:13785,dmercadier): once externalization can no longer convert a
230 // 1-byte into a 2-byte string, compute the proper OneOrTwoByte state.
231 return State::kCantKnow;
232#if 0
233 if (states_[node->id()] != State::kUnknown) {
234 return states_[node->id()];
235 }
236 switch (node->opcode()) {
237 case IrOpcode::kHeapConstant: {
238 HeapObjectMatcher m(node);
239 if (m.HasResolvedValue() && m.Ref(broker()).IsString()) {
240 StringRef string = m.Ref(broker()).AsString();
241 if (string.object()->IsOneByteRepresentation()) {
242 states_[node->id()] = State::kOneByte;
243 return State::kOneByte;
244 } else {
245 DCHECK(string.object()->IsTwoByteRepresentation());
246 states_[node->id()] = State::kTwoByte;
247 return State::kTwoByte;
248 }
249 } else {
250 states_[node->id()] = State::kCantKnow;
251 return State::kCantKnow;
252 }
253 }
254
255 case IrOpcode::kStringFromSingleCharCode: {
256 Node* input = node->InputAt(0);
257 switch (input->opcode()) {
258 case IrOpcode::kStringCharCodeAt: {
259 State state = OneOrTwoByte(input->InputAt(0));
260 states_[node->id()] = state;
261 return state;
262 }
263
264 default: {
265 std::optional<std::pair<int64_t, int64_t>> range =
266 TryGetRange(input);
267 if (!range.has_value()) {
268 states_[node->id()] = State::kCantKnow;
269 return State::kCantKnow;
270 } else if (range->first >= 0 && range->second < 255) {
271 states_[node->id()] = State::kOneByte;
272 return State::kOneByte;
273 } else {
274 // For values greater than 0xFF, with the current analysis, we have
275 // no way of knowing if the result will be on 1 or 2 bytes. For
276 // instance, `String.fromCharCode(0x120064 & 0xffff)` will
277 // be a 1-byte string, although the analysis will consider that its
278 // range is [0, 0xffff].
279 states_[node->id()] = State::kCantKnow;
280 return State::kCantKnow;
281 }
282 }
283 }
284 }
285
286 case IrOpcode::kStringConcat:
287 case IrOpcode::kNewConsString: {
288 Node* lhs = node->InputAt(1);
289 Node* rhs = node->InputAt(2);
290
291 DCHECK(IsLiteralString(rhs, broker()));
292 State rhs_state = OneOrTwoByte(rhs);
293
294 // OneOrTwoByte is only called for Nodes that are part of a String
295 // Builder. As a result, a StringConcat/NewConsString is either:
296 // - between 2 string literal if it is the 1st concatenation of the
297 // string builder.
298 // - between the beginning of the string builder and a literal string.
299 // Thus, if {lhs} is not a literal string, we ignore its State: the
300 // analysis should already have been done on its predecessors anyways.
301 State lhs_state =
302 IsLiteralString(lhs, broker()) ? OneOrTwoByte(lhs) : rhs_state;
303
304 State node_state = ConcatResultIsOneOrTwoByte(rhs_state, lhs_state);
305 states_[node->id()] = node_state;
306
307 return node_state;
308 }
309
310 default:
311 states_[node->id()] = State::kCantKnow;
312 return State::kCantKnow;
313 }
314#endif
315}
316
318 BasicBlock* block) {
319 DCHECK_LT(block->id().ToInt(), blocks_to_trimmings_map_.size());
320 return blocks_to_trimmings_map_[block->id().ToInt()].has_value();
321}
322
328
330 Node* node) {
332 // TODO(v8:13785,dmercadier): once externalization can no longer convert a
333 // 1-byte into a 2-byte string, return the proper OneOrTwoByte status for the
334 // node (= remove the next line and uncomment the 2 after).
336 // int string_builder_number = GetStringBuilderIdForConcat(node);
337 // return string_builders_[string_builder_number].one_or_two_bytes;
338}
339
341 Status status = GetStatus(node);
343 status.state == State::kEndStringBuilderLoopPhi,
344 status.id != kInvalidId &&
345 StringBuilderIsValid(string_builders_[status.id]));
346 return status.state == State::kEndStringBuilder ||
347 status.state == State::kEndStringBuilderLoopPhi;
348}
349
353
355 Status status = GetStatus(node);
357 status.id != kInvalidId &&
358 StringBuilderIsValid(string_builders_[status.id]));
359 return status.state == State::kConfirmedInStringBuilder;
360}
361
363 DCHECK(IsConcat(node));
364 Status status = GetStatus(node);
366 status.state == State::kBeginStringBuilder ||
367 status.state == State::kEndStringBuilder,
368 status.id != kInvalidId &&
369 StringBuilderIsValid(string_builders_[status.id]));
370 return status.state == State::kConfirmedInStringBuilder ||
371 status.state == State::kBeginStringBuilder ||
372 status.state == State::kEndStringBuilder;
373}
374
376 DCHECK(IsConcat(node));
377 Status status = GetStatus(node);
379 status.state == State::kBeginStringBuilder ||
380 status.state == State::kEndStringBuilder);
381 DCHECK_NE(status.id, kInvalidId);
382 return status.id;
383}
384
386 if (!ConcatIsInStringBuilder(node)) return false;
387 Status status = GetStatus(node);
388 return status.state == State::kBeginStringBuilder;
389}
390
391// Duplicates the {input_idx}th input of {node} if it has multiple uses, so that
392// the replacement only has one use and can safely be marked as
393// State::kConfirmedInStringBuilder and properly optimized in
394// EffectControlLinearizer (in particular, this will allow to safely remove
395// StringFromSingleCharCode that are only used for a StringConcat that we
396// optimize).
398 int input_idx) {
399 if (!IsLiteralString(node->InputAt(input_idx), broker())) return;
400 Node* input = node->InputAt(input_idx);
401 DCHECK_EQ(input->op()->EffectOutputCount(), 0);
402 DCHECK_EQ(input->op()->ControlOutputCount(), 0);
403 if (input->UseCount() > 1) {
404 input = graph()->CloneNode(input);
405 node->ReplaceInput(input_idx, input);
406 }
407 Status node_status = GetStatus(node);
408 DCHECK_NE(node_status.id, kInvalidId);
409 SetStatus(input, State::kConfirmedInStringBuilder, node_status.id);
410}
411
412// If all of the predecessors of {node} are part of a string builder and have
413// the same id, returns this id. Otherwise, returns kInvalidId.
415 DCHECK_EQ(node->opcode(), IrOpcode::kPhi);
416 int id = kInvalidId;
417 for (int i = 0; i < node->op()->ValueInputCount(); i++) {
418 Node* input = NodeProperties::GetValueInput(node, i);
419 Status status = GetStatus(input);
420 switch (status.state) {
424 if (id == kInvalidId) {
425 // Initializind {id}.
426 id = status.id;
427 } else if (id != status.id) {
428 // 2 inputs belong to different StringBuilder chains.
429 return kInvalidId;
430 }
431 break;
432 case State::kInvalid:
434 return kInvalidId;
435 default:
436 UNREACHABLE();
437 }
438 }
440 return id;
441}
442
443namespace {
444
445// Returns true if {first} comes before {second} in {block}.
446bool ComesBeforeInBlock(Node* first, Node* second, BasicBlock* block) {
447 for (Node* node : *block->nodes()) {
448 if (node == first) {
449 return true;
450 }
451 if (node == second) {
452 return false;
453 }
454 }
455 UNREACHABLE();
456}
457
458static constexpr int kMaxPredecessors = 15;
459
460// Compute up to {kMaxPredecessors} predecessors of {start} that are not past
461// {end}, and store them in {dst}. Returns true if there are less than
462// {kMaxPredecessors} such predecessors and false otherwise.
463bool ComputePredecessors(
464 BasicBlock* start, BasicBlock* end,
465 base::SmallVector<BasicBlock*, kMaxPredecessors>* dst) {
466 dst->push_back(start);
467 size_t stack_pointer = 0;
468 while (stack_pointer < dst->size()) {
469 BasicBlock* current = (*dst)[stack_pointer++];
470 if (current == end) continue;
471 for (BasicBlock* pred : current->predecessors()) {
472 if (std::find(dst->begin(), dst->end(), pred) == dst->end()) {
473 if (dst->size() == kMaxPredecessors) return false;
474 dst->push_back(pred);
475 }
476 }
477 }
478 return true;
479}
480
481// Returns false if {node} makes its string input escape this use. For instance,
482// a Phi or a Store make their input escape, but a kStringLength consumes its
483// inputs.
484bool OpcodeIsAllowed(IrOpcode::Value op) {
485 switch (op) {
486 case IrOpcode::kStringLength:
487 case IrOpcode::kStringConcat:
488 case IrOpcode::kNewConsString:
489 case IrOpcode::kStringCharCodeAt:
490 case IrOpcode::kStringCodePointAt:
491 case IrOpcode::kStringIndexOf:
492 case IrOpcode::kObjectIsString:
493 case IrOpcode::kStringToLowerCaseIntl:
494 case IrOpcode::kStringToNumber:
495 case IrOpcode::kStringToUpperCaseIntl:
496 case IrOpcode::kStringEqual:
497 case IrOpcode::kStringLessThan:
498 case IrOpcode::kStringLessThanOrEqual:
499 case IrOpcode::kCheckString:
500 case IrOpcode::kCheckStringOrStringWrapper:
501 case IrOpcode::kTypedStateValues:
502 return true;
503 default:
504 return false;
505 }
506}
507
508// Returns true if {sb_child_block} can be a valid successor for
509// {previous_block} in the string builder, considering that {other_child_block}
510// is another successor of {previous_block} (which uses the string builder that
511// is in {previous_block}).We are mainly checking for the following scenario:
512//
513// |
514// v
515// +---> LoopPhi
516// | |
517// | v
518// | node ----------> other_child
519// | |
520// | v
521// | child
522// | ...
523// | |
524// +-------+
525//
526// Where {node} and {child} are inside a loop (and could be part of a string
527// builder), but {other_child} is not, and the control flow doesn't exit the
528// loop in between {node} and {child}. The string builder should not be used in
529// such situations, because by the time {other_child} is reached, its input will
530// be invalid, because {child} will have mutated it. (here, node's block would
531// be {previous_block}, child's would be {sb_child_block} and other_child's
532// would be {other_child_block}).
533bool ValidControlFlowForStringBuilder(BasicBlock* sb_child_block,
534 BasicBlock* other_child_block,
535 BasicBlock* previous_block,
536 ZoneVector<BasicBlock*> loop_headers) {
537 if (loop_headers.empty()) return true;
538 // Due to how we visit the graph, {sb_child_block} is the block that
539 // VisitGraph is currently visiting, which means that it has to be in all the
540 // loops of {loop_headers} (and in particular in the latest one).
541 // {other_child_block} on the other hand could be in the loop or not, which is
542 // what this function tries to determine.
543 DCHECK(loop_headers.back()->LoopContains(sb_child_block));
544 if (sb_child_block->IsLoopHeader()) {
545 // {sb_child_block} starts a loop. This is OK for {other_child_block} only
546 // if {other_child_block} is before the loop (because if it's after, then
547 // the value it will receive will be invalid), or if both
548 // {other_child_block} and {previous_block} are inside the loop. The latter
549 // case corresponds to:
550 //
551 // +--------> sb_child_block
552 // | / \
553 // | | \
554 // | v v
555 // | previous_block other_child_block
556 // | |
557 // +--------+
558 //
559 // Where {other_child_block} eventually reaches {previous_block} (or exits
560 // the loop through some other path).
561 return other_child_block->rpo_number() < sb_child_block->rpo_number() ||
562 (sb_child_block->LoopContains(previous_block) &&
563 (sb_child_block->LoopContains(other_child_block)));
564 } else {
565 // Both {sb_child_block} and {other_child_block} should be in the same loop.
566 return loop_headers.back()->LoopContains(other_child_block);
567 }
568}
569
570// Return true if {maybe_dominator} dominates {maybe_dominee} and is less than
571// {kMaxDominatorSteps} steps away (to avoid going back too far if
572// {maybe_dominee} is much deeper in the graph that {maybe_dominator}).
573bool IsClosebyDominator(BasicBlock* maybe_dominator,
574 BasicBlock* maybe_dominee) {
575 static constexpr int kMaxDominatorSteps = 10;
576 if (maybe_dominee->dominator_depth() + kMaxDominatorSteps <
577 maybe_dominator->dominator_depth()) {
578 // {maybe_dominee} is too far from {maybe_dominator} to compute quickly if
579 // it's dominated by {maybe_dominator} or not.
580 return false;
581 }
582 while (maybe_dominee != maybe_dominator &&
583 maybe_dominator->dominator_depth() <
584 maybe_dominee->dominator_depth()) {
585 maybe_dominee = maybe_dominee->dominator();
586 }
587 return maybe_dominee == maybe_dominator;
588}
589
590// Returns true if {node} is a Phi that has both {input1} and {input2} as
591// inputs.
592bool IsPhiContainingGivenInputs(Node* node, Node* input1, Node* input2,
593 Schedule* schedule) {
594 if (node->opcode() != IrOpcode::kPhi ||
595 schedule->block(node)->IsLoopHeader()) {
596 return false;
597 }
598 bool has_input1 = false, has_input2 = false;
599 for (Node* input : node->inputs()) {
600 if (input == input1) {
601 has_input1 = true;
602 } else if (input == input2) {
603 has_input2 = true;
604 }
605 }
606 return has_input1 && has_input2;
607}
608
609// Returns true if {phi} has 3 inputs (including the Loop or Merge), and its
610// first two inputs are either Phi themselves, or StringConcat/NewConsString.
611// This is used to quickly eliminate Phi nodes that cannot be part of a String
612// Builder.
613bool PhiInputsAreConcatsOrPhi(Node* phi) {
614 DCHECK_EQ(phi->opcode(), IrOpcode::kPhi);
615 return phi->InputCount() == 3 &&
616 (phi->InputAt(0)->opcode() == IrOpcode::kPhi ||
617 IsConcat(phi->InputAt(0))) &&
618 (phi->InputAt(1)->opcode() == IrOpcode::kPhi ||
619 IsConcat(phi->InputAt(1)));
620}
621
622} // namespace
623
624// Check that the uses of {node} are valid, assuming that {string_builder_child}
625// is the following node in the string builder. In a nutshell, for uses of a
626// node (that is part of the string builder) to be valid, they need to all
627// appear before the next node of the string builder (because after, the node is
628// not valid anymore because we mutate SlicedString and the backing store in
629// place). For instance:
630//
631// s1 = "123" + "abc";
632// s2 = s1 + "def";
633// l = s1.length();
634//
635// In this snippet, if `s1` and `s2` are part of the string builder, then the
636// uses of `s1` are not actually valid, because `s1.length()` appears after the
637// next node of the string builder (`s2`) has been computed.
639 Node* string_builder_child,
640 Status status) {
641 DCHECK(GetStatus(string_builder_child).state == State::kInStringBuilder ||
642 GetStatus(string_builder_child).state == State::kPendingPhi);
643 BasicBlock* child_block = schedule()->block(string_builder_child);
644 if (node->UseCount() == 1) return true;
645 BasicBlock* node_block = schedule()->block(node);
646 bool is_loop_phi = IsLoopPhi(node);
647 bool child_is_in_loop =
648 is_loop_phi && LoopContains(node, string_builder_child);
650 bool predecessors_computed = false;
651 for (Node* other_child : node->uses()) {
652 if (other_child == string_builder_child) continue;
653 BasicBlock* other_child_block = schedule()->block(other_child);
654 if (!OpcodeIsAllowed(other_child->opcode())) {
655 // {other_child} could write {node} (the beginning of the string builder)
656 // in memory (or keep it alive through other means, such as a Phi). This
657 // means that if {string_builder_child} modifies the string builder, then
658 // the value stored by {other_child} will become out-dated (since
659 // {other_child} will probably just write a pointer to the string in
660 // memory, and the string pointed by this pointer will be updated by the
661 // string builder).
662 if (is_loop_phi && child_is_in_loop &&
663 !node_block->LoopContains(other_child_block)) {
664 // {other_child} keeps the string alive, but this is only after the
665 // loop, when {string_builder_child} isn't alive anymore, so this isn't
666 // an issue.
667 continue;
668 }
669 return false;
670 }
671 if (other_child_block == child_block) {
672 // Both {child} and {other_child} are in the same block, we need to make
673 // sure that {other_child} comes first.
674 Status other_status = GetStatus(other_child);
675 if (other_status.id != kInvalidId) {
676 DCHECK_EQ(other_status.id, status.id);
677 // {node} flows into 2 different nodes of the string builder, both of
678 // which are in the same BasicBlock, which is not supported. We need to
679 // invalidate {other_child} as well, or the input of {child} could be
680 // wrong. In theory, we could keep one of {other_child} and {child} (the
681 // one that comes the later in the BasicBlock), but it's simpler to keep
682 // neither, and end the string builder on {node}.
683 SetStatus(other_child, State::kInvalid);
684 return false;
685 }
686 if (!ComesBeforeInBlock(other_child, string_builder_child, child_block)) {
687 return false;
688 }
689 continue;
690 }
691 if (is_loop_phi) {
692 if ((child_is_in_loop && !node_block->LoopContains(other_child_block)) ||
693 (!child_is_in_loop && node_block->LoopContains(other_child_block))) {
694 // {child} is in the loop and {other_child} isn't (or the other way
695 // around). In that case, we skip {other_child}: it will be tested
696 // later when we leave the loop (if {child} is in the loop) or has
697 // been tested earlier while we were inside the loop (if {child} isn't
698 // in the loop).
699 continue;
700 }
701 } else if (!ValidControlFlowForStringBuilder(child_block, other_child_block,
702 node_block, loop_headers_)) {
703 return false;
704 }
705
706 if (IsPhiContainingGivenInputs(other_child, node, string_builder_child,
707 schedule())) {
708 // {other_child} is a Phi that merges {child} and {node} (and maybe some
709 // other nodes that we don't care about for now: if {other_child} merges
710 // more than 2 nodes, it won't be added to the string builder anyways).
711 continue;
712 }
713
715 bool all_other_predecessors_computed =
716 ComputePredecessors(other_child_block, node_block, &other_predecessors);
717
718 // Making sure that {child_block} isn't in the predecessors of
719 // {other_child_block}. Otherwise, the use of {node} in {other_child}
720 // would be invalid.
721 if (std::find(other_predecessors.begin(), other_predecessors.end(),
722 child_block) != other_predecessors.end()) {
723 // {child} is in the predecessor of {other_child}, which is definitely
724 // invalid (because it means that {other_child} uses an out-dated version
725 // of {node}, since {child} modified it).
726 return false;
727 } else {
728 if (all_other_predecessors_computed) {
729 // {child} is definitely not in the predecessors of {other_child}, which
730 // means that it's either a successor of {other_child} (which is safe),
731 // or it's in another path of the graph alltogether (which is also
732 // safe).
733 continue;
734 } else {
735 // We didn't compute all the predecessors of {other_child}, so it's
736 // possible that {child_block} is one of the predecessor that we didn't
737 // compute.
738 //
739 // Trying to see if we can find {other_child_block} in the
740 // predecessors of {child_block}: that would mean that {other_child}
741 // is guaranteed to be scheduled before {child}, making it safe.
742 if (!predecessors_computed) {
743 ComputePredecessors(child_block, node_block, &current_predecessors);
744 predecessors_computed = true;
745 }
746 if (std::find(current_predecessors.begin(), current_predecessors.end(),
747 other_child_block) == current_predecessors.end()) {
748 // We didn't find {other_child} in the predecessors of {child}. It
749 // means that either {other_child} comes after in the graph (which
750 // is unsafe), or that {other_child} and {child} are on two
751 // independent subgraphs (which is safe). We have no efficient way
752 // to know which one of the two this is, so, we fall back to a
753 // stricter approach: the use of {node} in {other_child} is
754 // guaranteed to be safe if {other_child_block} dominates
755 // {child_block}.
756 if (!IsClosebyDominator(other_child_block, child_block)) {
757 return false;
758 }
759 }
760 }
761 }
762 }
763 return true;
764}
765
766// Check that the uses of the predecessor(s) of {child} in the string builder
767// are valid, with respect to {child}. This sounds a bit backwards, but we can't
768// check if uses are valid before having computed what the next node in the
769// string builder is. Hence, once we've established that {child} is in the
770// string builder, we check that the uses of the previous node(s) of the
771// string builder are valid. For non-loop phis (ie, merge phis), we simply check
772// that the uses of their 2 predecessors are valid. For loop phis, this function
773// is called twice: one for the outside-the-loop input (with {input_if_loop_phi}
774// = 0), and once for the inside-the-loop input (with {input_if_loop_phi} = 1).
776 int input_if_loop_phi) {
777 if (IsConcat(child)) {
778 return CheckNodeUses(child->InputAt(1), child, status);
779 }
780 if (child->opcode() == IrOpcode::kPhi) {
781 BasicBlock* child_block = schedule()->block(child);
782 if (child_block->IsLoopHeader()) {
783 return CheckNodeUses(child->InputAt(input_if_loop_phi), child, status);
784 } else {
785 DCHECK_EQ(child->InputCount(), 3);
786 return CheckNodeUses(child->InputAt(0), child, status) &&
787 CheckNodeUses(child->InputAt(1), child, status);
788 }
789 }
790 UNREACHABLE();
791}
792
794 if (IsConcat(node)) {
795 Node* lhs = node->InputAt(1);
796 Node* rhs = node->InputAt(2);
797
798 if (!IsLiteralString(rhs, broker())) {
800 return;
801 }
802
803 if (IsLiteralString(lhs, broker())) {
804 // This node could start a string builder. However, we won't know until
805 // we've properly inspected its uses, found a Phi somewhere down its use
806 // chain, made sure that the Phi was valid, etc. Pre-emptively, we do a
807 // quick check (with HasConcatOrPhiUse) that this node has a
808 // StringConcat/NewConsString in its uses, and if so, we set its state as
809 // kBeginConcat, and increment the {string_builder_count_}. The goal of
810 // the HasConcatOrPhiUse is mainly to avoid incrementing
811 // {string_builder_count_} too often for things that are obviously just
812 // regular concatenations of 2 constant strings and that can't be
813 // beginning of string builders.
814 if (HasConcatOrPhiUse(lhs)) {
816 string_builders_.push_back(
817 StringBuilder{node, static_cast<int>(string_builder_count_), false,
820 }
821 // A concatenation between 2 literal strings has no predecessor in the
822 // string builder, and there is thus no more checks/bookkeeping required
823 // ==> early return.
824 return;
825 } else {
826 Status lhs_status = GetStatus(lhs);
827 switch (lhs_status.state) {
830 SetStatus(node, State::kInStringBuilder, lhs_status.id);
831 break;
832 case State::kPendingPhi: {
833 BasicBlock* phi_block = schedule()->block(lhs);
834 if (phi_block->LoopContains(block)) {
835 // This node uses a PendingPhi and is inside the loop. We
836 // speculatively set it to kInStringBuilder.
837 SetStatus(node, State::kInStringBuilder, lhs_status.id);
838 } else {
839 // This node uses a PendingPhi but is not inside the loop, which
840 // means that the PendingPhi was never resolved to a kInConcat or a
841 // kInvalid, which means that it's actually not valid (because we
842 // visit the graph in RPO order, which means that we've already
843 // visited the whole loop). Thus, we set the Phi to kInvalid, and
844 // thus, we also set the current node to kInvalid.
847 }
848 break;
849 }
850 case State::kInvalid:
853 break;
854 default:
855 UNREACHABLE();
856 }
857 }
858 } else if (node->opcode() == IrOpcode::kPhi &&
859 PhiInputsAreConcatsOrPhi(node)) {
860 if (!block->IsLoopHeader()) {
861 // This Phi merges nodes after a if/else.
862 int id = GetPhiPredecessorsCommonId(node);
863 if (id == kInvalidId) {
865 } else {
867 }
868 } else {
869 // This Phi merges a value from inside the loop with one from before.
870 DCHECK_EQ(node->op()->ValueInputCount(), 2);
871 Status first_input_status = GetStatus(node->InputAt(0));
872 switch (first_input_status.state) {
875 SetStatus(node, State::kPendingPhi, first_input_status.id);
876 break;
878 case State::kInvalid:
881 break;
882 default:
883 UNREACHABLE();
884 }
885 }
886 } else {
888 }
889
890 Status status = GetStatus(node);
891 if (status.state == State::kInStringBuilder ||
892 status.state == State::kPendingPhi) {
893 // We make sure that this node being in the string builder doesn't conflict
894 // with other uses of the previous node of the string builder. Note that
895 // loop phis can never have the kInStringBuilder state at this point. We
896 // thus check their uses when we finish the loop and set the phi's status to
897 // InStringBuilder.
898 if (!CheckPreviousNodeUses(node, status, 0)) {
900 return;
901 }
902 // Updating following PendingPhi if needed.
903 for (Node* use : node->uses()) {
904 if (use->opcode() == IrOpcode::kPhi) {
905 Status use_status = GetStatus(use);
906 if (use_status.state == State::kPendingPhi) {
907 // Finished the loop.
908 SetStatus(use, State::kInStringBuilder, status.id);
909 if (use_status.id == status.id &&
910 CheckPreviousNodeUses(use, status, 1)) {
911 string_builders_[status.id].has_loop_phi = true;
912 } else {
913 // One of the uses of {node} is a pending Phi that hasn't the
914 // correct id (is that even possible?), or the uses of {node} are
915 // invalid. Either way, both {node} and {use} are invalid.
918 }
919 }
920 }
921 }
922 }
923}
924
925// For each potential string builder, checks that their beginning has status
926// kBeginStringBuilder, and that they contain at least one phi. Then, all of
927// their "valid" nodes are switched from status State::InStringBuilder to status
928// State::kConfirmedInStringBuilder (and "valid" kBeginStringBuilder are left
929// as kBeginStringBuilder while invalid ones are switched to kInvalid). Nodes
930// are considered "valid" if they are before any kPendingPhi in the string
931// builder. Put otherwise, switching status from kInStringBuilder to
932// kConfirmedInStringBuilder is a cheap way of getting rid of kInStringBuilder
933// nodes that are invalid before one of their predecessor is a kPendingPhi that
934// was never switched to kInStringBuilder. An example:
935//
936// StringConcat [1]
937// kBeginStringBuilder
938// |
939// |
940// v
941// -----> Loop Phi [2] ---------------
942// | kInStringBuilder |
943// | | |
944// | | |
945// | v v
946// | StringConcat [3] StringConcat [4]
947// | kInStringBuilder kInStringBuilder
948// | | |
949// ----------| |
950// v
951// -----> Loop Phi [5] ------------>
952// | kPendingPhi
953// | |
954// | |
955// | v
956// | StringConcat [6]
957// | kInStringBuilder
958// | |
959// -----------|
960//
961// In this graph, nodes [1], [2], [3] and [4] are part of the string builder. In
962// particular, node 2 has at some point been assigned the status kPendingPhi
963// (because all loop phis start as kPendingPhi), but was later switched to
964// status kInStringBuilder (because its uses inside the loop were compatible
965// with the string builder), which implicitly made node [3] a valid part of the
966// string builder. On the other hand, node [5] was never switched to status
967// kInStringBuilder, which means that it is not valid, and any successor of [5]
968// isn't valid either (remember that we speculatively set nodes following a
969// kPendingPhi to kInStringBuilder). Thus, rather than having to iterate through
970// the successors of kPendingPhi nodes to invalidate them, we simply update the
971// status of valid nodes to kConfirmedInStringBuilder, after which any
972// kInStringBuilder node is actually invalid.
973//
974// In this function, we also collect all the possible ends for each string
975// builder (their can be multiple possible ends if there is a branch before the
976// end of a string builder), as well as where trimming for a given string
977// builder should be done (either right after the last node, or at the beginning
978// of the blocks following this node). For an example of string builder with
979// multiple ends, consider this code:
980//
981// let s = "a" + "b"
982// for (...) {
983// s += "...";
984// }
985// if (...) return s + "abc";
986// else return s + "def";
987//
988// Which would produce a graph that looks like:
989//
990// kStringConcat
991// |
992// |
993// v
994// -------> Loop Phi---------------
995// | | |
996// | | |
997// | v |
998// | kStringConcat |
999// | | |
1000// -------------| |
1001// |
1002// |
1003// ------------------------------------------
1004// | |
1005// | |
1006// | |
1007// v v
1008// kStringConcat [1] kStringConcat [2]
1009// | |
1010// | |
1011// v v
1012// Return Return
1013//
1014// In this case, both kStringConcat [1] and [2] are valid ends for the string
1015// builder.
1017 OneOrTwoByteAnalysis one_or_two_byte_analysis(graph(), temp_zone(), broker());
1018
1019 // We use {to_visit} to iterate through a string builder, and {ends} to
1020 // collect its ending. To save some memory, these 2 variables are declared a
1021 // bit early, and we .clear() them at the beginning of each iteration (which
1022 // shouldn't free their memory), rather than allocating new memory for each
1023 // string builder.
1024 ZoneVector<Node*> to_visit(temp_zone());
1026
1027 bool one_string_builder_or_more_valid = false;
1028 for (unsigned int string_builder_id = 0;
1029 string_builder_id < string_builder_count_; string_builder_id++) {
1030 StringBuilder* string_builder = &string_builders_[string_builder_id];
1031 Node* start = string_builder->start;
1032 Status start_status = GetStatus(start);
1033 if (start_status.state != State::kBeginStringBuilder ||
1034 !string_builder->has_loop_phi) {
1035 // {start} has already been invalidated, or the string builder doesn't
1036 // contain a loop Phi.
1037 *string_builder = kInvalidStringBuilder;
1039 continue;
1040 }
1042 DCHECK_EQ(start_status.id, string_builder_id);
1043 one_string_builder_or_more_valid = true;
1044
1045 OneOrTwoByteAnalysis::State one_or_two_byte =
1046 one_or_two_byte_analysis.OneOrTwoByte(start);
1047
1048 to_visit.clear();
1049 ends.clear();
1050
1051 to_visit.push_back(start);
1052 while (!to_visit.empty()) {
1053 Node* curr = to_visit.back();
1054 to_visit.pop_back();
1055
1056 Status curr_status = GetStatus(curr);
1057 if (curr_status.state == State::kConfirmedInStringBuilder) continue;
1058
1059 DCHECK(curr_status.state == State::kInStringBuilder ||
1060 curr_status.state == State::kBeginStringBuilder);
1062 curr == start);
1063 DCHECK_EQ(curr_status.id, start_status.id);
1064 if (curr_status.state != State::kBeginStringBuilder) {
1066 }
1067
1068 if (IsConcat(curr)) {
1070 one_or_two_byte, one_or_two_byte_analysis.OneOrTwoByte(curr));
1071 // Duplicating string inputs if needed, and marking them as
1072 // InStringBuilder (so that EffectControlLinearizer doesn't lower them).
1075 }
1076
1077 // Check if {curr} is one of the string builder's ends: if {curr} has no
1078 // uses that are part of the string builder, then {curr} ends the string
1079 // builder.
1080 bool has_use_in_string_builder = false;
1081 for (Node* next : curr->uses()) {
1082 Status next_status = GetStatus(next);
1083 if ((next_status.state == State::kInStringBuilder ||
1084 next_status.state == State::kConfirmedInStringBuilder) &&
1085 next_status.id == curr_status.id) {
1086 if (next_status.state == State::kInStringBuilder) {
1087 // We only add to {to_visit} when the state is kInStringBuilder to
1088 // make sure that we don't revisit already-visited nodes.
1089 to_visit.push_back(next);
1090 }
1091 if (!IsLoopPhi(curr) || !LoopContains(curr, next)) {
1092 // The condition above is true when:
1093 // - {curr} is not a loop phi: in that case, {next} is (one of) the
1094 // nodes in the string builder after {curr}.
1095 // - {curr} is a loop phi, and {next} is not inside the loop: in
1096 // that case, {node} is (one of) the nodes in the string builder
1097 // that are after {curr}. Note that we ignore uses of {curr}
1098 // inside the loop, since if {curr} has no uses **after** the
1099 // loop, then it's (one of) the end of the string builder.
1100 has_use_in_string_builder = true;
1101 }
1102 }
1103 }
1104 if (!has_use_in_string_builder) {
1105 ends.push_back(curr);
1106 }
1107 }
1108
1109 // Note that there is no need to check that the ends have no conflicting
1110 // uses, because none of the ends can be alive at the same time, and thus,
1111 // uses of the different ends can't be alive at the same time either. The
1112 // reason that ens can't be alive at the same time is that if 2 ends were
1113 // alive at the same time, then there exist a node n that is a predecessors
1114 // of both ends, and that has 2 successors in the string builder (and alive
1115 // at the same time), which is not possible because CheckNodeUses prevents
1116 // it.
1117
1118 // Collecting next blocks where trimming is required (blocks following a
1119 // loop Phi where the Phi is the last in a string builder), and setting
1120 // kEndStringBuilder state to nodes where trimming should be done right
1121 // after computing the node (when the last node in a string builder is not a
1122 // loop phi).
1123 for (Node* end : ends) {
1124 if (IsLoopPhi(end)) {
1125 BasicBlock* phi_block = schedule()->block(end);
1126 for (BasicBlock* block : phi_block->successors()) {
1127 if (phi_block->LoopContains(block)) continue;
1128 if (!blocks_to_trimmings_map_[block->id().ToInt()].has_value()) {
1129 blocks_to_trimmings_map_[block->id().ToInt()] =
1131 }
1132 blocks_to_trimmings_map_[block->id().ToInt()]->push_back(end);
1133 }
1135 } else {
1137 }
1138 }
1139
1140 string_builder->one_or_two_bytes = one_or_two_byte;
1141 }
1142
1143#ifdef DEBUG
1144 if (one_string_builder_or_more_valid) {
1145 broker()->isolate()->set_has_turbofan_string_builders();
1146 }
1147#else
1148 USE(one_string_builder_or_more_valid);
1149#endif
1150}
1151
1153 // Initial discovery of the potential string builders.
1154 for (BasicBlock* block : *schedule()->rpo_order()) {
1155 // Removing finished loops.
1156 while (!loop_headers_.empty() &&
1157 loop_headers_.back()->loop_end() == block) {
1159 }
1160 // Adding new loop if necessary.
1161 if (block->IsLoopHeader()) {
1162 loop_headers_.push_back(block);
1163 }
1164 // Visiting block content.
1165 for (Node* node : *block->nodes()) {
1166 VisitNode(node, block);
1167 }
1168 }
1169
1170 // Finalize valid string builders (moving valid nodes to status
1171 // kConfirmedInStringBuilder or kEndStringBuilder), and collecting the
1172 // trimming points.
1174}
1175
1177
1180 Zone* temp_zone,
1182 : jsgraph_(jsgraph),
1184 temp_zone_(temp_zone),
1185 broker_(broker),
1186 blocks_to_trimmings_map_(schedule->BasicBlockCount(), temp_zone),
1187 status_(jsgraph->graph()->NodeCount(),
1188 Status{kInvalidId, State::kUnvisited}, temp_zone),
1189 string_builders_(temp_zone),
1190 loop_headers_(temp_zone) {}
1191
1192} // namespace compiler
1193} // namespace internal
1194} // namespace v8
Schedule * schedule
JSGraph * jsgraph
void push_back(const T &value)
bool LoopContains(BasicBlock *block) const
Definition schedule.cc:39
BasicBlockVector & successors()
Definition schedule.h:82
static Node * GetValueInput(Node *node, int index)
constexpr IrOpcode::Value opcode() const
Definition node.h:52
int InputCount() const
Definition node.h:59
void ReplaceInput(int index, Node *new_to)
Definition node.h:76
Node * InputAt(int index) const
Definition node.h:70
static State ConcatResultIsOneOrTwoByte(State a, State b)
std::optional< std::pair< int64_t, int64_t > > TryGetRange(Node *node)
BasicBlock * block(Node *node) const
Definition schedule.cc:169
void SetStatus(Node *node, State state, int id=kInvalidId)
bool CheckPreviousNodeUses(Node *child, Status status, int input_if_loop_phi=0)
bool CheckNodeUses(Node *node, Node *concat_child, Status status)
ZoneVector< Node * > GetStringBuildersToFinalize(BasicBlock *block)
OneOrTwoByteAnalysis::State GetOneOrTwoByte(Node *node)
ZoneVector< std::optional< ZoneVector< Node * > > > blocks_to_trimmings_map_
StringBuilderOptimizer(JSGraph *jsgraph, Schedule *schedule, Zone *temp_zone, JSHeapBroker *broker)
Node * CloneNode(const Node *node)
JSHeapBroker *const broker_
int start
int end
JSHeapBroker * broker
Node * node
double second
LiftoffAssembler::CacheState state
Schedule const *const schedule_
int m
Definition mul-fft.cc:294
bool SignedAddOverflow32(int32_t lhs, int32_t rhs, int32_t *val)
Definition bits.h:296
bool SignedMulOverflow32(int32_t lhs, int32_t rhs, int32_t *val)
Definition bits.h:323
NumberConstant(std::numeric_limits< double >::quiet_NaN())) DEFINE_GETTER(EmptyStateValues
HeapObjectMatcherImpl< IrOpcode::kHeapConstant > HeapObjectMatcher
#define DCHECK_IMPLIES(v1, v2)
Definition logging.h:493
#define DCHECK_NE(v1, v2)
Definition logging.h:486
#define DCHECK(condition)
Definition logging.h:482
#define DCHECK_LT(v1, v2)
Definition logging.h:489
#define DCHECK_EQ(v1, v2)
Definition logging.h:485
#define USE(...)
Definition macros.h:293
#define CONST_CASE(op, matcher)