branch-elimination-reducer.h
1// Copyright 2022 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_COMPILER_TURBOSHAFT_BRANCH_ELIMINATION_REDUCER_H_
6#define V8_COMPILER_TURBOSHAFT_BRANCH_ELIMINATION_REDUCER_H_
7
8#include <optional>
9
10#include "src/base/bits.h"
11#include "src/base/logging.h"
12#include "src/compiler/turboshaft/assembler.h"
13#include "src/compiler/turboshaft/index.h"
14#include "src/compiler/turboshaft/layered-hash-map.h"
15#include "src/compiler/turboshaft/operations.h"
16#include "src/utils/utils.h"
17
18#include "src/compiler/turboshaft/define-assembler-macros.inc"
19
20namespace v8::internal::compiler::turboshaft {
21
22template <typename>
23class VariableReducer;
24
25template <class Next>
26class BranchEliminationReducer : public Next {
27 // # General overview
28 //
29 // The BranchEliminationReducer optimizes branches in a few ways:
30 //
31 // 1- When a branch is nested in another branch and uses the same condition,
32 // then we can get rid of this branch and keep only the correct target.
33 // For instance:
34 //
35 // if (cond) {
36 // if (cond) print("B1");
37 // else print("B2");
38 // } else {
39 // if (cond) print("B3");
40 // else print("B4");
41 // }
42 //
43 // Will be simplified to:
44 //
45 // if (cond) {
46 // print("B1");
47 // } else {
48 // print("B4");
49 // }
50 //
51 // Because the 1st nested "if (cond)" is always true, and the 2nd is
52 // always false.
53 //
54 // Or, if you prefer a more graph-oriented visual representation:
55 //
56 // condition condition
57 // | | | |
58 // ----- | ------ |
59 // | | | |
60 // | v | v
61 // | branch | branch
62 // | / \ | / \
63 // | / \ | / \
64 // v / \ v becomes v v
65 // branch branch ======> B1 B4
66 // / \ / \
67 // / \ / \
68 // B1 B2 B3 B4
69 //
70 //
71 // 2- When 2 consecutive branches (where the 2nd one comes after the merge of
72 // the 1st one) have the same condition, we can pull the 2nd branch up into
73 // both sides of the 1st one, which removes both the merge of the 1st branch
74 // and the 2nd branch itself. For instance:
75 //
76 // if (cond) {
77 // B1;
78 // } else {
79 // B2;
80 // }
81 // B3;
82 // if (cond) {
83 // B4;
84 // } else {
85 // B5;
86 // }
87 //
88 // Will be simplified to:
89 //
90 // if (cond) {
91 // B1;
92 // B3;
93 // B4;
94 // } else {
95 // B2;
96 // B3;
97 // B5;
98 // }
99 //
100 // Or, if you prefer a more graph-oriented visual representation:
101 //
102 // condition condition
103 // | | |
104 // ------- | |
105 // | v v
106 // | branch branch
107 // | / \ / \
108 // | / \ / \
109 // | B1 B2 B1 B2
110 // | \ / | |
111 // | \ / becomes | |
112 // | merge1 ======> B3 B3
113 // | B3 | |
114 // -------> branch | |
115 // / \ B4 B5
116 // / \ \ /
117 // B4 B5 \ /
118 // \ / merge
119 // \ /
120 // merge2
121 //
122 // 2bis- In the 2nd optimization, if the condition of the 2nd branch is a Phi
123 // of 2 values coming from B1 and B2, then the same optimization can be
124 // applied for a similar result. For instance:
125 //
126 // if (cond) { if (cond) {
127 // x = 1 x = 1;
128 // } else { becomes B1
129 // x = 0 ======> } else {
130 // } x = 0;
131 // if (x) B1 else B2 B2;
132 // }
133 //
134 // If `x` is more complex than a simple boolean, then the 2nd branch will
135 // remain, except that it will be on `x`'s value directly rather than on a
136 // Phi (so, it avoids creating a Phi, and it will probably be better for
137 // branch prediction).
138 //
139 //
140 // 3- Optimizing {Return} nodes through merges: if the returned value is a
141 // {Phi}, and the Return is preceded in its block only by that Phi, then the
142 // Return can be pushed up into each of the Phi's predecessors.
143 //
144 // if (c) { if (c) {
145 // v = 42; ====> v = 42;
146 // } else { return v;
147 // v = 5; } else {
148 // } v = 5;
149 // return v; return v;
150 // }
151 //
152 // And here's the graph representation:
153 //
154 // +----B1----+ <Some other +----B1'----+ +----B2'----+
155 // | p1 = ... | block(s): | p1 = ... | | p2 = ... |
156 // | <...> | B2,...> | <...> | | <...> |
157 // +----------+ / | return p1 | | return p2 |
158 // \ / +-----------+ +-----------+
159 // \ / =====>
160 // \ /
161 // \ |
162 // +--------B3-------+
163 // | p = Phi(p1,...) |
164 // | <...> |
165 // | return p |
166 // +-----------------+
167 //
168 //
169 // 4- Eliminating merges: if the 2 branches being merged are empty and the
170 // merge block has no Phi (a Phi, if present, would be the merge block's
171 // first operation, or only be preceded by FrameState operations), then the
172 // Branch can be replaced by a single Goto to the merge block in the new graph.
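 //
 // For instance (an illustrative sketch of this 4th optimization):
 //
 //   if (cond) { } else { }            ======>        B1;
 //   B1;
 //
 // Both targets are empty Gotos to the same merge block and the merge block
 // has no Phi, so the Branch is replaced by a single Goto to that block.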
173 //
174 // 5- Eliminating unneeded control flow edges: if a block has only one
175 // successor and the successor only has one predecessor, we can merge these
176 // blocks.
177 //
178 // # Technical overview of the implementation
179 //
180 // We iterate the graph in dominator order, and maintain a hash map of
181 // conditions with a resolved value along the current path. For instance, if
182 // we have:
183 // if (c) { B1 } else { B2 }
184 // when iterating B1, we'll know that |c| is true, while when iterating
185 // over B2, we'll know that |c| is false.
186 // When reaching a Branch, we'll insert the condition in the hash map, while
187 // when reaching a Merge, we'll remove it.
188 //
189 // Then, the 1st optimization (nested branches with the same condition) is
190 // trivial: we just look the condition up in the hash map and, if its value is
191 // known, generate only the right branch target without the branch itself.
192 //
193 // For the 2nd optimization, when generating a Goto, we check if the
194 // destination block ends with a branch whose condition is already known. If
195 // that's the case, then we copy the destination block, and the 1st
196 // optimization will replace its final Branch by a Goto when reaching it.
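 //
 // As a rough sketch of this bookkeeping (the traced values are illustrative),
 // consider `if (c) { B1 } else { B2 } B3`:
 //
 //   Bind(B1): a new layer is started and c -> true is recorded.
 //   Bind(B2): B1's layer is dropped (B1 doesn't dominate B2), a new layer is
 //             started and c -> false is recorded.
 //   Bind(B3): B2's layer is dropped; nothing is known about c anymore.
 //
 // Each block starts a new layer in {known_conditions_}, and layers are
 // dropped as soon as their block no longer dominates the current block.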
197 public:
198 TURBOSHAFT_REDUCER_BOILERPLATE(BranchElimination)
199 // TODO(dmercadier): Add static_assert that this is run as part of a
200 // CopyingPhase.
201
202 void Bind(Block* new_block) {
203 Next::Bind(new_block);
204
205 if (ShouldSkipOptimizationStep()) {
206 // It's important to have a ShouldSkipOptimizationStep here, because
207 // {known_conditions_} assumes that we perform all branch elimination
208 // possible (which implies that we don't ever insert twice the same thing
209 // in {known_conditions_}). If we stop doing ReduceBranch because of
210 // ShouldSkipOptimizationStep, then this assumption doesn't hold anymore,
211 // and we should thus stop updating {known_conditions_} to not trigger
212 // some DCHECKs.
213 return;
214 }
215
216 // Update {known_conditions_} based on where {new_block} is in the dominator
217 // tree.
218 ResetToBlock(new_block);
219 ReplayMissingPredecessors(new_block);
220 StartLayer(new_block);
221
222 if (new_block->IsBranchTarget()) {
223 // The current block is a branch target, so we add the branch condition
224 // along with its value in {known_conditions_}.
225 DCHECK_EQ(new_block->PredecessorCount(), 1);
226 const Operation& op =
227 new_block->LastPredecessor()->LastOperation(__ output_graph());
228 if (const BranchOp* branch = op.TryCast<BranchOp>()) {
229 DCHECK_EQ(new_block, any_of(branch->if_true, branch->if_false));
230 bool condition_value = branch->if_true == new_block;
231 if (!known_conditions_.Contains(branch->condition())) {
232 known_conditions_.InsertNewKey(branch->condition(), condition_value);
233 }
234 }
235 }
236 }
237
238 V<None> REDUCE(Branch)(V<Word32> cond, Block* if_true, Block* if_false,
239 BranchHint hint) {
240 LABEL_BLOCK(no_change) {
241 return Next::ReduceBranch(cond, if_true, if_false, hint);
242 }
243 if (ShouldSkipOptimizationStep()) goto no_change;
244
245 if (const Block* if_true_origin = __ OriginForBlockStart(if_true)) {
246 if (const Block* if_false_origin = __ OriginForBlockStart(if_false)) {
247 const Operation& first_op_true =
248 if_true_origin->FirstOperation(__ input_graph());
249 const Operation& first_op_false =
250 if_false_origin->FirstOperation(__ input_graph());
251 const GotoOp* true_goto = first_op_true.template TryCast<GotoOp>();
252 const GotoOp* false_goto = first_op_false.template TryCast<GotoOp>();
253 // We apply the fourth optimization, replacing empty branches with a
254 // Goto to their destination (if it's the same block).
255 if (true_goto && false_goto &&
256 true_goto->destination == false_goto->destination) {
257 Block* merge_block = true_goto->destination;
258 if (!merge_block->HasPhis(__ input_graph())) {
259 // Using `ReduceInputGraphGoto()` here enables more optimizations.
260 __ Goto(__ MapToNewGraph(merge_block));
261 return V<None>::Invalid();
262 }
263 }
264 }
265 }
266
267 if (auto cond_value = known_conditions_.Get(cond)) {
268 // We already know the value of {cond}. We thus remove the branch (this is
269 // the "first" optimization in the documentation at the top of this
270 // module).
271 __ Goto(*cond_value ? if_true : if_false);
272 return V<None>::Invalid();
273 }
274 // We can't optimize this branch.
275 goto no_change;
276 }
277
278 V<Any> REDUCE(Select)(V<Word32> cond, V<Any> vtrue, V<Any> vfalse,
279 RegisterRepresentation rep, BranchHint hint,
280 SelectOp::Implementation implem) {
281 LABEL_BLOCK(no_change) {
282 return Next::ReduceSelect(cond, vtrue, vfalse, rep, hint, implem);
283 }
284 if (ShouldSkipOptimizationStep()) goto no_change;
285
286 if (auto cond_value = known_conditions_.Get(cond)) {
287 if (*cond_value) {
288 return vtrue;
289 } else {
290 return vfalse;
291 }
292 }
293 goto no_change;
294 }
295
296 V<None> REDUCE(Goto)(Block* destination, bool is_backedge) {
297 LABEL_BLOCK(no_change) {
298 return Next::ReduceGoto(destination, is_backedge);
299 }
300 if (ShouldSkipOptimizationStep()) goto no_change;
301
302 const Block* destination_origin = __ OriginForBlockStart(destination);
303 if (!destination_origin || !destination_origin->IsMerge()) {
304 goto no_change;
305 }
306
307 // Maximum size up to which we allow cloning a block. Cloning too large
308 // blocks will lead to increasing the size of the graph too much, which will
309 // lead to slower compile time, and larger generated code.
310 // TODO(dmercadier): we might want to exclude Phis from this, since they are
311 // typically removed when we clone a block. However, computing the number of
312 // operations in a block excluding Phis is more costly (because we'd have to
313 // iterate all of the operations one by one).
314 // TODO(dmercadier): this "13" was selected fairly arbitrarily (= it sounded
315 // reasonable). It could be useful to run a few benchmarks to see if we can
316 // find a more optimal number.
317 static constexpr int kMaxOpCountForCloning = 13;
318
319 const Operation& last_op =
320 destination_origin->LastOperation(__ input_graph());
321
322 if (destination_origin->OpCountUpperBound() > kMaxOpCountForCloning) {
323 goto no_change;
324 }
325
326 if (const BranchOp* branch = last_op.template TryCast<BranchOp>()) {
327 V<Word32> condition =
328 __ template MapToNewGraph<true>(branch->condition());
329 if (condition.valid()) {
330 std::optional<bool> condition_value = known_conditions_.Get(condition);
331 if (!condition_value.has_value()) {
332 // We've already visited the subsequent block's Branch condition, but
333 // we don't know its value right now.
334 goto no_change;
335 }
336
337 // The destination block is a Merge, and ends with a Branch whose
338 // condition is already known. As per the 2nd optimization, we'll
339 // process {destination} right away, and we'll end it with a Goto instead of
340 // its current Branch.
341 __ CloneBlockAndGoto(destination_origin);
342 return {};
343 } else {
344 // Optimization 2bis:
345 // {condition} hasn't been visited yet, and thus it doesn't have a
346 // mapping to the new graph. However, if it's the result of a Phi whose
347 // input is coming from the current block, then it still makes sense to
348 // inline {destination_origin}: the condition will then be known.
349 if (destination_origin->Contains(branch->condition())) {
350 if (__ input_graph().Get(branch->condition()).template Is<PhiOp>()) {
351 __ CloneBlockAndGoto(destination_origin);
352 return {};
353 } else if (CanBeConstantFolded(branch->condition(),
354 destination_origin)) {
355 // If the {cond} only uses constant Phis that come from the current
356 // block, it's probably worth it to clone the block in order to
357 // constant-fold away the Branch.
358 __ CloneBlockAndGoto(destination_origin);
359 return {};
360 } else {
361 goto no_change;
362 }
363 }
364 }
365 } else if (last_op.template Is<ReturnOp>()) {
366 // In case of the following pattern, the `Goto` is most likely going to be
367 // folded into a jump table, so duplicating Block 5 will only increase the
368 // number of different targets within the jump table.
369 //
370 // Block 1:
371 // [...]
372 // SwitchOp()[2, 3, 4]
373 //
374 // Block 2: Block 3: Block 4:
375 // Goto 5 Goto 5 Goto 6
376 //
377 // Block 5: Block 6:
378 // [...] [...]
379 // ReturnOp
380 if (Asm().current_block()->PredecessorCount() == 1 &&
381 Asm().current_block()->begin() ==
382 __ output_graph().next_operation_index()) {
383 const Block* prev_block = Asm().current_block()->LastPredecessor();
384 if (prev_block->LastOperation(__ output_graph())
385 .template Is<SwitchOp>()) {
386 goto no_change;
387 }
388 }
389 // The destination block in the old graph ends with a Return
390 // and the old destination is a merge block, so we can directly
391 // inline the destination block in place of the Goto.
392 Asm().CloneAndInlineBlock(destination_origin);
393 return {};
394 }
395
396 goto no_change;
397 }
398
399 V<None> REDUCE(DeoptimizeIf)(V<Word32> condition, V<FrameState> frame_state,
400 bool negated,
401 const DeoptimizeParameters* parameters) {
402 LABEL_BLOCK(no_change) {
403 return Next::ReduceDeoptimizeIf(condition, frame_state, negated,
404 parameters);
405 }
406 if (ShouldSkipOptimizationStep()) goto no_change;
407
408 std::optional<bool> condition_value = known_conditions_.Get(condition);
409 if (!condition_value.has_value()) {
410 known_conditions_.InsertNewKey(condition, negated);
411 goto no_change;
412 }
413
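    // A worked example of the {negated} handling below (illustrative): with
    // negated == true, the operation deoptimizes when {condition} is false.
    // So a known-false condition turns it into an unconditional Deoptimize,
    // while a known-true condition means the deoptimization is never taken.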
414 if ((*condition_value && !negated) || (!*condition_value && negated)) {
415 // The deoptimization is always taken: emit an unconditional Deoptimize.
416 return Next::ReduceDeoptimize(frame_state, parameters);
417 } else {
418 // The deoptimization is never taken: just remove it.
419 return V<None>::Invalid();
420 }
421 }
422
423#if V8_ENABLE_WEBASSEMBLY
424 V<None> REDUCE(TrapIf)(V<Word32> condition, OptionalV<FrameState> frame_state,
425 bool negated, const TrapId trap_id) {
426 LABEL_BLOCK(no_change) {
427 return Next::ReduceTrapIf(condition, frame_state, negated, trap_id);
428 }
429 if (ShouldSkipOptimizationStep()) goto no_change;
430
431 std::optional<bool> condition_value = known_conditions_.Get(condition);
432 if (!condition_value.has_value()) {
433 known_conditions_.InsertNewKey(condition, negated);
434 goto no_change;
435 }
436
437 if (__ matcher().template Is<ConstantOp>(condition)) {
438 goto no_change;
439 }
440
441 V<Word32> static_condition = __ Word32Constant(*condition_value);
442 if (negated) {
443 __ TrapIfNot(static_condition, frame_state, trap_id);
444 } else {
445 __ TrapIf(static_condition, frame_state, trap_id);
446 }
447 return V<None>::Invalid();
448 }
449#endif // V8_ENABLE_WEBASSEMBLY
450
451 private:
452 // Resets {known_conditions_} and {dominator_path_} up to the 1st dominator of
453 // {block} that they contain.
454 void ResetToBlock(Block* block) {
455 Block* target = block->GetDominator();
456 while (!dominator_path_.empty() && target != nullptr &&
457 dominator_path_.back() != target) {
458 if (dominator_path_.back()->Depth() > target->Depth()) {
459 ClearCurrentEntries();
460 } else if (dominator_path_.back()->Depth() < target->Depth()) {
461 target = target->GetDominator();
462 } else {
463 // {target} and {dominator_path.back} have the same depth but are not
464 // equal, so we go one level up for both.
465 ClearCurrentEntries();
466 target = target->GetDominator();
467 }
468 }
469 }
470
471 // Removes the latest entry in {known_conditions_} and {dominator_path_}.
472 void ClearCurrentEntries() {
473 known_conditions_.DropLastLayer();
474 dominator_path_.pop_back();
475 }
476
477 void StartLayer(Block* block) {
478 known_conditions_.StartLayer();
479 dominator_path_.push_back(block);
480 }
481
482 // ReplayMissingPredecessors adds to {known_conditions_} and {dominator_path_}
483 // the conditions/blocks related to the dominators of {block} that are
484 // not already present. This can happen when control-flow changes during the
485 // CopyingPhase, which results in a block being visited not right after
486 // its dominator. For instance, when optimizing a double-diamond like:
487 //
488 // B0
489 // / \
490 // / \
491 // B1 B2
492 // \ /
493 // \ /
494 // B3
495 // / \
496 // / \
497 // B4 B5
498 // \ /
499 // \ /
500 // B6
501 // / \
502 // / \
503 // B7 B8
504 // \ /
505 // \ /
506 // B9
507 //
508 // In this example, where B0, B3 and B6 branch on the same condition, the
509 // blocks are actually visited in the following order: B0 - B1 - B3/1 - B2 -
510 // B3/2 - B4 - B5 - ... (note how B3 is duplicated and visited twice because
511 // from B1/B2 its branch condition is already known; I've noted the duplicated
512 // blocks as B3/1 and B3/2). In the new graph, the dominator of B4 is B3/1 and
513 // the dominator of B5 is B3/2. Except that upon visiting B4, the last visited
514 // block is not B3/1 but rather B3/2, so, we have to reset {known_conditions_}
515 // to B0, and thus miss that we actually know the branch condition of B0/B3/B6, and
516 // we thus won't optimize the 3rd diamond.
517 //
518 // To overcome this issue, ReplayMissingPredecessors will add the information
519 // of the missing predecessors of the current block to {known_conditions_}. In
520 // the example above, this means that when visiting B4,
521 // ReplayMissingPredecessors will add the information of B3/1 to
522 // {known_conditions_}.
523 void ReplayMissingPredecessors(Block* new_block) {
524 // Collect blocks that need to be replayed.
525 base::SmallVector<Block*, 32> missing_blocks;
526 for (Block* dom = new_block->GetDominator();
527 dom != nullptr && dom != dominator_path_.back();
528 dom = dom->GetDominator()) {
529 missing_blocks.push_back(dom);
530 }
531 // Actually does the replaying, starting from the oldest block and finishing
532 // with the newest one (so that they will later be removed in the correct
533 // order).
534 for (auto it = missing_blocks.rbegin(); it != missing_blocks.rend(); ++it) {
535 Block* block = *it;
536 StartLayer(block);
537
538 if (block->IsBranchTarget()) {
539 const Operation& op =
540 block->LastPredecessor()->LastOperation(__ output_graph());
541 if (const BranchOp* branch = op.TryCast<BranchOp>()) {
542 DCHECK(branch->if_true->index() == block->index() ||
543 branch->if_false->index() == block->index());
544 bool condition_value =
545 branch->if_true->index().valid()
546 ? branch->if_true->index() == block->index()
547 : branch->if_false->index() != block->index();
548 known_conditions_.InsertNewKey(branch->condition(), condition_value);
549 }
550 }
551 }
552 }
553
554 // Checks that {idx} only depends on Constants or on Phis whose input
555 // from the current block is a Constant, and that it depends on at least
556 // one such Phi. If that is the case and {idx} is used
557 // in a Branch, then the Branch's block could be cloned in the current block,
558 // and {idx} could then be constant-folded away such that the Branch becomes a
559 // Goto.
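 //
 // For instance (an illustrative sketch):
 //
 //   B1: x1 = 0        B2: x2 = 1
 //         \             /
 //      B3: x = Phi(x1, x2)
 //          cond = (x == 0)
 //          Branch(cond, ...)
 //
 // {cond} depends only on a Phi whose inputs are Constants, so cloning B3 at
 // the end of B1 and of B2 lets {x}, and therefore {cond}, be constant-folded,
 // and the Branch becomes a Goto in each copy.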
560 bool CanBeConstantFolded(OpIndex idx, const Block* cond_input_block,
561 bool has_phi = false, int depth = 0) {
562 // We limit the depth of the search to {kMaxDepth} in order to avoid
563 // potentially visiting a lot of nodes.
564 static constexpr int kMaxDepth = 4;
565 if (depth > kMaxDepth) return false;
566 const Operation& op = __ input_graph().Get(idx);
567 if (!cond_input_block->Contains(idx)) {
568 // If we reach a ConstantOp without having gone through a Phi, then the
569 // condition can be constant-folded without performing block cloning.
570 return has_phi && op.Is<ConstantOp>();
571 }
572 if (op.Is<PhiOp>()) {
573 int curr_block_pred_idx = cond_input_block->GetPredecessorIndex(
574 __ current_block()->OriginForBlockEnd());
575 // There is no need to increment {depth} on this recursive call, because
576 // it will anyways exit early because {idx} won't be in
577 // {cond_input_block}.
578 return CanBeConstantFolded(op.input(curr_block_pred_idx),
579 cond_input_block, /*has_phi*/ true, depth);
580 } else if (op.Is<ConstantOp>()) {
581 return true;
582 } else if (op.input_count == 0) {
583 // Any operation that has no input but is not a ConstantOp probably won't
584 // be able to be constant-folded away (eg, LoadRootRegister).
585 return false;
586 } else if (!op.Effects().can_be_constant_folded()) {
587 // Operations with side-effects won't be able to be constant-folded.
588 return false;
589 }
590
591 for (int i = 0; i < op.input_count; i++) {
592 if (!CanBeConstantFolded(op.input(i), cond_input_block, has_phi,
593 depth + 1)) {
594 return false;
595 }
596 }
597
598 return has_phi;
599 }
600
601 // TODO(dmercadier): use the SnapshotTable to replace {dominator_path_} and
602 // {known_conditions_}, and to reuse the existing merging/replay logic of the
603 // SnapshotTable.
604 ZoneVector<Block*> dominator_path_{__ phase_zone()};
605 LayeredHashMap<V<Word32>, bool> known_conditions_{
606 __ phase_zone(), __ input_graph().DominatorTreeDepth() * 2};
607};
608
609#include "src/compiler/turboshaft/undef-assembler-macros.inc"
610
611} // namespace v8::internal::compiler::turboshaft
612
613#endif // V8_COMPILER_TURBOSHAFT_BRANCH_ELIMINATION_REDUCER_H_