v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
duplication-optimization-reducer.h
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_COMPILER_TURBOSHAFT_DUPLICATION_OPTIMIZATION_REDUCER_H_
#define V8_COMPILER_TURBOSHAFT_DUPLICATION_OPTIMIZATION_REDUCER_H_

#include "src/compiler/turboshaft/assembler.h"
#include "src/compiler/turboshaft/graph.h"
#include "src/compiler/turboshaft/index.h"
#include "src/compiler/turboshaft/operations.h"
#include "src/compiler/turboshaft/value-numbering-reducer.h"

namespace v8::internal::compiler::turboshaft {

// DuplicationOptimizationReducer introduces duplication where this can be
// beneficial for generated code. It should run late in the pipeline so that
// the duplication isn't optimized away by some other phases (such as GVN).
//
// In particular, it introduces duplication in 2 places:
//
// 1. Branch condition duplication: it tries to ensure that the condition nodes
// of branches are used only once (under some conditions). When it finds a
// branch node whose condition has multiple uses, this condition is duplicated.
//
// Doing this enables the InstructionSelector to generate more efficient code
// for branches. For instance, consider this code:
//
//    c = a + b;
//    if (c == 0) { /* some code */ }
//    if (c == 0) { /* more code */ }
//
// Then the generated code will be something like (using registers "ra" for "a"
// and "rb" for "b", and "rt" a temporary register):
//
//    add ra, rb   ; a + b
//    cmp ra, 0    ; a + b == 0
//    sete rt      ; rt = (a + b == 0)
//    cmp rt, 0    ; rt == 0
//    jz
//    ...
//    cmp rt, 0    ; rt == 0
//    jz
//
// As you can see, TurboFan materialized the == bit into a temporary register.
// However, since the "add" instruction sets the ZF flag (on x64), it can be
// used to determine whether the jump should be taken or not. The code we'd
// like to generate instead is thus:
//
//    add ra, rb
//    jnz
//    ...
//    add ra, rb
//    jnz
//
// However, this requires generating the "add ra, rb" instruction twice. Due to
// how virtual registers are assigned in TurboFan (there is a map from node ID
// to virtual registers), both "add" instructions would use the same virtual
// register as output, which would break SSA.
//
// In order to overcome this issue, this reducer duplicates branch conditions
// that are used more than once, so that they can be generated right before
// each branch without worrying about breaking SSA.
//
// 2. Load/Store flexible second operand duplication: on Arm64, it tries to
// duplicate the "index" input of Loads/Stores when it's a shift by a constant.
// This allows the Instruction Selector to compute said shift using a flexible
// second operand, which in most cases on recent Arm64 CPUs should come for
// free.
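//
// For instance, given this (illustrative) input, where the shifted index has
// other uses as well:
//
//    idx = x << 3;
//    val = mem[base + idx];
//
// duplicating the shift lets the load be selected as a single instruction
// (assuming registers "x1" for "base" and "x2" for "x"):
//
//    ldr x0, [x1, x2, lsl #3]
//
// instead of first materializing the shifted index in its own register.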

#include "src/compiler/turboshaft/define-assembler-macros.inc"

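// Like any Turboshaft reducer, this one is meant to be combined with others in
// a reducer stack. A pipeline phase could wire it up along these lines
// (hypothetical phase setup, for illustration only):
//
//   CopyingPhase<DuplicationOptimizationReducer,
//                ValueNumberingReducer>::Run(data, temp_zone);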
template <class Next>
class DuplicationOptimizationReducer : public Next {
 public:
  TURBOSHAFT_REDUCER_BOILERPLATE(DuplicationOptimization)

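  // When mapping a Branch from the input graph, try to give it a fresh private
  // copy of its condition (if that condition is duplicable and has multiple
  // uses), so that the Instruction Selector can fuse condition and branch.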
  V<None> REDUCE_INPUT_GRAPH(Branch)(V<None> ig_index, const BranchOp& branch) {
    LABEL_BLOCK(no_change) {
      return Next::ReduceInputGraphBranch(ig_index, branch);
    }
    if (ShouldSkipOptimizationStep()) goto no_change;

    const Operation& cond = __ input_graph().Get(branch.condition());
    V<Word32> new_cond;
    if (!MaybeDuplicateCond(cond, branch.condition(), &new_cond)) {
      goto no_change;
    }

    DCHECK(new_cond.valid());
    __ Branch(new_cond, __ MapToNewGraph(branch.if_true),
              __ MapToNewGraph(branch.if_false), branch.hint);
    return V<None>::Invalid();
  }

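  // Select conditions benefit from duplication for the same reason: a freshly
  // emitted condition can be consumed directly by a conditional instruction
  // (e.g. cmov on x64 or csel on Arm64).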
  V<Any> REDUCE_INPUT_GRAPH(Select)(V<Any> ig_index, const SelectOp& select) {
    LABEL_BLOCK(no_change) {
      return Next::ReduceInputGraphSelect(ig_index, select);
    }
    if (ShouldSkipOptimizationStep()) goto no_change;

    const Operation& cond = __ input_graph().Get(select.cond());
    V<Word32> new_cond;
    if (!MaybeDuplicateCond(cond, select.cond(), &new_cond)) goto no_change;

    DCHECK(new_cond.valid());
    return __ Select(new_cond, __ MapToNewGraph(select.vtrue()),
                     __ MapToNewGraph(select.vfalse()), select.rep, select.hint,
                     select.implem);
  }

#if V8_TARGET_ARCH_ARM64
  // TODO(dmercadier): duplicating a shift to use a flexible second operand is
  // not always worth it; this depends mostly on the CPU, the kind of shift,
  // and the size of the loaded/stored data. Ideally, we would have cost models
  // for all the CPUs we target, and use those to decide whether or not to
  // duplicate shifts.
  OpIndex REDUCE(Load)(OpIndex base, OptionalOpIndex index, LoadOp::Kind kind,
                       MemoryRepresentation loaded_rep,
                       RegisterRepresentation result_rep, int32_t offset,
                       uint8_t element_size_log2) {
    if (offset == 0 && element_size_log2 == 0 && index.valid()) {
      index = MaybeDuplicateOutputGraphShift(index.value());
    }
    return Next::ReduceLoad(base, index, kind, loaded_rep, result_rep, offset,
                            element_size_log2);
  }

  V<None> REDUCE(Store)(OpIndex base, OptionalOpIndex index, OpIndex value,
                        StoreOp::Kind kind, MemoryRepresentation stored_rep,
                        WriteBarrierKind write_barrier, int32_t offset,
                        uint8_t element_size_log2,
                        bool maybe_initializing_or_transitioning,
                        IndirectPointerTag maybe_indirect_pointer_tag) {
    if (offset == 0 && element_size_log2 == 0 && index.valid()) {
      index = MaybeDuplicateOutputGraphShift(index.value());
    }
    return Next::ReduceStore(base, index, value, kind, stored_rep,
                             write_barrier, offset, element_size_log2,
                             maybe_initializing_or_transitioning,
                             maybe_indirect_pointer_tag);
  }
#endif

 private:
  bool MaybeDuplicateCond(const Operation& cond, OpIndex input_idx,
                          V<Word32>* new_cond) {
    if (cond.saturated_use_count.IsOne()) return false;

    switch (cond.opcode) {
      case Opcode::kComparison:
        *new_cond =
            MaybeDuplicateComparison(cond.Cast<ComparisonOp>(), input_idx);
        break;
      case Opcode::kWordBinop:
        *new_cond =
            MaybeDuplicateWordBinop(cond.Cast<WordBinopOp>(), input_idx);
        break;
      case Opcode::kShift:
        *new_cond = MaybeDuplicateShift(cond.Cast<ShiftOp>(), input_idx);
        break;
      default:
        return false;
    }
    return new_cond->valid();
  }

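  // Shared heuristic for all duplicable condition kinds: duplication is only
  // worth it if it doesn't keep both inputs alive longer than before, and only
  // once the original operation has already been emitted in the output graph.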
  bool MaybeCanDuplicateGenericBinop(OpIndex input_idx, OpIndex left,
                                     OpIndex right) {
    if (__ input_graph().Get(left).saturated_use_count.IsOne() &&
        __ input_graph().Get(right).saturated_use_count.IsOne()) {
      // We don't duplicate binops when all of their inputs are used a single
      // time (this would increase register pressure by keeping 2 values alive
      // instead of 1).
      return false;
    }
    OpIndex binop_output_idx = __ MapToNewGraph(input_idx);
    if (__ Get(binop_output_idx).saturated_use_count.IsZero()) {
      // This is the 1st use of {binop} in the output graph, so there is no
      // need to duplicate it just yet.
      return false;
    }
    return true;
  }

  OpIndex MaybeDuplicateWordBinop(const WordBinopOp& binop, OpIndex input_idx) {
    if (!MaybeCanDuplicateGenericBinop(input_idx, binop.left(),
                                       binop.right())) {
      return OpIndex::Invalid();
    }

    switch (binop.kind) {
      case WordBinopOp::Kind::kSignedDiv:
      case WordBinopOp::Kind::kUnsignedDiv:
      case WordBinopOp::Kind::kSignedMod:
      case WordBinopOp::Kind::kUnsignedMod:
        // These operations are somewhat expensive, and duplicating them is
        // probably not worth it.
        return OpIndex::Invalid();
      default:
        break;
    }

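    // GVN would otherwise fold the copy right back into the original
    // operation, so value numbering is disabled while emitting the duplicate.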
    DisableValueNumbering disable_gvn(this);
    return __ WordBinop(__ MapToNewGraph(binop.left()),
                        __ MapToNewGraph(binop.right()), binop.kind, binop.rep);
  }

  V<Word32> MaybeDuplicateComparison(const ComparisonOp& comp,
                                     OpIndex input_idx) {
    if (!MaybeCanDuplicateGenericBinop(input_idx, comp.left(), comp.right())) {
      return {};
    }

    DisableValueNumbering disable_gvn(this);
    return __ Comparison(__ MapToNewGraph(comp.left()),
                         __ MapToNewGraph(comp.right()), comp.kind, comp.rep);
  }

  OpIndex MaybeDuplicateShift(const ShiftOp& shift, OpIndex input_idx) {
    if (!MaybeCanDuplicateGenericBinop(input_idx, shift.left(),
                                       shift.right())) {
      return OpIndex::Invalid();
    }

    DisableValueNumbering disable_gvn(this);
    return __ Shift(__ MapToNewGraph(shift.left()),
                    __ MapToNewGraph(shift.right()), shift.kind, shift.rep);
  }

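  // If the output-graph {index} is a shift by constant that still has other
  // uses, re-emit the shift so that this Load/Store gets its own copy, which
  // the Instruction Selector can then fold in as a flexible second operand.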
  OpIndex MaybeDuplicateOutputGraphShift(OpIndex index) {
    V<Word> shifted;
    int shifted_by;
    ShiftOp::Kind shift_kind;
    WordRepresentation shift_rep;
    if (__ matcher().MatchConstantShift(index, &shifted, &shift_kind,
                                        &shift_rep, &shifted_by) &&
        !__ matcher().Get(index).saturated_use_count.IsZero()) {
      // We don't check the use count of {shifted}, because it might have uses
      // in the future that haven't been emitted yet.
      DisableValueNumbering disable_gvn(this);
      return __ Shift(shifted, __ Word32Constant(shifted_by), shift_kind,
                      shift_rep);
    }
    return index;
  }
};

#include "src/compiler/turboshaft/undef-assembler-macros.inc"

}  // namespace v8::internal::compiler::turboshaft

#endif  // V8_COMPILER_TURBOSHAFT_DUPLICATION_OPTIMIZATION_REDUCER_H_