v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
wasm-shuffle-reducer.h
// Copyright 2025 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#if !V8_ENABLE_WEBASSEMBLY
#error This header should only be included if WebAssembly is enabled.
#endif  // !V8_ENABLE_WEBASSEMBLY

#ifndef V8_COMPILER_TURBOSHAFT_WASM_SHUFFLE_REDUCER_H_
#define V8_COMPILER_TURBOSHAFT_WASM_SHUFFLE_REDUCER_H_

#include <bitset>
#include <optional>

#include "src/compiler/turboshaft/assembler.h"
#include "src/compiler/turboshaft/operations.h"

namespace v8::internal::compiler::turboshaft {

using SmallShuffleVector = SmallZoneVector<const Simd128ShuffleOp*, 8>;
// The aim of this reducer is to reduce the size of shuffles by looking at
// which elements are actually required, which we determine from their users:
// - Simd128UnaryOp ConvertLow ops
// - Simd128BinopOp ExtMulLow ops
// - Simd128ShuffleOps
// If a shuffle is only used by an operation which only reads the low half of
// the shuffle input, then we can reduce the shuffle to one which shuffles
// fewer bytes. When multiple ConvertLow and/or ExtMulLow ops are chained, the
// required width of the shuffle can be reduced further.
// If a shuffle is only used by a shuffle which only uses half of a shuffle
// input, that input shuffle can also be reduced.
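//
// A rough illustrative sketch of the ConvertLow case, in wasm-level
// pseudocode (the concrete ops here are only an example):
//
//   s = i8x16.shuffle(a, b, [16,0,17,1,18,2,19,3, 20,4,21,5,22,6,23,7])
//   w = i16x8.extend_low_i8x16_s(s)   // reads only bytes 0..7 of s
//
// Because the extend only demands the low eight bytes of s, the 16-byte
// shuffle can be narrowed to an 8-byte shuffle producing just
// [16,0,17,1,18,2,19,3].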

// Used by the analysis to search back from uses to their defs, looking for
// shuffles that could be reduced.
class DemandedElementAnalysis {
 public:
  // Masks of demanded byte lanes: bit i set means byte lane i of the 128-bit
  // value is required.
  static constexpr uint16_t k8x16 = 0xFFFF;
  static constexpr uint16_t k8x8Low = 0xFF;
  static constexpr uint16_t k8x4Low = 0xF;
  static constexpr uint16_t k8x2Low = 0x3;

  using LaneBitSet = std::bitset<16>;

  DemandedElementAnalysis(Zone* phase_zone, const Graph& input_graph);

  void AddUnaryOp(const Simd128UnaryOp& unop, LaneBitSet lanes);
  void AddBinaryOp(const Simd128BinopOp& binop, LaneBitSet lanes);
  void RecordOp(const Operation* op, LaneBitSet lanes);

  const Graph& input_graph() const { return input_graph_; }

  bool Visited(const Operation* op) const { return visited_.count(op); }

 private:
};

class WasmShuffleAnalyzer {
 public:
#if V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS
  // A pair of adjacent SIMD loads whose results are consumed by an even- and
  // an odd-lane deinterleaving shuffle; such a pair can be combined into a
  // single Simd128LoadPairDeinterleave operation.
  struct DeinterleaveLoadCandidate {
    const Simd128LoadPairDeinterleaveOp::Kind kind;
    const LoadOp& left;
    const LoadOp& right;
    const Simd128ShuffleOp& even_shfop;
    const Simd128ShuffleOp& odd_shfop;
  };
#endif  // V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS

  WasmShuffleAnalyzer(Zone* phase_zone, const Graph& input_graph);

  void Run();

  void Process(const Operation& op);
  void ProcessUnary(const Simd128UnaryOp& unop);
  void ProcessBinary(const Simd128BinopOp& binop);
  void ProcessShuffle(const Simd128ShuffleOp& shuffle_op);
  void ProcessShuffleOfShuffle(const Simd128ShuffleOp& shuffle_op,
                               const Simd128ShuffleOp& shuffle,
                               uint8_t lower_limit, uint8_t upper_limit);
#if V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS
  void ProcessShuffleOfLoads(const Simd128ShuffleOp& shfop, const LoadOp& left,
                             const LoadOp& right);
  bool CouldLoadPair(const LoadOp& load0, const LoadOp& load1) const;
  void AddLoadMultipleCandidate(const Simd128ShuffleOp& even_shuffle,
                                const Simd128ShuffleOp& odd_shuffle,
                                const LoadOp& left, const LoadOp& right,
                                Simd128LoadPairDeinterleaveOp::Kind kind);
#endif  // V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS

  bool ShouldReduce() const {
    return !ops_to_reduce().empty()
#if V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS
           || !deinterleave_load_candidates_.empty()
#endif
        ;
  }

  const DemandedElementAnalysis::DemandedElementMap& ops_to_reduce() const;

  std::optional<DemandedElementAnalysis::LaneBitSet> DemandedByteLanes(
      const Operation* op) const {
    for (const auto& [narrow_op, lanes] : ops_to_reduce()) {
      if (op == narrow_op) {
        return lanes;
      }
    }
    return {};
  }

  // Is only the top half (lanes 8...15) of the result of the shuffle
  // required? If so, the shuffle will need to be modified so that it writes
  // the desired data into the low half lanes instead.
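  // For example (illustrative): a 16-byte shuffle selecting bytes
  // [0,16,1,17,2,18,3,19, 4,20,5,21,6,22,7,23] whose user only reads lanes
  // 8..15 can be rewritten as the narrower shuffle [4,20,5,21,6,22,7,23],
  // which writes the previously-high lanes into lanes 0..7 instead.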
  bool ShouldRewriteShuffleToLow(const Simd128ShuffleOp* shuffle) const {
    for (auto shift_shuffle : shift_shuffles_) {
      if (shift_shuffle == shuffle) {
        return true;
      }
    }
    return false;
  }

#ifdef DEBUG
  bool ShouldRewriteShuffleToLow(OpIndex op) const {
    return ShouldRewriteShuffleToLow(
        &input_graph().Get(op).Cast<Simd128ShuffleOp>());
  }
#endif

  // Is the low half (lanes 0...7) of the result of the shuffle coming
  // exclusively from the high half of one of its operands?
  bool DoesShuffleIntoLowHalf(const Simd128ShuffleOp* shuffle) const {
    for (auto half_shuffle : low_half_shuffles_) {
      if (half_shuffle == shuffle) {
        return true;
      }
    }
    return false;
  }

  // Is the high half (lanes 8...15) of the result of the shuffle coming
  // exclusively from the high half of its operands?
  bool DoesShuffleIntoHighHalf(const Simd128ShuffleOp* shuffle) const {
    for (auto half_shuffle : high_half_shuffles_) {
      if (half_shuffle == shuffle) {
        return true;
      }
    }
    return false;
  }
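  // As an illustrative example: an i8x16 shuffle whose selection indices are
  // 8..15 followed by 24..31 takes the low half of its result exclusively
  // from the high half of its first operand and the high half of its result
  // exclusively from the high half of its second operand, so it shuffles
  // into both the low and the high half.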

#if V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS
  std::optional<const DeinterleaveLoadCandidate*> IsDeinterleaveCandidate(
      const LoadOp* load) const {
    for (auto& candidate : deinterleave_load_candidates_) {
      if (&candidate.left == load || &candidate.right == load) {
        return &candidate;
      }
    }
    return {};
  }
#endif  // V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS

  const Graph& input_graph() const { return input_graph_; }

 private:
#if V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS
  std::optional<const Simd128ShuffleOp*> GetOtherShuffleUser(
      const LoadOp& left, const LoadOp& right,
      const SmallShuffleVector& shuffles) const {
    for (const auto* shuffle : shuffles) {
      const auto& shuffle_left =
          input_graph().Get(shuffle->left()).Cast<LoadOp>();
      const auto& shuffle_right =
          input_graph().Get(shuffle->right()).Cast<LoadOp>();
      if (&shuffle_left == &left && &shuffle_right == &right) {
        return shuffle;
      }
    }
    return {};
  }
#endif  // V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS

  Zone* phase_zone_;
  const Graph& input_graph_;
  SmallShuffleVector shift_shuffles_{phase_zone_};
  SmallShuffleVector low_half_shuffles_{phase_zone_};
  SmallShuffleVector high_half_shuffles_{phase_zone_};
#if V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS
  SmallShuffleVector even_64x2_shuffles_{phase_zone_};
  SmallShuffleVector odd_64x2_shuffles_{phase_zone_};
  SmallShuffleVector even_32x4_shuffles_{phase_zone_};
  SmallShuffleVector odd_32x4_shuffles_{phase_zone_};
  SmallShuffleVector even_16x8_shuffles_{phase_zone_};
  SmallShuffleVector odd_16x8_shuffles_{phase_zone_};
  SmallShuffleVector even_8x16_shuffles_{phase_zone_};
  SmallShuffleVector odd_8x16_shuffles_{phase_zone_};
  base::SmallVector<DeinterleaveLoadCandidate, 8> deinterleave_load_candidates_;
#endif  // V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS
};

template <class Next>
class WasmShuffleReducer : public Next {
 private:
  std::optional<WasmShuffleAnalyzer> analyzer_;

#if V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS
  struct DeinterleaveLoadShuffle {
    const Simd128ShuffleOp* shuffle;
    OpIndex og_index;
    uint8_t result_index;
  };

  struct DeinterleaveLoad {
    const LoadOp* load;
    OpIndex og_index;
  };
  base::SmallVector<DeinterleaveLoad, 8> combined_loads_;
  base::SmallVector<DeinterleaveLoadShuffle, 8> deinterleave_load_shuffles_;

  void AddCombinedLoad(const LoadOp& load, OpIndex og_index) {
    combined_loads_.emplace_back(&load, og_index);
  }

  void AddDeinterleavedShuffle(const Simd128ShuffleOp& shuffle,
                               OpIndex og_index, uint8_t result_index) {
    deinterleave_load_shuffles_.emplace_back(&shuffle, og_index, result_index);
  }

  std::optional<const DeinterleaveLoadShuffle*> IsDeinterleaveLoadShuffle(
      const Simd128ShuffleOp* shuffle) const {
    for (const auto& deinterleave_load : deinterleave_load_shuffles_) {
      if (deinterleave_load.shuffle == shuffle) {
        return &deinterleave_load;
      }
    }
    return {};
  }

  std::optional<OpIndex> MaybeAlreadyCombined(const LoadOp* load) const {
    for (const auto& combined : combined_loads_) {
      if (combined.load == load) {
        return combined.og_index;
      }
    }
    return {};
  }
#endif  // V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS

 public:
  TURBOSHAFT_REDUCER_BOILERPLATE(WasmShuffleReducer)

  void Analyze() {
    analyzer_.emplace(__ phase_zone(), __ input_graph());
    analyzer_->Run();
    Next::Analyze();
  }

#if V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS
  OpIndex REDUCE_INPUT_GRAPH(Load)(OpIndex ig_index, const LoadOp& load) {
    LABEL_BLOCK(no_change) {
      return Next::ReduceInputGraphLoad(ig_index, load);
    }
    if (ShouldSkipOptimizationStep()) goto no_change;
    if (load.loaded_rep != MemoryRepresentation::Simd128()) goto no_change;

    if (auto maybe_combined = MaybeAlreadyCombined(&load)) {
      return maybe_combined.value();
    }

    if (auto maybe_candidate = analyzer_->IsDeinterleaveCandidate(&load)) {
      const auto* candidate = maybe_candidate.value();
      const LoadOp& left = candidate->left;
      const LoadOp& right = candidate->right;
#ifdef DEBUG
      DCHECK(!MaybeAlreadyCombined(&left));
      DCHECK(!MaybeAlreadyCombined(&right));
      CHECK_EQ(left.kind, right.kind);
      // The 'left' load of the candidate is the one shuffling the
      // even-numbered elements, which includes zero. So, the left load
      // accesses the lowest address.
      CHECK_LT(left.offset, right.offset);
#endif  // DEBUG

      V<WordPtr> base = __ MapToNewGraph(load.base());
      V<WordPtr> index;
      int offset = left.offset;
      if (load.index().has_value()) {
        index = __ MapToNewGraph(load.index().value());
        if (offset != 0) {
          index = __ WordPtrAdd(index, offset);
        }
      } else {
        index = __ IntPtrConstant(offset);
      }

      OpIndex og_index = __ Simd128LoadPairDeinterleave(base, index, load.kind,
                                                        candidate->kind);
      AddCombinedLoad(left, og_index);
      AddCombinedLoad(right, og_index);

      AddDeinterleavedShuffle(candidate->even_shfop, og_index, 0);
      AddDeinterleavedShuffle(candidate->odd_shfop, og_index, 1);
      return og_index;
    }
    goto no_change;
  }
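  // An illustrative sketch of the rewrite above (schematic IR, assuming the
  // two loads are 16 bytes apart): a pair of adjacent Simd128 loads used
  // only by an even- and an odd-lane shuffle,
  //   left  = Load(base, offset)
  //   right = Load(base, offset + 16)
  //   even  = Simd128Shuffle(left, right)   (even lanes)
  //   odd   = Simd128Shuffle(left, right)   (odd lanes)
  // becomes a single operation with two projections:
  //   pair = Simd128LoadPairDeinterleave(base, offset)
  //   even = Projection(pair, 0)
  //   odd  = Projection(pair, 1)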
#endif  // V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS

  OpIndex REDUCE_INPUT_GRAPH(Simd128Shuffle)(OpIndex ig_index,
                                             const Simd128ShuffleOp& shuffle) {
    LABEL_BLOCK(no_change) {
      return Next::ReduceInputGraphSimd128Shuffle(ig_index, shuffle);
    }
    if (ShouldSkipOptimizationStep()) goto no_change;

    if (shuffle.kind != Simd128ShuffleOp::Kind::kI8x16) goto no_change;

    auto og_left = __ MapToNewGraph(shuffle.left());
    auto og_right = __ MapToNewGraph(shuffle.right());
    std::array<uint8_t, kSimd128Size> shuffle_bytes = {0};
    std::copy(shuffle.shuffle, shuffle.shuffle + kSimd128Size,
              shuffle_bytes.begin());
#if V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS
    if (auto maybe_deinterleaved_load = IsDeinterleaveLoadShuffle(&shuffle)) {
      const auto* deinterleaved_load = maybe_deinterleaved_load.value();
      return __ Projection(deinterleaved_load->og_index,
                           deinterleaved_load->result_index,
                           RegisterRepresentation::Simd128());
    }
#endif  // V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS

    constexpr size_t half_lanes = kSimd128Size / 2;

    bool does_shuffle_into_low_half =
        analyzer_->DoesShuffleIntoLowHalf(&shuffle);
    bool does_shuffle_into_high_half =
        analyzer_->DoesShuffleIntoHighHalf(&shuffle);

    // Shuffles to adjust because one, or both, of their inputs have been
    // narrowed.
    if (does_shuffle_into_low_half && does_shuffle_into_high_half) {
      DCHECK(analyzer_->ShouldRewriteShuffleToLow(shuffle.left()));
      DCHECK(analyzer_->ShouldRewriteShuffleToLow(shuffle.right()));
      // We have a shuffle where both inputs have been reduced and shifted, so
      // something like this:
      // |--------|--------|---a1---|---b3---| shf0 = (a, b)
      //
      // |--------|--------|---c2---|---d4---| shf1 = (c, d)
      //
      // |---a1---|---b3---|---c2---|---d4---| shf2 = (shf0, shf1)
      //
      // Is being changed into this:
      // |---a1---|---b3---|--------|--------| shf0 = (a, b)
      //
      // |---c2---|---d4---|--------|--------| shf1 = (c, d)
      //
      // |---a1---|---b3---|---c2---|---d4---| shf2 = (shf0, shf1)
      std::transform(shuffle_bytes.begin(), shuffle_bytes.end(),
                     shuffle_bytes.begin(),
                     [](uint8_t lane) { return lane - half_lanes; });
    } else if (does_shuffle_into_low_half) {
      DCHECK(analyzer_->ShouldRewriteShuffleToLow(shuffle.left()) ||
             analyzer_->ShouldRewriteShuffleToLow(shuffle.right()));
      DCHECK_NE(analyzer_->ShouldRewriteShuffleToLow(shuffle.left()),
                analyzer_->ShouldRewriteShuffleToLow(shuffle.right()));
      // We have a shuffle where both inputs have been reduced and one has
      // been shifted, so something like this:
      // |--------|--------|---a1---|---b3---| shf0 = (a, b)
      //
      // |---c2---|---d4---|--------|--------| shf1 = (c, d)
      //
      // |---a1---|---b3---|---c2---|---d4---| shf2 = (shf0, shf1)
      //
      // Is being changed into this:
      // |---a1---|---b3---|--------|--------| shf0 = (a, b)
      //
      // |---c2---|---d4---|--------|--------| shf1 = (c, d)
      //
      // |---a1---|---b3---|---c2---|---d4---| shf2 = (shf0, shf1)
      //
      // Original shf2 lane-wise shuffle: [2, 3, 4, 5]
      // Needs to be converted to: [0, 1, 4, 5]
      std::transform(shuffle_bytes.begin(), shuffle_bytes.begin() + half_lanes,
                     shuffle_bytes.begin(),
                     [](uint8_t lane) { return lane - half_lanes; });
    } else if (does_shuffle_into_high_half) {
      DCHECK(analyzer_->ShouldRewriteShuffleToLow(shuffle.left()) ||
             analyzer_->ShouldRewriteShuffleToLow(shuffle.right()));
      DCHECK_NE(analyzer_->ShouldRewriteShuffleToLow(shuffle.left()),
                analyzer_->ShouldRewriteShuffleToLow(shuffle.right()));
      // We have a shuffle where both inputs have been reduced and one has
      // been shifted, so something like this:
      // |---a1---|---b3---|--------|--------| shf0 = (a, b)
      //
      // |--------|--------|---c2---|---d4---| shf1 = (c, d)
      //
      // |---a1---|---b3---|---c2---|---d4---| shf2 = (shf0, shf1)
      //
      // Is being changed into this:
      // |---a1---|---b3---|--------|--------| shf0 = (a, b)
      //
      // |---c2---|---d4---|--------|--------| shf1 = (c, d)
      //
      // |---a1---|---b3---|---c2---|---d4---| shf2 = (shf0, shf1)
      std::transform(shuffle_bytes.begin() + half_lanes, shuffle_bytes.end(),
                     shuffle_bytes.begin() + half_lanes,
                     [](uint8_t lane) { return lane - half_lanes; });
    }

    if (does_shuffle_into_low_half || does_shuffle_into_high_half) {
      return __ Simd128Shuffle(og_left, og_right,
                               Simd128ShuffleOp::Kind::kI8x16,
                               shuffle_bytes.data());
    }

    // Shuffles to narrow.
    if (auto maybe_lanes = analyzer_->DemandedByteLanes(&shuffle)) {
      auto lanes = maybe_lanes.value();
      if (analyzer_->ShouldRewriteShuffleToLow(&shuffle)) {
        // Take the top half of the shuffle bytes; these will now write their
        // values into the low half of the result instead.
        std::copy(shuffle.shuffle + half_lanes, shuffle.shuffle + kSimd128Size,
                  shuffle_bytes.begin());
      } else {
        // Just truncate to the lower half.
        std::copy(shuffle.shuffle, shuffle.shuffle + half_lanes,
                  shuffle_bytes.begin());
      }
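      // Illustrative example: if the original shuffle bytes were
      // [0,16,1,17,2,18,3,19, 4,20,5,21,6,22,7,23] and only the top half of
      // the result was demanded, shuffle_bytes now begins with
      // [4,20,5,21,6,22,7,23]; otherwise it keeps [0,16,1,17,2,18,3,19].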

      if (lanes == DemandedElementAnalysis::k8x2Low) {
        return __ Simd128Shuffle(og_left, og_right,
                                 Simd128ShuffleOp::Kind::kI8x2,
                                 shuffle_bytes.data());
      } else if (lanes == DemandedElementAnalysis::k8x4Low) {
        return __ Simd128Shuffle(og_left, og_right,
                                 Simd128ShuffleOp::Kind::kI8x4,
                                 shuffle_bytes.data());
      } else if (lanes == DemandedElementAnalysis::k8x8Low) {
        return __ Simd128Shuffle(og_left, og_right,
                                 Simd128ShuffleOp::Kind::kI8x8,
                                 shuffle_bytes.data());
      }
    }
    goto no_change;
  }
};

}  // namespace v8::internal::compiler::turboshaft

#endif  // V8_COMPILER_TURBOSHAFT_WASM_SHUFFLE_REDUCER_H_