v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
wasm-revec-reducer.h
1// Copyright 2023 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_COMPILER_TURBOSHAFT_WASM_REVEC_REDUCER_H_
6#define V8_COMPILER_TURBOSHAFT_WASM_REVEC_REDUCER_H_
7
8#if !V8_ENABLE_WEBASSEMBLY
9#error This header should only be included if WebAssembly is enabled.
10#endif // !V8_ENABLE_WEBASSEMBLY
11
12#include <algorithm>
13
20
22
23#define SIMD256_LOADTRANSFORM_OP(V) \
24 V(8x8S, 8x16S) \
25 V(8x8U, 8x16U) \
26 V(16x4S, 16x8S) \
27 V(16x4U, 16x8U) \
28 V(32x2S, 32x4S) \
29 V(32x2U, 32x4U) \
30 V(8Splat, 8Splat) \
31 V(16Splat, 16Splat) \
32 V(32Splat, 32Splat) \
33 V(64Splat, 64Splat)
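// The lists above and below follow the X-macro pattern: each list applies a
// caller-supplied macro V to every (Simd128 kind, Simd256 kind) pair. As an
// illustrative sketch, the kind-mapping helpers near the end of this header
// consume them like this:
//
//   #define TRANSFORM_KIND_MAPPING(from, to)                 \
//     case Simd128LoadTransformOp::TransformKind::k##from:   \
//       return Simd256LoadTransformOp::TransformKind::k##to;
//     SIMD256_LOADTRANSFORM_OP(TRANSFORM_KIND_MAPPING)
//   #undef TRANSFORM_KIND_MAPPING
//
// which expands to one "case k8x8S: return k8x16S;" style case per entry.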
34
35#define SIMD256_UNARY_SIMPLE_OP(V) \
36 V(S128Not, S256Not) \
37 V(I8x16Abs, I8x32Abs) \
38 V(I8x16Neg, I8x32Neg) \
39 V(I16x8ExtAddPairwiseI8x16S, I16x16ExtAddPairwiseI8x32S) \
40 V(I16x8ExtAddPairwiseI8x16U, I16x16ExtAddPairwiseI8x32U) \
41 V(I32x4ExtAddPairwiseI16x8S, I32x8ExtAddPairwiseI16x16S) \
42 V(I32x4ExtAddPairwiseI16x8U, I32x8ExtAddPairwiseI16x16U) \
43 V(I16x8Abs, I16x16Abs) \
44 V(I16x8Neg, I16x16Neg) \
45 V(I32x4Abs, I32x8Abs) \
46 V(I32x4Neg, I32x8Neg) \
47 V(F32x4Abs, F32x8Abs) \
48 V(F32x4Neg, F32x8Neg) \
49 V(F32x4Sqrt, F32x8Sqrt) \
50 V(F64x2Abs, F64x4Abs) \
51 V(F64x2Neg, F64x4Neg) \
52 V(F64x2Sqrt, F64x4Sqrt) \
53 V(I32x4UConvertF32x4, I32x8UConvertF32x8) \
54 V(I32x4SConvertF32x4, I32x8SConvertF32x8) \
55 V(F32x4UConvertI32x4, F32x8UConvertI32x8) \
56 V(F32x4SConvertI32x4, F32x8SConvertI32x8) \
57 V(I32x4RelaxedTruncF32x4S, I32x8RelaxedTruncF32x8S) \
58 V(I32x4RelaxedTruncF32x4U, I32x8RelaxedTruncF32x8U)
59
60#define SIMD256_UNARY_SIGN_EXTENSION_OP(V) \
61 V(I64x2SConvertI32x4Low, I64x4SConvertI32x4, I64x2SConvertI32x4High) \
62 V(I64x2UConvertI32x4Low, I64x4UConvertI32x4, I64x2UConvertI32x4High) \
63 V(I32x4SConvertI16x8Low, I32x8SConvertI16x8, I32x4SConvertI16x8High) \
64 V(I32x4UConvertI16x8Low, I32x8UConvertI16x8, I32x4UConvertI16x8High) \
65 V(I16x8SConvertI8x16Low, I16x16SConvertI8x16, I16x8SConvertI8x16High) \
66 V(I16x8UConvertI8x16Low, I16x16UConvertI8x16, I16x8UConvertI8x16High)
67
68#define SIMD256_BINOP_SIMPLE_OP(V) \
69 V(I8x16Eq, I8x32Eq) \
70 V(I8x16Ne, I8x32Ne) \
71 V(I8x16GtS, I8x32GtS) \
72 V(I8x16GtU, I8x32GtU) \
73 V(I8x16GeS, I8x32GeS) \
74 V(I8x16GeU, I8x32GeU) \
75 V(I16x8Eq, I16x16Eq) \
76 V(I16x8Ne, I16x16Ne) \
77 V(I16x8GtS, I16x16GtS) \
78 V(I16x8GtU, I16x16GtU) \
79 V(I16x8GeS, I16x16GeS) \
80 V(I16x8GeU, I16x16GeU) \
81 V(I32x4Eq, I32x8Eq) \
82 V(I32x4Ne, I32x8Ne) \
83 V(I32x4GtS, I32x8GtS) \
84 V(I32x4GtU, I32x8GtU) \
85 V(I32x4GeS, I32x8GeS) \
86 V(I32x4GeU, I32x8GeU) \
87 V(F32x4Eq, F32x8Eq) \
88 V(F32x4Ne, F32x8Ne) \
89 V(F32x4Lt, F32x8Lt) \
90 V(F32x4Le, F32x8Le) \
91 V(F64x2Eq, F64x4Eq) \
92 V(F64x2Ne, F64x4Ne) \
93 V(F64x2Lt, F64x4Lt) \
94 V(F64x2Le, F64x4Le) \
95 V(S128And, S256And) \
96 V(S128AndNot, S256AndNot) \
97 V(S128Or, S256Or) \
98 V(S128Xor, S256Xor) \
99 V(I8x16SConvertI16x8, I8x32SConvertI16x16) \
100 V(I8x16UConvertI16x8, I8x32UConvertI16x16) \
101 V(I8x16Add, I8x32Add) \
102 V(I8x16AddSatS, I8x32AddSatS) \
103 V(I8x16AddSatU, I8x32AddSatU) \
104 V(I8x16Sub, I8x32Sub) \
105 V(I8x16SubSatS, I8x32SubSatS) \
106 V(I8x16SubSatU, I8x32SubSatU) \
107 V(I8x16MinS, I8x32MinS) \
108 V(I8x16MinU, I8x32MinU) \
109 V(I8x16MaxS, I8x32MaxS) \
110 V(I8x16MaxU, I8x32MaxU) \
111 V(I8x16RoundingAverageU, I8x32RoundingAverageU) \
112 V(I16x8SConvertI32x4, I16x16SConvertI32x8) \
113 V(I16x8UConvertI32x4, I16x16UConvertI32x8) \
114 V(I16x8Add, I16x16Add) \
115 V(I16x8AddSatS, I16x16AddSatS) \
116 V(I16x8AddSatU, I16x16AddSatU) \
117 V(I16x8Sub, I16x16Sub) \
118 V(I16x8SubSatS, I16x16SubSatS) \
119 V(I16x8SubSatU, I16x16SubSatU) \
120 V(I16x8Mul, I16x16Mul) \
121 V(I16x8MinS, I16x16MinS) \
122 V(I16x8MinU, I16x16MinU) \
123 V(I16x8MaxS, I16x16MaxS) \
124 V(I16x8MaxU, I16x16MaxU) \
125 V(I16x8RoundingAverageU, I16x16RoundingAverageU) \
126 V(I32x4Add, I32x8Add) \
127 V(I32x4Sub, I32x8Sub) \
128 V(I32x4Mul, I32x8Mul) \
129 V(I32x4MinS, I32x8MinS) \
130 V(I32x4MinU, I32x8MinU) \
131 V(I32x4MaxS, I32x8MaxS) \
132 V(I32x4MaxU, I32x8MaxU) \
133 V(I32x4DotI16x8S, I32x8DotI16x16S) \
134 V(I64x2Add, I64x4Add) \
135 V(I64x2Sub, I64x4Sub) \
136 V(I64x2Mul, I64x4Mul) \
137 V(I64x2Eq, I64x4Eq) \
138 V(I64x2Ne, I64x4Ne) \
139 V(I64x2GtS, I64x4GtS) \
140 V(I64x2GeS, I64x4GeS) \
141 V(F32x4Add, F32x8Add) \
142 V(F32x4Sub, F32x8Sub) \
143 V(F32x4Mul, F32x8Mul) \
144 V(F32x4Div, F32x8Div) \
145 V(F32x4Min, F32x8Min) \
146 V(F32x4Max, F32x8Max) \
147 V(F32x4Pmin, F32x8Pmin) \
148 V(F32x4Pmax, F32x8Pmax) \
149 V(F64x2Add, F64x4Add) \
150 V(F64x2Sub, F64x4Sub) \
151 V(F64x2Mul, F64x4Mul) \
152 V(F64x2Div, F64x4Div) \
153 V(F64x2Min, F64x4Min) \
154 V(F64x2Max, F64x4Max) \
155 V(F64x2Pmin, F64x4Pmin) \
156 V(F64x2Pmax, F64x4Pmax) \
157 V(F32x4RelaxedMin, F32x8RelaxedMin) \
158 V(F32x4RelaxedMax, F32x8RelaxedMax) \
159 V(F64x2RelaxedMin, F64x4RelaxedMin) \
160 V(F64x2RelaxedMax, F64x4RelaxedMax) \
161 V(I16x8DotI8x16I7x16S, I16x16DotI8x32I7x32S)
162
163#define SIMD256_BINOP_SIGN_EXTENSION_OP(V) \
164 V(I16x8ExtMulLowI8x16S, I16x16ExtMulI8x16S, I16x8ExtMulHighI8x16S) \
165 V(I16x8ExtMulLowI8x16U, I16x16ExtMulI8x16U, I16x8ExtMulHighI8x16U) \
166 V(I32x4ExtMulLowI16x8S, I32x8ExtMulI16x8S, I32x4ExtMulHighI16x8S) \
167 V(I32x4ExtMulLowI16x8U, I32x8ExtMulI16x8U, I32x4ExtMulHighI16x8U) \
168 V(I64x2ExtMulLowI32x4S, I64x4ExtMulI32x4S, I64x2ExtMulHighI32x4S) \
169 V(I64x2ExtMulLowI32x4U, I64x4ExtMulI32x4U, I64x2ExtMulHighI32x4U)
170
171#define SIMD256_SHIFT_OP(V) \
172 V(I16x8Shl, I16x16Shl) \
173 V(I16x8ShrS, I16x16ShrS) \
174 V(I16x8ShrU, I16x16ShrU) \
175 V(I32x4Shl, I32x8Shl) \
176 V(I32x4ShrS, I32x8ShrS) \
177 V(I32x4ShrU, I32x8ShrU) \
178 V(I64x2Shl, I64x4Shl) \
179 V(I64x2ShrU, I64x4ShrU)
180
181#define SIMD256_TERNARY_OP(V) \
182 V(S128Select, S256Select) \
183 V(F32x4Qfma, F32x8Qfma) \
184 V(F32x4Qfms, F32x8Qfms) \
185 V(F64x2Qfma, F64x4Qfma) \
186 V(F64x2Qfms, F64x4Qfms) \
187 V(I8x16RelaxedLaneSelect, I8x32RelaxedLaneSelect) \
188 V(I16x8RelaxedLaneSelect, I16x16RelaxedLaneSelect) \
189 V(I32x4RelaxedLaneSelect, I32x8RelaxedLaneSelect) \
190 V(I64x2RelaxedLaneSelect, I64x4RelaxedLaneSelect) \
191 V(I32x4DotI8x16I7x16AddS, I32x8DotI8x32I7x32AddS)
192
193#define SIMD256_SPLAT_OP(V) \
194 V(I8x16, I8x32) \
195 V(I16x8, I16x16) \
196 V(I32x4, I32x8) \
197 V(I64x2, I64x4) \
198 V(F32x4, F32x8) \
199 V(F64x2, F64x4)
200
201#define REDUCE_SEED_KIND(V) \
202 V(I64x2Add) \
203 V(I32x4Add) \
204 V(I8x16Add) \
205 V(I16x8AddSatS) \
206 V(I16x8AddSatU) \
207 V(I8x16AddSatS) \
208 V(I8x16AddSatU) \
209 V(I16x8SConvertI32x4) \
210 V(I16x8UConvertI32x4) \
211 V(I8x16SConvertI16x8) \
212 V(I8x16UConvertI16x8)
213
215
217 public:
218 // Currently we only support merging 2 Simd128 nodes into one Simd256.
219 static constexpr int kSize = kSimd256Size / kSimd128Size;
221 indexes_[0] = a;
222 indexes_[1] = b;
223 }
224 size_t size() const { return kSize; }
225 OpIndex operator[](int i) const { return indexes_[i]; }
226
227 bool operator==(const NodeGroup& other) const {
228 return indexes_[0] == other.indexes_[0] && indexes_[1] == other.indexes_[1];
229 }
230 bool operator!=(const NodeGroup& other) const {
231 return indexes_[0] != other.indexes_[0] || indexes_[1] != other.indexes_[1];
232 }
233
234 const OpIndex* begin() const { return indexes_; }
235 const OpIndex* end() const { return indexes_ + kSize; }
236
237 private:
239};
240
241class ForcePackNode;
242class ShufflePackNode;
243class BundlePackNode;
244
245// A PackNode consists of a fixed number of isomorphic simd128 nodes which can
246// execute in parallel and be converted to a 256-bit simd node later. The
247// nodes in a PackNode must be schedulable in the same basic block and must be
248// mutually independent.
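// Illustrative example: two isomorphic, mutually independent Simd128
// operations in the same block, e.g.
//   n0 = Simd128Binop(a0, b0, kI32x4Add)
//   n1 = Simd128Binop(a1, b1, kI32x4Add)
// can form one PackNode and later be emitted as a single
//   Simd256Binop(A, B, kI32x8Add)
// where A and B are the revectorized (256-bit) packs of (a0, a1) and (b0, b1).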
249class PackNode : public NON_EXPORTED_BASE(ZoneObject) {
250 public:
251 enum NodeType {
252 kDefault, // Nodes are naturally packed without special attributes.
253 kForcePackNode, // Nodes do not satisfy some packing rule, but can be
254 // forcibly coalesced with a Pack128To256 operation, e.g.
255 // non-consecutive loads. On x64, we can use the vinsertf128
256 // instruction to forcibly coalesce two 128-bit values.
257 kShufflePackNode, // Nodes are Simd128Shuffle operations with specific
258 // info.
259 kBundlePackNode, // Nodes representing a i8x16/i16x8 to f32x4 conversion.
260 kIntersectPackNode, // One or more nodes already packed by an existing
261 // PackNode.
262 };
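// For force-packed (and intersect) nodes the reducer below reduces both
// 128-bit nodes individually and then coalesces them explicitly, roughly
// (see ReduceForceOrIntersectPackNode):
//   v0 = <reduced 128-bit node 0>
//   v1 = <reduced 128-bit node 1>
//   packed = __ SimdPack128To256(v0, v1)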
263
264 explicit PackNode(Zone* zone, const NodeGroup& node_group,
265 NodeType node_type = kDefault)
266 : nodes_(node_group),
268 operands_(zone),
269 node_type_(node_type) {}
270 const NodeGroup& nodes() const { return nodes_; }
271 bool IsSame(const NodeGroup& node_group) const {
272 return nodes_ == node_group;
273 }
274 bool IsSame(const PackNode& other) const { return nodes_ == other.nodes_; }
277
278 bool IsDefaultPackNode() const { return node_type_ == kDefault; }
279 bool IsForcePackNode() const { return node_type_ == kForcePackNode; }
281 bool IsBundlePackNode() const { return node_type_ == kBundlePackNode; }
282 // We will force-pack nodes for both ForcePackNode and IntersectPackNode.
283 bool is_force_packing() const {
285 }
286
289 return reinterpret_cast<ForcePackNode*>(this);
290 }
293 return reinterpret_cast<ShufflePackNode*>(this);
294 }
297 return reinterpret_cast<BundlePackNode*>(this);
298 }
299
300 PackNode* GetOperand(int index) const {
301 DCHECK_LT(index, operands_.size());
302 DCHECK(operands_[index]);
303 return operands_[index];
304 }
305
306 void SetOperand(int index, PackNode* pnode) {
307 DCHECK_GE(index, 0);
308 if (operands_.size() < static_cast<size_t>(index + 1)) {
309 operands_.resize(index + 1);
310 }
311 operands_[index] = pnode;
312 }
313
317
318 void Print(Graph* graph) const;
319
320 private:
321 friend class ForcePackNode;
326};
327
328class ForcePackNode : public PackNode {
329 public:
331 kSplat, // force pack 2 identical nodes or 2 loads at the same address
332 kGeneral, // force pack 2 different nodes
333 };
334 explicit ForcePackNode(Zone* zone, const NodeGroup& node_group,
335 ForcePackType type)
336 : PackNode(zone, node_group, kForcePackNode), force_pack_type_(type) {}
337
339
340 private:
342};
343
344class ShufflePackNode : public PackNode {
345 public:
347 public:
348 enum class Kind {
352#ifdef V8_TARGET_ARCH_X64
353 kShufd,
354 kShufps,
355 kS32x8UnpackLow,
356 kS32x8UnpackHigh,
357#endif // V8_TARGET_ARCH_X64
358 };
359 union Param {
360 int splat_index = 0;
361#ifdef V8_TARGET_ARCH_X64
362 uint8_t shufd_control;
363 uint8_t shufps_control;
364#endif // V8_TARGET_ARCH_X64
365 };
366
367 Kind kind() { return kind_; }
369
380
381#ifdef V8_TARGET_ARCH_X64
382 void set_shufd_control(uint8_t control) {
383 DCHECK_EQ(kind_, Kind::kShufd);
384 param_.shufd_control = control;
385 }
386 uint8_t shufd_control() const {
387 DCHECK_EQ(kind_, Kind::kShufd);
388 return param_.shufd_control;
389 }
390
391 void set_shufps_control(uint8_t control) {
392 DCHECK_EQ(kind_, Kind::kShufps);
393 param_.shufps_control = control;
394 }
395 uint8_t shufps_control() const {
396 DCHECK_EQ(kind_, Kind::kShufps);
397 return param_.shufps_control;
398 }
399#endif // V8_TARGET_ARCH_X64
400
401 private:
404 };
405
406 ShufflePackNode(Zone* zone, const NodeGroup& node_group,
408 : PackNode(zone, node_group, kShufflePackNode) {
410 }
411
412 SpecificInfo& info() { return info_; }
413
414 private:
416};
417
418// BundlePackNode is used to represent an i8x16/i16x8 to f32x4 conversion.
419// The conversion extracts 4 lanes of the i8x16/i16x8 input (base), starting at
420// lane index (offset), sign- or zero-extends (is_sign_extract) the extracted
421// lanes to i32x4, then converts the i32x4/u32x4 (is_sign_convert) to f32x4.
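// Illustrative lowering (see the Simd128ReplaceLane reduction below) for an
// i8x16 base with offset 0, signed extract and signed convert:
//   t0 = Simd128Unary(base, kI16x8SConvertI8x16Low)
//   t1 = Simd256Unary(t0, kI32x8SConvertI16x8)
//   result = Simd256Unary(t1, kF32x8SConvertI32x8)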
422class BundlePackNode : public PackNode {
423 public:
424 BundlePackNode(Zone* zone, const NodeGroup& node_group, OpIndex base,
425 int8_t offset, uint8_t lane_size, bool is_sign_extract,
426 bool is_sign_convert)
427 : PackNode(zone, node_group, kBundlePackNode) {
428 base_ = base;
429 offset_ = offset;
433 }
434
435 OpIndex base() const { return base_; }
436 uint8_t offset() const { return offset_; }
437 uint8_t lane_size() const { return lane_size_; }
438 bool is_sign_extract() const { return is_sign_extract_; }
439 bool is_sign_convert() const { return is_sign_convert_; }
440
441 private:
443 uint8_t offset_;
444 uint8_t lane_size_;
447};
448
449// An auxiliary tree structure with a set of PackNodes based on the Superword
450// Level Parallelism (SLP) vectorization technique. The BuildTree method will
451// start from a selected root, e.g. a group of consecutive stores, and extend
452// through value inputs to create new PackNodes if the inputs are valid, or
453// conclude that the current PackNode is a leaf and terminate the tree.
454// Below is an example of an SLPTree where the loads and stores in each
455// PackNode are all consecutive.
456// [Load0, Load1] [Load2, Load3]
457// \ /
458// [Add0, Add1]
459// |
460// [Store0, Store1]
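// Minimal usage sketch (names are illustrative; WasmRevecAnalyzer drives this
// from its store/reduce seeds):
//   SLPTree tree(graph, &analyzer, zone);
//   NodeGroup roots(store0, store1);  // two consecutive Simd128 stores
//   PackNode* root = tree.BuildTree(roots);
//   if (root == nullptr) { /* this group cannot be revectorized */ }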
461class SLPTree : public NON_EXPORTED_BASE(ZoneObject) {
462 public:
463 explicit SLPTree(Graph& graph, WasmRevecAnalyzer* analyzer, Zone* zone)
464 : graph_(graph),
465 analyzer_(analyzer),
467 root_(nullptr),
470
471 // Information for extending i8x16/i16x8 to f32x4
474 uint8_t start_lane; // 0 or 8
475 uint8_t lane_size; // 1(i8) or 2(i16)
476 bool is_sign_extract; // extract_lane_s or extract_lane_u
477 bool is_sign_convert; // f32x4.convert_i32x4_s or f32x4.convert_i32x4_u
478 };
479
480 // Per-lane information for extending i8x16/i16x8 to f32x4
488
489 PackNode* BuildTree(const NodeGroup& roots);
491
500
501 void Print(const char* info);
502
503 private:
504 // This is the recursive part of BuildTree.
505 PackNode* BuildTreeRec(const NodeGroup& node_group, unsigned depth);
506
507 // Baseline: create a new PackNode, and return.
508 PackNode* NewPackNode(const NodeGroup& node_group);
509
510 // Baseline: create a new IntersectPackNode that contains nodes existing in
511 // another PackNode, and return.
512 PackNode* NewIntersectPackNode(const NodeGroup& node_group);
513
514 PackNode* NewForcePackNode(const NodeGroup& node_group,
516 const Graph& graph);
517 BundlePackNode* NewBundlePackNode(const NodeGroup& node_group, OpIndex base,
518 int8_t offset, uint8_t lane_size,
519 bool is_sign_extract, bool is_sign_convert);
520
521 // Recursion: create a new PackNode and call BuildTreeRec recursively
522 PackNode* NewPackNodeAndRecurs(const NodeGroup& node_group, int start_index,
523 int count, unsigned depth);
524
526 unsigned depth);
527
530
531 // Try to match the following pattern:
532 // 1. simd128_load64zero(memargs)
533 // 2. simd128_const[0,0,0,0]
534 // 3. simd128_shuffle(1, 2, shuffle_arg0)
535 // 4. simd128_shuffle(1, 2, shuffle_arg1)
536 // To:
537 // 1. simd256_load8x8u(memargs)
539 const uint8_t* shuffle0,
540 const uint8_t* shuffle1);
541
542#ifdef V8_TARGET_ARCH_X64
543 // The Simd Shuffle in wasm is a high-level representation that can map to
544 // different x64 instructions based on its shuffle array, and the performance
545 // of those instructions varies greatly.
546 // For example, if the shuffle array is totally random, we most likely have to
547 // use a general shuffle. On x64, a general shuffle may consist of a series of
548 // mov instructions, a vpinsrq and a vpshufb, which is expensive. However, if
549 // the shuffle array follows a particular pattern, for
550 // example: [0, 1, 2, 3, 32, 33, 34, 35, 4, 5, 6, 7, 36, 37, 38, 39,
551 // 16, 17, 18, 19, 48, 49, 50, 51, 20, 21, 22, 23, 52, 53, 54, 55]
552 // we can use a single vpunpckldq instruction, whose cost is much
553 // lower than a general shuffle's.
554 //
555 // This function tries to match the shuffle array to the x64 instructions
556 // with the best performance.
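// The pattern above, for instance, would presumably be matched as
// kS32x8UnpackLow and emitted as Simd256Unpack(left, right, k32x8Low) by the
// reducer below, which an x64 backend can select as a single vpunpckldq.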
557 ShufflePackNode* X64TryMatch256Shuffle(const NodeGroup& node_group,
558 const uint8_t* shuffle0,
559 const uint8_t* shuffle1);
560#endif // V8_TARGET_ARCH_X64
561
562 bool TryMatchExtendIntToF32x4(const NodeGroup& node_group,
563 ExtendIntToF32x4Info* info);
564 std::optional<ExtendIntToF32x4Info> TryGetExtendIntToF32x4Info(OpIndex index);
565
567 bool CanBePacked(const NodeGroup& node_group);
568 bool IsEqual(const OpIndex node0, const OpIndex node1);
569 // Check if any node in the node_group depends on the result of another.
570 bool HasInputDependencies(const NodeGroup& node_group);
571
572 Graph& graph() const { return graph_; }
573 Zone* zone() const { return phase_zone_; }
574
579 // Maps a specific node to PackNode.
581 // Maps a node to multiple IntersectPackNodes.
583 static constexpr size_t RecursionMaxDepth = 1000;
584};
585
587 public:
589 : data_(data),
590 graph_(graph),
591 phase_zone_(zone),
592 store_seeds_(zone),
593 reduce_seeds_(zone),
596 should_reduce_(false),
597 use_map_(nullptr) {
598 Run();
599 }
600
601 void Run();
602
603 void MergeSLPTree(SLPTree& slp_tree);
604 bool ShouldReduce() const { return should_reduce_; }
605
606 PackNode* GetPackNode(const OpIndex ig_index) {
607 auto itr = revectorizable_node_.find(ig_index);
608 if (itr != revectorizable_node_.end()) {
609 return itr->second;
610 }
611 return nullptr;
612 }
613
615 auto I = revectorizable_intersect_node_.find(node);
616 if (I != revectorizable_intersect_node_.end()) {
617 return &(I->second);
618 }
619 return nullptr;
620 }
621
622 const OpIndex GetReducedInput(const PackNode* pnode, const int index = 0) {
623 if (index >= static_cast<int>(pnode->GetOperandsSize())) {
624 return OpIndex::Invalid();
625 }
626 return pnode->GetOperand(index)->RevectorizedNode();
627 }
628
629 const Operation& GetStartOperation(const PackNode* pnode, const OpIndex node,
630 const Operation& op) {
631 DCHECK(pnode);
632 const OpIndex start = pnode->nodes()[0];
633 return (start == node) ? op : graph_.Get(start);
634 }
635
637 return use_map_->uses(node);
638 }
639
640 private:
641 bool IsSupportedReduceSeed(const Operation& op);
642 void ProcessBlock(const Block& block);
643 bool DecideVectorize();
644 void Print(const char* info);
645
651 const wasm::WasmModule* module_ = data_->wasm_module();
657};
658
659template <class Next>
660class WasmRevecReducer : public UniformReducerAdapter<WasmRevecReducer, Next> {
661 public:
664
666 OpIndex og_index) {
667 const auto lane = base::checked_cast<uint8_t>(
668 std::find(pnode->nodes().begin(), pnode->nodes().end(), ig_index) -
669 pnode->nodes().begin());
670
671 // A force-packed node has a dedicated use in a SimdPack128To256Op.
672 if (pnode->is_force_packing()) {
673 SimdPack128To256Op& op = __ output_graph()
674 .Get(pnode -> RevectorizedNode())
675 .template Cast<SimdPack128To256Op>();
676 return lane == 0 ? op.left() : op.right();
677 }
678
679 for (auto use : analyzer_.uses(ig_index)) {
680 // Extract128 is needed for the additional Simd128 store emitted before
681 // the Simd256 store, in case an OOB trap occurs at the higher 128-bit
682 // address.
683 auto use_pnode = analyzer_.GetPackNode(use);
684 if (use_pnode != nullptr && !use_pnode->is_force_packing()) {
685 DCHECK_GE(use_pnode->nodes().size(), 2);
686 if (__ input_graph().Get(use).opcode != Opcode::kStore ||
687 use_pnode->nodes()[0] != use ||
688 use_pnode->nodes()[0] > use_pnode->nodes()[1])
689 continue;
690 }
691
692 return __ Simd256Extract128Lane(og_index, lane);
693 }
694
695 return OpIndex::Invalid();
696 }
697
699 V<Simd128> ig_index, const Simd128ConstantOp& constant_op) {
700 PackNode* pnode = analyzer_.GetPackNode(ig_index);
701 if (!pnode) {
702 return Adapter::ReduceInputGraphSimd128Constant(ig_index, constant_op);
703 }
704
705 V<Simd256> og_index = pnode->RevectorizedNode();
706 // Skip revectorized node.
707 if (!og_index.valid()) {
708 NodeGroup inputs = pnode->nodes();
709 const Simd128ConstantOp& op0 =
710 __ input_graph().Get(inputs[0]).template Cast<Simd128ConstantOp>();
711 const Simd128ConstantOp& op1 =
712 __ input_graph().Get(inputs[1]).template Cast<Simd128ConstantOp>();
713 uint8_t value[kSimd256Size] = {};
714 memcpy(value, op0.value, kSimd128Size);
715 memcpy(value + kSimd128Size, op1.value, kSimd128Size);
716
717 og_index = __ Simd256Constant(value);
718
719 pnode->SetRevectorizedNode(og_index);
720 }
721 return GetExtractOpIfNeeded(pnode, ig_index, og_index);
722 }
723
725 V<Simd128> ig_index, const Simd128LoadTransformOp& load_transform) {
726 PackNode* pnode = analyzer_.GetPackNode(ig_index);
727 if (!pnode || !pnode->IsDefaultPackNode()) {
728 return Adapter::ReduceInputGraphSimd128LoadTransform(ig_index,
729 load_transform);
730 }
731
732 V<Simd256> og_index = pnode->RevectorizedNode();
733 // Skip revectorized node.
734 if (!og_index.valid()) {
735 auto base = __ MapToNewGraph(load_transform.base());
736 auto index = __ MapToNewGraph(load_transform.index());
737 auto offset = load_transform.offset;
738 DCHECK_EQ(load_transform.offset, 0);
739
740 og_index = __ Simd256LoadTransform(
741 base, index, load_transform.load_kind,
742 Get256LoadTransformKindFrom128(load_transform.transform_kind),
743 offset);
744 pnode->SetRevectorizedNode(og_index);
745 }
746 return GetExtractOpIfNeeded(pnode, ig_index, og_index);
747 }
748
749 OpIndex REDUCE_INPUT_GRAPH(Load)(OpIndex ig_index, const LoadOp& load) {
750 PackNode* pnode = analyzer_.GetPackNode(ig_index);
751 if (!pnode || !pnode->IsDefaultPackNode()) {
752 return Adapter::ReduceInputGraphLoad(ig_index, load);
753 }
754
755 OpIndex og_index = pnode->RevectorizedNode();
756 // Skip revectorized node.
757 if (!og_index.valid()) {
758 const LoadOp& start = analyzer_.GetStartOperation(pnode, ig_index, load)
759 .template Cast<LoadOp>();
760 DCHECK_EQ(start.base(), load.base());
761
762 auto base = __ MapToNewGraph(load.base());
763 // We need to use load's index here because there may be different
764 // ChangeOps derived from the same index. If start is not this load, it is
765 // possible that the ChangeOp of start's index has not been visited yet.
766 auto index = __ MapToNewGraph(load.index());
767 og_index = __ Load(base, index, load.kind,
769 pnode->SetRevectorizedNode(og_index);
770 }
771
772 // Emit extract op if needed.
773 return GetExtractOpIfNeeded(pnode, ig_index, og_index);
774 }
775
777 PackNode* pnode = analyzer_.GetPackNode(ig_index);
778 if (!pnode) {
779 return Adapter::ReduceInputGraphStore(ig_index, store);
780 }
781
782 OpIndex og_index = pnode->RevectorizedNode();
783 // Skip revectorized node.
784 if (!og_index.valid()) {
785 const StoreOp& start =
786 (analyzer_.GetStartOperation(pnode, ig_index, store))
787 .template Cast<StoreOp>();
788 DCHECK_EQ(start.base(), store.base());
789
790 // It's possible that an OOB trap occurs at the higher 128-bit address
791 // after the lower 128-bit store is executed. To ensure a consistent
792 // memory state before and after revectorization, emit the first 128-bit
793 // store before the 256-bit revectorized store.
794 if (ig_index == pnode->nodes()[0]) {
795 Adapter::ReduceInputGraphStore(ig_index, store);
796 }
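// E.g. for two consecutive 16-byte stores at offsets 0 and 16 where the
// store at offset 16 is out of bounds: keeping the lower 128-bit store
// ensures bytes [0, 16) are written exactly as in the unvectorized code
// before the 256-bit store traps.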
797
798 auto base = __ MapToNewGraph(store.base());
799 // We need to use store's index here because there may be different
800 // ChangeOps derived from the same index. If start is not this store, it is
801 // possible that the ChangeOp of start's index has not been visited yet.
802 auto index = __ MapToNewGraph(store.index());
803 V<Simd256> value = analyzer_.GetReducedInput(pnode);
804 DCHECK(value.valid());
805
806 __ Store(base, index, value, store.kind, MemoryRepresentation::Simd256(),
807 store.write_barrier, start.offset);
808
809 // Set an arbitrary valid OpIndex here to skip reducing this node later.
810 pnode->SetRevectorizedNode(ig_index);
811 }
812
813 // No extract op needed for Store.
814 return OpIndex::Invalid();
815 }
816
817 OpIndex REDUCE_INPUT_GRAPH(Phi)(OpIndex ig_index, const PhiOp& phi) {
818 if (phi.rep == RegisterRepresentation::Simd128()) {
819 if (auto pnode = analyzer_.GetPackNode(ig_index)) {
820 OpIndex og_index = pnode->RevectorizedNode();
821
822 // Don't reduce revectorized node.
823 if (!og_index.valid()) {
825 og_index = __ ResolvePhi(
826 phi,
827 [&](OpIndex ind, int block_id, int old_block_id = 0) {
828 return analyzer_.GetReducedInput(pnode, old_block_id);
829 },
831 pnode->SetRevectorizedNode(og_index);
832 }
833
834 OpIndex extract_op_index =
835 GetExtractOpIfNeeded(pnode, ig_index, og_index);
836 // If phis are not mapped to anything in the new graph,
837 // they will be skipped in FixLoopPhis in the copying phase.
838 // Return og_index to create the mapping.
839 if (extract_op_index == OpIndex::Invalid()) {
840 return og_index;
841 } else {
842 return extract_op_index;
843 }
844 }
845 }
846
847 return Adapter::ReduceInputGraphPhi(ig_index, phi);
848 }
849
850 void FixLoopPhi(const PhiOp& input_phi, OpIndex output_index,
851 Block* output_graph_loop) {
852 if (input_phi.rep == RegisterRepresentation::Simd128()) {
853 OpIndex phi_index = __ input_graph().Index(input_phi);
854 DCHECK(phi_index.valid());
855 if (auto* pnode = analyzer_.GetPackNode(phi_index)) {
856 auto pending_index = pnode->RevectorizedNode();
857 DCHECK(pending_index.valid());
858 if (pending_index.valid() &&
859 output_graph_loop->Contains(pending_index)) {
860 // Need to skip the already-replaced op.
861 if (auto* pending_phi = __ output_graph()
862 .Get(pending_index)
863 .template TryCast<PendingLoopPhiOp>()) {
864 __ output_graph().template Replace<PhiOp>(
865 pending_index,
866 base::VectorOf({pending_phi -> first(),
867 analyzer_.GetReducedInput(pnode, 1)}),
869 return;
870 }
871 }
872 }
873 }
874
875 return Adapter::FixLoopPhi(input_phi, output_index, output_graph_loop);
876 }
877
879 const Simd128UnaryOp& unary) {
880 PackNode* pnode = analyzer_.GetPackNode(ig_index);
881 if (!pnode || !pnode->IsDefaultPackNode()) {
882 return Adapter::ReduceInputGraphSimd128Unary(ig_index, unary);
883 }
884
885 V<Simd256> og_index = pnode->RevectorizedNode();
886 // Skip revectorized node.
887 if (!og_index.valid()) {
888 V<Simd256> input = analyzer_.GetReducedInput(pnode);
889 if (!input.valid()) {
890 V<Simd128> input_128 = __ MapToNewGraph(unary.input());
891 og_index = __ Simd256Unary(input_128, GetSimd256UnaryKind(unary.kind));
892 } else {
893 og_index = __ Simd256Unary(input, GetSimd256UnaryKind(unary.kind));
894 }
895 pnode->SetRevectorizedNode(og_index);
896 }
897 return GetExtractOpIfNeeded(pnode, ig_index, og_index);
898 }
899
901 const Simd128BinopOp& op) {
902 PackNode* pnode = analyzer_.GetPackNode(ig_index);
903 if (!pnode || !pnode->IsDefaultPackNode()) {
904 return Adapter::ReduceInputGraphSimd128Binop(ig_index, op);
905 }
906
907 V<Simd256> og_index = pnode->RevectorizedNode();
908 // Skip revectorized node.
909 if (!og_index.valid()) {
910 if (pnode->GetOperandsSize() < 2) {
911 V<Simd128> left = __ MapToNewGraph(op.left());
912 V<Simd128> right = __ MapToNewGraph(op.right());
913 og_index = __ Simd256Binop(left, right, GetSimd256BinOpKind(op.kind));
914 } else {
915 V<Simd256> left = analyzer_.GetReducedInput(pnode, 0);
916 V<Simd256> right = analyzer_.GetReducedInput(pnode, 1);
917 og_index = __ Simd256Binop(left, right, GetSimd256BinOpKind(op.kind));
918 }
919 pnode->SetRevectorizedNode(og_index);
920 }
921 return GetExtractOpIfNeeded(pnode, ig_index, og_index);
922 }
923
925 const Simd128ShiftOp& op) {
926 PackNode* pnode = analyzer_.GetPackNode(ig_index);
927 if (!pnode) {
928 return Adapter::ReduceInputGraphSimd128Shift(ig_index, op);
929 }
930
931 V<Simd256> og_index = pnode->RevectorizedNode();
932 // Skip revectorized node.
933 if (!og_index.valid()) {
934 V<Simd256> input = analyzer_.GetReducedInput(pnode);
935 DCHECK(input.valid());
936 V<Word32> shift = __ MapToNewGraph(op.shift());
937 og_index = __ Simd256Shift(input, shift, GetSimd256ShiftOpKind(op.kind));
938 pnode->SetRevectorizedNode(og_index);
939 }
940 return GetExtractOpIfNeeded(pnode, ig_index, og_index);
941 }
942
944 V<Simd128> ig_index, const Simd128TernaryOp& ternary) {
945 PackNode* pnode = analyzer_.GetPackNode(ig_index);
946 if (!pnode) {
947 return Adapter::ReduceInputGraphSimd128Ternary(ig_index, ternary);
948 }
949
950 V<Simd256> og_index = pnode->RevectorizedNode();
951 // Skip revectorized node.
952 if (!og_index.valid()) {
953 V<Simd256> first = analyzer_.GetReducedInput(pnode, 0);
955 V<Simd256> third = analyzer_.GetReducedInput(pnode, 2);
956
957 og_index = __ Simd256Ternary(first, second, third,
958 GetSimd256TernaryKind(ternary.kind));
959
960 pnode->SetRevectorizedNode(og_index);
961 }
962 return GetExtractOpIfNeeded(pnode, ig_index, og_index);
963 }
964
966 const Simd128SplatOp& op) {
967 PackNode* pnode = analyzer_.GetPackNode(ig_index);
968 if (!pnode) {
969 return Adapter::ReduceInputGraphSimd128Splat(ig_index, op);
970 }
971
972 V<Simd256> og_index = pnode->RevectorizedNode();
973 // Skip revectorized node.
974 if (!og_index.valid()) {
975 og_index = __ Simd256Splat(__ MapToNewGraph(op.input()),
976 Get256SplatOpKindFrom128(op.kind));
977
978 pnode->SetRevectorizedNode(og_index);
979 }
980 return GetExtractOpIfNeeded(pnode, ig_index, og_index);
981 }
982
984 const Simd128ShuffleOp& op) {
985 PackNode* p = analyzer_.GetPackNode(ig_index);
986 if (!p) {
987 return Adapter::ReduceInputGraphSimd128Shuffle(ig_index, op);
988 }
989 DCHECK_EQ(op.kind, Simd128ShuffleOp::Kind::kI8x16);
990
992 V<Simd256> og_index = pnode->RevectorizedNode();
993 // Skip revectorized node.
994 if (!og_index.valid()) {
996 switch (kind) {
999 const bool is_32 =
1001
1002 const OpIndex load_index =
1003 op.input(pnode->info().splat_index() >> (is_32 ? 2 : 1));
1004 const LoadOp& load =
1005 __ input_graph().Get(load_index).template Cast<LoadOp>();
1006
1007 const int bytes_per_lane = is_32 ? 4 : 8;
1008 const int splat_index = pnode->info().splat_index() * bytes_per_lane;
1009 const int offset = splat_index + load.offset;
1010
1011 V<WordPtr> base = __ WordPtrAdd(__ MapToNewGraph(load.base()),
1013
1014 V<WordPtr> index = load.index().has_value()
1015 ? __ MapToNewGraph(load.index().value())
1016 : __ IntPtrConstant(0);
1017
1018 const Simd256LoadTransformOp::TransformKind transform_kind =
1019 is_32 ? Simd256LoadTransformOp::TransformKind::k32Splat
1020 : Simd256LoadTransformOp::TransformKind::k64Splat;
1021 og_index = __ Simd256LoadTransform(base, index, load.kind,
1022 transform_kind, 0);
1023 pnode->SetRevectorizedNode(og_index);
1024 break;
1025 }
1027 const Simd128ShuffleOp& op0 = __ input_graph()
1028 .Get(pnode -> nodes()[0])
1029 .template Cast<Simd128ShuffleOp>();
1030
1031 V<Simd128> load_transform_idx =
1032 __ input_graph()
1033 .Get(op0.left())
1034 .template Is<Simd128LoadTransformOp>()
1035 ? op0.left()
1036 : op0.right();
1037 const Simd128LoadTransformOp& load_transform =
1038 __ input_graph()
1039 .Get(load_transform_idx)
1040 .template Cast<Simd128LoadTransformOp>();
1041 DCHECK_EQ(load_transform.transform_kind,
1042 Simd128LoadTransformOp::TransformKind::k64Zero);
1043 V<WordPtr> base = __ MapToNewGraph(load_transform.base());
1044 V<WordPtr> index = __ MapToNewGraph(load_transform.index());
1045 og_index = __ Simd256LoadTransform(
1046 base, index, load_transform.load_kind,
1047 Simd256LoadTransformOp::TransformKind::k8x8U,
1048 load_transform.offset);
1049 pnode->SetRevectorizedNode(og_index);
1050 break;
1051 }
1052#ifdef V8_TARGET_ARCH_X64
1053 case ShufflePackNode::SpecificInfo::Kind::kShufd: {
1054 V<Simd256> og_left = analyzer_.GetReducedInput(pnode, 0);
1055 DCHECK_EQ(og_left, analyzer_.GetReducedInput(pnode, 1));
1056 og_index = __ Simd256Shufd(og_left, pnode->info().shufd_control());
1057 pnode->SetRevectorizedNode(og_index);
1058 break;
1059 }
1060 case ShufflePackNode::SpecificInfo::Kind::kShufps: {
1061 V<Simd256> og_left = analyzer_.GetReducedInput(pnode, 0);
1062 V<Simd256> og_right = analyzer_.GetReducedInput(pnode, 1);
1063 og_index = __ Simd256Shufps(og_left, og_right,
1064 pnode->info().shufps_control());
1065 pnode->SetRevectorizedNode(og_index);
1066 break;
1067 }
1068 case ShufflePackNode::SpecificInfo::Kind::kS32x8UnpackLow: {
1069 V<Simd256> og_left = analyzer_.GetReducedInput(pnode, 0);
1070 V<Simd256> og_right = analyzer_.GetReducedInput(pnode, 1);
1071 og_index = __ Simd256Unpack(og_left, og_right,
1072 Simd256UnpackOp::Kind::k32x8Low);
1073 pnode->SetRevectorizedNode(og_index);
1074 break;
1075 }
1076 case ShufflePackNode::SpecificInfo::Kind::kS32x8UnpackHigh: {
1077 V<Simd256> og_left = analyzer_.GetReducedInput(pnode, 0);
1078 V<Simd256> og_right = analyzer_.GetReducedInput(pnode, 1);
1079 og_index = __ Simd256Unpack(og_left, og_right,
1080 Simd256UnpackOp::Kind::k32x8High);
1081 pnode->SetRevectorizedNode(og_index);
1082 break;
1083 }
1084#endif // V8_TARGET_ARCH_X64
1085 default:
1086 UNREACHABLE();
1087 }
1088 }
1089 return GetExtractOpIfNeeded(pnode, ig_index, og_index);
1090 }
1091
1093 OpIndex ig_index, const Simd128ReplaceLaneOp& replace) {
1094 PackNode* pnode = analyzer_.GetPackNode(ig_index);
1095 if (!pnode || !pnode->IsBundlePackNode()) {
1096 return Adapter::ReduceInputGraphSimd128ReplaceLane(ig_index, replace);
1097 }
1098
1099 V<Simd256> og_index = pnode->RevectorizedNode();
1100 // Don't reduce revectorized node.
1101 if (!og_index.valid()) {
1102 const BundlePackNode* bundle_pnode = pnode->AsBundlePackNode();
1103 V<Simd128> base_index = __ MapToNewGraph(bundle_pnode->base());
1104 V<Simd128> i16x8_index = base_index;
1105 V<Simd256> i32x8_index;
1106 if (bundle_pnode->is_sign_extract()) {
1107 if (bundle_pnode->lane_size() == 1) {
1108 if (bundle_pnode->offset() == 0) {
1109 i16x8_index = __ Simd128Unary(
1110 base_index, Simd128UnaryOp::Kind::kI16x8SConvertI8x16Low);
1111 } else {
1112 DCHECK_EQ(bundle_pnode->offset(), 8);
1113 i16x8_index = __ Simd128Unary(
1114 base_index, Simd128UnaryOp::Kind::kI16x8SConvertI8x16High);
1115 }
1116 }
1117 i32x8_index = __ Simd256Unary(
1118 i16x8_index, Simd256UnaryOp::Kind::kI32x8SConvertI16x8);
1119 } else {
1120 if (bundle_pnode->lane_size() == 1) {
1121 if (bundle_pnode->offset() == 0) {
1122 i16x8_index = __ Simd128Unary(
1123 base_index, Simd128UnaryOp::Kind::kI16x8UConvertI8x16Low);
1124 } else {
1125 DCHECK_EQ(bundle_pnode->offset(), 8);
1126 i16x8_index = __ Simd128Unary(
1127 base_index, Simd128UnaryOp::Kind::kI16x8UConvertI8x16High);
1128 }
1129 }
1130 i32x8_index = __ Simd256Unary(
1131 i16x8_index, Simd256UnaryOp::Kind::kI32x8UConvertI16x8);
1132 }
1133
1134 if (bundle_pnode->is_sign_convert()) {
1135 og_index = __ Simd256Unary(i32x8_index,
1136 Simd256UnaryOp::Kind::kF32x8SConvertI32x8);
1137 } else {
1138 og_index = __ Simd256Unary(i32x8_index,
1139 Simd256UnaryOp::Kind::kF32x8UConvertI32x8);
1140 }
1141
1142 pnode->SetRevectorizedNode(og_index);
1143 }
1144 return GetExtractOpIfNeeded(pnode, ig_index, og_index);
1145 }
1146
1147 void ReduceInputsOfOp(OpIndex cur_index, OpIndex op_index) {
1148 // Reduce all the operations in op_index's input tree whose indices are
1149 // bigger than cur_index. The traversal is done in a DFS manner
1150 // to make sure all inputs are emitted before their uses.
1151 const Block* current_input_block = Asm().current_input_block();
1152 std::stack<OpIndex> inputs;
1153 ZoneUnorderedSet<OpIndex> visited(Asm().phase_zone());
1154 inputs.push(op_index);
1155
1156 while (!inputs.empty()) {
1157 OpIndex idx = inputs.top();
1158 if (visited.find(idx) != visited.end()) {
1159 inputs.pop();
1160 continue;
1161 }
1162
1163 const Operation& op = __ input_graph().Get(idx);
1164 bool has_unvisited_inputs = false;
1165 for (OpIndex input : op.inputs()) {
1166 if (input > cur_index && visited.find(input) == visited.end()) {
1167 inputs.push(input);
1168 has_unvisited_inputs = true;
1169 }
1170 }
1171
1172 if (!has_unvisited_inputs) {
1173 inputs.pop();
1174 visited.insert(idx);
1175
1176 // op_index will be reduced later.
1177 if (idx == op_index) continue;
1178
1179 DCHECK(!Asm().input_graph().Get(idx).template Is<PhiOp>());
1180 Asm().template VisitOpAndUpdateMapping<false>(idx, current_input_block);
1181 }
1182 }
1183 }
1184
1185 template <typename Op, typename Continuation>
1187 OpIndex* og_index) {
1188 std::array<OpIndex, 2> v;
1189 DCHECK_EQ(pnode->nodes().size(), 2);
1190 // The operation order in pnode is determined by the store or reduce
1191 // seed when building the SLPTree. It is not guaranteed to align with
1192 // the visiting order in each basic block of the input graph. E.g. we
1193 // can have a block containing the operations {a1, a2, b1, b2}, and the
1194 // SLPTree can be pnode1: (a2, a1), pnode2: (b1, b2) if a2 is an input
1195 // of b1, and a1 is an input of b2.
1196 for (int i = 0; i < static_cast<int>(pnode->nodes().size()); i++) {
1197 OpIndex cur_index = pnode->nodes()[i];
1198 if ((*og_index).valid() && cur_index == ig_index) {
1199 v[i] = *og_index;
1200 } else {
1201 // The current index may already have been reduced by an IntersectPackNode.
1202 v[i] = __ template MapToNewGraph<true>(cur_index);
1203 }
1204
1205 if (v[i].valid()) continue;
1206
1207 if (cur_index != ig_index) {
1208 ReduceInputsOfOp(ig_index, cur_index);
1209 }
1210 const Op& op = Asm().input_graph().Get(cur_index).template Cast<Op>();
1211 v[i] = Continuation{this}.ReduceInputGraph(cur_index, op);
1212
1213 if (cur_index == ig_index) {
1214 *og_index = v[i];
1215 } else {
1216 // We have to create the mapping as cur_index may also exist in another
1217 // IntersectPackNode and be reduced again.
1218 __ CreateOldToNewMapping(cur_index, v[i]);
1219 }
1220 }
1221
1222 OpIndex revec_index = __ SimdPack128To256(v[0], v[1]);
1223 pnode->SetRevectorizedNode(revec_index);
1224 }
1225
1226 template <typename Op, typename Continuation>
1228 OpIndex og_index;
1229 // Reduce ForcePackNode
1230 if (PackNode* p = analyzer_.GetPackNode(ig_index);
1231 p && p->IsForcePackNode()) {
1232 // Handle force packing nodes.
1233 ForcePackNode* pnode = p->AsForcePackNode();
1234 if (!pnode->RevectorizedNode().valid()) {
1235 switch (pnode->force_pack_type()) {
1236 case ForcePackNode::kSplat: {
1237 // The og_index may already have been reduced by an IntersectPackNode.
1238 OpIndex reduced_index = __ template MapToNewGraph<true>(ig_index);
1239 if (!reduced_index.valid()) {
1240 og_index = reduced_index =
1241 Continuation{this}.ReduceInputGraph(ig_index, op);
1242 }
1243 OpIndex revec_index =
1244 __ SimdPack128To256(reduced_index, reduced_index);
1245 pnode->SetRevectorizedNode(revec_index);
1246 break;
1247 }
1250 &og_index);
1251 break;
1252 }
1253 }
1254 }
1255 }
1256
1257 // Reduce IntersectPackNode
1258 if (auto intersect_packnodes = analyzer_.GetIntersectPackNodes(ig_index)) {
1259 for (PackNode* pnode : *intersect_packnodes) {
1260 if (!(pnode->RevectorizedNode()).valid()) {
1262 &og_index);
1263 }
1264 }
1265 }
1266
1267 if (og_index.valid()) {
1268 return og_index;
1269 }
1270
1271 if (__ template MapToNewGraph<true>(ig_index).valid()) {
1272 // The op was already emitted while emitting the input trees of the
1273 // force-packed right node.
1274 return OpIndex::Invalid();
1275 }
1276
1277 return Continuation{this}.ReduceInputGraph(ig_index, op);
1278 }
1279
1280 private:
1281 static Simd256UnaryOp::Kind GetSimd256UnaryKind(
1282 Simd128UnaryOp::Kind simd128_kind) {
1283 switch (simd128_kind) {
1284#define UNOP_KIND_MAPPING(from, to) \
1285 case Simd128UnaryOp::Kind::k##from: \
1286 return Simd256UnaryOp::Kind::k##to;
1288#undef UNOP_KIND_MAPPING
1289
1290#define SIGN_EXTENSION_UNOP_KIND_MAPPING(from_1, to, from_2) \
1291 case Simd128UnaryOp::Kind::k##from_1: \
1292 return Simd256UnaryOp::Kind::k##to; \
1293 case Simd128UnaryOp::Kind::k##from_2: \
1294 return Simd256UnaryOp::Kind::k##to;
1296#undef SIGN_EXTENSION_UNOP_KIND_MAPPING
1297 default:
1298 UNIMPLEMENTED();
1299 }
1300 }
1301
1302 static Simd256BinopOp::Kind GetSimd256BinOpKind(Simd128BinopOp::Kind kind) {
1303 switch (kind) {
1304#define BINOP_KIND_MAPPING(from, to) \
1305 case Simd128BinopOp::Kind::k##from: \
1306 return Simd256BinopOp::Kind::k##to;
1308#undef BINOP_KIND_MAPPING
1309
1310#define SIGN_EXTENSION_BINOP_KIND_MAPPING(from_1, to, from_2) \
1311 case Simd128BinopOp::Kind::k##from_1: \
1312 return Simd256BinopOp::Kind::k##to; \
1313 case Simd128BinopOp::Kind::k##from_2: \
1314 return Simd256BinopOp::Kind::k##to;
1316#undef SIGN_EXTENSION_BINOP_KIND_MAPPING
1317 default:
1318 UNIMPLEMENTED();
1319 }
1320 }
1321
1322 static Simd256ShiftOp::Kind GetSimd256ShiftOpKind(Simd128ShiftOp::Kind kind) {
1323 switch (kind) {
1324#define SHIFT_KIND_MAPPING(from, to) \
1325 case Simd128ShiftOp::Kind::k##from: \
1326 return Simd256ShiftOp::Kind::k##to;
1328#undef SHIFT_KIND_MAPPING
1329 default:
1330 UNIMPLEMENTED();
1331 }
1332 }
1333
1334 static Simd256TernaryOp::Kind GetSimd256TernaryKind(
1335 Simd128TernaryOp::Kind simd128_kind) {
1336 switch (simd128_kind) {
1337#define TERNARY_KIND_MAPPING(from, to) \
1338 case Simd128TernaryOp::Kind::k##from: \
1339 return Simd256TernaryOp::Kind::k##to;
1341#undef TERNARY_KIND_MAPPING
1342 default:
1343 UNIMPLEMENTED();
1344 }
1345 }
1346
1347 static Simd256LoadTransformOp::TransformKind Get256LoadTransformKindFrom128(
1348 Simd128LoadTransformOp::TransformKind simd128_kind) {
1349 switch (simd128_kind) {
1350#define TRANSFORM_KIND_MAPPING(from, to) \
1351 case Simd128LoadTransformOp::TransformKind::k##from: \
1352 return Simd256LoadTransformOp::TransformKind::k##to;
1354#undef TRANSFORM_KIND_MAPPING
1355 default:
1356 UNREACHABLE();
1357 }
1358 }
1359
1360 static Simd256SplatOp::Kind Get256SplatOpKindFrom128(
1361 Simd128SplatOp::Kind kind) {
1362 switch (kind) {
1363#define SPLAT_KIND_MAPPING(from, to) \
1364 case Simd128SplatOp::Kind::k##from: \
1365 return Simd256SplatOp::Kind::k##to;
1367 default:
1368 UNREACHABLE();
1369 }
1370 }
1371
1372 const wasm::WasmModule* module_ = __ data() -> wasm_module();
1373 WasmRevecAnalyzer analyzer_ = *__ data() -> wasm_revec_analyzer();
1374};
1375
1377
1378} // namespace v8::internal::compiler::turboshaft
1379
1380#endif // V8_COMPILER_TURBOSHAFT_WASM_REVEC_REDUCER_H_