v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
wasm-revec-reducer.cc
1// Copyright 2023 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "src/compiler/turboshaft/wasm-revec-reducer.h"
6
7#include <optional>
8
9#include "src/base/logging.h"
10#include "src/compiler/turboshaft/opmasks.h"
11#include "src/wasm/simd-shuffle.h"
12
13#define TRACE(...) \
14 do { \
15 if (v8_flags.trace_wasm_revectorize) { \
16 PrintF("Revec: %s %d: ", __func__, __LINE__); \
17 PrintF(__VA_ARGS__); \
18 } \
19 } while (false)
20
21namespace v8::internal::compiler::turboshaft {
22
23// Returns true if the two ops have the same opcode and the same kind.
24bool IsSameOpAndKind(const Operation& op0, const Operation& op1) {
25#define CASE(operation) \
26 case Opcode::k##operation: { \
27 using Op = operation##Op; \
28 return op0.Cast<Op>().kind == op1.Cast<Op>().kind; \
29 }
30 if (op0.opcode != op1.opcode) {
31 return false;
32 }
33 switch (op0.opcode) {
34 CASE(Simd128Unary)
35 CASE(Simd128Binop)
36 CASE(Simd128Shift)
37 CASE(Simd128Ternary)
38 CASE(Simd128Splat)
39 default:
40 return true;
41 }
42#undef CASE
43}
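// Note: for opcodes not listed in the CASE macro above, two ops with matching
// opcodes are treated as having the same kind (the default branch returns true).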
44
45std::string GetSimdOpcodeName(Operation const& op) {
46 std::ostringstream oss;
47 if (op.Is<Simd128BinopOp>() || op.Is<Simd128UnaryOp>() ||
48 op.Is<Simd128ShiftOp>() || op.Is<Simd128TestOp>() ||
49 op.Is<Simd128TernaryOp>()) {
50 op.PrintOptions(oss);
51 } else {
52 oss << OpcodeName(op.opcode);
53 }
54 return oss.str();
55}
56
57// This class is a wrapper for StoreOp/LoadOp, which is helpful to calculate
58// the relative offset between two StoreOp/LoadOp operations.
59template <typename Op,
60 typename = std::enable_if_t<
61 std::is_same_v<Op, StoreOp> || std::is_same_v<Op, LoadOp> ||
62 std::is_same_v<Op, Simd128LoadTransformOp>>>
63class StoreLoadInfo {
64 public:
65 StoreLoadInfo(const Graph* graph, const Op* op)
66 : op_(op), offset_(op->offset) {
67 base_ = &graph->Get(op->base());
68 if constexpr (std::is_same_v<Op, Simd128LoadTransformOp>) {
70 const WordBinopOp* add_op = base_->TryCast<WordBinopOp>();
71 if (!add_op || add_op->kind != WordBinopOp::Kind::kAdd ||
72 add_op->rep != WordRepresentation::Word64()) {
73 SetInvalid();
74 return;
75 }
76 base_ = &graph->Get(add_op->left());
77 const ConstantOp* const_op =
78 graph->Get(add_op->right()).TryCast<ConstantOp>();
79 if (!const_op) {
80 SetInvalid();
81 return;
82 }
83 offset_ = const_op->word64();
84 index_ = &(graph->Get(op->index()));
85 } else {
86 if (!op->index().has_value()) {
87 SetInvalid();
88 return;
89 }
90 index_ = &(graph->Get(op->index().value()));
91 }
92
93 if (const ChangeOp* change_op = index_->TryCast<ChangeOp>()) {
94 DCHECK_EQ(change_op->kind, ChangeOp::Kind::kZeroExtend);
95 index_ = &graph->Get(change_op->input());
96 // If index_ is constant, add the constant to offset_ and set index_ to
97 // nullptr
98 if (const ConstantOp* const_op = index_->TryCast<ConstantOp>()) {
99 DCHECK_EQ(const_op->kind, ConstantOp::Kind::kWord32);
100 int32_t new_offset;
101 if (base::bits::SignedAddOverflow32(
102 static_cast<int32_t>(const_op->word32()),
103 static_cast<int32_t>(offset_), &new_offset)) {
104 // The combined offset overflowed.
105 SetInvalid();
106 return;
107 }
108 offset_ = new_offset;
109 index_ = nullptr;
110 }
111 } else { // memory64
112 if (const ConstantOp* const_op = index_->TryCast<ConstantOp>()) {
113 DCHECK_EQ(const_op->kind, ConstantOp::Kind::kWord64);
114 offset_ += const_op->word64();
115 index_ = nullptr;
116 }
117 }
118 }
119
120 std::optional<int> operator-(const StoreLoadInfo<Op>& rhs) const {
121 DCHECK(IsValid() && rhs.IsValid());
122 bool calculatable = base_ == rhs.base_ && index_ == rhs.index_;
123
124 if constexpr (std::is_same_v<Op, Simd128LoadTransformOp>) {
125 calculatable &= (op_->load_kind == rhs.op_->load_kind &&
126 op_->transform_kind == rhs.op_->transform_kind);
127 } else {
128 calculatable &= (op_->kind == rhs.op_->kind);
129 }
130
131 if constexpr (std::is_same_v<Op, StoreOp>) {
132 // TODO(v8:12716) If one store has a full write barrier and the other has
133 // no write barrier, consider combining them with a full write barrier.
134 calculatable &= (op_->write_barrier == rhs.op_->write_barrier);
135 }
136
137 if (calculatable) {
138 return offset_ - rhs.offset_;
139 }
140 return {};
141 }
142
143 bool IsValid() const { return op_ != nullptr; }
144
145 const Operation* index() const { return index_; }
146 int64_t offset() const { return offset_; }
147 const Op* op() const { return op_; }
148
149 private:
150 void SetInvalid() { op_ = nullptr; }
151
152 const Op* op_;
153 const Operation* base_ = nullptr;
154 const Operation* index_ = nullptr;
155 int64_t offset_;
156};
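// A minimal usage sketch (hypothetical loads, not taken from a real graph):
// given two Simd128 loads that share the same base, index and kind,
//   StoreLoadInfo<LoadOp> a(&graph, &load0);  // e.g. effective offset 0
//   StoreLoadInfo<LoadOp> b(&graph, &load1);  // e.g. effective offset 16
//   std::optional<int> stride = b - a;        // 16 == kSimd128Size
// A nullopt result means the bases, indices or kinds differ, so the two
// accesses cannot be treated as contiguous.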
157
158struct StoreLoadInfoCompare {
159 bool operator()(const StoreLoadInfo<StoreOp>& lhs,
160 const StoreLoadInfo<StoreOp>& rhs) const {
161 if (lhs.index() != rhs.index()) {
162 return lhs.index() < rhs.index();
163 }
164 return lhs.offset() < rhs.offset();
165 }
166};
167
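// StoreInfoSet keeps stores ordered by (index, offset) via StoreLoadInfoCompare,
// so adjacent entries are the natural candidates for contiguous store pairs.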
168using StoreInfoSet = ZoneSet<StoreLoadInfo<StoreOp>, StoreLoadInfoCompare>;
169
170// Returns whether the stride of node_group equals a specific value.
171template <class Op, class Info>
172bool LoadStrideEqualTo(const Graph& graph, const NodeGroup& node_group,
173 int stride) {
174 base::SmallVector<Info, 2> load_infos;
175 for (OpIndex op_idx : node_group) {
176 const Operation& op = graph.Get(op_idx);
177 const Op& load_op = op.Cast<Op>();
178 Info info(&graph, &load_op);
179 if (!info.IsValid()) {
180 return false;
181 }
182 load_infos.push_back(info);
183 }
184 return load_infos[1] - load_infos[0] == stride;
185}
186
187// Returns true if all of the nodes in node_group are identical.
188// The splat opcode in Wasm SIMD is used to create a vector with identical lanes.
189template <typename T>
190bool IsSplat(const T& node_group) {
191 DCHECK_EQ(node_group.size(), 2);
192 return node_group[1] == node_group[0];
193}
194
195void PackNode::Print(Graph* graph) const {
196 Operation& op = graph->Get(nodes_[0]);
197 TRACE("%s(#%d, #%d)\n", GetSimdOpcodeName(op).c_str(), nodes_[0].id(),
198 nodes_[1].id());
199}
200
201PackNode* SLPTree::GetPackNode(OpIndex node) {
202 auto itr = node_to_packnode_.find(node);
203 if (itr != node_to_packnode_.end()) {
204 return itr->second;
205 }
206 return analyzer_->GetPackNode(node);
207}
208
209ZoneVector<PackNode*>* SLPTree::GetIntersectPackNodes(OpIndex node) {
210 auto I = node_to_intersect_packnodes_.find(node);
211 if (I != node_to_intersect_packnodes_.end()) {
212 return &(I->second);
213 }
214 return nullptr;
215}
216
217template <typename FunctionType>
218void ForEach(FunctionType callback,
219 const ZoneUnorderedMap<OpIndex, PackNode*>& node_map) {
220 absl::flat_hash_set<PackNode const*> visited;
221
222 for (auto& entry : node_map) {
223 PackNode const* pnode = entry.second;
224 if (!pnode || visited.find(pnode) != visited.end()) {
225 continue;
226 }
227 visited.insert(pnode);
228
229 callback(pnode);
230 }
231}
232
233template <typename FunctionType>
234void ForEach(FunctionType callback,
235 const ZoneUnorderedMap<OpIndex, ZoneVector<PackNode*>>& node_map) {
236 absl::flat_hash_set<PackNode const*> visited;
237
238 for (const auto& entry : node_map) {
239 for (auto* pnode : entry.second) {
240 if (visited.find(pnode) != visited.end()) {
241 continue;
242 }
243 visited.insert(pnode);
244 callback(pnode);
245 }
246 }
247}
248
249void SLPTree::Print(const char* info) {
250 TRACE("%s, %zu Packed node:\n", info, node_to_packnode_.size());
251 if (!v8_flags.trace_wasm_revectorize) {
252 return;
253 }
254
255 ForEach([this](PackNode const* pnode) { pnode->Print(&graph_); },
256 node_to_packnode_);
257 ForEach([this](PackNode const* pnode) { pnode->Print(&graph_); },
258 node_to_intersect_packnodes_);
259}
260
261bool SLPTree::HasInputDependencies(const NodeGroup& node_group) {
262 DCHECK_EQ(node_group.size(), 2);
263 if (node_group[0] == node_group[1]) return false;
264 OpIndex start, end;
265 if (node_group[0] < node_group[1]) {
266 start = node_group[0];
267 end = node_group[1];
268 } else {
269 start = node_group[1];
270 end = node_group[0];
271 }
272 // Do BFS from the end node and see if there is a path to the start node.
273 ZoneQueue<OpIndex> to_visit(phase_zone_);
274 to_visit.push(end);
275 while (!to_visit.empty()) {
276 OpIndex to_visit_node = to_visit.front();
277 Operation& op = graph_.Get(to_visit_node);
278 to_visit.pop();
279 for (OpIndex input : op.inputs()) {
280 if (input == start) {
281 return true;
282 } else if (input > start) {
283 // We should ensure that there is no back edge.
284 DCHECK_LT(input, to_visit_node);
285 to_visit.push(input);
286 }
287 }
288 }
289 return false;
290}
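// Example (hypothetical node ids): for node_group {#10, #14}, if #14 reaches
// #10 through its transitive inputs, the BFS above finds that path and the
// group is rejected, since packing the pair would require duplicating the
// dependency chain between them.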
291
292PackNode* SLPTree::NewPackNode(const NodeGroup& node_group) {
293 TRACE("PackNode %s(#%d, #%d)\n",
294 GetSimdOpcodeName(graph_.Get(node_group[0])).c_str(),
295 node_group[0].id(), node_group[1].id());
296 PackNode* pnode = phase_zone_->New<PackNode>(phase_zone_, node_group);
297 for (OpIndex node : node_group) {
298 node_to_packnode_[node] = pnode;
299 }
300 return pnode;
301}
302
303PackNode* SLPTree::NewForcePackNode(const NodeGroup& node_group,
304 ForcePackNode::ForcePackType type,
305 const Graph& graph) {
306 // Currently we only support force packing two nodes.
307 DCHECK_EQ(node_group.size(), 2);
308// We should guarantee that one node in the NodeGroup does not rely on the
309// result of the other, because it is costly to force pack such candidates.
310// For example, suppose we have four nodes {A, B, C, D} connected by input
311// edges: A <-- B <-- C <-- D. If {B} and {D} are already packed into a
312// PackNode and we want to force pack {A} and {C}, we need to duplicate {B},
313// and the result will be {A, B, C}, {B, D}. This increases the cost of
314// ForcePack, so currently we do not support it.
315 if (HasInputDependencies(node_group)) {
316 TRACE("ForcePackNode %s(#%d, #%d) failed due to input dependencies.\n",
317 GetSimdOpcodeName(graph_.Get(node_group[0])).c_str(),
318 node_group[0].id(), node_group[1].id());
319 return nullptr;
320 }
321
322 TRACE("ForcePackNode %s(#%d, #%d)\n",
323 GetSimdOpcodeName(graph_.Get(node_group[0])).c_str(),
324 node_group[0].id(), node_group[1].id());
325 ForcePackNode* pnode =
326 phase_zone_->New<ForcePackNode>(phase_zone_, node_group, type);
327 for (OpIndex node : node_group) {
328 node_to_packnode_[node] = pnode;
329 }
330
331 return pnode;
332}
333
334BundlePackNode* SLPTree::NewBundlePackNode(const NodeGroup& node_group,
335 OpIndex base, int8_t offset,
336 uint8_t lane_size,
337 bool is_sign_extract,
338 bool is_sign_convert) {
339 Operation& op = graph_.Get(node_group[0]);
340 TRACE("PackNode %s(#%d:, #%d)\n", GetSimdOpcodeName(op).c_str(),
341 node_group[0].id(), node_group[1].id());
342 BundlePackNode* pnode = phase_zone_->New<BundlePackNode>(
343 phase_zone_, node_group, base, offset, lane_size, is_sign_extract,
344 is_sign_convert);
345 for (OpIndex node : node_group) {
346 node_to_packnode_[node] = pnode;
347 }
348 return pnode;
349}
350
351PackNode* SLPTree::NewIntersectPackNode(const NodeGroup& node_group) {
352 // Similar to ForcePackNode, dependent inputs are not supported.
353 if (HasInputDependencies(node_group)) {
354 TRACE("IntersectPackNode %s(#%d, #%d) failed due to input dependencies.\n",
355 GetSimdOpcodeName(graph_.Get(node_group[0])).c_str(),
356 node_group[0].id(), node_group[1].id());
357 return nullptr;
358 }
359
360 TRACE("IntersectPackNode %s(#%d, #%d)\n",
361 GetSimdOpcodeName(graph_.Get(node_group[0])).c_str(),
362 node_group[0].id(), node_group[1].id());
363 PackNode* intersect_pnode = phase_zone_->New<PackNode>(
364 phase_zone_, node_group, PackNode::kIntersectPackNode);
365
366 for (int i = 0; i < static_cast<int>(node_group.size()); i++) {
367 OpIndex op_idx = node_group[i];
368 if (i > 0 && op_idx == node_group[0]) continue;
369 auto it = node_to_intersect_packnodes_.find(op_idx);
370 if (it == node_to_intersect_packnodes_.end()) {
371 bool result;
372 std::tie(it, result) = node_to_intersect_packnodes_.emplace(
373 op_idx, ZoneVector<PackNode*>(phase_zone_));
374 DCHECK(result);
375 }
376 it->second.push_back(intersect_pnode);
377 }
378
379 return intersect_pnode;
380}
381
382PackNode* SLPTree::NewCommutativePackNodeAndRecurs(const NodeGroup& node_group,
383 unsigned depth) {
384 PackNode* pnode = NewPackNode(node_group);
385
386 const Simd128BinopOp& op0 = graph_.Get(node_group[0]).Cast<Simd128BinopOp>();
387 const Simd128BinopOp& op1 = graph_.Get(node_group[1]).Cast<Simd128BinopOp>();
388
389 bool same_kind =
390 (op0.left() == op1.left()) ||
391 IsSameOpAndKind(graph_.Get(op0.left()), graph_.Get(op1.left()));
392 bool need_swap = Simd128BinopOp::IsCommutative(op0.kind) && !same_kind;
393 if (need_swap) {
394 TRACE("Change the order of binop operands\n");
395 }
396 for (int i = 0; i < 2; ++i) {
397 // Swap the left and right input if necessary
398 unsigned node1_input_index = need_swap ? 1 - i : i;
399 NodeGroup operands(graph_.Get(node_group[0]).input(i),
400 graph_.Get(node_group[1]).input(node1_input_index));
401
402 PackNode* child = BuildTreeRec(operands, depth + 1);
403 if (child) {
404 pnode->SetOperand(i, child);
405 } else {
406 return nullptr;
407 }
408 }
409 return pnode;
410}
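// Example (hypothetical): for the group {a + b, c + a} with a commutative Add,
// the operands of the second binop are visited in swapped order, so the child
// groups become {a, a} and {b, c} instead of {a, c} and {b, a}, which gives the
// recursion a better chance of matching kinds.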
411
412PackNode* SLPTree::NewPackNodeAndRecurs(const NodeGroup& node_group,
413 int start_index, int count,
414 unsigned depth) {
415 PackNode* pnode = NewPackNode(node_group);
416 for (int i = 0; i < count; ++i) {
417 // Prepare the operand vector.
418 int input_index = i + start_index;
419 NodeGroup operands(graph_.Get(node_group[0]).input(input_index),
420 graph_.Get(node_group[1]).input(input_index));
421
422 PackNode* child = BuildTreeRec(operands, depth + 1);
423 if (child) {
424 pnode->SetOperand(i, child);
425 } else {
426 return nullptr;
427 }
428 }
429 return pnode;
430}
431
432ShufflePackNode* SLPTree::NewShufflePackNode(
433 const NodeGroup& node_group, ShufflePackNode::SpecificInfo::Kind kind) {
434 Operation& op = graph_.Get(node_group[0]);
435 TRACE("PackNode %s(#%d:, #%d)\n", GetSimdOpcodeName(op).c_str(),
436 node_group[0].id(), node_group[1].id());
437 ShufflePackNode* pnode =
438 phase_zone_->New<ShufflePackNode>(phase_zone_, node_group, kind);
439 for (OpIndex node : node_group) {
440 node_to_packnode_[node] = pnode;
441 }
442 return pnode;
443}
444
445ShufflePackNode* SLPTree::Try256ShuffleMatchLoad8x8U(
446 const NodeGroup& node_group, const uint8_t* shuffle0,
447 const uint8_t* shuffle1) {
448 uint8_t shuffle_copy0[kSimd128Size];
449 uint8_t shuffle_copy1[kSimd128Size];
450
451 V<Simd128> op_idx0 = node_group[0];
452 V<Simd128> op_idx1 = node_group[1];
453 const Simd128ShuffleOp& op0 = graph_.Get(op_idx0).Cast<Simd128ShuffleOp>();
454 const Simd128ShuffleOp& op1 = graph_.Get(op_idx1).Cast<Simd128ShuffleOp>();
455
456 if (op0.kind != Simd128ShuffleOp::Kind::kI8x16 ||
457 op1.kind != Simd128ShuffleOp::Kind::kI8x16) {
458 return nullptr;
459 }
460
461 if (op0.left() == op0.right() || op1.left() == op1.right()) {
462 // The shuffles must not be swizzles for this pattern.
463 return nullptr;
464 }
465
466 CopyChars(shuffle_copy0, shuffle0, kSimd128Size);
467 CopyChars(shuffle_copy1, shuffle1, kSimd128Size);
468
469 bool need_swap, is_swizzle;
470
471#define CANONICALIZE_SHUFFLE(n) \
472 wasm::SimdShuffle::CanonicalizeShuffle(false, shuffle_copy##n, &need_swap, \
473 &is_swizzle); \
474 if (is_swizzle) { \
475 /* Here shuffle couldn't be swizzle*/ \
476 return nullptr; \
477 } \
478 V<Simd128> shuffle##n##_left_idx = need_swap ? op##n.right() : op##n.left(); \
479 V<Simd128> shuffle##n##_right_idx = need_swap ? op##n.left() : op##n.right();
480
481 CANONICALIZE_SHUFFLE(0)
482 CANONICALIZE_SHUFFLE(1)
483
484#undef CANONICALIZE_SHUFFLE
485 if (shuffle0_left_idx != shuffle1_left_idx) {
486 // Not the same left
487 return nullptr;
488 }
489
490 const Simd128LoadTransformOp* load_transform =
491 graph_.Get(shuffle0_left_idx).TryCast<Simd128LoadTransformOp>();
492
493 if (!load_transform) {
494 // shuffle left is not Simd128LoadTransformOp
495 return nullptr;
496 }
497
498 Simd128ConstantOp* shuffle0_const =
499 graph_.Get(shuffle0_right_idx).TryCast<Simd128ConstantOp>();
500 Simd128ConstantOp* shuffle1_const =
501 graph_.Get(shuffle1_right_idx).TryCast<Simd128ConstantOp>();
502
503 if (!shuffle0_const || !shuffle1_const || !shuffle0_const->IsZero() ||
504 !shuffle1_const->IsZero()) {
505 // Shuffle right is not zero
506 return nullptr;
507 }
508
509 if (load_transform->transform_kind ==
510 Simd128LoadTransformOp::TransformKind::k64Zero) {
511 /*
512 should look like this:
513 shuffle0 = 0,x,x,x, 1,x,x,x 2,x,x,x 3,x,x,x
514 shuffle1 = 4,x,x,x, 5,x,x,x 6,x,x,x 7,x,x,x
515 x >= 16
516 */
517
518 for (int i = 0; i < kSimd128Size / 4; ++i) {
519 if (shuffle_copy0[i * 4] != i || shuffle_copy1[i * 4] != i + 4) {
520 // not match
521 return nullptr;
522 }
523
524 if (shuffle_copy0[i * 4 + 1] < kSimd128Size ||
525 shuffle_copy0[i * 4 + 2] < kSimd128Size ||
526 shuffle_copy0[i * 4 + 3] < kSimd128Size ||
527 shuffle_copy1[i * 4 + 1] < kSimd128Size ||
528 shuffle_copy1[i * 4 + 2] < kSimd128Size ||
529 shuffle_copy1[i * 4 + 3] < kSimd128Size) {
530 // not match
531 return nullptr;
532 }
533 }
534 TRACE("match load extend 8x8->32x8\n");
535 return NewShufflePackNode(
536 node_group, ShufflePackNode::SpecificInfo::Kind::kS256Load8x8U);
537 }
538 return nullptr;
539}
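// In summary: two i8x16 shuffles that zero-extend the low and high halves of a
// k64Zero load (picking bytes 0..3 and 4..7 into 32-bit lanes, with all other
// lanes taken from a zero constant) are combined into a single 256-bit
// S256Load8x8U transform.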
540
541#ifdef V8_TARGET_ARCH_X64
542ShufflePackNode* SLPTree::X64TryMatch256Shuffle(const NodeGroup& node_group,
543 const uint8_t* shuffle0,
544 const uint8_t* shuffle1) {
545 DCHECK_EQ(node_group.size(), 2);
546 OpIndex op_idx0 = node_group[0];
547 OpIndex op_idx1 = node_group[1];
548 const Simd128ShuffleOp& op0 = graph_.Get(op_idx0).Cast<Simd128ShuffleOp>();
549 const Simd128ShuffleOp& op1 = graph_.Get(op_idx1).Cast<Simd128ShuffleOp>();
550 if (op0.kind != Simd128ShuffleOp::Kind::kI8x16 ||
551 op1.kind != Simd128ShuffleOp::Kind::kI8x16) {
552 return nullptr;
553 }
554
555 uint8_t shuffle8x32[32];
556
557 if (op0.left() == op0.right() && op1.left() == op1.right()) {
558 // shuffles are swizzles
559 for (int i = 0; i < 16; ++i) {
560 shuffle8x32[i] = shuffle0[i] % 16;
561 shuffle8x32[i + 16] = 16 + shuffle1[i] % 16;
562 }
563
564 if (uint8_t shuffle32x8[8];
565 wasm::SimdShuffle::TryMatch32x8Shuffle(shuffle8x32, shuffle32x8)) {
566 uint8_t control;
567 if (wasm::SimdShuffle::TryMatchVpshufd(shuffle32x8, &control)) {
568 ShufflePackNode* pnode = NewShufflePackNode(
569 node_group, ShufflePackNode::SpecificInfo::Kind::kShufd);
570 pnode->info().set_shufd_control(control);
571 return pnode;
572 }
573 }
574 } else if (op0.left() != op0.right() && op1.left() != op1.right()) {
575 // shuffles are not swizzles
576 for (int i = 0; i < 16; ++i) {
577 if (shuffle0[i] < 16) {
578 shuffle8x32[i] = shuffle0[i];
579 } else {
580 shuffle8x32[i] = 16 + shuffle0[i];
581 }
582
583 if (shuffle1[i] < 16) {
584 shuffle8x32[i + 16] = 16 + shuffle1[i];
585 } else {
586 shuffle8x32[i + 16] = 32 + shuffle1[i];
587 }
588 }
589
590 if (const wasm::ShuffleEntry<kSimd256Size>* arch_shuffle;
591 wasm::SimdShuffle::TryMatchArchShuffle(shuffle8x32, false,
592 &arch_shuffle)) {
593 ShufflePackNode::SpecificInfo::Kind kind;
594 switch (arch_shuffle->opcode) {
595 case kX64S32x8UnpackHigh:
596 kind = ShufflePackNode::SpecificInfo::Kind::kS32x8UnpackHigh;
597 break;
598 case kX64S32x8UnpackLow:
599 kind = ShufflePackNode::SpecificInfo::Kind::kS32x8UnpackLow;
600 break;
601 default:
602 UNREACHABLE();
603 }
604 ShufflePackNode* pnode = NewShufflePackNode(node_group, kind);
605 return pnode;
606 } else if (uint8_t shuffle32x8[8]; wasm::SimdShuffle::TryMatch32x8Shuffle(
607 shuffle8x32, shuffle32x8)) {
608 uint8_t control;
609 if (wasm::SimdShuffle::TryMatchShufps256(shuffle32x8, &control)) {
610 ShufflePackNode* pnode = NewShufflePackNode(
611 node_group, ShufflePackNode::SpecificInfo::Kind::kShufps);
612 pnode->info().set_shufps_control(control);
613 return pnode;
614 }
615 }
616 }
617
618 return nullptr;
619}
620#endif // V8_TARGET_ARCH_X64
621
622// Try to match the i8x16/i16x8 to f32x4 conversion pattern.
623// The following wasm snippet is an example that loads an i8x16,
624// extends it to i32x4, and converts it to f32x4:
625// (f32x4.replace_lane 3
626// (f32x4.replace_lane 2
627// (f32x4.replace_lane 1
628// (f32x4.splat
629// (f32.convert_i32_u
630// (i8x16.extract_lane_u 0
631// (local.tee 7
632// (v128.load align=1
633// (local.get 0))))))
634// (f32.convert_i32_u
635// (i8x16.extract_lane_u 1
636// (local.get 7))))
637// (f32.convert_i32_u
638// (i8x16.extract_lane_u 2
639// (local.get 7))))
640// (f32.convert_i32_u
641// (i8x16.extract_lane_u 3
642// (local.get 7))))
643std::optional<SLPTree::ExtendIntToF32x4Info>
644SLPTree::TryGetExtendIntToF32x4Info(OpIndex index) {
645 OpIndex current = index;
646 LaneExtendInfo lane_extend_info[4];
647
648 // Get information for lane 1 to lane 3
649 for (int lane_index = 3; lane_index > 0; lane_index--) {
650 const Simd128ReplaceLaneOp* replace_lane =
651 graph_.Get(current)
652 .TryCast<turboshaft::Opmask::kSimd128ReplaceLaneF32x4>();
653 if (!replace_lane) {
654 TRACE("Mismatch in replace lane\n");
655 return {};
656 }
657 const ChangeOp* change =
658 graph_.Get(replace_lane->new_lane()).TryCast<ChangeOp>();
659 if (!change) {
660 TRACE("Mismatch in type convert\n");
661 return {};
662 }
663 const Simd128ExtractLaneOp* extract_lane =
664 graph_.Get(change->input()).TryCast<Simd128ExtractLaneOp>();
665 if (!extract_lane) {
666 TRACE("Mismatch in extract lane\n");
667 return {};
668 }
669 lane_extend_info[lane_index].replace_lane_index = replace_lane->lane;
670 lane_extend_info[lane_index].change_kind = change->kind;
671 lane_extend_info[lane_index].extract_from = extract_lane->input();
672 lane_extend_info[lane_index].extract_kind = extract_lane->kind;
673 lane_extend_info[lane_index].extract_lane_index = extract_lane->lane;
674
675 current = replace_lane->into();
676 }
677
678 // Get information for lane 0 (splat).
679 const Simd128SplatOp* splat = graph_.Get(current).TryCast<Simd128SplatOp>();
680 if (!splat) {
681 TRACE("Mismatch in splat\n");
682 return {};
683 }
684 const ChangeOp* change = graph_.Get(splat->input()).TryCast<ChangeOp>();
685 if (!change) {
686 TRACE("Mismatch in splat type convert\n");
687 return {};
688 }
689 const Simd128ExtractLaneOp* extract_lane =
690 graph_.Get(change->input()).TryCast<Simd128ExtractLaneOp>();
691 if (!extract_lane) {
692 TRACE("Mismatch in splat extract lane\n");
693 return {};
694 }
695 lane_extend_info[0].replace_lane_index = 0;
696 lane_extend_info[0].change_kind = change->kind;
697 lane_extend_info[0].extract_from = extract_lane->input();
698 lane_extend_info[0].extract_kind = extract_lane->kind;
699 lane_extend_info[0].extract_lane_index = extract_lane->lane;
700
701 // Pattern matching for f32x4.convert_i32x4(i32x4.extract_lane)
702 for (int i = 0; i < 4; i++) {
703 if (lane_extend_info[i].replace_lane_index != i) {
704 return {};
705 }
706 if (lane_extend_info[i].change_kind != lane_extend_info[0].change_kind ||
707 (lane_extend_info[i].change_kind != ChangeOp::Kind::kSignedToFloat &&
708 lane_extend_info[i].change_kind != ChangeOp::Kind::kUnsignedToFloat)) {
709 return {};
710 }
711 if (lane_extend_info[i].extract_from != lane_extend_info[0].extract_from) {
712 return {};
713 }
714 if (lane_extend_info[i].extract_kind != lane_extend_info[0].extract_kind ||
715 (lane_extend_info[i].extract_kind !=
716 Simd128ExtractLaneOp::Kind::kI8x16S &&
717 lane_extend_info[i].extract_kind !=
718 Simd128ExtractLaneOp::Kind::kI8x16U &&
719 lane_extend_info[i].extract_kind !=
720 Simd128ExtractLaneOp::Kind::kI16x8S &&
721 lane_extend_info[i].extract_kind !=
722 Simd128ExtractLaneOp::Kind::kI16x8U)) {
723 return {};
724 }
725 if (lane_extend_info[i].extract_lane_index !=
726 lane_extend_info[0].extract_lane_index + i) {
727 return {};
728 }
729 }
730
731 ExtendIntToF32x4Info info;
732 info.extend_from = lane_extend_info[0].extract_from;
733 info.start_lane = lane_extend_info[0].extract_lane_index;
734 if (lane_extend_info[0].extract_kind == Simd128ExtractLaneOp::Kind::kI8x16S ||
735 lane_extend_info[0].extract_kind == Simd128ExtractLaneOp::Kind::kI8x16U) {
736 info.lane_size = 1;
737 } else {
738 info.lane_size = 2;
739 }
740 info.is_sign_extract =
741 lane_extend_info[0].extract_kind == Simd128ExtractLaneOp::Kind::kI8x16S ||
742 lane_extend_info[0].extract_kind == Simd128ExtractLaneOp::Kind::kI16x8S;
743 info.is_sign_convert =
744 lane_extend_info[0].change_kind == ChangeOp::Kind::kSignedToFloat;
745
746 return info;
747}
748
749bool SLPTree::TryMatchExtendIntToF32x4(const NodeGroup& node_group,
750 ExtendIntToF32x4Info* info) {
751 OpIndex node0 = node_group[0];
752 OpIndex node1 = node_group[1];
753 std::optional<ExtendIntToF32x4Info> info0 = TryGetExtendIntToF32x4Info(node0);
754 std::optional<ExtendIntToF32x4Info> info1 = TryGetExtendIntToF32x4Info(node1);
755 if (!info0.has_value() || !info1.has_value()) {
756 return false;
757 }
758
759 if (info0.value().extend_from != info1.value().extend_from ||
760 info0.value().is_sign_extract != info1.value().is_sign_extract ||
761 info0.value().lane_size != info1.value().lane_size ||
762 info0.value().is_sign_convert != info1.value().is_sign_convert) {
763 return false;
764 }
765
766 uint32_t min_lane_index =
767 std::min(info0.value().start_lane, info1.value().start_lane);
768 if (std::abs(info0.value().start_lane - info1.value().start_lane) != 4) {
769 return false;
770 }
771 if (info0.value().lane_size == 1) {
772 if (min_lane_index != 0 && min_lane_index != 8) {
773 return false;
774 }
775 } else {
776 DCHECK_EQ(info0.value().lane_size, 2);
777 if (min_lane_index != 0) {
778 return false;
779 }
780 }
781
782 *info = info0.value();
783 info->start_lane = min_lane_index;
784 return true;
785}
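// Example (hypothetical): two f32x4 values built by extracting lanes 0..3 and
// 4..7 of the same i16x8 input satisfy the checks above (same source, same
// signedness, start lanes 4 apart), so the pair can later be widened into a
// single 8-lane int-to-float conversion.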
786
787bool SLPTree::IsSideEffectFree(OpIndex first, OpIndex second) {
788 DCHECK_LE(first.offset(), second.offset());
789 if (first == second) return true;
790 OpEffects effects = graph().Get(second).Effects();
791 OpIndex prev_node = graph().PreviousIndex(second);
792 while (prev_node != first) {
793 OpEffects prev_effects = graph().Get(prev_node).Effects();
794 if ((graph().Get(second).IsProtectedLoad() &&
795 graph().Get(prev_node).IsProtectedLoad())
796 ? CannotSwapProtectedLoads(prev_effects, effects)
797 : CannotSwapOperations(prev_effects, effects)) {
798 TRACE("break side effect %d, %d\n", prev_node.id(), second.id());
799 return false;
800 }
801 prev_node = graph().PreviousIndex(prev_node);
802 }
803 return true;
804}
805
806bool IsSignExtensionOp(Operation& op) {
807 if (const Simd128UnaryOp* unop = op.TryCast<Simd128UnaryOp>()) {
808 return unop->kind >= Simd128UnaryOp::Kind::kFirstSignExtensionOp &&
809 unop->kind <= Simd128UnaryOp::Kind::kLastSignExtensionOp;
810 } else if (const Simd128BinopOp* binop = op.TryCast<Simd128BinopOp>()) {
811 return binop->kind >= Simd128BinopOp::Kind::kFirstSignExtensionOp &&
812 binop->kind <= Simd128BinopOp::Kind::kLastSignExtensionOp;
813 }
814 return false;
815}
816
817bool SLPTree::CanBePacked(const NodeGroup& node_group) {
818 OpIndex node0 = node_group[0];
819 OpIndex node1 = node_group[1];
820 Operation& op0 = graph_.Get(node0);
821 Operation& op1 = graph_.Get(node1);
822
823 if (op0.opcode != op1.opcode) {
824 TRACE("Different opcode\n");
825 return false;
826 }
827
828 if (graph().BlockIndexOf(node0) != graph().BlockIndexOf(node1)) {
829 TRACE("Can't pack operations of different basic block\n");
830 return false;
831 }
832
833 auto is_sign_ext = IsSignExtensionOp(op0) && IsSignExtensionOp(op1);
834
835 if (!is_sign_ext && !IsSameOpAndKind(op0, op1)) {
836 TRACE("(%s, %s) have different op\n", GetSimdOpcodeName(op0).c_str(),
837 GetSimdOpcodeName(op1).c_str());
838 return false;
839 }
840
841 if (node0.offset() <= node1.offset() ? !IsSideEffectFree(node0, node1)
842 : !IsSideEffectFree(node1, node0)) {
843 TRACE("Break side effect\n");
844 return false;
845 }
846 return true;
847}
848
849bool SLPTree::IsEqual(const OpIndex node0, const OpIndex node1) {
850 if (node0 == node1) return true;
851 if (const ConstantOp* const0 = graph_.Get(node0).TryCast<ConstantOp>()) {
852 if (const ConstantOp* const1 = graph_.Get(node1).TryCast<ConstantOp>()) {
853 return *const0 == *const1;
854 }
855 }
856 return false;
857}
858
859PackNode* SLPTree::BuildTree(const NodeGroup& roots) {
860 root_ = BuildTreeRec(roots, 0);
861 return root_;
862}
863
864bool IsLoadExtend(const Simd128LoadTransformOp& op) {
865 switch (op.transform_kind) {
866 case Simd128LoadTransformOp::TransformKind::k8x8S:
867 case Simd128LoadTransformOp::TransformKind::k8x8U:
868 case Simd128LoadTransformOp::TransformKind::k16x4S:
869 case Simd128LoadTransformOp::TransformKind::k16x4U:
870 case Simd128LoadTransformOp::TransformKind::k32x2S:
871 case Simd128LoadTransformOp::TransformKind::k32x2U:
872 return true;
873 default:
874 return false;
875 }
876}
877
878bool IsLoadSplat(const Simd128LoadTransformOp& op) {
879 switch (op.transform_kind) {
880 case Simd128LoadTransformOp::TransformKind::k8Splat:
881 case Simd128LoadTransformOp::TransformKind::k16Splat:
882 case Simd128LoadTransformOp::TransformKind::k32Splat:
883 case Simd128LoadTransformOp::TransformKind::k64Splat:
884 return true;
885 default:
886 return false;
887 }
888}
889
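// BuildTreeRec recursively packs pairs of isomorphic Simd128 operations into
// PackNodes, starting from the seed pair. Contiguous loads/stores (stride of
// kSimd128Size) become regular PackNodes, mismatching but same-kind pairs fall
// back to ForcePackNodes, and groups that partially overlap an existing
// PackNode become IntersectPackNodes.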
890PackNode* SLPTree::BuildTreeRec(const NodeGroup& node_group,
891 unsigned recursion_depth) {
892 DCHECK_EQ(node_group.size(), 2);
893
894 OpIndex node0 = node_group[0];
895 OpIndex node1 = node_group[1];
896 Operation& op0 = graph_.Get(node0);
897 Operation& op1 = graph_.Get(node1);
898
899 if (recursion_depth == RecursionMaxDepth) {
900 TRACE("Failed due to max recursion depth!\n");
901 return nullptr;
902 }
903
904 if (!CanBePacked(node_group)) {
905 return nullptr;
906 }
907
908 // Check if this is a duplicate of another entry.
909 bool is_intersected = false;
910 // For revisited node_group, we only need to match from node0.
911 if (PackNode* pnode = GetPackNode(node0)) {
912 const Operation& op = graph_.Get(node0);
913 if (pnode->IsSame(node_group)) {
914 TRACE("Perfect diamond merge at #%d,%s\n", node0.id(),
915 GetSimdOpcodeName(op).c_str());
916 return pnode;
917 }
918
919 // TODO(yolanda): Support other intersect PackNode e.g. overlapped loads.
920 if (!pnode->IsForcePackNode() || recursion_depth < 1) {
921 TRACE("Unsupported partial overlap at #%d,%s!\n", node0.id(),
922 GetSimdOpcodeName(op).c_str());
923 return nullptr;
924 }
925
926 // Match intersect packnodes from current tree.
927 if (auto intersect_packnodes = GetIntersectPackNodes(node0)) {
928 for (auto intersect_pnode : *intersect_packnodes) {
929 if (intersect_pnode->IsSame(node_group)) {
930 TRACE("Perfect diamond merge at intersect pack node #%d,%s, #%d\n",
931 node0.id(), GetSimdOpcodeName(op).c_str(), node1.id());
932 return intersect_pnode;
933 }
934 }
935 }
936
937 // Match intersect packnodes from analyzer
938 if (auto intersect_packnodes = analyzer_->GetIntersectPackNodes(node0)) {
939 for (auto intersect_pnode : *intersect_packnodes) {
940 if (intersect_pnode->IsSame(node_group)) {
941 TRACE("Perfect diamond merge at intersect pack node #%d,%s, #%d\n",
942 node0.id(), GetSimdOpcodeName(op).c_str(), node1.id());
943 return intersect_pnode;
944 }
945 }
946 }
947
948 is_intersected = true;
949 TRACE("Partial overlap at #%d,%s!\n", node0.id(),
950 GetSimdOpcodeName(op).c_str());
951 }
952
953 // Catch overlapping PackNodes on the other nodes.
954 if (!is_intersected) {
955 for (int i = 1; i < static_cast<int>(node_group.size()); i++) {
956 const OpIndex op_idx = node_group[i];
957 const Operation& op = graph_.Get(op_idx);
958 if (auto pnode = GetPackNode(op_idx)) {
959 if (!pnode->IsForcePackNode()) {
960 TRACE("Unsupported partial overlap at #%d,%s!\n", op_idx.id(),
961 GetSimdOpcodeName(op).c_str());
962 return nullptr;
963 }
964 } else if (!GetIntersectPackNodes(op_idx) &&
965 !analyzer_->GetIntersectPackNodes(op_idx)) {
966 continue;
967 }
968
969 is_intersected = true;
970 TRACE("Partial overlap at #%d,%s!\n", op_idx.id(),
971 GetSimdOpcodeName(op).c_str());
972 break;
973 }
974 }
975
976 if (is_intersected) {
977 TRACE("Create IntersectPackNode due to partial overlap!\n");
978 PackNode* pnode = NewIntersectPackNode(node_group);
979 return pnode;
980 }
981
982 int value_in_count = op0.input_count;
983
984 switch (op0.opcode) {
985 case Opcode::kSimd128Constant: {
986 PackNode* p = NewPackNode(node_group);
987 return p;
988 }
989
990 case Opcode::kSimd128LoadTransform: {
991 const Simd128LoadTransformOp& transform_op0 =
992 op0.Cast<Simd128LoadTransformOp>();
993 const Simd128LoadTransformOp& transform_op1 =
994 op1.Cast<Simd128LoadTransformOp>();
995 StoreLoadInfo<Simd128LoadTransformOp> info0(&graph_, &transform_op0);
996 StoreLoadInfo<Simd128LoadTransformOp> info1(&graph_, &transform_op1);
997 auto stride = info1 - info0;
998 if (IsLoadSplat(transform_op0)) {
999 TRACE("Simd128LoadTransform: LoadSplat\n");
1000 if (IsSplat(node_group) ||
1001 (stride.has_value() && stride.value() == 0)) {
1002 return NewPackNode(node_group);
1003 }
1004 return NewForcePackNode(node_group, ForcePackNode::kGeneral, graph_);
1005 } else if (IsLoadExtend(transform_op0)) {
1006 TRACE("Simd128LoadTransform: LoadExtend\n");
1007 if (stride.has_value()) {
1008 const int value = stride.value();
1009 if (value == kSimd128Size / 2) {
1010 return NewPackNode(node_group);
1011 } else if (value == 0) {
1012 return NewForcePackNode(node_group, ForcePackNode::kSplat, graph_);
1013 }
1014 }
1015 return NewForcePackNode(node_group, ForcePackNode::kGeneral, graph_);
1016 } else {
1017 TRACE("Load Transfrom k64Zero/k32Zero!\n");
1018 DCHECK(transform_op0.transform_kind ==
1019 Simd128LoadTransformOp::TransformKind::k32Zero ||
1020 transform_op0.transform_kind ==
1021 Simd128LoadTransformOp::TransformKind::k64Zero);
1022 if (stride.has_value() && stride.value() == 0) {
1023 return NewForcePackNode(node_group, ForcePackNode::kSplat, graph_);
1024 }
1025 return NewForcePackNode(node_group, ForcePackNode::kGeneral, graph_);
1026 }
1027 }
1028
1029 case Opcode::kLoad: {
1030 TRACE("Load leaf node\n");
1031 const LoadOp& load0 = op0.Cast<LoadOp>();
1032 const LoadOp& load1 = op1.Cast<LoadOp>();
1033 if (load0.loaded_rep != MemoryRepresentation::Simd128() ||
1034 load1.loaded_rep != MemoryRepresentation::Simd128()) {
1035 TRACE("Failed due to non-simd load representation!\n");
1036 return nullptr;
1037 }
1038 StoreLoadInfo<LoadOp> info0(&graph_, &load0);
1039 StoreLoadInfo<LoadOp> info1(&graph_, &load1);
1040 auto stride = info1 - info0;
1041 if (stride.has_value()) {
1042 if (const int value = stride.value(); value == kSimd128Size) {
1043 // TODO(jiepan) Sort load
1044 return NewPackNode(node_group);
1045 } else if (value == 0) {
1046 return NewForcePackNode(node_group, ForcePackNode::kSplat, graph_);
1047 }
1048 }
1049 return NewForcePackNode(node_group, ForcePackNode::kGeneral, graph_);
1050 }
1051 case Opcode::kStore: {
1052 TRACE("Added a vector of stores.\n");
1053 // input: base, value, [index]
1054 PackNode* pnode = NewPackNodeAndRecurs(node_group, 1, 1, recursion_depth);
1055 return pnode;
1056 }
1057 case Opcode::kPhi: {
1058 TRACE("Added a vector of phi nodes.\n");
1059 const PhiOp& phi = graph().Get(node0).Cast<PhiOp>();
1060 if (phi.rep != RegisterRepresentation::Simd128() ||
1061 op0.input_count != op1.input_count) {
1062 TRACE("Failed due to invalid phi\n");
1063 return nullptr;
1064 }
1065 PackNode* pnode =
1066 NewPackNodeAndRecurs(node_group, 0, value_in_count, recursion_depth);
1067 return pnode;
1068 }
1069 case Opcode::kSimd128Unary: {
1070#define UNARY_CASE(op_128, not_used) case Simd128UnaryOp::Kind::k##op_128:
1071#define UNARY_SIGN_EXTENSION_CASE(op_low, not_used1, op_high) \
1072 case Simd128UnaryOp::Kind::k##op_low: { \
1073 if (const Simd128UnaryOp* unop1 = \
1074 op1.TryCast<Opmask::kSimd128##op_high>(); \
1075 unop1 && op0.Cast<Simd128UnaryOp>().input() == unop1->input()) { \
1076 return NewPackNode(node_group); \
1077 } \
1078 [[fallthrough]]; \
1079 } \
1080 case Simd128UnaryOp::Kind::k##op_high: { \
1081 if (op1.Cast<Simd128UnaryOp>().kind == op0.Cast<Simd128UnaryOp>().kind) { \
1082 auto force_pack_type = \
1083 node0 == node1 ? ForcePackNode::kSplat : ForcePackNode::kGeneral; \
1084 return NewForcePackNode(node_group, force_pack_type, graph_); \
1085 } else { \
1086 return nullptr; \
1087 } \
1088 }
1089 switch (op0.Cast<Simd128UnaryOp>().kind) {
1090 SIMD256_UNARY_SIGN_EXTENSION_OP(UNARY_SIGN_EXTENSION_CASE)
1091 SIMD256_UNARY_SIMPLE_OP(UNARY_CASE) {
1092 TRACE("Added a vector of Unary\n");
1093 PackNode* pnode = NewPackNodeAndRecurs(node_group, 0, value_in_count,
1094 recursion_depth);
1095 return pnode;
1096 }
1097 default: {
1098 TRACE("Unsupported Simd128Unary: %s\n",
1099 GetSimdOpcodeName(op0).c_str());
1100 return nullptr;
1101 }
1102 }
1103#undef UNARY_CASE
1104#undef UNARY_SIGN_EXTENSION_CASE
1105 }
1106 case Opcode::kSimd128Binop: {
1107#define BINOP_CASE(op_128, not_used) case Simd128BinopOp::Kind::k##op_128:
1108#define BINOP_SIGN_EXTENSION_CASE(op_low, not_used1, op_high) \
1109 case Simd128BinopOp::Kind::k##op_low: { \
1110 if (const Simd128BinopOp* binop1 = \
1111 op1.TryCast<Opmask::kSimd128##op_high>(); \
1112 binop1 && op0.Cast<Simd128BinopOp>().left() == binop1->left() && \
1113 op0.Cast<Simd128BinopOp>().right() == binop1->right()) { \
1114 return NewPackNode(node_group); \
1115 } \
1116 [[fallthrough]]; \
1117 } \
1118 case Simd128BinopOp::Kind::k##op_high: { \
1119 if (op1.Cast<Simd128BinopOp>().kind == op0.Cast<Simd128BinopOp>().kind) { \
1120 auto force_pack_type = \
1121 node0 == node1 ? ForcePackNode::kSplat : ForcePackNode::kGeneral; \
1122 return NewForcePackNode(node_group, force_pack_type, graph_); \
1123 } else { \
1124 return nullptr; \
1125 } \
1126 }
1127 switch (op0.Cast<Simd128BinopOp>().kind) {
1128 SIMD256_BINOP_SIGN_EXTENSION_OP(BINOP_SIGN_EXTENSION_CASE)
1129 SIMD256_BINOP_SIMPLE_OP(BINOP_CASE) {
1130 TRACE("Added a vector of Binop\n");
1131 PackNode* pnode =
1132 NewCommutativePackNodeAndRecurs(node_group, recursion_depth);
1133 return pnode;
1134 }
1135 default: {
1136 TRACE("Unsupported Simd128Binop: %s\n",
1137 GetSimdOpcodeName(op0).c_str());
1138 return nullptr;
1139 }
1140 }
1141#undef BINOP_CASE
1142#undef BINOP_SIGN_EXTENSION_CASE
1143 }
1144 case Opcode::kSimd128Shift: {
1145 Simd128ShiftOp& shift_op0 = op0.Cast<Simd128ShiftOp>();
1146 Simd128ShiftOp& shift_op1 = op1.Cast<Simd128ShiftOp>();
1147 if (IsEqual(shift_op0.shift(), shift_op1.shift())) {
1148 switch (op0.Cast<Simd128ShiftOp>().kind) {
1149#define SHIFT_CASE(op_128, not_used) case Simd128ShiftOp::Kind::k##op_128:
1150 SIMD256_SHIFT_OP(SHIFT_CASE) {
1151 TRACE("Added a vector of Shift op.\n");
1152 // We've already checked that the "shift by" input of both shifts is
1153 // the same, and we'll only pack the 1st input of the shifts
1154 // together anyways (since on both Simd128 and Simd256, the "shift
1155 // by" input of shifts is a Word32). Thus we only need to check the
1156 // 1st input of the shift when recursing.
1157 constexpr int kShiftValueInCount = 1;
1158 PackNode* pnode = NewPackNodeAndRecurs(
1159 node_group, 0, kShiftValueInCount, recursion_depth);
1160 return pnode;
1161 }
1162#undef SHIFT_CASE
1163 default: {
1164 TRACE("Unsupported Simd128ShiftOp: %s\n",
1165 GetSimdOpcodeName(op0).c_str());
1166 return nullptr;
1167 }
1168 }
1169 }
1170 TRACE("Failed due to SimdShiftOp kind or shift scalar is different!\n");
1171 return nullptr;
1172 }
1173 case Opcode::kSimd128Ternary: {
1174#define TERNARY_CASE(op_128, not_used) case Simd128TernaryOp::Kind::k##op_128:
1175 switch (op0.Cast<Simd128TernaryOp>().kind) {
1176 SIMD256_TERNARY_OP(TERNARY_CASE) {
1177 TRACE("Added a vector of Ternary\n");
1178 PackNode* pnode = NewPackNodeAndRecurs(node_group, 0, value_in_count,
1179 recursion_depth);
1180 return pnode;
1181 }
1182#undef TERNARY_CASE
1183 default: {
1184 TRACE("Unsupported Simd128Ternary: %s\n",
1185 GetSimdOpcodeName(op0).c_str());
1186 return nullptr;
1187 }
1188 }
1189 }
1190
1191 case Opcode::kSimd128Splat: {
1192 if (op0.input(0) != op1.input(0)) {
1193 TRACE("Failed due to different splat input!\n");
1194 return nullptr;
1195 }
1196 PackNode* pnode = NewPackNode(node_group);
1197 return pnode;
1198 }
1199
1200 case Opcode::kSimd128Shuffle: {
1201 const auto& shuffle_op0 = op0.Cast<Simd128ShuffleOp>();
1202 const auto& shuffle_op1 = op1.Cast<Simd128ShuffleOp>();
1203 if (shuffle_op0.kind != Simd128ShuffleOp::Kind::kI8x16 ||
1204 shuffle_op1.kind != Simd128ShuffleOp::Kind::kI8x16) {
1205 return nullptr;
1206 }
1207 // We pack shuffles only if they match specific patterns. We should
1208 // avoid packing general shuffles because that would cause regressions.
1209 const auto& shuffle0 = shuffle_op0.shuffle;
1210 const auto& shuffle1 = shuffle_op1.shuffle;
1211
1212 if (CompareCharsEqual(shuffle0, shuffle1, kSimd128Size)) {
1213 if (IsSplat(node_group)) {
1214 // Check if the shuffle can be replaced by a loadsplat.
1215 // Take load32splat as an example:
1216 // 1. Param0 # be used as load base
1217 // 2. Param1 # be used as load index
1218 // 3. Param2 # be used as store base
1219 // 4. Param3 # be used as store index
1220 // 5. Load128(base, index, offset=0)
1221 // 6. AnyOp
1222 // 7. Shuffle32x4 (1,2, [2,2,2,2])
1223 // 8. Store128(3,4,7, offset=0)
1224 // 9. Store128(3,4,7, offset=16)
1225 //
1226 // We can replace the load128 and shuffle with a loadsplat32:
1227 // 1. Param0 # be used as load base
1228 // 2. Param1 # be used as load index
1229 // 3. Param2 # be used as store base
1230 // 4. Param3 # be used as store index
1231 // 5. Load32Splat256(base, index, offset=4)
1232 // 6. Store256(3,4,7,offset=0)
1233 int index;
1234 if (wasm::SimdShuffle::TryMatchSplat<4>(shuffle0, &index) &&
1235 graph_.Get(op0.input(index >> 2)).opcode == Opcode::kLoad) {
1236 ShufflePackNode* pnode = NewShufflePackNode(
1237 node_group,
1238 ShufflePackNode::SpecificInfo::Kind::kS256Load32Transform);
1239 pnode->info().set_splat_index(index);
1240 return pnode;
1241 } else if (wasm::SimdShuffle::TryMatchSplat<2>(shuffle0, &index) &&
1242 graph_.Get(op0.input(index >> 1)).opcode ==
1243 Opcode::kLoad) {
1244 ShufflePackNode* pnode = NewShufflePackNode(
1245 node_group,
1246 ShufflePackNode::SpecificInfo::Kind::kS256Load64Transform);
1247 pnode->info().set_splat_index(index);
1248 return pnode;
1249 }
1250 }
1251
1252#ifdef V8_TARGET_ARCH_X64
1253 if (ShufflePackNode* pnode =
1254 X64TryMatch256Shuffle(node_group, shuffle0, shuffle1)) {
1255 // Manually invoke recursive tree building for the shuffle node.
1256 for (int i = 0; i < value_in_count; ++i) {
1257 NodeGroup operands(graph_.Get(node_group[0]).input(i),
1258 graph_.Get(node_group[1]).input(i));
1259
1260 PackNode* child = BuildTreeRec(operands, recursion_depth + 1);
1261 if (child) {
1262 pnode->SetOperand(i, child);
1263 } else {
1264 return nullptr;
1265 }
1266 }
1267 return pnode;
1268 }
1269#endif // V8_TARGET_ARCH_X64
1270
1271 TRACE("Unsupported Simd128Shuffle\n");
1272 return nullptr;
1273 } else {
1274 return Try256ShuffleMatchLoad8x8U(node_group, shuffle0, shuffle1);
1275 }
1276 }
1277
1278 case Opcode::kSimd128ReplaceLane: {
1279 ExtendIntToF32x4Info info;
1280 if (TryMatchExtendIntToF32x4(node_group, &info)) {
1281 TRACE("Match extend i8x4/i16x4 to f32x4\n");
1282 PackNode* p = NewBundlePackNode(
1283 node_group, info.extend_from, info.start_lane, info.lane_size,
1284 info.is_sign_extract, info.is_sign_convert);
1285 return p;
1286 }
1287 if (recursion_depth < 1) {
1288 TRACE("Do not force pack at root #%d:%s\n", node0.id(),
1289 GetSimdOpcodeName(op0).c_str());
1290 return nullptr;
1291 }
1292 return NewForcePackNode(
1293 node_group,
1294 node0 == node1 ? ForcePackNode::kSplat : ForcePackNode::kGeneral,
1295 graph_);
1296 }
1297
1298 default:
1299 TRACE("Default branch #%d:%s\n", node0.id(),
1300 GetSimdOpcodeName(op0).c_str());
1301 break;
1302 }
1303 return nullptr;
1304}
1305
1306void WasmRevecAnalyzer::MergeSLPTree(SLPTree& slp_tree) {
1307 // We ensured the SLP trees are mergeable in BuildTreeRec.
1308 for (const auto& entry : slp_tree.GetIntersectNodeMapping()) {
1309 auto it = revectorizable_intersect_node_.find(entry.first);
1310 if (it == revectorizable_intersect_node_.end()) {
1311 bool result;
1312 std::tie(it, result) = revectorizable_intersect_node_.emplace(
1313 entry.first, ZoneVector<PackNode*>(phase_zone_));
1314 DCHECK(result);
1315 }
1316 ZoneVector<PackNode*>& intersect_pnodes = it->second;
1317 intersect_pnodes.insert(intersect_pnodes.end(), entry.second.begin(),
1318 entry.second.end());
1319 SLOW_DCHECK(std::unique(intersect_pnodes.begin(), intersect_pnodes.end()) ==
1320 intersect_pnodes.end());
1321 }
1322
1323 revectorizable_node_.merge(slp_tree.GetNodeMapping());
1324}
1325
1326bool WasmRevecAnalyzer::IsSupportedReduceSeed(const Operation& op) {
1327 if (!op.Is<Simd128BinopOp>()) {
1328 return false;
1329 }
1330 switch (op.Cast<Simd128BinopOp>().kind) {
1331#define CASE(op_128) case Simd128BinopOp::Kind::k##op_128:
1332 REDUCE_SEED_KIND(CASE) { return true; }
1333 default:
1334 return false;
1335 }
1336#undef CASE
1337}
1338
1339void WasmRevecAnalyzer::ProcessBlock(const Block& block) {
1340 StoreInfoSet simd128_stores(phase_zone_);
1341 for (const Operation& op : base::Reversed(graph_.operations(block))) {
1342 if (const StoreOp* store_op = op.TryCast<StoreOp>()) {
1343 if (store_op->stored_rep == MemoryRepresentation::Simd128()) {
1344 StoreLoadInfo<StoreOp> info(&graph_, store_op);
1345 if (info.IsValid()) {
1346 simd128_stores.insert(info);
1347 }
1348 }
1349 }
1350 // Try to find reduce ops which can be used as revec seeds.
1351 if (IsSupportedReduceSeed(op)) {
1352 const Simd128BinopOp& binop = op.Cast<Simd128BinopOp>();
1353 V<Simd128> left_index = binop.left();
1354 V<Simd128> right_index = binop.right();
1355 const Operation& left_op = graph_.Get(left_index);
1356 const Operation& right_op = graph_.Get(right_index);
1357
1358 if (left_index != right_index && left_op.opcode == right_op.opcode &&
1359 IsSameOpAndKind(left_op, right_op)) {
1360 reduce_seeds_.push_back({left_index, right_index});
1361 }
1362 }
1363 }
1364
1365 if (simd128_stores.size() >= 2) {
1366 for (auto it = std::next(simd128_stores.cbegin()),
1367 end = simd128_stores.cend();
1368 it != end;) {
1369 const StoreLoadInfo<StoreOp>& info0 = *std::prev(it);
1370 const StoreLoadInfo<StoreOp>& info1 = *it;
1371 auto diff = info1 - info0;
1372
1373 if (diff.has_value()) {
1374 const int value = diff.value();
1375 DCHECK_GE(value, 0);
1376 if (value == kSimd128Size) {
1377 store_seeds_.push_back(
1378 {graph_.Index(*info0.op()), graph_.Index(*info1.op())});
1379 if (std::distance(it, end) < 2) {
1380 break;
1381 }
1382 std::advance(it, 2);
1383 continue;
1384 }
1385 }
1386 it++;
1387 }
1388 }
1389}
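// Example (hypothetical): Store128(base, index, offset=0) followed by
// Store128(base, index, offset=16) in the same block yields a diff of
// kSimd128Size, so the pair is recorded in store_seeds_ as a candidate root
// for 256-bit revectorization.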
1390
1391void WasmRevecAnalyzer::Run() {
1392 for (Block& block : base::Reversed(graph_.blocks())) {
1393 ProcessBlock(block);
1394 }
1395
1396 if (store_seeds_.empty() && reduce_seeds_.empty()) {
1397 TRACE("Empty seed\n");
1398 return;
1399 }
1400
1401 if (v8_flags.trace_wasm_revectorize) {
1402 PrintF("store seeds:\n");
1403 for (auto pair : store_seeds_) {
1404 PrintF("{\n");
1405 PrintF("#%u ", pair.first.id());
1406 graph_.Get(pair.first).Print();
1407 PrintF("#%u ", pair.second.id());
1408 graph_.Get(pair.second).Print();
1409 PrintF("}\n");
1410 }
1411
1412 PrintF("reduce seeds:\n");
1413 for (auto pair : reduce_seeds_) {
1414 PrintF("{ ");
1415 PrintF("#%u, ", pair.first.id());
1416 PrintF("#%u ", pair.second.id());
1417 PrintF("}\n");
1418 }
1419 }
1420
1420
1421 ZoneVector<std::pair<OpIndex, OpIndex>> all_seeds(
1422 store_seeds_.begin(), store_seeds_.end(), phase_zone_);
1423 all_seeds.insert(all_seeds.end(), reduce_seeds_.begin(), reduce_seeds_.end());
1424
1425 for (auto pair : all_seeds) {
1426 NodeGroup roots(pair.first, pair.second);
1427 SLPTree slp_tree(graph_, this, phase_zone_);
1428 PackNode* root = slp_tree.BuildTree(roots);
1429 if (!root) {
1430 TRACE("Build tree failed!\n");
1431 continue;
1432 }
1433
1434 slp_tree.Print("After build tree");
1435 MergeSLPTree(slp_tree);
1436 }
1437
1438 // Early exit when no revectorizable node is found.
1439 if (revectorizable_node_.empty()) return;
1440
1441 // Build SIMD usemap
1442 use_map_ = phase_zone_->New<SimdUseMap>(graph_, phase_zone_);
1443 if (!DecideVectorize()) {
1444 revectorizable_node_.clear();
1445 revectorizable_intersect_node_.clear();
1446 } else {
1447 should_reduce_ = true;
1448 Print("Decide to vectorize");
1449 }
1450}
1451
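// DecideVectorize applies a simple cost model. Roughly: packed non-splat
// groups count as savings, ForcePackNodes and IntersectPackNodes count as
// costs (extra packing code is emitted for them), and any packed value that is
// still used outside the packed tree costs one 128-bit extract. Vectorization
// only proceeds if the savings strictly exceed the costs.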
1452bool WasmRevecAnalyzer::DecideVectorize() {
1453 TRACE("Enter %s\n", __func__);
1454 int save = 0, cost = 0;
1455 ForEach(
1456 [&](PackNode const* pnode) {
1457 const NodeGroup& nodes = pnode->nodes();
1458 // An additional store is emitted in case of an OOB trap at the higher
1459 // 128-bit address, so there is no saving if the store at the lower
1460 // address is executed first. Return directly, as we don't need to check
1461 // external uses for stores.
1462 if (graph_.Get(nodes[0]).opcode == Opcode::kStore) {
1463 if (nodes[0] > nodes[1]) save++;
1464 return;
1465 }
1466
1467 if (pnode->IsForcePackNode()) {
1468 cost++;
1469 return;
1470 }
1471
1472 // Splat nodes will not yield a saving, as a splat simply extends itself.
1473 if (!IsSplat(nodes)) {
1474 save++;
1475 }
1476
1477#ifdef V8_TARGET_ARCH_X64
1478 // On x64, we don't emit an extract for lane 0, as the source ymm
1479 // register aliases the corresponding xmm register in its lower 128 bits.
1480 for (int i = 1; i < static_cast<int>(nodes.size()); i++) {
1481 if (nodes[i] == nodes[0]) continue;
1482#else
1483 for (int i = 0; i < static_cast<int>(nodes.size()); i++) {
1484 if (i > 0 && nodes[i] == nodes[0]) continue;
1485#endif // V8_TARGET_ARCH_X64
1486
1487 for (auto use : use_map_->uses(nodes[i])) {
1488 if (!GetPackNode(use) || GetPackNode(use)->is_force_packing()) {
1489 TRACE("External use edge: (%d:%s) -> (%d:%s)\n", use.id(),
1490 OpcodeName(graph_.Get(use).opcode), nodes[i].id(),
1491 OpcodeName(graph_.Get(nodes[i]).opcode));
1492 ++cost;
1493
1494 // We only need one Extract node; all other uses can share it.
1495 break;
1496 }
1497 }
1498 }
1499 },
1500 revectorizable_node_);
1501
1502 ForEach(
1503 [&](PackNode const* pnode) {
1504 // We always generate SimdPack128To256Op for IntersectPackNode.
1505 cost++;
1506 return;
1507 },
1508 revectorizable_intersect_node_);
1509
1510 TRACE("Save: %d, cost: %d\n", save, cost);
1511 return save > cost;
1512}
1513
1514void WasmRevecAnalyzer::Print(const char* info) {
1515 if (!v8_flags.trace_wasm_revectorize) {
1516 return;
1517 }
1518
1519 TRACE("%s, %zu revectorizable nodes:\n", info, revectorizable_node_.size());
1520 ForEach([this](PackNode const* pnode) { pnode->Print(&graph_); },
1521 revectorizable_node_);
1522 TRACE("%s, %zu revectorizable intersect nodes:\n", info,
1523 revectorizable_intersect_node_.size());
1524 ForEach([this](PackNode const* pnode) { pnode->Print(&graph_); },
1525 revectorizable_intersect_node_);
1526}
1527
1528} // namespace v8::internal::compiler::turboshaft