v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
revectorizer.cc
1// Copyright 2022 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "src/compiler/revectorizer.h"
6
7#include "src/base/cpu.h"
8#include "src/base/logging.h"
18
19namespace v8 {
20namespace internal {
21namespace compiler {
22
23#define TRACE(...) \
24 do { \
25 if (v8_flags.trace_wasm_revectorize) { \
26 PrintF("Revec: "); \
27 PrintF(__VA_ARGS__); \
28 } \
29 } while (false)
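// Tracing below is gated on the --trace-wasm-revectorize flag; every message
// is prefixed with "Revec: ", so e.g. TRACE("Enter %s\n", __func__) prints
// "Revec: Enter BuildTree" when called from SLPTree::BuildTree.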
30
31namespace {
32
33#define SIMPLE_SIMD_OP(V) \
34 V(F64x2Add, F64x4Add) \
35 V(F32x4Add, F32x8Add) \
36 V(I64x2Add, I64x4Add) \
37 V(I32x4Add, I32x8Add) \
38 V(I16x8Add, I16x16Add) \
39 V(I8x16Add, I8x32Add) \
40 V(F64x2Sub, F64x4Sub) \
41 V(F32x4Sub, F32x8Sub) \
42 V(I64x2Sub, I64x4Sub) \
43 V(I32x4Sub, I32x8Sub) \
44 V(I16x8Sub, I16x16Sub) \
45 V(I8x16Sub, I8x32Sub) \
46 V(F64x2Mul, F64x4Mul) \
47 V(F32x4Mul, F32x8Mul) \
48 V(I64x2Mul, I64x4Mul) \
49 V(I32x4Mul, I32x8Mul) \
50 V(I16x8Mul, I16x16Mul) \
51 V(F64x2Div, F64x4Div) \
52 V(F32x4Div, F32x8Div) \
53 V(I16x8AddSatS, I16x16AddSatS) \
54 V(I16x8SubSatS, I16x16SubSatS) \
55 V(I16x8AddSatU, I16x16AddSatU) \
56 V(I16x8SubSatU, I16x16SubSatU) \
57 V(I8x16AddSatS, I8x32AddSatS) \
58 V(I8x16SubSatS, I8x32SubSatS) \
59 V(I8x16AddSatU, I8x32AddSatU) \
60 V(I8x16SubSatU, I8x32SubSatU) \
61 V(F64x2Eq, F64x4Eq) \
62 V(F32x4Eq, F32x8Eq) \
63 V(I64x2Eq, I64x4Eq) \
64 V(I32x4Eq, I32x8Eq) \
65 V(I16x8Eq, I16x16Eq) \
66 V(I8x16Eq, I8x32Eq) \
67 V(F64x2Ne, F64x4Ne) \
68 V(F32x4Ne, F32x8Ne) \
69 V(I64x2GtS, I64x4GtS) \
70 V(I32x4GtS, I32x8GtS) \
71 V(I16x8GtS, I16x16GtS) \
72 V(I8x16GtS, I8x32GtS) \
73 V(F64x2Lt, F64x4Lt) \
74 V(F32x4Lt, F32x8Lt) \
75 V(F64x2Le, F64x4Le) \
76 V(F32x4Le, F32x8Le) \
77 V(I32x4MinS, I32x8MinS) \
78 V(I16x8MinS, I16x16MinS) \
79 V(I8x16MinS, I8x32MinS) \
80 V(I32x4MinU, I32x8MinU) \
81 V(I16x8MinU, I16x16MinU) \
82 V(I8x16MinU, I8x32MinU) \
83 V(I32x4MaxS, I32x8MaxS) \
84 V(I16x8MaxS, I16x16MaxS) \
85 V(I8x16MaxS, I8x32MaxS) \
86 V(I32x4MaxU, I32x8MaxU) \
87 V(I16x8MaxU, I16x16MaxU) \
88 V(I8x16MaxU, I8x32MaxU) \
89 V(F32x4Abs, F32x8Abs) \
90 V(I32x4Abs, I32x8Abs) \
91 V(I16x8Abs, I16x16Abs) \
92 V(I8x16Abs, I8x32Abs) \
93 V(F32x4Neg, F32x8Neg) \
94 V(I32x4Neg, I32x8Neg) \
95 V(I16x8Neg, I16x16Neg) \
96 V(I8x16Neg, I8x32Neg) \
97 V(F64x2Sqrt, F64x4Sqrt) \
98 V(F32x4Sqrt, F32x8Sqrt) \
99 V(F64x2Min, F64x4Min) \
100 V(F32x4Min, F32x8Min) \
101 V(F64x2Max, F64x4Max) \
102 V(F32x4Max, F32x8Max) \
103 V(I64x2Ne, I64x4Ne) \
104 V(I32x4Ne, I32x8Ne) \
105 V(I16x8Ne, I16x16Ne) \
106 V(I8x16Ne, I8x32Ne) \
107 V(I32x4GtU, I32x8GtU) \
108 V(I16x8GtU, I16x16GtU) \
109 V(I8x16GtU, I8x32GtU) \
110 V(I64x2GeS, I64x4GeS) \
111 V(I32x4GeS, I32x8GeS) \
112 V(I16x8GeS, I16x16GeS) \
113 V(I8x16GeS, I8x32GeS) \
114 V(I32x4GeU, I32x8GeU) \
115 V(I16x8GeU, I16x16GeU) \
116 V(I8x16GeU, I8x32GeU) \
117 V(F32x4Pmin, F32x8Pmin) \
118 V(F32x4Pmax, F32x8Pmax) \
119 V(F64x2Pmin, F64x4Pmin) \
120 V(F64x2Pmax, F64x4Pmax) \
121 V(F32x4SConvertI32x4, F32x8SConvertI32x8) \
122 V(F32x4UConvertI32x4, F32x8UConvertI32x8) \
123 V(I32x4UConvertF32x4, I32x8UConvertF32x8) \
124 V(I32x4SConvertF32x4, I32x8SConvertF32x8) \
125 V(S128And, S256And) \
126 V(S128Or, S256Or) \
127 V(S128Xor, S256Xor) \
128 V(S128Not, S256Not) \
129 V(S128Select, S256Select) \
130 V(S128AndNot, S256AndNot)
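// Each V(op128, op256) pair above maps a 128-bit SIMD opcode to the 256-bit
// opcode it packs into: for example, two adjacent F64x2Add nodes covering
// neighboring 16-byte lanes revectorize into a single F64x4Add.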
131
132#define SIMD_SHIFT_OP(V) \
133 V(I64x2Shl, I64x4Shl) \
134 V(I32x4Shl, I32x8Shl) \
135 V(I16x8Shl, I16x16Shl) \
136 V(I32x4ShrS, I32x8ShrS) \
137 V(I16x8ShrS, I16x16ShrS) \
138 V(I64x2ShrU, I64x4ShrU) \
139 V(I32x4ShrU, I32x8ShrU) \
140 V(I16x8ShrU, I16x16ShrU)
141
142#define SIMD_SIGN_EXTENSION_CONVERT_OP(V) \
143 V(I64x2SConvertI32x4Low, I64x2SConvertI32x4High, I64x4SConvertI32x4) \
144 V(I64x2UConvertI32x4Low, I64x2UConvertI32x4High, I64x4UConvertI32x4) \
145 V(I32x4SConvertI16x8Low, I32x4SConvertI16x8High, I32x8SConvertI16x8) \
146 V(I32x4UConvertI16x8Low, I32x4UConvertI16x8High, I32x8UConvertI16x8) \
147 V(I16x8SConvertI8x16Low, I16x8SConvertI8x16High, I16x16SConvertI8x16) \
148 V(I16x8UConvertI8x16Low, I16x8UConvertI8x16High, I16x16UConvertI8x16)
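// Each triple is (low op, high op, 256-bit op): a Low/High conversion pair
// over the same input packs into one 256-bit extension, e.g.
// I16x8SConvertI8x16Low + I16x8SConvertI8x16High -> I16x16SConvertI8x16.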
149
150#define SIMD_SPLAT_OP(V) \
151 V(I8x16Splat, I8x32Splat) \
152 V(I16x8Splat, I16x16Splat) \
153 V(I32x4Splat, I32x8Splat) \
154 V(I64x2Splat, I64x4Splat)
155
156// Currently, only Load/ProtectedLoad/LoadTransform are supported.
157// TODO(jiepan): add support for UnalignedLoad, LoadLane, LoadTrapOnNull
158bool IsSupportedLoad(const Node* node) {
159 if (node->opcode() == IrOpcode::kProtectedLoad ||
160 node->opcode() == IrOpcode::kLoad ||
161 node->opcode() == IrOpcode::kLoadTransform) {
162 return true;
163 }
164 return false;
165}
166
167#ifdef DEBUG
168bool IsSupportedLoad(const ZoneVector<Node*>& node_group) {
169 for (auto node : node_group) {
170 if (!IsSupportedLoad(node)) return false;
171 }
172 return true;
173}
174#endif
175
176int64_t GetConstantValue(const Node* node) {
177 int64_t value = -1;
178 if (node->opcode() == IrOpcode::kInt64Constant) {
179 value = OpParameter<int64_t>(node->op());
180 }
181 return value;
182}
183
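// Returns the constant offset baked into a load/store address. For example,
// a store whose input 0 is (Int64Add mem_start (Int64Constant 32)) yields
// 32; -1 is returned when no constant offset can be extracted.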
184int64_t GetMemoryOffsetValue(const Node* node) {
185 DCHECK(IsSupportedLoad(node) || node->opcode() == IrOpcode::kStore ||
186 node->opcode() == IrOpcode::kProtectedStore);
187
188 Node* offset = node->InputAt(0);
189 if (offset->opcode() == IrOpcode::kLoadFromObject ||
190 offset->opcode() == IrOpcode::kLoad) {
191 return 0;
192 }
193
194 int64_t offset_value = -1;
195 if (offset->opcode() == IrOpcode::kInt64Add) {
196 if (NodeProperties::IsConstant(offset->InputAt(0))) {
197 offset_value = GetConstantValue(offset->InputAt(0));
198 } else if (NodeProperties::IsConstant(offset->InputAt(1))) {
199 offset_value = GetConstantValue(offset->InputAt(1));
200 }
201 }
202 return offset_value;
203}
204
205// We want to combine load/store nodes with contiguous memory addresses.
206// For a load/store node, input(0) is memory_start + offset and input(1) is
207// the index. We currently use the index as the address of the node; nodes
208// with the same index and contiguous offsets can be combined together.
209Node* GetNodeAddress(const Node* node) {
210 Node* address = node->InputAt(1);
211 // The index is changed to Uint64 for memory32
212 if (address->opcode() == IrOpcode::kChangeUint32ToUint64) {
213 address = address->InputAt(0);
214 }
215 return address;
216}
217
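// Checks that the (offset-sorted) accesses are back-to-back: 128-bit
// accesses at offsets {0, 16, 32, 48} are continuous, and load-extend
// transforms (which read only 64 bits before widening) may step by 8 instead.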
218bool IsContinuousAccess(const ZoneVector<Node*>& node_group) {
219 DCHECK_GT(node_group.size(), 0);
220 int64_t previous_offset = GetMemoryOffsetValue(node_group[0]);
221 for (size_t i = 1; i < node_group.size(); ++i) {
222 int64_t current_offset = GetMemoryOffsetValue(node_group[i]);
223 int64_t diff = current_offset - previous_offset;
224 if (diff == 8 && node_group[0]->opcode() == IrOpcode::kLoadTransform) {
225 LoadTransformParameters params =
226 LoadTransformParametersOf(node_group[0]->op());
227 if (params.transformation < LoadTransformation::kFirst128Extend ||
228 params.transformation > LoadTransformation::kLast128Extend) {
229 TRACE("Non-continuous access!\n");
230 return false;
231 }
232 TRACE("Continuous access with load extend offset!\n");
233 } else if (diff != kSimd128Size) {
234 TRACE("Non-continuous access!\n");
235 return false;
236 }
237 previous_offset = current_offset;
238 }
239 return true;
240}
241
242// Returns true if all of the nodes in node_group are constants.
243bool AllConstant(const ZoneVector<Node*>& node_group) {
244 for (Node* node : node_group) {
245 if (!NodeProperties::IsConstant(node)) {
246 return false;
247 }
248 }
249 return true;
250}
251
252// Returns true if all the addresses of the nodes in node_group are identical.
253bool AllSameAddress(const ZoneVector<Node*>& nodes) {
254 Node* address = GetNodeAddress(nodes[0]);
255 for (size_t i = 1; i < nodes.size(); i++) {
256 if (GetNodeAddress(nodes[i]) != address) {
257 TRACE("Diff address #%d,#%d!\n", address->id(),
258 GetNodeAddress(nodes[i])->id());
259 return false;
260 }
261 }
262 return true;
263}
264
265// Returns true if all of the nodes in node_group are identical.
267// A Splat opcode in Wasm SIMD creates a vector with identical lanes.
267template <typename T>
268bool IsSplat(const T& node_group) {
269 for (typename T::size_type i = 1; i < node_group.size(); ++i) {
270 if (node_group[i] != node_group[0]) {
271 return false;
272 }
273 }
274 return true;
275}
276
277// Some kinds of node (shuffle, s128const) have different operator
278// instances even when they have the same properties, so we cannot simply
279// compare the operators' addresses; compare opcode and properties instead.
280V8_INLINE static bool OperatorCanBePacked(const Operator* lhs,
281 const Operator* rhs) {
282 return lhs->opcode() == rhs->opcode() &&
283 lhs->properties() == rhs->properties();
284}
285
286// Returns true if the operators of all nodes in node_group can be packed
287// (same opcode and properties).
287bool AllPackableOperator(const ZoneVector<Node*>& node_group) {
288 auto op = node_group[0]->op();
289 for (ZoneVector<Node*>::size_type i = 1; i < node_group.size(); i++) {
290 if (!OperatorCanBePacked(node_group[i]->op(), op)) {
291 return false;
292 }
293 }
294 return true;
295}
296
297bool ShiftBySameScalar(const ZoneVector<Node*>& node_group) {
298 auto node0 = node_group[0];
299 for (ZoneVector<Node*>::size_type i = 1; i < node_group.size(); i++) {
300 DCHECK_EQ(node_group[i]->op(), node0->op());
301 DCHECK_EQ(node0->InputCount(), 2);
302 if (node_group[i]->InputAt(1) != node0->InputAt(1)) {
303 return false;
304 }
305 }
306 return true;
307}
308
309bool IsSignExtensionOperation(IrOpcode::Value op) {
310#define CASE(op_low, op_high, not_used) \
311 case IrOpcode::k##op_low: \
312 case IrOpcode::k##op_high:
313 switch (op) {
314 SIMD_SIGN_EXTENSION_CONVERT_OP(CASE)
315 return true;
316 default:
317 return false;
318 }
319#undef CASE
320 UNREACHABLE();
321}
322
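// Returns true if node_group forms a (low, high) sign-extension pair over
// the same input, which can later be packed into one 256-bit conversion.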
323bool MaybePackSignExtensionOp(const ZoneVector<Node*>& node_group) {
324#define CHECK_SIGN_EXTENSION_CASE(op_low, op_high, not_used) \
325 case IrOpcode::k##op_low: { \
326 if (node_group[1]->opcode() == IrOpcode::k##op_high && \
327 node_group[0]->InputAt(0) == node_group[1]->InputAt(0)) { \
328 return true; \
329 } \
330 return false; \
331 }
332 switch (node_group[0]->opcode()) {
333 SIMD_SIGN_EXTENSION_CONVERT_OP(CHECK_SIGN_EXTENSION_CASE)
334 default: {
335 return false;
336 }
337 }
338#undef CHECK_SIGN_EXTENSION_CASE
339 UNREACHABLE();
340}
341
342class EffectChainIterator {
343 public:
344 explicit EffectChainIterator(Node* node) : node_(node), prev_(nullptr) {}
345
346 Node* Advance() {
347 prev_ = node_;
348 node_ = EffectInputOf(node_);
349 return node_;
350 }
351
352 Node* Prev() {
353 DCHECK_NE(prev_, nullptr);
354 return prev_;
355 }
356
357 Node* Next() { return EffectInputOf(node_); }
358
359 void Set(Node* node) {
360 node_ = node;
361 prev_ = nullptr;
362 }
363
364 Node* operator*() { return node_; }
365
366 private:
367 Node* EffectInputOf(Node* node) {
368 DCHECK(IsSupportedLoad(node));
369 return node->InputAt(2);
370 }
371
372 Node* node_;
373 Node* prev_;
374};
375
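// Unlinks the load at |src| from its current position on the effect chain
// and relinks it so that it directly follows |dest|, i.e. |src| becomes
// |dest|'s new effect input and |dest|'s old effect input becomes |src|'s.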
376void InsertAfter(EffectChainIterator& dest, EffectChainIterator& src) {
377 Node* dest_next = dest.Next();
378 NodeProperties::ReplaceEffectInput(src.Prev(), src.Next());
379 NodeProperties::ReplaceEffectInput(*dest, *src);
380 NodeProperties::ReplaceEffectInput(*src, dest_next);
381}
382
383} // anonymous namespace
384
385// Sorts load/store nodes by their constant memory offset.
386bool MemoryOffsetComparer::operator()(const Node* lhs, const Node* rhs) const {
387 return GetMemoryOffsetValue(lhs) < GetMemoryOffsetValue(rhs);
388}
389
390void PackNode::Print() const {
391 if (revectorized_node_ != nullptr) {
392 TRACE("0x%p #%d:%s(%d %d, %s)\n", this, revectorized_node_->id(),
393 revectorized_node_->op()->mnemonic(), nodes_[0]->id(),
394 nodes_[1]->id(), nodes_[0]->op()->mnemonic());
395 } else {
396 TRACE("0x%p null(%d %d, %s)\n", this, nodes_[0]->id(), nodes_[1]->id(),
397 nodes_[0]->op()->mnemonic());
398 }
399}
400
401bool SLPTree::CanBePacked(const ZoneVector<Node*>& node_group) {
402 DCHECK_EQ(node_group.size(), 2);
403 // Only support simd128 operators, or common operators with a simd128
404 // MachineRepresentation. The MachineRepresentation of the root has already
405 // been checked, and the leaf nodes will be checked later. Here we omit the
406 // MachineRepresentation check and only check the opcode itself.
407 IrOpcode::Value op = node_group[0]->opcode();
408 if (!NodeProperties::IsSimd128Operation(node_group[0]) &&
409 (op != IrOpcode::kStore) && (op != IrOpcode::kProtectedStore) &&
410 (op != IrOpcode::kLoad) && (op != IrOpcode::kProtectedLoad) &&
411 (op != IrOpcode::kPhi) && (op != IrOpcode::kLoopExitValue) &&
412 (op != IrOpcode::kExtractF128)) {
413 return false;
414 }
415
416 // TODO(jiepan): add support for Constant
417 if (AllConstant(node_group)) {
418 TRACE("%s(#%d, #%d) are constantant, not supported yet!\n",
419 node_group[0]->op()->mnemonic(), node_group[0]->id(),
420 node_group[1]->id());
421 return false;
422 }
423 if (IsSignExtensionOperation(op)) {
424 if (MaybePackSignExtensionOp(node_group)) {
425 return true;
426 } else {
427 TRACE("%s(#%d, #%d) are not (low, high) sign extension pair\n",
428 node_group[0]->op()->mnemonic(), node_group[0]->id(),
429 node_group[1]->id());
430 return false;
431 }
432 }
433 if (!AllPackableOperator(node_group)) {
434 TRACE(
435 "%s(#%d, #%d) have different op, and are not sign extension operator\n",
436 node_group[0]->op()->mnemonic(), node_group[0]->id(),
437 node_group[1]->id());
438 return false;
439 }
440 return true;
441}
442
444 TRACE("PackNode %s(#%d:, #%d)\n", node_group[0]->op()->mnemonic(),
445 node_group[0]->id(), node_group[1]->id());
446 PackNode* pnode = zone_->New<PackNode>(zone_, node_group);
447 for (Node* node : node_group) {
448 node_to_packnode_[node] = pnode;
449 }
450 return pnode;
451}
452
453PackNode* SLPTree::NewPackNodeAndRecurs(const ZoneVector<Node*>& node_group,
454 int start_index, int count,
455 unsigned recursion_depth) {
456 PackNode* pnode = NewPackNode(node_group);
457 for (int i = start_index; i < start_index + count; ++i) {
458 ZoneVector<Node*> operands(zone_);
459 // Prepare the operand vector.
460 for (size_t j = 0; j < node_group.size(); j++) {
461 Node* node = node_group[j];
462 operands.push_back(node->InputAt(i));
463 }
464
465 PackNode* child = BuildTreeRec(operands, recursion_depth + 1);
466 if (child) {
467 pnode->SetOperand(i, child);
468 } else {
469 return nullptr;
470 }
471 }
472 return pnode;
473}
474
475PackNode* SLPTree::GetPackNode(Node* node) {
476 auto I = node_to_packnode_.find(node);
477 if (I != node_to_packnode_.end()) {
478 return I->second;
479 }
480 return nullptr;
481}
482
483void SLPTree::PushStack(const ZoneVector<Node*>& node_group) {
484 TRACE("Stack Push (%d %s, %d %s)\n", node_group[0]->id(),
485 node_group[0]->op()->mnemonic(), node_group[1]->id(),
486 node_group[1]->op()->mnemonic());
487 for (auto node : node_group) {
488 on_stack_.insert(node);
489 }
490 stack_.push({node_group});
491}
492
493void SLPTree::PopStack() {
494 const ZoneVector<Node*>& node_group = stack_.top();
495 DCHECK_EQ(node_group.size(), 2);
496 TRACE("Stack Pop (%d %s, %d %s)\n", node_group[0]->id(),
497 node_group[0]->op()->mnemonic(), node_group[1]->id(),
498 node_group[1]->op()->mnemonic());
499 for (auto node : node_group) {
500 on_stack_.erase(node);
501 }
502 stack_.pop();
503}
504
505bool SLPTree::OnStack(Node* node) {
506 return on_stack_.find(node) != on_stack_.end();
507}
508
509bool SLPTree::AllOnStack(const ZoneVector<Node*>& node_group) {
510 for (auto node : node_group) {
511 if (OnStack(node)) return true;
512 }
513 return false;
514}
515
516bool SLPTree::StackTopIsPhi() {
517 const ZoneVector<Node*>& node_group = stack_.top();
518 DCHECK_EQ(node_group.size(), 2);
519 return NodeProperties::IsPhi(node_group[0]);
520}
521
522void SLPTree::ClearStack() {
523 stack_ = ZoneStack<ZoneVector<Node*>>(zone_);
524 on_stack_.clear();
525}
526
527// Try to connect the nodes in |loads| by effect edges. This allows us to build
528// |PackNode| without breaking effect dependency:
529// Before: [Load1]->...->[Load2]->...->[Load3]->...->[Load4]
530// After: [Load1]->[Load2]->[Load3]->[Load4]
531void SLPTree::TryReduceLoadChain(const ZoneVector<Node*>& loads) {
532 ZoneSet<Node*> visited(zone());
533 for (Node* load : loads) {
534 if (visited.find(load) != visited.end()) continue;
535 visited.insert(load);
536
537 EffectChainIterator dest(load);
538 EffectChainIterator it(dest.Next());
539 while (SameBasicBlock(*it, load) && IsSupportedLoad(*it)) {
540 if (std::find(loads.begin(), loads.end(), *it) != loads.end()) {
541 visited.insert(*it);
542 if (dest.Next() != *it) {
543 Node* prev = it.Prev();
544 InsertAfter(dest, it);
545 it.Set(prev);
546 }
547 dest.Advance();
548 }
549 it.Advance();
550 }
551 }
552}
553
554bool SLPTree::IsSideEffectFreeLoad(const ZoneVector<Node*>& node_group) {
555 DCHECK(IsSupportedLoad(node_group));
556 DCHECK_EQ(node_group.size(), 2);
557 TRACE("Enter IsSideEffectFreeLoad (%d %s, %d %s)\n", node_group[0]->id(),
558 node_group[0]->op()->mnemonic(), node_group[1]->id(),
559 node_group[1]->op()->mnemonic());
560
561 TryReduceLoadChain(node_group);
562 // We only allow Loads that are connected by effect edges.
563 if (node_group[0] != node_group[1] &&
564 NodeProperties::GetEffectInput(node_group[0]) != node_group[1] &&
565 NodeProperties::GetEffectInput(node_group[1]) != node_group[0])
566 return false;
567
568 std::stack<Node*> to_visit;
569 std::unordered_set<Node*> visited;
570 // Visit all the inputs (except for control inputs) of Loads.
571 for (size_t i = 0, e = node_group.size(); i < e; i++) {
572 Node* load = node_group[i];
573 for (int j = 0; j < NodeProperties::FirstControlIndex(load); ++j) {
574 Node* input = load->InputAt(j);
575 if (std::find(node_group.begin(), node_group.end(), input) ==
576 node_group.end()) {
577 to_visit.push(input);
578 }
579 }
580 }
581
582 // Check the inputs of the Loads to see whether they are connected to
583 // existing nodes in the SLPTree. If they are, there would be a side
584 // effect, and we cannot merge such Loads.
585 while (!to_visit.empty()) {
586 Node* input = to_visit.top();
587 to_visit.pop();
588 TRACE("IsSideEffectFreeLoad visit (%d %s)\n", input->id(),
589 input->op()->mnemonic());
590 if (visited.find(input) == visited.end()) {
591 visited.insert(input);
592
593 if (OnStack(input)) {
594 TRACE("Has internal dependency because (%d %s) on stack\n", input->id(),
595 input->op()->mnemonic());
596 return false;
597 }
598
599 // If the input is not in the same basic block as the Loads, it cannot be
600 // in the SLPTree. Otherwise, recursively visit all of the input's edges
601 // and check whether they are connected to the SLPTree.
602 if (SameBasicBlock(input, node_group[0])) {
603 for (int i = 0; i < NodeProperties::FirstControlIndex(input); ++i) {
604 to_visit.push(input->InputAt(i));
605 }
606 }
607 }
608 }
609 return true;
610}
611
613 TRACE("Enter %s\n", __func__);
614
615 DeleteTree();
616
617 root_ = BuildTreeRec(roots, 0);
618 return root_;
619}
620
621PackNode* SLPTree::BuildTreeRec(const ZoneVector<Node*>& node_group,
622 unsigned recursion_depth) {
623 TRACE("Enter %s\n", __func__);
624 DCHECK_EQ(node_group.size(), 2);
625
626 Node* node0 = node_group[0];
627 Node* node1 = node_group[1];
628
630 TRACE("Failed due to max recursion depth!\n");
631 return nullptr;
632 }
633
634 if (AllOnStack(node_group)) {
635 if (!StackTopIsPhi()) {
636 TRACE("Failed due to (%d %s, %d %s) on stack!\n", node0->id(),
637 node0->op()->mnemonic(), node1->id(), node1->op()->mnemonic());
638 return nullptr;
639 }
640 }
641 PushStack(node_group);
642
643 if (!CanBePacked(node_group)) {
644 return nullptr;
645 }
646
647 DCHECK(AllConstant(node_group) || AllPackableOperator(node_group) ||
648 MaybePackSignExtensionOp(node_group));
649
650 // Check if this is a duplicate of another entry.
651 for (Node* node : node_group) {
652 if (PackNode* p = GetPackNode(node)) {
653 if (!p->IsSame(node_group)) {
654 // TODO(jiepan): Gathering due to partial overlap
655 TRACE("Failed due to partial overlap at #%d,%s!\n", node->id(),
656 node->op()->mnemonic());
657 return nullptr;
658 }
659
660 PopStack();
661 TRACE("Perfect diamond merge at #%d,%s\n", node->id(),
662 node->op()->mnemonic());
663 return p;
664 }
665 }
666
667 if (node0->opcode() == IrOpcode::kS128Zero) {
668 PackNode* p = NewPackNode(node_group);
669 PopStack();
670 return p;
671 }
672 if (node0->opcode() == IrOpcode::kS128Const) {
673 PackNode* p = NewPackNode(node_group);
674 PopStack();
675 return p;
676 }
677 if (node0->opcode() == IrOpcode::kExtractF128) {
678 Node* source = node0->InputAt(0);
679 TRACE("Extract leaf node from #%d,%s!\n", source->id(),
680 source->op()->mnemonic());
681 // For 256 only: check whether both extracts come from the same source.
682 if (node0->InputAt(0) == node1->InputAt(0) &&
683 (node0->InputAt(0)->opcode() == IrOpcode::kLoadTransform
684 ? node0 == node1
685 : OpParameter<int32_t>(node0->op()) + 1 ==
686 OpParameter<int32_t>(node1->op()))) {
687 TRACE("Added a pair of Extract.\n");
688 PackNode* pnode = NewPackNode(node_group);
689 PopStack();
690 return pnode;
691 }
692 TRACE("Failed due to ExtractF128!\n");
693 return nullptr;
694 }
695
696 if (IsSupportedLoad(node0)) {
697 TRACE("Load leaf node\n");
698 if (!AllSameAddress(node_group)) {
699 TRACE("Failed due to different load addr!\n");
700 PopStack();
701 return nullptr;
702 }
703
704 if (!IsSplat(node_group)) {
705 if (node0->opcode() == IrOpcode::kProtectedLoad &&
706 LoadRepresentationOf(node0->op()).representation() !=
707 MachineRepresentation::kSimd128) {
708 PopStack();
709 return nullptr;
710 }
711
712 if (!IsSideEffectFreeLoad(node_group)) {
713 TRACE("Failed due to dependency check\n");
714 PopStack();
715 return nullptr;
716 }
717
718 // Sort loads by offset
719 ZoneVector<Node*> sorted_node_group(node_group.size(), zone_);
720 std::partial_sort_copy(node_group.begin(), node_group.end(),
721 sorted_node_group.begin(), sorted_node_group.end(),
722 MemoryOffsetComparer());
723 if (!IsContinuousAccess(sorted_node_group)) {
724 TRACE("Failed due to non-continuous load!\n");
725 PopStack();
726 return nullptr;
727 }
728 } else if (node0->opcode() == IrOpcode::kLoadTransform) {
729 LoadTransformParameters params = LoadTransformParametersOf(node0->op());
730 if (params.transformation > LoadTransformation::kLast128Splat) {
731 TRACE("LoadTransform failed due to unsupported type #%d!\n",
732 node0->id());
733 PopStack();
734 return nullptr;
735 }
736 DCHECK_GE(params.transformation, LoadTransformation::kFirst128Splat);
737 } else {
738 TRACE("Failed due to unsupported splat!\n");
739 PopStack();
740 return nullptr;
741 }
742
743 PackNode* p = NewPackNode(node_group);
744 PopStack();
745 return p;
746 }
747
748 int value_in_count = node0->op()->ValueInputCount();
749
750#define CASE(op128, op256) case IrOpcode::k##op128:
751#define SIGN_EXTENSION_CASE(op_low, not_used1, not_used2) \
752 case IrOpcode::k##op_low:
753 switch (node0->opcode()) {
754 case IrOpcode::kPhi: {
755 TRACE("Added a vector of PHI nodes.\n");
756 MachineRepresentation rep = PhiRepresentationOf(node0->op());
757 if (rep != MachineRepresentation::kSimd128) {
758 return nullptr;
759 }
760 PackNode* pnode =
761 NewPackNodeAndRecurs(node_group, 0, value_in_count, recursion_depth);
762 PopStack();
763 return pnode;
764 }
765 case IrOpcode::kLoopExitValue: {
766 MachineRepresentation rep = LoopExitValueRepresentationOf(node0->op());
767 if (rep != MachineRepresentation::kSimd128) {
768 return nullptr;
769 }
770 PackNode* pnode =
771 NewPackNodeAndRecurs(node_group, 0, value_in_count, recursion_depth);
772 PopStack();
773 return pnode;
774 }
775 case IrOpcode::kI8x16Shuffle: {
776 // Try to match 32x8Splat or 64x4Splat.
777 if (IsSplat(node_group)) {
778 const uint8_t* shuffle = S128ImmediateParameterOf(node0->op()).data();
779 int index;
780 if ((wasm::SimdShuffle::TryMatchSplat<4>(shuffle, &index) &&
781 node0->InputAt(index >> 2)->opcode() ==
782 IrOpcode::kProtectedLoad) ||
783 (wasm::SimdShuffle::TryMatchSplat<2>(shuffle, &index) &&
784 node0->InputAt(index >> 1)->opcode() ==
785 IrOpcode::kProtectedLoad)) {
786 PopStack();
787 return NewPackNode(node_group);
788 }
789 TRACE("Failed to match splat\n");
790 PopStack();
791 return nullptr;
792 } else {
793 PopStack();
794 return NewPackNodeAndRecurs(node_group, 0, value_in_count,
795 recursion_depth);
796 }
797 }
798 // clang-format off
800 TRACE("Added a vector of %s.\n", node0->op()->mnemonic());
801 PackNode* pnode = NewPackNodeAndRecurs(node_group, 0, value_in_count,
802 recursion_depth);
803 PopStack();
804 return pnode;
805 }
806 SIMD_SHIFT_OP(CASE) {
807 if (ShiftBySameScalar(node_group)) {
808 TRACE("Added a vector of %s.\n", node0->op()->mnemonic());
809 PackNode* pnode =
810 NewPackNodeAndRecurs(node_group, 0, 1, recursion_depth);
811 PopStack();
812 return pnode;
813 }
814 TRACE("Failed due to shift with different scalar!\n");
815 return nullptr;
816 }
818 TRACE("add a vector of sign extension op and stop building tree\n");
819 PackNode* pnode = NewPackNode(node_group);
820 PopStack();
821 return pnode;
822 }
824 TRACE("Added a vector of %s.\n", node0->op()->mnemonic());
825 if (node0->InputAt(0) != node1->InputAt(0)) {
826 TRACE("Failed due to different splat input");
827 return nullptr;
828 }
829 PackNode* pnode = NewPackNode(node_group);
830 PopStack();
831 return pnode;
832 }
833 // clang-format on
834
835 // TODO(jiepan): UnalignedStore, StoreTrapOnNull.
836 case IrOpcode::kStore:
837 case IrOpcode::kProtectedStore: {
838 TRACE("Added a vector of stores.\n");
839 if (!AllSameAddress(node_group)) {
840 TRACE("Failed due to different store addr!\n");
841 return nullptr;
842 }
843 PackNode* pnode = NewPackNodeAndRecurs(node_group, 2, 1, recursion_depth);
844 PopStack();
845 return pnode;
846 }
847 default:
848 TRACE("Default branch #%d:%s\n", node0->id(), node0->op()->mnemonic());
849 break;
850 }
851#undef CASE
852#undef SIGN_EXTENSION_CASE
853 return nullptr;
854}
855
856void SLPTree::DeleteTree() {
857 ClearStack();
858 node_to_packnode_.clear();
859}
860
861void SLPTree::Print(const char* info) {
862 TRACE("%s, Packed node:\n", info);
863 if (!v8_flags.trace_wasm_revectorize) {
864 return;
865 }
866
867 ForEach([](PackNode const* pnode) { pnode->Print(); });
868}
869
870template <typename FunctionType>
871void SLPTree::ForEach(FunctionType callback) {
872 std::unordered_set<PackNode const*> visited;
873
874 for (auto& entry : node_to_packnode_) {
875 PackNode const* pnode = entry.second;
876 if (!pnode || visited.find(pnode) != visited.end()) {
877 continue;
878 }
879 visited.insert(pnode);
880
881 callback(pnode);
882 }
883}
884
886
887Revectorizer::Revectorizer(Zone* zone, TFGraph* graph, MachineGraph* mcgraph,
888 SourcePositionTable* source_positions)
889 : zone_(zone),
890 graph_(graph),
891 mcgraph_(mcgraph),
892 group_of_stores_(zone),
893 source_positions_(source_positions),
894 support_simd256_(false) {
895 DetectCPUFeatures();
896 slp_tree_ = zone_->New<SLPTree>(zone, graph);
897 Isolate* isolate = Isolate::TryGetCurrent();
898 node_observer_for_test_ = isolate ? isolate->node_observer() : nullptr;
899}
900
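// Cost model: each packed pair of non-splat 128-bit nodes saves one
// instruction (save++), but every packed result that still has a value use
// outside the tree requires materializing an ExtractF128 (cost++);
// revectorization only proceeds when save > cost.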
902 TRACE("Enter %s\n", __func__);
903
904 int save = 0, cost = 0;
905 slp_tree_->ForEach([&](PackNode const* pnode) {
906 const ZoneVector<Node*>& nodes = pnode->Nodes();
907 IrOpcode::Value op = nodes[0]->opcode();
908
909 // Skip LoopExit as auxiliary nodes are not issued in generated code.
910 // Skip ExtractF128 as we will reuse its revectorized input and no
911 // additional extract nodes will be generated.
911 // extract nodes will be generated.
912 if (op == IrOpcode::kLoopExitValue || op == IrOpcode::kExtractF128) {
913 return;
914 }
915 // A splat node does not yield a saving, as it simply extends itself.
916 if (!IsSplat(nodes)) {
917 save++;
918 }
919
920 for (size_t i = 0; i < nodes.size(); i++) {
921 if (i > 0 && nodes[i] == nodes[0]) continue;
922
923 for (auto edge : nodes[i]->use_edges()) {
924 if (!NodeProperties::IsValueEdge(edge)) continue;
925 Node* useNode = edge.from();
926 if (!GetPackNode(useNode) && !(useNode->uses().empty()) &&
927 useNode->opcode() != IrOpcode::kLoopExitValue) {
928 TRACE("External use edge: (%d:%s) -> (%d:%s)\n", useNode->id(),
929 useNode->op()->mnemonic(), nodes[i]->id(),
930 nodes[i]->op()->mnemonic());
931 cost++;
932
933 // We only need one Extract node and all other uses can share.
934 break;
935 }
936 }
937 }
938 });
939
940 TRACE("Save: %d, cost: %d\n", save, cost);
941 return save > cost;
942}
943
944void Revectorizer::SetEffectInput(PackNode* pnode, int index, Node*& input) {
945 const ZoneVector<Node*>& nodes = pnode->Nodes();
946
947 // We assume there is no effect edge to a third node in between.
948 DCHECK(nodes[0] == nodes[1] ||
949 NodeProperties::GetEffectInput(nodes[0]) == nodes[1] ||
950 NodeProperties::GetEffectInput(nodes[1]) == nodes[0]);
951
952 // Scan until we find the other effect input outside of pnode.
953 for (size_t i = 0; i < nodes.size(); i++) {
954 Node* node128 = nodes[i];
955 PackNode* effect = GetPackNode(node128->InputAt(index));
956 if (effect == pnode) continue;
957 if (effect)
958 pnode->SetOperand(index, effect);
959 else
960 input = node128->InputAt(index);
961 break;
962 }
963}
964
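// Fills in the inputs of a revectorized memory op. For loads the layout is
// {base, index, effect, control} (effect_index == 2); for stores it is
// {base, index, value, effect, control} (effect_index == 3), with the value
// input filled in later by VectorizeTree.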
965void Revectorizer::SetMemoryOpInputs(base::SmallVector<Node*, 2>& inputs,
966 PackNode* pnode, int effect_index) {
967 Node* node = pnode->Nodes()[0];
968 // Keep the addressing inputs
969 inputs[0] = node->InputAt(0);
970 inputs[1] = node->InputAt(1);
971 // Set the effect input; the value input will be set later.
972 SetEffectInput(pnode, effect_index, inputs[effect_index]);
973 // Set the control input
974 inputs[effect_index + 1] = node->InputAt(effect_index + 1);
975}
976
978 TRACE("Enter %s with PackNode\n", __func__);
979
980 Node* node0 = pnode->Nodes()[0];
981 Node* node1 = pnode->Nodes()[1];
982 if (pnode->RevectorizedNode()) {
983 TRACE("Diamond merged for #%d:%s\n", node0->id(), node0->op()->mnemonic());
984 return pnode->RevectorizedNode();
985 }
986
987 int input_count = node0->InputCount();
988 TRACE("Vectorize #%d:%s, input count: %d\n", node0->id(),
989 node0->op()->mnemonic(), input_count);
990
991 IrOpcode::Value op = node0->opcode();
992 const Operator* new_op = nullptr;
993 Node* source = nullptr;
994 Node* dead = mcgraph()->Dead();
995 base::SmallVector<Node*, 2> inputs(input_count);
996 for (int i = 0; i < input_count; i++) inputs[i] = dead;
997
998 switch (op) {
999 case IrOpcode::kPhi: {
1000 DCHECK_EQ(PhiRepresentationOf(node0->op()),
1001 MachineRepresentation::kSimd128);
1002 new_op = mcgraph_->common()->Phi(MachineRepresentation::kSimd256,
1003 input_count - 1);
1004 inputs[input_count - 1] = NodeProperties::GetControlInput(node0);
1005 break;
1006 }
1007 case IrOpcode::kLoopExitValue: {
1008 DCHECK_EQ(LoopExitValueRepresentationOf(node0->op()),
1009 MachineRepresentation::kSimd128);
1010 new_op =
1011 mcgraph_->common()->LoopExitValue(MachineRepresentation::kSimd256);
1012 inputs[input_count - 1] = NodeProperties::GetControlInput(node0);
1013 break;
1014 }
1015#define SIMPLE_CASE(from, to) \
1016 case IrOpcode::k##from: \
1017 new_op = mcgraph_->machine()->to(); \
1018 break;
1019 SIMPLE_SIMD_OP(SIMPLE_CASE)
1020#undef SIMPLE_CASE
1021#undef SIMPLE_SIMD_OP
1022
1023#define SHIFT_CASE(from, to) \
1024 case IrOpcode::k##from: { \
1025 DCHECK(ShiftBySameScalar(pnode->Nodes())); \
1026 new_op = mcgraph_->machine()->to(); \
1027 inputs[1] = node0->InputAt(1); \
1028 break; \
1029 }
1030 SIMD_SHIFT_OP(SHIFT_CASE)
1031#undef SHIFT_CASE
1032#undef SIMD_SHIFT_OP
1033
1034#define SIGN_EXTENSION_CONVERT_CASE(from, not_used, to) \
1035 case IrOpcode::k##from: { \
1036 DCHECK_EQ(node0->InputAt(0), pnode->Nodes()[1]->InputAt(0)); \
1037 new_op = mcgraph_->machine()->to(); \
1038 inputs[0] = node0->InputAt(0); \
1039 break; \
1040 }
1041 SIMD_SIGN_EXTENSION_CONVERT_OP(SIGN_EXTENSION_CONVERT_CASE)
1042#undef SIGN_EXTENSION_CONVERT_CASE
1043#undef SIMD_SIGN_EXTENSION_CONVERT_OP
1044
1045#define SPLAT_CASE(from, to) \
1046 case IrOpcode::k##from: \
1047 new_op = mcgraph_->machine()->to(); \
1048 inputs[0] = node0->InputAt(0); \
1049 break;
1050 SIMD_SPLAT_OP(SPLAT_CASE)
1051#undef SPLAT_CASE
1052#undef SIMD_SPLAT_OP
1053 case IrOpcode::kI8x16Shuffle: {
1054 // clang-format off
1055 if (IsSplat(pnode->Nodes())) {
1056 const uint8_t* shuffle = S128ImmediateParameterOf(node0->op()).data();
1057 int index, offset;
1058
1059 // Match Splat and Revectorize to LoadSplat as AVX-256 does not support
1060 // shuffling across 128-bit lanes.
1061 if (wasm::SimdShuffle::TryMatchSplat<4>(shuffle, &index)) {
1062 new_op = mcgraph_->machine()->LoadTransform(
1063 MemoryAccessKind::kProtectedByTrapHandler,
1064 LoadTransformation::kS256Load32Splat);
1065 offset = index * 4;
1066 } else if (wasm::SimdShuffle::TryMatchSplat<2>(shuffle, &index)) {
1067 new_op = mcgraph_->machine()->LoadTransform(
1068 MemoryAccessKind::kProtectedByTrapHandler,
1069 LoadTransformation::kS256Load64Splat);
1070 offset = index * 8;
1071 } else {
1072 UNREACHABLE();
1073 }
1074
1075 source = node0->InputAt(offset >> 4);
1076 DCHECK_EQ(source->opcode(), IrOpcode::kProtectedLoad);
1077 inputs.resize(4);
1078 // Update LoadSplat offset.
1079 if (index) {
1081 inputs[0] = graph()->NewNode(mcgraph_->machine()->Int64Add(),
1082 source->InputAt(0),
1083 mcgraph()->Int64Constant(offset));
1084 } else {
1085 inputs[0] = source->InputAt(0);
1086 }
1087 // Keep source index, effect and control inputs.
1088 inputs[1] = source->InputAt(1);
1089 inputs[2] = source->InputAt(2);
1090 inputs[3] = source->InputAt(3);
1091 input_count = 4;
1092 } else {
1093 const uint8_t* shuffle0 = S128ImmediateParameterOf(node0->op()).data();
1094 const uint8_t* shuffle1 = S128ImmediateParameterOf(node1->op()).data();
1095 uint8_t new_shuffle[32];
1096
1097 if (node0->InputAt(0) == node0->InputAt(1) &&
1098 node1->InputAt(0) == node1->InputAt(1)) {
1099 // Shuffle is Swizzle
1100 for (int i = 0; i < 16; ++i) {
1101 new_shuffle[i] = shuffle0[i] % 16;
1102 new_shuffle[i + 16] = 16 + shuffle1[i] % 16;
1103 }
1104 } else {
1105 for (int i = 0; i < 16; ++i) {
1106 if (shuffle0[i] < 16) {
1107 new_shuffle[i] = shuffle0[i];
1108 } else {
1109 new_shuffle[i] = 16 + shuffle0[i];
1110 }
1111
1112 if (shuffle1[i] < 16) {
1113 new_shuffle[i + 16] = 16 + shuffle1[i];
1114 } else {
1115 new_shuffle[i + 16] = 32 + shuffle1[i];
1116 }
1117 }
1118 }
1119 new_op = mcgraph_->machine()->I8x32Shuffle(new_shuffle);
1120 }
1121 break;
1122 // clang-format on
1123 }
1124 case IrOpcode::kS128Zero: {
1125 new_op = mcgraph_->machine()->S256Zero();
1126 break;
1127 }
1128 case IrOpcode::kS128Const: {
1129 uint8_t value[32];
1130 const uint8_t* value0 = S128ImmediateParameterOf(node0->op()).data();
1131 const uint8_t* value1 = S128ImmediateParameterOf(node1->op()).data();
1132 for (int i = 0; i < kSimd128Size; ++i) {
1133 value[i] = value0[i];
1134 value[i + 16] = value1[i];
1135 }
1136 new_op = mcgraph_->machine()->S256Const(value);
1137 break;
1138 }
1139 case IrOpcode::kProtectedLoad: {
1140 DCHECK_EQ(LoadRepresentationOf(node0->op()).representation(),
1141 MachineRepresentation::kSimd128);
1142 new_op = mcgraph_->machine()->ProtectedLoad(MachineType::Simd256());
1143 SetMemoryOpInputs(inputs, pnode, 2);
1144 break;
1145 }
1146 case IrOpcode::kLoad: {
1147 DCHECK_EQ(LoadRepresentationOf(node0->op()).representation(),
1148 MachineRepresentation::kSimd128);
1149 new_op = mcgraph_->machine()->Load(MachineType::Simd256());
1150 SetMemoryOpInputs(inputs, pnode, 2);
1151 break;
1152 }
1153 case IrOpcode::kProtectedStore: {
1154 DCHECK_EQ(StoreRepresentationOf(node0->op()).representation(),
1155 MachineRepresentation::kSimd128);
1156 new_op =
1157 mcgraph_->machine()->ProtectedStore(MachineRepresentation::kSimd256);
1158 SetMemoryOpInputs(inputs, pnode, 3);
1159 break;
1160 }
1161 case IrOpcode::kStore: {
1162 DCHECK_EQ(StoreRepresentationOf(node0->op()).representation(),
1163 MachineRepresentation::kSimd128);
1164 WriteBarrierKind write_barrier_kind =
1165 StoreRepresentationOf(node0->op()).write_barrier_kind();
1166 new_op = mcgraph_->machine()->Store(StoreRepresentation(
1167 MachineRepresentation::kSimd256, write_barrier_kind));
1168 SetMemoryOpInputs(inputs, pnode, 3);
1169 break;
1170 }
1171 case IrOpcode::kLoadTransform: {
1172 LoadTransformParameters params = LoadTransformParametersOf(node0->op());
1173 LoadTransformation new_transformation;
1174
1175 // clang-format off
1176 switch (params.transformation) {
1177 case LoadTransformation::kS128Load8Splat:
1178 new_transformation = LoadTransformation::kS256Load8Splat;
1179 break;
1180 case LoadTransformation::kS128Load16Splat:
1181 new_transformation = LoadTransformation::kS256Load16Splat;
1182 break;
1183 case LoadTransformation::kS128Load32Splat:
1184 new_transformation = LoadTransformation::kS256Load32Splat;
1185 break;
1186 case LoadTransformation::kS128Load64Splat:
1187 new_transformation = LoadTransformation::kS256Load64Splat;
1188 break;
1189 case LoadTransformation::kS128Load8x8S:
1190 new_transformation = LoadTransformation::kS256Load8x16S;
1191 break;
1192 case LoadTransformation::kS128Load8x8U:
1193 new_transformation = LoadTransformation::kS256Load8x16U;
1194 break;
1195 case LoadTransformation::kS128Load16x4S:
1196 new_transformation = LoadTransformation::kS256Load16x8S;
1197 break;
1198 case LoadTransformation::kS128Load16x4U:
1199 new_transformation = LoadTransformation::kS256Load16x8U;
1200 break;
1201 case LoadTransformation::kS128Load32x2S:
1202 new_transformation = LoadTransformation::kS256Load32x4S;
1203 break;
1204 case LoadTransformation::kS128Load32x2U:
1205 new_transformation = LoadTransformation::kS256Load32x4U;
1206 break;
1207 default:
1208 UNREACHABLE();
1209 }
1210 // clang-format on
1211
1212 new_op =
1213 mcgraph_->machine()->LoadTransform(params.kind, new_transformation);
1214 SetMemoryOpInputs(inputs, pnode, 2);
1215 break;
1216 }
1217 case IrOpcode::kExtractF128: {
1218 pnode->SetRevectorizedNode(node0->InputAt(0));
1219 // Uses of the extract other than its parent do not need to change.
1220 break;
1221 }
1222 default:
1223 UNREACHABLE();
1224 }
1225
1226 DCHECK(pnode->RevectorizedNode() || new_op);
1227 if (new_op != nullptr) {
1229 Node* new_node =
1230 graph()->NewNode(new_op, input_count, inputs.begin(), true);
1231 pnode->SetRevectorizedNode(new_node);
1232 for (int i = 0; i < input_count; i++) {
1233 if (inputs[i] == dead) {
1234 new_node->ReplaceInput(i, VectorizeTree(pnode->GetOperand(i)));
1235 }
1236 }
1237 // Extract Uses
1238 const ZoneVector<Node*>& nodes = pnode->Nodes();
1239 for (size_t i = 0; i < nodes.size(); i++) {
1240 if (i > 0 && nodes[i] == nodes[i - 1]) continue;
1241 Node* input_128 = nullptr;
1242 for (auto edge : nodes[i]->use_edges()) {
1243 Node* useNode = edge.from();
1244 if (!GetPackNode(useNode)) {
1245 if (NodeProperties::IsValueEdge(edge)) {
1246 // Extract use
1247 TRACE("Replace Value Edge from %d:%s, to %d:%s\n", useNode->id(),
1248 useNode->op()->mnemonic(), edge.to()->id(),
1249 edge.to()->op()->mnemonic());
1250
1251 if (!input_128) {
1252 TRACE("Create ExtractF128(%lu) node from #%d\n", i,
1253 new_node->id());
1254 input_128 = graph()->NewNode(
1255 mcgraph()->machine()->ExtractF128(static_cast<int32_t>(i)),
1256 new_node);
1257 }
1258 edge.UpdateTo(input_128);
1259 } else if (NodeProperties::IsEffectEdge(edge)) {
1260 TRACE("Replace Effect Edge from %d:%s, to %d:%s\n", useNode->id(),
1261 useNode->op()->mnemonic(), edge.to()->id(),
1262 edge.to()->op()->mnemonic());
1263
1264 edge.UpdateTo(new_node);
1265 }
1266 }
1267 }
1268 if (nodes[i]->uses().empty()) nodes[i]->Kill();
1269 }
1270
1271 // Update effect use of NewNode from the dependent source.
1272 if (op == IrOpcode::kI8x16Shuffle && IsSplat(nodes)) {
1273 DCHECK(source);
1274 NodeProperties::ReplaceEffectInput(source, new_node, 0);
1275 TRACE("Replace Effect Edge from %d:%s, to %d:%s\n", source->id(),
1276 source->op()->mnemonic(), new_node->id(),
1277 new_node->op()->mnemonic());
1278 // Remove unused value uses so that we can safely eliminate the node
1279 // later.
1279 NodeProperties::ReplaceValueInput(node0, dead, 0);
1280 NodeProperties::ReplaceValueInput(node0, dead, 1);
1281 TRACE("Remove Value Input of %d:%s\n", node0->id(),
1282 node0->op()->mnemonic());
1283
1284 // We will try to clean up the source nodes later.
1285 sources_.insert(source);
1286 }
1287 }
1288
1289 return pnode->RevectorizedNode();
1290}
1291
1292void Revectorizer::DetectCPUFeatures() {
1293 base::CPU cpu;
1294 if (v8_flags.enable_avx && v8_flags.enable_avx2 && cpu.has_avx2()) {
1295 support_simd256_ = true;
1296 }
1297}
1298
1299bool Revectorizer::TryRevectorize(const char* function) {
1300 source_positions_->AddDecorator();
1301
1302 bool success = false;
1304 TRACE("TryRevectorize %s\n", function);
1305 CollectSeeds();
1306 for (auto entry : group_of_stores_) {
1307 ZoneMap<Node*, StoreNodeSet>* store_chains = entry.second;
1308 if (store_chains != nullptr) {
1309 PrintStores(store_chains);
1310 if (ReduceStoreChains(store_chains)) {
1311 TRACE("Successful revectorize %s\n", function);
1312 success = true;
1313 }
1314 }
1315 }
1316 TRACE("Finish revectorize %s\n", function);
1317 }
1318 source_positions_->RemoveDecorator();
1319 return success;
1320}
1321
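// Cleans up 128-bit load sources that became dead after revectorization: if
// a source has no remaining value use outside the packed tree, its effect
// uses are rerouted to the source's own effect input, linearizing the chain.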
1322void Revectorizer::UpdateSources() {
1323 for (auto* src : sources_) {
1324 std::vector<Node*> effect_uses;
1325 bool hasExternalValueUse = false;
1326 for (auto edge : src->use_edges()) {
1327 Node* use = edge.from();
1328 if (!GetPackNode(use)) {
1329 if (NodeProperties::IsValueEdge(edge)) {
1330 TRACE("Source node has external value dependence %d:%s\n",
1331 edge.from()->id(), edge.from()->op()->mnemonic());
1332 hasExternalValueUse = true;
1333 break;
1334 } else if (NodeProperties::IsEffectEdge(edge)) {
1335 effect_uses.push_back(use);
1336 }
1337 }
1338 }
1339
1340 if (!hasExternalValueUse) {
1341 // Remove unused source and linearize effect chain.
1342 Node* effect = NodeProperties::GetEffectInput(src);
1343 for (auto use : effect_uses) {
1344 TRACE("Replace Effect Edge for source node from %d:%s, to %d:%s\n",
1345 use->id(), use->op()->mnemonic(), effect->id(),
1346 effect->op()->mnemonic());
1347 NodeProperties::ReplaceEffectInput(use, effect, 0);
1348 }
1349 }
1350 }
1351
1352 sources_.clear();
1353}
1354
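// Groups the graph's Simd128 stores by (early schedule position, address);
// each group is a candidate store chain that ReduceStoreChains later splits
// into pairs and tries to pack.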
1355void Revectorizer::CollectSeeds() {
1356 for (auto it = graph_->GetSimdStoreNodes().begin();
1357 it != graph_->GetSimdStoreNodes().end(); ++it) {
1358 Node* node = *it;
1359 Node* dominator = slp_tree_->GetEarlySchedulePosition(node);
1360
1361 if ((GetMemoryOffsetValue(node) % kSimd128Size) != 0) {
1362 continue;
1363 }
1364 Node* address = GetNodeAddress(node);
1365 ZoneMap<Node*, StoreNodeSet>* store_nodes;
1366 auto first_level_iter = group_of_stores_.find(dominator);
1367 if (first_level_iter == group_of_stores_.end()) {
1368 store_nodes = zone_->New<ZoneMap<Node*, StoreNodeSet>>(zone_);
1369 group_of_stores_[dominator] = store_nodes;
1370 } else {
1371 store_nodes = first_level_iter->second;
1372 }
1373 auto second_level_iter = store_nodes->find(address);
1374 if (second_level_iter == store_nodes->end()) {
1375 second_level_iter =
1376 store_nodes->insert({address, StoreNodeSet(zone())}).first;
1377 }
1378 second_level_iter->second.insert(node);
1379 }
1380}
1381
1382bool Revectorizer::ReduceStoreChains(
1383 ZoneMap<Node*, StoreNodeSet>* store_chains) {
1384 TRACE("Enter %s\n", __func__);
1385 bool changed = false;
1386 for (auto chain_iter = store_chains->cbegin();
1387 chain_iter != store_chains->cend(); ++chain_iter) {
1388 if (chain_iter->second.size() >= 2 && chain_iter->second.size() % 2 == 0) {
1389 ZoneVector<Node*> store_chain(chain_iter->second.begin(),
1390 chain_iter->second.end(), zone_);
1391 for (auto it = store_chain.begin(); it < store_chain.end(); it = it + 2) {
1392 ZoneVector<Node*> stores_unit(it, it + 2, zone_);
1393 if ((NodeProperties::GetEffectInput(stores_unit[0]) == stores_unit[1] ||
1394 NodeProperties::GetEffectInput(stores_unit[1]) ==
1395 stores_unit[0]) &&
1396 ReduceStoreChain(stores_unit)) {
1397 changed = true;
1398 }
1399 }
1400 }
1401 }
1402
1403 return changed;
1404}
1405
1407 TRACE("Enter %s, root@ (#%d,#%d)\n", __func__, Stores[0]->id(),
1408 Stores[1]->id());
1409 if (!IsContinuousAccess(Stores)) {
1410 return false;
1411 }
1412
1413 PackNode* root = slp_tree_->BuildTree(Stores);
1414 if (!root) {
1415 TRACE("Build tree failed!\n");
1416 return false;
1417 }
1418
1419 slp_tree_->Print("After build tree");
1420
1421 if (DecideVectorize()) {
1422 VectorizeTree(root);
1423 UpdateSources();
1424 slp_tree_->Print("After vectorize tree");
1425
1426 if (node_observer_for_test_) {
1427 slp_tree_->ForEach([&](const PackNode* pnode) {
1428 Node* node = pnode->RevectorizedNode();
1429 if (node) {
1430 node_observer_for_test_->OnNodeCreated(node);
1431 }
1432 });
1433 }
1434 }
1435
1436 TRACE("\n");
1437 return true;
1438}
1439
1440void Revectorizer::PrintStores(ZoneMap<Node*, StoreNodeSet>* store_chains) {
1441 if (!v8_flags.trace_wasm_revectorize) {
1442 return;
1443 }
1444 TRACE("Enter %s\n", __func__);
1445 for (auto it = store_chains->cbegin(); it != store_chains->cend(); ++it) {
1446 if (it->second.size() > 0) {
1447 TRACE("address = #%d:%s \n", it->first->id(),
1448 it->first->op()->mnemonic());
1449
1450 for (auto node : it->second) {
1451 TRACE("#%d:%s, ", node->id(), node->op()->mnemonic());
1452 }
1453
1454 TRACE("\n");
1455 }
1456 }
1457}
1458
1459} // namespace compiler
1460} // namespace internal
1461} // namespace v8