#define TRACE(...)                                  \
  do {                                              \
    if (v8_flags.trace_wasm_revectorize) {          \
      PrintF("Revec: %s %d: ", __func__, __LINE__); \
      PrintF(__VA_ARGS__);                          \
    }                                               \
  } while (false)
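// The CASE macro below feeds IsSameOpAndKind(): two operations are only
// pack-compatible when both opcode and kind match. A minimal sketch of the
// intended expansion site (the surrounding switch is not part of this
// excerpt):
//
//   switch (op0.opcode) {
//     CASE(Simd128Unary)   // case Opcode::kSimd128Unary: compare kinds
//     CASE(Simd128Binop)
//     default:
//       return op0.opcode == op1.opcode;
//   }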
#define CASE(operation)                                  \
  case Opcode::k##operation: {                           \
    using Op = operation##Op;                            \
    return op0.Cast<Op>().kind == op1.Cast<Op>().kind;   \
  }
std::string GetSimdOpcodeName(Operation const& op) {
  std::ostringstream oss;
  if (op.Is<Simd128BinopOp>() || op.Is<Simd128UnaryOp>() ||
      op.Is<Simd128ShiftOp>() || op.Is<Simd128TestOp>() ||
      op.Is<Simd128TernaryOp>()) {
    op.PrintOptions(oss);
  } else {
    oss << OpcodeName(op.opcode);
  }
  return oss.str();
}
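// StoreLoadInfo (below) wraps a memory operation (StoreOp, LoadOp or
// Simd128LoadTransformOp) and normalizes its address into base, index and a
// constant byte offset, folding constant indices into the offset. This lets
// operator- report the distance between two accesses, which is how the
// analyzer recognizes contiguous and splat loads/stores.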
template <typename Op,
          typename = std::enable_if_t<
              std::is_same_v<Op, StoreOp> || std::is_same_v<Op, LoadOp> ||
              std::is_same_v<Op, Simd128LoadTransformOp>>>
class StoreLoadInfo {
 public:
  StoreLoadInfo(const Graph* graph, const Op* op)
      : op_(op), offset_(op->offset) {
    base_ = &graph->Get(op->base());
    if constexpr (std::is_same_v<Op, Simd128LoadTransformOp>) {
      // ... (load-transform-specific base handling elided)
    }
    // ...
    if (!op->index().has_value()) {
      // ...
      return;
    }
    index_ = &(graph->Get(op->index().value()));
    // If the index was zero-extended from a 32-bit value, strip the
    // extension.
    if (const ChangeOp* change_op = index_->TryCast<ChangeOp>()) {
      DCHECK_EQ(change_op->kind, ChangeOp::Kind::kZeroExtend);
      index_ = &graph->Get(change_op->input());
    }
    // Fold a constant index into the offset.
    if (const ConstantOp* const_op = index_->TryCast<ConstantOp>()) {
      DCHECK_EQ(const_op->kind, ConstantOp::Kind::kWord32);
      int32_t new_offset;
      if (base::bits::SignedAddOverflow32(
              static_cast<int32_t>(const_op->word32()),
              static_cast<int32_t>(offset_), &new_offset)) {
        // Folded offset overflows: mark this info invalid.
        op_ = nullptr;
        return;
      }
      offset_ = new_offset;
      index_ = nullptr;
    }
  }
  std::optional<int> operator-(const StoreLoadInfo<Op>& rhs) const {
    DCHECK(IsValid() && rhs.IsValid());
    bool calculatable = base_ == rhs.base_ && index_ == rhs.index_;

    if constexpr (std::is_same_v<Op, Simd128LoadTransformOp>) {
      calculatable &= (op_->load_kind == rhs.op_->load_kind &&
                       op_->transform_kind == rhs.op_->transform_kind);
    } else {
      calculatable &= (op_->kind == rhs.op_->kind);
    }

    if constexpr (std::is_same_v<Op, StoreOp>) {
      // Only stores with the same write barrier are comparable.
      calculatable &= (op_->write_barrier == rhs.op_->write_barrier);
    }

    if (calculatable) {
      return offset_ - rhs.offset_;
    }
    return {};
  }
  bool IsValid() const { return op_ != nullptr; }

  const Operation* index() const { return index_; }
  int offset() const { return offset_; }
  const Op* op() const { return op_; }

 private:
  const Op* op_ = nullptr;
  const Operation* base_ = nullptr;
  const Operation* index_ = nullptr;
  int offset_ = 0;
};
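// Sketch of how the helpers above combine (hypothetical addresses): two
// Simd128 loads at base+0 and base+16 produce StoreLoadInfos whose
// difference is 16 == kSimd128Size, i.e. a contiguous pair that
// LoadStrideEqualTo() below accepts with stride == kSimd128Size.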
template <class Op, class Info>
bool LoadStrideEqualTo(const Graph& graph, const NodeGroup& node_group,
                       int stride) {
  // Collect one StoreLoadInfo per node (container type assumed).
  base::SmallVector<Info, 2> load_infos;
  for (OpIndex op_idx : node_group) {
    const Operation& op = graph.Get(op_idx);
    const Op& load_op = op.Cast<Op>();
    Info info(&graph, &load_op);
    if (!info.IsValid()) {
      return false;
    }
    load_infos.push_back(info);
  }
  return load_infos[1] - load_infos[0] == stride;
}
// Returns whether the group consists of one identical node repeated twice
// (the pattern produced by splats).
template <typename T>
bool IsSplat(const T& node_group) {
  return node_group[1] == node_group[0];
}
void PackNode::Print(Graph* graph) const {
  // ...
}
PackNode* SLPTree::GetPackNode(OpIndex node) {
  auto itr = node_to_packnode_.find(node);
  if (itr != node_to_packnode_.end()) {
    return itr->second;
  }
  return analyzer_->GetPackNode(node);
}
ZoneVector<PackNode*>* SLPTree::GetIntersectPackNodes(OpIndex node) {
  auto I = node_to_intersect_packnodes_.find(node);
  if (I != node_to_intersect_packnodes_.end()) {
    return &(I->second);
  }
  return nullptr;
}
template <typename FunctionType>
void ForEach(FunctionType callback,
             const ZoneUnorderedMap<OpIndex, PackNode*>& node_map) {
  absl::flat_hash_set<PackNode const*> visited;

  for (auto& entry : node_map) {
    PackNode const* pnode = entry.second;
    if (!pnode || visited.find(pnode) != visited.end()) {
      continue;
    }
    visited.insert(pnode);

    callback(pnode);
  }
}
template <typename FunctionType>
void ForEach(FunctionType callback,
             const ZoneUnorderedMap<OpIndex, ZoneVector<PackNode*>>& node_map) {
  absl::flat_hash_set<PackNode const*> visited;

  for (const auto& entry : node_map) {
    for (auto* pnode : entry.second) {
      if (visited.find(pnode) != visited.end()) {
        continue;
      }
      visited.insert(pnode);

      callback(pnode);
    }
  }
}
void SLPTree::Print(const char* info) {
  TRACE("%s, %zu Packed node:\n", info, node_to_packnode_.size());
  if (!v8_flags.trace_wasm_revectorize) {
    return;
  }

  ForEach([&](PackNode const* pnode) { pnode->Print(&graph_); },
          node_to_packnode_);
  ForEach([&](PackNode const* pnode) { pnode->Print(&graph_); },
          node_to_intersect_packnodes_);
}
bool SLPTree::HasInputDependencies(const NodeGroup& node_group) {
  if (node_group[0] == node_group[1]) return false;
  OpIndex start, end;
  if (node_group[0] < node_group[1]) {
    start = node_group[0];
    end = node_group[1];
  } else {
    start = node_group[1];
    end = node_group[0];
  }
  // Walk the input chains of the later node; if any path reaches the
  // earlier node, the two cannot execute side by side.
  // ... (worklist initialization elided)
  while (!to_visit.empty()) {
    OpIndex to_visit_node = to_visit.front();
    to_visit.pop();
    for (OpIndex input : graph_.Get(to_visit_node).inputs()) {
      if (input == start) {
        return true;
      } else if (input > start) {
        // Nodes at or below `start` cannot depend on it.
        to_visit.push(input);
      }
    }
  }
  return false;
}
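// Example of the input-dependency rule (illustrative): given
//   b = i32x4.add(a, c)
// the group (a, b) has an input dependency, since b consumes a; the two
// operations cannot execute as lanes of a single SIMD256 instruction, so
// HasInputDependencies() returns true and packing is rejected.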
PackNode* SLPTree::NewPackNode(const NodeGroup& node_group) {
  TRACE("PackNode %s(#%d, #%d)\n",
        GetSimdOpcodeName(graph_.Get(node_group[0])).c_str(),
        node_group[0].id(), node_group[1].id());
  PackNode* pnode = phase_zone_->New<PackNode>(phase_zone_, node_group);
  for (OpIndex node : node_group) {
    node_to_packnode_[node] = pnode;
  }
  return pnode;
}
ForcePackNode* SLPTree::NewForcePackNode(const NodeGroup& node_group,
                                         ForcePackNode::ForcePackType type,
                                         const Graph& graph) {
  // Nodes with input dependencies cannot be force-packed.
  if (HasInputDependencies(node_group)) {
    TRACE("ForcePackNode %s(#%d, #%d) failed due to input dependencies.\n",
          GetSimdOpcodeName(graph_.Get(node_group[0])).c_str(),
          node_group[0].id(), node_group[1].id());
    return nullptr;
  }

  TRACE("ForcePackNode %s(#%d, #%d)\n",
        GetSimdOpcodeName(graph_.Get(node_group[0])).c_str(),
        node_group[0].id(), node_group[1].id());
  ForcePackNode* pnode =
      phase_zone_->New<ForcePackNode>(phase_zone_, node_group, type);
  for (OpIndex node : node_group) {
    node_to_packnode_[node] = pnode;
  }
  return pnode;
}
BundlePackNode* SLPTree::NewBundlePackNode(const NodeGroup& node_group,
                                           OpIndex base, int8_t offset,
                                           uint8_t lane_size,
                                           bool is_sign_extract,
                                           bool is_sign_convert) {
  TRACE("BundlePackNode %s(#%d, #%d)\n",
        GetSimdOpcodeName(graph_.Get(node_group[0])).c_str(),
        node_group[0].id(), node_group[1].id());
  BundlePackNode* pnode = phase_zone_->New<BundlePackNode>(
      phase_zone_, node_group, base, offset, lane_size, is_sign_extract,
      is_sign_convert);
  for (OpIndex node : node_group) {
    node_to_packnode_[node] = pnode;
  }
  return pnode;
}
PackNode* SLPTree::NewIntersectPackNode(const NodeGroup& node_group) {
  // As with force-packing, dependent nodes cannot be packed.
  if (HasInputDependencies(node_group)) {
    TRACE("IntersectPackNode %s(#%d, #%d) failed due to input dependencies.\n",
          GetSimdOpcodeName(graph_.Get(node_group[0])).c_str(),
          node_group[0].id(), node_group[1].id());
    return nullptr;
  }

  TRACE("IntersectPackNode %s(#%d, #%d)\n",
        GetSimdOpcodeName(graph_.Get(node_group[0])).c_str(),
        node_group[0].id(), node_group[1].id());
  PackNode* intersect_pnode = phase_zone_->New<PackNode>(
      phase_zone_, node_group, PackNode::kIntersectPackNode);

  for (int i = 0; i < static_cast<int>(node_group.size()); i++) {
    OpIndex op_idx = node_group[i];
    if (i > 0 && op_idx == node_group[0]) continue;
    auto it = node_to_intersect_packnodes_.find(op_idx);
    if (it == node_to_intersect_packnodes_.end()) {
      bool result;
      std::tie(it, result) =
          node_to_intersect_packnodes_.emplace(op_idx, phase_zone_);
      DCHECK(result);
    }
    it->second.push_back(intersect_pnode);
  }

  return intersect_pnode;
}
PackNode* SLPTree::NewCommutativePackNodeAndRecurs(const NodeGroup& node_group,
                                                   unsigned depth) {
  PackNode* pnode = NewPackNode(node_group);

  const Simd128BinopOp& op0 = graph_.Get(node_group[0]).Cast<Simd128BinopOp>();
  const Simd128BinopOp& op1 = graph_.Get(node_group[1]).Cast<Simd128BinopOp>();

  bool same_kind =
      (op0.left() == op1.left()) ||
      IsSameOpAndKind(graph_.Get(op0.left()), graph_.Get(op1.left()));
  // For commutative binops, swap the second node's operands when the left
  // inputs do not match up.
  bool need_swap = Simd128BinopOp::IsCommutative(op0.kind) && !same_kind;
  if (need_swap) {
    TRACE("Change the order of binop operands\n");
  }

  for (int i = 0; i < 2; ++i) {
    unsigned node1_input_index = need_swap ? 1 - i : i;
    NodeGroup operands(graph_.Get(node_group[0]).input(i),
                       graph_.Get(node_group[1]).input(node1_input_index));

    PackNode* child = BuildTreeRec(operands, depth + 1);
    if (!child) {
      return nullptr;
    }
    pnode->SetOperand(i, child);
  }
  return pnode;
}
PackNode* SLPTree::NewPackNodeAndRecurs(const NodeGroup& node_group,
                                        int start_index, int count,
                                        unsigned depth) {
  PackNode* pnode = NewPackNode(node_group);
  for (int i = 0; i < count; ++i) {
    int input_index = i + start_index;
    NodeGroup operands(graph_.Get(node_group[0]).input(input_index),
                       graph_.Get(node_group[1]).input(input_index));

    PackNode* child = BuildTreeRec(operands, depth + 1);
    if (!child) {
      return nullptr;
    }
    pnode->SetOperand(i, child);
  }
  return pnode;
}
ShufflePackNode* SLPTree::NewShufflePackNode(
    const NodeGroup& node_group, ShufflePackNode::SpecificInfo::Kind kind) {
  TRACE("ShufflePackNode %s(#%d, #%d)\n",
        GetSimdOpcodeName(graph_.Get(node_group[0])).c_str(),
        node_group[0].id(), node_group[1].id());
  ShufflePackNode* pnode =
      phase_zone_->New<ShufflePackNode>(phase_zone_, node_group, kind);
  for (OpIndex node : node_group) {
    node_to_packnode_[node] = pnode;
  }
  return pnode;
}
ShufflePackNode* SLPTree::Try256ShuffleMatchLoad8x8U(
    const NodeGroup& node_group, const uint8_t* shuffle0,
    const uint8_t* shuffle1) {
  OpIndex op_idx0 = node_group[0];
  OpIndex op_idx1 = node_group[1];
  const Simd128ShuffleOp& op0 = graph_.Get(op_idx0).Cast<Simd128ShuffleOp>();
  const Simd128ShuffleOp& op1 = graph_.Get(op_idx1).Cast<Simd128ShuffleOp>();

  if (op0.kind != Simd128ShuffleOp::Kind::kI8x16 ||
      op1.kind != Simd128ShuffleOp::Kind::kI8x16) {
    return nullptr;
  }

  if (op0.left() == op0.right() || op1.left() == op1.right()) {
    // A swizzle cannot match this pattern.
    return nullptr;
  }

  // Canonicalize mutable copies of the shuffles so the non-swapped input
  // is always "left" (copy setup elided).
  bool need_swap, is_swizzle;

#define CANONICALIZE_SHUFFLE(n)                                                \
  wasm::SimdShuffle::CanonicalizeShuffle(false, shuffle_copy##n, &need_swap,   \
                                         &is_swizzle);                         \
  /* ... */                                                                    \
  V<Simd128> shuffle##n##_left_idx = need_swap ? op##n.right() : op##n.left(); \
  V<Simd128> shuffle##n##_right_idx = need_swap ? op##n.left() : op##n.right();

  CANONICALIZE_SHUFFLE(0);
  CANONICALIZE_SHUFFLE(1);

#undef CANONICALIZE_SHUFFLE

  if (shuffle0_left_idx != shuffle1_left_idx) {
    return nullptr;
  }

  const Simd128LoadTransformOp* load_transform =
      graph_.Get(shuffle0_left_idx).TryCast<Simd128LoadTransformOp>();

  if (!load_transform) {
    return nullptr;
  }

  Simd128ConstantOp* shuffle0_const =
      graph_.Get(shuffle0_right_idx).TryCast<Simd128ConstantOp>();
  Simd128ConstantOp* shuffle1_const =
      graph_.Get(shuffle1_right_idx).TryCast<Simd128ConstantOp>();

  if (!shuffle0_const || !shuffle1_const || !shuffle0_const->IsZero() ||
      !shuffle1_const->IsZero()) {
    return nullptr;
  }

  if (load_transform->transform_kind ==
      Simd128LoadTransformOp::TransformKind::k64Zero) {
    for (int i = 0; i < 4; ++i) {
      if (shuffle_copy0[i * 4] != i || shuffle_copy1[i * 4] != i + 4) {
        return nullptr;
      }
      // ... (remaining bytes of each lane must select zeros; elided)
    }
    TRACE("match load extend 8x8->32x8\n");
    return NewShufflePackNode(
        node_group, ShufflePackNode::SpecificInfo::Kind::kS256Load8x8U);
  }
  return nullptr;
}
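// Illustrative byte pattern for the k64Zero case above (assuming the elided
// checks require the remaining bytes of each 32-bit lane to select zeros
// from the constant input): with loaded bytes b0..b7,
//   shuffle0 result = {b0,0,0,0, b1,0,0,0, b2,0,0,0, b3,0,0,0}
//   shuffle1 result = {b4,0,0,0, b5,0,0,0, b6,0,0,0, b7,0,0,0}
// which together are exactly a zero-extending 8x8 -> 32x8 load
// (kS256Load8x8U).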
#ifdef V8_TARGET_ARCH_X64
ShufflePackNode* SLPTree::X64TryMatch256Shuffle(const NodeGroup& node_group,
                                                const uint8_t* shuffle0,
                                                const uint8_t* shuffle1) {
  OpIndex op_idx0 = node_group[0];
  OpIndex op_idx1 = node_group[1];
  const Simd128ShuffleOp& op0 = graph_.Get(op_idx0).Cast<Simd128ShuffleOp>();
  const Simd128ShuffleOp& op1 = graph_.Get(op_idx1).Cast<Simd128ShuffleOp>();
  if (op0.kind != Simd128ShuffleOp::Kind::kI8x16 ||
      op1.kind != Simd128ShuffleOp::Kind::kI8x16) {
    return nullptr;
  }

  uint8_t shuffle8x32[32];

  if (op0.left() == op0.right() && op1.left() == op1.right()) {
    // Both shuffles are swizzles.
    for (int i = 0; i < 16; ++i) {
      shuffle8x32[i] = shuffle0[i] % 16;
      shuffle8x32[i + 16] = 16 + shuffle1[i] % 16;
    }

    if (uint8_t shuffle32x8[8];
        wasm::SimdShuffle::TryMatch32x8Shuffle(shuffle8x32, shuffle32x8)) {
      uint8_t control;
      if (wasm::SimdShuffle::TryMatchVpshufd(shuffle32x8, &control)) {
        ShufflePackNode* pnode = NewShufflePackNode(
            node_group, ShufflePackNode::SpecificInfo::Kind::kShufd);
        pnode->info().set_shufd_control(control);
        return pnode;
      }
    }
  } else if (op0.left() != op0.right() && op1.left() != op1.right()) {
    // Both shuffles read two distinct inputs.
    for (int i = 0; i < 16; ++i) {
      if (shuffle0[i] < 16) {
        shuffle8x32[i] = shuffle0[i];
      } else {
        shuffle8x32[i] = 16 + shuffle0[i];
      }
      if (shuffle1[i] < 16) {
        shuffle8x32[i + 16] = 16 + shuffle1[i];
      } else {
        shuffle8x32[i + 16] = 32 + shuffle1[i];
      }
    }

    if (const wasm::ShuffleEntry<kSimd256Size>* arch_shuffle;
        wasm::SimdShuffle::TryMatchArchShuffle(shuffle8x32, false,
                                               &arch_shuffle)) {
      ShufflePackNode::SpecificInfo::Kind kind;
      switch (arch_shuffle->opcode) {
        case kX64S32x8UnpackHigh:
          kind = ShufflePackNode::SpecificInfo::Kind::kS32x8UnpackHigh;
          break;
        case kX64S32x8UnpackLow:
          kind = ShufflePackNode::SpecificInfo::Kind::kS32x8UnpackLow;
          break;
        default:
          UNREACHABLE();
      }
      ShufflePackNode* pnode = NewShufflePackNode(node_group, kind);
      return pnode;
    } else if (uint8_t shuffle32x8[8]; wasm::SimdShuffle::TryMatch32x8Shuffle(
                   shuffle8x32, shuffle32x8)) {
      uint8_t control;
      if (wasm::SimdShuffle::TryMatchShufps256(shuffle32x8, &control)) {
        ShufflePackNode* pnode = NewShufflePackNode(
            node_group, ShufflePackNode::SpecificInfo::Kind::kShufps);
        pnode->info().set_shufps_control(control);
        return pnode;
      }
    }
  }
  return nullptr;
}
#endif  // V8_TARGET_ARCH_X64
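// Worked example for the swizzle path above (illustrative): if both shuffles
// swap adjacent dwords, shuffle0 == shuffle1 ==
//   {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11},
// then shuffle8x32 selects the 32-bit lanes {1,0,3,2, 5,4,7,6}. The same
// 4-lane pattern repeats in each 128-bit half, so TryMatchVpshufd succeeds
// with control 0b10110001 (0xB1) and the pair becomes one vpshufd.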
std::optional<SLPTree::ExtendIntToF32x4Info>
SLPTree::TryGetExtendIntToF32x4Info(OpIndex index) {
  OpIndex current = index;
  LaneExtendInfo lane_extend_info[4];

  // Walk the replace_lane chain backwards for lanes 3..1.
  for (int lane_index = 3; lane_index > 0; lane_index--) {
    const Simd128ReplaceLaneOp* replace_lane =
        graph_.Get(current)
            .TryCast<turboshaft::Opmask::kSimd128ReplaceLaneF32x4>();
    if (!replace_lane) {
      TRACE("Mismatch in replace lane\n");
      return {};
    }
    const ChangeOp* change_op =
        graph_.Get(replace_lane->new_lane()).TryCast<ChangeOp>();
    if (!change_op) {
      TRACE("Mismatch in type convert\n");
      return {};
    }
    const Simd128ExtractLaneOp* extract_lane =
        graph_.Get(change_op->input()).TryCast<Simd128ExtractLaneOp>();
    if (!extract_lane) {
      TRACE("Mismatch in extract lane\n");
      return {};
    }
    lane_extend_info[lane_index].replace_lane_index = replace_lane->lane;
    lane_extend_info[lane_index].change_kind = change_op->kind;
    lane_extend_info[lane_index].extract_from = extract_lane->input();
    lane_extend_info[lane_index].extract_kind = extract_lane->kind;
    lane_extend_info[lane_index].extract_lane_index = extract_lane->lane;

    current = replace_lane->into();
  }

  // Lane 0 comes from a splat of the first converted lane.
  const Simd128SplatOp* splat = graph_.Get(current).TryCast<Simd128SplatOp>();
  if (!splat) {
    TRACE("Mismatch in splat\n");
    return {};
  }
  const ChangeOp* change_op = graph_.Get(splat->input()).TryCast<ChangeOp>();
  if (!change_op) {
    TRACE("Mismatch in splat type convert\n");
    return {};
  }
  const Simd128ExtractLaneOp* extract_lane =
      graph_.Get(change_op->input()).TryCast<Simd128ExtractLaneOp>();
  if (!extract_lane) {
    TRACE("Mismatch in splat extract lane\n");
    return {};
  }
  lane_extend_info[0].replace_lane_index = 0;
  lane_extend_info[0].change_kind = change_op->kind;
  lane_extend_info[0].extract_from = extract_lane->input();
  lane_extend_info[0].extract_kind = extract_lane->kind;
  lane_extend_info[0].extract_lane_index = extract_lane->lane;

  // All lanes must agree: consecutive extract lanes from one source, one
  // extract kind, one conversion kind.
  for (int i = 0; i < 4; i++) {
    if (lane_extend_info[i].replace_lane_index != i) {
      return {};
    }
    if (lane_extend_info[i].change_kind != lane_extend_info[0].change_kind ||
        (lane_extend_info[i].change_kind != ChangeOp::Kind::kSignedToFloat &&
         lane_extend_info[i].change_kind != ChangeOp::Kind::kUnsignedToFloat)) {
      return {};
    }
    if (lane_extend_info[i].extract_from != lane_extend_info[0].extract_from) {
      return {};
    }
    if (lane_extend_info[i].extract_kind != lane_extend_info[0].extract_kind ||
        (lane_extend_info[i].extract_kind !=
             Simd128ExtractLaneOp::Kind::kI8x16S &&
         lane_extend_info[i].extract_kind !=
             Simd128ExtractLaneOp::Kind::kI8x16U &&
         lane_extend_info[i].extract_kind !=
             Simd128ExtractLaneOp::Kind::kI16x8S &&
         lane_extend_info[i].extract_kind !=
             Simd128ExtractLaneOp::Kind::kI16x8U)) {
      return {};
    }
    if (lane_extend_info[i].extract_lane_index !=
        lane_extend_info[0].extract_lane_index + i) {
      return {};
    }
  }

  ExtendIntToF32x4Info info;
  info.extend_from = lane_extend_info[0].extract_from;
  info.start_lane = lane_extend_info[0].extract_lane_index;
  if (lane_extend_info[0].extract_kind == Simd128ExtractLaneOp::Kind::kI8x16S ||
      lane_extend_info[0].extract_kind == Simd128ExtractLaneOp::Kind::kI8x16U) {
    info.lane_size = 1;
  } else {
    info.lane_size = 2;
  }
  info.is_sign_extract =
      lane_extend_info[0].extract_kind == Simd128ExtractLaneOp::Kind::kI8x16S ||
      lane_extend_info[0].extract_kind == Simd128ExtractLaneOp::Kind::kI16x8S;
  info.is_sign_convert =
      lane_extend_info[0].change_kind == ChangeOp::Kind::kSignedToFloat;

  return info;
}
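// Shape of the pattern matched above (reconstructed from the checks): a
// f32x4 value built as
//   v0 = f32x4.splat(convert(extract_lane(src, k)))
//   v1 = f32x4.replace_lane(v0, 1, convert(extract_lane(src, k + 1)))
//   ...
//   v3 = f32x4.replace_lane(v2, 3, convert(extract_lane(src, k + 3)))
// where every extract reads the same i8x16/i16x8 source at consecutive lanes
// and all conversions share signedness. Two such values whose start lanes
// differ by 4 can be fused into a single int-to-f32x8 conversion (see
// TryMatchExtendIntToF32x4 below).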
bool SLPTree::TryMatchExtendIntToF32x4(const NodeGroup& node_group,
                                       ExtendIntToF32x4Info* info) {
  OpIndex node0 = node_group[0];
  OpIndex node1 = node_group[1];
  std::optional<ExtendIntToF32x4Info> info0 = TryGetExtendIntToF32x4Info(node0);
  std::optional<ExtendIntToF32x4Info> info1 = TryGetExtendIntToF32x4Info(node1);
  if (!info0.has_value() || !info1.has_value()) {
    return false;
  }

  if (info0.value().extend_from != info1.value().extend_from ||
      info0.value().is_sign_extract != info1.value().is_sign_extract ||
      info0.value().lane_size != info1.value().lane_size ||
      info0.value().is_sign_convert != info1.value().is_sign_convert) {
    return false;
  }

  uint32_t min_lane_index =
      std::min(info0.value().start_lane, info1.value().start_lane);
  if (std::abs(info0.value().start_lane - info1.value().start_lane) != 4) {
    return false;
  }
  if (info0.value().lane_size == 1) {
    if (min_lane_index != 0 && min_lane_index != 8) {
      return false;
    }
  } else {
    DCHECK_EQ(info0.value().lane_size, 2);
    if (min_lane_index != 0) {
      return false;
    }
  }

  *info = info0.value();
  info->start_lane = min_lane_index;
  return true;
}
bool SLPTree::IsSideEffectFree(OpIndex first, OpIndex second) {
  DCHECK_LE(first.offset(), second.offset());
  if (first == second) return true;

  OpIndex prev_node = graph().PreviousIndex(second);
  while (prev_node != first) {
    // `second` may only move up across prev_node if their effects commute;
    // protected loads get an extra check (parts of the guard elided).
    if (CannotSwapOperations(graph().Get(prev_node).Effects(),
                             graph().Get(second).Effects()) ||
        graph().Get(prev_node).IsProtectedLoad()) {
      TRACE("break side effect %d, %d\n", prev_node.id(), second.id());
      return false;
    }
    prev_node = graph().PreviousIndex(prev_node);
  }
  return true;
}
bool IsSignExtensionOp(Operation& op) {
  if (const Simd128UnaryOp* unop = op.TryCast<Simd128UnaryOp>()) {
    return unop->kind >= Simd128UnaryOp::Kind::kFirstSignExtensionOp &&
           unop->kind <= Simd128UnaryOp::Kind::kLastSignExtensionOp;
  } else if (const Simd128BinopOp* binop = op.TryCast<Simd128BinopOp>()) {
    return binop->kind >= Simd128BinopOp::Kind::kFirstSignExtensionOp &&
           binop->kind <= Simd128BinopOp::Kind::kLastSignExtensionOp;
  }
  return false;
}
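// The [kFirstSignExtensionOp, kLastSignExtensionOp] ranges cover kinds such
// as i32x4.extadd_pairwise_i16x8_s (unary) or i64x2.extmul_low_i32x4_s
// (binop); the examples are illustrative, the kind enums are authoritative.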
bool SLPTree::CanBePacked(const NodeGroup& node_group) {
  OpIndex node0 = node_group[0];
  OpIndex node1 = node_group[1];
  const Operation& op0 = graph().Get(node0);
  const Operation& op1 = graph().Get(node1);

  if (op0.opcode != op1.opcode) {
    TRACE("Different opcode\n");
    return false;
  }

  if (graph().BlockIndexOf(node0) != graph().BlockIndexOf(node1)) {
    TRACE("Can't pack operations of different basic block\n");
    return false;
  }

  // ... (further compatibility checks elided)

  if (node0.offset() <= node1.offset() ? !IsSideEffectFree(node0, node1)
                                       : !IsSideEffectFree(node1, node0)) {
    TRACE("Break side effect\n");
    return false;
  }
  return true;
}
bool SLPTree::IsEqual(const OpIndex node0, const OpIndex node1) {
  if (node0 == node1) return true;
  if (const ConstantOp* const0 = graph().Get(node0).TryCast<ConstantOp>()) {
    if (const ConstantOp* const1 = graph().Get(node1).TryCast<ConstantOp>()) {
      return *const0 == *const1;
    }
  }
  return false;
}
PackNode* SLPTree::BuildTree(const NodeGroup& roots) {
  root_ = BuildTreeRec(roots, 0);
  return root_;
}
bool IsLoadExtend(const Simd128LoadTransformOp& op) {
  switch (op.transform_kind) {
    case Simd128LoadTransformOp::TransformKind::k8x8S:
    case Simd128LoadTransformOp::TransformKind::k8x8U:
    case Simd128LoadTransformOp::TransformKind::k16x4S:
    case Simd128LoadTransformOp::TransformKind::k16x4U:
    case Simd128LoadTransformOp::TransformKind::k32x2S:
    case Simd128LoadTransformOp::TransformKind::k32x2U:
      return true;
    default:
      return false;
  }
}

bool IsLoadSplat(const Simd128LoadTransformOp& op) {
  switch (op.transform_kind) {
    case Simd128LoadTransformOp::TransformKind::k8Splat:
    case Simd128LoadTransformOp::TransformKind::k16Splat:
    case Simd128LoadTransformOp::TransformKind::k32Splat:
    case Simd128LoadTransformOp::TransformKind::k64Splat:
      return true;
    default:
      return false;
  }
}
PackNode* SLPTree::BuildTreeRec(const NodeGroup& node_group, unsigned depth) {
  OpIndex node0 = node_group[0];
  OpIndex node1 = node_group[1];
  const Operation& op0 = graph_.Get(node0);
  const Operation& op1 = graph_.Get(node1);

  if (depth == kMaxRecursionDepth) {  // recursion guard (constant name assumed)
    TRACE("Failed due to max recursion depth!\n");
    return nullptr;
  }

  if (!CanBePacked(node_group)) {
    return nullptr;
  }

  bool is_intersected = false;
  // Check whether the group (or one of its members) was packed already.
  if (PackNode* pnode = GetPackNode(node0)) {
    if (pnode->IsSame(node_group)) {
      TRACE("Perfect diamond merge at #%d,%s\n", node0.id(),
            GetSimdOpcodeName(op0).c_str());
      return pnode;
    }
    if (!pnode->IsForcePackNode()) {
      TRACE("Unsupported partial overlap at #%d,%s!\n", node0.id(),
            GetSimdOpcodeName(op0).c_str());
      return nullptr;
    }
  } else {
    if (auto intersect_packnodes = GetIntersectPackNodes(node0)) {
      for (auto intersect_pnode : *intersect_packnodes) {
        if (intersect_pnode->IsSame(node_group)) {
          TRACE("Perfect diamond merge at intersect pack node #%d,%s, #%d\n",
                node0.id(), GetSimdOpcodeName(op0).c_str(), node1.id());
          return intersect_pnode;
        }
      }
      is_intersected = true;
    }
    if (auto intersect_packnodes = analyzer_->GetIntersectPackNodes(node0)) {
      for (auto intersect_pnode : *intersect_packnodes) {
        if (intersect_pnode->IsSame(node_group)) {
          TRACE("Perfect diamond merge at intersect pack node #%d,%s, #%d\n",
                node0.id(), GetSimdOpcodeName(op0).c_str(), node1.id());
          return intersect_pnode;
        }
      }
      is_intersected = true;
    }
    if (is_intersected) {
      TRACE("Partial overlap at #%d,%s!\n", node0.id(),
            GetSimdOpcodeName(op0).c_str());
    }
  }

  if (!is_intersected) {
    for (int i = 1; i < static_cast<int>(node_group.size()); i++) {
      const OpIndex op_idx = node_group[i];
      if (auto pnode = GetPackNode(op_idx)) {
        if (!pnode->IsForcePackNode()) {
          TRACE("Unsupported partial overlap at #%d,%s!\n", op_idx.id(),
                GetSimdOpcodeName(graph_.Get(op_idx)).c_str());
          return nullptr;
        }
      } else if (!GetIntersectPackNodes(op_idx) &&
                 !analyzer_->GetIntersectPackNodes(op_idx)) {
        continue;
      }

      is_intersected = true;
      TRACE("Partial overlap at #%d,%s!\n", op_idx.id(),
            GetSimdOpcodeName(graph_.Get(op_idx)).c_str());
    }
  }

  if (is_intersected) {
    TRACE("Create IntersectPackNode due to partial overlap!\n");
    PackNode* pnode = NewIntersectPackNode(node_group);
    // ...
    return pnode;
  }
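  // A "perfect diamond merge" above means both members of the group are
  // already packed together in an existing PackNode, so the recursion reuses
  // that node; partial overlaps (a member packed with a different partner)
  // are rejected or demoted to IntersectPackNodes.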
  // Number of value inputs, used when recursing below (derivation assumed).
  int value_in_count = op0.input_count;

  switch (op0.opcode) {
    case Opcode::kSimd128Constant: {
      PackNode* p = NewPackNode(node_group);
      return p;
    }
    case Opcode::kSimd128LoadTransform: {
      const Simd128LoadTransformOp& transform_op0 =
          op0.Cast<Simd128LoadTransformOp>();
      const Simd128LoadTransformOp& transform_op1 =
          op1.Cast<Simd128LoadTransformOp>();
      StoreLoadInfo<Simd128LoadTransformOp> info0(&graph_, &transform_op0);
      StoreLoadInfo<Simd128LoadTransformOp> info1(&graph_, &transform_op1);
      auto stride = info1 - info0;
      if (IsLoadSplat(transform_op0)) {
        TRACE("Simd128LoadTransform: LoadSplat\n");
        if (IsSplat(node_group) ||
            (stride.has_value() && stride.value() == 0)) {
          return NewPackNode(node_group);
        }
        return NewForcePackNode(node_group, ForcePackNode::kGeneral, graph_);
      } else if (IsLoadExtend(transform_op0)) {
        TRACE("Simd128LoadTransform: LoadExtend\n");
        if (stride.has_value()) {
          const int value = stride.value();
          if (value == kSimd128Size / 2) {  // consecutive halves (assumed)
            return NewPackNode(node_group);
          } else if (value == 0) {
            return NewForcePackNode(node_group, ForcePackNode::kSplat, graph_);
          }
        }
        return NewForcePackNode(node_group, ForcePackNode::kGeneral, graph_);
      } else {
        TRACE("Load Transform k64Zero/k32Zero!\n");
        DCHECK(transform_op0.transform_kind ==
                   Simd128LoadTransformOp::TransformKind::k32Zero ||
               transform_op0.transform_kind ==
                   Simd128LoadTransformOp::TransformKind::k64Zero);
        if (stride.has_value() && stride.value() == 0) {
          return NewForcePackNode(node_group, ForcePackNode::kSplat, graph_);
        }
        return NewForcePackNode(node_group, ForcePackNode::kGeneral, graph_);
      }
    }
    case Opcode::kLoad: {
      TRACE("Load leaf node\n");
      const LoadOp& load0 = op0.Cast<LoadOp>();
      const LoadOp& load1 = op1.Cast<LoadOp>();
      if (load0.loaded_rep != MemoryRepresentation::Simd128() ||
          load1.loaded_rep != MemoryRepresentation::Simd128()) {
        TRACE("Failed due to non-simd load representation!\n");
        return nullptr;
      }
      StoreLoadInfo<LoadOp> info0(&graph_, &load0);
      StoreLoadInfo<LoadOp> info1(&graph_, &load1);
      auto stride = info1 - info0;
      if (stride.has_value()) {
        if (const int value = stride.value(); value == kSimd128Size) {
          // Load pair with sequential addresses.
          return NewPackNode(node_group);
        } else if (value == 0) {
          // Splat loads of the same address.
          return NewForcePackNode(node_group, ForcePackNode::kSplat, graph_);
        }
      }
      return NewForcePackNode(node_group, ForcePackNode::kGeneral, graph_);
    }
    case Opcode::kStore: {
      TRACE("Added a vector of stores.\n");
      // Only the stored value is packed recursively; base and index stay
      // scalar (input layout assumed: base, value, [index]).
      PackNode* pnode = NewPackNodeAndRecurs(node_group, 1, 1, depth);
      return pnode;
    }
    case Opcode::kPhi: {
      TRACE("Added a vector of phi nodes.\n");
      const PhiOp& phi = graph_.Get(node0).Cast<PhiOp>();
      if (phi.rep != RegisterRepresentation::Simd128() ||
          op0.input_count != op1.input_count) {
        TRACE("Failed due to invalid phi\n");
        return nullptr;
      }
      PackNode* pnode =
          NewPackNodeAndRecurs(node_group, 0, op0.input_count, depth);
      return pnode;
    }
    case Opcode::kSimd128Unary: {
#define UNARY_CASE(op_128, not_used) case Simd128UnaryOp::Kind::k##op_128:
#define UNARY_SIGN_EXTENSION_CASE(op_low, not_used1, op_high)                  \
  case Simd128UnaryOp::Kind::k##op_low: {                                      \
    if (const Simd128UnaryOp* unop1 =                                          \
            op1.TryCast<Opmask::kSimd128##op_high>();                          \
        unop1 && op0.Cast<Simd128UnaryOp>().input() == unop1->input()) {       \
      return NewPackNode(node_group);                                          \
    }                                                                          \
    return nullptr;                                                            \
  }                                                                            \
  case Simd128UnaryOp::Kind::k##op_high: {                                     \
    if (op1.Cast<Simd128UnaryOp>().kind == op0.Cast<Simd128UnaryOp>().kind) {  \
      auto force_pack_type =                                                   \
          node0 == node1 ? ForcePackNode::kSplat : ForcePackNode::kGeneral;    \
      return NewForcePackNode(node_group, force_pack_type, graph_);            \
    }                                                                          \
    return nullptr;                                                            \
  }
      switch (op0.Cast<Simd128UnaryOp>().kind) {
        SIMD256_UNARY_SIMPLE_OP(UNARY_CASE) {
          TRACE("Added a vector of Unary\n");
          PackNode* pnode =
              NewPackNodeAndRecurs(node_group, 0, value_in_count, depth);
          return pnode;
        }
        SIMD256_UNARY_SIGN_EXTENSION_OP(UNARY_SIGN_EXTENSION_CASE)
        default: {
          TRACE("Unsupported Simd128Unary: %s\n",
                GetSimdOpcodeName(op0).c_str());
          return nullptr;
        }
      }
#undef UNARY_CASE
#undef UNARY_SIGN_EXTENSION_CASE
    }
    case Opcode::kSimd128Binop: {
#define BINOP_CASE(op_128, not_used) case Simd128BinopOp::Kind::k##op_128:
#define BINOP_SIGN_EXTENSION_CASE(op_low, not_used1, op_high)                  \
  case Simd128BinopOp::Kind::k##op_low: {                                      \
    if (const Simd128BinopOp* binop1 =                                         \
            op1.TryCast<Opmask::kSimd128##op_high>();                          \
        binop1 && op0.Cast<Simd128BinopOp>().left() == binop1->left() &&       \
        op0.Cast<Simd128BinopOp>().right() == binop1->right()) {               \
      return NewPackNode(node_group);                                          \
    }                                                                          \
    return nullptr;                                                            \
  }                                                                            \
  case Simd128BinopOp::Kind::k##op_high: {                                     \
    if (op1.Cast<Simd128BinopOp>().kind == op0.Cast<Simd128BinopOp>().kind) {  \
      auto force_pack_type =                                                   \
          node0 == node1 ? ForcePackNode::kSplat : ForcePackNode::kGeneral;    \
      return NewForcePackNode(node_group, force_pack_type, graph_);            \
    }                                                                          \
    return nullptr;                                                            \
  }
      switch (op0.Cast<Simd128BinopOp>().kind) {
        SIMD256_BINOP_SIMPLE_OP(BINOP_CASE) {
          TRACE("Added a vector of Binop\n");
          PackNode* pnode = NewCommutativePackNodeAndRecurs(node_group, depth);
          return pnode;
        }
        SIMD256_BINOP_SIGN_EXTENSION_OP(BINOP_SIGN_EXTENSION_CASE)
        default: {
          TRACE("Unsupported Simd128Binop: %s\n",
                GetSimdOpcodeName(op0).c_str());
          return nullptr;
        }
      }
#undef BINOP_CASE
#undef BINOP_SIGN_EXTENSION_CASE
    }
    case Opcode::kSimd128Shift: {
      const Simd128ShiftOp& shift_op0 = op0.Cast<Simd128ShiftOp>();
      const Simd128ShiftOp& shift_op1 = op1.Cast<Simd128ShiftOp>();
      if (IsEqual(shift_op0.shift(), shift_op1.shift())) {
        switch (op0.Cast<Simd128ShiftOp>().kind) {
#define SHIFT_CASE(op_128, not_used) case Simd128ShiftOp::Kind::k##op_128:
          SIMD256_SHIFT_OP(SHIFT_CASE) {
            TRACE("Added a vector of Shift op.\n");
            // Only the vector input (input 0) is packed; the shift amount
            // is a shared Word32 scalar.
            constexpr int kShiftValueInCount = 1;
            PackNode* pnode = NewPackNodeAndRecurs(node_group, 0,
                                                   kShiftValueInCount, depth);
            return pnode;
          }
#undef SHIFT_CASE
          default: {
            TRACE("Unsupported Simd128ShiftOp: %s\n",
                  GetSimdOpcodeName(op0).c_str());
            return nullptr;
          }
        }
      }
      TRACE("Failed due to SimdShiftOp kind or shift scalar is different!\n");
      return nullptr;
    }
    case Opcode::kSimd128Ternary: {
#define TERNARY_CASE(op_128, not_used) case Simd128TernaryOp::Kind::k##op_128:
      switch (op0.Cast<Simd128TernaryOp>().kind) {
        SIMD256_TERNARY_OP(TERNARY_CASE) {
          TRACE("Added a vector of Ternary\n");
          PackNode* pnode =
              NewPackNodeAndRecurs(node_group, 0, value_in_count, depth);
          return pnode;
        }
        default: {
          TRACE("Unsupported Simd128Ternary: %s\n",
                GetSimdOpcodeName(op0).c_str());
          return nullptr;
        }
      }
#undef TERNARY_CASE
    }
    case Opcode::kSimd128Splat: {
      if (op0.input(0) != op1.input(0)) {
        TRACE("Failed due to different splat input!\n");
        return nullptr;
      }
      PackNode* pnode = NewPackNode(node_group);
      return pnode;
    }
    case Opcode::kSimd128Shuffle: {
      const auto& shuffle_op0 = op0.Cast<Simd128ShuffleOp>();
      const auto& shuffle_op1 = op1.Cast<Simd128ShuffleOp>();
      if (shuffle_op0.kind != Simd128ShuffleOp::Kind::kI8x16 ||
          shuffle_op1.kind != Simd128ShuffleOp::Kind::kI8x16) {
        return nullptr;
      }
      const auto& shuffle0 = shuffle_op0.shuffle;
      const auto& shuffle1 = shuffle_op1.shuffle;

      // ... (shuffle canonicalization and splat-group handling elided)

      int index;
      if (wasm::SimdShuffle::TryMatchSplat<4>(shuffle0, &index) &&
          graph_.Get(op0.input(index >> 2)).opcode == Opcode::kLoad) {
        ShufflePackNode* pnode = NewShufflePackNode(
            node_group,
            ShufflePackNode::SpecificInfo::Kind::kS256Load32Transform);
        pnode->info().set_splat_index(index);
        return pnode;
      } else if (wasm::SimdShuffle::TryMatchSplat<2>(shuffle0, &index) &&
                 graph_.Get(op0.input(index >> 1)).opcode == Opcode::kLoad) {
        ShufflePackNode* pnode = NewShufflePackNode(
            node_group,
            ShufflePackNode::SpecificInfo::Kind::kS256Load64Transform);
        pnode->info().set_splat_index(index);
        return pnode;
      }

#ifdef V8_TARGET_ARCH_X64
      if (ShufflePackNode* pnode =
              X64TryMatch256Shuffle(node_group, shuffle0, shuffle1)) {
        // Recurse into the shuffle inputs and attach them as operands.
        for (int i = 0; i < value_in_count; ++i) {
          NodeGroup operands(graph_.Get(node_group[0]).input(i),
                             graph_.Get(node_group[1]).input(i));
          PackNode* child = BuildTreeRec(operands, depth + 1);
          if (!child) {
            return nullptr;
          }
          pnode->SetOperand(i, child);
        }
        return pnode;
      }
#endif  // V8_TARGET_ARCH_X64

      TRACE("Unsupported Simd128Shuffle\n");
      return Try256ShuffleMatchLoad8x8U(node_group, shuffle0, shuffle1);
    }
    case Opcode::kSimd128ReplaceLane: {
      ExtendIntToF32x4Info info;
      if (TryMatchExtendIntToF32x4(node_group, &info)) {
        TRACE("Match extend i8x4/i16x4 to f32x4\n");
        PackNode* pnode = NewBundlePackNode(
            node_group, info.extend_from, info.start_lane, info.lane_size,
            info.is_sign_extract, info.is_sign_convert);
        return pnode;
      }
      if (depth == 0) {
        TRACE("Do not force pack at root #%d:%s\n", node0.id(),
              GetSimdOpcodeName(op0).c_str());
        return nullptr;
      }
      return NewForcePackNode(
          node_group,
          node0 == node1 ? ForcePackNode::kSplat : ForcePackNode::kGeneral,
          graph_);
    }

    default:
      TRACE("Default branch #%d:%s\n", node0.id(),
            GetSimdOpcodeName(op0).c_str());
      break;
  }
  return nullptr;
}
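// End-to-end sketch of BuildTreeRec (illustrative, wasm-level view): for
//   v128.store(p,      i32x4.add(v128.load(p),      v128.load(q)))
//   v128.store(p + 16, i32x4.add(v128.load(p + 16), v128.load(q + 16)))
// the store pair is the seed, the two adds pack recursively, and the two
// contiguous load pairs become leaves, yielding one 256-bit
// load/load/add/store chain.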
void WasmRevecAnalyzer::MergeSLPTree(SLPTree& slp_tree) {
  // Merge the tree's intersect packnodes into the analyzer-wide map.
  for (auto& entry : slp_tree.GetIntersectNodeMapping()) {
    auto it = revectorizable_intersect_node_.find(entry.first);
    if (it == revectorizable_intersect_node_.end()) {
      bool result;
      std::tie(it, result) =
          revectorizable_intersect_node_.emplace(entry.first, phase_zone_);
      DCHECK(result);
    }

    ZoneVector<PackNode*>& intersect_pnodes = it->second;
    intersect_pnodes.insert(intersect_pnodes.end(), entry.second.begin(),
                            entry.second.end());
    SLOW_DCHECK(std::unique(intersect_pnodes.begin(), intersect_pnodes.end()) ==
                intersect_pnodes.end());
  }

  // The plain node mapping (GetNodeMapping()) is merged into
  // revectorizable_node_ in the same way (elided).
}
bool WasmRevecAnalyzer::IsSupportedReduceSeed(const Operation& op) {
  if (!op.Is<Simd128BinopOp>()) {
    return false;
  }
  switch (op.Cast<Simd128BinopOp>().kind) {
#define CASE(op_128) case Simd128BinopOp::Kind::k##op_128:
    REDUCE_SEED_KIND(CASE)
    return true;
#undef CASE
    default:
      return false;
  }
}
void WasmRevecAnalyzer::ProcessBlock(const Block& block) {
  // simd128_stores: stores of this block, ordered by address via the
  // StoreLoadInfo comparator (set declaration elided in this excerpt).
  for (const Operation& op : base::Reversed(graph_.operations(block))) {
    if (const StoreOp* store_op = op.TryCast<StoreOp>()) {
      if (store_op->stored_rep == MemoryRepresentation::Simd128()) {
        StoreLoadInfo<StoreOp> info(&graph_, store_op);
        if (info.IsValid()) {
          simd128_stores.insert(info);
        }
      }
    }
    // Try to find reduce ops that can be used as revectorization seeds.
    if (IsSupportedReduceSeed(op)) {
      const Simd128BinopOp& binop = op.Cast<Simd128BinopOp>();
      V<Simd128> left_index = binop.left();
      V<Simd128> right_index = binop.right();
      const Operation& left_op = graph_.Get(left_index);
      const Operation& right_op = graph_.Get(right_index);

      if (left_index != right_index && left_op.opcode == right_op.opcode &&
          IsSameOpAndKind(left_op, right_op)) {
        reduce_seeds_.push_back({left_index, right_index});
      }
    }
  }

  // Pair up stores that are exactly kSimd128Size bytes apart.
  if (simd128_stores.size() >= 2) {
    for (auto it = std::next(simd128_stores.cbegin()),
              end = simd128_stores.cend();
         it != end;) {
      const StoreLoadInfo<StoreOp>& info0 = *std::prev(it);
      const StoreLoadInfo<StoreOp>& info1 = *it;
      auto diff = info1 - info0;

      if (diff.has_value()) {
        const int value = diff.value();
        DCHECK_GE(value, 0);
        if (value == kSimd128Size) {
          store_seeds_.push_back(
              {graph_.Index(*info0.op()), graph_.Index(*info1.op())});
          if (std::distance(it, end) < 2) {
            break;
          }
          std::advance(it, 2);
          continue;
        }
      }
      ++it;
    }
  }
}
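// Seed example (illustrative): two Simd128 stores at base+0 and base+16 are
// kSimd128Size bytes apart, so their pair enters store_seeds_; reduce seeds
// instead pair the two same-kind inputs of a supported binop, e.g. the two
// halves combined by the final step of a pairwise reduction.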
void WasmRevecAnalyzer::Run() {
  for (Block& block : base::Reversed(graph_.blocks())) {
    ProcessBlock(block);
  }

  if (store_seeds_.empty() && reduce_seeds_.empty()) {
    TRACE("Empty seed\n");
    return;
  }

  if (v8_flags.trace_wasm_revectorize) {
    PrintF("store seeds:\n");
    for (auto pair : store_seeds_) {
      PrintF("#%u ", pair.first.id());
      graph_.Get(pair.first).Print();
      PrintF("#%u ", pair.second.id());
      graph_.Get(pair.second).Print();
    }

    PrintF("reduce seeds:\n");
    for (auto pair : reduce_seeds_) {
      PrintF("#%u, ", pair.first.id());
      PrintF("#%u ", pair.second.id());
      PrintF("\n");
    }
  }

  ZoneVector<std::pair<OpIndex, OpIndex>> all_seeds(
      store_seeds_.begin(), store_seeds_.end(), phase_zone_);
  all_seeds.insert(all_seeds.end(), reduce_seeds_.begin(), reduce_seeds_.end());

  for (auto pair : all_seeds) {
    NodeGroup roots(pair.first, pair.second);
    // Build an SLP tree from this seed (constructor arguments assumed).
    SLPTree slp_tree(graph_, this, phase_zone_);
    PackNode* root = slp_tree.BuildTree(roots);
    if (!root) {
      TRACE("Build tree failed!\n");
      continue;
    }

    slp_tree.Print("After build tree");
    MergeSLPTree(slp_tree);
  }
  if (revectorizable_node_.empty()) return;

  // Skip revectorization when it is not beneficial.
  if (!DecideVectorize()) {
    revectorizable_node_.clear();
    revectorizable_intersect_node_.clear();
  } else {
    should_reduce_ = true;
    Print("Decide to vectorize");
  }
}
bool WasmRevecAnalyzer::DecideVectorize() {
  TRACE("Enter %s\n", __func__);

  int save = 0, cost = 0;
  ForEach(
      [&](PackNode const* pnode) {
        const NodeGroup& nodes = pnode->nodes();
        // Packing a store pair saves an additional operation.
        if (graph_.Get(nodes[0]).opcode == Opcode::kStore) {
          if (nodes[0] > nodes[1]) save++;
          // ...
        }
        // ...
#ifdef V8_TARGET_ARCH_X64
        // ... (architecture-specific accounting elided)
#endif  // V8_TARGET_ARCH_X64
        for (int i = 1; i < static_cast<int>(nodes.size()); i++) {
          if (nodes[i] == nodes[0]) continue;
          save++;
        }

        for (int i = 0; i < static_cast<int>(nodes.size()); i++) {
          if (i > 0 && nodes[i] == nodes[0]) continue;

          // Every use outside a PackNode needs an extract, counted as cost.
          for (auto use : use_map_->uses(nodes[i])) {
            if (!GetPackNode(use) || GetPackNode(use)->is_force_packing()) {
              TRACE("External use edge: (%d:%s) -> (%d:%s)\n", use.id(),
                    GetSimdOpcodeName(graph_.Get(use)).c_str(), nodes[i].id(),
                    GetSimdOpcodeName(graph_.Get(nodes[i])).c_str());
              ++cost;
            }
          }
        }
      },
      revectorizable_node_);

  ForEach(
      [&](PackNode const* pnode) {
        // ... (same accounting for intersect packnodes, elided)
      },
      revectorizable_intersect_node_);

  TRACE("Save: %d, cost: %d\n", save, cost);
  return save > cost;
}
void WasmRevecAnalyzer::Print(const char* info) {
  if (!v8_flags.trace_wasm_revectorize) {
    return;
  }

  TRACE("%s, %zu revectorizable nodes:\n", info, revectorizable_node_.size());
  ForEach([&](PackNode const* pnode) { pnode->Print(&graph_); },
          revectorizable_node_);
  TRACE("%s, %zu revectorizable intersect nodes:\n", info,
        revectorizable_intersect_node_.size());
  ForEach([&](PackNode const* pnode) { pnode->Print(&graph_); },
          revectorizable_intersect_node_);
}
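// Summary of the analyzer flow (derived from the code above): ProcessBlock
// collects store and reduce seeds, Run builds an SLP tree per seed and
// merges the trees, and DecideVectorize applies a simple balance: each
// packed pair saves a 128-bit operation, while every value escaping its
// PackNode costs an extract; revectorization proceeds only if save > cost.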