v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
instruction-selector-arm64.cc
1// Copyright 2014 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <optional>
6
7#include "src/base/bits.h"
8#include "src/base/logging.h"
11#include "src/common/globals.h"
20#include "src/flags/flags.h"
21
22namespace v8 {
23namespace internal {
24namespace compiler {
25
26using namespace turboshaft; // NOLINT(build/namespaces)
27
28enum ImmediateMode {
29 kArithmeticImm, // 12 bit unsigned immediate shifted left 0 or 12 bits
30 kShift32Imm, // 0 - 31
31 kShift64Imm, // 0 - 63
32 kLogical32Imm,
33 kLogical64Imm,
34 kLoadStoreImm8, // signed 8 bit or 12 bit unsigned scaled by access size
35 kLoadStoreImm16,
36 kLoadStoreImm32,
37 kLoadStoreImm64,
38 kConditionalCompareImm,
39 kNoImmediate
40};
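// Illustrative note (not exhaustive): kArithmeticImm corresponds to the
// AArch64 add/sub immediate encoding, a 12-bit unsigned value optionally
// shifted left by 12. For example 0xFFF and 0xFFF000 are encodable, while
// 0x1001 is not and falls back to a register operand.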
41
42// Adds Arm64-specific methods for generating operands.
43class Arm64OperandGeneratorT final : public OperandGeneratorT {
44 public:
45 explicit Arm64OperandGeneratorT(InstructionSelectorT* selector)
46 : OperandGeneratorT(selector) {}
47
48 InstructionOperand UseOperand(OpIndex node, ImmediateMode mode) {
49 if (CanBeImmediate(node, mode)) {
50 return UseImmediate(node);
51 }
52 return UseRegister(node);
53 }
54
55 bool IsImmediateZero(OpIndex node) {
56 if (const ConstantOp* constant =
57 selector()->Get(node).TryCast<ConstantOp>()) {
58 if (constant->IsIntegral() && constant->integral() == 0) return true;
59 if (constant->kind == ConstantOp::Kind::kFloat32) {
60 return constant->float32().get_bits() == 0;
61 }
62 if (constant->kind == ConstantOp::Kind::kFloat64) {
63 return constant->float64().get_bits() == 0;
64 }
65 }
66 return false;
67 }
68
69 // Use the zero register if the node has the immediate value zero, otherwise
70 // assign a register.
71 InstructionOperand UseRegisterOrImmediateZero(OpIndex node) {
72 if (IsImmediateZero(node)) {
73 return UseImmediate(node);
74 }
75 return UseRegister(node);
76 }
77
78 // Use the zero register if the node has the immediate value zero, otherwise
79 // assign a register, keeping it alive for the whole sequence of continuation
80 // instructions.
81 InstructionOperand UseRegisterAtEndOrImmediateZero(OpIndex node) {
82 if (IsImmediateZero(node)) {
83 return UseImmediate(node);
84 }
85 return this->UseRegisterAtEnd(node);
86 }
87
88 // Use the provided node if it has the required value, or create a
89 // TempImmediate otherwise.
90 InstructionOperand UseImmediateOrTemp(OpIndex node, int32_t value) {
91 if (selector()->Get(node).Cast<ConstantOp>().signed_integral() == value) {
92 return UseImmediate(node);
93 }
94 return TempImmediate(value);
95 }
96
97 bool IsIntegerConstant(OpIndex node) const {
98 int64_t unused;
99 return selector()->MatchSignedIntegralConstant(node, &unused);
100 }
101
102 std::optional<int64_t> GetOptionalIntegerConstant(OpIndex operation) {
103 if (int64_t constant; MatchSignedIntegralConstant(operation, &constant)) {
104 return constant;
105 }
106 return std::nullopt;
107 }
108
109 bool CanBeImmediate(OpIndex node, ImmediateMode mode) {
110 const ConstantOp* constant = selector()->Get(node).TryCast<ConstantOp>();
111 if (!constant) return false;
112 if (constant->kind == ConstantOp::Kind::kCompressedHeapObject) {
113 if (!COMPRESS_POINTERS_BOOL) return false;
114 // For builtin code we need static roots
115 if (selector()->isolate()->bootstrapper() && !V8_STATIC_ROOTS_BOOL) {
116 return false;
117 }
118 const RootsTable& roots_table = selector()->isolate()->roots_table();
119 RootIndex root_index;
120 Handle<HeapObject> value = constant->handle();
121 if (roots_table.IsRootHandle(value, &root_index)) {
122 if (!RootsTable::IsReadOnly(root_index)) return false;
123 return CanBeImmediate(MacroAssemblerBase::ReadOnlyRootPtr(
124 root_index, selector()->isolate()),
125 mode);
126 }
127 return false;
128 }
129
130 int64_t value;
131 return selector()->MatchSignedIntegralConstant(node, &value) &&
132 CanBeImmediate(value, mode);
133 }
134
135 bool CanBeImmediate(int64_t value, ImmediateMode mode) {
136 unsigned ignored;
137 switch (mode) {
138 case kLogical32Imm:
139 // TODO(dcarney): some unencodable values can be handled by
140 // switching instructions.
141 return internal::Assembler::IsImmLogical(
142 static_cast<uint32_t>(value), 32, &ignored, &ignored, &ignored);
143 case kLogical64Imm:
144 return internal::Assembler::IsImmLogical(
145 static_cast<uint64_t>(value), 64, &ignored, &ignored, &ignored);
146 case kArithmeticImm:
147 return internal::Assembler::IsImmAddSub(value);
148 case kLoadStoreImm8:
149 return IsLoadStoreImmediate(value, 0);
150 case kLoadStoreImm16:
151 return IsLoadStoreImmediate(value, 1);
152 case kLoadStoreImm32:
153 return IsLoadStoreImmediate(value, 2);
154 case kLoadStoreImm64:
155 return IsLoadStoreImmediate(value, 3);
156 case kNoImmediate:
157 return false;
158 case kConditionalCompareImm:
159 return internal::Assembler::IsImmConditionalCompare(value);
160 case kShift32Imm: // Fall through.
161 case kShift64Imm:
162 // Shift operations only observe the bottom 5 or 6 bits of the value.
163 // All possible shifts can be encoded by discarding bits which have no
164 // effect.
165 return true;
166 }
167 return false;
168 }
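// Illustrative example for the shift cases above: only the low 5 (Word32) or
// 6 (Word64) bits of the amount matter, so a 32-bit shift by 33 behaves like
// a shift by 1 and any shift-amount constant can be used as an immediate.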
169
170 bool CanBeLoadStoreShiftImmediate(OpIndex node, MachineRepresentation rep) {
171 if (uint64_t constant;
172 selector()->MatchUnsignedIntegralConstant(node, &constant) &&
173 constant == static_cast<uint64_t>(ElementSizeLog2Of(rep))) {
174 return true;
175 }
176 return false;
177 }
178
179 private:
180 bool IsLoadStoreImmediate(int64_t value, unsigned size) {
181 return internal::Assembler::IsImmLSScaled(value, size) ||
182 internal::Assembler::IsImmLSUnscaled(value);
183 }
184};
185
186namespace {
187
188void VisitRR(InstructionSelectorT* selector, ArchOpcode opcode, OpIndex node) {
189 Arm64OperandGeneratorT g(selector);
190 selector->Emit(opcode, g.DefineAsRegister(node),
191 g.UseRegister(selector->input_at(node, 0)));
192}
193
194void VisitRRR(InstructionSelectorT* selector, InstructionCode opcode,
195 OpIndex node) {
196 Arm64OperandGeneratorT g(selector);
197 selector->Emit(opcode, g.DefineAsRegister(node),
198 g.UseRegister(selector->input_at(node, 0)),
199 g.UseRegister(selector->input_at(node, 1)));
200}
201
202#if V8_ENABLE_WEBASSEMBLY
203void VisitRR(InstructionSelectorT* selector, InstructionCode opcode,
204 OpIndex node) {
205 Arm64OperandGeneratorT g(selector);
206 selector->Emit(opcode, g.DefineAsRegister(node),
207 g.UseRegister(selector->input_at(node, 0)));
208}
209
210void VisitSimdShiftRRR(InstructionSelectorT* selector, ArchOpcode opcode,
211 OpIndex node, int width) {
212 Arm64OperandGeneratorT g(selector);
213 int64_t constant;
214 if (selector->MatchSignedIntegralConstant(selector->input_at(node, 1),
215 &constant)) {
216 if (constant % width == 0) {
217 selector->EmitIdentity(node);
218 } else {
219 selector->Emit(opcode, g.DefineAsRegister(node),
220 g.UseRegister(selector->input_at(node, 0)),
221 g.UseImmediate(selector->input_at(node, 1)));
222 }
223 } else {
224 selector->Emit(opcode, g.DefineAsRegister(node),
225 g.UseRegister(selector->input_at(node, 0)),
226 g.UseRegister(selector->input_at(node, 1)));
227 }
228}
229
230void VisitRRI(InstructionSelectorT* selector, InstructionCode opcode,
231 OpIndex node) {
232 Arm64OperandGeneratorT g(selector);
233 const Operation& op = selector->Get(node);
234 int imm = op.template Cast<Simd128ExtractLaneOp>().lane;
235 selector->Emit(opcode, g.DefineAsRegister(node), g.UseRegister(op.input(0)),
236 g.UseImmediate(imm));
237}
238
239void VisitRRIR(InstructionSelectorT* selector, InstructionCode opcode,
240 OpIndex node) {
241 const Simd128ReplaceLaneOp& op =
242 selector->Get(node).template Cast<Simd128ReplaceLaneOp>();
243 Arm64OperandGeneratorT g(selector);
244 selector->Emit(opcode, g.DefineAsRegister(node), g.UseRegister(op.input(0)),
245 g.UseImmediate(op.lane), g.UseUniqueRegister(op.input(1)));
246}
247#endif // V8_ENABLE_WEBASSEMBLY
248
249void VisitRRO(InstructionSelectorT* selector, ArchOpcode opcode, OpIndex node,
250 ImmediateMode operand_mode) {
251 Arm64OperandGeneratorT g(selector);
252 selector->Emit(opcode, g.DefineAsRegister(node),
253 g.UseRegister(selector->input_at(node, 0)),
254 g.UseOperand(selector->input_at(node, 1), operand_mode));
255}
256
257struct ExtendingLoadMatcher {
258 ExtendingLoadMatcher(OpIndex node, InstructionSelectorT* selector)
259 : matches_(false), selector_(selector), immediate_(0) {
260 Initialize(node);
261 }
262
263 bool Matches() const { return matches_; }
264
265 OpIndex base() const {
266 DCHECK(Matches());
267 return base_;
268 }
269 int64_t immediate() const {
270 DCHECK(Matches());
271 return immediate_;
272 }
273 ArchOpcode opcode() const {
274 DCHECK(Matches());
275 return opcode_;
276 }
277
278 private:
279 bool matches_;
280 InstructionSelectorT* selector_;
281 OpIndex base_{};
282 int64_t immediate_;
283 ArchOpcode opcode_;
284
285 void Initialize(OpIndex node) {
286 const ShiftOp& shift = selector_->Get(node).template Cast<ShiftOp>();
287 DCHECK(shift.kind == ShiftOp::Kind::kShiftRightArithmetic ||
288 shift.kind == ShiftOp::Kind::kShiftRightArithmeticShiftOutZeros);
289 // When loading a 64-bit value and shifting by 32, we should
290 // just load and sign-extend the interesting 4 bytes instead.
291 // This happens, for example, when we're loading and untagging SMIs.
292 const Operation& lhs = selector_->Get(shift.left());
293 int64_t constant_rhs;
294
295 if (lhs.Is<LoadOp>() &&
296 selector_->MatchIntegralWord64Constant(shift.right(), &constant_rhs) &&
297 constant_rhs == 32 && selector_->CanCover(node, shift.left())) {
298 Arm64OperandGeneratorT g(selector_);
299 const LoadOp& load = lhs.Cast<LoadOp>();
300 base_ = load.base();
301 opcode_ = kArm64Ldrsw;
302 if (load.index().has_value()) {
303 int64_t index_constant;
304 if (selector_->MatchIntegralWord64Constant(load.index().value(),
305 &index_constant)) {
306 DCHECK_EQ(load.element_size_log2, 0);
307 immediate_ = index_constant + 4;
308 matches_ = g.CanBeImmediate(immediate_, kLoadStoreImm32);
309 }
310 } else {
311 immediate_ = load.offset + 4;
312 matches_ = g.CanBeImmediate(immediate_, kLoadStoreImm32);
313 }
314 }
315 }
316};
317
318bool TryMatchExtendingLoad(InstructionSelectorT* selector, OpIndex node) {
319 ExtendingLoadMatcher m(node, selector);
320 return m.Matches();
321}
322
323bool TryEmitExtendingLoad(InstructionSelectorT* selector, OpIndex node) {
324 ExtendingLoadMatcher m(node, selector);
325 Arm64OperandGeneratorT g(selector);
326 if (m.Matches()) {
327 InstructionOperand inputs[2];
328 inputs[0] = g.UseRegister(m.base());
329 InstructionCode opcode =
330 m.opcode() | AddressingModeField::encode(kMode_MRI);
331 DCHECK(is_int32(m.immediate()));
332 inputs[1] = g.TempImmediate(static_cast<int32_t>(m.immediate()));
333 InstructionOperand outputs[] = {g.DefineAsRegister(node)};
334 selector->Emit(opcode, arraysize(outputs), outputs, arraysize(inputs),
335 inputs);
336 return true;
337 }
338 return false;
339}
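// Illustrative sketch of the extending-load rewrite (little-endian):
//   y = Load64(base, #ofs) >> 32   // arithmetic shift
// is emitted as
//   ldrsw y, [base, #ofs + 4]
// i.e. only the upper four bytes are loaded and sign-extended.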
340
341bool TryMatchAnyShift(InstructionSelectorT* selector, OpIndex node,
342 OpIndex input_node, InstructionCode* opcode, bool try_ror,
343 RegisterRepresentation rep) {
344 Arm64OperandGeneratorT g(selector);
345
346 if (!selector->CanCover(node, input_node)) return false;
347 if (const ShiftOp* shift = selector->Get(input_node).TryCast<ShiftOp>()) {
348 // Unlike Turbofan, the representation should always match.
349 DCHECK_EQ(shift->rep, rep);
350 if (shift->rep != rep) return false;
351 if (!g.IsIntegerConstant(shift->right())) return false;
352
353 switch (shift->kind) {
354 case ShiftOp::Kind::kShiftLeft:
355 *opcode |= AddressingModeField::encode(kMode_Operand2_R_LSL_I);
356 return true;
357 case ShiftOp::Kind::kShiftRightLogical:
358 *opcode |= AddressingModeField::encode(kMode_Operand2_R_LSR_I);
359 return true;
360 case ShiftOp::Kind::kShiftRightArithmetic:
361 case ShiftOp::Kind::kShiftRightArithmeticShiftOutZeros:
362 if (rep == WordRepresentation::Word64() &&
363 TryMatchExtendingLoad(selector, input_node)) {
364 return false;
365 }
366 *opcode |= AddressingModeField::encode(kMode_Operand2_R_ASR_I);
367 return true;
368 case ShiftOp::Kind::kRotateRight:
369 if (try_ror) {
370 *opcode |= AddressingModeField::encode(kMode_Operand2_R_ROR_I);
371 return true;
372 }
373 return false;
374 case ShiftOp::Kind::kRotateLeft:
375 return false;
376 }
377 }
378 return false;
379}
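// Illustrative example: when the shift is folded into the operand as above,
// Add(a, Word64ShiftLeft(b, 3)) can be emitted as a single
//   add x0, x1, x2, lsl #3
// rather than a separate shift followed by an add.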
380
381bool TryMatchBitwiseAndSmallMask(OperationMatcher& matcher, OpIndex op,
382 OpIndex* left, int32_t* mask) {
383 if (const ChangeOp* change_op =
384 matcher.TryCast<Opmask::kChangeInt32ToInt64>(op)) {
385 return TryMatchBitwiseAndSmallMask(matcher, change_op->input(), left, mask);
386 }
387 if (const WordBinopOp* bitwise_and =
388 matcher.TryCast<Opmask::kWord32BitwiseAnd>(op)) {
389 if (matcher.MatchIntegralWord32Constant(bitwise_and->right(), mask) &&
390 (*mask == 0xFF || *mask == 0xFFFF)) {
391 *left = bitwise_and->left();
392 return true;
393 }
394 if (matcher.MatchIntegralWord32Constant(bitwise_and->left(), mask) &&
395 (*mask == 0xFF || *mask == 0xFFFF)) {
396 *left = bitwise_and->right();
397 return true;
398 }
399 }
400 return false;
401}
402
403bool TryMatchSignExtendShift(InstructionSelectorT* selector, OpIndex op,
404 OpIndex* left, int32_t* shift_by) {
405 if (const ChangeOp* change_op =
406 selector->TryCast<Opmask::kChangeInt32ToInt64>(op)) {
407 return TryMatchSignExtendShift(selector, change_op->input(), left,
408 shift_by);
409 }
410
411 if (const ShiftOp* sar =
412 selector->TryCast<Opmask::kWord32ShiftRightArithmetic>(op)) {
413 const Operation& sar_lhs = selector->Get(sar->left());
414 if (sar_lhs.Is<Opmask::kWord32ShiftLeft>() &&
415 selector->CanCover(op, sar->left())) {
416 const ShiftOp& shl = sar_lhs.Cast<ShiftOp>();
417 int32_t sar_by, shl_by;
418 if (selector->MatchIntegralWord32Constant(sar->right(), &sar_by) &&
419 selector->MatchIntegralWord32Constant(shl.right(), &shl_by) &&
420 sar_by == shl_by && (sar_by == 16 || sar_by == 24)) {
421 *left = shl.left();
422 *shift_by = sar_by;
423 return true;
424 }
425 }
426 }
427 return false;
428}
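// Illustrative identity used above: (x << 24) >> 24 sign-extends the low
// byte and (x << 16) >> 16 the low half-word, so the shift pair can be
// replaced by an SXTB/SXTH extended-register operand.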
429
430bool TryMatchAnyExtend(Arm64OperandGeneratorT* g,
431 InstructionSelectorT* selector, OpIndex node,
432 OpIndex left_node, OpIndex right_node,
433 InstructionOperand* left_op,
434 InstructionOperand* right_op, InstructionCode* opcode) {
435 if (!selector->CanCover(node, right_node)) return false;
436
437 const Operation& right = selector->Get(right_node);
438 OpIndex bitwise_and_left;
439 int32_t mask;
440 if (TryMatchBitwiseAndSmallMask(*selector, right_node, &bitwise_and_left,
441 &mask)) {
442 *left_op = g->UseRegister(left_node);
443 *right_op = g->UseRegister(bitwise_and_left);
444 *opcode |= AddressingModeField::encode(
445 (mask == 0xFF) ? kMode_Operand2_R_UXTB : kMode_Operand2_R_UXTH);
446 return true;
447 }
448
449 OpIndex shift_input_left;
450 int32_t shift_by;
451 if (TryMatchSignExtendShift(selector, right_node, &shift_input_left,
452 &shift_by)) {
453 *left_op = g->UseRegister(left_node);
454 *right_op = g->UseRegister(shift_input_left);
455 *opcode |= AddressingModeField::encode(
456 (shift_by == 24) ? kMode_Operand2_R_SXTB : kMode_Operand2_R_SXTH);
457 return true;
458 }
459
460 if (const ChangeOp* change_op =
461 right.TryCast<Opmask::kChangeInt32ToInt64>()) {
462 // Use extended register form.
463 *opcode |= AddressingModeField::encode(kMode_Operand2_R_SXTW);
464 *left_op = g->UseRegister(left_node);
465 *right_op = g->UseRegister(change_op->input());
466 return true;
467 }
468 return false;
469}
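// Illustrative examples of the extended-register forms selected above:
//   add x0, x1, w2, uxtb   // right operand was masked with 0xFF
//   add x0, x1, w2, sxth   // right operand was sign-extended from 16 bits
//   add x0, x1, w2, sxtw   // right operand was a ChangeInt32ToInt64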
470
471bool TryMatchLoadStoreShift(Arm64OperandGeneratorT* g,
472 InstructionSelectorT* selector,
473 MachineRepresentation rep, OpIndex node,
474 OpIndex index, InstructionOperand* index_op,
475 InstructionOperand* shift_immediate_op) {
476 if (!selector->CanCover(node, index)) return false;
477 if (const ChangeOp* change =
478 selector->Get(index).TryCast<Opmask::kChangeUint32ToUint64>();
479 change && selector->CanCover(index, change->input())) {
480 index = change->input();
481 }
482 const ShiftOp* shift = selector->Get(index).TryCast<Opmask::kShiftLeft>();
483 if (shift == nullptr) return false;
484 if (!g->CanBeLoadStoreShiftImmediate(shift->right(), rep)) return false;
485 *index_op = g->UseRegister(shift->left());
486 *shift_immediate_op = g->UseImmediate(shift->right());
487 return true;
488}
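// Illustrative example: an access of the form *(base + (index << 3)) with a
// 64-bit element size can be emitted as
//   ldr x0, [x1, x2, lsl #3]
// because the shift amount matches log2 of the access size.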
489
490// Bitfields describing binary operator properties:
491// CanCommuteField is true if we can switch the two operands, potentially
492// requiring commuting the flags continuation condition.
493using CanCommuteField = base::BitField8<bool, 1, 1>;
494// MustCommuteCondField is true when we need to commute the flags continuation
495// condition in order to switch the operands.
496using MustCommuteCondField = base::BitField8<bool, 2, 1>;
497// IsComparisonField is true when the operation is a comparison and has no
498// result other than the condition.
499using IsComparisonField = base::BitField8<bool, 3, 1>;
500// IsAddSubField is true when an instruction is encoded as ADD or SUB.
501using IsAddSubField = base::BitField8<bool, 4, 1>;
502
503// Get properties of a binary operator.
504uint8_t GetBinopProperties(InstructionCode opcode) {
505 uint8_t result = 0;
506 switch (opcode) {
507 case kArm64Cmp32:
508 case kArm64Cmp:
509 // We can commute CMP by switching the inputs and commuting
510 // the flags continuation.
511 result = CanCommuteField::update(result, true);
512 result = MustCommuteCondField::update(result, true);
513 result = IsComparisonField::update(result, true);
514 // The CMP and CMN instructions are encoded as SUB or ADD
515 // with zero output register, and therefore support the same
516 // operand modes.
517 result = IsAddSubField::update(result, true);
518 break;
519 case kArm64Cmn32:
520 case kArm64Cmn:
521 result = CanCommuteField::update(result, true);
522 result = IsComparisonField::update(result, true);
523 result = IsAddSubField::update(result, true);
524 break;
525 case kArm64Add32:
526 case kArm64Add:
527 result = CanCommuteField::update(result, true);
528 result = IsAddSubField::update(result, true);
529 break;
530 case kArm64Sub32:
531 case kArm64Sub:
532 result = IsAddSubField::update(result, true);
533 break;
534 case kArm64Tst32:
535 case kArm64Tst:
536 result = CanCommuteField::update(result, true);
537 result = IsComparisonField::update(result, true);
538 break;
539 case kArm64And32:
540 case kArm64And:
541 case kArm64Or32:
542 case kArm64Or:
543 case kArm64Eor32:
544 case kArm64Eor:
545 result = CanCommuteField::update(result, true);
546 break;
547 default:
548 UNREACHABLE();
549 }
550 DCHECK_IMPLIES(MustCommuteCondField::decode(result),
551 CanCommuteField::decode(result));
552 return result;
553}
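// Illustrative reading of the property byte: for kArm64Cmp32 the CanCommute,
// MustCommuteCond, IsComparison and IsAddSub bits are all set, while
// kArm64And32 only has CanCommute set.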
554
555// Shared routine for multiple binary operations.
556template <typename Matcher>
557void VisitBinop(InstructionSelectorT* selector, OpIndex node, ArchOpcode opcode,
558 ImmediateMode operand_mode) {
559 FlagsContinuationT cont;
560 VisitBinop<Matcher>(selector, node, opcode, operand_mode, &cont);
561}
562
563void VisitBinopImpl(InstructionSelectorT* selector, OpIndex binop_idx,
564 OpIndex left_node, OpIndex right_node,
565 RegisterRepresentation rep, InstructionCode opcode,
566 ImmediateMode operand_mode, FlagsContinuationT* cont) {
567 DCHECK(!cont->IsConditionalSet() && !cont->IsConditionalBranch());
568 Arm64OperandGeneratorT g(selector);
569 constexpr uint32_t kMaxFlagSetInputs = 3;
570 constexpr uint32_t kMaxSelectInputs = 2;
571 constexpr uint32_t kMaxInputs = kMaxFlagSetInputs + kMaxSelectInputs;
572 InstructionOperand inputs[kMaxInputs];
573 size_t input_count = 0;
574 InstructionOperand outputs[1];
575 size_t output_count = 0;
576
577 uint8_t properties = GetBinopProperties(opcode);
578 bool can_commute = CanCommuteField::decode(properties);
579 bool must_commute_cond = MustCommuteCondField::decode(properties);
580 bool is_add_sub = IsAddSubField::decode(properties);
581
582 if (g.CanBeImmediate(right_node, operand_mode)) {
583 inputs[input_count++] = g.UseRegister(left_node);
584 inputs[input_count++] = g.UseImmediate(right_node);
585 } else if (can_commute && g.CanBeImmediate(left_node, operand_mode)) {
586 if (must_commute_cond) cont->Commute();
587 inputs[input_count++] = g.UseRegister(right_node);
588 inputs[input_count++] = g.UseImmediate(left_node);
589 } else if (is_add_sub &&
590 TryMatchAnyExtend(&g, selector, binop_idx, left_node, right_node,
591 &inputs[0], &inputs[1], &opcode)) {
592 input_count += 2;
593 } else if (is_add_sub && can_commute &&
594 TryMatchAnyExtend(&g, selector, binop_idx, right_node, left_node,
595 &inputs[0], &inputs[1], &opcode)) {
596 if (must_commute_cond) cont->Commute();
597 input_count += 2;
598 } else if (TryMatchAnyShift(selector, binop_idx, right_node, &opcode,
599 !is_add_sub, rep)) {
600 const ShiftOp& shift = selector->Get(right_node).Cast<ShiftOp>();
601 inputs[input_count++] = g.UseRegisterOrImmediateZero(left_node);
602 inputs[input_count++] = g.UseRegister(shift.left());
603 // We only need at most the last 6 bits of the shift.
604 int64_t constant;
605 selector->MatchSignedIntegralConstant(shift.right(), &constant);
606 inputs[input_count++] = g.UseImmediate(static_cast<int>(constant & 0x3F));
607 } else if (can_commute && TryMatchAnyShift(selector, binop_idx, left_node,
608 &opcode, !is_add_sub, rep)) {
609 if (must_commute_cond) cont->Commute();
610 const ShiftOp& shift = selector->Get(left_node).Cast<ShiftOp>();
611 inputs[input_count++] = g.UseRegisterOrImmediateZero(right_node);
612 inputs[input_count++] = g.UseRegister(shift.left());
613 // We only need at most the last 6 bits of the shift.
614 int64_t constant;
615 selector->MatchSignedIntegralConstant(shift.right(), &constant);
616 inputs[input_count++] = g.UseImmediate(static_cast<int>(constant & 0x3F));
617 } else {
618 inputs[input_count++] = g.UseRegisterOrImmediateZero(left_node);
619 inputs[input_count++] = g.UseRegister(right_node);
620 }
621
622 if (!IsComparisonField::decode(properties)) {
623 outputs[output_count++] = g.DefineAsRegister(binop_idx);
624 }
625
626 if (cont->IsSelect()) {
627 // Keep the values live until the end so that we can use operations that
628 // write registers to generate the condition, without accidentally
629 // overwriting the inputs.
630 inputs[input_count++] = g.UseRegisterAtEnd(cont->true_value());
631 inputs[input_count++] = g.UseRegisterAtEnd(cont->false_value());
632 }
633 DCHECK_NE(0u, input_count);
634 DCHECK((output_count != 0) || IsComparisonField::decode(properties));
635 DCHECK_GE(arraysize(inputs), input_count);
636 DCHECK_GE(arraysize(outputs), output_count);
637
638 selector->EmitWithContinuation(opcode, output_count, outputs, input_count,
639 inputs, cont);
640}
641
642// Shared routine for multiple binary operations.
643void VisitBinop(InstructionSelectorT* selector, OpIndex binop_idx,
644 RegisterRepresentation rep, InstructionCode opcode,
645 ImmediateMode operand_mode, FlagsContinuationT* cont) {
646 const Operation& binop = selector->Get(binop_idx);
647 OpIndex left_node = binop.input(0);
648 OpIndex right_node = binop.input(1);
649 return VisitBinopImpl(selector, binop_idx, left_node, right_node, rep, opcode,
650 operand_mode, cont);
651}
652
653void VisitBinop(InstructionSelectorT* selector, OpIndex node,
654 RegisterRepresentation rep, ArchOpcode opcode,
655 ImmediateMode operand_mode) {
656 FlagsContinuationT cont;
657 VisitBinop(selector, node, rep, opcode, operand_mode, &cont);
658}
659
660std::tuple<OpIndex, OpIndex> GetBinopLeftRightCstOnTheRight(
661 InstructionSelectorT* selector, const WordBinopOp& binop) {
662 OpIndex left = binop.left();
663 OpIndex right = binop.right();
664 if (!selector->Is<ConstantOp>(right) &&
665 WordBinopOp::IsCommutative(binop.kind) &&
666 selector->Is<ConstantOp>(left)) {
667 std::swap(left, right);
668 }
669 return {left, right};
670}
671
672void VisitAddSub(InstructionSelectorT* selector, OpIndex node,
673 ArchOpcode opcode, ArchOpcode negate_opcode) {
674 Arm64OperandGeneratorT g(selector);
675 const WordBinopOp& add_sub = selector->Get(node).Cast<WordBinopOp>();
676 auto [left, right] = GetBinopLeftRightCstOnTheRight(selector, add_sub);
677
678 if (std::optional<int64_t> constant_rhs =
679 g.GetOptionalIntegerConstant(right)) {
680 if (constant_rhs < 0 && constant_rhs > std::numeric_limits<int>::min() &&
681 g.CanBeImmediate(-*constant_rhs, kArithmeticImm)) {
682 selector->Emit(negate_opcode, g.DefineAsRegister(node),
683 g.UseRegister(left),
684 g.TempImmediate(static_cast<int32_t>(-*constant_rhs)));
685 return;
686 }
687 }
688 VisitBinop(selector, node, add_sub.rep, opcode, kArithmeticImm);
689}
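// Illustrative example: for Add(x, -8) the constant -8 is not encodable as an
// arithmetic immediate, but 8 is, so the negated opcode is used instead,
// e.g. sub x0, x1, #8.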
690
691// For multiplications by immediate of the form x * (2^k + 1), where k > 0,
692// return the value of k, otherwise return zero. This is used to reduce the
693// multiplication to addition with left shift: x + (x << k).
694template <typename Matcher>
695int32_t LeftShiftForReducedMultiply(Matcher* m) {
696 DCHECK(m->IsInt32Mul() || m->IsInt64Mul());
697 if (m->right().HasResolvedValue() && m->right().ResolvedValue() >= 3) {
698 uint64_t value_minus_one = m->right().ResolvedValue() - 1;
699 if (base::bits::IsPowerOfTwo(value_minus_one)) {
700 return base::bits::WhichPowerOfTwo(value_minus_one);
701 }
702 }
703 return 0;
704}
705
706// For multiplications by immediate of the form x * (2^k + 1), where k > 0,
707// return the value of k, otherwise return zero. This is used to reduce the
708// multiplication to addition with left shift: x + (x << k).
709int32_t LeftShiftForReducedMultiply(InstructionSelectorT* selector,
710 OpIndex rhs) {
711 Arm64OperandGeneratorT g(selector);
712 if (auto constant = g.GetOptionalIntegerConstant(rhs)) {
713 int64_t value_minus_one = constant.value() - 1;
714 if (base::bits::IsPowerOfTwo(value_minus_one)) {
715 return base::bits::WhichPowerOfTwo(value_minus_one);
716 }
717 }
718 return 0;
719}
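// Worked example (illustrative): for x * 9, 9 - 1 == 8 == 2^3, so k == 3 and
// the multiplication can be lowered to x + (x << 3), i.e.
//   add x0, x1, x1, lsl #3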
720
721// Try to match Add(Mul(x, y), z) and emit Madd(x, y, z) for it.
722template <typename MultiplyOpmaskT>
723bool TryEmitMultiplyAdd(InstructionSelectorT* selector, OpIndex add,
724 OpIndex lhs, OpIndex rhs, InstructionCode madd_opcode) {
725 const Operation& add_lhs = selector->Get(lhs);
726 if (!add_lhs.Is<MultiplyOpmaskT>() || !selector->CanCover(add, lhs)) {
727 return false;
728 }
729 // Check that multiply can't be reduced to an addition with shift later on.
730 const WordBinopOp& mul = add_lhs.Cast<WordBinopOp>();
731 if (LeftShiftForReducedMultiply(selector, mul.right()) != 0) return false;
732
733 Arm64OperandGeneratorT g(selector);
734 selector->Emit(madd_opcode, g.DefineAsRegister(add),
735 g.UseRegister(mul.left()), g.UseRegister(mul.right()),
736 g.UseRegister(rhs));
737 return true;
738}
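// Illustrative example: Add(Mul(x, y), z) becomes a single multiply-add,
//   madd x0, x1, x2, x3   // x0 = x3 + x1 * x2
// unless the multiply is better handled as an add-with-shift (see above).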
739
740bool TryEmitMultiplyAddInt32(InstructionSelectorT* selector, OpIndex add,
741 OpIndex lhs, OpIndex rhs) {
742 return TryEmitMultiplyAdd<Opmask::kWord32Mul>(selector, add, lhs, rhs,
743 kArm64Madd32);
744}
745
746bool TryEmitMultiplyAddInt64(InstructionSelectorT* selector, OpIndex add,
747 OpIndex lhs, OpIndex rhs) {
748 return TryEmitMultiplyAdd<Opmask::kWord64Mul>(selector, add, lhs, rhs,
749 kArm64Madd);
750}
751
752// Try to match Mul(Sub(0, x), y) and emit Mneg(x, y) for it.
753template <typename SubtractOpmaskT>
754bool TryEmitMultiplyNegate(InstructionSelectorT* selector, OpIndex mul,
755 OpIndex lhs, OpIndex rhs,
756 InstructionCode mneg_opcode) {
757 const Operation& mul_lhs = selector->Get(lhs);
758 if (!mul_lhs.Is<SubtractOpmaskT>() || !selector->CanCover(mul, lhs)) {
759 return false;
760 }
761 const WordBinopOp& sub = mul_lhs.Cast<WordBinopOp>();
762 Arm64OperandGeneratorT g(selector);
763 std::optional<int64_t> sub_lhs_constant =
764 g.GetOptionalIntegerConstant(sub.left());
765 if (!sub_lhs_constant.has_value() || sub_lhs_constant != 0) return false;
766 selector->Emit(mneg_opcode, g.DefineAsRegister(mul),
767 g.UseRegister(sub.right()), g.UseRegister(rhs));
768 return true;
769}
770
771bool TryEmitMultiplyNegateInt32(InstructionSelectorT* selector, OpIndex mul,
772 OpIndex lhs, OpIndex rhs) {
773 return TryEmitMultiplyNegate<Opmask::kWord32Sub>(selector, mul, lhs, rhs,
774 kArm64Mneg32);
775}
776
777bool TryEmitMultiplyNegateInt64(InstructionSelectorT* selector, OpIndex mul,
778 OpIndex lhs, OpIndex rhs) {
779 return TryEmitMultiplyNegate<Opmask::kWord64Sub>(selector, mul, lhs, rhs,
780 kArm64Mneg);
781}
782
783// Try to match Sub(a, Mul(x, y)) and emit Msub(x, y, a) for it.
784template <typename MultiplyOpmaskT>
785bool TryEmitMultiplySub(InstructionSelectorT* selector, OpIndex node,
786 InstructionCode msub_opcode) {
787 const WordBinopOp& sub = selector->Get(node).Cast<WordBinopOp>();
788 DCHECK_EQ(sub.kind, WordBinopOp::Kind::kSub);
789
790 // Select Msub(x, y, a) for Sub(a, Mul(x, y)).
791 const Operation& sub_rhs = selector->Get(sub.right());
792 if (sub_rhs.Is<MultiplyOpmaskT>() && selector->CanCover(node, sub.right())) {
793 const WordBinopOp& mul = sub_rhs.Cast<WordBinopOp>();
794 if (LeftShiftForReducedMultiply(selector, mul.right()) == 0) {
795 Arm64OperandGeneratorT g(selector);
796 selector->Emit(msub_opcode, g.DefineAsRegister(node),
797 g.UseRegister(mul.left()), g.UseRegister(mul.right()),
798 g.UseRegister(sub.left()));
799 return true;
800 }
801 }
802 return false;
803}
804
805std::tuple<InstructionCode, ImmediateMode> GetStoreOpcodeAndImmediate(
806 MemoryRepresentation stored_rep, bool paired) {
807 switch (stored_rep) {
808 case MemoryRepresentation::Int8():
809 case MemoryRepresentation::Uint8():
810 CHECK(!paired);
811 return {kArm64Strb, kLoadStoreImm8};
812 case MemoryRepresentation::Int16():
813 case MemoryRepresentation::Uint16():
814 CHECK(!paired);
815 return {kArm64Strh, kLoadStoreImm16};
816 case MemoryRepresentation::Int32():
817 case MemoryRepresentation::Uint32():
818 return {paired ? kArm64StrWPair : kArm64StrW, kLoadStoreImm32};
819 case MemoryRepresentation::Int64():
820 case MemoryRepresentation::Uint64():
821 return {paired ? kArm64StrPair : kArm64Str, kLoadStoreImm64};
822 case MemoryRepresentation::Float16():
823 CHECK(!paired);
824 return {kArm64StrH, kLoadStoreImm16};
825 case MemoryRepresentation::Float32():
826 CHECK(!paired);
827 return {kArm64StrS, kLoadStoreImm32};
828 case MemoryRepresentation::Float64():
829 CHECK(!paired);
830 return {kArm64StrD, kLoadStoreImm64};
831 case MemoryRepresentation::AnyTagged():
832 case MemoryRepresentation::TaggedPointer():
833 case MemoryRepresentation::TaggedSigned():
834 if (paired) {
835 // There is an inconsistency here on how we treat stores vs. paired
836 // stores. In the normal store case we have special opcodes for
837 // compressed fields and the backend decides whether to write 32 or 64
838 // bits. However, for pairs this does not make sense, since the
839 // paired values could have different representations (e.g.,
840 // compressed paired with word32). Therefore, we decide on the actual
841 // machine representation already in instruction selection.
842#ifdef V8_COMPRESS_POINTERS
843 static_assert(ElementSizeLog2Of(MachineRepresentation::kTagged) == 2);
844 return {kArm64StrWPair, kLoadStoreImm32};
845#else
846 static_assert(ElementSizeLog2Of(MachineRepresentation::kTagged) == 3);
847 return {kArm64StrPair, kLoadStoreImm64};
848#endif
849 }
850 return {kArm64StrCompressTagged,
851 COMPRESS_POINTERS_BOOL ? kLoadStoreImm32 : kLoadStoreImm64};
852 case MemoryRepresentation::AnyUncompressedTagged():
853 case MemoryRepresentation::UncompressedTaggedPointer():
854 case MemoryRepresentation::UncompressedTaggedSigned():
855 CHECK(!paired);
856 return {kArm64Str, kLoadStoreImm64};
857 case MemoryRepresentation::ProtectedPointer():
858 // We never store directly to protected pointers from generated code.
859 UNREACHABLE();
860 case MemoryRepresentation::IndirectPointer():
861 return {kArm64StrIndirectPointer, kLoadStoreImm32};
862 case MemoryRepresentation::SandboxedPointer():
863 CHECK(!paired);
864 return {kArm64StrEncodeSandboxedPointer, kLoadStoreImm64};
865 case MemoryRepresentation::Simd128():
866 CHECK(!paired);
867 return {kArm64StrQ, kNoImmediate};
868 case MemoryRepresentation::Simd256():
869 UNREACHABLE();
870 }
871}
872
873} // namespace
874
875void InstructionSelectorT::VisitTraceInstruction(OpIndex node) {}
876
877void InstructionSelectorT::VisitStackSlot(OpIndex node) {
878 const StackSlotOp& stack_slot = Cast<StackSlotOp>(node);
879 int slot = frame_->AllocateSpillSlot(stack_slot.size, stack_slot.alignment,
880 stack_slot.is_tagged);
881 OperandGenerator g(this);
882
883 Emit(kArchStackSlot, g.DefineAsRegister(node),
884 sequence()->AddImmediate(Constant(slot)), 0, nullptr);
885}
886
887void InstructionSelectorT::VisitAbortCSADcheck(OpIndex node) {
888 Arm64OperandGeneratorT g(this);
889 Emit(kArchAbortCSADcheck, g.NoOutput(),
890 g.UseFixed(this->input_at(node, 0), x1));
891}
892
893void EmitLoad(InstructionSelectorT* selector, OpIndex node,
894 InstructionCode opcode, ImmediateMode immediate_mode,
895 MachineRepresentation rep, OptionalOpIndex output = {}) {
896 Arm64OperandGeneratorT g(selector);
897 const LoadOp& load = selector->Get(node).Cast<LoadOp>();
898
899 // The LoadStoreSimplificationReducer transforms all loads into
900 // *(base + index).
901 OpIndex base = load.base();
902 OpIndex index = load.index().value();
903 DCHECK_EQ(load.offset, 0);
904 DCHECK_EQ(load.element_size_log2, 0);
905
906 InstructionOperand inputs[3];
907 size_t input_count = 0;
908 InstructionOperand output_op;
909
910 // If output is valid, use that as the output register. This is used when we
911 // merge a conversion into the load.
912 output_op = g.DefineAsRegister(output.valid() ? output.value() : node);
913
914 const Operation& base_op = selector->Get(base);
915 int64_t index_constant;
916 const bool is_index_constant =
917 selector->MatchSignedIntegralConstant(index, &index_constant);
918 if (base_op.Is<Opmask::kExternalConstant>() && is_index_constant) {
919 const ConstantOp& constant_base = base_op.Cast<ConstantOp>();
920 if (selector->CanAddressRelativeToRootsRegister(
921 constant_base.external_reference())) {
922 ptrdiff_t const delta =
923 index_constant +
924 MacroAssemblerBase::RootRegisterOffsetForExternalReference(
925 selector->isolate(), constant_base.external_reference());
926 input_count = 1;
927 // Check that the delta is a 32-bit integer due to the limitations of
928 // immediate operands.
929 if (is_int32(delta)) {
930 inputs[0] = g.UseImmediate(static_cast<int32_t>(delta));
931 opcode |= AddressingModeField::encode(kMode_Root);
932 selector->Emit(opcode, 1, &output_op, input_count, inputs);
933 return;
934 }
935 }
936 }
937
938 if (base_op.Is<LoadRootRegisterOp>()) {
939 DCHECK(is_index_constant);
940 input_count = 1;
941 inputs[0] = g.UseImmediate64(index_constant);
942 opcode |= AddressingModeField::encode(kMode_Root);
943 selector->Emit(opcode, 1, &output_op, input_count, inputs);
944 return;
945 }
946
947 inputs[0] = g.UseRegister(base);
948
949 if (is_index_constant) {
950 if (g.CanBeImmediate(index_constant, immediate_mode)) {
951 input_count = 2;
952 inputs[1] = g.UseImmediate64(index_constant);
953 opcode |= AddressingModeField::encode(kMode_MRI);
954 } else {
955 input_count = 2;
956 inputs[1] = g.UseRegister(index);
957 opcode |= AddressingModeField::encode(kMode_MRR);
958 }
959 } else {
960 if (TryMatchLoadStoreShift(&g, selector, rep, node, index, &inputs[1],
961 &inputs[2])) {
962 input_count = 3;
963 opcode |= AddressingModeField::encode(kMode_Operand2_R_LSL_I);
964 } else {
965 input_count = 2;
966 inputs[1] = g.UseRegister(index);
967 opcode |= AddressingModeField::encode(kMode_MRR);
968 }
969 }
970 selector->Emit(opcode, 1, &output_op, input_count, inputs);
971}
972
973#if V8_ENABLE_WEBASSEMBLY
974namespace {
975// Manually add base and index into a register to get the actual address.
976// This should be used prior to instructions that only support
977// immediate/post-index addressing, like ld1 and st1.
978InstructionOperand EmitAddBeforeLoadOrStore(InstructionSelectorT* selector,
979 OpIndex node,
980 InstructionCode* opcode) {
981 Arm64OperandGeneratorT g(selector);
982 *opcode |= AddressingModeField::encode(kMode_MRI);
983 OpIndex input0 = selector->input_at(node, 0);
984 OpIndex input1 = selector->input_at(node, 1);
985 InstructionOperand addr = g.TempRegister();
986 auto rhs = g.CanBeImmediate(input1, kArithmeticImm) ? g.UseImmediate(input1)
987 : g.UseRegister(input1);
988 selector->Emit(kArm64Add, addr, g.UseRegister(input0), rhs);
989 return addr;
990}
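// Illustrative example: ld1/st1 only accept a plain base register (or
// post-index), so a lane access at base + index is emitted as
//   add xTmp, xBase, xIndex
//   ld1 {v0.s}[1], [xTmp]
// with the add produced by the helper above (register names are arbitrary).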
991} // namespace
992
993void InstructionSelectorT::VisitLoadLane(OpIndex node) {
994 const Simd128LaneMemoryOp& load = this->Get(node).Cast<Simd128LaneMemoryOp>();
995 InstructionCode opcode = kArm64LoadLane;
996 opcode |= LaneSizeField::encode(load.lane_size() * kBitsPerByte);
997 if (load.kind.with_trap_handler) {
998 opcode |= AccessModeField::encode(kMemoryAccessProtectedMemOutOfBounds);
999 }
1000
1001 Arm64OperandGeneratorT g(this);
1002 InstructionOperand addr = EmitAddBeforeLoadOrStore(this, node, &opcode);
1003 Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(input_at(node, 2)),
1004 g.UseImmediate(load.lane), addr, g.TempImmediate(0));
1005}
1006
1007void InstructionSelectorT::VisitStoreLane(OpIndex node) {
1008 const Simd128LaneMemoryOp& store = Get(node).Cast<Simd128LaneMemoryOp>();
1009 InstructionCode opcode = kArm64StoreLane;
1010 opcode |= LaneSizeField::encode(store.lane_size() * kBitsPerByte);
1011 if (store.kind.with_trap_handler) {
1012 opcode |= AccessModeField::encode(kMemoryAccessProtectedMemOutOfBounds);
1013 }
1014
1015 Arm64OperandGeneratorT g(this);
1016 InstructionOperand addr = EmitAddBeforeLoadOrStore(this, node, &opcode);
1017 InstructionOperand inputs[4] = {
1018 g.UseRegister(input_at(node, 2)),
1019 g.UseImmediate(store.lane),
1020 addr,
1021 g.TempImmediate(0),
1022 };
1023
1024 Emit(opcode, 0, nullptr, 4, inputs);
1025}
1026
1027void InstructionSelectorT::VisitLoadTransform(OpIndex node) {
1028 const Simd128LoadTransformOp& op =
1029 this->Get(node).Cast<Simd128LoadTransformOp>();
1030 InstructionCode opcode = kArchNop;
1031 bool require_add = false;
1032 switch (op.transform_kind) {
1033 case Simd128LoadTransformOp::TransformKind::k8Splat:
1034 opcode = kArm64LoadSplat;
1035 opcode |= LaneSizeField::encode(8);
1036 require_add = true;
1037 break;
1038 case Simd128LoadTransformOp::TransformKind::k16Splat:
1039 opcode = kArm64LoadSplat;
1040 opcode |= LaneSizeField::encode(16);
1041 require_add = true;
1042 break;
1043 case Simd128LoadTransformOp::TransformKind::k32Splat:
1044 opcode = kArm64LoadSplat;
1045 opcode |= LaneSizeField::encode(32);
1046 require_add = true;
1047 break;
1048 case Simd128LoadTransformOp::TransformKind::k64Splat:
1049 opcode = kArm64LoadSplat;
1050 opcode |= LaneSizeField::encode(64);
1051 require_add = true;
1052 break;
1053 case Simd128LoadTransformOp::TransformKind::k8x8S:
1054 opcode = kArm64S128Load8x8S;
1055 break;
1056 case Simd128LoadTransformOp::TransformKind::k8x8U:
1057 opcode = kArm64S128Load8x8U;
1058 break;
1059 case Simd128LoadTransformOp::TransformKind::k16x4S:
1060 opcode = kArm64S128Load16x4S;
1061 break;
1062 case Simd128LoadTransformOp::TransformKind::k16x4U:
1063 opcode = kArm64S128Load16x4U;
1064 break;
1065 case Simd128LoadTransformOp::TransformKind::k32x2S:
1066 opcode = kArm64S128Load32x2S;
1067 break;
1068 case Simd128LoadTransformOp::TransformKind::k32x2U:
1069 opcode = kArm64S128Load32x2U;
1070 break;
1071 case Simd128LoadTransformOp::TransformKind::k32Zero:
1072 opcode = kArm64LdrS;
1073 break;
1074 case Simd128LoadTransformOp::TransformKind::k64Zero:
1075 opcode = kArm64LdrD;
1076 break;
1077 default:
1078 UNIMPLEMENTED();
1079 }
1080 // ARM64 supports unaligned loads
1081 DCHECK(!op.load_kind.maybe_unaligned);
1082
1083 Arm64OperandGeneratorT g(this);
1084 OpIndex base = input_at(node, 0);
1085 OpIndex index = input_at(node, 1);
1086 InstructionOperand inputs[2];
1087 InstructionOperand outputs[1];
1088
1089 inputs[0] = g.UseRegister(base);
1090 inputs[1] = g.UseRegister(index);
1091 outputs[0] = g.DefineAsRegister(node);
1092
1093 if (require_add) {
1094 // ld1r uses post-index, so construct address first.
1095 // TODO(v8:9886) If index can be immediate, use vldr without this add.
1096 inputs[0] = EmitAddBeforeLoadOrStore(this, node, &opcode);
1097 inputs[1] = g.TempImmediate(0);
1098 opcode |= AddressingModeField::encode(kMode_MRI);
1099 } else {
1100 opcode |= AddressingModeField::encode(kMode_MRR);
1101 }
1102 if (op.load_kind.with_trap_handler) {
1103 opcode |= AccessModeField::encode(kMemoryAccessProtectedMemOutOfBounds);
1104 }
1105 Emit(opcode, 1, outputs, 2, inputs);
1106}
1107
1108#endif // V8_ENABLE_WEBASSEMBLY
1109
1110std::tuple<InstructionCode, ImmediateMode> GetLoadOpcodeAndImmediate(
1111 MemoryRepresentation loaded_rep, RegisterRepresentation result_rep) {
1112 // NOTE: The meaning of `loaded_rep` = `MemoryRepresentation::AnyTagged()` is
1113 // that we are loading a compressed tagged field, while `result_rep` =
1114 // `RegisterRepresentation::Tagged()` refers to an uncompressed tagged value.
1115 switch (loaded_rep) {
1116 case MemoryRepresentation::Int8():
1117 DCHECK_EQ(result_rep, RegisterRepresentation::Word32());
1118 return {kArm64LdrsbW, kLoadStoreImm8};
1119 case MemoryRepresentation::Uint8():
1120 DCHECK_EQ(result_rep, RegisterRepresentation::Word32());
1121 return {kArm64Ldrb, kLoadStoreImm8};
1122 case MemoryRepresentation::Int16():
1123 DCHECK_EQ(result_rep, RegisterRepresentation::Word32());
1124 return {kArm64LdrshW, kLoadStoreImm16};
1125 case MemoryRepresentation::Uint16():
1126 DCHECK_EQ(result_rep, RegisterRepresentation::Word32());
1127 return {kArm64Ldrh, kLoadStoreImm16};
1128 case MemoryRepresentation::Int32():
1129 case MemoryRepresentation::Uint32():
1130 DCHECK_EQ(result_rep, RegisterRepresentation::Word32());
1131 return {kArm64LdrW, kLoadStoreImm32};
1132 case MemoryRepresentation::Int64():
1133 case MemoryRepresentation::Uint64():
1134 DCHECK_EQ(result_rep, RegisterRepresentation::Word64());
1135 return {kArm64Ldr, kLoadStoreImm64};
1136 case MemoryRepresentation::Float16():
1137 DCHECK_EQ(result_rep, RegisterRepresentation::Float32());
1138 return {kArm64LdrH, kLoadStoreImm16};
1139 case MemoryRepresentation::Float32():
1140 DCHECK_EQ(result_rep, RegisterRepresentation::Float32());
1141 return {kArm64LdrS, kLoadStoreImm32};
1142 case MemoryRepresentation::Float64():
1143 DCHECK_EQ(result_rep, RegisterRepresentation::Float64());
1144 return {kArm64LdrD, kLoadStoreImm64};
1145#ifdef V8_COMPRESS_POINTERS
1146 case MemoryRepresentation::AnyTagged():
1147 case MemoryRepresentation::TaggedPointer():
1148 if (result_rep == RegisterRepresentation::Compressed()) {
1149 return {kArm64LdrW, kLoadStoreImm32};
1150 }
1151 DCHECK_EQ(result_rep, RegisterRepresentation::Tagged());
1152 return {kArm64LdrDecompressTagged, kLoadStoreImm32};
1153 case MemoryRepresentation::TaggedSigned():
1154 if (result_rep == RegisterRepresentation::Compressed()) {
1155 return {kArm64LdrW, kLoadStoreImm32};
1156 }
1157 DCHECK_EQ(result_rep, RegisterRepresentation::Tagged());
1158 return {kArm64LdrDecompressTaggedSigned, kLoadStoreImm32};
1159#else
1160 case MemoryRepresentation::AnyTagged():
1161 case MemoryRepresentation::TaggedPointer():
1162 case MemoryRepresentation::TaggedSigned():
1163 return {kArm64Ldr, kLoadStoreImm64};
1164#endif
1165 case MemoryRepresentation::AnyUncompressedTagged():
1166 case MemoryRepresentation::UncompressedTaggedPointer():
1167 case MemoryRepresentation::UncompressedTaggedSigned():
1168 DCHECK_EQ(result_rep, RegisterRepresentation::Tagged());
1169 return {kArm64Ldr, kLoadStoreImm64};
1170 case MemoryRepresentation::ProtectedPointer():
1172 return {kArm64LdrDecompressProtected, kNoImmediate};
1173 case MemoryRepresentation::IndirectPointer():
1174 UNREACHABLE();
1175 case MemoryRepresentation::SandboxedPointer():
1176 return {kArm64LdrDecodeSandboxedPointer, kLoadStoreImm64};
1177 case MemoryRepresentation::Simd128():
1178 return {kArm64LdrQ, kNoImmediate};
1179 case MemoryRepresentation::Simd256():
1180 UNREACHABLE();
1181 }
1182}
1183
1184std::tuple<InstructionCode, ImmediateMode> GetLoadOpcodeAndImmediate(
1185 LoadRepresentation load_rep) {
1186 switch (load_rep.representation()) {
1187 case MachineRepresentation::kFloat16:
1188 return {kArm64LdrH, kLoadStoreImm16};
1189 case MachineRepresentation::kFloat32:
1190 return {kArm64LdrS, kLoadStoreImm32};
1191 case MachineRepresentation::kFloat64:
1192 return {kArm64LdrD, kLoadStoreImm64};
1193 case MachineRepresentation::kBit: // Fall through.
1194 case MachineRepresentation::kWord8:
1195 return {load_rep.IsUnsigned() ? kArm64Ldrb
1196 : load_rep.semantic() == MachineSemantic::kInt32 ? kArm64LdrsbW
1197 : kArm64Ldrsb,
1198 kLoadStoreImm8};
1199 case MachineRepresentation::kWord16:
1200 return {load_rep.IsUnsigned() ? kArm64Ldrh
1201 : load_rep.semantic() == MachineSemantic::kInt32 ? kArm64LdrshW
1202 : kArm64Ldrsh,
1203 kLoadStoreImm16};
1204 case MachineRepresentation::kWord32:
1205 return {kArm64LdrW, kLoadStoreImm32};
1206 case MachineRepresentation::kCompressedPointer: // Fall through.
1207 case MachineRepresentation::kCompressed:
1208#ifdef V8_COMPRESS_POINTERS
1209 return {kArm64LdrW, kLoadStoreImm32};
1210#else
1211 UNREACHABLE();
1212#endif
1213#ifdef V8_COMPRESS_POINTERS
1214 case MachineRepresentation::kTaggedSigned:
1215 return {kArm64LdrDecompressTaggedSigned, kLoadStoreImm32};
1216 case MachineRepresentation::kTaggedPointer:
1217 case MachineRepresentation::kTagged:
1218 return {kArm64LdrDecompressTagged, kLoadStoreImm32};
1219#else
1220 case MachineRepresentation::kTaggedSigned: // Fall through.
1221 case MachineRepresentation::kTaggedPointer: // Fall through.
1222 case MachineRepresentation::kTagged: // Fall through.
1223#endif
1224 case MachineRepresentation::kWord64:
1225 return {kArm64Ldr, kLoadStoreImm64};
1226 case MachineRepresentation::kProtectedPointer:
1228 return {kArm64LdrDecompressProtected, kNoImmediate};
1229 case MachineRepresentation::kSandboxedPointer:
1230 return {kArm64LdrDecodeSandboxedPointer, kLoadStoreImm64};
1231 case MachineRepresentation::kSimd128:
1232 return {kArm64LdrQ, kNoImmediate};
1233 case MachineRepresentation::kSimd256: // Fall through.
1234 case MachineRepresentation::kMapWord: // Fall through.
1235 case MachineRepresentation::kIndirectPointer: // Fall through.
1236 case MachineRepresentation::kFloat16RawBits: // Fall through.
1237 case MachineRepresentation::kNone:
1238 UNREACHABLE();
1239 }
1240}
1241
1242void InstructionSelectorT::VisitLoad(OpIndex node) {
1243 InstructionCode opcode = kArchNop;
1244 ImmediateMode immediate_mode = kNoImmediate;
1245 auto load = this->load_view(node);
1246 LoadRepresentation load_rep = load.loaded_rep();
1247 MachineRepresentation rep = load_rep.representation();
1248 std::tie(opcode, immediate_mode) =
1249 GetLoadOpcodeAndImmediate(load.ts_loaded_rep(), load.ts_result_rep());
1250 bool traps_on_null;
1251 if (load.is_protected(&traps_on_null)) {
1252 if (traps_on_null) {
1253 opcode |= AccessModeField::encode(kMemoryAccessProtectedNullDereference);
1254 } else {
1255 opcode |= AccessModeField::encode(kMemoryAccessProtectedMemOutOfBounds);
1256 }
1257 }
1258 EmitLoad(this, node, opcode, immediate_mode, rep);
1259}
1260
1261void InstructionSelectorT::VisitProtectedLoad(OpIndex node) { VisitLoad(node); }
1262
1263void InstructionSelectorT::VisitStorePair(OpIndex node) {
1264 Arm64OperandGeneratorT g(this);
1265 UNIMPLEMENTED();
1266}
1267
1268void InstructionSelectorT::VisitStore(OpIndex node) {
1269 TurboshaftAdapter::StoreView store_view = this->store_view(node);
1270 DCHECK_EQ(store_view.displacement(), 0);
1271 WriteBarrierKind write_barrier_kind =
1272 store_view.stored_rep().write_barrier_kind();
1273 const MachineRepresentation representation =
1274 store_view.stored_rep().representation();
1275
1276 Arm64OperandGeneratorT g(this);
1277
1278 // TODO(arm64): I guess this could be done in a better way.
1279 if (write_barrier_kind != kNoWriteBarrier &&
1280 !v8_flags.disable_write_barriers) {
1282 AddressingMode addressing_mode;
1283 InstructionOperand inputs[4];
1284 size_t input_count = 0;
1285 inputs[input_count++] = g.UseUniqueRegister(store_view.base());
1286 // OutOfLineRecordWrite uses the index in an add or sub instruction, but we
1287 // can trust the assembler to generate extra instructions if the index does
1288 // not fit into add or sub. So here only check the immediate for a store.
1289 OpIndex index = this->value(store_view.index());
1290 if (g.CanBeImmediate(index, COMPRESS_POINTERS_BOOL ? kLoadStoreImm32
1291 : kLoadStoreImm64)) {
1292 inputs[input_count++] = g.UseImmediate(index);
1293 addressing_mode = kMode_MRI;
1294 } else {
1295 inputs[input_count++] = g.UseUniqueRegister(index);
1296 addressing_mode = kMode_MRR;
1297 }
1298 inputs[input_count++] = g.UseUniqueRegister(store_view.value());
1299 RecordWriteMode record_write_mode =
1300 WriteBarrierKindToRecordWriteMode(write_barrier_kind);
1301 InstructionCode code;
1302 if (representation == MachineRepresentation::kIndirectPointer) {
1303 DCHECK_EQ(write_barrier_kind, kIndirectPointerWriteBarrier);
1304 // In this case we need to add the IndirectPointerTag as additional input.
1305 code = kArchStoreIndirectWithWriteBarrier;
1306 IndirectPointerTag tag = store_view.indirect_pointer_tag();
1307 inputs[input_count++] = g.UseImmediate64(static_cast<int64_t>(tag));
1308 } else {
1309 code = kArchStoreWithWriteBarrier;
1310 }
1311 code |= AddressingModeField::encode(addressing_mode);
1312 code |= RecordWriteModeField::encode(record_write_mode);
1313 if (store_view.is_store_trap_on_null()) {
1314 code |= AccessModeField::encode(kMemoryAccessProtectedNullDereference);
1315 }
1316 Emit(code, 0, nullptr, input_count, inputs);
1317 return;
1318 }
1319
1320 InstructionOperand inputs[4];
1321 size_t input_count = 0;
1322
1323 MachineRepresentation approx_rep = representation;
1324 InstructionCode opcode;
1325 ImmediateMode immediate_mode;
1326 std::tie(opcode, immediate_mode) =
1327 GetStoreOpcodeAndImmediate(store_view.ts_stored_rep(), false);
1328
1329 if (v8_flags.enable_unconditional_write_barriers) {
1330 if (CanBeTaggedOrCompressedPointer(representation)) {
1331 write_barrier_kind = kFullWriteBarrier;
1332 }
1333 }
1334
1335 std::optional<ExternalReference> external_base;
1336 ExternalReference value;
1337 if (this->MatchExternalConstant(store_view.base(), &value)) {
1338 external_base = value;
1339 }
1340
1341 std::optional<int64_t> constant_index;
1342 if (store_view.index().valid()) {
1343 OpIndex index = this->value(store_view.index());
1344 constant_index = g.GetOptionalIntegerConstant(index);
1345 }
1346 if (external_base.has_value() && constant_index.has_value() &&
1347 CanAddressRelativeToRootsRegister(*external_base)) {
1348 ptrdiff_t const delta =
1349 *constant_index +
1350 MacroAssemblerBase::RootRegisterOffsetForExternalReference(
1351 isolate(), *external_base);
1352 if (is_int32(delta)) {
1353 input_count = 2;
1354 InstructionOperand inputs[2];
1355 inputs[0] = g.UseRegister(store_view.value());
1356 inputs[1] = g.UseImmediate(static_cast<int32_t>(delta));
1357 opcode |= AddressingModeField::encode(kMode_Root);
1358 Emit(opcode, 0, nullptr, input_count, inputs);
1359 return;
1360 }
1361 }
1362
1363 OpIndex base = store_view.base();
1364 OpIndex index = this->value(store_view.index());
1365
1366 inputs[input_count++] = g.UseRegisterOrImmediateZero(store_view.value());
1367
1368 if (this->is_load_root_register(base)) {
1369 inputs[input_count++] = g.UseImmediate(index);
1370 opcode |= AddressingModeField::encode(kMode_Root);
1371 Emit(opcode, 0, nullptr, input_count, inputs);
1372 return;
1373 }
1374
1375 inputs[input_count++] = g.UseRegister(base);
1376
1377 if (g.CanBeImmediate(index, immediate_mode)) {
1378 inputs[input_count++] = g.UseImmediate(index);
1379 opcode |= AddressingModeField::encode(kMode_MRI);
1380 } else if (TryMatchLoadStoreShift(&g, this, approx_rep, node, index,
1381 &inputs[input_count],
1382 &inputs[input_count + 1])) {
1383 input_count += 2;
1384 opcode |= AddressingModeField::encode(kMode_Operand2_R_LSL_I);
1385 } else {
1386 inputs[input_count++] = g.UseRegister(index);
1387 opcode |= AddressingModeField::encode(kMode_MRR);
1388 }
1389
1390 if (store_view.is_store_trap_on_null()) {
1391 opcode |= AccessModeField::encode(kMemoryAccessProtectedNullDereference);
1392 } else if (store_view.access_kind() ==
1393 MemoryAccessKind::kProtectedByTrapHandler) {
1394 opcode |= AccessModeField::encode(kMemoryAccessProtectedMemOutOfBounds);
1395 }
1396
1397 Emit(opcode, 0, nullptr, input_count, inputs);
1398}
1399
1400void InstructionSelectorT::VisitProtectedStore(OpIndex node) {
1401 VisitStore(node);
1402}
1403
1404void InstructionSelectorT::VisitSimd128ReverseBytes(OpIndex node) {
1405 UNREACHABLE();
1406}
1407
1408// Architecture supports unaligned access, therefore VisitLoad is used instead
1409void InstructionSelectorT::VisitUnalignedLoad(OpIndex node) { UNREACHABLE(); }
1410
1411// Architecture supports unaligned access, therefore VisitStore is used instead
1412void InstructionSelectorT::VisitUnalignedStore(OpIndex node) { UNREACHABLE(); }
1413
1414 namespace turboshaft {
1415
1416class CompareSequence {
1417 public:
1418 void InitialCompare(OpIndex op, OpIndex l, OpIndex r,
1419 RegisterRepresentation rep) {
1420 DCHECK(!HasCompare());
1420 DCHECK(!HasCompare());
1421 cmp_ = op;
1422 left_ = l;
1423 right_ = r;
1424 opcode_ = GetOpcode(rep);
1425 }
1426 bool HasCompare() const { return cmp_.valid(); }
1427 OpIndex cmp() const { return cmp_; }
1428 OpIndex left() const { return left_; }
1429 OpIndex right() const { return right_; }
1430 InstructionCode opcode() const { return opcode_; }
1431 uint32_t num_ccmps() const { return num_ccmps_; }
1433 void AddConditionalCompare(RegisterRepresentation rep,
1434 FlagsCondition ccmp_condition,
1435 FlagsCondition default_flags, OpIndex ccmp_lhs,
1436 OpIndex ccmp_rhs) {
1437 InstructionCode code = GetOpcode(rep);
1438 ccmps_.at(num_ccmps_) = FlagsContinuationT::ConditionalCompare{
1439 code, ccmp_condition, default_flags, ccmp_lhs, ccmp_rhs};
1440 ++num_ccmps_;
1441 }
1442 bool IsFloatCmp() const {
1443 return opcode() == kArm64Float32Cmp || opcode() == kArm64Float64Cmp;
1444 }
1445
1446 private:
1447 InstructionCode GetOpcode(RegisterRepresentation rep) const {
1448 switch (rep.MapTaggedToWord().value()) {
1449 case RegisterRepresentation::Word32():
1450 return kArm64Cmp32;
1451 case RegisterRepresentation::Word64():
1452 return kArm64Cmp;
1453 case RegisterRepresentation::Float32():
1454 return kArm64Float32Cmp;
1455 case RegisterRepresentation::Float64():
1456 return kArm64Float64Cmp;
1457 default:
1458 UNREACHABLE();
1459 }
1460 }
1461
1467 uint32_t num_ccmps_ = 0;
1468};
1469
1470class CompareChainNode final : public ZoneObject {
1471 public:
1472 enum class NodeKind : uint8_t { kFlagSetting, kLogicalCombine };
1473
1474 CompareChainNode(OpIndex n, FlagsCondition condition)
1475 : node_kind_(NodeKind::kFlagSetting),
1476 user_condition_(condition),
1477 node_(n) {}
1478
1479 CompareChainNode(OpIndex n, CompareChainNode* l, CompareChainNode* r)
1480 : node_kind_(NodeKind::kLogicalCombine), node_(n), lhs_(l), rhs_(r) {
1481 // Canonicalise the chain with cmps on the right.
1482 if (lhs_->IsFlagSetting() && !rhs_->IsFlagSetting()) {
1483 std::swap(lhs_, rhs_);
1484 }
1485 }
1486 void SetCondition(FlagsCondition condition) {
1487 DCHECK(IsLogicalCombine());
1488 user_condition_ = condition;
1489 if (requires_negation_) {
1490 NegateFlags();
1491 }
1492 }
1493 void MarkRequiresNegation() {
1494 if (IsFlagSetting()) {
1495 NegateFlags();
1496 } else {
1497 requires_negation_ = !requires_negation_;
1498 }
1499 }
1500 void NegateFlags() {
1501 user_condition_ = NegateFlagsCondition(user_condition_);
1502 requires_negation_ = false;
1503 }
1504 bool IsLegalFirstCombine() const {
1505 DCHECK(IsLogicalCombine());
1506 // We need two cmps feeding the first logic op.
1507 return lhs_->IsFlagSetting() && rhs_->IsFlagSetting();
1508 }
1509 bool IsFlagSetting() const { return node_kind_ == NodeKind::kFlagSetting; }
1510 bool IsLogicalCombine() const {
1511 return node_kind_ == NodeKind::kLogicalCombine;
1512 }
1513 OpIndex node() const { return node_; }
1514 FlagsCondition user_condition() const { return user_condition_; }
1515 CompareChainNode* lhs() const {
1516 DCHECK(IsLogicalCombine());
1517 return lhs_;
1518 }
1519 CompareChainNode* rhs() const {
1520 DCHECK(IsLogicalCombine());
1521 return rhs_;
1522 }
1523
1524 private:
1525 NodeKind node_kind_;
1526 FlagsCondition user_condition_;
1527 bool requires_negation_ = false;
1528 OpIndex node_;
1529 CompareChainNode* lhs_ = nullptr;
1530 CompareChainNode* rhs_ = nullptr;
1531};
1532
1533static std::optional<FlagsCondition> GetFlagsCondition(
1534 OpIndex node, InstructionSelectorT* selector) {
1535 if (const ComparisonOp* comparison =
1536 selector->Get(node).TryCast<ComparisonOp>()) {
1537 if (comparison->rep == RegisterRepresentation::Word32() ||
1538 comparison->rep == RegisterRepresentation::Word64() ||
1539 comparison->rep == RegisterRepresentation::Tagged()) {
1540 switch (comparison->kind) {
1541 case ComparisonOp::Kind::kEqual:
1542 return FlagsCondition::kEqual;
1543 case ComparisonOp::Kind::kSignedLessThan:
1544 return FlagsCondition::kSignedLessThan;
1545 case ComparisonOp::Kind::kSignedLessThanOrEqual:
1546 return FlagsCondition::kSignedLessThanOrEqual;
1547 case ComparisonOp::Kind::kUnsignedLessThan:
1548 return FlagsCondition::kUnsignedLessThan;
1549 case ComparisonOp::Kind::kUnsignedLessThanOrEqual:
1550 return FlagsCondition::kUnsignedLessThanOrEqual;
1551 default:
1552 UNREACHABLE();
1553 }
1554 } else if (comparison->rep == RegisterRepresentation::Float32() ||
1555 comparison->rep == RegisterRepresentation::Float64()) {
1556 switch (comparison->kind) {
1557 case ComparisonOp::Kind::kEqual:
1558 return FlagsCondition::kEqual;
1559 case ComparisonOp::Kind::kSignedLessThan:
1560 return FlagsCondition::kFloatLessThan;
1561 case ComparisonOp::Kind::kSignedLessThanOrEqual:
1562 return FlagsCondition::kFloatLessThanOrEqual;
1563 default:
1564 UNREACHABLE();
1565 }
1566 }
1567 }
1568 return std::nullopt;
1569}
1570
1571// Search through AND, OR and comparisons.
1572// To make life a little easier, we currently don't handle combining two logic
1573// operations. There are restrictions on what logical combinations can be
1574// performed with ccmp, so this implementation builds a ccmp chain from the LHS
1575// of the tree while combining one more compare from the RHS at each step. So,
1576// currently, if we discover a pattern like this:
1577// logic(logic(cmp, cmp), logic(cmp, cmp))
1578// The search will fail from the outermost logic operation, but it will succeed
1579// for the two inner operations. This will result in suboptimal codegen:
1580// cmp
1581// ccmp
1582// cset x
1583// cmp
1584// ccmp
1585// cset y
1586// logic x, y
1587static std::optional<CompareChainNode*> FindCompareChain(
1588 OpIndex user, OpIndex node, InstructionSelectorT* selector, Zone* zone,
1589                                        ZoneVector<CompareChainNode*>& nodes) {
1590   if (selector->Get(node).Is<Opmask::kWord32BitwiseAnd>() ||
1591 selector->Get(node).Is<Opmask::kWord32BitwiseOr>()) {
1592 auto maybe_lhs = FindCompareChain(node, selector->input_at(node, 0),
1593 selector, zone, nodes);
1594 auto maybe_rhs = FindCompareChain(node, selector->input_at(node, 1),
1595 selector, zone, nodes);
1596 if (maybe_lhs.has_value() && maybe_rhs.has_value()) {
1597 CompareChainNode* lhs = maybe_lhs.value();
1598 CompareChainNode* rhs = maybe_rhs.value();
1599 // Ensure we don't try to combine a logic operation with two logic inputs.
1600 if (lhs->IsFlagSetting() || rhs->IsFlagSetting()) {
1601 nodes.push_back(std::move(zone->New<CompareChainNode>(node, lhs, rhs)));
1602 return nodes.back();
1603 }
1604 }
1605 // Ensure we remove any valid sub-trees that now cannot be used.
1606 nodes.clear();
1607 return std::nullopt;
1608 } else if (user.valid() && selector->CanCover(user, node)) {
1609 std::optional<FlagsCondition> user_condition =
1610 GetFlagsCondition(node, selector);
1611 if (!user_condition.has_value()) {
1612 return std::nullopt;
1613 }
1614 const ComparisonOp& comparison = selector->Get(node).Cast<ComparisonOp>();
1615 if (comparison.kind == ComparisonOp::Kind::kEqual &&
1616 selector->MatchIntegralZero(comparison.right())) {
1617 auto maybe_negated = FindCompareChain(node, selector->input_at(node, 0),
1618 selector, zone, nodes);
1619 if (maybe_negated.has_value()) {
1620 CompareChainNode* negated = maybe_negated.value();
1621 negated->MarkRequiresNegation();
1622 return negated;
1623 }
1624 }
1625 return zone->New<CompareChainNode>(node, user_condition.value());
1626 }
1627 return std::nullopt;
1628}
1629
1630// Overview -------------------------------------------------------------------
1631//
1632// A compare operation will generate a 'user condition', which is the
1633 // FlagsCondition of the opcode. For this algorithm, we generate the default
1634// flags from the LHS of the logic op, while the RHS is used to predicate the
1635// new ccmp. Depending on the logical user, those conditions are either used
1636// as-is or negated:
1637// > For OR, the generated ccmp will negate the LHS condition for its predicate
1638// while the default flags are taken from the RHS.
1639// > For AND, the generated ccmp will take the LHS condition for its predicate
1640// while the default flags are a negation of the RHS.
1641//
1642// The new ccmp will now generate a user condition of its own, and this is
1643// always forwarded from the RHS.
1644//
1645// Chaining compares, including with OR, needs to be equivalent to combining
1646 // all the results with AND and NOT.
1647//
1648// AND Example ----------------------------------------------------------------
1649//
1650// cmpA cmpB
1651// | |
1652// condA condB
1653// | |
1654// --- AND ---
1655//
1656// As the AND becomes the ccmp, it is predicated on condA and the cset is
1657// predicated on condB. The user of the ccmp is always predicated on the
1658// condition from the RHS of the logic operation. The default flags are
1659// not(condB) so cset only produces one when both condA and condB are true:
1660// cmpA
1661// ccmpB not(condB), condA
1662// cset condB
1663//
1664// OR Example -----------------------------------------------------------------
1665//
1666// cmpA cmpB
1667// | |
1668// condA condB
1669// | |
1670// --- OR ---
1671//
1672// cmpA cmpB
1673// equivalent -> | |
1674// not(condA) not(condB)
1675// | |
1676// ----- AND -----
1677// |
1678// NOT
1679//
1680// In this case, the input conditions to the AND (the ccmp) have been negated
1681// so the user condition and default flags have been negated compared to the
1682// previous example. The cset still uses condB because it is negated twice:
1683// cmpA
1684// ccmpB condB, not(condA)
1685// cset condB
1686//
1687// Combining AND and OR -------------------------------------------------------
1688//
1689// cmpA cmpB cmpC
1690// | | |
1691// condA condB condC
1692// | | |
1693// --- AND --- |
1694// | |
1695// OR -----------
1696//
1697// equivalent -> cmpA cmpB cmpC
1698// | | |
1699// condA condB not(condC)
1700// | | |
1701// --- AND --- |
1702// | |
1703// NOT |
1704// | |
1705// AND -------------
1706// |
1707// NOT
1708//
1709 // For this example, the 'user condition' coming out of the first ccmp is
1710// condB but it is negated as the input predicate for the next ccmp as that
1711// one is performing an OR:
1712// cmpA
1713// ccmpB not(condB), condA
1714// ccmpC condC, not(condB)
1715// cset condC
1716//
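// For instance, for a 32-bit `(a < b) & (c == d)`, condA = lt and condB = eq,
// so (following the AND example above) the emitted chain is roughly:
//   cmp  a, b
//   ccmp c, d, not(eq), lt
//   cset eq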
1717 static void CombineFlagSettingOps(CompareChainNode* logic_node,
1718                                   InstructionSelectorT* selector,
1719 CompareSequence* sequence) {
1720 CompareChainNode* lhs = logic_node->lhs();
1721 CompareChainNode* rhs = logic_node->rhs();
1722
1723 Arm64OperandGeneratorT g(selector);
1724 if (!sequence->HasCompare()) {
1725 // This is the beginning of the conditional compare chain.
1726 DCHECK(lhs->IsFlagSetting());
1727 DCHECK(rhs->IsFlagSetting());
1728
1729 {
1730 // ccmp has a much smaller immediate range than cmp, so swap the
1731 // operations if possible.
1732 OpIndex cmp = lhs->node();
1733 OpIndex ccmp = rhs->node();
1734 OpIndex cmp_right = selector->input_at(cmp, 1);
1735 OpIndex ccmp_right = selector->input_at(ccmp, 1);
1736 if (g.CanBeImmediate(cmp_right, kConditionalCompareImm) &&
1737 !g.CanBeImmediate(ccmp_right, kConditionalCompareImm)) {
1738 // If the ccmp could use the cmp immediate, swap them.
1739 std::swap(lhs, rhs);
1740 } else if (g.CanBeImmediate(ccmp_right, kArithmeticImm) &&
1741 !g.CanBeImmediate(ccmp_right, kConditionalCompareImm)) {
1742 // If the ccmp can't use its immediate, but a cmp could, swap them.
1743 std::swap(lhs, rhs);
1744 }
1745 }
1746 OpIndex cmp = lhs->node();
1747 OpIndex left = selector->input_at(lhs->node(), 0);
1748 OpIndex right = selector->input_at(lhs->node(), 1);
1749
1750 // Initialize chain with the compare which will hold the continuation.
1751 RegisterRepresentation rep = selector->Get(cmp).Cast<ComparisonOp>().rep;
1752 sequence->InitialCompare(cmp, left, right, rep);
1753 }
1754
1755 bool is_logical_or =
1756 selector->Get(logic_node->node()).Is<Opmask::kWord32BitwiseOr>();
1757 FlagsCondition ccmp_condition =
1758 is_logical_or ? NegateFlagsCondition(lhs->user_condition())
1759 : lhs->user_condition();
1760 FlagsCondition default_flags =
1761 is_logical_or ? rhs->user_condition()
1762                     : NegateFlagsCondition(rhs->user_condition());
1763 
1764 // We canonicalise the chain so that the rhs is always a cmp, whereas lhs
1765   // will be either the initial cmp or the previous logic op (now a ccmp),
1766   // which only provides ccmp_condition.
1767 FlagsCondition user_condition = rhs->user_condition();
1768 OpIndex ccmp = rhs->node();
1769 OpIndex ccmp_lhs = selector->input_at(ccmp, 0);
1770 OpIndex ccmp_rhs = selector->input_at(ccmp, 1);
1771
1772 // Switch ccmp lhs/rhs if lhs is a small immediate.
1773 if (g.CanBeImmediate(ccmp_lhs, kConditionalCompareImm)) {
1774 user_condition = CommuteFlagsCondition(user_condition);
1775 default_flags = CommuteFlagsCondition(default_flags);
1776 std::swap(ccmp_lhs, ccmp_rhs);
1777 }
1778
1779 RegisterRepresentation rep = selector->Get(ccmp).Cast<ComparisonOp>().rep;
1780 sequence->AddConditionalCompare(rep, ccmp_condition, default_flags, ccmp_lhs,
1781 ccmp_rhs);
1782 // Ensure the user_condition is kept up-to-date for the next ccmp/cset.
1783 logic_node->SetCondition(user_condition);
1784}
1785
1786static std::optional<FlagsCondition> TryMatchConditionalCompareChainShared(
1787 InstructionSelectorT* selector, Zone* zone, OpIndex node,
1788 CompareSequence* sequence) {
1789 // Instead of:
1790 // cmp x0, y0
1791 // cset cc0
1792 // cmp x1, y1
1793 // cset cc1
1794 // and/orr
1795 // Try to merge logical combinations of flags into:
1796 // cmp x0, y0
1797 // ccmp x1, y1 ..
1798 // cset ..
1799 // So, for AND:
1800 // (cset cc1 (ccmp x1 y1 !cc1 cc0 (cmp x0, y0)))
1801 // and for ORR:
1802   // (cset cc1 (ccmp x1 y1 cc1 !cc0 (cmp x0, y0)))
1803
1804 // Look for a potential chain.
1805 ZoneVector<CompareChainNode*> logic_nodes(zone);
1806 auto root =
1807 FindCompareChain(OpIndex::Invalid(), node, selector, zone, logic_nodes);
1808 if (!root.has_value()) return std::nullopt;
1809
1810 if (logic_nodes.size() > FlagsContinuationT::kMaxCompareChainSize) {
1811 return std::nullopt;
1812 }
1813 if (!logic_nodes.front()->IsLegalFirstCombine()) {
1814 return std::nullopt;
1815 }
1816
1817 for (auto* logic_node : logic_nodes) {
1818 CombineFlagSettingOps(logic_node, selector, sequence);
1819 }
1820 DCHECK_LE(sequence->num_ccmps(), FlagsContinuationT::kMaxCompareChainSize);
1821 return logic_nodes.back()->user_condition();
1822}
1823
1824static void VisitCompareChain(InstructionSelectorT* selector, OpIndex left_node,
1825 OpIndex right_node, RegisterRepresentation rep,
1826 InstructionCode opcode,
1827 ImmediateMode operand_mode,
1828 FlagsContinuationT* cont) {
1829 DCHECK(cont->IsConditionalSet() || cont->IsConditionalBranch());
1830 Arm64OperandGeneratorT g(selector);
1831 constexpr uint32_t kMaxFlagSetInputs = 2;
1832 constexpr uint32_t kMaxCcmpOperands =
1833 FlagsContinuationT::kMaxCompareChainSize * kNumCcmpOperands;
1834 constexpr uint32_t kExtraCcmpInputs = 2;
1835 constexpr uint32_t kMaxInputs =
1836 kMaxFlagSetInputs + kMaxCcmpOperands + kExtraCcmpInputs;
1837 InstructionOperand inputs[kMaxInputs];
1838 size_t input_count = 0;
1839
1840 if (g.CanBeImmediate(right_node, operand_mode)) {
1841 inputs[input_count++] = g.UseRegister(left_node);
1842 inputs[input_count++] = g.UseImmediate(right_node);
1843 } else {
1844 inputs[input_count++] = g.UseRegisterOrImmediateZero(left_node);
1845 inputs[input_count++] = g.UseRegister(right_node);
1846 }
1847
1848 auto& compares = cont->compares();
1849 for (unsigned i = 0; i < cont->num_conditional_compares(); ++i) {
1850 auto compare = compares[i];
1851 inputs[input_count + kCcmpOffsetOfOpcode] = g.TempImmediate(compare.code);
1852 inputs[input_count + kCcmpOffsetOfLhs] = g.UseRegisterAtEnd(compare.lhs);
1853 if ((compare.code == kArm64Cmp32 || compare.code == kArm64Cmp) &&
1854         g.CanBeImmediate(compare.rhs, kConditionalCompareImm)) {
1855       inputs[input_count + kCcmpOffsetOfRhs] = g.UseImmediate(compare.rhs);
1856 } else {
1857 inputs[input_count + kCcmpOffsetOfRhs] = g.UseRegisterAtEnd(compare.rhs);
1858 }
1859 inputs[input_count + kCcmpOffsetOfDefaultFlags] =
1860 g.TempImmediate(compare.default_flags);
1861 inputs[input_count + kCcmpOffsetOfCompareCondition] =
1862 g.TempImmediate(compare.compare_condition);
1863 input_count += kNumCcmpOperands;
1864 }
1865 inputs[input_count++] = g.TempImmediate(cont->final_condition());
1866 inputs[input_count++] =
1867 g.TempImmediate(static_cast<int32_t>(cont->num_conditional_compares()));
1868
1869 DCHECK_GE(arraysize(inputs), input_count);
1870
1871 selector->EmitWithContinuation(opcode, 0, nullptr, input_count, inputs, cont);
1872}
1873
1874 static bool TryMatchConditionalCompareChainBranch(
1875     InstructionSelectorT* selector, Zone* zone, OpIndex node,
1876 FlagsContinuationT* cont) {
1877 if (!cont->IsBranch()) return false;
1878 DCHECK(cont->condition() == kNotEqual || cont->condition() == kEqual);
1879
1880 CompareSequence sequence;
1881 auto final_cond =
1882 TryMatchConditionalCompareChainShared(selector, zone, node, &sequence);
1883 if (final_cond.has_value()) {
1884     FlagsCondition condition = cont->condition() == kNotEqual
1885                                    ? final_cond.value()
1886 : NegateFlagsCondition(final_cond.value());
1887 FlagsContinuationT new_cont = FlagsContinuationT::ForConditionalBranch(
1888 sequence.ccmps(), sequence.num_ccmps(), condition, cont->true_block(),
1889 cont->false_block());
1890
1891 ImmediateMode imm_mode =
1892 sequence.IsFloatCmp() ? kNoImmediate : kArithmeticImm;
1893 VisitCompareChain(selector, sequence.left(), sequence.right(),
1894 selector->Get(sequence.cmp()).Cast<ComparisonOp>().rep,
1895 sequence.opcode(), imm_mode, &new_cont);
1896
1897 return true;
1898 }
1899 return false;
1900}
1901
1902 static bool TryMatchConditionalCompareChainSet(InstructionSelectorT* selector,
1903                                                Zone* zone, OpIndex node) {
1904 // Create the cmp + ccmp ... sequence.
1905 CompareSequence sequence;
1906 auto final_cond =
1907 TryMatchConditionalCompareChainShared(selector, zone, node, &sequence);
1908 if (final_cond.has_value()) {
1909 // The continuation performs the conditional compare and cset.
1910 FlagsContinuationT cont = FlagsContinuationT::ForConditionalSet(
1911 sequence.ccmps(), sequence.num_ccmps(), final_cond.value(), node);
1912
1913 ImmediateMode imm_mode =
1914 sequence.IsFloatCmp() ? kNoImmediate : kArithmeticImm;
1915 VisitCompareChain(selector, sequence.left(), sequence.right(),
1916 selector->Get(sequence.cmp()).Cast<ComparisonOp>().rep,
1917 sequence.opcode(), imm_mode, &cont);
1918 return true;
1919 }
1920 return false;
1921}
1922
1923} // end namespace turboshaft
1924
1925static void VisitLogical(InstructionSelectorT* selector, Zone* zone,
1926 OpIndex node, WordRepresentation rep,
1927 ArchOpcode opcode, bool left_can_cover,
1928 bool right_can_cover, ImmediateMode imm_mode) {
1929 Arm64OperandGeneratorT g(selector);
1930 const WordBinopOp& logical_op = selector->Get(node).Cast<WordBinopOp>();
1931 const Operation& lhs = selector->Get(logical_op.left());
1932 const Operation& rhs = selector->Get(logical_op.right());
1933
1934 // Map instruction to equivalent operation with inverted right input.
1935 ArchOpcode inv_opcode = opcode;
1936 switch (opcode) {
1937 case kArm64And32:
1938 inv_opcode = kArm64Bic32;
1939 break;
1940 case kArm64And:
1941 inv_opcode = kArm64Bic;
1942 break;
1943 case kArm64Or32:
1944 inv_opcode = kArm64Orn32;
1945 break;
1946 case kArm64Or:
1947 inv_opcode = kArm64Orn;
1948 break;
1949 case kArm64Eor32:
1950 inv_opcode = kArm64Eon32;
1951 break;
1952 case kArm64Eor:
1953 inv_opcode = kArm64Eon;
1954 break;
1955 default:
1956 UNREACHABLE();
1957 }
1958
1959 if (TryMatchConditionalCompareChainSet(selector, zone, node)) {
1960 return;
1961 }
1962
1963 // Select Logical(y, ~x) for Logical(Xor(x, -1), y).
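  // For instance, Word32BitwiseAnd(Word32BitwiseXor(x, -1), y) is selected
  // below as a single `bic` (dst = y & ~x) rather than a separate `mvn` plus
  // `and`.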
1964 if (lhs.Is<Opmask::kBitwiseXor>() && left_can_cover) {
1965 const WordBinopOp& xor_op = lhs.Cast<WordBinopOp>();
1966 int64_t xor_rhs_val;
1967 if (selector->MatchSignedIntegralConstant(xor_op.right(), &xor_rhs_val) &&
1968 xor_rhs_val == -1) {
1969 // TODO(all): support shifted operand on right.
1970 selector->Emit(inv_opcode, g.DefineAsRegister(node),
1971 g.UseRegister(logical_op.right()),
1972 g.UseRegister(xor_op.left()));
1973 return;
1974 }
1975 }
1976
1977 // Select Logical(x, ~y) for Logical(x, Xor(y, -1)).
1978 if (rhs.Is<Opmask::kBitwiseXor>() && right_can_cover) {
1979 const WordBinopOp& xor_op = rhs.Cast<WordBinopOp>();
1980 int64_t xor_rhs_val;
1981 if (selector->MatchSignedIntegralConstant(xor_op.right(), &xor_rhs_val) &&
1982 xor_rhs_val == -1) {
1983 // TODO(all): support shifted operand on right.
1984 selector->Emit(inv_opcode, g.DefineAsRegister(node),
1985 g.UseRegister(logical_op.left()),
1986 g.UseRegister(xor_op.left()));
1987 return;
1988 }
1989 }
1990
1991 int64_t xor_rhs_val;
1992 if (logical_op.Is<Opmask::kBitwiseXor>() &&
1993 selector->MatchSignedIntegralConstant(logical_op.right(), &xor_rhs_val) &&
1994 xor_rhs_val == -1) {
1995 const WordBinopOp& xor_op = logical_op.Cast<Opmask::kBitwiseXor>();
1996 bool is32 = rep == WordRepresentation::Word32();
1997 ArchOpcode opcode = is32 ? kArm64Not32 : kArm64Not;
1998 selector->Emit(opcode, g.DefineAsRegister(node),
1999 g.UseRegister(xor_op.left()));
2000 } else {
2001 VisitBinop(selector, node, rep, opcode, imm_mode);
2002 }
2003}
2004
2005void InstructionSelectorT::VisitWord32And(OpIndex node) {
2006 Arm64OperandGeneratorT g(this);
2007 const WordBinopOp& bitwise_and =
2008 this->Get(node).Cast<Opmask::kWord32BitwiseAnd>();
2009 const Operation& lhs = this->Get(bitwise_and.left());
2010 if (int64_t constant_rhs;
2011       lhs.Is<Opmask::kWord32ShiftRightLogical>() &&
2012       CanCover(node, bitwise_and.left()) &&
2013 MatchSignedIntegralConstant(bitwise_and.right(), &constant_rhs)) {
2014 DCHECK(base::IsInRange(constant_rhs, std::numeric_limits<int32_t>::min(),
2015 std::numeric_limits<int32_t>::max()));
2016 uint32_t mask = static_cast<uint32_t>(constant_rhs);
2017 uint32_t mask_width = base::bits::CountPopulation(mask);
2018 uint32_t mask_msb = base::bits::CountLeadingZeros32(mask);
2019 if ((mask_width != 0) && (mask_width != 32) &&
2020 (mask_msb + mask_width == 32)) {
2021 // The mask must be contiguous, and occupy the least-significant bits.
2022 DCHECK_EQ(0u, base::bits::CountTrailingZeros32(mask));
2023
2024 // Select Ubfx for And(Shr(x, imm), mask) where the mask is in the least
2025 // significant bits.
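      // For instance, Word32And(Word32ShiftRightLogical(x, 8), 0xFF) can be
      // selected as a single `ubfx w0, wx, #8, #8`.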
2026 const ShiftOp& lhs_shift = lhs.Cast<Opmask::kWord32ShiftRightLogical>();
2027 if (int64_t constant;
2028 MatchSignedIntegralConstant(lhs_shift.right(), &constant)) {
2029 // Any shift value can match; int32 shifts use `value % 32`.
2030 uint32_t lsb = constant & 0x1F;
2031
2032         // Ubfx cannot extract bits past the register size. However, since
2033         // shifting the original value would have introduced some zeros, we
2034         // can still use ubfx with a smaller mask and the remaining bits
2035         // will be zeros.
2036 if (lsb + mask_width > 32) mask_width = 32 - lsb;
2037
2038 Emit(kArm64Ubfx32, g.DefineAsRegister(node),
2039 g.UseRegister(lhs_shift.left()),
2040 g.UseImmediateOrTemp(lhs_shift.right(), lsb),
2041 g.TempImmediate(mask_width));
2042 return;
2043 }
2044 // Other cases fall through to the normal And operation.
2045 }
2046 }
2047 VisitLogical(this, zone(), node, bitwise_and.rep, kArm64And32,
2048 CanCover(node, bitwise_and.left()),
2049 CanCover(node, bitwise_and.right()), kLogical32Imm);
2050}
2051
2052void InstructionSelectorT::VisitWord64And(OpIndex node) {
2053 Arm64OperandGeneratorT g(this);
2054
2055 const WordBinopOp& bitwise_and = Get(node).Cast<Opmask::kWord64BitwiseAnd>();
2056 const Operation& lhs = Get(bitwise_and.left());
2057
2058 if (uint64_t mask;
2059 lhs.Is<Opmask::kWord64ShiftRightLogical>() &&
2060 CanCover(node, bitwise_and.left()) &&
2061 MatchUnsignedIntegralConstant(bitwise_and.right(), &mask)) {
2062 uint64_t mask_width = base::bits::CountPopulation(mask);
2063 uint64_t mask_msb = base::bits::CountLeadingZeros64(mask);
2064 if ((mask_width != 0) && (mask_width != 64) &&
2065 (mask_msb + mask_width == 64)) {
2066 // The mask must be contiguous, and occupy the least-significant bits.
2067 DCHECK_EQ(0u, base::bits::CountTrailingZeros64(mask));
2068
2069 // Select Ubfx for And(Shr(x, imm), mask) where the mask is in the least
2070 // significant bits.
2071 const ShiftOp& shift = lhs.Cast<ShiftOp>();
2072 if (int64_t shift_by;
2073 MatchSignedIntegralConstant(shift.right(), &shift_by)) {
2074 // Any shift value can match; int64 shifts use `value % 64`.
2075 uint32_t lsb = static_cast<uint32_t>(shift_by & 0x3F);
2076
2077         // Ubfx cannot extract bits past the register size. However, since
2078         // shifting the original value would have introduced some zeros, we
2079         // can still use ubfx with a smaller mask and the remaining bits
2080         // will be zeros.
2081 if (lsb + mask_width > 64) mask_width = 64 - lsb;
2082
2083 Emit(kArm64Ubfx, g.DefineAsRegister(node), g.UseRegister(shift.left()),
2084 g.UseImmediateOrTemp(shift.right(), lsb),
2085 g.TempImmediate(static_cast<int32_t>(mask_width)));
2086 return;
2087 }
2088 // Other cases fall through to the normal And operation.
2089 }
2090 }
2091 VisitLogical(this, zone(), node, bitwise_and.rep, kArm64And,
2092 CanCover(node, bitwise_and.left()),
2093 CanCover(node, bitwise_and.right()), kLogical64Imm);
2094}
2095
2096void InstructionSelectorT::VisitWord32Or(OpIndex node) {
2097 const WordBinopOp& op = this->Get(node).template Cast<WordBinopOp>();
2098 VisitLogical(this, zone(), node, op.rep, kArm64Or32,
2099 CanCover(node, op.left()), CanCover(node, op.right()),
2100 kLogical32Imm);
2101}
2102
2103void InstructionSelectorT::VisitWord64Or(OpIndex node) {
2104 const WordBinopOp& op = this->Get(node).template Cast<WordBinopOp>();
2105 VisitLogical(this, zone(), node, op.rep, kArm64Or, CanCover(node, op.left()),
2106 CanCover(node, op.right()), kLogical64Imm);
2107}
2108
2109void InstructionSelectorT::VisitWord32Xor(OpIndex node) {
2110 const WordBinopOp& op = this->Get(node).template Cast<WordBinopOp>();
2111 VisitLogical(this, zone(), node, op.rep, kArm64Eor32,
2112 CanCover(node, op.left()), CanCover(node, op.right()),
2113 kLogical32Imm);
2114}
2115
2116void InstructionSelectorT::VisitWord64Xor(OpIndex node) {
2117 const WordBinopOp& op = this->Get(node).template Cast<WordBinopOp>();
2118 VisitLogical(this, zone(), node, op.rep, kArm64Eor, CanCover(node, op.left()),
2119 CanCover(node, op.right()), kLogical64Imm);
2120}
2121
2122void InstructionSelectorT::VisitWord32Shl(OpIndex node) {
2123 const ShiftOp& shift_op = Get(node).Cast<ShiftOp>();
2124 const Operation& lhs = Get(shift_op.left());
2125 if (uint64_t constant_left;
2126 lhs.Is<Opmask::kWord32BitwiseAnd>() && CanCover(node, shift_op.left()) &&
2127 MatchUnsignedIntegralConstant(shift_op.right(), &constant_left)) {
2128 uint32_t shift_by = static_cast<uint32_t>(constant_left);
2129 if (base::IsInRange(shift_by, 1, 31)) {
2130 const WordBinopOp& bitwise_and = lhs.Cast<WordBinopOp>();
2131 if (uint64_t constant_right;
2132 MatchUnsignedIntegralConstant(bitwise_and.right(), &constant_right)) {
2133 uint32_t mask = static_cast<uint32_t>(constant_right);
2134
2135 uint32_t mask_width = base::bits::CountPopulation(mask);
2136 uint32_t mask_msb = base::bits::CountLeadingZeros32(mask);
2137 if ((mask_width != 0) && (mask_msb + mask_width == 32)) {
2138 DCHECK_EQ(0u, base::bits::CountTrailingZeros32(mask));
2139 DCHECK_NE(0u, shift_by);
2140 Arm64OperandGeneratorT g(this);
2141 if ((shift_by + mask_width) >= 32) {
2142 // If the mask is contiguous and reaches or extends beyond the top
2143 // bit, only the shift is needed.
2144 Emit(kArm64Lsl32, g.DefineAsRegister(node),
2145 g.UseRegister(bitwise_and.left()), g.UseImmediate(shift_by));
2146 return;
2147 } else {
2148 // Select Ubfiz for Shl(And(x, mask), imm) where the mask is
2149 // contiguous, and the shift immediate non-zero.
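            // For instance, Word32Shl(Word32And(x, 0xFF), 8) can become a
            // single `ubfiz w0, wx, #8, #8`.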
2150 Emit(kArm64Ubfiz32, g.DefineAsRegister(node),
2151 g.UseRegister(bitwise_and.left()), g.UseImmediate(shift_by),
2152 g.TempImmediate(mask_width));
2153 return;
2154 }
2155 }
2156 }
2157 }
2158 }
2159 VisitRRO(this, kArm64Lsl32, node, kShift32Imm);
2160}
2161
2162void InstructionSelectorT::VisitWord64Shl(OpIndex node) {
2163 Arm64OperandGeneratorT g(this);
2164 const ShiftOp& shift_op = this->Get(node).template Cast<ShiftOp>();
2165 const Operation& lhs = this->Get(shift_op.left());
2166 const Operation& rhs = this->Get(shift_op.right());
2167 if ((lhs.Is<Opmask::kChangeInt32ToInt64>() ||
2168 lhs.Is<Opmask::kChangeUint32ToUint64>()) &&
2169 rhs.Is<Opmask::kWord32Constant>()) {
2170 int64_t shift_by = rhs.Cast<ConstantOp>().signed_integral();
2171 if (base::IsInRange(shift_by, 32, 63) && CanCover(node, shift_op.left())) {
2172 // There's no need to sign/zero-extend to 64-bit if we shift out the
2173 // upper 32 bits anyway.
2174 Emit(kArm64Lsl, g.DefineAsRegister(node),
2175 g.UseRegister(lhs.Cast<ChangeOp>().input()),
2176 g.UseImmediate64(shift_by));
2177 return;
2178 }
2179 }
2180 VisitRRO(this, kArm64Lsl, node, kShift64Imm);
2181}
2182
2183void InstructionSelectorT::VisitStackPointerGreaterThan(
2184 OpIndex node, FlagsContinuationT* cont) {
2185   StackCheckKind kind;
2186   OpIndex value;
2187 const auto& op = this->turboshaft_graph()
2188 ->Get(node)
2189 .template Cast<StackPointerGreaterThanOp>();
2190 kind = op.kind;
2191 value = op.stack_limit();
2192 InstructionCode opcode =
2193 kArchStackPointerGreaterThan | MiscField::encode(static_cast<int>(kind));
2194
2195 Arm64OperandGeneratorT g(this);
2196
2197 // No outputs.
2198 InstructionOperand* const outputs = nullptr;
2199 const int output_count = 0;
2200
2201 // Applying an offset to this stack check requires a temp register. Offsets
2202 // are only applied to the first stack check. If applying an offset, we must
2203 // ensure the input and temp registers do not alias, thus kUniqueRegister.
2204 InstructionOperand temps[] = {g.TempRegister()};
2205 const int temp_count = (kind == StackCheckKind::kJSFunctionEntry) ? 1 : 0;
2206 const auto register_mode = (kind == StackCheckKind::kJSFunctionEntry)
2207 ? OperandGenerator::kUniqueRegister
2208 : OperandGenerator::kRegister;
2209
2210 InstructionOperand inputs[] = {g.UseRegisterWithMode(value, register_mode)};
2211 static constexpr int input_count = arraysize(inputs);
2212
2213 EmitWithContinuation(opcode, output_count, outputs, input_count, inputs,
2214 temp_count, temps, cont);
2215}
2216
2217namespace {
2218
2219bool TryEmitBitfieldExtract32(InstructionSelectorT* selector, OpIndex node) {
2220 Arm64OperandGeneratorT g(selector);
2221 const ShiftOp& shift = selector->Get(node).Cast<ShiftOp>();
2222 const Operation& lhs = selector->Get(shift.left());
2223 if (selector->CanCover(node, shift.left()) &&
2224 lhs.Is<Opmask::kWord32ShiftLeft>()) {
2225 // Select Ubfx or Sbfx for (x << (K & 0x1F)) OP (K & 0x1F), where
2226 // OP is >>> or >> and (K & 0x1F) != 0.
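    // For instance, (x << 24) >> 24 with an arithmetic right shift becomes
    // `sbfx w0, wx, #0, #8` (sign-extending the low byte); with a logical
    // right shift it becomes `ubfx`.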
2227 const ShiftOp& lhs_shift = lhs.Cast<ShiftOp>();
2228 int64_t lhs_shift_by_constant, shift_by_constant;
2229 if (selector->MatchSignedIntegralConstant(lhs_shift.right(),
2230 &lhs_shift_by_constant) &&
2231 selector->MatchSignedIntegralConstant(shift.right(),
2232 &shift_by_constant) &&
2233 (lhs_shift_by_constant & 0x1F) != 0 &&
2234 (lhs_shift_by_constant & 0x1F) == (shift_by_constant & 0x1F)) {
2235 DCHECK(shift.Is<Opmask::kWord32ShiftRightArithmetic>() ||
2236 shift.Is<Opmask::kWord32ShiftRightArithmeticShiftOutZeros>() ||
2237 shift.Is<Opmask::kWord32ShiftRightLogical>());
2238
2239 ArchOpcode opcode = shift.kind == ShiftOp::Kind::kShiftRightLogical
2240 ? kArm64Ubfx32
2241 : kArm64Sbfx32;
2242
2243 int right_val = shift_by_constant & 0x1F;
2244 DCHECK_NE(right_val, 0);
2245
2246 selector->Emit(opcode, g.DefineAsRegister(node),
2247 g.UseRegister(lhs_shift.left()), g.TempImmediate(0),
2248 g.TempImmediate(32 - right_val));
2249 return true;
2250 }
2251 }
2252 return false;
2253}
2254
2255} // namespace
2256void InstructionSelectorT::VisitWord32Shr(OpIndex node) {
2257 const ShiftOp& shift = Get(node).Cast<ShiftOp>();
2258 const Operation& lhs = Get(shift.left());
2259 uint64_t constant_right;
2260 const bool right_is_constant =
2261 MatchUnsignedIntegralConstant(shift.right(), &constant_right);
2262 if (lhs.Is<Opmask::kWord32BitwiseAnd>() && right_is_constant) {
2263 uint32_t lsb = constant_right & 0x1F;
2264 const WordBinopOp& bitwise_and = lhs.Cast<WordBinopOp>();
2265 uint32_t constant_bitmask;
2266 if (MatchIntegralWord32Constant(bitwise_and.right(), &constant_bitmask) &&
2267 constant_bitmask != 0) {
2268 // Select Ubfx for Shr(And(x, mask), imm) where the result of the mask is
2269 // shifted into the least-significant bits.
2270 uint32_t mask = (constant_bitmask >> lsb) << lsb;
2271 unsigned mask_width = base::bits::CountPopulation(mask);
2272 unsigned mask_msb = base::bits::CountLeadingZeros32(mask);
2273 if ((mask_msb + mask_width + lsb) == 32) {
2274 Arm64OperandGeneratorT g(this);
2275 DCHECK_EQ(lsb, base::bits::CountTrailingZeros32(mask));
2276 Emit(kArm64Ubfx32, g.DefineAsRegister(node),
2277 g.UseRegister(bitwise_and.left()),
2278 g.UseImmediateOrTemp(shift.right(), lsb),
2279 g.TempImmediate(mask_width));
2280 return;
2281 }
2282 }
2283 } else if (TryEmitBitfieldExtract32(this, node)) {
2284 return;
2285 }
2286
2287 if (lhs.Is<Opmask::kWord32UnsignedMulOverflownBits>() && right_is_constant &&
2288 CanCover(node, shift.left())) {
2289 // Combine this shift with the multiply and shift that would be generated
2290 // by Uint32MulHigh.
2291 Arm64OperandGeneratorT g(this);
2292 const WordBinopOp& mul = lhs.Cast<WordBinopOp>();
2293 int shift_by = constant_right & 0x1F;
2294 InstructionOperand const smull_operand = g.TempRegister();
2295 Emit(kArm64Umull, smull_operand, g.UseRegister(mul.left()),
2296 g.UseRegister(mul.right()));
2297 Emit(kArm64Lsr, g.DefineAsRegister(node), smull_operand,
2298 g.TempImmediate(32 + shift_by));
2299 return;
2300 }
2301
2302 VisitRRO(this, kArm64Lsr32, node, kShift32Imm);
2303}
2304
2305void InstructionSelectorT::VisitWord64Shr(OpIndex node) {
2306 const ShiftOp& op = Get(node).Cast<ShiftOp>();
2307 const Operation& lhs = Get(op.left());
2308 if (uint64_t constant; lhs.Is<Opmask::kWord64BitwiseAnd>() &&
2309 MatchUnsignedIntegralConstant(op.right(), &constant)) {
2310 uint32_t lsb = constant & 0x3F;
2311 const WordBinopOp& bitwise_and = lhs.Cast<WordBinopOp>();
2312 uint64_t constant_and_rhs;
2313 if (MatchIntegralWord64Constant(bitwise_and.right(), &constant_and_rhs) &&
2314 constant_and_rhs != 0) {
2315 // Select Ubfx for Shr(And(x, mask), imm) where the result of the mask is
2316 // shifted into the least-significant bits.
2317 uint64_t mask = static_cast<uint64_t>(constant_and_rhs >> lsb) << lsb;
2318 unsigned mask_width = base::bits::CountPopulation(mask);
2319 unsigned mask_msb = base::bits::CountLeadingZeros64(mask);
2320 if ((mask_msb + mask_width + lsb) == 64) {
2321 Arm64OperandGeneratorT g(this);
2322 DCHECK_EQ(lsb, base::bits::CountTrailingZeros64(mask));
2323 Emit(kArm64Ubfx, g.DefineAsRegister(node),
2324 g.UseRegister(bitwise_and.left()),
2325 g.UseImmediateOrTemp(op.right(), lsb),
2326 g.TempImmediate(mask_width));
2327 return;
2328 }
2329 }
2330 }
2331 VisitRRO(this, kArm64Lsr, node, kShift64Imm);
2332}
2333
2334void InstructionSelectorT::VisitWord32Sar(OpIndex node) {
2335 if (TryEmitBitfieldExtract32(this, node)) {
2336 return;
2337 }
2338
2339 const ShiftOp& shift = Get(node).Cast<ShiftOp>();
2340 const Operation& lhs = Get(shift.left());
2341 uint64_t constant_right;
2342 const bool right_is_constant =
2343 MatchUnsignedIntegralConstant(shift.right(), &constant_right);
2344 if (lhs.Is<Opmask::kWord32SignedMulOverflownBits>() && right_is_constant &&
2345 CanCover(node, shift.left())) {
2346 // Combine this shift with the multiply and shift that would be generated
2347 // by Int32MulHigh.
2348 Arm64OperandGeneratorT g(this);
2349 const WordBinopOp& mul_overflow = lhs.Cast<WordBinopOp>();
2350 int shift_by = constant_right & 0x1F;
2351 InstructionOperand const smull_operand = g.TempRegister();
2352 Emit(kArm64Smull, smull_operand, g.UseRegister(mul_overflow.left()),
2353 g.UseRegister(mul_overflow.right()));
2354 Emit(kArm64Asr, g.DefineAsRegister(node), smull_operand,
2355 g.TempImmediate(32 + shift_by));
2356 return;
2357 }
2358
2359 if (lhs.Is<Opmask::kWord32Add>() && right_is_constant &&
2360 CanCover(node, shift.left())) {
2361 const WordBinopOp& add = Get(shift.left()).Cast<WordBinopOp>();
2362 const Operation& lhs = Get(add.left());
2363 if (lhs.Is<Opmask::kWord32SignedMulOverflownBits>() &&
2364 CanCover(shift.left(), add.left())) {
2365 // Combine the shift that would be generated by Int32MulHigh with the add
2366 // on the left of this Sar operation. We do it here, as the result of the
2367 // add potentially has 33 bits, so we have to ensure the result is
2368 // truncated by being the input to this 32-bit Sar operation.
2369 Arm64OperandGeneratorT g(this);
2370 const WordBinopOp& mul = lhs.Cast<WordBinopOp>();
2371
2372 InstructionOperand const smull_operand = g.TempRegister();
2373 Emit(kArm64Smull, smull_operand, g.UseRegister(mul.left()),
2374 g.UseRegister(mul.right()));
2375
2376 InstructionOperand const add_operand = g.TempRegister();
2377 Emit(kArm64Add | AddressingModeField::encode(kMode_Operand2_R_ASR_I),
2378 add_operand, g.UseRegister(add.right()), smull_operand,
2379 g.TempImmediate(32));
2380
2381 Emit(kArm64Asr32, g.DefineAsRegister(node), add_operand,
2382 g.UseImmediate(shift.right()));
2383 return;
2384 }
2385 }
2386
2387 VisitRRO(this, kArm64Asr32, node, kShift32Imm);
2388}
2389
2390void InstructionSelectorT::VisitWord64Sar(OpIndex node) {
2391 if (TryEmitExtendingLoad(this, node)) return;
2392
2393 // Select Sbfx(x, imm, 32-imm) for Word64Sar(ChangeInt32ToInt64(x), imm)
2394 // where possible
2395 const ShiftOp& shiftop = Get(node).Cast<ShiftOp>();
2396 const Operation& lhs = Get(shiftop.left());
2397
2398 int64_t constant_rhs;
2399 if (lhs.Is<Opmask::kChangeInt32ToInt64>() &&
2400 MatchIntegralWord64Constant(shiftop.right(), &constant_rhs) &&
2401 is_uint5(constant_rhs) && CanCover(node, shiftop.left())) {
2402 // Don't select Sbfx here if Asr(Ldrsw(x), imm) can be selected for
2403 // Word64Sar(ChangeInt32ToInt64(Load(x)), imm)
2404 OpIndex input = lhs.Cast<ChangeOp>().input();
2405 if (!Get(input).Is<LoadOp>() || !CanCover(shiftop.left(), input)) {
2406 Arm64OperandGeneratorT g(this);
2407 int right = static_cast<int>(constant_rhs);
2408 Emit(kArm64Sbfx, g.DefineAsRegister(node), g.UseRegister(input),
2409 g.UseImmediate(right), g.UseImmediate(32 - right));
2410 return;
2411 }
2412 }
2413
2414 VisitRRO(this, kArm64Asr, node, kShift64Imm);
2415}
2416
2417void InstructionSelectorT::VisitWord32Rol(OpIndex node) { UNREACHABLE(); }
2418
2419void InstructionSelectorT::VisitWord64Rol(OpIndex node) { UNREACHABLE(); }
2420
2421void InstructionSelectorT::VisitWord32Ror(OpIndex node) {
2422 VisitRRO(this, kArm64Ror32, node, kShift32Imm);
2423}
2424
2425void InstructionSelectorT::VisitWord64Ror(OpIndex node) {
2426 VisitRRO(this, kArm64Ror, node, kShift64Imm);
2427}
2428
2429#define RR_OP_T_LIST(V) \
2430 V(Float32Sqrt, kArm64Float32Sqrt) \
2431 V(Float64Sqrt, kArm64Float64Sqrt) \
2432 V(Float32RoundDown, kArm64Float32RoundDown) \
2433 V(Float64RoundDown, kArm64Float64RoundDown) \
2434 V(Float32RoundUp, kArm64Float32RoundUp) \
2435 V(Float64RoundUp, kArm64Float64RoundUp) \
2436 V(Float32RoundTruncate, kArm64Float32RoundTruncate) \
2437 V(Float64RoundTruncate, kArm64Float64RoundTruncate) \
2438 V(Float64RoundTiesAway, kArm64Float64RoundTiesAway) \
2439 V(Float32RoundTiesEven, kArm64Float32RoundTiesEven) \
2440 V(Float64RoundTiesEven, kArm64Float64RoundTiesEven) \
2441 V(Float64SilenceNaN, kArm64Float64SilenceNaN) \
2442 V(ChangeInt32ToFloat64, kArm64Int32ToFloat64) \
2443 V(RoundFloat64ToInt32, kArm64Float64ToInt32) \
2444 V(ChangeFloat32ToFloat64, kArm64Float32ToFloat64) \
2445 V(RoundInt32ToFloat32, kArm64Int32ToFloat32) \
2446 V(RoundUint32ToFloat32, kArm64Uint32ToFloat32) \
2447 V(ChangeInt64ToFloat64, kArm64Int64ToFloat64) \
2448 V(ChangeUint32ToFloat64, kArm64Uint32ToFloat64) \
2449 V(ChangeFloat64ToInt32, kArm64Float64ToInt32) \
2450 V(ChangeFloat64ToInt64, kArm64Float64ToInt64) \
2451 V(ChangeFloat64ToUint32, kArm64Float64ToUint32) \
2452 V(ChangeFloat64ToUint64, kArm64Float64ToUint64) \
2453 V(RoundInt64ToFloat32, kArm64Int64ToFloat32) \
2454 V(RoundInt64ToFloat64, kArm64Int64ToFloat64) \
2455 V(RoundUint64ToFloat32, kArm64Uint64ToFloat32) \
2456 V(RoundUint64ToFloat64, kArm64Uint64ToFloat64) \
2457 V(BitcastFloat32ToInt32, kArm64Float64ExtractLowWord32) \
2458 V(BitcastFloat64ToInt64, kArm64U64MoveFloat64) \
2459 V(BitcastInt32ToFloat32, kArm64Float64MoveU64) \
2460 V(BitcastInt64ToFloat64, kArm64Float64MoveU64) \
2461 V(TruncateFloat64ToFloat32, kArm64Float64ToFloat32) \
2462 V(TruncateFloat64ToWord32, kArchTruncateDoubleToI) \
2463 V(TruncateFloat64ToUint32, kArm64Float64ToUint32) \
2464 V(Float64ExtractLowWord32, kArm64Float64ExtractLowWord32) \
2465 V(Float64ExtractHighWord32, kArm64Float64ExtractHighWord32) \
2466 V(Word64Clz, kArm64Clz) \
2467 V(Word32Clz, kArm64Clz32) \
2468 V(Word32Popcnt, kArm64Cnt32) \
2469 V(Word64Popcnt, kArm64Cnt64) \
2470 V(Word32ReverseBits, kArm64Rbit32) \
2471 V(Word64ReverseBits, kArm64Rbit) \
2472 V(Word32ReverseBytes, kArm64Rev32) \
2473 V(Word64ReverseBytes, kArm64Rev) \
2474 IF_WASM(V, F16x8Ceil, kArm64Float16RoundUp) \
2475 IF_WASM(V, F16x8Floor, kArm64Float16RoundDown) \
2476 IF_WASM(V, F16x8Trunc, kArm64Float16RoundTruncate) \
2477 IF_WASM(V, F16x8NearestInt, kArm64Float16RoundTiesEven) \
2478 IF_WASM(V, F32x4Ceil, kArm64Float32RoundUp) \
2479 IF_WASM(V, F32x4Floor, kArm64Float32RoundDown) \
2480 IF_WASM(V, F32x4Trunc, kArm64Float32RoundTruncate) \
2481 IF_WASM(V, F32x4NearestInt, kArm64Float32RoundTiesEven) \
2482 IF_WASM(V, F64x2Ceil, kArm64Float64RoundUp) \
2483 IF_WASM(V, F64x2Floor, kArm64Float64RoundDown) \
2484 IF_WASM(V, F64x2Trunc, kArm64Float64RoundTruncate) \
2485 IF_WASM(V, F64x2NearestInt, kArm64Float64RoundTiesEven)
2486
2487#define RRR_OP_T_LIST(V) \
2488 V(Int32Div, kArm64Idiv32) \
2489 V(Int64Div, kArm64Idiv) \
2490 V(Uint32Div, kArm64Udiv32) \
2491 V(Uint64Div, kArm64Udiv) \
2492 V(Int32Mod, kArm64Imod32) \
2493 V(Int64Mod, kArm64Imod) \
2494 V(Uint32Mod, kArm64Umod32) \
2495 V(Uint64Mod, kArm64Umod) \
2496 V(Float32Add, kArm64Float32Add) \
2497 V(Float64Add, kArm64Float64Add) \
2498 V(Float32Sub, kArm64Float32Sub) \
2499 V(Float64Sub, kArm64Float64Sub) \
2500 V(Float32Div, kArm64Float32Div) \
2501 V(Float64Div, kArm64Float64Div) \
2502 V(Float32Max, kArm64Float32Max) \
2503 V(Float64Max, kArm64Float64Max) \
2504 V(Float32Min, kArm64Float32Min) \
2505 V(Float64Min, kArm64Float64Min) \
2506 IF_WASM(V, I8x16Swizzle, kArm64I8x16Swizzle)
2507
2508#define RR_VISITOR(Name, opcode) \
2509 void InstructionSelectorT::Visit##Name(OpIndex node) { \
2510 VisitRR(this, opcode, node); \
2511 }
2512 RR_OP_T_LIST(RR_VISITOR)
2513 #undef RR_VISITOR
2514#undef RR_OP_T_LIST
2515
2516#define RRR_VISITOR(Name, opcode) \
2517 void InstructionSelectorT::Visit##Name(OpIndex node) { \
2518 VisitRRR(this, opcode, node); \
2519 }
2520 RRR_OP_T_LIST(RRR_VISITOR)
2521 #undef RRR_VISITOR
2522#undef RRR_OP_T_LIST
2523
2524void InstructionSelectorT::VisitWord32Ctz(OpIndex node) { UNREACHABLE(); }
2525
2526void InstructionSelectorT::VisitWord64Ctz(OpIndex node) { UNREACHABLE(); }
2527
2528void InstructionSelectorT::VisitInt32Add(OpIndex node) {
2529 const WordBinopOp& add = this->Get(node).Cast<WordBinopOp>();
2530 DCHECK(add.Is<Opmask::kWord32Add>());
2531 V<Word32> left = add.left<Word32>();
2532 V<Word32> right = add.right<Word32>();
2533 // Select Madd(x, y, z) for Add(Mul(x, y), z) or Add(z, Mul(x, y)).
2534 if (TryEmitMultiplyAddInt32(this, node, left, right) ||
2535 TryEmitMultiplyAddInt32(this, node, right, left)) {
2536 return;
2537 }
2538 VisitAddSub(this, node, kArm64Add32, kArm64Sub32);
2539}
2540
2541void InstructionSelectorT::VisitInt64Add(OpIndex node) {
2542 const WordBinopOp& add = this->Get(node).Cast<WordBinopOp>();
2543 DCHECK(add.Is<Opmask::kWord64Add>());
2544 V<Word64> left = add.left<Word64>();
2545 V<Word64> right = add.right<Word64>();
2546 // Select Madd(x, y, z) for Add(Mul(x, y), z) or Add(z, Mul(x, y)).
2547 if (TryEmitMultiplyAddInt64(this, node, left, right) ||
2548 TryEmitMultiplyAddInt64(this, node, right, left)) {
2549 return;
2550 }
2551 VisitAddSub(this, node, kArm64Add, kArm64Sub);
2552}
2553
2554void InstructionSelectorT::VisitInt32Sub(OpIndex node) {
2555 DCHECK(this->Get(node).Is<Opmask::kWord32Sub>());
2556
2557 // Select Msub(x, y, a) for Sub(a, Mul(x, y)).
2558 if (TryEmitMultiplySub<Opmask::kWord32Mul>(this, node, kArm64Msub32)) {
2559 return;
2560 }
2561
2562 VisitAddSub(this, node, kArm64Sub32, kArm64Add32);
2563}
2564
2565void InstructionSelectorT::VisitInt64Sub(OpIndex node) {
2566 DCHECK(this->Get(node).Is<Opmask::kWord64Sub>());
2567
2568 // Select Msub(x, y, a) for Sub(a, Mul(x, y)).
2569 if (TryEmitMultiplySub<Opmask::kWord64Mul>(this, node, kArm64Msub)) {
2570 return;
2571 }
2572
2573 VisitAddSub(this, node, kArm64Sub, kArm64Add);
2574}
2575
2576namespace {
2577
2578void EmitInt32MulWithOverflow(InstructionSelectorT* selector, OpIndex node,
2579 FlagsContinuationT* cont) {
2580 Arm64OperandGeneratorT g(selector);
2581 const OverflowCheckedBinopOp& mul =
2582 selector->Get(node).Cast<OverflowCheckedBinopOp>();
2583 InstructionOperand result = g.DefineAsRegister(node);
2584 InstructionOperand left = g.UseRegister(mul.left());
2585
2586 int32_t constant_rhs;
2587 if (selector->MatchIntegralWord32Constant(mul.right(), &constant_rhs) &&
2588 base::bits::IsPowerOfTwo(constant_rhs)) {
2589 // Sign extend the bottom 32 bits and shift left.
2590 int32_t shift = base::bits::WhichPowerOfTwo(constant_rhs);
2591 selector->Emit(kArm64Sbfiz, result, left, g.TempImmediate(shift),
2592 g.TempImmediate(32));
2593 } else {
2594 InstructionOperand right = g.UseRegister(mul.right());
2595 selector->Emit(kArm64Smull, result, left, right);
2596 }
2597
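  // {result} now holds the full 64-bit product, so signed 32-bit overflow
  // occurred iff it differs from the sign-extension of its own low 32 bits,
  // which the cmp with an SXTW operand below checks.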
2598 InstructionCode opcode =
2599 kArm64Cmp | AddressingModeField::encode(kMode_Operand2_R_SXTW);
2600 selector->EmitWithContinuation(opcode, result, result, cont);
2601}
2602
2603void EmitInt64MulWithOverflow(InstructionSelectorT* selector, OpIndex node,
2604 FlagsContinuationT* cont) {
2605 Arm64OperandGeneratorT g(selector);
2606 InstructionOperand result = g.DefineAsRegister(node);
2607 InstructionOperand left = g.UseRegister(selector->input_at(node, 0));
2608 InstructionOperand high = g.TempRegister();
2609
2610 InstructionOperand right = g.UseRegister(selector->input_at(node, 1));
2611 selector->Emit(kArm64Mul, result, left, right);
2612 selector->Emit(kArm64Smulh, high, left, right);
2613
2614 // Test whether {high} is a sign-extension of {result}.
2615 InstructionCode opcode =
2616 kArm64Cmp | AddressingModeField::encode(kMode_Operand2_R_ASR_I);
2617 selector->EmitWithContinuation(opcode, high, result, g.TempImmediate(63),
2618 cont);
2619}
2620
2621} // namespace
2622
2623void InstructionSelectorT::VisitInt32Mul(OpIndex node) {
2624 Arm64OperandGeneratorT g(this);
2625 const WordBinopOp& mul = Get(node).Cast<WordBinopOp>();
2626 DCHECK(mul.Is<Opmask::kWord32Mul>());
2627
2628 // First, try to reduce the multiplication to addition with left shift.
2629 // x * (2^k + 1) -> x + (x << k)
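  // For instance, x * 9 (= 2^3 + 1) is emitted as `add wd, wx, wx, lsl #3`.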
2630 int32_t shift = LeftShiftForReducedMultiply(this, mul.right());
2631 if (shift > 0) {
2632 Emit(kArm64Add32 | AddressingModeField::encode(kMode_Operand2_R_LSL_I),
2633 g.DefineAsRegister(node), g.UseRegister(mul.left()),
2634 g.UseRegister(mul.left()), g.TempImmediate(shift));
2635 return;
2636 }
2637
2638 // Select Mneg(x, y) for Mul(Sub(0, x), y) or Mul(y, Sub(0, x)).
2639 if (TryEmitMultiplyNegateInt32(this, node, mul.left(), mul.right()) ||
2640 TryEmitMultiplyNegateInt32(this, node, mul.right(), mul.left())) {
2641 return;
2642 }
2643
2644 VisitRRR(this, kArm64Mul32, node);
2645}
2646
2647void InstructionSelectorT::VisitInt64Mul(OpIndex node) {
2648 Arm64OperandGeneratorT g(this);
2649 const WordBinopOp& mul = Get(node).Cast<WordBinopOp>();
2650 DCHECK(mul.Is<Opmask::kWord64Mul>());
2651
2652 // First, try to reduce the multiplication to addition with left shift.
2653 // x * (2^k + 1) -> x + (x << k)
2654 int32_t shift = LeftShiftForReducedMultiply(this, mul.right());
2655 if (shift > 0) {
2656 Emit(kArm64Add | AddressingModeField::encode(kMode_Operand2_R_LSL_I),
2657 g.DefineAsRegister(node), g.UseRegister(mul.left()),
2658 g.UseRegister(mul.left()), g.TempImmediate(shift));
2659 return;
2660 }
2661
2662 // Select Mneg(x, y) for Mul(Sub(0, x), y) or Mul(y, Sub(0, x)).
2663 if (TryEmitMultiplyNegateInt64(this, node, mul.left(), mul.right()) ||
2664 TryEmitMultiplyNegateInt64(this, node, mul.right(), mul.left())) {
2665 return;
2666 }
2667
2668 VisitRRR(this, kArm64Mul, node);
2669}
2670
2671#if V8_ENABLE_WEBASSEMBLY
2672namespace {
2673void VisitExtMul(InstructionSelectorT* selector, ArchOpcode opcode,
2674 OpIndex node, int dst_lane_size) {
2675 InstructionCode code = opcode;
2676 code |= LaneSizeField::encode(dst_lane_size);
2677 VisitRRR(selector, code, node);
2678}
2679} // namespace
2680
2681void InstructionSelectorT::VisitI16x8ExtMulLowI8x16S(OpIndex node) {
2682 VisitExtMul(this, kArm64Smull, node, 16);
2683}
2684
2685void InstructionSelectorT::VisitI16x8ExtMulHighI8x16S(OpIndex node) {
2686 VisitExtMul(this, kArm64Smull2, node, 16);
2687}
2688
2689void InstructionSelectorT::VisitI16x8ExtMulLowI8x16U(OpIndex node) {
2690 VisitExtMul(this, kArm64Umull, node, 16);
2691}
2692
2693void InstructionSelectorT::VisitI16x8ExtMulHighI8x16U(OpIndex node) {
2694 VisitExtMul(this, kArm64Umull2, node, 16);
2695}
2696
2697void InstructionSelectorT::VisitI32x4ExtMulLowI16x8S(OpIndex node) {
2698 VisitExtMul(this, kArm64Smull, node, 32);
2699}
2700
2701void InstructionSelectorT::VisitI32x4ExtMulHighI16x8S(OpIndex node) {
2702 VisitExtMul(this, kArm64Smull2, node, 32);
2703}
2704
2705void InstructionSelectorT::VisitI32x4ExtMulLowI16x8U(OpIndex node) {
2706 VisitExtMul(this, kArm64Umull, node, 32);
2707}
2708
2709void InstructionSelectorT::VisitI32x4ExtMulHighI16x8U(OpIndex node) {
2710 VisitExtMul(this, kArm64Umull2, node, 32);
2711}
2712
2713void InstructionSelectorT::VisitI64x2ExtMulLowI32x4S(OpIndex node) {
2714 VisitExtMul(this, kArm64Smull, node, 64);
2715}
2716
2717void InstructionSelectorT::VisitI64x2ExtMulHighI32x4S(OpIndex node) {
2718 VisitExtMul(this, kArm64Smull2, node, 64);
2719}
2720
2721void InstructionSelectorT::VisitI64x2ExtMulLowI32x4U(OpIndex node) {
2722 VisitExtMul(this, kArm64Umull, node, 64);
2723}
2724
2725void InstructionSelectorT::VisitI64x2ExtMulHighI32x4U(OpIndex node) {
2726 VisitExtMul(this, kArm64Umull2, node, 64);
2727}
2728#endif // V8_ENABLE_WEBASSEMBLY
2729
2730#if V8_ENABLE_WEBASSEMBLY
2731namespace {
2732void VisitExtAddPairwise(InstructionSelectorT* selector, ArchOpcode opcode,
2733 OpIndex node, int dst_lane_size) {
2734 InstructionCode code = opcode;
2735 code |= LaneSizeField::encode(dst_lane_size);
2736 VisitRR(selector, code, node);
2737}
2738} // namespace
2739
2740void InstructionSelectorT::VisitI32x4ExtAddPairwiseI16x8S(OpIndex node) {
2741 VisitExtAddPairwise(this, kArm64Saddlp, node, 32);
2742}
2743
2744void InstructionSelectorT::VisitI32x4ExtAddPairwiseI16x8U(OpIndex node) {
2745 VisitExtAddPairwise(this, kArm64Uaddlp, node, 32);
2746}
2747
2748void InstructionSelectorT::VisitI16x8ExtAddPairwiseI8x16S(OpIndex node) {
2749 VisitExtAddPairwise(this, kArm64Saddlp, node, 16);
2750}
2751
2752void InstructionSelectorT::VisitI16x8ExtAddPairwiseI8x16U(OpIndex node) {
2753 VisitExtAddPairwise(this, kArm64Uaddlp, node, 16);
2754}
2755#endif // V8_ENABLE_WEBASSEMBLY
2756
2757void InstructionSelectorT::VisitInt32MulHigh(OpIndex node) {
2758 Arm64OperandGeneratorT g(this);
2759 InstructionOperand const smull_operand = g.TempRegister();
2760 Emit(kArm64Smull, smull_operand, g.UseRegister(this->input_at(node, 0)),
2761 g.UseRegister(this->input_at(node, 1)));
2762 Emit(kArm64Asr, g.DefineAsRegister(node), smull_operand, g.TempImmediate(32));
2763}
2764
2765void InstructionSelectorT::VisitInt64MulHigh(OpIndex node) {
2766 return VisitRRR(this, kArm64Smulh, node);
2767}
2768
2769void InstructionSelectorT::VisitUint32MulHigh(OpIndex node) {
2770 Arm64OperandGeneratorT g(this);
2771 InstructionOperand const smull_operand = g.TempRegister();
2772 Emit(kArm64Umull, smull_operand, g.UseRegister(this->input_at(node, 0)),
2773 g.UseRegister(this->input_at(node, 1)));
2774 Emit(kArm64Lsr, g.DefineAsRegister(node), smull_operand, g.TempImmediate(32));
2775}
2776
2777void InstructionSelectorT::VisitUint64MulHigh(OpIndex node) {
2778 return VisitRRR(this, kArm64Umulh, node);
2779}
2780
2781void InstructionSelectorT::VisitTruncateFloat32ToInt32(OpIndex node) {
2782 Arm64OperandGeneratorT g(this);
2783 const Operation& op = this->Get(node);
2784 InstructionCode opcode = kArm64Float32ToInt32;
2785 opcode |=
2786 MiscField::encode(op.Is<Opmask::kTruncateFloat32ToInt32OverflowToMin>());
2787 Emit(opcode, g.DefineAsRegister(node),
2788 g.UseRegister(this->input_at(node, 0)));
2789}
2790
2791void InstructionSelectorT::VisitTruncateFloat32ToUint32(OpIndex node) {
2792 Arm64OperandGeneratorT g(this);
2793 const Operation& op = this->Get(node);
2794 InstructionCode opcode = kArm64Float32ToUint32;
2795 if (op.Is<Opmask::kTruncateFloat32ToUint32OverflowToMin>()) {
2796 opcode |= MiscField::encode(true);
2797 }
2798
2799 Emit(opcode, g.DefineAsRegister(node),
2800 g.UseRegister(this->input_at(node, 0)));
2801}
2802
2803void InstructionSelectorT::VisitTryTruncateFloat32ToInt64(OpIndex node) {
2804 Arm64OperandGeneratorT g(this);
2805
2806 InstructionOperand inputs[] = {g.UseRegister(this->input_at(node, 0))};
2807 InstructionOperand outputs[2];
2808 size_t output_count = 0;
2809 outputs[output_count++] = g.DefineAsRegister(node);
2810
2811 OptionalOpIndex success_output = FindProjection(node, 1);
2812 if (success_output.valid()) {
2813 outputs[output_count++] = g.DefineAsRegister(success_output.value());
2814 }
2815
2816 Emit(kArm64Float32ToInt64, output_count, outputs, 1, inputs);
2817}
2818
2819void InstructionSelectorT::VisitTruncateFloat64ToInt64(OpIndex node) {
2820 Arm64OperandGeneratorT g(this);
2821 InstructionCode opcode = kArm64Float64ToInt64;
2822 const Operation& op = this->Get(node);
2823 if (op.Is<Opmask::kTruncateFloat64ToInt64OverflowToMin>()) {
2824 opcode |= MiscField::encode(true);
2825 }
2826
2827 Emit(opcode, g.DefineAsRegister(node), g.UseRegister(op.input(0)));
2828}
2829
2830void InstructionSelectorT::VisitTryTruncateFloat64ToInt64(OpIndex node) {
2831 Arm64OperandGeneratorT g(this);
2832
2833 InstructionOperand inputs[] = {g.UseRegister(this->input_at(node, 0))};
2834 InstructionOperand outputs[2];
2835 size_t output_count = 0;
2836 outputs[output_count++] = g.DefineAsRegister(node);
2837
2838 OptionalOpIndex success_output = FindProjection(node, 1);
2839 if (success_output.valid()) {
2840 outputs[output_count++] = g.DefineAsRegister(success_output.value());
2841 }
2842
2843 Emit(kArm64Float64ToInt64, output_count, outputs, 1, inputs);
2844}
2845
2846void InstructionSelectorT::VisitTruncateFloat64ToFloat16RawBits(OpIndex node) {
2847 Arm64OperandGeneratorT g(this);
2848 InstructionOperand inputs[] = {g.UseRegister(this->input_at(node, 0))};
2849 InstructionOperand outputs[] = {g.DefineAsRegister(node)};
2850 InstructionOperand temps[] = {g.TempDoubleRegister()};
2851 Emit(kArm64Float64ToFloat16RawBits, arraysize(outputs), outputs,
2852 arraysize(inputs), inputs, arraysize(temps), temps);
2853}
2854
2855void InstructionSelectorT::VisitChangeFloat16RawBitsToFloat64(OpIndex node) {
2856 Arm64OperandGeneratorT g(this);
2857 InstructionOperand inputs[] = {g.UseRegister(this->input_at(node, 0))};
2858 InstructionOperand outputs[] = {g.DefineAsRegister(node)};
2859 InstructionOperand temps[] = {g.TempDoubleRegister()};
2860 Emit(kArm64Float16RawBitsToFloat64, arraysize(outputs), outputs,
2861 arraysize(inputs), inputs, arraysize(temps), temps);
2862}
2863
2864void InstructionSelectorT::VisitTryTruncateFloat32ToUint64(OpIndex node) {
2865 Arm64OperandGeneratorT g(this);
2866
2867 InstructionOperand inputs[] = {g.UseRegister(this->input_at(node, 0))};
2868 InstructionOperand outputs[2];
2869 size_t output_count = 0;
2870 outputs[output_count++] = g.DefineAsRegister(node);
2871
2872 OptionalOpIndex success_output = FindProjection(node, 1);
2873 if (success_output.valid()) {
2874 outputs[output_count++] = g.DefineAsRegister(success_output.value());
2875 }
2876
2877 Emit(kArm64Float32ToUint64, output_count, outputs, 1, inputs);
2878}
2879
2880void InstructionSelectorT::VisitTryTruncateFloat64ToUint64(OpIndex node) {
2881 Arm64OperandGeneratorT g(this);
2882
2883 InstructionOperand inputs[] = {g.UseRegister(this->input_at(node, 0))};
2884 InstructionOperand outputs[2];
2885 size_t output_count = 0;
2886 outputs[output_count++] = g.DefineAsRegister(node);
2887
2888 OptionalOpIndex success_output = FindProjection(node, 1);
2889 if (success_output.valid()) {
2890 outputs[output_count++] = g.DefineAsRegister(success_output.value());
2891 }
2892
2893 Emit(kArm64Float64ToUint64, output_count, outputs, 1, inputs);
2894}
2895
2896void InstructionSelectorT::VisitTryTruncateFloat64ToInt32(OpIndex node) {
2897 Arm64OperandGeneratorT g(this);
2898 InstructionOperand inputs[] = {g.UseRegister(this->input_at(node, 0))};
2899 InstructionOperand outputs[2];
2900 size_t output_count = 0;
2901 outputs[output_count++] = g.DefineAsRegister(node);
2902
2903 OptionalOpIndex success_output = FindProjection(node, 1);
2904 if (success_output.valid()) {
2905 outputs[output_count++] = g.DefineAsRegister(success_output.value());
2906 }
2907
2908 Emit(kArm64Float64ToInt32, output_count, outputs, 1, inputs);
2909}
2910
2911void InstructionSelectorT::VisitTryTruncateFloat64ToUint32(OpIndex node) {
2912 Arm64OperandGeneratorT g(this);
2913 InstructionOperand inputs[] = {g.UseRegister(this->input_at(node, 0))};
2914 InstructionOperand outputs[2];
2915 size_t output_count = 0;
2916 outputs[output_count++] = g.DefineAsRegister(node);
2917
2918 OptionalOpIndex success_output = FindProjection(node, 1);
2919 if (success_output.valid()) {
2920 outputs[output_count++] = g.DefineAsRegister(success_output.value());
2921 }
2922
2923 Emit(kArm64Float64ToUint32, output_count, outputs, 1, inputs);
2924}
2925
2926void InstructionSelectorT::VisitBitcastWord32ToWord64(OpIndex node) {
2927   DCHECK(SmiValuesAre31Bits());
2928   DCHECK(COMPRESS_POINTERS_BOOL);
2929   EmitIdentity(node);
2930}
2931
2932void InstructionSelectorT::VisitChangeInt32ToInt64(OpIndex node) {
2933 const ChangeOp& change_op = this->Get(node).template Cast<ChangeOp>();
2934 const Operation& input_op = this->Get(change_op.input());
2935 if (input_op.Is<LoadOp>() && CanCover(node, change_op.input())) {
2936 // Generate sign-extending load.
2937 LoadRepresentation load_rep =
2938 this->load_view(change_op.input()).loaded_rep();
2939 MachineRepresentation rep = load_rep.representation();
2940 InstructionCode opcode = kArchNop;
2941 ImmediateMode immediate_mode = kNoImmediate;
2942 switch (rep) {
2943 case MachineRepresentation::kBit: // Fall through.
2944 case MachineRepresentation::kWord8:
2945 opcode = load_rep.IsSigned() ? kArm64Ldrsb : kArm64Ldrb;
2946 immediate_mode = kLoadStoreImm8;
2947 break;
2948 case MachineRepresentation::kWord16:
2949 opcode = load_rep.IsSigned() ? kArm64Ldrsh : kArm64Ldrh;
2950 immediate_mode = kLoadStoreImm16;
2951 break;
2952 case MachineRepresentation::kWord32:
2953 case MachineRepresentation::kWord64:
2954 // Since BitcastElider may remove nodes of
2955 // IrOpcode::kTruncateInt64ToInt32 and directly use the inputs, values
2956 // with kWord64 can also reach this line.
2957 case MachineRepresentation::kTaggedSigned:
2958 case MachineRepresentation::kTagged:
2959 case MachineRepresentation::kTaggedPointer:
2960 opcode = kArm64Ldrsw;
2961 immediate_mode = kLoadStoreImm32;
2962 break;
2963 default:
2964 UNREACHABLE();
2965 }
2966 EmitLoad(this, change_op.input(), opcode, immediate_mode, rep, node);
2967 return;
2968 }
2969 if ((input_op.Is<Opmask::kWord32ShiftRightArithmetic>() ||
2970 input_op.Is<Opmask::kWord32ShiftRightArithmeticShiftOutZeros>()) &&
2971 CanCover(node, change_op.input())) {
2972 const ShiftOp& sar = input_op.Cast<ShiftOp>();
2973 if (int64_t constant; MatchSignedIntegralConstant(sar.right(), &constant)) {
2974 Arm64OperandGeneratorT g(this);
2975 // Mask the shift amount, to keep the same semantics as Word32Sar.
2976 int right = constant & 0x1F;
2977 Emit(kArm64Sbfx, g.DefineAsRegister(node), g.UseRegister(sar.left()),
2978 g.TempImmediate(right), g.TempImmediate(32 - right));
2979 return;
2980 }
2981 }
2982 VisitRR(this, kArm64Sxtw, node);
2983}
2984
2985bool InstructionSelectorT::ZeroExtendsWord32ToWord64NoPhis(OpIndex node) {
2986 DCHECK(!this->Get(node).Is<PhiOp>());
2987 const Operation& op = this->Get(node);
2988 // 32-bit operations will write their result in a W register (implicitly
2989   // clearing the top 32 bits of the corresponding X register) so the
2990 // zero-extension is a no-op.
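  // For instance, a Word32 add writes only a W register, so a subsequent
  // ChangeUint32ToUint64 of its result can be elided (see EmitIdentity in
  // VisitChangeUint32ToUint64).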
2991 switch (op.opcode) {
2992 case Opcode::kWordBinop:
2993 return op.Cast<WordBinopOp>().rep == WordRepresentation::Word32();
2994 case Opcode::kShift:
2995 return op.Cast<ShiftOp>().rep == WordRepresentation::Word32();
2996 case Opcode::kComparison:
2997 return op.Cast<ComparisonOp>().rep == RegisterRepresentation::Word32();
2998 case Opcode::kOverflowCheckedBinop:
2999 return op.Cast<OverflowCheckedBinopOp>().rep ==
3000 WordRepresentation::Word32();
3001 case Opcode::kProjection:
3002 return ZeroExtendsWord32ToWord64NoPhis(op.Cast<ProjectionOp>().input());
3003 case Opcode::kLoad: {
3004 RegisterRepresentation rep =
3005 op.Cast<LoadOp>().loaded_rep.ToRegisterRepresentation();
3006 return rep == RegisterRepresentation::Word32();
3007 }
3008 default:
3009 return false;
3010 }
3011}
3012
3013void InstructionSelectorT::VisitChangeUint32ToUint64(OpIndex node) {
3014 Arm64OperandGeneratorT g(this);
3015 OpIndex value = this->input_at(node, 0);
3016 if (ZeroExtendsWord32ToWord64(value)) {
3017 return EmitIdentity(node);
3018 }
3019 Emit(kArm64Mov32, g.DefineAsRegister(node), g.UseRegister(value));
3020}
3021
3022void InstructionSelectorT::VisitTruncateInt64ToInt32(OpIndex node) {
3023 Arm64OperandGeneratorT g(this);
3024 // The top 32 bits in the 64-bit register will be undefined, and
3025 // must not be used by a dependent node.
3026 EmitIdentity(node);
3027}
3028
3029void InstructionSelectorT::VisitFloat64Mod(OpIndex node) {
3030 Arm64OperandGeneratorT g(this);
3031 Emit(kArm64Float64Mod, g.DefineAsFixed(node, d0),
3032 g.UseFixed(this->input_at(node, 0), d0),
3033 g.UseFixed(this->input_at(node, 1), d1))
3034 ->MarkAsCall();
3035}
3036
3037void InstructionSelectorT::VisitFloat64Ieee754Binop(OpIndex node,
3038 InstructionCode opcode) {
3039 Arm64OperandGeneratorT g(this);
3040 Emit(opcode, g.DefineAsFixed(node, d0),
3041 g.UseFixed(this->input_at(node, 0), d0),
3042 g.UseFixed(this->input_at(node, 1), d1))
3043 ->MarkAsCall();
3044}
3045
3046void InstructionSelectorT::VisitFloat64Ieee754Unop(OpIndex node,
3047 InstructionCode opcode) {
3048 Arm64OperandGeneratorT g(this);
3049 Emit(opcode, g.DefineAsFixed(node, d0),
3050 g.UseFixed(this->input_at(node, 0), d0))
3051 ->MarkAsCall();
3052}
3053
3054void InstructionSelectorT::EmitMoveParamToFPR(OpIndex node, int index) {}
3055
3056void InstructionSelectorT::EmitMoveFPRToParam(InstructionOperand* op,
3057 LinkageLocation location) {}
3058
3059void InstructionSelectorT::EmitPrepareArguments(
3060 ZoneVector<PushParameter>* arguments, const CallDescriptor* call_descriptor,
3061 OpIndex node) {
3062 Arm64OperandGeneratorT g(this);
3063
3064 // `arguments` includes alignment "holes". This means that slots bigger than
3065 // kSystemPointerSize, e.g. Simd128, will span across multiple arguments.
3066 int claim_count = static_cast<int>(arguments->size());
3067 bool needs_padding = claim_count % 2 != 0;
3068 int slot = claim_count - 1;
3069 claim_count = RoundUp(claim_count, 2);
3070 // Bump the stack pointer.
3071 if (claim_count > 0) {
3072 // TODO(titzer): claim and poke probably take small immediates.
3073 // TODO(titzer): it would be better to bump the sp here only
3074 // and emit paired stores with increment for non c frames.
3075 Emit(kArm64Claim, g.NoOutput(), g.TempImmediate(claim_count));
3076
3077 if (needs_padding) {
3078 Emit(kArm64Poke, g.NoOutput(), g.UseImmediate(0),
3079 g.TempImmediate(claim_count - 1));
3080 }
3081 }
3082
3083 // Poke the arguments into the stack.
3084 while (slot >= 0) {
3085 PushParameter input0 = (*arguments)[slot];
3086 // Skip holes in the param array. These represent both extra slots for
3087 // multi-slot values and padding slots for alignment.
3088 if (!input0.node.valid()) {
3089 slot--;
3090 continue;
3091 }
3092 PushParameter input1 = slot > 0 ? (*arguments)[slot - 1] : PushParameter();
3093 // Emit a poke-pair if consecutive parameters have the same type.
3094 // TODO(arm): Support consecutive Simd128 parameters.
3095 if (input1.node.valid() &&
3096 input0.location.GetType() == input1.location.GetType()) {
3097 Emit(kArm64PokePair, g.NoOutput(), g.UseRegister(input0.node),
3098 g.UseRegister(input1.node), g.TempImmediate(slot));
3099 slot -= 2;
3100 } else {
3101 Emit(kArm64Poke, g.NoOutput(), g.UseRegister(input0.node),
3102 g.TempImmediate(slot));
3103 slot--;
3104 }
3105 }
3106}
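// Illustrative note: claim_count is rounded up to an even number of slots
// because sp must stay 16-byte aligned on Arm64 (two pointer-sized slots).
// For example, pushing three pointer-sized arguments claims four slots and
// the needs_padding path pokes an immediate zero into the spare slot before
// the real arguments are stored, pairwise via kArm64PokePair where two
// consecutive parameters share a machine type.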
3107
3108void InstructionSelectorT::EmitPrepareResults(
3109 ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,
3110 OpIndex node) {
3111 Arm64OperandGeneratorT g(this);
3112
3113 for (PushParameter output : *results) {
3114 if (!output.location.IsCallerFrameSlot()) continue;
3115 // Skip any alignment holes in nodes.
3116 if (output.node.valid()) {
3117 DCHECK(!call_descriptor->IsCFunctionCall());
3118
3119 if (output.location.GetType() == MachineType::Float32()) {
3120 MarkAsFloat32(output.node);
3121 } else if (output.location.GetType() == MachineType::Float64()) {
3122 MarkAsFloat64(output.node);
3123 } else if (output.location.GetType() == MachineType::Simd128()) {
3124 MarkAsSimd128(output.node);
3125 }
3126
3127 int offset = call_descriptor->GetOffsetToReturns();
3128 int reverse_slot = -output.location.GetLocation() - offset;
3129 Emit(kArm64Peek, g.DefineAsRegister(output.node),
3130 g.UseImmediate(reverse_slot));
3131 }
3132 }
3133}
3134
3135bool InstructionSelectorT::IsTailCallAddressImmediate() { return false; }
3136
3137namespace {
3138
3139// Shared routine for multiple compare operations.
3140void VisitCompare(InstructionSelectorT* selector, InstructionCode opcode,
3141 InstructionOperand left, InstructionOperand right,
3142 FlagsContinuationT* cont) {
3143 if (cont->IsSelect()) {
3144 Arm64OperandGeneratorT g(selector);
3145 InstructionOperand inputs[] = {
3146 left, right, g.UseRegisterOrImmediateZero(cont->true_value()),
3147 g.UseRegisterOrImmediateZero(cont->false_value())};
3148 selector->EmitWithContinuation(opcode, 0, nullptr, 4, inputs, cont);
3149 } else {
3150 selector->EmitWithContinuation(opcode, left, right, cont);
3151 }
3152}
3153
3154// This function checks whether we can convert:
3155// ((a <op> b) cmp 0), b.<cond>
3156// to:
3157// (a <ops> b), b.<cond'>
3158// where <ops> is the flag setting version of <op>.
3159// We only generate conditions <cond'> that are a combination of the N
3160// and Z flags. This avoids the need to make this function dependent on
3161// the flag-setting operation.
3162bool CanUseFlagSettingBinop(FlagsCondition cond) {
3163 switch (cond) {
3164 case kEqual:
3165 case kNotEqual:
3166 case kSignedLessThan:
3167 case kSignedGreaterThanOrEqual:
3168 case kUnsignedLessThanOrEqual: // x <= 0 -> x == 0
3169 case kUnsignedGreaterThan: // x > 0 -> x != 0
3170 return true;
3171 default:
3172 return false;
3173 }
3174}
3175
3176// Map <cond> to <cond'> so that the following transformation is possible:
3177// ((a <op> b) cmp 0), b.<cond>
3178// to:
3179// (a <ops> b), b.<cond'>
3180// where <ops> is the flag setting version of <op>.
3181FlagsCondition MapForFlagSettingBinop(FlagsCondition cond) {
3182 DCHECK(CanUseFlagSettingBinop(cond));
3183 switch (cond) {
3184 case kEqual:
3185 case kNotEqual:
3186 return cond;
3187 case kSignedLessThan:
3188 return kNegative;
3189 case kSignedGreaterThanOrEqual:
3190 return kPositiveOrZero;
3191 case kUnsignedLessThanOrEqual: // x <= 0 -> x == 0
3192 return kEqual;
3193 case kUnsignedGreaterThan: // x > 0 -> x != 0
3194 return kNotEqual;
3195 default:
3196 UNREACHABLE();
3197 }
3198}
3199
3200// This function checks if we can perform the transformation:
3201// ((a <op> b) cmp 0), b.<cond>
3202// to:
3203// (a <ops> b), b.<cond'>
3204// where <ops> is the flag setting version of <op>, and if so,
3205// updates {node}, {opcode} and {cont} accordingly.
3206void MaybeReplaceCmpZeroWithFlagSettingBinop(InstructionSelectorT* selector,
3207 OpIndex* node, OpIndex binop,
3208 ArchOpcode* opcode,
3209 FlagsCondition cond,
3210 FlagsContinuationT* cont,
3211 ImmediateMode* immediate_mode) {
3212 ArchOpcode binop_opcode;
3213 ArchOpcode no_output_opcode;
3214 ImmediateMode binop_immediate_mode;
3215 const Operation& op = selector->Get(binop);
3216 if (op.Is<Opmask::kWord32Add>()) {
3217 binop_opcode = kArm64Add32;
3218 no_output_opcode = kArm64Cmn32;
3219 binop_immediate_mode = kArithmeticImm;
3220 } else if (op.Is<Opmask::kWord32BitwiseAnd>()) {
3221 binop_opcode = kArm64And32;
3222 no_output_opcode = kArm64Tst32;
3223 binop_immediate_mode = kLogical32Imm;
3224 } else {
3225 UNREACHABLE();
3226 }
3227 if (selector->CanCover(*node, binop)) {
3228 // The comparison is the only user of the add or and, so we can generate
3229 // a cmn or tst instead.
3230 cont->Overwrite(MapForFlagSettingBinop(cond));
3231 *opcode = no_output_opcode;
3232 *node = binop;
3233 *immediate_mode = binop_immediate_mode;
3234 } else if (selector->IsOnlyUserOfNodeInSameBlock(*node, binop)) {
3235 // We can also handle the case where the add and the compare are in the
3236 // same basic block, and the compare is the only use of add in this basic
3237 // block (the add has users in other basic blocks).
3238 cont->Overwrite(MapForFlagSettingBinop(cond));
3239 *opcode = binop_opcode;
3240 *node = binop;
3241 *immediate_mode = binop_immediate_mode;
3242 }
3243}
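// Illustrative example (hypothetical registers): for
//   if (Int32Add(a, b) < 0) { ... }
// a naive lowering would be
//   add w2, w0, w1 ; cmp w2, #0 ; b.lt target
// The replacement above instead selects
//   cmn  w0, w1     ; b.mi target   (kArm64Cmn32, compare is the only user)
//   adds w2, w0, w1 ; b.mi target   (kArm64Add32, sum also used in another
//                                    basic block)
// with kSignedLessThan rewritten to kNegative by MapForFlagSettingBinop.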
3244
3245// Map {cond} to kEqual or kNotEqual, so that we can select
3246// either TBZ or TBNZ when generating code for:
3247// (x cmp 0), b.{cond}
3248FlagsCondition MapForTbz(FlagsCondition cond) {
3249 switch (cond) {
3250 case kSignedLessThan: // generate TBNZ
3251 return kNotEqual;
3252 case kSignedGreaterThanOrEqual: // generate TBZ
3253 return kEqual;
3254 default:
3255 UNREACHABLE();
3256 }
3257}
3258
3259// Map {cond} to kEqual or kNotEqual, so that we can select
3260// either CBZ or CBNZ when generating code for:
3261// (x cmp 0), b.{cond}
3262FlagsCondition MapForCbz(FlagsCondition cond) {
3263 switch (cond) {
3264 case kEqual: // generate CBZ
3265 case kNotEqual: // generate CBNZ
3266 return cond;
3267 case kUnsignedLessThanOrEqual: // generate CBZ
3268 return kEqual;
3269 case kUnsignedGreaterThan: // generate CBNZ
3270 return kNotEqual;
3271 default:
3272 UNREACHABLE();
3273 }
3274}
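// Illustrative examples of the branches these two mappings enable
// (hypothetical registers and labels):
//   branch on x == 0            ->  cbz  w0, target
//   branch on x > 0 (unsigned)  ->  cbnz w0, target      (x > 0  <=>  x != 0)
//   branch on x < 0 (signed)    ->  tbnz w0, #31, target (sign bit set)
//   branch on x >= 0 (signed)   ->  tbz  w0, #31, target (sign bit clear)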
3275
3276void EmitBranchOrDeoptimize(InstructionSelectorT* selector,
3277 InstructionCode opcode, InstructionOperand value,
3278 FlagsContinuationT* cont) {
3279 DCHECK(cont->IsBranch() || cont->IsDeoptimize());
3280 selector->EmitWithContinuation(opcode, value, cont);
3281}
3282
3283template <int N>
3284struct CbzOrTbzMatchTrait {};
3285
3286template <>
3287struct CbzOrTbzMatchTrait<32> {
3288 using IntegralType = uint32_t;
3289 using BinopMatcher = Int32BinopMatcher;
3290 static constexpr ArchOpcode kTestAndBranchOpcode = kArm64TestAndBranch32;
3291 static constexpr ArchOpcode kCompareAndBranchOpcode =
3292 kArm64CompareAndBranch32;
3293 static constexpr unsigned kSignBit = kWSignBit;
3294};
3295
3296template <>
3297struct CbzOrTbzMatchTrait<64> {
3298 using IntegralType = uint64_t;
3299 using BinopMatcher = Int64BinopMatcher;
3300 static constexpr ArchOpcode kTestAndBranchOpcode = kArm64TestAndBranch;
3301 static constexpr ArchOpcode kCompareAndBranchOpcode = kArm64CompareAndBranch;
3302 static constexpr unsigned kSignBit = kXSignBit;
3303};
3304
3305// Try to emit TBZ, TBNZ, CBZ or CBNZ for certain comparisons of {node}
3306// against {value}, depending on the condition.
3307template <int N>
3308bool TryEmitCbzOrTbz(InstructionSelectorT* selector, OpIndex node,
3309 typename CbzOrTbzMatchTrait<N>::IntegralType value,
3310 OpIndex user, FlagsCondition cond,
3311 FlagsContinuationT* cont) {
3312 // Only handle branches and deoptimisations.
3313 if (!cont->IsBranch() && !cont->IsDeoptimize()) return false;
3314
3315 switch (cond) {
3316 case kSignedLessThan:
3317 case kSignedGreaterThanOrEqual: {
3318 // Here we handle sign tests, aka. comparisons with zero.
3319 if (value != 0) return false;
3320 // We don't generate TBZ/TBNZ for deoptimisations, as they have a
3321 // shorter range than conditional branches and generating them for
3322 // deoptimisations results in more veneers.
3323 if (cont->IsDeoptimize()) return false;
3324 Arm64OperandGeneratorT g(selector);
3325 cont->Overwrite(MapForTbz(cond));
3326
3327 if (N == 32) {
3328 const Operation& op = selector->Get(node);
3329 if (op.Is<Opmask::kFloat64ExtractHighWord32>() &&
3330 selector->CanCover(user, node)) {
3331 // SignedLessThan(Float64ExtractHighWord32(x), 0) and
3332 // SignedGreaterThanOrEqual(Float64ExtractHighWord32(x), 0)
3333 // essentially check the sign bit of a 64-bit floating point value.
3334 InstructionOperand temp = g.TempRegister();
3335 selector->Emit(kArm64U64MoveFloat64, temp,
3336 g.UseRegister(selector->input_at(node, 0)));
3337 selector->EmitWithContinuation(kArm64TestAndBranch, temp,
3338 g.TempImmediate(kDSignBit), cont);
3339 return true;
3340 }
3341 }
3342
3343 selector->EmitWithContinuation(
3344 CbzOrTbzMatchTrait<N>::kTestAndBranchOpcode, g.UseRegister(node),
3345 g.TempImmediate(CbzOrTbzMatchTrait<N>::kSignBit), cont);
3346 return true;
3347 }
3348 case kEqual:
3349 case kNotEqual: {
3350 const Operation& op = selector->Get(node);
3351 if (const WordBinopOp* bitwise_and = op.TryCast<Opmask::kBitwiseAnd>()) {
3352 // Emit a tbz/tbnz if we are comparing with a single-bit mask:
3353 // Branch(WordEqual(WordAnd(x, 1 << N), 1 << N), true, false)
3354 uint64_t actual_value;
3355 if (cont->IsBranch() && base::bits::IsPowerOfTwo(value) &&
3356 selector->MatchUnsignedIntegralConstant(bitwise_and->right(),
3357 &actual_value) &&
3358 actual_value == value && selector->CanCover(user, node)) {
3359 Arm64OperandGeneratorT g(selector);
3360 // In the code generator, Equal refers to a bit being cleared. We
3361 // want the opposite here so negate the condition.
3362 cont->Negate();
3363 selector->EmitWithContinuation(
3364 CbzOrTbzMatchTrait<N>::kTestAndBranchOpcode,
3365 g.UseRegister(bitwise_and->left()),
3366 g.TempImmediate(base::bits::CountTrailingZeros(value)), cont);
3367 return true;
3368 }
3369 }
3370 [[fallthrough]];
3371 }
3372 case kUnsignedLessThanOrEqual:
3373 case kUnsignedGreaterThan: {
3374 if (value != 0) return false;
3375 Arm64OperandGeneratorT g(selector);
3376 cont->Overwrite(MapForCbz(cond));
3377 EmitBranchOrDeoptimize(selector,
3378 CbzOrTbzMatchTrait<N>::kCompareAndBranchOpcode,
3379 g.UseRegister(node), cont);
3380 return true;
3381 }
3382 default:
3383 return false;
3384 }
3385}
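// Illustrative example (hypothetical registers): the single-bit-mask path
// above turns
//   Branch(Word32Equal(Word32BitwiseAnd(x, 1 << 5), 1 << 5))
// into roughly
//   tbnz w0, #5, true_target
// The condition is negated first because, for the code generator, kEqual on
// a test-and-branch means "the tested bit is clear".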
3386
3387// Shared routine for multiple word compare operations.
3388void VisitWordCompare(InstructionSelectorT* selector, OpIndex node,
3389 InstructionCode opcode, FlagsContinuationT* cont,
3390 ImmediateMode immediate_mode) {
3391 Arm64OperandGeneratorT g(selector);
3392 DCHECK_EQ(selector->value_input_count(node), 2);
3393 auto left = selector->input_at(node, 0);
3394 auto right = selector->input_at(node, 1);
3395
3396 // If one of the two inputs is an immediate, make sure it's on the right.
3397 if (!g.CanBeImmediate(right, immediate_mode) &&
3398 g.CanBeImmediate(left, immediate_mode)) {
3399 cont->Commute();
3400 std::swap(left, right);
3401 }
3402
3403 int64_t constant;
3404 if (opcode == kArm64Cmp &&
3405 selector->MatchSignedIntegralConstant(right, &constant)) {
3406 if (TryEmitCbzOrTbz<64>(selector, left, constant, node, cont->condition(),
3407 cont)) {
3408 return;
3409 }
3410 }
3411
3412 VisitCompare(selector, opcode, g.UseRegister(left),
3413 g.UseOperand(right, immediate_mode), cont);
3414}
3415
3416void VisitWord32Compare(InstructionSelectorT* selector, OpIndex node,
3417 FlagsContinuationT* cont) {
3418 const Operation& compare = selector->Get(node);
3419 DCHECK_GE(compare.input_count, 2);
3420 OpIndex lhs = compare.input(0);
3421 OpIndex rhs = compare.input(1);
3422 FlagsCondition cond = cont->condition();
3423
3424 if (uint64_t constant;
3425 selector->MatchUnsignedIntegralConstant(rhs, &constant) &&
3426 TryEmitCbzOrTbz<32>(selector, lhs, static_cast<uint32_t>(constant), node,
3427 cond, cont)) {
3428 return;
3429 }
3430 if (uint64_t constant;
3431 selector->MatchUnsignedIntegralConstant(lhs, &constant) &&
3432 TryEmitCbzOrTbz<32>(selector, rhs, static_cast<uint32_t>(constant), node,
3433 CommuteFlagsCondition(cond), cont)) {
3434 return;
3435 }
3436
3437 const Operation& left = selector->Get(lhs);
3438 const Operation& right = selector->Get(rhs);
3439 ArchOpcode opcode = kArm64Cmp32;
3440 ImmediateMode immediate_mode = kArithmeticImm;
3441
3442 if (selector->MatchIntegralZero(rhs) &&
3443 (left.Is<Opmask::kWord32Add>() || left.Is<Opmask::kWord32BitwiseAnd>())) {
3444 // Emit flag setting add/and instructions for comparisons against zero.
3445 if (CanUseFlagSettingBinop(cond)) {
3446 MaybeReplaceCmpZeroWithFlagSettingBinop(selector, &node, lhs, &opcode,
3447 cond, cont, &immediate_mode);
3448 }
3449 } else if (selector->MatchIntegralZero(lhs) &&
3450 (right.Is<Opmask::kWord32Add>() ||
3451 right.Is<Opmask::kWord32BitwiseAnd>())) {
3452 // Same as above, but we need to commute the condition before we
3453 // continue with the rest of the checks.
3454 FlagsCondition commuted_cond = CommuteFlagsCondition(cond);
3455 if (CanUseFlagSettingBinop(commuted_cond)) {
3456 MaybeReplaceCmpZeroWithFlagSettingBinop(
3457 selector, &node, rhs, &opcode, commuted_cond, cont, &immediate_mode);
3458 }
3459 } else if (right.Is<Opmask::kWord32Sub>() &&
3460 (cond == kEqual || cond == kNotEqual)) {
3461 const WordBinopOp& sub = right.Cast<WordBinopOp>();
3462 if (selector->MatchIntegralZero(sub.left())) {
3463 // For a given compare(x, 0 - y) where compare is kEqual or kNotEqual,
3464 // it can be expressed as cmn(x, y).
3465 opcode = kArm64Cmn32;
3466 VisitBinopImpl(selector, node, lhs, sub.right(),
3467 RegisterRepresentation::Word32(), opcode, immediate_mode,
3468 cont);
3469 return;
3470 }
3471 }
3472 VisitBinop(selector, node, RegisterRepresentation::Word32(), opcode,
3473 immediate_mode, cont);
3474}
3475
3476void VisitWordTest(InstructionSelectorT* selector, OpIndex node,
3477 InstructionCode opcode, FlagsContinuationT* cont) {
3478 Arm64OperandGeneratorT g(selector);
3479 VisitCompare(selector, opcode, g.UseRegister(node), g.UseRegister(node),
3480 cont);
3481}
3482
3483void VisitWord32Test(InstructionSelectorT* selector, OpIndex node,
3484 FlagsContinuationT* cont) {
3485 VisitWordTest(selector, node, kArm64Tst32, cont);
3486}
3487
3488void VisitWord64Test(InstructionSelectorT* selector, OpIndex node,
3489 FlagsContinuationT* cont) {
3490 VisitWordTest(selector, node, kArm64Tst, cont);
3491}
3492
3493struct TestAndBranchMatcherTurboshaft {
3494 TestAndBranchMatcherTurboshaft(InstructionSelectorT* selector,
3495 const WordBinopOp& binop)
3496 : selector_(selector), binop_(binop) {
3497 Initialize();
3498 }
3499
3500 bool Matches() const { return matches_; }
3501
3502 unsigned bit() const {
3503 DCHECK(Matches());
3504 return bit_;
3505 }
3506
3507 private:
3508 void Initialize() {
3509 if (binop_.kind != WordBinopOp::Kind::kBitwiseAnd) return;
3510 uint64_t value{0};
3511 if (!selector_->MatchUnsignedIntegralConstant(binop_.right(), &value) ||
3512 !base::bits::IsPowerOfTwo(value)) {
3513 return;
3514 }
3515 // All preconditions for TBZ/TBNZ matched.
3516 matches_ = true;
3517 bit_ = base::bits::CountTrailingZeros(value);
3518 }
3519
3520 InstructionSelectorT* selector_;
3521 const WordBinopOp& binop_;
3522 bool matches_ = false;
3523 unsigned bit_ = 0;
3524};
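// Illustrative usage: for Word32BitwiseAnd(x, 0x40) the matcher reports
// Matches() == true and bit() == 6, which the branch lowering below turns
// into tbz/tbnz on bit 6 of x. A mask with more than one bit set (e.g. 0x41)
// is rejected because it is not a power of two.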
3525
3526// Shared routine for multiple float32 compare operations.
3527void VisitFloat32Compare(InstructionSelectorT* selector, OpIndex node,
3528 FlagsContinuationT* cont) {
3529 Arm64OperandGeneratorT g(selector);
3530 const ComparisonOp& op = selector->Get(node).template Cast<ComparisonOp>();
3531 OpIndex left = op.left();
3532 OpIndex right = op.right();
3533 if (selector->MatchZero(right)) {
3534 VisitCompare(selector, kArm64Float32Cmp, g.UseRegister(left),
3535 g.UseImmediate(right), cont);
3536 } else if (selector->MatchZero(left)) {
3537 cont->Commute();
3538 VisitCompare(selector, kArm64Float32Cmp, g.UseRegister(right),
3539 g.UseImmediate(left), cont);
3540 } else {
3541 VisitCompare(selector, kArm64Float32Cmp, g.UseRegister(left),
3542 g.UseRegister(right), cont);
3543 }
3544}
3545
3546// Shared routine for multiple float64 compare operations.
3547void VisitFloat64Compare(InstructionSelectorT* selector, OpIndex node,
3548 FlagsContinuationT* cont) {
3549 Arm64OperandGeneratorT g(selector);
3550 const Operation& compare = selector->Get(node);
3551 DCHECK(compare.Is<ComparisonOp>());
3552 OpIndex lhs = compare.input(0);
3553 OpIndex rhs = compare.input(1);
3554 if (selector->MatchZero(rhs)) {
3555 VisitCompare(selector, kArm64Float64Cmp, g.UseRegister(lhs),
3556 g.UseImmediate(rhs), cont);
3557 } else if (selector->MatchZero(lhs)) {
3558 cont->Commute();
3559 VisitCompare(selector, kArm64Float64Cmp, g.UseRegister(rhs),
3560 g.UseImmediate(lhs), cont);
3561 } else {
3562 VisitCompare(selector, kArm64Float64Cmp, g.UseRegister(lhs),
3563 g.UseRegister(rhs), cont);
3564 }
3565}
3566
3567void VisitAtomicExchange(InstructionSelectorT* selector, OpIndex node,
3568 ArchOpcode opcode, AtomicWidth width,
3569 MemoryAccessKind access_kind) {
3570 using OpIndex = OpIndex;
3571 const AtomicRMWOp& atomic_op = selector->Cast<AtomicRMWOp>(node);
3572 Arm64OperandGeneratorT g(selector);
3573 OpIndex base = atomic_op.base();
3574 OpIndex index = atomic_op.index();
3575 OpIndex value = atomic_op.value();
3576 InstructionOperand inputs[] = {g.UseRegister(base), g.UseRegister(index),
3577 g.UseUniqueRegister(value)};
3578 InstructionOperand outputs[] = {g.DefineAsRegister(node)};
3579 InstructionCode code = opcode | AddressingModeField::encode(kMode_MRR) |
3580 AtomicWidthField::encode(width);
3581 if (access_kind == MemoryAccessKind::kProtectedByTrapHandler) {
3582 code |= AccessModeField::encode(kMemoryAccessProtectedMemOutOfBounds);
3583 }
3584 if (CpuFeatures::IsSupported(LSE)) {
3585 InstructionOperand temps[] = {g.TempRegister()};
3586 selector->Emit(code, arraysize(outputs), outputs, arraysize(inputs), inputs,
3587 arraysize(temps), temps);
3588 } else {
3589 InstructionOperand temps[] = {g.TempRegister(), g.TempRegister()};
3590 selector->Emit(code, arraysize(outputs), outputs, arraysize(inputs), inputs,
3591 arraysize(temps), temps);
3592 }
3593}
3594
3595void VisitAtomicCompareExchange(InstructionSelectorT* selector, OpIndex node,
3596 ArchOpcode opcode, AtomicWidth width,
3597 MemoryAccessKind access_kind) {
3598 using OpIndex = OpIndex;
3599 Arm64OperandGeneratorT g(selector);
3600 const AtomicRMWOp& atomic_op = selector->Cast<AtomicRMWOp>(node);
3601 OpIndex base = atomic_op.base();
3602 OpIndex index = atomic_op.index();
3603 OpIndex old_value = atomic_op.expected().value();
3604 OpIndex new_value = atomic_op.value();
3605 InstructionOperand inputs[] = {g.UseRegister(base), g.UseRegister(index),
3606 g.UseUniqueRegister(old_value),
3607 g.UseUniqueRegister(new_value)};
3608 InstructionOperand outputs[1];
3609 InstructionCode code = opcode | AddressingModeField::encode(kMode_MRR) |
3610 AtomicWidthField::encode(width);
3611 if (access_kind == MemoryAccessKind::kProtectedByTrapHandler) {
3612 code |= AccessModeField::encode(kMemoryAccessProtectedMemOutOfBounds);
3613 }
3614 if (CpuFeatures::IsSupported(LSE)) {
3615 InstructionOperand temps[] = {g.TempRegister()};
3616 outputs[0] = g.DefineSameAsInput(node, 2);
3617 selector->Emit(code, arraysize(outputs), outputs, arraysize(inputs), inputs,
3618 arraysize(temps), temps);
3619 } else {
3620 InstructionOperand temps[] = {g.TempRegister(), g.TempRegister()};
3621 outputs[0] = g.DefineAsRegister(node);
3622 selector->Emit(code, arraysize(outputs), outputs, arraysize(inputs), inputs,
3623 arraysize(temps), temps);
3624 }
3625}
3626
3627void VisitAtomicLoad(InstructionSelectorT* selector, OpIndex node,
3628 AtomicWidth width) {
3629 using OpIndex = OpIndex;
3630 Arm64OperandGeneratorT g(selector);
3631 auto load = selector->load_view(node);
3632 OpIndex base = load.base();
3633 OpIndex index = load.index();
3634 InstructionOperand inputs[] = {g.UseRegister(base), g.UseRegister(index)};
3635 InstructionOperand outputs[] = {g.DefineAsRegister(node)};
3636 InstructionOperand temps[] = {g.TempRegister()};
3637
3638 // The memory order is ignored as both acquire and sequentially consistent
3639 // loads can emit LDAR.
3640 // https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
3641 LoadRepresentation load_rep = load.loaded_rep();
3642 InstructionCode code;
3643 switch (load_rep.representation()) {
3644 case MachineRepresentation::kWord8:
3645 DCHECK_IMPLIES(load_rep.IsSigned(), width == AtomicWidth::kWord32);
3646 code = load_rep.IsSigned() ? kAtomicLoadInt8 : kAtomicLoadUint8;
3647 break;
3648 case MachineRepresentation::kWord16:
3649 DCHECK_IMPLIES(load_rep.IsSigned(), width == AtomicWidth::kWord32);
3650 code = load_rep.IsSigned() ? kAtomicLoadInt16 : kAtomicLoadUint16;
3651 break;
3652 case MachineRepresentation::kWord32:
3653 code = kAtomicLoadWord32;
3654 break;
3655 case MachineRepresentation::kWord64:
3656 code = kArm64Word64AtomicLoadUint64;
3657 break;
3658#ifdef V8_COMPRESS_POINTERS
3659 case MachineRepresentation::kTaggedSigned:
3660 code = kArm64LdarDecompressTaggedSigned;
3661 break;
3662 case MachineRepresentation::kTaggedPointer:
3663 code = kArm64LdarDecompressTagged;
3664 break;
3665 case MachineRepresentation::kTagged:
3666 code = kArm64LdarDecompressTagged;
3667 break;
3668#else
3669 case MachineRepresentation::kTaggedSigned: // Fall through.
3670 case MachineRepresentation::kTaggedPointer: // Fall through.
3671 case MachineRepresentation::kTagged:
3672 if (kTaggedSize == 8) {
3673 code = kArm64Word64AtomicLoadUint64;
3674 } else {
3675 code = kAtomicLoadWord32;
3676 }
3677 break;
3678#endif
3679 case MachineRepresentation::kCompressedPointer: // Fall through.
3680 case MachineRepresentation::kCompressed:
3682 code = kAtomicLoadWord32;
3683 break;
3684 default:
3685 UNREACHABLE();
3686 }
3687
3688 bool traps_on_null;
3689 if (load.is_protected(&traps_on_null)) {
3690 // Atomic loads and null dereference are mutually exclusive. This might
3691 // change with multi-threaded wasm-gc in which case the access mode should
3692 // probably be kMemoryAccessProtectedNullDereference.
3693 DCHECK(!traps_on_null);
3694 code |= AccessModeField::encode(kMemoryAccessProtectedMemOutOfBounds);
3695 }
3696
3697 code |=
3698 AddressingModeField::encode(kMode_MRR) | AtomicWidthField::encode(width);
3699 selector->Emit(code, arraysize(outputs), outputs, arraysize(inputs), inputs,
3700 arraysize(temps), temps);
3701}
3702
3703AtomicStoreParameters AtomicStoreParametersOf(InstructionSelectorT* selector,
3704 OpIndex node) {
3705 auto store = selector->store_view(node);
3706 return AtomicStoreParameters(store.stored_rep().representation(),
3707 store.stored_rep().write_barrier_kind(),
3708 store.memory_order().value(),
3709 store.access_kind());
3710}
3711
3712void VisitAtomicStore(InstructionSelectorT* selector, OpIndex node,
3713 AtomicWidth width) {
3714 using OpIndex = OpIndex;
3715 Arm64OperandGeneratorT g(selector);
3716 auto store = selector->store_view(node);
3717 OpIndex base = store.base();
3718 OpIndex index = selector->value(store.index());
3719 OpIndex value = store.value();
3720 DCHECK_EQ(store.displacement(), 0);
3721
3722 // The memory order is ignored as both release and sequentially consistent
3723 // stores can emit STLR.
3724 // https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
3725 AtomicStoreParameters store_params = AtomicStoreParametersOf(selector, node);
3726 WriteBarrierKind write_barrier_kind = store_params.write_barrier_kind();
3727 MachineRepresentation rep = store_params.representation();
3728
3729 if (v8_flags.enable_unconditional_write_barriers &&
3730 CanBeTaggedOrCompressedPointer(rep)) {
3731 write_barrier_kind = kFullWriteBarrier;
3732 }
3733
3734 InstructionOperand inputs[] = {g.UseRegister(base), g.UseRegister(index),
3735 g.UseUniqueRegister(value)};
3736 InstructionOperand temps[] = {g.TempRegister()};
3737 InstructionCode code;
3738
3739 if (write_barrier_kind != kNoWriteBarrier &&
3740 !v8_flags.disable_write_barriers) {
3742 DCHECK_EQ(AtomicWidthSize(width), kTaggedSize);
3743
3744 RecordWriteMode record_write_mode =
3745 WriteBarrierKindToRecordWriteMode(write_barrier_kind);
3746 code = kArchAtomicStoreWithWriteBarrier;
3747 code |= RecordWriteModeField::encode(record_write_mode);
3748 } else {
3749 switch (rep) {
3750 case MachineRepresentation::kWord8:
3751 code = kAtomicStoreWord8;
3752 break;
3753 case MachineRepresentation::kWord16:
3754 code = kAtomicStoreWord16;
3755 break;
3756 case MachineRepresentation::kWord32:
3757 code = kAtomicStoreWord32;
3758 break;
3759 case MachineRepresentation::kWord64:
3760 DCHECK_EQ(width, AtomicWidth::kWord64);
3761 code = kArm64Word64AtomicStoreWord64;
3762 break;
3763 case MachineRepresentation::kTaggedSigned: // Fall through.
3764 case MachineRepresentation::kTaggedPointer: // Fall through.
3765 case MachineRepresentation::kTagged:
3766 DCHECK_EQ(AtomicWidthSize(width), kTaggedSize);
3767 code = kArm64StlrCompressTagged;
3768 break;
3769 case MachineRepresentation::kCompressedPointer: // Fall through.
3770 case MachineRepresentation::kCompressed:
3772 DCHECK_EQ(width, AtomicWidth::kWord32);
3773 code = kArm64StlrCompressTagged;
3774 break;
3775 default:
3776 UNREACHABLE();
3777 }
3778 code |= AtomicWidthField::encode(width);
3779 }
3780
3781 if (store_params.kind() == MemoryAccessKind::kProtectedByTrapHandler) {
3782 code |= AccessModeField::encode(kMemoryAccessProtectedMemOutOfBounds);
3783 }
3784
3785 code |= AddressingModeField::encode(kMode_MRR);
3786 selector->Emit(code, 0, nullptr, arraysize(inputs), inputs, arraysize(temps),
3787 temps);
3788}
3789
3790void VisitAtomicBinop(InstructionSelectorT* selector, OpIndex node,
3791 ArchOpcode opcode, AtomicWidth width,
3792 MemoryAccessKind access_kind) {
3793 using OpIndex = OpIndex;
3794 Arm64OperandGeneratorT g(selector);
3795 const AtomicRMWOp& atomic_op = selector->Cast<AtomicRMWOp>(node);
3796 OpIndex base = atomic_op.base();
3797 OpIndex index = atomic_op.index();
3798 OpIndex value = atomic_op.value();
3799 AddressingMode addressing_mode = kMode_MRR;
3800 InstructionOperand inputs[] = {g.UseRegister(base), g.UseRegister(index),
3801 g.UseUniqueRegister(value)};
3802 InstructionOperand outputs[] = {g.DefineAsRegister(node)};
3803 InstructionCode code = opcode | AddressingModeField::encode(addressing_mode) |
3804 AtomicWidthField::encode(width);
3805 if (access_kind == MemoryAccessKind::kProtectedByTrapHandler) {
3806 code |= AccessModeField::encode(kMemoryAccessProtectedMemOutOfBounds);
3807 }
3808
3809 if (CpuFeatures::IsSupported(LSE)) {
3810 InstructionOperand temps[] = {g.TempRegister()};
3811 selector->Emit(code, arraysize(outputs), outputs, arraysize(inputs), inputs,
3812 arraysize(temps), temps);
3813 } else {
3814 InstructionOperand temps[] = {g.TempRegister(), g.TempRegister(),
3815 g.TempRegister()};
3816 selector->Emit(code, arraysize(outputs), outputs, arraysize(inputs), inputs,
3817 arraysize(temps), temps);
3818 }
3819}
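// Illustrative note: the two temp-count variants above reflect the two
// lowering strategies for read-modify-write atomics on Arm64. With the LSE
// extension a single atomic instruction (e.g. ldaddal for an add) is
// expected to suffice, whereas without LSE the code generator falls back to
// a load-exclusive/store-exclusive retry loop (ldaxr/stlxr), which needs the
// extra scratch registers.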
3820
3821} // namespace
3822
3823void InstructionSelectorT::VisitWordCompareZero(OpIndex user, OpIndex value,
3824 FlagsContinuation* cont) {
3825 Arm64OperandGeneratorT g(this);
3826 // Try to combine with comparisons against 0 by simply inverting the branch.
3827 ConsumeEqualZero(&user, &value, cont);
3828
3829 // Remove Word64->Word32 truncation.
3830 if (V<Word64> value64;
3831 MatchTruncateWord64ToWord32(value, &value64) && CanCover(user, value)) {
3832 user = value;
3833 value = value64;
3834 }
3835
3836 // Try to match bit checks to create TBZ/TBNZ instructions.
3837 // Unlike the switch below, CanCover check is not needed here.
3838 // If there are several uses of the given operation, we will generate a TBZ
3839 // instruction for each. This is useful even if there are other uses of the
3840 // arithmetic result, because it moves dependencies further back.
3841 const Operation& value_op = Get(value);
3842
3843 if (cont->IsBranch()) {
3844 if (value_op.Is<Opmask::kWord64Equal>()) {
3845 const ComparisonOp& equal = value_op.Cast<ComparisonOp>();
3846 if (MatchIntegralZero(equal.right())) {
3847 const WordBinopOp* left_binop =
3848 Get(equal.left()).TryCast<WordBinopOp>();
3849 if (left_binop) {
3850 TestAndBranchMatcherTurboshaft matcher(this, *left_binop);
3851 if (matcher.Matches()) {
3852 // If the mask has only one bit set, we can use tbz/tbnz.
3853 DCHECK((cont->condition() == kEqual) ||
3854 (cont->condition() == kNotEqual));
3855 Arm64OperandGeneratorT gen(this);
3856 cont->OverwriteAndNegateIfEqual(kEqual);
3857 EmitWithContinuation(kArm64TestAndBranch,
3858 gen.UseRegister(left_binop->left()),
3859 gen.TempImmediate(matcher.bit()), cont);
3860 return;
3861 }
3862 }
3863 }
3864 }
3865
3866 if (const WordBinopOp* value_binop = value_op.TryCast<WordBinopOp>()) {
3867 TestAndBranchMatcherTurboshaft matcher(this, *value_binop);
3868 if (matcher.Matches()) {
3869 // If the mask has only one bit set, we can use tbz/tbnz.
3870 DCHECK((cont->condition() == kEqual) ||
3871 (cont->condition() == kNotEqual));
3872 InstructionCode opcode = value_binop->rep.MapTaggedToWord() ==
3873 RegisterRepresentation::Word32()
3874 ? kArm64TestAndBranch32
3875 : kArm64TestAndBranch;
3876 Arm64OperandGeneratorT gen(this);
3877 EmitWithContinuation(opcode, gen.UseRegister(value_binop->left()),
3878 gen.TempImmediate(matcher.bit()), cont);
3879 return;
3880 }
3881 }
3882 }
3883
3884 if (CanCover(user, value)) {
3885 if (const ComparisonOp* comparison = value_op.TryCast<ComparisonOp>()) {
3886 switch (comparison->rep.MapTaggedToWord().value()) {
3887 case RegisterRepresentation::Word32():
3888 cont->OverwriteAndNegateIfEqual(
3889 GetComparisonFlagCondition(*comparison));
3890 return VisitWord32Compare(this, value, cont);
3891
3892 case RegisterRepresentation::Word64():
3893 cont->OverwriteAndNegateIfEqual(
3894 GetComparisonFlagCondition(*comparison));
3895
3896 if (comparison->kind == ComparisonOp::Kind::kEqual) {
3897 const Operation& left_op = Get(comparison->left());
3898 if (MatchIntegralZero(comparison->right()) &&
3899 left_op.Is<Opmask::kWord64BitwiseAnd>() &&
3900 CanCover(value, comparison->left())) {
3901 return VisitWordCompare(this, comparison->left(), kArm64Tst, cont,
3902 kLogical64Imm);
3903 }
3904 }
3905 return VisitWordCompare(this, value, kArm64Cmp, cont, kArithmeticImm);
3906
3907 case RegisterRepresentation::Float32():
3908 switch (comparison->kind) {
3909 case ComparisonOp::Kind::kEqual:
3910 cont->OverwriteAndNegateIfEqual(kEqual);
3911 return VisitFloat32Compare(this, value, cont);
3912 case ComparisonOp::Kind::kSignedLessThan:
3913 cont->OverwriteAndNegateIfEqual(kFloatLessThan);
3914 return VisitFloat32Compare(this, value, cont);
3915 case ComparisonOp::Kind::kSignedLessThanOrEqual:
3916 cont->OverwriteAndNegateIfEqual(kFloatLessThanOrEqual);
3917 return VisitFloat32Compare(this, value, cont);
3918 default:
3919 UNREACHABLE();
3920 }
3921
3922 case RegisterRepresentation::Float64():
3923 switch (comparison->kind) {
3924 case ComparisonOp::Kind::kEqual:
3925 cont->OverwriteAndNegateIfEqual(kEqual);
3926 return VisitFloat64Compare(this, value, cont);
3927 case ComparisonOp::Kind::kSignedLessThan:
3928 cont->OverwriteAndNegateIfEqual(kFloatLessThan);
3929 return VisitFloat64Compare(this, value, cont);
3930 case ComparisonOp::Kind::kSignedLessThanOrEqual:
3931 cont->OverwriteAndNegateIfEqual(kFloatLessThanOrEqual);
3932 return VisitFloat64Compare(this, value, cont);
3933 default:
3934 UNREACHABLE();
3935 }
3936
3937 default:
3938 break;
3939 }
3940 } else if (const ProjectionOp* projection =
3941 value_op.TryCast<ProjectionOp>()) {
3942 // Check if this is the overflow output projection of an
3943 // <Operation>WithOverflow node.
3944 if (projection->index == 1u) {
3945 // We cannot combine the <Operation>WithOverflow with this branch
3946 // unless the 0th projection (the use of the actual value of the
3947 // <Operation>) is either nullptr, which means there's no use of the
3948 // actual value, or was already defined, which means it is scheduled
3949 // *AFTER* this branch.
3950 OpIndex node = projection->input();
3951 if (const OverflowCheckedBinopOp* binop =
3952 TryCast<OverflowCheckedBinopOp>(node);
3953 binop && CanDoBranchIfOverflowFusion(node)) {
3954 const bool is64 = binop->rep == WordRepresentation::Word64();
3955 switch (binop->kind) {
3956 case OverflowCheckedBinopOp::Kind::kSignedAdd:
3957 cont->OverwriteAndNegateIfEqual(kOverflow);
3958 return VisitBinop(this, node, binop->rep,
3959 is64 ? kArm64Add : kArm64Add32, kArithmeticImm,
3960 cont);
3961 case OverflowCheckedBinopOp::Kind::kSignedSub:
3962 cont->OverwriteAndNegateIfEqual(kOverflow);
3963 return VisitBinop(this, node, binop->rep,
3964 is64 ? kArm64Sub : kArm64Sub32, kArithmeticImm,
3965 cont);
3966 case OverflowCheckedBinopOp::Kind::kSignedMul:
3967 if (is64) {
3968 // ARM64 doesn't set the overflow flag for multiplication, so
3969 // we need to test on kNotEqual. Here is the code sequence
3970 // used:
3971 // mul result, left, right
3972 // smulh high, left, right
3973 // cmp high, result, asr 63
3974 cont->OverwriteAndNegateIfEqual(kNotEqual);
3975 return EmitInt64MulWithOverflow(this, node, cont);
3976 } else {
3977 // ARM64 doesn't set the overflow flag for multiplication, so
3978 // we need to test on kNotEqual. Here is the code sequence
3979 // used:
3980 // smull result, left, right
3981 // cmp result.X(), Operand(result, SXTW)
3982 cont->OverwriteAndNegateIfEqual(kNotEqual);
3983 return EmitInt32MulWithOverflow(this, node, cont);
3984 }
3985 }
3986 }
3987 }
3988 } else if (value_op.Is<Opmask::kWord32Add>()) {
3989 return VisitWordCompare(this, value, kArm64Cmn32, cont, kArithmeticImm);
3990 } else if (value_op.Is<Opmask::kWord32Sub>()) {
3991 return VisitWord32Compare(this, value, cont);
3992 } else if (value_op.Is<Opmask::kWord32BitwiseAnd>()) {
3993 if (TryMatchConditionalCompareChainBranch(this, zone(), value, cont)) {
3994 return;
3995 }
3996 return VisitWordCompare(this, value, kArm64Tst32, cont, kLogical32Imm);
3997 } else if (value_op.Is<Opmask::kWord64BitwiseAnd>()) {
3998 return VisitWordCompare(this, value, kArm64Tst, cont, kLogical64Imm);
3999 } else if (value_op.Is<Opmask::kWord32BitwiseOr>()) {
4000 if (TryMatchConditionalCompareChainBranch(this, zone(), value, cont)) {
4001 return;
4002 }
4003 } else if (value_op.Is<StackPointerGreaterThanOp>()) {
4004 cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition);
4005 return VisitStackPointerGreaterThan(value, cont);
4006 }
4007 }
4008
4009 // Branch could not be combined with a compare, compare against 0 and
4010 // branch.
4011 if (cont->IsBranch()) {
4012 Emit(cont->Encode(kArm64CompareAndBranch32), g.NoOutput(),
4013 g.UseRegister(value), g.Label(cont->true_block()),
4014 g.Label(cont->false_block()));
4015 } else {
4016 VisitCompare(this, cont->Encode(kArm64Tst32), g.UseRegister(value),
4017 g.UseRegister(value), cont);
4018 }
4019}
4020
4021void InstructionSelectorT::VisitSwitch(OpIndex node, const SwitchInfo& sw) {
4022 Arm64OperandGeneratorT g(this);
4023 InstructionOperand value_operand = g.UseRegister(this->input_at(node, 0));
4024
4025 // Emit either ArchTableSwitch or ArchBinarySearchSwitch.
4026 if (enable_switch_jump_table_ ==
4027 InstructionSelector::kEnableSwitchJumpTable) {
4028 static const size_t kMaxTableSwitchValueRange = 2 << 16;
4029 size_t table_space_cost = 4 + sw.value_range();
4030 size_t table_time_cost = 3;
4031 size_t lookup_space_cost = 3 + 2 * sw.case_count();
4032 size_t lookup_time_cost = sw.case_count();
4033 if (sw.case_count() > 4 &&
4034 table_space_cost + 3 * table_time_cost <=
4035 lookup_space_cost + 3 * lookup_time_cost &&
4036 sw.min_value() > std::numeric_limits<int32_t>::min() &&
4037 sw.value_range() <= kMaxTableSwitchValueRange) {
4038 InstructionOperand index_operand = value_operand;
4039 if (sw.min_value()) {
4040 index_operand = g.TempRegister();
4041 Emit(kArm64Sub32, index_operand, value_operand,
4042 g.TempImmediate(sw.min_value()));
4043 } else {
4044 // Smis' top bits are undefined, so zero-extend if not already done.
4045 if (!ZeroExtendsWord32ToWord64(this->input_at(node, 0))) {
4046 index_operand = g.TempRegister();
4047 Emit(kArm64Mov32, index_operand, value_operand);
4048 }
4049 }
4050 // Generate a table lookup.
4051 return EmitTableSwitch(sw, index_operand);
4052 }
4053 }
4054
4055 // Generate a tree of conditional jumps.
4056 return EmitBinarySearchSwitch(sw, value_operand);
4057}
4058
4059void InstructionSelectorT::VisitWord32Equal(OpIndex node) {
4060 const Operation& equal = Get(node);
4061 DCHECK(equal.Is<ComparisonOp>());
4062 OpIndex left = equal.input(0);
4063 OpIndex right = equal.input(1);
4064 OpIndex user = node;
4065 FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
4066
4067 if (MatchZero(right)) {
4068 OpIndex value = left;
4069 if (CanCover(user, value)) {
4070 const Operation& value_op = Get(value);
4071 if (value_op.Is<Opmask::kWord32Add>() ||
4072 value_op.Is<Opmask::kWord32BitwiseAnd>()) {
4073 return VisitWord32Compare(this, node, &cont);
4074 }
4075 if (value_op.Is<Opmask::kWord32Sub>()) {
4076 return VisitWordCompare(this, value, kArm64Cmp32, &cont,
4077 kArithmeticImm);
4078 }
4079 if (value_op.Is<Opmask::kWord32Equal>()) {
4080 // Word32Equal(Word32Equal(x, y), 0) => Word32Compare(x, y, ne).
4081 // A new FlagsContinuation is needed, as the result is generated for
4082 // {value} instead of for {node}.
4083 FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, value);
4084 cont.Negate();
4085 VisitWord32Compare(this, value, &cont);
4086 EmitIdentity(node);
4087 return;
4088 }
4089 return VisitWord32Test(this, value, &cont);
4090 }
4091 }
4092
4093 if (isolate() && (V8_STATIC_ROOTS_BOOL ||
4094 (COMPRESS_POINTERS_BOOL && !isolate()->bootstrapper()))) {
4095 Arm64OperandGeneratorT g(this);
4096 const RootsTable& roots_table = isolate()->roots_table();
4097 RootIndex root_index;
4098 Handle<HeapObject> right;
4099 // HeapConstants and CompressedHeapConstants can be treated the same when
4100 // using them as an input to a 32-bit comparison. Check whether either is
4101 // present.
4102 if (MatchHeapConstant(node, &right) && !right.is_null() &&
4103 roots_table.IsRootHandle(right, &root_index)) {
4104 if (RootsTable::IsReadOnly(root_index)) {
4105 Tagged_t ptr =
4106 MacroAssemblerBase::ReadOnlyRootPtr(root_index, isolate());
4107 if (g.CanBeImmediate(ptr, ImmediateMode::kArithmeticImm)) {
4108 return VisitCompare(this, kArm64Cmp32, g.UseRegister(left),
4109 g.TempImmediate(ptr), &cont);
4110 }
4111 }
4112 }
4113 }
4114 VisitWord32Compare(this, node, &cont);
4115}
4116
4117void InstructionSelectorT::VisitInt32LessThan(OpIndex node) {
4118 FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
4119 VisitWord32Compare(this, node, &cont);
4120}
4121
4122void InstructionSelectorT::VisitInt32LessThanOrEqual(OpIndex node) {
4123 FlagsContinuation cont =
4124 FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
4125 VisitWord32Compare(this, node, &cont);
4126}
4127
4128void InstructionSelectorT::VisitUint32LessThan(OpIndex node) {
4129 FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
4130 VisitWord32Compare(this, node, &cont);
4131}
4132
4133void InstructionSelectorT::VisitUint32LessThanOrEqual(OpIndex node) {
4134 FlagsContinuation cont =
4135 FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
4136 VisitWord32Compare(this, node, &cont);
4137}
4138
4139void InstructionSelectorT::VisitWord64Equal(OpIndex node) {
4140 FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
4141 const ComparisonOp& equal = this->Get(node).template Cast<ComparisonOp>();
4142 DCHECK_EQ(equal.kind, ComparisonOp::Kind::kEqual);
4143 if (this->MatchIntegralZero(equal.right()) && CanCover(node, equal.left())) {
4144 if (this->Get(equal.left()).template Is<Opmask::kWord64BitwiseAnd>()) {
4145 return VisitWordCompare(this, equal.left(), kArm64Tst, &cont,
4146 kLogical64Imm);
4147 }
4148 return VisitWord64Test(this, equal.left(), &cont);
4149 }
4150 VisitWordCompare(this, node, kArm64Cmp, &cont, kArithmeticImm);
4151}
4152
4153void InstructionSelectorT::VisitInt32AddWithOverflow(OpIndex node) {
4154 OptionalOpIndex ovf = FindProjection(node, 1);
4155 if (ovf.valid() && IsUsed(ovf.value())) {
4156 FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf.value());
4157 return VisitBinop(this, node, RegisterRepresentation::Word32(), kArm64Add32,
4158 kArithmeticImm, &cont);
4159 }
4160 FlagsContinuation cont;
4161 VisitBinop(this, node, RegisterRepresentation::Word32(), kArm64Add32,
4162 kArithmeticImm, &cont);
4163}
4164
4165void InstructionSelectorT::VisitInt32SubWithOverflow(OpIndex node) {
4166 OptionalOpIndex ovf = FindProjection(node, 1);
4167 if (ovf.valid()) {
4168 FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf.value());
4169 return VisitBinop(this, node, RegisterRepresentation::Word32(), kArm64Sub32,
4170 kArithmeticImm, &cont);
4171 }
4172 FlagsContinuation cont;
4173 VisitBinop(this, node, RegisterRepresentation::Word32(), kArm64Sub32,
4174 kArithmeticImm, &cont);
4175}
4176
4177void InstructionSelectorT::VisitInt32MulWithOverflow(OpIndex node) {
4178 OptionalOpIndex ovf = FindProjection(node, 1);
4179 if (ovf.valid()) {
4180 // ARM64 doesn't set the overflow flag for multiplication, so we need to
4181 // test on kNotEqual. Here is the code sequence used:
4182 // smull result, left, right
4183 // cmp result.X(), Operand(result, SXTW)
4184 FlagsContinuation cont = FlagsContinuation::ForSet(kNotEqual, ovf.value());
4185 return EmitInt32MulWithOverflow(this, node, &cont);
4186 }
4187 FlagsContinuation cont;
4188 EmitInt32MulWithOverflow(this, node, &cont);
4189}
4190
4191void InstructionSelectorT::VisitInt64AddWithOverflow(OpIndex node) {
4192 OptionalOpIndex ovf = FindProjection(node, 1);
4193 if (ovf.valid()) {
4194 FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf.value());
4195 return VisitBinop(this, node, RegisterRepresentation::Word64(), kArm64Add,
4196 kArithmeticImm, &cont);
4197 }
4198 FlagsContinuation cont;
4199 VisitBinop(this, node, RegisterRepresentation::Word64(), kArm64Add,
4200 kArithmeticImm, &cont);
4201}
4202
4203void InstructionSelectorT::VisitInt64SubWithOverflow(OpIndex node) {
4204 OptionalOpIndex ovf = FindProjection(node, 1);
4205 if (ovf.valid()) {
4206 FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf.value());
4207 return VisitBinop(this, node, RegisterRepresentation::Word64(), kArm64Sub,
4208 kArithmeticImm, &cont);
4209 }
4210 FlagsContinuation cont;
4211 VisitBinop(this, node, RegisterRepresentation::Word64(), kArm64Sub,
4212 kArithmeticImm, &cont);
4213}
4214
4215void InstructionSelectorT::VisitInt64MulWithOverflow(OpIndex node) {
4216 OptionalOpIndex ovf = FindProjection(node, 1);
4217 if (ovf.valid()) {
4218 // ARM64 doesn't set the overflow flag for multiplication, so we need to
4219 // test on kNotEqual. Here is the code sequence used:
4220 // mul result, left, right
4221 // smulh high, left, right
4222 // cmp high, result, asr 63
4223 FlagsContinuation cont = FlagsContinuation::ForSet(kNotEqual, ovf.value());
4224 return EmitInt64MulWithOverflow(this, node, &cont);
4225 }
4226 FlagsContinuation cont;
4227 EmitInt64MulWithOverflow(this, node, &cont);
4228}
4229
4230void InstructionSelectorT::VisitInt64LessThan(OpIndex node) {
4231 FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
4232 VisitWordCompare(this, node, kArm64Cmp, &cont, kArithmeticImm);
4233}
4234
4235void InstructionSelectorT::VisitInt64LessThanOrEqual(OpIndex node) {
4236 FlagsContinuation cont =
4237 FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
4238 VisitWordCompare(this, node, kArm64Cmp, &cont, kArithmeticImm);
4239}
4240
4241void InstructionSelectorT::VisitUint64LessThan(OpIndex node) {
4242 FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
4243 VisitWordCompare(this, node, kArm64Cmp, &cont, kArithmeticImm);
4244}
4245
4246void InstructionSelectorT::VisitUint64LessThanOrEqual(OpIndex node) {
4247 FlagsContinuation cont =
4248 FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
4249 VisitWordCompare(this, node, kArm64Cmp, &cont, kArithmeticImm);
4250}
4251
4252void InstructionSelectorT::VisitFloat32Neg(OpIndex node) {
4253 Arm64OperandGeneratorT g(this);
4254 OpIndex input = this->Get(node).input(0);
4255 const Operation& input_op = this->Get(input);
4256 if (input_op.Is<Opmask::kFloat32Mul>() && CanCover(node, input)) {
4257 const FloatBinopOp& mul = input_op.Cast<FloatBinopOp>();
4258 Emit(kArm64Float32Fnmul, g.DefineAsRegister(node),
4259 g.UseRegister(mul.left()), g.UseRegister(mul.right()));
4260 return;
4261 }
4262 VisitRR(this, kArm64Float32Neg, node);
4263}
4264
4265void InstructionSelectorT::VisitFloat32Mul(OpIndex node) {
4266 Arm64OperandGeneratorT g(this);
4267 const FloatBinopOp& mul = this->Get(node).template Cast<FloatBinopOp>();
4268 const Operation& lhs = this->Get(mul.left());
4269
4270 if (lhs.Is<Opmask::kFloat32Negate>() && CanCover(node, mul.left())) {
4271 Emit(kArm64Float32Fnmul, g.DefineAsRegister(node),
4272 g.UseRegister(lhs.input(0)), g.UseRegister(mul.right()));
4273 return;
4274 }
4275
4276 const Operation& rhs = this->Get(mul.right());
4277 if (rhs.Is<Opmask::kFloat32Negate>() && CanCover(node, mul.right())) {
4278 Emit(kArm64Float32Fnmul, g.DefineAsRegister(node),
4279 g.UseRegister(rhs.input(0)), g.UseRegister(mul.left()));
4280 return;
4281 }
4282 return VisitRRR(this, kArm64Float32Mul, node);
4283}
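// Illustrative example (hypothetical registers): both
// Float32Neg(Float32Mul(a, b)) and Float32Mul(Float32Neg(a), b) fold into a
// single negating multiply, roughly
//   fnmul s0, s1, s2   // computes -(s1 * s2)
// instead of a separate fneg + fmul pair.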
4284
4285void InstructionSelectorT::VisitFloat32Abs(OpIndex node) {
4286 Arm64OperandGeneratorT g(this);
4287 OpIndex in = this->input_at(node, 0);
4288 const Operation& input_op = this->Get(in);
4289 if (input_op.Is<Opmask::kFloat32Sub>() && CanCover(node, in)) {
4290 const FloatBinopOp& sub = input_op.Cast<FloatBinopOp>();
4291 Emit(kArm64Float32Abd, g.DefineAsRegister(node), g.UseRegister(sub.left()),
4292 g.UseRegister(sub.right()));
4293 return;
4294 }
4295
4296 return VisitRR(this, kArm64Float32Abs, node);
4297}
4298
4299void InstructionSelectorT::VisitFloat64Abs(OpIndex node) {
4300 Arm64OperandGeneratorT g(this);
4301 OpIndex in = this->input_at(node, 0);
4302 const Operation& input_op = this->Get(in);
4303 if (input_op.Is<Opmask::kFloat64Sub>() && CanCover(node, in)) {
4304 const FloatBinopOp& sub = input_op.Cast<FloatBinopOp>();
4305 Emit(kArm64Float64Abd, g.DefineAsRegister(node), g.UseRegister(sub.left()),
4306 g.UseRegister(sub.right()));
4307 return;
4308 }
4309
4310 return VisitRR(this, kArm64Float64Abs, node);
4311}
4312
4313void InstructionSelectorT::VisitFloat32Equal(OpIndex node) {
4314 FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
4315 VisitFloat32Compare(this, node, &cont);
4316}
4317
4318void InstructionSelectorT::VisitFloat32LessThan(OpIndex node) {
4319 FlagsContinuation cont = FlagsContinuation::ForSet(kFloatLessThan, node);
4320 VisitFloat32Compare(this, node, &cont);
4321}
4322
4323void InstructionSelectorT::VisitFloat32LessThanOrEqual(OpIndex node) {
4324 FlagsContinuation cont =
4325 FlagsContinuation::ForSet(kFloatLessThanOrEqual, node);
4326 VisitFloat32Compare(this, node, &cont);
4327}
4328
4329void InstructionSelectorT::VisitFloat64Equal(OpIndex node) {
4330 FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
4331 VisitFloat64Compare(this, node, &cont);
4332}
4333
4334void InstructionSelectorT::VisitFloat64LessThan(OpIndex node) {
4335 FlagsContinuation cont = FlagsContinuation::ForSet(kFloatLessThan, node);
4336 VisitFloat64Compare(this, node, &cont);
4337}
4338
4339void InstructionSelectorT::VisitFloat64LessThanOrEqual(OpIndex node) {
4340 FlagsContinuation cont =
4341 FlagsContinuation::ForSet(kFloatLessThanOrEqual, node);
4342 VisitFloat64Compare(this, node, &cont);
4343}
4344
4345void InstructionSelectorT::VisitBitcastWord32PairToFloat64(OpIndex node) {
4346 Arm64OperandGeneratorT g(this);
4347 const auto& bitcast = this->Cast<BitcastWord32PairToFloat64Op>(node);
4348 OpIndex hi = bitcast.high_word32();
4349 OpIndex lo = bitcast.low_word32();
4350
4351 int vreg = g.AllocateVirtualRegister();
4352 Emit(kArm64Bfi, g.DefineSameAsFirstForVreg(vreg), g.UseRegister(lo),
4353 g.UseRegister(hi), g.TempImmediate(32), g.TempImmediate(32));
4354 Emit(kArm64Float64MoveU64, g.DefineAsRegister(node),
4355 g.UseRegisterForVreg(vreg));
4356}
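// Illustrative sketch (hypothetical registers): the two instructions above
// combine the word pair and move it into an FP register, roughly
//   bfi  x2, x3, #32, #32   // x2 holds the low word, x3 the high word
//   fmov d0, x2
// yielding the float64 whose bit pattern is (hi << 32) | lo.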
4357
4358void InstructionSelectorT::VisitFloat64InsertLowWord32(OpIndex node) {
4359 UNIMPLEMENTED();
4360}
4361
4362void InstructionSelectorT::VisitFloat64InsertHighWord32(OpIndex node) {
4363 UNIMPLEMENTED();
4364}
4365
4366void InstructionSelectorT::VisitFloat64Neg(OpIndex node) {
4367 Arm64OperandGeneratorT g(this);
4368 OpIndex input = this->Get(node).input(0);
4369 const Operation& input_op = this->Get(input);
4370 if (input_op.Is<Opmask::kFloat64Mul>() && CanCover(node, input)) {
4371 const FloatBinopOp& mul = input_op.Cast<FloatBinopOp>();
4372 Emit(kArm64Float64Fnmul, g.DefineAsRegister(node),
4373 g.UseRegister(mul.left()), g.UseRegister(mul.right()));
4374 return;
4375 }
4376 VisitRR(this, kArm64Float64Neg, node);
4377}
4378
4379void InstructionSelectorT::VisitFloat64Mul(OpIndex node) {
4380 Arm64OperandGeneratorT g(this);
4381 const FloatBinopOp& mul = this->Get(node).template Cast<FloatBinopOp>();
4382 const Operation& lhs = this->Get(mul.left());
4383 if (lhs.Is<Opmask::kFloat64Negate>() && CanCover(node, mul.left())) {
4384 Emit(kArm64Float64Fnmul, g.DefineAsRegister(node),
4385 g.UseRegister(lhs.input(0)), g.UseRegister(mul.right()));
4386 return;
4387 }
4388
4389 const Operation& rhs = this->Get(mul.right());
4390 if (rhs.Is<Opmask::kFloat64Negate>() && CanCover(node, mul.right())) {
4391 Emit(kArm64Float64Fnmul, g.DefineAsRegister(node),
4392 g.UseRegister(rhs.input(0)), g.UseRegister(mul.left()));
4393 return;
4394 }
4395 return VisitRRR(this, kArm64Float64Mul, node);
4396}
4397
4398void InstructionSelectorT::VisitMemoryBarrier(OpIndex node) {
4399 // Use DMB ISH for both acquire-release and sequentially consistent barriers.
4400 Arm64OperandGeneratorT g(this);
4401 Emit(kArm64DmbIsh, g.NoOutput());
4402}
4403
4404void InstructionSelectorT::VisitWord32AtomicLoad(OpIndex node) {
4405 VisitAtomicLoad(this, node, AtomicWidth::kWord32);
4406}
4407
4408void InstructionSelectorT::VisitWord64AtomicLoad(OpIndex node) {
4409 VisitAtomicLoad(this, node, AtomicWidth::kWord64);
4410}
4411
4412void InstructionSelectorT::VisitWord32AtomicStore(OpIndex node) {
4413 VisitAtomicStore(this, node, AtomicWidth::kWord32);
4414}
4415
4416void InstructionSelectorT::VisitWord64AtomicStore(OpIndex node) {
4417 VisitAtomicStore(this, node, AtomicWidth::kWord64);
4418}
4419
4420void InstructionSelectorT::VisitWord32AtomicExchange(OpIndex node) {
4421 const AtomicRMWOp& atomic_op = this->Get(node).template Cast<AtomicRMWOp>();
4422 ArchOpcode opcode;
4423 if (atomic_op.memory_rep == MemoryRepresentation::Int8()) {
4424 opcode = kAtomicExchangeInt8;
4425 } else if (atomic_op.memory_rep == MemoryRepresentation::Uint8()) {
4426 opcode = kAtomicExchangeUint8;
4427 } else if (atomic_op.memory_rep == MemoryRepresentation::Int16()) {
4428 opcode = kAtomicExchangeInt16;
4429 } else if (atomic_op.memory_rep == MemoryRepresentation::Uint16()) {
4430 opcode = kAtomicExchangeUint16;
4431 } else if (atomic_op.memory_rep == MemoryRepresentation::Int32() ||
4432 atomic_op.memory_rep == MemoryRepresentation::Uint32()) {
4433 opcode = kAtomicExchangeWord32;
4434 } else {
4435 UNREACHABLE();
4436 }
4437 VisitAtomicExchange(this, node, opcode, AtomicWidth::kWord32,
4438 atomic_op.memory_access_kind);
4439}
4440
4441void InstructionSelectorT::VisitWord64AtomicExchange(OpIndex node) {
4442 const AtomicRMWOp& atomic_op = this->Get(node).template Cast<AtomicRMWOp>();
4443 ArchOpcode opcode;
4444 if (atomic_op.memory_rep == MemoryRepresentation::Uint8()) {
4445 opcode = kAtomicExchangeUint8;
4446 } else if (atomic_op.memory_rep == MemoryRepresentation::Uint16()) {
4447 opcode = kAtomicExchangeUint16;
4448 } else if (atomic_op.memory_rep == MemoryRepresentation::Uint32()) {
4449 opcode = kAtomicExchangeWord32;
4450 } else if (atomic_op.memory_rep == MemoryRepresentation::Uint64()) {
4451 opcode = kArm64Word64AtomicExchangeUint64;
4452 } else {
4453 UNREACHABLE();
4454 }
4455 VisitAtomicExchange(this, node, opcode, AtomicWidth::kWord64,
4456 atomic_op.memory_access_kind);
4457}
4458
4459void InstructionSelectorT::VisitWord32AtomicCompareExchange(OpIndex node) {
4460 const AtomicRMWOp& atomic_op = this->Get(node).template Cast<AtomicRMWOp>();
4461 ArchOpcode opcode;
4462 if (atomic_op.memory_rep == MemoryRepresentation::Int8()) {
4463 opcode = kAtomicCompareExchangeInt8;
4464 } else if (atomic_op.memory_rep == MemoryRepresentation::Uint8()) {
4465 opcode = kAtomicCompareExchangeUint8;
4466 } else if (atomic_op.memory_rep == MemoryRepresentation::Int16()) {
4467 opcode = kAtomicCompareExchangeInt16;
4468 } else if (atomic_op.memory_rep == MemoryRepresentation::Uint16()) {
4469 opcode = kAtomicCompareExchangeUint16;
4470 } else if (atomic_op.memory_rep == MemoryRepresentation::Int32() ||
4471 atomic_op.memory_rep == MemoryRepresentation::Uint32()) {
4472 opcode = kAtomicCompareExchangeWord32;
4473 } else {
4474 UNREACHABLE();
4475 }
4476 VisitAtomicCompareExchange(this, node, opcode, AtomicWidth::kWord32,
4477 atomic_op.memory_access_kind);
4478}
4479
4480void InstructionSelectorT::VisitWord64AtomicCompareExchange(OpIndex node) {
4481 const AtomicRMWOp& atomic_op = this->Get(node).template Cast<AtomicRMWOp>();
4482 ArchOpcode opcode;
4483 if (atomic_op.memory_rep == MemoryRepresentation::Uint8()) {
4484 opcode = kAtomicCompareExchangeUint8;
4485 } else if (atomic_op.memory_rep == MemoryRepresentation::Uint16()) {
4486 opcode = kAtomicCompareExchangeUint16;
4487 } else if (atomic_op.memory_rep == MemoryRepresentation::Uint32()) {
4488 opcode = kAtomicCompareExchangeWord32;
4489 } else if (atomic_op.memory_rep == MemoryRepresentation::Uint64()) {
4490 opcode = kArm64Word64AtomicCompareExchangeUint64;
4491 } else {
4492 UNREACHABLE();
4493 }
4494 VisitAtomicCompareExchange(this, node, opcode, AtomicWidth::kWord64,
4495 atomic_op.memory_access_kind);
4496}
4497
4498void InstructionSelectorT::VisitWord32AtomicBinaryOperation(
4499 OpIndex node, ArchOpcode int8_op, ArchOpcode uint8_op, ArchOpcode int16_op,
4500 ArchOpcode uint16_op, ArchOpcode word32_op) {
4501 const AtomicRMWOp& atomic_op = this->Get(node).template Cast<AtomicRMWOp>();
4502 ArchOpcode opcode;
4503 if (atomic_op.memory_rep == MemoryRepresentation::Int8()) {
4504 opcode = int8_op;
4505 } else if (atomic_op.memory_rep == MemoryRepresentation::Uint8()) {
4506 opcode = uint8_op;
4507 } else if (atomic_op.memory_rep == MemoryRepresentation::Int16()) {
4508 opcode = int16_op;
4509 } else if (atomic_op.memory_rep == MemoryRepresentation::Uint16()) {
4510 opcode = uint16_op;
4511 } else if (atomic_op.memory_rep == MemoryRepresentation::Int32() ||
4512 atomic_op.memory_rep == MemoryRepresentation::Uint32()) {
4513 opcode = word32_op;
4514 } else {
4515 UNREACHABLE();
4516 }
4517 VisitAtomicBinop(this, node, opcode, AtomicWidth::kWord32,
4518 atomic_op.memory_access_kind);
4519}
4520
4521#define VISIT_ATOMIC_BINOP(op) \
4522 void InstructionSelectorT::VisitWord32Atomic##op(OpIndex node) { \
4523 VisitWord32AtomicBinaryOperation( \
4524 node, kAtomic##op##Int8, kAtomic##op##Uint8, kAtomic##op##Int16, \
4525 kAtomic##op##Uint16, kAtomic##op##Word32); \
4526 }
4527VISIT_ATOMIC_BINOP(Add)
4528VISIT_ATOMIC_BINOP(Sub)
4529VISIT_ATOMIC_BINOP(And)
4530VISIT_ATOMIC_BINOP(Or)
4531VISIT_ATOMIC_BINOP(Xor)
4532 #undef VISIT_ATOMIC_BINOP
4533
4534void InstructionSelectorT::VisitWord64AtomicBinaryOperation(
4535 OpIndex node, ArchOpcode uint8_op, ArchOpcode uint16_op,
4536 ArchOpcode uint32_op, ArchOpcode uint64_op) {
4537 const AtomicRMWOp& atomic_op = this->Get(node).template Cast<AtomicRMWOp>();
4538 ArchOpcode opcode;
4539 if (atomic_op.memory_rep == MemoryRepresentation::Uint8()) {
4540 opcode = uint8_op;
4541 } else if (atomic_op.memory_rep == MemoryRepresentation::Uint16()) {
4542 opcode = uint16_op;
4543 } else if (atomic_op.memory_rep == MemoryRepresentation::Uint32()) {
4544 opcode = uint32_op;
4545 } else if (atomic_op.memory_rep == MemoryRepresentation::Uint64()) {
4546 opcode = uint64_op;
4547 } else {
4548 UNREACHABLE();
4549 }
4550 VisitAtomicBinop(this, node, opcode, AtomicWidth::kWord64,
4551 atomic_op.memory_access_kind);
4552}
4553
4554#define VISIT_ATOMIC_BINOP(op) \
4555 void InstructionSelectorT::VisitWord64Atomic##op(OpIndex node) { \
4556 VisitWord64AtomicBinaryOperation(node, kAtomic##op##Uint8, \
4557 kAtomic##op##Uint16, kAtomic##op##Word32, \
4558 kArm64Word64Atomic##op##Uint64); \
4559 }
4560VISIT_ATOMIC_BINOP(Add)
4561VISIT_ATOMIC_BINOP(Sub)
4562VISIT_ATOMIC_BINOP(And)
4563VISIT_ATOMIC_BINOP(Or)
4564VISIT_ATOMIC_BINOP(Xor)
4565 #undef VISIT_ATOMIC_BINOP
4566
4567void InstructionSelectorT::VisitInt32AbsWithOverflow(OpIndex node) {
4568 UNREACHABLE();
4569}
4570
4571void InstructionSelectorT::VisitInt64AbsWithOverflow(OpIndex node) {
4572 UNREACHABLE();
4573}
4574
4575#if V8_ENABLE_WEBASSEMBLY
4576#define SIMD_UNOP_LIST(V) \
4577 V(F64x2ConvertLowI32x4S, kArm64F64x2ConvertLowI32x4S) \
4578 V(F64x2ConvertLowI32x4U, kArm64F64x2ConvertLowI32x4U) \
4579 V(F64x2PromoteLowF32x4, kArm64F64x2PromoteLowF32x4) \
4580 V(F32x4SConvertI32x4, kArm64F32x4SConvertI32x4) \
4581 V(F32x4UConvertI32x4, kArm64F32x4UConvertI32x4) \
4582 V(F32x4DemoteF64x2Zero, kArm64F32x4DemoteF64x2Zero) \
4583 V(F16x8SConvertI16x8, kArm64F16x8SConvertI16x8) \
4584 V(F16x8UConvertI16x8, kArm64F16x8UConvertI16x8) \
4585 V(I16x8SConvertF16x8, kArm64I16x8SConvertF16x8) \
4586 V(I16x8UConvertF16x8, kArm64I16x8UConvertF16x8) \
4587 V(F16x8DemoteF32x4Zero, kArm64F16x8DemoteF32x4Zero) \
4588 V(F16x8DemoteF64x2Zero, kArm64F16x8DemoteF64x2Zero) \
4589 V(F32x4PromoteLowF16x8, kArm64F32x4PromoteLowF16x8) \
4590 V(I64x2BitMask, kArm64I64x2BitMask) \
4591 V(I32x4SConvertF32x4, kArm64I32x4SConvertF32x4) \
4592 V(I32x4UConvertF32x4, kArm64I32x4UConvertF32x4) \
4593 V(I32x4RelaxedTruncF32x4S, kArm64I32x4SConvertF32x4) \
4594 V(I32x4RelaxedTruncF32x4U, kArm64I32x4UConvertF32x4) \
4595 V(I32x4BitMask, kArm64I32x4BitMask) \
4596 V(I32x4TruncSatF64x2SZero, kArm64I32x4TruncSatF64x2SZero) \
4597 V(I32x4TruncSatF64x2UZero, kArm64I32x4TruncSatF64x2UZero) \
4598 V(I32x4RelaxedTruncF64x2SZero, kArm64I32x4TruncSatF64x2SZero) \
4599 V(I32x4RelaxedTruncF64x2UZero, kArm64I32x4TruncSatF64x2UZero) \
4600 V(I16x8BitMask, kArm64I16x8BitMask) \
4601 V(S128Not, kArm64S128Not) \
4602 V(V128AnyTrue, kArm64V128AnyTrue) \
4603 V(I64x2AllTrue, kArm64I64x2AllTrue) \
4604 V(I32x4AllTrue, kArm64I32x4AllTrue) \
4605 V(I16x8AllTrue, kArm64I16x8AllTrue) \
4606 V(I8x16AllTrue, kArm64I8x16AllTrue)
4607
4608#define SIMD_UNOP_LANE_SIZE_LIST(V) \
4609 V(F64x2Splat, kArm64FSplat, 64) \
4610 V(F64x2Abs, kArm64FAbs, 64) \
4611 V(F64x2Sqrt, kArm64FSqrt, 64) \
4612 V(F64x2Neg, kArm64FNeg, 64) \
4613 V(F32x4Splat, kArm64FSplat, 32) \
4614 V(F32x4Abs, kArm64FAbs, 32) \
4615 V(F32x4Sqrt, kArm64FSqrt, 32) \
4616 V(F32x4Neg, kArm64FNeg, 32) \
4617 V(I64x2Splat, kArm64ISplat, 64) \
4618 V(I64x2Abs, kArm64IAbs, 64) \
4619 V(I64x2Neg, kArm64INeg, 64) \
4620 V(I32x4Splat, kArm64ISplat, 32) \
4621 V(I32x4Abs, kArm64IAbs, 32) \
4622 V(I32x4Neg, kArm64INeg, 32) \
4623 V(F16x8Splat, kArm64FSplat, 16) \
4624 V(F16x8Abs, kArm64FAbs, 16) \
4625 V(F16x8Sqrt, kArm64FSqrt, 16) \
4626 V(F16x8Neg, kArm64FNeg, 16) \
4627 V(I16x8Splat, kArm64ISplat, 16) \
4628 V(I16x8Abs, kArm64IAbs, 16) \
4629 V(I16x8Neg, kArm64INeg, 16) \
4630 V(I8x16Splat, kArm64ISplat, 8) \
4631 V(I8x16Abs, kArm64IAbs, 8) \
4632 V(I8x16Neg, kArm64INeg, 8)
4633
4634#define SIMD_SHIFT_OP_LIST(V) \
4635 V(I64x2Shl, 64) \
4636 V(I64x2ShrS, 64) \
4637 V(I64x2ShrU, 64) \
4638 V(I32x4Shl, 32) \
4639 V(I32x4ShrS, 32) \
4640 V(I32x4ShrU, 32) \
4641 V(I16x8Shl, 16) \
4642 V(I16x8ShrS, 16) \
4643 V(I16x8ShrU, 16) \
4644 V(I8x16Shl, 8) \
4645 V(I8x16ShrS, 8) \
4646 V(I8x16ShrU, 8)
4647
4648#define SIMD_BINOP_LIST(V) \
4649 V(I32x4Mul, kArm64I32x4Mul) \
4650 V(I32x4DotI16x8S, kArm64I32x4DotI16x8S) \
4651 V(I16x8DotI8x16I7x16S, kArm64I16x8DotI8x16S) \
4652 V(I16x8SConvertI32x4, kArm64I16x8SConvertI32x4) \
4653 V(I16x8Mul, kArm64I16x8Mul) \
4654 V(I16x8UConvertI32x4, kArm64I16x8UConvertI32x4) \
4655 V(I16x8Q15MulRSatS, kArm64I16x8Q15MulRSatS) \
4656 V(I16x8RelaxedQ15MulRS, kArm64I16x8Q15MulRSatS) \
4657 V(I8x16SConvertI16x8, kArm64I8x16SConvertI16x8) \
4658 V(I8x16UConvertI16x8, kArm64I8x16UConvertI16x8) \
4659 V(S128Or, kArm64S128Or)
4660
4661#define SIMD_BINOP_LANE_SIZE_LIST(V) \
4662 V(F64x2Min, kArm64FMin, 64) \
4663 V(F64x2Max, kArm64FMax, 64) \
4664 V(F64x2Add, kArm64FAdd, 64) \
4665 V(F64x2Sub, kArm64FSub, 64) \
4666 V(F64x2Div, kArm64FDiv, 64) \
4667 V(F64x2RelaxedMin, kArm64FMin, 64) \
4668 V(F64x2RelaxedMax, kArm64FMax, 64) \
4669 V(F32x4Min, kArm64FMin, 32) \
4670 V(F32x4Max, kArm64FMax, 32) \
4671 V(F32x4Add, kArm64FAdd, 32) \
4672 V(F32x4Sub, kArm64FSub, 32) \
4673 V(F32x4Div, kArm64FDiv, 32) \
4674 V(F32x4RelaxedMin, kArm64FMin, 32) \
4675 V(F32x4RelaxedMax, kArm64FMax, 32) \
4676 V(F16x8Add, kArm64FAdd, 16) \
4677 V(F16x8Sub, kArm64FSub, 16) \
4678 V(F16x8Div, kArm64FDiv, 16) \
4679 V(F16x8Min, kArm64FMin, 16) \
4680 V(F16x8Max, kArm64FMax, 16) \
4681 V(I64x2Sub, kArm64ISub, 64) \
4682 V(I32x4GtU, kArm64IGtU, 32) \
4683 V(I32x4GeU, kArm64IGeU, 32) \
4684 V(I32x4MinS, kArm64IMinS, 32) \
4685 V(I32x4MaxS, kArm64IMaxS, 32) \
4686 V(I32x4MinU, kArm64IMinU, 32) \
4687 V(I32x4MaxU, kArm64IMaxU, 32) \
4688 V(I16x8AddSatS, kArm64IAddSatS, 16) \
4689 V(I16x8SubSatS, kArm64ISubSatS, 16) \
4690 V(I16x8AddSatU, kArm64IAddSatU, 16) \
4691 V(I16x8SubSatU, kArm64ISubSatU, 16) \
4692 V(I16x8GtU, kArm64IGtU, 16) \
4693 V(I16x8GeU, kArm64IGeU, 16) \
4694 V(I16x8RoundingAverageU, kArm64RoundingAverageU, 16) \
4695 V(I8x16RoundingAverageU, kArm64RoundingAverageU, 8) \
4696 V(I16x8MinS, kArm64IMinS, 16) \
4697 V(I16x8MaxS, kArm64IMaxS, 16) \
4698 V(I16x8MinU, kArm64IMinU, 16) \
4699 V(I16x8MaxU, kArm64IMaxU, 16) \
4700 V(I8x16Sub, kArm64ISub, 8) \
4701 V(I8x16AddSatS, kArm64IAddSatS, 8) \
4702 V(I8x16SubSatS, kArm64ISubSatS, 8) \
4703 V(I8x16AddSatU, kArm64IAddSatU, 8) \
4704 V(I8x16SubSatU, kArm64ISubSatU, 8) \
4705 V(I8x16GtU, kArm64IGtU, 8) \
4706 V(I8x16GeU, kArm64IGeU, 8) \
4707 V(I8x16MinS, kArm64IMinS, 8) \
4708 V(I8x16MaxS, kArm64IMaxS, 8) \
4709 V(I8x16MinU, kArm64IMinU, 8) \
4710 V(I8x16MaxU, kArm64IMaxU, 8)
4711
4712void InstructionSelectorT::VisitS128Const(OpIndex node) {
4713 Arm64OperandGeneratorT g(this);
4714 static const int kUint32Immediates = 4;
4715 uint32_t val[kUint32Immediates];
4716 static_assert(sizeof(val) == kSimd128Size);
4717 const Simd128ConstantOp& constant =
4718 this->Get(node).template Cast<Simd128ConstantOp>();
4719 memcpy(val, constant.value, kSimd128Size);
4720 Emit(kArm64S128Const, g.DefineAsRegister(node), g.UseImmediate(val[0]),
4721 g.UseImmediate(val[1]), g.UseImmediate(val[2]), g.UseImmediate(val[3]));
4722}
4723
4724namespace {
4725
4726struct BicImmParam {
4727 BicImmParam(uint32_t imm, uint8_t lane_size, uint8_t shift_amount)
4728 : imm(imm), lane_size(lane_size), shift_amount(shift_amount) {}
4729 uint8_t imm;
4730 uint8_t lane_size;
4731 uint8_t shift_amount;
4732};
4733
4734struct BicImmResult {
4735 BicImmResult(std::optional<BicImmParam> param, OpIndex const_node,
4736 OpIndex other_node)
4737 : param(param), const_node(const_node), other_node(other_node) {}
4738 std::optional<BicImmParam> param;
4739 OpIndex const_node;
4740 OpIndex other_node;
4741};
4742
4743std::optional<BicImmParam> BicImm16bitHelper(uint16_t val) {
4744 uint8_t byte0 = val & 0xFF;
4745 uint8_t byte1 = val >> 8;
4746 // Cannot use Bic unless at least one of the two bytes is 0x00
4747 if (byte0 == 0x00) {
4748 return BicImmParam(byte1, 16, 8);
4749 }
4750 if (byte1 == 0x00) {
4751 return BicImmParam(byte0, 16, 0);
4752 }
4753 return std::nullopt;
4754}
4755
4756std::optional<BicImmParam> BicImm32bitHelper(uint32_t val) {
4757 for (int i = 0; i < 4; i++) {
4758 // All bytes are 0 except one
4759 if ((val & (0xFF << (8 * i))) == val) {
4760 return BicImmParam(static_cast<uint8_t>(val >> i * 8), 32, i * 8);
4761 }
4762 }
4763 // Low and high 2 bytes are equal
4764 if ((val >> 16) == (0xFFFF & val)) {
4765 return BicImm16bitHelper(0xFFFF & val);
4766 }
4767 return std::nullopt;
4768}
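// Illustrative example: a per-lane constant of 0x0000FF00 has a single
// non-zero byte, so BicImm32bitHelper yields BicImmParam(0xFF, 32, 8),
// roughly a BIC Vd.4S, #0xFF, LSL #8. A value like 0x00FF00FF repeats its
// low 16 bits and is retried as the 16-bit pattern 0x00FF, giving
// BicImmParam(0xFF, 16, 0).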
4769
4770std::optional<BicImmParam> BicImmConstHelper(const Operation& op,
4771 bool not_imm) {
4772 const int kUint32Immediates = 4;
4773 uint32_t val[kUint32Immediates];
4774 static_assert(sizeof(val) == kSimd128Size);
4775 memcpy(val, op.Cast<Simd128ConstantOp>().value, kSimd128Size);
4776 // If the four uint32 lanes are not all the same, we cannot emit Bic
4777 if (!(val[0] == val[1] && val[1] == val[2] && val[2] == val[3])) {
4778 return std::nullopt;
4779 }
4780 return BicImm32bitHelper(not_imm ? ~val[0] : val[0]);
4781}
4782
4783std::optional<BicImmResult> BicImmHelper(InstructionSelectorT* selector,
4784 OpIndex and_node, bool not_imm) {
4785 const Simd128BinopOp& op = selector->Get(and_node).Cast<Simd128BinopOp>();
4786 // If we are negating the immediate then we are producing And(x, imm), and so
4787 // can take the immediate from the left or right input. Otherwise we are
4788 // producing And(x, Not(imm)), which can only be used when the immediate is
4789 // the right (negated) input.
4790 if (not_imm && selector->Get(op.left()).Is<Simd128ConstantOp>()) {
4791 return BicImmResult(BicImmConstHelper(selector->Get(op.left()), not_imm),
4792 op.left(), op.right());
4793 }
4794 if (selector->Get(op.right()).Is<Simd128ConstantOp>()) {
4795 return BicImmResult(BicImmConstHelper(selector->Get(op.right()), not_imm),
4796 op.right(), op.left());
4797 }
4798 return std::nullopt;
4799}
4800
4801bool TryEmitS128AndNotImm(InstructionSelectorT* selector, OpIndex node,
4802 bool not_imm) {
4803 Arm64OperandGeneratorT g(selector);
4804 std::optional<BicImmResult> result = BicImmHelper(selector, node, not_imm);
4805 if (!result.has_value()) return false;
4806 std::optional<BicImmParam> param = result->param;
4807 if (param.has_value()) {
4808 if (selector->CanCover(node, result->other_node)) {
4809 selector->Emit(
4810 kArm64S128AndNot | LaneSizeField::encode(param->lane_size),
4811 g.DefineSameAsFirst(node), g.UseRegister(result->other_node),
4812 g.UseImmediate(param->imm), g.UseImmediate(param->shift_amount));
4813 return true;
4814 }
4815 }
4816 return false;
4817}
4818
4819} // namespace
4820
4821void InstructionSelectorT::VisitS128AndNot(OpIndex node) {
4822 if (!TryEmitS128AndNotImm(this, node, false)) {
4823 VisitRRR(this, kArm64S128AndNot, node);
4824 }
4825}
4826
4827void InstructionSelectorT::VisitS128And(OpIndex node) {
4828 // AndNot can be used if we negate the immediate input of And.
4829 if (!TryEmitS128AndNotImm(this, node, true)) {
4830 VisitRRR(this, kArm64S128And, node);
4831 }
4832}
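// Illustrative example: S128And(x, Const(0xFFFFFF00 in every 32-bit lane))
// negates the immediate to 0x000000FF, which BicImm32bitHelper encodes as
// BicImmParam(0xFF, 32, 0), so the selector emits kArm64S128AndNot with that
// immediate (roughly BIC Vd.4S, #0xFF) instead of materializing the constant
// and emitting a full AND.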
4833
4834void InstructionSelectorT::VisitS128Zero(OpIndex node) {
4835 Arm64OperandGeneratorT g(this);
4836 Emit(kArm64S128Const, g.DefineAsRegister(node), g.UseImmediate(0),
4837 g.UseImmediate(0), g.UseImmediate(0), g.UseImmediate(0));
4838}
4839
4840void InstructionSelectorT::VisitI32x4DotI8x16I7x16AddS(OpIndex node) {
4841 Arm64OperandGeneratorT g(this);
4842 InstructionOperand output = CpuFeatures::IsSupported(DOTPROD)
4843 ? g.DefineSameAsInput(node, 2)
4844 : g.DefineAsRegister(node);
4845 Emit(kArm64I32x4DotI8x16AddS, output, g.UseRegister(this->input_at(node, 0)),
4846 g.UseRegister(this->input_at(node, 1)),
4847 g.UseRegister(this->input_at(node, 2)));
4848}
4849
4850void InstructionSelectorT::VisitI8x16BitMask(OpIndex node) {
4851 Arm64OperandGeneratorT g(this);
4852 InstructionOperand temps[1];
4853 size_t temp_count = 0;
4854
4855 if (CpuFeatures::IsSupported(PMULL1Q)) {
4856 temps[0] = g.TempSimd128Register();
4857 temp_count = 1;
4858 }
4859
4860 Emit(kArm64I8x16BitMask, g.DefineAsRegister(node),
4861 g.UseRegister(this->input_at(node, 0)), temp_count, temps);
4862}
4863
4864#define SIMD_VISIT_EXTRACT_LANE(Type, T, Sign, LaneSize) \
4865 void InstructionSelectorT::Visit##Type##ExtractLane##Sign(OpIndex node) { \
4866 VisitRRI(this, \
4867 kArm64##T##ExtractLane##Sign | LaneSizeField::encode(LaneSize), \
4868 node); \
4869 }
4870SIMD_VISIT_EXTRACT_LANE(F64x2, F, , 64)
4871SIMD_VISIT_EXTRACT_LANE(F32x4, F, , 32)
4872SIMD_VISIT_EXTRACT_LANE(F16x8, F, , 16)
4873SIMD_VISIT_EXTRACT_LANE(I64x2, I, , 64)
4874SIMD_VISIT_EXTRACT_LANE(I32x4, I, , 32)
4875SIMD_VISIT_EXTRACT_LANE(I16x8, I, U, 16)
4876SIMD_VISIT_EXTRACT_LANE(I16x8, I, S, 16)
4877SIMD_VISIT_EXTRACT_LANE(I8x16, I, U, 8)
4878SIMD_VISIT_EXTRACT_LANE(I8x16, I, S, 8)
4879#undef SIMD_VISIT_EXTRACT_LANE
4880
4881#define SIMD_VISIT_REPLACE_LANE(Type, T, LaneSize) \
4882 void InstructionSelectorT::Visit##Type##ReplaceLane(OpIndex node) { \
4883 VisitRRIR(this, kArm64##T##ReplaceLane | LaneSizeField::encode(LaneSize), \
4884 node); \
4885 }
4886SIMD_VISIT_REPLACE_LANE(F64x2, F, 64)
4887SIMD_VISIT_REPLACE_LANE(F32x4, F, 32)
4888SIMD_VISIT_REPLACE_LANE(F16x8, F, 16)
4889SIMD_VISIT_REPLACE_LANE(I64x2, I, 64)
4890SIMD_VISIT_REPLACE_LANE(I32x4, I, 32)
4891SIMD_VISIT_REPLACE_LANE(I16x8, I, 16)
4892SIMD_VISIT_REPLACE_LANE(I8x16, I, 8)
4893#undef SIMD_VISIT_REPLACE_LANE
4894
4895#define SIMD_VISIT_UNOP(Name, instruction) \
4896 void InstructionSelectorT::Visit##Name(OpIndex node) { \
4897 VisitRR(this, instruction, node); \
4898 }
4899SIMD_UNOP_LIST(SIMD_VISIT_UNOP)
4900 #undef SIMD_VISIT_UNOP
4901#undef SIMD_UNOP_LIST
4902
4903#define SIMD_VISIT_SHIFT_OP(Name, width) \
4904 void InstructionSelectorT::Visit##Name(OpIndex node) { \
4905 VisitSimdShiftRRR(this, kArm64##Name, node, width); \
4906 }
4907SIMD_SHIFT_OP_LIST(SIMD_VISIT_SHIFT_OP)
4908 #undef SIMD_VISIT_SHIFT_OP
4909#undef SIMD_SHIFT_OP_LIST
4910
4911#define SIMD_VISIT_BINOP(Name, instruction) \
4912 void InstructionSelectorT::Visit##Name(OpIndex node) { \
4913 VisitRRR(this, instruction, node); \
4914 }
4915SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
4916 #undef SIMD_VISIT_BINOP
4917#undef SIMD_BINOP_LIST
4918
4919#define SIMD_VISIT_BINOP_LANE_SIZE(Name, instruction, LaneSize) \
4920 void InstructionSelectorT::Visit##Name(OpIndex node) { \
4921 VisitRRR(this, instruction | LaneSizeField::encode(LaneSize), node); \
4922 }
4923SIMD_BINOP_LANE_SIZE_LIST(SIMD_VISIT_BINOP_LANE_SIZE)
4924#undef SIMD_VISIT_BINOP_LANE_SIZE
4925#undef SIMD_BINOP_LANE_SIZE_LIST
4926
4927#define SIMD_VISIT_UNOP_LANE_SIZE(Name, instruction, LaneSize) \
4928 void InstructionSelectorT::Visit##Name(OpIndex node) { \
4929 VisitRR(this, instruction | LaneSizeField::encode(LaneSize), node); \
4930 }
4931SIMD_UNOP_LANE_SIZE_LIST(SIMD_VISIT_UNOP_LANE_SIZE)
4932#undef SIMD_VISIT_UNOP_LANE_SIZE
4933#undef SIMD_UNOP_LANE_SIZE_LIST
4934
4935using ShuffleMatcher =
4936 ValueMatcher<S128ImmediateParameter, IrOpcode::kI8x16Shuffle>;
4937using BinopWithShuffleMatcher = BinopMatcher<ShuffleMatcher, ShuffleMatcher,
4938 MachineRepresentation::kSimd128>;
4939
4940namespace {
4941// Struct holding the result of pattern-matching a mul+dup.
4942struct MulWithDup {
4943 OpIndex input; // Node holding the vector elements.
4944 OpIndex dup_node; // Node holding the lane to multiply.
4945 int index;
4946 // Pattern-match is successful if dup_node is set.
4947 explicit operator bool() const { return dup_node.valid(); }
4948};
4949
4950template <int LANES>
4951MulWithDup TryMatchMulWithDup(InstructionSelectorT* selector, OpIndex node) {
4952 // Pattern match:
4953 // f32x4.mul(x, shuffle(x, y, indices)) => f32x4.mul(x, y, laneidx)
4954 // f64x2.mul(x, shuffle(x, y, indices)) => f64x2.mul(x, y, laneidx)
4955 // where shuffle(x, y, indices) = dup(x[laneidx]) or dup(y[laneidx])
4956 // f32x4.mul and f64x2.mul are commutative, so use BinopMatcher.
4957 OpIndex input;
4958 OpIndex dup_node;
4959
4960 int index = 0;
4961#if V8_ENABLE_WEBASSEMBLY
4962 const Simd128BinopOp& mul = selector->Get(node).Cast<Simd128BinopOp>();
4963 const Operation& left = selector->Get(mul.left());
4964 const Operation& right = selector->Get(mul.right());
4965
4966 // TODO(zhin): We can canonicalize first to avoid checking index < LANES.
4967 // e.g. shuffle(x, y, [16, 17, 18, 19...]) => shuffle(y, y, [0, 1, 2,
4968 // 3]...). But doing so can mutate the inputs of the shuffle node without
4969 // updating the shuffle immediates themselves. Fix that before we
4970 // canonicalize here. We don't want CanCover here because in many use cases,
4971 // the shuffle is generated early in the function, but the f32x4.mul happens
4972 // in a loop, which won't cover the shuffle since they are different basic
4973 // blocks.
4974 if (left.Is<Simd128ShuffleOp>() &&
4975 wasm::SimdShuffle::TryMatchSplat<LANES>(
4976 left.Cast<Simd128ShuffleOp>().shuffle, &index)) {
4977 dup_node = left.input(index < LANES ? 0 : 1);
4978 input = mul.right();
4979 } else if (right.Is<Simd128ShuffleOp>() &&
4980 wasm::SimdShuffle::TryMatchSplat<LANES>(
4981 right.Cast<Simd128ShuffleOp>().shuffle, &index)) {
4982 dup_node = right.input(index < LANES ? 0 : 1);
4983 input = mul.left();
4984 }
4985#endif // V8_ENABLE_WEBASSEMBLY
4986
4987 // Canonicalization would get rid of this too.
4988 index %= LANES;
4989
4990 return {input, dup_node, index};
4991}
4992} // namespace
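// Illustrative example: for f32x4.mul(x, shuffle(x, y, ...)) where the
// shuffle splats 32-bit lane 5 (i.e. lane 1 of y), TryMatchMulWithDup<4>
// returns {input = x, dup_node = y, index = 1}, and the visitors below emit
// kArm64FMulElement, roughly FMUL Vd.4S, Vx.4S, Vy.S[1].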
4993
4994void InstructionSelectorT::VisitF16x8Mul(OpIndex node) {
4995 if (MulWithDup result = TryMatchMulWithDup<8>(this, node)) {
4996 Arm64OperandGeneratorT g(this);
4997 Emit(kArm64FMulElement | LaneSizeField::encode(16),
4998 g.DefineAsRegister(node), g.UseRegister(result.input),
4999 g.UseRegister(result.dup_node), g.UseImmediate(result.index));
5000 } else {
5001 return VisitRRR(this, kArm64FMul | LaneSizeField::encode(16), node);
5002 }
5003}
5004
5005void InstructionSelectorT::VisitF32x4Mul(OpIndex node) {
5006 if (MulWithDup result = TryMatchMulWithDup<4>(this, node)) {
5007 Arm64OperandGeneratorT g(this);
5008 Emit(kArm64FMulElement | LaneSizeField::encode(32),
5009 g.DefineAsRegister(node), g.UseRegister(result.input),
5010 g.UseRegister(result.dup_node), g.UseImmediate(result.index));
5011 } else {
5012 return VisitRRR(this, kArm64FMul | LaneSizeField::encode(32), node);
5013 }
5014}
5015
5016void InstructionSelectorT::VisitF64x2Mul(OpIndex node) {
5017 if (MulWithDup result = TryMatchMulWithDup<2>(this, node)) {
5018 Arm64OperandGeneratorT g(this);
5019 Emit(kArm64FMulElement | LaneSizeField::encode(64),
5020 g.DefineAsRegister(node), g.UseRegister(result.input),
5021 g.UseRegister(result.dup_node), g.UseImmediate(result.index));
5022 } else {
5023 return VisitRRR(this, kArm64FMul | LaneSizeField::encode(64), node);
5024 }
5025}
5026
5027void InstructionSelectorT::VisitI64x2Mul(OpIndex node) {
5028 Arm64OperandGeneratorT g(this);
5029 InstructionOperand temps[] = {g.TempSimd128Register()};
5030 Emit(kArm64I64x2Mul, g.DefineAsRegister(node),
5031 g.UseRegister(this->input_at(node, 0)),
5032 g.UseRegister(this->input_at(node, 1)), arraysize(temps), temps);
5033}
5034
5035namespace {
5036
5037// Tries to match either input of a commutative binop to a given Opmask.
5038class SimdBinopMatcherTurboshaft {
5039 public:
5040 SimdBinopMatcherTurboshaft(InstructionSelectorT* selector, OpIndex node)
5041 : selector_(selector), node_(node) {
5042 const Simd128BinopOp& add_op = selector->Get(node).Cast<Simd128BinopOp>();
5043 DCHECK(Simd128BinopOp::IsCommutative(add_op.kind));
5044 input0_ = add_op.left();
5045 input1_ = add_op.right();
5046 }
5047 template <typename OpmaskT>
5048 bool InputMatches() {
5049 if (selector_->Get(input1_).Is<OpmaskT>()) {
5050 std::swap(input0_, input1_);
5051 return true;
5052 }
5053 return selector_->Get(input0_).Is<OpmaskT>();
5054 }
5055 OpIndex matched_input() const { return input0_; }
5056 OpIndex other_input() const { return input1_; }
5057
5058 private:
5059 InstructionSelectorT* selector_;
5060 OpIndex node_;
5061 OpIndex input0_;
5062 OpIndex input1_;
5063};
5064
5065template <typename OpmaskT>
5066bool ShraHelper(InstructionSelectorT* selector, OpIndex node, int lane_size,
5067 InstructionCode shra_code, InstructionCode add_code) {
5068 Arm64OperandGeneratorT g(selector);
5069 SimdBinopMatcherTurboshaft m(selector, node);
5070 if (!m.InputMatches<OpmaskT>() ||
5071 !selector->CanCover(node, m.matched_input())) {
5072 return false;
5073 }
5074 const Simd128ShiftOp& shiftop =
5075 selector->Get(m.matched_input()).Cast<Simd128ShiftOp>();
5076 int64_t constant;
5077 if (!selector->MatchSignedIntegralConstant(shiftop.shift(), &constant)) {
5078 return false;
5079 }
5080
5081 // If shifting by zero, just do the addition
5082 if (constant % lane_size == 0) {
5083 selector->Emit(add_code, g.DefineAsRegister(node),
5084 g.UseRegister(shiftop.input()),
5085 g.UseRegister(m.other_input()));
5086 } else {
5087 selector->Emit(shra_code | LaneSizeField::encode(lane_size),
5088 g.DefineSameAsFirst(node), g.UseRegister(m.other_input()),
5089 g.UseRegister(shiftop.input()),
5090 g.UseImmediate(shiftop.shift()));
5091 }
5092 return true;
5093}
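// Illustrative example: i32x4.add(x, i32x4.shr_s(y, 3)) is matched by
// ShraHelper and emitted as a shift-right-accumulate (roughly
// SSRA Vd.4S, Vy.4S, #3) with the destination tied to x via
// DefineSameAsFirst. A shift amount that is a multiple of the lane size is
// an effective no-op, so only the plain IAdd is emitted in that case.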
5094
5095template <typename OpmaskT>
5096bool AdalpHelper(InstructionSelectorT* selector, OpIndex node, int lane_size,
5097 InstructionCode adalp_code) {
5098 Arm64OperandGeneratorT g(selector);
5099 SimdBinopMatcherTurboshaft m(selector, node);
5100 if (!m.InputMatches<OpmaskT>() ||
5101 !selector->CanCover(node, m.matched_input())) {
5102 return false;
5103 }
5104 selector->Emit(adalp_code | LaneSizeField::encode(lane_size),
5105 g.DefineSameAsFirst(node), g.UseRegister(m.other_input()),
5106 g.UseRegister(selector->Get(m.matched_input()).input(0)));
5107 return true;
5108}
5109
5110template <typename OpmaskT>
5111bool MlaHelper(InstructionSelectorT* selector, OpIndex node,
5112 InstructionCode mla_code) {
5113 Arm64OperandGeneratorT g(selector);
5114 SimdBinopMatcherTurboshaft m(selector, node);
5115 if (!m.InputMatches<OpmaskT>() ||
5116 !selector->CanCover(node, m.matched_input())) {
5117 return false;
5118 }
5119 const Operation& mul = selector->Get(m.matched_input());
5120 selector->Emit(mla_code, g.DefineSameAsFirst(node),
5121 g.UseRegister(m.other_input()), g.UseRegister(mul.input(0)),
5122 g.UseRegister(mul.input(1)));
5123 return true;
5124}
5125
5126template <Simd128BinopOp::Kind kind>
5127bool SmlalHelper(InstructionSelectorT* selector, OpIndex node, int lane_size,
5128 InstructionCode smlal_code) {
5129 Arm64OperandGeneratorT g(selector);
5130 SimdBinopMatcherTurboshaft m(selector, node);
5131 using OpmaskT = Opmask::Simd128BinopMask::For<kind>;
5132 if (!m.InputMatches<OpmaskT>() ||
5133 !selector->CanCover(node, m.matched_input()))
5134 return false;
5135
5136 const Operation& matched = selector->Get(m.matched_input());
5137 selector->Emit(smlal_code | LaneSizeField::encode(lane_size),
5138 g.DefineSameAsFirst(node), g.UseRegister(m.other_input()),
5139 g.UseRegister(matched.input(0)),
5140 g.UseRegister(matched.input(1)));
5141 return true;
5142}
5143
5144template <typename OpmaskT>
5145bool sha3helper(InstructionSelectorT* selector, OpIndex node,
5146 InstructionCode sha3_code) {
5147 Arm64OperandGeneratorT g(selector);
5148 SimdBinopMatcherTurboshaft m(selector, node);
5149 if (!m.InputMatches<OpmaskT>() ||
5150 !selector->CanCover(node, m.matched_input())) {
5151 return false;
5152 }
5153 const Operation& matched = selector->Get(m.matched_input());
5154 selector->Emit(
5155 sha3_code, g.DefineSameAsFirst(node), g.UseRegister(m.other_input()),
5156 g.UseRegister(matched.input(0)), g.UseRegister(matched.input(1)));
5157 return true;
5158}
5159
5160} // namespace
5161
5162void InstructionSelectorT::VisitS128Xor(OpIndex node) {
5163 Arm64OperandGeneratorT g(this);
5164
5165 if (!CpuFeatures::IsSupported(SHA3)) {
5166 return VisitRRR(this, kArm64S128Xor, node);
5167 }
5168
5169 if (sha3helper<Opmask::kSimd128AndNot>(this, node, kArm64Bcax) ||
5170 sha3helper<Opmask::kSimd128Xor>(this, node, kArm64Eor3))
5171 return;
5172
5173 return VisitRRR(this, kArm64S128Xor, node);
5174}
5175
5176void InstructionSelectorT::VisitI64x2Add(OpIndex node) {
5177 if (ShraHelper<Opmask::kSimd128I64x2ShrS>(
5178 this, node, 64, kArm64Ssra, kArm64IAdd | LaneSizeField::encode(64)) ||
5179 ShraHelper<Opmask::kSimd128I64x2ShrU>(
5180 this, node, 64, kArm64Usra, kArm64IAdd | LaneSizeField::encode(64))) {
5181 return;
5182 }
5183 VisitRRR(this, kArm64IAdd | LaneSizeField::encode(64), node);
5184}
5185
5186void InstructionSelectorT::VisitI8x16Add(OpIndex node) {
5187 if (!ShraHelper<Opmask::kSimd128I8x16ShrS>(
5188 this, node, 8, kArm64Ssra, kArm64IAdd | LaneSizeField::encode(8)) &&
5189 !ShraHelper<Opmask::kSimd128I8x16ShrU>(
5190 this, node, 8, kArm64Usra, kArm64IAdd | LaneSizeField::encode(8))) {
5191 VisitRRR(this, kArm64IAdd | LaneSizeField::encode(8), node);
5192 }
5193}
5194
5195#define VISIT_SIMD_ADD(Type, PairwiseType, LaneSize) \
5196 void InstructionSelectorT::Visit##Type##Add(OpIndex node) { \
5197 /* Select Mla(z, x, y) for Add(x, Mul(y, z)). */ \
5198 if (MlaHelper<Opmask::kSimd128##Type##Mul>( \
5199 this, node, kArm64Mla | LaneSizeField::encode(LaneSize))) { \
5200 return; \
5201 } \
5202 /* Select S/Uadalp(x, y) for Add(x, ExtAddPairwise(y)). */ \
5203 if (AdalpHelper<Opmask::kSimd128##Type##ExtAddPairwise##PairwiseType##S>( \
5204 this, node, LaneSize, kArm64Sadalp) || \
5205 AdalpHelper<Opmask::kSimd128##Type##ExtAddPairwise##PairwiseType##U>( \
5206 this, node, LaneSize, kArm64Uadalp)) { \
5207 return; \
5208 } \
5209 /* Select S/Usra(x, y) for Add(x, ShiftRight(y, imm)). */ \
5210 if (ShraHelper<Opmask::kSimd128##Type##ShrS>( \
5211 this, node, LaneSize, kArm64Ssra, \
5212 kArm64IAdd | LaneSizeField::encode(LaneSize)) || \
5213 ShraHelper<Opmask::kSimd128##Type##ShrU>( \
5214 this, node, LaneSize, kArm64Usra, \
5215 kArm64IAdd | LaneSizeField::encode(LaneSize))) { \
5216 return; \
5217 } \
5218 /* Select Smlal/Umlal(x, y, z) for Add(x, ExtMulLow(y, z)) and \
5219 * Smlal2/Umlal2(x, y, z) for Add(x, ExtMulHigh(y, z)). */ \
5220 if (SmlalHelper< \
5221 Simd128BinopOp::Kind::k##Type##ExtMulLow##PairwiseType##S>( \
5222 this, node, LaneSize, kArm64Smlal) || \
5223 SmlalHelper< \
5224 Simd128BinopOp::Kind::k##Type##ExtMulHigh##PairwiseType##S>( \
5225 this, node, LaneSize, kArm64Smlal2) || \
5226 SmlalHelper< \
5227 Simd128BinopOp::Kind::k##Type##ExtMulLow##PairwiseType##U>( \
5228 this, node, LaneSize, kArm64Umlal) || \
5229 SmlalHelper< \
5230 Simd128BinopOp::Kind::k##Type##ExtMulHigh##PairwiseType##U>( \
5231 this, node, LaneSize, kArm64Umlal2)) { \
5232 return; \
5233 } \
5234 VisitRRR(this, kArm64IAdd | LaneSizeField::encode(LaneSize), node); \
5235 }
5236
5237VISIT_SIMD_ADD(I32x4, I16x8, 32)
5238VISIT_SIMD_ADD(I16x8, I8x16, 16)
5239#undef VISIT_SIMD_ADD
5240
5241#define VISIT_SIMD_SUB(Type, LaneSize) \
5242 void InstructionSelectorT::Visit##Type##Sub(OpIndex node) { \
5243 Arm64OperandGeneratorT g(this); \
5244 const Simd128BinopOp& sub = Get(node).Cast<Simd128BinopOp>(); \
5245 const Operation& right = Get(sub.right()); \
5246 /* Select Mls(z, x, y) for Sub(z, Mul(x, y)). */ \
5247 if (right.Is<Opmask::kSimd128##Type##Mul>() && \
5248 CanCover(node, sub.right())) { \
5249 Emit(kArm64Mls | LaneSizeField::encode(LaneSize), \
5250 g.DefineSameAsFirst(node), g.UseRegister(sub.left()), \
5251 g.UseRegister(right.input(0)), g.UseRegister(right.input(1))); \
5252 return; \
5253 } \
5254 VisitRRR(this, kArm64ISub | LaneSizeField::encode(LaneSize), node); \
5255 }
5256
5257VISIT_SIMD_SUB(I32x4, 32)
5258VISIT_SIMD_SUB(I16x8, 16)
5259#undef VISIT_SIMD_SUB
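// Illustrative example: i32x4.sub(z, i32x4.mul(x, y)), with the Mul covered
// by the Sub, selects kArm64Mls with the destination tied to z
// (DefineSameAsFirst), roughly MLS Vd.4S, Vx.4S, Vy.4S, instead of a
// separate multiply followed by a subtract.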
5260
5261namespace {
5262void VisitSimdReduce(InstructionSelectorT* selector, OpIndex node,
5263 InstructionCode opcode) {
5264 Arm64OperandGeneratorT g(selector);
5265 selector->Emit(opcode, g.DefineAsRegister(node),
5266 g.UseRegister(selector->Get(node).input(0)));
5267}
5268
5269} // namespace
5270
5271#define VISIT_SIMD_REDUCE(Type, Opcode) \
5272 void InstructionSelectorT::Visit##Type##AddReduce(OpIndex node) { \
5273 VisitSimdReduce(this, node, Opcode); \
5274 }
5275
5276VISIT_SIMD_REDUCE(I8x16, kArm64I8x16Addv)
5277VISIT_SIMD_REDUCE(I16x8, kArm64I16x8Addv)
5278VISIT_SIMD_REDUCE(I32x4, kArm64I32x4Addv)
5279VISIT_SIMD_REDUCE(I64x2, kArm64I64x2AddPair)
5280VISIT_SIMD_REDUCE(F32x4, kArm64F32x4AddReducePairwise)
5281VISIT_SIMD_REDUCE(F64x2, kArm64F64x2AddPair)
5282#undef VISIT_SIMD_REDUCE
5283
5284namespace {
5285bool isSimdZero(InstructionSelectorT* selector, OpIndex node) {
5286 const Operation& op = selector->Get(node);
5287 if (auto constant = op.TryCast<Simd128ConstantOp>()) {
5288 return constant->IsZero();
5289 }
5290 return false;
5291}
5292
5293} // namespace
5294
5295#define VISIT_SIMD_CM(Type, T, CmOp, CmOpposite, LaneSize) \
5296 void InstructionSelectorT::Visit##Type##CmOp(OpIndex node) { \
5297 Arm64OperandGeneratorT g(this); \
5298 OpIndex left = this->input_at(node, 0); \
5299 OpIndex right = this->input_at(node, 1); \
5300 if (isSimdZero(this, left)) { \
5301 Emit(kArm64##T##CmOpposite | LaneSizeField::encode(LaneSize), \
5302 g.DefineAsRegister(node), g.UseRegister(right)); \
5303 return; \
5304 } else if (isSimdZero(this, right)) { \
5305 Emit(kArm64##T##CmOp | LaneSizeField::encode(LaneSize), \
5306 g.DefineAsRegister(node), g.UseRegister(left)); \
5307 return; \
5308 } \
5309 VisitRRR(this, kArm64##T##CmOp | LaneSizeField::encode(LaneSize), node); \
5310 }
5311
5312VISIT_SIMD_CM(F64x2, F, Eq, Eq, 64)
5313VISIT_SIMD_CM(F64x2, F, Ne, Ne, 64)
5314VISIT_SIMD_CM(F64x2, F, Lt, Gt, 64)
5315VISIT_SIMD_CM(F64x2, F, Le, Ge, 64)
5316VISIT_SIMD_CM(F32x4, F, Eq, Eq, 32)
5317VISIT_SIMD_CM(F32x4, F, Ne, Ne, 32)
5318VISIT_SIMD_CM(F32x4, F, Lt, Gt, 32)
5319VISIT_SIMD_CM(F32x4, F, Le, Ge, 32)
5320VISIT_SIMD_CM(F16x8, F, Eq, Eq, 16)
5321VISIT_SIMD_CM(F16x8, F, Ne, Ne, 16)
5322VISIT_SIMD_CM(F16x8, F, Lt, Gt, 16)
5323VISIT_SIMD_CM(F16x8, F, Le, Ge, 16)
5324
5325VISIT_SIMD_CM(I64x2, I, Eq, Eq, 64)
5326VISIT_SIMD_CM(I64x2, I, Ne, Ne, 64)
5327VISIT_SIMD_CM(I64x2, I, GtS, LtS, 64)
5328VISIT_SIMD_CM(I64x2, I, GeS, LeS, 64)
5329VISIT_SIMD_CM(I32x4, I, Eq, Eq, 32)
5330VISIT_SIMD_CM(I32x4, I, Ne, Ne, 32)
5331VISIT_SIMD_CM(I32x4, I, GtS, LtS, 32)
5332VISIT_SIMD_CM(I32x4, I, GeS, LeS, 32)
5333VISIT_SIMD_CM(I16x8, I, Eq, Eq, 16)
5334VISIT_SIMD_CM(I16x8, I, Ne, Ne, 16)
5335VISIT_SIMD_CM(I16x8, I, GtS, LtS, 16)
5336VISIT_SIMD_CM(I16x8, I, GeS, LeS, 16)
5337VISIT_SIMD_CM(I8x16, I, Eq, Eq, 8)
5338VISIT_SIMD_CM(I8x16, I, Ne, Ne, 8)
5339VISIT_SIMD_CM(I8x16, I, GtS, LtS, 8)
5340VISIT_SIMD_CM(I8x16, I, GeS, LeS, 8)
5341#undef VISIT_SIMD_CM
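// Illustrative example: i32x4.eq(x, splat(0)) takes the single-operand path
// above, so the code generator can use the compare-with-zero form (roughly
// CMEQ Vd.4S, Vx.4S, #0). With the zero on the left of a non-commutative
// compare, the opposite condition is selected instead, e.g.
// i32x4.gt_s(splat(0), x) becomes ILtS on x (roughly CMLT Vd.4S, Vx.4S, #0).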
5342
5343void InstructionSelectorT::VisitS128Select(OpIndex node) {
5344 Arm64OperandGeneratorT g(this);
5345 Emit(kArm64S128Select, g.DefineSameAsFirst(node),
5346 g.UseRegister(this->input_at(node, 0)),
5347 g.UseRegister(this->input_at(node, 1)),
5348 g.UseRegister(this->input_at(node, 2)));
5349}
5350
5351void InstructionSelectorT::VisitI8x16RelaxedLaneSelect(OpIndex node) {
5352 VisitS128Select(node);
5353}
5354
5355void InstructionSelectorT::VisitI16x8RelaxedLaneSelect(OpIndex node) {
5356 VisitS128Select(node);
5357}
5358
5359void InstructionSelectorT::VisitI32x4RelaxedLaneSelect(OpIndex node) {
5360 VisitS128Select(node);
5361}
5362
5363void InstructionSelectorT::VisitI64x2RelaxedLaneSelect(OpIndex node) {
5364 VisitS128Select(node);
5365}
5366
5367#define VISIT_SIMD_QFMOP(op) \
5368 void InstructionSelectorT::Visit##op(OpIndex node) { \
5369 Arm64OperandGeneratorT g(this); \
5370 Emit(kArm64##op, g.DefineSameAsInput(node, 2), \
5371 g.UseRegister(this->input_at(node, 0)), \
5372 g.UseRegister(this->input_at(node, 1)), \
5373 g.UseRegister(this->input_at(node, 2))); \
5374 }
5375VISIT_SIMD_QFMOP(F64x2Qfma)
5376VISIT_SIMD_QFMOP(F64x2Qfms)
5377VISIT_SIMD_QFMOP(F32x4Qfma)
5378VISIT_SIMD_QFMOP(F32x4Qfms)
5379VISIT_SIMD_QFMOP(F16x8Qfma)
5380VISIT_SIMD_QFMOP(F16x8Qfms)
5381#undef VISIT_SIMD_QFMOP
5382
5383namespace {
5384
5385void ArrangeShuffleTable(Arm64OperandGeneratorT* g, OpIndex input0,
5386 OpIndex input1, InstructionOperand* src0,
5387 InstructionOperand* src1) {
5388 if (input0 == input1) {
5389 // Unary, any q-register can be the table.
5390 *src0 = *src1 = g->UseRegister(input0);
5391 } else {
5392 // Binary, table registers must be consecutive.
5393 *src0 = g->UseFixed(input0, fp_fixed1);
5394 *src1 = g->UseFixed(input1, fp_fixed2);
5395 }
5396}
5397
5398using CanonicalShuffle = wasm::SimdShuffle::CanonicalShuffle;
5399std::optional<ArchOpcode> TryMapCanonicalShuffleToArch(
5400 CanonicalShuffle shuffle) {
5401 using CanonicalToArch = std::pair<CanonicalShuffle, ArchOpcode>;
5402 constexpr static auto arch_shuffles = std::to_array<CanonicalToArch>({
5403 {CanonicalShuffle::kS64x2Even, kArm64S64x2UnzipLeft},
5404 {CanonicalShuffle::kS64x2Odd, kArm64S64x2UnzipRight},
5405 {CanonicalShuffle::kS64x2ReverseBytes, kArm64S8x8Reverse},
5406 {CanonicalShuffle::kS32x4Even, kArm64S32x4UnzipLeft},
5407 {CanonicalShuffle::kS32x4Odd, kArm64S32x4UnzipRight},
5408 {CanonicalShuffle::kS32x4InterleaveLowHalves, kArm64S32x4ZipLeft},
5409 {CanonicalShuffle::kS32x4InterleaveHighHalves, kArm64S32x4ZipRight},
5410 {CanonicalShuffle::kS32x4ReverseBytes, kArm64S8x4Reverse},
5411 {CanonicalShuffle::kS32x4Reverse, kArm64S32x4Reverse},
5412 {CanonicalShuffle::kS32x2Reverse, kArm64S32x2Reverse},
5413 {CanonicalShuffle::kS32x4TransposeEven, kArm64S32x4TransposeLeft},
5414 {CanonicalShuffle::kS32x4TransposeOdd, kArm64S32x4TransposeRight},
5415 {CanonicalShuffle::kS16x8Even, kArm64S16x8UnzipLeft},
5416 {CanonicalShuffle::kS16x8Odd, kArm64S16x8UnzipRight},
5417 {CanonicalShuffle::kS16x8InterleaveLowHalves, kArm64S16x8ZipLeft},
5418 {CanonicalShuffle::kS16x8InterleaveHighHalves, kArm64S16x8ZipRight},
5419 {CanonicalShuffle::kS16x2Reverse, kArm64S16x2Reverse},
5420 {CanonicalShuffle::kS16x4Reverse, kArm64S16x4Reverse},
5421 {CanonicalShuffle::kS16x8ReverseBytes, kArm64S8x2Reverse},
5422 {CanonicalShuffle::kS16x8TransposeEven, kArm64S16x8TransposeLeft},
5423 {CanonicalShuffle::kS16x8TransposeOdd, kArm64S16x8TransposeRight},
5424 {CanonicalShuffle::kS8x16Even, kArm64S8x16UnzipLeft},
5425 {CanonicalShuffle::kS8x16Odd, kArm64S8x16UnzipRight},
5426 {CanonicalShuffle::kS8x16InterleaveLowHalves, kArm64S8x16ZipLeft},
5427 {CanonicalShuffle::kS8x16InterleaveHighHalves, kArm64S8x16ZipRight},
5428 {CanonicalShuffle::kS8x16TransposeEven, kArm64S8x16TransposeLeft},
5429 {CanonicalShuffle::kS8x16TransposeOdd, kArm64S8x16TransposeRight},
5430 });
5431
5432 for (auto& [canonical, arch_opcode] : arch_shuffles) {
5433 if (canonical == shuffle) {
5434 return arch_opcode;
5435 }
5436 }
5437 return {};
5438}
5439} // namespace
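// Illustrative example: the canonical kS32x4InterleaveLowHalves shuffle maps
// to kArm64S32x4ZipLeft (roughly ZIP1 Vd.4S, Vn.4S, Vm.4S), so common
// interleave/unzip/transpose/reverse patterns avoid the generic table-based
// shuffle entirely.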
5440
5441void InstructionSelectorT::VisitI8x2Shuffle(OpIndex node) {
5442 Arm64OperandGeneratorT g(this);
5443 auto view = this->simd_shuffle_view(node);
5444 constexpr size_t shuffle_bytes = 2;
5445 OpIndex input0 = view.input(0);
5446 OpIndex input1 = view.input(1);
5447 std::array<uint8_t, shuffle_bytes> shuffle;
5448 std::copy(view.data(), view.data() + shuffle_bytes, shuffle.begin());
5449
5450 uint8_t shuffle16x1;
5451 if (wasm::SimdShuffle::TryMatch16x1Shuffle(shuffle.data(), &shuffle16x1)) {
5452 Emit(kArm64S16x1Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
5453 g.UseRegister(input1), g.UseImmediate(shuffle16x1));
5454 } else {
5455 Emit(kArm64S8x2Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
5456 g.UseRegister(input1),
5457 g.UseImmediate(wasm::SimdShuffle::Pack2Lanes(shuffle)));
5458 }
5459}
5460
5461void InstructionSelectorT::VisitI8x4Shuffle(OpIndex node) {
5462 Arm64OperandGeneratorT g(this);
5463 auto view = this->simd_shuffle_view(node);
5464 OpIndex input0 = view.input(0);
5465 OpIndex input1 = view.input(1);
5466 constexpr size_t shuffle_bytes = 4;
5467 std::array<uint8_t, shuffle_bytes> shuffle;
5468 std::copy(view.data(), view.data() + shuffle_bytes, shuffle.begin());
5469 std::array<uint8_t, 2> shuffle16x2;
5470 uint8_t shuffle32x1;
5471
5472 if (wasm::SimdShuffle::TryMatch32x1Shuffle(shuffle.data(), &shuffle32x1)) {
5473 Emit(kArm64S32x1Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
5474 g.UseRegister(input1), g.UseImmediate(shuffle32x1));
5475 } else if (wasm::SimdShuffle::TryMatch16x2Shuffle(shuffle.data(),
5476 shuffle16x2.data())) {
5477 Emit(kArm64S16x2Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
5478 g.UseRegister(input1),
5479 g.UseImmediate(wasm::SimdShuffle::Pack2Lanes(shuffle16x2)));
5480 } else {
5481 InstructionOperand src0, src1;
5482 ArrangeShuffleTable(&g, input0, input1, &src0, &src1);
5483 Emit(kArm64I8x16Shuffle, g.DefineAsRegister(node), src0, src1,
5484 g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(&shuffle[0])),
5485 g.UseImmediate(0), g.UseImmediate(0), g.UseImmediate(0));
5486 }
5487}
5488
5489void InstructionSelectorT::VisitI8x8Shuffle(OpIndex node) {
5490 Arm64OperandGeneratorT g(this);
5491 auto view = this->simd_shuffle_view(node);
5492 OpIndex input0 = view.input(0);
5493 OpIndex input1 = view.input(1);
5494 constexpr size_t shuffle_bytes = 8;
5495 std::array<uint8_t, shuffle_bytes> shuffle;
5496 std::copy(view.data(), view.data() + shuffle_bytes, shuffle.begin());
5497 std::array<uint8_t, 2> shuffle32x2;
5498 uint8_t shuffle64x1;
5499 if (wasm::SimdShuffle::TryMatch64x1Shuffle(shuffle.data(), &shuffle64x1)) {
5500 Emit(kArm64S64x1Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
5501 g.UseRegister(input1), g.UseImmediate(shuffle64x1));
5502 } else if (wasm::SimdShuffle::TryMatch32x2Shuffle(shuffle.data(),
5503 shuffle32x2.data())) {
5504 Emit(kArm64S32x2Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
5505 g.UseRegister(input1),
5506 g.UseImmediate(wasm::SimdShuffle::Pack2Lanes(shuffle32x2)));
5507 } else {
5508 // The code generator uses vtbl; arrange the sources to form a valid lookup table.
5509 InstructionOperand src0, src1;
5510 ArrangeShuffleTable(&g, input0, input1, &src0, &src1);
5511 Emit(kArm64I8x16Shuffle, g.DefineAsRegister(node), src0, src1,
5512 g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(&shuffle[0])),
5513 g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(&shuffle[4])),
5514 g.UseImmediate(0), g.UseImmediate(0));
5515 }
5516}
5517
5518void InstructionSelectorT::VisitI8x16Shuffle(OpIndex node) {
5519 std::array<uint8_t, kSimd128Size> shuffle;
5520 bool is_swizzle;
5521 auto view = this->simd_shuffle_view(node);
5522 CanonicalizeShuffle(view, shuffle.data(), &is_swizzle);
5523 OpIndex input0 = view.input(0);
5524 OpIndex input1 = view.input(1);
5525 Arm64OperandGeneratorT g(this);
5526
5527 const CanonicalShuffle canonical =
5528 wasm::SimdShuffle::TryMatchCanonical(shuffle);
5529
5530 if (auto arch_opcode = TryMapCanonicalShuffleToArch(canonical);
5531 arch_opcode.has_value()) {
5532 Emit(arch_opcode.value(), g.DefineAsRegister(node), g.UseRegister(input0),
5533 g.UseRegister(input1));
5534 return;
5535 }
5536
5537 uint8_t offset;
5538 if (wasm::SimdShuffle::TryMatchConcat(shuffle.data(), &offset)) {
5539 Emit(kArm64S8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0),
5540 g.UseRegister(input1), g.UseImmediate(offset));
5541 return;
5542 }
5543 std::array<uint8_t, 2> shuffle64x2;
5544 if (wasm::SimdShuffle::TryMatch64x2Shuffle(shuffle.data(),
5545 shuffle64x2.data())) {
5546 Emit(kArm64S64x2Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
5547 g.UseRegister(input1),
5548 g.UseImmediate(wasm::SimdShuffle::Pack2Lanes(shuffle64x2)));
5549 return;
5550 }
5551 uint8_t shuffle32x4[4];
5552 int index = 0;
5553 uint8_t from = 0;
5554 uint8_t to = 0;
5555 if (wasm::SimdShuffle::TryMatch32x4Shuffle(shuffle.data(), shuffle32x4)) {
5556 if (wasm::SimdShuffle::TryMatchSplat<4>(shuffle.data(), &index)) {
5557 DCHECK_GT(4, index);
5558 Emit(kArm64S128Dup, g.DefineAsRegister(node), g.UseRegister(input0),
5559 g.UseImmediate(4), g.UseImmediate(index % 4));
5560 } else if (wasm::SimdShuffle::TryMatch32x4OneLaneSwizzle(shuffle32x4, &from,
5561 &to)) {
5562 Emit(kArm64S32x4OneLaneSwizzle, g.DefineAsRegister(node),
5563 g.UseRegister(input0), g.TempImmediate(from), g.TempImmediate(to));
5564 } else if (canonical == CanonicalShuffle::kIdentity) {
5565 // Bypass normal shuffle code generation in this case.
5566 // EmitIdentity: forward input0 as the result of this node.
5567 MarkAsUsed(input0);
5568 MarkAsDefined(node);
5569 SetRename(node, input0);
5570 } else {
5571 Emit(kArm64S32x4Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
5572 g.UseRegister(input1),
5573 g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle32x4)));
5574 }
5575 return;
5576 }
5577 if (wasm::SimdShuffle::TryMatchSplat<8>(shuffle.data(), &index)) {
5578 DCHECK_GT(8, index);
5579 Emit(kArm64S128Dup, g.DefineAsRegister(node), g.UseRegister(input0),
5580 g.UseImmediate(8), g.UseImmediate(index % 8));
5581 return;
5582 }
5583 if (wasm::SimdShuffle::TryMatchSplat<16>(shuffle.data(), &index)) {
5584 DCHECK_GT(16, index);
5585 Emit(kArm64S128Dup, g.DefineAsRegister(node), g.UseRegister(input0),
5586 g.UseImmediate(16), g.UseImmediate(index % 16));
5587 return;
5588 }
5589 // The code generator uses vtbl; arrange the sources to form a valid lookup table.
5590 InstructionOperand src0, src1;
5591 ArrangeShuffleTable(&g, input0, input1, &src0, &src1);
5592 Emit(kArm64I8x16Shuffle, g.DefineAsRegister(node), src0, src1,
5593 g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(&shuffle[0])),
5594 g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(&shuffle[4])),
5595 g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(&shuffle[8])),
5596 g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(&shuffle[12])));
5597}
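// Illustrative example: the byte shuffle [4, 5, ..., 19] of two inputs is a
// concatenation with offset 4, so TryMatchConcat succeeds and a single
// kArm64S8x16Concat (roughly EXT Vd.16B, Vx.16B, Vy.16B, #4) is emitted
// rather than falling through to the generic two-register table lookup.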
5598
5599void InstructionSelectorT::VisitSetStackPointer(OpIndex node) {
5600 OperandGenerator g(this);
5601 auto input = g.UseRegister(this->input_at(node, 0));
5602 Emit(kArchSetStackPointer, 0, nullptr, 1, &input);
5603}
5604
5605#endif // V8_ENABLE_WEBASSEMBLY
5606
5607void InstructionSelectorT::VisitSignExtendWord8ToInt32(OpIndex node) {
5608 VisitRR(this, kArm64Sxtb32, node);
5609}
5610
5611void InstructionSelectorT::VisitSignExtendWord16ToInt32(OpIndex node) {
5612 VisitRR(this, kArm64Sxth32, node);
5613}
5614
5615void InstructionSelectorT::VisitSignExtendWord8ToInt64(OpIndex node) {
5616 VisitRR(this, kArm64Sxtb, node);
5617}
5618
5619void InstructionSelectorT::VisitSignExtendWord16ToInt64(OpIndex node) {
5620 VisitRR(this, kArm64Sxth, node);
5621}
5622
5623void InstructionSelectorT::VisitSignExtendWord32ToInt64(OpIndex node) {
5624 VisitRR(this, kArm64Sxtw, node);
5625}
5626
5627#if V8_ENABLE_WEBASSEMBLY
5628namespace {
5629void VisitPminOrPmax(InstructionSelectorT* selector, ArchOpcode opcode,
5630 OpIndex node) {
5631 Arm64OperandGeneratorT g(selector);
5632 // Need all unique registers because we first compare the two inputs, then
5633 // we need the inputs to remain unchanged for the bitselect later.
5634 selector->Emit(opcode, g.DefineAsRegister(node),
5635 g.UseUniqueRegister(selector->input_at(node, 0)),
5636 g.UseUniqueRegister(selector->input_at(node, 1)));
5637}
5638} // namespace
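// Illustrative example: F32x4Pmin(a, b) is wasm's pseudo-minimum,
// b < a ? b : a. The emitted sequence first compares the two inputs and then
// bit-selects between the original registers, which is why both inputs are
// requested as unique registers above.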
5639
5640void InstructionSelectorT::VisitF16x8Pmin(OpIndex node) {
5641 VisitPminOrPmax(this, kArm64F16x8Pmin, node);
5642}
5643
5644void InstructionSelectorT::VisitF16x8Pmax(OpIndex node) {
5645 VisitPminOrPmax(this, kArm64F16x8Pmax, node);
5646}
5647
5648void InstructionSelectorT::VisitF32x4Pmin(OpIndex node) {
5649 VisitPminOrPmax(this, kArm64F32x4Pmin, node);
5650}
5651
5652void InstructionSelectorT::VisitF32x4Pmax(OpIndex node) {
5653 VisitPminOrPmax(this, kArm64F32x4Pmax, node);
5654}
5655
5656void InstructionSelectorT::VisitF64x2Pmin(OpIndex node) {
5657 VisitPminOrPmax(this, kArm64F64x2Pmin, node);
5658}
5659
5660void InstructionSelectorT::VisitF64x2Pmax(OpIndex node) {
5661 VisitPminOrPmax(this, kArm64F64x2Pmax, node);
5662}
5663
5664namespace {
5665void VisitSignExtendLong(InstructionSelectorT* selector, ArchOpcode opcode,
5666 OpIndex node, int lane_size) {
5667 InstructionCode code = opcode;
5668 code |= LaneSizeField::encode(lane_size);
5669 VisitRR(selector, code, node);
5670}
5671} // namespace
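// Illustrative example: I64x2SConvertI32x4Low widens the low half with a
// sign-extending long move (roughly SXTL Vd.2D, Vn.2S), while the *High
// variants use SXTL2/UXTL2 to widen the upper half without a separate
// lane extraction.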
5672
5673void InstructionSelectorT::VisitI64x2SConvertI32x4Low(OpIndex node) {
5674 VisitSignExtendLong(this, kArm64Sxtl, node, 64);
5675}
5676
5677void InstructionSelectorT::VisitI64x2SConvertI32x4High(OpIndex node) {
5678 VisitSignExtendLong(this, kArm64Sxtl2, node, 64);
5679}
5680
5681void InstructionSelectorT::VisitI64x2UConvertI32x4Low(OpIndex node) {
5682 VisitSignExtendLong(this, kArm64Uxtl, node, 64);
5683}
5684
5685void InstructionSelectorT::VisitI64x2UConvertI32x4High(OpIndex node) {
5686 VisitSignExtendLong(this, kArm64Uxtl2, node, 64);
5687}
5688
5689void InstructionSelectorT::VisitI32x4SConvertI16x8Low(OpIndex node) {
5690 VisitSignExtendLong(this, kArm64Sxtl, node, 32);
5691}
5692
5693void InstructionSelectorT::VisitI32x4SConvertI16x8High(OpIndex node) {
5694 VisitSignExtendLong(this, kArm64Sxtl2, node, 32);
5695}
5696
5697void InstructionSelectorT::VisitI32x4UConvertI16x8Low(OpIndex node) {
5698 VisitSignExtendLong(this, kArm64Uxtl, node, 32);
5699}
5700
5701void InstructionSelectorT::VisitI32x4UConvertI16x8High(OpIndex node) {
5702 VisitSignExtendLong(this, kArm64Uxtl2, node, 32);
5703}
5704
5705void InstructionSelectorT::VisitI16x8SConvertI8x16Low(OpIndex node) {
5706 VisitSignExtendLong(this, kArm64Sxtl, node, 16);
5707}
5708
5709void InstructionSelectorT::VisitI16x8SConvertI8x16High(OpIndex node) {
5710 VisitSignExtendLong(this, kArm64Sxtl2, node, 16);
5711}
5712
5713void InstructionSelectorT::VisitI16x8UConvertI8x16Low(OpIndex node) {
5714 VisitSignExtendLong(this, kArm64Uxtl, node, 16);
5715}
5716
5717void InstructionSelectorT::VisitI16x8UConvertI8x16High(OpIndex node) {
5718 VisitSignExtendLong(this, kArm64Uxtl2, node, 16);
5719}
5720
5721void InstructionSelectorT::VisitI8x16Popcnt(OpIndex node) {
5722 InstructionCode code = kArm64Cnt;
5723 code |= LaneSizeField::encode(8);
5724 VisitRR(this, code, node);
5725}
5726
5727#ifdef V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS
5728
5729void InstructionSelectorT::VisitSimd128LoadPairDeinterleave(OpIndex node) {
5730 const auto& load = this->Get(node).Cast<Simd128LoadPairDeinterleaveOp>();
5731 Arm64OperandGeneratorT g(this);
5732 InstructionCode opcode = kArm64S128LoadPairDeinterleave;
5733 opcode |= LaneSizeField::encode(load.lane_size());
5734 if (load.load_kind.with_trap_handler) {
5735 opcode |= AccessModeField::encode(kMemoryAccessProtectedMemOutOfBounds);
5736 }
5737 OptionalOpIndex first = FindProjection(node, 0);
5738 OptionalOpIndex second = FindProjection(node, 1);
5739
5740 InstructionOperand outputs[] = {
5741 g.DefineAsFixed(first.value(), fp_fixed1),
5742 g.DefineAsFixed(second.value(), fp_fixed2),
5743 };
5744
5745 InstructionOperand inputs[] = {
5746 EmitAddBeforeLoadOrStore(this, node, &opcode),
5747 g.TempImmediate(0),
5748 };
5749 Emit(opcode, arraysize(outputs), outputs, arraysize(inputs), inputs);
5750}
5751
5752#endif // V8_ENABLE_WASM_DEINTERLEAVED_MEM_OPS
5753
5754#endif // V8_ENABLE_WEBASSEMBLY
5755
5756void InstructionSelectorT::AddOutputToSelectContinuation(OperandGenerator* g,
5757 int first_input_index,
5758 OpIndex node) {
5759 continuation_outputs_.push_back(g->DefineAsRegister(node));
5760}
5761
5762// static
5763MachineOperatorBuilder::Flags
5764InstructionSelector::SupportedMachineOperatorFlags() {
5765 auto flags = MachineOperatorBuilder::kFloat32RoundDown |
5766 MachineOperatorBuilder::kFloat64RoundDown |
5767 MachineOperatorBuilder::kFloat32RoundUp |
5768 MachineOperatorBuilder::kFloat64RoundUp |
5769 MachineOperatorBuilder::kFloat32RoundTruncate |
5770 MachineOperatorBuilder::kFloat64RoundTruncate |
5771 MachineOperatorBuilder::kFloat64RoundTiesAway |
5772 MachineOperatorBuilder::kFloat32RoundTiesEven |
5773 MachineOperatorBuilder::kFloat64RoundTiesEven |
5774 MachineOperatorBuilder::kWord32Popcnt |
5775 MachineOperatorBuilder::kWord64Popcnt |
5776 MachineOperatorBuilder::kWord32ShiftIsSafe |
5777 MachineOperatorBuilder::kInt32DivIsSafe |
5778 MachineOperatorBuilder::kUint32DivIsSafe |
5779 MachineOperatorBuilder::kWord32ReverseBits |
5780 MachineOperatorBuilder::kWord64ReverseBits |
5781 MachineOperatorBuilder::kSatConversionIsSafe |
5782 MachineOperatorBuilder::kFloat32Select |
5783 MachineOperatorBuilder::kFloat64Select |
5784 MachineOperatorBuilder::kWord32Select |
5785 MachineOperatorBuilder::kWord64Select |
5786 MachineOperatorBuilder::kLoadStorePairs;
5787 if (CpuFeatures::IsSupported(FP16)) {
5788 flags |= MachineOperatorBuilder::kFloat16 |
5789 MachineOperatorBuilder::kFloat16RawBitsConversion;
5790 }
5791 return flags;
5792}
5793
5794// static
5795MachineOperatorBuilder::AlignmentRequirements
5796InstructionSelector::AlignmentRequirements() {
5797 return MachineOperatorBuilder::AlignmentRequirements::
5798 FullUnalignedAccessSupport();
5799}
5800
5801} // namespace compiler
5802} // namespace internal
5803} // namespace v8
#define V(Name)
#define F(name, str)
Builtins::Kind kind
Definition builtins.cc:40
static constexpr U encode(T value)
Definition bit-field.h:55
static constexpr bool IsImmLSScaled(int64_t offset, unsigned size_log2)
static constexpr bool IsImmAddSub(int64_t immediate)
static constexpr bool IsImmConditionalCompare(int64_t immediate)
static constexpr bool IsImmLSUnscaled(int64_t offset)
static bool IsImmLogical(uint64_t value, unsigned width, unsigned *n, unsigned *imm_s, unsigned *imm_r)
RootsTable & roots_table()
Definition isolate.h:1250
constexpr bool IsUnsigned() const
constexpr MachineSemantic semantic() const
constexpr MachineRepresentation representation() const
Tagged_t ReadOnlyRootPtr(RootIndex index)
bool IsRootHandle(IndirectHandle< T > handle, RootIndex *index) const
Definition roots-inl.h:65
static constexpr bool IsReadOnly(RootIndex root_index)
Definition roots.h:623
T * New(Args &&... args)
Definition zone.h:114
InstructionOperand UseRegisterAtEndOrImmediateZero(OpIndex node)
std::optional< int64_t > GetOptionalIntegerConstant(OpIndex operation)
bool CanBeImmediate(int64_t value, ImmediateMode mode)
bool CanBeLoadStoreShiftImmediate(OpIndex node, MachineRepresentation rep)
InstructionOperand UseImmediateOrTemp(OpIndex node, int32_t value)
bool CanBeImmediate(OpIndex node, ImmediateMode mode)
InstructionOperand UseOperand(OpIndex node, ImmediateMode mode)
std::array< ConditionalCompare, kMaxCompareChainSize > compare_chain_t
Instruction * Emit(InstructionCode opcode, InstructionOperand output, size_t temp_count=0, InstructionOperand *temps=nullptr)
bool CanCover(turboshaft::OpIndex user, turboshaft::OpIndex node) const
Instruction * EmitWithContinuation(InstructionCode opcode, FlagsContinuation *cont)
bool CanAddressRelativeToRootsRegister(const ExternalReference &reference) const
InstructionOperand DefineAsRegister(turboshaft::OpIndex node)
InstructionOperand UseRegisterAtEnd(turboshaft::OpIndex node)
InstructionOperand UseRegister(turboshaft::OpIndex node)
CompareChainNode(OpIndex n, CompareChainNode *l, CompareChainNode *r)
void AddConditionalCompare(RegisterRepresentation rep, FlagsCondition ccmp_condition, FlagsCondition default_flags, OpIndex ccmp_lhs, OpIndex ccmp_rhs)
InstructionCode GetOpcode(RegisterRepresentation rep) const
void InitialCompare(OpIndex op, OpIndex l, OpIndex r, RegisterRepresentation rep)
static constexpr MemoryRepresentation AnyTagged()
static constexpr MemoryRepresentation Float16()
static constexpr MemoryRepresentation Int32()
static constexpr MemoryRepresentation Int64()
static constexpr MemoryRepresentation Int16()
static constexpr MemoryRepresentation Float32()
static constexpr MemoryRepresentation Float64()
const Operation & Get(V< AnyOrNone > op_idx) const
const underlying_operation_t< Op > * TryCast(V< AnyOrNone > op_idx) const
const underlying_operation_t< Op > & Cast(V< AnyOrNone > op_idx) const
bool MatchSignedIntegralConstant(V< Any > matched, int64_t *constant) const
bool MatchUnsignedIntegralConstant(V< Any > matched, uint64_t *constant) const
constexpr RegisterRepresentation MapTaggedToWord() const
T const right_
T const left_
Handle< Code > code
#define COMPRESS_POINTERS_BOOL
Definition globals.h:99
#define V8_ENABLE_SANDBOX_BOOL
Definition globals.h:160
WasmFrame *const frame_
other heap size flags(e.g. initial_heap_size) take precedence") DEFINE_SIZE_T( max_shared_heap_size
Isolate * isolate
const WordBinopOp & binop_
static constexpr ArchOpcode kTestAndBranchOpcode
static constexpr unsigned kSignBit
#define RR_OP_T_LIST(V)
#define RR_VISITOR(Name, opcode)
#define RRR_OP_T_LIST(V)
#define RRR_VISITOR(Name, opcode)
int64_t immediate_
InstructionSelectorT * selector_
ArchOpcode opcode_
#define VISIT_ATOMIC_BINOP(op)
static constexpr ArchOpcode kCompareAndBranchOpcode
#define SIMD_SHIFT_OP_LIST(V)
OptionalOpIndex index
int32_t offset
#define SIMD_VISIT_SHIFT_OP(Name)
#define SIMD_VISIT_EXTRACT_LANE(Type, Sign)
#define SIMD_VISIT_REPLACE_LANE(Type)
#define SIMD_VISIT_UNOP(Name, instruction)
#define SIMD_VISIT_BINOP(Name, instruction)
#define VISIT_SIMD_QFMOP(Name, instruction)
Node * node
double second
ZoneVector< RpoNumber > & result
uint32_t const mask
#define SIMD_UNOP_LIST(V)
#define SIMD_BINOP_LIST(V)
int m
Definition mul-fft.cc:294
int r
Definition mul-fft.cc:298
int int32_t
Definition unicode.cc:40
constexpr bool IsPowerOfTwo(T value)
Definition bits.h:187
constexpr int WhichPowerOfTwo(T value)
Definition bits.h:195
BitField< T, shift, size, uint8_t > BitField8
Definition bit-field.h:90
ShiftKindMask::For< ShiftOp::Kind::kShiftLeft > kShiftLeft
Definition opmasks.h:235
WordBinopMask::For< WordBinopOp::Kind::kBitwiseAnd, WordRepresentation::Word32()> kWord32BitwiseAnd
Definition opmasks.h:159
ChangeOpMask::For< ChangeOp::Kind::kSignExtend, ChangeOp::Assumption::kNoAssumption, RegisterRepresentation::Word32(), RegisterRepresentation::Word64()> kChangeInt32ToInt64
Definition opmasks.h:267
ChangeOpMask::For< ChangeOp::Kind::kZeroExtend, ChangeOp::Assumption::kNoAssumption, RegisterRepresentation::Word32(), RegisterRepresentation::Word64()> kChangeUint32ToUint64
Definition opmasks.h:270
ShiftMask::For< ShiftOp::Kind::kShiftRightArithmetic, WordRepresentation::Word32()> kWord32ShiftRightArithmetic
Definition opmasks.h:216
ShiftMask::For< ShiftOp::Kind::kShiftLeft, WordRepresentation::Word32()> kWord32ShiftLeft
Definition opmasks.h:214
static bool TryMatchConditionalCompareChainSet(InstructionSelectorT *selector, Zone *zone, OpIndex node)
static void VisitCompareChain(InstructionSelectorT *selector, OpIndex left_node, OpIndex right_node, RegisterRepresentation rep, InstructionCode opcode, ImmediateMode operand_mode, FlagsContinuationT *cont)
static std::optional< FlagsCondition > GetFlagsCondition(OpIndex node, InstructionSelectorT *selector)
V8_INLINE const Operation & Get(const Graph &graph, OpIndex index)
Definition graph.h:1231
WordWithBits< 64 > Word64
Definition index.h:224
WordWithBits< 32 > Word32
Definition index.h:223
static std::optional< FlagsCondition > TryMatchConditionalCompareChainShared(InstructionSelectorT *selector, Zone *zone, OpIndex node, CompareSequence *sequence)
static bool TryMatchConditionalCompareChainBranch(InstructionSelectorT *selector, Zone *zone, OpIndex node, FlagsContinuationT *cont)
void CombineFlagSettingOps(CompareChainNode *logic_node, InstructionSelectorT *selector, CompareSequence *sequence)
static std::optional< CompareChainNode * > FindCompareChain(OpIndex user, OpIndex node, InstructionSelectorT *selector, Zone *zone, ZoneVector< CompareChainNode * > &nodes)
static void VisitLogical(InstructionSelectorT *selector, Zone *zone, OpIndex node, WordRepresentation rep, ArchOpcode opcode, bool left_can_cover, bool right_can_cover, ImmediateMode imm_mode)
void VisitAtomicBinop(InstructionSelectorT *selector, OpIndex node, ArchOpcode opcode, AtomicWidth width)
constexpr size_t kCcmpOffsetOfOpcode
static void VisitRRIR(InstructionSelectorT *selector, ArchOpcode opcode, OpIndex node)
void VisitAtomicExchange(InstructionSelectorT *selector, OpIndex node, ArchOpcode opcode)
constexpr size_t kNumCcmpOperands
BinopMatcher< Int32Matcher, Int32Matcher, MachineRepresentation::kWord32 > Int32BinopMatcher
static void VisitRRO(InstructionSelectorT *selector, ArchOpcode opcode, OpIndex node)
static void VisitRR(InstructionSelectorT *selector, ArchOpcode opcode, OpIndex node)
static void VisitRRI(InstructionSelectorT *selector, ArchOpcode opcode, OpIndex node)
void VisitRRR(InstructionSelectorT *selector, ArchOpcode opcode, OpIndex node)
AtomicStoreParameters const & AtomicStoreParametersOf(Operator const *op)
bool TryEmitExtendingLoad(InstructionSelectorT *selector, OpIndex node, OpIndex output_node)
BinopMatcher< Int64Matcher, Int64Matcher, MachineRepresentation::kWord64 > Int64BinopMatcher
size_t AtomicWidthSize(AtomicWidth width)
void EmitInt64MulWithOverflow(InstructionSelectorT *selector, OpIndex node, FlagsContinuationT *cont)
constexpr size_t kCcmpOffsetOfDefaultFlags
std::tuple< InstructionCode, ImmediateMode > GetLoadOpcodeAndImmediate(MemoryRepresentation loaded_rep, RegisterRepresentation result_rep)
void VisitFloat32Compare(InstructionSelectorT *selector, OpIndex node, FlagsContinuationT *cont)
constexpr size_t kCcmpOffsetOfRhs
void EmitLoad(InstructionSelectorT *selector, OpIndex node, InstructionCode opcode, ImmediateMode immediate_mode, MachineRepresentation rep, OptionalOpIndex output={})
static void VisitBinop(InstructionSelectorT *selector, turboshaft::OpIndex node, InstructionCode opcode, bool has_reverse_opcode, InstructionCode reverse_opcode, FlagsContinuationT *cont)
FlagsCondition CommuteFlagsCondition(FlagsCondition condition)
Instruction * VisitWordCompare(InstructionSelectorT *selector, OpIndex node, InstructionCode opcode, FlagsContinuationT *cont, bool commutative)
void VisitFloat64Compare(InstructionSelectorT *selector, OpIndex node, FlagsContinuationT *cont)
RecordWriteMode WriteBarrierKindToRecordWriteMode(WriteBarrierKind write_barrier_kind)
FlagsCondition NegateFlagsCondition(FlagsCondition condition)
constexpr size_t kCcmpOffsetOfCompareCondition
constexpr size_t kCcmpOffsetOfLhs
void VisitRR(InstructionSelectorT *selector, InstructionCode opcode, OpIndex node)
void VisitAtomicCompareExchange(InstructionSelectorT *selector, OpIndex node, ArchOpcode opcode)
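The simplest of the Visit* helpers declared above (VisitRR, VisitRRR and friends) share one emit pattern: define the node's result as a register and feed each input through the ARM64 operand generator. A minimal sketch of that pattern, mirroring the signatures listed here and assuming the Arm64-specific operand generator class is named Arm64OperandGeneratorT (the real helpers layer immediate and addressing-mode handling on top of this):

  // One output register, one register input.
  static void VisitRR(InstructionSelectorT* selector, ArchOpcode opcode,
                      OpIndex node) {
    Arm64OperandGeneratorT g(selector);
    selector->Emit(opcode, g.DefineAsRegister(node),
                   g.UseRegister(selector->input_at(node, 0)));
  }

  // One output register, two register inputs.
  void VisitRRR(InstructionSelectorT* selector, ArchOpcode opcode,
                OpIndex node) {
    Arm64OperandGeneratorT g(selector);
    selector->Emit(opcode, g.DefineAsRegister(node),
                   g.UseRegister(selector->input_at(node, 0)),
                   g.UseRegister(selector->input_at(node, 1)));
  }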
constexpr auto kRegister
constexpr int64_t kXSignBit
constexpr int kSimd128Size (Definition: globals.h:706)
constexpr bool CanBeTaggedOrCompressedOrIndirectPointer(MachineRepresentation rep)
constexpr bool CanBeTaggedOrCompressedPointer(MachineRepresentation rep)
Address Tagged_t (Definition: globals.h:547)
constexpr bool SmiValuesAre31Bits()
V8_EXPORT_PRIVATE FlagValues v8_flags
V8_EXPORT_PRIVATE constexpr int ElementSizeLog2Of(MachineRepresentation)
Tagged< To > Cast(Tagged< From > value, const v8::SourceLocation &loc=INIT_SOURCE_LOCATION_IN_DEBUG) (Definition: casting.h:150)
Operation (Definition: operation.h:43)
#define UNREACHABLE() (Definition: logging.h:67)
#define DCHECK_LE(v1, v2) (Definition: logging.h:490)
#define CHECK(condition) (Definition: logging.h:124)
#define DCHECK_IMPLIES(v1, v2) (Definition: logging.h:493)
#define DCHECK_NE(v1, v2) (Definition: logging.h:486)
#define DCHECK_GE(v1, v2) (Definition: logging.h:488)
#define UNIMPLEMENTED() (Definition: logging.h:66)
#define DCHECK(condition) (Definition: logging.h:482)
#define DCHECK_EQ(v1, v2) (Definition: logging.h:485)
#define DCHECK_GT(v1, v2) (Definition: logging.h:487)
constexpr T RoundUp(T x, intptr_t m) (Definition: macros.h:387)
#define arraysize(array) (Definition: macros.h:67)
ExtendingLoadMatcher(OpIndex node, InstructionSelectorT *selector)
turboshaft::OpIndex input_at(turboshaft::OpIndex node, size_t index) const
underlying_operation_t< Op > & Cast() (Definition: operations.h:980)
#define V8_STATIC_ROOTS_BOOL (Definition: v8config.h:1001)