v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
code-generator-arm64.cc
1// Copyright 2014 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
16#include "src/compiler/osr.h"
19
20#if V8_ENABLE_WEBASSEMBLY
23#endif // V8_ENABLE_WEBASSEMBLY
24
25namespace v8 {
26namespace internal {
27namespace compiler {
28
29#define __ masm()->
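// As in V8's other code generators, `__` abbreviates emission through the
// MacroAssembler: `__ Add(...)` below reads as `masm()->Add(...)`.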
30
31// Adds Arm64-specific methods to convert InstructionOperands.
32class Arm64OperandConverter final : public InstructionOperandConverter {
33 public:
34 Arm64OperandConverter(CodeGenerator* gen, Instruction* instr)
35 : InstructionOperandConverter(gen, instr) {}
36
38 return InputDoubleRegister(index).S();
39 }
40
42 return InputDoubleRegister(index);
43 }
44
46 return InputDoubleRegister(index).Q();
47 }
48
50 if (instr_->InputAt(index)->IsImmediate()) {
52 return wzr;
53 }
54 DCHECK(instr_->InputAt(index)->IsFPRegister());
55 return InputDoubleRegister(index).S();
56 }
57
59 if (instr_->InputAt(index)->IsImmediate()) {
61 return fp_zero.S();
62 }
63 DCHECK(instr_->InputAt(index)->IsFPRegister());
64 return InputDoubleRegister(index).S();
65 }
66
68 if (instr_->InputAt(index)->IsImmediate()) {
70 return xzr;
71 }
72 DCHECK(instr_->InputAt(index)->IsDoubleRegister());
73 return InputDoubleRegister(index);
74 }
75
77 if (instr_->InputAt(index)->IsImmediate()) {
79 return fp_zero;
80 }
81 DCHECK(instr_->InputAt(index)->IsDoubleRegister());
82 return InputDoubleRegister(index);
83 }
84
85 size_t OutputCount() { return instr_->OutputCount(); }
86
88 return OutputDoubleRegister(index).S();
89 }
90
92 return OutputDoubleRegister(index);
93 }
94
96 return OutputDoubleRegister(index).Q();
97 }
98
99 Register InputRegister32(size_t index) {
100 return ToRegister(instr_->InputAt(index)).W();
101 }
102
104 DCHECK(instr_->InputAt(index)->IsRegister() ||
105 (instr_->InputAt(index)->IsImmediate() && (InputInt32(index) == 0)));
106 if (instr_->InputAt(index)->IsImmediate()) {
107 return wzr;
108 }
109 return InputRegister32(index);
110 }
111
112 Register InputRegister64(size_t index) { return InputRegister(index); }
113
115 DCHECK(instr_->InputAt(index)->IsRegister() ||
116 (instr_->InputAt(index)->IsImmediate() && (InputInt64(index) == 0)));
117 if (instr_->InputAt(index)->IsImmediate()) {
118 return xzr;
119 }
120 return InputRegister64(index);
121 }
122
123 Operand InputOperand(size_t index) {
124 return ToOperand(instr_->InputAt(index));
125 }
126
127 Operand InputOperand64(size_t index) { return InputOperand(index); }
128
129 Operand InputOperand32(size_t index) {
130 return ToOperand32(instr_->InputAt(index));
131 }
132
133 Register OutputRegister64(size_t index = 0) { return OutputRegister(index); }
134
135 Register OutputRegister32(size_t index = 0) {
136 return OutputRegister(index).W();
137 }
138
139 Register TempRegister32(size_t index) {
140 return ToRegister(instr_->TempAt(index)).W();
141 }
142
143 Operand InputOperand2_32(size_t index) {
144 switch (AddressingModeField::decode(instr_->opcode())) {
145 case kMode_None:
146 return InputOperand32(index);
147 case kMode_Operand2_R_LSL_I:
148 return Operand(InputRegister32(index), LSL, InputInt5(index + 1));
149 case kMode_Operand2_R_LSR_I:
150 return Operand(InputRegister32(index), LSR, InputInt5(index + 1));
151 case kMode_Operand2_R_ASR_I:
152 return Operand(InputRegister32(index), ASR, InputInt5(index + 1));
153 case kMode_Operand2_R_ROR_I:
154 return Operand(InputRegister32(index), ROR, InputInt5(index + 1));
155 case kMode_Operand2_R_UXTB:
156 return Operand(InputRegister32(index), UXTB);
157 case kMode_Operand2_R_UXTH:
158 return Operand(InputRegister32(index), UXTH);
159 case kMode_Operand2_R_SXTB:
160 return Operand(InputRegister32(index), SXTB);
161 case kMode_Operand2_R_SXTH:
162 return Operand(InputRegister32(index), SXTH);
163 case kMode_Operand2_R_SXTW:
164 return Operand(InputRegister32(index), SXTW);
165 case kMode_MRI:
166 case kMode_MRR:
167 case kMode_Root:
168 break;
169 }
170 UNREACHABLE();
171 }
172
173 Operand InputOperand2_64(size_t index) {
174 switch (AddressingModeField::decode(instr_->opcode())) {
175 case kMode_None:
176 return InputOperand64(index);
177 case kMode_Operand2_R_LSL_I:
178 return Operand(InputRegister64(index), LSL, InputInt6(index + 1));
179 case kMode_Operand2_R_LSR_I:
180 return Operand(InputRegister64(index), LSR, InputInt6(index + 1));
181 case kMode_Operand2_R_ASR_I:
182 return Operand(InputRegister64(index), ASR, InputInt6(index + 1));
183 case kMode_Operand2_R_ROR_I:
184 return Operand(InputRegister64(index), ROR, InputInt6(index + 1));
185 case kMode_Operand2_R_UXTB:
186 return Operand(InputRegister64(index), UXTB);
187 case kMode_Operand2_R_UXTH:
188 return Operand(InputRegister64(index), UXTH);
189 case kMode_Operand2_R_SXTB:
190 return Operand(InputRegister64(index), SXTB);
191 case kMode_Operand2_R_SXTH:
192 return Operand(InputRegister64(index), SXTH);
193 case kMode_Operand2_R_SXTW:
194 return Operand(InputRegister64(index), SXTW);
195 case kMode_MRI:
196 case kMode_MRR:
197 case kMode_Root:
198 break;
199 }
200 UNREACHABLE();
201 }
202
203 MemOperand MemoryOperand(size_t index = 0) {
204 switch (AddressingModeField::decode(instr_->opcode())) {
205 case kMode_None:
206 case kMode_Operand2_R_LSR_I:
207 case kMode_Operand2_R_ASR_I:
208 case kMode_Operand2_R_ROR_I:
209 case kMode_Operand2_R_UXTB:
210 case kMode_Operand2_R_UXTH:
211 case kMode_Operand2_R_SXTB:
212 case kMode_Operand2_R_SXTH:
213 case kMode_Operand2_R_SXTW:
214 break;
215 case kMode_Root:
216 return MemOperand(kRootRegister, InputInt64(index));
217 case kMode_Operand2_R_LSL_I:
218 return MemOperand(InputRegister(index + 0), InputRegister(index + 1),
219 LSL, InputInt32(index + 2));
220 case kMode_MRI:
221 return MemOperand(InputRegister(index + 0), InputInt32(index + 1));
222 case kMode_MRR:
223 return MemOperand(InputRegister(index + 0), InputRegister(index + 1));
224 }
225 UNREACHABLE();
226 }
227
228 Operand ToOperand(InstructionOperand* op) {
229 if (op->IsRegister()) {
230 return Operand(ToRegister(op));
231 }
232 return ToImmediate(op);
233 }
234
235 Operand ToOperand32(InstructionOperand* op) {
236 if (op->IsRegister()) {
237 return Operand(ToRegister(op).W());
238 }
239 return ToImmediate(op);
240 }
241
242 Operand ToImmediate(InstructionOperand* operand) {
243 Constant constant = ToConstant(operand);
244 switch (constant.type()) {
245 case Constant::kInt32:
246 return Operand(constant.ToInt32(), constant.rmode());
247 case Constant::kInt64:
248 return Operand(constant.ToInt64(), constant.rmode());
250 return Operand::EmbeddedNumber(constant.ToFloat32());
252 return Operand::EmbeddedNumber(constant.ToFloat64().value());
254 return Operand(constant.ToExternalReference());
256 RootIndex root_index;
257 if (gen_->isolate()->roots_table().IsRootHandle(constant.ToHeapObject(),
258 &root_index)) {
261 Tagged_t ptr =
264 return Immediate(ptr);
265 }
266
267 return Operand(constant.ToHeapObject());
268 }
270 return Operand(constant.ToHeapObject());
272 UNREACHABLE(); // TODO(dcarney): RPO immediates on arm64.
273 }
274 UNREACHABLE();
275 }
276
282
285 if (offset.from_frame_pointer()) {
286 int from_sp = offset.offset() + frame_access_state()->GetSPToFPOffset();
287 // Convert FP-offsets to SP-offsets if it results in better code.
288 if (!frame_access_state()->FPRelativeOnly() &&
289 (Assembler::IsImmLSUnscaled(from_sp) ||
290 Assembler::IsImmLSScaled(from_sp, 3))) {
292 }
293 }
294 // Access below the stack pointer is not expected in arm64 and is actively
295 // prevented at run time in the simulator.
296 DCHECK_IMPLIES(offset.from_stack_pointer(), offset.offset() >= 0);
297 return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
298 }
299};
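// The converter above layers ARM64-specific views over the generic
// InstructionOperandConverter: 32/64-bit register aliases, S/D/Q vector views,
// zero-register (wzr/xzr/fp_zero) substitution for zero immediates, the
// Operand2 shift/extend forms, and translation of frame slots and addressing
// modes into MemOperands.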
300
301namespace {
302
303class OutOfLineRecordWrite final : public OutOfLineCode {
304 public:
305 OutOfLineRecordWrite(
306 CodeGenerator* gen, Register object, Operand offset, Register value,
307 RecordWriteMode mode, StubCallMode stub_mode,
308 UnwindingInfoWriter* unwinding_info_writer,
309 IndirectPointerTag indirect_pointer_tag = kIndirectPointerNullTag)
310 : OutOfLineCode(gen),
311 object_(object),
312 offset_(offset),
313 value_(value),
314 mode_(mode),
315#if V8_ENABLE_WEBASSEMBLY
316 stub_mode_(stub_mode),
317#endif // V8_ENABLE_WEBASSEMBLY
318 must_save_lr_(!gen->frame_access_state()->has_frame()),
319 unwinding_info_writer_(unwinding_info_writer),
320 zone_(gen->zone()),
321 indirect_pointer_tag_(indirect_pointer_tag) {
322 }
323
324 void Generate() final {
325 // When storing an indirect pointer, the value will always be a
326 // full/decompressed pointer.
329 __ DecompressTagged(value_, value_);
330 }
331
332 // No need to check value page flags with the indirect pointer write barrier
333 // because the value is always an ExposedTrustedObject.
336 eq, exit());
337 }
338
339 SaveFPRegsMode const save_fp_mode = frame()->DidAllocateDoubleRegisters()
340 ? SaveFPRegsMode::kSave
341 : SaveFPRegsMode::kIgnore;
342 if (must_save_lr_) {
343 // We need to save and restore lr if the frame was elided.
344 __ Push<MacroAssembler::kSignLR>(lr, padreg);
345 unwinding_info_writer_->MarkLinkRegisterOnTopOfStack(__ pc_offset(), sp);
346 }
347 if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
348 __ CallEphemeronKeyBarrier(object_, offset_, save_fp_mode);
349 } else if (mode_ == RecordWriteMode::kValueIsIndirectPointer) {
350 // We must have a valid indirect pointer tag here. Otherwise, we risk not
351 // invoking the correct write barrier, which may lead to subtle issues.
353 __ CallIndirectPointerBarrier(object_, offset_, save_fp_mode,
355#if V8_ENABLE_WEBASSEMBLY
356 } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
357 // A direct call to a wasm runtime stub defined in this module.
358 // Just encode the stub index. This will be patched when the code
359 // is added to the native module and copied into wasm code space.
360 __ CallRecordWriteStubSaveRegisters(object_, offset_, save_fp_mode,
361 StubCallMode::kCallWasmRuntimeStub);
362#endif // V8_ENABLE_WEBASSEMBLY
363 } else {
364 __ CallRecordWriteStubSaveRegisters(object_, offset_, save_fp_mode);
365 }
366 if (must_save_lr_) {
367 __ Pop<MacroAssembler::kAuthLR>(padreg, lr);
368 unwinding_info_writer_->MarkPopLinkRegisterFromTopOfStack(__ pc_offset());
369 }
370 }
371
372 private:
373 Register const object_;
374 Operand const offset_;
375 Register const value_;
377#if V8_ENABLE_WEBASSEMBLY
378 StubCallMode const stub_mode_;
379#endif // V8_ENABLE_WEBASSEMBLY
381 UnwindingInfoWriter* const unwinding_info_writer_;
384};
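// OutOfLineRecordWrite is the write barrier's slow path: the inline store
// sequence only branches here when barrier work may be needed, and Generate()
// dispatches to the matching stub (ephemeron key, indirect pointer, wasm
// runtime stub, or the regular record-write stub), signing and
// re-authenticating lr around the call when the frame was elided.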
385
386Condition FlagsConditionToCondition(FlagsCondition condition) {
387 switch (condition) {
388 case kEqual:
389 return eq;
390 case kNotEqual:
391 return ne;
392 case kSignedLessThan:
393 return lt;
395 return ge;
397 return le;
399 return gt;
401 return lo;
403 return hs;
405 return ls;
407 return hi;
409 return lt;
411 return ge;
413 return ls;
415 return hi;
416 case kFloatLessThan:
417 return lo;
419 return hs;
421 return le;
423 return gt;
424 case kOverflow:
425 return vs;
426 case kNotOverflow:
427 return vc;
428 case kUnorderedEqual:
430 case kIsNaN:
431 case kIsNotNaN:
432 break;
433 case kPositiveOrZero:
434 return pl;
435 case kNegative:
436 return mi;
437 }
438 UNREACHABLE();
439}
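// Maps the machine-independent FlagsCondition chosen by the instruction
// selector onto ARM64 condition codes (eq, ne, lt, lo, vs, ...). kUnorderedEqual,
// kIsNaN and kIsNotNaN deliberately fall through to UNREACHABLE(): callers are
// not expected to hand them to this helper.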
440
441#if V8_ENABLE_WEBASSEMBLY
442class WasmOutOfLineTrap : public OutOfLineCode {
443 public:
444 WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
445 : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
446 void Generate() override {
447 Arm64OperandConverter i(gen_, instr_);
448 TrapId trap_id =
449 static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
450 GenerateCallToTrap(trap_id);
451 }
452
453 protected:
454 CodeGenerator* gen_;
455
456 void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
457
458 private:
459 void GenerateCallToTrap(TrapId trap_id) {
460 gen_->AssembleSourcePosition(instr_);
461 __ Call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
462 ReferenceMap* reference_map = gen_->zone()->New<ReferenceMap>(gen_->zone());
463 gen_->RecordSafepoint(reference_map);
464 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
465 }
466
467 Instruction* instr_;
468};
469
470void RecordTrapInfoIfNeeded(Zone* zone, CodeGenerator* codegen,
471 InstructionCode opcode, Instruction* instr,
472 int pc) {
473 const MemoryAccessMode access_mode = AccessModeField::decode(opcode);
474 if (access_mode == kMemoryAccessProtectedMemOutOfBounds ||
476 codegen->RecordProtectedInstruction(pc);
477 }
478}
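// For WebAssembly, trap-handler-protected memory accesses record the pc of the
// potentially faulting instruction so a signal caught at that offset can be
// attributed to an out-of-bounds access rather than a real crash.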
479#else
480void RecordTrapInfoIfNeeded(Zone* zone, CodeGenerator* codegen,
481 InstructionCode opcode, Instruction* instr,
482 int pc) {
484}
485#endif // V8_ENABLE_WEBASSEMBLY
486
487// Handles unary ops that work for float (scalar), double (scalar), or NEON.
488template <typename Fn>
489void EmitFpOrNeonUnop(MacroAssembler* masm, Fn fn, Instruction* instr,
490 Arm64OperandConverter i, VectorFormat scalar,
491 VectorFormat vector) {
492 VectorFormat f = instr->InputAt(0)->IsSimd128Register() ? vector : scalar;
493
494 VRegister output = VRegister::Create(i.OutputDoubleRegister().code(), f);
495 VRegister input = VRegister::Create(i.InputDoubleRegister(0).code(), f);
496 (masm->*fn)(output, input);
497}
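// Example: kArm64Float32RoundDown below calls
//   EmitFpOrNeonUnop(masm(), &MacroAssembler::Frintm, instr, i, kFormatS, kFormat4S);
// so the same handler emits a scalar `frintm s, s` for an FP register input and
// a vector `frintm v.4s, v.4s` for a Simd128 input.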
498
499} // namespace
500
501#define ASSEMBLE_SHIFT(asm_instr, width) \
502 do { \
503 if (instr->InputAt(1)->IsRegister()) { \
504 __ asm_instr(i.OutputRegister##width(), i.InputRegister##width(0), \
505 i.InputRegister##width(1)); \
506 } else { \
507 uint32_t imm = \
508 static_cast<uint32_t>(i.InputOperand##width(1).ImmediateValue()); \
509 __ asm_instr(i.OutputRegister##width(), i.InputRegister##width(0), \
510 imm % (width)); \
511 } \
512 } while (0)
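// Example: ASSEMBLE_SHIFT(Lsl, 32), used for kArm64Lsl32 below, emits
//   __ Lsl(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
// when the shift amount is in a register, and otherwise reduces the immediate
// amount modulo 32 before emitting the immediate form.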
513
514#define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr, reg) \
515 do { \
516 __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
517 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
518 __ asm_instr(i.Output##reg(), i.TempRegister(0)); \
519 } while (0)
520
521#define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr, reg) \
522 do { \
523 __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
524 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
525 __ asm_instr(i.Input##reg(2), i.TempRegister(0)); \
526 } while (0)
527
528#define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(suffix, reg) \
529 do { \
530 __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
531 if (CpuFeatures::IsSupported(LSE)) { \
532 CpuFeatureScope scope(masm(), LSE); \
533 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
534 __ Swpal##suffix(i.Input##reg(2), i.Output##reg(), \
535 MemOperand(i.TempRegister(0))); \
536 } else { \
537 Label exchange; \
538 __ Bind(&exchange); \
539 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
540 __ ldaxr##suffix(i.Output##reg(), i.TempRegister(0)); \
541 __ stlxr##suffix(i.TempRegister32(1), i.Input##reg(2), \
542 i.TempRegister(0)); \
543 __ Cbnz(i.TempRegister32(1), &exchange); \
544 } \
545 } while (0)
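// With ARMv8.1 LSE the exchange is a single Swpal (swap with acquire/release
// semantics); without it, the fallback is a load-acquire-exclusive /
// store-release-exclusive (ldaxr/stlxr) loop that retries until the
// store-exclusive succeeds.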
546
547#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(suffix, ext, reg) \
548 do { \
549 __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
550 if (CpuFeatures::IsSupported(LSE)) { \
551 DCHECK_EQ(i.OutputRegister(), i.InputRegister(2)); \
552 CpuFeatureScope scope(masm(), LSE); \
553 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
554 __ Casal##suffix(i.Output##reg(), i.Input##reg(3), \
555 MemOperand(i.TempRegister(0))); \
556 } else { \
557 Label compareExchange; \
558 Label exit; \
559 __ Bind(&compareExchange); \
560 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
561 __ ldaxr##suffix(i.Output##reg(), i.TempRegister(0)); \
562 __ Cmp(i.Output##reg(), Operand(i.Input##reg(2), ext)); \
563 __ B(ne, &exit); \
564 __ stlxr##suffix(i.TempRegister32(1), i.Input##reg(3), \
565 i.TempRegister(0)); \
566 __ Cbnz(i.TempRegister32(1), &compareExchange); \
567 __ Bind(&exit); \
568 } \
569 } while (0)
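// Same LSE split for compare-and-swap: Casal expects the comparison value in
// the destination register (hence the DCHECK that the output aliases input 2),
// while the fallback loop loads exclusively, compares against the expected
// value and only attempts the store-exclusive on a match.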
570
571#define ASSEMBLE_ATOMIC_SUB(suffix, reg) \
572 do { \
573 __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
574 if (CpuFeatures::IsSupported(LSE)) { \
575 CpuFeatureScope scope(masm(), LSE); \
576 UseScratchRegisterScope temps(masm()); \
577 Register scratch = temps.AcquireSameSizeAs(i.Input##reg(2)); \
578 __ Neg(scratch, i.Input##reg(2)); \
579 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
580 __ Ldaddal##suffix(scratch, i.Output##reg(), \
581 MemOperand(i.TempRegister(0))); \
582 } else { \
583 Label binop; \
584 __ Bind(&binop); \
585 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
586 __ ldaxr##suffix(i.Output##reg(), i.TempRegister(0)); \
587 __ Sub(i.Temp##reg(1), i.Output##reg(), Operand(i.Input##reg(2))); \
588 __ stlxr##suffix(i.TempRegister32(2), i.Temp##reg(1), \
589 i.TempRegister(0)); \
590 __ Cbnz(i.TempRegister32(2), &binop); \
591 } \
592 } while (0)
593
594#define ASSEMBLE_ATOMIC_AND(suffix, reg) \
595 do { \
596 __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
597 if (CpuFeatures::IsSupported(LSE)) { \
598 CpuFeatureScope scope(masm(), LSE); \
599 UseScratchRegisterScope temps(masm()); \
600 Register scratch = temps.AcquireSameSizeAs(i.Input##reg(2)); \
601 __ Mvn(scratch, i.Input##reg(2)); \
602 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
603 __ Ldclral##suffix(scratch, i.Output##reg(), \
604 MemOperand(i.TempRegister(0))); \
605 } else { \
606 Label binop; \
607 __ Bind(&binop); \
608 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
609 __ ldaxr##suffix(i.Output##reg(), i.TempRegister(0)); \
610 __ And(i.Temp##reg(1), i.Output##reg(), Operand(i.Input##reg(2))); \
611 __ stlxr##suffix(i.TempRegister32(2), i.Temp##reg(1), \
612 i.TempRegister(0)); \
613 __ Cbnz(i.TempRegister32(2), &binop); \
614 } \
615 } while (0)
616
617#define ASSEMBLE_ATOMIC_BINOP(suffix, bin_instr, lse_instr, reg) \
618 do { \
619 __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
620 if (CpuFeatures::IsSupported(LSE)) { \
621 CpuFeatureScope scope(masm(), LSE); \
622 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
623 __ lse_instr##suffix(i.Input##reg(2), i.Output##reg(), \
624 MemOperand(i.TempRegister(0))); \
625 } else { \
626 Label binop; \
627 __ Bind(&binop); \
628 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
629 __ ldaxr##suffix(i.Output##reg(), i.TempRegister(0)); \
630 __ bin_instr(i.Temp##reg(1), i.Output##reg(), Operand(i.Input##reg(2))); \
631 __ stlxr##suffix(i.TempRegister32(2), i.Temp##reg(1), \
632 i.TempRegister(0)); \
633 __ Cbnz(i.TempRegister32(2), &binop); \
634 } \
635 } while (0)
636
637#define ASSEMBLE_IEEE754_BINOP(name) \
638 do { \
639 FrameScope scope(masm(), StackFrame::MANUAL); \
640 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2); \
641 } while (0)
642
643#define ASSEMBLE_IEEE754_UNOP(name) \
644 do { \
645 FrameScope scope(masm(), StackFrame::MANUAL); \
646 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1); \
647 } while (0)
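// Both IEEE754 macros call out to V8's software implementations exposed as
// external references (e.g. ieee754_acosh_function for
// ASSEMBLE_IEEE754_UNOP(acosh) below), passing zero general-purpose and one or
// two double arguments through CallCFunction inside a manual frame scope.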
648
649// If shift value is an immediate, we can call asm_imm, taking the shift value
650// modulo 2^width. Otherwise, emit code to perform the modulus operation, and
651// call asm_shl.
652#define ASSEMBLE_SIMD_SHIFT_LEFT(asm_imm, width, format, asm_shl, gp) \
653 do { \
654 if (instr->InputAt(1)->IsImmediate()) { \
655 __ asm_imm(i.OutputSimd128Register().format(), \
656 i.InputSimd128Register(0).format(), i.InputInt##width(1)); \
657 } else { \
658 UseScratchRegisterScope temps(masm()); \
659 VRegister tmp = temps.AcquireQ(); \
660 Register shift = temps.Acquire##gp(); \
661 constexpr int mask = (1 << width) - 1; \
662 __ And(shift, i.InputRegister32(1), mask); \
663 __ Dup(tmp.format(), shift); \
664 __ asm_shl(i.OutputSimd128Register().format(), \
665 i.InputSimd128Register(0).format(), tmp.format()); \
666 } \
667 } while (0)
668
669// If shift value is an immediate, we can call asm_imm, taking the shift value
670// modulo 2^width. Otherwise, emit code to perform the modulus operation, and
671// call asm_shl, passing in the negative shift value (treated as right shift).
672#define ASSEMBLE_SIMD_SHIFT_RIGHT(asm_imm, width, format, asm_shl, gp) \
673 do { \
674 if (instr->InputAt(1)->IsImmediate()) { \
675 __ asm_imm(i.OutputSimd128Register().format(), \
676 i.InputSimd128Register(0).format(), i.InputInt##width(1)); \
677 } else { \
678 UseScratchRegisterScope temps(masm()); \
679 VRegister tmp = temps.AcquireQ(); \
680 Register shift = temps.Acquire##gp(); \
681 constexpr int mask = (1 << width) - 1; \
682 __ And(shift, i.InputRegister32(1), mask); \
683 __ Dup(tmp.format(), shift); \
684 __ Neg(tmp.format(), tmp.format()); \
685 __ asm_shl(i.OutputSimd128Register().format(), \
686 i.InputSimd128Register(0).format(), tmp.format()); \
687 } \
688 } while (0)
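// ARM64 has no NEON shift that takes a scalar register count, so for variable
// shifts both macros mask the count, Dup it across a temporary vector and use
// the per-lane register-shift instruction; the right-shift variant negates the
// lane counts first, as described in the comment above.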
689
691 __ Mov(sp, fp);
693
695}
696
698 if (frame_access_state()->has_frame()) {
699 __ RestoreFPAndLR();
700 }
702}
703
704namespace {
705
706void AdjustStackPointerForTailCall(MacroAssembler* masm,
707 FrameAccessState* state,
708 int new_slot_above_sp,
709 bool allow_shrinkage = true) {
710 int current_sp_offset = state->GetSPToFPSlotCount() +
712 int stack_slot_delta = new_slot_above_sp - current_sp_offset;
713 DCHECK_EQ(stack_slot_delta % 2, 0);
714 if (stack_slot_delta > 0) {
715 masm->Claim(stack_slot_delta);
716 state->IncreaseSPDelta(stack_slot_delta);
717 } else if (allow_shrinkage && stack_slot_delta < 0) {
718 masm->Drop(-stack_slot_delta);
719 state->IncreaseSPDelta(stack_slot_delta);
720 }
721}
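// Claim() moves sp down by the (even) slot delta and Drop() releases slots;
// the FrameAccessState's SP delta is updated in lockstep so later
// frame-relative accesses remain correct, and the DCHECK preserves the 16-byte
// (two-slot) stack alignment ARM64 requires.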
722
723} // namespace
724
725void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
726 int first_unused_slot_offset) {
727 AdjustStackPointerForTailCall(masm(), frame_access_state(),
728 first_unused_slot_offset, false);
729}
730
731void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
732 int first_unused_slot_offset) {
733 DCHECK_EQ(first_unused_slot_offset % 2, 0);
734 AdjustStackPointerForTailCall(masm(), frame_access_state(),
735 first_unused_slot_offset);
736 DCHECK(instr->IsTailCall());
737 InstructionOperandConverter g(this, instr);
738 int optional_padding_offset = g.InputInt32(instr->InputCount() - 2);
739 if (optional_padding_offset % 2) {
740 __ Poke(padreg, optional_padding_offset * kSystemPointerSize);
741 }
742}
743
744// Check that {kJavaScriptCallCodeStartRegister} is correct.
745void CodeGenerator::AssembleCodeStartRegisterCheck() {
746 UseScratchRegisterScope temps(masm());
747 Register scratch = temps.AcquireX();
748 __ ComputeCodeStartAddress(scratch);
750 __ Assert(eq, AbortReason::kWrongFunctionCodeStart);
751}
752
753#ifdef V8_ENABLE_LEAPTIERING
754// Check that {kJavaScriptCallDispatchHandleRegister} is correct.
755void CodeGenerator::AssembleDispatchHandleRegisterCheck() {
756 DCHECK(linkage()->GetIncomingDescriptor()->IsJSFunctionCall());
757
759
760 // We currently don't check this for JS builtins as those are sometimes
761 // called directly (e.g. from other builtins) and not through the dispatch
762 // table. This is fine as builtin functions don't use the dispatch handle,
763 // but we could enable this check in the future if we make sure to pass the
764 // kInvalidDispatchHandle whenever we do a direct call to a JS builtin.
766 return;
767 }
768
769 // For now, we only ensure that the register references a valid dispatch
770 // entry with the correct parameter count. In the future, we may also be able
771 // to check that the entry points back to this code.
772 UseScratchRegisterScope temps(masm());
773 Register actual_parameter_count = temps.AcquireX();
774 Register scratch = temps.AcquireX();
775 __ LoadParameterCountFromJSDispatchTable(
776 actual_parameter_count, kJavaScriptCallDispatchHandleRegister, scratch);
777 __ Mov(scratch, parameter_count_);
778 __ cmp(actual_parameter_count, scratch);
779 __ Assert(eq, AbortReason::kWrongFunctionDispatchHandle);
780}
781#endif // V8_ENABLE_LEAPTIERING
782
784
785int32_t GetLaneMask(int32_t lane_count) { return lane_count * 2 - 1; }
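// For a 4-lane format the mask is 7: shuffle indices 0-3 pick a lane of the
// first source and 4-7 a lane of the second, which is why the shuffle helpers
// below compare against max_src0_lane and then re-mask the index.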
786
788 VectorFormat f) {
789 VRegister dst = VRegister::Create(i.OutputSimd128Register().code(), f);
790 VRegister src0 = VRegister::Create(i.InputSimd128Register(0).code(), f);
791 VRegister src1 = VRegister::Create(i.InputSimd128Register(1).code(), f);
792
793 int32_t shuffle = i.InputInt32(2);
794 int32_t lane_count = LaneCountFromFormat(f);
795 int32_t max_src0_lane = lane_count - 1;
796 int32_t lane_mask = GetLaneMask(lane_count);
797
798 int lane = shuffle & lane_mask;
799 VRegister src = (lane > max_src0_lane) ? src1 : src0;
800 lane &= max_src0_lane;
801 masm->Dup(dst, src, lane);
802}
803
805 VectorFormat f) {
806 VRegister dst = VRegister::Create(i.OutputSimd128Register().code(), f);
807 VRegister src0 = VRegister::Create(i.InputSimd128Register(0).code(), f);
808 VRegister src1 = VRegister::Create(i.InputSimd128Register(1).code(), f);
809 // Check for in-place shuffles, as we may need to use a temporary register
810 // to avoid overwriting an input.
811 if (dst == src0 || dst == src1) {
812 UseScratchRegisterScope scope(masm);
813 VRegister temp = scope.AcquireV(f);
814 if (dst == src0) {
815 masm->Mov(temp, src0);
816 src0 = temp;
817 } else if (dst == src1) {
818 masm->Mov(temp, src1);
819 src1 = temp;
820 }
821 }
822 int32_t shuffle = i.InputInt32(2);
823 int32_t lane_count = LaneCountFromFormat(f);
824 int32_t max_src0_lane = lane_count - 1;
825 int32_t lane_mask = GetLaneMask(lane_count);
826
827 // Perform shuffle as a vmov per lane.
828 for (int i = 0; i < 2; i++) {
829 VRegister src = src0;
830 int lane = shuffle & lane_mask;
831 if (lane > max_src0_lane) {
832 src = src1;
833 lane &= max_src0_lane;
834 }
835 masm->Mov(dst, i, src, lane);
836 shuffle >>= 8;
837 }
838}
839
841 VectorFormat f) {
842 VRegister dst = VRegister::Create(i.OutputSimd128Register().code(), f);
843 VRegister src0 = VRegister::Create(i.InputSimd128Register(0).code(), f);
844 VRegister src1 = VRegister::Create(i.InputSimd128Register(1).code(), f);
845 // Check for in-place shuffles, as we may need to use a temporary register
846 // to avoid overwriting an input.
847 if (dst == src0 || dst == src1) {
848 UseScratchRegisterScope scope(masm);
849 VRegister temp = scope.AcquireV(f);
850 if (dst == src0) {
851 masm->Mov(temp, src0);
852 src0 = temp;
853 } else if (dst == src1) {
854 masm->Mov(temp, src1);
855 src1 = temp;
856 }
857 }
858 int32_t shuffle = i.InputInt32(2);
859 int32_t lane_count = LaneCountFromFormat(f);
860 int32_t max_src0_lane = lane_count - 1;
861 int32_t lane_mask = GetLaneMask(lane_count);
862
864 // Check whether we can reduce the number of vmovs by performing a dup
865 // first. So, for [1, 1, 2, 1] we can dup lane zero and then perform
866 // a single lane move for lane two.
867 const std::array<int, 4> input_lanes{
868 shuffle & lane_mask, shuffle >> 8 & lane_mask, shuffle >> 16 & lane_mask,
869 shuffle >> 24 & lane_mask};
870 std::array<int, 8> lane_counts = {0};
871 for (int lane : input_lanes) {
872 ++lane_counts[lane];
873 }
874
875 // Find first duplicate lane, if any, and insert dup.
876 int duplicate_lane = -1;
877 for (size_t lane = 0; lane < lane_counts.size(); ++lane) {
878 if (lane_counts[lane] > 1) {
879 duplicate_lane = static_cast<int>(lane);
880 if (duplicate_lane > max_src0_lane) {
881 masm->Dup(dst, src1, duplicate_lane & max_src0_lane);
882 } else {
883 masm->Dup(dst, src0, duplicate_lane);
884 }
885 break;
886 }
887 }
888
889 // Perform shuffle as a vmov per lane.
890 for (int i = 0; i < 4; i++) {
891 int lane = shuffle & lane_mask;
892 shuffle >>= 8;
893 if (lane == duplicate_lane) continue;
894 VRegister src = src0;
895 if (lane > max_src0_lane) {
896 src = src1;
897 lane &= max_src0_lane;
898 }
899 masm->Mov(dst, i, src, lane);
900 }
901}
902
903// Assembles an instruction after register allocation, producing machine code.
904CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
905 Instruction* instr) {
906 Arm64OperandConverter i(this, instr);
907 InstructionCode opcode = instr->opcode();
908 ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
909 switch (arch_opcode) {
910 case kArchCallCodeObject: {
911 if (instr->InputAt(0)->IsImmediate()) {
912 __ Call(i.InputCode(0), RelocInfo::CODE_TARGET);
913 } else {
914 Register reg = i.InputRegister(0);
916 i.InputCodeEntrypointTag(instr->CodeEnrypointTagInputIndex());
918 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
920 __ CallCodeObject(reg, tag);
921 }
924 break;
925 }
926 case kArchCallBuiltinPointer: {
927 DCHECK(!instr->InputAt(0)->IsImmediate());
928 Register builtin_index = i.InputRegister(0);
929 Register target =
930 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister)
932 : builtin_index;
933 __ CallBuiltinByIndex(builtin_index, target);
936 break;
937 }
938#if V8_ENABLE_WEBASSEMBLY
939 case kArchCallWasmFunction:
940 case kArchCallWasmFunctionIndirect: {
941 if (instr->InputAt(0)->IsImmediate()) {
942 DCHECK_EQ(arch_opcode, kArchCallWasmFunction);
943 Constant constant = i.ToConstant(instr->InputAt(0));
944 Address wasm_code = static_cast<Address>(constant.ToInt64());
945 __ Call(wasm_code, constant.rmode());
946 } else if (arch_opcode == kArchCallWasmFunctionIndirect) {
947 __ CallWasmCodePointer(
948 i.InputRegister(0),
949 i.InputInt64(instr->WasmSignatureHashInputIndex()));
950 } else {
951 __ Call(i.InputRegister(0));
952 }
955 break;
956 }
957 case kArchTailCallWasm:
958 case kArchTailCallWasmIndirect: {
959 if (instr->InputAt(0)->IsImmediate()) {
960 DCHECK_EQ(arch_opcode, kArchTailCallWasm);
961 Constant constant = i.ToConstant(instr->InputAt(0));
962 Address wasm_code = static_cast<Address>(constant.ToInt64());
963 __ Jump(wasm_code, constant.rmode());
964 } else {
965 Register target = i.InputRegister(0);
966 UseScratchRegisterScope temps(masm());
967 temps.Exclude(x17);
968 __ Mov(x17, target);
969 if (arch_opcode == kArchTailCallWasmIndirect) {
970 __ CallWasmCodePointer(
971 x17, i.InputInt64(instr->WasmSignatureHashInputIndex()),
973 } else {
974 __ Jump(x17);
975 }
976 }
980 break;
981 }
982#endif // V8_ENABLE_WEBASSEMBLY
983 case kArchTailCallCodeObject: {
984 if (instr->InputAt(0)->IsImmediate()) {
985 __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET);
986 } else {
987 Register reg = i.InputRegister(0);
989 i.InputCodeEntrypointTag(instr->CodeEnrypointTagInputIndex());
991 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
993 __ JumpCodeObject(reg, tag);
994 }
998 break;
999 }
1000 case kArchTailCallAddress: {
1001 CHECK(!instr->InputAt(0)->IsImmediate());
1002 Register reg = i.InputRegister(0);
1004 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
1006 UseScratchRegisterScope temps(masm());
1007 temps.Exclude(x17);
1008 __ Mov(x17, reg);
1009 __ Jump(x17);
1013 break;
1014 }
1015 case kArchCallJSFunction: {
1016 Register func = i.InputRegister(0);
1017 if (v8_flags.debug_code) {
1018 // Check the function's context matches the context argument.
1019 UseScratchRegisterScope scope(masm());
1020 Register temp = scope.AcquireX();
1021 __ LoadTaggedField(temp,
1022 FieldMemOperand(func, JSFunction::kContextOffset));
1023 __ cmp(cp, temp);
1024 __ Assert(eq, AbortReason::kWrongFunctionContext);
1025 }
1026 uint32_t num_arguments =
1027 i.InputUint32(instr->JSCallArgumentCountInputIndex());
1028 __ CallJSFunction(func, num_arguments);
1031 break;
1032 }
1033 case kArchPrepareCallCFunction:
1034 // We don't need kArchPrepareCallCFunction on arm64 as the instruction
1035 // selector has already performed a Claim to reserve space on the stack.
1036 // Frame alignment is always 16 bytes, and the stack pointer is already
1037 // 16-byte aligned, therefore we do not need to align the stack pointer
1038 // by an unknown value, and it is safe to continue accessing the frame
1039 // via the stack pointer.
1040 UNREACHABLE();
1041 case kArchSaveCallerRegisters: {
1042 fp_mode_ =
1043 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
1046 // kReturnRegister0 should have been saved before entering the stub.
1047 int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
1049 DCHECK_EQ(0, frame_access_state()->sp_delta());
1053 break;
1054 }
1055 case kArchRestoreCallerRegisters: {
1056 DCHECK(fp_mode_ ==
1057 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
1060 // Don't overwrite the returned value.
1061 int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
1063 DCHECK_EQ(0, frame_access_state()->sp_delta());
1066 break;
1067 }
1068 case kArchPrepareTailCall:
1070 break;
1071 case kArchCallCFunctionWithFrameState:
1072 case kArchCallCFunction: {
1073 int const num_gp_parameters = ParamField::decode(instr->opcode());
1074 int const num_fp_parameters = FPParamField::decode(instr->opcode());
1075 Label return_location;
1076 SetIsolateDataSlots set_isolate_data_slots = SetIsolateDataSlots::kYes;
1077#if V8_ENABLE_WEBASSEMBLY
1078 if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
1079 // Put the return address in a stack slot.
1080 __ StoreReturnAddressInWasmExitFrame(&return_location);
1081 set_isolate_data_slots = SetIsolateDataSlots::kNo;
1082 }
1083#endif // V8_ENABLE_WEBASSEMBLY
1084 int pc_offset;
1085 if (instr->InputAt(0)->IsImmediate()) {
1086 ExternalReference ref = i.InputExternalReference(0);
1087 pc_offset = __ CallCFunction(ref, num_gp_parameters, num_fp_parameters,
1088 set_isolate_data_slots, &return_location);
1089 } else {
1090 Register func = i.InputRegister(0);
1091 pc_offset = __ CallCFunction(func, num_gp_parameters, num_fp_parameters,
1092 set_isolate_data_slots, &return_location);
1093 }
1094 RecordSafepoint(instr->reference_map(), pc_offset);
1095
1096 bool const needs_frame_state =
1097 (arch_opcode == kArchCallCFunctionWithFrameState);
1098 if (needs_frame_state) {
1100 }
1101
1103 // Ideally, we should decrement SP delta to match the change of stack
1104 // pointer in CallCFunction. However, for certain architectures (e.g.
1105 // ARM), there may be more strict alignment requirement, causing old SP
1106 // to be saved on the stack. In those cases, we can not calculate the SP
1107 // delta statically.
1110 // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
1111 // Here, we assume the sequence to be:
1112 // kArchSaveCallerRegisters;
1113 // kArchCallCFunction;
1114 // kArchRestoreCallerRegisters;
1115 int bytes =
1116 __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
1118 }
1119 break;
1120 }
1121 case kArchJmp:
1122 AssembleArchJump(i.InputRpo(0));
1123 break;
1124 case kArchTableSwitch:
1126 break;
1127 case kArchBinarySearchSwitch:
1129 break;
1130 case kArchAbortCSADcheck:
1131 DCHECK_EQ(i.InputRegister(0), x1);
1132 {
1133 // We don't actually want to generate a pile of code for this, so just
1134 // claim there is a stack frame, without generating one.
1135 FrameScope scope(masm(), StackFrame::NO_FRAME_TYPE);
1136 __ CallBuiltin(Builtin::kAbortCSADcheck);
1137 }
1138 __ Debug("kArchAbortCSADcheck", 0, BREAK);
1140 break;
1141 case kArchDebugBreak:
1142 __ DebugBreak();
1143 break;
1144 case kArchComment:
1145 __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
1146 break;
1147 case kArchThrowTerminator:
1149 break;
1150 case kArchNop:
1151 // don't emit code for nops.
1152 break;
1153 case kArchDeoptimize: {
1154 DeoptimizationExit* exit =
1156 __ B(exit->label());
1157 break;
1158 }
1159 case kArchRet:
1160 AssembleReturn(instr->InputAt(0));
1161 break;
1162 case kArchFramePointer:
1163 __ mov(i.OutputRegister(), fp);
1164 break;
1165 case kArchParentFramePointer:
1166 if (frame_access_state()->has_frame()) {
1167 __ ldr(i.OutputRegister(), MemOperand(fp, 0));
1168 } else {
1169 __ mov(i.OutputRegister(), fp);
1170 }
1171 break;
1172#if V8_ENABLE_WEBASSEMBLY
1173 case kArchStackPointer:
1174 // The register allocator expects an allocatable register for the output,
1175 // we cannot use sp directly.
1176 __ mov(i.OutputRegister(), sp);
1177 break;
1178 case kArchSetStackPointer: {
1179 DCHECK(instr->InputAt(0)->IsRegister());
1180 if (masm()->options().enable_simulator_code) {
1181 __ RecordComment("-- Set simulator stack limit --");
1182 DCHECK(__ TmpList()->IncludesAliasOf(kSimulatorHltArgument));
1183 __ LoadStackLimit(kSimulatorHltArgument,
1186 }
1187 __ Mov(sp, i.InputRegister(0));
1188 break;
1189 }
1190#endif // V8_ENABLE_WEBASSEMBLY
1191 case kArchStackPointerGreaterThan: {
1192 // Potentially apply an offset to the current stack pointer before the
1193 // comparison to consider the size difference of an optimized frame versus
1194 // the contained unoptimized frames.
1195
1196 Register lhs_register = sp;
1197 uint32_t offset;
1198
1200 lhs_register = i.TempRegister(0);
1201 __ Sub(lhs_register, sp, offset);
1202 }
1203
1204 constexpr size_t kValueIndex = 0;
1205 DCHECK(instr->InputAt(kValueIndex)->IsRegister());
1206 __ Cmp(lhs_register, i.InputRegister(kValueIndex));
1207 break;
1208 }
1209 case kArchStackCheckOffset:
1210 __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
1211 break;
1212 case kArchTruncateDoubleToI:
1213 __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(),
1214 i.InputDoubleRegister(0), DetermineStubCallMode(),
1215 frame_access_state()->has_frame()
1218
1219 break;
1220 case kArchStoreWithWriteBarrier: {
1222 // Indirect pointer writes must use a different opcode.
1224 AddressingMode addressing_mode =
1226 Register object = i.InputRegister(0);
1227 Operand offset(0);
1228 if (addressing_mode == kMode_MRI) {
1229 offset = Operand(i.InputInt64(1));
1230 } else {
1231 DCHECK_EQ(addressing_mode, kMode_MRR);
1232 offset = Operand(i.InputRegister(1));
1233 }
1234 Register value = i.InputRegister(2);
1235
1236 if (v8_flags.debug_code) {
1237 // Checking that |value| is not a cleared weakref: our write barrier
1238 // does not support that for now.
1239 __ cmp(value, Operand(kClearedWeakHeapObjectLower32));
1240 __ Check(ne, AbortReason::kOperandIsCleared);
1241 }
1242
1243 auto ool = zone()->New<OutOfLineRecordWrite>(
1244 this, object, offset, value, mode, DetermineStubCallMode(),
1246 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1247 __ StoreTaggedField(value, MemOperand(object, offset));
1249 __ JumpIfSmi(value, ool->exit());
1250 }
1252 ne, ool->entry());
1253 __ Bind(ool->exit());
1254 break;
1255 }
1256 case kArchAtomicStoreWithWriteBarrier: {
1257 DCHECK_EQ(AddressingModeField::decode(instr->opcode()), kMode_MRR);
1259 // Indirect pointer writes must use a different opcode.
1261 Register object = i.InputRegister(0);
1262 Register offset = i.InputRegister(1);
1263 Register value = i.InputRegister(2);
1264 auto ool = zone()->New<OutOfLineRecordWrite>(
1265 this, object, offset, value, mode, DetermineStubCallMode(),
1267 __ AtomicStoreTaggedField(value, object, offset, i.TempRegister(0));
1268 // Skip the write barrier if the value is a Smi. However, this is only
1269 // valid if the value isn't an indirect pointer. Otherwise the value will
1270 // be a pointer table index, which will always look like a Smi (but
1271 // actually reference a pointer in the pointer table).
1273 __ JumpIfSmi(value, ool->exit());
1274 }
1276 ne, ool->entry());
1277 __ Bind(ool->exit());
1278 break;
1279 }
1280 case kArchStoreIndirectWithWriteBarrier: {
1283 AddressingMode addressing_mode =
1285 Register object = i.InputRegister(0);
1286 Operand offset(0);
1287 if (addressing_mode == kMode_MRI) {
1288 offset = Operand(i.InputInt64(1));
1289 } else {
1290 DCHECK_EQ(addressing_mode, kMode_MRR);
1291 offset = Operand(i.InputRegister(1));
1292 }
1293 Register value = i.InputRegister(2);
1294 IndirectPointerTag tag = static_cast<IndirectPointerTag>(i.InputInt64(3));
1296
1297 auto ool = zone()->New<OutOfLineRecordWrite>(
1298 this, object, offset, value, mode, DetermineStubCallMode(),
1300 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1301 __ StoreIndirectPointerField(value, MemOperand(object, offset));
1302 __ JumpIfMarking(ool->entry());
1303 __ Bind(ool->exit());
1304 break;
1305 }
1306 case kArchStackSlot: {
1307 FrameOffset offset =
1308 frame_access_state()->GetFrameOffset(i.InputInt32(0));
1309 Register base = offset.from_stack_pointer() ? sp : fp;
1310 __ Add(i.OutputRegister(0), base, Operand(offset.offset()));
1311 break;
1312 }
1313 case kIeee754Float64Acos:
1315 break;
1316 case kIeee754Float64Acosh:
1317 ASSEMBLE_IEEE754_UNOP(acosh);
1318 break;
1319 case kIeee754Float64Asin:
1321 break;
1322 case kIeee754Float64Asinh:
1323 ASSEMBLE_IEEE754_UNOP(asinh);
1324 break;
1325 case kIeee754Float64Atan:
1327 break;
1328 case kIeee754Float64Atanh:
1329 ASSEMBLE_IEEE754_UNOP(atanh);
1330 break;
1331 case kIeee754Float64Atan2:
1333 break;
1334 case kIeee754Float64Cos:
1336 break;
1337 case kIeee754Float64Cosh:
1339 break;
1340 case kIeee754Float64Cbrt:
1342 break;
1343 case kIeee754Float64Exp:
1345 break;
1346 case kIeee754Float64Expm1:
1347 ASSEMBLE_IEEE754_UNOP(expm1);
1348 break;
1349 case kIeee754Float64Log:
1351 break;
1352 case kIeee754Float64Log1p:
1353 ASSEMBLE_IEEE754_UNOP(log1p);
1354 break;
1355 case kIeee754Float64Log2:
1357 break;
1358 case kIeee754Float64Log10:
1359 ASSEMBLE_IEEE754_UNOP(log10);
1360 break;
1361 case kIeee754Float64Pow:
1363 break;
1364 case kIeee754Float64Sin:
1366 break;
1367 case kIeee754Float64Sinh:
1369 break;
1370 case kIeee754Float64Tan:
1372 break;
1373 case kIeee754Float64Tanh:
1375 break;
1376 case kArm64Float16RoundDown:
1377 EmitFpOrNeonUnop(masm(), &MacroAssembler::Frintm, instr, i, kFormatH,
1378 kFormat8H);
1379 break;
1380 case kArm64Float32RoundDown:
1381 EmitFpOrNeonUnop(masm(), &MacroAssembler::Frintm, instr, i, kFormatS,
1382 kFormat4S);
1383 break;
1384 case kArm64Float64RoundDown:
1385 EmitFpOrNeonUnop(masm(), &MacroAssembler::Frintm, instr, i, kFormatD,
1386 kFormat2D);
1387 break;
1388 case kArm64Float16RoundUp:
1389 EmitFpOrNeonUnop(masm(), &MacroAssembler::Frintp, instr, i, kFormatH,
1390 kFormat8H);
1391 break;
1392 case kArm64Float32RoundUp:
1393 EmitFpOrNeonUnop(masm(), &MacroAssembler::Frintp, instr, i, kFormatS,
1394 kFormat4S);
1395 break;
1396 case kArm64Float64RoundUp:
1397 EmitFpOrNeonUnop(masm(), &MacroAssembler::Frintp, instr, i, kFormatD,
1398 kFormat2D);
1399 break;
1400 case kArm64Float64RoundTiesAway:
1401 EmitFpOrNeonUnop(masm(), &MacroAssembler::Frinta, instr, i, kFormatD,
1402 kFormat2D);
1403 break;
1404 case kArm64Float16RoundTruncate:
1405 EmitFpOrNeonUnop(masm(), &MacroAssembler::Frintz, instr, i, kFormatH,
1406 kFormat8H);
1407 break;
1408 case kArm64Float32RoundTruncate:
1409 EmitFpOrNeonUnop(masm(), &MacroAssembler::Frintz, instr, i, kFormatS,
1410 kFormat4S);
1411 break;
1412 case kArm64Float64RoundTruncate:
1413 EmitFpOrNeonUnop(masm(), &MacroAssembler::Frintz, instr, i, kFormatD,
1414 kFormat2D);
1415 break;
1416 case kArm64Float16RoundTiesEven:
1417 EmitFpOrNeonUnop(masm(), &MacroAssembler::Frintn, instr, i, kFormatH,
1418 kFormat8H);
1419 break;
1420 case kArm64Float32RoundTiesEven:
1421 EmitFpOrNeonUnop(masm(), &MacroAssembler::Frintn, instr, i, kFormatS,
1422 kFormat4S);
1423 break;
1424 case kArm64Float64RoundTiesEven:
1425 EmitFpOrNeonUnop(masm(), &MacroAssembler::Frintn, instr, i, kFormatD,
1426 kFormat2D);
1427 break;
1428 case kArm64Add:
1429 if (FlagsModeField::decode(opcode) != kFlags_none) {
1430 __ Adds(i.OutputRegister(), i.InputOrZeroRegister64(0),
1431 i.InputOperand2_64(1));
1432 } else {
1433 __ Add(i.OutputRegister(), i.InputOrZeroRegister64(0),
1434 i.InputOperand2_64(1));
1435 }
1436 break;
1437 case kArm64Add32:
1438 if (FlagsModeField::decode(opcode) != kFlags_none) {
1439 __ Adds(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1440 i.InputOperand2_32(1));
1441 } else {
1442 __ Add(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1443 i.InputOperand2_32(1));
1444 }
1445 break;
1446 case kArm64And:
1447 if (FlagsModeField::decode(opcode) != kFlags_none) {
1448 // The ands instruction only sets N and Z, so only the following
1449 // conditions make sense.
1454 __ Ands(i.OutputRegister(), i.InputOrZeroRegister64(0),
1455 i.InputOperand2_64(1));
1456 } else {
1457 __ And(i.OutputRegister(), i.InputOrZeroRegister64(0),
1458 i.InputOperand2_64(1));
1459 }
1460 break;
1461 case kArm64And32:
1462 if (FlagsModeField::decode(opcode) != kFlags_none) {
1463 // The ands instruction only sets N and Z, so only the following
1464 // conditions make sense.
1469 __ Ands(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1470 i.InputOperand2_32(1));
1471 } else {
1472 __ And(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1473 i.InputOperand2_32(1));
1474 }
1475 break;
1476 case kArm64Bic:
1477 __ Bic(i.OutputRegister(), i.InputOrZeroRegister64(0),
1478 i.InputOperand2_64(1));
1479 break;
1480 case kArm64Bic32:
1481 __ Bic(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1482 i.InputOperand2_32(1));
1483 break;
1484 case kArm64Mul:
1485 __ Mul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1486 break;
1487 case kArm64Smulh:
1488 __ Smulh(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1489 break;
1490 case kArm64Umulh:
1491 __ Umulh(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1492 break;
1493 case kArm64Mul32:
1494 __ Mul(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
1495 break;
1496#if V8_ENABLE_WEBASSEMBLY
1497 case kArm64Bcax: {
1499 CpuFeatureScope scope(masm(), SHA3);
1500 __ Bcax(
1501 i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(),
1502 i.InputSimd128Register(1).V16B(), i.InputSimd128Register(2).V16B());
1503 break;
1504 }
1505 case kArm64Eor3: {
1507 CpuFeatureScope scope(masm(), SHA3);
1508 __ Eor3(
1509 i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(),
1510 i.InputSimd128Register(1).V16B(), i.InputSimd128Register(2).V16B());
1511 break;
1512 }
1513 case kArm64Sadalp: {
1514 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1517 __ Sadalp(i.OutputSimd128Register().Format(dst_f),
1518 i.InputSimd128Register(1).Format(src_f));
1519 break;
1520 }
1521 case kArm64Saddlp: {
1524 __ Saddlp(i.OutputSimd128Register().Format(dst_f),
1525 i.InputSimd128Register(0).Format(src_f));
1526 break;
1527 }
1528 case kArm64Uadalp: {
1529 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1532 __ Uadalp(i.OutputSimd128Register().Format(dst_f),
1533 i.InputSimd128Register(1).Format(src_f));
1534 break;
1535 }
1536 case kArm64Uaddlp: {
1539 __ Uaddlp(i.OutputSimd128Register().Format(dst_f),
1540 i.InputSimd128Register(0).Format(src_f));
1541 break;
1542 }
1543 case kArm64ISplat: {
1545 Register src = LaneSizeField::decode(opcode) == 64 ? i.InputRegister64(0)
1546 : i.InputRegister32(0);
1547 __ Dup(i.OutputSimd128Register().Format(f), src);
1548 break;
1549 }
1550 case kArm64FSplat: {
1551 VectorFormat src_f =
1553 VectorFormat dst_f = VectorFormatFillQ(src_f);
1554 if (src_f == kFormatH) {
1555 __ Fcvt(i.OutputFloat32Register(0).H(), i.InputFloat32Register(0));
1556 __ Dup(i.OutputSimd128Register().Format(dst_f),
1557 i.OutputSimd128Register().Format(src_f), 0);
1558 } else {
1559 __ Dup(i.OutputSimd128Register().Format(dst_f),
1560 i.InputSimd128Register(0).Format(src_f), 0);
1561 }
1562 break;
1563 }
1564 case kArm64Smlal: {
1566 VectorFormat src_f = VectorFormatHalfWidth(dst_f);
1567 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1568 __ Smlal(i.OutputSimd128Register().Format(dst_f),
1569 i.InputSimd128Register(1).Format(src_f),
1570 i.InputSimd128Register(2).Format(src_f));
1571 break;
1572 }
1573 case kArm64Smlal2: {
1576 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1577 __ Smlal2(i.OutputSimd128Register().Format(dst_f),
1578 i.InputSimd128Register(1).Format(src_f),
1579 i.InputSimd128Register(2).Format(src_f));
1580 break;
1581 }
1582 case kArm64Umlal: {
1584 VectorFormat src_f = VectorFormatHalfWidth(dst_f);
1585 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1586 __ Umlal(i.OutputSimd128Register().Format(dst_f),
1587 i.InputSimd128Register(1).Format(src_f),
1588 i.InputSimd128Register(2).Format(src_f));
1589 break;
1590 }
1591 case kArm64Umlal2: {
1594 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1595 __ Umlal2(i.OutputSimd128Register().Format(dst_f),
1596 i.InputSimd128Register(1).Format(src_f),
1597 i.InputSimd128Register(2).Format(src_f));
1598 break;
1599 }
1600#endif // V8_ENABLE_WEBASSEMBLY
1601 case kArm64Smull: {
1602 if (instr->InputAt(0)->IsRegister()) {
1603 __ Smull(i.OutputRegister(), i.InputRegister32(0),
1604 i.InputRegister32(1));
1605 } else {
1606 DCHECK(instr->InputAt(0)->IsSimd128Register());
1608 VectorFormat src_f = VectorFormatHalfWidth(dst_f);
1609 __ Smull(i.OutputSimd128Register().Format(dst_f),
1610 i.InputSimd128Register(0).Format(src_f),
1611 i.InputSimd128Register(1).Format(src_f));
1612 }
1613 break;
1614 }
1615 case kArm64Smull2: {
1618 __ Smull2(i.OutputSimd128Register().Format(dst_f),
1619 i.InputSimd128Register(0).Format(src_f),
1620 i.InputSimd128Register(1).Format(src_f));
1621 break;
1622 }
1623 case kArm64Umull: {
1624 if (instr->InputAt(0)->IsRegister()) {
1625 __ Umull(i.OutputRegister(), i.InputRegister32(0),
1626 i.InputRegister32(1));
1627 } else {
1628 DCHECK(instr->InputAt(0)->IsSimd128Register());
1630 VectorFormat src_f = VectorFormatHalfWidth(dst_f);
1631 __ Umull(i.OutputSimd128Register().Format(dst_f),
1632 i.InputSimd128Register(0).Format(src_f),
1633 i.InputSimd128Register(1).Format(src_f));
1634 }
1635 break;
1636 }
1637 case kArm64Umull2: {
1640 __ Umull2(i.OutputSimd128Register().Format(dst_f),
1641 i.InputSimd128Register(0).Format(src_f),
1642 i.InputSimd128Register(1).Format(src_f));
1643 break;
1644 }
1645 case kArm64Madd:
1646 __ Madd(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1647 i.InputRegister(2));
1648 break;
1649 case kArm64Madd32:
1650 __ Madd(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1),
1651 i.InputRegister32(2));
1652 break;
1653 case kArm64Msub:
1654 __ Msub(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1655 i.InputRegister(2));
1656 break;
1657 case kArm64Msub32:
1658 __ Msub(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1),
1659 i.InputRegister32(2));
1660 break;
1661 case kArm64Mneg:
1662 __ Mneg(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1663 break;
1664 case kArm64Mneg32:
1665 __ Mneg(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
1666 break;
1667 case kArm64Idiv:
1668 __ Sdiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1669 break;
1670 case kArm64Idiv32:
1671 __ Sdiv(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
1672 break;
1673 case kArm64Udiv:
1674 __ Udiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1675 break;
1676 case kArm64Udiv32:
1677 __ Udiv(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
1678 break;
1679 case kArm64Imod: {
1680 UseScratchRegisterScope scope(masm());
1681 Register temp = scope.AcquireX();
1682 __ Sdiv(temp, i.InputRegister(0), i.InputRegister(1));
1683 __ Msub(i.OutputRegister(), temp, i.InputRegister(1), i.InputRegister(0));
1684 break;
1685 }
1686 case kArm64Imod32: {
1687 UseScratchRegisterScope scope(masm());
1688 Register temp = scope.AcquireW();
1689 __ Sdiv(temp, i.InputRegister32(0), i.InputRegister32(1));
1690 __ Msub(i.OutputRegister32(), temp, i.InputRegister32(1),
1691 i.InputRegister32(0));
1692 break;
1693 }
1694 case kArm64Umod: {
1695 UseScratchRegisterScope scope(masm());
1696 Register temp = scope.AcquireX();
1697 __ Udiv(temp, i.InputRegister(0), i.InputRegister(1));
1698 __ Msub(i.OutputRegister(), temp, i.InputRegister(1), i.InputRegister(0));
1699 break;
1700 }
1701 case kArm64Umod32: {
1702 UseScratchRegisterScope scope(masm());
1703 Register temp = scope.AcquireW();
1704 __ Udiv(temp, i.InputRegister32(0), i.InputRegister32(1));
1705 __ Msub(i.OutputRegister32(), temp, i.InputRegister32(1),
1706 i.InputRegister32(0));
1707 break;
1708 }
1709 case kArm64Not:
1710 __ Mvn(i.OutputRegister(), i.InputOperand(0));
1711 break;
1712 case kArm64Not32:
1713 __ Mvn(i.OutputRegister32(), i.InputOperand32(0));
1714 break;
1715 case kArm64Or:
1716 __ Orr(i.OutputRegister(), i.InputOrZeroRegister64(0),
1717 i.InputOperand2_64(1));
1718 break;
1719 case kArm64Or32:
1720 __ Orr(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1721 i.InputOperand2_32(1));
1722 break;
1723 case kArm64Orn:
1724 __ Orn(i.OutputRegister(), i.InputOrZeroRegister64(0),
1725 i.InputOperand2_64(1));
1726 break;
1727 case kArm64Orn32:
1728 __ Orn(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1729 i.InputOperand2_32(1));
1730 break;
1731 case kArm64Eor:
1732 __ Eor(i.OutputRegister(), i.InputOrZeroRegister64(0),
1733 i.InputOperand2_64(1));
1734 break;
1735 case kArm64Eor32:
1736 __ Eor(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1737 i.InputOperand2_32(1));
1738 break;
1739 case kArm64Eon:
1740 __ Eon(i.OutputRegister(), i.InputOrZeroRegister64(0),
1741 i.InputOperand2_64(1));
1742 break;
1743 case kArm64Eon32:
1744 __ Eon(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1745 i.InputOperand2_32(1));
1746 break;
1747 case kArm64Sub:
1748 if (FlagsModeField::decode(opcode) != kFlags_none) {
1749 __ Subs(i.OutputRegister(), i.InputOrZeroRegister64(0),
1750 i.InputOperand2_64(1));
1751 } else {
1752 __ Sub(i.OutputRegister(), i.InputOrZeroRegister64(0),
1753 i.InputOperand2_64(1));
1754 }
1755 break;
1756 case kArm64Sub32:
1757 if (FlagsModeField::decode(opcode) != kFlags_none) {
1758 __ Subs(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1759 i.InputOperand2_32(1));
1760 } else {
1761 __ Sub(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1762 i.InputOperand2_32(1));
1763 }
1764 break;
1765 case kArm64Lsl:
1766 ASSEMBLE_SHIFT(Lsl, 64);
1767 break;
1768 case kArm64Lsl32:
1769 ASSEMBLE_SHIFT(Lsl, 32);
1770 break;
1771 case kArm64Lsr:
1772 ASSEMBLE_SHIFT(Lsr, 64);
1773 break;
1774 case kArm64Lsr32:
1775 ASSEMBLE_SHIFT(Lsr, 32);
1776 break;
1777 case kArm64Asr:
1778 ASSEMBLE_SHIFT(Asr, 64);
1779 break;
1780 case kArm64Asr32:
1781 ASSEMBLE_SHIFT(Asr, 32);
1782 break;
1783 case kArm64Ror:
1784 ASSEMBLE_SHIFT(Ror, 64);
1785 break;
1786 case kArm64Ror32:
1787 ASSEMBLE_SHIFT(Ror, 32);
1788 break;
1789 case kArm64Mov32:
1790 __ Mov(i.OutputRegister32(), i.InputRegister32(0));
1791 break;
1792 case kArm64Sxtb32:
1793 __ Sxtb(i.OutputRegister32(), i.InputRegister32(0));
1794 break;
1795 case kArm64Sxth32:
1796 __ Sxth(i.OutputRegister32(), i.InputRegister32(0));
1797 break;
1798 case kArm64Sxtb:
1799 __ Sxtb(i.OutputRegister(), i.InputRegister32(0));
1800 break;
1801 case kArm64Sxth:
1802 __ Sxth(i.OutputRegister(), i.InputRegister32(0));
1803 break;
1804 case kArm64Sxtw:
1805 __ Sxtw(i.OutputRegister(), i.InputRegister32(0));
1806 break;
1807 case kArm64Sbfx:
1808 __ Sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt6(1),
1809 i.InputInt6(2));
1810 break;
1811 case kArm64Sbfx32:
1812 __ Sbfx(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
1813 i.InputInt5(2));
1814 break;
1815 case kArm64Ubfx:
1816 __ Ubfx(i.OutputRegister(), i.InputRegister(0), i.InputInt6(1),
1817 i.InputInt32(2));
1818 break;
1819 case kArm64Ubfx32:
1820 __ Ubfx(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
1821 i.InputInt32(2));
1822 break;
1823 case kArm64Ubfiz32:
1824 __ Ubfiz(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
1825 i.InputInt5(2));
1826 break;
1827 case kArm64Sbfiz:
1828 __ Sbfiz(i.OutputRegister(), i.InputRegister(0), i.InputInt6(1),
1829 i.InputInt6(2));
1830 break;
1831 case kArm64Bfi:
1832 __ Bfi(i.OutputRegister(), i.InputRegister(1), i.InputInt6(2),
1833 i.InputInt6(3));
1834 break;
1835 case kArm64TestAndBranch32:
1836 case kArm64TestAndBranch:
1837 // Pseudo instructions turned into tbz/tbnz in AssembleArchBranch.
1838 break;
1839 case kArm64CompareAndBranch32:
1840 case kArm64CompareAndBranch:
1841 // Pseudo instruction handled in AssembleArchBranch.
1842 break;
1843 case kArm64Claim: {
1844 int count = i.InputInt32(0);
1845 DCHECK_EQ(count % 2, 0);
1846 __ AssertSpAligned();
1847 if (count > 0) {
1848 __ Claim(count);
1850 }
1851 break;
1852 }
1853 case kArm64Poke: {
1854 Operand operand(i.InputInt32(1) * kSystemPointerSize);
1855 if (instr->InputAt(0)->IsSimd128Register()) {
1856 __ Poke(i.InputSimd128Register(0), operand);
1857 } else if (instr->InputAt(0)->IsFPRegister()) {
1858 __ Poke(i.InputFloat64Register(0), operand);
1859 } else {
1860 __ Poke(i.InputOrZeroRegister64(0), operand);
1861 }
1862 break;
1863 }
1864 case kArm64PokePair: {
1865 int slot = i.InputInt32(2) - 1;
1866 if (instr->InputAt(0)->IsFPRegister()) {
1867 __ PokePair(i.InputFloat64Register(1), i.InputFloat64Register(0),
1868 slot * kSystemPointerSize);
1869 } else {
1870 __ PokePair(i.InputRegister(1), i.InputRegister(0),
1871 slot * kSystemPointerSize);
1872 }
1873 break;
1874 }
1875 case kArm64Peek: {
1876 int reverse_slot = i.InputInt32(0);
1877 int offset =
1878 FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
1879 if (instr->OutputAt(0)->IsFPRegister()) {
1880 LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
1881 if (op->representation() == MachineRepresentation::kFloat64) {
1882 __ Ldr(i.OutputDoubleRegister(), MemOperand(fp, offset));
1883 } else if (op->representation() == MachineRepresentation::kFloat32) {
1884 __ Ldr(i.OutputFloatRegister(), MemOperand(fp, offset));
1885 } else {
1886 DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
1887 __ Ldr(i.OutputSimd128Register(), MemOperand(fp, offset));
1888 }
1889 } else {
1890 __ Ldr(i.OutputRegister(), MemOperand(fp, offset));
1891 }
1892 break;
1893 }
1894 case kArm64Clz:
1895 __ Clz(i.OutputRegister64(), i.InputRegister64(0));
1896 break;
1897 case kArm64Clz32:
1898 __ Clz(i.OutputRegister32(), i.InputRegister32(0));
1899 break;
1900 case kArm64Rbit:
1901 __ Rbit(i.OutputRegister64(), i.InputRegister64(0));
1902 break;
1903 case kArm64Rbit32:
1904 __ Rbit(i.OutputRegister32(), i.InputRegister32(0));
1905 break;
1906 case kArm64Rev:
1907 __ Rev(i.OutputRegister64(), i.InputRegister64(0));
1908 break;
1909 case kArm64Rev32:
1910 __ Rev(i.OutputRegister32(), i.InputRegister32(0));
1911 break;
1912 case kArm64Cmp:
1913 __ Cmp(i.InputOrZeroRegister64(0), i.InputOperand2_64(1));
1914 break;
1915 case kArm64Cmp32:
1916 __ Cmp(i.InputOrZeroRegister32(0), i.InputOperand2_32(1));
1917 break;
1918 case kArm64Cmn:
1919 __ Cmn(i.InputOrZeroRegister64(0), i.InputOperand2_64(1));
1920 break;
1921 case kArm64Cmn32:
1922 __ Cmn(i.InputOrZeroRegister32(0), i.InputOperand2_32(1));
1923 break;
1924 case kArm64Cnt32: {
1925 __ PopcntHelper(i.OutputRegister32(), i.InputRegister32(0));
1926 break;
1927 }
1928 case kArm64Cnt64: {
1929 __ PopcntHelper(i.OutputRegister64(), i.InputRegister64(0));
1930 break;
1931 }
1932 case kArm64Cnt: {
1933 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode));
1934 __ Cnt(i.OutputSimd128Register().Format(f),
1935 i.InputSimd128Register(0).Format(f));
1936 break;
1937 }
1938 case kArm64Tst:
1939 __ Tst(i.InputOrZeroRegister64(0), i.InputOperand2_64(1));
1940 break;
1941 case kArm64Tst32:
1942 __ Tst(i.InputOrZeroRegister32(0), i.InputOperand2_32(1));
1943 break;
1944 case kArm64Float32Cmp:
1945 if (instr->InputAt(1)->IsFPRegister()) {
1946 __ Fcmp(i.InputFloat32OrFPZeroRegister(0), i.InputFloat32Register(1));
1947 } else {
1948 DCHECK(instr->InputAt(1)->IsImmediate());
1949 // 0.0 is the only immediate supported by fcmp instructions.
1950 DCHECK_EQ(0.0f, i.InputFloat32(1));
1951 __ Fcmp(i.InputFloat32Register(0), i.InputFloat32(1));
1952 }
1953 break;
1954 case kArm64Float32Add:
1955 __ Fadd(i.OutputFloat32Register(), i.InputFloat32Register(0),
1956 i.InputFloat32Register(1));
1957 break;
1958 case kArm64Float32Sub:
1959 __ Fsub(i.OutputFloat32Register(), i.InputFloat32Register(0),
1960 i.InputFloat32Register(1));
1961 break;
1962 case kArm64Float32Mul:
1963 __ Fmul(i.OutputFloat32Register(), i.InputFloat32Register(0),
1964 i.InputFloat32Register(1));
1965 break;
1966 case kArm64Float32Div:
1967 __ Fdiv(i.OutputFloat32Register(), i.InputFloat32Register(0),
1968 i.InputFloat32Register(1));
1969 break;
1970 case kArm64Float32Abs:
1971 __ Fabs(i.OutputFloat32Register(), i.InputFloat32Register(0));
1972 break;
1973 case kArm64Float32Abd:
1974 __ Fabd(i.OutputFloat32Register(), i.InputFloat32Register(0),
1975 i.InputFloat32Register(1));
1976 break;
1977 case kArm64Float32Neg:
1978 __ Fneg(i.OutputFloat32Register(), i.InputFloat32Register(0));
1979 break;
1980 case kArm64Float32Sqrt:
1981 __ Fsqrt(i.OutputFloat32Register(), i.InputFloat32Register(0));
1982 break;
1983 case kArm64Float32Fnmul: {
1984 __ Fnmul(i.OutputFloat32Register(), i.InputFloat32Register(0),
1985 i.InputFloat32Register(1));
1986 break;
1987 }
1988 case kArm64Float64Cmp:
1989 if (instr->InputAt(1)->IsFPRegister()) {
1990 __ Fcmp(i.InputFloat64OrFPZeroRegister(0), i.InputDoubleRegister(1));
1991 } else {
1992 DCHECK(instr->InputAt(1)->IsImmediate());
1993 // 0.0 is the only immediate supported by fcmp instructions.
1994 DCHECK_EQ(0.0, i.InputDouble(1));
1995 __ Fcmp(i.InputFloat64Register(0), i.InputDouble(1));
1996 }
1997 break;
1998 case kArm64Float64Add:
1999 __ Fadd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2000 i.InputDoubleRegister(1));
2001 break;
2002 case kArm64Float64Sub:
2003 __ Fsub(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2004 i.InputDoubleRegister(1));
2005 break;
2006 case kArm64Float64Mul:
2007 __ Fmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2008 i.InputDoubleRegister(1));
2009 break;
2010 case kArm64Float64Div:
2011 __ Fdiv(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2012 i.InputDoubleRegister(1));
2013 break;
2014 case kArm64Float64Mod: {
2015 // TODO(turbofan): implement directly.
2016 FrameScope scope(masm(), StackFrame::MANUAL);
2017 DCHECK_EQ(d0, i.InputDoubleRegister(0));
2018 DCHECK_EQ(d1, i.InputDoubleRegister(1));
2019 DCHECK_EQ(d0, i.OutputDoubleRegister());
2020 // TODO(turbofan): make sure this saves all relevant registers.
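// Note: Arm64 has no instruction for floating-point remainder, so this falls
// back to a C helper. Per AAPCS64 the two double arguments travel in d0/d1
// and the result is returned in d0, which the DCHECKs above pin down.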
2021 __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
2022 break;
2023 }
2024 case kArm64Float32Max: {
2025 __ Fmax(i.OutputFloat32Register(), i.InputFloat32Register(0),
2026 i.InputFloat32Register(1));
2027 break;
2028 }
2029 case kArm64Float64Max: {
2030 __ Fmax(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2031 i.InputDoubleRegister(1));
2032 break;
2033 }
2034 case kArm64Float32Min: {
2035 __ Fmin(i.OutputFloat32Register(), i.InputFloat32Register(0),
2036 i.InputFloat32Register(1));
2037 break;
2038 }
2039 case kArm64Float64Min: {
2040 __ Fmin(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2041 i.InputDoubleRegister(1));
2042 break;
2043 }
2044 case kArm64Float64Abs:
2045 __ Fabs(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
2046 break;
2047 case kArm64Float64Abd:
2048 __ Fabd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2049 i.InputDoubleRegister(1));
2050 break;
2051 case kArm64Float64Neg:
2052 __ Fneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
2053 break;
2054 case kArm64Float64Sqrt:
2055 __ Fsqrt(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
2056 break;
2057 case kArm64Float64Fnmul:
2058 __ Fnmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2059 i.InputDoubleRegister(1));
2060 break;
2061 case kArm64Float32ToFloat64:
2062 __ Fcvt(i.OutputDoubleRegister(), i.InputDoubleRegister(0).S());
2063 break;
2064 case kArm64Float64ToFloat32:
2065 __ Fcvt(i.OutputDoubleRegister().S(), i.InputDoubleRegister(0));
2066 break;
2067 case kArm64Float64ToFloat16RawBits: {
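// Note: Fcvt narrows the double to half precision in a temp register; the
// scalar write zeroes the temp's upper bits, so the Fmov of its S view below
// yields the raw 16-bit encoding zero-extended into a W register.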
2068 VRegister tmp_dst = i.TempDoubleRegister(0);
2069 __ Fcvt(tmp_dst.H(), i.InputDoubleRegister(0));
2070 __ Fmov(i.OutputRegister32(), tmp_dst.S());
2071 break;
2072 }
2073 case kArm64Float16RawBitsToFloat64: {
2074 VRegister tmp_dst = i.TempDoubleRegister(0);
2075 __ Fmov(tmp_dst.S(), i.InputRegister32(0));
2076 __ Fcvt(i.OutputDoubleRegister(), tmp_dst.H());
2077 break;
2078 }
2079 case kArm64Float32ToInt32: {
2080 __ Fcvtzs(i.OutputRegister32(), i.InputFloat32Register(0));
2081 bool set_overflow_to_min_i32 = MiscField::decode(instr->opcode());
2082 if (set_overflow_to_min_i32) {
2083 // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
2084 // because INT32_MIN allows easier out-of-bounds detection.
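// Note: Cmn adds 1 to the result, so the V flag is set exactly when Fcvtzs
// saturated to INT32_MAX; Csinc then bumps that value by one, wrapping it to
// INT32_MIN, and leaves in-range results unchanged.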
2085 __ Cmn(i.OutputRegister32(), 1);
2086 __ Csinc(i.OutputRegister32(), i.OutputRegister32(),
2087 i.OutputRegister32(), vc);
2088 }
2089 break;
2090 }
2091 case kArm64Float64ToInt32:
2092 __ Fcvtzs(i.OutputRegister32(), i.InputDoubleRegister(0));
2093 if (i.OutputCount() > 1) {
2094 // Check for inputs below INT32_MIN and NaN.
2095 __ Fcmp(i.InputDoubleRegister(0), static_cast<double>(INT32_MIN));
2096 __ Cset(i.OutputRegister(1).W(), ge);
2097 __ Fcmp(i.InputDoubleRegister(0), static_cast<double>(INT32_MAX) + 1);
2098 __ CmovX(i.OutputRegister(1), xzr, ge);
2099 }
2100 break;
2101 case kArm64Float32ToUint32: {
2102 __ Fcvtzu(i.OutputRegister32(), i.InputFloat32Register(0));
2103 bool set_overflow_to_min_u32 = MiscField::decode(instr->opcode());
2104 if (set_overflow_to_min_u32) {
2105 // Avoid UINT32_MAX as an overflow indicator and use 0 instead,
2106 // because 0 allows easier out-of-bounds detection.
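// Note: Cmn against 1 sets the carry flag exactly when Fcvtzu saturated to
// UINT32_MAX; Adc then adds the carry back in, wrapping the saturated value
// to 0 while leaving in-range results unchanged.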
2107 __ Cmn(i.OutputRegister32(), 1);
2108 __ Adc(i.OutputRegister32(), i.OutputRegister32(), Operand(0));
2109 }
2110 break;
2111 }
2112 case kArm64Float64ToUint32:
2113 __ Fcvtzu(i.OutputRegister32(), i.InputDoubleRegister(0));
2114 if (i.OutputCount() > 1) {
2115 __ Fcmp(i.InputDoubleRegister(0), -1.0);
2116 __ Cset(i.OutputRegister(1).W(), gt);
2117 __ Fcmp(i.InputDoubleRegister(0), static_cast<double>(UINT32_MAX) + 1);
2118 __ CmovX(i.OutputRegister(1), xzr, ge);
2119 }
2120 break;
2121 case kArm64Float32ToInt64:
2122 __ Fcvtzs(i.OutputRegister64(), i.InputFloat32Register(0));
2123 if (i.OutputCount() > 1) {
2124 // Check for inputs below INT64_MIN and NaN.
2125 __ Fcmp(i.InputFloat32Register(0), static_cast<float>(INT64_MIN));
2126 // Check overflow.
2127 // Comparing the result against -1 (i.e. adding 1) overflows exactly when
2128 // the conversion saturated to INT64_MAX, which signals a possible overflow.
2129 // OutputRegister(1) is set to 0 if the input was out of range or NaN.
2130 __ Ccmp(i.OutputRegister(0), -1, VFlag, ge);
2131 __ Cset(i.OutputRegister(1), vc);
2132 }
2133 break;
2134 case kArm64Float64ToInt64: {
2135 __ Fcvtzs(i.OutputRegister(0), i.InputDoubleRegister(0));
2136 bool set_overflow_to_min_i64 = MiscField::decode(instr->opcode());
2137 DCHECK_IMPLIES(set_overflow_to_min_i64, i.OutputCount() == 1);
2138 if (set_overflow_to_min_i64) {
2139 // Avoid INT64_MAX as an overflow indicator and use INT64_MIN instead,
2140 // because INT64_MIN allows easier out-of-bounds detection.
2141 __ Cmn(i.OutputRegister64(), 1);
2142 __ Csinc(i.OutputRegister64(), i.OutputRegister64(),
2143 i.OutputRegister64(), vc);
2144 } else if (i.OutputCount() > 1) {
2145 // See kArm64Float32ToInt64 for a detailed description.
2146 __ Fcmp(i.InputDoubleRegister(0), static_cast<double>(INT64_MIN));
2147 __ Ccmp(i.OutputRegister(0), -1, VFlag, ge);
2148 __ Cset(i.OutputRegister(1), vc);
2149 }
2150 break;
2151 }
2152 case kArm64Float32ToUint64:
2153 __ Fcvtzu(i.OutputRegister64(), i.InputFloat32Register(0));
2154 if (i.OutputCount() > 1) {
2155 // See kArm64Float32ToInt64 for a detailed description.
2156 __ Fcmp(i.InputFloat32Register(0), -1.0);
2157 __ Ccmp(i.OutputRegister(0), -1, ZFlag, gt);
2158 __ Cset(i.OutputRegister(1), ne);
2159 }
2160 break;
2161 case kArm64Float64ToUint64:
2162 __ Fcvtzu(i.OutputRegister64(), i.InputDoubleRegister(0));
2163 if (i.OutputCount() > 1) {
2164 // See kArm64Float32ToInt64 for a detailed description.
2165 __ Fcmp(i.InputDoubleRegister(0), -1.0);
2166 __ Ccmp(i.OutputRegister(0), -1, ZFlag, gt);
2167 __ Cset(i.OutputRegister(1), ne);
2168 }
2169 break;
2170 case kArm64Int32ToFloat32:
2171 __ Scvtf(i.OutputFloat32Register(), i.InputRegister32(0));
2172 break;
2173 case kArm64Int32ToFloat64:
2174 __ Scvtf(i.OutputDoubleRegister(), i.InputRegister32(0));
2175 break;
2176 case kArm64Int64ToFloat32:
2177 __ Scvtf(i.OutputDoubleRegister().S(), i.InputRegister64(0));
2178 break;
2179 case kArm64Int64ToFloat64:
2180 __ Scvtf(i.OutputDoubleRegister(), i.InputRegister64(0));
2181 break;
2182 case kArm64Uint32ToFloat32:
2183 __ Ucvtf(i.OutputFloat32Register(), i.InputRegister32(0));
2184 break;
2185 case kArm64Uint32ToFloat64:
2186 __ Ucvtf(i.OutputDoubleRegister(), i.InputRegister32(0));
2187 break;
2188 case kArm64Uint64ToFloat32:
2189 __ Ucvtf(i.OutputDoubleRegister().S(), i.InputRegister64(0));
2190 break;
2191 case kArm64Uint64ToFloat64:
2192 __ Ucvtf(i.OutputDoubleRegister(), i.InputRegister64(0));
2193 break;
2194 case kArm64Float64ExtractLowWord32:
2195 __ Fmov(i.OutputRegister32(), i.InputFloat32Register(0));
2196 break;
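// Note: the next three cases view the double's 64-bit pattern as two 32-bit
// lanes (V2S), so Umov/Ins on lane 0 or 1 read or write the low or high word
// directly in the register file, without going through memory.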
2197 case kArm64Float64ExtractHighWord32:
2198 __ Umov(i.OutputRegister32(), i.InputFloat64Register(0).V2S(), 1);
2199 break;
2200 case kArm64Float64InsertLowWord32:
2201 DCHECK_EQ(i.OutputFloat64Register(), i.InputFloat64Register(0));
2202 __ Ins(i.OutputFloat64Register().V2S(), 0, i.InputRegister32(1));
2203 break;
2204 case kArm64Float64InsertHighWord32:
2205 DCHECK_EQ(i.OutputFloat64Register(), i.InputFloat64Register(0));
2206 __ Ins(i.OutputFloat64Register().V2S(), 1, i.InputRegister32(1));
2207 break;
2208 case kArm64Float64MoveU64:
2209 __ Fmov(i.OutputFloat64Register(), i.InputRegister(0));
2210 break;
2211 case kArm64Float64SilenceNaN:
2212 __ CanonicalizeNaN(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
2213 break;
2214 case kArm64U64MoveFloat64:
2215 __ Fmov(i.OutputRegister(), i.InputDoubleRegister(0));
2216 break;
2217 case kArm64Ldrb:
2218 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2219 __ Ldrb(i.OutputRegister(), i.MemoryOperand());
2220 break;
2221 case kArm64Ldrsb:
2222 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2223 __ Ldrsb(i.OutputRegister(), i.MemoryOperand());
2224 break;
2225 case kArm64LdrsbW:
2226 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2227 __ Ldrsb(i.OutputRegister32(), i.MemoryOperand());
2228 break;
2229 case kArm64Strb:
2230 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2231 __ Strb(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
2232 break;
2233 case kArm64Ldrh:
2234 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2235 __ Ldrh(i.OutputRegister(), i.MemoryOperand());
2236 break;
2237 case kArm64Ldrsh:
2238 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2239 __ Ldrsh(i.OutputRegister(), i.MemoryOperand());
2240 break;
2241 case kArm64LdrshW:
2242 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2243 __ Ldrsh(i.OutputRegister32(), i.MemoryOperand());
2244 break;
2245 case kArm64Strh:
2246 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2247 __ Strh(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
2248 break;
2249 case kArm64Ldrsw:
2250 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2251 __ Ldrsw(i.OutputRegister(), i.MemoryOperand());
2252 break;
2253 case kArm64LdrW:
2254 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2255 __ Ldr(i.OutputRegister32(), i.MemoryOperand());
2256 break;
2257 case kArm64StrW:
2258 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2259 __ Str(i.InputOrZeroRegister32(0), i.MemoryOperand(1));
2260 break;
2261 case kArm64StrWPair:
2262 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2263 __ Stp(i.InputOrZeroRegister32(0), i.InputOrZeroRegister32(1),
2264 i.MemoryOperand(2));
2265 break;
2266 case kArm64Ldr:
2267 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2268 __ Ldr(i.OutputRegister(), i.MemoryOperand());
2269 break;
2270 case kArm64LdrDecompressTaggedSigned:
2271 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2272 __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
2273 break;
2274 case kArm64LdrDecompressTagged:
2275 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2276 __ DecompressTagged(i.OutputRegister(), i.MemoryOperand());
2277 break;
2278 case kArm64LdrDecompressProtected:
2279 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2280 __ DecompressProtected(i.OutputRegister(), i.MemoryOperand());
2281 break;
2282 case kArm64LdarDecompressTaggedSigned:
2283 __ AtomicDecompressTaggedSigned(i.OutputRegister(), i.InputRegister(0),
2284 i.InputRegister(1), i.TempRegister(0));
2285 break;
2286 case kArm64LdarDecompressTagged:
2287 __ AtomicDecompressTagged(i.OutputRegister(), i.InputRegister(0),
2288 i.InputRegister(1), i.TempRegister(0));
2289 break;
2290 case kArm64LdrDecodeSandboxedPointer:
2291 __ LoadSandboxedPointerField(i.OutputRegister(), i.MemoryOperand());
2292 break;
2293 case kArm64Str:
2294 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2295 __ Str(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
2296 break;
2297 case kArm64StrPair:
2298 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2299 __ Stp(i.InputOrZeroRegister64(0), i.InputOrZeroRegister64(1),
2300 i.MemoryOperand(2));
2301 break;
2302 case kArm64StrCompressTagged:
2303 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2304 __ StoreTaggedField(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
2305 break;
2306 case kArm64StlrCompressTagged:
2307 // To be consistent with other STLR instructions, the value to store is
2308 // taken from the 3rd input register instead of the 1st.
2309 __ AtomicStoreTaggedField(i.InputRegister(2), i.InputRegister(0),
2310 i.InputRegister(1), i.TempRegister(0));
2311 break;
2312 case kArm64StrIndirectPointer:
2313 __ StoreIndirectPointerField(i.InputOrZeroRegister64(0),
2314 i.MemoryOperand(1));
2315 break;
2316 case kArm64StrEncodeSandboxedPointer:
2317 __ StoreSandboxedPointerField(i.InputOrZeroRegister64(0),
2318 i.MemoryOperand(1));
2319 break;
2320 case kArm64LdrH: {
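// Note: half-precision values appear to be carried as single precision in
// registers: the load widens H -> S immediately, and kArm64StrH below narrows
// S -> H again before storing.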
2321 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2322 __ Ldr(i.OutputDoubleRegister().H(), i.MemoryOperand());
2323 __ Fcvt(i.OutputDoubleRegister().S(), i.OutputDoubleRegister().H());
2324 break;
2325 }
2326 case kArm64StrH:
2327 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2328 __ Fcvt(i.InputFloat32OrZeroRegister(0).H(),
2329 i.InputFloat32OrZeroRegister(0).S());
2330 __ Str(i.InputFloat32OrZeroRegister(0).H(), i.MemoryOperand(1));
2331 break;
2332 case kArm64LdrS:
2333 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2334 __ Ldr(i.OutputDoubleRegister().S(), i.MemoryOperand());
2335 break;
2336 case kArm64StrS:
2337 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2338 __ Str(i.InputFloat32OrZeroRegister(0), i.MemoryOperand(1));
2339 break;
2340 case kArm64LdrD:
2341 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2342 __ Ldr(i.OutputDoubleRegister(), i.MemoryOperand());
2343 break;
2344 case kArm64StrD:
2345 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2346 __ Str(i.InputFloat64OrZeroRegister(0), i.MemoryOperand(1));
2347 break;
2348 case kArm64LdrQ:
2349 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2350 __ Ldr(i.OutputSimd128Register(), i.MemoryOperand());
2351 break;
2352 case kArm64StrQ:
2353 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2354 __ Str(i.InputSimd128Register(0), i.MemoryOperand(1));
2355 break;
2356 case kArm64DmbIsh:
2357 __ Dmb(InnerShareable, BarrierAll);
2358 break;
2359 case kArm64DsbIsb:
2360 __ Dsb(FullSystem, BarrierAll);
2361 __ Isb();
2362 break;
2363 case kAtomicLoadInt8:
2365 ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarb, Register32);
2366 __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));
2367 break;
2368 case kAtomicLoadUint8:
2369 ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarb, Register32);
2370 break;
2371 case kAtomicLoadInt16:
2373 ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarh, Register32);
2374 __ Sxth(i.OutputRegister(0), i.OutputRegister(0));
2375 break;
2376 case kAtomicLoadUint16:
2377 ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarh, Register32);
2378 break;
2379 case kAtomicLoadWord32:
2380 ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldar, Register32);
2381 break;
2382 case kArm64Word64AtomicLoadUint64:
2383 ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldar, Register);
2384 break;
2385 case kAtomicStoreWord8:
2386 ASSEMBLE_ATOMIC_STORE_INTEGER(Stlrb, Register32);
2387 break;
2388 case kAtomicStoreWord16:
2389 ASSEMBLE_ATOMIC_STORE_INTEGER(Stlrh, Register32);
2390 break;
2391 case kAtomicStoreWord32:
2392 ASSEMBLE_ATOMIC_STORE_INTEGER(Stlr, Register32);
2393 break;
2394 case kArm64Word64AtomicStoreWord64:
2395 ASSEMBLE_ATOMIC_STORE_INTEGER(Stlr, Register);
2396 break;
2397 case kAtomicExchangeInt8:
2398 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(b, Register32);
2399 __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));
2400 break;
2401 case kAtomicExchangeUint8:
2402 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(b, Register32);
2403 break;
2404 case kAtomicExchangeInt16:
2405 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(h, Register32);
2406 __ Sxth(i.OutputRegister(0), i.OutputRegister(0));
2407 break;
2408 case kAtomicExchangeUint16:
2409 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(h, Register32);
2410 break;
2411 case kAtomicExchangeWord32:
2412 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(, Register32);
2413 break;
2414 case kArm64Word64AtomicExchangeUint64:
2415 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(, Register);
2416 break;
2417 case kAtomicCompareExchangeInt8:
2418 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(b, Register32);
2419 __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));
2420 break;
2421 case kAtomicCompareExchangeUint8:
2422 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(b, Register32);
2423 break;
2424 case kAtomicCompareExchangeInt16:
2425 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(h, Register32);
2426 __ Sxth(i.OutputRegister(0), i.OutputRegister(0));
2427 break;
2428 case kAtomicCompareExchangeUint16:
2429 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(h, Register32);
2430 break;
2431 case kAtomicCompareExchangeWord32:
2432 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(, Register32);
2433 break;
2434 case kArm64Word64AtomicCompareExchangeUint64:
2435 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(, Register);
2436 break;
2437 case kAtomicSubInt8:
2438 ASSEMBLE_ATOMIC_SUB(b, Register32);
2439 __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));
2440 break;
2441 case kAtomicSubUint8:
2442 ASSEMBLE_ATOMIC_SUB(b, Register32);
2443 break;
2444 case kAtomicSubInt16:
2445 ASSEMBLE_ATOMIC_SUB(h, Register32);
2446 __ Sxth(i.OutputRegister(0), i.OutputRegister(0));
2447 break;
2448 case kAtomicSubUint16:
2449 ASSEMBLE_ATOMIC_SUB(h, Register32);
2450 break;
2451 case kAtomicSubWord32:
2452 ASSEMBLE_ATOMIC_SUB(, Register32);
2453 break;
2454 case kArm64Word64AtomicSubUint64:
2455 ASSEMBLE_ATOMIC_SUB(, Register);
2456 break;
2457 case kAtomicAndInt8:
2458 ASSEMBLE_ATOMIC_AND(b, Register32);
2459 __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));
2460 break;
2461 case kAtomicAndUint8:
2462 ASSEMBLE_ATOMIC_AND(b, Register32);
2463 break;
2464 case kAtomicAndInt16:
2465 ASSEMBLE_ATOMIC_AND(h, Register32);
2466 __ Sxth(i.OutputRegister(0), i.OutputRegister(0));
2467 break;
2468 case kAtomicAndUint16:
2469 ASSEMBLE_ATOMIC_AND(h, Register32);
2470 break;
2471 case kAtomicAndWord32:
2472 ASSEMBLE_ATOMIC_AND(, Register32);
2473 break;
2474 case kArm64Word64AtomicAndUint64:
2475 ASSEMBLE_ATOMIC_AND(, Register);
2476 break;
2477#define ATOMIC_BINOP_CASE(op, inst, lse_instr) \
2478 case kAtomic##op##Int8: \
2479 ASSEMBLE_ATOMIC_BINOP(b, inst, lse_instr, Register32); \
2480 __ Sxtb(i.OutputRegister(0), i.OutputRegister(0)); \
2481 break; \
2482 case kAtomic##op##Uint8: \
2483 ASSEMBLE_ATOMIC_BINOP(b, inst, lse_instr, Register32); \
2484 break; \
2485 case kAtomic##op##Int16: \
2486 ASSEMBLE_ATOMIC_BINOP(h, inst, lse_instr, Register32); \
2487 __ Sxth(i.OutputRegister(0), i.OutputRegister(0)); \
2488 break; \
2489 case kAtomic##op##Uint16: \
2490 ASSEMBLE_ATOMIC_BINOP(h, inst, lse_instr, Register32); \
2491 break; \
2492 case kAtomic##op##Word32: \
2493 ASSEMBLE_ATOMIC_BINOP(, inst, lse_instr, Register32); \
2494 break; \
2495 case kArm64Word64Atomic##op##Uint64: \
2496 ASSEMBLE_ATOMIC_BINOP(, inst, lse_instr, Register); \
2497 break;
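// Note: each expansion below covers the 8/16/32/64-bit variants of one binop;
// signed sub-word results are re-sign-extended afterwards, and the third
// argument names the LSE form (e.g. Ldaddal) that ASSEMBLE_ATOMIC_BINOP
// presumably selects when LSE atomics are available.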
2498 ATOMIC_BINOP_CASE(Add, Add, Ldaddal)
2499 ATOMIC_BINOP_CASE(Or, Orr, Ldsetal)
2500 ATOMIC_BINOP_CASE(Xor, Eor, Ldeoral)
2501#undef ATOMIC_BINOP_CASE
2502#undef ASSEMBLE_SHIFT
2503#undef ASSEMBLE_ATOMIC_LOAD_INTEGER
2504#undef ASSEMBLE_ATOMIC_STORE_INTEGER
2505#undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER
2506#undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER
2507#undef ASSEMBLE_ATOMIC_BINOP
2508#undef ASSEMBLE_IEEE754_BINOP
2509#undef ASSEMBLE_IEEE754_UNOP
2510
2511#if V8_ENABLE_WEBASSEMBLY
2512#define SIMD_UNOP_CASE(Op, Instr, FORMAT) \
2513 case Op: \
2514 __ Instr(i.OutputSimd128Register().V##FORMAT(), \
2515 i.InputSimd128Register(0).V##FORMAT()); \
2516 break;
2517#define SIMD_UNOP_LANE_SIZE_CASE(Op, Instr) \
2518 case Op: { \
2519 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode)); \
2520 __ Instr(i.OutputSimd128Register().Format(f), \
2521 i.InputSimd128Register(0).Format(f)); \
2522 break; \
2523 }
2524#define SIMD_BINOP_CASE(Op, Instr, FORMAT) \
2525 case Op: \
2526 __ Instr(i.OutputSimd128Register().V##FORMAT(), \
2527 i.InputSimd128Register(0).V##FORMAT(), \
2528 i.InputSimd128Register(1).V##FORMAT()); \
2529 break;
2530#define SIMD_BINOP_LANE_SIZE_CASE(Op, Instr) \
2531 case Op: { \
2532 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode)); \
2533 __ Instr(i.OutputSimd128Register().Format(f), \
2534 i.InputSimd128Register(0).Format(f), \
2535 i.InputSimd128Register(1).Format(f)); \
2536 break; \
2537 }
2538#define SIMD_FCM_L_CASE(Op, ImmOp, RegOp) \
2539 case Op: { \
2540 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode)); \
2541 if (instr->InputCount() == 1) { \
2542 __ Fcm##ImmOp(i.OutputSimd128Register().Format(f), \
2543 i.InputSimd128Register(0).Format(f), +0.0); \
2544 } else { \
2545 __ Fcm##RegOp(i.OutputSimd128Register().Format(f), \
2546 i.InputSimd128Register(1).Format(f), \
2547 i.InputSimd128Register(0).Format(f)); \
2548 } \
2549 break; \
2550 }
2551#define SIMD_FCM_G_CASE(Op, ImmOp) \
2552 case Op: { \
2553 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode)); \
2554 /* Currently Gt/Ge instructions are only used with zero */ \
2555 DCHECK_EQ(instr->InputCount(), 1); \
2556 __ Fcm##ImmOp(i.OutputSimd128Register().Format(f), \
2557 i.InputSimd128Register(0).Format(f), +0.0); \
2558 break; \
2559 }
2560#define SIMD_CM_L_CASE(Op, ImmOp) \
2561 case Op: { \
2562 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode)); \
2563 DCHECK_EQ(instr->InputCount(), 1); \
2564 __ Cm##ImmOp(i.OutputSimd128Register().Format(f), \
2565 i.InputSimd128Register(0).Format(f), 0); \
2566 break; \
2567 }
2568#define SIMD_CM_G_CASE(Op, CmOp) \
2569 case Op: { \
2570 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode)); \
2571 if (instr->InputCount() == 1) { \
2572 __ Cm##CmOp(i.OutputSimd128Register().Format(f), \
2573 i.InputSimd128Register(0).Format(f), 0); \
2574 } else { \
2575 __ Cm##CmOp(i.OutputSimd128Register().Format(f), \
2576 i.InputSimd128Register(0).Format(f), \
2577 i.InputSimd128Register(1).Format(f)); \
2578 } \
2579 break; \
2580 }
2581#define SIMD_DESTRUCTIVE_BINOP_CASE(Op, Instr, FORMAT) \
2582 case Op: { \
2583 VRegister dst = i.OutputSimd128Register().V##FORMAT(); \
2584 DCHECK_EQ(dst, i.InputSimd128Register(0).V##FORMAT()); \
2585 __ Instr(dst, i.InputSimd128Register(1).V##FORMAT(), \
2586 i.InputSimd128Register(2).V##FORMAT()); \
2587 break; \
2588 }
2589#define SIMD_DESTRUCTIVE_BINOP_LANE_SIZE_CASE(Op, Instr) \
2590 case Op: { \
2591 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode)); \
2592 VRegister dst = i.OutputSimd128Register().Format(f); \
2593 DCHECK_EQ(dst, i.InputSimd128Register(0).Format(f)); \
2594 __ Instr(dst, i.InputSimd128Register(1).Format(f), \
2595 i.InputSimd128Register(2).Format(f)); \
2596 break; \
2597 }
2598#define SIMD_DESTRUCTIVE_RELAXED_FUSED_CASE(Op, Instr, FORMAT) \
2599 case Op: { \
2600 VRegister dst = i.OutputSimd128Register().V##FORMAT(); \
2601 DCHECK_EQ(dst, i.InputSimd128Register(2).V##FORMAT()); \
2602 __ Instr(dst, i.InputSimd128Register(0).V##FORMAT(), \
2603 i.InputSimd128Register(1).V##FORMAT()); \
2604 break; \
2605 }
2606 SIMD_BINOP_LANE_SIZE_CASE(kArm64FMin, Fmin);
2607 SIMD_BINOP_LANE_SIZE_CASE(kArm64FMax, Fmax);
2608 SIMD_UNOP_LANE_SIZE_CASE(kArm64FAbs, Fabs);
2609 SIMD_UNOP_LANE_SIZE_CASE(kArm64FSqrt, Fsqrt);
2610 SIMD_BINOP_LANE_SIZE_CASE(kArm64FAdd, Fadd);
2611 SIMD_BINOP_LANE_SIZE_CASE(kArm64FSub, Fsub);
2612 SIMD_BINOP_LANE_SIZE_CASE(kArm64FMul, Fmul);
2613 SIMD_BINOP_LANE_SIZE_CASE(kArm64FDiv, Fdiv);
2614 SIMD_UNOP_LANE_SIZE_CASE(kArm64FNeg, Fneg);
2615 SIMD_UNOP_LANE_SIZE_CASE(kArm64IAbs, Abs);
2616 SIMD_UNOP_LANE_SIZE_CASE(kArm64INeg, Neg);
2617 SIMD_BINOP_LANE_SIZE_CASE(kArm64RoundingAverageU, Urhadd);
2618 SIMD_BINOP_LANE_SIZE_CASE(kArm64IMinS, Smin);
2619 SIMD_BINOP_LANE_SIZE_CASE(kArm64IMaxS, Smax);
2620 SIMD_BINOP_LANE_SIZE_CASE(kArm64IMinU, Umin);
2621 SIMD_BINOP_LANE_SIZE_CASE(kArm64IMaxU, Umax);
2622 SIMD_DESTRUCTIVE_BINOP_LANE_SIZE_CASE(kArm64Mla, Mla);
2623 SIMD_DESTRUCTIVE_BINOP_LANE_SIZE_CASE(kArm64Mls, Mls);
2624 case kArm64Sxtl: {
2625 VectorFormat wide = VectorFormatFillQ(LaneSizeField::decode(opcode));
2626 VectorFormat narrow = VectorFormatHalfWidth(wide);
2627 __ Sxtl(i.OutputSimd128Register().Format(wide),
2628 i.InputSimd128Register(0).Format(narrow));
2629 break;
2630 }
2631 case kArm64Sxtl2: {
2634 __ Sxtl2(i.OutputSimd128Register().Format(wide),
2635 i.InputSimd128Register(0).Format(narrow));
2636 break;
2637 }
2638 case kArm64Uxtl: {
2639 VectorFormat wide = VectorFormatFillQ(LaneSizeField::decode(opcode));
2640 VectorFormat narrow = VectorFormatHalfWidth(wide);
2641 __ Uxtl(i.OutputSimd128Register().Format(wide),
2642 i.InputSimd128Register(0).Format(narrow));
2643 break;
2644 }
2645 case kArm64Uxtl2: {
2648 __ Uxtl2(i.OutputSimd128Register().Format(wide),
2649 i.InputSimd128Register(0).Format(narrow));
2650 break;
2651 }
2652 case kArm64F64x2ConvertLowI32x4S: {
2653 VRegister dst = i.OutputSimd128Register().V2D();
2654 __ Sxtl(dst, i.InputSimd128Register(0).V2S());
2655 __ Scvtf(dst, dst);
2656 break;
2657 }
2658 case kArm64F64x2ConvertLowI32x4U: {
2659 VRegister dst = i.OutputSimd128Register().V2D();
2660 __ Uxtl(dst, i.InputSimd128Register(0).V2S());
2661 __ Ucvtf(dst, dst);
2662 break;
2663 }
2664 case kArm64I32x4TruncSatF64x2SZero: {
2665 VRegister dst = i.OutputSimd128Register();
2666 __ Fcvtzs(dst.V2D(), i.InputSimd128Register(0).V2D());
2667 __ Sqxtn(dst.V2S(), dst.V2D());
2668 break;
2669 }
2670 case kArm64I32x4TruncSatF64x2UZero: {
2671 VRegister dst = i.OutputSimd128Register();
2672 __ Fcvtzu(dst.V2D(), i.InputSimd128Register(0).V2D());
2673 __ Uqxtn(dst.V2S(), dst.V2D());
2674 break;
2675 }
2676 case kArm64F32x4DemoteF64x2Zero: {
2677 __ Fcvtn(i.OutputSimd128Register().V2S(),
2678 i.InputSimd128Register(0).V2D());
2679 break;
2680 }
2681 case kArm64F64x2PromoteLowF32x4: {
2682 __ Fcvtl(i.OutputSimd128Register().V2D(),
2683 i.InputSimd128Register(0).V2S());
2684 break;
2685 }
2686 SIMD_UNOP_CASE(kArm64F16x8SConvertI16x8, Scvtf, 8H);
2687 SIMD_UNOP_CASE(kArm64F16x8UConvertI16x8, Ucvtf, 8H);
2688 SIMD_UNOP_CASE(kArm64I16x8UConvertF16x8, Fcvtzu, 8H);
2689 SIMD_UNOP_CASE(kArm64I16x8SConvertF16x8, Fcvtzs, 8H);
2690 case kArm64F16x8DemoteF32x4Zero: {
2691 __ Fcvtn(i.OutputSimd128Register().V4H(),
2692 i.InputSimd128Register(0).V4S());
2693 break;
2694 }
2695 case kArm64F16x8DemoteF64x2Zero: {
2696 // There is no vector f64 -> f16 conversion instruction,
2697 // so convert the lanes one by one using the scalar version.
2698 // Convert the high double into a temp reg first, because dst and src
2699 // can overlap.
2700 __ Mov(fp_scratch.D(), i.InputSimd128Register(0).V2D(), 1);
2701 __ Fcvt(fp_scratch.H(), fp_scratch.D());
2702
2703 __ Fcvt(i.OutputSimd128Register().H(), i.InputSimd128Register(0).D());
2704 __ Mov(i.OutputSimd128Register().V8H(), 1, fp_scratch.V8H(), 0);
2705 break;
2706 }
2707 case kArm64F32x4PromoteLowF16x8: {
2708 __ Fcvtl(i.OutputSimd128Register().V4S(),
2709 i.InputSimd128Register(0).V4H());
2710 break;
2711 }
2712 case kArm64FExtractLane: {
2713 VectorFormat dst_f =
2714 ScalarFormatFromLaneSize(LaneSizeField::decode(opcode));
2715 VectorFormat src_f = VectorFormatFillQ(dst_f);
2716 __ Mov(i.OutputSimd128Register().Format(dst_f),
2717 i.InputSimd128Register(0).Format(src_f), i.InputInt8(1));
2718 if (dst_f == kFormatH) {
2719 __ Fcvt(i.OutputSimd128Register().S(), i.OutputSimd128Register().H());
2720 }
2721 break;
2722 }
2723 case kArm64FReplaceLane: {
2724 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode));
2725 VRegister dst = i.OutputSimd128Register().Format(f),
2726 src1 = i.InputSimd128Register(0).Format(f);
2727 if (dst != src1) {
2728 __ Mov(dst, src1);
2729 }
2730 if (f == kFormat8H) {
2731 UseScratchRegisterScope scope(masm());
2732 VRegister tmp = scope.AcquireV(kFormat8H);
2733 __ Fcvt(tmp.H(), i.InputSimd128Register(2).S());
2734 __ Mov(dst, i.InputInt8(1), tmp.Format(f), 0);
2735 } else {
2736 __ Mov(dst, i.InputInt8(1), i.InputSimd128Register(2).Format(f), 0);
2737 }
2738 break;
2739 }
2740 SIMD_FCM_L_CASE(kArm64FEq, eq, eq);
2741 case kArm64FNe: {
2742 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode));
2743 VRegister dst = i.OutputSimd128Register().Format(f);
2744 if (instr->InputCount() == 1) {
2745 __ Fcmeq(dst, i.InputSimd128Register(0).Format(f), +0.0);
2746 } else {
2747 __ Fcmeq(dst, i.InputSimd128Register(0).Format(f),
2748 i.InputSimd128Register(1).Format(f));
2749 }
2750 __ Mvn(dst, dst);
2751 break;
2752 }
2753 SIMD_FCM_L_CASE(kArm64FLt, lt, gt);
2754 SIMD_FCM_L_CASE(kArm64FLe, le, ge);
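// Note: in the register forms, SIMD_FCM_L_CASE swaps its operands, so FLt/FLe
// are emitted as Fcmgt/Fcmge on (input1, input0); the one-input forms compare
// against +0.0 with Fcmlt/Fcmle instead.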
2755 SIMD_FCM_G_CASE(kArm64FGt, gt);
2756 SIMD_FCM_G_CASE(kArm64FGe, ge);
2757 SIMD_DESTRUCTIVE_RELAXED_FUSED_CASE(kArm64F64x2Qfma, Fmla, 2D);
2758 SIMD_DESTRUCTIVE_RELAXED_FUSED_CASE(kArm64F64x2Qfms, Fmls, 2D);
2759 case kArm64F64x2Pmin: {
2760 VRegister dst = i.OutputSimd128Register().V2D();
2761 VRegister lhs = i.InputSimd128Register(0).V2D();
2762 VRegister rhs = i.InputSimd128Register(1).V2D();
2763 // f64x2.pmin(lhs, rhs)
2764 // = v128.bitselect(rhs, lhs, f64x2.lt(rhs,lhs))
2765 // = v128.bitselect(rhs, lhs, f64x2.gt(lhs,rhs))
2766 __ Fcmgt(dst, lhs, rhs);
2767 __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
2768 break;
2769 }
2770 case kArm64F64x2Pmax: {
2771 VRegister dst = i.OutputSimd128Register().V2D();
2772 VRegister lhs = i.InputSimd128Register(0).V2D();
2773 VRegister rhs = i.InputSimd128Register(1).V2D();
2774 // f64x2.pmax(lhs, rhs)
2775 // = v128.bitselect(rhs, lhs, f64x2.gt(rhs, lhs))
2776 __ Fcmgt(dst, rhs, lhs);
2777 __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
2778 break;
2779 }
2780 SIMD_UNOP_CASE(kArm64F32x4SConvertI32x4, Scvtf, 4S);
2781 SIMD_UNOP_CASE(kArm64F32x4UConvertI32x4, Ucvtf, 4S);
2782 case kArm64FMulElement: {
2783 VectorFormat s_f =
2784 ScalarFormatFromLaneSize(LaneSizeField::decode(opcode));
2785 VectorFormat v_f = VectorFormatFillQ(LaneSizeField::decode(opcode));
2786 __ Fmul(i.OutputSimd128Register().Format(v_f),
2787 i.InputSimd128Register(0).Format(v_f),
2788 i.InputSimd128Register(1).Format(s_f), i.InputInt8(2));
2789 break;
2790 }
2791 SIMD_DESTRUCTIVE_RELAXED_FUSED_CASE(kArm64F32x4Qfma, Fmla, 4S);
2792 SIMD_DESTRUCTIVE_RELAXED_FUSED_CASE(kArm64F32x4Qfms, Fmls, 4S);
2793 case kArm64F32x4Pmin: {
2794 VRegister dst = i.OutputSimd128Register().V4S();
2795 VRegister lhs = i.InputSimd128Register(0).V4S();
2796 VRegister rhs = i.InputSimd128Register(1).V4S();
2797 // f32x4.pmin(lhs, rhs)
2798 // = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs))
2799 // = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs))
2800 __ Fcmgt(dst, lhs, rhs);
2801 __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
2802 break;
2803 }
2804 case kArm64F32x4Pmax: {
2805 VRegister dst = i.OutputSimd128Register().V4S();
2806 VRegister lhs = i.InputSimd128Register(0).V4S();
2807 VRegister rhs = i.InputSimd128Register(1).V4S();
2808 // f32x4.pmax(lhs, rhs)
2809 // = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs))
2810 __ Fcmgt(dst, rhs, lhs);
2811 __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
2812 break;
2813 }
2814 case kArm64F16x8Pmin: {
2815 VRegister dst = i.OutputSimd128Register().V8H();
2816 VRegister lhs = i.InputSimd128Register(0).V8H();
2817 VRegister rhs = i.InputSimd128Register(1).V8H();
2818 // f16x8.pmin(lhs, rhs)
2819 // = v128.bitselect(rhs, lhs, f16x8.lt(rhs, lhs))
2820 // = v128.bitselect(rhs, lhs, f16x8.gt(lhs, rhs))
2821 __ Fcmgt(dst, lhs, rhs);
2822 __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
2823 break;
2824 }
2825 case kArm64F16x8Pmax: {
2826 VRegister dst = i.OutputSimd128Register().V8H();
2827 VRegister lhs = i.InputSimd128Register(0).V8H();
2828 VRegister rhs = i.InputSimd128Register(1).V8H();
2829 // f16x8.pmax(lhs, rhs)
2830 // = v128.bitselect(rhs, lhs, f16x8.gt(rhs, lhs))
2831 __ Fcmgt(dst, rhs, lhs);
2832 __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
2833 break;
2834 }
2835 SIMD_DESTRUCTIVE_RELAXED_FUSED_CASE(kArm64F16x8Qfma, Fmla, 8H);
2836 SIMD_DESTRUCTIVE_RELAXED_FUSED_CASE(kArm64F16x8Qfms, Fmls, 8H);
2837 case kArm64IExtractLane: {
2838 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode));
2839 Register dst =
2840 f == kFormat2D ? i.OutputRegister64() : i.OutputRegister32();
2841 __ Mov(dst, i.InputSimd128Register(0).Format(f), i.InputInt8(1));
2842 break;
2843 }
2844 case kArm64IReplaceLane: {
2845 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode));
2846 VRegister dst = i.OutputSimd128Register().Format(f),
2847 src1 = i.InputSimd128Register(0).Format(f);
2848 Register src2 =
2849 f == kFormat2D ? i.InputRegister64(2) : i.InputRegister32(2);
2850 if (dst != src1) {
2851 __ Mov(dst, src1);
2852 }
2853 __ Mov(dst, i.InputInt8(1), src2);
2854 break;
2855 }
2856 case kArm64I64x2Shl: {
2857 ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 6, V2D, Sshl, X);
2858 break;
2859 }
2860 case kArm64I64x2ShrS: {
2861 ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 6, V2D, Sshl, X);
2862 break;
2863 }
2864 SIMD_BINOP_LANE_SIZE_CASE(kArm64IAdd, Add);
2865 SIMD_BINOP_LANE_SIZE_CASE(kArm64ISub, Sub);
2866 case kArm64I64x2Mul: {
2867 UseScratchRegisterScope scope(masm());
2868 VRegister dst = i.OutputSimd128Register();
2869 VRegister src1 = i.InputSimd128Register(0);
2870 VRegister src2 = i.InputSimd128Register(1);
2871 VRegister tmp1 = scope.AcquireSameSizeAs(dst);
2872 VRegister tmp2 = scope.AcquireSameSizeAs(dst);
2873 VRegister tmp3 = i.ToSimd128Register(instr->TempAt(0));
2874
2875 // This 2x64-bit multiplication is performed with several 32-bit
2876 // multiplications.
2877
2878 // 64-bit numbers x and y, can be represented as:
2879 // x = a + 2^32(b)
2880 // y = c + 2^32(d)
2881
2882 // A 64-bit multiplication is:
2883 // x * y = ac + 2^32(ad + bc) + 2^64(bd)
2884 // Note: the `2^64(bd)` term can be ignored; it is too large to fit in
2885 // 64 bits.
2886
2887 // This sequence implements a 2x64bit multiply, where the registers
2888 // `src1` and `src2` are split up into 32-bit components:
2889 // src1 = |d|c|b|a|
2890 // src2 = |h|g|f|e|
2891 //
2892 // src1 * src2 = |cg + 2^32(ch + dg)|ae + 2^32(af + be)|
2893
2894 // Reverse the 32-bit elements in the 64-bit words.
2895 // tmp2 = |g|h|e|f|
2896 __ Rev64(tmp2.V4S(), src2.V4S());
2897
2898 // Calculate the high half components.
2899 // tmp2 = |dg|ch|be|af|
2900 __ Mul(tmp2.V4S(), tmp2.V4S(), src1.V4S());
2901
2902 // Extract the low half components of src1.
2903 // tmp1 = |c|a|
2904 __ Xtn(tmp1.V2S(), src1.V2D());
2905
2906 // Sum the respective high half components.
2907 // tmp2 = |dg+ch|be+af||dg+ch|be+af|
2908 __ Addp(tmp2.V4S(), tmp2.V4S(), tmp2.V4S());
2909
2910 // Extract the low half components of src2.
2911 // tmp3 = |g|e|
2912 __ Xtn(tmp3.V2S(), src2.V2D());
2913
2914 // Shift the high half components, into the high half.
2915 // dst = |dg+ch << 32|be+af << 32|
2916 __ Shll(dst.V2D(), tmp2.V2S(), 32);
2917
2918 // Multiply the low components together, and accumulate with the high
2919 // half.
2920 // dst = |dst[1] + cg|dst[0] + ae|
2921 __ Umlal(dst.V2D(), tmp3.V2S(), tmp1.V2S());
2922
2923 break;
2924 }
2925 SIMD_CM_G_CASE(kArm64IEq, eq);
2926 case kArm64INe: {
2927 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode));
2928 VRegister dst = i.OutputSimd128Register().Format(f);
2929 if (instr->InputCount() == 1) {
2930 __ Cmeq(dst, i.InputSimd128Register(0).Format(f), 0);
2931 } else {
2932 __ Cmeq(dst, i.InputSimd128Register(0).Format(f),
2933 i.InputSimd128Register(1).Format(f));
2934 }
2935 __ Mvn(dst, dst);
2936 break;
2937 }
2938 SIMD_CM_L_CASE(kArm64ILtS, lt);
2939 SIMD_CM_L_CASE(kArm64ILeS, le);
2940 SIMD_CM_G_CASE(kArm64IGtS, gt);
2941 SIMD_CM_G_CASE(kArm64IGeS, ge);
2942 case kArm64I64x2ShrU: {
2943 ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 6, V2D, Ushl, X);
2944 break;
2945 }
2946 case kArm64I64x2BitMask: {
2947 __ I64x2BitMask(i.OutputRegister32(), i.InputSimd128Register(0));
2948 break;
2949 }
2950 SIMD_UNOP_CASE(kArm64I32x4SConvertF32x4, Fcvtzs, 4S);
2951 case kArm64I32x4Shl: {
2952 ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 5, V4S, Sshl, W);
2953 break;
2954 }
2955 case kArm64I32x4ShrS: {
2956 ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 5, V4S, Sshl, W);
2957 break;
2958 }
2959 SIMD_BINOP_CASE(kArm64I32x4Mul, Mul, 4S);
2960 SIMD_UNOP_CASE(kArm64I32x4UConvertF32x4, Fcvtzu, 4S);
2961 case kArm64I32x4ShrU: {
2962 ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 5, V4S, Ushl, W);
2963 break;
2964 }
2965 SIMD_BINOP_LANE_SIZE_CASE(kArm64IGtU, Cmhi);
2966 SIMD_BINOP_LANE_SIZE_CASE(kArm64IGeU, Cmhs);
2967 case kArm64I32x4BitMask: {
2968 __ I32x4BitMask(i.OutputRegister32(), i.InputSimd128Register(0));
2969 break;
2970 }
2971 case kArm64I8x16Addv: {
2972 __ Addv(i.OutputSimd128Register().B(), i.InputSimd128Register(0).V16B());
2973 break;
2974 }
2975 case kArm64I16x8Addv: {
2976 __ Addv(i.OutputSimd128Register().H(), i.InputSimd128Register(0).V8H());
2977 break;
2978 }
2979 case kArm64I32x4Addv: {
2980 __ Addv(i.OutputSimd128Register().S(), i.InputSimd128Register(0).V4S());
2981 break;
2982 }
2983 case kArm64I64x2AddPair: {
2984 __ Addp(i.OutputSimd128Register().D(), i.InputSimd128Register(0).V2D());
2985 break;
2986 }
2987 case kArm64F32x4AddReducePairwise: {
2988 UseScratchRegisterScope scope(masm());
2989 VRegister tmp = scope.AcquireV(kFormat4S);
2990 __ Faddp(tmp.V4S(), i.InputSimd128Register(0).V4S(),
2991 i.InputSimd128Register(0).V4S());
2992 __ Faddp(i.OutputSimd128Register().S(), tmp.V2S());
2993 break;
2994 }
2995 case kArm64F64x2AddPair: {
2996 __ Faddp(i.OutputSimd128Register().D(), i.InputSimd128Register(0).V2D());
2997 break;
2998 }
2999 case kArm64I32x4DotI16x8S: {
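// Note: Smull/Smull2 widen-multiply the low and high i16 halves into 32-bit
// products, and Addp then sums adjacent products, yielding the four
// dot-product lanes.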
3000 UseScratchRegisterScope scope(masm());
3001 VRegister lhs = i.InputSimd128Register(0);
3002 VRegister rhs = i.InputSimd128Register(1);
3003 VRegister tmp1 = scope.AcquireV(kFormat4S);
3004 VRegister tmp2 = scope.AcquireV(kFormat4S);
3005 __ Smull(tmp1, lhs.V4H(), rhs.V4H());
3006 __ Smull2(tmp2, lhs.V8H(), rhs.V8H());
3007 __ Addp(i.OutputSimd128Register().V4S(), tmp1, tmp2);
3008 break;
3009 }
3010 case kArm64I16x8DotI8x16S: {
3011 UseScratchRegisterScope scope(masm());
3012 VRegister lhs = i.InputSimd128Register(0);
3013 VRegister rhs = i.InputSimd128Register(1);
3014 VRegister tmp1 = scope.AcquireV(kFormat8H);
3015 VRegister tmp2 = scope.AcquireV(kFormat8H);
3016 __ Smull(tmp1, lhs.V8B(), rhs.V8B());
3017 __ Smull2(tmp2, lhs.V16B(), rhs.V16B());
3018 __ Addp(i.OutputSimd128Register().V8H(), tmp1, tmp2);
3019 break;
3020 }
3021 case kArm64I32x4DotI8x16AddS: {
3022 if (CpuFeatures::IsSupported(DOTPROD)) {
3023 CpuFeatureScope scope(masm(), DOTPROD);
3024
3025 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(2));
3026 __ Sdot(i.InputSimd128Register(2).V4S(),
3027 i.InputSimd128Register(0).V16B(),
3028 i.InputSimd128Register(1).V16B());
3029
3030 } else {
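// Note: without DOTPROD the dot product is synthesized: widen-multiply the
// bytes to 16-bit products, fold adjacent products with Addp, widen the
// remaining pairs to 32 bits with Saddlp (four byte products per lane in
// total), then add the accumulator.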
3031 UseScratchRegisterScope scope(masm());
3032 VRegister lhs = i.InputSimd128Register(0);
3033 VRegister rhs = i.InputSimd128Register(1);
3034 VRegister tmp1 = scope.AcquireV(kFormat8H);
3035 VRegister tmp2 = scope.AcquireV(kFormat8H);
3036 __ Smull(tmp1, lhs.V8B(), rhs.V8B());
3037 __ Smull2(tmp2, lhs.V16B(), rhs.V16B());
3038 __ Addp(tmp1, tmp1, tmp2);
3039 __ Saddlp(tmp1.V4S(), tmp1);
3040 __ Add(i.OutputSimd128Register().V4S(), tmp1.V4S(),
3041 i.InputSimd128Register(2).V4S());
3042 }
3043 break;
3044 }
3045 case kArm64IExtractLaneU: {
3046 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode));
3047 __ Umov(i.OutputRegister32(), i.InputSimd128Register(0).Format(f),
3048 i.InputInt8(1));
3049 break;
3050 }
3051 case kArm64IExtractLaneS: {
3052 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode));
3053 __ Smov(i.OutputRegister32(), i.InputSimd128Register(0).Format(f),
3054 i.InputInt8(1));
3055 break;
3056 }
3057 case kArm64I16x8Shl: {
3058 ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 4, V8H, Sshl, W);
3059 break;
3060 }
3061 case kArm64I16x8ShrS: {
3062 ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 4, V8H, Sshl, W);
3063 break;
3064 }
3065 case kArm64I16x8SConvertI32x4: {
3066 VRegister dst = i.OutputSimd128Register(),
3067 src0 = i.InputSimd128Register(0),
3068 src1 = i.InputSimd128Register(1);
3069 UseScratchRegisterScope scope(masm());
3070 VRegister temp = scope.AcquireV(kFormat4S);
3071 if (dst == src1) {
3072 __ Mov(temp, src1.V4S());
3073 src1 = temp;
3074 }
3075 __ Sqxtn(dst.V4H(), src0.V4S());
3076 __ Sqxtn2(dst.V8H(), src1.V4S());
3077 break;
3078 }
3079 SIMD_BINOP_LANE_SIZE_CASE(kArm64IAddSatS, Sqadd);
3080 SIMD_BINOP_LANE_SIZE_CASE(kArm64ISubSatS, Sqsub);
3081 SIMD_BINOP_CASE(kArm64I16x8Mul, Mul, 8H);
3082 case kArm64I16x8ShrU: {
3083 ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 4, V8H, Ushl, W);
3084 break;
3085 }
3086 case kArm64I16x8UConvertI32x4: {
3087 VRegister dst = i.OutputSimd128Register(),
3088 src0 = i.InputSimd128Register(0),
3089 src1 = i.InputSimd128Register(1);
3090 UseScratchRegisterScope scope(masm());
3091 VRegister temp = scope.AcquireV(kFormat4S);
3092 if (dst == src1) {
3093 __ Mov(temp, src1.V4S());
3094 src1 = temp;
3095 }
3096 __ Sqxtun(dst.V4H(), src0.V4S());
3097 __ Sqxtun2(dst.V8H(), src1.V4S());
3098 break;
3099 }
3100 SIMD_BINOP_LANE_SIZE_CASE(kArm64IAddSatU, Uqadd);
3101 SIMD_BINOP_LANE_SIZE_CASE(kArm64ISubSatU, Uqsub);
3102 SIMD_BINOP_CASE(kArm64I16x8Q15MulRSatS, Sqrdmulh, 8H);
3103 case kArm64I16x8BitMask: {
3104 __ I16x8BitMask(i.OutputRegister32(), i.InputSimd128Register(0));
3105 break;
3106 }
3107 case kArm64I8x16Shl: {
3108 ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 3, V16B, Sshl, W);
3109 break;
3110 }
3111 case kArm64I8x16ShrS: {
3112 ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 3, V16B, Sshl, W);
3113 break;
3114 }
3115 case kArm64I8x16SConvertI16x8: {
3116 VRegister dst = i.OutputSimd128Register(),
3117 src0 = i.InputSimd128Register(0),
3118 src1 = i.InputSimd128Register(1);
3119 UseScratchRegisterScope scope(masm());
3120 VRegister temp = scope.AcquireV(kFormat8H);
3121 if (dst == src1) {
3122 __ Mov(temp, src1.V8H());
3123 src1 = temp;
3124 }
3125 __ Sqxtn(dst.V8B(), src0.V8H());
3126 __ Sqxtn2(dst.V16B(), src1.V8H());
3127 break;
3128 }
3129 case kArm64I8x16ShrU: {
3130 ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 3, V16B, Ushl, W);
3131 break;
3132 }
3133 case kArm64I8x16UConvertI16x8: {
3134 VRegister dst = i.OutputSimd128Register(),
3135 src0 = i.InputSimd128Register(0),
3136 src1 = i.InputSimd128Register(1);
3137 UseScratchRegisterScope scope(masm());
3138 VRegister temp = scope.AcquireV(kFormat8H);
3139 if (dst == src1) {
3140 __ Mov(temp, src1.V8H());
3141 src1 = temp;
3142 }
3143 __ Sqxtun(dst.V8B(), src0.V8H());
3144 __ Sqxtun2(dst.V16B(), src1.V8H());
3145 break;
3146 }
3147 case kArm64I8x16BitMask: {
3148 VRegister temp = NoVReg;
3149
3150 if (CpuFeatures::IsSupported(PMULL1Q)) {
3151 temp = i.TempSimd128Register(0);
3152 }
3153
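// Note: the scratch vector register is only supplied when PMULL1Q (polynomial
// multiply) is available; the helper presumably uses it for a faster
// bit-gathering sequence and takes a generic path when handed NoVReg.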
3154 __ I8x16BitMask(i.OutputRegister32(), i.InputSimd128Register(0), temp);
3155 break;
3156 }
3157 case kArm64S128Const: {
3158 uint64_t imm1 = make_uint64(i.InputUint32(1), i.InputUint32(0));
3159 uint64_t imm2 = make_uint64(i.InputUint32(3), i.InputUint32(2));
3160 __ Movi(i.OutputSimd128Register().V16B(), imm2, imm1);
3161 break;
3162 }
3163 SIMD_BINOP_CASE(kArm64S128And, And, 16B);
3164 SIMD_BINOP_CASE(kArm64S128Or, Orr, 16B);
3165 SIMD_BINOP_CASE(kArm64S128Xor, Eor, 16B);
3166 SIMD_UNOP_CASE(kArm64S128Not, Mvn, 16B);
3167 case kArm64S128Dup: {
3168 VRegister dst = i.OutputSimd128Register(),
3169 src = i.InputSimd128Register(0);
3170 int lanes = i.InputInt32(1);
3171 int index = i.InputInt32(2);
3172 switch (lanes) {
3173 case 4:
3174 __ Dup(dst.V4S(), src.V4S(), index);
3175 break;
3176 case 8:
3177 __ Dup(dst.V8H(), src.V8H(), index);
3178 break;
3179 case 16:
3180 __ Dup(dst.V16B(), src.V16B(), index);
3181 break;
3182 default:
3183 UNREACHABLE();
3184 }
3185 break;
3186 }
3187 SIMD_DESTRUCTIVE_BINOP_CASE(kArm64S128Select, Bsl, 16B);
3188 case kArm64S128AndNot:
3189 if (instr->InputAt(1)->IsImmediate()) {
3190 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode));
3191 VRegister dst = i.OutputSimd128Register().Format(f);
3192 DCHECK_EQ(dst, i.InputSimd128Register(0).Format(f));
3193 __ Bic(dst, i.InputInt32(1), i.InputInt8(2));
3194 } else {
3195 __ Bic(i.OutputSimd128Register().V16B(),
3196 i.InputSimd128Register(0).V16B(),
3197 i.InputSimd128Register(1).V16B());
3198 }
3199 break;
3200 case kArm64Ssra: {
3201 int8_t laneSize = LaneSizeField::decode(opcode);
3202 VectorFormat f = VectorFormatFillQ(laneSize);
3203 int8_t mask = laneSize - 1;
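// Note: laneSize is the lane width in bits, so the & mask below reduces the
// immediate shift modulo the lane width (matching Wasm's shift-count
// semantics); the same applies to kArm64Usra below.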
3204 VRegister dst = i.OutputSimd128Register().Format(f);
3205 DCHECK_EQ(dst, i.InputSimd128Register(0).Format(f));
3206 __ Ssra(dst, i.InputSimd128Register(1).Format(f), i.InputInt8(2) & mask);
3207 break;
3208 }
3209 case kArm64Usra: {
3210 int8_t laneSize = LaneSizeField::decode(opcode);
3211 VectorFormat f = VectorFormatFillQ(laneSize);
3212 int8_t mask = laneSize - 1;
3213 VRegister dst = i.OutputSimd128Register().Format(f);
3214 DCHECK_EQ(dst, i.InputSimd128Register(0).Format(f));
3215 __ Usra(dst, i.InputSimd128Register(1).Format(f), i.InputUint8(2) & mask);
3216 break;
3217 }
3218 case kArm64S8x2Shuffle: {
3220 break;
3221 }
3222 case kArm64S16x1Shuffle: {
3224 break;
3225 }
3226 case kArm64S16x2Shuffle: {
3228 break;
3229 }
3230 case kArm64S32x1Shuffle: {
3232 break;
3233 }
3234 case kArm64S32x2Shuffle: {
3236 break;
3237 }
3238 case kArm64S32x4Shuffle: {
3240 break;
3241 }
3242 case kArm64S64x1Shuffle: {
3244 break;
3245 }
3246 case kArm64S64x2Shuffle: {
3248 break;
3249 }
3250 SIMD_BINOP_CASE(kArm64S64x2UnzipLeft, Uzp1, 2D);
3251 SIMD_BINOP_CASE(kArm64S64x2UnzipRight, Uzp2, 2D);
3252 SIMD_BINOP_CASE(kArm64S32x4ZipLeft, Zip1, 4S);
3253 SIMD_BINOP_CASE(kArm64S32x4ZipRight, Zip2, 4S);
3254 SIMD_BINOP_CASE(kArm64S32x4UnzipLeft, Uzp1, 4S);
3255 SIMD_BINOP_CASE(kArm64S32x4UnzipRight, Uzp2, 4S);
3256 SIMD_BINOP_CASE(kArm64S32x4TransposeLeft, Trn1, 4S);
3257 SIMD_BINOP_CASE(kArm64S32x4TransposeRight, Trn2, 4S);
3258 SIMD_BINOP_CASE(kArm64S16x8ZipLeft, Zip1, 8H);
3259 SIMD_BINOP_CASE(kArm64S16x8ZipRight, Zip2, 8H);
3260 SIMD_BINOP_CASE(kArm64S16x8UnzipLeft, Uzp1, 8H);
3261 SIMD_BINOP_CASE(kArm64S16x8UnzipRight, Uzp2, 8H);
3262 SIMD_BINOP_CASE(kArm64S16x8TransposeLeft, Trn1, 8H);
3263 SIMD_BINOP_CASE(kArm64S16x8TransposeRight, Trn2, 8H);
3264 SIMD_BINOP_CASE(kArm64S8x16ZipLeft, Zip1, 16B);
3265 SIMD_BINOP_CASE(kArm64S8x16ZipRight, Zip2, 16B);
3266 SIMD_BINOP_CASE(kArm64S8x16UnzipLeft, Uzp1, 16B);
3267 SIMD_BINOP_CASE(kArm64S8x16UnzipRight, Uzp2, 16B);
3268 SIMD_BINOP_CASE(kArm64S8x16TransposeLeft, Trn1, 16B);
3269 SIMD_BINOP_CASE(kArm64S8x16TransposeRight, Trn2, 16B);
3270 case kArm64S8x16Concat: {
3271 __ Ext(i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(),
3272 i.InputSimd128Register(1).V16B(), i.InputInt4(2));
3273 break;
3274 }
3275 case kArm64I8x16Swizzle: {
3276 __ Tbl(i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(),
3277 i.InputSimd128Register(1).V16B());
3278 break;
3279 }
3280 case kArm64I8x16Shuffle: {
3281 Simd128Register dst = i.OutputSimd128Register().V16B(),
3282 src0 = i.InputSimd128Register(0).V16B(),
3283 src1 = i.InputSimd128Register(1).V16B();
3284 // A unary shuffle table is in src0; a binary shuffle table is in src0 and
3285 // src1, which must be consecutive registers.
3286 if (src0 != src1) {
3287 DCHECK(AreConsecutive(src0, src1));
3288 }
3289
3290 int64_t imm1 = make_uint64(i.InputInt32(3), i.InputInt32(2));
3291 int64_t imm2 = make_uint64(i.InputInt32(5), i.InputInt32(4));
3292 DCHECK_EQ(0, (imm1 | imm2) & (src0 == src1 ? 0xF0F0F0F0F0F0F0F0
3293 : 0xE0E0E0E0E0E0E0E0));
3294
3295 UseScratchRegisterScope scope(masm());
3296 VRegister temp = scope.AcquireV(kFormat16B);
3297 __ Movi(temp, imm2, imm1);
3298
3299 if (src0 == src1) {
3300 __ Tbl(dst, src0, temp.V16B());
3301 } else {
3302 __ Tbl(dst, src0, src1, temp.V16B());
3303 }
3304 break;
3305 }
3306 case kArm64S32x4Reverse: {
3307 Simd128Register dst = i.OutputSimd128Register().V16B(),
3308 src = i.InputSimd128Register(0).V16B();
3309 __ Rev64(dst.V4S(), src.V4S());
3310 __ Ext(dst.V16B(), dst.V16B(), dst.V16B(), 8);
3311 break;
3312 }
3313 SIMD_UNOP_CASE(kArm64S32x2Reverse, Rev64, 4S);
3314 SIMD_UNOP_CASE(kArm64S16x4Reverse, Rev64, 8H);
3315 SIMD_UNOP_CASE(kArm64S16x2Reverse, Rev32, 8H);
3316 SIMD_UNOP_CASE(kArm64S8x8Reverse, Rev64, 16B);
3317 SIMD_UNOP_CASE(kArm64S8x4Reverse, Rev32, 16B);
3318 SIMD_UNOP_CASE(kArm64S8x2Reverse, Rev16, 16B);
3319 case kArm64LoadSplat: {
3320 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3321 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode));
3322 __ ld1r(i.OutputSimd128Register().Format(f), i.MemoryOperand(0));
3323 break;
3324 }
3325 case kArm64LoadLane: {
3326 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3327 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3328 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode));
3329 int laneidx = i.InputInt8(1);
3330 __ ld1(i.OutputSimd128Register().Format(f), laneidx, i.MemoryOperand(2));
3331 break;
3332 }
3333 case kArm64StoreLane: {
3334 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3335 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode));
3336 int laneidx = i.InputInt8(1);
3337 __ st1(i.InputSimd128Register(0).Format(f), laneidx, i.MemoryOperand(2));
3338 break;
3339 }
3340 case kArm64S128Load8x8S: {
3341 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3342 __ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
3343 __ Sxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
3344 break;
3345 }
3346 case kArm64S128Load8x8U: {
3347 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3348 __ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
3349 __ Uxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
3350 break;
3351 }
3352 case kArm64S128Load16x4S: {
3353 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3354 __ Ldr(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
3355 __ Sxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H());
3356 break;
3357 }
3358 case kArm64S128Load16x4U: {
3359 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3360 __ Ldr(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
3361 __ Uxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H());
3362 break;
3363 }
3364 case kArm64S128Load32x2S: {
3365 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3366 __ Ldr(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
3367 __ Sxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
3368 break;
3369 }
3370 case kArm64S128Load32x2U: {
3371 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3372 __ Ldr(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
3373 __ Uxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
3374 break;
3375 }
3376 case kArm64S128LoadPairDeinterleave: {
3377 DCHECK_EQ(i.OutputCount(), 2);
3378 VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode));
3379 __ Ld2(i.OutputSimd128Register(0).Format(f),
3380 i.OutputSimd128Register(1).Format(f), i.MemoryOperand(0));
3381 break;
3382 }
3383 case kArm64I64x2AllTrue: {
3384 __ I64x2AllTrue(i.OutputRegister32(), i.InputSimd128Register(0));
3385 break;
3386 }
3387 case kArm64V128AnyTrue: {
3388 UseScratchRegisterScope scope(masm());
3389 // For AnyTrue, the format does not matter; also, we would like to avoid
3390 // an expensive horizontal reduction.
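// Note: Umaxp folds the vector to 64 bits by pairwise unsigned max, so any
// nonzero lane leaves a nonzero bit; Fmov moves those 64 bits to a general
// register, and Cmp/Cset materialize the boolean.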
3391 VRegister temp = scope.AcquireV(kFormat4S);
3392 __ Umaxp(temp, i.InputSimd128Register(0).V4S(),
3393 i.InputSimd128Register(0).V4S());
3394 __ Fmov(i.OutputRegister64(), temp.D());
3395 __ Cmp(i.OutputRegister64(), 0);
3396 __ Cset(i.OutputRegister32(), ne);
3397 break;
3398 }
3399 case kArm64S32x4OneLaneSwizzle: {
3400 Simd128Register dst = i.OutputSimd128Register().V4S(),
3401 src = i.InputSimd128Register(0).V4S();
3402 int from = i.InputInt32(1);
3403 int to = i.InputInt32(2);
3404 if (dst != src) {
3405 __ Mov(dst, src);
3406 }
3407 __ Mov(dst, to, src, from);
3408 break;
3409 }
3410#define SIMD_REDUCE_OP_CASE(Op, Instr, format, FORMAT) \
3411 case Op: { \
3412 UseScratchRegisterScope scope(masm()); \
3413 VRegister temp = scope.AcquireV(format); \
3414 __ Instr(temp, i.InputSimd128Register(0).V##FORMAT()); \
3415 __ Umov(i.OutputRegister32(), temp, 0); \
3416 __ Cmp(i.OutputRegister32(), 0); \
3417 __ Cset(i.OutputRegister32(), ne); \
3418 break; \
3419 }
3420 SIMD_REDUCE_OP_CASE(kArm64I32x4AllTrue, Uminv, kFormatS, 4S);
3421 SIMD_REDUCE_OP_CASE(kArm64I16x8AllTrue, Uminv, kFormatH, 8H);
3422 SIMD_REDUCE_OP_CASE(kArm64I8x16AllTrue, Uminv, kFormatB, 16B);
3423#endif // V8_ENABLE_WEBASSEMBLY
3424 }
3425 return kSuccess;
3426}
3427
3428#undef SIMD_UNOP_CASE
3429#undef SIMD_UNOP_LANE_SIZE_CASE
3430#undef SIMD_BINOP_CASE
3431#undef SIMD_BINOP_LANE_SIZE_CASE
3432#undef SIMD_DESTRUCTIVE_BINOP_CASE
3433#undef SIMD_DESTRUCTIVE_BINOP_LANE_SIZE_CASE
3434#undef SIMD_DESTRUCTIVE_RELAXED_FUSED_CASE
3435#undef SIMD_REDUCE_OP_CASE
3436#undef ASSEMBLE_SIMD_SHIFT_LEFT
3437#undef ASSEMBLE_SIMD_SHIFT_RIGHT
3438
3439// Assemble branches after this instruction.
3440void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
3441 Arm64OperandConverter i(this, instr);
3442 Label* tlabel = branch->true_label;
3443 Label* flabel = branch->false_label;
3444 FlagsCondition condition = branch->condition;
3445 ArchOpcode opcode = instr->arch_opcode();
3446
3447 if (opcode == kArm64CompareAndBranch32) {
3448 switch (condition) {
3449 case kEqual:
3450 __ Cbz(i.InputRegister32(0), tlabel);
3451 break;
3452 case kNotEqual:
3453 __ Cbnz(i.InputRegister32(0), tlabel);
3454 break;
3455 default:
3456 UNREACHABLE();
3457 }
3458 } else if (opcode == kArm64CompareAndBranch) {
3459 switch (condition) {
3460 case kEqual:
3461 __ Cbz(i.InputRegister64(0), tlabel);
3462 break;
3463 case kNotEqual:
3464 __ Cbnz(i.InputRegister64(0), tlabel);
3465 break;
3466 default:
3467 UNREACHABLE();
3468 }
3469 } else if (opcode == kArm64TestAndBranch32) {
3470 switch (condition) {
3471 case kEqual:
3472 __ Tbz(i.InputRegister32(0), i.InputInt5(1), tlabel);
3473 break;
3474 case kNotEqual:
3475 __ Tbnz(i.InputRegister32(0), i.InputInt5(1), tlabel);
3476 break;
3477 default:
3478 UNREACHABLE();
3479 }
3480 } else if (opcode == kArm64TestAndBranch) {
3481 switch (condition) {
3482 case kEqual:
3483 __ Tbz(i.InputRegister64(0), i.InputInt6(1), tlabel);
3484 break;
3485 case kNotEqual:
3486 __ Tbnz(i.InputRegister64(0), i.InputInt6(1), tlabel);
3487 break;
3488 default:
3489 UNREACHABLE();
3490 }
3491 } else {
3492 Condition cc = FlagsConditionToCondition(condition);
3493 __ B(cc, tlabel);
3494 }
3495 if (!branch->fallthru) __ B(flabel); // no fallthru to flabel.
3496}
3497
3498 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
3499 BranchInfo* branch) {
3500 AssembleArchBranch(instr, branch);
3501}
3502
3503 void CodeGenerator::AssembleArchJumpRegardlessOfAssemblyOrder(
3504 RpoNumber target) {
3505 __ B(GetLabel(target));
3506}
3507
3508#if V8_ENABLE_WEBASSEMBLY
3509void CodeGenerator::AssembleArchTrap(Instruction* instr,
3510 FlagsCondition condition) {
3511 auto ool = zone()->New<WasmOutOfLineTrap>(this, instr);
3512 Label* tlabel = ool->entry();
3513 Condition cc = FlagsConditionToCondition(condition);
3514 __ B(cc, tlabel);
3515}
3516#endif // V8_ENABLE_WEBASSEMBLY
3517
3518// Assemble boolean materializations after this instruction.
3519void CodeGenerator::AssembleArchBoolean(Instruction* instr,
3520 FlagsCondition condition) {
3521 Arm64OperandConverter i(this, instr);
3522
3523 // Materialize a full 64-bit 1 or 0 value. The result register is always the
3524 // last output of the instruction.
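// In practice this is a single `cset` below, e.g. `cset x0, eq` writes 1 to
// the output register if the Z flag is set and 0 otherwise.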
3525 DCHECK_NE(0u, instr->OutputCount());
3526 Register reg = i.OutputRegister(instr->OutputCount() - 1);
3527 Condition cc = FlagsConditionToCondition(condition);
3528 __ Cset(reg, cc);
3529}
3530
3531// Mnemonic     Meaning (INT)           Meaning (FP)             Condition flags
3532// EQ           Equal                   Equal                    Z == 1
3533// NE           Not equal               Not equal or unordered   Z == 0
3534// CS or HS     Carry set               >= or unordered          C == 1
3535// CC or LO     Carry clear             <                        C == 0
3536// MI           Minus, negative         <                        N == 1
3537// PL           Plus, positive or zero  >= or unordered          N == 0
3538// VS           Overflow                Unordered                V == 1
3539// VC           No overflow             Ordered                  V == 0
3540// HI           Unsigned >              > or unordered           C == 1 && Z == 0
3541// LS           Unsigned <=             < or equal               !(C == 1 && Z == 0)
3542// GE           Signed >=               > or equal               N == V
3543// LT           Signed <                < or unordered           N != V
3544// GT           Signed >                >                        Z == 0 && N == V
3545// LE           Signed <=               <= or unordered          !(Z == 0 && N == V)
3546// AL           Always                  Always                   Any
3547// NV           Always                  Always                   Any
3548
3549// Given a condition, return a value for nzcv which represents it. This is used
3550// for the default condition for ccmp.
3551StatusFlags ConditionToDefaultFlags(Condition condition) {
3552 switch (condition) {
3553 default:
3554 UNREACHABLE();
3555 case eq:
3556 return ZFlag; // Z == 1
3557 case ne:
3558 return NoFlag; // Z == 0
3559 case hs:
3560 return CFlag; // C == 1
3561 case lo:
3562 return NoFlag; // C == 0
3563 case mi:
3564 return NFlag; // N == 1
3565 case pl:
3566 return NoFlag; // N == 0
3567 case vs:
3568 return VFlag; // V == 1
3569 case vc:
3570 return NoFlag; // V == 0
3571 case hi:
3572 return CFlag; // C == 1 && Z == 0
3573 case ls:
3574 return NoFlag; // C == 0 || Z == 1
3575 case ge:
3576 return NoFlag; // N == V
3577 case lt:
3578 return NFlag; // N != V
3579 case gt:
3580 return NoFlag; // Z == 0 && N == V
3581 case le:
3582 return ZFlag; // Z == 1 || N != V
3583 }
3584}
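// For illustration, a conditional compare emitted as
//   __ Ccmp(x2, x3, ZFlag, lt);
// sets the flags from comparing x2 and x3 only if `lt` currently holds;
// otherwise it sets nzcv to ZFlag (Z == 1), i.e. ConditionToDefaultFlags(eq),
// which is exactly the flag state in which a following `eq` check passes.
// (x2/x3 here are illustrative registers, not taken from the code below.)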
3585
3586void AssembleConditionalCompareChain(Instruction* instr, int64_t num_ccmps,
3587 size_t ccmp_base_index,
3588 CodeGenerator* gen) {
3589 Arm64OperandConverter i(gen, instr);
3590 // The first two or three operands are the compare that begins the chain.
3591 // These operands are used when the first compare, the one with the
3592 // continuation attached, is generated.
3593 // Then, each group of five operands provides:
3594 // - cmp opcode
3595 // - compare lhs
3596 // - compare rhs
3597 // - default flags
3598 // - user condition
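// For illustration, a conditional-set instruction with two ccmps and a
// two-operand initial compare carries inputs roughly laid out as:
//   [0..1]            lhs/rhs of the initial cmp
//   [2..6]            ccmp #0: opcode, lhs, rhs, default flags, condition
//   [7..11]           ccmp #1: opcode, lhs, rhs, default flags, condition
//   [InputCount - 2]  final set condition
//   [InputCount - 1]  number of ccmps
// (see the index computations in AssembleArchConditionalBoolean below).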
3599 for (unsigned n = 0; n < num_ccmps; ++n) {
3600 size_t opcode_index = ccmp_base_index + kCcmpOffsetOfOpcode;
3601 size_t compare_lhs_index = ccmp_base_index + kCcmpOffsetOfLhs;
3602 size_t compare_rhs_index = ccmp_base_index + kCcmpOffsetOfRhs;
3603 size_t default_condition_index =
3604 ccmp_base_index + kCcmpOffsetOfDefaultFlags;
3605 size_t compare_condition_index =
3606 ccmp_base_index + kCcmpOffsetOfCompareCondition;
3607 ccmp_base_index += kNumCcmpOperands;
3608 DCHECK_LT(ccmp_base_index, instr->InputCount() - 1);
3609
3610 InstructionCode code = static_cast<InstructionCode>(
3611 i.ToConstant(instr->InputAt(opcode_index)).ToInt64());
3612
3613 FlagsCondition default_condition = static_cast<FlagsCondition>(
3614 i.ToConstant(instr->InputAt(default_condition_index)).ToInt64());
3615
3616 StatusFlags default_flags =
3617 ConditionToDefaultFlags(FlagsConditionToCondition(default_condition));
3618
3619 FlagsCondition compare_condition = static_cast<FlagsCondition>(
3620 i.ToConstant(instr->InputAt(compare_condition_index)).ToInt64());
3621
3622 if (code == kArm64Cmp) {
3623 gen->masm()->Ccmp(i.InputRegister64(compare_lhs_index),
3624 i.InputOperand64(compare_rhs_index), default_flags,
3625 FlagsConditionToCondition(compare_condition));
3626 } else if (code == kArm64Cmp32) {
3627 gen->masm()->Ccmp(i.InputRegister32(compare_lhs_index),
3628 i.InputOperand32(compare_rhs_index), default_flags,
3629 FlagsConditionToCondition(compare_condition));
3630 } else if (code == kArm64Float64Cmp) {
3631 gen->masm()->Fccmp(i.InputFloat64OrFPZeroRegister(compare_lhs_index),
3632 i.InputFloat64OrFPZeroRegister(compare_rhs_index),
3633 default_flags,
3634 FlagsConditionToCondition(compare_condition));
3635 } else {
3636 DCHECK_EQ(code, kArm64Float32Cmp);
3637 gen->masm()->Fccmp(i.InputFloat32OrFPZeroRegister(compare_lhs_index),
3638 i.InputFloat32OrFPZeroRegister(compare_rhs_index),
3639 default_flags,
3640 FlagsConditionToCondition(compare_condition));
3641 }
3642 }
3643}
3644
3645// Assemble a conditional compare and boolean materializations after this
3646// instruction.
3647void CodeGenerator::AssembleArchConditionalBoolean(Instruction* instr) {
3648 // Materialize a full 64-bit 1 or 0 value. The result register is always the
3649 // last output of the instruction.
3650 DCHECK_NE(0u, instr->OutputCount());
3651 Arm64OperandConverter i(this, instr);
3652 Register reg = i.OutputRegister(instr->OutputCount() - 1);
3653 DCHECK_GE(instr->InputCount(), 6);
3654
3655 // Input ordering:
3656 // > InputCount - 1: number of ccmps.
3657 // > InputCount - 2: branch condition.
3658 size_t num_ccmps_index =
3659 instr->InputCount() - kConditionalSetEndOffsetOfNumCcmps;
3660 size_t set_condition_index =
3661 instr->InputCount() - kConditionalSetEndOffsetOfCondition;
3662 int64_t num_ccmps = i.ToConstant(instr->InputAt(num_ccmps_index)).ToInt64();
3663 size_t ccmp_base_index = set_condition_index - kNumCcmpOperands * num_ccmps;
3664 AssembleConditionalCompareChain(instr, num_ccmps, ccmp_base_index, this);
3665
3666 FlagsCondition set_condition = static_cast<FlagsCondition>(
3667 i.ToConstant(instr->InputAt(set_condition_index)).ToInt64());
3668 __ Cset(reg, FlagsConditionToCondition(set_condition));
3669}
3670
3671void CodeGenerator::AssembleArchConditionalBranch(Instruction* instr,
3672 BranchInfo* branch) {
3673 DCHECK_GE(instr->InputCount(), 6);
3674 Arm64OperandConverter i(this, instr);
3675 // Input ordering:
3676 // > InputCount - 1: false block.
3677 // > InputCount - 2: true block.
3678 // > InputCount - 3: number of ccmps.
3679 // > InputCount - 4: branch condition.
3680 size_t num_ccmps_index =
3681 instr->InputCount() - kConditionalBranchEndOffsetOfNumCcmps;
3682 int64_t num_ccmps = i.ToConstant(instr->InputAt(num_ccmps_index)).ToInt64();
3683 size_t ccmp_base_index = instr->InputCount() -
3684 kConditionalBranchEndOffsetOfCondition -
3685 kNumCcmpOperands * num_ccmps;
3686 AssembleConditionalCompareChain(instr, num_ccmps, ccmp_base_index, this);
3687 Condition cc = FlagsConditionToCondition(branch->condition);
3688 __ B(cc, branch->true_label);
3689 if (!branch->fallthru) __ B(branch->false_label);
3690}
3691
3692void CodeGenerator::AssembleArchSelect(Instruction* instr,
3693 FlagsCondition condition) {
3694 Arm64OperandConverter i(this, instr);
3695 // The result register is always the last output of the instruction.
3696 size_t output_index = instr->OutputCount() - 1;
3697 MachineRepresentation rep =
3698 LocationOperand::cast(instr->OutputAt(output_index))->representation();
3699 Condition cc = FlagsConditionToCondition(condition);
3700 // We don't know how many inputs were consumed by the condition, so we have to
3701 // calculate the indices of the last two inputs.
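// The select is emitted as a single csel/fcsel below; e.g. `csel x0, x1, x2, lt`
// picks x1 when `lt` holds and x2 otherwise (registers here are illustrative).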
3702 DCHECK_GE(instr->InputCount(), 2);
3703 size_t true_value_index = instr->InputCount() - 2;
3704 size_t false_value_index = instr->InputCount() - 1;
3705 if (rep == MachineRepresentation::kFloat32) {
3706 __ Fcsel(i.OutputFloat32Register(output_index),
3707 i.InputFloat32OrFPZeroRegister(true_value_index),
3708 i.InputFloat32OrFPZeroRegister(false_value_index), cc);
3709 } else if (rep == MachineRepresentation::kFloat64) {
3710 __ Fcsel(i.OutputFloat64Register(output_index),
3711 i.InputFloat64OrFPZeroRegister(true_value_index),
3712 i.InputFloat64OrFPZeroRegister(false_value_index), cc);
3713 } else if (rep == MachineRepresentation::kWord32) {
3714 __ Csel(i.OutputRegister32(output_index),
3715 i.InputOrZeroRegister32(true_value_index),
3716 i.InputOrZeroRegister32(false_value_index), cc);
3717 } else {
3718 DCHECK_EQ(rep, MachineRepresentation::kWord64);
3719 __ Csel(i.OutputRegister64(output_index),
3720 i.InputOrZeroRegister64(true_value_index),
3721 i.InputOrZeroRegister64(false_value_index), cc);
3722 }
3723}
3724
3725void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
3726 Arm64OperandConverter i(this, instr);
3727 Register input = i.InputRegister32(0);
3728 std::vector<std::pair<int32_t, Label*>> cases;
3729 for (size_t index = 2; index < instr->InputCount(); index += 2) {
3730 cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
3731 }
3732 AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
3733 cases.data() + cases.size());
3734}
3735
3736void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
3737 Arm64OperandConverter i(this, instr);
3738 UseScratchRegisterScope scope(masm());
3739 Register input = i.InputRegister64(0);
3740 size_t const case_count = instr->InputCount() - 2;
3741
3742 base::Vector<Label*> cases = zone()->AllocateVector<Label*>(case_count);
3743 for (size_t index = 0; index < case_count; ++index) {
3744 cases[index] = GetLabel(i.InputRpo(index + 2));
3745 }
3746 Label* fallthrough = GetLabel(i.InputRpo(1));
3747 __ Cmp(input, Immediate(case_count));
3748 __ B(fallthrough, hs);
3749
3750 Label* const jump_table = AddJumpTable(cases);
3751 Register addr = scope.AcquireX();
3752 __ Adr(addr, jump_table, MacroAssembler::kAdrFar);
3753 Register offset = scope.AcquireX();
3754 // Load the 32-bit offset.
3755 __ Ldrsw(offset, MemOperand(addr, input, LSL, 2));
3756 // The offset is relative to the address of 'jump_table', so add 'offset'
3757 // to 'addr' to reconstruct the absolute address.
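// For example, if the label for case 3 is bound 64 bytes after the start of
// the jump table, the table stores 0x40 at byte offset 3 * 4, and the Add
// below lands on that target.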
3758 __ Add(addr, addr, offset);
3759 __ Br(addr);
3760}
3761
3762void CodeGenerator::AssembleJumpTable(base::Vector<Label*> targets) {
3763 const size_t jump_table_size = targets.size() * kInt32Size;
3764 MacroAssembler::BlockPoolsScope no_pool_inbetween(masm(), jump_table_size);
3765 int table_pos = __ pc_offset();
3766 // Store 32-bit pc-relative offsets.
3767 for (auto* target : targets) {
3768 __ dc32(target->pos() - table_pos);
3769 }
3770}
3771
3772void CodeGenerator::FinishFrame(Frame* frame) {
3773 auto call_descriptor = linkage()->GetIncomingDescriptor();
3774
3775 // Save FP registers.
3776 CPURegList saves_fp =
3777 CPURegList(kDRegSizeInBits, call_descriptor->CalleeSavedFPRegisters());
3778 int saved_count = saves_fp.Count();
3779 if (saved_count != 0) {
3780 DCHECK(saves_fp.bits() == CPURegList::GetCalleeSavedV().bits());
3781 frame->AllocateSavedCalleeRegisterSlots(saved_count *
3782 (kDoubleSize / kSystemPointerSize));
3783 }
3784
3785 CPURegList saves =
3786 CPURegList(kXRegSizeInBits, call_descriptor->CalleeSavedRegisters());
3787 saved_count = saves.Count();
3788 if (saved_count != 0) {
3789 frame->AllocateSavedCalleeRegisterSlots(saved_count);
3790 }
3791 frame->AlignFrame(16);
3792}
3793
3794void CodeGenerator::AssembleConstructFrame() {
3795 auto call_descriptor = linkage()->GetIncomingDescriptor();
3796 __ AssertSpAligned();
3797
3798 // The frame has been previously padded in CodeGenerator::FinishFrame().
3799 DCHECK_EQ(frame()->GetTotalFrameSlotCount() % 2, 0);
3800 int required_slots =
3801 frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
3802
3803 CPURegList saves =
3804 CPURegList(kXRegSizeInBits, call_descriptor->CalleeSavedRegisters());
3805 DCHECK_EQ(saves.Count() % 2, 0);
3806 CPURegList saves_fp =
3807 CPURegList(kDRegSizeInBits, call_descriptor->CalleeSavedFPRegisters());
3808 DCHECK_EQ(saves_fp.Count() % 2, 0);
3809 // The number of return slots should be even after aligning the Frame.
3810 const int returns = frame()->GetReturnSlotCount();
3811 DCHECK_EQ(returns % 2, 0);
3812
3813 if (frame_access_state()->has_frame()) {
3814 // Link the frame
3815 if (call_descriptor->IsJSFunctionCall()) {
3816 static_assert(StandardFrameConstants::kFixedFrameSize % 16 == 8);
3817 DCHECK_EQ(required_slots % 2, 1);
3818 __ Prologue();
3819 // Update required_slots count since we have just claimed one extra slot.
3820 static_assert(MacroAssembler::kExtraSlotClaimedByPrologue == 1);
3821 required_slots -= MacroAssembler::kExtraSlotClaimedByPrologue;
3822#if V8_ENABLE_WEBASSEMBLY
3823 } else if (call_descriptor->IsAnyWasmFunctionCall() ||
3824 call_descriptor->IsWasmCapiFunction() ||
3825 call_descriptor->IsWasmImportWrapper() ||
3826 (call_descriptor->IsCFunctionCall() &&
3827 info()->GetOutputStackFrameType() ==
3828 StackFrame::C_WASM_ENTRY)) {
3829 UseScratchRegisterScope temps(masm());
3830 Register scratch = temps.AcquireX();
3831 __ Mov(scratch,
3832 StackFrame::TypeToMarker(info()->GetOutputStackFrameType()));
3833 __ Push<MacroAssembler::kSignLR>(lr, fp, scratch,
3834 kWasmImplicitArgRegister);
3835 static constexpr int kSPToFPDelta = 2 * kSystemPointerSize;
3836 __ Add(fp, sp, kSPToFPDelta);
3837 if (call_descriptor->IsWasmCapiFunction()) {
3838 // The C-API function has one extra slot for the PC.
3839 required_slots++;
3840 }
3841#endif // V8_ENABLE_WEBASSEMBLY
3842 } else if (call_descriptor->kind() == CallDescriptor::kCallCodeObject) {
3843 UseScratchRegisterScope temps(masm());
3844 Register scratch = temps.AcquireX();
3845 __ Mov(scratch,
3846 StackFrame::TypeToMarker(info()->GetOutputStackFrameType()));
3847 __ Push<MacroAssembler::kSignLR>(lr, fp, scratch, padreg);
3848 static constexpr int kSPToFPDelta = 2 * kSystemPointerSize;
3849 __ Add(fp, sp, kSPToFPDelta);
3850 // One of the extra slots has just been claimed when pushing the padreg.
3851 // We also know that we have at least one slot to claim here, as the typed
3852 // frame has an odd number of fixed slots, and all other parts of the
3853 // total frame slots are even, leaving {required_slots} to be odd.
3854 DCHECK_GE(required_slots, 1);
3855 required_slots--;
3856 } else {
3857 __ Push<MacroAssembler::kSignLR>(lr, fp);
3858 __ Mov(fp, sp);
3859 }
3860 unwinding_info_writer_.MarkFrameConstructed(__ pc_offset());
3861
3862 // Create OSR entry if applicable
3863 if (info()->is_osr()) {
3864 // TurboFan OSR-compiled functions cannot be entered directly.
3865 __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
3866
3867 // Unoptimized code jumps directly to this entrypoint while the
3868 // unoptimized frame is still on the stack. Optimized code uses OSR values
3869 // directly from the unoptimized frame. Thus, all that needs to be done is
3870 // to allocate the remaining stack slots.
3871 __ RecordComment("-- OSR entrypoint --");
3872 osr_pc_offset_ = __ pc_offset();
3873 __ CodeEntry();
3874 size_t unoptimized_frame_slots = osr_helper()->UnoptimizedFrameSlots();
3875
3876#ifdef V8_ENABLE_SANDBOX_BOOL
3877 UseScratchRegisterScope temps(masm());
3878 uint32_t expected_frame_size =
3879 static_cast<uint32_t>(osr_helper()->UnoptimizedFrameSlots()) *
3882 Register scratch = temps.AcquireX();
3883 __ Add(scratch, sp, expected_frame_size);
3884 __ Cmp(scratch, fp);
3885 __ SbxCheck(eq, AbortReason::kOsrUnexpectedStackSize);
3886#endif // V8_ENABLE_SANDBOX_BOOL
3887
3888 DCHECK(call_descriptor->IsJSFunctionCall());
3889 DCHECK_EQ(unoptimized_frame_slots % 2, 1);
3890 // One unoptimized frame slot has already been claimed when the actual
3891 // arguments count was pushed.
3892 required_slots -=
3893 unoptimized_frame_slots - MacroAssembler::kExtraSlotClaimedByPrologue;
3894 }
3895
3896#if V8_ENABLE_WEBASSEMBLY
3897 if (info()->IsWasm() && required_slots * kSystemPointerSize > 4 * KB) {
3898 // For WebAssembly functions with big frames we have to do the stack
3899 // overflow check before we construct the frame. Otherwise we may not
3900 // have enough space on the stack to call the runtime for the stack
3901 // overflow.
3902 Label done;
3903 // If the frame is bigger than the stack, we throw the stack overflow
3904 // exception unconditionally. Thereby we can avoid the integer overflow
3905 // check in the condition code.
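// Roughly, the guarded fast path below expands to:
//   LoadStackLimit(tmp, kRealStackLimit)
//   add  tmp, tmp, #required_slots * kSystemPointerSize
//   cmp  sp, tmp
//   b.hs &done            // there is enough room; skip the overflow path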
3906 if (required_slots * kSystemPointerSize < v8_flags.stack_size * KB) {
3907 UseScratchRegisterScope temps(masm());
3908 Register stack_limit = temps.AcquireX();
3909 __ LoadStackLimit(stack_limit, StackLimitKind::kRealStackLimit);
3910 __ Add(stack_limit, stack_limit, required_slots * kSystemPointerSize);
3911 __ Cmp(sp, stack_limit);
3912 __ B(hs, &done);
3913 }
3914
3915 if (v8_flags.experimental_wasm_growable_stacks) {
3916 CPURegList regs_to_save(kXRegSizeInBits, RegList{});
3917 regs_to_save.Combine(WasmHandleStackOverflowDescriptor::GapRegister());
3918 regs_to_save.Combine(
3919 WasmHandleStackOverflowDescriptor::FrameBaseRegister());
3920 for (auto reg : wasm::kGpParamRegisters) regs_to_save.Combine(reg);
3921 __ PushCPURegList(regs_to_save);
3922 CPURegList fp_regs_to_save(kDRegSizeInBits, DoubleRegList{});
3923 for (auto reg : wasm::kFpParamRegisters) fp_regs_to_save.Combine(reg);
3924 __ PushCPURegList(fp_regs_to_save);
3925 __ Mov(WasmHandleStackOverflowDescriptor::GapRegister(),
3926 required_slots * kSystemPointerSize);
3927 __ Add(
3928 WasmHandleStackOverflowDescriptor::FrameBaseRegister(), fp,
3929 Operand(call_descriptor->ParameterSlotCount() * kSystemPointerSize +
3930 CommonFrameConstants::kFixedFrameSizeAboveFp));
3931 __ CallBuiltin(Builtin::kWasmHandleStackOverflow);
3932 __ PopCPURegList(fp_regs_to_save);
3933 __ PopCPURegList(regs_to_save);
3934 } else {
3935 __ Call(static_cast<intptr_t>(Builtin::kWasmStackOverflow),
3936 RelocInfo::WASM_STUB_CALL);
3937 // The call does not return, hence we can ignore any references and just
3938 // define an empty safepoint.
3939 ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
3940 RecordSafepoint(reference_map);
3941 if (v8_flags.debug_code) __ Brk(0);
3942 }
3943 __ Bind(&done);
3944 }
3945#endif // V8_ENABLE_WEBASSEMBLY
3946
3947 // Skip callee-saved slots, which are pushed below.
3948 required_slots -= saves.Count();
3949 required_slots -= saves_fp.Count();
3950 required_slots -= returns;
3951
3952 __ Claim(required_slots);
3953 }
3954
3955 // Save FP registers.
3956 DCHECK_IMPLIES(saves_fp.Count() != 0,
3957 saves_fp.bits() == CPURegList::GetCalleeSavedV().bits());
3958 __ PushCPURegList(saves_fp);
3959
3960 // Save registers.
3961 __ PushCPURegList(saves);
3962
3963 if (returns != 0) {
3964 __ Claim(returns);
3965 }
3966
3967 for (int spill_slot : frame()->tagged_slots()) {
3968 FrameOffset offset = frame_access_state()->GetFrameOffset(spill_slot);
3969 DCHECK(offset.from_frame_pointer());
3970 __ Str(xzr, MemOperand(fp, offset.offset()));
3971 }
3972}
3973
3974void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
3975 auto call_descriptor = linkage()->GetIncomingDescriptor();
3976
3977 const int returns = RoundUp(frame()->GetReturnSlotCount(), 2);
3978 if (returns != 0) {
3979 __ Drop(returns);
3980 }
3981
3982 // Restore registers.
3983 CPURegList saves =
3984 CPURegList(kXRegSizeInBits, call_descriptor->CalleeSavedRegisters());
3985 __ PopCPURegList(saves);
3986
3987 // Restore fp registers.
3988 CPURegList saves_fp =
3989 CPURegList(kDRegSizeInBits, call_descriptor->CalleeSavedFPRegisters());
3990 __ PopCPURegList(saves_fp);
3991
3992 unwinding_info_writer_.MarkBlockWillExit();
3993
3994 const int parameter_slots =
3995 static_cast<int>(call_descriptor->ParameterSlotCount());
3996 Arm64OperandConverter g(this, nullptr);
3997
3998 // {additional_pop_count} is only greater than zero if {parameter_slots == 0}.
3999 // Check RawMachineAssembler::PopAndReturn.
4000 if (parameter_slots != 0) {
4001 if (additional_pop_count->IsImmediate()) {
4002 DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
4003 } else if (v8_flags.debug_code) {
4004 __ cmp(g.ToRegister(additional_pop_count), Operand(0));
4005 __ Assert(eq, AbortReason::kUnexpectedAdditionalPopValue);
4006 }
4007 }
4008
4009#if V8_ENABLE_WEBASSEMBLY
4010 if (call_descriptor->IsAnyWasmFunctionCall() &&
4011 v8_flags.experimental_wasm_growable_stacks) {
4012 {
4013 UseScratchRegisterScope temps{masm()};
4014 Register scratch = temps.AcquireX();
4015 __ Ldr(scratch, MemOperand(fp, TypedFrameConstants::kFrameTypeOffset));
4016 __ Cmp(scratch,
4017 Operand(StackFrame::TypeToMarker(StackFrame::WASM_SEGMENT_START)));
4018 }
4019 Label done;
4020 __ B(ne, &done);
4021 CPURegList regs_to_save(kXRegSizeInBits, RegList{});
4022 for (auto reg : wasm::kGpReturnRegisters) regs_to_save.Combine(reg);
4023 __ PushCPURegList(regs_to_save);
4024 CPURegList fp_regs_to_save(kDRegSizeInBits, DoubleRegList{});
4025 for (auto reg : wasm::kFpReturnRegisters) fp_regs_to_save.Combine(reg);
4026 __ PushCPURegList(fp_regs_to_save);
4027 __ Mov(kCArgRegs[0], ExternalReference::isolate_address());
4028 __ CallCFunction(ExternalReference::wasm_shrink_stack(), 1);
4029 __ Mov(fp, kReturnRegister0);
4030 __ PopCPURegList(fp_regs_to_save);
4031 __ PopCPURegList(regs_to_save);
4032 if (masm()->options().enable_simulator_code) {
4033 // The next instruction after shrinking the stack is leaving the frame, so
4034 // SP will be set to the old FP there. Switch the simulator stack limit here.
4035 UseScratchRegisterScope temps{masm()};
4036 temps.Exclude(x16);
4037 __ LoadStackLimit(x16, StackLimitKind::kRealStackLimit);
4038 __ hlt(kImmExceptionIsSwitchStackLimit);
4039 }
4040 __ bind(&done);
4041 }
4042#endif // V8_ENABLE_WEBASSEMBLY
4043
4044 Register argc_reg = x3;
4045 // Functions with JS linkage have at least one parameter (the receiver).
4046 // If {parameter_slots} == 0, it means it is a builtin with
4047 // kDontAdaptArgumentsSentinel, which takes care of JS arguments popping
4048 // itself.
4049 const bool drop_jsargs = parameter_slots != 0 &&
4050 frame_access_state()->has_frame() &&
4051 call_descriptor->IsJSFunctionCall();
4052 if (call_descriptor->IsCFunctionCall()) {
4053 AssembleDeconstructFrame();
4054 } else if (frame_access_state()->has_frame()) {
4055 // Canonicalize JSFunction return sites for now unless they have a variable
4056 // number of stack slot pops.
4057 if (additional_pop_count->IsImmediate() &&
4058 g.ToConstant(additional_pop_count).ToInt32() == 0) {
4059 if (return_label_.is_bound()) {
4060 __ B(&return_label_);
4061 return;
4062 } else {
4063 __ Bind(&return_label_);
4064 }
4065 }
4066 if (drop_jsargs) {
4067 // Get the actual argument count.
4068 DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
4069 __ Ldr(argc_reg, MemOperand(fp, StandardFrameConstants::kArgCOffset));
4070 }
4071 AssembleDeconstructFrame();
4072 }
4073
4074 if (drop_jsargs) {
4075 // We must pop all arguments from the stack (including the receiver). This
4076 // number of arguments is given by max(1 + argc_reg, parameter_slots).
4077 Label argc_reg_has_final_count;
4078 DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
4079 if (parameter_slots > 1) {
4080 __ Cmp(argc_reg, Operand(parameter_slots));
4081 __ B(&argc_reg_has_final_count, ge);
4082 __ Mov(argc_reg, Operand(parameter_slots));
4083 __ Bind(&argc_reg_has_final_count);
4084 }
4085 __ DropArguments(argc_reg);
4086 } else if (additional_pop_count->IsImmediate()) {
4087 int additional_count = g.ToConstant(additional_pop_count).ToInt32();
4088 __ DropArguments(parameter_slots + additional_count);
4089 } else if (parameter_slots == 0) {
4090 __ DropArguments(g.ToRegister(additional_pop_count));
4091 } else {
4092 // {additional_pop_count} is guaranteed to be zero if {parameter_slots !=
4093 // 0}. Check RawMachineAssembler::PopAndReturn.
4094 __ DropArguments(parameter_slots);
4095 }
4096 __ AssertSpAligned();
4097 __ Ret();
4098}
4099
4100void CodeGenerator::FinishCode() { __ ForceConstantPoolEmissionWithoutJump(); }
4101
4102void CodeGenerator::PrepareForDeoptimizationExits(
4103 ZoneDeque<DeoptimizationExit*>* exits) {
4104 __ ForceConstantPoolEmissionWithoutJump();
4105 // We are conservative here, reserving sufficient space for the largest deopt
4106 // kind.
4107 DCHECK_GE(Deoptimizer::kLazyDeoptExitSize, Deoptimizer::kEagerDeoptExitSize);
4108 __ CheckVeneerPool(
4109 false, false,
4110 static_cast<int>(exits->size()) * Deoptimizer::kLazyDeoptExitSize);
4111
4112 // Check which deopt kinds exist in this InstructionStream object, to avoid
4113 // emitting jumps to unused entries.
4114 bool saw_deopt_kind[kDeoptimizeKindCount] = {false};
4115 for (auto exit : *exits) {
4116 saw_deopt_kind[static_cast<int>(exit->kind())] = true;
4117 }
4118
4119 // Emit the jumps to deoptimization entries.
4120 UseScratchRegisterScope scope(masm());
4121 Register scratch = scope.AcquireX();
4122 static_assert(static_cast<int>(kFirstDeoptimizeKind) == 0);
4123 for (int i = 0; i < kDeoptimizeKindCount; i++) {
4124 if (!saw_deopt_kind[i]) continue;
4125 DeoptimizeKind kind = static_cast<DeoptimizeKind>(i);
4126 __ bind(&jump_deoptimization_entry_labels_[i]);
4127 __ LoadEntryFromBuiltin(Deoptimizer::GetDeoptimizationEntry(kind), scratch);
4128 __ Jump(scratch);
4129 }
4130}
4131
4132AllocatedOperand CodeGenerator::Push(InstructionOperand* source) {
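// Note that new_slots is always rounded up to an even count and single
// registers are pushed together with padreg below, so sp stays 16-byte
// aligned as AArch64 requires.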
4133 auto rep = LocationOperand::cast(source)->representation();
4134 int new_slots = RoundUp<2>(ElementSizeInPointers(rep));
4135 Arm64OperandConverter g(this, nullptr);
4136 int last_frame_slot_id =
4137 frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
4138 int sp_delta = frame_access_state_->sp_delta();
4139 int slot_id = last_frame_slot_id + sp_delta + new_slots;
4140 AllocatedOperand stack_slot(LocationOperand::STACK_SLOT, rep, slot_id);
4141 if (source->IsRegister()) {
4142 __ Push(padreg, g.ToRegister(source));
4143 frame_access_state()->IncreaseSPDelta(new_slots);
4144 } else if (source->IsStackSlot()) {
4145 UseScratchRegisterScope temps(masm());
4146 Register scratch = temps.AcquireX();
4147 __ Ldr(scratch, g.ToMemOperand(source, masm()));
4148 __ Push(padreg, scratch);
4149 frame_access_state()->IncreaseSPDelta(new_slots);
4150 } else {
4151 // No push instruction for this operand type. Bump the stack pointer and
4152 // assemble the move.
4153 __ Sub(sp, sp, Operand(new_slots * kSystemPointerSize));
4154 frame_access_state()->IncreaseSPDelta(new_slots);
4155 AssembleMove(source, &stack_slot);
4156 }
4157 temp_slots_ += new_slots;
4158 return stack_slot;
4159}
4160
4161void CodeGenerator::Pop(InstructionOperand* dest, MachineRepresentation rep) {
4162 int dropped_slots = RoundUp<2>(ElementSizeInPointers(rep));
4163 Arm64OperandConverter g(this, nullptr);
4164 if (dest->IsRegister()) {
4165 frame_access_state()->IncreaseSPDelta(-dropped_slots);
4166 __ Pop(g.ToRegister(dest), padreg);
4167 } else if (dest->IsStackSlot()) {
4168 frame_access_state()->IncreaseSPDelta(-dropped_slots);
4169 UseScratchRegisterScope temps(masm());
4170 Register scratch = temps.AcquireX();
4171 __ Pop(scratch, padreg);
4172 __ Str(scratch, g.ToMemOperand(dest, masm()));
4173 } else {
4174 int last_frame_slot_id =
4175 frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
4176 int sp_delta = frame_access_state_->sp_delta();
4177 int slot_id = last_frame_slot_id + sp_delta;
4178 AllocatedOperand stack_slot(LocationOperand::STACK_SLOT, rep, slot_id);
4179 AssembleMove(&stack_slot, dest);
4180 frame_access_state()->IncreaseSPDelta(-dropped_slots);
4181 __ Add(sp, sp, Operand(dropped_slots * kSystemPointerSize));
4182 }
4183 temp_slots_ -= dropped_slots;
4184}
4185
4186void CodeGenerator::PopTempStackSlots() {
4187 if (temp_slots_ > 0) {
4188 frame_access_state()->IncreaseSPDelta(-temp_slots_);
4189 __ add(sp, sp, Operand(temp_slots_ * kSystemPointerSize));
4190 temp_slots_ = 0;
4191 }
4192}
4193
4194void CodeGenerator::MoveToTempLocation(InstructionOperand* source,
4195 MachineRepresentation rep) {
4196 // Must be kept in sync with {MoveTempLocationTo}.
4197 DCHECK(!source->IsImmediate());
4198 move_cycle_.temps.emplace(masm());
4199 auto& temps = *move_cycle_.temps;
4200 // Temporarily exclude the reserved scratch registers while we pick one to
4201 // resolve the move cycle. Re-include them immediately afterwards as they
4202 // might be needed for the move to the temp location.
4203 temps.Exclude(CPURegList(64, move_cycle_.scratch_regs));
4204 temps.ExcludeFP(CPURegList(64, move_cycle_.scratch_fp_regs));
4205 if (!IsFloatingPoint(rep)) {
4206 if (temps.CanAcquire()) {
4207 Register scratch = move_cycle_.temps->AcquireX();
4208 move_cycle_.scratch_reg.emplace(scratch);
4209 } else if (temps.CanAcquireFP()) {
4210 // Try to use an FP register if no GP register is available for non-FP
4211 // moves.
4212 DoubleRegister scratch = move_cycle_.temps->AcquireD();
4213 move_cycle_.scratch_reg.emplace(scratch);
4214 }
4215 } else if (rep == MachineRepresentation::kFloat32) {
4216 VRegister scratch = move_cycle_.temps->AcquireS();
4217 move_cycle_.scratch_reg.emplace(scratch);
4218 } else if (rep == MachineRepresentation::kFloat64) {
4219 VRegister scratch = move_cycle_.temps->AcquireD();
4220 move_cycle_.scratch_reg.emplace(scratch);
4221 } else if (rep == MachineRepresentation::kSimd128) {
4222 VRegister scratch = move_cycle_.temps->AcquireQ();
4223 move_cycle_.scratch_reg.emplace(scratch);
4224 }
4225 temps.Include(CPURegList(64, move_cycle_.scratch_regs));
4226 temps.IncludeFP(CPURegList(64, move_cycle_.scratch_fp_regs));
4227 if (move_cycle_.scratch_reg.has_value()) {
4228 // A scratch register is available for this rep.
4229 auto& scratch_reg = *move_cycle_.scratch_reg;
4230 if (scratch_reg.IsD() && !IsFloatingPoint(rep)) {
4231 AllocatedOperand scratch(LocationOperand::REGISTER,
4232 MachineRepresentation::kFloat64,
4233 scratch_reg.code());
4234 Arm64OperandConverter g(this, nullptr);
4235 if (source->IsStackSlot()) {
4236 __ Ldr(g.ToDoubleRegister(&scratch), g.ToMemOperand(source, masm()));
4237 } else {
4238 DCHECK(source->IsRegister());
4239 __ fmov(g.ToDoubleRegister(&scratch), g.ToRegister(source));
4240 }
4241 } else {
4242 AllocatedOperand scratch(LocationOperand::REGISTER, rep,
4243 move_cycle_.scratch_reg->code());
4244 AssembleMove(source, &scratch);
4245 }
4246 } else {
4247 // The scratch registers are blocked by pending moves. Use the stack
4248 // instead.
4249 Push(source);
4250 }
4251}
4252
4253void CodeGenerator::MoveTempLocationTo(InstructionOperand* dest,
4254 MachineRepresentation rep) {
4255 if (move_cycle_.scratch_reg.has_value()) {
4256 auto& scratch_reg = *move_cycle_.scratch_reg;
4257 if (!IsFloatingPoint(rep) && scratch_reg.IsD()) {
4258 // We used a D register to move a non-FP operand; change the representation
4259 // to correctly interpret the InstructionOperand's code.
4260 AllocatedOperand scratch(LocationOperand::REGISTER,
4261 MachineRepresentation::kFloat64,
4262 move_cycle_.scratch_reg->code());
4263 Arm64OperandConverter g(this, nullptr);
4264 if (dest->IsStackSlot()) {
4265 __ Str(g.ToDoubleRegister(&scratch), g.ToMemOperand(dest, masm()));
4266 } else {
4267 DCHECK(dest->IsRegister());
4268 __ fmov(g.ToRegister(dest), g.ToDoubleRegister(&scratch));
4269 }
4270 } else {
4271 AllocatedOperand scratch(LocationOperand::REGISTER, rep,
4272 move_cycle_.scratch_reg->code());
4273 AssembleMove(&scratch, dest);
4274 }
4275 } else {
4276 Pop(dest, rep);
4277 }
4278 // Restore the default state to release the {UseScratchRegisterScope} and to
4279 // prepare for the next cycle.
4280 move_cycle_ = MoveCycleState();
4281}
4282
4283void CodeGenerator::SetPendingMove(MoveOperands* move) {
4284 auto move_type = MoveType::InferMove(&move->source(), &move->destination());
4285 if (move_type == MoveType::kStackToStack) {
4286 Arm64OperandConverter g(this, nullptr);
4287 MemOperand src = g.ToMemOperand(&move->source(), masm());
4288 MemOperand dst = g.ToMemOperand(&move->destination(), masm());
4289 UseScratchRegisterScope temps(masm());
4290 if (move->source().IsSimd128StackSlot()) {
4291 VRegister temp = temps.AcquireQ();
4292 move_cycle_.scratch_fp_regs.set(temp);
4293 } else {
4294 Register temp = temps.AcquireX();
4295 move_cycle_.scratch_regs.set(temp);
4296 }
4297 int64_t src_offset = src.offset();
4298 unsigned src_size_log2 = CalcLSDataSizeLog2(LDR_x);
4299 int64_t dst_offset = dst.offset();
4300 unsigned dst_size_log2 = CalcLSDataSizeLog2(STR_x);
4301 // Offset doesn't fit into the immediate field so the assembler will emit
4302 // two instructions and use a second temp register.
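// As an illustration, for a 64-bit Ldr/Str the scaled immediate form covers
// multiples of 8 in [0, 32760] and the unscaled form covers [-256, 255]; an
// offset outside both ranges is materialized into a scratch register first.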
4303 if ((src.IsImmediateOffset() &&
4304 !masm()->IsImmLSScaled(src_offset, src_size_log2) &&
4305 !masm()->IsImmLSUnscaled(src_offset)) ||
4306 (dst.IsImmediateOffset() &&
4307 !masm()->IsImmLSScaled(dst_offset, dst_size_log2) &&
4308 !masm()->IsImmLSUnscaled(dst_offset))) {
4309 Register temp = temps.AcquireX();
4310 move_cycle_.scratch_regs.set(temp);
4311 }
4312 }
4313}
4314
4315void CodeGenerator::AssembleMove(InstructionOperand* source,
4316 InstructionOperand* destination) {
4317 Arm64OperandConverter g(this, nullptr);
4318 // Helper function to write the given constant to the dst register.
4319 auto MoveConstantToRegister = [&](Register dst, Constant src) {
4320 if (src.type() == Constant::kHeapObject) {
4321 Handle<HeapObject> src_object = src.ToHeapObject();
4322 RootIndex index;
4323 if (IsMaterializableFromRoot(src_object, &index)) {
4324 __ LoadRoot(dst, index);
4325 } else {
4326 __ Mov(dst, src_object);
4327 }
4328 } else if (src.type() == Constant::kCompressedHeapObject) {
4329 Handle<HeapObject> src_object = src.ToHeapObject();
4330 RootIndex index;
4331 if (IsMaterializableFromRoot(src_object, &index)) {
4332 __ LoadTaggedRoot(dst, index);
4333 } else {
4334 // TODO(v8:8977): Even though this mov happens on 32 bits (Note the
4335 // .W()) and we are passing along the RelocInfo, we still haven't made
4336 // the address embedded in the code-stream actually be compressed.
4337 __ Mov(dst.W(),
4338 Immediate(src_object, RelocInfo::COMPRESSED_EMBEDDED_OBJECT));
4339 }
4340 } else if (src.type() == Constant::kExternalReference) {
4341 __ Mov(dst, src.ToExternalReference());
4342 } else {
4343 Operand src_op = g.ToImmediate(source);
4344 if (src.type() == Constant::kInt32 && src_op.NeedsRelocation(masm())) {
4345 // Use 32-bit loads for relocatable 32-bit constants.
4346 dst = dst.W();
4347 }
4348 __ Mov(dst, src_op);
4349 }
4350 };
4351 switch (MoveType::InferMove(source, destination)) {
4352 case MoveType::kRegisterToRegister:
4353 if (source->IsRegister()) {
4354 __ Mov(g.ToRegister(destination), g.ToRegister(source));
4355 } else {
4356 DCHECK(source->IsSimd128Register() || source->IsFloatRegister() ||
4357 source->IsDoubleRegister());
4358 __ Mov(g.ToDoubleRegister(destination).Q(),
4359 g.ToDoubleRegister(source).Q());
4360 }
4361 return;
4362 case MoveType::kRegisterToStack: {
4363 MemOperand dst = g.ToMemOperand(destination, masm());
4364 if (source->IsRegister()) {
4365 __ Str(g.ToRegister(source), dst);
4366 } else {
4367 VRegister src = g.ToDoubleRegister(source);
4368 if (source->IsFloatRegister() || source->IsDoubleRegister()) {
4369 __ Str(src, dst);
4370 } else {
4371 DCHECK(source->IsSimd128Register());
4372 __ Str(src.Q(), dst);
4373 }
4374 }
4375 return;
4376 }
4377 case MoveType::kStackToRegister: {
4378 MemOperand src = g.ToMemOperand(source, masm());
4379 if (destination->IsRegister()) {
4380 __ Ldr(g.ToRegister(destination), src);
4381 } else {
4382 VRegister dst = g.ToDoubleRegister(destination);
4383 if (destination->IsFloatRegister() || destination->IsDoubleRegister()) {
4384 __ Ldr(dst, src);
4385 } else {
4386 DCHECK(destination->IsSimd128Register());
4387 __ Ldr(dst.Q(), src);
4388 }
4389 }
4390 return;
4391 }
4392 case MoveType::kStackToStack: {
4393 MemOperand src = g.ToMemOperand(source, masm());
4394 MemOperand dst = g.ToMemOperand(destination, masm());
4395 if (source->IsSimd128StackSlot()) {
4396 UseScratchRegisterScope scope(masm());
4397 VRegister temp = scope.AcquireQ();
4398 __ Ldr(temp, src);
4399 __ Str(temp, dst);
4400 } else {
4401 UseScratchRegisterScope scope(masm());
4402 Register temp = scope.AcquireX();
4403 __ Ldr(temp, src);
4404 __ Str(temp, dst);
4405 }
4406 return;
4407 }
4408 case MoveType::kConstantToRegister: {
4409 Constant src = g.ToConstant(source);
4410 if (destination->IsRegister()) {
4411 MoveConstantToRegister(g.ToRegister(destination), src);
4412 } else {
4413 VRegister dst = g.ToDoubleRegister(destination);
4414 if (destination->IsFloatRegister()) {
4415 __ Fmov(dst.S(), src.ToFloat32());
4416 } else {
4417 DCHECK(destination->IsDoubleRegister());
4418 __ Fmov(dst, src.ToFloat64().value());
4419 }
4420 }
4421 return;
4422 }
4423 case MoveType::kConstantToStack: {
4424 Constant src = g.ToConstant(source);
4425 MemOperand dst = g.ToMemOperand(destination, masm());
4426 if (destination->IsStackSlot()) {
4427 UseScratchRegisterScope scope(masm());
4428 Register temp = scope.AcquireX();
4429 MoveConstantToRegister(temp, src);
4430 __ Str(temp, dst);
4431 } else if (destination->IsFloatStackSlot()) {
4432 if (base::bit_cast<int32_t>(src.ToFloat32()) == 0) {
4433 __ Str(wzr, dst);
4434 } else {
4435 UseScratchRegisterScope scope(masm());
4436 VRegister temp = scope.AcquireS();
4437 __ Fmov(temp, src.ToFloat32());
4438 __ Str(temp, dst);
4439 }
4440 } else {
4441 DCHECK(destination->IsDoubleStackSlot());
4442 if (src.ToFloat64().AsUint64() == 0) {
4443 __ Str(xzr, dst);
4444 } else {
4445 UseScratchRegisterScope scope(masm());
4446 VRegister temp = scope.AcquireD();
4447 __ Fmov(temp, src.ToFloat64().value());
4448 __ Str(temp, dst);
4449 }
4450 }
4451 return;
4452 }
4453 }
4454 UNREACHABLE();
4455}
4456
4457void CodeGenerator::AssembleSwap(InstructionOperand* source,
4458 InstructionOperand* destination) {
4459 Arm64OperandConverter g(this, nullptr);
4460 switch (MoveType::InferSwap(source, destination)) {
4461 case MoveType::kRegisterToRegister:
4462 if (source->IsRegister()) {
4463 __ Swap(g.ToRegister(source), g.ToRegister(destination));
4464 } else {
4465 VRegister src = g.ToDoubleRegister(source);
4466 VRegister dst = g.ToDoubleRegister(destination);
4467 if (source->IsFloatRegister() || source->IsDoubleRegister()) {
4468 __ Swap(src, dst);
4469 } else {
4470 DCHECK(source->IsSimd128Register());
4471 __ Swap(src.Q(), dst.Q());
4472 }
4473 }
4474 return;
4475 case MoveType::kRegisterToStack: {
4476 UseScratchRegisterScope scope(masm());
4477 MemOperand dst = g.ToMemOperand(destination, masm());
4478 if (source->IsRegister()) {
4479 Register temp = scope.AcquireX();
4480 Register src = g.ToRegister(source);
4481 __ Mov(temp, src);
4482 __ Ldr(src, dst);
4483 __ Str(temp, dst);
4484 } else {
4485 UseScratchRegisterScope scope(masm());
4486 VRegister src = g.ToDoubleRegister(source);
4487 if (source->IsFloatRegister() || source->IsDoubleRegister()) {
4488 VRegister temp = scope.AcquireD();
4489 __ Mov(temp, src);
4490 __ Ldr(src, dst);
4491 __ Str(temp, dst);
4492 } else {
4493 DCHECK(source->IsSimd128Register());
4494 VRegister temp = scope.AcquireQ();
4495 __ Mov(temp, src.Q());
4496 __ Ldr(src.Q(), dst);
4497 __ Str(temp, dst);
4498 }
4499 }
4500 return;
4501 }
4502 case MoveType::kStackToStack: {
4503 UseScratchRegisterScope scope(masm());
4504 MemOperand src = g.ToMemOperand(source, masm());
4505 MemOperand dst = g.ToMemOperand(destination, masm());
4506 VRegister temp_0 = scope.AcquireD();
4507 VRegister temp_1 = scope.AcquireD();
4508 if (source->IsSimd128StackSlot()) {
4509 __ Ldr(temp_0.Q(), src);
4510 __ Ldr(temp_1.Q(), dst);
4511 __ Str(temp_0.Q(), dst);
4512 __ Str(temp_1.Q(), src);
4513 } else {
4514 __ Ldr(temp_0, src);
4515 __ Ldr(temp_1, dst);
4516 __ Str(temp_0, dst);
4517 __ Str(temp_1, src);
4518 }
4519 return;
4520 }
4521 default:
4522 UNREACHABLE();
4523 }
4524}
4525
4526#undef __
4527
4528} // namespace compiler
4529} // namespace internal
4530} // namespace v8