v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
code-generator-arm.cc
1// Copyright 2014 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
14#include "src/common/globals.h"
20#include "src/compiler/osr.h"
23
24#if V8_ENABLE_WEBASSEMBLY
27#endif // V8_ENABLE_WEBASSEMBLY
28
29namespace v8 {
30namespace internal {
31namespace compiler {
32
33#define __ masm()->
34
35// Adds Arm-specific methods to convert InstructionOperands.
36class ArmOperandConverter final : public InstructionOperandConverter {
37 public:
38 ArmOperandConverter(CodeGenerator* gen, Instruction* instr)
39 : InstructionOperandConverter(gen, instr) {}
40
41 SBit OutputSBit() const {
42 switch (instr_->flags_mode()) {
43 case kFlags_branch:
46 case kFlags_set:
48 case kFlags_trap:
49 case kFlags_select:
50 return SetCC;
51 case kFlags_none:
52 return LeaveCC;
53 }
54 UNREACHABLE();
55 }
56
57 Operand InputImmediate(size_t index) const {
58 return ToImmediate(instr_->InputAt(index));
59 }
60
61 Operand InputOperand2(size_t first_index) {
62 const size_t index = first_index;
63 switch (AddressingModeField::decode(instr_->opcode())) {
64 case kMode_None:
65 case kMode_Offset_RI:
66 case kMode_Offset_RR:
67 case kMode_Root:
68 break;
69 case kMode_Operand2_I:
70 return InputImmediate(index + 0);
71 case kMode_Operand2_R:
72 return Operand(InputRegister(index + 0));
73 case kMode_Operand2_R_ASR_I:
74 return Operand(InputRegister(index + 0), ASR, InputInt5(index + 1));
75 case kMode_Operand2_R_ASR_R:
76 return Operand(InputRegister(index + 0), ASR, InputRegister(index + 1));
77 case kMode_Operand2_R_LSL_I:
78 return Operand(InputRegister(index + 0), LSL, InputInt5(index + 1));
79 case kMode_Operand2_R_LSL_R:
80 return Operand(InputRegister(index + 0), LSL, InputRegister(index + 1));
81 case kMode_Operand2_R_LSR_I:
82 return Operand(InputRegister(index + 0), LSR, InputInt5(index + 1));
83 case kMode_Operand2_R_LSR_R:
84 return Operand(InputRegister(index + 0), LSR, InputRegister(index + 1));
85 case kMode_Operand2_R_ROR_I:
86 return Operand(InputRegister(index + 0), ROR, InputInt5(index + 1));
87 case kMode_Operand2_R_ROR_R:
88 return Operand(InputRegister(index + 0), ROR, InputRegister(index + 1));
89 }
90 UNREACHABLE();
91 }
92
93 MemOperand InputOffset(size_t* first_index) {
94 const size_t index = *first_index;
95 switch (AddressingModeField::decode(instr_->opcode())) {
96 case kMode_None:
97 case kMode_Operand2_I:
98 case kMode_Operand2_R:
99 case kMode_Operand2_R_ASR_I:
100 case kMode_Operand2_R_ASR_R:
101 case kMode_Operand2_R_LSL_R:
102 case kMode_Operand2_R_LSR_I:
103 case kMode_Operand2_R_LSR_R:
104 case kMode_Operand2_R_ROR_I:
105 case kMode_Operand2_R_ROR_R:
106 break;
107 case kMode_Operand2_R_LSL_I:
108 *first_index += 3;
109 return MemOperand(InputRegister(index + 0), InputRegister(index + 1),
110 LSL, InputInt32(index + 2));
111 case kMode_Offset_RI:
112 *first_index += 2;
113 return MemOperand(InputRegister(index + 0), InputInt32(index + 1));
114 case kMode_Offset_RR:
115 *first_index += 2;
116 return MemOperand(InputRegister(index + 0), InputRegister(index + 1));
117 case kMode_Root:
118 *first_index += 1;
119 return MemOperand(kRootRegister, InputInt32(index));
120 }
121 UNREACHABLE();
122 }
123
124 MemOperand InputOffset(size_t first_index = 0) {
125 return InputOffset(&first_index);
126 }
127
128 Operand ToImmediate(InstructionOperand* operand) const {
129 Constant constant = ToConstant(operand);
130 switch (constant.type()) {
131 case Constant::kInt32:
132 return Operand(constant.ToInt32(), constant.rmode());
133 case Constant::kFloat32:
134 return Operand::EmbeddedNumber(constant.ToFloat32());
135 case Constant::kFloat64:
136 return Operand::EmbeddedNumber(constant.ToFloat64().value());
137 case Constant::kExternalReference:
138 return Operand(constant.ToExternalReference());
139 case Constant::kInt64:
142 // TODO(dcarney): loading RPO constants on arm.
143 case Constant::kRpoNumber:
144 break;
145 }
146 UNREACHABLE();
147 }
148
149 MemOperand ToMemOperand(InstructionOperand* op) const {
150 DCHECK_NOT_NULL(op);
151 DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
152 return SlotToMemOperand(AllocatedOperand::cast(op)->index());
153 }
154
155 MemOperand SlotToMemOperand(int slot) const {
156 FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
157 return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
158 }
159
160 NeonMemOperand NeonInputOperand(size_t first_index) {
161 const size_t index = first_index;
162 switch (AddressingModeField::decode(instr_->opcode())) {
163 case kMode_Operand2_R:
164 return NeonMemOperand(InputRegister(index + 0));
165 default:
166 break;
167 }
168 UNREACHABLE();
169 }
170};
171
172namespace {
173
174class OutOfLineRecordWrite final : public OutOfLineCode {
175 public:
176 OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand offset,
177 Register value, RecordWriteMode mode,
178 StubCallMode stub_mode,
179 UnwindingInfoWriter* unwinding_info_writer)
180 : OutOfLineCode(gen),
181 object_(object),
182 offset_(offset),
183 value_(value),
184 mode_(mode),
185#if V8_ENABLE_WEBASSEMBLY
186 stub_mode_(stub_mode),
187#endif // V8_ENABLE_WEBASSEMBLY
188 must_save_lr_(!gen->frame_access_state()->has_frame()),
189 unwinding_info_writer_(unwinding_info_writer),
190 zone_(gen->zone()) {
191 }
192
193 void Generate() final {
194 __ CheckPageFlag(value_, MemoryChunk::kPointersToHereAreInterestingMask, eq,
195 exit());
196 SaveFPRegsMode const save_fp_mode = frame()->DidAllocateDoubleRegisters()
197 ? SaveFPRegsMode::kSave
198 : SaveFPRegsMode::kIgnore;
199 if (must_save_lr_) {
200 // We need to save and restore lr if the frame was elided.
201 __ Push(lr);
202 unwinding_info_writer_->MarkLinkRegisterOnTopOfStack(__ pc_offset());
203 }
204 if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
205 __ CallEphemeronKeyBarrier(object_, offset_, save_fp_mode);
206#if V8_ENABLE_WEBASSEMBLY
207 } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
208 __ CallRecordWriteStubSaveRegisters(object_, offset_, save_fp_mode,
209 StubCallMode::kCallWasmRuntimeStub);
210#endif // V8_ENABLE_WEBASSEMBLY
211 } else {
212 __ CallRecordWriteStubSaveRegisters(object_, offset_, save_fp_mode);
213 }
214 if (must_save_lr_) {
215 __ Pop(lr);
216 unwinding_info_writer_->MarkPopLinkRegisterFromTopOfStack(__ pc_offset());
217 }
218 }
219
220 private:
221 Register const object_;
222 Operand const offset_;
223 Register const value_;
224 RecordWriteMode const mode_;
225#if V8_ENABLE_WEBASSEMBLY
226 StubCallMode stub_mode_;
227#endif // V8_ENABLE_WEBASSEMBLY
228 bool must_save_lr_;
229 UnwindingInfoWriter* const unwinding_info_writer_;
230 Zone* zone_;
231};
232
233template <typename T>
234class OutOfLineFloatMin final : public OutOfLineCode {
235 public:
236 OutOfLineFloatMin(CodeGenerator* gen, T result, T left, T right)
237 : OutOfLineCode(gen), result_(result), left_(left), right_(right) {}
238
239 void Generate() final { __ FloatMinOutOfLine(result_, left_, right_); }
240
241 private:
242 T const result_;
243 T const left_;
244 T const right_;
245};
246using OutOfLineFloat32Min = OutOfLineFloatMin<SwVfpRegister>;
247using OutOfLineFloat64Min = OutOfLineFloatMin<DwVfpRegister>;
248
249template <typename T>
250class OutOfLineFloatMax final : public OutOfLineCode {
251 public:
252 OutOfLineFloatMax(CodeGenerator* gen, T result, T left, T right)
253 : OutOfLineCode(gen), result_(result), left_(left), right_(right) {}
254
255 void Generate() final { __ FloatMaxOutOfLine(result_, left_, right_); }
256
257 private:
258 T const result_;
259 T const left_;
260 T const right_;
261};
262using OutOfLineFloat32Max = OutOfLineFloatMax<SwVfpRegister>;
263using OutOfLineFloat64Max = OutOfLineFloatMax<DwVfpRegister>;
264
265Condition FlagsConditionToCondition(FlagsCondition condition) {
266 switch (condition) {
267 case kEqual:
268 return eq;
269 case kNotEqual:
270 return ne;
271 case kSignedLessThan:
272 return lt;
274 return ge;
276 return le;
278 return gt;
280 return lo;
282 return hs;
284 return ls;
286 return hi;
288 return lt;
290 return ge;
292 return ls;
294 return hi;
295 case kFloatLessThan:
296 return lo;
298 return hs;
300 return le;
302 return gt;
303 case kOverflow:
304 return vs;
305 case kNotOverflow:
306 return vc;
307 case kPositiveOrZero:
308 return pl;
309 case kNegative:
310 return mi;
311 default:
312 break;
313 }
314 UNREACHABLE();
315}
316
317} // namespace
318
319#define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr) \
320 do { \
321 __ asm_instr(i.OutputRegister(), \
322 MemOperand(i.InputRegister(0), i.InputRegister(1))); \
323 __ dmb(ISH); \
324 } while (0)
325
326#define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr, order) \
327 do { \
328 __ dmb(ISH); \
329 __ asm_instr(i.InputRegister(0), i.InputOffset(1)); \
330 if (order == AtomicMemoryOrder::kSeqCst) __ dmb(ISH); \
331 } while (0)
332
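// The exchange, compare-exchange and binop macros below all follow the same
// load-exclusive/store-exclusive (ldrex/strex) retry pattern: the store
// writes a status value into a temp register, and a non-zero status (tested
// with teq against 0) means the exclusive reservation was lost, so the loop
// branches back and retries. The dmb(ISH) barriers before and after the loop
// provide the ordering expected of these atomics.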
333#define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(load_instr, store_instr) \
334 do { \
335 Label exchange; \
336 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1)); \
337 __ dmb(ISH); \
338 __ bind(&exchange); \
339 __ load_instr(i.OutputRegister(0), i.TempRegister(1)); \
340 __ store_instr(i.TempRegister(0), i.InputRegister(2), i.TempRegister(1)); \
341 __ teq(i.TempRegister(0), Operand(0)); \
342 __ b(ne, &exchange); \
343 __ dmb(ISH); \
344 } while (0)
345
346#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(load_instr, store_instr, \
347 cmp_reg) \
348 do { \
349 Label compareExchange; \
350 Label exit; \
351 __ dmb(ISH); \
352 __ bind(&compareExchange); \
353 __ load_instr(i.OutputRegister(0), i.TempRegister(1)); \
354 __ teq(cmp_reg, Operand(i.OutputRegister(0))); \
355 __ b(ne, &exit); \
356 __ store_instr(i.TempRegister(0), i.InputRegister(3), i.TempRegister(1)); \
357 __ teq(i.TempRegister(0), Operand(0)); \
358 __ b(ne, &compareExchange); \
359 __ bind(&exit); \
360 __ dmb(ISH); \
361 } while (0)
362
363#define ASSEMBLE_ATOMIC_BINOP(load_instr, store_instr, bin_instr) \
364 do { \
365 Label binop; \
366 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1)); \
367 __ dmb(ISH); \
368 __ bind(&binop); \
369 __ load_instr(i.OutputRegister(0), i.TempRegister(1)); \
370 __ bin_instr(i.TempRegister(0), i.OutputRegister(0), \
371 Operand(i.InputRegister(2))); \
372 __ store_instr(i.TempRegister(2), i.TempRegister(0), i.TempRegister(1)); \
373 __ teq(i.TempRegister(2), Operand(0)); \
374 __ b(ne, &binop); \
375 __ dmb(ISH); \
376 } while (0)
377
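// The 64-bit atomic binops use ldrexd/strexd, which load and store a
// doubleword through a register pair (r2/r3 here). instr1 is issued with
// SetCC so that instr2 (e.g. adc/sbc for add/sub) can consume the carry
// produced by the low-word operation; the strexd status register is then
// tested to decide whether the loop must retry.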
378#define ASSEMBLE_ATOMIC64_ARITH_BINOP(instr1, instr2) \
379 do { \
380 Label binop; \
381 __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3)); \
382 __ dmb(ISH); \
383 __ bind(&binop); \
384 __ ldrexd(r2, r3, i.TempRegister(0)); \
385 __ instr1(i.TempRegister(1), r2, i.InputRegister(0), SetCC); \
386 __ instr2(i.TempRegister(2), r3, Operand(i.InputRegister(1))); \
387 DCHECK_EQ(LeaveCC, i.OutputSBit()); \
388 __ strexd(i.TempRegister(3), i.TempRegister(1), i.TempRegister(2), \
389 i.TempRegister(0)); \
390 __ teq(i.TempRegister(3), Operand(0)); \
391 __ b(ne, &binop); \
392 __ dmb(ISH); \
393 } while (0)
394
395#define ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr) \
396 do { \
397 Label binop; \
398 __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3)); \
399 __ dmb(ISH); \
400 __ bind(&binop); \
401 __ ldrexd(r2, r3, i.TempRegister(0)); \
402 __ instr(i.TempRegister(1), r2, Operand(i.InputRegister(0))); \
403 __ instr(i.TempRegister(2), r3, Operand(i.InputRegister(1))); \
404 __ strexd(i.TempRegister(3), i.TempRegister(1), i.TempRegister(2), \
405 i.TempRegister(0)); \
406 __ teq(i.TempRegister(3), Operand(0)); \
407 __ b(ne, &binop); \
408 __ dmb(ISH); \
409 } while (0)
410
411#define ASSEMBLE_IEEE754_BINOP(name) \
412 do { \
413 /* TODO(bmeurer): We should really get rid of this special instruction, */ \
414 /* and generate a CallAddress instruction instead. */ \
415 FrameScope scope(masm(), StackFrame::MANUAL); \
416 __ PrepareCallCFunction(0, 2); \
417 __ MovToFloatParameters(i.InputDoubleRegister(0), \
418 i.InputDoubleRegister(1)); \
419 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2); \
420 /* Move the result in the double result register. */ \
421 __ MovFromFloatResult(i.OutputDoubleRegister()); \
422 DCHECK_EQ(LeaveCC, i.OutputSBit()); \
423 } while (0)
424
425#define ASSEMBLE_IEEE754_UNOP(name) \
426 do { \
427 /* TODO(bmeurer): We should really get rid of this special instruction, */ \
428 /* and generate a CallAddress instruction instead. */ \
429 FrameScope scope(masm(), StackFrame::MANUAL); \
430 __ PrepareCallCFunction(0, 1); \
431 __ MovToFloatParameter(i.InputDoubleRegister(0)); \
432 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1); \
433 /* Move the result in the double result register. */ \
434 __ MovFromFloatResult(i.OutputDoubleRegister()); \
435 DCHECK_EQ(LeaveCC, i.OutputSBit()); \
436 } while (0)
437
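// vqmovn saturating-narrows one quadword source into a doubleword, so a
// 128-bit narrowing op is built from two vqmovn's targeting dst.low() and
// dst.high(). The branches below only choose the order of the two narrows
// (or copy dst.low() into dst.high()) so that a source register aliasing
// dst is not clobbered before it has been narrowed.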
438#define ASSEMBLE_NEON_NARROWING_OP(dt, sdt) \
439 do { \
440 Simd128Register dst = i.OutputSimd128Register(), \
441 src0 = i.InputSimd128Register(0), \
442 src1 = i.InputSimd128Register(1); \
443 if (dst == src0 && dst == src1) { \
444 __ vqmovn(dt, sdt, dst.low(), src0); \
445 __ vmov(dst.high(), dst.low()); \
446 } else if (dst == src0) { \
447 __ vqmovn(dt, sdt, dst.low(), src0); \
448 __ vqmovn(dt, sdt, dst.high(), src1); \
449 } else { \
450 __ vqmovn(dt, sdt, dst.high(), src1); \
451 __ vqmovn(dt, sdt, dst.low(), src0); \
452 } \
453 } while (0)
454
455#define ASSEMBLE_F64X2_ARITHMETIC_BINOP(op) \
456 do { \
457 __ op(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low(), \
458 i.InputSimd128Register(1).low()); \
459 __ op(i.OutputSimd128Register().high(), i.InputSimd128Register(0).high(), \
460 i.InputSimd128Register(1).high()); \
461 } while (0)
462
463// If shift value is an immediate, we can call asm_imm, taking the shift value
464// modulo 2^width. Otherwise, emit code to perform the modulus operation, and
465// call vshl.
466#define ASSEMBLE_SIMD_SHIFT_LEFT(asm_imm, width, sz, dt) \
467 do { \
468 QwNeonRegister dst = i.OutputSimd128Register(); \
469 QwNeonRegister src = i.InputSimd128Register(0); \
470 if (instr->InputAt(1)->IsImmediate()) { \
471 __ asm_imm(dt, dst, src, i.InputInt##width(1)); \
472 } else { \
473 UseScratchRegisterScope temps(masm()); \
474 Simd128Register tmp = temps.AcquireQ(); \
475 Register shift = temps.Acquire(); \
476 constexpr int mask = (1 << width) - 1; \
477 __ and_(shift, i.InputRegister(1), Operand(mask)); \
478 __ vdup(sz, tmp, shift); \
479 __ vshl(dt, dst, src, tmp); \
480 } \
481 } while (0)
482
483// If shift value is an immediate, we can call asm_imm, taking the shift value
484// modulo 2^width. Otherwise, emit code to perform the modulus operation, and
485// call vshl, passing in the negative shift value (treated as a right shift).
486#define ASSEMBLE_SIMD_SHIFT_RIGHT(asm_imm, width, sz, dt) \
487 do { \
488 QwNeonRegister dst = i.OutputSimd128Register(); \
489 QwNeonRegister src = i.InputSimd128Register(0); \
490 if (instr->InputAt(1)->IsImmediate()) { \
491 __ asm_imm(dt, dst, src, i.InputInt##width(1)); \
492 } else { \
493 UseScratchRegisterScope temps(masm()); \
494 Simd128Register tmp = temps.AcquireQ(); \
495 Register shift = temps.Acquire(); \
496 constexpr int mask = (1 << width) - 1; \
497 __ and_(shift, i.InputRegister(1), Operand(mask)); \
498 __ vdup(sz, tmp, shift); \
499 __ vneg(sz, tmp, tmp); \
500 __ vshl(dt, dst, src, tmp); \
501 } \
502 } while (0)
503
508
510 if (frame_access_state()->has_frame()) {
511 __ ldm(ia, fp, {lr, fp});
512 }
514}
515
516namespace {
517
518void FlushPendingPushRegisters(MacroAssembler* masm,
519 FrameAccessState* frame_access_state,
520 ZoneVector<Register>* pending_pushes) {
521 switch (pending_pushes->size()) {
522 case 0:
523 break;
524 case 1:
525 masm->push((*pending_pushes)[0]);
526 break;
527 case 2:
528 masm->Push((*pending_pushes)[0], (*pending_pushes)[1]);
529 break;
530 case 3:
531 masm->Push((*pending_pushes)[0], (*pending_pushes)[1],
532 (*pending_pushes)[2]);
533 break;
534 default:
535 UNREACHABLE();
536 }
537 frame_access_state->IncreaseSPDelta(pending_pushes->size());
538 pending_pushes->clear();
539}
540
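// Adjusts sp so that the slot just above it is new_slot_above_sp: a positive
// delta allocates additional stack space, a negative one releases it. Any
// registers queued for pushing are flushed first so that the recorded SP
// delta stays in sync with the actual stack pointer.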
541void AdjustStackPointerForTailCall(
542 MacroAssembler* masm, FrameAccessState* state, int new_slot_above_sp,
543 ZoneVector<Register>* pending_pushes = nullptr,
544 bool allow_shrinkage = true) {
545 int current_sp_offset = state->GetSPToFPSlotCount() +
546 StandardFrameConstants::kFixedSlotCountAboveFp;
547 int stack_slot_delta = new_slot_above_sp - current_sp_offset;
548 if (stack_slot_delta > 0) {
549 if (pending_pushes != nullptr) {
550 FlushPendingPushRegisters(masm, state, pending_pushes);
551 }
552 masm->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
553 state->IncreaseSPDelta(stack_slot_delta);
554 } else if (allow_shrinkage && stack_slot_delta < 0) {
555 if (pending_pushes != nullptr) {
556 FlushPendingPushRegisters(masm, state, pending_pushes);
557 }
558 masm->add(sp, sp, Operand(-stack_slot_delta * kSystemPointerSize));
559 state->IncreaseSPDelta(stack_slot_delta);
560 }
561}
562
563#if DEBUG
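// Atomic pair instructions produce a 64-bit result in two GP registers. This
// helper checks that the low and high words end up in the expected registers,
// whether the instruction exposes them as outputs or only via trailing temps.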
564bool VerifyOutputOfAtomicPairInstr(ArmOperandConverter* converter,
565 const Instruction* instr, Register low,
566 Register high) {
568 if (instr->OutputCount() == 2) {
569 return (converter->OutputRegister(0) == low &&
570 converter->OutputRegister(1) == high);
571 }
572 if (instr->OutputCount() == 1) {
573 return (converter->OutputRegister(0) == low &&
574 converter->TempRegister(instr->TempCount() - 1) == high) ||
575 (converter->OutputRegister(0) == high &&
576 converter->TempRegister(instr->TempCount() - 1) == low);
577 }
579 return (converter->TempRegister(instr->TempCount() - 2) == low &&
580 converter->TempRegister(instr->TempCount() - 1) == high);
581}
582#endif
583
584} // namespace
585
586void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
587 int first_unused_slot_offset) {
590
591 if (!pushes.empty() &&
592 (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
593 first_unused_slot_offset)) {
594 ArmOperandConverter g(this, instr);
595 ZoneVector<Register> pending_pushes(zone());
596 for (auto move : pushes) {
597 LocationOperand destination_location(
598 LocationOperand::cast(move->destination()));
599 InstructionOperand source(move->source());
600 AdjustStackPointerForTailCall(
601 masm(), frame_access_state(),
602 destination_location.index() - pending_pushes.size(),
603 &pending_pushes);
604 // Pushes of non-register data types are not supported.
605 DCHECK(source.IsRegister());
606 LocationOperand source_location(LocationOperand::cast(source));
607 pending_pushes.push_back(source_location.GetRegister());
608 // TODO(arm): We can push more than 3 registers at once. Add support in
609 // the macro-assembler for pushing a list of registers.
610 if (pending_pushes.size() == 3) {
611 FlushPendingPushRegisters(masm(), frame_access_state(),
612 &pending_pushes);
613 }
614 move->Eliminate();
615 }
616 FlushPendingPushRegisters(masm(), frame_access_state(), &pending_pushes);
617 }
618 AdjustStackPointerForTailCall(masm(), frame_access_state(),
619 first_unused_slot_offset, nullptr, false);
620}
621
622void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
623 int first_unused_slot_offset) {
624 AdjustStackPointerForTailCall(masm(), frame_access_state(),
625 first_unused_slot_offset);
626}
627
628// Check that {kJavaScriptCallCodeStartRegister} is correct.
629void CodeGenerator::AssembleCodeStartRegisterCheck() {
630 UseScratchRegisterScope temps(masm());
631 Register scratch = temps.Acquire();
632 __ ComputeCodeStartAddress(scratch);
633 __ cmp(scratch, kJavaScriptCallCodeStartRegister);
634 __ Assert(eq, AbortReason::kWrongFunctionCodeStart);
635}
636
637#ifdef V8_ENABLE_LEAPTIERING
638void CodeGenerator::AssembleDispatchHandleRegisterCheck() {
640}
641#endif // V8_ENABLE_LEAPTIERING
642
643// Check if the code object is marked for deoptimization. If it is, then it
644// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
645// to:
646// 1. read from memory the word that contains that bit, which can be found in
647// the flags in the referenced {Code} object;
648// 2. test kMarkedForDeoptimizationBit in those flags; and
649// 3. if it is not zero then it jumps to the builtin.
651
652// Assembles an instruction after register allocation, producing machine code.
653CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
654 Instruction* instr) {
655 ArmOperandConverter i(this, instr);
656
657 __ MaybeCheckConstPool();
658 InstructionCode opcode = instr->opcode();
659 ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
660 switch (arch_opcode) {
661 case kArchCallCodeObject: {
662 if (instr->InputAt(0)->IsImmediate()) {
663 __ Call(i.InputCode(0), RelocInfo::CODE_TARGET);
664 } else {
665 Register reg = i.InputRegister(0);
666 DCHECK_IMPLIES(
667 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
668 reg == kJavaScriptCallCodeStartRegister);
669 __ CallCodeObject(reg);
670 }
672 DCHECK_EQ(LeaveCC, i.OutputSBit());
674 break;
675 }
676 case kArchCallBuiltinPointer: {
677 DCHECK(!instr->InputAt(0)->IsImmediate());
678 Register builtin_index = i.InputRegister(0);
679 Register target =
680 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister)
681 ? kJavaScriptCallCodeStartRegister
682 : builtin_index;
683 __ CallBuiltinByIndex(builtin_index, target);
686 break;
687 }
688#if V8_ENABLE_WEBASSEMBLY
689 case kArchCallWasmFunction:
690 case kArchCallWasmFunctionIndirect: {
691 if (instr->InputAt(0)->IsImmediate()) {
692 DCHECK_EQ(arch_opcode, kArchCallWasmFunction);
693 Constant constant = i.ToConstant(instr->InputAt(0));
694 Address wasm_code = static_cast<Address>(constant.ToInt32());
695 __ Call(wasm_code, constant.rmode());
696 } else if (arch_opcode == kArchCallWasmFunctionIndirect) {
697 __ CallWasmCodePointer(i.InputRegister(0));
698 } else {
699 __ Call(i.InputRegister(0));
700 }
702 DCHECK_EQ(LeaveCC, i.OutputSBit());
704 break;
705 }
706 case kArchTailCallWasm:
707 case kArchTailCallWasmIndirect: {
708 if (instr->InputAt(0)->IsImmediate()) {
709 DCHECK_EQ(arch_opcode, kArchTailCallWasm);
710 Constant constant = i.ToConstant(instr->InputAt(0));
711 Address wasm_code = static_cast<Address>(constant.ToInt32());
712 __ Jump(wasm_code, constant.rmode());
713 } else if (arch_opcode == kArchTailCallWasmIndirect) {
714 __ CallWasmCodePointer(i.InputRegister(0), CallJumpMode::kTailCall);
715 } else {
716 __ Jump(i.InputRegister(0));
717 }
718 DCHECK_EQ(LeaveCC, i.OutputSBit());
722 break;
723 }
724#endif // V8_ENABLE_WEBASSEMBLY
725 case kArchTailCallCodeObject: {
726 if (instr->InputAt(0)->IsImmediate()) {
727 __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET);
728 } else {
729 Register reg = i.InputRegister(0);
731 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
733 __ JumpCodeObject(reg);
734 }
735 DCHECK_EQ(LeaveCC, i.OutputSBit());
739 break;
740 }
741 case kArchTailCallAddress: {
742 CHECK(!instr->InputAt(0)->IsImmediate());
743 Register reg = i.InputRegister(0);
745 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
747 __ Jump(reg);
751 break;
752 }
753 case kArchCallJSFunction: {
754 Register func = i.InputRegister(0);
755 if (v8_flags.debug_code) {
756 UseScratchRegisterScope temps(masm());
757 Register scratch = temps.Acquire();
758 // Check the function's context matches the context argument.
759 __ ldr(scratch, FieldMemOperand(func, JSFunction::kContextOffset));
760 __ cmp(cp, scratch);
761 __ Assert(eq, AbortReason::kWrongFunctionContext);
762 }
763 uint32_t num_arguments =
764 i.InputUint32(instr->JSCallArgumentCountInputIndex());
765 __ CallJSFunction(func, num_arguments);
767 DCHECK_EQ(LeaveCC, i.OutputSBit());
769 break;
770 }
771 case kArchPrepareCallCFunction: {
772 int const num_gp_parameters = ParamField::decode(instr->opcode());
773 int const num_fp_parameters = FPParamField::decode(instr->opcode());
774 __ PrepareCallCFunction(num_gp_parameters + num_fp_parameters);
775 // Frame alignment requires using FP-relative frame addressing.
777 break;
778 }
779 case kArchSaveCallerRegisters: {
780 fp_mode_ =
781 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
784 // kReturnRegister0 should have been saved before entering the stub.
785 int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
787 DCHECK_EQ(0, frame_access_state()->sp_delta());
791 break;
792 }
793 case kArchRestoreCallerRegisters: {
795 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
798 // Don't overwrite the returned value.
799 int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
801 DCHECK_EQ(0, frame_access_state()->sp_delta());
804 break;
805 }
806 case kArchPrepareTailCall:
807 AssemblePrepareTailCall();
808 break;
809 case kArchCallCFunctionWithFrameState:
810 case kArchCallCFunction: {
811 int const num_parameters = ParamField::decode(instr->opcode()) +
812 FPParamField::decode(instr->opcode());
813 SetIsolateDataSlots set_isolate_data_slots = SetIsolateDataSlots::kYes;
814 Label return_location;
815#if V8_ENABLE_WEBASSEMBLY
816 if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
817 // Put the return address in a stack slot.
818 Register pc_scratch = r5;
819 __ Push(pc_scratch);
820 __ GetLabelAddress(pc_scratch, &return_location);
821 __ str(pc_scratch,
822 MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset));
823 __ Pop(pc_scratch);
824 set_isolate_data_slots = SetIsolateDataSlots::kNo;
825 }
826#endif // V8_ENABLE_WEBASSEMBLY
827 int pc_offset;
828 if (instr->InputAt(0)->IsImmediate()) {
829 ExternalReference ref = i.InputExternalReference(0);
830 pc_offset = __ CallCFunction(ref, num_parameters,
831 set_isolate_data_slots, &return_location);
832 } else {
833 Register func = i.InputRegister(0);
834 pc_offset = __ CallCFunction(func, num_parameters,
835 set_isolate_data_slots, &return_location);
836 }
837 RecordSafepoint(instr->reference_map(), pc_offset);
838
839 bool const needs_frame_state =
840 (arch_opcode == kArchCallCFunctionWithFrameState);
841 if (needs_frame_state) {
843 }
844
846 // Ideally, we should decrement SP delta to match the change of stack
847 // pointer in CallCFunction. However, for certain architectures (e.g.
848 // ARM), there may be more strict alignment requirement, causing old SP
849 // to be saved on the stack. In those cases, we can not calculate the SP
850 // delta statically.
853 // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
854 // Here, we assume the sequence to be:
855 // kArchSaveCallerRegisters;
856 // kArchCallCFunction;
857 // kArchRestoreCallerRegisters;
858 int bytes =
859 __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
861 }
862 break;
863 }
864 case kArchJmp:
865 AssembleArchJump(i.InputRpo(0));
866 DCHECK_EQ(LeaveCC, i.OutputSBit());
867 break;
868 case kArchBinarySearchSwitch:
869 AssembleArchBinarySearchSwitch(instr);
870 break;
871 case kArchTableSwitch:
872 AssembleArchTableSwitch(instr);
873 DCHECK_EQ(LeaveCC, i.OutputSBit());
874 break;
875 case kArchAbortCSADcheck:
876 DCHECK(i.InputRegister(0) == r1);
877 {
878 // We don't actually want to generate a pile of code for this, so just
879 // claim there is a stack frame, without generating one.
881 __ CallBuiltin(Builtin::kAbortCSADcheck);
882 }
883 __ stop();
885 break;
886 case kArchDebugBreak:
887 __ DebugBreak();
888 break;
889 case kArchComment:
890 __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)),
892 break;
893 case kArchThrowTerminator:
894 DCHECK_EQ(LeaveCC, i.OutputSBit());
896 break;
897 case kArchNop:
898 // don't emit code for nops.
899 DCHECK_EQ(LeaveCC, i.OutputSBit());
900 break;
901 case kArchDeoptimize: {
902 DeoptimizationExit* exit =
904 __ b(exit->label());
905 break;
906 }
907 case kArchRet:
908 AssembleReturn(instr->InputAt(0));
909 DCHECK_EQ(LeaveCC, i.OutputSBit());
910 break;
911 case kArchFramePointer:
912 __ mov(i.OutputRegister(), fp);
913 DCHECK_EQ(LeaveCC, i.OutputSBit());
914 break;
915 case kArchParentFramePointer:
916 if (frame_access_state()->has_frame()) {
917 __ ldr(i.OutputRegister(), MemOperand(fp, 0));
918 } else {
919 __ mov(i.OutputRegister(), fp);
920 }
921 break;
922#if V8_ENABLE_WEBASSEMBLY
923 case kArchStackPointer:
924 // The register allocator expects an allocatable register for the output,
925 // we cannot use sp directly.
926 __ mov(i.OutputRegister(), sp);
927 break;
928 case kArchSetStackPointer:
929 DCHECK(instr->InputAt(0)->IsRegister());
930 __ mov(sp, i.InputRegister(0));
931 break;
932#endif // V8_ENABLE_WEBASSEMBLY
933 case kArchStackPointerGreaterThan: {
934 // Potentially apply an offset to the current stack pointer before the
935 // comparison to consider the size difference of an optimized frame versus
936 // the contained unoptimized frames.
937
938 Register lhs_register = sp;
939 uint32_t offset;
940
942 lhs_register = i.TempRegister(0);
943 __ sub(lhs_register, sp, Operand(offset));
944 }
945
946 constexpr size_t kValueIndex = 0;
947 DCHECK(instr->InputAt(kValueIndex)->IsRegister());
948 __ cmp(lhs_register, i.InputRegister(kValueIndex));
949 break;
950 }
951 case kArchStackCheckOffset:
952 __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
953 break;
954 case kArchTruncateDoubleToI:
955 __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(),
956 i.InputDoubleRegister(0), DetermineStubCallMode());
957 DCHECK_EQ(LeaveCC, i.OutputSBit());
958 break;
959 case kArchStoreWithWriteBarrier: // Fall through.
960 case kArchAtomicStoreWithWriteBarrier: {
961 RecordWriteMode mode;
962 if (arch_opcode == kArchStoreWithWriteBarrier) {
963 mode = RecordWriteModeField::decode(instr->opcode());
964 } else {
966 }
967 Register object = i.InputRegister(0);
968 Register value = i.InputRegister(2);
969
970 if (v8_flags.debug_code) {
971 // Checking that |value| is not a cleared weakref: our write barrier
972 // does not support that for now.
973 __ cmp(value, Operand(kClearedWeakHeapObjectLower32));
974 __ Check(ne, AbortReason::kOperandIsCleared);
975 }
976
977 AddressingMode addressing_mode =
978 AddressingModeField::decode(instr->opcode());
979 Operand offset(0);
980
981 if (arch_opcode == kArchAtomicStoreWithWriteBarrier) {
982 __ dmb(ISH);
983 }
984 if (addressing_mode == kMode_Offset_RI) {
985 int32_t immediate = i.InputInt32(1);
986 offset = Operand(immediate);
987 __ str(value, MemOperand(object, immediate));
988 } else {
989 DCHECK_EQ(kMode_Offset_RR, addressing_mode);
990 Register reg = i.InputRegister(1);
991 offset = Operand(reg);
992 __ str(value, MemOperand(object, reg));
993 }
994 if (arch_opcode == kArchAtomicStoreWithWriteBarrier &&
997 __ dmb(ISH);
998 }
999
1000 auto ool = zone()->New<OutOfLineRecordWrite>(
1001 this, object, offset, value, mode, DetermineStubCallMode(),
1004 __ JumpIfSmi(value, ool->exit());
1005 }
1007 ne, ool->entry());
1008 __ bind(ool->exit());
1009 break;
1010 }
1011 case kArchStoreIndirectWithWriteBarrier:
1012 UNREACHABLE();
1013 case kArchStackSlot: {
1015 frame_access_state()->GetFrameOffset(i.InputInt32(0));
1016 Register base = offset.from_stack_pointer() ? sp : fp;
1017 __ add(i.OutputRegister(0), base, Operand(offset.offset()));
1018 break;
1019 }
1020 case kIeee754Float64Acos:
1021 ASSEMBLE_IEEE754_UNOP(acos);
1022 break;
1023 case kIeee754Float64Acosh:
1024 ASSEMBLE_IEEE754_UNOP(acosh);
1025 break;
1026 case kIeee754Float64Asin:
1027 ASSEMBLE_IEEE754_UNOP(asin);
1028 break;
1029 case kIeee754Float64Asinh:
1030 ASSEMBLE_IEEE754_UNOP(asinh);
1031 break;
1032 case kIeee754Float64Atan:
1033 ASSEMBLE_IEEE754_UNOP(atan);
1034 break;
1035 case kIeee754Float64Atanh:
1036 ASSEMBLE_IEEE754_UNOP(atanh);
1037 break;
1038 case kIeee754Float64Atan2:
1039 ASSEMBLE_IEEE754_BINOP(atan2);
1040 break;
1041 case kIeee754Float64Cbrt:
1042 ASSEMBLE_IEEE754_UNOP(cbrt);
1043 break;
1044 case kIeee754Float64Cos:
1045 ASSEMBLE_IEEE754_UNOP(cos);
1046 break;
1047 case kIeee754Float64Cosh:
1048 ASSEMBLE_IEEE754_UNOP(cosh);
1049 break;
1050 case kIeee754Float64Exp:
1051 ASSEMBLE_IEEE754_UNOP(exp);
1052 break;
1053 case kIeee754Float64Expm1:
1054 ASSEMBLE_IEEE754_UNOP(expm1);
1055 break;
1056 case kIeee754Float64Log:
1057 ASSEMBLE_IEEE754_UNOP(log);
1058 break;
1059 case kIeee754Float64Log1p:
1060 ASSEMBLE_IEEE754_UNOP(log1p);
1061 break;
1062 case kIeee754Float64Log2:
1063 ASSEMBLE_IEEE754_UNOP(log2);
1064 break;
1065 case kIeee754Float64Log10:
1066 ASSEMBLE_IEEE754_UNOP(log10);
1067 break;
1068 case kIeee754Float64Pow:
1069 ASSEMBLE_IEEE754_BINOP(pow);
1070 break;
1071 case kIeee754Float64Sin:
1072 ASSEMBLE_IEEE754_UNOP(sin);
1073 break;
1074 case kIeee754Float64Sinh:
1075 ASSEMBLE_IEEE754_UNOP(sinh);
1076 break;
1077 case kIeee754Float64Tan:
1078 ASSEMBLE_IEEE754_UNOP(tan);
1079 break;
1080 case kIeee754Float64Tanh:
1081 ASSEMBLE_IEEE754_UNOP(tanh);
1082 break;
1083 case kArmAdd:
1084 __ add(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1085 i.OutputSBit());
1086 break;
1087 case kArmAnd:
1088 __ and_(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1089 i.OutputSBit());
1090 break;
1091 case kArmBic:
1092 __ bic(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1093 i.OutputSBit());
1094 break;
1095 case kArmMul:
1096 __ mul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1097 i.OutputSBit());
1098 break;
1099 case kArmMla:
1100 __ mla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1101 i.InputRegister(2), i.OutputSBit());
1102 break;
1103 case kArmMls: {
1104 CpuFeatureScope scope(masm(), ARMv7);
1105 __ mls(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1106 i.InputRegister(2));
1107 DCHECK_EQ(LeaveCC, i.OutputSBit());
1108 break;
1109 }
1110 case kArmSmull:
1111 __ smull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
1112 i.InputRegister(1));
1113 break;
1114 case kArmSmmul:
1115 __ smmul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1116 DCHECK_EQ(LeaveCC, i.OutputSBit());
1117 break;
1118 case kArmSmmla:
1119 __ smmla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1120 i.InputRegister(2));
1121 DCHECK_EQ(LeaveCC, i.OutputSBit());
1122 break;
1123 case kArmUmull:
1124 __ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
1125 i.InputRegister(1), i.OutputSBit());
1126 break;
1127 case kArmSdiv: {
1128 CpuFeatureScope scope(masm(), SUDIV);
1129 __ sdiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1130 DCHECK_EQ(LeaveCC, i.OutputSBit());
1131 break;
1132 }
1133 case kArmUdiv: {
1134 CpuFeatureScope scope(masm(), SUDIV);
1135 __ udiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1136 DCHECK_EQ(LeaveCC, i.OutputSBit());
1137 break;
1138 }
1139 case kArmMov:
1140 __ Move(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit());
1141 break;
1142 case kArmMvn:
1143 __ mvn(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit());
1144 break;
1145 case kArmOrr:
1146 __ orr(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1147 i.OutputSBit());
1148 break;
1149 case kArmEor:
1150 __ eor(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1151 i.OutputSBit());
1152 break;
1153 case kArmSub:
1154 __ sub(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1155 i.OutputSBit());
1156 break;
1157 case kArmRsb:
1158 __ rsb(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1159 i.OutputSBit());
1160 break;
1161 case kArmBfc: {
1162 CpuFeatureScope scope(masm(), ARMv7);
1163 __ bfc(i.OutputRegister(), i.InputInt8(1), i.InputInt8(2));
1164 DCHECK_EQ(LeaveCC, i.OutputSBit());
1165 break;
1166 }
1167 case kArmUbfx: {
1168 CpuFeatureScope scope(masm(), ARMv7);
1169 __ ubfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
1170 i.InputInt8(2));
1171 DCHECK_EQ(LeaveCC, i.OutputSBit());
1172 break;
1173 }
1174 case kArmSbfx: {
1175 CpuFeatureScope scope(masm(), ARMv7);
1176 __ sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
1177 i.InputInt8(2));
1178 DCHECK_EQ(LeaveCC, i.OutputSBit());
1179 break;
1180 }
1181 case kArmSxtb:
1182 __ sxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
1183 DCHECK_EQ(LeaveCC, i.OutputSBit());
1184 break;
1185 case kArmSxth:
1186 __ sxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
1187 DCHECK_EQ(LeaveCC, i.OutputSBit());
1188 break;
1189 case kArmSxtab:
1190 __ sxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1191 i.InputInt32(2));
1192 DCHECK_EQ(LeaveCC, i.OutputSBit());
1193 break;
1194 case kArmSxtah:
1195 __ sxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1196 i.InputInt32(2));
1197 DCHECK_EQ(LeaveCC, i.OutputSBit());
1198 break;
1199 case kArmUxtb:
1200 __ uxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
1201 DCHECK_EQ(LeaveCC, i.OutputSBit());
1202 break;
1203 case kArmUxth:
1204 __ uxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
1205 DCHECK_EQ(LeaveCC, i.OutputSBit());
1206 break;
1207 case kArmUxtab:
1208 __ uxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1209 i.InputInt32(2));
1210 DCHECK_EQ(LeaveCC, i.OutputSBit());
1211 break;
1212 case kArmUxtah:
1213 __ uxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1214 i.InputInt32(2));
1215 DCHECK_EQ(LeaveCC, i.OutputSBit());
1216 break;
1217 case kArmRbit: {
1218 CpuFeatureScope scope(masm(), ARMv7);
1219 __ rbit(i.OutputRegister(), i.InputRegister(0));
1220 DCHECK_EQ(LeaveCC, i.OutputSBit());
1221 break;
1222 }
1223 case kArmRev:
1224 __ rev(i.OutputRegister(), i.InputRegister(0));
1225 DCHECK_EQ(LeaveCC, i.OutputSBit());
1226 break;
1227 case kArmClz:
1228 __ clz(i.OutputRegister(), i.InputRegister(0));
1229 DCHECK_EQ(LeaveCC, i.OutputSBit());
1230 break;
1231 case kArmCmp:
1232 __ cmp(i.InputRegister(0), i.InputOperand2(1));
1233 DCHECK_EQ(SetCC, i.OutputSBit());
1234 break;
1235 case kArmCmn:
1236 __ cmn(i.InputRegister(0), i.InputOperand2(1));
1237 DCHECK_EQ(SetCC, i.OutputSBit());
1238 break;
1239 case kArmTst:
1240 __ tst(i.InputRegister(0), i.InputOperand2(1));
1241 DCHECK_EQ(SetCC, i.OutputSBit());
1242 break;
1243 case kArmTeq:
1244 __ teq(i.InputRegister(0), i.InputOperand2(1));
1245 DCHECK_EQ(SetCC, i.OutputSBit());
1246 break;
1247 case kArmAddPair:
1248 // i.InputRegister(0) ... left low word.
1249 // i.InputRegister(1) ... left high word.
1250 // i.InputRegister(2) ... right low word.
1251 // i.InputRegister(3) ... right high word.
1252 __ add(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2),
1253 SetCC);
1254 __ adc(i.OutputRegister(1), i.InputRegister(1),
1255 Operand(i.InputRegister(3)));
1256 DCHECK_EQ(LeaveCC, i.OutputSBit());
1257 break;
1258 case kArmSubPair:
1259 // i.InputRegister(0) ... left low word.
1260 // i.InputRegister(1) ... left high word.
1261 // i.InputRegister(2) ... right low word.
1262 // i.InputRegister(3) ... right high word.
1263 __ sub(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2),
1264 SetCC);
1265 __ sbc(i.OutputRegister(1), i.InputRegister(1),
1266 Operand(i.InputRegister(3)));
1267 DCHECK_EQ(LeaveCC, i.OutputSBit());
1268 break;
1269 case kArmMulPair:
1270 // i.InputRegister(0) ... left low word.
1271 // i.InputRegister(1) ... left high word.
1272 // i.InputRegister(2) ... right low word.
1273 // i.InputRegister(3) ... right high word.
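// 64 x 64 -> 64 multiply: umull forms the full 64-bit product of the two
// low words; the two mla's then fold the cross products (left_low *
// right_high and right_low * left_high) into the high word. The high*high
// term only contributes to bits >= 64 and is therefore dropped.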
1274 __ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
1275 i.InputRegister(2));
1276 __ mla(i.OutputRegister(1), i.InputRegister(0), i.InputRegister(3),
1277 i.OutputRegister(1));
1278 __ mla(i.OutputRegister(1), i.InputRegister(2), i.InputRegister(1),
1279 i.OutputRegister(1));
1280 break;
1281 case kArmLslPair: {
1282 Register second_output =
1283 instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
1284 if (instr->InputAt(2)->IsImmediate()) {
1285 __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1286 i.InputRegister(1), i.InputInt32(2));
1287 } else {
1288 __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1289 i.InputRegister(1), i.InputRegister(2));
1290 }
1291 break;
1292 }
1293 case kArmLsrPair: {
1294 Register second_output =
1295 instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
1296 if (instr->InputAt(2)->IsImmediate()) {
1297 __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1298 i.InputRegister(1), i.InputInt32(2));
1299 } else {
1300 __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1301 i.InputRegister(1), i.InputRegister(2));
1302 }
1303 break;
1304 }
1305 case kArmAsrPair: {
1306 Register second_output =
1307 instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
1308 if (instr->InputAt(2)->IsImmediate()) {
1309 __ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1310 i.InputRegister(1), i.InputInt32(2));
1311 } else {
1312 __ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1313 i.InputRegister(1), i.InputRegister(2));
1314 }
1315 break;
1316 }
1317 case kArmVcmpF32:
1318 if (instr->InputAt(1)->IsFPRegister()) {
1319 __ VFPCompareAndSetFlags(i.InputFloatRegister(0),
1320 i.InputFloatRegister(1));
1321 } else {
1322 DCHECK(instr->InputAt(1)->IsImmediate());
1323 // 0.0 is the only immediate supported by vcmp instructions.
1324 DCHECK_EQ(0.0f, i.InputFloat32(1));
1325 __ VFPCompareAndSetFlags(i.InputFloatRegister(0), i.InputFloat32(1));
1326 }
1327 DCHECK_EQ(SetCC, i.OutputSBit());
1328 break;
1329 case kArmVaddF32:
1330 __ vadd(i.OutputFloatRegister(), i.InputFloatRegister(0),
1331 i.InputFloatRegister(1));
1332 DCHECK_EQ(LeaveCC, i.OutputSBit());
1333 break;
1334 case kArmVsubF32:
1335 __ vsub(i.OutputFloatRegister(), i.InputFloatRegister(0),
1336 i.InputFloatRegister(1));
1337 DCHECK_EQ(LeaveCC, i.OutputSBit());
1338 break;
1339 case kArmVmulF32:
1340 __ vmul(i.OutputFloatRegister(), i.InputFloatRegister(0),
1341 i.InputFloatRegister(1));
1342 DCHECK_EQ(LeaveCC, i.OutputSBit());
1343 break;
1344 case kArmVmlaF32:
1345 __ vmla(i.OutputFloatRegister(), i.InputFloatRegister(1),
1346 i.InputFloatRegister(2));
1347 DCHECK_EQ(LeaveCC, i.OutputSBit());
1348 break;
1349 case kArmVmlsF32:
1350 __ vmls(i.OutputFloatRegister(), i.InputFloatRegister(1),
1351 i.InputFloatRegister(2));
1352 DCHECK_EQ(LeaveCC, i.OutputSBit());
1353 break;
1354 case kArmVdivF32:
1355 __ vdiv(i.OutputFloatRegister(), i.InputFloatRegister(0),
1356 i.InputFloatRegister(1));
1357 DCHECK_EQ(LeaveCC, i.OutputSBit());
1358 break;
1359 case kArmVsqrtF32:
1360 __ vsqrt(i.OutputFloatRegister(), i.InputFloatRegister(0));
1361 break;
1362 case kArmVabsF32:
1363 __ vabs(i.OutputFloatRegister(), i.InputFloatRegister(0));
1364 break;
1365 case kArmVnegF32:
1366 __ vneg(i.OutputFloatRegister(), i.InputFloatRegister(0));
1367 break;
1368 case kArmVcmpF64:
1369 if (instr->InputAt(1)->IsFPRegister()) {
1370 __ VFPCompareAndSetFlags(i.InputDoubleRegister(0),
1371 i.InputDoubleRegister(1));
1372 } else {
1373 DCHECK(instr->InputAt(1)->IsImmediate());
1374 // 0.0 is the only immediate supported by vcmp instructions.
1375 DCHECK_EQ(0.0, i.InputDouble(1));
1376 __ VFPCompareAndSetFlags(i.InputDoubleRegister(0), i.InputDouble(1));
1377 }
1378 DCHECK_EQ(SetCC, i.OutputSBit());
1379 break;
1380 case kArmVaddF64:
1381 __ vadd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1382 i.InputDoubleRegister(1));
1383 DCHECK_EQ(LeaveCC, i.OutputSBit());
1384 break;
1385 case kArmVsubF64:
1386 __ vsub(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1387 i.InputDoubleRegister(1));
1388 DCHECK_EQ(LeaveCC, i.OutputSBit());
1389 break;
1390 case kArmVmulF64:
1391 __ vmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1392 i.InputDoubleRegister(1));
1393 DCHECK_EQ(LeaveCC, i.OutputSBit());
1394 break;
1395 case kArmVmlaF64:
1396 __ vmla(i.OutputDoubleRegister(), i.InputDoubleRegister(1),
1397 i.InputDoubleRegister(2));
1398 DCHECK_EQ(LeaveCC, i.OutputSBit());
1399 break;
1400 case kArmVmlsF64:
1401 __ vmls(i.OutputDoubleRegister(), i.InputDoubleRegister(1),
1402 i.InputDoubleRegister(2));
1403 DCHECK_EQ(LeaveCC, i.OutputSBit());
1404 break;
1405 case kArmVdivF64:
1406 __ vdiv(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1407 i.InputDoubleRegister(1));
1408 DCHECK_EQ(LeaveCC, i.OutputSBit());
1409 break;
1410 case kArmVmodF64: {
1411 // TODO(bmeurer): We should really get rid of this special instruction,
1412 // and generate a CallAddress instruction instead.
1414 __ PrepareCallCFunction(0, 2);
1415 __ MovToFloatParameters(i.InputDoubleRegister(0),
1416 i.InputDoubleRegister(1));
1417 __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
1418 // Move the result in the double result register.
1419 __ MovFromFloatResult(i.OutputDoubleRegister());
1420 DCHECK_EQ(LeaveCC, i.OutputSBit());
1421 break;
1422 }
1423 case kArmVsqrtF64:
1424 __ vsqrt(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1425 break;
1426 case kArmVabsF64:
1427 __ vabs(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1428 break;
1429 case kArmVnegF64:
1430 __ vneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1431 break;
1432 case kArmVrintmF32: {
1433 CpuFeatureScope scope(masm(), ARMv8);
1434 if (instr->InputAt(0)->IsSimd128Register()) {
1435 __ vrintm(NeonS32, i.OutputSimd128Register(),
1436 i.InputSimd128Register(0));
1437 } else {
1438 __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0));
1439 }
1440 break;
1441 }
1442 case kArmVrintmF64: {
1443 CpuFeatureScope scope(masm(), ARMv8);
1444 __ vrintm(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1445 break;
1446 }
1447 case kArmVrintpF32: {
1448 CpuFeatureScope scope(masm(), ARMv8);
1449 if (instr->InputAt(0)->IsSimd128Register()) {
1450 __ vrintp(NeonS32, i.OutputSimd128Register(),
1451 i.InputSimd128Register(0));
1452 } else {
1453 __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0));
1454 }
1455 break;
1456 }
1457 case kArmVrintpF64: {
1458 CpuFeatureScope scope(masm(), ARMv8);
1459 __ vrintp(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1460 break;
1461 }
1462 case kArmVrintzF32: {
1463 CpuFeatureScope scope(masm(), ARMv8);
1464 if (instr->InputAt(0)->IsSimd128Register()) {
1465 __ vrintz(NeonS32, i.OutputSimd128Register(),
1466 i.InputSimd128Register(0));
1467 } else {
1468 __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
1469 }
1470 break;
1471 }
1472 case kArmVrintzF64: {
1473 CpuFeatureScope scope(masm(), ARMv8);
1474 __ vrintz(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1475 break;
1476 }
1477 case kArmVrintaF64: {
1478 CpuFeatureScope scope(masm(), ARMv8);
1479 __ vrinta(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1480 break;
1481 }
1482 case kArmVrintnF32: {
1483 CpuFeatureScope scope(masm(), ARMv8);
1484 if (instr->InputAt(0)->IsSimd128Register()) {
1485 __ vrintn(NeonS32, i.OutputSimd128Register(),
1486 i.InputSimd128Register(0));
1487 } else {
1488 __ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0));
1489 }
1490 break;
1491 }
1492 case kArmVrintnF64: {
1493 CpuFeatureScope scope(masm(), ARMv8);
1494 __ vrintn(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1495 break;
1496 }
1497 case kArmVcvtF32F64: {
1498 __ vcvt_f32_f64(i.OutputFloatRegister(), i.InputDoubleRegister(0));
1499 DCHECK_EQ(LeaveCC, i.OutputSBit());
1500 break;
1501 }
1502 case kArmVcvtF64F32: {
1503 __ vcvt_f64_f32(i.OutputDoubleRegister(), i.InputFloatRegister(0));
1504 DCHECK_EQ(LeaveCC, i.OutputSBit());
1505 break;
1506 }
1507 case kArmVcvtF32S32: {
1508 UseScratchRegisterScope temps(masm());
1509 SwVfpRegister scratch = temps.AcquireS();
1510 __ vmov(scratch, i.InputRegister(0));
1511 __ vcvt_f32_s32(i.OutputFloatRegister(), scratch);
1512 DCHECK_EQ(LeaveCC, i.OutputSBit());
1513 break;
1514 }
1515 case kArmVcvtF32U32: {
1516 UseScratchRegisterScope temps(masm());
1517 SwVfpRegister scratch = temps.AcquireS();
1518 __ vmov(scratch, i.InputRegister(0));
1519 __ vcvt_f32_u32(i.OutputFloatRegister(), scratch);
1520 DCHECK_EQ(LeaveCC, i.OutputSBit());
1521 break;
1522 }
1523 case kArmVcvtF64S32: {
1524 UseScratchRegisterScope temps(masm());
1525 SwVfpRegister scratch = temps.AcquireS();
1526 __ vmov(scratch, i.InputRegister(0));
1527 __ vcvt_f64_s32(i.OutputDoubleRegister(), scratch);
1528 DCHECK_EQ(LeaveCC, i.OutputSBit());
1529 break;
1530 }
1531 case kArmVcvtF64U32: {
1532 UseScratchRegisterScope temps(masm());
1533 SwVfpRegister scratch = temps.AcquireS();
1534 __ vmov(scratch, i.InputRegister(0));
1535 __ vcvt_f64_u32(i.OutputDoubleRegister(), scratch);
1536 DCHECK_EQ(LeaveCC, i.OutputSBit());
1537 break;
1538 }
1539 case kArmVcvtS32F32: {
1540 UseScratchRegisterScope temps(masm());
1541 SwVfpRegister scratch = temps.AcquireS();
1542 __ vcvt_s32_f32(scratch, i.InputFloatRegister(0));
1543 __ vmov(i.OutputRegister(), scratch);
1544 bool set_overflow_to_min_i32 = MiscField::decode(instr->opcode());
1545 if (set_overflow_to_min_i32) {
1546 // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
1547 // because INT32_MIN allows easier out-of-bounds detection.
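// cmn output, #1 overflows (sets the V flag) exactly when the conversion
// saturated to INT32_MAX, so the conditional mov below rewrites that case
// to INT32_MIN.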
1548 __ cmn(i.OutputRegister(), Operand(1));
1549 __ mov(i.OutputRegister(), Operand(INT32_MIN), LeaveCC, vs);
1550 }
1551 DCHECK_EQ(LeaveCC, i.OutputSBit());
1552 break;
1553 }
1554 case kArmVcvtU32F32: {
1555 UseScratchRegisterScope temps(masm());
1556 SwVfpRegister scratch = temps.AcquireS();
1557 __ vcvt_u32_f32(scratch, i.InputFloatRegister(0));
1558 __ vmov(i.OutputRegister(), scratch);
1559 bool set_overflow_to_min_u32 = MiscField::decode(instr->opcode());
1560 if (set_overflow_to_min_u32) {
1561 // Avoid UINT32_MAX as an overflow indicator and use 0 instead,
1562 // because 0 allows easier out-of-bounds detection.
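// cmn output, #1 sets the carry flag exactly when the conversion saturated
// to UINT32_MAX, so the adc below wraps that value to 0 and leaves all
// other results unchanged.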
1563 __ cmn(i.OutputRegister(), Operand(1));
1564 __ adc(i.OutputRegister(), i.OutputRegister(), Operand::Zero());
1565 }
1566 DCHECK_EQ(LeaveCC, i.OutputSBit());
1567 break;
1568 }
1569 case kArmVcvtS32F64: {
1570 UseScratchRegisterScope temps(masm());
1571 SwVfpRegister scratch = temps.AcquireS();
1572 __ vcvt_s32_f64(scratch, i.InputDoubleRegister(0));
1573 __ vmov(i.OutputRegister(), scratch);
1574 DCHECK_EQ(LeaveCC, i.OutputSBit());
1575 break;
1576 }
1577 case kArmVcvtU32F64: {
1578 UseScratchRegisterScope temps(masm());
1579 SwVfpRegister scratch = temps.AcquireS();
1580 __ vcvt_u32_f64(scratch, i.InputDoubleRegister(0));
1581 __ vmov(i.OutputRegister(), scratch);
1582 DCHECK_EQ(LeaveCC, i.OutputSBit());
1583 break;
1584 }
1585 case kArmVmovU32F32:
1586 __ vmov(i.OutputRegister(), i.InputFloatRegister(0));
1587 DCHECK_EQ(LeaveCC, i.OutputSBit());
1588 break;
1589 case kArmVmovF32U32:
1590 __ vmov(i.OutputFloatRegister(), i.InputRegister(0));
1591 DCHECK_EQ(LeaveCC, i.OutputSBit());
1592 break;
1593 case kArmVmovLowU32F64:
1594 __ VmovLow(i.OutputRegister(), i.InputDoubleRegister(0));
1595 DCHECK_EQ(LeaveCC, i.OutputSBit());
1596 break;
1597 case kArmVmovLowF64U32:
1598 __ VmovLow(i.OutputDoubleRegister(), i.InputRegister(1));
1599 DCHECK_EQ(LeaveCC, i.OutputSBit());
1600 break;
1601 case kArmVmovHighU32F64:
1602 __ VmovHigh(i.OutputRegister(), i.InputDoubleRegister(0));
1603 DCHECK_EQ(LeaveCC, i.OutputSBit());
1604 break;
1605 case kArmVmovHighF64U32:
1606 __ VmovHigh(i.OutputDoubleRegister(), i.InputRegister(1));
1607 DCHECK_EQ(LeaveCC, i.OutputSBit());
1608 break;
1609 case kArmVmovF64U32U32:
1610 __ vmov(i.OutputDoubleRegister(), i.InputRegister(0), i.InputRegister(1));
1611 DCHECK_EQ(LeaveCC, i.OutputSBit());
1612 break;
1613 case kArmVmovU32U32F64:
1614 __ vmov(i.OutputRegister(0), i.OutputRegister(1),
1615 i.InputDoubleRegister(0));
1616 DCHECK_EQ(LeaveCC, i.OutputSBit());
1617 break;
1618 case kArmVcnt: {
1619 __ vcnt(i.OutputSimd128Register(), i.InputSimd128Register(0));
1620 break;
1621 }
1622 case kArmLdrb:
1623 __ ldrb(i.OutputRegister(), i.InputOffset());
1624 DCHECK_EQ(LeaveCC, i.OutputSBit());
1625 break;
1626 case kArmLdrsb:
1627 __ ldrsb(i.OutputRegister(), i.InputOffset());
1628 DCHECK_EQ(LeaveCC, i.OutputSBit());
1629 break;
1630 case kArmStrb:
1631 __ strb(i.InputRegister(0), i.InputOffset(1));
1632 DCHECK_EQ(LeaveCC, i.OutputSBit());
1633 break;
1634 case kArmLdrh:
1635 __ ldrh(i.OutputRegister(), i.InputOffset());
1636 break;
1637 case kArmLdrsh:
1638 __ ldrsh(i.OutputRegister(), i.InputOffset());
1639 break;
1640 case kArmStrh:
1641 __ strh(i.InputRegister(0), i.InputOffset(1));
1642 DCHECK_EQ(LeaveCC, i.OutputSBit());
1643 break;
1644 case kArmLdr:
1645 __ ldr(i.OutputRegister(), i.InputOffset());
1646 break;
1647 case kArmStr:
1648 __ str(i.InputRegister(0), i.InputOffset(1));
1649 DCHECK_EQ(LeaveCC, i.OutputSBit());
1650 break;
1651 case kArmVldrF32: {
1652 __ vldr(i.OutputFloatRegister(), i.InputOffset());
1653 DCHECK_EQ(LeaveCC, i.OutputSBit());
1654 break;
1655 }
1656 case kArmVstrF32:
1657 __ vstr(i.InputFloatRegister(0), i.InputOffset(1));
1658 DCHECK_EQ(LeaveCC, i.OutputSBit());
1659 break;
1660 case kArmVld1F64: {
1661 __ vld1(Neon8, NeonListOperand(i.OutputDoubleRegister()),
1662 i.NeonInputOperand(0));
1663 break;
1664 }
1665 case kArmVst1F64: {
1666 __ vst1(Neon8, NeonListOperand(i.InputDoubleRegister(0)),
1667 i.NeonInputOperand(1));
1668 break;
1669 }
1670 case kArmVld1S128: {
1671 __ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
1672 i.NeonInputOperand(0));
1673 break;
1674 }
1675 case kArmVst1S128: {
1676 __ vst1(Neon8, NeonListOperand(i.InputSimd128Register(0)),
1677 i.NeonInputOperand(1));
1678 break;
1679 }
1680 case kArmVldrF64: {
1681 __ vldr(i.OutputDoubleRegister(), i.InputOffset());
1682 DCHECK_EQ(LeaveCC, i.OutputSBit());
1683 break;
1684 }
1685 case kArmVstrF64:
1686 __ vstr(i.InputDoubleRegister(0), i.InputOffset(1));
1687 DCHECK_EQ(LeaveCC, i.OutputSBit());
1688 break;
1689 case kArmFloat32Max: {
1690 SwVfpRegister result = i.OutputFloatRegister();
1691 SwVfpRegister left = i.InputFloatRegister(0);
1692 SwVfpRegister right = i.InputFloatRegister(1);
1693 if (left == right) {
1694 __ Move(result, left);
1695 } else {
1696 auto ool = zone()->New<OutOfLineFloat32Max>(this, result, left, right);
1697 __ FloatMax(result, left, right, ool->entry());
1698 __ bind(ool->exit());
1699 }
1700 DCHECK_EQ(LeaveCC, i.OutputSBit());
1701 break;
1702 }
1703 case kArmFloat64Max: {
1704 DwVfpRegister result = i.OutputDoubleRegister();
1705 DwVfpRegister left = i.InputDoubleRegister(0);
1706 DwVfpRegister right = i.InputDoubleRegister(1);
1707 if (left == right) {
1708 __ Move(result, left);
1709 } else {
1710 auto ool = zone()->New<OutOfLineFloat64Max>(this, result, left, right);
1711 __ FloatMax(result, left, right, ool->entry());
1712 __ bind(ool->exit());
1713 }
1714 DCHECK_EQ(LeaveCC, i.OutputSBit());
1715 break;
1716 }
1717 case kArmFloat32Min: {
1718 SwVfpRegister result = i.OutputFloatRegister();
1719 SwVfpRegister left = i.InputFloatRegister(0);
1720 SwVfpRegister right = i.InputFloatRegister(1);
1721 if (left == right) {
1722 __ Move(result, left);
1723 } else {
1724 auto ool = zone()->New<OutOfLineFloat32Min>(this, result, left, right);
1725 __ FloatMin(result, left, right, ool->entry());
1726 __ bind(ool->exit());
1727 }
1728 DCHECK_EQ(LeaveCC, i.OutputSBit());
1729 break;
1730 }
1731 case kArmFloat64Min: {
1732 DwVfpRegister result = i.OutputDoubleRegister();
1733 DwVfpRegister left = i.InputDoubleRegister(0);
1734 DwVfpRegister right = i.InputDoubleRegister(1);
1735 if (left == right) {
1736 __ Move(result, left);
1737 } else {
1738 auto ool = zone()->New<OutOfLineFloat64Min>(this, result, left, right);
1739 __ FloatMin(result, left, right, ool->entry());
1740 __ bind(ool->exit());
1741 }
1742 DCHECK_EQ(LeaveCC, i.OutputSBit());
1743 break;
1744 }
1745 case kArmFloat64SilenceNaN: {
1746 DwVfpRegister value = i.InputDoubleRegister(0);
1747 DwVfpRegister result = i.OutputDoubleRegister();
1748 __ VFPCanonicalizeNaN(result, value);
1749 break;
1750 }
1751 case kArmPush: {
1752 int stack_decrement = i.InputInt32(0);
1753 int slots = stack_decrement / kSystemPointerSize;
1754 LocationOperand* op = LocationOperand::cast(instr->InputAt(1));
1755 MachineRepresentation rep = op->representation();
1756 int pushed_slots = ElementSizeInPointers(rep);
1757 // Slot-sized arguments are never padded but there may be a gap if
1758 // the slot allocator reclaimed other padding slots. Adjust the stack
1759 // here to skip any gap.
1760 __ AllocateStackSpace((slots - pushed_slots) * kSystemPointerSize);
1761 switch (rep) {
1762 case MachineRepresentation::kFloat32:
1763 __ vpush(i.InputFloatRegister(1));
1764 break;
1765 case MachineRepresentation::kFloat64:
1766 __ vpush(i.InputDoubleRegister(1));
1767 break;
1768 case MachineRepresentation::kSimd128:
1769 __ vpush(i.InputSimd128Register(1));
1770 break;
1771 default:
1772 __ push(i.InputRegister(1));
1773 break;
1774 }
1776 DCHECK_EQ(LeaveCC, i.OutputSBit());
1777 break;
1778 }
1779 case kArmPoke: {
1780 int const slot = MiscField::decode(instr->opcode());
1781 __ str(i.InputRegister(0), MemOperand(sp, slot * kSystemPointerSize));
1782 DCHECK_EQ(LeaveCC, i.OutputSBit());
1783 break;
1784 }
1785 case kArmPeek: {
1786 int reverse_slot = i.InputInt32(0);
1787 int offset =
1788 FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
1789 if (instr->OutputAt(0)->IsFPRegister()) {
1790 LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
1791 if (op->representation() == MachineRepresentation::kFloat64) {
1792 __ vldr(i.OutputDoubleRegister(), MemOperand(fp, offset));
1793 } else if (op->representation() == MachineRepresentation::kFloat32) {
1794 __ vldr(i.OutputFloatRegister(), MemOperand(fp, offset));
1795 } else {
1796 DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
1797 UseScratchRegisterScope temps(masm());
1798 Register scratch = temps.Acquire();
1799 __ add(scratch, fp, Operand(offset));
1800 __ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
1801 NeonMemOperand(scratch));
1802 }
1803 } else {
1804 __ ldr(i.OutputRegister(), MemOperand(fp, offset));
1805 }
1806 break;
1807 }
1808 case kArmDmbIsh: {
1809 __ dmb(ISH);
1810 break;
1811 }
1812 case kArmDsbIsb: {
1813 __ dsb(SY);
1814 __ isb(SY);
1815 break;
1816 }
1817 case kArmVmullLow: {
1818 auto dt = static_cast<NeonDataType>(MiscField::decode(instr->opcode()));
1819 __ vmull(dt, i.OutputSimd128Register(), i.InputSimd128Register(0).low(),
1820 i.InputSimd128Register(1).low());
1821 break;
1822 }
1823 case kArmVmullHigh: {
1824 auto dt = static_cast<NeonDataType>(MiscField::decode(instr->opcode()));
1825 __ vmull(dt, i.OutputSimd128Register(), i.InputSimd128Register(0).high(),
1826 i.InputSimd128Register(1).high());
1827 break;
1828 }
1829 case kArmVpadal: {
1830 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1831 auto dt = static_cast<NeonDataType>(MiscField::decode(instr->opcode()));
1832 __ vpadal(dt, i.OutputSimd128Register(), i.InputSimd128Register(1));
1833 break;
1834 }
1835 case kArmVpaddl: {
1836 auto dt = static_cast<NeonDataType>(MiscField::decode(instr->opcode()));
1837 __ vpaddl(dt, i.OutputSimd128Register(), i.InputSimd128Register(0));
1838 break;
1839 }
1840 case kArmF64x2Splat: {
1841 Simd128Register dst = i.OutputSimd128Register();
1842 DoubleRegister src = i.InputDoubleRegister(0);
1843 __ Move(dst.low(), src);
1844 __ Move(dst.high(), src);
1845 break;
1846 }
1847 case kArmF64x2ExtractLane: {
1848 __ ExtractLane(i.OutputDoubleRegister(), i.InputSimd128Register(0),
1849 i.InputInt8(1));
1850 break;
1851 }
1852 case kArmF64x2ReplaceLane: {
1853 __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
1854 i.InputDoubleRegister(2), i.InputInt8(1));
1855 break;
1856 }
1857 case kArmF64x2Abs: {
1858 __ vabs(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low());
1859 __ vabs(i.OutputSimd128Register().high(),
1860 i.InputSimd128Register(0).high());
1861 break;
1862 }
1863 case kArmF64x2Neg: {
1864 __ vneg(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low());
1865 __ vneg(i.OutputSimd128Register().high(),
1866 i.InputSimd128Register(0).high());
1867 break;
1868 }
1869 case kArmF64x2Sqrt: {
1870 __ vsqrt(i.OutputSimd128Register().low(),
1871 i.InputSimd128Register(0).low());
1872 __ vsqrt(i.OutputSimd128Register().high(),
1873 i.InputSimd128Register(0).high());
1874 break;
1875 }
1876 case kArmF64x2Add: {
1877 ASSEMBLE_F64X2_ARITHMETIC_BINOP(vadd);
1878 break;
1879 }
1880 case kArmF64x2Sub: {
1881 ASSEMBLE_F64X2_ARITHMETIC_BINOP(vsub);
1882 break;
1883 }
1884 case kArmF64x2Mul: {
1885 ASSEMBLE_F64X2_ARITHMETIC_BINOP(vmul);
1886 break;
1887 }
1888 case kArmF64x2Div: {
1889 ASSEMBLE_F64X2_ARITHMETIC_BINOP(vdiv);
1890 break;
1891 }
1892 case kArmF64x2Min: {
1893 Simd128Register result = i.OutputSimd128Register();
1894 Simd128Register left = i.InputSimd128Register(0);
1895 Simd128Register right = i.InputSimd128Register(1);
1896 if (left == right) {
1897 __ Move(result, left);
1898 } else {
1899 auto ool_low = zone()->New<OutOfLineFloat64Min>(
1900 this, result.low(), left.low(), right.low());
1901 auto ool_high = zone()->New<OutOfLineFloat64Min>(
1902 this, result.high(), left.high(), right.high());
1903 __ FloatMin(result.low(), left.low(), right.low(), ool_low->entry());
1904 __ bind(ool_low->exit());
1905 __ FloatMin(result.high(), left.high(), right.high(),
1906 ool_high->entry());
1907 __ bind(ool_high->exit());
1908 }
1909 DCHECK_EQ(LeaveCC, i.OutputSBit());
1910 break;
1911 }
1912 case kArmF64x2Max: {
1913 Simd128Register result = i.OutputSimd128Register();
1914 Simd128Register left = i.InputSimd128Register(0);
1915 Simd128Register right = i.InputSimd128Register(1);
1916 if (left == right) {
1917 __ Move(result, left);
1918 } else {
1919 auto ool_low = zone()->New<OutOfLineFloat64Max>(
1920 this, result.low(), left.low(), right.low());
1921 auto ool_high = zone()->New<OutOfLineFloat64Max>(
1922 this, result.high(), left.high(), right.high());
1923 __ FloatMax(result.low(), left.low(), right.low(), ool_low->entry());
1924 __ bind(ool_low->exit());
1925 __ FloatMax(result.high(), left.high(), right.high(),
1926 ool_high->entry());
1927 __ bind(ool_high->exit());
1928 }
1929 DCHECK_EQ(LeaveCC, i.OutputSBit());
1930 break;
1931 }
1932#undef ASSEMBLE_F64X2_ARITHMETIC_BINOP
1933 case kArmF64x2Eq: {
1934 UseScratchRegisterScope temps(masm());
1935 Register scratch = temps.Acquire();
1936 __ mov(scratch, Operand(0));
1937 __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
1938 i.InputSimd128Register(1).low());
1939 __ mov(scratch, Operand(-1), LeaveCC, eq);
1940 __ vmov(i.OutputSimd128Register().low(), scratch, scratch);
1941
1942 __ mov(scratch, Operand(0));
1943 __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
1944 i.InputSimd128Register(1).high());
1945 __ mov(scratch, Operand(-1), LeaveCC, eq);
1946 __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
1947 break;
1948 }
1949 case kArmF64x2Ne: {
1950 UseScratchRegisterScope temps(masm());
1951 Register scratch = temps.Acquire();
1952 __ mov(scratch, Operand(0));
1953 __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
1954 i.InputSimd128Register(1).low());
1955 __ mov(scratch, Operand(-1), LeaveCC, ne);
1956 __ vmov(i.OutputSimd128Register().low(), scratch, scratch);
1957
1958 __ mov(scratch, Operand(0));
1959 __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
1960 i.InputSimd128Register(1).high());
1961 __ mov(scratch, Operand(-1), LeaveCC, ne);
1962 __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
1963 break;
1964 }
1965 case kArmF64x2Lt: {
1966 UseScratchRegisterScope temps(masm());
1967 Register scratch = temps.Acquire();
1968 __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
1969 i.InputSimd128Register(1).low());
1970 __ mov(scratch, Operand(0), LeaveCC, cs);
1971 __ mov(scratch, Operand(-1), LeaveCC, mi);
1972 __ vmov(i.OutputSimd128Register().low(), scratch, scratch);
1973
1974 __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
1975 i.InputSimd128Register(1).high());
1976 __ mov(scratch, Operand(0), LeaveCC, cs);
1977 __ mov(scratch, Operand(-1), LeaveCC, mi);
1978 __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
1979 break;
1980 }
1981 case kArmF64x2Le: {
1982 UseScratchRegisterScope temps(masm());
1983 Register scratch = temps.Acquire();
1984 __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
1985 i.InputSimd128Register(1).low());
1986 __ mov(scratch, Operand(0), LeaveCC, hi);
1987 __ mov(scratch, Operand(-1), LeaveCC, ls);
1988 __ vmov(i.OutputSimd128Register().low(), scratch, scratch);
1989
1990 __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
1991 i.InputSimd128Register(1).high());
1992 __ mov(scratch, Operand(0), LeaveCC, hi);
1993 __ mov(scratch, Operand(-1), LeaveCC, ls);
1994 __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
1995 break;
1996 }
1997 case kArmF64x2Pmin: {
1998 Simd128Register dst = i.OutputSimd128Register();
1999 Simd128Register lhs = i.InputSimd128Register(0);
2000 Simd128Register rhs = i.InputSimd128Register(1);
2001 DCHECK_EQ(dst, lhs);
2002
2003 // Move rhs into dst only when rhs is strictly less than lhs (mi).
2004 __ VFPCompareAndSetFlags(rhs.low(), lhs.low());
2005 __ vmov(dst.low(), rhs.low(), mi);
2006 __ VFPCompareAndSetFlags(rhs.high(), lhs.high());
2007 __ vmov(dst.high(), rhs.high(), mi);
2008 break;
2009 }
2010 case kArmF64x2Pmax: {
2011 Simd128Register dst = i.OutputSimd128Register();
2012 Simd128Register lhs = i.InputSimd128Register(0);
2013 Simd128Register rhs = i.InputSimd128Register(1);
2014 DCHECK_EQ(dst, lhs);
2015
2016 // Move rhs into dst only when rhs is strictly greater than lhs (gt).
2017 __ VFPCompareAndSetFlags(rhs.low(), lhs.low());
2018 __ vmov(dst.low(), rhs.low(), gt);
2019 __ VFPCompareAndSetFlags(rhs.high(), lhs.high());
2020 __ vmov(dst.high(), rhs.high(), gt);
2021 break;
2022 }
2023 case kArmF64x2Qfma: {
2024 Simd128Register dst = i.OutputSimd128Register();
2025 Simd128Register src0 = i.InputSimd128Register(0);
2026 Simd128Register src1 = i.InputSimd128Register(1);
2027 Simd128Register src2 = i.InputSimd128Register(2);
2028 __ vmul(dst.low(), src0.low(), src1.low());
2029 __ vmul(dst.high(), src0.high(), src1.high());
2030 __ vadd(dst.low(), src2.low(), dst.low());
2031 __ vadd(dst.high(), src2.high(), dst.high());
2032 break;
2033 }
2034 case kArmF64x2Qfms: {
2035 Simd128Register dst = i.OutputSimd128Register();
2036 Simd128Register src0 = i.InputSimd128Register(0);
2037 Simd128Register src1 = i.InputSimd128Register(1);
2038 Simd128Register src2 = i.InputSimd128Register(2);
2039 __ vmul(dst.low(), src0.low(), src1.low());
2040 __ vmul(dst.high(), src0.high(), src1.high());
2041 __ vsub(dst.low(), src2.low(), dst.low());
2042 __ vsub(dst.high(), src2.high(), dst.high());
2043 break;
2044 }
2045 case kArmF64x2Ceil: {
2046 CpuFeatureScope scope(masm(), ARMv8);
2047 Simd128Register dst = i.OutputSimd128Register();
2048 Simd128Register src = i.InputSimd128Register(0);
2049 __ vrintp(dst.low(), src.low());
2050 __ vrintp(dst.high(), src.high());
2051 break;
2052 }
2053 case kArmF64x2Floor: {
2054 CpuFeatureScope scope(masm(), ARMv8);
2055 Simd128Register dst = i.OutputSimd128Register();
2056 Simd128Register src = i.InputSimd128Register(0);
2057 __ vrintm(dst.low(), src.low());
2058 __ vrintm(dst.high(), src.high());
2059 break;
2060 }
2061 case kArmF64x2Trunc: {
2062 CpuFeatureScope scope(masm(), ARMv8);
2063 Simd128Register dst = i.OutputSimd128Register();
2064 Simd128Register src = i.InputSimd128Register(0);
2065 __ vrintz(dst.low(), src.low());
2066 __ vrintz(dst.high(), src.high());
2067 break;
2068 }
2069 case kArmF64x2NearestInt: {
2070 CpuFeatureScope scope(masm(), ARMv8);
2071 Simd128Register dst = i.OutputSimd128Register();
2072 Simd128Register src = i.InputSimd128Register(0);
2073 __ vrintn(dst.low(), src.low());
2074 __ vrintn(dst.high(), src.high());
2075 break;
2076 }
2077 case kArmF64x2ConvertLowI32x4S: {
2078 __ F64x2ConvertLowI32x4S(i.OutputSimd128Register(),
2079 i.InputSimd128Register(0));
2080 break;
2081 }
2082 case kArmF64x2ConvertLowI32x4U: {
2083 __ F64x2ConvertLowI32x4U(i.OutputSimd128Register(),
2084 i.InputSimd128Register(0));
2085 break;
2086 }
2087 case kArmF64x2PromoteLowF32x4: {
2088 __ F64x2PromoteLowF32x4(i.OutputSimd128Register(),
2089 i.InputSimd128Register(0));
2090 break;
2091 }
2092 case kArmI64x2SplatI32Pair: {
2093 Simd128Register dst = i.OutputSimd128Register();
2094 __ vdup(Neon32, dst, i.InputRegister(0));
2095 __ ReplaceLane(dst, dst, i.InputRegister(1), NeonS32, 1);
2096 __ ReplaceLane(dst, dst, i.InputRegister(1), NeonS32, 3);
2097 break;
2098 }
2099 case kArmI64x2ReplaceLaneI32Pair: {
2100 Simd128Register dst = i.OutputSimd128Register();
2101 int8_t lane = i.InputInt8(1);
2102 __ ReplaceLane(dst, dst, i.InputRegister(2), NeonS32, lane * 2);
2103 __ ReplaceLane(dst, dst, i.InputRegister(3), NeonS32, lane * 2 + 1);
2104 break;
2105 }
2106 case kArmI64x2Add: {
2107 __ vadd(Neon64, i.OutputSimd128Register(), i.InputSimd128Register(0),
2108 i.InputSimd128Register(1));
2109 break;
2110 }
2111 case kArmI64x2Sub: {
2112 __ vsub(Neon64, i.OutputSimd128Register(), i.InputSimd128Register(0),
2113 i.InputSimd128Register(1));
2114 break;
2115 }
2116 case kArmI64x2Mul: {
2117 UseScratchRegisterScope temps(masm());
2118 QwNeonRegister dst = i.OutputSimd128Register();
2119 QwNeonRegister left = i.InputSimd128Register(0);
2120 QwNeonRegister right = i.InputSimd128Register(1);
2121 QwNeonRegister tmp1 = i.TempSimd128Register(0);
2122 QwNeonRegister tmp2 = temps.AcquireQ();
2123
2124 // This algorithm uses vector operations to perform 64-bit integer
2125 // multiplication by splitting each 64-bit lane into its high and low
2126 // 32-bit halves. The tricky part is getting the low and high halves into
2127 // the correct places inside a NEON register, so that we can use as few
2128 // vmull and vmlal instructions as possible.
2129
2130 // Move left and right into temporaries, they will be modified by vtrn.
2131 __ vmov(tmp1, left);
2132 __ vmov(tmp2, right);
2133
2134 // This diagram shows how the 64-bit integers fit into NEON registers.
2135 //
2136 // [q.high()| q.low()]
2137 // left/tmp1: [ a3, a2 | a1, a0 ]
2138 // right/tmp2: [ b3, b2 | b1, b0 ]
2139 //
2140 // We want to multiply the low 32 bits of left with the high 32 bits of
2141 // right, for each lane, i.e. a2 * b3, a0 * b1. However, vmull takes two
2142 // input d registers and multiplies the corresponding low/high 32 bits to
2143 // get a 64-bit integer: a1 * b1, a0 * b0. To make this work we transpose
2144 // the vectors, so that the low 32 bits of each 64-bit integer end up in
2145 // the same lane, and similarly for the high 32 bits.
2146 __ vtrn(Neon32, tmp1.low(), tmp1.high());
2147 // tmp1: [ a3, a1 | a2, a0 ]
2148 __ vtrn(Neon32, tmp2.low(), tmp2.high());
2149 // tmp2: [ b3, b1 | b2, b0 ]
2150
2151 __ vmull(NeonU32, dst, tmp1.low(), tmp2.high());
2152 // dst: [ a2*b3 | a0*b1 ]
2153 __ vmlal(NeonU32, dst, tmp1.high(), tmp2.low());
2154 // dst: [ a2*b3 + a3*b2 | a0*b1 + a1*b0 ]
2155 __ vshl(NeonU64, dst, dst, 32);
2156 // dst: [ (a2*b3 + a3*b2) << 32 | (a0*b1 + a1*b0) << 32 ]
2157
2158 __ vmlal(NeonU32, dst, tmp1.low(), tmp2.low());
2159 // dst: [ (a2*b3 + a3*b2)<<32 + (a2*b2) | (a0*b1 + a1*b0)<<32 + (a0*b0) ]
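// To see why this yields the low 64 bits of the product: for lane 0, with
// left = a1 * 2^32 + a0 and right = b1 * 2^32 + b0,
//   left * right = a1*b1 * 2^64 + (a0*b1 + a1*b0) * 2^32 + a0*b0,
// and the a1*b1 term only affects bits 64 and above, so it can be dropped.
// The vmull/vmlal pair above computes the cross terms, vshl moves them into
// the upper 32 bits, and the final vmlal adds a0*b0. Lane 1 (a2, a3, b2, b3)
// is handled the same way in parallel.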
2160 break;
2161 }
2162 case kArmI64x2Abs: {
2163 __ I64x2Abs(i.OutputSimd128Register(), i.InputSimd128Register(0));
2164 break;
2165 }
2166 case kArmI64x2Neg: {
2167 Simd128Register dst = i.OutputSimd128Register();
2168 __ vmov(dst, uint64_t{0});
2169 __ vsub(Neon64, dst, dst, i.InputSimd128Register(0));
2170 break;
2171 }
2172 case kArmI64x2Shl: {
2173 ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 6, Neon32, NeonS64);
2174 break;
2175 }
2176 case kArmI64x2ShrS: {
2177 // Only the least significant byte of each lane is used, so we can use
2178 // Neon32 as the size.
2179 ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 6, Neon32, NeonS64);
2180 break;
2181 }
2182 case kArmI64x2ShrU: {
2183 // Only the least significant byte of each lane is used, so we can use
2184 // Neon32 as the size.
2185 ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 6, Neon32, NeonU64);
2186 break;
2187 }
2188 case kArmI64x2BitMask: {
2189 __ I64x2BitMask(i.OutputRegister(), i.InputSimd128Register(0));
2190 break;
2191 }
2192 case kArmI64x2SConvertI32x4Low: {
2193 __ vmovl(NeonS32, i.OutputSimd128Register(),
2194 i.InputSimd128Register(0).low());
2195 break;
2196 }
2197 case kArmI64x2SConvertI32x4High: {
2198 __ vmovl(NeonS32, i.OutputSimd128Register(),
2199 i.InputSimd128Register(0).high());
2200 break;
2201 }
2202 case kArmI64x2UConvertI32x4Low: {
2203 __ vmovl(NeonU32, i.OutputSimd128Register(),
2204 i.InputSimd128Register(0).low());
2205 break;
2206 }
2207 case kArmI64x2UConvertI32x4High: {
2208 __ vmovl(NeonU32, i.OutputSimd128Register(),
2209 i.InputSimd128Register(0).high());
2210 break;
2211 }
2212 case kArmF32x4Splat: {
2213 int src_code = i.InputFloatRegister(0).code();
2214 __ vdup(Neon32, i.OutputSimd128Register(),
2215 DwVfpRegister::from_code(src_code / 2), src_code % 2);
2216 break;
2217 }
2218 case kArmF32x4ExtractLane: {
2219 __ ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
2220 i.InputInt8(1));
2221 break;
2222 }
2223 case kArmF32x4ReplaceLane: {
2224 __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
2225 i.InputFloatRegister(2), i.InputInt8(1));
2226 break;
2227 }
2228 case kArmF32x4SConvertI32x4: {
2229 __ vcvt_f32_s32(i.OutputSimd128Register(), i.InputSimd128Register(0));
2230 break;
2231 }
2232 case kArmF32x4UConvertI32x4: {
2233 __ vcvt_f32_u32(i.OutputSimd128Register(), i.InputSimd128Register(0));
2234 break;
2235 }
2236 case kArmF32x4Abs: {
2237 __ vabs(i.OutputSimd128Register(), i.InputSimd128Register(0));
2238 break;
2239 }
2240 case kArmF32x4Neg: {
2241 __ vneg(i.OutputSimd128Register(), i.InputSimd128Register(0));
2242 break;
2243 }
2244 case kArmF32x4Sqrt: {
2245 QwNeonRegister dst = i.OutputSimd128Register();
2246 QwNeonRegister src1 = i.InputSimd128Register(0);
2247 DCHECK_EQ(dst, q0);
2248 DCHECK_EQ(src1, q0);
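// NEON only provides a reciprocal square root estimate for f32 vectors, so
// the full-precision square root is taken lane by lane through the VFP
// s registers that alias q0 (which the DCHECKs above pin to dst and src1).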
2249#define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane)
2250 __ vsqrt(S_FROM_Q(dst, 0), S_FROM_Q(src1, 0));
2251 __ vsqrt(S_FROM_Q(dst, 1), S_FROM_Q(src1, 1));
2252 __ vsqrt(S_FROM_Q(dst, 2), S_FROM_Q(src1, 2));
2253 __ vsqrt(S_FROM_Q(dst, 3), S_FROM_Q(src1, 3));
2254#undef S_FROM_Q
2255 break;
2256 }
2257 case kArmF32x4Add: {
2258 __ vadd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2259 i.InputSimd128Register(1));
2260 break;
2261 }
2262 case kArmF32x4Sub: {
2263 __ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0),
2264 i.InputSimd128Register(1));
2265 break;
2266 }
2267 case kArmF32x4Mul: {
2268 __ vmul(i.OutputSimd128Register(), i.InputSimd128Register(0),
2269 i.InputSimd128Register(1));
2270 break;
2271 }
2272 case kArmF32x4Div: {
2273 QwNeonRegister dst = i.OutputSimd128Register();
2274 QwNeonRegister src1 = i.InputSimd128Register(0);
2275 QwNeonRegister src2 = i.InputSimd128Register(1);
2276 DCHECK_EQ(dst, q0);
2277 DCHECK_EQ(src1, q0);
2278 DCHECK_EQ(src2, q1);
2279#define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane)
2280 __ vdiv(S_FROM_Q(dst, 0), S_FROM_Q(src1, 0), S_FROM_Q(src2, 0));
2281 __ vdiv(S_FROM_Q(dst, 1), S_FROM_Q(src1, 1), S_FROM_Q(src2, 1));
2282 __ vdiv(S_FROM_Q(dst, 2), S_FROM_Q(src1, 2), S_FROM_Q(src2, 2));
2283 __ vdiv(S_FROM_Q(dst, 3), S_FROM_Q(src1, 3), S_FROM_Q(src2, 3));
2284#undef S_FROM_Q
2285 break;
2286 }
2287 case kArmF32x4Min: {
2288 __ vmin(i.OutputSimd128Register(), i.InputSimd128Register(0),
2289 i.InputSimd128Register(1));
2290 break;
2291 }
2292 case kArmF32x4Max: {
2293 __ vmax(i.OutputSimd128Register(), i.InputSimd128Register(0),
2294 i.InputSimd128Register(1));
2295 break;
2296 }
2297 case kArmF32x4Eq: {
2298 __ vceq(i.OutputSimd128Register(), i.InputSimd128Register(0),
2299 i.InputSimd128Register(1));
2300 break;
2301 }
2302 case kArmF32x4Ne: {
2303 Simd128Register dst = i.OutputSimd128Register();
2304 __ vceq(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
2305 __ vmvn(dst, dst);
2306 break;
2307 }
2308 case kArmF32x4Lt: {
2309 __ vcgt(i.OutputSimd128Register(), i.InputSimd128Register(1),
2310 i.InputSimd128Register(0));
2311 break;
2312 }
2313 case kArmF32x4Le: {
2314 __ vcge(i.OutputSimd128Register(), i.InputSimd128Register(1),
2315 i.InputSimd128Register(0));
2316 break;
2317 }
2318 case kArmF32x4Pmin: {
2319 Simd128Register dst = i.OutputSimd128Register();
2320 Simd128Register lhs = i.InputSimd128Register(0);
2321 Simd128Register rhs = i.InputSimd128Register(1);
2322 DCHECK_NE(dst, lhs);
2323 DCHECK_NE(dst, rhs);
2324
2325 // f32x4.pmin(lhs, rhs)
2326 // = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs))
2327 // = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs))
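// vbsl(dst, a, b) selects each bit from a where dst holds a 1 and from b
// where it holds a 0. vcgt below writes all-ones into exactly the lanes
// where lhs > rhs, so the vbsl picks rhs in those lanes and lhs elsewhere,
// matching the pseudo-min semantics.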
2328 __ vcgt(dst, lhs, rhs);
2329 __ vbsl(dst, rhs, lhs);
2330 break;
2331 }
2332 case kArmF32x4Pmax: {
2333 Simd128Register dst = i.OutputSimd128Register();
2334 Simd128Register lhs = i.InputSimd128Register(0);
2335 Simd128Register rhs = i.InputSimd128Register(1);
2336 DCHECK_NE(dst, lhs);
2337 DCHECK_NE(dst, rhs);
2338
2339 // f32x4.pmax(lhs, rhs)
2340 // = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs))
2341 __ vcgt(dst, rhs, lhs);
2342 __ vbsl(dst, rhs, lhs);
2343 break;
2344 }
2345 case kArmF32x4Qfma: {
2346 Simd128Register dst = i.OutputSimd128Register();
2347 __ vmul(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
2348 __ vadd(dst, i.InputSimd128Register(2), dst);
2349 break;
2350 }
2351 case kArmF32x4Qfms: {
2352 Simd128Register dst = i.OutputSimd128Register();
2353 __ vmul(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
2354 __ vsub(dst, i.InputSimd128Register(2), dst);
2355 break;
2356 }
2357 case kArmF32x4DemoteF64x2Zero: {
2358 Simd128Register dst = i.OutputSimd128Register();
2359 Simd128Register src = i.InputSimd128Register(0);
2360 __ vcvt_f32_f64(SwVfpRegister::from_code(dst.code() * 4), src.low());
2361 __ vcvt_f32_f64(SwVfpRegister::from_code(dst.code() * 4 + 1), src.high());
2362 __ vmov(dst.high(), 0);
2363 break;
2364 }
2365 case kArmI32x4Splat: {
2366 __ vdup(Neon32, i.OutputSimd128Register(), i.InputRegister(0));
2367 break;
2368 }
2369 case kArmI32x4ExtractLane: {
2370 __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS32,
2371 i.InputInt8(1));
2372 break;
2373 }
2374 case kArmI32x4ReplaceLane: {
2375 __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
2376 i.InputRegister(2), NeonS32, i.InputInt8(1));
2377 break;
2378 }
2379 case kArmI32x4SConvertF32x4: {
2380 __ vcvt_s32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
2381 break;
2382 }
2383 case kArmI32x4SConvertI16x8Low: {
2384 __ vmovl(NeonS16, i.OutputSimd128Register(),
2385 i.InputSimd128Register(0).low());
2386 break;
2387 }
2388 case kArmI32x4SConvertI16x8High: {
2389 __ vmovl(NeonS16, i.OutputSimd128Register(),
2390 i.InputSimd128Register(0).high());
2391 break;
2392 }
2393 case kArmI32x4Neg: {
2394 __ vneg(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
2395 break;
2396 }
2397 case kArmI32x4Shl: {
2398 ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 5, Neon32, NeonS32);
2399 break;
2400 }
2401 case kArmI32x4ShrS: {
2402 ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 5, Neon32, NeonS32);
2403 break;
2404 }
2405 case kArmI32x4Add: {
2406 __ vadd(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2407 i.InputSimd128Register(1));
2408 break;
2409 }
2410 case kArmI32x4Sub: {
2411 __ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2412 i.InputSimd128Register(1));
2413 break;
2414 }
2415 case kArmI32x4Mul: {
2416 __ vmul(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2417 i.InputSimd128Register(1));
2418 break;
2419 }
2420 case kArmI32x4MinS: {
2421 __ vmin(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2422 i.InputSimd128Register(1));
2423 break;
2424 }
2425 case kArmI32x4MaxS: {
2426 __ vmax(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2427 i.InputSimd128Register(1));
2428 break;
2429 }
2430 case kArmI64x2Eq: {
2431 __ I64x2Eq(i.OutputSimd128Register(), i.InputSimd128Register(0),
2432 i.InputSimd128Register(1));
2433 break;
2434 }
2435 case kArmI64x2Ne: {
2436 __ I64x2Ne(i.OutputSimd128Register(), i.InputSimd128Register(0),
2437 i.InputSimd128Register(1));
2438 break;
2439 }
2440 case kArmI64x2GtS: {
2441 __ I64x2GtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2442 i.InputSimd128Register(1));
2443 break;
2444 }
2445 case kArmI64x2GeS: {
2446 __ I64x2GeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2447 i.InputSimd128Register(1));
2448 break;
2449 }
2450 case kArmI32x4Eq: {
2451 __ vceq(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2452 i.InputSimd128Register(1));
2453 break;
2454 }
2455 case kArmI32x4Ne: {
2456 Simd128Register dst = i.OutputSimd128Register();
2457 __ vceq(Neon32, dst, i.InputSimd128Register(0),
2458 i.InputSimd128Register(1));
2459 __ vmvn(dst, dst);
2460 break;
2461 }
2462 case kArmI32x4GtS: {
2463 __ vcgt(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2464 i.InputSimd128Register(1));
2465 break;
2466 }
2467 case kArmI32x4GeS: {
2468 __ vcge(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2469 i.InputSimd128Register(1));
2470 break;
2471 }
2472 case kArmI32x4UConvertF32x4: {
2473 __ vcvt_u32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
2474 break;
2475 }
2476 case kArmI32x4UConvertI16x8Low: {
2477 __ vmovl(NeonU16, i.OutputSimd128Register(),
2478 i.InputSimd128Register(0).low());
2479 break;
2480 }
2481 case kArmI32x4UConvertI16x8High: {
2482 __ vmovl(NeonU16, i.OutputSimd128Register(),
2483 i.InputSimd128Register(0).high());
2484 break;
2485 }
2486 case kArmI32x4ShrU: {
2487 ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 5, Neon32, NeonU32);
2488 break;
2489 }
2490 case kArmI32x4MinU: {
2491 __ vmin(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2492 i.InputSimd128Register(1));
2493 break;
2494 }
2495 case kArmI32x4MaxU: {
2496 __ vmax(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2497 i.InputSimd128Register(1));
2498 break;
2499 }
2500 case kArmI32x4GtU: {
2501 __ vcgt(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2502 i.InputSimd128Register(1));
2503 break;
2504 }
2505 case kArmI32x4GeU: {
2506 __ vcge(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2507 i.InputSimd128Register(1));
2508 break;
2509 }
2510 case kArmI32x4Abs: {
2511 __ vabs(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
2512 break;
2513 }
2514 case kArmI32x4BitMask: {
2515 Register dst = i.OutputRegister();
2516 UseScratchRegisterScope temps(masm());
2517 Simd128Register src = i.InputSimd128Register(0);
2518 Simd128Register tmp = temps.AcquireQ();
2519 Simd128Register mask = i.TempSimd128Register(0);
2520
2521 __ vshr(NeonS32, tmp, src, 31);
2522 // Set the i-th bit in lane i. When ANDed with tmp, lanes whose sign bit
2523 // is set keep their i-th bit, while the other lanes become 0.
2524 __ vmov(mask.low(), base::Double(uint64_t{0x0000'0002'0000'0001}));
2525 __ vmov(mask.high(), base::Double(uint64_t{0x0000'0008'0000'0004}));
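// For example, if only lane 2 is negative, tmp is all-ones in lane 2 and
// zero elsewhere; the AND below leaves [0, 4, 0, 0] (lanes 3..0), and the
// two pairwise adds collapse that to 4 = 0b0100, i.e. just bit 2 set.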
2526 __ vand(tmp, mask, tmp);
2527 __ vpadd(Neon32, tmp.low(), tmp.low(), tmp.high());
2528 __ vpadd(Neon32, tmp.low(), tmp.low(), kDoubleRegZero);
2529 __ VmovLow(dst, tmp.low());
2530 break;
2531 }
2532 case kArmI32x4DotI16x8S: {
2533 Simd128Register dst = i.OutputSimd128Register();
2534 Simd128Register lhs = i.InputSimd128Register(0);
2535 Simd128Register rhs = i.InputSimd128Register(1);
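// i32x4.dot_i16x8_s: widen and multiply the 16-bit lanes pairwise into
// 32-bit products (vmull on each half), then add adjacent products with
// vpadd so each output lane holds lhs[2k]*rhs[2k] + lhs[2k+1]*rhs[2k+1].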
2536 UseScratchRegisterScope temps(masm());
2537 Simd128Register scratch = temps.AcquireQ();
2538 __ vmull(NeonS16, scratch, lhs.low(), rhs.low());
2539 __ vpadd(Neon32, dst.low(), scratch.low(), scratch.high());
2540 __ vmull(NeonS16, scratch, lhs.high(), rhs.high());
2541 __ vpadd(Neon32, dst.high(), scratch.low(), scratch.high());
2542 break;
2543 }
2544 case kArmI16x8DotI8x16S: {
2545 Simd128Register dst = i.OutputSimd128Register();
2546 Simd128Register lhs = i.InputSimd128Register(0);
2547 Simd128Register rhs = i.InputSimd128Register(1);
2548 UseScratchRegisterScope temps(masm());
2549 Simd128Register scratch = temps.AcquireQ();
2550 __ vmull(NeonS8, scratch, lhs.low(), rhs.low());
2551 __ vpadd(Neon16, dst.low(), scratch.low(), scratch.high());
2552 __ vmull(NeonS8, scratch, lhs.high(), rhs.high());
2553 __ vpadd(Neon16, dst.high(), scratch.low(), scratch.high());
2554 break;
2555 }
2556 case kArmI32x4DotI8x16AddS: {
2557 Simd128Register dst = i.OutputSimd128Register();
2558 Simd128Register lhs = i.InputSimd128Register(0);
2559 Simd128Register rhs = i.InputSimd128Register(1);
2560 Simd128Register tmp1 = i.TempSimd128Register(0);
2561 DCHECK_EQ(dst, i.InputSimd128Register(2));
2562 UseScratchRegisterScope temps(masm());
2563 Simd128Register scratch = temps.AcquireQ();
2564 __ vmull(NeonS8, scratch, lhs.low(), rhs.low());
2565 __ vpadd(Neon16, tmp1.low(), scratch.low(), scratch.high());
2566 __ vmull(NeonS8, scratch, lhs.high(), rhs.high());
2567 __ vpadd(Neon16, tmp1.high(), scratch.low(), scratch.high());
2568 __ vpadal(NeonS16, dst, tmp1);
2569 break;
2570 }
2571 case kArmI32x4TruncSatF64x2SZero: {
2572 Simd128Register dst = i.OutputSimd128Register();
2573 Simd128Register src = i.InputSimd128Register(0);
2574 __ vcvt_s32_f64(SwVfpRegister::from_code(dst.code() * 4), src.low());
2575 __ vcvt_s32_f64(SwVfpRegister::from_code(dst.code() * 4 + 1), src.high());
2576 __ vmov(dst.high(), 0);
2577 break;
2578 }
2579 case kArmI32x4TruncSatF64x2UZero: {
2580 Simd128Register dst = i.OutputSimd128Register();
2581 Simd128Register src = i.InputSimd128Register(0);
2582 __ vcvt_u32_f64(SwVfpRegister::from_code(dst.code() * 4), src.low());
2583 __ vcvt_u32_f64(SwVfpRegister::from_code(dst.code() * 4 + 1), src.high());
2584 __ vmov(dst.high(), 0);
2585 break;
2586 }
2587 case kArmI16x8Splat: {
2588 __ vdup(Neon16, i.OutputSimd128Register(), i.InputRegister(0));
2589 break;
2590 }
2591 case kArmI16x8ExtractLaneU: {
2592 __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU16,
2593 i.InputInt8(1));
2594 break;
2595 }
2596 case kArmI16x8ExtractLaneS: {
2597 __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS16,
2598 i.InputInt8(1));
2599 break;
2600 }
2601 case kArmI16x8ReplaceLane: {
2602 __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
2603 i.InputRegister(2), NeonS16, i.InputInt8(1));
2604 break;
2605 }
2606 case kArmI16x8SConvertI8x16Low: {
2607 __ vmovl(NeonS8, i.OutputSimd128Register(),
2608 i.InputSimd128Register(0).low());
2609 break;
2610 }
2611 case kArmI16x8SConvertI8x16High: {
2612 __ vmovl(NeonS8, i.OutputSimd128Register(),
2613 i.InputSimd128Register(0).high());
2614 break;
2615 }
2616 case kArmI16x8Neg: {
2617 __ vneg(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
2618 break;
2619 }
2620 case kArmI16x8Shl: {
2621 ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 4, Neon16, NeonS16);
2622 break;
2623 }
2624 case kArmI16x8ShrS: {
2625 ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 4, Neon16, NeonS16);
2626 break;
2627 }
2628 case kArmI16x8SConvertI32x4:
2629 ASSEMBLE_NEON_NARROWING_OP(NeonS16, NeonS16);
2630 break;
2631 case kArmI16x8Add: {
2632 __ vadd(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2633 i.InputSimd128Register(1));
2634 break;
2635 }
2636 case kArmI16x8AddSatS: {
2637 __ vqadd(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2638 i.InputSimd128Register(1));
2639 break;
2640 }
2641 case kArmI16x8Sub: {
2642 __ vsub(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2643 i.InputSimd128Register(1));
2644 break;
2645 }
2646 case kArmI16x8SubSatS: {
2647 __ vqsub(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2648 i.InputSimd128Register(1));
2649 break;
2650 }
2651 case kArmI16x8Mul: {
2652 __ vmul(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2653 i.InputSimd128Register(1));
2654 break;
2655 }
2656 case kArmI16x8MinS: {
2657 __ vmin(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2658 i.InputSimd128Register(1));
2659 break;
2660 }
2661 case kArmI16x8MaxS: {
2662 __ vmax(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2663 i.InputSimd128Register(1));
2664 break;
2665 }
2666 case kArmI16x8Eq: {
2667 __ vceq(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2668 i.InputSimd128Register(1));
2669 break;
2670 }
2671 case kArmI16x8Ne: {
2672 Simd128Register dst = i.OutputSimd128Register();
2673 __ vceq(Neon16, dst, i.InputSimd128Register(0),
2674 i.InputSimd128Register(1));
2675 __ vmvn(dst, dst);
2676 break;
2677 }
2678 case kArmI16x8GtS: {
2679 __ vcgt(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2680 i.InputSimd128Register(1));
2681 break;
2682 }
2683 case kArmI16x8GeS: {
2684 __ vcge(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2685 i.InputSimd128Register(1));
2686 break;
2687 }
2688 case kArmI16x8UConvertI8x16Low: {
2689 __ vmovl(NeonU8, i.OutputSimd128Register(),
2690 i.InputSimd128Register(0).low());
2691 break;
2692 }
2693 case kArmI16x8UConvertI8x16High: {
2694 __ vmovl(NeonU8, i.OutputSimd128Register(),
2695 i.InputSimd128Register(0).high());
2696 break;
2697 }
2698 case kArmI16x8ShrU: {
2699 ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 4, Neon16, NeonU16);
2700 break;
2701 }
2702 case kArmI16x8UConvertI32x4:
2703 ASSEMBLE_NEON_NARROWING_OP(NeonU16, NeonS16);
2704 break;
2705 case kArmI16x8AddSatU: {
2706 __ vqadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2707 i.InputSimd128Register(1));
2708 break;
2709 }
2710 case kArmI16x8SubSatU: {
2711 __ vqsub(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2712 i.InputSimd128Register(1));
2713 break;
2714 }
2715 case kArmI16x8MinU: {
2716 __ vmin(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2717 i.InputSimd128Register(1));
2718 break;
2719 }
2720 case kArmI16x8MaxU: {
2721 __ vmax(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2722 i.InputSimd128Register(1));
2723 break;
2724 }
2725 case kArmI16x8GtU: {
2726 __ vcgt(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2727 i.InputSimd128Register(1));
2728 break;
2729 }
2730 case kArmI16x8GeU: {
2731 __ vcge(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2732 i.InputSimd128Register(1));
2733 break;
2734 }
2735 case kArmI16x8RoundingAverageU: {
2736 __ vrhadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2737 i.InputSimd128Register(1));
2738 break;
2739 }
2740 case kArmI16x8Abs: {
2741 __ vabs(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
2742 break;
2743 }
2744 case kArmI16x8BitMask: {
2745 UseScratchRegisterScope temps(masm());
2746 Register dst = i.OutputRegister();
2747 Simd128Register src = i.InputSimd128Register(0);
2748 Simd128Register tmp = temps.AcquireQ();
2749 Simd128Register mask = i.TempSimd128Register(0);
2750
2751 __ vshr(NeonS16, tmp, src, 15);
2752 // Set the i-th bit in lane i. When ANDed with tmp, lanes whose sign bit
2753 // is set keep their i-th bit, while the other lanes become 0.
2754 __ vmov(mask.low(), base::Double(uint64_t{0x0008'0004'0002'0001}));
2755 __ vmov(mask.high(), base::Double(uint64_t{0x0080'0040'0020'0010}));
2756 __ vand(tmp, mask, tmp);
2757 __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
2758 __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
2759 __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
2760 __ vmov(NeonU16, dst, tmp.low(), 0);
2761 break;
2762 }
2763 case kArmI16x8Q15MulRSatS: {
2764 __ vqrdmulh(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2765 i.InputSimd128Register(1));
2766 break;
2767 }
2768 case kArmI8x16Splat: {
2769 __ vdup(Neon8, i.OutputSimd128Register(), i.InputRegister(0));
2770 break;
2771 }
2772 case kArmI8x16ExtractLaneU: {
2773 __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU8,
2774 i.InputInt8(1));
2775 break;
2776 }
2777 case kArmI8x16ExtractLaneS: {
2778 __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS8,
2779 i.InputInt8(1));
2780 break;
2781 }
2782 case kArmI8x16ReplaceLane: {
2783 __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
2784 i.InputRegister(2), NeonS8, i.InputInt8(1));
2785 break;
2786 }
2787 case kArmI8x16Neg: {
2788 __ vneg(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2789 break;
2790 }
2791 case kArmI8x16Shl: {
2792 ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 3, Neon8, NeonS8);
2793 break;
2794 }
2795 case kArmI8x16ShrS: {
2796 ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 3, Neon8, NeonS8);
2797 break;
2798 }
2799 case kArmI8x16SConvertI16x8:
2800 ASSEMBLE_NEON_NARROWING_OP(NeonS8, NeonS8);
2801 break;
2802 case kArmI8x16Add: {
2803 __ vadd(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2804 i.InputSimd128Register(1));
2805 break;
2806 }
2807 case kArmI8x16AddSatS: {
2808 __ vqadd(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2809 i.InputSimd128Register(1));
2810 break;
2811 }
2812 case kArmI8x16Sub: {
2813 __ vsub(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2814 i.InputSimd128Register(1));
2815 break;
2816 }
2817 case kArmI8x16SubSatS: {
2818 __ vqsub(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2819 i.InputSimd128Register(1));
2820 break;
2821 }
2822 case kArmI8x16MinS: {
2823 __ vmin(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2824 i.InputSimd128Register(1));
2825 break;
2826 }
2827 case kArmI8x16MaxS: {
2828 __ vmax(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2829 i.InputSimd128Register(1));
2830 break;
2831 }
2832 case kArmI8x16Eq: {
2833 __ vceq(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2834 i.InputSimd128Register(1));
2835 break;
2836 }
2837 case kArmI8x16Ne: {
2838 Simd128Register dst = i.OutputSimd128Register();
2839 __ vceq(Neon8, dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
2840 __ vmvn(dst, dst);
2841 break;
2842 }
2843 case kArmI8x16GtS: {
2844 __ vcgt(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2845 i.InputSimd128Register(1));
2846 break;
2847 }
2848 case kArmI8x16GeS: {
2849 __ vcge(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2850 i.InputSimd128Register(1));
2851 break;
2852 }
2853 case kArmI8x16ShrU: {
2854 ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 3, Neon8, NeonU8);
2855 break;
2856 }
2857 case kArmI8x16UConvertI16x8:
2858 ASSEMBLE_NEON_NARROWING_OP(NeonU8, NeonS8);
2859 break;
2860 case kArmI8x16AddSatU: {
2861 __ vqadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2862 i.InputSimd128Register(1));
2863 break;
2864 }
2865 case kArmI8x16SubSatU: {
2866 __ vqsub(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2867 i.InputSimd128Register(1));
2868 break;
2869 }
2870 case kArmI8x16MinU: {
2871 __ vmin(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2872 i.InputSimd128Register(1));
2873 break;
2874 }
2875 case kArmI8x16MaxU: {
2876 __ vmax(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2877 i.InputSimd128Register(1));
2878 break;
2879 }
2880 case kArmI8x16GtU: {
2881 __ vcgt(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2882 i.InputSimd128Register(1));
2883 break;
2884 }
2885 case kArmI8x16GeU: {
2886 __ vcge(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2887 i.InputSimd128Register(1));
2888 break;
2889 }
2890 case kArmI8x16RoundingAverageU: {
2891 __ vrhadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2892 i.InputSimd128Register(1));
2893 break;
2894 }
2895 case kArmI8x16Abs: {
2896 __ vabs(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2897 break;
2898 }
2899 case kArmI8x16BitMask: {
2900 UseScratchRegisterScope temps(masm());
2901 Register dst = i.OutputRegister();
2902 Simd128Register src = i.InputSimd128Register(0);
2903 Simd128Register tmp = temps.AcquireQ();
2904 Simd128Register mask = i.TempSimd128Register(0);
2905
2906 __ vshr(NeonS8, tmp, src, 7);
2907 // Set the i-th bit in lane i. When ANDed with tmp, lanes whose sign bit
2908 // is set keep their i-th bit, while the other lanes become 0.
2909 __ vmov(mask.low(), base::Double(uint64_t{0x8040'2010'0804'0201}));
2910 __ vmov(mask.high(), base::Double(uint64_t{0x8040'2010'0804'0201}));
2911 __ vand(tmp, mask, tmp);
2912 __ vext(mask, tmp, tmp, 8);
2913 __ vzip(Neon8, mask, tmp);
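// After the AND, the low d register holds the mask bits for lanes 0-7 and
// the high d register the bits for lanes 8-15 (as byte values 1 << j). The
// vext/vzip above pair each low byte with its high counterpart inside one
// 16-bit lane, so the pairwise adds below sum everything into a single
// 16-bit mask with the high lanes already shifted into bits 8-15.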
2914 __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
2915 __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
2916 __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
2917 __ vmov(NeonU16, dst, tmp.low(), 0);
2918 break;
2919 }
2920 case kArmS128Const: {
2921 QwNeonRegister dst = i.OutputSimd128Register();
2922 uint64_t imm1 = make_uint64(i.InputUint32(1), i.InputUint32(0));
2923 uint64_t imm2 = make_uint64(i.InputUint32(3), i.InputUint32(2));
2924 __ vmov(dst.low(), base::Double(imm1));
2925 __ vmov(dst.high(), base::Double(imm2));
2926 break;
2927 }
2928 case kArmS128Zero: {
2929 __ veor(i.OutputSimd128Register(), i.OutputSimd128Register(),
2930 i.OutputSimd128Register());
2931 break;
2932 }
2933 case kArmS128AllOnes: {
2934 __ vmov(i.OutputSimd128Register(), uint64_t{0xffff'ffff'ffff'ffff});
2935 break;
2936 }
2937 case kArmS128Dup: {
2938 NeonSize size = static_cast<NeonSize>(i.InputInt32(1));
2939 int lanes = kSimd128Size >> size;
2940 int index = i.InputInt32(2);
2941 DCHECK(index < lanes);
2942 int d_lanes = lanes / 2;
2943 int src_d_index = index & (d_lanes - 1);
2944 int src_d_code = i.InputSimd128Register(0).low().code() + index / d_lanes;
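// For instance, a 16-bit dup of lane 5: lanes = 8 and d_lanes = 4, so
// src_d_index = 1 and src_d_code names the high d register of the source;
// the vdup below then broadcasts element 1 of that d register, which is
// q lane 5.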
2945 __ vdup(size, i.OutputSimd128Register(),
2946 DwVfpRegister::from_code(src_d_code), src_d_index);
2947 break;
2948 }
2949 case kArmS128And: {
2950 __ vand(i.OutputSimd128Register(), i.InputSimd128Register(0),
2951 i.InputSimd128Register(1));
2952 break;
2953 }
2954 case kArmS128Or: {
2955 __ vorr(i.OutputSimd128Register(), i.InputSimd128Register(0),
2956 i.InputSimd128Register(1));
2957 break;
2958 }
2959 case kArmS128Xor: {
2960 __ veor(i.OutputSimd128Register(), i.InputSimd128Register(0),
2961 i.InputSimd128Register(1));
2962 break;
2963 }
2964 case kArmS128Not: {
2965 __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0));
2966 break;
2967 }
2968 case kArmS128Select: {
2969 Simd128Register dst = i.OutputSimd128Register();
2970 DCHECK(dst == i.InputSimd128Register(0));
2971 __ vbsl(dst, i.InputSimd128Register(1), i.InputSimd128Register(2));
2972 break;
2973 }
2974 case kArmS128AndNot: {
2975 __ vbic(i.OutputSimd128Register(), i.InputSimd128Register(0),
2976 i.InputSimd128Register(1));
2977 break;
2978 }
2979 case kArmS32x4ZipLeft: {
2980 Simd128Register dst = i.OutputSimd128Register(),
2981 src1 = i.InputSimd128Register(1);
2982 DCHECK(dst == i.InputSimd128Register(0));
2983 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
2984 __ vmov(dst.high(), src1.low()); // dst = [0, 1, 4, 5]
2985 __ vtrn(Neon32, dst.low(), dst.high()); // dst = [0, 4, 1, 5]
2986 break;
2987 }
2988 case kArmS32x4ZipRight: {
2989 Simd128Register dst = i.OutputSimd128Register(),
2990 src1 = i.InputSimd128Register(1);
2991 DCHECK(dst == i.InputSimd128Register(0));
2992 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from ZipLeft).
2993 __ vmov(dst.low(), src1.high()); // dst = [2, 3, 6, 7]
2994 __ vtrn(Neon32, dst.low(), dst.high()); // dst = [2, 6, 3, 7]
2995 break;
2996 }
2997 case kArmS32x4UnzipLeft: {
2998 Simd128Register dst = i.OutputSimd128Register(),
2999 src1 = i.InputSimd128Register(1);
3000 DCHECK(dst == i.InputSimd128Register(0));
3001 UseScratchRegisterScope temps(masm());
3002 Simd128Register scratch = temps.AcquireQ();
3003 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
3004 __ vmov(scratch, src1);
3005 __ vuzp(Neon32, dst, scratch); // dst = [0, 2, 4, 6]
3006 break;
3007 }
3008 case kArmS32x4UnzipRight: {
3009 Simd128Register dst = i.OutputSimd128Register(),
3010 src1 = i.InputSimd128Register(1);
3011 DCHECK(dst == i.InputSimd128Register(0));
3012 UseScratchRegisterScope temps(masm());
3013 Simd128Register scratch = temps.AcquireQ();
3014 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from UnzipLeft).
3015 __ vmov(scratch, src1);
3016 __ vuzp(Neon32, scratch, dst); // dst = [1, 3, 5, 7]
3017 break;
3018 }
3019 case kArmS32x4TransposeLeft: {
3020 Simd128Register dst = i.OutputSimd128Register(),
3021 src1 = i.InputSimd128Register(1);
3022 DCHECK(dst == i.InputSimd128Register(0));
3023 UseScratchRegisterScope temps(masm());
3024 Simd128Register scratch = temps.AcquireQ();
3025 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
3026 __ vmov(scratch, src1);
3027 __ vtrn(Neon32, dst, scratch); // dst = [0, 4, 2, 6]
3028 break;
3029 }
3030 case kArmS32x4Shuffle: {
3031 Simd128Register dst = i.OutputSimd128Register(),
3032 src0 = i.InputSimd128Register(0),
3033 src1 = i.InputSimd128Register(1);
3034 DCHECK_NE(dst, src0);
3035 DCHECK_NE(dst, src1);
3036 // Perform shuffle as a vmov per lane.
3037 int dst_code = dst.code() * 4;
3038 int src0_code = src0.code() * 4;
3039 int src1_code = src1.code() * 4;
3040 int32_t shuffle = i.InputInt32(2);
3041 for (int i = 0; i < 4; i++) {
3042 int lane = shuffle & 0x7;
3043 int src_code = src0_code;
3044 if (lane >= 4) {
3045 src_code = src1_code;
3046 lane &= 0x3;
3047 }
3048 __ VmovExtended(dst_code + i, src_code + lane);
3049 shuffle >>= 8;
3050 }
3051 break;
3052 }
3053 case kArmS32x4TransposeRight: {
3054 Simd128Register dst = i.OutputSimd128Register(),
3055 src1 = i.InputSimd128Register(1);
3056 UseScratchRegisterScope temps(masm());
3057 Simd128Register scratch = temps.AcquireQ();
3058 DCHECK(dst == i.InputSimd128Register(0));
3059 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft).
3060 __ vmov(scratch, src1);
3061 __ vtrn(Neon32, scratch, dst); // dst = [1, 5, 3, 7]
3062 break;
3063 }
3064 case kArmS16x8ZipLeft: {
3065 Simd128Register dst = i.OutputSimd128Register(),
3066 src1 = i.InputSimd128Register(1);
3067 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
3068 DCHECK(dst == i.InputSimd128Register(0));
3069 __ vmov(dst.high(), src1.low()); // dst = [0, 1, 2, 3, 8, ... 11]
3070 __ vzip(Neon16, dst.low(), dst.high()); // dst = [0, 8, 1, 9, ... 11]
3071 break;
3072 }
3073 case kArmS16x8ZipRight: {
3074 Simd128Register dst = i.OutputSimd128Register(),
3075 src1 = i.InputSimd128Register(1);
3076 DCHECK(dst == i.InputSimd128Register(0));
3077 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
3078 __ vmov(dst.low(), src1.high());
3079 __ vzip(Neon16, dst.low(), dst.high()); // dst = [4, 12, 5, 13, ... 15]
3080 break;
3081 }
3082 case kArmS16x8UnzipLeft: {
3083 Simd128Register dst = i.OutputSimd128Register(),
3084 src1 = i.InputSimd128Register(1);
3085 UseScratchRegisterScope temps(masm());
3086 Simd128Register scratch = temps.AcquireQ();
3087 DCHECK(dst == i.InputSimd128Register(0));
3088 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
3089 __ vmov(scratch, src1);
3090 __ vuzp(Neon16, dst, scratch); // dst = [0, 2, 4, 6, ... 14]
3091 break;
3092 }
3093 case kArmS16x8UnzipRight: {
3094 Simd128Register dst = i.OutputSimd128Register(),
3095 src1 = i.InputSimd128Register(1);
3096 UseScratchRegisterScope temps(masm());
3097 Simd128Register scratch = temps.AcquireQ();
3098 DCHECK(dst == i.InputSimd128Register(0));
3099 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
3100 __ vmov(scratch, src1);
3101 __ vuzp(Neon16, scratch, dst); // dst = [1, 3, 5, 7, ... 15]
3102 break;
3103 }
3104 case kArmS16x8TransposeLeft: {
3105 Simd128Register dst = i.OutputSimd128Register(),
3106 src1 = i.InputSimd128Register(1);
3107 UseScratchRegisterScope temps(masm());
3108 Simd128Register scratch = temps.AcquireQ();
3109 DCHECK(dst == i.InputSimd128Register(0));
3110 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
3111 __ vmov(scratch, src1);
3112 __ vtrn(Neon16, dst, scratch); // dst = [0, 8, 2, 10, ... 14]
3113 break;
3114 }
3115 case kArmS16x8TransposeRight: {
3116 Simd128Register dst = i.OutputSimd128Register(),
3117 src1 = i.InputSimd128Register(1);
3118 UseScratchRegisterScope temps(masm());
3119 Simd128Register scratch = temps.AcquireQ();
3120 DCHECK(dst == i.InputSimd128Register(0));
3121 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
3122 __ vmov(scratch, src1);
3123 __ vtrn(Neon16, scratch, dst); // dst = [1, 9, 3, 11, ... 15]
3124 break;
3125 }
3126 case kArmS8x16ZipLeft: {
3127 Simd128Register dst = i.OutputSimd128Register(),
3128 src1 = i.InputSimd128Register(1);
3129 DCHECK(dst == i.InputSimd128Register(0));
3130 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
3131 __ vmov(dst.high(), src1.low());
3132 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23]
3133 break;
3134 }
3135 case kArmS8x16ZipRight: {
3136 Simd128Register dst = i.OutputSimd128Register(),
3137 src1 = i.InputSimd128Register(1);
3138 DCHECK(dst == i.InputSimd128Register(0));
3139 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
3140 __ vmov(dst.low(), src1.high());
3141 __ vzip(Neon8, dst.low(), dst.high()); // dst = [8, 24, 9, 25, ... 31]
3142 break;
3143 }
3144 case kArmS8x16UnzipLeft: {
3145 Simd128Register dst = i.OutputSimd128Register(),
3146 src1 = i.InputSimd128Register(1);
3147 UseScratchRegisterScope temps(masm());
3148 Simd128Register scratch = temps.AcquireQ();
3149 DCHECK(dst == i.InputSimd128Register(0));
3150 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
3151 __ vmov(scratch, src1);
3152 __ vuzp(Neon8, dst, scratch); // dst = [0, 2, 4, 6, ... 30]
3153 break;
3154 }
3155 case kArmS8x16UnzipRight: {
3156 Simd128Register dst = i.OutputSimd128Register(),
3157 src1 = i.InputSimd128Register(1);
3158 UseScratchRegisterScope temps(masm());
3159 Simd128Register scratch = temps.AcquireQ();
3160 DCHECK(dst == i.InputSimd128Register(0));
3161 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
3162 __ vmov(scratch, src1);
3163 __ vuzp(Neon8, scratch, dst); // dst = [1, 3, 5, 7, ... 31]
3164 break;
3165 }
3166 case kArmS8x16TransposeLeft: {
3167 Simd128Register dst = i.OutputSimd128Register(),
3168 src1 = i.InputSimd128Register(1);
3169 UseScratchRegisterScope temps(masm());
3170 Simd128Register scratch = temps.AcquireQ();
3171 DCHECK(dst == i.InputSimd128Register(0));
3172 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
3173 __ vmov(scratch, src1);
3174 __ vtrn(Neon8, dst, scratch); // dst = [0, 16, 2, 18, ... 30]
3175 break;
3176 }
3177 case kArmS8x16TransposeRight: {
3178 Simd128Register dst = i.OutputSimd128Register(),
3179 src1 = i.InputSimd128Register(1);
3180 UseScratchRegisterScope temps(masm());
3181 Simd128Register scratch = temps.AcquireQ();
3182 DCHECK(dst == i.InputSimd128Register(0));
3183 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
3184 __ vmov(scratch, src1);
3185 __ vtrn(Neon8, scratch, dst); // dst = [1, 17, 3, 19, ... 31]
3186 break;
3187 }
3188 case kArmS8x16Concat: {
3189 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0),
3190 i.InputSimd128Register(1), i.InputInt4(2));
3191 break;
3192 }
3193 case kArmI8x16Swizzle: {
3194 Simd128Register dst = i.OutputSimd128Register(),
3195 tbl = i.InputSimd128Register(0),
3196 src = i.InputSimd128Register(1);
3197 NeonListOperand table(tbl);
3198 __ vtbl(dst.low(), table, src.low());
3199 __ vtbl(dst.high(), table, src.high());
3200 break;
3201 }
3202 case kArmI8x16Shuffle: {
3203 Simd128Register dst = i.OutputSimd128Register(),
3204 src0 = i.InputSimd128Register(0),
3205 src1 = i.InputSimd128Register(1);
3206 DwVfpRegister table_base = src0.low();
3207 UseScratchRegisterScope temps(masm());
3208 Simd128Register scratch = temps.AcquireQ();
3209 // If unary shuffle, table is src0 (2 d-registers), otherwise src0 and
3210 // src1. They must be consecutive.
3211 int table_size = src0 == src1 ? 2 : 4;
3212 DCHECK_IMPLIES(src0 != src1, src0.code() + 1 == src1.code());
3213 // The shuffle lane mask is a byte mask, materialize in scratch.
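// Each 32-bit immediate packs four byte-sized lane indices. Indices must be
// below 16 when the table is src0 alone and below 32 when src0 and src1
// form a 32-byte table; the DCHECK below checks this via the unused high
// bits of every byte.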
3214 int scratch_s_base = scratch.code() * 4;
3215 for (int j = 0; j < 4; j++) {
3216 uint32_t four_lanes = i.InputUint32(2 + j);
3217 DCHECK_EQ(0, four_lanes & (table_size == 2 ? 0xF0F0F0F0 : 0xE0E0E0E0));
3218 __ vmov(SwVfpRegister::from_code(scratch_s_base + j),
3219 Float32::FromBits(four_lanes));
3220 }
3221 NeonListOperand table(table_base, table_size);
3222 if (dst != src0 && dst != src1) {
3223 __ vtbl(dst.low(), table, scratch.low());
3224 __ vtbl(dst.high(), table, scratch.high());
3225 } else {
3226 __ vtbl(scratch.low(), table, scratch.low());
3227 __ vtbl(scratch.high(), table, scratch.high());
3228 __ vmov(dst, scratch);
3229 }
3230 break;
3231 }
3232 case kArmS32x2Reverse: {
3233 __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
3234 break;
3235 }
3236 case kArmS16x4Reverse: {
3237 __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
3238 break;
3239 }
3240 case kArmS16x2Reverse: {
3241 __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
3242 break;
3243 }
3244 case kArmS8x8Reverse: {
3245 __ vrev64(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
3246 break;
3247 }
3248 case kArmS8x4Reverse: {
3249 __ vrev32(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
3250 break;
3251 }
3252 case kArmS8x2Reverse: {
3253 __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
3254 break;
3255 }
3256 case kArmV128AnyTrue: {
3257 const QwNeonRegister& src = i.InputSimd128Register(0);
3258 UseScratchRegisterScope temps(masm());
3259 DwVfpRegister scratch = temps.AcquireD();
3260 __ vpmax(NeonU32, scratch, src.low(), src.high());
3261 __ vpmax(NeonU32, scratch, scratch, scratch);
3262 __ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
3263 __ cmp(i.OutputRegister(), Operand(0));
3264 __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
3265 break;
3266 }
3267 case kArmI64x2AllTrue: {
3268 __ I64x2AllTrue(i.OutputRegister(), i.InputSimd128Register(0));
3269 break;
3270 }
3271 case kArmI32x4AllTrue: {
3272 const QwNeonRegister& src = i.InputSimd128Register(0);
3273 UseScratchRegisterScope temps(masm());
3274 DwVfpRegister scratch = temps.AcquireD();
3275 __ vpmin(NeonU32, scratch, src.low(), src.high());
3276 __ vpmin(NeonU32, scratch, scratch, scratch);
3277 __ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
3278 __ cmp(i.OutputRegister(), Operand(0));
3279 __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
3280 break;
3281 }
3282 case kArmI16x8AllTrue: {
3283 const QwNeonRegister& src = i.InputSimd128Register(0);
3284 UseScratchRegisterScope temps(masm());
3285 DwVfpRegister scratch = temps.AcquireD();
3286 __ vpmin(NeonU16, scratch, src.low(), src.high());
3287 __ vpmin(NeonU16, scratch, scratch, scratch);
3288 __ vpmin(NeonU16, scratch, scratch, scratch);
3289 __ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0);
3290 __ cmp(i.OutputRegister(), Operand(0));
3291 __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
3292 break;
3293 }
3294 case kArmI8x16AllTrue: {
3295 const QwNeonRegister& src = i.InputSimd128Register(0);
3296 UseScratchRegisterScope temps(masm());
3297 DwVfpRegister scratch = temps.AcquireD();
3298 __ vpmin(NeonU8, scratch, src.low(), src.high());
3299 __ vpmin(NeonU8, scratch, scratch, scratch);
3300 __ vpmin(NeonU8, scratch, scratch, scratch);
3301 __ vpmin(NeonU8, scratch, scratch, scratch);
3302 __ ExtractLane(i.OutputRegister(), scratch, NeonS8, 0);
3303 __ cmp(i.OutputRegister(), Operand(0));
3304 __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
3305 break;
3306 }
3307 case kArmS128Load8Splat: {
3308 __ vld1r(Neon8, NeonListOperand(i.OutputSimd128Register()),
3309 i.NeonInputOperand(0));
3310 break;
3311 }
3312 case kArmS128Load16Splat: {
3313 __ vld1r(Neon16, NeonListOperand(i.OutputSimd128Register()),
3314 i.NeonInputOperand(0));
3315 break;
3316 }
3317 case kArmS128Load32Splat: {
3318 __ vld1r(Neon32, NeonListOperand(i.OutputSimd128Register()),
3319 i.NeonInputOperand(0));
3320 break;
3321 }
3322 case kArmS128Load64Splat: {
3323 Simd128Register dst = i.OutputSimd128Register();
3324 __ vld1(Neon32, NeonListOperand(dst.low()), i.NeonInputOperand(0));
3325 __ Move(dst.high(), dst.low());
3326 break;
3327 }
3328 case kArmS128Load8x8S: {
3329 Simd128Register dst = i.OutputSimd128Register();
3330 __ vld1(Neon8, NeonListOperand(dst.low()), i.NeonInputOperand(0));
3331 __ vmovl(NeonS8, dst, dst.low());
3332 break;
3333 }
3334 case kArmS128Load8x8U: {
3335 Simd128Register dst = i.OutputSimd128Register();
3336 __ vld1(Neon8, NeonListOperand(dst.low()), i.NeonInputOperand(0));
3337 __ vmovl(NeonU8, dst, dst.low());
3338 break;
3339 }
3340 case kArmS128Load16x4S: {
3341 Simd128Register dst = i.OutputSimd128Register();
3342 __ vld1(Neon16, NeonListOperand(dst.low()), i.NeonInputOperand(0));
3343 __ vmovl(NeonS16, dst, dst.low());
3344 break;
3345 }
3346 case kArmS128Load16x4U: {
3347 Simd128Register dst = i.OutputSimd128Register();
3348 __ vld1(Neon16, NeonListOperand(dst.low()), i.NeonInputOperand(0));
3349 __ vmovl(NeonU16, dst, dst.low());
3350 break;
3351 }
3352 case kArmS128Load32x2S: {
3353 Simd128Register dst = i.OutputSimd128Register();
3354 __ vld1(Neon32, NeonListOperand(dst.low()), i.NeonInputOperand(0));
3355 __ vmovl(NeonS32, dst, dst.low());
3356 break;
3357 }
3358 case kArmS128Load32x2U: {
3359 Simd128Register dst = i.OutputSimd128Register();
3360 __ vld1(Neon32, NeonListOperand(dst.low()), i.NeonInputOperand(0));
3361 __ vmovl(NeonU32, dst, dst.low());
3362 break;
3363 }
3364 case kArmS128Load32Zero: {
3365 Simd128Register dst = i.OutputSimd128Register();
3366 __ vmov(dst, 0);
3367 __ vld1s(Neon32, NeonListOperand(dst.low()), 0, i.NeonInputOperand(0));
3368 break;
3369 }
3370 case kArmS128Load64Zero: {
3371 Simd128Register dst = i.OutputSimd128Register();
3372 __ vmov(dst.high(), 0);
3373 __ vld1(Neon64, NeonListOperand(dst.low()), i.NeonInputOperand(0));
3374 break;
3375 }
3376 case kArmS128LoadLaneLow: {
3377 Simd128Register dst = i.OutputSimd128Register();
3378 DCHECK_EQ(dst, i.InputSimd128Register(0));
3379 auto sz = static_cast<NeonSize>(MiscField::decode(instr->opcode()));
3380 NeonListOperand dst_list = NeonListOperand(dst.low());
3381 __ LoadLane(sz, dst_list, i.InputUint8(1), i.NeonInputOperand(2));
3382 break;
3383 }
3384 case kArmS128LoadLaneHigh: {
3385 Simd128Register dst = i.OutputSimd128Register();
3386 DCHECK_EQ(dst, i.InputSimd128Register(0));
3387 auto sz = static_cast<NeonSize>(MiscField::decode(instr->opcode()));
3388 NeonListOperand dst_list = NeonListOperand(dst.high());
3389 __ LoadLane(sz, dst_list, i.InputUint8(1), i.NeonInputOperand(2));
3390 break;
3391 }
3392 case kArmS128StoreLaneLow: {
3393 Simd128Register src = i.InputSimd128Register(0);
3394 NeonListOperand src_list = NeonListOperand(src.low());
3395 auto sz = static_cast<NeonSize>(MiscField::decode(instr->opcode()));
3396 __ StoreLane(sz, src_list, i.InputUint8(1), i.NeonInputOperand(2));
3397 break;
3398 }
3399 case kArmS128StoreLaneHigh: {
3400 Simd128Register src = i.InputSimd128Register(0);
3401 NeonListOperand src_list = NeonListOperand(src.high());
3402 auto sz = static_cast<NeonSize>(MiscField::decode(instr->opcode()));
3403 __ StoreLane(sz, src_list, i.InputUint8(1), i.NeonInputOperand(2));
3404 break;
3405 }
3406 case kAtomicLoadInt8:
3407 ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrsb);
3408 break;
3409 case kAtomicLoadUint8:
3410 ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrb);
3411 break;
3412 case kAtomicLoadInt16:
3413 ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrsh);
3414 break;
3415 case kAtomicLoadUint16:
3416 ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrh);
3417 break;
3418 case kAtomicLoadWord32:
3419 ASSEMBLE_ATOMIC_LOAD_INTEGER(ldr);
3420 break;
3421 case kAtomicStoreWord8:
3422 ASSEMBLE_ATOMIC_STORE_INTEGER(strb,
3423 AtomicMemoryOrderField::decode(opcode));
3424 break;
3425 case kAtomicStoreWord16:
3426 ASSEMBLE_ATOMIC_STORE_INTEGER(strh,
3427 AtomicMemoryOrderField::decode(opcode));
3428 break;
3429 case kAtomicStoreWord32:
3430 ASSEMBLE_ATOMIC_STORE_INTEGER(str,
3431 AtomicMemoryOrderField::decode(opcode));
3432 break;
3433 case kAtomicExchangeInt8:
3434 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexb, strexb);
3435 __ sxtb(i.OutputRegister(0), i.OutputRegister(0));
3436 break;
3437 case kAtomicExchangeUint8:
3438 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexb, strexb);
3439 break;
3440 case kAtomicExchangeInt16:
3441 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexh, strexh);
3442 __ sxth(i.OutputRegister(0), i.OutputRegister(0));
3443 break;
3444 case kAtomicExchangeUint16:
3445 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexh, strexh);
3446 break;
3447 case kAtomicExchangeWord32:
3448 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrex, strex);
3449 break;
3450 case kAtomicCompareExchangeInt8:
3451 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
3452 __ uxtb(i.TempRegister(2), i.InputRegister(2));
3453 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexb, strexb,
3454 i.TempRegister(2));
3455 __ sxtb(i.OutputRegister(0), i.OutputRegister(0));
3456 break;
3457 case kAtomicCompareExchangeUint8:
3458 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
3459 __ uxtb(i.TempRegister(2), i.InputRegister(2));
3460 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexb, strexb,
3461 i.TempRegister(2));
3462 break;
3463 case kAtomicCompareExchangeInt16:
3464 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
3465 __ uxth(i.TempRegister(2), i.InputRegister(2));
3466 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexh, strexh,
3467 i.TempRegister(2));
3468 __ sxth(i.OutputRegister(0), i.OutputRegister(0));
3469 break;
3470 case kAtomicCompareExchangeUint16:
3471 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
3472 __ uxth(i.TempRegister(2), i.InputRegister(2));
3473 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexh, strexh,
3474 i.TempRegister(2));
3475 break;
3476 case kAtomicCompareExchangeWord32:
3477 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
3478 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrex, strex,
3479 i.InputRegister(2));
3480 break;
3481#define ATOMIC_BINOP_CASE(op, inst) \
3482 case kAtomic##op##Int8: \
3483 ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst); \
3484 __ sxtb(i.OutputRegister(0), i.OutputRegister(0)); \
3485 break; \
3486 case kAtomic##op##Uint8: \
3487 ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst); \
3488 break; \
3489 case kAtomic##op##Int16: \
3490 ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst); \
3491 __ sxth(i.OutputRegister(0), i.OutputRegister(0)); \
3492 break; \
3493 case kAtomic##op##Uint16: \
3494 ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst); \
3495 break; \
3496 case kAtomic##op##Word32: \
3497 ASSEMBLE_ATOMIC_BINOP(ldrex, strex, inst); \
3498 break;
3499 ATOMIC_BINOP_CASE(Add, add)
3500 ATOMIC_BINOP_CASE(Sub, sub)
3501 ATOMIC_BINOP_CASE(And, and_)
3502 ATOMIC_BINOP_CASE(Or, orr)
3503 ATOMIC_BINOP_CASE(Xor, eor)
3504#undef ATOMIC_BINOP_CASE
3505 case kArmWord32AtomicPairLoad: {
3506 if (instr->OutputCount() == 2) {
3507 DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r0, r1));
3508 __ add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));
3509 __ ldrexd(r0, r1, i.TempRegister(0));
3510 __ dmb(ISH);
3511 } else {
3512 // A special case of this instruction: even though this is a pair load,
3513 // we only need one of the two words. We emit a normal atomic load.
3514 DCHECK_EQ(instr->OutputCount(), 1);
3515 Register base = i.InputRegister(0);
3516 Register offset = i.InputRegister(1);
3517 DCHECK(instr->InputAt(2)->IsImmediate());
3518 int32_t offset_imm = i.InputInt32(2);
3519 if (offset_imm != 0) {
3520 Register temp = i.TempRegister(0);
3521 __ add(temp, offset, Operand(offset_imm));
3522 offset = temp;
3523 }
3524 __ ldr(i.OutputRegister(), MemOperand(base, offset));
3525 __ dmb(ISH);
3526 }
3527 break;
3528 }
3529 case kArmWord32AtomicPairStore: {
3530 Label store;
3531 Register base = i.InputRegister(0);
3532 Register offset = i.InputRegister(1);
3533 Register value_low = i.InputRegister(2);
3534 Register value_high = i.InputRegister(3);
3535 Register actual_addr = i.TempRegister(0);
3536 // The {ldrexd} instruction needs two temp registers. We do not need its
3537 // result, but {strexd} can only succeed after a matching {ldrexd}.
3538 Register tmp1 = i.TempRegister(1);
3539 Register tmp2 = i.TempRegister(2);
3540 // Reuse one of the temp registers for the result of {strexd}.
3541 Register store_result = tmp1;
3542 __ add(actual_addr, base, offset);
3543 __ dmb(ISH);
3544 __ bind(&store);
3545 // Add this {ldrexd} instruction here so that {strexd} below can succeed.
3546 // We don't need the result of {ldrexd} itself.
3547 __ ldrexd(tmp1, tmp2, actual_addr);
3548 __ strexd(store_result, value_low, value_high, actual_addr);
3549 __ cmp(store_result, Operand(0));
3550 __ b(ne, &store);
3551 __ dmb(ISH);
3552 break;
3553 }
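
// --- Illustrative sketch (not part of code-generator-arm.cc) ----------------
// Semantics of the dmb / ldrexd / strexd retry loop above: a sequentially
// consistent 64-bit store assembled from two 32-bit halves. A C++ compiler
// targeting ARMv7 typically lowers the equivalent std::atomic store to the
// same shape. Names are invented for illustration only.
#include <atomic>
#include <cstdint>

void AtomicPairStore(std::atomic<uint64_t>* slot, uint32_t low, uint32_t high) {
  uint64_t value = (static_cast<uint64_t>(high) << 32) | low;
  slot->store(value, std::memory_order_seq_cst);
}
// -----------------------------------------------------------------------------
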
3554#define ATOMIC_ARITH_BINOP_CASE(op, instr1, instr2) \
3555 case kArmWord32AtomicPair##op: { \
3556 DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3)); \
3557 ASSEMBLE_ATOMIC64_ARITH_BINOP(instr1, instr2); \
3558 break; \
3559 }
3560 ATOMIC_ARITH_BINOP_CASE(Add, add, adc)
3561 ATOMIC_ARITH_BINOP_CASE(Sub, sub, sbc)
3562#undef ATOMIC_ARITH_BINOP_CASE
3563#define ATOMIC_LOGIC_BINOP_CASE(op, instr1) \
3564 case kArmWord32AtomicPair##op: { \
3565 DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3)); \
3566 ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr1); \
3567 break; \
3568 }
3569 ATOMIC_LOGIC_BINOP_CASE(And, and_)
3570 ATOMIC_LOGIC_BINOP_CASE(Or, orr)
3571 ATOMIC_LOGIC_BINOP_CASE(Xor, eor)
3572#undef ATOMIC_LOGIC_BINOP_CASE
3573 case kArmWord32AtomicPairExchange: {
3574 DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r6, r7));
3575 Label exchange;
3576 __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3));
3577 __ dmb(ISH);
3578 __ bind(&exchange);
3579 __ ldrexd(r6, r7, i.TempRegister(0));
3580 __ strexd(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1),
3581 i.TempRegister(0));
3582 __ teq(i.TempRegister(1), Operand(0));
3583 __ b(ne, &exchange);
3584 __ dmb(ISH);
3585 break;
3586 }
3587 case kArmWord32AtomicPairCompareExchange: {
3588 DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3));
3589 __ add(i.TempRegister(0), i.InputRegister(4), i.InputRegister(5));
3590 Label compareExchange;
3591 Label exit;
3592 __ dmb(ISH);
3593 __ bind(&compareExchange);
3594 __ ldrexd(r2, r3, i.TempRegister(0));
3595 __ teq(i.InputRegister(0), Operand(r2));
3596 __ b(ne, &exit);
3597 __ teq(i.InputRegister(1), Operand(r3));
3598 __ b(ne, &exit);
3599 __ strexd(i.TempRegister(1), i.InputRegister(2), i.InputRegister(3),
3600 i.TempRegister(0));
3601 __ teq(i.TempRegister(1), Operand(0));
3602 __ b(ne, &compareExchange);
3603 __ bind(&exit);
3604 __ dmb(ISH);
3605 break;
3606 }
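
// --- Illustrative sketch (not part of code-generator-arm.cc) ----------------
// The loop above only stores the new pair if *both* expected words match, and
// retries when the exclusive store is interrupted. Equivalent std::atomic
// formulation (names invented for illustration; the generated code reports the
// old value in r2/r3 instead of returning a bool):
#include <atomic>
#include <cstdint>

bool AtomicPairCompareExchange(std::atomic<uint64_t>* slot,
                               uint32_t expected_low, uint32_t expected_high,
                               uint32_t new_low, uint32_t new_high) {
  uint64_t expected =
      (static_cast<uint64_t>(expected_high) << 32) | expected_low;
  uint64_t desired = (static_cast<uint64_t>(new_high) << 32) | new_low;
  return slot->compare_exchange_strong(expected, desired,
                                       std::memory_order_seq_cst);
}
// -----------------------------------------------------------------------------
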
3607#undef ASSEMBLE_ATOMIC_LOAD_INTEGER
3608#undef ASSEMBLE_ATOMIC_STORE_INTEGER
3609#undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER
3610#undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER
3611#undef ASSEMBLE_ATOMIC_BINOP
3612#undef ASSEMBLE_ATOMIC64_ARITH_BINOP
3613#undef ASSEMBLE_ATOMIC64_LOGIC_BINOP
3614#undef ASSEMBLE_IEEE754_BINOP
3615#undef ASSEMBLE_IEEE754_UNOP
3616#undef ASSEMBLE_NEON_NARROWING_OP
3617#undef ASSEMBLE_SIMD_SHIFT_LEFT
3618#undef ASSEMBLE_SIMD_SHIFT_RIGHT
3619 }
3620 return kSuccess;
3621}
3622
3623// Assembles branches after an instruction.
3626 Label* tlabel = branch->true_label;
3627 Label* flabel = branch->false_label;
3628 Condition cc = FlagsConditionToCondition(branch->condition);
3629 __ b(cc, tlabel);
3630 if (!branch->fallthru) __ b(flabel); // no fallthru to flabel.
3631}
3632
3637
3642
3643#if V8_ENABLE_WEBASSEMBLY
3644void CodeGenerator::AssembleArchTrap(Instruction* instr,
3645 FlagsCondition condition) {
3646 class OutOfLineTrap final : public OutOfLineCode {
3647 public:
3648 OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
3649 : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
3650
3651 void Generate() final {
3652 ArmOperandConverter i(gen_, instr_);
3653 TrapId trap_id =
3654 static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
3655 GenerateCallToTrap(trap_id);
3656 }
3657
3658 private:
3659 void GenerateCallToTrap(TrapId trap_id) {
3660 gen_->AssembleSourcePosition(instr_);
3661 // A direct call to a wasm runtime stub defined in this module.
3662 // Just encode the stub index. This will be patched when the code
3663 // is added to the native module and copied into wasm code space.
3664 __ Call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
3665 ReferenceMap* reference_map =
3666 gen_->zone()->New<ReferenceMap>(gen_->zone());
3667 gen_->RecordSafepoint(reference_map);
3668 if (v8_flags.debug_code) {
3669 __ stop();
3670 }
3671 }
3672
3673 Instruction* instr_;
3674 CodeGenerator* gen_;
3675 };
3676 auto ool = zone()->New<OutOfLineTrap>(this, instr);
3677 Label* tlabel = ool->entry();
3678 Condition cc = FlagsConditionToCondition(condition);
3679 __ b(cc, tlabel);
3680}
3681#endif // V8_ENABLE_WEBASSEMBLY
3682
3683// Assembles boolean materializations after an instruction.
3684 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
3685 FlagsCondition condition) {
3686 ArmOperandConverter i(this, instr);
3687
3688 // Materialize a full 32-bit 1 or 0 value. The result register is always the
3689 // last output of the instruction.
3690 DCHECK_NE(0u, instr->OutputCount());
3691 Register reg = i.OutputRegister(instr->OutputCount() - 1);
3692 Condition cc = FlagsConditionToCondition(condition);
3693 __ mov(reg, Operand(0));
3694 __ mov(reg, Operand(1), LeaveCC, cc);
3695}
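
// --- Illustrative sketch (not part of code-generator-arm.cc) ----------------
// The unconditional mov(0) followed by a conditional mov(1) above is a
// branch-free materialization of the flag, i.e. the C++ equivalent of:
#include <cstdint>

inline uint32_t MaterializeBool(bool flags_condition_holds) {
  return flags_condition_holds ? 1u : 0u;
}
// -----------------------------------------------------------------------------
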
3696
3700
3705
3706 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
3707 ArmOperandConverter i(this, instr);
3708 Register input = i.InputRegister(0);
3709 std::vector<std::pair<int32_t, Label*>> cases;
3710 for (size_t index = 2; index < instr->InputCount(); index += 2) {
3711 cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
3712 }
3713 AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
3714 cases.data() + cases.size());
3715}
3716
3717 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
3718 ArmOperandConverter i(this, instr);
3719 Register input = i.InputRegister(0);
3720 size_t const case_count = instr->InputCount() - 2;
3721 // This {cmp} might still emit a constant pool entry.
3722 __ cmp(input, Operand(case_count));
3723 // Ensure to emit the constant pool first if necessary.
3724 __ CheckConstPool(true, true);
3725 __ BlockConstPoolFor(case_count + 2);
3726 __ add(pc, pc, Operand(input, LSL, 2), LeaveCC, lo);
3727 __ b(GetLabel(i.InputRpo(1)));
3728 for (size_t index = 0; index < case_count; ++index) {
3729 __ b(GetLabel(i.InputRpo(index + 2)));
3730 }
3731}
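
// --- Illustrative sketch (not part of code-generator-arm.cc) ----------------
// Why `add pc, pc, Operand(input, LSL, 2)` dispatches correctly: reading pc on
// ARM yields the address of the current instruction plus 8, so it already
// points at the first table entry (the add and the default branch occupy the
// two intervening instruction slots). A small model of the arithmetic, with
// invented names; addresses are hypothetical:
#include <cstdint>

uint32_t TableSwitchTargetAddress(uint32_t add_instruction_address,
                                  uint32_t case_index) {
  const uint32_t kPcReadAhead = 8;  // pc reads as "this instruction + 8"
  const uint32_t kInstrSize = 4;    // each table entry is one 4-byte branch
  return add_instruction_address + kPcReadAhead + case_index * kInstrSize;
}
// -----------------------------------------------------------------------------
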
3732
3737
3738 void CodeGenerator::FinishFrame(Frame* frame) {
3739 auto call_descriptor = linkage()->GetIncomingDescriptor();
3740
3741 const DoubleRegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3742 if (!saves_fp.is_empty()) {
3743 frame->AlignSavedCalleeRegisterSlots();
3744 }
3745
3746 if (!saves_fp.is_empty()) {
3747 // Save callee-saved FP registers.
3748 static_assert(DwVfpRegister::kNumRegisters == 32);
3749 uint32_t last = base::bits::CountLeadingZeros32(saves_fp.bits()) - 1;
3750 uint32_t first = base::bits::CountTrailingZeros32(saves_fp.bits());
3751 DCHECK_EQ((last - first + 1), saves_fp.Count());
3752 frame->AllocateSavedCalleeRegisterSlots((last - first + 1) *
3753 (kDoubleSize / kSystemPointerSize));
3754 }
3755 const RegList saves = call_descriptor->CalleeSavedRegisters();
3756 if (!saves.is_empty()) {
3757 // Save callee-saved registers.
3758 frame->AllocateSavedCalleeRegisterSlots(saves.Count());
3759 }
3760}
3761
3762 void CodeGenerator::AssembleConstructFrame() {
3763 auto call_descriptor = linkage()->GetIncomingDescriptor();
3764 if (frame_access_state()->has_frame()) {
3765 if (call_descriptor->IsCFunctionCall()) {
3766#if V8_ENABLE_WEBASSEMBLY
3767 if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
3768 __ StubPrologue(StackFrame::C_WASM_ENTRY);
3769 // Reserve stack space for saving the c_entry_fp later.
3770 __ AllocateStackSpace(kSystemPointerSize);
3771#else
3772 // For balance.
3773 if (false) {
3774#endif // V8_ENABLE_WEBASSEMBLY
3775 } else {
3776 __ Push(lr, fp);
3777 __ mov(fp, sp);
3778 }
3779 } else if (call_descriptor->IsJSFunctionCall()) {
3780 __ Prologue();
3781 } else {
3782 __ StubPrologue(info()->GetOutputStackFrameType());
3783#if V8_ENABLE_WEBASSEMBLY
3784 if (call_descriptor->IsAnyWasmFunctionCall() ||
3785 call_descriptor->IsWasmImportWrapper() ||
3786 call_descriptor->IsWasmCapiFunction()) {
3787 // For import wrappers and C-API functions, this stack slot is only used
3788 // for printing stack traces in V8. Also, it holds a WasmImportData
3789 // instead of the trusted instance data, which is taken care of in the
3790 // frames accessors.
3791 __ Push(kWasmImplicitArgRegister);
3792 }
3793 if (call_descriptor->IsWasmCapiFunction()) {
3794 // Reserve space for saving the PC later.
3795 __ AllocateStackSpace(kSystemPointerSize);
3796 }
3797#endif // V8_ENABLE_WEBASSEMBLY
3798 }
3799
3800 unwinding_info_writer_.MarkFrameConstructed(__ pc_offset());
3801 }
3802
3803 int required_slots =
3804 frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
3805
3806 if (info()->is_osr()) {
3807 // TurboFan OSR-compiled functions cannot be entered directly.
3808 __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
3809
3810 // Unoptimized code jumps directly to this entrypoint while the unoptimized
3811 // frame is still on the stack. Optimized code uses OSR values directly from
3812 // the unoptimized frame. Thus, all that needs to be done is to allocate the
3813 // remaining stack slots.
3814 __ RecordComment("-- OSR entrypoint --");
3815 osr_pc_offset_ = __ pc_offset();
3816 required_slots -= osr_helper()->UnoptimizedFrameSlots();
3817 }
3818
3819 const RegList saves = call_descriptor->CalleeSavedRegisters();
3820 const DoubleRegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3821
3822 if (required_slots > 0) {
3823 DCHECK(frame_access_state()->has_frame());
3824#if V8_ENABLE_WEBASSEMBLY
3825 if (info()->IsWasm() && required_slots * kSystemPointerSize > 4 * KB) {
3826 // For WebAssembly functions with big frames we have to do the stack
3827 // overflow check before we construct the frame. Otherwise we may not
3828 // have enough space on the stack to call the runtime for the stack
3829 // overflow.
3830 Label done;
3831
3832 // If the frame is bigger than the stack, we throw the stack overflow
3833 // exception unconditionally. Thereby we can avoid the integer overflow
3834 // check in the condition code.
3835 if (required_slots * kSystemPointerSize < v8_flags.stack_size * KB) {
3836 UseScratchRegisterScope temps(masm());
3837 Register stack_limit = temps.Acquire();
3838 __ LoadStackLimit(stack_limit, StackLimitKind::kRealStackLimit);
3839 __ add(stack_limit, stack_limit,
3840 Operand(required_slots * kSystemPointerSize));
3841 __ cmp(sp, stack_limit);
3842 __ b(cs, &done);
3843 }
3844
3845 if (v8_flags.experimental_wasm_growable_stacks) {
3847 RegList regs_to_save;
3848 regs_to_save.set(
3849 WasmHandleStackOverflowDescriptor::FrameBaseRegister());
3850 for (auto reg : wasm::kGpParamRegisters) regs_to_save.set(reg);
3851 __ stm(db_w, sp, regs_to_save);
3852 DoubleRegList fp_regs_to_save;
3853 for (auto reg : wasm::kFpParamRegisters) fp_regs_to_save.set(reg);
3854 __ vstm(db_w, sp, fp_regs_to_save.first(), fp_regs_to_save.last());
3855 __ mov(WasmHandleStackOverflowDescriptor::GapRegister(),
3856 Operand(required_slots * kSystemPointerSize));
3857 __ add(
3858 WasmHandleStackOverflowDescriptor::FrameBaseRegister(), fp,
3859 Operand(call_descriptor->ParameterSlotCount() * kSystemPointerSize +
3860 CommonFrameConstants::kFixedFrameSizeAboveFp));
3861 __ CallBuiltin(Builtin::kWasmHandleStackOverflow);
3862 __ vldm(ia_w, sp, fp_regs_to_save.first(), fp_regs_to_save.last());
3863 __ ldm(ia_w, sp, regs_to_save);
3864 } else {
3865 __ Call(static_cast<intptr_t>(Builtin::kWasmStackOverflow),
3866 RelocInfo::WASM_STUB_CALL);
3867 // The call does not return, hence we can ignore any references and just
3868 // define an empty safepoint.
3869 ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
3870 RecordSafepoint(reference_map);
3871 if (v8_flags.debug_code) __ stop();
3872 }
3873
3874 __ bind(&done);
3875 }
3876#endif // V8_ENABLE_WEBASSEMBLY
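
// --- Illustrative sketch (not part of code-generator-arm.cc) ----------------
// The big-frame check above compares `sp >= real_stack_limit + frame_size`
// rather than `sp - frame_size >= real_stack_limit`; it is only emitted when
// frame_size is smaller than the whole configured stack, so the addition
// cannot overflow. Names invented for illustration:
#include <cstdint>

bool FrameFitsOnStack(uintptr_t sp, uintptr_t real_stack_limit,
                      uint32_t frame_size_in_bytes) {
  return sp >= real_stack_limit + frame_size_in_bytes;
}
// -----------------------------------------------------------------------------
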
3877
3878 // Skip callee-saved and return slots, which are pushed below.
3879 required_slots -= saves.Count();
3880 required_slots -= frame()->GetReturnSlotCount();
3881 required_slots -= 2 * saves_fp.Count();
3882 if (required_slots > 0) {
3883 __ AllocateStackSpace(required_slots * kSystemPointerSize);
3884 }
3885 }
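
// --- Illustrative sketch (not part of code-generator-arm.cc) ----------------
// The slot accounting above in one place: callee-saved core registers and
// return slots are pushed separately below, and every callee-saved D register
// occupies two pointer-sized slots on ARM32 (8 / 4 bytes). Invented helper
// name, for illustration only:
int ExplicitlyAllocatedSlots(int total_spill_slots, int saved_core_registers,
                             int saved_fp_double_registers, int return_slots) {
  return total_spill_slots - saved_core_registers -
         2 * saved_fp_double_registers - return_slots;
}
// -----------------------------------------------------------------------------
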
3886
3887 if (!saves_fp.is_empty()) {
3888 // Save callee-saved FP registers.
3889 static_assert(DwVfpRegister::kNumRegisters == 32);
3890 __ vstm(db_w, sp, saves_fp.first(), saves_fp.last());
3891 }
3892
3893 if (!saves.is_empty()) {
3894 // Save callee-saved registers.
3895 __ stm(db_w, sp, saves);
3896 }
3897
3898 const int returns = frame()->GetReturnSlotCount();
3899 // Create space for returns.
3900 __ AllocateStackSpace(returns * kSystemPointerSize);
3901
3902 if (!frame()->tagged_slots().IsEmpty()) {
3903 UseScratchRegisterScope temps(masm());
3904 Register zero = temps.Acquire();
3905 __ mov(zero, Operand(0));
3906 for (int spill_slot : frame()->tagged_slots()) {
3907 FrameOffset offset = frame_access_state()->GetFrameOffset(spill_slot);
3908 DCHECK(offset.from_frame_pointer());
3909 __ str(zero, MemOperand(fp, offset.offset()));
3910 }
3911 }
3912}
3913
3914 void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
3915 auto call_descriptor = linkage()->GetIncomingDescriptor();
3916
3917 const int returns = frame()->GetReturnSlotCount();
3918 if (returns != 0) {
3919 // Free space of returns.
3920 __ add(sp, sp, Operand(returns * kSystemPointerSize));
3921 }
3922
3923 // Restore registers.
3924 const RegList saves = call_descriptor->CalleeSavedRegisters();
3925 if (!saves.is_empty()) {
3926 __ ldm(ia_w, sp, saves);
3927 }
3928
3929 // Restore FP registers.
3930 const DoubleRegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3931 if (!saves_fp.is_empty()) {
3932 static_assert(DwVfpRegister::kNumRegisters == 32);
3933 __ vldm(ia_w, sp, saves_fp.first(), saves_fp.last());
3934 }
3935
3937
3938 ArmOperandConverter g(this, nullptr);
3939 const int parameter_slots =
3940 static_cast<int>(call_descriptor->ParameterSlotCount());
3941
3942 // {additional_pop_count} is only greater than zero if {parameter_slots = 0}.
3943 // Check RawMachineAssembler::PopAndReturn.
3944 if (parameter_slots != 0) {
3945 if (additional_pop_count->IsImmediate()) {
3946 DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
3947 } else if (v8_flags.debug_code) {
3948 __ cmp(g.ToRegister(additional_pop_count), Operand(0));
3949 __ Assert(eq, AbortReason::kUnexpectedAdditionalPopValue);
3950 }
3951 }
3952
3953#if V8_ENABLE_WEBASSEMBLY
3954 if (call_descriptor->IsAnyWasmFunctionCall() &&
3955 v8_flags.experimental_wasm_growable_stacks) {
3956 {
3957 UseScratchRegisterScope temps(masm());
3958 Register scratch = temps.Acquire();
3959 __ ldr(scratch, MemOperand(fp, TypedFrameConstants::kFrameTypeOffset));
3960 __ cmp(scratch,
3961 Operand(StackFrame::TypeToMarker(StackFrame::WASM_SEGMENT_START)));
3962 }
3963 Label done;
3964 __ b(&done, ne);
3965 RegList regs_to_save;
3966 for (auto reg : wasm::kGpReturnRegisters) regs_to_save.set(reg);
3967 __ stm(db_w, sp, regs_to_save);
3968 DoubleRegList fp_regs_to_save;
3969 for (auto reg : wasm::kFpParamRegisters) fp_regs_to_save.set(reg);
3970 __ vstm(db_w, sp, fp_regs_to_save.first(), fp_regs_to_save.last());
3971 __ Move(kCArgRegs[0], ExternalReference::isolate_address());
3972 __ PrepareCallCFunction(1);
3973 __ CallCFunction(ExternalReference::wasm_shrink_stack(), 1);
3974 // Restore old FP. We don't need to restore old SP explicitly, because
3975 // it will be restored from FP in LeaveFrame before return.
3976 __ mov(fp, kReturnRegister0);
3977 __ vldm(ia_w, sp, fp_regs_to_save.first(), fp_regs_to_save.last());
3978 __ ldm(ia_w, sp, regs_to_save);
3979 __ bind(&done);
3980 }
3981#endif // V8_ENABLE_WEBASSEMBLY
3982
3983 Register argc_reg = r3;
3984 // Functions with JS linkage have at least one parameter (the receiver).
3985 // If {parameter_slots} == 0, it means it is a builtin with
3986 // kDontAdaptArgumentsSentinel, which takes care of JS arguments popping
3987 // itself.
3988 const bool drop_jsargs = parameter_slots != 0 &&
3989 frame_access_state()->has_frame() &&
3990 call_descriptor->IsJSFunctionCall();
3991 if (call_descriptor->IsCFunctionCall()) {
3992 AssembleDeconstructFrame();
3993 } else if (frame_access_state()->has_frame()) {
3994 // Canonicalize JSFunction return sites for now unless they have a variable
3995 // number of stack slot pops.
3996 if (additional_pop_count->IsImmediate() &&
3997 g.ToConstant(additional_pop_count).ToInt32() == 0) {
3998 if (return_label_.is_bound()) {
3999 __ b(&return_label_);
4000 return;
4001 } else {
4002 __ bind(&return_label_);
4003 }
4004 }
4005 if (drop_jsargs) {
4006 // Get the actual argument count.
4007 __ ldr(argc_reg, MemOperand(fp, StandardFrameConstants::kArgCOffset));
4008 DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
4009 }
4010 AssembleDeconstructFrame();
4011 }
4012
4013 if (drop_jsargs) {
4014 // We must pop all arguments from the stack (including the receiver).
4015 // The number of arguments without the receiver is
4016 // max(argc_reg, parameter_slots-1), and the receiver is added in
4017 // DropArguments().
4018 DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
4019 if (parameter_slots > 1) {
4020 __ cmp(argc_reg, Operand(parameter_slots));
4021 __ mov(argc_reg, Operand(parameter_slots), LeaveCC, lt);
4022 }
4023 __ DropArguments(argc_reg);
4024 } else if (additional_pop_count->IsImmediate()) {
4025 DCHECK_EQ(Constant::kInt32, g.ToConstant(additional_pop_count).type());
4026 int additional_count = g.ToConstant(additional_pop_count).ToInt32();
4027 __ Drop(parameter_slots + additional_count);
4028 } else if (parameter_slots == 0) {
4029 __ Drop(g.ToRegister(additional_pop_count));
4030 } else {
4031 // {additional_pop_count} is guaranteed to be zero if {parameter_slots !=
4032 // 0}. Check RawMachineAssembler::PopAndReturn.
4033 __ Drop(parameter_slots);
4034 }
4035 __ Ret();
4036}
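
// --- Illustrative sketch (not part of code-generator-arm.cc) ----------------
// In the drop_jsargs path above, the cmp / conditional mov pair clamps the
// dynamic argument count to at least the declared parameter count before
// DropArguments additionally pops the receiver. The clamp itself is just:
#include <algorithm>

int StackSlotsToDrop(int actual_argument_count, int parameter_slots) {
  return std::max(actual_argument_count, parameter_slots);
}
// -----------------------------------------------------------------------------
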
4037
4038void CodeGenerator::FinishCode() { __ CheckConstPool(true, false); }
4039
4040 void CodeGenerator::PrepareForDeoptimizationExits(
4041 ZoneDeque<DeoptimizationExit*>* exits) {
4042 __ CheckConstPool(true, false);
4043}
4044
4045 void CodeGenerator::AssembleMove(InstructionOperand* source,
4046 InstructionOperand* destination) {
4047 ArmOperandConverter g(this, nullptr);
4048 // Helper function to write the given constant to the dst register.
4049 auto MoveConstantToRegister = [&](Register dst, Constant src) {
4050 if (src.type() == Constant::kHeapObject) {
4051 Handle<HeapObject> src_object = src.ToHeapObject();
4052 RootIndex index;
4053 if (IsMaterializableFromRoot(src_object, &index)) {
4054 __ LoadRoot(dst, index);
4055 } else {
4056 __ Move(dst, src_object);
4057 }
4058 } else if (src.type() == Constant::kExternalReference) {
4059 __ Move(dst, src.ToExternalReference());
4060 } else {
4061 __ mov(dst, g.ToImmediate(source));
4062 }
4063 };
4064 switch (MoveType::InferMove(source, destination)) {
4065 case MoveType::kRegisterToRegister:
4066 if (source->IsRegister()) {
4067 __ mov(g.ToRegister(destination), g.ToRegister(source));
4068 } else if (source->IsFloatRegister()) {
4069 DCHECK(destination->IsFloatRegister());
4070 // GapResolver may give us reg codes that don't map to actual
4071 // s-registers. Generate code to work around those cases.
4072 int src_code = LocationOperand::cast(source)->register_code();
4073 int dst_code = LocationOperand::cast(destination)->register_code();
4074 __ VmovExtended(dst_code, src_code);
4075 } else if (source->IsDoubleRegister()) {
4076 __ Move(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
4077 } else {
4078 __ Move(g.ToSimd128Register(destination), g.ToSimd128Register(source));
4079 }
4080 return;
4081 case MoveType::kRegisterToStack: {
4082 MemOperand dst = g.ToMemOperand(destination);
4083 if (source->IsRegister()) {
4084 __ str(g.ToRegister(source), dst);
4085 } else if (source->IsFloatRegister()) {
4086 // GapResolver may give us reg codes that don't map to actual
4087 // s-registers. Generate code to work around those cases.
4088 int src_code = LocationOperand::cast(source)->register_code();
4089 __ VmovExtended(dst, src_code);
4090 } else if (source->IsDoubleRegister()) {
4091 __ vstr(g.ToDoubleRegister(source), dst);
4092 } else {
4094 Register temp = temps.Acquire();
4095 QwNeonRegister src = g.ToSimd128Register(source);
4096 __ add(temp, dst.rn(), Operand(dst.offset()));
4097 __ vst1(Neon8, NeonListOperand(src.low(), 2), NeonMemOperand(temp));
4098 }
4099 return;
4100 }
4101 case MoveType::kStackToRegister: {
4102 MemOperand src = g.ToMemOperand(source);
4103 if (source->IsStackSlot()) {
4104 __ ldr(g.ToRegister(destination), src);
4105 } else if (source->IsFloatStackSlot()) {
4106 DCHECK(destination->IsFloatRegister());
4107 // GapResolver may give us reg codes that don't map to actual
4108 // s-registers. Generate code to work around those cases.
4109 int dst_code = LocationOperand::cast(destination)->register_code();
4110 __ VmovExtended(dst_code, src);
4111 } else if (source->IsDoubleStackSlot()) {
4112 __ vldr(g.ToDoubleRegister(destination), src);
4113 } else {
4114 UseScratchRegisterScope temps(masm());
4115 Register temp = temps.Acquire();
4116 QwNeonRegister dst = g.ToSimd128Register(destination);
4117 __ add(temp, src.rn(), Operand(src.offset()));
4118 __ vld1(Neon8, NeonListOperand(dst.low(), 2), NeonMemOperand(temp));
4119 }
4120 return;
4121 }
4122 case MoveType::kStackToStack: {
4123 MemOperand src = g.ToMemOperand(source);
4124 MemOperand dst = g.ToMemOperand(destination);
4125 UseScratchRegisterScope temps(masm());
4126 if (source->IsStackSlot() || source->IsFloatStackSlot()) {
4127 SwVfpRegister temp = temps.AcquireS();
4128 __ vldr(temp, src);
4129 __ vstr(temp, dst);
4130 } else if (source->IsDoubleStackSlot()) {
4131 DwVfpRegister temp = temps.AcquireD();
4132 __ vldr(temp, src);
4133 __ vstr(temp, dst);
4134 } else {
4135 DCHECK(source->IsSimd128StackSlot());
4136 Register temp = temps.Acquire();
4137 QwNeonRegister temp_q = temps.AcquireQ();
4138 __ add(temp, src.rn(), Operand(src.offset()));
4139 __ vld1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
4140 __ add(temp, dst.rn(), Operand(dst.offset()));
4141 __ vst1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
4142 }
4143 return;
4144 }
4145 case MoveType::kConstantToRegister: {
4146 Constant src = g.ToConstant(source);
4147 if (destination->IsRegister()) {
4148 MoveConstantToRegister(g.ToRegister(destination), src);
4149 } else if (destination->IsFloatRegister()) {
4150 __ vmov(g.ToFloatRegister(destination),
4151 Float32::FromBits(src.ToFloat32AsInt()));
4152 } else {
4153 // TODO(arm): Look into optimizing this further if possible. Supporting
4154 // the NEON version of VMOV may help.
4155 __ vmov(g.ToDoubleRegister(destination), src.ToFloat64());
4156 }
4157 return;
4158 }
4159 case MoveType::kConstantToStack: {
4160 Constant src = g.ToConstant(source);
4161 MemOperand dst = g.ToMemOperand(destination);
4162 if (destination->IsStackSlot()) {
4163 UseScratchRegisterScope temps(masm());
4164 // Acquire a S register instead of a general purpose register in case
4165 // `vstr` needs one to compute the address of `dst`.
4166 SwVfpRegister s_temp = temps.AcquireS();
4167 {
4168 // TODO(arm): This sequence could be optimized further if necessary by
4169 // writing the constant directly into `s_temp`.
4170 UseScratchRegisterScope temps(masm());
4171 Register temp = temps.Acquire();
4172 MoveConstantToRegister(temp, src);
4173 __ vmov(s_temp, temp);
4174 }
4175 __ vstr(s_temp, dst);
4176 } else if (destination->IsFloatStackSlot()) {
4177 UseScratchRegisterScope temps(masm());
4178 SwVfpRegister temp = temps.AcquireS();
4179 __ vmov(temp, Float32::FromBits(src.ToFloat32AsInt()));
4180 __ vstr(temp, dst);
4181 } else {
4182 DCHECK(destination->IsDoubleStackSlot());
4183 UseScratchRegisterScope temps(masm());
4184 DwVfpRegister temp = temps.AcquireD();
4185 // TODO(arm): Look into optimizing this further if possible. Supporting
4186 // the NEON version of VMOV may help.
4187 __ vmov(temp, src.ToFloat64());
4188 __ vstr(temp, g.ToMemOperand(destination));
4189 }
4190 return;
4191 }
4192 }
4193 UNREACHABLE();
4194}
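
// --- Illustrative sketch (not part of code-generator-arm.cc) ----------------
// Why several branches above go through VmovExtended: the allocator hands out
// "float register" codes 0..63, but only codes 0..31 (s0..s31) alias onto
// d0..d15. A code is really a (D register, lane) pair; codes 32 and up live in
// d16..d31, which have no S-register alias. Invented decoding helper:
#include <cassert>

struct ExtendedFloatLocation {
  int d_register_code;  // which D register holds the 32-bit value
  int lane;             // 0 = low half, 1 = high half
};

ExtendedFloatLocation DecodeFloatRegisterCode(int code) {
  assert(code >= 0 && code < 64);
  return {code / 2, code % 2};
}
// -----------------------------------------------------------------------------
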
4195
4196 AllocatedOperand CodeGenerator::Push(InstructionOperand* source) {
4197 auto rep = LocationOperand::cast(source)->representation();
4198 int new_slots = ElementSizeInPointers(rep);
4199 ArmOperandConverter g(this, nullptr);
4200 int last_frame_slot_id =
4201 frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
4202 int sp_delta = frame_access_state_->sp_delta();
4203 int slot_id = last_frame_slot_id + sp_delta + new_slots;
4204 AllocatedOperand stack_slot(LocationOperand::STACK_SLOT, rep, slot_id);
4205 if (source->IsRegister()) {
4206 __ push(g.ToRegister(source));
4207 frame_access_state()->IncreaseSPDelta(new_slots);
4208 } else if (source->IsStackSlot()) {
4209 UseScratchRegisterScope temps(masm());
4210 Register scratch = temps.Acquire();
4211 __ ldr(scratch, g.ToMemOperand(source));
4212 __ push(scratch);
4213 frame_access_state()->IncreaseSPDelta(new_slots);
4214 } else {
4215 // No push instruction for this operand type. Bump the stack pointer and
4216 // assemble the move.
4217 __ sub(sp, sp, Operand(new_slots * kSystemPointerSize));
4218 frame_access_state()->IncreaseSPDelta(new_slots);
4219 AssembleMove(source, &stack_slot);
4220 }
4221 temp_slots_ += new_slots;
4222 return stack_slot;
4223}
4224
4225 void CodeGenerator::Pop(InstructionOperand* dest, MachineRepresentation rep) {
4226 int dropped_slots = ElementSizeInPointers(rep);
4227 ArmOperandConverter g(this, nullptr);
4228 if (dest->IsRegister()) {
4229 frame_access_state()->IncreaseSPDelta(-dropped_slots);
4230 __ pop(g.ToRegister(dest));
4231 } else if (dest->IsStackSlot()) {
4232 frame_access_state()->IncreaseSPDelta(-dropped_slots);
4233 UseScratchRegisterScope temps(masm());
4234 Register scratch = temps.Acquire();
4235 __ pop(scratch);
4236 __ str(scratch, g.ToMemOperand(dest));
4237 } else {
4238 int last_frame_slot_id =
4239 frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
4240 int sp_delta = frame_access_state_->sp_delta();
4241 int slot_id = last_frame_slot_id + sp_delta;
4242 AllocatedOperand stack_slot(LocationOperand::STACK_SLOT, rep, slot_id);
4243 AssembleMove(&stack_slot, dest);
4244 frame_access_state()->IncreaseSPDelta(-dropped_slots);
4245 __ add(sp, sp, Operand(dropped_slots * kSystemPointerSize));
4246 }
4247 temp_slots_ -= dropped_slots;
4248}
4249
4257
4258 void CodeGenerator::MoveToTempLocation(InstructionOperand* source,
4259 MachineRepresentation rep) {
4260 // Must be kept in sync with {MoveTempLocationTo}.
4261 move_cycle_.temps.emplace(masm());
4262 auto& temps = *move_cycle_.temps;
4263 // Temporarily exclude the reserved scratch registers while we pick a
4264 // location to resolve the cycle. Re-include them immediately afterwards so
4265 // that they are available to assemble the move.
4266 temps.Exclude(move_cycle_.scratch_v_reglist);
4267 int reg_code = -1;
4268 if ((!IsFloatingPoint(rep) || rep == MachineRepresentation::kFloat32) &&
4269 temps.CanAcquireS()) {
4270 reg_code = temps.AcquireS().code();
4271 } else if (rep == MachineRepresentation::kFloat64 && temps.CanAcquireD()) {
4272 reg_code = temps.AcquireD().code();
4273 } else if (rep == MachineRepresentation::kSimd128 && temps.CanAcquireQ()) {
4274 reg_code = temps.AcquireQ().code();
4275 }
4276 temps.Include(move_cycle_.scratch_v_reglist);
4277 if (reg_code != -1) {
4278 // A scratch register is available for this rep.
4279 move_cycle_.scratch_reg_code = reg_code;
4280 if (IsFloatingPoint(rep)) {
4281 AllocatedOperand scratch(LocationOperand::REGISTER, rep, reg_code);
4282 AssembleMove(source, &scratch);
4283 } else {
4284 AllocatedOperand scratch(LocationOperand::REGISTER,
4285 MachineRepresentation::kFloat32, reg_code);
4286 ArmOperandConverter g(this, nullptr);
4287 if (source->IsStackSlot()) {
4288 __ vldr(g.ToFloatRegister(&scratch), g.ToMemOperand(source));
4289 } else {
4290 DCHECK(source->IsRegister());
4291 __ vmov(g.ToFloatRegister(&scratch), g.ToRegister(source));
4292 }
4293 }
4294 } else {
4295 // The scratch registers are blocked by pending moves. Use the stack
4296 // instead.
4297 Push(source);
4298 }
4299}
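
// --- Illustrative sketch (not part of code-generator-arm.cc) ----------------
// What MoveToTempLocation / MoveTempLocationTo accomplish for a move cycle
// such as {A -> B, B -> A}: park one value in a scratch register (or on the
// stack when every scratch is reserved by pending moves), perform the
// remaining moves, then unload the parked value. In plain C++ terms:
#include <utility>

template <typename T>
void ResolveTwoElementCycle(T& a, T& b) {
  T temp = std::move(a);  // MoveToTempLocation(&a, rep)
  a = std::move(b);       // the remaining move of the cycle
  b = std::move(temp);    // MoveTempLocationTo(&b, rep)
}
// -----------------------------------------------------------------------------
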
4300
4301 void CodeGenerator::MoveTempLocationTo(InstructionOperand* dest,
4302 MachineRepresentation rep) {
4303 int scratch_reg_code = move_cycle_.scratch_reg_code;
4304 DCHECK(move_cycle_.temps.has_value());
4305 if (scratch_reg_code != -1) {
4306 if (IsFloatingPoint(rep)) {
4307 AllocatedOperand scratch(LocationOperand::REGISTER, rep,
4308 scratch_reg_code);
4309 AssembleMove(&scratch, dest);
4310 } else {
4311 AllocatedOperand scratch(LocationOperand::REGISTER,
4312 MachineRepresentation::kFloat32,
4313 scratch_reg_code);
4314 ArmOperandConverter g(this, nullptr);
4315 if (dest->IsStackSlot()) {
4316 __ vstr(g.ToFloatRegister(&scratch), g.ToMemOperand(dest));
4317 } else {
4318 DCHECK(dest->IsRegister());
4319 __ vmov(g.ToRegister(dest), g.ToFloatRegister(&scratch));
4320 }
4321 }
4322 } else {
4323 Pop(dest, rep);
4324 }
4325 // Restore the default state to release the {UseScratchRegisterScope} and to
4326 // prepare for the next cycle.
4327 move_cycle_ = MoveCycleState();
4328}
4329
4330 void CodeGenerator::SetPendingMove(MoveOperands* move) {
4331 InstructionOperand& source = move->source();
4332 InstructionOperand& destination = move->destination();
4333 MoveType::Type move_type =
4334 MoveType::InferMove(&move->source(), &move->destination());
4335 UseScratchRegisterScope temps(masm());
4336 if (move_type == MoveType::kStackToStack) {
4337 if (source.IsStackSlot() || source.IsFloatStackSlot()) {
4338 SwVfpRegister temp = temps.AcquireS();
4340 } else if (source.IsDoubleStackSlot()) {
4341 DwVfpRegister temp = temps.AcquireD();
4343 } else {
4344 QwNeonRegister temp = temps.AcquireQ();
4346 }
4347 return;
4348 } else if (move_type == MoveType::kConstantToStack) {
4349 if (destination.IsStackSlot()) {
4350 // Acquire a S register instead of a general purpose register in case
4351 // `vstr` needs one to compute the address of `dst`.
4352 SwVfpRegister s_temp = temps.AcquireS();
4354 } else if (destination.IsFloatStackSlot()) {
4355 SwVfpRegister temp = temps.AcquireS();
4357 } else {
4358 DwVfpRegister temp = temps.AcquireD();
4360 }
4361 }
4362}
4363
4364 void CodeGenerator::AssembleSwap(InstructionOperand* source,
4365 InstructionOperand* destination) {
4366 ArmOperandConverter g(this, nullptr);
4367 switch (MoveType::InferSwap(source, destination)) {
4368 case MoveType::kRegisterToRegister:
4369 if (source->IsRegister()) {
4370 __ Swap(g.ToRegister(source), g.ToRegister(destination));
4371 } else if (source->IsFloatRegister()) {
4372 DCHECK(destination->IsFloatRegister());
4373 // GapResolver may give us reg codes that don't map to actual
4374 // s-registers. Generate code to work around those cases.
4375 UseScratchRegisterScope temps(masm());
4376 LowDwVfpRegister temp = temps.AcquireLowD();
4377 int src_code = LocationOperand::cast(source)->register_code();
4378 int dst_code = LocationOperand::cast(destination)->register_code();
4379 __ VmovExtended(temp.low().code(), src_code);
4380 __ VmovExtended(src_code, dst_code);
4381 __ VmovExtended(dst_code, temp.low().code());
4382 } else if (source->IsDoubleRegister()) {
4383 __ Swap(g.ToDoubleRegister(source), g.ToDoubleRegister(destination));
4384 } else {
4385 __ Swap(g.ToSimd128Register(source), g.ToSimd128Register(destination));
4386 }
4387 return;
4388 case MoveType::kRegisterToStack: {
4389 MemOperand dst = g.ToMemOperand(destination);
4390 if (source->IsRegister()) {
4391 Register src = g.ToRegister(source);
4392 UseScratchRegisterScope temps(masm());
4393 SwVfpRegister temp = temps.AcquireS();
4394 __ vmov(temp, src);
4395 __ ldr(src, dst);
4396 __ vstr(temp, dst);
4397 } else if (source->IsFloatRegister()) {
4398 int src_code = LocationOperand::cast(source)->register_code();
4399 UseScratchRegisterScope temps(masm());
4400 LowDwVfpRegister temp = temps.AcquireLowD();
4401 __ VmovExtended(temp.low().code(), src_code);
4402 __ VmovExtended(src_code, dst);
4403 __ vstr(temp.low(), dst);
4404 } else if (source->IsDoubleRegister()) {
4405 UseScratchRegisterScope temps(masm());
4406 DwVfpRegister temp = temps.AcquireD();
4407 DwVfpRegister src = g.ToDoubleRegister(source);
4408 __ Move(temp, src);
4409 __ vldr(src, dst);
4410 __ vstr(temp, dst);
4411 } else {
4412 QwNeonRegister src = g.ToSimd128Register(source);
4413 UseScratchRegisterScope temps(masm());
4414 Register temp = temps.Acquire();
4415 QwNeonRegister temp_q = temps.AcquireQ();
4416 __ Move(temp_q, src);
4417 __ add(temp, dst.rn(), Operand(dst.offset()));
4418 __ vld1(Neon8, NeonListOperand(src.low(), 2), NeonMemOperand(temp));
4419 __ vst1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
4420 }
4421 return;
4422 }
4423 case MoveType::kStackToStack: {
4424 MemOperand src = g.ToMemOperand(source);
4425 MemOperand dst = g.ToMemOperand(destination);
4426 if (source->IsStackSlot() || source->IsFloatStackSlot()) {
4427 UseScratchRegisterScope temps(masm());
4428 SwVfpRegister temp_0 = temps.AcquireS();
4429 SwVfpRegister temp_1 = temps.AcquireS();
4430 __ vldr(temp_0, dst);
4431 __ vldr(temp_1, src);
4432 __ vstr(temp_0, src);
4433 __ vstr(temp_1, dst);
4434 } else if (source->IsDoubleStackSlot()) {
4435 UseScratchRegisterScope temps(masm());
4436 LowDwVfpRegister temp = temps.AcquireLowD();
4437 if (temps.CanAcquireD()) {
4438 DwVfpRegister temp_0 = temp;
4439 DwVfpRegister temp_1 = temps.AcquireD();
4440 __ vldr(temp_0, dst);
4441 __ vldr(temp_1, src);
4442 __ vstr(temp_0, src);
4443 __ vstr(temp_1, dst);
4444 } else {
4445 // We only have a single D register available. However, we can split
4446 // it into 2 S registers and swap the slots 32 bits at a time.
4447 MemOperand src0 = src;
4448 MemOperand dst0 = dst;
4449 MemOperand src1(src.rn(), src.offset() + kFloatSize);
4450 MemOperand dst1(dst.rn(), dst.offset() + kFloatSize);
4451 SwVfpRegister temp_0 = temp.low();
4452 SwVfpRegister temp_1 = temp.high();
4453 __ vldr(temp_0, dst0);
4454 __ vldr(temp_1, src0);
4455 __ vstr(temp_0, src0);
4456 __ vstr(temp_1, dst0);
4457 __ vldr(temp_0, dst1);
4458 __ vldr(temp_1, src1);
4459 __ vstr(temp_0, src1);
4460 __ vstr(temp_1, dst1);
4461 }
4462 } else {
4463 DCHECK(source->IsSimd128StackSlot());
4464 MemOperand src0 = src;
4465 MemOperand dst0 = dst;
4466 MemOperand src1(src.rn(), src.offset() + kDoubleSize);
4467 MemOperand dst1(dst.rn(), dst.offset() + kDoubleSize);
4468 UseScratchRegisterScope temps(masm());
4469 DwVfpRegister temp_0 = temps.AcquireD();
4470 DwVfpRegister temp_1 = temps.AcquireD();
4471 __ vldr(temp_0, dst0);
4472 __ vldr(temp_1, src0);
4473 __ vstr(temp_0, src0);
4474 __ vstr(temp_1, dst0);
4475 __ vldr(temp_0, dst1);
4476 __ vldr(temp_1, src1);
4477 __ vstr(temp_0, src1);
4478 __ vstr(temp_1, dst1);
4479 }
4480 return;
4481 }
4482 default:
4483 UNREACHABLE();
4484 }
4485}
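
// --- Illustrative sketch (not part of code-generator-arm.cc) ----------------
// The single-D-register fallback in the double-slot swap above is equivalent
// to swapping the two 64-bit stack slots one 32-bit half at a time; splitting
// the swap this way never needs more than 64 bits of temporary state.
// Invented helper, for illustration only:
#include <cstdint>
#include <utility>

void SwapDoubleSlotsInHalves(uint32_t* slot_a, uint32_t* slot_b) {
  std::swap(slot_a[0], slot_b[0]);  // low halves
  std::swap(slot_a[1], slot_b[1]);  // high halves
}
// -----------------------------------------------------------------------------
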
4486
4487 void CodeGenerator::AssembleJumpTable(base::Vector<Label*> targets) {
4488 // On 32-bit ARM we emit the jump tables inline.
4489 UNREACHABLE();
4490}
4491
4492#undef __
4493
4494} // namespace compiler
4495} // namespace internal
4496} // namespace v8