code-generator-x64.cc
1// Copyright 2013 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <limits>
6#include <optional>
7
8#include "src/base/logging.h"
19#include "src/common/globals.h"
26#include "src/compiler/osr.h"
31#include "src/objects/smi.h"
32
33#if V8_ENABLE_WEBASSEMBLY
36#endif // V8_ENABLE_WEBASSEMBLY
37
38namespace v8::internal::compiler {
39
40#define __ masm()->
41
42enum class FirstMacroFusionInstKind {
43  // TEST
44  kTest,
45  // CMP
46  kCmp,
47  // AND
48  kAnd,
49  // ADD, SUB
50  kAddSub,
51  // INC, DEC
52  kIncDec,
53  // Not valid as a first macro fusion instruction.
54  kInvalid
55};
56
57enum class SecondMacroFusionInstKind {
58  // JA, JB and variants.
59  kAB,
60  // JE, JL, JG and variants.
61  kELG,
62  // Not a fusible jump.
63  kInvalid,
64};
65
66bool IsMacroFused(FirstMacroFusionInstKind first_kind,
67                  SecondMacroFusionInstKind second_kind) {
68  switch (first_kind) {
69    case FirstMacroFusionInstKind::kTest:
70    case FirstMacroFusionInstKind::kAnd:
71      return true;
72    case FirstMacroFusionInstKind::kCmp:
73    case FirstMacroFusionInstKind::kAddSub:
74      return second_kind == SecondMacroFusionInstKind::kAB ||
75             second_kind == SecondMacroFusionInstKind::kELG;
76    case FirstMacroFusionInstKind::kIncDec:
77      return second_kind == SecondMacroFusionInstKind::kELG;
78    case FirstMacroFusionInstKind::kInvalid:
79      return false;
80  }
81}
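// Illustrative examples of the fusion predicate above (a sketch of the
// pairings it encodes, not an exhaustive table):
//   IsMacroFused(kTest,   kAB)  -> true   (TEST/AND fuse with any Jcc)
//   IsMacroFused(kCmp,    kELG) -> true   (CMP fuses with JE/JL/JG variants)
//   IsMacroFused(kIncDec, kAB)  -> false  (INC/DEC do not update CF, so they
//                                          cannot fuse with JA/JB variants)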
82
83SecondMacroFusionInstKind GetSecondMacroFusionInstKind(
84    FlagsCondition condition) {
85  switch (condition) {
86    // JE,JZ
87    case kEqual:
88    // JNE,JNZ
89    case kNotEqual:
90    // JL,JNGE
91    case kSignedLessThan:
92    // JLE,JNG
93    case kSignedLessThanOrEqual:
94    // JG,JNLE
95    case kSignedGreaterThan:
96    // JGE,JNL
97    case kSignedGreaterThanOrEqual:
98      return SecondMacroFusionInstKind::kELG;
99    // JB,JC
100    case kUnsignedLessThan:
101    // JNA,JBE
102    case kUnsignedLessThanOrEqual:
103    // JA,JNBE
104    case kUnsignedGreaterThan:
105    // JAE,JNC,JNB
106    case kUnsignedGreaterThanOrEqual:
107      return SecondMacroFusionInstKind::kAB;
108    default:
109      return SecondMacroFusionInstKind::kInvalid;
110  }
111}
112
113bool ShouldAlignForJCCErratum(Instruction* instr,
114                              FirstMacroFusionInstKind first_kind) {
115  if (!CpuFeatures::IsSupported(INTEL_JCC_ERRATUM_MITIGATION)) return false;
116  FlagsMode mode = FlagsModeField::decode(instr->opcode());
117  if (mode == kFlags_branch || mode == kFlags_deoptimize) {
118    FlagsCondition condition = FlagsConditionField::decode(instr->opcode());
119    if (IsMacroFused(first_kind, GetSecondMacroFusionInstKind(condition))) {
120      return true;
121    }
122  }
123  return false;
124}
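// Background for the predicate above (summarized from the Intel JCC erratum
// guidance; details are simplified here): on affected CPUs, a macro-fused
// compare-and-branch whose bytes cross or end on a 32-byte boundary can lose
// decoded-icache coverage and slow down. When INTEL_JCC_ERRATUM_MITIGATION is
// enabled, callers use this check to pick the aligned_cmp*/aligned_test*
// forms (see ASSEMBLE_COMPARE/ASSEMBLE_TEST below) so that fusible pairs are
// padded away from 32-byte boundaries.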
125
126// Adds X64 specific methods for decoding operands.
127class X64OperandConverter : public InstructionOperandConverter {
128 public:
129  X64OperandConverter(CodeGenerator* gen, Instruction* instr)
130      : InstructionOperandConverter(gen, instr) {}
131
132 Immediate InputImmediate(size_t index) {
133 return ToImmediate(instr_->InputAt(index));
134 }
135
136 Operand InputOperand(size_t index, int extra = 0) {
137 return ToOperand(instr_->InputAt(index), extra);
138 }
139
141
142  Immediate ToImmediate(InstructionOperand* operand) {
143    Constant constant = ToConstant(operand);
144 if (constant.type() == Constant::kCompressedHeapObject) {
147 RootIndex root_index;
148 CHECK(gen_->isolate()->roots_table().IsRootHandle(constant.ToHeapObject(),
149 &root_index));
150 return Immediate(
152 }
153 if (constant.type() == Constant::kFloat64) {
154 DCHECK_EQ(0, constant.ToFloat64().AsUint64());
155 return Immediate(0);
156 }
157 return Immediate(constant.ToInt32(), constant.rmode());
158 }
159
160  Operand ToOperand(InstructionOperand* op, int extra = 0) {
161    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
162 return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
163 }
164
165 Operand SlotToOperand(int slot_index, int extra = 0) {
167 return Operand(offset.from_stack_pointer() ? rsp : rbp,
168 offset.offset() + extra);
169 }
170
171 static size_t NextOffset(size_t* offset) {
172 size_t i = *offset;
173 (*offset)++;
174 return i;
175 }
176
178 static_assert(0 == static_cast<int>(times_1));
179 static_assert(1 == static_cast<int>(times_2));
180 static_assert(2 == static_cast<int>(times_4));
181 static_assert(3 == static_cast<int>(times_8));
182 int scale = static_cast<int>(mode - one);
183 DCHECK(scale >= 0 && scale < 4);
184 return static_cast<ScaleFactor>(scale);
185 }
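// Example: ScaleFor(kMode_MR1, kMode_MR4) yields times_4. The scaled
// addressing modes are laid out contiguously in the enum, so the distance
// from the times_1 variant is exactly the log2 of the scale, as the
// static_asserts above guarantee.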
186
187  Operand MemoryOperand(size_t* offset) {
188    AddressingMode mode = AddressingModeField::decode(instr_->opcode());
189    switch (mode) {
190 case kMode_MR: {
192 int32_t disp = 0;
193 return Operand(base, disp);
194 }
195 case kMode_MRI: {
197 int32_t disp = InputInt32(NextOffset(offset));
198 return Operand(base, disp);
199 }
200 case kMode_MR1:
201 case kMode_MR2:
202 case kMode_MR4:
203 case kMode_MR8: {
206 ScaleFactor scale = ScaleFor(kMode_MR1, mode);
207 int32_t disp = 0;
208 return Operand(base, index, scale, disp);
209 }
210 case kMode_MR1I:
211 case kMode_MR2I:
212 case kMode_MR4I:
213 case kMode_MR8I: {
216 ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
217 int32_t disp = InputInt32(NextOffset(offset));
218 return Operand(base, index, scale, disp);
219 }
220 case kMode_M1: {
222 int32_t disp = 0;
223 return Operand(base, disp);
224 }
225 case kMode_M2:
226        UNREACHABLE();  // Should use kMode_MR with a more compact encoding instead
227 case kMode_M4:
228 case kMode_M8: {
230 ScaleFactor scale = ScaleFor(kMode_M1, mode);
231 int32_t disp = 0;
232 return Operand(index, scale, disp);
233 }
234 case kMode_M1I:
235 case kMode_M2I:
236 case kMode_M4I:
237 case kMode_M8I: {
239 ScaleFactor scale = ScaleFor(kMode_M1I, mode);
240 int32_t disp = InputInt32(NextOffset(offset));
241 return Operand(index, scale, disp);
242 }
243 case kMode_Root: {
245 int32_t disp = InputInt32(NextOffset(offset));
246 return Operand(base, disp);
247 }
248 case kMode_MCR: {
251 ScaleFactor scale = static_cast<ScaleFactor>(0);
252 int32_t disp = 0;
253 return Operand(base, index, scale, disp);
254 }
255 case kMode_MCRI: {
258 ScaleFactor scale = static_cast<ScaleFactor>(0);
259 int32_t disp = InputInt32(NextOffset(offset));
260 return Operand(base, index, scale, disp);
261 }
262 case kMode_None:
263 UNREACHABLE();
264 }
265 UNREACHABLE();
266 }
267
268 Operand MemoryOperand(size_t first_input = 0) {
269 return MemoryOperand(&first_input);
270 }
271};
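// Example of how MemoryOperand() walks the instruction inputs: for an
// instruction encoded with addressing mode kMode_MR4I, the decoder reads the
// base register, then the index register, derives times_4 via ScaleFor, reads
// the 32-bit displacement, and returns Operand(base, index, times_4, disp),
// advancing *offset past the consumed inputs.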
272
273namespace {
274
275bool HasAddressingMode(Instruction* instr) {
276 return instr->addressing_mode() != kMode_None;
277}
278
279bool HasImmediateInput(Instruction* instr, size_t index) {
280 return instr->InputAt(index)->IsImmediate();
281}
282
283bool HasRegisterInput(Instruction* instr, size_t index) {
284 return instr->InputAt(index)->IsRegister();
285}
286
287class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
288 public:
289 OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
290 : OutOfLineCode(gen), result_(result) {}
291
292 void Generate() final {
293 __ Xorps(result_, result_);
294 __ Divss(result_, result_);
295 }
296
297 private:
298 XMMRegister const result_;
299};
300
301class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
302 public:
303 OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
304 : OutOfLineCode(gen), result_(result) {}
305
306 void Generate() final {
307 __ Xorpd(result_, result_);
308 __ Divsd(result_, result_);
309 }
310
311 private:
312 XMMRegister const result_;
313};
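// Both out-of-line NaN loaders above materialize a quiet NaN without loading
// a constant: xorps/xorpd zeroes the destination and divss/divsd then
// computes 0.0 / 0.0, which is NaN under IEEE 754.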
314
315class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
316 public:
317 OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
318 XMMRegister input, StubCallMode stub_mode,
319 UnwindingInfoWriter* unwinding_info_writer)
320 : OutOfLineCode(gen),
322 input_(input),
323#if V8_ENABLE_WEBASSEMBLY
324 stub_mode_(stub_mode),
325#endif // V8_ENABLE_WEBASSEMBLY
326 unwinding_info_writer_(unwinding_info_writer),
327 isolate_(gen->isolate()),
328 zone_(gen->zone()) {
329 }
330
331 void Generate() final {
332 __ AllocateStackSpace(kDoubleSize);
333 unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
335 __ Movsd(MemOperand(rsp, 0), input_);
336#if V8_ENABLE_WEBASSEMBLY
337 if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
338 // A direct call to a builtin. Just encode the builtin index. This will be
339 // patched when the code is added to the native module and copied into
340 // wasm code space.
341 __ near_call(static_cast<intptr_t>(Builtin::kDoubleToI),
343#else
344 // For balance.
345 if (false) {
346#endif // V8_ENABLE_WEBASSEMBLY
347 } else {
348 // With embedded builtins we do not need the isolate here. This allows
349 // the call to be generated asynchronously.
350 __ CallBuiltin(Builtin::kDoubleToI);
351 }
352 __ movl(result_, MemOperand(rsp, 0));
353 __ addq(rsp, Immediate(kDoubleSize));
354 unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
355 -kDoubleSize);
356 }
357
358 private:
359 Register const result_;
360 XMMRegister const input_;
361#if V8_ENABLE_WEBASSEMBLY
362 StubCallMode stub_mode_;
363#endif // V8_ENABLE_WEBASSEMBLY
364 UnwindingInfoWriter* const unwinding_info_writer_;
365 Isolate* isolate_;
367};
368
369class OutOfLineRecordWrite final : public OutOfLineCode {
370 public:
371 OutOfLineRecordWrite(
372 CodeGenerator* gen, Register object, Operand operand, Register value,
373 Register scratch0, Register scratch1, RecordWriteMode mode,
374 StubCallMode stub_mode,
375 IndirectPointerTag indirect_pointer_tag = kIndirectPointerNullTag)
376 : OutOfLineCode(gen),
377 object_(object),
378 operand_(operand),
379 value_(value),
380 scratch0_(scratch0),
381 scratch1_(scratch1),
382 mode_(mode),
383#if V8_ENABLE_WEBASSEMBLY
384 stub_mode_(stub_mode),
385#endif // V8_ENABLE_WEBASSEMBLY
386 zone_(gen->zone()),
387 indirect_pointer_tag_(indirect_pointer_tag) {
388 DCHECK(!AreAliased(object, scratch0, scratch1));
389 DCHECK(!AreAliased(value, scratch0, scratch1));
390 }
391
392#if V8_ENABLE_STICKY_MARK_BITS_BOOL
393 Label* stub_call() { return &stub_call_; }
394#endif // V8_ENABLE_STICKY_MARK_BITS_BOOL
395
396 void Generate() final {
397 // When storing an indirect pointer, the value will always be a
398 // full/decompressed pointer.
400 mode_ != RecordWriteMode::kValueIsIndirectPointer) {
401 __ DecompressTagged(value_, value_);
402 }
403
404 // No need to check value page flags with the indirect pointer write barrier
405 // because the value is always an ExposedTrustedObject.
406 if (mode_ != RecordWriteMode::kValueIsIndirectPointer) {
407#if V8_ENABLE_STICKY_MARK_BITS_BOOL
408 // TODO(333906585): Optimize this path.
409 Label stub_call_with_decompressed_value;
410 __ CheckPageFlag(value_, scratch0_, MemoryChunk::kIsInReadOnlyHeapMask,
411 not_zero, exit());
412 __ CheckMarkBit(value_, scratch0_, scratch1_, carry, exit());
413 __ jmp(&stub_call_with_decompressed_value);
414
415 __ bind(&stub_call_);
417 mode_ != RecordWriteMode::kValueIsIndirectPointer) {
418 __ DecompressTagged(value_, value_);
419 }
420
421 __ bind(&stub_call_with_decompressed_value);
422#else // !V8_ENABLE_STICKY_MARK_BITS_BOOL
423 __ CheckPageFlag(value_, scratch0_,
424 MemoryChunk::kPointersToHereAreInterestingMask, zero,
425 exit());
426#endif // !V8_ENABLE_STICKY_MARK_BITS_BOOL
427 }
428
429 __ leaq(scratch1_, operand_);
430
431 SaveFPRegsMode const save_fp_mode = frame()->DidAllocateDoubleRegisters()
432 ? SaveFPRegsMode::kSave
433 : SaveFPRegsMode::kIgnore;
434
435 if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
436 __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
437 } else if (mode_ == RecordWriteMode::kValueIsIndirectPointer) {
438 // We must have a valid indirect pointer tag here. Otherwise, we risk not
439 // invoking the correct write barrier, which may lead to subtle issues.
441 __ CallIndirectPointerBarrier(object_, scratch1_, save_fp_mode,
443#if V8_ENABLE_WEBASSEMBLY
444 } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
445 // A direct call to a wasm runtime stub defined in this module.
446 // Just encode the stub index. This will be patched when the code
447 // is added to the native module and copied into wasm code space.
448 __ CallRecordWriteStubSaveRegisters(object_, scratch1_, save_fp_mode,
449 StubCallMode::kCallWasmRuntimeStub);
450#endif // V8_ENABLE_WEBASSEMBLY
451 } else {
452 __ CallRecordWriteStubSaveRegisters(object_, scratch1_, save_fp_mode);
453 }
454 }
455
456 private:
457 Register const object_;
458 Operand const operand_;
459 Register const value_;
460 Register const scratch0_;
461 Register const scratch1_;
462 RecordWriteMode const mode_;
463#if V8_ENABLE_WEBASSEMBLY
464 StubCallMode const stub_mode_;
465#endif // V8_ENABLE_WEBASSEMBLY
466 Zone* zone_;
467 IndirectPointerTag indirect_pointer_tag_;
468#if V8_ENABLE_STICKY_MARK_BITS_BOOL
469 Label stub_call_;
470#endif // V8_ENABLE_STICKY_MARK_BITS_BOOL
471};
472
473template <std::memory_order order>
474int EmitStore(MacroAssembler* masm, Operand operand, Register value,
476 int store_instr_offset;
477 if (order == std::memory_order_relaxed) {
478 store_instr_offset = masm->pc_offset();
479 switch (rep) {
481 masm->movb(operand, value);
482 break;
484 masm->movw(operand, value);
485 break;
487 masm->movl(operand, value);
488 break;
490 masm->movq(operand, value);
491 break;
493 masm->StoreTaggedField(operand, value);
494 break;
496 masm->StoreSandboxedPointerField(operand, value);
497 break;
499 masm->StoreIndirectPointerField(operand, value);
500 break;
501 default:
502 UNREACHABLE();
503 }
504 return store_instr_offset;
505 }
506
507 DCHECK_EQ(order, std::memory_order_seq_cst);
508 switch (rep) {
510 masm->movq(kScratchRegister, value);
511 store_instr_offset = masm->pc_offset();
512 masm->xchgb(kScratchRegister, operand);
513 break;
515 masm->movq(kScratchRegister, value);
516 store_instr_offset = masm->pc_offset();
517 masm->xchgw(kScratchRegister, operand);
518 break;
520 masm->movq(kScratchRegister, value);
521 store_instr_offset = masm->pc_offset();
522 masm->xchgl(kScratchRegister, operand);
523 break;
525 masm->movq(kScratchRegister, value);
526 store_instr_offset = masm->pc_offset();
527 masm->xchgq(kScratchRegister, operand);
528 break;
530 store_instr_offset = masm->pc_offset();
531 masm->AtomicStoreTaggedField(operand, value);
532 break;
533 default:
534 UNREACHABLE();
535 }
536 return store_instr_offset;
537}
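// Note on the seq_cst path above: the value is first copied into
// kScratchRegister and then exchanged with memory. On x64, xchg with a memory
// operand carries an implicit lock prefix, so it acts as a full barrier and
// gives the store sequentially consistent semantics without a separate
// mfence.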
538
539template <std::memory_order order>
540int EmitStore(MacroAssembler* masm, Operand operand, Immediate value,
542
543template <>
544int EmitStore<std::memory_order_relaxed>(MacroAssembler* masm, Operand operand,
545 Immediate value,
547 int store_instr_offset = masm->pc_offset();
548 switch (rep) {
550 masm->movb(operand, value);
551 break;
553 masm->movw(operand, value);
554 break;
556 masm->movl(operand, value);
557 break;
559 masm->movq(operand, value);
560 break;
562 masm->StoreTaggedField(operand, value);
563 break;
564 default:
565 UNREACHABLE();
566 }
567 return store_instr_offset;
568}
569
570#if V8_ENABLE_WEBASSEMBLY
571class WasmOutOfLineTrap : public OutOfLineCode {
572 public:
573 WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
574 : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
575
576 void Generate() override {
577 X64OperandConverter i(gen_, instr_);
578 TrapId trap_id =
579 static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
580 GenerateWithTrapId(trap_id);
581 }
582
583 protected:
584 CodeGenerator* gen_;
585
586 void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
587
588 private:
589 void GenerateCallToTrap(TrapId trap_id) {
590 gen_->AssembleSourcePosition(instr_);
591 // A direct call to a wasm runtime stub defined in this module.
592 // Just encode the stub index. This will be patched when the code
593 // is added to the native module and copied into wasm code space.
594 __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
595 ReferenceMap* reference_map = gen_->zone()->New<ReferenceMap>(gen_->zone());
596 gen_->RecordSafepoint(reference_map);
597 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
598 }
599
600 Instruction* instr_;
601};
602
603void RecordTrapInfoIfNeeded(Zone* zone, CodeGenerator* codegen,
604 InstructionCode opcode, Instruction* instr,
605 int pc) {
606 const MemoryAccessMode access_mode = instr->memory_access_mode();
607 if (access_mode == kMemoryAccessProtectedMemOutOfBounds ||
609 codegen->RecordProtectedInstruction(pc);
610 }
611}
612
613#else
614
615void RecordTrapInfoIfNeeded(Zone* zone, CodeGenerator* codegen,
616 InstructionCode opcode, Instruction* instr,
617 int pc) {
618 DCHECK_EQ(kMemoryAccessDirect, instr->memory_access_mode());
619}
620
621#endif // V8_ENABLE_WEBASSEMBLY
622
623#ifdef V8_IS_TSAN
624void EmitMemoryProbeForTrapHandlerIfNeeded(MacroAssembler* masm,
625 Register scratch, Operand operand,
626 StubCallMode mode, int size) {
627#if V8_ENABLE_WEBASSEMBLY && V8_TRAP_HANDLER_SUPPORTED
628 // The wasm OOB trap handler needs to be able to look up the faulting
629 // instruction pointer to handle the SIGSEGV raised by an OOB access. It
630 // will not handle SIGSEGVs raised by the TSAN store helpers. Emit a
631 // redundant load here to give the trap handler a chance to handle any
632 // OOB SIGSEGVs.
634 mode == StubCallMode::kCallWasmRuntimeStub) {
635 switch (size) {
636 case kInt8Size:
637 masm->movb(scratch, operand);
638 break;
639 case kInt16Size:
640 masm->movw(scratch, operand);
641 break;
642 case kInt32Size:
643 masm->movl(scratch, operand);
644 break;
645 case kInt64Size:
646 masm->movq(scratch, operand);
647 break;
648 default:
649 UNREACHABLE();
650 }
651 }
652#endif
653}
654
655class OutOfLineTSANStore : public OutOfLineCode {
656 public:
657 OutOfLineTSANStore(CodeGenerator* gen, Operand operand, Register value,
658 Register scratch0, StubCallMode stub_mode, int size,
659 std::memory_order order)
660 : OutOfLineCode(gen),
661 operand_(operand),
662 value_(value),
663 scratch0_(scratch0),
664#if V8_ENABLE_WEBASSEMBLY
665 stub_mode_(stub_mode),
666#endif // V8_ENABLE_WEBASSEMBLY
667 size_(size),
668 memory_order_(order),
669 zone_(gen->zone()) {
670 DCHECK(!AreAliased(value, scratch0));
671 }
672
673 void Generate() final {
674 const SaveFPRegsMode save_fp_mode = frame()->DidAllocateDoubleRegisters()
675 ? SaveFPRegsMode::kSave
676 : SaveFPRegsMode::kIgnore;
677 __ leaq(scratch0_, operand_);
678
679#if V8_ENABLE_WEBASSEMBLY
680 if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
681 // A direct call to a wasm runtime stub defined in this module.
682 // Just encode the stub index. This will be patched when the code
683 // is added to the native module and copied into wasm code space.
684 masm()->CallTSANStoreStub(scratch0_, value_, save_fp_mode, size_,
685 StubCallMode::kCallWasmRuntimeStub,
686 memory_order_);
687 return;
688 }
689#endif // V8_ENABLE_WEBASSEMBLY
690
691 masm()->CallTSANStoreStub(scratch0_, value_, save_fp_mode, size_,
692 StubCallMode::kCallBuiltinPointer, memory_order_);
693 }
694
695 private:
696 Operand const operand_;
697 Register const value_;
698 Register const scratch0_;
699#if V8_ENABLE_WEBASSEMBLY
700 StubCallMode const stub_mode_;
701#endif // V8_ENABLE_WEBASSEMBLY
702 int size_;
703 const std::memory_order memory_order_;
704 Zone* zone_;
705};
706
707void EmitTSANStoreOOL(Zone* zone, CodeGenerator* codegen, MacroAssembler* masm,
708 Operand operand, Register value_reg,
709 X64OperandConverter& i, StubCallMode mode, int size,
710 std::memory_order order) {
711 // The FOR_TESTING code doesn't initialize the root register. We can't call
712 // the TSAN builtin since we need to load the external reference through the
713 // root register.
714 // TODO(solanes, v8:7790, v8:11600): See if we can support the FOR_TESTING
715 // path. It is not crucial, but it would be nice to remove this restriction.
716 DCHECK_NE(codegen->code_kind(), CodeKind::FOR_TESTING);
717
718 Register scratch0 = i.TempRegister(0);
719 auto tsan_ool = zone->New<OutOfLineTSANStore>(codegen, operand, value_reg,
720 scratch0, mode, size, order);
721 masm->jmp(tsan_ool->entry());
722 masm->bind(tsan_ool->exit());
723}
724
725template <std::memory_order order>
726Register GetTSANValueRegister(MacroAssembler* masm, Register value,
727 X64OperandConverter& i,
730 // SandboxedPointers need to be encoded.
731 Register value_reg = i.TempRegister(1);
732 masm->movq(value_reg, value);
733 masm->EncodeSandboxedPointer(value_reg);
734 return value_reg;
735 } else if (rep == MachineRepresentation::kIndirectPointer) {
736 // Indirect pointer fields contain an index to a pointer table entry, which
737 // is obtained from the referenced object.
738 Register value_reg = i.TempRegister(1);
739 masm->movl(
740 value_reg,
741 FieldOperand(value, ExposedTrustedObject::kSelfIndirectPointerOffset));
742 return value_reg;
743 }
744 return value;
745}
746
747template <std::memory_order order>
748Register GetTSANValueRegister(MacroAssembler* masm, Immediate value,
749 X64OperandConverter& i,
751
752template <>
753Register GetTSANValueRegister<std::memory_order_relaxed>(
754 MacroAssembler* masm, Immediate value, X64OperandConverter& i,
756 Register value_reg = i.TempRegister(1);
757 masm->movq(value_reg, value);
759 // SandboxedPointers need to be encoded.
760 masm->EncodeSandboxedPointer(value_reg);
761 } else if (rep == MachineRepresentation::kIndirectPointer) {
762 // Indirect pointer fields contain an index to a pointer table entry, which
763 // is obtained from the referenced object.
764 masm->movl(value_reg,
765 FieldOperand(value_reg,
766 ExposedTrustedObject::kSelfIndirectPointerOffset));
767 }
768 return value_reg;
769}
770
771template <std::memory_order order, typename ValueT>
772void EmitTSANAwareStore(Zone* zone, CodeGenerator* codegen,
773 MacroAssembler* masm, Operand operand, ValueT value,
774 X64OperandConverter& i, StubCallMode stub_call_mode,
775 MachineRepresentation rep, Instruction* instr) {
776 // The FOR_TESTING code doesn't initialize the root register. We can't call
777 // the TSAN builtin since we need to load the external reference through the
778 // root register.
779 // TODO(solanes, v8:7790, v8:11600): See if we can support the FOR_TESTING
780 // path. It is not crucial, but it would be nice to remove this restriction.
781 if (codegen->code_kind() != CodeKind::FOR_TESTING) {
782 if (instr->HasMemoryAccessMode()) {
783 RecordTrapInfoIfNeeded(zone, codegen, instr->opcode(), instr,
784 masm->pc_offset());
785 }
786 int size = ElementSizeInBytes(rep);
787 EmitMemoryProbeForTrapHandlerIfNeeded(masm, i.TempRegister(0), operand,
788 stub_call_mode, size);
789 Register value_reg = GetTSANValueRegister<order>(masm, value, i, rep);
790 EmitTSANStoreOOL(zone, codegen, masm, operand, value_reg, i, stub_call_mode,
791 size, order);
792 } else {
793 int store_instr_offset = EmitStore<order>(masm, operand, value, rep);
794 if (instr->HasMemoryAccessMode()) {
795 RecordTrapInfoIfNeeded(zone, codegen, instr->opcode(), instr,
796 store_instr_offset);
797 }
798 }
799}
800
801class OutOfLineTSANRelaxedLoad final : public OutOfLineCode {
802 public:
803 OutOfLineTSANRelaxedLoad(CodeGenerator* gen, Operand operand,
804 Register scratch0, StubCallMode stub_mode, int size)
805 : OutOfLineCode(gen),
806 operand_(operand),
807 scratch0_(scratch0),
808#if V8_ENABLE_WEBASSEMBLY
809 stub_mode_(stub_mode),
810#endif // V8_ENABLE_WEBASSEMBLY
811 size_(size),
812 zone_(gen->zone()) {
813 }
814
815 void Generate() final {
816 const SaveFPRegsMode save_fp_mode = frame()->DidAllocateDoubleRegisters()
817 ? SaveFPRegsMode::kSave
818 : SaveFPRegsMode::kIgnore;
819 __ leaq(scratch0_, operand_);
820
821#if V8_ENABLE_WEBASSEMBLY
822 if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
823 // A direct call to a wasm runtime stub defined in this module.
824 // Just encode the stub index. This will be patched when the code
825 // is added to the native module and copied into wasm code space.
826 __ CallTSANRelaxedLoadStub(scratch0_, save_fp_mode, size_,
827 StubCallMode::kCallWasmRuntimeStub);
828 return;
829 }
830#endif // V8_ENABLE_WEBASSEMBLY
831
832 __ CallTSANRelaxedLoadStub(scratch0_, save_fp_mode, size_,
833 StubCallMode::kCallBuiltinPointer);
834 }
835
836 private:
837 Operand const operand_;
838 Register const scratch0_;
839#if V8_ENABLE_WEBASSEMBLY
840 StubCallMode const stub_mode_;
841#endif // V8_ENABLE_WEBASSEMBLY
842 int size_;
843 Zone* zone_;
844};
845
846void EmitTSANRelaxedLoadOOLIfNeeded(Zone* zone, CodeGenerator* codegen,
847 MacroAssembler* masm, Operand operand,
848 X64OperandConverter& i, StubCallMode mode,
849 int size) {
850 // The FOR_TESTING code doesn't initialize the root register. We can't call
851 // the TSAN builtin since we need to load the external reference through the
852 // root register.
853 // TODO(solanes, v8:7790, v8:11600): See if we can support the FOR_TESTING
854 // path. It is not crucial, but it would be nice to remove this if.
855 if (codegen->code_kind() == CodeKind::FOR_TESTING) return;
856
857 Register scratch0 = i.TempRegister(0);
858 auto tsan_ool = zone->New<OutOfLineTSANRelaxedLoad>(codegen, operand,
859 scratch0, mode, size);
860 masm->jmp(tsan_ool->entry());
861 masm->bind(tsan_ool->exit());
862}
863
864#else
865template <std::memory_order order, typename ValueT>
866void EmitTSANAwareStore(Zone* zone, CodeGenerator* codegen,
867 MacroAssembler* masm, Operand operand, ValueT value,
868 X64OperandConverter& i, StubCallMode stub_call_mode,
869 MachineRepresentation rep, Instruction* instr) {
870 DCHECK(order == std::memory_order_relaxed ||
871 order == std::memory_order_seq_cst);
872 int store_instr_off = EmitStore<order>(masm, operand, value, rep);
873 if (instr->HasMemoryAccessMode()) {
874 RecordTrapInfoIfNeeded(zone, codegen, instr->opcode(), instr,
875 store_instr_off);
876 }
877}
878
879void EmitTSANRelaxedLoadOOLIfNeeded(Zone* zone, CodeGenerator* codegen,
880 MacroAssembler* masm, Operand operand,
881 X64OperandConverter& i, StubCallMode mode,
882 int size) {}
883#endif // V8_IS_TSAN
884
885} // namespace
886
887#define ASSEMBLE_UNOP(asm_instr) \
888 do { \
889 if (instr->Output()->IsRegister()) { \
890 __ asm_instr(i.OutputRegister()); \
891 } else { \
892 __ asm_instr(i.OutputOperand()); \
893 } \
894 } while (false)
895
896#define ASSEMBLE_BINOP(asm_instr) \
897 do { \
898 if (HasAddressingMode(instr)) { \
899 size_t index = 1; \
900 Operand right = i.MemoryOperand(&index); \
901 __ asm_instr(i.InputRegister(0), right); \
902 } else { \
903 if (HasImmediateInput(instr, 1)) { \
904 if (HasRegisterInput(instr, 0)) { \
905 __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
906 } else { \
907 __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
908 } \
909 } else { \
910 if (HasRegisterInput(instr, 1)) { \
911 __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
912 } else { \
913 __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
914 } \
915 } \
916 } \
917 } while (false)
918
919#define ASSEMBLE_COMPARE(cmp_instr, test_instr) \
920 do { \
921 if (HasAddressingMode(instr)) { \
922 size_t index = 0; \
923 Operand left = i.MemoryOperand(&index); \
924 if (HasImmediateInput(instr, index)) { \
925 __ cmp_instr(left, i.InputImmediate(index)); \
926 } else { \
927 __ cmp_instr(left, i.InputRegister(index)); \
928 } \
929 } else { \
930 if (HasImmediateInput(instr, 1)) { \
931 Immediate right = i.InputImmediate(1); \
932 if (HasRegisterInput(instr, 0)) { \
933 if (right.value() == 0) { \
934 __ test_instr(i.InputRegister(0), i.InputRegister(0)); \
935 } else { \
936 __ cmp_instr(i.InputRegister(0), right); \
937 } \
938 } else { \
939 __ cmp_instr(i.InputOperand(0), right); \
940 } \
941 } else { \
942 if (HasRegisterInput(instr, 1)) { \
943 __ cmp_instr(i.InputRegister(0), i.InputRegister(1)); \
944 } else { \
945 __ cmp_instr(i.InputRegister(0), i.InputOperand(1)); \
946 } \
947 } \
948 } \
949 } while (false)
950
951#define ASSEMBLE_TEST(asm_instr) \
952 do { \
953 if (HasAddressingMode(instr)) { \
954 size_t index = 0; \
955 Operand left = i.MemoryOperand(&index); \
956 if (HasImmediateInput(instr, index)) { \
957 __ asm_instr(left, i.InputImmediate(index)); \
958 } else { \
959 __ asm_instr(left, i.InputRegister(index)); \
960 } \
961 } else { \
962 if (HasImmediateInput(instr, 1)) { \
963 if (HasRegisterInput(instr, 0)) { \
964 __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
965 } else { \
966 __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
967 } \
968 } else { \
969 if (HasRegisterInput(instr, 1)) { \
970 __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
971 } else { \
972 __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
973 } \
974 } \
975 } \
976 } while (false)
977
978#define ASSEMBLE_MULT(asm_instr) \
979 do { \
980 if (HasImmediateInput(instr, 1)) { \
981 if (HasRegisterInput(instr, 0)) { \
982 __ asm_instr(i.OutputRegister(), i.InputRegister(0), \
983 i.InputImmediate(1)); \
984 } else { \
985 __ asm_instr(i.OutputRegister(), i.InputOperand(0), \
986 i.InputImmediate(1)); \
987 } \
988 } else { \
989 if (HasRegisterInput(instr, 1)) { \
990 __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
991 } else { \
992 __ asm_instr(i.OutputRegister(), i.InputOperand(1)); \
993 } \
994 } \
995 } while (false)
996
997#define ASSEMBLE_SHIFT(asm_instr, width) \
998 do { \
999 if (HasImmediateInput(instr, 1)) { \
1000 if (instr->Output()->IsRegister()) { \
1001 __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
1002 } else { \
1003 __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1))); \
1004 } \
1005 } else { \
1006 if (instr->Output()->IsRegister()) { \
1007 __ asm_instr##_cl(i.OutputRegister()); \
1008 } else { \
1009 __ asm_instr##_cl(i.OutputOperand()); \
1010 } \
1011 } \
1012 } while (false)
1013
1014#define ASSEMBLE_MOVX(asm_instr) \
1015 do { \
1016 if (HasAddressingMode(instr)) { \
1017 __ asm_instr(i.OutputRegister(), i.MemoryOperand()); \
1018 } else if (HasRegisterInput(instr, 0)) { \
1019 __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
1020 } else { \
1021 __ asm_instr(i.OutputRegister(), i.InputOperand(0)); \
1022 } \
1023 } while (false)
1024
1025#define ASSEMBLE_SSE_BINOP(asm_instr) \
1026 do { \
1027 if (HasAddressingMode(instr)) { \
1028 size_t index = 1; \
1029 Operand right = i.MemoryOperand(&index); \
1030 __ asm_instr(i.InputDoubleRegister(0), right); \
1031 } else { \
1032 if (instr->InputAt(1)->IsFPRegister()) { \
1033 __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
1034 } else { \
1035 __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1)); \
1036 } \
1037 } \
1038 } while (false)
1039
1040#define ASSEMBLE_SSE_UNOP(asm_instr) \
1041 do { \
1042 if (instr->InputAt(0)->IsFPRegister()) { \
1043 __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
1044 } else { \
1045 __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0)); \
1046 } \
1047 } while (false)
1048
1049#define ASSEMBLE_AVX_BINOP(asm_instr) \
1050 do { \
1051 CpuFeatureScope avx_scope(masm(), AVX); \
1052 if (HasAddressingMode(instr)) { \
1053 size_t index = 1; \
1054 Operand right = i.MemoryOperand(&index); \
1055 __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), right); \
1056 } else { \
1057 if (instr->InputAt(1)->IsFPRegister()) { \
1058 __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
1059 i.InputDoubleRegister(1)); \
1060 } else { \
1061 __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
1062 i.InputOperand(1)); \
1063 } \
1064 } \
1065 } while (false)
1066
1067#define ASSEMBLE_IEEE754_BINOP(name) \
1068 do { \
1069 __ PrepareCallCFunction(2); \
1070 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
1071 } while (false)
1072
1073#define ASSEMBLE_IEEE754_UNOP(name) \
1074 do { \
1075 __ PrepareCallCFunction(1); \
1076 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
1077 } while (false)
1078
1079#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
1080 do { \
1081 Label binop; \
1082 __ bind(&binop); \
1083 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
1084 __ mov_inst(rax, i.MemoryOperand(1)); \
1085 __ movl(i.TempRegister(0), rax); \
1086 __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
1087 __ lock(); \
1088 __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
1089 __ j(not_equal, &binop); \
1090 } while (false)
1091
1092#define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
1093 do { \
1094 Label binop; \
1095 __ bind(&binop); \
1096 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
1097 __ mov_inst(rax, i.MemoryOperand(1)); \
1098 __ movq(i.TempRegister(0), rax); \
1099 __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
1100 __ lock(); \
1101 __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
1102 __ j(not_equal, &binop); \
1103 } while (false)
1104
1105// Handles both SSE and AVX codegen. For SSE we use DefineSameAsFirst, so the
1106// dst and first src will be the same. For AVX we don't restrict it that way, so
1107// we will omit unnecessary moves.
1108#define ASSEMBLE_SIMD_BINOP(opcode) \
1109 do { \
1110 if (CpuFeatures::IsSupported(AVX)) { \
1111 CpuFeatureScope avx_scope(masm(), AVX); \
1112 __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \
1113 i.InputSimd128Register(1)); \
1114 } else { \
1115 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); \
1116 __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1)); \
1117 } \
1118 } while (false)
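// Sketch of the SSE/AVX split described above, using addps as an example
// operand to the macro: with AVX the three-operand form
//   vaddps dst, src0, src1
// is emitted directly, while the SSE fallback relies on the register
// allocator's DefineSameAsFirst constraint (dst == src0) and emits
//   addps dst, src1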
1119
1120#define ASSEMBLE_SIMD_F16x8_BINOP(instr) \
1121 do { \
1122 CpuFeatureScope f16c_scope(masm(), F16C); \
1123 CpuFeatureScope avx_scope(masm(), AVX); \
1124 YMMRegister tmp1 = i.TempSimd256Register(0); \
1125 YMMRegister tmp2 = i.TempSimd256Register(1); \
1126 __ vcvtph2ps(tmp1, i.InputSimd128Register(0)); \
1127 __ vcvtph2ps(tmp2, i.InputSimd128Register(1)); \
1128 __ instr(tmp2, tmp1, tmp2); \
1129 __ vcvtps2ph(i.OutputSimd128Register(), tmp2, 0); \
1130 } while (false)
1131
1132#define ASSEMBLE_SIMD_F16x8_RELOP(instr) \
1133 do { \
1134 CpuFeatureScope f16c_scope(masm(), F16C); \
1135 CpuFeatureScope avx_scope(masm(), AVX); \
1136 YMMRegister tmp1 = i.TempSimd256Register(0); \
1137 YMMRegister tmp2 = i.TempSimd256Register(1); \
1138 __ vcvtph2ps(tmp1, i.InputSimd128Register(0)); \
1139 __ vcvtph2ps(tmp2, i.InputSimd128Register(1)); \
1140 __ instr(tmp2, tmp1, tmp2); \
1141 __ vpackssdw(i.OutputSimd128Register(), tmp2, tmp2); \
1142 } while (false)
1143
1144#define ASSEMBLE_SIMD256_BINOP(opcode, cpu_feature) \
1145 do { \
1146 CpuFeatureScope avx_scope(masm(), cpu_feature); \
1147 __ v##opcode(i.OutputSimd256Register(), i.InputSimd256Register(0), \
1148 i.InputSimd256Register(1)); \
1149 } while (false)
1150
1151#define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index) \
1152 do { \
1153 if (instr->InputAt(index)->IsSimd128Register()) { \
1154 __ opcode(dst_operand, i.InputSimd128Register(index)); \
1155 } else { \
1156 __ opcode(dst_operand, i.InputOperand(index)); \
1157 } \
1158 } while (false)
1159
1160#define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm) \
1161 do { \
1162 if (instr->InputAt(index)->IsSimd128Register()) { \
1163 __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
1164 } else { \
1165 __ opcode(dst_operand, i.InputOperand(index), imm); \
1166 } \
1167 } while (false)
1168
1169#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode) \
1170 do { \
1171 XMMRegister dst = i.OutputSimd128Register(); \
1172 uint8_t input_index = instr->InputCount() == 2 ? 1 : 0; \
1173 if (CpuFeatures::IsSupported(AVX)) { \
1174 CpuFeatureScope avx_scope(masm(), AVX); \
1175 DCHECK(instr->InputAt(input_index)->IsSimd128Register()); \
1176 __ v##opcode(dst, i.InputSimd128Register(0), \
1177 i.InputSimd128Register(input_index)); \
1178 } else { \
1179 DCHECK_EQ(dst, i.InputSimd128Register(0)); \
1180 ASSEMBLE_SIMD_INSTR(opcode, dst, input_index); \
1181 } \
1182 } while (false)
1183
1184#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, imm) \
1185 do { \
1186 XMMRegister dst = i.OutputSimd128Register(); \
1187 XMMRegister src = i.InputSimd128Register(0); \
1188 if (CpuFeatures::IsSupported(AVX)) { \
1189 CpuFeatureScope avx_scope(masm(), AVX); \
1190 DCHECK(instr->InputAt(1)->IsSimd128Register()); \
1191 __ v##opcode(dst, src, i.InputSimd128Register(1), imm); \
1192 } else { \
1193 DCHECK_EQ(dst, src); \
1194 if (instr->InputAt(1)->IsSimd128Register()) { \
1195 __ opcode(dst, i.InputSimd128Register(1), imm); \
1196 } else { \
1197 __ opcode(dst, i.InputOperand(1), imm); \
1198 } \
1199 } \
1200 } while (false)
1201
1202#define ASSEMBLE_SIMD_ALL_TRUE(opcode) \
1203 do { \
1204 Register dst = i.OutputRegister(); \
1205 __ xorq(dst, dst); \
1206 __ Pxor(kScratchDoubleReg, kScratchDoubleReg); \
1207 __ opcode(kScratchDoubleReg, i.InputSimd128Register(0)); \
1208 __ Ptest(kScratchDoubleReg, kScratchDoubleReg); \
1209 __ setcc(equal, dst); \
1210 } while (false)
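// How the all_true reduction above works: kScratchDoubleReg is zeroed, then
// the lane-wise equality compare (e.g. Pcmpeqb for the byte variant) marks
// exactly the input lanes that are zero. Ptest therefore sets ZF only when no
// input lane was zero, and setcc(equal) turns that into the 0/1 result.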
1211
1212// This macro will directly emit the opcode if the shift is an immediate - the
1213// shift value will be taken modulo 2^width. Otherwise, it will emit code to
1214// perform the modulus operation.
1215#define ASSEMBLE_SIMD_SHIFT(opcode, width) \
1216 do { \
1217 XMMRegister dst = i.OutputSimd128Register(); \
1218 if (HasImmediateInput(instr, 1)) { \
1219 if (CpuFeatures::IsSupported(AVX)) { \
1220 CpuFeatureScope avx_scope(masm(), AVX); \
1221 __ v##opcode(dst, i.InputSimd128Register(0), \
1222 uint8_t{i.InputInt##width(1)}); \
1223 } else { \
1224 DCHECK_EQ(dst, i.InputSimd128Register(0)); \
1225 __ opcode(dst, uint8_t{i.InputInt##width(1)}); \
1226 } \
1227 } else { \
1228 constexpr int mask = (1 << width) - 1; \
1229 __ movq(kScratchRegister, i.InputRegister(1)); \
1230 __ andq(kScratchRegister, Immediate(mask)); \
1231 __ Movq(kScratchDoubleReg, kScratchRegister); \
1232 if (CpuFeatures::IsSupported(AVX)) { \
1233 CpuFeatureScope avx_scope(masm(), AVX); \
1234 __ v##opcode(dst, i.InputSimd128Register(0), kScratchDoubleReg); \
1235 } else { \
1236 DCHECK_EQ(dst, i.InputSimd128Register(0)); \
1237 __ opcode(dst, kScratchDoubleReg); \
1238 } \
1239 } \
1240 } while (false)
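// Example of the shift-count handling described above: for a 32-lane shift
// the macro is instantiated with width == 5, so a register shift amount of 33
// is masked with 31 by the andq and the lanes are shifted by 1, matching the
// Wasm semantics of taking shift counts modulo the lane width.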
1241
1242#define ASSEMBLE_SIMD256_SHIFT(opcode, width) \
1243 do { \
1244 CpuFeatureScope avx_scope(masm(), AVX2); \
1245 YMMRegister src = i.InputSimd256Register(0); \
1246 YMMRegister dst = i.OutputSimd256Register(); \
1247 if (HasImmediateInput(instr, 1)) { \
1248 __ v##opcode(dst, src, uint8_t{i.InputInt##width(1)}); \
1249 } else { \
1250 constexpr int mask = (1 << width) - 1; \
1251 __ movq(kScratchRegister, i.InputRegister(1)); \
1252 __ andq(kScratchRegister, Immediate(mask)); \
1253 __ Movq(kScratchDoubleReg, kScratchRegister); \
1254 __ v##opcode(dst, src, kScratchDoubleReg); \
1255 } \
1256 } while (false)
1257
1258#define ASSEMBLE_PINSR(ASM_INSTR) \
1259 do { \
1260 XMMRegister dst = i.OutputSimd128Register(); \
1261 XMMRegister src = i.InputSimd128Register(0); \
1262 uint8_t laneidx = i.InputUint8(1); \
1263 uint32_t load_offset; \
1264 if (HasAddressingMode(instr)) { \
1265 __ ASM_INSTR(dst, src, i.MemoryOperand(2), laneidx, &load_offset); \
1266 } else if (instr->InputAt(2)->IsFPRegister()) { \
1267 __ Movq(kScratchRegister, i.InputDoubleRegister(2)); \
1268 __ ASM_INSTR(dst, src, kScratchRegister, laneidx, &load_offset); \
1269 } else if (instr->InputAt(2)->IsRegister()) { \
1270 __ ASM_INSTR(dst, src, i.InputRegister(2), laneidx, &load_offset); \
1271 } else { \
1272 __ ASM_INSTR(dst, src, i.InputOperand(2), laneidx, &load_offset); \
1273 } \
1274 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, load_offset); \
1275 } while (false)
1276
1277#define ASSEMBLE_SEQ_CST_STORE(rep) \
1278 do { \
1279 Register value = i.InputRegister(0); \
1280 Operand operand = i.MemoryOperand(1); \
1281 EmitTSANAwareStore<std::memory_order_seq_cst>( \
1282 zone(), this, masm(), operand, value, i, DetermineStubCallMode(), rep, \
1283 instr); \
1284 } while (false)
1285
1288 __ movq(rsp, rbp);
1289 __ popq(rbp);
1290}
1291
1293 if (frame_access_state()->has_frame()) {
1294 __ movq(rbp, MemOperand(rbp, 0));
1295 }
1297}
1298
1299namespace {
1300
1301void AdjustStackPointerForTailCall(Instruction* instr,
1302 MacroAssembler* assembler, Linkage* linkage,
1303 OptimizedCompilationInfo* info,
1304 FrameAccessState* state,
1305 int new_slot_above_sp,
1306 bool allow_shrinkage = true) {
1307 int stack_slot_delta;
1309 // For this special tail-call mode, the callee has the same arguments and
1310 // linkage as the caller, and arguments adapter frames must be preserved.
1311 // Thus we simply have to reset the stack pointer register to its original
1312 // value before frame construction.
1313 // See also: AssembleConstructFrame.
1314 DCHECK(!info->is_osr());
1316 DCHECK(
1318 DCHECK_EQ(state->frame()->GetReturnSlotCount(), 0);
1319 stack_slot_delta = (state->frame()->GetTotalFrameSlotCount() -
1321 -1;
1322 DCHECK_LE(stack_slot_delta, 0);
1323 } else {
1324 int current_sp_offset = state->GetSPToFPSlotCount() +
1326 stack_slot_delta = new_slot_above_sp - current_sp_offset;
1327 }
1328
1329 if (stack_slot_delta > 0) {
1330 assembler->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
1331 state->IncreaseSPDelta(stack_slot_delta);
1332 } else if (allow_shrinkage && stack_slot_delta < 0) {
1333 assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
1334 state->IncreaseSPDelta(stack_slot_delta);
1335 }
1336}
1337
1338void SetupSimdImmediateInRegister(MacroAssembler* assembler, uint32_t* imms,
1339 XMMRegister reg) {
1340 assembler->Move(reg, make_uint64(imms[3], imms[2]),
1341 make_uint64(imms[1], imms[0]));
1342}
1343
1344void SetupSimd256ImmediateInRegister(MacroAssembler* assembler, uint32_t* imms,
1345 YMMRegister reg, XMMRegister scratch) {
1346 bool is_splat = std::all_of(imms, imms + kSimd256Size,
1347 [imms](uint32_t v) { return v == imms[0]; });
1348 if (is_splat) {
1349 assembler->Move(scratch, imms[0]);
1350 CpuFeatureScope avx_scope(assembler, AVX2);
1351 assembler->vpbroadcastd(reg, scratch);
1352 } else {
1353 assembler->Move(reg, make_uint64(imms[3], imms[2]),
1354 make_uint64(imms[1], imms[0]));
1355 assembler->Move(scratch, make_uint64(imms[7], imms[6]),
1356 make_uint64(imms[5], imms[4]));
1357 CpuFeatureScope avx_scope(assembler, AVX2);
1358 assembler->vinserti128(reg, reg, scratch, uint8_t{1});
1359 }
1360}
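// The 256-bit immediate above is materialized in one of two ways: if all
// eight 32-bit lanes are equal, a single vpbroadcastd of the first lane
// suffices; otherwise the low 128 bits are moved into the destination, the
// high 128 bits into the scratch XMM register, and vinserti128 stitches the
// two halves together.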
1361
1362} // namespace
1363
1365 int first_unused_slot_offset) {
1367 ZoneVector<MoveOperands*> pushes(zone());
1368 GetPushCompatibleMoves(instr, flags, &pushes);
1369
1370 if (!pushes.empty() &&
1371 (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
1372 first_unused_slot_offset)) {
1373 DCHECK(!instr->HasCallDescriptorFlag(CallDescriptor::kIsTailCallForTierUp));
1374 X64OperandConverter g(this, instr);
1375 for (auto move : pushes) {
1376 LocationOperand destination_location(
1377 LocationOperand::cast(move->destination()));
1378 InstructionOperand source(move->source());
1379 AdjustStackPointerForTailCall(instr, masm(), linkage(), info(),
1381 destination_location.index());
1382 if (source.IsStackSlot()) {
1383 LocationOperand source_location(LocationOperand::cast(source));
1384 __ Push(g.SlotToOperand(source_location.index()));
1385 } else if (source.IsRegister()) {
1386 LocationOperand source_location(LocationOperand::cast(source));
1387 __ Push(source_location.GetRegister());
1388 } else if (source.IsImmediate()) {
1389 __ Push(Immediate(ImmediateOperand::cast(source).inline_int32_value()));
1390 } else {
1391 // Pushes of non-scalar data types are not supported.
1392 UNIMPLEMENTED();
1393 }
1395 move->Eliminate();
1396 }
1397 }
1398 AdjustStackPointerForTailCall(instr, masm(), linkage(), info(),
1399 frame_access_state(), first_unused_slot_offset,
1400 false);
1401}
1402
1404 int first_unused_slot_offset) {
1405 AdjustStackPointerForTailCall(instr, masm(), linkage(), info(),
1406 frame_access_state(), first_unused_slot_offset);
1407}
1408
1409// Check that {kJavaScriptCallCodeStartRegister} is correct.
1411 __ ComputeCodeStartAddress(rbx);
1413 __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
1414}
1415
1416#ifdef V8_ENABLE_LEAPTIERING
1417// Check that {kJavaScriptCallDispatchHandleRegister} is correct.
1418void CodeGenerator::AssembleDispatchHandleRegisterCheck() {
1419 DCHECK(linkage()->GetIncomingDescriptor()->IsJSFunctionCall());
1420
1422
1423 // We currently don't check this for JS builtins as those are sometimes
1424 // called directly (e.g. from other builtins) and not through the dispatch
1425 // table. This is fine as builtin functions don't use the dispatch handle,
1426 // but we could enable this check in the future if we make sure to pass the
1427 // kInvalidDispatchHandle whenever we do a direct call to a JS builtin.
1429 return;
1430 }
1431
1432 // For now, we only ensure that the register references a valid dispatch
1433 // entry with the correct parameter count. In the future, we may also be able
1434 // to check that the entry points back to this code.
1435 __ LoadParameterCountFromJSDispatchTable(
1437 __ cmpl(rbx, Immediate(parameter_count_));
1438 __ Assert(equal, AbortReason::kWrongFunctionDispatchHandle);
1439}
1440#endif // V8_ENABLE_LEAPTIERING
1441
1443
1445 Instruction* instr) {
1448 if (mode == kFlags_set) {
1451 Register reg = i.OutputRegister(instr->OutputCount() - 1);
1452 // Do not clear output register when it is also input register.
1453 for (size_t index = 0; index < instr->InputCount(); ++index) {
1454 if (HasRegisterInput(instr, index) && reg == i.InputRegister(index))
1455 return false;
1456 }
1457 return true;
1458 }
1459 }
1460 return false;
1461}
1462
1464 if (info()->shadow_stack_compliant_lazy_deopt() &&
1465 instr->HasCallDescriptorFlag(CallDescriptor::kNeedsFrameState)) {
1467 }
1468}
1469
1470// Assembles an instruction after register allocation, producing machine code.
1472 Instruction* instr) {
1473 X64OperandConverter i(this, instr);
1474 InstructionCode opcode = instr->opcode();
1475 ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
1477 // Transform setcc + movzxbl into xorl + setcc to avoid register stall and
1478 // encode one byte shorter.
1479 Register reg = i.OutputRegister(instr->OutputCount() - 1);
1480 __ xorl(reg, reg);
1481 }
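// Example of the peephole above: rather than emitting
//   setcc al
//   movzxbl eax, al
// the generator pre-clears the full register and lets setcc fill the low
// byte,
//   xorl eax, eax
//   setcc al
// which avoids the partial-register stall and encodes one byte shorter.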
1482 switch (arch_opcode) {
1483 case kX64TraceInstruction: {
1484 __ emit_trace_instruction(i.InputImmediate(0));
1485 break;
1486 }
1487 case kArchCallCodeObject: {
1488 if (HasImmediateInput(instr, 0)) {
1489 Handle<Code> code = i.InputCode(0);
1490 __ Call(code, RelocInfo::CODE_TARGET);
1491 } else {
1492 Register reg = i.InputRegister(0);
1493 CodeEntrypointTag tag =
1494 i.InputCodeEntrypointTag(instr->CodeEnrypointTagInputIndex());
1496 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
1498 __ LoadCodeInstructionStart(reg, reg, tag);
1499 __ call(reg);
1500 }
1504 break;
1505 }
1506 case kArchCallBuiltinPointer: {
1508 Register builtin_index = i.InputRegister(0);
1509 __ CallBuiltinByIndex(builtin_index);
1513 break;
1514 }
1515#if V8_ENABLE_WEBASSEMBLY
1516 case kArchCallWasmFunction:
1517 case kArchCallWasmFunctionIndirect: {
1518 if (arch_opcode == kArchCallWasmFunction) {
1519 // This should always use immediate inputs since we don't have a
1520 // constant pool on this arch.
1522 Constant constant = i.ToConstant(instr->InputAt(0));
1523 Address wasm_code = static_cast<Address>(constant.ToInt64());
1524 if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
1525 __ near_call(wasm_code, constant.rmode());
1526 } else {
1527 __ Call(wasm_code, constant.rmode());
1528 }
1529 } else {
1531
1532 __ CallWasmCodePointer(
1533 i.InputRegister(0),
1534 i.InputInt64(instr->WasmSignatureHashInputIndex()));
1535 }
1539 break;
1540 }
1541 case kArchTailCallWasm:
1542 case kArchTailCallWasmIndirect: {
1543 if (arch_opcode == kArchTailCallWasm) {
1545 Constant constant = i.ToConstant(instr->InputAt(0));
1546 Address wasm_code = static_cast<Address>(constant.ToInt64());
1547 if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
1548 __ near_jmp(wasm_code, constant.rmode());
1549 } else {
1550 __ Move(kScratchRegister, wasm_code, constant.rmode());
1551 __ jmp(kScratchRegister);
1552 }
1553 } else {
1555 __ CallWasmCodePointer(
1556 i.InputRegister(0),
1557 i.InputInt64(instr->WasmSignatureHashInputIndex()),
1559 }
1563 break;
1564 }
1565#endif // V8_ENABLE_WEBASSEMBLY
1566 case kArchTailCallCodeObject: {
1567 if (HasImmediateInput(instr, 0)) {
1568 Handle<Code> code = i.InputCode(0);
1569 __ Jump(code, RelocInfo::CODE_TARGET);
1570 } else {
1571 Register reg = i.InputRegister(0);
1572 CodeEntrypointTag tag =
1573 i.InputCodeEntrypointTag(instr->CodeEnrypointTagInputIndex());
1575 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
1577 __ LoadCodeInstructionStart(reg, reg, tag);
1578 __ jmp(reg);
1579 }
1583 break;
1584 }
1585 case kArchTailCallAddress: {
1587 Register reg = i.InputRegister(0);
1589 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
1591 __ jmp(reg);
1595 break;
1596 }
1597 case kArchCallJSFunction: {
1598 Register func = i.InputRegister(0);
1599 if (v8_flags.debug_code) {
1600 // Check the function's context matches the context argument.
1601 __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
1602 __ Assert(equal, AbortReason::kWrongFunctionContext);
1603 }
1604 static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
1605 uint32_t num_arguments =
1606 i.InputUint32(instr->JSCallArgumentCountInputIndex());
1607 __ CallJSFunction(func, num_arguments);
1611 break;
1612 }
1613 case kArchPrepareCallCFunction: {
1614 // Frame alignment requires using FP-relative frame addressing.
1616 int const num_parameters = MiscField::decode(instr->opcode());
1617 __ PrepareCallCFunction(num_parameters);
1618 break;
1619 }
1620 case kArchSaveCallerRegisters: {
1621 fp_mode_ =
1622 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
1625 // kReturnRegister0 should have been saved before entering the stub.
1626 int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
1628 DCHECK_EQ(0, frame_access_state()->sp_delta());
1632 break;
1633 }
1634 case kArchRestoreCallerRegisters: {
1635 DCHECK(fp_mode_ ==
1636 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
1639 // Don't overwrite the returned value.
1640 int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
1642 DCHECK_EQ(0, frame_access_state()->sp_delta());
1645 break;
1646 }
1647 case kArchPrepareTailCall:
1649 break;
1650 case kArchCallCFunctionWithFrameState:
1651 case kArchCallCFunction: {
1652 int const num_gp_parameters = ParamField::decode(instr->opcode());
1653 int const num_fp_parameters = FPParamField::decode(instr->opcode());
1654 Label return_location;
1655 SetIsolateDataSlots set_isolate_data_slots = SetIsolateDataSlots::kYes;
1656#if V8_ENABLE_WEBASSEMBLY
1657 if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
1658 // Put the return address in a stack slot.
1659 __ leaq(kScratchRegister, Operand(&return_location, 0));
1660 __ movq(MemOperand(rbp, WasmExitFrameConstants::kCallingPCOffset),
1662 set_isolate_data_slots = SetIsolateDataSlots::kNo;
1663 }
1664#endif // V8_ENABLE_WEBASSEMBLY
1665 int pc_offset;
1666 if (HasImmediateInput(instr, 0)) {
1667 ExternalReference ref = i.InputExternalReference(0);
1668 pc_offset = __ CallCFunction(ref, num_gp_parameters + num_fp_parameters,
1669 set_isolate_data_slots, &return_location);
1670 } else {
1671 Register func = i.InputRegister(0);
1672 pc_offset =
1673 __ CallCFunction(func, num_gp_parameters + num_fp_parameters,
1674 set_isolate_data_slots, &return_location);
1675 }
1676
1677 RecordSafepoint(instr->reference_map(), pc_offset);
1678
1679 bool const needs_frame_state =
1680 (arch_opcode == kArchCallCFunctionWithFrameState);
1681 if (needs_frame_state) {
1683 }
1684
1686 // Ideally, we should decrement SP delta to match the change of stack
1687 // pointer in CallCFunction. However, for certain architectures (e.g.
1688 // ARM), there may be more strict alignment requirement, causing old SP
1689 // to be saved on the stack. In those cases, we can not calculate the SP
1690 // delta statically.
1693 // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
1694 // Here, we assume the sequence to be:
1695 // kArchSaveCallerRegisters;
1696 // kArchCallCFunction;
1697 // kArchRestoreCallerRegisters;
1698 int bytes =
1699 __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
1701 }
1702 // TODO(turbofan): Do we need an lfence here?
1703 break;
1704 }
1705 case kArchJmp:
1706 AssembleArchJump(i.InputRpo(0));
1707 break;
1708 case kArchBinarySearchSwitch:
1710 break;
1711 case kArchTableSwitch:
1713 break;
1714 case kArchComment:
1715 __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)),
1716 SourceLocation());
1717 break;
1718 case kArchAbortCSADcheck:
1719 DCHECK(i.InputRegister(0) == rdx);
1720 {
1721 // We don't actually want to generate a pile of code for this, so just
1722 // claim there is a stack frame, without generating one.
1723 FrameScope scope(masm(), StackFrame::NO_FRAME_TYPE);
1724 __ CallBuiltin(Builtin::kAbortCSADcheck);
1725 }
1726 __ int3();
1728 break;
1729 case kArchDebugBreak:
1730 __ DebugBreak();
1731 break;
1732 case kArchThrowTerminator:
1734 break;
1735 case kArchNop:
1736 // don't emit code for nops.
1737 break;
1738 case kArchDeoptimize: {
1739 DeoptimizationExit* exit =
1741 __ jmp(exit->label());
1742 break;
1743 }
1744 case kArchRet:
1745 AssembleReturn(instr->InputAt(0));
1746 break;
1747 case kArchFramePointer:
1748 __ movq(i.OutputRegister(), rbp);
1749 break;
1750#if V8_ENABLE_WEBASSEMBLY
1751 case kArchStackPointer:
1752 __ movq(i.OutputRegister(), rsp);
1753 break;
1754 case kArchSetStackPointer:
1755 if (instr->InputAt(0)->IsRegister()) {
1756 __ movq(rsp, i.InputRegister(0));
1757 } else {
1758 __ movq(rsp, i.InputOperand(0));
1759 }
1760 break;
1761#endif // V8_ENABLE_WEBASSEMBLY
1762 case kArchParentFramePointer:
1763 if (frame_access_state()->has_frame()) {
1764 __ movq(i.OutputRegister(), Operand(rbp, 0));
1765 } else {
1766 __ movq(i.OutputRegister(), rbp);
1767 }
1768 break;
1769 case kArchStackPointerGreaterThan: {
1770 // Potentially apply an offset to the current stack pointer before the
1771 // comparison to consider the size difference of an optimized frame versus
1772 // the contained unoptimized frames.
1773
1774 Register lhs_register = rsp;
1775 uint32_t offset;
1776
1778 lhs_register = kScratchRegister;
1779 __ leaq(lhs_register, Operand(rsp, static_cast<int32_t>(offset) * -1));
1780 }
1781
1782 constexpr size_t kValueIndex = 0;
1783 if (HasAddressingMode(instr)) {
1784 __ cmpq(lhs_register, i.MemoryOperand(kValueIndex));
1785 } else {
1786 __ cmpq(lhs_register, i.InputRegister(kValueIndex));
1787 }
1788 break;
1789 }
1790 case kArchStackCheckOffset:
1791 __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
1792 break;
1793 case kArchTruncateDoubleToI: {
1794 auto result = i.OutputRegister();
1795 auto input = i.InputDoubleRegister(0);
1796 auto ool = zone()->New<OutOfLineTruncateDoubleToI>(
1797 this, result, input, DetermineStubCallMode(),
1799 // We use Cvttsd2siq instead of Cvttsd2si due to performance reasons. The
1800 // use of Cvttsd2siq requires the movl below to avoid sign extension.
1801 __ Cvttsd2siq(result, input);
1802 __ cmpq(result, Immediate(1));
1803 __ j(overflow, ool->entry());
1804 __ bind(ool->exit());
1805 __ movl(result, result);
1806 break;
1807 }
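// Note on the truncation sequence above: Cvttsd2siq produces the 64-bit
// "integer indefinite" value 0x8000000000000000 when the input is out of
// range, and cmpq result, 1 sets the overflow flag exactly for that value,
// so the out-of-line DoubleToI call is only taken when the fast path fails.
// The trailing movl then zero-extends the low 32 bits of the result.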
1808 case kArchStoreWithWriteBarrier: // Fall through.
1809 case kArchAtomicStoreWithWriteBarrier: {
1810 // {EmitTSANAwareStore} calls RecordTrapInfoIfNeeded. No need to do it
1811 // here.
1813 // Indirect pointer writes must use a different opcode.
1815 Register object = i.InputRegister(0);
1816 size_t index = 0;
1817 Operand operand = i.MemoryOperand(&index);
1818 Register value = i.InputRegister(index);
1819 Register scratch0 = i.TempRegister(0);
1820 Register scratch1 = i.TempRegister(1);
1821
1822 if (v8_flags.debug_code) {
1823 // Checking that |value| is not a cleared weakref: our write barrier
1824 // does not support that for now.
1826 __ Check(not_equal, AbortReason::kOperandIsCleared);
1827 }
1828
1829 auto ool = zone()->New<OutOfLineRecordWrite>(this, object, operand, value,
1830 scratch0, scratch1, mode,
1832 if (arch_opcode == kArchStoreWithWriteBarrier) {
1833 EmitTSANAwareStore<std::memory_order_relaxed>(
1834 zone(), this, masm(), operand, value, i, DetermineStubCallMode(),
1836 } else {
1837 DCHECK_EQ(arch_opcode, kArchAtomicStoreWithWriteBarrier);
1838 EmitTSANAwareStore<std::memory_order_seq_cst>(
1839 zone(), this, masm(), operand, value, i, DetermineStubCallMode(),
1841 }
1843 __ JumpIfSmi(value, ool->exit());
1844 }
1845#if V8_ENABLE_STICKY_MARK_BITS_BOOL
1846 __ CheckPageFlag(object, scratch0, MemoryChunk::kIncrementalMarking,
1847 not_zero, ool->stub_call());
1848 __ CheckMarkBit(object, scratch0, scratch1, carry, ool->entry());
1849#else // !V8_ENABLE_STICKY_MARK_BITS_BOOL
1851 __ CheckPageFlag(object, scratch0,
1853 not_zero, ool->entry());
1854#endif // !V8_ENABLE_STICKY_MARK_BITS_BOOL
1855 __ bind(ool->exit());
1856 break;
1857 }
1858 case kArchStoreIndirectWithWriteBarrier: {
1861 Register object = i.InputRegister(0);
1862 size_t index = 0;
1863 Operand operand = i.MemoryOperand(&index);
1864 Register value = i.InputRegister(index++);
1865 IndirectPointerTag tag =
1866 static_cast<IndirectPointerTag>(i.InputInt64(index));
1868 Register scratch0 = i.TempRegister(0);
1869 Register scratch1 = i.TempRegister(1);
1870
1871 auto ool = zone()->New<OutOfLineRecordWrite>(
1872 this, object, operand, value, scratch0, scratch1, mode,
1873 DetermineStubCallMode(), tag);
1874 EmitTSANAwareStore<std::memory_order_relaxed>(
1875 zone(), this, masm(), operand, value, i, DetermineStubCallMode(),
1877 __ JumpIfMarking(ool->entry());
1878 __ bind(ool->exit());
1879 break;
1880 }
1881 case kX64MFence:
1882 __ mfence();
1883 break;
1884 case kX64LFence:
1885 __ lfence();
1886 break;
1887 case kArchStackSlot: {
1888 FrameOffset offset =
1889 frame_access_state()->GetFrameOffset(i.InputInt32(0));
1890 Register base = offset.from_stack_pointer() ? rsp : rbp;
1891 __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
1892 break;
1893 }
1894 case kIeee754Float64Acos:
1895 ASSEMBLE_IEEE754_UNOP(acos);
1896 break;
1897 case kIeee754Float64Acosh:
1898 ASSEMBLE_IEEE754_UNOP(acosh);
1899 break;
1900 case kIeee754Float64Asin:
1901 ASSEMBLE_IEEE754_UNOP(asin);
1902 break;
1903 case kIeee754Float64Asinh:
1904 ASSEMBLE_IEEE754_UNOP(asinh);
1905 break;
1906 case kIeee754Float64Atan:
1907 ASSEMBLE_IEEE754_UNOP(atan);
1908 break;
1909 case kIeee754Float64Atanh:
1910 ASSEMBLE_IEEE754_UNOP(atanh);
1911 break;
1912 case kIeee754Float64Atan2:
1913 ASSEMBLE_IEEE754_BINOP(atan2);
1914 break;
1915 case kIeee754Float64Cbrt:
1916 ASSEMBLE_IEEE754_UNOP(cbrt);
1917 break;
1918 case kIeee754Float64Cos:
1919 ASSEMBLE_IEEE754_UNOP(cos);
1920 break;
1921 case kIeee754Float64Cosh:
1922 ASSEMBLE_IEEE754_UNOP(cosh);
1923 break;
1924 case kIeee754Float64Exp:
1925 ASSEMBLE_IEEE754_UNOP(exp);
1926 break;
1927 case kIeee754Float64Expm1:
1928 ASSEMBLE_IEEE754_UNOP(expm1);
1929 break;
1930 case kIeee754Float64Log:
1931 ASSEMBLE_IEEE754_UNOP(log);
1932 break;
1933 case kIeee754Float64Log1p:
1934 ASSEMBLE_IEEE754_UNOP(log1p);
1935 break;
1936 case kIeee754Float64Log2:
1937 ASSEMBLE_IEEE754_UNOP(log2);
1938 break;
1939 case kIeee754Float64Log10:
1940 ASSEMBLE_IEEE754_UNOP(log10);
1941 break;
1942 case kIeee754Float64Pow:
1943 ASSEMBLE_IEEE754_BINOP(pow);
1944 break;
1945 case kIeee754Float64Sin:
1946 ASSEMBLE_IEEE754_UNOP(sin);
1947 break;
1948 case kIeee754Float64Sinh:
1949 ASSEMBLE_IEEE754_UNOP(sinh);
1950 break;
1951 case kIeee754Float64Tan:
1952 ASSEMBLE_IEEE754_UNOP(tan);
1953 break;
1954 case kIeee754Float64Tanh:
1955 ASSEMBLE_IEEE754_UNOP(tanh);
1956 break;
1957 case kX64Add32:
1958 ASSEMBLE_BINOP(addl);
1959 break;
1960 case kX64Add:
1961 ASSEMBLE_BINOP(addq);
1962 break;
1963 case kX64Sub32:
1964 ASSEMBLE_BINOP(subl);
1965 break;
1966 case kX64Sub:
1967 ASSEMBLE_BINOP(subq);
1968 break;
1969 case kX64And32:
1970 ASSEMBLE_BINOP(andl);
1971 break;
1972 case kX64And:
1973 ASSEMBLE_BINOP(andq);
1974 break;
1975 case kX64Cmp8:
1976 if (ShouldAlignForJCCErratum(instr, FirstMacroFusionInstKind::kCmp)) {
1977 ASSEMBLE_COMPARE(aligned_cmpb, aligned_testb);
1978 } else {
1979 ASSEMBLE_COMPARE(cmpb, testb);
1980 }
1981 break;
1982 case kX64Cmp16:
1983 if (ShouldAlignForJCCErratum(instr, FirstMacroFusionInstKind::kCmp)) {
1984 ASSEMBLE_COMPARE(aligned_cmpw, aligned_testw);
1985 } else {
1986 ASSEMBLE_COMPARE(cmpw, testw);
1987 }
1988 break;
1989 case kX64Cmp32:
1990 if (ShouldAlignForJCCErratum(instr, FirstMacroFusionInstKind::kCmp)) {
1991 ASSEMBLE_COMPARE(aligned_cmpl, aligned_testl);
1992 } else {
1993 ASSEMBLE_COMPARE(cmpl, testl);
1994 }
1995 break;
1996 case kX64Cmp:
1997 if (ShouldAlignForJCCErratum(instr, FirstMacroFusionInstKind::kCmp)) {
1998 ASSEMBLE_COMPARE(aligned_cmpq, aligned_testq);
1999 } else {
2000 ASSEMBLE_COMPARE(cmpq, testq);
2001 }
2002 break;
2003 case kX64Test8:
2004 if (ShouldAlignForJCCErratum(instr, FirstMacroFusionInstKind::kTest)) {
2005 ASSEMBLE_TEST(aligned_testb);
2006 } else {
2007 ASSEMBLE_TEST(testb);
2008 }
2009 break;
2010 case kX64Test16:
2011 if (ShouldAlignForJCCErratum(instr, FirstMacroFusionInstKind::kTest)) {
2012 ASSEMBLE_TEST(aligned_testw);
2013 } else {
2014 ASSEMBLE_TEST(testw);
2015 }
2016 break;
2017 case kX64Test32:
2018 if (ShouldAlignForJCCErratum(instr, FirstMacroFusionInstKind::kTest)) {
2019 ASSEMBLE_TEST(aligned_testl);
2020 } else {
2021 ASSEMBLE_TEST(testl);
2022 }
2023 break;
2024 case kX64Test:
2025 if (ShouldAlignForJCCErratum(instr, FirstMacroFusionInstKind::kTest)) {
2026 ASSEMBLE_TEST(aligned_testq);
2027 } else {
2028 ASSEMBLE_TEST(testq);
2029 }
2030 break;
2031 case kX64Imul32:
2032 ASSEMBLE_MULT(imull);
2033 break;
2034 case kX64Imul:
2035 ASSEMBLE_MULT(imulq);
2036 break;
2037 case kX64ImulHigh32:
2038 if (HasRegisterInput(instr, 1)) {
2039 __ imull(i.InputRegister(1));
2040 } else {
2041 __ imull(i.InputOperand(1));
2042 }
2043 break;
2044 case kX64UmulHigh32:
2045 if (HasRegisterInput(instr, 1)) {
2046 __ mull(i.InputRegister(1));
2047 } else {
2048 __ mull(i.InputOperand(1));
2049 }
2050 break;
2051 case kX64ImulHigh64:
2052 if (HasRegisterInput(instr, 1)) {
2053 __ imulq(i.InputRegister(1));
2054 } else {
2055 __ imulq(i.InputOperand(1));
2056 }
2057 break;
2058 case kX64UmulHigh64:
2059 if (HasRegisterInput(instr, 1)) {
2060 __ mulq(i.InputRegister(1));
2061 } else {
2062 __ mulq(i.InputOperand(1));
2063 }
2064 break;
2065 case kX64Idiv32:
2066 __ cdq();
2067 __ idivl(i.InputRegister(1));
2068 break;
2069 case kX64Idiv:
2070 __ cqo();
2071 __ idivq(i.InputRegister(1));
2072 break;
2073 case kX64Udiv32:
2074 __ xorl(rdx, rdx);
2075 __ divl(i.InputRegister(1));
2076 break;
2077 case kX64Udiv:
2078 __ xorq(rdx, rdx);
2079 __ divq(i.InputRegister(1));
2080 break;
2081 case kX64Not:
2082 ASSEMBLE_UNOP(notq);
2083 break;
2084 case kX64Not32:
2085 ASSEMBLE_UNOP(notl);
2086 break;
2087 case kX64Neg:
2088 ASSEMBLE_UNOP(negq);
2089 break;
2090 case kX64Neg32:
2091 ASSEMBLE_UNOP(negl);
2092 break;
2093 case kX64Or32:
2094 ASSEMBLE_BINOP(orl);
2095 break;
2096 case kX64Or:
2097 ASSEMBLE_BINOP(orq);
2098 break;
2099 case kX64Xor32:
2100 ASSEMBLE_BINOP(xorl);
2101 break;
2102 case kX64Xor:
2103 ASSEMBLE_BINOP(xorq);
2104 break;
2105 case kX64Shl32:
2106 ASSEMBLE_SHIFT(shll, 5);
2107 break;
2108 case kX64Shl:
2109 ASSEMBLE_SHIFT(shlq, 6);
2110 break;
2111 case kX64Shr32:
2112 ASSEMBLE_SHIFT(shrl, 5);
2113 break;
2114 case kX64Shr:
2115 ASSEMBLE_SHIFT(shrq, 6);
2116 break;
2117 case kX64Sar32:
2118 ASSEMBLE_SHIFT(sarl, 5);
2119 break;
2120 case kX64Sar:
2121 ASSEMBLE_SHIFT(sarq, 6);
2122 break;
2123 case kX64Rol32:
2124 ASSEMBLE_SHIFT(roll, 5);
2125 break;
2126 case kX64Rol:
2127 ASSEMBLE_SHIFT(rolq, 6);
2128 break;
2129 case kX64Ror32:
2130 ASSEMBLE_SHIFT(rorl, 5);
2131 break;
2132 case kX64Ror:
2133 ASSEMBLE_SHIFT(rorq, 6);
2134 break;
2135 case kX64Lzcnt:
2136 if (HasRegisterInput(instr, 0)) {
2137 __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
2138 } else {
2139 __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
2140 }
2141 break;
2142 case kX64Lzcnt32:
2143 if (HasRegisterInput(instr, 0)) {
2144 __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
2145 } else {
2146 __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
2147 }
2148 break;
2149 case kX64Tzcnt:
2150 if (HasRegisterInput(instr, 0)) {
2151 __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
2152 } else {
2153 __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
2154 }
2155 break;
2156 case kX64Tzcnt32:
2157 if (HasRegisterInput(instr, 0)) {
2158 __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
2159 } else {
2160 __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
2161 }
2162 break;
2163 case kX64Popcnt:
2164 if (HasRegisterInput(instr, 0)) {
2165 __ Popcntq(i.OutputRegister(), i.InputRegister(0));
2166 } else {
2167 __ Popcntq(i.OutputRegister(), i.InputOperand(0));
2168 }
2169 break;
2170 case kX64Popcnt32:
2171 if (HasRegisterInput(instr, 0)) {
2172 __ Popcntl(i.OutputRegister(), i.InputRegister(0));
2173 } else {
2174 __ Popcntl(i.OutputRegister(), i.InputOperand(0));
2175 }
2176 break;
2177 case kX64Bswap:
2178 __ bswapq(i.OutputRegister());
2179 break;
2180 case kX64Bswap32:
2181 __ bswapl(i.OutputRegister());
2182 break;
2183 case kSSEFloat32Cmp:
2184 ASSEMBLE_SSE_BINOP(Ucomiss);
2185 break;
2186 case kSSEFloat32Add:
2187 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2188 ASSEMBLE_SSE_BINOP(addss);
2189 break;
2190 case kSSEFloat32Sub:
2191 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2192 ASSEMBLE_SSE_BINOP(subss);
2193 break;
2194 case kSSEFloat32Mul:
2195 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2196 ASSEMBLE_SSE_BINOP(mulss);
2197 break;
2198 case kSSEFloat32Div:
2199 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2200 ASSEMBLE_SSE_BINOP(divss);
2201 // Don't delete this mov. It may improve performance on some CPUs,
2202 // when there is a (v)mulss depending on the result.
2203 __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
2204 break;
2205 case kSSEFloat32Sqrt:
2206 ASSEMBLE_SSE_UNOP(sqrtss);
2207 break;
2208 case kSSEFloat32ToFloat64:
2209 ASSEMBLE_SSE_UNOP(Cvtss2sd);
2210 break;
2211 case kSSEFloat32Round: {
2212 CpuFeatureScope sse_scope(masm(), SSE4_1);
2213 RoundingMode const mode =
2214 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
2215 __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
2216 break;
2217 }
2218 case kSSEFloat32ToInt32:
2219 if (instr->InputAt(0)->IsFPRegister()) {
2220 __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
2221 } else {
2222 __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
2223 }
2224 break;
2225 case kSSEFloat32ToUint32: {
2226 if (instr->InputAt(0)->IsFPRegister()) {
2227 __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
2228 } else {
2229 __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
2230 }
2231 break;
2232 }
2233 case kSSEFloat64Cmp:
2234 ASSEMBLE_SSE_BINOP(Ucomisd);
2235 break;
2236 case kSSEFloat64Add:
2237 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2238 ASSEMBLE_SSE_BINOP(addsd);
2239 break;
2240 case kSSEFloat64Sub:
2241 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2242 ASSEMBLE_SSE_BINOP(subsd);
2243 break;
2244 case kSSEFloat64Mul:
2245 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2246 ASSEMBLE_SSE_BINOP(mulsd);
2247 break;
2248 case kSSEFloat64Div:
2249 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2250 ASSEMBLE_SSE_BINOP(divsd);
2251 // Don't delete this mov. It may improve performance on some CPUs,
2252 // when there is a (v)mulsd depending on the result.
2253 __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
2254 break;
2255 case kSSEFloat64Mod: {
2256 __ AllocateStackSpace(kDoubleSize);
2257 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2258 kDoubleSize);
2259 // Move values to st(0) and st(1).
2260 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
2261 __ fld_d(Operand(rsp, 0));
2262 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
2263 __ fld_d(Operand(rsp, 0));
2264 // Loop while fprem isn't done.
2265 Label mod_loop;
2266 __ bind(&mod_loop);
2267 // This instruction traps on all kinds of inputs, but we are assuming the
2268 // floating point control word is set to ignore them all.
2269 __ fprem();
2270 // The following 2 instructions implicitly use rax.
2271 __ fnstsw_ax();
2272 if (CpuFeatures::IsSupported(SAHF)) {
2273 CpuFeatureScope sahf_scope(masm(), SAHF);
2274 __ sahf();
2275 } else {
2276 __ shrl(rax, Immediate(8));
2277 __ andl(rax, Immediate(0xFF));
2278 __ pushq(rax);
2279 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2280 kSystemPointerSize);
2281 __ popfq();
2282 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2283 -kSystemPointerSize);
2284 }
2285 __ j(parity_even, &mod_loop);
2286 // Move output to stack and clean up.
2287 __ fstp(1);
2288 __ fstp_d(Operand(rsp, 0));
2289 __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
2290 __ addq(rsp, Immediate(kDoubleSize));
2291 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2292 -kDoubleSize);
2293 break;
2294 }
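// --- Editor's note: illustrative sketch, not part of this file. -------------
// The x87 loop above repeats fprem until the C2 status bit (read back through
// fnstsw/sahf, where it lands in the parity flag) clears, i.e. until the
// partial remainder is final. The result matches the C library's
// truncated-division remainder:
#include <cmath>

double Float64Mod(double lhs, double rhs) {
  return std::fmod(lhs, rhs);  // same sign-of-dividend remainder as fprem
}
// -----------------------------------------------------------------------------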
2295 case kSSEFloat32Max: {
2296 Label compare_swap, done_compare;
2297 if (instr->InputAt(1)->IsFPRegister()) {
2298 __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
2299 } else {
2300 __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
2301 }
2302 auto ool =
2303 zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
2304 __ j(parity_even, ool->entry());
2305 __ j(above, &done_compare, Label::kNear);
2306 __ j(below, &compare_swap, Label::kNear);
2307 __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
2308 __ testl(kScratchRegister, Immediate(1));
2309 __ j(zero, &done_compare, Label::kNear);
2310 __ bind(&compare_swap);
2311 if (instr->InputAt(1)->IsFPRegister()) {
2312 __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
2313 } else {
2314 __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
2315 }
2316 __ bind(&done_compare);
2317 __ bind(ool->exit());
2318 break;
2319 }
2320 case kSSEFloat32Min: {
2321 Label compare_swap, done_compare;
2322 if (instr->InputAt(1)->IsFPRegister()) {
2323 __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
2324 } else {
2325 __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
2326 }
2327 auto ool =
2328 zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
2329 __ j(parity_even, ool->entry());
2330 __ j(below, &done_compare, Label::kNear);
2331 __ j(above, &compare_swap, Label::kNear);
2332 if (instr->InputAt(1)->IsFPRegister()) {
2333 __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
2334 } else {
2335 __ Movss(kScratchDoubleReg, i.InputOperand(1));
2336 __ Movmskps(kScratchRegister, kScratchDoubleReg);
2337 }
2338 __ testl(kScratchRegister, Immediate(1));
2339 __ j(zero, &done_compare, Label::kNear);
2340 __ bind(&compare_swap);
2341 if (instr->InputAt(1)->IsFPRegister()) {
2342 __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
2343 } else {
2344 __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
2345 }
2346 __ bind(&done_compare);
2347 __ bind(ool->exit());
2348 break;
2349 }
2350 case kSSEFloat64Max: {
2351 Label compare_swap, done_compare;
2352 if (instr->InputAt(1)->IsFPRegister()) {
2353 __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
2354 } else {
2355 __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
2356 }
2357 auto ool =
2358 zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
2359 __ j(parity_even, ool->entry());
2360 __ j(above, &done_compare, Label::kNear);
2361 __ j(below, &compare_swap, Label::kNear);
2362 __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
2363 __ testl(kScratchRegister, Immediate(1));
2364 __ j(zero, &done_compare, Label::kNear);
2365 __ bind(&compare_swap);
2366 if (instr->InputAt(1)->IsFPRegister()) {
2367 __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
2368 } else {
2369 __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
2370 }
2371 __ bind(&done_compare);
2372 __ bind(ool->exit());
2373 break;
2374 }
2375 case kSSEFloat64Min: {
2376 Label compare_swap, done_compare;
2377 if (instr->InputAt(1)->IsFPRegister()) {
2378 __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
2379 } else {
2380 __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
2381 }
2382 auto ool =
2383 zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
2384 __ j(parity_even, ool->entry());
2385 __ j(below, &done_compare, Label::kNear);
2386 __ j(above, &compare_swap, Label::kNear);
2387 if (instr->InputAt(1)->IsFPRegister()) {
2388 __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
2389 } else {
2390 __ Movsd(kScratchDoubleReg, i.InputOperand(1));
2391 __ Movmskpd(kScratchRegister, kScratchDoubleReg);
2392 }
2393 __ testl(kScratchRegister, Immediate(1));
2394 __ j(zero, &done_compare, Label::kNear);
2395 __ bind(&compare_swap);
2396 if (instr->InputAt(1)->IsFPRegister()) {
2397 __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
2398 } else {
2399 __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
2400 }
2401 __ bind(&done_compare);
2402 __ bind(ool->exit());
2403 break;
2404 }
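// --- Editor's note: illustrative sketch, not part of this file. -------------
// The four compare/swap sequences above implement JS/Wasm min/max semantics
// rather than plain minss/maxss: any NaN operand produces NaN (the
// out-of-line path loads a canonical NaN), and for equal operands the sign
// bit read via Movmskps/Movmskpd breaks the tie so that max(-0, +0) == +0 and
// min(-0, +0) == -0. A scalar model of the Max case:
#include <cmath>
#include <limits>

double Float64Max(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) {
    return std::numeric_limits<double>::quiet_NaN();  // j(parity_even, ool)
  }
  if (a > b) return a;  // j(above, &done_compare): keep the first operand
  if (b > a) return b;  // j(below, &compare_swap): take the second operand
  // a == b: prefer +0 over -0, decided by the sign bit of a (testl above).
  return std::signbit(a) ? b : a;
}
// -----------------------------------------------------------------------------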
2405 case kSSEFloat64Sqrt:
2406 ASSEMBLE_SSE_UNOP(Sqrtsd);
2407 break;
2408 case kSSEFloat64Round: {
2409 CpuFeatureScope sse_scope(masm(), SSE4_1);
2410 RoundingMode const mode =
2411 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
2412 __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
2413 break;
2414 }
2415 case kSSEFloat64ToFloat16RawBits: {
2416 XMMRegister tmp_dst = i.TempDoubleRegister(0);
2417 __ Cvtpd2ph(tmp_dst, i.InputDoubleRegister(0), i.TempRegister(1));
2418 __ Pextrw(i.OutputRegister(), tmp_dst, static_cast<uint8_t>(0));
2419 break;
2420 }
2421 case kSSEFloat16RawBitsToFloat64: {
2422 XMMRegister tmp_dst = i.TempDoubleRegister(0);
2423
2424 __ Movq(tmp_dst, i.InputRegister(0));
2425 __ Cvtph2pd(i.OutputDoubleRegister(), tmp_dst);
2426 break;
2427 }
2428 case kSSEFloat64ToFloat32:
2429 ASSEMBLE_SSE_UNOP(Cvtsd2ss);
2430 break;
2431 case kSSEFloat64ToInt32: {
2432 Register output_reg = i.OutputRegister(0);
2433 if (instr->OutputCount() == 1) {
2434 if (instr->InputAt(0)->IsFPRegister()) {
2435 __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
2436 } else {
2437 __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
2438 }
2439 break;
2440 }
2441 DCHECK_EQ(2, instr->OutputCount());
2442 Register success_reg = i.OutputRegister(1);
2443 if (CpuFeatures::IsSupported(SSE4_1) || CpuFeatures::IsSupported(AVX)) {
2444 DoubleRegister rounded = i.TempSimd128Register(0);
2445 if (instr->InputAt(0)->IsFPRegister()) {
2446 __ Roundsd(rounded, i.InputDoubleRegister(0), kRoundToZero);
2447 __ Cvttsd2si(output_reg, i.InputDoubleRegister(0));
2448 } else {
2449 __ Roundsd(rounded, i.InputOperand(0), kRoundToZero);
2450 // Convert {rounded} instead of the input operand, to avoid another
2451 // load.
2452 __ Cvttsd2si(output_reg, rounded);
2453 }
2454 DoubleRegister converted_back = i.TempSimd128Register(0);
2455 __ Cvtlsi2sd(converted_back, output_reg);
2456 // Compare the converted back value to the rounded value, set
2457 // success_reg to 0 if they differ, or 1 on success.
2458 __ Cmpeqsd(converted_back, rounded);
2459 __ Movq(success_reg, converted_back);
2460 __ And(success_reg, Immediate(1));
2461 } else {
2462 // Less efficient code for non-AVX and non-SSE4_1 CPUs.
2463 if (instr->InputAt(0)->IsFPRegister()) {
2464 __ Cvttsd2si(i.OutputRegister(0), i.InputDoubleRegister(0));
2465 } else {
2466 __ Cvttsd2si(i.OutputRegister(0), i.InputOperand(0));
2467 }
2468 __ Move(success_reg, 1);
2469 Label done;
2470 Label fail;
2471 __ Move(kScratchDoubleReg, double{INT32_MIN});
2472 if (instr->InputAt(0)->IsFPRegister()) {
2473 __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
2474 } else {
2475 __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
2476 }
2477 // If the input is NaN, then the conversion fails.
2478 __ j(parity_even, &fail, Label::kNear);
2479 // If the input is INT32_MIN, then the conversion succeeds.
2480 __ j(equal, &done, Label::kNear);
2481 __ cmpl(output_reg, Immediate(1));
2482 // If the conversion results in INT32_MIN, but the input was not
2483 // INT32_MIN, then the conversion fails.
2484 __ j(no_overflow, &done, Label::kNear);
2485 __ bind(&fail);
2486 __ Move(success_reg, 0);
2487 __ bind(&done);
2488 }
2489 break;
2490 }
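// --- Editor's note: illustrative sketch, not part of this file. -------------
// The two-output variant above signals success by rounding toward zero,
// converting, converting the integer back to double, and comparing it with
// the rounded value (Cvtlsi2sd + Cmpeqsd): the two differ exactly when the
// input was NaN or outside the int32 range. A scalar model of that check:
#include <cmath>
#include <cstdint>
#include <limits>

bool TryFloat64ToInt32(double input, int32_t* out) {
  double rounded = std::trunc(input);  // Roundsd ..., kRoundToZero
  if (std::isnan(rounded) || rounded < -2147483648.0 ||
      rounded > 2147483647.0) {
    *out = std::numeric_limits<int32_t>::min();  // what Cvttsd2si produces
    return false;                                // converted-back != rounded
  }
  *out = static_cast<int32_t>(rounded);
  return static_cast<double>(*out) == rounded;   // Cmpeqsd: always true here
}
// -----------------------------------------------------------------------------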
2491 case kSSEFloat64ToUint32: {
2492 Label fail;
2493 // Set Projection(1) to 0, denoting value out of range.
2494 if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 0);
2495 if (instr->InputAt(0)->IsFPRegister()) {
2496 __ Cvttsd2ui(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
2497 } else {
2498 __ Cvttsd2ui(i.OutputRegister(), i.InputOperand(0), &fail);
2499 }
2500 // Set Projection(1) to 1, denoting value in range (otherwise the
2501 // conversion above would have jumped to `fail`), which is the success
2502 // case.
2503 if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 1);
2504 __ bind(&fail);
2505 break;
2506 }
2507 case kSSEFloat32ToInt64: {
2508 Register output_reg = i.OutputRegister(0);
2509 if (instr->OutputCount() == 1) {
2510 if (instr->InputAt(0)->IsFPRegister()) {
2511 __ Cvttss2siq(output_reg, i.InputDoubleRegister(0));
2512 } else {
2513 __ Cvttss2siq(output_reg, i.InputOperand(0));
2514 }
2515 break;
2516 }
2517 DCHECK_EQ(2, instr->OutputCount());
2518 Register success_reg = i.OutputRegister(1);
2519 if (CpuFeatures::IsSupported(SSE4_1) || CpuFeatures::IsSupported(AVX)) {
2520 DoubleRegister rounded = i.TempSimd128Register(0);
2521 if (instr->InputAt(0)->IsFPRegister()) {
2522 __ Roundss(rounded, i.InputDoubleRegister(0), kRoundToZero);
2523 __ Cvttss2siq(output_reg, i.InputDoubleRegister(0));
2524 } else {
2525 __ Roundss(rounded, i.InputOperand(0), kRoundToZero);
2526 // Convert {rounded} instead of the input operand, to avoid another
2527 // load.
2528 __ Cvttss2siq(output_reg, rounded);
2529 }
2530 DoubleRegister converted_back = i.TempSimd128Register(0);
2531 __ Cvtqsi2ss(converted_back, output_reg);
2532 // Compare the converted back value to the rounded value, set
2533 // success_reg to 0 if they differ, or 1 on success.
2534 __ Cmpeqss(converted_back, rounded);
2535 __ Movq(success_reg, converted_back);
2536 __ And(success_reg, Immediate(1));
2537 } else {
2538 // Less efficient code for non-AVX and non-SSE4_1 CPUs.
2539 if (instr->InputAt(0)->IsFPRegister()) {
2540 __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
2541 } else {
2542 __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
2543 }
2544 __ Move(success_reg, 1);
2545 Label done;
2546 Label fail;
2547 __ Move(kScratchDoubleReg, float{INT64_MIN});
2548 if (instr->InputAt(0)->IsFPRegister()) {
2549 __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
2550 } else {
2551 __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
2552 }
2553 // If the input is NaN, then the conversion fails.
2554 __ j(parity_even, &fail, Label::kNear);
2555 // If the input is INT64_MIN, then the conversion succeeds.
2556 __ j(equal, &done, Label::kNear);
2557 __ cmpq(output_reg, Immediate(1));
2558 // If the conversion results in INT64_MIN, but the input was not
2559 // INT64_MIN, then the conversion fails.
2560 __ j(no_overflow, &done, Label::kNear);
2561 __ bind(&fail);
2562 __ Move(success_reg, 0);
2563 __ bind(&done);
2564 }
2565 break;
2566 }
2567 case kSSEFloat64ToInt64: {
2568 Register output_reg = i.OutputRegister(0);
2569 if (instr->OutputCount() == 1) {
2570 if (instr->InputAt(0)->IsFPRegister()) {
2571 __ Cvttsd2siq(output_reg, i.InputDoubleRegister(0));
2572 } else {
2573 __ Cvttsd2siq(output_reg, i.InputOperand(0));
2574 }
2575 break;
2576 }
2577 DCHECK_EQ(2, instr->OutputCount());
2578 Register success_reg = i.OutputRegister(1);
2579 if (CpuFeatures::IsSupported(SSE4_1) || CpuFeatures::IsSupported(AVX)) {
2580 DoubleRegister rounded = i.TempSimd128Register(0);
2581 if (instr->InputAt(0)->IsFPRegister()) {
2582 __ Roundsd(rounded, i.InputDoubleRegister(0), kRoundToZero);
2583 __ Cvttsd2siq(output_reg, i.InputDoubleRegister(0));
2584 } else {
2585 __ Roundsd(rounded, i.InputOperand(0), kRoundToZero);
2586 // Convert {rounded} instead of the input operand, to avoid another
2587 // load.
2588 __ Cvttsd2siq(output_reg, rounded);
2589 }
2590 DoubleRegister converted_back = i.TempSimd128Register(0);
2591 __ Cvtqsi2sd(converted_back, output_reg);
2592 // Compare the converted back value to the rounded value, set
2593 // success_reg to 0 if they differ, or 1 on success.
2594 __ Cmpeqsd(converted_back, rounded);
2595 __ Movq(success_reg, converted_back);
2596 __ And(success_reg, Immediate(1));
2597 } else {
2598 // Less efficient code for non-AVX and non-SSE4_1 CPUs.
2599 if (instr->InputAt(0)->IsFPRegister()) {
2600 __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
2601 } else {
2602 __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
2603 }
2604 __ Move(success_reg, 1);
2605 Label done;
2606 Label fail;
2607 __ Move(kScratchDoubleReg, double{INT64_MIN});
2608 if (instr->InputAt(0)->IsFPRegister()) {
2609 __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
2610 } else {
2611 __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
2612 }
2613 // If the input is NaN, then the conversion fails.
2614 __ j(parity_even, &fail, Label::kNear);
2615 // If the input is INT64_MIN, then the conversion succeeds.
2616 __ j(equal, &done, Label::kNear);
2617 __ cmpq(output_reg, Immediate(1));
2618 // If the conversion results in INT64_MIN, but the input was not
2619 // INT64_MIN, then the conversion fails.
2620 __ j(no_overflow, &done, Label::kNear);
2621 __ bind(&fail);
2622 __ Move(success_reg, 0);
2623 __ bind(&done);
2624 }
2625 break;
2626 }
2627 case kSSEFloat32ToUint64: {
2628 // See kSSEFloat64ToUint32 for explanation.
2629 Label fail;
2630 if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 0);
2631 if (instr->InputAt(0)->IsFPRegister()) {
2632 __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
2633 } else {
2634 __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
2635 }
2636 if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 1);
2637 __ bind(&fail);
2638 break;
2639 }
2640 case kSSEFloat64ToUint64: {
2641 // See kSSEFloat64ToUint32 for explanation.
2642 Label fail;
2643 if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 0);
2644 if (instr->InputAt(0)->IsFPRegister()) {
2645 __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
2646 } else {
2647 __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
2648 }
2649 if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 1);
2650 __ bind(&fail);
2651 break;
2652 }
2653 case kSSEInt32ToFloat64:
2654 if (HasRegisterInput(instr, 0)) {
2655 __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2656 } else {
2657 __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2658 }
2659 break;
2660 case kSSEInt32ToFloat32:
2661 if (HasRegisterInput(instr, 0)) {
2662 __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2663 } else {
2664 __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2665 }
2666 break;
2667 case kSSEInt64ToFloat32:
2668 if (HasRegisterInput(instr, 0)) {
2669 __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2670 } else {
2671 __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2672 }
2673 break;
2674 case kSSEInt64ToFloat64:
2675 if (HasRegisterInput(instr, 0)) {
2676 __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2677 } else {
2678 __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2679 }
2680 break;
2681 case kSSEUint64ToFloat32:
2682 if (HasRegisterInput(instr, 0)) {
2683 __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2684 } else {
2685 __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2686 }
2687 break;
2688 case kSSEUint64ToFloat64:
2689 if (HasRegisterInput(instr, 0)) {
2690 __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2691 } else {
2692 __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2693 }
2694 break;
2695 case kSSEUint32ToFloat64:
2696 if (HasRegisterInput(instr, 0)) {
2697 __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2698 } else {
2699 __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2700 }
2701 break;
2702 case kSSEUint32ToFloat32:
2703 if (HasRegisterInput(instr, 0)) {
2704 __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2705 } else {
2706 __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2707 }
2708 break;
2709 case kSSEFloat64ExtractLowWord32:
2710 if (instr->InputAt(0)->IsFPStackSlot()) {
2711 __ movl(i.OutputRegister(), i.InputOperand(0));
2712 } else {
2713 __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2714 }
2715 break;
2716 case kSSEFloat64ExtractHighWord32:
2717 if (instr->InputAt(0)->IsFPStackSlot()) {
2718 __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
2719 } else {
2720 __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
2721 }
2722 break;
2723 case kSSEFloat64InsertLowWord32:
2724 if (HasRegisterInput(instr, 1)) {
2725 __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
2726 } else {
2727 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
2728 }
2729 break;
2730 case kSSEFloat64InsertHighWord32:
2731 if (HasRegisterInput(instr, 1)) {
2732 __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
2733 } else {
2734 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
2735 }
2736 break;
2737 case kSSEFloat64LoadLowWord32:
2738 if (HasRegisterInput(instr, 0)) {
2739 __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2740 } else {
2741 __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
2742 }
2743 break;
2744 case kAVXFloat32Cmp: {
2745 CpuFeatureScope avx_scope(masm(), AVX);
2746 if (instr->InputAt(1)->IsFPRegister()) {
2747 __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
2748 } else {
2749 __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
2750 }
2751 break;
2752 }
2753 case kAVXFloat32Add:
2754 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2755 ASSEMBLE_AVX_BINOP(vaddss);
2756 break;
2757 case kAVXFloat32Sub:
2758 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2759 ASSEMBLE_AVX_BINOP(vsubss);
2760 break;
2761 case kAVXFloat32Mul:
2762 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2763 ASSEMBLE_AVX_BINOP(vmulss);
2764 break;
2765 case kAVXFloat32Div:
2766 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2767 ASSEMBLE_AVX_BINOP(vdivss);
2768 // Don't delete this mov. It may improve performance on some CPUs,
2769 // when there is a (v)mulss depending on the result.
2770 __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
2771 break;
2772 case kAVXFloat64Cmp: {
2773 CpuFeatureScope avx_scope(masm(), AVX);
2774 if (instr->InputAt(1)->IsFPRegister()) {
2775 __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
2776 } else {
2777 __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
2778 }
2779 break;
2780 }
2781 case kAVXFloat64Add:
2782 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2783 ASSEMBLE_AVX_BINOP(vaddsd);
2784 break;
2785 case kAVXFloat64Sub:
2786 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2787 ASSEMBLE_AVX_BINOP(vsubsd);
2788 break;
2789 case kAVXFloat64Mul:
2790 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2791 ASSEMBLE_AVX_BINOP(vmulsd);
2792 break;
2793 case kAVXFloat64Div:
2794 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2795 ASSEMBLE_AVX_BINOP(vdivsd);
2796 // Don't delete this mov. It may improve performance on some CPUs,
2797 // when there is a (v)mulsd depending on the result.
2798 __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
2799 break;
2800 case kX64Float32Abs: {
2801 __ Absps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2803 break;
2804 }
2805 case kX64Float32Neg: {
2806 __ Negps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2808 break;
2809 }
2810 case kX64FAbs: {
2811 LaneSize lane_size = LaneSizeField::decode(opcode);
2812 VectorLength vec_len = VectorLengthField::decode(opcode);
2813 if (vec_len == kV128) {
2814 switch (lane_size) {
2815 case kL16: {
2816 // F16x8Abs
2817 CpuFeatureScope avx_scope(masm(), AVX);
2818 __ Absph(i.OutputSimd128Register(), i.InputSimd128Register(0),
2820 break;
2821 }
2822 case kL32: {
2823 // F32x4Abs
2824 __ Absps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2826 break;
2827 }
2828 case kL64: {
2829 // F64x2Abs
2830 __ Abspd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2832 break;
2833 }
2834 default:
2835 UNREACHABLE();
2836 }
2837 } else if (vec_len == kV256) {
2838 switch (lane_size) {
2839 case kL32: {
2840 // F32x8Abs
2841 YMMRegister dst = i.OutputSimd256Register();
2842 YMMRegister src = i.InputSimd256Register(0);
2843 CpuFeatureScope avx_scope(masm(), AVX2);
2844 if (dst == src) {
2847 __ vpsrld(kScratchSimd256Reg, kScratchSimd256Reg, uint8_t{1});
2848 __ vpand(dst, dst, kScratchSimd256Reg);
2849 } else {
2850 __ vpcmpeqd(dst, dst, dst);
2851 __ vpsrld(dst, dst, uint8_t{1});
2852 __ vpand(dst, dst, src);
2853 }
2854 break;
2855 }
2856 case kL64: {
2857 // F64x4Abs
2858 YMMRegister dst = i.OutputSimd256Register();
2859 YMMRegister src = i.InputSimd256Register(0);
2860 CpuFeatureScope avx_scope(masm(), AVX2);
2861 if (dst == src) {
2864 __ vpsrlq(kScratchSimd256Reg, kScratchSimd256Reg, uint8_t{1});
2865 __ vpand(dst, dst, kScratchSimd256Reg);
2866 } else {
2867 __ vpcmpeqq(dst, dst, dst);
2868 __ vpsrlq(dst, dst, uint8_t{1});
2869 __ vpand(dst, dst, src);
2870 }
2871 break;
2872 }
2873 default:
2874 UNREACHABLE();
2875 }
2876 } else {
2877 UNREACHABLE();
2878 }
2879 break;
2880 }
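// --- Editor's note: illustrative sketch, not part of this file. -------------
// The vpcmpeqd/vpsrld/vpand idiom above builds an all-ones register, shifts it
// right by one to get 0x7FFFFFFF per lane, and ANDs it in, clearing the sign
// bit of every lane. Per 32-bit lane this is simply:
#include <cstdint>
#include <cstring>

float Float32Abs(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  bits &= UINT32_C(0xFFFFFFFF) >> 1;  // (all-ones >> 1) == 0x7FFFFFFF
  std::memcpy(&x, &bits, sizeof bits);
  return x;
}
// -----------------------------------------------------------------------------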
2881 case kX64Float64Abs: {
2882 __ Abspd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2884 break;
2885 }
2886 case kX64FNeg: {
2887 LaneSize lane_size = LaneSizeField::decode(opcode);
2888 VectorLength vec_len = VectorLengthField::decode(opcode);
2889 if (vec_len == kV128) {
2890 switch (lane_size) {
2891 case kL16: {
2892 // F16x8Neg
2893 CpuFeatureScope avx_scope(masm(), AVX);
2894 __ Negph(i.OutputSimd128Register(), i.InputSimd128Register(0),
2896 break;
2897 }
2898 case kL32: {
2899 // F32x4Neg
2900 __ Negps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2902 break;
2903 }
2904 case kL64: {
2905 // F64x2Neg
2906 __ Negpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2908 break;
2909 }
2910 default:
2911 UNREACHABLE();
2912 }
2913 } else if (vec_len == kV256) {
2914 switch (lane_size) {
2915 case kL32: {
2916 // F32x8Neg
2917 YMMRegister dst = i.OutputSimd256Register();
2918 YMMRegister src = i.InputSimd256Register(0);
2919 CpuFeatureScope avx_scope(masm(), AVX2);
2920 if (dst == src) {
2923 __ vpslld(kScratchSimd256Reg, kScratchSimd256Reg, uint8_t{31});
2924 __ vpxor(dst, dst, kScratchSimd256Reg);
2925 } else {
2926 __ vpcmpeqd(dst, dst, dst);
2927 __ vpslld(dst, dst, uint8_t{31});
2928 __ vxorps(dst, dst, src);
2929 }
2930 break;
2931 }
2932 case kL64: {
2933 // F64x4Neg
2934 YMMRegister dst = i.OutputSimd256Register();
2935 YMMRegister src = i.InputSimd256Register(0);
2936 CpuFeatureScope avx_scope(masm(), AVX2);
2937 if (dst == src) {
2940 __ vpsllq(kScratchSimd256Reg, kScratchSimd256Reg, uint8_t{63});
2941 __ vpxor(dst, dst, kScratchSimd256Reg);
2942 } else {
2943 __ vpcmpeqq(dst, dst, dst);
2944 __ vpsllq(dst, dst, uint8_t{63});
2945 __ vxorpd(dst, dst, src);
2946 }
2947 break;
2948 }
2949 default:
2950 UNREACHABLE();
2951 }
2952 } else {
2953 UNREACHABLE();
2954 }
2955 break;
2956 }
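// --- Editor's note: illustrative sketch, not part of this file. -------------
// Negation above is the dual of Abs: the all-ones register is shifted left so
// only the sign bit survives (bit 31 per f32 lane, bit 63 per f64 lane) and is
// then XORed into the value. Per 32-bit lane:
#include <cstdint>
#include <cstring>

float Float32Neg(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  bits ^= UINT32_C(1) << 31;  // sign-bit mask 0x80000000
  std::memcpy(&x, &bits, sizeof bits);
  return x;
}
// -----------------------------------------------------------------------------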
2957 case kX64Float64Neg: {
2958 __ Negpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2960 break;
2961 }
2962 case kSSEFloat64SilenceNaN:
2963 __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
2964 __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
2965 break;
2966 case kX64Movsxbl:
2967 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2968 ASSEMBLE_MOVX(movsxbl);
2969 __ AssertZeroExtended(i.OutputRegister());
2970 break;
2971 case kX64Movzxbl:
2972 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2973 ASSEMBLE_MOVX(movzxbl);
2974 __ AssertZeroExtended(i.OutputRegister());
2975 break;
2976 case kX64Movsxbq:
2977 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2978 ASSEMBLE_MOVX(movsxbq);
2979 break;
2980 case kX64Movzxbq:
2981 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2982 ASSEMBLE_MOVX(movzxbq);
2983 __ AssertZeroExtended(i.OutputRegister());
2984 break;
2985 case kX64Movb: {
2986 size_t index = 0;
2987 Operand operand = i.MemoryOperand(&index);
2988 if (HasImmediateInput(instr, index)) {
2989 Immediate value(Immediate(i.InputInt8(index)));
2990 EmitTSANAwareStore<std::memory_order_relaxed>(
2991 zone(), this, masm(), operand, value, i, DetermineStubCallMode(),
2993 } else {
2994 Register value(i.InputRegister(index));
2995 EmitTSANAwareStore<std::memory_order_relaxed>(
2996 zone(), this, masm(), operand, value, i, DetermineStubCallMode(),
2998 }
2999 break;
3000 }
3001 case kX64Movsxwl:
3002 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3003 ASSEMBLE_MOVX(movsxwl);
3004 __ AssertZeroExtended(i.OutputRegister());
3005 break;
3006 case kX64Movzxwl:
3007 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3008 ASSEMBLE_MOVX(movzxwl);
3009 __ AssertZeroExtended(i.OutputRegister());
3010 break;
3011 case kX64Movsxwq:
3012 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3013 ASSEMBLE_MOVX(movsxwq);
3014 break;
3015 case kX64Movzxwq:
3016 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3017 ASSEMBLE_MOVX(movzxwq);
3018 __ AssertZeroExtended(i.OutputRegister());
3019 break;
3020 case kX64Movw: {
3021 size_t index = 0;
3022 Operand operand = i.MemoryOperand(&index);
3023 if (HasImmediateInput(instr, index)) {
3024 Immediate value(Immediate(i.InputInt16(index)));
3025 EmitTSANAwareStore<std::memory_order_relaxed>(
3026 zone(), this, masm(), operand, value, i, DetermineStubCallMode(),
3028 } else {
3029 Register value(i.InputRegister(index));
3030 EmitTSANAwareStore<std::memory_order_relaxed>(
3031 zone(), this, masm(), operand, value, i, DetermineStubCallMode(),
3033 }
3034 break;
3035 }
3036 case kX64Movl:
3037 if (instr->HasOutput()) {
3038 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3039 if (HasAddressingMode(instr)) {
3040 Operand address(i.MemoryOperand());
3041 __ movl(i.OutputRegister(), address);
3042 EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, masm(), address, i,
3044 } else {
3045 if (HasRegisterInput(instr, 0)) {
3046 __ movl(i.OutputRegister(), i.InputRegister(0));
3047 } else {
3048 __ movl(i.OutputRegister(), i.InputOperand(0));
3049 }
3050 }
3051 __ AssertZeroExtended(i.OutputRegister());
3052 } else {
3053 size_t index = 0;
3054 Operand operand = i.MemoryOperand(&index);
3055 if (HasImmediateInput(instr, index)) {
3056 Immediate value(i.InputImmediate(index));
3057 EmitTSANAwareStore<std::memory_order_relaxed>(
3058 zone(), this, masm(), operand, value, i, DetermineStubCallMode(),
3060 } else {
3061 Register value(i.InputRegister(index));
3062 EmitTSANAwareStore<std::memory_order_relaxed>(
3063 zone(), this, masm(), operand, value, i, DetermineStubCallMode(),
3065 }
3066 }
3067 break;
3068 case kX64Movsxlq:
3069 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3070 ASSEMBLE_MOVX(movsxlq);
3071 break;
3072 case kX64MovqDecompressTaggedSigned: {
3073 CHECK(instr->HasOutput());
3074 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3075 Operand address(i.MemoryOperand());
3076 __ DecompressTaggedSigned(i.OutputRegister(), address);
3077 EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, masm(), address, i,
3079 break;
3080 }
3081 case kX64MovqDecompressTagged: {
3082 CHECK(instr->HasOutput());
3083 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3084 Operand address(i.MemoryOperand());
3085 __ DecompressTagged(i.OutputRegister(), address);
3086 EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, masm(), address, i,
3088 break;
3089 }
3090 case kX64MovqCompressTagged: {
3091 // {EmitTSANAwareStore} calls RecordTrapInfoIfNeeded. No need to do it
3092 // here.
3093 CHECK(!instr->HasOutput());
3094 size_t index = 0;
3095 Operand operand = i.MemoryOperand(&index);
3096 if (HasImmediateInput(instr, index)) {
3097 Immediate value(i.InputImmediate(index));
3098 EmitTSANAwareStore<std::memory_order_relaxed>(
3099 zone(), this, masm(), operand, value, i, DetermineStubCallMode(),
3101 } else {
3102 Register value(i.InputRegister(index));
3103 EmitTSANAwareStore<std::memory_order_relaxed>(
3104 zone(), this, masm(), operand, value, i, DetermineStubCallMode(),
3106 }
3107 break;
3108 }
3109 case kX64MovqDecompressProtected: {
3110 CHECK(instr->HasOutput());
3111 Operand address(i.MemoryOperand());
3112 __ DecompressProtected(i.OutputRegister(), address);
3113 EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, masm(), address, i,
3115 break;
3116 }
3117 case kX64MovqStoreIndirectPointer: {
3118 CHECK(!instr->HasOutput());
3119 size_t index = 0;
3120 Operand operand = i.MemoryOperand(&index);
3121 CHECK(!HasImmediateInput(instr, index));
3122 Register value(i.InputRegister(index));
3123 EmitTSANAwareStore<std::memory_order_relaxed>(
3124 zone(), this, masm(), operand, value, i, DetermineStubCallMode(),
3126 break;
3127 }
3128 case kX64MovqDecodeSandboxedPointer: {
3129 CHECK(instr->HasOutput());
3130 Operand address(i.MemoryOperand());
3131 Register dst = i.OutputRegister();
3132 __ movq(dst, address);
3133 __ DecodeSandboxedPointer(dst);
3134 EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, masm(), address, i,
3137 break;
3138 }
3139 case kX64MovqEncodeSandboxedPointer: {
3140 CHECK(!instr->HasOutput());
3141 size_t index = 0;
3142 Operand operand = i.MemoryOperand(&index);
3143 CHECK(!HasImmediateInput(instr, index));
3144 Register value(i.InputRegister(index));
3145 EmitTSANAwareStore<std::memory_order_relaxed>(
3146 zone(), this, masm(), operand, value, i, DetermineStubCallMode(),
3148 break;
3149 }
3150 case kX64Movq:
3151 if (instr->HasOutput()) {
3152 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3153 Operand address(i.MemoryOperand());
3154 __ movq(i.OutputRegister(), address);
3155 EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, masm(), address, i,
3157 } else {
3158 size_t index = 0;
3159 Operand operand = i.MemoryOperand(&index);
3160 if (HasImmediateInput(instr, index)) {
3161 Immediate value(i.InputImmediate(index));
3162 EmitTSANAwareStore<std::memory_order_relaxed>(
3163 zone(), this, masm(), operand, value, i, DetermineStubCallMode(),
3165 } else {
3166 Register value(i.InputRegister(index));
3167 EmitTSANAwareStore<std::memory_order_relaxed>(
3168 zone(), this, masm(), operand, value, i, DetermineStubCallMode(),
3170 }
3171 }
3172 break;
3173 case kX64Movsh:
3174 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3175 if (instr->HasOutput()) {
3176 CpuFeatureScope f16c_scope(masm(), F16C);
3177 CpuFeatureScope avx2_scope(masm(), AVX2);
3178 __ vpbroadcastw(i.OutputDoubleRegister(), i.MemoryOperand());
3179 __ vcvtph2ps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
3180 } else {
3181 CpuFeatureScope f16c_scope(masm(), F16C);
3182 size_t index = 0;
3183 Operand operand = i.MemoryOperand(&index);
3184 __ vcvtps2ph(kScratchDoubleReg, i.InputDoubleRegister(index), 0);
3185 __ Pextrw(operand, kScratchDoubleReg, static_cast<uint8_t>(0));
3186 }
3187 break;
3188 case kX64Movss:
3189 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3190 if (instr->HasOutput()) {
3191 __ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
3192 } else {
3193 size_t index = 0;
3194 Operand operand = i.MemoryOperand(&index);
3195 __ Movss(operand, i.InputDoubleRegister(index));
3196 }
3197 break;
3198 case kX64Movsd: {
3199 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3200 if (instr->HasOutput()) {
3201 __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
3202 } else {
3203 size_t index = 0;
3204 Operand operand = i.MemoryOperand(&index);
3205 __ Movsd(operand, i.InputDoubleRegister(index));
3206 }
3207 break;
3208 }
3209 case kX64Movdqu: {
3210 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3211 if (instr->HasOutput()) {
3212 __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
3213 } else {
3214 size_t index = 0;
3215 Operand operand = i.MemoryOperand(&index);
3216 __ Movdqu(operand, i.InputSimd128Register(index));
3217 }
3218 break;
3219 }
3220 case kX64BitcastFI:
3221 if (instr->InputAt(0)->IsFPStackSlot()) {
3222 __ movl(i.OutputRegister(), i.InputOperand(0));
3223 } else {
3224 __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
3225 }
3226 break;
3227 case kX64BitcastDL:
3228 if (instr->InputAt(0)->IsFPStackSlot()) {
3229 __ movq(i.OutputRegister(), i.InputOperand(0));
3230 } else {
3231 __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
3232 }
3233 break;
3234 case kX64BitcastIF:
3235 if (HasRegisterInput(instr, 0)) {
3236 __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
3237 } else {
3238 __ Movss(i.OutputDoubleRegister(), i.InputOperand(0));
3239 }
3240 break;
3241 case kX64BitcastLD:
3242 if (HasRegisterInput(instr, 0)) {
3243 __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
3244 } else {
3245 __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
3246 }
3247 break;
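// --- Editor's note: illustrative sketch, not part of this file. -------------
// The Bitcast opcodes above move raw bits between general-purpose and XMM
// registers (movd/movq or a stack-slot load) without numeric conversion. In
// portable C++ the same reinterpretation is a memcpy between representations:
#include <cstdint>
#include <cstring>

uint32_t BitcastFloat32ToInt32(float f) {  // kX64BitcastFI
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof bits);
  return bits;
}

float BitcastInt32ToFloat32(uint32_t bits) {  // kX64BitcastIF
  float f;
  std::memcpy(&f, &bits, sizeof f);
  return f;
}
// -----------------------------------------------------------------------------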
3248 case kX64Lea32: {
3249 AddressingMode mode = AddressingModeField::decode(instr->opcode());
3250 // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
3251 // and addressing mode just happen to work out. The "addl"/"subl" forms
3252 // in these cases are faster based on measurements.
3253 if (i.InputRegister(0) == i.OutputRegister()) {
3254 if (mode == kMode_MRI) {
3255 int32_t constant_summand = i.InputInt32(1);
3256 DCHECK_NE(0, constant_summand);
3257 if (constant_summand > 0) {
3258 __ addl(i.OutputRegister(), Immediate(constant_summand));
3259 } else {
3260 __ subl(i.OutputRegister(),
3261 Immediate(base::NegateWithWraparound(constant_summand)));
3262 }
3263 } else if (mode == kMode_MR1) {
3264 if (i.InputRegister(1) == i.OutputRegister()) {
3265 __ shll(i.OutputRegister(), Immediate(1));
3266 } else {
3267 __ addl(i.OutputRegister(), i.InputRegister(1));
3268 }
3269 } else if (mode == kMode_M2) {
3270 __ shll(i.OutputRegister(), Immediate(1));
3271 } else if (mode == kMode_M4) {
3272 __ shll(i.OutputRegister(), Immediate(2));
3273 } else if (mode == kMode_M8) {
3274 __ shll(i.OutputRegister(), Immediate(3));
3275 } else {
3276 __ leal(i.OutputRegister(), i.MemoryOperand());
3277 }
3278 } else if (mode == kMode_MR1 &&
3279 i.InputRegister(1) == i.OutputRegister()) {
3280 __ addl(i.OutputRegister(), i.InputRegister(0));
3281 } else {
3282 __ leal(i.OutputRegister(), i.MemoryOperand());
3283 }
3284 __ AssertZeroExtended(i.OutputRegister());
3285 break;
3286 }
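// --- Editor's note: illustrative sketch, not part of this file. -------------
// The strength reduction above works because lea's scale factors are powers of
// two: "index * 2/4/8" equals a left shift by 1/2/3, which is why the M2/M4/M8
// modes become shll with those immediates (and base + base becomes shll by 1).
// A helper for that mapping (the function name is ours, for illustration):
int LeaScaleToShiftAmount(int scale) {
  switch (scale) {
    case 2: return 1;   // kMode_M2 -> shll dst, 1
    case 4: return 2;   // kMode_M4 -> shll dst, 2
    case 8: return 3;   // kMode_M8 -> shll dst, 3
    default: return 0;  // scale 1 is handled with addl, not a shift
  }
}
// -----------------------------------------------------------------------------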
3287 case kX64Lea: {
3288 AddressingMode mode = AddressingModeField::decode(instr->opcode());
3289 // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
3290 // and addressing mode just happen to work out. The "addq"/"subq" forms
3291 // in these cases are faster based on measurements.
3292 if (i.InputRegister(0) == i.OutputRegister()) {
3293 if (mode == kMode_MRI) {
3294 int32_t constant_summand = i.InputInt32(1);
3295 if (constant_summand > 0) {
3296 __ addq(i.OutputRegister(), Immediate(constant_summand));
3297 } else if (constant_summand < 0) {
3298 __ subq(i.OutputRegister(), Immediate(-constant_summand));
3299 }
3300 } else if (mode == kMode_MR1) {
3301 if (i.InputRegister(1) == i.OutputRegister()) {
3302 __ shlq(i.OutputRegister(), Immediate(1));
3303 } else {
3304 __ addq(i.OutputRegister(), i.InputRegister(1));
3305 }
3306 } else if (mode == kMode_M2) {
3307 __ shlq(i.OutputRegister(), Immediate(1));
3308 } else if (mode == kMode_M4) {
3309 __ shlq(i.OutputRegister(), Immediate(2));
3310 } else if (mode == kMode_M8) {
3311 __ shlq(i.OutputRegister(), Immediate(3));
3312 } else {
3313 __ leaq(i.OutputRegister(), i.MemoryOperand());
3314 }
3315 } else if (mode == kMode_MR1 &&
3316 i.InputRegister(1) == i.OutputRegister()) {
3317 __ addq(i.OutputRegister(), i.InputRegister(0));
3318 } else {
3319 __ leaq(i.OutputRegister(), i.MemoryOperand());
3320 }
3321 break;
3322 }
3323 case kX64Dec32:
3324 __ decl(i.OutputRegister());
3325 break;
3326 case kX64Inc32:
3327 __ incl(i.OutputRegister());
3328 break;
3329 case kX64Push: {
3330 int stack_decrement = i.InputInt32(0);
3331 int slots = stack_decrement / kSystemPointerSize;
3332 // Whenever codegen uses pushq, we need to check if stack_decrement
3333 // contains any extra padding and adjust the stack before the pushq.
3334 if (HasAddressingMode(instr)) {
3335 __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
3336 size_t index = 1;
3337 Operand operand = i.MemoryOperand(&index);
3338 __ pushq(operand);
3339 } else if (HasImmediateInput(instr, 1)) {
3340 __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
3341 __ pushq(i.InputImmediate(1));
3342 } else {
3343 InstructionOperand* input = instr->InputAt(1);
3344 if (input->IsRegister()) {
3345 __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
3346 __ pushq(i.InputRegister(1));
3347 } else if (input->IsFloatRegister() || input->IsDoubleRegister()) {
3348 DCHECK_GE(stack_decrement, kSystemPointerSize);
3349 __ AllocateStackSpace(stack_decrement);
3350 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
3351 } else if (input->IsSimd128Register()) {
3352 DCHECK_GE(stack_decrement, kSimd128Size);
3353 __ AllocateStackSpace(stack_decrement);
3354 // TODO(bbudge) Use Movaps when slots are aligned.
3355 __ Movups(Operand(rsp, 0), i.InputSimd128Register(1));
3356 } else if (input->IsStackSlot() || input->IsFloatStackSlot() ||
3357 input->IsDoubleStackSlot()) {
3358 __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
3359 __ pushq(i.InputOperand(1));
3360 } else {
3361 DCHECK(input->IsSimd128StackSlot());
3362 DCHECK_GE(stack_decrement, kSimd128Size);
3363 // TODO(bbudge) Use Movaps when slots are aligned.
3364 __ Movups(kScratchDoubleReg, i.InputOperand(1));
3365 __ AllocateStackSpace(stack_decrement);
3366 __ Movups(Operand(rsp, 0), kScratchDoubleReg);
3367 }
3368 }
3369 frame_access_state()->IncreaseSPDelta(slots);
3370 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
3371 stack_decrement);
3372 break;
3373 }
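// --- Editor's note: illustrative sketch, not part of this file. -------------
// stack_decrement above is the total amount the stack must drop for this push,
// padding included. Because pushq itself subtracts one system pointer, only
// the remainder is pre-allocated; FP/SIMD pushes allocate the full amount and
// then store at rsp. A model of that split (kSystemPointerSize is 8 on x64):
int ExplicitStackAllocationForPush(int stack_decrement, bool uses_pushq) {
  constexpr int kSystemPointerSize = 8;
  return uses_pushq ? stack_decrement - kSystemPointerSize : stack_decrement;
}
// -----------------------------------------------------------------------------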
3374 case kX64Poke: {
3375 int slot = MiscField::decode(instr->opcode());
3376 if (HasImmediateInput(instr, 0)) {
3377 __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
3378 } else if (instr->InputAt(0)->IsFPRegister()) {
3379 LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
3380 if (op->representation() == MachineRepresentation::kFloat64) {
3381 __ Movsd(Operand(rsp, slot * kSystemPointerSize),
3382 i.InputDoubleRegister(0));
3383 } else {
3384 DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
3385 __ Movss(Operand(rsp, slot * kSystemPointerSize),
3386 i.InputFloatRegister(0));
3387 }
3388 } else {
3389 __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
3390 }
3391 break;
3392 }
3393 case kX64Peek: {
3394 int reverse_slot = i.InputInt32(0);
3395 int offset =
3396 FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
3397 if (instr->OutputAt(0)->IsFPRegister()) {
3398 LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
3399 if (op->representation() == MachineRepresentation::kFloat64) {
3400 __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
3401 } else if (op->representation() == MachineRepresentation::kFloat32) {
3402 __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
3403 } else {
3404 DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
3405 __ Movdqu(i.OutputSimd128Register(), Operand(rbp, offset));
3406 }
3407 } else {
3408 __ movq(i.OutputRegister(), Operand(rbp, offset));
3409 }
3410 break;
3411 }
3412 case kX64FSplat: {
3413 LaneSize lane_size = LaneSizeField::decode(opcode);
3414 VectorLength vec_len = VectorLengthField::decode(opcode);
3415 if (vec_len == kV128) {
3416 switch (lane_size) {
3417 case kL16: {
3418 CpuFeatureScope f16c_scope(masm(), F16C);
3419 CpuFeatureScope avx2_scope(masm(), AVX2);
3420 __ vcvtps2ph(i.OutputDoubleRegister(0), i.InputDoubleRegister(0),
3421 0);
3422 __ vpbroadcastw(i.OutputSimd128Register(),
3423 i.OutputDoubleRegister(0));
3424 break;
3425 }
3426 case kL32: {
3427 // F32x4Splat
3428 __ F32x4Splat(i.OutputSimd128Register(), i.InputDoubleRegister(0));
3429 break;
3430 }
3431 case kL64: {
3432 // F64X2Splat
3433 XMMRegister dst = i.OutputSimd128Register();
3434 if (instr->InputAt(0)->IsFPRegister()) {
3435 __ Movddup(dst, i.InputDoubleRegister(0));
3436 } else {
3437 __ Movddup(dst, i.InputOperand(0));
3438 }
3439 break;
3440 }
3441 default:
3442 UNREACHABLE();
3443 }
3444
3445 } else if (vec_len == kV256) {
3446 switch (lane_size) {
3447 case kL32: {
3448 // F32x8Splat
3449 __ F32x8Splat(i.OutputSimd256Register(), i.InputFloatRegister(0));
3450 break;
3451 }
3452 case kL64: {
3453 // F64X4Splat
3454 __ F64x4Splat(i.OutputSimd256Register(), i.InputDoubleRegister(0));
3455 break;
3456 }
3457 default:
3458 UNREACHABLE();
3459 }
3460 } else {
3461 UNREACHABLE();
3462 }
3463 break;
3464 }
3465 case kX64FExtractLane: {
3466 LaneSize lane_size = LaneSizeField::decode(opcode);
3467 VectorLength vec_len = VectorLengthField::decode(opcode);
3468 if (vec_len == kV128) {
3469 switch (lane_size) {
3470 case kL16: {
3471 // F16x8ExtractLane
3472 CpuFeatureScope f16c_scope(masm(), F16C);
3473 CpuFeatureScope avx_scope(masm(), AVX);
3474 __ Pextrw(kScratchRegister, i.InputSimd128Register(0),
3475 i.InputUint8(1));
3476 __ vmovd(i.OutputFloatRegister(), kScratchRegister);
3477 __ vcvtph2ps(i.OutputFloatRegister(), i.OutputFloatRegister());
3478 break;
3479 }
3480 case kL32: {
3481 // F32x4ExtractLane
3482 __ F32x4ExtractLane(i.OutputFloatRegister(),
3483 i.InputSimd128Register(0), i.InputUint8(1));
3484 break;
3485 }
3486 case kL64: {
3487 // F64X2ExtractLane
3488 __ F64x2ExtractLane(i.OutputDoubleRegister(),
3489 i.InputDoubleRegister(0), i.InputUint8(1));
3490 break;
3491 }
3492 default:
3493 UNREACHABLE();
3494 }
3495
3496 } else {
3497 UNREACHABLE();
3498 }
3499 break;
3500 }
3501 case kX64FReplaceLane: {
3502 LaneSize lane_size = LaneSizeField::decode(opcode);
3503 VectorLength vec_len = VectorLengthField::decode(opcode);
3504 if (vec_len == kV128) {
3505 switch (lane_size) {
3506 case kL16: {
3507 // F16x8ReplaceLane
3508 CpuFeatureScope f16c_scope(masm(), F16C);
3509 CpuFeatureScope avx_scope(masm(), AVX);
3510 __ vcvtps2ph(kScratchDoubleReg, i.InputDoubleRegister(2), 0);
3511 __ vmovd(kScratchRegister, kScratchDoubleReg);
3512 __ vpinsrw(i.OutputSimd128Register(), i.InputSimd128Register(0),
3513 kScratchRegister, i.InputInt8(1));
3514 break;
3515 }
3516 case kL32: {
3517 // F32x4ReplaceLane
3518 // The insertps instruction uses imm8[5:4] to indicate the lane
3519 // that needs to be replaced.
3520 uint8_t select = i.InputInt8(1) << 4 & 0x30;
3521 if (instr->InputAt(2)->IsFPRegister()) {
3522 __ Insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
3523 select);
3524 } else {
3525 __ Insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
3526 }
3527 break;
3528 }
3529 case kL64: {
3530 // F64X2ReplaceLane
3531 __ F64x2ReplaceLane(i.OutputSimd128Register(),
3532 i.InputSimd128Register(0),
3533 i.InputDoubleRegister(2), i.InputInt8(1));
3534 break;
3535 }
3536 default:
3537 UNREACHABLE();
3538 }
3539
3540 } else {
3541 UNREACHABLE();
3542 }
3543 break;
3544 }
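// --- Editor's note: illustrative sketch, not part of this file. -------------
// For the F32x4ReplaceLane path above, insertps encodes the destination lane
// in bits [5:4] of its immediate, which is exactly the "<< 4 & 0x30"
// computation of the select byte:
#include <cstdint>

uint8_t InsertpsSelectForLane(uint8_t lane) {
  return static_cast<uint8_t>((lane << 4) & 0x30);  // lanes 0..3 -> 0x00..0x30
}
// -----------------------------------------------------------------------------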
3545 case kX64FSqrt: {
3546 LaneSize lane_size = LaneSizeField::decode(opcode);
3547 VectorLength vec_len = VectorLengthField::decode(opcode);
3548 if (vec_len == kV128) {
3549 XMMRegister dst = i.OutputSimd128Register();
3550 XMMRegister src = i.InputSimd128Register(0);
3551 switch (lane_size) {
3552 case kL16: {
3553 // F16x8Sqrt
3554 CpuFeatureScope f16c_scope(masm(), F16C);
3555 CpuFeatureScope avx_scope(masm(), AVX);
3556
3557 __ vcvtph2ps(kScratchSimd256Reg, src);
3558 __ vsqrtps(kScratchSimd256Reg, kScratchSimd256Reg);
3559 __ vcvtps2ph(dst, kScratchSimd256Reg, 0);
3560 break;
3561 }
3562 case kL32: {
3563 // F32x4Sqrt
3564 __ Sqrtps(dst, src);
3565 break;
3566 }
3567 case kL64: {
3568 // F64x2Sqrt
3569 __ Sqrtpd(dst, src);
3570 break;
3571 }
3572 default:
3573 UNREACHABLE();
3574 }
3575 } else if (vec_len == kV256) {
3576 YMMRegister dst = i.OutputSimd256Register();
3577 YMMRegister src = i.InputSimd256Register(0);
3578 CpuFeatureScope avx_scope(masm(), AVX);
3579 switch (lane_size) {
3580 case kL32: {
3581 // F32x8Sqrt
3582 __ vsqrtps(dst, src);
3583 break;
3584 }
3585 case kL64: {
3586 // F64x4Sqrt
3587 __ vsqrtpd(dst, src);
3588 break;
3589 }
3590 default:
3591 UNREACHABLE();
3592 }
3593 } else {
3594 UNREACHABLE();
3595 }
3596 break;
3597 }
3598 case kX64FAdd: {
3599 LaneSize lane_size = LaneSizeField::decode(opcode);
3600 VectorLength vec_len = VectorLengthField::decode(opcode);
3601 if (vec_len == kV128) {
3602 switch (lane_size) {
3603 case kL16:
3604 // F16x8Add
3605 ASSEMBLE_SIMD_F16x8_BINOP(vaddph);
3606 break;
3607 case kL32: {
3608 // F32x4Add
3609 ASSEMBLE_SIMD_BINOP(addps);
3610 break;
3611 }
3612 case kL64: {
3613 // F64x2Add
3614 ASSEMBLE_SIMD_BINOP(addpd);
3615 break;
3616 }
3617 default:
3618 UNREACHABLE();
3619 }
3620 } else if (vec_len == kV256) {
3621 switch (lane_size) {
3622 case kL32: {
3623 // F32x8Add
3624 ASSEMBLE_SIMD256_BINOP(addps, AVX);
3625 break;
3626 }
3627 case kL64: {
3628 // F64x4Add
3629 ASSEMBLE_SIMD256_BINOP(addpd, AVX);
3630 break;
3631 }
3632 default:
3633 UNREACHABLE();
3634 }
3635 } else {
3636 UNREACHABLE();
3637 }
3638 break;
3639 }
3640 case kX64FSub: {
3641 LaneSize lane_size = LaneSizeField::decode(opcode);
3642 VectorLength vec_len = VectorLengthField::decode(opcode);
3643 if (vec_len == kV128) {
3644 switch (lane_size) {
3645 case kL16:
3646 // F16x8Sub
3647 ASSEMBLE_SIMD_F16x8_BINOP(vsubph);
3648 break;
3649 case kL32: {
3650 // F32x4Sub
3651 ASSEMBLE_SIMD_BINOP(subps);
3652 break;
3653 }
3654 case kL64: {
3655 // F64x2Sub
3656 ASSEMBLE_SIMD_BINOP(subpd);
3657 break;
3658 }
3659 default:
3660 UNREACHABLE();
3661 }
3662 } else if (vec_len == kV256) {
3663 switch (lane_size) {
3664 case kL32: {
3665 // F32x8Sub
3666 ASSEMBLE_SIMD256_BINOP(subps, AVX);
3667 break;
3668 }
3669 case kL64: {
3670 // F64x4Sub
3671 ASSEMBLE_SIMD256_BINOP(subpd, AVX);
3672 break;
3673 }
3674 default:
3675 UNREACHABLE();
3676 }
3677 } else {
3678 UNREACHABLE();
3679 }
3680 break;
3681 }
3682 case kX64FMul: {
3683 LaneSize lane_size = LaneSizeField::decode(opcode);
3684 VectorLength vec_len = VectorLengthField::decode(opcode);
3685 if (vec_len == kV128) {
3686 switch (lane_size) {
3687 case kL16:
3688 // F16x8Mul
3689 ASSEMBLE_SIMD_F16x8_BINOP(vmulph);
3690 break;
3691 case kL32: {
3692 // F32x4Mul
3693 ASSEMBLE_SIMD_BINOP(mulps);
3694 break;
3695 }
3696 case kL64: {
3697 // F64x2Mul
3698 ASSEMBLE_SIMD_BINOP(mulpd);
3699 break;
3700 }
3701 default:
3702 UNREACHABLE();
3703 }
3704 } else if (vec_len == kV256) {
3705 switch (lane_size) {
3706 case kL64: {
3707 // F64x4Mul
3708 ASSEMBLE_SIMD256_BINOP(mulpd, AVX);
3709 break;
3710 }
3711 case kL32: {
3712 // F32x8Mul
3713 ASSEMBLE_SIMD256_BINOP(mulps, AVX);
3714 break;
3715 }
3716 default:
3717 UNREACHABLE();
3718 }
3719
3720 } else {
3721 UNREACHABLE();
3722 }
3723 break;
3724 }
3725 case kX64FDiv: {
3726 LaneSize lane_size = LaneSizeField::decode(opcode);
3727 VectorLength vec_len = VectorLengthField::decode(opcode);
3728 if (vec_len == kV128) {
3729 switch (lane_size) {
3730 case kL16:
3731 // F16x8Div
3732 ASSEMBLE_SIMD_F16x8_BINOP(vdivph);
3733 break;
3734 case kL32: {
3735 // F32x4Div
3736 ASSEMBLE_SIMD_BINOP(divps);
3737 break;
3738 }
3739 case kL64: {
3740 // F64x2Div
3741 ASSEMBLE_SIMD_BINOP(divpd);
3742 break;
3743 }
3744 default:
3745 UNREACHABLE();
3746 }
3747 } else if (vec_len == kV256) {
3748 switch (lane_size) {
3749 case kL32: {
3750 // F32x8Div
3751 ASSEMBLE_SIMD256_BINOP(divps, AVX);
3752 break;
3753 }
3754 case kL64: {
3755 // F64x4Div
3756 ASSEMBLE_SIMD256_BINOP(divpd, AVX);
3757 break;
3758 }
3759 default:
3760 UNREACHABLE();
3761 }
3762 } else {
3763 UNREACHABLE();
3764 }
3765 break;
3766 }
3767 case kX64FMin: {
3768 LaneSize lane_size = LaneSizeField::decode(opcode);
3769 VectorLength vec_len = VectorLengthField::decode(opcode);
3770 if (vec_len == kV128) {
3771 switch (lane_size) {
3772 case kL16: {
3773 // F16x8Min
3774 // F16x8Min packs the result into an XMM register, but uses it as a
3775 // temporary YMM register during computation. Cast dst to YMM here.
3776 YMMRegister ydst =
3777 YMMRegister::from_code(i.OutputSimd128Register().code());
3778 __ F16x8Min(ydst, i.InputSimd128Register(0),
3779 i.InputSimd128Register(1), i.TempSimd256Register(0),
3780 i.TempSimd256Register(1));
3781 break;
3782 }
3783 case kL32: {
3784 // F32x4Min
3785 __ F32x4Min(i.OutputSimd128Register(), i.InputSimd128Register(0),
3786 i.InputSimd128Register(1), kScratchDoubleReg);
3787 break;
3788 }
3789 case kL64: {
3790 // F64x2Min
3791 // Avoids a move in the no-AVX case if dst == src0.
3792 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3793 __ F64x2Min(i.OutputSimd128Register(), i.InputSimd128Register(0),
3794 i.InputSimd128Register(1), kScratchDoubleReg);
3795 break;
3796 }
3797 default:
3798 UNREACHABLE();
3799 }
3800 } else if (vec_len == kV256) {
3801 switch (lane_size) {
3802 case kL32: {
3803 // F32x8Min
3804 __ F32x8Min(i.OutputSimd256Register(), i.InputSimd256Register(0),
3805 i.InputSimd256Register(1), kScratchSimd256Reg);
3806 break;
3807 }
3808 case kL64: {
3809 // F64x4Min
3810 DCHECK_EQ(i.OutputSimd256Register(), i.InputSimd256Register(0));
3811 __ F64x4Min(i.OutputSimd256Register(), i.InputSimd256Register(0),
3812 i.InputSimd256Register(1), kScratchSimd256Reg);
3813 break;
3814 }
3815 default:
3816 UNREACHABLE();
3817 }
3818 } else {
3819 UNREACHABLE();
3820 }
3821 break;
3822 }
3823 case kX64FMax: {
3824 LaneSize lane_size = LaneSizeField::decode(opcode);
3825 VectorLength vec_len = VectorLengthField::decode(opcode);
3826 if (vec_len == kV128) {
3827 switch (lane_size) {
3828 case kL16: {
3829 // F16x8Max
3830 // F16x8Max packs its result into the XMM dst register, but uses it as
3831 // a temporary YMM register during computation. Cast dst to YMM here.
3832 YMMRegister ydst =
3833 YMMRegister::from_code(i.OutputSimd128Register().code());
3834 __ F16x8Max(ydst, i.InputSimd128Register(0),
3835 i.InputSimd128Register(1), i.TempSimd256Register(0),
3836 i.TempSimd256Register(1));
3837 break;
3838 }
3839 case kL32: {
3840 // F32x4Max
3841 __ F32x4Max(i.OutputSimd128Register(), i.InputSimd128Register(0),
3842 i.InputSimd128Register(1), kScratchDoubleReg);
3843 break;
3844 }
3845 case kL64: {
3846 // F64x2Max
3847 // Avoids a move in the no-AVX case if dst == src0.
3848 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3849 __ F64x2Max(i.OutputSimd128Register(), i.InputSimd128Register(0),
3850 i.InputSimd128Register(1), kScratchDoubleReg);
3851 break;
3852 }
3853 default:
3854 UNREACHABLE();
3855 }
3856 } else if (vec_len == kV256) {
3857 switch (lane_size) {
3858 case kL32: {
3859 // F32x8Max
3860 __ F32x8Max(i.OutputSimd256Register(), i.InputSimd256Register(0),
3861 i.InputSimd256Register(1), kScratchSimd256Reg);
3862 break;
3863 }
3864 case kL64: {
3865 // F64x4Max
3866 DCHECK_EQ(i.OutputSimd256Register(), i.InputSimd256Register(0));
3867 __ F64x4Max(i.OutputSimd256Register(), i.InputSimd256Register(0),
3868 i.InputSimd256Register(1), kScratchSimd256Reg);
3869 break;
3870 }
3871 default:
3872 UNREACHABLE();
3873 }
3874 } else {
3875 UNREACHABLE();
3876 }
3877 break;
3878 }
3879 case kX64FEq: {
3880 LaneSize lane_size = LaneSizeField::decode(opcode);
3881 VectorLength vec_len = VectorLengthField::decode(opcode);
3882 if (vec_len == kV128) {
3883 switch (lane_size) {
3884 case kL16: {
3885 // F16x8Eq
3886 ASSEMBLE_SIMD_F16x8_RELOP(vcmpeqps);
3887 break;
3888 }
3889 case kL32: {
3890 // F32x4Eq
3891 ASSEMBLE_SIMD_BINOP(cmpeqps);
3892 break;
3893 }
3894 case kL64: {
3895 // F64x2Eq
3896 ASSEMBLE_SIMD_BINOP(cmpeqpd);
3897 break;
3898 }
3899 default:
3900 UNREACHABLE();
3901 }
3902 } else if (vec_len == kV256) {
3903 switch (lane_size) {
3904 case kL32: {
3905 // F32x8Eq
3906 ASSEMBLE_SIMD256_BINOP(cmpeqps, AVX);
3907 break;
3908 }
3909 case kL64: {
3910 // F64x4Eq
3911 ASSEMBLE_SIMD256_BINOP(cmpeqpd, AVX);
3912 break;
3913 }
3914 default:
3915 UNREACHABLE();
3916 }
3917 } else {
3918 UNREACHABLE();
3919 }
3920 break;
3921 }
3922 case kX64FNe: {
3923 LaneSize lane_size = LaneSizeField::decode(opcode);
3924 VectorLength vec_len = VectorLengthField::decode(opcode);
3925 if (vec_len == kV128) {
3926 switch (lane_size) {
3927 case kL16: {
3928 // F16x8Ne
3929 ASSEMBLE_SIMD_F16x8_RELOP(vcmpneqps);
3930 break;
3931 }
3932 case kL32: {
3933 // F32x4Ne
3934 ASSEMBLE_SIMD_BINOP(cmpneqps);
3935 break;
3936 }
3937 case kL64: {
3938 // F64x2Ne
3939 ASSEMBLE_SIMD_BINOP(cmpneqpd);
3940 break;
3941 }
3942 default:
3943 UNREACHABLE();
3944 }
3945 } else if (vec_len == kV256) {
3946 switch (lane_size) {
3947 case kL32: {
3948 // F32x8Ne
3949 ASSEMBLE_SIMD256_BINOP(cmpneqps, AVX);
3950 break;
3951 }
3952 case kL64: {
3953 // F64x4Ne
3954 ASSEMBLE_SIMD256_BINOP(cmpneqpd, AVX);
3955 break;
3956 }
3957 default:
3958 UNREACHABLE();
3959 }
3960 } else {
3961 UNREACHABLE();
3962 }
3963 break;
3964 }
3965 case kX64FLt: {
3966 LaneSize lane_size = LaneSizeField::decode(opcode);
3967 VectorLength vec_len = VectorLengthField::decode(opcode);
3968 if (vec_len == kV128) {
3969 switch (lane_size) {
3970 case kL16: {
3971 // F16x8Lt
3972 ASSEMBLE_SIMD_F16x8_RELOP(vcmpltps);
3973 break;
3974 }
3975 case kL32: {
3976 // F32x4Lt
3977 ASSEMBLE_SIMD_BINOP(cmpltps);
3978 break;
3979 }
3980 case kL64: {
3981 // F64x2Lt
3982 ASSEMBLE_SIMD_BINOP(cmpltpd);
3983 break;
3984 }
3985 default:
3986 UNREACHABLE();
3987 }
3988 } else if (vec_len == kV256) {
3989 switch (lane_size) {
3990 case kL32: {
3991 // F32x8Lt
3992 ASSEMBLE_SIMD256_BINOP(cmpltps, AVX);
3993 break;
3994 }
3995 case kL64: {
3996 // F64x4Lt
3997 ASSEMBLE_SIMD256_BINOP(cmpltpd, AVX);
3998 break;
3999 }
4000 default:
4001 UNREACHABLE();
4002 }
4003 } else {
4004 UNREACHABLE();
4005 }
4006 break;
4007 }
4008 case kX64FLe: {
4009 LaneSize lane_size = LaneSizeField::decode(opcode);
4010 VectorLength vec_len = VectorLengthField::decode(opcode);
4011 if (vec_len == kV128) {
4012 switch (lane_size) {
4013 case kL16: {
4014 // F16x8Le
4015 ASSEMBLE_SIMD_F16x8_RELOP(vcmpleps);
4016 break;
4017 }
4018 case kL32: {
4019 // F32x4Le
4020 ASSEMBLE_SIMD_BINOP(cmpleps);
4021 break;
4022 }
4023 case kL64: {
4024 // F64x2Le
4025 ASSEMBLE_SIMD_BINOP(cmplepd);
4026 break;
4027 }
4028 default:
4029 UNREACHABLE();
4030 }
4031 } else if (vec_len == kV256) {
4032 switch (lane_size) {
4033 case kL32: {
4034 // F32x8Le
4035 ASSEMBLE_SIMD256_BINOP(cmpleps, AVX);
4036 break;
4037 }
4038 case kL64: {
4039 // F64x4Le
4040 ASSEMBLE_SIMD256_BINOP(cmplepd, AVX);
4041 break;
4042 }
4043 default:
4044 UNREACHABLE();
4045 }
4046 } else {
4047 UNREACHABLE();
4048 }
4049 break;
4050 }
4051 case kX64F64x2Qfma: {
4052 __ F64x2Qfma(i.OutputSimd128Register(), i.InputSimd128Register(0),
4053 i.InputSimd128Register(1), i.InputSimd128Register(2),
4055 break;
4056 }
4057 case kX64F64x2Qfms: {
4058 __ F64x2Qfms(i.OutputSimd128Register(), i.InputSimd128Register(0),
4059 i.InputSimd128Register(1), i.InputSimd128Register(2),
4061 break;
4062 }
4063 case kX64F64x4Qfma: {
4064 __ F64x4Qfma(i.OutputSimd256Register(), i.InputSimd256Register(0),
4065 i.InputSimd256Register(1), i.InputSimd256Register(2),
4067 break;
4068 }
4069 case kX64F64x4Qfms: {
4070 __ F64x4Qfms(i.OutputSimd256Register(), i.InputSimd256Register(0),
4071 i.InputSimd256Register(1), i.InputSimd256Register(2),
4073 break;
4074 }
4075 case kX64F64x2ConvertLowI32x4S: {
4076 __ Cvtdq2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
4077 break;
4078 }
4079 case kX64F64x4ConvertI32x4S: {
4080 CpuFeatureScope avx_scope(masm(), AVX);
4081 __ vcvtdq2pd(i.OutputSimd256Register(), i.InputSimd128Register(0));
4082 break;
4083 }
4084 case kX64F64x2ConvertLowI32x4U: {
4085 __ F64x2ConvertLowI32x4U(i.OutputSimd128Register(),
4086 i.InputSimd128Register(0), kScratchRegister);
4087 break;
4088 }
4089 case kX64F64x2PromoteLowF32x4: {
4090 if (HasAddressingMode(instr)) {
4091 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
4092 __ Cvtps2pd(i.OutputSimd128Register(), i.MemoryOperand());
4093 } else {
4094 __ Cvtps2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
4095 }
4096 break;
4097 }
4098 case kX64F32x4DemoteF64x2Zero: {
4099 __ Cvtpd2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
4100 break;
4101 }
4102 case kX64F32x4DemoteF64x4: {
4103 CpuFeatureScope avx_scope(masm(), AVX);
4104 __ vcvtpd2ps(i.OutputSimd128Register(), i.InputSimd256Register(0));
4105 break;
4106 }
4107 case kX64I32x4TruncSatF64x2SZero: {
4108 __ I32x4TruncSatF64x2SZero(i.OutputSimd128Register(),
4109 i.InputSimd128Register(0), kScratchDoubleReg,
4111 break;
4112 }
4113 case kX64I32x4TruncSatF64x2UZero: {
4114 __ I32x4TruncSatF64x2UZero(i.OutputSimd128Register(),
4115 i.InputSimd128Register(0), kScratchDoubleReg,
4117 break;
4118 }
4119 case kX64F32x4SConvertI32x4: {
4120 __ Cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
4121 break;
4122 }
4123 case kX64F32x8SConvertI32x8: {
4124 CpuFeatureScope avx_scope(masm(), AVX);
4125 __ vcvtdq2ps(i.OutputSimd256Register(), i.InputSimd256Register(0));
4126 break;
4127 }
4128 case kX64I16x8SConvertF16x8: {
4129 CpuFeatureScope avx_scope(masm(), AVX);
4130 CpuFeatureScope f16c_scope(masm(), F16C);
4131 CpuFeatureScope avx2_scope(masm(), AVX2);
4132
4133 YMMRegister ydst =
4134 YMMRegister::from_code(i.OutputSimd128Register().code());
4135 __ I16x8SConvertF16x8(ydst, i.InputSimd128Register(0), kScratchSimd256Reg,
4137 break;
4138 }
4139 case kX64I16x8UConvertF16x8: {
4140 CpuFeatureScope avx_scope(masm(), AVX);
4141 CpuFeatureScope f16c_scope(masm(), F16C);
4142 CpuFeatureScope avx2_scope(masm(), AVX2);
4143
4144 YMMRegister ydst =
4145 YMMRegister::from_code(i.OutputSimd128Register().code());
4146 __ I16x8TruncF16x8U(ydst, i.InputSimd128Register(0), kScratchSimd256Reg);
4147 break;
4148 }
4149 case kX64F16x8SConvertI16x8: {
4150 CpuFeatureScope f16c_scope(masm(), F16C);
4151 CpuFeatureScope avx_scope(masm(), AVX);
4152 CpuFeatureScope avx2_scope(masm(), AVX2);
4153 __ vpmovsxwd(kScratchSimd256Reg, i.InputSimd128Register(0));
4155 __ vcvtps2ph(i.OutputSimd128Register(), kScratchSimd256Reg, 0);
4156 break;
4157 }
4158 case kX64F16x8UConvertI16x8: {
4159 CpuFeatureScope f16c_scope(masm(), F16C);
4160 CpuFeatureScope avx_scope(masm(), AVX);
4161 CpuFeatureScope avx2_scope(masm(), AVX2);
4162 __ vpmovzxwd(kScratchSimd256Reg, i.InputSimd128Register(0));
4164 __ vcvtps2ph(i.OutputSimd128Register(), kScratchSimd256Reg, 0);
4165 break;
4166 }
4167 case kX64F16x8DemoteF32x4Zero: {
4168 CpuFeatureScope f16c_scope(masm(), F16C);
4169 __ vcvtps2ph(i.OutputSimd128Register(), i.InputSimd128Register(0), 0);
4170 break;
4171 }
4172 case kX64F16x8DemoteF64x2Zero: {
4173 CpuFeatureScope f16c_scope(masm(), F16C);
4174 CpuFeatureScope avx_scope(masm(), AVX);
4175 Register tmp = i.TempRegister(0);
4176 XMMRegister ftmp = i.TempSimd128Register(1);
4177 XMMRegister ftmp2 = i.TempSimd128Register(2);
4178 XMMRegister dst = i.OutputSimd128Register();
4179 XMMRegister src = i.InputSimd128Register(0);
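     // Demote the two f64 lanes one at a time: extract and convert lane 1
     // separately, re-insert it as f16 lane 1, then zero the remaining lanes.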
4180 __ F64x2ExtractLane(ftmp, src, 1);
4181 // Cvtpd2ph requires dst and src to not overlap.
4182 __ Cvtpd2ph(ftmp2, ftmp, tmp);
4183 __ Cvtpd2ph(dst, src, tmp);
4184 __ vmovd(tmp, ftmp2);
4185 __ vpinsrw(dst, dst, tmp, 1);
4186 // Set ftmp to 0.
4187 __ pxor(ftmp, ftmp);
4188 // Reset all unaffected lanes.
4189 __ F64x2ReplaceLane(dst, dst, ftmp, 1);
4190 __ vinsertps(dst, dst, ftmp, (1 << 4) & 0x30);
4191 break;
4192 }
4193 case kX64F32x4PromoteLowF16x8: {
4194 CpuFeatureScope f16c_scope(masm(), F16C);
4195 __ vcvtph2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
4196 break;
4197 }
4198 case kX64F32x4UConvertI32x4: {
4199 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
4200 DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
4201 XMMRegister dst = i.OutputSimd128Register();
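     // There is no packed unsigned int->float conversion. Split each u32 lane
     // into its low 16 bits (converted exactly) and the remaining high part,
     // which is halved so it fits the signed range, converted, doubled, and
     // then added to the low part.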
4202 __ Pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
4203 __ Pblendw(kScratchDoubleReg, dst, uint8_t{0x55}); // get lo 16 bits
4204 __ Psubd(dst, kScratchDoubleReg); // get hi 16 bits
4205 __ Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
4206 __ Psrld(dst, uint8_t{1}); // divide by 2 to get in unsigned range
4207 __ Cvtdq2ps(dst, dst); // convert hi exactly
4208 __ Addps(dst, dst); // double hi, exactly
4209 __ Addps(dst, kScratchDoubleReg); // add hi and lo, may round.
4210 break;
4211 }
4212 case kX64F32x8UConvertI32x8: {
4213 DCHECK_EQ(i.OutputSimd256Register(), i.InputSimd256Register(0));
4214 DCHECK_NE(i.OutputSimd256Register(), kScratchSimd256Reg);
4215 CpuFeatureScope avx_scope(masm(), AVX);
4216 CpuFeatureScope avx2_scope(masm(), AVX2);
4217 YMMRegister dst = i.OutputSimd256Register();
4219 kScratchSimd256Reg); // zeros
4221 uint8_t{0x55}); // get lo 16 bits
4222 __ vpsubd(dst, dst, kScratchSimd256Reg); // get hi 16 bits
4223 __ vcvtdq2ps(kScratchSimd256Reg,
4224 kScratchSimd256Reg); // convert lo exactly
4225 __ vpsrld(dst, dst, uint8_t{1}); // divide by 2 to get in unsigned range
4226 __ vcvtdq2ps(dst, dst); // convert hi
4227 __ vaddps(dst, dst, dst); // double hi
4228 __ vaddps(dst, dst, kScratchSimd256Reg);
4229 break;
4230 }
4231 case kX64F32x4Qfma: {
4232 __ F32x4Qfma(i.OutputSimd128Register(), i.InputSimd128Register(0),
4233 i.InputSimd128Register(1), i.InputSimd128Register(2),
4235 break;
4236 }
4237 case kX64F32x4Qfms: {
4238 __ F32x4Qfms(i.OutputSimd128Register(), i.InputSimd128Register(0),
4239 i.InputSimd128Register(1), i.InputSimd128Register(2),
4241 break;
4242 }
4243 case kX64F32x8Qfma: {
4244 __ F32x8Qfma(i.OutputSimd256Register(), i.InputSimd256Register(0),
4245 i.InputSimd256Register(1), i.InputSimd256Register(2),
4247 break;
4248 }
4249 case kX64F32x8Qfms: {
4250 __ F32x8Qfms(i.OutputSimd256Register(), i.InputSimd256Register(0),
4251 i.InputSimd256Register(1), i.InputSimd256Register(2),
4253 break;
4254 }
4255 case kX64F16x8Qfma: {
4256 YMMRegister ydst =
4257 YMMRegister::from_code(i.OutputSimd128Register().code());
4258 __ F16x8Qfma(ydst, i.InputSimd128Register(0), i.InputSimd128Register(1),
4259 i.InputSimd128Register(2), i.TempSimd256Register(0),
4260 i.TempSimd256Register(1));
4261 break;
4262 }
4263 case kX64F16x8Qfms: {
4264 YMMRegister ydst =
4265 YMMRegister::from_code(i.OutputSimd128Register().code());
4266 __ F16x8Qfms(ydst, i.InputSimd128Register(0), i.InputSimd128Register(1),
4267 i.InputSimd128Register(2), i.TempSimd256Register(0),
4268 i.TempSimd256Register(1));
4269 break;
4270 }
4271 case kX64Minps: {
4272 VectorLength vec_len = VectorLengthField::decode(opcode);
4273 if (vec_len == kV128) {
4274 ASSEMBLE_SIMD_BINOP(minps);
4275 } else if (vec_len == kV256) {
4276 ASSEMBLE_SIMD256_BINOP(minps, AVX);
4277 } else {
4278 UNREACHABLE();
4279 }
4280 break;
4281 }
4282 case kX64Maxps: {
4283 VectorLength vec_len = VectorLengthField::decode(opcode);
4284 if (vec_len == kV128) {
4285 ASSEMBLE_SIMD_BINOP(maxps);
4286 } else if (vec_len == kV256) {
4287 ASSEMBLE_SIMD256_BINOP(maxps, AVX);
4288 } else {
4289 UNREACHABLE();
4290 }
4291 break;
4292 }
4293 case kX64Minph: {
4294 DCHECK_EQ(VectorLengthField::decode(opcode), kV128);
4296 break;
4297 }
4298 case kX64Maxph: {
4299 DCHECK_EQ(VectorLengthField::decode(opcode), kV128);
4301 break;
4302 }
4303 case kX64F32x8Pmin: {
4304 YMMRegister dst = i.OutputSimd256Register();
4305 CpuFeatureScope avx_scope(masm(), AVX);
4306 __ vminps(dst, i.InputSimd256Register(0), i.InputSimd256Register(1));
4307 break;
4308 }
4309 case kX64F32x8Pmax: {
4310 YMMRegister dst = i.OutputSimd256Register();
4311 CpuFeatureScope avx_scope(masm(), AVX);
4312 __ vmaxps(dst, i.InputSimd256Register(0), i.InputSimd256Register(1));
4313 break;
4314 }
4315 case kX64F64x4Pmin: {
4316 YMMRegister dst = i.OutputSimd256Register();
4317 CpuFeatureScope avx_scope(masm(), AVX);
4318 __ vminpd(dst, i.InputSimd256Register(0), i.InputSimd256Register(1));
4319 break;
4320 }
4321 case kX64F64x4Pmax: {
4322 YMMRegister dst = i.OutputSimd256Register();
4323 CpuFeatureScope avx_scope(masm(), AVX);
4324 __ vmaxpd(dst, i.InputSimd256Register(0), i.InputSimd256Register(1));
4325 break;
4326 }
4327 case kX64F32x4Round: {
4328 RoundingMode const mode =
4329 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
4330 __ Roundps(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
4331 break;
4332 }
4333 case kX64F16x8Round: {
4334 CpuFeatureScope f16c_scope(masm(), F16C);
4335 CpuFeatureScope avx_scope(masm(), AVX);
4336 RoundingMode const mode =
4337 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
4338 __ vcvtph2ps(kScratchSimd256Reg, i.InputSimd128Register(0));
4339 __ vroundps(kScratchSimd256Reg, kScratchSimd256Reg, mode);
4340 __ vcvtps2ph(i.OutputSimd128Register(), kScratchSimd256Reg, 0);
4341 break;
4342 }
4343 case kX64F64x2Round: {
4344 RoundingMode const mode =
4345 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
4346 __ Roundpd(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
4347 break;
4348 }
4349 case kX64Minpd: {
4350 VectorLength vec_len = VectorLengthField::decode(opcode);
4351 if (vec_len == kV128) {
4352 ASSEMBLE_SIMD_BINOP(minpd);
4353 } else if (vec_len == kV256) {
4354 ASSEMBLE_SIMD256_BINOP(minpd, AVX);
4355 } else {
4356 UNREACHABLE();
4357 }
4358 break;
4359 }
4360 case kX64Maxpd: {
4361 VectorLength vec_len = VectorLengthField::decode(opcode);
4362 if (vec_len == kV128) {
4363 ASSEMBLE_SIMD_BINOP(maxpd);
4364 } else if (vec_len == kV256) {
4365 ASSEMBLE_SIMD256_BINOP(maxpd, AVX);
4366 } else {
4367 UNREACHABLE();
4368 }
4369 break;
4370 }
4371 case kX64ISplat: {
4372 LaneSize lane_size = LaneSizeField::decode(opcode);
4373 VectorLength vec_len = VectorLengthField::decode(opcode);
4374 if (vec_len == kV128) {
4375 switch (lane_size) {
4376 case kL8: {
4377 // I8x16Splat
4378 XMMRegister dst = i.OutputSimd128Register();
4379 if (HasRegisterInput(instr, 0)) {
4380 __ I8x16Splat(dst, i.InputRegister(0), kScratchDoubleReg);
4381 } else {
4382 __ I8x16Splat(dst, i.InputOperand(0), kScratchDoubleReg);
4383 }
4384 break;
4385 }
4386 case kL16: {
4387 // I16x8Splat
4388 XMMRegister dst = i.OutputSimd128Register();
4389 if (HasRegisterInput(instr, 0)) {
4390 __ I16x8Splat(dst, i.InputRegister(0));
4391 } else {
4392 __ I16x8Splat(dst, i.InputOperand(0));
4393 }
4394 break;
4395 }
4396 case kL32: {
4397 // I32x4Splat
4398 XMMRegister dst = i.OutputSimd128Register();
4399 if (HasRegisterInput(instr, 0)) {
4400 __ Movd(dst, i.InputRegister(0));
4401 } else {
4402 // TODO(v8:9198): Pshufd can load from aligned memory once
4403 // supported.
4404 __ Movd(dst, i.InputOperand(0));
4405 }
4406 __ Pshufd(dst, dst, uint8_t{0x0});
4407 break;
4408 }
4409 case kL64: {
4410 // I64x2Splat
4411 XMMRegister dst = i.OutputSimd128Register();
4412 if (HasRegisterInput(instr, 0)) {
4413 __ Movq(dst, i.InputRegister(0));
4414 __ Movddup(dst, dst);
4415 } else {
4416 __ Movddup(dst, i.InputOperand(0));
4417 }
4418 break;
4419 }
4420 default:
4421 UNREACHABLE();
4422 }
4423
4424 } else if (vec_len == kV256) {
4425 switch (lane_size) {
4426 case kL8: {
4427 // I8x32Splat
4428 YMMRegister dst = i.OutputSimd256Register();
4429 if (HasRegisterInput(instr, 0)) {
4430 __ I8x32Splat(dst, i.InputRegister(0));
4431 } else {
4432 __ I8x32Splat(dst, i.InputOperand(0));
4433 }
4434 break;
4435 }
4436 case kL16: {
4437 // I16x16Splat
4438 YMMRegister dst = i.OutputSimd256Register();
4439 if (HasRegisterInput(instr, 0)) {
4440 __ I16x16Splat(dst, i.InputRegister(0));
4441 } else {
4442 __ I16x16Splat(dst, i.InputOperand(0));
4443 }
4444 break;
4445 }
4446 case kL32: {
4447 // I32x8Splat
4448 YMMRegister dst = i.OutputSimd256Register();
4449 if (HasRegisterInput(instr, 0)) {
4450 __ I32x8Splat(dst, i.InputRegister(0));
4451 } else {
4452 __ I32x8Splat(dst, i.InputOperand(0));
4453 }
4454 break;
4455 }
4456 case kL64: {
4457 // I64x4Splat
4458 YMMRegister dst = i.OutputSimd256Register();
4459 if (HasRegisterInput(instr, 0)) {
4460 __ I64x4Splat(dst, i.InputRegister(0));
4461 } else {
4462 __ I64x4Splat(dst, i.InputOperand(0));
4463 }
4464 break;
4465 }
4466 default:
4467 UNREACHABLE();
4468 }
4469 } else {
4470 UNREACHABLE();
4471 }
4472 break;
4473 }
4474 case kX64IExtractLane: {
4475 LaneSize lane_size = LaneSizeField::decode(opcode);
4476 VectorLength vec_len = VectorLengthField::decode(opcode);
4477 if (vec_len == kV128) {
4478 switch (lane_size) {
4479 case kL32: {
4480 // I32x4ExtractLane
4481 __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0),
4482 i.InputInt8(1));
4483 break;
4484 }
4485 case kL64: {
4486 // I64x2ExtractLane
4487 __ Pextrq(i.OutputRegister(), i.InputSimd128Register(0),
4488 i.InputInt8(1));
4489 break;
4490 }
4491 default:
4492 UNREACHABLE();
4493 }
4494
4495 } else {
4496 UNREACHABLE();
4497 }
4498 break;
4499 }
4500 case kX64IAbs: {
4501 LaneSize lane_size = LaneSizeField::decode(opcode);
4502 VectorLength vec_len = VectorLengthField::decode(opcode);
4503 if (vec_len == kV128) {
4504 XMMRegister dst = i.OutputSimd128Register();
4505 XMMRegister src = i.InputSimd128Register(0);
4506 switch (lane_size) {
4507 case kL8: {
4508 // I8x16Abs
4509 __ Pabsb(dst, src);
4510 break;
4511 }
4512 case kL16: {
4513 // I16x8Abs
4514 __ Pabsw(dst, src);
4515 break;
4516 }
4517 case kL32: {
4518 // I32x4Abs
4519 __ Pabsd(dst, src);
4520 break;
4521 }
4522 case kL64: {
4523 // I64x2Abs
4524 __ I64x2Abs(dst, src, kScratchDoubleReg);
4525 break;
4526 }
4527 default:
4528 UNREACHABLE();
4529 }
4530 } else if (vec_len == kV256) {
4531 YMMRegister dst = i.OutputSimd256Register();
4532 YMMRegister src = i.InputSimd256Register(0);
4533 CpuFeatureScope avx_scope(masm(), AVX2);
4534 switch (lane_size) {
4535 case kL8: {
4536 // I8x32Abs
4537 __ vpabsb(dst, src);
4538 break;
4539 }
4540 case kL16: {
4541 // I16x16Abs
4542 __ vpabsw(dst, src);
4543 break;
4544 }
4545 case kL32: {
4546 // I32x8Abs
4547 __ vpabsd(dst, src);
4548 break;
4549 }
4550 case kL64: {
4551 // I64x4Abs
4552 UNIMPLEMENTED();
4553 }
4554 default:
4555 UNREACHABLE();
4556 }
4557
4558 } else {
4559 UNREACHABLE();
4560 }
4561 break;
4562 }
4563 case kX64INeg: {
4564 LaneSize lane_size = LaneSizeField::decode(opcode);
4565 VectorLength vec_len = VectorLengthField::decode(opcode);
4566 if (vec_len == kV128) {
4567 XMMRegister dst = i.OutputSimd128Register();
4568 XMMRegister src = i.InputSimd128Register(0);
4569 switch (lane_size) {
4570 case kL8: {
4571 // I8x16Neg
4572 if (dst == src) {
4574 __ Psignb(dst, kScratchDoubleReg);
4575 } else {
4576 __ Pxor(dst, dst);
4577 __ Psubb(dst, src);
4578 }
4579 break;
4580 }
4581 case kL16: {
4582 // I16x8Neg
4583 if (dst == src) {
4585 __ Psignw(dst, kScratchDoubleReg);
4586 } else {
4587 __ Pxor(dst, dst);
4588 __ Psubw(dst, src);
4589 }
4590 break;
4591 }
4592 case kL32: {
4593 // I32x4Neg
4594 if (dst == src) {
4596 __ Psignd(dst, kScratchDoubleReg);
4597 } else {
4598 __ Pxor(dst, dst);
4599 __ Psubd(dst, src);
4600 }
4601 break;
4602 }
4603 case kL64: {
4604 // I64x2Neg
4605 __ I64x2Neg(dst, src, kScratchDoubleReg);
4606 break;
4607 }
4608 default:
4609 UNREACHABLE();
4610 }
4611 } else if (vec_len == kV256) {
4612 YMMRegister dst = i.OutputSimd256Register();
4613 YMMRegister src = i.InputSimd256Register(0);
4614 CpuFeatureScope avx_scope(masm(), AVX2);
4615 switch (lane_size) {
4616 case kL8: {
4617 // I8x32Neg
4618 if (dst == src) {
4621 __ vpsignb(dst, dst, kScratchSimd256Reg);
4622 } else {
4623 __ vpxor(dst, dst, dst);
4624 __ vpsubb(dst, dst, src);
4625 }
4626 break;
4627 }
4628 case kL16: {
4629 // I16x16Neg
4630 if (dst == src) {
4633 __ vpsignw(dst, dst, kScratchSimd256Reg);
4634 } else {
4635 __ vpxor(dst, dst, dst);
4636 __ vpsubw(dst, dst, src);
4637 }
4638 break;
4639 }
4640 case kL32: {
4641 // I32x8Neg
4642 if (dst == src) {
4645 __ vpsignd(dst, dst, kScratchSimd256Reg);
4646 } else {
4647 __ vpxor(dst, dst, dst);
4648 __ vpsubd(dst, dst, src);
4649 }
4650 break;
4651 }
4652 case kL64: {
4653 // I64x4Neg
4654 UNIMPLEMENTED();
4655 }
4656 default:
4657 UNREACHABLE();
4658 }
4659 } else {
4660 UNREACHABLE();
4661 }
4662 break;
4663 }
4664 case kX64IBitMask: {
4665 LaneSize lane_size = LaneSizeField::decode(opcode);
4666 VectorLength vec_len = VectorLengthField::decode(opcode);
4667 if (vec_len == kV128) {
4668 switch (lane_size) {
4669 case kL8: {
4670 // I8x16BitMask
4671 __ Pmovmskb(i.OutputRegister(), i.InputSimd128Register(0));
4672 break;
4673 }
4674 case kL16: {
4675 // I16x8BitMask
4676 Register dst = i.OutputRegister();
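          // There is no 16-bit variant of Pmovmskb. Pack the eight words to
          // bytes (the source lands in the high half), take the byte mask,
          // and keep its upper eight bits.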
4677 __ Packsswb(kScratchDoubleReg, i.InputSimd128Register(0));
4678 __ Pmovmskb(dst, kScratchDoubleReg);
4679 __ shrq(dst, Immediate(8));
4680 break;
4681 }
4682 case kL32: {
4683 // I32x4BitMask
4684 __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
4685 break;
4686 }
4687 case kL64: {
4688 // I64x2BitMask
4689 __ Movmskpd(i.OutputRegister(), i.InputSimd128Register(0));
4690 break;
4691 }
4692 default:
4693 UNREACHABLE();
4694 }
4695 } else {
4696 UNREACHABLE();
4697 }
4698 break;
4699 }
4700 case kX64IShl: {
4701 LaneSize lane_size = LaneSizeField::decode(opcode);
4702 VectorLength vec_len = VectorLengthField::decode(opcode);
4703 if (vec_len == kV128) {
4704 switch (lane_size) {
4705 case kL8: {
4706 // I8x16Shl
4707 XMMRegister dst = i.OutputSimd128Register();
4708 XMMRegister src = i.InputSimd128Register(0);
4709 DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
4710 if (HasImmediateInput(instr, 1)) {
4711 __ I8x16Shl(dst, src, i.InputInt3(1), kScratchRegister,
4713 } else {
4714 __ I8x16Shl(dst, src, i.InputRegister(1), kScratchRegister,
4715 kScratchDoubleReg, i.TempSimd128Register(0));
4716 }
4717 break;
4718 }
4719 case kL16: {
4720 // I16x8Shl
4721 // Take shift value modulo 2^4.
4722 ASSEMBLE_SIMD_SHIFT(psllw, 4);
4723 break;
4724 }
4725 case kL32: {
4726 // I32x4Shl
4727 // Take shift value modulo 2^5.
4728 ASSEMBLE_SIMD_SHIFT(pslld, 5);
4729 break;
4730 }
4731 case kL64: {
4732 // I64x2Shl
4733 // Take shift value modulo 2^6.
4734 ASSEMBLE_SIMD_SHIFT(psllq, 6);
4735 break;
4736 }
4737 default:
4738 UNREACHABLE();
4739 }
4740 } else if (vec_len == kV256) {
4741 switch (lane_size) {
4742 case kL8: {
4743 // I8x32Shl
4744 UNIMPLEMENTED();
4745 }
4746 case kL16: {
4747 // I16x16Shl
4748 // Take shift value modulo 2^4.
4749 ASSEMBLE_SIMD256_SHIFT(psllw, 4);
4750 break;
4751 }
4752 case kL32: {
4753 // I32x8Shl
4754 // Take shift value modulo 2^5.
4755 ASSEMBLE_SIMD256_SHIFT(pslld, 5);
4756 break;
4757 }
4758 case kL64: {
4759 // I64x4Shl
4760 // Take shift value modulo 2^6.
4761 ASSEMBLE_SIMD256_SHIFT(psllq, 6);
4762 break;
4763 }
4764 default:
4765 UNREACHABLE();
4766 }
4767 } else {
4768 UNREACHABLE();
4769 }
4770 break;
4771 }
4772 case kX64IShrS: {
4773 LaneSize lane_size = LaneSizeField::decode(opcode);
4774 VectorLength vec_len = VectorLengthField::decode(opcode);
4775 if (vec_len == kV128) {
4776 switch (lane_size) {
4777 case kL8: {
4778 // I8x16ShrS
4779 XMMRegister dst = i.OutputSimd128Register();
4780 XMMRegister src = i.InputSimd128Register(0);
4781 DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
4782 if (HasImmediateInput(instr, 1)) {
4783 __ I8x16ShrS(dst, src, i.InputInt3(1), kScratchDoubleReg);
4784 } else {
4785 __ I8x16ShrS(dst, src, i.InputRegister(1), kScratchRegister,
4786 kScratchDoubleReg, i.TempSimd128Register(0));
4787 }
4788 break;
4789 }
4790 case kL16: {
4791 // I16x8ShrS
4792 // Take shift value modulo 2^4.
4793 ASSEMBLE_SIMD_SHIFT(psraw, 4);
4794 break;
4795 }
4796 case kL32: {
4797 // I32x4ShrS
4798 // Take shift value modulo 2^5.
4799 ASSEMBLE_SIMD_SHIFT(psrad, 5);
4800 break;
4801 }
4802 case kL64: {
4803 // I64x2ShrS
4804 // TODO(zhin): there is vpsraq, but it requires AVX512.
4805 XMMRegister dst = i.OutputSimd128Register();
4806 XMMRegister src = i.InputSimd128Register(0);
4807 if (HasImmediateInput(instr, 1)) {
4808 __ I64x2ShrS(dst, src, i.InputInt6(1), kScratchDoubleReg);
4809 } else {
4810 __ I64x2ShrS(dst, src, i.InputRegister(1), kScratchDoubleReg,
4811 i.TempSimd128Register(0), kScratchRegister);
4812 }
4813 break;
4814 }
4815 default:
4816 UNREACHABLE();
4817 }
4818 } else if (vec_len == kV256) {
4819 switch (lane_size) {
4820 case kL8: {
4821 // I8x32ShrS
4822 UNIMPLEMENTED();
4823 }
4824 case kL16: {
4825 // I16x16ShrS
4826 // Take shift value modulo 2^4.
4827 ASSEMBLE_SIMD256_SHIFT(psraw, 4);
4828 break;
4829 }
4830 case kL32: {
4831 // I32x8ShrS
4832 // Take shift value modulo 2^5.
4833 ASSEMBLE_SIMD256_SHIFT(psrad, 5);
4834 break;
4835 }
4836 case kL64: {
4837 // I64x4ShrS
4838 UNIMPLEMENTED();
4839 }
4840 default:
4841 UNREACHABLE();
4842 }
4843 } else {
4844 UNREACHABLE();
4845 }
4846 break;
4847 }
4848 case kX64IAdd: {
4849 LaneSize lane_size = LaneSizeField::decode(opcode);
4850 VectorLength vec_len = VectorLengthField::decode(opcode);
4851 if (vec_len == kV128) {
4852 switch (lane_size) {
4853 case kL8: {
4854 // I8x16Add
4855 ASSEMBLE_SIMD_BINOP(paddb);
4856 break;
4857 }
4858 case kL16: {
4859 // I16x8Add
4860 ASSEMBLE_SIMD_BINOP(paddw);
4861 break;
4862 }
4863 case kL32: {
4864 // I32x4Add
4865 ASSEMBLE_SIMD_BINOP(paddd);
4866 break;
4867 }
4868 case kL64: {
4869 // I64x2Add
4870 ASSEMBLE_SIMD_BINOP(paddq);
4871 break;
4872 }
4873 default:
4874 UNREACHABLE();
4875 }
4876 } else if (vec_len == kV256) {
4877 switch (lane_size) {
4878 case kL64: {
4879 // I64x4Add
4880 ASSEMBLE_SIMD256_BINOP(paddq, AVX2);
4881 break;
4882 }
4883 case kL32: {
4884 // I32x8Add
4885 ASSEMBLE_SIMD256_BINOP(paddd, AVX2);
4886 break;
4887 }
4888 case kL16: {
4889 // I16x16Add
4890 ASSEMBLE_SIMD256_BINOP(paddw, AVX2);
4891 break;
4892 }
4893 case kL8: {
4894 // I8x32Add
4895 ASSEMBLE_SIMD256_BINOP(paddb, AVX2);
4896 break;
4897 }
4898 default:
4899 UNREACHABLE();
4900 }
4901 } else {
4902 UNREACHABLE();
4903 }
4904 break;
4905 }
4906 case kX64ISub: {
4907 LaneSize lane_size = LaneSizeField::decode(opcode);
4908 VectorLength vec_len = VectorLengthField::decode(opcode);
4909 if (vec_len == kV128) {
4910 switch (lane_size) {
4911 case kL8: {
4912 // I8x16Sub
4913 ASSEMBLE_SIMD_BINOP(psubb);
4914 break;
4915 }
4916 case kL16: {
4917 // I16x8Sub
4918 ASSEMBLE_SIMD_BINOP(psubw);
4919 break;
4920 }
4921 case kL32: {
4922 // I32x4Sub
4923 ASSEMBLE_SIMD_BINOP(psubd);
4924 break;
4925 }
4926 case kL64: {
4927 // I64x2Sub
4928 ASSEMBLE_SIMD_BINOP(psubq);
4929 break;
4930 }
4931 default:
4932 UNREACHABLE();
4933 }
4934 } else if (vec_len == kV256) {
4935 switch (lane_size) {
4936 case kL64: {
4937 // I64x4Sub
4938 ASSEMBLE_SIMD256_BINOP(psubq, AVX2);
4939 break;
4940 }
4941 case kL32: {
4942 // I32x8Sub
4943 ASSEMBLE_SIMD256_BINOP(psubd, AVX2);
4944 break;
4945 }
4946 case kL16: {
4947 // I16x16Sub
4948 ASSEMBLE_SIMD256_BINOP(psubw, AVX2);
4949 break;
4950 }
4951 case kL8: {
4952 // I8x32Sub
4953 ASSEMBLE_SIMD256_BINOP(psubb, AVX2);
4954 break;
4955 }
4956 default:
4957 UNREACHABLE();
4958 }
4959 } else {
4960 UNREACHABLE();
4961 }
4962 break;
4963 }
4964 case kX64IMul: {
4965 LaneSize lane_size = LaneSizeField::decode(opcode);
4966 VectorLength vec_len = VectorLengthField::decode(opcode);
4967 if (vec_len == kV128) {
4968 switch (lane_size) {
4969 case kL16: {
4970 // I16x8Mul
4971 ASSEMBLE_SIMD_BINOP(pmullw);
4972 break;
4973 }
4974 case kL32: {
4975 // I32x4Mul
4976 CpuFeatureScope scope(masm(), SSE4_1);
4977 ASSEMBLE_SIMD_BINOP(pmulld);
4978 break;
4979 }
4980 case kL64: {
4981 // I64x2Mul
4982 __ I64x2Mul(i.OutputSimd128Register(), i.InputSimd128Register(0),
4983 i.InputSimd128Register(1), i.TempSimd128Register(0),
4985 break;
4986 }
4987 default:
4988 UNREACHABLE();
4989 }
4990 } else if (vec_len == kV256) {
4991 switch (lane_size) {
4992 case kL16: {
4993 // I16x16Mul
4994 ASSEMBLE_SIMD256_BINOP(pmullw, AVX2);
4995 break;
4996 }
4997 case kL32: {
4998 // I32x8Mul
4999 ASSEMBLE_SIMD256_BINOP(pmulld, AVX2);
5000 break;
5001 }
5002 case kL64: {
5003 // I64x4Mul
5004 __ I64x4Mul(i.OutputSimd256Register(), i.InputSimd256Register(0),
5005 i.InputSimd256Register(1), i.TempSimd256Register(0),
5007 break;
5008 }
5009 default:
5010 UNREACHABLE();
5011 }
5012 } else {
5013 UNREACHABLE();
5014 }
5015 break;
5016 }
5017 case kX64IEq: {
5018 LaneSize lane_size = LaneSizeField::decode(opcode);
5019 VectorLength vec_len = VectorLengthField::decode(opcode);
5020 if (vec_len == kV128) {
5021 switch (lane_size) {
5022 case kL8: {
5023 // I8x16Eq
5024 ASSEMBLE_SIMD_BINOP(pcmpeqb);
5025 break;
5026 }
5027 case kL16: {
5028 // I16x8Eq
5029 ASSEMBLE_SIMD_BINOP(pcmpeqw);
5030 break;
5031 }
5032 case kL32: {
5033 // I32x4Eq
5034 ASSEMBLE_SIMD_BINOP(pcmpeqd);
5035 break;
5036 }
5037 case kL64: {
5038 // I64x2Eq
5039 CpuFeatureScope sse_scope(masm(), SSE4_1);
5040 ASSEMBLE_SIMD_BINOP(pcmpeqq);
5041 break;
5042 }
5043 default:
5044 UNREACHABLE();
5045 }
5046 } else if (vec_len == kV256) {
5047 switch (lane_size) {
5048 case kL8: {
5049 // I8x32Eq
5050 ASSEMBLE_SIMD256_BINOP(pcmpeqb, AVX2);
5051 break;
5052 }
5053 case kL16: {
5054 // I16x16Eq
5055 ASSEMBLE_SIMD256_BINOP(pcmpeqw, AVX2);
5056 break;
5057 }
5058 case kL32: {
5059 // I32x8Eq
5060 ASSEMBLE_SIMD256_BINOP(pcmpeqd, AVX2);
5061 break;
5062 }
5063 case kL64: {
5064 // I64x4Eq
5065 ASSEMBLE_SIMD256_BINOP(pcmpeqq, AVX2);
5066 break;
5067 }
5068 default:
5069 UNREACHABLE();
5070 }
5071 } else {
5072 UNREACHABLE();
5073 }
5074 break;
5075 }
5076 case kX64INe: {
5077 LaneSize lane_size = LaneSizeField::decode(opcode);
5078 VectorLength vec_len = VectorLengthField::decode(opcode);
5079 if (vec_len == kV128) {
5080 switch (lane_size) {
5081 case kL8: {
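          // I8x16Ne
          // There is no packed integer "not equal": compute equality and
          // invert the result by XORing it with an all-ones mask.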
5082 XMMRegister dst = i.OutputSimd128Register();
5083 __ Pcmpeqb(dst, i.InputSimd128Register(1));
5085 __ Pxor(dst, kScratchDoubleReg);
5086 break;
5087 }
5088 case kL16: {
5089 // I16x8Ne
5090 XMMRegister dst = i.OutputSimd128Register();
5091 __ Pcmpeqw(dst, i.InputSimd128Register(1));
5093 __ Pxor(dst, kScratchDoubleReg);
5094 break;
5095 }
5096 case kL32: {
5097 // I32x4Ne
5098 __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
5100 __ Pxor(i.OutputSimd128Register(), kScratchDoubleReg);
5101 break;
5102 }
5103 case kL64: {
5104 // I64x2Ne
5105 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
5106 __ Pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(1));
5108 __ Pxor(i.OutputSimd128Register(), kScratchDoubleReg);
5109 break;
5110 }
5111 default:
5112 UNREACHABLE();
5113 }
5114 } else if (vec_len == kV256) {
5115 DCHECK_EQ(i.OutputSimd256Register(), i.InputSimd256Register(0));
5116 YMMRegister dst = i.OutputSimd256Register();
5117 CpuFeatureScope avx2_scope(masm(), AVX2);
5118 switch (lane_size) {
5119 case kL8: {
5120 // I8x32Ne
5121 __ vpcmpeqb(dst, dst, i.InputSimd256Register(1));
5124 __ vpxor(dst, dst, kScratchSimd256Reg);
5125 break;
5126 }
5127 case kL16: {
5128 // I16x16Ne
5129 __ vpcmpeqw(dst, dst, i.InputSimd256Register(1));
5132 __ vpxor(dst, dst, kScratchSimd256Reg);
5133 break;
5134 }
5135 case kL32: {
5136 // I32x8Ne
5137 __ vpcmpeqd(dst, dst, i.InputSimd256Register(1));
5140 __ vpxor(dst, dst, kScratchSimd256Reg);
5141 break;
5142 }
5143 case kL64: {
5144 // I64x4Ne
5145 __ vpcmpeqq(dst, dst, i.InputSimd256Register(1));
5148 __ vpxor(dst, dst, kScratchSimd256Reg);
5149 break;
5150 }
5151 default:
5152 UNREACHABLE();
5153 }
5154 } else {
5155 UNREACHABLE();
5156 }
5157 break;
5158 }
5159 case kX64IGtS: {
5160 LaneSize lane_size = LaneSizeField::decode(opcode);
5161 VectorLength vec_len = VectorLengthField::decode(opcode);
5162 if (vec_len == kV128) {
5163 switch (lane_size) {
5164 case kL8: {
5165 // I8x16GtS
5166 ASSEMBLE_SIMD_BINOP(pcmpgtb);
5167 break;
5168 }
5169 case kL16: {
5170 // I16x8GtS
5171 ASSEMBLE_SIMD_BINOP(pcmpgtw);
5172 break;
5173 }
5174 case kL32: {
5175 // I32x4GtS
5176 ASSEMBLE_SIMD_BINOP(pcmpgtd);
5177 break;
5178 }
5179 case kL64: {
5180 // I64x2GtS
5181 __ I64x2GtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
5182 i.InputSimd128Register(1), kScratchDoubleReg);
5183 break;
5184 }
5185 default:
5186 UNREACHABLE();
5187 }
5188 } else if (vec_len == kV256) {
5189 switch (lane_size) {
5190 case kL8: {
5191 // I8x32GtS
5192 ASSEMBLE_SIMD256_BINOP(pcmpgtb, AVX2);
5193 break;
5194 }
5195 case kL16: {
5196 // I16x16GtS
5197 ASSEMBLE_SIMD256_BINOP(pcmpgtw, AVX2);
5198 break;
5199 }
5200 case kL32: {
5201 // I32x8GtS
5202 ASSEMBLE_SIMD256_BINOP(pcmpgtd, AVX2);
5203 break;
5204 }
5205 case kL64: {
5206 // I64x4GtS
5207 ASSEMBLE_SIMD256_BINOP(pcmpgtq, AVX2);
5208 break;
5209 }
5210 default:
5211 UNREACHABLE();
5212 }
5213 } else {
5214 UNREACHABLE();
5215 }
5216 break;
5217 }
5218 case kX64IGeS: {
5219 LaneSize lane_size = LaneSizeField::decode(opcode);
5220 VectorLength vec_len = VectorLengthField::decode(opcode);
5221 if (vec_len == kV128) {
5222 switch (lane_size) {
5223 case kL8: {
5224 // I8x16GeS
5225 XMMRegister dst = i.OutputSimd128Register();
5226 XMMRegister src = i.InputSimd128Register(1);
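          // Signed x >= y is computed as min(x, y) == y.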
5227 __ Pminsb(dst, src);
5228 __ Pcmpeqb(dst, src);
5229 break;
5230 }
5231 case kL16: {
5232 // I16x8GeS
5233 XMMRegister dst = i.OutputSimd128Register();
5234 XMMRegister src = i.InputSimd128Register(1);
5235 __ Pminsw(dst, src);
5236 __ Pcmpeqw(dst, src);
5237 break;
5238 }
5239 case kL32: {
5240 // I32x4GeS
5241 XMMRegister dst = i.OutputSimd128Register();
5242 XMMRegister src = i.InputSimd128Register(1);
5243 __ Pminsd(dst, src);
5244 __ Pcmpeqd(dst, src);
5245 break;
5246 }
5247 case kL64: {
5248 // I64x2GeS
5249 __ I64x2GeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
5250 i.InputSimd128Register(1), kScratchDoubleReg);
5251 break;
5252 }
5253 default:
5254 UNREACHABLE();
5255 }
5256 } else if (vec_len == kV256) {
5257 YMMRegister dst = i.OutputSimd256Register();
5258 YMMRegister src = i.InputSimd256Register(1);
5259 CpuFeatureScope avx2_scope(masm(), AVX2);
5260 switch (lane_size) {
5261 case kL8: {
5262 // I8x32GeS
5263 DCHECK_EQ(i.OutputSimd256Register(), i.InputSimd256Register(0));
5264 __ vpminsb(dst, dst, src);
5265 __ vpcmpeqb(dst, dst, src);
5266 break;
5267 }
5268 case kL16: {
5269 // I16x16GeS
5270 DCHECK_EQ(i.OutputSimd256Register(), i.InputSimd256Register(0));
5271 __ vpminsw(dst, dst, src);
5272 __ vpcmpeqw(dst, dst, src);
5273 break;
5274 }
5275 case kL32: {
5276 // I32x8GeS
5277 DCHECK_EQ(i.OutputSimd256Register(), i.InputSimd256Register(0));
5278 __ vpminsd(dst, dst, src);
5279 __ vpcmpeqd(dst, dst, src);
5280 break;
5281 }
5282 case kL64: {
5283 // I64x4GeS
5284 __ vpcmpgtq(dst, i.InputSimd256Register(1),
5285 i.InputSimd256Register(0));
5288 __ vpxor(dst, dst, kScratchSimd256Reg);
5289 break;
5290 }
5291 default:
5292 UNREACHABLE();
5293 }
5294 } else {
5295 UNREACHABLE();
5296 }
5297 break;
5298 }
5299 case kX64IShrU: {
5300 LaneSize lane_size = LaneSizeField::decode(opcode);
5301 VectorLength vec_len = VectorLengthField::decode(opcode);
5302 if (vec_len == kV128) {
5303 switch (lane_size) {
5304 case kL8: {
5305 // I8x16ShrU
5306 XMMRegister dst = i.OutputSimd128Register();
5307 XMMRegister src = i.InputSimd128Register(0);
5308 DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
5309 if (HasImmediateInput(instr, 1)) {
5310 __ I8x16ShrU(dst, src, i.InputInt3(1), kScratchRegister,
5312 } else {
5313 __ I8x16ShrU(dst, src, i.InputRegister(1), kScratchRegister,
5314 kScratchDoubleReg, i.TempSimd128Register(0));
5315 }
5316 break;
5317 }
5318 case kL16: {
5319 // I16x8ShrU
5320 // Take shift value modulo 2^4.
5321 ASSEMBLE_SIMD_SHIFT(psrlw, 4);
5322 break;
5323 }
5324 case kL32: {
5325 // I32x4ShrU
5326 // Take shift value modulo 2^5.
5327 ASSEMBLE_SIMD_SHIFT(psrld, 5);
5328 break;
5329 }
5330 case kL64: {
5331 // I64x2ShrU
5332 // Take shift value modulo 2^6.
5333 ASSEMBLE_SIMD_SHIFT(psrlq, 6);
5334 break;
5335 }
5336 default:
5337 UNREACHABLE();
5338 }
5339 } else if (vec_len == kV256) {
5340 switch (lane_size) {
5341 case kL8: {
5342 // I8x32ShrU
5343 UNIMPLEMENTED();
5344 }
5345 case kL16: {
5346 // I16x16ShrU
5347 // Take shift value modulo 2^4.
5348 ASSEMBLE_SIMD256_SHIFT(psrlw, 4);
5349 break;
5350 }
5351 case kL32: {
5352 // I32x8ShrU
5353 // Take shift value modulo 2^5.
5354 ASSEMBLE_SIMD256_SHIFT(psrld, 5);
5355 break;
5356 }
5357 case kL64: {
5358 // I64x4ShrU
5359 // Take shift value modulo 2^6.
5360 ASSEMBLE_SIMD256_SHIFT(psrlq, 6);
5361 break;
5362 }
5363 default:
5364 UNREACHABLE();
5365 }
5366 } else {
5367 UNREACHABLE();
5368 }
5369 break;
5370 }
5371 case kX64I64x2ExtMulLowI32x4S: {
5372 __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
5373 i.InputSimd128Register(1), kScratchDoubleReg, /*low=*/true,
5374 /*is_signed=*/true);
5375 break;
5376 }
5377 case kX64I64x2ExtMulHighI32x4S: {
5378 __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
5379 i.InputSimd128Register(1), kScratchDoubleReg,
5380 /*low=*/false,
5381 /*is_signed=*/true);
5382 break;
5383 }
5384 case kX64I64x2ExtMulLowI32x4U: {
5385 __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
5386 i.InputSimd128Register(1), kScratchDoubleReg, /*low=*/true,
5387 /*is_signed=*/false);
5388 break;
5389 }
5390 case kX64I64x2ExtMulHighI32x4U: {
5391 __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
5392 i.InputSimd128Register(1), kScratchDoubleReg,
5393 /*low=*/false,
5394 /*is_signed=*/false);
5395 break;
5396 }
5397 case kX64I64x2SConvertI32x4Low: {
5398 __ Pmovsxdq(i.OutputSimd128Register(), i.InputSimd128Register(0));
5399 break;
5400 }
5401 case kX64I64x2SConvertI32x4High: {
5402 __ I64x2SConvertI32x4High(i.OutputSimd128Register(),
5403 i.InputSimd128Register(0));
5404 break;
5405 }
5406 case kX64I64x4SConvertI32x4: {
5407 CpuFeatureScope avx2_scope(masm(), AVX2);
5408 __ vpmovsxdq(i.OutputSimd256Register(), i.InputSimd128Register(0));
5409 break;
5410 }
5411 case kX64I64x2UConvertI32x4Low: {
5412 __ Pmovzxdq(i.OutputSimd128Register(), i.InputSimd128Register(0));
5413 break;
5414 }
5415 case kX64I64x2UConvertI32x4High: {
5416 __ I64x2UConvertI32x4High(i.OutputSimd128Register(),
5417 i.InputSimd128Register(0), kScratchDoubleReg);
5418 break;
5419 }
5420 case kX64I64x4UConvertI32x4: {
5421 CpuFeatureScope avx2_scope(masm(), AVX2);
5422 __ vpmovzxdq(i.OutputSimd256Register(), i.InputSimd128Register(0));
5423 break;
5424 }
5425 case kX64I32x4SConvertF32x4: {
5426 __ I32x4SConvertF32x4(i.OutputSimd128Register(),
5427 i.InputSimd128Register(0), kScratchDoubleReg,
5429 break;
5430 }
5431 case kX64I32x8SConvertF32x8: {
5432 __ I32x8SConvertF32x8(i.OutputSimd256Register(),
5433 i.InputSimd256Register(0), kScratchSimd256Reg,
5435 break;
5436 }
5437 case kX64I32x4SConvertI16x8Low: {
5438 __ Pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
5439 break;
5440 }
5441 case kX64I32x4SConvertI16x8High: {
5442 __ I32x4SConvertI16x8High(i.OutputSimd128Register(),
5443 i.InputSimd128Register(0));
5444 break;
5445 }
5446 case kX64I32x8SConvertI16x8: {
5447 CpuFeatureScope avx2_scope(masm(), AVX2);
5448 __ vpmovsxwd(i.OutputSimd256Register(), i.InputSimd128Register(0));
5449 break;
5450 }
5451 case kX64IMinS: {
5452 LaneSize lane_size = LaneSizeField::decode(opcode);
5453 VectorLength vec_len = VectorLengthField::decode(opcode);
5454 if (vec_len == kV128) {
5455 switch (lane_size) {
5456 case kL8: {
5457 // I8x16MinS
5458 CpuFeatureScope scope(masm(), SSE4_1);
5459 ASSEMBLE_SIMD_BINOP(pminsb);
5460 break;
5461 }
5462 case kL16: {
5463 // I16x8MinS
5464 ASSEMBLE_SIMD_BINOP(pminsw);
5465 break;
5466 }
5467 case kL32: {
5468 // I32x4MinS
5469 CpuFeatureScope scope(masm(), SSE4_1);
5470 ASSEMBLE_SIMD_BINOP(pminsd);
5471 break;
5472 }
5473 default:
5474 UNREACHABLE();
5475 }
5476 } else if (vec_len == kV256) {
5477 switch (lane_size) {
5478 case kL8: {
5479 // I8x32MinS
5480 ASSEMBLE_SIMD256_BINOP(pminsb, AVX2);
5481 break;
5482 }
5483 case kL16: {
5484 // I16x16MinS
5485 ASSEMBLE_SIMD256_BINOP(pminsw, AVX2);
5486 break;
5487 }
5488 case kL32: {
5489 // I32x8MinS
5490 ASSEMBLE_SIMD256_BINOP(pminsd, AVX2);
5491 break;
5492 }
5493 default:
5494 UNREACHABLE();
5495 }
5496 } else {
5497 UNREACHABLE();
5498 }
5499 break;
5500 }
5501 case kX64IMaxS: {
5502 LaneSize lane_size = LaneSizeField::decode(opcode);
5503 VectorLength vec_len = VectorLengthField::decode(opcode);
5504 if (vec_len == kV128) {
5505 switch (lane_size) {
5506 case kL8: {
5507 // I8x16MaxS
5508 CpuFeatureScope scope(masm(), SSE4_1);
5509 ASSEMBLE_SIMD_BINOP(pmaxsb);
5510 break;
5511 }
5512 case kL16: {
5513 // I16x8MaxS
5514 ASSEMBLE_SIMD_BINOP(pmaxsw);
5515 break;
5516 }
5517 case kL32: {
5518 // I32x4MaxS
5519 CpuFeatureScope scope(masm(), SSE4_1);
5520 ASSEMBLE_SIMD_BINOP(pmaxsd);
5521 break;
5522 }
5523 default:
5524 UNREACHABLE();
5525 }
5526 } else if (vec_len == kV256) {
5527 switch (lane_size) {
5528 case kL8: {
5529 // I8x32MaxS
5530 ASSEMBLE_SIMD256_BINOP(pmaxsb, AVX2);
5531 break;
5532 }
5533 case kL16: {
5534 // I16x16MaxS
5535 ASSEMBLE_SIMD256_BINOP(pmaxsw, AVX2);
5536 break;
5537 }
5538 case kL32: {
5539 // I32x8MaxS
5540 ASSEMBLE_SIMD256_BINOP(pmaxsd, AVX2);
5541 break;
5542 }
5543 default:
5544 UNREACHABLE();
5545 }
5546 } else {
5547 UNREACHABLE();
5548 }
5549 break;
5550 }
5551 case kX64I32x4UConvertF32x4: {
5552 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
5553 XMMRegister dst = i.OutputSimd128Register();
5554 XMMRegister tmp = i.TempSimd128Register(0);
5555 XMMRegister tmp2 = i.TempSimd128Register(1);
5556 __ I32x4TruncF32x4U(dst, dst, tmp, tmp2);
5557 break;
5558 }
5559 case kX64I32x8UConvertF32x8: {
5560 DCHECK_EQ(i.OutputSimd256Register(), i.InputSimd256Register(0));
5561 CpuFeatureScope avx_scope(masm(), AVX);
5562 CpuFeatureScope avx2_scope(masm(), AVX2);
5563 YMMRegister dst = i.OutputSimd256Register();
5564 YMMRegister tmp1 = i.TempSimd256Register(0);
5565 YMMRegister tmp2 = i.TempSimd256Register(1);
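     // Saturating f32 -> u32: NaN and negative lanes clamp to 0. Lanes at or
     // above 2^31 (where the signed conversion would return INT32_MIN) are
     // fixed up by also converting (src - 2^31) and adding it back; lanes
     // beyond the u32 range end up saturated at 0xFFFFFFFF.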
5566 // NAN->0, negative->0
5567 __ vpxor(tmp2, tmp2, tmp2);
5568 __ vmaxps(dst, dst, tmp2);
5569 // tmp2: float representation of max_signed.
5570 __ vpcmpeqd(tmp2, tmp2, tmp2);
5571 __ vpsrld(tmp2, tmp2, uint8_t{1}); // 0x7fffffff
5572 __ vcvtdq2ps(tmp2, tmp2); // 0x4f000000
5573 // tmp1: convert (src-max_signed).
5574 // Positive overflow lanes -> 0x7FFFFFFF
5575 // Negative lanes -> 0
5576 __ vmovaps(tmp1, dst);
5577 __ vsubps(tmp1, tmp1, tmp2);
5578 __ vcmpleps(tmp2, tmp2, tmp1);
5579 __ vcvttps2dq(tmp1, tmp1);
5580 __ vpxor(tmp1, tmp1, tmp2);
5581 __ vpxor(tmp2, tmp2, tmp2);
5582 __ vpmaxsd(tmp1, tmp1, tmp2);
5583 // convert. Overflow lanes above max_signed will be 0x80000000
5584 __ vcvttps2dq(dst, dst);
5585 // Add (src-max_signed) for overflow lanes.
5586 __ vpaddd(dst, dst, tmp1);
5587 break;
5588 }
5589 case kX64I32x4UConvertI16x8Low: {
5590 __ Pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
5591 break;
5592 }
5593 case kX64I32x4UConvertI16x8High: {
5594 __ I32x4UConvertI16x8High(i.OutputSimd128Register(),
5595 i.InputSimd128Register(0), kScratchDoubleReg);
5596 break;
5597 }
5598 case kX64I32x8UConvertI16x8: {
5599 CpuFeatureScope avx2_scope(masm(), AVX2);
5600 __ vpmovzxwd(i.OutputSimd256Register(), i.InputSimd128Register(0));
5601 break;
5602 }
5603 case kX64IMinU: {
5604 LaneSize lane_size = LaneSizeField::decode(opcode);
5605 VectorLength vec_len = VectorLengthField::decode(opcode);
5606 if (vec_len == kV128) {
5607 switch (lane_size) {
5608 case kL8: {
5609 // I8x16MinU
5610 ASSEMBLE_SIMD_BINOP(pminub);
5611 break;
5612 }
5613 case kL16: {
5614 // I16x8MinU
5615 CpuFeatureScope scope(masm(), SSE4_1);
5616 ASSEMBLE_SIMD_BINOP(pminuw);
5617 break;
5618 }
5619 case kL32: {
5620 // I32x4MinU
5621 CpuFeatureScope scope(masm(), SSE4_1);
5622 ASSEMBLE_SIMD_BINOP(pminud);
5623 break;
5624 }
5625 default:
5626 UNREACHABLE();
5627 }
5628 } else if (vec_len == kV256) {
5629 switch (lane_size) {
5630 case kL8: {
5631 // I8x32MinU
5632 ASSEMBLE_SIMD256_BINOP(pminub, AVX2);
5633 break;
5634 }
5635 case kL16: {
5636 // I16x16MinU
5637 ASSEMBLE_SIMD256_BINOP(pminuw, AVX2);
5638 break;
5639 }
5640 case kL32: {
5641 // I32x8MinU
5642 ASSEMBLE_SIMD256_BINOP(pminud, AVX2);
5643 break;
5644 }
5645 default:
5646 UNREACHABLE();
5647 }
5648 } else {
5649 UNREACHABLE();
5650 }
5651 break;
5652 }
5653 case kX64IMaxU: {
5654 LaneSize lane_size = LaneSizeField::decode(opcode);
5655 VectorLength vec_len = VectorLengthField::decode(opcode);
5656 if (vec_len == kV128) {
5657 switch (lane_size) {
5658 case kL8: {
5659 // I8x16MaxU
5660 ASSEMBLE_SIMD_BINOP(pmaxub);
5661 break;
5662 }
5663 case kL16: {
5664 // I16x8MaxU
5665 CpuFeatureScope scope(masm(), SSE4_1);
5666 ASSEMBLE_SIMD_BINOP(pmaxuw);
5667 break;
5668 }
5669 case kL32: {
5670 // I32x4MaxU
5671 CpuFeatureScope scope(masm(), SSE4_1);
5672 ASSEMBLE_SIMD_BINOP(pmaxud);
5673 break;
5674 }
5675 default:
5676 UNREACHABLE();
5677 }
5678 } else if (vec_len == kV256) {
5679 switch (lane_size) {
5680 case kL8: {
5681 // I8x32MaxU
5682 ASSEMBLE_SIMD256_BINOP(pmaxub, AVX2);
5683 break;
5684 }
5685 case kL16: {
5686 // I16x16MaxU
5687 ASSEMBLE_SIMD256_BINOP(pmaxuw, AVX2);
5688 break;
5689 }
5690 case kL32: {
5691 // I32x8MaxU
5692 ASSEMBLE_SIMD256_BINOP(pmaxud, AVX2);
5693 break;
5694 }
5695 default:
5696 UNREACHABLE();
5697 }
5698 } else {
5699 UNREACHABLE();
5700 }
5701 break;
5702 }
5703 case kX64IGtU: {
5704 LaneSize lane_size = LaneSizeField::decode(opcode);
5705 VectorLength vec_len = VectorLengthField::decode(opcode);
5706 if (vec_len == kV128) {
5707 XMMRegister dst = i.OutputSimd128Register();
5708 XMMRegister src = i.InputSimd128Register(1);
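       // Unsigned x > y is computed as NOT(max(x, y) == y), since there is no
       // packed unsigned greater-than instruction.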
5709 switch (lane_size) {
5710 case kL8: {
5711 __ Pmaxub(dst, src);
5712 __ Pcmpeqb(dst, src);
5714 __ Pxor(dst, kScratchDoubleReg);
5715 break;
5716 }
5717 case kL16: {
5718 // I16x8GtU
5719 __ Pmaxuw(dst, src);
5720 __ Pcmpeqw(dst, src);
5722 __ Pxor(dst, kScratchDoubleReg);
5723 break;
5724 }
5725 case kL32: {
5726 // I32x4GtU
5727 __ Pmaxud(dst, src);
5728 __ Pcmpeqd(dst, src);
5730 __ Pxor(dst, kScratchDoubleReg);
5731 break;
5732 }
5733 default:
5734 UNREACHABLE();
5735 }
5736 } else if (vec_len == kV256) {
5737 DCHECK_EQ(i.OutputSimd256Register(), i.InputSimd256Register(0));
5738 YMMRegister dst = i.OutputSimd256Register();
5739 YMMRegister src = i.InputSimd256Register(1);
5740 CpuFeatureScope avx2_scope(masm(), AVX2);
5741 switch (lane_size) {
5742 case kL8: {
5743 // I8x32GtU
5744 __ vpmaxub(dst, dst, src);
5745 __ vpcmpeqb(dst, dst, src);
5748 __ vpxor(dst, dst, kScratchSimd256Reg);
5749 break;
5750 }
5751 case kL16: {
5752 // I16x16GtU
5753 __ vpmaxuw(dst, dst, src);
5754 __ vpcmpeqw(dst, dst, src);
5757 __ vpxor(dst, dst, kScratchSimd256Reg);
5758 break;
5759 }
5760 case kL32: {
5761 // I32x8GtU
5762 __ vpmaxud(dst, dst, src);
5763 __ vpcmpeqd(dst, dst, src);
5766 __ vpxor(dst, dst, kScratchSimd256Reg);
5767 break;
5768 }
5769 default:
5770 UNREACHABLE();
5771 }
5772 } else {
5773 UNREACHABLE();
5774 }
5775 break;
5776 }
5777 case kX64IGeU: {
5778 LaneSize lane_size = LaneSizeField::decode(opcode);
5779 VectorLength vec_len = VectorLengthField::decode(opcode);
5780 if (vec_len == kV128) {
5781 XMMRegister dst = i.OutputSimd128Register();
5782 XMMRegister src = i.InputSimd128Register(1);
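       // Unsigned x >= y is computed as min(x, y) == y.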
5783 switch (lane_size) {
5784 case kL8: {
5785 // I8x16GeU
5786 __ Pminub(dst, src);
5787 __ Pcmpeqb(dst, src);
5788 break;
5789 }
5790 case kL16: {
5791 // I16x8GeU
5792 __ Pminuw(dst, src);
5793 __ Pcmpeqw(dst, src);
5794 break;
5795 }
5796 case kL32: {
5797 // I32x4GeU
5798 __ Pminud(dst, src);
5799 __ Pcmpeqd(dst, src);
5800 break;
5801 }
5802 default:
5803 UNREACHABLE();
5804 }
5805 } else if (vec_len == kV256) {
5806 DCHECK_EQ(i.OutputSimd256Register(), i.InputSimd256Register(0));
5807 YMMRegister dst = i.OutputSimd256Register();
5808 YMMRegister src = i.InputSimd256Register(1);
5809 CpuFeatureScope avx2_scope(masm(), AVX2);
5810 switch (lane_size) {
5811 case kL8: {
5812 // I8x32GeU
5813 __ vpminub(dst, dst, src);
5814 __ vpcmpeqb(dst, dst, src);
5815 break;
5816 }
5817 case kL16: {
5818 // I16x16GeU
5819 __ vpminuw(dst, dst, src);
5820 __ vpcmpeqw(dst, dst, src);
5821 break;
5822 }
5823 case kL32: {
5824 // I32x8GeU
5825 __ vpminud(dst, dst, src);
5826 __ vpcmpeqd(dst, dst, src);
5827 break;
5828 }
5829 default:
5830 UNREACHABLE();
5831 }
5832 } else {
5833 UNREACHABLE();
5834 }
5835 break;
5836 }
5837 case kX64I32x4DotI16x8S: {
5838 ASSEMBLE_SIMD_BINOP(pmaddwd);
5839 break;
5840 }
5841 case kX64I32x4DotI8x16I7x16AddS: {
5842 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(2));
5843 // If AVX_VNNI is supported, pass kScratchDoubleReg twice as unused
5844 // arguments.
5845 XMMRegister tmp = kScratchDoubleReg;
5846 if (!(CpuFeatures::IsSupported(AVX_VNNI) ||
5847 CpuFeatures::IsSupported(AVX_VNNI_INT8))) {
5848 tmp = i.TempSimd128Register(0);
5849 }
5850 __ I32x4DotI8x16I7x16AddS(
5851 i.OutputSimd128Register(), i.InputSimd128Register(0),
5852 i.InputSimd128Register(1), i.InputSimd128Register(2),
5853 kScratchDoubleReg, tmp);
5854 break;
5855 }
5856 case kX64I32x8DotI8x32I7x32AddS: {
5857 DCHECK_EQ(i.OutputSimd256Register(), i.InputSimd256Register(2));
5858 // If AVX_VNNI is supported, pass kScratchSimd256Reg twice as unused
5859 // arguments.
5860 YMMRegister tmp = kScratchSimd256Reg;
5861 if (!CpuFeatures::IsSupported(AVX_VNNI)) {
5862 tmp = i.TempSimd256Register(0);
5863 }
5864 __ I32x8DotI8x32I7x32AddS(
5865 i.OutputSimd256Register(), i.InputSimd256Register(0),
5866 i.InputSimd256Register(1), i.InputSimd256Register(2),
5867 kScratchSimd256Reg, tmp);
5868 break;
5869 }
5870 case kX64I32x4ExtAddPairwiseI16x8S: {
5871 __ I32x4ExtAddPairwiseI16x8S(i.OutputSimd128Register(),
5872 i.InputSimd128Register(0), kScratchRegister);
5873 break;
5874 }
5875 case kX64I32x8ExtAddPairwiseI16x16S: {
5876 __ I32x8ExtAddPairwiseI16x16S(i.OutputSimd256Register(),
5877 i.InputSimd256Register(0),
5879 break;
5880 }
5881 case kX64I32x4ExtAddPairwiseI16x8U: {
5882 __ I32x4ExtAddPairwiseI16x8U(i.OutputSimd128Register(),
5883 i.InputSimd128Register(0),
5885 break;
5886 }
5887 case kX64I32x8ExtAddPairwiseI16x16U: {
5888 __ I32x8ExtAddPairwiseI16x16U(i.OutputSimd256Register(),
5889 i.InputSimd256Register(0),
5891 break;
5892 }
5893 case kX64I32X4ShiftZeroExtendI8x16: {
5894 XMMRegister dst = i.OutputSimd128Register();
5895 XMMRegister src = i.InputSimd128Register(0);
5896 uint8_t shift = i.InputUint8(1);
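     // Shift the wanted bytes to the bottom of the vector with Palignr, then
     // zero-extend the low four bytes to 32-bit lanes.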
5897 if (shift != 0) {
5898 __ Palignr(dst, src, shift);
5899 __ Pmovzxbd(dst, dst);
5900 } else {
5901 __ Pmovzxbd(dst, src);
5902 }
5903 break;
5904 }
5905 case kX64S128Const: {
5906 // Emit code for generic constants; the all-zeros and all-ones cases are
5907 // handled separately by the instruction selector.
5908 XMMRegister dst = i.OutputSimd128Register();
5909 uint32_t imm[4] = {};
5910 for (int j = 0; j < 4; j++) {
5911 imm[j] = i.InputUint32(j);
5912 }
5913 SetupSimdImmediateInRegister(masm(), imm, dst);
5914 break;
5915 }
5916 case kX64SZero: {
5917 VectorLength vec_len = VectorLengthField::decode(opcode);
5918 if (vec_len == kV128) { // S128Zero
5919 XMMRegister dst = i.OutputSimd128Register();
5920 __ Pxor(dst, dst);
5921 } else if (vec_len == kV256) { // S256Zero
5922 YMMRegister dst = i.OutputSimd256Register();
5923 CpuFeatureScope avx2_scope(masm(), AVX2);
5924 __ vpxor(dst, dst, dst);
5925 } else {
5926 UNREACHABLE();
5927 }
5928 break;
5929 }
5930 case kX64SAllOnes: {
5931 VectorLength vec_len = VectorLengthField::decode(opcode);
5932 if (vec_len == kV128) { // S128AllOnes
5933 XMMRegister dst = i.OutputSimd128Register();
5934 __ Pcmpeqd(dst, dst);
5935 } else if (vec_len == kV256) { // S256AllOnes
5936 YMMRegister dst = i.OutputSimd256Register();
5937 CpuFeatureScope avx2_scope(masm(), AVX2);
5938 __ vpcmpeqd(dst, dst, dst);
5939 } else {
5940 UNREACHABLE();
5941 }
5942 break;
5943 }
5944 // case kX64I16x8ExtractLaneS: {
5945 case kX64IExtractLaneS: {
5946 LaneSize lane_size = LaneSizeField::decode(opcode);
5947 VectorLength vec_len = VectorLengthField::decode(opcode);
5948 if (vec_len == kV128) {
5949 switch (lane_size) {
5950 case kL8: {
5951 // I8x16ExtractLaneS
5952 Register dst = i.OutputRegister();
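          // Pextrb zero-extends, so sign-extend the extracted byte explicitly.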
5953 __ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1));
5954 __ movsxbl(dst, dst);
5955 break;
5956 }
5957 case kL16: {
5958 // I16x8ExtractLaneS
5959 Register dst = i.OutputRegister();
5960 __ Pextrw(dst, i.InputSimd128Register(0), i.InputUint8(1));
5961 __ movsxwl(dst, dst);
5962 break;
5963 }
5964 default:
5965 UNREACHABLE();
5966 }
5967
5968 } else {
5969 UNREACHABLE();
5970 }
5971 break;
5972 }
5973 case kX64I16x8SConvertI8x16Low: {
5974 __ Pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
5975 break;
5976 }
5977 case kX64I16x8SConvertI8x16High: {
5978 __ I16x8SConvertI8x16High(i.OutputSimd128Register(),
5979 i.InputSimd128Register(0));
5980 break;
5981 }
5982 case kX64I16x16SConvertI8x16: {
5983 CpuFeatureScope avx2_scope(masm(), AVX2);
5984 __ vpmovsxbw(i.OutputSimd256Register(), i.InputSimd128Register(0));
5985 break;
5986 }
5987 case kX64I16x8SConvertI32x4: {
5988 ASSEMBLE_SIMD_BINOP(packssdw);
5989 break;
5990 }
5991 case kX64IAddSatS: {
5992 LaneSize lane_size = LaneSizeField::decode(opcode);
5993 VectorLength vec_len = VectorLengthField::decode(opcode);
5994 if (vec_len == kV128) {
5995 switch (lane_size) {
5996 case kL8: {
5997 // I8x16AddSatS
5998 ASSEMBLE_SIMD_BINOP(paddsb);
5999 break;
6000 }
6001 case kL16: {
6002 // I16x8AddSatS
6003 ASSEMBLE_SIMD_BINOP(paddsw);
6004 break;
6005 }
6006 default:
6007 UNREACHABLE();
6008 }
6009 } else if (vec_len == kV256) {
6010 switch (lane_size) {
6011 case kL8: {
6012 // I8x32AddSatS
6013 ASSEMBLE_SIMD256_BINOP(paddsb, AVX2);
6014 break;
6015 }
6016 case kL16: {
6017 // I16x16AddSatS
6018 ASSEMBLE_SIMD256_BINOP(paddsw, AVX2);
6019 break;
6020 }
6021 default:
6022 UNREACHABLE();
6023 }
6024 } else {
6025 UNREACHABLE();
6026 }
6027 break;
6028 }
6029 case kX64ISubSatS: {
6030 LaneSize lane_size = LaneSizeField::decode(opcode);
6031 VectorLength vec_len = VectorLengthField::decode(opcode);
6032 if (vec_len == kV128) {
6033 switch (lane_size) {
6034 case kL8: {
6035 // I8x16SubSatS
6036 ASSEMBLE_SIMD_BINOP(psubsb);
6037 break;
6038 }
6039 case kL16: {
6040 // I16x8SubSatS
6041 ASSEMBLE_SIMD_BINOP(psubsw);
6042 break;
6043 }
6044 default:
6045 UNREACHABLE();
6046 }
6047 } else if (vec_len == kV256) {
6048 switch (lane_size) {
6049 case kL8: {
6050 // I8x32SubSatS
6051 ASSEMBLE_SIMD256_BINOP(psubsb, AVX2);
6052 break;
6053 }
6054 case kL16: {
6055 // I16x16SubSatS
6056 ASSEMBLE_SIMD256_BINOP(psubsw, AVX2);
6057 break;
6058 }
6059 default:
6060 UNREACHABLE();
6061 }
6062 } else {
6063 UNREACHABLE();
6064 }
6065 break;
6066 }
6067 case kX64I16x8UConvertI8x16Low: {
6068 __ Pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
6069 break;
6070 }
6071 case kX64I16x8UConvertI8x16High: {
6072 __ I16x8UConvertI8x16High(i.OutputSimd128Register(),
6073 i.InputSimd128Register(0), kScratchDoubleReg);
6074 break;
6075 }
6076 case kX64I16x16UConvertI8x16: {
6077 CpuFeatureScope avx2_scope(masm(), AVX2);
6078 __ vpmovzxbw(i.OutputSimd256Register(), i.InputSimd128Register(0));
6079 break;
6080 }
6081 case kX64I16x8UConvertI32x4: {
6082 CpuFeatureScope scope(masm(), SSE4_1);
6083 ASSEMBLE_SIMD_BINOP(packusdw);
6084 break;
6085 }
6086 case kX64IAddSatU: {
6087 LaneSize lane_size = LaneSizeField::decode(opcode);
6088 VectorLength vec_len = VectorLengthField::decode(opcode);
6089 if (vec_len == kV128) {
6090 switch (lane_size) {
6091 case kL8: {
6092 // I8x16AddSatU
6093 ASSEMBLE_SIMD_BINOP(paddusb);
6094 break;
6095 }
6096 case kL16: {
6097 // I16x8AddSatU
6098 ASSEMBLE_SIMD_BINOP(paddusw);
6099 break;
6100 }
6101 default:
6102 UNREACHABLE();
6103 }
6104 } else if (vec_len == kV256) {
6105 switch (lane_size) {
6106 case kL8: {
6107 // I8x32AddSatU
6108 ASSEMBLE_SIMD256_BINOP(paddusb, AVX2);
6109 break;
6110 }
6111 case kL16: {
6112 // I16x16AddSatU
6113 ASSEMBLE_SIMD256_BINOP(paddusw, AVX2);
6114 break;
6115 }
6116 default:
6117 UNREACHABLE();
6118 }
6119 } else {
6120 UNREACHABLE();
6121 }
6122 break;
6123 }
6124 case kX64ISubSatU: {
6125 LaneSize lane_size = LaneSizeField::decode(opcode);
6126 VectorLength vec_len = VectorLengthField::decode(opcode);
6127 if (vec_len == kV128) {
6128 switch (lane_size) {
6129 case kL8: {
6130 // I8x16SubSatU
6131 ASSEMBLE_SIMD_BINOP(psubusb);
6132 break;
6133 }
6134 case kL16: {
6135 // I16x8SubSatU
6136 ASSEMBLE_SIMD_BINOP(psubusw);
6137 break;
6138 }
6139 default:
6140 UNREACHABLE();
6141 }
6142 } else if (vec_len == kV256) {
6143 switch (lane_size) {
6144 case kL8: {
6145 // I8x32SubSatU
6146 ASSEMBLE_SIMD256_BINOP(psubusb, AVX2);
6147 break;
6148 }
6149 case kL16: {
6150 // I16x16SubSatU
6151 ASSEMBLE_SIMD256_BINOP(psubusw, AVX2);
6152 break;
6153 }
6154 default:
6155 UNREACHABLE();
6156 }
6157 } else {
6158 UNREACHABLE();
6159 }
6160 break;
6161 }
6162 case kX64IRoundingAverageU: {
6163 LaneSize lane_size = LaneSizeField::decode(opcode);
6164 VectorLength vec_len = VectorLengthField::decode(opcode);
6165 if (vec_len == kV128) {
6166 switch (lane_size) {
6167 case kL8: {
6168 // I8x16RoundingAverageU
6169 ASSEMBLE_SIMD_BINOP(pavgb);
6170 break;
6171 }
6172 case kL16: {
6173 // I16x8RoundingAverageU
6174 ASSEMBLE_SIMD_BINOP(pavgw);
6175 break;
6176 }
6177 default:
6178 UNREACHABLE();
6179 }
6180 } else if (vec_len == kV256) {
6181 switch (lane_size) {
6182 case kL8: {
6183 // I8x32RoundingAverageU
6184 ASSEMBLE_SIMD256_BINOP(pavgb, AVX2);
6185 break;
6186 }
6187 case kL16: {
6188 // I16x16RoundingAverageU
6189 ASSEMBLE_SIMD256_BINOP(pavgw, AVX2);
6190 break;
6191 }
6192 default:
6193 UNREACHABLE();
6194 }
6195 } else {
6196 UNREACHABLE();
6197 }
6198 break;
6199 }
6200 case kX64I16x8ExtMulLowI8x16S: {
6201 __ I16x8ExtMulLow(i.OutputSimd128Register(), i.InputSimd128Register(0),
6202 i.InputSimd128Register(1), kScratchDoubleReg,
6203 /*is_signed=*/true);
6204 break;
6205 }
6206 case kX64I16x8ExtMulHighI8x16S: {
6207 __ I16x8ExtMulHighS(i.OutputSimd128Register(), i.InputSimd128Register(0),
6208 i.InputSimd128Register(1), kScratchDoubleReg);
6209 break;
6210 }
6211 case kX64I16x8ExtMulLowI8x16U: {
6212 __ I16x8ExtMulLow(i.OutputSimd128Register(), i.InputSimd128Register(0),
6213 i.InputSimd128Register(1), kScratchDoubleReg,
6214 /*is_signed=*/false);
6215 break;
6216 }
6217 case kX64I16x8ExtMulHighI8x16U: {
6218 __ I16x8ExtMulHighU(i.OutputSimd128Register(), i.InputSimd128Register(0),
6219 i.InputSimd128Register(1), kScratchDoubleReg);
6220 break;
6221 }
6222 case kX64I16x8ExtAddPairwiseI8x16S: {
6223 __ I16x8ExtAddPairwiseI8x16S(i.OutputSimd128Register(),
6224 i.InputSimd128Register(0), kScratchDoubleReg,
6225 kScratchRegister);
6226 break;
6227 }
6228 case kX64I16x16ExtAddPairwiseI8x32S: {
6229 __ I16x16ExtAddPairwiseI8x32S(i.OutputSimd256Register(),
6230 i.InputSimd256Register(0),
6231 kScratchSimd256Reg);
6232 break;
6233 }
6234 case kX64I16x8ExtAddPairwiseI8x16U: {
6235 __ I16x8ExtAddPairwiseI8x16U(i.OutputSimd128Register(),
6236 i.InputSimd128Register(0), kScratchRegister);
6237 break;
6238 }
6239 case kX64I16x16ExtAddPairwiseI8x32U: {
6240 __ I16x16ExtAddPairwiseI8x32U(i.OutputSimd256Register(),
6241 i.InputSimd256Register(0),
6242 kScratchSimd256Reg);
6243 break;
6244 }
6245 case kX64I16x8Q15MulRSatS: {
6246 __ I16x8Q15MulRSatS(i.OutputSimd128Register(), i.InputSimd128Register(0),
6247 i.InputSimd128Register(1), kScratchDoubleReg);
6248 break;
6249 }
6250 case kX64I16x8RelaxedQ15MulRS: {
6251 __ Pmulhrsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
6252 i.InputSimd128Register(1));
6253 break;
6254 }
6255 case kX64I16x8DotI8x16I7x16S: {
6256 __ I16x8DotI8x16I7x16S(i.OutputSimd128Register(),
6257 i.InputSimd128Register(0),
6258 i.InputSimd128Register(1));
6259 break;
6260 }
6261 case kX64I16x16DotI8x32I7x32S: {
6262 CpuFeatureScope avx_scope(masm(), AVX2);
6263 __ vpmaddubsw(i.OutputSimd256Register(), i.InputSimd256Register(1),
6264 i.InputSimd256Register(0));
6265 break;
6266 }
6267 case kX64Pextrb: {
6268 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6269 size_t index = 0;
6270 if (HasAddressingMode(instr)) {
6271 Operand operand = i.MemoryOperand(&index);
6272 __ Pextrb(operand, i.InputSimd128Register(index),
6273 i.InputUint8(index + 1));
6274 } else {
6275 __ Pextrb(i.OutputRegister(), i.InputSimd128Register(0),
6276 i.InputUint8(1));
6277 }
6278 break;
6279 }
6280 case kX64Pextrw: {
6281 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6282 size_t index = 0;
6283 if (HasAddressingMode(instr)) {
6284 Operand operand = i.MemoryOperand(&index);
6285 __ Pextrw(operand, i.InputSimd128Register(index),
6286 i.InputUint8(index + 1));
6287 } else {
6288 __ Pextrw(i.OutputRegister(), i.InputSimd128Register(0),
6289 i.InputUint8(1));
6290 }
6291 break;
6292 }
6293 case kX64Pinsrb: {
6294 ASSEMBLE_PINSR(Pinsrb);
6295 break;
6296 }
6297 case kX64Pinsrw: {
6298 ASSEMBLE_PINSR(Pinsrw);
6299 break;
6300 }
6301 case kX64Pinsrd: {
6302 ASSEMBLE_PINSR(Pinsrd);
6303 break;
6304 }
6305 case kX64Pinsrq: {
6306 ASSEMBLE_PINSR(Pinsrq);
6307 break;
6308 }
6309 case kX64I8x16SConvertI16x8: {
6310 ASSEMBLE_SIMD_BINOP(packsswb);
6311 break;
6312 }
6313 case kX64I8x16UConvertI16x8: {
6314 ASSEMBLE_SIMD_BINOP(packuswb);
6315 break;
6316 }
6317 case kX64I32x4ExtMulLowI16x8S: {
6318 __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
6319 i.InputSimd128Register(1), kScratchDoubleReg,
6320 /*low=*/true,
6321 /*is_signed=*/true);
6322 break;
6323 }
6324 case kX64I32x4ExtMulHighI16x8S: {
6325 __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
6326 i.InputSimd128Register(1), kScratchDoubleReg,
6327 /*low=*/false,
6328 /*is_signed=*/true);
6329 break;
6330 }
6331 case kX64I32x4ExtMulLowI16x8U: {
6332 __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
6333 i.InputSimd128Register(1), kScratchDoubleReg,
6334 /*low=*/true,
6335 /*is_signed=*/false);
6336 break;
6337 }
6338 case kX64I32x4ExtMulHighI16x8U: {
6339 __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
6340 i.InputSimd128Register(1), kScratchDoubleReg,
6341 /*low=*/false,
6342 /*is_signed=*/false);
6343 break;
6344 }
6345 case kX64SAnd: {
6346 VectorLength vec_len = VectorLengthField::decode(opcode);
6347 if (vec_len == kV128) { // S128And
6348 ASSEMBLE_SIMD_BINOP(pand);
6349 } else if (vec_len == kV256) { // S256And
6350 ASSEMBLE_SIMD256_BINOP(pand, AVX2);
6351 } else {
6352 UNREACHABLE();
6353 }
6354 break;
6355 }
6356 case kX64SOr: {
6357 VectorLength vec_len = VectorLengthField::decode(opcode);
6358 if (vec_len == kV128) { // S128Or
6359 ASSEMBLE_SIMD_BINOP(por);
6360 } else if (vec_len == kV256) { // S256Or
6361 ASSEMBLE_SIMD256_BINOP(por, AVX2);
6362 } else {
6363 UNREACHABLE();
6364 }
6365 break;
6366 }
6367 case kX64SXor: {
6368 VectorLength vec_len = VectorLengthField::decode(opcode);
6369 if (vec_len == kV128) { // S128Xor
6370 ASSEMBLE_SIMD_BINOP(pxor);
6371 } else if (vec_len == kV256) { // S256Xor
6372 ASSEMBLE_SIMD256_BINOP(pxor, AVX2);
6373 } else {
6374 UNREACHABLE();
6375 }
6376 break;
6377 }
6378 case kX64SNot: {
6379 VectorLength vec_len = VectorLengthField::decode(opcode);
6380 if (vec_len == kV128) { // S128Not
6381 __ S128Not(i.OutputSimd128Register(), i.InputSimd128Register(0),
6382 kScratchDoubleReg);
6383 } else if (vec_len == kV256) { // S256Not
6384 __ S256Not(i.OutputSimd256Register(), i.InputSimd256Register(0),
6385 kScratchSimd256Reg);
6386 } else {
6387 UNREACHABLE();
6388 }
6389 break;
6390 }
6391 case kX64SSelect: {
6392 VectorLength vec_len = VectorLengthField::decode(opcode);
6393 if (vec_len == kV128) { // S128Select
6394 __ S128Select(i.OutputSimd128Register(), i.InputSimd128Register(0),
6395 i.InputSimd128Register(1), i.InputSimd128Register(2),
6396 kScratchDoubleReg);
6397 } else if (vec_len == kV256) { // S256Select
6398 __ S256Select(i.OutputSimd256Register(), i.InputSimd256Register(0),
6399 i.InputSimd256Register(1), i.InputSimd256Register(2),
6400 kScratchSimd256Reg);
6401 } else {
6402 UNREACHABLE();
6403 }
6404 break;
6405 }
6406 case kX64SAndNot: {
6407 VectorLength vec_len = VectorLengthField::decode(opcode);
6408 if (vec_len == kV128) { // S128AndNot
6409 // The inputs have been inverted by the instruction selector, so we can
6410 // call andnps here without any modifications.
6411 ASSEMBLE_SIMD_BINOP(andnps);
6412 } else if (vec_len == kV256) { // S256AndNot
6413 // The inputs have been inverted by the instruction selector, so we can
6414 // call andnps here without any modifications.
6415 ASSEMBLE_SIMD256_BINOP(andnps, AVX);
6416 } else {
6417 UNREACHABLE();
6418 }
6419 break;
6420 }
6421 case kX64I8x16Swizzle: {
6422 __ I8x16Swizzle(i.OutputSimd128Register(), i.InputSimd128Register(0),
6423 i.InputSimd128Register(1), kScratchDoubleReg,
6425 break;
6426 }
6427 case kX64Vpshufd: {
6428 if (instr->InputCount() == 2 && instr->InputAt(1)->IsImmediate()) {
6429 YMMRegister dst = i.OutputSimd256Register();
6430 YMMRegister src = i.InputSimd256Register(0);
6431 uint8_t imm = i.InputUint8(1);
6432 CpuFeatureScope avx2_scope(masm(), AVX2);
6433 __ vpshufd(dst, src, imm);
6434 } else {
6435 UNIMPLEMENTED();
6436 }
6437 break;
6438 }
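// The generic i8x16 shuffle below is lowered to pshufb. The shuffle indices
// arrive as four 32-bit immediates. For the one-operand form they are
// materialized directly as a pshufb mask in a temp register. For the
// two-operand form two masks are built: indices < 16 select from the first
// source and indices >= 16 (masked to 0x0F) from the second; out-of-range
// lanes are mapped to 0x80 so pshufb writes zero there, and the two partial
// results are combined with por.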
6439 case kX64I8x16Shuffle: {
6440 XMMRegister dst = i.OutputSimd128Register();
6441 XMMRegister tmp_simd = i.TempSimd128Register(0);
6442 DCHECK_NE(tmp_simd, i.InputSimd128Register(0));
6443 if (instr->InputCount() == 5) { // only one input operand
6444 uint32_t mask[4] = {};
6445 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
6446 for (int j = 4; j > 0; j--) {
6447 mask[j - 1] = i.InputUint32(j);
6448 }
6449
6450 SetupSimdImmediateInRegister(masm(), mask, tmp_simd);
6451 __ Pshufb(dst, tmp_simd);
6452 } else { // two input operands
6453 DCHECK_NE(tmp_simd, i.InputSimd128Register(1));
6454 DCHECK_EQ(6, instr->InputCount());
6455 ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 0);
6456 uint32_t mask1[4] = {};
6457 for (int j = 5; j > 1; j--) {
6458 uint32_t lanes = i.InputUint32(j);
6459 for (int k = 0; k < 32; k += 8) {
6460 uint8_t lane = lanes >> k;
6461 mask1[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
6462 }
6463 }
6464 SetupSimdImmediateInRegister(masm(), mask1, tmp_simd);
6465 __ Pshufb(kScratchDoubleReg, tmp_simd);
6466 uint32_t mask2[4] = {};
6467 if (instr->InputAt(1)->IsSimd128Register()) {
6468 XMMRegister src1 = i.InputSimd128Register(1);
6469 if (src1 != dst) __ Movdqa(dst, src1);
6470 } else {
6471 __ Movdqu(dst, i.InputOperand(1));
6472 }
6473 for (int j = 5; j > 1; j--) {
6474 uint32_t lanes = i.InputUint32(j);
6475 for (int k = 0; k < 32; k += 8) {
6476 uint8_t lane = lanes >> k;
6477 mask2[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
6478 }
6479 }
6480 SetupSimdImmediateInRegister(masm(), mask2, tmp_simd);
6481 __ Pshufb(dst, tmp_simd);
6482 __ Por(dst, kScratchDoubleReg);
6483 }
6484 break;
6485 }
6486 case kX64I8x16Popcnt: {
6487 __ I8x16Popcnt(i.OutputSimd128Register(), i.InputSimd128Register(0),
6488 i.TempSimd128Register(0), kScratchDoubleReg,
6489 kScratchRegister);
6490 break;
6491 }
6492 case kX64S128Load8Splat: {
6493 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6494 __ S128Load8Splat(i.OutputSimd128Register(), i.MemoryOperand(),
6495 kScratchDoubleReg);
6496 break;
6497 }
6498 case kX64S128Load16Splat: {
6499 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6500 __ S128Load16Splat(i.OutputSimd128Register(), i.MemoryOperand(),
6501 kScratchDoubleReg);
6502 break;
6503 }
6504 case kX64S128Load32Splat: {
6505 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6506 __ S128Load32Splat(i.OutputSimd128Register(), i.MemoryOperand());
6507 break;
6508 }
6509 case kX64S128Load64Splat: {
6510 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6511 __ Movddup(i.OutputSimd128Register(), i.MemoryOperand());
6512 break;
6513 }
6514 case kX64S128Load8x8S: {
6515 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6516 __ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
6517 break;
6518 }
6519 case kX64S128Load8x8U: {
6520 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6521 __ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
6522 break;
6523 }
6524 case kX64S128Load16x4S: {
6525 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6526 __ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
6527 break;
6528 }
6529 case kX64S128Load16x4U: {
6530 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6531 __ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
6532 break;
6533 }
6534 case kX64S128Load32x2S: {
6535 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6536 __ Pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
6537 break;
6538 }
6539 case kX64S128Load32x2U: {
6540 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6541 __ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
6542 break;
6543 }
6544 case kX64S128Store32Lane: {
6545 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6546 size_t index = 0;
6547 Operand operand = i.MemoryOperand(&index);
6548 uint8_t lane = i.InputUint8(index + 1);
6549 __ S128Store32Lane(operand, i.InputSimd128Register(index), lane);
6550 break;
6551 }
6552 case kX64S128Store64Lane: {
6553 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6554 size_t index = 0;
6555 Operand operand = i.MemoryOperand(&index);
6556 uint8_t lane = i.InputUint8(index + 1);
6557 __ S128Store64Lane(operand, i.InputSimd128Register(index), lane);
6558 break;
6559 }
6560 case kX64Shufps: {
6561 if (instr->Output()->IsSimd128Register()) {
6562 __ Shufps(i.OutputSimd128Register(), i.InputSimd128Register(0),
6563 i.InputSimd128Register(1), i.InputUint8(2));
6564 } else {
6565 DCHECK(instr->Output()->IsSimd256Register());
6567 CpuFeatureScope scope(masm(), AVX);
6568 __ vshufps(i.OutputSimd256Register(), i.InputSimd256Register(0),
6569 i.InputSimd256Register(1), i.InputUint8(2));
6570 }
6571 break;
6572 }
6573 case kX64S32x4Rotate: {
6574 XMMRegister dst = i.OutputSimd128Register();
6575 XMMRegister src = i.InputSimd128Register(0);
6576 uint8_t mask = i.InputUint8(1);
6577 if (dst == src) {
6578 // 1-byte shorter encoding than pshufd.
6579 __ Shufps(dst, src, src, mask);
6580 } else {
6581 __ Pshufd(dst, src, mask);
6582 }
6583 break;
6584 }
6585 case kX64S32x4Swizzle: {
6586 DCHECK_EQ(2, instr->InputCount());
6587 ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0,
6588 i.InputUint8(1));
6589 break;
6590 }
6591 case kX64S32x4Shuffle: {
6592 DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
6593 uint8_t shuffle = i.InputUint8(2);
6594 DCHECK_NE(0xe4, shuffle); // A simple blend should be handled below.
6595 ASSEMBLE_SIMD_IMM_INSTR(Pshufd, kScratchDoubleReg, 1, shuffle);
6596 ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0, shuffle);
6597 __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputUint8(3));
6598 break;
6599 }
6600 case kX64S16x8Blend: {
6601 CpuFeatureScope scope(masm(), SSE4_1);
6602 ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, i.InputUint8(2));
6603 break;
6604 }
6605 case kX64S16x8HalfShuffle1: {
6606 XMMRegister dst = i.OutputSimd128Register();
6607 uint8_t mask_lo = i.InputUint8(1);
6608 uint8_t mask_hi = i.InputUint8(2);
6609 if (mask_lo != 0xe4) {
6610 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, mask_lo);
6611 if (mask_hi != 0xe4) __ Pshufhw(dst, dst, mask_hi);
6612 } else {
6613 DCHECK_NE(mask_hi, 0xe4);
6614 ASSEMBLE_SIMD_IMM_INSTR(Pshufhw, dst, 0, mask_hi);
6615 }
6616 break;
6617 }
6618 case kX64S16x8HalfShuffle2: {
6619 XMMRegister dst = i.OutputSimd128Register();
6620 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, kScratchDoubleReg, 1, i.InputUint8(2));
6621 __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputUint8(3));
6622 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, i.InputUint8(2));
6623 __ Pshufhw(dst, dst, i.InputUint8(3));
6624 __ Pblendw(dst, kScratchDoubleReg, i.InputUint8(4));
6625 break;
6626 }
6627 case kX64S8x16Alignr: {
6628 ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, i.InputUint8(2));
6629 break;
6630 }
6631 case kX64S16x8Dup: {
6632 XMMRegister dst = i.OutputSimd128Register();
6633 uint8_t lane = i.InputInt8(1) & 0x7;
6634 uint8_t lane4 = lane & 0x3;
6635 uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
6636 if (lane < 4) {
6637 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, half_dup);
6638 __ Punpcklqdq(dst, dst);
6639 } else {
6640 ASSEMBLE_SIMD_IMM_INSTR(Pshufhw, dst, 0, half_dup);
6641 __ Punpckhqdq(dst, dst);
6642 }
6643 break;
6644 }
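// S8x16Dup broadcasts a single byte lane: punpcklbw/punpckhbw first doubles
// the byte into a 16-bit lane in the appropriate half, then the same
// pshuflw/pshufhw + punpcklqdq/punpckhqdq sequence used for S16x8Dup splats
// that 16-bit lane across the whole register.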
6645 case kX64S8x16Dup: {
6646 XMMRegister dst = i.OutputSimd128Register();
6647 uint8_t lane = i.InputInt8(1) & 0xf;
6648 DCHECK_EQ(dst, i.InputSimd128Register(0));
6649 if (lane < 8) {
6650 __ Punpcklbw(dst, dst);
6651 } else {
6652 __ Punpckhbw(dst, dst);
6653 }
6654 lane &= 0x7;
6655 uint8_t lane4 = lane & 0x3;
6656 uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
6657 if (lane < 4) {
6658 __ Pshuflw(dst, dst, half_dup);
6659 __ Punpcklqdq(dst, dst);
6660 } else {
6661 __ Pshufhw(dst, dst, half_dup);
6662 __ Punpckhqdq(dst, dst);
6663 }
6664 break;
6665 }
6666 case kX64S64x2UnpackHigh:
6667 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
6668 break;
6669 case kX64S32x4UnpackHigh:
6670 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
6671 break;
6672 case kX64S32x8UnpackHigh: {
6673 CpuFeatureScope avx2_scope(masm(), AVX2);
6674 YMMRegister dst = i.OutputSimd256Register();
6675 __ vpunpckhdq(dst, i.InputSimd256Register(0), i.InputSimd256Register(1));
6676 break;
6677 }
6678 case kX64S16x8UnpackHigh:
6679 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
6680 break;
6681 case kX64S8x16UnpackHigh:
6682 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
6683 break;
6684 case kX64S64x2UnpackLow:
6685 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
6686 break;
6687 case kX64S32x4UnpackLow:
6688 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
6689 break;
6690 case kX64S32x8UnpackLow: {
6691 CpuFeatureScope avx2_scope(masm(), AVX2);
6692 YMMRegister dst = i.OutputSimd256Register();
6693 __ vpunpckldq(dst, i.InputSimd256Register(0), i.InputSimd256Register(1));
6694 break;
6695 }
6696 case kX64S16x8UnpackLow:
6697 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
6698 break;
6699 case kX64S8x16UnpackLow:
6700 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
6701 break;
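// The unzip cases below keep either the high or the low half of each lane
// pair: the high variants shift every wide lane right so its high half moves
// to the bottom, the low variants clear the high half (by blending with a
// zeroed scratch register or by shifting left and back right), and both
// sources are then narrowed together with packusdw/packuswb.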
6702 case kX64S16x8UnzipHigh: {
6703 XMMRegister dst = i.OutputSimd128Register();
6704 XMMRegister src2 = dst;
6705 DCHECK_EQ(dst, i.InputSimd128Register(0));
6706 if (instr->InputCount() == 2) {
6708 __ Psrld(kScratchDoubleReg, uint8_t{16});
6709 src2 = kScratchDoubleReg;
6710 }
6711 __ Psrld(dst, uint8_t{16});
6712 __ Packusdw(dst, src2);
6713 break;
6714 }
6715 case kX64S16x8UnzipLow: {
6716 XMMRegister dst = i.OutputSimd128Register();
6717 XMMRegister src2 = dst;
6718 DCHECK_EQ(dst, i.InputSimd128Register(0));
6719 __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
6720 if (instr->InputCount() == 2) {
6721 ASSEMBLE_SIMD_IMM_INSTR(Pblendw, kScratchDoubleReg, 1, uint8_t{0x55});
6722 src2 = kScratchDoubleReg;
6723 }
6724 __ Pblendw(dst, kScratchDoubleReg, uint8_t{0xaa});
6725 __ Packusdw(dst, src2);
6726 break;
6727 }
6728 case kX64S8x16UnzipHigh: {
6729 XMMRegister dst = i.OutputSimd128Register();
6730 XMMRegister src2 = dst;
6731 DCHECK_EQ(dst, i.InputSimd128Register(0));
6732 if (instr->InputCount() == 2) {
6734 __ Psrlw(kScratchDoubleReg, uint8_t{8});
6735 src2 = kScratchDoubleReg;
6736 }
6737 __ Psrlw(dst, uint8_t{8});
6738 __ Packuswb(dst, src2);
6739 break;
6740 }
6741 case kX64S8x16UnzipLow: {
6742 XMMRegister dst = i.OutputSimd128Register();
6743 XMMRegister src2 = dst;
6744 DCHECK_EQ(dst, i.InputSimd128Register(0));
6745 if (instr->InputCount() == 2) {
6747 __ Psllw(kScratchDoubleReg, uint8_t{8});
6748 __ Psrlw(kScratchDoubleReg, uint8_t{8});
6749 src2 = kScratchDoubleReg;
6750 }
6751 __ Psllw(dst, uint8_t{8});
6752 __ Psrlw(dst, uint8_t{8});
6753 __ Packuswb(dst, src2);
6754 break;
6755 }
6756 case kX64S8x16TransposeLow: {
6757 XMMRegister dst = i.OutputSimd128Register();
6758 DCHECK_EQ(dst, i.InputSimd128Register(0));
6759 __ Psllw(dst, uint8_t{8});
6760 if (instr->InputCount() == 1) {
6761 __ Movdqa(kScratchDoubleReg, dst);
6762 } else {
6763 DCHECK_EQ(2, instr->InputCount());
6765 __ Psllw(kScratchDoubleReg, uint8_t{8});
6766 }
6767 __ Psrlw(dst, uint8_t{8});
6768 __ Por(dst, kScratchDoubleReg);
6769 break;
6770 }
6771 case kX64S8x16TransposeHigh: {
6772 XMMRegister dst = i.OutputSimd128Register();
6773 DCHECK_EQ(dst, i.InputSimd128Register(0));
6774 __ Psrlw(dst, uint8_t{8});
6775 if (instr->InputCount() == 1) {
6776 __ Movdqa(kScratchDoubleReg, dst);
6777 } else {
6778 DCHECK_EQ(2, instr->InputCount());
6780 __ Psrlw(kScratchDoubleReg, uint8_t{8});
6781 }
6782 __ Psllw(kScratchDoubleReg, uint8_t{8});
6783 __ Por(dst, kScratchDoubleReg);
6784 break;
6785 }
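// The byte-reverse cases share one pattern: for 8- and 4-byte groups the
// 16-bit words are first reordered with pshuflw/pshufhw (0x1B reverses four
// words, 0xB1 swaps adjacent words), and the two bytes inside each word are
// then swapped by or-ing a right-shifted copy with a left-shifted copy.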
6786 case kX64S8x8Reverse:
6787 case kX64S8x4Reverse:
6788 case kX64S8x2Reverse: {
6789 DCHECK_EQ(1, instr->InputCount());
6790 XMMRegister dst = i.OutputSimd128Register();
6791 DCHECK_EQ(dst, i.InputSimd128Register(0));
6792 if (arch_opcode != kX64S8x2Reverse) {
6793 // First shuffle words into position.
6794 uint8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
6795 __ Pshuflw(dst, dst, shuffle_mask);
6796 __ Pshufhw(dst, dst, shuffle_mask);
6797 }
6798 __ Movdqa(kScratchDoubleReg, dst);
6799 __ Psrlw(kScratchDoubleReg, uint8_t{8});
6800 __ Psllw(dst, uint8_t{8});
6801 __ Por(dst, kScratchDoubleReg);
6802 break;
6803 }
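// v128.any_true: ptest sets ZF only when the source is all zeros, so the
// result register is cleared first and then set to 1 via setcc(not_equal).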
6804 case kX64V128AnyTrue: {
6805 Register dst = i.OutputRegister();
6806 XMMRegister src = i.InputSimd128Register(0);
6807
6808 __ xorq(dst, dst);
6809 __ Ptest(src, src);
6810 __ setcc(not_equal, dst);
6811 break;
6812 }
6813 // Need to split up all the different lane structures because the
6814 // comparison instruction used matters, e.g. given 0xff00, pcmpeqb returns
6815 // 0x0011, pcmpeqw returns 0x0000, ptest will set ZF to 0 and 1
6816 // respectively.
6817 case kX64IAllTrue: {
6818 LaneSize lane_size = LaneSizeField::decode(opcode);
6819 VectorLength vec_len = VectorLengthField::decode(opcode);
6820 if (vec_len == kV128) {
6821 switch (lane_size) {
6822 case kL8: {
6823 // I8x16AllTrue
6824 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqb);
6825 break;
6826 }
6827 case kL16: {
6828 // I16x8AllTrue
6829 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqw);
6830 break;
6831 }
6832 case kL32: {
6833 // I32x4AllTrue
6834 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
6835 break;
6836 }
6837 case kL64: {
6838 // I64x2AllTrue
6839 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqq);
6840 break;
6841 }
6842 default:
6843 UNREACHABLE();
6844 }
6845 } else {
6846 UNREACHABLE();
6847 }
6848 break;
6849 }
6850 case kX64Blendvpd: {
6851 VectorLength vec_len = VectorLengthField::decode(opcode);
6852 if (vec_len == kV128) {
6853 __ Blendvpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
6854 i.InputSimd128Register(1), i.InputSimd128Register(2));
6855 } else {
6856 DCHECK_EQ(vec_len, kV256);
6857 CpuFeatureScope avx_scope(masm(), AVX);
6858 __ vblendvpd(i.OutputSimd256Register(), i.InputSimd256Register(0),
6859 i.InputSimd256Register(1), i.InputSimd256Register(2));
6860 }
6861 break;
6862 }
6863 case kX64Blendvps: {
6864 VectorLength vec_len = VectorLengthField::decode(opcode);
6865 if (vec_len == kV128) {
6866 __ Blendvps(i.OutputSimd128Register(), i.InputSimd128Register(0),
6867 i.InputSimd128Register(1), i.InputSimd128Register(2));
6868 } else {
6869 DCHECK_EQ(vec_len, kV256);
6870 CpuFeatureScope avx_scope(masm(), AVX);
6871 __ vblendvps(i.OutputSimd256Register(), i.InputSimd256Register(0),
6872 i.InputSimd256Register(1), i.InputSimd256Register(2));
6873 }
6874 break;
6875 }
6876 case kX64Pblendvb: {
6877 VectorLength vec_len = VectorLengthField::decode(opcode);
6878 if (vec_len == kV128) {
6879 __ Pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
6880 i.InputSimd128Register(1), i.InputSimd128Register(2));
6881 } else {
6882 DCHECK_EQ(vec_len, kV256);
6883 CpuFeatureScope avx_scope(masm(), AVX2);
6884 __ vpblendvb(i.OutputSimd256Register(), i.InputSimd256Register(0),
6885 i.InputSimd256Register(1), i.InputSimd256Register(2));
6886 }
6887 break;
6888 }
6889 case kX64I32x4TruncF64x2UZero: {
6890 __ I32x4TruncSatF64x2UZero(i.OutputSimd128Register(),
6891 i.InputSimd128Register(0), kScratchDoubleReg,
6893 break;
6894 }
6895 case kX64I32x4TruncF32x4U: {
6896 __ I32x4TruncF32x4U(i.OutputSimd128Register(), i.InputSimd128Register(0),
6897 kScratchDoubleReg, i.TempSimd128Register(0));
6898 break;
6899 }
6900 case kX64I32x8TruncF32x8U: {
6901 __ I32x8TruncF32x8U(i.OutputSimd256Register(), i.InputSimd256Register(0),
6902 kScratchSimd256Reg, i.TempSimd256Register(0));
6903 break;
6904 }
6905 case kX64Cvttps2dq: {
6906 VectorLength vec_len = VectorLengthField::decode(opcode);
6907 if (vec_len == kV128) {
6908 __ Cvttps2dq(i.OutputSimd128Register(), i.InputSimd128Register(0));
6909 } else {
6910 DCHECK_EQ(vec_len, kV256);
6911 CpuFeatureScope avx_scope(masm(), AVX);
6912 __ vcvttps2dq(i.OutputSimd256Register(), i.InputSimd256Register(0));
6913 }
6914 break;
6915 }
6916 case kX64Cvttpd2dq: {
6917 __ Cvttpd2dq(i.OutputSimd128Register(), i.InputSimd128Register(0));
6918 break;
6919 }
6920 case kAtomicStoreWord8: {
6921 ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord8);
6922 break;
6923 }
6924 case kAtomicStoreWord16: {
6925 ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord16);
6926 break;
6927 }
6928 case kAtomicStoreWord32: {
6929 ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord32);
6930 break;
6931 }
6932 case kX64Word64AtomicStoreWord64: {
6933 ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord64);
6934 break;
6935 }
6936 case kAtomicExchangeInt8: {
6937 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
6938 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6939 __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
6940 __ movsxbl(i.InputRegister(0), i.InputRegister(0));
6941 break;
6942 }
6943 case kAtomicExchangeUint8: {
6944 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6945 __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
6946 switch (AtomicWidthField::decode(opcode)) {
6947 case AtomicWidth::kWord32:
6948 __ movzxbl(i.InputRegister(0), i.InputRegister(0));
6949 break;
6950 case AtomicWidth::kWord64:
6951 __ movzxbq(i.InputRegister(0), i.InputRegister(0));
6952 break;
6953 }
6954 break;
6955 }
6956 case kAtomicExchangeInt16: {
6957 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
6958 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6959 __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
6960 __ movsxwl(i.InputRegister(0), i.InputRegister(0));
6961 break;
6962 }
6963 case kAtomicExchangeUint16: {
6964 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6965 __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
6966 switch (AtomicWidthField::decode(opcode)) {
6967 case AtomicWidth::kWord32:
6968 __ movzxwl(i.InputRegister(0), i.InputRegister(0));
6969 break;
6970 case AtomicWidth::kWord64:
6971 __ movzxwq(i.InputRegister(0), i.InputRegister(0));
6972 break;
6973 }
6974 break;
6975 }
6976 case kAtomicExchangeWord32: {
6977 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6978 __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
6979 break;
6980 }
6981 case kAtomicCompareExchangeInt8: {
6982 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
6983 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6984 __ lock();
6985 __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
6986 __ movsxbl(rax, rax);
6987 break;
6988 }
6989 case kAtomicCompareExchangeUint8: {
6990 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
6991 __ lock();
6992 __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
6993 switch (AtomicWidthField::decode(opcode)) {
6994 case AtomicWidth::kWord32:
6995 __ movzxbl(rax, rax);
6996 break;
6997 case AtomicWidth::kWord64:
6998 __ movzxbq(rax, rax);
6999 break;
7000 }
7001 break;
7002 }
7003 case kAtomicCompareExchangeInt16: {
7004 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
7005 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
7006 __ lock();
7007 __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
7008 __ movsxwl(rax, rax);
7009 break;
7010 }
7011 case kAtomicCompareExchangeUint16: {
7012 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
7013 __ lock();
7014 __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
7015 switch (AtomicWidthField::decode(opcode)) {
7016 case AtomicWidth::kWord32:
7017 __ movzxwl(rax, rax);
7018 break;
7019 case AtomicWidth::kWord64:
7020 __ movzxwq(rax, rax);
7021 break;
7022 }
7023 break;
7024 }
7025 case kAtomicCompareExchangeWord32: {
7026 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
7027 __ lock();
7028 __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
7029 if (AtomicWidthField::decode(opcode) == AtomicWidth::kWord64) {
7030 // Zero-extend the 32 bit value to 64 bit.
7031 __ movl(rax, rax);
7032 }
7033 break;
7034 }
7035 case kX64Word64AtomicExchangeUint64: {
7036 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
7037 __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
7038 break;
7039 }
7040 case kX64Word64AtomicCompareExchangeUint64: {
7041 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
7042 __ lock();
7043 __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
7044 break;
7045 }
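// The macro below expands each atomic read-modify-write opcode into a
// compare-exchange retry loop (via ASSEMBLE_ATOMIC_BINOP /
// ASSEMBLE_ATOMIC64_BINOP): the old value is loaded into rax, the operation
// is applied to a copy, and lock cmpxchg retries until the store succeeds.
// The result left in rax is then sign- or zero-extended to match the access
// size and signedness of the opcode.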
7046#define ATOMIC_BINOP_CASE(op, inst32, inst64) \
7047 case kAtomic##op##Int8: \
7048 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32); \
7049 ASSEMBLE_ATOMIC_BINOP(inst32, movb, cmpxchgb); \
7050 __ movsxbl(rax, rax); \
7051 break; \
7052 case kAtomic##op##Uint8: \
7053 switch (AtomicWidthField::decode(opcode)) { \
7054 case AtomicWidth::kWord32: \
7055 ASSEMBLE_ATOMIC_BINOP(inst32, movb, cmpxchgb); \
7056 __ movzxbl(rax, rax); \
7057 break; \
7058 case AtomicWidth::kWord64: \
7059 ASSEMBLE_ATOMIC64_BINOP(inst64, movb, cmpxchgb); \
7060 __ movzxbq(rax, rax); \
7061 break; \
7062 } \
7063 break; \
7064 case kAtomic##op##Int16: \
7065 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32); \
7066 ASSEMBLE_ATOMIC_BINOP(inst32, movw, cmpxchgw); \
7067 __ movsxwl(rax, rax); \
7068 break; \
7069 case kAtomic##op##Uint16: \
7070 switch (AtomicWidthField::decode(opcode)) { \
7071 case AtomicWidth::kWord32: \
7072 ASSEMBLE_ATOMIC_BINOP(inst32, movw, cmpxchgw); \
7073 __ movzxwl(rax, rax); \
7074 break; \
7075 case AtomicWidth::kWord64: \
7076 ASSEMBLE_ATOMIC64_BINOP(inst64, movw, cmpxchgw); \
7077 __ movzxwq(rax, rax); \
7078 break; \
7079 } \
7080 break; \
7081 case kAtomic##op##Word32: \
7082 switch (AtomicWidthField::decode(opcode)) { \
7083 case AtomicWidth::kWord32: \
7084 ASSEMBLE_ATOMIC_BINOP(inst32, movl, cmpxchgl); \
7085 break; \
7086 case AtomicWidth::kWord64: \
7087 ASSEMBLE_ATOMIC64_BINOP(inst64, movl, cmpxchgl); \
7088 break; \
7089 } \
7090 break; \
7091 case kX64Word64Atomic##op##Uint64: \
7092 ASSEMBLE_ATOMIC64_BINOP(inst64, movq, cmpxchgq); \
7093 break;
7094 ATOMIC_BINOP_CASE(Add, addl, addq)
7095 ATOMIC_BINOP_CASE(Sub, subl, subq)
7096 ATOMIC_BINOP_CASE(And, andl, andq)
7097 ATOMIC_BINOP_CASE(Or, orl, orq)
7098 ATOMIC_BINOP_CASE(Xor, xorl, xorq)
7099#undef ATOMIC_BINOP_CASE
7100
7101 case kAtomicLoadInt8:
7102 case kAtomicLoadUint8:
7103 case kAtomicLoadInt16:
7104 case kAtomicLoadUint16:
7105 case kAtomicLoadWord32:
7106 UNREACHABLE(); // Won't be generated by instruction selector.
7107
7108 case kX64I32x8DotI16x16S: {
7109 ASSEMBLE_SIMD256_BINOP(pmaddwd, AVX2);
7110 break;
7111 }
7112 case kX64S256Load8Splat: {
7113 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
7114 CpuFeatureScope avx2_scope(masm(), AVX2);
7115 __ vpbroadcastb(i.OutputSimd256Register(), i.MemoryOperand());
7116 break;
7117 }
7118 case kX64S256Load16Splat: {
7119 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
7120 CpuFeatureScope avx2_scope(masm(), AVX2);
7121 __ vpbroadcastw(i.OutputSimd256Register(), i.MemoryOperand());
7122 break;
7123 }
7124 case kX64S256Load32Splat: {
7125 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
7126 CpuFeatureScope avx_scope(masm(), AVX);
7127 __ vbroadcastss(i.OutputSimd256Register(), i.MemoryOperand());
7128 break;
7129 }
7130 case kX64S256Load64Splat: {
7131 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
7132 CpuFeatureScope avx_scope(masm(), AVX);
7133 __ vbroadcastsd(i.OutputSimd256Register(), i.MemoryOperand());
7134 break;
7135 }
7136 case kX64Movdqu256: {
7137 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
7138 CpuFeatureScope avx_scope(masm(), AVX);
7139 if (instr->HasOutput()) {
7140 __ vmovdqu(i.OutputSimd256Register(), i.MemoryOperand());
7141 } else {
7142 size_t index = 0;
7143 Operand operand = i.MemoryOperand(&index);
7144 __ vmovdqu(operand, i.InputSimd256Register(index));
7145 }
7146 break;
7147 }
7148 case kX64I16x16SConvertI32x8: {
7149 CpuFeatureScope avx_scope(masm(), AVX2);
7150 YMMRegister dst = i.OutputSimd256Register();
7151 __ vpackssdw(dst, i.InputSimd256Register(0), i.InputSimd256Register(1));
7152 break;
7153 }
7154 case kX64I16x16UConvertI32x8: {
7155 CpuFeatureScope avx_scope(masm(), AVX2);
7156 YMMRegister dst = i.OutputSimd256Register();
7157 __ vpackusdw(dst, i.InputSimd256Register(0), i.InputSimd256Register(1));
7158 break;
7159 }
7160 case kX64I8x32SConvertI16x16: {
7161 CpuFeatureScope avx_scope(masm(), AVX2);
7162 YMMRegister dst = i.OutputSimd256Register();
7163 __ vpacksswb(dst, i.InputSimd256Register(0), i.InputSimd256Register(1));
7164 break;
7165 }
7166 case kX64I8x32UConvertI16x16: {
7167 CpuFeatureScope avx_scope(masm(), AVX2);
7168 YMMRegister dst = i.OutputSimd256Register();
7169 __ vpackuswb(dst, i.InputSimd256Register(0), i.InputSimd256Register(1));
7170 break;
7171 }
7172 case kX64I64x4ExtMulI32x4S: {
7173 __ I64x4ExtMul(i.OutputSimd256Register(), i.InputSimd128Register(0),
7174 i.InputSimd128Register(1), kScratchSimd256Reg,
7175 /*is_signed=*/true);
7176 break;
7177 }
7178 case kX64I64x4ExtMulI32x4U: {
7179 __ I64x4ExtMul(i.OutputSimd256Register(), i.InputSimd128Register(0),
7180 i.InputSimd128Register(1), kScratchSimd256Reg,
7181 /*is_signed=*/false);
7182 break;
7183 }
7184 case kX64I32x8ExtMulI16x8S: {
7185 __ I32x8ExtMul(i.OutputSimd256Register(), i.InputSimd128Register(0),
7186 i.InputSimd128Register(1), kScratchSimd256Reg,
7187 /*is_signed=*/true);
7188 break;
7189 }
7190 case kX64I32x8ExtMulI16x8U: {
7191 __ I32x8ExtMul(i.OutputSimd256Register(), i.InputSimd128Register(0),
7192 i.InputSimd128Register(1), kScratchSimd256Reg,
7193 /*is_signed=*/false);
7194 break;
7195 }
7196 case kX64I16x16ExtMulI8x16S: {
7197 __ I16x16ExtMul(i.OutputSimd256Register(), i.InputSimd128Register(0),
7198 i.InputSimd128Register(1), kScratchSimd256Reg,
7199 /*is_signed=*/true);
7200 break;
7201 }
7202 case kX64I16x16ExtMulI8x16U: {
7203 __ I16x16ExtMul(i.OutputSimd256Register(), i.InputSimd128Register(0),
7204 i.InputSimd128Register(1), kScratchSimd256Reg,
7205 /*is_signed=*/false);
7206 break;
7207 }
7208 case kX64S256Load8x16S: {
7209 CpuFeatureScope avx_scope(masm(), AVX2);
7210 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
7211 __ vpmovsxbw(i.OutputSimd256Register(), i.MemoryOperand());
7212 break;
7213 }
7214 case kX64S256Load8x16U: {
7215 CpuFeatureScope avx_scope(masm(), AVX2);
7216 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
7217 __ vpmovzxbw(i.OutputSimd256Register(), i.MemoryOperand());
7218 break;
7219 }
7220 case kX64S256Load8x8U: {
7221 CpuFeatureScope avx_scope(masm(), AVX2);
7222 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
7223 __ vpmovzxbd(i.OutputSimd256Register(), i.MemoryOperand());
7224 break;
7225 }
7226 case kX64S256Load16x8S: {
7227 CpuFeatureScope avx_scope(masm(), AVX2);
7228 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
7229 __ vpmovsxwd(i.OutputSimd256Register(), i.MemoryOperand());
7230 break;
7231 }
7232 case kX64S256Load16x8U: {
7233 CpuFeatureScope avx_scope(masm(), AVX2);
7234 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
7235 __ vpmovzxwd(i.OutputSimd256Register(), i.MemoryOperand());
7236 break;
7237 }
7238 case kX64S256Load32x4S: {
7239 CpuFeatureScope avx_scope(masm(), AVX2);
7240 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
7241 __ vpmovsxdq(i.OutputSimd256Register(), i.MemoryOperand());
7242 break;
7243 }
7244 case kX64S256Load32x4U: {
7245 CpuFeatureScope avx_scope(masm(), AVX2);
7246 RecordTrapInfoIfNeeded(zone(), this, opcode, instr, __ pc_offset());
7247 __ vpmovzxdq(i.OutputSimd256Register(), i.MemoryOperand());
7248 break;
7249 }
7250 case kX64S256Const: {
7251 // Emit code for generic constants; the all-zeros and all-ones cases are
7252 // handled separately by the instruction selector.
7253 YMMRegister dst = i.OutputSimd256Register();
7254 uint32_t imm[8] = {};
7255 for (int j = 0; j < 8; j++) {
7256 imm[j] = i.InputUint32(j);
7257 }
7258 SetupSimd256ImmediateInRegister(masm(), imm, dst, kScratchDoubleReg);
7259 break;
7260 }
7261 case kX64ExtractF128: {
7262 CpuFeatureScope avx_scope(masm(), AVX);
7263 uint8_t lane = i.InputInt8(1);
7264 __ vextractf128(i.OutputSimd128Register(), i.InputSimd256Register(0),
7265 lane);
7266 break;
7267 }
7268 case kX64InsertI128: {
7269 CpuFeatureScope avx_scope(masm(), AVX2);
7270 uint8_t imm = i.InputInt8(2);
7271 InstructionOperand* input0 = instr->InputAt(0);
7272 if (input0->IsSimd128Register()) {
7273 __ vinserti128(i.OutputSimd256Register(),
7274 YMMRegister::from_xmm(i.InputSimd128Register(0)),
7275 i.InputSimd128Register(1), imm);
7276 } else {
7277 DCHECK(instr->InputAt(0)->IsSimd256Register());
7278 __ vinserti128(i.OutputSimd256Register(), i.InputSimd256Register(0),
7279 i.InputSimd128Register(1), imm);
7280 }
7281 break;
7282 }
7283 }
7284 return kSuccess;
7285}  // NOLINT(readability/fn_size)
7286
7287#undef ASSEMBLE_PINSR
7288#undef ASSEMBLE_UNOP
7289#undef ASSEMBLE_BINOP
7290#undef ASSEMBLE_COMPARE
7291#undef ASSEMBLE_MULT
7292#undef ASSEMBLE_SHIFT
7293#undef ASSEMBLE_MOVX
7294#undef ASSEMBLE_SSE_BINOP
7295#undef ASSEMBLE_SSE_UNOP
7296#undef ASSEMBLE_AVX_BINOP
7297#undef ASSEMBLE_IEEE754_BINOP
7298#undef ASSEMBLE_IEEE754_UNOP
7299#undef ASSEMBLE_ATOMIC_BINOP
7300#undef ASSEMBLE_ATOMIC64_BINOP
7301#undef ASSEMBLE_SIMD_INSTR
7302#undef ASSEMBLE_SIMD_IMM_INSTR
7303#undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
7304#undef ASSEMBLE_SIMD_IMM_SHUFFLE
7305#undef ASSEMBLE_SIMD_ALL_TRUE
7306#undef ASSEMBLE_SIMD_SHIFT
7307#undef ASSEMBLE_SEQ_CST_STORE
7308
7309namespace {
7310
7311 Condition FlagsConditionToCondition(FlagsCondition condition) {
7312 switch (condition) {
7313 case kUnorderedEqual:
7314 case kEqual:
7315 return equal;
7316 case kUnorderedNotEqual:
7317 case kNotEqual:
7318 return not_equal;
7319 case kSignedLessThan:
7320 return less;
7321 case kSignedGreaterThanOrEqual:
7322 return greater_equal;
7323 case kSignedLessThanOrEqual:
7324 return less_equal;
7325 case kSignedGreaterThan:
7326 return greater;
7327 case kUnsignedLessThan:
7328 return below;
7329 case kUnsignedGreaterThanOrEqual:
7330 return above_equal;
7331 case kUnsignedLessThanOrEqual:
7332 return below_equal;
7333 case kUnsignedGreaterThan:
7334 return above;
7335 case kOverflow:
7336 return overflow;
7337 case kNotOverflow:
7338 return no_overflow;
7339 case kIsNaN:
7340 return parity_even;
7341 case kIsNotNaN:
7342 return parity_odd;
7343 default:
7344 break;
7345 }
7346 UNREACHABLE();
7347}
7348
7349} // namespace
7350
7351// Assembles branches after this instruction.
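// For floating-point comparisons the "unordered" conditions also consult the
// parity flag: a NaN operand makes kUnorderedEqual false (branch to the false
// label) and kUnorderedNotEqual true (branch to the true label). When
// INTEL_JCC_ERRATUM_MITIGATION is enabled, aligned_j/aligned_jmp pad the code
// so that the emitted branch does not cross a 32-byte boundary.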
7352void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
7353 Label::Distance flabel_distance =
7354 branch->fallthru ? Label::kNear : Label::kFar;
7355 Label* tlabel = branch->true_label;
7356 Label* flabel = branch->false_label;
7357 if (CpuFeatures::IsSupported(INTEL_JCC_ERRATUM_MITIGATION)) {
7358 if (branch->condition == kUnorderedEqual) {
7359 __ aligned_j(FlagsConditionToCondition(kIsNaN), flabel, flabel_distance);
7360 } else if (branch->condition == kUnorderedNotEqual) {
7361 __ aligned_j(FlagsConditionToCondition(kIsNaN), tlabel);
7362 }
7363 __ aligned_j(FlagsConditionToCondition(branch->condition), tlabel);
7364 if (!branch->fallthru) {
7365 __ aligned_jmp(flabel, flabel_distance);
7366 }
7367 } else {
7368 if (branch->condition == kUnorderedEqual) {
7369 __ j(FlagsConditionToCondition(kIsNaN), flabel, flabel_distance);
7370 } else if (branch->condition == kUnorderedNotEqual) {
7371 __ j(FlagsConditionToCondition(kIsNaN), tlabel);
7372 }
7373 __ j(FlagsConditionToCondition(branch->condition), tlabel);
7374 if (!branch->fallthru) {
7375 __ jmp(flabel, flabel_distance);
7376 }
7377 }
7378}
7379
7380 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
7381 BranchInfo* branch) {
7382 Label::Distance flabel_distance =
7383 branch->fallthru ? Label::kNear : Label::kFar;
7384 Label* tlabel = branch->true_label;
7385 Label* flabel = branch->false_label;
7386 Label nodeopt;
7387 if (CpuFeatures::IsSupported(INTEL_JCC_ERRATUM_MITIGATION)) {
7388 if (branch->condition == kUnorderedEqual) {
7389 __ aligned_j(FlagsConditionToCondition(kIsNaN), flabel, flabel_distance);
7390 } else if (branch->condition == kUnorderedNotEqual) {
7391 __ aligned_j(FlagsConditionToCondition(kIsNaN), tlabel);
7392 }
7393 __ aligned_j(FlagsConditionToCondition(branch->condition), tlabel);
7394 } else {
7395 if (branch->condition == kUnorderedEqual) {
7396 __ j(FlagsConditionToCondition(kIsNaN), flabel, flabel_distance);
7397 } else if (branch->condition == kUnorderedNotEqual) {
7398 __ j(FlagsConditionToCondition(kIsNaN), tlabel);
7399 }
7400 __ j(FlagsConditionToCondition(branch->condition), tlabel);
7401 }
7402
7403 if (v8_flags.deopt_every_n_times > 0) {
7404 if (isolate() != nullptr) {
7405 ExternalReference counter =
7406 ExternalReference::stress_deopt_count(isolate());
7407 // The following code assumes that `Isolate::stress_deopt_count_` is 8
7408 // bytes wide.
7409 static constexpr size_t kSizeofRAX = 8;
7410 static_assert(
7411 sizeof(decltype(*isolate()->stress_deopt_count_address())) ==
7412 kSizeofRAX);
7413
7414 __ pushfq();
7415 __ pushq(rax);
7416 __ load_rax(counter);
7417 __ decl(rax);
7418 __ j(not_zero, &nodeopt, Label::kNear);
7419
7420 __ Move(rax, v8_flags.deopt_every_n_times);
7421 __ store_rax(counter);
7422 __ popq(rax);
7423 __ popfq();
7424 __ jmp(tlabel);
7425
7426 __ bind(&nodeopt);
7427 __ store_rax(counter);
7428 __ popq(rax);
7429 __ popfq();
7430 } else {
7431#if V8_ENABLE_WEBASSEMBLY
7432 CHECK(v8_flags.wasm_deopt);
7433 CHECK(IsWasm());
7434 __ pushfq();
7435 __ pushq(rax);
7436 __ pushq(rbx);
7437 // Load the address of the counter into rbx.
7438 __ movq(rbx, Operand(rbp, WasmFrameConstants::kWasmInstanceDataOffset));
7439 __ movq(
7440 rbx,
7441 Operand(rbx, WasmTrustedInstanceData::kStressDeoptCounterOffset - 1));
7442 // Load the counter into rax and decrement it.
7443 __ movq(rax, Operand(rbx, 0));
7444 __ decl(rax);
7445 __ j(not_zero, &nodeopt, Label::kNear);
7446 // The counter is zero, reset counter.
7447 __ Move(rax, v8_flags.deopt_every_n_times);
7448 __ movq(Operand(rbx, 0), rax);
7449 // Restore registers and jump to deopt label.
7450 __ popq(rbx);
7451 __ popq(rax);
7452 __ popfq();
7453 __ jmp(tlabel);
7454 // Write back counter and restore registers.
7455 __ bind(&nodeopt);
7456 __ movq(Operand(rbx, 0), rax);
7457 __ popq(rbx);
7458 __ popq(rax);
7459 __ popfq();
7460#else
7461 UNREACHABLE();
7462#endif
7463 }
7464 }
7465
7466 if (!branch->fallthru) {
7467 if (CpuFeatures::IsSupported(INTEL_JCC_ERRATUM_MITIGATION)) {
7468 __ aligned_jmp(flabel, flabel_distance);
7469 } else {
7470 __ jmp(flabel, flabel_distance);
7471 }
7472 }
7473}
7474
7476 RpoNumber target) {
7477 __ jmp(GetLabel(target));
7478}
7479
7480#if V8_ENABLE_WEBASSEMBLY
7481void CodeGenerator::AssembleArchTrap(Instruction* instr,
7482 FlagsCondition condition) {
7483 auto ool = zone()->New<WasmOutOfLineTrap>(this, instr);
7484 Label* tlabel = ool->entry();
7485 Label end;
7486 if (condition == kUnorderedEqual) {
7487 __ j(FlagsConditionToCondition(kIsNaN), &end, Label::kNear);
7488 } else if (condition == kUnorderedNotEqual) {
7489 __ j(FlagsConditionToCondition(kIsNaN), tlabel);
7490 }
7491 __ j(FlagsConditionToCondition(condition), tlabel);
7492 __ bind(&end);
7493}
7494#endif // V8_ENABLE_WEBASSEMBLY
7495
7496// Assembles boolean materializations after this instruction.
7497 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
7498 FlagsCondition condition) {
7499 X64OperandConverter i(this, instr);
7500 Label done;
7501
7502 // Materialize a full 64-bit 1 or 0 value. The result register is always the
7503 // last output of the instruction.
7504 Label check;
7505 DCHECK_NE(0u, instr->OutputCount());
7506 Register reg = i.OutputRegister(instr->OutputCount() - 1);
7507 if (condition == kUnorderedEqual) {
7508 __ j(parity_odd, &check, Label::kNear);
7509 __ Move(reg, 0);
7510 __ jmp(&done, Label::kNear);
7511 } else if (condition == kUnorderedNotEqual) {
7512 __ j(parity_odd, &check, Label::kNear);
7513 __ Move(reg, 1);
7514 __ jmp(&done, Label::kNear);
7515 }
7516 __ bind(&check);
7519 __ movzxbl(reg, reg);
7520 }
7521 __ bind(&done);
7522}
7523
7524 void CodeGenerator::AssembleArchConditionalBoolean(Instruction* instr) {
7525 UNREACHABLE();
7526}
7527
7528 void CodeGenerator::AssembleArchConditionalBranch(Instruction* instr,
7529 BranchInfo* branch) {
7530 UNREACHABLE();
7531}
7532
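// Binary-search switch lowering: the sorted (value, label) pairs are split
// recursively around the median value; JumpIfLessThan dispatches into the
// lower half, and small ranges fall back to a linear sequence of JumpIfEqual
// checks followed by a jump to the default block. last_cmp_value remembers
// the most recently compared value so that an immediately repeated comparison
// can reuse the flags and emit only the conditional jump.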
7533 void CodeGenerator::AssembleArchBinarySearchSwitchRange(
7534 Register input, RpoNumber def_block, std::pair<int32_t, Label*>* begin,
7535 std::pair<int32_t, Label*>* end, std::optional<int32_t>& last_cmp_value) {
7536 if (end - begin < kBinarySearchSwitchMinimalCases) {
7537 if (last_cmp_value && *last_cmp_value == begin->first) {
7538 // No need to do another repeat cmp.
7539 masm()->j(equal, begin->second);
7540 ++begin;
7541 }
7542
7543 while (begin != end) {
7544 masm()->JumpIfEqual(input, begin->first, begin->second);
7545 ++begin;
7546 }
7547 AssembleArchJumpRegardlessOfAssemblyOrder(def_block);
7548 return;
7549 }
7550 auto middle = begin + (end - begin) / 2;
7551 Label less_label;
7552 masm()->JumpIfLessThan(input, middle->first, &less_label);
7553 last_cmp_value = middle->first;
7554 AssembleArchBinarySearchSwitchRange(input, def_block, middle, end,
7555 last_cmp_value);
7556 masm()->bind(&less_label);
7557 AssembleArchBinarySearchSwitchRange(input, def_block, begin, middle,
7558 last_cmp_value);
7559}
7560
7561 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
7562 X64OperandConverter i(this, instr);
7563 Register input = i.InputRegister(0);
7564 std::vector<std::pair<int32_t, Label*>> cases;
7565 for (size_t index = 2; index < instr->InputCount(); index += 2) {
7566 cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
7567 }
7568 std::optional<int32_t> last_cmp_value;
7569 AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
7570 cases.data() + cases.size(),
7571 last_cmp_value);
7572}
7573
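// Table switch lowering: the input is bounds-checked against the case count
// (out-of-range values jump to the default block) and then dispatched through
// a jump table addressed via kScratchRegister. Builtins store 4-byte relative
// offsets (target - table) that are added back onto the table address, while
// regular code stores absolute 8-byte targets; the notrack prefix disables
// landing-pad enforcement for the indirect jump.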
7574 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
7575 X64OperandConverter i(this, instr);
7576 Register input = i.InputRegister(0);
7577 int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
7578 base::Vector<Label*> cases = zone()->AllocateVector<Label*>(case_count);
7579 for (int32_t index = 0; index < case_count; ++index) {
7580 cases[index] = GetLabel(i.InputRpo(index + 2));
7581 }
7582 Label* const table = AddJumpTable(cases);
7583 __ cmpl(input, Immediate(case_count));
7584 __ j(above_equal, GetLabel(i.InputRpo(1)));
7585 __ leaq(kScratchRegister, Operand(table));
7586
7588 // For builtins, the value stored in the table is 'target_address -
7589 // table_address' (4 bytes). Load the table entry at the given index:
7590 // value = [table + index*4]
7591 __ movsxlq(input, Operand(kScratchRegister, input, times_4, 0));
7592 // Calculate the absolute address of target:
7593 // target = table + (target - table)
7594 __ addq(input, kScratchRegister);
7595 // Jump to the target.
7596
7597 // Add the notrack prefix to disable landing pad enforcement.
7598 __ jmp(input, /*notrack=*/true);
7599 } else {
7600 // For non builtins, the value in the table is 'target_address' (8 bytes)
7601 // jmp [table + index*8]
7602 __ jmp(Operand(kScratchRegister, input, times_8, 0), /*notrack=*/true);
7603 }
7604}
7605
7608 X64OperandConverter i(this, instr);
7612 DCHECK_EQ(i.OutputRegister(), i.InputRegister(instr->InputCount() - 2));
7613 size_t last_input = instr->InputCount() - 1;
7614 // kUnorderedNotEqual can be implemented more efficiently than
7615 // kUnorderedEqual. As the OR of two flags, it can be done with just two
7616 // cmovs. If the condition was originally a kUnorderedEqual, expect the
7617 // instruction selector to have inverted it and swapped the inputs.
7619 if (rep == MachineRepresentation::kWord32) {
7620 if (HasRegisterInput(instr, last_input)) {
7621 __ cmovl(cc, i.OutputRegister(), i.InputRegister(last_input));
7623 __ cmovl(parity_even, i.OutputRegister(), i.InputRegister(last_input));
7624 }
7625 } else {
7626 __ cmovl(cc, i.OutputRegister(), i.InputOperand(last_input));
7628 __ cmovl(parity_even, i.OutputRegister(), i.InputOperand(last_input));
7629 }
7630 }
7631 } else {
7633 if (HasRegisterInput(instr, last_input)) {
7634 __ cmovq(cc, i.OutputRegister(), i.InputRegister(last_input));
7636 __ cmovq(parity_even, i.OutputRegister(), i.InputRegister(last_input));
7637 }
7638 } else {
7639 __ cmovq(cc, i.OutputRegister(), i.InputOperand(last_input));
7641 __ cmovq(parity_even, i.OutputRegister(), i.InputOperand(last_input));
7642 }
7643 }
7644 }
7645}
7646
7647namespace {
7648
7649static const int kQuadWordSize = 16;
7650
7651} // namespace
7652
7653void CodeGenerator::FinishFrame(Frame* frame) {
7654 CallDescriptor* call_descriptor = linkage()->GetIncomingDescriptor();
7655
7656 const DoubleRegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
7657 if (!saves_fp.is_empty()) { // Save callee-saved XMM registers.
7658 frame->AlignSavedCalleeRegisterSlots();
7659 const uint32_t saves_fp_count = saves_fp.Count();
7660 frame->AllocateSavedCalleeRegisterSlots(
7661 saves_fp_count * (kQuadWordSize / kSystemPointerSize));
7662 }
7663 const RegList saves = call_descriptor->CalleeSavedRegisters();
7664 if (!saves.is_empty()) { // Save callee-saved registers.
7665 frame->AllocateSavedCalleeRegisterSlots(saves.Count());
7666 }
7667}
7668
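// Frame construction: depending on the call descriptor this emits a plain C
// frame (push rbp; mov rbp, rsp), the JS function prologue, or a stub
// prologue with a frame-type marker. Large WebAssembly frames additionally
// perform a stack-overflow check (or grow the stack when growable stacks are
// enabled) before the spill slots are allocated; callee-saved XMM and GP
// registers and the return slots are reserved afterwards.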
7669 void CodeGenerator::AssembleConstructFrame() {
7670 auto call_descriptor = linkage()->GetIncomingDescriptor();
7671 if (frame_access_state()->has_frame()) {
7672 int pc_base = __ pc_offset();
7673
7674 if (call_descriptor->IsCFunctionCall()) {
7675 __ pushq(rbp);
7676 __ movq(rbp, rsp);
7677#if V8_ENABLE_WEBASSEMBLY
7678 if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
7679 __ Push(Immediate(StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY)));
7680 // Reserve stack space for saving the c_entry_fp later.
7681 __ AllocateStackSpace(kSystemPointerSize);
7682 }
7683#endif // V8_ENABLE_WEBASSEMBLY
7684 } else if (call_descriptor->IsJSFunctionCall()) {
7685 __ Prologue();
7686 } else {
7687 __ StubPrologue(info()->GetOutputStackFrameType());
7688#if V8_ENABLE_WEBASSEMBLY
7689 if (call_descriptor->IsAnyWasmFunctionCall() ||
7690 call_descriptor->IsWasmImportWrapper() ||
7691 call_descriptor->IsWasmCapiFunction()) {
7692 // For import wrappers and C-API functions, this stack slot is only used
7693 // for printing stack traces in V8. Also, it holds a WasmImportData
7694 // instead of the trusted instance data, which is taken care of in the
7695 // frames accessors.
7697 }
7698 if (call_descriptor->IsWasmCapiFunction()) {
7699 // Reserve space for saving the PC later.
7700 __ AllocateStackSpace(kSystemPointerSize);
7701 }
7702#endif // V8_ENABLE_WEBASSEMBLY
7703 }
7704
7705 unwinding_info_writer_.MarkFrameConstructed(pc_base);
7706 }
7707 int required_slots =
7708 frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
7709
7710 if (info()->is_osr()) {
7711 // TurboFan OSR-compiled functions cannot be entered directly.
7712 __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
7713
7714 // Unoptimized code jumps directly to this entrypoint while the unoptimized
7715 // frame is still on the stack. Optimized code uses OSR values directly from
7716 // the unoptimized frame. Thus, all that needs to be done is to allocate the
7717 // remaining stack slots.
7718 __ RecordComment("-- OSR entrypoint --");
7720#ifdef V8_ENABLE_SANDBOX_BOOL
7721 uint32_t expected_frame_size =
7722 static_cast<uint32_t>(osr_helper()->UnoptimizedFrameSlots()) *
7725 __ leaq(kScratchRegister, Operand(rsp, expected_frame_size));
7726 __ cmpq(kScratchRegister, rbp);
7727 __ SbxCheck(equal, AbortReason::kOsrUnexpectedStackSize);
7728#endif // V8_ENABLE_SANDBOX_BOOL
7729
7730 required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
7731 }
7732
7733 const RegList saves = call_descriptor->CalleeSavedRegisters();
7734 const DoubleRegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
7735
7736 if (required_slots > 0) {
7737 DCHECK(frame_access_state()->has_frame());
7738#if V8_ENABLE_WEBASSEMBLY
7739 if (info()->IsWasm() && required_slots * kSystemPointerSize > 4 * KB) {
7740 // For WebAssembly functions with big frames we have to do the stack
7741 // overflow check before we construct the frame. Otherwise we may not
7742 // have enough space on the stack to call the runtime for the stack
7743 // overflow.
7744 Label done;
7745
7746 // If the frame is bigger than the stack, we throw the stack overflow
7747 // exception unconditionally. Thereby we can avoid the integer overflow
7748 // check in the condition code.
7749 if (required_slots * kSystemPointerSize < v8_flags.stack_size * KB) {
7750 __ movq(kScratchRegister,
7751 __ StackLimitAsOperand(StackLimitKind::kRealStackLimit));
7752 __ addq(kScratchRegister,
7753 Immediate(required_slots * kSystemPointerSize));
7754 __ cmpq(rsp, kScratchRegister);
7755 __ j(above_equal, &done, Label::kNear);
7756 }
7757
7758 if (v8_flags.experimental_wasm_growable_stacks) {
7761 regs_to_save.set(
7762 WasmHandleStackOverflowDescriptor::FrameBaseRegister());
7763 for (auto reg : wasm::kGpParamRegisters) regs_to_save.set(reg);
7765 DoubleRegList fp_regs_to_save;
7766 for (auto reg : wasm::kFpParamRegisters) fp_regs_to_save.set(reg);
7767 __ PushAll(fp_regs_to_save);
7769 Immediate(required_slots * kSystemPointerSize));
7770 __ movq(WasmHandleStackOverflowDescriptor::FrameBaseRegister(), rbp);
7771 __ addq(WasmHandleStackOverflowDescriptor::FrameBaseRegister(),
7772 Immediate(static_cast<int32_t>(
7773 call_descriptor->ParameterSlotCount() * kSystemPointerSize +
7775 __ CallBuiltin(Builtin::kWasmHandleStackOverflow);
7776 __ PopAll(fp_regs_to_save);
7777 __ PopAll(regs_to_save);
7778 } else {
7779 __ near_call(static_cast<intptr_t>(Builtin::kWasmStackOverflow),
7781 // The call does not return, hence we can ignore any references and just
7782 // define an empty safepoint.
7783 ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
7784 RecordSafepoint(reference_map);
7785 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
7786 }
7787 __ bind(&done);
7788 }
7789#endif // V8_ENABLE_WEBASSEMBLY
7790
7791 // Skip callee-saved and return slots, which are created below.
7792 required_slots -= saves.Count();
7793 required_slots -= saves_fp.Count() * (kQuadWordSize / kSystemPointerSize);
7794 required_slots -= frame()->GetReturnSlotCount();
7795 if (required_slots > 0) {
7796 __ AllocateStackSpace(required_slots * kSystemPointerSize);
7797 }
7798 }
7799
7800 if (!saves_fp.is_empty()) { // Save callee-saved XMM registers.
7801 const uint32_t saves_fp_count = saves_fp.Count();
7802 const int stack_size = saves_fp_count * kQuadWordSize;
7803 // Adjust the stack pointer.
7804 __ AllocateStackSpace(stack_size);
7805 // Store the registers on the stack.
7806 int slot_idx = 0;
7807 for (XMMRegister reg : saves_fp) {
7808 __ Movdqu(Operand(rsp, kQuadWordSize * slot_idx), reg);
7809 slot_idx++;
7810 }
7811 }
7812
7813 if (!saves.is_empty()) { // Save callee-saved registers.
7814 for (Register reg : base::Reversed(saves)) {
7815 __ pushq(reg);
7816 }
7817 }
7818
7819 // Allocate return slots (located after callee-saved).
7820 if (frame()->GetReturnSlotCount() > 0) {
7821 __ AllocateStackSpace(frame()->GetReturnSlotCount() * kSystemPointerSize);
7822 }
7823
7824 for (int spill_slot : frame()->tagged_slots()) {
7825 FrameOffset offset = frame_access_state()->GetFrameOffset(spill_slot);
7826 DCHECK(offset.from_frame_pointer());
7827 __ movq(Operand(rbp, offset.offset()), Immediate(0));
7828 }
7829}
7830
7831void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
7832 auto call_descriptor = linkage()->GetIncomingDescriptor();
7833
7834 // Restore registers.
7835 const RegList saves = call_descriptor->CalleeSavedRegisters();
7836 if (!saves.is_empty()) {
7837 const int returns = frame()->GetReturnSlotCount();
7838 if (returns != 0) {
7839 __ addq(rsp, Immediate(returns * kSystemPointerSize));
7840 }
7841 for (Register reg : saves) {
7842 __ popq(reg);
7843 }
7844 }
7845 const DoubleRegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
7846 if (!saves_fp.is_empty()) {
7847 const uint32_t saves_fp_count = saves_fp.Count();
7848 const int stack_size = saves_fp_count * kQuadWordSize;
7849 // Load the registers from the stack.
7850 int slot_idx = 0;
7851 for (XMMRegister reg : saves_fp) {
7852 __ Movdqu(reg, Operand(rsp, kQuadWordSize * slot_idx));
7853 slot_idx++;
7854 }
7855 // Adjust the stack pointer.
7856 __ addq(rsp, Immediate(stack_size));
7857 }
7858
7860
7861 X64OperandConverter g(this, nullptr);
7862 int parameter_slots = static_cast<int>(call_descriptor->ParameterSlotCount());
7863
7864 // {additional_pop_count} is only greater than zero if {parameter_slots} == 0.
7865 // Check RawMachineAssembler::PopAndReturn.
7866 if (parameter_slots != 0) {
7867 if (additional_pop_count->IsImmediate()) {
7868 DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
7869 } else if (v8_flags.debug_code) {
7870 __ cmpq(g.ToRegister(additional_pop_count), Immediate(0));
7871 __ Assert(equal, AbortReason::kUnexpectedAdditionalPopValue);
7872 }
7873 }
7874
7875#if V8_ENABLE_WEBASSEMBLY
7876 if (call_descriptor->IsAnyWasmFunctionCall() &&
7877 v8_flags.experimental_wasm_growable_stacks) {
7878 __ cmpq(
7880 Immediate(StackFrame::TypeToMarker(StackFrame::WASM_SEGMENT_START)));
7881 Label done;
7882 __ j(not_equal, &done);
7886 DoubleRegList fp_regs_to_save;
7887 for (auto reg : wasm::kFpReturnRegisters) fp_regs_to_save.set(reg);
7888 __ PushAll(fp_regs_to_save);
7889 __ PrepareCallCFunction(1);
7891 __ CallCFunction(ExternalReference::wasm_shrink_stack(), 1);
7892 // Restore old FP. We don't need to restore old SP explicitly, because
7893 // it will be restored from FP inside of AssembleDeconstructFrame.
7894 __ movq(rbp, kReturnRegister0);
7895 __ PopAll(fp_regs_to_save);
7896 __ PopAll(regs_to_save);
7897 __ bind(&done);
7898 }
7899#endif // V8_ENABLE_WEBASSEMBLY
7900
7901 Register argc_reg = rcx;
7902 // Functions with JS linkage have at least one parameter (the receiver).
7903 // If {parameter_slots} == 0, this is a builtin with
7904 // kDontAdaptArgumentsSentinel, which takes care of popping the JS
7905 // arguments itself.
7906 const bool drop_jsargs = parameter_slots != 0 &&
7908 call_descriptor->IsJSFunctionCall();
7909 if (call_descriptor->IsCFunctionCall()) {
7911 } else if (frame_access_state()->has_frame()) {
7912 if (additional_pop_count->IsImmediate() &&
7913 g.ToConstant(additional_pop_count).ToInt32() == 0) {
7914 // Canonicalize JSFunction return sites for now.
7915 if (return_label_.is_bound()) {
7916 // Emitting a far jump here would not save code size and may cause a
7917 // regression, so we only forward to the return label when a near jump suffices.
7918 const bool is_near_jump = is_int8(return_label_.pos() - __ pc_offset());
7919 if (drop_jsargs || is_near_jump) {
7920 __ jmp(&return_label_);
7921 return;
7922 }
7923 } else {
7924 __ bind(&return_label_);
7925 }
7926 }
7927 if (drop_jsargs) {
7928 // Get the actual argument count.
7929 DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
7930 __ movq(argc_reg, Operand(rbp, StandardFrameConstants::kArgCOffset));
7931 }
7933 }
7934
7935 if (drop_jsargs) {
7936 // We must pop all arguments from the stack (including the receiver).
7937 // The number of arguments without the receiver is
7938 // max(argc_reg, parameter_slots-1), and the receiver is added in
7939 // DropArguments().
7940 Label mismatch_return;
7941 Register scratch_reg = r10;
7942 DCHECK_NE(argc_reg, scratch_reg);
7943 DCHECK(!call_descriptor->CalleeSavedRegisters().has(scratch_reg));
7944 DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
7945 __ cmpq(argc_reg, Immediate(parameter_slots));
7946 __ j(greater, &mismatch_return, Label::kNear);
7947 __ Ret(parameter_slots * kSystemPointerSize, scratch_reg);
7948 __ bind(&mismatch_return);
7949 __ DropArguments(argc_reg, scratch_reg);
7950 // We use a return instead of a jump for better return address prediction.
7951 __ Ret();
7952 } else if (additional_pop_count->IsImmediate()) {
7953 Register scratch_reg = r10;
7954 DCHECK(!call_descriptor->CalleeSavedRegisters().has(scratch_reg));
7955 int additional_count = g.ToConstant(additional_pop_count).ToInt32();
7956 size_t pop_size = (parameter_slots + additional_count) * kSystemPointerSize;
7957 CHECK_LE(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
7958 __ Ret(static_cast<int>(pop_size), scratch_reg);
7959 } else {
7960 Register pop_reg = g.ToRegister(additional_pop_count);
7961 Register scratch_reg = pop_reg == r10 ? rcx : r10;
7962 DCHECK(!call_descriptor->CalleeSavedRegisters().has(scratch_reg));
7963 DCHECK(!call_descriptor->CalleeSavedRegisters().has(pop_reg));
7964 int pop_size = static_cast<int>(parameter_slots * kSystemPointerSize);
7965 __ PopReturnAddressTo(scratch_reg);
7966 __ leaq(rsp, Operand(rsp, pop_reg, times_system_pointer_size,
7967 static_cast<int>(pop_size)));
7968 __ PushReturnAddressFrom(scratch_reg);
7969 __ Ret();
7970 }
7971}
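// [Editorial sketch, not part of the original file.] For JS calls the return
// sequence above pops whichever is larger: the declared parameter slots or
// the actual argument count read from the frame (both counts include the
// receiver). A hedged, stand-alone model of that choice; the helper name is
// hypothetical:
#include <algorithm>

inline int SketchJsArgumentSlotsToDrop(int actual_argc, int parameter_slots) {
  return std::max(actual_argc, parameter_slots);
}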
7972
7974
7975 void CodeGenerator::PrepareForDeoptimizationExits(
7976 ZoneDeque<DeoptimizationExit*>* exits) {}
7977
7978 void CodeGenerator::IncrementStackAccessCounter(
7979 InstructionOperand* source, InstructionOperand* destination) {
7980 DCHECK(v8_flags.trace_turbo_stack_accesses);
7981 if (!info()->IsOptimizing()) {
7982#if V8_ENABLE_WEBASSEMBLY
7983 if (!info()->IsWasm()) return;
7984#else
7985 return;
7986#endif // V8_ENABLE_WEBASSEMBLY
7987 }
7989 auto IncrementCounter = [&](ExternalReference counter) {
7990 __ incl(__ ExternalReferenceAsOperand(counter));
7991 };
7992 if (source->IsAnyStackSlot()) {
7993 IncrementCounter(
7995 }
7996 if (destination->IsAnyStackSlot()) {
7997 IncrementCounter(
7999 }
8000}
8001
8002 AllocatedOperand CodeGenerator::Push(InstructionOperand* source) {
8003 auto rep = LocationOperand::cast(source)->representation();
8004 int new_slots = ElementSizeInPointers(rep);
8005 X64OperandConverter g(this, nullptr);
8006 int last_frame_slot_id =
8007 frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
8008 int sp_delta = frame_access_state_->sp_delta();
8009 int slot_id = last_frame_slot_id + sp_delta + new_slots;
8010 AllocatedOperand stack_slot(LocationOperand::STACK_SLOT, rep, slot_id);
8011 if (source->IsRegister()) {
8012 __ pushq(g.ToRegister(source));
8013 frame_access_state()->IncreaseSPDelta(new_slots);
8014 } else if (source->IsStackSlot() || source->IsFloatStackSlot() ||
8015 source->IsDoubleStackSlot()) {
8016 __ pushq(g.ToOperand(source));
8017 frame_access_state()->IncreaseSPDelta(new_slots);
8018 } else {
8019 // No push instruction for xmm registers / 128-bit memory operands. Bump
8020 // the stack pointer and assemble the move.
8021 __ subq(rsp, Immediate(new_slots * kSystemPointerSize));
8022 frame_access_state()->IncreaseSPDelta(new_slots);
8023 AssembleMove(source, &stack_slot);
8024 }
8025 temp_slots_ += new_slots;
8026 return stack_slot;
8027}
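// [Editorial sketch, not part of the original file.] Push() above materializes
// a temporary spill slot just below the frame: its slot id is the last fixed
// frame slot plus the stack-pointer delta accumulated so far plus the slots
// being pushed. A minimal model of that bookkeeping; the struct and function
// names are hypothetical:
struct SketchTempSlotState {
  int last_frame_slot_id;  // GetTotalFrameSlotCount() - 1
  int sp_delta;            // extra slots already pushed beyond the frame
};

inline int SketchNextTempSlotId(const SketchTempSlotState& s, int new_slots) {
  return s.last_frame_slot_id + s.sp_delta + new_slots;
}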
8028
8029void CodeGenerator::Pop(InstructionOperand* dest, MachineRepresentation rep) {
8030 X64OperandConverter g(this, nullptr);
8031 int dropped_slots = ElementSizeInPointers(rep);
8032 if (dest->IsRegister()) {
8033 frame_access_state()->IncreaseSPDelta(-dropped_slots);
8034 __ popq(g.ToRegister(dest));
8035 } else if (dest->IsStackSlot() || dest->IsFloatStackSlot() ||
8036 dest->IsDoubleStackSlot()) {
8037 frame_access_state()->IncreaseSPDelta(-dropped_slots);
8038 __ popq(g.ToOperand(dest));
8039 } else {
8040 int last_frame_slot_id =
8041 frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
8042 int sp_delta = frame_access_state_->sp_delta();
8043 int slot_id = last_frame_slot_id + sp_delta;
8044 AllocatedOperand stack_slot(LocationOperand::STACK_SLOT, rep, slot_id);
8045 AssembleMove(&stack_slot, dest);
8046 frame_access_state()->IncreaseSPDelta(-dropped_slots);
8047 __ addq(rsp, Immediate(dropped_slots * kSystemPointerSize));
8048 }
8049 temp_slots_ -= dropped_slots;
8050}
8051
8052 void CodeGenerator::PopTempStackSlots() {
8053 if (temp_slots_ > 0) {
8055 __ addq(rsp, Immediate(temp_slots_ * kSystemPointerSize));
8056 temp_slots_ = 0;
8057 }
8058}
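// [Editorial sketch, not part of the original file.] Pop() mirrors Push(): it
// reduces the recorded stack-pointer delta and either pops into a register or
// moves the value out before releasing the slot; PopTempStackSlots() then
// frees any still-outstanding temporary slots with a single addq. A trivial
// model of the byte count released, assuming 8-byte slots; names are
// hypothetical:
constexpr int SketchTempSlotCleanupBytes(int temp_slots, int slot_size = 8) {
  return temp_slots > 0 ? temp_slots * slot_size : 0;
}
static_assert(SketchTempSlotCleanupBytes(3) == 24, "worked example");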
8059
8060 void CodeGenerator::MoveToTempLocation(InstructionOperand* source,
8061 MachineRepresentation rep) {
8062 // Must be kept in sync with {MoveTempLocationTo}.
8063 DCHECK(!source->IsImmediate());
8064 if ((IsFloatingPoint(rep) &&
8067 // The scratch register for this rep is available.
8068 int scratch_reg_code = !IsFloatingPoint(rep) ? kScratchRegister.code()
8069 : kScratchDoubleReg.code();
8070 AllocatedOperand scratch(LocationOperand::REGISTER, rep, scratch_reg_code);
8071 AssembleMove(source, &scratch);
8072 } else {
8073 // The scratch register is blocked by pending moves. Use the stack instead.
8074 Push(source);
8075 }
8076}
8077
8078 void CodeGenerator::MoveTempLocationTo(InstructionOperand* dest,
8079 MachineRepresentation rep) {
8080 if ((IsFloatingPoint(rep) &&
8083 int scratch_reg_code = !IsFloatingPoint(rep) ? kScratchRegister.code()
8084 : kScratchDoubleReg.code();
8085 AllocatedOperand scratch(LocationOperand::REGISTER, rep, scratch_reg_code);
8086 AssembleMove(&scratch, dest);
8087 } else {
8088 Pop(dest, rep);
8089 }
8090 move_cycle_ = MoveCycleState();
8091}
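// [Editorial sketch, not part of the original file.] MoveToTempLocation() and
// MoveTempLocationTo() break move cycles: the value parks in the GP or FP
// scratch register when that register is not claimed by a pending move, and
// otherwise spills to a fresh stack slot via Push()/Pop(). A hedged model of
// the decision; the enum and parameter names are hypothetical:
enum class SketchTempHome { kGpScratch, kFpScratch, kStack };

inline SketchTempHome SketchChooseTempHome(bool is_floating_point,
                                           bool scratch_blocked) {
  if (scratch_blocked) return SketchTempHome::kStack;
  return is_floating_point ? SketchTempHome::kFpScratch
                           : SketchTempHome::kGpScratch;
}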
8092
8093void CodeGenerator::SetPendingMove(MoveOperands* move) {
8094 MoveType::Type move_type =
8095 MoveType::InferMove(&move->source(), &move->destination());
8096 if (move_type == MoveType::kConstantToStack) {
8097 X64OperandConverter g(this, nullptr);
8098 Constant src = g.ToConstant(&move->source());
8099 if (move->destination().IsStackSlot() &&
8100 (!RelocInfo::IsNoInfo(src.rmode()) ||
8101 (src.type() != Constant::kInt32 && src.type() != Constant::kInt64))) {
8103 }
8104 } else if (move_type == MoveType::kStackToStack) {
8105 if (move->source().IsFPLocationOperand()) {
8107 } else {
8109 }
8110 }
8111}
8112
8113namespace {
8114
8115 bool Is32BitOperand(InstructionOperand* operand) {
8116 DCHECK(operand->IsStackSlot() || operand->IsRegister());
8117 MachineRepresentation mr = LocationOperand::cast(operand)->representation();
8118 return mr == MachineRepresentation::kWord32 ||
8119 mr == MachineRepresentation::kCompressedPointer ||
8120 mr == MachineRepresentation::kCompressed;
8121 }
8122
8123// When we need only 32 bits, move only 32 bits. Benefits:
8124// - Save a byte here and there (depending on the destination
8125// register; "movl eax, ..." is smaller than "movq rax, ...").
8126// - Safeguard against accidental decompression of compressed slots.
8127 // We must check that both {source} and {destination} are 32-bit values,
8128 // because a 32-bit source may legitimately be treated as a 64-bit value
8129 // after virtual register renaming (which avoids redundant explicit
8130 // zero-extensions that also happen implicitly).
8131bool Use32BitMove(InstructionOperand* source, InstructionOperand* destination) {
8132 return Is32BitOperand(source) && Is32BitOperand(destination);
8133}
8134
8135} // namespace
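// [Editorial sketch, not part of the original file.] Use32BitMove() permits
// the narrower movl only when both ends of the move are 32-bit (word32 or
// compressed) values, for the reasons given in the comment above. As a rough
// size intuition, "movl eax, ebx" avoids the REX.W prefix byte that
// "movq rax, rbx" needs. A trivial model of the conjunction; names are
// hypothetical:
inline bool SketchCanUse32BitMove(bool source_is_32bit, bool destination_is_32bit) {
  return source_is_32bit && destination_is_32bit;
}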
8136
8137void CodeGenerator::AssembleMove(InstructionOperand* source,
8138 InstructionOperand* destination) {
8139 X64OperandConverter g(this, nullptr);
8140 // Helper function to write the given constant to the dst register.
8141 // If a move type needs the scratch register, this also needs to be recorded
8142 // in {SetPendingMove} to avoid conflicts with the gap resolver.
8143 auto MoveConstantToRegister = [&](Register dst, Constant src) {
8144 switch (src.type()) {
8145 case Constant::kInt32: {
8146 int32_t value = src.ToInt32();
8147 if (value == 0 && RelocInfo::IsNoInfo(src.rmode())) {
8148 __ xorl(dst, dst);
8149 } else {
8150 __ movl(dst, Immediate(value, src.rmode()));
8151 }
8152 break;
8153 }
8154 case Constant::kInt64:
8155 if (RelocInfo::IsNoInfo(src.rmode())) {
8156 __ Move(dst, src.ToInt64());
8157 } else {
8158 __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
8159 }
8160 break;
8161 case Constant::kFloat32:
8162 __ MoveNumber(dst, src.ToFloat32());
8163 break;
8164 case Constant::kFloat64:
8165 __ MoveNumber(dst, src.ToFloat64().value());
8166 break;
8168 __ Move(dst, src.ToExternalReference());
8169 break;
8170 case Constant::kHeapObject: {
8171 Handle<HeapObject> src_object = src.ToHeapObject();
8173 if (IsMaterializableFromRoot(src_object, &index)) {
8174 __ LoadRoot(dst, index);
8175 } else {
8176 __ Move(dst, src_object);
8177 }
8178 break;
8179 }
8181 Handle<HeapObject> src_object = src.ToHeapObject();
8183 if (IsMaterializableFromRoot(src_object, &index)) {
8184 __ LoadTaggedRoot(dst, index);
8185 } else {
8186 __ Move(dst, src_object, RelocInfo::COMPRESSED_EMBEDDED_OBJECT);
8187 }
8188 break;
8189 }
8191 UNREACHABLE(); // TODO(dcarney): load of labels on x64.
8192 }
8193 };
8194 // Helper function to write the given constant to the stack.
8195 auto MoveConstantToSlot = [&](Operand dst, Constant src) {
8196 if (RelocInfo::IsNoInfo(src.rmode())) {
8197 switch (src.type()) {
8198 case Constant::kInt32:
8199 __ Move(dst, src.ToInt32());
8200 return;
8201 case Constant::kInt64:
8202 __ Move(dst, src.ToInt64());
8203 return;
8204 default:
8205 break;
8206 }
8207 }
8208 MoveConstantToRegister(kScratchRegister, src);
8209 __ movq(dst, kScratchRegister);
8210 };
8211
8212 if (v8_flags.trace_turbo_stack_accesses) {
8214 }
8215
8216 // Dispatch on the source and destination operand kinds.
8217 switch (MoveType::InferMove(source, destination)) {
8219 if (source->IsRegister()) {
8220 DCHECK(destination->IsRegister());
8221 if (Use32BitMove(source, destination)) {
8222 __ movl(g.ToRegister(destination), g.ToRegister(source));
8223 } else {
8224 __ movq(g.ToRegister(destination), g.ToRegister(source));
8225 }
8226 } else {
8227 DCHECK(source->IsFPRegister());
8231 CpuFeatureScope avx_scope(masm(), AVX);
8232 // Whether the ymm source should be used as an xmm.
8233 if (source->IsSimd256Register() && destination->IsSimd128Register()) {
8234 __ vmovapd(g.ToSimd128Register(destination),
8235 g.ToSimd128Register(source));
8236 } else {
8237 __ vmovapd(g.ToSimd256Register(destination),
8238 g.ToSimd256Register(source));
8239 }
8240 } else {
8241 __ Movapd(g.ToDoubleRegister(destination),
8242 g.ToDoubleRegister(source));
8243 }
8244 }
8245 return;
8247 Operand dst = g.ToOperand(destination);
8248 if (source->IsRegister()) {
8249 __ movq(dst, g.ToRegister(source));
8250 } else {
8251 DCHECK(source->IsFPRegister());
8252 XMMRegister src = g.ToDoubleRegister(source);
8256 __ Movups(dst, src);
8257 } else if (rep == MachineRepresentation::kSimd256) {
8258 CpuFeatureScope avx_scope(masm(), AVX);
8259 // Whether the ymm source should be used as an xmm.
8260 if (source->IsSimd256Register() &&
8261 destination->IsSimd128StackSlot()) {
8262 __ vmovups(dst, g.ToSimd128Register(source));
8263 } else {
8264 __ vmovups(dst, g.ToSimd256Register(source));
8265 }
8266 } else {
8267 __ Movsd(dst, src);
8268 }
8269 }
8270 return;
8271 }
8273 Operand src = g.ToOperand(source);
8274 if (source->IsStackSlot()) {
8275 if (Use32BitMove(source, destination)) {
8276 __ movl(g.ToRegister(destination), src);
8277 } else {
8278 __ movq(g.ToRegister(destination), src);
8279 }
8280 } else {
8281 DCHECK(source->IsFPStackSlot());
8282 XMMRegister dst = g.ToDoubleRegister(destination);
8286 __ Movups(dst, src);
8287 } else if (rep == MachineRepresentation::kSimd256) {
8288 CpuFeatureScope avx_scope(masm(), AVX);
8289 if (source->IsSimd256StackSlot() &&
8290 destination->IsSimd128Register()) {
8291 __ vmovups(g.ToSimd128Register(destination), src);
8292 } else {
8293 __ vmovups(g.ToSimd256Register(destination), src);
8294 }
8295 } else {
8296 __ Movsd(dst, src);
8297 }
8298 }
8299 return;
8300 }
8302 Operand src = g.ToOperand(source);
8303 Operand dst = g.ToOperand(destination);
8304 if (source->IsStackSlot()) {
8305 // Spill on demand to use a temporary register for memory-to-memory
8306 // moves.
8307 if (Use32BitMove(source, destination)) {
8308 __ movl(kScratchRegister, src);
8309 } else {
8310 __ movq(kScratchRegister, src);
8311 }
8312 // Always write the full 64 bits to avoid leaving stale bits in the upper
8313 // 32 bits on the stack.
8314 __ movq(dst, kScratchRegister);
8315 } else {
8319 __ Movups(kScratchDoubleReg, src);
8320 __ Movups(dst, kScratchDoubleReg);
8321 } else if (rep == MachineRepresentation::kSimd256) {
8322 CpuFeatureScope avx_scope(masm(), AVX);
8323 if (source->IsSimd256StackSlot() &&
8324 destination->IsSimd128StackSlot()) {
8325 __ vmovups(kScratchDoubleReg, src);
8326 __ vmovups(dst, kScratchDoubleReg);
8327 } else {
8328 __ vmovups(kScratchSimd256Reg, src);
8329 __ vmovups(dst, kScratchSimd256Reg);
8330 }
8331 } else {
8332 __ Movsd(kScratchDoubleReg, src);
8333 __ Movsd(dst, kScratchDoubleReg);
8334 }
8335 }
8336 return;
8337 }
8339 Constant src = g.ToConstant(source);
8340 if (destination->IsRegister()) {
8341 MoveConstantToRegister(g.ToRegister(destination), src);
8342 } else {
8343 DCHECK(destination->IsFPRegister());
8344 XMMRegister dst = g.ToDoubleRegister(destination);
8345 if (src.type() == Constant::kFloat32) {
8346 // TODO(turbofan): Can we do better here?
8347 __ Move(dst, base::bit_cast<uint32_t>(src.ToFloat32()));
8348 } else {
8349 DCHECK_EQ(src.type(), Constant::kFloat64);
8350 __ Move(dst, src.ToFloat64().AsUint64());
8351 }
8352 }
8353 return;
8354 }
8356 Constant src = g.ToConstant(source);
8357 Operand dst = g.ToOperand(destination);
8358 if (destination->IsStackSlot()) {
8359 MoveConstantToSlot(dst, src);
8360 } else {
8361 DCHECK(destination->IsFPStackSlot());
8362 if (src.type() == Constant::kFloat32) {
8363 __ movl(dst, Immediate(base::bit_cast<uint32_t>(src.ToFloat32())));
8364 } else {
8365 DCHECK_EQ(src.type(), Constant::kFloat64);
8366 __ Move(dst, src.ToFloat64().AsUint64());
8367 }
8368 }
8369 return;
8370 }
8371 }
8372 UNREACHABLE();
8373}
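// [Editorial sketch, not part of the original file.] AssembleMove() dispatches
// on the (source, destination) kinds: register-to-register, register-to-stack,
// stack-to-register, stack-to-stack (via the scratch register, since x64 has
// no memory-to-memory move), constant-to-register, and constant-to-stack. A
// hedged model of when the scratch register is needed; names are hypothetical:
enum class SketchLoc { kRegister, kStack, kConstant };

inline bool SketchMoveNeedsScratch(SketchLoc source, SketchLoc destination,
                                   bool constant_fits_in_store) {
  if (source == SketchLoc::kStack && destination == SketchLoc::kStack)
    return true;  // spill through kScratchRegister / kScratchDoubleReg
  if (source == SketchLoc::kConstant && destination == SketchLoc::kStack)
    return !constant_fits_in_store;  // see MoveConstantToSlot above
  return false;
}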
8374
8375void CodeGenerator::AssembleSwap(InstructionOperand* source,
8376 InstructionOperand* destination) {
8377 if (v8_flags.trace_turbo_stack_accesses) {
8380 }
8381
8382 X64OperandConverter g(this, nullptr);
8383 // Dispatch on the source and destination operand kinds. Not all
8384 // combinations are possible.
8385 switch (MoveType::InferSwap(source, destination)) {
8387 if (source->IsRegister()) {
8388 Register src = g.ToRegister(source);
8389 Register dst = g.ToRegister(destination);
8390 if (Use32BitMove(source, destination)) {
8391 __ movl(kScratchRegister, src);
8392 __ movl(src, dst);
8393 __ movl(dst, kScratchRegister);
8394 } else {
8395 __ movq(kScratchRegister, src);
8396 __ movq(src, dst);
8397 __ movq(dst, kScratchRegister);
8398 }
8399 } else {
8400 DCHECK(source->IsFPRegister());
8404 YMMRegister src = g.ToSimd256Register(source);
8405 YMMRegister dst = g.ToSimd256Register(destination);
8406 CpuFeatureScope avx_scope(masm(), AVX);
8407 __ vmovapd(kScratchSimd256Reg, src);
8408 __ vmovapd(src, dst);
8409 __ vmovapd(dst, kScratchSimd256Reg);
8410
8411 } else {
8412 XMMRegister src = g.ToDoubleRegister(source);
8413 XMMRegister dst = g.ToDoubleRegister(destination);
8414 __ Movapd(kScratchDoubleReg, src);
8415 __ Movapd(src, dst);
8416 __ Movapd(dst, kScratchDoubleReg);
8417 }
8418 }
8419 return;
8420 }
8422 if (source->IsRegister()) {
8423 Register src = g.ToRegister(source);
8424 Operand dst = g.ToOperand(destination);
8425 __ movq(kScratchRegister, src);
8426 __ movq(src, dst);
8427 __ movq(dst, kScratchRegister);
8428 } else {
8429 DCHECK(source->IsFPRegister());
8430 Operand dst = g.ToOperand(destination);
8434 XMMRegister src = g.ToDoubleRegister(source);
8435 __ Movups(kScratchDoubleReg, src);
8436 __ Movups(src, dst);
8437 __ Movups(dst, kScratchDoubleReg);
8438 } else if (rep == MachineRepresentation::kSimd256) {
8439 YMMRegister src = g.ToSimd256Register(source);
8440 CpuFeatureScope avx_scope(masm(), AVX);
8441 __ vmovups(kScratchSimd256Reg, src);
8442 __ vmovups(src, dst);
8443 __ vmovups(dst, kScratchSimd256Reg);
8444 } else {
8445 XMMRegister src = g.ToDoubleRegister(source);
8446 __ Movsd(kScratchDoubleReg, src);
8447 __ Movsd(src, dst);
8448 __ Movsd(dst, kScratchDoubleReg);
8449 }
8450 }
8451 return;
8452 }
8454 Operand src = g.ToOperand(source);
8455 Operand dst = g.ToOperand(destination);
8459 // Without AVX, misaligned reads and writes will trap. Move using the
8460 // stack, in two parts.
8461 // The XOR trick could be used if AVX is supported, but it needs more
8462 // instructions and may introduce a performance penalty if the memory
8463 // reference splits a cache line.
8464 __ movups(kScratchDoubleReg, dst); // Save dst in scratch register.
8465 __ pushq(src); // Then use stack to copy src to destination.
8468 __ popq(dst);
8471 __ pushq(g.ToOperand(source, kSystemPointerSize));
8474 __ popq(g.ToOperand(destination, kSystemPointerSize));
8477 __ movups(src, kScratchDoubleReg);
8478 } else if (rep == MachineRepresentation::kSimd256) {
8479 // Use the XOR trick to swap without a temporary. The xorps may read
8480 // from an unaligned address, causing a slowdown, but swaps
8481 // between stack slots should be rare.
8482 __ vmovups(kScratchSimd256Reg, src);
8484 dst); // scratch contains src ^ dst.
8485 __ vmovups(src, kScratchSimd256Reg);
8487 dst); // scratch contains src.
8488 __ vmovups(dst, kScratchSimd256Reg);
8490 src); // scratch contains dst.
8491 __ vmovups(src, kScratchSimd256Reg);
8492 } else {
8494 __ movq(tmp, dst);
8495 __ pushq(src); // Then use stack to copy src to destination.
8498 __ popq(dst);
8501 __ movq(src, tmp);
8502 }
8503 return;
8504 }
8505 default:
8506 UNREACHABLE();
8507 }
8508}
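// [Editorial sketch, not part of the original file.] Most swaps above follow
// the classic three-move pattern through a scratch location: scratch <- a,
// a <- b, b <- scratch. A stand-alone illustration in plain C++:
template <typename T>
void SketchSwapViaScratch(T& a, T& b) {
  T scratch = a;  // scratch <- a
  a = b;          // a <- b
  b = scratch;    // b <- scratch
}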
8509
8510void CodeGenerator::AssembleJumpTable(base::Vector<Label*> targets) {
8511#ifdef V8_ENABLE_BUILTIN_JUMP_TABLE_SWITCH
8512 // For builtins, the value in the table is `target_address - table_address`.
8513 // The builtin's code position may change, so the table value must be
8514 // position independent.
8516 int table_pos = __ pc_offset();
8517
8518 for (auto* target : targets) {
8519 __ WriteBuiltinJumpTableEntry(target, table_pos);
8520 }
8521 return;
8522 }
8523
8524#endif // V8_ENABLE_BUILTIN_JUMP_TABLE_SWITCH
8525
8526 // For non-builtins, the value in the table is just the target's absolute
8527 // address, which is position dependent.
8528 for (size_t index = 0; index < targets.size(); ++index) {
8529 __ dq(targets[index]);
8530 }
8531}
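// [Editorial sketch, not part of the original file.] For builtins the jump
// table stores `target - table_base`, so the entries remain valid if the
// builtin code moves; the dispatcher adds the table's runtime address back.
// A hedged model of encoding and decoding one entry; names are hypothetical:
#include <cstdint>

inline int64_t SketchEncodeJumpTableEntry(uint64_t target, uint64_t table_base) {
  return static_cast<int64_t>(target - table_base);  // position-independent offset
}
inline uint64_t SketchDecodeJumpTableEntry(int64_t entry, uint64_t table_base) {
  return table_base + static_cast<uint64_t>(entry);  // absolute target at runtime
}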
8532
8533#undef __
8534
8535} // namespace v8::internal::compiler