#ifndef V8_WASM_BASELINE_IA32_LIFTOFF_ASSEMBLER_IA32_INL_H_
#define V8_WASM_BASELINE_IA32_LIFTOFF_ASSEMBLER_IA32_INL_H_

#define RETURN_FALSE_IF_MISSING_CPU_FEATURE(name)    \
  if (!CpuFeatures::IsSupported(name)) return false; \
  CpuFeatureScope feature(this, name);
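// Stack-slot addressing fragment: Liftoff spill slots live below ebp, so a
// positive slot offset maps to a negative ebp displacement (slots addressed
// relative to esp use offset 0 or less). half_offset selects the low or high
// 32-bit word of an i64 register pair.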
  return Operand(offset > 0 ? ebp : esp, -offset + half_offset);

      assm->mov(dst.gp(), src);

      assm->mov_w(dst, src.gp());

      assm->mov(dst, src.gp());

      assm->mov(dst, src.low_gp());

      assm->movss(dst, src.fp());

      assm->movsd(dst, src.fp());

      assm->movdqu(dst, src.fp());

  assm->sar(reg.high_gp(), 31);
  DCHECK(!available.is_empty());

                          kLiftoffFrameSetupFunctionReg) ==

  LoadConstant(LiftoffRegister(kLiftoffFrameSetupFunctionReg),

                                       int stack_param_delta) {

  push(Operand(ebp, 4));
  push(Operand(ebp, 0));

  Register scratch = eax;

  const int slot_count = num_callee_stack_params + 2;
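  // Shift the whole frame upwards: the return address, the saved frame
  // pointer, and the callee stack parameters (slot_count slots in total) are
  // copied to their new positions, stack_param_delta slots closer to the
  // caller's frame.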
  for (int i = slot_count; i > 0; --i) {
    mov(scratch, Operand(esp, i * 4));
    mov(Operand(ebp, (i - stack_param_delta - 1) * 4), scratch);
  }

  lea(esp, Operand(ebp, -stack_param_delta * 4));
    int offset, SafepointTableBuilder* safepoint_table_builder,
    bool feedback_vector_slot, size_t stack_param_slots) {

  if (feedback_vector_slot) {

  constexpr int kAvailableSpace = 64;

  patching_assembler.sub_sp_32(frame_size);

  if (frame_size < v8_flags.stack_size * 1024) {

    mov(stack_limit, esp);
    sub(stack_limit, Immediate(frame_size));

  if (v8_flags.experimental_wasm_growable_stacks) {
    regs_to_save.set(WasmHandleStackOverflowDescriptor::FrameBaseRegister());

        Immediate(frame_size));
    mov(WasmHandleStackOverflowDescriptor::FrameBaseRegister(), ebp);
    add(WasmHandleStackOverflowDescriptor::FrameBaseRegister(),
        Immediate(static_cast<int32_t>(

    safepoint_table_builder->DefineSafepoint(this);
    wasm_call(static_cast<intptr_t>(Builtin::kWasmStackOverflow),

    safepoint_table_builder->DefineSafepoint(this);

                                 const FreezeCacheState& frozen) {

  liftoff::CacheStatePreservingTempRegisters temps{this};
  Register budget_array = temps.Acquire();

           WasmTrustedInstanceData::kTieringBudgetArrayOffset);

  int array_offset = kInt32Size * declared_func_index;
  sub(Operand{budget_array, array_offset}, Immediate(budget_used));

  if (!v8_flags.experimental_wasm_growable_stacks) {

  Label done, call_runtime;

  mov(old_fp.gp(), ebp);
  switch (value.type().kind()) {

      int32_t low_word = value.to_i64();
      int32_t high_word = value.to_i64() >> 32;

  Operand src{instance, offset};

                                         uint32_t* protected_load_pc,

  Load(LiftoffRegister(dst), src_addr, offset_reg,
       static_cast<uint32_t>(offset_imm), LoadType::kI32Load,
       protected_load_pc, false, false, needs_shift);

                                        int32_t offset_imm) {
  mov(dst, Operand(src_addr, offset_imm));

                                          int32_t offset_imm, Register src,
                                          LiftoffRegList pinned,
                                          uint32_t* protected_store_pc,
                                          SkipWriteBarrier skip_write_barrier) {

  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());

  if (protected_store_pc) *protected_store_pc = pc_offset();

  if (skip_write_barrier || v8_flags.disable_write_barriers) return;

  liftoff::CacheStatePreservingTempRegisters temps{this, pinned};

  lea(scratch, dst_op);

                          StubCallMode::kCallWasmRuntimeStub);

                             Register offset_reg, uint32_t offset_imm,
                             LoadType type, uint32_t* protected_load_pc,

  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());

  Operand src_op = offset_reg == no_reg ? Operand(src_addr, offset_imm)
                                        : Operand(src_addr, offset_reg,
                                                  scale_factor, offset_imm);
  if (protected_load_pc) *protected_load_pc = pc_offset();
  switch (type.value()) {
    case LoadType::kI32Load8U:
      movzx_b(dst.gp(), src_op);
      break;
    case LoadType::kI32Load8S:
      movsx_b(dst.gp(), src_op);
      break;
    case LoadType::kI64Load8U:
      movzx_b(dst.low_gp(), src_op);
      xor_(dst.high_gp(), dst.high_gp());
      break;
    case LoadType::kI64Load8S:
      movsx_b(dst.low_gp(), src_op);
      liftoff::SignExtendI32ToI64(this, dst);
      break;
    case LoadType::kI32Load16U:
      movzx_w(dst.gp(), src_op);
      break;
    case LoadType::kI32Load16S:
      movsx_w(dst.gp(), src_op);
      break;
    case LoadType::kI64Load16U:
      movzx_w(dst.low_gp(), src_op);
      xor_(dst.high_gp(), dst.high_gp());
      break;
    case LoadType::kI64Load16S:
      movsx_w(dst.low_gp(), src_op);
      liftoff::SignExtendI32ToI64(this, dst);
      break;
    case LoadType::kI32Load:
      mov(dst.gp(), src_op);
      break;
    case LoadType::kI64Load32U:
      mov(dst.low_gp(), src_op);
      xor_(dst.high_gp(), dst.high_gp());
      break;
    case LoadType::kI64Load32S:
      mov(dst.low_gp(), src_op);
      liftoff::SignExtendI32ToI64(this, dst);
      break;
    case LoadType::kI64Load: {
      Operand upper_src_op =
          liftoff::MemOperand(src_addr, offset_reg, offset_imm + 4);
      mov(dst.high_gp(), upper_src_op);
      mov(dst.low_gp(), src_op);
      break;
    }
    case LoadType::kF32Load:
      movss(dst.fp(), src_op);
      break;
    case LoadType::kF64Load:
      movsd(dst.fp(), src_op);
      break;
    case LoadType::kS128Load:

    case LoadType::kF32LoadF16:
void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
                             uint32_t offset_imm, LiftoffRegister src,
                             StoreType type, LiftoffRegList pinned,
                             uint32_t* protected_store_pc,

  DCHECK_EQ(type.value_type() == kWasmI64, src.is_gp_pair());

  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());

  Operand dst_op = liftoff::MemOperand(dst_addr, offset_reg, offset_imm);
  if (protected_store_pc) *protected_store_pc = pc_offset();

  switch (type.value()) {
    case StoreType::kI64Store8:

    case StoreType::kI32Store8:
      if (src.gp().is_byte_register()) {
        mov_b(dst_op, src.gp());

        LiftoffRegList pinned_byte = pinned | LiftoffRegList{dst_addr};
        if (offset_reg != no_reg) pinned_byte.set(offset_reg);
        LiftoffRegList candidates = liftoff::kByteRegs.MaskOut(pinned_byte);
        if (cache_state_.has_unused_register(candidates)) {
          mov(byte_src, src.gp());
          mov_b(dst_op, byte_src);

          mov(byte_src, src.gp());
          mov_b(dst_op, byte_src);

    case StoreType::kI64Store16:

    case StoreType::kI32Store16:
      mov_w(dst_op, src.gp());
      break;
    case StoreType::kI64Store32:

    case StoreType::kI32Store:
      mov(dst_op, src.gp());
      break;
    case StoreType::kI64Store: {
      Operand upper_dst_op =
          liftoff::MemOperand(dst_addr, offset_reg, offset_imm + 4);
      mov(upper_dst_op, src.high_gp());
      mov(dst_op, src.low_gp());
      break;
    }
    case StoreType::kF32Store:
      movss(dst_op, src.fp());
      break;
    case StoreType::kF64Store:
      movsd(dst_op, src.fp());
      break;
    case StoreType::kS128Store:
      Movdqu(dst_op, src.fp());
      break;
    case StoreType::kF32StoreF16:
void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr,
                                  Register offset_reg, uint32_t offset_imm,
                                  LoadType type, LiftoffRegList,

  if (type.value() != LoadType::kI64Load) {
    Load(dst, src_addr, offset_reg, offset_imm, type, nullptr, true);
    return;
  }

  DCHECK_EQ(type.value_type() == kWasmI64, dst.is_gp_pair());
  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
  Operand src_op = liftoff::MemOperand(src_addr, offset_reg, offset_imm);

  movsd(liftoff::kScratchDoubleReg, src_op);
  Pextrd(dst.low().gp(), liftoff::kScratchDoubleReg, 0);
  Pextrd(dst.high().gp(), liftoff::kScratchDoubleReg, 1);
void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
                                   uint32_t offset_imm, LiftoffRegister src,
                                   StoreType type, LiftoffRegList pinned,

  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());

  Operand dst_op = liftoff::MemOperand(dst_addr, offset_reg, offset_imm);

  if (type.value() == StoreType::kI64Store) {
    auto scratch2 = GetUnusedRegister(kFpReg, pinned).fp();
    movd(liftoff::kScratchDoubleReg, src.low().gp());
    movd(scratch2, src.high().gp());
    Punpckldq(liftoff::kScratchDoubleReg, scratch2);
    movsd(dst_op, liftoff::kScratchDoubleReg);
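    // The dummy read-modify-write of a stack slot below acts as the
    // sequentially-consistent fence for the preceding 8-byte store (in the
    // full file the or carries a lock prefix, a cheaper choice than mfence).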
    or_(Operand(esp, 0), Immediate(0));

  if (src.is_pair()) src = src.low();

  bool is_byte_store = type.size() == 1;
  LiftoffRegList src_candidates =
      is_byte_store ? liftoff::kByteRegs : kGpCacheRegList;
  pinned = pinned | LiftoffRegList{dst_addr, src};
  if (offset_reg != no_reg) pinned.set(offset_reg);

  if (!src_candidates.has(src) || cache_state_.is_used(src)) {

    LiftoffRegList unpinned_candidates = src_candidates.MaskOut(pinned);
    if (!cache_state_.has_unused_register(unpinned_candidates) &&
        src_candidates.has(src)) {

      Register safe_src = GetUnusedRegister(unpinned_candidates).gp();
      mov(safe_src, src_gp);
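  // xchg with a memory operand carries an implicit lock prefix, so a single
  // instruction gives a sequentially-consistent atomic store. The byte
  // variant can only encode eax/ebx/ecx/edx, hence the byte-register
  // shuffling above.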
  switch (type.value()) {
    case StoreType::kI64Store8:
    case StoreType::kI32Store8:
      xchg_b(src_gp, dst_op);
      return;
    case StoreType::kI64Store16:
    case StoreType::kI32Store16:
      xchg_w(src_gp, dst_op);
      return;
    case StoreType::kI64Store32:
    case StoreType::kI32Store:
      xchg(src_gp, dst_op);
      return;
  bool is_64_bit_op = type.value_type() == kWasmI64;

  Register value_reg = is_64_bit_op ? value.low_gp() : value.gp();

  bool is_byte_store = type.size() == 1;

  if (offset_reg != no_reg) pinned.set(offset_reg);

  if (is_byte_store && !liftoff::kByteRegs.has(value_reg)) {
        __ GetUnusedRegister(liftoff::kByteRegs.MaskOut(pinned)).gp();
    __ mov(safe_value_reg, value_reg);
    value_reg = safe_value_reg;

  Operand dst_op = liftoff::MemOperand(dst_addr, offset_reg, offset_imm);

  switch (type.value()) {
    case StoreType::kI64Store8:
    case StoreType::kI32Store8:
        __ xchg_b(value_reg, dst_op);

        __ xadd_b(dst_op, value_reg);

      __ movzx_b(result_reg, value_reg);
      break;
    case StoreType::kI64Store16:
    case StoreType::kI32Store16:
        __ xchg_w(value_reg, dst_op);

        __ xadd_w(dst_op, value_reg);

      __ movzx_w(result_reg, value_reg);
      break;
    case StoreType::kI64Store32:
    case StoreType::kI32Store:
        __ xchg(value_reg, dst_op);

        __ xadd(dst_op, value_reg);

      if (value_reg != result_reg) {
        __ mov(result_reg, value_reg);
      }
                          Register offset_reg, uint32_t offset_imm,

  bool is_64_bit_op = type.value_type() == kWasmI64;

  Register value_reg = is_64_bit_op ? value.low_gp() : value.gp();

  if (offset_reg != no_reg) pinned.set(offset_reg);
  __ ClearRegister(eax, {&dst_addr, &offset_reg, &value_reg}, pinned);

  bool is_byte_store = type.size() == 1;

                "root register is not Liftoff cache register");

    if (offset_reg != no_reg) pinned.set(offset_reg);
    scratch = __ GetUnusedRegister(kGpReg, pinned).gp();

  Operand dst_op = liftoff::MemOperand(dst_addr, offset_reg, offset_imm);

  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8: {
      __ mov_b(eax, dst_op);
      break;
    }
    case StoreType::kI32Store16:
    case StoreType::kI64Store16: {
      __ mov_w(eax, dst_op);
      break;
    }
    case StoreType::kI32Store:
    case StoreType::kI64Store32: {

  __ mov(scratch, eax);

      __ and_(scratch, value_reg);

      __ or_(scratch, value_reg);

      __ xor_(scratch, value_reg);

  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8: {
      __ cmpxchg_b(dst_op, scratch);
      break;
    }
    case StoreType::kI32Store16:
    case StoreType::kI64Store16: {
      __ cmpxchg_w(dst_op, scratch);
      break;
    }
    case StoreType::kI32Store:
    case StoreType::kI64Store32: {
      __ cmpxchg(dst_op, scratch);
      break;
    }

  if (is_byte_store) {

  if (result_reg != eax) {
    __ mov(result_reg, eax);
  }
                          Register offset_reg, uint32_t offset_imm,

                "The following code assumes that kRootRegister == ebx");
  __ AllocateStackSpace(8);

  __ mov(value_op_lo, value.low_gp());
  __ mov(value_op_hi, value.high_gp());

  __ SpillRegisters(old_hi, old_lo, new_hi, base, offset);
  if (offset_reg == no_reg) {
    if (dst_addr != base) __ mov(base, dst_addr);

    if (dst_addr == offset || offset_reg == base) {
      std::swap(dst_addr, offset_reg);

    __ ParallelRegisterMove(

  __ mov(old_lo, dst_op_lo);
  __ mov(old_hi, dst_op_hi);

  __ mov(new_lo, old_lo);
  __ mov(new_hi, old_hi);

      __ add(new_lo, value_op_lo);
      __ adc(new_hi, value_op_hi);

      __ sub(new_lo, value_op_lo);
      __ sbb(new_hi, value_op_hi);

      __ and_(new_lo, value_op_lo);
      __ and_(new_hi, value_op_hi);

      __ or_(new_lo, value_op_lo);
      __ or_(new_hi, value_op_hi);

      __ xor_(new_lo, value_op_lo);
      __ xor_(new_hi, value_op_hi);

      __ mov(new_lo, value_op_lo);
      __ mov(new_hi, value_op_hi);

  __ cmpxchg8b(dst_op_lo);

  __ ParallelRegisterMove(
      {{result, LiftoffRegister::ForPair(old_lo, old_hi), kI64}});
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type,

  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicBinop64(this, liftoff::kAdd, dst_addr, offset_reg,
                           offset_imm, value, result);
    return;
  }

  liftoff::AtomicAddOrSubOrExchange32(this, liftoff::kAdd, dst_addr,
                                      offset_reg, offset_imm, value, result,
                                      type);
}

void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type,

  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicBinop64(this, liftoff::kSub, dst_addr, offset_reg,
                           offset_imm, value, result);
    return;
  }

  liftoff::AtomicAddOrSubOrExchange32(this, liftoff::kSub, dst_addr,
                                      offset_reg, offset_imm, value, result,
                                      type);
}

void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type,

  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicBinop64(this, liftoff::kAnd, dst_addr, offset_reg,
                           offset_imm, value, result);
    return;
  }

  liftoff::AtomicBinop32(this, liftoff::kAnd, dst_addr, offset_reg, offset_imm,

void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
                                uint32_t offset_imm, LiftoffRegister value,
                                LiftoffRegister result, StoreType type,

  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicBinop64(this, liftoff::kOr, dst_addr, offset_reg,
                           offset_imm,

  liftoff::AtomicBinop32(this, liftoff::kOr, dst_addr, offset_reg, offset_imm,

void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type,

  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicBinop64(this, liftoff::kXor, dst_addr, offset_reg,
                           offset_imm, value, result);
    return;
  }

  liftoff::AtomicBinop32(this, liftoff::kXor, dst_addr, offset_reg, offset_imm,

void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
                                      uint32_t offset_imm,
                                      LiftoffRegister value,
                                      LiftoffRegister result, StoreType type,

  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicBinop64(this, liftoff::kExchange, dst_addr, offset_reg,
                           offset_imm, value, result);
    return;
  }

  liftoff::AtomicAddOrSubOrExchange32(this, liftoff::kExchange, dst_addr,
                                      offset_reg, offset_imm, value, result,
void LiftoffAssembler::AtomicCompareExchange(
    Register dst_addr, Register offset_reg, uint32_t offset_imm,
    LiftoffRegister expected, LiftoffRegister new_value,
    LiftoffRegister result, StoreType type, bool) {

  if (type.value() != StoreType::kI64Store) {
    bool is_64_bit_op = type.value_type() == kWasmI64;

    Register value_reg = is_64_bit_op ? new_value.low_gp() : new_value.gp();
    Register expected_reg = is_64_bit_op ? expected.low_gp() : expected.gp();
    Register result_reg = expected_reg;

    ClearRegister(eax, {&dst_addr, &value_reg},
                  LiftoffRegList{dst_addr, value_reg, expected_reg});
    if (expected_reg != eax) {
      mov(eax, expected_reg);

    bool is_byte_store = type.size() == 1;
    LiftoffRegList pinned{dst_addr, value_reg, expected_reg};

    if (is_byte_store && !liftoff::kByteRegs.has(value_reg)) {
          pinned.set(GetUnusedRegister(liftoff::kByteRegs.MaskOut(pinned)))
      mov(safe_value_reg, value_reg);
      value_reg = safe_value_reg;
      pinned.clear(LiftoffRegister(value_reg));

    Operand dst_op = Operand(dst_addr, offset_imm);

    switch (type.value()) {
      case StoreType::kI32Store8:
      case StoreType::kI64Store8: {
        cmpxchg_b(dst_op, value_reg);
        movzx_b(result_reg, eax);
        break;
      }
      case StoreType::kI32Store16:
      case StoreType::kI64Store16: {
        cmpxchg_w(dst_op, value_reg);
        movzx_w(result_reg, eax);
        break;
      }
      case StoreType::kI32Store:
      case StoreType::kI64Store32: {
        cmpxchg(dst_op, value_reg);
        if (result_reg != eax) {
          mov(result_reg, eax);
        }
        break;
      }
1270 "The following code assumes that kRootRegister == ebx");
  SpillRegisters(expected_hi, expected_lo, new_hi, address);

  mov(new_lo, new_value.low_gp());

  ParallelRegisterMove(
      {{LiftoffRegister(address), LiftoffRegister(dst_addr), kI32},
       {LiftoffRegister::ForPair(expected_lo, expected_hi), expected, kI64},
       {LiftoffRegister(new_hi), new_value.high(), kI32}});

  Operand dst_op = Operand(address, offset_imm);

  ParallelRegisterMove(
      {{result, LiftoffRegister::ForPair(expected_lo, expected_hi), kI64}});
}

void LiftoffAssembler::AtomicFence() { mfence(); }
void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
                                           uint32_t caller_slot_idx,

void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister reg, int offset,

void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
                                            uint32_t caller_slot_idx,
                                            Register frame_pointer) {
  liftoff::Store(this, frame_pointer,

void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,

  if (src_offset < dst_offset) {

    liftoff::MoveStackValue(this, liftoff::GetStackSlot(src_offset),
                            liftoff::GetStackSlot(dst_offset));

    liftoff::MoveStackValue(

void LiftoffAssembler::Move(Register dst, Register src, ValueKind kind) {

void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueKind kind) {
  RecordUsedSpillOffset(offset);
  Operand dst = liftoff::GetStackSlot(offset);

      mov(liftoff::GetHalfStackSlot(offset, kLowWord), reg.low_gp());
      mov(liftoff::GetHalfStackSlot(offset, kHighWord), reg.high_gp());

      movss(dst, reg.fp());

      movsd(dst, reg.fp());

      movdqu(dst, reg.fp());

void LiftoffAssembler::Spill(int offset, WasmValue value) {
  RecordUsedSpillOffset(offset);
  Operand dst = liftoff::GetStackSlot(offset);
  switch (value.type().kind()) {

      mov(dst, Immediate(value.to_i32()));

      int32_t low_word = value.to_i64();
      int32_t high_word = value.to_i64() >> 32;
      mov(liftoff::GetHalfStackSlot(offset, kLowWord), Immediate(low_word));
      mov(liftoff::GetHalfStackSlot(offset, kHighWord), Immediate(high_word));

void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueKind kind) {

void LiftoffAssembler::FillI64Half(Register reg, int offset, RegPairHalf half) {
  mov(reg, liftoff::GetHalfStackSlot(offset, half));
}

void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) {

  RecordUsedSpillOffset(start + size);

    mov(liftoff::GetHalfStackSlot(start + offset, kLowWord), Immediate(0));

    lea(edi, liftoff::GetStackSlot(start + size));

    mov(ecx, Immediate(size / 4));

void LiftoffAssembler::LoadSpillAddress(Register dst, int offset,

  lea(dst, liftoff::GetStackSlot(offset));
void LiftoffAssembler::emit_i32_add(Register dst, Register lhs, Register rhs) {

    lea(dst, Operand(lhs, rhs, times_1, 0));

void LiftoffAssembler::emit_i32_addi(Register dst, Register lhs, int32_t imm) {

    lea(dst, Operand(lhs, imm));

    add(dst, Immediate(imm));

void LiftoffAssembler::emit_i32_sub(Register dst, Register lhs, Register rhs) {

    if (dst != lhs) mov(dst, lhs);

  } else if (lhs == rhs) {

void LiftoffAssembler::emit_i32_subi(Register dst, Register lhs, int32_t imm) {

    lea(dst, Operand(lhs, -imm));

    sub(dst, Immediate(imm));

template <void (Assembler::*op)(Register, Register)>

    (assm->*op)(dst, lhs);

    if (dst != lhs) assm->mov(dst, lhs);
    (assm->*op)(dst, rhs);

template <void (Assembler::*op)(Register, int32_t)>

  if (dst != lhs) assm->mov(dst, lhs);
  (assm->*op)(dst, imm);

  liftoff::EmitCommutativeBinOp<&Assembler::imul>(this, dst, lhs, rhs);

void LiftoffAssembler::emit_i32_muli(Register dst, Register lhs, int32_t imm) {

    imul(dst, lhs, imm);

template <bool is_signed, DivOrRem div_or_rem>

                       Label* trap_div_unrepresentable) {
  constexpr bool needs_unrepresentable_check =
      is_signed && div_or_rem == DivOrRem::kDiv;
  constexpr bool special_case_minus_1 =
      is_signed && div_or_rem == DivOrRem::kRem;
  DCHECK_EQ(needs_unrepresentable_check, trap_div_unrepresentable != nullptr);
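  // idiv/div use edx:eax as the implicit dividend and leave the quotient in
  // eax and the remainder in edx, so rhs must first be moved out of those
  // registers. INT32_MIN / -1 overflows (and raises #DE on x86), hence the
  // unrepresentable check for signed division; INT32_MIN % -1 is defined as
  // 0 in wasm, hence the -1 special case for signed remainder.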
  if (rhs == eax || rhs == edx) {

    assm->mov(tmp, rhs);

  assm->test(rhs, rhs);
  assm->j(zero, trap_div_by_zero);

  if (needs_unrepresentable_check) {

    assm->j(equal, trap_div_unrepresentable);
    assm->bind(&do_div);

  } else if (special_case_minus_1) {

    assm->xor_(dst, dst);

    assm->bind(&do_rem);

  if (lhs != eax) assm->mov(eax, lhs);

    assm->xor_(edx, edx);

  constexpr Register kResultReg = div_or_rem == DivOrRem::kDiv ? eax : edx;
  if (dst != kResultReg) assm->mov(dst, kResultReg);
  if (special_case_minus_1) assm->bind(&done);
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
  liftoff::EmitInt32DivOrRem<true, liftoff::DivOrRem::kDiv>(
      this, dst, lhs, rhs, trap_div_by_zero, trap_div_unrepresentable);
}

void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitInt32DivOrRem<false, liftoff::DivOrRem::kDiv>(
      this, dst, lhs, rhs, trap_div_by_zero, nullptr);
}

void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitInt32DivOrRem<true, liftoff::DivOrRem::kRem>(
      this, dst, lhs, rhs, trap_div_by_zero, nullptr);
}

void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitInt32DivOrRem<false, liftoff::DivOrRem::kRem>(
      this, dst, lhs, rhs, trap_div_by_zero, nullptr);
}

void LiftoffAssembler::emit_i32_and(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::and_>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32_andi(Register dst, Register lhs, int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::and_>(this, dst, lhs, imm);
}

void LiftoffAssembler::emit_i32_or(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::or_>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32_ori(Register dst, Register lhs, int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::or_>(this, dst, lhs, imm);
}

void LiftoffAssembler::emit_i32_xor(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::xor_>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32_xori(Register dst, Register lhs, int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::xor_>(this, dst, lhs, imm);
}
    assm->mov(tmp, src);
    if (amount != ecx) assm->mov(ecx, amount);
    (assm->*emit_shift)(tmp);
    assm->mov(ecx, tmp);

  if (amount != ecx) {

    assm->mov(tmp_reg, ecx);
    if (src == ecx) src = tmp_reg;

    assm->mov(ecx, amount);

  if (dst != src) assm->mov(dst, src);
  (assm->*emit_shift)(dst);

void LiftoffAssembler::emit_i32_shli(Register dst, Register src,

  if (dst != src) mov(dst, src);
  shl(dst, amount & 31);

void LiftoffAssembler::emit_i32_sar(Register dst, Register src,

void LiftoffAssembler::emit_i32_sari(Register dst, Register src,

  if (dst != src) mov(dst, src);
  sar(dst, amount & 31);

void LiftoffAssembler::emit_i32_shr(Register dst, Register src,

void LiftoffAssembler::emit_i32_shri(Register dst, Register src,

  if (dst != src) mov(dst, src);
  shr(dst, amount & 31);

void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {

void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {

bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {

  CpuFeatureScope scope(this, POPCNT);
  (assm->*op)(dst_low, rhs.low_gp());

  (assm->*op_with_carry)(dst_high, rhs.high_gp());

  LiftoffRegister tmp_result = LiftoffRegister::ForPair(dst_low, dst_high);
  if (tmp_result != dst) assm->Move(dst, tmp_result, kI64);

  int32_t imm_low_word = static_cast<int32_t>(imm);
  int32_t imm_high_word = static_cast<int32_t>(imm >> 32);

  (assm->*op_with_carry)(dst.high_gp(), imm_high_word);

void LiftoffAssembler::emit_i64_add(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  liftoff::OpWithCarry<&Assembler::add, &Assembler::adc>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64_addi(LiftoffRegister dst, LiftoffRegister lhs,

  liftoff::OpWithCarryI<&Assembler::add, &Assembler::adc>(this, dst, lhs, imm);
}

void LiftoffAssembler::emit_i64_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  liftoff::OpWithCarry<&Assembler::sub, &Assembler::sbb>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {

  SpillRegisters(dst_hi, dst_lo, lhs_hi, rhs_lo);

  ParallelRegisterMove({{LiftoffRegister::ForPair(lhs_lo, lhs_hi), lhs, kI64},
                        {LiftoffRegister::ForPair(rhs_lo, rhs_hi), rhs, kI64}});
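  // Schoolbook multiply out of 32-bit halves: the full 32x32->64 product of
  // the two low words (computed with the one-operand mul, elided in this
  // excerpt) lands in edx:eax, while the cross products lhs_hi * rhs_lo and
  // rhs_hi * lhs_lo can only affect the high word, so they are summed and
  // added on top of it.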
  imul(lhs_hi, rhs_lo);
  imul(rhs_hi, lhs_lo);
  add(lhs_hi, rhs_hi);

  add(dst_hi, lhs_hi);

  LiftoffRegister dst_tmp = LiftoffRegister::ForPair(dst_lo, dst_hi);
  if (dst != dst_tmp) Move(dst, dst_tmp, kI64);
}
bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {

bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {

bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {

bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {

  if (pair.low_gp() == old_reg) {
    return LiftoffRegister::ForPair(new_reg, pair.high_gp());
  }
  if (pair.high_gp() == old_reg) {
    return LiftoffRegister::ForPair(pair.low_gp(), new_reg);
  }

  constexpr size_t kMaxRegMoves = 3;

  } else if (amount != ecx &&

  if (ecx_replace != no_reg) assm->mov(ecx, ecx_replace);

void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,

  liftoff::Emit64BitShiftOperation(this, dst, src, amount,
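// Constant i64 shifts on a register pair: for amounts >= 32 the result is
// assembled directly (e.g. for shl, the low word shifted by amount - 32
// becomes the high word and the low word is zeroed); smaller amounts go
// through the ShlPair/SarPair/ShrPair macro-assembler helpers, which shift
// bits across the two halves (shld/shrd-style).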
void LiftoffAssembler::emit_i64_shli(LiftoffRegister dst, LiftoffRegister src,

    if (dst.high_gp() != src.low_gp()) mov(dst.high_gp(), src.low_gp());
    if (amount != 32) shl(dst.high_gp(), amount - 32);
    xor_(dst.low_gp(), dst.low_gp());

    if (dst != src) Move(dst, src, kI64);
    ShlPair(dst.high_gp(), dst.low_gp(), amount);

void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,

  liftoff::Emit64BitShiftOperation(this, dst, src, amount,

void LiftoffAssembler::emit_i64_sari(LiftoffRegister dst, LiftoffRegister src,

    if (dst.low_gp() != src.high_gp()) mov(dst.low_gp(), src.high_gp());
    if (dst.high_gp() != src.high_gp()) mov(dst.high_gp(), src.high_gp());
    if (amount != 32) sar(dst.low_gp(), amount - 32);
    sar(dst.high_gp(), 31);

    if (dst != src) Move(dst, src, kI64);
    SarPair(dst.high_gp(), dst.low_gp(), amount);

void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,

  liftoff::Emit64BitShiftOperation(this, dst, src, amount,

void LiftoffAssembler::emit_i64_shri(LiftoffRegister dst, LiftoffRegister src,

    if (dst.low_gp() != src.high_gp()) mov(dst.low_gp(), src.high_gp());
    if (amount != 32) shr(dst.low_gp(), amount - 32);
    xor_(dst.high_gp(), dst.high_gp());

    if (dst != src) Move(dst, src, kI64);
    ShrPair(dst.high_gp(), dst.low_gp(), amount);
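// 64-bit clz/ctz are composed from two 32-bit counts: if the "leading" half
// is zero, the count is 32 plus the count of the other half. In the
// bsr-based fallback, bsr leaves its destination undefined for a zero input
// and otherwise returns the index of the highest set bit; xor-ing that index
// with 31 (or 63) converts it into a leading-zero count, and seeding 64 ^ 63
// makes the final xor with 63 (not shown in this excerpt) yield 64 for an
// all-zero input.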
void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {

  if (src.low_gp() == safe_dst) safe_dst = dst.high_gp();

    CpuFeatureScope scope(this, LZCNT);
    lzcnt(safe_dst, src.high_gp());

    lzcnt(safe_dst, src.low_gp());
    add(safe_dst, Immediate(32));

  bsr(safe_dst, src.high_gp());

  xor_(safe_dst, Immediate(31));

  bind(&high_is_zero);

  bsr(safe_dst, src.low_gp());

  mov(safe_dst, Immediate(64 ^ 63));
  bind(&low_not_zero);

  if (safe_dst != dst.low_gp()) mov(dst.low_gp(), safe_dst);
  xor_(dst.high_gp(), dst.high_gp());

void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {

  if (src.high_gp() == safe_dst) safe_dst = dst.high_gp();

    CpuFeatureScope scope(this, BMI1);
    tzcnt(safe_dst, src.low_gp());

    tzcnt(safe_dst, src.high_gp());
    add(safe_dst, Immediate(32));

  bsf(safe_dst, src.low_gp());

  Label high_not_zero;
  bsf(safe_dst, src.high_gp());

  bind(&high_not_zero);
  add(safe_dst, Immediate(32));

  if (safe_dst != dst.low_gp()) mov(dst.low_gp(), safe_dst);
  xor_(dst.high_gp(), dst.high_gp());

bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
                                       LiftoffRegister src) {

  CpuFeatureScope scope(this, POPCNT);

  Register src1 = src.high_gp() == dst.low_gp() ? src.high_gp() : src.low_gp();
  Register src2 = src.high_gp() == dst.low_gp() ? src.low_gp() : src.high_gp();
  popcnt(dst.low_gp(), src1);
  popcnt(dst.high_gp(), src2);

  add(dst.low_gp(), dst.high_gp());
  xor_(dst.high_gp(), dst.high_gp());
void LiftoffAssembler::IncrementSmi(LiftoffRegister dst, int offset) {

    CpuFeatureScope scope(this, AVX);
    vaddss(dst, lhs, rhs);
  } else if (dst == rhs) {

    if (dst != lhs) movss(dst, lhs);

    CpuFeatureScope scope(this, AVX);
    vsubss(dst, lhs, rhs);
  } else if (dst == rhs) {
    movss(liftoff::kScratchDoubleReg, rhs);

    subss(dst, liftoff::kScratchDoubleReg);

    if (dst != lhs) movss(dst, lhs);

    CpuFeatureScope scope(this, AVX);
    vmulss(dst, lhs, rhs);
  } else if (dst == rhs) {

    if (dst != lhs) movss(dst, lhs);

    CpuFeatureScope scope(this, AVX);
    vdivss(dst, lhs, rhs);
  } else if (dst == rhs) {
    movss(liftoff::kScratchDoubleReg, rhs);

    divss(dst, liftoff::kScratchDoubleReg);

    if (dst != lhs) movss(dst, lhs);
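// Scalar float min/max cannot be a single minss/maxss: those instructions
// return the second operand for NaN inputs and do not distinguish -0.0 from
// +0.0. The helper below therefore branches three ways on ucomis: strictly
// below, strictly above, or neither (equal or unordered). In the "neither"
// case, movmskp extracts the sign bit of rhs to pick the correctly signed
// zero for equal operands, and the NaN path materializes a quiet NaN via
// 0.0 / 0.0.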
template <typename type>

  Label lhs_below_rhs;
  Label lhs_above_rhs;

#define dop(name, ...)            \
  if (sizeof(type) == 4) {        \
    assm->name##s(__VA_ARGS__);   \
  } else {                        \
    assm->name##d(__VA_ARGS__);   \
  }

  dop(ucomis, lhs, rhs);

  assm->j(below, &lhs_below_rhs, Label::kNear);
  assm->j(above, &lhs_above_rhs, Label::kNear);

  dop(movmskp, tmp, rhs);

  assm->j(zero, &lhs_below_rhs, Label::kNear);
  assm->jmp(&lhs_above_rhs, Label::kNear);

  assm->bind(&is_nan);

  dop(xorp, dst, dst);
  dop(divs, dst, dst);
  assm->jmp(&done, Label::kNear);

  assm->bind(&lhs_below_rhs);
  DoubleRegister lhs_below_rhs_src = min_or_max == MinOrMax::kMin ? lhs : rhs;
  if (dst != lhs_below_rhs_src) dop(movs, dst, lhs_below_rhs_src);
  assm->jmp(&done, Label::kNear);

  assm->bind(&lhs_above_rhs);
  DoubleRegister lhs_above_rhs_src = min_or_max == MinOrMax::kMin ? rhs : lhs;
  if (dst != lhs_above_rhs_src) dop(movs, dst, lhs_above_rhs_src);
  liftoff::EmitFloatMinOrMax<float>(this, dst, lhs, rhs,
                                    liftoff::MinOrMax::kMin);

  liftoff::EmitFloatMinOrMax<float>(this, dst, lhs, rhs,
                                    liftoff::MinOrMax::kMax);

  static constexpr int kF32SignBit = 1 << 31;
  LiftoffRegList pinned;
  Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
  Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();

  and_(scratch, Immediate(~kF32SignBit));
  Movd(scratch2, rhs);
  and_(scratch2, Immediate(kF32SignBit));
  or_(scratch, scratch2);

  static constexpr uint32_t kSignBit = uint32_t{1} << 31;

  Andps(dst, liftoff::kScratchDoubleReg);

  static constexpr uint32_t kSignBit = uint32_t{1} << 31;

  Xorps(dst, liftoff::kScratchDoubleReg);
    CpuFeatureScope scope(this, AVX);
    vaddsd(dst, lhs, rhs);
  } else if (dst == rhs) {

    if (dst != lhs) movsd(dst, lhs);

    CpuFeatureScope scope(this, AVX);
    vsubsd(dst, lhs, rhs);
  } else if (dst == rhs) {
    movsd(liftoff::kScratchDoubleReg, rhs);

    subsd(dst, liftoff::kScratchDoubleReg);

    if (dst != lhs) movsd(dst, lhs);

    CpuFeatureScope scope(this, AVX);
    vmulsd(dst, lhs, rhs);
  } else if (dst == rhs) {

    if (dst != lhs) movsd(dst, lhs);

    CpuFeatureScope scope(this, AVX);
    vdivsd(dst, lhs, rhs);
  } else if (dst == rhs) {
    movsd(liftoff::kScratchDoubleReg, rhs);

    divsd(dst, liftoff::kScratchDoubleReg);

    if (dst != lhs) movsd(dst, lhs);

  liftoff::EmitFloatMinOrMax<double>(this, dst, lhs, rhs,
                                     liftoff::MinOrMax::kMin);

  static constexpr int kF32SignBit = 1 << 31;

  LiftoffRegList pinned;
  Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
  Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();

  Pextrd(scratch, lhs, 1);
  and_(scratch, Immediate(~kF32SignBit));
  Pextrd(scratch2, rhs, 1);
  and_(scratch2, Immediate(kF32SignBit));
  or_(scratch, scratch2);

  Pinsrd(dst, scratch, 1);

  liftoff::EmitFloatMinOrMax<double>(this, dst, lhs, rhs,
                                     liftoff::MinOrMax::kMax);

  static constexpr uint64_t kSignBit = uint64_t{1} << 63;

  Andpd(dst, liftoff::kScratchDoubleReg);

  static constexpr uint64_t kSignBit = uint64_t{1} << 63;

  Xorpd(dst, liftoff::kScratchDoubleReg);
template <typename dst_type, typename src_type>

  if (std::is_same<double, src_type>::value) {
    if (std::is_signed<dst_type>::value) {
      __ cvttsd2si(dst, src);
      __ Cvtsi2sd(converted_back, dst);

      __ Cvttsd2ui(dst, src, converted_back);
      __ Cvtui2sd(converted_back, dst,

    if (std::is_signed<dst_type>::value) {
      __ cvttss2si(dst, src);
      __ Cvtsi2ss(converted_back, dst);

      __ Cvttss2ui(dst, src, converted_back);
      __ Cvtui2ss(converted_back, dst,
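// The truncation emitters validate the conversion with a round trip:
// cvttss2si/cvttsd2si yield the "integer indefinite" value 0x80000000 on
// overflow or NaN, so converting the result back and comparing it against
// the rounded input (ucomiss/ucomisd below) flags every input that did not
// convert exactly; the checked variant then traps and the saturating
// variant clamps to the destination type's min/max or to zero.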
template <typename dst_type, typename src_type>

  if (!CpuFeatures::IsSupported(SSE4_1)) {

  if (std::is_same<double, src_type>::value) {

  ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
                                               converted_back, pinned);
  if (std::is_same<double, src_type>::value) {
    __ ucomisd(converted_back, rounded);

    __ ucomiss(converted_back, rounded);

template <typename dst_type, typename src_type>

  if (!CpuFeatures::IsSupported(SSE4_1)) {

      pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();

      pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();

      pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();

  if (std::is_same<double, src_type>::value) {

  ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
                                               converted_back, pinned);
  if (std::is_same<double, src_type>::value) {
    __ ucomisd(converted_back, rounded);

    __ ucomiss(converted_back, rounded);

  __ Xorpd(zero_reg, zero_reg);

  if (std::is_same<double, src_type>::value) {
    __ ucomisd(src, zero_reg);

    __ ucomiss(src, zero_reg);

  __ mov(dst, Immediate(std::numeric_limits<dst_type>::min()));

  __ bind(&src_positive);

  __ mov(dst, Immediate(std::numeric_limits<dst_type>::max()));
bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
                                            LiftoffRegister dst,

    case kExprI32ConvertI64:
      if (dst.gp() != src.low_gp()) mov(dst.gp(), src.low_gp());

    case kExprI32SConvertF32:
      liftoff::EmitTruncateFloatToInt<int32_t, float>(this, dst.gp(), src.fp(),

    case kExprI32UConvertF32:
      liftoff::EmitTruncateFloatToInt<uint32_t, float>(this, dst.gp(),
                                                       src.fp(),

    case kExprI32SConvertF64:
      liftoff::EmitTruncateFloatToInt<int32_t, double>(this, dst.gp(),
                                                       src.fp(),

    case kExprI32UConvertF64:
      liftoff::EmitTruncateFloatToInt<uint32_t, double>(this, dst.gp(),

    case kExprI32SConvertSatF32:
      liftoff::EmitSatTruncateFloatToInt<int32_t, float>(this, dst.gp(),

    case kExprI32UConvertSatF32:
      liftoff::EmitSatTruncateFloatToInt<uint32_t, float>(this, dst.gp(),

    case kExprI32SConvertSatF64:
      liftoff::EmitSatTruncateFloatToInt<int32_t, double>(this, dst.gp(),

    case kExprI32UConvertSatF64:
      liftoff::EmitSatTruncateFloatToInt<uint32_t, double>(this, dst.gp(),

    case kExprI32ReinterpretF32:
      Movd(dst.gp(), src.fp());

    case kExprI64SConvertI32:
      if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp());
      if (dst.high_gp() != src.gp()) mov(dst.high_gp(), src.gp());
      sar(dst.high_gp(), 31);

    case kExprI64UConvertI32:
      if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp());
      xor_(dst.high_gp(), dst.high_gp());

    case kExprI64ReinterpretF64:

      AllocateStackSpace(8);
      movsd(Operand(esp, 0), src.fp());

    case kExprF32SConvertI32:
      cvtsi2ss(dst.fp(), src.gp());

    case kExprF32UConvertI32: {
      LiftoffRegList pinned{dst, src};
      Register scratch = GetUnusedRegister(kGpReg, pinned).gp();
      Cvtui2ss(dst.fp(), src.gp(), scratch);

    case kExprF32ConvertF64:
      cvtsd2ss(dst.fp(), src.fp());

    case kExprF32ReinterpretI32:
      Movd(dst.fp(), src.gp());

    case kExprF64SConvertI32:
      Cvtsi2sd(dst.fp(), src.gp());

    case kExprF64UConvertI32: {
      LiftoffRegList pinned{dst, src};
      Register scratch = GetUnusedRegister(kGpReg, pinned).gp();
      Cvtui2sd(dst.fp(), src.gp(), scratch);

    case kExprF64ConvertF32:
      cvtss2sd(dst.fp(), src.fp());

    case kExprF64ReinterpretI64:

      push(src.high_gp());

      movsd(dst.fp(), Operand(esp, 0));
      add(esp, Immediate(8));
void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) {
  Register byte_reg = liftoff::GetTmpByteRegister(this, src);
  if (byte_reg != src) mov(byte_reg, src);
  movsx_b(dst, byte_reg);
}

void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) {

void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst,
                                              LiftoffRegister src) {
  Register byte_reg = liftoff::GetTmpByteRegister(this, src.low_gp());
  if (byte_reg != src.low_gp()) mov(byte_reg, src.low_gp());
  movsx_b(dst.low_gp(), byte_reg);
  liftoff::SignExtendI32ToI64(this, dst);
}

void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst,
                                               LiftoffRegister src) {
  movsx_w(dst.low_gp(), src.low_gp());
  liftoff::SignExtendI32ToI64(this, dst);
}

void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst,
                                               LiftoffRegister src) {
  if (dst.low_gp() != src.low_gp()) mov(dst.low_gp(), src.low_gp());
  liftoff::SignExtendI32ToI64(this, dst);
}

void LiftoffAssembler::emit_jump(Label* label) { jmp(label); }

void LiftoffAssembler::emit_jump(Register target) { jmp(target); }
void LiftoffAssembler::emit_cond_jump(Condition cond, Label* label,
                                      ValueKind kind, Register lhs,
                                      const FreezeCacheState& frozen) {

void LiftoffAssembler::emit_i32_cond_jumpi(Condition cond, Label* label,
                                           Register lhs, int imm,
                                           const FreezeCacheState& frozen) {
  cmp(lhs, Immediate(imm));

  assm->setcc(cond, tmp_byte_reg);
  assm->movzx_b(dst, tmp_byte_reg);

  liftoff::setcc_32(this, equal, dst);

void LiftoffAssembler::emit_i32_set_cond(Condition cond, Register dst,
                                         Register lhs, Register rhs) {

  liftoff::setcc_32(this, cond, dst);

void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) {

  if (src.low_gp() == dst) {
    or_(dst, src.high_gp());

    if (src.high_gp() != dst) mov(dst, src.high_gp());
    or_(dst, src.low_gp());

  liftoff::setcc_32(this, equal, dst);
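// i64 comparison on a register pair: the high words are compared first and
// decide the result unless they are equal, in which case the low words are
// compared; the low-word comparison always uses the unsigned variant of the
// condition, whatever the signedness of the full 64-bit comparison.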
                                         LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
  Condition unsigned_cond = liftoff::cond_make_unsigned(cond);

  Register tmp_byte_reg = liftoff::GetTmpByteRegister(this, dst);

  cmp(lhs.high_gp(), rhs.high_gp());

  cmp(lhs.low_gp(), rhs.low_gp());
  if (unsigned_cond != cond) {

  liftoff::setcc_32_no_spill(this, unsigned_cond, dst, tmp_byte_reg);

  liftoff::setcc_32_no_spill(this, cond, dst, tmp_byte_reg);

template <void (Assembler::*cmp_op)(DoubleRegister, DoubleRegister)>

  (assm->*cmp_op)(lhs, rhs);

  assm->xor_(dst, dst);
  assm->jmp(&cont, Label::kNear);
  assm->bind(&not_nan);

  liftoff::EmitFloatSetCond<&Assembler::ucomiss>(this, cond, dst, lhs, rhs);

void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,

  liftoff::EmitFloatSetCond<&Assembler::ucomisd>(this, cond, dst, lhs, rhs);

bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
                                   LiftoffRegister true_value,
                                   LiftoffRegister false_value,

void LiftoffAssembler::emit_smi_check(Register obj, Label* target,
                                      const FreezeCacheState& frozen) {
template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
          void (Assembler::*sse_op)(XMMRegister, XMMRegister)>
    LiftoffRegister rhs, std::optional<CpuFeature> feature = std::nullopt) {
  if (CpuFeatures::IsSupported(AVX)) {

    (assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());

  std::optional<CpuFeatureScope> sse_scope;
  if (feature.has_value()) sse_scope.emplace(assm, *feature);

  if (dst.fp() == rhs.fp()) {

    (assm->*sse_op)(dst.fp(), lhs.fp());

    (assm->*sse_op)(dst.fp(), rhs.fp());

    LiftoffRegister rhs, std::optional<CpuFeature> feature = std::nullopt) {
  if (CpuFeatures::IsSupported(AVX)) {

    (assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());

  std::optional<CpuFeatureScope> sse_scope;
  if (feature.has_value()) sse_scope.emplace(assm, *feature);

  if (dst.fp() == rhs.fp()) {

    (assm->*sse_op)(dst.fp(), rhs.fp());

  static constexpr RegClass tmp_rc = reg_class_for(kI32);

  constexpr int mask = (1 << width) - 1;

  if (CpuFeatures::IsSupported(AVX)) {

    if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());

  constexpr int mask = (1 << width) - 1;
  uint8_t shift = static_cast<uint8_t>(count & mask);
  if (CpuFeatures::IsSupported(AVX)) {

    (assm->*avx_op)(dst.fp(), operand.fp(), shift);

    if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
    (assm->*sse_op)(dst.fp(), shift);

  assm->xor_(tmp, tmp);

  assm->Ptest(src.fp(), src.fp());
  assm->cmov(zero, dst.gp(), tmp);

template <void (SharedMacroAssemblerBase::*pcmp)(XMMRegister, XMMRegister)>
    std::optional<CpuFeature> feature = std::nullopt) {
  std::optional<CpuFeatureScope> sse_scope;
  if (feature.has_value()) sse_scope.emplace(assm, *feature);

  XMMRegister tmp_simd = liftoff::kScratchDoubleReg;

  assm->Pxor(tmp_simd, tmp_simd);
  (assm->*pcmp)(tmp_simd, src.fp());
  assm->Ptest(tmp_simd, tmp_simd);
  assm->cmov(zero, dst.gp(), tmp);
void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
                                     Register offset_reg, uintptr_t offset_imm,
                                     LoadTransformationKind transform,
                                     uint32_t* protected_load_pc,

  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
                 static_cast<int32_t>(offset_imm)};

  MachineType memtype = type.mem_type();
  if (transform == LoadTransformationKind::kExtend) {

      Pmovsxbw(dst.fp(), src_op);

      Pmovzxbw(dst.fp(), src_op);

      Pmovsxwd(dst.fp(), src_op);

      Pmovzxwd(dst.fp(), src_op);

      Pmovsxdq(dst.fp(), src_op);

      Pmovzxdq(dst.fp(), src_op);

  } else if (transform == LoadTransformationKind::kZeroExtend) {

      Movss(dst.fp(), src_op);

      Movsd(dst.fp(), src_op);

    DCHECK_EQ(LoadTransformationKind::kSplat, transform);

      S128Load8Splat(dst.fp(), src_op, liftoff::kScratchDoubleReg);

      S128Load16Splat(dst.fp(), src_op, liftoff::kScratchDoubleReg);

      S128Load32Splat(dst.fp(), src_op);

      Movddup(dst.fp(), src_op);

void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
                                Register addr, Register offset_reg,
                                uintptr_t offset_imm, LoadType type,
                                uint8_t laneidx, uint32_t* protected_load_pc,

  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
  Operand src_op{addr, offset_reg, times_1, static_cast<int32_t>(offset_imm)};

  MachineType mem_type = type.mem_type();

    Pinsrb(dst.fp(), src.fp(), src_op, laneidx);

    Pinsrw(dst.fp(), src.fp(), src_op, laneidx);

    Pinsrd(dst.fp(), src.fp(), src_op, laneidx);

      Movlps(dst.fp(), src.fp(), src_op);

      Movhps(dst.fp(), src.fp(), src_op);

void LiftoffAssembler::StoreLane(Register dst, Register offset,
                                 uintptr_t offset_imm, LiftoffRegister src,
                                 StoreType type, uint8_t lane,
                                 uint32_t* protected_store_pc,

  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());

  if (protected_store_pc) *protected_store_pc = pc_offset();

    Pextrb(dst_op, src.fp(), lane);

    Pextrw(dst_op, src.fp(), lane);

    S128Store32Lane(dst_op, src.fp(), lane);

    S128Store64Lane(dst_op, src.fp(), lane);
void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs,
                                          const uint8_t shuffle[16],

  LiftoffRegister tmp = GetUnusedRegister(kGpReg, {});
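  // pshufb semantics: each byte of the control mask selects one source byte,
  // and a mask byte with bit 7 set zeroes the destination byte. Shuffle
  // lanes >= 16 refer to rhs, so the general case builds one mask that
  // selects the lhs lanes (zeroing out-of-range ones) and a complementary
  // mask for the rhs lanes, then ORs the two pshufb results. The masks are
  // assembled on the stack, four mask bytes per 32-bit push.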
    for (int i = 3; i >= 0; i--) {
      push_imm32(imms[i]);
    }
    Pshufb(dst.fp(), lhs.fp(), Operand(esp, 0));

  movups(liftoff::kScratchDoubleReg, lhs.fp());
  for (int i = 3; i >= 0; i--) {

    for (int j = 3; j >= 0; j--) {
      uint8_t lane = shuffle[i * 4 + j];

  Pshufb(liftoff::kScratchDoubleReg, lhs.fp(), Operand(esp, 0));

  for (int i = 3; i >= 0; i--) {

    for (int j = 3; j >= 0; j--) {
      uint8_t lane = shuffle[i * 4 + j];

  Pshufb(dst.fp(), rhs.fp(), Operand(esp, 0));
  Por(dst.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs) {
  Register scratch = GetUnusedRegister(RegClass::kGpReg, {}).gp();
  I8x16Swizzle(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg,

void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
                                                  LiftoffRegister lhs,
                                                  LiftoffRegister rhs) {
  Register tmp = GetUnusedRegister(RegClass::kGpReg, {}).gp();
  I8x16Swizzle(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg, tmp,
void LiftoffAssembler::emit_i32x4_relaxed_trunc_f32x4_s(LiftoffRegister dst,
                                                        LiftoffRegister src) {
  Cvttps2dq(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_relaxed_trunc_f32x4_u(LiftoffRegister dst,
                                                        LiftoffRegister src) {
  emit_i32x4_uconvert_f32x4(dst, src);
}

void LiftoffAssembler::emit_i32x4_relaxed_trunc_f64x2_s_zero(
    LiftoffRegister dst, LiftoffRegister src) {
  Cvttpd2dq(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_relaxed_trunc_f64x2_u_zero(
    LiftoffRegister dst, LiftoffRegister src) {
  emit_i32x4_trunc_sat_f64x2_u_zero(dst, src);
}

void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
                                                    LiftoffRegister src1,
                                                    LiftoffRegister src2,
                                                    LiftoffRegister mask,

  if (lane_width == 8) {
    Pblendvb(dst.fp(), src2.fp(), src1.fp(), mask.fp());
  } else if (lane_width == 32) {
    Blendvps(dst.fp(), src2.fp(), src1.fp(), mask.fp());
  } else if (lane_width == 64) {
    Blendvpd(dst.fp(), src2.fp(), src1.fp(), mask.fp());

void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
                                         LiftoffRegister src) {
  Register scratch = GetUnusedRegister(RegClass::kGpReg, {}).gp();
      GetUnusedRegister(RegClass::kFpReg, LiftoffRegList{dst, src}).fp();
  I8x16Popcnt(dst.fp(), src.fp(), liftoff::kScratchDoubleReg, tmp, scratch);
}

void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  I8x16Splat(dst.fp(), src.gp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  I16x8Splat(dst.fp(), src.gp());
}

void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Movd(dst.fp(), src.gp());
  Pshufd(dst.fp(), dst.fp(), uint8_t{0});
}

void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Pinsrd(dst.fp(), src.low_gp(), 0);
  Pinsrd(dst.fp(), src.high_gp(), 1);
  Pshufd(dst.fp(), dst.fp(), uint8_t{0x44});
}

void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  F32x4Splat(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Movddup(dst.fp(), src.fp());
}
void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqb, &Assembler::pcmpeqb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqb, &Assembler::pcmpeqb>(
      this, dst, lhs, rhs);
  Pcmpeqb(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
  Pxor(dst.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtb,
                                       &Assembler::pcmpgtb>(this, dst, lhs,
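// SSE has no unsigned byte/word/dword compares, so x > y (unsigned) is
// computed as max_u(x, y) == y, i.e. "x <= y", followed by an all-ones XOR
// to invert the mask; >= similarly uses the corresponding min and skips the
// inversion.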
void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {

    Movaps(liftoff::kScratchDoubleReg, rhs.fp());
    ref = liftoff::kScratchDoubleReg;

  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
      this, dst, lhs, rhs);
  Pcmpeqb(dst.fp(), ref);
  Pcmpeqb(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
  Pxor(dst.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {

    Movaps(liftoff::kScratchDoubleReg, rhs.fp());
    ref = liftoff::kScratchDoubleReg;

  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqb(dst.fp(), ref);
}

void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {

    Movaps(liftoff::kScratchDoubleReg, rhs.fp());
    ref = liftoff::kScratchDoubleReg;

  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
      this, dst, lhs, rhs);
  Pcmpeqb(dst.fp(), ref);
}

void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqw, &Assembler::pcmpeqw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqw, &Assembler::pcmpeqw>(
      this, dst, lhs, rhs);
  Pcmpeqw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
  Pxor(dst.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtw,
                                       &Assembler::pcmpgtw>(this, dst, lhs,

void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {

    Movaps(liftoff::kScratchDoubleReg, rhs.fp());
    ref = liftoff::kScratchDoubleReg;

  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqw(dst.fp(), ref);
  Pcmpeqw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
  Pxor(dst.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {

    Movaps(liftoff::kScratchDoubleReg, rhs.fp());
    ref = liftoff::kScratchDoubleReg;

  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
      this, dst, lhs, rhs);
  Pcmpeqw(dst.fp(), ref);
}

void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {

    Movaps(liftoff::kScratchDoubleReg, rhs.fp());
    ref = liftoff::kScratchDoubleReg;

  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqw(dst.fp(), ref);
}

void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqd, &Assembler::pcmpeqd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqd, &Assembler::pcmpeqd>(
      this, dst, lhs, rhs);
  Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
  Pxor(dst.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtd,
                                       &Assembler::pcmpgtd>(this, dst, lhs,

void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {

    Movaps(liftoff::kScratchDoubleReg, rhs.fp());
    ref = liftoff::kScratchDoubleReg;

  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqd(dst.fp(), ref);
  Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
  Pxor(dst.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {

    Movaps(liftoff::kScratchDoubleReg, rhs.fp());
    ref = liftoff::kScratchDoubleReg;

  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsd, &Assembler::pminsd>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqd(dst.fp(), ref);
}

void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {

    Movaps(liftoff::kScratchDoubleReg, rhs.fp());
    ref = liftoff::kScratchDoubleReg;

  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminud, &Assembler::pminud>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqd(dst.fp(), ref);
}

void LiftoffAssembler::emit_i64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqq, &Assembler::pcmpeqq>(
      this, dst, lhs, rhs, SSE4_1);
}

void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqq, &Assembler::pcmpeqq>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqq(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
  Pxor(dst.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {

    I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);

    if (dst == lhs || dst == rhs) {
      LiftoffRegister tmp =
          GetUnusedRegister(RegClass::kFpReg, LiftoffRegList{lhs, rhs});
      I64x2GtS(tmp.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
      movaps(dst.fp(), tmp.fp());

      I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);

void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {

    I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);

    LiftoffRegister tmp =
        GetUnusedRegister(RegClass::kFpReg, {rhs}, LiftoffRegList{lhs});

    I64x2GeS(tmp.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
    movaps(dst.fp(), tmp.fp());

    I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);

    if (dst == lhs || dst == rhs) {
      LiftoffRegister tmp =
          GetUnusedRegister(RegClass::kFpReg, LiftoffRegList{lhs, rhs});
      I64x2GeS(tmp.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
      movaps(dst.fp(), tmp.fp());

      I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqps, &Assembler::cmpeqps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpneqps,
                                    &Assembler::cmpneqps>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpltps,
                                       &Assembler::cmpltps>(this, dst, lhs,

void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpleps,
                                       &Assembler::cmpleps>(this, dst, lhs,

void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqpd, &Assembler::cmpeqpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpneqpd,
                                    &Assembler::cmpneqpd>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpltpd,
                                       &Assembler::cmpltpd>(this, dst, lhs,

void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmplepd,
                                       &Assembler::cmplepd>(this, dst, lhs,
void LiftoffAssembler::emit_s128_const(LiftoffRegister dst,
                                       const uint8_t imms[16]) {
  uint64_t vals[2];
  memcpy(vals, imms, sizeof(vals));
  // Move the low 64 bits directly, then insert the high two 32-bit words.
  MacroAssembler::Move(dst.fp(), vals[0]);

  uint64_t high = vals[1];
  Register tmp = GetUnusedRegister(RegClass::kGpReg, {}).gp();
  Move(tmp, Immediate(static_cast<uint32_t>(high)));
  Pinsrd(dst.fp(), tmp, 2);

  Move(tmp, Immediate(static_cast<uint32_t>(high >> 32)));
  Pinsrd(dst.fp(), tmp, 3);
}

void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) {
  S128Not(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpand, &Assembler::pand>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpor, &Assembler::por>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpxor, &Assembler::pxor>(
      this, dst, lhs, rhs);
}

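// v128.select computes (mask & src1) | (~mask & src2) bitwise. Without AVX,
// the S128Select helper wants the mask in the destination register, so the
// emitter below copies the mask into dst first unless they already alias.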
void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
                                        LiftoffRegister src1,
                                        LiftoffRegister src2,
                                        LiftoffRegister mask) {
  // Ensure that we don't overwrite any inputs with the movaps below.
  DCHECK_NE(dst, src1);
  DCHECK_NE(dst, src2);
  if (!CpuFeatures::IsSupported(AVX) && dst != mask) {
    movaps(dst.fp(), mask.fp());
    S128Select(dst.fp(), dst.fp(), src1.fp(), src2.fp(),
               liftoff::kScratchDoubleReg);
  } else {
    S128Select(dst.fp(), mask.fp(), src1.fp(), src2.fp(),
               liftoff::kScratchDoubleReg);
  }
}

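// The integer negation emitters below exploit psign: psignb(x, y) negates
// each lane of x whose corresponding lane in y is negative, so with an
// all-ones (-1) mask it computes -x in place. When dst and src differ, the
// pxor + psub sequence computes 0 - src directly instead.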
void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  if (dst.fp() == src.fp()) {
    Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
    Psignb(dst.fp(), liftoff::kScratchDoubleReg);
  } else {
    Pxor(dst.fp(), dst.fp());
    Psubb(dst.fp(), src.fp());
  }
}

void LiftoffAssembler::emit_v128_anytrue(LiftoffRegister dst,
                                         LiftoffRegister src) {
  liftoff::EmitAnyTrue(this, dst, src);
}

void LiftoffAssembler::emit_i8x16_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAllTrue<&MacroAssembler::Pcmpeqb>(this, dst, src);
}

void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  Pmovmskb(dst.gp(), src.fp());
}

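// SSE has no per-lane 8-bit shifts. The I8x16Shl/I8x16ShrS/I8x16ShrU
// macro-assembler helpers therefore shift wider lanes and mask off the bits
// that would cross byte boundaries, which is why the emitters below need
// both GP and SIMD temporaries.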
void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  LiftoffRegister tmp = GetUnusedRegister(kGpReg, LiftoffRegList{rhs});
  LiftoffRegister tmp_simd =
      GetUnusedRegister(kFpReg, LiftoffRegList{dst, lhs});
  I8x16Shl(dst.fp(), lhs.fp(), rhs.gp(), tmp.gp(), liftoff::kScratchDoubleReg,
           tmp_simd.fp());
}

void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  LiftoffRegister tmp = GetUnusedRegister(kGpReg, {});
  I8x16Shl(dst.fp(), lhs.fp(), rhs, tmp.gp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  Register tmp = GetUnusedRegister(kGpReg, LiftoffRegList{rhs}).gp();
  XMMRegister tmp_simd =
      GetUnusedRegister(kFpReg, LiftoffRegList{dst, lhs}).fp();
  I8x16ShrS(dst.fp(), lhs.fp(), rhs.gp(), tmp, liftoff::kScratchDoubleReg,
            tmp_simd);
}

void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  I8x16ShrS(dst.fp(), lhs.fp(), rhs, liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  Register tmp = GetUnusedRegister(kGpReg, LiftoffRegList{rhs}).gp();
  XMMRegister tmp_simd =
      GetUnusedRegister(kFpReg, LiftoffRegList{dst, lhs}).fp();
  I8x16ShrU(dst.fp(), lhs.fp(), rhs.gp(), tmp, liftoff::kScratchDoubleReg,
            tmp_simd);
}

void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  I8x16ShrU(dst.fp(), lhs.fp(), rhs, tmp, liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsb, &Assembler::paddsb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusb, &Assembler::paddusb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubb, &Assembler::psubb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubsb, &Assembler::psubsb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubusb,
                                       &Assembler::psubusb>(this, dst, lhs,
                                                            rhs);
}

void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
      this, dst, lhs, rhs, SSE4_1);
}

void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsb, &Assembler::pmaxsb>(
      this, dst, lhs, rhs, SSE4_1);
}

void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  if (dst.fp() == src.fp()) {
    Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
    Psignw(dst.fp(), liftoff::kScratchDoubleReg);
  } else {
    Pxor(dst.fp(), dst.fp());
    Psubw(dst.fp(), src.fp());
  }
}

void LiftoffAssembler::emit_i16x8_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAllTrue<&MacroAssembler::Pcmpeqw>(this, dst, src);
}

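// i16x8.bitmask: packsswb(tmp, src) packs src's 8 words into the upper 8
// bytes of tmp (the lower 8 come from tmp itself and are ignored); pmovmskb
// then collects all 16 byte sign bits, and the final shift by 8 keeps
// exactly the 8 bits that came from src.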
void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  XMMRegister tmp = liftoff::kScratchDoubleReg;
  Packsswb(tmp, src.fp());
  Pmovmskb(dst.gp(), tmp);
  shr(dst.gp(), 8);
}

void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsllw, &Assembler::psllw, 4>(this, dst,
                                                                     lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsllw, &Assembler::psllw, 4>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsraw, &Assembler::psraw, 4>(this, dst,
                                                                     lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsraw, &Assembler::psraw, 4>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsrlw, &Assembler::psrlw, 4>(this, dst,
                                                                     lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlw, &Assembler::psrlw, 4>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsw, &Assembler::paddsw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusw, &Assembler::paddusw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubw, &Assembler::psubw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubsw, &Assembler::psubsw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubusw,
                                       &Assembler::psubusw>(this, dst, lhs,
                                                            rhs);
}

void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmullw, &Assembler::pmullw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
      this, dst, lhs, rhs, SSE4_1);
}

void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsw, &Assembler::pmaxsw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
      this, dst, lhs, rhs, SSE4_1);
}

void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
                                                          LiftoffRegister src) {
  I16x8ExtAddPairwiseI8x16S(dst.fp(), src.fp(), liftoff::kScratchDoubleReg,
                            GetUnusedRegister(kGpReg, {}).gp());
}

void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
                                                          LiftoffRegister src) {
  I16x8ExtAddPairwiseI8x16U(dst.fp(), src.fp(),
                            GetUnusedRegister(kGpReg, {}).gp());
}

void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
                 /*is_signed=*/true);
}

void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
                 /*is_signed=*/false);
}

void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  I16x8ExtMulHighS(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  I16x8ExtMulHighU(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
                                                LiftoffRegister src1,
                                                LiftoffRegister src2) {
  I16x8Q15MulRSatS(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg);
}

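// The relaxed variant below maps directly to pmulhrsw and thus skips the
// extra saturation fixup that I16x8Q15MulRSatS performs for the
// 0x8000 * 0x8000 overflow case; relaxed-simd semantics permit this
// implementation-defined result.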
void LiftoffAssembler::emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
                                                    LiftoffRegister src1,
                                                    LiftoffRegister src2) {
  if (CpuFeatures::IsSupported(AVX) || dst == src1) {
    Pmulhrsw(dst.fp(), src1.fp(), src2.fp());
  } else {
    movdqa(dst.fp(), src1.fp());
    pmulhrsw(dst.fp(), src2.fp());
  }
}

void LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
                                                    LiftoffRegister lhs,
                                                    LiftoffRegister rhs) {
  I16x8DotI8x16I7x16S(dst.fp(), lhs.fp(), rhs.fp());
}

void LiftoffAssembler::emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
                                                        LiftoffRegister lhs,
                                                        LiftoffRegister rhs,
                                                        LiftoffRegister acc) {
  static constexpr RegClass tmp_rc = reg_class_for(kS128);
  LiftoffRegister tmp1 =
      GetUnusedRegister(tmp_rc, LiftoffRegList{dst, lhs, rhs, acc});
  LiftoffRegister tmp2 =
      GetUnusedRegister(tmp_rc, LiftoffRegList{dst, lhs, rhs, acc, tmp1});
  I32x4DotI8x16I7x16AddS(dst.fp(), lhs.fp(), rhs.fp(), acc.fp(), tmp1.fp(),
                         tmp2.fp());
}

void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  if (dst.fp() == src.fp()) {
    Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
    Psignd(dst.fp(), liftoff::kScratchDoubleReg);
  } else {
    Pxor(dst.fp(), dst.fp());
    Psubd(dst.fp(), src.fp());
  }
}

void LiftoffAssembler::emit_i32x4_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAllTrue<&MacroAssembler::Pcmpeqd>(this, dst, src);
}

void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  Movmskps(dst.gp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpslld, &Assembler::pslld, 5>(this, dst,
                                                                     lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpslld, &Assembler::pslld, 5>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsrad, &Assembler::psrad, 5>(this, dst,
                                                                     lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsrad, &Assembler::psrad, 5>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsrld, &Assembler::psrld, 5>(this, dst,
                                                                     lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsrld, &Assembler::psrld, 5>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddd, &Assembler::paddd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubd, &Assembler::psubd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmulld, &Assembler::pmulld>(
      this, dst, lhs, rhs, SSE4_1);
}

void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsd, &Assembler::pminsd>(
      this, dst, lhs, rhs, SSE4_1);
}

void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminud, &Assembler::pminud>(
      this, dst, lhs, rhs, SSE4_1);
}

void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsd, &Assembler::pmaxsd>(
      this, dst, lhs, rhs, SSE4_1);
}

void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
      this, dst, lhs, rhs, SSE4_1);
}

void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
                                              LiftoffRegister lhs,
                                              LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaddwd, &Assembler::pmaddwd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,
                                                          LiftoffRegister src) {
  I32x4ExtAddPairwiseI16x8S(dst.fp(), src.fp(),
                            GetUnusedRegister(kGpReg, {}).gp());
}

void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,
                                                          LiftoffRegister src) {
  I32x4ExtAddPairwiseI16x8U(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
}

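// i32x4 extended multiplication: multiplies the low or high half of the
// i16x8 inputs pairwise into full 32-bit products. The helper below only
// deals with register aliasing and AVX availability before delegating to
// the macro-assembler; extended multiplication is commutative, so the
// operands may be swapped when that avoids an extra move.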
namespace liftoff {
// Helper function to check for register aliasing and AVX support, and to
// move registers around before calling the actual macro-assembler function.
inline void I32x4ExtMulHelper(LiftoffAssembler* assm, XMMRegister dst,
                              XMMRegister src1, XMMRegister src2, bool low,
                              bool is_signed) {
  // I32x4ExtMul requires dst == src1 if AVX is not supported.
  if (CpuFeatures::IsSupported(AVX) || dst == src1) {
    assm->I32x4ExtMul(dst, src1, src2, liftoff::kScratchDoubleReg, low,
                      is_signed);
  } else if (dst != src2) {
    // dst != src1 && dst != src2
    assm->movaps(dst, src1);
    assm->I32x4ExtMul(dst, dst, src2, liftoff::kScratchDoubleReg, low,
                      is_signed);
  } else {
    // dst == src2; extended multiplication is commutative, so swap operands.
    assm->movaps(dst, src2);
    assm->I32x4ExtMul(dst, dst, src1, liftoff::kScratchDoubleReg, low,
                      is_signed);
  }
}
}  // namespace liftoff

void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
                             /*low=*/true, /*is_signed=*/true);
}

void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
                             /*low=*/true, /*is_signed=*/false);
}

void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
                             /*low=*/false, /*is_signed=*/true);
}

void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
                             /*low=*/false, /*is_signed=*/false);
}

void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  I64x2Neg(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i64x2_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAllTrue<&MacroAssembler::Pcmpeqq>(this, dst, src, SSE4_1);
}

void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsllq, &Assembler::psllq, 6>(this, dst,
                                                                     lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsllq, &Assembler::psllq, 6>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  XMMRegister tmp =
      GetUnusedRegister(RegClass::kFpReg, LiftoffRegList{dst, lhs}).fp();
  Register scratch =
      GetUnusedRegister(RegClass::kGpReg, LiftoffRegList{rhs}).gp();

  I64x2ShrS(dst.fp(), lhs.fp(), rhs.gp(), liftoff::kScratchDoubleReg, tmp,
            scratch);
}

void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  I64x2ShrS(dst.fp(), lhs.fp(), rhs & 0x3F, liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsrlq, &Assembler::psrlq, 6>(this, dst,
                                                                     lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlq, &Assembler::psrlq, 6>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubq, &Assembler::psubq>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  static constexpr RegClass tmp_rc = reg_class_for(kS128);
  LiftoffRegister tmp1 =
      GetUnusedRegister(tmp_rc, LiftoffRegList{dst, lhs, rhs});
  LiftoffRegister tmp2 =
      GetUnusedRegister(tmp_rc, LiftoffRegList{dst, lhs, rhs, tmp1});
  I64x2Mul(dst.fp(), lhs.fp(), rhs.fp(), tmp1.fp(), tmp2.fp());
}

void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
              /*low=*/true, /*is_signed=*/true);
}

void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
              /*low=*/true, /*is_signed=*/false);
}

void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
              /*low=*/false, /*is_signed=*/true);
}

void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
              /*low=*/false, /*is_signed=*/false);
}

void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  Movmskpd(dst.gp(), src.fp());
}

void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovsxdq(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I64x2SConvertI32x4High(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovzxdq(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I64x2UConvertI32x4High(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  Absps(dst.fp(), src.fp(), tmp);
}

void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  Negps(dst.fp(), src.fp(), tmp);
}

void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  Sqrtps(dst.fp(), src.fp());
}

bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundps(dst.fp(), src.fp(), kRoundUp);
  return true;
}

bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundps(dst.fp(), src.fp(), kRoundDown);
  return true;
}

bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundps(dst.fp(), src.fp(), kRoundToZero);
  return true;
}

bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundps(dst.fp(), src.fp(), kRoundToNearest);
  return true;
}

void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  // Addition is not commutative in the presence of NaNs.
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vaddps, &Assembler::addps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vsubps, &Assembler::subps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulps, &Assembler::mulps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vdivps, &Assembler::divps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  F32x4Min(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  F32x4Max(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
}

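// Wasm pmin(a, b) is defined as (b < a) ? b : a. minps(x, y) returns its
// second operand when the inputs are equal or unordered, so minps with
// swapped operands, minps(b, a), matches the Wasm semantics exactly
// (including for NaNs and -0.0 vs +0.0); the same reasoning applies to
// pmax/maxps below.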
void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // Due to the way minps works, pmin(a, b) = minps(b, a).
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminps, &Assembler::minps>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // Due to the way maxps works, pmax(a, b) = maxps(b, a).
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxps, &Assembler::maxps>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_f32x4_relaxed_min(LiftoffRegister dst,
                                              LiftoffRegister lhs,
                                              LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminps, &Assembler::minps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_relaxed_max(LiftoffRegister dst,
                                              LiftoffRegister lhs,
                                              LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxps, &Assembler::maxps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  Abspd(dst.fp(), src.fp(), tmp);
}

void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  Negpd(dst.fp(), src.fp(), tmp);
}

void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  Sqrtpd(dst.fp(), src.fp());
}

bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundpd(dst.fp(), src.fp(), kRoundUp);
  return true;
}

bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundpd(dst.fp(), src.fp(), kRoundDown);
  return true;
}

bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundpd(dst.fp(), src.fp(), kRoundToZero);
  return true;
}

bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundpd(dst.fp(), src.fp(), kRoundToNearest);
  return true;
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vsubpd, &Assembler::subpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulpd, &Assembler::mulpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vdivpd, &Assembler::divpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  F64x2Min(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  F64x2Max(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // Due to the way minpd works, pmin(a, b) = minpd(b, a).
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminpd, &Assembler::minpd>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // Due to the way maxpd works, pmax(a, b) = maxpd(b, a).
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxpd, &Assembler::maxpd>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_f64x2_relaxed_min(LiftoffRegister dst,
                                              LiftoffRegister lhs,
                                              LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminpd, &Assembler::minpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_relaxed_max(LiftoffRegister dst,
                                              LiftoffRegister lhs,
                                              LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxpd, &Assembler::maxpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  Cvtdq2pd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  F64x2ConvertLowI32x4U(dst.fp(), src.fp(), tmp);
}

void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
                                                    LiftoffRegister src) {
  Cvtps2pd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  I32x4SConvertF32x4(dst.fp(), src.fp(), liftoff::kScratchDoubleReg, tmp);
}

void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  static constexpr RegClass tmp_rc = reg_class_for(kS128);
  DoubleRegister scratch2 =
      GetUnusedRegister(tmp_rc, LiftoffRegList{dst, src}).fp();
  I32x4TruncF32x4U(dst.fp(), src.fp(), liftoff::kScratchDoubleReg, scratch2);
}

void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Cvtdq2ps(dst.fp(), src.fp());
}

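// f32x4.convert_i32x4_u without a native unsigned conversion: split each
// lane into its low 16 bits (blended into the scratch register) and the
// remaining high part, convert the low part exactly, halve the high part so
// it fits the signed range (exact, since its low bit is zero), convert,
// double it again, and add the two halves; only the final add may round.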
void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);  // Zeros.
  Pblendw(liftoff::kScratchDoubleReg, src.fp(),
          uint8_t{0x55});  // Get lo 16 bits.
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpsubd(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);  // Get hi 16 bits.
  } else {
    if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp());
    psubd(dst.fp(), liftoff::kScratchDoubleReg);
  }
  Cvtdq2ps(liftoff::kScratchDoubleReg,
           liftoff::kScratchDoubleReg);   // Convert lo exactly.
  Psrld(dst.fp(), dst.fp(), uint8_t{1});  // Div by 2 to get in unsigned range.
  Cvtdq2ps(dst.fp(), dst.fp());           // Convert hi, exactly.
  Addps(dst.fp(), dst.fp(), dst.fp());    // Double hi, exactly.
  Addps(dst.fp(), dst.fp(),
        liftoff::kScratchDoubleReg);  // Add hi and lo, may round.
}

void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
                                                    LiftoffRegister src) {
  Cvtpd2ps(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpacksswb,
                                       &Assembler::packsswb>(this, dst, lhs,
                                                             rhs);
}

void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackuswb,
                                       &Assembler::packuswb>(this, dst, lhs,
                                                             rhs);
}

void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackssdw,
                                       &Assembler::packssdw>(this, dst, lhs,
                                                             rhs);
}

void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackusdw,
                                       &Assembler::packusdw>(this, dst, lhs,
                                                             rhs, SSE4_1);
}

void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovsxbw(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I16x8SConvertI8x16High(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovzxbw(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I16x8UConvertI8x16High(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovsxwd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I32x4SConvertI16x8High(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovzxwd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I32x4UConvertI16x8High(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
                                                         LiftoffRegister src) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  I32x4TruncSatF64x2SZero(dst.fp(), src.fp(), liftoff::kScratchDoubleReg, tmp);
}

void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
                                                         LiftoffRegister src) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  I32x4TruncSatF64x2UZero(dst.fp(), src.fp(), liftoff::kScratchDoubleReg, tmp);
}

void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
                                         LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
  // andnps computes ~first & second, so swap the operands to get lhs & ~rhs.
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vandnps, &Assembler::andnps>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpavgb, &Assembler::pavgb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpavgw, &Assembler::pavgw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Pabsb(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Pabsw(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Pabsd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  I64x2Abs(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Register byte_reg = liftoff::GetTmpByteRegister(this, dst.gp());
  Pextrb(byte_reg, lhs.fp(), imm_lane_idx);
  movsx_b(dst.gp(), byte_reg);
}

void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrb(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
  movsx_w(dst.gp(), dst.gp());
}

void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Pextrd(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Pextrd(dst.low_gp(), lhs.fp(), imm_lane_idx * 2);
  Pextrd(dst.high_gp(), lhs.fp(), imm_lane_idx * 2 + 1);
}

void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  F32x4ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  F64x2ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
}

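// The replace_lane emitters prefer the three-operand AVX forms (vpinsr*,
// vinsertps), which write src1 with the lane replaced directly into dst;
// the SSE forms are destructive two-operand instructions, so dst must first
// be seeded with a copy of src1 when the registers differ.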
void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrb(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrb(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrw(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrw(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrd(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrd(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrd(dst.fp(), src1.fp(), src2.low_gp(), imm_lane_idx * 2);
    vpinsrd(dst.fp(), dst.fp(), src2.high_gp(), imm_lane_idx * 2 + 1);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrd(dst.fp(), src2.low_gp(), imm_lane_idx * 2);
    pinsrd(dst.fp(), src2.high_gp(), imm_lane_idx * 2 + 1);
  }
}

void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vinsertps(dst.fp(), src1.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    insertps(dst.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
  }
}

void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  F64x2ReplaceLane(dst.fp(), src1.fp(), src2.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_f32x4_qfma(LiftoffRegister dst,
                                       LiftoffRegister src1,
                                       LiftoffRegister src2,
                                       LiftoffRegister src3) {
  F32x4Qfma(dst.fp(), src1.fp(), src2.fp(), src3.fp(),
            liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_f32x4_qfms(LiftoffRegister dst,
                                       LiftoffRegister src1,
                                       LiftoffRegister src2,
                                       LiftoffRegister src3) {
  F32x4Qfms(dst.fp(), src1.fp(), src2.fp(), src3.fp(),
            liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_f64x2_qfma(LiftoffRegister dst,
                                       LiftoffRegister src1,
                                       LiftoffRegister src2,
                                       LiftoffRegister src3) {
  F64x2Qfma(dst.fp(), src1.fp(), src2.fp(), src3.fp(),
            liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_f64x2_qfms(LiftoffRegister dst,
                                       LiftoffRegister src1,
                                       LiftoffRegister src2,
                                       LiftoffRegister src3) {
  F64x2Qfms(dst.fp(), src1.fp(), src2.fp(), src3.fp(),
            liftoff::kScratchDoubleReg);
}

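// There is no f16 SIMD support in the ia32 port: each emitter below returns
// false, which tells the Liftoff compiler that the operation could not be
// emitted and that it has to fall back (e.g. bail out of Liftoff for this
// function) rather than expect generated code.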
bool LiftoffAssembler::emit_f16x8_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_sqrt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_lt(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_le(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_div(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_min(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_max(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_pmin(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_pmax(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  return false;
}

bool LiftoffAssembler::emit_i16x8_sconvert_f16x8(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  return false;
}

bool LiftoffAssembler::emit_i16x8_uconvert_f16x8(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_sconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_uconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_demote_f32x4_zero(LiftoffRegister dst,
                                                    LiftoffRegister src) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_demote_f64x2_zero(LiftoffRegister dst,
                                                    LiftoffRegister src) {
  return false;
}

bool LiftoffAssembler::emit_f32x4_promote_low_f16x8(LiftoffRegister dst,
                                                    LiftoffRegister src) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_qfma(LiftoffRegister dst,
                                       LiftoffRegister src1,
                                       LiftoffRegister src2,
                                       LiftoffRegister src3) {
  return false;
}

bool LiftoffAssembler::emit_f16x8_qfms(LiftoffRegister dst,
                                       LiftoffRegister src1,
                                       LiftoffRegister src2,
                                       LiftoffRegister src3) {
  return false;
}

bool LiftoffAssembler::supports_f16_mem_access() { return false; }

void LiftoffAssembler::StackCheck(Label* ool_code) {
  CompareStackLimit(esp, StackLimitKind::kInterruptStackLimit);
  j(below_equal, ool_code);
}

void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
  MacroAssembler::AssertUnreachable(reason);
}

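// Spills a set of cache registers around a call: GP registers are pushed
// individually, then a single stack adjustment makes room for all FP
// registers, which are stored with movdqu since each holds up to 128 bits.
// PopRegisters below restores them in exactly the reverse order.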
void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
  LiftoffRegList gp_regs = regs & kGpCacheRegList;
  while (!gp_regs.is_empty()) {
    LiftoffRegister reg = gp_regs.GetFirstRegSet();
    push(reg.gp());
    gp_regs.clear(reg);
  }
  LiftoffRegList fp_regs = regs & kFpCacheRegList;
  unsigned num_fp_regs = fp_regs.GetNumRegsSet();
  if (num_fp_regs) {
    AllocateStackSpace(num_fp_regs * kSimd128Size);
    unsigned offset = 0;
    while (!fp_regs.is_empty()) {
      LiftoffRegister reg = fp_regs.GetFirstRegSet();
      Movdqu(Operand(esp, offset), reg.fp());
      fp_regs.clear(reg);
      offset += kSimd128Size;
    }
    DCHECK_EQ(offset, num_fp_regs * kSimd128Size);
  }
}

void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
  LiftoffRegList fp_regs = regs & kFpCacheRegList;
  unsigned fp_offset = 0;
  while (!fp_regs.is_empty()) {
    LiftoffRegister reg = fp_regs.GetFirstRegSet();
    Movdqu(reg.fp(), Operand(esp, fp_offset));
    fp_regs.clear(reg);
    fp_offset += kSimd128Size;
  }
  if (fp_offset) add(esp, Immediate(fp_offset));
  LiftoffRegList gp_regs = regs & kGpCacheRegList;
  while (!gp_regs.is_empty()) {
    LiftoffRegister reg = gp_regs.GetLastRegSet();
    pop(reg.gp());
    gp_regs.clear(reg);
  }
}

void LiftoffAssembler::RecordSpillsInSafepoint(
    SafepointTableBuilder::Safepoint& safepoint, LiftoffRegList all_spills,
    LiftoffRegList ref_spills, int spill_offset) {
  LiftoffRegList fp_spills = all_spills & kFpCacheRegList;
  int spill_space_size = fp_spills.GetNumRegsSet() * kSimd128Size;
  LiftoffRegList gp_spills = all_spills & kGpCacheRegList;
  while (!gp_spills.is_empty()) {
    LiftoffRegister reg = gp_spills.GetFirstRegSet();
    if (ref_spills.has(reg)) {
      safepoint.DefineTaggedStackSlot(spill_offset);
    }
    gp_spills.clear(reg);
    ++spill_offset;
    spill_space_size += kSystemPointerSize;
  }
  // Record the number of additional spill slots.
  RecordOolSpillSpaceSize(spill_space_size);
}

void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) {
  DCHECK_LT(num_stack_slots,
            (1 << 16) / kSystemPointerSize);  // 16 bit immediate
  ret(static_cast<int>(num_stack_slots * kSystemPointerSize));
}

void LiftoffAssembler::CallCWithStackBuffer(
    const std::initializer_list<VarState> args, const LiftoffRegister* rets,
    ValueKind return_kind, ValueKind out_argument_kind, int stack_bytes,
    ExternalReference ext_ref) {
  AllocateStackSpace(stack_bytes);

  int arg_offset = 0;
  for (const VarState& arg : args) {
    if (arg.is_reg()) {
      liftoff::Store(this, esp, arg_offset, arg.reg(), arg.kind());
    } else if (arg.is_const()) {
      DCHECK_EQ(kI32, arg.kind());
      mov(Operand(esp, arg_offset), Immediate(arg.i32_const()));
    } else if (value_kind_size(arg.kind()) == 4) {
      // We do not have a scratch register, so move via the stack. Note that
      // {push} decrements {esp} by 4 and {pop} increments it again, but the
      // destination operand uses the {esp} value after increasing.
      push(liftoff::GetStackSlot(arg.offset()));
      pop(Operand(esp, arg_offset));
    } else {
      DCHECK_EQ(8, value_kind_size(arg.kind()));
      push(liftoff::GetStackSlot(arg.offset()));
      pop(Operand(esp, arg_offset + 4));
      push(liftoff::GetStackSlot(arg.offset() + 4));
      pop(Operand(esp, arg_offset));
    }
    arg_offset += value_kind_size(arg.kind());
  }
  DCHECK_LE(arg_offset, stack_bytes);

  constexpr Register kScratch = eax;
  constexpr Register kArgumentBuffer = ecx;
  constexpr int kNumCCallArgs = 1;
  mov(kArgumentBuffer, esp);
  PrepareCallCFunction(kNumCCallArgs, kScratch);

  // Pass a pointer to the buffer with the arguments to the C function. ia32
  // does not use registers here, so push to the stack.
  mov(Operand(esp, 0), kArgumentBuffer);

  // Now call the C function.
  CallCFunction(ext_ref, kNumCCallArgs);

  // Move return value to the right register.
  const LiftoffRegister* next_result_reg = rets;
  if (return_kind != kVoid) {
    constexpr Register kReturnReg = eax;
    if (kReturnReg != next_result_reg->gp()) {
      Move(*next_result_reg, LiftoffRegister(kReturnReg), return_kind);
    }
    ++next_result_reg;
  }

  // Load potential output value from the buffer on the stack.
  if (out_argument_kind != kVoid) {
    liftoff::Load(this, *next_result_reg, esp, 0, out_argument_kind);
  }

  add(esp, Immediate(stack_bytes));
}

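// On ia32, C functions take all arguments on the stack; additionally, i64
// arguments are lowered to two 32-bit stack slots here. A GP register that
// is not holding any argument is picked as scratch to copy stack-resident
// arguments into the outgoing argument area.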
void LiftoffAssembler::CallC(const std::initializer_list<VarState> args,
                             ExternalReference ext_ref) {
  LiftoffRegList arg_regs;
  for (const VarState arg : args) {
    if (arg.is_reg()) arg_regs.set(arg.reg());
  }

  RegList usable_regs = kLiftoffAssemblerGpCacheRegs - arg_regs.GetGpList();
  Register scratch = usable_regs.first();
  int num_lowered_args = 0;
  // i64 arguments are lowered to two actual arguments (taking two stack
  // slots).
  for (const VarState& arg : args) {
    num_lowered_args += arg.kind() == kI64 ? 2 : 1;
  }
  PrepareCallCFunction(num_lowered_args, scratch);

  // Ia32 passes all arguments via the stack. Store them now in the stack
  // space allocated by {PrepareCallCFunction}. {GetNextOperand} returns the
  // operand for the next lowered argument slot each time it is called.
  auto GetNextOperand = [arg_offset = 0, num_lowered_args]() mutable {
    // Check that we don't exceed the pre-computed {num_lowered_args}.
    DCHECK_GE(num_lowered_args, arg_offset);
    USE(num_lowered_args);
    return Operand{esp, arg_offset++ * kSystemPointerSize};
  };
  for (const VarState& arg : args) {
    Operand dst = GetNextOperand();
    if (arg.is_reg()) {
      LiftoffRegister reg = arg.reg();
      if (arg.kind() == kI64) {
        mov(dst, reg.low_gp());
        mov(GetNextOperand(), reg.high_gp());
      } else {
        mov(dst, reg.gp());
      }
    } else if (arg.is_const()) {
      DCHECK_EQ(kI32, arg.kind());
      mov(dst, Immediate(arg.i32_const()));
    } else {
      DCHECK(arg.is_stack());
      if (arg.kind() == kI64) {
        mov(scratch, liftoff::GetStackSlot(arg.offset()));
        mov(dst, scratch);
        mov(scratch, liftoff::GetHalfStackSlot(arg.offset(), kHighWord));
        mov(GetNextOperand(), scratch);
      } else {
        mov(scratch, liftoff::GetStackSlot(arg.offset()));
        mov(dst, scratch);
      }
    }
  }

  // Now call the C function.
  CallCFunction(ext_ref, num_lowered_args);
}

void LiftoffAssembler::CallNativeWasmCode(Address addr) {
  wasm_call(addr, RelocInfo::WASM_CALL);
}

void LiftoffAssembler::TailCallNativeWasmCode(Address addr) {
  jmp(addr, RelocInfo::WASM_CALL);
}

void LiftoffAssembler::CallIndirect(const ValueKindSig* sig,
                                    compiler::CallDescriptor* call_descriptor,
                                    Register target) {
  // Since we have more cache registers than parameter registers, the
  // {LiftoffCompiler} should always be able to place {target} in a register.
  DCHECK(target.is_valid());
  CallWasmCodePointer(target);
}

void LiftoffAssembler::TailCallIndirect(
    compiler::CallDescriptor* call_descriptor, Register target) {
  // Since we have more cache registers than parameter registers, the
  // {LiftoffCompiler} should always be able to place {target} in a register.
  DCHECK(target.is_valid());
  CallWasmCodePointer(target, CallJumpMode::kTailCall);
}

void LiftoffAssembler::CallBuiltin(Builtin builtin) {
  // A direct call to a builtin. Just encode the builtin index. This will be
  // patched at relocation.
  wasm_call(static_cast<Address>(builtin), RelocInfo::WASM_STUB_CALL);
}

void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) {
  AllocateStackSpace(size);
  mov(addr, esp);
}

void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
  add(esp, Immediate(size));
}

void LiftoffAssembler::MaybeOSR() {}

void LiftoffAssembler::emit_store_nonzero_if_nan(Register dst,
                                                 DoubleRegister src,
                                                 ValueKind kind) {
  // Compare src against itself: unordered (parity set) means NaN.
  if (kind == kF32) {
    ucomiss(src, src);
  } else {
    DCHECK_EQ(kind, kF64);
    ucomisd(src, src);
  }
  Label ret;
  j(parity_odd, &ret);
  mov(Operand(dst, 0), Immediate(1));
  bind(&ret);
}

void LiftoffAssembler::emit_s128_store_nonzero_if_nan(Register dst,
                                                      LiftoffRegister src,
                                                      Register tmp_gp,
                                                      LiftoffRegister tmp_s128,
                                                      ValueKind lane_kind) {
  // cmpunord* sets a lane to all-ones iff that lane is NaN; pmovmskb then
  // yields a nonzero GP value iff any lane was NaN.
  if (lane_kind == kF32) {
    movaps(tmp_s128.fp(), src.fp());
    cmpunordps(tmp_s128.fp(), tmp_s128.fp());
  } else {
    DCHECK_EQ(lane_kind, kF64);
    movapd(tmp_s128.fp(), src.fp());
    cmpunordpd(tmp_s128.fp(), tmp_s128.fp());
  }
  pmovmskb(tmp_gp, tmp_s128.fp());
  or_(Operand(dst, 0), tmp_gp);
}

void LiftoffAssembler::emit_store_nonzero(Register dst) {
  mov(Operand(dst, 0), Immediate(1));
}

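// Builds the stack-parameter area for a call: slots were recorded in
// arbitrary order, so they are sorted into push order first, and gaps
// between parameter slots are covered by explicit stack-pointer decrements
// before each push so that every value lands at its assigned slot offset.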
void LiftoffStackSlots::Construct(int param_slots) {
  DCHECK_LT(0, slots_.size());
  SortInPushOrder();
  int last_stack_slot = param_slots;
  for (auto& slot : slots_) {
    const int stack_slot = slot.dst_slot_;
    int stack_decrement = (last_stack_slot - stack_slot) * kSystemPointerSize;
    DCHECK_LT(0, stack_decrement);
    last_stack_slot = stack_slot;
    const LiftoffAssembler::VarState& src = slot.src_;
    switch (src.loc()) {
      case LiftoffAssembler::VarState::kStack:
        // The combination of AllocateStackSpace and 2 movdqu is usually
        // smaller in code size than doing 4 pushes.
        if (src.kind() == kS128) {
          asm_->AllocateStackSpace(stack_decrement);
          asm_->movdqu(liftoff::kScratchDoubleReg,
                       liftoff::GetStackSlot(slot.src_offset_));
          asm_->movdqu(Operand(esp, 0), liftoff::kScratchDoubleReg);
          break;
        }
        if (src.kind() == kF64) {
          asm_->AllocateStackSpace(stack_decrement - kDoubleSize);
          DCHECK_EQ(kLowWord, slot.half_);
          asm_->push(liftoff::GetHalfStackSlot(slot.src_offset_, kHighWord));
          stack_decrement = kSystemPointerSize;
        }
        asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
        asm_->push(liftoff::GetHalfStackSlot(slot.src_offset_, slot.half_));
        break;
      case LiftoffAssembler::VarState::kRegister:
        if (src.kind() == kI64) {
          liftoff::push(
              asm_, slot.half_ == kLowWord ? src.reg().low() : src.reg().high(),
              kI32, stack_decrement - kSystemPointerSize);
        } else {
          int pushed_bytes = SlotSizeInBytes(slot);
          liftoff::push(asm_, src.reg(), src.kind(),
                        stack_decrement - pushed_bytes);
        }
        break;
      case LiftoffAssembler::VarState::kIntConst:
        asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
        // The high word is the sign extension of the low word.
        asm_->push(Immediate(slot.half_ == kLowWord ? src.i32_const()
                                                    : src.i32_const() >> 31));
        break;
    }
  }
}

#undef RETURN_FALSE_IF_MISSING_CPU_FEATURE

#endif  // V8_WASM_BASELINE_IA32_LIFTOFF_ASSEMBLER_IA32_INL_H_
void emplace_back(Args &&... args)
V8_INLINE void RecordComment(const char *comment, const SourceLocation &loc=SourceLocation::Current())
void cmov(Condition cc, Register dst, Register src)
void and_(Register dst, Register src1, const Operand &src2, SBit s=LeaveCC, Condition cond=al)
void movss(XMMRegister dst, Operand src)
void sar(Register dst, uint8_t imm8)
void mov_w(Register dst, Operand src)
void j(Condition cc, Label *L, Label::Distance distance=Label::kFar)
void add(Register dst, Register src1, const Operand &src2, SBit s=LeaveCC, Condition cond=al)
void shl_cl(Register dst)
void cmp(Register src1, const Operand &src2, Condition cond=al)
void setcc(Condition cc, Register reg)
void test(Register reg, const Immediate &imm)
void sub(Register dst, Register src1, const Operand &src2, SBit s=LeaveCC, Condition cond=al)
void movzx_b(Register dst, Register src)
void xor_(Register dst, int32_t imm32)
void movdqu(XMMRegister dst, Operand src)
void movsd(XMMRegister dst, XMMRegister src)
void movaps(XMMRegister dst, XMMRegister src)
void mov(Register dst, const Operand &src, SBit s=LeaveCC, Condition cond=al)
void shr_cl(Register dst)
void wasm_call(Address address, RelocInfo::Mode rmode)
void lea(Register dst, Operand src)
void sar_cl(Register dst)
void sub_sp_32(uint32_t imm)
Assembler(const AssemblerOptions &, std::unique_ptr< AssemblerBuffer >={})
static constexpr int kFixedFrameSizeAboveFp
static bool IsSupported(CpuFeature f)
static V8_EXPORT_PRIVATE ExternalReference isolate_address()
static constexpr MachineType Uint8()
static constexpr MachineType Int32()
static constexpr MachineType Uint32()
static constexpr MachineType Uint16()
static constexpr MachineType Int16()
static constexpr MachineType Int64()
static constexpr MachineType Int8()
void ShlPair_cl(Register high, Register low)
void mov(Register rd, Register rj)
void CompareStackLimit(Register with, StackLimitKind kind)
void Move(Register dst, Tagged< Smi > smi)
void JumpIfSmi(Register value, Label *smi_label)
void AssertUnreachable(AbortReason reason) NOOP_UNLESS_DEBUG_CODE
void SarPair_cl(Register high, Register low)
void CheckPageFlag(Register object, int mask, Condition cc, Label *condition_met)
int CallCFunction(ExternalReference function, int num_arguments, SetIsolateDataSlots set_isolate_data_slots=SetIsolateDataSlots::kYes, Label *return_label=nullptr)
void AllocateStackSpace(Register bytes)
void ShrPair_cl(Register high, Register low)
void CallRecordWriteStubSaveRegisters(Register object, Operand offset, SaveFPRegsMode fp_mode, StubCallMode mode=StubCallMode::kCallBuiltinPointer)
void PrepareCallCFunction(int num_reg_arguments, int num_double_registers=0, Register scratch=no_reg)
static constexpr MainThreadFlags kPointersToHereAreInterestingMask
static constexpr MainThreadFlags kPointersFromHereAreInterestingMask
constexpr void set(RegisterT reg)
constexpr bool has(RegisterT reg) const
constexpr storage_t bits() const
constexpr bool is_valid() const
bool is_byte_register() const
void I32x4ExtMul(XMMRegister dst, XMMRegister src1, XMMRegister src2, XMMRegister scratch, bool low, bool is_signed)
static constexpr Tagged< Smi > FromInt(int value)
static constexpr int32_t TypeToMarker(Type type)
static constexpr int kFrameTypeOffset
static constexpr Register GapRegister()
static constexpr int kInstanceDataOffset
static constexpr int kFeedbackVectorOffset
void SpillInstanceData(Register instance)
void LoadFullPointer(Register dst, Register src_addr, int32_t offset_imm)
void Load(LiftoffRegister dst, Register src_addr, Register offset_reg, uintptr_t offset_imm, LoadType type, uint32_t *protected_load_pc=nullptr, bool is_load_mem=false, bool i64_offset=false, bool needs_shift=false)
void CallBuiltin(Builtin builtin)
void CallFrameSetupStub(int declared_function_index)
void AssertUnreachable(AbortReason reason)
void LoadConstant(LiftoffRegister, WasmValue)
int GetTotalFrameSize() const
void PrepareTailCall(int num_callee_stack_params, int stack_param_delta)
void LoadFromInstance(Register dst, Register instance, int offset, int size)
static bool NeedsAlignment(ValueKind kind)
static int SlotSizeForType(ValueKind kind)
void LoadProtectedPointer(Register dst, Register src_addr, int32_t offset)
void LoadInstanceDataFromFrame(Register dst)
void ParallelRegisterMove(base::Vector< const ParallelRegisterMoveTuple >)
void Move(LiftoffRegister dst, LiftoffRegister src, ValueKind)
static constexpr int kStackSlotSize
void PatchPrepareStackFrame(int offset, SafepointTableBuilder *, bool feedback_vector_slot, size_t stack_param_slots)
CacheState * cache_state()
void SpillRegisters(Regs... regs)
void LoadTaggedPointer(Register dst, Register src_addr, Register offset_reg, int32_t offset_imm, uint32_t *protected_load_pc=nullptr, bool offset_reg_needs_shift=false)
void PushRegisters(LiftoffRegList)
void StoreTaggedPointer(Register dst_addr, Register offset_reg, int32_t offset_imm, Register src, LiftoffRegList pinned, uint32_t *protected_store_pc=nullptr, SkipWriteBarrier=kNoSkipWriteBarrier)
void PopRegisters(LiftoffRegList)
LiftoffRegister GetUnusedRegister(RegClass rc, std::initializer_list< LiftoffRegister > try_first, LiftoffRegList pinned)
void LoadTaggedPointerFromInstance(Register dst, Register instance, int offset)
Register LoadOldFramePointer()
void CheckTierUp(int declared_func_index, int budget_used, Label *ool_label, const FreezeCacheState &frozen)
void LoadTrustedPointer(Register dst, Register src_addr, int offset, IndirectPointerTag tag)
static constexpr int StaticStackFrameSize()
constexpr Register set(Register reg)
static constexpr LiftoffRegList FromBits()
constexpr DoubleRegister fp() const
constexpr Register gp() const
static constexpr int ToTagged(int offset)
static void Pack16Lanes(uint32_t *dst, const uint8_t *shuffle)
~CacheStatePreservingTempRegisters()
LiftoffAssembler *const assm_
CacheStatePreservingTempRegisters(LiftoffAssembler *assm, LiftoffRegList pinned={})
#define COMPRESS_POINTERS_BOOL
#define V8_ENABLE_SANDBOX_BOOL
ZoneVector< OpIndex > candidates
base::Vector< const DirectHandle< Object > > args
static constexpr unsigned kSignBit
ZoneVector< RpoNumber > &result
#define RETURN_FALSE_IF_MISSING_CPU_FEATURE(name)
MovableLabel continuation
LiftoffRegList regs_to_save
std::optional< OolTrapLabel > trap
constexpr bool IsPowerOfTwo(T value)
constexpr int WhichPowerOfTwo(T value)
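// A sketch of what the two predicates above compute, assuming the usual
// bit-twiddling definitions (the real helpers live in src/base/bits.h and
// additionally DCHECK their preconditions); the names here are hypothetical:
template <typename T>
constexpr bool IsPowerOfTwoSketch(T value) {
  // Exactly one bit set: positive, and clearing the lowest set bit gives 0.
  return value > 0 && (value & (value - 1)) == 0;
}
template <typename T>
constexpr int WhichPowerOfTwoSketch(T value) {
  // Index of the single set bit; only meaningful when value is a power of two.
  int bit = 0;
  while (value >>= 1) ++bit;
  return bit;
}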
void OpWithCarry(LiftoffAssembler *assm, LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs)
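// OpWithCarry presumably emits the ia32 register-pair pattern for i64
// add/sub: an add/sub on the low words followed by adc/sbb on the high
// words. The same carry propagation in portable C++ (hypothetical helper):
#include <cstdint>
void AddI64PairSketch(uint32_t lhs[2], const uint32_t rhs[2]) {
  uint32_t low = lhs[0] + rhs[0];
  uint32_t carry = low < lhs[0] ? 1 : 0;  // unsigned wrap == the CPU carry flag
  lhs[0] = low;
  lhs[1] = lhs[1] + rhs[1] + carry;       // this is what adc computes
}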
void EmitAllTrue(LiftoffAssembler *assm, LiftoffRegister dst, LiftoffRegister src, VectorFormat format)
void Store(LiftoffAssembler *assm, LiftoffRegister src, MemOperand dst, ValueKind kind)
static constexpr LiftoffRegList kByteRegs
void EmitCommutativeBinOpImm(LiftoffAssembler *assm, Register dst, Register lhs, int32_t imm)
void EmitSatTruncateFloatToInt(LiftoffAssembler *assm, Register dst, DoubleRegister src)
void setcc_32_no_spill(LiftoffAssembler *assm, Condition cond, Register dst, Register tmp_byte_reg)
void EmitFloatMinOrMax(LiftoffAssembler *assm, RegisterType dst, RegisterType lhs, RegisterType rhs, MinOrMax min_or_max)
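// Wasm float min/max cannot be a bare comparison: any NaN operand yields a
// NaN, and min(-0, +0) must be -0 (max must return +0), which is why a
// dedicated helper with explicit case splits exists. Scalar semantics sketch
// (hypothetical name; double shown, f32 is analogous):
#include <cmath>
#include <limits>
double WasmF64MinSketch(double lhs, double rhs) {
  if (std::isnan(lhs) || std::isnan(rhs))
    return std::numeric_limits<double>::quiet_NaN();
  if (lhs == rhs) return std::signbit(lhs) ? lhs : rhs;  // prefers -0 over +0
  return lhs < rhs ? lhs : rhs;
}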
MemOperand GetHalfStackSlot(int offset, RegPairHalf half)
constexpr DoubleRegister kScratchDoubleReg
void EmitFloatSetCond(LiftoffAssembler *assm, Condition cond, Register dst, DoubleRegister lhs, DoubleRegister rhs)
bool PairContains(LiftoffRegister pair, Register reg)
void MoveStackValue(LiftoffAssembler *assm, const Operand &src, const Operand &dst)
Operand MemOperand(Register base, Register offset_reg, int offset_imm)
void EmitSimdNonCommutativeBinOp(LiftoffAssembler *assm, LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs, std::optional< CpuFeature > feature=std::nullopt)
void EmitSimdShiftOp(LiftoffAssembler *assm, LiftoffRegister dst, LiftoffRegister operand, LiftoffRegister count)
void EmitTruncateFloatToInt(LiftoffAssembler *assm, Register dst, DoubleRegister src, Label *trap)
void Emit64BitShiftOperation(LiftoffAssembler *assm, LiftoffRegister dst, LiftoffRegister src, Register amount, void(MacroAssembler::*emit_shift)(Register, Register))
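// Emit64BitShiftOperation presumably routes the shift amount through ecx
// (hence the _cl helpers like ShrPair_cl above) and uses shld/shrd so that
// bits crossing the 32-bit boundary flow between the two halves; amounts
// >= 32 additionally swap the halves. What a 64-bit left shift computes on a
// register pair, as portable C++ (hypothetical helper):
#include <cstdint>
void ShlPairSketch(uint32_t* high, uint32_t* low, unsigned amount) {
  amount &= 63;  // wasm shift amounts are taken mod the bit width
  if (amount >= 32) {
    *high = *low << (amount - 32);
    *low = 0;
  } else if (amount != 0) {
    *high = (*high << amount) | (*low >> (32 - amount));  // shld-style fill
    *low <<= amount;
  }
}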
MemOperand GetStackSlot(int offset)
Register GetTmpByteRegister(LiftoffAssembler *assm, Register candidate)
void setcc_32(LiftoffAssembler *assm, Condition cond, Register dst)
void SignExtendI32ToI64(Assembler *assm, LiftoffRegister reg)
void Load(LiftoffAssembler *assm, LiftoffRegister dst, MemOperand src, ValueKind kind)
void EmitSimdShiftOpImm(LiftoffAssembler *assm, LiftoffRegister dst, LiftoffRegister operand, int32_t count)
void push(LiftoffAssembler *assm, LiftoffRegister reg, ValueKind kind, int padding=0)
constexpr DoubleRegister kScratchDoubleReg2
void EmitShiftOperation(LiftoffAssembler *assm, Register dst, Register src, Register amount, void(Assembler::*emit_shift)(Register))
void EmitCommutativeBinOp(LiftoffAssembler *assm, Register dst, Register lhs, Register rhs)
MemOperand GetInstanceDataOperand()
void OpWithCarryI(LiftoffAssembler *assm, LiftoffRegister dst, LiftoffRegister lhs, int64_t imm)
LiftoffRegister ReplaceInPair(LiftoffRegister pair, Register old_reg, Register new_reg)
void AtomicBinop64(LiftoffAssembler *lasm, Binop op, Register dst_addr, Register offset_reg, uint32_t offset_imm, LiftoffRegister value, LiftoffRegister result)
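// ia32 has no single lock-prefixed 64-bit RMW instruction, so AtomicBinop64
// presumably compiles to a load / compute / cmpxchg8b retry loop, and wasm's
// atomic.rmw returns the *old* value. The same shape with std::atomic
// (illustrative only, not the emitted code):
#include <atomic>
#include <cstdint>
uint64_t AtomicAdd64Sketch(std::atomic<uint64_t>* addr, uint64_t value) {
  uint64_t expected = addr->load();
  while (!addr->compare_exchange_weak(expected, expected + value)) {
    // On failure, expected is refreshed with the current memory value; retry.
  }
  return expected;  // old value, per wasm atomic.rmw semantics
}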
void ConvertFloatToIntAndBack(LiftoffAssembler *assm, Register dst, DoubleRegister src, DoubleRegister converted_back, LiftoffRegList pinned)
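// As the name suggests, the trapping truncations convert, convert the result
// back, and compare: if the round trip does not reproduce the truncated
// input, the source was NaN or out of range and control jumps to the trap
// label. Equivalent check in portable C++ (hypothetical helper; returns
// false where the generated code would trap):
#include <cmath>
#include <cstdint>
bool TruncF64ToI32Sketch(double src, int32_t* dst) {
  double truncated = std::trunc(src);
  // NaN fails both comparisons, so it falls into the trap path as well.
  if (!(truncated >= -2147483648.0 && truncated <= 2147483647.0)) return false;
  *dst = static_cast<int32_t>(truncated);
  return true;
}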
Condition cond_make_unsigned(Condition cond)
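// cond_make_unsigned presumably rewrites a signed comparison into its
// unsigned counterpart, which the i64 pair-compare sequence needs for the
// low words (only the high-word comparison stays signed). Sketch under that
// assumption, using V8's generic condition names:
Condition CondMakeUnsignedSketch(Condition cond) {
  switch (cond) {
    case kLessThan:         return kUnsignedLessThan;
    case kLessThanEqual:    return kUnsignedLessThanEqual;
    case kGreaterThan:      return kUnsignedGreaterThan;
    case kGreaterThanEqual: return kUnsignedGreaterThanEqual;
    default:                return cond;  // equality is sign-agnostic
  }
}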
void EmitInt32DivOrRem(LiftoffAssembler *assm, Register dst, Register lhs, Register rhs, Label *trap_div_by_zero, Label *trap_div_unrepresentable)
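// ia32's idiv faults on exactly two inputs, which is why the helper takes
// two trap labels: a zero divisor, and INT32_MIN / -1, whose quotient (2^31)
// is not representable. Hypothetical predicate with the same case split:
#include <cstdint>
bool I32DivWouldTrapSketch(int32_t lhs, int32_t rhs, bool* unrepresentable) {
  if (rhs == 0) { *unrepresentable = false; return true; }   // trap_div_by_zero
  *unrepresentable = (lhs == INT32_MIN && rhs == -1);  // trap_div_unrepresentable
  return *unrepresentable;
}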
void EmitSimdCommutativeBinOp(LiftoffAssembler *assm, LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs, std::optional< CpuFeature > feature=std::nullopt)
void AtomicBinop32(LiftoffAssembler *lasm, Register dst_addr, Register offset_reg, uint32_t offset_imm, LiftoffRegister value, LiftoffRegister result, StoreType type, void(*op)(LiftoffAssembler *, Register, Register, Register))
void I32x4ExtMulHelper(LiftoffAssembler *assm, XMMRegister dst, XMMRegister src1, XMMRegister src2, bool low, bool is_signed)
void AtomicAddOrSubOrExchange32(LiftoffAssembler *lasm, Binop binop, Register dst_addr, Register offset_reg, uint32_t offset_imm, LiftoffRegister value, LiftoffRegister result, StoreType type)
constexpr DoubleRegister kFpReturnRegisters[]
constexpr Register kGpParamRegisters[]
constexpr DoubleRegister kFpParamRegisters[]
constexpr DoubleRegList kLiftoffAssemblerFpCacheRegs
constexpr int value_kind_full_size(ValueKind kind)
constexpr RegList kLiftoffAssemblerGpCacheRegs
constexpr Register kGpReturnRegisters[]
int declared_function_index(const WasmModule *module, int func_index)
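// CheckTierUp indexes the tiering-budget array by *declared* function index,
// i.e. the module-level index with imported functions subtracted out. Sketch
// of that relationship (WasmModule field name assumed):
int DeclaredFunctionIndexSketch(const WasmModule* module, int func_index) {
  return func_index - module->num_imported_functions;
}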
constexpr int value_kind_size(ValueKind kind)
constexpr bool is_reference(ValueKind kind)
constexpr IndependentValueType kWasmI64
constexpr Register no_reg
constexpr Register kRootRegister
constexpr VFPRoundingMode kRoundToNearest
constexpr int kTaggedSize
constexpr int kSimd128Size
Condition kUnsignedGreaterThanEqual
using DoubleRegister = DwVfpRegister
using RegList = RegListBase< Register >
IndirectPointerTag kWasmInternalFunctionIndirectPointerTag
Register instance_data
static constexpr int kProtectedInstanceDataOffset
const CanonicalSig *sig
constexpr int kSystemPointerSize
constexpr Register kReturnRegister0
V8_EXPORT_PRIVATE FlagValues v8_flags
const intptr_t kSmiTagMask
constexpr VFPRoundingMode kRoundToZero
std::unique_ptr< AssemblerBuffer > ExternalAssemblerBuffer(void *start, int size)
constexpr int kDoubleSize
bool is_signed(Condition cond)
i::Address Load(i::Address address)
#define DCHECK_LE(v1, v2)
#define DCHECK_NE(v1, v2)
#define DCHECK_GE(v1, v2)
#define DCHECK(condition)
#define DCHECK_LT(v1, v2)
#define DCHECK_EQ(v1, v2)
Register cached_instance_data
LiftoffRegister unused_register(RegClass rc, LiftoffRegList pinned={}) const
bool is_used(LiftoffRegister reg) const
bool has_unused_register(RegClass rc, LiftoffRegList pinned={}) const
LiftoffRegList used_registers
#define V8_LIKELY(condition)