V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
liftoff-assembler-ia32-inl.h
1// Copyright 2017 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_WASM_BASELINE_IA32_LIFTOFF_ASSEMBLER_IA32_INL_H_
6#define V8_WASM_BASELINE_IA32_LIFTOFF_ASSEMBLER_IA32_INL_H_
7
8#include <optional>
9
17#include "src/wasm/value-type.h"
20
21namespace v8::internal::wasm {
22
23#define RETURN_FALSE_IF_MISSING_CPU_FEATURE(name) \
24 if (!CpuFeatures::IsSupported(name)) return false; \
25 CpuFeatureScope feature(this, name);
26
27namespace liftoff {
28
29inline Operand GetStackSlot(int offset) { return Operand(ebp, -offset); }
30
31inline Operand GetHalfStackSlot(int offset, RegPairHalf half) {
32 int32_t half_offset =
33 half == kLowWord ? 0 : LiftoffAssembler::kStackSlotSize / 2;
34 return Operand(offset > 0 ? ebp : esp, -offset + half_offset);
35}
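// Example (assuming LiftoffAssembler::kStackSlotSize == 8): an i64 spilled at
// offset 24 occupies [ebp-24, ebp-16); GetHalfStackSlot(24, kLowWord) yields
// Operand(ebp, -24) for the low word and GetHalfStackSlot(24, kHighWord)
// yields Operand(ebp, -20) for the high word, matching the split used by
// Spill() and FillI64Half() further down.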
36
37// TODO(clemensb): Make this a constexpr variable once Operand is constexpr.
38inline Operand GetInstanceDataOperand() {
39 return GetStackSlot(WasmLiftoffFrameConstants::kInstanceDataOffset);
40}
41
42inline Operand MemOperand(Register base, Register offset_reg, int offset_imm) {
43 return offset_reg == no_reg ? Operand(base, offset_imm)
44 : Operand(base, offset_reg, times_1, offset_imm);
45}
46
47static constexpr LiftoffRegList kByteRegs =
48 LiftoffRegList::FromBits<RegList{eax, ecx, edx}.bits()>();
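// Only eax, ecx, edx and ebx are addressable as 8-bit registers on ia32; ebx
// is left out of {kByteRegs} because it serves as the root register (see the
// static_assert(kRootRegister == ebx) uses below) and is not a Liftoff cache
// register.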
49
50inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, Register base,
51 int32_t offset, ValueKind kind) {
52 Operand src(base, offset);
53 switch (kind) {
54 case kI16:
55 assm->mov_w(dst.gp(), src);
56 break;
57 case kI32:
58 case kRefNull:
59 case kRef:
60 assm->mov(dst.gp(), src);
61 break;
62 case kI64:
63 assm->mov(dst.low_gp(), src);
64 assm->mov(dst.high_gp(), Operand(base, offset + 4));
65 break;
66 case kF32:
67 assm->movss(dst.fp(), src);
68 break;
69 case kF64:
70 assm->movsd(dst.fp(), src);
71 break;
72 case kS128:
73 assm->movdqu(dst.fp(), src);
74 break;
75 case kVoid:
76 case kTop:
77 case kBottom:
78 case kI8:
79 case kF16:
80 UNREACHABLE();
81 }
82}
83
84inline void Store(LiftoffAssembler* assm, Register base, int32_t offset,
85 LiftoffRegister src, ValueKind kind) {
86 Operand dst(base, offset);
87 switch (kind) {
88 case kI16:
89 assm->mov_w(dst, src.gp());
90 break;
91 case kI32:
92 case kRefNull:
93 case kRef:
94 assm->mov(dst, src.gp());
95 break;
96 case kI64:
97 assm->mov(dst, src.low_gp());
98 assm->mov(Operand(base, offset + 4), src.high_gp());
99 break;
100 case kF32:
101 assm->movss(dst, src.fp());
102 break;
103 case kF64:
104 assm->movsd(dst, src.fp());
105 break;
106 case kS128:
107 assm->movdqu(dst, src.fp());
108 break;
109 case kVoid:
110 case kTop:
111 case kBottom:
112 case kI8:
113 case kF16:
114 UNREACHABLE();
115 }
116}
117
118inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueKind kind,
119 int padding = 0) {
120 switch (kind) {
121 case kI32:
122 case kRef:
123 case kRefNull:
124 assm->AllocateStackSpace(padding);
125 assm->push(reg.gp());
126 break;
127 case kI64:
128 assm->AllocateStackSpace(padding);
129 assm->push(reg.high_gp());
130 assm->push(reg.low_gp());
131 break;
132 case kF32:
133 assm->AllocateStackSpace(sizeof(float) + padding);
134 assm->movss(Operand(esp, 0), reg.fp());
135 break;
136 case kF64:
137 assm->AllocateStackSpace(sizeof(double) + padding);
138 assm->movsd(Operand(esp, 0), reg.fp());
139 break;
140 case kS128:
141 assm->AllocateStackSpace(sizeof(double) * 2 + padding);
142 assm->movdqu(Operand(esp, 0), reg.fp());
143 break;
144 case kVoid:
145 case kTop:
146 case kBottom:
147 case kI8:
148 case kI16:
149 case kF16:
150 UNREACHABLE();
151 }
152}
153
154inline void SignExtendI32ToI64(LiftoffAssembler* assm, LiftoffRegister reg) {
155 assm->mov(reg.high_gp(), reg.low_gp());
156 assm->sar(reg.high_gp(), 31);
157}
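// Example: with a low word of 0xFFFFFFF6 (-10), the {sar} by 31 broadcasts the
// sign bit of the copied low word, so the high word becomes 0xFFFFFFFF and the
// pair holds the i64 value -10; for 0x0000000A (+10) the high word becomes 0.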
158
159// Get a temporary byte register, using {candidate} if possible.
160// Might spill, but always keeps status flags intact.
161inline Register GetTmpByteRegister(LiftoffAssembler* assm, Register candidate) {
162 if (candidate.is_byte_register()) return candidate;
163 // {GetUnusedRegister()} may insert move instructions to spill registers to
164 // the stack. This is OK because {mov} does not change the status flags.
165 return assm->GetUnusedRegister(liftoff::kByteRegs).gp();
166}
167
168inline void MoveStackValue(LiftoffAssembler* assm, const Operand& src,
169 const Operand& dst) {
170 if (assm->cache_state()->has_unused_register(kGpReg)) {
171 Register tmp = assm->cache_state()->unused_register(kGpReg).gp();
172 assm->mov(tmp, src);
173 assm->mov(dst, tmp);
174 } else {
175 // No free register, move via the stack.
176 assm->push(src);
177 assm->pop(dst);
178 }
179}
180
181class CacheStatePreservingTempRegisters {
182 public:
183 explicit CacheStatePreservingTempRegisters(LiftoffAssembler* assm,
184 LiftoffRegList pinned = {})
185 : assm_(assm), pinned_(pinned) {}
186
187 ~CacheStatePreservingTempRegisters() {
188 for (Register reg : must_pop_) {
189 assm_->pop(reg);
190 }
191 }
192
193 Register Acquire() {
194 if (assm_->cache_state()->has_unused_register(kGpReg, pinned_)) {
195 return pinned_.set(
196 assm_->cache_state()->unused_register(kGpReg, pinned_).gp());
197 }
198
199 RegList available =
200 kLiftoffAssemblerGpCacheRegs - pinned_.GetGpList() - must_pop_;
201 DCHECK(!available.is_empty());
202 // Use {last()} here so we can just iterate forwards in the destructor.
203 Register reg = available.last();
204 assm_->push(reg);
205 must_pop_.set(reg);
206 return reg;
207 }
208
209 private:
210 LiftoffAssembler* const assm_;
211 LiftoffRegList pinned_;
212 RegList must_pop_;
213};
214
219
220constexpr int kSubSpSize = 6; // 6 bytes for "sub esp, <imm32>"
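// Encoding note: "sub esp, imm32" is opcode 81 /5 followed by a 32-bit
// immediate (81 EC xx xx xx xx), i.e. 2 + 4 = 6 bytes; PatchPrepareStackFrame
// below relies on this when it overwrites the placeholder emitted by
// sub_sp_32(0).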
221
222} // namespace liftoff
223
224int LiftoffAssembler::PrepareStackFrame() {
225 int offset = pc_offset();
226 // Next we reserve the memory for the whole stack frame. We do not know yet
227 // how big the stack frame will be so we just emit a placeholder instruction.
228 // PatchPrepareStackFrame will patch this in order to increase the stack
229 // appropriately.
230 sub_sp_32(0);
231 DCHECK_EQ(liftoff::kSubSpSize, pc_offset() - offset);
232 return offset;
233}
234
235void LiftoffAssembler::CallFrameSetupStub(int declared_function_index) {
236// The standard library used by gcc tryjobs does not consider `std::find` to be
237// `constexpr`, so wrap it in a `#ifdef __clang__` block.
238#ifdef __clang__
239 static_assert(std::find(std::begin(wasm::kGpParamRegisters),
240 std::end(wasm::kGpParamRegisters),
241 kLiftoffFrameSetupFunctionReg) ==
242 std::end(wasm::kGpParamRegisters));
243#endif
244
245 LoadConstant(LiftoffRegister(kLiftoffFrameSetupFunctionReg),
246 WasmValue(declared_function_index));
247 CallBuiltin(Builtin::kWasmLiftoffFrameSetup);
248}
249
250void LiftoffAssembler::PrepareTailCall(int num_callee_stack_params,
251 int stack_param_delta) {
252 // Push the return address and frame pointer to complete the stack frame.
253 push(Operand(ebp, 4));
254 push(Operand(ebp, 0));
255
256 // Shift the whole frame upwards.
257 Register scratch = eax;
258 push(scratch);
259 const int slot_count = num_callee_stack_params + 2;
260 for (int i = slot_count; i > 0; --i) {
261 mov(scratch, Operand(esp, i * 4));
262 mov(Operand(ebp, (i - stack_param_delta - 1) * 4), scratch);
263 }
264 pop(scratch);
265
266 // Set the new stack and frame pointers.
267 lea(esp, Operand(ebp, -stack_param_delta * 4));
268 pop(ebp);
269}
270
272
273void LiftoffAssembler::PatchPrepareStackFrame(
274 int offset, SafepointTableBuilder* safepoint_table_builder,
275 bool feedback_vector_slot, size_t stack_param_slots) {
276 // The frame_size includes the frame marker and the instance slot. Both are
277 // pushed as part of frame construction, so we don't need to allocate memory
278 // for them anymore.
279 int frame_size = GetTotalFrameSize() - 2 * kSystemPointerSize;
280 // The frame setup builtin also pushes the feedback vector.
281 if (feedback_vector_slot) {
282 frame_size -= kSystemPointerSize;
283 }
284 DCHECK_EQ(0, frame_size % kSystemPointerSize);
285
286 // We can't run out of space when patching, just pass anything big enough to
287 // not cause the assembler to try to grow the buffer.
288 constexpr int kAvailableSpace = 64;
289 Assembler patching_assembler(
290 AssemblerOptions{},
291 ExternalAssemblerBuffer(buffer_start_ + offset, kAvailableSpace));
292
293 if (V8_LIKELY(frame_size < 4 * KB)) {
294 // This is the standard case for small frames: just subtract from SP and be
295 // done with it.
296 patching_assembler.sub_sp_32(frame_size);
297 DCHECK_EQ(liftoff::kSubSpSize, patching_assembler.pc_offset());
298 return;
299 }
300
301 // The frame size is bigger than 4KB, so we might overflow the available stack
302 // space if we first allocate the frame and then do the stack check (we will
303 // need some remaining stack space for throwing the exception). That's why we
304 // check the available stack space before we allocate the frame. To do this we
305 // replace the {__ sub(sp, framesize)} with a jump to OOL code that does this
306 // "extended stack check".
307 //
308 // The OOL code can simply be generated here with the normal assembler,
309 // because all other code generation, including OOL code, has already finished
310 // when {PatchPrepareStackFrame} is called. The function prologue then jumps
311 // to the current {pc_offset()} to execute the OOL code for allocating the
312 // large frame.
313
314 // Emit the unconditional branch in the function prologue (from {offset} to
315 // {pc_offset()}).
316 patching_assembler.jmp_rel(pc_offset() - offset);
317 DCHECK_GE(liftoff::kSubSpSize, patching_assembler.pc_offset());
318 patching_assembler.Nop(liftoff::kSubSpSize - patching_assembler.pc_offset());
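// At this point the 6-byte placeholder in the prologue has been replaced by a
// relative jmp to the out-of-line code emitted below, padded with nops up to
// kSubSpSize; what follows here is that out-of-line path for frames of 4KB
// and larger.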
319
320 // If the frame is bigger than the stack, we throw the stack overflow
321 // exception unconditionally. Thereby we can avoid the integer overflow
322 // check in the condition code.
323 RecordComment("OOL: stack check for large frame");
324 Label continuation;
325 if (frame_size < v8_flags.stack_size * 1024) {
326 // We do not have a scratch register, so pick any and push it first.
327 Register stack_limit = eax;
328 push(stack_limit);
329 mov(stack_limit, esp);
330 sub(stack_limit, Immediate(frame_size));
331 CompareStackLimit(stack_limit, StackLimitKind::kRealStackLimit);
332 pop(stack_limit);
333 j(above_equal, &continuation, Label::kNear);
334 }
335
336 if (v8_flags.experimental_wasm_growable_stacks) {
337 LiftoffRegList regs_to_save;
338 regs_to_save.set(WasmHandleStackOverflowDescriptor::GapRegister());
339 regs_to_save.set(WasmHandleStackOverflowDescriptor::FrameBaseRegister());
340 for (auto reg : kGpParamRegisters) regs_to_save.set(reg);
341 for (auto reg : kFpParamRegisters) regs_to_save.set(reg);
342 PushRegisters(regs_to_save);
343 mov(WasmHandleStackOverflowDescriptor::GapRegister(),
344 Immediate(frame_size));
345 mov(WasmHandleStackOverflowDescriptor::FrameBaseRegister(), ebp);
346 add(WasmHandleStackOverflowDescriptor::FrameBaseRegister(),
347 Immediate(static_cast<int32_t>(
348 stack_param_slots * kStackSlotSize +
349 CommonFrameConstants::kFixedFrameSizeAboveFp)));
350 CallBuiltin(Builtin::kWasmHandleStackOverflow);
351 safepoint_table_builder->DefineSafepoint(this);
352 PopRegisters(regs_to_save);
353 } else {
354 wasm_call(static_cast<intptr_t>(Builtin::kWasmStackOverflow),
355 RelocInfo::WASM_STUB_CALL);
356 // The call will not return; just define an empty safepoint.
357 safepoint_table_builder->DefineSafepoint(this);
358 AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
359 }
360
361 bind(&continuation);
362
363 // Now allocate the stack space. Note that this might do more than just
364 // decrementing the SP; consult {MacroAssembler::AllocateStackSpace}.
365 AllocateStackSpace(frame_size);
366
367 // Jump back to the start of the function, from {pc_offset()} to
368 // right after the reserved space for the {__ sub(sp, sp, framesize)} (which
369 // is a branch now).
370 int func_start_offset = offset + liftoff::kSubSpSize;
371 jmp_rel(func_start_offset - pc_offset());
372}
373
375
377
378// static
381}
382
385}
386
387bool LiftoffAssembler::NeedsAlignment(ValueKind kind) {
388 return is_reference(kind);
389}
390
391void LiftoffAssembler::CheckTierUp(int declared_func_index, int budget_used,
392 Label* ool_label,
393 const FreezeCacheState& frozen) {
394 {
395 liftoff::CacheStatePreservingTempRegisters temps{this};
396 Register budget_array = temps.Acquire();
397
398 Register instance_data = cache_state_.cached_instance_data;
399 if (instance_data == no_reg) {
400 instance_data = budget_array; // Reuse the temp register.
401 LoadInstanceDataFromFrame(instance_data);
402 }
403
404 constexpr int kArrayOffset = wasm::ObjectAccess::ToTagged(
405 WasmTrustedInstanceData::kTieringBudgetArrayOffset);
406 mov(budget_array, Operand{instance_data, kArrayOffset});
407
408 int array_offset = kInt32Size * declared_func_index;
409 sub(Operand{budget_array, array_offset}, Immediate(budget_used));
410 }
411 j(negative, ool_label);
412}
413
414Register LiftoffAssembler::LoadOldFramePointer() {
415 if (!v8_flags.experimental_wasm_growable_stacks) {
416 return ebp;
417 }
418 Label done, call_runtime;
419 cmp(Operand(ebp, TypedFrameConstants::kFrameTypeOffset),
420 Immediate(StackFrame::TypeToMarker(StackFrame::WASM_SEGMENT_START)));
421 j(equal, &call_runtime);
422 LiftoffRegister old_fp = GetUnusedRegister(RegClass::kGpReg, {});
423 mov(old_fp.gp(), ebp);
424 jmp(&done);
425
426 bind(&call_runtime);
427 LiftoffRegList regs_to_save = cache_state()->used_registers;
428 PushRegisters(regs_to_save);
429 PrepareCallCFunction(1, eax);
432 CallCFunction(ExternalReference::wasm_load_old_fp(), 1);
433 if (old_fp.gp() != kReturnRegister0) {
434 mov(old_fp.gp(), kReturnRegister0);
435 }
436 PopRegisters(regs_to_save);
437
438 bind(&done);
439 return old_fp.gp();
440}
441
442void LiftoffAssembler::CheckStackShrink() {
443 LiftoffRegList regs_to_save;
444 for (auto reg : kGpReturnRegisters) regs_to_save.set(reg);
445 for (auto reg : kFpReturnRegisters) regs_to_save.set(reg);
446 cmp(Operand(ebp, TypedFrameConstants::kFrameTypeOffset),
447 Immediate(StackFrame::TypeToMarker(StackFrame::WASM_SEGMENT_START)));
448 Label done;
449 j(not_equal, &done);
454 CallCFunction(ExternalReference::wasm_shrink_stack(), 1);
455 // Restore old ebp. We don't need to restore old esp explicitly, because
456 // it will be restored from ebp in LeaveFrame before return.
457 mov(ebp, kReturnRegister0);
458 PopRegisters(regs_to_save);
459 bind(&done);
460}
461
462void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value) {
463 switch (value.type().kind()) {
464 case kI32:
465 MacroAssembler::Move(reg.gp(), Immediate(value.to_i32()));
466 break;
467 case kI64: {
468 int32_t low_word = value.to_i64();
469 int32_t high_word = value.to_i64() >> 32;
470 MacroAssembler::Move(reg.low_gp(), Immediate(low_word));
471 MacroAssembler::Move(reg.high_gp(), Immediate(high_word));
472 break;
473 }
474 case kF32:
475 MacroAssembler::Move(reg.fp(), value.to_f32_boxed().get_bits());
476 break;
477 case kF64:
478 MacroAssembler::Move(reg.fp(), value.to_f64_boxed().get_bits());
479 break;
480 default:
481 UNREACHABLE();
482 }
483}
484
485void LiftoffAssembler::LoadInstanceDataFromFrame(Register dst) {
486 mov(dst, liftoff::GetInstanceDataOperand());
487}
488
489void LiftoffAssembler::LoadTrustedPointer(Register dst, Register src_addr,
490 int offset, IndirectPointerTag tag) {
491 static_assert(!V8_ENABLE_SANDBOX_BOOL);
492 static_assert(!COMPRESS_POINTERS_BOOL);
493 mov(dst, Operand{src_addr, offset});
494}
495
496void LiftoffAssembler::LoadFromInstance(Register dst, Register instance,
497 int offset, int size) {
498 DCHECK_LE(0, offset);
499 Operand src{instance, offset};
500 switch (size) {
501 case 1:
502 movzx_b(dst, src);
503 break;
504 case 4:
505 mov(dst, src);
506 break;
507 default:
508 UNIMPLEMENTED();
509 }
510}
511
512void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
513 Register instance,
514 int offset) {
515 static_assert(kTaggedSize == kSystemPointerSize);
516 mov(dst, Operand{instance, offset});
517}
518
519void LiftoffAssembler::SpillInstanceData(Register instance) {
520 mov(liftoff::GetInstanceDataOperand(), instance);
521}
522
524
525void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr,
526 Register offset_reg,
527 int32_t offset_imm,
528 uint32_t* protected_load_pc,
529 bool needs_shift) {
530 DCHECK_GE(offset_imm, 0);
531 static_assert(kTaggedSize == kInt32Size);
532 Load(LiftoffRegister(dst), src_addr, offset_reg,
533 static_cast<uint32_t>(offset_imm), LoadType::kI32Load, protected_load_pc,
534 false, false, needs_shift);
535}
536
537void LiftoffAssembler::LoadProtectedPointer(Register dst, Register src_addr,
538 int32_t offset) {
539 static_assert(!V8_ENABLE_SANDBOX_BOOL);
540 LoadTaggedPointer(dst, src_addr, no_reg, offset);
541}
542
543void LiftoffAssembler::LoadFullPointer(Register dst, Register src_addr,
544 int32_t offset_imm) {
545 mov(dst, Operand(src_addr, offset_imm));
546}
547
548void LiftoffAssembler::StoreTaggedPointer(Register dst_addr,
549 Register offset_reg,
550 int32_t offset_imm, Register src,
551 LiftoffRegList pinned,
552 uint32_t* protected_store_pc,
553 SkipWriteBarrier skip_write_barrier) {
554 DCHECK_GE(offset_imm, 0);
555 DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
556 static_assert(kTaggedSize == kInt32Size);
557 Operand dst_op = liftoff::MemOperand(dst_addr, offset_reg, offset_imm);
558
559 if (protected_store_pc) *protected_store_pc = pc_offset();
560
561 mov(dst_op, src);
562
563 if (skip_write_barrier || v8_flags.disable_write_barriers) return;
564
565 liftoff::CacheStatePreservingTempRegisters temps{this, pinned};
566 Register scratch = temps.Acquire();
567
568 Label exit;
569 CheckPageFlag(dst_addr, scratch,
570 MemoryChunk::kPointersFromHereAreInterestingMask, zero, &exit,
571 Label::kNear);
572 JumpIfSmi(src, &exit, Label::kNear);
573 CheckPageFlag(src, scratch, MemoryChunk::kPointersToHereAreInterestingMask,
574 zero, &exit, Label::kNear);
575 lea(scratch, dst_op);
576 CallRecordWriteStubSaveRegisters(dst_addr, scratch, SaveFPRegsMode::kIgnore,
577 StubCallMode::kCallWasmRuntimeStub);
578 bind(&exit);
579}
580
581void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
582 Register offset_reg, uint32_t offset_imm,
583 LoadType type, uint32_t* protected_load_pc,
584 bool /* is_load_mem */, bool /* i64_offset */,
585 bool needs_shift) {
586 // Offsets >=2GB are statically OOB on 32-bit systems.
587 DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
588 DCHECK_EQ(type.value_type() == kWasmI64, dst.is_gp_pair());
589 ScaleFactor scale_factor =
590 !needs_shift ? times_1 : static_cast<ScaleFactor>(type.size_log_2());
591 Operand src_op = offset_reg == no_reg ? Operand(src_addr, offset_imm)
592 : Operand(src_addr, offset_reg,
593 scale_factor, offset_imm);
594 if (protected_load_pc) *protected_load_pc = pc_offset();
595
596 switch (type.value()) {
597 case LoadType::kI32Load8U:
598 movzx_b(dst.gp(), src_op);
599 break;
600 case LoadType::kI32Load8S:
601 movsx_b(dst.gp(), src_op);
602 break;
603 case LoadType::kI64Load8U:
604 movzx_b(dst.low_gp(), src_op);
605 xor_(dst.high_gp(), dst.high_gp());
606 break;
607 case LoadType::kI64Load8S:
608 movsx_b(dst.low_gp(), src_op);
609 liftoff::SignExtendI32ToI64(this, dst);
610 break;
611 case LoadType::kI32Load16U:
612 movzx_w(dst.gp(), src_op);
613 break;
614 case LoadType::kI32Load16S:
615 movsx_w(dst.gp(), src_op);
616 break;
617 case LoadType::kI64Load16U:
618 movzx_w(dst.low_gp(), src_op);
619 xor_(dst.high_gp(), dst.high_gp());
620 break;
621 case LoadType::kI64Load16S:
622 movsx_w(dst.low_gp(), src_op);
623 liftoff::SignExtendI32ToI64(this, dst);
624 break;
625 case LoadType::kI32Load:
626 mov(dst.gp(), src_op);
627 break;
628 case LoadType::kI64Load32U:
629 mov(dst.low_gp(), src_op);
630 xor_(dst.high_gp(), dst.high_gp());
631 break;
632 case LoadType::kI64Load32S:
633 mov(dst.low_gp(), src_op);
634 liftoff::SignExtendI32ToI64(this, dst);
635 break;
636 case LoadType::kI64Load: {
637 // Compute the operand for the load of the upper half.
638 Operand upper_src_op =
639 liftoff::MemOperand(src_addr, offset_reg, offset_imm + 4);
640 // The high word has to be mov'ed first, such that this is the protected
641 // instruction. The mov of the low word cannot segfault.
642 mov(dst.high_gp(), upper_src_op);
643 mov(dst.low_gp(), src_op);
644 break;
645 }
646 case LoadType::kF32Load:
647 movss(dst.fp(), src_op);
648 break;
649 case LoadType::kF64Load:
650 movsd(dst.fp(), src_op);
651 break;
652 case LoadType::kS128Load:
653 movdqu(dst.fp(), src_op);
654 break;
655 case LoadType::kF32LoadF16:
656 UNIMPLEMENTED();
657 break;
658 }
659}
660
661void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
662 uint32_t offset_imm, LiftoffRegister src,
663 StoreType type, LiftoffRegList pinned,
664 uint32_t* protected_store_pc,
665 bool /* is_store_mem */, bool /* i64_offset */) {
666 DCHECK_EQ(type.value_type() == kWasmI64, src.is_gp_pair());
667 // Offsets >=2GB are statically OOB on 32-bit systems.
668 DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
669 Operand dst_op = liftoff::MemOperand(dst_addr, offset_reg, offset_imm);
670 if (protected_store_pc) *protected_store_pc = pc_offset();
671
672 switch (type.value()) {
673 case StoreType::kI64Store8:
674 src = src.low();
675 [[fallthrough]];
676 case StoreType::kI32Store8:
677 // Only the lower 4 registers can be addressed as 8-bit registers.
678 if (src.gp().is_byte_register()) {
679 mov_b(dst_op, src.gp());
680 } else {
681 // We know that {src} is not a byte register, so the only pinned byte
682 // registers (beside the outer {pinned}) are {dst_addr} and potentially
683 // {offset_reg}.
684 LiftoffRegList pinned_byte = pinned | LiftoffRegList{dst_addr};
685 if (offset_reg != no_reg) pinned_byte.set(offset_reg);
686 LiftoffRegList candidates = liftoff::kByteRegs.MaskOut(pinned_byte);
687 if (cache_state_.has_unused_register(candidates)) {
688 Register byte_src = cache_state_.unused_register(candidates).gp();
689 mov(byte_src, src.gp());
690 mov_b(dst_op, byte_src);
691 } else {
692 // We have no available byte register. We will temporarily push the
693 // root register to use it as a scratch register.
694 static_assert(kRootRegister == ebx);
695 Register byte_src = kRootRegister;
696 Push(byte_src);
697 mov(byte_src, src.gp());
698 mov_b(dst_op, byte_src);
699 Pop(byte_src);
700 }
701 }
702 break;
703 case StoreType::kI64Store16:
704 src = src.low();
705 [[fallthrough]];
706 case StoreType::kI32Store16:
707 mov_w(dst_op, src.gp());
708 break;
709 case StoreType::kI64Store32:
710 src = src.low();
711 [[fallthrough]];
712 case StoreType::kI32Store:
713 mov(dst_op, src.gp());
714 break;
715 case StoreType::kI64Store: {
716 // Compute the operand for the store of the upper half.
717 Operand upper_dst_op =
718 liftoff::MemOperand(dst_addr, offset_reg, offset_imm + 4);
719 // The high word has to be mov'ed first, such that this is the protected
720 // instruction. The mov of the low word cannot segfault.
721 mov(upper_dst_op, src.high_gp());
722 mov(dst_op, src.low_gp());
723 break;
724 }
725 case StoreType::kF32Store:
726 movss(dst_op, src.fp());
727 break;
728 case StoreType::kF64Store:
729 movsd(dst_op, src.fp());
730 break;
731 case StoreType::kS128Store:
732 Movdqu(dst_op, src.fp());
733 break;
734 case StoreType::kF32StoreF16:
735 UNIMPLEMENTED();
736 break;
737 }
738}
739
740void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr,
741 Register offset_reg, uint32_t offset_imm,
742 LoadType type, LiftoffRegList /* pinned */,
743 bool /* i64_offset */) {
744 if (type.value() != LoadType::kI64Load) {
745 Load(dst, src_addr, offset_reg, offset_imm, type, nullptr, true);
746 return;
747 }
748
749 DCHECK_EQ(type.value_type() == kWasmI64, dst.is_gp_pair());
750 DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
751 Operand src_op = liftoff::MemOperand(src_addr, offset_reg, offset_imm);
752
753 movsd(liftoff::kScratchDoubleReg, src_op);
754 Pextrd(dst.low().gp(), liftoff::kScratchDoubleReg, 0);
755 Pextrd(dst.high().gp(), liftoff::kScratchDoubleReg, 1);
756}
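// Note: this relies on the single 8-byte movsd load being atomic for a
// naturally aligned i64 (alignment is required for wasm shared-memory
// atomics); the two Pextrd instructions then split the 64-bit value into the
// low/high GP register pair.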
757
758void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
759 uint32_t offset_imm, LiftoffRegister src,
760 StoreType type, LiftoffRegList pinned,
761 bool /* i64_offset */) {
762 DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
763 Operand dst_op = liftoff::MemOperand(dst_addr, offset_reg, offset_imm);
764
765 // i64 store uses a totally different approach, hence implement it separately.
766 if (type.value() == StoreType::kI64Store) {
767 auto scratch2 = GetUnusedRegister(kFpReg, pinned).fp();
768 movd(liftoff::kScratchDoubleReg, src.low().gp());
769 movd(scratch2, src.high().gp());
770 Punpckldq(liftoff::kScratchDoubleReg, scratch2);
771 movsd(dst_op, liftoff::kScratchDoubleReg);
772 // This lock+or is needed to achieve sequential consistency.
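// A locked read-modify-write of a dummy stack location acts as a full memory
// barrier here (comparable to mfence, but typically cheaper), so the movsd
// store above cannot be reordered past later memory accesses.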
773 lock();
774 or_(Operand(esp, 0), Immediate(0));
775 return;
776 }
777
778 // Other i64 stores actually only use the low word.
779 if (src.is_pair()) src = src.low();
780 Register src_gp = src.gp();
781
782 bool is_byte_store = type.size() == 1;
783 LiftoffRegList src_candidates =
784 is_byte_store ? liftoff::kByteRegs : kGpCacheRegList;
785 pinned = pinned | LiftoffRegList{dst_addr, src};
786 if (offset_reg != no_reg) pinned.set(offset_reg);
787
788 // Ensure that {src} is a valid and otherwise unused register.
789 if (!src_candidates.has(src) || cache_state_.is_used(src)) {
790 // If there are no unused candidate registers, but {src} is a candidate,
791 // then spill other uses of {src}. Otherwise spill any candidate register
792 // and use that.
793 LiftoffRegList unpinned_candidates = src_candidates.MaskOut(pinned);
794 if (!cache_state_.has_unused_register(unpinned_candidates) &&
795 src_candidates.has(src)) {
796 SpillRegister(src);
797 } else {
798 Register safe_src = GetUnusedRegister(unpinned_candidates).gp();
799 mov(safe_src, src_gp);
800 src_gp = safe_src;
801 }
802 }
803
804 switch (type.value()) {
805 case StoreType::kI64Store8:
806 case StoreType::kI32Store8:
807 xchg_b(src_gp, dst_op);
808 return;
809 case StoreType::kI64Store16:
810 case StoreType::kI32Store16:
811 xchg_w(src_gp, dst_op);
812 return;
813 case StoreType::kI64Store32:
814 case StoreType::kI32Store:
815 xchg(src_gp, dst_op);
816 return;
817 default:
818 UNREACHABLE();
819 }
820}
821
822namespace liftoff {
823#define __ lasm->
824
826
828 Register dst_addr, Register offset_reg,
829 uint32_t offset_imm,
830 LiftoffRegister value,
832 DCHECK_EQ(value, result);
833 DCHECK(!__ cache_state()->is_used(result));
834 bool is_64_bit_op = type.value_type() == kWasmI64;
835
836 Register value_reg = is_64_bit_op ? value.low_gp() : value.gp();
837 Register result_reg = is_64_bit_op ? result.low_gp() : result.gp();
838
839 bool is_byte_store = type.size() == 1;
840 LiftoffRegList pinned{dst_addr, value_reg};
841 if (offset_reg != no_reg) pinned.set(offset_reg);
842
843 // Ensure that {value_reg} is a valid register.
844 if (is_byte_store && !liftoff::kByteRegs.has(value_reg)) {
845 Register safe_value_reg =
846 __ GetUnusedRegister(liftoff::kByteRegs.MaskOut(pinned)).gp();
847 __ mov(safe_value_reg, value_reg);
848 value_reg = safe_value_reg;
849 }
850
851 Operand dst_op = liftoff::MemOperand(dst_addr, offset_reg, offset_imm);
852 if (binop == kSub) {
853 __ neg(value_reg);
854 }
855 if (binop != kExchange) {
856 __ lock();
857 }
858 switch (type.value()) {
859 case StoreType::kI64Store8:
860 case StoreType::kI32Store8:
861 if (binop == kExchange) {
862 __ xchg_b(value_reg, dst_op);
863 } else {
864 __ xadd_b(dst_op, value_reg);
865 }
866 __ movzx_b(result_reg, value_reg);
867 break;
868 case StoreType::kI64Store16:
869 case StoreType::kI32Store16:
870 if (binop == kExchange) {
871 __ xchg_w(value_reg, dst_op);
872 } else {
873 __ xadd_w(dst_op, value_reg);
874 }
875 __ movzx_w(result_reg, value_reg);
876 break;
877 case StoreType::kI64Store32:
878 case StoreType::kI32Store:
879 if (binop == kExchange) {
880 __ xchg(value_reg, dst_op);
881 } else {
882 __ xadd(dst_op, value_reg);
883 }
884 if (value_reg != result_reg) {
885 __ mov(result_reg, value_reg);
886 }
887 break;
888 default:
889 UNREACHABLE();
890 }
891 if (is_64_bit_op) {
892 __ xor_(result.high_gp(), result.high_gp());
893 }
894}
895
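// {AtomicBinop32} below emits a classic compare-exchange retry loop; a sketch
// of the generated sequence (for the 32-bit case):
// mov eax, [mem] ; load the old value
// retry: mov scratch, eax
// and/or/xor scratch, value ; compute the new value
// lock cmpxchg [mem], scratch ; store iff [mem] still equals eax
// jnz retry ; on failure eax holds the fresh value
// On success eax holds the old value, which becomes the result.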
896inline void AtomicBinop32(LiftoffAssembler* lasm, Binop op, Register dst_addr,
897 Register offset_reg, uint32_t offset_imm,
898 LiftoffRegister value, LiftoffRegister result,
899 StoreType type) {
900 DCHECK_EQ(value, result);
901 DCHECK(!__ cache_state()->is_used(result));
902 bool is_64_bit_op = type.value_type() == kWasmI64;
903
904 Register value_reg = is_64_bit_op ? value.low_gp() : value.gp();
905 Register result_reg = is_64_bit_op ? result.low_gp() : result.gp();
906
907 // The cmpxchg instruction uses eax to store the old value of the
908 // compare-exchange primitive. Therefore we have to spill the register and
909 // move any use to another register.
910 {
911 LiftoffRegList pinned{dst_addr, value_reg};
912 if (offset_reg != no_reg) pinned.set(offset_reg);
913 __ ClearRegister(eax, {&dst_addr, &offset_reg, &value_reg}, pinned);
914 }
915
916 bool is_byte_store = type.size() == 1;
917 Register scratch = no_reg;
918 if (is_byte_store) {
919 // The scratch register has to be a byte register. As we are already tight
920 // on registers, we just use the root register here.
922 "root register is not Liftoff cache register");
924 __ push(kRootRegister);
925 scratch = kRootRegister;
926 } else {
927 LiftoffRegList pinned{dst_addr, value_reg, eax};
928 if (offset_reg != no_reg) pinned.set(offset_reg);
929 scratch = __ GetUnusedRegister(kGpReg, pinned).gp();
930 }
931
932 Operand dst_op = liftoff::MemOperand(dst_addr, offset_reg, offset_imm);
933
934 switch (type.value()) {
935 case StoreType::kI32Store8:
936 case StoreType::kI64Store8: {
937 __ xor_(eax, eax);
938 __ mov_b(eax, dst_op);
939 break;
940 }
941 case StoreType::kI32Store16:
942 case StoreType::kI64Store16: {
943 __ xor_(eax, eax);
944 __ mov_w(eax, dst_op);
945 break;
946 }
947 case StoreType::kI32Store:
948 case StoreType::kI64Store32: {
949 __ mov(eax, dst_op);
950 break;
951 }
952 default:
953 UNREACHABLE();
954 }
955
956 Label binop;
957 __ bind(&binop);
958 __ mov(scratch, eax);
959
960 switch (op) {
961 case kAnd: {
962 __ and_(scratch, value_reg);
963 break;
964 }
965 case kOr: {
966 __ or_(scratch, value_reg);
967 break;
968 }
969 case kXor: {
970 __ xor_(scratch, value_reg);
971 break;
972 }
973 default:
974 UNREACHABLE();
975 }
976
977 __ lock();
978
979 switch (type.value()) {
980 case StoreType::kI32Store8:
981 case StoreType::kI64Store8: {
982 __ cmpxchg_b(dst_op, scratch);
983 break;
984 }
985 case StoreType::kI32Store16:
986 case StoreType::kI64Store16: {
987 __ cmpxchg_w(dst_op, scratch);
988 break;
989 }
990 case StoreType::kI32Store:
991 case StoreType::kI64Store32: {
992 __ cmpxchg(dst_op, scratch);
993 break;
994 }
995 default:
996 UNREACHABLE();
997 }
998 __ j(not_equal, &binop);
999
1000 if (is_byte_store) {
1001 __ pop(kRootRegister);
1002 }
1003 if (result_reg != eax) {
1004 __ mov(result_reg, eax);
1005 }
1006 if (is_64_bit_op) {
1007 __ xor_(result.high_gp(), result.high_gp());
1008 }
1009}
1010
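// {AtomicBinop64} below builds on cmpxchg8b: the instruction compares EDX:EAX
// against the 8-byte memory operand and, if they match, stores ECX:EBX into
// it; on a mismatch it loads the current value into EDX:EAX instead, so the
// retry loop recomputes the new value from that and tries again.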
1011inline void AtomicBinop64(LiftoffAssembler* lasm, Binop op, Register dst_addr,
1012 Register offset_reg, uint32_t offset_imm,
1013 LiftoffRegister value, LiftoffRegister result) {
1014 // We need {ebx} here, which is the root register. As the root register it
1015 // needs special treatment. As we use {ebx} directly in the code below, we
1016 // have to make sure here that the root register is actually {ebx}.
1017 static_assert(kRootRegister == ebx,
1018 "The following code assumes that kRootRegister == ebx");
1019 __ push(ebx);
1020
1021 // Store the value on the stack, so that we can use it for retries.
1022 __ AllocateStackSpace(8);
1023 Operand value_op_hi = Operand(esp, 0);
1024 Operand value_op_lo = Operand(esp, 4);
1025 __ mov(value_op_lo, value.low_gp());
1026 __ mov(value_op_hi, value.high_gp());
1027
1028 // We want to use the compare-exchange instruction here. It uses registers
1029 // as follows: old-value = EDX:EAX; new-value = ECX:EBX.
1030 Register old_hi = edx;
1031 Register old_lo = eax;
1032 Register new_hi = ecx;
1033 Register new_lo = ebx;
1034 // Base and offset need separate registers that do not alias with the
1035 // ones above.
1036 Register base = esi;
1037 Register offset = edi;
1038
1039 // Spill all these registers if they are still holding other values.
1040 __ SpillRegisters(old_hi, old_lo, new_hi, base, offset);
1041 if (offset_reg == no_reg) {
1042 if (dst_addr != base) __ mov(base, dst_addr);
1043 offset = no_reg;
1044 } else {
1045 // Potentially swap base and offset register to avoid unnecessary moves.
1046 if (dst_addr == offset || offset_reg == base) {
1047 std::swap(dst_addr, offset_reg);
1048 }
1049 __ ParallelRegisterMove(
1050 {{LiftoffRegister{base}, LiftoffRegister{dst_addr}, kI32},
1051 {LiftoffRegister{offset}, LiftoffRegister{offset_reg}, kI32}});
1052 }
1053
1054 Operand dst_op_lo = liftoff::MemOperand(base, offset, offset_imm);
1055 Operand dst_op_hi = liftoff::MemOperand(base, offset, offset_imm + 4);
1056
1057 // Load the old value from memory.
1058 __ mov(old_lo, dst_op_lo);
1059 __ mov(old_hi, dst_op_hi);
1060 Label retry;
1061 __ bind(&retry);
1062 __ mov(new_lo, old_lo);
1063 __ mov(new_hi, old_hi);
1064 switch (op) {
1065 case kAdd:
1066 __ add(new_lo, value_op_lo);
1067 __ adc(new_hi, value_op_hi);
1068 break;
1069 case kSub:
1070 __ sub(new_lo, value_op_lo);
1071 __ sbb(new_hi, value_op_hi);
1072 break;
1073 case kAnd:
1074 __ and_(new_lo, value_op_lo);
1075 __ and_(new_hi, value_op_hi);
1076 break;
1077 case kOr:
1078 __ or_(new_lo, value_op_lo);
1079 __ or_(new_hi, value_op_hi);
1080 break;
1081 case kXor:
1082 __ xor_(new_lo, value_op_lo);
1083 __ xor_(new_hi, value_op_hi);
1084 break;
1085 case kExchange:
1086 __ mov(new_lo, value_op_lo);
1087 __ mov(new_hi, value_op_hi);
1088 break;
1089 }
1090 __ lock();
1091 __ cmpxchg8b(dst_op_lo);
1092 __ j(not_equal, &retry);
1093
1094 // Deallocate the stack space again.
1095 __ add(esp, Immediate(8));
1096 // Restore the root register, and we are done.
1097 __ pop(kRootRegister);
1098
1099 // Move the result into the correct registers.
1100 __ ParallelRegisterMove(
1101 {{result, LiftoffRegister::ForPair(old_lo, old_hi), kI64}});
1102}
1103
1104#undef __
1105} // namespace liftoff
1106
1107void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
1108 uint32_t offset_imm, LiftoffRegister value,
1109 LiftoffRegister result, StoreType type,
1110 bool /* i64_offset */) {
1111 if (type.value() == StoreType::kI64Store) {
1112 liftoff::AtomicBinop64(this, liftoff::kAdd, dst_addr, offset_reg,
1113 offset_imm, value, result);
1114 return;
1115 }
1116
1117 liftoff::AtomicAddOrSubOrExchange32(this, liftoff::kAdd, dst_addr, offset_reg,
1118 offset_imm, value, result, type);
1119}
1120
1121void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
1122 uint32_t offset_imm, LiftoffRegister value,
1123 LiftoffRegister result, StoreType type,
1124 bool /* i64_offset */) {
1125 if (type.value() == StoreType::kI64Store) {
1126 liftoff::AtomicBinop64(this, liftoff::kSub, dst_addr, offset_reg,
1127 offset_imm, value, result);
1128 return;
1129 }
1130 liftoff::AtomicAddOrSubOrExchange32(this, liftoff::kSub, dst_addr, offset_reg,
1131 offset_imm, value, result, type);
1132}
1133
1134void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
1135 uint32_t offset_imm, LiftoffRegister value,
1136 LiftoffRegister result, StoreType type,
1137 bool /* i64_offset */) {
1138 if (type.value() == StoreType::kI64Store) {
1139 liftoff::AtomicBinop64(this, liftoff::kAnd, dst_addr, offset_reg,
1140 offset_imm, value, result);
1141 return;
1142 }
1143
1144 liftoff::AtomicBinop32(this, liftoff::kAnd, dst_addr, offset_reg, offset_imm,
1145 value, result, type);
1146}
1147
1148void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
1149 uint32_t offset_imm, LiftoffRegister value,
1150 LiftoffRegister result, StoreType type,
1151 bool /* i64_offset */) {
1152 if (type.value() == StoreType::kI64Store) {
1153 liftoff::AtomicBinop64(this, liftoff::kOr, dst_addr, offset_reg, offset_imm,
1154 value, result);
1155 return;
1156 }
1157
1158 liftoff::AtomicBinop32(this, liftoff::kOr, dst_addr, offset_reg, offset_imm,
1159 value, result, type);
1160}
1161
1162void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
1163 uint32_t offset_imm, LiftoffRegister value,
1164 LiftoffRegister result, StoreType type,
1165 bool /* i64_offset */) {
1166 if (type.value() == StoreType::kI64Store) {
1167 liftoff::AtomicBinop64(this, liftoff::kXor, dst_addr, offset_reg,
1168 offset_imm, value, result);
1169 return;
1170 }
1171
1172 liftoff::AtomicBinop32(this, liftoff::kXor, dst_addr, offset_reg, offset_imm,
1173 value, result, type);
1174}
1175
1176void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
1177 uint32_t offset_imm,
1178 LiftoffRegister value,
1179 LiftoffRegister result, StoreType type,
1180 bool /* i64_offset */) {
1181 if (type.value() == StoreType::kI64Store) {
1182 liftoff::AtomicBinop64(this, liftoff::kExchange, dst_addr, offset_reg,
1183 offset_imm, value, result);
1184 return;
1185 }
1186 liftoff::AtomicAddOrSubOrExchange32(this, liftoff::kExchange, dst_addr,
1187 offset_reg, offset_imm, value, result,
1188 type);
1189}
1190
1191void LiftoffAssembler::AtomicCompareExchange(
1192 Register dst_addr, Register offset_reg, uint32_t offset_imm,
1193 LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result,
1194 StoreType type, bool /* i64_offset */) {
1195 // We expect that the offset has already been added to {dst_addr}, and no
1196 // {offset_reg} is provided. This is to save registers.
1197 DCHECK_EQ(offset_reg, no_reg);
1198
1199 DCHECK_EQ(result, expected);
1200
1201 if (type.value() != StoreType::kI64Store) {
1202 bool is_64_bit_op = type.value_type() == kWasmI64;
1203
1204 Register value_reg = is_64_bit_op ? new_value.low_gp() : new_value.gp();
1205 Register expected_reg = is_64_bit_op ? expected.low_gp() : expected.gp();
1206 Register result_reg = expected_reg;
1207
1208 // The cmpxchg instruction uses eax to store the old value of the
1209 // compare-exchange primitive. Therefore we have to spill the register and
1210 // move any use to another register.
1211 ClearRegister(eax, {&dst_addr, &value_reg},
1212 LiftoffRegList{dst_addr, value_reg, expected_reg});
1213 if (expected_reg != eax) {
1214 mov(eax, expected_reg);
1215 expected_reg = eax;
1216 }
1217
1218 bool is_byte_store = type.size() == 1;
1219 LiftoffRegList pinned{dst_addr, value_reg, expected_reg};
1220
1221 // Ensure that {value_reg} is a valid register.
1222 if (is_byte_store && !liftoff::kByteRegs.has(value_reg)) {
1223 Register safe_value_reg =
1224 pinned.set(GetUnusedRegister(liftoff::kByteRegs.MaskOut(pinned)))
1225 .gp();
1226 mov(safe_value_reg, value_reg);
1227 value_reg = safe_value_reg;
1228 pinned.clear(LiftoffRegister(value_reg));
1229 }
1230
1231 Operand dst_op = Operand(dst_addr, offset_imm);
1232
1233 lock();
1234 switch (type.value()) {
1235 case StoreType::kI32Store8:
1236 case StoreType::kI64Store8: {
1237 cmpxchg_b(dst_op, value_reg);
1238 movzx_b(result_reg, eax);
1239 break;
1240 }
1241 case StoreType::kI32Store16:
1242 case StoreType::kI64Store16: {
1243 cmpxchg_w(dst_op, value_reg);
1244 movzx_w(result_reg, eax);
1245 break;
1246 }
1247 case StoreType::kI32Store:
1248 case StoreType::kI64Store32: {
1249 cmpxchg(dst_op, value_reg);
1250 if (result_reg != eax) {
1251 mov(result_reg, eax);
1252 }
1253 break;
1254 }
1255 default:
1256 UNREACHABLE();
1257 }
1258 if (is_64_bit_op) {
1259 xor_(result.high_gp(), result.high_gp());
1260 }
1261 return;
1262 }
1263
1264 // The following code handles kExprI64AtomicCompareExchange.
1265
1266 // We need {ebx} here, which is the root register. As the root register it
1267 // needs special treatment. As we use {ebx} directly in the code below, we
1268 // have to make sure here that the root register is actually {ebx}.
1269 static_assert(kRootRegister == ebx,
1270 "The following code assumes that kRootRegister == ebx");
1271 push(ebx);
1272
1273 // The compare-exchange instruction uses registers as follows:
1274 // old-value = EDX:EAX; new-value = ECX:EBX.
1275 Register expected_hi = edx;
1276 Register expected_lo = eax;
1277 Register new_hi = ecx;
1278 Register new_lo = ebx;
1279 // The address needs a separate register that does not alias with the
1280 // ones above.
1281 Register address = esi;
1282
1283 // Spill all these registers if they are still holding other values.
1284 SpillRegisters(expected_hi, expected_lo, new_hi, address);
1285
1286 // We have to set new_lo specially, because it's the root register. We do it
1287 // before setting all other registers so that the original value does not get
1288 // overwritten.
1289 mov(new_lo, new_value.low_gp());
1290
1291 // Move all other values into the right register.
1292 ParallelRegisterMove(
1293 {{LiftoffRegister(address), LiftoffRegister(dst_addr), kI32},
1294 {LiftoffRegister::ForPair(expected_lo, expected_hi), expected, kI64},
1295 {LiftoffRegister(new_hi), new_value.high(), kI32}});
1296
1297 Operand dst_op = Operand(address, offset_imm);
1298
1299 lock();
1300 cmpxchg8b(dst_op);
1301
1302 // Restore the root register, and we are done.
1303 pop(kRootRegister);
1304
1305 // Move the result into the correct registers.
1306 ParallelRegisterMove(
1307 {{result, LiftoffRegister::ForPair(expected_lo, expected_hi), kI64}});
1308}
1309
1310void LiftoffAssembler::AtomicFence() { mfence(); }
1311
1312void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
1313 uint32_t caller_slot_idx,
1314 ValueKind kind) {
1315 liftoff::Load(this, dst, ebp, kSystemPointerSize * (caller_slot_idx + 1),
1316 kind);
1317}
1318
1319void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister reg, int offset,
1320 ValueKind kind) {
1321 liftoff::Load(this, reg, esp, offset, kind);
1322}
1323
1324void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
1325 uint32_t caller_slot_idx,
1326 ValueKind kind,
1327 Register frame_pointer) {
1328 liftoff::Store(this, frame_pointer,
1329 kSystemPointerSize * (caller_slot_idx + 1), src, kind);
1330}
1331
1332void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
1333 ValueKind kind) {
1334 DCHECK_EQ(0, SlotSizeForType(kind) % kSystemPointerSize);
1335 int words = SlotSizeForType(kind) / kSystemPointerSize;
1336 DCHECK_LE(1, words);
1337 // Make sure we move the words in the correct order in case there is an
1338 // overlap between src and dst.
1339 if (src_offset < dst_offset) {
1340 do {
1341 liftoff::MoveStackValue(this, liftoff::GetStackSlot(src_offset),
1342 liftoff::GetStackSlot(dst_offset));
1343 dst_offset -= kSystemPointerSize;
1344 src_offset -= kSystemPointerSize;
1345 } while (--words);
1346 } else {
1347 while (words--) {
1348 liftoff::MoveStackValue(
1349 this, liftoff::GetStackSlot(src_offset - words * kSystemPointerSize),
1350 liftoff::GetStackSlot(dst_offset - words * kSystemPointerSize));
1351 }
1352 }
1353}
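// Example: moving a kI64 value (two words) from src_offset 16 to dst_offset 20
// copies [ebp-16] to [ebp-20] first and then [ebp-12] to [ebp-16]; this order
// reads the overlapping word at [ebp-16] before it gets overwritten.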
1354
1355void LiftoffAssembler::Move(Register dst, Register src, ValueKind kind) {
1356 DCHECK_NE(dst, src);
1358 mov(dst, src);
1359}
1360
1361void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src,
1362 ValueKind kind) {
1363 DCHECK_NE(dst, src);
1364 if (kind == kF32) {
1365 movss(dst, src);
1366 } else if (kind == kF64) {
1367 movsd(dst, src);
1368 } else {
1369 DCHECK_EQ(kS128, kind);
1370 Movaps(dst, src);
1371 }
1372}
1373
1374void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueKind kind) {
1375 RecordUsedSpillOffset(offset);
1376 Operand dst = liftoff::GetStackSlot(offset);
1377 switch (kind) {
1378 case kI32:
1379 case kRefNull:
1380 case kRef:
1381 mov(dst, reg.gp());
1382 break;
1383 case kI64:
1384 mov(liftoff::GetHalfStackSlot(offset, kLowWord), reg.low_gp());
1385 mov(liftoff::GetHalfStackSlot(offset, kHighWord), reg.high_gp());
1386 break;
1387 case kF32:
1388 movss(dst, reg.fp());
1389 break;
1390 case kF64:
1391 movsd(dst, reg.fp());
1392 break;
1393 case kS128:
1394 movdqu(dst, reg.fp());
1395 break;
1396 default:
1397 UNREACHABLE();
1398 }
1399}
1400
1401void LiftoffAssembler::Spill(int offset, WasmValue value) {
1402 RecordUsedSpillOffset(offset);
1403 Operand dst = liftoff::GetStackSlot(offset);
1404 switch (value.type().kind()) {
1405 case kI32:
1406 mov(dst, Immediate(value.to_i32()));
1407 break;
1408 case kI64: {
1409 int32_t low_word = value.to_i64();
1410 int32_t high_word = value.to_i64() >> 32;
1411 mov(liftoff::GetHalfStackSlot(offset, kLowWord), Immediate(low_word));
1412 mov(liftoff::GetHalfStackSlot(offset, kHighWord), Immediate(high_word));
1413 break;
1414 }
1415 default:
1416 // We do not track f32 and f64 constants, hence they are unreachable.
1417 UNREACHABLE();
1418 }
1419}
1420
1421void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueKind kind) {
1422 liftoff::Load(this, reg, ebp, -offset, kind);
1423}
1424
1425void LiftoffAssembler::FillI64Half(Register reg, int offset, RegPairHalf half) {
1426 mov(reg, liftoff::GetHalfStackSlot(offset, half));
1427}
1428
1429void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) {
1430 DCHECK_LT(0, size);
1431 DCHECK_EQ(0, size % 4);
1432 RecordUsedSpillOffset(start + size);
1433
1434 if (size <= 12) {
1435 // Special straight-line code for up to three words (6-9 bytes per word:
1436 // C7 <1-4 bytes operand> <4 bytes imm>, makes 18-27 bytes total).
1437 for (int offset = 4; offset <= size; offset += 4) {
1438 mov(liftoff::GetHalfStackSlot(start + offset, kLowWord), Immediate(0));
1439 }
1440 } else {
1441 // General case for bigger counts.
1442 // This sequence takes 19-22 bytes (3 for pushes, 3-6 for lea, 2 for xor, 5
1443 // for mov, 3 for rep stosd, 3 for pops).
1444 // Note: rep_stos fills ECX doublewords at [EDI] with EAX.
1445 push(eax);
1446 push(ecx);
1447 push(edi);
1448 lea(edi, liftoff::GetStackSlot(start + size));
1449 xor_(eax, eax);
1450 // Size is in bytes, convert to doublewords (4-bytes).
1451 mov(ecx, Immediate(size / 4));
1452 rep_stos();
1453 pop(edi);
1454 pop(ecx);
1455 pop(eax);
1456 }
1457}
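// Example: size == 8 emits two "mov dword ptr [...], 0" stores; size == 64
// takes the rep_stos path with edi pointing at the lowest address of the
// region (ebp - start - size), eax == 0 and ecx == 16 doublewords.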
1458
1459void LiftoffAssembler::LoadSpillAddress(Register dst, int offset,
1460 ValueKind /* kind */) {
1461 lea(dst, liftoff::GetStackSlot(offset));
1462}
1463
1464void LiftoffAssembler::emit_i32_add(Register dst, Register lhs, Register rhs) {
1465 if (lhs != dst) {
1466 lea(dst, Operand(lhs, rhs, times_1, 0));
1467 } else {
1468 add(dst, rhs);
1469 }
1470}
1471
1472void LiftoffAssembler::emit_i32_addi(Register dst, Register lhs, int32_t imm) {
1473 if (lhs != dst) {
1474 lea(dst, Operand(lhs, imm));
1475 } else {
1476 add(dst, Immediate(imm));
1477 }
1478}
1479
1480void LiftoffAssembler::emit_i32_sub(Register dst, Register lhs, Register rhs) {
1481 if (dst != rhs) {
1482 // Default path.
1483 if (dst != lhs) mov(dst, lhs);
1484 sub(dst, rhs);
1485 } else if (lhs == rhs) {
1486 // Degenerate case.
1487 xor_(dst, dst);
1488 } else {
1489 // Emit {dst = lhs + -rhs} if dst == rhs.
1490 neg(dst);
1491 add(dst, lhs);
1492 }
1493}
1494
1495void LiftoffAssembler::emit_i32_subi(Register dst, Register lhs, int32_t imm) {
1496 if (dst != lhs) {
1497 // We'll have to implement an UB-safe version if we need this corner case.
1498 DCHECK_NE(imm, kMinInt);
1499 lea(dst, Operand(lhs, -imm));
1500 } else {
1501 sub(dst, Immediate(imm));
1502 }
1503}
1504
1505namespace liftoff {
1506template <void (Assembler::*op)(Register, Register)>
1507void EmitCommutativeBinOp(LiftoffAssembler* assm, Register dst, Register lhs,
1508 Register rhs) {
1509 if (dst == rhs) {
1510 (assm->*op)(dst, lhs);
1511 } else {
1512 if (dst != lhs) assm->mov(dst, lhs);
1513 (assm->*op)(dst, rhs);
1514 }
1515}
1516
1517template <void (Assembler::*op)(Register, int32_t)>
1518void EmitCommutativeBinOpImm(LiftoffAssembler* assm, Register dst, Register lhs,
1519 int32_t imm) {
1520 if (dst != lhs) assm->mov(dst, lhs);
1521 (assm->*op)(dst, imm);
1522}
1523} // namespace liftoff
1524
1525void LiftoffAssembler::emit_i32_mul(Register dst, Register lhs, Register rhs) {
1526 liftoff::EmitCommutativeBinOp<&Assembler::imul>(this, dst, lhs, rhs);
1527}
1528
1529void LiftoffAssembler::emit_i32_muli(Register dst, Register lhs, int32_t imm) {
1530 if (base::bits::IsPowerOfTwo(imm)) {
1531 emit_i32_shli(dst, lhs, base::bits::WhichPowerOfTwo(imm));
1532 } else {
1533 imul(dst, lhs, imm);
1534 }
1535}
1536
1537namespace liftoff {
1538enum class DivOrRem : uint8_t { kDiv, kRem };
1539template <bool is_signed, DivOrRem div_or_rem>
1540void EmitInt32DivOrRem(LiftoffAssembler* assm, Register dst, Register lhs,
1541 Register rhs, Label* trap_div_by_zero,
1542 Label* trap_div_unrepresentable) {
1543 constexpr bool needs_unrepresentable_check =
1544 is_signed && div_or_rem == DivOrRem::kDiv;
1545 constexpr bool special_case_minus_1 =
1546 is_signed && div_or_rem == DivOrRem::kRem;
1547 DCHECK_EQ(needs_unrepresentable_check, trap_div_unrepresentable != nullptr);
1548
1549 // For division, the lhs is always taken from {edx:eax}. Thus, make sure that
1550 // these registers are unused. If {rhs} is stored in one of them, move it to
1551 // another temporary register.
1552 // Do all this before any branch, such that the code is executed
1553 // unconditionally, as the cache state will also be modified unconditionally.
1554 assm->SpillRegisters(eax, edx);
1555 if (rhs == eax || rhs == edx) {
1556 LiftoffRegList unavailable{eax, edx, lhs};
1557 Register tmp = assm->GetUnusedRegister(kGpReg, unavailable).gp();
1558 assm->mov(tmp, rhs);
1559 rhs = tmp;
1560 }
1561
1562 // Check for division by zero.
1563 assm->test(rhs, rhs);
1564 assm->j(zero, trap_div_by_zero);
1565
1566 Label done;
1567 if (needs_unrepresentable_check) {
1568 // Check for {kMinInt / -1}. This is unrepresentable.
1569 Label do_div;
1570 assm->cmp(rhs, -1);
1571 assm->j(not_equal, &do_div);
1572 assm->cmp(lhs, kMinInt);
1573 assm->j(equal, trap_div_unrepresentable);
1574 assm->bind(&do_div);
1575 } else if (special_case_minus_1) {
1576 // {lhs % -1} is always 0 (needs to be special cased because {kMinInt / -1}
1577 // cannot be computed).
1578 Label do_rem;
1579 assm->cmp(rhs, -1);
1580 assm->j(not_equal, &do_rem);
1581 assm->xor_(dst, dst);
1582 assm->jmp(&done);
1583 assm->bind(&do_rem);
1584 }
1585
1586 // Now move {lhs} into {eax}, then zero-extend or sign-extend into {edx}, then
1587 // do the division.
1588 if (lhs != eax) assm->mov(eax, lhs);
1589 if (is_signed) {
1590 assm->cdq();
1591 assm->idiv(rhs);
1592 } else {
1593 assm->xor_(edx, edx);
1594 assm->div(rhs);
1595 }
1596
1597 // Move back the result (in {eax} or {edx}) into the {dst} register.
1598 constexpr Register kResultReg = div_or_rem == DivOrRem::kDiv ? eax : edx;
1599 if (dst != kResultReg) assm->mov(dst, kResultReg);
1600 if (special_case_minus_1) assm->bind(&done);
1601}
1602} // namespace liftoff
1603
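// Worked example for the checks in {EmitInt32DivOrRem} above: i32.div_s of
// kMinInt (-2147483648) by -1 would be +2147483648, which is not representable
// in 32 bits, so it traps as unrepresentable; i32.rem_s by -1 is special-cased
// to 0 because idiv itself would fault on that input even though the wasm
// result is well-defined.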
1604void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
1605 Label* trap_div_by_zero,
1606 Label* trap_div_unrepresentable) {
1607 liftoff::EmitInt32DivOrRem<true, liftoff::DivOrRem::kDiv>(
1608 this, dst, lhs, rhs, trap_div_by_zero, trap_div_unrepresentable);
1609}
1610
1611void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs,
1612 Label* trap_div_by_zero) {
1613 liftoff::EmitInt32DivOrRem<false, liftoff::DivOrRem::kDiv>(
1614 this, dst, lhs, rhs, trap_div_by_zero, nullptr);
1615}
1616
1617void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs,
1618 Label* trap_div_by_zero) {
1619 liftoff::EmitInt32DivOrRem<true, liftoff::DivOrRem::kRem>(
1620 this, dst, lhs, rhs, trap_div_by_zero, nullptr);
1621}
1622
1623void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs,
1624 Label* trap_div_by_zero) {
1625 liftoff::EmitInt32DivOrRem<false, liftoff::DivOrRem::kRem>(
1626 this, dst, lhs, rhs, trap_div_by_zero, nullptr);
1627}
1628
1629void LiftoffAssembler::emit_i32_and(Register dst, Register lhs, Register rhs) {
1630 liftoff::EmitCommutativeBinOp<&Assembler::and_>(this, dst, lhs, rhs);
1631}
1632
1633void LiftoffAssembler::emit_i32_andi(Register dst, Register lhs, int32_t imm) {
1634 liftoff::EmitCommutativeBinOpImm<&Assembler::and_>(this, dst, lhs, imm);
1635}
1636
1637void LiftoffAssembler::emit_i32_or(Register dst, Register lhs, Register rhs) {
1638 liftoff::EmitCommutativeBinOp<&Assembler::or_>(this, dst, lhs, rhs);
1639}
1640
1641void LiftoffAssembler::emit_i32_ori(Register dst, Register lhs, int32_t imm) {
1642 liftoff::EmitCommutativeBinOpImm<&Assembler::or_>(this, dst, lhs, imm);
1643}
1644
1645void LiftoffAssembler::emit_i32_xor(Register dst, Register lhs, Register rhs) {
1646 liftoff::EmitCommutativeBinOp<&Assembler::xor_>(this, dst, lhs, rhs);
1647}
1648
1649void LiftoffAssembler::emit_i32_xori(Register dst, Register lhs, int32_t imm) {
1650 liftoff::EmitCommutativeBinOpImm<&Assembler::xor_>(this, dst, lhs, imm);
1651}
1652
1653namespace liftoff {
1654inline void EmitShiftOperation(LiftoffAssembler* assm, Register dst,
1655 Register src, Register amount,
1656 void (Assembler::*emit_shift)(Register)) {
1657 LiftoffRegList pinned{dst, src, amount};
1658 // If dst is ecx, compute into a tmp register first, then move to ecx.
1659 if (dst == ecx) {
1660 Register tmp = assm->GetUnusedRegister(kGpReg, pinned).gp();
1661 assm->mov(tmp, src);
1662 if (amount != ecx) assm->mov(ecx, amount);
1663 (assm->*emit_shift)(tmp);
1664 assm->mov(ecx, tmp);
1665 return;
1666 }
1667
1668 // Move amount into ecx. If ecx is in use, move its content to a tmp register
1669 // first. If src is ecx, src is now the tmp register.
1670 Register tmp_reg = no_reg;
1671 if (amount != ecx) {
1672 if (assm->cache_state()->is_used(LiftoffRegister(ecx)) ||
1673 pinned.has(LiftoffRegister(ecx))) {
1674 tmp_reg = assm->GetUnusedRegister(kGpReg, pinned).gp();
1675 assm->mov(tmp_reg, ecx);
1676 if (src == ecx) src = tmp_reg;
1677 }
1678 assm->mov(ecx, amount);
1679 }
1680
1681 // Do the actual shift.
1682 if (dst != src) assm->mov(dst, src);
1683 (assm->*emit_shift)(dst);
1684
1685 // Restore ecx if needed.
1686 if (tmp_reg.is_valid()) assm->mov(ecx, tmp_reg);
1687}
1688} // namespace liftoff
1689
1690void LiftoffAssembler::emit_i32_shl(Register dst, Register src,
1691 Register amount) {
1692 liftoff::EmitShiftOperation(this, dst, src, amount, &Assembler::shl_cl);
1693}
1694
1695void LiftoffAssembler::emit_i32_shli(Register dst, Register src,
1696 int32_t amount) {
1697 if (dst != src) mov(dst, src);
1698 shl(dst, amount & 31);
1699}
1700
1701void LiftoffAssembler::emit_i32_sar(Register dst, Register src,
1702 Register amount) {
1703 liftoff::EmitShiftOperation(this, dst, src, amount, &Assembler::sar_cl);
1704}
1705
1706void LiftoffAssembler::emit_i32_sari(Register dst, Register src,
1707 int32_t amount) {
1708 if (dst != src) mov(dst, src);
1709 sar(dst, amount & 31);
1710}
1711
1712void LiftoffAssembler::emit_i32_shr(Register dst, Register src,
1713 Register amount) {
1714 liftoff::EmitShiftOperation(this, dst, src, amount, &Assembler::shr_cl);
1715}
1716
1717void LiftoffAssembler::emit_i32_shri(Register dst, Register src,
1718 int32_t amount) {
1719 if (dst != src) mov(dst, src);
1720 shr(dst, amount & 31);
1721}
1722
1723void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {
1724 Lzcnt(dst, src);
1725}
1726
1727void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
1728 Tzcnt(dst, src);
1729}
1730
1731bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
1732 if (!CpuFeatures::IsSupported(POPCNT)) return false;
1733 CpuFeatureScope scope(this, POPCNT);
1734 popcnt(dst, src);
1735 return true;
1736}
1737
1738namespace liftoff {
1739template <void (Assembler::*op)(Register, Register),
1740 void (Assembler::*op_with_carry)(Register, Register)>
1741inline void OpWithCarry(LiftoffAssembler* assm, LiftoffRegister dst,
1742 LiftoffRegister lhs, LiftoffRegister rhs) {
1743 // First, compute the low half of the result, potentially into a temporary dst
1744 // register if {dst.low_gp()} equals {rhs.low_gp()} or any register we need to
1745 // keep alive for computing the upper half.
1746 LiftoffRegList keep_alive{lhs.high_gp(), rhs};
1747 Register dst_low = keep_alive.has(dst.low_gp())
1748 ? assm->GetUnusedRegister(kGpReg, keep_alive).gp()
1749 : dst.low_gp();
1750
1751 if (dst_low != lhs.low_gp()) assm->mov(dst_low, lhs.low_gp());
1752 (assm->*op)(dst_low, rhs.low_gp());
1753
1754 // Now compute the upper half, while keeping alive the previous result.
1755 keep_alive = LiftoffRegList{dst_low, rhs.high_gp()};
1756 Register dst_high = keep_alive.has(dst.high_gp())
1757 ? assm->GetUnusedRegister(kGpReg, keep_alive).gp()
1758 : dst.high_gp();
1759
1760 if (dst_high != lhs.high_gp()) assm->mov(dst_high, lhs.high_gp());
1761 (assm->*op_with_carry)(dst_high, rhs.high_gp());
1762
1763 // If necessary, move result into the right registers.
1764 LiftoffRegister tmp_result = LiftoffRegister::ForPair(dst_low, dst_high);
1765 if (tmp_result != dst) assm->Move(dst, tmp_result, kI64);
1766}
1767
1768template <void (Assembler::*op)(Register, const Immediate&),
1769 void (Assembler::*op_with_carry)(Register, int32_t)>
1770inline void OpWithCarryI(LiftoffAssembler* assm, LiftoffRegister dst,
1771 LiftoffRegister lhs, int64_t imm) {
1772 // The compiler allocated registers such that either {dst == lhs} or there is
1773 // no overlap between the two.
1774 DCHECK_NE(dst.low_gp(), lhs.high_gp());
1775
1776 int32_t imm_low_word = static_cast<int32_t>(imm);
1777 int32_t imm_high_word = static_cast<int32_t>(imm >> 32);
1778
1779 // First, compute the low half of the result.
1780 if (dst.low_gp() != lhs.low_gp()) assm->mov(dst.low_gp(), lhs.low_gp());
1781 (assm->*op)(dst.low_gp(), Immediate(imm_low_word));
1782
1783 // Now compute the upper half.
1784 if (dst.high_gp() != lhs.high_gp()) assm->mov(dst.high_gp(), lhs.high_gp());
1785 (assm->*op_with_carry)(dst.high_gp(), imm_high_word);
1786}
1787} // namespace liftoff
1788
1789void LiftoffAssembler::emit_i64_add(LiftoffRegister dst, LiftoffRegister lhs,
1790 LiftoffRegister rhs) {
1791 liftoff::OpWithCarry<&Assembler::add, &Assembler::adc>(this, dst, lhs, rhs);
1792}
1793
1794void LiftoffAssembler::emit_i64_addi(LiftoffRegister dst, LiftoffRegister lhs,
1795 int64_t imm) {
1796 liftoff::OpWithCarryI<&Assembler::add, &Assembler::adc>(this, dst, lhs, imm);
1797}
1798
1799void LiftoffAssembler::emit_i64_sub(LiftoffRegister dst, LiftoffRegister lhs,
1800 LiftoffRegister rhs) {
1801 liftoff::OpWithCarry<&Assembler::sub, &Assembler::sbb>(this, dst, lhs, rhs);
1802}
1803
1804void LiftoffAssembler::emit_i64_mul(LiftoffRegister dst, LiftoffRegister lhs,
1805 LiftoffRegister rhs) {
1806 // Idea:
1807 // [ lhs_hi | lhs_lo ] * [ rhs_hi | rhs_lo ]
1808 // = [ lhs_hi * rhs_lo | ] (32 bit mul, shift 32)
1809 // + [ lhs_lo * rhs_hi | ] (32 bit mul, shift 32)
1810 // + [ lhs_lo * rhs_lo ] (32x32->64 mul, shift 0)
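// In other words, modulo 2^64:
// (2^32 * lhs_hi + lhs_lo) * (2^32 * rhs_hi + rhs_lo)
// = 2^32 * (lhs_hi * rhs_lo + lhs_lo * rhs_hi) + lhs_lo * rhs_lo,
// because the lhs_hi * rhs_hi term is a multiple of 2^64 and drops out; only
// the low 32 bits of each cross product contribute to the high word.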
1811
1812 // For simplicity, we move lhs and rhs into fixed registers.
1813 Register dst_hi = edx;
1814 Register dst_lo = eax;
1815 Register lhs_hi = ecx;
1816 Register lhs_lo = dst_lo;
1817 Register rhs_hi = dst_hi;
1818 Register rhs_lo = esi;
1819
1820 // Spill all these registers if they are still holding other values.
1821 SpillRegisters(dst_hi, dst_lo, lhs_hi, rhs_lo);
1822
1823 // Move lhs and rhs into the respective registers.
1824 ParallelRegisterMove({{LiftoffRegister::ForPair(lhs_lo, lhs_hi), lhs, kI64},
1825 {LiftoffRegister::ForPair(rhs_lo, rhs_hi), rhs, kI64}});
1826
1827 // First mul: lhs_hi' = lhs_hi * rhs_lo.
1828 imul(lhs_hi, rhs_lo);
1829 // Second mul: rhi_hi' = rhs_hi * lhs_lo.
1830 imul(rhs_hi, lhs_lo);
1831 // Add them: lhs_hi'' = lhs_hi' + rhs_hi' = lhs_hi * rhs_lo + rhs_hi * lhs_lo.
1832 add(lhs_hi, rhs_hi);
1833 // Third mul: edx:eax (dst_hi:dst_lo) = eax * esi (lhs_lo * rhs_lo).
1834 mul(rhs_lo);
1835 // Add lhs_hi'' to dst_hi.
1836 add(dst_hi, lhs_hi);
1837
1838 // Finally, move back the temporary result to the actual dst register pair.
1839 LiftoffRegister dst_tmp = LiftoffRegister::ForPair(dst_lo, dst_hi);
1840 if (dst != dst_tmp) Move(dst, dst_tmp, kI64);
1841}
1842
1843bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs,
1844 LiftoffRegister rhs,
1845 Label* trap_div_by_zero,
1846 Label* trap_div_unrepresentable) {
1847 return false;
1848}
1849
1850bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs,
1851 LiftoffRegister rhs,
1852 Label* trap_div_by_zero) {
1853 return false;
1854}
1855
1856bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs,
1857 LiftoffRegister rhs,
1858 Label* trap_div_by_zero) {
1859 return false;
1860}
1861
1862bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs,
1863 LiftoffRegister rhs,
1864 Label* trap_div_by_zero) {
1865 return false;
1866}
1867
1868namespace liftoff {
1869 inline bool PairContains(LiftoffRegister pair, Register reg) {
1870 return pair.low_gp() == reg || pair.high_gp() == reg;
1871}
1872
1873 inline LiftoffRegister ReplaceInPair(LiftoffRegister pair, Register old_reg,
1874 Register new_reg) {
1875 if (pair.low_gp() == old_reg) {
1876 return LiftoffRegister::ForPair(new_reg, pair.high_gp());
1877 }
1878 if (pair.high_gp() == old_reg) {
1879 return LiftoffRegister::ForPair(pair.low_gp(), new_reg);
1880 }
1881 return pair;
1882}
1883
1884 inline void Emit64BitShiftOperation(
1885 LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister src,
1886 Register amount, void (MacroAssembler::*emit_shift)(Register, Register)) {
1887 // Temporary registers cannot overlap with {dst}.
1888 LiftoffRegList pinned{dst};
1889
1890 constexpr size_t kMaxRegMoves = 3;
1891 base::SmallVector<LiftoffAssembler::ParallelRegisterMoveTuple, kMaxRegMoves>
1892 reg_moves;
1893
1894 // If {dst} contains {ecx}, replace it by an unused register, which is then
1895 // moved to {ecx} in the end.
1896 Register ecx_replace = no_reg;
1897 if (PairContains(dst, ecx)) {
1898 ecx_replace = assm->GetUnusedRegister(kGpReg, pinned).gp();
1899 dst = ReplaceInPair(dst, ecx, ecx_replace);
1900 // If {amount} needs to be moved to {ecx}, but {ecx} is in use (and not part
1901 // of {dst}, hence overwritten anyway), move {ecx} to a tmp register and
1902 // restore it at the end.
1903 } else if (amount != ecx &&
1904 (assm->cache_state()->is_used(LiftoffRegister(ecx)) ||
1905 pinned.has(LiftoffRegister(ecx)))) {
1906 ecx_replace = assm->GetUnusedRegister(kGpReg, pinned).gp();
1907 reg_moves.emplace_back(ecx_replace, ecx, kI32);
1908 }
1909
1910 reg_moves.emplace_back(dst, src, kI64);
1911 reg_moves.emplace_back(ecx, amount, kI32);
1912 assm->ParallelRegisterMove(base::VectorOf(reg_moves));
1913
1914 // Do the actual shift.
1915 (assm->*emit_shift)(dst.high_gp(), dst.low_gp());
1916
1917 // Restore {ecx} if needed.
1918 if (ecx_replace != no_reg) assm->mov(ecx, ecx_replace);
1919}
1920} // namespace liftoff
1921
1922void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
1923 Register amount) {
1924 liftoff::Emit64BitShiftOperation(this, dst, src, amount,
1926}
1927
1928void LiftoffAssembler::emit_i64_shli(LiftoffRegister dst, LiftoffRegister src,
1929 int32_t amount) {
1930 amount &= 63;
1931 if (amount >= 32) {
1932 if (dst.high_gp() != src.low_gp()) mov(dst.high_gp(), src.low_gp());
1933 if (amount != 32) shl(dst.high_gp(), amount - 32);
1934 xor_(dst.low_gp(), dst.low_gp());
1935 } else {
1936 if (dst != src) Move(dst, src, kI64);
1937 ShlPair(dst.high_gp(), dst.low_gp(), amount);
1938 }
1939}
1940
1941void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
1942 Register amount) {
1943 liftoff::Emit64BitShiftOperation(this, dst, src, amount,
1945}
1946
1947void LiftoffAssembler::emit_i64_sari(LiftoffRegister dst, LiftoffRegister src,
1948 int32_t amount) {
1949 amount &= 63;
1950 if (amount >= 32) {
1951 if (dst.low_gp() != src.high_gp()) mov(dst.low_gp(), src.high_gp());
1952 if (dst.high_gp() != src.high_gp()) mov(dst.high_gp(), src.high_gp());
1953 if (amount != 32) sar(dst.low_gp(), amount - 32);
1954 sar(dst.high_gp(), 31);
1955 } else {
1956 if (dst != src) Move(dst, src, kI64);
1957 SarPair(dst.high_gp(), dst.low_gp(), amount);
1958 }
1959}
1960void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
1961 Register amount) {
1962 liftoff::Emit64BitShiftOperation(this, dst, src, amount,
1964}
1965
1966void LiftoffAssembler::emit_i64_shri(LiftoffRegister dst, LiftoffRegister src,
1967 int32_t amount) {
1968 amount &= 63;
1969 if (amount >= 32) {
1970 if (dst.low_gp() != src.high_gp()) mov(dst.low_gp(), src.high_gp());
1971 if (amount != 32) shr(dst.low_gp(), amount - 32);
1972 xor_(dst.high_gp(), dst.high_gp());
1973 } else {
1974 if (dst != src) Move(dst, src, kI64);
1975 ShrPair(dst.high_gp(), dst.low_gp(), amount);
1976 }
1977}
1978
1979void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
1980 // return high == 0 ? 32 + CLZ32(low) : CLZ32(high);
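// E.g. for 0x0000'0001'8000'0000 the high word is non-zero, so the result is
// CLZ32(0x1) == 31; for 0x0000'0000'8000'0000 it is 32 + CLZ32(0x8000'0000) == 32.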
1981 Label done;
1982 Register safe_dst = dst.low_gp();
1983 if (src.low_gp() == safe_dst) safe_dst = dst.high_gp();
1984 if (CpuFeatures::IsSupported(LZCNT)) {
1985 CpuFeatureScope scope(this, LZCNT);
1986 lzcnt(safe_dst, src.high_gp()); // Sets CF if high == 0.
1987 j(not_carry, &done, Label::kNear);
1988 lzcnt(safe_dst, src.low_gp());
1989 add(safe_dst, Immediate(32)); // 32 + CLZ32(low)
1990 } else {
1991 // CLZ32(x) =^ x == 0 ? 32 : 31 - BSR32(x)
1992 Label high_is_zero;
1993 bsr(safe_dst, src.high_gp()); // Sets ZF if high == 0.
1994 j(zero, &high_is_zero, Label::kNear);
1995 xor_(safe_dst, Immediate(31)); // for x in [0..31], 31^x == 31-x.
1996 jmp(&done, Label::kNear);
1997
1998 bind(&high_is_zero);
1999 Label low_not_zero;
2000 bsr(safe_dst, src.low_gp());
2001 j(not_zero, &low_not_zero, Label::kNear);
2002 mov(safe_dst, Immediate(64 ^ 63)); // 64, after the xor below.
2003 bind(&low_not_zero);
2004 xor_(safe_dst, 63); // for x in [0..31], 63^x == 63-x.
2005 }
2006
2007 bind(&done);
2008 if (safe_dst != dst.low_gp()) mov(dst.low_gp(), safe_dst);
2009 xor_(dst.high_gp(), dst.high_gp()); // High word of result is always 0.
2010}
2011
2012void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
2013 // return low == 0 ? 32 + CTZ32(high) : CTZ32(low);
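// E.g. for 0x8000'0000'0000'0000 the low word is zero, so the result is
// 32 + CTZ32(0x8000'0000) == 63; for 0x0000'0000'0000'0006 it is CTZ32(6) == 1.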
2014 Label done;
2015 Register safe_dst = dst.low_gp();
2016 if (src.high_gp() == safe_dst) safe_dst = dst.high_gp();
2017 if (CpuFeatures::IsSupported(BMI1)) {
2018 CpuFeatureScope scope(this, BMI1);
2019 tzcnt(safe_dst, src.low_gp()); // Sets CF if low == 0.
2020 j(not_carry, &done, Label::kNear);
2021 tzcnt(safe_dst, src.high_gp());
2022 add(safe_dst, Immediate(32)); // 32 + CTZ32(high)
2023 } else {
2024 // CTZ32(x) =^ x == 0 ? 32 : BSF32(x)
2025 bsf(safe_dst, src.low_gp()); // Sets ZF if low == 0.
2026 j(not_zero, &done, Label::kNear);
2027
2028 Label high_not_zero;
2029 bsf(safe_dst, src.high_gp());
2030 j(not_zero, &high_not_zero, Label::kNear);
2031 mov(safe_dst, 64); // low == 0 and high == 0
2032 jmp(&done);
2033 bind(&high_not_zero);
2034 add(safe_dst, Immediate(32)); // 32 + CTZ32(high)
2035 }
2036
2037 bind(&done);
2038 if (safe_dst != dst.low_gp()) mov(dst.low_gp(), safe_dst);
2039 xor_(dst.high_gp(), dst.high_gp()); // High word of result is always 0.
2040}
2041
2042bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
2043 LiftoffRegister src) {
2044 if (!CpuFeatures::IsSupported(POPCNT)) return false;
2045 CpuFeatureScope scope(this, POPCNT);
2046 // Produce partial popcnts in the two dst registers.
2047 Register src1 = src.high_gp() == dst.low_gp() ? src.high_gp() : src.low_gp();
2048 Register src2 = src.high_gp() == dst.low_gp() ? src.low_gp() : src.high_gp();
2049 popcnt(dst.low_gp(), src1);
2050 popcnt(dst.high_gp(), src2);
2051 // Add the two into the lower dst reg, clear the higher dst reg.
2052 add(dst.low_gp(), dst.high_gp());
2053 xor_(dst.high_gp(), dst.high_gp());
2054 return true;
2055}
2056
2057void LiftoffAssembler::IncrementSmi(LiftoffRegister dst, int offset) {
2058 add(Operand(dst.gp(), offset), Immediate(Smi::FromInt(1)));
2059}
2060
2061void LiftoffAssembler::emit_f32_add(DoubleRegister dst, DoubleRegister lhs,
2062 DoubleRegister rhs) {
2063 if (CpuFeatures::IsSupported(AVX)) {
2064 CpuFeatureScope scope(this, AVX);
2065 vaddss(dst, lhs, rhs);
2066 } else if (dst == rhs) {
2067 addss(dst, lhs);
2068 } else {
2069 if (dst != lhs) movss(dst, lhs);
2070 addss(dst, rhs);
2071 }
2072}
2073
2074void LiftoffAssembler::emit_f32_sub(DoubleRegister dst, DoubleRegister lhs,
2075 DoubleRegister rhs) {
2076 if (CpuFeatures::IsSupported(AVX)) {
2077 CpuFeatureScope scope(this, AVX);
2078 vsubss(dst, lhs, rhs);
2079 } else if (dst == rhs) {
2080 movss(liftoff::kScratchDoubleReg, rhs);
2081 movss(dst, lhs);
2082 subss(dst, liftoff::kScratchDoubleReg);
2083 } else {
2084 if (dst != lhs) movss(dst, lhs);
2085 subss(dst, rhs);
2086 }
2087}
2088
2089void LiftoffAssembler::emit_f32_mul(DoubleRegister dst, DoubleRegister lhs,
2090 DoubleRegister rhs) {
2091 if (CpuFeatures::IsSupported(AVX)) {
2092 CpuFeatureScope scope(this, AVX);
2093 vmulss(dst, lhs, rhs);
2094 } else if (dst == rhs) {
2095 mulss(dst, lhs);
2096 } else {
2097 if (dst != lhs) movss(dst, lhs);
2098 mulss(dst, rhs);
2099 }
2100}
2101
2102void LiftoffAssembler::emit_f32_div(DoubleRegister dst, DoubleRegister lhs,
2103 DoubleRegister rhs) {
2104 if (CpuFeatures::IsSupported(AVX)) {
2105 CpuFeatureScope scope(this, AVX);
2106 vdivss(dst, lhs, rhs);
2107 } else if (dst == rhs) {
2108 movss(liftoff::kScratchDoubleReg, rhs);
2109 movss(dst, lhs);
2110 divss(dst, liftoff::kScratchDoubleReg);
2111 } else {
2112 if (dst != lhs) movss(dst, lhs);
2113 divss(dst, rhs);
2114 }
2115}
2116
2117namespace liftoff {
2118enum class MinOrMax : uint8_t { kMin, kMax };
2119template <typename type>
2120 inline void EmitFloatMinOrMax(LiftoffAssembler* assm, DoubleRegister dst,
2121 DoubleRegister lhs, DoubleRegister rhs,
2122 MinOrMax min_or_max) {
2123 Label is_nan;
2124 Label lhs_below_rhs;
2125 Label lhs_above_rhs;
2126 Label done;
2127
2128 // We need one tmp register to extract the sign bit. Get it right at the
2129 // beginning, such that the spilling code is not accidentally jumped over.
2130 Register tmp = assm->GetUnusedRegister(kGpReg, {}).gp();
2131
2132#define dop(name, ...) \
2133 do { \
2134 if (sizeof(type) == 4) { \
2135 assm->name##s(__VA_ARGS__); \
2136 } else { \
2137 assm->name##d(__VA_ARGS__); \
2138 } \
2139 } while (false)
2140
2141 // Check the easy cases first: NaN (i.e. unordered), smaller, and greater.
2142 // NaN has to be checked first, because PF=1 implies CF=1.
2143 dop(ucomis, lhs, rhs);
2144 assm->j(parity_even, &is_nan, Label::kNear); // PF=1
2145 assm->j(below, &lhs_below_rhs, Label::kNear); // CF=1
2146 assm->j(above, &lhs_above_rhs, Label::kNear); // CF=0 && ZF=0
2147
2148 // If we get here, then either
2149 // a) {lhs == rhs},
2150 // b) {lhs == -0.0} and {rhs == 0.0}, or
2151 // c) {lhs == 0.0} and {rhs == -0.0}.
2152 // For a), it does not matter whether we return {lhs} or {rhs}. Check the sign
2153 // bit of {rhs} to differentiate b) and c).
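// If the sign bit of {rhs} is clear, {rhs} is +0.0 (or the operands are equal),
// so {lhs} is treated as the smaller value; otherwise {rhs} is -0.0 and is the
// smaller one.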
2154 dop(movmskp, tmp, rhs);
2155 assm->test(tmp, Immediate(1));
2156 assm->j(zero, &lhs_below_rhs, Label::kNear);
2157 assm->jmp(&lhs_above_rhs, Label::kNear);
2158
2159 assm->bind(&is_nan);
2160 // Create a NaN output.
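// ({dst} is zeroed first; 0.0 / 0.0 then yields a quiet NaN.)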
2161 dop(xorp, dst, dst);
2162 dop(divs, dst, dst);
2163 assm->jmp(&done, Label::kNear);
2164
2165 assm->bind(&lhs_below_rhs);
2166 DoubleRegister lhs_below_rhs_src = min_or_max == MinOrMax::kMin ? lhs : rhs;
2167 if (dst != lhs_below_rhs_src) dop(movs, dst, lhs_below_rhs_src);
2168 assm->jmp(&done, Label::kNear);
2169
2170 assm->bind(&lhs_above_rhs);
2171 DoubleRegister lhs_above_rhs_src = min_or_max == MinOrMax::kMin ? rhs : lhs;
2172 if (dst != lhs_above_rhs_src) dop(movs, dst, lhs_above_rhs_src);
2173
2174 assm->bind(&done);
2175}
2176} // namespace liftoff
2177
2178void LiftoffAssembler::emit_f32_min(DoubleRegister dst, DoubleRegister lhs,
2179 DoubleRegister rhs) {
2180 liftoff::EmitFloatMinOrMax<float>(this, dst, lhs, rhs,
2181 liftoff::MinOrMax::kMin);
2182}
2183
2184void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs,
2185 DoubleRegister rhs) {
2186 liftoff::EmitFloatMinOrMax<float>(this, dst, lhs, rhs,
2187 liftoff::MinOrMax::kMax);
2188}
2189
2190void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
2191 DoubleRegister rhs) {
2192 static constexpr int kF32SignBit = 1 << 31;
2193 LiftoffRegList pinned;
2194 Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
2195 Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();
2196 Movd(scratch, lhs); // move {lhs} into {scratch}.
2197 and_(scratch, Immediate(~kF32SignBit)); // clear sign bit in {scratch}.
2198 Movd(scratch2, rhs); // move {rhs} into {scratch2}.
2199 and_(scratch2, Immediate(kF32SignBit)); // isolate sign bit in {scratch2}.
2200 or_(scratch, scratch2); // combine {scratch2} into {scratch}.
2201 Movd(dst, scratch); // move result into {dst}.
2202}
2203
2204void LiftoffAssembler::emit_f32_abs(DoubleRegister dst, DoubleRegister src) {
2205 static constexpr uint32_t kSignBit = uint32_t{1} << 31;
2206 if (dst == src) {
2207 MacroAssembler::Move(liftoff::kScratchDoubleReg, kSignBit - 1);
2208 Andps(dst, liftoff::kScratchDoubleReg);
2209 } else {
2210 MacroAssembler::Move(dst, kSignBit - 1);
2211 Andps(dst, src);
2212 }
2213}
2214
2215void LiftoffAssembler::emit_f32_neg(DoubleRegister dst, DoubleRegister src) {
2216 static constexpr uint32_t kSignBit = uint32_t{1} << 31;
2217 if (dst == src) {
2218 MacroAssembler::Move(liftoff::kScratchDoubleReg, kSignBit);
2219 Xorps(dst, liftoff::kScratchDoubleReg);
2220 } else {
2221 MacroAssembler::Move(dst, kSignBit);
2222 Xorps(dst, src);
2223 }
2224}
2225
2226bool LiftoffAssembler::emit_f32_ceil(DoubleRegister dst, DoubleRegister src) {
2227 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2228 roundss(dst, src, kRoundUp);
2229 return true;
2230}
2231
2232bool LiftoffAssembler::emit_f32_floor(DoubleRegister dst, DoubleRegister src) {
2233 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2234 roundss(dst, src, kRoundDown);
2235 return true;
2236}
2237
2238bool LiftoffAssembler::emit_f32_trunc(DoubleRegister dst, DoubleRegister src) {
2239 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2240 roundss(dst, src, kRoundToZero);
2241 return true;
2242}
2243
2244bool LiftoffAssembler::emit_f32_nearest_int(DoubleRegister dst,
2245 DoubleRegister src) {
2246 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2247 roundss(dst, src, kRoundToNearest);
2248 return true;
2249}
2250
2251void LiftoffAssembler::emit_f32_sqrt(DoubleRegister dst, DoubleRegister src) {
2252 Sqrtss(dst, src);
2253}
2254
2255void LiftoffAssembler::emit_f64_add(DoubleRegister dst, DoubleRegister lhs,
2256 DoubleRegister rhs) {
2257 if (CpuFeatures::IsSupported(AVX)) {
2258 CpuFeatureScope scope(this, AVX);
2259 vaddsd(dst, lhs, rhs);
2260 } else if (dst == rhs) {
2261 addsd(dst, lhs);
2262 } else {
2263 if (dst != lhs) movsd(dst, lhs);
2264 addsd(dst, rhs);
2265 }
2266}
2267
2268void LiftoffAssembler::emit_f64_sub(DoubleRegister dst, DoubleRegister lhs,
2269 DoubleRegister rhs) {
2270 if (CpuFeatures::IsSupported(AVX)) {
2271 CpuFeatureScope scope(this, AVX);
2272 vsubsd(dst, lhs, rhs);
2273 } else if (dst == rhs) {
2274 movsd(liftoff::kScratchDoubleReg, rhs);
2275 movsd(dst, lhs);
2276 subsd(dst, liftoff::kScratchDoubleReg);
2277 } else {
2278 if (dst != lhs) movsd(dst, lhs);
2279 subsd(dst, rhs);
2280 }
2281}
2282
2283void LiftoffAssembler::emit_f64_mul(DoubleRegister dst, DoubleRegister lhs,
2284 DoubleRegister rhs) {
2285 if (CpuFeatures::IsSupported(AVX)) {
2286 CpuFeatureScope scope(this, AVX);
2287 vmulsd(dst, lhs, rhs);
2288 } else if (dst == rhs) {
2289 mulsd(dst, lhs);
2290 } else {
2291 if (dst != lhs) movsd(dst, lhs);
2292 mulsd(dst, rhs);
2293 }
2294}
2295
2296void LiftoffAssembler::emit_f64_div(DoubleRegister dst, DoubleRegister lhs,
2297 DoubleRegister rhs) {
2298 if (CpuFeatures::IsSupported(AVX)) {
2299 CpuFeatureScope scope(this, AVX);
2300 vdivsd(dst, lhs, rhs);
2301 } else if (dst == rhs) {
2302 movsd(liftoff::kScratchDoubleReg, rhs);
2303 movsd(dst, lhs);
2304 divsd(dst, liftoff::kScratchDoubleReg);
2305 } else {
2306 if (dst != lhs) movsd(dst, lhs);
2307 divsd(dst, rhs);
2308 }
2309}
2310
2311void LiftoffAssembler::emit_f64_min(DoubleRegister dst, DoubleRegister lhs,
2312 DoubleRegister rhs) {
2313 liftoff::EmitFloatMinOrMax<double>(this, dst, lhs, rhs,
2314 liftoff::MinOrMax::kMin);
2315}
2316
2317void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
2318 DoubleRegister rhs) {
2319 static constexpr int kF32SignBit = 1 << 31;
2320 // On ia32, we cannot hold the whole f64 value in a gp register, so we just
2321 // operate on the upper half (UH).
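// (The sign bit of an f64 is bit 63, i.e. bit 31 of its upper half, which is
// the same position as the f32 sign bit, hence the reuse of kF32SignBit.)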
2322 LiftoffRegList pinned;
2323 Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
2324 Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();
2325
2326 Pextrd(scratch, lhs, 1); // move UH of {lhs} into {scratch}.
2327 and_(scratch, Immediate(~kF32SignBit)); // clear sign bit in {scratch}.
2328 Pextrd(scratch2, rhs, 1); // move UH of {rhs} into {scratch2}.
2329 and_(scratch2, Immediate(kF32SignBit)); // isolate sign bit in {scratch2}.
2330 or_(scratch, scratch2); // combine {scratch2} into {scratch}.
2331 movsd(dst, lhs); // move {lhs} into {dst}.
2332 Pinsrd(dst, scratch, 1); // insert {scratch} into UH of {dst}.
2333}
2334
2335void LiftoffAssembler::emit_f64_max(DoubleRegister dst, DoubleRegister lhs,
2336 DoubleRegister rhs) {
2337 liftoff::EmitFloatMinOrMax<double>(this, dst, lhs, rhs,
2338 liftoff::MinOrMax::kMax);
2339}
2340
2341void LiftoffAssembler::emit_f64_abs(DoubleRegister dst, DoubleRegister src) {
2342 static constexpr uint64_t kSignBit = uint64_t{1} << 63;
2343 if (dst == src) {
2344 MacroAssembler::Move(liftoff::kScratchDoubleReg, kSignBit - 1);
2345 Andpd(dst, liftoff::kScratchDoubleReg);
2346 } else {
2347 MacroAssembler::Move(dst, kSignBit - 1);
2348 Andpd(dst, src);
2349 }
2350}
2351
2352void LiftoffAssembler::emit_f64_neg(DoubleRegister dst, DoubleRegister src) {
2353 static constexpr uint64_t kSignBit = uint64_t{1} << 63;
2354 if (dst == src) {
2355 MacroAssembler::Move(liftoff::kScratchDoubleReg, kSignBit);
2356 Xorpd(dst, liftoff::kScratchDoubleReg);
2357 } else {
2358 MacroAssembler::Move(dst, kSignBit);
2359 Xorpd(dst, src);
2360 }
2361}
2362
2363bool LiftoffAssembler::emit_f64_ceil(DoubleRegister dst, DoubleRegister src) {
2364 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2365 roundsd(dst, src, kRoundUp);
2366 return true;
2367}
2368
2369bool LiftoffAssembler::emit_f64_floor(DoubleRegister dst, DoubleRegister src) {
2370 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2371 roundsd(dst, src, kRoundDown);
2372 return true;
2373}
2374
2375bool LiftoffAssembler::emit_f64_trunc(DoubleRegister dst, DoubleRegister src) {
2376 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2377 roundsd(dst, src, kRoundToZero);
2378 return true;
2379}
2380
2381bool LiftoffAssembler::emit_f64_nearest_int(DoubleRegister dst,
2382 DoubleRegister src) {
2383 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2384 roundsd(dst, src, kRoundToNearest);
2385 return true;
2386}
2387
2388void LiftoffAssembler::emit_f64_sqrt(DoubleRegister dst, DoubleRegister src) {
2389 Sqrtsd(dst, src);
2390}
2391
2392namespace liftoff {
2393#define __ assm->
2394// Used for float to int conversions. If the value in {converted_back} equals
2395// {src} afterwards, the conversion succeeded.
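// (For the signed conversions this relies on cvttss2si/cvttsd2si returning the
// "integer indefinite" value 0x80000000 for NaN or out-of-range inputs, which
// does not compare equal to the original value after converting back.)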
2396template <typename dst_type, typename src_type>
2397 inline void ConvertFloatToIntAndBack(LiftoffAssembler* assm, Register dst,
2398 DoubleRegister src,
2399 DoubleRegister converted_back,
2400 LiftoffRegList pinned) {
2401 if (std::is_same<double, src_type>::value) { // f64
2402 if (std::is_signed<dst_type>::value) { // f64 -> i32
2403 __ cvttsd2si(dst, src);
2404 __ Cvtsi2sd(converted_back, dst);
2405 } else { // f64 -> u32
2406 // Use converted_back as a scratch register (we can use it as it is an
2407 // "output" register of this function.
2408 __ Cvttsd2ui(dst, src, converted_back);
2409 __ Cvtui2sd(converted_back, dst,
2410 CacheStatePreservingTempRegisters(assm, pinned).Acquire());
2411 }
2412 } else { // f32
2413 if (std::is_signed<dst_type>::value) { // f32 -> i32
2414 __ cvttss2si(dst, src);
2415 __ Cvtsi2ss(converted_back, dst);
2416 } else { // f32 -> u32
2417 // Use converted_back as a scratch register (we can use it as it is an
2418 // "output" register of this function.
2419 __ Cvttss2ui(dst, src, converted_back);
2420 __ Cvtui2ss(converted_back, dst,
2421 CacheStatePreservingTempRegisters(assm, pinned).Acquire());
2422 }
2423 }
2424}
2425
2426template <typename dst_type, typename src_type>
2427 inline void EmitTruncateFloatToInt(LiftoffAssembler* assm, Register dst,
2428 DoubleRegister src, Label* trap) {
2429 if (!CpuFeatures::IsSupported(SSE4_1)) {
2430 __ bailout(kMissingCPUFeature, "no SSE4.1");
2431 return;
2432 }
2433 CpuFeatureScope feature(assm, SSE4_1);
2434
2435 LiftoffRegList pinned{src, dst};
2436 // Note: This relies on ConvertFloatToIntAndBack not reusing these scratch
2437 // registers!
2438 DoubleRegister rounded = kScratchDoubleReg;
2439 DoubleRegister converted_back = kScratchDoubleReg2;
2440
2441 if (std::is_same<double, src_type>::value) { // f64
2442 __ roundsd(rounded, src, kRoundToZero);
2443 } else { // f32
2444 __ roundss(rounded, src, kRoundToZero);
2445 }
2446 ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
2447 converted_back, pinned);
2448 if (std::is_same<double, src_type>::value) { // f64
2449 __ ucomisd(converted_back, rounded);
2450 } else { // f32
2451 __ ucomiss(converted_back, rounded);
2452 }
2453
2454 // Jump to trap if PF is 0 (one of the operands was NaN) or they are not
2455 // equal.
2456 __ j(parity_even, trap);
2457 __ j(not_equal, trap);
2458}
2459
2460template <typename dst_type, typename src_type>
2461 inline void EmitSatTruncateFloatToInt(LiftoffAssembler* assm, Register dst,
2462 DoubleRegister src) {
2463 if (!CpuFeatures::IsSupported(SSE4_1)) {
2464 __ bailout(kMissingCPUFeature, "no SSE4.1");
2465 return;
2466 }
2467 CpuFeatureScope feature(assm, SSE4_1);
2468
2469 Label done;
2470 Label not_nan;
2471 Label src_positive;
2472
2473 LiftoffRegList pinned{src, dst};
2474 DoubleRegister rounded =
2475 pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();
2476 DoubleRegister converted_back =
2477 pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();
2478 DoubleRegister zero_reg =
2479 pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();
2480
2481 if (std::is_same<double, src_type>::value) { // f64
2482 __ roundsd(rounded, src, kRoundToZero);
2483 } else { // f32
2484 __ roundss(rounded, src, kRoundToZero);
2485 }
2486
2487 ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
2488 converted_back, pinned);
2489 if (std::is_same<double, src_type>::value) { // f64
2490 __ ucomisd(converted_back, rounded);
2491 } else { // f32
2492 __ ucomiss(converted_back, rounded);
2493 }
2494
2495 // Return 0 if PF is 0 (one of the operands was NaN)
2496 __ j(parity_odd, &not_nan);
2497 __ xor_(dst, dst);
2498 __ jmp(&done);
2499
2500 __ bind(&not_nan);
2501 // If rounding is as expected, return result
2502 __ j(equal, &done);
2503
2504 __ Xorpd(zero_reg, zero_reg);
2505
2506 // if out-of-bounds, check if src is positive
2507 if (std::is_same<double, src_type>::value) { // f64
2508 __ ucomisd(src, zero_reg);
2509 } else { // f32
2510 __ ucomiss(src, zero_reg);
2511 }
2512 __ j(above, &src_positive);
2513 __ mov(dst, Immediate(std::numeric_limits<dst_type>::min()));
2514 __ jmp(&done);
2515
2516 __ bind(&src_positive);
2517
2518 __ mov(dst, Immediate(std::numeric_limits<dst_type>::max()));
2519
2520 __ bind(&done);
2521}
2522#undef __
2523} // namespace liftoff
2524
2525bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
2526 LiftoffRegister dst,
2527 LiftoffRegister src, Label* trap) {
2528 switch (opcode) {
2529 case kExprI32ConvertI64:
2530 if (dst.gp() != src.low_gp()) mov(dst.gp(), src.low_gp());
2531 return true;
2532 case kExprI32SConvertF32:
2533 liftoff::EmitTruncateFloatToInt<int32_t, float>(this, dst.gp(), src.fp(),
2534 trap);
2535 return true;
2536 case kExprI32UConvertF32:
2537 liftoff::EmitTruncateFloatToInt<uint32_t, float>(this, dst.gp(), src.fp(),
2538 trap);
2539 return true;
2540 case kExprI32SConvertF64:
2541 liftoff::EmitTruncateFloatToInt<int32_t, double>(this, dst.gp(), src.fp(),
2542 trap);
2543 return true;
2544 case kExprI32UConvertF64:
2545 liftoff::EmitTruncateFloatToInt<uint32_t, double>(this, dst.gp(),
2546 src.fp(), trap);
2547 return true;
2548 case kExprI32SConvertSatF32:
2549 liftoff::EmitSatTruncateFloatToInt<int32_t, float>(this, dst.gp(),
2550 src.fp());
2551 return true;
2552 case kExprI32UConvertSatF32:
2553 liftoff::EmitSatTruncateFloatToInt<uint32_t, float>(this, dst.gp(),
2554 src.fp());
2555 return true;
2556 case kExprI32SConvertSatF64:
2557 liftoff::EmitSatTruncateFloatToInt<int32_t, double>(this, dst.gp(),
2558 src.fp());
2559 return true;
2560 case kExprI32UConvertSatF64:
2561 liftoff::EmitSatTruncateFloatToInt<uint32_t, double>(this, dst.gp(),
2562 src.fp());
2563 return true;
2564 case kExprI32ReinterpretF32:
2565 Movd(dst.gp(), src.fp());
2566 return true;
2567 case kExprI64SConvertI32:
2568 if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp());
2569 if (dst.high_gp() != src.gp()) mov(dst.high_gp(), src.gp());
2570 sar(dst.high_gp(), 31);
2571 return true;
2572 case kExprI64UConvertI32:
2573 if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp());
2574 xor_(dst.high_gp(), dst.high_gp());
2575 return true;
2576 case kExprI64ReinterpretF64:
2577 // Push src to the stack.
2578 AllocateStackSpace(8);
2579 movsd(Operand(esp, 0), src.fp());
2580 // Pop to dst.
2581 pop(dst.low_gp());
2582 pop(dst.high_gp());
2583 return true;
2584 case kExprF32SConvertI32:
2585 cvtsi2ss(dst.fp(), src.gp());
2586 return true;
2587 case kExprF32UConvertI32: {
2588 LiftoffRegList pinned{dst, src};
2589 Register scratch = GetUnusedRegister(kGpReg, pinned).gp();
2590 Cvtui2ss(dst.fp(), src.gp(), scratch);
2591 return true;
2592 }
2593 case kExprF32ConvertF64:
2594 cvtsd2ss(dst.fp(), src.fp());
2595 return true;
2596 case kExprF32ReinterpretI32:
2597 Movd(dst.fp(), src.gp());
2598 return true;
2599 case kExprF64SConvertI32:
2600 Cvtsi2sd(dst.fp(), src.gp());
2601 return true;
2602 case kExprF64UConvertI32: {
2603 LiftoffRegList pinned{dst, src};
2604 Register scratch = GetUnusedRegister(kGpReg, pinned).gp();
2605 Cvtui2sd(dst.fp(), src.gp(), scratch);
2606 return true;
2607 }
2608 case kExprF64ConvertF32:
2609 cvtss2sd(dst.fp(), src.fp());
2610 return true;
2611 case kExprF64ReinterpretI64:
2612 // Push src to the stack.
2613 push(src.high_gp());
2614 push(src.low_gp());
2615 // Pop to dst.
2616 movsd(dst.fp(), Operand(esp, 0));
2617 add(esp, Immediate(8));
2618 return true;
2619 default:
2620 return false;
2621 }
2622}
2623
2624void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) {
2625 Register byte_reg = liftoff::GetTmpByteRegister(this, src);
2626 if (byte_reg != src) mov(byte_reg, src);
2627 movsx_b(dst, byte_reg);
2628}
2629
2630void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) {
2631 movsx_w(dst, src);
2632}
2633
2634void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst,
2635 LiftoffRegister src) {
2636 Register byte_reg = liftoff::GetTmpByteRegister(this, src.low_gp());
2637 if (byte_reg != src.low_gp()) mov(byte_reg, src.low_gp());
2638 movsx_b(dst.low_gp(), byte_reg);
2639 liftoff::SignExtendI32ToI64(this, dst);
2640}
2641
2642void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst,
2643 LiftoffRegister src) {
2644 movsx_w(dst.low_gp(), src.low_gp());
2645 liftoff::SignExtendI32ToI64(this, dst);
2646}
2647
2648void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst,
2649 LiftoffRegister src) {
2650 if (dst.low_gp() != src.low_gp()) mov(dst.low_gp(), src.low_gp());
2651 liftoff::SignExtendI32ToI64(this, dst);
2652}
2653
2654void LiftoffAssembler::emit_jump(Label* label) { jmp(label); }
2655
2656void LiftoffAssembler::emit_jump(Register target) { jmp(target); }
2657
2658void LiftoffAssembler::emit_cond_jump(Condition cond, Label* label,
2659 ValueKind kind, Register lhs,
2660 Register rhs,
2661 const FreezeCacheState& frozen) {
2662 if (rhs != no_reg) {
2663 switch (kind) {
2664 case kRef:
2665 case kRefNull:
2666 DCHECK(cond == kEqual || cond == kNotEqual);
2667 [[fallthrough]];
2668 case kI32:
2669 cmp(lhs, rhs);
2670 break;
2671 default:
2672 UNREACHABLE();
2673 }
2674 } else {
2675 DCHECK_EQ(kind, kI32);
2676 test(lhs, lhs);
2677 }
2678
2679 j(cond, label);
2680}
2681
2682void LiftoffAssembler::emit_i32_cond_jumpi(Condition cond, Label* label,
2683 Register lhs, int imm,
2684 const FreezeCacheState& frozen) {
2685 cmp(lhs, Immediate(imm));
2686 j(cond, label);
2687}
2688
2689namespace liftoff {
2690
2691// Setcc into dst register, given a scratch byte register (might be the same as
2692// dst). Never spills.
2693 inline void setcc_32_no_spill(LiftoffAssembler* assm, Condition cond,
2694 Register dst, Register tmp_byte_reg) {
2695 assm->setcc(cond, tmp_byte_reg);
2696 assm->movzx_b(dst, tmp_byte_reg);
2697}
2698
2699// Setcc into dst register (no constraints). Might spill.
2700inline void setcc_32(LiftoffAssembler* assm, Condition cond, Register dst) {
2701 Register tmp_byte_reg = GetTmpByteRegister(assm, dst);
2702 setcc_32_no_spill(assm, cond, dst, tmp_byte_reg);
2703}
2704
2705} // namespace liftoff
2706
2707void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
2708 test(src, src);
2709 liftoff::setcc_32(this, equal, dst);
2710}
2711
2712void LiftoffAssembler::emit_i32_set_cond(Condition cond, Register dst,
2713 Register lhs, Register rhs) {
2714 cmp(lhs, rhs);
2715 liftoff::setcc_32(this, cond, dst);
2716}
2717
2718void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) {
2719 // Compute the OR of both registers in the src pair, using dst as scratch
2720 // register. Then check whether the result is equal to zero.
2721 if (src.low_gp() == dst) {
2722 or_(dst, src.high_gp());
2723 } else {
2724 if (src.high_gp() != dst) mov(dst, src.high_gp());
2725 or_(dst, src.low_gp());
2726 }
2727 liftoff::setcc_32(this, equal, dst);
2728}
2729
2730namespace liftoff {
2731 inline Condition cond_make_unsigned(Condition cond) {
2732 switch (cond) {
2733 case kLessThan:
2734 return kUnsignedLessThan;
2735 case kLessThanEqual:
2736 return kUnsignedLessThanEqual;
2737 case kGreaterThan:
2738 return kUnsignedGreaterThan;
2739 case kGreaterThanEqual:
2740 return kUnsignedGreaterThanEqual;
2741 default:
2742 return cond;
2743 }
2744}
2745} // namespace liftoff
2746
2747void LiftoffAssembler::emit_i64_set_cond(Condition cond, Register dst,
2748 LiftoffRegister lhs,
2749 LiftoffRegister rhs) {
2750 Condition unsigned_cond = liftoff::cond_make_unsigned(cond);
2751
2752 // Get the tmp byte register out here, such that we don't conditionally spill
2753 // (this cannot be reflected in the cache state).
2754 Register tmp_byte_reg = liftoff::GetTmpByteRegister(this, dst);
2755
2756 // For signed i64 comparisons, we still need to use unsigned comparison for
2757 // the low word (the only bit carrying signedness information is the MSB in
2758 // the high word).
2759 Label setcc;
2760 Label cont;
2761 // Compare high word first. If it differs, use it for the setcc. If it's
2762 // equal, compare the low word and use that for setcc.
2763 cmp(lhs.high_gp(), rhs.high_gp());
2764 j(not_equal, &setcc, Label::kNear);
2765 cmp(lhs.low_gp(), rhs.low_gp());
2766 if (unsigned_cond != cond) {
2767 // If the condition predicate for the low differs from that for the high
2768 // word, emit a separate setcc sequence for the low word.
2769 liftoff::setcc_32_no_spill(this, unsigned_cond, dst, tmp_byte_reg);
2770 jmp(&cont);
2771 }
2772 bind(&setcc);
2773 liftoff::setcc_32_no_spill(this, cond, dst, tmp_byte_reg);
2774 bind(&cont);
2775}
2776
2777namespace liftoff {
2778template <void (Assembler::*cmp_op)(DoubleRegister, DoubleRegister)>
2779 void EmitFloatSetCond(LiftoffAssembler* assm, Condition cond, Register dst,
2780 DoubleRegister lhs, DoubleRegister rhs) {
2781 Label cont;
2782 Label not_nan;
2783
2784 // Get the tmp byte register out here, such that we don't conditionally spill
2785 // (this cannot be reflected in the cache state).
2786 Register tmp_byte_reg = GetTmpByteRegister(assm, dst);
2787
2788 (assm->*cmp_op)(lhs, rhs);
2789 // If PF is one, one of the operands was NaN. This needs special handling.
2790 assm->j(parity_odd, &not_nan, Label::kNear);
2791 // Return 1 for f32.ne, 0 for all other cases.
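// (Per Wasm semantics, any comparison involving NaN is false, except for "ne",
// which is true.)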
2792 if (cond == not_equal) {
2793 assm->mov(dst, Immediate(1));
2794 } else {
2795 assm->xor_(dst, dst);
2796 }
2797 assm->jmp(&cont, Label::kNear);
2798 assm->bind(&not_nan);
2799
2800 setcc_32_no_spill(assm, cond, dst, tmp_byte_reg);
2801 assm->bind(&cont);
2802}
2803} // namespace liftoff
2804
2805void LiftoffAssembler::emit_f32_set_cond(Condition cond, Register dst,
2806 DoubleRegister lhs,
2807 DoubleRegister rhs) {
2808 liftoff::EmitFloatSetCond<&Assembler::ucomiss>(this, cond, dst, lhs, rhs);
2809}
2810
2811void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
2812 DoubleRegister lhs,
2813 DoubleRegister rhs) {
2814 liftoff::EmitFloatSetCond<&Assembler::ucomisd>(this, cond, dst, lhs, rhs);
2815}
2816
2817bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
2818 LiftoffRegister true_value,
2819 LiftoffRegister false_value,
2820 ValueKind kind) {
2821 return false;
2822}
2823
2824void LiftoffAssembler::emit_smi_check(Register obj, Label* target,
2825 SmiCheckMode mode,
2826 const FreezeCacheState& frozen) {
2827 test_b(obj, Immediate(kSmiTagMask));
2828 Condition condition = mode == kJumpOnSmi ? zero : not_zero;
2829 j(condition, target);
2830}
2831
2832namespace liftoff {
2833template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
2834 void (Assembler::*sse_op)(XMMRegister, XMMRegister)>
2835 void EmitSimdCommutativeBinOp(
2836 LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister lhs,
2837 LiftoffRegister rhs, std::optional<CpuFeature> feature = std::nullopt) {
2838 if (CpuFeatures::IsSupported(AVX)) {
2839 CpuFeatureScope scope(assm, AVX);
2840 (assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());
2841 return;
2842 }
2843
2844 std::optional<CpuFeatureScope> sse_scope;
2845 if (feature.has_value()) sse_scope.emplace(assm, *feature);
2846
2847 if (dst.fp() == rhs.fp()) {
2848 (assm->*sse_op)(dst.fp(), lhs.fp());
2849 } else {
2850 if (dst.fp() != lhs.fp()) (assm->movaps)(dst.fp(), lhs.fp());
2851 (assm->*sse_op)(dst.fp(), rhs.fp());
2852 }
2853}
2854
2855template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
2856 void (Assembler::*sse_op)(XMMRegister, XMMRegister)>
2857 void EmitSimdNonCommutativeBinOp(
2858 LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister lhs,
2859 LiftoffRegister rhs, std::optional<CpuFeature> feature = std::nullopt) {
2860 if (CpuFeatures::IsSupported(AVX)) {
2861 CpuFeatureScope scope(assm, AVX);
2862 (assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());
2863 return;
2864 }
2865
2866 std::optional<CpuFeatureScope> sse_scope;
2867 if (feature.has_value()) sse_scope.emplace(assm, *feature);
2868
2869 if (dst.fp() == rhs.fp()) {
2870 assm->movaps(kScratchDoubleReg, rhs.fp());
2871 assm->movaps(dst.fp(), lhs.fp());
2872 (assm->*sse_op)(dst.fp(), kScratchDoubleReg);
2873 } else {
2874 if (dst.fp() != lhs.fp()) assm->movaps(dst.fp(), lhs.fp());
2875 (assm->*sse_op)(dst.fp(), rhs.fp());
2876 }
2877}
2878
2879template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
2880 void (Assembler::*sse_op)(XMMRegister, XMMRegister), uint8_t width>
2881 void EmitSimdShiftOp(LiftoffAssembler* assm, LiftoffRegister dst,
2882 LiftoffRegister operand, LiftoffRegister count) {
2883 static constexpr RegClass tmp_rc = reg_class_for(kI32);
2884 LiftoffRegister tmp = assm->GetUnusedRegister(tmp_rc, LiftoffRegList{count});
2885 constexpr int mask = (1 << width) - 1;
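// Wasm defines shift counts modulo the lane width; masking with {mask} below
// implements that wrap-around.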
2886
2887 assm->mov(tmp.gp(), count.gp());
2888 assm->and_(tmp.gp(), Immediate(mask));
2889 assm->Movd(kScratchDoubleReg, tmp.gp());
2890 if (CpuFeatures::IsSupported(AVX)) {
2891 CpuFeatureScope scope(assm, AVX);
2892 (assm->*avx_op)(dst.fp(), operand.fp(), kScratchDoubleReg);
2893 } else {
2894 if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
2895 (assm->*sse_op)(dst.fp(), kScratchDoubleReg);
2896 }
2897}
2898
2899template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, uint8_t),
2900 void (Assembler::*sse_op)(XMMRegister, uint8_t), uint8_t width>
2901 void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst,
2902 LiftoffRegister operand, int32_t count) {
2903 constexpr int mask = (1 << width) - 1;
2904 uint8_t shift = static_cast<uint8_t>(count & mask);
2905 if (CpuFeatures::IsSupported(AVX)) {
2906 CpuFeatureScope scope(assm, AVX);
2907 (assm->*avx_op)(dst.fp(), operand.fp(), shift);
2908 } else {
2909 if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
2910 (assm->*sse_op)(dst.fp(), shift);
2911 }
2912}
2913
2914inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
2915 LiftoffRegister src) {
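// Preload 1 into {dst}; ptest sets ZF iff {src} is all zeros, in which case the
// cmov below overwrites {dst} with 0.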
2916 Register tmp = assm->GetUnusedRegister(kGpReg, LiftoffRegList{dst}).gp();
2917 assm->xor_(tmp, tmp);
2918 assm->mov(dst.gp(), Immediate(1));
2919 assm->Ptest(src.fp(), src.fp());
2920 assm->cmov(zero, dst.gp(), tmp);
2921}
2922
2923template <void (SharedMacroAssemblerBase::*pcmp)(XMMRegister, XMMRegister)>
2924 inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst,
2925 LiftoffRegister src,
2926 std::optional<CpuFeature> feature = std::nullopt) {
2927 std::optional<CpuFeatureScope> sse_scope;
2928 if (feature.has_value()) sse_scope.emplace(assm, *feature);
2929
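// Compare every lane of {src} against zero; ptest then sets ZF iff no lane was
// zero (i.e. all lanes are non-zero), and the cmov materializes the 0/1 result.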
2930 Register tmp = assm->GetUnusedRegister(kGpReg, LiftoffRegList{dst}).gp();
2931 XMMRegister tmp_simd = liftoff::kScratchDoubleReg;
2932 assm->mov(tmp, Immediate(1));
2933 assm->xor_(dst.gp(), dst.gp());
2934 assm->Pxor(tmp_simd, tmp_simd);
2935 (assm->*pcmp)(tmp_simd, src.fp());
2936 assm->Ptest(tmp_simd, tmp_simd);
2937 assm->cmov(zero, dst.gp(), tmp);
2938}
2939
2940} // namespace liftoff
2941
2942void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
2943 Register offset_reg, uintptr_t offset_imm,
2944 LoadType type,
2945 LoadTransformationKind transform,
2946 uint32_t* protected_load_pc,
2947 bool i64_offset) {
2948 DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
2949 Operand src_op{src_addr, offset_reg, times_1,
2950 static_cast<int32_t>(offset_imm)};
2951 *protected_load_pc = pc_offset();
2952
2953 MachineType memtype = type.mem_type();
2954 if (transform == LoadTransformationKind::kExtend) {
2955 if (memtype == MachineType::Int8()) {
2956 Pmovsxbw(dst.fp(), src_op);
2957 } else if (memtype == MachineType::Uint8()) {
2958 Pmovzxbw(dst.fp(), src_op);
2959 } else if (memtype == MachineType::Int16()) {
2960 Pmovsxwd(dst.fp(), src_op);
2961 } else if (memtype == MachineType::Uint16()) {
2962 Pmovzxwd(dst.fp(), src_op);
2963 } else if (memtype == MachineType::Int32()) {
2964 Pmovsxdq(dst.fp(), src_op);
2965 } else if (memtype == MachineType::Uint32()) {
2966 Pmovzxdq(dst.fp(), src_op);
2967 }
2968 } else if (transform == LoadTransformationKind::kZeroExtend) {
2969 if (memtype == MachineType::Int32()) {
2970 Movss(dst.fp(), src_op);
2971 } else {
2972 DCHECK_EQ(MachineType::Int64(), memtype);
2973 Movsd(dst.fp(), src_op);
2974 }
2975 } else {
2976 DCHECK_EQ(LoadTransformationKind::kSplat, transform);
2977 if (memtype == MachineType::Int8()) {
2978 S128Load8Splat(dst.fp(), src_op, liftoff::kScratchDoubleReg);
2979 } else if (memtype == MachineType::Int16()) {
2980 S128Load16Splat(dst.fp(), src_op, liftoff::kScratchDoubleReg);
2981 } else if (memtype == MachineType::Int32()) {
2982 S128Load32Splat(dst.fp(), src_op);
2983 } else if (memtype == MachineType::Int64()) {
2984 Movddup(dst.fp(), src_op);
2985 }
2986 }
2987}
2988
2989void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
2990 Register addr, Register offset_reg,
2991 uintptr_t offset_imm, LoadType type,
2992 uint8_t laneidx, uint32_t* protected_load_pc,
2993 bool /* i64_offset */) {
2994 DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
2995 Operand src_op{addr, offset_reg, times_1, static_cast<int32_t>(offset_imm)};
2996 *protected_load_pc = pc_offset();
2997
2998 MachineType mem_type = type.mem_type();
2999 if (mem_type == MachineType::Int8()) {
3000 Pinsrb(dst.fp(), src.fp(), src_op, laneidx);
3001 } else if (mem_type == MachineType::Int16()) {
3002 Pinsrw(dst.fp(), src.fp(), src_op, laneidx);
3003 } else if (mem_type == MachineType::Int32()) {
3004 Pinsrd(dst.fp(), src.fp(), src_op, laneidx);
3005 } else {
3006 DCHECK_EQ(MachineType::Int64(), mem_type);
3007 if (laneidx == 0) {
3008 Movlps(dst.fp(), src.fp(), src_op);
3009 } else {
3010 DCHECK_EQ(1, laneidx);
3011 Movhps(dst.fp(), src.fp(), src_op);
3012 }
3013 }
3014}
3015
3016void LiftoffAssembler::StoreLane(Register dst, Register offset,
3017 uintptr_t offset_imm, LiftoffRegister src,
3018 StoreType type, uint8_t lane,
3019 uint32_t* protected_store_pc,
3020 bool /* i64_offset */) {
3021 DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
3022 Operand dst_op = Operand(dst, offset, times_1, offset_imm);
3023 if (protected_store_pc) *protected_store_pc = pc_offset();
3024
3025 MachineRepresentation rep = type.mem_rep();
3026 if (rep == MachineRepresentation::kWord8) {
3027 Pextrb(dst_op, src.fp(), lane);
3028 } else if (rep == MachineRepresentation::kWord16) {
3029 Pextrw(dst_op, src.fp(), lane);
3030 } else if (rep == MachineRepresentation::kWord32) {
3031 S128Store32Lane(dst_op, src.fp(), lane);
3032 } else {
3033 DCHECK_EQ(MachineRepresentation::kWord64, rep);
3034 S128Store64Lane(dst_op, src.fp(), lane);
3035 }
3036}
3037
3038void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
3039 LiftoffRegister lhs,
3040 LiftoffRegister rhs,
3041 const uint8_t shuffle[16],
3042 bool is_swizzle) {
3043 LiftoffRegister tmp = GetUnusedRegister(kGpReg, {});
3044 // Prepare 16 byte aligned buffer for shuffle control mask.
3045 mov(tmp.gp(), esp);
3046 and_(esp, -16);
3047
3048 if (is_swizzle) {
3049 uint32_t imms[4];
3050 // Shuffles that use just one operand are called swizzles; {rhs} can be ignored.
3051 wasm::SimdShuffle::Pack16Lanes(imms, shuffle);
3052 for (int i = 3; i >= 0; i--) {
3053 push_imm32(imms[i]);
3054 }
3055 Pshufb(dst.fp(), lhs.fp(), Operand(esp, 0));
3056 mov(esp, tmp.gp());
3057 return;
3058 }
3059
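// General case: shuffle {lhs} and {rhs} separately with pshufb. Control bytes
// with bit 7 set (0x80) zero their lane, so lanes that select from the other
// input are masked out, and the two partial results are OR'ed together.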
3060 movups(liftoff::kScratchDoubleReg, lhs.fp());
3061 for (int i = 3; i >= 0; i--) {
3062 uint32_t mask = 0;
3063 for (int j = 3; j >= 0; j--) {
3064 uint8_t lane = shuffle[i * 4 + j];
3065 mask <<= 8;
3066 mask |= lane < kSimd128Size ? lane : 0x80;
3067 }
3068 push(Immediate(mask));
3069 }
3070 Pshufb(liftoff::kScratchDoubleReg, lhs.fp(), Operand(esp, 0));
3071
3072 for (int i = 3; i >= 0; i--) {
3073 uint32_t mask = 0;
3074 for (int j = 3; j >= 0; j--) {
3075 uint8_t lane = shuffle[i * 4 + j];
3076 mask <<= 8;
3077 mask |= lane >= kSimd128Size ? (lane & 0x0F) : 0x80;
3078 }
3079 push(Immediate(mask));
3080 }
3081 Pshufb(dst.fp(), rhs.fp(), Operand(esp, 0));
3082 Por(dst.fp(), liftoff::kScratchDoubleReg);
3083 mov(esp, tmp.gp());
3084}
3085
3086void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst,
3087 LiftoffRegister lhs,
3088 LiftoffRegister rhs) {
3089 Register scratch = GetUnusedRegister(RegClass::kGpReg, {}).gp();
3090 I8x16Swizzle(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg,
3091 scratch);
3092}
3093
3094void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
3095 LiftoffRegister lhs,
3096 LiftoffRegister rhs) {
3097 Register tmp = GetUnusedRegister(RegClass::kGpReg, {}).gp();
3098 I8x16Swizzle(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg, tmp,
3099 true);
3100}
3101
3102void LiftoffAssembler::emit_i32x4_relaxed_trunc_f32x4_s(LiftoffRegister dst,
3103 LiftoffRegister src) {
3104 Cvttps2dq(dst.fp(), src.fp());
3105}
3106
3107void LiftoffAssembler::emit_i32x4_relaxed_trunc_f32x4_u(LiftoffRegister dst,
3108 LiftoffRegister src) {
3109 emit_i32x4_uconvert_f32x4(dst, src);
3110}
3111
3112void LiftoffAssembler::emit_i32x4_relaxed_trunc_f64x2_s_zero(
3113 LiftoffRegister dst, LiftoffRegister src) {
3114 Cvttpd2dq(dst.fp(), src.fp());
3115}
3116
3117void LiftoffAssembler::emit_i32x4_relaxed_trunc_f64x2_u_zero(
3118 LiftoffRegister dst, LiftoffRegister src) {
3119 emit_i32x4_trunc_sat_f64x2_u_zero(dst, src);
3120}
3121
3122void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
3123 LiftoffRegister src1,
3124 LiftoffRegister src2,
3125 LiftoffRegister mask,
3126 int lane_width) {
3127 // Passing {src2} first is not a typo: the x86 instructions copy from the
3128 // second operand when the mask is 1, contrary to the Wasm instruction.
3129 if (lane_width == 8) {
3130 Pblendvb(dst.fp(), src2.fp(), src1.fp(), mask.fp());
3131 } else if (lane_width == 32) {
3132 Blendvps(dst.fp(), src2.fp(), src1.fp(), mask.fp());
3133 } else if (lane_width == 64) {
3134 Blendvpd(dst.fp(), src2.fp(), src1.fp(), mask.fp());
3135 } else {
3136 UNREACHABLE();
3137 }
3138}
3139
3140void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
3141 LiftoffRegister src) {
3142 Register scratch = GetUnusedRegister(RegClass::kGpReg, {}).gp();
3143 XMMRegister tmp =
3144 GetUnusedRegister(RegClass::kFpReg, LiftoffRegList{dst, src}).fp();
3145 I8x16Popcnt(dst.fp(), src.fp(), liftoff::kScratchDoubleReg, tmp, scratch);
3146}
3147
3148void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
3149 LiftoffRegister src) {
3150 I8x16Splat(dst.fp(), src.gp(), liftoff::kScratchDoubleReg);
3151}
3152
3153void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
3154 LiftoffRegister src) {
3155 I16x8Splat(dst.fp(), src.gp());
3156}
3157
3158void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
3159 LiftoffRegister src) {
3160 Movd(dst.fp(), src.gp());
3161 Pshufd(dst.fp(), dst.fp(), uint8_t{0});
3162}
3163
3164void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
3165 LiftoffRegister src) {
3166 Pinsrd(dst.fp(), src.low_gp(), 0);
3167 Pinsrd(dst.fp(), src.high_gp(), 1);
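// 0x44 == 0b01'00'01'00 selects dwords {0, 1, 0, 1}, duplicating the low 64
// bits into both halves of {dst}.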
3168 Pshufd(dst.fp(), dst.fp(), uint8_t{0x44});
3169}
3170
3171void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
3172 LiftoffRegister src) {
3173 F32x4Splat(dst.fp(), src.fp());
3174}
3175
3176void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
3177 LiftoffRegister src) {
3178 Movddup(dst.fp(), src.fp());
3179}
3180
3181void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
3182 LiftoffRegister rhs) {
3183 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqb, &Assembler::pcmpeqb>(
3184 this, dst, lhs, rhs);
3185}
3186
3187void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs,
3188 LiftoffRegister rhs) {
3189 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqb, &Assembler::pcmpeqb>(
3190 this, dst, lhs, rhs);
3191 Pcmpeqb(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3192 Pxor(dst.fp(), liftoff::kScratchDoubleReg);
3193}
3194
3195void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
3196 LiftoffRegister rhs) {
3197 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtb,
3198 &Assembler::pcmpgtb>(this, dst, lhs,
3199 rhs);
3200}
3201
3202void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
3203 LiftoffRegister rhs) {
3204 DoubleRegister ref = rhs.fp();
3205 if (dst == rhs) {
3206 Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3207 ref = liftoff::kScratchDoubleReg;
3208 }
3209 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
3210 this, dst, lhs, rhs);
3211 Pcmpeqb(dst.fp(), ref);
3212 Pcmpeqb(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3213 Pxor(dst.fp(), liftoff::kScratchDoubleReg);
3214}
3215
3216void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
3217 LiftoffRegister rhs) {
3218 DoubleRegister ref = rhs.fp();
3219 if (dst == rhs) {
3220 Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3221 ref = liftoff::kScratchDoubleReg;
3222 }
3223 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
3224 this, dst, lhs, rhs, SSE4_1);
3225 Pcmpeqb(dst.fp(), ref);
3226}
3227
3228void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
3229 LiftoffRegister rhs) {
3230 DoubleRegister ref = rhs.fp();
3231 if (dst == rhs) {
3232 Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3233 ref = liftoff::kScratchDoubleReg;
3234 }
3235 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
3236 this, dst, lhs, rhs);
3237 Pcmpeqb(dst.fp(), ref);
3238}
3239
3240void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
3241 LiftoffRegister rhs) {
3242 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqw, &Assembler::pcmpeqw>(
3243 this, dst, lhs, rhs);
3244}
3245
3246void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
3247 LiftoffRegister rhs) {
3248 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqw, &Assembler::pcmpeqw>(
3249 this, dst, lhs, rhs);
3250 Pcmpeqw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3251 Pxor(dst.fp(), liftoff::kScratchDoubleReg);
3252}
3253
3254void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
3255 LiftoffRegister rhs) {
3256 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtw,
3257 &Assembler::pcmpgtw>(this, dst, lhs,
3258 rhs);
3259}
3260
3261void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
3262 LiftoffRegister rhs) {
3263 DoubleRegister ref = rhs.fp();
3264 if (dst == rhs) {
3265 Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3266 ref = liftoff::kScratchDoubleReg;
3267 }
3268 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
3269 this, dst, lhs, rhs, SSE4_1);
3270 Pcmpeqw(dst.fp(), ref);
3271 Pcmpeqw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3272 Pxor(dst.fp(), liftoff::kScratchDoubleReg);
3273}
3274
3275void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
3276 LiftoffRegister rhs) {
3277 DoubleRegister ref = rhs.fp();
3278 if (dst == rhs) {
3279 Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3280 ref = liftoff::kScratchDoubleReg;
3281 }
3282 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
3283 this, dst, lhs, rhs);
3284 Pcmpeqw(dst.fp(), ref);
3285}
3286
3287void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
3288 LiftoffRegister rhs) {
3289 DoubleRegister ref = rhs.fp();
3290 if (dst == rhs) {
3291 Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3292 ref = liftoff::kScratchDoubleReg;
3293 }
3294 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
3295 this, dst, lhs, rhs, SSE4_1);
3296 Pcmpeqw(dst.fp(), ref);
3297}
3298
3299void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
3300 LiftoffRegister rhs) {
3301 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqd, &Assembler::pcmpeqd>(
3302 this, dst, lhs, rhs);
3303}
3304
3305void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
3306 LiftoffRegister rhs) {
3307 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqd, &Assembler::pcmpeqd>(
3308 this, dst, lhs, rhs);
3309 Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3310 Pxor(dst.fp(), liftoff::kScratchDoubleReg);
3311}
3312
3313void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
3314 LiftoffRegister rhs) {
3315 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtd,
3316 &Assembler::pcmpgtd>(this, dst, lhs,
3317 rhs);
3318}
3319
3320void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
3321 LiftoffRegister rhs) {
3322 DoubleRegister ref = rhs.fp();
3323 if (dst == rhs) {
3324 Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3325 ref = liftoff::kScratchDoubleReg;
3326 }
3327 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
3328 this, dst, lhs, rhs, SSE4_1);
3329 Pcmpeqd(dst.fp(), ref);
3330 Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3331 Pxor(dst.fp(), liftoff::kScratchDoubleReg);
3332}
3333
3334void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
3335 LiftoffRegister rhs) {
3336 DoubleRegister ref = rhs.fp();
3337 if (dst == rhs) {
3338 Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3339 ref = liftoff::kScratchDoubleReg;
3340 }
3341 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsd, &Assembler::pminsd>(
3342 this, dst, lhs, rhs, SSE4_1);
3343 Pcmpeqd(dst.fp(), ref);
3344}
3345
3346void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
3347 LiftoffRegister rhs) {
3348 DoubleRegister ref = rhs.fp();
3349 if (dst == rhs) {
3350 Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3351 ref = liftoff::kScratchDoubleReg;
3352 }
3353 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminud, &Assembler::pminud>(
3354 this, dst, lhs, rhs, SSE4_1);
3355 Pcmpeqd(dst.fp(), ref);
3356}
3357
3358void LiftoffAssembler::emit_i64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
3359 LiftoffRegister rhs) {
3360 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqq, &Assembler::pcmpeqq>(
3361 this, dst, lhs, rhs, SSE4_1);
3362}
3363
3364void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
3365 LiftoffRegister rhs) {
3366 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqq, &Assembler::pcmpeqq>(
3367 this, dst, lhs, rhs, SSE4_1);
3368 Pcmpeqq(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3369 Pxor(dst.fp(), liftoff::kScratchDoubleReg);
3370}
3371
3372void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
3373 LiftoffRegister rhs) {
3374 // Different register alias requirements depending on CpuFeatures supported:
3375 if (CpuFeatures::IsSupported(AVX) || CpuFeatures::IsSupported(SSE4_2)) {
3376 // 1. AVX, or SSE4_2 no requirements (I64x2GtS takes care of aliasing).
3377 I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3378 } else {
3379 // 2. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
3380 if (dst == lhs || dst == rhs) {
3381 LiftoffRegister tmp =
3382 GetUnusedRegister(RegClass::kFpReg, LiftoffRegList{lhs, rhs});
3383 I64x2GtS(tmp.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3384 movaps(dst.fp(), tmp.fp());
3385 } else {
3386 I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3387 }
3388 }
3389}
3390
3391void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
3392 LiftoffRegister rhs) {
3393 // Different register alias requirements depending on CpuFeatures supported:
3394 if (CpuFeatures::IsSupported(AVX)) {
3395 // 1. AVX, no requirements.
3396 I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3397 } else if (CpuFeatures::IsSupported(SSE4_2)) {
3398 // 2. SSE4_2, dst != lhs.
3399 if (dst == lhs) {
3400 LiftoffRegister tmp =
3401 GetUnusedRegister(RegClass::kFpReg, {rhs}, LiftoffRegList{lhs});
3402 // macro-assembler uses kScratchDoubleReg, so don't use it.
3403 I64x2GeS(tmp.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3404 movaps(dst.fp(), tmp.fp());
3405 } else {
3406 I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3407 }
3408 } else {
3409 // 3. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
3410 if (dst == lhs || dst == rhs) {
3411 LiftoffRegister tmp =
3412 GetUnusedRegister(RegClass::kFpReg, LiftoffRegList{lhs, rhs});
3413 I64x2GeS(tmp.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3414 movaps(dst.fp(), tmp.fp());
3415 } else {
3416 I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3417 }
3418 }
3419}
3420
3421void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
3422 LiftoffRegister rhs) {
3423 liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqps, &Assembler::cmpeqps>(
3424 this, dst, lhs, rhs);
3425}
3426
3427void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
3428 LiftoffRegister rhs) {
3429 liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpneqps,
3430 &Assembler::cmpneqps>(this, dst, lhs, rhs);
3431}
3432
3433void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
3434 LiftoffRegister rhs) {
3435 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpltps,
3436 &Assembler::cmpltps>(this, dst, lhs,
3437 rhs);
3438}
3439
3440void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
3441 LiftoffRegister rhs) {
3442 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpleps,
3443 &Assembler::cmpleps>(this, dst, lhs,
3444 rhs);
3445}
3446
3447void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
3448 LiftoffRegister rhs) {
3449 liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqpd, &Assembler::cmpeqpd>(
3450 this, dst, lhs, rhs);
3451}
3452
3453void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
3454 LiftoffRegister rhs) {
3455 liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpneqpd,
3456 &Assembler::cmpneqpd>(this, dst, lhs, rhs);
3457}
3458
3459void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
3460 LiftoffRegister rhs) {
3461 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpltpd,
3462 &Assembler::cmpltpd>(this, dst, lhs,
3463 rhs);
3464}
3465
3466void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
3467 LiftoffRegister rhs) {
3468 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmplepd,
3469 &Assembler::cmplepd>(this, dst, lhs,
3470 rhs);
3471}
3472
3473void LiftoffAssembler::emit_s128_const(LiftoffRegister dst,
3474 const uint8_t imms[16]) {
3475 uint64_t vals[2];
3476 memcpy(vals, imms, sizeof(vals));
3477 MacroAssembler::Move(dst.fp(), vals[0]);
3478
3479 uint64_t high = vals[1];
3480 Register tmp = GetUnusedRegister(RegClass::kGpReg, {}).gp();
3481 MacroAssembler::Move(tmp, Immediate(high & 0xffff'ffff));
3482 Pinsrd(dst.fp(), tmp, 2);
3483
3484 MacroAssembler::Move(tmp, Immediate(high >> 32));
3485 Pinsrd(dst.fp(), tmp, 3);
3486}
3487
3488void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) {
3489 S128Not(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
3490}
3491
3492void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs,
3493 LiftoffRegister rhs) {
3494 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpand, &Assembler::pand>(
3495 this, dst, lhs, rhs);
3496}
3497
3498void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs,
3499 LiftoffRegister rhs) {
3500 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpor, &Assembler::por>(
3501 this, dst, lhs, rhs);
3502}
3503
3504void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
3505 LiftoffRegister rhs) {
3506 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpxor, &Assembler::pxor>(
3507 this, dst, lhs, rhs);
3508}
3509
3510void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
3511 LiftoffRegister src1,
3512 LiftoffRegister src2,
3513 LiftoffRegister mask) {
3514 // Ensure that we don't overwrite any inputs with the movaps below.
3515 DCHECK_NE(dst, src1);
3516 DCHECK_NE(dst, src2);
3517 if (!CpuFeatures::IsSupported(AVX) && dst != mask) {
3518 movaps(dst.fp(), mask.fp());
3519 S128Select(dst.fp(), dst.fp(), src1.fp(), src2.fp(),
3520 liftoff::kScratchDoubleReg);
3521 } else {
3522 S128Select(dst.fp(), mask.fp(), src1.fp(), src2.fp(),
3523 liftoff::kScratchDoubleReg);
3524 }
3525}
3526
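// Note: when dst aliases src, the value is negated in place: pcmpeqd creates
// an all-ones mask and psignb negates every byte whose mask byte is negative
// (i.e. all of them). Otherwise dst is zeroed and src subtracted from it.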
3527void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
3528 LiftoffRegister src) {
3529 if (dst.fp() == src.fp()) {
3530 Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3531 Psignb(dst.fp(), liftoff::kScratchDoubleReg);
3532 } else {
3533 Pxor(dst.fp(), dst.fp());
3534 Psubb(dst.fp(), src.fp());
3535 }
3536}
3537
3538void LiftoffAssembler::emit_v128_anytrue(LiftoffRegister dst,
3539 LiftoffRegister src) {
3540 liftoff::EmitAnyTrue(this, dst, src);
3541}
3542
3543void LiftoffAssembler::emit_i8x16_alltrue(LiftoffRegister dst,
3544 LiftoffRegister src) {
3545 liftoff::EmitAllTrue<&MacroAssembler::Pcmpeqb>(this, dst, src);
3546}
3547
3548void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
3549 LiftoffRegister src) {
3550 Pmovmskb(dst.gp(), src.fp());
3551}
3552
3553void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
3554 LiftoffRegister rhs) {
3555 LiftoffRegister tmp = GetUnusedRegister(kGpReg, LiftoffRegList{rhs});
3556 LiftoffRegister tmp_simd =
3557 GetUnusedRegister(kFpReg, LiftoffRegList{dst, lhs});
3558 I8x16Shl(dst.fp(), lhs.fp(), rhs.gp(), tmp.gp(), liftoff::kScratchDoubleReg,
3559 tmp_simd.fp());
3560}
3561
3562void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
3563 int32_t rhs) {
3564 LiftoffRegister tmp = GetUnusedRegister(kGpReg, {});
3565 I8x16Shl(dst.fp(), lhs.fp(), rhs, tmp.gp(), liftoff::kScratchDoubleReg);
3566}
3567
3568void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
3569 LiftoffRegister lhs,
3570 LiftoffRegister rhs) {
3571 Register tmp = GetUnusedRegister(kGpReg, LiftoffRegList{rhs}).gp();
3572 XMMRegister tmp_simd =
3573 GetUnusedRegister(kFpReg, LiftoffRegList{dst, lhs}).fp();
3574 I8x16ShrS(dst.fp(), lhs.fp(), rhs.gp(), tmp, liftoff::kScratchDoubleReg,
3575 tmp_simd);
3576}
3577
3578void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
3579 LiftoffRegister lhs, int32_t rhs) {
3580 I8x16ShrS(dst.fp(), lhs.fp(), rhs, liftoff::kScratchDoubleReg);
3581}
3582
3583void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
3584 LiftoffRegister lhs,
3585 LiftoffRegister rhs) {
3586 Register tmp = GetUnusedRegister(kGpReg, LiftoffRegList{rhs}).gp();
3587 XMMRegister tmp_simd =
3588 GetUnusedRegister(kFpReg, LiftoffRegList{dst, lhs}).fp();
3589 I8x16ShrU(dst.fp(), lhs.fp(), rhs.gp(), tmp, liftoff::kScratchDoubleReg,
3590 tmp_simd);
3591}
3592
3593void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
3594 LiftoffRegister lhs, int32_t rhs) {
3595 Register tmp = GetUnusedRegister(kGpReg, {}).gp();
3596 I8x16ShrU(dst.fp(), lhs.fp(), rhs, tmp, liftoff::kScratchDoubleReg);
3597}
3598
3599void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
3600 LiftoffRegister rhs) {
3601 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>(
3602 this, dst, lhs, rhs);
3603}
3604
3605void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,
3606 LiftoffRegister lhs,
3607 LiftoffRegister rhs) {
3608 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsb, &Assembler::paddsb>(
3609 this, dst, lhs, rhs);
3610}
3611
3612void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst,
3613 LiftoffRegister lhs,
3614 LiftoffRegister rhs) {
3615 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusb, &Assembler::paddusb>(
3616 this, dst, lhs, rhs);
3617}
3618
3619void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
3620 LiftoffRegister rhs) {
3621 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubb, &Assembler::psubb>(
3622 this, dst, lhs, rhs);
3623}
3624
3625void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst,
3626 LiftoffRegister lhs,
3627 LiftoffRegister rhs) {
3628 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubsb, &Assembler::psubsb>(
3629 this, dst, lhs, rhs);
3630}
3631
3632void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst,
3633 LiftoffRegister lhs,
3634 LiftoffRegister rhs) {
3635 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubusb,
3636 &Assembler::psubusb>(this, dst, lhs,
3637 rhs);
3638}
3639
3640void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
3641 LiftoffRegister lhs,
3642 LiftoffRegister rhs) {
3643 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
3644 this, dst, lhs, rhs, SSE4_1);
3645}
3646
3647void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
3648 LiftoffRegister lhs,
3649 LiftoffRegister rhs) {
3650 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
3651 this, dst, lhs, rhs);
3652}
3653
3654void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
3655 LiftoffRegister lhs,
3656 LiftoffRegister rhs) {
3657 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsb, &Assembler::pmaxsb>(
3658 this, dst, lhs, rhs, SSE4_1);
3659}
3660
3661void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
3662 LiftoffRegister lhs,
3663 LiftoffRegister rhs) {
3664 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
3665 this, dst, lhs, rhs);
3666}
3667
3668void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
3669 LiftoffRegister src) {
3670 if (dst.fp() == src.fp()) {
3671 Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3672 Psignw(dst.fp(), liftoff::kScratchDoubleReg);
3673 } else {
3674 Pxor(dst.fp(), dst.fp());
3675 Psubw(dst.fp(), src.fp());
3676 }
3677}
3678
3679void LiftoffAssembler::emit_i16x8_alltrue(LiftoffRegister dst,
3680 LiftoffRegister src) {
3681 liftoff::EmitAllTrue<&MacroAssembler::Pcmpeqw>(this, dst, src);
3682}
3683
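// Note: packsswb packs the (ignored) scratch words into the low 8 bytes and
// the source words into the high 8 bytes; pmovmskb then collects all 16 byte
// sign bits, and the shift by 8 keeps only the 8 bits that came from src.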
3684void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
3685 LiftoffRegister src) {
3686 XMMRegister tmp = liftoff::kScratchDoubleReg;
3687 Packsswb(tmp, src.fp());
3688 Pmovmskb(dst.gp(), tmp);
3689 shr(dst.gp(), 8);
3690}
3691
3692void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
3693 LiftoffRegister rhs) {
3694 liftoff::EmitSimdShiftOp<&Assembler::vpsllw, &Assembler::psllw, 4>(this, dst,
3695 lhs, rhs);
3696}
3697
3698void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
3699 int32_t rhs) {
3700 liftoff::EmitSimdShiftOpImm<&Assembler::vpsllw, &Assembler::psllw, 4>(
3701 this, dst, lhs, rhs);
3702}
3703
3704void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
3705 LiftoffRegister lhs,
3706 LiftoffRegister rhs) {
3707 liftoff::EmitSimdShiftOp<&Assembler::vpsraw, &Assembler::psraw, 4>(this, dst,
3708 lhs, rhs);
3709}
3710
3711void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
3712 LiftoffRegister lhs, int32_t rhs) {
3713 liftoff::EmitSimdShiftOpImm<&Assembler::vpsraw, &Assembler::psraw, 4>(
3714 this, dst, lhs, rhs);
3715}
3716
3717void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
3718 LiftoffRegister lhs,
3719 LiftoffRegister rhs) {
3720 liftoff::EmitSimdShiftOp<&Assembler::vpsrlw, &Assembler::psrlw, 4>(this, dst,
3721 lhs, rhs);
3722}
3723
3724void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
3725 LiftoffRegister lhs, int32_t rhs) {
3726 liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlw, &Assembler::psrlw, 4>(
3727 this, dst, lhs, rhs);
3728}
3729
3730void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
3731 LiftoffRegister rhs) {
3732 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>(
3733 this, dst, lhs, rhs);
3734}
3735
3736void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
3737 LiftoffRegister lhs,
3738 LiftoffRegister rhs) {
3739 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsw, &Assembler::paddsw>(
3740 this, dst, lhs, rhs);
3741}
3742
3743void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
3744 LiftoffRegister lhs,
3745 LiftoffRegister rhs) {
3746 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusw, &Assembler::paddusw>(
3747 this, dst, lhs, rhs);
3748}
3749
3750void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
3751 LiftoffRegister rhs) {
3752 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubw, &Assembler::psubw>(
3753 this, dst, lhs, rhs);
3754}
3755
3756void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst,
3757 LiftoffRegister lhs,
3758 LiftoffRegister rhs) {
3759 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubsw, &Assembler::psubsw>(
3760 this, dst, lhs, rhs);
3761}
3762
3763void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst,
3764 LiftoffRegister lhs,
3765 LiftoffRegister rhs) {
3766 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubusw,
3767 &Assembler::psubusw>(this, dst, lhs,
3768 rhs);
3769}
3770
3771void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
3772 LiftoffRegister rhs) {
3773 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmullw, &Assembler::pmullw>(
3774 this, dst, lhs, rhs);
3775}
3776
3777void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
3778 LiftoffRegister lhs,
3779 LiftoffRegister rhs) {
3780 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
3781 this, dst, lhs, rhs);
3782}
3783
3784void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
3785 LiftoffRegister lhs,
3786 LiftoffRegister rhs) {
3787 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
3788 this, dst, lhs, rhs, SSE4_1);
3789}
3790
3791void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
3792 LiftoffRegister lhs,
3793 LiftoffRegister rhs) {
3794 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsw, &Assembler::pmaxsw>(
3795 this, dst, lhs, rhs);
3796}
3797
3798void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
3799 LiftoffRegister lhs,
3800 LiftoffRegister rhs) {
3801 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
3802 this, dst, lhs, rhs, SSE4_1);
3803}
3804
3805void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
3806 LiftoffRegister src) {
3807 I16x8ExtAddPairwiseI8x16S(dst.fp(), src.fp(), liftoff::kScratchDoubleReg,
3808 GetUnusedRegister(kGpReg, {}).gp());
3809}
3810
3811void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
3812 LiftoffRegister src) {
3813 I16x8ExtAddPairwiseI8x16U(dst.fp(), src.fp(),
3814 GetUnusedRegister(kGpReg, {}).gp());
3815}
3816
3817void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
3818 LiftoffRegister src1,
3819 LiftoffRegister src2) {
3820 I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
3821 /*is_signed=*/true);
3822}
3823
3824void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
3825 LiftoffRegister src1,
3826 LiftoffRegister src2) {
3827 I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
3828 /*is_signed=*/false);
3829}
3830
3831void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
3832 LiftoffRegister src1,
3833 LiftoffRegister src2) {
3834 I16x8ExtMulHighS(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg);
3835}
3836
3837void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
3838 LiftoffRegister src1,
3839 LiftoffRegister src2) {
3840 I16x8ExtMulHighU(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg);
3841}
3842
3843void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
3844 LiftoffRegister src1,
3845 LiftoffRegister src2) {
3846 I16x8Q15MulRSatS(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg);
3847}
3848
3849void LiftoffAssembler::emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
3850 LiftoffRegister src1,
3851 LiftoffRegister src2) {
3852 if (CpuFeatures::IsSupported(AVX) || dst == src1) {
3853 Pmulhrsw(dst.fp(), src1.fp(), src2.fp());
3854 } else {
3855 movdqa(dst.fp(), src1.fp());
3856 pmulhrsw(dst.fp(), src2.fp());
3857 }
3858}
3859
3860void LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
3861 LiftoffRegister lhs,
3862 LiftoffRegister rhs) {
3863 I16x8DotI8x16I7x16S(dst.fp(), lhs.fp(), rhs.fp());
3864}
3865
3866void LiftoffAssembler::emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
3867 LiftoffRegister lhs,
3868 LiftoffRegister rhs,
3869 LiftoffRegister acc) {
3870 static constexpr RegClass tmp_rc = reg_class_for(kS128);
3871 LiftoffRegister tmp1 =
3872 GetUnusedRegister(tmp_rc, LiftoffRegList{dst, lhs, rhs, acc});
3873 LiftoffRegister tmp2 =
3874 GetUnusedRegister(tmp_rc, LiftoffRegList{dst, lhs, rhs, acc, tmp1});
3875 I32x4DotI8x16I7x16AddS(dst.fp(), lhs.fp(), rhs.fp(), acc.fp(), tmp1.fp(),
3876 tmp2.fp());
3877}
3878
3879void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
3880 LiftoffRegister src) {
3881 if (dst.fp() == src.fp()) {
3882 Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3883 Psignd(dst.fp(), liftoff::kScratchDoubleReg);
3884 } else {
3885 Pxor(dst.fp(), dst.fp());
3886 Psubd(dst.fp(), src.fp());
3887 }
3888}
3889
3890void LiftoffAssembler::emit_i32x4_alltrue(LiftoffRegister dst,
3891 LiftoffRegister src) {
3892 liftoff::EmitAllTrue<&MacroAssembler::Pcmpeqd>(this, dst, src);
3893}
3894
3895void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
3896 LiftoffRegister src) {
3897 Movmskps(dst.gp(), src.fp());
3898}
3899
3900void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
3901 LiftoffRegister rhs) {
3902 liftoff::EmitSimdShiftOp<&Assembler::vpslld, &Assembler::pslld, 5>(this, dst,
3903 lhs, rhs);
3904}
3905
3906void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
3907 int32_t rhs) {
3908 liftoff::EmitSimdShiftOpImm<&Assembler::vpslld, &Assembler::pslld, 5>(
3909 this, dst, lhs, rhs);
3910}
3911
3912void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
3913 LiftoffRegister lhs,
3914 LiftoffRegister rhs) {
3915 liftoff::EmitSimdShiftOp<&Assembler::vpsrad, &Assembler::psrad, 5>(this, dst,
3916 lhs, rhs);
3917}
3918
3919void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
3920 LiftoffRegister lhs, int32_t rhs) {
3921 liftoff::EmitSimdShiftOpImm<&Assembler::vpsrad, &Assembler::psrad, 5>(
3922 this, dst, lhs, rhs);
3923}
3924
3925void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
3926 LiftoffRegister lhs,
3927 LiftoffRegister rhs) {
3928 liftoff::EmitSimdShiftOp<&Assembler::vpsrld, &Assembler::psrld, 5>(this, dst,
3929 lhs, rhs);
3930}
3931
3932void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
3933 LiftoffRegister lhs, int32_t rhs) {
3934 liftoff::EmitSimdShiftOpImm<&Assembler::vpsrld, &Assembler::psrld, 5>(
3935 this, dst, lhs, rhs);
3936}
3937
3938void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
3939 LiftoffRegister rhs) {
3940 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddd, &Assembler::paddd>(
3941 this, dst, lhs, rhs);
3942}
3943
3944void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
3945 LiftoffRegister rhs) {
3946 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubd, &Assembler::psubd>(
3947 this, dst, lhs, rhs);
3948}
3949
3950void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
3951 LiftoffRegister rhs) {
3952 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmulld, &Assembler::pmulld>(
3953 this, dst, lhs, rhs, SSE4_1);
3954}
3955
3956void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
3957 LiftoffRegister lhs,
3958 LiftoffRegister rhs) {
3959 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsd, &Assembler::pminsd>(
3960 this, dst, lhs, rhs, SSE4_1);
3961}
3962
3963void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
3964 LiftoffRegister lhs,
3965 LiftoffRegister rhs) {
3966 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminud, &Assembler::pminud>(
3967 this, dst, lhs, rhs, SSE4_1);
3968}
3969
3970void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
3971 LiftoffRegister lhs,
3972 LiftoffRegister rhs) {
3973 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsd, &Assembler::pmaxsd>(
3974 this, dst, lhs, rhs, SSE4_1);
3975}
3976
3977void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
3978 LiftoffRegister lhs,
3979 LiftoffRegister rhs) {
3980 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
3981 this, dst, lhs, rhs, SSE4_1);
3982}
3983
3984void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
3985 LiftoffRegister lhs,
3986 LiftoffRegister rhs) {
3987 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaddwd, &Assembler::pmaddwd>(
3988 this, dst, lhs, rhs);
3989}
3990
3991void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,
3992 LiftoffRegister src) {
3993 I32x4ExtAddPairwiseI16x8S(dst.fp(), src.fp(),
3994 GetUnusedRegister(kGpReg, {}).gp());
3995}
3996
3997void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,
3998 LiftoffRegister src) {
3999 I32x4ExtAddPairwiseI16x8U(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
4000}
4001
4002namespace liftoff {
4003 // Helper function that checks for register aliasing and AVX support, and
4004 // moves registers around before calling the actual macro-assembler function.
4005 inline void I32x4ExtMulHelper(LiftoffAssembler* assm, XMMRegister dst,
4006                               XMMRegister src1, XMMRegister src2, bool low,
4007 bool is_signed) {
4008 // I32x4ExtMul requires dst == src1 if AVX is not supported.
4009 if (CpuFeatures::IsSupported(AVX) || dst == src1) {
4010 assm->I32x4ExtMul(dst, src1, src2, liftoff::kScratchDoubleReg, low,
4011 is_signed);
4012 } else if (dst != src2) {
4013 // dst != src1 && dst != src2
4014 assm->movaps(dst, src1);
4015 assm->I32x4ExtMul(dst, dst, src2, liftoff::kScratchDoubleReg, low,
4016 is_signed);
4017 } else {
4018 // dst == src2
4019 // Extended multiplication is commutative, so the operands can be swapped.
4020 assm->movaps(dst, src2);
4021 assm->I32x4ExtMul(dst, dst, src1, liftoff::kScratchDoubleReg, low,
4022 is_signed);
4023 }
4024}
4025} // namespace liftoff
4026
4027void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
4028 LiftoffRegister src1,
4029 LiftoffRegister src2) {
4030 liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(), /*low=*/true,
4031 /*is_signed=*/true);
4032}
4033
4034void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
4035 LiftoffRegister src1,
4036 LiftoffRegister src2) {
4037 liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(), /*low=*/true,
4038 /*is_signed=*/false);
4039}
4040
4041void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
4042 LiftoffRegister src1,
4043 LiftoffRegister src2) {
4044 liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
4045 /*low=*/false,
4046 /*is_signed=*/true);
4047}
4048
4049void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
4050 LiftoffRegister src1,
4051 LiftoffRegister src2) {
4052 liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
4053 /*low=*/false,
4054 /*is_signed=*/false);
4055}
4056
4057void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
4058 LiftoffRegister src) {
4059 I64x2Neg(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
4060}
4061
4062void LiftoffAssembler::emit_i64x2_alltrue(LiftoffRegister dst,
4063 LiftoffRegister src) {
4064 liftoff::EmitAllTrue<&MacroAssembler::Pcmpeqq>(this, dst, src, SSE4_1);
4065}
4066
4067void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
4068 LiftoffRegister rhs) {
4069 liftoff::EmitSimdShiftOp<&Assembler::vpsllq, &Assembler::psllq, 6>(this, dst,
4070 lhs, rhs);
4071}
4072
4073void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
4074 int32_t rhs) {
4075 liftoff::EmitSimdShiftOpImm<&Assembler::vpsllq, &Assembler::psllq, 6>(
4076 this, dst, lhs, rhs);
4077}
4078
4079void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
4080 LiftoffRegister lhs,
4081 LiftoffRegister rhs) {
4082 XMMRegister tmp =
4083 GetUnusedRegister(RegClass::kFpReg, LiftoffRegList{dst, lhs}).fp();
4084 Register scratch =
4085 GetUnusedRegister(RegClass::kGpReg, LiftoffRegList{rhs}).gp();
4086
4087 I64x2ShrS(dst.fp(), lhs.fp(), rhs.gp(), liftoff::kScratchDoubleReg, tmp,
4088 scratch);
4089}
4090
4091void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
4092 LiftoffRegister lhs, int32_t rhs) {
4093 I64x2ShrS(dst.fp(), lhs.fp(), rhs & 0x3F, liftoff::kScratchDoubleReg);
4094}
4095
4096void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
4097 LiftoffRegister lhs,
4098 LiftoffRegister rhs) {
4099 liftoff::EmitSimdShiftOp<&Assembler::vpsrlq, &Assembler::psrlq, 6>(this, dst,
4100 lhs, rhs);
4101}
4102
4103void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
4104 LiftoffRegister lhs, int32_t rhs) {
4105 liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlq, &Assembler::psrlq, 6>(
4106 this, dst, lhs, rhs);
4107}
4108
4109void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
4110 LiftoffRegister rhs) {
4111 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>(
4112 this, dst, lhs, rhs);
4113}
4114
4115void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
4116 LiftoffRegister rhs) {
4117 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubq, &Assembler::psubq>(
4118 this, dst, lhs, rhs);
4119}
4120
4121void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
4122 LiftoffRegister rhs) {
4123 static constexpr RegClass tmp_rc = reg_class_for(kS128);
4124 LiftoffRegister tmp1 =
4125 GetUnusedRegister(tmp_rc, LiftoffRegList{dst, lhs, rhs});
4126 LiftoffRegister tmp2 =
4127 GetUnusedRegister(tmp_rc, LiftoffRegList{dst, lhs, rhs, tmp1});
4128 I64x2Mul(dst.fp(), lhs.fp(), rhs.fp(), tmp1.fp(), tmp2.fp());
4129}
4130
4131void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
4132 LiftoffRegister src1,
4133 LiftoffRegister src2) {
4134 I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
4135 /*low=*/true, /*is_signed=*/true);
4136}
4137
4138void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
4139 LiftoffRegister src1,
4140 LiftoffRegister src2) {
4141 I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
4142 /*low=*/true, /*is_signed=*/false);
4143}
4144
4145void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
4146 LiftoffRegister src1,
4147 LiftoffRegister src2) {
4148 I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
4149 /*low=*/false, /*is_signed=*/true);
4150}
4151
4152void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
4153 LiftoffRegister src1,
4154 LiftoffRegister src2) {
4155 I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
4156 /*low=*/false, /*is_signed=*/false);
4157}
4158
4159void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
4160 LiftoffRegister src) {
4161 Movmskpd(dst.gp(), src.fp());
4162}
4163
4164void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
4165 LiftoffRegister src) {
4166 Pmovsxdq(dst.fp(), src.fp());
4167}
4168
4169void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
4170 LiftoffRegister src) {
4171 I64x2SConvertI32x4High(dst.fp(), src.fp());
4172}
4173
4174void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
4175 LiftoffRegister src) {
4176 Pmovzxdq(dst.fp(), src.fp());
4177}
4178
4179void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
4180 LiftoffRegister src) {
4181 I64x2UConvertI32x4High(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
4182}
4183
4184void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
4185 LiftoffRegister src) {
4186 Register tmp = GetUnusedRegister(kGpReg, {}).gp();
4187 Absps(dst.fp(), src.fp(), tmp);
4188}
4189
4190void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
4191 LiftoffRegister src) {
4192 Register tmp = GetUnusedRegister(kGpReg, {}).gp();
4193 Negps(dst.fp(), src.fp(), tmp);
4194}
4195
4196void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
4197 LiftoffRegister src) {
4198 Sqrtps(dst.fp(), src.fp());
4199}
4200
4201bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
4202 LiftoffRegister src) {
4203 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
4204 Roundps(dst.fp(), src.fp(), kRoundUp);
4205 return true;
4206}
4207
4208bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
4209 LiftoffRegister src) {
4210 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
4211 Roundps(dst.fp(), src.fp(), kRoundDown);
4212 return true;
4213}
4214
4215bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
4216 LiftoffRegister src) {
4217 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
4218 Roundps(dst.fp(), src.fp(), kRoundToZero);
4219 return true;
4220}
4221
4222bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
4223 LiftoffRegister src) {
4224 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
4225 Roundps(dst.fp(), src.fp(), kRoundToNearest);
4226 return true;
4227}
4228
4229void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
4230 LiftoffRegister rhs) {
4231 // With NaN inputs the result depends on the operand order, so do not swap.
4232 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vaddps, &Assembler::addps>(
4233 this, dst, lhs, rhs);
4234}
4235
4236void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
4237 LiftoffRegister rhs) {
4238 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vsubps, &Assembler::subps>(
4239 this, dst, lhs, rhs);
4240}
4241
4242void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
4243 LiftoffRegister rhs) {
4244 liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulps, &Assembler::mulps>(
4245 this, dst, lhs, rhs);
4246}
4247
4248void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
4249 LiftoffRegister rhs) {
4250 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vdivps, &Assembler::divps>(
4251 this, dst, lhs, rhs);
4252}
4253
4254void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
4255 LiftoffRegister rhs) {
4256 F32x4Min(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
4257}
4258
4259void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
4260 LiftoffRegister rhs) {
4261 F32x4Max(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
4262}
4263
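// Note on pmin/pmax below: minps/maxps return their second operand when either
// input is NaN or when the inputs compare equal (e.g. -0.0 and +0.0), so
// swapping the operands yields the Wasm pmin/pmax semantics (b < a ? b : a,
// resp. a < b ? b : a). The same applies to the f64x2 variants further down.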
4264void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
4265 LiftoffRegister rhs) {
4266 // Due to the way minps works, pmin(a, b) = minps(b, a).
4267 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminps, &Assembler::minps>(
4268 this, dst, rhs, lhs);
4269}
4270
4271void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
4272 LiftoffRegister rhs) {
4273 // Due to the way maxps works, pmax(a, b) = maxps(b, a).
4274 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxps, &Assembler::maxps>(
4275 this, dst, rhs, lhs);
4276}
4277
4278void LiftoffAssembler::emit_f32x4_relaxed_min(LiftoffRegister dst,
4279 LiftoffRegister lhs,
4280 LiftoffRegister rhs) {
4281 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminps, &Assembler::minps>(
4282 this, dst, lhs, rhs);
4283}
4284
4285void LiftoffAssembler::emit_f32x4_relaxed_max(LiftoffRegister dst,
4286 LiftoffRegister lhs,
4287 LiftoffRegister rhs) {
4288 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxps, &Assembler::maxps>(
4289 this, dst, lhs, rhs);
4290}
4291
4292void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
4293 LiftoffRegister src) {
4294 Register tmp = GetUnusedRegister(kGpReg, {}).gp();
4295 Abspd(dst.fp(), src.fp(), tmp);
4296}
4297
4298void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
4299 LiftoffRegister src) {
4300 Register tmp = GetUnusedRegister(kGpReg, {}).gp();
4301 Negpd(dst.fp(), src.fp(), tmp);
4302}
4303
4304void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
4305 LiftoffRegister src) {
4306 Sqrtpd(dst.fp(), src.fp());
4307}
4308
4309bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
4310 LiftoffRegister src) {
4311 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
4312 Roundpd(dst.fp(), src.fp(), kRoundUp);
4313 return true;
4314}
4315
4316bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
4317 LiftoffRegister src) {
4318 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
4319 Roundpd(dst.fp(), src.fp(), kRoundDown);
4320 return true;
4321}
4322
4323bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
4324 LiftoffRegister src) {
4325 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
4326 Roundpd(dst.fp(), src.fp(), kRoundToZero);
4327 return true;
4328}
4329
4330bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
4331 LiftoffRegister src) {
4332 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
4333 Roundpd(dst.fp(), src.fp(), kRoundToNearest);
4334 return true;
4335}
4336
4337void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
4338 LiftoffRegister rhs) {
4339 liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>(
4340 this, dst, lhs, rhs);
4341}
4342
4343void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
4344 LiftoffRegister rhs) {
4345 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vsubpd, &Assembler::subpd>(
4346 this, dst, lhs, rhs);
4347}
4348
4349void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
4350 LiftoffRegister rhs) {
4351 liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulpd, &Assembler::mulpd>(
4352 this, dst, lhs, rhs);
4353}
4354
4355void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
4356 LiftoffRegister rhs) {
4357 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vdivpd, &Assembler::divpd>(
4358 this, dst, lhs, rhs);
4359}
4360
4361void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
4362 LiftoffRegister rhs) {
4363 F64x2Min(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
4364}
4365
4366void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
4367 LiftoffRegister rhs) {
4368 F64x2Max(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
4369}
4370
4371void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
4372 LiftoffRegister rhs) {
4373 // Due to the way minpd works, pmin(a, b) = minpd(b, a).
4374 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminpd, &Assembler::minpd>(
4375 this, dst, rhs, lhs);
4376}
4377
4378void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
4379 LiftoffRegister rhs) {
4380 // Due to the way maxpd works, pmax(a, b) = maxpd(b, a).
4381 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxpd, &Assembler::maxpd>(
4382 this, dst, rhs, lhs);
4383}
4384
4385void LiftoffAssembler::emit_f64x2_relaxed_min(LiftoffRegister dst,
4386 LiftoffRegister lhs,
4387 LiftoffRegister rhs) {
4388 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminpd, &Assembler::minpd>(
4389 this, dst, lhs, rhs);
4390}
4391
4392void LiftoffAssembler::emit_f64x2_relaxed_max(LiftoffRegister dst,
4393 LiftoffRegister lhs,
4394 LiftoffRegister rhs) {
4395 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxpd, &Assembler::maxpd>(
4396 this, dst, lhs, rhs);
4397}
4398
4399void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
4400 LiftoffRegister src) {
4401 Cvtdq2pd(dst.fp(), src.fp());
4402}
4403
4404void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
4405 LiftoffRegister src) {
4406 Register tmp = GetUnusedRegister(kGpReg, {}).gp();
4407 F64x2ConvertLowI32x4U(dst.fp(), src.fp(), tmp);
4408}
4409
4410void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
4411 LiftoffRegister src) {
4412 Cvtps2pd(dst.fp(), src.fp());
4413}
4414
4415void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
4416 LiftoffRegister src) {
4417 Register tmp = GetUnusedRegister(kGpReg, {}).gp();
4418 I32x4SConvertF32x4(dst.fp(), src.fp(), liftoff::kScratchDoubleReg, tmp);
4419}
4420
4421void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
4422 LiftoffRegister src) {
4423 static constexpr RegClass tmp_rc = reg_class_for(kS128);
4424 DoubleRegister scratch2 =
4425 GetUnusedRegister(tmp_rc, LiftoffRegList{dst, src}).fp();
4426 I32x4TruncF32x4U(dst.fp(), src.fp(), liftoff::kScratchDoubleReg, scratch2);
4427}
4428
4429void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
4430 LiftoffRegister src) {
4431 Cvtdq2ps(dst.fp(), src.fp());
4432}
4433
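// Note: cvtdq2ps interprets its input as signed, so each u32 lane below is
// split into its low 16 bits (converted exactly) and the remaining high part,
// which is halved to stay in signed range, converted, doubled again, and
// finally added to the low part (the only step that may round).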
4434void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
4435 LiftoffRegister src) {
4436 Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg); // Zeros.
4437 Pblendw(liftoff::kScratchDoubleReg, src.fp(),
4438 uint8_t{0x55}); // Get lo 16 bits.
4439 if (CpuFeatures::IsSupported(AVX)) {
4440 CpuFeatureScope scope(this, AVX);
4441 vpsubd(dst.fp(), src.fp(), liftoff::kScratchDoubleReg); // Get hi 16 bits.
4442 } else {
4443 if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp());
4444 psubd(dst.fp(), liftoff::kScratchDoubleReg);
4445 }
4446 Cvtdq2ps(liftoff::kScratchDoubleReg,
4447 liftoff::kScratchDoubleReg); // Convert lo exactly.
4448 Psrld(dst.fp(), dst.fp(), uint8_t{1}); // Div by 2 to get in unsigned range.
4449 Cvtdq2ps(dst.fp(), dst.fp()); // Convert hi, exactly.
4450 Addps(dst.fp(), dst.fp(), dst.fp()); // Double hi, exactly.
4451 Addps(dst.fp(), dst.fp(),
4452 liftoff::kScratchDoubleReg); // Add hi and lo, may round.
4453}
4454
4455void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
4456 LiftoffRegister src) {
4457 Cvtpd2ps(dst.fp(), src.fp());
4458}
4459
4460void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
4461 LiftoffRegister lhs,
4462 LiftoffRegister rhs) {
4463 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpacksswb,
4464 &Assembler::packsswb>(this, dst, lhs,
4465 rhs);
4466}
4467
4468void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
4469 LiftoffRegister lhs,
4470 LiftoffRegister rhs) {
4471 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackuswb,
4472 &Assembler::packuswb>(this, dst, lhs,
4473 rhs);
4474}
4475
4476void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
4477 LiftoffRegister lhs,
4478 LiftoffRegister rhs) {
4479 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackssdw,
4480 &Assembler::packssdw>(this, dst, lhs,
4481 rhs);
4482}
4483
4484void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
4485 LiftoffRegister lhs,
4486 LiftoffRegister rhs) {
4487 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackusdw,
4488 &Assembler::packusdw>(this, dst, lhs,
4489 rhs, SSE4_1);
4490}
4491
4492void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
4493 LiftoffRegister src) {
4494 Pmovsxbw(dst.fp(), src.fp());
4495}
4496
4497void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,
4498 LiftoffRegister src) {
4499 I16x8SConvertI8x16High(dst.fp(), src.fp());
4500}
4501
4502void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,
4503 LiftoffRegister src) {
4504 Pmovzxbw(dst.fp(), src.fp());
4505}
4506
4507void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,
4508 LiftoffRegister src) {
4509 I16x8UConvertI8x16High(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
4510}
4511
4512void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,
4513 LiftoffRegister src) {
4514 Pmovsxwd(dst.fp(), src.fp());
4515}
4516
4517void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,
4518 LiftoffRegister src) {
4519 I32x4SConvertI16x8High(dst.fp(), src.fp());
4520}
4521
4522void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,
4523 LiftoffRegister src) {
4524 Pmovzxwd(dst.fp(), src.fp());
4525}
4526
4527void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
4528 LiftoffRegister src) {
4529 I32x4UConvertI16x8High(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
4530}
4531
4532void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
4533 LiftoffRegister src) {
4534 Register tmp = GetUnusedRegister(kGpReg, {}).gp();
4535 I32x4TruncSatF64x2SZero(dst.fp(), src.fp(), liftoff::kScratchDoubleReg, tmp);
4536}
4537
4538void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
4539 LiftoffRegister src) {
4540 Register tmp = GetUnusedRegister(kGpReg, {}).gp();
4541 I32x4TruncSatF64x2UZero(dst.fp(), src.fp(), liftoff::kScratchDoubleReg, tmp);
4542}
4543
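// Note: andnps computes ~first & second, so the operands are passed swapped
// below to implement the Wasm semantics lhs & ~rhs.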
4544void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
4545 LiftoffRegister lhs,
4546 LiftoffRegister rhs) {
4547 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vandnps, &Assembler::andnps>(
4548 this, dst, rhs, lhs);
4549}
4550
4551void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
4552 LiftoffRegister lhs,
4553 LiftoffRegister rhs) {
4554 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpavgb, &Assembler::pavgb>(
4555 this, dst, lhs, rhs);
4556}
4557
4558void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
4559 LiftoffRegister lhs,
4560 LiftoffRegister rhs) {
4561 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpavgw, &Assembler::pavgw>(
4562 this, dst, lhs, rhs);
4563}
4564
4565void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
4566 LiftoffRegister src) {
4567 Pabsb(dst.fp(), src.fp());
4568}
4569
4570void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
4571 LiftoffRegister src) {
4572 Pabsw(dst.fp(), src.fp());
4573}
4574
4575void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
4576 LiftoffRegister src) {
4577 Pabsd(dst.fp(), src.fp());
4578}
4579
4580void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
4581 LiftoffRegister src) {
4582 I64x2Abs(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
4583}
4584
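// Note: movsx_b needs a byte-addressable source register, so the lane is
// extracted into a byte register first and then sign-extended into dst.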
4585void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
4586 LiftoffRegister lhs,
4587 uint8_t imm_lane_idx) {
4588 Register byte_reg = liftoff::GetTmpByteRegister(this, dst.gp());
4589 Pextrb(byte_reg, lhs.fp(), imm_lane_idx);
4590 movsx_b(dst.gp(), byte_reg);
4591}
4592
4593void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
4594 LiftoffRegister lhs,
4595 uint8_t imm_lane_idx) {
4596 Pextrb(dst.gp(), lhs.fp(), imm_lane_idx);
4597}
4598
4599void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
4600 LiftoffRegister lhs,
4601 uint8_t imm_lane_idx) {
4602 Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
4603 movsx_w(dst.gp(), dst.gp());
4604}
4605
4606void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
4607 LiftoffRegister lhs,
4608 uint8_t imm_lane_idx) {
4609 Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
4610}
4611
4612void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
4613 LiftoffRegister lhs,
4614 uint8_t imm_lane_idx) {
4615 Pextrd(dst.gp(), lhs.fp(), imm_lane_idx);
4616}
4617
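// Note: i64 values live in a register pair on ia32, so the 64-bit lane is
// moved as two 32-bit halves, using dword lanes 2 * imm_lane_idx and
// 2 * imm_lane_idx + 1 (the same scheme is used for replace_lane below).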
4618void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
4619 LiftoffRegister lhs,
4620 uint8_t imm_lane_idx) {
4621 Pextrd(dst.low_gp(), lhs.fp(), imm_lane_idx * 2);
4622 Pextrd(dst.high_gp(), lhs.fp(), imm_lane_idx * 2 + 1);
4623}
4624
4625void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
4626 LiftoffRegister lhs,
4627 uint8_t imm_lane_idx) {
4628 F32x4ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
4629}
4630
4631void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
4632 LiftoffRegister lhs,
4633 uint8_t imm_lane_idx) {
4634 F64x2ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
4635}
4636
4637void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
4638 LiftoffRegister src1,
4639 LiftoffRegister src2,
4640 uint8_t imm_lane_idx) {
4641 if (CpuFeatures::IsSupported(AVX)) {
4642 CpuFeatureScope scope(this, AVX);
4643 vpinsrb(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
4644 } else {
4645 CpuFeatureScope scope(this, SSE4_1);
4646 if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
4647 pinsrb(dst.fp(), src2.gp(), imm_lane_idx);
4648 }
4649}
4650
4651void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
4652 LiftoffRegister src1,
4653 LiftoffRegister src2,
4654 uint8_t imm_lane_idx) {
4655 if (CpuFeatures::IsSupported(AVX)) {
4656 CpuFeatureScope scope(this, AVX);
4657 vpinsrw(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
4658 } else {
4659 if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
4660 pinsrw(dst.fp(), src2.gp(), imm_lane_idx);
4661 }
4662}
4663
4664void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
4665 LiftoffRegister src1,
4666 LiftoffRegister src2,
4667 uint8_t imm_lane_idx) {
4668 if (CpuFeatures::IsSupported(AVX)) {
4669 CpuFeatureScope scope(this, AVX);
4670 vpinsrd(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
4671 } else {
4672 CpuFeatureScope scope(this, SSE4_1);
4673 if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
4674 pinsrd(dst.fp(), src2.gp(), imm_lane_idx);
4675 }
4676}
4677
4678void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
4679 LiftoffRegister src1,
4680 LiftoffRegister src2,
4681 uint8_t imm_lane_idx) {
4682 if (CpuFeatures::IsSupported(AVX)) {
4683 CpuFeatureScope scope(this, AVX);
4684 vpinsrd(dst.fp(), src1.fp(), src2.low_gp(), imm_lane_idx * 2);
4685 vpinsrd(dst.fp(), dst.fp(), src2.high_gp(), imm_lane_idx * 2 + 1);
4686 } else {
4687 CpuFeatureScope scope(this, SSE4_1);
4688 if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
4689 pinsrd(dst.fp(), src2.low_gp(), imm_lane_idx * 2);
4690 pinsrd(dst.fp(), src2.high_gp(), imm_lane_idx * 2 + 1);
4691 }
4692}
4693
4694void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
4695 LiftoffRegister src1,
4696 LiftoffRegister src2,
4697 uint8_t imm_lane_idx) {
4698 if (CpuFeatures::IsSupported(AVX)) {
4699 CpuFeatureScope scope(this, AVX);
4700 vinsertps(dst.fp(), src1.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
4701 } else {
4702 CpuFeatureScope scope(this, SSE4_1);
4703 if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
4704 insertps(dst.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
4705 }
4706}
4707
4708void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
4709 LiftoffRegister src1,
4710 LiftoffRegister src2,
4711 uint8_t imm_lane_idx) {
4712 F64x2ReplaceLane(dst.fp(), src1.fp(), src2.fp(), imm_lane_idx);
4713}
4714
4715void LiftoffAssembler::emit_f32x4_qfma(LiftoffRegister dst,
4716 LiftoffRegister src1,
4717 LiftoffRegister src2,
4718 LiftoffRegister src3) {
4719 F32x4Qfma(dst.fp(), src1.fp(), src2.fp(), src3.fp(),
4720 liftoff::kScratchDoubleReg);
4721}
4722
4723void LiftoffAssembler::emit_f32x4_qfms(LiftoffRegister dst,
4724 LiftoffRegister src1,
4725 LiftoffRegister src2,
4726 LiftoffRegister src3) {
4727 F32x4Qfms(dst.fp(), src1.fp(), src2.fp(), src3.fp(),
4728 liftoff::kScratchDoubleReg);
4729}
4730
4731void LiftoffAssembler::emit_f64x2_qfma(LiftoffRegister dst,
4732 LiftoffRegister src1,
4733 LiftoffRegister src2,
4734 LiftoffRegister src3) {
4735 F64x2Qfma(dst.fp(), src1.fp(), src2.fp(), src3.fp(),
4736 liftoff::kScratchDoubleReg);
4737}
4738
4739void LiftoffAssembler::emit_f64x2_qfms(LiftoffRegister dst,
4740 LiftoffRegister src1,
4741 LiftoffRegister src2,
4742 LiftoffRegister src3) {
4743 F64x2Qfms(dst.fp(), src1.fp(), src2.fp(), src3.fp(),
4744 liftoff::kScratchDoubleReg);
4745}
4746
4747bool LiftoffAssembler::emit_f16x8_splat(LiftoffRegister dst,
4748 LiftoffRegister src) {
4749 return false;
4750}
4751
4752bool LiftoffAssembler::emit_f16x8_extract_lane(LiftoffRegister dst,
4753 LiftoffRegister lhs,
4754 uint8_t imm_lane_idx) {
4755 return false;
4756}
4757
4758bool LiftoffAssembler::emit_f16x8_replace_lane(LiftoffRegister dst,
4759 LiftoffRegister src1,
4760 LiftoffRegister src2,
4761 uint8_t imm_lane_idx) {
4762 return false;
4763}
4764
4765bool LiftoffAssembler::emit_f16x8_abs(LiftoffRegister dst,
4766 LiftoffRegister src) {
4767 return false;
4768}
4769
4770bool LiftoffAssembler::emit_f16x8_neg(LiftoffRegister dst,
4771 LiftoffRegister src) {
4772 return false;
4773}
4774
4775bool LiftoffAssembler::emit_f16x8_sqrt(LiftoffRegister dst,
4776 LiftoffRegister src) {
4777 return false;
4778}
4779
4780bool LiftoffAssembler::emit_f16x8_ceil(LiftoffRegister dst,
4781 LiftoffRegister src) {
4782 return false;
4783}
4784
4785bool LiftoffAssembler::emit_f16x8_floor(LiftoffRegister dst,
4786 LiftoffRegister src) {
4787 return false;
4788}
4789
4790bool LiftoffAssembler::emit_f16x8_trunc(LiftoffRegister dst,
4791 LiftoffRegister src) {
4792 return false;
4793}
4794
4795bool LiftoffAssembler::emit_f16x8_nearest_int(LiftoffRegister dst,
4796 LiftoffRegister src) {
4797 return false;
4798}
4799
4800bool LiftoffAssembler::emit_f16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
4801 LiftoffRegister rhs) {
4802 return false;
4803}
4804
4805bool LiftoffAssembler::emit_f16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
4806 LiftoffRegister rhs) {
4807 return false;
4808}
4809
4810bool LiftoffAssembler::emit_f16x8_lt(LiftoffRegister dst, LiftoffRegister lhs,
4811 LiftoffRegister rhs) {
4812 return false;
4813}
4814
4815bool LiftoffAssembler::emit_f16x8_le(LiftoffRegister dst, LiftoffRegister lhs,
4816 LiftoffRegister rhs) {
4817 return false;
4818}
4819
4820bool LiftoffAssembler::emit_f16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
4821 LiftoffRegister rhs) {
4822 return false;
4823}
4824
4825bool LiftoffAssembler::emit_f16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
4826 LiftoffRegister rhs) {
4827 return false;
4828}
4829
4830bool LiftoffAssembler::emit_f16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
4831 LiftoffRegister rhs) {
4832 return false;
4833}
4834
4835bool LiftoffAssembler::emit_f16x8_div(LiftoffRegister dst, LiftoffRegister lhs,
4836 LiftoffRegister rhs) {
4837 return false;
4838}
4839
4840bool LiftoffAssembler::emit_f16x8_min(LiftoffRegister dst, LiftoffRegister lhs,
4841 LiftoffRegister rhs) {
4842 return false;
4843}
4844
4845bool LiftoffAssembler::emit_f16x8_max(LiftoffRegister dst, LiftoffRegister lhs,
4846 LiftoffRegister rhs) {
4847 return false;
4848}
4849
4850bool LiftoffAssembler::emit_f16x8_pmin(LiftoffRegister dst, LiftoffRegister lhs,
4851 LiftoffRegister rhs) {
4852 return false;
4853}
4854
4855bool LiftoffAssembler::emit_f16x8_pmax(LiftoffRegister dst, LiftoffRegister lhs,
4856 LiftoffRegister rhs) {
4857 return false;
4858}
4859
4860bool LiftoffAssembler::emit_i16x8_sconvert_f16x8(LiftoffRegister dst,
4861 LiftoffRegister src) {
4862 return false;
4863}
4864
4865bool LiftoffAssembler::emit_i16x8_uconvert_f16x8(LiftoffRegister dst,
4866 LiftoffRegister src) {
4867 return false;
4868}
4869
4870bool LiftoffAssembler::emit_f16x8_sconvert_i16x8(LiftoffRegister dst,
4871 LiftoffRegister src) {
4872 return false;
4873}
4874
4875bool LiftoffAssembler::emit_f16x8_uconvert_i16x8(LiftoffRegister dst,
4876 LiftoffRegister src) {
4877 return false;
4878}
4879
4880bool LiftoffAssembler::emit_f16x8_demote_f32x4_zero(LiftoffRegister dst,
4881 LiftoffRegister src) {
4882 return false;
4883}
4884
4885bool LiftoffAssembler::emit_f16x8_demote_f64x2_zero(LiftoffRegister dst,
4886 LiftoffRegister src) {
4887 return false;
4888}
4889
4890bool LiftoffAssembler::emit_f32x4_promote_low_f16x8(LiftoffRegister dst,
4891 LiftoffRegister src) {
4892 return false;
4893}
4894
4895bool LiftoffAssembler::emit_f16x8_qfma(LiftoffRegister dst,
4896 LiftoffRegister src1,
4897 LiftoffRegister src2,
4898 LiftoffRegister src3) {
4899 return false;
4900}
4901
4902bool LiftoffAssembler::emit_f16x8_qfms(LiftoffRegister dst,
4903 LiftoffRegister src1,
4904 LiftoffRegister src2,
4905 LiftoffRegister src3) {
4906 return false;
4907}
4908
4909bool LiftoffAssembler::supports_f16_mem_access() { return false; }
4910
4911void LiftoffAssembler::StackCheck(Label* ool_code) {
4912 CompareStackLimit(esp, StackLimitKind::kInterruptStackLimit);
4913 j(below_equal, ool_code);
4914}
4915
4916void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
4917 MacroAssembler::AssertUnreachable(reason);
4918}
4919
4920void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
4921 LiftoffRegList gp_regs = regs & kGpCacheRegList;
4922 while (!gp_regs.is_empty()) {
4923 LiftoffRegister reg = gp_regs.GetFirstRegSet();
4924 push(reg.gp());
4925 gp_regs.clear(reg);
4926 }
4927 LiftoffRegList fp_regs = regs & kFpCacheRegList;
4928 unsigned num_fp_regs = fp_regs.GetNumRegsSet();
4929 if (num_fp_regs) {
4930 AllocateStackSpace(num_fp_regs * kSimd128Size);
4931 unsigned offset = 0;
4932 while (!fp_regs.is_empty()) {
4933 LiftoffRegister reg = fp_regs.GetFirstRegSet();
4934 Movdqu(Operand(esp, offset), reg.fp());
4935 fp_regs.clear(reg);
4936 offset += kSimd128Size;
4937 }
4938 DCHECK_EQ(offset, num_fp_regs * kSimd128Size);
4939 }
4940}
4941
4942void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
4943 LiftoffRegList fp_regs = regs & kFpCacheRegList;
4944 unsigned fp_offset = 0;
4945 while (!fp_regs.is_empty()) {
4946 LiftoffRegister reg = fp_regs.GetFirstRegSet();
4947 Movdqu(reg.fp(), Operand(esp, fp_offset));
4948 fp_regs.clear(reg);
4949 fp_offset += kSimd128Size;
4950 }
4951 if (fp_offset) add(esp, Immediate(fp_offset));
4952 LiftoffRegList gp_regs = regs & kGpCacheRegList;
4953 while (!gp_regs.is_empty()) {
4954 LiftoffRegister reg = gp_regs.GetLastRegSet();
4955 pop(reg.gp());
4956 gp_regs.clear(reg);
4957 }
4958}
4959
4960void LiftoffAssembler::RecordSpillsInSafepoint(
4961 SafepointTableBuilder::Safepoint& safepoint, LiftoffRegList all_spills,
4962 LiftoffRegList ref_spills, int spill_offset) {
4963 LiftoffRegList fp_spills = all_spills & kFpCacheRegList;
4964 int spill_space_size = fp_spills.GetNumRegsSet() * kSimd128Size;
4965 LiftoffRegList gp_spills = all_spills & kGpCacheRegList;
4966 while (!gp_spills.is_empty()) {
4967 LiftoffRegister reg = gp_spills.GetFirstRegSet();
4968 if (ref_spills.has(reg)) {
4969 safepoint.DefineTaggedStackSlot(spill_offset);
4970 }
4971 gp_spills.clear(reg);
4972 ++spill_offset;
4973 spill_space_size += kSystemPointerSize;
4974 }
4975 // Record the number of additional spill slots.
4976 RecordOolSpillSpaceSize(spill_space_size);
4977}
4978
4979void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) {
4980 DCHECK_LT(num_stack_slots,
4981 (1 << 16) / kSystemPointerSize); // 16 bit immediate
4982 ret(static_cast<int>(num_stack_slots * kSystemPointerSize));
4983}
4984
4985void LiftoffAssembler::CallCWithStackBuffer(
4986 const std::initializer_list<VarState> args, const LiftoffRegister* rets,
4987 ValueKind return_kind, ValueKind out_argument_kind, int stack_bytes,
4988 ExternalReference ext_ref) {
4989 AllocateStackSpace(stack_bytes);
4990
4991 int arg_offset = 0;
4992 for (const VarState& arg : args) {
4993 if (arg.is_reg()) {
4994 liftoff::Store(this, esp, arg_offset, arg.reg(), arg.kind());
4995 } else if (arg.is_const()) {
4996 DCHECK_EQ(kI32, arg.kind());
4997 mov(Operand(esp, arg_offset), Immediate(arg.i32_const()));
4998 } else if (value_kind_size(arg.kind()) == 4) {
4999 // We do not have a scratch register, so move via the stack. Note that
5000 // {push} decrements {esp} by 4 and {pop} increments it again, but the
5001 // destination operand uses the {esp} value after the increment.
5002 push(liftoff::GetStackSlot(arg.offset()));
5003 pop(Operand(esp, arg_offset));
5004 } else {
5005 DCHECK_EQ(8, value_kind_size(arg.kind()));
5006 push(liftoff::GetStackSlot(arg.offset()));
5007 pop(Operand(esp, arg_offset + 4));
5008 push(liftoff::GetStackSlot(arg.offset() + 4));
5009 pop(Operand(esp, arg_offset));
5010 }
5011 arg_offset += value_kind_size(arg.kind());
5012 }
5013 DCHECK_LE(arg_offset, stack_bytes);
5014
5015 constexpr Register kScratch = eax;
5016 constexpr Register kArgumentBuffer = ecx;
5017 constexpr int kNumCCallArgs = 1;
5018 mov(kArgumentBuffer, esp);
5019 PrepareCallCFunction(kNumCCallArgs, kScratch);
5020
5021 // Pass a pointer to the buffer with the arguments to the C function. ia32
5022 // does not use registers here, so push to the stack.
5023 mov(Operand(esp, 0), kArgumentBuffer);
5024
5025 // Now call the C function.
5026 CallCFunction(ext_ref, kNumCCallArgs);
5027
5028 // Move return value to the right register.
5029 const LiftoffRegister* next_result_reg = rets;
5030 if (return_kind != kVoid) {
5031 constexpr Register kReturnReg = eax;
5032 if (kReturnReg != next_result_reg->gp()) {
5033 Move(*next_result_reg, LiftoffRegister(kReturnReg), return_kind);
5034 }
5035 ++next_result_reg;
5036 }
5037
5038 // Load potential output value from the buffer on the stack.
5039 if (out_argument_kind != kVoid) {
5040 liftoff::Load(this, *next_result_reg, esp, 0, out_argument_kind);
5041 }
5042
5043 add(esp, Immediate(stack_bytes));
5044}
5045
5046void LiftoffAssembler::CallC(const std::initializer_list<VarState> args,
5047 ExternalReference ext_ref) {
5048 LiftoffRegList arg_regs;
5049 for (const VarState arg : args) {
5050 if (arg.is_reg()) arg_regs.set(arg.reg());
5051 }
5052
5053 RegList usable_regs = kLiftoffAssemblerGpCacheRegs - arg_regs.GetGpList();
5054 Register scratch = usable_regs.first();
5055 int num_lowered_args = 0;
5056 // i64 arguments are lowered to two actual arguments (taking two stack slots).
5057 for (const VarState& arg : args) {
5058 num_lowered_args += arg.kind() == kI64 ? 2 : 1;
5059 }
5060 PrepareCallCFunction(num_lowered_args, scratch);
5061
5062 // Ia32 passes all arguments via the stack. Store them now in the stack space
5063 // allocated by {PrepareCallCFunction}.
5064
5065 // GetNextOperand returns the operand for the next stack slot on each
5066 // invocation.
5067 auto GetNextOperand = [arg_offset = 0, num_lowered_args]() mutable {
5068 // Check that we don't exceed the pre-computed {num_lowered_args}.
5069 DCHECK_GE(num_lowered_args, arg_offset);
5070 USE(num_lowered_args);
5071 return Operand{esp, arg_offset++ * kSystemPointerSize};
5072 };
5073 for (const VarState& arg : args) {
5074 Operand dst = GetNextOperand();
5075 if (arg.is_reg()) {
5076 LiftoffRegister reg = arg.reg();
5077 if (arg.kind() == kI64) {
5078 mov(dst, reg.low_gp());
5079 mov(GetNextOperand(), reg.high_gp());
5080 } else {
5081 mov(dst, reg.gp());
5082 }
5083 } else if (arg.is_const()) {
5084 DCHECK_EQ(kI32, arg.kind());
5085 mov(dst, Immediate(arg.i32_const()));
5086 } else {
5087 DCHECK(arg.is_stack());
5088 if (arg.kind() == kI64) {
5089 mov(scratch, liftoff::GetStackSlot(arg.offset()));
5090 mov(dst, scratch);
5091 mov(scratch, liftoff::GetStackSlot(arg.offset() + kSystemPointerSize));
5092 mov(GetNextOperand(), scratch);
5093 } else {
5094 mov(scratch, liftoff::GetStackSlot(arg.offset()));
5095 mov(dst, scratch);
5096 }
5097 }
5098 }
5099
5100 // Now call the C function.
5101 CallCFunction(ext_ref, num_lowered_args);
5102}
5103
5104void LiftoffAssembler::CallNativeWasmCode(Address addr) {
5105 wasm_call(addr, RelocInfo::WASM_CALL);
5106}
5107
5108void LiftoffAssembler::TailCallNativeWasmCode(Address addr) {
5109 jmp(addr, RelocInfo::WASM_CALL);
5110}
5111
5112void LiftoffAssembler::CallIndirect(const ValueKindSig* sig,
5113 compiler::CallDescriptor* call_descriptor,
5114 Register target) {
5115 // Since we have more cache registers than parameter registers, the
5116 // {LiftoffCompiler} should always be able to place {target} in a register.
5117 DCHECK(target.is_valid());
5118 CallWasmCodePointer(target);
5119}
5120
5121void LiftoffAssembler::TailCallIndirect(
5122 compiler::CallDescriptor* call_descriptor, Register target) {
5123 // Since we have more cache registers than parameter registers, the
5124 // {LiftoffCompiler} should always be able to place {target} in a register.
5125 DCHECK(target.is_valid());
5126 CallWasmCodePointer(target, CallJumpMode::kTailCall);
5127}
5128
5129void LiftoffAssembler::CallBuiltin(Builtin builtin) {
5130 // A direct call to a builtin. Just encode the builtin index. This will be
5131 // patched at relocation.
5132 wasm_call(static_cast<Address>(builtin), RelocInfo::WASM_STUB_CALL);
5133}
5134
5135void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) {
5136 AllocateStackSpace(size);
5137 mov(addr, esp);
5138}
5139
5140void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
5141 add(esp, Immediate(size));
5142}
5143
5144void LiftoffAssembler::MaybeOSR() {}
5145
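// Note: ucomiss/ucomisd set the parity flag exactly when the comparison is
// unordered, i.e. when src is NaN; the branch on parity_odd (PF == 0) below
// skips the store for non-NaN values.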
5146void LiftoffAssembler::emit_store_nonzero_if_nan(Register dst,
5147 DoubleRegister src,
5148 ValueKind kind) {
5149 if (kind == kF32) {
5150 ucomiss(src, src);
5151 } else {
5152 DCHECK_EQ(kind, kF64);
5153 ucomisd(src, src);
5154 }
5155 Label ret;
5156 j(parity_odd, &ret);
5157 mov(Operand(dst, 0), Immediate(1));
5158 bind(&ret);
5159}
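// [Illustrative note, not part of the original source] ucomiss/ucomisd compare
// {src} with itself; only a NaN compares unordered, which sets the parity
// flag. parity_odd (PF == 0) therefore jumps over the store for ordinary
// values, while a NaN falls through and writes 1 to the word at {dst}.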
5160
5161void LiftoffAssembler::emit_s128_store_nonzero_if_nan(Register dst,
5162 LiftoffRegister src,
5163 Register tmp_gp,
5164 LiftoffRegister tmp_s128,
5165 ValueKind lane_kind) {
5166 if (lane_kind == kF32) {
5167 movaps(tmp_s128.fp(), src.fp());
5168 cmpunordps(tmp_s128.fp(), tmp_s128.fp());
5169 } else {
5170 DCHECK_EQ(lane_kind, kF64);
5171 movapd(tmp_s128.fp(), src.fp());
5172 cmpunordpd(tmp_s128.fp(), tmp_s128.fp());
5173 }
5174 pmovmskb(tmp_gp, tmp_s128.fp());
5175 or_(Operand(dst, 0), tmp_gp);
5176}
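// [Illustrative note, not part of the original source] cmpunordps/cmpunordpd
// compare each lane of the copy with itself, producing an all-ones lane
// exactly where the input lane is NaN. pmovmskb collapses that into a byte
// mask in {tmp_gp}, which is nonzero iff any lane was NaN, and the or_ merges
// the result into the word at {dst} without clearing a previously stored flag.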
5177
5178void LiftoffAssembler::emit_store_nonzero(Register dst) {
5179 mov(Operand(dst, 0), Immediate(1));
5180}
5181
5182void LiftoffStackSlots::Construct(int param_slots) {
5183 DCHECK_LT(0, slots_.size());
5184 SortInPushOrder();
5185 int last_stack_slot = param_slots;
5186 for (auto& slot : slots_) {
5187 const int stack_slot = slot.dst_slot_;
5188 int stack_decrement = (last_stack_slot - stack_slot) * kSystemPointerSize;
5189 DCHECK_LT(0, stack_decrement);
5190 last_stack_slot = stack_slot;
5191 const LiftoffAssembler::VarState& src = slot.src_;
5192 switch (src.loc()) {
5193 case LiftoffAssembler::VarState::kStack:
5194 // The combination of AllocateStackSpace and 2 movdqu is usually smaller
5195 // in code size than doing 4 pushes.
5196 if (src.kind() == kS128) {
5197 asm_->AllocateStackSpace(stack_decrement);
5198 asm_->movdqu(liftoff::kScratchDoubleReg,
5199 liftoff::GetStackSlot(slot.src_offset_));
5200 asm_->movdqu(Operand(esp, 0), liftoff::kScratchDoubleReg);
5201 break;
5202 }
5203 if (src.kind() == kF64) {
5204 asm_->AllocateStackSpace(stack_decrement - kDoubleSize);
5205 DCHECK_EQ(kLowWord, slot.half_);
5206 asm_->push(liftoff::GetHalfStackSlot(slot.src_offset_, kHighWord));
5207 stack_decrement = kSystemPointerSize;
5208 }
5209 asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
5210 asm_->push(liftoff::GetHalfStackSlot(slot.src_offset_, slot.half_));
5211 break;
5212 case LiftoffAssembler::VarState::kRegister:
5213 if (src.kind() == kI64) {
5214 liftoff::push(
5215 asm_, slot.half_ == kLowWord ? src.reg().low() : src.reg().high(),
5216 kI32, stack_decrement - kSystemPointerSize);
5217 } else {
5218 int pushed_bytes = SlotSizeInBytes(slot);
5219 liftoff::push(asm_, src.reg(), src.kind(),
5220 stack_decrement - pushed_bytes);
5221 }
5222 break;
5223 case LiftoffAssembler::VarState::kIntConst:
5224 asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
5225 // The high word is the sign extension of the low word.
5226 asm_->push(Immediate(slot.half_ == kLowWord ? src.i32_const()
5227 : src.i32_const() >> 31));
5228 break;
5229 }
5230 }
5231}
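// [Illustrative note, not part of the original source] {stack_decrement} is
// the gap, in bytes, between the previously filled parameter slot and the
// current one; each case first allocates the padding (the decrement minus the
// bytes it actually pushes) before pushing. For an i64 constant this means the
// low-word half pushes i32_const() itself and the high-word half pushes
// i32_const() >> 31, i.e. 0 or -1, the 32-bit sign extension of the low word.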
5232
5233#undef RETURN_FALSE_IF_MISSING_CPU_FEATURE
5234
5235} // namespace v8::internal::wasm
5236
5237#endif // V8_WASM_BASELINE_IA32_LIFTOFF_ASSEMBLER_IA32_INL_H_