V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
code-generator-ia32.cc
1// Copyright 2013 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
18#include "src/compiler/osr.h"
22#include "src/objects/smi.h"
23
24#if V8_ENABLE_WEBASSEMBLY
27#endif // V8_ENABLE_WEBASSEMBLY
28
29namespace v8 {
30namespace internal {
31namespace compiler {
32
33#define __ masm()->
34
35// Adds IA-32 specific methods for decoding operands.
36class IA32OperandConverter : public InstructionOperandConverter {
37 public:
38 IA32OperandConverter(CodeGenerator* gen, Instruction* instr)
39 : InstructionOperandConverter(gen, instr) {}
40
41 Operand InputOperand(size_t index, int extra = 0) {
42 return ToOperand(instr_->InputAt(index), extra);
43 }
44
45 Immediate InputImmediate(size_t index) {
46 return ToImmediate(instr_->InputAt(index));
47 }
48
49 Operand OutputOperand() { return ToOperand(instr_->Output()); }
50
51 Operand ToOperand(InstructionOperand* op, int extra = 0) {
52 if (op->IsRegister()) {
53 DCHECK_EQ(0, extra);
54 return Operand(ToRegister(op));
55 } else if (op->IsFPRegister()) {
56 DCHECK_EQ(0, extra);
57 return Operand(ToDoubleRegister(op));
58 }
59 DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
60 return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
61 }
62
63 Operand SlotToOperand(int slot, int extra = 0) {
64 FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
65 return Operand(offset.from_stack_pointer() ? esp : ebp,
66 offset.offset() + extra);
67 }
68
69 Immediate ToImmediate(InstructionOperand* operand) {
70 Constant constant = ToConstant(operand);
71 switch (constant.type()) {
72 case Constant::kInt32:
73 return Immediate(constant.ToInt32(), constant.rmode());
74 case Constant::kFloat32:
75 return Immediate::EmbeddedNumber(constant.ToFloat32());
76 case Constant::kFloat64:
77 return Immediate::EmbeddedNumber(constant.ToFloat64().value());
78 case Constant::kExternalReference:
79 return Immediate(constant.ToExternalReference());
80 case Constant::kHeapObject:
81 return Immediate(constant.ToHeapObject());
83 break;
85 break;
88 }
89 UNREACHABLE();
90 }
91
92 static size_t NextOffset(size_t* offset) {
93 size_t i = *offset;
94 (*offset)++;
95 return i;
96 }
97
98 static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
99 static_assert(0 == static_cast<int>(times_1));
100 static_assert(1 == static_cast<int>(times_2));
101 static_assert(2 == static_cast<int>(times_4));
102 static_assert(3 == static_cast<int>(times_8));
103 int scale = static_cast<int>(mode - one);
104 DCHECK(scale >= 0 && scale < 4);
105 return static_cast<ScaleFactor>(scale);
106 }
107
108 Operand MemoryOperand(size_t* offset) {
109 AddressingMode mode = AddressingModeField::decode(instr_->opcode());
110 switch (mode) {
111 case kMode_MR: {
112 Register base = InputRegister(NextOffset(offset));
113 int32_t disp = 0;
114 return Operand(base, disp);
115 }
116 case kMode_MRI: {
117 Register base = InputRegister(NextOffset(offset));
118 Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
119 return Operand(base, ctant.ToInt32(), ctant.rmode());
120 }
121 case kMode_MR1:
122 case kMode_MR2:
123 case kMode_MR4:
124 case kMode_MR8: {
125 Register base = InputRegister(NextOffset(offset));
126 Register index = InputRegister(NextOffset(offset));
127 ScaleFactor scale = ScaleFor(kMode_MR1, mode);
128 int32_t disp = 0;
129 return Operand(base, index, scale, disp);
130 }
131 case kMode_MR1I:
132 case kMode_MR2I:
133 case kMode_MR4I:
134 case kMode_MR8I: {
135 Register base = InputRegister(NextOffset(offset));
136 Register index = InputRegister(NextOffset(offset));
137 ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
138 Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
139 return Operand(base, index, scale, ctant.ToInt32(), ctant.rmode());
140 }
141 case kMode_M1:
142 case kMode_M2:
143 case kMode_M4:
144 case kMode_M8: {
145 Register index = InputRegister(NextOffset(offset));
146 ScaleFactor scale = ScaleFor(kMode_M1, mode);
147 int32_t disp = 0;
148 return Operand(index, scale, disp);
149 }
150 case kMode_M1I:
151 case kMode_M2I:
152 case kMode_M4I:
153 case kMode_M8I: {
154 Register index = InputRegister(NextOffset(offset));
155 ScaleFactor scale = ScaleFor(kMode_M1I, mode);
156 Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
157 return Operand(index, scale, ctant.ToInt32(), ctant.rmode());
158 }
159 case kMode_MI: {
160 Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
161 return Operand(ctant.ToInt32(), ctant.rmode());
162 }
163 case kMode_Root: {
164 Register base = kRootRegister;
165 int32_t disp = InputInt32(NextOffset(offset));
166 return Operand(base, disp);
167 }
168 case kMode_None:
169 UNREACHABLE();
170 }
171 UNREACHABLE();
172 }
173
174 Operand MemoryOperand(size_t first_input = 0) {
175 return MemoryOperand(&first_input);
176 }
177
178 Operand NextMemoryOperand(size_t offset = 0) {
179 AddressingMode mode = AddressingModeField::decode(instr_->opcode());
180 Register base = InputRegister(NextOffset(&offset));
181 const int32_t disp = 4;
182 if (mode == kMode_MR1) {
183 Register index = InputRegister(NextOffset(&offset));
184 ScaleFactor scale = ScaleFor(kMode_MR1, kMode_MR1);
185 return Operand(base, index, scale, disp);
186 } else if (mode == kMode_MRI) {
187 Constant ctant = ToConstant(instr_->InputAt(NextOffset(&offset)));
188 return Operand(base, ctant.ToInt32() + disp, ctant.rmode());
189 } else {
190 UNREACHABLE();
191 }
192 }
193
194 void MoveInstructionOperandToRegister(Register destination,
195 InstructionOperand* op) {
196 if (op->IsImmediate() || op->IsConstant()) {
198 } else if (op->IsRegister()) {
200 } else {
202 }
203 }
204};
205
206namespace {
207
208bool HasAddressingMode(Instruction* instr) {
209 return instr->addressing_mode() != kMode_None;
210}
211
212bool HasImmediateInput(Instruction* instr, size_t index) {
213 return instr->InputAt(index)->IsImmediate();
214}
215
216bool HasRegisterInput(Instruction* instr, size_t index) {
217 return instr->InputAt(index)->IsRegister();
218}
219
220class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
221 public:
222 OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
223 : OutOfLineCode(gen), result_(result) {}
224
225 void Generate() final {
226 __ xorps(result_, result_);
227 __ divss(result_, result_);
228 }
229
230 private:
231 XMMRegister const result_;
232};
233
234class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
235 public:
236 OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
237 : OutOfLineCode(gen), result_(result) {}
238
239 void Generate() final {
240 __ xorpd(result_, result_);
241 __ divsd(result_, result_);
242 }
243
244 private:
245 XMMRegister const result_;
246};
247
248class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
249 public:
250 OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
251 XMMRegister input, StubCallMode stub_mode)
252 : OutOfLineCode(gen),
253 result_(result),
254 input_(input),
255#if V8_ENABLE_WEBASSEMBLY
256 stub_mode_(stub_mode),
257#endif // V8_ENABLE_WEBASSEMBLY
258 isolate_(gen->isolate()),
259 zone_(gen->zone()) {
260 }
261
262 void Generate() final {
263 __ AllocateStackSpace(kDoubleSize);
264 __ Movsd(MemOperand(esp, 0), input_);
265#if V8_ENABLE_WEBASSEMBLY
266 if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
267 // A direct call to a builtin. Just encode the builtin index. This will be
268 // patched when the code is added to the native module and copied into
269 // wasm code space.
270 __ wasm_call(static_cast<Address>(Builtin::kDoubleToI),
271 RelocInfo::WASM_STUB_CALL);
272#else
273 // For balance.
274 if (false) {
275#endif // V8_ENABLE_WEBASSEMBLY
276 } else {
277 __ CallBuiltin(Builtin::kDoubleToI);
278 }
279 __ mov(result_, MemOperand(esp, 0));
280 __ add(esp, Immediate(kDoubleSize));
281 }
282
283 private:
284 Register const result_;
285 XMMRegister const input_;
286#if V8_ENABLE_WEBASSEMBLY
287 StubCallMode stub_mode_;
288#endif // V8_ENABLE_WEBASSEMBLY
289 Isolate* isolate_;
290 Zone* zone_;
291};
292
293class OutOfLineRecordWrite final : public OutOfLineCode {
294 public:
295 OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
296 Register value, Register scratch0, Register scratch1,
297 RecordWriteMode mode, StubCallMode stub_mode)
298 : OutOfLineCode(gen),
299 object_(object),
300 operand_(operand),
301 value_(value),
302 scratch0_(scratch0),
303 scratch1_(scratch1),
304 mode_(mode),
305#if V8_ENABLE_WEBASSEMBLY
306 stub_mode_(stub_mode),
307#endif // V8_ENABLE_WEBASSEMBLY
308 zone_(gen->zone()) {
309 DCHECK(!AreAliased(object, scratch0, scratch1));
310 DCHECK(!AreAliased(value, scratch0, scratch1));
311 }
312
313 void Generate() final {
314 __ CheckPageFlag(value_, scratch0_,
315 MemoryChunk::kPointersToHereAreInterestingMask, zero,
316 exit());
317 __ lea(scratch1_, operand_);
318 SaveFPRegsMode const save_fp_mode = frame()->DidAllocateDoubleRegisters()
319 ? SaveFPRegsMode::kSave
320 : SaveFPRegsMode::kIgnore;
321 if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
322 __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
323#if V8_ENABLE_WEBASSEMBLY
324 } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
325 // A direct call to a wasm runtime stub defined in this module.
326 // Just encode the stub index. This will be patched when the code
327 // is added to the native module and copied into wasm code space.
328 __ CallRecordWriteStubSaveRegisters(object_, scratch1_, save_fp_mode,
329 StubCallMode::kCallWasmRuntimeStub);
330#endif // V8_ENABLE_WEBASSEMBLY
331 } else {
332 __ CallRecordWriteStubSaveRegisters(object_, scratch1_, save_fp_mode);
333 }
334 }
335
336 private:
337 Register const object_;
338 Operand const operand_;
339 Register const value_;
340 Register const scratch0_;
341 Register const scratch1_;
342 RecordWriteMode const mode_;
343#if V8_ENABLE_WEBASSEMBLY
344 StubCallMode const stub_mode_;
345#endif // V8_ENABLE_WEBASSEMBLY
346 Zone* zone_;
347};
348
349} // namespace
350
351#define ASSEMBLE_COMPARE(asm_instr) \
352 do { \
353 if (HasAddressingMode(instr)) { \
354 size_t index = 0; \
355 Operand left = i.MemoryOperand(&index); \
356 if (HasImmediateInput(instr, index)) { \
357 __ asm_instr(left, i.InputImmediate(index)); \
358 } else { \
359 __ asm_instr(left, i.InputRegister(index)); \
360 } \
361 } else { \
362 if (HasImmediateInput(instr, 1)) { \
363 if (HasRegisterInput(instr, 0)) { \
364 __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
365 } else { \
366 __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
367 } \
368 } else { \
369 if (HasRegisterInput(instr, 1)) { \
370 __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
371 } else { \
372 __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
373 } \
374 } \
375 } \
376 } while (0)
377
378#define ASSEMBLE_IEEE754_BINOP(name) \
379 do { \
380 /* Pass two doubles as arguments on the stack. */ \
381 __ PrepareCallCFunction(4, eax); \
382 __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0)); \
383 __ movsd(Operand(esp, 1 * kDoubleSize), i.InputDoubleRegister(1)); \
384 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 4); \
385 /* Return value is in st(0) on ia32. */ \
386 /* Store it into the result register. */ \
387 __ AllocateStackSpace(kDoubleSize); \
388 __ fstp_d(Operand(esp, 0)); \
389 __ movsd(i.OutputDoubleRegister(), Operand(esp, 0)); \
390 __ add(esp, Immediate(kDoubleSize)); \
391 } while (false)
392
393#define ASSEMBLE_IEEE754_UNOP(name) \
394 do { \
395 /* Pass one double as argument on the stack. */ \
396 __ PrepareCallCFunction(2, eax); \
397 __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0)); \
398 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
399 /* Return value is in st(0) on ia32. */ \
400 /* Store it into the result register. */ \
401 __ AllocateStackSpace(kDoubleSize); \
402 __ fstp_d(Operand(esp, 0)); \
403 __ movsd(i.OutputDoubleRegister(), Operand(esp, 0)); \
404 __ add(esp, Immediate(kDoubleSize)); \
405 } while (false)
406
407#define ASSEMBLE_BINOP(asm_instr) \
408 do { \
409 if (HasAddressingMode(instr)) { \
410 size_t index = 1; \
411 Operand right = i.MemoryOperand(&index); \
412 __ asm_instr(i.InputRegister(0), right); \
413 } else { \
414 if (HasImmediateInput(instr, 1)) { \
415 __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
416 } else { \
417 __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
418 } \
419 } \
420 } while (0)
421
422#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
423 do { \
424 Label binop; \
425 __ bind(&binop); \
426 __ mov_inst(eax, i.MemoryOperand(1)); \
427 __ Move(i.TempRegister(0), eax); \
428 __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
429 __ lock(); \
430 __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
431 __ j(not_equal, &binop); \
432 } while (false)
433
434#define ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2) \
435 do { \
436 Label binop; \
437 __ bind(&binop); \
438 __ mov(eax, i.MemoryOperand(2)); \
439 __ mov(edx, i.NextMemoryOperand(2)); \
440 __ push(ebx); \
441 frame_access_state()->IncreaseSPDelta(1); \
442 i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0)); \
443 __ push(i.InputRegister(1)); \
444 __ instr1(ebx, eax); \
445 __ instr2(i.InputRegister(1), edx); \
446 __ lock(); \
447 __ cmpxchg8b(i.MemoryOperand(2)); \
448 __ pop(i.InputRegister(1)); \
449 __ pop(ebx); \
450 frame_access_state()->IncreaseSPDelta(-1); \
451 __ j(not_equal, &binop); \
452 } while (false);
453
454#define ASSEMBLE_MOVX(mov_instr) \
455 do { \
456 if (HasAddressingMode(instr)) { \
457 __ mov_instr(i.OutputRegister(), i.MemoryOperand()); \
458 } else if (HasRegisterInput(instr, 0)) { \
459 __ mov_instr(i.OutputRegister(), i.InputRegister(0)); \
460 } else { \
461 __ mov_instr(i.OutputRegister(), i.InputOperand(0)); \
462 } \
463 } while (0)
464
465#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode) \
466 do { \
467 XMMRegister src0 = i.InputSimd128Register(0); \
468 Operand src1 = i.InputOperand(instr->InputCount() == 2 ? 1 : 0); \
469 if (CpuFeatures::IsSupported(AVX)) { \
470 CpuFeatureScope avx_scope(masm(), AVX); \
471 __ v##opcode(i.OutputSimd128Register(), src0, src1); \
472 } else { \
473 DCHECK_EQ(i.OutputSimd128Register(), src0); \
474 __ opcode(i.OutputSimd128Register(), src1); \
475 } \
476 } while (false)
477
478#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm) \
479 if (CpuFeatures::IsSupported(AVX)) { \
480 CpuFeatureScope avx_scope(masm(), AVX); \
481 __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \
482 i.InputOperand(1), imm); \
483 } else { \
484 CpuFeatureScope sse_scope(masm(), SSELevel); \
485 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); \
486 __ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm); \
487 }
488
489#define ASSEMBLE_SIMD_ALL_TRUE(opcode) \
490 do { \
491 Register dst = i.OutputRegister(); \
492 Operand src = i.InputOperand(0); \
493 Register tmp = i.TempRegister(0); \
494 XMMRegister tmp_simd = i.TempSimd128Register(1); \
495 __ mov(tmp, Immediate(1)); \
496 __ xor_(dst, dst); \
497 __ Pxor(tmp_simd, tmp_simd); \
498 __ opcode(tmp_simd, src); \
499 __ Ptest(tmp_simd, tmp_simd); \
500 __ cmov(zero, dst, tmp); \
501 } while (false)
502
503#define ASSEMBLE_SIMD_SHIFT(opcode, width) \
504 do { \
505 XMMRegister dst = i.OutputSimd128Register(); \
506 DCHECK_EQ(dst, i.InputSimd128Register(0)); \
507 if (HasImmediateInput(instr, 1)) { \
508 __ opcode(dst, dst, uint8_t{i.InputInt##width(1)}); \
509 } else { \
510 XMMRegister tmp = i.TempSimd128Register(0); \
511 Register tmp_shift = i.TempRegister(1); \
512 constexpr int mask = (1 << width) - 1; \
513 __ mov(tmp_shift, i.InputRegister(1)); \
514 __ and_(tmp_shift, Immediate(mask)); \
515 __ Movd(tmp, tmp_shift); \
516 __ opcode(dst, dst, tmp); \
517 } \
518 } while (false)
519
520#define ASSEMBLE_SIMD_PINSR(OPCODE, CPU_FEATURE) \
521 do { \
522 XMMRegister dst = i.OutputSimd128Register(); \
523 XMMRegister src = i.InputSimd128Register(0); \
524 int8_t laneidx = i.InputInt8(1); \
525 if (HasAddressingMode(instr)) { \
526 if (CpuFeatures::IsSupported(AVX)) { \
527 CpuFeatureScope avx_scope(masm(), AVX); \
528 __ v##OPCODE(dst, src, i.MemoryOperand(2), laneidx); \
529 } else { \
530 DCHECK_EQ(dst, src); \
531 CpuFeatureScope sse_scope(masm(), CPU_FEATURE); \
532 __ OPCODE(dst, i.MemoryOperand(2), laneidx); \
533 } \
534 } else { \
535 if (CpuFeatures::IsSupported(AVX)) { \
536 CpuFeatureScope avx_scope(masm(), AVX); \
537 __ v##OPCODE(dst, src, i.InputOperand(2), laneidx); \
538 } else { \
539 DCHECK_EQ(dst, src); \
540 CpuFeatureScope sse_scope(masm(), CPU_FEATURE); \
541 __ OPCODE(dst, i.InputOperand(2), laneidx); \
542 } \
543 } \
544 } while (false)
545
546void CodeGenerator::AssembleDeconstructFrame() {
547 __ mov(esp, ebp);
548 __ pop(ebp);
549}
550
551void CodeGenerator::AssemblePrepareTailCall() {
552 if (frame_access_state()->has_frame()) {
553 __ mov(ebp, MemOperand(ebp, 0));
554 }
555 frame_access_state()->SetFrameAccessToSP();
556}
557
558namespace {
559
560void AdjustStackPointerForTailCall(MacroAssembler* masm,
561 FrameAccessState* state,
562 int new_slot_above_sp,
563 bool allow_shrinkage = true) {
564 int current_sp_offset = state->GetSPToFPSlotCount() +
565 StandardFrameConstants::kFixedSlotCountAboveFp;
566 int stack_slot_delta = new_slot_above_sp - current_sp_offset;
567 if (stack_slot_delta > 0) {
568 masm->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
569 state->IncreaseSPDelta(stack_slot_delta);
570 } else if (allow_shrinkage && stack_slot_delta < 0) {
571 masm->add(esp, Immediate(-stack_slot_delta * kSystemPointerSize));
572 state->IncreaseSPDelta(stack_slot_delta);
573 }
574}
575
576#ifdef DEBUG
577bool VerifyOutputOfAtomicPairInstr(IA32OperandConverter* converter,
578 const Instruction* instr) {
579 if (instr->OutputCount() == 2) {
580 return (converter->OutputRegister(0) == eax &&
581 converter->OutputRegister(1) == edx);
582 }
583 if (instr->OutputCount() == 1) {
584 return (converter->OutputRegister(0) == eax &&
585 converter->TempRegister(0) == edx) ||
586 (converter->OutputRegister(0) == edx &&
587 converter->TempRegister(0) == eax);
588 }
589 DCHECK_EQ(instr->OutputCount(), 0);
590 return (converter->TempRegister(0) == eax &&
591 converter->TempRegister(1) == edx);
592}
593#endif
594
595} // namespace
596
597void CodeGenerator::AssembleTailCallBeforeGaps(Instruction* instr,
598 int first_unused_slot_offset) {
599 CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
600 ZoneVector<MoveOperands*> pushes(zone());
601 GetPushCompatibleMoves(instr, flags, &pushes);
602
603 if (!pushes.empty() &&
604 (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
605 first_unused_slot_offset)) {
606 IA32OperandConverter g(this, instr);
607 for (auto move : pushes) {
608 LocationOperand destination_location(
609 LocationOperand::cast(move->destination()));
610 InstructionOperand source(move->source());
611 AdjustStackPointerForTailCall(masm(), frame_access_state(),
612 destination_location.index());
613 if (source.IsStackSlot()) {
614 LocationOperand source_location(LocationOperand::cast(source));
615 __ push(g.SlotToOperand(source_location.index()));
616 } else if (source.IsRegister()) {
617 LocationOperand source_location(LocationOperand::cast(source));
618 __ push(source_location.GetRegister());
619 } else if (source.IsImmediate()) {
620 __ Push(Immediate(ImmediateOperand::cast(source).inline_int32_value()));
621 } else {
622 // Pushes of non-scalar data types are not supported.
623 UNREACHABLE();
624 }
625 frame_access_state()->IncreaseSPDelta(1);
626 move->Eliminate();
627 }
628 }
629 AdjustStackPointerForTailCall(masm(), frame_access_state(),
630 first_unused_slot_offset, false);
631}
632
633void CodeGenerator::AssembleTailCallAfterGaps(Instruction* instr,
634 int first_unused_slot_offset) {
635 AdjustStackPointerForTailCall(masm(), frame_access_state(),
636 first_unused_slot_offset);
637}
638
639// Check that {kJavaScriptCallCodeStartRegister} is correct.
640void CodeGenerator::AssembleCodeStartRegisterCheck() {
641 __ push(eax); // Push eax so we can use it as a scratch register.
642 __ ComputeCodeStartAddress(eax);
643 __ cmp(eax, kJavaScriptCallCodeStartRegister);
644 __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
645 __ pop(eax); // Restore eax.
646}
647
648#ifdef V8_ENABLE_LEAPTIERING
649void CodeGenerator::AssembleDispatchHandleRegisterCheck() {
651}
652#endif // V8_ENABLE_LEAPTIERING
653
654// Check if the code object is marked for deoptimization. If it is, then it
655// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
656// to:
657// 1. read from memory the word that contains that bit, which can be found in
658// the flags in the referenced {Code} object;
659// 2. test kMarkedForDeoptimizationBit in those flags; and
660// 3. if it is not zero then it jumps to the builtin.
661//
662// Note: With leaptiering we simply assert the code is not deoptimized.
663void CodeGenerator::BailoutIfDeoptimized() {
664 int offset = InstructionStream::kCodeOffset - InstructionStream::kHeaderSize;
665 if (v8_flags.debug_code || !V8_ENABLE_LEAPTIERING_BOOL) {
666 __ push(eax); // Push eax so we can use it as a scratch register.
667 __ mov(eax, Operand(kJavaScriptCallCodeStartRegister, offset));
668 __ test(FieldOperand(eax, Code::kFlagsOffset),
669 Immediate(1 << Code::kMarkedForDeoptimizationBit));
670 __ pop(eax); // Restore eax.
671 }
672#ifdef V8_ENABLE_LEAPTIERING
673 if (v8_flags.debug_code) {
674 __ Assert(zero, AbortReason::kInvalidDeoptimizedCode);
675 }
676#else
677 Label skip;
678 __ j(zero, &skip, Label::kNear);
679 __ TailCallBuiltin(Builtin::kCompileLazyDeoptimizedCode);
680 __ bind(&skip);
681#endif
682}
683
684// Assembles an instruction after register allocation, producing machine code.
685CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
686 Instruction* instr) {
687 IA32OperandConverter i(this, instr);
688 InstructionCode opcode = instr->opcode();
689 ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
690 switch (arch_opcode) {
691 case kArchCallCodeObject: {
692 InstructionOperand* op = instr->InputAt(0);
693 if (op->IsImmediate()) {
694 Handle<Code> code = i.InputCode(0);
695 __ Call(code, RelocInfo::CODE_TARGET);
696 } else {
697 Register reg = i.InputRegister(0);
699 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
701 __ CallCodeObject(reg);
702 }
705 break;
706 }
707 case kArchCallBuiltinPointer: {
709 Register builtin_index = i.InputRegister(0);
710 Register target =
711 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister)
713 : builtin_index;
714 __ CallBuiltinByIndex(builtin_index, target);
717 break;
718 }
719#if V8_ENABLE_WEBASSEMBLY
720 case kArchCallWasmFunction:
721 case kArchCallWasmFunctionIndirect: {
722 if (arch_opcode == kArchCallWasmFunction) {
723 // This should always use immediate inputs since we don't have a
724 // constant pool on this arch.
726 Constant constant = i.ToConstant(instr->InputAt(0));
727 Address wasm_code = static_cast<Address>(constant.ToInt32());
728 if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
729 __ wasm_call(wasm_code, constant.rmode());
730 } else {
731 __ call(wasm_code, constant.rmode());
732 }
733 } else {
735 __ CallWasmCodePointer(i.InputRegister(0));
736 }
739 break;
740 }
741 case kArchTailCallWasm:
742 case kArchTailCallWasmIndirect: {
743 if (arch_opcode == kArchTailCallWasm) {
745 Constant constant = i.ToConstant(instr->InputAt(0));
746 Address wasm_code = static_cast<Address>(constant.ToInt32());
747 __ jmp(wasm_code, constant.rmode());
748 } else {
750 __ CallWasmCodePointer(i.InputRegister(0), CallJumpMode::kTailCall);
751 }
754 break;
755 }
756#endif // V8_ENABLE_WEBASSEMBLY
757 case kArchTailCallCodeObject: {
758 if (HasImmediateInput(instr, 0)) {
759 Handle<Code> code = i.InputCode(0);
760 __ Jump(code, RelocInfo::CODE_TARGET);
761 } else {
762 Register reg = i.InputRegister(0);
764 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
766 __ JumpCodeObject(reg);
767 }
770 break;
771 }
772 case kArchTailCallAddress: {
774 Register reg = i.InputRegister(0);
776 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
778 __ jmp(reg);
781 break;
782 }
783 case kArchCallJSFunction: {
784 Register func = i.InputRegister(0);
785 if (v8_flags.debug_code) {
786 // Check the function's context matches the context argument.
787 __ cmp(esi, FieldOperand(func, JSFunction::kContextOffset));
788 __ Assert(equal, AbortReason::kWrongFunctionContext);
789 }
790 uint32_t num_arguments =
791 i.InputUint32(instr->JSCallArgumentCountInputIndex());
792 __ CallJSFunction(func, num_arguments);
795 break;
796 }
797 case kArchPrepareCallCFunction: {
798 // Frame alignment requires using FP-relative frame addressing.
800 int const num_gp_parameters = ParamField::decode(instr->opcode());
801 int const num_fp_parameters = FPParamField::decode(instr->opcode());
802 __ PrepareCallCFunction(num_gp_parameters + num_fp_parameters,
803 i.TempRegister(0));
804 break;
805 }
806 case kArchSaveCallerRegisters: {
807 fp_mode_ =
808 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
811 // kReturnRegister0 should have been saved before entering the stub.
812 int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
814 DCHECK_EQ(0, frame_access_state()->sp_delta());
818 break;
819 }
820 case kArchRestoreCallerRegisters: {
822 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
825 // Don't overwrite the returned value.
826 int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
828 DCHECK_EQ(0, frame_access_state()->sp_delta());
831 break;
832 }
833 case kArchPrepareTailCall:
835 break;
836 case kArchCallCFunctionWithFrameState:
837 case kArchCallCFunction: {
838 int const num_parameters = ParamField::decode(instr->opcode()) +
839 FPParamField::decode(instr->opcode());
840
841 Label return_location;
842 SetIsolateDataSlots set_isolate_data_slots = SetIsolateDataSlots::kYes;
843#if V8_ENABLE_WEBASSEMBLY
844 if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
845 // Put the return address in a stack slot.
846 Register scratch = eax;
847 __ push(scratch);
848 __ LoadLabelAddress(scratch, &return_location);
849 __ mov(MemOperand(ebp, WasmExitFrameConstants::kCallingPCOffset),
850 scratch);
851 __ pop(scratch);
852 set_isolate_data_slots = SetIsolateDataSlots::kNo;
853 }
854#endif // V8_ENABLE_WEBASSEMBLY
855 int pc_offset;
856 if (HasImmediateInput(instr, 0)) {
857 ExternalReference ref = i.InputExternalReference(0);
858 pc_offset = __ CallCFunction(ref, num_parameters,
859 set_isolate_data_slots, &return_location);
860 } else {
861 Register func = i.InputRegister(0);
862 pc_offset = __ CallCFunction(func, num_parameters,
863 set_isolate_data_slots, &return_location);
864 }
865 RecordSafepoint(instr->reference_map(), pc_offset);
866
867 bool const needs_frame_state =
868 (arch_opcode == kArchCallCFunctionWithFrameState);
869 if (needs_frame_state) {
871 }
872
874 // Ideally, we should decrement SP delta to match the change of stack
875 // pointer in CallCFunction. However, for certain architectures (e.g.
876 // ARM), there may be more strict alignment requirement, causing old SP
877 // to be saved on the stack. In those cases, we can not calculate the SP
878 // delta statically.
881 // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
882 // Here, we assume the sequence to be:
883 // kArchSaveCallerRegisters;
884 // kArchCallCFunction;
885 // kArchRestoreCallerRegisters;
886 int bytes =
887 __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
889 }
890 break;
891 }
892 case kArchJmp:
893 AssembleArchJump(i.InputRpo(0));
894 break;
895 case kArchBinarySearchSwitch:
897 break;
898 case kArchTableSwitch:
900 break;
901 case kArchComment:
902 __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)),
904 break;
905 case kArchAbortCSADcheck:
906 DCHECK(i.InputRegister(0) == edx);
907 {
908 // We don't actually want to generate a pile of code for this, so just
909 // claim there is a stack frame, without generating one.
910 FrameScope scope(masm(), StackFrame::NO_FRAME_TYPE);
911 __ CallBuiltin(Builtin::kAbortCSADcheck);
912 }
913 __ int3();
914 break;
915 case kArchDebugBreak:
916 __ DebugBreak();
917 break;
918 case kArchNop:
919 case kArchThrowTerminator:
920 // don't emit code for nops.
921 break;
922 case kArchDeoptimize: {
923 DeoptimizationExit* exit =
925 __ jmp(exit->label());
926 break;
927 }
928 case kArchRet:
929 AssembleReturn(instr->InputAt(0));
930 break;
931 case kArchFramePointer:
932 __ mov(i.OutputRegister(), ebp);
933 break;
934 case kArchParentFramePointer:
935 if (frame_access_state()->has_frame()) {
936 __ mov(i.OutputRegister(), Operand(ebp, 0));
937 } else {
938 __ mov(i.OutputRegister(), ebp);
939 }
940 break;
941#if V8_ENABLE_WEBASSEMBLY
942 case kArchStackPointer:
943 __ mov(i.OutputRegister(), esp);
944 break;
945 case kArchSetStackPointer:
946 if (instr->InputAt(0)->IsRegister()) {
947 __ mov(esp, i.InputRegister(0));
948 } else {
949 __ mov(esp, i.InputOperand(0));
950 }
951 break;
952#endif
953 case kArchStackPointerGreaterThan: {
954 // Potentially apply an offset to the current stack pointer before the
955 // comparison to consider the size difference of an optimized frame versus
956 // the contained unoptimized frames.
957 Register lhs_register = esp;
958 uint32_t offset;
959
961 lhs_register = i.TempRegister(0);
962 __ lea(lhs_register, Operand(esp, -1 * static_cast<int32_t>(offset)));
963 }
964
965 constexpr size_t kValueIndex = 0;
966 if (HasAddressingMode(instr)) {
967 __ cmp(lhs_register, i.MemoryOperand(kValueIndex));
968 } else {
969 __ cmp(lhs_register, i.InputRegister(kValueIndex));
970 }
971 break;
972 }
973 case kArchStackCheckOffset:
974 __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
975 break;
976 case kArchTruncateDoubleToI: {
977 auto result = i.OutputRegister();
978 auto input = i.InputDoubleRegister(0);
979 auto ool = zone()->New<OutOfLineTruncateDoubleToI>(
980 this, result, input, DetermineStubCallMode());
981 __ cvttsd2si(result, Operand(input));
982 __ cmp(result, 1);
983 __ j(overflow, ool->entry());
984 __ bind(ool->exit());
985 break;
986 }
987 case kArchStoreWithWriteBarrier: // Fall through.
988 case kArchAtomicStoreWithWriteBarrier: {
989 RecordWriteMode mode = RecordWriteModeField::decode(instr->opcode());
990 Register object = i.InputRegister(0);
991 size_t index = 0;
992 Operand operand = i.MemoryOperand(&index);
993 Register value = i.InputRegister(index);
994 Register scratch0 = i.TempRegister(0);
995 Register scratch1 = i.TempRegister(1);
996
997 if (v8_flags.debug_code) {
998 // Checking that |value| is not a cleared weakref: our write barrier
999 // does not support that for now.
1000 __ cmp(value, Immediate(kClearedWeakHeapObjectLower32));
1001 __ Check(not_equal, AbortReason::kOperandIsCleared);
1002 }
1003
1004 auto ool = zone()->New<OutOfLineRecordWrite>(this, object, operand, value,
1005 scratch0, scratch1, mode,
1006 DetermineStubCallMode());
1007 if (arch_opcode == kArchStoreWithWriteBarrier) {
1008 __ mov(operand, value);
1009 } else {
1010 __ mov(scratch0, value);
1011 __ xchg(scratch0, operand);
1012 }
1013 if (mode > RecordWriteMode::kValueIsPointer) {
1014 __ JumpIfSmi(value, ool->exit());
1015 }
1016 __ CheckPageFlag(object, scratch0,
1017 MemoryChunk::kPointersFromHereAreInterestingMask,
1018 not_zero, ool->entry());
1019 __ bind(ool->exit());
1020 break;
1021 }
1022 case kArchStoreIndirectWithWriteBarrier:
1023 UNREACHABLE();
1024 case kArchStackSlot: {
1025 FrameOffset offset =
1026 frame_access_state()->GetFrameOffset(i.InputInt32(0));
1027 Register base = offset.from_stack_pointer() ? esp : ebp;
1028 __ lea(i.OutputRegister(), Operand(base, offset.offset()));
1029 break;
1030 }
1031 case kIeee754Float64Acos:
1032 ASSEMBLE_IEEE754_UNOP(acos);
1033 break;
1034 case kIeee754Float64Acosh:
1035 ASSEMBLE_IEEE754_UNOP(acosh);
1036 break;
1037 case kIeee754Float64Asin:
1038 ASSEMBLE_IEEE754_UNOP(asin);
1039 break;
1040 case kIeee754Float64Asinh:
1041 ASSEMBLE_IEEE754_UNOP(asinh);
1042 break;
1043 case kIeee754Float64Atan:
1044 ASSEMBLE_IEEE754_UNOP(atan);
1045 break;
1046 case kIeee754Float64Atanh:
1047 ASSEMBLE_IEEE754_UNOP(atanh);
1048 break;
1049 case kIeee754Float64Atan2:
1050 ASSEMBLE_IEEE754_BINOP(atan2);
1051 break;
1052 case kIeee754Float64Cbrt:
1053 ASSEMBLE_IEEE754_UNOP(cbrt);
1054 break;
1055 case kIeee754Float64Cos:
1056 ASSEMBLE_IEEE754_UNOP(cos);
1057 break;
1058 case kIeee754Float64Cosh:
1059 ASSEMBLE_IEEE754_UNOP(cosh);
1060 break;
1061 case kIeee754Float64Expm1:
1062 ASSEMBLE_IEEE754_UNOP(expm1);
1063 break;
1064 case kIeee754Float64Exp:
1065 ASSEMBLE_IEEE754_UNOP(exp);
1066 break;
1067 case kIeee754Float64Log:
1068 ASSEMBLE_IEEE754_UNOP(log);
1069 break;
1070 case kIeee754Float64Log1p:
1071 ASSEMBLE_IEEE754_UNOP(log1p);
1072 break;
1073 case kIeee754Float64Log2:
1074 ASSEMBLE_IEEE754_UNOP(log2);
1075 break;
1076 case kIeee754Float64Log10:
1077 ASSEMBLE_IEEE754_UNOP(log10);
1078 break;
1079 case kIeee754Float64Pow:
1080 ASSEMBLE_IEEE754_BINOP(pow);
1081 break;
1082 case kIeee754Float64Sin:
1083 ASSEMBLE_IEEE754_UNOP(sin);
1084 break;
1085 case kIeee754Float64Sinh:
1086 ASSEMBLE_IEEE754_UNOP(sinh);
1087 break;
1088 case kIeee754Float64Tan:
1089 ASSEMBLE_IEEE754_UNOP(tan);
1090 break;
1091 case kIeee754Float64Tanh:
1092 ASSEMBLE_IEEE754_UNOP(tanh);
1093 break;
1094 case kIA32Add:
1095 ASSEMBLE_BINOP(add);
1096 break;
1097 case kIA32And:
1098 ASSEMBLE_BINOP(and_);
1099 break;
1100 case kIA32Cmp:
1101 ASSEMBLE_COMPARE(cmp);
1102 break;
1103 case kIA32Cmp16:
1104 ASSEMBLE_COMPARE(cmpw);
1105 break;
1106 case kIA32Cmp8:
1107 ASSEMBLE_COMPARE(cmpb);
1108 break;
1109 case kIA32Test:
1110 ASSEMBLE_COMPARE(test);
1111 break;
1112 case kIA32Test16:
1113 ASSEMBLE_COMPARE(test_w);
1114 break;
1115 case kIA32Test8:
1116 ASSEMBLE_COMPARE(test_b);
1117 break;
1118 case kIA32Imul:
1119 if (HasImmediateInput(instr, 1)) {
1120 __ imul(i.OutputRegister(), i.InputOperand(0), i.InputInt32(1));
1121 } else {
1122 __ imul(i.OutputRegister(), i.InputOperand(1));
1123 }
1124 break;
1125 case kIA32ImulHigh:
1126 __ imul(i.InputRegister(1));
1127 break;
1128 case kIA32UmulHigh:
1129 __ mul(i.InputRegister(1));
1130 break;
1131 case kIA32Idiv:
1132 __ cdq();
1133 __ idiv(i.InputOperand(1));
1134 break;
1135 case kIA32Udiv:
1136 __ Move(edx, Immediate(0));
1137 __ div(i.InputOperand(1));
1138 break;
1139 case kIA32Not:
1140 __ not_(i.OutputOperand());
1141 break;
1142 case kIA32Neg:
1143 __ neg(i.OutputOperand());
1144 break;
1145 case kIA32Or:
1146 ASSEMBLE_BINOP(or_);
1147 break;
1148 case kIA32Xor:
1149 ASSEMBLE_BINOP(xor_);
1150 break;
1151 case kIA32Sub:
1152 ASSEMBLE_BINOP(sub);
1153 break;
1154 case kIA32Shl:
1155 if (HasImmediateInput(instr, 1)) {
1156 __ shl(i.OutputOperand(), i.InputInt5(1));
1157 } else {
1158 __ shl_cl(i.OutputOperand());
1159 }
1160 break;
1161 case kIA32Shr:
1162 if (HasImmediateInput(instr, 1)) {
1163 __ shr(i.OutputOperand(), i.InputInt5(1));
1164 } else {
1165 __ shr_cl(i.OutputOperand());
1166 }
1167 break;
1168 case kIA32Sar:
1169 if (HasImmediateInput(instr, 1)) {
1170 __ sar(i.OutputOperand(), i.InputInt5(1));
1171 } else {
1172 __ sar_cl(i.OutputOperand());
1173 }
1174 break;
1175 case kIA32AddPair: {
1176 // i.OutputRegister(0) == i.InputRegister(0) ... left low word.
1177 // i.InputRegister(1) ... left high word.
1178 // i.InputRegister(2) ... right low word.
1179 // i.InputRegister(3) ... right high word.
1180 bool use_temp = false;
1181 if ((HasRegisterInput(instr, 1) &&
1182 i.OutputRegister(0).code() == i.InputRegister(1).code()) ||
1183 i.OutputRegister(0).code() == i.InputRegister(3).code()) {
1184 // We cannot write to the output register directly, because it would
1185 // overwrite an input for adc. We have to use the temp register.
1186 use_temp = true;
1187 __ Move(i.TempRegister(0), i.InputRegister(0));
1188 __ add(i.TempRegister(0), i.InputRegister(2));
1189 } else {
1190 __ add(i.OutputRegister(0), i.InputRegister(2));
1191 }
1192 i.MoveInstructionOperandToRegister(i.OutputRegister(1),
1193 instr->InputAt(1));
1194 __ adc(i.OutputRegister(1), Operand(i.InputRegister(3)));
1195 if (use_temp) {
1196 __ Move(i.OutputRegister(0), i.TempRegister(0));
1197 }
1198 break;
1199 }
1200 case kIA32SubPair: {
1201 // i.OutputRegister(0) == i.InputRegister(0) ... left low word.
1202 // i.InputRegister(1) ... left high word.
1203 // i.InputRegister(2) ... right low word.
1204 // i.InputRegister(3) ... right high word.
1205 bool use_temp = false;
1206 if ((HasRegisterInput(instr, 1) &&
1207 i.OutputRegister(0).code() == i.InputRegister(1).code()) ||
1208 i.OutputRegister(0).code() == i.InputRegister(3).code()) {
1209 // We cannot write to the output register directly, because it would
1210 // overwrite an input for sbb. We have to use the temp register.
1211 use_temp = true;
1212 __ Move(i.TempRegister(0), i.InputRegister(0));
1213 __ sub(i.TempRegister(0), i.InputRegister(2));
1214 } else {
1215 __ sub(i.OutputRegister(0), i.InputRegister(2));
1216 }
1217 i.MoveInstructionOperandToRegister(i.OutputRegister(1),
1218 instr->InputAt(1));
1219 __ sbb(i.OutputRegister(1), Operand(i.InputRegister(3)));
1220 if (use_temp) {
1221 __ Move(i.OutputRegister(0), i.TempRegister(0));
1222 }
1223 break;
1224 }
1225 case kIA32MulPair: {
1226 __ imul(i.OutputRegister(1), i.InputOperand(0));
1227 i.MoveInstructionOperandToRegister(i.TempRegister(0), instr->InputAt(1));
1228 __ imul(i.TempRegister(0), i.InputOperand(2));
1229 __ add(i.OutputRegister(1), i.TempRegister(0));
1230 __ mov(i.OutputRegister(0), i.InputOperand(0));
1231 // Multiplies the low words and stores them in eax and edx.
1232 __ mul(i.InputRegister(2));
1233 __ add(i.OutputRegister(1), i.TempRegister(0));
1234
1235 break;
1236 }
1237 case kIA32ShlPair:
1238 if (HasImmediateInput(instr, 2)) {
1239 __ ShlPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
1240 } else {
1241 // Shift has been loaded into CL by the register allocator.
1242 __ ShlPair_cl(i.InputRegister(1), i.InputRegister(0));
1243 }
1244 break;
1245 case kIA32ShrPair:
1246 if (HasImmediateInput(instr, 2)) {
1247 __ ShrPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
1248 } else {
1249 // Shift has been loaded into CL by the register allocator.
1250 __ ShrPair_cl(i.InputRegister(1), i.InputRegister(0));
1251 }
1252 break;
1253 case kIA32SarPair:
1254 if (HasImmediateInput(instr, 2)) {
1255 __ SarPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
1256 } else {
1257 // Shift has been loaded into CL by the register allocator.
1258 __ SarPair_cl(i.InputRegister(1), i.InputRegister(0));
1259 }
1260 break;
1261 case kIA32Rol:
1262 if (HasImmediateInput(instr, 1)) {
1263 __ rol(i.OutputOperand(), i.InputInt5(1));
1264 } else {
1265 __ rol_cl(i.OutputOperand());
1266 }
1267 break;
1268 case kIA32Ror:
1269 if (HasImmediateInput(instr, 1)) {
1270 __ ror(i.OutputOperand(), i.InputInt5(1));
1271 } else {
1272 __ ror_cl(i.OutputOperand());
1273 }
1274 break;
1275 case kIA32Lzcnt:
1276 __ Lzcnt(i.OutputRegister(), i.InputOperand(0));
1277 break;
1278 case kIA32Tzcnt:
1279 __ Tzcnt(i.OutputRegister(), i.InputOperand(0));
1280 break;
1281 case kIA32Popcnt:
1282 __ Popcnt(i.OutputRegister(), i.InputOperand(0));
1283 break;
1284 case kIA32Bswap:
1285 __ bswap(i.OutputRegister());
1286 break;
1287 case kIA32MFence:
1288 __ mfence();
1289 break;
1290 case kIA32LFence:
1291 __ lfence();
1292 break;
1293 case kIA32Float32Cmp:
1294 __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1295 break;
1296 case kIA32Float32Sqrt:
1297 __ Sqrtss(i.OutputDoubleRegister(), i.InputOperand(0));
1298 break;
1299 case kIA32Float32Round: {
1300 CpuFeatureScope sse_scope(masm(), SSE4_1);
1301 RoundingMode const mode =
1302 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1303 __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1304 break;
1305 }
1306 case kIA32Float64Cmp:
1307 __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1308 break;
1309 case kIA32Float32Max: {
1310 Label compare_swap, done_compare;
1311 if (instr->InputAt(1)->IsFPRegister()) {
1312 __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1313 } else {
1314 __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1315 }
1316 auto ool =
1317 zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
1318 __ j(parity_even, ool->entry());
1319 __ j(above, &done_compare, Label::kNear);
1320 __ j(below, &compare_swap, Label::kNear);
1321 __ Movmskps(i.TempRegister(0), i.InputDoubleRegister(0));
1322 __ test(i.TempRegister(0), Immediate(1));
1323 __ j(zero, &done_compare, Label::kNear);
1324 __ bind(&compare_swap);
1325 if (instr->InputAt(1)->IsFPRegister()) {
1326 __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1327 } else {
1328 __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1329 }
1330 __ bind(&done_compare);
1331 __ bind(ool->exit());
1332 break;
1333 }
1334
1335 case kIA32Float64Max: {
1336 Label compare_swap, done_compare;
1337 if (instr->InputAt(1)->IsFPRegister()) {
1338 __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1339 } else {
1340 __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1341 }
1342 auto ool =
1343 zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
1344 __ j(parity_even, ool->entry());
1345 __ j(above, &done_compare, Label::kNear);
1346 __ j(below, &compare_swap, Label::kNear);
1347 __ Movmskpd(i.TempRegister(0), i.InputDoubleRegister(0));
1348 __ test(i.TempRegister(0), Immediate(1));
1349 __ j(zero, &done_compare, Label::kNear);
1350 __ bind(&compare_swap);
1351 if (instr->InputAt(1)->IsFPRegister()) {
1352 __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1353 } else {
1354 __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1355 }
1356 __ bind(&done_compare);
1357 __ bind(ool->exit());
1358 break;
1359 }
1360 case kIA32Float32Min: {
1361 Label compare_swap, done_compare;
1362 if (instr->InputAt(1)->IsFPRegister()) {
1363 __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1364 } else {
1365 __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1366 }
1367 auto ool =
1368 zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
1369 __ j(parity_even, ool->entry());
1370 __ j(below, &done_compare, Label::kNear);
1371 __ j(above, &compare_swap, Label::kNear);
1372 if (instr->InputAt(1)->IsFPRegister()) {
1373 __ Movmskps(i.TempRegister(0), i.InputDoubleRegister(1));
1374 } else {
1375 __ Movss(kScratchDoubleReg, i.InputOperand(1));
1376 __ Movmskps(i.TempRegister(0), kScratchDoubleReg);
1377 }
1378 __ test(i.TempRegister(0), Immediate(1));
1379 __ j(zero, &done_compare, Label::kNear);
1380 __ bind(&compare_swap);
1381 if (instr->InputAt(1)->IsFPRegister()) {
1382 __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1383 } else {
1384 __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1385 }
1386 __ bind(&done_compare);
1387 __ bind(ool->exit());
1388 break;
1389 }
1390 case kIA32Float64Min: {
1391 Label compare_swap, done_compare;
1392 if (instr->InputAt(1)->IsFPRegister()) {
1393 __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1394 } else {
1395 __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1396 }
1397 auto ool =
1398 zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
1399 __ j(parity_even, ool->entry());
1400 __ j(below, &done_compare, Label::kNear);
1401 __ j(above, &compare_swap, Label::kNear);
1402 if (instr->InputAt(1)->IsFPRegister()) {
1403 __ Movmskpd(i.TempRegister(0), i.InputDoubleRegister(1));
1404 } else {
1405 __ Movsd(kScratchDoubleReg, i.InputOperand(1));
1406 __ Movmskpd(i.TempRegister(0), kScratchDoubleReg);
1407 }
1408 __ test(i.TempRegister(0), Immediate(1));
1409 __ j(zero, &done_compare, Label::kNear);
1410 __ bind(&compare_swap);
1411 if (instr->InputAt(1)->IsFPRegister()) {
1412 __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1413 } else {
1414 __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1415 }
1416 __ bind(&done_compare);
1417 __ bind(ool->exit());
1418 break;
1419 }
1420 case kIA32Float64Mod: {
1421 Register tmp = i.TempRegister(1);
1422 __ mov(tmp, esp);
1423 __ AllocateStackSpace(kDoubleSize);
1424 __ and_(esp, -8); // align to 8 byte boundary.
1425 // Move values to st(0) and st(1).
1426 __ Movsd(Operand(esp, 0), i.InputDoubleRegister(1));
1427 __ fld_d(Operand(esp, 0));
1428 __ Movsd(Operand(esp, 0), i.InputDoubleRegister(0));
1429 __ fld_d(Operand(esp, 0));
1430 // Loop while fprem isn't done.
1431 Label mod_loop;
1432 __ bind(&mod_loop);
1433 // This instruction traps on all kinds of inputs, but we are assuming the
1434 // floating point control word is set to ignore them all.
1435 __ fprem();
1436 // fnstsw_ax clobbers eax.
1437 DCHECK_EQ(eax, i.TempRegister(0));
1438 __ fnstsw_ax();
1439 __ sahf();
1440 __ j(parity_even, &mod_loop);
1441 // Move output to stack and clean up.
1442 __ fstp(1);
1443 __ fstp_d(Operand(esp, 0));
1444 __ Movsd(i.OutputDoubleRegister(), Operand(esp, 0));
1445 __ mov(esp, tmp);
1446 break;
1447 }
1448 case kIA32Float64Sqrt:
1449 __ Sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0));
1450 break;
1451 case kIA32Float64Round: {
1452 RoundingMode const mode =
1453 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1454 __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1455 break;
1456 }
1457 case kIA32Float32ToFloat64:
1458 __ Cvtss2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1459 break;
1460 case kIA32Float64ToFloat32:
1461 __ Cvtsd2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1462 break;
1463 case kIA32Float32ToInt32:
1464 __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
1465 break;
1466 case kIA32Float32ToUint32:
1467 __ Cvttss2ui(i.OutputRegister(), i.InputOperand(0),
1468 i.TempSimd128Register(0));
1469 break;
1470 case kIA32Float64ToInt32:
1471 __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1472 break;
1473 case kIA32Float64ToUint32:
1474 __ Cvttsd2ui(i.OutputRegister(), i.InputOperand(0),
1475 i.TempSimd128Register(0));
1476 break;
1477 case kSSEInt32ToFloat32:
1478 // Calling Cvtsi2ss (which does a xor) regresses some benchmarks.
1479 __ cvtsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1480 break;
1481 case kIA32Uint32ToFloat32:
1482 __ Cvtui2ss(i.OutputDoubleRegister(), i.InputOperand(0),
1483 i.TempRegister(0));
1484 break;
1485 case kSSEInt32ToFloat64:
1486 // Calling Cvtsi2sd (which does a xor) regresses some benchmarks.
1487 __ cvtsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1488 break;
1489 case kIA32Uint32ToFloat64:
1490 __ Cvtui2sd(i.OutputDoubleRegister(), i.InputOperand(0),
1491 i.TempRegister(0));
1492 break;
1493 case kIA32Float64ExtractLowWord32:
1494 if (instr->InputAt(0)->IsFPStackSlot()) {
1495 __ mov(i.OutputRegister(), i.InputOperand(0));
1496 } else {
1497 __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
1498 }
1499 break;
1500 case kIA32Float64ExtractHighWord32:
1501 if (instr->InputAt(0)->IsFPStackSlot()) {
1502 __ mov(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
1503 } else {
1504 __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
1505 }
1506 break;
1507 case kIA32Float64InsertLowWord32:
1508 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
1509 break;
1510 case kIA32Float64InsertHighWord32:
1511 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
1512 break;
1513 case kIA32Float64FromWord32Pair:
1514 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(0), 0);
1515 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
1516 break;
1517 case kIA32Float64LoadLowWord32:
1518 __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
1519 break;
1520 case kFloat32Add: {
1521 __ Addss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1522 i.InputOperand(1));
1523 break;
1524 }
1525 case kFloat32Sub: {
1526 __ Subss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1527 i.InputOperand(1));
1528 break;
1529 }
1530 case kFloat32Mul: {
1531 __ Mulss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1532 i.InputOperand(1));
1533 break;
1534 }
1535 case kFloat32Div: {
1536 __ Divss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1537 i.InputOperand(1));
1538 // Don't delete this mov. It may improve performance on some CPUs,
1539 // when there is a (v)mulss depending on the result.
1540 __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1541 break;
1542 }
1543 case kFloat64Add: {
1544 __ Addsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1545 i.InputOperand(1));
1546 break;
1547 }
1548 case kFloat64Sub: {
1549 __ Subsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1550 i.InputOperand(1));
1551 break;
1552 }
1553 case kFloat64Mul: {
1554 __ Mulsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1555 i.InputOperand(1));
1556 break;
1557 }
1558 case kFloat64Div: {
1559 __ Divsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1560 i.InputOperand(1));
1561 // Don't delete this mov. It may improve performance on some CPUs,
1562 // when there is a (v)mulsd depending on the result.
1563 __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1564 break;
1565 }
1566 case kFloat32Abs: {
1567 __ Absps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1568 i.TempRegister(0));
1569 break;
1570 }
1571 case kFloat32Neg: {
1572 __ Negps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1573 i.TempRegister(0));
1574 break;
1575 }
1576 case kFloat64Abs: {
1577 __ Abspd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1578 i.TempRegister(0));
1579 break;
1580 }
1581 case kFloat64Neg: {
1582 __ Negpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1583 i.TempRegister(0));
1584 break;
1585 }
1586 case kIA32Float64SilenceNaN:
1587 __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
1588 __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
1589 break;
1590 case kIA32Movsxbl:
1591 ASSEMBLE_MOVX(movsx_b);
1592 break;
1593 case kIA32Movzxbl:
1594 ASSEMBLE_MOVX(movzx_b);
1595 break;
1596 case kIA32Movb: {
1597 size_t index = 0;
1598 Operand operand = i.MemoryOperand(&index);
1599 if (HasImmediateInput(instr, index)) {
1600 __ mov_b(operand, i.InputInt8(index));
1601 } else {
1602 __ mov_b(operand, i.InputRegister(index));
1603 }
1604 break;
1605 }
1606 case kIA32Movsxwl:
1607 ASSEMBLE_MOVX(movsx_w);
1608 break;
1609 case kIA32Movzxwl:
1610 ASSEMBLE_MOVX(movzx_w);
1611 break;
1612 case kIA32Movw: {
1613 size_t index = 0;
1614 Operand operand = i.MemoryOperand(&index);
1615 if (HasImmediateInput(instr, index)) {
1616 __ mov_w(operand, i.InputInt16(index));
1617 } else {
1618 __ mov_w(operand, i.InputRegister(index));
1619 }
1620 break;
1621 }
1622 case kIA32Movl:
1623 if (instr->HasOutput()) {
1624 __ mov(i.OutputRegister(), i.MemoryOperand());
1625 } else {
1626 size_t index = 0;
1627 Operand operand = i.MemoryOperand(&index);
1628 if (HasImmediateInput(instr, index)) {
1629 __ Move(operand, i.InputImmediate(index));
1630 } else {
1631 __ mov(operand, i.InputRegister(index));
1632 }
1633 }
1634 break;
1635 case kIA32Movsd:
1636 if (instr->HasOutput()) {
1637 __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
1638 } else {
1639 size_t index = 0;
1640 Operand operand = i.MemoryOperand(&index);
1641 __ Movsd(operand, i.InputDoubleRegister(index));
1642 }
1643 break;
1644 case kIA32Movss:
1645 if (instr->HasOutput()) {
1646 __ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
1647 } else {
1648 size_t index = 0;
1649 Operand operand = i.MemoryOperand(&index);
1650 __ Movss(operand, i.InputDoubleRegister(index));
1651 }
1652 break;
1653 case kIA32Movdqu:
1654 if (instr->HasOutput()) {
1655 __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
1656 } else {
1657 size_t index = 0;
1658 Operand operand = i.MemoryOperand(&index);
1659 __ Movdqu(operand, i.InputSimd128Register(index));
1660 }
1661 break;
1662 case kIA32BitcastFI:
1663 if (instr->InputAt(0)->IsFPStackSlot()) {
1664 __ mov(i.OutputRegister(), i.InputOperand(0));
1665 } else {
1666 __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
1667 }
1668 break;
1669 case kIA32BitcastIF:
1670 if (HasRegisterInput(instr, 0)) {
1671 __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
1672 } else {
1673 __ Movss(i.OutputDoubleRegister(), i.InputOperand(0));
1674 }
1675 break;
1676 case kIA32Lea: {
1677 AddressingMode mode = AddressingModeField::decode(instr->opcode());
1678 // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
1679 // and addressing mode just happens to work out. The "addl"/"subl" forms
1680 // in these cases are faster based on measurements.
1681 if (mode == kMode_MI) {
1682 __ Move(i.OutputRegister(), Immediate(i.InputInt32(0)));
1683 } else if (i.InputRegister(0) == i.OutputRegister()) {
1684 if (mode == kMode_MRI) {
1685 int32_t constant_summand = i.InputInt32(1);
1686 if (constant_summand > 0) {
1687 __ add(i.OutputRegister(), Immediate(constant_summand));
1688 } else if (constant_summand < 0) {
1689 __ sub(i.OutputRegister(),
1690 Immediate(base::NegateWithWraparound(constant_summand)));
1691 }
1692 } else if (mode == kMode_MR1) {
1693 if (i.InputRegister(1) == i.OutputRegister()) {
1694 __ shl(i.OutputRegister(), 1);
1695 } else {
1696 __ add(i.OutputRegister(), i.InputRegister(1));
1697 }
1698 } else if (mode == kMode_M2) {
1699 __ shl(i.OutputRegister(), 1);
1700 } else if (mode == kMode_M4) {
1701 __ shl(i.OutputRegister(), 2);
1702 } else if (mode == kMode_M8) {
1703 __ shl(i.OutputRegister(), 3);
1704 } else {
1705 __ lea(i.OutputRegister(), i.MemoryOperand());
1706 }
1707 } else if (mode == kMode_MR1 &&
1708 i.InputRegister(1) == i.OutputRegister()) {
1709 __ add(i.OutputRegister(), i.InputRegister(0));
1710 } else {
1711 __ lea(i.OutputRegister(), i.MemoryOperand());
1712 }
1713 break;
1714 }
1715 case kIA32Push: {
1716 int stack_decrement = i.InputInt32(0);
1717 int slots = stack_decrement / kSystemPointerSize;
1718 // Whenever codegen uses push, we need to check if stack_decrement
1719 // contains any extra padding and adjust the stack before the push.
1720 if (HasAddressingMode(instr)) {
1721 // Only single slot pushes from memory are supported.
1722 __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
1723 size_t index = 1;
1724 Operand operand = i.MemoryOperand(&index);
1725 __ push(operand);
1726 } else if (HasImmediateInput(instr, 1)) {
1727 __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
1728 __ push(i.InputImmediate(1));
1729 } else {
1730 InstructionOperand* input = instr->InputAt(1);
1731 if (input->IsRegister()) {
1732 __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
1733 __ push(i.InputRegister(1));
1734 } else if (input->IsFloatRegister()) {
1735 DCHECK_GE(stack_decrement, kFloatSize);
1736 __ AllocateStackSpace(stack_decrement);
1737 __ Movss(Operand(esp, 0), i.InputDoubleRegister(1));
1738 } else if (input->IsDoubleRegister()) {
1739 DCHECK_GE(stack_decrement, kDoubleSize);
1740 __ AllocateStackSpace(stack_decrement);
1741 __ Movsd(Operand(esp, 0), i.InputDoubleRegister(1));
1742 } else if (input->IsSimd128Register()) {
1743 DCHECK_GE(stack_decrement, kSimd128Size);
1744 __ AllocateStackSpace(stack_decrement);
1745 // TODO(bbudge) Use Movaps when slots are aligned.
1746 __ Movups(Operand(esp, 0), i.InputSimd128Register(1));
1747 } else if (input->IsStackSlot() || input->IsFloatStackSlot()) {
1748 __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
1749 __ push(i.InputOperand(1));
1750 } else if (input->IsDoubleStackSlot()) {
1751 DCHECK_GE(stack_decrement, kDoubleSize);
1752 __ Movsd(kScratchDoubleReg, i.InputOperand(1));
1753 __ AllocateStackSpace(stack_decrement);
1754 __ Movsd(Operand(esp, 0), kScratchDoubleReg);
1755 } else {
1756 DCHECK(input->IsSimd128StackSlot());
1757 DCHECK_GE(stack_decrement, kSimd128Size);
1758 // TODO(bbudge) Use Movaps when slots are aligned.
1759 __ Movups(kScratchDoubleReg, i.InputOperand(1));
1760 __ AllocateStackSpace(stack_decrement);
1761 __ Movups(Operand(esp, 0), kScratchDoubleReg);
1762 }
1763 }
1764 frame_access_state()->IncreaseSPDelta(slots);
1765 break;
1766 }
1767 case kIA32Poke: {
1768 int slot = MiscField::decode(instr->opcode());
1769 if (HasImmediateInput(instr, 0)) {
1770 __ mov(Operand(esp, slot * kSystemPointerSize), i.InputImmediate(0));
1771 } else {
1772 __ mov(Operand(esp, slot * kSystemPointerSize), i.InputRegister(0));
1773 }
1774 break;
1775 }
1776 case kIA32Peek: {
1777 int reverse_slot = i.InputInt32(0);
1778 int offset =
1779 FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
1780 if (instr->OutputAt(0)->IsFPRegister()) {
1781 LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
1782 if (op->representation() == MachineRepresentation::kFloat64) {
1783 __ Movsd(i.OutputDoubleRegister(), Operand(ebp, offset));
1784 } else if (op->representation() == MachineRepresentation::kFloat32) {
1785 __ Movss(i.OutputFloatRegister(), Operand(ebp, offset));
1786 } else {
1787 DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
1788 __ Movdqu(i.OutputSimd128Register(), Operand(ebp, offset));
1789 }
1790 } else {
1791 __ mov(i.OutputRegister(), Operand(ebp, offset));
1792 }
1793 break;
1794 }
1795 case kIA32F64x2Splat: {
1796 __ Movddup(i.OutputSimd128Register(), i.InputDoubleRegister(0));
1797 break;
1798 }
1799 case kIA32F64x2ExtractLane: {
1800 __ F64x2ExtractLane(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1801 i.InputUint8(1));
1802 break;
1803 }
1804 case kIA32F64x2ReplaceLane: {
1805 __ F64x2ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
1806 i.InputDoubleRegister(2), i.InputInt8(1));
1807 break;
1808 }
1809 case kIA32F64x2Sqrt: {
1810 __ Sqrtpd(i.OutputSimd128Register(), i.InputOperand(0));
1811 break;
1812 }
1813 case kIA32F64x2Add: {
1814 __ Addpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1815 i.InputOperand(1));
1816 break;
1817 }
1818 case kIA32F64x2Sub: {
1819 __ Subpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1820 i.InputOperand(1));
1821 break;
1822 }
1823 case kIA32F64x2Mul: {
1824 __ Mulpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1825 i.InputOperand(1));
1826 break;
1827 }
1828 case kIA32F64x2Div: {
1829 __ Divpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1830 i.InputOperand(1));
1831 break;
1832 }
1833 case kIA32F64x2Min: {
1834 __ F64x2Min(i.OutputSimd128Register(), i.InputSimd128Register(0),
1835 i.InputSimd128Register(1), kScratchDoubleReg);
1836 break;
1837 }
1838 case kIA32F64x2Max: {
1839 __ F64x2Max(i.OutputSimd128Register(), i.InputSimd128Register(0),
1840 i.InputSimd128Register(1), kScratchDoubleReg);
1841 break;
1842 }
1843 case kIA32F64x2Eq: {
1844 __ Cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
1845 i.InputOperand(1));
1846 break;
1847 }
1848 case kIA32F64x2Ne: {
1849 __ Cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
1850 i.InputOperand(1));
1851 break;
1852 }
1853 case kIA32F64x2Lt: {
1854 __ Cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
1855 i.InputOperand(1));
1856 break;
1857 }
1858 case kIA32F64x2Le: {
1859 __ Cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(0),
1860 i.InputOperand(1));
1861 break;
1862 }
1863 case kIA32F64x2Qfma: {
1864 __ F64x2Qfma(i.OutputSimd128Register(), i.InputSimd128Register(0),
1865 i.InputSimd128Register(1), i.InputSimd128Register(2),
1866 kScratchDoubleReg);
1867 break;
1868 }
1869 case kIA32F64x2Qfms: {
1870 __ F64x2Qfms(i.OutputSimd128Register(), i.InputSimd128Register(0),
1871 i.InputSimd128Register(1), i.InputSimd128Register(2),
1872 kScratchDoubleReg);
1873 break;
1874 }
1875 case kIA32Minpd: {
1876 __ Minpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
1877 i.InputSimd128Register(1));
1878 break;
1879 }
1880 case kIA32Maxpd: {
1881 __ Maxpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
1882 i.InputSimd128Register(1));
1883 break;
1884 }
1885 case kIA32F64x2Round: {
1886 RoundingMode const mode =
1887 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1888 __ Roundpd(i.OutputSimd128Register(), i.InputDoubleRegister(0), mode);
1889 break;
1890 }
1891 case kIA32F64x2PromoteLowF32x4: {
1892 if (HasAddressingMode(instr)) {
1893 __ Cvtps2pd(i.OutputSimd128Register(), i.MemoryOperand());
1894 } else {
1895 __ Cvtps2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
1896 }
1897 break;
1898 }
1899 case kIA32F32x4DemoteF64x2Zero: {
1900 __ Cvtpd2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
1901 break;
1902 }
1903 case kIA32I32x4TruncSatF64x2SZero: {
1904 __ I32x4TruncSatF64x2SZero(i.OutputSimd128Register(),
1905 i.InputSimd128Register(0), kScratchDoubleReg,
1906 i.TempRegister(0));
1907 break;
1908 }
1909 case kIA32I32x4TruncSatF64x2UZero: {
1910 __ I32x4TruncSatF64x2UZero(i.OutputSimd128Register(),
1911 i.InputSimd128Register(0), kScratchDoubleReg,
1912 i.TempRegister(0));
1913 break;
1914 }
1915 case kIA32F64x2ConvertLowI32x4S: {
1916 __ Cvtdq2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
1917 break;
1918 }
1919 case kIA32F64x2ConvertLowI32x4U: {
1920 __ F64x2ConvertLowI32x4U(i.OutputSimd128Register(),
1921 i.InputSimd128Register(0), i.TempRegister(0));
1922 break;
1923 }
1924 case kIA32I64x2ExtMulLowI32x4S: {
1925 __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
1926 i.InputSimd128Register(1), kScratchDoubleReg,
1927 /*low=*/true, /*is_signed=*/true);
1928 break;
1929 }
1930 case kIA32I64x2ExtMulHighI32x4S: {
1931 __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
1932 i.InputSimd128Register(1), kScratchDoubleReg,
1933 /*low=*/false, /*is_signed=*/true);
1934 break;
1935 }
1936 case kIA32I64x2ExtMulLowI32x4U: {
1937 __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
1938 i.InputSimd128Register(1), kScratchDoubleReg,
1939 /*low=*/true, /*is_signed=*/false);
1940 break;
1941 }
1942 case kIA32I64x2ExtMulHighI32x4U: {
1943 __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
1944 i.InputSimd128Register(1), kScratchDoubleReg,
1945 /*low=*/false, /*is_signed=*/false);
1946 break;
1947 }
1948 case kIA32I32x4ExtMulLowI16x8S: {
1949 __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
1950 i.InputSimd128Register(1), kScratchDoubleReg,
1951 /*low=*/true, /*is_signed=*/true);
1952 break;
1953 }
1954 case kIA32I32x4ExtMulHighI16x8S: {
1955 __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
1956 i.InputSimd128Register(1), kScratchDoubleReg,
1957 /*low=*/false, /*is_signed=*/true);
1958 break;
1959 }
1960 case kIA32I32x4ExtMulLowI16x8U: {
1961 __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
1962 i.InputSimd128Register(1), kScratchDoubleReg,
1963 /*low=*/true, /*is_signed=*/false);
1964 break;
1965 }
1966 case kIA32I32x4ExtMulHighI16x8U: {
1967 __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
1968 i.InputSimd128Register(1), kScratchDoubleReg,
1969 /*low=*/false, /*is_signed=*/false);
1970 break;
1971 }
1972 case kIA32I16x8ExtMulLowI8x16S: {
1973 __ I16x8ExtMulLow(i.OutputSimd128Register(), i.InputSimd128Register(0),
1974 i.InputSimd128Register(1), kScratchDoubleReg,
1975 /*is_signed=*/true);
1976 break;
1977 }
1978 case kIA32I16x8ExtMulHighI8x16S: {
1979 __ I16x8ExtMulHighS(i.OutputSimd128Register(), i.InputSimd128Register(0),
1980 i.InputSimd128Register(1), kScratchDoubleReg);
1981 break;
1982 }
1983 case kIA32I16x8ExtMulLowI8x16U: {
1984 __ I16x8ExtMulLow(i.OutputSimd128Register(), i.InputSimd128Register(0),
1985 i.InputSimd128Register(1), kScratchDoubleReg,
1986 /*is_signed=*/false);
1987 break;
1988 }
1989 case kIA32I16x8ExtMulHighI8x16U: {
1990 __ I16x8ExtMulHighU(i.OutputSimd128Register(), i.InputSimd128Register(0),
1991 i.InputSimd128Register(1), kScratchDoubleReg);
1992 break;
1993 }
1994 case kIA32I64x2SplatI32Pair: {
1995 XMMRegister dst = i.OutputSimd128Register();
1996 __ Pinsrd(dst, i.InputRegister(0), 0);
1997 __ Pinsrd(dst, i.InputOperand(1), 1);
1998 __ Pshufd(dst, dst, uint8_t{0x44});
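// Shuffle control 0x44 selects dwords {0, 1, 0, 1}: it copies the 64-bit
// pair just inserted into the low half up into the high half as well, which
// is the i64x2 splat of the two 32-bit inputs.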
1999 break;
2000 }
2001 case kIA32I64x2ReplaceLaneI32Pair: {
2002 int8_t lane = i.InputInt8(1);
2003 __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), lane * 2);
2004 __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(3), lane * 2 + 1);
2005 break;
2006 }
2007 case kIA32I64x2Abs: {
2008 __ I64x2Abs(i.OutputSimd128Register(), i.InputSimd128Register(0),
2009 kScratchDoubleReg);
2010 break;
2011 }
2012 case kIA32I64x2Neg: {
2013 __ I64x2Neg(i.OutputSimd128Register(), i.InputSimd128Register(0),
2014 kScratchDoubleReg);
2015 break;
2016 }
2017 case kIA32I64x2Shl: {
2018 ASSEMBLE_SIMD_SHIFT(Psllq, 6);
2019 break;
2020 }
2021 case kIA32I64x2ShrS: {
2022 XMMRegister dst = i.OutputSimd128Register();
2023 XMMRegister src = i.InputSimd128Register(0);
2024 if (HasImmediateInput(instr, 1)) {
2025 __ I64x2ShrS(dst, src, i.InputInt6(1), kScratchDoubleReg);
2026 } else {
2027 __ I64x2ShrS(dst, src, i.InputRegister(1), kScratchDoubleReg,
2028 i.TempSimd128Register(0), i.TempRegister(1));
2029 }
2030 break;
2031 }
2032 case kIA32I64x2Add: {
2033 __ Paddq(i.OutputSimd128Register(), i.InputSimd128Register(0),
2034 i.InputOperand(1));
2035 break;
2036 }
2037 case kIA32I64x2Sub: {
2038 __ Psubq(i.OutputSimd128Register(), i.InputSimd128Register(0),
2039 i.InputOperand(1));
2040 break;
2041 }
2042 case kIA32I64x2Mul: {
2043 __ I64x2Mul(i.OutputSimd128Register(), i.InputSimd128Register(0),
2044 i.InputSimd128Register(1), i.TempSimd128Register(0),
2045 i.TempSimd128Register(1));
2046 break;
2047 }
2048 case kIA32I64x2ShrU: {
2049 ASSEMBLE_SIMD_SHIFT(Psrlq, 6);
2050 break;
2051 }
2052 case kIA32I64x2BitMask: {
2053 __ Movmskpd(i.OutputRegister(), i.InputSimd128Register(0));
2054 break;
2055 }
2056 case kIA32I64x2Eq: {
2057 __ Pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(0),
2058 i.InputOperand(1));
2059 break;
2060 }
2061 case kIA32I64x2Ne: {
2062 __ Pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(0),
2063 i.InputOperand(1));
2064 __ Pcmpeqq(kScratchDoubleReg, kScratchDoubleReg);
2065 __ Pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2066 break;
2067 }
2068 case kIA32I64x2GtS: {
2069 __ I64x2GtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2070 i.InputSimd128Register(1), kScratchDoubleReg);
2071 break;
2072 }
2073 case kIA32I64x2GeS: {
2074 __ I64x2GeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2075 i.InputSimd128Register(1), kScratchDoubleReg);
2076 break;
2077 }
2078 case kIA32I64x2SConvertI32x4Low: {
2079 __ Pmovsxdq(i.OutputSimd128Register(), i.InputSimd128Register(0));
2080 break;
2081 }
2082 case kIA32I64x2SConvertI32x4High: {
2083 __ I64x2SConvertI32x4High(i.OutputSimd128Register(),
2084 i.InputSimd128Register(0));
2085 break;
2086 }
2087 case kIA32I64x2UConvertI32x4Low: {
2088 __ Pmovzxdq(i.OutputSimd128Register(), i.InputSimd128Register(0));
2089 break;
2090 }
2091 case kIA32I64x2UConvertI32x4High: {
2092 __ I64x2UConvertI32x4High(i.OutputSimd128Register(),
2093 i.InputSimd128Register(0), kScratchDoubleReg);
2094 break;
2095 }
2096 case kIA32I32x4ExtAddPairwiseI16x8S: {
2097 __ I32x4ExtAddPairwiseI16x8S(i.OutputSimd128Register(),
2098 i.InputSimd128Register(0),
2099 i.TempRegister(0));
2100 break;
2101 }
2102 case kIA32I32x4ExtAddPairwiseI16x8U: {
2103 __ I32x4ExtAddPairwiseI16x8U(i.OutputSimd128Register(),
2104 i.InputSimd128Register(0),
2105 kScratchDoubleReg);
2106 break;
2107 }
2108 case kIA32I16x8ExtAddPairwiseI8x16S: {
2109 __ I16x8ExtAddPairwiseI8x16S(i.OutputSimd128Register(),
2110 i.InputSimd128Register(0), kScratchDoubleReg,
2111 i.TempRegister(0));
2112 break;
2113 }
2114 case kIA32I16x8ExtAddPairwiseI8x16U: {
2115 __ I16x8ExtAddPairwiseI8x16U(i.OutputSimd128Register(),
2116 i.InputSimd128Register(0),
2117 i.TempRegister(0));
2118 break;
2119 }
2120 case kIA32I16x8Q15MulRSatS: {
2121 __ I16x8Q15MulRSatS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2122 i.InputSimd128Register(1), kScratchDoubleReg);
2123 break;
2124 }
2125 case kIA32I16x8RelaxedQ15MulRS: {
2126 __ Pmulhrsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2127 i.InputSimd128Register(1));
2128 break;
2129 }
2130 case kIA32I16x8DotI8x16I7x16S: {
2131 __ I16x8DotI8x16I7x16S(i.OutputSimd128Register(),
2132 i.InputSimd128Register(0),
2133 i.InputSimd128Register(1));
2134 break;
2135 }
2136 case kIA32I32x4DotI8x16I7x16AddS: {
2137 __ I32x4DotI8x16I7x16AddS(
2138 i.OutputSimd128Register(), i.InputSimd128Register(0),
2139 i.InputSimd128Register(1), i.InputSimd128Register(2),
2140 kScratchDoubleReg, i.TempSimd128Register(0));
2141 break;
2142 }
2143 case kIA32F32x4Splat: {
2144 __ F32x4Splat(i.OutputSimd128Register(), i.InputDoubleRegister(0));
2145 break;
2146 }
2147 case kIA32F32x4ExtractLane: {
2148 __ F32x4ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
2149 i.InputUint8(1));
2150 break;
2151 }
2152 case kIA32Insertps: {
2153 if (CpuFeatures::IsSupported(AVX)) {
2154 CpuFeatureScope avx_scope(masm(), AVX);
2155 __ vinsertps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2156 i.InputOperand(2), i.InputInt8(1) << 4);
2157 } else {
2158 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2159 CpuFeatureScope sse_scope(masm(), SSE4_1);
2160 __ insertps(i.OutputSimd128Register(), i.InputOperand(2),
2161 i.InputInt8(1) << 4);
2162 }
2163 break;
2164 }
2165 case kIA32F32x4SConvertI32x4: {
2166 __ Cvtdq2ps(i.OutputSimd128Register(), i.InputOperand(0));
2167 break;
2168 }
2169 case kIA32F32x4UConvertI32x4: {
2170 XMMRegister dst = i.OutputSimd128Register();
2171 XMMRegister src = i.InputSimd128Register(0);
2172 __ Pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
2173 __ Pblendw(kScratchDoubleReg, src, uint8_t{0x55}); // get lo 16 bits
2174 __ Psubd(dst, src, kScratchDoubleReg); // get hi 16 bits
2175 __ Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
2176 __ Psrld(dst, dst, uint8_t{1}); // divide by 2 to get in unsigned range
2177 __ Cvtdq2ps(dst, dst); // convert hi exactly
2178 __ Addps(dst, dst, dst); // double hi, exactly
2179 __ Addps(dst, dst, kScratchDoubleReg); // add hi and lo, may round.
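// Scalar sketch of the per-lane trick above (x is the unsigned 32-bit input):
//   lo = x & 0xFFFF; // exactly representable as float
//   hi = x & 0xFFFF0000; // halved below so cvtdq2ps never sees a negative int
//   result = 2.0f * (float)(hi >> 1) + (float)lo; // only the final add may round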
2180 break;
2181 }
2182 case kIA32F32x4Sqrt: {
2183 __ Sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2184 break;
2185 }
2186 case kIA32F32x4Add: {
2187 __ Addps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2188 i.InputOperand(1));
2189 break;
2190 }
2191 case kIA32F32x4Sub: {
2192 __ Subps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2193 i.InputOperand(1));
2194 break;
2195 }
2196 case kIA32F32x4Mul: {
2197 __ Mulps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2198 i.InputOperand(1));
2199 break;
2200 }
2201 case kIA32F32x4Div: {
2202 __ Divps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2203 i.InputOperand(1));
2204 break;
2205 }
2206 case kIA32F32x4Min: {
2207 __ F32x4Min(i.OutputSimd128Register(), i.InputSimd128Register(0),
2208 i.InputSimd128Register(1), kScratchDoubleReg);
2209 break;
2210 }
2211 case kIA32F32x4Max: {
2212 __ F32x4Max(i.OutputSimd128Register(), i.InputSimd128Register(0),
2213 i.InputSimd128Register(1), kScratchDoubleReg);
2214 break;
2215 }
2216 case kIA32F32x4Eq: {
2217 __ Cmpeqps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2218 i.InputOperand(1));
2219 break;
2220 }
2221 case kIA32F32x4Ne: {
2222 __ Cmpneqps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2223 i.InputOperand(1));
2224 break;
2225 }
2226 case kIA32F32x4Lt: {
2227 __ Cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2228 i.InputOperand(1));
2229 break;
2230 }
2231 case kIA32F32x4Le: {
2232 __ Cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2233 i.InputOperand(1));
2234 break;
2235 }
2236 case kIA32F32x4Qfma: {
2237 __ F32x4Qfma(i.OutputSimd128Register(), i.InputSimd128Register(0),
2238 i.InputSimd128Register(1), i.InputSimd128Register(2),
2239 kScratchDoubleReg);
2240 break;
2241 }
2242 case kIA32F32x4Qfms: {
2243 __ F32x4Qfms(i.OutputSimd128Register(), i.InputSimd128Register(0),
2244 i.InputSimd128Register(1), i.InputSimd128Register(2),
2245 kScratchDoubleReg);
2246 break;
2247 }
2248 case kIA32Minps: {
2249 __ Minps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2250 i.InputSimd128Register(1));
2251 break;
2252 }
2253 case kIA32Maxps: {
2254 __ Maxps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2255 i.InputSimd128Register(1));
2256 break;
2257 }
2258 case kIA32F32x4Round: {
2259 RoundingMode const mode =
2260 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
2261 __ Roundps(i.OutputSimd128Register(), i.InputDoubleRegister(0), mode);
2262 break;
2263 }
2264 case kIA32I32x4Splat: {
2265 XMMRegister dst = i.OutputSimd128Register();
2266 __ Movd(dst, i.InputOperand(0));
2267 __ Pshufd(dst, dst, uint8_t{0x0});
2268 break;
2269 }
2270 case kIA32I32x4ExtractLane: {
2271 __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2272 break;
2273 }
2274 case kIA32I32x4SConvertF32x4: {
2275 __ I32x4SConvertF32x4(i.OutputSimd128Register(),
2276 i.InputSimd128Register(0), kScratchDoubleReg,
2277 i.TempRegister(0));
2278 break;
2279 }
2280 case kIA32I32x4SConvertI16x8Low: {
2281 __ Pmovsxwd(i.OutputSimd128Register(), i.InputOperand(0));
2282 break;
2283 }
2284 case kIA32I32x4SConvertI16x8High: {
2285 __ I32x4SConvertI16x8High(i.OutputSimd128Register(),
2286 i.InputSimd128Register(0));
2287 break;
2288 }
2289 case kIA32I32x4Neg: {
2290 XMMRegister dst = i.OutputSimd128Register();
2291 Operand src = i.InputOperand(0);
2292 if (src.is_reg(dst)) {
2293 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2294 __ Psignd(dst, kScratchDoubleReg);
2295 } else {
2296 __ Pxor(dst, dst);
2297 __ Psubd(dst, src);
2298 }
2299 break;
2300 }
2301 case kIA32I32x4Shl: {
2302 ASSEMBLE_SIMD_SHIFT(Pslld, 5);
2303 break;
2304 }
2305 case kIA32I32x4ShrS: {
2306 ASSEMBLE_SIMD_SHIFT(Psrad, 5);
2307 break;
2308 }
2309 case kIA32I32x4Add: {
2310 __ Paddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2311 i.InputOperand(1));
2312 break;
2313 }
2314 case kIA32I32x4Sub: {
2315 __ Psubd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2316 i.InputOperand(1));
2317 break;
2318 }
2319 case kIA32I32x4Mul: {
2320 __ Pmulld(i.OutputSimd128Register(), i.InputSimd128Register(0),
2321 i.InputOperand(1));
2322 break;
2323 }
2324 case kIA32I32x4MinS: {
2325 __ Pminsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2326 i.InputOperand(1));
2327 break;
2328 }
2329 case kIA32I32x4MaxS: {
2330 __ Pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2331 i.InputOperand(1));
2332 break;
2333 }
2334 case kIA32I32x4Eq: {
2335 __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2336 i.InputOperand(1));
2337 break;
2338 }
2339 case kIA32I32x4Ne: {
2340 __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2341 i.InputOperand(1));
2342 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2343 __ Pxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
2344 kScratchDoubleReg);
2345 break;
2346 }
2347 case kIA32I32x4GtS: {
2348 __ Pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2349 i.InputOperand(1));
2350 break;
2351 }
2352 case kIA32I32x4GeS: {
2353 XMMRegister dst = i.OutputSimd128Register();
2354 XMMRegister src1 = i.InputSimd128Register(0);
2355 XMMRegister src2 = i.InputSimd128Register(1);
2356 if (CpuFeatures::IsSupported(AVX)) {
2357 CpuFeatureScope avx_scope(masm(), AVX);
2358 __ vpminsd(kScratchDoubleReg, src1, src2);
2359 __ vpcmpeqd(dst, kScratchDoubleReg, src2);
2360 } else {
2361 DCHECK_EQ(dst, src1);
2362 CpuFeatureScope sse_scope(masm(), SSE4_1);
2363 __ pminsd(dst, src2);
2364 __ pcmpeqd(dst, src2);
2365 }
2366 break;
2367 }
2368 case kSSEI32x4UConvertF32x4: {
2369 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2370 CpuFeatureScope sse_scope(masm(), SSE4_1);
2371 XMMRegister dst = i.OutputSimd128Register();
2372 XMMRegister tmp = i.TempSimd128Register(0);
2373 XMMRegister tmp2 = i.TempSimd128Register(1);
2374 __ I32x4TruncF32x4U(dst, dst, tmp, tmp2);
2375 break;
2376 }
2377 case kAVXI32x4UConvertF32x4: {
2378 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2379 CpuFeatureScope avx_scope(masm(), AVX);
2380 XMMRegister dst = i.OutputSimd128Register();
2381 XMMRegister tmp = i.TempSimd128Register(0);
2382 // NAN->0, negative->0
2383 __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2384 __ vmaxps(dst, dst, kScratchDoubleReg);
2385 // scratch: float representation of max_signed
2386 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2387 __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1); // 0x7fffffff
2388 __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000
2389 // tmp: convert (src-max_signed).
2390 // Positive overflow lanes -> 0x7FFFFFFF
2391 // Negative lanes -> 0
2392 __ vsubps(tmp, dst, kScratchDoubleReg);
2393 __ vcmpleps(kScratchDoubleReg, kScratchDoubleReg, tmp);
2394 __ vcvttps2dq(tmp, tmp);
2395 __ vpxor(tmp, tmp, kScratchDoubleReg);
2396 __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2397 __ vpmaxsd(tmp, tmp, kScratchDoubleReg);
2398 // convert. Overflow lanes above max_signed will be 0x80000000
2399 __ vcvttps2dq(dst, dst);
2400 // Add (src-max_signed) for overflow lanes.
2401 __ vpaddd(dst, dst, tmp);
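// For a lane x in [2^31, 2^32), cvttps2dq saturates to 0x80000000 (2^31 when
// read as unsigned) while tmp holds trunc(x - 2^31); their 32-bit wrap-around
// sum is x again, e.g. 4000000000.0f -> 0x80000000 + 1852516352 == 4000000000.
// Lanes already below 2^31 convert directly and have tmp clamped to 0.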
2402 break;
2403 }
2404 case kIA32I32x4UConvertI16x8Low: {
2405 __ Pmovzxwd(i.OutputSimd128Register(), i.InputOperand(0));
2406 break;
2407 }
2408 case kIA32I32x4UConvertI16x8High: {
2409 __ I32x4UConvertI16x8High(i.OutputSimd128Register(),
2410 i.InputSimd128Register(0), kScratchDoubleReg);
2411 break;
2412 }
2413 case kIA32I32x4ShrU: {
2414 ASSEMBLE_SIMD_SHIFT(Psrld, 5);
2415 break;
2416 }
2417 case kIA32I32x4MinU: {
2418 __ Pminud(i.OutputSimd128Register(), i.InputSimd128Register(0),
2419 i.InputOperand(1));
2420 break;
2421 }
2422 case kIA32I32x4MaxU: {
2423 __ Pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(0),
2424 i.InputOperand(1));
2425 break;
2426 }
2427 case kSSEI32x4GtU: {
2428 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2429 CpuFeatureScope sse_scope(masm(), SSE4_1);
2430 XMMRegister dst = i.OutputSimd128Register();
2431 Operand src = i.InputOperand(1);
2432 __ pmaxud(dst, src);
2433 __ pcmpeqd(dst, src);
2434 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2435 __ xorps(dst, kScratchDoubleReg);
2436 break;
2437 }
2438 case kAVXI32x4GtU: {
2439 CpuFeatureScope avx_scope(masm(), AVX);
2440 XMMRegister dst = i.OutputSimd128Register();
2441 XMMRegister src1 = i.InputSimd128Register(0);
2442 Operand src2 = i.InputOperand(1);
2443 __ vpmaxud(kScratchDoubleReg, src1, src2);
2444 __ vpcmpeqd(dst, kScratchDoubleReg, src2);
2445 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2446 __ vpxor(dst, dst, kScratchDoubleReg);
2447 break;
2448 }
2449 case kSSEI32x4GeU: {
2450 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2451 CpuFeatureScope sse_scope(masm(), SSE4_1);
2452 XMMRegister dst = i.OutputSimd128Register();
2453 Operand src = i.InputOperand(1);
2454 __ pminud(dst, src);
2455 __ pcmpeqd(dst, src);
2456 break;
2457 }
2458 case kAVXI32x4GeU: {
2459 CpuFeatureScope avx_scope(masm(), AVX);
2460 XMMRegister src1 = i.InputSimd128Register(0);
2461 Operand src2 = i.InputOperand(1);
2462 __ vpminud(kScratchDoubleReg, src1, src2);
2463 __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
2464 break;
2465 }
2466 case kIA32I32x4Abs: {
2467 __ Pabsd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2468 break;
2469 }
2470 case kIA32I32x4BitMask: {
2471 __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
2472 break;
2473 }
2474 case kIA32I32x4DotI16x8S: {
2475 __ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2476 i.InputOperand(1));
2477 break;
2478 }
2479 case kIA32I16x8Splat: {
2480 if (instr->InputAt(0)->IsRegister()) {
2481 __ I16x8Splat(i.OutputSimd128Register(), i.InputRegister(0));
2482 } else {
2483 __ I16x8Splat(i.OutputSimd128Register(), i.InputOperand(0));
2484 }
2485 break;
2486 }
2487 case kIA32I16x8ExtractLaneS: {
2488 Register dst = i.OutputRegister();
2489 __ Pextrw(dst, i.InputSimd128Register(0), i.InputUint8(1));
2490 __ movsx_w(dst, dst);
2491 break;
2492 }
2493 case kIA32I16x8SConvertI8x16Low: {
2494 __ Pmovsxbw(i.OutputSimd128Register(), i.InputOperand(0));
2495 break;
2496 }
2497 case kIA32I16x8SConvertI8x16High: {
2498 __ I16x8SConvertI8x16High(i.OutputSimd128Register(),
2499 i.InputSimd128Register(0));
2500 break;
2501 }
2502 case kIA32I16x8Neg: {
2503 XMMRegister dst = i.OutputSimd128Register();
2504 Operand src = i.InputOperand(0);
2505 if (src.is_reg(dst)) {
2506 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2507 __ Psignw(dst, kScratchDoubleReg);
2508 } else {
2509 __ Pxor(dst, dst);
2510 __ Psubw(dst, src);
2511 }
2512 break;
2513 }
2514 case kIA32I16x8Shl: {
2515 ASSEMBLE_SIMD_SHIFT(Psllw, 4);
2516 break;
2517 }
2518 case kIA32I16x8ShrS: {
2519 ASSEMBLE_SIMD_SHIFT(Psraw, 4);
2520 break;
2521 }
2522 case kIA32I16x8SConvertI32x4: {
2523 __ Packssdw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2524 i.InputOperand(1));
2525 break;
2526 }
2527 case kIA32I16x8Add: {
2528 __ Paddw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2529 i.InputOperand(1));
2530 break;
2531 }
2532 case kIA32I16x8AddSatS: {
2533 __ Paddsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2534 i.InputOperand(1));
2535 break;
2536 }
2537 case kIA32I16x8Sub: {
2538 __ Psubw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2539 i.InputOperand(1));
2540 break;
2541 }
2542 case kIA32I16x8SubSatS: {
2543 __ Psubsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2544 i.InputOperand(1));
2545 break;
2546 }
2547 case kIA32I16x8Mul: {
2548 __ Pmullw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2549 i.InputOperand(1));
2550 break;
2551 }
2552 case kIA32I16x8MinS: {
2553 __ Pminsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2554 i.InputOperand(1));
2555 break;
2556 }
2557 case kIA32I16x8MaxS: {
2558 __ Pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2559 i.InputOperand(1));
2560 break;
2561 }
2562 case kIA32I16x8Eq: {
2563 __ Pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2564 i.InputOperand(1));
2565 break;
2566 }
2567 case kSSEI16x8Ne: {
2568 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2569 __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1));
2570 __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2571 __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
2572 break;
2573 }
2574 case kAVXI16x8Ne: {
2575 CpuFeatureScope avx_scope(masm(), AVX);
2576 __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2577 i.InputOperand(1));
2578 __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2579 __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
2580 kScratchDoubleReg);
2581 break;
2582 }
2583 case kIA32I16x8GtS: {
2584 __ Pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2585 i.InputOperand(1));
2586 break;
2587 }
2588 case kSSEI16x8GeS: {
2589 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2590 XMMRegister dst = i.OutputSimd128Register();
2591 Operand src = i.InputOperand(1);
2592 __ pminsw(dst, src);
2593 __ pcmpeqw(dst, src);
2594 break;
2595 }
2596 case kAVXI16x8GeS: {
2597 CpuFeatureScope avx_scope(masm(), AVX);
2598 XMMRegister src1 = i.InputSimd128Register(0);
2599 Operand src2 = i.InputOperand(1);
2600 __ vpminsw(kScratchDoubleReg, src1, src2);
2601 __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
2602 break;
2603 }
2604 case kIA32I16x8UConvertI8x16Low: {
2605 __ Pmovzxbw(i.OutputSimd128Register(), i.InputOperand(0));
2606 break;
2607 }
2608 case kIA32I16x8UConvertI8x16High: {
2609 __ I16x8UConvertI8x16High(i.OutputSimd128Register(),
2610 i.InputSimd128Register(0), kScratchDoubleReg);
2611 break;
2612 }
2613 case kIA32I16x8ShrU: {
2614 ASSEMBLE_SIMD_SHIFT(Psrlw, 4);
2615 break;
2616 }
2617 case kIA32I16x8UConvertI32x4: {
2618 __ Packusdw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2619 i.InputSimd128Register(1));
2620 break;
2621 }
2622 case kIA32I16x8AddSatU: {
2623 __ Paddusw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2624 i.InputOperand(1));
2625 break;
2626 }
2627 case kIA32I16x8SubSatU: {
2628 __ Psubusw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2629 i.InputOperand(1));
2630 break;
2631 }
2632 case kIA32I16x8MinU: {
2633 __ Pminuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2634 i.InputOperand(1));
2635 break;
2636 }
2637 case kIA32I16x8MaxU: {
2638 __ Pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2639 i.InputOperand(1));
2640 break;
2641 }
2642 case kSSEI16x8GtU: {
2643 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2644 CpuFeatureScope sse_scope(masm(), SSE4_1);
2645 XMMRegister dst = i.OutputSimd128Register();
2646 Operand src = i.InputOperand(1);
2647 __ pmaxuw(dst, src);
2648 __ pcmpeqw(dst, src);
2649 __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2650 __ xorps(dst, kScratchDoubleReg);
2651 break;
2652 }
2653 case kAVXI16x8GtU: {
2654 CpuFeatureScope avx_scope(masm(), AVX);
2655 XMMRegister dst = i.OutputSimd128Register();
2656 XMMRegister src1 = i.InputSimd128Register(0);
2657 Operand src2 = i.InputOperand(1);
2658 __ vpmaxuw(kScratchDoubleReg, src1, src2);
2659 __ vpcmpeqw(dst, kScratchDoubleReg, src2);
2660 __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2661 __ vpxor(dst, dst, kScratchDoubleReg);
2662 break;
2663 }
2664 case kSSEI16x8GeU: {
2665 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2666 CpuFeatureScope sse_scope(masm(), SSE4_1);
2667 XMMRegister dst = i.OutputSimd128Register();
2668 Operand src = i.InputOperand(1);
2669 __ pminuw(dst, src);
2670 __ pcmpeqw(dst, src);
2671 break;
2672 }
2673 case kAVXI16x8GeU: {
2674 CpuFeatureScope avx_scope(masm(), AVX);
2675 XMMRegister src1 = i.InputSimd128Register(0);
2676 Operand src2 = i.InputOperand(1);
2677 __ vpminuw(kScratchDoubleReg, src1, src2);
2678 __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
2679 break;
2680 }
2681 case kIA32I16x8RoundingAverageU: {
2682 __ Pavgw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2683 i.InputOperand(1));
2684 break;
2685 }
2686 case kIA32I16x8Abs: {
2687 __ Pabsw(i.OutputSimd128Register(), i.InputSimd128Register(0));
2688 break;
2689 }
2690 case kIA32I16x8BitMask: {
2691 Register dst = i.OutputRegister();
2692 XMMRegister tmp = i.TempSimd128Register(0);
2693 __ Packsswb(tmp, i.InputSimd128Register(0));
2694 __ Pmovmskb(dst, tmp);
2695 __ shr(dst, 8);
2696 break;
2697 }
2698 case kIA32I8x16Splat: {
2699 if (instr->InputAt(0)->IsRegister()) {
2700 __ I8x16Splat(i.OutputSimd128Register(), i.InputRegister(0),
2701 kScratchDoubleReg);
2702 } else {
2703 __ I8x16Splat(i.OutputSimd128Register(), i.InputOperand(0),
2704 kScratchDoubleReg);
2705 }
2706 break;
2707 }
2708 case kIA32I8x16ExtractLaneS: {
2709 Register dst = i.OutputRegister();
2710 __ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1));
2711 __ movsx_b(dst, dst);
2712 break;
2713 }
2714 case kIA32Pinsrb: {
2715 ASSEMBLE_SIMD_PINSR(pinsrb, SSE4_1);
2716 break;
2717 }
2718 case kIA32Pinsrw: {
2719 ASSEMBLE_SIMD_PINSR(pinsrw, SSE4_1);
2720 break;
2721 }
2722 case kIA32Pinsrd: {
2723 ASSEMBLE_SIMD_PINSR(pinsrd, SSE4_1);
2724 break;
2725 }
2726 case kIA32Movlps: {
2727 if (instr->HasOutput()) {
2728 __ Movlps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2729 i.MemoryOperand(2));
2730 } else {
2731 size_t index = 0;
2732 Operand dst = i.MemoryOperand(&index);
2733 __ Movlps(dst, i.InputSimd128Register(index));
2734 }
2735 break;
2736 }
2737 case kIA32Movhps: {
2738 if (instr->HasOutput()) {
2739 __ Movhps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2740 i.MemoryOperand(2));
2741 } else {
2742 size_t index = 0;
2743 Operand dst = i.MemoryOperand(&index);
2744 __ Movhps(dst, i.InputSimd128Register(index));
2745 }
2746 break;
2747 }
2748 case kIA32Pextrb: {
2749 if (HasAddressingMode(instr)) {
2750 size_t index = 0;
2751 Operand operand = i.MemoryOperand(&index);
2752 __ Pextrb(operand, i.InputSimd128Register(index),
2753 i.InputUint8(index + 1));
2754 } else {
2755 Register dst = i.OutputRegister();
2756 __ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1));
2757 }
2758 break;
2759 }
2760 case kIA32Pextrw: {
2761 if (HasAddressingMode(instr)) {
2762 size_t index = 0;
2763 Operand operand = i.MemoryOperand(&index);
2764 __ Pextrw(operand, i.InputSimd128Register(index),
2765 i.InputUint8(index + 1));
2766 } else {
2767 Register dst = i.OutputRegister();
2768 __ Pextrw(dst, i.InputSimd128Register(0), i.InputUint8(1));
2769 }
2770 break;
2771 }
2772 case kIA32S128Store32Lane: {
2773 size_t index = 0;
2774 Operand operand = i.MemoryOperand(&index);
2775 uint8_t laneidx = i.InputUint8(index + 1);
2776 __ S128Store32Lane(operand, i.InputSimd128Register(index), laneidx);
2777 break;
2778 }
2779 case kIA32I8x16SConvertI16x8: {
2780 __ Packsswb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2781 i.InputOperand(1));
2782 break;
2783 }
2784 case kIA32I8x16Neg: {
2785 XMMRegister dst = i.OutputSimd128Register();
2786 Operand src = i.InputOperand(0);
2787 if (src.is_reg(dst)) {
2788 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2789 __ Psignb(dst, kScratchDoubleReg);
2790 } else {
2791 __ Pxor(dst, dst);
2792 __ Psubb(dst, src);
2793 }
2794 break;
2795 }
2796 case kIA32I8x16Shl: {
2797 XMMRegister dst = i.OutputSimd128Register();
2798 XMMRegister src = i.InputSimd128Register(0);
2799 DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
2800 Register tmp = i.TempRegister(0);
2801
2802 if (HasImmediateInput(instr, 1)) {
2803 __ I8x16Shl(dst, src, i.InputInt3(1), tmp, kScratchDoubleReg);
2804 } else {
2805 XMMRegister tmp_simd = i.TempSimd128Register(1);
2806 __ I8x16Shl(dst, src, i.InputRegister(1), tmp, kScratchDoubleReg,
2807 tmp_simd);
2808 }
2809 break;
2810 }
2811 case kIA32I8x16ShrS: {
2812 XMMRegister dst = i.OutputSimd128Register();
2813 XMMRegister src = i.InputSimd128Register(0);
2814 DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
2815
2816 if (HasImmediateInput(instr, 1)) {
2817 __ I8x16ShrS(dst, src, i.InputInt3(1), kScratchDoubleReg);
2818 } else {
2819 __ I8x16ShrS(dst, src, i.InputRegister(1), i.TempRegister(0),
2820 kScratchDoubleReg, i.TempSimd128Register(1));
2821 }
2822 break;
2823 }
2824 case kIA32I8x16Add: {
2825 __ Paddb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2826 i.InputOperand(1));
2827 break;
2828 }
2829 case kIA32I8x16AddSatS: {
2830 __ Paddsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2831 i.InputOperand(1));
2832 break;
2833 }
2834 case kIA32I8x16Sub: {
2835 __ Psubb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2836 i.InputOperand(1));
2837 break;
2838 }
2839 case kIA32I8x16SubSatS: {
2840 __ Psubsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2841 i.InputOperand(1));
2842 break;
2843 }
2844 case kIA32I8x16MinS: {
2845 __ Pminsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2846 i.InputOperand(1));
2847 break;
2848 }
2849 case kIA32I8x16MaxS: {
2850 __ Pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2851 i.InputOperand(1));
2852 break;
2853 }
2854 case kIA32I8x16Eq: {
2855 __ Pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2856 i.InputOperand(1));
2857 break;
2858 }
2859 case kSSEI8x16Ne: {
2860 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2861 __ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1));
2862 __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2863 __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
2864 break;
2865 }
2866 case kAVXI8x16Ne: {
2867 CpuFeatureScope avx_scope(masm(), AVX);
2868 __ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2869 i.InputOperand(1));
2870 __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2871 __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
2872 kScratchDoubleReg);
2873 break;
2874 }
2875 case kIA32I8x16GtS: {
2876 __ Pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2877 i.InputOperand(1));
2878 break;
2879 }
2880 case kSSEI8x16GeS: {
2881 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2882 CpuFeatureScope sse_scope(masm(), SSE4_1);
2883 XMMRegister dst = i.OutputSimd128Register();
2884 Operand src = i.InputOperand(1);
2885 __ pminsb(dst, src);
2886 __ pcmpeqb(dst, src);
2887 break;
2888 }
2889 case kAVXI8x16GeS: {
2890 CpuFeatureScope avx_scope(masm(), AVX);
2891 XMMRegister src1 = i.InputSimd128Register(0);
2892 Operand src2 = i.InputOperand(1);
2893 __ vpminsb(kScratchDoubleReg, src1, src2);
2894 __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
2895 break;
2896 }
2897 case kIA32I8x16UConvertI16x8: {
2898 __ Packuswb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2899 i.InputSimd128Register(1));
2900 break;
2901 }
2902 case kIA32I8x16AddSatU: {
2903 __ Paddusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2904 i.InputOperand(1));
2905 break;
2906 }
2907 case kIA32I8x16SubSatU: {
2908 __ Psubusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2909 i.InputOperand(1));
2910 break;
2911 }
2912 case kIA32I8x16ShrU: {
2913 XMMRegister dst = i.OutputSimd128Register();
2914 XMMRegister src = i.InputSimd128Register(0);
2915 DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
2916 Register tmp = i.TempRegister(0);
2917
2918 if (HasImmediateInput(instr, 1)) {
2919 __ I8x16ShrU(dst, src, i.InputInt3(1), tmp, kScratchDoubleReg);
2920 } else {
2921 __ I8x16ShrU(dst, src, i.InputRegister(1), tmp, kScratchDoubleReg,
2922 i.TempSimd128Register(1));
2923 }
2924
2925 break;
2926 }
2927 case kIA32I8x16MinU: {
2928 __ Pminub(i.OutputSimd128Register(), i.InputSimd128Register(0),
2929 i.InputOperand(1));
2930 break;
2931 }
2932 case kIA32I8x16MaxU: {
2933 __ Pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(0),
2934 i.InputOperand(1));
2935 break;
2936 }
2937 case kSSEI8x16GtU: {
2938 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2939 XMMRegister dst = i.OutputSimd128Register();
2940 Operand src = i.InputOperand(1);
2941 __ pmaxub(dst, src);
2942 __ pcmpeqb(dst, src);
2943 __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2944 __ xorps(dst, kScratchDoubleReg);
2945 break;
2946 }
2947 case kAVXI8x16GtU: {
2948 CpuFeatureScope avx_scope(masm(), AVX);
2949 XMMRegister dst = i.OutputSimd128Register();
2950 XMMRegister src1 = i.InputSimd128Register(0);
2951 Operand src2 = i.InputOperand(1);
2952 __ vpmaxub(kScratchDoubleReg, src1, src2);
2953 __ vpcmpeqb(dst, kScratchDoubleReg, src2);
2954 __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2955 __ vpxor(dst, dst, kScratchDoubleReg);
2956 break;
2957 }
2958 case kSSEI8x16GeU: {
2959 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2960 XMMRegister dst = i.OutputSimd128Register();
2961 Operand src = i.InputOperand(1);
2962 __ pminub(dst, src);
2963 __ pcmpeqb(dst, src);
2964 break;
2965 }
2966 case kAVXI8x16GeU: {
2967 CpuFeatureScope avx_scope(masm(), AVX);
2968 XMMRegister src1 = i.InputSimd128Register(0);
2969 Operand src2 = i.InputOperand(1);
2970 __ vpminub(kScratchDoubleReg, src1, src2);
2971 __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
2972 break;
2973 }
2974 case kIA32I8x16RoundingAverageU: {
2975 __ Pavgb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2976 i.InputOperand(1));
2977 break;
2978 }
2979 case kIA32I8x16Abs: {
2980 __ Pabsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
2981 break;
2982 }
2983 case kIA32I8x16BitMask: {
2984 __ Pmovmskb(i.OutputRegister(), i.InputSimd128Register(0));
2985 break;
2986 }
2987 case kIA32I8x16Popcnt: {
2988 __ I8x16Popcnt(i.OutputSimd128Register(), i.InputSimd128Register(0),
2989 kScratchDoubleReg, i.TempSimd128Register(0),
2990 i.TempRegister(1));
2991 break;
2992 }
2993 case kIA32S128Const: {
2994 XMMRegister dst = i.OutputSimd128Register();
2995 Register tmp = i.TempRegister(0);
2996 uint64_t low_qword = make_uint64(i.InputUint32(1), i.InputUint32(0));
2997 __ Move(dst, low_qword);
2998 __ Move(tmp, Immediate(i.InputUint32(2)));
2999 __ Pinsrd(dst, tmp, 2);
3000 __ Move(tmp, Immediate(i.InputUint32(3)));
3001 __ Pinsrd(dst, tmp, 3);
3002 break;
3003 }
3004 case kIA32S128Zero: {
3005 XMMRegister dst = i.OutputSimd128Register();
3006 __ Pxor(dst, dst);
3007 break;
3008 }
3009 case kIA32S128AllOnes: {
3010 XMMRegister dst = i.OutputSimd128Register();
3011 __ Pcmpeqd(dst, dst);
3012 break;
3013 }
3014 case kIA32S128Not: {
3015 __ S128Not(i.OutputSimd128Register(), i.InputSimd128Register(0),
3016 kScratchDoubleReg);
3017 break;
3018 }
3019 case kIA32S128And: {
3020 __ Pand(i.OutputSimd128Register(), i.InputSimd128Register(0),
3021 i.InputOperand(1));
3022 break;
3023 }
3024 case kIA32S128Or: {
3025 __ Por(i.OutputSimd128Register(), i.InputSimd128Register(0),
3026 i.InputOperand(1));
3027 break;
3028 }
3029 case kIA32S128Xor: {
3030 __ Pxor(i.OutputSimd128Register(), i.InputSimd128Register(0),
3031 i.InputOperand(1));
3032 break;
3033 }
3034 case kIA32S128Select: {
3035 __ S128Select(i.OutputSimd128Register(), i.InputSimd128Register(0),
3036 i.InputSimd128Register(1), i.InputSimd128Register(2),
3037 kScratchDoubleReg);
3038 break;
3039 }
3040 case kIA32S128AndNot: {
3041 // The inputs have been inverted by the instruction selector, so we can
3042 // call andnps here without any modifications.
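// (andnps computes ~first & second, so with the wasm operands a and b swapped
// at selection time this single instruction yields the required a & ~b.)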
3043 __ Andnps(i.OutputSimd128Register(), i.InputSimd128Register(0),
3044 i.InputSimd128Register(1));
3045 break;
3046 }
3047 case kIA32I8x16Swizzle: {
3048 __ I8x16Swizzle(i.OutputSimd128Register(), i.InputSimd128Register(0),
3049 i.InputSimd128Register(1), kScratchDoubleReg,
3050 i.TempRegister(0), MiscField::decode(instr->opcode()));
3051 break;
3052 }
3053 case kIA32I8x16Shuffle: {
3054 XMMRegister dst = i.OutputSimd128Register();
3055 Operand src0 = i.InputOperand(0);
3056 Register tmp = i.TempRegister(0);
3057 // Prepare 16 byte aligned buffer for shuffle control mask
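// pshufb reads each control byte's low 4 bits as a lane index and writes a
// zero byte whenever the control byte has its top bit set (0x80); that is how
// lanes belonging to the other source get blanked out in the masks built below.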
3058 __ mov(tmp, esp);
3059 __ and_(esp, -16);
3060 if (instr->InputCount() == 5) { // only one input operand
3061 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3062 for (int j = 4; j > 0; j--) {
3063 uint32_t mask = i.InputUint32(j);
3064 __ push(Immediate(mask));
3065 }
3066 __ Pshufb(dst, Operand(esp, 0));
3067 } else { // two input operands
3068 DCHECK_EQ(6, instr->InputCount());
3069 __ Movups(kScratchDoubleReg, src0);
3070 for (int j = 5; j > 1; j--) {
3071 uint32_t lanes = i.InputUint32(j);
3072 uint32_t mask = 0;
3073 for (int k = 0; k < 32; k += 8) {
3074 uint8_t lane = lanes >> k;
3075 mask |= (lane < kSimd128Size ? lane : 0x80) << k;
3076 }
3077 __ push(Immediate(mask));
3078 }
3079 __ Pshufb(kScratchDoubleReg, Operand(esp, 0));
3080 Operand src1 = i.InputOperand(1);
3081 if (!src1.is_reg(dst)) __ Movups(dst, src1);
3082 for (int j = 5; j > 1; j--) {
3083 uint32_t lanes = i.InputUint32(j);
3084 uint32_t mask = 0;
3085 for (int k = 0; k < 32; k += 8) {
3086 uint8_t lane = lanes >> k;
3087 mask |= (lane >= kSimd128Size ? (lane & 0xF) : 0x80) << k;
3088 }
3089 __ push(Immediate(mask));
3090 }
3091 __ Pshufb(dst, Operand(esp, 0));
3092 __ por(dst, kScratchDoubleReg);
3093 }
3094 __ mov(esp, tmp);
3095 break;
3096 }
3097 case kIA32S128Load8Splat: {
3098 __ S128Load8Splat(i.OutputSimd128Register(), i.MemoryOperand(),
3099 kScratchDoubleReg);
3100 break;
3101 }
3102 case kIA32S128Load16Splat: {
3103 __ S128Load16Splat(i.OutputSimd128Register(), i.MemoryOperand(),
3104 kScratchDoubleReg);
3105 break;
3106 }
3107 case kIA32S128Load32Splat: {
3108 __ S128Load32Splat(i.OutputSimd128Register(), i.MemoryOperand());
3109 break;
3110 }
3111 case kIA32S128Load64Splat: {
3112 __ Movddup(i.OutputSimd128Register(), i.MemoryOperand());
3113 break;
3114 }
3115 case kIA32S128Load8x8S: {
3116 __ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
3117 break;
3118 }
3119 case kIA32S128Load8x8U: {
3120 __ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
3121 break;
3122 }
3123 case kIA32S128Load16x4S: {
3124 __ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
3125 break;
3126 }
3127 case kIA32S128Load16x4U: {
3128 __ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
3129 break;
3130 }
3131 case kIA32S128Load32x2S: {
3132 __ Pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
3133 break;
3134 }
3135 case kIA32S128Load32x2U: {
3136 __ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
3137 break;
3138 }
3139 case kIA32S32x4Rotate: {
3140 XMMRegister dst = i.OutputSimd128Register();
3141 XMMRegister src = i.InputSimd128Register(0);
3142 uint8_t mask = i.InputUint8(1);
3143 if (dst == src) {
3144 // 1-byte shorter encoding than pshufd.
3145 __ Shufps(dst, src, src, mask);
3146 } else {
3147 __ Pshufd(dst, src, mask);
3148 }
3149 break;
3150 }
3151 case kIA32S32x4Swizzle: {
3152 DCHECK_EQ(2, instr->InputCount());
3153 __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputUint8(1));
3154 break;
3155 }
3156 case kIA32S32x4Shuffle: {
3157 DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
3158 uint8_t shuffle = i.InputUint8(2);
3159 DCHECK_NE(0xe4, shuffle); // A simple blend should be handled below.
3160 __ Pshufd(kScratchDoubleReg, i.InputOperand(1), shuffle);
3161 __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), shuffle);
3162 __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputUint8(3));
3163 break;
3164 }
3165 case kIA32S16x8Blend:
3166 ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
3167 break;
3168 case kIA32S16x8HalfShuffle1: {
3169 XMMRegister dst = i.OutputSimd128Register();
3170 __ Pshuflw(dst, i.InputOperand(0), i.InputUint8(1));
3171 __ Pshufhw(dst, dst, i.InputUint8(2));
3172 break;
3173 }
3174 case kIA32S16x8HalfShuffle2: {
3175 XMMRegister dst = i.OutputSimd128Register();
3176 __ Pshuflw(kScratchDoubleReg, i.InputOperand(1), i.InputUint8(2));
3177 __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputUint8(3));
3178 __ Pshuflw(dst, i.InputOperand(0), i.InputUint8(2));
3179 __ Pshufhw(dst, dst, i.InputUint8(3));
3180 __ Pblendw(dst, kScratchDoubleReg, i.InputUint8(4));
3181 break;
3182 }
3183 case kIA32S8x16Alignr:
3184 ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
3185 break;
3186 case kIA32S16x8Dup: {
3187 XMMRegister dst = i.OutputSimd128Register();
3188 Operand src = i.InputOperand(0);
3189 uint8_t lane = i.InputUint8(1) & 0x7;
3190 uint8_t lane4 = lane & 0x3;
3191 uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3192 if (lane < 4) {
3193 __ Pshuflw(dst, src, half_dup);
3194 __ Punpcklqdq(dst, dst);
3195 } else {
3196 __ Pshufhw(dst, src, half_dup);
3197 __ Punpckhqdq(dst, dst);
3198 }
3199 break;
3200 }
3201 case kIA32S8x16Dup: {
3202 XMMRegister dst = i.OutputSimd128Register();
3203 XMMRegister src = i.InputSimd128Register(0);
3204 uint8_t lane = i.InputUint8(1) & 0xf;
3205 if (CpuFeatures::IsSupported(AVX)) {
3206 CpuFeatureScope avx_scope(masm(), AVX);
3207 if (lane < 8) {
3208 __ vpunpcklbw(dst, src, src);
3209 } else {
3210 __ vpunpckhbw(dst, src, src);
3211 }
3212 } else {
3213 DCHECK_EQ(dst, src);
3214 if (lane < 8) {
3215 __ punpcklbw(dst, dst);
3216 } else {
3217 __ punpckhbw(dst, dst);
3218 }
3219 }
3220 lane &= 0x7;
3221 uint8_t lane4 = lane & 0x3;
3222 uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3223 if (lane < 4) {
3224 __ Pshuflw(dst, dst, half_dup);
3225 __ Punpcklqdq(dst, dst);
3226 } else {
3227 __ Pshufhw(dst, dst, half_dup);
3228 __ Punpckhqdq(dst, dst);
3229 }
3230 break;
3231 }
3232 case kIA32S64x2UnpackHigh:
3233 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
3234 break;
3235 case kIA32S32x4UnpackHigh:
3236 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
3237 break;
3238 case kIA32S16x8UnpackHigh:
3239 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
3240 break;
3241 case kIA32S8x16UnpackHigh:
3242 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
3243 break;
3244 case kIA32S64x2UnpackLow:
3245 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
3246 break;
3247 case kIA32S32x4UnpackLow:
3248 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
3249 break;
3250 case kIA32S16x8UnpackLow:
3251 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
3252 break;
3253 case kIA32S8x16UnpackLow:
3254 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
3255 break;
3256 case kSSES16x8UnzipHigh: {
3257 CpuFeatureScope sse_scope(masm(), SSE4_1);
3258 XMMRegister dst = i.OutputSimd128Register();
3259 XMMRegister src2 = dst;
3260 DCHECK_EQ(dst, i.InputSimd128Register(0));
3261 if (instr->InputCount() == 2) {
3262 __ movups(kScratchDoubleReg, i.InputOperand(1));
3263 __ psrld(kScratchDoubleReg, 16);
3264 src2 = kScratchDoubleReg;
3265 }
3266 __ psrld(dst, 16);
3267 __ packusdw(dst, src2);
3268 break;
3269 }
3270 case kAVXS16x8UnzipHigh: {
3271 CpuFeatureScope avx_scope(masm(), AVX);
3272 XMMRegister dst = i.OutputSimd128Register();
3273 XMMRegister src2 = dst;
3274 if (instr->InputCount() == 2) {
3275 __ vpsrld(kScratchDoubleReg, i.InputSimd128Register(1), 16);
3276 src2 = kScratchDoubleReg;
3277 }
3278 __ vpsrld(dst, i.InputSimd128Register(0), 16);
3279 __ vpackusdw(dst, dst, src2);
3280 break;
3281 }
3282 case kSSES16x8UnzipLow: {
3283 CpuFeatureScope sse_scope(masm(), SSE4_1);
3284 XMMRegister dst = i.OutputSimd128Register();
3285 XMMRegister src2 = dst;
3286 DCHECK_EQ(dst, i.InputSimd128Register(0));
3287 __ pxor(kScratchDoubleReg, kScratchDoubleReg);
3288 if (instr->InputCount() == 2) {
3289 __ pblendw(kScratchDoubleReg, i.InputOperand(1), 0x55);
3290 src2 = kScratchDoubleReg;
3291 }
3292 __ pblendw(dst, kScratchDoubleReg, 0xaa);
3293 __ packusdw(dst, src2);
3294 break;
3295 }
3296 case kAVXS16x8UnzipLow: {
3297 CpuFeatureScope avx_scope(masm(), AVX);
3298 XMMRegister dst = i.OutputSimd128Register();
3299 XMMRegister src2 = dst;
3300 __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3301 if (instr->InputCount() == 2) {
3302 __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1),
3303 0x55);
3304 src2 = kScratchDoubleReg;
3305 }
3306 __ vpblendw(dst, kScratchDoubleReg, i.InputSimd128Register(0), 0x55);
3307 __ vpackusdw(dst, dst, src2);
3308 break;
3309 }
3310 case kSSES8x16UnzipHigh: {
3311 XMMRegister dst = i.OutputSimd128Register();
3312 XMMRegister src2 = dst;
3313 DCHECK_EQ(dst, i.InputSimd128Register(0));
3314 if (instr->InputCount() == 2) {
3315 __ movups(kScratchDoubleReg, i.InputOperand(1));
3316 __ psrlw(kScratchDoubleReg, 8);
3317 src2 = kScratchDoubleReg;
3318 }
3319 __ psrlw(dst, 8);
3320 __ packuswb(dst, src2);
3321 break;
3322 }
3323 case kAVXS8x16UnzipHigh: {
3324 CpuFeatureScope avx_scope(masm(), AVX);
3325 XMMRegister dst = i.OutputSimd128Register();
3326 XMMRegister src2 = dst;
3327 if (instr->InputCount() == 2) {
3328 __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3329 src2 = kScratchDoubleReg;
3330 }
3331 __ vpsrlw(dst, i.InputSimd128Register(0), 8);
3332 __ vpackuswb(dst, dst, src2);
3333 break;
3334 }
3335 case kSSES8x16UnzipLow: {
3336 XMMRegister dst = i.OutputSimd128Register();
3337 XMMRegister src2 = dst;
3338 DCHECK_EQ(dst, i.InputSimd128Register(0));
3339 if (instr->InputCount() == 2) {
3340 __ movups(kScratchDoubleReg, i.InputOperand(1));
3341 __ psllw(kScratchDoubleReg, 8);
3342 __ psrlw(kScratchDoubleReg, 8);
3343 src2 = kScratchDoubleReg;
3344 }
3345 __ psllw(dst, 8);
3346 __ psrlw(dst, 8);
3347 __ packuswb(dst, src2);
3348 break;
3349 }
3350 case kAVXS8x16UnzipLow: {
3351 CpuFeatureScope avx_scope(masm(), AVX);
3352 XMMRegister dst = i.OutputSimd128Register();
3353 XMMRegister src2 = dst;
3354 if (instr->InputCount() == 2) {
3355 __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3356 __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 8);
3357 src2 = kScratchDoubleReg;
3358 }
3359 __ vpsllw(dst, i.InputSimd128Register(0), 8);
3360 __ vpsrlw(dst, dst, 8);
3361 __ vpackuswb(dst, dst, src2);
3362 break;
3363 }
3364 case kSSES8x16TransposeLow: {
3365 XMMRegister dst = i.OutputSimd128Register();
3366 DCHECK_EQ(dst, i.InputSimd128Register(0));
3367 __ psllw(dst, 8);
3368 if (instr->InputCount() == 1) {
3369 __ movups(kScratchDoubleReg, dst);
3370 } else {
3371 DCHECK_EQ(2, instr->InputCount());
3372 __ movups(kScratchDoubleReg, i.InputOperand(1));
3373 __ psllw(kScratchDoubleReg, 8);
3374 }
3375 __ psrlw(dst, 8);
3376 __ orps(dst, kScratchDoubleReg);
3377 break;
3378 }
3379 case kAVXS8x16TransposeLow: {
3380 CpuFeatureScope avx_scope(masm(), AVX);
3381 XMMRegister dst = i.OutputSimd128Register();
3382 if (instr->InputCount() == 1) {
3383 __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(0), 8);
3384 __ vpsrlw(dst, kScratchDoubleReg, 8);
3385 } else {
3386 DCHECK_EQ(2, instr->InputCount());
3387 __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3388 __ vpsllw(dst, i.InputSimd128Register(0), 8);
3389 __ vpsrlw(dst, dst, 8);
3390 }
3391 __ vpor(dst, dst, kScratchDoubleReg);
3392 break;
3393 }
3394 case kSSES8x16TransposeHigh: {
3395 XMMRegister dst = i.OutputSimd128Register();
3396 DCHECK_EQ(dst, i.InputSimd128Register(0));
3397 __ psrlw(dst, 8);
3398 if (instr->InputCount() == 1) {
3399 __ movups(kScratchDoubleReg, dst);
3400 } else {
3401 DCHECK_EQ(2, instr->InputCount());
3402 __ movups(kScratchDoubleReg, i.InputOperand(1));
3403 __ psrlw(kScratchDoubleReg, 8);
3404 }
3405 __ psllw(kScratchDoubleReg, 8);
3406 __ orps(dst, kScratchDoubleReg);
3407 break;
3408 }
3409 case kAVXS8x16TransposeHigh: {
3410 CpuFeatureScope avx_scope(masm(), AVX);
3411 XMMRegister dst = i.OutputSimd128Register();
3412 if (instr->InputCount() == 1) {
3413 __ vpsrlw(dst, i.InputSimd128Register(0), 8);
3414 __ vpsllw(kScratchDoubleReg, dst, 8);
3415 } else {
3416 DCHECK_EQ(2, instr->InputCount());
3417 __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3418 __ vpsrlw(dst, i.InputSimd128Register(0), 8);
3419 __ vpsllw(kScratchDoubleReg, kScratchDoubleReg, 8);
3420 }
3421 __ vpor(dst, dst, kScratchDoubleReg);
3422 break;
3423 }
3424 case kSSES8x8Reverse:
3425 case kSSES8x4Reverse:
3426 case kSSES8x2Reverse: {
3427 DCHECK_EQ(1, instr->InputCount());
3428 XMMRegister dst = i.OutputSimd128Register();
3429 DCHECK_EQ(dst, i.InputSimd128Register(0));
3430 if (arch_opcode != kSSES8x2Reverse) {
3431 // First shuffle words into position.
3432 int8_t shuffle_mask = arch_opcode == kSSES8x4Reverse ? 0xB1 : 0x1B;
3433 __ pshuflw(dst, dst, shuffle_mask);
3434 __ pshufhw(dst, dst, shuffle_mask);
3435 }
3436 __ movaps(kScratchDoubleReg, dst);
3437 __ psrlw(kScratchDoubleReg, 8);
3438 __ psllw(dst, 8);
3439 __ orps(dst, kScratchDoubleReg);
3440 break;
3441 }
3442 case kAVXS8x2Reverse:
3443 case kAVXS8x4Reverse:
3444 case kAVXS8x8Reverse: {
3445 DCHECK_EQ(1, instr->InputCount());
3446 CpuFeatureScope avx_scope(masm(), AVX);
3447 XMMRegister dst = i.OutputSimd128Register();
3448 XMMRegister src = dst;
3449 if (arch_opcode != kAVXS8x2Reverse) {
3450 // First shuffle words into position.
3451 int8_t shuffle_mask = arch_opcode == kAVXS8x4Reverse ? 0xB1 : 0x1B;
3452 __ vpshuflw(dst, i.InputOperand(0), shuffle_mask);
3453 __ vpshufhw(dst, dst, shuffle_mask);
3454 } else {
3455 src = i.InputSimd128Register(0);
3456 }
3457 // Reverse each 16 bit lane.
3458 __ vpsrlw(kScratchDoubleReg, src, 8);
3459 __ vpsllw(dst, src, 8);
3460 __ vpor(dst, dst, kScratchDoubleReg);
3461 break;
3462 }
3463 case kIA32S128AnyTrue: {
3464 Register dst = i.OutputRegister();
3465 XMMRegister src = i.InputSimd128Register(0);
3466 Register tmp = i.TempRegister(0);
3467 __ xor_(tmp, tmp);
3468 __ mov(dst, Immediate(1));
3469 __ Ptest(src, src);
3470 __ cmov(zero, dst, tmp);
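// ptest sets ZF only when the vector is all zeros, so dst keeps the
// preloaded 1 for any nonzero lane and is replaced with tmp (0) otherwise.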
3471 break;
3472 }
3473 // Need to split up all the different lane structures because the
3474 // comparison instruction used matters, e.g. given 0xff00, pcmpeqb returns
3475 // 0x0011, pcmpeqw returns 0x0000, ptest will set ZF to 0 and 1
3476 // respectively.
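// In other words, when the lanes are compared against zero, the low byte of
// 0xff00 matches and a byte-granularity compare leaves a nonzero mask
// (ZF == 0, not all true), while a word-granularity compare sees one nonzero
// 16-bit lane and leaves an all-zero mask (ZF == 1, all true).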
3477 case kIA32I64x2AllTrue:
3478 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqq);
3479 break;
3480 case kIA32I32x4AllTrue:
3481 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
3482 break;
3483 case kIA32I16x8AllTrue:
3484 ASSEMBLE_SIMD_ALL_TRUE(pcmpeqw);
3485 break;
3486 case kIA32I8x16AllTrue: {
3487 ASSEMBLE_SIMD_ALL_TRUE(pcmpeqb);
3488 break;
3489 }
3490 case kIA32Blendvpd: {
3491 __ Blendvpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
3492 i.InputSimd128Register(1), i.InputSimd128Register(2));
3493 break;
3494 }
3495 case kIA32Blendvps: {
3496 __ Blendvps(i.OutputSimd128Register(), i.InputSimd128Register(0),
3497 i.InputSimd128Register(1), i.InputSimd128Register(2));
3498 break;
3499 }
3500 case kIA32Pblendvb: {
3501 __ Pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3502 i.InputSimd128Register(1), i.InputSimd128Register(2));
3503 break;
3504 }
3505 case kIA32I32x4TruncF64x2UZero: {
3506 __ I32x4TruncSatF64x2UZero(i.OutputSimd128Register(),
3507 i.InputSimd128Register(0), kScratchDoubleReg,
3508 i.TempRegister(0));
3509 break;
3510 }
3511 case kIA32I32x4TruncF32x4U: {
3512 __ I32x4TruncF32x4U(i.OutputSimd128Register(), i.InputSimd128Register(0),
3513 kScratchDoubleReg, i.TempSimd128Register(0));
3514 break;
3515 }
3516 case kIA32Cvttps2dq: {
3517 __ Cvttps2dq(i.OutputSimd128Register(), i.InputSimd128Register(0));
3518 break;
3519 }
3520 case kIA32Cvttpd2dq: {
3521 __ Cvttpd2dq(i.OutputSimd128Register(), i.InputSimd128Register(0));
3522 break;
3523 }
3524 case kIA32Word32AtomicPairLoad: {
3525 __ movq(kScratchDoubleReg, i.MemoryOperand());
3526 __ Pextrd(i.OutputRegister(0), kScratchDoubleReg, 0);
3527 __ Pextrd(i.OutputRegister(1), kScratchDoubleReg, 1);
3528 break;
3529 }
3530 case kIA32Word32ReleasePairStore: {
3531 __ push(ebx);
3532 i.MoveInstructionOperandToRegister(ebx, instr->InputAt(1));
3533 __ push(ebx);
3534 i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
3535 __ push(ebx);
3536 frame_access_state()->IncreaseSPDelta(3);
3537 __ movq(kScratchDoubleReg, MemOperand(esp, 0));
3538 __ pop(ebx);
3539 __ pop(ebx);
3540 __ pop(ebx);
3541 frame_access_state()->IncreaseSPDelta(-3);
3542 __ movq(i.MemoryOperand(2), kScratchDoubleReg);
3543 break;
3544 }
3545 case kIA32Word32SeqCstPairStore: {
3546 Label store;
3547 __ bind(&store);
3548 __ mov(eax, i.MemoryOperand(2));
3549 __ mov(edx, i.NextMemoryOperand(2));
3550 __ push(ebx);
3551 frame_access_state()->IncreaseSPDelta(1);
3552 i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
3553 __ lock();
3554 __ cmpxchg8b(i.MemoryOperand(2));
3555 __ pop(ebx);
3556 frame_access_state()->IncreaseSPDelta(-1);
3557 __ j(not_equal, &store);
3558 break;
3559 }
3560 case kAtomicExchangeInt8: {
3561 __ xchg_b(i.InputRegister(0), i.MemoryOperand(1));
3562 __ movsx_b(i.InputRegister(0), i.InputRegister(0));
3563 break;
3564 }
3565 case kAtomicExchangeUint8: {
3566 __ xchg_b(i.InputRegister(0), i.MemoryOperand(1));
3567 __ movzx_b(i.InputRegister(0), i.InputRegister(0));
3568 break;
3569 }
3570 case kAtomicExchangeInt16: {
3571 __ xchg_w(i.InputRegister(0), i.MemoryOperand(1));
3572 __ movsx_w(i.InputRegister(0), i.InputRegister(0));
3573 break;
3574 }
3575 case kAtomicExchangeUint16: {
3576 __ xchg_w(i.InputRegister(0), i.MemoryOperand(1));
3577 __ movzx_w(i.InputRegister(0), i.InputRegister(0));
3578 break;
3579 }
3580 case kAtomicExchangeWord32: {
3581 __ xchg(i.InputRegister(0), i.MemoryOperand(1));
3582 break;
3583 }
3584 case kIA32Word32AtomicPairExchange: {
3585 DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr));
3586 Label exchange;
3587 __ bind(&exchange);
3588 __ mov(eax, i.MemoryOperand(2));
3589 __ mov(edx, i.NextMemoryOperand(2));
3590 __ push(ebx);
3591 frame_access_state()->IncreaseSPDelta(1);
3592 i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
3593 __ lock();
3594 __ cmpxchg8b(i.MemoryOperand(2));
3595 __ pop(ebx);
3596 frame_access_state()->IncreaseSPDelta(-1);
3597 __ j(not_equal, &exchange);
3598 break;
3599 }
3600 case kAtomicCompareExchangeInt8: {
3601 __ lock();
3602 __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
3603 __ movsx_b(eax, eax);
3604 break;
3605 }
3606 case kAtomicCompareExchangeUint8: {
3607 __ lock();
3608 __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
3609 __ movzx_b(eax, eax);
3610 break;
3611 }
3612 case kAtomicCompareExchangeInt16: {
3613 __ lock();
3614 __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
3615 __ movsx_w(eax, eax);
3616 break;
3617 }
3618 case kAtomicCompareExchangeUint16: {
3619 __ lock();
3620 __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
3621 __ movzx_w(eax, eax);
3622 break;
3623 }
3624 case kAtomicCompareExchangeWord32: {
3625 __ lock();
3626 __ cmpxchg(i.MemoryOperand(2), i.InputRegister(1));
3627 break;
3628 }
3629 case kIA32Word32AtomicPairCompareExchange: {
3630 __ push(ebx);
3631 frame_access_state()->IncreaseSPDelta(1);
3632 i.MoveInstructionOperandToRegister(ebx, instr->InputAt(2));
3633 __ lock();
3634 __ cmpxchg8b(i.MemoryOperand(4));
3635 __ pop(ebx);
3636 frame_access_state()->IncreaseSPDelta(-1);
3637 break;
3638 }
3639#define ATOMIC_BINOP_CASE(op, inst) \
3640 case kAtomic##op##Int8: { \
3641 ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \
3642 __ movsx_b(eax, eax); \
3643 break; \
3644 } \
3645 case kAtomic##op##Uint8: { \
3646 ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \
3647 __ movzx_b(eax, eax); \
3648 break; \
3649 } \
3650 case kAtomic##op##Int16: { \
3651 ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \
3652 __ movsx_w(eax, eax); \
3653 break; \
3654 } \
3655 case kAtomic##op##Uint16: { \
3656 ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \
3657 __ movzx_w(eax, eax); \
3658 break; \
3659 } \
3660 case kAtomic##op##Word32: { \
3661 ASSEMBLE_ATOMIC_BINOP(inst, mov, cmpxchg); \
3662 break; \
3663 }
3664 ATOMIC_BINOP_CASE(Add, add)
3665 ATOMIC_BINOP_CASE(Sub, sub)
3666 ATOMIC_BINOP_CASE(And, and_)
3667 ATOMIC_BINOP_CASE(Or, or_)
3668 ATOMIC_BINOP_CASE(Xor, xor_)
3669#undef ATOMIC_BINOP_CASE
3670#define ATOMIC_BINOP_CASE(op, instr1, instr2) \
3671 case kIA32Word32AtomicPair##op: { \
3672 DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr)); \
3673 ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2) \
3674 break; \
3675 }
3676 ATOMIC_BINOP_CASE(Add, add, adc)
3677 ATOMIC_BINOP_CASE(And, and_, and_)
3678 ATOMIC_BINOP_CASE(Or, or_, or_)
3679 ATOMIC_BINOP_CASE(Xor, xor_, xor_)
3680#undef ATOMIC_BINOP_CASE
3681 case kIA32Word32AtomicPairSub: {
3682 DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr));
3683 Label binop;
3684 __ bind(&binop);
3685 // Move memory operand into edx:eax
3686 __ mov(eax, i.MemoryOperand(2));
3687 __ mov(edx, i.NextMemoryOperand(2));
3688 // Save input registers temporarily on the stack.
3689 __ push(ebx);
3690 frame_access_state()->IncreaseSPDelta(1);
3691 i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
3692 __ push(i.InputRegister(1));
3693 // Negate input in place
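// (64-bit two's-complement negation: neg sets the carry iff the low word was
// nonzero, adc folds that borrow into the high word, and the final neg gives
// -(hi:lo) as the pair (-(hi + carry), -lo).)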
3694 __ neg(ebx);
3695 __ adc(i.InputRegister(1), 0);
3696 __ neg(i.InputRegister(1));
3697 // Add memory operand, negated input.
3698 __ add(ebx, eax);
3699 __ adc(i.InputRegister(1), edx);
3700 __ lock();
3701 __ cmpxchg8b(i.MemoryOperand(2));
3702 // Restore input registers
3703 __ pop(i.InputRegister(1));
3704 __ pop(ebx);
3705 frame_access_state()->IncreaseSPDelta(-1);
3706 __ j(not_equal, &binop);
3707 break;
3708 }
3709 case kAtomicLoadInt8:
3710 case kAtomicLoadUint8:
3711 case kAtomicLoadInt16:
3712 case kAtomicLoadUint16:
3713 case kAtomicLoadWord32:
3714 case kAtomicStoreWord8:
3715 case kAtomicStoreWord16:
3716 case kAtomicStoreWord32:
3717 UNREACHABLE(); // Won't be generated by instruction selector.
3718 }
3719 return kSuccess;
3720}
3721
3722static Condition FlagsConditionToCondition(FlagsCondition condition) {
3723 switch (condition) {
3724 case kUnorderedEqual:
3725 case kEqual:
3726 return equal;
3727 case kUnorderedNotEqual:
3728 case kNotEqual:
3729 return not_equal;
3730 case kSignedLessThan:
3731 return less;
3732 case kSignedGreaterThanOrEqual:
3733 return greater_equal;
3734 case kSignedLessThanOrEqual:
3735 return less_equal;
3736 case kSignedGreaterThan:
3737 return greater;
3738 case kUnsignedLessThan:
3739 return below;
3740 case kUnsignedGreaterThanOrEqual:
3741 return above_equal;
3742 case kUnsignedLessThanOrEqual:
3743 return below_equal;
3744 case kUnsignedGreaterThan:
3745 return above;
3746 case kOverflow:
3747 return overflow;
3748 case kNotOverflow:
3749 return no_overflow;
3750 default:
3751 UNREACHABLE();
3752 }
3753}
3754
3755// Assembles a branch after an instruction.
3756void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
3757 Label::Distance flabel_distance =
3758 branch->fallthru ? Label::kNear : Label::kFar;
3759 Label* tlabel = branch->true_label;
3760 Label* flabel = branch->false_label;
3761 if (branch->condition == kUnorderedEqual) {
3762 __ j(parity_even, flabel, flabel_distance);
3763 } else if (branch->condition == kUnorderedNotEqual) {
3764 __ j(parity_even, tlabel);
3765 }
3766 __ j(FlagsConditionToCondition(branch->condition), tlabel);
3767
3768 // Add a jump if not falling through to the next block.
3769 if (!branch->fallthru) __ jmp(flabel);
3770}
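// Editorial sketch (not part of the generated file): after ucomiss/ucomisd the
// parity flag is set exactly when the operands were unordered (a NaN was
// involved). kUnorderedEqual must treat NaN as "not equal", so the parity case
// is routed to the false label before the equality test; for kUnorderedNotEqual
// a NaN already counts as "not equal" and goes straight to the true label.
// In plain C++ terms:
#include <cmath>

bool UnorderedEqual(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) return false;  // j(parity_even, flabel)
  return a == b;                                     // j(equal, tlabel)
}

bool UnorderedNotEqual(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) return true;   // j(parity_even, tlabel)
  return a != b;                                     // j(not_equal, tlabel)
}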
3771
3772void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
3773 BranchInfo* branch) {
3774 AssembleArchBranch(instr, branch);
3775}
3776
3777void CodeGenerator::AssembleArchJumpRegardlessOfAssemblyOrder(
3778 RpoNumber target) {
3779 __ jmp(GetLabel(target));
3780}
3781
3782#if V8_ENABLE_WEBASSEMBLY
3783void CodeGenerator::AssembleArchTrap(Instruction* instr,
3784 FlagsCondition condition) {
3785 class OutOfLineTrap final : public OutOfLineCode {
3786 public:
3787 OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
3788 : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
3789
3790 void Generate() final {
3791 IA32OperandConverter i(gen_, instr_);
3792 TrapId trap_id =
3793 static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
3794 GenerateCallToTrap(trap_id);
3795 }
3796
3797 private:
3798 void GenerateCallToTrap(TrapId trap_id) {
3799 gen_->AssembleSourcePosition(instr_);
3800 // A direct call to a wasm runtime stub defined in this module.
3801 // Just encode the stub index. This will be patched when the code
3802 // is added to the native module and copied into wasm code space.
3803 __ wasm_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
3804 ReferenceMap* reference_map =
3805 gen_->zone()->New<ReferenceMap>(gen_->zone());
3806 gen_->RecordSafepoint(reference_map);
3807 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
3808 }
3809
3810 Instruction* instr_;
3811 CodeGenerator* gen_;
3812 };
3813 auto ool = zone()->New<OutOfLineTrap>(this, instr);
3814 Label* tlabel = ool->entry();
3815 Label end;
3816 if (condition == kUnorderedEqual) {
3817 __ j(parity_even, &end, Label::kNear);
3818 } else if (condition == kUnorderedNotEqual) {
3819 __ j(parity_even, tlabel);
3820 }
3821 __ j(FlagsConditionToCondition(condition), tlabel);
3822 __ bind(&end);
3823}
3824#endif // V8_ENABLE_WEBASSEMBLY
3825
3826// Assembles boolean materializations after an instruction.
3827void CodeGenerator::AssembleArchBoolean(Instruction* instr,
3828 FlagsCondition condition) {
3829 IA32OperandConverter i(this, instr);
3830 Label done;
3831
3832 // Materialize a full 32-bit 1 or 0 value. The result register is always the
3833 // last output of the instruction.
3834 Label check;
3835 DCHECK_NE(0u, instr->OutputCount());
3836 Register reg = i.OutputRegister(instr->OutputCount() - 1);
3837 if (condition == kUnorderedEqual) {
3838 __ j(parity_odd, &check, Label::kNear);
3839 __ Move(reg, Immediate(0));
3840 __ jmp(&done, Label::kNear);
3841 } else if (condition == kUnorderedNotEqual) {
3842 __ j(parity_odd, &check, Label::kNear);
3843 __ mov(reg, Immediate(1));
3844 __ jmp(&done, Label::kNear);
3845 }
3846 Condition cc = FlagsConditionToCondition(condition);
3847
3848 __ bind(&check);
3849 if (reg.is_byte_register()) {
3850 // setcc for byte registers (al, bl, cl, dl).
3851 __ setcc(cc, reg);
3852 __ movzx_b(reg, reg);
3853 } else {
3854 // Emit a branch to set a register to either 1 or 0.
3855 Label set;
3856 __ j(cc, &set, Label::kNear);
3857 __ Move(reg, Immediate(0));
3858 __ jmp(&done, Label::kNear);
3859 __ bind(&set);
3860 __ mov(reg, Immediate(1));
3861 }
3862 __ bind(&done);
3863}
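// Editorial sketch (not part of the generated file): setcc can only write the
// low byte of eax, ecx, edx, or ebx, so a result register without an 8-bit
// encoding (esi, edi) has to be materialized with an explicit branch, as the
// else-path above does. Illustrative check, with the register codes assumed to
// follow the usual ia32 encoding:
bool HasLowByteEncoding(int reg_code) {
  // eax=0, ecx=1, edx=2, ebx=3 expose al/cl/dl/bl; higher codes do not.
  return reg_code >= 0 && reg_code <= 3;
}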
3864
3865void CodeGenerator::AssembleArchConditionalBoolean(Instruction* instr) {
3866 UNREACHABLE();
3867}
3868
3869void CodeGenerator::AssembleArchConditionalBranch(Instruction* instr,
3870 BranchInfo* branch) {
3871 UNREACHABLE();
3872}
3873
3874void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
3875 IA32OperandConverter i(this, instr);
3876 Register input = i.InputRegister(0);
3877 std::vector<std::pair<int32_t, Label*>> cases;
3878 for (size_t index = 2; index < instr->InputCount(); index += 2) {
3879 cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
3880 }
3881 AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
3882 cases.data() + cases.size());
3883}
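// Editorial sketch (not part of the generated file): the cases are (value,
// label) pairs sorted by value, and AssembleArchBinarySearchSwitchRange emits a
// branch tree equivalent to this binary search, falling back to the default
// block when no case matches. Names illustrative only:
#include <cstdint>
#include <utility>

int BinarySearchSwitch(int32_t input, const std::pair<int32_t, int>* begin,
                       const std::pair<int32_t, int>* end, int default_block) {
  while (begin < end) {
    const std::pair<int32_t, int>* mid = begin + (end - begin) / 2;
    if (input == mid->first) return mid->second;
    if (input < mid->first) {
      end = mid;
    } else {
      begin = mid + 1;
    }
  }
  return default_block;
}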
3884
3885void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
3886 IA32OperandConverter i(this, instr);
3887 Register input = i.InputRegister(0);
3888 size_t const case_count = instr->InputCount() - 2;
3889 base::Vector<Label*> cases = zone()->AllocateVector<Label*>(case_count);
3890 for (size_t index = 0; index < case_count; ++index) {
3891 cases[index] = GetLabel(i.InputRpo(index + 2));
3892 }
3893 Label* const table = AddJumpTable(cases);
3894 __ cmp(input, Immediate(case_count));
3895 __ j(above_equal, GetLabel(i.InputRpo(1)));
3896 __ jmp(Operand::JumpTable(input, times_system_pointer_size, table));
3897}
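// Editorial sketch (not part of the generated file): the table switch is a
// bounds check followed by an indirect jump through a label table; any index
// at or above the case count falls through to the default block. In C++ terms,
// names illustrative only:
#include <cstddef>

int TableSwitch(size_t input, int (*const cases[])(), size_t case_count,
                int (*default_case)()) {
  if (input >= case_count) return default_case();  // j(above_equal, default)
  return cases[input]();                           // jmp [table + input * 4]
}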
3898
3899void CodeGenerator::AssembleArchSelect(Instruction* instr,
3900 FlagsCondition condition) {
3901 UNIMPLEMENTED();
3902}
3903
3904// The calling convention for JSFunctions on IA32 passes arguments on the
3905// stack and the JSFunction and context in EDI and ESI, respectively, thus
3906// the steps of the call look as follows:
3907
3908// --{ before the call instruction }--------------------------------------------
3909// | caller frame |
3910// ^ esp ^ ebp
3911
3912// --{ push arguments and setup ESI, EDI }--------------------------------------
3913// | args + receiver | caller frame |
3914// ^ esp ^ ebp
3915// [edi = JSFunction, esi = context]
3916
3917// --{ call [edi + kCodeEntryOffset] }------------------------------------------
3918// | RET | args + receiver | caller frame |
3919// ^ esp ^ ebp
3920
3921// =={ prologue of called function }============================================
3922// --{ push ebp }---------------------------------------------------------------
3923// | FP | RET | args + receiver | caller frame |
3924// ^ esp ^ ebp
3925
3926// --{ mov ebp, esp }-----------------------------------------------------------
3927// | FP | RET | args + receiver | caller frame |
3928// ^ ebp,esp
3929
3930// --{ push esi }---------------------------------------------------------------
3931// | CTX | FP | RET | args + receiver | caller frame |
3932// ^esp ^ ebp
3933
3934// --{ push edi }---------------------------------------------------------------
3935// | FNC | CTX | FP | RET | args + receiver | caller frame |
3936// ^esp ^ ebp
3937
3938// --{ subi esp, #N }-----------------------------------------------------------
3939// | callee frame | FNC | CTX | FP | RET | args + receiver | caller frame |
3940// ^esp ^ ebp
3941
3942// =={ body of called function }================================================
3943
3944// =={ epilogue of called function }============================================
3945// --{ mov esp, ebp }-----------------------------------------------------------
3946// | FP | RET | args + receiver | caller frame |
3947// ^ esp,ebp
3948
3949// --{ pop ebp }-----------------------------------------------------------
3950// | | RET | args + receiver | caller frame |
3951// ^ esp ^ ebp
3952
3953// --{ ret #A+1 }-----------------------------------------------------------
3954// | | caller frame |
3955// ^ esp ^ ebp
3956
3957// Runtime function calls are accomplished by doing a stub call to the
3958// CEntry (a real code object). On IA32 this passes arguments on the
3959// stack, the number of arguments in EAX, the address of the runtime function
3960// in EBX, and the context in ESI.
3961
3962// --{ before the call instruction }--------------------------------------------
3963// | caller frame |
3964// ^ esp ^ ebp
3965
3966// --{ push arguments and setup EAX, EBX, and ESI }-----------------------------
3967// | args + receiver | caller frame |
3968// ^ esp ^ ebp
3969// [eax = #args, ebx = runtime function, esi = context]
3970
3971// --{ call #CEntry }-----------------------------------------------------------
3972// | RET | args + receiver | caller frame |
3973// ^ esp ^ ebp
3974
3975// =={ body of runtime function }===============================================
3976
3977// --{ runtime returns }--------------------------------------------------------
3978// | caller frame |
3979// ^ esp ^ ebp
3980
3981// Other custom linkages (e.g. for calling directly into and out of C++) may
3982// need to save callee-saved registers on the stack, which is done in the
3983// function prologue of generated code.
3984
3985// --{ before the call instruction }--------------------------------------------
3986// | caller frame |
3987// ^ esp ^ ebp
3988
3989// --{ set up arguments in registers on stack }---------------------------------
3990// | args | caller frame |
3991// ^ esp ^ ebp
3992// [r0 = arg0, r1 = arg1, ...]
3993
3994// --{ call code }--------------------------------------------------------------
3995// | RET | args | caller frame |
3996// ^ esp ^ ebp
3997
3998// =={ prologue of called function }============================================
3999// --{ push ebp }---------------------------------------------------------------
4000// | FP | RET | args | caller frame |
4001// ^ esp ^ ebp
4002
4003// --{ mov ebp, esp }-----------------------------------------------------------
4004// | FP | RET | args | caller frame |
4005// ^ ebp,esp
4006
4007// --{ save registers }---------------------------------------------------------
4008// | regs | FP | RET | args | caller frame |
4009// ^ esp ^ ebp
4010
4011// --{ subi esp, #N }-----------------------------------------------------------
4012// | callee frame | regs | FP | RET | args | caller frame |
4013// ^esp ^ ebp
4014
4015// =={ body of called function }================================================
4016
4017// =={ epilogue of called function }============================================
4018// --{ restore registers }------------------------------------------------------
4019// | regs | FP | RET | args | caller frame |
4020// ^ esp ^ ebp
4021
4022// --{ mov esp, ebp }-----------------------------------------------------------
4023// | FP | RET | args | caller frame |
4024// ^ esp,ebp
4025
4026// --{ pop ebp }----------------------------------------------------------------
4027// | RET | args | caller frame |
4028// ^ esp ^ ebp
4029
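// Editorial sketch (not part of the generated file): with the layouts drawn
// above, ebp points at the saved frame pointer, so on ia32 the return address
// sits at ebp + 4, the first stack parameter slot at ebp + 8 (slot i at
// ebp + 8 + 4 * i), and callee frame slots live at negative offsets from ebp.
// Constants and helper names below are illustrative only:
#include <cstdint>

constexpr int32_t kIa32PointerSize = 4;

// Offset of the return address relative to the frame pointer.
constexpr int32_t ReturnAddressOffsetFromFp() { return kIa32PointerSize; }

// Offset of stack parameter slot i (0-based) relative to the frame pointer.
constexpr int32_t ParameterSlotOffsetFromFp(int i) {
  // Skip the saved ebp and the return address, then index into the slots.
  return 2 * kIa32PointerSize + i * kIa32PointerSize;
}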
4030void CodeGenerator::FinishFrame(Frame* frame) {
4031 auto call_descriptor = linkage()->GetIncomingDescriptor();
4032 const RegList saves = call_descriptor->CalleeSavedRegisters();
4033 if (!saves.is_empty()) { // Save callee-saved registers.
4034 DCHECK(!info()->is_osr());
4035 frame->AllocateSavedCalleeRegisterSlots(saves.Count());
4036 }
4037}
4038
4039void CodeGenerator::AssembleConstructFrame() {
4040 auto call_descriptor = linkage()->GetIncomingDescriptor();
4041 if (frame_access_state()->has_frame()) {
4042 if (call_descriptor->IsCFunctionCall()) {
4043 __ push(ebp);
4044 __ mov(ebp, esp);
4045#if V8_ENABLE_WEBASSEMBLY
4046 if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
4047 __ Push(Immediate(StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY)));
4048 // Reserve stack space for saving the c_entry_fp later.
4049 __ AllocateStackSpace(kSystemPointerSize);
4050 }
4051#endif // V8_ENABLE_WEBASSEMBLY
4052 } else if (call_descriptor->IsJSFunctionCall()) {
4053 __ Prologue();
4054 } else {
4055 __ StubPrologue(info()->GetOutputStackFrameType());
4056#if V8_ENABLE_WEBASSEMBLY
4057 if (call_descriptor->IsAnyWasmFunctionCall() ||
4058 call_descriptor->IsWasmImportWrapper() ||
4059 call_descriptor->IsWasmCapiFunction()) {
4060 // For import wrappers and C-API functions, this stack slot is only used
4061 // for printing stack traces in V8. Also, it holds a WasmImportData
4062 // instead of the trusted instance data, which is taken care of in the
4063 // frames accessors.
4064 __ push(kWasmImplicitArgRegister);
4065 }
4066 if (call_descriptor->IsWasmCapiFunction()) {
4067 // Reserve space for saving the PC later.
4068 __ AllocateStackSpace(kSystemPointerSize);
4069 }
4070#endif // V8_ENABLE_WEBASSEMBLY
4071 }
4072 }
4073
4074 int required_slots =
4075 frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
4076
4077 if (info()->is_osr()) {
4078 // TurboFan OSR-compiled functions cannot be entered directly.
4079 __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
4080
4081 // Unoptimized code jumps directly to this entrypoint while the unoptimized
4082 // frame is still on the stack. Optimized code uses OSR values directly from
4083 // the unoptimized frame. Thus, all that needs to be done is to allocate the
4084 // remaining stack slots.
4085 __ RecordComment("-- OSR entrypoint --");
4086 osr_pc_offset_ = __ pc_offset();
4087 required_slots -= osr_helper()->UnoptimizedFrameSlots();
4088 }
4089
4090 const RegList saves = call_descriptor->CalleeSavedRegisters();
4091 if (required_slots > 0) {
4092 DCHECK(frame_access_state()->has_frame());
4093#if V8_ENABLE_WEBASSEMBLY
4094 if (info()->IsWasm() && required_slots * kSystemPointerSize > 4 * KB) {
4095 // For WebAssembly functions with big frames we have to do the stack
4096 // overflow check before we construct the frame. Otherwise we may not
4097 // have enough space on the stack to call the runtime for the stack
4098 // overflow.
4099 Label done;
4100
4101 // If the frame is bigger than the stack, we throw the stack overflow
4102 // exception unconditionally. Thereby we can avoid the integer overflow
4103 // check in the condition code.
4104 if (required_slots * kSystemPointerSize < v8_flags.stack_size * KB) {
4105 Register scratch = esi;
4106 __ push(scratch);
4107 __ mov(scratch, esp);
4108 __ sub(scratch, Immediate(required_slots * kSystemPointerSize));
4109 __ CompareStackLimit(scratch, StackLimitKind::kRealStackLimit);
4110 __ pop(scratch);
4111 __ j(above_equal, &done, Label::kNear);
4112 }
4113
4114 if (v8_flags.experimental_wasm_growable_stacks) {
4115 RegList regs_to_save;
4116 regs_to_save.set(WasmHandleStackOverflowDescriptor::GapRegister());
4117 regs_to_save.set(
4118 WasmHandleStackOverflowDescriptor::FrameBaseRegister());
4119 for (auto reg : wasm::kGpParamRegisters) regs_to_save.set(reg);
4120 for (Register reg : base::Reversed(regs_to_save)) {
4121 __ push(reg);
4122 }
4123 __ sub(esp,
4124 Immediate(arraysize(wasm::kFpParamRegisters) * kSimd128Size));
4125 for (size_t i = 0; i < arraysize(wasm::kFpParamRegisters); i++) {
4126 __ Movdqu(Operand(esp, kSimd128Size * i), wasm::kFpParamRegisters[i]);
4127 }
4128 __ mov(WasmHandleStackOverflowDescriptor::GapRegister(),
4129 Immediate(required_slots * kSystemPointerSize));
4130 __ mov(WasmHandleStackOverflowDescriptor::FrameBaseRegister(), ebp);
4131 __ add(WasmHandleStackOverflowDescriptor::FrameBaseRegister(),
4132 Immediate(static_cast<int32_t>(
4133 call_descriptor->ParameterSlotCount() * kSystemPointerSize +
4134 CommonFrameConstants::kFixedFrameSizeAboveFp)));
4135 __ CallBuiltin(Builtin::kWasmHandleStackOverflow);
4136 for (size_t i = 0; i < arraysize(wasm::kFpParamRegisters); i++) {
4137 __ Movdqu(wasm::kFpParamRegisters[i], Operand(esp, kSimd128Size * i));
4138 }
4139 __ add(esp,
4140 Immediate(arraysize(wasm::kFpParamRegisters) * kSimd128Size));
4141 for (Register reg : regs_to_save) {
4142 __ pop(reg);
4143 }
4144 } else {
4145 __ wasm_call(static_cast<Address>(Builtin::kWasmStackOverflow),
4146 RelocInfo::WASM_STUB_CALL);
4147 // The call does not return, hence we can ignore any references and just
4148 // define an empty safepoint.
4149 ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
4150 RecordSafepoint(reference_map);
4151 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
4152 }
4153 __ bind(&done);
4154 }
4155#endif // V8_ENABLE_WEBASSEMBLY
4156
4157 // Skip callee-saved and return slots, which are created below.
4158 required_slots -= saves.Count();
4159 required_slots -= frame()->GetReturnSlotCount();
4160 if (required_slots > 0) {
4161 __ AllocateStackSpace(required_slots * kSystemPointerSize);
4162 }
4163 }
4164
4165 if (!saves.is_empty()) { // Save callee-saved registers.
4166 DCHECK(!info()->is_osr());
4167 for (Register reg : base::Reversed(saves)) {
4168 __ push(reg);
4169 }
4170 }
4171
4172 // Allocate return slots (located after callee-saved).
4173 if (frame()->GetReturnSlotCount() > 0) {
4174 __ AllocateStackSpace(frame()->GetReturnSlotCount() * kSystemPointerSize);
4175 }
4176
4177 for (int spill_slot : frame()->tagged_slots()) {
4178 FrameOffset offset = frame_access_state()->GetFrameOffset(spill_slot);
4179 DCHECK(offset.from_frame_pointer());
4180 __ mov(Operand(ebp, offset.offset()), Immediate(0));
4181 }
4182}
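// Editorial sketch (not part of the generated file): the large-frame check in
// AssembleConstructFrame asks "would esp - frame_size still be above the real
// stack limit?", and skips the comparison entirely (trapping unconditionally)
// when the frame alone exceeds the configured stack size, so the subtraction
// cannot wrap. Names illustrative only:
#include <cstddef>
#include <cstdint>

bool FrameFits(uintptr_t sp, uintptr_t real_stack_limit, size_t frame_size,
               size_t configured_stack_size) {
  if (frame_size >= configured_stack_size) return false;  // trap unconditionally
  return sp - frame_size >= real_stack_limit;             // j(above_equal, &done)
}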
4183
4184void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
4185 auto call_descriptor = linkage()->GetIncomingDescriptor();
4186
4187 const RegList saves = call_descriptor->CalleeSavedRegisters();
4188 // Restore registers.
4189 if (!saves.is_empty()) {
4190 const int returns = frame()->GetReturnSlotCount();
4191 if (returns != 0) {
4192 __ add(esp, Immediate(returns * kSystemPointerSize));
4193 }
4194 for (Register reg : saves) {
4195 __ pop(reg);
4196 }
4197 }
4198
4199 IA32OperandConverter g(this, nullptr);
4200 int parameter_slots = static_cast<int>(call_descriptor->ParameterSlotCount());
4201
4202 // {additional_pop_count} is only greater than zero if {parameter_slots} == 0.
4203 // Check RawMachineAssembler::PopAndReturn.
4204 if (parameter_slots != 0) {
4205 if (additional_pop_count->IsImmediate()) {
4206 DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
4207 } else if (v8_flags.debug_code) {
4208 __ cmp(g.ToRegister(additional_pop_count), Immediate(0));
4209 __ Assert(equal, AbortReason::kUnexpectedAdditionalPopValue);
4210 }
4211 }
4212
4213#if V8_ENABLE_WEBASSEMBLY
4214 if (call_descriptor->IsAnyWasmFunctionCall() &&
4215 v8_flags.experimental_wasm_growable_stacks) {
4217 Immediate(StackFrame::TypeToMarker(StackFrame::WASM_SEGMENT_START)));
4218 Label done;
4219 __ j(not_equal, &done);
4220 for (Register reg : base::Reversed(wasm::kGpReturnRegisters)) {
4221 __ push(reg);
4222 }
4223 __ sub(esp, Immediate(arraysize(wasm::kFpReturnRegisters) * kSimd128Size));
4224 for (size_t i = 0; i < arraysize(wasm::kFpReturnRegisters); i++) {
4225 __ Movdqu(Operand(esp, kSimd128Size * i), wasm::kFpReturnRegisters[i]);
4226 }
4227 __ PrepareCallCFunction(1, kReturnRegister0);
4228 __ Move(Operand(esp, 0 * kSystemPointerSize),
4229 Immediate(ExternalReference::isolate_address()));
4230 __ CallCFunction(ExternalReference::wasm_shrink_stack(), 1);
4231 // Restore old ebp. We don't need to restore old esp explicitly, because
4232 // it will be restored from ebp in LeaveFrame before return.
4233 __ mov(ebp, kReturnRegister0);
4234 for (size_t i = 0; i < arraysize(wasm::kFpReturnRegisters); i++) {
4235 __ Movdqu(wasm::kFpReturnRegisters[i], Operand(esp, kSimd128Size * i));
4236 }
4237 __ add(esp, Immediate(arraysize(wasm::kFpReturnRegisters) * kSimd128Size));
4238 for (Register reg : wasm::kGpReturnRegisters) {
4239 __ pop(reg);
4240 }
4241 __ bind(&done);
4242 }
4243#endif // V8_ENABLE_WEBASSEMBLY
4244
4245 Register argc_reg = ecx;
4246 // Functions with JS linkage have at least one parameter (the receiver).
4247 // If {parameter_slots} == 0, it means it is a builtin with
4248 // kDontAdaptArgumentsSentinel, which takes care of JS arguments popping
4249 // itself.
4250
4251 const bool drop_jsargs = parameter_slots != 0 &&
4252 frame_access_state()->has_frame() &&
4253 call_descriptor->IsJSFunctionCall();
4254 if (call_descriptor->IsCFunctionCall()) {
4255 AssembleDeconstructFrame();
4256 } else if (frame_access_state()->has_frame()) {
4257 // Canonicalize JSFunction return sites for now if they always have the same
4258 // number of return args.
4259 if (additional_pop_count->IsImmediate() &&
4260 g.ToConstant(additional_pop_count).ToInt32() == 0) {
4261 if (return_label_.is_bound()) {
4262 __ jmp(&return_label_);
4263 return;
4264 } else {
4265 __ bind(&return_label_);
4266 }
4267 }
4268 if (drop_jsargs) {
4269 // Get the actual argument count.
4270 __ mov(argc_reg, Operand(ebp, StandardFrameConstants::kArgCOffset));
4271 DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
4272 }
4273 AssembleDeconstructFrame();
4274 }
4275
4276 if (drop_jsargs) {
4277 // We must pop all arguments from the stack (including the receiver).
4278 // The number of arguments without the receiver is
4279 // max(argc_reg, parameter_slots-1), and the receiver is added in
4280 // DropArguments().
4281 Label mismatch_return;
4282 Register scratch_reg = edx;
4283 DCHECK_NE(argc_reg, scratch_reg);
4284 DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
4285 DCHECK(!call_descriptor->CalleeSavedRegisters().has(scratch_reg));
4286 __ cmp(argc_reg, Immediate(parameter_slots));
4287 __ j(greater, &mismatch_return, Label::kNear);
4288 __ Ret(parameter_slots * kSystemPointerSize, scratch_reg);
4289 __ bind(&mismatch_return);
4290 __ DropArguments(argc_reg, scratch_reg);
4291 // We use a return instead of a jump for better return address prediction.
4292 __ Ret();
4293 } else if (additional_pop_count->IsImmediate()) {
4294 int additional_count = g.ToConstant(additional_pop_count).ToInt32();
4295 size_t pop_size = (parameter_slots + additional_count) * kSystemPointerSize;
4296 if (is_uint16(pop_size)) {
4297 // Avoid the additional scratch register, it might clobber the
4298 // CalleeSavedRegisters.
4299 __ ret(static_cast<int>(pop_size));
4300 } else {
4301 Register scratch_reg = ecx;
4302 DCHECK(!call_descriptor->CalleeSavedRegisters().has(scratch_reg));
4303 CHECK_LE(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
4304 __ Ret(static_cast<int>(pop_size), scratch_reg);
4305 }
4306 } else {
4307 Register pop_reg = g.ToRegister(additional_pop_count);
4308 Register scratch_reg = pop_reg == ecx ? edi : ecx;
4309 DCHECK(!call_descriptor->CalleeSavedRegisters().has(scratch_reg));
4310 DCHECK(!call_descriptor->CalleeSavedRegisters().has(pop_reg));
4311 int pop_size = static_cast<int>(parameter_slots * kSystemPointerSize);
4312 __ PopReturnAddressTo(scratch_reg);
4313 __ lea(esp, Operand(esp, pop_reg, times_system_pointer_size,
4314 static_cast<int>(pop_size)));
4315 __ PushReturnAddressFrom(scratch_reg);
4316 __ Ret();
4317 }
4318}
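// Editorial sketch (not part of the generated file): for the immediate case
// the epilogue prefers the single-instruction `ret imm16`, which can only
// encode a 16-bit byte count, and otherwise returns through a scratch
// register. Names illustrative only:
#include <cstddef>
#include <cstdint>

bool CanUseRetImmediate(size_t parameter_slots, size_t additional_count) {
  size_t pop_bytes = (parameter_slots + additional_count) * 4;  // ia32 pointers
  return pop_bytes <= UINT16_MAX;  // the is_uint16(pop_size) test above
}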
4319
4320void CodeGenerator::FinishCode() {}
4321
4322void CodeGenerator::PrepareForDeoptimizationExits(
4323 ZoneDeque<DeoptimizationExit*>* exits) {}
4324
4325AllocatedOperand CodeGenerator::Push(InstructionOperand* source) {
4326 auto rep = LocationOperand::cast(source)->representation();
4327 int new_slots = ElementSizeInPointers(rep);
4328 IA32OperandConverter g(this, nullptr);
4329 int last_frame_slot_id =
4330 frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
4331 int sp_delta = frame_access_state_->sp_delta();
4332 int slot_id = last_frame_slot_id + sp_delta + new_slots;
4333 AllocatedOperand stack_slot(LocationOperand::STACK_SLOT, rep, slot_id);
4334 if (source->IsRegister()) {
4335 __ push(g.ToRegister(source));
4336 frame_access_state()->IncreaseSPDelta(new_slots);
4337 } else if (source->IsStackSlot() || source->IsFloatStackSlot()) {
4338 __ push(g.ToOperand(source));
4339 frame_access_state()->IncreaseSPDelta(new_slots);
4340 } else {
4341 // No push instruction for this operand type. Bump the stack pointer and
4342 // assemble the move.
4343 __ sub(esp, Immediate(new_slots * kSystemPointerSize));
4344 frame_access_state()->IncreaseSPDelta(new_slots);
4345 AssembleMove(source, &stack_slot);
4346 }
4347 temp_slots_ += new_slots;
4348 return stack_slot;
4349}
4350
4351void CodeGenerator::Pop(InstructionOperand* dest, MachineRepresentation rep) {
4352 IA32OperandConverter g(this, nullptr);
4353 int dropped_slots = ElementSizeInPointers(rep);
4354 if (dest->IsRegister()) {
4355 frame_access_state()->IncreaseSPDelta(-dropped_slots);
4356 __ pop(g.ToRegister(dest));
4357 } else if (dest->IsStackSlot() || dest->IsFloatStackSlot()) {
4358 frame_access_state()->IncreaseSPDelta(-dropped_slots);
4359 __ pop(g.ToOperand(dest));
4360 } else {
4361 int last_frame_slot_id =
4362 frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
4363 int sp_delta = frame_access_state_->sp_delta();
4364 int slot_id = last_frame_slot_id + sp_delta;
4365 AllocatedOperand stack_slot(LocationOperand::STACK_SLOT, rep, slot_id);
4366 AssembleMove(&stack_slot, dest);
4367 frame_access_state()->IncreaseSPDelta(-dropped_slots);
4368 __ add(esp, Immediate(dropped_slots * kSystemPointerSize));
4369 }
4370 temp_slots_ -= dropped_slots;
4371}
4372
4373void CodeGenerator::PopTempStackSlots() {
4374 if (temp_slots_ > 0) {
4375 frame_access_state()->IncreaseSPDelta(-temp_slots_);
4376 __ add(esp, Immediate(temp_slots_ * kSystemPointerSize));
4377 temp_slots_ = 0;
4378 }
4379}
4380
4381void CodeGenerator::MoveToTempLocation(InstructionOperand* source,
4382 MachineRepresentation rep) {
4383 // Must be kept in sync with {MoveTempLocationTo}.
4384 DCHECK(!source->IsImmediate());
4385 if ((IsFloatingPoint(rep) &&
4386 !move_cycle_.pending_double_scratch_register_use)) {
4387 // The scratch double register is available.
4388 AllocatedOperand scratch(LocationOperand::REGISTER, rep,
4389 kScratchDoubleReg.code());
4390 AssembleMove(source, &scratch);
4391 } else {
4392 // The scratch register blocked by pending moves. Use the stack instead.
4393 Push(source);
4394 }
4395}
4396
4397void CodeGenerator::MoveTempLocationTo(InstructionOperand* dest,
4398 MachineRepresentation rep) {
4399 if (IsFloatingPoint(rep) &&
4400 !move_cycle_.pending_double_scratch_register_use) {
4401 AllocatedOperand scratch(LocationOperand::REGISTER, rep,
4402 kScratchDoubleReg.code());
4403 AssembleMove(&scratch, dest);
4404 } else {
4405 Pop(dest, rep);
4406 }
4407 move_cycle_ = MoveCycleState();
4408}
4409
4410void CodeGenerator::SetPendingMove(MoveOperands* move) {
4411 InstructionOperand* source = &move->source();
4412 InstructionOperand* destination = &move->destination();
4413 MoveType::Type move_type = MoveType::InferMove(source, destination);
4414 if (move_type == MoveType::kStackToStack) {
4415 if (!source->IsStackSlot()) {
4416 move_cycle_.pending_double_scratch_register_use = true;
4417 }
4418 return;
4419 }
4420}
4421
4422void CodeGenerator::AssembleMove(InstructionOperand* source,
4423 InstructionOperand* destination) {
4424 IA32OperandConverter g(this, nullptr);
4425 // Dispatch on the source and destination operand kinds.
4426 switch (MoveType::InferMove(source, destination)) {
4427 case MoveType::kRegisterToRegister:
4428 if (source->IsRegister()) {
4429 __ mov(g.ToRegister(destination), g.ToRegister(source));
4430 } else {
4431 DCHECK(source->IsFPRegister());
4432 __ Movaps(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
4433 }
4434 return;
4435 case MoveType::kRegisterToStack: {
4436 Operand dst = g.ToOperand(destination);
4437 if (source->IsRegister()) {
4438 __ mov(dst, g.ToRegister(source));
4439 } else {
4440 DCHECK(source->IsFPRegister());
4441 XMMRegister src = g.ToDoubleRegister(source);
4442 MachineRepresentation rep =
4443 LocationOperand::cast(source)->representation();
4444 if (rep == MachineRepresentation::kFloat32) {
4445 __ Movss(dst, src);
4446 } else if (rep == MachineRepresentation::kFloat64) {
4447 __ Movsd(dst, src);
4448 } else {
4450 __ Movups(dst, src);
4451 }
4452 }
4453 return;
4454 }
4455 case MoveType::kStackToRegister: {
4456 Operand src = g.ToOperand(source);
4457 if (source->IsStackSlot()) {
4458 __ mov(g.ToRegister(destination), src);
4459 } else {
4460 DCHECK(source->IsFPStackSlot());
4461 XMMRegister dst = g.ToDoubleRegister(destination);
4462 MachineRepresentation rep =
4463 LocationOperand::cast(source)->representation();
4464 if (rep == MachineRepresentation::kFloat32) {
4465 __ Movss(dst, src);
4466 } else if (rep == MachineRepresentation::kFloat64) {
4467 __ Movsd(dst, src);
4468 } else {
4470 __ Movups(dst, src);
4471 }
4472 }
4473 return;
4474 }
4475 case MoveType::kStackToStack: {
4476 Operand src = g.ToOperand(source);
4477 Operand dst = g.ToOperand(destination);
4478 if (source->IsStackSlot()) {
4479 __ push(src);
4480 __ pop(dst);
4481 } else {
4482 MachineRepresentation rep =
4483 LocationOperand::cast(source)->representation();
4484 if (rep == MachineRepresentation::kFloat32) {
4485 __ Movss(kScratchDoubleReg, src);
4486 __ Movss(dst, kScratchDoubleReg);
4487 } else if (rep == MachineRepresentation::kFloat64) {
4488 __ Movsd(kScratchDoubleReg, src);
4489 __ Movsd(dst, kScratchDoubleReg);
4490 } else {
4492 __ Movups(kScratchDoubleReg, src);
4493 __ Movups(dst, kScratchDoubleReg);
4494 }
4495 }
4496 return;
4497 }
4498 case MoveType::kConstantToRegister: {
4499 Constant src = g.ToConstant(source);
4500 if (destination->IsRegister()) {
4501 Register dst = g.ToRegister(destination);
4502 if (src.type() == Constant::kHeapObject) {
4503 __ Move(dst, src.ToHeapObject());
4504 } else if (src.type() == Constant::kExternalReference) {
4505 __ Move(dst, Immediate(src.ToExternalReference()));
4506 } else {
4507 __ Move(dst, g.ToImmediate(source));
4508 }
4509 } else {
4510 DCHECK(destination->IsFPRegister());
4511 XMMRegister dst = g.ToDoubleRegister(destination);
4512 if (src.type() == Constant::kFloat32) {
4513 // TODO(turbofan): Can we do better here?
4514 __ Move(dst, src.ToFloat32AsInt());
4515 } else {
4516 DCHECK_EQ(src.type(), Constant::kFloat64);
4517 __ Move(dst, src.ToFloat64().AsUint64());
4518 }
4519 }
4520 return;
4521 }
4522 case MoveType::kConstantToStack: {
4523 Constant src = g.ToConstant(source);
4524 Operand dst = g.ToOperand(destination);
4525 if (destination->IsStackSlot()) {
4526 __ Move(dst, g.ToImmediate(source));
4527 } else {
4528 DCHECK(destination->IsFPStackSlot());
4529 if (src.type() == Constant::kFloat32) {
4530 __ Move(dst, Immediate(src.ToFloat32AsInt()));
4531 } else {
4532 DCHECK_EQ(src.type(), Constant::kFloat64);
4533 uint64_t constant_value = src.ToFloat64().AsUint64();
4534 uint32_t lower = static_cast<uint32_t>(constant_value);
4535 uint32_t upper = static_cast<uint32_t>(constant_value >> 32);
4536 Operand dst0 = dst;
4537 Operand dst1 = g.ToOperand(destination, kSystemPointerSize);
4538 __ Move(dst0, Immediate(lower));
4539 __ Move(dst1, Immediate(upper));
4540 }
4541 }
4542 return;
4543 }
4544 }
4545 UNREACHABLE();
4546}
4547
4548void CodeGenerator::AssembleSwap(InstructionOperand* source,
4549 InstructionOperand* destination) {
4550 IA32OperandConverter g(this, nullptr);
4551 // Dispatch on the source and destination operand kinds. Not all
4552 // combinations are possible.
4553 switch (MoveType::InferSwap(source, destination)) {
4554 case MoveType::kRegisterToRegister: {
4555 if (source->IsRegister()) {
4556 Register src = g.ToRegister(source);
4557 Register dst = g.ToRegister(destination);
4558 __ push(src);
4559 __ mov(src, dst);
4560 __ pop(dst);
4561 } else {
4562 DCHECK(source->IsFPRegister());
4563 XMMRegister src = g.ToDoubleRegister(source);
4564 XMMRegister dst = g.ToDoubleRegister(destination);
4565 __ Movaps(kScratchDoubleReg, src);
4566 __ Movaps(src, dst);
4567 __ Movaps(dst, kScratchDoubleReg);
4568 }
4569 return;
4570 }
4571 case MoveType::kRegisterToStack: {
4572 if (source->IsRegister()) {
4573 Register src = g.ToRegister(source);
4574 __ push(src);
4576 Operand dst = g.ToOperand(destination);
4577 __ mov(src, dst);
4579 dst = g.ToOperand(destination);
4580 __ pop(dst);
4581 } else {
4582 DCHECK(source->IsFPRegister());
4583 XMMRegister src = g.ToDoubleRegister(source);
4584 Operand dst = g.ToOperand(destination);
4585 MachineRepresentation rep =
4586 LocationOperand::cast(source)->representation();
4587 if (rep == MachineRepresentation::kFloat32) {
4588 __ Movss(kScratchDoubleReg, dst);
4589 __ Movss(dst, src);
4590 __ Movaps(src, kScratchDoubleReg);
4591 } else if (rep == MachineRepresentation::kFloat64) {
4592 __ Movsd(kScratchDoubleReg, dst);
4593 __ Movsd(dst, src);
4594 __ Movaps(src, kScratchDoubleReg);
4595 } else {
4597 __ Movups(kScratchDoubleReg, dst);
4598 __ Movups(dst, src);
4599 __ Movups(src, kScratchDoubleReg);
4600 }
4601 }
4602 return;
4603 }
4604 case MoveType::kStackToStack: {
4605 if (source->IsStackSlot()) {
4606 Operand dst1 = g.ToOperand(destination);
4607 __ push(dst1);
4609 Operand src1 = g.ToOperand(source);
4610 __ push(src1);
4611 Operand dst2 = g.ToOperand(destination);
4612 __ pop(dst2);
4614 Operand src2 = g.ToOperand(source);
4615 __ pop(src2);
4616 } else {
4617 DCHECK(source->IsFPStackSlot());
4618 Operand src0 = g.ToOperand(source);
4619 Operand dst0 = g.ToOperand(destination);
4620 MachineRepresentation rep =
4621 LocationOperand::cast(source)->representation();
4622 if (rep == MachineRepresentation::kFloat32) {
4623 __ Movss(kScratchDoubleReg, dst0); // Save dst in scratch register.
4624 __ push(src0); // Then use stack to copy src to destination.
4625 __ pop(dst0);
4626 __ Movss(src0, kScratchDoubleReg);
4627 } else if (rep == MachineRepresentation::kFloat64) {
4628 __ Movsd(kScratchDoubleReg, dst0); // Save dst in scratch register.
4629 __ push(src0); // Then use stack to copy src to destination.
4630 __ pop(dst0);
4631 __ push(g.ToOperand(source, kSystemPointerSize));
4632 __ pop(g.ToOperand(destination, kSystemPointerSize));
4633 __ Movsd(src0, kScratchDoubleReg);
4634 } else {
4636 __ Movups(kScratchDoubleReg, dst0); // Save dst in scratch register.
4637 __ push(src0); // Then use stack to copy src to destination.
4638 __ pop(dst0);
4639 __ push(g.ToOperand(source, kSystemPointerSize));
4640 __ pop(g.ToOperand(destination, kSystemPointerSize));
4641 __ push(g.ToOperand(source, 2 * kSystemPointerSize));
4642 __ pop(g.ToOperand(destination, 2 * kSystemPointerSize));
4643 __ push(g.ToOperand(source, 3 * kSystemPointerSize));
4644 __ pop(g.ToOperand(destination, 3 * kSystemPointerSize));
4645 __ Movups(src0, kScratchDoubleReg);
4646 }
4647 }
4648 return;
4649 }
4650 default:
4651 UNREACHABLE();
4652 }
4653}
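// Editorial sketch (not part of the generated file): ia32 mov cannot take two
// memory operands, so the stack-slot/stack-slot swap above pushes both words
// and pops them back in swapped order, while the wider FP cases route one side
// through the scratch XMM register. The push/push/pop/pop trick is just an
// exchange through two temporaries:
#include <cstdint>

void SwapWords(uint32_t* a, uint32_t* b) {
  uint32_t t1 = *a;  // push [a]
  uint32_t t2 = *b;  // push [b]
  *a = t2;           // pop into [a]
  *b = t1;           // pop into [b]
}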
4654
4655void CodeGenerator::AssembleJumpTable(base::Vector<Label*> targets) {
4656 for (auto target : targets) {
4657 __ dd(target);
4658 }
4659}
4660
4661#undef __
4662#undef kScratchDoubleReg
4663#undef ASSEMBLE_COMPARE
4664#undef ASSEMBLE_IEEE754_BINOP
4665#undef ASSEMBLE_IEEE754_UNOP
4666#undef ASSEMBLE_BINOP
4667#undef ASSEMBLE_ATOMIC_BINOP
4668#undef ASSEMBLE_I64ATOMIC_BINOP
4669#undef ASSEMBLE_MOVX
4670#undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
4671#undef ASSEMBLE_SIMD_IMM_SHUFFLE
4672#undef ASSEMBLE_SIMD_ALL_TRUE
4673#undef ASSEMBLE_SIMD_SHIFT
4674#undef ASSEMBLE_SIMD_PINSR
4675
4676} // namespace compiler
4677} // namespace internal
4678} // namespace v8