#if V8_TARGET_ARCH_IA32
#include "src/codegen/ia32/register-ia32.h"
#elif V8_TARGET_ARCH_X64
#include "src/codegen/x64/register-x64.h"
#else
#error Unsupported target architecture.
#endif
#if V8_TARGET_ARCH_IA32
#define DCHECK_OPERAND_IS_NOT_REG(op) DCHECK(!op.is_reg_only());
#else
#define DCHECK_OPERAND_IS_NOT_REG(op)
#endif
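// Why this is IA32-only: on IA32 an Operand can wrap a bare register, so
// helpers that require a genuine memory operand assert !is_reg_only(); on
// x64 an Operand is always a memory reference and the check compiles away.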
// Helpers that paper over the differently named IA32 and x64 mnemonics
// with an architecture dispatch:

// Move(Register dst, uint32_t src):
#if V8_TARGET_ARCH_IA32
  mov(dst, Immediate(src));
#elif V8_TARGET_ARCH_X64
  movl(dst, Immediate(src));
#else
#error Unsupported target architecture.
#endif

// Move(Register dst, Register src):
#if V8_TARGET_ARCH_IA32
  mov(dst, src);
#elif V8_TARGET_ARCH_X64
  movq(dst, src);
#else
#error Unsupported target architecture.
#endif

// Add(Register dst, Immediate src):
#if V8_TARGET_ARCH_IA32
  add(dst, src);
#elif V8_TARGET_ARCH_X64
  addq(dst, src);
#else
#error Unsupported target architecture.
#endif

// And(Register dst, Immediate src):
#if V8_TARGET_ARCH_IA32
  and_(dst, src);
#elif V8_TARGET_ARCH_X64
  if (is_uint32(src.value())) {
    andl(dst, src);
  } else {
    andq(dst, src);
  }
#else
#error Unsupported target architecture.
#endif
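// Why And() checks is_uint32(): the 32-bit andl zeroes the upper half of a
// 64-bit register, which is exactly right for a zero-extended mask, while
// andq sign-extends its 32-bit immediate; masks with the high bits set
// therefore need the andq form.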
// Shufps -- with AVX, use the non-destructive three-operand form:
  vshufps(dst, src1, src2, imm8);
  // ... (without AVX: copy src1 into dst, then the two-operand shufps)
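// This AVX-versus-SSE dispatch repeats throughout the file: VEX encodings
// take separate source and destination registers, while the legacy SSE
// forms overwrite their first operand and therefore often need an extra
// movaps or register-aliasing checks first.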
// F32x4Min -- minps is not symmetric in its NaN and +/-0 handling, so take
// the minimum in both operand orders and reconcile the two results:
  if (CpuFeatures::IsSupported(AVX)) {
    vminps(scratch, lhs, rhs);
    vminps(dst, rhs, lhs);
  } else if (dst == lhs || dst == rhs) {
    // ... (SSE fallback elided)
  }
  // ...
  Cmpunordps(dst, dst, scratch);
  // ...
  Psrld(dst, dst, uint8_t{10});
  Andnps(dst, dst, scratch);
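// How the tail canonicalizes NaNs: Cmpunordps writes all-ones into every
// lane where an input was NaN; Psrld by 10 turns those lanes into
// 0x003fffff, a mask of the 22 low mantissa payload bits (sign, exponent,
// and quiet bit live in the top 10 bits of a float). Andnps then clears
// exactly those payload bits, leaving a canonical quiet NaN, while non-NaN
// lanes (mask 0) pass the computed minimum through unchanged.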
// F32x4Max -- the same both-orders trick, with extra steps because maxps
// also disagrees on +/-0:
  if (CpuFeatures::IsSupported(AVX)) {
    vmaxps(scratch, lhs, rhs);
    vmaxps(dst, rhs, lhs);
  } else if (dst == lhs || dst == rhs) {
    // ... (SSE fallback elided)
  }
  // ...
  Subps(scratch, scratch, dst);
  Cmpunordps(dst, dst, scratch);
  Psrld(dst, dst, uint8_t{10});
  Andnps(dst, dst, scratch);
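// Before the subtract, the two results are XORed (finding lanes where the
// operand orders disagreed) and ORed (propagating NaNs); the subtraction
// then turns an ORed (+0,-0) disagreement back into +0, so that
// max(+0, -0) is +0, and it also quiets any signaling NaN.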
// F64x2Min -- double-precision version of the same pattern:
  if (CpuFeatures::IsSupported(AVX)) {
    vminpd(scratch, lhs, rhs);
    vminpd(dst, rhs, lhs);
    // Propagate -0's and NaNs, which may be non-canonical.
    vorpd(scratch, scratch, dst);
    // Canonicalize NaNs by quieting and clearing the payload.
    vcmpunordpd(dst, dst, scratch);
    vorpd(scratch, scratch, dst);
    vpsrlq(dst, dst, uint8_t{13});
    vandnpd(dst, dst, scratch);
  } else {
    // ...
    if (dst == lhs || dst == rhs) {
      // ... (save a move when dst aliases an input)
    }
    // ...
    cmpunordpd(dst, scratch);
    psrlq(dst, uint8_t{13});
    andnpd(dst, scratch);
  }
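// The double variant shifts by 13 instead of 10: sign, exponent, and quiet
// bit occupy the top 13 bits of a double, so an all-ones lane shifted right
// by 13 masks exactly the 51 payload bits that must be cleared.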
// F64x2Max:
  if (CpuFeatures::IsSupported(AVX)) {
    vmaxpd(scratch, lhs, rhs);
    vmaxpd(dst, rhs, lhs);
    // Find discrepancies between the two orders.
    vxorpd(dst, dst, scratch);
    // Propagate NaNs, which may be non-canonical.
    vorpd(scratch, scratch, dst);
    // Propagate the sign discrepancy; quiets subtle NaN cases.
    vsubpd(scratch, scratch, dst);
    // Canonicalize NaNs by clearing the payload.
    vcmpunordpd(dst, dst, scratch);
    vpsrlq(dst, dst, uint8_t{13});
    vandnpd(dst, dst, scratch);
  } else {
    // ...
    if (dst == lhs || dst == rhs) {
      // ...
    }
    // ...
    cmpunordpd(dst, scratch);
    psrlq(dst, uint8_t{13});
    andnpd(dst, scratch);
  }
// F32x4ExtractLane -- pick the cheapest instruction for each lane index:
  if (lane == 0) {
    // ... (plain move, if dst != src)
  } else if (lane == 1) {
    Movshdup(dst, src);
  } else if (lane == 2 && dst == src) {
    // Requiring dst == src avoids a false dependency on dst.
    Movhlps(dst, src);
  } else if (dst == src) {
    Shufps(dst, src, src, lane);
  } else {
    Pshufd(dst, src, lane);
  }
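// Each of these is a shorter encoding than insertps; they may leave junk in
// the upper lanes of dst, which is fine because only the scalar in lane 0
// is the extraction result.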
// S128Store32Lane -- lane 0 is a plain movss store; other lanes go through
// extractps:
  if (laneidx == 0) {
    Movss(dst, src);
  } else {
    DCHECK_GE(3, laneidx);
    Extractps(dst, src, laneidx);
  }
// I8x16SplatPreAvx2(XMMRegister dst, Op src, XMMRegister scratch):
template <typename Op>
// ... (signature elided)
  Movd(dst, src);
  Xorps(scratch, scratch);
  Pshufb(dst, scratch);

// I8x16Splat (AVX2 paths of the Register and Operand overloads):
  vpbroadcastb(dst, scratch);
  // ...
  vpbroadcastb(dst, src);
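// Without AVX2 the byte splat costs three instructions: movd to place the
// byte in lane 0, xorps to build an all-zero shuffle mask, and pshufb,
// which with index 0 in every mask byte replicates byte 0 across the whole
// register. vpbroadcastb collapses all of that into one instruction.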
// I8x16Shl (immediate shift) -- x86 has no 8-bit SIMD shifts, so shift by
// words and mask away the bits that crossed into the neighboring byte:
  uint8_t shift = truncate_to_int3(src2);
  Psllw(dst, src1, uint8_t{shift});

  uint8_t bmask = static_cast<uint8_t>(0xff << shift);
  uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
  // ... (move mask into tmp2 via a GP register)
  Pshufd(tmp2, tmp2, uint8_t{0});
  // ...

// I8x16Shl (register shift) -- build the byte mask in registers instead:
  // ...
  Psrlw(tmp2, tmp2, tmp3);
  Packuswb(tmp2, tmp2);
  // ...
  Pand(dst, src1, tmp2);
  // ...
  Psllw(dst, dst, tmp3);
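// The register form first builds a (0xff >> shift) mask in every byte:
// all-ones words shifted logically right by (shift + 8) leave
// 0x00ff >> shift per word, and packuswb packs those words back into bytes.
// ANDing src1 with the mask clears the high bits that would spill into the
// next byte, after which the 16-bit left shift is safe.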
// I8x16ShrS (immediate) -- widen to words, shift arithmetically, repack:
  uint8_t shift = truncate_to_int3(src2) + 8;

  Punpckhbw(tmp, src1);
  Punpcklbw(dst, src1);
  // ... (Psraw by shift on both halves, then Packsswb)

// I8x16ShrS (register shift) -- the same unpack/shift/repack structure:
  Punpckhbw(tmp2, src1);
  Punpcklbw(dst, src1);
  // ...
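// The unpack places each source byte in the high byte of a 16-bit lane,
// with garbage below it, so an arithmetic shift by (shift + 8) performs the
// byte shift and flushes the garbage out in one go; packsswb then narrows
// back to bytes without saturating, since every result already fits in an
// int8.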
// I8x16ShrU (immediate) -- like I8x16Shl, a word shift plus a byte mask,
// except the mask now keeps the low bits:
  uint8_t shift = truncate_to_int3(src2);
  Psrlw(dst, src1, shift);

  uint8_t bmask = 0xff >> shift;
  uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
  // ...
  Pshufd(tmp2, tmp2, uint8_t{0});
  // ...

// I8x16ShrU (register shift) -- unpack to words, shift logically, repack:
  // ...
  Punpckhbw(tmp2, src1);
  Punpcklbw(dst, src1);
  // ...
// I16x8SplatPreAvx2(XMMRegister dst, Op src):
template <typename Op>
// ... (signature elided)
  // pshuflw copies word 0 into all four low words; punpcklqdq then mirrors
  // the low qword into the high qword.
  Pshuflw(dst, dst, uint8_t{0x0});
  Punpcklqdq(dst, dst);

// I16x8Splat (AVX2 paths of the Register and Operand overloads):
  vpbroadcastw(dst, dst);
  // ...
  vpbroadcastw(dst, src);
// I16x8ExtMulLow -- widen the low eight bytes of each input, multiply:
  is_signed ? Pmovsxbw(scratch, src1) : Pmovzxbw(scratch, src1);
  is_signed ? Pmovsxbw(dst, src2) : Pmovzxbw(dst, src2);
  Pmullw(dst, scratch);
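// Widening first makes overflow impossible: an 8-bit by 8-bit product,
// signed or unsigned, always fits in 16 bits, so the low 16-bit multiply
// already is the exact extended product.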
// I16x8ExtMulHighS (AVX path) -- sign-extend the high eight bytes in place:
  // Duplicate each high byte into both halves of a word, then shift the
  // word right arithmetically by 8 to sign-extend it.
  vpunpckhbw(scratch, src1, src1);
  vpsraw(scratch, scratch, 8);
  vpunpckhbw(dst, src2, src2);
  // ...
  vpmullw(dst, dst, scratch);

  // SSE fallback (destructive two-operand forms):
  // ...
  punpckhbw(scratch, scratch);
  // ...
  pmullw(dst, scratch);
// I16x8ExtMulHighU (AVX path) -- zero-extend the high bytes by unpacking
// against a zero register; squaring (src1 == src2) saves one unpack:
  if (src1 == src2) {
    vpxor(scratch, scratch, scratch);
    vpunpckhbw(dst, src1, scratch);
    vpmullw(dst, dst, dst);
  } else {
    if (dst == src2) {
      // dst would be overwritten before src2 is read; the operands commute,
      // so swap them.
      std::swap(src1, src2);
    }
    vpxor(scratch, scratch, scratch);
    vpunpckhbw(dst, src1, scratch);
    vpunpckhbw(scratch, src2, scratch);
    vpmullw(dst, dst, scratch);
  }

  // SSE fallback:
  xorps(scratch, scratch);
  // ...
  punpckhbw(dst, scratch);
  pmullw(dst, scratch);
  // ...
  if (dst == src2) {
    std::swap(src1, src2);
  } else if (dst != src1) {
    // ...
  }
  xorps(scratch, scratch);
  punpckhbw(dst, scratch);
  punpckhbw(scratch, src2);
  // ...
  pmullw(dst, scratch);
// I16x8SConvertI8x16High -- widen the high eight bytes with sign:
  vpunpckhbw(dst, src, src);
  vpsraw(dst, dst, 8);

// I16x8UConvertI8x16High -- unpack against zero to widen unsigned:
  vpxor(tmp, tmp, tmp);
  vpunpckhbw(dst, src, tmp);
  // SSE fallback:
  xorps(scratch, scratch);
  punpckhbw(dst, scratch);
// I16x8Q15MulRSatS -- scratch = i16x8.splat(0x8000):
  Pcmpeqd(scratch, scratch);
  Psllw(scratch, scratch, uint8_t{15});
  // ...
  Pmulhrsw(dst, src1, src2);
  Pcmpeqw(scratch, dst);
  Pxor(dst, scratch);
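// pmulhrsw already computes the rounded Q15 multiply, but for the one
// overflowing input pair (-1.0 * -1.0, i.e. 0x8000 * 0x8000) it returns
// 0x8000 instead of the saturated 0x7fff. Comparing the product against
// splat(0x8000) yields all-ones exactly in those lanes, and the final xor
// flips 0x8000 to 0x7fff there while leaving all other lanes unchanged.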
// I16x8DotI8x16I7x16S:
  vpmaddubsw(dst, src2, src1);
  // SSE path, after copying src2 into dst if needed:
  pmaddubsw(dst, src1);
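// pmaddubsw treats its first operand as unsigned bytes and its second as
// signed bytes. src2 holds the i7x16 operand, which is always non-negative,
// so using it as the "unsigned" side still computes the intended signed dot
// product, and the i7 range keeps the pairwise sums below the instruction's
// int16 saturation bound.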
// I32x4DotI8x16I7x16AddS -- on x64, fused VNNI instructions (vpdpbssd /
// vpdpbusd) are used when available:
#if V8_TARGET_ARCH_X64
  // ... (AVX-VNNI fast paths elided)
#endif
  // Generic path: splat_reg = i16x8.splat(1).
  Pcmpeqd(splat_reg, splat_reg);
  Psrlw(splat_reg, splat_reg, uint8_t{15});
  // 16-bit dot product of the byte inputs (AVX and SSE forms):
  vpmaddubsw(scratch, src2, src1);
  // ...
  pmaddubsw(scratch, src1);
  // Then pairwise-add adjacent words into dwords:
  Pmaddwd(scratch, splat_reg);
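// pmaddwd against a register of 16-bit ones multiplies each word by 1 and
// adds adjacent pairs, which is exactly the widening pairwise add needed to
// turn the i16x8 dot result into i32x4 lanes before src3 is added in.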
// I32x4ExtAddPairwiseI16x8U (AVX path ends by adding the odd- and even-word
// halves):
  vpaddd(dst, tmp, dst);
  // SSE4.1 path: shift the odd words down, blend, add:
  psrld(tmp, uint8_t{16});
  // ...
  // SSE2 path: mask the even words, shift the odd words down, add:
  psrld(dst, uint8_t{16});
  // ...
// I32x4ExtMul -- assemble 32-bit products from 16-bit half-multiplies:
  vpmullw(scratch, src1, src2);
  is_signed ? vpmulhw(dst, src1, src2) : vpmulhuw(dst, src1, src2);
  low ? vpunpcklwd(dst, scratch, dst) : vpunpckhwd(dst, scratch, dst);
  // SSE fallback (dst aliases src1):
  is_signed ? pmulhw(scratch, src2) : pmulhuw(scratch, src2);
  low ? punpcklwd(dst, scratch) : punpckhwd(dst, scratch);
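// pmullw yields the low 16 bits of each 16x16 product and pmulhw/pmulhuw
// the high 16 bits; interleaving the two word-by-word reassembles full
// 32-bit products, and choosing unpack-low versus unpack-high selects which
// four input lanes the result covers.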
// I32x4SConvertI16x8High -- duplicate the high words, then sign-extend:
  vpunpckhwd(dst, src, src);
  vpsrad(dst, dst, 16);

// I32x4UConvertI16x8High -- unpack against zero:
  vpxor(tmp, tmp, tmp);
  vpunpckhwd(dst, src, tmp);
  // SSE fallback:
  xorps(scratch, scratch);
  punpckhwd(dst, scratch);
// I64x2Neg -- there is no 64-bit psign, so subtract from zero:
  vpxor(scratch, scratch, scratch);
  vpsubq(dst, scratch, src);
  // SSE fallback frees up dst by moving src aside first:
  // ...
  std::swap(src, scratch);
  // ...

// I64x2Abs (AVX path) -- compute 0 - src, then blend on the sign bit:
  vpxor(tmp, tmp, tmp);
  vpsubq(tmp, tmp, src);
  // ... (vblendvpd picks -src where the sign bit of src is set)
// I64x2GtS (SSE4.2 path) -- pcmpgtq with register-aliasing workarounds:
  if (dst == src0) {
    pcmpgtq(dst, src1);
  } else if (dst == src1) {
    // ... (compare via scratch, then move into dst)
  }
  // Pre-SSE4.2 fallback: assemble the 64-bit signed compare from 32-bit
  // equality and greater-than pieces:
  // ...
  pcmpeqd(scratch, src1);
  // ...
  pcmpgtd(scratch, src1);
  // ...
// I64x2GeS -- a >= b computed as NOT(b > a):
  vpcmpgtq(dst, src1, src0);
  vpcmpeqd(scratch, scratch, scratch);
  vpxor(dst, dst, scratch);
  // SSE4.2 path:
  // ...
  pcmpeqd(scratch, scratch);
  xorps(dst, scratch);
  // Pre-SSE4.2: the 32-bit emulation of the swapped compare, then the same
  // inversion:
  pcmpeqd(scratch, src0);
  andps(dst, scratch);
  // ...
  pcmpgtd(scratch, src0);
  // ...
  pcmpeqd(scratch, scratch);
  xorps(dst, scratch);
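// No SSE level provides a "greater or equal" quadword compare, so every
// path computes the strict compare in the swapped direction and flips all
// bits; xoring with an all-ones register (pcmpeqd of a register with
// itself) is the cheapest vector NOT.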
// I64x2ShrS (immediate) -- x86 has no 64-bit arithmetic right shift, so
// bias to unsigned, shift logically, and subtract the shifted bias.
// xmm_tmp = i64x2.splat(0x8000'0000'0000'0000):
  Pcmpeqd(xmm_tmp, xmm_tmp);
  Psllq(xmm_tmp, uint8_t{63});
  // ...
  // Flipping the sign bit adds the 2^63 bias.
  Pxor(dst, src, xmm_tmp);
  Psrlq(dst, shift);
  Psrlq(xmm_tmp, shift);
  Psubq(dst, xmm_tmp);

// I64x2ShrS (register shift) -- the same trick with the count moved into an
// XMM register:
  Pcmpeqd(xmm_tmp, xmm_tmp);
  Psllq(xmm_tmp, uint8_t{63});
  // ...
  Movd(xmm_shift, tmp_shift);
  // ...
  Pxor(dst, src, xmm_tmp);
  Psrlq(dst, xmm_shift);
  Psrlq(xmm_tmp, xmm_shift);
  Psubq(dst, xmm_tmp);
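// The identity behind the bias trick, for 0 <= c < 64:
//   (x >>s c) == ((x + 2^63) >>u c) - (2^63 >>u c)
// Adding 2^63 only flips the sign bit (hence the pxor), the now-unsigned
// values shift correctly with psrlq, and subtracting the equally shifted
// bias restores the signed result.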
// I64x2Mul -- compose 64 x 64 -> 64 from 32 x 32 -> 64 unsigned multiplies:
  // 1. Multiply the high dword of each qword of lhs with rhs.
  vpsrlq(tmp1, lhs, uint8_t{32});
  vpmuludq(tmp1, tmp1, rhs);
  // 2. Multiply the high dword of each qword of rhs with lhs.
  vpsrlq(tmp2, rhs, uint8_t{32});
  vpmuludq(tmp2, tmp2, lhs);
  // 3. Their sum, shifted left 32, is the high half of the result.
  vpaddq(tmp2, tmp2, tmp1);
  vpsllq(tmp2, tmp2, uint8_t{32});
  // 4. The low-dword product is the low half; 5. combine.
  vpmuludq(dst, lhs, rhs);
  vpaddq(dst, dst, tmp2);

  // SSE fallback: the same algorithm on copies, to preserve the inputs:
  psrlq(tmp1, uint8_t{32});
  // ...
  psrlq(tmp2, uint8_t{32});
  // ...
  psllq(tmp2, uint8_t{32});
  // ...
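// With lhs = ah * 2^32 + al and rhs = bh * 2^32 + bl, the product modulo
// 2^64 is (ah * bl + al * bh) * 2^32 + al * bl; the ah * bh term overflows
// entirely. pmuludq ignores the high dword of each qword operand, which is
// why steps 1-2 get away with shifts instead of masks.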
// I64x2ExtMul -- move the selected dword of each qword into multiplying
// position, then do one widening multiply:
  if (low) {
    vpunpckldq(scratch, src1, src1);
    vpunpckldq(dst, src2, src2);
  } else {
    vpunpckhdq(scratch, src1, src1);
    vpunpckhdq(dst, src2, src2);
  }
  if (is_signed) {
    vpmuldq(dst, scratch, dst);
  } else {
    vpmuludq(dst, scratch, dst);
  }
  // SSE fallback: pshufd does the lane placement in one instruction:
  uint8_t mask = low ? 0x50 : 0xFA;
  // ...
  pmuldq(dst, scratch);
  // ...
  pmuludq(dst, scratch);
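// The shuffle masks duplicate the chosen dwords: 0x50 produces
// {d0, d0, d1, d1} and 0xFA produces {d2, d2, d3, d3}. Since pmuldq and
// pmuludq only read the low dword of each qword, this parks exactly the
// wanted 32-bit lanes where the multiply will see them.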
// I64x2SConvertI32x4High -- move the high qword down, then sign-extend:
  vpunpckhqdq(dst, src, src);
  vpmovsxdq(dst, dst);

// I64x2UConvertI32x4High -- unpack against zero:
  vpxor(scratch, scratch, scratch);
  vpunpckhdq(dst, src, scratch);
  // SSE fallback:
  xorps(scratch, scratch);
  punpckhdq(dst, scratch);
// S128Not -- xor against all-ones:
  Pcmpeqd(scratch, scratch);
  Pxor(dst, scratch);
// S128Select -- select(mask, v1, v2) = (v1 & mask) | (v2 & ~mask); pandn
// computes ~x & y, so the mask goes in the first operand:
  vpandn(scratch, mask, src2);
  vpand(dst, src1, mask);
  vpor(dst, dst, scratch);
  // SSE fallback (dst must alias mask):
  // ...
  andnps(scratch, src2);
  // ...
// S128Load8Splat -- the first instruction emitted in each tier must be the
// load itself, so the out-of-bounds trap handler can attribute a fault to
// this Wasm access:
  vpbroadcastb(dst, src);
  // AVX without AVX2:
  vpinsrb(dst, scratch, src, uint8_t{0});
  vpxor(scratch, scratch, scratch);
  vpshufb(dst, dst, scratch);
  // SSE4.1:
  pinsrb(dst, src, uint8_t{0});
  xorps(scratch, scratch);
  pshufb(dst, scratch);
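// Passing scratch as the merge source of vpinsrb avoids a dependency on the
// previous value of dst; the legacy pinsrb form has no choice but to merge
// into dst.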
// S128Load16Splat, with the same tiering:
  vpbroadcastw(dst, src);
  // AVX:
  vpinsrw(dst, scratch, src, uint8_t{0});
  vpshuflw(dst, dst, uint8_t{0});
  vpunpcklqdq(dst, dst, dst);
  // SSE:
  pinsrw(dst, src, uint8_t{0});
  pshuflw(dst, dst, uint8_t{0});
  movlhps(dst, dst);
// S128Load32Splat (SSE path) -- load the scalar, then broadcast lane 0:
  movss(dst, src);
  shufps(dst, dst, uint8_t{0});
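// shufps with an all-zero selector copies lane 0 into every lane; with AVX
// the load-and-splat pair becomes a single vbroadcastss from memory.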
#undef DCHECK_OPERAND_IS_NOT_REG