  using MacroAssemblerBase::MacroAssemblerBase;
  void Move(Register dst, uint32_t src);
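
  // Pinsrb/Pinsrw check for AVX at runtime and use the vpinsr* form when it is
  // available; otherwise they fall back to the SSE form via PinsrHelper below,
  // which moves src1 into dst first if the two differ.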
  template <typename Op>
  void Pinsrb(XMMRegister dst, XMMRegister src1, Op src2, uint8_t imm8,
              uint32_t* load_pc_offset = nullptr) {
    PinsrHelper(this, &Assembler::vpinsrb, &Assembler::pinsrb, dst, src1, src2,
                imm8, load_pc_offset, {SSE4_1});
  }

  template <typename Op>
  void Pinsrw(XMMRegister dst, XMMRegister src1, Op src2, uint8_t imm8,
              uint32_t* load_pc_offset = nullptr) {
    PinsrHelper(this, &Assembler::vpinsrw, &Assembler::pinsrw, dst, src1, src2,
                imm8, load_pc_offset);
  }

  // Supports both AVX and SSSE3; moves src into dst when AVX is not supported.
  template <typename Op>
  void Pshufb(XMMRegister dst, XMMRegister src, Op mask) {
    if (CpuFeatures::IsSupported(AVX)) {
      CpuFeatureScope avx_scope(this, AVX);
      vpshufb(dst, src, mask);
    } else {
      // The SSSE3 form shuffles dst in place, so copy src into dst first.
      // mask must not alias dst, or it would be overwritten by the copy.
      if (dst != src) {
        movaps(dst, src);
      }
      CpuFeatureScope ssse3_scope(this, SSSE3);
      pshufb(dst, mask);
    }
  }

  template <typename Op>
  void Pshufb(XMMRegister dst, Op mask) {
    Pshufb(dst, dst, mask);
  }
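
  // Helper struct that checks for AVX support at runtime and dispatches to
  // either the AVX or the SSE member function of the Assembler. |feature|
  // names the SSE extension required by the non-AVX fallback, if any.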
  template <typename Dst, typename Arg, typename... Args>
  struct AvxHelper {
    Assembler* assm;
    std::optional<CpuFeature> feature = std::nullopt;
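
    // emit() overload for instructions whose AVX form duplicates the dst
    // operand, e.g. Andps(x, y) -> vandps(x, x, y) on AVX, andps(x, y) on SSE.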
    template <void (Assembler::*avx)(Dst, Dst, Arg, Args...),
              void (Assembler::*no_avx)(Dst, Arg, Args...)>
    void emit(Dst dst, Arg arg, Args... args) {
      if (CpuFeatures::IsSupported(AVX)) {
        CpuFeatureScope scope(assm, AVX);
        (assm->*avx)(dst, dst, arg, args...);
      } else if (feature.has_value()) {
        DCHECK(CpuFeatures::IsSupported(*feature));
        CpuFeatureScope scope(assm, *feature);
        (assm->*no_avx)(dst, arg, args...);
      } else {
        (assm->*no_avx)(dst, arg, args...);
      }
    }
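
    // emit() overload for the case where the non-AVX form takes one operand
    // fewer than the AVX form; the SSE fallback therefore requires dst == arg.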
    template <void (Assembler::*avx)(Dst, Arg, Args...),
              void (Assembler::*no_avx)(Dst, Args...)>
    void emit(Dst dst, Arg arg, Args... args) {
      if (CpuFeatures::IsSupported(AVX)) {
        CpuFeatureScope scope(assm, AVX);
        (assm->*avx)(dst, arg, args...);
      } else if (feature.has_value()) {
        DCHECK_EQ(dst, arg);
        DCHECK(CpuFeatures::IsSupported(*feature));
        CpuFeatureScope scope(assm, *feature);
        (assm->*no_avx)(dst, args...);
      } else {
        DCHECK_EQ(dst, arg);
        (assm->*no_avx)(dst, args...);
      }
    }
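
    // emit() overload for instructions whose AVX and SSE forms take the same
    // operands, e.g. Cvtdq2pd(x, y) -> vcvtdq2pd(x, y) / cvtdq2pd(x, y).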
    template <void (Assembler::*avx)(Dst, Arg, Args...),
              void (Assembler::*no_avx)(Dst, Arg, Args...)>
    void emit(Dst dst, Arg arg, Args... args) {
      if (CpuFeatures::IsSupported(AVX)) {
        CpuFeatureScope scope(assm, AVX);
        (assm->*avx)(dst, arg, args...);
      } else if (feature.has_value()) {
        DCHECK(CpuFeatures::IsSupported(*feature));
        CpuFeatureScope scope(assm, *feature);
        (assm->*no_avx)(dst, arg, args...);
      } else {
        (assm->*no_avx)(dst, arg, args...);
      }
    }
  };
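
// The AVX_OP* macros declare a wrapper that emits the v-prefixed instruction
// when AVX is supported and the plain SSE instruction otherwise. The _SSE3,
// _SSSE3, _SSE4_1 and _SSE4_2 variants also record the SSE extension required
// by the non-AVX fallback.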
#define AVX_OP(macro_name, name)                                          \
  template <typename Dst, typename Arg, typename... Args>                 \
  void macro_name(Dst dst, Arg arg, Args... args) {                       \
    AvxHelper<Dst, Arg, Args...>{this}                                    \
        .template emit<&Assembler::v##name, &Assembler::name>(dst, arg,   \
                                                              args...);   \
  }
#define AVX_OP_WITH_DIFF_SSE_INSTR(macro_name, avx_name, sse_name)        \
  template <typename Dst, typename Arg, typename... Args>                 \
  void macro_name(Dst dst, Arg arg, Args... args) {                       \
    AvxHelper<Dst, Arg, Args...>{this}                                    \
        .template emit<&Assembler::v##avx_name, &Assembler::sse_name>(    \
            dst, arg, args...);                                           \
  }
#define AVX_OP_SSE3(macro_name, name)                                     \
  template <typename Dst, typename Arg, typename... Args>                 \
  void macro_name(Dst dst, Arg arg, Args... args) {                       \
    AvxHelper<Dst, Arg, Args...>{this, std::optional<CpuFeature>(SSE3)}   \
        .template emit<&Assembler::v##name, &Assembler::name>(dst, arg,   \
                                                              args...);   \
  }
#define AVX_OP_SSSE3(macro_name, name)                                    \
  template <typename Dst, typename Arg, typename... Args>                 \
  void macro_name(Dst dst, Arg arg, Args... args) {                       \
    AvxHelper<Dst, Arg, Args...>{this, std::optional<CpuFeature>(SSSE3)}  \
        .template emit<&Assembler::v##name, &Assembler::name>(dst, arg,   \
                                                              args...);   \
  }
#define AVX_OP_SSE4_1(macro_name, name)                                   \
  template <typename Dst, typename Arg, typename... Args>                 \
  void macro_name(Dst dst, Arg arg, Args... args) {                       \
    AvxHelper<Dst, Arg, Args...>{this, std::optional<CpuFeature>(SSE4_1)} \
        .template emit<&Assembler::v##name, &Assembler::name>(dst, arg,   \
                                                              args...);   \
  }
#define AVX_OP_SSE4_2(macro_name, name)                                   \
  template <typename Dst, typename Arg, typename... Args>                 \
  void macro_name(Dst dst, Arg arg, Args... args) {                       \
    AvxHelper<Dst, Arg, Args...>{this, std::optional<CpuFeature>(SSE4_2)} \
        .template emit<&Assembler::v##name, &Assembler::name>(dst, arg,   \
                                                              args...);   \
  }

  AVX_OP(Cmpneqpd, cmpneqpd)
  AVX_OP(Cmpneqps, cmpneqps)
  AVX_OP(Cmpunordpd, cmpunordpd)
  AVX_OP(Cmpunordps, cmpunordps)
  AVX_OP(Cvtdq2pd, cvtdq2pd)
  AVX_OP(Cvtdq2ps, cvtdq2ps)
  AVX_OP(Cvtpd2ps, cvtpd2ps)
  AVX_OP(Cvtps2pd, cvtps2pd)
  AVX_OP(Cvtsd2ss, cvtsd2ss)
  AVX_OP(Cvtss2sd, cvtss2sd)
  AVX_OP(Cvttpd2dq, cvttpd2dq)
  AVX_OP(Cvttps2dq, cvttps2dq)
  AVX_OP(Cvttsd2si, cvttsd2si)
  AVX_OP(Cvttss2si, cvttss2si)
  AVX_OP(Movmskpd, movmskpd)
  AVX_OP(Movmskps, movmskps)
  AVX_OP(Packssdw, packssdw)
  AVX_OP(Packsswb, packsswb)
  AVX_OP(Packuswb, packuswb)
  AVX_OP(Pmovmskb, pmovmskb)
  AVX_OP(Punpckhbw, punpckhbw)
  AVX_OP(Punpckhdq, punpckhdq)
  AVX_OP(Punpckhqdq, punpckhqdq)
  AVX_OP(Punpckhwd, punpckhwd)
  AVX_OP(Punpcklbw, punpcklbw)
  AVX_OP(Punpckldq, punpckldq)
  AVX_OP(Punpcklqdq, punpcklqdq)
  AVX_OP(Punpcklwd, punpcklwd)
  AVX_OP(Unpcklps, unpcklps)
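
  // Wasm SIMD128 helpers. The scratch and tmp registers passed to these may be
  // clobbered.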
  void F64x2ExtractLane(DoubleRegister dst, XMMRegister src, uint8_t lane);
  void F64x2ReplaceLane(XMMRegister dst, XMMRegister src, DoubleRegister rep,
                        uint8_t lane);
  void F64x2Min(XMMRegister dst, XMMRegister lhs, XMMRegister rhs,
                XMMRegister scratch);
  void F64x2Max(XMMRegister dst, XMMRegister lhs, XMMRegister rhs,
                XMMRegister scratch);
  void F32x4Splat(XMMRegister dst, DoubleRegister src);
  void F32x4ExtractLane(FloatRegister dst, XMMRegister src, uint8_t lane);
  void F32x4Min(XMMRegister dst, XMMRegister lhs, XMMRegister rhs,
                XMMRegister scratch);
  void F32x4Max(XMMRegister dst, XMMRegister lhs, XMMRegister rhs,
                XMMRegister scratch);
  void S128Store32Lane(Operand dst, XMMRegister src, uint8_t laneidx);
  void I8x16Splat(XMMRegister dst, Register src, XMMRegister scratch);
  void I8x16Splat(XMMRegister dst, Operand src, XMMRegister scratch);
  void I8x16Shl(XMMRegister dst, XMMRegister src1, uint8_t src2, Register tmp1,
                XMMRegister tmp2);
  void I8x16Shl(XMMRegister dst, XMMRegister src1, Register src2, Register tmp1,
                XMMRegister tmp2, XMMRegister tmp3);
  void I8x16ShrS(XMMRegister dst, XMMRegister src1, uint8_t src2,
                 XMMRegister tmp);
  void I8x16ShrS(XMMRegister dst, XMMRegister src1, Register src2,
                 Register tmp1, XMMRegister tmp2, XMMRegister tmp3);
  void I8x16ShrU(XMMRegister dst, XMMRegister src1, uint8_t src2, Register tmp1,
                 XMMRegister tmp2);
  void I8x16ShrU(XMMRegister dst, XMMRegister src1, Register src2,
                 Register tmp1, XMMRegister tmp2, XMMRegister tmp3);
  void I16x8Splat(XMMRegister dst, Register src);
  void I16x8Splat(XMMRegister dst, Operand src);
  void I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                      XMMRegister scratch, bool is_signed);
  void I16x8ExtMulHighS(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                        XMMRegister scratch);
  void I16x8ExtMulHighU(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                        XMMRegister scratch);
  void I16x8SConvertI8x16High(XMMRegister dst, XMMRegister src);
  void I16x8UConvertI8x16High(XMMRegister dst, XMMRegister src,
                              XMMRegister scratch);
  void I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                        XMMRegister scratch);
  void I16x8DotI8x16I7x16S(XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void I32x4DotI8x16I7x16AddS(XMMRegister dst, XMMRegister src1,
                              XMMRegister src2, XMMRegister src3,
                              XMMRegister scratch, XMMRegister splat_reg);
  void I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src,
                                 XMMRegister tmp);
  void I32x4ExtMul(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                   XMMRegister scratch, bool low, bool is_signed);
  void I32x4SConvertI16x8High(XMMRegister dst, XMMRegister src);
  void I32x4UConvertI16x8High(XMMRegister dst, XMMRegister src,
                              XMMRegister scratch);
  void I64x2Neg(XMMRegister dst, XMMRegister src, XMMRegister scratch);
  void I64x2Abs(XMMRegister dst, XMMRegister src, XMMRegister scratch);
  void I64x2GtS(XMMRegister dst, XMMRegister src0, XMMRegister src1,
                XMMRegister scratch);
  void I64x2GeS(XMMRegister dst, XMMRegister src0, XMMRegister src1,
                XMMRegister scratch);
  void I64x2ShrS(XMMRegister dst, XMMRegister src, uint8_t shift,
                 XMMRegister xmm_tmp);
  void I64x2ShrS(XMMRegister dst, XMMRegister src, Register shift,
                 XMMRegister xmm_tmp, XMMRegister xmm_shift,
                 Register tmp_shift);
  void I64x2Mul(XMMRegister dst, XMMRegister lhs, XMMRegister rhs,
                XMMRegister tmp1, XMMRegister tmp2);
  void I64x2ExtMul(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                   XMMRegister scratch, bool low, bool is_signed);
  void I64x2SConvertI32x4High(XMMRegister dst, XMMRegister src);
  void I64x2UConvertI32x4High(XMMRegister dst, XMMRegister src,
                              XMMRegister scratch);
  void S128Not(XMMRegister dst, XMMRegister src, XMMRegister scratch);
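  // Requires dst == mask when AVX is not supported.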
  void S128Select(XMMRegister dst, XMMRegister mask, XMMRegister src1,
                  XMMRegister src2, XMMRegister scratch);
  void S128Load8Splat(XMMRegister dst, Operand src, XMMRegister scratch);
  void S128Load16Splat(XMMRegister dst, Operand src, XMMRegister scratch);
  void S128Load32Splat(XMMRegister dst, Operand src);
  void S128Store64Lane(Operand dst, XMMRegister src, uint8_t laneidx);

  void F64x2Qfma(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister src3, XMMRegister tmp);
  void F64x2Qfms(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister src3, XMMRegister tmp);
  void F32x4Qfma(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister src3, XMMRegister tmp);
  void F32x4Qfms(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister src3, XMMRegister tmp);

 protected:
  template <typename Op>
  using AvxFn = void (Assembler::*)(XMMRegister, XMMRegister, Op, uint8_t);
  template <typename Op>
  using NoAvxFn = void (Assembler::*)(XMMRegister, Op, uint8_t);
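
  // Shared helper for the Pinsr* wrappers above: emits the three-operand AVX
  // form when AVX is supported, otherwise moves src1 into dst (if they differ)
  // and emits the two-operand SSE form. If |load_pc_offset| is non-null it is
  // set to the pc offset at which the instruction is emitted.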
  template <typename Op>
  void PinsrHelper(Assembler* assm, AvxFn<Op> avx, NoAvxFn<Op> noavx,
                   XMMRegister dst, XMMRegister src1, Op src2, uint8_t imm8,
                   uint32_t* load_pc_offset = nullptr,
                   std::optional<CpuFeature> feature = std::nullopt) {
    if (CpuFeatures::IsSupported(AVX)) {
      CpuFeatureScope scope(assm, AVX);
      if (load_pc_offset) *load_pc_offset = assm->pc_offset();
      (assm->*avx)(dst, src1, src2, imm8);
      return;
    }

    if (dst != src1) assm->movaps(dst, src1);
    if (load_pc_offset) *load_pc_offset = assm->pc_offset();
    if (feature.has_value()) {
      DCHECK(CpuFeatures::IsSupported(*feature));
      CpuFeatureScope scope(assm, *feature);
      (assm->*noavx)(dst, src2, imm8);
    } else {
      (assm->*noavx)(dst, src2, imm8);
    }
  }

 private:
  template <typename Op>
  void I8x16SplatPreAvx2(XMMRegister dst, Op src, XMMRegister scratch);
  template <typename Op>
  void I16x8SplatPreAvx2(XMMRegister dst, Op src);