v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
macro-assembler-shared-ia32-x64.cc
1// Copyright 2021 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
6
10
11#if V8_TARGET_ARCH_IA32
13#elif V8_TARGET_ARCH_X64
15#else
16#error Unsupported target architecture.
17#endif
18
19// Operand on IA32 can be a wrapper for a single register, in which case callers
20// should use I8x16Splat with |src| being a Register.
21#if V8_TARGET_ARCH_IA32
22#define DCHECK_OPERAND_IS_NOT_REG(op) DCHECK(!op.is_reg_only());
23#else
24#define DCHECK_OPERAND_IS_NOT_REG(op)
25#endif
26
27namespace v8 {
28namespace internal {
29
31 // Helper to paper over the different assembler function names.
32#if V8_TARGET_ARCH_IA32
33 mov(dst, Immediate(src));
34#elif V8_TARGET_ARCH_X64
35 movl(dst, Immediate(src));
36#else
37#error Unsupported target architecture.
38#endif
39}
40
42 // Helper to paper over the different assembler function names.
43 if (dst != src) {
44#if V8_TARGET_ARCH_IA32
45 mov(dst, src);
46#elif V8_TARGET_ARCH_X64
47 movq(dst, src);
48#else
49#error Unsupported target architecture.
50#endif
51 }
52}
53
55 // Helper to paper over the different assembler function names.
56#if V8_TARGET_ARCH_IA32
57 add(dst, src);
58#elif V8_TARGET_ARCH_X64
59 addq(dst, src);
60#else
61#error Unsupported target architecture.
62#endif
63}
64
66 // Helper to paper over the different assembler function names.
67#if V8_TARGET_ARCH_IA32
68 and_(dst, src);
69#elif V8_TARGET_ARCH_X64
70 if (is_uint32(src.value())) {
71 andl(dst, src);
72 } else {
73 andq(dst, src);
74 }
75#else
76#error Unsupported target architecture.
77#endif
78}
79
81 Operand src2) {
82 if (CpuFeatures::IsSupported(AVX)) {
83 CpuFeatureScope scope(this, AVX);
84 vmovhps(dst, src1, src2);
85 } else {
86 if (dst != src1) {
87 movaps(dst, src1);
88 }
89 movhps(dst, src2);
90 }
91}
92
94 Operand src2) {
95 if (CpuFeatures::IsSupported(AVX)) {
96 CpuFeatureScope scope(this, AVX);
97 vmovlps(dst, src1, src2);
98 } else {
99 if (dst != src1) {
100 movaps(dst, src1);
101 }
102 movlps(dst, src2);
103 }
104}
107 if (CpuFeatures::IsSupported(AVX)) {
108 CpuFeatureScope scope(this, AVX);
109 vblendvpd(dst, src1, src2, mask);
110 } else {
111 CpuFeatureScope scope(this, SSE4_1);
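    // The SSE4.1 blendvpd/blendvps/pblendvb encodings use xmm0 as the implicit
    // mask operand, hence the xmm0 checks in these non-AVX paths.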
112 DCHECK_EQ(mask, xmm0);
113 DCHECK_EQ(dst, src1);
114 blendvpd(dst, src2);
115 }
116}
117
120 if (CpuFeatures::IsSupported(AVX)) {
121 CpuFeatureScope scope(this, AVX);
122 vblendvps(dst, src1, src2, mask);
123 } else {
124 CpuFeatureScope scope(this, SSE4_1);
125 DCHECK_EQ(mask, xmm0);
126 DCHECK_EQ(dst, src1);
127 blendvps(dst, src2);
128 }
129}
130
133 if (CpuFeatures::IsSupported(AVX)) {
134 CpuFeatureScope scope(this, AVX);
135 vpblendvb(dst, src1, src2, mask);
136 } else {
137 CpuFeatureScope scope(this, SSE4_1);
138 DCHECK_EQ(mask, xmm0);
139 DCHECK_EQ(dst, src1);
140 pblendvb(dst, src2);
141 }
142}
143
145 XMMRegister src2, uint8_t imm8) {
146 if (CpuFeatures::IsSupported(AVX)) {
147 CpuFeatureScope avx_scope(this, AVX);
148 vshufps(dst, src1, src2, imm8);
149 } else {
150 if (dst != src1) {
151 movaps(dst, src1);
152 }
153 shufps(dst, src2, imm8);
154 }
155}
156
158 XMMRegister src, uint8_t lane) {
159 ASM_CODE_COMMENT(this);
160 if (lane == 0) {
161 if (dst != src) {
162 Movaps(dst, src);
163 }
164 } else {
165 DCHECK_EQ(1, lane);
166 if (CpuFeatures::IsSupported(AVX)) {
167 CpuFeatureScope avx_scope(this, AVX);
168 // Pass src as operand to avoid false-dependency on dst.
169 vmovhlps(dst, src, src);
170 } else {
171 movhlps(dst, src);
172 }
173 }
174}
175
177 XMMRegister src,
178 DoubleRegister rep,
179 uint8_t lane) {
180 ASM_CODE_COMMENT(this);
181 if (CpuFeatures::IsSupported(AVX)) {
182 CpuFeatureScope scope(this, AVX);
183 if (lane == 0) {
184 vmovsd(dst, src, rep);
185 } else {
186 vmovlhps(dst, src, rep);
187 }
188 } else {
189 CpuFeatureScope scope(this, SSE4_1);
190 if (dst != src) {
191 DCHECK_NE(dst, rep); // Ensure rep is not overwritten.
192 movaps(dst, src);
193 }
194 if (lane == 0) {
195 movsd(dst, rep);
196 } else {
197 movlhps(dst, rep);
198 }
199 }
200}
201
203 XMMRegister rhs, XMMRegister scratch) {
204 ASM_CODE_COMMENT(this);
205 // The minps instruction doesn't propagate NaNs and +0's in its first
206 // operand. Perform minps in both orders, merge the results, and adjust.
207 if (CpuFeatures::IsSupported(AVX)) {
208 CpuFeatureScope scope(this, AVX);
209 vminps(scratch, lhs, rhs);
210 vminps(dst, rhs, lhs);
211 } else if (dst == lhs || dst == rhs) {
212 XMMRegister src = dst == lhs ? rhs : lhs;
213 movaps(scratch, src);
214 minps(scratch, dst);
215 minps(dst, src);
216 } else {
217 movaps(scratch, lhs);
218 minps(scratch, rhs);
219 movaps(dst, rhs);
220 minps(dst, lhs);
221 }
222 // Propagate -0's and NaNs, which may be non-canonical.
223 Orps(scratch, dst);
224 // Canonicalize NaNs by quieting and clearing the payload.
225 Cmpunordps(dst, dst, scratch);
226 Orps(scratch, dst);
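  // dst holds the all-ones NaN mask here; shifting it right by 10 and feeding
  // it to andnps clears the low 22 mantissa bits of NaN lanes, leaving only
  // sign, exponent and the quiet bit (a canonical quiet NaN).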
227 Psrld(dst, dst, uint8_t{10});
228 Andnps(dst, dst, scratch);
229}
230
232 XMMRegister rhs, XMMRegister scratch) {
233 ASM_CODE_COMMENT(this);
234 // The maxps instruction doesn't propagate NaNs and +0's in its first
235 // operand. Perform maxps in both orders, merge the results, and adjust.
236 if (CpuFeatures::IsSupported(AVX)) {
237 CpuFeatureScope scope(this, AVX);
238 vmaxps(scratch, lhs, rhs);
239 vmaxps(dst, rhs, lhs);
240 } else if (dst == lhs || dst == rhs) {
241 XMMRegister src = dst == lhs ? rhs : lhs;
242 movaps(scratch, src);
243 maxps(scratch, dst);
244 maxps(dst, src);
245 } else {
246 movaps(scratch, lhs);
247 maxps(scratch, rhs);
248 movaps(dst, rhs);
249 maxps(dst, lhs);
250 }
251 // Find discrepancies.
252 Xorps(dst, scratch);
253 // Propagate NaNs, which may be non-canonical.
254 Orps(scratch, dst);
255 // Propagate sign discrepancy and (subtle) quiet NaNs.
256 Subps(scratch, scratch, dst);
257 // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
258 Cmpunordps(dst, dst, scratch);
259 Psrld(dst, dst, uint8_t{10});
260 Andnps(dst, dst, scratch);
261}
262
264 XMMRegister rhs, XMMRegister scratch) {
265 ASM_CODE_COMMENT(this);
266 if (CpuFeatures::IsSupported(AVX)) {
267 CpuFeatureScope scope(this, AVX);
268 // The minpd instruction doesn't propagate NaNs and +0's in its first
269 // operand. Perform minpd in both orders, merge the results, and adjust.
270 vminpd(scratch, lhs, rhs);
271 vminpd(dst, rhs, lhs);
272 // Propagate -0's and NaNs, which may be non-canonical.
273 vorpd(scratch, scratch, dst);
274 // Canonicalize NaNs by quieting and clearing the payload.
275 vcmpunordpd(dst, dst, scratch);
276 vorpd(scratch, scratch, dst);
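    // For doubles the shift is 13: the inverted mask then covers exactly the
    // sign, the 11 exponent bits and the quiet bit, clearing the 51-bit payload.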
277 vpsrlq(dst, dst, uint8_t{13});
278 vandnpd(dst, dst, scratch);
279 } else {
280 // Compare lhs with rhs, and rhs with lhs, and have the results in scratch
281 // and dst. If dst overlaps with lhs or rhs, we can save a move.
282 if (dst == lhs || dst == rhs) {
283 XMMRegister src = dst == lhs ? rhs : lhs;
284 movaps(scratch, src);
285 minpd(scratch, dst);
286 minpd(dst, src);
287 } else {
288 movaps(scratch, lhs);
289 movaps(dst, rhs);
290 minpd(scratch, rhs);
291 minpd(dst, lhs);
292 }
293 orpd(scratch, dst);
294 cmpunordpd(dst, scratch);
295 orpd(scratch, dst);
296 psrlq(dst, uint8_t{13});
297 andnpd(dst, scratch);
298 }
299}
300
302 XMMRegister rhs, XMMRegister scratch) {
303 ASM_CODE_COMMENT(this);
304 if (CpuFeatures::IsSupported(AVX)) {
305 CpuFeatureScope scope(this, AVX);
306 // The maxpd instruction doesn't propagate NaNs and +0's in its first
307 // operand. Perform maxpd in both orders, merge the results, and adjust.
308 vmaxpd(scratch, lhs, rhs);
309 vmaxpd(dst, rhs, lhs);
310 // Find discrepancies.
311 vxorpd(dst, dst, scratch);
312 // Propagate NaNs, which may be non-canonical.
313 vorpd(scratch, scratch, dst);
314 // Propagate sign discrepancy and (subtle) quiet NaNs.
315 vsubpd(scratch, scratch, dst);
316 // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
317 vcmpunordpd(dst, dst, scratch);
318 vpsrlq(dst, dst, uint8_t{13});
319 vandnpd(dst, dst, scratch);
320 } else {
321 if (dst == lhs || dst == rhs) {
322 XMMRegister src = dst == lhs ? rhs : lhs;
323 movaps(scratch, src);
324 maxpd(scratch, dst);
325 maxpd(dst, src);
326 } else {
327 movaps(scratch, lhs);
328 movaps(dst, rhs);
329 maxpd(scratch, rhs);
330 maxpd(dst, lhs);
331 }
332 xorpd(dst, scratch);
333 orpd(scratch, dst);
334 subpd(scratch, dst);
335 cmpunordpd(dst, scratch);
336 psrlq(dst, uint8_t{13});
337 andnpd(dst, scratch);
338 }
339}
340
342 ASM_CODE_COMMENT(this);
343 if (CpuFeatures::IsSupported(AVX2)) {
344 CpuFeatureScope avx2_scope(this, AVX2);
345 vbroadcastss(dst, src);
346 } else if (CpuFeatures::IsSupported(AVX)) {
347 CpuFeatureScope avx_scope(this, AVX);
348 vshufps(dst, src, src, 0);
349 } else {
350 if (dst == src) {
351 // 1 byte shorter than pshufd.
352 shufps(dst, src, 0);
353 } else {
354 pshufd(dst, src, 0);
355 }
356 }
357}
358
360 XMMRegister src, uint8_t lane) {
361 ASM_CODE_COMMENT(this);
362 DCHECK_LT(lane, 4);
363 // These instructions are shorter than insertps, but will leave junk in
364 // the top lanes of dst.
365 if (lane == 0) {
366 if (dst != src) {
367 Movaps(dst, src);
368 }
369 } else if (lane == 1) {
370 Movshdup(dst, src);
371 } else if (lane == 2 && dst == src) {
372 // Check dst == src to avoid false dependency on dst.
373 Movhlps(dst, src);
374 } else if (dst == src) {
375 Shufps(dst, src, src, lane);
376 } else {
377 Pshufd(dst, src, lane);
378 }
379}
380
382 uint8_t laneidx) {
383 ASM_CODE_COMMENT(this);
384 if (laneidx == 0) {
385 Movss(dst, src);
386 } else {
387 DCHECK_GE(3, laneidx);
388 Extractps(dst, src, laneidx);
389 }
390}
391
392template <typename Op>
394 XMMRegister scratch) {
395 ASM_CODE_COMMENT(this);
397 CpuFeatureScope ssse3_scope(this, SSSE3);
398 Movd(dst, src);
399 Xorps(scratch, scratch);
400 Pshufb(dst, scratch);
401}
402
404 XMMRegister scratch) {
405 ASM_CODE_COMMENT(this);
406 if (CpuFeatures::IsSupported(AVX2)) {
407 CpuFeatureScope avx2_scope(this, AVX2);
408 Movd(scratch, src);
409 vpbroadcastb(dst, scratch);
410 } else {
411 I8x16SplatPreAvx2(dst, src, scratch);
412 }
413}
414
416 XMMRegister scratch) {
417 ASM_CODE_COMMENT(this);
419 if (CpuFeatures::IsSupported(AVX2)) {
420 CpuFeatureScope avx2_scope(this, AVX2);
421 vpbroadcastb(dst, src);
422 } else {
423 I8x16SplatPreAvx2(dst, src, scratch);
424 }
425}
426
428 uint8_t src2, Register tmp1,
429 XMMRegister tmp2) {
430 ASM_CODE_COMMENT(this);
431 DCHECK_NE(dst, tmp2);
432 // Perform 16-bit shift, then mask away low bits.
433 if (!CpuFeatures::IsSupported(AVX) && (dst != src1)) {
434 movaps(dst, src1);
435 src1 = dst;
436 }
437
438 uint8_t shift = truncate_to_int3(src2);
439 Psllw(dst, src1, uint8_t{shift});
440
441 uint8_t bmask = static_cast<uint8_t>(0xff << shift);
442 uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
443 Move(tmp1, mask);
444 Movd(tmp2, tmp1);
445 Pshufd(tmp2, tmp2, uint8_t{0});
446 Pand(dst, tmp2);
447}
448
450 Register src2, Register tmp1,
451 XMMRegister tmp2, XMMRegister tmp3) {
452 ASM_CODE_COMMENT(this);
453 DCHECK(!AreAliased(dst, tmp2, tmp3));
454 DCHECK(!AreAliased(src1, tmp2, tmp3));
455
456 // Take shift value modulo 8.
457 Move(tmp1, src2);
458 And(tmp1, Immediate(7));
459 Add(tmp1, Immediate(8));
460 // Create a mask to unset high bits.
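  // Shifting the all-ones pattern right by (shift + 8) leaves 0xFF >> shift in
  // each 16-bit word; packuswb then turns that into a per-byte mask.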
461 Movd(tmp3, tmp1);
462 Pcmpeqd(tmp2, tmp2);
463 Psrlw(tmp2, tmp2, tmp3);
464 Packuswb(tmp2, tmp2);
465 if (!CpuFeatures::IsSupported(AVX) && (dst != src1)) {
466 movaps(dst, src1);
467 src1 = dst;
468 }
469 // Mask off the unwanted bits before word-shifting.
470 Pand(dst, src1, tmp2);
471 Add(tmp1, Immediate(-8));
472 Movd(tmp3, tmp1);
473 Psllw(dst, dst, tmp3);
474}
475
477 uint8_t src2, XMMRegister tmp) {
478 ASM_CODE_COMMENT(this);
479 // Unpack bytes into words, do word (16-bit) shifts, and repack.
480 DCHECK_NE(dst, tmp);
481 uint8_t shift = truncate_to_int3(src2) + 8;
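  // The unpacks below put each source byte in the high byte of a 16-bit word
  // (the low byte is junk), so shifting by src2 + 8 discards the junk and
  // sign-extends in one go.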
482
483 Punpckhbw(tmp, src1);
484 Punpcklbw(dst, src1);
485 Psraw(tmp, shift);
486 Psraw(dst, shift);
487 Packsswb(dst, tmp);
488}
489
491 Register src2, Register tmp1,
492 XMMRegister tmp2, XMMRegister tmp3) {
493 ASM_CODE_COMMENT(this);
494 DCHECK(!AreAliased(dst, tmp2, tmp3));
495 DCHECK_NE(src1, tmp2);
496
497 // Unpack the bytes into words, do arithmetic shifts, and repack.
498 Punpckhbw(tmp2, src1);
499 Punpcklbw(dst, src1);
500 // Prepare shift value
501 Move(tmp1, src2);
502 // Take shift value modulo 8.
503 And(tmp1, Immediate(7));
504 Add(tmp1, Immediate(8));
505 Movd(tmp3, tmp1);
506 Psraw(tmp2, tmp3);
507 Psraw(dst, tmp3);
508 Packsswb(dst, tmp2);
509}
510
512 uint8_t src2, Register tmp1,
513 XMMRegister tmp2) {
514 ASM_CODE_COMMENT(this);
515 DCHECK_NE(dst, tmp2);
516 if (!CpuFeatures::IsSupported(AVX) && (dst != src1)) {
517 movaps(dst, src1);
518 src1 = dst;
519 }
520
521 // Perform 16-bit shift, then mask away high bits.
522 uint8_t shift = truncate_to_int3(src2);
523 Psrlw(dst, src1, shift);
524
525 uint8_t bmask = 0xff >> shift;
526 uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
527 Move(tmp1, mask);
528 Movd(tmp2, tmp1);
529 Pshufd(tmp2, tmp2, uint8_t{0});
530 Pand(dst, tmp2);
531}
532
534 Register src2, Register tmp1,
535 XMMRegister tmp2, XMMRegister tmp3) {
536 ASM_CODE_COMMENT(this);
537 DCHECK(!AreAliased(dst, tmp2, tmp3));
538 DCHECK_NE(src1, tmp2);
539
540 // Unpack the bytes into words, do logical shifts, and repack.
541 Punpckhbw(tmp2, src1);
542 Punpcklbw(dst, src1);
543 // Prepare shift value.
544 Move(tmp1, src2);
545 // Take shift value modulo 8.
546 And(tmp1, Immediate(7));
547 Add(tmp1, Immediate(8));
548 Movd(tmp3, tmp1);
549 Psrlw(tmp2, tmp3);
550 Psrlw(dst, tmp3);
551 Packuswb(dst, tmp2);
552}
553
554template <typename Op>
557 Movd(dst, src);
558 Pshuflw(dst, dst, uint8_t{0x0});
559 Punpcklqdq(dst, dst);
560}
561
563 ASM_CODE_COMMENT(this);
564 if (CpuFeatures::IsSupported(AVX2)) {
565 CpuFeatureScope avx2_scope(this, AVX2);
566 Movd(dst, src);
567 vpbroadcastw(dst, dst);
568 } else {
569 I16x8SplatPreAvx2(dst, src);
570 }
571}
572
574 ASM_CODE_COMMENT(this);
576 if (CpuFeatures::IsSupported(AVX2)) {
577 CpuFeatureScope avx2_scope(this, AVX2);
578 vpbroadcastw(dst, src);
579 } else {
580 I16x8SplatPreAvx2(dst, src);
581 }
582}
583
585 XMMRegister src2,
586 XMMRegister scratch,
587 bool is_signed) {
588 ASM_CODE_COMMENT(this);
589 is_signed ? Pmovsxbw(scratch, src1) : Pmovzxbw(scratch, src1);
590 is_signed ? Pmovsxbw(dst, src2) : Pmovzxbw(dst, src2);
591 Pmullw(dst, scratch);
592}
593
595 XMMRegister src1,
596 XMMRegister src2,
597 XMMRegister scratch) {
598 ASM_CODE_COMMENT(this);
599 if (CpuFeatures::IsSupported(AVX)) {
600 CpuFeatureScope avx_scope(this, AVX);
601 vpunpckhbw(scratch, src1, src1);
602 vpsraw(scratch, scratch, 8);
603 vpunpckhbw(dst, src2, src2);
604 vpsraw(dst, dst, 8);
605 vpmullw(dst, dst, scratch);
606 } else {
607 if (dst != src1) {
608 movaps(dst, src1);
609 }
610 movaps(scratch, src2);
611 punpckhbw(dst, dst);
612 psraw(dst, 8);
613 punpckhbw(scratch, scratch);
614 psraw(scratch, 8);
615 pmullw(dst, scratch);
616 }
617}
618
620 XMMRegister src1,
621 XMMRegister src2,
622 XMMRegister scratch) {
623 ASM_CODE_COMMENT(this);
624 // The logic here is slightly complicated to handle all the cases of register
625 // aliasing. This allows flexibility for callers in TurboFan and Liftoff.
626 if (CpuFeatures::IsSupported(AVX)) {
627 CpuFeatureScope avx_scope(this, AVX);
628 if (src1 == src2) {
629 vpxor(scratch, scratch, scratch);
630 vpunpckhbw(dst, src1, scratch);
631 vpmullw(dst, dst, dst);
632 } else {
633 if (dst == src2) {
634 // We overwrite dst, then use src2, so swap src1 and src2.
635 std::swap(src1, src2);
636 }
637 vpxor(scratch, scratch, scratch);
638 vpunpckhbw(dst, src1, scratch);
639 vpunpckhbw(scratch, src2, scratch);
640 vpmullw(dst, dst, scratch);
641 }
642 } else {
643 if (src1 == src2) {
644 xorps(scratch, scratch);
645 if (dst != src1) {
646 movaps(dst, src1);
647 }
648 punpckhbw(dst, scratch);
649 pmullw(dst, dst);  // Square the zero-extended bytes, matching the AVX path above.
650 } else {
651 // When dst == src1, nothing special needs to be done.
652 // When dst == src2, swap src1 and src2, since we overwrite dst.
653 // When dst is unique, copy src1 to dst first.
654 if (dst == src2) {
655 std::swap(src1, src2);
656 // Now, dst == src1.
657 } else if (dst != src1) {
658 // dst != src1 && dst != src2.
659 movaps(dst, src1);
660 }
661 xorps(scratch, scratch);
662 punpckhbw(dst, scratch);
663 punpckhbw(scratch, src2);
664 psrlw(scratch, 8);
665 pmullw(dst, scratch);
666 }
667 }
668}
669
671 XMMRegister src) {
672 ASM_CODE_COMMENT(this);
673 if (CpuFeatures::IsSupported(AVX)) {
674 CpuFeatureScope avx_scope(this, AVX);
675 // src = |a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p| (high)
676 // dst = |i|i|j|j|k|k|l|l|m|m|n|n|o|o|p|p|
677 vpunpckhbw(dst, src, src);
678 vpsraw(dst, dst, 8);
679 } else {
680 CpuFeatureScope sse_scope(this, SSE4_1);
681 if (dst == src) {
682 // 2 bytes shorter than pshufd, but has a dependency on dst.
683 movhlps(dst, src);
684 pmovsxbw(dst, dst);
685 } else {
686 // No dependency on dst.
687 pshufd(dst, src, 0xEE);
688 pmovsxbw(dst, dst);
689 }
690 }
691}
692
694 XMMRegister src,
695 XMMRegister scratch) {
696 ASM_CODE_COMMENT(this);
697 if (CpuFeatures::IsSupported(AVX)) {
698 CpuFeatureScope avx_scope(this, AVX);
699 // tmp = |0|0|0|0|0|0|0|0 | 0|0|0|0|0|0|0|0|
700 // src = |a|b|c|d|e|f|g|h | i|j|k|l|m|n|o|p|
701 // dst = |0|a|0|b|0|c|0|d | 0|e|0|f|0|g|0|h|
702 XMMRegister tmp = dst == src ? scratch : dst;
703 vpxor(tmp, tmp, tmp);
704 vpunpckhbw(dst, src, tmp);
705 } else {
706 CpuFeatureScope sse_scope(this, SSE4_1);
707 if (dst == src) {
708 // xorps can be executed on more ports than pshufd.
709 xorps(scratch, scratch);
710 punpckhbw(dst, scratch);
711 } else {
712 // No dependency on dst.
713 pshufd(dst, src, 0xEE);
714 pmovzxbw(dst, dst);
715 }
716 }
717}
718
720 XMMRegister src1,
721 XMMRegister src2,
722 XMMRegister scratch) {
723 ASM_CODE_COMMENT(this);
724 // k = i16x8.splat(0x8000)
725 Pcmpeqd(scratch, scratch);
726 Psllw(scratch, scratch, uint8_t{15});
727
728 if (!CpuFeatures::IsSupported(AVX) && (dst != src1)) {
729 movaps(dst, src1);
730 src1 = dst;
731 }
732
733 Pmulhrsw(dst, src1, src2);
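  // pmulhrsw produces 0x8000 only for the overflowing 0x8000 * 0x8000 case;
  // comparing against the 0x8000 splat and xor-ing flips those lanes to 0x7fff.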
734 Pcmpeqw(scratch, dst);
735 Pxor(dst, scratch);
736}
737
739 XMMRegister src1,
740 XMMRegister src2) {
741 ASM_CODE_COMMENT(this);
742 if (CpuFeatures::IsSupported(AVX)) {
743 CpuFeatureScope avx_scope(this, AVX);
744 vpmaddubsw(dst, src2, src1);
745 } else {
746 if (dst != src2) {
747 movdqa(dst, src2);
748 }
749 pmaddubsw(dst, src1);
750 }
751}
752
754 XMMRegister dst, XMMRegister src1, XMMRegister src2, XMMRegister src3,
755 XMMRegister scratch, XMMRegister splat_reg) {
756 ASM_CODE_COMMENT(this);
757#if V8_TARGET_ARCH_X64
758 if (CpuFeatures::IsSupported(AVX_VNNI_INT8)) {
759 CpuFeatureScope avx_vnni_int8_scope(this, AVX_VNNI_INT8);
760 if (dst == src3) {
761 vpdpbssd(dst, src2, src1);
762 } else {
763 DCHECK_NE(dst, src1);
764 DCHECK_NE(dst, src2);
765 Movdqa(dst, src3);
766 vpdpbssd(dst, src2, src1);
767 }
768 return;
769 } else if (CpuFeatures::IsSupported(AVX_VNNI)) {
770 CpuFeatureScope avx_scope(this, AVX_VNNI);
771 if (dst == src3) {
772 vpdpbusd(dst, src2, src1);
773 } else {
774 DCHECK_NE(dst, src1);
775 DCHECK_NE(dst, src2);
776 Movdqa(dst, src3);
777 vpdpbusd(dst, src2, src1);
778 }
779 return;
780 }
781#endif
782
783 // k = i16x8.splat(1)
784 Pcmpeqd(splat_reg, splat_reg);
785 Psrlw(splat_reg, splat_reg, uint8_t{15});
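  // pmaddubsw forms the pairwise byte products as i16; pmaddwd against the
  // splat of 1 then folds adjacent i16 pairs into the i32 lanes.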
786
787 if (CpuFeatures::IsSupported(AVX)) {
788 CpuFeatureScope avx_scope(this, AVX);
789 vpmaddubsw(scratch, src2, src1);
790 } else {
791 movdqa(scratch, src2);
792 pmaddubsw(scratch, src1);
793 }
794 Pmaddwd(scratch, splat_reg);
795 if (dst == src3) {
796 Paddd(dst, scratch);
797 } else {
798 Movdqa(dst, src3);
799 Paddd(dst, scratch);
800 }
801}
802
804 XMMRegister src,
805 XMMRegister tmp) {
806 ASM_CODE_COMMENT(this);
807 if (CpuFeatures::IsSupported(AVX)) {
808 CpuFeatureScope avx_scope(this, AVX);
809 // src = |a|b|c|d|e|f|g|h| (low)
810 // tmp = |0|a|0|c|0|e|0|g|
811 vpsrld(tmp, src, 16);
812 // dst = |0|b|0|d|0|f|0|h|
813 vpblendw(dst, src, tmp, 0xAA);
814 // dst = |a+b|c+d|e+f|g+h|
815 vpaddd(dst, tmp, dst);
816 } else if (CpuFeatures::IsSupported(SSE4_1)) {
817 CpuFeatureScope sse_scope(this, SSE4_1);
818 // There is a potentially better lowering if we get rip-relative
819 // constants, see https://github.com/WebAssembly/simd/pull/380.
820 movaps(tmp, src);
821 psrld(tmp, 16);
822 if (dst != src) {
823 movaps(dst, src);
824 }
825 pblendw(dst, tmp, 0xAA);
826 paddd(dst, tmp);
827 } else {
828 // src = |a|b|c|d|e|f|g|h|
829 // tmp = i32x4.splat(0x0000FFFF)
830 pcmpeqd(tmp, tmp);
831 psrld(tmp, uint8_t{16});
832 // tmp =|0|b|0|d|0|f|0|h|
833 andps(tmp, src);
834 // dst = |0|a|0|c|0|e|0|g|
835 if (dst != src) {
836 movaps(dst, src);
837 }
838 psrld(dst, uint8_t{16});
839 // dst = |a+b|c+d|e+f|g+h|
840 paddd(dst, tmp);
841 }
842}
843
844// 1. Multiply low word into scratch.
845// 2. Multiply high word (can be signed or unsigned) into dst.
846// 3. Unpack and interleave scratch and dst into dst.
848 XMMRegister src2,
849 XMMRegister scratch, bool low,
850 bool is_signed) {
851 ASM_CODE_COMMENT(this);
852 if (CpuFeatures::IsSupported(AVX)) {
853 CpuFeatureScope avx_scope(this, AVX);
854 vpmullw(scratch, src1, src2);
855 is_signed ? vpmulhw(dst, src1, src2) : vpmulhuw(dst, src1, src2);
856 low ? vpunpcklwd(dst, scratch, dst) : vpunpckhwd(dst, scratch, dst);
857 } else {
858 DCHECK_EQ(dst, src1);
859 movaps(scratch, src1);
860 pmullw(dst, src2);
861 is_signed ? pmulhw(scratch, src2) : pmulhuw(scratch, src2);
862 low ? punpcklwd(dst, scratch) : punpckhwd(dst, scratch);
863 }
864}
865
867 XMMRegister src) {
868 ASM_CODE_COMMENT(this);
869 if (CpuFeatures::IsSupported(AVX)) {
870 CpuFeatureScope avx_scope(this, AVX);
871 // src = |a|b|c|d|e|f|g|h| (high)
872 // dst = |e|e|f|f|g|g|h|h|
873 vpunpckhwd(dst, src, src);
874 vpsrad(dst, dst, 16);
875 } else {
876 CpuFeatureScope sse_scope(this, SSE4_1);
877 if (dst == src) {
878 // 2 bytes shorter than pshufd, but has a dependency on dst.
879 movhlps(dst, src);
880 pmovsxwd(dst, dst);
881 } else {
882 // No dependency on dst.
883 pshufd(dst, src, 0xEE);
884 pmovsxwd(dst, dst);
885 }
886 }
887}
888
890 XMMRegister src,
891 XMMRegister scratch) {
892 ASM_CODE_COMMENT(this);
893 if (CpuFeatures::IsSupported(AVX)) {
894 CpuFeatureScope avx_scope(this, AVX);
895 // tmp = |0|0|0|0|0|0|0|0|
896 // src = |a|b|c|d|e|f|g|h|
897 // dst = |0|a|0|b|0|c|0|d|
898 XMMRegister tmp = dst == src ? scratch : dst;
899 vpxor(tmp, tmp, tmp);
900 vpunpckhwd(dst, src, tmp);
901 } else {
902 if (dst == src) {
903 // xorps can be executed on more ports than pshufd.
904 xorps(scratch, scratch);
905 punpckhwd(dst, scratch);
906 } else {
907 CpuFeatureScope sse_scope(this, SSE4_1);
908 // No dependency on dst.
909 pshufd(dst, src, 0xEE);
910 pmovzxwd(dst, dst);
911 }
912 }
913}
914
916 XMMRegister scratch) {
917 ASM_CODE_COMMENT(this);
918 if (CpuFeatures::IsSupported(AVX)) {
919 CpuFeatureScope scope(this, AVX);
920 vpxor(scratch, scratch, scratch);
921 vpsubq(dst, scratch, src);
922 } else {
923 if (dst == src) {
924 movaps(scratch, src);
925 std::swap(src, scratch);
926 }
927 pxor(dst, dst);
928 psubq(dst, src);
929 }
930}
931
933 XMMRegister scratch) {
934 ASM_CODE_COMMENT(this);
935 if (CpuFeatures::IsSupported(AVX)) {
936 CpuFeatureScope avx_scope(this, AVX);
937 XMMRegister tmp = dst == src ? scratch : dst;
938 vpxor(tmp, tmp, tmp);
939 vpsubq(tmp, tmp, src);
940 vblendvpd(dst, src, tmp, src);
941 } else {
942 CpuFeatureScope sse_scope(this, SSE3);
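    // abs(x) = (x ^ sign) - sign, where the per-qword sign mask is built by
    // broadcasting the high dword (movshdup) and arithmetic-shifting it by 31.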
943 movshdup(scratch, src);
944 if (dst != src) {
945 movaps(dst, src);
946 }
947 psrad(scratch, 31);
948 xorps(dst, scratch);
949 psubq(dst, scratch);
950 }
951}
952
954 XMMRegister src1, XMMRegister scratch) {
955 ASM_CODE_COMMENT(this);
956 if (CpuFeatures::IsSupported(AVX)) {
957 CpuFeatureScope avx_scope(this, AVX);
958 vpcmpgtq(dst, src0, src1);
959 } else if (CpuFeatures::IsSupported(SSE4_2)) {
960 CpuFeatureScope sse_scope(this, SSE4_2);
961 if (dst == src0) {
962 pcmpgtq(dst, src1);
963 } else if (dst == src1) {
964 movaps(scratch, src0);
965 pcmpgtq(scratch, src1);
966 movaps(dst, scratch);
967 } else {
968 movaps(dst, src0);
969 pcmpgtq(dst, src1);
970 }
971 } else {
972 CpuFeatureScope sse_scope(this, SSE3);
973 DCHECK_NE(dst, src0);
974 DCHECK_NE(dst, src1);
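    // Emulate the 64-bit signed compare: pcmpgtd decides it on the high
    // dwords; where those are equal, the borrow of src1 - src0 leaves the
    // difference's high dword as 0 or all-ones (the unsigned low-dword
    // compare). movshdup then copies that result across the whole qword.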
975 movaps(dst, src1);
976 movaps(scratch, src0);
977 psubq(dst, src0);
978 pcmpeqd(scratch, src1);
979 andps(dst, scratch);
980 movaps(scratch, src0);
981 pcmpgtd(scratch, src1);
982 orps(dst, scratch);
983 movshdup(dst, dst);
984 }
985}
986
988 XMMRegister src1, XMMRegister scratch) {
989 ASM_CODE_COMMENT(this);
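  // Computes src0 >= src1 as !(src1 > src0).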
990 if (CpuFeatures::IsSupported(AVX)) {
991 CpuFeatureScope avx_scope(this, AVX);
992 vpcmpgtq(dst, src1, src0);
993 vpcmpeqd(scratch, scratch, scratch);
994 vpxor(dst, dst, scratch);
995 } else if (CpuFeatures::IsSupported(SSE4_2)) {
996 CpuFeatureScope sse_scope(this, SSE4_2);
997 DCHECK_NE(dst, src0);
998 if (dst != src1) {
999 movaps(dst, src1);
1000 }
1001 pcmpgtq(dst, src0);
1002 pcmpeqd(scratch, scratch);
1003 xorps(dst, scratch);
1004 } else {
1005 CpuFeatureScope sse_scope(this, SSE3);
1006 DCHECK_NE(dst, src0);
1007 DCHECK_NE(dst, src1);
1008 movaps(dst, src0);
1009 movaps(scratch, src1);
1010 psubq(dst, src1);
1011 pcmpeqd(scratch, src0);
1012 andps(dst, scratch);
1013 movaps(scratch, src1);
1014 pcmpgtd(scratch, src0);
1015 orps(dst, scratch);
1016 movshdup(dst, dst);
1017 pcmpeqd(scratch, scratch);
1018 xorps(dst, scratch);
1019 }
1020}
1021
1023 uint8_t shift, XMMRegister xmm_tmp) {
1024 ASM_CODE_COMMENT(this);
1025 DCHECK_GT(64, shift);
1026 DCHECK_NE(xmm_tmp, dst);
1027 DCHECK_NE(xmm_tmp, src);
1028 // Use logical right shift to emulate arithmetic right shifts:
1029 // Given:
1030 // signed >> c
1031 // == (signed + 2^63 - 2^63) >> c
1032 // == ((signed + 2^63) >> c) - (2^63 >> c)
1033 // ^^^^^^^^^
1034 // xmm_tmp
1035 // signed + 2^63 is an unsigned number, so we can use logical right shifts.
1036
1037 // xmm_tmp = wasm_i64x2_const(0x80000000'00000000).
1038 Pcmpeqd(xmm_tmp, xmm_tmp);
1039 Psllq(xmm_tmp, uint8_t{63});
1040
1041 if (!CpuFeatures::IsSupported(AVX) && (dst != src)) {
1042 movaps(dst, src);
1043 src = dst;
1044 }
1045 // Add a bias of 2^63 to convert signed to unsigned.
1046 // Since only highest bit changes, use pxor instead of paddq.
1047 Pxor(dst, src, xmm_tmp);
1048 // Logically shift both value and bias.
1049 Psrlq(dst, shift);
1050 Psrlq(xmm_tmp, shift);
1051 // Subtract shifted bias to convert back to signed value.
1052 Psubq(dst, xmm_tmp);
1053}
1054
1056 Register shift, XMMRegister xmm_tmp,
1057 XMMRegister xmm_shift,
1058 Register tmp_shift) {
1059 ASM_CODE_COMMENT(this);
1060 DCHECK_NE(xmm_tmp, dst);
1061 DCHECK_NE(xmm_tmp, src);
1062 DCHECK_NE(xmm_shift, dst);
1063 DCHECK_NE(xmm_shift, src);
1064 // tmp_shift can alias shift since we don't use shift after masking it.
1065
1066 // See I64x2ShrS with constant shift for explanation of this algorithm.
1067 Pcmpeqd(xmm_tmp, xmm_tmp);
1068 Psllq(xmm_tmp, uint8_t{63});
1069
1070 // Shift modulo 64.
1071 Move(tmp_shift, shift);
1072 And(tmp_shift, Immediate(0x3F));
1073 Movd(xmm_shift, tmp_shift);
1074
1075 if (!CpuFeatures::IsSupported(AVX) && (dst != src)) {
1076 movaps(dst, src);
1077 src = dst;
1078 }
1079 Pxor(dst, src, xmm_tmp);
1080 Psrlq(dst, xmm_shift);
1081 Psrlq(xmm_tmp, xmm_shift);
1082 Psubq(dst, xmm_tmp);
1083}
1084
1086 XMMRegister rhs, XMMRegister tmp1,
1087 XMMRegister tmp2) {
1088 ASM_CODE_COMMENT(this);
1089 DCHECK(!AreAliased(dst, tmp1, tmp2));
1090 DCHECK(!AreAliased(lhs, tmp1, tmp2));
1091 DCHECK(!AreAliased(rhs, tmp1, tmp2));
1092
1093 if (CpuFeatures::IsSupported(AVX)) {
1094 CpuFeatureScope avx_scope(this, AVX);
1095 // 1. Multiply high dword of each qword of left with right.
1096 vpsrlq(tmp1, lhs, uint8_t{32});
1097 vpmuludq(tmp1, tmp1, rhs);
1098 // 2. Multiply high dword of each qword of right with left.
1099 vpsrlq(tmp2, rhs, uint8_t{32});
1100 vpmuludq(tmp2, tmp2, lhs);
1101 // 3. Add 1 and 2, then shift left by 32 (this is the high dword of result).
1102 vpaddq(tmp2, tmp2, tmp1);
1103 vpsllq(tmp2, tmp2, uint8_t{32});
1104 // 4. Multiply low dwords (this is the low dword of result).
1105 vpmuludq(dst, lhs, rhs);
1106 // 5. Add 3 and 4.
1107 vpaddq(dst, dst, tmp2);
1108 } else {
1109 // Same algorithm as AVX version, but with moves to not overwrite inputs.
1110 movaps(tmp1, lhs);
1111 movaps(tmp2, rhs);
1112 psrlq(tmp1, uint8_t{32});
1113 pmuludq(tmp1, rhs);
1114 psrlq(tmp2, uint8_t{32});
1115 pmuludq(tmp2, lhs);
1116 paddq(tmp2, tmp1);
1117 psllq(tmp2, uint8_t{32});
1118 if (dst == rhs) {
1119 // pmuludq is commutative
1120 pmuludq(dst, lhs);
1121 } else {
1122 if (dst != lhs) {
1123 movaps(dst, lhs);
1124 }
1125 pmuludq(dst, rhs);
1126 }
1127 paddq(dst, tmp2);
1128 }
1129}
1130
1131// 1. Unpack src1 with itself so its dwords land in the even-numbered elements of scratch.
1132// 2. Unpack src2 with itself so its dwords land in the even-numbered elements of dst.
1133// 3. Multiply 1. with 2.
1134// For non-AVX, use non-destructive pshufd instead of punpckldq/punpckhdq.
1136 XMMRegister src2,
1137 XMMRegister scratch, bool low,
1138 bool is_signed) {
1139 ASM_CODE_COMMENT(this);
1140 if (CpuFeatures::IsSupported(AVX)) {
1141 CpuFeatureScope avx_scope(this, AVX);
1142 if (low) {
1143 vpunpckldq(scratch, src1, src1);
1144 vpunpckldq(dst, src2, src2);
1145 } else {
1146 vpunpckhdq(scratch, src1, src1);
1147 vpunpckhdq(dst, src2, src2);
1148 }
1149 if (is_signed) {
1150 vpmuldq(dst, scratch, dst);
1151 } else {
1152 vpmuludq(dst, scratch, dst);
1153 }
1154 } else {
1155 uint8_t mask = low ? 0x50 : 0xFA;
1156 pshufd(scratch, src1, mask);
1157 pshufd(dst, src2, mask);
1158 if (is_signed) {
1159 CpuFeatureScope sse4_scope(this, SSE4_1);
1160 pmuldq(dst, scratch);
1161 } else {
1162 pmuludq(dst, scratch);
1163 }
1164 }
1165}
1166
1168 XMMRegister src) {
1169 ASM_CODE_COMMENT(this);
1170 if (CpuFeatures::IsSupported(AVX)) {
1171 CpuFeatureScope avx_scope(this, AVX);
1172 vpunpckhqdq(dst, src, src);
1173 vpmovsxdq(dst, dst);
1174 } else {
1175 CpuFeatureScope sse_scope(this, SSE4_1);
1176 if (dst == src) {
1177 movhlps(dst, src);
1178 } else {
1179 pshufd(dst, src, 0xEE);
1180 }
1181 pmovsxdq(dst, dst);
1182 }
1183}
1184
1186 XMMRegister src,
1187 XMMRegister scratch) {
1188 ASM_CODE_COMMENT(this);
1189 if (CpuFeatures::IsSupported(AVX)) {
1190 CpuFeatureScope avx_scope(this, AVX);
1191 vpxor(scratch, scratch, scratch);
1192 vpunpckhdq(dst, src, scratch);
1193 } else {
1194 if (dst == src) {
1195 // xorps can be executed on more ports than pshufd.
1196 xorps(scratch, scratch);
1197 punpckhdq(dst, scratch);
1198 } else {
1199 CpuFeatureScope sse_scope(this, SSE4_1);
1200 // No dependency on dst.
1201 pshufd(dst, src, 0xEE);
1202 pmovzxdq(dst, dst);
1203 }
1204 }
1205}
1206
1208 XMMRegister scratch) {
1209 ASM_CODE_COMMENT(this);
1210 if (dst == src) {
1211 Pcmpeqd(scratch, scratch);
1212 Pxor(dst, scratch);
1213 } else {
1214 Pcmpeqd(dst, dst);
1215 Pxor(dst, src);
1216 }
1217}
1218
1220 XMMRegister src1, XMMRegister src2,
1221 XMMRegister scratch) {
1222 ASM_CODE_COMMENT(this);
1223 // v128.select = v128.or(v128.and(v1, c), v128.andnot(v2, c)).
1224 // pandn(x, y) = !x & y, so we have to flip the mask and input.
1225 if (CpuFeatures::IsSupported(AVX)) {
1226 CpuFeatureScope avx_scope(this, AVX);
1227 vpandn(scratch, mask, src2);
1228 vpand(dst, src1, mask);
1229 vpor(dst, dst, scratch);
1230 } else {
1231 DCHECK_EQ(dst, mask);
1232 // Use float ops as they are 1 byte shorter than int ops.
1233 movaps(scratch, mask);
1234 andnps(scratch, src2);
1235 andps(dst, src1);
1236 orps(dst, scratch);
1237 }
1238}
1239
1241 XMMRegister scratch) {
1242 ASM_CODE_COMMENT(this);
1243 // The trap handler uses the current pc to create a landing pad, so that it can
1244 // determine if a trap occurred in Wasm code due to an OOB load. Make sure the
1245 // first instruction in each case below is the one that loads.
1246 if (CpuFeatures::IsSupported(AVX2)) {
1247 CpuFeatureScope avx2_scope(this, AVX2);
1248 vpbroadcastb(dst, src);
1249 } else if (CpuFeatures::IsSupported(AVX)) {
1250 CpuFeatureScope avx_scope(this, AVX);
1251 // Avoid dependency on previous value of dst.
1252 vpinsrb(dst, scratch, src, uint8_t{0});
1253 vpxor(scratch, scratch, scratch);
1254 vpshufb(dst, dst, scratch);
1255 } else {
1256 CpuFeatureScope ssse4_scope(this, SSE4_1);
1257 pinsrb(dst, src, uint8_t{0});
1258 xorps(scratch, scratch);
1259 pshufb(dst, scratch);
1260 }
1261}
1262
1264 XMMRegister scratch) {
1265 ASM_CODE_COMMENT(this);
1266 // The trap handler uses the current pc to create a landing pad, so that it can
1267 // determine if a trap occurred in Wasm code due to an OOB load. Make sure the
1268 // first instruction in each case below is the one that loads.
1269 if (CpuFeatures::IsSupported(AVX2)) {
1270 CpuFeatureScope avx2_scope(this, AVX2);
1271 vpbroadcastw(dst, src);
1272 } else if (CpuFeatures::IsSupported(AVX)) {
1273 CpuFeatureScope avx_scope(this, AVX);
1274 // Avoid dependency on previous value of dst.
1275 vpinsrw(dst, scratch, src, uint8_t{0});
1276 vpshuflw(dst, dst, uint8_t{0});
1277 vpunpcklqdq(dst, dst, dst);
1278 } else {
1279 pinsrw(dst, src, uint8_t{0});
1280 pshuflw(dst, dst, uint8_t{0});
1281 movlhps(dst, dst);
1282 }
1283}
1284
1286 ASM_CODE_COMMENT(this);
1287 // The trap handler uses the current pc to create a landing pad, so that it can
1288 // determine if a trap occurred in Wasm code due to an OOB load. Make sure the
1289 // first instruction in each case below is the one that loads.
1290 if (CpuFeatures::IsSupported(AVX)) {
1291 CpuFeatureScope avx_scope(this, AVX);
1292 vbroadcastss(dst, src);
1293 } else {
1294 movss(dst, src);
1295 shufps(dst, dst, uint8_t{0});
1296 }
1297}
1298
1300 uint8_t laneidx) {
1301 ASM_CODE_COMMENT(this);
1302 if (laneidx == 0) {
1303 Movlps(dst, src);
1304 } else {
1305 DCHECK_EQ(1, laneidx);
1306 Movhps(dst, src);
1307 }
1308}
1309
1315
1321
1327
1333
1334#undef QFMOP
1335
1336} // namespace internal
1337} // namespace v8
1338
1339#undef DCHECK_OPERAND_IS_NOT_REG