v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
macro-assembler-shared-ia32-x64.h
1// Copyright 2021 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_CODEGEN_SHARED_IA32_X64_MACRO_ASSEMBLER_SHARED_IA32_X64_H_
6#define V8_CODEGEN_SHARED_IA32_X64_MACRO_ASSEMBLER_SHARED_IA32_X64_H_
7
8#include <optional>
9
10#include "src/base/macros.h"
14
15#if V8_TARGET_ARCH_IA32
16#include "src/codegen/ia32/register-ia32.h"
17#elif V8_TARGET_ARCH_X64
18#include "src/codegen/x64/register-x64.h"
19#else
20#error Unsupported target architecture.
21#endif
22
23// Helper macro to define qfma macro-assembler. This takes care of every
24// possible case of register aliasing to minimize the number of instructions.
25#define QFMA(ps_or_pd) \
26 if (CpuFeatures::IsSupported(FMA3)) { \
27 CpuFeatureScope fma3_scope(this, FMA3); \
28 if (dst == src1) { \
29 vfmadd213##ps_or_pd(dst, src2, src3); \
30 } else if (dst == src2) { \
31 vfmadd213##ps_or_pd(dst, src1, src3); \
32 } else if (dst == src3) { \
33 vfmadd231##ps_or_pd(dst, src2, src1); \
34 } else { \
35 CpuFeatureScope avx_scope(this, AVX); \
36 vmovups(dst, src1); \
37 vfmadd213##ps_or_pd(dst, src2, src3); \
38 } \
39 } else if (CpuFeatures::IsSupported(AVX)) { \
40 CpuFeatureScope avx_scope(this, AVX); \
41 vmul##ps_or_pd(tmp, src1, src2); \
42 vadd##ps_or_pd(dst, tmp, src3); \
43 } else { \
44 if (dst == src1) { \
45 mul##ps_or_pd(dst, src2); \
46 add##ps_or_pd(dst, src3); \
47 } else if (dst == src2) { \
48 DCHECK_NE(src2, src1); \
49 mul##ps_or_pd(dst, src1); \
50 add##ps_or_pd(dst, src3); \
51 } else if (dst == src3) { \
52 DCHECK_NE(src3, src1); \
53 movaps(tmp, src1); \
54 mul##ps_or_pd(tmp, src2); \
55 add##ps_or_pd(dst, tmp); \
56 } else { \
57 movaps(dst, src1); \
58 mul##ps_or_pd(dst, src2); \
59 add##ps_or_pd(dst, src3); \
60 } \
61 }
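// Net effect in all three paths: dst = src1 * src2 + src3 (fused on FMA3, a
// separate multiply and add otherwise); e.g. with dst == src1 the FMA3 path
// emits a single vfmadd213, i.e. dst = dst * src2 + src3.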
62
63// Helper macro to define qfms macro-assembler. This takes care of every
64// possible case of register aliasing to minimize the number of instructions.
65#define QFMS(ps_or_pd) \
66 if (CpuFeatures::IsSupported(FMA3)) { \
67 CpuFeatureScope fma3_scope(this, FMA3); \
68 if (dst == src1) { \
69 vfnmadd213##ps_or_pd(dst, src2, src3); \
70 } else if (dst == src2) { \
71 vfnmadd213##ps_or_pd(dst, src1, src3); \
72 } else if (dst == src3) { \
73 vfnmadd231##ps_or_pd(dst, src2, src1); \
74 } else { \
75 CpuFeatureScope avx_scope(this, AVX); \
76 vmovups(dst, src1); \
77 vfnmadd213##ps_or_pd(dst, src2, src3); \
78 } \
79 } else if (CpuFeatures::IsSupported(AVX)) { \
80 CpuFeatureScope avx_scope(this, AVX); \
81 vmul##ps_or_pd(tmp, src1, src2); \
82 vsub##ps_or_pd(dst, src3, tmp); \
83 } else { \
84 movaps(tmp, src1); \
85 mul##ps_or_pd(tmp, src2); \
86 if (dst != src3) { \
87 movaps(dst, src3); \
88 } \
89 sub##ps_or_pd(dst, tmp); \
90 }
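// Net effect: dst = src3 - src1 * src2. The FMA3 path uses vfnmadd213/231
// (negated multiply, then add src3); the AVX and SSE paths multiply into tmp
// and subtract it from src3.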
91
92namespace v8 {
93namespace internal {
94class Assembler;
95
96// For WebAssembly we care about the full floating point register. If we are not
97// running Wasm, we can get away with saving half of those registers.
98#if V8_ENABLE_WEBASSEMBLY
99constexpr int kStackSavedSavedFPSize = 2 * kDoubleSize;
100#else
101constexpr int kStackSavedSavedFPSize = kDoubleSize;
102#endif // V8_ENABLE_WEBASSEMBLY
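// With Wasm enabled this is 2 * kDoubleSize = 16 bytes per register (a full
// 128-bit XMM register); otherwise only the low 8 bytes (one double) are
// preserved.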
103
104// Base class for SharedMacroAssembler. This class contains macro-assembler
105// functions that can be shared across ia32 and x64 without any template
106// machinery, i.e. does not require the CRTP pattern that
107// SharedMacroAssembler exposes. This allows us to keep the bulk of
108// definition inside a separate source file, rather than putting everything
109// inside this header.
110class V8_EXPORT_PRIVATE SharedMacroAssemblerBase : public MacroAssemblerBase {
111 public:
112 using MacroAssemblerBase::MacroAssemblerBase;
113
114 void Move(Register dst, uint32_t src);
115 // Move if registers are not identical.
116 void Move(Register dst, Register src);
117 void Add(Register dst, Immediate src);
118 void And(Register dst, Immediate src);
119
120 // Will move src1 to dst if AVX is not supported.
121 void Movhps(XMMRegister dst, XMMRegister src1, Operand src2);
122 void Movlps(XMMRegister dst, XMMRegister src1, Operand src2);
123
124 void Blendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
125 XMMRegister mask);
126 void Blendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
127 XMMRegister mask);
128 void Pblendvb(XMMRegister dst, XMMRegister src1, XMMRegister src2,
129 XMMRegister mask);
130
131 template <typename Op>
132 void Pinsrb(XMMRegister dst, XMMRegister src1, Op src2, uint8_t imm8,
133 uint32_t* load_pc_offset = nullptr) {
134 PinsrHelper(this, &Assembler::vpinsrb, &Assembler::pinsrb, dst, src1, src2,
135 imm8, load_pc_offset, {SSE4_1});
136 }
137
138 template <typename Op>
139 void Pinsrw(XMMRegister dst, XMMRegister src1, Op src2, uint8_t imm8,
140 uint32_t* load_pc_offset = nullptr) {
141 PinsrHelper(this, &Assembler::vpinsrw, &Assembler::pinsrw, dst, src1, src2,
142 imm8, load_pc_offset);
143 }
144
145 // Supports both SSE and AVX. Move src1 to dst if they are not equal on SSE.
146 template <typename Op>
147 void Pshufb(XMMRegister dst, XMMRegister src, Op mask) {
148 if (CpuFeatures::IsSupported(AVX)) {
149 CpuFeatureScope avx_scope(this, AVX);
150 vpshufb(dst, src, mask);
151 } else {
152 // Make sure these are different so that we won't overwrite mask.
153 DCHECK_NE(mask, dst);
154 if (dst != src) {
155 movaps(dst, src);
156 }
157 CpuFeatureScope sse_scope(this, SSSE3);
158 pshufb(dst, mask);
159 }
160 }
161
162 template <typename Op>
163 void Pshufb(XMMRegister dst, Op mask) {
164 Pshufb(dst, dst, mask);
165 }
166
167 // Shufps that will mov src1 into dst if AVX is not supported.
168 void Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
169 uint8_t imm8);
170
171 // Helper struct to implement functions that check for AVX support and
172 // dispatch to the appropriate AVX/SSE instruction.
173 template <typename Dst, typename Arg, typename... Args>
174 struct AvxHelper {
175 Assembler* assm;
176 std::optional<CpuFeature> feature = std::nullopt;
177 // Call a method where the AVX version expects the dst argument to be
178 // duplicated.
179 // E.g. Andps(x, y) -> vandps(x, x, y)
180 // -> andps(x, y)
181 template <void (Assembler::*avx)(Dst, Dst, Arg, Args...),
182 void (Assembler::*no_avx)(Dst, Arg, Args...)>
183 void emit(Dst dst, Arg arg, Args... args) {
184 if (CpuFeatures::IsSupported(AVX)) {
185 CpuFeatureScope scope(assm, AVX);
186 (assm->*avx)(dst, dst, arg, args...);
187 } else if (feature.has_value()) {
188 DCHECK(CpuFeatures::IsSupported(*feature));
189 CpuFeatureScope scope(assm, *feature);
190 (assm->*no_avx)(dst, arg, args...);
191 } else {
192 (assm->*no_avx)(dst, arg, args...);
193 }
194 }
195
196 // Call a method in the AVX form (one more operand), but if unsupported will
197 // check that dst == first src.
198 // E.g. Andps(x, y, z) -> vandps(x, y, z)
199 // -> andps(x, z) and check that x == y
200 template <void (Assembler::*avx)(Dst, Arg, Args...),
201 void (Assembler::*no_avx)(Dst, Args...)>
202 void emit(Dst dst, Arg arg, Args... args) {
203 if (CpuFeatures::IsSupported(AVX)) {
204 CpuFeatureScope scope(assm, AVX);
205 (assm->*avx)(dst, arg, args...);
206 } else if (feature.has_value()) {
207 DCHECK_EQ(dst, arg);
208 DCHECK(CpuFeatures::IsSupported(*feature));
209 CpuFeatureScope scope(assm, *feature);
210 (assm->*no_avx)(dst, args...);
211 } else {
212 DCHECK_EQ(dst, arg);
213 (assm->*no_avx)(dst, args...);
214 }
215 }
216
217 // Call a method where the AVX version expects no duplicated dst argument.
218 // E.g. Movddup(x, y) -> vmovddup(x, y)
219 // -> movddup(x, y)
220 template <void (Assembler::*avx)(Dst, Arg, Args...),
221 void (Assembler::*no_avx)(Dst, Arg, Args...)>
222 void emit(Dst dst, Arg arg, Args... args) {
223 if (CpuFeatures::IsSupported(AVX)) {
224 CpuFeatureScope scope(assm, AVX);
225 (assm->*avx)(dst, arg, args...);
226 } else if (feature.has_value()) {
227 DCHECK(CpuFeatures::IsSupported(*feature));
228 CpuFeatureScope scope(assm, *feature);
229 (assm->*no_avx)(dst, arg, args...);
230 } else {
231 (assm->*no_avx)(dst, arg, args...);
232 }
233 }
234 };
235
236#define AVX_OP(macro_name, name) \
237 template <typename Dst, typename Arg, typename... Args> \
238 void macro_name(Dst dst, Arg arg, Args... args) { \
239 AvxHelper<Dst, Arg, Args...>{this} \
240 .template emit<&Assembler::v##name, &Assembler::name>(dst, arg, \
241 args...); \
242 }
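// For example, AVX_OP(Addps, addps) below defines Addps(dst, src) so that it
// emits vaddps(dst, dst, src) when AVX is available and addps(dst, src)
// otherwise; the matching AvxHelper::emit overload is selected from the two
// instruction signatures.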
243
244// Define a macro which uses |avx_name| when AVX is supported, and |sse_name|
245// when AVX is not supported. This is useful for bit-wise instructions like
246// andpd/andps, where the behavior is exactly the same, but the *ps
247// version is 1 byte shorter, and on SSE-only processors there is no
248// performance difference since those processors don't differentiate integer
249// and floating-point domains.
250// Note: we require |avx_name| to be the AVX instruction without the "v"
251// prefix. If we required the full AVX instruction name and the caller
252// accidentally passed in an SSE instruction, it would compile without any
253// issues and generate the SSE instruction. By appending "v" here, we ensure
254// that we will generate an AVX instruction.
255#define AVX_OP_WITH_DIFF_SSE_INSTR(macro_name, avx_name, sse_name) \
256 template <typename Dst, typename Arg, typename... Args> \
257 void macro_name(Dst dst, Arg arg, Args... args) { \
258 AvxHelper<Dst, Arg, Args...>{this} \
259 .template emit<&Assembler::v##avx_name, &Assembler::sse_name>( \
260 dst, arg, args...); \
261 }
262
263#define AVX_OP_SSE3(macro_name, name) \
264 template <typename Dst, typename Arg, typename... Args> \
265 void macro_name(Dst dst, Arg arg, Args... args) { \
266 AvxHelper<Dst, Arg, Args...>{this, std::optional<CpuFeature>(SSE3)} \
267 .template emit<&Assembler::v##name, &Assembler::name>(dst, arg, \
268 args...); \
269 }
270
271#define AVX_OP_SSSE3(macro_name, name) \
272 template <typename Dst, typename Arg, typename... Args> \
273 void macro_name(Dst dst, Arg arg, Args... args) { \
274 AvxHelper<Dst, Arg, Args...>{this, std::optional<CpuFeature>(SSSE3)} \
275 .template emit<&Assembler::v##name, &Assembler::name>(dst, arg, \
276 args...); \
277 }
278
279#define AVX_OP_SSE4_1(macro_name, name) \
280 template <typename Dst, typename Arg, typename... Args> \
281 void macro_name(Dst dst, Arg arg, Args... args) { \
282 AvxHelper<Dst, Arg, Args...>{this, std::optional<CpuFeature>(SSE4_1)} \
283 .template emit<&Assembler::v##name, &Assembler::name>(dst, arg, \
284 args...); \
285 }
286
287#define AVX_OP_SSE4_2(macro_name, name) \
288 template <typename Dst, typename Arg, typename... Args> \
289 void macro_name(Dst dst, Arg arg, Args... args) { \
290 AvxHelper<Dst, Arg, Args...>{this, std::optional<CpuFeature>(SSE4_2)} \
291 .template emit<&Assembler::v##name, &Assembler::name>(dst, arg, \
292 args...); \
293 }
294
295 // Keep this list sorted by required extension, then instruction name.
296 AVX_OP(Addpd, addpd)
297 AVX_OP(Addps, addps)
298 AVX_OP(Addsd, addsd)
299 AVX_OP(Addss, addss)
300 AVX_OP(Andnpd, andnpd)
301 AVX_OP(Andnps, andnps)
302 AVX_OP(Andpd, andpd)
303 AVX_OP(Andps, andps)
304 AVX_OP(Cmpeqpd, cmpeqpd)
305 AVX_OP(Cmpeqps, cmpeqps)
306 AVX_OP(Cmplepd, cmplepd)
307 AVX_OP(Cmpleps, cmpleps)
308 AVX_OP(Cmpltpd, cmpltpd)
309 AVX_OP(Cmpltps, cmpltps)
310 AVX_OP(Cmpneqpd, cmpneqpd)
311 AVX_OP(Cmpneqps, cmpneqps)
312 AVX_OP(Cmpunordpd, cmpunordpd)
313 AVX_OP(Cmpunordps, cmpunordps)
314 AVX_OP(Cvtdq2pd, cvtdq2pd)
315 AVX_OP(Cvtdq2ps, cvtdq2ps)
316 AVX_OP(Cvtpd2ps, cvtpd2ps)
317 AVX_OP(Cvtps2pd, cvtps2pd)
318 AVX_OP(Cvtsd2ss, cvtsd2ss)
319 AVX_OP(Cvtss2sd, cvtss2sd)
320 AVX_OP(Cvttpd2dq, cvttpd2dq)
321 AVX_OP(Cvttps2dq, cvttps2dq)
322 AVX_OP(Cvttsd2si, cvttsd2si)
323 AVX_OP(Cvttss2si, cvttss2si)
324 AVX_OP(Divpd, divpd)
325 AVX_OP(Divps, divps)
326 AVX_OP(Divsd, divsd)
327 AVX_OP(Divss, divss)
328 AVX_OP(Maxpd, maxpd)
329 AVX_OP(Maxps, maxps)
330 AVX_OP(Minpd, minpd)
331 AVX_OP(Minps, minps)
332 AVX_OP(Movaps, movaps)
333 AVX_OP(Movd, movd)
334 AVX_OP(Movhlps, movhlps)
335 AVX_OP(Movhps, movhps)
336 AVX_OP(Movlps, movlps)
337 AVX_OP(Movmskpd, movmskpd)
338 AVX_OP(Movmskps, movmskps)
339 AVX_OP(Movsd, movsd)
340 AVX_OP(Movss, movss)
341 AVX_OP(Movupd, movupd)
342 AVX_OP(Movups, movups)
343 AVX_OP(Mulpd, mulpd)
344 AVX_OP(Mulps, mulps)
345 AVX_OP(Mulsd, mulsd)
346 AVX_OP(Mulss, mulss)
347 AVX_OP(Orpd, orpd)
348 AVX_OP(Orps, orps)
349 AVX_OP(Packssdw, packssdw)
350 AVX_OP(Packsswb, packsswb)
351 AVX_OP(Packuswb, packuswb)
352 AVX_OP(Paddb, paddb)
353 AVX_OP(Paddd, paddd)
354 AVX_OP(Paddq, paddq)
355 AVX_OP(Paddsb, paddsb)
356 AVX_OP(Paddsw, paddsw)
357 AVX_OP(Paddusb, paddusb)
358 AVX_OP(Paddusw, paddusw)
359 AVX_OP(Paddw, paddw)
360 AVX_OP(Pavgb, pavgb)
361 AVX_OP(Pavgw, pavgw)
362 AVX_OP(Pcmpgtb, pcmpgtb)
363 AVX_OP(Pcmpgtd, pcmpgtd)
364 AVX_OP(Pcmpgtw, pcmpgtw)
365 AVX_OP(Pcmpeqb, pcmpeqb)
366 AVX_OP(Pcmpeqd, pcmpeqd)
367 AVX_OP(Pcmpeqw, pcmpeqw)
368 AVX_OP(Pmaddwd, pmaddwd)
369 AVX_OP(Pmaxsw, pmaxsw)
370 AVX_OP(Pmaxub, pmaxub)
371 AVX_OP(Pminsw, pminsw)
372 AVX_OP(Pminub, pminub)
373 AVX_OP(Pmovmskb, pmovmskb)
374 AVX_OP(Pmullw, pmullw)
375 AVX_OP(Pmuludq, pmuludq)
376 AVX_OP(Pshufd, pshufd)
377 AVX_OP(Pshufhw, pshufhw)
378 AVX_OP(Pshuflw, pshuflw)
379 AVX_OP(Pslld, pslld)
380 AVX_OP(Psllq, psllq)
381 AVX_OP(Psllw, psllw)
382 AVX_OP(Psrad, psrad)
383 AVX_OP(Psraw, psraw)
384 AVX_OP(Psrld, psrld)
385 AVX_OP(Psrlq, psrlq)
386 AVX_OP(Psrlw, psrlw)
387 AVX_OP(Psubb, psubb)
388 AVX_OP(Psubd, psubd)
389 AVX_OP(Psubq, psubq)
390 AVX_OP(Psubsb, psubsb)
391 AVX_OP(Psubsw, psubsw)
392 AVX_OP(Psubusb, psubusb)
393 AVX_OP(Psubusw, psubusw)
394 AVX_OP(Psubw, psubw)
395 AVX_OP(Punpckhbw, punpckhbw)
396 AVX_OP(Punpckhdq, punpckhdq)
397 AVX_OP(Punpckhqdq, punpckhqdq)
398 AVX_OP(Punpckhwd, punpckhwd)
399 AVX_OP(Punpcklbw, punpcklbw)
400 AVX_OP(Punpckldq, punpckldq)
401 AVX_OP(Punpcklqdq, punpcklqdq)
402 AVX_OP(Punpcklwd, punpcklwd)
403 AVX_OP(Rcpps, rcpps)
404 AVX_OP(Rsqrtps, rsqrtps)
405 AVX_OP(Sqrtpd, sqrtpd)
406 AVX_OP(Sqrtps, sqrtps)
407 AVX_OP(Sqrtsd, sqrtsd)
408 AVX_OP(Sqrtss, sqrtss)
409 AVX_OP(Subpd, subpd)
410 AVX_OP(Subps, subps)
411 AVX_OP(Subsd, subsd)
412 AVX_OP(Subss, subss)
413 AVX_OP(Ucomisd, ucomisd)
414 AVX_OP(Ucomiss, ucomiss)
415 AVX_OP(Unpcklps, unpcklps)
416 AVX_OP(Xorpd, xorpd)
417 AVX_OP(Xorps, xorps)
418
419 // Many AVX processors have separate integer/floating-point domains, so use
420 // vmovaps if AVX is supported. On SSE, movaps is 1 byte shorter than movdqa,
421 // and has the same behavior. Most SSE processors also don't have the same
422 // delay moving between integer and floating-point domains.
423 AVX_OP_WITH_DIFF_SSE_INSTR(Movapd, movapd, movaps)
424 AVX_OP_WITH_DIFF_SSE_INSTR(Movdqa, movdqa, movaps)
425 AVX_OP_WITH_DIFF_SSE_INSTR(Movdqu, movdqu, movups)
426 AVX_OP_WITH_DIFF_SSE_INSTR(Pand, pand, andps)
427 AVX_OP_WITH_DIFF_SSE_INSTR(Por, por, orps)
428 AVX_OP_WITH_DIFF_SSE_INSTR(Pxor, pxor, xorps)
429
430 AVX_OP_SSE3(Haddps, haddps)
431 AVX_OP_SSE3(Movddup, movddup)
432 AVX_OP_SSE3(Movshdup, movshdup)
433
434 AVX_OP_SSSE3(Pabsb, pabsb)
435 AVX_OP_SSSE3(Pabsd, pabsd)
436 AVX_OP_SSSE3(Pabsw, pabsw)
437 AVX_OP_SSSE3(Palignr, palignr)
438 AVX_OP_SSSE3(Pmulhrsw, pmulhrsw)
439 AVX_OP_SSSE3(Psignb, psignb)
440 AVX_OP_SSSE3(Psignd, psignd)
441 AVX_OP_SSSE3(Psignw, psignw)
442
443 AVX_OP_SSE4_1(Extractps, extractps)
444 AVX_OP_SSE4_1(Insertps, insertps)
445 AVX_OP_SSE4_1(Packusdw, packusdw)
446 AVX_OP_SSE4_1(Pblendw, pblendw)
447 AVX_OP_SSE4_1(Pcmpeqq, pcmpeqq)
448 AVX_OP_SSE4_1(Pextrb, pextrb)
449 AVX_OP_SSE4_1(Pextrw, pextrw)
450 AVX_OP_SSE4_1(Pmaxsb, pmaxsb)
451 AVX_OP_SSE4_1(Pmaxsd, pmaxsd)
452 AVX_OP_SSE4_1(Pmaxud, pmaxud)
453 AVX_OP_SSE4_1(Pmaxuw, pmaxuw)
454 AVX_OP_SSE4_1(Pminsb, pminsb)
455 AVX_OP_SSE4_1(Pminsd, pminsd)
456 AVX_OP_SSE4_1(Pminud, pminud)
457 AVX_OP_SSE4_1(Pminuw, pminuw)
458 AVX_OP_SSE4_1(Pmovsxbw, pmovsxbw)
459 AVX_OP_SSE4_1(Pmovsxdq, pmovsxdq)
460 AVX_OP_SSE4_1(Pmovsxwd, pmovsxwd)
461 AVX_OP_SSE4_1(Pmovzxbw, pmovzxbw)
462 AVX_OP_SSE4_1(Pmovzxbd, pmovzxbd)
463 AVX_OP_SSE4_1(Pmovzxdq, pmovzxdq)
464 AVX_OP_SSE4_1(Pmovzxwd, pmovzxwd)
465 AVX_OP_SSE4_1(Pmulld, pmulld)
466 AVX_OP_SSE4_1(Ptest, ptest)
467 AVX_OP_SSE4_1(Roundpd, roundpd)
468 AVX_OP_SSE4_1(Roundps, roundps)
469 AVX_OP_SSE4_1(Roundsd, roundsd)
470 AVX_OP_SSE4_1(Roundss, roundss)
471
472#undef AVX_OP
473#undef AVX_OP_SSE3
474#undef AVX_OP_SSSE3
475#undef AVX_OP_SSE4_1
476#undef AVX_OP_SSE4_2
477
478 void F64x2ExtractLane(DoubleRegister dst, XMMRegister src, uint8_t lane);
479 void F64x2ReplaceLane(XMMRegister dst, XMMRegister src, DoubleRegister rep,
480 uint8_t lane);
481 void F64x2Min(XMMRegister dst, XMMRegister lhs, XMMRegister rhs,
482 XMMRegister scratch);
483 void F64x2Max(XMMRegister dst, XMMRegister lhs, XMMRegister rhs,
484 XMMRegister scratch);
485 void F32x4Splat(XMMRegister dst, DoubleRegister src);
486 void F32x4ExtractLane(FloatRegister dst, XMMRegister src, uint8_t lane);
487 void F32x4Min(XMMRegister dst, XMMRegister lhs, XMMRegister rhs,
488 XMMRegister scratch);
489 void F32x4Max(XMMRegister dst, XMMRegister lhs, XMMRegister rhs,
490 XMMRegister scratch);
491 void S128Store32Lane(Operand dst, XMMRegister src, uint8_t laneidx);
492 void I8x16Splat(XMMRegister dst, Register src, XMMRegister scratch);
493 void I8x16Splat(XMMRegister dst, Operand src, XMMRegister scratch);
494 void I8x16Shl(XMMRegister dst, XMMRegister src1, uint8_t src2, Register tmp1,
495 XMMRegister tmp2);
496 void I8x16Shl(XMMRegister dst, XMMRegister src1, Register src2, Register tmp1,
497 XMMRegister tmp2, XMMRegister tmp3);
498 void I8x16ShrS(XMMRegister dst, XMMRegister src1, uint8_t src2,
499 XMMRegister tmp);
500 void I8x16ShrS(XMMRegister dst, XMMRegister src1, Register src2,
501 Register tmp1, XMMRegister tmp2, XMMRegister tmp3);
502 void I8x16ShrU(XMMRegister dst, XMMRegister src1, uint8_t src2, Register tmp1,
503 XMMRegister tmp2);
504 void I8x16ShrU(XMMRegister dst, XMMRegister src1, Register src2,
505 Register tmp1, XMMRegister tmp2, XMMRegister tmp3);
506 void I16x8Splat(XMMRegister dst, Register src);
507 void I16x8Splat(XMMRegister dst, Operand src);
508 void I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, XMMRegister src2,
509 XMMRegister scratch, bool is_signed);
510 void I16x8ExtMulHighS(XMMRegister dst, XMMRegister src1, XMMRegister src2,
511 XMMRegister scratch);
512 void I16x8ExtMulHighU(XMMRegister dst, XMMRegister src1, XMMRegister src2,
513 XMMRegister scratch);
514 void I16x8SConvertI8x16High(XMMRegister dst, XMMRegister src);
515 void I16x8UConvertI8x16High(XMMRegister dst, XMMRegister src,
516 XMMRegister scratch);
517 // Will move src1 to dst if AVX is not supported.
518 void I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1, XMMRegister src2,
519 XMMRegister scratch);
520 void I16x8DotI8x16I7x16S(XMMRegister dst, XMMRegister src1, XMMRegister src2);
521 void I32x4DotI8x16I7x16AddS(XMMRegister dst, XMMRegister src1,
522 XMMRegister src2, XMMRegister src3,
523 XMMRegister scratch, XMMRegister splat_reg);
524 void I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src,
525 XMMRegister tmp);
526 // Requires that dst == src1 if AVX is not supported.
527 void I32x4ExtMul(XMMRegister dst, XMMRegister src1, XMMRegister src2,
528 XMMRegister scratch, bool low, bool is_signed);
529 void I32x4SConvertI16x8High(XMMRegister dst, XMMRegister src);
530 void I32x4UConvertI16x8High(XMMRegister dst, XMMRegister src,
531 XMMRegister scratch);
532 void I64x2Neg(XMMRegister dst, XMMRegister src, XMMRegister scratch);
533 void I64x2Abs(XMMRegister dst, XMMRegister src, XMMRegister scratch);
534 void I64x2GtS(XMMRegister dst, XMMRegister src0, XMMRegister src1,
535 XMMRegister scratch);
536 void I64x2GeS(XMMRegister dst, XMMRegister src0, XMMRegister src1,
537 XMMRegister scratch);
538 void I64x2ShrS(XMMRegister dst, XMMRegister src, uint8_t shift,
539 XMMRegister xmm_tmp);
540 void I64x2ShrS(XMMRegister dst, XMMRegister src, Register shift,
541 XMMRegister xmm_tmp, XMMRegister xmm_shift,
542 Register tmp_shift);
543 void I64x2Mul(XMMRegister dst, XMMRegister lhs, XMMRegister rhs,
544 XMMRegister tmp1, XMMRegister tmp2);
545 void I64x2ExtMul(XMMRegister dst, XMMRegister src1, XMMRegister src2,
546 XMMRegister scratch, bool low, bool is_signed);
547 void I64x2SConvertI32x4High(XMMRegister dst, XMMRegister src);
548 void I64x2UConvertI32x4High(XMMRegister dst, XMMRegister src,
549 XMMRegister scratch);
550 void S128Not(XMMRegister dst, XMMRegister src, XMMRegister scratch);
551 // Requires dst == mask when AVX is not supported.
552 void S128Select(XMMRegister dst, XMMRegister mask, XMMRegister src1,
553 XMMRegister src2, XMMRegister scratch);
554 void S128Load8Splat(XMMRegister dst, Operand src, XMMRegister scratch);
555 void S128Load16Splat(XMMRegister dst, Operand src, XMMRegister scratch);
556 void S128Load32Splat(XMMRegister dst, Operand src);
557 void S128Store64Lane(Operand dst, XMMRegister src, uint8_t laneidx);
558
559 void F64x2Qfma(XMMRegister dst, XMMRegister src1, XMMRegister src2,
560 XMMRegister src3, XMMRegister tmp);
561 void F64x2Qfms(XMMRegister dst, XMMRegister src1, XMMRegister src2,
562 XMMRegister src3, XMMRegister tmp);
563 void F32x4Qfma(XMMRegister dst, XMMRegister src1, XMMRegister src2,
564 XMMRegister src3, XMMRegister tmp);
565 void F32x4Qfms(XMMRegister dst, XMMRegister src1, XMMRegister src2,
566 XMMRegister src3, XMMRegister tmp);
567
568 protected:
569 template <typename Op>
570 using AvxFn = void (Assembler::*)(XMMRegister, XMMRegister, Op, uint8_t);
571 template <typename Op>
572 using NoAvxFn = void (Assembler::*)(XMMRegister, Op, uint8_t);
573
574 template <typename Op>
575 void PinsrHelper(Assembler* assm, AvxFn<Op> avx, NoAvxFn<Op> noavx,
576 XMMRegister dst, XMMRegister src1, Op src2, uint8_t imm8,
577 uint32_t* load_pc_offset = nullptr,
578 std::optional<CpuFeature> feature = std::nullopt) {
579 if (CpuFeatures::IsSupported(AVX)) {
580 CpuFeatureScope scope(assm, AVX);
581 if (load_pc_offset) *load_pc_offset = assm->pc_offset();
582 (assm->*avx)(dst, src1, src2, imm8);
583 return;
584 }
585
586 if (dst != src1) assm->movaps(dst, src1);
587 if (load_pc_offset) *load_pc_offset = assm->pc_offset();
588 if (feature.has_value()) {
589 DCHECK(CpuFeatures::IsSupported(*feature));
590 CpuFeatureScope scope(assm, *feature);
591 (assm->*noavx)(dst, src2, imm8);
592 } else {
593 (assm->*noavx)(dst, src2, imm8);
594 }
595 }
596
597 private:
598 template <typename Op>
599 void I8x16SplatPreAvx2(XMMRegister dst, Op src, XMMRegister scratch);
600 template <typename Op>
601 void I16x8SplatPreAvx2(XMMRegister dst, Op src);
602};
603
604// Common base class template shared by ia32 and x64 MacroAssembler. This uses
605// the Curiously Recurring Template Pattern (CRTP), where Impl is the actual
606// class (subclass of SharedMacroAssembler instantiated with the actual
607// class). This allows static polymorphism, where member functions can be moved
608// into SharedMacroAssemblerBase, and we can also call into member functions
609// defined in ia32 or x64 specific MacroAssembler from within this template
610// class, via Impl.
611//
612// Note: all member functions must be defined in this header file so that the
613// compiler can generate code for the function definitions. See
614// https://isocpp.org/wiki/faq/templates#templates-defn-vs-decl for rationale.
615// If a function does not need polymorphism, move it into
616// SharedMacroAssemblerBase, and define it outside of this header.
617template <typename Impl>
618class V8_EXPORT_PRIVATE SharedMacroAssembler : public SharedMacroAssemblerBase {
619 using SharedMacroAssemblerBase::SharedMacroAssemblerBase;
620
621 public:
622 void Abspd(XMMRegister dst, XMMRegister src, Register tmp) {
623 FloatUnop(dst, src, tmp, &SharedMacroAssemblerBase::Andps,
624 ExternalReference::address_of_double_abs_constant());
625 }
626
627 void Absps(XMMRegister dst, XMMRegister src, Register tmp) {
628 FloatUnop(dst, src, tmp, &SharedMacroAssemblerBase::Andps,
629 ExternalReference::address_of_float_abs_constant());
630 }
631
632 void Absph(XMMRegister dst, XMMRegister src, Register tmp) {
633 FloatUnop(dst, src, tmp, &SharedMacroAssemblerBase::Andps,
634 ExternalReference::address_of_fp16_abs_constant());
635 }
636
637 void Negpd(XMMRegister dst, XMMRegister src, Register tmp) {
638 FloatUnop(dst, src, tmp, &SharedMacroAssemblerBase::Xorps,
639 ExternalReference::address_of_double_neg_constant());
640 }
641
642 void Negps(XMMRegister dst, XMMRegister src, Register tmp) {
643 FloatUnop(dst, src, tmp, &SharedMacroAssemblerBase::Xorps,
644 ExternalReference::address_of_float_neg_constant());
645 }
646
647 void Negph(XMMRegister dst, XMMRegister src, Register tmp) {
648 FloatUnop(dst, src, tmp, &SharedMacroAssemblerBase::Xorps,
649 ExternalReference::address_of_fp16_neg_constant());
650 }
651#undef FLOAT_UNOP
652
653 void Pextrd(Register dst, XMMRegister src, uint8_t imm8) {
654 if (imm8 == 0) {
655 Movd(dst, src);
656 return;
657 }
658
659 if (CpuFeatures::IsSupported(AVX)) {
660 CpuFeatureScope scope(this, AVX);
661 vpextrd(dst, src, imm8);
662 } else if (CpuFeatures::IsSupported(SSE4_1)) {
663 CpuFeatureScope sse_scope(this, SSE4_1);
664 pextrd(dst, src, imm8);
665 } else {
666 DCHECK_LT(imm8, 2);
667 impl()->PextrdPreSse41(dst, src, imm8);
668 }
669 }
670
671 template <typename Op>
672 void Pinsrd(XMMRegister dst, XMMRegister src1, Op src2, uint8_t imm8,
673 uint32_t* load_pc_offset = nullptr) {
674 if (CpuFeatures::IsSupported(SSE4_1)) {
675 PinsrHelper(this, &Assembler::vpinsrd, &Assembler::pinsrd, dst, src1,
676 src2, imm8, load_pc_offset,
677 std::optional<CpuFeature>(SSE4_1));
678 } else {
679 if (dst != src1) {
680 movaps(dst, src1);
681 }
682 impl()->PinsrdPreSse41(dst, src2, imm8, load_pc_offset);
683 }
684 }
685
686 template <typename Op>
687 void Pinsrd(XMMRegister dst, Op src, uint8_t imm8,
688 uint32_t* load_pc_offset = nullptr) {
689 Pinsrd(dst, dst, src, imm8, load_pc_offset);
690 }
691
692 void F64x2ConvertLowI32x4U(XMMRegister dst, XMMRegister src,
693 Register scratch) {
694 ASM_CODE_COMMENT(this);
695 // dst = [ src_low, 0x43300000, src_high, 0x43300000 ];
696 // 0x43300000'00000000 is a special double whose significand bits
697 // precisely represent all uint32 numbers.
698 if (!CpuFeatures::IsSupported(AVX) && dst != src) {
699 movaps(dst, src);
700 src = dst;
701 }
702 Unpcklps(dst, src,
703 ExternalReferenceAsOperand(
704 ExternalReference::
705 address_of_wasm_f64x2_convert_low_i32x4_u_int_mask(),
706 scratch));
707 Subpd(dst,
708 ExternalReferenceAsOperand(
709 ExternalReference::address_of_wasm_double_2_power_52(), scratch));
710 }
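// Worked example: a lane holding the uint32 value 5 becomes the bit pattern
// 0x43300000'00000005, i.e. the double 2^52 + 5; subtracting 2^52
// (address_of_wasm_double_2_power_52) then leaves exactly 5.0 in that lane.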
711
712 void I32x4SConvertF32x4(XMMRegister dst, XMMRegister src, XMMRegister tmp,
713 Register scratch) {
714 ASM_CODE_COMMENT(this);
715 Operand op = ExternalReferenceAsOperand(
716 ExternalReference::address_of_wasm_int32_overflow_as_float(), scratch);
717
718 // This algorithm works by:
719 // 1. lanes with NaNs are zeroed
720 // 2. lanes >= 2147483648.0f (MAX_INT32 + 1) are set to 0xffff'ffff
721 // 3. cvttps2dq sets all out of range lanes to 0x8000'0000
722 // a. correct for underflows (< MIN_INT32)
723 // b. wrong for overflow, and we know which lanes overflow from 2.
724 // 4. adjust for 3b by xor-ing 2 and 3
725 // a. 0x8000'0000 xor 0xffff'ffff = 0x7fff'ffff (MAX_INT32)
726 if (CpuFeatures::IsSupported(AVX)) {
727 CpuFeatureScope scope(this, AVX);
728 vcmpeqps(tmp, src, src);
729 vandps(dst, src, tmp);
730 vcmpgeps(tmp, src, op);
731 vcvttps2dq(dst, dst);
732 vpxor(dst, dst, tmp);
733 } else {
734 if (src == dst) {
735 movaps(tmp, src);
736 cmpeqps(tmp, tmp);
737 andps(dst, tmp);
738 movaps(tmp, op);
739 cmpleps(tmp, dst);
740 cvttps2dq(dst, dst);
741 xorps(dst, tmp);
742 } else {
743 movaps(tmp, op);
744 cmpleps(tmp, src);
745 cvttps2dq(dst, src);
746 xorps(dst, tmp);
747 movaps(tmp, src);
748 cmpeqps(tmp, tmp);
749 andps(dst, tmp);
750 }
751 }
752 }
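// Example for an overflowing lane such as 3.0e9f: step 3 yields 0x8000'0000
// and the mask from step 2 is 0xffff'ffff, so the xor in step 4 produces
// 0x7fff'ffff (MAX_INT32); a NaN lane is zeroed in step 1 and stays 0.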
753
754 void I32x4TruncSatF64x2SZero(XMMRegister dst, XMMRegister src,
755 XMMRegister scratch, Register tmp) {
756 ASM_CODE_COMMENT(this);
757 if (CpuFeatures::IsSupported(AVX)) {
758 CpuFeatureScope avx_scope(this, AVX);
759 XMMRegister original_dst = dst;
760 // Make sure we don't overwrite src.
761 if (dst == src) {
762 DCHECK_NE(src, scratch);
763 dst = scratch;
764 }
765 // dst = 0 if src == NaN, else all ones.
766 vcmpeqpd(dst, src, src);
767 // dst = 0 if src == NaN, else INT32_MAX as double.
768 vandpd(
769 dst, dst,
770 ExternalReferenceAsOperand(
771 ExternalReference::address_of_wasm_int32_max_as_double(), tmp));
772 // dst = 0 if src == NaN, src is saturated to INT32_MAX as double.
773 vminpd(dst, src, dst);
774 // Values > INT32_MAX are already saturated; values < INT32_MIN raise an
775 // exception, which is masked and returns 0x80000000.
776 vcvttpd2dq(original_dst, dst);
777 } else {
778 if (dst != src) {
779 movaps(dst, src);
780 }
781 movaps(scratch, dst);
782 cmpeqpd(scratch, dst);
783 andps(scratch,
784 ExternalReferenceAsOperand(
785 ExternalReference::address_of_wasm_int32_max_as_double(), tmp));
786 minpd(dst, scratch);
787 cvttpd2dq(dst, dst);
788 }
789 }
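// Note: [v]cvttpd2dq writes only two int32 results and zeroes the upper
// 64 bits of the destination, which is what provides the "Zero" part of
// this operation's semantics.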
790
791 void I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src,
792 XMMRegister scratch, Register tmp) {
793 ASM_CODE_COMMENT(this);
794 if (CpuFeatures::IsSupported(AVX)) {
795 CpuFeatureScope avx_scope(this, AVX);
796 vxorpd(scratch, scratch, scratch);
797 // Saturate to 0.
798 vmaxpd(dst, src, scratch);
799 // Saturate to UINT32_MAX.
800 vminpd(
801 dst, dst,
802 ExternalReferenceAsOperand(
803 ExternalReference::address_of_wasm_uint32_max_as_double(), tmp));
804 // Truncate.
805 vroundpd(dst, dst, kRoundToZero);
806 // Add to special double where significand bits == uint32.
807 vaddpd(dst, dst,
808 ExternalReferenceAsOperand(
809 ExternalReference::address_of_wasm_double_2_power_52(), tmp));
810 // Extract low 32 bits of each double's significand, zero top lanes.
811 // dst = [dst[0], dst[2], 0, 0]
812 vshufps(dst, dst, scratch, 0x88);
813 } else {
814 CpuFeatureScope scope(this, SSE4_1);
815 if (dst != src) {
816 movaps(dst, src);
817 }
818 xorps(scratch, scratch);
819 maxpd(dst, scratch);
820 minpd(dst, ExternalReferenceAsOperand(
821 ExternalReference::address_of_wasm_uint32_max_as_double(),
822 tmp));
823 roundpd(dst, dst, kRoundToZero);
824 addpd(dst,
825 ExternalReferenceAsOperand(
826 ExternalReference::address_of_wasm_double_2_power_52(), tmp));
827 shufps(dst, scratch, 0x88);
828 }
829 }
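// The final shufps with imm8 0x88 picks 32-bit elements {0, 2} from dst (the
// low halves of the two 2^52-biased doubles, i.e. the uint32 results) and
// elements {0, 2} from scratch (zero), so lanes 2 and 3 end up zeroed.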
830
831 void I32x4TruncF32x4U(XMMRegister dst, XMMRegister src, XMMRegister scratch1,
832 XMMRegister scratch2) {
833 // NAN->0, negative->0.
834 Pxor(scratch1, scratch1);
835 if (CpuFeatures::IsSupported(AVX)) {
836 CpuFeatureScope scope(this, AVX);
837 vmaxps(dst, src, scratch1);
838 } else {
839 if (dst != src) movaps(dst, src);
840 maxps(dst, scratch1);
841 }
842 // scratch: float representation of max_signed.
843 Pcmpeqd(scratch1, scratch1);
844 Psrld(scratch1, uint8_t{1}); // 0x7fffffff
845 Cvtdq2ps(scratch1, scratch1); // 0x4f000000
846 // scratch2: convert (src-max_signed).
847 // Set positive overflow lanes to 0x7FFFFFFF.
848 // Set negative lanes to 0.
849 if (CpuFeatures::IsSupported(AVX)) {
850 CpuFeatureScope scope(this, AVX);
851 vsubps(scratch2, dst, scratch1);
852 } else {
853 movaps(scratch2, dst);
854 subps(scratch2, scratch1);
855 }
856 Cmpleps(scratch1, scratch2);
857 Cvttps2dq(scratch2, scratch2);
858 Pxor(scratch2, scratch1);
859 Pxor(scratch1, scratch1);
860 Pmaxsd(scratch2, scratch1);
861 // Convert to int. Overflow lanes above max_signed will be 0x80000000.
862 Cvttps2dq(dst, dst);
863 // Add (src-max_signed) for overflow lanes.
864 Paddd(dst, scratch2);
865 }
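// Example for a lane holding 3.0e9f: Cvttps2dq gives 0x8000'0000, the
// correction lane is int(3.0e9f - 2147483648.0f) = 852516352, and the final
// Paddd produces 0xB2D0'5E00, i.e. 3000000000 as an unsigned value.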
866
867 void I32x4ExtAddPairwiseI16x8S(XMMRegister dst, XMMRegister src,
868 Register scratch) {
869 ASM_CODE_COMMENT(this);
870 Operand op = ExternalReferenceAsOperand(
871 ExternalReference::address_of_wasm_i16x8_splat_0x0001(), scratch);
872 // pmaddwd multiplies signed words in src and op, producing
873 // signed doublewords, then adds pairwise.
874 // src = |a|b|c|d|e|f|g|h|
875 // dst = | a*1 + b*1 | c*1 + d*1 | e*1 + f*1 | g*1 + h*1 |
876 if (!CpuFeatures::IsSupported(AVX) && (dst != src)) {
877 movaps(dst, src);
878 src = dst;
879 }
880
881 Pmaddwd(dst, src, op);
882 }
883
884 void I16x8ExtAddPairwiseI8x16S(XMMRegister dst, XMMRegister src,
885 XMMRegister scratch, Register tmp) {
886 ASM_CODE_COMMENT(this);
887 // pmaddubsw treats the first operand as unsigned, so pass the external
888 // reference to it as the first operand.
889 Operand op = ExternalReferenceAsOperand(
890 ExternalReference::address_of_wasm_i8x16_splat_0x01(), tmp);
891 if (CpuFeatures::IsSupported(AVX)) {
892 CpuFeatureScope avx_scope(this, AVX);
893 vmovdqa(scratch, op);
894 vpmaddubsw(dst, scratch, src);
895 } else {
896 CpuFeatureScope sse_scope(this, SSSE3);
897 if (dst == src) {
898 movaps(scratch, op);
899 pmaddubsw(scratch, src);
900 movaps(dst, scratch);
901 } else {
902 movaps(dst, op);
903 pmaddubsw(dst, src);
904 }
905 }
906 }
907
908 void I16x8ExtAddPairwiseI8x16U(XMMRegister dst, XMMRegister src,
909 Register scratch) {
910 ASM_CODE_COMMENT(this);
911 Operand op = ExternalReferenceAsOperand(
912 ExternalReference::address_of_wasm_i8x16_splat_0x01(), scratch);
913 if (CpuFeatures::IsSupported(AVX)) {
914 CpuFeatureScope avx_scope(this, AVX);
915 vpmaddubsw(dst, src, op);
916 } else {
917 CpuFeatureScope sse_scope(this, SSSE3);
918 if (dst != src) {
919 movaps(dst, src);
920 }
921 pmaddubsw(dst, op);
922 }
923 }
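// Here src is the first (unsigned) operand of pmaddubsw and the 0x01 splat
// the second (signed) one, so each byte of src is effectively zero-extended
// before the pairwise add.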
924
925 void I8x16Swizzle(XMMRegister dst, XMMRegister src, XMMRegister mask,
926 XMMRegister scratch, Register tmp, bool omit_add = false) {
927 ASM_CODE_COMMENT(this);
928 if (omit_add) {
929 // We have determined that the indices are immediates, and they are either
930 // within bounds, or the top bit is set, so we can omit the add.
931 Pshufb(dst, src, mask);
932 return;
933 }
934
935 // Out-of-range indices should return 0, add 112 so that any value > 15
936 // saturates to 128 (top bit set), so pshufb will zero that lane.
937 Operand op = ExternalReferenceAsOperand(
938 ExternalReference::address_of_wasm_i8x16_swizzle_mask(), tmp);
939 if (CpuFeatures::IsSupported(AVX)) {
940 CpuFeatureScope avx_scope(this, AVX);
941 vpaddusb(scratch, mask, op);
942 vpshufb(dst, src, scratch);
943 } else {
944 CpuFeatureScope sse_scope(this, SSSE3);
945 movaps(scratch, op);
946 if (dst != src) {
947 DCHECK_NE(dst, mask);
948 movaps(dst, src);
949 }
950 paddusb(scratch, mask);
951 pshufb(dst, scratch);
952 }
953 }
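// Example: index 15 becomes 15 + 112 = 127 (top bit clear, still selects
// byte 15), index 16 becomes 128, and larger indices saturate towards 255
// via paddusb, so pshufb zeroes all out-of-range lanes.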
954
955 void I8x16Popcnt(XMMRegister dst, XMMRegister src, XMMRegister tmp1,
956 XMMRegister tmp2, Register scratch) {
957 ASM_CODE_COMMENT(this);
958 DCHECK_NE(dst, tmp1);
959 DCHECK_NE(src, tmp1);
960 DCHECK_NE(dst, tmp2);
961 DCHECK_NE(src, tmp2);
962 if (CpuFeatures::IsSupported(AVX)) {
963 CpuFeatureScope avx_scope(this, AVX);
964 vmovdqa(tmp1, ExternalReferenceAsOperand(
965 ExternalReference::address_of_wasm_i8x16_splat_0x0f(),
966 scratch));
967 vpandn(tmp2, tmp1, src);
968 vpand(dst, tmp1, src);
969 vmovdqa(tmp1, ExternalReferenceAsOperand(
970 ExternalReference::address_of_wasm_i8x16_popcnt_mask(),
971 scratch));
972 vpsrlw(tmp2, tmp2, 4);
973 vpshufb(dst, tmp1, dst);
974 vpshufb(tmp2, tmp1, tmp2);
975 vpaddb(dst, dst, tmp2);
976 } else if (CpuFeatures::IsSupported(INTEL_ATOM)) {
977 // Pre-Goldmont low-power Intel microarchitectures have a very slow
978 // PSHUFB instruction, so we use a PSHUFB-free divide-and-conquer
979 // algorithm on these processors. The ATOM CPU feature captures exactly
980 // the right set of processors.
981 movaps(tmp1, src);
982 psrlw(tmp1, 1);
983 if (dst != src) {
984 movaps(dst, src);
985 }
986 andps(tmp1, ExternalReferenceAsOperand(
987 ExternalReference::address_of_wasm_i8x16_splat_0x55(),
988 scratch));
989 psubb(dst, tmp1);
990 Operand splat_0x33 = ExternalReferenceAsOperand(
991 ExternalReference::address_of_wasm_i8x16_splat_0x33(), scratch);
992 movaps(tmp1, dst);
993 andps(dst, splat_0x33);
994 psrlw(tmp1, 2);
995 andps(tmp1, splat_0x33);
996 paddb(dst, tmp1);
997 movaps(tmp1, dst);
998 psrlw(dst, 4);
999 paddb(dst, tmp1);
1000 andps(dst, ExternalReferenceAsOperand(
1001 ExternalReference::address_of_wasm_i8x16_splat_0x0f(),
1002 scratch));
1003 } else {
1004 CpuFeatureScope sse_scope(this, SSSE3);
1005 movaps(tmp1, ExternalReferenceAsOperand(
1006 ExternalReference::address_of_wasm_i8x16_splat_0x0f(),
1007 scratch));
1008 Operand mask = ExternalReferenceAsOperand(
1009 ExternalReference::address_of_wasm_i8x16_popcnt_mask(), scratch);
1010 if (tmp2 != tmp1) {
1011 movaps(tmp2, tmp1);
1012 }
1013 andps(tmp1, src);
1014 andnps(tmp2, src);
1015 psrlw(tmp2, 4);
1016 movaps(dst, mask);
1017 pshufb(dst, tmp1);
1018 movaps(tmp1, mask);
1019 pshufb(tmp1, tmp2);
1020 paddb(dst, tmp1);
1021 }
1022 }
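// The AVX/SSSE3 paths above split each byte into its two nibbles and use
// pshufb with the 16-entry popcnt_mask table as a lookup, e.g. for 0xb6:
// popcnt(0xb) + popcnt(0x6) = 3 + 2 = 5.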
1023
1024 private:
1025 // All implementation-specific methods must be called through this.
1026 Impl* impl() { return static_cast<Impl*>(this); }
1027
1028 Operand ExternalReferenceAsOperand(ExternalReference reference,
1029 Register scratch) {
1030 return impl()->ExternalReferenceAsOperand(reference, scratch);
1031 }
1032
1033 using FloatInstruction = void (SharedMacroAssemblerBase::*)(XMMRegister,
1034 XMMRegister,
1035 Operand);
1036 void FloatUnop(XMMRegister dst, XMMRegister src, Register tmp,
1037 FloatInstruction op, ExternalReference ext) {
1038 if (!CpuFeatures::IsSupported(AVX) && (dst != src)) {
1039 movaps(dst, src);
1040 src = dst;
1041 }
1042 SharedMacroAssemblerBase* assm = this;
1043 (assm->*op)(dst, src, ExternalReferenceAsOperand(ext, tmp));
1044 }
1045};
1046
1047} // namespace internal
1048} // namespace v8
1049#endif // V8_CODEGEN_SHARED_IA32_X64_MACRO_ASSEMBLER_SHARED_IA32_X64_H_