v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
sse-instr.h
Go to the documentation of this file.
1// Copyright 2012 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_CODEGEN_X64_SSE_INSTR_H_
6#define V8_CODEGEN_X64_SSE_INSTR_H_
7
8// SSE instructions whose AVX version has two operands.
// X-macro table: invokes the caller-supplied macro V once per row, in the
// order written, as V(mnemonic, byte, byte). The byte tokens are bare hex
// digits with no 0x prefix, so V presumably pastes or stringifies them itself
// (confirm at the use site).
// NOTE(review): the two tokens look like the 0F escape byte followed by the
// opcode byte; confirm against the assembler that consumes this list.
9#define SSE_UNOP_INSTRUCTION_LIST(V) \
10 V(sqrtps, 0F, 51) \
11 V(rsqrtps, 0F, 52) \
12 V(rcpps, 0F, 53) \
13 V(cvtps2pd, 0F, 5A) \
14 V(cvtdq2ps, 0F, 5B)
15
16// SSE instructions whose AVX version has three operands.
// X-macro table: invokes the caller-supplied macro V once per row, in the
// order written, as V(mnemonic, byte, byte). Rows are listed in ascending
// order of the final byte token. The byte tokens are bare hex digits with no
// 0x prefix, so V presumably pastes or stringifies them itself (confirm at the
// use site).
17#define SSE_BINOP_INSTRUCTION_LIST(V) \
18 V(unpcklps, 0F, 14) \
19 V(andps, 0F, 54) \
20 V(andnps, 0F, 55) \
21 V(orps, 0F, 56) \
22 V(xorps, 0F, 57) \
23 V(addps, 0F, 58) \
24 V(mulps, 0F, 59) \
25 V(subps, 0F, 5C) \
26 V(minps, 0F, 5D) \
27 V(divps, 0F, 5E) \
28 V(maxps, 0F, 5F)
29
30// Instructions dealing with scalar single-precision values.
// X-macro table: invokes the caller-supplied macro V once per row as
// V(mnemonic, byte, byte, byte). Every row starts with the byte tokens F3, 0F
// and differs only in the mnemonic and the final byte token, which is listed
// in ascending order.
// NOTE(review): F3 looks like the instruction prefix and the last token the
// opcode; confirm against the assembler that consumes this list.
31#define SSE_INSTRUCTION_LIST_SS(V) \
32 V(sqrtss, F3, 0F, 51) \
33 V(addss, F3, 0F, 58) \
34 V(mulss, F3, 0F, 59) \
35 V(cvtss2sd, F3, 0F, 5A) \
36 V(subss, F3, 0F, 5C) \
37 V(minss, F3, 0F, 5D) \
38 V(divss, F3, 0F, 5E) \
39 V(maxss, F3, 0F, 5F)
40
41// Keep sorted by last code.
// ("Last code" is the final hex token of each row; new rows go in ascending
// order of that token.)
42// SSE2 Instructions dealing with packed double-precision values.
// X-macro table: invokes the caller-supplied macro V once per row as
// V(mnemonic, byte, byte, byte). Every row starts with the byte tokens 66, 0F.
43#define SSE2_INSTRUCTION_LIST_PD(V) \
44 V(andpd, 66, 0F, 54) \
45 V(andnpd, 66, 0F, 55) \
46 V(orpd, 66, 0F, 56) \
47 V(xorpd, 66, 0F, 57) \
48 V(addpd, 66, 0F, 58) \
49 V(mulpd, 66, 0F, 59) \
50 V(subpd, 66, 0F, 5C) \
51 V(minpd, 66, 0F, 5D) \
52 V(divpd, 66, 0F, 5E) \
53 V(maxpd, 66, 0F, 5F)
54
55// SSE2 Instructions dealing with packed integer values.
// X-macro table: invokes the caller-supplied macro V once per row as
// V(mnemonic, byte, byte, byte). Every row starts with the byte tokens 66, 0F;
// rows are listed in ascending order of the final byte token, so insert new
// rows at the matching position rather than appending.
56#define SSE2_INSTRUCTION_LIST_PI(V) \
57 V(punpcklbw, 66, 0F, 60) \
58 V(punpcklwd, 66, 0F, 61) \
59 V(punpckldq, 66, 0F, 62) \
60 V(packsswb, 66, 0F, 63) \
61 V(pcmpgtb, 66, 0F, 64) \
62 V(pcmpgtw, 66, 0F, 65) \
63 V(pcmpgtd, 66, 0F, 66) \
64 V(packuswb, 66, 0F, 67) \
65 V(punpckhbw, 66, 0F, 68) \
66 V(punpckhwd, 66, 0F, 69) \
67 V(punpckhdq, 66, 0F, 6A) \
68 V(packssdw, 66, 0F, 6B) \
69 V(punpcklqdq, 66, 0F, 6C) \
70 V(punpckhqdq, 66, 0F, 6D) \
71 V(pcmpeqb, 66, 0F, 74) \
72 V(pcmpeqw, 66, 0F, 75) \
73 V(pcmpeqd, 66, 0F, 76) \
74 V(paddq, 66, 0F, D4) \
75 V(pmullw, 66, 0F, D5) \
76 V(psubusb, 66, 0F, D8) \
77 V(psubusw, 66, 0F, D9) \
78 V(pminub, 66, 0F, DA) \
79 V(pand, 66, 0F, DB) \
80 V(paddusb, 66, 0F, DC) \
81 V(paddusw, 66, 0F, DD) \
82 V(pmaxub, 66, 0F, DE) \
83 V(pandn, 66, 0F, DF) \
84 V(pavgb, 66, 0F, E0) \
85 V(pavgw, 66, 0F, E3) \
86 V(pmulhuw, 66, 0F, E4) \
87 V(pmulhw, 66, 0F, E5) \
88 V(psubsb, 66, 0F, E8) \
89 V(psubsw, 66, 0F, E9) \
90 V(pminsw, 66, 0F, EA) \
91 V(por, 66, 0F, EB) \
92 V(paddsb, 66, 0F, EC) \
93 V(paddsw, 66, 0F, ED) \
94 V(pmaxsw, 66, 0F, EE) \
95 V(pxor, 66, 0F, EF) \
96 V(pmuludq, 66, 0F, F4) \
97 V(pmaddwd, 66, 0F, F5) \
98 V(psubb, 66, 0F, F8) \
99 V(psubw, 66, 0F, F9) \
100 V(psubd, 66, 0F, FA) \
101 V(psubq, 66, 0F, FB) \
102 V(paddb, 66, 0F, FC) \
103 V(paddw, 66, 0F, FD) \
104 V(paddd, 66, 0F, FE)
105
106// SSE2 shift instructions with XMM register or m128 operand
// X-macro table: invokes the caller-supplied macro V once per row as
// V(mnemonic, byte, byte, byte). Every row starts with the byte tokens 66, 0F;
// rows are listed in ascending order of the final byte token.
// The same eight mnemonics also appear in SSE2_INSTRUCTION_LIST_SHIFT_IMM,
// where they carry different byte tokens plus an opcode extension.
107#define SSE2_INSTRUCTION_LIST_SHIFT(V) \
108 V(psrlw, 66, 0F, D1) \
109 V(psrld, 66, 0F, D2) \
110 V(psrlq, 66, 0F, D3) \
111 V(psraw, 66, 0F, E1) \
112 V(psrad, 66, 0F, E2) \
113 V(psllw, 66, 0F, F1) \
114 V(pslld, 66, 0F, F2) \
115 V(psllq, 66, 0F, F3)
116
// Aggregate X-macro: applies V to every row of the packed-double (PD),
// packed-integer (PI) and register/memory shift (SHIFT) tables, in that order.
// All three sub-tables use four-argument rows, so one V can serve all of them.
117#define SSE2_INSTRUCTION_LIST(V) \
118 SSE2_INSTRUCTION_LIST_PD(V) \
119 SSE2_INSTRUCTION_LIST_PI(V) \
120 SSE2_INSTRUCTION_LIST_SHIFT(V)
121
122// SSE2 instructions whose AVX version has two operands.
// X-macro table: invokes the caller-supplied macro V once per row as
// V(mnemonic, byte, byte, byte). Every row starts with the byte tokens 66, 0F;
// rows are listed in ascending order of the final byte token.
123#define SSE2_UNOP_INSTRUCTION_LIST(V) \
124 V(ucomisd, 66, 0F, 2E) \
125 V(sqrtpd, 66, 0F, 51) \
126 V(cvtpd2ps, 66, 0F, 5A) \
127 V(cvtps2dq, 66, 0F, 5B) \
128 V(cvttpd2dq, 66, 0F, E6)
129
130// SSE2 shift instructions with an immediate operand. The last element is the
131// extension to the opcode.
// X-macro table: invokes the caller-supplied macro V once per row as
// V(mnemonic, byte, byte, opcode, extension). Several rows share an opcode
// byte (71, 72 or 73) and are told apart only by the extension (2, 4 or 6), so
// both values are needed to identify an instruction.
// Rows are grouped by extension, not sorted by opcode.
// NOTE(review): the extension is presumably the ModR/M reg field (/2, /4, /6);
// confirm against the assembler that consumes this list.
132#define SSE2_INSTRUCTION_LIST_SHIFT_IMM(V) \
133 V(psrlw, 66, 0F, 71, 2) \
134 V(psrld, 66, 0F, 72, 2) \
135 V(psrlq, 66, 0F, 73, 2) \
136 V(psraw, 66, 0F, 71, 4) \
137 V(psrad, 66, 0F, 72, 4) \
138 V(psllw, 66, 0F, 71, 6) \
139 V(pslld, 66, 0F, 72, 6) \
140 V(psllq, 66, 0F, 73, 6)
141
142// Instructions dealing with scalar double-precision values.
// X-macro table: invokes the caller-supplied macro V once per row as
// V(mnemonic, byte, byte, byte). Every row starts with the byte tokens F2, 0F
// and differs only in the mnemonic and the final byte token, which is listed
// in ascending order.
143#define SSE2_INSTRUCTION_LIST_SD(V) \
144 V(sqrtsd, F2, 0F, 51) \
145 V(addsd, F2, 0F, 58) \
146 V(mulsd, F2, 0F, 59) \
147 V(cvtsd2ss, F2, 0F, 5A) \
148 V(subsd, F2, 0F, 5C) \
149 V(minsd, F2, 0F, 5D) \
150 V(divsd, F2, 0F, 5E) \
151 V(maxsd, F2, 0F, 5F)
152
// SSSE3 instruction table.
// X-macro table: invokes the caller-supplied macro V once per row as
// V(mnemonic, byte, byte, byte, byte). Every row starts with the byte tokens
// 66, 0F, 38; rows are listed in ascending order of the final byte token.
// Rows have five arguments, one more than the SSE2 tables, so a V written for
// four-argument rows cannot be reused here.
153#define SSSE3_INSTRUCTION_LIST(V) \
154 V(pshufb, 66, 0F, 38, 00) \
155 V(phaddw, 66, 0F, 38, 01) \
156 V(phaddd, 66, 0F, 38, 02) \
157 V(pmaddubsw, 66, 0F, 38, 04) \
158 V(psignb, 66, 0F, 38, 08) \
159 V(psignw, 66, 0F, 38, 09) \
160 V(psignd, 66, 0F, 38, 0A) \
161 V(pmulhrsw, 66, 0F, 38, 0B)
162
163// SSSE3 instructions whose AVX version has two operands.
// X-macro table: invokes the caller-supplied macro V once per row as
// V(mnemonic, byte, byte, byte, byte). Every row starts with the byte tokens
// 66, 0F, 38; rows are listed in ascending order of the final byte token.
164#define SSSE3_UNOP_INSTRUCTION_LIST(V) \
165 V(pabsb, 66, 0F, 38, 1C) \
166 V(pabsw, 66, 0F, 38, 1D) \
167 V(pabsd, 66, 0F, 38, 1E)
168
// SSE4 instruction table.
// X-macro table: invokes the caller-supplied macro V once per row as
// V(mnemonic, byte, byte, byte, byte). Every row starts with the byte tokens
// 66, 0F, 38; rows are listed in ascending order of the final byte token.
169#define SSE4_INSTRUCTION_LIST(V) \
170 V(pmuldq, 66, 0F, 38, 28) \
171 V(pcmpeqq, 66, 0F, 38, 29) \
172 V(packusdw, 66, 0F, 38, 2B) \
173 V(pminsb, 66, 0F, 38, 38) \
174 V(pminsd, 66, 0F, 38, 39) \
175 V(pminuw, 66, 0F, 38, 3A) \
176 V(pminud, 66, 0F, 38, 3B) \
177 V(pmaxsb, 66, 0F, 38, 3C) \
178 V(pmaxsd, 66, 0F, 38, 3D) \
179 V(pmaxuw, 66, 0F, 38, 3E) \
180 V(pmaxud, 66, 0F, 38, 3F) \
181 V(pmulld, 66, 0F, 38, 40)
182
183// SSE4 instructions whose AVX version has two operands.
// X-macro table: applies the caller-supplied macro V to ptest and then to
// every row of SSE4_UNOP_INSTRUCTION_LIST_PMOV. That macro is defined further
// down in this header, which is fine: a macro body is only expanded where the
// outer macro is used, and by then both definitions are visible.
184#define SSE4_UNOP_INSTRUCTION_LIST(V) \
185 V(ptest, 66, 0F, 38, 17) \
186 SSE4_UNOP_INSTRUCTION_LIST_PMOV(V)
187
// pmovsx* / pmovzx* rows, kept as their own table so they can be expanded
// separately; SSE4_UNOP_INSTRUCTION_LIST also includes them.
// X-macro table: invokes the caller-supplied macro V once per row as
// V(mnemonic, byte, byte, byte, byte). Every row starts with the byte tokens
// 66, 0F, 38; rows are listed in ascending order of the final byte token.
188#define SSE4_UNOP_INSTRUCTION_LIST_PMOV(V) \
189 V(pmovsxbw, 66, 0F, 38, 20) \
190 V(pmovsxwd, 66, 0F, 38, 23) \
191 V(pmovsxdq, 66, 0F, 38, 25) \
192 V(pmovzxbw, 66, 0F, 38, 30) \
193 V(pmovzxbd, 66, 0F, 38, 31) \
194 V(pmovzxwd, 66, 0F, 38, 33) \
195 V(pmovzxdq, 66, 0F, 38, 35)
196
// SSE4 extract instruction table.
// X-macro table: invokes the caller-supplied macro V once per row as
// V(mnemonic, byte, byte, byte, byte). Every row starts with the byte tokens
// 66, 0F, 3A (not the 38 used by the other SSE4 tables in this header).
// Rows are not in ascending order of the final byte token: extractps (17)
// comes first. The order is kept as written because V is expanded in row
// order and users of this list may depend on it.
197#define SSE4_EXTRACT_INSTRUCTION_LIST(V) \
198 V(extractps, 66, 0F, 3A, 17) \
199 V(pextrb, 66, 0F, 3A, 14) \
200 V(pextrw, 66, 0F, 3A, 15) \
201 V(pextrd, 66, 0F, 3A, 16)
202
// SSE4.2 instruction table; currently a single row. Invokes the
// caller-supplied macro V as V(mnemonic, byte, byte, byte, byte).
203#define SSE4_2_INSTRUCTION_LIST(V) V(pcmpgtq, 66, 0F, 38, 37)
204
205// These require AVX2.
// X-macro table: invokes the caller-supplied macro V once per row as
// V(mnemonic, byte, byte, byte, byte). Every row starts with the byte tokens
// 66, 0F, 38. Rows are ordered by element width (b, w, d, q), not by the
// final byte token.
// Unlike the other tables in this header, the mnemonics here already carry
// the leading "v".
206#define AVX2_BROADCAST_LIST(V) \
207 V(vpbroadcastb, 66, 0F, 38, 78) \
208 V(vpbroadcastw, 66, 0F, 38, 79) \
209 V(vpbroadcastd, 66, 0F, 38, 58) \
210 V(vpbroadcastq, 66, 0F, 38, 59)
211
212#endif // V8_CODEGEN_X64_SSE_INSTR_H_