v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
simd-shuffle.cc
Go to the documentation of this file.
1// Copyright 2020 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
6
7#include <algorithm>
8
10
11namespace v8 {
12namespace internal {
13namespace wasm {
14
15// Take a lane-wise shuffle and expand to a 16 byte-wise shuffle.
16template <size_t N>
18 const std::array<uint8_t, N> in) {
20 constexpr size_t lane_bytes = 16 / N;
21 for (unsigned i = 0; i < N; ++i) {
22 for (unsigned j = 0; j < lane_bytes; ++j) {
23 res[i * lane_bytes + j] = lane_bytes * in[i] + j;
24 }
25 }
26 return res;
27}
28
30 const ShuffleArray& shuffle) {
31 using CanonicalShuffleList =
32 std::array<std::pair<const ShuffleArray, const CanonicalShuffle>,
33 static_cast<size_t>(CanonicalShuffle::kMaxShuffles) - 1>;
34
35 static constexpr CanonicalShuffleList canonical_shuffle_list = {{
48 {expand<8>({0, 2, 4, 6, 8, 10, 12, 14}), CanonicalShuffle::kS16x8Even},
49 {expand<8>({1, 3, 5, 7, 9, 11, 13, 15}), CanonicalShuffle::kS16x8Odd},
50 {expand<8>({0, 8, 1, 9, 2, 10, 3, 11}),
52 {expand<8>({4, 12, 5, 13, 6, 14, 7, 15}),
54 {expand<8>({0, 8, 2, 10, 4, 12, 6, 14}),
56 {expand<8>({1, 9, 3, 11, 5, 13, 7, 15}),
58 {expand<8>({1, 0, 3, 2, 5, 4, 7, 6}), CanonicalShuffle::kS16x2Reverse},
59 {expand<8>({3, 2, 1, 0, 7, 6, 5, 4}), CanonicalShuffle::kS16x4Reverse},
60 {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8},
62 {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
64 {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
66 {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23},
68 {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31},
70 {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30},
72 {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31},
74 {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30},
76 {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31},
78 }};
79 for (auto& [lanes, canonical] : canonical_shuffle_list) {
80 if (std::equal(lanes.begin(), lanes.end(), shuffle.begin())) {
81 return canonical;
82 }
83 }
85}
86
87bool SimdShuffle::TryMatchIdentity(const uint8_t* shuffle) {
88 for (int i = 0; i < kSimd128Size; ++i) {
89 if (shuffle[i] != i) return false;
90 }
91 return true;
92}
93
94bool SimdShuffle::TryMatch32x4Rotate(const uint8_t* shuffle,
95 uint8_t* shuffle32x4, bool is_swizzle) {
96 uint8_t offset;
97 bool is_concat = TryMatchConcat(shuffle, &offset);
98 DCHECK_NE(offset, 0); // 0 is identity, it should not be matched.
99 // Since we already have a concat shuffle, we know that the indices goes from:
100 // [ offset, ..., 15, 0, ... ], it suffices to check that the offset points
101 // to the low byte of a 32x4 element.
102 if (!is_concat || !is_swizzle || offset % 4 != 0) {
103 return false;
104 }
105
106 uint8_t offset_32 = offset / 4;
107 for (int i = 0; i < 4; i++) {
108 shuffle32x4[i] = (offset_32 + i) % 4;
109 }
110 return true;
111}
112
113bool SimdShuffle::TryMatch32x4Reverse(const uint8_t* shuffle32x4) {
114 return shuffle32x4[0] == 3 && shuffle32x4[1] == 2 && shuffle32x4[2] == 1 &&
115 shuffle32x4[3] == 0;
116}
117
118bool SimdShuffle::TryMatch32x4OneLaneSwizzle(const uint8_t* shuffle32x4,
119 uint8_t* from_lane,
120 uint8_t* to_lane) {
121 constexpr uint32_t patterns[12]{
122 0x30200000, // 0 -> 1
123 0x30000100, // 0 -> 2
124 0x00020100, // 0 -> 3
125 0x03020101, // 1 -> 0
126 0x03010100, // 1 -> 2
127 0x01020100, // 1 -> 3
128 0x03020102, // 2 -> 0
129 0x03020200, // 2 -> 1
130 0x02020100, // 2 -> 3
131 0x03020103, // 3 -> 0
132 0x03020300, // 3 -> 1
133 0x03030100 // 3 -> 2
134 };
135
136 unsigned pattern_idx = 0;
137 uint32_t shuffle = *reinterpret_cast<const uint32_t*>(shuffle32x4);
138#ifdef V8_TARGET_BIG_ENDIAN
139 shuffle = base::bits::ReverseBytes(shuffle);
140#endif
141 for (unsigned from = 0; from < 4; ++from) {
142 for (unsigned to = 0; to < 4; ++to) {
143 if (from == to) {
144 continue;
145 }
146 if (shuffle == patterns[pattern_idx]) {
147 *from_lane = from;
148 *to_lane = to;
149 return true;
150 }
151 ++pattern_idx;
152 }
153 }
154 return false;
155}
156
157bool SimdShuffle::TryMatch64x2Shuffle(const uint8_t* shuffle,
158 uint8_t* shuffle64x2) {
159 constexpr std::array<std::array<uint8_t, 8>, 4> element_patterns = {
160 {{0, 1, 2, 3, 4, 5, 6, 7},
161 {8, 9, 10, 11, 12, 13, 14, 15},
162 {16, 17, 18, 19, 20, 21, 22, 23},
163 {24, 25, 26, 27, 28, 29, 30, 31}}};
164
165 for (unsigned i = 0; i < 2; ++i) {
166 uint64_t element = *reinterpret_cast<const uint64_t*>(&shuffle[i * 8]);
167 for (unsigned j = 0; j < 4; ++j) {
168 uint64_t pattern =
169 *reinterpret_cast<const uint64_t*>(element_patterns[j].data());
170 if (pattern == element) {
171 shuffle64x2[i] = j;
172 break;
173 }
174 if (j == 3) {
175 return false;
176 }
177 }
178 }
179 return true;
180}
181
182template <int kLanes, int kLaneBytes>
183bool MatchHelper(const uint8_t* input, uint8_t* output) {
184 for (int i = 0; i < kLanes; ++i) {
185 if (input[i * kLaneBytes] % kLaneBytes != 0) return false;
186 for (int j = 1; j < kLaneBytes; ++j) {
187 if (input[i * kLaneBytes + j] - input[i * kLaneBytes + j - 1] != 1)
188 return false;
189 }
190 output[i] = input[i * kLaneBytes] / kLaneBytes;
191 }
192 return true;
193}
194
195bool SimdShuffle::TryMatch64x1Shuffle(const uint8_t* shuffle,
196 uint8_t* shuffle64x1) {
197 return MatchHelper<1, 8>(shuffle, shuffle64x1);
198}
199
200bool SimdShuffle::TryMatch32x1Shuffle(const uint8_t* shuffle,
201 uint8_t* shuffle32x1) {
202 return MatchHelper<1, 4>(shuffle, shuffle32x1);
203}
204
205bool SimdShuffle::TryMatch32x2Shuffle(const uint8_t* shuffle,
206 uint8_t* shuffle32x2) {
207 return MatchHelper<2, 4>(shuffle, shuffle32x2);
208}
209
210bool SimdShuffle::TryMatch32x4Shuffle(const uint8_t* shuffle,
211 uint8_t* shuffle32x4) {
212 return MatchHelper<4, 4>(shuffle, shuffle32x4);
213}
214
215bool SimdShuffle::TryMatch32x8Shuffle(const uint8_t* shuffle,
216 uint8_t* shuffle32x8) {
217 return MatchHelper<8, 4>(shuffle, shuffle32x8);
218}
219
220bool SimdShuffle::TryMatch16x1Shuffle(const uint8_t* shuffle,
221 uint8_t* shuffle16x1) {
222 return MatchHelper<1, 2>(shuffle, shuffle16x1);
223}
224
225bool SimdShuffle::TryMatch16x2Shuffle(const uint8_t* shuffle,
226 uint8_t* shuffle16x2) {
227 return MatchHelper<2, 2>(shuffle, shuffle16x2);
228}
229
230bool SimdShuffle::TryMatch16x4Shuffle(const uint8_t* shuffle,
231 uint8_t* shuffle16x4) {
232 return MatchHelper<4, 2>(shuffle, shuffle16x4);
233}
234
235bool SimdShuffle::TryMatch16x8Shuffle(const uint8_t* shuffle,
236 uint8_t* shuffle16x8) {
237 return MatchHelper<8, 2>(shuffle, shuffle16x8);
238}
239
240bool SimdShuffle::TryMatchConcat(const uint8_t* shuffle, uint8_t* offset) {
241 // Don't match the identity shuffle (e.g. [0 1 2 ... 15]).
242 uint8_t start = shuffle[0];
243 if (start == 0) return false;
244 DCHECK_GT(kSimd128Size, start); // The shuffle should be canonicalized.
245 // A concatenation is a series of consecutive indices, with at most one jump
246 // in the middle from the last lane to the first.
247 for (int i = 1; i < kSimd128Size; ++i) {
248 if ((shuffle[i]) != ((shuffle[i - 1] + 1))) {
249 if (shuffle[i - 1] != 15) return false;
250 if (shuffle[i] % kSimd128Size != 0) return false;
251 }
252 }
253 *offset = start;
254 return true;
255}
256
257bool SimdShuffle::TryMatchBlend(const uint8_t* shuffle) {
258 for (int i = 0; i < 16; ++i) {
259 if ((shuffle[i] & 0xF) != i) return false;
260 }
261 return true;
262}
263
264bool SimdShuffle::TryMatchByteToDwordZeroExtend(const uint8_t* shuffle) {
265 for (int i = 0; i < 16; ++i) {
266 if ((i % 4 != 0) && (shuffle[i] < 16)) return false;
267 if ((i % 4 == 0) && (shuffle[i] > 15 || (shuffle[i] != shuffle[0] + i / 4)))
268 return false;
269 }
270 return true;
271}
272
273namespace {
274// Try to match the first step in a 32x4 pairwise shuffle.
275bool TryMatch32x4Pairwise(const uint8_t* shuffle) {
276 // Pattern to select 32-bit element 1.
277 constexpr uint8_t low_pattern_arr[4] = {4, 5, 6, 7};
278 // And we'll check that element 1 is shuffled into element 0.
279 uint32_t low_shuffle = reinterpret_cast<const uint32_t*>(shuffle)[0];
280 // Pattern to select 32-bit element 3.
281 constexpr uint8_t high_pattern_arr[4] = {12, 13, 14, 15};
282 // And we'll check that element 3 is shuffled into element 2.
283 uint32_t high_shuffle = reinterpret_cast<const uint32_t*>(shuffle)[2];
284 uint32_t low_pattern = *reinterpret_cast<const uint32_t*>(low_pattern_arr);
285 uint32_t high_pattern = *reinterpret_cast<const uint32_t*>(high_pattern_arr);
286 return low_shuffle == low_pattern && high_shuffle == high_pattern;
287}
288
289// Try to match the final step in a 32x4, now 32x2, pairwise shuffle.
290bool TryMatch32x2Pairwise(const uint8_t* shuffle) {
291 // Pattern to select 32-bit element 2.
292 constexpr uint8_t pattern_arr[4] = {8, 9, 10, 11};
293 // And we'll check that element 2 is shuffled to element 0.
294 uint32_t low_shuffle = reinterpret_cast<const uint32_t*>(shuffle)[0];
295 uint32_t pattern = *reinterpret_cast<const uint32_t*>(pattern_arr);
296 return low_shuffle == pattern;
297}
298
299// Try to match the first step in a upper-to-lower half shuffle.
300bool TryMatchUpperToLowerFirst(const uint8_t* shuffle) {
301 // There's 16 'active' bytes, so the pattern to select the upper half starts
302 // at byte 8.
303 constexpr uint8_t low_pattern_arr[8] = {8, 9, 10, 11, 12, 13, 14, 15};
304 // And we'll check that the top half is shuffled into the lower.
305 uint64_t low_shuffle = reinterpret_cast<const uint64_t*>(shuffle)[0];
306 uint64_t low_pattern = *reinterpret_cast<const uint64_t*>(low_pattern_arr);
307 return low_shuffle == low_pattern;
308}
309
310// Try to match the second step in a upper-to-lower half shuffle.
311bool TryMatchUpperToLowerSecond(const uint8_t* shuffle) {
312 // There's 8 'active' bytes, so the pattern to select the upper half starts
313 // at byte 4.
314 constexpr uint8_t low_pattern_arr[4] = {4, 5, 6, 7};
315 // And we'll check that the top half is shuffled into the lower.
316 uint32_t low_shuffle = reinterpret_cast<const uint32_t*>(shuffle)[0];
317 uint32_t low_pattern = *reinterpret_cast<const uint32_t*>(low_pattern_arr);
318 return low_shuffle == low_pattern;
319}
320
321// Try to match the third step in a upper-to-lower half shuffle.
322bool TryMatchUpperToLowerThird(const uint8_t* shuffle) {
323 // The vector now has 4 'active' bytes, select the top two.
324 constexpr uint8_t low_pattern_arr[2] = {2, 3};
325 // And check they're shuffled to the lower half.
326 uint16_t low_shuffle = reinterpret_cast<const uint16_t*>(shuffle)[0];
327 uint16_t low_pattern = *reinterpret_cast<const uint16_t*>(low_pattern_arr);
328 return low_shuffle == low_pattern;
329}
330
331// Try to match the fourth step in a upper-to-lower half shuffle.
332bool TryMatchUpperToLowerFourth(const uint8_t* shuffle) {
333 return shuffle[0] == 1;
334}
335} // end namespace
336
338 const uint8_t* shuffle2,
339 const uint8_t* shuffle3,
340 const uint8_t* shuffle4) {
341 return TryMatchUpperToLowerFirst(shuffle1) &&
342 TryMatchUpperToLowerSecond(shuffle2) &&
343 TryMatchUpperToLowerThird(shuffle3) &&
344 TryMatchUpperToLowerFourth(shuffle4);
345}
346
348 const uint8_t* shuffle2,
349 const uint8_t* shuffle3) {
350 return TryMatchUpperToLowerFirst(shuffle1) &&
351 TryMatchUpperToLowerSecond(shuffle2) &&
352 TryMatchUpperToLowerThird(shuffle3);
353}
354
356 const uint8_t* shuffle2) {
357 return TryMatchUpperToLowerFirst(shuffle1) &&
358 TryMatchUpperToLowerSecond(shuffle2);
359}
360
361bool SimdShuffle::TryMatch32x4PairwiseReduce(const uint8_t* shuffle1,
362 const uint8_t* shuffle2) {
363 return TryMatch32x4Pairwise(shuffle1) && TryMatch32x2Pairwise(shuffle2);
364}
365
366bool SimdShuffle::TryMatch64x2Reduce(const uint8_t* shuffle64x2) {
367 return shuffle64x2[0] == 1;
368}
369
370uint8_t SimdShuffle::PackShuffle4(uint8_t* shuffle) {
371 return (shuffle[0] & 3) | ((shuffle[1] & 3) << 2) | ((shuffle[2] & 3) << 4) |
372 ((shuffle[3] & 3) << 6);
373}
374
375uint8_t SimdShuffle::PackBlend8(const uint8_t* shuffle16x8) {
376 int8_t result = 0;
377 for (int i = 0; i < 8; ++i) {
378 result |= (shuffle16x8[i] >= 8 ? 1 : 0) << i;
379 }
380 return result;
381}
382
383uint8_t SimdShuffle::PackBlend4(const uint8_t* shuffle32x4) {
384 int8_t result = 0;
385 for (int i = 0; i < 4; ++i) {
386 result |= (shuffle32x4[i] >= 4 ? 0x3 : 0) << (i * 2);
387 }
388 return result;
389}
390
391int32_t SimdShuffle::Pack2Lanes(const std::array<uint8_t, 2>& shuffle) {
392 int32_t result = 0;
393 for (int i = 1; i >= 0; --i) {
394 result <<= 8;
395 result |= shuffle[i];
396 }
397 return result;
398}
399
400int32_t SimdShuffle::Pack4Lanes(const uint8_t* shuffle) {
401 int32_t result = 0;
402 for (int i = 3; i >= 0; --i) {
403 result <<= 8;
404 result |= shuffle[i];
405 }
406 return result;
407}
408
409void SimdShuffle::Pack16Lanes(uint32_t* dst, const uint8_t* shuffle) {
410 for (int i = 0; i < 4; i++) {
411 dst[i] = wasm::SimdShuffle::Pack4Lanes(shuffle + (i * 4));
412 }
413}
414
415#ifdef V8_TARGET_ARCH_X64
416// static
417bool SimdShuffle::TryMatchVpshufd(const uint8_t* shuffle32x8,
418 uint8_t* control) {
419 *control = 0;
420 for (int i = 0; i < 4; ++i) {
421 uint8_t mask;
422 if (shuffle32x8[i] < 4 && shuffle32x8[i + 4] - shuffle32x8[i] == 4) {
423 mask = shuffle32x8[i];
424 *control |= mask << (2 * i);
425 continue;
426 }
427 return false;
428 }
429 return true;
430}
431
432// static
433bool SimdShuffle::TryMatchShufps256(const uint8_t* shuffle32x8,
434 uint8_t* control) {
435 *control = 0;
436 for (int i = 0; i < 4; ++i) {
437 // low 128-bits and high 128-bits should have the same shuffle order.
438 if (shuffle32x8[i + 4] - shuffle32x8[i] == 4) {
439 // [63:0] bits select from SRC1,
440 // [127:64] bits select from SRC2
441 if ((i < 2 && shuffle32x8[i] < 4) ||
442 (i >= 2 && shuffle32x8[i] >= 8 && shuffle32x8[i] < 12)) {
443 *control |= (shuffle32x8[i] % 4) << (2 * i);
444 continue;
445 }
446 return false;
447 }
448 return false;
449 }
450 return true;
451}
452#endif // V8_TARGET_ARCH_X64
453
455 std::array<uint8_t, kSimd128Size> shuffle) {
456 return std::all_of(shuffle.begin(), shuffle.end(),
457 [](auto i) { return (i < kSimd128Size) || (i & 0x80); });
458}
459
460} // namespace wasm
461} // namespace internal
462} // namespace v8
static bool TryMatch16x4Shuffle(const uint8_t *shuffle, uint8_t *shuffle16x4)
static bool TryMatch32x1Shuffle(const uint8_t *shuffle, uint8_t *shuffle32x1)
static bool TryMatch16x1Shuffle(const uint8_t *shuffle, uint8_t *shuffle16x1)
static uint8_t PackBlend4(const uint8_t *shuffle32x4)
static uint8_t PackBlend8(const uint8_t *shuffle16x8)
static CanonicalShuffle TryMatchCanonical(const ShuffleArray &shuffle)
static bool TryMatch16x2Shuffle(const uint8_t *shuffle, uint8_t *shuffle16x2)
static bool TryMatch16x8UpperToLowerReduce(const uint8_t *shuffle1, const uint8_t *shuffle2, const uint8_t *shuffle3)
static bool TryMatch32x4Rotate(const uint8_t *shuffle, uint8_t *shuffle32x4, bool is_swizzle)
static bool TryMatch32x4Shuffle(const uint8_t *shuffle, uint8_t *shuffle32x4)
static int32_t Pack2Lanes(const std::array< uint8_t, 2 > &shuffle)
static bool TryMatchIdentity(const uint8_t *shuffle)
std::array< uint8_t, kSimd128Size > ShuffleArray
static bool TryMatch64x1Shuffle(const uint8_t *shuffle, uint8_t *shuffle64x1)
static bool TryMatch32x4PairwiseReduce(const uint8_t *shuffle1, const uint8_t *shuffle2)
static void Pack16Lanes(uint32_t *dst, const uint8_t *shuffle)
static uint8_t PackShuffle4(uint8_t *shuffle)
static bool TryMatch32x2Shuffle(const uint8_t *shuffle, uint8_t *shuffle32x2)
static bool TryMatchByteToDwordZeroExtend(const uint8_t *shuffle)
static bool TryMatch32x4Reverse(const uint8_t *shuffle32x4)
static bool TryMatchConcat(const uint8_t *shuffle, uint8_t *offset)
static bool TryMatch64x2Shuffle(const uint8_t *shuffle, uint8_t *shuffle64x2)
static bool TryMatch32x4OneLaneSwizzle(const uint8_t *shuffle32x4, uint8_t *from, uint8_t *to)
static int32_t Pack4Lanes(const uint8_t *shuffle)
static bool TryMatch32x4UpperToLowerReduce(const uint8_t *shuffle1, const uint8_t *shuffle2)
static bool TryMatch64x2Reduce(const uint8_t *shuffle64x2)
static bool TryMatch32x8Shuffle(const uint8_t *shuffle, uint8_t *shuffle32x8)
static bool TryMatch16x8Shuffle(const uint8_t *shuffle, uint8_t *shuffle16x8)
static bool TryMatch8x16UpperToLowerReduce(const uint8_t *shuffle1, const uint8_t *shuffle2, const uint8_t *shuffle3, const uint8_t *shuffle4)
static bool TryMatchBlend(const uint8_t *shuffle)
static bool AllInRangeOrTopBitSet(std::array< uint8_t, kSimd128Size > shuffle)
int start
int32_t offset
std::string pattern
ZoneVector< RpoNumber > & result
Point from
Point to
uint32_t const mask
unsigned short uint16_t
Definition unicode.cc:39
T ReverseBytes(T value)
Definition bits.h:74
constexpr const SimdShuffle::ShuffleArray expand(const std::array< uint8_t, N > in)
bool MatchHelper(const uint8_t *input, uint8_t *output)
constexpr int kSimd128Size
Definition globals.h:706
constexpr int N
Definition c-api.cc:87
#define DCHECK_NE(v1, v2)
Definition logging.h:486
#define DCHECK_GT(v1, v2)
Definition logging.h:487