// NOTE(review): this excerpt is lossy — the leading numbers are original line
// numbers fused into the text, and the lines between them are not visible.
// The comments below describe only the statements that can be seen.
//
// Function template parameterized on `simd_size` (default kSimd128Size). The
// function name and its leading parameters are on a line that is not shown;
// the visible tail of the signature is two bool out-parameters.
50 template <const
int simd_size = kSimd128Size>
52 bool* needs_swap,
bool* is_swizzle)
// Flags recording which of the two sources the shuffle refers to. Where they
// are set is not visible — presumably inside the scan loop below.
61 bool src0_is_used =
false;
62 bool src1_is_used =
false;
// Visit each of the simd_size shuffle indices.
63 for (
int i = 0;
i < simd_size; ++
i) {
// Indices below simd_size are distinguished from those at or above it;
// presumably the former select from the first source (branch body not shown).
64 if (shuffle[
i] < simd_size) {
// Case: only the first source is referenced.
70 if (src0_is_used && !src1_is_used) {
72 }
// Case: only the second source is referenced.
else if (src1_is_used && !src0_is_used) {
// Case keyed on the first index being at or above simd_size.
81 if (shuffle[0] >= simd_size) {
// Toggle the simd_size bit of every index (XOR with simd_size).
85 for (
int i = 0;
i < simd_size; ++
i) {
86 shuffle[
i] ^= simd_size;
// Mask every index with simd_size - 1. If simd_size is a power of two this
// reduces each index modulo simd_size — TODO confirm simd_size is always one.
92 for (
int i = 0;
i < simd_size; ++
i) shuffle[
i] &= simd_size - 1;
// Declaration only: static predicate over the shuffle index array, returning
// bool. Presumably true when the shuffle leaves every element in place
// (inferred from the name; the definition is not visible here — confirm).
99 static bool TryMatchIdentity(
const uint8_t* shuffle);
// Body fragment; the enclosing signature (and the definitions of LANES,
// kBytesPerLane and `index`) are not visible in this excerpt. The visible
// checks require that the first lane of `shuffle` is a lane-aligned run of
// consecutive indices and that every other lane repeats it exactly.
//
// Scratch copy of the first lane's indices.
108 uint8_t lane0[kBytesPerLane];
109 lane0[0] = shuffle[0];
// The first index must be a multiple of kBytesPerLane.
110 if (lane0[0] % kBytesPerLane != 0)
return false;
// The rest of the first lane must be consecutive: lane0[i] == lane0[0] + i.
111 for (
int i = 1;
i < kBytesPerLane; ++
i) {
112 lane0[
i] = shuffle[
i];
113 if (lane0[
i] != lane0[0] +
i)
return false;
// Every later lane (1 .. LANES-1) must equal the first lane element-wise.
116 for (
int i = 1;
i < LANES; ++
i) {
117 for (
int j = 0; j < kBytesPerLane; ++j) {
118 if (lane0[j] != shuffle[
i * kBytesPerLane + j])
return false;
// Report the first index divided by kBytesPerLane through the out-pointer
// (i.e. the number of the lane being repeated).
121 *index = lane0[0] / kBytesPerLane;
// Declarations of matchers whose names refer to a 32x4 shape. Behavior
// notes are inferred from names and signatures only; the definitions are not
// visible here — confirm before relying on them.
//
// Takes the shuffle and a writable `shuffle32x4` array. The parameter list
// continues on a line that is not visible in this excerpt.
128 static bool TryMatch32x4Rotate(
const uint8_t* shuffle, uint8_t* shuffle32x4,
// Predicate over a read-only `shuffle32x4` array; presumably tests for a
// lane-order reversal.
132 static bool TryMatch32x4Reverse(
const uint8_t* shuffle32x4);
// Takes a read-only `shuffle32x4` array and two uint8_t out-pointers, `from`
// and `to` — presumably the source and destination lane of the single lane
// that moves.
135 static bool TryMatch32x4OneLaneSwizzle(
const uint8_t* shuffle32x4,
136 uint8_t* from, uint8_t* to);
// Family of declarations named TryMatch<bits>x<lanes>Shuffle. Each takes the
// read-only `shuffle` array plus a writable output array named after the lane
// shape, and returns bool. Presumably each reports whether `shuffle` can be
// expressed at that lane width and count, filling the output with lane indices
// on success — inferred from the names; definitions are not visible, confirm.
//
// 64-bit lane variants.
141 static bool TryMatch64x1Shuffle(
const uint8_t* shuffle, uint8_t* shuffle64x1);
146 static bool TryMatch64x2Shuffle(
const uint8_t* shuffle, uint8_t* shuffle64x2);
// 32-bit lane variants.
151 static bool TryMatch32x1Shuffle(
const uint8_t* shuffle, uint8_t* shuffle32x1);
156 static bool TryMatch32x2Shuffle(
const uint8_t* shuffle, uint8_t* shuffle32x2);
161 static bool TryMatch32x4Shuffle(
const uint8_t* shuffle, uint8_t* shuffle32x4);
167 static bool TryMatch32x8Shuffle(
const uint8_t* shuffle, uint8_t* shuffle32x8);
// 16-bit lane variants.
172 static bool TryMatch16x1Shuffle(
const uint8_t* shuffle, uint8_t* shuffle16x1);
177 static bool TryMatch16x2Shuffle(
const uint8_t* shuffle, uint8_t* shuffle16x2);
182 static bool TryMatch16x4Shuffle(
const uint8_t* shuffle, uint8_t* shuffle16x4);
187 static bool TryMatch16x8Shuffle(
const uint8_t* shuffle, uint8_t* shuffle16x8);
// Declarations only; semantics below are inferred from names and signatures
// and should be confirmed against the definitions, which are not visible.
//
// Takes the shuffle and a uint8_t out-pointer `offset` — presumably the
// offset at which the selected window starts within the concatenated inputs.
193 static bool TryMatchConcat(
const uint8_t* shuffle, uint8_t*
offset);
// Predicate over the shuffle; presumably true when every output position
// takes the same-position element from one of the two inputs.
199 static bool TryMatchBlend(
const uint8_t* shuffle);
// Predicate over the shuffle; presumably recognizes a pattern equivalent to
// zero-extending bytes to 32-bit lanes.
205 static bool TryMatchByteToDwordZeroExtend(
const uint8_t* shuffle);
// Declarations of predicates that inspect one or more shuffles at once. The
// number of shuffle arguments shrinks as the lane width grows (four for 8x16,
// three for 16x8, two for 32x4). Presumably each recognizes a sequence of
// shuffles that together form a reduction — inferred from the names; the
// definitions are not visible here, confirm.
210 static bool TryMatch8x16UpperToLowerReduce(
const uint8_t* shuffle1,
211 const uint8_t* shuffle2,
212 const uint8_t* shuffle3,
213 const uint8_t* shuffle4);
218 static bool TryMatch16x8UpperToLowerReduce(
const uint8_t* shuffle1,
219 const uint8_t* shuffle2,
220 const uint8_t* shuffle3);
225 static bool TryMatch32x4UpperToLowerReduce(
const uint8_t* shuffle1,
226 const uint8_t* shuffle2);
// Two-shuffle predicate; by its name it targets a pairwise rather than an
// upper-to-lower pattern.
231 static bool TryMatch32x4PairwiseReduce(
const uint8_t* shuffle1,
232 const uint8_t* shuffle2);
// Single-argument predicate over a `shuffle64x2` array.
236 static bool TryMatch64x2Reduce(
const uint8_t* shuffle64x2);
// Declarations of packing helpers. Presumably each encodes shuffle indices
// into a compact immediate/constant form — inferred from names and return
// types; the definitions are not visible here, confirm.
//
// Returns a uint8_t. Note the parameter is a non-const pointer, unlike the
// other helpers in this group.
240 static uint8_t PackShuffle4(uint8_t* shuffle);
// Return a uint8_t derived from a read-only 16x8 / 32x4 shuffle respectively.
242 static uint8_t PackBlend8(
const uint8_t* shuffle16x8);
244 static uint8_t PackBlend4(
const uint8_t* shuffle32x4);
// Returns an int32_t from a two-element std::array passed by const reference.
246 static int32_t Pack2Lanes(
const std::array<uint8_t, 2>& shuffle);
// Returns an int32_t from a read-only shuffle pointer.
248 static int32_t Pack4Lanes(
const uint8_t* shuffle);
// Returns nothing; takes a writable uint32_t pointer `dst` — presumably the
// destination for the packed words — and the read-only shuffle.
250 static void Pack16Lanes(uint32_t* dst,
const uint8_t* shuffle);
// Enumerators from an enum whose declaration and remaining members are not
// visible in this excerpt. They come in low/high pairs, one pair per lane
// shape named in the identifier: 32x4, 16x8 and 8x16.
261 kS32x4InterleaveLowHalves,
262 kS32x4InterleaveHighHalves,
270 kS16x8InterleaveLowHalves,
271 kS16x8InterleaveHighHalves,
279 kS8x16InterleaveLowHalves,
280 kS8x16InterleaveHighHalves,
// What follows is compiled only when V8_TARGET_ARCH_X64 is defined. The
// matching #endif is not visible in this excerpt.
289#ifdef V8_TARGET_ARCH_X64
// Both matchers take a read-only `shuffle32x8` array and a uint8_t
// out-pointer `control` — presumably the immediate control byte for the
// instruction each is named after (definitions not visible; confirm).
293 static bool TryMatchVpshufd(
const uint8_t* shuffle32x8, uint8_t* control);
298 static bool TryMatchShufps256(
const uint8_t* shuffle32x8, uint8_t* control);
// Compile-time table of ShuffleEntry<kSimd128Size> records. Each visible
// record starts with a 16-element index pattern followed by a
// compiler::kX64... opcode. Any further fields of each record, and the
// closing braces, fall on lines that are not visible in this excerpt.
// Patterns use indices 0-31; presumably 0-15 address the first input and
// 16-31 the second — confirm against the matcher that consumes this table.
305 static constexpr ShuffleEntry<kSimd128Size> arch_shuffles128[] = {
// Unpack low/high: alternate equal-sized groups taken from the 0-15 and
// 16-31 index ranges, at 8-, 4-, 2- and 1-element group sizes.
306 {{0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23},
307 compiler::kX64S64x2UnpackLow,
311 {{8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31},
312 compiler::kX64S64x2UnpackHigh,
316 {{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23},
317 compiler::kX64S32x4UnpackLow,
321 {{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31},
322 compiler::kX64S32x4UnpackHigh,
326 {{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23},
327 compiler::kX64S16x8UnpackLow,
331 {{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31},
332 compiler::kX64S16x8UnpackHigh,
336 {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23},
337 compiler::kX64S8x16UnpackLow,
341 {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31},
342 compiler::kX64S8x16UnpackHigh,
// Unzip low/high: keep every other group across the whole 0-31 range —
// even-numbered groups for Low, odd-numbered groups for High.
347 {{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29},
348 compiler::kX64S16x8UnzipLow,
352 {{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31},
353 compiler::kX64S16x8UnzipHigh,
357 {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30},
358 compiler::kX64S8x16UnzipLow,
362 {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31},
363 compiler::kX64S8x16UnzipHigh,
// Transpose low/high: pair index k with index k + 16, using even k for Low
// and odd k for High.
367 {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30},
368 compiler::kX64S8x16TransposeLow,
372 {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31},
373 compiler::kX64S8x16TransposeHigh,
// Reverse: reverse the element order inside each group of 8, 4 or 2
// elements. These patterns use only indices 0-15.
377 {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8},
378 compiler::kX64S8x8Reverse,
382 {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
383 compiler::kX64S8x4Reverse,
387 {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
388 compiler::kX64S8x2Reverse,
// Compile-time table of ShuffleEntry<kSimd256Size> records: a 32-element
// index pattern followed by a compiler::kX64... opcode. Patterns use indices
// 0-63; presumably 0-31 address the first input and 32-63 the second —
// confirm against the matcher that consumes this table.
393 static constexpr ShuffleEntry<kSimd256Size> arch_shuffles256[] = {
// Unpack low: within each 16-element half of the pattern, alternate
// 4-element groups from the two index ranges, starting at offsets 0 and 16.
394 {{0, 1, 2, 3, 32, 33, 34, 35, 4, 5, 6, 7, 36, 37, 38, 39,
395 16, 17, 18, 19, 48, 49, 50, 51, 20, 21, 22, 23, 52, 53, 54, 55},
396 compiler::kX64S32x8UnpackLow},
// Unpack high: same alternation, starting at offsets 8 and 24.
398 {{8, 9, 10, 11, 40, 41, 42, 43, 12, 13, 14, 15, 44, 45, 46, 47,
399 24, 25, 26, 27, 56, 57, 58, 59, 28, 29, 30, 31, 60, 61, 62, 63},
400 compiler::kX64S32x8UnpackHigh}};
// Looks `shuffle` up in one of the arch_shuffles tables and, on a match,
// stores a pointer to the matching entry through `arch_shuffle`.
// NOTE(review): several lines of this function (the table-selection
// condition, the declarations of `num_entries` and `j`, the mismatch
// handling and the return statements) are not visible in this excerpt.
//
// Parameters:
//   shuffle      - index pattern to look up.
//   is_swizzle   - selects the comparison mask (see below).
//   arch_shuffle - out-pointer receiving the address of the matching entry.
402 template <
int simd_size>
403 static bool TryMatchArchShuffle(
const uint8_t* shuffle,
bool is_swizzle,
404 const ShuffleEntry<simd_size>** arch_shuffle)
// For a swizzle, compare only the bits below simd_size; otherwise also keep
// the simd_size bit, so indices at or above simd_size remain distinct.
407 uint8_t
mask = is_swizzle ? simd_size - 1 : 2 * simd_size - 1;
409 const ShuffleEntry<simd_size>* table;
// Choose the table and its length; the condition is not visible here —
// presumably it depends on simd_size.
412 table = arch_shuffles128;
413 num_entries =
arraysize(arch_shuffles128);
415 table = arch_shuffles256;
416 num_entries =
arraysize(arch_shuffles256);
// Linear scan over the chosen table.
419 for (
size_t i = 0;
i < num_entries; ++
i) {
420 const ShuffleEntry<simd_size>& entry = table[
i];
// Compare masked indices position by position; what happens on a mismatch
// is not visible — presumably the loop exits early.
422 for (; j < simd_size; ++j) {
423 if ((entry.shuffle[j] &
mask) != (shuffle[j] &
mask)) {
// j reaching simd_size means the comparison loop ran through every position;
// hand the entry back to the caller.
427 if (j == simd_size) {
428 *arch_shuffle = &entry;