v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
simulator-logic-arm64.cc
Go to the documentation of this file.
1// Copyright 2016 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
6
7#if defined(USE_SIMULATOR)
8
9#include <cmath>
10
12#include "third_party/fp16/src/include/fp16.h"
13
14namespace v8 {
15namespace internal {
16
17class half {
18 public:
19 half() : bits_(0) {}
20 half(float f) : bits_(fp16_ieee_from_fp32_value(f)) {}
21 explicit half(double d) : bits_(DoubleToFloat16(d)) {}
22 explicit half(uint16_t b) : bits_(b) {}
23 operator float() const { return fp16_ieee_to_fp32_value(bits_); }
24
25 uint16_t bits() const { return bits_; }
26
27 private:
28 uint16_t bits_;
29};
30
31template <>
32half Simulator::FPDefaultNaN<half>() {
33 return half(kFP16DefaultNaN);
34}
35
36inline half ToQuietNaN(half num) {
37 return half(static_cast<uint16_t>(num.bits() | kHQuietNanMask));
38}
39
// Generic case: defers to std::isnormal for the argument type.
template <typename T>
bool isnormal(T f) {
  const bool is_normal_value = std::isnormal(f);
  return is_normal_value;
}
44
45template <>
46bool isnormal(half f) {
47 return float16classify(f.bits()) == FP_NORMAL;
48}
49
50double copysign(double a, double f) { return std::copysign(a, f); }
51float copysign(double a, float f) { return std::copysign(a, f); }
52half copysign(double a, half f) {
53 return std::copysign(static_cast<float>(a), f);
54}
55
56static_assert(sizeof(half) == sizeof(uint16_t), "Half must be 16 bit");
57
58namespace {
59
60// See FPRound for a description of this function.
61inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa,
62 FPRounding round_mode) {
63 uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(
64 sign, exponent, mantissa, round_mode);
65 return base::bit_cast<double>(bits);
66}
67
68// See FPRound for a description of this function.
69inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa,
70 FPRounding round_mode) {
71 uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(
72 sign, exponent, mantissa, round_mode);
73 return base::bit_cast<float>(bits);
74}
75
76// See FPRound for a description of this function.
77inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent,
78 uint64_t mantissa, FPRounding round_mode) {
79 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
80 sign, exponent, mantissa, round_mode);
81}
82
83} // namespace
84
85double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
86 if (src >= 0) {
87 return UFixedToDouble(src, fbits, round);
88 } else if (src == INT64_MIN) {
89 return -UFixedToDouble(src, fbits, round);
90 } else {
91 return -UFixedToDouble(-src, fbits, round);
92 }
93}
94
95double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
96 // An input of 0 is a special case because the result is effectively
97 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
98 if (src == 0) {
99 return 0.0;
100 }
101
102 // Calculate the exponent. The highest significant bit will have the value
103 // 2^exponent.
104 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
105 const int64_t exponent = highest_significant_bit - fbits;
106
107 return FPRoundToDouble(0, exponent, src, round);
108}
109
110float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
111 if (src >= 0) {
112 return UFixedToFloat(src, fbits, round);
113 } else if (src == INT64_MIN) {
114 return -UFixedToFloat(src, fbits, round);
115 } else {
116 return -UFixedToFloat(-src, fbits, round);
117 }
118}
119
120float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
121 // An input of 0 is a special case because the result is effectively
122 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
123 if (src == 0) {
124 return 0.0f;
125 }
126
127 // Calculate the exponent. The highest significant bit will have the value
128 // 2^exponent.
129 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
130 const int32_t exponent = highest_significant_bit - fbits;
131
132 return FPRoundToFloat(0, exponent, src, round);
133}
134
135float16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
136 if (src >= 0) {
137 return UFixedToFloat16(src, fbits, round);
138 } else if (src == INT64_MIN) {
139 return -UFixedToFloat16(src, fbits, round);
140 } else {
141 return -UFixedToFloat16(-src, fbits, round);
142 }
143}
144
145float16 Simulator::UFixedToFloat16(uint64_t src, int fbits, FPRounding round) {
146 // An input of 0 is a special case because the result is effectively
147 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
148 if (src == 0) {
149 return static_cast<float16>(0);
150 }
151
152 // Calculate the exponent. The highest significant bit will have the value
153 // 2^exponent.
154 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
155 const int16_t exponent = highest_significant_bit - fbits;
156
157 return FPRoundToFloat16(0, exponent, src, round);
158}
159
160double Simulator::FPToDouble(float value) {
161 switch (std::fpclassify(value)) {
162 case FP_NAN: {
163 if (IsSignallingNaN(value)) {
164 FPProcessException();
165 }
166 if (DN()) return kFP64DefaultNaN;
167
168 // Convert NaNs as the processor would:
169 // - The sign is propagated.
170 // - The mantissa is transferred entirely, except that the top bit is
171 // forced to '1', making the result a quiet NaN. The unused (low-order)
172 // mantissa bits are set to 0.
173 uint32_t raw = base::bit_cast<uint32_t>(value);
174
175 uint64_t sign = raw >> 31;
176 uint64_t exponent = (1 << kDoubleExponentBits) - 1;
177 uint64_t mantissa = unsigned_bitextract_64(21, 0, raw);
178
179 // Unused low-order bits remain zero.
181
182 // Force a quiet NaN.
183 mantissa |= (UINT64_C(1) << (kDoubleMantissaBits - 1));
184
185 return double_pack(sign, exponent, mantissa);
186 }
187
188 case FP_ZERO:
189 case FP_NORMAL:
190 case FP_SUBNORMAL:
191 case FP_INFINITE: {
192 // All other inputs are preserved in a standard cast, because every value
193 // representable using an IEEE-754 float is also representable using an
194 // IEEE-754 double.
195 return static_cast<double>(value);
196 }
197 }
198
199 UNREACHABLE();
200}
201
202float Simulator::FPToFloat(float16 value) {
203 uint32_t sign = value >> 15;
204 uint32_t exponent =
206 kFloat16MantissaBits, value);
207 uint32_t mantissa =
209
210 switch (float16classify(value)) {
211 case FP_ZERO:
212 return (sign == 0) ? 0.0f : -0.0f;
213
214 case FP_INFINITE:
216
217 case FP_SUBNORMAL: {
218 // Calculate shift required to put mantissa into the most-significant bits
219 // of the destination mantissa.
220 int shift = CountLeadingZeros(mantissa << (32 - 10), 32);
221
222 // Shift mantissa and discard implicit '1'.
223 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
224 mantissa &= (1 << kFloatMantissaBits) - 1;
225
226 // Adjust the exponent for the shift applied, and rebias.
227 exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias);
228 break;
229 }
230
231 case FP_NAN: {
232 if (IsSignallingNaN(value)) {
233 FPProcessException();
234 }
235 if (DN()) return kFP32DefaultNaN;
236
237 // Convert NaNs as the processor would:
238 // - The sign is propagated.
239 // - The mantissa is transferred entirely, except that the top bit is
240 // forced to '1', making the result a quiet NaN. The unused (low-order)
241 // mantissa bits are set to 0.
242 exponent = (1 << kFloatExponentBits) - 1;
243
244 // Increase bits in mantissa, making low-order bits 0.
246 mantissa |= 1 << (kFloatMantissaBits - 1); // Force a quiet NaN.
247 break;
248 }
249
250 case FP_NORMAL: {
251 // Increase bits in mantissa, making low-order bits 0.
253
254 // Change exponent bias.
256 break;
257 }
258
259 default:
260 UNREACHABLE();
261 }
262 return float_pack(sign, exponent, mantissa);
263}
264
265float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
266 // Only the FPTieEven rounding mode is implemented.
267 DCHECK_EQ(round_mode, FPTieEven);
268 USE(round_mode);
269
270 int64_t sign = float_sign(value);
271 int64_t exponent =
272 static_cast<int64_t>(float_exp(value)) - kFloatExponentBias;
273 uint32_t mantissa = float_mantissa(value);
274
275 switch (std::fpclassify(value)) {
276 case FP_NAN: {
277 if (IsSignallingNaN(value)) {
278 FPProcessException();
279 }
280 if (DN()) return kFP16DefaultNaN;
281
282 // Convert NaNs as the processor would:
283 // - The sign is propagated.
284 // - The mantissa is transferred as much as possible, except that the top
285 // bit is forced to '1', making the result a quiet NaN.
289 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;
290 return result;
291 }
292
293 case FP_ZERO:
294 return (sign == 0) ? 0 : 0x8000;
295
296 case FP_INFINITE:
298
299 case FP_NORMAL:
300 case FP_SUBNORMAL: {
301 // Convert float-to-half as the processor would, assuming that FPCR.FZ
302 // (flush-to-zero) is not set.
303
304 // Add the implicit '1' bit to the mantissa.
305 mantissa += (1 << kFloatMantissaBits);
306 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
307 }
308 }
309
310 UNREACHABLE();
311}
312
313float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
314 // Only the FPTieEven rounding mode is implemented.
315 DCHECK_EQ(round_mode, FPTieEven);
316 USE(round_mode);
317
318 int64_t sign = double_sign(value);
319 int64_t exponent =
320 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
321 uint64_t mantissa = double_mantissa(value);
322
323 switch (std::fpclassify(value)) {
324 case FP_NAN: {
325 if (IsSignallingNaN(value)) {
326 FPProcessException();
327 }
328 if (DN()) return kFP16DefaultNaN;
329
330 // Convert NaNs as the processor would:
331 // - The sign is propagated.
332 // - The mantissa is transferred as much as possible, except that the top
333 // bit is forced to '1', making the result a quiet NaN.
337 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;
338 return result;
339 }
340
341 case FP_ZERO:
342 return (sign == 0) ? 0 : 0x8000;
343
344 case FP_INFINITE:
346
347 case FP_NORMAL:
348 case FP_SUBNORMAL: {
349 // Convert double-to-half as the processor would, assuming that FPCR.FZ
350 // (flush-to-zero) is not set.
351
352 // Add the implicit '1' bit to the mantissa.
353 mantissa += (UINT64_C(1) << kDoubleMantissaBits);
354 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
355 }
356 }
357
358 UNREACHABLE();
359}
360
361float Simulator::FPToFloat(double value, FPRounding round_mode) {
362 // Only the FPTieEven rounding mode is implemented.
363 DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
364 USE(round_mode);
365
366 switch (std::fpclassify(value)) {
367 case FP_NAN: {
368 if (IsSignallingNaN(value)) {
369 FPProcessException();
370 }
371 if (DN()) return kFP32DefaultNaN;
372
373 // Convert NaNs as the processor would:
374 // - The sign is propagated.
375 // - The mantissa is transferred as much as possible, except that the
376 // top bit is forced to '1', making the result a quiet NaN.
377
378 uint64_t raw = base::bit_cast<uint64_t>(value);
379
380 uint32_t sign = raw >> 63;
381 uint32_t exponent = (1 << 8) - 1;
382 uint32_t mantissa = static_cast<uint32_t>(unsigned_bitextract_64(
384 mantissa |= (1 << (kFloatMantissaBits - 1)); // Force a quiet NaN.
385
386 return float_pack(sign, exponent, mantissa);
387 }
388
389 case FP_ZERO:
390 case FP_INFINITE: {
391 // In a C++ cast, any value representable in the target type will be
392 // unchanged. This is always the case for +/-0.0 and infinities.
393 return static_cast<float>(value);
394 }
395
396 case FP_NORMAL:
397 case FP_SUBNORMAL: {
398 // Convert double-to-float as the processor would, assuming that FPCR.FZ
399 // (flush-to-zero) is not set.
400 uint32_t sign = double_sign(value);
401 int64_t exponent =
402 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
403 uint64_t mantissa = double_mantissa(value);
404 if (std::fpclassify(value) == FP_NORMAL) {
405 // For normal FP values, add the hidden bit.
406 mantissa |= (UINT64_C(1) << kDoubleMantissaBits);
407 }
408 return FPRoundToFloat(sign, exponent, mantissa, round_mode);
409 }
410 }
411
412 UNREACHABLE();
413}
414
415void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
416 dst.ClearForWrite(vform);
417 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
418 dst.ReadUintFromMem(vform, i, addr);
419 addr += LaneSizeInBytesFromFormat(vform);
420 }
421}
422
423void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index,
424 uint64_t addr) {
425 dst.ReadUintFromMem(vform, index, addr);
426}
427
428void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
429 dst.ClearForWrite(vform);
430 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
431 dst.ReadUintFromMem(vform, i, addr);
432 }
433}
434
435void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
436 LogicVRegister dst2, uint64_t addr1) {
437 dst1.ClearForWrite(vform);
438 dst2.ClearForWrite(vform);
439 int esize = LaneSizeInBytesFromFormat(vform);
440 uint64_t addr2 = addr1 + esize;
441 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
442 dst1.ReadUintFromMem(vform, i, addr1);
443 dst2.ReadUintFromMem(vform, i, addr2);
444 addr1 += 2 * esize;
445 addr2 += 2 * esize;
446 }
447}
448
449void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
450 LogicVRegister dst2, int index, uint64_t addr1) {
451 dst1.ClearForWrite(vform);
452 dst2.ClearForWrite(vform);
453 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
454 dst1.ReadUintFromMem(vform, index, addr1);
455 dst2.ReadUintFromMem(vform, index, addr2);
456}
457
458void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1,
459 LogicVRegister dst2, uint64_t addr) {
460 dst1.ClearForWrite(vform);
461 dst2.ClearForWrite(vform);
462 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
463 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
464 dst1.ReadUintFromMem(vform, i, addr);
465 dst2.ReadUintFromMem(vform, i, addr2);
466 }
467}
468
469void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
470 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) {
471 dst1.ClearForWrite(vform);
472 dst2.ClearForWrite(vform);
473 dst3.ClearForWrite(vform);
474 int esize = LaneSizeInBytesFromFormat(vform);
475 uint64_t addr2 = addr1 + esize;
476 uint64_t addr3 = addr2 + esize;
477 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
478 dst1.ReadUintFromMem(vform, i, addr1);
479 dst2.ReadUintFromMem(vform, i, addr2);
480 dst3.ReadUintFromMem(vform, i, addr3);
481 addr1 += 3 * esize;
482 addr2 += 3 * esize;
483 addr3 += 3 * esize;
484 }
485}
486
487void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
488 LogicVRegister dst2, LogicVRegister dst3, int index,
489 uint64_t addr1) {
490 dst1.ClearForWrite(vform);
491 dst2.ClearForWrite(vform);
492 dst3.ClearForWrite(vform);
493 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
494 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
495 dst1.ReadUintFromMem(vform, index, addr1);
496 dst2.ReadUintFromMem(vform, index, addr2);
497 dst3.ReadUintFromMem(vform, index, addr3);
498}
499
500void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1,
501 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) {
502 dst1.ClearForWrite(vform);
503 dst2.ClearForWrite(vform);
504 dst3.ClearForWrite(vform);
505 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
506 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
507 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
508 dst1.ReadUintFromMem(vform, i, addr);
509 dst2.ReadUintFromMem(vform, i, addr2);
510 dst3.ReadUintFromMem(vform, i, addr3);
511 }
512}
513
514void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
515 LogicVRegister dst2, LogicVRegister dst3,
516 LogicVRegister dst4, uint64_t addr1) {
517 dst1.ClearForWrite(vform);
518 dst2.ClearForWrite(vform);
519 dst3.ClearForWrite(vform);
520 dst4.ClearForWrite(vform);
521 int esize = LaneSizeInBytesFromFormat(vform);
522 uint64_t addr2 = addr1 + esize;
523 uint64_t addr3 = addr2 + esize;
524 uint64_t addr4 = addr3 + esize;
525 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
526 dst1.ReadUintFromMem(vform, i, addr1);
527 dst2.ReadUintFromMem(vform, i, addr2);
528 dst3.ReadUintFromMem(vform, i, addr3);
529 dst4.ReadUintFromMem(vform, i, addr4);
530 addr1 += 4 * esize;
531 addr2 += 4 * esize;
532 addr3 += 4 * esize;
533 addr4 += 4 * esize;
534 }
535}
536
537void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
538 LogicVRegister dst2, LogicVRegister dst3,
539 LogicVRegister dst4, int index, uint64_t addr1) {
540 dst1.ClearForWrite(vform);
541 dst2.ClearForWrite(vform);
542 dst3.ClearForWrite(vform);
543 dst4.ClearForWrite(vform);
544 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
545 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
546 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
547 dst1.ReadUintFromMem(vform, index, addr1);
548 dst2.ReadUintFromMem(vform, index, addr2);
549 dst3.ReadUintFromMem(vform, index, addr3);
550 dst4.ReadUintFromMem(vform, index, addr4);
551}
552
553void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1,
554 LogicVRegister dst2, LogicVRegister dst3,
555 LogicVRegister dst4, uint64_t addr) {
556 dst1.ClearForWrite(vform);
557 dst2.ClearForWrite(vform);
558 dst3.ClearForWrite(vform);
559 dst4.ClearForWrite(vform);
560 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
561 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
562 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
563 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
564 dst1.ReadUintFromMem(vform, i, addr);
565 dst2.ReadUintFromMem(vform, i, addr2);
566 dst3.ReadUintFromMem(vform, i, addr3);
567 dst4.ReadUintFromMem(vform, i, addr4);
568 }
569}
570
571void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
572 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
573 src.WriteUintToMem(vform, i, addr);
574 addr += LaneSizeInBytesFromFormat(vform);
575 }
576}
577
578void Simulator::st1(VectorFormat vform, LogicVRegister src, int index,
579 uint64_t addr) {
580 src.WriteUintToMem(vform, index, addr);
581}
582
583void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
584 uint64_t addr) {
585 int esize = LaneSizeInBytesFromFormat(vform);
586 uint64_t addr2 = addr + esize;
587 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
588 dst.WriteUintToMem(vform, i, addr);
589 dst2.WriteUintToMem(vform, i, addr2);
590 addr += 2 * esize;
591 addr2 += 2 * esize;
592 }
593}
594
595void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
596 int index, uint64_t addr) {
597 int esize = LaneSizeInBytesFromFormat(vform);
598 dst.WriteUintToMem(vform, index, addr);
599 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
600}
601
602void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
603 LogicVRegister dst3, uint64_t addr) {
604 int esize = LaneSizeInBytesFromFormat(vform);
605 uint64_t addr2 = addr + esize;
606 uint64_t addr3 = addr2 + esize;
607 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
608 dst.WriteUintToMem(vform, i, addr);
609 dst2.WriteUintToMem(vform, i, addr2);
610 dst3.WriteUintToMem(vform, i, addr3);
611 addr += 3 * esize;
612 addr2 += 3 * esize;
613 addr3 += 3 * esize;
614 }
615}
616
617void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
618 LogicVRegister dst3, int index, uint64_t addr) {
619 int esize = LaneSizeInBytesFromFormat(vform);
620 dst.WriteUintToMem(vform, index, addr);
621 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
622 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
623}
624
625void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
626 LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) {
627 int esize = LaneSizeInBytesFromFormat(vform);
628 uint64_t addr2 = addr + esize;
629 uint64_t addr3 = addr2 + esize;
630 uint64_t addr4 = addr3 + esize;
631 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
632 dst.WriteUintToMem(vform, i, addr);
633 dst2.WriteUintToMem(vform, i, addr2);
634 dst3.WriteUintToMem(vform, i, addr3);
635 dst4.WriteUintToMem(vform, i, addr4);
636 addr += 4 * esize;
637 addr2 += 4 * esize;
638 addr3 += 4 * esize;
639 addr4 += 4 * esize;
640 }
641}
642
643void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
644 LogicVRegister dst3, LogicVRegister dst4, int index,
645 uint64_t addr) {
646 int esize = LaneSizeInBytesFromFormat(vform);
647 dst.WriteUintToMem(vform, index, addr);
648 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
649 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
650 dst4.WriteUintToMem(vform, index, addr + 3 * esize);
651}
652
653LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
654 const LogicVRegister& src1,
655 const LogicVRegister& src2, Condition cond) {
656 dst.ClearForWrite(vform);
657 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
658 bool result = false;
659 int64_t sa = src1.Int(vform, i);
660 int64_t sb = src2.Int(vform, i);
661 uint64_t ua = src1.Uint(vform, i);
662 uint64_t ub = src2.Uint(vform, i);
663 switch (cond) {
664 case eq:
665 result = (src1.Is(src2) || ua == ub);
666 break;
667 case ge:
668 result = (src1.Is(src2) || sa >= sb);
669 break;
670 case gt:
671 result = (!src1.Is(src2) && sa > sb);
672 break;
673 case hi:
674 result = (!src1.Is(src2) && ua > ub);
675 break;
676 case hs:
677 result = (src1.Is(src2) || ua >= ub);
678 break;
679 case lt:
680 result = (!src1.Is(src2) && sa < sb);
681 break;
682 case le:
683 result = (src1.Is(src2) || sa <= sb);
684 break;
685 default:
686 UNREACHABLE();
687 }
688 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
689 }
690 return dst;
691}
692
693LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
694 const LogicVRegister& src1, int imm,
695 Condition cond) {
696 SimVRegister temp;
697 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
698 return cmp(vform, dst, src1, imm_reg, cond);
699}
700
701LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst,
702 const LogicVRegister& src1,
703 const LogicVRegister& src2) {
704 dst.ClearForWrite(vform);
705 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
706 uint64_t ua = src1.Uint(vform, i);
707 uint64_t ub = src2.Uint(vform, i);
708 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
709 }
710 return dst;
711}
712
713LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst,
714 const LogicVRegister& src1,
715 const LogicVRegister& src2) {
716 int lane_size = LaneSizeInBitsFromFormat(vform);
717 dst.ClearForWrite(vform);
718 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
719 // Test for unsigned saturation.
720 uint64_t ua = src1.UintLeftJustified(vform, i);
721 uint64_t ub = src2.UintLeftJustified(vform, i);
722 uint64_t ur = ua + ub;
723 if (ur < ua) {
724 dst.SetUnsignedSat(i, true);
725 }
726
727 // Test for signed saturation.
728 bool pos_a = (ua >> 63) == 0;
729 bool pos_b = (ub >> 63) == 0;
730 bool pos_r = (ur >> 63) == 0;
731 // If the signs of the operands are the same, but different from the result,
732 // there was an overflow.
733 if ((pos_a == pos_b) && (pos_a != pos_r)) {
734 dst.SetSignedSat(i, pos_a);
735 }
736
737 dst.SetInt(vform, i, ur >> (64 - lane_size));
738 }
739 return dst;
740}
741
742LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
743 const LogicVRegister& src1,
744 const LogicVRegister& src2) {
745 SimVRegister temp1, temp2;
746 uzp1(vform, temp1, src1, src2);
747 uzp2(vform, temp2, src1, src2);
748 add(vform, dst, temp1, temp2);
749 return dst;
750}
751
752LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
753 const LogicVRegister& src1,
754 const LogicVRegister& src2) {
755 SimVRegister temp;
756 mul(vform, temp, src1, src2);
757 add(vform, dst, dst, temp);
758 return dst;
759}
760
761LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
762 const LogicVRegister& src1,
763 const LogicVRegister& src2) {
764 SimVRegister temp;
765 mul(vform, temp, src1, src2);
766 sub(vform, dst, dst, temp);
767 return dst;
768}
769
770LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
771 const LogicVRegister& src1,
772 const LogicVRegister& src2) {
773 dst.ClearForWrite(vform);
774 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
775 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
776 }
777 return dst;
778}
779
780LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
781 const LogicVRegister& src1,
782 const LogicVRegister& src2, int index) {
783 SimVRegister temp;
784 VectorFormat indexform = VectorFormatFillQ(vform);
785 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
786}
787
788LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
789 const LogicVRegister& src1,
790 const LogicVRegister& src2, int index) {
791 SimVRegister temp;
792 VectorFormat indexform = VectorFormatFillQ(vform);
793 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
794}
795
796LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
797 const LogicVRegister& src1,
798 const LogicVRegister& src2, int index) {
799 SimVRegister temp;
800 VectorFormat indexform = VectorFormatFillQ(vform);
801 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
802}
803
804LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
805 const LogicVRegister& src1,
806 const LogicVRegister& src2, int index) {
807 SimVRegister temp;
808 VectorFormat indexform =
810 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
811}
812
813LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
814 const LogicVRegister& src1,
815 const LogicVRegister& src2, int index) {
816 SimVRegister temp;
817 VectorFormat indexform =
819 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
820}
821
822LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
823 const LogicVRegister& src1,
824 const LogicVRegister& src2, int index) {
825 SimVRegister temp;
826 VectorFormat indexform =
828 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
829}
830
831LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
832 const LogicVRegister& src1,
833 const LogicVRegister& src2, int index) {
834 SimVRegister temp;
835 VectorFormat indexform =
837 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
838}
839
840LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
841 const LogicVRegister& src1,
842 const LogicVRegister& src2, int index) {
843 SimVRegister temp;
844 VectorFormat indexform =
846 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
847}
848
849LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
850 const LogicVRegister& src1,
851 const LogicVRegister& src2, int index) {
852 SimVRegister temp;
853 VectorFormat indexform =
855 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
856}
857
858LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
859 const LogicVRegister& src1,
860 const LogicVRegister& src2, int index) {
861 SimVRegister temp;
862 VectorFormat indexform =
864 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
865}
866
867LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
868 const LogicVRegister& src1,
869 const LogicVRegister& src2, int index) {
870 SimVRegister temp;
871 VectorFormat indexform =
873 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
874}
875
876LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
877 const LogicVRegister& src1,
878 const LogicVRegister& src2, int index) {
879 SimVRegister temp;
880 VectorFormat indexform =
882 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
883}
884
885LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
886 const LogicVRegister& src1,
887 const LogicVRegister& src2, int index) {
888 SimVRegister temp;
889 VectorFormat indexform =
891 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
892}
893
894LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
895 const LogicVRegister& src1,
896 const LogicVRegister& src2, int index) {
897 SimVRegister temp;
898 VectorFormat indexform =
900 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
901}
902
903LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
904 const LogicVRegister& src1,
905 const LogicVRegister& src2, int index) {
906 SimVRegister temp;
907 VectorFormat indexform =
909 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
910}
911
912LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
913 const LogicVRegister& src1,
914 const LogicVRegister& src2, int index) {
915 SimVRegister temp;
916 VectorFormat indexform =
918 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
919}
920
921LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
922 const LogicVRegister& src1,
923 const LogicVRegister& src2, int index) {
924 SimVRegister temp;
925 VectorFormat indexform =
927 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
928}
929
930LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
931 const LogicVRegister& src1,
932 const LogicVRegister& src2, int index) {
933 SimVRegister temp;
934 VectorFormat indexform =
936 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
937}
938
939LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
940 const LogicVRegister& src1,
941 const LogicVRegister& src2, int index) {
942 SimVRegister temp;
943 VectorFormat indexform =
945 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
946}
947
948LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
949 const LogicVRegister& src1,
950 const LogicVRegister& src2, int index) {
951 SimVRegister temp;
952 VectorFormat indexform =
954 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
955}
956
957LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
958 const LogicVRegister& src1,
959 const LogicVRegister& src2, int index) {
960 SimVRegister temp;
961 VectorFormat indexform =
963 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
964}
965
966LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
967 const LogicVRegister& src1,
968 const LogicVRegister& src2, int index) {
969 SimVRegister temp;
970 VectorFormat indexform = VectorFormatFillQ(vform);
971 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
972}
973
974LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
975 const LogicVRegister& src1,
976 const LogicVRegister& src2, int index) {
977 SimVRegister temp;
978 VectorFormat indexform = VectorFormatFillQ(vform);
979 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
980}
981
982uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
983 return PolynomialMult128(op1, op2, 8).second;
984}
985
986LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst,
987 const LogicVRegister& src1,
988 const LogicVRegister& src2) {
989 dst.ClearForWrite(vform);
990 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
991 dst.SetUint(vform, i,
992 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
993 }
994 return dst;
995}
996
997LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst,
998 const LogicVRegister& src1,
999 const LogicVRegister& src2) {
1000 VectorFormat vform_src = VectorFormatHalfWidth(vform);
1001 dst.ClearForWrite(vform);
1002 // Process the elements in reverse to avoid problems when the destination
1003 // register is the same as a source.
1004 for (int i = LaneCountFromFormat(vform) - 1; i > -1; i--) {
1005 dst.SetUint(
1006 vform, i,
1007 PolynomialMult128(src1.Uint(vform_src, i), src2.Uint(vform_src, i),
1008 LaneSizeInBitsFromFormat(vform_src)));
1009 }
1010 return dst;
1011}
1012
// Second-half variant of pmull: destination lane i is computed by
// PolynomialMult128() from source lanes (lane_count + i) of src1 and src2,
// where lane_count is the number of lanes in `vform` and the source lanes are
// read using `vform_src`.
1013LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst,
1014 const LogicVRegister& src1,
1015 const LogicVRegister& src2) {
  // NOTE(review): the declaration of `vform_src` is not present in this
  // listing (the embedded numbering jumps from 1015 to 1017). Restore it from
  // the original file; do not guess.
1017 dst.ClearForWrite(vform);
1018 int lane_count = LaneCountFromFormat(vform);
1019 for (int i = 0; i < lane_count; i++) {
1020 dst.SetUint(vform, i,
1021 PolynomialMult128(src1.Uint(vform_src, lane_count + i),
1022 src2.Uint(vform_src, lane_count + i),
1023 LaneSizeInBitsFromFormat(vform_src)));
1024 }
1025 return dst;
1026}
1027
1028LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst,
1029 const LogicVRegister& src1,
1030 const LogicVRegister& src2) {
1031 int lane_size = LaneSizeInBitsFromFormat(vform);
1032 dst.ClearForWrite(vform);
1033 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1034 // Test for unsigned saturation.
1035 uint64_t ua = src1.UintLeftJustified(vform, i);
1036 uint64_t ub = src2.UintLeftJustified(vform, i);
1037 uint64_t ur = ua - ub;
1038 if (ub > ua) {
1039 dst.SetUnsignedSat(i, false);
1040 }
1041
1042 // Test for signed saturation.
1043 bool pos_a = (ua >> 63) == 0;
1044 bool pos_b = (ub >> 63) == 0;
1045 bool pos_r = (ur >> 63) == 0;
1046 // If the signs of the operands are different, and the sign of the first
1047 // operand doesn't match the result, there was an overflow.
1048 if ((pos_a != pos_b) && (pos_a != pos_r)) {
1049 dst.SetSignedSat(i, pos_a);
1050 }
1051
1052 dst.SetInt(vform, i, ur >> (64 - lane_size));
1053 }
1054 return dst;
1055}
1056
1057LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst,
1058 const LogicVRegister& src1,
1059 const LogicVRegister& src2) {
1060 dst.ClearForWrite(vform);
1061 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1062 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1063 }
1064 return dst;
1065}
1066
1067LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
1068 const LogicVRegister& src1,
1069 const LogicVRegister& src2) {
1070 dst.ClearForWrite(vform);
1071 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1072 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1073 }
1074 return dst;
1075}
1076
1077LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst,
1078 const LogicVRegister& src1,
1079 const LogicVRegister& src2) {
1080 dst.ClearForWrite(vform);
1081 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1082 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1083 }
1084 return dst;
1085}
1086
1087LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst,
1088 const LogicVRegister& src1,
1089 const LogicVRegister& src2) {
1090 dst.ClearForWrite(vform);
1091 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1092 dst.SetUint(vform, i,
1093 src1.Is(src2) ? 0 : src1.Uint(vform, i) ^ src2.Uint(vform, i));
1094 }
1095 return dst;
1096}
1097
1098LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1099 const LogicVRegister& src1,
1100 const LogicVRegister& src2) {
1101 dst.ClearForWrite(vform);
1102 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1103 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1104 }
1105 return dst;
1106}
1107
1108LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1109 const LogicVRegister& src, uint64_t imm) {
1110 uint64_t result[16];
1111 int laneCount = LaneCountFromFormat(vform);
1112 for (int i = 0; i < laneCount; ++i) {
1113 result[i] = src.Uint(vform, i) & ~imm;
1114 }
1115 dst.SetUintArray(vform, result);
1116 return dst;
1117}
1118
1119LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst,
1120 const LogicVRegister& src1,
1121 const LogicVRegister& src2) {
1122 dst.ClearForWrite(vform);
1123 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1124 uint64_t operand1 = dst.Uint(vform, i);
1125 uint64_t operand2 = ~src2.Uint(vform, i);
1126 uint64_t operand3 = src1.Uint(vform, i);
1127 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1128 dst.SetUint(vform, i, result);
1129 }
1130 return dst;
1131}
1132
1133LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst,
1134 const LogicVRegister& src1,
1135 const LogicVRegister& src2) {
1136 dst.ClearForWrite(vform);
1137 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1138 uint64_t operand1 = dst.Uint(vform, i);
1139 uint64_t operand2 = src2.Uint(vform, i);
1140 uint64_t operand3 = src1.Uint(vform, i);
1141 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1142 dst.SetUint(vform, i, result);
1143 }
1144 return dst;
1145}
1146
1147LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst,
1148 const LogicVRegister& src1,
1149 const LogicVRegister& src2) {
1150 dst.ClearForWrite(vform);
1151 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1152 uint64_t operand1 = src2.Uint(vform, i);
1153 uint64_t operand2 = dst.Uint(vform, i);
1154 uint64_t operand3 = src1.Uint(vform, i);
1155 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1156 dst.SetUint(vform, i, result);
1157 }
1158 return dst;
1159}
1160
1161LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst,
1162 const LogicVRegister& src1,
1163 const LogicVRegister& src2, bool max) {
1164 dst.ClearForWrite(vform);
1165 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1166 int64_t src1_val = src1.Int(vform, i);
1167 int64_t src2_val = src2.Int(vform, i);
1168 int64_t dst_val;
1169 if (max) {
1170 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1171 } else {
1172 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1173 }
1174 dst.SetInt(vform, i, dst_val);
1175 }
1176 return dst;
1177}
1178
1179LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst,
1180 const LogicVRegister& src1,
1181 const LogicVRegister& src2) {
1182 return SMinMax(vform, dst, src1, src2, true);
1183}
1184
1185LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst,
1186 const LogicVRegister& src1,
1187 const LogicVRegister& src2) {
1188 return SMinMax(vform, dst, src1, src2, false);
1189}
1190
1191LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst,
1192 const LogicVRegister& src1,
1193 const LogicVRegister& src2, bool max) {
1194 int lanes = LaneCountFromFormat(vform);
1195 int64_t result[kMaxLanesPerVector];
1196 const LogicVRegister* src = &src1;
1197 for (int j = 0; j < 2; j++) {
1198 for (int i = 0; i < lanes; i += 2) {
1199 int64_t first_val = src->Int(vform, i);
1200 int64_t second_val = src->Int(vform, i + 1);
1201 int64_t dst_val;
1202 if (max) {
1203 dst_val = (first_val > second_val) ? first_val : second_val;
1204 } else {
1205 dst_val = (first_val < second_val) ? first_val : second_val;
1206 }
1207 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1208 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1209 }
1210 src = &src2;
1211 }
1212 dst.SetIntArray(vform, result);
1213 return dst;
1214}
1215
1216LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst,
1217 const LogicVRegister& src1,
1218 const LogicVRegister& src2) {
1219 return SMinMaxP(vform, dst, src1, src2, true);
1220}
1221
1222LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst,
1223 const LogicVRegister& src1,
1224 const LogicVRegister& src2) {
1225 return SMinMaxP(vform, dst, src1, src2, false);
1226}
1227
1228LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
1229 const LogicVRegister& src) {
1230 DCHECK_EQ(vform, kFormatD);
1231
1232 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1233 dst.ClearForWrite(vform);
1234 dst.SetUint(vform, 0, dst_val);
1235 return dst;
1236}
1237
// Across-lanes add: accumulates every lane of src (read as signed, in format
// `vform`) into a 64-bit sum and writes it to lane 0 of dst using `vform_dst`.
1238LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst,
1239 const LogicVRegister& src) {
  // NOTE(review): the initializer of `vform_dst` is not present in this
  // listing (the embedded numbering jumps from 1240 to 1242). Restore it from
  // the original file; do not guess.
1240 VectorFormat vform_dst =
1242
1243 int64_t dst_val = 0;
1244 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1245 dst_val += src.Int(vform, i);
1246 }
1247
  // The sum is fully computed before dst is cleared and written.
1248 dst.ClearForWrite(vform_dst);
1249 dst.SetInt(vform_dst, 0, dst_val);
1250 return dst;
1251}
1252
// Signed across-lanes add: accumulates every lane of src (read as signed, in
// format `vform`) into a 64-bit sum and writes it to lane 0 of dst using
// `vform_dst`.
1253LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst,
1254 const LogicVRegister& src) {
  // NOTE(review): the initializer of `vform_dst` is not present in this
  // listing (the embedded numbering jumps from 1255 to 1257). Restore it from
  // the original file; do not guess.
1255 VectorFormat vform_dst =
1257
1258 int64_t dst_val = 0;
1259 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1260 dst_val += src.Int(vform, i);
1261 }
1262
  // The sum is fully computed before dst is cleared and written.
1263 dst.ClearForWrite(vform_dst);
1264 dst.SetInt(vform_dst, 0, dst_val);
1265 return dst;
1266}
1267
// Unsigned across-lanes add: accumulates every lane of src (read as unsigned,
// in format `vform`) into a 64-bit sum and writes it to lane 0 of dst using
// `vform_dst`.
1268LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst,
1269 const LogicVRegister& src) {
  // NOTE(review): the initializer of `vform_dst` is not present in this
  // listing (the embedded numbering jumps from 1270 to 1272). Restore it from
  // the original file; do not guess.
1270 VectorFormat vform_dst =
1272
1273 uint64_t dst_val = 0;
1274 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1275 dst_val += src.Uint(vform, i);
1276 }
1277
  // The sum is fully computed before dst is cleared and written.
1278 dst.ClearForWrite(vform_dst);
1279 dst.SetUint(vform_dst, 0, dst_val);
1280 return dst;
1281}
1282
1283LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst,
1284 const LogicVRegister& src, bool max) {
1285 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1286 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1287 int64_t src_val = src.Int(vform, i);
1288 if (max) {
1289 dst_val = (src_val > dst_val) ? src_val : dst_val;
1290 } else {
1291 dst_val = (src_val < dst_val) ? src_val : dst_val;
1292 }
1293 }
1294 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1295 dst.SetInt(vform, 0, dst_val);
1296 return dst;
1297}
1298
1299LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst,
1300 const LogicVRegister& src) {
1301 SMinMaxV(vform, dst, src, true);
1302 return dst;
1303}
1304
1305LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst,
1306 const LogicVRegister& src) {
1307 SMinMaxV(vform, dst, src, false);
1308 return dst;
1309}
1310
1311LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst,
1312 const LogicVRegister& src1,
1313 const LogicVRegister& src2, bool max) {
1314 dst.ClearForWrite(vform);
1315 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1316 uint64_t src1_val = src1.Uint(vform, i);
1317 uint64_t src2_val = src2.Uint(vform, i);
1318 uint64_t dst_val;
1319 if (max) {
1320 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1321 } else {
1322 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1323 }
1324 dst.SetUint(vform, i, dst_val);
1325 }
1326 return dst;
1327}
1328
1329LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst,
1330 const LogicVRegister& src1,
1331 const LogicVRegister& src2) {
1332 return UMinMax(vform, dst, src1, src2, true);
1333}
1334
1335LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst,
1336 const LogicVRegister& src1,
1337 const LogicVRegister& src2) {
1338 return UMinMax(vform, dst, src1, src2, false);
1339}
1340
1341LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst,
1342 const LogicVRegister& src1,
1343 const LogicVRegister& src2, bool max) {
1344 int lanes = LaneCountFromFormat(vform);
1345 uint64_t result[kMaxLanesPerVector];
1346 const LogicVRegister* src = &src1;
1347 for (int j = 0; j < 2; j++) {
1348 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1349 uint64_t first_val = src->Uint(vform, i);
1350 uint64_t second_val = src->Uint(vform, i + 1);
1351 uint64_t dst_val;
1352 if (max) {
1353 dst_val = (first_val > second_val) ? first_val : second_val;
1354 } else {
1355 dst_val = (first_val < second_val) ? first_val : second_val;
1356 }
1357 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1358 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1359 }
1360 src = &src2;
1361 }
1362 dst.SetUintArray(vform, result);
1363 return dst;
1364}
1365
1366LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst,
1367 const LogicVRegister& src1,
1368 const LogicVRegister& src2) {
1369 return UMinMaxP(vform, dst, src1, src2, true);
1370}
1371
1372LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst,
1373 const LogicVRegister& src1,
1374 const LogicVRegister& src2) {
1375 return UMinMaxP(vform, dst, src1, src2, false);
1376}
1377
1378LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst,
1379 const LogicVRegister& src, bool max) {
1380 uint64_t dst_val = max ? 0 : UINT64_MAX;
1381 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1382 uint64_t src_val = src.Uint(vform, i);
1383 if (max) {
1384 dst_val = (src_val > dst_val) ? src_val : dst_val;
1385 } else {
1386 dst_val = (src_val < dst_val) ? src_val : dst_val;
1387 }
1388 }
1389 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1390 dst.SetUint(vform, 0, dst_val);
1391 return dst;
1392}
1393
1394LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst,
1395 const LogicVRegister& src) {
1396 UMinMaxV(vform, dst, src, true);
1397 return dst;
1398}
1399
1400LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst,
1401 const LogicVRegister& src) {
1402 UMinMaxV(vform, dst, src, false);
1403 return dst;
1404}
1405
1406LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst,
1407 const LogicVRegister& src, int shift) {
1408 DCHECK_GE(shift, 0);
1409 SimVRegister temp;
1410 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1411 return ushl(vform, dst, src, shiftreg);
1412}
1413
1414LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst,
1415 const LogicVRegister& src, int shift) {
1416 DCHECK_GE(shift, 0);
1417 SimVRegister temp1, temp2;
1418 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1419 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1420 return sshl(vform, dst, extendedreg, shiftreg);
1421}
1422
1423LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst,
1424 const LogicVRegister& src, int shift) {
1425 DCHECK_GE(shift, 0);
1426 SimVRegister temp1, temp2;
1427 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1428 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1429 return sshl(vform, dst, extendedreg, shiftreg);
1430}
1431
1432LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst,
1433 const LogicVRegister& src) {
1434 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1435 return sshll(vform, dst, src, shift);
1436}
1437
1438LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst,
1439 const LogicVRegister& src) {
1440 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1441 return sshll2(vform, dst, src, shift);
1442}
1443
1444LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst,
1445 const LogicVRegister& src, int shift) {
1446 DCHECK_GE(shift, 0);
1447 SimVRegister temp1, temp2;
1448 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1449 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1450 return ushl(vform, dst, extendedreg, shiftreg);
1451}
1452
1453LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst,
1454 const LogicVRegister& src, int shift) {
1455 DCHECK_GE(shift, 0);
1456 SimVRegister temp1, temp2;
1457 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1458 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1459 return ushl(vform, dst, extendedreg, shiftreg);
1460}
1461
1462LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst,
1463 const LogicVRegister& src, int shift) {
1464 dst.ClearForWrite(vform);
1465 int laneCount = LaneCountFromFormat(vform);
1466 for (int i = 0; i < laneCount; i++) {
1467 uint64_t src_lane = src.Uint(vform, i);
1468 uint64_t dst_lane = dst.Uint(vform, i);
1469 uint64_t shifted = src_lane << shift;
1470 uint64_t mask = MaxUintFromFormat(vform) << shift;
1471 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1472 }
1473 return dst;
1474}
1475
1476LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst,
1477 const LogicVRegister& src, int shift) {
1478 DCHECK_GE(shift, 0);
1479 SimVRegister temp;
1480 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1481 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1482}
1483
1484LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst,
1485 const LogicVRegister& src, int shift) {
1486 DCHECK_GE(shift, 0);
1487 SimVRegister temp;
1488 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1489 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1490}
1491
1492LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst,
1493 const LogicVRegister& src, int shift) {
1494 DCHECK_GE(shift, 0);
1495 SimVRegister temp;
1496 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1497 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1498}
1499
1500LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst,
1501 const LogicVRegister& src, int shift) {
1502 dst.ClearForWrite(vform);
1503 int laneCount = LaneCountFromFormat(vform);
1504 DCHECK((shift > 0) &&
1505 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1506 for (int i = 0; i < laneCount; i++) {
1507 uint64_t src_lane = src.Uint(vform, i);
1508 uint64_t dst_lane = dst.Uint(vform, i);
1509 uint64_t shifted;
1510 uint64_t mask;
1511 if (shift == 64) {
1512 shifted = 0;
1513 mask = 0;
1514 } else {
1515 shifted = src_lane >> shift;
1516 mask = MaxUintFromFormat(vform) >> shift;
1517 }
1518 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1519 }
1520 return dst;
1521}
1522
1523LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst,
1524 const LogicVRegister& src, int shift) {
1525 DCHECK_GE(shift, 0);
1526 SimVRegister temp;
1527 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1528 return ushl(vform, dst, src, shiftreg);
1529}
1530
1531LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst,
1532 const LogicVRegister& src, int shift) {
1533 DCHECK_GE(shift, 0);
1534 SimVRegister temp;
1535 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1536 return sshl(vform, dst, src, shiftreg);
1537}
1538
1539LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst,
1540 const LogicVRegister& src, int shift) {
1541 SimVRegister temp;
1542 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1543 return add(vform, dst, dst, shifted_reg);
1544}
1545
1546LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst,
1547 const LogicVRegister& src, int shift) {
1548 SimVRegister temp;
1549 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1550 return add(vform, dst, dst, shifted_reg);
1551}
1552
1553LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst,
1554 const LogicVRegister& src, int shift) {
1555 SimVRegister temp;
1556 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1557 return add(vform, dst, dst, shifted_reg);
1558}
1559
1560LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst,
1561 const LogicVRegister& src, int shift) {
1562 SimVRegister temp;
1563 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1564 return add(vform, dst, dst, shifted_reg);
1565}
1566
1567LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst,
1568 const LogicVRegister& src) {
1569 uint64_t result[16];
1570 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1571 int laneCount = LaneCountFromFormat(vform);
1572 for (int i = 0; i < laneCount; i++) {
1573 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1574 }
1575
1576 dst.SetUintArray(vform, result);
1577 return dst;
1578}
1579
1580LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst,
1581 const LogicVRegister& src) {
1582 uint64_t result[16];
1583 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1584 int laneCount = LaneCountFromFormat(vform);
1585 for (int i = 0; i < laneCount; i++) {
1586 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1587 }
1588
1589 dst.SetUintArray(vform, result);
1590 return dst;
1591}
1592
1593LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst,
1594 const LogicVRegister& src) {
1595 uint64_t result[16];
1596 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1597 int laneCount = LaneCountFromFormat(vform);
1598 for (int i = 0; i < laneCount; i++) {
1599 uint64_t value = src.Uint(vform, i);
1600 result[i] = 0;
1601 for (int j = 0; j < laneSizeInBits; j++) {
1602 result[i] += (value & 1);
1603 value >>= 1;
1604 }
1605 }
1606
1607 dst.SetUintArray(vform, result);
1608 return dst;
1609}
1610
// Lane-wise signed shift of src1 by a per-lane signed amount taken from src2.
// Positive amounts shift left; negative amounts shift right with sign
// extension. For every lane the signed/unsigned saturation state and the
// rounding state are recorded on dst so that callers can apply
// SignedSaturate()/UnsignedSaturate()/Round() afterwards.
1611LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst,
1612 const LogicVRegister& src1,
1613 const LogicVRegister& src2) {
1614 dst.ClearForWrite(vform);
1615 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // The shift amount is the src2 lane value narrowed to int8_t.
1616 int8_t shift_val = src2.Int(vform, i);
    // Left-justified copy: the lane's sign bit sits in bit 63, so leading
    // sign/zero bit counts over 64 bits give the available headroom.
1617 int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1618
1619 // Set signed saturation state.
1620 if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) &&
1621 (lj_src_val != 0)) {
1622 dst.SetSignedSat(i, lj_src_val >= 0);
1623 }
1624
1625 // Set unsigned saturation state.
    // A negative source always flags unsigned saturation (with `false`);
    // otherwise it is flagged only when the shift exceeds the leading zeros.
1626 if (lj_src_val < 0) {
1627 dst.SetUnsignedSat(i, false);
1628 } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) &&
1629 (lj_src_val != 0)) {
1630 dst.SetUnsignedSat(i, true);
1631 }
1632
1633 int64_t src_val = src1.Int(vform, i);
1634 bool src_is_negative = src_val < 0;
    // Shifts of 64 or more in either direction cannot be performed with the
    // C++ shift operators, so their results are produced explicitly.
1635 if (shift_val > 63) {
1636 dst.SetInt(vform, i, 0);
1637 } else if (shift_val < -63) {
      // Everything is shifted out: the result is all sign bits, and the
      // rounding flag is set exactly when the source was negative.
1638 dst.SetRounding(i, src_is_negative);
1639 dst.SetInt(vform, i, src_is_negative ? -1 : 0);
1640 } else {
1641 // Use unsigned types for shifts, as behaviour is undefined for signed
1642 // lhs.
1643 uint64_t usrc_val = static_cast<uint64_t>(src_val);
1644
1645 if (shift_val < 0) {
1646 // Convert to right shift.
        // shift_val is in [-63, -1] here, so the negation is in [1, 63].
1647 shift_val = -shift_val;
1648
1649 // Set rounding state by testing most-significant bit shifted out.
1650 // Rounding only needed on right shifts.
1651 if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1652 dst.SetRounding(i, true);
1653 }
1654
1655 usrc_val >>= shift_val;
1656
1657 if (src_is_negative) {
1658 // Simulate sign-extension.
1659 usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1660 }
1661 } else {
1662 usrc_val <<= shift_val;
1663 }
1664 dst.SetUint(vform, i, usrc_val);
1665 }
1666 }
1667 return dst;
1668}
1669
// Lane-wise unsigned shift of src1 by a per-lane signed amount taken from
// src2. Positive amounts shift left; negative amounts shift right (logical).
// For every lane the unsigned saturation state and the rounding state are
// recorded on dst so that callers can apply UnsignedSaturate()/Round()
// afterwards.
1670LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst,
1671 const LogicVRegister& src1,
1672 const LogicVRegister& src2) {
1673 dst.ClearForWrite(vform);
1674 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // The shift amount is the src2 lane value narrowed to int8_t.
1675 int8_t shift_val = src2.Int(vform, i);
    // Left-justified copy: the lane's top bit sits in bit 63, so the leading
    // zero count over 64 bits gives the available headroom.
1676 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1677
1678 // Set saturation state.
1679 if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) {
1680 dst.SetUnsignedSat(i, true);
1681 }
1682
1683 uint64_t src_val = src1.Uint(vform, i);
    // Left shifts of 64 or more and right shifts of more than 64 yield zero
    // without setting the rounding flag.
1684 if ((shift_val > 63) || (shift_val < -64)) {
1685 dst.SetUint(vform, i, 0);
1686 } else {
1687 if (shift_val < 0) {
1688 // Set rounding state. Rounding only needed on right shifts.
        // Tests the most significant bit that is shifted out; for a shift of
        // exactly -64 this is bit 63 of the source.
1689 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1690 dst.SetRounding(i, true);
1691 }
1692
        // A right shift by 64 is not expressible with >> on uint64_t.
1693 if (shift_val == -64) {
1694 src_val = 0;
1695 } else {
1696 src_val >>= -shift_val;
1697 }
1698 } else {
1699 src_val <<= shift_val;
1700 }
1701 dst.SetUint(vform, i, src_val);
1702 }
1703 }
1704 return dst;
1705}
1706
1707LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst,
1708 const LogicVRegister& src) {
1709 dst.ClearForWrite(vform);
1710 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1711 // Test for signed saturation.
1712 int64_t sa = src.Int(vform, i);
1713 if (sa == MinIntFromFormat(vform)) {
1714 dst.SetSignedSat(i, true);
1715 }
1716 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1717 }
1718 return dst;
1719}
1720
1721LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst,
1722 const LogicVRegister& src) {
1723 dst.ClearForWrite(vform);
1724 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1725 int64_t sa = dst.IntLeftJustified(vform, i);
1726 uint64_t ub = src.UintLeftJustified(vform, i);
1727 uint64_t ur = sa + ub;
1728
1729 int64_t sr = base::bit_cast<int64_t>(ur);
1730 if (sr < sa) { // Test for signed positive saturation.
1731 dst.SetInt(vform, i, MaxIntFromFormat(vform));
1732 } else {
1733 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
1734 }
1735 }
1736 return dst;
1737}
1738
1739LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst,
1740 const LogicVRegister& src) {
1741 dst.ClearForWrite(vform);
1742 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1743 uint64_t ua = dst.UintLeftJustified(vform, i);
1744 int64_t sb = src.IntLeftJustified(vform, i);
1745 uint64_t ur = ua + sb;
1746
1747 if ((sb > 0) && (ur <= ua)) {
1748 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
1749 } else if ((sb < 0) && (ur >= ua)) {
1750 dst.SetUint(vform, i, 0); // Negative saturation.
1751 } else {
1752 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
1753 }
1754 }
1755 return dst;
1756}
1757
1758LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst,
1759 const LogicVRegister& src) {
1760 dst.ClearForWrite(vform);
1761 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1762 // Test for signed saturation.
1763 int64_t sa = src.Int(vform, i);
1764 if (sa == MinIntFromFormat(vform)) {
1765 dst.SetSignedSat(i, true);
1766 }
1767 if (sa < 0) {
1768 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1769 } else {
1770 dst.SetInt(vform, i, sa);
1771 }
1772 }
1773 return dst;
1774}
1775
// Narrows each source lane to the lane size of `dstform` and writes it to dst,
// recording per-lane signed and unsigned saturation state on dst.
//
// dstform:     destination format; selects the source format and whether the
//              lower or upper half of dst is written (see the switch below).
// dstIsSigned: chooses SetInt (true) or SetUint (false) for the final write.
// srcIsSigned: chooses whether the source lanes are interpreted as signed or
//              unsigned for the unsigned-saturation test and the truncation.
1776LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform,
1777 LogicVRegister dst, bool dstIsSigned,
1778 const LogicVRegister& src,
1779 bool srcIsSigned) {
1780 bool upperhalf = false;
  // NOTE(review): the declaration of `srcform` is not present in this listing
  // (the embedded numbering jumps from 1780 to 1782). Restore it from the
  // original file; do not guess.
  // Local copies of the source lanes (signed and unsigned views), taken before
  // dst is modified.
1782 int64_t ssrc[8];
1783 uint64_t usrc[8];
1784
  // 8B/4H/2S and the scalar formats write the lower half; 16B/8H/4S write the
  // upper half. The source format has lanes twice as wide as `dstform`.
1785 switch (dstform) {
1786 case kFormat8B:
1787 upperhalf = false;
1788 srcform = kFormat8H;
1789 break;
1790 case kFormat16B:
1791 upperhalf = true;
1792 srcform = kFormat8H;
1793 break;
1794 case kFormat4H:
1795 upperhalf = false;
1796 srcform = kFormat4S;
1797 break;
1798 case kFormat8H:
1799 upperhalf = true;
1800 srcform = kFormat4S;
1801 break;
1802 case kFormat2S:
1803 upperhalf = false;
1804 srcform = kFormat2D;
1805 break;
1806 case kFormat4S:
1807 upperhalf = true;
1808 srcform = kFormat2D;
1809 break;
1810 case kFormatB:
1811 upperhalf = false;
1812 srcform = kFormatH;
1813 break;
1814 case kFormatH:
1815 upperhalf = false;
1816 srcform = kFormatS;
1817 break;
1818 case kFormatS:
1819 upperhalf = false;
1820 srcform = kFormatD;
1821 break;
1822 default:
1823 UNIMPLEMENTED();
1824 }
1825
1826 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1827 ssrc[i] = src.Int(srcform, i);
1828 usrc[i] = src.Uint(srcform, i);
1829 }
1830
  // Upper-half forms start writing at the middle lane of dst and do not call
  // ClearForWrite(); lower-half forms start at lane 0 and clear dst first.
1831 int offset;
1832 if (upperhalf) {
1833 offset = LaneCountFromFormat(dstform) / 2;
1834 } else {
1835 offset = 0;
1836 dst.ClearForWrite(dstform);
1837 }
1838
1839 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1840 // Test for signed saturation
1841 if (ssrc[i] > MaxIntFromFormat(dstform)) {
1842 dst.SetSignedSat(offset + i, true);
1843 } else if (ssrc[i] < MinIntFromFormat(dstform)) {
1844 dst.SetSignedSat(offset + i, false);
1845 }
1846
1847 // Test for unsigned saturation
1848 if (srcIsSigned) {
1849 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
1850 dst.SetUnsignedSat(offset + i, true);
1851 } else if (ssrc[i] < 0) {
1852 dst.SetUnsignedSat(offset + i, false);
1853 }
1854 } else {
1855 if (usrc[i] > MaxUintFromFormat(dstform)) {
1856 dst.SetUnsignedSat(offset + i, true);
1857 }
1858 }
1859
    // Truncate to the destination lane width by masking.
1860 int64_t result;
1861 if (srcIsSigned) {
1862 result = ssrc[i] & MaxUintFromFormat(dstform);
1863 } else {
1864 result = usrc[i] & MaxUintFromFormat(dstform);
1865 }
1866
1867 if (dstIsSigned) {
1868 dst.SetInt(dstform, offset + i, result);
1869 } else {
1870 dst.SetUint(dstform, offset + i, result);
1871 }
1872 }
1873 return dst;
1874}
1875
1876LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst,
1877 const LogicVRegister& src) {
1878 return ExtractNarrow(vform, dst, true, src, true);
1879}
1880
1881LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst,
1882 const LogicVRegister& src) {
1883 return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform);
1884}
1885
1886LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst,
1887 const LogicVRegister& src) {
1888 return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
1889}
1890
1891LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst,
1892 const LogicVRegister& src) {
1893 return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
1894}
1895
1896LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst,
1897 const LogicVRegister& src1,
1898 const LogicVRegister& src2, bool issigned) {
1899 dst.ClearForWrite(vform);
1900 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1901 if (issigned) {
1902 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
1903 sr = sr > 0 ? sr : -sr;
1904 dst.SetInt(vform, i, sr);
1905 } else {
1906 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
1907 sr = sr > 0 ? sr : -sr;
1908 dst.SetUint(vform, i, sr);
1909 }
1910 }
1911 return dst;
1912}
1913
1914LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst,
1915 const LogicVRegister& src1,
1916 const LogicVRegister& src2) {
1917 SimVRegister temp;
1918 dst.ClearForWrite(vform);
1919 AbsDiff(vform, temp, src1, src2, true);
1920 add(vform, dst, dst, temp);
1921 return dst;
1922}
1923
1924LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst,
1925 const LogicVRegister& src1,
1926 const LogicVRegister& src2) {
1927 SimVRegister temp;
1928 dst.ClearForWrite(vform);
1929 AbsDiff(vform, temp, src1, src2, false);
1930 add(vform, dst, dst, temp);
1931 return dst;
1932}
1933
1934LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst,
1935 const LogicVRegister& src) {
1936 dst.ClearForWrite(vform);
1937 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1938 dst.SetUint(vform, i, ~src.Uint(vform, i));
1939 }
1940 return dst;
1941}
1942
1943LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst,
1944 const LogicVRegister& src) {
1945 uint64_t result[16];
1946 int laneCount = LaneCountFromFormat(vform);
1947 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1948 uint64_t reversed_value;
1949 uint64_t value;
1950 for (int i = 0; i < laneCount; i++) {
1951 value = src.Uint(vform, i);
1952 reversed_value = 0;
1953 for (int j = 0; j < laneSizeInBits; j++) {
1954 reversed_value = (reversed_value << 1) | (value & 1);
1955 value >>= 1;
1956 }
1957 result[i] = reversed_value;
1958 }
1959
1960 dst.SetUintArray(vform, result);
1961 return dst;
1962}
1963
1964LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst,
1965 const LogicVRegister& src, int revSize) {
1966 uint64_t result[16];
1967 int laneCount = LaneCountFromFormat(vform);
1968 int laneSize = LaneSizeInBytesFromFormat(vform);
1969 int lanesPerLoop = revSize / laneSize;
1970 for (int i = 0; i < laneCount; i += lanesPerLoop) {
1971 for (int j = 0; j < lanesPerLoop; j++) {
1972 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
1973 }
1974 }
1975 dst.SetUintArray(vform, result);
1976 return dst;
1977}
1978
1979LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst,
1980 const LogicVRegister& src) {
1981 return rev(vform, dst, src, 2);
1982}
1983
1984LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst,
1985 const LogicVRegister& src) {
1986 return rev(vform, dst, src, 4);
1987}
1988
1989LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst,
1990 const LogicVRegister& src) {
1991 return rev(vform, dst, src, 8);
1992}
1993
1994LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst,
1995 const LogicVRegister& src, bool is_signed,
1996 bool do_accumulate) {
1998 DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U);
1999 DCHECK_LE(LaneCountFromFormat(vform), 8);
2000
2001 uint64_t result[8];
2002 int lane_count = LaneCountFromFormat(vform);
2003 for (int i = 0; i < lane_count; i++) {
2004 if (is_signed) {
2005 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
2006 src.Int(vformsrc, 2 * i + 1));
2007 } else {
2008 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2009 }
2010 }
2011
2012 dst.ClearForWrite(vform);
2013 for (int i = 0; i < lane_count; ++i) {
2014 if (do_accumulate) {
2015 result[i] += dst.Uint(vform, i);
2016 }
2017 dst.SetUint(vform, i, result[i]);
2018 }
2019
2020 return dst;
2021}
2022
2023LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst,
2024 const LogicVRegister& src) {
2025 return addlp(vform, dst, src, true, false);
2026}
2027
2028LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst,
2029 const LogicVRegister& src) {
2030 return addlp(vform, dst, src, false, false);
2031}
2032
2033LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst,
2034 const LogicVRegister& src) {
2035 return addlp(vform, dst, src, true, true);
2036}
2037
2038LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst,
2039 const LogicVRegister& src) {
2040 return addlp(vform, dst, src, false, true);
2041}
2042
2043LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst,
2044 const LogicVRegister& src1,
2045 const LogicVRegister& src2, int index) {
2046 uint8_t result[16];
2047 int laneCount = LaneCountFromFormat(vform);
2048 for (int i = 0; i < laneCount - index; ++i) {
2049 result[i] = src1.Uint(vform, i + index);
2050 }
2051 for (int i = 0; i < index; ++i) {
2052 result[laneCount - index + i] = src2.Uint(vform, i);
2053 }
2054 dst.ClearForWrite(vform);
2055 for (int i = 0; i < laneCount; ++i) {
2056 dst.SetUint(vform, i, result[i]);
2057 }
2058 return dst;
2059}
2060
2061LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst,
2062 const LogicVRegister& src,
2063 int src_index) {
2064 int laneCount = LaneCountFromFormat(vform);
2065 uint64_t value = src.Uint(vform, src_index);
2066 dst.ClearForWrite(vform);
2067 for (int i = 0; i < laneCount; ++i) {
2068 dst.SetUint(vform, i, value);
2069 }
2070 return dst;
2071}
2072
2073LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst,
2074 uint64_t imm) {
2075 int laneCount = LaneCountFromFormat(vform);
2076 uint64_t value = imm & MaxUintFromFormat(vform);
2077 dst.ClearForWrite(vform);
2078 for (int i = 0; i < laneCount; ++i) {
2079 dst.SetUint(vform, i, value);
2080 }
2081 return dst;
2082}
2083
2084LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst,
2085 int dst_index, const LogicVRegister& src,
2086 int src_index) {
2087 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2088 return dst;
2089}
2090
2091LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst,
2092 int dst_index, uint64_t imm) {
2093 uint64_t value = imm & MaxUintFromFormat(vform);
2094 dst.SetUint(vform, dst_index, value);
2095 return dst;
2096}
2097
2098LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst,
2099 uint64_t imm) {
2100 int laneCount = LaneCountFromFormat(vform);
2101 dst.ClearForWrite(vform);
2102 for (int i = 0; i < laneCount; ++i) {
2103 dst.SetUint(vform, i, imm);
2104 }
2105 return dst;
2106}
2107
2108LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst,
2109 uint64_t imm) {
2110 int laneCount = LaneCountFromFormat(vform);
2111 dst.ClearForWrite(vform);
2112 for (int i = 0; i < laneCount; ++i) {
2113 dst.SetUint(vform, i, ~imm);
2114 }
2115 return dst;
2116}
2117
2118LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
2119 const LogicVRegister& src, uint64_t imm) {
2120 uint64_t result[16];
2121 int laneCount = LaneCountFromFormat(vform);
2122 for (int i = 0; i < laneCount; ++i) {
2123 result[i] = src.Uint(vform, i) | imm;
2124 }
2125 dst.SetUintArray(vform, result);
2126 return dst;
2127}
2128
2129LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst,
2130 const LogicVRegister& src) {
2131 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2132
2133 dst.ClearForWrite(vform);
2134 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2135 dst.SetUint(vform, i, src.Uint(vform_half, i));
2136 }
2137 return dst;
2138}
2139
2140LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst,
2141 const LogicVRegister& src) {
2142 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2143
2144 dst.ClearForWrite(vform);
2145 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2146 dst.SetInt(vform, i, src.Int(vform_half, i));
2147 }
2148 return dst;
2149}
2150
2151LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst,
2152 const LogicVRegister& src) {
2153 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2154 int lane_count = LaneCountFromFormat(vform);
2155
2156 dst.ClearForWrite(vform);
2157 for (int i = 0; i < lane_count; i++) {
2158 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2159 }
2160 return dst;
2161}
2162
2163LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst,
2164 const LogicVRegister& src) {
2165 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2166 int lane_count = LaneCountFromFormat(vform);
2167
2168 dst.ClearForWrite(vform);
2169 for (int i = 0; i < lane_count; i++) {
2170 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2171 }
2172 return dst;
2173}
2174
2175LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst,
2176 const LogicVRegister& src, int shift) {
2177 SimVRegister temp;
2178 VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2179 VectorFormat vform_dst = vform;
2180 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2181 return ExtractNarrow(vform_dst, dst, false, shifted_src, false);
2182}
2183
2184LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst,
2185 const LogicVRegister& src, int shift) {
2186 SimVRegister temp;
2188 VectorFormat vformdst = vform;
2189 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2190 return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2191}
2192
2193LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst,
2194 const LogicVRegister& src, int shift) {
2195 SimVRegister temp;
2196 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2197 VectorFormat vformdst = vform;
2198 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2199 return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2200}
2201
2202LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst,
2203 const LogicVRegister& src, int shift) {
2204 SimVRegister temp;
2206 VectorFormat vformdst = vform;
2207 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2208 return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2209}
2210
2211LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst,
2212 const LogicVRegister& ind,
2213 bool zero_out_of_bounds,
2214 const LogicVRegister* tab1,
2215 const LogicVRegister* tab2,
2216 const LogicVRegister* tab3,
2217 const LogicVRegister* tab4) {
2218 DCHECK_NOT_NULL(tab1);
2219 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
2220 uint64_t result[kMaxLanesPerVector];
2221 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2222 result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
2223 }
2224 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2225 uint64_t j = ind.Uint(vform, i);
2226 int tab_idx = static_cast<int>(j >> 4);
2227 int j_idx = static_cast<int>(j & 15);
2228 if ((tab_idx < 4) && (tab[tab_idx] != nullptr)) {
2229 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
2230 }
2231 }
2232 dst.SetUintArray(vform, result);
2233 return dst;
2234}
2235
2236LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2237 const LogicVRegister& tab,
2238 const LogicVRegister& ind) {
2239 return Table(vform, dst, ind, true, &tab);
2240}
2241
2242LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2243 const LogicVRegister& tab,
2244 const LogicVRegister& tab2,
2245 const LogicVRegister& ind) {
2246 return Table(vform, dst, ind, true, &tab, &tab2);
2247}
2248
2249LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2250 const LogicVRegister& tab,
2251 const LogicVRegister& tab2,
2252 const LogicVRegister& tab3,
2253 const LogicVRegister& ind) {
2254 return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
2255}
2256
2257LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2258 const LogicVRegister& tab,
2259 const LogicVRegister& tab2,
2260 const LogicVRegister& tab3,
2261 const LogicVRegister& tab4,
2262 const LogicVRegister& ind) {
2263 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
2264}
2265
2266LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2267 const LogicVRegister& tab,
2268 const LogicVRegister& ind) {
2269 return Table(vform, dst, ind, false, &tab);
2270}
2271
2272LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2273 const LogicVRegister& tab,
2274 const LogicVRegister& tab2,
2275 const LogicVRegister& ind) {
2276 return Table(vform, dst, ind, false, &tab, &tab2);
2277}
2278
2279LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2280 const LogicVRegister& tab,
2281 const LogicVRegister& tab2,
2282 const LogicVRegister& tab3,
2283 const LogicVRegister& ind) {
2284 return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
2285}
2286
2287LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2288 const LogicVRegister& tab,
2289 const LogicVRegister& tab2,
2290 const LogicVRegister& tab3,
2291 const LogicVRegister& tab4,
2292 const LogicVRegister& ind) {
2293 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
2294}
2295
2296LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst,
2297 const LogicVRegister& src, int shift) {
2298 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2299}
2300
2301LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst,
2302 const LogicVRegister& src, int shift) {
2303 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2304}
2305
2306LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst,
2307 const LogicVRegister& src, int shift) {
2308 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2309}
2310
2311LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst,
2312 const LogicVRegister& src, int shift) {
2313 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2314}
2315
2316LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst,
2317 const LogicVRegister& src, int shift) {
2318 SimVRegister temp;
2319 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2320 VectorFormat vformdst = vform;
2321 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2322 return sqxtn(vformdst, dst, shifted_src);
2323}
2324
2325LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst,
2326 const LogicVRegister& src, int shift) {
2327 SimVRegister temp;
2329 VectorFormat vformdst = vform;
2330 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2331 return sqxtn(vformdst, dst, shifted_src);
2332}
2333
2334LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst,
2335 const LogicVRegister& src, int shift) {
2336 SimVRegister temp;
2337 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2338 VectorFormat vformdst = vform;
2339 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2340 return sqxtn(vformdst, dst, shifted_src);
2341}
2342
2343LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst,
2344 const LogicVRegister& src, int shift) {
2345 SimVRegister temp;
2347 VectorFormat vformdst = vform;
2348 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2349 return sqxtn(vformdst, dst, shifted_src);
2350}
2351
2352LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst,
2353 const LogicVRegister& src, int shift) {
2354 SimVRegister temp;
2355 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2356 VectorFormat vformdst = vform;
2357 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2358 return sqxtun(vformdst, dst, shifted_src);
2359}
2360
2361LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst,
2362 const LogicVRegister& src, int shift) {
2363 SimVRegister temp;
2365 VectorFormat vformdst = vform;
2366 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2367 return sqxtun(vformdst, dst, shifted_src);
2368}
2369
2370LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst,
2371 const LogicVRegister& src, int shift) {
2372 SimVRegister temp;
2373 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2374 VectorFormat vformdst = vform;
2375 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2376 return sqxtun(vformdst, dst, shifted_src);
2377}
2378
2379LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst,
2380 const LogicVRegister& src, int shift) {
2381 SimVRegister temp;
2383 VectorFormat vformdst = vform;
2384 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2385 return sqxtun(vformdst, dst, shifted_src);
2386}
2387
2388LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst,
2389 const LogicVRegister& src1,
2390 const LogicVRegister& src2) {
2391 SimVRegister temp1, temp2;
2392 uxtl(vform, temp1, src1);
2393 uxtl(vform, temp2, src2);
2394 add(vform, dst, temp1, temp2);
2395 return dst;
2396}
2397
2398LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst,
2399 const LogicVRegister& src1,
2400 const LogicVRegister& src2) {
2401 SimVRegister temp1, temp2;
2402 uxtl2(vform, temp1, src1);
2403 uxtl2(vform, temp2, src2);
2404 add(vform, dst, temp1, temp2);
2405 return dst;
2406}
2407
2408LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst,
2409 const LogicVRegister& src1,
2410 const LogicVRegister& src2) {
2411 SimVRegister temp;
2412 uxtl(vform, temp, src2);
2413 add(vform, dst, src1, temp);
2414 return dst;
2415}
2416
2417LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst,
2418 const LogicVRegister& src1,
2419 const LogicVRegister& src2) {
2420 SimVRegister temp;
2421 uxtl2(vform, temp, src2);
2422 add(vform, dst, src1, temp);
2423 return dst;
2424}
2425
2426LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst,
2427 const LogicVRegister& src1,
2428 const LogicVRegister& src2) {
2429 SimVRegister temp1, temp2;
2430 sxtl(vform, temp1, src1);
2431 sxtl(vform, temp2, src2);
2432 add(vform, dst, temp1, temp2);
2433 return dst;
2434}
2435
2436LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst,
2437 const LogicVRegister& src1,
2438 const LogicVRegister& src2) {
2439 SimVRegister temp1, temp2;
2440 sxtl2(vform, temp1, src1);
2441 sxtl2(vform, temp2, src2);
2442 add(vform, dst, temp1, temp2);
2443 return dst;
2444}
2445
2446LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst,
2447 const LogicVRegister& src1,
2448 const LogicVRegister& src2) {
2449 SimVRegister temp;
2450 sxtl(vform, temp, src2);
2451 add(vform, dst, src1, temp);
2452 return dst;
2453}
2454
2455LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst,
2456 const LogicVRegister& src1,
2457 const LogicVRegister& src2) {
2458 SimVRegister temp;
2459 sxtl2(vform, temp, src2);
2460 add(vform, dst, src1, temp);
2461 return dst;
2462}
2463
2464LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst,
2465 const LogicVRegister& src1,
2466 const LogicVRegister& src2) {
2467 SimVRegister temp1, temp2;
2468 uxtl(vform, temp1, src1);
2469 uxtl(vform, temp2, src2);
2470 sub(vform, dst, temp1, temp2);
2471 return dst;
2472}
2473
2474LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst,
2475 const LogicVRegister& src1,
2476 const LogicVRegister& src2) {
2477 SimVRegister temp1, temp2;
2478 uxtl2(vform, temp1, src1);
2479 uxtl2(vform, temp2, src2);
2480 sub(vform, dst, temp1, temp2);
2481 return dst;
2482}
2483
2484LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst,
2485 const LogicVRegister& src1,
2486 const LogicVRegister& src2) {
2487 SimVRegister temp;
2488 uxtl(vform, temp, src2);
2489 sub(vform, dst, src1, temp);
2490 return dst;
2491}
2492
2493LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst,
2494 const LogicVRegister& src1,
2495 const LogicVRegister& src2) {
2496 SimVRegister temp;
2497 uxtl2(vform, temp, src2);
2498 sub(vform, dst, src1, temp);
2499 return dst;
2500}
2501
2502LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst,
2503 const LogicVRegister& src1,
2504 const LogicVRegister& src2) {
2505 SimVRegister temp1, temp2;
2506 sxtl(vform, temp1, src1);
2507 sxtl(vform, temp2, src2);
2508 sub(vform, dst, temp1, temp2);
2509 return dst;
2510}
2511
2512LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst,
2513 const LogicVRegister& src1,
2514 const LogicVRegister& src2) {
2515 SimVRegister temp1, temp2;
2516 sxtl2(vform, temp1, src1);
2517 sxtl2(vform, temp2, src2);
2518 sub(vform, dst, temp1, temp2);
2519 return dst;
2520}
2521
2522LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst,
2523 const LogicVRegister& src1,
2524 const LogicVRegister& src2) {
2525 SimVRegister temp;
2526 sxtl(vform, temp, src2);
2527 sub(vform, dst, src1, temp);
2528 return dst;
2529}
2530
2531LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst,
2532 const LogicVRegister& src1,
2533 const LogicVRegister& src2) {
2534 SimVRegister temp;
2535 sxtl2(vform, temp, src2);
2536 sub(vform, dst, src1, temp);
2537 return dst;
2538}
2539
2540LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst,
2541 const LogicVRegister& src1,
2542 const LogicVRegister& src2) {
2543 SimVRegister temp1, temp2;
2544 uxtl(vform, temp1, src1);
2545 uxtl(vform, temp2, src2);
2546 uaba(vform, dst, temp1, temp2);
2547 return dst;
2548}
2549
2550LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst,
2551 const LogicVRegister& src1,
2552 const LogicVRegister& src2) {
2553 SimVRegister temp1, temp2;
2554 uxtl2(vform, temp1, src1);
2555 uxtl2(vform, temp2, src2);
2556 uaba(vform, dst, temp1, temp2);
2557 return dst;
2558}
2559
2560LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst,
2561 const LogicVRegister& src1,
2562 const LogicVRegister& src2) {
2563 SimVRegister temp1, temp2;
2564 sxtl(vform, temp1, src1);
2565 sxtl(vform, temp2, src2);
2566 saba(vform, dst, temp1, temp2);
2567 return dst;
2568}
2569
2570LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst,
2571 const LogicVRegister& src1,
2572 const LogicVRegister& src2) {
2573 SimVRegister temp1, temp2;
2574 sxtl2(vform, temp1, src1);
2575 sxtl2(vform, temp2, src2);
2576 saba(vform, dst, temp1, temp2);
2577 return dst;
2578}
2579
2580LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst,
2581 const LogicVRegister& src1,
2582 const LogicVRegister& src2) {
2583 SimVRegister temp1, temp2;
2584 uxtl(vform, temp1, src1);
2585 uxtl(vform, temp2, src2);
2586 AbsDiff(vform, dst, temp1, temp2, false);
2587 return dst;
2588}
2589
2590LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst,
2591 const LogicVRegister& src1,
2592 const LogicVRegister& src2) {
2593 SimVRegister temp1, temp2;
2594 uxtl2(vform, temp1, src1);
2595 uxtl2(vform, temp2, src2);
2596 AbsDiff(vform, dst, temp1, temp2, false);
2597 return dst;
2598}
2599
2600LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst,
2601 const LogicVRegister& src1,
2602 const LogicVRegister& src2) {
2603 SimVRegister temp1, temp2;
2604 sxtl(vform, temp1, src1);
2605 sxtl(vform, temp2, src2);
2606 AbsDiff(vform, dst, temp1, temp2, true);
2607 return dst;
2608}
2609
2610LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst,
2611 const LogicVRegister& src1,
2612 const LogicVRegister& src2) {
2613 SimVRegister temp1, temp2;
2614 sxtl2(vform, temp1, src1);
2615 sxtl2(vform, temp2, src2);
2616 AbsDiff(vform, dst, temp1, temp2, true);
2617 return dst;
2618}
2619
2620LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
2621 const LogicVRegister& src1,
2622 const LogicVRegister& src2) {
2623 SimVRegister temp1, temp2;
2624 uxtl(vform, temp1, src1);
2625 uxtl(vform, temp2, src2);
2626 mul(vform, dst, temp1, temp2);
2627 return dst;
2628}
2629
2630LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
2631 const LogicVRegister& src1,
2632 const LogicVRegister& src2) {
2633 SimVRegister temp1, temp2;
2634 uxtl2(vform, temp1, src1);
2635 uxtl2(vform, temp2, src2);
2636 mul(vform, dst, temp1, temp2);
2637 return dst;
2638}
2639
2640LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
2641 const LogicVRegister& src1,
2642 const LogicVRegister& src2) {
2643 SimVRegister temp1, temp2;
2644 sxtl(vform, temp1, src1);
2645 sxtl(vform, temp2, src2);
2646 mul(vform, dst, temp1, temp2);
2647 return dst;
2648}
2649
2650LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
2651 const LogicVRegister& src1,
2652 const LogicVRegister& src2) {
2653 SimVRegister temp1, temp2;
2654 sxtl2(vform, temp1, src1);
2655 sxtl2(vform, temp2, src2);
2656 mul(vform, dst, temp1, temp2);
2657 return dst;
2658}
2659
2660LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
2661 const LogicVRegister& src1,
2662 const LogicVRegister& src2) {
2663 SimVRegister temp1, temp2;
2664 uxtl(vform, temp1, src1);
2665 uxtl(vform, temp2, src2);
2666 mls(vform, dst, temp1, temp2);
2667 return dst;
2668}
2669
2670LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
2671 const LogicVRegister& src1,
2672 const LogicVRegister& src2) {
2673 SimVRegister temp1, temp2;
2674 uxtl2(vform, temp1, src1);
2675 uxtl2(vform, temp2, src2);
2676 mls(vform, dst, temp1, temp2);
2677 return dst;
2678}
2679
2680LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
2681 const LogicVRegister& src1,
2682 const LogicVRegister& src2) {
2683 SimVRegister temp1, temp2;
2684 sxtl(vform, temp1, src1);
2685 sxtl(vform, temp2, src2);
2686 mls(vform, dst, temp1, temp2);
2687 return dst;
2688}
2689
2690LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
2691 const LogicVRegister& src1,
2692 const LogicVRegister& src2) {
2693 SimVRegister temp1, temp2;
2694 sxtl2(vform, temp1, src1);
2695 sxtl2(vform, temp2, src2);
2696 mls(vform, dst, temp1, temp2);
2697 return dst;
2698}
2699
2700LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
2701 const LogicVRegister& src1,
2702 const LogicVRegister& src2) {
2703 SimVRegister temp1, temp2;
2704 uxtl(vform, temp1, src1);
2705 uxtl(vform, temp2, src2);
2706 mla(vform, dst, temp1, temp2);
2707 return dst;
2708}
2709
2710LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
2711 const LogicVRegister& src1,
2712 const LogicVRegister& src2) {
2713 SimVRegister temp1, temp2;
2714 uxtl2(vform, temp1, src1);
2715 uxtl2(vform, temp2, src2);
2716 mla(vform, dst, temp1, temp2);
2717 return dst;
2718}
2719
2720LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
2721 const LogicVRegister& src1,
2722 const LogicVRegister& src2) {
2723 SimVRegister temp1, temp2;
2724 sxtl(vform, temp1, src1);
2725 sxtl(vform, temp2, src2);
2726 mla(vform, dst, temp1, temp2);
2727 return dst;
2728}
2729
2730LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
2731 const LogicVRegister& src1,
2732 const LogicVRegister& src2) {
2733 SimVRegister temp1, temp2;
2734 sxtl2(vform, temp1, src1);
2735 sxtl2(vform, temp2, src2);
2736 mla(vform, dst, temp1, temp2);
2737 return dst;
2738}
2739
2740LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
2741 const LogicVRegister& src1,
2742 const LogicVRegister& src2) {
2743 SimVRegister temp;
2744 LogicVRegister product = sqdmull(vform, temp, src1, src2);
2745 return add(vform, dst, dst, product).SignedSaturate(vform);
2746}
2747
2748LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
2749 const LogicVRegister& src1,
2750 const LogicVRegister& src2) {
2751 SimVRegister temp;
2752 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2753 return add(vform, dst, dst, product).SignedSaturate(vform);
2754}
2755
2756LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
2757 const LogicVRegister& src1,
2758 const LogicVRegister& src2) {
2759 SimVRegister temp;
2760 LogicVRegister product = sqdmull(vform, temp, src1, src2);
2761 return sub(vform, dst, dst, product).SignedSaturate(vform);
2762}
2763
2764LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
2765 const LogicVRegister& src1,
2766 const LogicVRegister& src2) {
2767 SimVRegister temp;
2768 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2769 return sub(vform, dst, dst, product).SignedSaturate(vform);
2770}
2771
2772LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
2773 const LogicVRegister& src1,
2774 const LogicVRegister& src2) {
2775 SimVRegister temp;
2776 LogicVRegister product = smull(vform, temp, src1, src2);
2777 return add(vform, dst, product, product).SignedSaturate(vform);
2778}
2779
2780LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
2781 const LogicVRegister& src1,
2782 const LogicVRegister& src2) {
2783 SimVRegister temp;
2784 LogicVRegister product = smull2(vform, temp, src1, src2);
2785 return add(vform, dst, product, product).SignedSaturate(vform);
2786}
2787
2788LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
2789 const LogicVRegister& src1,
2790 const LogicVRegister& src2, bool round) {
2791 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
2792 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
2793 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
2794
2795 int esize = LaneSizeInBitsFromFormat(vform);
2796 int round_const = round ? (1 << (esize - 2)) : 0;
2797 int64_t product;
2798
2799 dst.ClearForWrite(vform);
2800 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2801 product = src1.Int(vform, i) * src2.Int(vform, i);
2802 product += round_const;
2803 product = product >> (esize - 1);
2804
2805 if (product > MaxIntFromFormat(vform)) {
2806 product = MaxIntFromFormat(vform);
2807 } else if (product < MinIntFromFormat(vform)) {
2808 product = MinIntFromFormat(vform);
2809 }
2810 dst.SetInt(vform, i, product);
2811 }
2812 return dst;
2813}
2814
2815LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
2816 const LogicVRegister& src1,
2817 const LogicVRegister& src2) {
2818 return sqrdmulh(vform, dst, src1, src2, false);
2819}
2820
2821LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst,
2822 const LogicVRegister& src1,
2823 const LogicVRegister& src2) {
2824 SimVRegister temp;
2825 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2826 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2827 return dst;
2828}
2829
2830LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst,
2831 const LogicVRegister& src1,
2832 const LogicVRegister& src2) {
2833 SimVRegister temp;
2834 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2835 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2836 return dst;
2837}
2838
2839LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst,
2840 const LogicVRegister& src1,
2841 const LogicVRegister& src2) {
2842 SimVRegister temp;
2843 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2844 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2845 return dst;
2846}
2847
2848LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst,
2849 const LogicVRegister& src1,
2850 const LogicVRegister& src2) {
2851 SimVRegister temp;
2852 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2853 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2854 return dst;
2855}
2856
2857LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst,
2858 const LogicVRegister& src1,
2859 const LogicVRegister& src2) {
2860 SimVRegister temp;
2861 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2862 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2863 return dst;
2864}
2865
2866LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst,
2867 const LogicVRegister& src1,
2868 const LogicVRegister& src2) {
2869 SimVRegister temp;
2870 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2871 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2872 return dst;
2873}
2874
2875LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst,
2876 const LogicVRegister& src1,
2877 const LogicVRegister& src2) {
2878 SimVRegister temp;
2879 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2880 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2881 return dst;
2882}
2883
2884LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst,
2885 const LogicVRegister& src1,
2886 const LogicVRegister& src2) {
2887 SimVRegister temp;
2888 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2889 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2890 return dst;
2891}
2892
2893LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst,
2894 const LogicVRegister& src1,
2895 const LogicVRegister& src2) {
2896 uint64_t result[16];
2897 int laneCount = LaneCountFromFormat(vform);
2898 int pairs = laneCount / 2;
2899 for (int i = 0; i < pairs; ++i) {
2900 result[2 * i] = src1.Uint(vform, 2 * i);
2901 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
2902 }
2903
2904 dst.SetUintArray(vform, result);
2905 return dst;
2906}
2907
2908LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst,
2909 const LogicVRegister& src1,
2910 const LogicVRegister& src2) {
2911 uint64_t result[16];
2912 int laneCount = LaneCountFromFormat(vform);
2913 int pairs = laneCount / 2;
2914 for (int i = 0; i < pairs; ++i) {
2915 result[2 * i] = src1.Uint(vform, (2 * i) + 1);
2916 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
2917 }
2918
2919 dst.SetUintArray(vform, result);
2920 return dst;
2921}
2922
2923LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst,
2924 const LogicVRegister& src1,
2925 const LogicVRegister& src2) {
2926 uint64_t result[16];
2927 int laneCount = LaneCountFromFormat(vform);
2928 int pairs = laneCount / 2;
2929 for (int i = 0; i < pairs; ++i) {
2930 result[2 * i] = src1.Uint(vform, i);
2931 result[(2 * i) + 1] = src2.Uint(vform, i);
2932 }
2933
2934 dst.SetUintArray(vform, result);
2935 return dst;
2936}
2937
2938LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst,
2939 const LogicVRegister& src1,
2940 const LogicVRegister& src2) {
2941 uint64_t result[16];
2942 int laneCount = LaneCountFromFormat(vform);
2943 int pairs = laneCount / 2;
2944 for (int i = 0; i < pairs; ++i) {
2945 result[2 * i] = src1.Uint(vform, pairs + i);
2946 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
2947 }
2948
2949 dst.SetUintArray(vform, result);
2950 return dst;
2951}
2952
2953LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst,
2954 const LogicVRegister& src1,
2955 const LogicVRegister& src2) {
2956 uint64_t result[32];
2957 int laneCount = LaneCountFromFormat(vform);
2958 for (int i = 0; i < laneCount; ++i) {
2959 result[i] = src1.Uint(vform, i);
2960 result[laneCount + i] = src2.Uint(vform, i);
2961 }
2962
2963 dst.ClearForWrite(vform);
2964 for (int i = 0; i < laneCount; ++i) {
2965 dst.SetUint(vform, i, result[2 * i]);
2966 }
2967 return dst;
2968}
2969
2970LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst,
2971 const LogicVRegister& src1,
2972 const LogicVRegister& src2) {
2973 uint64_t result[32];
2974 int laneCount = LaneCountFromFormat(vform);
2975 for (int i = 0; i < laneCount; ++i) {
2976 result[i] = src1.Uint(vform, i);
2977 result[laneCount + i] = src2.Uint(vform, i);
2978 }
2979
2980 dst.ClearForWrite(vform);
2981 for (int i = 0; i < laneCount; ++i) {
2982 dst.SetUint(vform, i, result[(2 * i) + 1]);
2983 }
2984 return dst;
2985}
2986
2987template <typename T>
2988T Simulator::FPAdd(T op1, T op2) {
2989 T result = FPProcessNaNs(op1, op2);
2990 if (std::isnan(result)) return result;
2991
2992 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
2993 // inf + -inf returns the default NaN.
2994 FPProcessException();
2995 return FPDefaultNaN<T>();
2996 } else {
2997 // Other cases should be handled by standard arithmetic.
2998 return op1 + op2;
2999 }
3000}
3001
3002template <typename T>
3003T Simulator::FPSub(T op1, T op2) {
3004 // NaNs should be handled elsewhere.
3005 DCHECK(!std::isnan(op1) && !std::isnan(op2));
3006
3007 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
3008 // inf - inf returns the default NaN.
3009 FPProcessException();
3010 return FPDefaultNaN<T>();
3011 } else {
3012 // Other cases should be handled by standard arithmetic.
3013 return op1 - op2;
3014 }
3015}
3016
3017template <typename T>
3018T Simulator::FPMul(T op1, T op2) {
3019 // NaNs should be handled elsewhere.
3020 DCHECK(!std::isnan(op1) && !std::isnan(op2));
3021
3022 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3023 // inf * 0.0 returns the default NaN.
3024 FPProcessException();
3025 return FPDefaultNaN<T>();
3026 } else {
3027 // Other cases should be handled by standard arithmetic.
3028 return op1 * op2;
3029 }
3030}
3031
3032template <typename T>
3033T Simulator::FPMulx(T op1, T op2) {
3034 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3035 // inf * 0.0 returns +/-2.0.
3036 T two = 2.0;
3037 return copysign(1.0, op1) * copysign(1.0, op2) * two;
3038 }
3039 return FPMul(op1, op2);
3040}
3041
3042template <typename T>
3043T Simulator::FPMulAdd(T a, T op1, T op2) {
3044 T result = FPProcessNaNs3(a, op1, op2);
3045
3046 T sign_a = copysign(1.0, a);
3047 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
3048 bool isinf_prod = std::isinf(op1) || std::isinf(op2);
3049 bool operation_generates_nan =
3050 (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0
3051 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf
3052 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
3053
3054 if (std::isnan(result)) {
3055 // Generated NaNs override quiet NaNs propagated from a.
3056 if (operation_generates_nan && IsQuietNaN(a)) {
3057 FPProcessException();
3058 return FPDefaultNaN<T>();
3059 } else {
3060 return result;
3061 }
3062 }
3063
3064 // If the operation would produce a NaN, return the default NaN.
3065 if (operation_generates_nan) {
3066 FPProcessException();
3067 return FPDefaultNaN<T>();
3068 }
3069
3070 // Work around broken fma implementations for exact zero results: The sign of
3071 // exact 0.0 results is positive unless both a and op1 * op2 are negative.
3072 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3073 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
3074 }
3075
3076 result = FusedMultiplyAdd(op1, op2, a);
3077 DCHECK(!std::isnan(result));
3078
3079 // Work around broken fma implementations for rounded zero results: If a is
3080 // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
3081 if ((a == 0.0) && (result == 0.0)) {
3082 return copysign(0.0, sign_prod);
3083 }
3084
3085 return result;
3086}
3087
3088template <typename T>
3089T Simulator::FPDiv(T op1, T op2) {
3090 // NaNs should be handled elsewhere.
3091 DCHECK(!std::isnan(op1) && !std::isnan(op2));
3092
3093 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3094 // inf / inf and 0.0 / 0.0 return the default NaN.
3095 FPProcessException();
3096 return FPDefaultNaN<T>();
3097 } else {
3098 if (op2 == 0.0) {
3099 FPProcessException();
3100 if (!std::isnan(op1)) {
3101 double op1_sign = copysign(1.0, op1);
3102 double op2_sign = copysign(1.0, op2);
3103 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
3104 }
3105 }
3106
3107 // Other cases should be handled by standard arithmetic.
3108 return op1 / op2;
3109 }
3110}
3111
3112template <typename T>
3113T Simulator::FPSqrt(T op) {
3114 if (std::isnan(op)) {
3115 return FPProcessNaN(op);
3116 } else if (op < 0.0) {
3117 FPProcessException();
3118 return FPDefaultNaN<T>();
3119 } else {
3120 return std::sqrt(op);
3121 }
3122}
3123
3124template <typename T>
3125T Simulator::FPMax(T a, T b) {
3126 T result = FPProcessNaNs(a, b);
3127 if (std::isnan(result)) return result;
3128
3129 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3130 // a and b are zero, and the sign differs: return +0.0.
3131 return 0.0;
3132 } else {
3133 return (a > b) ? a : b;
3134 }
3135}
3136
3137template <typename T>
3138T Simulator::FPMaxNM(T a, T b) {
3139 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3141 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3143 }
3144
3145 T result = FPProcessNaNs(a, b);
3146 return std::isnan(result) ? result : FPMax(a, b);
3147}
3148
3149template <typename T>
3150T Simulator::FPMin(T a, T b) {
3151 T result = FPProcessNaNs(a, b);
3152 if (std::isnan(result)) return result;
3153
3154 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3155 // a and b are zero, and the sign differs: return -0.0.
3156 return -0.0;
3157 } else {
3158 return (a < b) ? a : b;
3159 }
3160}
3161
3162template <typename T>
3163T Simulator::FPMinNM(T a, T b) {
3164 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3166 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3168 }
3169
3170 T result = FPProcessNaNs(a, b);
3171 return std::isnan(result) ? result : FPMin(a, b);
3172}
3173
3174template <typename T>
3175T Simulator::FPRecipStepFused(T op1, T op2) {
3176 const T two = 2.0;
3177 if ((std::isinf(op1) && (op2 == 0.0)) ||
3178 ((op1 == 0.0) && (std::isinf(op2)))) {
3179 return two;
3180 } else if (std::isinf(op1) || std::isinf(op2)) {
3181 // Return +inf if signs match, otherwise -inf.
3182 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3184 } else {
3185 return FusedMultiplyAdd(op1, op2, two);
3186 }
3187}
3188
3189template <typename T>
3190T Simulator::FPRSqrtStepFused(T op1, T op2) {
3191 const T one_point_five = 1.5;
3192 const T two = 2.0;
3193
3194 if ((std::isinf(op1) && (op2 == 0.0)) ||
3195 ((op1 == 0.0) && (std::isinf(op2)))) {
3196 return one_point_five;
3197 } else if (std::isinf(op1) || std::isinf(op2)) {
3198 // Return +inf if signs match, otherwise -inf.
3199 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3201 } else {
3202 // The multiply-add-halve operation must be fully fused, so avoid interim
3203 // rounding by checking which operand can be losslessly divided by two
3204 // before doing the multiply-add.
3205 if (isnormal(op1 / two)) {
3206 return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3207 } else if (isnormal(op2 / two)) {
3208 return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3209 } else {
3210 // Neither operand is normal after halving: the result is dominated by
3211 // the addition term, so just return that.
3212 return one_point_five;
3213 }
3214 }
3215}
3216
3217double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3218 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3219 (value == kFP64NegativeInfinity)) {
3220 return value;
3221 } else if (std::isnan(value)) {
3222 return FPProcessNaN(value);
3223 }
3224
3225 double int_result = std::floor(value);
3226 double error = value - int_result;
3227 switch (round_mode) {
3228 case FPTieAway: {
3229 // Take care of correctly handling the range ]-0.5, -0.0], which must
3230 // yield -0.0.
3231 if ((-0.5 < value) && (value < 0.0)) {
3232 int_result = -0.0;
3233
3234 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3235 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3236 // result is positive, round up.
3237 int_result++;
3238 }
3239 break;
3240 }
3241 case FPTieEven: {
3242 // Take care of correctly handling the range [-0.5, -0.0], which must
3243 // yield -0.0.
3244 if ((-0.5 <= value) && (value < 0.0)) {
3245 int_result = -0.0;
3246
3247 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3248 // result is odd, round up.
3249 } else if ((error > 0.5) ||
3250 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3251 int_result++;
3252 }
3253 break;
3254 }
3255 case FPZero: {
3256 // If value>0 then we take floor(value)
3257 // otherwise, ceil(value).
3258 if (value < 0) {
3259 int_result = ceil(value);
3260 }
3261 break;
3262 }
3263 case FPNegativeInfinity: {
3264 // We always use floor(value).
3265 break;
3266 }
3267 case FPPositiveInfinity: {
3268 // Take care of correctly handling the range ]-1.0, -0.0], which must
3269 // yield -0.0.
3270 if ((-1.0 < value) && (value < 0.0)) {
3271 int_result = -0.0;
3272
3273 // If the error is non-zero, round up.
3274 } else if (error > 0.0) {
3275 int_result++;
3276 }
3277 break;
3278 }
3279 default:
3280 UNIMPLEMENTED();
3281 }
3282 return int_result;
3283}
3284
3285int16_t Simulator::FPToInt16(double value, FPRounding rmode) {
3286 value = FPRoundInt(value, rmode);
3287 return base::saturated_cast<int16_t>(value);
3288}
3289
3290int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3291 value = FPRoundInt(value, rmode);
3292 return base::saturated_cast<int32_t>(value);
3293}
3294
3295int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3296 value = FPRoundInt(value, rmode);
3297 return base::saturated_cast<int64_t>(value);
3298}
3299
3300uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {
3301 value = FPRoundInt(value, rmode);
3302 return base::saturated_cast<uint16_t>(value);
3303}
3304
3305uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3306 value = FPRoundInt(value, rmode);
3307 return base::saturated_cast<uint32_t>(value);
3308}
3309
3310uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3311 value = FPRoundInt(value, rmode);
3312 return base::saturated_cast<uint64_t>(value);
3313}
3314
3315#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
3316 template <typename T> \
3317 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
3318 const LogicVRegister& src1, \
3319 const LogicVRegister& src2) { \
3320 dst.ClearForWrite(vform); \
3321 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \
3322 T op1 = src1.Float<T>(i); \
3323 T op2 = src2.Float<T>(i); \
3324 T result; \
3325 if (PROCNAN) { \
3326 result = FPProcessNaNs(op1, op2); \
3327 if (!isnan(result)) { \
3328 result = OP(op1, op2); \
3329 } \
3330 } else { \
3331 result = OP(op1, op2); \
3332 } \
3333 dst.SetFloat(i, result); \
3334 } \
3335 return dst; \
3336 } \
3337 \
3338 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
3339 const LogicVRegister& src1, \
3340 const LogicVRegister& src2) { \
3341 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) { \
3342 FN<half>(vform, dst, src1, src2); \
3343 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { \
3344 FN<float>(vform, dst, src1, src2); \
3345 } else { \
3346 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); \
3347 FN<double>(vform, dst, src1, src2); \
3348 } \
3349 return dst; \
3350 }
3351NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
3352#undef DEFINE_NEON_FP_VECTOR_OP
3353
3354LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst,
3355 const LogicVRegister& src1,
3356 const LogicVRegister& src2) {
3357 SimVRegister temp;
3358 LogicVRegister product = fmul(vform, temp, src1, src2);
3359 return fneg(vform, dst, product);
3360}
3361
3362template <typename T>
3363LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3364 const LogicVRegister& src1,
3365 const LogicVRegister& src2) {
3366 dst.ClearForWrite(vform);
3367 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3368 T op1 = -src1.Float<T>(i);
3369 T op2 = src2.Float<T>(i);
3370 T result = FPProcessNaNs(op1, op2);
3371 dst.SetFloat(i, isnan(result) ? result : FPRecipStepFused(op1, op2));
3372 }
3373 return dst;
3374}
3375
3376LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3377 const LogicVRegister& src1,
3378 const LogicVRegister& src2) {
3379 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3380 frecps<half>(vform, dst, src1, src2);
3381 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3382 frecps<float>(vform, dst, src1, src2);
3383 } else {
3385 frecps<double>(vform, dst, src1, src2);
3386 }
3387 return dst;
3388}
3389
3390template <typename T>
3391LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3392 const LogicVRegister& src1,
3393 const LogicVRegister& src2) {
3394 dst.ClearForWrite(vform);
3395 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3396 T op1 = -src1.Float<T>(i);
3397 T op2 = src2.Float<T>(i);
3398 T result = FPProcessNaNs(op1, op2);
3399 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
3400 }
3401 return dst;
3402}
3403
3404int32_t Simulator::FPToFixedJS(double value) {
3405 // The Z-flag is set when the conversion from double precision floating-point
3406 // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
3407 // outside the bounds of a 32-bit integer, or isn't an exact integer then the
3408 // Z-flag is unset.
3409 int Z = 1;
3411 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3412 (value == kFP64NegativeInfinity)) {
3413 // +/- zero and infinity all return zero, however -0 and +/- Infinity also
3414 // unset the Z-flag.
3415 result = 0.0;
3416 if ((value != 0.0) || std::signbit(value)) {
3417 Z = 0;
3418 }
3419 } else if (std::isnan(value)) {
3420 // NaN values unset the Z-flag and set the result to 0.
3421 result = 0;
3422 Z = 0;
3423 } else {
3424 // All other values are converted to an integer representation, rounded
3425 // toward zero.
3426 double int_result = std::floor(value);
3427 double error = value - int_result;
3428 if ((error != 0.0) && (int_result < 0.0)) {
3429 int_result++;
3430 }
3431 // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
3432 // write a one-liner with std::round, but the behaviour on ties is incorrect
3433 // for our purposes.
3434 double mod_const = static_cast<double>(UINT64_C(1) << 32);
3435 double mod_error =
3436 (int_result / mod_const) - std::floor(int_result / mod_const);
3437 double constrained;
3438 if (mod_error == 0.5) {
3439 constrained = INT32_MIN;
3440 } else {
3441 constrained = int_result - mod_const * round(int_result / mod_const);
3442 }
3443 DCHECK(std::floor(constrained) == constrained);
3444 DCHECK(constrained >= INT32_MIN);
3445 DCHECK(constrained <= INT32_MAX);
3446 // Take the bottom 32 bits of the result as a 32-bit integer.
3447 result = static_cast<int32_t>(constrained);
3448 if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
3449 (error != 0.0)) {
3450 // If the integer result is out of range or the conversion isn't exact,
3451 // take exception and unset the Z-flag.
3452 FPProcessException();
3453 Z = 0;
3454 }
3455 }
3456 nzcv().SetN(0);
3457 nzcv().SetZ(Z);
3458 nzcv().SetC(0);
3459 nzcv().SetV(0);
3460 return result;
3461}
3462
3463LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3464 const LogicVRegister& src1,
3465 const LogicVRegister& src2) {
3466 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3467 frsqrts<half>(vform, dst, src1, src2);
3468 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3469 frsqrts<float>(vform, dst, src1, src2);
3470 } else {
3472 frsqrts<double>(vform, dst, src1, src2);
3473 }
3474 return dst;
3475}
3476
3477template <typename T>
3478LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3479 const LogicVRegister& src1,
3480 const LogicVRegister& src2, Condition cond) {
3481 dst.ClearForWrite(vform);
3482 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3483 bool result = false;
3484 T op1 = src1.Float<T>(i);
3485 T op2 = src2.Float<T>(i);
3486 T nan_result = FPProcessNaNs(op1, op2);
3487 if (!std::isnan(nan_result)) {
3488 switch (cond) {
3489 case eq:
3490 result = (op1 == op2);
3491 break;
3492 case ge:
3493 result = (op1 >= op2);
3494 break;
3495 case gt:
3496 result = (op1 > op2);
3497 break;
3498 case le:
3499 result = (op1 <= op2);
3500 break;
3501 case lt:
3502 result = (op1 < op2);
3503 break;
3504 default:
3505 UNREACHABLE();
3506 }
3507 }
3508 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
3509 }
3510 return dst;
3511}
3512
3513LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3514 const LogicVRegister& src1,
3515 const LogicVRegister& src2, Condition cond) {
3516 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3517 fcmp<half>(vform, dst, src1, src2, cond);
3518 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3519 fcmp<float>(vform, dst, src1, src2, cond);
3520 } else {
3522 fcmp<double>(vform, dst, src1, src2, cond);
3523 }
3524 return dst;
3525}
3526
3527LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst,
3528 const LogicVRegister& src, Condition cond) {
3529 SimVRegister temp;
3530 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3531 LogicVRegister zero_reg = dup_immediate(vform, temp, uint16_t{0});
3532 fcmp<half>(vform, dst, src, zero_reg, cond);
3533 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3534 LogicVRegister zero_reg = dup_immediate(vform, temp, uint32_t{0});
3535 fcmp<float>(vform, dst, src, zero_reg, cond);
3536 } else {
3538 LogicVRegister zero_reg = dup_immediate(vform, temp, uint64_t{0});
3539 fcmp<double>(vform, dst, src, zero_reg, cond);
3540 }
3541 return dst;
3542}
3543
3544LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst,
3545 const LogicVRegister& src1,
3546 const LogicVRegister& src2, Condition cond) {
3547 SimVRegister temp1, temp2;
3548 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3549 LogicVRegister abs_src1 = fabs_<half>(vform, temp1, src1);
3550 LogicVRegister abs_src2 = fabs_<half>(vform, temp2, src2);
3551 fcmp<half>(vform, dst, abs_src1, abs_src2, cond);
3552 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3553 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
3554 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
3555 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
3556 } else {
3558 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
3559 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
3560 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
3561 }
3562 return dst;
3563}
3564
3565template <typename T>
3566LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3567 const LogicVRegister& src1,
3568 const LogicVRegister& src2) {
3569 dst.ClearForWrite(vform);
3570 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3571 T op1 = src1.Float<T>(i);
3572 T op2 = src2.Float<T>(i);
3573 T acc = dst.Float<T>(i);
3574 T result = FPMulAdd(acc, op1, op2);
3575 dst.SetFloat(i, result);
3576 }
3577 return dst;
3578}
3579
3580LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3581 const LogicVRegister& src1,
3582 const LogicVRegister& src2) {
3583 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3584 fmla<half>(vform, dst, src1, src2);
3585 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3586 fmla<float>(vform, dst, src1, src2);
3587 } else {
3589 fmla<double>(vform, dst, src1, src2);
3590 }
3591 return dst;
3592}
3593
3594template <typename T>
3595LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3596 const LogicVRegister& src1,
3597 const LogicVRegister& src2) {
3598 dst.ClearForWrite(vform);
3599 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3600 T op1 = -src1.Float<T>(i);
3601 T op2 = src2.Float<T>(i);
3602 T acc = dst.Float<T>(i);
3603 T result = FPMulAdd(acc, op1, op2);
3604 dst.SetFloat(i, result);
3605 }
3606 return dst;
3607}
3608
3609LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3610 const LogicVRegister& src1,
3611 const LogicVRegister& src2) {
3612 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3613 fmls<half>(vform, dst, src1, src2);
3614 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3615 fmls<float>(vform, dst, src1, src2);
3616 } else {
3618 fmls<double>(vform, dst, src1, src2);
3619 }
3620 return dst;
3621}
3622
3623template <typename T>
3624LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3625 const LogicVRegister& src) {
3626 dst.ClearForWrite(vform);
3627 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3628 T op = src.Float<T>(i);
3629 op = -op;
3630 dst.SetFloat(i, op);
3631 }
3632 return dst;
3633}
3634
3635LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3636 const LogicVRegister& src) {
3637 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3638 fneg<half>(vform, dst, src);
3639 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3640 fneg<float>(vform, dst, src);
3641 } else {
3643 fneg<double>(vform, dst, src);
3644 }
3645 return dst;
3646}
3647
3648template <typename T>
3649LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3650 const LogicVRegister& src) {
3651 dst.ClearForWrite(vform);
3652 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3653 T op = src.Float<T>(i);
3654 if (copysign(1.0, op) < 0.0) {
3655 op = -op;
3656 }
3657 dst.SetFloat(i, op);
3658 }
3659 return dst;
3660}
3661
3662LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3663 const LogicVRegister& src) {
3664 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3665 fabs_<half>(vform, dst, src);
3666 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3667 fabs_<float>(vform, dst, src);
3668 } else {
3670 fabs_<double>(vform, dst, src);
3671 }
3672 return dst;
3673}
3674
3675LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst,
3676 const LogicVRegister& src1,
3677 const LogicVRegister& src2) {
3678 SimVRegister temp;
3679 fsub(vform, temp, src1, src2);
3680 fabs_(vform, dst, temp);
3681 return dst;
3682}
3683
3684LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst,
3685 const LogicVRegister& src) {
3686 dst.ClearForWrite(vform);
3687 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3688 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3689 half result = FPSqrt(src.Float<half>(i));
3690 dst.SetFloat(i, result);
3691 }
3692 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3693 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3694 float result = FPSqrt(src.Float<float>(i));
3695 dst.SetFloat(i, result);
3696 }
3697 } else {
3699 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3700 double result = FPSqrt(src.Float<double>(i));
3701 dst.SetFloat(i, result);
3702 }
3703 }
3704 return dst;
3705}
3706
3707#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \
3708 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
3709 const LogicVRegister& src1, \
3710 const LogicVRegister& src2) { \
3711 SimVRegister temp1, temp2; \
3712 uzp1(vform, temp1, src1, src2); \
3713 uzp2(vform, temp2, src1, src2); \
3714 FN(vform, dst, temp1, temp2); \
3715 return dst; \
3716 } \
3717 \
3718 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
3719 const LogicVRegister& src) { \
3720 if (vform == kFormatS) { \
3721 float result = OP(src.Float<float>(0), src.Float<float>(1)); \
3722 dst.SetFloat(0, result); \
3723 } else { \
3724 DCHECK_EQ(vform, kFormatD); \
3725 double result = OP(src.Float<double>(0), src.Float<double>(1)); \
3726 dst.SetFloat(0, result); \
3727 } \
3728 dst.ClearForWrite(vform); \
3729 return dst; \
3730 }
3731NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
3732#undef DEFINE_NEON_FP_PAIR_OP
3733
3734LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst,
3735 const LogicVRegister& src, FPMinMaxOp Op) {
3736 DCHECK_EQ(vform, kFormat4S);
3737 USE(vform);
3738 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
3739 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
3740 float result = (this->*Op)(result1, result2);
3741 dst.ClearForWrite(kFormatS);
3742 dst.SetFloat<float>(0, result);
3743 return dst;
3744}
3745
3746LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst,
3747 const LogicVRegister& src) {
3748 return FMinMaxV(vform, dst, src, &Simulator::FPMax);
3749}
3750
3751LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst,
3752 const LogicVRegister& src) {
3753 return FMinMaxV(vform, dst, src, &Simulator::FPMin);
3754}
3755
3756LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst,
3757 const LogicVRegister& src) {
3758 return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM);
3759}
3760
3761LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst,
3762 const LogicVRegister& src) {
3763 return FMinMaxV(vform, dst, src, &Simulator::FPMinNM);
3764}
3765
3766LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst,
3767 const LogicVRegister& src1,
3768 const LogicVRegister& src2, int index) {
3769 dst.ClearForWrite(vform);
3770 SimVRegister temp;
3771 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3772 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
3773 fmul<half>(vform, dst, src1, index_reg);
3774 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3775 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3776 fmul<float>(vform, dst, src1, index_reg);
3777 } else {
3779 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3780 fmul<double>(vform, dst, src1, index_reg);
3781 }
3782 return dst;
3783}
3784
3785LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3786 const LogicVRegister& src1,
3787 const LogicVRegister& src2, int index) {
3788 dst.ClearForWrite(vform);
3789 SimVRegister temp;
3790 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3791 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
3792 fmla<half>(vform, dst, src1, index_reg);
3793 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3794 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3795 fmla<float>(vform, dst, src1, index_reg);
3796 } else {
3798 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3799 fmla<double>(vform, dst, src1, index_reg);
3800 }
3801 return dst;
3802}
3803
3804LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3805 const LogicVRegister& src1,
3806 const LogicVRegister& src2, int index) {
3807 dst.ClearForWrite(vform);
3808 SimVRegister temp;
3809 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3810 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
3811 fmls<half>(vform, dst, src1, index_reg);
3812 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3813 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3814 fmls<float>(vform, dst, src1, index_reg);
3815 } else {
3817 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3818 fmls<double>(vform, dst, src1, index_reg);
3819 }
3820 return dst;
3821}
3822
3823LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst,
3824 const LogicVRegister& src1,
3825 const LogicVRegister& src2, int index) {
3826 dst.ClearForWrite(vform);
3827 SimVRegister temp;
3828 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3829 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
3830 fmulx<half>(vform, dst, src1, index_reg);
3831 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3832 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3833 fmulx<float>(vform, dst, src1, index_reg);
3834 } else {
3836 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3837 fmulx<double>(vform, dst, src1, index_reg);
3838 }
3839 return dst;
3840}
3841
3842LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst,
3843 const LogicVRegister& src,
3845 bool inexact_exception) {
3846 dst.ClearForWrite(vform);
3847 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3848 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3849 half input = src.Float<half>(i);
3850 half rounded = FPRoundInt(input, rounding_mode);
3851 if (inexact_exception && !isnan(input) && (input != rounded)) {
3852 FPProcessException();
3853 }
3854 dst.SetFloat<half>(i, rounded);
3855 }
3856 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3857 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3858 float input = src.Float<float>(i);
3859 float rounded = FPRoundInt(input, rounding_mode);
3860 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3861 FPProcessException();
3862 }
3863 dst.SetFloat<float>(i, rounded);
3864 }
3865 } else {
3867 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3868 double input = src.Float<double>(i);
3869 double rounded = FPRoundInt(input, rounding_mode);
3870 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3871 FPProcessException();
3872 }
3873 dst.SetFloat<double>(i, rounded);
3874 }
3875 }
3876 return dst;
3877}
3878
3879LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst,
3880 const LogicVRegister& src,
3881 FPRounding rounding_mode, int fbits) {
3882 dst.ClearForWrite(vform);
3883 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3884 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3885 half op = src.Float<half>(i) * std::pow(2, fbits);
3886 dst.SetInt(vform, i, FPToInt16(op, rounding_mode));
3887 }
3888 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3889 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3890 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3891 dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
3892 }
3893 } else {
3895 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3896 double op = src.Float<double>(i) * std::pow(2.0, fbits);
3897 dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
3898 }
3899 }
3900 return dst;
3901}
3902
3903LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst,
3904 const LogicVRegister& src,
3905 FPRounding rounding_mode, int fbits) {
3906 dst.ClearForWrite(vform);
3907 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3908 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3909 half op = src.Float<half>(i) * std::pow(2.0f, fbits);
3910 dst.SetUint(vform, i, FPToUInt16(op, rounding_mode));
3911 }
3912 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3913 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3914 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3915 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
3916 }
3917 } else {
3919 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3920 double op = src.Float<double>(i) * std::pow(2.0, fbits);
3921 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
3922 }
3923 }
3924 return dst;
3925}
3926
3927LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst,
3928 const LogicVRegister& src) {
3929 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3930 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3931 dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
3932 }
3933 } else {
3935 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3936 dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
3937 }
3938 }
3939 return dst;
3940}
3941
3942LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst,
3943 const LogicVRegister& src) {
3944 int lane_count = LaneCountFromFormat(vform);
3945 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3946 for (int i = 0; i < lane_count; i++) {
3947 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
3948 }
3949 } else {
3951 for (int i = 0; i < lane_count; i++) {
3952 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
3953 }
3954 }
3955 return dst;
3956}
3957
3958LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst,
3959 const LogicVRegister& src) {
3960 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3961 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3962 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
3963 }
3964 } else {
3966 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3967 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
3968 }
3969 }
3970 dst.ClearForWrite(vform);
3971 return dst;
3972}
3973
3974LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst,
3975 const LogicVRegister& src) {
3976 int lane_count = LaneCountFromFormat(vform) / 2;
3977 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3978 for (int i = lane_count - 1; i >= 0; i--) {
3979 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
3980 }
3981 } else {
3983 for (int i = lane_count - 1; i >= 0; i--) {
3984 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
3985 }
3986 }
3987 return dst;
3988}
3989
3990LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst,
3991 const LogicVRegister& src) {
3992 dst.ClearForWrite(vform);
3994 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3995 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
3996 }
3997 return dst;
3998}
3999
4000LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst,
4001 const LogicVRegister& src) {
4003 int lane_count = LaneCountFromFormat(vform) / 2;
4004 for (int i = lane_count - 1; i >= 0; i--) {
4005 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
4006 }
4007 return dst;
4008}
4009
4010// Based on reference C function recip_sqrt_estimate from ARM ARM.
4011double Simulator::recip_sqrt_estimate(double a) {
4012 int q0, q1, s;
4013 double r;
4014 if (a < 0.5) {
4015 q0 = static_cast<int>(a * 512.0);
4016 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
4017 } else {
4018 q1 = static_cast<int>(a * 256.0);
4019 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
4020 }
4021 s = static_cast<int>(256.0 * r + 0.5);
4022 return static_cast<double>(s) / 256.0;
4023}
4024
4025namespace {
4026
4027inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
4028 return unsigned_bitextract_64(start_bit, end_bit, val);
4029}
4030
4031} // anonymous namespace
4032
4033template <typename T>
4034T Simulator::FPRecipSqrtEstimate(T op) {
4035 static_assert(std::is_same_v<float, T> || std::is_same_v<double, T>,
4036 "T must be a float or double");
4037
4038 if (std::isnan(op)) {
4039 return FPProcessNaN(op);
4040 } else if (op == 0.0) {
4041 if (copysign(1.0, op) < 0.0) {
4042 return kFP64NegativeInfinity;
4043 } else {
4044 return kFP64PositiveInfinity;
4045 }
4046 } else if (copysign(1.0, op) < 0.0) {
4047 FPProcessException();
4048 return FPDefaultNaN<T>();
4049 } else if (std::isinf(op)) {
4050 return 0.0;
4051 } else {
4052 uint64_t fraction;
4053 int32_t exp, result_exp;
4054
4055 if (sizeof(T) == sizeof(float)) {
4056 exp = static_cast<int32_t>(float_exp(op));
4057 fraction = float_mantissa(op);
4058 fraction <<= 29;
4059 } else {
4060 exp = static_cast<int32_t>(double_exp(op));
4061 fraction = double_mantissa(op);
4062 }
4063
4064 if (exp == 0) {
4065 while (Bits(fraction, 51, 51) == 0) {
4066 fraction = Bits(fraction, 50, 0) << 1;
4067 exp -= 1;
4068 }
4069 fraction = Bits(fraction, 50, 0) << 1;
4070 }
4071
4072 double scaled;
4073 if (Bits(exp, 0, 0) == 0) {
4074 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4075 } else {
4076 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
4077 }
4078
4079 if (sizeof(T) == sizeof(float)) {
4080 result_exp = (380 - exp) / 2;
4081 } else {
4082 result_exp = (3068 - exp) / 2;
4083 }
4084
4085 uint64_t estimate = base::bit_cast<uint64_t>(recip_sqrt_estimate(scaled));
4086
4087 if (sizeof(T) == sizeof(float)) {
4088 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4089 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
4090 return float_pack(0, exp_bits, est_bits);
4091 } else {
4092 return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
4093 }
4094 }
4095}
4096
4097LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst,
4098 const LogicVRegister& src) {
4099 dst.ClearForWrite(vform);
4100 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
4101 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4102 half input = src.Float<half>(i);
4103 dst.SetFloat<half>(i, FPRecipSqrtEstimate<float>(input));
4104 }
4105 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4106 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4107 float input = src.Float<float>(i);
4108 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
4109 }
4110 } else {
4112 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4113 double input = src.Float<double>(i);
4114 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
4115 }
4116 }
4117 return dst;
4118}
4119
4120template <typename T>
4121T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
4122 static_assert(std::is_same_v<float, T> || std::is_same_v<double, T>,
4123 "T must be a float or double");
4124 uint32_t sign;
4125
4126 if (sizeof(T) == sizeof(float)) {
4127 sign = float_sign(op);
4128 } else {
4129 sign = double_sign(op);
4130 }
4131
4132 if (std::isnan(op)) {
4133 return FPProcessNaN(op);
4134 } else if (std::isinf(op)) {
4135 return (sign == 1) ? -0.0 : 0.0;
4136 } else if (op == 0.0) {
4137 FPProcessException(); // FPExc_DivideByZero exception.
4139 } else if (((sizeof(T) == sizeof(float)) &&
4140 (std::fabs(op) < std::pow(2.0, -128.0))) ||
4141 ((sizeof(T) == sizeof(double)) &&
4142 (std::fabs(op) < std::pow(2.0, -1024.0)))) {
4143 bool overflow_to_inf = false;
4144 switch (rounding) {
4145 case FPTieEven:
4146 overflow_to_inf = true;
4147 break;
4148 case FPPositiveInfinity:
4149 overflow_to_inf = (sign == 0);
4150 break;
4151 case FPNegativeInfinity:
4152 overflow_to_inf = (sign == 1);
4153 break;
4154 case FPZero:
4155 overflow_to_inf = false;
4156 break;
4157 default:
4158 break;
4159 }
4160 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
4161 if (overflow_to_inf) {
4163 } else {
4164 // Return FPMaxNormal(sign).
4165 if (sizeof(T) == sizeof(float)) {
4166 return float_pack(sign, 0xFE, 0x07FFFFF);
4167 } else {
4168 return double_pack(sign, 0x7FE, 0x0FFFFFFFFFFFFFl);
4169 }
4170 }
4171 } else {
4172 uint64_t fraction;
4173 int32_t exp, result_exp;
4174 uint32_t sign;
4175
4176 if (sizeof(T) == sizeof(float)) {
4177 sign = float_sign(op);
4178 exp = static_cast<int32_t>(float_exp(op));
4179 fraction = float_mantissa(op);
4180 fraction <<= 29;
4181 } else {
4182 sign = double_sign(op);
4183 exp = static_cast<int32_t>(double_exp(op));
4184 fraction = double_mantissa(op);
4185 }
4186
4187 if (exp == 0) {
4188 if (Bits(fraction, 51, 51) == 0) {
4189 exp -= 1;
4190 fraction = Bits(fraction, 49, 0) << 2;
4191 } else {
4192 fraction = Bits(fraction, 50, 0) << 1;
4193 }
4194 }
4195
4196 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4197
4198 if (sizeof(T) == sizeof(float)) {
4199 result_exp = 253 - exp;
4200 } else {
4201 result_exp = 2045 - exp;
4202 }
4203
4204 double estimate = recip_estimate(scaled);
4205
4206 fraction = double_mantissa(estimate);
4207 if (result_exp == 0) {
4208 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4209 } else if (result_exp == -1) {
4210 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4211 result_exp = 0;
4212 }
4213 if (sizeof(T) == sizeof(float)) {
4214 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4215 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4216 return float_pack(sign, exp_bits, frac_bits);
4217 } else {
4218 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4219 }
4220 }
4221}
4222
4223LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst,
4224 const LogicVRegister& src, FPRounding round) {
4225 dst.ClearForWrite(vform);
4226 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
4227 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4228 half input = src.Float<half>(i);
4229 dst.SetFloat<half>(i, FPRecipEstimate<float>(input, round));
4230 }
4231 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4232 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4233 float input = src.Float<float>(i);
4234 dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4235 }
4236 } else {
4238 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4239 double input = src.Float<double>(i);
4240 dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4241 }
4242 }
4243 return dst;
4244}
4245
4246LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst,
4247 const LogicVRegister& src) {
4248 dst.ClearForWrite(vform);
4249 uint64_t operand;
4250 uint32_t result;
4251 double dp_operand, dp_result;
4252 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4253 operand = src.Uint(vform, i);
4254 if (operand <= 0x3FFFFFFF) {
4255 result = 0xFFFFFFFF;
4256 } else {
4257 dp_operand = operand * std::pow(2.0, -32);
4258 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4259 result = static_cast<uint32_t>(dp_result);
4260 }
4261 dst.SetUint(vform, i, result);
4262 }
4263 return dst;
4264}
4265
4266// Based on reference C function recip_estimate from ARM ARM.
4267double Simulator::recip_estimate(double a) {
4268 int q, s;
4269 double r;
4270 q = static_cast<int>(a * 512.0);
4271 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4272 s = static_cast<int>(256.0 * r + 0.5);
4273 return static_cast<double>(s) / 256.0;
4274}
4275
4276LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst,
4277 const LogicVRegister& src) {
4278 dst.ClearForWrite(vform);
4279 uint64_t operand;
4280 uint32_t result;
4281 double dp_operand, dp_result;
4282 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4283 operand = src.Uint(vform, i);
4284 if (operand <= 0x7FFFFFFF) {
4285 result = 0xFFFFFFFF;
4286 } else {
4287 dp_operand = operand * std::pow(2.0, -32);
4288 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4289 result = static_cast<uint32_t>(dp_result);
4290 }
4291 dst.SetUint(vform, i, result);
4292 }
4293 return dst;
4294}
4295
4296template <typename T>
4297LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4298 const LogicVRegister& src) {
4299 dst.ClearForWrite(vform);
4300 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4301 T op = src.Float<T>(i);
4302 T result;
4303 if (std::isnan(op)) {
4304 result = FPProcessNaN(op);
4305 } else {
4306 int exp;
4307 uint32_t sign;
4308 if (sizeof(T) == sizeof(float)) {
4309 sign = float_sign(op);
4310 exp = static_cast<int>(float_exp(op));
4311 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4312 result = float_pack(sign, exp, 0);
4313 } else {
4314 DCHECK_EQ(sizeof(T), sizeof(double));
4315 sign = double_sign(op);
4316 exp = static_cast<int>(double_exp(op));
4317 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4318 result = double_pack(sign, exp, 0);
4319 }
4320 }
4321 dst.SetFloat(i, result);
4322 }
4323 return dst;
4324}
4325
4326LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4327 const LogicVRegister& src) {
4328 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4329 frecpx<float>(vform, dst, src);
4330 } else {
4332 frecpx<double>(vform, dst, src);
4333 }
4334 return dst;
4335}
4336
4337LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst,
4338 const LogicVRegister& src, int fbits,
4339 FPRounding round) {
4340 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4341 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
4342 float16 result = FixedToFloat16(src.Int(kFormatH, i), fbits, round);
4343 dst.SetFloat<float16>(i, result);
4344 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4345 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4346 dst.SetFloat<float>(i, result);
4347 } else {
4349 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4350 dst.SetFloat<double>(i, result);
4351 }
4352 }
4353 return dst;
4354}
4355
4356LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst,
4357 const LogicVRegister& src, int fbits,
4358 FPRounding round) {
4359 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4360 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
4361 float16 result = UFixedToFloat16(src.Uint(kFormatH, i), fbits, round);
4362 dst.SetFloat<float16>(i, result);
4363 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4364 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4365 dst.SetFloat<float>(i, result);
4366 } else {
4368 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4369 dst.SetFloat<double>(i, result);
4370 }
4371 }
4372 return dst;
4373}
4374
4375LogicVRegister Simulator::dot(VectorFormat vform, LogicVRegister dst,
4376 const LogicVRegister& src1,
4377 const LogicVRegister& src2, bool is_src1_signed,
4378 bool is_src2_signed) {
4379 VectorFormat quarter_vform =
4381
4382 dst.ClearForWrite(vform);
4383 for (int e = 0; e < LaneCountFromFormat(vform); e++) {
4384 uint64_t result = 0;
4385 int64_t element1, element2;
4386 for (int i = 0; i < 4; i++) {
4387 int index = 4 * e + i;
4388 if (is_src1_signed) {
4389 element1 = src1.Int(quarter_vform, index);
4390 } else {
4391 element1 = src1.Uint(quarter_vform, index);
4392 }
4393 if (is_src2_signed) {
4394 element2 = src2.Int(quarter_vform, index);
4395 } else {
4396 element2 = src2.Uint(quarter_vform, index);
4397 }
4398 result += element1 * element2;
4399 }
4400 dst.SetUint(vform, e, result + dst.Uint(vform, e));
4401 }
4402 return dst;
4403}
4404
4405LogicVRegister Simulator::sdot(VectorFormat vform, LogicVRegister dst,
4406 const LogicVRegister& src1,
4407 const LogicVRegister& src2) {
4408 return dot(vform, dst, src1, src2, true, true);
4409}
4410
4411} // namespace internal
4412} // namespace v8
4413
4414#endif // USE_SIMULATOR
int32_t offset
std::optional< TNode< JSArray > > a
std::vector< PatternMap > pairs
RoundingMode rounding_mode
ZoneVector< RpoNumber > & result
uint32_t const mask
int s
Definition mul-fft.cc:297
int r
Definition mul-fft.cc:298
int int32_t
Definition unicode.cc:40
unsigned short uint16_t
Definition unicode.cc:39
signed short int16_t
Definition unicode.cc:38
double exp(double x)
Definition ieee754.cc:1447
V8_INLINE Dest bit_cast(Source const &source)
Definition macros.h:95
int CountLeadingSignBits(int64_t value, int width)
VectorFormat ScalarFormatFromLaneSize(int lanesize)
constexpr unsigned kDoubleExponentBias
VectorFormat VectorFormatHalfLanes(VectorFormat vform)
bool IsSignallingNaN(double num)
Definition utils-arm64.h:82
constexpr unsigned kFloat16ExponentBits
V8_EXPORT_PRIVATE int LaneCountFromFormat(VectorFormat vform)
V8_EXPORT_PRIVATE const float kFP32PositiveInfinity
constexpr unsigned kFloatMantissaBits
const float16 kFP16NegativeInfinity
constexpr int kSRegSize
uint32_t float_sign(float val)
constexpr int kHRegSize
constexpr unsigned kDoubleMantissaBits
int64_t MinIntFromFormat(VectorFormat vform)
V8_EXPORT_PRIVATE const double kFP64DefaultNaN
const float16 kFP16PositiveInfinity
V8_EXPORT_PRIVATE const float kFP32DefaultNaN
uint64_t MaxUintFromFormat(VectorFormat vform)
V8_EXPORT_PRIVATE const double kFP64PositiveInfinity
constexpr int kMaxLanesPerVector
constexpr unsigned kFloatExponentBias
double ToQuietNaN(double num)
constexpr int64_t kHQuietNanMask
const float16 kFP16DefaultNaN
bool IsQuietNaN(T num)
constexpr unsigned kFloatExponentBits
double FusedMultiplyAdd(double op1, double op2, double a)
int64_t MaxIntFromFormat(VectorFormat vform)
uint32_t double_sign(double val)
VectorFormat VectorFormatHalfWidth(VectorFormat vform)
float float_pack(uint32_t sign, uint32_t exp, uint32_t mantissa)
double double_pack(uint64_t sign, uint64_t exp, uint64_t mantissa)
constexpr unsigned kFloat16MantissaBits
int float16classify(float16 value)
unsigned LaneSizeInBitsFromFormat(VectorFormat vform)
uint32_t float_mantissa(float val)
uint32_t double_exp(double val)
uint16_t DoubleToFloat16(double value)
V8_EXPORT_PRIVATE const float kFP32NegativeInfinity
uint32_t float_exp(float val)
return value
Definition map-inl.h:893
uint64_t double_mantissa(double val)
V8_EXPORT_PRIVATE const double kFP64NegativeInfinity
uint64_t unsigned_bitextract_64(int msb, int lsb, uint64_t x)
Definition utils.h:559
VectorFormat VectorFormatDoubleWidth(VectorFormat vform)
constexpr int kDRegSize
static int CountLeadingZeros(uint64_t value, int width)
Definition utils-arm64.h:34
int LaneSizeInBytesFromFormat(VectorFormat vform)
constexpr unsigned kDoubleExponentBits
VectorFormat VectorFormatHalfWidthDoubleLanes(VectorFormat vform)
constexpr unsigned kFloat16ExponentBias
uint32_t unsigned_bitextract_32(int msb, int lsb, uint32_t x)
Definition utils.h:555
bool is_signed(Condition cond)
VectorFormat VectorFormatFillQ(int laneSize)
VectorFormat ScalarFormatFromFormat(VectorFormat vform)
#define DCHECK_LE(v1, v2)
Definition logging.h:490
#define DCHECK_NOT_NULL(val)
Definition logging.h:492
#define DCHECK_GE(v1, v2)
Definition logging.h:488
#define DCHECK(condition)
Definition logging.h:482
#define DCHECK_LT(v1, v2)
Definition logging.h:489
#define DCHECK_EQ(v1, v2)
Definition logging.h:485
#define USE(...)
Definition macros.h:293