7#if defined(USE_SIMULATOR)
12#include "third_party/fp16/src/include/fp16.h"
20 half(
float f) : bits_(fp16_ieee_from_fp32_value(f)) {}
22 explicit half(uint16_t b) : bits_(b) {}
23 operator float()
const {
return fp16_ieee_to_fp32_value(bits_); }
25 uint16_t bits()
const {
return bits_; }
32half Simulator::FPDefaultNaN<half>() {
42 return std::isnormal(f);
46bool isnormal(half f) {
50double copysign(
double a,
double f) {
return std::copysign(a, f); }
// float overload: yields the magnitude of `a` (narrowed to float) carrying the
// sign of `f`.
//
// `a` is cast to float explicitly so that the single-precision std::copysign
// overload is selected, instead of promoting both operands to double and
// relying on an implicit double->float narrowing of the return value. This
// mirrors the half overload, which also casts `a` to float first.
float copysign(double a, float f) {
  return std::copysign(static_cast<float>(a), f);
}
// half overload: the magnitude `a` is narrowed to float and passed to
// std::copysign together with `f`; the returned value is then converted to
// half for the return type.
// NOTE(review): the closing brace of this function is missing from this excerpt.
52half copysign(
double a, half f) {
53 return std::copysign(
static_cast<float>(a), f);
56static_assert(
sizeof(half) ==
sizeof(
uint16_t),
"Half must be 16 bit");
// Rounds a (sign, exponent, mantissa) triple to double precision by
// instantiating FPRound for a 64-bit result with the double exponent and
// mantissa widths.
// NOTE(review): the end of the parameter list (round_mode) and the statement
// that turns `bits` into the returned double are missing from this excerpt.
61inline double FPRoundToDouble(int64_t
sign, int64_t exponent, uint64_t mantissa,
63 uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(
64 sign, exponent, mantissa, round_mode);
// Rounds a (sign, exponent, mantissa) triple to single precision by
// instantiating FPRound for a 32-bit result with the float exponent and
// mantissa widths.
// NOTE(review): the end of the parameter list (round_mode) and the statement
// that turns `bits` into the returned float are missing from this excerpt.
69inline float FPRoundToFloat(int64_t
sign, int64_t exponent, uint64_t mantissa,
71 uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(
72 sign, exponent, mantissa, round_mode);
// Rounds a (sign, exponent, mantissa) triple to half precision. The FPRound
// result (instantiated with the float16 exponent/mantissa widths) is returned
// directly as a float16.
// NOTE(review): the end of the parameter list (mantissa, round_mode) is
// missing from this excerpt.
77inline float16 FPRoundToFloat16(int64_t
sign, int64_t exponent,
79 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
80 sign, exponent, mantissa, round_mode);
// Signed fixed-point to double conversion, implemented on top of the unsigned
// converter UFixedToDouble and negating the result for negative inputs.
// NOTE(review): the condition of the first branch is missing from this
// excerpt (presumably src >= 0), as are the final `else` and closing braces.
85double Simulator::FixedToDouble(int64_t src,
int fbits,
FPRounding round) {
87 return UFixedToDouble(src, fbits, round);
88 }
// INT64_MIN is passed through unnegated: -src is not representable in
// int64_t, and UFixedToDouble takes its source as uint64_t.
else if (src == INT64_MIN) {
89 return -UFixedToDouble(src, fbits, round);
91 return -UFixedToDouble(-src, fbits, round);
// Unsigned fixed-point to double conversion. The visible lines derive the
// exponent as `highest_significant_bit - fbits` and hand the unmodified `src`
// to FPRoundToDouble as the mantissa, with a sign of 0.
// NOTE(review): the computation of highest_significant_bit and any early
// returns are missing from this excerpt.
95double Simulator::UFixedToDouble(uint64_t src,
int fbits,
FPRounding round) {
105 const int64_t exponent = highest_significant_bit - fbits;
107 return FPRoundToDouble(0, exponent, src, round);
// Signed fixed-point to float conversion, implemented on top of the unsigned
// converter UFixedToFloat and negating the result for negative inputs.
// NOTE(review): the condition of the first branch is missing from this
// excerpt (presumably src >= 0), as are the final `else` and closing braces.
110float Simulator::FixedToFloat(int64_t src,
int fbits,
FPRounding round) {
112 return UFixedToFloat(src, fbits, round);
113 }
// INT64_MIN is passed through unnegated: -src is not representable in
// int64_t, and UFixedToFloat takes its source as uint64_t.
else if (src == INT64_MIN) {
114 return -UFixedToFloat(src, fbits, round);
116 return -UFixedToFloat(-src, fbits, round);
// Unsigned fixed-point to float conversion. The visible lines derive the
// exponent as `highest_significant_bit - fbits` and hand the unmodified `src`
// to FPRoundToFloat as the mantissa, with a sign of 0.
// NOTE(review): the computation of highest_significant_bit and any early
// returns are missing from this excerpt.
120float Simulator::UFixedToFloat(uint64_t src,
int fbits,
FPRounding round) {
130 const int32_t exponent = highest_significant_bit - fbits;
132 return FPRoundToFloat(0, exponent, src, round);
137 return UFixedToFloat16(src, fbits, round);
138 }
else if (src == INT64_MIN) {
139 return -UFixedToFloat16(src, fbits, round);
141 return -UFixedToFloat16(-src, fbits, round);
149 return static_cast<float16>(0);
155 const int16_t exponent = highest_significant_bit - fbits;
157 return FPRoundToFloat16(0, exponent, src, round);
// Converts a float to double, dispatching on std::fpclassify(value).
// Visible pieces: one path calls FPProcessException(), a sign is taken from
// bit 31 of `raw`, and one path returns a plain static_cast<double>(value).
// NOTE(review): the case labels and the definition of `raw` (presumably the
// float's bit pattern) are missing from this excerpt.
160double Simulator::FPToDouble(
float value) {
161 switch (std::fpclassify(value)) {
164 FPProcessException();
175 uint64_t
sign = raw >> 31;
195 return static_cast<double>(
value);
// Converts a float16 to float. The sign is taken as `value >> 15`, and one
// visible path returns a zero whose sign follows it (+0.0f for sign == 0,
// otherwise -0.0f).
// NOTE(review): the classification switch and the other cases are missing
// from this excerpt.
202float Simulator::FPToFloat(
float16 value) {
203 uint32_t
sign = value >> 15;
212 return (
sign == 0) ? 0.0f : -0.0f;
233 FPProcessException();
275 switch (std::fpclassify(value)) {
278 FPProcessException();
294 return (
sign == 0) ? 0 : 0x8000;
306 return FPRoundToFloat16(
sign, exponent, mantissa, round_mode);
323 switch (std::fpclassify(value)) {
326 FPProcessException();
342 return (
sign == 0) ? 0 : 0x8000;
354 return FPRoundToFloat16(
sign, exponent, mantissa, round_mode);
// Converts a double to float under the given rounding mode, dispatching on
// std::fpclassify(value).
// Visible pieces: one path calls FPProcessException(), takes the sign from
// bit 63 of `raw` and uses an all-ones 8-bit exponent ((1 << 8) - 1); one path
// returns static_cast<float>(value); the final path rounds explicit
// sign/exponent/mantissa fields through FPRoundToFloat.
// NOTE(review): the case labels, the definition of `raw` and the extraction
// of exponent/mantissa for the rounding path are missing from this excerpt.
361float Simulator::FPToFloat(
double value,
FPRounding round_mode) {
366 switch (std::fpclassify(value)) {
369 FPProcessException();
380 uint32_t
sign = raw >> 63;
381 uint32_t exponent = (1 << 8) - 1;
393 return static_cast<float>(
value);
404 if (std::fpclassify(value) == FP_NORMAL) {
408 return FPRoundToFloat(
sign, exponent, mantissa, round_mode);
// LD1 (whole register): clears dst for `vform`, then fills lane i from memory
// with ReadUintFromMem.
// NOTE(review): the loop that defines `i` (and any update of addr) is missing
// from this excerpt.
415void Simulator::ld1(
VectorFormat vform, LogicVRegister dst, uint64_t addr) {
416 dst.ClearForWrite(vform);
418 dst.ReadUintFromMem(vform,
i, addr);
// LD1 (single lane): reads only lane `index` of dst from addr. No
// ClearForWrite call appears in the visible lines.
// NOTE(review): the end of the parameter list (addr) is missing from this
// excerpt.
423void Simulator::ld1(
VectorFormat vform, LogicVRegister dst,
int index,
425 dst.ReadUintFromMem(vform, index, addr);
// LD1R: clears dst, then reads lane i from `addr`.
// NOTE(review): the loop defining `i` is missing from this excerpt; presumably
// every lane is read from the same address — confirm against the full source.
428void Simulator::ld1r(
VectorFormat vform, LogicVRegister dst, uint64_t addr) {
429 dst.ClearForWrite(vform);
431 dst.ReadUintFromMem(vform,
i, addr);
// LD2 (two registers): both destinations are cleared, then lane i of dst1 is
// read from addr1 and lane i of dst2 from addr2, which starts one element
// (`esize`) after addr1.
// NOTE(review): the definition of esize, the lane loop and the per-iteration
// address updates are missing from this excerpt.
435void Simulator::ld2(
VectorFormat vform, LogicVRegister dst1,
436 LogicVRegister dst2, uint64_t addr1) {
437 dst1.ClearForWrite(vform);
438 dst2.ClearForWrite(vform);
440 uint64_t addr2 = addr1 + esize;
442 dst1.ReadUintFromMem(vform,
i, addr1);
443 dst2.ReadUintFromMem(vform,
i, addr2);
// LD2 (single lane): clears both destinations, then reads lane `index` of
// dst1 from addr1 and lane `index` of dst2 from addr2.
// NOTE(review): the definition of addr2 is missing from this excerpt.
449void Simulator::ld2(
VectorFormat vform, LogicVRegister dst1,
450 LogicVRegister dst2,
int index, uint64_t addr1) {
451 dst1.ClearForWrite(vform);
452 dst2.ClearForWrite(vform);
454 dst1.ReadUintFromMem(vform, index, addr1);
455 dst2.ReadUintFromMem(vform, index, addr2);
// LD2R: clears both destinations, then reads lane i of dst1 from addr and
// lane i of dst2 from addr2.
// NOTE(review): the definition of addr2 and the lane loop are missing from
// this excerpt.
458void Simulator::ld2r(
VectorFormat vform, LogicVRegister dst1,
459 LogicVRegister dst2, uint64_t addr) {
460 dst1.ClearForWrite(vform);
461 dst2.ClearForWrite(vform);
464 dst1.ReadUintFromMem(vform,
i, addr);
465 dst2.ReadUintFromMem(vform,
i, addr2);
// LD3 (three registers): all destinations are cleared; the three read
// addresses start at consecutive elements (addr1, +esize, +2*esize) and lane
// i of each destination is read from its own address.
// NOTE(review): the definition of esize, the lane loop and the per-iteration
// address updates are missing from this excerpt.
469void Simulator::ld3(
VectorFormat vform, LogicVRegister dst1,
470 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) {
471 dst1.ClearForWrite(vform);
472 dst2.ClearForWrite(vform);
473 dst3.ClearForWrite(vform);
475 uint64_t addr2 = addr1 + esize;
476 uint64_t addr3 = addr2 + esize;
478 dst1.ReadUintFromMem(vform,
i, addr1);
479 dst2.ReadUintFromMem(vform,
i, addr2);
480 dst3.ReadUintFromMem(vform,
i, addr3);
// LD3 (single lane): clears the three destinations, then reads lane `index`
// of each from addr1, addr2 and addr3 respectively.
// NOTE(review): the end of the parameter list (addr1) and the definitions of
// addr2/addr3 are missing from this excerpt.
487void Simulator::ld3(
VectorFormat vform, LogicVRegister dst1,
488 LogicVRegister dst2, LogicVRegister dst3,
int index,
490 dst1.ClearForWrite(vform);
491 dst2.ClearForWrite(vform);
492 dst3.ClearForWrite(vform);
495 dst1.ReadUintFromMem(vform, index, addr1);
496 dst2.ReadUintFromMem(vform, index, addr2);
497 dst3.ReadUintFromMem(vform, index, addr3);
// LD3R: clears the three destinations, then reads lane i of dst1/dst2/dst3
// from addr/addr2/addr3 respectively.
// NOTE(review): the definitions of addr2/addr3 and the lane loop are missing
// from this excerpt.
500void Simulator::ld3r(
VectorFormat vform, LogicVRegister dst1,
501 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) {
502 dst1.ClearForWrite(vform);
503 dst2.ClearForWrite(vform);
504 dst3.ClearForWrite(vform);
508 dst1.ReadUintFromMem(vform,
i, addr);
509 dst2.ReadUintFromMem(vform,
i, addr2);
510 dst3.ReadUintFromMem(vform,
i, addr3);
// LD4 (four registers): all destinations are cleared; the four read addresses
// start at consecutive elements (addr1, then +esize each) and lane i of each
// destination is read from its own address.
// NOTE(review): the definition of esize, the lane loop and the per-iteration
// address updates are missing from this excerpt.
514void Simulator::ld4(
VectorFormat vform, LogicVRegister dst1,
515 LogicVRegister dst2, LogicVRegister dst3,
516 LogicVRegister dst4, uint64_t addr1) {
517 dst1.ClearForWrite(vform);
518 dst2.ClearForWrite(vform);
519 dst3.ClearForWrite(vform);
520 dst4.ClearForWrite(vform);
522 uint64_t addr2 = addr1 + esize;
523 uint64_t addr3 = addr2 + esize;
524 uint64_t addr4 = addr3 + esize;
526 dst1.ReadUintFromMem(vform,
i, addr1);
527 dst2.ReadUintFromMem(vform,
i, addr2);
528 dst3.ReadUintFromMem(vform,
i, addr3);
529 dst4.ReadUintFromMem(vform,
i, addr4);
// LD4 (single lane): clears the four destinations, then reads lane `index` of
// each from addr1..addr4 respectively.
// NOTE(review): the definitions of addr2/addr3/addr4 are missing from this
// excerpt.
537void Simulator::ld4(
VectorFormat vform, LogicVRegister dst1,
538 LogicVRegister dst2, LogicVRegister dst3,
539 LogicVRegister dst4,
int index, uint64_t addr1) {
540 dst1.ClearForWrite(vform);
541 dst2.ClearForWrite(vform);
542 dst3.ClearForWrite(vform);
543 dst4.ClearForWrite(vform);
547 dst1.ReadUintFromMem(vform, index, addr1);
548 dst2.ReadUintFromMem(vform, index, addr2);
549 dst3.ReadUintFromMem(vform, index, addr3);
550 dst4.ReadUintFromMem(vform, index, addr4);
// LD4R: clears the four destinations, then reads lane i of dst1..dst4 from
// addr, addr2, addr3 and addr4 respectively.
// NOTE(review): the definitions of addr2..addr4 and the lane loop are missing
// from this excerpt.
553void Simulator::ld4r(
VectorFormat vform, LogicVRegister dst1,
554 LogicVRegister dst2, LogicVRegister dst3,
555 LogicVRegister dst4, uint64_t addr) {
556 dst1.ClearForWrite(vform);
557 dst2.ClearForWrite(vform);
558 dst3.ClearForWrite(vform);
559 dst4.ClearForWrite(vform);
564 dst1.ReadUintFromMem(vform,
i, addr);
565 dst2.ReadUintFromMem(vform,
i, addr2);
566 dst3.ReadUintFromMem(vform,
i, addr3);
567 dst4.ReadUintFromMem(vform,
i, addr4);
// ST1 (whole register): writes lane i of src to memory with WriteUintToMem.
// NOTE(review): the loop that defines `i` (and any update of addr) is missing
// from this excerpt.
571void Simulator::st1(
VectorFormat vform, LogicVRegister src, uint64_t addr) {
573 src.WriteUintToMem(vform,
i, addr);
// ST1 (single lane): writes only lane `index` of src to addr.
// NOTE(review): the end of the parameter list (addr) is missing from this
// excerpt.
578void Simulator::st1(
VectorFormat vform, LogicVRegister src,
int index,
580 src.WriteUintToMem(vform, index, addr);
// ST2 (two registers): lane i of `dst` is written at addr and lane i of dst2
// at addr2, which starts one element (`esize`) after addr. Despite their
// names, `dst`/`dst2` are the registers being stored.
// NOTE(review): the end of the parameter list, the definition of esize, the
// lane loop and the address updates are missing from this excerpt.
583void Simulator::st2(
VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
586 uint64_t addr2 = addr + esize;
588 dst.WriteUintToMem(vform,
i, addr);
589 dst2.WriteUintToMem(vform,
i, addr2);
// ST2 (single lane): writes lane `index` of the two registers to consecutive
// elements, at addr and addr + esize.
// NOTE(review): the definition of esize is missing from this excerpt.
595void Simulator::st2(
VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
596 int index, uint64_t addr) {
598 dst.WriteUintToMem(vform, index, addr);
599 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
// ST3 (three registers): the three write addresses start at consecutive
// elements (addr, +esize, +2*esize); lane i of each register is written at
// its own address.
// NOTE(review): the definition of esize, the lane loop and the address
// updates are missing from this excerpt.
602void Simulator::st3(
VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
603 LogicVRegister dst3, uint64_t addr) {
605 uint64_t addr2 = addr + esize;
606 uint64_t addr3 = addr2 + esize;
608 dst.WriteUintToMem(vform,
i, addr);
609 dst2.WriteUintToMem(vform,
i, addr2);
610 dst3.WriteUintToMem(vform,
i, addr3);
// ST3 (single lane): writes lane `index` of the three registers to three
// consecutive elements starting at addr.
// NOTE(review): the definition of esize is missing from this excerpt.
617void Simulator::st3(
VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
618 LogicVRegister dst3,
int index, uint64_t addr) {
620 dst.WriteUintToMem(vform, index, addr);
621 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
622 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
// ST4 (four registers): the four write addresses start at consecutive
// elements (addr, then +esize each); lane i of each register is written at
// its own address.
// NOTE(review): the definition of esize, the lane loop and the address
// updates are missing from this excerpt.
625void Simulator::st4(
VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
626 LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) {
628 uint64_t addr2 = addr + esize;
629 uint64_t addr3 = addr2 + esize;
630 uint64_t addr4 = addr3 + esize;
632 dst.WriteUintToMem(vform,
i, addr);
633 dst2.WriteUintToMem(vform,
i, addr2);
634 dst3.WriteUintToMem(vform,
i, addr3);
635 dst4.WriteUintToMem(vform,
i, addr4);
// ST4 (single lane): writes lane `index` of the four registers to four
// consecutive elements starting at addr.
// NOTE(review): the end of the parameter list (addr) and the definition of
// esize are missing from this excerpt.
643void Simulator::st4(
VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
644 LogicVRegister dst3, LogicVRegister dst4,
int index,
647 dst.WriteUintToMem(vform, index, addr);
648 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
649 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
650 dst4.WriteUintToMem(vform, index, addr + 3 * esize);
// Lane-wise integer compare of src1 against src2 under `cond`.
// Each lane is read both as signed (sa/sb) and unsigned (ua/ub); the visible
// assignments compute equality, signed >=, >, <, <= and unsigned >, >=.
// When src1.Is(src2) holds, the inclusive comparisons are forced true and the
// strict ones false without looking at the lane values.
// NOTE(review): the lane loop, the `switch (cond)` case labels and the write
// of `result` into dst are missing from this excerpt, so which condition
// selects which expression cannot be confirmed here.
653LogicVRegister Simulator::cmp(
VectorFormat vform, LogicVRegister dst,
654 const LogicVRegister& src1,
655 const LogicVRegister& src2,
Condition cond) {
656 dst.ClearForWrite(vform);
659 int64_t sa = src1.Int(vform,
i);
660 int64_t sb = src2.Int(vform,
i);
661 uint64_t ua = src1.Uint(vform,
i);
662 uint64_t ub = src2.Uint(vform,
i);
665 result = (src1.Is(src2) || ua == ub);
668 result = (src1.Is(src2) || sa >= sb);
671 result = (!src1.Is(src2) && sa > sb);
674 result = (!src1.Is(src2) && ua > ub);
677 result = (src1.Is(src2) || ua >= ub);
680 result = (!src1.Is(src2) && sa < sb);
683 result = (src1.Is(src2) || sa <= sb);
// Compare-against-immediate: dup_immediate places `imm` in a temporary
// register, then the register/register cmp overload does the work.
// NOTE(review): the end of the parameter list (cond) and the declaration of
// `temp` are missing from this excerpt.
693LogicVRegister Simulator::cmp(
VectorFormat vform, LogicVRegister dst,
694 const LogicVRegister& src1,
int imm,
697 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
698 return cmp(vform, dst, src1, imm_reg, cond);
// CMTST-style test: clears dst and reads lane i of both sources as unsigned
// values (ua, ub).
// NOTE(review): the lane loop and the statement that combines ua/ub and
// writes dst are missing from this excerpt.
701LogicVRegister Simulator::cmptst(
VectorFormat vform, LogicVRegister dst,
702 const LogicVRegister& src1,
703 const LogicVRegister& src2) {
704 dst.ClearForWrite(vform);
706 uint64_t ua = src1.Uint(vform,
i);
707 uint64_t ub = src2.Uint(vform,
i);
// Lane-wise addition that also records saturation state on dst.
// Operands are read left-justified in 64 bits, so each lane's top bit sits at
// bit 63 and sign tests use `>> 63`. The sum is shifted back down by
// (64 - lane_size) before it is stored.
// Signed overflow is flagged when both operands have the same sign and the
// result's sign differs; SetSignedSat receives pos_a to give the direction.
// NOTE(review): the lane loop, the definition of lane_size and the condition
// guarding SetUnsignedSat(i, true) are missing from this excerpt.
713LogicVRegister Simulator::add(
VectorFormat vform, LogicVRegister dst,
714 const LogicVRegister& src1,
715 const LogicVRegister& src2) {
717 dst.ClearForWrite(vform);
720 uint64_t ua = src1.UintLeftJustified(vform,
i);
721 uint64_t ub = src2.UintLeftJustified(vform,
i);
722 uint64_t ur = ua + ub;
724 dst.SetUnsignedSat(
i,
true);
728 bool pos_a = (ua >> 63) == 0;
729 bool pos_b = (ub >> 63) == 0;
730 bool pos_r = (ur >> 63) == 0;
733 if ((pos_a == pos_b) && (pos_a != pos_r)) {
734 dst.SetSignedSat(
i, pos_a);
737 dst.SetInt(vform,
i, ur >> (64 - lane_size));
// Pairwise add (vector form): uzp1 and uzp2 each produce a temporary from
// src1/src2, and the two temporaries are then added lane-wise into dst.
// NOTE(review): the return statement is missing from this excerpt.
742LogicVRegister Simulator::addp(
VectorFormat vform, LogicVRegister dst,
743 const LogicVRegister& src1,
744 const LogicVRegister& src2) {
745 SimVRegister temp1, temp2;
746 uzp1(vform, temp1, src1, src2);
747 uzp2(vform, temp2, src1, src2);
748 add(vform, dst, temp1, temp2);
// Multiply-accumulate: computes src1 * src2 into `temp`, then adds it to dst.
// NOTE(review): the declaration of `temp` and the return statement are
// missing from this excerpt.
752LogicVRegister Simulator::mla(
VectorFormat vform, LogicVRegister dst,
753 const LogicVRegister& src1,
754 const LogicVRegister& src2) {
756 mul(vform, temp, src1, src2);
757 add(vform, dst, dst, temp);
// Multiply-subtract: computes src1 * src2 into `temp`, then subtracts it from
// dst.
// NOTE(review): the declaration of `temp` and the return statement are
// missing from this excerpt.
761LogicVRegister Simulator::mls(
VectorFormat vform, LogicVRegister dst,
762 const LogicVRegister& src1,
763 const LogicVRegister& src2) {
765 mul(vform, temp, src1, src2);
766 sub(vform, dst, dst, temp);
// Lane-wise multiply: lane i of dst receives the product of the unsigned
// lane values of src1 and src2, stored through SetUint.
// NOTE(review): the lane loop and the return statement are missing from this
// excerpt.
770LogicVRegister Simulator::mul(
VectorFormat vform, LogicVRegister dst,
771 const LogicVRegister& src1,
772 const LogicVRegister& src2) {
773 dst.ClearForWrite(vform);
775 dst.SetUint(vform,
i, src1.Uint(vform,
i) * src2.Uint(vform,
i));
// By-element mul: dup_element broadcasts element `index` of src2 into a
// temporary, which is then passed to the vector mul overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
780LogicVRegister Simulator::mul(
VectorFormat vform, LogicVRegister dst,
781 const LogicVRegister& src1,
782 const LogicVRegister& src2,
int index) {
785 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element mla: dup_element broadcasts element `index` of src2 into a
// temporary, which is then passed to the vector mla overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
788LogicVRegister Simulator::mla(
VectorFormat vform, LogicVRegister dst,
789 const LogicVRegister& src1,
790 const LogicVRegister& src2,
int index) {
793 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element mls: dup_element broadcasts element `index` of src2 into a
// temporary, which is then passed to the vector mls overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
796LogicVRegister Simulator::mls(
VectorFormat vform, LogicVRegister dst,
797 const LogicVRegister& src1,
798 const LogicVRegister& src2,
int index) {
801 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element smull: broadcasts element `index` of src2 with dup_element and
// forwards to the vector smull overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
804LogicVRegister Simulator::smull(
VectorFormat vform, LogicVRegister dst,
805 const LogicVRegister& src1,
806 const LogicVRegister& src2,
int index) {
810 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element smull2: broadcasts element `index` of src2 with dup_element and
// forwards to the vector smull2 overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
813LogicVRegister Simulator::smull2(
VectorFormat vform, LogicVRegister dst,
814 const LogicVRegister& src1,
815 const LogicVRegister& src2,
int index) {
819 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element umull: broadcasts element `index` of src2 with dup_element and
// forwards to the vector umull overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
822LogicVRegister Simulator::umull(
VectorFormat vform, LogicVRegister dst,
823 const LogicVRegister& src1,
824 const LogicVRegister& src2,
int index) {
828 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element umull2: broadcasts element `index` of src2 with dup_element and
// forwards to the vector umull2 overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
831LogicVRegister Simulator::umull2(
VectorFormat vform, LogicVRegister dst,
832 const LogicVRegister& src1,
833 const LogicVRegister& src2,
int index) {
837 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element smlal: broadcasts element `index` of src2 with dup_element and
// forwards to the vector smlal overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
840LogicVRegister Simulator::smlal(
VectorFormat vform, LogicVRegister dst,
841 const LogicVRegister& src1,
842 const LogicVRegister& src2,
int index) {
846 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element smlal2: broadcasts element `index` of src2 with dup_element and
// forwards to the vector smlal2 overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
849LogicVRegister Simulator::smlal2(
VectorFormat vform, LogicVRegister dst,
850 const LogicVRegister& src1,
851 const LogicVRegister& src2,
int index) {
855 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element umlal: broadcasts element `index` of src2 with dup_element and
// forwards to the vector umlal overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
858LogicVRegister Simulator::umlal(
VectorFormat vform, LogicVRegister dst,
859 const LogicVRegister& src1,
860 const LogicVRegister& src2,
int index) {
864 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element umlal2: broadcasts element `index` of src2 with dup_element and
// forwards to the vector umlal2 overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
867LogicVRegister Simulator::umlal2(
VectorFormat vform, LogicVRegister dst,
868 const LogicVRegister& src1,
869 const LogicVRegister& src2,
int index) {
873 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element smlsl: broadcasts element `index` of src2 with dup_element and
// forwards to the vector smlsl overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
876LogicVRegister Simulator::smlsl(
VectorFormat vform, LogicVRegister dst,
877 const LogicVRegister& src1,
878 const LogicVRegister& src2,
int index) {
882 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element smlsl2: broadcasts element `index` of src2 with dup_element and
// forwards to the vector smlsl2 overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
885LogicVRegister Simulator::smlsl2(
VectorFormat vform, LogicVRegister dst,
886 const LogicVRegister& src1,
887 const LogicVRegister& src2,
int index) {
891 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element umlsl: broadcasts element `index` of src2 with dup_element and
// forwards to the vector umlsl overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
894LogicVRegister Simulator::umlsl(
VectorFormat vform, LogicVRegister dst,
895 const LogicVRegister& src1,
896 const LogicVRegister& src2,
int index) {
900 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element umlsl2: broadcasts element `index` of src2 with dup_element and
// forwards to the vector umlsl2 overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
903LogicVRegister Simulator::umlsl2(
VectorFormat vform, LogicVRegister dst,
904 const LogicVRegister& src1,
905 const LogicVRegister& src2,
int index) {
909 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element sqdmull: broadcasts element `index` of src2 with dup_element and
// forwards to the vector sqdmull overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
912LogicVRegister Simulator::sqdmull(
VectorFormat vform, LogicVRegister dst,
913 const LogicVRegister& src1,
914 const LogicVRegister& src2,
int index) {
918 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element sqdmull2: broadcasts element `index` of src2 with dup_element
// and forwards to the vector sqdmull2 overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
921LogicVRegister Simulator::sqdmull2(
VectorFormat vform, LogicVRegister dst,
922 const LogicVRegister& src1,
923 const LogicVRegister& src2,
int index) {
927 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element sqdmlal: broadcasts element `index` of src2 with dup_element and
// forwards to the vector sqdmlal overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
930LogicVRegister Simulator::sqdmlal(
VectorFormat vform, LogicVRegister dst,
931 const LogicVRegister& src1,
932 const LogicVRegister& src2,
int index) {
936 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element sqdmlal2: broadcasts element `index` of src2 with dup_element
// and forwards to the vector sqdmlal2 overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
939LogicVRegister Simulator::sqdmlal2(
VectorFormat vform, LogicVRegister dst,
940 const LogicVRegister& src1,
941 const LogicVRegister& src2,
int index) {
945 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element sqdmlsl: broadcasts element `index` of src2 with dup_element and
// forwards to the vector sqdmlsl overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
948LogicVRegister Simulator::sqdmlsl(
VectorFormat vform, LogicVRegister dst,
949 const LogicVRegister& src1,
950 const LogicVRegister& src2,
int index) {
954 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element sqdmlsl2: broadcasts element `index` of src2 with dup_element
// and forwards to the vector sqdmlsl2 overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
957LogicVRegister Simulator::sqdmlsl2(
VectorFormat vform, LogicVRegister dst,
958 const LogicVRegister& src1,
959 const LogicVRegister& src2,
int index) {
963 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element sqdmulh: broadcasts element `index` of src2 with dup_element and
// forwards to the vector sqdmulh overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
966LogicVRegister Simulator::sqdmulh(
VectorFormat vform, LogicVRegister dst,
967 const LogicVRegister& src1,
968 const LogicVRegister& src2,
int index) {
971 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
// By-element sqrdmulh: broadcasts element `index` of src2 with dup_element
// and forwards to the vector sqrdmulh overload.
// NOTE(review): the declarations of `temp` and `indexform` are missing from
// this excerpt.
974LogicVRegister Simulator::sqrdmulh(
VectorFormat vform, LogicVRegister dst,
975 const LogicVRegister& src1,
976 const LogicVRegister& src2,
int index) {
979 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
// 8-bit polynomial multiply: calls PolynomialMult128 with a size argument of
// 8 and returns the `.second` member of its result as a 16-bit value.
// NOTE(review): the closing brace is missing from this excerpt.
982uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
983 return PolynomialMult128(op1, op2, 8).second;
// Lane-wise polynomial multiply: lane i of dst receives
// PolynomialMult(src1 lane i, src2 lane i).
// NOTE(review): the lane loop and the return statement are missing from this
// excerpt.
986LogicVRegister Simulator::pmul(
VectorFormat vform, LogicVRegister dst,
987 const LogicVRegister& src1,
988 const LogicVRegister& src2) {
989 dst.ClearForWrite(vform);
991 dst.SetUint(vform,
i,
992 PolynomialMult(src1.Uint(vform,
i), src2.Uint(vform,
i)));
// Polynomial multiply long: source lanes are read in `vform_src` format and
// passed to PolynomialMult128.
// NOTE(review): the definition of vform_src, the lane loop, the remaining
// PolynomialMult128 arguments and the store into dst are missing from this
// excerpt.
997LogicVRegister Simulator::pmull(
VectorFormat vform, LogicVRegister dst,
998 const LogicVRegister& src1,
999 const LogicVRegister& src2) {
1001 dst.ClearForWrite(vform);
1007 PolynomialMult128(src1.Uint(vform_src,
i), src2.Uint(vform_src,
i),
// Polynomial multiply long, second-part form: for each i in [0, lane_count),
// destination lane i is computed from source lanes `lane_count + i`, read in
// `vform_src` format and passed to PolynomialMult128.
// NOTE(review): the definitions of vform_src and lane_count, the remaining
// PolynomialMult128 arguments and the end of the SetUint call are missing
// from this excerpt.
1013LogicVRegister Simulator::pmull2(
VectorFormat vform, LogicVRegister dst,
1014 const LogicVRegister& src1,
1015 const LogicVRegister& src2) {
1017 dst.ClearForWrite(vform);
1019 for (
int i = 0;
i < lane_count;
i++) {
1020 dst.SetUint(vform,
i,
1021 PolynomialMult128(src1.Uint(vform_src, lane_count +
i),
1022 src2.Uint(vform_src, lane_count +
i),
// Lane-wise subtraction that also records saturation state on dst.
// Operands are read left-justified in 64 bits, so each lane's top bit sits at
// bit 63; the difference is shifted back down by (64 - lane_size) before it
// is stored.
// Signed overflow is flagged when the operands have different signs and the
// result's sign differs from src1's; SetSignedSat receives pos_a to give the
// direction.
// NOTE(review): the lane loop, the definition of lane_size and the condition
// guarding SetUnsignedSat(i, false) are missing from this excerpt.
1028LogicVRegister Simulator::sub(
VectorFormat vform, LogicVRegister dst,
1029 const LogicVRegister& src1,
1030 const LogicVRegister& src2) {
1032 dst.ClearForWrite(vform);
1035 uint64_t ua = src1.UintLeftJustified(vform,
i);
1036 uint64_t ub = src2.UintLeftJustified(vform,
i);
1037 uint64_t ur = ua - ub;
1039 dst.SetUnsignedSat(
i,
false);
1043 bool pos_a = (ua >> 63) == 0;
1044 bool pos_b = (ub >> 63) == 0;
1045 bool pos_r = (ur >> 63) == 0;
1048 if ((pos_a != pos_b) && (pos_a != pos_r)) {
1049 dst.SetSignedSat(
i, pos_a);
1052 dst.SetInt(vform,
i, ur >> (64 - lane_size));
// Lane-wise bitwise AND: dst lane i = src1 lane i & src2 lane i.
// NOTE(review): the lane loop and the return statement are missing from this
// excerpt.
1057LogicVRegister Simulator::and_(
VectorFormat vform, LogicVRegister dst,
1058 const LogicVRegister& src1,
1059 const LogicVRegister& src2) {
1060 dst.ClearForWrite(vform);
1062 dst.SetUint(vform,
i, src1.Uint(vform,
i) & src2.Uint(vform,
i));
// Lane-wise bitwise OR: dst lane i = src1 lane i | src2 lane i.
// NOTE(review): the lane loop and the return statement are missing from this
// excerpt.
1067LogicVRegister Simulator::orr(
VectorFormat vform, LogicVRegister dst,
1068 const LogicVRegister& src1,
1069 const LogicVRegister& src2) {
1070 dst.ClearForWrite(vform);
1072 dst.SetUint(vform,
i, src1.Uint(vform,
i) | src2.Uint(vform,
i));
// Lane-wise OR-NOT: dst lane i = src1 lane i | ~(src2 lane i).
// NOTE(review): the lane loop and the return statement are missing from this
// excerpt.
1077LogicVRegister Simulator::orn(
VectorFormat vform, LogicVRegister dst,
1078 const LogicVRegister& src1,
1079 const LogicVRegister& src2) {
1080 dst.ClearForWrite(vform);
1082 dst.SetUint(vform,
i, src1.Uint(vform,
i) | ~src2.Uint(vform,
i));
// Lane-wise exclusive OR. When src1.Is(src2) holds, the lane is written as 0
// directly; otherwise it is src1 lane i ^ src2 lane i (`^` binds tighter than
// `?:`, so the XOR forms the whole else-branch).
// NOTE(review): the lane loop and the return statement are missing from this
// excerpt.
1087LogicVRegister Simulator::eor(
VectorFormat vform, LogicVRegister dst,
1088 const LogicVRegister& src1,
1089 const LogicVRegister& src2) {
1090 dst.ClearForWrite(vform);
1092 dst.SetUint(vform,
i,
1093 src1.Is(src2) ? 0 : src1.Uint(vform,
i) ^ src2.Uint(vform,
i));
// Lane-wise bit clear (register form): dst lane i = src1 lane i & ~(src2
// lane i).
// NOTE(review): the lane loop and the return statement are missing from this
// excerpt.
1098LogicVRegister Simulator::bic(
VectorFormat vform, LogicVRegister dst,
1099 const LogicVRegister& src1,
1100 const LogicVRegister& src2) {
1101 dst.ClearForWrite(vform);
1103 dst.SetUint(vform,
i, src1.Uint(vform,
i) & ~src2.Uint(vform,
i));
// Bit clear (immediate form): every lane of src is ANDed with ~imm into a
// local `result` array, which is then written to dst in one SetUintArray
// call.
// NOTE(review): the declarations of `result` and `laneCount` and the return
// statement are missing from this excerpt.
1108LogicVRegister Simulator::bic(
VectorFormat vform, LogicVRegister dst,
1109 const LogicVRegister& src, uint64_t imm) {
1112 for (
int i = 0;
i < laneCount; ++
i) {
1113 result[
i] = src.Uint(vform,
i) & ~imm;
1115 dst.SetUintArray(vform,
result);
// Bitwise insert if false. `a ^ ((a ^ c) & m)` selects c where a mask bit is
// 1 and a where it is 0. Here a = dst, c = src1 and m = ~src2, so dst takes
// src1's bit wherever src2's bit is 0 and keeps its own bit elsewhere.
// NOTE(review): the lane loop and the return statement are missing from this
// excerpt.
1119LogicVRegister Simulator::bif(
VectorFormat vform, LogicVRegister dst,
1120 const LogicVRegister& src1,
1121 const LogicVRegister& src2) {
1122 dst.ClearForWrite(vform);
1124 uint64_t operand1 = dst.Uint(vform,
i);
1125 uint64_t operand2 = ~src2.Uint(vform,
i);
1126 uint64_t operand3 = src1.Uint(vform,
i);
1127 uint64_t
result = operand1 ^ ((operand1 ^ operand3) & operand2);
1128 dst.SetUint(vform,
i,
result);
// Bitwise insert if true. `a ^ ((a ^ c) & m)` selects c where a mask bit is
// 1 and a where it is 0. Here a = dst, c = src1 and m = src2, so dst takes
// src1's bit wherever src2's bit is 1 and keeps its own bit elsewhere.
// NOTE(review): the lane loop and the return statement are missing from this
// excerpt.
1133LogicVRegister Simulator::bit(
VectorFormat vform, LogicVRegister dst,
1134 const LogicVRegister& src1,
1135 const LogicVRegister& src2) {
1136 dst.ClearForWrite(vform);
1138 uint64_t operand1 = dst.Uint(vform,
i);
1139 uint64_t operand2 = src2.Uint(vform,
i);
1140 uint64_t operand3 = src1.Uint(vform,
i);
1141 uint64_t
result = operand1 ^ ((operand1 ^ operand3) & operand2);
1142 dst.SetUint(vform,
i,
result);
// Bitwise select. `a ^ ((a ^ c) & m)` selects c where a mask bit is 1 and a
// where it is 0. Here the mask is the old value of dst, a = src2 and
// c = src1, so each result bit comes from src1 where dst's bit was 1 and
// from src2 where it was 0.
// NOTE(review): the lane loop and the return statement are missing from this
// excerpt.
1147LogicVRegister Simulator::bsl(
VectorFormat vform, LogicVRegister dst,
1148 const LogicVRegister& src1,
1149 const LogicVRegister& src2) {
1150 dst.ClearForWrite(vform);
1152 uint64_t operand1 = src2.Uint(vform,
i);
1153 uint64_t operand2 = dst.Uint(vform,
i);
1154 uint64_t operand3 = src1.Uint(vform,
i);
1155 uint64_t
result = operand1 ^ ((operand1 ^ operand3) & operand2);
1156 dst.SetUint(vform,
i,
result);
// Shared helper for signed lane-wise max/min: lanes are read as signed
// values; one visible assignment keeps the larger operand and the other the
// smaller, and the chosen value is stored with SetInt.
// NOTE(review): the lane loop, the `if (max)` / `else` that picks between
// the two assignments, the declaration of dst_val and the return statement
// are missing from this excerpt.
1161LogicVRegister Simulator::SMinMax(
VectorFormat vform, LogicVRegister dst,
1162 const LogicVRegister& src1,
1163 const LogicVRegister& src2,
bool max) {
1164 dst.ClearForWrite(vform);
1166 int64_t src1_val = src1.Int(vform,
i);
1167 int64_t src2_val = src2.Int(vform,
i);
1170 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1172 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1174 dst.SetInt(vform,
i, dst_val);
// Signed lane-wise maximum: SMinMax with max = true.
// NOTE(review): the closing brace is missing from this excerpt.
1179LogicVRegister Simulator::smax(
VectorFormat vform, LogicVRegister dst,
1180 const LogicVRegister& src1,
1181 const LogicVRegister& src2) {
1182 return SMinMax(vform, dst, src1, src2,
true);
// Signed lane-wise minimum: SMinMax with max = false.
// NOTE(review): the closing brace is missing from this excerpt.
1185LogicVRegister Simulator::smin(
VectorFormat vform, LogicVRegister dst,
1186 const LogicVRegister& src1,
1187 const LogicVRegister& src2) {
1188 return SMinMax(vform, dst, src1, src2,
false);
// Shared helper for signed pairwise max/min. Two passes (j = 0, 1) walk a
// source register in steps of two lanes, reduce each adjacent pair (i, i + 1)
// and store the result at index (i >> 1) + j * lanes / 2 of a local array.
// The whole array is written to dst at the end, after both passes, via
// SetIntArray.
// NOTE(review): `src` starts at &src1; the statement that switches it for the
// second pass, the declarations of `lanes`/`result`/dst_val, the `if (max)`
// selection and the return statement are missing from this excerpt.
1191LogicVRegister Simulator::SMinMaxP(
VectorFormat vform, LogicVRegister dst,
1192 const LogicVRegister& src1,
1193 const LogicVRegister& src2,
bool max) {
1196 const LogicVRegister* src = &src1;
1197 for (
int j = 0; j < 2; j++) {
1198 for (
int i = 0;
i < lanes;
i += 2) {
1199 int64_t first_val = src->Int(vform,
i);
1200 int64_t second_val = src->Int(vform,
i + 1);
1203 dst_val = (first_val > second_val) ? first_val : second_val;
1205 dst_val = (first_val < second_val) ? first_val : second_val;
1208 result[(
i >> 1) + (j * lanes / 2)] = dst_val;
1212 dst.SetIntArray(vform,
result);
// Signed pairwise maximum: SMinMaxP with max = true.
// NOTE(review): the closing brace is missing from this excerpt.
1216LogicVRegister Simulator::smaxp(
VectorFormat vform, LogicVRegister dst,
1217 const LogicVRegister& src1,
1218 const LogicVRegister& src2) {
1219 return SMinMaxP(vform, dst, src1, src2,
true);
// Signed pairwise minimum: SMinMaxP with max = false.
// NOTE(review): the closing brace is missing from this excerpt.
1222LogicVRegister Simulator::sminp(
VectorFormat vform, LogicVRegister dst,
1223 const LogicVRegister& src1,
1224 const LogicVRegister& src2) {
1225 return SMinMaxP(vform, dst, src1, src2,
false);
// Single-source addp: clears dst and stores `dst_val` in lane 0.
// NOTE(review): the computation of dst_val and the return statement are
// missing from this excerpt.
1228LogicVRegister Simulator::addp(
VectorFormat vform, LogicVRegister dst,
1229 const LogicVRegister& src) {
1233 dst.ClearForWrite(vform);
1234 dst.SetUint(vform, 0, dst_val);
// Add across vector: accumulates the signed lanes of src in a 64-bit sum,
// then clears dst and stores the sum in lane 0 using `vform_dst`.
// NOTE(review): the definition of vform_dst, the lane loop and the return
// statement are missing from this excerpt.
1238LogicVRegister Simulator::addv(
VectorFormat vform, LogicVRegister dst,
1239 const LogicVRegister& src) {
1243 int64_t dst_val = 0;
1245 dst_val += src.Int(vform,
i);
1248 dst.ClearForWrite(vform_dst);
1249 dst.SetInt(vform_dst, 0, dst_val);
// Signed add long across vector: accumulates the signed lanes of src in a
// 64-bit sum, then clears dst and stores the sum in lane 0 using `vform_dst`.
// NOTE(review): the definition of vform_dst, the lane loop and the return
// statement are missing from this excerpt.
1253LogicVRegister Simulator::saddlv(
VectorFormat vform, LogicVRegister dst,
1254 const LogicVRegister& src) {
1258 int64_t dst_val = 0;
1260 dst_val += src.Int(vform,
i);
1263 dst.ClearForWrite(vform_dst);
1264 dst.SetInt(vform_dst, 0, dst_val);
// Unsigned add long across vector: accumulates the unsigned lanes of src in a
// 64-bit sum, then clears dst and stores the sum in lane 0 using `vform_dst`.
// NOTE(review): the definition of vform_dst, the lane loop and the return
// statement are missing from this excerpt.
1268LogicVRegister Simulator::uaddlv(
VectorFormat vform, LogicVRegister dst,
1269 const LogicVRegister& src) {
1273 uint64_t dst_val = 0;
1275 dst_val += src.Uint(vform,
i);
1278 dst.ClearForWrite(vform_dst);
1279 dst.SetUint(vform_dst, 0, dst_val);
// Shared helper for signed max/min across a vector. The accumulator starts at
// INT64_MIN for a maximum and INT64_MAX for a minimum, so any lane value can
// replace it; the final value is stored in lane 0 of dst.
// NOTE(review): the lane loop, the `if (max)` / `else` selection, any
// ClearForWrite call and the return statement are missing from this excerpt.
1283LogicVRegister Simulator::SMinMaxV(
VectorFormat vform, LogicVRegister dst,
1284 const LogicVRegister& src,
bool max) {
1285 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1287 int64_t src_val = src.Int(vform,
i);
1289 dst_val = (src_val > dst_val) ? src_val : dst_val;
1291 dst_val = (src_val < dst_val) ? src_val : dst_val;
1295 dst.SetInt(vform, 0, dst_val);
// Signed maximum across vector: calls SMinMaxV with max = true. The call's
// result is not returned directly on the visible line.
// NOTE(review): the return statement is missing from this excerpt.
1299LogicVRegister Simulator::smaxv(
VectorFormat vform, LogicVRegister dst,
1300 const LogicVRegister& src) {
1301 SMinMaxV(vform, dst, src,
true);
// Signed minimum across vector: calls SMinMaxV with max = false. The call's
// result is not returned directly on the visible line.
// NOTE(review): the return statement is missing from this excerpt.
1305LogicVRegister Simulator::sminv(
VectorFormat vform, LogicVRegister dst,
1306 const LogicVRegister& src) {
1307 SMinMaxV(vform, dst, src,
false);
// Shared helper for unsigned lane-wise max/min: lanes are read as unsigned
// values; one visible assignment keeps the larger operand and the other the
// smaller, and the chosen value is stored with SetUint.
// NOTE(review): the lane loop, the `if (max)` / `else` that picks between
// the two assignments, the declaration of dst_val and the return statement
// are missing from this excerpt.
1311LogicVRegister Simulator::UMinMax(
VectorFormat vform, LogicVRegister dst,
1312 const LogicVRegister& src1,
1313 const LogicVRegister& src2,
bool max) {
1314 dst.ClearForWrite(vform);
1316 uint64_t src1_val = src1.Uint(vform,
i);
1317 uint64_t src2_val = src2.Uint(vform,
i);
1320 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1322 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1324 dst.SetUint(vform,
i, dst_val);
// Unsigned lane-wise maximum: UMinMax with max = true.
// NOTE(review): the closing brace is missing from this excerpt.
1329LogicVRegister Simulator::umax(
VectorFormat vform, LogicVRegister dst,
1330 const LogicVRegister& src1,
1331 const LogicVRegister& src2) {
1332 return UMinMax(vform, dst, src1, src2,
true);
// Unsigned lane-wise minimum: UMinMax with max = false.
// NOTE(review): the closing brace is missing from this excerpt.
1335LogicVRegister Simulator::umin(
VectorFormat vform, LogicVRegister dst,
1336 const LogicVRegister& src1,
1337 const LogicVRegister& src2) {
1338 return UMinMax(vform, dst, src1, src2,
false);
// Shared helper for unsigned pairwise max/min. Two passes (j = 0, 1) reduce
// adjacent lane pairs (i, i + 1) of a source register and store each result
// at index (i >> 1) + j * lanes / 2 of a local array, which is written to dst
// at the end via SetUintArray.
// NOTE(review): `src` starts at &src1; the inner lane loop, the statement
// that switches `src` for the second pass, the declarations of
// `lanes`/`result`/dst_val, the `if (max)` selection and the return statement
// are missing from this excerpt.
1341LogicVRegister Simulator::UMinMaxP(
VectorFormat vform, LogicVRegister dst,
1342 const LogicVRegister& src1,
1343 const LogicVRegister& src2,
bool max) {
1346 const LogicVRegister* src = &src1;
1347 for (
int j = 0; j < 2; j++) {
1349 uint64_t first_val = src->Uint(vform,
i);
1350 uint64_t second_val = src->Uint(vform,
i + 1);
1353 dst_val = (first_val > second_val) ? first_val : second_val;
1355 dst_val = (first_val < second_val) ? first_val : second_val;
1358 result[(
i >> 1) + (j * lanes / 2)] = dst_val;
1362 dst.SetUintArray(vform,
result);
// Unsigned pairwise maximum: UMinMaxP with max = true.
// NOTE(review): the closing brace is missing from this excerpt.
1366LogicVRegister Simulator::umaxp(
VectorFormat vform, LogicVRegister dst,
1367 const LogicVRegister& src1,
1368 const LogicVRegister& src2) {
1369 return UMinMaxP(vform, dst, src1, src2,
true);
// Unsigned pairwise minimum: UMinMaxP with max = false.
// NOTE(review): the closing brace is missing from this excerpt.
1372LogicVRegister Simulator::uminp(
VectorFormat vform, LogicVRegister dst,
1373 const LogicVRegister& src1,
1374 const LogicVRegister& src2) {
1375 return UMinMaxP(vform, dst, src1, src2,
false);
// Shared helper for unsigned max/min across a vector. The accumulator starts
// at 0 for a maximum and UINT64_MAX for a minimum, so any lane value can
// replace it; the final value is stored in lane 0 of dst.
// NOTE(review): the lane loop, the `if (max)` / `else` selection, any
// ClearForWrite call and the return statement are missing from this excerpt.
1378LogicVRegister Simulator::UMinMaxV(
VectorFormat vform, LogicVRegister dst,
1379 const LogicVRegister& src,
bool max) {
1380 uint64_t dst_val = max ? 0 : UINT64_MAX;
1382 uint64_t src_val = src.Uint(vform,
i);
1384 dst_val = (src_val > dst_val) ? src_val : dst_val;
1386 dst_val = (src_val < dst_val) ? src_val : dst_val;
1390 dst.SetUint(vform, 0, dst_val);
// Unsigned maximum across vector: calls UMinMaxV with max = true. The call's
// result is not returned directly on the visible line.
// NOTE(review): the return statement is missing from this excerpt.
1394LogicVRegister Simulator::umaxv(
VectorFormat vform, LogicVRegister dst,
1395 const LogicVRegister& src) {
1396 UMinMaxV(vform, dst, src,
true);
// Unsigned minimum across vector: calls UMinMaxV with max = false. The call's
// result is not returned directly on the visible line.
// NOTE(review): the return statement is missing from this excerpt.
1400LogicVRegister Simulator::uminv(
VectorFormat vform, LogicVRegister dst,
1401 const LogicVRegister& src) {
1402 UMinMaxV(vform, dst, src,
false);
// Shift left by immediate: dup_immediate places `shift` in a temporary
// register and the register-shift ushl does the work.
// NOTE(review): the declaration of `temp` is missing from this excerpt.
1406LogicVRegister Simulator::shl(
VectorFormat vform, LogicVRegister dst,
1407 const LogicVRegister& src,
int shift) {
1410 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1411 return ushl(vform, dst, src, shiftreg);
// Signed shift left long: src is first widened with sxtl into a temporary,
// then shifted by a duplicated immediate using sshl.
1414LogicVRegister Simulator::sshll(
VectorFormat vform, LogicVRegister dst,
1415 const LogicVRegister& src,
int shift) {
1417 SimVRegister temp1, temp2;
1418 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1419 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1420 return sshl(vform, dst, extendedreg, shiftreg);
// Signed shift left long, second-part form: src is first widened with sxtl2
// into a temporary, then shifted by a duplicated immediate using sshl.
1423LogicVRegister Simulator::sshll2(
VectorFormat vform, LogicVRegister dst,
1424 const LogicVRegister& src,
int shift) {
1426 SimVRegister temp1, temp2;
1427 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1428 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1429 return sshl(vform, dst, extendedreg, shiftreg);
// Shift left long with an implicit shift amount: forwards to sshll with
// `shift`.
// NOTE(review): the computation of `shift` is missing from this excerpt.
1432LogicVRegister Simulator::shll(
VectorFormat vform, LogicVRegister dst,
1433 const LogicVRegister& src) {
1435 return sshll(vform, dst, src, shift);
// Shift left long (second-part form) with an implicit shift amount: forwards
// to sshll2 with `shift`.
// NOTE(review): the computation of `shift` is missing from this excerpt.
1438LogicVRegister Simulator::shll2(
VectorFormat vform, LogicVRegister dst,
1439 const LogicVRegister& src) {
1441 return sshll2(vform, dst, src, shift);
// Unsigned shift left long: src is first widened with uxtl into a temporary,
// then shifted by a duplicated immediate using ushl.
1444LogicVRegister Simulator::ushll(
VectorFormat vform, LogicVRegister dst,
1445 const LogicVRegister& src,
int shift) {
1447 SimVRegister temp1, temp2;
1448 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1449 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1450 return ushl(vform, dst, extendedreg, shiftreg);
// Unsigned shift left long, second-part form: src is first widened with uxtl2
// into a temporary, then shifted by a duplicated immediate using ushl.
1453LogicVRegister Simulator::ushll2(
VectorFormat vform, LogicVRegister dst,
1454 const LogicVRegister& src,
int shift) {
1456 SimVRegister temp1, temp2;
1457 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1458 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1459 return ushl(vform, dst, extendedreg, shiftreg);
// Shift left and insert: each source lane is shifted left by `shift` and
// merged into the destination lane as (dst_lane & ~mask) | shifted, so dst
// bits outside `mask` are preserved. dst lanes are read after the
// ClearForWrite call.
// NOTE(review): the definitions of laneCount and `mask`, and the return
// statement, are missing from this excerpt.
1462LogicVRegister Simulator::sli(
VectorFormat vform, LogicVRegister dst,
1463 const LogicVRegister& src,
int shift) {
1464 dst.ClearForWrite(vform);
1466 for (
int i = 0;
i < laneCount;
i++) {
1467 uint64_t src_lane = src.Uint(vform,
i);
1468 uint64_t dst_lane = dst.Uint(vform,
i);
1469 uint64_t shifted = src_lane << shift;
1471 dst.SetUint(vform,
i, (dst_lane & ~
mask) | shifted);
// Signed saturating shift left by immediate: sshl by a duplicated immediate,
// followed by SignedSaturate on the result.
// NOTE(review): the declaration of `temp` is missing from this excerpt.
1476LogicVRegister Simulator::sqshl(
VectorFormat vform, LogicVRegister dst,
1477 const LogicVRegister& src,
int shift) {
1480 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1481 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
// Unsigned saturating shift left by immediate: ushl by a duplicated
// immediate, followed by UnsignedSaturate on the result.
// NOTE(review): the declaration of `temp` is missing from this excerpt.
1484LogicVRegister Simulator::uqshl(
VectorFormat vform, LogicVRegister dst,
1485 const LogicVRegister& src,
int shift) {
1488 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1489 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
// Signed-input, unsigned-result saturating shift left: the shift itself is
// the signed sshl, but the result is saturated with UnsignedSaturate.
// NOTE(review): the declaration of `temp` is missing from this excerpt.
1492LogicVRegister Simulator::sqshlu(
VectorFormat vform, LogicVRegister dst,
1493 const LogicVRegister& src,
int shift) {
1496 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1497 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
// Shift right and insert: the source lane shifted right by `shift` is merged
// into the destination lane as (dst_lane & ~mask) | shifted, so dst bits
// outside `mask` are preserved. dst lanes are read after the ClearForWrite
// call.
// NOTE(review): the definitions of laneCount and `mask`, the declaration of
// `shifted` with the branch that surrounds its assignment, and the return
// statement are missing from this excerpt.
1500LogicVRegister Simulator::sri(
VectorFormat vform, LogicVRegister dst,
1501 const LogicVRegister& src,
int shift) {
1502 dst.ClearForWrite(vform);
1506 for (
int i = 0;
i < laneCount;
i++) {
1507 uint64_t src_lane = src.Uint(vform,
i);
1508 uint64_t dst_lane = dst.Uint(vform,
i);
1515 shifted = src_lane >> shift;
1518 dst.SetUint(vform,
i, (dst_lane & ~
mask) | shifted);
// Unsigned shift right by immediate, expressed as ushl with the negated
// shift amount duplicated into a temporary register.
// NOTE(review): the declaration of `temp` is missing from this excerpt.
1523LogicVRegister Simulator::ushr(
VectorFormat vform, LogicVRegister dst,
1524 const LogicVRegister& src,
int shift) {
1527 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1528 return ushl(vform, dst, src, shiftreg);
// Signed shift right by immediate, expressed as sshl with the negated shift
// amount duplicated into a temporary register.
// NOTE(review): the declaration of `temp` is missing from this excerpt.
1531LogicVRegister Simulator::sshr(
VectorFormat vform, LogicVRegister dst,
1532 const LogicVRegister& src,
int shift) {
1535 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1536 return sshl(vform, dst, src, shiftreg);
// Signed shift right and accumulate: sshr into a temporary, then add the
// temporary to dst.
// NOTE(review): the declaration of `temp` is missing from this excerpt.
1539LogicVRegister Simulator::ssra(
VectorFormat vform, LogicVRegister dst,
1540 const LogicVRegister& src,
int shift) {
1542 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1543 return add(vform, dst, dst, shifted_reg);
// Unsigned shift right and accumulate: ushr into a temporary, then add the
// temporary to dst.
// NOTE(review): the declaration of `temp` is missing from this excerpt.
1546LogicVRegister Simulator::usra(
VectorFormat vform, LogicVRegister dst,
1547 const LogicVRegister& src,
int shift) {
1549 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1550 return add(vform, dst, dst, shifted_reg);
// Signed rounding shift right and accumulate: sshr into a temporary, Round()
// applied to that result, then the rounded value is added to dst.
// NOTE(review): the declaration of `temp` is missing from this excerpt.
1553LogicVRegister Simulator::srsra(
VectorFormat vform, LogicVRegister dst,
1554 const LogicVRegister& src,
int shift) {
1556 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1557 return add(vform, dst, dst, shifted_reg);
// Unsigned rounding shift right and accumulate: the ushr() result has Round()
// applied before being added into dst.
1560LogicVRegister Simulator::ursra(
VectorFormat vform, LogicVRegister dst,
1561 const LogicVRegister& src,
int shift) {
1563 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1564 return add(vform, dst, dst, shifted_reg);
// Computes one value per lane of src into a temporary `result` array and then
// writes the whole array to dst with SetUintArray().
// NOTE(review): the per-lane computation is not visible here; by name this is
// presumably "count leading sign bits" — confirm.
1567LogicVRegister Simulator::cls(
VectorFormat vform, LogicVRegister dst,
1568 const LogicVRegister& src) {
1572 for (
int i = 0;
i < laneCount;
i++) {
1576 dst.SetUintArray(vform,
result);
// Computes one value per lane of src into a temporary `result` array and then
// writes the whole array to dst with SetUintArray().
// NOTE(review): the per-lane computation is not visible here; by name this is
// presumably "count leading zeros" — confirm.
1580LogicVRegister Simulator::clz(
VectorFormat vform, LogicVRegister dst,
1581 const LogicVRegister& src) {
1585 for (
int i = 0;
i < laneCount;
i++) {
1589 dst.SetUintArray(vform,
result);
// For each lane, reads the unsigned lane value and walks its laneSizeInBits
// bits in an inner loop; the per-lane results are written to dst in one go
// with SetUintArray().
// NOTE(review): the inner loop body is not visible; presumably it counts set
// bits (population count) — confirm.
1593LogicVRegister Simulator::cnt(
VectorFormat vform, LogicVRegister dst,
1594 const LogicVRegister& src) {
1598 for (
int i = 0;
i < laneCount;
i++) {
1599 uint64_t value = src.Uint(vform,
i);
1601 for (
int j = 0; j < laneSizeInBits; j++) {
1607 dst.SetUintArray(vform,
result);
// Signed shift by a per-lane register amount. The shift for each lane is the
// lane of src2 truncated to int8_t: positive values shift left, negative
// values shift right. Besides the shifted value, the function records
// per-lane saturation and rounding flags on dst (SetSignedSat, SetUnsignedSat,
// SetRounding).
// NOTE(review): the conditions guarding the saturation-flag updates are only
// partly visible here; the description of those flags should be confirmed
// against the full body.
1611LogicVRegister Simulator::sshl(
VectorFormat vform, LogicVRegister dst,
1612 const LogicVRegister& src1,
1613 const LogicVRegister& src2) {
1614 dst.ClearForWrite(vform);
1616 int8_t shift_val = src2.Int(vform,
i);
1617 int64_t lj_src_val = src1.IntLeftJustified(vform,
i);
1621 (lj_src_val != 0)) {
1622 dst.SetSignedSat(
i, lj_src_val >= 0);
1626 if (lj_src_val < 0) {
1627 dst.SetUnsignedSat(
i,
false);
1629 (lj_src_val != 0)) {
1630 dst.SetUnsignedSat(
i,
true);
1633 int64_t src_val = src1.Int(vform,
i);
1634 bool src_is_negative = src_val < 0;
// Out-of-range shifts are handled explicitly so that the C++ shift operators
// below are never applied with a count of 64 or more.
1635 if (shift_val > 63) {
1636 dst.SetInt(vform,
i, 0);
1637 }
else if (shift_val < -63) {
1638 dst.SetRounding(
i, src_is_negative);
1639 dst.SetInt(vform,
i, src_is_negative ? -1 : 0);
1643 uint64_t usrc_val =
static_cast<uint64_t
>(src_val);
1645 if (shift_val < 0) {
1647 shift_val = -shift_val;
// The last bit shifted out decides whether the rounding flag is set.
1651 if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1652 dst.SetRounding(
i,
true);
1655 usrc_val >>= shift_val;
1657 if (src_is_negative) {
// Refill the vacated high bits with ones so the unsigned shift behaves like
// an arithmetic shift for negative inputs.
1659 usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1662 usrc_val <<= shift_val;
1664 dst.SetUint(vform,
i, usrc_val);
// Unsigned shift by a per-lane register amount. The shift for each lane is the
// lane of src2 truncated to int8_t: positive values shift left, negative
// values shift right. Shifts above 63 or below -64 produce zero; -64 gets
// dedicated handling after the rounding check. The function also records
// per-lane unsigned-saturation and rounding flags on dst.
// NOTE(review): the saturation condition and the body of the -64 case are not
// visible here — confirm against the full body.
1670LogicVRegister Simulator::ushl(
VectorFormat vform, LogicVRegister dst,
1671 const LogicVRegister& src1,
1672 const LogicVRegister& src2) {
1673 dst.ClearForWrite(vform);
1675 int8_t shift_val = src2.Int(vform,
i);
1676 uint64_t lj_src_val = src1.UintLeftJustified(vform,
i);
1680 dst.SetUnsignedSat(
i,
true);
1683 uint64_t src_val = src1.Uint(vform,
i);
1684 if ((shift_val > 63) || (shift_val < -64)) {
1685 dst.SetUint(vform,
i, 0);
1687 if (shift_val < 0) {
// The last bit shifted out decides whether the rounding flag is set.
1689 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1690 dst.SetRounding(
i,
true);
1693 if (shift_val == -64) {
1696 src_val >>= -shift_val;
1699 src_val <<= shift_val;
1701 dst.SetUint(vform,
i, src_val);
// Per-lane negation. INT64_MIN is written back unchanged instead of being
// negated, which avoids signed overflow in the simulator itself.
// NOTE(review): the condition under which SetSignedSat(i, true) is issued is
// not visible here — presumably when the lane holds the most negative value;
// confirm.
1707LogicVRegister Simulator::neg(
VectorFormat vform, LogicVRegister dst,
1708 const LogicVRegister& src) {
1709 dst.ClearForWrite(vform);
1712 int64_t sa = src.Int(vform,
i);
1714 dst.SetSignedSat(
i,
true);
1716 dst.SetInt(vform,
i, (sa == INT64_MIN) ? sa : -sa);
// Adds the unsigned lanes of src to the signed lanes of dst. The left-justified
// values (sa, ub, ur) are computed first; the visible store writes
// dst.Int + src.Uint back into the lane.
// NOTE(review): how `ur` is used (presumably overflow detection leading to a
// saturated result) is not visible here — confirm.
1721LogicVRegister Simulator::suqadd(
VectorFormat vform, LogicVRegister dst,
1722 const LogicVRegister& src) {
1723 dst.ClearForWrite(vform);
1725 int64_t sa = dst.IntLeftJustified(vform,
i);
1726 uint64_t ub = src.UintLeftJustified(vform,
i);
1727 uint64_t ur = sa + ub;
1733 dst.SetUint(vform,
i, dst.Int(vform,
i) + src.Uint(vform,
i));
// Adds the signed lanes of src to the unsigned lanes of dst, using the
// left-justified sum `ur` to detect wrap-around: a positive addend whose sum
// did not grow indicates overflow, and a negative addend whose sum did not
// shrink indicates underflow, in which case the lane is set to 0. Otherwise
// the plain sum dst.Uint + src.Int is stored.
// NOTE(review): the action taken on the overflow branch is not visible here —
// presumably the lane saturates to the maximum value; confirm.
1739LogicVRegister Simulator::usqadd(
VectorFormat vform, LogicVRegister dst,
1740 const LogicVRegister& src) {
1741 dst.ClearForWrite(vform);
1743 uint64_t ua = dst.UintLeftJustified(vform,
i);
1744 int64_t sb = src.IntLeftJustified(vform,
i);
1745 uint64_t ur = ua + sb;
1747 if ((sb > 0) && (ur <= ua)) {
1749 }
else if ((sb < 0) && (ur >= ua)) {
1750 dst.SetUint(vform,
i, 0);
1752 dst.SetUint(vform,
i, dst.Uint(vform,
i) + src.Int(vform,
i));
// Per-lane absolute value. One path stores the negated lane (leaving
// INT64_MIN unchanged to avoid signed overflow); the other stores the lane
// as-is.
// NOTE(review): the conditions selecting each path and the SetSignedSat()
// call are not visible here — presumably negative lanes are negated and the
// most negative value is flagged for saturation; confirm.
1758LogicVRegister Simulator::abs(
VectorFormat vform, LogicVRegister dst,
1759 const LogicVRegister& src) {
1760 dst.ClearForWrite(vform);
1763 int64_t sa = src.Int(vform,
i);
1765 dst.SetSignedSat(
i,
true);
1768 dst.SetInt(vform,
i, (sa == INT64_MIN) ? sa : -sa);
1770 dst.SetInt(vform,
i, sa);
// Narrows the lanes of src into dst (format dstform). The source lanes are
// first captured in both signed (ssrc) and unsigned (usrc) form, dst is then
// cleared for writing, and per-lane signed/unsigned saturation flags are set
// at index offset + i.
// NOTE(review): the derivation of srcform, `offset` and `upperhalf`, the
// range checks driving the saturation flags, and the final lane store are not
// visible here — confirm against the full body.
1776LogicVRegister Simulator::ExtractNarrow(
VectorFormat dstform,
1777 LogicVRegister dst,
bool dstIsSigned,
1778 const LogicVRegister& src,
1780 bool upperhalf =
false;
1827 ssrc[
i] = src.Int(srcform,
i);
1828 usrc[
i] = src.Uint(srcform,
i);
1836 dst.ClearForWrite(dstform);
1842 dst.SetSignedSat(
offset +
i,
true);
1844 dst.SetSignedSat(
offset +
i,
false);
1850 dst.SetUnsignedSat(
offset +
i,
true);
1851 }
else if (ssrc[
i] < 0) {
1852 dst.SetUnsignedSat(
offset +
i,
false);
1856 dst.SetUnsignedSat(
offset +
i,
true);
// Plain narrowing: ExtractNarrow() with both boolean arguments true and no
// saturation applied to the result.
1876LogicVRegister Simulator::xtn(
VectorFormat vform, LogicVRegister dst,
1877 const LogicVRegister& src) {
1878 return ExtractNarrow(vform, dst,
true, src,
true);
// Narrowing with signed saturation: ExtractNarrow() with both boolean
// arguments true, followed by SignedSaturate().
1881LogicVRegister Simulator::sqxtn(
VectorFormat vform, LogicVRegister dst,
1882 const LogicVRegister& src) {
1883 return ExtractNarrow(vform, dst,
true, src,
true).SignedSaturate(vform);
// Narrowing with an unsigned-saturated result: ExtractNarrow() with
// dstIsSigned = false and the final boolean argument true, followed by
// UnsignedSaturate().
1886LogicVRegister Simulator::sqxtun(
VectorFormat vform, LogicVRegister dst,
1887 const LogicVRegister& src) {
1888 return ExtractNarrow(vform, dst,
false, src,
true).UnsignedSaturate(vform);
// Narrowing with unsigned saturation: ExtractNarrow() with both boolean
// arguments false, followed by UnsignedSaturate().
1891LogicVRegister Simulator::uqxtn(
VectorFormat vform, LogicVRegister dst,
1892 const LogicVRegister& src) {
1893 return ExtractNarrow(vform, dst,
false, src,
false).UnsignedSaturate(vform);
// Per-lane absolute difference |src1 - src2|. One path reads the lanes as
// signed integers and stores with SetInt(); the other reads them as unsigned
// and stores with SetUint() (selected by `issigned`; the branch itself is not
// visible here).
// NOTE(review): both paths hold the difference in an int64_t. For the unsigned
// path a 64-bit lane difference above INT64_MAX would be misread as negative,
// and the signed path can overflow for 64-bit lanes — confirm whether 64-bit
// lane formats ever reach this helper.
1896LogicVRegister Simulator::AbsDiff(
VectorFormat vform, LogicVRegister dst,
1897 const LogicVRegister& src1,
1898 const LogicVRegister& src2,
bool issigned) {
1899 dst.ClearForWrite(vform);
1902 int64_t sr = src1.Int(vform,
i) - src2.Int(vform,
i);
1903 sr = sr > 0 ? sr : -sr;
1904 dst.SetInt(vform,
i, sr);
1906 int64_t sr = src1.Uint(vform,
i) - src2.Uint(vform,
i);
1907 sr = sr > 0 ? sr : -sr;
1908 dst.SetUint(vform,
i, sr);
// Signed absolute difference and accumulate: computes AbsDiff(src1, src2)
// with issigned = true into a temporary, then adds it into dst.
1914LogicVRegister Simulator::saba(
VectorFormat vform, LogicVRegister dst,
1915 const LogicVRegister& src1,
1916 const LogicVRegister& src2) {
1918 dst.ClearForWrite(vform);
1919 AbsDiff(vform, temp, src1, src2,
true);
1920 add(vform, dst, dst, temp);
// Unsigned absolute difference and accumulate: computes AbsDiff(src1, src2)
// with issigned = false into a temporary, then adds it into dst.
1924LogicVRegister Simulator::uaba(
VectorFormat vform, LogicVRegister dst,
1925 const LogicVRegister& src1,
1926 const LogicVRegister& src2) {
1928 dst.ClearForWrite(vform);
1929 AbsDiff(vform, temp, src1, src2,
false);
1930 add(vform, dst, dst, temp);
// Bitwise NOT: each destination lane receives the complement of the
// corresponding source lane.
1934LogicVRegister Simulator::not_(
VectorFormat vform, LogicVRegister dst,
1935 const LogicVRegister& src) {
1936 dst.ClearForWrite(vform);
1938 dst.SetUint(vform,
i, ~src.Uint(vform,
i));
// Reverses the bit order within each lane: the inner loop runs once per bit
// of the lane, shifting `reversed_value` left and appending the current low
// bit of `value`. Results are written to dst with SetUintArray().
// NOTE(review): the initialisation of `reversed_value` per lane and the
// right-shift of `value` inside the inner loop are not visible here — confirm
// both are present in the full body.
1943LogicVRegister Simulator::rbit(
VectorFormat vform, LogicVRegister dst,
1944 const LogicVRegister& src) {
1948 uint64_t reversed_value;
1950 for (
int i = 0;
i < laneCount;
i++) {
1951 value = src.Uint(vform,
i);
1953 for (
int j = 0; j < laneSizeInBits; j++) {
1954 reversed_value = (reversed_value << 1) | (value & 1);
1960 dst.SetUintArray(vform,
result);
// Reverses the order of lanes inside consecutive groups. Each group holds
// revSize / laneSize lanes; within a group, source lane i + j lands at
// position i + lanesPerLoop - 1 - j. The result is written with
// SetUintArray().
// NOTE(review): `laneSize` is defined outside the visible lines; revSize is
// presumably expressed in the same unit (bytes) — confirm.
1964LogicVRegister Simulator::rev(
VectorFormat vform, LogicVRegister dst,
1965 const LogicVRegister& src,
int revSize) {
1969 int lanesPerLoop = revSize / laneSize;
1970 for (
int i = 0;
i < laneCount;
i += lanesPerLoop) {
1971 for (
int j = 0; j < lanesPerLoop; j++) {
1972 result[
i + lanesPerLoop - 1 - j] = src.Uint(vform,
i + j);
1975 dst.SetUintArray(vform,
result);
// Lane reversal within groups: forwards to rev() with revSize = 2.
1979LogicVRegister Simulator::rev16(
VectorFormat vform, LogicVRegister dst,
1980 const LogicVRegister& src) {
1981 return rev(vform, dst, src, 2);
// Lane reversal within groups: forwards to rev() with revSize = 4.
1984LogicVRegister Simulator::rev32(
VectorFormat vform, LogicVRegister dst,
1985 const LogicVRegister& src) {
1986 return rev(vform, dst, src, 4);
// Lane reversal within groups: forwards to rev() with revSize = 8.
1989LogicVRegister Simulator::rev64(
VectorFormat vform, LogicVRegister dst,
1990 const LogicVRegister& src) {
1991 return rev(vform, dst, src, 8);
// Pairwise add: result lane i is the sum of source lanes 2*i and 2*i + 1,
// read in the source format `vformsrc` either as signed or as unsigned values.
// A second loop then writes the results to dst, with a separate path when
// do_accumulate is set.
// NOTE(review): the derivation of vformsrc/lane_count, the is_signed branch
// and the body of the write-back loop are not visible here — presumably the
// accumulate path adds to the existing dst lane; confirm.
1994LogicVRegister Simulator::addlp(
VectorFormat vform, LogicVRegister dst,
1995 const LogicVRegister& src,
bool is_signed,
1996 bool do_accumulate) {
2003 for (
int i = 0;
i < lane_count;
i++) {
2005 result[
i] =
static_cast<uint64_t
>(src.Int(vformsrc, 2 *
i) +
2006 src.Int(vformsrc, 2 *
i + 1));
2008 result[
i] = src.Uint(vformsrc, 2 *
i) + src.Uint(vformsrc, 2 *
i + 1);
2012 dst.ClearForWrite(vform);
2013 for (
int i = 0;
i < lane_count; ++
i) {
2014 if (do_accumulate) {
// Signed pairwise add without accumulation: addlp() with is_signed = true,
// do_accumulate = false.
2023LogicVRegister Simulator::saddlp(
VectorFormat vform, LogicVRegister dst,
2024 const LogicVRegister& src) {
2025 return addlp(vform, dst, src,
true,
false);
// Unsigned pairwise add without accumulation: addlp() with is_signed = false,
// do_accumulate = false.
2028LogicVRegister Simulator::uaddlp(
VectorFormat vform, LogicVRegister dst,
2029 const LogicVRegister& src) {
2030 return addlp(vform, dst, src,
false,
false);
// Signed pairwise add with accumulation: addlp() with is_signed = true,
// do_accumulate = true.
2033LogicVRegister Simulator::sadalp(
VectorFormat vform, LogicVRegister dst,
2034 const LogicVRegister& src) {
2035 return addlp(vform, dst, src,
true,
true);
// Unsigned pairwise add with accumulation: addlp() with is_signed = false,
// do_accumulate = true.
2038LogicVRegister Simulator::uadalp(
VectorFormat vform, LogicVRegister dst,
2039 const LogicVRegister& src) {
2040 return addlp(vform, dst, src,
false,
true);
// Extracts a vector from the concatenation of two sources: the first
// laneCount - index result lanes come from src1 starting at lane `index`, and
// the remaining lanes are taken from the start of src2. The result is staged
// in a temporary array before dst is cleared and written, so dst may alias a
// source.
// NOTE(review): the loop header for the src2 copy and the body of the final
// write-back loop are not visible here — confirm.
2043LogicVRegister Simulator::ext(
VectorFormat vform, LogicVRegister dst,
2044 const LogicVRegister& src1,
2045 const LogicVRegister& src2,
int index) {
2048 for (
int i = 0;
i < laneCount -
index; ++
i) {
2049 result[
i] = src1.Uint(vform,
i + index);
2052 result[laneCount - index +
i] = src2.Uint(vform,
i);
2054 dst.ClearForWrite(vform);
2055 for (
int i = 0;
i < laneCount; ++
i) {
// Broadcasts one source lane to every destination lane. The lane at
// src_index is read before dst is cleared, so the copy is safe even if dst
// and src refer to the same register.
2061LogicVRegister Simulator::dup_element(
VectorFormat vform, LogicVRegister dst,
2062 const LogicVRegister& src,
2065 uint64_t value = src.Uint(vform, src_index);
2066 dst.ClearForWrite(vform);
2067 for (
int i = 0;
i < laneCount; ++
i) {
2068 dst.SetUint(vform,
i, value);
// Broadcasts `value` to every lane of dst.
// NOTE(review): the remaining parameters and the derivation of `value` are
// not visible here — presumably the immediate masked to the lane width;
// confirm.
2073LogicVRegister Simulator::dup_immediate(
VectorFormat vform, LogicVRegister dst,
2077 dst.ClearForWrite(vform);
2078 for (
int i = 0;
i < laneCount; ++
i) {
2079 dst.SetUint(vform,
i, value);
// Copies lane src_index of src into lane dst_index of dst; no other
// destination lane is written in the visible code.
2084LogicVRegister Simulator::ins_element(
VectorFormat vform, LogicVRegister dst,
2085 int dst_index,
const LogicVRegister& src,
2087 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
// Writes `value` into lane dst_index of dst.
// NOTE(review): `value` is derived from `imm` on a line not visible here —
// presumably imm masked to the lane width; confirm.
2091LogicVRegister Simulator::ins_immediate(
VectorFormat vform, LogicVRegister dst,
2092 int dst_index, uint64_t imm) {
2094 dst.SetUint(vform, dst_index, value);
// Move immediate: clears dst and writes `imm` to every lane.
2098LogicVRegister Simulator::movi(
VectorFormat vform, LogicVRegister dst,
2101 dst.ClearForWrite(vform);
2102 for (
int i = 0;
i < laneCount; ++
i) {
2103 dst.SetUint(vform,
i, imm);
// Move inverted immediate: clears dst and writes ~imm to every lane.
2108LogicVRegister Simulator::mvni(
VectorFormat vform, LogicVRegister dst,
2111 dst.ClearForWrite(vform);
2112 for (
int i = 0;
i < laneCount; ++
i) {
2113 dst.SetUint(vform,
i, ~imm);
// Bitwise OR with an immediate: each result lane is the source lane OR'ed
// with `imm`; results are staged in an array and written with SetUintArray().
2118LogicVRegister Simulator::orr(
VectorFormat vform, LogicVRegister dst,
2119 const LogicVRegister& src, uint64_t imm) {
2122 for (
int i = 0;
i < laneCount; ++
i) {
2123 result[
i] = src.Uint(vform,
i) | imm;
2125 dst.SetUintArray(vform,
result);
// Unsigned widen: lane i of src, read in the format `vform_half`, is written
// as an unsigned value to lane i of dst in format vform.
// NOTE(review): vform_half and the loop header are defined on lines not
// visible here — presumably the half-width counterpart of vform; confirm.
2129LogicVRegister Simulator::uxtl(
VectorFormat vform, LogicVRegister dst,
2130 const LogicVRegister& src) {
2133 dst.ClearForWrite(vform);
2135 dst.SetUint(vform,
i, src.Uint(vform_half,
i));
// Signed widen: lane i of src, read as a signed value in the format
// `vform_half`, is written to lane i of dst in format vform.
// NOTE(review): vform_half and the loop header are defined on lines not
// visible here — presumably the half-width counterpart of vform; confirm.
2140LogicVRegister Simulator::sxtl(
VectorFormat vform, LogicVRegister dst,
2141 const LogicVRegister& src) {
2144 dst.ClearForWrite(vform);
2146 dst.SetInt(vform,
i, src.Int(vform_half,
i));
// Unsigned widen of the upper source lanes: destination lane i receives
// source lane lane_count + i, read in the format `vform_half`.
// NOTE(review): vform_half and lane_count are defined on lines not visible
// here — confirm.
2151LogicVRegister Simulator::uxtl2(
VectorFormat vform, LogicVRegister dst,
2152 const LogicVRegister& src) {
2156 dst.ClearForWrite(vform);
2157 for (
int i = 0;
i < lane_count;
i++) {
2158 dst.SetUint(vform,
i, src.Uint(vform_half, lane_count +
i));
// Signed widen of the upper source lanes: destination lane i receives source
// lane lane_count + i, read as a signed value in the format `vform_half`.
// NOTE(review): vform_half and lane_count are defined on lines not visible
// here — confirm.
2163LogicVRegister Simulator::sxtl2(
VectorFormat vform, LogicVRegister dst,
2164 const LogicVRegister& src) {
2168 dst.ClearForWrite(vform);
2169 for (
int i = 0;
i < lane_count;
i++) {
2170 dst.SetInt(vform,
i, src.Int(vform_half, lane_count +
i));
// Shift right and narrow: ushr() on the source format, then ExtractNarrow()
// into the destination format with both boolean arguments false.
// NOTE(review): vform_src and vform_dst are derived on lines not visible
// here — confirm.
2175LogicVRegister Simulator::shrn(
VectorFormat vform, LogicVRegister dst,
2176 const LogicVRegister& src,
int shift) {
2180 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2181 return ExtractNarrow(vform_dst, dst,
false, shifted_src,
false);
// Second-half variant of shift right and narrow: ushr() on the source format,
// then ExtractNarrow() with both boolean arguments false.
// NOTE(review): vformsrc and vformdst are derived on lines not visible here;
// presumably they make the narrow target the upper half of dst — confirm.
2184LogicVRegister Simulator::shrn2(
VectorFormat vform, LogicVRegister dst,
2185 const LogicVRegister& src,
int shift) {
2189 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2190 return ExtractNarrow(vformdst, dst,
false, shifted_src,
false);
// Rounding shift right and narrow: ushr() followed by Round() on the source
// format, then ExtractNarrow() with both boolean arguments false.
// NOTE(review): vformsrc and vformdst are derived on lines not visible
// here — confirm.
2193LogicVRegister Simulator::rshrn(
VectorFormat vform, LogicVRegister dst,
2194 const LogicVRegister& src,
int shift) {
2198 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2199 return ExtractNarrow(vformdst, dst,
false, shifted_src,
false);
// Second-half variant of rounding shift right and narrow: ushr() followed by
// Round(), then ExtractNarrow() with both boolean arguments false.
// NOTE(review): vformsrc and vformdst are derived on lines not visible here;
// presumably they make the narrow target the upper half of dst — confirm.
2202LogicVRegister Simulator::rshrn2(
VectorFormat vform, LogicVRegister dst,
2203 const LogicVRegister& src,
int shift) {
2207 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2208 return ExtractNarrow(vformdst, dst,
false, shifted_src,
false);
// Shared table-lookup helper for the tbl/tbx overloads. Up to four table
// registers are gathered into `tab`. Each index lane `j` is split into a
// table selector (j >> 4) and an element position within that table (j & 15);
// a lookup only happens when the selector is below 4 and the selected table
// pointer is non-null. Results are written with SetUintArray().
// The tbl overloads pass zero_out_of_bounds = true, the tbx overloads false.
// NOTE(review): how `result` is pre-filled (which is where
// zero_out_of_bounds presumably takes effect) and the lookup store itself are
// not visible here — confirm.
2211LogicVRegister Simulator::Table(
VectorFormat vform, LogicVRegister dst,
2212 const LogicVRegister& ind,
2213 bool zero_out_of_bounds,
2214 const LogicVRegister* tab1,
2215 const LogicVRegister* tab2,
2216 const LogicVRegister* tab3,
2217 const LogicVRegister* tab4) {
2219 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
2225 uint64_t j = ind.Uint(vform,
i);
2226 int tab_idx =
static_cast<int>(j >> 4);
2227 int j_idx =
static_cast<int>(j & 15);
2228 if ((tab_idx < 4) && (tab[tab_idx] !=
nullptr)) {
2232 dst.SetUintArray(vform,
result);
// Single-table lookup: Table() with zero_out_of_bounds = true.
2236LogicVRegister Simulator::tbl(
VectorFormat vform, LogicVRegister dst,
2237 const LogicVRegister& tab,
2238 const LogicVRegister& ind) {
2239 return Table(vform, dst, ind,
true, &tab);
// Two-table lookup: Table() with zero_out_of_bounds = true.
2242LogicVRegister Simulator::tbl(
VectorFormat vform, LogicVRegister dst,
2243 const LogicVRegister& tab,
2244 const LogicVRegister& tab2,
2245 const LogicVRegister& ind) {
2246 return Table(vform, dst, ind,
true, &tab, &tab2);
// Three-table lookup: Table() with zero_out_of_bounds = true.
2249LogicVRegister Simulator::tbl(
VectorFormat vform, LogicVRegister dst,
2250 const LogicVRegister& tab,
2251 const LogicVRegister& tab2,
2252 const LogicVRegister& tab3,
2253 const LogicVRegister& ind) {
2254 return Table(vform, dst, ind,
true, &tab, &tab2, &tab3);
// Four-table lookup: Table() with zero_out_of_bounds = true.
2257LogicVRegister Simulator::tbl(
VectorFormat vform, LogicVRegister dst,
2258 const LogicVRegister& tab,
2259 const LogicVRegister& tab2,
2260 const LogicVRegister& tab3,
2261 const LogicVRegister& tab4,
2262 const LogicVRegister& ind) {
2263 return Table(vform, dst, ind,
true, &tab, &tab2, &tab3, &tab4);
// Single-table lookup: Table() with zero_out_of_bounds = false.
2266LogicVRegister Simulator::tbx(
VectorFormat vform, LogicVRegister dst,
2267 const LogicVRegister& tab,
2268 const LogicVRegister& ind) {
2269 return Table(vform, dst, ind,
false, &tab);
// Two-table lookup: Table() with zero_out_of_bounds = false.
2272LogicVRegister Simulator::tbx(
VectorFormat vform, LogicVRegister dst,
2273 const LogicVRegister& tab,
2274 const LogicVRegister& tab2,
2275 const LogicVRegister& ind) {
2276 return Table(vform, dst, ind,
false, &tab, &tab2);
// Three-table lookup: Table() with zero_out_of_bounds = false.
2279LogicVRegister Simulator::tbx(
VectorFormat vform, LogicVRegister dst,
2280 const LogicVRegister& tab,
2281 const LogicVRegister& tab2,
2282 const LogicVRegister& tab3,
2283 const LogicVRegister& ind) {
2284 return Table(vform, dst, ind,
false, &tab, &tab2, &tab3);
// Four-table lookup: Table() with zero_out_of_bounds = false.
2287LogicVRegister Simulator::tbx(
VectorFormat vform, LogicVRegister dst,
2288 const LogicVRegister& tab,
2289 const LogicVRegister& tab2,
2290 const LogicVRegister& tab3,
2291 const LogicVRegister& tab4,
2292 const LogicVRegister& ind) {
2293 return Table(vform, dst, ind,
false, &tab, &tab2, &tab3, &tab4);
// Unsigned saturating shift right narrow: shrn() followed by
// UnsignedSaturate().
2296LogicVRegister Simulator::uqshrn(
VectorFormat vform, LogicVRegister dst,
2297 const LogicVRegister& src,
int shift) {
2298 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
// Second-half unsigned saturating shift right narrow: shrn2() followed by
// UnsignedSaturate().
2301LogicVRegister Simulator::uqshrn2(
VectorFormat vform, LogicVRegister dst,
2302 const LogicVRegister& src,
int shift) {
2303 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
// Unsigned saturating rounding shift right narrow: rshrn() followed by
// UnsignedSaturate().
2306LogicVRegister Simulator::uqrshrn(
VectorFormat vform, LogicVRegister dst,
2307 const LogicVRegister& src,
int shift) {
2308 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
// Second-half unsigned saturating rounding shift right narrow: rshrn2()
// followed by UnsignedSaturate().
2311LogicVRegister Simulator::uqrshrn2(
VectorFormat vform, LogicVRegister dst,
2312 const LogicVRegister& src,
int shift) {
2313 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
// Signed saturating shift right narrow: sshr() on the source format, then
// sqxtn() into the destination format.
// NOTE(review): vformsrc and vformdst are derived on lines not visible
// here — confirm.
2316LogicVRegister Simulator::sqshrn(
VectorFormat vform, LogicVRegister dst,
2317 const LogicVRegister& src,
int shift) {
2321 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2322 return sqxtn(vformdst, dst, shifted_src);
// Second-half signed saturating shift right narrow: sshr() on the source
// format, then sqxtn() into the destination format.
// NOTE(review): vformsrc and vformdst are derived on lines not visible here;
// presumably they make the narrow target the upper half of dst — confirm.
2325LogicVRegister Simulator::sqshrn2(
VectorFormat vform, LogicVRegister dst,
2326 const LogicVRegister& src,
int shift) {
2330 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2331 return sqxtn(vformdst, dst, shifted_src);
// Signed saturating rounding shift right narrow: sshr() with Round() on the
// source format, then sqxtn() into the destination format.
// NOTE(review): vformsrc and vformdst are derived on lines not visible
// here — confirm.
2334LogicVRegister Simulator::sqrshrn(
VectorFormat vform, LogicVRegister dst,
2335 const LogicVRegister& src,
int shift) {
2339 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2340 return sqxtn(vformdst, dst, shifted_src);
// Second-half signed saturating rounding shift right narrow: sshr() with
// Round() on the source format, then sqxtn() into the destination format.
// NOTE(review): vformsrc and vformdst are derived on lines not visible here;
// presumably they make the narrow target the upper half of dst — confirm.
2343LogicVRegister Simulator::sqrshrn2(
VectorFormat vform, LogicVRegister dst,
2344 const LogicVRegister& src,
int shift) {
2348 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2349 return sqxtn(vformdst, dst, shifted_src);
// Signed shift right narrow with an unsigned-saturated result: sshr() on the
// source format, then sqxtun() into the destination format.
// NOTE(review): vformsrc and vformdst are derived on lines not visible
// here — confirm.
2352LogicVRegister Simulator::sqshrun(
VectorFormat vform, LogicVRegister dst,
2353 const LogicVRegister& src,
int shift) {
2357 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2358 return sqxtun(vformdst, dst, shifted_src);
// Second-half signed shift right narrow with an unsigned-saturated result:
// sshr() on the source format, then sqxtun() into the destination format.
// NOTE(review): vformsrc and vformdst are derived on lines not visible here;
// presumably they make the narrow target the upper half of dst — confirm.
2361LogicVRegister Simulator::sqshrun2(
VectorFormat vform, LogicVRegister dst,
2362 const LogicVRegister& src,
int shift) {
2366 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2367 return sqxtun(vformdst, dst, shifted_src);
// Signed rounding shift right narrow with an unsigned-saturated result:
// sshr() with Round() on the source format, then sqxtun().
// NOTE(review): vformsrc and vformdst are derived on lines not visible
// here — confirm.
2370LogicVRegister Simulator::sqrshrun(
VectorFormat vform, LogicVRegister dst,
2371 const LogicVRegister& src,
int shift) {
2375 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2376 return sqxtun(vformdst, dst, shifted_src);
// Second-half signed rounding shift right narrow with an unsigned-saturated
// result: sshr() with Round() on the source format, then sqxtun().
// NOTE(review): vformsrc and vformdst are derived on lines not visible here;
// presumably they make the narrow target the upper half of dst — confirm.
2379LogicVRegister Simulator::sqrshrun2(
VectorFormat vform, LogicVRegister dst,
2380 const LogicVRegister& src,
int shift) {
2384 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2385 return sqxtun(vformdst, dst, shifted_src);
// Unsigned add long: widens both sources with uxtl() into temporaries, then
// adds them into dst.
2388LogicVRegister Simulator::uaddl(
VectorFormat vform, LogicVRegister dst,
2389 const LogicVRegister& src1,
2390 const LogicVRegister& src2) {
2391 SimVRegister temp1, temp2;
2392 uxtl(vform, temp1, src1);
2393 uxtl(vform, temp2, src2);
2394 add(vform, dst, temp1, temp2);
// Unsigned add long (upper lanes): widens both sources with uxtl2() into
// temporaries, then adds them into dst.
2398LogicVRegister Simulator::uaddl2(
VectorFormat vform, LogicVRegister dst,
2399 const LogicVRegister& src1,
2400 const LogicVRegister& src2) {
2401 SimVRegister temp1, temp2;
2402 uxtl2(vform, temp1, src1);
2403 uxtl2(vform, temp2, src2);
2404 add(vform, dst, temp1, temp2);
// Unsigned add wide: only src2 is widened (uxtl()); src1 is used as-is in the
// addition.
2408LogicVRegister Simulator::uaddw(
VectorFormat vform, LogicVRegister dst,
2409 const LogicVRegister& src1,
2410 const LogicVRegister& src2) {
2412 uxtl(vform, temp, src2);
2413 add(vform, dst, src1, temp);
// Unsigned add wide (upper lanes): only src2 is widened (uxtl2()); src1 is
// used as-is in the addition.
2417LogicVRegister Simulator::uaddw2(
VectorFormat vform, LogicVRegister dst,
2418 const LogicVRegister& src1,
2419 const LogicVRegister& src2) {
2421 uxtl2(vform, temp, src2);
2422 add(vform, dst, src1, temp);
// Signed add long: widens both sources with sxtl() into temporaries, then
// adds them into dst.
2426LogicVRegister Simulator::saddl(
VectorFormat vform, LogicVRegister dst,
2427 const LogicVRegister& src1,
2428 const LogicVRegister& src2) {
2429 SimVRegister temp1, temp2;
2430 sxtl(vform, temp1, src1);
2431 sxtl(vform, temp2, src2);
2432 add(vform, dst, temp1, temp2);
// Signed add long (upper lanes): widens both sources with sxtl2() into
// temporaries, then adds them into dst.
2436LogicVRegister Simulator::saddl2(
VectorFormat vform, LogicVRegister dst,
2437 const LogicVRegister& src1,
2438 const LogicVRegister& src2) {
2439 SimVRegister temp1, temp2;
2440 sxtl2(vform, temp1, src1);
2441 sxtl2(vform, temp2, src2);
2442 add(vform, dst, temp1, temp2);
// Signed add wide: only src2 is widened (sxtl()); src1 is used as-is in the
// addition.
2446LogicVRegister Simulator::saddw(
VectorFormat vform, LogicVRegister dst,
2447 const LogicVRegister& src1,
2448 const LogicVRegister& src2) {
2450 sxtl(vform, temp, src2);
2451 add(vform, dst, src1, temp);
// Signed add wide (upper lanes): only src2 is widened (sxtl2()); src1 is used
// as-is in the addition.
2455LogicVRegister Simulator::saddw2(
VectorFormat vform, LogicVRegister dst,
2456 const LogicVRegister& src1,
2457 const LogicVRegister& src2) {
2459 sxtl2(vform, temp, src2);
2460 add(vform, dst, src1, temp);
// Unsigned subtract long: widens both sources with uxtl() into temporaries,
// then computes temp1 - temp2 into dst via sub().
2464LogicVRegister Simulator::usubl(
VectorFormat vform, LogicVRegister dst,
2465 const LogicVRegister& src1,
2466 const LogicVRegister& src2) {
2467 SimVRegister temp1, temp2;
2468 uxtl(vform, temp1, src1);
2469 uxtl(vform, temp2, src2);
2470 sub(vform, dst, temp1, temp2);
// Unsigned subtract long (upper lanes): widens both sources with uxtl2() into
// temporaries, then subtracts them into dst via sub().
2474LogicVRegister Simulator::usubl2(
VectorFormat vform, LogicVRegister dst,
2475 const LogicVRegister& src1,
2476 const LogicVRegister& src2) {
2477 SimVRegister temp1, temp2;
2478 uxtl2(vform, temp1, src1);
2479 uxtl2(vform, temp2, src2);
2480 sub(vform, dst, temp1, temp2);
// Unsigned subtract wide: only src2 is widened (uxtl()); it is then
// subtracted from src1 via sub().
2484LogicVRegister Simulator::usubw(
VectorFormat vform, LogicVRegister dst,
2485 const LogicVRegister& src1,
2486 const LogicVRegister& src2) {
2488 uxtl(vform, temp, src2);
2489 sub(vform, dst, src1, temp);
// Unsigned subtract wide (upper lanes): only src2 is widened (uxtl2()); it is
// then subtracted from src1 via sub().
2493LogicVRegister Simulator::usubw2(
VectorFormat vform, LogicVRegister dst,
2494 const LogicVRegister& src1,
2495 const LogicVRegister& src2) {
2497 uxtl2(vform, temp, src2);
2498 sub(vform, dst, src1, temp);
// Signed subtract long: widens both sources with sxtl() into temporaries,
// then subtracts them into dst via sub().
2502LogicVRegister Simulator::ssubl(
VectorFormat vform, LogicVRegister dst,
2503 const LogicVRegister& src1,
2504 const LogicVRegister& src2) {
2505 SimVRegister temp1, temp2;
2506 sxtl(vform, temp1, src1);
2507 sxtl(vform, temp2, src2);
2508 sub(vform, dst, temp1, temp2);
// Signed subtract long (upper lanes): widens both sources with sxtl2() into
// temporaries, then subtracts them into dst via sub().
2512LogicVRegister Simulator::ssubl2(
VectorFormat vform, LogicVRegister dst,
2513 const LogicVRegister& src1,
2514 const LogicVRegister& src2) {
2515 SimVRegister temp1, temp2;
2516 sxtl2(vform, temp1, src1);
2517 sxtl2(vform, temp2, src2);
2518 sub(vform, dst, temp1, temp2);
// Signed subtract wide: only src2 is widened (sxtl()); it is then subtracted
// from src1 via sub().
2522LogicVRegister Simulator::ssubw(
VectorFormat vform, LogicVRegister dst,
2523 const LogicVRegister& src1,
2524 const LogicVRegister& src2) {
2526 sxtl(vform, temp, src2);
2527 sub(vform, dst, src1, temp);
// Signed subtract wide (upper lanes): only src2 is widened (sxtl2()); it is
// then subtracted from src1 via sub().
2531LogicVRegister Simulator::ssubw2(
VectorFormat vform, LogicVRegister dst,
2532 const LogicVRegister& src1,
2533 const LogicVRegister& src2) {
2535 sxtl2(vform, temp, src2);
2536 sub(vform, dst, src1, temp);
// Unsigned absolute difference and accumulate long: widens both sources with
// uxtl(), then delegates to uaba() on the widened values.
2540LogicVRegister Simulator::uabal(
VectorFormat vform, LogicVRegister dst,
2541 const LogicVRegister& src1,
2542 const LogicVRegister& src2) {
2543 SimVRegister temp1, temp2;
2544 uxtl(vform, temp1, src1);
2545 uxtl(vform, temp2, src2);
2546 uaba(vform, dst, temp1, temp2);
// Unsigned absolute difference and accumulate long (upper lanes): widens both
// sources with uxtl2(), then delegates to uaba().
2550LogicVRegister Simulator::uabal2(
VectorFormat vform, LogicVRegister dst,
2551 const LogicVRegister& src1,
2552 const LogicVRegister& src2) {
2553 SimVRegister temp1, temp2;
2554 uxtl2(vform, temp1, src1);
2555 uxtl2(vform, temp2, src2);
2556 uaba(vform, dst, temp1, temp2);
// Signed absolute difference and accumulate long: widens both sources with
// sxtl(), then delegates to saba() on the widened values.
2560LogicVRegister Simulator::sabal(
VectorFormat vform, LogicVRegister dst,
2561 const LogicVRegister& src1,
2562 const LogicVRegister& src2) {
2563 SimVRegister temp1, temp2;
2564 sxtl(vform, temp1, src1);
2565 sxtl(vform, temp2, src2);
2566 saba(vform, dst, temp1, temp2);
// Signed absolute difference and accumulate long (upper lanes): widens both
// sources with sxtl2(), then delegates to saba().
2570LogicVRegister Simulator::sabal2(
VectorFormat vform, LogicVRegister dst,
2571 const LogicVRegister& src1,
2572 const LogicVRegister& src2) {
2573 SimVRegister temp1, temp2;
2574 sxtl2(vform, temp1, src1);
2575 sxtl2(vform, temp2, src2);
2576 saba(vform, dst, temp1, temp2);
// Unsigned absolute difference long: widens both sources with uxtl(), then
// calls AbsDiff() with issigned = false.
2580LogicVRegister Simulator::uabdl(
VectorFormat vform, LogicVRegister dst,
2581 const LogicVRegister& src1,
2582 const LogicVRegister& src2) {
2583 SimVRegister temp1, temp2;
2584 uxtl(vform, temp1, src1);
2585 uxtl(vform, temp2, src2);
2586 AbsDiff(vform, dst, temp1, temp2,
false);
// Unsigned absolute difference long (upper lanes): widens both sources with
// uxtl2(), then calls AbsDiff() with issigned = false.
2590LogicVRegister Simulator::uabdl2(
VectorFormat vform, LogicVRegister dst,
2591 const LogicVRegister& src1,
2592 const LogicVRegister& src2) {
2593 SimVRegister temp1, temp2;
2594 uxtl2(vform, temp1, src1);
2595 uxtl2(vform, temp2, src2);
2596 AbsDiff(vform, dst, temp1, temp2,
false);
// Signed absolute difference long: widens both sources with sxtl(), then
// calls AbsDiff() with issigned = true.
2600LogicVRegister Simulator::sabdl(
VectorFormat vform, LogicVRegister dst,
2601 const LogicVRegister& src1,
2602 const LogicVRegister& src2) {
2603 SimVRegister temp1, temp2;
2604 sxtl(vform, temp1, src1);
2605 sxtl(vform, temp2, src2);
2606 AbsDiff(vform, dst, temp1, temp2,
true);
// Signed absolute difference long (upper lanes): widens both sources with
// sxtl2(), then calls AbsDiff() with issigned = true.
2610LogicVRegister Simulator::sabdl2(
VectorFormat vform, LogicVRegister dst,
2611 const LogicVRegister& src1,
2612 const LogicVRegister& src2) {
2613 SimVRegister temp1, temp2;
2614 sxtl2(vform, temp1, src1);
2615 sxtl2(vform, temp2, src2);
2616 AbsDiff(vform, dst, temp1, temp2,
true);
// Unsigned multiply long: widens both sources with uxtl(), then multiplies
// the widened values into dst via mul().
2620LogicVRegister Simulator::umull(
VectorFormat vform, LogicVRegister dst,
2621 const LogicVRegister& src1,
2622 const LogicVRegister& src2) {
2623 SimVRegister temp1, temp2;
2624 uxtl(vform, temp1, src1);
2625 uxtl(vform, temp2, src2);
2626 mul(vform, dst, temp1, temp2);
// Unsigned multiply long (upper lanes): widens both sources with uxtl2(),
// then multiplies the widened values into dst via mul().
2630LogicVRegister Simulator::umull2(
VectorFormat vform, LogicVRegister dst,
2631 const LogicVRegister& src1,
2632 const LogicVRegister& src2) {
2633 SimVRegister temp1, temp2;
2634 uxtl2(vform, temp1, src1);
2635 uxtl2(vform, temp2, src2);
2636 mul(vform, dst, temp1, temp2);
// Signed multiply long: widens both sources with sxtl(), then multiplies the
// widened values into dst via mul().
2640LogicVRegister Simulator::smull(
VectorFormat vform, LogicVRegister dst,
2641 const LogicVRegister& src1,
2642 const LogicVRegister& src2) {
2643 SimVRegister temp1, temp2;
2644 sxtl(vform, temp1, src1);
2645 sxtl(vform, temp2, src2);
2646 mul(vform, dst, temp1, temp2);
// Signed multiply long (upper lanes): widens both sources with sxtl2(), then
// multiplies the widened values into dst via mul().
2650LogicVRegister Simulator::smull2(
VectorFormat vform, LogicVRegister dst,
2651 const LogicVRegister& src1,
2652 const LogicVRegister& src2) {
2653 SimVRegister temp1, temp2;
2654 sxtl2(vform, temp1, src1);
2655 sxtl2(vform, temp2, src2);
2656 mul(vform, dst, temp1, temp2);
// Unsigned multiply-subtract long: widens both sources with uxtl(), then
// delegates to mls() with dst as the accumulator.
2660LogicVRegister Simulator::umlsl(
VectorFormat vform, LogicVRegister dst,
2661 const LogicVRegister& src1,
2662 const LogicVRegister& src2) {
2663 SimVRegister temp1, temp2;
2664 uxtl(vform, temp1, src1);
2665 uxtl(vform, temp2, src2);
2666 mls(vform, dst, temp1, temp2);
// Unsigned multiply-subtract long (upper lanes): widens both sources with
// uxtl2(), then delegates to mls().
2670LogicVRegister Simulator::umlsl2(
VectorFormat vform, LogicVRegister dst,
2671 const LogicVRegister& src1,
2672 const LogicVRegister& src2) {
2673 SimVRegister temp1, temp2;
2674 uxtl2(vform, temp1, src1);
2675 uxtl2(vform, temp2, src2);
2676 mls(vform, dst, temp1, temp2);
// Signed multiply-subtract long: widens both sources with sxtl(), then
// delegates to mls() with dst as the accumulator.
2680LogicVRegister Simulator::smlsl(
VectorFormat vform, LogicVRegister dst,
2681 const LogicVRegister& src1,
2682 const LogicVRegister& src2) {
2683 SimVRegister temp1, temp2;
2684 sxtl(vform, temp1, src1);
2685 sxtl(vform, temp2, src2);
2686 mls(vform, dst, temp1, temp2);
// Signed multiply-subtract long (upper lanes): widens both sources with
// sxtl2(), then delegates to mls().
2690LogicVRegister Simulator::smlsl2(
VectorFormat vform, LogicVRegister dst,
2691 const LogicVRegister& src1,
2692 const LogicVRegister& src2) {
2693 SimVRegister temp1, temp2;
2694 sxtl2(vform, temp1, src1);
2695 sxtl2(vform, temp2, src2);
2696 mls(vform, dst, temp1, temp2);
// Unsigned multiply-accumulate long: widens both sources with uxtl(), then
// delegates to mla() with dst as the accumulator.
2700LogicVRegister Simulator::umlal(
VectorFormat vform, LogicVRegister dst,
2701 const LogicVRegister& src1,
2702 const LogicVRegister& src2) {
2703 SimVRegister temp1, temp2;
2704 uxtl(vform, temp1, src1);
2705 uxtl(vform, temp2, src2);
2706 mla(vform, dst, temp1, temp2);
// Unsigned multiply-accumulate long (upper lanes): widens both sources with
// uxtl2(), then delegates to mla().
2710LogicVRegister Simulator::umlal2(
VectorFormat vform, LogicVRegister dst,
2711 const LogicVRegister& src1,
2712 const LogicVRegister& src2) {
2713 SimVRegister temp1, temp2;
2714 uxtl2(vform, temp1, src1);
2715 uxtl2(vform, temp2, src2);
2716 mla(vform, dst, temp1, temp2);
// Signed multiply-accumulate long: widens both sources with sxtl(), then
// delegates to mla() with dst as the accumulator.
2720LogicVRegister Simulator::smlal(
VectorFormat vform, LogicVRegister dst,
2721 const LogicVRegister& src1,
2722 const LogicVRegister& src2) {
2723 SimVRegister temp1, temp2;
2724 sxtl(vform, temp1, src1);
2725 sxtl(vform, temp2, src2);
2726 mla(vform, dst, temp1, temp2);
// Signed multiply-accumulate long (upper lanes): widens both sources with
// sxtl2(), then delegates to mla().
2730LogicVRegister Simulator::smlal2(
VectorFormat vform, LogicVRegister dst,
2731 const LogicVRegister& src1,
2732 const LogicVRegister& src2) {
2733 SimVRegister temp1, temp2;
2734 sxtl2(vform, temp1, src1);
2735 sxtl2(vform, temp2, src2);
2736 mla(vform, dst, temp1, temp2);
// Signed saturating doubling multiply-accumulate long: computes sqdmull()
// into a temporary, adds it to dst and applies SignedSaturate().
2740LogicVRegister Simulator::sqdmlal(
VectorFormat vform, LogicVRegister dst,
2741 const LogicVRegister& src1,
2742 const LogicVRegister& src2) {
2744 LogicVRegister product = sqdmull(vform, temp, src1, src2);
2745 return add(vform, dst, dst, product).SignedSaturate(vform);
// Upper-lane variant of sqdmlal: computes sqdmull2() into a temporary, adds
// it to dst and applies SignedSaturate().
2748LogicVRegister Simulator::sqdmlal2(
VectorFormat vform, LogicVRegister dst,
2749 const LogicVRegister& src1,
2750 const LogicVRegister& src2) {
2752 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2753 return add(vform, dst, dst, product).SignedSaturate(vform);
// Signed saturating doubling multiply-subtract long: computes sqdmull() into
// a temporary, subtracts it from dst and applies SignedSaturate().
2756LogicVRegister Simulator::sqdmlsl(
VectorFormat vform, LogicVRegister dst,
2757 const LogicVRegister& src1,
2758 const LogicVRegister& src2) {
2760 LogicVRegister product = sqdmull(vform, temp, src1, src2);
2761 return sub(vform, dst, dst, product).SignedSaturate(vform);
// Upper-lane variant of sqdmlsl: computes sqdmull2() into a temporary,
// subtracts it from dst and applies SignedSaturate().
2764LogicVRegister Simulator::sqdmlsl2(
VectorFormat vform, LogicVRegister dst,
2765 const LogicVRegister& src1,
2766 const LogicVRegister& src2) {
2768 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2769 return sub(vform, dst, dst, product).SignedSaturate(vform);
// Signed saturating doubling multiply long: the smull() product is doubled by
// adding it to itself, and the sum is passed through SignedSaturate().
2772LogicVRegister Simulator::sqdmull(
VectorFormat vform, LogicVRegister dst,
2773 const LogicVRegister& src1,
2774 const LogicVRegister& src2) {
2776 LogicVRegister product = smull(vform, temp, src1, src2);
2777 return add(vform, dst, product, product).SignedSaturate(vform);
// Upper-lane variant of sqdmull: the smull2() product is doubled by adding it
// to itself, and the sum is passed through SignedSaturate().
2780LogicVRegister Simulator::sqdmull2(
VectorFormat vform, LogicVRegister dst,
2781 const LogicVRegister& src1,
2782 const LogicVRegister& src2) {
2784 LogicVRegister product = smull2(vform, temp, src1, src2);
2785 return add(vform, dst, product, product).SignedSaturate(vform);
// Doubling multiply returning the high half, with optional rounding. Per
// lane: the signed product of the two lanes has round_const added (a bias of
// 1 << (esize - 2) when `round` is true, otherwise 0) and is then shifted
// right by esize - 1 before being stored.
// NOTE(review): `esize` and `product` are declared on lines not visible here,
// as is any handling between the shift and the store (presumably saturation
// of the one overflowing case) — confirm.
2788LogicVRegister Simulator::sqrdmulh(
VectorFormat vform, LogicVRegister dst,
2789 const LogicVRegister& src1,
2790 const LogicVRegister& src2,
bool round) {
2796 int round_const = round ? (1 << (esize - 2)) : 0;
2799 dst.ClearForWrite(vform);
2801 product = src1.Int(vform,
i) * src2.Int(vform,
i);
2802 product += round_const;
2803 product = product >> (esize - 1);
2810 dst.SetInt(vform,
i, product);
// Non-rounding variant: forwards to sqrdmulh() with round = false.
2815LogicVRegister Simulator::sqdmulh(
VectorFormat vform, LogicVRegister dst,
2816 const LogicVRegister& src1,
2817 const LogicVRegister& src2) {
2818 return sqrdmulh(vform, dst, src1, src2,
false);
// NOTE(review): only the signature is visible here. By name this presumably
// adds src1 and src2 and narrows the high half of each sum into dst —
// confirm against the body.
2821LogicVRegister Simulator::addhn(
VectorFormat vform, LogicVRegister dst,
2822 const LogicVRegister& src1,
2823 const LogicVRegister& src2) {
// NOTE(review): only the signature is visible here. By name this is
// presumably the second-half variant of addhn (add, then narrow the high half
// of each sum) — confirm against the body.
2830LogicVRegister Simulator::addhn2(
VectorFormat vform, LogicVRegister dst,
2831 const LogicVRegister& src1,
2832 const LogicVRegister& src2) {
// NOTE(review): only the signature is visible here. By name this presumably
// adds src1 and src2 and narrows the high half of each sum with rounding —
// confirm against the body.
2839LogicVRegister Simulator::raddhn(
VectorFormat vform, LogicVRegister dst,
2840 const LogicVRegister& src1,
2841 const LogicVRegister& src2) {
// NOTE(review): only the signature is visible here. By name this is
// presumably the second-half variant of raddhn (rounding add, narrow high
// half) — confirm against the body.
2848LogicVRegister Simulator::raddhn2(
VectorFormat vform, LogicVRegister dst,
2849 const LogicVRegister& src1,
2850 const LogicVRegister& src2) {
// NOTE(review): only the signature is visible here. By name this presumably
// subtracts src2 from src1 and narrows the high half of each difference into
// dst — confirm against the body.
2857LogicVRegister Simulator::subhn(
VectorFormat vform, LogicVRegister dst,
2858 const LogicVRegister& src1,
2859 const LogicVRegister& src2) {
// NOTE(review): only the signature is visible here. By name this is
// presumably the second-half variant of subhn (subtract, then narrow the high
// half) — confirm against the body.
2866LogicVRegister Simulator::subhn2(
VectorFormat vform, LogicVRegister dst,
2867 const LogicVRegister& src1,
2868 const LogicVRegister& src2) {
// NOTE(review): only the signature is visible here. By name this presumably
// subtracts src2 from src1 and narrows the high half of each difference with
// rounding — confirm against the body.
2875LogicVRegister Simulator::rsubhn(
VectorFormat vform, LogicVRegister dst,
2876 const LogicVRegister& src1,
2877 const LogicVRegister& src2) {
// NOTE(review): only the signature is visible here. By name this is
// presumably the second-half variant of rsubhn (rounding subtract, narrow
// high half) — confirm against the body.
2884LogicVRegister Simulator::rsubhn2(
VectorFormat vform, LogicVRegister dst,
2885 const LogicVRegister& src1,
2886 const LogicVRegister& src2) {
// Transpose (even lanes): for each pair index i, result lane 2*i takes src1
// lane 2*i and result lane 2*i + 1 takes src2 lane 2*i. The staged result is
// written with SetUintArray().
2893LogicVRegister Simulator::trn1(
VectorFormat vform, LogicVRegister dst,
2894 const LogicVRegister& src1,
2895 const LogicVRegister& src2) {
2898 int pairs = laneCount / 2;
2900 result[2 *
i] = src1.Uint(vform, 2 *
i);
2901 result[(2 *
i) + 1] = src2.Uint(vform, 2 *
i);
2904 dst.SetUintArray(vform,
result);
// Transpose (odd lanes): for each pair index i, result lane 2*i takes src1
// lane 2*i + 1 and result lane 2*i + 1 takes src2 lane 2*i + 1. The staged
// result is written with SetUintArray().
2908LogicVRegister Simulator::trn2(
VectorFormat vform, LogicVRegister dst,
2909 const LogicVRegister& src1,
2910 const LogicVRegister& src2) {
2913 int pairs = laneCount / 2;
2915 result[2 *
i] = src1.Uint(vform, (2 *
i) + 1);
2916 result[(2 *
i) + 1] = src2.Uint(vform, (2 *
i) + 1);
2919 dst.SetUintArray(vform,
result);
// Interleave: for each pair index i, result lane 2*i takes src1 lane i and
// result lane 2*i + 1 takes src2 lane i, so the low source lanes are
// interleaved. The staged result is written with SetUintArray().
2923LogicVRegister Simulator::zip1(
VectorFormat vform, LogicVRegister dst,
2924 const LogicVRegister& src1,
2925 const LogicVRegister& src2) {
2928 int pairs = laneCount / 2;
2930 result[2 *
i] = src1.Uint(vform,
i);
2931 result[(2 *
i) + 1] = src2.Uint(vform,
i);
2934 dst.SetUintArray(vform,
result);
// Works on laneCount / 2 pairs and writes the staged result with
// SetUintArray().
// NOTE(review): the interleaving loop itself is not visible here; presumably
// it mirrors zip1 but reads the upper source lanes — confirm.
2938LogicVRegister Simulator::zip2(
VectorFormat vform, LogicVRegister dst,
2939 const LogicVRegister& src1,
2940 const LogicVRegister& src2) {
2943 int pairs = laneCount / 2;
2949 dst.SetUintArray(vform,
result);
// Unzip (even elements): a staging array is filled so that src2's lanes
// occupy positions laneCount..2*laneCount-1, and destination lane i then
// receives staging element 2*i.
// NOTE(review): the store that fills the first half of the staging array is
// not visible here — presumably result[i] = src1 lane i; confirm.
2953LogicVRegister Simulator::uzp1(
VectorFormat vform, LogicVRegister dst,
2954 const LogicVRegister& src1,
2955 const LogicVRegister& src2) {
2958 for (
int i = 0;
i < laneCount; ++
i) {
2960 result[laneCount +
i] = src2.Uint(vform,
i);
2963 dst.ClearForWrite(vform);
2964 for (
int i = 0;
i < laneCount; ++
i) {
2965 dst.SetUint(vform,
i,
result[2 *
i]);
2970LogicVRegister Simulator::uzp2(
VectorFormat vform, LogicVRegister dst,
2971 const LogicVRegister& src1,
2972 const LogicVRegister& src2) {
2975 for (
int i = 0;
i < laneCount; ++
i) {
2977 result[laneCount +
i] = src2.Uint(vform,
i);
2980 dst.ClearForWrite(vform);
2981 for (
int i = 0;
i < laneCount; ++
i) {
2982 dst.SetUint(vform,
i,
result[(2 *
i) + 1]);
2987template <
typename T>
2988T Simulator::FPAdd(T op1, T op2) {
2989 T
result = FPProcessNaNs(op1, op2);
2992 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
2994 FPProcessException();
2995 return FPDefaultNaN<T>();
3002template <
typename T>
3003T Simulator::FPSub(T op1, T op2) {
3005 DCHECK(!std::isnan(op1) && !std::isnan(op2));
3007 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
3009 FPProcessException();
3010 return FPDefaultNaN<T>();
3017template <
typename T>
3018T Simulator::FPMul(T op1, T op2) {
3020 DCHECK(!std::isnan(op1) && !std::isnan(op2));
3022 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3024 FPProcessException();
3025 return FPDefaultNaN<T>();
3032template <
typename T>
3033T Simulator::FPMulx(T op1, T op2) {
3034 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3037 return copysign(1.0, op1) * copysign(1.0, op2) * two;
3039 return FPMul(op1, op2);
3042template <
typename T>
3043T Simulator::FPMulAdd(T a, T op1, T op2) {
3044 T
result = FPProcessNaNs3(a, op1, op2);
3046 T sign_a = copysign(1.0, a);
3047 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
3048 bool isinf_prod = std::isinf(op1) || std::isinf(op2);
3049 bool operation_generates_nan =
3050 (std::isinf(op1) && (op2 == 0.0)) ||
3051 (std::isinf(op2) && (op1 == 0.0)) ||
3052 (std::isinf(a) && isinf_prod && (sign_a != sign_prod));
3054 if (std::isnan(
result)) {
3056 if (operation_generates_nan &&
IsQuietNaN(a)) {
3057 FPProcessException();
3058 return FPDefaultNaN<T>();
3065 if (operation_generates_nan) {
3066 FPProcessException();
3067 return FPDefaultNaN<T>();
3072 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3073 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
3081 if ((a == 0.0) && (
result == 0.0)) {
3082 return copysign(0.0, sign_prod);
3088template <
typename T>
3089T Simulator::FPDiv(T op1, T op2) {
3091 DCHECK(!std::isnan(op1) && !std::isnan(op2));
3093 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3095 FPProcessException();
3096 return FPDefaultNaN<T>();
3099 FPProcessException();
3100 if (!std::isnan(op1)) {
3101 double op1_sign = copysign(1.0, op1);
3102 double op2_sign = copysign(1.0, op2);
3112template <
typename T>
3113T Simulator::FPSqrt(T op) {
3114 if (std::isnan(op)) {
3115 return FPProcessNaN(op);
3116 }
else if (op < 0.0) {
3117 FPProcessException();
3118 return FPDefaultNaN<T>();
3120 return std::sqrt(op);
3124template <
typename T>
3125T Simulator::FPMax(T a, T b) {
3126 T
result = FPProcessNaNs(a, b);
3129 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3133 return (a > b) ?
a : b;
3137template <
typename T>
3138T Simulator::FPMaxNM(T a, T b) {
3145 T
result = FPProcessNaNs(a, b);
3149template <
typename T>
3150T Simulator::FPMin(T a, T b) {
3151 T
result = FPProcessNaNs(a, b);
3154 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3158 return (a < b) ?
a : b;
3162template <
typename T>
3163T Simulator::FPMinNM(T a, T b) {
3170 T
result = FPProcessNaNs(a, b);
3174template <
typename T>
3175T Simulator::FPRecipStepFused(T op1, T op2) {
3177 if ((std::isinf(op1) && (op2 == 0.0)) ||
3178 ((op1 == 0.0) && (std::isinf(op2)))) {
3180 }
else if (std::isinf(op1) || std::isinf(op2)) {
3189template <
typename T>
3190T Simulator::FPRSqrtStepFused(T op1, T op2) {
3191 const T one_point_five = 1.5;
3194 if ((std::isinf(op1) && (op2 == 0.0)) ||
3195 ((op1 == 0.0) && (std::isinf(op2)))) {
3196 return one_point_five;
3197 }
else if (std::isinf(op1) || std::isinf(op2)) {
3205 if (isnormal(op1 / two)) {
3207 }
else if (isnormal(op2 / two)) {
3212 return one_point_five;
3217double Simulator::FPRoundInt(
double value,
FPRounding round_mode) {
3221 }
else if (std::isnan(value)) {
3222 return FPProcessNaN(value);
3225 double int_result = std::floor(value);
3226 double error = value - int_result;
3227 switch (round_mode) {
3231 if ((-0.5 < value) && (value < 0.0)) {
3234 }
else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3244 if ((-0.5 <= value) && (value < 0.0)) {
3249 }
else if ((error > 0.5) ||
3250 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3259 int_result = ceil(value);
3270 if ((-1.0 < value) && (value < 0.0)) {
3274 }
else if (error > 0.0) {
3286 value = FPRoundInt(value, rmode);
3287 return base::saturated_cast<int16_t>(value);
3291 value = FPRoundInt(value, rmode);
3292 return base::saturated_cast<int32_t>(value);
3295int64_t Simulator::FPToInt64(
double value,
FPRounding rmode) {
3296 value = FPRoundInt(value, rmode);
3297 return base::saturated_cast<int64_t>(value);
3301 value = FPRoundInt(value, rmode);
3302 return base::saturated_cast<uint16_t>(value);
3305uint32_t Simulator::FPToUInt32(
double value,
FPRounding rmode) {
3306 value = FPRoundInt(value, rmode);
3307 return base::saturated_cast<uint32_t>(value);
3310uint64_t Simulator::FPToUInt64(
double value,
FPRounding rmode) {
3311 value = FPRoundInt(value, rmode);
3312 return base::saturated_cast<uint64_t>(value);
3315#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
3316 template <typename T> \
3317 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
3318 const LogicVRegister& src1, \
3319 const LogicVRegister& src2) { \
3320 dst.ClearForWrite(vform); \
3321 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \
3322 T op1 = src1.Float<T>(i); \
3323 T op2 = src2.Float<T>(i); \
3326 result = FPProcessNaNs(op1, op2); \
3327 if (!isnan(result)) { \
3328 result = OP(op1, op2); \
3331 result = OP(op1, op2); \
3333 dst.SetFloat(i, result); \
3338 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
3339 const LogicVRegister& src1, \
3340 const LogicVRegister& src2) { \
3341 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) { \
3342 FN<half>(vform, dst, src1, src2); \
3343 } else if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { \
3344 FN<float>(vform, dst, src1, src2); \
3346 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); \
3347 FN<double>(vform, dst, src1, src2); \
3351NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
3352#undef DEFINE_NEON_FP_VECTOR_OP
3354LogicVRegister Simulator::fnmul(
VectorFormat vform, LogicVRegister dst,
3355 const LogicVRegister& src1,
3356 const LogicVRegister& src2) {
3358 LogicVRegister product = fmul(vform, temp, src1, src2);
3359 return fneg(vform, dst, product);
3362template <
typename T>
3363LogicVRegister Simulator::frecps(
VectorFormat vform, LogicVRegister dst,
3364 const LogicVRegister& src1,
3365 const LogicVRegister& src2) {
3366 dst.ClearForWrite(vform);
3368 T op1 = -src1.Float<T>(
i);
3369 T op2 = src2.Float<T>(
i);
3370 T
result = FPProcessNaNs(op1, op2);
3371 dst.SetFloat(
i, isnan(
result) ?
result : FPRecipStepFused(op1, op2));
3376LogicVRegister Simulator::frecps(
VectorFormat vform, LogicVRegister dst,
3377 const LogicVRegister& src1,
3378 const LogicVRegister& src2) {
3380 frecps<half>(vform, dst, src1, src2);
3382 frecps<float>(vform, dst, src1, src2);
3385 frecps<double>(vform, dst, src1, src2);
3390template <
typename T>
3391LogicVRegister Simulator::frsqrts(
VectorFormat vform, LogicVRegister dst,
3392 const LogicVRegister& src1,
3393 const LogicVRegister& src2) {
3394 dst.ClearForWrite(vform);
3396 T op1 = -src1.Float<T>(
i);
3397 T op2 = src2.Float<T>(
i);
3398 T
result = FPProcessNaNs(op1, op2);
3399 dst.SetFloat(
i, std::isnan(
result) ?
result : FPRSqrtStepFused(op1, op2));
3404int32_t Simulator::FPToFixedJS(
double value) {
3416 if ((value != 0.0) || std::signbit(value)) {
3419 }
else if (std::isnan(value)) {
3426 double int_result = std::floor(value);
3427 double error = value - int_result;
3428 if ((error != 0.0) && (int_result < 0.0)) {
3434 double mod_const =
static_cast<double>(UINT64_C(1) << 32);
3436 (int_result / mod_const) - std::floor(int_result / mod_const);
3438 if (mod_error == 0.5) {
3439 constrained = INT32_MIN;
3441 constrained = int_result - mod_const * round(int_result / mod_const);
3443 DCHECK(std::floor(constrained) == constrained);
3444 DCHECK(constrained >= INT32_MIN);
3445 DCHECK(constrained <= INT32_MAX);
3448 if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
3452 FPProcessException();
3463LogicVRegister Simulator::frsqrts(
VectorFormat vform, LogicVRegister dst,
3464 const LogicVRegister& src1,
3465 const LogicVRegister& src2) {
3467 frsqrts<half>(vform, dst, src1, src2);
3469 frsqrts<float>(vform, dst, src1, src2);
3472 frsqrts<double>(vform, dst, src1, src2);
3477template <
typename T>
3478LogicVRegister Simulator::fcmp(
VectorFormat vform, LogicVRegister dst,
3479 const LogicVRegister& src1,
3480 const LogicVRegister& src2,
Condition cond) {
3481 dst.ClearForWrite(vform);
3484 T op1 = src1.Float<T>(
i);
3485 T op2 = src2.Float<T>(
i);
3486 T nan_result = FPProcessNaNs(op1, op2);
3487 if (!std::isnan(nan_result)) {
3513LogicVRegister Simulator::fcmp(
VectorFormat vform, LogicVRegister dst,
3514 const LogicVRegister& src1,
3515 const LogicVRegister& src2,
Condition cond) {
3517 fcmp<half>(vform, dst, src1, src2, cond);
3519 fcmp<float>(vform, dst, src1, src2, cond);
3522 fcmp<double>(vform, dst, src1, src2, cond);
3527LogicVRegister Simulator::fcmp_zero(
VectorFormat vform, LogicVRegister dst,
3528 const LogicVRegister& src,
Condition cond) {
3531 LogicVRegister zero_reg = dup_immediate(vform, temp, uint16_t{0});
3532 fcmp<half>(vform, dst, src, zero_reg, cond);
3534 LogicVRegister zero_reg = dup_immediate(vform, temp, uint32_t{0});
3535 fcmp<float>(vform, dst, src, zero_reg, cond);
3538 LogicVRegister zero_reg = dup_immediate(vform, temp, uint64_t{0});
3539 fcmp<double>(vform, dst, src, zero_reg, cond);
3544LogicVRegister Simulator::fabscmp(
VectorFormat vform, LogicVRegister dst,
3545 const LogicVRegister& src1,
3546 const LogicVRegister& src2,
Condition cond) {
3547 SimVRegister temp1, temp2;
3549 LogicVRegister abs_src1 = fabs_<half>(vform, temp1, src1);
3550 LogicVRegister abs_src2 = fabs_<half>(vform, temp2, src2);
3551 fcmp<half>(vform, dst, abs_src1, abs_src2, cond);
3553 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
3554 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
3555 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
3558 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
3559 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
3560 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
3565template <
typename T>
3566LogicVRegister Simulator::fmla(
VectorFormat vform, LogicVRegister dst,
3567 const LogicVRegister& src1,
3568 const LogicVRegister& src2) {
3569 dst.ClearForWrite(vform);
3571 T op1 = src1.Float<T>(
i);
3572 T op2 = src2.Float<T>(
i);
3573 T acc = dst.Float<T>(
i);
3574 T
result = FPMulAdd(acc, op1, op2);
3580LogicVRegister Simulator::fmla(
VectorFormat vform, LogicVRegister dst,
3581 const LogicVRegister& src1,
3582 const LogicVRegister& src2) {
3584 fmla<half>(vform, dst, src1, src2);
3586 fmla<float>(vform, dst, src1, src2);
3589 fmla<double>(vform, dst, src1, src2);
3594template <
typename T>
3595LogicVRegister Simulator::fmls(
VectorFormat vform, LogicVRegister dst,
3596 const LogicVRegister& src1,
3597 const LogicVRegister& src2) {
3598 dst.ClearForWrite(vform);
3600 T op1 = -src1.Float<T>(
i);
3601 T op2 = src2.Float<T>(
i);
3602 T acc = dst.Float<T>(
i);
3603 T
result = FPMulAdd(acc, op1, op2);
3609LogicVRegister Simulator::fmls(
VectorFormat vform, LogicVRegister dst,
3610 const LogicVRegister& src1,
3611 const LogicVRegister& src2) {
3613 fmls<half>(vform, dst, src1, src2);
3615 fmls<float>(vform, dst, src1, src2);
3618 fmls<double>(vform, dst, src1, src2);
3623template <
typename T>
3624LogicVRegister Simulator::fneg(
VectorFormat vform, LogicVRegister dst,
3625 const LogicVRegister& src) {
3626 dst.ClearForWrite(vform);
3628 T op = src.Float<T>(
i);
3630 dst.SetFloat(
i, op);
3635LogicVRegister Simulator::fneg(
VectorFormat vform, LogicVRegister dst,
3636 const LogicVRegister& src) {
3638 fneg<half>(vform, dst, src);
3640 fneg<float>(vform, dst, src);
3643 fneg<double>(vform, dst, src);
3648template <
typename T>
3649LogicVRegister Simulator::fabs_(
VectorFormat vform, LogicVRegister dst,
3650 const LogicVRegister& src) {
3651 dst.ClearForWrite(vform);
3653 T op = src.Float<T>(
i);
3654 if (copysign(1.0, op) < 0.0) {
3657 dst.SetFloat(
i, op);
3662LogicVRegister Simulator::fabs_(
VectorFormat vform, LogicVRegister dst,
3663 const LogicVRegister& src) {
3665 fabs_<half>(vform, dst, src);
3667 fabs_<float>(vform, dst, src);
3670 fabs_<double>(vform, dst, src);
3675LogicVRegister Simulator::fabd(
VectorFormat vform, LogicVRegister dst,
3676 const LogicVRegister& src1,
3677 const LogicVRegister& src2) {
3679 fsub(vform, temp, src1, src2);
3680 fabs_(vform, dst, temp);
3684LogicVRegister Simulator::fsqrt(
VectorFormat vform, LogicVRegister dst,
3685 const LogicVRegister& src) {
3686 dst.ClearForWrite(vform);
3689 half
result = FPSqrt(src.Float<half>(
i));
3694 float result = FPSqrt(src.Float<
float>(
i));
3700 double result = FPSqrt(src.Float<
double>(
i));
3707#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \
3708 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
3709 const LogicVRegister& src1, \
3710 const LogicVRegister& src2) { \
3711 SimVRegister temp1, temp2; \
3712 uzp1(vform, temp1, src1, src2); \
3713 uzp2(vform, temp2, src1, src2); \
3714 FN(vform, dst, temp1, temp2); \
3718 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
3719 const LogicVRegister& src) { \
3720 if (vform == kFormatS) { \
3721 float result = OP(src.Float<float>(0), src.Float<float>(1)); \
3722 dst.SetFloat(0, result); \
3724 DCHECK_EQ(vform, kFormatD); \
3725 double result = OP(src.Float<double>(0), src.Float<double>(1)); \
3726 dst.SetFloat(0, result); \
3728 dst.ClearForWrite(vform); \
3731NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
3732#undef DEFINE_NEON_FP_PAIR_OP
3734LogicVRegister Simulator::FMinMaxV(
VectorFormat vform, LogicVRegister dst,
3735 const LogicVRegister& src, FPMinMaxOp Op) {
3738 float result1 = (this->*Op)(src.Float<
float>(0), src.Float<
float>(1));
3739 float result2 = (this->*Op)(src.Float<
float>(2), src.Float<
float>(3));
3740 float result = (this->*Op)(result1, result2);
3742 dst.SetFloat<
float>(0,
result);
3746LogicVRegister Simulator::fmaxv(
VectorFormat vform, LogicVRegister dst,
3747 const LogicVRegister& src) {
3748 return FMinMaxV(vform, dst, src, &Simulator::FPMax);
3751LogicVRegister Simulator::fminv(
VectorFormat vform, LogicVRegister dst,
3752 const LogicVRegister& src) {
3753 return FMinMaxV(vform, dst, src, &Simulator::FPMin);
3756LogicVRegister Simulator::fmaxnmv(
VectorFormat vform, LogicVRegister dst,
3757 const LogicVRegister& src) {
3758 return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM);
3761LogicVRegister Simulator::fminnmv(
VectorFormat vform, LogicVRegister dst,
3762 const LogicVRegister& src) {
3763 return FMinMaxV(vform, dst, src, &Simulator::FPMinNM);
3766LogicVRegister Simulator::fmul(
VectorFormat vform, LogicVRegister dst,
3767 const LogicVRegister& src1,
3768 const LogicVRegister& src2,
int index) {
3769 dst.ClearForWrite(vform);
3772 LogicVRegister index_reg = dup_element(
kFormat8H, temp, src2, index);
3773 fmul<half>(vform, dst, src1, index_reg);
3775 LogicVRegister index_reg = dup_element(
kFormat4S, temp, src2, index);
3776 fmul<float>(vform, dst, src1, index_reg);
3779 LogicVRegister index_reg = dup_element(
kFormat2D, temp, src2, index);
3780 fmul<double>(vform, dst, src1, index_reg);
3785LogicVRegister Simulator::fmla(
VectorFormat vform, LogicVRegister dst,
3786 const LogicVRegister& src1,
3787 const LogicVRegister& src2,
int index) {
3788 dst.ClearForWrite(vform);
3791 LogicVRegister index_reg = dup_element(
kFormat8H, temp, src2, index);
3792 fmla<half>(vform, dst, src1, index_reg);
3794 LogicVRegister index_reg = dup_element(
kFormat4S, temp, src2, index);
3795 fmla<float>(vform, dst, src1, index_reg);
3798 LogicVRegister index_reg = dup_element(
kFormat2D, temp, src2, index);
3799 fmla<double>(vform, dst, src1, index_reg);
3804LogicVRegister Simulator::fmls(
VectorFormat vform, LogicVRegister dst,
3805 const LogicVRegister& src1,
3806 const LogicVRegister& src2,
int index) {
3807 dst.ClearForWrite(vform);
3810 LogicVRegister index_reg = dup_element(
kFormat8H, temp, src2, index);
3811 fmls<half>(vform, dst, src1, index_reg);
3813 LogicVRegister index_reg = dup_element(
kFormat4S, temp, src2, index);
3814 fmls<float>(vform, dst, src1, index_reg);
3817 LogicVRegister index_reg = dup_element(
kFormat2D, temp, src2, index);
3818 fmls<double>(vform, dst, src1, index_reg);
3823LogicVRegister Simulator::fmulx(
VectorFormat vform, LogicVRegister dst,
3824 const LogicVRegister& src1,
3825 const LogicVRegister& src2,
int index) {
3826 dst.ClearForWrite(vform);
3829 LogicVRegister index_reg = dup_element(
kFormat8H, temp, src2, index);
3830 fmulx<half>(vform, dst, src1, index_reg);
3832 LogicVRegister index_reg = dup_element(
kFormat4S, temp, src2, index);
3833 fmulx<float>(vform, dst, src1, index_reg);
3836 LogicVRegister index_reg = dup_element(
kFormat2D, temp, src2, index);
3837 fmulx<double>(vform, dst, src1, index_reg);
3842LogicVRegister Simulator::frint(
VectorFormat vform, LogicVRegister dst,
3843 const LogicVRegister& src,
3845 bool inexact_exception) {
3846 dst.ClearForWrite(vform);
3849 half input = src.Float<half>(
i);
3851 if (inexact_exception && !isnan(input) && (input != rounded)) {
3852 FPProcessException();
3854 dst.SetFloat<half>(
i, rounded);
3858 float input = src.Float<
float>(
i);
3860 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3861 FPProcessException();
3863 dst.SetFloat<
float>(
i, rounded);
3868 double input = src.Float<
double>(
i);
3870 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3871 FPProcessException();
3873 dst.SetFloat<
double>(
i, rounded);
3879LogicVRegister Simulator::fcvts(
VectorFormat vform, LogicVRegister dst,
3880 const LogicVRegister& src,
3882 dst.ClearForWrite(vform);
3885 half op = src.Float<half>(
i) * std::pow(2, fbits);
3890 float op = src.Float<
float>(
i) * std::pow(2.0f, fbits);
3896 double op = src.Float<
double>(
i) * std::pow(2.0, fbits);
3903LogicVRegister Simulator::fcvtu(
VectorFormat vform, LogicVRegister dst,
3904 const LogicVRegister& src,
3906 dst.ClearForWrite(vform);
3909 half op = src.Float<half>(
i) * std::pow(2.0f, fbits);
3914 float op = src.Float<
float>(
i) * std::pow(2.0f, fbits);
3920 double op = src.Float<
double>(
i) * std::pow(2.0, fbits);
3927LogicVRegister Simulator::fcvtl(
VectorFormat vform, LogicVRegister dst,
3928 const LogicVRegister& src) {
3931 dst.SetFloat(
i, FPToFloat(src.Float<
float16>(
i)));
3936 dst.SetFloat(
i, FPToDouble(src.Float<
float>(
i)));
3942LogicVRegister Simulator::fcvtl2(
VectorFormat vform, LogicVRegister dst,
3943 const LogicVRegister& src) {
3946 for (
int i = 0;
i < lane_count;
i++) {
3947 dst.SetFloat(
i, FPToFloat(src.Float<
float16>(
i + lane_count)));
3951 for (
int i = 0;
i < lane_count;
i++) {
3952 dst.SetFloat(
i, FPToDouble(src.Float<
float>(
i + lane_count)));
3958LogicVRegister Simulator::fcvtn(
VectorFormat vform, LogicVRegister dst,
3959 const LogicVRegister& src) {
3962 dst.SetFloat(
i, FPToFloat16(src.Float<
float>(
i),
FPTieEven));
3967 dst.SetFloat(
i, FPToFloat(src.Float<
double>(
i),
FPTieEven));
3970 dst.ClearForWrite(vform);
3974LogicVRegister Simulator::fcvtn2(
VectorFormat vform, LogicVRegister dst,
3975 const LogicVRegister& src) {
3978 for (
int i = lane_count - 1;
i >= 0;
i--) {
3979 dst.SetFloat(
i + lane_count, FPToFloat16(src.Float<
float>(
i),
FPTieEven));
3983 for (
int i = lane_count - 1;
i >= 0;
i--) {
3984 dst.SetFloat(
i + lane_count, FPToFloat(src.Float<
double>(
i),
FPTieEven));
3990LogicVRegister Simulator::fcvtxn(
VectorFormat vform, LogicVRegister dst,
3991 const LogicVRegister& src) {
3992 dst.ClearForWrite(vform);
3995 dst.SetFloat(
i, FPToFloat(src.Float<
double>(
i),
FPRoundOdd));
4000LogicVRegister Simulator::fcvtxn2(
VectorFormat vform, LogicVRegister dst,
4001 const LogicVRegister& src) {
4004 for (
int i = lane_count - 1;
i >= 0;
i--) {
4005 dst.SetFloat(
i + lane_count, FPToFloat(src.Float<
double>(
i),
FPRoundOdd));
4011double Simulator::recip_sqrt_estimate(
double a) {
4015 q0 =
static_cast<int>(a * 512.0);
4016 r = 1.0 / sqrt((
static_cast<double>(q0) + 0.5) / 512.0);
4018 q1 =
static_cast<int>(a * 256.0);
4019 r = 1.0 / sqrt((
static_cast<double>(q1) + 0.5) / 256.0);
4021 s =
static_cast<int>(256.0 *
r + 0.5);
4022 return static_cast<double>(
s) / 256.0;
4027inline uint64_t Bits(uint64_t val,
int start_bit,
int end_bit) {
4033template <
typename T>
4034T Simulator::FPRecipSqrtEstimate(T op) {
4035 static_assert(std::is_same_v<float, T> || std::is_same_v<double, T>,
4036 "T must be a float or double");
4038 if (std::isnan(op)) {
4039 return FPProcessNaN(op);
4040 }
else if (op == 0.0) {
4041 if (copysign(1.0, op) < 0.0) {
4046 }
else if (copysign(1.0, op) < 0.0) {
4047 FPProcessException();
4048 return FPDefaultNaN<T>();
4049 }
else if (std::isinf(op)) {
4055 if (
sizeof(T) ==
sizeof(
float)) {
4065 while (Bits(fraction, 51, 51) == 0) {
4066 fraction = Bits(fraction, 50, 0) << 1;
4069 fraction = Bits(fraction, 50, 0) << 1;
4073 if (Bits(exp, 0, 0) == 0) {
4074 scaled =
double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4076 scaled =
double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
4079 if (
sizeof(T) ==
sizeof(
float)) {
4080 result_exp = (380 -
exp) / 2;
4082 result_exp = (3068 -
exp) / 2;
4087 if (
sizeof(T) ==
sizeof(
float)) {
4088 uint32_t exp_bits =
static_cast<uint32_t
>(Bits(result_exp, 7, 0));
4089 uint32_t est_bits =
static_cast<uint32_t
>(Bits(estimate, 51, 29));
4092 return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
4097LogicVRegister Simulator::frsqrte(
VectorFormat vform, LogicVRegister dst,
4098 const LogicVRegister& src) {
4099 dst.ClearForWrite(vform);
4102 half input = src.Float<half>(
i);
4103 dst.SetFloat<half>(
i, FPRecipSqrtEstimate<float>(input));
4107 float input = src.Float<
float>(
i);
4108 dst.SetFloat(
i, FPRecipSqrtEstimate<float>(input));
4113 double input = src.Float<
double>(
i);
4114 dst.SetFloat(
i, FPRecipSqrtEstimate<double>(input));
4120template <
typename T>
4121T Simulator::FPRecipEstimate(T op,
FPRounding rounding) {
4122 static_assert(std::is_same_v<float, T> || std::is_same_v<double, T>,
4123 "T must be a float or double");
4126 if (
sizeof(T) ==
sizeof(
float)) {
4132 if (std::isnan(op)) {
4133 return FPProcessNaN(op);
4134 }
else if (std::isinf(op)) {
4135 return (
sign == 1) ? -0.0 : 0.0;
4136 }
else if (op == 0.0) {
4137 FPProcessException();
4139 }
else if (((
sizeof(T) ==
sizeof(
float)) &&
4140 (std::fabs(op) < std::pow(2.0, -128.0))) ||
4141 ((
sizeof(T) ==
sizeof(
double)) &&
4142 (std::fabs(op) < std::pow(2.0, -1024.0)))) {
4143 bool overflow_to_inf =
false;
4146 overflow_to_inf =
true;
4149 overflow_to_inf = (
sign == 0);
4152 overflow_to_inf = (
sign == 1);
4155 overflow_to_inf =
false;
4160 FPProcessException();
4161 if (overflow_to_inf) {
4165 if (
sizeof(T) ==
sizeof(
float)) {
4176 if (
sizeof(T) ==
sizeof(
float)) {
4188 if (Bits(fraction, 51, 51) == 0) {
4190 fraction = Bits(fraction, 49, 0) << 2;
4192 fraction = Bits(fraction, 50, 0) << 1;
4196 double scaled =
double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4198 if (
sizeof(T) ==
sizeof(
float)) {
4199 result_exp = 253 -
exp;
4201 result_exp = 2045 -
exp;
4204 double estimate = recip_estimate(scaled);
4207 if (result_exp == 0) {
4208 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4209 }
else if (result_exp == -1) {
4210 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4213 if (
sizeof(T) ==
sizeof(
float)) {
4214 uint32_t exp_bits =
static_cast<uint32_t
>(Bits(result_exp, 7, 0));
4215 uint32_t frac_bits =
static_cast<uint32_t
>(Bits(fraction, 51, 29));
4218 return double_pack(
sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4223LogicVRegister Simulator::frecpe(
VectorFormat vform, LogicVRegister dst,
4224 const LogicVRegister& src,
FPRounding round) {
4225 dst.ClearForWrite(vform);
4228 half input = src.Float<half>(
i);
4229 dst.SetFloat<half>(
i, FPRecipEstimate<float>(input, round));
4233 float input = src.Float<
float>(
i);
4234 dst.SetFloat(
i, FPRecipEstimate<float>(input, round));
4239 double input = src.Float<
double>(
i);
4240 dst.SetFloat(
i, FPRecipEstimate<double>(input, round));
4246LogicVRegister Simulator::ursqrte(
VectorFormat vform, LogicVRegister dst,
4247 const LogicVRegister& src) {
4248 dst.ClearForWrite(vform);
4251 double dp_operand, dp_result;
4253 operand = src.Uint(vform,
i);
4254 if (operand <= 0x3FFFFFFF) {
4257 dp_operand = operand * std::pow(2.0, -32);
4258 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4259 result =
static_cast<uint32_t
>(dp_result);
4261 dst.SetUint(vform,
i,
result);
4267double Simulator::recip_estimate(
double a) {
4270 q =
static_cast<int>(a * 512.0);
4271 r = 1.0 / ((
static_cast<double>(q) + 0.5) / 512.0);
4272 s =
static_cast<int>(256.0 *
r + 0.5);
4273 return static_cast<double>(
s) / 256.0;
4276LogicVRegister Simulator::urecpe(
VectorFormat vform, LogicVRegister dst,
4277 const LogicVRegister& src) {
4278 dst.ClearForWrite(vform);
4281 double dp_operand, dp_result;
4283 operand = src.Uint(vform,
i);
4284 if (operand <= 0x7FFFFFFF) {
4287 dp_operand = operand * std::pow(2.0, -32);
4288 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4289 result =
static_cast<uint32_t
>(dp_result);
4291 dst.SetUint(vform,
i,
result);
4296template <
typename T>
4297LogicVRegister Simulator::frecpx(
VectorFormat vform, LogicVRegister dst,
4298 const LogicVRegister& src) {
4299 dst.ClearForWrite(vform);
4301 T op = src.Float<T>(
i);
4303 if (std::isnan(op)) {
4304 result = FPProcessNaN(op);
4308 if (
sizeof(T) ==
sizeof(
float)) {
4311 exp = (
exp == 0) ? (0xFF - 1) :
static_cast<int>(Bits(~exp, 7, 0));
4317 exp = (
exp == 0) ? (0x7FF - 1) :
static_cast<int>(Bits(~exp, 10, 0));
4326LogicVRegister Simulator::frecpx(
VectorFormat vform, LogicVRegister dst,
4327 const LogicVRegister& src) {
4329 frecpx<float>(vform, dst, src);
4332 frecpx<double>(vform, dst, src);
4337LogicVRegister Simulator::scvtf(
VectorFormat vform, LogicVRegister dst,
4338 const LogicVRegister& src,
int fbits,
4346 dst.SetFloat<
float>(
i,
result);
4350 dst.SetFloat<
double>(
i,
result);
4356LogicVRegister Simulator::ucvtf(
VectorFormat vform, LogicVRegister dst,
4357 const LogicVRegister& src,
int fbits,
4365 dst.SetFloat<
float>(
i,
result);
4369 dst.SetFloat<
double>(
i,
result);
4375LogicVRegister Simulator::dot(
VectorFormat vform, LogicVRegister dst,
4376 const LogicVRegister& src1,
4377 const LogicVRegister& src2,
bool is_src1_signed,
4378 bool is_src2_signed) {
4382 dst.ClearForWrite(vform);
4385 int64_t element1, element2;
4386 for (
int i = 0;
i < 4;
i++) {
4387 int index = 4 * e +
i;
4388 if (is_src1_signed) {
4389 element1 = src1.Int(quarter_vform, index);
4391 element1 = src1.Uint(quarter_vform, index);
4393 if (is_src2_signed) {
4394 element2 = src2.Int(quarter_vform, index);
4396 element2 = src2.Uint(quarter_vform, index);
4398 result += element1 * element2;
4400 dst.SetUint(vform, e,
result + dst.Uint(vform, e));
4405LogicVRegister Simulator::sdot(
VectorFormat vform, LogicVRegister dst,
4406 const LogicVRegister& src1,
4407 const LogicVRegister& src2) {
4408 return dot(vform, dst, src1, src2,
true,
true);
std::optional< TNode< JSArray > > a
RoundingMode rounding_mode
ZoneVector< RpoNumber > & result
V8_INLINE Dest bit_cast(Source const &source)
int CountLeadingSignBits(int64_t value, int width)
VectorFormat ScalarFormatFromLaneSize(int lanesize)
constexpr unsigned kDoubleExponentBias
VectorFormat VectorFormatHalfLanes(VectorFormat vform)
bool IsSignallingNaN(double num)
constexpr unsigned kFloat16ExponentBits
V8_EXPORT_PRIVATE int LaneCountFromFormat(VectorFormat vform)
V8_EXPORT_PRIVATE const float kFP32PositiveInfinity
constexpr unsigned kFloatMantissaBits
const float16 kFP16NegativeInfinity
uint32_t float_sign(float val)
constexpr unsigned kDoubleMantissaBits
int64_t MinIntFromFormat(VectorFormat vform)
V8_EXPORT_PRIVATE const double kFP64DefaultNaN
const float16 kFP16PositiveInfinity
V8_EXPORT_PRIVATE const float kFP32DefaultNaN
uint64_t MaxUintFromFormat(VectorFormat vform)
V8_EXPORT_PRIVATE const double kFP64PositiveInfinity
constexpr int kMaxLanesPerVector
constexpr unsigned kFloatExponentBias
double ToQuietNaN(double num)
constexpr int64_t kHQuietNanMask
const float16 kFP16DefaultNaN
constexpr unsigned kFloatExponentBits
double FusedMultiplyAdd(double op1, double op2, double a)
int64_t MaxIntFromFormat(VectorFormat vform)
uint32_t double_sign(double val)
VectorFormat VectorFormatHalfWidth(VectorFormat vform)
float float_pack(uint32_t sign, uint32_t exp, uint32_t mantissa)
double double_pack(uint64_t sign, uint64_t exp, uint64_t mantissa)
constexpr unsigned kFloat16MantissaBits
int float16classify(float16 value)
unsigned LaneSizeInBitsFromFormat(VectorFormat vform)
uint32_t float_mantissa(float val)
uint32_t double_exp(double val)
uint16_t DoubleToFloat16(double value)
V8_EXPORT_PRIVATE const float kFP32NegativeInfinity
uint32_t float_exp(float val)
uint64_t double_mantissa(double val)
V8_EXPORT_PRIVATE const double kFP64NegativeInfinity
uint64_t unsigned_bitextract_64(int msb, int lsb, uint64_t x)
VectorFormat VectorFormatDoubleWidth(VectorFormat vform)
static int CountLeadingZeros(uint64_t value, int width)
int LaneSizeInBytesFromFormat(VectorFormat vform)
constexpr unsigned kDoubleExponentBits
VectorFormat VectorFormatHalfWidthDoubleLanes(VectorFormat vform)
constexpr unsigned kFloat16ExponentBias
uint32_t unsigned_bitextract_32(int msb, int lsb, uint32_t x)
bool is_signed(Condition cond)
VectorFormat VectorFormatFillQ(int laneSize)
VectorFormat ScalarFormatFromFormat(VectorFormat vform)
#define DCHECK_LE(v1, v2)
#define DCHECK_NOT_NULL(val)
#define DCHECK_GE(v1, v2)
#define DCHECK(condition)
#define DCHECK_LT(v1, v2)
#define DCHECK_EQ(v1, v2)