  __ MaybeCheckConstPool();
  switch (arch_opcode) {
    case kArchCallCodeObject: {
      if (instr->InputAt(0)->IsImmediate()) {
      __ CallCodeObject(reg);
    case kArchCallBuiltinPointer: {
      Register builtin_index = i.InputRegister(0);
      __ CallBuiltinByIndex(builtin_index, target);
#if V8_ENABLE_WEBASSEMBLY
    case kArchCallWasmFunction:
    case kArchCallWasmFunctionIndirect: {
      if (instr->InputAt(0)->IsImmediate()) {
        DCHECK_EQ(arch_opcode, kArchCallWasmFunction);
        __ Call(wasm_code, constant.rmode());
      } else if (arch_opcode == kArchCallWasmFunctionIndirect) {
        __ CallWasmCodePointer(i.InputRegister(0));
    case kArchTailCallWasm:
    case kArchTailCallWasmIndirect: {
      if (instr->InputAt(0)->IsImmediate()) {
        DCHECK_EQ(arch_opcode, kArchTailCallWasm);
        __ Jump(wasm_code, constant.rmode());
      } else if (arch_opcode == kArchTailCallWasmIndirect) {
        __ Jump(i.InputRegister(0));
    case kArchTailCallCodeObject: {
      if (instr->InputAt(0)->IsImmediate()) {
      __ JumpCodeObject(reg);
    case kArchTailCallAddress: {
    case kArchCallJSFunction: {
      __ Assert(eq, AbortReason::kWrongFunctionContext);
      uint32_t num_arguments =
          i.InputUint32(instr->JSCallArgumentCountInputIndex());
      __ CallJSFunction(func, num_arguments);
    case kArchPrepareCallCFunction: {
      __ PrepareCallCFunction(num_gp_parameters + num_fp_parameters);
    case kArchSaveCallerRegisters: {
    case kArchRestoreCallerRegisters: {
    case kArchPrepareTailCall:
    case kArchCallCFunctionWithFrameState:
    case kArchCallCFunction: {
      Label return_location;
#if V8_ENABLE_WEBASSEMBLY
      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
        __ GetLabelAddress(pc_scratch, &return_location);
               MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset));
      if (instr->InputAt(0)->IsImmediate()) {
                         set_isolate_data_slots, &return_location);
                         set_isolate_data_slots, &return_location);
      bool const needs_frame_state =
          (arch_opcode == kArchCallCFunctionWithFrameState);
      if (needs_frame_state) {
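      // Annotation: kArchCallCFunctionWithFrameState differs from plain
      // kArchCallCFunction only in that it records a frame state at the
      // return location, so the runtime can lazily deoptimize across the
      // C call.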
    case kArchBinarySearchSwitch:
    case kArchTableSwitch:
    case kArchAbortCSADcheck:
      DCHECK(i.InputRegister(0) == r1);
      __ CallBuiltin(Builtin::kAbortCSADcheck);
    case kArchDebugBreak:
      __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)),
    case kArchThrowTerminator:
    case kArchDeoptimize: {
    case kArchFramePointer:
      __ mov(i.OutputRegister(), fp);
    case kArchParentFramePointer:
      __ mov(i.OutputRegister(), fp);
#if V8_ENABLE_WEBASSEMBLY
    case kArchStackPointer:
      __ mov(i.OutputRegister(), sp);
    case kArchSetStackPointer:
      __ mov(sp, i.InputRegister(0));
    case kArchStackPointerGreaterThan: {
      lhs_register = i.TempRegister(0);
      constexpr size_t kValueIndex = 0;
      __ cmp(lhs_register, i.InputRegister(kValueIndex));
    case kArchStackCheckOffset:
    case kArchTruncateDoubleToI:
    case kArchStoreWithWriteBarrier:
    case kArchAtomicStoreWithWriteBarrier: {
      if (arch_opcode == kArchStoreWithWriteBarrier) {
        __ Check(ne, AbortReason::kOperandIsCleared);
      if (arch_opcode == kArchAtomicStoreWithWriteBarrier) {
        if (addressing_mode == kMode_Offset_RI) {
          int32_t immediate = i.InputInt32(1);
        DCHECK_EQ(kMode_Offset_RR, addressing_mode);
      if (arch_opcode == kArchAtomicStoreWithWriteBarrier &&
      auto ool = zone()->New<OutOfLineRecordWrite>(
      __ JumpIfSmi(value, ool->exit());
      __ bind(ool->exit());
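      // Annotation: the write barrier is emitted out of line. Smi values can
      // never be heap pointers, so JumpIfSmi skips straight to the exit; the
      // OutOfLineRecordWrite stub runs only when the barrier may actually be
      // required.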
    case kArchStoreIndirectWithWriteBarrier:
    case kArchStackSlot: {
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log2:
    case kIeee754Float64Log10:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      __ add(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
      __ and_(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
      __ bic(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
      __ mul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
      __ mla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
             i.InputRegister(2), i.OutputSBit());
      __ mls(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
             i.InputRegister(2));
      __ smull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
               i.InputRegister(1));
      __ smmul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
      __ smmla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
               i.InputRegister(2));
      __ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
               i.InputRegister(1), i.OutputSBit());
      __ sdiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
      __ udiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
      __ Move(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit());
      __ mvn(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit());
      __ orr(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
      __ eor(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
      __ sub(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
      __ rsb(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
      __ bfc(i.OutputRegister(), i.InputInt8(1), i.InputInt8(2));
      __ ubfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
      __ sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
      __ sxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
      __ sxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
      __ sxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
      __ sxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
      __ uxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
      __ uxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
      __ uxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
      __ uxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
      __ rbit(i.OutputRegister(), i.InputRegister(0));
      __ rev(i.OutputRegister(), i.InputRegister(0));
      __ clz(i.OutputRegister(), i.InputRegister(0));
      __ cmp(i.InputRegister(0), i.InputOperand2(1));
      __ cmn(i.InputRegister(0), i.InputOperand2(1));
      __ tst(i.InputRegister(0), i.InputOperand2(1));
      __ teq(i.InputRegister(0), i.InputOperand2(1));
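      // Annotation: i.InputOperand2(n) decodes the instruction's addressing
      // mode into an ARM "flexible second operand" (immediate, register, or
      // shifted register), so a single opcode here covers every Operand2
      // form the instruction selector may have chosen.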
      __ add(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2),
      __ adc(i.OutputRegister(1), i.InputRegister(1),
      __ sub(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2),
      __ sbc(i.OutputRegister(1), i.InputRegister(1),
      __ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
               i.InputRegister(2));
      __ mla(i.OutputRegister(1), i.InputRegister(0), i.InputRegister(3),
             i.OutputRegister(1));
      __ mla(i.OutputRegister(1), i.InputRegister(2), i.InputRegister(1),
             i.OutputRegister(1));
          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
      if (instr->InputAt(2)->IsImmediate()) {
        __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0),
                   i.InputRegister(1), i.InputInt32(2));
        __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0),
                   i.InputRegister(1), i.InputRegister(2));
          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
      if (instr->InputAt(2)->IsImmediate()) {
        __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
                   i.InputRegister(1), i.InputInt32(2));
        __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
                   i.InputRegister(1), i.InputRegister(2));
          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
      if (instr->InputAt(2)->IsImmediate()) {
        __ AsrPair(i.OutputSimd128Register, second_output, i.InputRegister(0),
                   i.InputRegister(1), i.InputInt32(2));
        __ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
                   i.InputRegister(1), i.InputRegister(2));
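      // Annotation: when only the low word of a 64-bit pair shift is used,
      // the instruction has a single output and second_output falls back to
      // a temp register that LslPair/LsrPair/AsrPair may clobber freely.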
      if (instr->InputAt(1)->IsFPRegister()) {
        __ VFPCompareAndSetFlags(i.InputFloatRegister(0),
                                 i.InputFloatRegister(1));
        __ VFPCompareAndSetFlags(i.InputFloatRegister(0), i.InputFloat32(1));
      __ vadd(i.OutputFloatRegister(), i.InputFloatRegister(0),
              i.InputFloatRegister(1));
      __ vsub(i.OutputFloatRegister(), i.InputFloatRegister(0),
              i.InputFloatRegister(1));
      __ vmul(i.OutputFloatRegister(), i.InputFloatRegister(0),
              i.InputFloatRegister(1));
      __ vmla(i.OutputFloatRegister(), i.InputFloatRegister(1),
              i.InputFloatRegister(2));
      __ vmls(i.OutputFloatRegister(), i.InputFloatRegister(1),
              i.InputFloatRegister(2));
      __ vdiv(i.OutputFloatRegister(), i.InputFloatRegister(0),
              i.InputFloatRegister(1));
      __ vsqrt(i.OutputFloatRegister(), i.InputFloatRegister(0));
      __ vabs(i.OutputFloatRegister(), i.InputFloatRegister(0));
      __ vneg(i.OutputFloatRegister(), i.InputFloatRegister(0));
      if (instr->InputAt(1)->IsFPRegister()) {
        __ VFPCompareAndSetFlags(i.InputDoubleRegister(0),
                                 i.InputDoubleRegister(1));
        __ VFPCompareAndSetFlags(i.InputDoubleRegister(0), i.InputDouble(1));
      __ vadd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
              i.InputDoubleRegister(1));
      __ vsub(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
              i.InputDoubleRegister(1));
      __ vmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
              i.InputDoubleRegister(1));
      __ vmla(i.OutputDoubleRegister(), i.InputDoubleRegister(1),
              i.InputDoubleRegister(2));
      __ vmls(i.OutputDoubleRegister(), i.InputDoubleRegister(1),
              i.InputDoubleRegister(2));
      __ vdiv(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
              i.InputDoubleRegister(1));
      __ PrepareCallCFunction(0, 2);
      __ MovToFloatParameters(i.InputDoubleRegister(0),
                              i.InputDoubleRegister(1));
      __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
      __ MovFromFloatResult(i.OutputDoubleRegister());
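      // Annotation: ARM has no VFP remainder instruction, so Float64Mod is
      // lowered to a C call: the two doubles are moved to the FP argument
      // registers, mod_two_doubles_operation runs in C, and the result is
      // fetched from the FP return register.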
      __ vsqrt(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      __ vabs(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      __ vneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
    case kArmVrintmF32: {
      if (instr->InputAt(0)->IsSimd128Register()) {
        __ vrintm(NeonS32, i.OutputSimd128Register(),
                  i.InputSimd128Register(0));
        __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0));
    case kArmVrintmF64: {
      __ vrintm(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
    case kArmVrintpF32: {
      if (instr->InputAt(0)->IsSimd128Register()) {
        __ vrintp(NeonS32, i.OutputSimd128Register(),
                  i.InputSimd128Register(0));
        __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0));
    case kArmVrintpF64: {
      __ vrintp(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
    case kArmVrintzF32: {
      if (instr->InputAt(0)->IsSimd128Register()) {
        __ vrintz(NeonS32, i.OutputSimd128Register(),
                  i.InputSimd128Register(0));
        __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
    case kArmVrintzF64: {
      __ vrintz(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
    case kArmVrintaF64: {
      __ vrinta(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
    case kArmVrintnF32: {
      if (instr->InputAt(0)->IsSimd128Register()) {
        __ vrintn(NeonS32, i.OutputSimd128Register(),
                  i.InputSimd128Register(0));
        __ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0));
    case kArmVrintnF64: {
      __ vrintn(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
    case kArmVcvtF32F64: {
      __ vcvt_f32_f64(i.OutputFloatRegister(), i.InputDoubleRegister(0));
    case kArmVcvtF64F32: {
      __ vcvt_f64_f32(i.OutputDoubleRegister(), i.InputFloatRegister(0));
    case kArmVcvtF32S32: {
      __ vmov(scratch, i.InputRegister(0));
      __ vcvt_f32_s32(i.OutputFloatRegister(), scratch);
    case kArmVcvtF32U32: {
      __ vmov(scratch, i.InputRegister(0));
      __ vcvt_f32_u32(i.OutputFloatRegister(), scratch);
    case kArmVcvtF64S32: {
      __ vmov(scratch, i.InputRegister(0));
      __ vcvt_f64_s32(i.OutputDoubleRegister(), scratch);
    case kArmVcvtF64U32: {
      __ vmov(scratch, i.InputRegister(0));
      __ vcvt_f64_u32(i.OutputDoubleRegister(), scratch);
    case kArmVcvtS32F32: {
      __ vcvt_s32_f32(scratch, i.InputFloatRegister(0));
      __ vmov(i.OutputRegister(), scratch);
      if (set_overflow_to_min_i32) {
    case kArmVcvtU32F32: {
      __ vcvt_u32_f32(scratch, i.InputFloatRegister(0));
      __ vmov(i.OutputRegister(), scratch);
      if (set_overflow_to_min_u32) {
    case kArmVcvtS32F64: {
      __ vcvt_s32_f64(scratch, i.InputDoubleRegister(0));
      __ vmov(i.OutputRegister(), scratch);
    case kArmVcvtU32F64: {
      __ vcvt_u32_f64(scratch, i.InputDoubleRegister(0));
      __ vmov(i.OutputRegister(), scratch);
    case kArmVmovU32F32:
      __ vmov(i.OutputRegister(), i.InputFloatRegister(0));
    case kArmVmovF32U32:
      __ vmov(i.OutputFloatRegister(), i.InputRegister(0));
    case kArmVmovLowU32F64:
      __ VmovLow(i.OutputRegister(), i.InputDoubleRegister(0));
    case kArmVmovLowF64U32:
      __ VmovLow(i.OutputDoubleRegister(), i.InputRegister(1));
    case kArmVmovHighU32F64:
      __ VmovHigh(i.OutputRegister(), i.InputDoubleRegister(0));
    case kArmVmovHighF64U32:
      __ VmovHigh(i.OutputDoubleRegister(), i.InputRegister(1));
    case kArmVmovF64U32U32:
      __ vmov(i.OutputDoubleRegister(), i.InputRegister(0), i.InputRegister(1));
    case kArmVmovU32U32F64:
      __ vmov(i.OutputRegister(0), i.OutputRegister(1),
              i.InputDoubleRegister(0));
      __ vcnt(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ ldrb(i.OutputRegister(), i.InputOffset());
      __ ldrsb(i.OutputRegister(), i.InputOffset());
      __ strb(i.InputRegister(0), i.InputOffset(1));
      __ ldrh(i.OutputRegister(), i.InputOffset());
      __ ldrsh(i.OutputRegister(), i.InputOffset());
      __ strh(i.InputRegister(0), i.InputOffset(1));
      __ ldr(i.OutputRegister(), i.InputOffset());
      __ str(i.InputRegister(0), i.InputOffset(1));
      __ vldr(i.OutputFloatRegister(), i.InputOffset());
      __ vstr(i.InputFloatRegister(0), i.InputOffset(1));
              i.NeonInputOperand(0));
              i.NeonInputOperand(1));
    case kArmVld1S128: {
              i.NeonInputOperand(0));
    case kArmVst1S128: {
              i.NeonInputOperand(1));
      __ vldr(i.OutputDoubleRegister(), i.InputOffset());
      __ vstr(i.InputDoubleRegister(0), i.InputOffset(1));
    case kArmFloat32Max: {
      if (left == right) {
      auto ool = zone()->New<OutOfLineFloat32Max>(this, result, left, right);
      __ FloatMax(result, left, right, ool->entry());
      __ bind(ool->exit());
    case kArmFloat64Max: {
      if (left == right) {
      auto ool = zone()->New<OutOfLineFloat64Max>(this, result, left, right);
      __ FloatMax(result, left, right, ool->entry());
      __ bind(ool->exit());
    case kArmFloat32Min: {
      if (left == right) {
      auto ool = zone()->New<OutOfLineFloat32Min>(this, result, left, right);
      __ FloatMin(result, left, right, ool->entry());
      __ bind(ool->exit());
    case kArmFloat64Min: {
      if (left == right) {
      auto ool = zone()->New<OutOfLineFloat64Min>(this, result, left, right);
      __ FloatMin(result, left, right, ool->entry());
      __ bind(ool->exit());
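      // Annotation: FloatMin/FloatMax emit the common fast path inline and
      // branch to the out-of-line stub for the awkward inputs (NaN and the
      // -0.0 vs +0.0 distinction) that a plain compare-and-select would get
      // wrong under JavaScript min/max semantics.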
    case kArmFloat64SilenceNaN: {
      __ VFPCanonicalizeNaN(result, value);
      int stack_decrement = i.InputInt32(0);
      __ vpush(i.InputFloatRegister(1));
      __ vpush(i.InputDoubleRegister(1));
      __ vpush(i.InputSimd128Register(1));
      __ push(i.InputRegister(1));
      int reverse_slot = i.InputInt32(0);
      if (instr->OutputAt(0)->IsFPRegister()) {
    case kArmVmullLow: {
      __ vmull(dt, i.OutputSimd128Register(), i.InputSimd128Register(0).low(),
               i.InputSimd128Register(1).low());
    case kArmVmullHigh: {
      __ vmull(dt, i.OutputSimd128Register(), i.InputSimd128Register(0).high(),
               i.InputSimd128Register(1).high());
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ vpadal(dt, i.OutputSimd128Register(), i.InputSimd128Register(1));
      __ vpaddl(dt, i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmF64x2Splat: {
      __ Move(dst.low(), src);
      __ Move(dst.high(), src);
    case kArmF64x2ExtractLane: {
      __ ExtractLane(i.OutputDoubleRegister(), i.InputSimd128Register(0),
    case kArmF64x2ReplaceLane: {
      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputDoubleRegister(2), i.InputInt8(1));
    case kArmF64x2Abs: {
      __ vabs(i.OutputSimd128Register().low(),
              i.InputSimd128Register(0).low());
      __ vabs(i.OutputSimd128Register().high(),
              i.InputSimd128Register(0).high());
    case kArmF64x2Neg: {
      __ vneg(i.OutputSimd128Register().low(),
              i.InputSimd128Register(0).low());
      __ vneg(i.OutputSimd128Register().high(),
              i.InputSimd128Register(0).high());
    case kArmF64x2Sqrt: {
      __ vsqrt(i.OutputSimd128Register().low(),
               i.InputSimd128Register(0).low());
      __ vsqrt(i.OutputSimd128Register().high(),
               i.InputSimd128Register(0).high());
    case kArmF64x2Add: {
    case kArmF64x2Sub: {
    case kArmF64x2Mul: {
    case kArmF64x2Div: {
    case kArmF64x2Min: {
      if (left == right) {
      auto ool_low = zone()->New<OutOfLineFloat64Min>(
          this, result.low(), left.low(), right.low());
      auto ool_high = zone()->New<OutOfLineFloat64Min>(
          this, result.high(), left.high(), right.high());
      __ FloatMin(result.low(), left.low(), right.low(), ool_low->entry());
      __ bind(ool_low->exit());
      __ FloatMin(result.high(), left.high(), right.high(),
      __ bind(ool_high->exit());
    case kArmF64x2Max: {
      if (left == right) {
      auto ool_low = zone()->New<OutOfLineFloat64Max>(
          this, result.low(), left.low(), right.low());
      auto ool_high = zone()->New<OutOfLineFloat64Max>(
          this, result.high(), left.high(), right.high());
      __ FloatMax(result.low(), left.low(), right.low(), ool_low->entry());
      __ bind(ool_low->exit());
      __ FloatMax(result.high(), left.high(), right.high(),
      __ bind(ool_high->exit());
#undef ASSEMBLE_F64X2_ARITHMETIC_BINOP
      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
                               i.InputSimd128Register(1).low());
      __ vmov(i.OutputSimd128Register().low(), scratch, scratch);
      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
                               i.InputSimd128Register(1).high());
      __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
                               i.InputSimd128Register(1).low());
      __ vmov(i.OutputSimd128Register().low(), scratch, scratch);
      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
                               i.InputSimd128Register(1).high());
      __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
                               i.InputSimd128Register(1).low());
      __ vmov(i.OutputSimd128Register().low(), scratch, scratch);
      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
                               i.InputSimd128Register(1).high());
      __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
                               i.InputSimd128Register(1).low());
      __ vmov(i.OutputSimd128Register().low(), scratch, scratch);
      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
                               i.InputSimd128Register(1).high());
      __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
    case kArmF64x2Pmin: {
      __ VFPCompareAndSetFlags(rhs.low(), lhs.low());
      __ vmov(dst.low(), rhs.low(), mi);
      __ VFPCompareAndSetFlags(rhs.high(), lhs.high());
      __ vmov(dst.high(), rhs.high(), mi);
    case kArmF64x2Pmax: {
      __ VFPCompareAndSetFlags(rhs.low(), lhs.low());
      __ vmov(dst.low(), rhs.low(), gt);
      __ VFPCompareAndSetFlags(rhs.high(), lhs.high());
      __ vmov(dst.high(), rhs.high(), gt);
    case kArmF64x2Qfma: {
      __ vmul(dst.low(), src0.low(), src1.low());
      __ vmul(dst.high(), src0.high(), src1.high());
      __ vadd(dst.low(), src2.low(), dst.low());
      __ vadd(dst.high(), src2.high(), dst.high());
    case kArmF64x2Qfms: {
      __ vmul(dst.low(), src0.low(), src1.low());
      __ vmul(dst.high(), src0.high(), src1.high());
      __ vsub(dst.low(), src2.low(), dst.low());
      __ vsub(dst.high(), src2.high(), dst.high());
    case kArmF64x2Ceil: {
      __ vrintp(dst.low(), src.low());
      __ vrintp(dst.high(), src.high());
    case kArmF64x2Floor: {
      __ vrintm(dst.low(), src.low());
      __ vrintm(dst.high(), src.high());
    case kArmF64x2Trunc: {
      __ vrintz(dst.low(), src.low());
      __ vrintz(dst.high(), src.high());
    case kArmF64x2NearestInt: {
      __ vrintn(dst.low(), src.low());
      __ vrintn(dst.high(), src.high());
    case kArmF64x2ConvertLowI32x4S: {
      __ F64x2ConvertLowI32x4S(i.OutputSimd128Register(),
                               i.InputSimd128Register(0));
    case kArmF64x2ConvertLowI32x4U: {
      __ F64x2ConvertLowI32x4U(i.OutputSimd128Register(),
                               i.InputSimd128Register(0));
    case kArmF64x2PromoteLowF32x4: {
      __ F64x2PromoteLowF32x4(i.OutputSimd128Register(),
                              i.InputSimd128Register(0));
    case kArmI64x2SplatI32Pair: {
      __ vdup(Neon32, dst, i.InputRegister(0));
      __ ReplaceLane(dst, dst, i.InputRegister(1), NeonS32, 1);
      __ ReplaceLane(dst, dst, i.InputRegister(1), NeonS32, 3);
    case kArmI64x2ReplaceLaneI32Pair: {
      int8_t lane = i.InputInt8(1);
      __ ReplaceLane(dst, dst, i.InputRegister(2), NeonS32, lane * 2);
      __ ReplaceLane(dst, dst, i.InputRegister(3), NeonS32, lane * 2 + 1);
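      // Annotation: 32-bit ARM has no 64-bit general-purpose registers, so
      // an i64x2 lane arrives as a pair of 32-bit values and is written as
      // two adjacent NeonS32 lanes (2 * lane and 2 * lane + 1).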
    case kArmI64x2Add: {
      __ vadd(Neon64, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI64x2Sub: {
      __ vsub(Neon64, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI64x2Mul: {
      __ vmov(tmp1, left);
      __ vmov(tmp2, right);
    case kArmI64x2Abs: {
      __ I64x2Abs(i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmI64x2Neg: {
      __ vmov(dst, uint64_t{0});
      __ vsub(Neon64, dst, dst, i.InputSimd128Register(0));
    case kArmI64x2Shl: {
    case kArmI64x2ShrS: {
    case kArmI64x2ShrU: {
    case kArmI64x2BitMask: {
      __ I64x2BitMask(i.OutputRegister(), i.InputSimd128Register(0));
    case kArmI64x2SConvertI32x4Low: {
              i.InputSimd128Register(0).low());
    case kArmI64x2SConvertI32x4High: {
              i.InputSimd128Register(0).high());
    case kArmI64x2UConvertI32x4Low: {
              i.InputSimd128Register(0).low());
    case kArmI64x2UConvertI32x4High: {
              i.InputSimd128Register(0).high());
    case kArmF32x4Splat: {
      int src_code = i.InputFloatRegister(0).code();
      __ vdup(Neon32, i.OutputSimd128Register(),
    case kArmF32x4ExtractLane: {
      __ ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
    case kArmF32x4ReplaceLane: {
      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputFloatRegister(2), i.InputInt8(1));
    case kArmF32x4SConvertI32x4: {
      __ vcvt_f32_s32(i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmF32x4UConvertI32x4: {
      __ vcvt_f32_u32(i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmF32x4Abs: {
      __ vabs(i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmF32x4Neg: {
      __ vneg(i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmF32x4Sqrt: {
#define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane)
    case kArmF32x4Add: {
      __ vadd(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmF32x4Sub: {
      __ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmF32x4Mul: {
      __ vmul(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmF32x4Div: {
#define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane)
    case kArmF32x4Min: {
      __ vmin(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmF32x4Max: {
      __ vmax(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      __ vceq(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      __ vceq(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
      __ vcgt(i.OutputSimd128Register(), i.InputSimd128Register(1),
              i.InputSimd128Register(0));
      __ vcge(i.OutputSimd128Register(), i.InputSimd128Register(1),
              i.InputSimd128Register(0));
    case kArmF32x4Pmin: {
      __ vcgt(dst, lhs, rhs);
      __ vbsl(dst, rhs, lhs);
    case kArmF32x4Pmax: {
      __ vcgt(dst, rhs, lhs);
      __ vbsl(dst, rhs, lhs);
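      // Annotation: pmin/pmax implement Wasm's pseudo-minimum/maximum
      // (e.g. pmin is rhs < lhs ? rhs : lhs) rather than IEEE minNum, so a
      // lane-wise vcgt mask plus a vbsl select suffices, and a NaN in lhs
      // propagates because the comparison is false for it.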
    case kArmF32x4Qfma: {
      __ vmul(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
      __ vadd(dst, i.InputSimd128Register(2), dst);
    case kArmF32x4Qfms: {
      __ vmul(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
      __ vsub(dst, i.InputSimd128Register(2), dst);
    case kArmF32x4DemoteF64x2Zero: {
      __ vmov(dst.high(), 0);
    case kArmI32x4Splat: {
      __ vdup(Neon32, i.OutputSimd128Register(), i.InputRegister(0));
    case kArmI32x4ExtractLane: {
      __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS32,
    case kArmI32x4ReplaceLane: {
      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputRegister(2), NeonS32, i.InputInt8(1));
    case kArmI32x4SConvertF32x4: {
      __ vcvt_s32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmI32x4SConvertI16x8Low: {
              i.InputSimd128Register(0).low());
    case kArmI32x4SConvertI16x8High: {
              i.InputSimd128Register(0).high());
    case kArmI32x4Neg: {
      __ vneg(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmI32x4Shl: {
    case kArmI32x4ShrS: {
    case kArmI32x4Add: {
      __ vadd(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI32x4Sub: {
      __ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI32x4Mul: {
      __ vmul(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI32x4MinS: {
      __ vmin(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI32x4MaxS: {
      __ vmax(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      __ I64x2Eq(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputSimd128Register(1));
      __ I64x2Ne(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputSimd128Register(1));
    case kArmI64x2GtS: {
      __ I64x2GtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1));
    case kArmI64x2GeS: {
      __ I64x2GeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1));
      __ vceq(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      __ vceq(Neon32, dst, i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI32x4GtS: {
      __ vcgt(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI32x4GeS: {
      __ vcge(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI32x4UConvertF32x4: {
      __ vcvt_u32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmI32x4UConvertI16x8Low: {
              i.InputSimd128Register(0).low());
    case kArmI32x4UConvertI16x8High: {
              i.InputSimd128Register(0).high());
    case kArmI32x4ShrU: {
    case kArmI32x4MinU: {
      __ vmin(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI32x4MaxU: {
      __ vmax(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI32x4GtU: {
      __ vcgt(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI32x4GeU: {
      __ vcge(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI32x4Abs: {
      __ vabs(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmI32x4BitMask: {
      __ vpadd(Neon32, tmp.low(), tmp.low(), tmp.high());
      __ VmovLow(dst, tmp.low());
    case kArmI32x4DotI16x8S: {
      __ vmull(NeonS16, scratch, lhs.low(), rhs.low());
      __ vpadd(Neon32, dst.low(), scratch.low(), scratch.high());
      __ vmull(NeonS16, scratch, lhs.high(), rhs.high());
      __ vpadd(Neon32, dst.high(), scratch.low(), scratch.high());
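      // Annotation: the dot product widens each half with vmull and folds
      // adjacent products with a pairwise vpadd, i.e. for i32x4.dot_i16x8_s
      // dst[k] = lhs[2k] * rhs[2k] + lhs[2k+1] * rhs[2k+1].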
    case kArmI16x8DotI8x16S: {
      __ vmull(NeonS8, scratch, lhs.low(), rhs.low());
      __ vpadd(Neon16, dst.low(), scratch.low(), scratch.high());
      __ vmull(NeonS8, scratch, lhs.high(), rhs.high());
      __ vpadd(Neon16, dst.high(), scratch.low(), scratch.high());
    case kArmI32x4DotI8x16AddS: {
      __ vmull(NeonS8, scratch, lhs.low(), rhs.low());
      __ vpadd(Neon16, tmp1.low(), scratch.low(), scratch.high());
      __ vmull(NeonS8, scratch, lhs.high(), rhs.high());
      __ vpadd(Neon16, tmp1.high(), scratch.low(), scratch.high());
    case kArmI32x4TruncSatF64x2SZero: {
      __ vmov(dst.high(), 0);
    case kArmI32x4TruncSatF64x2UZero: {
      __ vmov(dst.high(), 0);
    case kArmI16x8Splat: {
      __ vdup(Neon16, i.OutputSimd128Register(), i.InputRegister(0));
    case kArmI16x8ExtractLaneU: {
      __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU16,
    case kArmI16x8ExtractLaneS: {
      __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS16,
    case kArmI16x8ReplaceLane: {
      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputRegister(2), NeonS16, i.InputInt8(1));
    case kArmI16x8SConvertI8x16Low: {
      __ vmovl(NeonS8, i.OutputSimd128Register(),
               i.InputSimd128Register(0).low());
    case kArmI16x8SConvertI8x16High: {
      __ vmovl(NeonS8, i.OutputSimd128Register(),
               i.InputSimd128Register(0).high());
    case kArmI16x8Neg: {
      __ vneg(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmI16x8Shl: {
    case kArmI16x8ShrS: {
    case kArmI16x8SConvertI32x4:
    case kArmI16x8Add: {
      __ vadd(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI16x8AddSatS: {
      __ vqadd(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
    case kArmI16x8Sub: {
      __ vsub(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI16x8SubSatS: {
      __ vqsub(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
    case kArmI16x8Mul: {
      __ vmul(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI16x8MinS: {
      __ vmin(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI16x8MaxS: {
      __ vmax(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      __ vceq(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      __ vceq(Neon16, dst, i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI16x8GtS: {
      __ vcgt(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI16x8GeS: {
      __ vcge(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI16x8UConvertI8x16Low: {
      __ vmovl(NeonU8, i.OutputSimd128Register(),
               i.InputSimd128Register(0).low());
    case kArmI16x8UConvertI8x16High: {
      __ vmovl(NeonU8, i.OutputSimd128Register(),
               i.InputSimd128Register(0).high());
    case kArmI16x8ShrU: {
    case kArmI16x8UConvertI32x4:
    case kArmI16x8AddSatU: {
      __ vqadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
    case kArmI16x8SubSatU: {
      __ vqsub(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
    case kArmI16x8MinU: {
      __ vmin(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI16x8MaxU: {
      __ vmax(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI16x8GtU: {
      __ vcgt(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI16x8GeU: {
      __ vcge(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI16x8RoundingAverageU: {
      __ vrhadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputSimd128Register(1));
    case kArmI16x8Abs: {
      __ vabs(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmI16x8BitMask: {
      __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
      __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
      __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
    case kArmI16x8Q15MulRSatS: {
      __ vqrdmulh(NeonS16, i.OutputSimd128Register(),
                  i.InputSimd128Register(0), i.InputSimd128Register(1));
    case kArmI8x16Splat: {
      __ vdup(Neon8, i.OutputSimd128Register(), i.InputRegister(0));
    case kArmI8x16ExtractLaneU: {
      __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU8,
    case kArmI8x16ExtractLaneS: {
      __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS8,
    case kArmI8x16ReplaceLane: {
      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputRegister(2), NeonS8, i.InputInt8(1));
    case kArmI8x16Neg: {
      __ vneg(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmI8x16Shl: {
    case kArmI8x16ShrS: {
    case kArmI8x16SConvertI16x8:
    case kArmI8x16Add: {
      __ vadd(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI8x16AddSatS: {
      __ vqadd(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
    case kArmI8x16Sub: {
      __ vsub(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI8x16SubSatS: {
      __ vqsub(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
    case kArmI8x16MinS: {
      __ vmin(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI8x16MaxS: {
      __ vmax(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      __ vceq(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      __ vceq(Neon8, dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
    case kArmI8x16GtS: {
      __ vcgt(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI8x16GeS: {
      __ vcge(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI8x16ShrU: {
    case kArmI8x16UConvertI16x8:
    case kArmI8x16AddSatU: {
      __ vqadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
    case kArmI8x16SubSatU: {
      __ vqsub(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
    case kArmI8x16MinU: {
      __ vmin(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI8x16MaxU: {
      __ vmax(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI8x16GtU: {
      __ vcgt(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI8x16GeU: {
      __ vcge(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmI8x16RoundingAverageU: {
      __ vrhadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputSimd128Register(1));
    case kArmI8x16Abs: {
      __ vabs(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmI8x16BitMask: {
      __ vext(mask, tmp, tmp, 8);
      __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
      __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
      __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
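      // Annotation: the bitmask extraction masks each lane down to a single
      // power-of-two bit, then repeatedly applies pairwise vpadd so the
      // per-lane bits accumulate into one scalar lane, which is finally
      // moved to the general-purpose result register.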
    case kArmS128Const: {
      uint64_t imm1 = make_uint64(i.InputUint32(1), i.InputUint32(0));
      uint64_t imm2 = make_uint64(i.InputUint32(3), i.InputUint32(2));
    case kArmS128Zero: {
      __ veor(i.OutputSimd128Register(), i.OutputSimd128Register(),
              i.OutputSimd128Register());
    case kArmS128AllOnes: {
      __ vmov(i.OutputSimd128Register(), uint64_t{0xffff'ffff'ffff'ffff});
      int index = i.InputInt32(2);
      int d_lanes = lanes / 2;
      int src_d_index = index & (d_lanes - 1);
      int src_d_code = i.InputSimd128Register(0).low().code() + index / d_lanes;
      __ vdup(size, i.OutputSimd128Register(),
      __ vand(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      __ vorr(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      __ veor(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmS128Select: {
      DCHECK(dst == i.InputSimd128Register(0));
      __ vbsl(dst, i.InputSimd128Register(1), i.InputSimd128Register(2));
    case kArmS128AndNot: {
      __ vbic(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
    case kArmS32x4ZipLeft: {
      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(dst.high(), src1.low());
      __ vtrn(Neon32, dst.low(), dst.high());
    case kArmS32x4ZipRight: {
      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(dst.low(), src1.high());
      __ vtrn(Neon32, dst.low(), dst.high());
    case kArmS32x4UnzipLeft: {
      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(scratch, src1);
    case kArmS32x4UnzipRight: {
      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(scratch, src1);
    case kArmS32x4TransposeLeft: {
      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(scratch, src1);
    case kArmS32x4Shuffle: {
      src0 = i.InputSimd128Register(0),
      src1 = i.InputSimd128Register(1);
      int dst_code = dst.code() * 4;
      int src0_code = src0.code() * 4;
      int src1_code = src1.code() * 4;
      int32_t shuffle = i.InputInt32(2);
      for (int i = 0; i < 4; i++) {
        int lane = shuffle & 0x7;
        int src_code = src0_code;
          src_code = src1_code;
        __ VmovExtended(dst_code + i, src_code + lane);
    case kArmS32x4TransposeRight: {
      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(scratch, src1);
    case kArmS16x8ZipLeft: {
      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(dst.high(), src1.low());
      __ vzip(Neon16, dst.low(), dst.high());
    case kArmS16x8ZipRight: {
      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(dst.low(), src1.high());
      __ vzip(Neon16, dst.low(), dst.high());
    case kArmS16x8UnzipLeft: {
      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(scratch, src1);
    case kArmS16x8UnzipRight: {
      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(scratch, src1);
    case kArmS16x8TransposeLeft: {
      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(scratch, src1);
    case kArmS16x8TransposeRight: {
      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(scratch, src1);
    case kArmS8x16ZipLeft: {
      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(dst.high(), src1.low());
      __ vzip(Neon8, dst.low(), dst.high());
    case kArmS8x16ZipRight: {
      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(dst.low(), src1.high());
      __ vzip(Neon8, dst.low(), dst.high());
    case kArmS8x16UnzipLeft: {
      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(scratch, src1);
    case kArmS8x16UnzipRight: {
      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(scratch, src1);
    case kArmS8x16TransposeLeft: {
      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(scratch, src1);
    case kArmS8x16TransposeRight: {
      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(scratch, src1);
    case kArmS8x16Concat: {
      __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1), i.InputInt4(2));
    case kArmI8x16Swizzle: {
      tbl = i.InputSimd128Register(0),
      src = i.InputSimd128Register(1);
      __ vtbl(dst.low(), table, src.low());
      __ vtbl(dst.high(), table, src.high());
    case kArmI8x16Shuffle: {
      src0 = i.InputSimd128Register(0),
      src1 = i.InputSimd128Register(1);
      int table_size = src0 == src1 ? 2 : 4;
      int scratch_s_base = scratch.code() * 4;
      for (int j = 0; j < 4; j++) {
        uint32_t four_lanes = i.InputUint32(2 + j);
        DCHECK_EQ(0, four_lanes & (table_size == 2 ? 0xF0F0F0F0 : 0xE0E0E0E0));
                Float32::FromBits(four_lanes));
      if (dst != src0 && dst != src1) {
        __ vtbl(dst.low(), table, scratch.low());
        __ vtbl(dst.high(), table, scratch.high());
        __ vtbl(scratch.low(), table, scratch.low());
        __ vtbl(scratch.high(), table, scratch.high());
        __ vmov(dst, scratch);
    case kArmS32x2Reverse: {
      __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmS16x4Reverse: {
      __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmS16x2Reverse: {
      __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmS8x8Reverse: {
      __ vrev64(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmS8x4Reverse: {
      __ vrev32(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmS8x2Reverse: {
      __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
    case kArmV128AnyTrue: {
      __ vpmax(NeonU32, scratch, src.low(), src.high());
      __ vpmax(NeonU32, scratch, scratch, scratch);
      __ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
    case kArmI64x2AllTrue: {
      __ I64x2AllTrue(i.OutputRegister(), i.InputSimd128Register(0));
    case kArmI32x4AllTrue: {
      __ vpmin(NeonU32, scratch, src.low(), src.high());
      __ vpmin(NeonU32, scratch, scratch, scratch);
      __ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
    case kArmI16x8AllTrue: {
      __ vpmin(NeonU16, scratch, src.low(), src.high());
      __ vpmin(NeonU16, scratch, scratch, scratch);
      __ vpmin(NeonU16, scratch, scratch, scratch);
      __ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0);
    case kArmI8x16AllTrue: {
      __ vpmin(NeonU8, scratch, src.low(), src.high());
      __ vpmin(NeonU8, scratch, scratch, scratch);
      __ vpmin(NeonU8, scratch, scratch, scratch);
      __ vpmin(NeonU8, scratch, scratch, scratch);
      __ ExtractLane(i.OutputRegister(), scratch, NeonS8, 0);
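      // Annotation: any_true reduces with pairwise unsigned max (non-zero if
      // any lane is non-zero), all_true with pairwise unsigned min (non-zero
      // only if every lane is non-zero); log2(lane count) pairwise steps
      // shrink the vector to a single lane before extraction.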
    case kArmS128Load8Splat: {
              i.NeonInputOperand(0));
    case kArmS128Load16Splat: {
              i.NeonInputOperand(0));
    case kArmS128Load32Splat: {
              i.NeonInputOperand(0));
    case kArmS128Load64Splat: {
      __ Move(dst.high(), dst.low());
    case kArmS128Load8x8S: {
    case kArmS128Load8x8U: {
    case kArmS128Load16x4S: {
    case kArmS128Load16x4U: {
    case kArmS128Load32x2S: {
    case kArmS128Load32x2U: {
    case kArmS128Load32Zero: {
    case kArmS128Load64Zero: {
      __ vmov(dst.high(), 0);
    case kArmS128LoadLaneLow: {
      __ LoadLane(sz, dst_list, i.InputUint8(1), i.NeonInputOperand(2));
    case kArmS128LoadLaneHigh: {
      __ LoadLane(sz, dst_list, i.InputUint8(1), i.NeonInputOperand(2));
    case kArmS128StoreLaneLow: {
      __ StoreLane(sz, src_list, i.InputUint8(1), i.NeonInputOperand(2));
    case kArmS128StoreLaneHigh: {
      __ StoreLane(sz, src_list, i.InputUint8(1), i.NeonInputOperand(2));
    case kAtomicLoadInt8:
    case kAtomicLoadUint8:
    case kAtomicLoadInt16:
    case kAtomicLoadUint16:
    case kAtomicLoadWord32:
    case kAtomicStoreWord8:
    case kAtomicStoreWord16:
    case kAtomicStoreWord32:
    case kAtomicExchangeInt8:
      __ sxtb(i.OutputRegister(0), i.OutputRegister(0));
    case kAtomicExchangeUint8:
    case kAtomicExchangeInt16:
      __ sxth(i.OutputRegister(0), i.OutputRegister(0));
    case kAtomicExchangeUint16:
    case kAtomicExchangeWord32:
    case kAtomicCompareExchangeInt8:
      __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
      __ uxtb(i.TempRegister(2), i.InputRegister(2));
      __ sxtb(i.OutputRegister(0), i.OutputRegister(0));
    case kAtomicCompareExchangeUint8:
      __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
      __ uxtb(i.TempRegister(2), i.InputRegister(2));
    case kAtomicCompareExchangeInt16:
      __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
      __ uxth(i.TempRegister(2), i.InputRegister(2));
      __ sxth(i.OutputRegister(0), i.OutputRegister(0));
    case kAtomicCompareExchangeUint16:
      __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
      __ uxth(i.TempRegister(2), i.InputRegister(2));
    case kAtomicCompareExchangeWord32:
      __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
                                                    i.InputRegister(2));
#define ATOMIC_BINOP_CASE(op, inst)                      \
  case kAtomic##op##Int8:                                \
    ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst);         \
    __ sxtb(i.OutputRegister(0), i.OutputRegister(0));   \
  case kAtomic##op##Uint8:                               \
    ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst);         \
  case kAtomic##op##Int16:                               \
    ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst);         \
    __ sxth(i.OutputRegister(0), i.OutputRegister(0));   \
  case kAtomic##op##Uint16:                              \
    ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst);         \
  case kAtomic##op##Word32:                              \
    ASSEMBLE_ATOMIC_BINOP(ldrex, strex, inst);           \
#undef ATOMIC_BINOP_CASE
    case kArmWord32AtomicPairLoad: {
      if (instr->OutputCount() == 2) {
        DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r0, r1));
        __ add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));
        __ ldrexd(r0, r1, i.TempRegister(0));
      int32_t offset_imm = i.InputInt32(2);
      if (offset_imm != 0) {
    case kArmWord32AtomicPairStore: {
      Register value_low = i.InputRegister(2);
      Register value_high = i.InputRegister(3);
      Register actual_addr = i.TempRegister(0);
      __ ldrexd(tmp1, tmp2, actual_addr);
      __ strexd(store_result, value_low, value_high, actual_addr);
#define ATOMIC_ARITH_BINOP_CASE(op, instr1, instr2)            \
  case kArmWord32AtomicPair##op: {                             \
    DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3));  \
    ASSEMBLE_ATOMIC64_ARITH_BINOP(instr1, instr2);             \
#undef ATOMIC_ARITH_BINOP_CASE
#define ATOMIC_LOGIC_BINOP_CASE(op, instr1)                    \
  case kArmWord32AtomicPair##op: {                             \
    DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3));  \
    ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr1);                     \
#undef ATOMIC_LOGIC_BINOP_CASE
    case kArmWord32AtomicPairExchange: {
      DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r6, r7));
      __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3));
      __ ldrexd(r6, r7, i.TempRegister(0));
      __ strexd(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1),
      __ b(ne, &exchange);
    case kArmWord32AtomicPairCompareExchange: {
      DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3));
      __ add(i.TempRegister(0), i.InputRegister(4), i.InputRegister(5));
      Label compareExchange;
      __ bind(&compareExchange);
      __ ldrexd(r2, r3, i.TempRegister(0));
      __ strexd(i.TempRegister(1), i.InputRegister(2), i.InputRegister(3),
      __ b(ne, &compareExchange);
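      // Annotation: each 64-bit atomic is an LDREXD/STREXD retry loop.
      // strexd writes 0 to its status register on success and 1 if the
      // exclusive monitor was lost, so the b(ne, ...) branches back until
      // the paired store completes atomically.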
#undef ASSEMBLE_ATOMIC_LOAD_INTEGER
#undef ASSEMBLE_ATOMIC_STORE_INTEGER
#undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER
#undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER
#undef ASSEMBLE_ATOMIC_BINOP
#undef ASSEMBLE_ATOMIC64_ARITH_BINOP
#undef ASSEMBLE_ATOMIC64_LOGIC_BINOP
#undef ASSEMBLE_IEEE754_BINOP
#undef ASSEMBLE_IEEE754_UNOP
#undef ASSEMBLE_NEON_NARROWING_OP
#undef ASSEMBLE_SIMD_SHIFT_LEFT
#undef ASSEMBLE_SIMD_SHIFT_RIGHT