V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.

instruction-selector-ia32.cc
1// Copyright 2014 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <stddef.h>
6#include <stdint.h>
7
8#include <limits>
9#include <optional>
10#include <type_traits>
11#include <vector>
12
13#include "src/base/bits.h"
14#include "src/base/flags.h"
15#include "src/base/iterator.h"
16#include "src/base/logging.h"
17#include "src/base/macros.h"
23#include "src/common/globals.h"
29#include "src/compiler/frame.h"
34#include "src/flags/flags.h"
35#include "src/utils/utils.h"
37
38#if V8_ENABLE_WEBASSEMBLY
40#endif // V8_ENABLE_WEBASSEMBLY
41
42namespace v8 {
43namespace internal {
44namespace compiler {
45
46using namespace turboshaft; // NOLINT(build/namespaces)
47
48namespace {
49
50struct LoadStoreView {
51 explicit LoadStoreView(const Operation& op) {
52 DCHECK(op.Is<LoadOp>() || op.Is<StoreOp>());
53 if (const LoadOp* load = op.TryCast<LoadOp>()) {
54 base = load->base();
55 index = load->index();
56 offset = load->offset;
57 } else {
58 DCHECK(op.Is<StoreOp>());
59 const StoreOp& store = op.Cast<StoreOp>();
60 base = store.base();
61 index = store.index();
62 offset = store.offset;
63 }
64 }
65  OpIndex base;
66  OptionalOpIndex index;
67  int32_t offset;
68};
69
70struct ScaledIndexMatch {
71  OpIndex base;
72  OpIndex index;
73 int scale;
74};
75
76struct BaseWithScaledIndexAndDisplacementMatch {
77 OpIndex base = {};
78 OpIndex index = {};
79 int scale = 0;
80 int32_t displacement = 0;
81  DisplacementMode displacement_mode = kPositiveDisplacement;
82};
83
84// Copied from x64, dropped kWord64 constant support.
85bool MatchScaledIndex(InstructionSelectorT* selector, OpIndex node,
86 OpIndex* index, int* scale, bool* power_of_two_plus_one) {
87 DCHECK_NOT_NULL(index);
89
90 auto MatchScaleConstant = [](const Operation& op, int& scale,
91 bool* plus_one) {
92 const ConstantOp* constant = op.TryCast<ConstantOp>();
93 if (constant == nullptr) return false;
94 if (constant->kind != ConstantOp::Kind::kWord32) return false;
95
96 uint64_t value = constant->integral();
97 if (plus_one) *plus_one = false;
98 if (value == 1) return (scale = 0), true;
99 if (value == 2) return (scale = 1), true;
100 if (value == 4) return (scale = 2), true;
101 if (value == 8) return (scale = 3), true;
102 if (plus_one == nullptr) return false;
103 *plus_one = true;
104 if (value == 3) return (scale = 1), true;
105 if (value == 5) return (scale = 2), true;
106 if (value == 9) return (scale = 3), true;
107 return false;
108 };
109
110 const Operation& op = selector->Get(node);
111 if (const WordBinopOp* binop = op.TryCast<WordBinopOp>()) {
112 if (binop->kind != WordBinopOp::Kind::kMul) return false;
113 if (MatchScaleConstant(selector->Get(binop->right()), *scale,
114 power_of_two_plus_one)) {
115 *index = binop->left();
116 return true;
117 }
118 if (MatchScaleConstant(selector->Get(binop->left()), *scale,
119 power_of_two_plus_one)) {
120 *index = binop->right();
121 return true;
122 }
123 return false;
124 } else if (const ShiftOp* shift = op.TryCast<ShiftOp>()) {
125 if (shift->kind != ShiftOp::Kind::kShiftLeft) return false;
126 int32_t scale_value;
127 if (selector->MatchIntegralWord32Constant(shift->right(), &scale_value)) {
128 if (scale_value < 0 || scale_value > 3) return false;
129 *index = shift->left();
130 *scale = static_cast<int>(scale_value);
131 if (power_of_two_plus_one) *power_of_two_plus_one = false;
132 return true;
133 }
134 }
135 return false;
136}
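// Illustrative examples (sketch, derived from the matcher above): a node for
// `x * 4` matches with index = x, scale = 2; `x << 3` matches with index = x,
// scale = 3. With power_of_two_plus_one allowed, `x * 9` matches as index = x,
// scale = 3 and *power_of_two_plus_one = true, so the caller can encode it as
// x + x*8 in a single addressing mode.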
137
138std::optional<ScaledIndexMatch> TryMatchScaledIndex(
139 InstructionSelectorT* selector, OpIndex node,
140 bool allow_power_of_two_plus_one) {
141 ScaledIndexMatch match;
142 bool plus_one = false;
143 if (MatchScaledIndex(selector, node, &match.index, &match.scale,
144 allow_power_of_two_plus_one ? &plus_one : nullptr)) {
145 match.base = plus_one ? match.index : OpIndex{};
146 return match;
147 }
148 return std::nullopt;
149}
150
151// Copied verbatim from x64 (just renamed).
152std::optional<BaseWithScaledIndexAndDisplacementMatch>
153TryMatchBaseWithScaledIndexAndDisplacementForWordBinop(
154 InstructionSelectorT* selector, OpIndex left, OpIndex right) {
155 BaseWithScaledIndexAndDisplacementMatch result;
156 result.displacement_mode = kPositiveDisplacement;
157
158 auto OwnedByAddressingOperand = [](OpIndex) {
159 // TODO(nicohartmann@): Consider providing this. For now we just allow
160 // everything to be covered regardless of other uses.
161 return true;
162 };
163
164 // Check (S + ...)
165 if (MatchScaledIndex(selector, left, &result.index, &result.scale, nullptr) &&
166 OwnedByAddressingOperand(left)) {
167 result.displacement_mode = kPositiveDisplacement;
168
169 // Check (S + (... binop ...))
170 if (const WordBinopOp* right_binop =
171 selector->Get(right).TryCast<WordBinopOp>()) {
172 // Check (S + (B - D))
173 if (right_binop->kind == WordBinopOp::Kind::kSub &&
174 OwnedByAddressingOperand(right)) {
175 if (!selector->MatchIntegralWord32Constant(right_binop->right(),
176 &result.displacement)) {
177 return std::nullopt;
178 }
179 result.base = right_binop->left();
180 result.displacement_mode = kNegativeDisplacement;
181 return result;
182 }
183 // Check (S + (... + ...))
184 if (right_binop->kind == WordBinopOp::Kind::kAdd &&
185 OwnedByAddressingOperand(right)) {
186 if (selector->MatchIntegralWord32Constant(right_binop->right(),
187 &result.displacement)) {
188 // (S + (B + D))
189 result.base = right_binop->left();
190 } else if (selector->MatchIntegralWord32Constant(
191 right_binop->left(), &result.displacement)) {
192 // (S + (D + B))
193 result.base = right_binop->right();
194 } else {
195 // Treat it as (S + B)
196 result.base = right;
197 result.displacement = 0;
198 }
199 return result;
200 }
201 }
202
203 // Check (S + D)
204 if (selector->MatchIntegralWord32Constant(right, &result.displacement)) {
205 result.base = OpIndex{};
206 return result;
207 }
208
209 // Treat it as (S + B)
210 result.base = right;
211 result.displacement = 0;
212 return result;
213 }
214
215 // Check ((... + ...) + ...)
216 if (const WordBinopOp* left_add = selector->Get(left).TryCast<WordBinopOp>();
217 left_add && left_add->kind == WordBinopOp::Kind::kAdd &&
218 OwnedByAddressingOperand(left)) {
219 // Check ((S + ...) + ...)
220 if (MatchScaledIndex(selector, left_add->left(), &result.index,
221 &result.scale, nullptr)) {
222 result.displacement_mode = kPositiveDisplacement;
223 // Check ((S + D) + B)
224 if (selector->MatchIntegralWord32Constant(left_add->right(),
225 &result.displacement)) {
226 result.base = right;
227 return result;
228 }
229 // Check ((S + B) + D)
230 if (selector->MatchIntegralWord32Constant(right, &result.displacement)) {
231 result.base = left_add->right();
232 return result;
233 }
234 // Treat it as (B + B) and use index as right B.
235 result.base = left;
236 result.index = right;
237 result.scale = 0;
238 DCHECK_EQ(result.displacement, 0);
239 return result;
240 }
241 }
242
243 DCHECK_EQ(result.index, OpIndex{});
244 DCHECK_EQ(result.scale, 0);
245 result.displacement_mode = kPositiveDisplacement;
246
247 // Check (B + D)
248 if (selector->MatchIntegralWord32Constant(right, &result.displacement)) {
249 result.base = left;
250 return result;
251 }
252
253 // Treat as (B + B) and use index as left B.
254 result.index = left;
255 result.base = right;
256 return result;
257}
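// Illustrative examples (sketch): for `(x * 4) + (y + 20)` the match is
// index = x, scale = 2, base = y, displacement = 20 with positive mode; for
// `(x * 2) + (y - 8)` the displacement_mode becomes kNegativeDisplacement with
// displacement = 8; a plain `x + y` falls through to the (B + B) case with
// index = x, base = y and scale = 0.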
258
259// Copied verbatim from x64 (just renamed).
260std::optional<BaseWithScaledIndexAndDisplacementMatch>
261TryMatchBaseWithScaledIndexAndDisplacement(InstructionSelectorT* selector,
262 OpIndex node) {
263 // The BaseWithIndexAndDisplacementMatcher canonicalizes the order of
264 // displacements and scale factors that are used as inputs, so instead of
265 // enumerating all possible patterns by brute force, checking for node
266 // clusters using the following templates in the following order suffices
267 // to find all of the interesting cases (S = index * scale, B = base
268 // input, D = displacement input):
269 //
270 // (S + (B + D))
271 // (S + (B + B))
272 // (S + D)
273 // (S + B)
274 // ((S + D) + B)
275 // ((S + B) + D)
276 // ((B + D) + B)
277 // ((B + B) + D)
278 // (B + D)
279 // (B + B)
280 BaseWithScaledIndexAndDisplacementMatch result;
281 result.displacement_mode = kPositiveDisplacement;
282
283 const Operation& op = selector->Get(node);
284 if (const LoadOp* load = op.TryCast<LoadOp>()) {
285 result.base = load->base();
286 result.index = load->index().value_or_invalid();
287 result.scale = load->element_size_log2;
288 result.displacement = load->offset;
289 if (load->kind.tagged_base) result.displacement -= kHeapObjectTag;
290 return result;
291 } else if (const StoreOp* store = op.TryCast<StoreOp>()) {
292 result.base = store->base();
293 result.index = store->index().value_or_invalid();
294 result.scale = store->element_size_log2;
295 result.displacement = store->offset;
296 if (store->kind.tagged_base) result.displacement -= kHeapObjectTag;
297 return result;
298 } else if (op.Is<WordBinopOp>()) {
299 // Nothing to do here, fall into the case below.
300#ifdef V8_ENABLE_WEBASSEMBLY
301 } else if (const Simd128LaneMemoryOp* lane_op =
302 op.TryCast<Simd128LaneMemoryOp>()) {
303 result.base = lane_op->base();
304 result.index = lane_op->index();
305 result.scale = 0;
306 result.displacement = 0;
307 if (lane_op->kind.tagged_base) result.displacement -= kHeapObjectTag;
308 return result;
309 } else if (const Simd128LoadTransformOp* load_transform =
310 op.TryCast<Simd128LoadTransformOp>()) {
311 result.base = load_transform->base();
312 result.index = load_transform->index();
313 DCHECK_EQ(load_transform->offset, 0);
314 result.scale = 0;
315 result.displacement = 0;
316 DCHECK(!load_transform->load_kind.tagged_base);
317 return result;
318#endif // V8_ENABLE_WEBASSEMBLY
319 } else {
320 return std::nullopt;
321 }
322
323 const WordBinopOp& binop = op.Cast<WordBinopOp>();
324 OpIndex left = binop.left();
325 OpIndex right = binop.right();
326 return TryMatchBaseWithScaledIndexAndDisplacementForWordBinop(selector, left,
327 right);
328}
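// Illustrative example (sketch): for a LoadOp with a tagged base, offset 16
// and element_size_log2 == 2, the match is base = load base, index = load
// index, scale = 2 and displacement = 16 - kHeapObjectTag, i.e. the heap
// object tag is folded into the addressing-mode displacement.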
329
330} // namespace
331
332// Adds IA32-specific methods for generating operands.
333class IA32OperandGeneratorT final : public OperandGeneratorT {
334 public:
335  explicit IA32OperandGeneratorT(InstructionSelectorT* selector)
336      : OperandGeneratorT(selector) {}
337
338  InstructionOperand UseByteRegister(OpIndex node) {
339    // TODO(titzer): encode byte register use constraints.
340 return UseFixed(node, edx);
341 }
342
343  bool CanBeMemoryOperand(InstructionCode opcode, OpIndex node, OpIndex input,
344                          int effect_level) {
345 if (!this->IsLoadOrLoadImmutable(input)) return false;
346 if (!selector()->CanCover(node, input)) return false;
347 if (effect_level != selector()->GetEffectLevel(input)) {
348 return false;
349 }
350    MachineRepresentation rep =
351        this->load_view(input).loaded_rep().representation();
352 switch (opcode) {
353 case kIA32And:
354 case kIA32Or:
355 case kIA32Xor:
356 case kIA32Add:
357 case kIA32Sub:
358 case kIA32Cmp:
359 case kIA32Test:
360 return rep == MachineRepresentation::kWord32 || IsAnyTagged(rep);
361 case kIA32Cmp16:
362 case kIA32Test16:
363 return rep == MachineRepresentation::kWord16;
364 case kIA32Cmp8:
365 case kIA32Test8:
366 return rep == MachineRepresentation::kWord8;
367 default:
368 break;
369 }
370 return false;
371 }
372
373  bool CanBeImmediate(OpIndex node) {
374    if (this->IsExternalConstant(node)) return true;
375 if (const ConstantOp* constant = Get(node).TryCast<ConstantOp>()) {
376 switch (constant->kind) {
377 case ConstantOp::Kind::kWord32:
378 case ConstantOp::Kind::kRelocatableWasmCall:
379 case ConstantOp::Kind::kRelocatableWasmStubCall:
380 case ConstantOp::Kind::kSmi:
381 return true;
382 case ConstantOp::Kind::kNumber:
383 return constant->number().get_bits() == 0;
384 default:
385 break;
386 }
387 }
388 return false;
389 }
390
391  int32_t GetImmediateIntegerValue(OpIndex node) {
392    DCHECK(CanBeImmediate(node));
393 const ConstantOp& constant = Get(node).Cast<ConstantOp>();
394 if (constant.kind == ConstantOp::Kind::kWord32) return constant.word32();
395 if (constant.kind == ConstantOp::Kind::kSmi) {
396 return static_cast<int32_t>(constant.smi().ptr());
397 }
398 DCHECK_EQ(constant.kind, ConstantOp::Kind::kNumber);
399 DCHECK_EQ(constant.number().get_bits(), 0);
400 return 0;
401 }
402
403 bool ValueFitsIntoImmediate(int64_t value) const {
404 // int32_t min will overflow if displacement mode is kNegativeDisplacement.
405 return std::numeric_limits<int32_t>::min() < value &&
406 value <= std::numeric_limits<int32_t>::max();
407 }
408
409  AddressingMode GenerateMemoryOperandInputs(
410      OptionalOpIndex index, int scale, OpIndex base, int32_t displacement,
411      DisplacementMode displacement_mode, InstructionOperand inputs[],
412      size_t* input_count,
413      RegisterMode register_mode = RegisterMode::kRegister) {
414    AddressingMode mode = kMode_MRI;
415    if (displacement_mode == kNegativeDisplacement) {
416      displacement = base::bits::WraparoundNeg32(displacement);
417    }
418 if (base.valid()) {
419 if (const ConstantOp* constant = Get(base).TryCast<ConstantOp>()) {
420 if (constant->kind == ConstantOp::Kind::kWord32) {
421          displacement =
422              base::bits::WraparoundAdd32(displacement, constant->word32());
423 base = OpIndex{};
424 } else if (constant->kind == ConstantOp::Kind::kSmi) {
425          displacement = base::bits::WraparoundAdd32(
426              displacement, static_cast<int32_t>(constant->smi().ptr()));
427 base = OpIndex{};
428 }
429 }
430 }
431 if (base.valid()) {
432 inputs[(*input_count)++] = UseRegisterWithMode(base, register_mode);
433 if (index.valid()) {
434 DCHECK(scale >= 0 && scale <= 3);
435 inputs[(*input_count)++] =
436 UseRegisterWithMode(this->value(index), register_mode);
437 if (displacement != 0) {
438 inputs[(*input_count)++] = TempImmediate(displacement);
439 static const AddressingMode kMRnI_modes[] = {kMode_MR1I, kMode_MR2I,
440 kMode_MR4I, kMode_MR8I};
441 mode = kMRnI_modes[scale];
442 } else {
443 static const AddressingMode kMRn_modes[] = {kMode_MR1, kMode_MR2,
444 kMode_MR4, kMode_MR8};
445 mode = kMRn_modes[scale];
446 }
447 } else {
448 if (displacement == 0) {
449 mode = kMode_MR;
450 } else {
451 inputs[(*input_count)++] = TempImmediate(displacement);
452 mode = kMode_MRI;
453 }
454 }
455 } else {
456 DCHECK(scale >= 0 && scale <= 3);
457 if (index.valid()) {
458 inputs[(*input_count)++] =
459 UseRegisterWithMode(this->value(index), register_mode);
460 if (displacement != 0) {
461 inputs[(*input_count)++] = TempImmediate(displacement);
462 static const AddressingMode kMnI_modes[] = {kMode_MRI, kMode_M2I,
463 kMode_M4I, kMode_M8I};
464 mode = kMnI_modes[scale];
465 } else {
466 static const AddressingMode kMn_modes[] = {kMode_MR, kMode_M2,
467 kMode_M4, kMode_M8};
468 mode = kMn_modes[scale];
469 }
470 } else {
471 inputs[(*input_count)++] = TempImmediate(displacement);
472 return kMode_MI;
473 }
474 }
475 return mode;
476 }
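// Illustrative mapping (sketch): base + index*4 + 8 yields kMode_MR4I with
// inputs [base, index, 8]; base + index*4 yields kMode_MR4; base + 8 yields
// kMode_MRI; index*4 + 8 with no base yields kMode_M4I; and a bare
// displacement yields kMode_MI with a single immediate input.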
477
478  AddressingMode GetEffectiveAddressMemoryOperand(
479      OpIndex node, InstructionOperand inputs[], size_t* input_count,
480 RegisterMode register_mode = RegisterMode::kRegister) {
481 const Operation& op = this->Get(node);
482 if (op.Is<LoadOp>() || op.Is<StoreOp>()) {
483 LoadStoreView load_or_store(op);
484 if (ExternalReference reference;
485 this->MatchExternalConstant(load_or_store.base, &reference) &&
486 !load_or_store.index.valid()) {
487 if (selector()->CanAddressRelativeToRootsRegister(reference)) {
488 const ptrdiff_t delta =
489 load_or_store.offset +
490              MacroAssemblerBase::RootRegisterOffsetForExternalReference(
491                  selector()->isolate(), reference);
492 if (is_int32(delta)) {
493 inputs[(*input_count)++] =
494 TempImmediate(static_cast<int32_t>(delta));
495 return kMode_Root;
496 }
497 }
498 }
499 }
500
501 auto m = TryMatchBaseWithScaledIndexAndDisplacement(selector(), node);
502 DCHECK(m.has_value());
503 if (m->base.valid() &&
504 this->Get(m->base).template Is<LoadRootRegisterOp>()) {
505 DCHECK(!m->index.valid());
506 DCHECK_EQ(m->scale, 0);
507 DCHECK(ValueFitsIntoImmediate(m->displacement));
508 inputs[(*input_count)++] =
509 UseImmediate(static_cast<int>(m->displacement));
510 return kMode_Root;
511 } else if (ValueFitsIntoImmediate(m->displacement)) {
512 return GenerateMemoryOperandInputs(m->index, m->scale, m->base,
513 m->displacement, m->displacement_mode,
514 inputs, input_count, register_mode);
515 } else if (!m->base.valid() &&
516 m->displacement_mode == kPositiveDisplacement) {
517 // The displacement cannot be an immediate, but we can use the
518 // displacement as base instead and still benefit from addressing
519 // modes for the scale.
521 } else {
522 // TODO(nicohartmann@): Turn this into a `DCHECK` once we have some
523 // coverage.
524 CHECK_EQ(m->displacement, 0);
525 inputs[(*input_count)++] = UseRegisterWithMode(m->base, register_mode);
526 inputs[(*input_count)++] = UseRegisterWithMode(m->index, register_mode);
527 return kMode_MR1;
528 }
529 }
530
531  InstructionOperand GetEffectiveIndexOperand(OpIndex index,
532                                              AddressingMode* mode) {
533 if (CanBeImmediate(index)) {
534 *mode = kMode_MRI;
535 return UseImmediate(index);
536 } else {
537 *mode = kMode_MR1;
538 return UseUniqueRegister(index);
539 }
540 }
541
542  bool CanBeBetterLeftOperand(OpIndex node) const {
543    return !selector()->IsLive(node);
544 }
545};
546
547namespace {
548
549ArchOpcode GetLoadOpcode(LoadRepresentation load_rep) {
550 ArchOpcode opcode;
551  switch (load_rep.representation()) {
552    case MachineRepresentation::kFloat32:
553      opcode = kIA32Movss;
554      break;
555    case MachineRepresentation::kFloat64:
556      opcode = kIA32Movsd;
557      break;
558    case MachineRepresentation::kBit:  // Fall through.
559    case MachineRepresentation::kWord8:
560      opcode = load_rep.IsSigned() ? kIA32Movsxbl : kIA32Movzxbl;
561      break;
562    case MachineRepresentation::kWord16:
563      opcode = load_rep.IsSigned() ? kIA32Movsxwl : kIA32Movzxwl;
564      break;
565    case MachineRepresentation::kTaggedSigned:   // Fall through.
566    case MachineRepresentation::kTaggedPointer:  // Fall through.
567    case MachineRepresentation::kTagged:         // Fall through.
568    case MachineRepresentation::kWord32:
569      opcode = kIA32Movl;
570      break;
571    case MachineRepresentation::kSimd128:
572      opcode = kIA32Movdqu;
573      break;
576 case MachineRepresentation::kSimd256: // Fall through.
577 case MachineRepresentation::kCompressedPointer: // Fall through.
578 case MachineRepresentation::kCompressed: // Fall through.
579 case MachineRepresentation::kProtectedPointer: // Fall through.
580 case MachineRepresentation::kIndirectPointer: // Fall through.
581 case MachineRepresentation::kSandboxedPointer: // Fall through.
582 case MachineRepresentation::kWord64: // Fall through.
583 case MachineRepresentation::kMapWord: // Fall through.
584 case MachineRepresentation::kFloat16RawBits: // Fall through.
586 UNREACHABLE();
587 }
588 return opcode;
589}
590
591void VisitRO(InstructionSelectorT* selector, OpIndex node, ArchOpcode opcode) {
592 IA32OperandGeneratorT g(selector);
593 OpIndex input = selector->input_at(node, 0);
594 // We have to use a byte register as input to movsxb.
595 InstructionOperand input_op =
596 opcode == kIA32Movsxbl ? g.UseFixed(input, eax) : g.Use(input);
597 selector->Emit(opcode, g.DefineAsRegister(node), input_op);
598}
599
600void VisitROWithTemp(InstructionSelectorT* selector, OpIndex node,
601 ArchOpcode opcode) {
602 IA32OperandGeneratorT g(selector);
603 InstructionOperand temps[] = {g.TempRegister()};
604 selector->Emit(opcode, g.DefineAsRegister(node),
605 g.Use(selector->input_at(node, 0)), arraysize(temps), temps);
606}
607
608void VisitROWithTempSimd(InstructionSelectorT* selector, OpIndex node,
609 ArchOpcode opcode) {
610 IA32OperandGeneratorT g(selector);
611 InstructionOperand temps[] = {g.TempSimd128Register()};
612 selector->Emit(opcode, g.DefineAsRegister(node),
613 g.UseUniqueRegister(selector->input_at(node, 0)),
614 arraysize(temps), temps);
615}
616
617void VisitRR(InstructionSelectorT* selector, OpIndex node,
618 InstructionCode opcode) {
619 IA32OperandGeneratorT g(selector);
620 selector->Emit(opcode, g.DefineAsRegister(node),
621 g.UseRegister(selector->input_at(node, 0)));
622}
623
624void VisitRROFloat(InstructionSelectorT* selector, OpIndex node,
625 ArchOpcode opcode) {
626 IA32OperandGeneratorT g(selector);
627 InstructionOperand operand0 = g.UseRegister(selector->input_at(node, 0));
628 InstructionOperand operand1 = g.Use(selector->input_at(node, 1));
629 if (selector->IsSupported(AVX)) {
630 selector->Emit(opcode, g.DefineAsRegister(node), operand0, operand1);
631 } else {
632 selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1);
633 }
634}
635
636// For float unary operations. Also allocates a temporary general register for
637// use in external operands. If a temp is not required, use VisitRRSimd (since
638// float and SIMD registers are the same on IA32).
639void VisitFloatUnop(InstructionSelectorT* selector, OpIndex node, OpIndex input,
640 ArchOpcode opcode) {
641 IA32OperandGeneratorT g(selector);
642 InstructionOperand temps[] = {g.TempRegister()};
643 // No need for unique because inputs are float but temp is general.
644 if (selector->IsSupported(AVX)) {
645 selector->Emit(opcode, g.DefineAsRegister(node), g.UseRegister(input),
646 arraysize(temps), temps);
647 } else {
648 selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(input),
649 arraysize(temps), temps);
650 }
651}
652
653#if V8_ENABLE_WEBASSEMBLY
654
655void VisitRRSimd(InstructionSelectorT* selector, OpIndex node,
656 ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
657 IA32OperandGeneratorT g(selector);
658 InstructionOperand operand0 = g.UseRegister(selector->input_at(node, 0));
659 if (selector->IsSupported(AVX)) {
660 selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0);
661 } else {
662 selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0);
663 }
664}
665
666void VisitRRSimd(InstructionSelectorT* selector, OpIndex node,
667 ArchOpcode opcode) {
668 VisitRRSimd(selector, node, opcode, opcode);
669}
670
671// TODO(v8:9198): Like VisitRROFloat, but for SIMD. SSE requires operand1 to be
672// a register as we don't have memory alignment yet. For AVX, memory operands
673// are fine, but can have performance issues if not aligned to 16/32 bytes
674// (based on load size), see SDM Vol 1, chapter 14.9
675void VisitRROSimd(InstructionSelectorT* selector, OpIndex node,
676 ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
677 IA32OperandGeneratorT g(selector);
678 InstructionOperand operand0 = g.UseRegister(selector->input_at(node, 0));
679 if (selector->IsSupported(AVX)) {
680 selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0,
681 g.UseRegister(selector->input_at(node, 1)));
682 } else {
683 selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0,
684 g.UseRegister(selector->input_at(node, 1)));
685 }
686}
687
688void VisitRRRSimd(InstructionSelectorT* selector, OpIndex node,
689 ArchOpcode opcode) {
690 IA32OperandGeneratorT g(selector);
691 InstructionOperand dst = selector->IsSupported(AVX)
692 ? g.DefineAsRegister(node)
693 : g.DefineSameAsFirst(node);
694 InstructionOperand operand0 = g.UseRegister(selector->input_at(node, 0));
695 InstructionOperand operand1 = g.UseRegister(selector->input_at(node, 1));
696 selector->Emit(opcode, dst, operand0, operand1);
697}
698
699int32_t GetSimdLaneConstant(InstructionSelectorT* selector, OpIndex node) {
700 const Simd128ExtractLaneOp& op =
701 selector->Get(node).template Cast<Simd128ExtractLaneOp>();
702 return op.lane;
703}
704
705void VisitRRISimd(InstructionSelectorT* selector, OpIndex node,
706 ArchOpcode opcode) {
707 IA32OperandGeneratorT g(selector);
708 InstructionOperand operand0 = g.UseRegister(selector->input_at(node, 0));
709 InstructionOperand operand1 =
710 g.UseImmediate(GetSimdLaneConstant(selector, node));
711 // 8x16 uses movsx_b on dest to extract a byte, which only works
712 // if dest is a byte register.
713 InstructionOperand dest = opcode == kIA32I8x16ExtractLaneS
714 ? g.DefineAsFixed(node, eax)
715 : g.DefineAsRegister(node);
716 selector->Emit(opcode, dest, operand0, operand1);
717}
718
719void VisitRRISimd(InstructionSelectorT* selector, OpIndex node,
720 ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
721 IA32OperandGeneratorT g(selector);
722 InstructionOperand operand0 = g.UseRegister(selector->input_at(node, 0));
723 InstructionOperand operand1 =
724 g.UseImmediate(GetSimdLaneConstant(selector, node));
725 if (selector->IsSupported(AVX)) {
726 selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0, operand1);
727 } else {
728 selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0, operand1);
729 }
730}
731
732void VisitRROSimdShift(InstructionSelectorT* selector, OpIndex node,
733 ArchOpcode opcode) {
734 IA32OperandGeneratorT g(selector);
735 if (g.CanBeImmediate(selector->input_at(node, 1))) {
736 selector->Emit(opcode, g.DefineSameAsFirst(node),
737 g.UseRegister(selector->input_at(node, 0)),
738 g.UseImmediate(selector->input_at(node, 1)));
739 } else {
740 InstructionOperand operand0 =
741 g.UseUniqueRegister(selector->input_at(node, 0));
742 InstructionOperand operand1 =
743 g.UseUniqueRegister(selector->input_at(node, 1));
744 InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
745 selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1,
746 arraysize(temps), temps);
747 }
748}
749
750void VisitRRRR(InstructionSelectorT* selector, OpIndex node,
751 InstructionCode opcode) {
752 IA32OperandGeneratorT g(selector);
753 selector->Emit(opcode, g.DefineAsRegister(node),
754 g.UseRegister(selector->input_at(node, 0)),
755 g.UseRegister(selector->input_at(node, 1)),
756 g.UseRegister(selector->input_at(node, 2)));
757}
758
759void VisitI8x16Shift(InstructionSelectorT* selector, OpIndex node,
760 ArchOpcode opcode) {
761 IA32OperandGeneratorT g(selector);
762 InstructionOperand output = CpuFeatures::IsSupported(AVX)
763 ? g.UseRegister(node)
764 : g.DefineSameAsFirst(node);
765
766 if (g.CanBeImmediate(selector->input_at(node, 1))) {
767 if (opcode == kIA32I8x16ShrS) {
768 selector->Emit(opcode, output, g.UseRegister(selector->input_at(node, 0)),
769 g.UseImmediate(selector->input_at(node, 1)));
770 } else {
771 InstructionOperand temps[] = {g.TempRegister()};
772 selector->Emit(opcode, output, g.UseRegister(selector->input_at(node, 0)),
773 g.UseImmediate(selector->input_at(node, 1)),
774 arraysize(temps), temps);
775 }
776 } else {
777 InstructionOperand operand0 =
778 g.UseUniqueRegister(selector->input_at(node, 0));
779 InstructionOperand operand1 =
780 g.UseUniqueRegister(selector->input_at(node, 1));
781 InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
782 selector->Emit(opcode, output, operand0, operand1, arraysize(temps), temps);
783 }
784}
785#endif // V8_ENABLE_WEBASSEMBLY
786
787} // namespace
788
789void InstructionSelectorT::VisitStackSlot(OpIndex node) {
790 const StackSlotOp& stack_slot = Cast<StackSlotOp>(node);
791 int slot = frame_->AllocateSpillSlot(stack_slot.size, stack_slot.alignment,
792 stack_slot.is_tagged);
793 OperandGenerator g(this);
794
795 Emit(kArchStackSlot, g.DefineAsRegister(node),
796 sequence()->AddImmediate(Constant(slot)), 0, nullptr);
797}
798
799void InstructionSelectorT::VisitAbortCSADcheck(OpIndex node) {
800 IA32OperandGeneratorT g(this);
801 Emit(kArchAbortCSADcheck, g.NoOutput(),
802 g.UseFixed(this->input_at(node, 0), edx));
803}
804
805#if V8_ENABLE_WEBASSEMBLY
806
807void InstructionSelectorT::VisitLoadLane(OpIndex node) {
808  InstructionCode opcode = kArchNop;
809  int lane;
810 const Simd128LaneMemoryOp& load =
811 this->Get(node).template Cast<Simd128LaneMemoryOp>();
812 lane = load.lane;
813 switch (load.lane_kind) {
814 case Simd128LaneMemoryOp::LaneKind::k8:
815 opcode = kIA32Pinsrb;
816 break;
817 case Simd128LaneMemoryOp::LaneKind::k16:
818 opcode = kIA32Pinsrw;
819 break;
820 case Simd128LaneMemoryOp::LaneKind::k32:
821 opcode = kIA32Pinsrd;
822 break;
823 case Simd128LaneMemoryOp::LaneKind::k64:
824 // pinsrq not available on IA32.
825 if (lane == 0) {
826 opcode = kIA32Movlps;
827 } else {
828 DCHECK_EQ(1, lane);
829 opcode = kIA32Movhps;
830 }
831 break;
832 }
833 // IA32 supports unaligned loads.
834 DCHECK(!load.kind.maybe_unaligned);
835 // Trap handler is not supported on IA32.
836 DCHECK(!load.kind.with_trap_handler);
837
838 IA32OperandGeneratorT g(this);
839 InstructionOperand outputs[] = {IsSupported(AVX)
840 ? g.DefineAsRegister(node)
841 : g.DefineSameAsFirst(node)};
842 // Input 0 is value node, 1 is lane idx, and
843 // GetEffectiveAddressMemoryOperand uses up to 3 inputs. This ordering is
844 // consistent with other operations that use the same opcode.
845 InstructionOperand inputs[5];
846 size_t input_count = 0;
847
848 inputs[input_count++] = g.UseRegister(this->input_at(node, 2));
849 inputs[input_count++] = g.UseImmediate(lane);
850
851 AddressingMode mode =
852 g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
853  opcode |= AddressingModeField::encode(mode);
854
855 DCHECK_GE(5, input_count);
856
857 Emit(opcode, 1, outputs, input_count, inputs);
858}
859
860void InstructionSelectorT::VisitLoadTransform(OpIndex node) {
861  const Simd128LoadTransformOp& op =
862      this->Get(node).Cast<Simd128LoadTransformOp>();
863  InstructionCode opcode;
864  switch (op.transform_kind) {
865 case Simd128LoadTransformOp::TransformKind::k8x8S:
866 opcode = kIA32S128Load8x8S;
867 break;
868 case Simd128LoadTransformOp::TransformKind::k8x8U:
869 opcode = kIA32S128Load8x8U;
870 break;
871 case Simd128LoadTransformOp::TransformKind::k16x4S:
872 opcode = kIA32S128Load16x4S;
873 break;
874 case Simd128LoadTransformOp::TransformKind::k16x4U:
875 opcode = kIA32S128Load16x4U;
876 break;
877 case Simd128LoadTransformOp::TransformKind::k32x2S:
878 opcode = kIA32S128Load32x2S;
879 break;
880 case Simd128LoadTransformOp::TransformKind::k32x2U:
881 opcode = kIA32S128Load32x2U;
882 break;
883 case Simd128LoadTransformOp::TransformKind::k8Splat:
884 opcode = kIA32S128Load8Splat;
885 break;
886 case Simd128LoadTransformOp::TransformKind::k16Splat:
887 opcode = kIA32S128Load16Splat;
888 break;
889 case Simd128LoadTransformOp::TransformKind::k32Splat:
890 opcode = kIA32S128Load32Splat;
891 break;
892 case Simd128LoadTransformOp::TransformKind::k64Splat:
893 opcode = kIA32S128Load64Splat;
894 break;
895 case Simd128LoadTransformOp::TransformKind::k32Zero:
896 opcode = kIA32Movss;
897 break;
898 case Simd128LoadTransformOp::TransformKind::k64Zero:
899 opcode = kIA32Movsd;
900 break;
901 }
902
903 // IA32 supports unaligned loads
904 DCHECK(!op.load_kind.maybe_unaligned);
905 // Trap handler is not supported on IA32.
906 DCHECK(!op.load_kind.with_trap_handler);
907
908 VisitLoad(node, node, opcode);
909}
910#endif // V8_ENABLE_WEBASSEMBLY
911
912void InstructionSelectorT::VisitLoad(OpIndex node, OpIndex value,
913                                     InstructionCode opcode) {
914 IA32OperandGeneratorT g(this);
915 InstructionOperand outputs[1];
916 outputs[0] = g.DefineAsRegister(node);
917 InstructionOperand inputs[3];
918 size_t input_count = 0;
919 AddressingMode mode =
920 g.GetEffectiveAddressMemoryOperand(value, inputs, &input_count);
921  InstructionCode code = opcode | AddressingModeField::encode(mode);
922  Emit(code, 1, outputs, input_count, inputs);
923}
924
925void InstructionSelectorT::VisitLoad(OpIndex node) {
926  LoadRepresentation load_rep = this->load_view(node).loaded_rep();
927 DCHECK(!load_rep.IsMapWord());
928 VisitLoad(node, node, GetLoadOpcode(load_rep));
929}
930
931void InstructionSelectorT::VisitProtectedLoad(OpIndex node) {
932 // Trap handler is not supported on IA32.
933 UNREACHABLE();
934}
935
936namespace {
937
938ArchOpcode GetStoreOpcode(MachineRepresentation rep) {
939  switch (rep) {
940    case MachineRepresentation::kFloat32:
941      return kIA32Movss;
942    case MachineRepresentation::kFloat64:
943      return kIA32Movsd;
944    case MachineRepresentation::kBit:  // Fall through.
945    case MachineRepresentation::kWord8:
946      return kIA32Movb;
947    case MachineRepresentation::kWord16:
948      return kIA32Movw;
949    case MachineRepresentation::kTaggedSigned:   // Fall through.
950    case MachineRepresentation::kTaggedPointer:  // Fall through.
951    case MachineRepresentation::kTagged:         // Fall through.
952    case MachineRepresentation::kWord32:
953      return kIA32Movl;
954    case MachineRepresentation::kSimd128:
955      return kIA32Movdqu;
958 case MachineRepresentation::kSimd256: // Fall through.
959 case MachineRepresentation::kCompressedPointer: // Fall through.
960 case MachineRepresentation::kCompressed: // Fall through.
961 case MachineRepresentation::kProtectedPointer: // Fall through.
962 case MachineRepresentation::kIndirectPointer: // Fall through.
963 case MachineRepresentation::kSandboxedPointer: // Fall through.
964 case MachineRepresentation::kWord64: // Fall through.
965 case MachineRepresentation::kMapWord: // Fall through.
966 case MachineRepresentation::kFloat16RawBits: // Fall through.
968 UNREACHABLE();
969 }
970}
971
972ArchOpcode GetSeqCstStoreOpcode(MachineRepresentation rep) {
973  switch (rep) {
974    case MachineRepresentation::kWord8:
975      return kAtomicExchangeInt8;
976    case MachineRepresentation::kWord16:
977      return kAtomicExchangeInt16;
978    case MachineRepresentation::kTaggedSigned:   // Fall through.
979    case MachineRepresentation::kTaggedPointer:  // Fall through.
980    case MachineRepresentation::kTagged:         // Fall through.
981    case MachineRepresentation::kWord32:
982      return kAtomicExchangeWord32;
983 default:
984 UNREACHABLE();
985 }
986}
987
988void VisitAtomicExchange(InstructionSelectorT* selector, OpIndex node,
989 ArchOpcode opcode, MachineRepresentation rep) {
990 IA32OperandGeneratorT g(selector);
991 OpIndex base = selector->input_at(node, 0);
992 OpIndex index = selector->input_at(node, 1);
993 OpIndex value = selector->input_at(node, 2);
994
995 AddressingMode addressing_mode;
996 InstructionOperand value_operand = (rep == MachineRepresentation::kWord8)
997 ? g.UseFixed(value, edx)
998 : g.UseUniqueRegister(value);
999 InstructionOperand inputs[] = {
1000 value_operand, g.UseUniqueRegister(base),
1001 g.GetEffectiveIndexOperand(index, &addressing_mode)};
1002  InstructionOperand outputs[] = {
1003      (rep == MachineRepresentation::kWord8)
1004 // Using DefineSameAsFirst requires the register to be unallocated.
1005 ? g.DefineAsFixed(node, edx)
1006 : g.DefineSameAsFirst(node)};
1007 InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
1008 selector->Emit(code, 1, outputs, arraysize(inputs), inputs);
1009}
1010
1011void VisitStoreCommon(InstructionSelectorT* selector,
1012 const TurboshaftAdapter::StoreView& store) {
1013 IA32OperandGeneratorT g(selector);
1014
1015 OpIndex base = store.base();
1016 OptionalOpIndex index = store.index();
1017 OpIndex value = store.value();
1018 int32_t displacement = store.displacement();
1019 uint8_t element_size_log2 = store.element_size_log2();
1020 std::optional<AtomicMemoryOrder> atomic_order = store.memory_order();
1021 StoreRepresentation store_rep = store.stored_rep();
1022
1023 WriteBarrierKind write_barrier_kind = store_rep.write_barrier_kind();
1024 MachineRepresentation rep = store_rep.representation();
1025 const bool is_seqcst =
1026 atomic_order && *atomic_order == AtomicMemoryOrder::kSeqCst;
1027
1028 if (v8_flags.enable_unconditional_write_barriers && CanBeTaggedPointer(rep)) {
1029 write_barrier_kind = kFullWriteBarrier;
1030 }
1031
1032 if (write_barrier_kind != kNoWriteBarrier &&
1033 !v8_flags.disable_write_barriers) {
1035 AddressingMode addressing_mode;
1036 InstructionOperand inputs[4];
1037 size_t input_count = 0;
1038 addressing_mode = g.GenerateMemoryOperandInputs(
1039 index, element_size_log2, base, displacement,
1040 DisplacementMode::kPositiveDisplacement, inputs, &input_count,
1041        IA32OperandGeneratorT::RegisterMode::kUniqueRegister);
1042    DCHECK_LT(input_count, 4);
1043 inputs[input_count++] = g.UseUniqueRegister(value);
1044 RecordWriteMode record_write_mode =
1045 WriteBarrierKindToRecordWriteMode(write_barrier_kind);
1046 InstructionOperand temps[] = {g.TempRegister(), g.TempRegister()};
1047 size_t const temp_count = arraysize(temps);
1048 InstructionCode code = is_seqcst ? kArchAtomicStoreWithWriteBarrier
1049 : kArchStoreWithWriteBarrier;
1050 code |= AddressingModeField::encode(addressing_mode);
1051 code |= RecordWriteModeField::encode(record_write_mode);
1052 selector->Emit(code, 0, nullptr, input_count, inputs, temp_count, temps);
1053 } else {
1054 InstructionOperand inputs[4];
1055 size_t input_count = 0;
1056 // To inform the register allocator that xchg clobbered its input.
1057 InstructionOperand outputs[1];
1058 size_t output_count = 0;
1059 ArchOpcode opcode;
1060 AddressingMode addressing_mode;
1061
1062 if (is_seqcst) {
1063 // SeqCst stores emit XCHG instead of MOV, so encode the inputs as we
1064 // would for XCHG. XCHG can't encode the value as an immediate and has
1065 // fewer addressing modes available.
1066 if (rep == MachineRepresentation::kWord8 ||
1068 inputs[input_count++] = g.UseFixed(value, edx);
1069 outputs[output_count++] = g.DefineAsFixed(store, edx);
1070 } else {
1071 inputs[input_count++] = g.UseUniqueRegister(value);
1072 outputs[output_count++] = g.DefineSameAsFirst(store);
1073 }
1074 addressing_mode = g.GetEffectiveAddressMemoryOperand(
1075 store, inputs, &input_count,
1077 opcode = GetSeqCstStoreOpcode(rep);
1078 } else {
1079 // Release and non-atomic stores emit MOV.
1080 // https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
1081 InstructionOperand val;
1082 if (g.CanBeImmediate(value)) {
1083 val = g.UseImmediate(value);
1084 } else if (!atomic_order && (rep == MachineRepresentation::kWord8 ||
1086 val = g.UseByteRegister(value);
1087 } else {
1088 val = g.UseUniqueRegister(value);
1089 }
1090 addressing_mode = g.GetEffectiveAddressMemoryOperand(
1091 store, inputs, &input_count,
1093 inputs[input_count++] = val;
1094 opcode = GetStoreOpcode(rep);
1095 }
1096 InstructionCode code =
1097 opcode | AddressingModeField::encode(addressing_mode);
1098 selector->Emit(code, output_count, outputs, input_count, inputs);
1099 }
1100}
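// Illustrative behavior (sketch): a plain tagged store with a write barrier is
// emitted as kArchStoreWithWriteBarrier with the value in a unique register
// and two temp registers; a SeqCst kWord32 store is emitted as
// kAtomicExchangeWord32 (an xchg), defining the node same-as-first so the
// register allocator knows the input register is clobbered.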
1101
1102} // namespace
1103
1104void InstructionSelectorT::VisitStorePair(OpIndex node) { UNREACHABLE(); }
1105
1106void InstructionSelectorT::VisitStore(OpIndex node) {
1107 VisitStoreCommon(this, this->store_view(node));
1108}
1109
1110void InstructionSelectorT::VisitProtectedStore(OpIndex node) {
1111 // Trap handler is not supported on IA32.
1112 UNREACHABLE();
1113}
1114
1115#if V8_ENABLE_WEBASSEMBLY
1116
1117void InstructionSelectorT::VisitStoreLane(OpIndex node) {
1118 IA32OperandGeneratorT g(this);
1119 InstructionCode opcode = kArchNop;
1120 int lane;
1121 const Simd128LaneMemoryOp& store =
1122 this->Get(node).template Cast<Simd128LaneMemoryOp>();
1123 lane = store.lane;
1124 switch (store.lane_kind) {
1125 case Simd128LaneMemoryOp::LaneKind::k8:
1126 opcode = kIA32Pextrb;
1127 break;
1128 case Simd128LaneMemoryOp::LaneKind::k16:
1129 opcode = kIA32Pextrw;
1130 break;
1131 case Simd128LaneMemoryOp::LaneKind::k32:
1132 opcode = kIA32S128Store32Lane;
1133 break;
1134 case Simd128LaneMemoryOp::LaneKind::k64:
1135 if (lane == 0) {
1136 opcode = kIA32Movlps;
1137 } else {
1138 DCHECK_EQ(1, lane);
1139 opcode = kIA32Movhps;
1140 }
1141 break;
1142 }
1143
1144 InstructionOperand inputs[4];
1145 size_t input_count = 0;
1146 AddressingMode addressing_mode =
1147 g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
1148 opcode |= AddressingModeField::encode(addressing_mode);
1149
1150 InstructionOperand value_operand = g.UseRegister(this->input_at(node, 2));
1151 inputs[input_count++] = value_operand;
1152 inputs[input_count++] = g.UseImmediate(lane);
1153 DCHECK_GE(4, input_count);
1154 Emit(opcode, 0, nullptr, input_count, inputs);
1155}
1156#endif // V8_ENABLE_WEBASSEMBLY
1157
1158// Architecture supports unaligned access, therefore VisitLoad is used instead
1159void InstructionSelectorT::VisitUnalignedLoad(OpIndex node) { UNREACHABLE(); }
1160
1161// Architecture supports unaligned access, therefore VisitStore is used instead
1162void InstructionSelectorT::VisitUnalignedStore(OpIndex node) { UNREACHABLE(); }
1163
1164namespace {
1165
1166// Shared routine for multiple binary operations.
1167void VisitBinop(InstructionSelectorT* selector, OpIndex node,
1168 InstructionCode opcode, FlagsContinuationT* cont) {
1169 IA32OperandGeneratorT g(selector);
1170 auto left = selector->input_at(node, 0);
1171 auto right = selector->input_at(node, 1);
1172 InstructionOperand inputs[6];
1173 size_t input_count = 0;
1174 InstructionOperand outputs[1];
1175 size_t output_count = 0;
1176
1177 // TODO(turbofan): match complex addressing modes.
1178 if (left == right) {
1179 // If both inputs refer to the same operand, enforce allocating a register
1180 // for both of them to ensure that we don't end up generating code like
1181 // this:
1182 //
1183 // mov eax, [ebp-0x10]
1184 // add eax, [ebp-0x10]
1185 // jo label
1186 InstructionOperand const input = g.UseRegister(left);
1187 inputs[input_count++] = input;
1188 inputs[input_count++] = input;
1189 } else if (g.CanBeImmediate(right)) {
1190 inputs[input_count++] = g.UseRegister(left);
1191 inputs[input_count++] = g.UseImmediate(right);
1192 } else {
1193 int effect_level = selector->GetEffectLevel(node, cont);
1194 if (selector->IsCommutative(node) && g.CanBeBetterLeftOperand(right) &&
1195 (!g.CanBeBetterLeftOperand(left) ||
1196 !g.CanBeMemoryOperand(opcode, node, right, effect_level))) {
1197 std::swap(left, right);
1198 }
1199 if (g.CanBeMemoryOperand(opcode, node, right, effect_level)) {
1200 inputs[input_count++] = g.UseRegister(left);
1201 AddressingMode addressing_mode =
1202 g.GetEffectiveAddressMemoryOperand(right, inputs, &input_count);
1203 opcode |= AddressingModeField::encode(addressing_mode);
1204 } else {
1205 inputs[input_count++] = g.UseRegister(left);
1206 inputs[input_count++] = g.Use(right);
1207 }
1208 }
1209
1210 outputs[output_count++] = g.DefineSameAsFirst(node);
1211
1212 DCHECK_NE(0u, input_count);
1213 DCHECK_EQ(1u, output_count);
1214 DCHECK_GE(arraysize(inputs), input_count);
1215 DCHECK_GE(arraysize(outputs), output_count);
1216
1217 selector->EmitWithContinuation(opcode, output_count, outputs, input_count,
1218 inputs, cont);
1219}
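// Illustrative example (sketch): for `x & load(base + 4)` VisitBinop can fold
// the load into the kIA32And instruction as a memory operand (provided the
// effect levels match), emitting roughly `and reg, [base + 4]`; if the right
// operand is an immediate constant it becomes `and reg, imm` instead.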
1220
1221void VisitBinop(InstructionSelectorT* selector, OpIndex node,
1222 InstructionCode opcode) {
1223 FlagsContinuationT cont;
1224 VisitBinop(selector, node, opcode, &cont);
1225}
1226
1227} // namespace
1228
1229void InstructionSelectorT::VisitWord32And(OpIndex node) {
1230 VisitBinop(this, node, kIA32And);
1231}
1232
1233void InstructionSelectorT::VisitWord32Or(OpIndex node) {
1234 VisitBinop(this, node, kIA32Or);
1235}
1236
1237void InstructionSelectorT::VisitWord32Xor(OpIndex node) {
1238 IA32OperandGeneratorT g(this);
1239 const WordBinopOp& binop = this->Get(node).template Cast<WordBinopOp>();
1240 int32_t constant;
1241 if (this->MatchIntegralWord32Constant(binop.right(), &constant) &&
1242 constant == -1) {
1243 Emit(kIA32Not, g.DefineSameAsFirst(node), g.UseRegister(binop.left()));
1244 return;
1245 }
1246 VisitBinop(this, node, kIA32Xor);
1247}
1248
1249void InstructionSelectorT::VisitStackPointerGreaterThan(
1250    OpIndex node, FlagsContinuation* cont) {
1251  StackCheckKind kind =
1252      this->Get(node).template Cast<StackPointerGreaterThanOp>().kind;
1253 { // Temporary scope to minimize indentation change churn below.
1254 InstructionCode opcode = kArchStackPointerGreaterThan |
1255 MiscField::encode(static_cast<int>(kind));
1256
1257 int effect_level = GetEffectLevel(node, cont);
1258
1259 IA32OperandGeneratorT g(this);
1260
1261 // No outputs.
1262 InstructionOperand* const outputs = nullptr;
1263 const int output_count = 0;
1264
1265 // Applying an offset to this stack check requires a temp register. Offsets
1266 // are only applied to the first stack check. If applying an offset, we must
1267 // ensure the input and temp registers do not alias, thus kUniqueRegister.
1268 InstructionOperand temps[] = {g.TempRegister()};
1269 const int temp_count = (kind == StackCheckKind::kJSFunctionEntry) ? 1 : 0;
1270    const auto register_mode = (kind == StackCheckKind::kJSFunctionEntry)
1271                                   ? OperandGenerator::kUniqueRegister
1272                                   : OperandGenerator::kRegister;
1273
1274 OpIndex value = this->input_at(node, 0);
1275 if (g.CanBeMemoryOperand(kIA32Cmp, node, value, effect_level)) {
1276 DCHECK(this->IsLoadOrLoadImmutable(value));
1277
1278 // GetEffectiveAddressMemoryOperand can create at most 3 inputs.
1279 static constexpr int kMaxInputCount = 3;
1280
1281 size_t input_count = 0;
1282 InstructionOperand inputs[kMaxInputCount];
1283 AddressingMode addressing_mode = g.GetEffectiveAddressMemoryOperand(
1284 value, inputs, &input_count, register_mode);
1285 opcode |= AddressingModeField::encode(addressing_mode);
1286 DCHECK_LE(input_count, kMaxInputCount);
1287
1288 EmitWithContinuation(opcode, output_count, outputs, input_count, inputs,
1289 temp_count, temps, cont);
1290 } else {
1291 InstructionOperand inputs[] = {
1292 g.UseRegisterWithMode(value, register_mode)};
1293 static constexpr int input_count = arraysize(inputs);
1294 EmitWithContinuation(opcode, output_count, outputs, input_count, inputs,
1295 temp_count, temps, cont);
1296 }
1297 }
1298}
1299
1300// Shared routine for multiple shift operations.
1301static inline void VisitShift(InstructionSelectorT* selector, OpIndex node,
1302 ArchOpcode opcode) {
1303 IA32OperandGeneratorT g(selector);
1304 auto left = selector->input_at(node, 0);
1305 auto right = selector->input_at(node, 1);
1306
1307 if (g.CanBeImmediate(right)) {
1308 selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(left),
1309 g.UseImmediate(right));
1310 } else {
1311 selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(left),
1312 g.UseFixed(right, ecx));
1313 }
1314}
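// Illustrative example (sketch): `x << 5` emits `shl reg, 5` with the count as
// an immediate, while a variable count forces the shift amount into ecx
// (`shl reg, cl`), matching the IA32 encoding constraint for shifts.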
1315
1316namespace {
1317
1318void VisitMulHigh(InstructionSelectorT* selector, OpIndex node,
1319 ArchOpcode opcode) {
1320 IA32OperandGeneratorT g(selector);
1321 InstructionOperand temps[] = {g.TempRegister(eax)};
1322 selector->Emit(opcode, g.DefineAsFixed(node, edx),
1323 g.UseFixed(selector->input_at(node, 0), eax),
1324 g.UseUniqueRegister(selector->input_at(node, 1)),
1325 arraysize(temps), temps);
1326}
1327
1328void VisitDiv(InstructionSelectorT* selector, OpIndex node, ArchOpcode opcode) {
1329 IA32OperandGeneratorT g(selector);
1330 InstructionOperand temps[] = {g.TempRegister(edx)};
1331 selector->Emit(opcode, g.DefineAsFixed(node, eax),
1332 g.UseFixed(selector->input_at(node, 0), eax),
1333 g.UseUnique(selector->input_at(node, 1)), arraysize(temps),
1334 temps);
1335}
1336
1337void VisitMod(InstructionSelectorT* selector, OpIndex node, ArchOpcode opcode) {
1338 IA32OperandGeneratorT g(selector);
1339 InstructionOperand temps[] = {g.TempRegister(eax)};
1340 selector->Emit(opcode, g.DefineAsFixed(node, edx),
1341 g.UseFixed(selector->input_at(node, 0), eax),
1342 g.UseUnique(selector->input_at(node, 1)), arraysize(temps),
1343 temps);
1344}
1345
1346// {Displacement} is either OpIndex or int32_t.
1347template <typename Displacement>
1348void EmitLea(InstructionSelectorT* selector, OpIndex result, OpIndex index,
1349             int scale, OpIndex base, Displacement displacement,
1350             DisplacementMode displacement_mode) {
1351  IA32OperandGeneratorT g(selector);
1352 InstructionOperand inputs[4];
1353 size_t input_count = 0;
1354 AddressingMode mode =
1355 g.GenerateMemoryOperandInputs(index, scale, base, displacement,
1356 displacement_mode, inputs, &input_count);
1357
1358 DCHECK_NE(0u, input_count);
1359 DCHECK_GE(arraysize(inputs), input_count);
1360
1361 InstructionOperand outputs[1];
1362 outputs[0] = g.DefineAsRegister(result);
1363
1364 InstructionCode opcode = AddressingModeField::encode(mode) | kIA32Lea;
1365
1366 selector->Emit(opcode, 1, outputs, input_count, inputs);
1367}
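// Illustrative example (sketch): EmitLea with index = x, scale = 2, base = y
// and displacement = 8 produces a kIA32Lea with addressing mode kMode_MR4I,
// i.e. roughly `lea dst, [y + x*4 + 8]`, computing the result without
// clobbering either input register.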
1368
1369} // namespace
1370
1371void InstructionSelectorT::VisitWord32Shl(OpIndex node) {
1372 if (auto m = TryMatchScaledIndex(this, node, true)) {
1373 EmitLea(this, node, m->index, m->scale, m->base, 0, kPositiveDisplacement);
1374 return;
1375 }
1376 VisitShift(this, node, kIA32Shl);
1377}
1378
1379void InstructionSelectorT::VisitWord32Shr(OpIndex node) {
1380 VisitShift(this, node, kIA32Shr);
1381}
1382
1383void InstructionSelectorT::VisitWord32Sar(OpIndex node) {
1384 VisitShift(this, node, kIA32Sar);
1385}
1386
1387void InstructionSelectorT::VisitInt32PairAdd(OpIndex node) {
1388 IA32OperandGeneratorT g(this);
1389
1390 OptionalOpIndex projection1 = FindProjection(node, 1);
1391 if (projection1.valid()) {
1392 // We use UseUniqueRegister here to avoid register sharing with the temp
1393 // register.
1394 InstructionOperand inputs[] = {
1395 g.UseRegister(this->input_at(node, 0)),
1396 g.UseUniqueRegisterOrSlotOrConstant(this->input_at(node, 1)),
1397 g.UseRegister(this->input_at(node, 2)),
1398 g.UseUniqueRegister(this->input_at(node, 3))};
1399
1400 InstructionOperand outputs[] = {g.DefineSameAsFirst(node),
1401 g.DefineAsRegister(projection1.value())};
1402
1403 InstructionOperand temps[] = {g.TempRegister()};
1404
1405 Emit(kIA32AddPair, 2, outputs, 4, inputs, 1, temps);
1406 } else {
1407 // The high word of the result is not used, so we emit the standard 32 bit
1408 // instruction.
1409 Emit(kIA32Add, g.DefineSameAsFirst(node),
1410 g.UseRegister(this->input_at(node, 0)),
1411 g.Use(this->input_at(node, 2)));
1412 }
1413}
1414
1415void InstructionSelectorT::VisitInt32PairSub(OpIndex node) {
1416 IA32OperandGeneratorT g(this);
1417
1418 OptionalOpIndex projection1 = FindProjection(node, 1);
1419 if (projection1.valid()) {
1420 // We use UseUniqueRegister here to avoid register sharing with the temp
1421 // register.
1422 InstructionOperand inputs[] = {
1423 g.UseRegister(this->input_at(node, 0)),
1424 g.UseUniqueRegisterOrSlotOrConstant(this->input_at(node, 1)),
1425 g.UseRegister(this->input_at(node, 2)),
1426 g.UseUniqueRegister(this->input_at(node, 3))};
1427
1428 InstructionOperand outputs[] = {g.DefineSameAsFirst(node),
1429 g.DefineAsRegister(projection1.value())};
1430
1431 InstructionOperand temps[] = {g.TempRegister()};
1432
1433 Emit(kIA32SubPair, 2, outputs, 4, inputs, 1, temps);
1434 } else {
1435 // The high word of the result is not used, so we emit the standard 32 bit
1436 // instruction.
1437 Emit(kIA32Sub, g.DefineSameAsFirst(node),
1438 g.UseRegister(this->input_at(node, 0)),
1439 g.Use(this->input_at(node, 2)));
1440 }
1441}
1442
1443void InstructionSelectorT::VisitInt32PairMul(OpIndex node) {
1444 IA32OperandGeneratorT g(this);
1445
1446 OptionalOpIndex projection1 = FindProjection(node, 1);
1447 if (projection1.valid()) {
1448 // InputAt(3) explicitly shares ecx with OutputRegister(1) to save one
1449 // register and one mov instruction.
1450 InstructionOperand inputs[] = {
1451 g.UseUnique(this->input_at(node, 0)),
1452 g.UseUniqueRegisterOrSlotOrConstant(this->input_at(node, 1)),
1453 g.UseUniqueRegister(this->input_at(node, 2)),
1454 g.UseFixed(this->input_at(node, 3), ecx)};
1455
1456 InstructionOperand outputs[] = {g.DefineAsFixed(node, eax),
1457 g.DefineAsFixed(projection1.value(), ecx)};
1458
1459 InstructionOperand temps[] = {g.TempRegister(edx)};
1460
1461 Emit(kIA32MulPair, 2, outputs, 4, inputs, 1, temps);
1462 } else {
1463 // The high word of the result is not used, so we emit the standard 32 bit
1464 // instruction.
1465 Emit(kIA32Imul, g.DefineSameAsFirst(node),
1466 g.UseRegister(this->input_at(node, 0)),
1467 g.Use(this->input_at(node, 2)));
1468 }
1469}
1470
1471void VisitWord32PairShift(InstructionSelectorT* selector,
1472                          InstructionCode opcode, OpIndex node) {
1473 IA32OperandGeneratorT g(selector);
1474
1475 OpIndex shift = selector->input_at(node, 2);
1476 InstructionOperand shift_operand;
1477 if (g.CanBeImmediate(shift)) {
1478 shift_operand = g.UseImmediate(shift);
1479 } else {
1480 shift_operand = g.UseFixed(shift, ecx);
1481 }
1482 InstructionOperand inputs[] = {g.UseFixed(selector->input_at(node, 0), eax),
1483 g.UseFixed(selector->input_at(node, 1), edx),
1484 shift_operand};
1485
1486 InstructionOperand outputs[2];
1487 InstructionOperand temps[1];
1488 int32_t output_count = 0;
1489 int32_t temp_count = 0;
1490 outputs[output_count++] = g.DefineAsFixed(node, eax);
1491 OptionalOpIndex projection1 = selector->FindProjection(node, 1);
1492 if (projection1.valid()) {
1493 outputs[output_count++] = g.DefineAsFixed(projection1.value(), edx);
1494 } else {
1495 temps[temp_count++] = g.TempRegister(edx);
1496 }
1497
1498 selector->Emit(opcode, output_count, outputs, 3, inputs, temp_count, temps);
1499}
1500
1501void InstructionSelectorT::VisitWord32PairShl(OpIndex node) {
1502 VisitWord32PairShift(this, kIA32ShlPair, node);
1503}
1504
1505void InstructionSelectorT::VisitWord32PairShr(OpIndex node) {
1506 VisitWord32PairShift(this, kIA32ShrPair, node);
1507}
1508
1509void InstructionSelectorT::VisitWord32PairSar(OpIndex node) {
1510 VisitWord32PairShift(this, kIA32SarPair, node);
1511}
1512
1513void InstructionSelectorT::VisitWord32Rol(OpIndex node) {
1514 VisitShift(this, node, kIA32Rol);
1515}
1516
1517void InstructionSelectorT::VisitWord32Ror(OpIndex node) {
1518 VisitShift(this, node, kIA32Ror);
1519}
1520
1521#define RO_OP_T_LIST(V) \
1522 V(Float32Sqrt, kIA32Float32Sqrt) \
1523 V(Float64Sqrt, kIA32Float64Sqrt) \
1524 V(ChangeInt32ToFloat64, kSSEInt32ToFloat64) \
1525 V(TruncateFloat32ToInt32, kIA32Float32ToInt32) \
1526 V(TruncateFloat64ToFloat32, kIA32Float64ToFloat32) \
1527 V(BitcastFloat32ToInt32, kIA32BitcastFI) \
1528 V(BitcastInt32ToFloat32, kIA32BitcastIF) \
1529 V(Float64ExtractLowWord32, kIA32Float64ExtractLowWord32) \
1530 V(Float64ExtractHighWord32, kIA32Float64ExtractHighWord32) \
1531 V(ChangeFloat64ToInt32, kIA32Float64ToInt32) \
1532 V(ChangeFloat32ToFloat64, kIA32Float32ToFloat64) \
1533 V(RoundInt32ToFloat32, kSSEInt32ToFloat32) \
1534 V(RoundFloat64ToInt32, kIA32Float64ToInt32) \
1535 V(Word32Clz, kIA32Lzcnt) \
1536 V(Word32Ctz, kIA32Tzcnt) \
1537 V(Word32Popcnt, kIA32Popcnt) \
1538 V(SignExtendWord8ToInt32, kIA32Movsxbl) \
1539 V(SignExtendWord16ToInt32, kIA32Movsxwl) \
1540
1541#define RO_WITH_TEMP_OP_T_LIST(V) V(ChangeUint32ToFloat64, kIA32Uint32ToFloat64)
1542
1543#define RO_WITH_TEMP_SIMD_OP_T_LIST(V) \
1544 V(TruncateFloat64ToUint32, kIA32Float64ToUint32) \
1545 V(TruncateFloat32ToUint32, kIA32Float32ToUint32) \
1546 V(ChangeFloat64ToUint32, kIA32Float64ToUint32)
1547
1548#define RR_OP_T_LIST(V) \
1549 V(Float32RoundDown, kIA32Float32Round | MiscField::encode(kRoundDown)) \
1550 V(Float64RoundDown, kIA32Float64Round | MiscField::encode(kRoundDown)) \
1551 V(Float32RoundUp, kIA32Float32Round | MiscField::encode(kRoundUp)) \
1552 V(Float64RoundUp, kIA32Float64Round | MiscField::encode(kRoundUp)) \
1553 V(Float32RoundTruncate, kIA32Float32Round | MiscField::encode(kRoundToZero)) \
1554 V(Float64RoundTruncate, kIA32Float64Round | MiscField::encode(kRoundToZero)) \
1555 V(Float32RoundTiesEven, \
1556 kIA32Float32Round | MiscField::encode(kRoundToNearest)) \
1557 V(Float64RoundTiesEven, \
1558 kIA32Float64Round | MiscField::encode(kRoundToNearest)) \
1559 V(TruncateFloat64ToWord32, kArchTruncateDoubleToI) \
1560 IF_WASM(V, F32x4Ceil, kIA32F32x4Round | MiscField::encode(kRoundUp)) \
1561 IF_WASM(V, F32x4Floor, kIA32F32x4Round | MiscField::encode(kRoundDown)) \
1562 IF_WASM(V, F32x4Trunc, kIA32F32x4Round | MiscField::encode(kRoundToZero)) \
1563 IF_WASM(V, F32x4NearestInt, \
1564 kIA32F32x4Round | MiscField::encode(kRoundToNearest)) \
1565 IF_WASM(V, F64x2Ceil, kIA32F64x2Round | MiscField::encode(kRoundUp)) \
1566 IF_WASM(V, F64x2Floor, kIA32F64x2Round | MiscField::encode(kRoundDown)) \
1567 IF_WASM(V, F64x2Trunc, kIA32F64x2Round | MiscField::encode(kRoundToZero)) \
1568 IF_WASM(V, F64x2NearestInt, \
1569 kIA32F64x2Round | MiscField::encode(kRoundToNearest)) \
1570 IF_WASM(V, F64x2Sqrt, kIA32F64x2Sqrt)
1571
1572#define RRO_FLOAT_OP_T_LIST(V) \
1573 V(Float32Add, kFloat32Add) \
1574 V(Float64Add, kFloat64Add) \
1575 V(Float32Sub, kFloat32Sub) \
1576 V(Float64Sub, kFloat64Sub) \
1577 V(Float32Mul, kFloat32Mul) \
1578 V(Float64Mul, kFloat64Mul) \
1579 V(Float32Div, kFloat32Div) \
1580 V(Float64Div, kFloat64Div)
1581
1582#define FLOAT_UNOP_T_LIST(V) \
1583 V(Float32Abs, kFloat32Abs) \
1584 V(Float64Abs, kFloat64Abs) \
1585 V(Float32Neg, kFloat32Neg) \
1586 V(Float64Neg, kFloat64Neg) \
1587 IF_WASM(V, F32x4Abs, kFloat32Abs) \
1588 IF_WASM(V, F32x4Neg, kFloat32Neg) \
1589 IF_WASM(V, F64x2Abs, kFloat64Abs) \
1590 IF_WASM(V, F64x2Neg, kFloat64Neg)
1591
1592#define RO_VISITOR(Name, opcode) \
1593 void InstructionSelectorT::Visit##Name(OpIndex node) { \
1594 VisitRO(this, node, opcode); \
1595 }
1596RO_OP_T_LIST(RO_VISITOR)
1597#undef RO_VISITOR
1598#undef RO_OP_T_LIST
1599
1600#define RO_WITH_TEMP_VISITOR(Name, opcode) \
1601 void InstructionSelectorT::Visit##Name(OpIndex node) { \
1602 VisitROWithTemp(this, node, opcode); \
1603 }
1604RO_WITH_TEMP_OP_T_LIST(RO_WITH_TEMP_VISITOR)
1605#undef RO_WITH_TEMP_VISITOR
1606#undef RO_WITH_TEMP_OP_T_LIST
1607
1608#define RO_WITH_TEMP_SIMD_VISITOR(Name, opcode) \
1609 void InstructionSelectorT::Visit##Name(OpIndex node) { \
1610 VisitROWithTempSimd(this, node, opcode); \
1611 }
1612RO_WITH_TEMP_SIMD_OP_T_LIST(RO_WITH_TEMP_SIMD_VISITOR)
1613#undef RO_WITH_TEMP_SIMD_VISITOR
1614#undef RO_WITH_TEMP_SIMD_OP_T_LIST
1615
1616#define RR_VISITOR(Name, opcode) \
1617 void InstructionSelectorT::Visit##Name(OpIndex node) { \
1618 VisitRR(this, node, opcode); \
1619 }
1620RR_OP_T_LIST(RR_VISITOR)
1621#undef RR_VISITOR
1622#undef RR_OP_T_LIST
1623
1624#define RRO_FLOAT_VISITOR(Name, opcode) \
1625 void InstructionSelectorT::Visit##Name(OpIndex node) { \
1626 VisitRROFloat(this, node, opcode); \
1627 }
1628RRO_FLOAT_OP_T_LIST(RRO_FLOAT_VISITOR)
1629#undef RRO_FLOAT_VISITOR
1630#undef RRO_FLOAT_OP_T_LIST
1631
1632#define FLOAT_UNOP_VISITOR(Name, opcode) \
1633 void InstructionSelectorT::Visit##Name(OpIndex node) { \
1634 DCHECK_EQ(this->value_input_count(node), 1); \
1635 VisitFloatUnop(this, node, this->input_at(node, 0), opcode); \
1636 }
1637FLOAT_UNOP_T_LIST(FLOAT_UNOP_VISITOR)
1638#undef FLOAT_UNOP_VISITOR
1639#undef FLOAT_UNOP_T_LIST
1640
1641void InstructionSelectorT::VisitTruncateFloat64ToFloat16RawBits(OpIndex node) {
1642 UNIMPLEMENTED();
1643}
1644
1645void InstructionSelectorT::VisitChangeFloat16RawBitsToFloat64(OpIndex node) {
1646 UNIMPLEMENTED();
1647}
1648
1649void InstructionSelectorT::VisitWord32ReverseBits(OpIndex node) {
1650 UNREACHABLE();
1651}
1652
1653void InstructionSelectorT::VisitWord64ReverseBytes(OpIndex node) {
1654 UNREACHABLE();
1655}
1656
1657void InstructionSelectorT::VisitWord32ReverseBytes(OpIndex node) {
1658 IA32OperandGeneratorT g(this);
1659 DCHECK_EQ(this->value_input_count(node), 1);
1660 Emit(kIA32Bswap, g.DefineSameAsFirst(node),
1661 g.UseRegister(this->input_at(node, 0)));
1662}
1663
1664void InstructionSelectorT::VisitSimd128ReverseBytes(OpIndex node) {
1665 UNREACHABLE();
1666}
1667
1668void InstructionSelectorT::VisitInt32Add(OpIndex node) {
1669 IA32OperandGeneratorT g(this);
1670 const WordBinopOp& add = this->Get(node).template Cast<WordBinopOp>();
1671 OpIndex left = add.left();
1672 OpIndex right = add.right();
1673
1674 std::optional<BaseWithScaledIndexAndDisplacementMatch> m =
1675 TryMatchBaseWithScaledIndexAndDisplacementForWordBinop(this, left, right);
1676 if (m.has_value()) {
1677 if (g.ValueFitsIntoImmediate(m->displacement)) {
1678 EmitLea(this, node, m->index, m->scale, m->base, m->displacement,
1679 m->displacement_mode);
1680 return;
1681 }
1682 }
1683 // No lea pattern, use add.
1684 VisitBinop(this, node, kIA32Add);
1685}
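// Example (illustrative): an addition of the form base + index * 4 + 8, where
// the displacement fits into an immediate, is matched above and emitted as a
// single `lea dst, [base + index*4 + 8]` instead of separate shift and add
// instructions.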
1686
1687void InstructionSelectorT::VisitInt32Sub(OpIndex node) {
1688 IA32OperandGeneratorT g(this);
1689 auto [left, right] = Inputs<WordBinopOp>(node);
1690 if (this->MatchIntegralZero(left)) {
1691 Emit(kIA32Neg, g.DefineSameAsFirst(node), g.Use(right));
1692 } else {
1693 VisitBinop(this, node, kIA32Sub);
1694 }
1695}
1696
1697void InstructionSelectorT::VisitInt32Mul(OpIndex node) {
1698 if (auto m = TryMatchScaledIndex(this, node, true)) {
1699 EmitLea(this, node, m->index, m->scale, m->base, 0, kPositiveDisplacement);
1700 return;
1701 }
1702 IA32OperandGeneratorT g(this);
1703 auto left = this->input_at(node, 0);
1704 auto right = this->input_at(node, 1);
1705 if (g.CanBeImmediate(right)) {
1706 Emit(kIA32Imul, g.DefineAsRegister(node), g.Use(left),
1707 g.UseImmediate(right));
1708 } else {
1709 if (g.CanBeBetterLeftOperand(right)) {
1710 std::swap(left, right);
1711 }
1712 Emit(kIA32Imul, g.DefineSameAsFirst(node), g.UseRegister(left),
1713 g.Use(right));
1714 }
1715}
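// Example (illustrative): TryMatchScaledIndex also accepts "power of two plus
// one" factors, so a multiplication such as x * 9 can be emitted as
// `lea dst, [x + x*8]` rather than an imul with a register result.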
1716
1717void InstructionSelectorT::VisitInt32MulHigh(OpIndex node) {
1718 VisitMulHigh(this, node, kIA32ImulHigh);
1719}
1720
1721void InstructionSelectorT::VisitUint32MulHigh(OpIndex node) {
1722 VisitMulHigh(this, node, kIA32UmulHigh);
1723}
1724
1725void InstructionSelectorT::VisitInt32Div(OpIndex node) {
1726 VisitDiv(this, node, kIA32Idiv);
1727}
1728
1729void InstructionSelectorT::VisitUint32Div(OpIndex node) {
1730 VisitDiv(this, node, kIA32Udiv);
1731}
1732
1733void InstructionSelectorT::VisitInt32Mod(OpIndex node) {
1734 VisitMod(this, node, kIA32Idiv);
1735}
1736
1737void InstructionSelectorT::VisitUint32Mod(OpIndex node) {
1738 VisitMod(this, node, kIA32Udiv);
1739}
1740
1741void InstructionSelectorT::VisitRoundUint32ToFloat32(OpIndex node) {
1742 IA32OperandGeneratorT g(this);
1743 InstructionOperand temps[] = {g.TempRegister()};
1744 Emit(kIA32Uint32ToFloat32, g.DefineAsRegister(node),
1745 g.Use(this->input_at(node, 0)), arraysize(temps), temps);
1746}
1747
1748void InstructionSelectorT::VisitFloat64Mod(OpIndex node) {
1749 IA32OperandGeneratorT g(this);
1750 InstructionOperand temps[] = {g.TempRegister(eax), g.TempRegister()};
1751 Emit(kIA32Float64Mod, g.DefineSameAsFirst(node),
1752 g.UseRegister(this->input_at(node, 0)),
1753 g.UseRegister(this->input_at(node, 1)), arraysize(temps), temps);
1754}
1755
1756void InstructionSelectorT::VisitFloat32Max(OpIndex node) {
1757 IA32OperandGeneratorT g(this);
1758 InstructionOperand temps[] = {g.TempRegister()};
1759 Emit(kIA32Float32Max, g.DefineSameAsFirst(node),
1760 g.UseRegister(this->input_at(node, 0)), g.Use(this->input_at(node, 1)),
1761 arraysize(temps), temps);
1762}
1763
1764void InstructionSelectorT::VisitFloat64Max(OpIndex node) {
1765 IA32OperandGeneratorT g(this);
1766 InstructionOperand temps[] = {g.TempRegister()};
1767 Emit(kIA32Float64Max, g.DefineSameAsFirst(node),
1768 g.UseRegister(this->input_at(node, 0)), g.Use(this->input_at(node, 1)),
1769 arraysize(temps), temps);
1770}
1771
1772void InstructionSelectorT::VisitFloat32Min(OpIndex node) {
1773 IA32OperandGeneratorT g(this);
1774 InstructionOperand temps[] = {g.TempRegister()};
1775 Emit(kIA32Float32Min, g.DefineSameAsFirst(node),
1776 g.UseRegister(this->input_at(node, 0)), g.Use(this->input_at(node, 1)),
1777 arraysize(temps), temps);
1778}
1779
1780void InstructionSelectorT::VisitFloat64Min(OpIndex node) {
1781 IA32OperandGeneratorT g(this);
1782 InstructionOperand temps[] = {g.TempRegister()};
1783 Emit(kIA32Float64Min, g.DefineSameAsFirst(node),
1784 g.UseRegister(this->input_at(node, 0)), g.Use(this->input_at(node, 1)),
1785 arraysize(temps), temps);
1786}
1787
1788void InstructionSelectorT::VisitFloat64RoundTiesAway(OpIndex node) {
1789 UNREACHABLE();
1790}
1791
1792void InstructionSelectorT::VisitFloat64Ieee754Binop(OpIndex node,
1793 InstructionCode opcode) {
1794 IA32OperandGeneratorT g(this);
1795 Emit(opcode, g.DefineSameAsFirst(node),
1796 g.UseRegister(this->input_at(node, 0)),
1797 g.UseRegister(this->input_at(node, 1)))
1798 ->MarkAsCall();
1799}
1800
1801void InstructionSelectorT::VisitFloat64Ieee754Unop(OpIndex node,
1802 InstructionCode opcode) {
1803 IA32OperandGeneratorT g(this);
1804 Emit(opcode, g.DefineSameAsFirst(node),
1805 g.UseRegister(this->input_at(node, 0)))
1806 ->MarkAsCall();
1807}
1808
1810
1811void InstructionSelectorT::EmitMoveFPRToParam(InstructionOperand* op,
1812 LinkageLocation location) {}
1813
1814void InstructionSelectorT::EmitPrepareArguments(
1815 ZoneVector<PushParameter>* arguments, const CallDescriptor* call_descriptor,
1816 OpIndex node) {
1817 IA32OperandGeneratorT g(this);
1818
1819 { // Temporary scope to minimize indentation change churn below.
1820 // Prepare for C function call.
1821 if (call_descriptor->IsCFunctionCall()) {
1822 InstructionOperand temps[] = {g.TempRegister()};
1823 size_t const temp_count = arraysize(temps);
1824 Emit(kArchPrepareCallCFunction | MiscField::encode(static_cast<int>(
1825 call_descriptor->ParameterCount())),
1826 0, nullptr, 0, nullptr, temp_count, temps);
1827
1828 // Poke any stack arguments.
1829 for (size_t n = 0; n < arguments->size(); ++n) {
1830 PushParameter input = (*arguments)[n];
1831 if (input.node.valid()) {
1832 int const slot = static_cast<int>(n);
1833 // TODO(jkummerow): The next line should use `input.node`, but
1834 // fixing it causes mksnapshot failures. Investigate.
1835 InstructionOperand value = g.CanBeImmediate(node)
1836 ? g.UseImmediate(input.node)
1837 : g.UseRegister(input.node);
1838 Emit(kIA32Poke | MiscField::encode(slot), g.NoOutput(), value);
1839 }
1840 }
1841 } else {
1842 // Push any stack arguments.
1843 int effect_level = GetEffectLevel(node);
1844 int stack_decrement = 0;
1845 for (PushParameter input : base::Reversed(*arguments)) {
1846 stack_decrement += kSystemPointerSize;
1847 // Skip holes in the param array. These represent both extra slots for
1848 // multi-slot values and padding slots for alignment.
1849 if (!input.node.valid()) continue;
1850 InstructionOperand decrement = g.UseImmediate(stack_decrement);
1851 stack_decrement = 0;
1852 if (g.CanBeImmediate(input.node)) {
1853 Emit(kIA32Push, g.NoOutput(), decrement, g.UseImmediate(input.node));
1854 } else if (IsSupported(INTEL_ATOM) ||
1855 sequence()->IsFP(GetVirtualRegister(input.node))) {
1856 // TODO(bbudge): IA32Push cannot handle stack->stack double moves
1857 // because there is no way to encode fixed double slots.
1858 Emit(kIA32Push, g.NoOutput(), decrement, g.UseRegister(input.node));
1859 } else if (g.CanBeMemoryOperand(kIA32Push, node, input.node,
1860 effect_level)) {
1861 InstructionOperand outputs[1];
1862 InstructionOperand inputs[5];
1863 size_t input_count = 0;
1864 inputs[input_count++] = decrement;
1865 AddressingMode mode = g.GetEffectiveAddressMemoryOperand(
1866 input.node, inputs, &input_count);
1867 InstructionCode opcode =
1868 kIA32Push | AddressingModeField::encode(mode);
1869 Emit(opcode, 0, outputs, input_count, inputs);
1870 } else {
1871 Emit(kIA32Push, g.NoOutput(), decrement, g.UseAny(input.node));
1872 }
1873 }
1874 } // End of temporary scope.
1875 }
1876}
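// Note (illustrative summary): for C calls each argument is poked into its
// fixed slot after kArchPrepareCallCFunction reserves the space; for other
// calls the arguments are pushed last-to-first, and the stack_decrement
// accumulated over skipped holes is attached to the next emitted push.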
1877
1878void InstructionSelectorT::EmitPrepareResults(
1879 ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,
1880 OpIndex node) {
1881 { // Temporary scope to minimize indentation change churn below.
1882 IA32OperandGeneratorT g(this);
1883
1884 for (PushParameter output : *results) {
1885 if (!output.location.IsCallerFrameSlot()) continue;
1886 // Skip any alignment holes in nodes.
1887 if (output.node.valid()) {
1888 DCHECK(!call_descriptor->IsCFunctionCall());
1889 if (output.location.GetType() == MachineType::Float32()) {
1890 MarkAsFloat32(output.node);
1891 } else if (output.location.GetType() == MachineType::Float64()) {
1892 MarkAsFloat64(output.node);
1893 } else if (output.location.GetType() == MachineType::Simd128()) {
1894 MarkAsSimd128(output.node);
1895 }
1896 int offset = call_descriptor->GetOffsetToReturns();
1897 int reverse_slot = -output.location.GetLocation() - offset;
1898 Emit(kIA32Peek, g.DefineAsRegister(output.node),
1899 g.UseImmediate(reverse_slot));
1900 }
1901 }
1902 } // End of temporary scope.
1903}
1904
1906
1907namespace {
1908
1909void VisitCompareWithMemoryOperand(InstructionSelectorT* selector,
1910 InstructionCode opcode, OpIndex left,
1911 InstructionOperand right,
1912 FlagsContinuationT* cont) {
1913 DCHECK(selector->IsLoadOrLoadImmutable(left));
1914 IA32OperandGeneratorT g(selector);
1915 size_t input_count = 0;
1916 InstructionOperand inputs[4];
1917 AddressingMode addressing_mode =
1918 g.GetEffectiveAddressMemoryOperand(left, inputs, &input_count);
1919 opcode |= AddressingModeField::encode(addressing_mode);
1920 inputs[input_count++] = right;
1921
1922 selector->EmitWithContinuation(opcode, 0, nullptr, input_count, inputs, cont);
1923}
1924
1925// Shared routine for multiple compare operations.
1926void VisitCompare(InstructionSelectorT* selector, InstructionCode opcode,
1927 InstructionOperand left, InstructionOperand right,
1928 FlagsContinuationT* cont) {
1929 selector->EmitWithContinuation(opcode, left, right, cont);
1930}
1931
1932// Shared routine for multiple compare operations.
1933void VisitCompare(InstructionSelectorT* selector, InstructionCode opcode,
1934 OpIndex left, OpIndex right, FlagsContinuationT* cont,
1935 bool commutative) {
1936 IA32OperandGeneratorT g(selector);
1937 if (commutative && g.CanBeBetterLeftOperand(right)) {
1938 std::swap(left, right);
1939 }
1940 VisitCompare(selector, opcode, g.UseRegister(left), g.Use(right), cont);
1941}
1942
1943MachineType MachineTypeForNarrow(InstructionSelectorT* selector, OpIndex node,
1944 OpIndex hint_node) {
1945 if (selector->IsLoadOrLoadImmutable(hint_node)) {
1946 MachineType hint = selector->load_view(hint_node).loaded_rep();
1947 if (int64_t constant;
1948 selector->MatchSignedIntegralConstant(node, &constant)) {
1949 if (hint == MachineType::Int8()) {
1950 if (constant >= std::numeric_limits<int8_t>::min() &&
1951 constant <= std::numeric_limits<int8_t>::max()) {
1952 return hint;
1953 }
1954 } else if (hint == MachineType::Uint8()) {
1955 if (constant >= std::numeric_limits<uint8_t>::min() &&
1956 constant <= std::numeric_limits<uint8_t>::max()) {
1957 return hint;
1958 }
1959 } else if (hint == MachineType::Int16()) {
1960 if (constant >= std::numeric_limits<int16_t>::min() &&
1961 constant <= std::numeric_limits<int16_t>::max()) {
1962 return hint;
1963 }
1964 } else if (hint == MachineType::Uint16()) {
1965 if (constant >= std::numeric_limits<uint16_t>::min() &&
1966 constant <= std::numeric_limits<uint16_t>::max()) {
1967 return hint;
1968 }
1969 } else if (hint == MachineType::Int32()) {
1970 return hint;
1971 } else if (hint == MachineType::Uint32()) {
1972 if (constant >= 0) return hint;
1973 }
1974 }
1975 }
1976 return selector->IsLoadOrLoadImmutable(node)
1977 ? selector->load_view(node).loaded_rep()
1978 : MachineType::None();
1979}
1980
1981// Tries to match the size of the given opcode to that of the operands, if
1982// possible.
1983InstructionCode TryNarrowOpcodeSize(InstructionSelectorT* selector,
1984 InstructionCode opcode, OpIndex left,
1985 OpIndex right, FlagsContinuationT* cont) {
1986 // TODO(epertoso): we can probably get some size information out of phi nodes.
1987 // If the load representations don't match, both operands will be
1988 // zero/sign-extended to 32bit.
1989 MachineType left_type = MachineTypeForNarrow(selector, left, right);
1990 MachineType right_type = MachineTypeForNarrow(selector, right, left);
1991 if (left_type == right_type) {
1992 switch (left_type.representation()) {
1993 case MachineRepresentation::kBit:
1994 case MachineRepresentation::kWord8: {
1995 if (opcode == kIA32Test) return kIA32Test8;
1996 if (opcode == kIA32Cmp) {
1997 if (left_type.semantic() == MachineSemantic::kUint32) {
1998 cont->OverwriteUnsignedIfSigned();
1999 } else {
2000 CHECK_EQ(MachineSemantic::kInt32, left_type.semantic());
2001 }
2002 return kIA32Cmp8;
2003 }
2004 break;
2005 }
2006 case MachineRepresentation::kWord16:
2007 if (opcode == kIA32Test) return kIA32Test16;
2008 if (opcode == kIA32Cmp) {
2009 if (left_type.semantic() == MachineSemantic::kUint32) {
2010 cont->OverwriteUnsignedIfSigned();
2011 } else {
2012 CHECK_EQ(MachineSemantic::kInt32, left_type.semantic());
2013 }
2014 return kIA32Cmp16;
2015 }
2016 break;
2017 default:
2018 break;
2019 }
2020 }
2021 return opcode;
2022}
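// Example (illustrative): comparing two Int8 loads, or an Int8 load against a
// constant that fits into eight bits, narrows kIA32Cmp to kIA32Cmp8, which in
// turn lets the load be folded into the compare as a memory operand below.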
2023
2024// Shared routine for multiple float32 compare operations (inputs commuted).
2025void VisitFloat32Compare(InstructionSelectorT* selector, OpIndex node,
2026 FlagsContinuationT* cont) {
2027 auto left = selector->input_at(node, 0);
2028 auto right = selector->input_at(node, 1);
2029 VisitCompare(selector, kIA32Float32Cmp, right, left, cont, false);
2030}
2031
2032// Shared routine for multiple float64 compare operations (inputs commuted).
2033void VisitFloat64Compare(InstructionSelectorT* selector, OpIndex node,
2034 FlagsContinuationT* cont) {
2035 auto left = selector->input_at(node, 0);
2036 auto right = selector->input_at(node, 1);
2037 VisitCompare(selector, kIA32Float64Cmp, right, left, cont, false);
2038}
2039
2040// Shared routine for multiple word compare operations.
2041void VisitWordCompare(InstructionSelectorT* selector, OpIndex node,
2042 InstructionCode opcode, FlagsContinuationT* cont) {
2043 { // Temporary scope to minimize indentation change churn below.
2044 IA32OperandGeneratorT g(selector);
2045 auto left = selector->input_at(node, 0);
2046 auto right = selector->input_at(node, 1);
2047
2048 InstructionCode narrowed_opcode =
2049 TryNarrowOpcodeSize(selector, opcode, left, right, cont);
2050
2051 int effect_level = selector->GetEffectLevel(node, cont);
2052
2053 // If one of the two inputs is an immediate, make sure it's on the right, or
2054 // if one of the two inputs is a memory operand, make sure it's on the left.
2055 if ((!g.CanBeImmediate(right) && g.CanBeImmediate(left)) ||
2056 (g.CanBeMemoryOperand(narrowed_opcode, node, right, effect_level) &&
2057 !g.CanBeMemoryOperand(narrowed_opcode, node, left, effect_level))) {
2058 if (!selector->IsCommutative(node)) cont->Commute();
2059 std::swap(left, right);
2060 }
2061
2062 // Match immediates on right side of comparison.
2063 if (g.CanBeImmediate(right)) {
2064 if (g.CanBeMemoryOperand(narrowed_opcode, node, left, effect_level)) {
2065 return VisitCompareWithMemoryOperand(selector, narrowed_opcode, left,
2066 g.UseImmediate(right), cont);
2067 }
2068 return VisitCompare(selector, opcode, g.Use(left), g.UseImmediate(right),
2069 cont);
2070 }
2071
2072 // Match memory operands on left side of comparison.
2073 if (g.CanBeMemoryOperand(narrowed_opcode, node, left, effect_level)) {
2074 bool needs_byte_register =
2075 narrowed_opcode == kIA32Test8 || narrowed_opcode == kIA32Cmp8;
2076 return VisitCompareWithMemoryOperand(
2077 selector, narrowed_opcode, left,
2078 needs_byte_register ? g.UseByteRegister(right) : g.UseRegister(right),
2079 cont);
2080 }
2081
2082 return VisitCompare(selector, opcode, left, right, cont,
2083 selector->IsCommutative(node));
2084 }
2085}
2086
2087void VisitWordCompare(InstructionSelectorT* selector, OpIndex node,
2088 FlagsContinuationT* cont) {
2089 VisitWordCompare(selector, node, kIA32Cmp, cont);
2090}
2091
2092void VisitAtomicBinOp(InstructionSelectorT* selector, OpIndex node,
2093 ArchOpcode opcode, MachineRepresentation rep) {
2094 AddressingMode addressing_mode;
2095 IA32OperandGeneratorT g(selector);
2096 OpIndex base = selector->input_at(node, 0);
2097 OpIndex index = selector->input_at(node, 1);
2098 OpIndex value = selector->input_at(node, 2);
2099 InstructionOperand inputs[] = {
2100 g.UseUniqueRegister(value), g.UseUniqueRegister(base),
2101 g.GetEffectiveIndexOperand(index, &addressing_mode)};
2102 InstructionOperand outputs[] = {g.DefineAsFixed(node, eax)};
2103 InstructionOperand temp[] = {(rep == MachineRepresentation::kWord8)
2104 ? g.UseByteRegister(node)
2105 : g.TempRegister()};
2106 InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
2107 selector->Emit(code, arraysize(outputs), outputs, arraysize(inputs), inputs,
2108 arraysize(temp), temp);
2109}
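// Note (illustrative): these word-sized atomic read-modify-write operations
// are generally expanded to a compare-exchange loop by the code generator,
// which is why the result is fixed to eax (cmpxchg's implicit accumulator)
// and the inputs must live in unique registers.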
2110
2111void VisitPairAtomicBinOp(InstructionSelectorT* selector, OpIndex node,
2112 ArchOpcode opcode) {
2113 IA32OperandGeneratorT g(selector);
2114 OpIndex base = selector->input_at(node, 0);
2115 OpIndex index = selector->input_at(node, 1);
2116 OpIndex value = selector->input_at(node, 2);
2117 // For Word64 operations, the value input is split into a high node and
2118 // a low node in the int64-lowering phase.
2119 OpIndex value_high = selector->input_at(node, 3);
2120
2121 // Wasm lives in 32-bit address space, so we do not need to worry about
2122 // base/index lowering. This will need to be fixed for Wasm64.
2123 AddressingMode addressing_mode;
2124 InstructionOperand inputs[] = {
2125 g.UseUniqueRegisterOrSlotOrConstant(value), g.UseFixed(value_high, ecx),
2126 g.UseUniqueRegister(base),
2127 g.GetEffectiveIndexOperand(index, &addressing_mode)};
2128 InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
2129 OptionalOpIndex projection0 = selector->FindProjection(node, 0);
2130 OptionalOpIndex projection1 = selector->FindProjection(node, 1);
2131 InstructionOperand outputs[2];
2132 size_t output_count = 0;
2133 InstructionOperand temps[2];
2134 size_t temp_count = 0;
2135 if (projection0.valid()) {
2136 outputs[output_count++] = g.DefineAsFixed(projection0.value(), eax);
2137 } else {
2138 temps[temp_count++] = g.TempRegister(eax);
2139 }
2140 if (projection1.valid()) {
2141 outputs[output_count++] = g.DefineAsFixed(projection1.value(), edx);
2142 } else {
2143 temps[temp_count++] = g.TempRegister(edx);
2144 }
2145 selector->Emit(code, output_count, outputs, arraysize(inputs), inputs,
2146 temp_count, temps);
2147}
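// Note (illustrative): the 64-bit pair operations are built around cmpxchg8b,
// which implicitly uses edx:eax for the old value and ecx:ebx for the new
// value; hence value_high is fixed to ecx and the result projections (or
// temps, if a half is unused) are fixed to eax and edx.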
2148
2149} // namespace
2150
2151// Shared routine for word comparison with zero.
2152void InstructionSelectorT::VisitWordCompareZero(OpIndex user, OpIndex value,
2153 FlagsContinuation* cont) {
2154 // Try to combine with comparisons against 0 by simply inverting the branch.
2155 ConsumeEqualZero(&user, &value, cont);
2156
2157 if (CanCover(user, value)) {
2158 const Operation& value_op = Get(value);
2159 if (const ComparisonOp* comparison = value_op.TryCast<ComparisonOp>()) {
2160 switch (comparison->rep.MapTaggedToWord().value()) {
2161 case RegisterRepresentation::Word32():
2162 cont->OverwriteAndNegateIfEqual(
2163 GetComparisonFlagCondition(*comparison));
2164 return VisitWordCompare(this, value, cont);
2165 case RegisterRepresentation::Float32():
2166 switch (comparison->kind) {
2167 case ComparisonOp::Kind::kEqual:
2168 cont->OverwriteAndNegateIfEqual(kUnorderedEqual);
2169 return VisitFloat32Compare(this, value, cont);
2170 case ComparisonOp::Kind::kSignedLessThan:
2171 cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThan);
2172 return VisitFloat32Compare(this, value, cont);
2173 case ComparisonOp::Kind::kSignedLessThanOrEqual:
2174 cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThanOrEqual);
2175 return VisitFloat32Compare(this, value, cont);
2176 default:
2177 UNREACHABLE();
2178 }
2179 case RegisterRepresentation::Float64():
2180 switch (comparison->kind) {
2181 case ComparisonOp::Kind::kEqual:
2182 cont->OverwriteAndNegateIfEqual(kUnorderedEqual);
2183 return VisitFloat64Compare(this, value, cont);
2184 case ComparisonOp::Kind::kSignedLessThan:
2185 cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThan);
2186 return VisitFloat64Compare(this, value, cont);
2187 case ComparisonOp::Kind::kSignedLessThanOrEqual:
2188 cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThanOrEqual);
2189 return VisitFloat64Compare(this, value, cont);
2190 default:
2191 UNREACHABLE();
2192 }
2193 default:
2194 break;
2195 }
2196 } else if (value_op.Is<Opmask::kWord32Sub>()) {
2197 return VisitWordCompare(this, value, cont);
2198 } else if (value_op.Is<Opmask::kWord32BitwiseAnd>()) {
2199 return VisitWordCompare(this, value, kIA32Test, cont);
2200 } else if (const ProjectionOp* projection =
2201 value_op.TryCast<ProjectionOp>()) {
2202 // Check if this is the overflow output projection of an
2203 // OverflowCheckedBinop operation.
2204 if (projection->index == 1u) {
2205 // We cannot combine the OverflowCheckedBinop operation with this branch
2206 // unless the 0th projection (the use of the actual value of the
2207 // operation) is either {OpIndex::Invalid()}, which means there's no use
2208 // of the actual value, or was already defined, which means it is
2209 // scheduled *AFTER* this branch.
2210 OpIndex node = projection->input();
2211 if (const OverflowCheckedBinopOp* binop =
2212 this->TryCast<OverflowCheckedBinopOp>(node);
2213 binop && CanDoBranchIfOverflowFusion(node)) {
2214 DCHECK_EQ(binop->rep, WordRepresentation::Word32());
2215 cont->OverwriteAndNegateIfEqual(kOverflow);
2216 switch (binop->kind) {
2217 case OverflowCheckedBinopOp::Kind::kSignedAdd:
2218 return VisitBinop(this, node, kIA32Add, cont);
2219 case OverflowCheckedBinopOp::Kind::kSignedSub:
2220 return VisitBinop(this, node, kIA32Sub, cont);
2221 case OverflowCheckedBinopOp::Kind::kSignedMul:
2222 return VisitBinop(this, node, kIA32Imul, cont);
2223 }
2224 UNREACHABLE();
2225 }
2226 }
2227 } else if (value_op.Is<StackPointerGreaterThanOp>()) {
2228 cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition);
2229 return VisitStackPointerGreaterThan(value, cont);
2230 }
2231 }
2232
2233 // Branch could not be combined with a compare, emit compare against 0.
2234 IA32OperandGeneratorT g(this);
2235 VisitCompare(this, kIA32Cmp, g.Use(value), g.TempImmediate(0), cont);
2236}
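// Example (illustrative): a branch on `x - y == 0` takes the Word32Sub path
// above and is emitted as a single `cmp x, y` feeding the flags continuation,
// while `(x & y) == 0` takes the Word32BitwiseAnd path and emits `test x, y`.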
2237
2238void InstructionSelectorT::VisitSwitch(OpIndex node, const SwitchInfo& sw) {
2239 { // Temporary scope to minimize indentation change churn below.
2240 IA32OperandGeneratorT g(this);
2241 InstructionOperand value_operand = g.UseRegister(this->input_at(node, 0));
2242
2243 // Emit either ArchTableSwitch or ArchBinarySearchSwitch.
2244 if (enable_switch_jump_table_ ==
2245 InstructionSelector::kEnableSwitchJumpTable) {
2246 static const size_t kMaxTableSwitchValueRange = 2 << 16;
2247 size_t table_space_cost = 4 + sw.value_range();
2248 size_t table_time_cost = 3;
2249 size_t lookup_space_cost = 3 + 2 * sw.case_count();
2250 size_t lookup_time_cost = sw.case_count();
2251 if (sw.case_count() > 4 &&
2252 table_space_cost + 3 * table_time_cost <=
2253 lookup_space_cost + 3 * lookup_time_cost &&
2254 sw.min_value() > std::numeric_limits<int32_t>::min() &&
2255 sw.value_range() <= kMaxTableSwitchValueRange) {
2256 InstructionOperand index_operand = value_operand;
2257 if (sw.min_value()) {
2258 index_operand = g.TempRegister();
2259 Emit(kIA32Lea | AddressingModeField::encode(kMode_MRI), index_operand,
2260 value_operand, g.TempImmediate(-sw.min_value()));
2261 }
2262 // Generate a table lookup.
2263 return EmitTableSwitch(sw, index_operand);
2264 }
2265 }
2266
2267 // Generate a tree of conditional jumps.
2268 return EmitBinarySearchSwitch(sw, value_operand);
2269 }
2270}
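// Example (illustrative, made-up numbers): with case_count() == 10 and
// value_range() == 20, table_space_cost is 24 and lookup_space_cost is 23, so
// 24 + 3*3 <= 23 + 3*10 holds and a jump table is emitted; a sparse switch
// with value_range() == 10000 fails that check and falls back to the binary
// search tree.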
2271
2272void InstructionSelectorT::VisitWord32Equal(OpIndex node) {
2273 FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
2274 const ComparisonOp& comparison =
2275 this->Get(node).template Cast<ComparisonOp>();
2276 if (this->MatchIntegralZero(comparison.right())) {
2277 return VisitWordCompareZero(node, comparison.left(), &cont);
2278 }
2279 VisitWordCompare(this, node, &cont);
2280}
2281
2282void InstructionSelectorT::VisitInt32LessThan(OpIndex node) {
2283 FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
2284 VisitWordCompare(this, node, &cont);
2285}
2286
2287void InstructionSelectorT::VisitInt32LessThanOrEqual(OpIndex node) {
2288 FlagsContinuation cont =
2289 FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
2290 VisitWordCompare(this, node, &cont);
2291}
2292
2293void InstructionSelectorT::VisitUint32LessThan(OpIndex node) {
2294 FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
2295 VisitWordCompare(this, node, &cont);
2296}
2297
2298void InstructionSelectorT::VisitUint32LessThanOrEqual(OpIndex node) {
2299 FlagsContinuation cont =
2300 FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
2301 VisitWordCompare(this, node, &cont);
2302}
2303
2304void InstructionSelectorT::VisitInt32AddWithOverflow(OpIndex node) {
2305 OptionalOpIndex ovf = FindProjection(node, 1);
2306 if (ovf.valid()) {
2307 FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf.value());
2308 return VisitBinop(this, node, kIA32Add, &cont);
2309 }
2310 FlagsContinuation cont;
2311 VisitBinop(this, node, kIA32Add, &cont);
2312}
2313
2314void InstructionSelectorT::VisitInt32SubWithOverflow(OpIndex node) {
2315 OptionalOpIndex ovf = FindProjection(node, 1);
2316 if (ovf.valid()) {
2317 FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf.value());
2318 return VisitBinop(this, node, kIA32Sub, &cont);
2319 }
2320 FlagsContinuation cont;
2321 VisitBinop(this, node, kIA32Sub, &cont);
2322}
2323
2324void InstructionSelectorT::VisitInt32MulWithOverflow(OpIndex node) {
2325 OptionalOpIndex ovf = FindProjection(node, 1);
2326 if (ovf.valid()) {
2327 FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf.value());
2328 return VisitBinop(this, node, kIA32Imul, &cont);
2329 }
2330 FlagsContinuation cont;
2331 VisitBinop(this, node, kIA32Imul, &cont);
2332}
2333
2334void InstructionSelectorT::VisitFloat32Equal(OpIndex node) {
2335 FlagsContinuation cont = FlagsContinuation::ForSet(kUnorderedEqual, node);
2336 VisitFloat32Compare(this, node, &cont);
2337}
2338
2339void InstructionSelectorT::VisitFloat32LessThan(OpIndex node) {
2340 FlagsContinuation cont =
2341 FlagsContinuation::ForSet(kUnsignedGreaterThan, node);
2342 VisitFloat32Compare(this, node, &cont);
2343}
2344
2345void InstructionSelectorT::VisitFloat32LessThanOrEqual(OpIndex node) {
2346 FlagsContinuation cont =
2347 FlagsContinuation::ForSet(kUnsignedGreaterThanOrEqual, node);
2348 VisitFloat32Compare(this, node, &cont);
2349}
2350
2351void InstructionSelectorT::VisitFloat64Equal(OpIndex node) {
2352 FlagsContinuation cont = FlagsContinuation::ForSet(kUnorderedEqual, node);
2353 VisitFloat64Compare(this, node, &cont);
2354}
2355
2356void InstructionSelectorT::VisitFloat64LessThan(OpIndex node) {
2357 FlagsContinuation cont =
2358 FlagsContinuation::ForSet(kUnsignedGreaterThan, node);
2359 VisitFloat64Compare(this, node, &cont);
2360}
2361
2362void InstructionSelectorT::VisitFloat64LessThanOrEqual(OpIndex node) {
2363 FlagsContinuation cont =
2364 FlagsContinuation::ForSet(kUnsignedGreaterThanOrEqual, node);
2365 VisitFloat64Compare(this, node, &cont);
2366}
2367
2368void InstructionSelectorT::VisitFloat64InsertLowWord32(OpIndex node) {
2369 // Turboshaft uses {BitcastWord32PairToFloat64}.
2370 UNREACHABLE();
2371}
2372
2373void InstructionSelectorT::VisitFloat64InsertHighWord32(OpIndex node) {
2374 // Turboshaft uses {BitcastWord32PairToFloat64}.
2375 UNREACHABLE();
2376}
2377
2378void InstructionSelectorT::VisitBitcastWord32PairToFloat64(OpIndex node) {
2379 IA32OperandGeneratorT g(this);
2380 const BitcastWord32PairToFloat64Op& cast_op =
2381 this->Get(node).template Cast<BitcastWord32PairToFloat64Op>();
2382 Emit(kIA32Float64FromWord32Pair, g.DefineAsRegister(node),
2383 g.Use(cast_op.low_word32()), g.Use(cast_op.high_word32()));
2384}
2385
2386void InstructionSelectorT::VisitFloat64SilenceNaN(OpIndex node) {
2387 IA32OperandGeneratorT g(this);
2388 Emit(kIA32Float64SilenceNaN, g.DefineSameAsFirst(node),
2389 g.UseRegister(this->input_at(node, 0)));
2390}
2391
2392AtomicMemoryOrder AtomicOrder(InstructionSelectorT* selector, OpIndex node) {
2393 const Operation& op = selector->Get(node);
2394 if (op.Is<AtomicWord32PairOp>()) {
2395 // TODO(nicohartmann): Turboshaft doesn't support configurable memory
2396 // orders yet; see also {TurboshaftAdapter::StoreView}.
2397 return AtomicMemoryOrder::kSeqCst;
2398 }
2399 if (const MemoryBarrierOp* barrier = op.TryCast<MemoryBarrierOp>()) {
2400 return barrier->memory_order;
2401 }
2402 UNREACHABLE();
2403}
2404
2405void InstructionSelectorT::VisitMemoryBarrier(OpIndex node) {
2406 // ia32 is no weaker than release-acquire and only needs to emit an
2407 // instruction for SeqCst memory barriers.
2408 AtomicMemoryOrder order = AtomicOrder(this, node);
2409 if (order == AtomicMemoryOrder::kSeqCst) {
2410 IA32OperandGeneratorT g(this);
2411 Emit(kIA32MFence, g.NoOutput());
2412 return;
2413 }
2414 DCHECK_EQ(AtomicMemoryOrder::kAcqRel, order);
2415}
2416
2417void InstructionSelectorT::VisitWord32AtomicLoad(OpIndex node) {
2418 LoadRepresentation load_rep = this->load_view(node).loaded_rep();
2419 DCHECK(load_rep.representation() == MachineRepresentation::kWord8 ||
2420 load_rep.representation() == MachineRepresentation::kWord16 ||
2421 load_rep.representation() == MachineRepresentation::kWord32 ||
2422 load_rep.representation() == MachineRepresentation::kTaggedSigned ||
2423 load_rep.representation() == MachineRepresentation::kTaggedPointer ||
2424 load_rep.representation() == MachineRepresentation::kTagged);
2425 // The memory order is ignored as both acquire and sequentially consistent
2426 // loads can emit MOV.
2427 // https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
2428 VisitLoad(node, node, GetLoadOpcode(load_rep));
2429}
2430
2431void InstructionSelectorT::VisitWord32AtomicStore(OpIndex node) {
2432 VisitStoreCommon(this, this->store_view(node));
2433}
2434
2435MachineType AtomicOpType(InstructionSelectorT* selector, OpIndex node) {
2436 const AtomicRMWOp& atomic_op =
2437 selector->Get(node).template Cast<AtomicRMWOp>();
2438 return atomic_op.memory_rep.ToMachineType();
2439}
2440
2441void InstructionSelectorT::VisitWord32AtomicExchange(OpIndex node) {
2442 IA32OperandGeneratorT g(this);
2443 MachineType type = AtomicOpType(this, node);
2444 ArchOpcode opcode;
2445 if (type == MachineType::Int8()) {
2446 opcode = kAtomicExchangeInt8;
2447 } else if (type == MachineType::Uint8()) {
2448 opcode = kAtomicExchangeUint8;
2449 } else if (type == MachineType::Int16()) {
2450 opcode = kAtomicExchangeInt16;
2451 } else if (type == MachineType::Uint16()) {
2452 opcode = kAtomicExchangeUint16;
2453 } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
2454 opcode = kAtomicExchangeWord32;
2455 } else {
2456 UNREACHABLE();
2457 }
2458 VisitAtomicExchange(this, node, opcode, type.representation());
2459}
2460
2461void InstructionSelectorT::VisitWord32AtomicCompareExchange(OpIndex node) {
2462 IA32OperandGeneratorT g(this);
2463 const AtomicRMWOp& atomic_op = Cast<AtomicRMWOp>(node);
2464 OpIndex base = atomic_op.base();
2465 OpIndex index = atomic_op.index();
2466 OpIndex old_value = atomic_op.expected().value();
2467 OpIndex new_value = atomic_op.value();
2468
2469 MachineType type = AtomicOpType(this, node);
2470 ArchOpcode opcode;
2471 if (type == MachineType::Int8()) {
2472 opcode = kAtomicCompareExchangeInt8;
2473 } else if (type == MachineType::Uint8()) {
2474 opcode = kAtomicCompareExchangeUint8;
2475 } else if (type == MachineType::Int16()) {
2476 opcode = kAtomicCompareExchangeInt16;
2477 } else if (type == MachineType::Uint16()) {
2478 opcode = kAtomicCompareExchangeUint16;
2479 } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
2480 opcode = kAtomicCompareExchangeWord32;
2481 } else {
2482 UNREACHABLE();
2483 }
2484 AddressingMode addressing_mode;
2485 InstructionOperand new_val_operand =
2486 (type.representation() == MachineRepresentation::kWord8)
2487 ? g.UseByteRegister(new_value)
2488 : g.UseUniqueRegister(new_value);
2489 InstructionOperand inputs[] = {
2490 g.UseFixed(old_value, eax), new_val_operand, g.UseUniqueRegister(base),
2491 g.GetEffectiveIndexOperand(index, &addressing_mode)};
2492 InstructionOperand outputs[] = {g.DefineAsFixed(node, eax)};
2493 InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
2494 Emit(code, 1, outputs, arraysize(inputs), inputs);
2495}
2496
2497void InstructionSelectorT::VisitWord32AtomicBinaryOperation(
2498 OpIndex node, ArchOpcode int8_op, ArchOpcode uint8_op, ArchOpcode int16_op,
2499 ArchOpcode uint16_op, ArchOpcode word32_op) {
2500 { // Temporary scope to minimize indentation change churn below.
2501 MachineType type = AtomicOpType(this, node);
2502 ArchOpcode opcode;
2503 if (type == MachineType::Int8()) {
2504 opcode = int8_op;
2505 } else if (type == MachineType::Uint8()) {
2506 opcode = uint8_op;
2507 } else if (type == MachineType::Int16()) {
2508 opcode = int16_op;
2509 } else if (type == MachineType::Uint16()) {
2510 opcode = uint16_op;
2511 } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
2512 opcode = word32_op;
2513 } else {
2514 UNREACHABLE();
2515 }
2516 VisitAtomicBinOp(this, node, opcode, type.representation());
2517 }
2518}
2519
2520#define VISIT_ATOMIC_BINOP(op) \
2521 void InstructionSelectorT::VisitWord32Atomic##op(OpIndex node) { \
2522 VisitWord32AtomicBinaryOperation( \
2523 node, kAtomic##op##Int8, kAtomic##op##Uint8, kAtomic##op##Int16, \
2524 kAtomic##op##Uint16, kAtomic##op##Word32); \
2525 }
2526VISIT_ATOMIC_BINOP(Add)
2527VISIT_ATOMIC_BINOP(Sub)
2528VISIT_ATOMIC_BINOP(And)
2529VISIT_ATOMIC_BINOP(Or)
2530VISIT_ATOMIC_BINOP(Xor)
2531#undef VISIT_ATOMIC_BINOP
2532
2533void InstructionSelectorT::VisitWord32AtomicPairLoad(OpIndex node) {
2534 // Both acquire and sequentially consistent loads can emit MOV.
2535 // https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
2536 IA32OperandGeneratorT g(this);
2537 AddressingMode mode;
2538 OpIndex base = this->input_at(node, 0);
2539 OpIndex index = this->input_at(node, 1);
2540 OptionalOpIndex projection0 = FindProjection(node, 0);
2541 OptionalOpIndex projection1 = FindProjection(node, 1);
2542 if (projection0.valid() && projection1.valid()) {
2543 InstructionOperand inputs[] = {g.UseUniqueRegister(base),
2544 g.GetEffectiveIndexOperand(index, &mode)};
2545 InstructionCode code =
2546 kIA32Word32AtomicPairLoad | AddressingModeField::encode(mode);
2547 InstructionOperand outputs[] = {g.DefineAsRegister(projection0.value()),
2548 g.DefineAsRegister(projection1.value())};
2549 Emit(code, 2, outputs, 2, inputs);
2550 } else if (projection0.valid() || projection1.valid()) {
2551 // Only one word is needed, so it's enough to load just that.
2552 ArchOpcode opcode = kIA32Movl;
2553
2554 InstructionOperand outputs[] = {g.DefineAsRegister(
2555 projection0.valid() ? projection0.value() : projection1.value())};
2556 InstructionOperand inputs[3];
2557 size_t input_count = 0;
2558 // TODO(ahaas): Introduce an enum for {scale} instead of an integer.
2559 // {scale = 0} means *1 in the generated code.
2560 int scale = 0;
2561 mode = g.GenerateMemoryOperandInputs(
2562 index, scale, base, projection0.valid() ? 0 : 4, kPositiveDisplacement,
2563 inputs, &input_count);
2564 InstructionCode code = opcode | AddressingModeField::encode(mode);
2565 Emit(code, 1, outputs, input_count, inputs);
2566 }
2567}
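// Note (illustrative): when only one half of the pair is observed, the code
// above degrades to a plain 32-bit kIA32Movl, using displacement 0 for the
// low word (projection 0) and 4 for the high word (projection 1).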
2568
2569void InstructionSelectorT::VisitWord32AtomicPairStore(OpIndex node) {
2570 // Release pair stores emit a MOVQ via a double register, and sequentially
2571 // consistent stores emit CMPXCHG8B.
2572 // https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
2573
2574 IA32OperandGeneratorT g(this);
2575 OpIndex base = this->input_at(node, 0);
2576 OpIndex index = this->input_at(node, 1);
2577 OpIndex value = this->input_at(node, 2);
2578 OpIndex value_high = this->input_at(node, 3);
2579
2580 AtomicMemoryOrder order = AtomicOrder(this, node);
2581 if (order == AtomicMemoryOrder::kAcqRel) {
2582 AddressingMode addressing_mode;
2583 InstructionOperand inputs[] = {
2584 g.UseUniqueRegisterOrSlotOrConstant(value),
2585 g.UseUniqueRegisterOrSlotOrConstant(value_high),
2586 g.UseUniqueRegister(base),
2587 g.GetEffectiveIndexOperand(index, &addressing_mode),
2588 };
2589 InstructionCode code = kIA32Word32ReleasePairStore |
2590 AddressingModeField::encode(addressing_mode);
2591 Emit(code, 0, nullptr, arraysize(inputs), inputs);
2592 } else {
2593 DCHECK_EQ(order, AtomicMemoryOrder::kSeqCst);
2594
2595 AddressingMode addressing_mode;
2596 InstructionOperand inputs[] = {
2597 g.UseUniqueRegisterOrSlotOrConstant(value), g.UseFixed(value_high, ecx),
2598 g.UseUniqueRegister(base),
2599 g.GetEffectiveIndexOperand(index, &addressing_mode)};
2600 // Allocating temp registers here as stores are performed using an atomic
2601 // exchange, the output of which is stored in edx:eax, which should be saved
2602 // and restored at the end of the instruction.
2603 InstructionOperand temps[] = {g.TempRegister(eax), g.TempRegister(edx)};
2604 const int num_temps = arraysize(temps);
2605 InstructionCode code = kIA32Word32SeqCstPairStore |
2606 AddressingModeField::encode(addressing_mode);
2607 Emit(code, 0, nullptr, arraysize(inputs), inputs, num_temps, temps);
2608 }
2609}
2610
2611void InstructionSelectorT::VisitWord32AtomicPairAdd(OpIndex node) {
2612 VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairAdd);
2613}
2614
2615void InstructionSelectorT::VisitWord32AtomicPairSub(OpIndex node) {
2616 VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairSub);
2617}
2618
2619void InstructionSelectorT::VisitWord32AtomicPairAnd(OpIndex node) {
2620 VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairAnd);
2621}
2622
2623void InstructionSelectorT::VisitWord32AtomicPairOr(OpIndex node) {
2624 VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairOr);
2625}
2626
2627void InstructionSelectorT::VisitWord32AtomicPairXor(OpIndex node) {
2628 VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairXor);
2629}
2630
2631void InstructionSelectorT::VisitWord32AtomicPairExchange(OpIndex node) {
2632 VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairExchange);
2633}
2634
2635void InstructionSelectorT::VisitWord32AtomicPairCompareExchange(OpIndex node) {
2636 IA32OperandGeneratorT g(this);
2637 OpIndex index = this->input_at(node, 1);
2638 AddressingMode addressing_mode;
2639
2640 const size_t expected_offset = 4;
2641 const size_t value_offset = 2;
2642 InstructionOperand inputs[] = {
2643 // High, Low values of old value
2644 g.UseFixed(this->input_at(node, expected_offset), eax),
2645 g.UseFixed(this->input_at(node, expected_offset + 1), edx),
2646 // High, Low values of new value
2647 g.UseUniqueRegisterOrSlotOrConstant(this->input_at(node, value_offset)),
2648 g.UseFixed(this->input_at(node, value_offset + 1), ecx),
2649 // InputAt(0) => base
2650 g.UseUniqueRegister(this->input_at(node, 0)),
2651 g.GetEffectiveIndexOperand(index, &addressing_mode)};
2652 OptionalOpIndex projection0 = FindProjection(node, 0);
2653 OptionalOpIndex projection1 = FindProjection(node, 1);
2654 InstructionCode code = kIA32Word32AtomicPairCompareExchange |
2655 AddressingModeField::encode(addressing_mode);
2656
2657 InstructionOperand outputs[2];
2658 size_t output_count = 0;
2659 InstructionOperand temps[2];
2660 size_t temp_count = 0;
2661 if (projection0.valid()) {
2662 outputs[output_count++] = g.DefineAsFixed(projection0.value(), eax);
2663 } else {
2664 temps[temp_count++] = g.TempRegister(eax);
2665 }
2666 if (projection1.valid()) {
2667 outputs[output_count++] = g.DefineAsFixed(projection1.value(), edx);
2668 } else {
2669 temps[temp_count++] = g.TempRegister(edx);
2670 }
2671 Emit(code, output_count, outputs, arraysize(inputs), inputs, temp_count,
2672 temps);
2673}
2674
2675#define SIMD_INT_TYPES(V) \
2676 V(I32x4) \
2677 V(I16x8) \
2678 V(I8x16)
2679
2680#define SIMD_BINOP_LIST(V) \
2681 V(I32x4GtU) \
2682 V(I32x4GeU) \
2683 V(I16x8Ne) \
2684 V(I16x8GeS) \
2685 V(I16x8GtU) \
2686 V(I16x8GeU) \
2687 V(I8x16Ne) \
2688 V(I8x16GeS) \
2689 V(I8x16GtU) \
2690 V(I8x16GeU)
2691
2692#define SIMD_BINOP_UNIFIED_SSE_AVX_LIST(V) \
2693 V(F32x4Add) \
2694 V(F32x4Sub) \
2695 V(F32x4Mul) \
2696 V(F32x4Div) \
2697 V(F32x4Eq) \
2698 V(F32x4Ne) \
2699 V(F32x4Lt) \
2700 V(F32x4Le) \
2701 V(F32x4Min) \
2702 V(F32x4Max) \
2703 IF_WASM(V, F64x2Add) \
2704 IF_WASM(V, F64x2Sub) \
2705 IF_WASM(V, F64x2Mul) \
2706 IF_WASM(V, F64x2Div) \
2707 IF_WASM(V, F64x2Eq) \
2708 IF_WASM(V, F64x2Ne) \
2709 IF_WASM(V, F64x2Lt) \
2710 IF_WASM(V, F64x2Le) \
2711 V(I64x2Add) \
2712 V(I64x2Sub) \
2713 V(I64x2Eq) \
2714 V(I64x2Ne) \
2715 V(I32x4Add) \
2716 V(I32x4Sub) \
2717 V(I32x4Mul) \
2718 V(I32x4MinS) \
2719 V(I32x4MaxS) \
2720 V(I32x4Eq) \
2721 V(I32x4Ne) \
2722 V(I32x4GtS) \
2723 V(I32x4GeS) \
2724 V(I32x4MinU) \
2725 V(I32x4MaxU) \
2726 V(I32x4DotI16x8S) \
2727 V(I16x8Add) \
2728 V(I16x8AddSatS) \
2729 V(I16x8Sub) \
2730 V(I16x8SubSatS) \
2731 V(I16x8Mul) \
2732 V(I16x8Eq) \
2733 V(I16x8GtS) \
2734 V(I16x8MinS) \
2735 V(I16x8MaxS) \
2736 V(I16x8AddSatU) \
2737 V(I16x8SubSatU) \
2738 V(I16x8MinU) \
2739 V(I16x8MaxU) \
2740 V(I16x8SConvertI32x4) \
2741 V(I16x8UConvertI32x4) \
2742 V(I16x8RoundingAverageU) \
2743 V(I8x16Add) \
2744 V(I8x16AddSatS) \
2745 V(I8x16Sub) \
2746 V(I8x16SubSatS) \
2747 V(I8x16MinS) \
2748 V(I8x16MaxS) \
2749 V(I8x16Eq) \
2750 V(I8x16GtS) \
2751 V(I8x16AddSatU) \
2752 V(I8x16SubSatU) \
2753 V(I8x16MinU) \
2754 V(I8x16MaxU) \
2755 V(I8x16SConvertI16x8) \
2756 V(I8x16UConvertI16x8) \
2757 V(I8x16RoundingAverageU) \
2758 V(S128And) \
2759 V(S128Or) \
2760 V(S128Xor)
2761
2762// These opcodes require all inputs to be registers because the codegen is
2763// simpler with all registers.
2764#define SIMD_BINOP_RRR(V) \
2765 V(I64x2ExtMulLowI32x4S) \
2766 V(I64x2ExtMulHighI32x4S) \
2767 V(I64x2ExtMulLowI32x4U) \
2768 V(I64x2ExtMulHighI32x4U) \
2769 V(I32x4ExtMulLowI16x8S) \
2770 V(I32x4ExtMulHighI16x8S) \
2771 V(I32x4ExtMulLowI16x8U) \
2772 V(I32x4ExtMulHighI16x8U) \
2773 V(I16x8ExtMulLowI8x16S) \
2774 V(I16x8ExtMulHighI8x16S) \
2775 V(I16x8ExtMulLowI8x16U) \
2776 V(I16x8ExtMulHighI8x16U) \
2777 V(I16x8Q15MulRSatS) \
2778 V(I16x8RelaxedQ15MulRS)
2779
2780#define SIMD_UNOP_LIST(V) \
2781 V(F64x2ConvertLowI32x4S) \
2782 V(F32x4DemoteF64x2Zero) \
2783 V(F32x4Sqrt) \
2784 V(F32x4SConvertI32x4) \
2785 V(I64x2BitMask) \
2786 V(I64x2SConvertI32x4Low) \
2787 V(I64x2SConvertI32x4High) \
2788 V(I64x2UConvertI32x4Low) \
2789 V(I64x2UConvertI32x4High) \
2790 V(I32x4SConvertI16x8Low) \
2791 V(I32x4SConvertI16x8High) \
2792 V(I32x4Neg) \
2793 V(I32x4UConvertI16x8Low) \
2794 V(I32x4UConvertI16x8High) \
2795 V(I32x4Abs) \
2796 V(I32x4BitMask) \
2797 V(I16x8SConvertI8x16Low) \
2798 V(I16x8SConvertI8x16High) \
2799 V(I16x8Neg) \
2800 V(I16x8UConvertI8x16Low) \
2801 V(I16x8UConvertI8x16High) \
2802 V(I16x8Abs) \
2803 V(I8x16Neg) \
2804 V(I8x16Abs) \
2805 V(I8x16BitMask) \
2806 V(S128Not)
2807
2808#define SIMD_ALLTRUE_LIST(V) \
2809 V(I64x2AllTrue) \
2810 V(I32x4AllTrue) \
2811 V(I16x8AllTrue) \
2812 V(I8x16AllTrue)
2813
2814#define SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(V) \
2815 V(I64x2Shl) \
2816 V(I64x2ShrU) \
2817 V(I32x4Shl) \
2818 V(I32x4ShrS) \
2819 V(I32x4ShrU) \
2820 V(I16x8Shl) \
2821 V(I16x8ShrS) \
2822 V(I16x8ShrU)
2823
2824#if V8_ENABLE_WEBASSEMBLY
2825
2826void InstructionSelectorT::VisitS128Const(OpIndex node) {
2827 IA32OperandGeneratorT g(this);
2828 static const int kUint32Immediates = kSimd128Size / sizeof(uint32_t);
2829 uint32_t val[kUint32Immediates];
2830 const Simd128ConstantOp& constant =
2831 this->Get(node).template Cast<Simd128ConstantOp>();
2832 memcpy(val, constant.value, kSimd128Size);
2833 // If all bytes are zeros or ones, avoid emitting code for generic constants
2834 bool all_zeros = !(val[0] || val[1] || val[2] || val[3]);
2835 bool all_ones = val[0] == UINT32_MAX && val[1] == UINT32_MAX &&
2836 val[2] == UINT32_MAX && val[3] == UINT32_MAX;
2837 InstructionOperand dst = g.DefineAsRegister(node);
2838 if (all_zeros) {
2839 Emit(kIA32S128Zero, dst);
2840 } else if (all_ones) {
2841 Emit(kIA32S128AllOnes, dst);
2842 } else {
2843 InstructionOperand inputs[kUint32Immediates];
2844 for (int i = 0; i < kUint32Immediates; ++i) {
2845 inputs[i] = g.UseImmediate(val[i]);
2846 }
2847 InstructionOperand temp(g.TempRegister());
2848 Emit(kIA32S128Const, 1, &dst, kUint32Immediates, inputs, 1, &temp);
2849 }
2850}
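// Example (illustrative): an all-zero constant is emitted as kIA32S128Zero
// (typically a self pxor) and an all-ones constant as kIA32S128AllOnes
// (typically a self pcmpeqd); any other constant materializes its four 32-bit
// words through kIA32S128Const with one scratch register.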
2851
2852void InstructionSelectorT::VisitF64x2Min(OpIndex node) {
2853 IA32OperandGeneratorT g(this);
2854 InstructionOperand operand0 = g.UseRegister(this->input_at(node, 0));
2855 InstructionOperand operand1 = g.UseRegister(this->input_at(node, 1));
2856
2857 if (IsSupported(AVX)) {
2858 Emit(kIA32F64x2Min, g.DefineAsRegister(node), operand0, operand1);
2859 } else {
2860 Emit(kIA32F64x2Min, g.DefineSameAsFirst(node), operand0, operand1);
2861 }
2862}
2863
2864void InstructionSelectorT::VisitF64x2Max(OpIndex node) {
2865 IA32OperandGeneratorT g(this);
2866 InstructionOperand operand0 = g.UseRegister(this->input_at(node, 0));
2867 InstructionOperand operand1 = g.UseRegister(this->input_at(node, 1));
2868 if (IsSupported(AVX)) {
2869 Emit(kIA32F64x2Max, g.DefineAsRegister(node), operand0, operand1);
2870 } else {
2871 Emit(kIA32F64x2Max, g.DefineSameAsFirst(node), operand0, operand1);
2872 }
2873}
2874
2875void InstructionSelectorT::VisitF64x2Splat(OpIndex node) {
2876 VisitRRSimd(this, node, kIA32F64x2Splat);
2877}
2878
2879void InstructionSelectorT::VisitF64x2ExtractLane(OpIndex node) {
2880 VisitRRISimd(this, node, kIA32F64x2ExtractLane, kIA32F64x2ExtractLane);
2881}
2882
2883void InstructionSelectorT::VisitI64x2SplatI32Pair(OpIndex node) {
2884 // In turboshaft it gets lowered to an I32x4Splat.
2885 UNREACHABLE();
2886}
2887
2888void InstructionSelectorT::VisitI64x2ReplaceLaneI32Pair(OpIndex node) {
2889 // In turboshaft it gets lowered to an I32x4ReplaceLane.
2890 UNREACHABLE();
2891}
2892
2893void InstructionSelectorT::VisitI64x2Neg(OpIndex node) {
2894 IA32OperandGeneratorT g(this);
2895 // If AVX unsupported, make sure dst != src to avoid a move.
2896 InstructionOperand operand0 =
2897 IsSupported(AVX) ? g.UseRegister(this->input_at(node, 0))
2898 : g.UseUniqueRegister(this->input_at(node, 0));
2899 Emit(kIA32I64x2Neg, g.DefineAsRegister(node), operand0);
2900}
2901
2902void InstructionSelectorT::VisitI64x2ShrS(OpIndex node) {
2903 IA32OperandGeneratorT g(this);
2904 InstructionOperand dst =
2905 IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
2906
2907 if (g.CanBeImmediate(this->input_at(node, 1))) {
2908 Emit(kIA32I64x2ShrS, dst, g.UseRegister(this->input_at(node, 0)),
2909 g.UseImmediate(this->input_at(node, 1)));
2910 } else {
2911 InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
2912 Emit(kIA32I64x2ShrS, dst, g.UseUniqueRegister(this->input_at(node, 0)),
2913 g.UseRegister(this->input_at(node, 1)), arraysize(temps), temps);
2914 }
2915}
2916
2917void InstructionSelectorT::VisitI64x2Mul(OpIndex node) {
2918 IA32OperandGeneratorT g(this);
2919 InstructionOperand temps[] = {g.TempSimd128Register(),
2920 g.TempSimd128Register()};
2921 Emit(kIA32I64x2Mul, g.DefineAsRegister(node),
2922 g.UseUniqueRegister(this->input_at(node, 0)),
2923 g.UseUniqueRegister(this->input_at(node, 1)), arraysize(temps), temps);
2924}
2925
2926void InstructionSelectorT::VisitF32x4Splat(OpIndex node) {
2927 VisitRRSimd(this, node, kIA32F32x4Splat);
2928}
2929
2930void InstructionSelectorT::VisitF32x4ExtractLane(OpIndex node) {
2931 VisitRRISimd(this, node, kIA32F32x4ExtractLane);
2932}
2933
2934void InstructionSelectorT::VisitF32x4UConvertI32x4(OpIndex node) {
2935 VisitRRSimd(this, node, kIA32F32x4UConvertI32x4);
2936}
2937
2938void InstructionSelectorT::VisitI32x4SConvertF32x4(OpIndex node) {
2939 IA32OperandGeneratorT g(this);
2940 InstructionOperand temps[] = {g.TempRegister()};
2941 InstructionOperand dst =
2942 IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
2943 Emit(kIA32I32x4SConvertF32x4, dst, g.UseRegister(this->input_at(node, 0)),
2944 arraysize(temps), temps);
2945}
2946
2947void InstructionSelectorT::VisitI32x4UConvertF32x4(OpIndex node) {
2948 IA32OperandGeneratorT g(this);
2949 InstructionOperand temps[] = {g.TempSimd128Register(),
2950 g.TempSimd128Register()};
2951 InstructionCode opcode =
2952 IsSupported(AVX) ? kAVXI32x4UConvertF32x4 : kSSEI32x4UConvertF32x4;
2953 Emit(opcode, g.DefineSameAsFirst(node),
2954 g.UseRegister(this->input_at(node, 0)), arraysize(temps), temps);
2955}
2956
2957void InstructionSelectorT::VisitS128Zero(OpIndex node) {
2958 IA32OperandGeneratorT g(this);
2959 Emit(kIA32S128Zero, g.DefineAsRegister(node));
2960}
2961
2962void InstructionSelectorT::VisitS128Select(OpIndex node) {
2963 IA32OperandGeneratorT g(this);
2964 InstructionOperand dst =
2965 IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
2966 Emit(kIA32S128Select, dst, g.UseRegister(this->input_at(node, 0)),
2967 g.UseRegister(this->input_at(node, 1)),
2968 g.UseRegister(this->input_at(node, 2)));
2969}
2970
2971void InstructionSelectorT::VisitS128AndNot(OpIndex node) {
2972 IA32OperandGeneratorT g(this);
2973 // andnps a b does ~a & b, but we want a & ~b, so flip the inputs.
2974 InstructionOperand dst =
2975 IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
2976 Emit(kIA32S128AndNot, dst, g.UseRegister(this->input_at(node, 1)),
2977 g.UseRegister(this->input_at(node, 0)));
2978}
2979
2980#define VISIT_SIMD_SPLAT(Type) \
2981 void InstructionSelectorT::Visit##Type##Splat(OpIndex node) { \
2982 bool set_zero = this->MatchIntegralZero(this->input_at(node, 0)); \
2983 if (set_zero) { \
2984 IA32OperandGeneratorT g(this); \
2985 Emit(kIA32S128Zero, g.DefineAsRegister(node)); \
2986 } else { \
2987 VisitRO(this, node, kIA32##Type##Splat); \
2988 } \
2989 }
2990SIMD_INT_TYPES(VISIT_SIMD_SPLAT)
2991#undef SIMD_INT_TYPES
2992#undef VISIT_SIMD_SPLAT
2993
2994void InstructionSelectorT::VisitF16x8Splat(OpIndex node) { UNIMPLEMENTED(); }
2995
2996void InstructionSelectorT::VisitI8x16ExtractLaneU(OpIndex node) {
2997 VisitRRISimd(this, node, kIA32Pextrb);
2998}
2999
3000void InstructionSelectorT::VisitI8x16ExtractLaneS(OpIndex node) {
3001 VisitRRISimd(this, node, kIA32I8x16ExtractLaneS);
3002}
3003
3004void InstructionSelectorT::VisitI16x8ExtractLaneU(OpIndex node) {
3005 VisitRRISimd(this, node, kIA32Pextrw);
3006}
3007
3008void InstructionSelectorT::VisitI16x8ExtractLaneS(OpIndex node) {
3009 VisitRRISimd(this, node, kIA32I16x8ExtractLaneS);
3010}
3011
3012void InstructionSelectorT::VisitI32x4ExtractLane(OpIndex node) {
3013 VisitRRISimd(this, node, kIA32I32x4ExtractLane);
3014}
3015
3016void InstructionSelectorT::VisitF16x8ExtractLane(OpIndex node) {
3017 UNIMPLEMENTED();
3018}
3019
3020void InstructionSelectorT::VisitF16x8ReplaceLane(OpIndex node) {
3021 UNIMPLEMENTED();
3022}
3023
3024#define SIMD_REPLACE_LANE_TYPE_OP(V) \
3025 V(I32x4, kIA32Pinsrd) \
3026 V(I16x8, kIA32Pinsrw) \
3027 V(I8x16, kIA32Pinsrb) \
3028 V(F32x4, kIA32Insertps) \
3029 V(F64x2, kIA32F64x2ReplaceLane)
3030
3031#define VISIT_SIMD_REPLACE_LANE(TYPE, OPCODE) \
3032 void InstructionSelectorT::Visit##TYPE##ReplaceLane(OpIndex node) { \
3033 IA32OperandGeneratorT g(this); \
3034 const Simd128ReplaceLaneOp& op = \
3035 this->Get(node).template Cast<Simd128ReplaceLaneOp>(); \
3036 int lane = op.lane; \
3037 InstructionOperand operand0 = g.UseRegister(this->input_at(node, 0)); \
3038 InstructionOperand operand1 = g.UseImmediate(lane); \
3039 auto input1 = this->input_at(node, 1); \
3040 InstructionOperand operand2; \
3041 if constexpr (OPCODE == kIA32F64x2ReplaceLane) { \
3042 operand2 = g.UseRegister(input1); \
3043 } else { \
3044 operand2 = g.Use(input1); \
3045 } \
3046 /* When no-AVX, define dst == src to save a move. */ \
3047 InstructionOperand dst = IsSupported(AVX) ? g.DefineAsRegister(node) \
3048 : g.DefineSameAsFirst(node); \
3049 Emit(OPCODE, dst, operand0, operand1, operand2); \
3050 }
3051SIMD_REPLACE_LANE_TYPE_OP(VISIT_SIMD_REPLACE_LANE)
3052#undef VISIT_SIMD_REPLACE_LANE
3053#undef SIMD_REPLACE_LANE_TYPE_OP
3054
3055#define VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX(Opcode) \
3056 void InstructionSelectorT::Visit##Opcode(OpIndex node) { \
3057 VisitRROSimdShift(this, node, kIA32##Opcode); \
3058 }
3059SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX)
3060#undef VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX
3061#undef SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX
3062
3063// TODO(v8:9198): SSE requires operand0 to be a register as we don't have memory
3064// alignment yet. For AVX, memory operands are fine, but can have performance
3065// issues if not aligned to 16/32 bytes (based on load size), see SDM Vol 1,
3066// chapter 14.9
3067#define VISIT_SIMD_UNOP(Opcode) \
3068 void InstructionSelectorT::Visit##Opcode(OpIndex node) { \
3069 IA32OperandGeneratorT g(this); \
3070 Emit(kIA32##Opcode, g.DefineAsRegister(node), \
3071 g.UseRegister(this->input_at(node, 0))); \
3072 }
3073SIMD_UNOP_LIST(VISIT_SIMD_UNOP)
3074#undef VISIT_SIMD_UNOP
3075#undef SIMD_UNOP_LIST
3076
3077#define UNIMPLEMENTED_SIMD_UNOP_LIST(V) \
3078 V(F16x8Abs) \
3079 V(F16x8Neg) \
3080 V(F16x8Sqrt) \
3081 V(F16x8Floor) \
3082 V(F16x8Ceil) \
3083 V(F16x8Trunc) \
3084 V(F16x8NearestInt)
3085
3086#define SIMD_VISIT_UNIMPL_UNOP(Name) \
3087 void InstructionSelectorT::Visit##Name(OpIndex node) { UNIMPLEMENTED(); }
3088
3089UNIMPLEMENTED_SIMD_UNOP_LIST(SIMD_VISIT_UNIMPL_UNOP)
3090#undef SIMD_VISIT_UNIMPL_UNOP
3091#undef UNIMPLEMENTED_SIMD_UNOP_LIST
3092
3093#define UNIMPLEMENTED_SIMD_CVTOP_LIST(V) \
3094 V(F16x8SConvertI16x8) \
3095 V(F16x8UConvertI16x8) \
3096 V(I16x8SConvertF16x8) \
3097 V(I16x8UConvertF16x8) \
3098 V(F32x4PromoteLowF16x8) \
3099 V(F16x8DemoteF32x4Zero) \
3100 V(F16x8DemoteF64x2Zero)
3101
3102#define SIMD_VISIT_UNIMPL_CVTOP(Name) \
3103 void InstructionSelectorT::Visit##Name(OpIndex node) { UNIMPLEMENTED(); }
3104
3105UNIMPLEMENTED_SIMD_CVTOP_LIST(SIMD_VISIT_UNIMPL_CVTOP)
3106#undef SIMD_VISIT_UNIMPL_CVTOP
3107#undef UNIMPLEMENTED_SIMD_CVTOP_LIST
3108
3109void InstructionSelectorT::VisitV128AnyTrue(OpIndex node) {
3110 IA32OperandGeneratorT g(this);
3111 InstructionOperand temps[] = {g.TempRegister()};
3112 Emit(kIA32S128AnyTrue, g.DefineAsRegister(node),
3113 g.UseRegister(this->input_at(node, 0)), arraysize(temps), temps);
3114}
3115
3116#define VISIT_SIMD_ALLTRUE(Opcode) \
3117 void InstructionSelectorT::Visit##Opcode(OpIndex node) { \
3118 IA32OperandGeneratorT g(this); \
3119 InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \
3120 Emit(kIA32##Opcode, g.DefineAsRegister(node), \
3121 g.UseUniqueRegister(this->input_at(node, 0)), arraysize(temps), \
3122 temps); \
3123 }
3124SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE)
3125#undef VISIT_SIMD_ALLTRUE
3126#undef SIMD_ALLTRUE_LIST
3127
3128#define VISIT_SIMD_BINOP(Opcode) \
3129 void InstructionSelectorT::Visit##Opcode(OpIndex node) { \
3130 VisitRROSimd(this, node, kAVX##Opcode, kSSE##Opcode); \
3131 }
3132SIMD_BINOP_LIST(VISIT_SIMD_BINOP)
3133#undef VISIT_SIMD_BINOP
3134#undef SIMD_BINOP_LIST
3135
3136#define UNIMPLEMENTED_SIMD_BINOP_LIST(V) \
3137 V(F16x8Add) \
3138 V(F16x8Sub) \
3139 V(F16x8Mul) \
3140 V(F16x8Div) \
3141 V(F16x8Min) \
3142 V(F16x8Max) \
3143 V(F16x8Pmin) \
3144 V(F16x8Pmax) \
3145 V(F16x8Eq) \
3146 V(F16x8Ne) \
3147 V(F16x8Lt) \
3148 V(F16x8Le)
3149
3150#define SIMD_VISIT_UNIMPL_BINOP(Name) \
3151 void InstructionSelectorT::Visit##Name(OpIndex node) { UNIMPLEMENTED(); }
3152
3153UNIMPLEMENTED_SIMD_BINOP_LIST(SIMD_VISIT_UNIMPL_BINOP)
3154#undef SIMD_VISIT_UNIMPL_BINOP
3155#undef UNIMPLEMENTED_SIMD_BINOP_LIST
3156
3157#define VISIT_SIMD_BINOP_UNIFIED_SSE_AVX(Opcode) \
3158 void InstructionSelectorT::Visit##Opcode(OpIndex node) { \
3159 VisitRROSimd(this, node, kIA32##Opcode, kIA32##Opcode); \
3160 }
3161SIMD_BINOP_UNIFIED_SSE_AVX_LIST(VISIT_SIMD_BINOP_UNIFIED_SSE_AVX)
3162#undef VISIT_SIMD_BINOP_UNIFIED_SSE_AVX
3163#undef SIMD_BINOP_UNIFIED_SSE_AVX_LIST
3164
3165#define VISIT_SIMD_BINOP_RRR(OPCODE) \
3166 void InstructionSelectorT::Visit##OPCODE(OpIndex node) { \
3167 VisitRRRSimd(this, node, kIA32##OPCODE); \
3168 }
3169SIMD_BINOP_RRR(VISIT_SIMD_BINOP_RRR)
3170#undef VISIT_SIMD_BINOP_RRR
3171#undef SIMD_BINOP_RRR
3172
3173void InstructionSelectorT::VisitI16x8BitMask(OpIndex node) {
3174 IA32OperandGeneratorT g(this);
3175 InstructionOperand temps[] = {g.TempSimd128Register()};
3176 Emit(kIA32I16x8BitMask, g.DefineAsRegister(node),
3177 g.UseUniqueRegister(this->input_at(node, 0)), arraysize(temps), temps);
3178}
3179
3180void InstructionSelectorT::VisitI8x16Shl(OpIndex node) {
3181 VisitI8x16Shift(this, node, kIA32I8x16Shl);
3182}
3183
3184void InstructionSelectorT::VisitI8x16ShrS(OpIndex node) {
3185 VisitI8x16Shift(this, node, kIA32I8x16ShrS);
3186}
3187
3188void InstructionSelectorT::VisitI8x16ShrU(OpIndex node) {
3189 VisitI8x16Shift(this, node, kIA32I8x16ShrU);
3190}
3191#endif // V8_ENABLE_WEBASSEMBLY
3192
3193void InstructionSelectorT::VisitInt32AbsWithOverflow(OpIndex node) {
3194 UNREACHABLE();
3195}
3196
3197void InstructionSelectorT::VisitInt64AbsWithOverflow(OpIndex node) {
3198 UNREACHABLE();
3199}
3200
3201#if V8_ENABLE_WEBASSEMBLY
3202namespace {
3203
3204// Returns true if shuffle can be decomposed into two 16x4 half shuffles
3205// followed by a 16x8 blend.
3206// E.g. [3 2 1 0 15 14 13 12].
3207bool TryMatch16x8HalfShuffle(uint8_t* shuffle16x8, uint8_t* blend_mask) {
3208 *blend_mask = 0;
3209 for (int i = 0; i < 8; i++) {
3210 if ((shuffle16x8[i] & 0x4) != (i & 0x4)) return false;
3211 *blend_mask |= (shuffle16x8[i] > 7 ? 1 : 0) << i;
3212 }
3213 return true;
3214}
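// A minimal standalone sketch of the check above, using the example shuffle
// from the comment, [3 2 1 0 15 14 13 12] (the sketch names below are
// illustrative and not part of the surrounding selector code): lanes 0..3
// stay in their 16x4 half, lanes 4..7 read from the second operand, so the
// resulting blend mask is 0xF0.
constexpr uint8_t kHalfShuffleExample[8] = {3, 2, 1, 0, 15, 14, 13, 12};
constexpr uint8_t HalfShuffleBlendMaskSketch(const uint8_t* shuffle16x8) {
  uint8_t blend_mask = 0;
  for (int i = 0; i < 8; ++i) {
    // Same blend-mask arithmetic as TryMatch16x8HalfShuffle: record which
    // lanes are taken from the second operand.
    blend_mask |= (shuffle16x8[i] > 7 ? 1 : 0) << i;
  }
  return blend_mask;
}
static_assert(HalfShuffleBlendMaskSketch(kHalfShuffleExample) == 0xF0,
              "lanes 4..7 of the example select from the second operand");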
3215
3216struct ShuffleEntry {
3217 uint8_t shuffle[kSimd128Size];
3218 ArchOpcode opcode;
3219 ArchOpcode avx_opcode;
3220 bool src0_needs_reg;
3221 bool src1_needs_reg;
3222};
3223
3224// Shuffles that map to architecture-specific instruction sequences. These are
3225// matched very early, so we shouldn't include shuffles that match better in
3226// later tests, like 32x4 and 16x8 shuffles. In general, these patterns should
3227// map to either a single instruction, or be finer grained, such as zip/unzip or
3228// transpose patterns.
3229static const ShuffleEntry arch_shuffles[] = {
3230 {{0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23},
3231 kIA32S64x2UnpackLow,
3232 kIA32S64x2UnpackLow,
3233 true,
3234 false},
3235 {{8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31},
3236 kIA32S64x2UnpackHigh,
3237 kIA32S64x2UnpackHigh,
3238 true,
3239 false},
3240 {{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23},
3241 kIA32S32x4UnpackLow,
3242 kIA32S32x4UnpackLow,
3243 true,
3244 false},
3245 {{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31},
3246 kIA32S32x4UnpackHigh,
3247 kIA32S32x4UnpackHigh,
3248 true,
3249 false},
3250 {{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23},
3251 kIA32S16x8UnpackLow,
3252 kIA32S16x8UnpackLow,
3253 true,
3254 false},
3255 {{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31},
3256 kIA32S16x8UnpackHigh,
3257 kIA32S16x8UnpackHigh,
3258 true,
3259 false},
3260 {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23},
3261 kIA32S8x16UnpackLow,
3262 kIA32S8x16UnpackLow,
3263 true,
3264 false},
3265 {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31},
3266 kIA32S8x16UnpackHigh,
3267 kIA32S8x16UnpackHigh,
3268 true,
3269 false},
3270
3271 {{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29},
3272 kSSES16x8UnzipLow,
3273 kAVXS16x8UnzipLow,
3274 true,
3275 false},
3276 {{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31},
3277 kSSES16x8UnzipHigh,
3278 kAVXS16x8UnzipHigh,
3279 true,
3280 true},
3281 {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30},
3282 kSSES8x16UnzipLow,
3283 kAVXS8x16UnzipLow,
3284 true,
3285 true},
3286 {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31},
3287 kSSES8x16UnzipHigh,
3288 kAVXS8x16UnzipHigh,
3289 true,
3290 true},
3291
3292 {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30},
3293 kSSES8x16TransposeLow,
3294 kAVXS8x16TransposeLow,
3295 true,
3296 true},
3297 {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31},
3298 kSSES8x16TransposeHigh,
3299 kAVXS8x16TransposeHigh,
3300 true,
3301 true},
3302 {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8},
3303 kSSES8x8Reverse,
3304 kAVXS8x8Reverse,
3305 true,
3306 true},
3307 {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
3308 kSSES8x4Reverse,
3309 kAVXS8x4Reverse,
3310 true,
3311 true},
3312 {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
3313 kSSES8x2Reverse,
3314 kAVXS8x2Reverse,
3315 true,
3316 true}};
3317
3318bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
3319 size_t num_entries, bool is_swizzle,
3320 const ShuffleEntry** arch_shuffle) {
3321 uint8_t mask = is_swizzle ? kSimd128Size - 1 : 2 * kSimd128Size - 1;
3322 for (size_t i = 0; i < num_entries; ++i) {
3323 const ShuffleEntry& entry = table[i];
3324 int j = 0;
3325 for (; j < kSimd128Size; ++j) {
3326 if ((entry.shuffle[j] & mask) != (shuffle[j] & mask)) {
3327 break;
3328 }
3329 }
3330 if (j == kSimd128Size) {
3331 *arch_shuffle = &entry;
3332 return true;
3333 }
3334 }
3335 return false;
3336}
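// A minimal usage sketch for the matcher above (the helper name is
// illustrative, not part of the original selector): for a swizzle the
// comparison mask is kSimd128Size - 1 (0x0F), so table entries written for
// two operands can still match a single-input pattern; for a two-operand
// shuffle the mask is 0x1F and the operand bit must match exactly.
inline const ShuffleEntry* FindArchShuffleSketch(
    const uint8_t shuffle[kSimd128Size], bool is_swizzle) {
  const ShuffleEntry* entry = nullptr;
  return TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),
                             is_swizzle, &entry)
             ? entry
             : nullptr;
}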
3337
3338} // namespace
3339
3340void InstructionSelectorT::VisitI8x16Shuffle(OpIndex node) {
3341 uint8_t shuffle[kSimd128Size];
3342 bool is_swizzle;
3343 auto view = this->simd_shuffle_view(node);
3344 CanonicalizeShuffle(view, shuffle, &is_swizzle);
3345
3346 int imm_count = 0;
3347 static const int kMaxImms = 6;
3348 uint32_t imms[kMaxImms];
3349 int temp_count = 0;
3350 static const int kMaxTemps = 2;
3351 InstructionOperand temps[kMaxTemps];
3352
3353 IA32OperandGeneratorT g(this);
3354 bool use_avx = CpuFeatures::IsSupported(AVX);
3355 // AVX and swizzles don't generally need DefineSameAsFirst to avoid a move.
3356 bool no_same_as_first = use_avx || is_swizzle;
3357 // We generally need UseRegister for input0, Use for input1.
3358 // TODO(v8:9198): We don't have 16-byte alignment for SIMD operands yet, so
3359 // we retain this logic (and keep setting these in the various shuffle match
3360 // clauses) but ignore it when selecting registers or slots.
3361 bool src0_needs_reg = true;
3362 bool src1_needs_reg = false;
3363 ArchOpcode opcode = kIA32I8x16Shuffle; // general shuffle is the default
3364
3365 uint8_t offset;
3366 uint8_t shuffle32x4[4];
3367 uint8_t shuffle16x8[8];
3368 int index;
3369 const ShuffleEntry* arch_shuffle;
3370 if (wasm::SimdShuffle::TryMatchConcat(shuffle, &offset)) {
3371 if (wasm::SimdShuffle::TryMatch32x4Rotate(shuffle, shuffle32x4,
3372 is_swizzle)) {
3373 uint8_t shuffle_mask = wasm::SimdShuffle::PackShuffle4(shuffle32x4);
3374 opcode = kIA32S32x4Rotate;
3375 imms[imm_count++] = shuffle_mask;
3376 } else {
3377 // Swap inputs from the normal order for (v)palignr.
3378 SwapShuffleInputs(view);
3379 is_swizzle = false; // It's simpler to just handle the general case.
3380 no_same_as_first = use_avx; // SSE requires same-as-first.
3381 opcode = kIA32S8x16Alignr;
3382 // palignr takes a single imm8 offset.
3383 imms[imm_count++] = offset;
3384 }
3385 } else if (TryMatchArchShuffle(shuffle, arch_shuffles,
3386 arraysize(arch_shuffles), is_swizzle,
3387 &arch_shuffle)) {
3388 opcode = use_avx ? arch_shuffle->avx_opcode : arch_shuffle->opcode;
3389 src0_needs_reg = !use_avx || arch_shuffle->src0_needs_reg;
3390 // SSE can't take advantage of both operands in registers and needs
3391 // same-as-first.
3392 src1_needs_reg = use_avx && arch_shuffle->src1_needs_reg;
3393 no_same_as_first = use_avx;
3394 } else if (wasm::SimdShuffle::TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
3395 uint8_t shuffle_mask = wasm::SimdShuffle::PackShuffle4(shuffle32x4);
3396 if (is_swizzle) {
3397 if (wasm::SimdShuffle::TryMatchIdentity(shuffle)) {
3398 // Bypass normal shuffle code generation in this case.
3399 OpIndex input = view.input(0);
3400 // EmitIdentity
3401 MarkAsUsed(input);
3402 MarkAsDefined(node);
3403 SetRename(node, input);
3404 return;
3405 } else {
3406 // pshufd takes a single imm8 shuffle mask.
3407 opcode = kIA32S32x4Swizzle;
3408 no_same_as_first = true;
3409 // TODO(v8:9198): This doesn't strictly require a register, but we force
3410 // swizzles to always use registers until the generation of incorrect
3411 // memory operands can be fixed.
3412 src0_needs_reg = true;
3413 imms[imm_count++] = shuffle_mask;
3414 }
3415 } else {
3416 // 2 operand shuffle
3417 // A blend is more efficient than a general 32x4 shuffle; try it first.
3418 if (wasm::SimdShuffle::TryMatchBlend(shuffle)) {
3419 opcode = kIA32S16x8Blend;
3420 uint8_t blend_mask = wasm::SimdShuffle::PackBlend4(shuffle32x4);
3421 imms[imm_count++] = blend_mask;
3422 } else {
3423 opcode = kIA32S32x4Shuffle;
3424 no_same_as_first = true;
3425 // TODO(v8:9198): src0 and src1 are used by pshufd in codegen, which
3426 // requires memory to be 16-byte aligned; since we cannot guarantee that
3427 // yet, force using a register here.
3428 src0_needs_reg = true;
3429 src1_needs_reg = true;
3430 imms[imm_count++] = shuffle_mask;
3431 uint8_t blend_mask = wasm::SimdShuffle::PackBlend4(shuffle32x4);
3432 imms[imm_count++] = blend_mask;
3433 }
3434 }
3435 } else if (wasm::SimdShuffle::TryMatch16x8Shuffle(shuffle, shuffle16x8)) {
3436 uint8_t blend_mask;
3437 if (wasm::SimdShuffle::TryMatchBlend(shuffle)) {
3438 opcode = kIA32S16x8Blend;
3439 blend_mask = wasm::SimdShuffle::PackBlend8(shuffle16x8);
3440 imms[imm_count++] = blend_mask;
3441 } else if (wasm::SimdShuffle::TryMatchSplat<8>(shuffle, &index)) {
3442 opcode = kIA32S16x8Dup;
3443 src0_needs_reg = false;
3444 imms[imm_count++] = index;
3445 } else if (TryMatch16x8HalfShuffle(shuffle16x8, &blend_mask)) {
3446 opcode = is_swizzle ? kIA32S16x8HalfShuffle1 : kIA32S16x8HalfShuffle2;
3447 // Half-shuffles don't need DefineSameAsFirst or UseRegister(src0).
3448 no_same_as_first = true;
3449 src0_needs_reg = false;
3450 uint8_t mask_lo = wasm::SimdShuffle::PackShuffle4(shuffle16x8);
3451 uint8_t mask_hi = wasm::SimdShuffle::PackShuffle4(shuffle16x8 + 4);
3452 imms[imm_count++] = mask_lo;
3453 imms[imm_count++] = mask_hi;
3454 if (!is_swizzle) imms[imm_count++] = blend_mask;
3455 }
3456 } else if (wasm::SimdShuffle::TryMatchSplat<16>(shuffle, &index)) {
3457 opcode = kIA32S8x16Dup;
3458 no_same_as_first = use_avx;
3459 src0_needs_reg = true;
3460 imms[imm_count++] = index;
3461 }
3462 if (opcode == kIA32I8x16Shuffle) {
3463 // Use same-as-first for general swizzle, but not shuffle.
3464 no_same_as_first = !is_swizzle;
3465 src0_needs_reg = !no_same_as_first;
3466 imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle);
3467 imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle + 4);
3468 imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle + 8);
3469 imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle + 12);
3470 temps[temp_count++] = g.TempRegister();
3471 }
3472
3473 // Use DefineAsRegister(node) and Use(src0) if we can without forcing an extra
3474 // move instruction in the CodeGenerator.
3475 OpIndex input0 = view.input(0);
3476 InstructionOperand dst =
3477 no_same_as_first ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
3478 // TODO(v8:9198): Use src0_needs_reg when we have memory alignment for SIMD.
3479 InstructionOperand src0 = g.UseRegister(input0);
3480 USE(src0_needs_reg);
3481
3482 int input_count = 0;
3483 InstructionOperand inputs[2 + kMaxImms + kMaxTemps];
3484 inputs[input_count++] = src0;
3485 if (!is_swizzle) {
3486 OpIndex input1 = view.input(1);
3487 // TODO(v8:9198): Use src1_needs_reg when we have memory alignment for SIMD.
3488 inputs[input_count++] = g.UseRegister(input1);
3489 USE(src1_needs_reg);
3490 }
3491 for (int i = 0; i < imm_count; ++i) {
3492 inputs[input_count++] = g.UseImmediate(imms[i]);
3493 }
3494 Emit(opcode, 1, &dst, input_count, inputs, temp_count, temps);
3495}
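// Sketch of how the general (pshufb) path above encodes its immediates,
// assuming wasm::SimdShuffle::Pack4Lanes packs four byte lane indices
// little-endian into one 32-bit value (the helper name below is
// illustrative): the 16-lane shuffle reaches the code generator as four
// uint32_t immediates plus one general-purpose temp.
inline uint32_t Pack4LanesSketch(const uint8_t* lanes) {
  return static_cast<uint32_t>(lanes[0]) |
         (static_cast<uint32_t>(lanes[1]) << 8) |
         (static_cast<uint32_t>(lanes[2]) << 16) |
         (static_cast<uint32_t>(lanes[3]) << 24);
}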
3496
3497void InstructionSelectorT::VisitI8x16Swizzle(OpIndex node) {
3498 InstructionCode op = kIA32I8x16Swizzle;
3499
3500 OpIndex left = this->input_at(node, 0);
3501 OpIndex right = this->input_at(node, 1);
3502 const Simd128BinopOp& binop = this->Get(node).template Cast<Simd128BinopOp>();
3503 DCHECK(binop.kind == any_of(Simd128BinopOp::Kind::kI8x16Swizzle,
3504 Simd128BinopOp::Kind::kI8x16RelaxedSwizzle));
3505 bool relaxed = binop.kind == Simd128BinopOp::Kind::kI8x16RelaxedSwizzle;
3506 if (relaxed) {
3507 op |= MiscField::encode(true);
3508 } else {
3509 // If the indices vector is a constant and every index is either in range
3510 // or has its top bit set, we can avoid the paddusb in the codegen and
3511 // simply emit a pshufb.
3512 const Operation& right_op = this->Get(right);
3513 if (auto c = right_op.TryCast<Simd128ConstantOp>()) {
3514 std::array<uint8_t, kSimd128Size> imms;
3515 std::memcpy(&imms, c->value, kSimd128Size);
3516 op |= MiscField::encode(wasm::SimdSwizzle::AllInRangeOrTopBitSet(imms));
3517 }
3518 }
3519
3520 IA32OperandGeneratorT g(this);
3521 InstructionOperand temps[] = {g.TempRegister()};
3522 Emit(
3523 op,
3524 IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node),
3525 g.UseRegister(left), g.UseRegister(right), arraysize(temps), temps);
3526}
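// Sketch of the constant-index fast path above, assuming the predicate
// accepts a byte when it either selects a valid lane (index < 16) or has its
// top bit set, in which case pshufb already writes zero for that lane (the
// helper name is illustrative): when the predicate holds, the saturating add
// that would otherwise clamp out-of-range indices is unnecessary.
inline bool AllInRangeOrTopBitSetSketch(
    const std::array<uint8_t, kSimd128Size>& indices) {
  for (uint8_t b : indices) {
    if (b >= kSimd128Size && (b & 0x80) == 0) return false;
  }
  return true;
}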
3527
3528void InstructionSelectorT::VisitSetStackPointer(OpIndex node) {
3529 OperandGenerator g(this);
3530 auto input = g.UseAny(this->input_at(node, 0));
3531 Emit(kArchSetStackPointer, 0, nullptr, 1, &input);
3532}
3533
3534namespace {
3535
3536void VisitMinOrMax(InstructionSelectorT* selector, OpIndex node,
3537 ArchOpcode opcode, bool flip_inputs) {
3538 // Due to the way minps/minpd work, we want the dst to be same as the second
3539 // input: b = pmin(a, b) directly maps to minps b a.
3540 IA32OperandGeneratorT g(selector);
3541 InstructionOperand dst = selector->IsSupported(AVX)
3542 ? g.DefineAsRegister(node)
3543 : g.DefineSameAsFirst(node);
3544 if (flip_inputs) {
3545 // Flip the inputs so that dst aliases the second Wasm input, matching
3546 // the minps/minpd operand order described above.
3547 selector->Emit(opcode, dst, g.UseRegister(selector->input_at(node, 1)),
3548 g.UseRegister(selector->input_at(node, 0)));
3549 } else {
3550 selector->Emit(opcode, dst, g.UseRegister(selector->input_at(node, 0)),
3551 g.UseRegister(selector->input_at(node, 1)));
3552 }
3553}
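// Scalar sketch of the mapping described above, assuming the Wasm definition
// pmin(a, b) == (b < a ? b : a) and the x86 rule that minps/minpd return the
// second source operand whenever the comparison is not a strict "less than"
// (NaNs, equal values, +/-0.0). Emitting the instruction as "min b, a"
// therefore computes pmin(a, b) exactly, which is why flip_inputs swaps the
// operands.
inline float PminScalarSketch(float a, float b) { return b < a ? b : a; }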
3554} // namespace
3555
3556void InstructionSelectorT::VisitF32x4Pmin(OpIndex node) {
3557 VisitMinOrMax(this, node, kIA32Minps, true);
3558}
3559
3560void InstructionSelectorT::VisitF32x4Pmax(OpIndex node) {
3561 VisitMinOrMax(this, node, kIA32Maxps, true);
3562}
3563
3564void InstructionSelectorT::VisitF64x2Pmin(OpIndex node) {
3565 VisitMinOrMax(this, node, kIA32Minpd, true);
3566}
3567
3568void InstructionSelectorT::VisitF64x2Pmax(OpIndex node) {
3569 VisitMinOrMax(this, node, kIA32Maxpd, true);
3570}
3571
3572void InstructionSelectorT::VisitF32x4RelaxedMin(OpIndex node) {
3573 VisitMinOrMax(this, node, kIA32Minps, false);
3574}
3575
3576void InstructionSelectorT::VisitF32x4RelaxedMax(OpIndex node) {
3577 VisitMinOrMax(this, node, kIA32Maxps, false);
3578}
3579
3580void InstructionSelectorT::VisitF64x2RelaxedMin(OpIndex node) {
3581 VisitMinOrMax(this, node, kIA32Minpd, false);
3582}
3583
3584void InstructionSelectorT::VisitF64x2RelaxedMax(OpIndex node) {
3585 VisitMinOrMax(this, node, kIA32Maxpd, false);
3586}
3587
3588namespace {
3589
3590void VisitExtAddPairwise(InstructionSelectorT* selector, OpIndex node,
3591 ArchOpcode opcode, bool need_temp) {
3592 IA32OperandGeneratorT g(selector);
3593 InstructionOperand operand0 = g.UseRegister(selector->input_at(node, 0));
3594 InstructionOperand dst = (selector->IsSupported(AVX))
3595 ? g.DefineAsRegister(node)
3596 : g.DefineSameAsFirst(node);
3597 if (need_temp) {
3598 InstructionOperand temps[] = {g.TempRegister()};
3599 selector->Emit(opcode, dst, operand0, arraysize(temps), temps);
3600 } else {
3601 selector->Emit(opcode, dst, operand0);
3602 }
3603}
3604} // namespace
3605
3606void InstructionSelectorT::VisitI32x4ExtAddPairwiseI16x8S(OpIndex node) {
3607 VisitExtAddPairwise(this, node, kIA32I32x4ExtAddPairwiseI16x8S, true);
3608}
3609
3610void InstructionSelectorT::VisitI32x4ExtAddPairwiseI16x8U(OpIndex node) {
3611 VisitExtAddPairwise(this, node, kIA32I32x4ExtAddPairwiseI16x8U, false);
3612}
3613
3614void InstructionSelectorT::VisitI16x8ExtAddPairwiseI8x16S(OpIndex node) {
3615 VisitExtAddPairwise(this, node, kIA32I16x8ExtAddPairwiseI8x16S, true);
3616}
3617
3618void InstructionSelectorT::VisitI16x8ExtAddPairwiseI8x16U(OpIndex node) {
3619 VisitExtAddPairwise(this, node, kIA32I16x8ExtAddPairwiseI8x16U, true);
3620}
3621
3622void InstructionSelectorT::VisitI8x16Popcnt(OpIndex node) {
3623 IA32OperandGeneratorT g(this);
3624 InstructionOperand dst = CpuFeatures::IsSupported(AVX)
3625 ? g.DefineAsRegister(node)
3626 : g.DefineAsRegister(node);
3627 InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
3628 Emit(kIA32I8x16Popcnt, dst, g.UseUniqueRegister(this->input_at(node, 0)),
3629 arraysize(temps), temps);
3630}
3631
3632void InstructionSelectorT::VisitF64x2ConvertLowI32x4U(OpIndex node) {
3633 IA32OperandGeneratorT g(this);
3634 InstructionOperand temps[] = {g.TempRegister()};
3635 InstructionOperand dst =
3636 IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
3637 Emit(kIA32F64x2ConvertLowI32x4U, dst, g.UseRegister(this->input_at(node, 0)),
3638 arraysize(temps), temps);
3639}
3640
3641void InstructionSelectorT::VisitI32x4TruncSatF64x2SZero(OpIndex node) {
3642 IA32OperandGeneratorT g(this);
3643 InstructionOperand temps[] = {g.TempRegister()};
3644 if (IsSupported(AVX)) {
3645 // Requires dst != src.
3646 Emit(kIA32I32x4TruncSatF64x2SZero, g.DefineAsRegister(node),
3647 g.UseUniqueRegister(this->input_at(node, 0)), arraysize(temps), temps);
3648 } else {
3649 Emit(kIA32I32x4TruncSatF64x2SZero, g.DefineSameAsFirst(node),
3650 g.UseRegister(this->input_at(node, 0)), arraysize(temps), temps);
3651 }
3652}
3653
3654void InstructionSelectorT::VisitI32x4TruncSatF64x2UZero(OpIndex node) {
3655 IA32OperandGeneratorT g(this);
3656 InstructionOperand temps[] = {g.TempRegister()};
3657 InstructionOperand dst =
3658 IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
3659 Emit(kIA32I32x4TruncSatF64x2UZero, dst,
3660 g.UseRegister(this->input_at(node, 0)), arraysize(temps), temps);
3661}
3662
3663void InstructionSelectorT::VisitI32x4RelaxedTruncF64x2SZero(OpIndex node) {
3664 VisitRRSimd(this, node, kIA32Cvttpd2dq);
3665}
3666
3667void InstructionSelectorT::VisitI32x4RelaxedTruncF64x2UZero(OpIndex node) {
3668 VisitFloatUnop(this, node, this->input_at(node, 0),
3669 kIA32I32x4TruncF64x2UZero);
3670}
3671
3672void InstructionSelectorT::VisitI32x4RelaxedTruncF32x4S(OpIndex node) {
3673 VisitRRSimd(this, node, kIA32Cvttps2dq);
3674}
3675
3676void InstructionSelectorT::VisitI32x4RelaxedTruncF32x4U(OpIndex node) {
3677 IA32OperandGeneratorT g(this);
3678 OpIndex input = this->input_at(node, 0);
3679 InstructionOperand temps[] = {g.TempSimd128Register()};
3680 // No need for unique because inputs are float but temp is general.
3681 if (IsSupported(AVX)) {
3682 Emit(kIA32I32x4TruncF32x4U, g.DefineAsRegister(node), g.UseRegister(input),
3683 arraysize(temps), temps);
3684 } else {
3685 Emit(kIA32I32x4TruncF32x4U, g.DefineSameAsFirst(node), g.UseRegister(input),
3686 arraysize(temps), temps);
3687 }
3688}
3689
3690void InstructionSelectorT::VisitI64x2GtS(OpIndex node) {
3691 IA32OperandGeneratorT g(this);
3692 if (CpuFeatures::IsSupported(AVX)) {
3693 Emit(kIA32I64x2GtS, g.DefineAsRegister(node),
3694 g.UseRegister(this->input_at(node, 0)),
3695 g.UseRegister(this->input_at(node, 1)));
3696 } else if (CpuFeatures::IsSupported(SSE4_2)) {
3697 Emit(kIA32I64x2GtS, g.DefineSameAsFirst(node),
3698 g.UseRegister(this->input_at(node, 0)),
3699 g.UseRegister(this->input_at(node, 1)));
3700 } else {
3701 Emit(kIA32I64x2GtS, g.DefineAsRegister(node),
3702 g.UseUniqueRegister(this->input_at(node, 0)),
3703 g.UseUniqueRegister(this->input_at(node, 1)));
3704 }
3705}
3706
3707void InstructionSelectorT::VisitI64x2GeS(OpIndex node) {
3708 IA32OperandGeneratorT g(this);
3709 if (CpuFeatures::IsSupported(AVX)) {
3710 Emit(kIA32I64x2GeS, g.DefineAsRegister(node),
3711 g.UseRegister(this->input_at(node, 0)),
3712 g.UseRegister(this->input_at(node, 1)));
3713 } else if (CpuFeatures::IsSupported(SSE4_2)) {
3714 Emit(kIA32I64x2GeS, g.DefineAsRegister(node),
3715 g.UseUniqueRegister(this->input_at(node, 0)),
3716 g.UseRegister(this->input_at(node, 1)));
3717 } else {
3718 Emit(kIA32I64x2GeS, g.DefineAsRegister(node),
3719 g.UseUniqueRegister(this->input_at(node, 0)),
3720 g.UseUniqueRegister(this->input_at(node, 1)));
3721 }
3722}
3723
3724void InstructionSelectorT::VisitI64x2Abs(OpIndex node) {
3725 VisitRRSimd(this, node, kIA32I64x2Abs, kIA32I64x2Abs);
3726}
3727
3728void InstructionSelectorT::VisitF64x2PromoteLowF32x4(OpIndex node) {
3729 IA32OperandGeneratorT g(this);
3730 InstructionCode code = kIA32F64x2PromoteLowF32x4;
3731 // TODO(nicohartmann@): Implement this special case for turboshaft. Note
3732 // that this special case may require adaptations in instruction-selector.cc
3733 // in `FinishEmittedInstructions`, similar to what exists for TurboFan.
3734#if 0
3735 OpIndex input = this->input_at(node, 0);
3736 LoadTransformMatcher m(input);
3737
3738 if (m.Is(LoadTransformation::kS128Load64Zero) && CanCover(node, input)) {
3739 // Trap handler is not supported on IA32.
3740 DCHECK_NE(m.ResolvedValue().kind,
3741 MemoryAccessKind::kProtectedByTrapHandler);
3742 // LoadTransforms cannot be eliminated, so they are visited even if
3743 // unused. Mark it as defined so that we don't visit it.
3744 MarkAsDefined(input);
3745 VisitLoad(node, input, code);
3746 return;
3747 }
3748#endif
3749
3750 VisitRR(this, node, code);
3751}
3752
3753namespace {
3754void VisitRelaxedLaneSelect(InstructionSelectorT* selector, OpIndex node,
3755 InstructionCode code = kIA32Pblendvb) {
3756 IA32OperandGeneratorT g(selector);
3757 // pblendvb/blendvps/blendvpd copy src2 when the mask is set, which is the
3758 // opposite of the Wasm semantics. node's inputs are: mask, lhs, rhs
3759 // (determined in wasm-compiler.cc).
3760 if (selector->IsSupported(AVX)) {
3761 selector->Emit(code, g.DefineAsRegister(node),
3762 g.UseRegister(selector->input_at(node, 2)),
3763 g.UseRegister(selector->input_at(node, 1)),
3764 g.UseRegister(selector->input_at(node, 0)));
3765 } else {
3766 // SSE4.1 pblendvb/blendvps/blendvpd requires xmm0 to hold the mask as an
3767 // implicit operand.
3768 selector->Emit(code, g.DefineSameAsFirst(node),
3769 g.UseRegister(selector->input_at(node, 2)),
3770 g.UseRegister(selector->input_at(node, 1)),
3771 g.UseFixed(selector->input_at(node, 0), xmm0));
3772 }
3773}
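// Byte-wise sketch of the operand order above, assuming the bit-select
// reading of the Wasm semantics, laneselect(mask, lhs, rhs) ==
// (lhs & mask) | (rhs & ~mask), while the blend instructions pick their
// *second* source where the mask is set. Passing (rhs, lhs, mask) to the
// instruction therefore yields exactly the Wasm result.
inline uint8_t LaneSelectSketch(uint8_t mask, uint8_t lhs, uint8_t rhs) {
  return static_cast<uint8_t>((lhs & mask) |
                              (rhs & static_cast<uint8_t>(~mask)));
}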
3774} // namespace
3775
3776void InstructionSelectorT::VisitI8x16RelaxedLaneSelect(OpIndex node) {
3777 VisitRelaxedLaneSelect(this, node);
3778}
3779void InstructionSelectorT::VisitI16x8RelaxedLaneSelect(OpIndex node) {
3780 VisitRelaxedLaneSelect(this, node);
3781}
3782void InstructionSelectorT::VisitI32x4RelaxedLaneSelect(OpIndex node) {
3783 VisitRelaxedLaneSelect(this, node, kIA32Blendvps);
3784}
3785void InstructionSelectorT::VisitI64x2RelaxedLaneSelect(OpIndex node) {
3786 VisitRelaxedLaneSelect(this, node, kIA32Blendvpd);
3787}
3788
3789void InstructionSelectorT::VisitF64x2Qfma(OpIndex node) {
3790 VisitRRRR(this, node, kIA32F64x2Qfma);
3791}
3792
3793void InstructionSelectorT::VisitF64x2Qfms(OpIndex node) {
3794 VisitRRRR(this, node, kIA32F64x2Qfms);
3795}
3796
3797void InstructionSelectorT::VisitF32x4Qfma(OpIndex node) {
3798 VisitRRRR(this, node, kIA32F32x4Qfma);
3799}
3800
3801void InstructionSelectorT::VisitF32x4Qfms(OpIndex node) {
3802 VisitRRRR(this, node, kIA32F32x4Qfms);
3803}
3804
3805void InstructionSelectorT::VisitF16x8Qfma(OpIndex node) { UNIMPLEMENTED(); }
3806
3807void InstructionSelectorT::VisitF16x8Qfms(OpIndex node) { UNIMPLEMENTED(); }
3808
3809void InstructionSelectorT::VisitI16x8DotI8x16I7x16S(OpIndex node) {
3810 IA32OperandGeneratorT g(this);
3811 Emit(kIA32I16x8DotI8x16I7x16S, g.DefineAsRegister(node),
3812 g.UseUniqueRegister(this->input_at(node, 0)),
3813 g.UseRegister(this->input_at(node, 1)));
3814}
3815
3816void InstructionSelectorT::VisitI32x4DotI8x16I7x16AddS(OpIndex node) {
3817 IA32OperandGeneratorT g(this);
3818 InstructionOperand temps[] = {g.TempSimd128Register()};
3819 Emit(kIA32I32x4DotI8x16I7x16AddS, g.DefineSameAsInput(node, 2),
3820 g.UseUniqueRegister(this->input_at(node, 0)),
3821 g.UseUniqueRegister(this->input_at(node, 1)),
3822 g.UseUniqueRegister(this->input_at(node, 2)), arraysize(temps), temps);
3823}
3824#endif // V8_ENABLE_WEBASSEMBLY
3825
3826void InstructionSelectorT::AddOutputToSelectContinuation(OperandGeneratorT* g,
3827 int first_input_index,
3828 OpIndex node) {
3829 UNREACHABLE();
3830}
3831
3832// static
3833MachineOperatorBuilder::Flags
3834InstructionSelector::SupportedMachineOperatorFlags() {
3835 MachineOperatorBuilder::Flags flags =
3836 MachineOperatorBuilder::kWord32ShiftIsSafe |
3837 MachineOperatorBuilder::kWord32Ctz | MachineOperatorBuilder::kWord32Rol;
3838 if (CpuFeatures::IsSupported(POPCNT)) {
3839 flags |= MachineOperatorBuilder::kWord32Popcnt;
3840 }
3841 if (CpuFeatures::IsSupported(SSE4_1)) {
3842 flags |= MachineOperatorBuilder::kFloat32RoundDown |
3843 MachineOperatorBuilder::kFloat64RoundDown |
3844 MachineOperatorBuilder::kFloat32RoundUp |
3845 MachineOperatorBuilder::kFloat64RoundUp |
3846 MachineOperatorBuilder::kFloat32RoundTruncate |
3847 MachineOperatorBuilder::kFloat64RoundTruncate |
3848 MachineOperatorBuilder::kFloat32RoundTiesEven |
3849 MachineOperatorBuilder::kFloat64RoundTiesEven;
3850 }
3851 return flags;
3852}
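// Usage sketch (assuming the returned Flags value supports plain bitwise
// tests, as elsewhere in the compiler; the local names below are
// illustrative):
//
//   MachineOperatorBuilder::Flags flags =
//       InstructionSelector::SupportedMachineOperatorFlags();
//   // True only when the POPCNT CPU feature was detected above.
//   bool has_popcnt = (flags & MachineOperatorBuilder::kWord32Popcnt) != 0;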
3853
3854// static
3855MachineOperatorBuilder::AlignmentRequirements
3856InstructionSelector::AlignmentRequirements() {
3857 return MachineOperatorBuilder::AlignmentRequirements::
3858 FullUnalignedAccessSupport();
3859}
3860
3861} // namespace compiler
3862} // namespace internal
3863} // namespace v8