v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
assembler-arm64.cc
1// Copyright 2013 the V8 project authors. All rights reserved.
2//
3// Redistribution and use in source and binary forms, with or without
4// modification, are permitted provided that the following conditions are
5// met:
6//
7// * Redistributions of source code must retain the above copyright
8// notice, this list of conditions and the following disclaimer.
9// * Redistributions in binary form must reproduce the above
10// copyright notice, this list of conditions and the following
11// disclaimer in the documentation and/or other materials provided
12// with the distribution.
13// * Neither the name of Google Inc. nor the names of its
14// contributors may be used to endorse or promote products derived
15// from this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29#if V8_TARGET_ARCH_ARM64
30
31#include "src/codegen/arm64/assembler-arm64.h"
32
33#include "src/base/bits.h"
34#include "src/base/cpu.h"
40
41namespace v8 {
42namespace internal {
43
44namespace {
45
46#ifdef USE_SIMULATOR
47unsigned SimulatorFeaturesFromCommandLine() {
48 if (strcmp(v8_flags.sim_arm64_optional_features, "none") == 0) {
49 return 0;
50 }
51 if (strcmp(v8_flags.sim_arm64_optional_features, "all") == 0) {
52 return (1u << NUMBER_OF_CPU_FEATURES) - 1;
53 }
54 fprintf(
55 stderr,
56 "Error: unrecognised value for --sim-arm64-optional-features ('%s').\n",
57 v8_flags.sim_arm64_optional_features.value());
58 fprintf(stderr,
59 "Supported values are: none\n"
60 " all\n");
61 FATAL("sim-arm64-optional-features");
62}
63#endif // USE_SIMULATOR
64
65constexpr unsigned CpuFeaturesFromCompiler() {
66 unsigned features = 0;
67#if defined(__ARM_FEATURE_JCVT) && !defined(V8_TARGET_OS_IOS)
68 features |= 1u << JSCVT;
69#endif
70#if defined(__ARM_FEATURE_DOTPROD)
71 features |= 1u << DOTPROD;
72#endif
73#if defined(__ARM_FEATURE_SHA3)
74 features |= 1u << SHA3;
75#endif
76#if defined(__ARM_FEATURE_ATOMICS)
77 features |= 1u << LSE;
78#endif
79// There is no __ARM_FEATURE_PMULL macro; instead, __ARM_FEATURE_AES
80// covers the FEAT_PMULL feature too.
81#if defined(__ARM_FEATURE_AES)
82 features |= 1u << PMULL1Q;
83#endif
84 return features;
85}
86
87constexpr unsigned CpuFeaturesFromTargetOS() {
88 unsigned features = 0;
89#if defined(V8_TARGET_OS_MACOS) && !defined(V8_TARGET_OS_IOS)
 90 // TODO(v8:13004): Detect if an iPhone is new enough to support jscvt, dotprod
91 // and lse.
92 features |= 1u << JSCVT;
93 features |= 1u << DOTPROD;
94 features |= 1u << LSE;
95 features |= 1u << PMULL1Q;
96#endif
97 return features;
98}
99
100} // namespace
101
102// -----------------------------------------------------------------------------
103// CpuFeatures implementation.
104bool CpuFeatures::SupportsWasmSimd128() { return true; }
105
106void CpuFeatures::ProbeImpl(bool cross_compile) {
107 // Only use statically determined features for cross compile (snapshot).
108 if (cross_compile) {
109 supported_ |= CpuFeaturesFromCompiler();
110 supported_ |= CpuFeaturesFromTargetOS();
111 return;
112 }
113
114 // We used to probe for coherent cache support, but on older CPUs it
115 // causes crashes (crbug.com/524337), and newer CPUs don't even have
116 // the feature any more.
117
118#ifdef USE_SIMULATOR
119 supported_ |= SimulatorFeaturesFromCommandLine();
120#else
121 // Probe for additional features at runtime.
122 base::CPU cpu;
123 unsigned runtime = 0;
124 if (cpu.has_jscvt()) {
125 runtime |= 1u << JSCVT;
126 }
127 if (cpu.has_dot_prod()) {
128 runtime |= 1u << DOTPROD;
129 }
130 if (cpu.has_sha3()) {
131 runtime |= 1u << SHA3;
132 }
133 if (cpu.has_lse()) {
134 runtime |= 1u << LSE;
135 }
136 if (cpu.has_pmull1q()) {
137 runtime |= 1u << PMULL1Q;
138 }
139 if (cpu.has_fp16()) {
140 runtime |= 1u << FP16;
141 }
142
143 // Use the best of the features found by CPU detection and those inferred from
144 // the build system.
145 supported_ |= CpuFeaturesFromCompiler();
146 supported_ |= runtime;
147#endif // USE_SIMULATOR
148
149 // Set a static value on whether Simd is supported.
 150 // This variable is only used for certain archs to query SupportsWasmSimd128()
 151 // at runtime in builtins using an extern ref. Other callers should use
 152 // CpuFeatures::SupportsWasmSimd128().
 153 CpuFeatures::supports_wasm_simd_128_ = CpuFeatures::SupportsWasmSimd128();
 154}
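// Usage sketch (illustrative; register choices are arbitrary): callers
// typically guard optional instructions on the feature set probed above, e.g.
//
//   if (CpuFeatures::IsSupported(LSE)) {
//     CpuFeatureScope scope(assm, LSE);  // assumes an Assembler* named 'assm'
//     assm->casal(x0, x1, MemOperand(x2));
//   }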
155
 156void CpuFeatures::PrintTarget() {}
 157void CpuFeatures::PrintFeatures() {}
 158
159// -----------------------------------------------------------------------------
160// CPURegList utilities.
161
162CPURegister CPURegList::PopLowestIndex() {
163 if (IsEmpty()) {
164 return NoCPUReg;
165 }
 166 int index = static_cast<int>(base::bits::CountTrailingZeros(list_));
 167 DCHECK((1LL << index) & list_);
168 Remove(index);
169 return CPURegister::Create(index, size_, type_);
170}
171
172CPURegister CPURegList::PopHighestIndex() {
173 if (IsEmpty()) {
174 return NoCPUReg;
175 }
 176 int index = CountLeadingZeros(list_, kRegListSizeInBits);
 177 index = kRegListSizeInBits - 1 - index;
178 DCHECK((1LL << index) & list_);
179 Remove(index);
180 return CPURegister::Create(index, size_, type_);
181}
182
183void CPURegList::Align() {
184 // Use padreg, if necessary, to maintain stack alignment.
185 if (Count() % 2 != 0) {
186 if (IncludesAliasOf(padreg)) {
187 Remove(padreg);
188 } else {
 189 Combine(padreg);
 190 }
191 }
192
193 DCHECK_EQ(Count() % 2, 0);
194}
195
196CPURegList CPURegList::GetCalleeSaved(int size) {
197 return CPURegList(CPURegister::kRegister, size, 19, 28);
198}
199
200CPURegList CPURegList::GetCalleeSavedV(int size) {
201 return CPURegList(CPURegister::kVRegister, size, 8, 15);
202}
203
204CPURegList CPURegList::GetCallerSaved(int size) {
205 // x18 is the platform register and is reserved for the use of platform ABIs.
206 // Registers x0-x17 are caller-saved.
207 CPURegList list = CPURegList(CPURegister::kRegister, size, 0, 17);
208 return list;
209}
210
211CPURegList CPURegList::GetCallerSavedV(int size) {
212 // Registers d0-d7 and d16-d31 are caller-saved.
 213 CPURegList list = CPURegList(CPURegister::kVRegister, size, 0, 7);
 214 list.Combine(CPURegList(CPURegister::kVRegister, size, 16, 31));
215 return list;
216}
217
218// -----------------------------------------------------------------------------
219// Implementation of RelocInfo
220
221const int RelocInfo::kApplyMask =
226
 227bool RelocInfo::IsCodedSpecially() {
 228 // The deserializer needs to know whether a pointer is specially coded. Being
229 // specially coded on ARM64 means that it is an immediate branch.
230 Instruction* instr = reinterpret_cast<Instruction*>(pc_);
231 if (instr->IsLdrLiteralX()) {
232 return false;
233 } else {
234 DCHECK(instr->IsBranchAndLink() || instr->IsUnconditionalBranch());
235 return true;
236 }
237}
238
 239bool RelocInfo::IsInConstantPool() {
 240 Instruction* instr = reinterpret_cast<Instruction*>(pc_);
242 return instr->IsLdrLiteralX() ||
243 (COMPRESS_POINTERS_BOOL && instr->IsLdrLiteralW());
244}
245
246uint32_t RelocInfo::wasm_call_tag() const {
248 Instruction* instr = reinterpret_cast<Instruction*>(pc_);
249 if (instr->IsLdrLiteralX()) {
250 return static_cast<uint32_t>(
251 Memory<Address>(Assembler::target_pointer_address_at(pc_)));
252 } else {
253 DCHECK(instr->IsBranchAndLink() || instr->IsUnconditionalBranch());
254 return static_cast<uint32_t>(instr->ImmPCOffset() / kInstrSize);
255 }
256}
257
258bool AreAliased(const CPURegister& reg1, const CPURegister& reg2,
259 const CPURegister& reg3, const CPURegister& reg4,
260 const CPURegister& reg5, const CPURegister& reg6,
261 const CPURegister& reg7, const CPURegister& reg8) {
262 int number_of_valid_regs = 0;
263 int number_of_valid_fpregs = 0;
264
265 uint64_t unique_regs = 0;
266 uint64_t unique_fpregs = 0;
267
268 const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8};
269
270 for (unsigned i = 0; i < arraysize(regs); i++) {
271 if (regs[i].IsRegister()) {
272 number_of_valid_regs++;
273 unique_regs |= (uint64_t{1} << regs[i].code());
274 } else if (regs[i].IsVRegister()) {
275 number_of_valid_fpregs++;
276 unique_fpregs |= (uint64_t{1} << regs[i].code());
277 } else {
278 DCHECK(!regs[i].is_valid());
279 }
280 }
281
282 int number_of_unique_regs =
283 CountSetBits(unique_regs, sizeof(unique_regs) * kBitsPerByte);
284 int number_of_unique_fpregs =
285 CountSetBits(unique_fpregs, sizeof(unique_fpregs) * kBitsPerByte);
286
287 DCHECK(number_of_valid_regs >= number_of_unique_regs);
288 DCHECK(number_of_valid_fpregs >= number_of_unique_fpregs);
289
290 return (number_of_valid_regs != number_of_unique_regs) ||
291 (number_of_valid_fpregs != number_of_unique_fpregs);
292}
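// Example (illustrative): AreAliased(x0, x1, x0) returns true because x0
// appears twice; invalid (NoReg) arguments are ignored, so the unused trailing
// parameters simply fall through the DCHECK(!regs[i].is_valid()) branch.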
293
294bool AreSameSizeAndType(const CPURegister& reg1, const CPURegister& reg2,
295 const CPURegister& reg3, const CPURegister& reg4,
296 const CPURegister& reg5, const CPURegister& reg6,
297 const CPURegister& reg7, const CPURegister& reg8) {
298 DCHECK(reg1.is_valid());
299 bool match = true;
300 match &= !reg2.is_valid() || reg2.IsSameSizeAndType(reg1);
301 match &= !reg3.is_valid() || reg3.IsSameSizeAndType(reg1);
302 match &= !reg4.is_valid() || reg4.IsSameSizeAndType(reg1);
303 match &= !reg5.is_valid() || reg5.IsSameSizeAndType(reg1);
304 match &= !reg6.is_valid() || reg6.IsSameSizeAndType(reg1);
305 match &= !reg7.is_valid() || reg7.IsSameSizeAndType(reg1);
306 match &= !reg8.is_valid() || reg8.IsSameSizeAndType(reg1);
307 return match;
308}
309
310bool AreSameFormat(const Register& reg1, const Register& reg2,
311 const Register& reg3, const Register& reg4) {
312 DCHECK(reg1.is_valid());
313 return (!reg2.is_valid() || reg2.IsSameSizeAndType(reg1)) &&
314 (!reg3.is_valid() || reg3.IsSameSizeAndType(reg1)) &&
315 (!reg4.is_valid() || reg4.IsSameSizeAndType(reg1));
316}
317
318bool AreSameFormat(const VRegister& reg1, const VRegister& reg2,
319 const VRegister& reg3, const VRegister& reg4) {
320 DCHECK(reg1.is_valid());
321 return (!reg2.is_valid() || reg2.IsSameFormat(reg1)) &&
322 (!reg3.is_valid() || reg3.IsSameFormat(reg1)) &&
323 (!reg4.is_valid() || reg4.IsSameFormat(reg1));
324}
325
326bool AreConsecutive(const CPURegister& reg1, const CPURegister& reg2,
327 const CPURegister& reg3, const CPURegister& reg4) {
328 DCHECK(reg1.is_valid());
329
330 if (!reg2.is_valid()) {
331 DCHECK(!reg3.is_valid() && !reg4.is_valid());
332 return true;
333 } else if (reg2.code() != ((reg1.code() + 1) % (reg1.MaxCode() + 1))) {
334 return false;
335 }
336
337 if (!reg3.is_valid()) {
338 DCHECK(!reg4.is_valid());
339 return true;
340 } else if (reg3.code() != ((reg2.code() + 1) % (reg1.MaxCode() + 1))) {
341 return false;
342 }
343
344 if (!reg4.is_valid()) {
345 return true;
346 } else if (reg4.code() != ((reg3.code() + 1) % (reg1.MaxCode() + 1))) {
347 return false;
348 }
349
350 return true;
351}
352
353bool AreEven(const CPURegister& reg1, const CPURegister& reg2,
354 const CPURegister& reg3, const CPURegister& reg4,
355 const CPURegister& reg5, const CPURegister& reg6,
356 const CPURegister& reg7, const CPURegister& reg8) {
357 DCHECK(reg1.is_valid());
358 bool even = reg1.IsEven();
359 even &= !reg2.is_valid() || reg2.IsEven();
360 even &= !reg3.is_valid() || reg3.IsEven();
361 even &= !reg4.is_valid() || reg4.IsEven();
362 even &= !reg5.is_valid() || reg5.IsEven();
363 even &= !reg6.is_valid() || reg6.IsEven();
364 even &= !reg7.is_valid() || reg7.IsEven();
365 even &= !reg8.is_valid() || reg8.IsEven();
366 return even;
367}
368
369bool Operand::NeedsRelocation(const Assembler* assembler) const {
 370 RelocInfo::Mode rmode = immediate_.rmode();
 371
 372 if (RelocInfo::IsOnlyForSerializer(rmode)) {
 373 return assembler->options().record_reloc_info_for_serialization;
374 }
375
376 return !RelocInfo::IsNoInfo(rmode);
377}
378
379// Assembler
381 const AssemblerOptions& options,
382 std::unique_ptr<AssemblerBuffer> buffer)
383 : AssemblerBase(options, std::move(buffer)),
384 zone_(zone),
385 unresolved_branches_(zone_.get()),
386 constpool_(this) {
387 Reset();
388
389#if defined(V8_OS_WIN)
390 if (options.collect_win64_unwind_info) {
391 xdata_encoder_ = std::make_unique<win64_unwindinfo::XdataEncoder>(*this);
392 }
393#endif
394}
395
396Assembler::~Assembler() {
397 DCHECK(constpool_.IsEmpty());
398 DCHECK_EQ(veneer_pool_blocked_nesting_, 0);
399}
400
401void Assembler::AbortedCodeGeneration() { constpool_.Clear(); }
402
403void Assembler::Reset() {
404#ifdef DEBUG
405 DCHECK((pc_ >= buffer_start_) && (pc_ < buffer_start_ + buffer_->size()));
406 DCHECK_EQ(veneer_pool_blocked_nesting_, 0);
407 DCHECK(unresolved_branches_.empty());
408 memset(buffer_start_, 0, pc_ - buffer_start_);
409#endif
410 pc_ = buffer_start_;
411 reloc_info_writer.Reposition(buffer_start_ + buffer_->size(), pc_);
412 constpool_.Clear();
413 constpool_.SetNextCheckIn(ConstantPool::kCheckInterval);
414 next_veneer_pool_check_ = kMaxInt;
415}
416
417#if defined(V8_OS_WIN)
418win64_unwindinfo::BuiltinUnwindInfo Assembler::GetUnwindInfo() const {
419 DCHECK(options().collect_win64_unwind_info);
420 DCHECK_NOT_NULL(xdata_encoder_);
421 return xdata_encoder_->unwinding_info();
422}
423#endif
424
425void Assembler::AllocateAndInstallRequestedHeapNumbers(LocalIsolate* isolate) {
426 DCHECK_IMPLIES(isolate == nullptr, heap_number_requests_.empty());
427 for (auto& request : heap_number_requests_) {
428 Address pc = reinterpret_cast<Address>(buffer_start_) + request.offset();
429 Handle<HeapObject> object =
430 isolate->factory()->NewHeapNumber<AllocationType::kOld>(
431 request.heap_number());
432 EmbeddedObjectIndex index = AddEmbeddedObject(object);
433 set_embedded_object_index_referenced_from(pc, index);
434 }
435}
436
437void Assembler::GetCode(Isolate* isolate, CodeDesc* desc) {
438 GetCode(isolate->main_thread_local_isolate(), desc);
439}
440void Assembler::GetCode(LocalIsolate* isolate, CodeDesc* desc,
441 SafepointTableBuilderBase* safepoint_table_builder,
442 int handler_table_offset) {
443 // As a crutch to avoid having to add manual Align calls wherever we use a
444 // raw workflow to create InstructionStream objects (mostly in tests), add
445 // another Align call here. It does no harm - the end of the InstructionStream
 446 // object is aligned to the (larger) kCodeAlignment anyway.
447 // TODO(jgruber): Consider moving responsibility for proper alignment to
448 // metadata table builders (safepoint, handler, constant pool, code
449 // comments).
450 DataAlign(InstructionStream::kMetadataAlignment);
451
452 // Emit constant pool if necessary.
453 ForceConstantPoolEmissionWithoutJump();
454 DCHECK(constpool_.IsEmpty());
455
456 int code_comments_size = WriteCodeComments();
457
458 AllocateAndInstallRequestedHeapNumbers(isolate);
459
460 // Set up code descriptor.
461 // TODO(jgruber): Reconsider how these offsets and sizes are maintained up to
462 // this point to make CodeDesc initialization less fiddly.
463
464 static constexpr int kConstantPoolSize = 0;
465 static constexpr int kBuiltinJumpTableInfoSize = 0;
466 const int instruction_size = pc_offset();
467 const int builtin_jump_table_info_offset =
468 instruction_size - kBuiltinJumpTableInfoSize;
469 const int code_comments_offset =
470 builtin_jump_table_info_offset - code_comments_size;
471 const int constant_pool_offset = code_comments_offset - kConstantPoolSize;
472 const int handler_table_offset2 = (handler_table_offset == kNoHandlerTable)
473 ? constant_pool_offset
474 : handler_table_offset;
475 const int safepoint_table_offset =
476 (safepoint_table_builder == kNoSafepointTable)
477 ? handler_table_offset2
478 : safepoint_table_builder->safepoint_table_offset();
479 const int reloc_info_offset =
480 static_cast<int>(reloc_info_writer.pos() - buffer_->start());
481 CodeDesc::Initialize(desc, this, safepoint_table_offset,
482 handler_table_offset2, constant_pool_offset,
483 code_comments_offset, builtin_jump_table_info_offset,
484 reloc_info_offset);
485}
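// Layout sketch, derived from the offset arithmetic above (zero-sized sections
// still get an offset): measured from the start of the instruction area,
//
//   safepoint table <= handler table <= constant pool (empty here)
//                   <= code comments <= builtin jump table info (empty here)
//                   <= instruction_size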
486
487void Assembler::Align(int m) {
 488 // If pc_offset() is not already aligned to kInstrSize, the loop below won't terminate.
489 DCHECK(IsAligned(pc_offset(), kInstrSize));
490 DCHECK(m >= kInstrSize && base::bits::IsPowerOfTwo(m));
491 while ((pc_offset() & (m - 1)) != 0) {
492 nop();
493 }
494}
495
496void Assembler::CodeTargetAlign() {
497 // Preferred alignment of jump targets on some ARM chips.
498#if !defined(V8_TARGET_OS_MACOS)
499 Align(8);
500#endif
501}
502
503void Assembler::CheckLabelLinkChain(Label const* label) {
504#ifdef DEBUG
505 if (label->is_linked()) {
 506 static const int kMaxLinksToCheck = 64; // Avoid O(n^2) behaviour.
507 int links_checked = 0;
508 int64_t linkoffset = label->pos();
509 bool end_of_chain = false;
510 while (!end_of_chain) {
511 if (++links_checked > kMaxLinksToCheck) break;
512 Instruction* link = InstructionAt(linkoffset);
513 int64_t linkpcoffset = link->ImmPCOffset();
514 int64_t prevlinkoffset = linkoffset + linkpcoffset;
515
516 end_of_chain = (linkoffset == prevlinkoffset);
517 linkoffset = linkoffset + linkpcoffset;
518 }
519 }
520#endif
521}
522
523void Assembler::RemoveBranchFromLabelLinkChain(Instruction* branch,
524 Label* label,
525 Instruction* label_veneer) {
526 DCHECK(label->is_linked());
527
528 CheckLabelLinkChain(label);
529
530 Instruction* link = InstructionAt(label->pos());
531 Instruction* prev_link = link;
532 Instruction* next_link;
533
534 if (link != branch) {
535 int i = static_cast<int>(InstructionOffset(branch));
536 // Currently, we don't support adr instructions sharing labels with
537 // branches in the link chain.
538 DCHECK(branch_link_chain_back_edge_.contains(i));
539 prev_link = InstructionAt(branch_link_chain_back_edge_.at(i));
540 link = branch;
541 }
542
543 DCHECK(branch == link);
544 next_link = branch->ImmPCOffsetTarget();
545
546 if (branch == prev_link) {
547 // The branch is the first instruction in the chain.
548 if (branch == next_link) {
549 // It is also the last instruction in the chain, so it is the only branch
550 // currently referring to this label.
551 //
552 // Label -> this branch -> start
553 label->Unuse();
554 } else {
555 // Label -> this branch -> 1+ branches -> start
556 label->link_to(static_cast<int>(InstructionOffset(next_link)));
557 branch_link_chain_back_edge_.erase(
558 static_cast<int>(InstructionOffset(next_link)));
559 }
560 } else if (branch == next_link) {
561 // The branch is the last (but not also the first) instruction in the chain.
562 //
563 // Label -> 1+ branches -> this branch -> start
564 prev_link->SetImmPCOffsetTarget(zone(), options(), prev_link);
565 branch_link_chain_back_edge_.erase(
566 static_cast<int>(InstructionOffset(branch)));
567 } else {
568 // The branch is in the middle of the chain.
569 //
570 // Label -> 1+ branches -> this branch -> 1+ branches -> start
571 int n = static_cast<int>(InstructionOffset(next_link));
572 if (branch_link_chain_back_edge_.contains(n)) {
573 // Update back edge such that the branch after this branch points to the
574 // branch before it.
575 branch_link_chain_back_edge_[n] =
576 static_cast<int>(InstructionOffset(prev_link));
577 branch_link_chain_back_edge_.erase(
578 static_cast<int>(InstructionOffset(branch)));
579 }
580
581 if (prev_link->IsTargetInImmPCOffsetRange(next_link)) {
582 prev_link->SetImmPCOffsetTarget(zone(), options(), next_link);
583 } else if (label_veneer != nullptr) {
584 // Use the veneer for all previous links in the chain.
585 prev_link->SetImmPCOffsetTarget(zone(), options(), prev_link);
586
587 bool end_of_chain = false;
588 link = next_link;
589 while (!end_of_chain) {
590 next_link = link->ImmPCOffsetTarget();
591 end_of_chain = (link == next_link);
592 link->SetImmPCOffsetTarget(zone(), options(), label_veneer);
593 // {link} is now resolved; remove it from {unresolved_branches_} so
594 // we won't later try to process it again, which would fail because
595 // by walking the chain of its label's unresolved branch instructions,
596 // we won't find it: {prev_link} is now the end of that chain after
597 // its update above.
598 if (link->IsCondBranchImm() || link->IsCompareBranch()) {
599 static_assert(Instruction::ImmBranchRange(CondBranchType) ==
600 Instruction::ImmBranchRange(CompareBranchType));
601 int max_reachable_pc = static_cast<int>(InstructionOffset(link)) +
602 Instruction::ImmBranchRange(CondBranchType);
603 unresolved_branches_.erase(max_reachable_pc);
604 } else if (link->IsTestBranch()) {
605 // Add 1 to account for branch type tag bit.
606 int max_reachable_pc = static_cast<int>(InstructionOffset(link)) +
607 Instruction::ImmBranchRange(TestBranchType) +
608 1;
609 unresolved_branches_.erase(max_reachable_pc);
610 } else {
611 // Other branch types are not handled by veneers.
612 }
613 link = next_link;
614 }
615 } else {
616 // The assert below will fire.
617 // Some other work could be attempted to fix up the chain, but it would be
 618 // rather complicated. If we crash here, we may want to consider using a
 619 // mechanism other than a chain of branches.
620 //
621 // Note that this situation currently should not happen, as we always call
622 // this function with a veneer to the target label.
623 // However this could happen with a MacroAssembler in the following state:
624 // [previous code]
625 // B(label);
626 // [20KB code]
627 // Tbz(label); // First tbz. Pointing to unconditional branch.
628 // [20KB code]
629 // Tbz(label); // Second tbz. Pointing to the first tbz.
630 // [more code]
631 // and this function is called to remove the first tbz from the label link
632 // chain. Since tbz has a range of +-32KB, the second tbz cannot point to
633 // the unconditional branch.
634 CHECK(prev_link->IsTargetInImmPCOffsetRange(next_link));
635 UNREACHABLE();
636 }
637 }
638
639 CheckLabelLinkChain(label);
640}
641
642void Assembler::bind(Label* label) {
643 // Bind label to the address at pc_. All instructions (most likely branches)
644 // that are linked to this label will be updated to point to the newly-bound
645 // label.
646
647 DCHECK(!label->is_near_linked());
648 DCHECK(!label->is_bound());
649
650 DeleteUnresolvedBranchInfoForLabel(label);
651
652 // If the label is linked, the link chain looks something like this:
653 //
654 // |--I----I-------I-------L
655 // |---------------------->| pc_offset
656 // |-------------->| linkoffset = label->pos()
657 // |<------| link->ImmPCOffset()
658 // |------>| prevlinkoffset = linkoffset + link->ImmPCOffset()
659 //
660 // On each iteration, the last link is updated and then removed from the
661 // chain until only one remains. At that point, the label is bound.
662 //
663 // If the label is not linked, no preparation is required before binding.
664 while (label->is_linked()) {
665 int linkoffset = label->pos();
666 Instruction* link = InstructionAt(linkoffset);
667 int prevlinkoffset = linkoffset + static_cast<int>(link->ImmPCOffset());
668
669 CheckLabelLinkChain(label);
670
671 DCHECK_GE(linkoffset, 0);
672 DCHECK(linkoffset < pc_offset());
673 DCHECK((linkoffset > prevlinkoffset) ||
674 (linkoffset - prevlinkoffset == kStartOfLabelLinkChain));
675 DCHECK_GE(prevlinkoffset, 0);
676
677 // Update the link to point to the label.
678 if (link->IsUnresolvedInternalReference()) {
679 // Internal references do not get patched to an instruction but directly
680 // to an address.
681 internal_reference_positions_.push_back(linkoffset);
682 memcpy(link, &pc_, kSystemPointerSize);
683 } else {
684 link->SetImmPCOffsetTarget(zone(), options(),
685 reinterpret_cast<Instruction*>(pc_));
686
687 // Discard back edge data for this link.
688 branch_link_chain_back_edge_.erase(
689 static_cast<int>(InstructionOffset(link)));
690 }
691
692 // Link the label to the previous link in the chain.
693 if (linkoffset - prevlinkoffset == kStartOfLabelLinkChain) {
694 // We hit kStartOfLabelLinkChain, so the chain is fully processed.
695 label->Unuse();
696 } else {
697 // Update the label for the next iteration.
698 label->link_to(prevlinkoffset);
699 }
700 }
701 label->bind_to(pc_offset());
702
703 DCHECK(label->is_bound());
704 DCHECK(!label->is_linked());
705}
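// Usage sketch (illustrative; registers arbitrary): a forward branch links
// itself onto the label's chain and is patched here once the label is bound.
//
//   Label done;
//   cbz(x0, &done);           // linked: the target is not yet known
//   add(x0, x0, Operand(1));
//   bind(&done);              // patches the cbz above to branch here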
706
707int Assembler::LinkAndGetByteOffsetTo(Label* label) {
708 DCHECK_EQ(sizeof(*pc_), 1);
709 CheckLabelLinkChain(label);
710
711 int offset;
712 if (label->is_bound()) {
713 // The label is bound, so it does not need to be updated. Referring
714 // instructions must link directly to the label as they will not be
715 // updated.
716 //
717 // In this case, label->pos() returns the offset of the label from the
718 // start of the buffer.
719 //
720 // Note that offset can be zero for self-referential instructions. (This
721 // could be useful for ADR, for example.)
722 offset = label->pos() - pc_offset();
723 DCHECK_LE(offset, 0);
724 } else {
725 if (label->is_linked()) {
726 // The label is linked, so the referring instruction should be added onto
727 // the end of the label's link chain.
728 //
729 // In this case, label->pos() returns the offset of the last linked
730 // instruction from the start of the buffer.
731 offset = label->pos() - pc_offset();
732 DCHECK_NE(offset, kStartOfLabelLinkChain);
733 // Note that the offset here needs to be PC-relative only so that the
734 // first instruction in a buffer can link to an unbound label. Otherwise,
735 // the offset would be 0 for this case, and 0 is reserved for
736 // kStartOfLabelLinkChain.
737 } else {
738 // The label is unused, so it now becomes linked and the referring
739 // instruction is at the start of the new link chain.
740 offset = kStartOfLabelLinkChain;
741 }
742 // The instruction at pc is now the last link in the label's chain.
743 label->link_to(pc_offset());
744 }
745
746 return offset;
747}
748
749void Assembler::DeleteUnresolvedBranchInfoForLabelTraverse(Label* label) {
750 DCHECK(label->is_linked());
751 CheckLabelLinkChain(label);
752
753 int link_offset = label->pos();
754 int link_pcoffset;
755 bool end_of_chain = false;
756
757 while (!end_of_chain) {
758 Instruction* link = InstructionAt(link_offset);
759 int max_reachable_pc = static_cast<int>(InstructionOffset(link));
760
761 // ADR instructions and unconditional branches are not handled by veneers.
762 if (link->IsCondBranchImm() || link->IsCompareBranch()) {
763 static_assert(Instruction::ImmBranchRange(CondBranchType) ==
764 Instruction::ImmBranchRange(CompareBranchType));
765 max_reachable_pc += Instruction::ImmBranchRange(CondBranchType);
766 unresolved_branches_.erase(max_reachable_pc);
767 link_pcoffset = link->ImmCondBranch() * kInstrSize;
768 } else if (link->IsTestBranch()) {
769 // Add one to account for branch type tag bit.
770 max_reachable_pc += Instruction::ImmBranchRange(TestBranchType) + 1;
771 unresolved_branches_.erase(max_reachable_pc);
772 link_pcoffset = link->ImmTestBranch() * kInstrSize;
773 } else if (link->IsUncondBranchImm()) {
774 link_pcoffset = link->ImmUncondBranch() * kInstrSize;
775 } else {
776 link_pcoffset = static_cast<int>(link->ImmPCOffset());
777 }
778
779 end_of_chain = (link_pcoffset == 0);
780 link_offset = link_offset + link_pcoffset;
781 }
782}
783
784void Assembler::DeleteUnresolvedBranchInfoForLabel(Label* label) {
785 if (unresolved_branches_.empty()) {
786 DCHECK_EQ(next_veneer_pool_check_, kMaxInt);
787 return;
788 }
789
790 if (label->is_linked()) {
791 // Branches to this label will be resolved when the label is bound, normally
792 // just after all the associated info has been deleted.
793 DeleteUnresolvedBranchInfoForLabelTraverse(label);
794 }
795 if (unresolved_branches_.empty()) {
796 next_veneer_pool_check_ = kMaxInt;
797 } else {
798 next_veneer_pool_check_ =
799 unresolved_branches_first_limit() - kVeneerDistanceCheckMargin;
800 }
801}
802
803bool Assembler::IsConstantPoolAt(Instruction* instr) {
804 // The constant pool marker is made of two instructions. These instructions
805 // will never be emitted by the JIT, so checking for the first one is enough:
806 // 0: ldr xzr, #<size of pool>
807 bool result = instr->IsLdrLiteralX() && (instr->Rt() == kZeroRegCode);
808
809 // It is still worth asserting the marker is complete.
810 // 4: blr xzr
811 DCHECK(!result || (instr->following()->IsBranchAndLinkToRegister() &&
812 instr->following()->Rn() == kZeroRegCode));
813
814 return result;
815}
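// Marker layout (as described above and emitted via EmitPoolGuard()): an
// inline constant pool begins with
//
//   ldr xzr, #<size of pool>   // recognised by IsConstantPoolAt()
//   blr xzr                    // pool guard, catches fall-through execution
//
// so checking the first instruction is sufficient.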
816
817int Assembler::ConstantPoolSizeAt(Instruction* instr) {
818#ifdef USE_SIMULATOR
819 // Assembler::debug() embeds constants directly into the instruction stream.
820 // Although this is not a genuine constant pool, treat it like one to avoid
821 // disassembling the constants.
822 if ((instr->Mask(ExceptionMask) == HLT) &&
823 (instr->ImmException() == kImmExceptionIsDebug)) {
824 const char* message = reinterpret_cast<const char*>(
825 instr->InstructionAtOffset(kDebugMessageOffset));
826 int size = static_cast<int>(kDebugMessageOffset + strlen(message) + 1);
827 return RoundUp(size, kInstrSize) / kInstrSize;
828 }
829 // Same for printf support, see MacroAssembler::CallPrintf().
830 if ((instr->Mask(ExceptionMask) == HLT) &&
831 (instr->ImmException() == kImmExceptionIsPrintf)) {
832 return kPrintfLength / kInstrSize;
833 }
834#endif
835 if (IsConstantPoolAt(instr)) {
836 return instr->ImmLLiteral();
837 } else {
838 return -1;
839 }
840}
841
842void Assembler::EmitPoolGuard() {
843 // We must generate only one instruction as this is used in scopes that
844 // control the size of the code generated.
845 Emit(BLR | Rn(xzr));
846}
847
848void Assembler::StartBlockVeneerPool() { ++veneer_pool_blocked_nesting_; }
849
850void Assembler::EndBlockVeneerPool() {
851 if (--veneer_pool_blocked_nesting_ == 0) {
852 // Check the veneer pool hasn't been blocked for too long.
853 DCHECK(unresolved_branches_.empty() ||
854 (pc_offset() < unresolved_branches_first_limit()));
855 }
856}
857
858void Assembler::br(const Register& xn) {
859 DCHECK(xn.Is64Bits());
860 Emit(BR | Rn(xn));
861}
862
863void Assembler::blr(const Register& xn) {
864 DCHECK(xn.Is64Bits());
865 // The pattern 'blr xzr' is used as a guard to detect when execution falls
866 // through the constant pool. It should not be emitted.
867 DCHECK_NE(xn, xzr);
868 Emit(BLR | Rn(xn));
869}
870
871void Assembler::ret(const Register& xn) {
872 DCHECK(xn.Is64Bits());
873 Emit(RET | Rn(xn));
874}
875
876void Assembler::b(int imm26) { Emit(B | ImmUncondBranch(imm26)); }
877
878void Assembler::b(Label* label) {
879 b(LinkAndGetBranchInstructionOffsetTo(label));
880}
881
882void Assembler::b(int imm19, Condition cond) {
883 Emit(B_cond | ImmCondBranch(imm19) | cond);
884}
885
886void Assembler::b(Label* label, Condition cond) {
887 b(LinkAndGetBranchInstructionOffsetTo(label), cond);
888}
889
890void Assembler::bl(int imm26) { Emit(BL | ImmUncondBranch(imm26)); }
891
892void Assembler::bl(Label* label) {
893 bl(LinkAndGetBranchInstructionOffsetTo(label));
894}
895
896void Assembler::cbz(const Register& rt, int imm19) {
897 Emit(SF(rt) | CBZ | ImmCmpBranch(imm19) | Rt(rt));
898}
899
900void Assembler::cbz(const Register& rt, Label* label) {
901 cbz(rt, LinkAndGetBranchInstructionOffsetTo(label));
902}
903
904void Assembler::cbnz(const Register& rt, int imm19) {
905 Emit(SF(rt) | CBNZ | ImmCmpBranch(imm19) | Rt(rt));
906}
907
908void Assembler::cbnz(const Register& rt, Label* label) {
909 cbnz(rt, LinkAndGetBranchInstructionOffsetTo(label));
910}
911
912void Assembler::tbz(const Register& rt, unsigned bit_pos, int imm14) {
913 DCHECK(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSizeInBits)));
914 Emit(TBZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt));
915}
916
917void Assembler::tbz(const Register& rt, unsigned bit_pos, Label* label) {
918 tbz(rt, bit_pos, LinkAndGetBranchInstructionOffsetTo(label));
919}
920
921void Assembler::tbnz(const Register& rt, unsigned bit_pos, int imm14) {
922 DCHECK(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSizeInBits)));
923 Emit(TBNZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt));
924}
925
926void Assembler::tbnz(const Register& rt, unsigned bit_pos, Label* label) {
927 tbnz(rt, bit_pos, LinkAndGetBranchInstructionOffsetTo(label));
928}
929
930void Assembler::adr(const Register& rd, int imm21) {
931 DCHECK(rd.Is64Bits());
932 Emit(ADR | ImmPCRelAddress(imm21) | Rd(rd));
933}
934
935void Assembler::adr(const Register& rd, Label* label) {
936 adr(rd, LinkAndGetByteOffsetTo(label));
937}
938
939void Assembler::nop(NopMarkerTypes n) {
940 DCHECK((FIRST_NOP_MARKER <= n) && (n <= LAST_NOP_MARKER));
941 mov(Register::XRegFromCode(n), Register::XRegFromCode(n));
942}
943
944void Assembler::add(const Register& rd, const Register& rn,
945 const Operand& operand) {
946 AddSub(rd, rn, operand, LeaveFlags, ADD);
947}
948
949void Assembler::adds(const Register& rd, const Register& rn,
950 const Operand& operand) {
951 AddSub(rd, rn, operand, SetFlags, ADD);
952}
953
954void Assembler::cmn(const Register& rn, const Operand& operand) {
955 Register zr = AppropriateZeroRegFor(rn);
956 adds(zr, rn, operand);
957}
958
959void Assembler::sub(const Register& rd, const Register& rn,
960 const Operand& operand) {
961 AddSub(rd, rn, operand, LeaveFlags, SUB);
962}
963
964void Assembler::subs(const Register& rd, const Register& rn,
965 const Operand& operand) {
966 AddSub(rd, rn, operand, SetFlags, SUB);
967}
968
969void Assembler::cmp(const Register& rn, const Operand& operand) {
970 Register zr = AppropriateZeroRegFor(rn);
971 subs(zr, rn, operand);
972}
973
974void Assembler::neg(const Register& rd, const Operand& operand) {
975 Register zr = AppropriateZeroRegFor(rd);
976 sub(rd, zr, operand);
977}
978
979void Assembler::negs(const Register& rd, const Operand& operand) {
980 Register zr = AppropriateZeroRegFor(rd);
981 subs(rd, zr, operand);
982}
983
984void Assembler::adc(const Register& rd, const Register& rn,
985 const Operand& operand) {
986 AddSubWithCarry(rd, rn, operand, LeaveFlags, ADC);
987}
988
989void Assembler::adcs(const Register& rd, const Register& rn,
990 const Operand& operand) {
991 AddSubWithCarry(rd, rn, operand, SetFlags, ADC);
992}
993
994void Assembler::sbc(const Register& rd, const Register& rn,
995 const Operand& operand) {
996 AddSubWithCarry(rd, rn, operand, LeaveFlags, SBC);
997}
998
999void Assembler::sbcs(const Register& rd, const Register& rn,
1000 const Operand& operand) {
1001 AddSubWithCarry(rd, rn, operand, SetFlags, SBC);
1002}
1003
1004void Assembler::ngc(const Register& rd, const Operand& operand) {
1005 Register zr = AppropriateZeroRegFor(rd);
1006 sbc(rd, zr, operand);
1007}
1008
1009void Assembler::ngcs(const Register& rd, const Operand& operand) {
1010 Register zr = AppropriateZeroRegFor(rd);
1011 sbcs(rd, zr, operand);
1012}
1013
1014// Logical instructions.
1015void Assembler::and_(const Register& rd, const Register& rn,
1016 const Operand& operand) {
1017 Logical(rd, rn, operand, AND);
1018}
1019
1020void Assembler::ands(const Register& rd, const Register& rn,
1021 const Operand& operand) {
1022 Logical(rd, rn, operand, ANDS);
1023}
1024
1025void Assembler::tst(const Register& rn, const Operand& operand) {
1026 ands(AppropriateZeroRegFor(rn), rn, operand);
1027}
1028
1029void Assembler::bic(const Register& rd, const Register& rn,
1030 const Operand& operand) {
1031 Logical(rd, rn, operand, BIC);
1032}
1033
1034void Assembler::bics(const Register& rd, const Register& rn,
1035 const Operand& operand) {
1036 Logical(rd, rn, operand, BICS);
1037}
1038
1039void Assembler::orr(const Register& rd, const Register& rn,
1040 const Operand& operand) {
1041 Logical(rd, rn, operand, ORR);
1042}
1043
1044void Assembler::orn(const Register& rd, const Register& rn,
1045 const Operand& operand) {
1046 Logical(rd, rn, operand, ORN);
1047}
1048
1049void Assembler::eor(const Register& rd, const Register& rn,
1050 const Operand& operand) {
1051 Logical(rd, rn, operand, EOR);
1052}
1053
1054void Assembler::eon(const Register& rd, const Register& rn,
1055 const Operand& operand) {
1056 Logical(rd, rn, operand, EON);
1057}
1058
1059void Assembler::lslv(const Register& rd, const Register& rn,
1060 const Register& rm) {
1061 DCHECK(rd.SizeInBits() == rn.SizeInBits());
1062 DCHECK(rd.SizeInBits() == rm.SizeInBits());
1063 Emit(SF(rd) | LSLV | Rm(rm) | Rn(rn) | Rd(rd));
1064}
1065
1066void Assembler::lsrv(const Register& rd, const Register& rn,
1067 const Register& rm) {
1068 DCHECK(rd.SizeInBits() == rn.SizeInBits());
1069 DCHECK(rd.SizeInBits() == rm.SizeInBits());
1070 Emit(SF(rd) | LSRV | Rm(rm) | Rn(rn) | Rd(rd));
1071}
1072
1073void Assembler::asrv(const Register& rd, const Register& rn,
1074 const Register& rm) {
1075 DCHECK(rd.SizeInBits() == rn.SizeInBits());
1076 DCHECK(rd.SizeInBits() == rm.SizeInBits());
1077 Emit(SF(rd) | ASRV | Rm(rm) | Rn(rn) | Rd(rd));
1078}
1079
1080void Assembler::rorv(const Register& rd, const Register& rn,
1081 const Register& rm) {
1082 DCHECK(rd.SizeInBits() == rn.SizeInBits());
1083 DCHECK(rd.SizeInBits() == rm.SizeInBits());
1084 Emit(SF(rd) | RORV | Rm(rm) | Rn(rn) | Rd(rd));
1085}
1086
1087// Bitfield operations.
1088void Assembler::bfm(const Register& rd, const Register& rn, int immr,
1089 int imms) {
1090 DCHECK(rd.SizeInBits() == rn.SizeInBits());
1091 Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
1092 Emit(SF(rd) | BFM | N | ImmR(immr, rd.SizeInBits()) |
1093 ImmS(imms, rn.SizeInBits()) | Rn(rn) | Rd(rd));
1094}
1095
1096void Assembler::sbfm(const Register& rd, const Register& rn, int immr,
1097 int imms) {
1098 DCHECK(rd.Is64Bits() || rn.Is32Bits());
1099 Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
1100 Emit(SF(rd) | SBFM | N | ImmR(immr, rd.SizeInBits()) |
1101 ImmS(imms, rn.SizeInBits()) | Rn(rn) | Rd(rd));
1102}
1103
1104void Assembler::ubfm(const Register& rd, const Register& rn, int immr,
1105 int imms) {
1106 DCHECK(rd.SizeInBits() == rn.SizeInBits());
1107 Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
1108 Emit(SF(rd) | UBFM | N | ImmR(immr, rd.SizeInBits()) |
1109 ImmS(imms, rn.SizeInBits()) | Rn(rn) | Rd(rd));
1110}
1111
1112void Assembler::extr(const Register& rd, const Register& rn, const Register& rm,
1113 int lsb) {
1114 DCHECK(rd.SizeInBits() == rn.SizeInBits());
1115 DCHECK(rd.SizeInBits() == rm.SizeInBits());
1116 Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
1117 Emit(SF(rd) | EXTR | N | Rm(rm) | ImmS(lsb, rn.SizeInBits()) | Rn(rn) |
1118 Rd(rd));
1119}
1120
1121void Assembler::csel(const Register& rd, const Register& rn, const Register& rm,
1122 Condition cond) {
1123 ConditionalSelect(rd, rn, rm, cond, CSEL);
1124}
1125
1126void Assembler::csinc(const Register& rd, const Register& rn,
1127 const Register& rm, Condition cond) {
1128 ConditionalSelect(rd, rn, rm, cond, CSINC);
1129}
1130
1131void Assembler::csinv(const Register& rd, const Register& rn,
1132 const Register& rm, Condition cond) {
1133 ConditionalSelect(rd, rn, rm, cond, CSINV);
1134}
1135
1136void Assembler::csneg(const Register& rd, const Register& rn,
1137 const Register& rm, Condition cond) {
1138 ConditionalSelect(rd, rn, rm, cond, CSNEG);
1139}
1140
1141void Assembler::cset(const Register& rd, Condition cond) {
1142 DCHECK((cond != al) && (cond != nv));
1143 Register zr = AppropriateZeroRegFor(rd);
1144 csinc(rd, zr, zr, NegateCondition(cond));
1145}
1146
1147void Assembler::csetm(const Register& rd, Condition cond) {
1148 DCHECK((cond != al) && (cond != nv));
1149 Register zr = AppropriateZeroRegFor(rd);
1150 csinv(rd, zr, zr, NegateCondition(cond));
1151}
1152
1153void Assembler::cinc(const Register& rd, const Register& rn, Condition cond) {
1154 DCHECK((cond != al) && (cond != nv));
1155 csinc(rd, rn, rn, NegateCondition(cond));
1156}
1157
1158void Assembler::cinv(const Register& rd, const Register& rn, Condition cond) {
1159 DCHECK((cond != al) && (cond != nv));
1160 csinv(rd, rn, rn, NegateCondition(cond));
1161}
1162
1163void Assembler::cneg(const Register& rd, const Register& rn, Condition cond) {
1164 DCHECK((cond != al) && (cond != nv));
1165 csneg(rd, rn, rn, NegateCondition(cond));
1166}
1167
1168void Assembler::ConditionalSelect(const Register& rd, const Register& rn,
1169 const Register& rm, Condition cond,
1170 ConditionalSelectOp op) {
1171 DCHECK(rd.SizeInBits() == rn.SizeInBits());
1172 DCHECK(rd.SizeInBits() == rm.SizeInBits());
1173 Emit(SF(rd) | op | Rm(rm) | Cond(cond) | Rn(rn) | Rd(rd));
1174}
1175
1176void Assembler::ccmn(const Register& rn, const Operand& operand,
1177 StatusFlags nzcv, Condition cond) {
1178 ConditionalCompare(rn, operand, nzcv, cond, CCMN);
1179}
1180
1181void Assembler::ccmp(const Register& rn, const Operand& operand,
1182 StatusFlags nzcv, Condition cond) {
1183 ConditionalCompare(rn, operand, nzcv, cond, CCMP);
1184}
1185
1186void Assembler::DataProcessing3Source(const Register& rd, const Register& rn,
1187 const Register& rm, const Register& ra,
1188 DataProcessing3SourceOp op) {
1189 Emit(SF(rd) | op | Rm(rm) | Ra(ra) | Rn(rn) | Rd(rd));
1190}
1191
1192void Assembler::mul(const Register& rd, const Register& rn,
1193 const Register& rm) {
1194 DCHECK(AreSameSizeAndType(rd, rn, rm));
1195 Register zr = AppropriateZeroRegFor(rn);
1196 DataProcessing3Source(rd, rn, rm, zr, MADD);
1197}
1198
1199void Assembler::madd(const Register& rd, const Register& rn, const Register& rm,
1200 const Register& ra) {
1201 DCHECK(AreSameSizeAndType(rd, rn, rm, ra));
1202 DataProcessing3Source(rd, rn, rm, ra, MADD);
1203}
1204
1205void Assembler::mneg(const Register& rd, const Register& rn,
1206 const Register& rm) {
1207 DCHECK(AreSameSizeAndType(rd, rn, rm));
1208 Register zr = AppropriateZeroRegFor(rn);
1209 DataProcessing3Source(rd, rn, rm, zr, MSUB);
1210}
1211
1212void Assembler::msub(const Register& rd, const Register& rn, const Register& rm,
1213 const Register& ra) {
1214 DCHECK(AreSameSizeAndType(rd, rn, rm, ra));
1215 DataProcessing3Source(rd, rn, rm, ra, MSUB);
1216}
1217
1218void Assembler::smaddl(const Register& rd, const Register& rn,
1219 const Register& rm, const Register& ra) {
1220 DCHECK(rd.Is64Bits() && ra.Is64Bits());
1221 DCHECK(rn.Is32Bits() && rm.Is32Bits());
1222 DataProcessing3Source(rd, rn, rm, ra, SMADDL_x);
1223}
1224
1225void Assembler::smsubl(const Register& rd, const Register& rn,
1226 const Register& rm, const Register& ra) {
1227 DCHECK(rd.Is64Bits() && ra.Is64Bits());
1228 DCHECK(rn.Is32Bits() && rm.Is32Bits());
1229 DataProcessing3Source(rd, rn, rm, ra, SMSUBL_x);
1230}
1231
1232void Assembler::umaddl(const Register& rd, const Register& rn,
1233 const Register& rm, const Register& ra) {
1234 DCHECK(rd.Is64Bits() && ra.Is64Bits());
1235 DCHECK(rn.Is32Bits() && rm.Is32Bits());
1236 DataProcessing3Source(rd, rn, rm, ra, UMADDL_x);
1237}
1238
1239void Assembler::umsubl(const Register& rd, const Register& rn,
1240 const Register& rm, const Register& ra) {
1241 DCHECK(rd.Is64Bits() && ra.Is64Bits());
1242 DCHECK(rn.Is32Bits() && rm.Is32Bits());
1243 DataProcessing3Source(rd, rn, rm, ra, UMSUBL_x);
1244}
1245
1246void Assembler::smull(const Register& rd, const Register& rn,
1247 const Register& rm) {
1248 DCHECK(rd.Is64Bits());
1249 DCHECK(rn.Is32Bits() && rm.Is32Bits());
1250 DataProcessing3Source(rd, rn, rm, xzr, SMADDL_x);
1251}
1252
1253void Assembler::smulh(const Register& rd, const Register& rn,
1254 const Register& rm) {
1255 DCHECK(rd.Is64Bits());
1256 DCHECK(rn.Is64Bits());
1257 DCHECK(rm.Is64Bits());
1258 DataProcessing3Source(rd, rn, rm, xzr, SMULH_x);
1259}
1260
1261void Assembler::umulh(const Register& rd, const Register& rn,
1262 const Register& rm) {
1263 DCHECK(rd.Is64Bits());
1264 DCHECK(rn.Is64Bits());
1265 DCHECK(rm.Is64Bits());
1266 DataProcessing3Source(rd, rn, rm, xzr, UMULH_x);
1267}
1268
1269void Assembler::sdiv(const Register& rd, const Register& rn,
1270 const Register& rm) {
1271 DCHECK(rd.SizeInBits() == rn.SizeInBits());
1272 DCHECK(rd.SizeInBits() == rm.SizeInBits());
1273 Emit(SF(rd) | SDIV | Rm(rm) | Rn(rn) | Rd(rd));
1274}
1275
1276void Assembler::udiv(const Register& rd, const Register& rn,
1277 const Register& rm) {
1278 DCHECK(rd.SizeInBits() == rn.SizeInBits());
1279 DCHECK(rd.SizeInBits() == rm.SizeInBits());
1280 Emit(SF(rd) | UDIV | Rm(rm) | Rn(rn) | Rd(rd));
1281}
1282
1283void Assembler::rbit(const Register& rd, const Register& rn) {
1284 DataProcessing1Source(rd, rn, RBIT);
1285}
1286
1287void Assembler::rev16(const Register& rd, const Register& rn) {
1288 DataProcessing1Source(rd, rn, REV16);
1289}
1290
1291void Assembler::rev32(const Register& rd, const Register& rn) {
1292 DCHECK(rd.Is64Bits());
1293 DataProcessing1Source(rd, rn, REV);
1294}
1295
1296void Assembler::rev(const Register& rd, const Register& rn) {
1297 DataProcessing1Source(rd, rn, rd.Is64Bits() ? REV_x : REV_w);
1298}
1299
1300void Assembler::clz(const Register& rd, const Register& rn) {
1301 DataProcessing1Source(rd, rn, CLZ);
1302}
1303
1304void Assembler::cls(const Register& rd, const Register& rn) {
1305 DataProcessing1Source(rd, rn, CLS);
1306}
1307
1308void Assembler::pacib1716() { Emit(PACIB1716); }
1309void Assembler::autib1716() { Emit(AUTIB1716); }
1310void Assembler::pacibsp() { Emit(PACIBSP); }
1311void Assembler::autibsp() { Emit(AUTIBSP); }
1312
1313void Assembler::bti(BranchTargetIdentifier id) {
1314 SystemHint op;
1315 switch (id) {
1316 case BranchTargetIdentifier::kBti:
1317 op = BTI;
1318 break;
1319 case BranchTargetIdentifier::kBtiCall:
1320 op = BTI_c;
1321 break;
1322 case BranchTargetIdentifier::kBtiJump:
1323 op = BTI_j;
1324 break;
1325 case BranchTargetIdentifier::kBtiJumpCall:
1326 op = BTI_jc;
1327 break;
1328 case BranchTargetIdentifier::kNone:
1329 case BranchTargetIdentifier::kPacibsp:
1330 // We always want to generate a BTI instruction here, so disallow
1331 // skipping its generation or generating a PACIBSP instead.
1332 UNREACHABLE();
1333 }
1334 hint(op);
1335}
1336
1337void Assembler::ldp(const CPURegister& rt, const CPURegister& rt2,
1338 const MemOperand& src) {
1339 LoadStorePair(rt, rt2, src, LoadPairOpFor(rt, rt2));
1340}
1341
1342void Assembler::stp(const CPURegister& rt, const CPURegister& rt2,
1343 const MemOperand& dst) {
1344 LoadStorePair(rt, rt2, dst, StorePairOpFor(rt, rt2));
1345
1346#if defined(V8_OS_WIN)
1347 if (xdata_encoder_ && rt == x29 && rt2 == lr && dst.base().IsSP()) {
1348 xdata_encoder_->onSaveFpLr();
1349 }
1350#endif
1351}
1352
1353void Assembler::ldpsw(const Register& rt, const Register& rt2,
1354 const MemOperand& src) {
1355 DCHECK(rt.Is64Bits());
1356 LoadStorePair(rt, rt2, src, LDPSW_x);
1357}
1358
1359void Assembler::LoadStorePair(const CPURegister& rt, const CPURegister& rt2,
1360 const MemOperand& addr, LoadStorePairOp op) {
1361 // 'rt' and 'rt2' can only be aliased for stores.
1362 DCHECK(((op & LoadStorePairLBit) == 0) || rt != rt2);
1363 DCHECK(AreSameSizeAndType(rt, rt2));
1364 DCHECK(IsImmLSPair(addr.offset(), CalcLSPairDataSize(op)));
1365 int offset = static_cast<int>(addr.offset());
1366
1367 Instr memop = op | Rt(rt) | Rt2(rt2) | RnSP(addr.base()) |
1368 ImmLSPair(offset, CalcLSPairDataSize(op));
1369
1370 Instr addrmodeop;
1371 if (addr.IsImmediateOffset()) {
1372 addrmodeop = LoadStorePairOffsetFixed;
1373 } else {
1374 // Pre-index and post-index modes.
1375 DCHECK_NE(rt, addr.base());
1376 DCHECK_NE(rt2, addr.base());
1377 DCHECK_NE(addr.offset(), 0);
1378 if (addr.IsPreIndex()) {
1379 addrmodeop = LoadStorePairPreIndexFixed;
1380 } else {
1381 DCHECK(addr.IsPostIndex());
1382 addrmodeop = LoadStorePairPostIndexFixed;
1383 }
1384 }
1385 Emit(addrmodeop | memop);
1386}
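// Addressing-mode sketch (illustrative operands): the three MemOperand forms
// accepted above correspond to
//
//   stp(x0, x1, MemOperand(sp, 16));             // immediate offset
//   stp(x0, x1, MemOperand(sp, -16, PreIndex));  // base updated before access
//   ldp(x0, x1, MemOperand(sp, 16, PostIndex));  // base updated after access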
1387
1388// Memory instructions.
1389void Assembler::ldrb(const Register& rt, const MemOperand& src) {
1390 LoadStore(rt, src, LDRB_w);
1391}
1392
1393void Assembler::strb(const Register& rt, const MemOperand& dst) {
1394 LoadStore(rt, dst, STRB_w);
1395}
1396
1397void Assembler::ldrsb(const Register& rt, const MemOperand& src) {
1398 LoadStore(rt, src, rt.Is64Bits() ? LDRSB_x : LDRSB_w);
1399}
1400
1401void Assembler::ldrh(const Register& rt, const MemOperand& src) {
1402 LoadStore(rt, src, LDRH_w);
1403}
1404
1405void Assembler::strh(const Register& rt, const MemOperand& dst) {
1406 LoadStore(rt, dst, STRH_w);
1407}
1408
1409void Assembler::ldrsh(const Register& rt, const MemOperand& src) {
1410 LoadStore(rt, src, rt.Is64Bits() ? LDRSH_x : LDRSH_w);
1411}
1412
1413void Assembler::ldr(const CPURegister& rt, const MemOperand& src) {
1414 LoadStore(rt, src, LoadOpFor(rt));
1415}
1416
1417void Assembler::str(const CPURegister& rt, const MemOperand& src) {
1418 LoadStore(rt, src, StoreOpFor(rt));
1419}
1420
1421void Assembler::ldrsw(const Register& rt, const MemOperand& src) {
1422 DCHECK(rt.Is64Bits());
1423 LoadStore(rt, src, LDRSW_x);
1424}
1425
1426void Assembler::ldr_pcrel(const CPURegister& rt, int imm19) {
1427 // The pattern 'ldr xzr, #offset' is used to indicate the beginning of a
1428 // constant pool. It should not be emitted.
1429 DCHECK(!rt.IsZero());
1430 Emit(LoadLiteralOpFor(rt) | ImmLLiteral(imm19) | Rt(rt));
1431}
1432
1433Operand Operand::EmbeddedNumber(double number) {
1434 int32_t smi;
1435 if (DoubleToSmiInteger(number, &smi)) {
1436 return Operand(Immediate(Smi::FromInt(smi)));
1437 }
1438 return EmbeddedHeapNumber(number);
1439}
1440
1441Operand Operand::EmbeddedHeapNumber(double number) {
1442 Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
1443 result.heap_number_request_.emplace(number);
1444 DCHECK(result.IsHeapNumberRequest());
1445 return result;
1446}
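// Example (illustrative): Operand::EmbeddedNumber(7) fits in a Smi and becomes
// an ordinary immediate, whereas Operand::EmbeddedNumber(0.5) records a heap
// number request that AllocateAndInstallRequestedHeapNumbers() patches in once
// the number has been allocated.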
1447
1448void Assembler::ldr(const CPURegister& rt, const Operand& operand) {
1449 if (operand.IsHeapNumberRequest()) {
1450 BlockPoolsScope no_pool_before_ldr_of_heap_number_request(this);
1451 RequestHeapNumber(operand.heap_number_request());
1452 ldr(rt, operand.immediate_for_heap_number_request());
1453 } else {
1454 ldr(rt, operand.immediate());
1455 }
1456}
1457
1458void Assembler::ldr(const CPURegister& rt, const Immediate& imm) {
1459 BlockPoolsScope no_pool_before_ldr_pcrel_instr(this);
1460 RecordRelocInfo(imm.rmode(), imm.value());
 1461 // The load will be patched when the constant pool is emitted; the patching
 1462 // code expects a load literal with offset 0.
1463 ldr_pcrel(rt, 0);
1464}
1465
1466void Assembler::ldar(const Register& rt, const Register& rn) {
1467 DCHECK(rn.Is64Bits());
1468 LoadStoreAcquireReleaseOp op = rt.Is32Bits() ? LDAR_w : LDAR_x;
1469 Emit(op | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1470}
1471
1472void Assembler::ldaxr(const Register& rt, const Register& rn) {
1473 DCHECK(rn.Is64Bits());
1474 LoadStoreAcquireReleaseOp op = rt.Is32Bits() ? LDAXR_w : LDAXR_x;
1475 Emit(op | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1476}
1477
1478void Assembler::stlr(const Register& rt, const Register& rn) {
1479 DCHECK(rn.Is64Bits());
1480 LoadStoreAcquireReleaseOp op = rt.Is32Bits() ? STLR_w : STLR_x;
1481 Emit(op | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1482}
1483
1484void Assembler::stlxr(const Register& rs, const Register& rt,
1485 const Register& rn) {
1486 DCHECK(rn.Is64Bits());
1487 DCHECK(rs != rt && rs != rn);
1488 LoadStoreAcquireReleaseOp op = rt.Is32Bits() ? STLXR_w : STLXR_x;
1489 Emit(op | Rs(rs) | Rt2(x31) | RnSP(rn) | Rt(rt));
1490}
1491
1492void Assembler::ldarb(const Register& rt, const Register& rn) {
1493 DCHECK(rt.Is32Bits());
1494 DCHECK(rn.Is64Bits());
1495 Emit(LDAR_b | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1496}
1497
1498void Assembler::ldaxrb(const Register& rt, const Register& rn) {
1499 DCHECK(rt.Is32Bits());
1500 DCHECK(rn.Is64Bits());
1501 Emit(LDAXR_b | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1502}
1503
1504void Assembler::stlrb(const Register& rt, const Register& rn) {
1505 DCHECK(rt.Is32Bits());
1506 DCHECK(rn.Is64Bits());
1507 Emit(STLR_b | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1508}
1509
1510void Assembler::stlxrb(const Register& rs, const Register& rt,
1511 const Register& rn) {
1512 DCHECK(rs.Is32Bits());
1513 DCHECK(rt.Is32Bits());
1514 DCHECK(rn.Is64Bits());
1515 DCHECK(rs != rt && rs != rn);
1516 Emit(STLXR_b | Rs(rs) | Rt2(x31) | RnSP(rn) | Rt(rt));
1517}
1518
1519void Assembler::ldarh(const Register& rt, const Register& rn) {
1520 DCHECK(rt.Is32Bits());
1521 DCHECK(rn.Is64Bits());
1522 Emit(LDAR_h | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1523}
1524
1525void Assembler::ldaxrh(const Register& rt, const Register& rn) {
1526 DCHECK(rt.Is32Bits());
1527 DCHECK(rn.Is64Bits());
1528 Emit(LDAXR_h | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1529}
1530
1531void Assembler::stlrh(const Register& rt, const Register& rn) {
1532 DCHECK(rt.Is32Bits());
1533 DCHECK(rn.Is64Bits());
1534 Emit(STLR_h | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1535}
1536
1537void Assembler::stlxrh(const Register& rs, const Register& rt,
1538 const Register& rn) {
1539 DCHECK(rs.Is32Bits());
1540 DCHECK(rt.Is32Bits());
1541 DCHECK(rn.Is64Bits());
1542 DCHECK(rs != rt && rs != rn);
1543 Emit(STLXR_h | Rs(rs) | Rt2(x31) | RnSP(rn) | Rt(rt));
1544}
1545
1546#define COMPARE_AND_SWAP_W_X_LIST(V) \
1547 V(cas, CAS) \
1548 V(casa, CASA) \
1549 V(casl, CASL) \
1550 V(casal, CASAL)
1551
1552#define DEFINE_ASM_FUNC(FN, OP) \
1553 void Assembler::FN(const Register& rs, const Register& rt, \
1554 const MemOperand& src) { \
1555 DCHECK(IsEnabled(LSE)); \
1556 DCHECK(src.IsImmediateOffset() && (src.offset() == 0)); \
1557 LoadStoreAcquireReleaseOp op = rt.Is64Bits() ? OP##_x : OP##_w; \
1558 Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.base())); \
1559 }
1560COMPARE_AND_SWAP_W_X_LIST(DEFINE_ASM_FUNC)
1561#undef DEFINE_ASM_FUNC
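// For reference, the macro above expands casal, for example, to:
//
//   void Assembler::casal(const Register& rs, const Register& rt,
//                         const MemOperand& src) {
//     DCHECK(IsEnabled(LSE));
//     DCHECK(src.IsImmediateOffset() && (src.offset() == 0));
//     LoadStoreAcquireReleaseOp op = rt.Is64Bits() ? CASAL_x : CASAL_w;
//     Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.base()));
//   }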
1562
1563#define COMPARE_AND_SWAP_W_LIST(V) \
1564 V(casb, CASB) \
1565 V(casab, CASAB) \
1566 V(caslb, CASLB) \
1567 V(casalb, CASALB) \
1568 V(cash, CASH) \
1569 V(casah, CASAH) \
1570 V(caslh, CASLH) \
1571 V(casalh, CASALH)
1572
1573#define DEFINE_ASM_FUNC(FN, OP) \
1574 void Assembler::FN(const Register& rs, const Register& rt, \
1575 const MemOperand& src) { \
1576 DCHECK(IsEnabled(LSE)); \
1577 DCHECK(src.IsImmediateOffset() && (src.offset() == 0)); \
1578 Emit(OP | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.base())); \
1579 }
1580COMPARE_AND_SWAP_W_LIST(DEFINE_ASM_FUNC)
1581#undef DEFINE_ASM_FUNC
1582
1583#define COMPARE_AND_SWAP_PAIR_LIST(V) \
1584 V(casp, CASP) \
1585 V(caspa, CASPA) \
1586 V(caspl, CASPL) \
1587 V(caspal, CASPAL)
1588
1589#define DEFINE_ASM_FUNC(FN, OP) \
1590 void Assembler::FN(const Register& rs, const Register& rs1, \
1591 const Register& rt, const Register& rt1, \
1592 const MemOperand& src) { \
1593 DCHECK(IsEnabled(LSE)); \
1594 DCHECK(src.IsImmediateOffset() && (src.offset() == 0)); \
1595 DCHECK(AreEven(rs, rt)); \
1596 DCHECK(AreConsecutive(rs, rs1)); \
1597 DCHECK(AreConsecutive(rt, rt1)); \
1598 DCHECK(AreSameFormat(rs, rs1, rt, rt1)); \
1599 LoadStoreAcquireReleaseOp op = rt.Is64Bits() ? OP##_x : OP##_w; \
1600 Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.base())); \
1601 }
1602COMPARE_AND_SWAP_PAIR_LIST(DEFINE_ASM_FUNC)
1603#undef DEFINE_ASM_FUNC
1604
1605// These macros generate all the variations of the atomic memory operations,
1606// e.g. ldadd, ldadda, ldaddb, staddl, etc.
1607// For a full list of the methods with comments, see the assembler header file.
1608
1609#define ATOMIC_MEMORY_SIMPLE_OPERATION_LIST(V, DEF) \
1610 V(DEF, add, LDADD) \
1611 V(DEF, clr, LDCLR) \
1612 V(DEF, eor, LDEOR) \
1613 V(DEF, set, LDSET) \
1614 V(DEF, smax, LDSMAX) \
1615 V(DEF, smin, LDSMIN) \
1616 V(DEF, umax, LDUMAX) \
1617 V(DEF, umin, LDUMIN)
1618
1619#define ATOMIC_MEMORY_STORE_MODES(V, NAME, OP) \
1620 V(NAME, OP##_x, OP##_w) \
1621 V(NAME##l, OP##L_x, OP##L_w) \
1622 V(NAME##b, OP##B, OP##B) \
1623 V(NAME##lb, OP##LB, OP##LB) \
1624 V(NAME##h, OP##H, OP##H) \
1625 V(NAME##lh, OP##LH, OP##LH)
1626
1627#define ATOMIC_MEMORY_LOAD_MODES(V, NAME, OP) \
1628 ATOMIC_MEMORY_STORE_MODES(V, NAME, OP) \
1629 V(NAME##a, OP##A_x, OP##A_w) \
1630 V(NAME##al, OP##AL_x, OP##AL_w) \
1631 V(NAME##ab, OP##AB, OP##AB) \
1632 V(NAME##alb, OP##ALB, OP##ALB) \
1633 V(NAME##ah, OP##AH, OP##AH) \
1634 V(NAME##alh, OP##ALH, OP##ALH)
1635
1636#define DEFINE_ASM_LOAD_FUNC(FN, OP_X, OP_W) \
1637 void Assembler::ld##FN(const Register& rs, const Register& rt, \
1638 const MemOperand& src) { \
1639 DCHECK(IsEnabled(LSE)); \
1640 DCHECK(src.IsImmediateOffset() && (src.offset() == 0)); \
1641 AtomicMemoryOp op = rt.Is64Bits() ? OP_X : OP_W; \
1642 Emit(op | Rs(rs) | Rt(rt) | RnSP(src.base())); \
1643 }
1644#define DEFINE_ASM_STORE_FUNC(FN, OP_X, OP_W) \
1645 void Assembler::st##FN(const Register& rs, const MemOperand& src) { \
1646 DCHECK(IsEnabled(LSE)); \
1647 ld##FN(rs, AppropriateZeroRegFor(rs), src); \
1648 }
1649
1650ATOMIC_MEMORY_SIMPLE_OPERATION_LIST(ATOMIC_MEMORY_LOAD_MODES,
1651 DEFINE_ASM_LOAD_FUNC)
1652ATOMIC_MEMORY_SIMPLE_OPERATION_LIST(ATOMIC_MEMORY_STORE_MODES,
1653 DEFINE_ASM_STORE_FUNC)
1654
1655#define DEFINE_ASM_SWP_FUNC(FN, OP_X, OP_W) \
1656 void Assembler::FN(const Register& rs, const Register& rt, \
1657 const MemOperand& src) { \
1658 DCHECK(IsEnabled(LSE)); \
1659 DCHECK(src.IsImmediateOffset() && (src.offset() == 0)); \
1660 AtomicMemoryOp op = rt.Is64Bits() ? OP_X : OP_W; \
1661 Emit(op | Rs(rs) | Rt(rt) | RnSP(src.base())); \
1662 }
1663
1664ATOMIC_MEMORY_LOAD_MODES(DEFINE_ASM_SWP_FUNC, swp, SWP)
1665
1666#undef DEFINE_ASM_LOAD_FUNC
1667#undef DEFINE_ASM_STORE_FUNC
1668#undef DEFINE_ASM_SWP_FUNC
1669
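// sdot computes a signed dot product: each group of four byte elements in vn
// and vm is multiplied and summed, and the result is accumulated into the
// corresponding 32-bit lane of vd.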
1670void Assembler::sdot(const VRegister& vd, const VRegister& vn,
1671 const VRegister& vm) {
1672 DCHECK(IsEnabled(DOTPROD));
1673 DCHECK((vn.Is16B() && vd.Is4S()) || (vn.Is8B() && vd.Is2S()));
1674 DCHECK(AreSameFormat(vn, vm));
1675 Emit(VFormat(vd) | NEON_SDOT | Rm(vm) | Rn(vn) | Rd(vd));
1676}
1677
1678void Assembler::NEON3DifferentL(const VRegister& vd, const VRegister& vn,
1679 const VRegister& vm, NEON3DifferentOp vop) {
1680 DCHECK(AreSameFormat(vn, vm));
1681 DCHECK((vn.Is1H() && vd.Is1S()) || (vn.Is1S() && vd.Is1D()) ||
1682 (vn.Is8B() && vd.Is8H()) || (vn.Is4H() && vd.Is4S()) ||
1683 (vn.Is2S() && vd.Is2D()) || (vn.Is16B() && vd.Is8H()) ||
1684 (vn.Is8H() && vd.Is4S()) || (vn.Is4S() && vd.Is2D()));
1685 Instr format, op = vop;
1686 if (vd.IsScalar()) {
1687 op |= NEON_Q | NEONScalar;
1688 format = SFormat(vn);
1689 } else {
1690 format = VFormat(vn);
1691 }
1692 Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd));
1693}
1694
1695void Assembler::NEON3DifferentW(const VRegister& vd, const VRegister& vn,
1696 const VRegister& vm, NEON3DifferentOp vop) {
1697 DCHECK(AreSameFormat(vd, vn));
1698 DCHECK((vm.Is8B() && vd.Is8H()) || (vm.Is4H() && vd.Is4S()) ||
1699 (vm.Is2S() && vd.Is2D()) || (vm.Is16B() && vd.Is8H()) ||
1700 (vm.Is8H() && vd.Is4S()) || (vm.Is4S() && vd.Is2D()));
1701 Emit(VFormat(vm) | vop | Rm(vm) | Rn(vn) | Rd(vd));
1702}
1703
1704void Assembler::NEON3DifferentHN(const VRegister& vd, const VRegister& vn,
1705 const VRegister& vm, NEON3DifferentOp vop) {
1706 DCHECK(AreSameFormat(vm, vn));
1707 DCHECK((vd.Is8B() && vn.Is8H()) || (vd.Is4H() && vn.Is4S()) ||
1708 (vd.Is2S() && vn.Is2D()) || (vd.Is16B() && vn.Is8H()) ||
1709 (vd.Is8H() && vn.Is4S()) || (vd.Is4S() && vn.Is2D()));
1710 Emit(VFormat(vd) | vop | Rm(vm) | Rn(vn) | Rd(vd));
1711}
1712
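// The three helpers above cover the "3 different" shapes: NEON3DifferentL
// widens narrow sources into a wider destination, NEON3DifferentW takes a
// wide first source and a narrow second source, and NEON3DifferentHN narrows
// wide sources into a half-width destination (the '2' mnemonics write the
// high half). The lists below select the appropriate helper per mnemonic.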
1713#define NEON_3DIFF_LONG_LIST(V) \
1714 V(saddl, NEON_SADDL, vn.IsVector() && vn.IsD()) \
1715 V(saddl2, NEON_SADDL2, vn.IsVector() && vn.IsQ()) \
1716 V(sabal, NEON_SABAL, vn.IsVector() && vn.IsD()) \
1717 V(sabal2, NEON_SABAL2, vn.IsVector() && vn.IsQ()) \
1718 V(uabal, NEON_UABAL, vn.IsVector() && vn.IsD()) \
1719 V(uabal2, NEON_UABAL2, vn.IsVector() && vn.IsQ()) \
1720 V(sabdl, NEON_SABDL, vn.IsVector() && vn.IsD()) \
1721 V(sabdl2, NEON_SABDL2, vn.IsVector() && vn.IsQ()) \
1722 V(uabdl, NEON_UABDL, vn.IsVector() && vn.IsD()) \
1723 V(uabdl2, NEON_UABDL2, vn.IsVector() && vn.IsQ()) \
1724 V(smlal, NEON_SMLAL, vn.IsVector() && vn.IsD()) \
1725 V(smlal2, NEON_SMLAL2, vn.IsVector() && vn.IsQ()) \
1726 V(umlal, NEON_UMLAL, vn.IsVector() && vn.IsD()) \
1727 V(umlal2, NEON_UMLAL2, vn.IsVector() && vn.IsQ()) \
1728 V(smlsl, NEON_SMLSL, vn.IsVector() && vn.IsD()) \
1729 V(smlsl2, NEON_SMLSL2, vn.IsVector() && vn.IsQ()) \
1730 V(umlsl, NEON_UMLSL, vn.IsVector() && vn.IsD()) \
1731 V(umlsl2, NEON_UMLSL2, vn.IsVector() && vn.IsQ()) \
1732 V(smull, NEON_SMULL, vn.IsVector() && vn.IsD()) \
1733 V(smull2, NEON_SMULL2, vn.IsVector() && vn.IsQ()) \
1734 V(umull, NEON_UMULL, vn.IsVector() && vn.IsD()) \
1735 V(umull2, NEON_UMULL2, vn.IsVector() && vn.IsQ()) \
1736 V(ssubl, NEON_SSUBL, vn.IsVector() && vn.IsD()) \
1737 V(ssubl2, NEON_SSUBL2, vn.IsVector() && vn.IsQ()) \
1738 V(uaddl, NEON_UADDL, vn.IsVector() && vn.IsD()) \
1739 V(uaddl2, NEON_UADDL2, vn.IsVector() && vn.IsQ()) \
1740 V(usubl, NEON_USUBL, vn.IsVector() && vn.IsD()) \
1741 V(usubl2, NEON_USUBL2, vn.IsVector() && vn.IsQ()) \
1742 V(sqdmlal, NEON_SQDMLAL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \
1743 V(sqdmlal2, NEON_SQDMLAL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \
1744 V(sqdmlsl, NEON_SQDMLSL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \
1745 V(sqdmlsl2, NEON_SQDMLSL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \
1746 V(sqdmull, NEON_SQDMULL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \
1747 V(sqdmull2, NEON_SQDMULL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S())
1748
1749#define DEFINE_ASM_FUNC(FN, OP, AS) \
1750 void Assembler::FN(const VRegister& vd, const VRegister& vn, \
1751 const VRegister& vm) { \
1752 DCHECK(AS); \
1753 NEON3DifferentL(vd, vn, vm, OP); \
1754 }
1755NEON_3DIFF_LONG_LIST(DEFINE_ASM_FUNC)
1756#undef DEFINE_ASM_FUNC
1757
1758#define NEON_3DIFF_HN_LIST(V) \
1759 V(addhn, NEON_ADDHN, vd.IsD()) \
1760 V(addhn2, NEON_ADDHN2, vd.IsQ()) \
1761 V(raddhn, NEON_RADDHN, vd.IsD()) \
1762 V(raddhn2, NEON_RADDHN2, vd.IsQ()) \
1763 V(subhn, NEON_SUBHN, vd.IsD()) \
1764 V(subhn2, NEON_SUBHN2, vd.IsQ()) \
1765 V(rsubhn, NEON_RSUBHN, vd.IsD()) \
1766 V(rsubhn2, NEON_RSUBHN2, vd.IsQ())
1767
1768#define DEFINE_ASM_FUNC(FN, OP, AS) \
1769 void Assembler::FN(const VRegister& vd, const VRegister& vn, \
1770 const VRegister& vm) { \
1771 DCHECK(AS); \
1772 NEON3DifferentHN(vd, vn, vm, OP); \
1773 }
1774NEON_3DIFF_HN_LIST(DEFINE_ASM_FUNC)
1775#undef DEFINE_ASM_FUNC
1776
1777void Assembler::NEONPerm(const VRegister& vd, const VRegister& vn,
1778 const VRegister& vm, NEONPermOp op) {
1779 DCHECK(AreSameFormat(vd, vn, vm));
1780 DCHECK(!vd.Is1D());
1781 Emit(VFormat(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
1782}
1783
1784void Assembler::trn1(const VRegister& vd, const VRegister& vn,
1785 const VRegister& vm) {
1786 NEONPerm(vd, vn, vm, NEON_TRN1);
1787}
1788
1789void Assembler::trn2(const VRegister& vd, const VRegister& vn,
1790 const VRegister& vm) {
1791 NEONPerm(vd, vn, vm, NEON_TRN2);
1792}
1793
1794void Assembler::uzp1(const VRegister& vd, const VRegister& vn,
1795 const VRegister& vm) {
1796 NEONPerm(vd, vn, vm, NEON_UZP1);
1797}
1798
1799void Assembler::uzp2(const VRegister& vd, const VRegister& vn,
1800 const VRegister& vm) {
1801 NEONPerm(vd, vn, vm, NEON_UZP2);
1802}
1803
1804void Assembler::zip1(const VRegister& vd, const VRegister& vn,
1805 const VRegister& vm) {
1806 NEONPerm(vd, vn, vm, NEON_ZIP1);
1807}
1808
1809void Assembler::zip2(const VRegister& vd, const VRegister& vn,
1810 const VRegister& vm) {
1811 NEONPerm(vd, vn, vm, NEON_ZIP2);
1812}
1813
1814void Assembler::NEONShiftImmediate(const VRegister& vd, const VRegister& vn,
1815 NEONShiftImmediateOp op, int immh_immb) {
1816 DCHECK(AreSameFormat(vd, vn));
1817 Instr q, scalar;
1818 if (vn.IsScalar()) {
1819 q = NEON_Q;
1820 scalar = NEONScalar;
1821 } else {
1822 q = vd.IsD() ? 0 : NEON_Q;
1823 scalar = 0;
1824 }
1825 Emit(q | op | scalar | immh_immb | Rn(vn) | Rd(vd));
1826}
1827
1828void Assembler::NEONShiftLeftImmediate(const VRegister& vd, const VRegister& vn,
1829 int shift, NEONShiftImmediateOp op) {
1830 int laneSizeInBits = vn.LaneSizeInBits();
1831 DCHECK((shift >= 0) && (shift < laneSizeInBits));
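  // For left shifts, immh:immb encodes both the lane size and the shift
  // amount: (laneSizeInBits + shift) places a leading one at the lane-size
  // position with the shift amount in the bits below it.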
1832 NEONShiftImmediate(vd, vn, op, (laneSizeInBits + shift) << 16);
1833}
1834
1835void Assembler::NEONShiftRightImmediate(const VRegister& vd,
1836 const VRegister& vn, int shift,
1837 NEONShiftImmediateOp op) {
1838 int laneSizeInBits = vn.LaneSizeInBits();
1839 DCHECK((shift >= 1) && (shift <= laneSizeInBits));
1840 NEONShiftImmediate(vd, vn, op, ((2 * laneSizeInBits) - shift) << 16);
1841}
1842
1843void Assembler::NEONShiftImmediateL(const VRegister& vd, const VRegister& vn,
1844 int shift, NEONShiftImmediateOp op) {
1845 int laneSizeInBits = vn.LaneSizeInBits();
1846 DCHECK((shift >= 0) && (shift < laneSizeInBits));
1847 int immh_immb = (laneSizeInBits + shift) << 16;
1848
1849 DCHECK((vn.Is8B() && vd.Is8H()) || (vn.Is4H() && vd.Is4S()) ||
1850 (vn.Is2S() && vd.Is2D()) || (vn.Is16B() && vd.Is8H()) ||
1851 (vn.Is8H() && vd.Is4S()) || (vn.Is4S() && vd.Is2D()));
1852 Instr q;
1853 q = vn.IsD() ? 0 : NEON_Q;
1854 Emit(q | op | immh_immb | Rn(vn) | Rd(vd));
1855}
1856
1857void Assembler::NEONShiftImmediateN(const VRegister& vd, const VRegister& vn,
1858 int shift, NEONShiftImmediateOp op) {
1859 Instr q, scalar;
1860 int laneSizeInBits = vd.LaneSizeInBits();
1861 DCHECK((shift >= 1) && (shift <= laneSizeInBits));
1862 int immh_immb = (2 * laneSizeInBits - shift) << 16;
1863
1864 if (vn.IsScalar()) {
1865 DCHECK((vd.Is1B() && vn.Is1H()) || (vd.Is1H() && vn.Is1S()) ||
1866 (vd.Is1S() && vn.Is1D()));
1867 q = NEON_Q;
1868 scalar = NEONScalar;
1869 } else {
1870 DCHECK((vd.Is8B() && vn.Is8H()) || (vd.Is4H() && vn.Is4S()) ||
1871 (vd.Is2S() && vn.Is2D()) || (vd.Is16B() && vn.Is8H()) ||
1872 (vd.Is8H() && vn.Is4S()) || (vd.Is4S() && vn.Is2D()));
1873 scalar = 0;
1874 q = vd.IsD() ? 0 : NEON_Q;
1875 }
1876 Emit(q | op | scalar | immh_immb | Rn(vn) | Rd(vd));
1877}
1878
1879void Assembler::shl(const VRegister& vd, const VRegister& vn, int shift) {
1880 DCHECK(vd.IsVector() || vd.Is1D());
1881 NEONShiftLeftImmediate(vd, vn, shift, NEON_SHL);
1882}
1883
1884void Assembler::sli(const VRegister& vd, const VRegister& vn, int shift) {
1885 DCHECK(vd.IsVector() || vd.Is1D());
1886 NEONShiftLeftImmediate(vd, vn, shift, NEON_SLI);
1887}
1888
1889void Assembler::sqshl(const VRegister& vd, const VRegister& vn, int shift) {
1890 NEONShiftLeftImmediate(vd, vn, shift, NEON_SQSHL_imm);
1891}
1892
1893void Assembler::sqshlu(const VRegister& vd, const VRegister& vn, int shift) {
1894 NEONShiftLeftImmediate(vd, vn, shift, NEON_SQSHLU);
1895}
1896
1897void Assembler::uqshl(const VRegister& vd, const VRegister& vn, int shift) {
1898 NEONShiftLeftImmediate(vd, vn, shift, NEON_UQSHL_imm);
1899}
1900
1901void Assembler::sshll(const VRegister& vd, const VRegister& vn, int shift) {
1902 DCHECK(vn.IsD());
1903 NEONShiftImmediateL(vd, vn, shift, NEON_SSHLL);
1904}
1905
1906void Assembler::sshll2(const VRegister& vd, const VRegister& vn, int shift) {
1907 DCHECK(vn.IsQ());
1908 NEONShiftImmediateL(vd, vn, shift, NEON_SSHLL);
1909}
1910
1911void Assembler::sxtl(const VRegister& vd, const VRegister& vn) {
1912 sshll(vd, vn, 0);
1913}
1914
1915void Assembler::sxtl2(const VRegister& vd, const VRegister& vn) {
1916 sshll2(vd, vn, 0);
1917}
1918
1919void Assembler::ushll(const VRegister& vd, const VRegister& vn, int shift) {
1920 DCHECK(vn.IsD());
1921 NEONShiftImmediateL(vd, vn, shift, NEON_USHLL);
1922}
1923
1924void Assembler::ushll2(const VRegister& vd, const VRegister& vn, int shift) {
1925 DCHECK(vn.IsQ());
1926 NEONShiftImmediateL(vd, vn, shift, NEON_USHLL);
1927}
1928
1929void Assembler::uxtl(const VRegister& vd, const VRegister& vn) {
1930 ushll(vd, vn, 0);
1931}
1932
1933void Assembler::uxtl2(const VRegister& vd, const VRegister& vn) {
1934 ushll2(vd, vn, 0);
1935}
1936
1937void Assembler::sri(const VRegister& vd, const VRegister& vn, int shift) {
1938 DCHECK(vd.IsVector() || vd.Is1D());
1939 NEONShiftRightImmediate(vd, vn, shift, NEON_SRI);
1940}
1941
1942void Assembler::sshr(const VRegister& vd, const VRegister& vn, int shift) {
1943 DCHECK(vd.IsVector() || vd.Is1D());
1944 NEONShiftRightImmediate(vd, vn, shift, NEON_SSHR);
1945}
1946
1947void Assembler::ushr(const VRegister& vd, const VRegister& vn, int shift) {
1948 DCHECK(vd.IsVector() || vd.Is1D());
1949 NEONShiftRightImmediate(vd, vn, shift, NEON_USHR);
1950}
1951
1952void Assembler::srshr(const VRegister& vd, const VRegister& vn, int shift) {
1953 DCHECK(vd.IsVector() || vd.Is1D());
1954 NEONShiftRightImmediate(vd, vn, shift, NEON_SRSHR);
1955}
1956
1957void Assembler::urshr(const VRegister& vd, const VRegister& vn, int shift) {
1958 DCHECK(vd.IsVector() || vd.Is1D());
1959 NEONShiftRightImmediate(vd, vn, shift, NEON_URSHR);
1960}
1961
1962void Assembler::ssra(const VRegister& vd, const VRegister& vn, int shift) {
1963 DCHECK(vd.IsVector() || vd.Is1D());
1964 NEONShiftRightImmediate(vd, vn, shift, NEON_SSRA);
1965}
1966
1967void Assembler::usra(const VRegister& vd, const VRegister& vn, int shift) {
1968 DCHECK(vd.IsVector() || vd.Is1D());
1969 NEONShiftRightImmediate(vd, vn, shift, NEON_USRA);
1970}
1971
1972void Assembler::srsra(const VRegister& vd, const VRegister& vn, int shift) {
1973 DCHECK(vd.IsVector() || vd.Is1D());
1974 NEONShiftRightImmediate(vd, vn, shift, NEON_SRSRA);
1975}
1976
1977void Assembler::ursra(const VRegister& vd, const VRegister& vn, int shift) {
1978 DCHECK(vd.IsVector() || vd.Is1D());
1979 NEONShiftRightImmediate(vd, vn, shift, NEON_URSRA);
1980}
1981
1982void Assembler::shrn(const VRegister& vd, const VRegister& vn, int shift) {
1983 DCHECK(vn.IsVector() && vd.IsD());
1984 NEONShiftImmediateN(vd, vn, shift, NEON_SHRN);
1985}
1986
1987void Assembler::shrn2(const VRegister& vd, const VRegister& vn, int shift) {
1988 DCHECK(vn.IsVector() && vd.IsQ());
1989 NEONShiftImmediateN(vd, vn, shift, NEON_SHRN);
1990}
1991
1992void Assembler::rshrn(const VRegister& vd, const VRegister& vn, int shift) {
1993 DCHECK(vn.IsVector() && vd.IsD());
1994 NEONShiftImmediateN(vd, vn, shift, NEON_RSHRN);
1995}
1996
1997void Assembler::rshrn2(const VRegister& vd, const VRegister& vn, int shift) {
1998 DCHECK(vn.IsVector() && vd.IsQ());
1999 NEONShiftImmediateN(vd, vn, shift, NEON_RSHRN);
2000}
2001
2002void Assembler::sqshrn(const VRegister& vd, const VRegister& vn, int shift) {
2003 DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
2004 NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRN);
2005}
2006
2007void Assembler::sqshrn2(const VRegister& vd, const VRegister& vn, int shift) {
2008 DCHECK(vn.IsVector() && vd.IsQ());
2009 NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRN);
2010}
2011
2012void Assembler::sqrshrn(const VRegister& vd, const VRegister& vn, int shift) {
2013 DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
2014 NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRN);
2015}
2016
2017void Assembler::sqrshrn2(const VRegister& vd, const VRegister& vn, int shift) {
2018 DCHECK(vn.IsVector() && vd.IsQ());
2019 NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRN);
2020}
2021
2022void Assembler::sqshrun(const VRegister& vd, const VRegister& vn, int shift) {
2023 DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
2024 NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRUN);
2025}
2026
2027void Assembler::sqshrun2(const VRegister& vd, const VRegister& vn, int shift) {
2028 DCHECK(vn.IsVector() && vd.IsQ());
2029 NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRUN);
2030}
2031
2032void Assembler::sqrshrun(const VRegister& vd, const VRegister& vn, int shift) {
2033 DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
2034 NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRUN);
2035}
2036
2037void Assembler::sqrshrun2(const VRegister& vd, const VRegister& vn, int shift) {
2038 DCHECK(vn.IsVector() && vd.IsQ());
2039 NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRUN);
2040}
2041
2042void Assembler::uqshrn(const VRegister& vd, const VRegister& vn, int shift) {
2043 DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
2044 NEONShiftImmediateN(vd, vn, shift, NEON_UQSHRN);
2045}
2046
2047void Assembler::uqshrn2(const VRegister& vd, const VRegister& vn, int shift) {
2048 DCHECK(vn.IsVector() && vd.IsQ());
2049 NEONShiftImmediateN(vd, vn, shift, NEON_UQSHRN);
2050}
2051
2052void Assembler::uqrshrn(const VRegister& vd, const VRegister& vn, int shift) {
2053 DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
2054 NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN);
2055}
2056
2057void Assembler::uqrshrn2(const VRegister& vd, const VRegister& vn, int shift) {
2058 DCHECK(vn.IsVector() && vd.IsQ());
2059 NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN);
2060}
2061
2062void Assembler::uaddw(const VRegister& vd, const VRegister& vn,
2063 const VRegister& vm) {
2064 DCHECK(vm.IsD());
2065 NEON3DifferentW(vd, vn, vm, NEON_UADDW);
2066}
2067
2068void Assembler::uaddw2(const VRegister& vd, const VRegister& vn,
2069 const VRegister& vm) {
2070 DCHECK(vm.IsQ());
2071 NEON3DifferentW(vd, vn, vm, NEON_UADDW2);
2072}
2073
2074void Assembler::saddw(const VRegister& vd, const VRegister& vn,
2075 const VRegister& vm) {
2076 DCHECK(vm.IsD());
2077 NEON3DifferentW(vd, vn, vm, NEON_SADDW);
2078}
2079
2080void Assembler::saddw2(const VRegister& vd, const VRegister& vn,
2081 const VRegister& vm) {
2082 DCHECK(vm.IsQ());
2083 NEON3DifferentW(vd, vn, vm, NEON_SADDW2);
2084}
2085
2086void Assembler::usubw(const VRegister& vd, const VRegister& vn,
2087 const VRegister& vm) {
2088 DCHECK(vm.IsD());
2089 NEON3DifferentW(vd, vn, vm, NEON_USUBW);
2090}
2091
2092void Assembler::usubw2(const VRegister& vd, const VRegister& vn,
2093 const VRegister& vm) {
2094 DCHECK(vm.IsQ());
2095 NEON3DifferentW(vd, vn, vm, NEON_USUBW2);
2096}
2097
2098void Assembler::ssubw(const VRegister& vd, const VRegister& vn,
2099 const VRegister& vm) {
2100 DCHECK(vm.IsD());
2101 NEON3DifferentW(vd, vn, vm, NEON_SSUBW);
2102}
2103
2104void Assembler::ssubw2(const VRegister& vd, const VRegister& vn,
2105 const VRegister& vm) {
2106 DCHECK(vm.IsQ());
2107 NEON3DifferentW(vd, vn, vm, NEON_SSUBW2);
2108}
2109
2110void Assembler::mov(const Register& rd, const Register& rm) {
2111 // Moves involving the stack pointer are encoded as add immediate with
2112 // second operand of zero. Otherwise, orr with first operand zr is
2113 // used.
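  // For example, mov(sp, x0) assembles as "add sp, x0, #0", whereas
  // mov(x1, x2) assembles as "orr x1, xzr, x2".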
2114 if (rd.IsSP() || rm.IsSP()) {
2115 add(rd, rm, 0);
2116 } else {
2117 orr(rd, AppropriateZeroRegFor(rd), rm);
2118 }
2119}
2120
2121void Assembler::ins(const VRegister& vd, int vd_index, const Register& rn) {
2122 // We support vd arguments of the form vd.VxT() or vd.T(), where x is the
2123 // number of lanes, and T is b, h, s or d.
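  // For example (illustrative), ins(v0.V8H(), 3, w1) moves the low halfword
  // of w1 into lane 3 of v0.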
2124 int lane_size = vd.LaneSizeInBytes();
2125 NEONFormatField format;
2126 switch (lane_size) {
2127 case 1:
2128 format = NEON_16B;
2129 DCHECK(rn.IsW());
2130 break;
2131 case 2:
2132 format = NEON_8H;
2133 DCHECK(rn.IsW());
2134 break;
2135 case 4:
2136 format = NEON_4S;
2137 DCHECK(rn.IsW());
2138 break;
2139 default:
2140 DCHECK_EQ(lane_size, 8);
2141 DCHECK(rn.IsX());
2142 format = NEON_2D;
2143 break;
2144 }
2145
2146 DCHECK((0 <= vd_index) &&
2147 (vd_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
2148 Emit(NEON_INS_GENERAL | ImmNEON5(format, vd_index) | Rn(rn) | Rd(vd));
2149}
2150
2151void Assembler::mov(const Register& rd, const VRegister& vn, int vn_index) {
2152 DCHECK_GE(vn.SizeInBytes(), 4);
2153 umov(rd, vn, vn_index);
2154}
2155
2156void Assembler::smov(const Register& rd, const VRegister& vn, int vn_index) {
2157 // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
2158 // number of lanes, and T is b, h, s.
2159 int lane_size = vn.LaneSizeInBytes();
2160 NEONFormatField format;
2161 Instr q = 0;
2162 switch (lane_size) {
2163 case 1:
2164 format = NEON_16B;
2165 break;
2166 case 2:
2167 format = NEON_8H;
2168 break;
2169 default:
2170 DCHECK_EQ(lane_size, 4);
2171 DCHECK(rd.IsX());
2172 format = NEON_4S;
2173 break;
2174 }
2175 q = rd.IsW() ? 0 : NEON_Q;
2176 DCHECK((0 <= vn_index) &&
2177 (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
2178 Emit(q | NEON_SMOV | ImmNEON5(format, vn_index) | Rn(vn) | Rd(rd));
2179}
2180
2181void Assembler::cls(const VRegister& vd, const VRegister& vn) {
2182 DCHECK(AreSameFormat(vd, vn));
2183 DCHECK(!vd.Is1D() && !vd.Is2D());
2184 Emit(VFormat(vn) | NEON_CLS | Rn(vn) | Rd(vd));
2185}
2186
2187void Assembler::clz(const VRegister& vd, const VRegister& vn) {
2188 DCHECK(AreSameFormat(vd, vn));
2189 DCHECK(!vd.Is1D() && !vd.Is2D());
2190 Emit(VFormat(vn) | NEON_CLZ | Rn(vn) | Rd(vd));
2191}
2192
2193void Assembler::cnt(const VRegister& vd, const VRegister& vn) {
2194 DCHECK(AreSameFormat(vd, vn));
2195 DCHECK(vd.Is8B() || vd.Is16B());
2196 Emit(VFormat(vn) | NEON_CNT | Rn(vn) | Rd(vd));
2197}
2198
2199void Assembler::rev16(const VRegister& vd, const VRegister& vn) {
2200 DCHECK(AreSameFormat(vd, vn));
2201 DCHECK(vd.Is8B() || vd.Is16B());
2202 Emit(VFormat(vn) | NEON_REV16 | Rn(vn) | Rd(vd));
2203}
2204
2205void Assembler::rev32(const VRegister& vd, const VRegister& vn) {
2206 DCHECK(AreSameFormat(vd, vn));
2207 DCHECK(vd.Is8B() || vd.Is16B() || vd.Is4H() || vd.Is8H());
2208 Emit(VFormat(vn) | NEON_REV32 | Rn(vn) | Rd(vd));
2209}
2210
2211void Assembler::rev64(const VRegister& vd, const VRegister& vn) {
2212 DCHECK(AreSameFormat(vd, vn));
2213 DCHECK(!vd.Is1D() && !vd.Is2D());
2214 Emit(VFormat(vn) | NEON_REV64 | Rn(vn) | Rd(vd));
2215}
2216
2217void Assembler::ursqrte(const VRegister& vd, const VRegister& vn) {
2218 DCHECK(AreSameFormat(vd, vn));
2219 DCHECK(vd.Is2S() || vd.Is4S());
2220 Emit(VFormat(vn) | NEON_URSQRTE | Rn(vn) | Rd(vd));
2221}
2222
2223void Assembler::urecpe(const VRegister& vd, const VRegister& vn) {
2224 DCHECK(AreSameFormat(vd, vn));
2225 DCHECK(vd.Is2S() || vd.Is4S());
2226 Emit(VFormat(vn) | NEON_URECPE | Rn(vn) | Rd(vd));
2227}
2228
2229void Assembler::NEONAddlp(const VRegister& vd, const VRegister& vn,
2230 NEON2RegMiscOp op) {
2231 DCHECK((op == NEON_SADDLP) || (op == NEON_UADDLP) || (op == NEON_SADALP) ||
2232 (op == NEON_UADALP));
2233
2234 DCHECK((vn.Is8B() && vd.Is4H()) || (vn.Is4H() && vd.Is2S()) ||
2235 (vn.Is2S() && vd.Is1D()) || (vn.Is16B() && vd.Is8H()) ||
2236 (vn.Is8H() && vd.Is4S()) || (vn.Is4S() && vd.Is2D()));
2237 Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
2238}
2239
2240void Assembler::saddlp(const VRegister& vd, const VRegister& vn) {
2241 NEONAddlp(vd, vn, NEON_SADDLP);
2242}
2243
2244void Assembler::uaddlp(const VRegister& vd, const VRegister& vn) {
2245 NEONAddlp(vd, vn, NEON_UADDLP);
2246}
2247
2248void Assembler::sadalp(const VRegister& vd, const VRegister& vn) {
2249 NEONAddlp(vd, vn, NEON_SADALP);
2250}
2251
2252void Assembler::uadalp(const VRegister& vd, const VRegister& vn) {
2253 NEONAddlp(vd, vn, NEON_UADALP);
2254}
2255
2256void Assembler::NEONAcrossLanesL(const VRegister& vd, const VRegister& vn,
2257 NEONAcrossLanesOp op) {
2258 DCHECK((vn.Is8B() && vd.Is1H()) || (vn.Is16B() && vd.Is1H()) ||
2259 (vn.Is4H() && vd.Is1S()) || (vn.Is8H() && vd.Is1S()) ||
2260 (vn.Is4S() && vd.Is1D()));
2261 Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
2262}
2263
2264void Assembler::saddlv(const VRegister& vd, const VRegister& vn) {
2265 NEONAcrossLanesL(vd, vn, NEON_SADDLV);
2266}
2267
2268void Assembler::uaddlv(const VRegister& vd, const VRegister& vn) {
2269 NEONAcrossLanesL(vd, vn, NEON_UADDLV);
2270}
2271
2272void Assembler::NEONAcrossLanes(const VRegister& vd, const VRegister& vn,
2273 NEONAcrossLanesOp op) {
2274 DCHECK((vn.Is8B() && vd.Is1B()) || (vn.Is16B() && vd.Is1B()) ||
2275 (vn.Is4H() && vd.Is1H()) || (vn.Is8H() && vd.Is1H()) ||
2276 (vn.Is4S() && vd.Is1S()));
2277 if ((op & NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
2278 Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd));
2279 } else {
2280 Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
2281 }
2282}
2283
2284#define NEON_ACROSSLANES_LIST(V) \
2285 V(fmaxv, NEON_FMAXV, vd.Is1S()) \
2286 V(fminv, NEON_FMINV, vd.Is1S()) \
2287 V(fmaxnmv, NEON_FMAXNMV, vd.Is1S()) \
2288 V(fminnmv, NEON_FMINNMV, vd.Is1S()) \
2289 V(addv, NEON_ADDV, true) \
2290 V(smaxv, NEON_SMAXV, true) \
2291 V(sminv, NEON_SMINV, true) \
2292 V(umaxv, NEON_UMAXV, true) \
2293 V(uminv, NEON_UMINV, true)
2294
2295#define DEFINE_ASM_FUNC(FN, OP, AS) \
2296 void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
2297 DCHECK(AS); \
2298 NEONAcrossLanes(vd, vn, OP); \
2299 }
2300NEON_ACROSSLANES_LIST(DEFINE_ASM_FUNC)
2301#undef DEFINE_ASM_FUNC
2302
2303void Assembler::mov(const VRegister& vd, int vd_index, const Register& rn) {
2304 ins(vd, vd_index, rn);
2305}
2306
2307void Assembler::umov(const Register& rd, const VRegister& vn, int vn_index) {
2308 // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
2309 // number of lanes, and T is b, h, s or d.
2310 int lane_size = vn.LaneSizeInBytes();
2311 NEONFormatField format;
2312 Instr q = 0;
2313 switch (lane_size) {
2314 case 1:
2315 format = NEON_16B;
2316 DCHECK(rd.IsW());
2317 break;
2318 case 2:
2319 format = NEON_8H;
2320 DCHECK(rd.IsW());
2321 break;
2322 case 4:
2323 format = NEON_4S;
2324 DCHECK(rd.IsW());
2325 break;
2326 default:
2327 DCHECK_EQ(lane_size, 8);
2328 DCHECK(rd.IsX());
2329 format = NEON_2D;
2330 q = NEON_Q;
2331 break;
2332 }
2333
2334 DCHECK((0 <= vn_index) &&
2335 (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
2336 Emit(q | NEON_UMOV | ImmNEON5(format, vn_index) | Rn(vn) | Rd(rd));
2337}
2338
2339void Assembler::mov(const VRegister& vd, const VRegister& vn, int vn_index) {
2340 DCHECK(vd.IsScalar());
2341 dup(vd, vn, vn_index);
2342}
2343
2344void Assembler::dup(const VRegister& vd, const Register& rn) {
2345 DCHECK(!vd.Is1D());
2346 DCHECK_EQ(vd.Is2D(), rn.IsX());
2347 Instr q = vd.IsD() ? 0 : NEON_Q;
2348 Emit(q | NEON_DUP_GENERAL | ImmNEON5(VFormat(vd), 0) | Rn(rn) | Rd(vd));
2349}
2350
2351void Assembler::ins(const VRegister& vd, int vd_index, const VRegister& vn,
2352 int vn_index) {
2353 DCHECK(AreSameFormat(vd, vn));
2354 // We support vd arguments of the form vd.VxT() or vd.T(), where x is the
2355 // number of lanes, and T is b, h, s or d.
2356 int lane_size = vd.LaneSizeInBytes();
2357 NEONFormatField format;
2358 switch (lane_size) {
2359 case 1:
2360 format = NEON_16B;
2361 break;
2362 case 2:
2363 format = NEON_8H;
2364 break;
2365 case 4:
2366 format = NEON_4S;
2367 break;
2368 default:
2369 DCHECK_EQ(lane_size, 8);
2370 format = NEON_2D;
2371 break;
2372 }
2373
2374 DCHECK((0 <= vd_index) &&
2375 (vd_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
2376 DCHECK((0 <= vn_index) &&
2377 (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
2378 Emit(NEON_INS_ELEMENT | ImmNEON5(format, vd_index) |
2379 ImmNEON4(format, vn_index) | Rn(vn) | Rd(vd));
2380}
2381
2382void Assembler::NEONTable(const VRegister& vd, const VRegister& vn,
2383 const VRegister& vm, NEONTableOp op) {
2384 DCHECK(vd.Is16B() || vd.Is8B());
2385 DCHECK(vn.Is16B());
2386 DCHECK(AreSameFormat(vd, vm));
2387 Emit(op | (vd.IsQ() ? NEON_Q : 0) | Rm(vm) | Rn(vn) | Rd(vd));
2388}
2389
2390void Assembler::tbl(const VRegister& vd, const VRegister& vn,
2391 const VRegister& vm) {
2392 NEONTable(vd, vn, vm, NEON_TBL_1v);
2393}
2394
2395void Assembler::tbl(const VRegister& vd, const VRegister& vn,
2396 const VRegister& vn2, const VRegister& vm) {
2397 USE(vn2);
2398 DCHECK(AreSameFormat(vn, vn2));
2399 DCHECK(AreConsecutive(vn, vn2));
2400 NEONTable(vd, vn, vm, NEON_TBL_2v);
2401}
2402
2403void Assembler::tbl(const VRegister& vd, const VRegister& vn,
2404 const VRegister& vn2, const VRegister& vn3,
2405 const VRegister& vm) {
2406 USE(vn2);
2407 USE(vn3);
2408 DCHECK(AreSameFormat(vn, vn2, vn3));
2409 DCHECK(AreConsecutive(vn, vn2, vn3));
2410 NEONTable(vd, vn, vm, NEON_TBL_3v);
2411}
2412
2413void Assembler::tbl(const VRegister& vd, const VRegister& vn,
2414 const VRegister& vn2, const VRegister& vn3,
2415 const VRegister& vn4, const VRegister& vm) {
2416 USE(vn2);
2417 USE(vn3);
2418 USE(vn4);
2419 DCHECK(AreSameFormat(vn, vn2, vn3, vn4));
2420 DCHECK(AreConsecutive(vn, vn2, vn3, vn4));
2421 NEONTable(vd, vn, vm, NEON_TBL_4v);
2422}
2423
2424void Assembler::tbx(const VRegister& vd, const VRegister& vn,
2425 const VRegister& vm) {
2426 NEONTable(vd, vn, vm, NEON_TBX_1v);
2427}
2428
2429void Assembler::tbx(const VRegister& vd, const VRegister& vn,
2430 const VRegister& vn2, const VRegister& vm) {
2431 USE(vn2);
2432 DCHECK(AreSameFormat(vn, vn2));
2433 DCHECK(AreConsecutive(vn, vn2));
2434 NEONTable(vd, vn, vm, NEON_TBX_2v);
2435}
2436
2437void Assembler::tbx(const VRegister& vd, const VRegister& vn,
2438 const VRegister& vn2, const VRegister& vn3,
2439 const VRegister& vm) {
2440 USE(vn2);
2441 USE(vn3);
2442 DCHECK(AreSameFormat(vn, vn2, vn3));
2443 DCHECK(AreConsecutive(vn, vn2, vn3));
2444 NEONTable(vd, vn, vm, NEON_TBX_3v);
2445}
2446
2447void Assembler::tbx(const VRegister& vd, const VRegister& vn,
2448 const VRegister& vn2, const VRegister& vn3,
2449 const VRegister& vn4, const VRegister& vm) {
2450 USE(vn2);
2451 USE(vn3);
2452 USE(vn4);
2453 DCHECK(AreSameFormat(vn, vn2, vn3, vn4));
2454 DCHECK(AreConsecutive(vn, vn2, vn3, vn4));
2455 NEONTable(vd, vn, vm, NEON_TBX_4v);
2456}
2457
2458void Assembler::mov(const VRegister& vd, int vd_index, const VRegister& vn,
2459 int vn_index) {
2460 ins(vd, vd_index, vn, vn_index);
2461}
2462
2463void Assembler::mvn(const Register& rd, const Operand& operand) {
2464 orn(rd, AppropriateZeroRegFor(rd), operand);
2465}
2466
2467void Assembler::mrs(const Register& rt, SystemRegister sysreg) {
2468 DCHECK(rt.Is64Bits());
2469 Emit(MRS | ImmSystemRegister(sysreg) | Rt(rt));
2470}
2471
2472void Assembler::msr(SystemRegister sysreg, const Register& rt) {
2473 DCHECK(rt.Is64Bits());
2474 Emit(MSR | Rt(rt) | ImmSystemRegister(sysreg));
2475}
2476
2477void Assembler::hint(SystemHint code) { Emit(HINT | ImmHint(code) | Rt(xzr)); }
2478
2479// NEON structure loads and stores.
2480Instr Assembler::LoadStoreStructAddrModeField(const MemOperand& addr) {
2481 Instr addr_field = RnSP(addr.base());
2482
2483 if (addr.IsPostIndex()) {
2484 static_assert(NEONLoadStoreMultiStructPostIndex ==
2487 "Opcodes must match for NEON post index memop.");
2488
2489    addr_field |= NEONLoadStoreMultiStructPostIndex;
2490    if (addr.offset() == 0) {
2491 addr_field |= RmNot31(addr.regoffset());
2492 } else {
2493 // The immediate post index addressing mode is indicated by rm = 31.
2494 // The immediate is implied by the number of vector registers used.
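      // For example (illustrative), ld1(v0.V16B(), v1.V16B(),
      // MemOperand(x0, 32, PostIndex)) uses this form: the #32 increment is
      // implied by the two 16-byte registers.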
2495 addr_field |= (0x1F << Rm_offset);
2496 }
2497 } else {
2498 DCHECK(addr.IsImmediateOffset() && (addr.offset() == 0));
2499 }
2500 return addr_field;
2501}
2502
2503void Assembler::LoadStoreStructVerify(const VRegister& vt,
2504 const MemOperand& addr, Instr op) {
2505#ifdef DEBUG
2506 // Assert that addressing mode is either offset (with immediate 0), post
2507 // index by immediate of the size of the register list, or post index by a
2508 // value in a core register.
2509 if (addr.IsImmediateOffset()) {
2510 DCHECK_EQ(addr.offset(), 0);
2511 } else {
2512 int offset = vt.SizeInBytes();
2513 switch (op) {
2514 case NEON_LD1_1v:
2515 case NEON_ST1_1v:
2516 offset *= 1;
2517 break;
2518      case NEONLoadStoreSingleStructLoad1:
2519      case NEONLoadStoreSingleStructStore1:
2520      case NEON_LD1R:
2521 offset = (offset / vt.LaneCount()) * 1;
2522 break;
2523
2524 case NEON_LD1_2v:
2525 case NEON_ST1_2v:
2526 case NEON_LD2:
2527 case NEON_ST2:
2528 offset *= 2;
2529 break;
2530      case NEONLoadStoreSingleStructLoad2:
2531      case NEONLoadStoreSingleStructStore2:
2532      case NEON_LD2R:
2533 offset = (offset / vt.LaneCount()) * 2;
2534 break;
2535
2536 case NEON_LD1_3v:
2537 case NEON_ST1_3v:
2538 case NEON_LD3:
2539 case NEON_ST3:
2540 offset *= 3;
2541 break;
2542      case NEONLoadStoreSingleStructLoad3:
2543      case NEONLoadStoreSingleStructStore3:
2544      case NEON_LD3R:
2545 offset = (offset / vt.LaneCount()) * 3;
2546 break;
2547
2548 case NEON_LD1_4v:
2549 case NEON_ST1_4v:
2550 case NEON_LD4:
2551 case NEON_ST4:
2552 offset *= 4;
2553 break;
2554      case NEONLoadStoreSingleStructLoad4:
2555      case NEONLoadStoreSingleStructStore4:
2556      case NEON_LD4R:
2557 offset = (offset / vt.LaneCount()) * 4;
2558 break;
2559 default:
2560 UNREACHABLE();
2561 }
2562 DCHECK(addr.regoffset() != NoReg || addr.offset() == offset);
2563 }
2564#else
2565 USE(vt);
2566 USE(addr);
2567 USE(op);
2568#endif
2569}
2570
2571void Assembler::LoadStoreStruct(const VRegister& vt, const MemOperand& addr,
2572 NEONLoadStoreMultiStructOp op) {
2573 LoadStoreStructVerify(vt, addr, op);
2574 DCHECK(vt.IsVector() || vt.Is1D());
2575 Emit(op | LoadStoreStructAddrModeField(addr) | LSVFormat(vt) | Rt(vt));
2576}
2577
2578void Assembler::LoadStoreStructSingleAllLanes(const VRegister& vt,
2579 const MemOperand& addr,
2580 NEONLoadStoreSingleStructOp op) {
2581 LoadStoreStructVerify(vt, addr, op);
2582 Emit(op | LoadStoreStructAddrModeField(addr) | LSVFormat(vt) | Rt(vt));
2583}
2584
2585void Assembler::ld1(const VRegister& vt, const MemOperand& src) {
2586 LoadStoreStruct(vt, src, NEON_LD1_1v);
2587}
2588
2589void Assembler::ld1(const VRegister& vt, const VRegister& vt2,
2590 const MemOperand& src) {
2591 USE(vt2);
2592 DCHECK(AreSameFormat(vt, vt2));
2593 DCHECK(AreConsecutive(vt, vt2));
2594 LoadStoreStruct(vt, src, NEON_LD1_2v);
2595}
2596
2597void Assembler::ld1(const VRegister& vt, const VRegister& vt2,
2598 const VRegister& vt3, const MemOperand& src) {
2599 USE(vt2);
2600 USE(vt3);
2601 DCHECK(AreSameFormat(vt, vt2, vt3));
2602 DCHECK(AreConsecutive(vt, vt2, vt3));
2603 LoadStoreStruct(vt, src, NEON_LD1_3v);
2604}
2605
2606void Assembler::ld1(const VRegister& vt, const VRegister& vt2,
2607 const VRegister& vt3, const VRegister& vt4,
2608 const MemOperand& src) {
2609 USE(vt2);
2610 USE(vt3);
2611 USE(vt4);
2612 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2613 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2614 LoadStoreStruct(vt, src, NEON_LD1_4v);
2615}
2616
2617void Assembler::ld2(const VRegister& vt, const VRegister& vt2,
2618 const MemOperand& src) {
2619 USE(vt2);
2620 DCHECK(AreSameFormat(vt, vt2));
2621 DCHECK(AreConsecutive(vt, vt2));
2622 LoadStoreStruct(vt, src, NEON_LD2);
2623}
2624
2625void Assembler::ld2(const VRegister& vt, const VRegister& vt2, int lane,
2626 const MemOperand& src) {
2627 USE(vt2);
2628 DCHECK(AreSameFormat(vt, vt2));
2629 DCHECK(AreConsecutive(vt, vt2));
2630 LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad2);
2631}
2632
2633void Assembler::ld2r(const VRegister& vt, const VRegister& vt2,
2634 const MemOperand& src) {
2635 USE(vt2);
2636 DCHECK(AreSameFormat(vt, vt2));
2637 DCHECK(AreConsecutive(vt, vt2));
2638 LoadStoreStructSingleAllLanes(vt, src, NEON_LD2R);
2639}
2640
2641void Assembler::ld3(const VRegister& vt, const VRegister& vt2,
2642 const VRegister& vt3, const MemOperand& src) {
2643 USE(vt2);
2644 USE(vt3);
2645 DCHECK(AreSameFormat(vt, vt2, vt3));
2646 DCHECK(AreConsecutive(vt, vt2, vt3));
2647 LoadStoreStruct(vt, src, NEON_LD3);
2648}
2649
2650void Assembler::ld3(const VRegister& vt, const VRegister& vt2,
2651 const VRegister& vt3, int lane, const MemOperand& src) {
2652 USE(vt2);
2653 USE(vt3);
2654 DCHECK(AreSameFormat(vt, vt2, vt3));
2655 DCHECK(AreConsecutive(vt, vt2, vt3));
2656 LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad3);
2657}
2658
2659void Assembler::ld3r(const VRegister& vt, const VRegister& vt2,
2660 const VRegister& vt3, const MemOperand& src) {
2661 USE(vt2);
2662 USE(vt3);
2663 DCHECK(AreSameFormat(vt, vt2, vt3));
2664 DCHECK(AreConsecutive(vt, vt2, vt3));
2665 LoadStoreStructSingleAllLanes(vt, src, NEON_LD3R);
2666}
2667
2668void Assembler::ld4(const VRegister& vt, const VRegister& vt2,
2669 const VRegister& vt3, const VRegister& vt4,
2670 const MemOperand& src) {
2671 USE(vt2);
2672 USE(vt3);
2673 USE(vt4);
2674 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2675 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2676 LoadStoreStruct(vt, src, NEON_LD4);
2677}
2678
2679void Assembler::ld4(const VRegister& vt, const VRegister& vt2,
2680 const VRegister& vt3, const VRegister& vt4, int lane,
2681 const MemOperand& src) {
2682 USE(vt2);
2683 USE(vt3);
2684 USE(vt4);
2685 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2686 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2687 LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad4);
2688}
2689
2690void Assembler::ld4r(const VRegister& vt, const VRegister& vt2,
2691 const VRegister& vt3, const VRegister& vt4,
2692 const MemOperand& src) {
2693 USE(vt2);
2694 USE(vt3);
2695 USE(vt4);
2696 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2697 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2698 LoadStoreStructSingleAllLanes(vt, src, NEON_LD4R);
2699}
2700
2701void Assembler::st1(const VRegister& vt, const MemOperand& src) {
2702 LoadStoreStruct(vt, src, NEON_ST1_1v);
2703}
2704
2705void Assembler::st1(const VRegister& vt, const VRegister& vt2,
2706 const MemOperand& src) {
2707 USE(vt2);
2708 DCHECK(AreSameFormat(vt, vt2));
2709 DCHECK(AreConsecutive(vt, vt2));
2710 LoadStoreStruct(vt, src, NEON_ST1_2v);
2711}
2712
2713void Assembler::st1(const VRegister& vt, const VRegister& vt2,
2714 const VRegister& vt3, const MemOperand& src) {
2715 USE(vt2);
2716 USE(vt3);
2717 DCHECK(AreSameFormat(vt, vt2, vt3));
2718 DCHECK(AreConsecutive(vt, vt2, vt3));
2719 LoadStoreStruct(vt, src, NEON_ST1_3v);
2720}
2721
2722void Assembler::st1(const VRegister& vt, const VRegister& vt2,
2723 const VRegister& vt3, const VRegister& vt4,
2724 const MemOperand& src) {
2725 USE(vt2);
2726 USE(vt3);
2727 USE(vt4);
2728 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2729 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2730 LoadStoreStruct(vt, src, NEON_ST1_4v);
2731}
2732
2733void Assembler::st2(const VRegister& vt, const VRegister& vt2,
2734 const MemOperand& dst) {
2735 USE(vt2);
2736 DCHECK(AreSameFormat(vt, vt2));
2737 DCHECK(AreConsecutive(vt, vt2));
2738 LoadStoreStruct(vt, dst, NEON_ST2);
2739}
2740
2741void Assembler::st2(const VRegister& vt, const VRegister& vt2, int lane,
2742 const MemOperand& dst) {
2743 USE(vt2);
2744 DCHECK(AreSameFormat(vt, vt2));
2745 DCHECK(AreConsecutive(vt, vt2));
2746 LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore2);
2747}
2748
2749void Assembler::st3(const VRegister& vt, const VRegister& vt2,
2750 const VRegister& vt3, const MemOperand& dst) {
2751 USE(vt2);
2752 USE(vt3);
2753 DCHECK(AreSameFormat(vt, vt2, vt3));
2754 DCHECK(AreConsecutive(vt, vt2, vt3));
2755 LoadStoreStruct(vt, dst, NEON_ST3);
2756}
2757
2758void Assembler::st3(const VRegister& vt, const VRegister& vt2,
2759 const VRegister& vt3, int lane, const MemOperand& dst) {
2760 USE(vt2);
2761 USE(vt3);
2762 DCHECK(AreSameFormat(vt, vt2, vt3));
2763 DCHECK(AreConsecutive(vt, vt2, vt3));
2764 LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore3);
2765}
2766
2767void Assembler::st4(const VRegister& vt, const VRegister& vt2,
2768 const VRegister& vt3, const VRegister& vt4,
2769 const MemOperand& dst) {
2770 USE(vt2);
2771 USE(vt3);
2772 USE(vt4);
2773 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2774 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2775 LoadStoreStruct(vt, dst, NEON_ST4);
2776}
2777
2778void Assembler::st4(const VRegister& vt, const VRegister& vt2,
2779 const VRegister& vt3, const VRegister& vt4, int lane,
2780 const MemOperand& dst) {
2781 USE(vt2);
2782 USE(vt3);
2783 USE(vt4);
2784 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2785 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2786 LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore4);
2787}
2788
2789void Assembler::LoadStoreStructSingle(const VRegister& vt, uint32_t lane,
2790 const MemOperand& addr,
2791 NEONLoadStoreSingleStructOp op) {
2792 LoadStoreStructVerify(vt, addr, op);
2793
2794 // We support vt arguments of the form vt.VxT() or vt.T(), where x is the
2795 // number of lanes, and T is b, h, s or d.
2796 unsigned lane_size = vt.LaneSizeInBytes();
2797 DCHECK_LT(lane, kQRegSize / lane_size);
2798
2799 // Lane size is encoded in the opcode field. Lane index is encoded in the Q,
2800 // S and size fields.
2801 lane *= lane_size;
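  // For example, a .S lane index of 2 scales to 8 (0b1000): bit 3 maps to Q,
  // bit 2 to S, and bits 1:0 to the size field below.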
2802
2803 // Encodings for S[0]/D[0] and S[2]/D[1] are distinguished using the least-
2804 // significant bit of the size field, so we increment lane here to account for
2805 // that.
2806 if (lane_size == 8) lane++;
2807
2808 Instr size = (lane << NEONLSSize_offset) & NEONLSSize_mask;
2809 Instr s = (lane << (NEONS_offset - 2)) & NEONS_mask;
2810 Instr q = (lane << (NEONQ_offset - 3)) & NEONQ_mask;
2811
2812 Instr instr = op;
2813 switch (lane_size) {
2814 case 1:
2815      instr |= NEONLoadStoreSingle_b;
2816      break;
2817 case 2:
2818      instr |= NEONLoadStoreSingle_h;
2819      break;
2820 case 4:
2821      instr |= NEONLoadStoreSingle_s;
2822      break;
2823 default:
2824 DCHECK_EQ(lane_size, 8U);
2825      instr |= NEONLoadStoreSingle_d;
2826  }
2827
2828 Emit(instr | LoadStoreStructAddrModeField(addr) | q | size | s | Rt(vt));
2829}
2830
2831void Assembler::ld1(const VRegister& vt, int lane, const MemOperand& src) {
2832 LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad1);
2833}
2834
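// ld1r loads a single element and replicates it into every lane of vt.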
2835void Assembler::ld1r(const VRegister& vt, const MemOperand& src) {
2836 LoadStoreStructSingleAllLanes(vt, src, NEON_LD1R);
2837}
2838
2839void Assembler::st1(const VRegister& vt, int lane, const MemOperand& dst) {
2840 LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore1);
2841}
2842
2843void Assembler::dmb(BarrierDomain domain, BarrierType type) {
2844 Emit(DMB | ImmBarrierDomain(domain) | ImmBarrierType(type));
2845}
2846
2847void Assembler::dsb(BarrierDomain domain, BarrierType type) {
2848 Emit(DSB | ImmBarrierDomain(domain) | ImmBarrierType(type));
2849}
2850
2851void Assembler::isb() {
2852 Emit(ISB | ImmBarrierDomain(FullSystem) | ImmBarrierType(BarrierAll));
2853}
2854
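// CSDB is the consumption of speculative data barrier, used to build
// speculation-safe bounds checks (Spectre variant 1 mitigation).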
2855void Assembler::csdb() { hint(CSDB); }
2856
2857void Assembler::fmov(const VRegister& vd, double imm) {
2858 if (vd.IsScalar()) {
2859 DCHECK(vd.Is1D());
2860 Emit(FMOV_d_imm | Rd(vd) | ImmFP(imm));
2861 } else {
2862 DCHECK(vd.Is2D());
2863    Instr op = NEONModifiedImmediate_MOVI | NEONModifiedImmediateOpBit;
2864    Emit(NEON_Q | op | ImmNEONFP(imm) | NEONCmode(0xF) | Rd(vd));
2865 }
2866}
2867
2868void Assembler::fmov(const VRegister& vd, float imm) {
2869 if (vd.IsScalar()) {
2870 DCHECK(vd.Is1S());
2871 Emit(FMOV_s_imm | Rd(vd) | ImmFP(imm));
2872 } else {
2873 DCHECK(vd.Is2S() || vd.Is4S());
2874    Instr op = NEONModifiedImmediate_MOVI;
2875    Instr q = vd.Is4S() ? NEON_Q : 0;
2876 Emit(q | op | ImmNEONFP(imm) | NEONCmode(0xF) | Rd(vd));
2877 }
2878}
2879
2880void Assembler::fmov(const Register& rd, const VRegister& fn) {
2881 DCHECK_EQ(rd.SizeInBits(), fn.SizeInBits());
2882 FPIntegerConvertOp op = rd.Is32Bits() ? FMOV_ws : FMOV_xd;
2883 Emit(op | Rd(rd) | Rn(fn));
2884}
2885
2886void Assembler::fmov(const VRegister& vd, const Register& rn) {
2887 DCHECK_EQ(vd.SizeInBits(), rn.SizeInBits());
2888 FPIntegerConvertOp op = vd.Is32Bits() ? FMOV_sw : FMOV_dx;
2889 Emit(op | Rd(vd) | Rn(rn));
2890}
2891
2892void Assembler::fmov(const VRegister& vd, const VRegister& vn) {
2893 DCHECK_EQ(vd.SizeInBits(), vn.SizeInBits());
2894 Emit(FPType(vd) | FMOV | Rd(vd) | Rn(vn));
2895}
2896
2897void Assembler::fmov(const VRegister& vd, int index, const Register& rn) {
2898 DCHECK((index == 1) && vd.Is1D() && rn.IsX());
2899 USE(index);
2900 Emit(FMOV_d1_x | Rd(vd) | Rn(rn));
2901}
2902
2903void Assembler::fmov(const Register& rd, const VRegister& vn, int index) {
2904 DCHECK((index == 1) && vn.Is1D() && rd.IsX());
2905 USE(index);
2906 Emit(FMOV_x_d1 | Rd(rd) | Rn(vn));
2907}
2908
2909void Assembler::fmadd(const VRegister& fd, const VRegister& fn,
2910 const VRegister& fm, const VRegister& fa) {
2911 FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FMADD_s : FMADD_d);
2912}
2913
2914void Assembler::fmsub(const VRegister& fd, const VRegister& fn,
2915 const VRegister& fm, const VRegister& fa) {
2916 FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FMSUB_s : FMSUB_d);
2917}
2918
2919void Assembler::fnmadd(const VRegister& fd, const VRegister& fn,
2920 const VRegister& fm, const VRegister& fa) {
2921 FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FNMADD_s : FNMADD_d);
2922}
2923
2924void Assembler::fnmsub(const VRegister& fd, const VRegister& fn,
2925 const VRegister& fm, const VRegister& fa) {
2926 FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FNMSUB_s : FNMSUB_d);
2927}
2928
2929void Assembler::fnmul(const VRegister& vd, const VRegister& vn,
2930 const VRegister& vm) {
2931 DCHECK(AreSameSizeAndType(vd, vn, vm));
2932 Instr op = vd.Is1S() ? FNMUL_s : FNMUL_d;
2933 Emit(FPType(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
2934}
2935
2936void Assembler::fcmp(const VRegister& fn, const VRegister& fm) {
2937 DCHECK_EQ(fn.SizeInBits(), fm.SizeInBits());
2938 Emit(FPType(fn) | FCMP | Rm(fm) | Rn(fn));
2939}
2940
2941void Assembler::fcmp(const VRegister& fn, double value) {
2942 USE(value);
2943 // Although the fcmp instruction can strictly only take an immediate value of
2944 // +0.0, we don't need to check for -0.0 because the sign of 0.0 doesn't
2945 // affect the result of the comparison.
2946 DCHECK_EQ(value, 0.0);
2947 Emit(FPType(fn) | FCMP_zero | Rn(fn));
2948}
2949
2950void Assembler::fccmp(const VRegister& fn, const VRegister& fm,
2951 StatusFlags nzcv, Condition cond) {
2952 DCHECK_EQ(fn.SizeInBits(), fm.SizeInBits());
2953 Emit(FPType(fn) | FCCMP | Rm(fm) | Cond(cond) | Rn(fn) | Nzcv(nzcv));
2954}
2955
2956void Assembler::fcsel(const VRegister& fd, const VRegister& fn,
2957 const VRegister& fm, Condition cond) {
2958 DCHECK_EQ(fd.SizeInBits(), fn.SizeInBits());
2959 DCHECK_EQ(fd.SizeInBits(), fm.SizeInBits());
2960 Emit(FPType(fd) | FCSEL | Rm(fm) | Cond(cond) | Rn(fn) | Rd(fd));
2961}
2962
2963void Assembler::NEONFPConvertToInt(const Register& rd, const VRegister& vn,
2964 Instr op) {
2965 Emit(SF(rd) | FPType(vn) | op | Rn(vn) | Rd(rd));
2966}
2967
2968void Assembler::NEONFPConvertToInt(const VRegister& vd, const VRegister& vn,
2969 Instr op) {
2970 if (vn.IsScalar()) {
2971 DCHECK((vd.Is1S() && vn.Is1S()) || (vd.Is1D() && vn.Is1D()));
2972 op |= NEON_Q | NEONScalar;
2973 }
2974 Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd));
2975}
2976
2977void Assembler::fcvt(const VRegister& vd, const VRegister& vn) {
2978  FPDataProcessing1SourceOp op;
2979  if (vd.Is1D()) {
2980 DCHECK(vn.Is1S() || vn.Is1H());
2981 op = vn.Is1S() ? FCVT_ds : FCVT_dh;
2982 } else if (vd.Is1S()) {
2983 DCHECK(vn.Is1D() || vn.Is1H());
2984 op = vn.Is1D() ? FCVT_sd : FCVT_sh;
2985 } else {
2986 DCHECK(vd.Is1H());
2987 DCHECK(vn.Is1D() || vn.Is1S());
2988 op = vn.Is1D() ? FCVT_hd : FCVT_hs;
2989 }
2990 FPDataProcessing1Source(vd, vn, op);
2991}
2992
2993void Assembler::fcvtl(const VRegister& vd, const VRegister& vn) {
2994 DCHECK((vd.Is4S() && vn.Is4H()) || (vd.Is2D() && vn.Is2S()));
2995 Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0;
2996 Emit(format | NEON_FCVTL | Rn(vn) | Rd(vd));
2997}
2998
2999void Assembler::fcvtl2(const VRegister& vd, const VRegister& vn) {
3000 DCHECK((vd.Is4S() && vn.Is8H()) || (vd.Is2D() && vn.Is4S()));
3001 Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0;
3002 Emit(NEON_Q | format | NEON_FCVTL | Rn(vn) | Rd(vd));
3003}
3004
3005void Assembler::fcvtn(const VRegister& vd, const VRegister& vn) {
3006 DCHECK((vn.Is4S() && vd.Is4H()) || (vn.Is2D() && vd.Is2S()));
3007 Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0;
3008 Emit(format | NEON_FCVTN | Rn(vn) | Rd(vd));
3009}
3010
3011void Assembler::fcvtn2(const VRegister& vd, const VRegister& vn) {
3012 DCHECK((vn.Is4S() && vd.Is8H()) || (vn.Is2D() && vd.Is4S()));
3013 Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0;
3014 Emit(NEON_Q | format | NEON_FCVTN | Rn(vn) | Rd(vd));
3015}
3016
3017void Assembler::fcvtxn(const VRegister& vd, const VRegister& vn) {
3018 Instr format = 1 << NEONSize_offset;
3019 if (vd.IsScalar()) {
3020 DCHECK(vd.Is1S() && vn.Is1D());
3021 Emit(format | NEON_FCVTXN_scalar | Rn(vn) | Rd(vd));
3022 } else {
3023 DCHECK(vd.Is2S() && vn.Is2D());
3024 Emit(format | NEON_FCVTXN | Rn(vn) | Rd(vd));
3025 }
3026}
3027
3028void Assembler::fcvtxn2(const VRegister& vd, const VRegister& vn) {
3029 DCHECK(vd.Is4S() && vn.Is2D());
3030 Instr format = 1 << NEONSize_offset;
3031 Emit(NEON_Q | format | NEON_FCVTXN | Rn(vn) | Rd(vd));
3032}
3033
3034void Assembler::fjcvtzs(const Register& rd, const VRegister& vn) {
3035 DCHECK(rd.IsW() && vn.Is1D());
3036 Emit(FJCVTZS | Rn(vn) | Rd(rd));
3037}
3038
3039#define NEON_FP2REGMISC_FCVT_LIST(V) \
3040 V(fcvtnu, NEON_FCVTNU, FCVTNU) \
3041 V(fcvtns, NEON_FCVTNS, FCVTNS) \
3042 V(fcvtpu, NEON_FCVTPU, FCVTPU) \
3043 V(fcvtps, NEON_FCVTPS, FCVTPS) \
3044 V(fcvtmu, NEON_FCVTMU, FCVTMU) \
3045 V(fcvtms, NEON_FCVTMS, FCVTMS) \
3046 V(fcvtau, NEON_FCVTAU, FCVTAU) \
3047 V(fcvtas, NEON_FCVTAS, FCVTAS)
3048
3049#define DEFINE_ASM_FUNCS(FN, VEC_OP, SCA_OP) \
3050 void Assembler::FN(const Register& rd, const VRegister& vn) { \
3051 NEONFPConvertToInt(rd, vn, SCA_OP); \
3052 } \
3053 void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
3054 NEONFPConvertToInt(vd, vn, VEC_OP); \
3055 }
3056NEON_FP2REGMISC_FCVT_LIST(DEFINE_ASM_FUNCS)
3057#undef DEFINE_ASM_FUNCS
3058
3059void Assembler::scvtf(const VRegister& vd, const VRegister& vn, int fbits) {
3060 DCHECK_GE(fbits, 0);
3061 if (fbits == 0) {
3062 NEONFP2RegMisc(vd, vn, NEON_SCVTF, 0.0);
3063 } else {
3064 DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
3065 NEONShiftRightImmediate(vd, vn, fbits, NEON_SCVTF_imm);
3066 }
3067}
3068
3069void Assembler::ucvtf(const VRegister& vd, const VRegister& vn, int fbits) {
3070 DCHECK_GE(fbits, 0);
3071 if (fbits == 0) {
3072 NEONFP2RegMisc(vd, vn, NEON_UCVTF, 0.0);
3073 } else {
3074 DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
3075 NEONShiftRightImmediate(vd, vn, fbits, NEON_UCVTF_imm);
3076 }
3077}
3078
3079void Assembler::scvtf(const VRegister& vd, const Register& rn, int fbits) {
3080 DCHECK_GE(fbits, 0);
3081 if (fbits == 0) {
3082 Emit(SF(rn) | FPType(vd) | SCVTF | Rn(rn) | Rd(vd));
3083 } else {
3084 Emit(SF(rn) | FPType(vd) | SCVTF_fixed | FPScale(64 - fbits) | Rn(rn) |
3085 Rd(vd));
3086 }
3087}
3088
3089void Assembler::ucvtf(const VRegister& fd, const Register& rn, int fbits) {
3090 DCHECK_GE(fbits, 0);
3091 if (fbits == 0) {
3092 Emit(SF(rn) | FPType(fd) | UCVTF | Rn(rn) | Rd(fd));
3093 } else {
3094 Emit(SF(rn) | FPType(fd) | UCVTF_fixed | FPScale(64 - fbits) | Rn(rn) |
3095 Rd(fd));
3096 }
3097}
3098
3099void Assembler::NEON3Same(const VRegister& vd, const VRegister& vn,
3100 const VRegister& vm, NEON3SameOp vop) {
3101 DCHECK(AreSameFormat(vd, vn, vm));
3102 DCHECK(vd.IsVector() || !vd.IsQ());
3103
3104 Instr format, op = vop;
3105 if (vd.IsScalar()) {
3106 op |= NEON_Q | NEONScalar;
3107 format = SFormat(vd);
3108 } else {
3109 format = VFormat(vd);
3110 }
3111
3112 Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd));
3113}
3114
3115void Assembler::NEONFP3Same(const VRegister& vd, const VRegister& vn,
3116 const VRegister& vm, Instr op) {
3117 DCHECK(AreSameFormat(vd, vn, vm));
3118 if (vd.Is4H() || vd.Is8H()) {
3119 op |= NEON_sz;
3120 op ^= NEON3SameHPMask;
3121 }
3122 Emit(FPFormat(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
3123}
3124
3125#define NEON_FP2REGMISC_LIST(V) \
3126 V(fabs, NEON_FABS, FABS) \
3127 V(fneg, NEON_FNEG, FNEG) \
3128 V(fsqrt, NEON_FSQRT, FSQRT) \
3129 V(frintn, NEON_FRINTN, FRINTN) \
3130 V(frinta, NEON_FRINTA, FRINTA) \
3131 V(frintp, NEON_FRINTP, FRINTP) \
3132 V(frintm, NEON_FRINTM, FRINTM) \
3133 V(frintx, NEON_FRINTX, FRINTX) \
3134 V(frintz, NEON_FRINTZ, FRINTZ) \
3135 V(frinti, NEON_FRINTI, FRINTI) \
3136 V(frsqrte, NEON_FRSQRTE, NEON_FRSQRTE_scalar) \
3137 V(frecpe, NEON_FRECPE, NEON_FRECPE_scalar)
3138
3139#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
3140 void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
3141 if (vd.IsScalar()) { \
3142 DCHECK(vd.Is1S() || vd.Is1D()); \
3143 NEONFP2RegMisc(vd, vn, SCA_OP); \
3144 } else { \
3145 NEONFP2RegMisc(vd, vn, VEC_OP, 0.0); \
3146 } \
3147 }
3148NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC)
3149#undef DEFINE_ASM_FUNC
3150
3151void Assembler::shll(const VRegister& vd, const VRegister& vn, int shift) {
3152 DCHECK((vd.Is8H() && vn.Is8B() && shift == 8) ||
3153 (vd.Is4S() && vn.Is4H() && shift == 16) ||
3154 (vd.Is2D() && vn.Is2S() && shift == 32));
3155 USE(shift);
3156 Emit(VFormat(vn) | NEON_SHLL | Rn(vn) | Rd(vd));
3157}
3158
3159void Assembler::shll2(const VRegister& vd, const VRegister& vn, int shift) {
3160 USE(shift);
3161 DCHECK((vd.Is8H() && vn.Is16B() && shift == 8) ||
3162 (vd.Is4S() && vn.Is8H() && shift == 16) ||
3163 (vd.Is2D() && vn.Is4S() && shift == 32));
3164 Emit(VFormat(vn) | NEON_SHLL | Rn(vn) | Rd(vd));
3165}
3166
3167void Assembler::NEONFP2RegMisc(const VRegister& vd, const VRegister& vn,
3168 NEON2RegMiscOp vop, double value) {
3169 DCHECK(AreSameFormat(vd, vn));
3170 DCHECK_EQ(value, 0.0);
3171 USE(value);
3172
3173 Instr op = vop;
3174 if (vd.IsScalar()) {
3175 DCHECK(vd.Is1S() || vd.Is1D());
3176 op |= NEON_Q | NEONScalar;
3177 } else if (vd.Is4H() || vd.Is8H()) {
3178    op |= NEON_sz | NEON2RegMiscHPFixed;
3179  } else {
3180 DCHECK(vd.Is2S() || vd.Is2D() || vd.Is4S());
3181 }
3182
3183 Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd));
3184}
3185
3186void Assembler::fcmeq(const VRegister& vd, const VRegister& vn, double value) {
3187 NEONFP2RegMisc(vd, vn, NEON_FCMEQ_zero, value);
3188}
3189
3190void Assembler::fcmge(const VRegister& vd, const VRegister& vn, double value) {
3191 NEONFP2RegMisc(vd, vn, NEON_FCMGE_zero, value);
3192}
3193
3194void Assembler::fcmgt(const VRegister& vd, const VRegister& vn, double value) {
3195 NEONFP2RegMisc(vd, vn, NEON_FCMGT_zero, value);
3196}
3197
3198void Assembler::fcmle(const VRegister& vd, const VRegister& vn, double value) {
3199 NEONFP2RegMisc(vd, vn, NEON_FCMLE_zero, value);
3200}
3201
3202void Assembler::fcmlt(const VRegister& vd, const VRegister& vn, double value) {
3203 NEONFP2RegMisc(vd, vn, NEON_FCMLT_zero, value);
3204}
3205
3206void Assembler::frecpx(const VRegister& vd, const VRegister& vn) {
3207 DCHECK(vd.IsScalar());
3208 DCHECK(AreSameFormat(vd, vn));
3209 DCHECK(vd.Is1S() || vd.Is1D());
3210 Emit(FPFormat(vd) | NEON_FRECPX_scalar | Rn(vn) | Rd(vd));
3211}
3212
3213void Assembler::fcvtzs(const Register& rd, const VRegister& vn, int fbits) {
3214 DCHECK(vn.Is1S() || vn.Is1D());
3215 DCHECK((fbits >= 0) && (fbits <= rd.SizeInBits()));
3216 if (fbits == 0) {
3217 Emit(SF(rd) | FPType(vn) | FCVTZS | Rn(vn) | Rd(rd));
3218 } else {
3219 Emit(SF(rd) | FPType(vn) | FCVTZS_fixed | FPScale(64 - fbits) | Rn(vn) |
3220 Rd(rd));
3221 }
3222}
3223
3224void Assembler::fcvtzs(const VRegister& vd, const VRegister& vn, int fbits) {
3225 DCHECK_GE(fbits, 0);
3226 if (fbits == 0) {
3227 NEONFP2RegMisc(vd, vn, NEON_FCVTZS, 0.0);
3228 } else {
3229 DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
3230 NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZS_imm);
3231 }
3232}
3233
3234void Assembler::fcvtzu(const Register& rd, const VRegister& vn, int fbits) {
3235 DCHECK(vn.Is1S() || vn.Is1D());
3236 DCHECK((fbits >= 0) && (fbits <= rd.SizeInBits()));
3237 if (fbits == 0) {
3238 Emit(SF(rd) | FPType(vn) | FCVTZU | Rn(vn) | Rd(rd));
3239 } else {
3240 Emit(SF(rd) | FPType(vn) | FCVTZU_fixed | FPScale(64 - fbits) | Rn(vn) |
3241 Rd(rd));
3242 }
3243}
3244
3245void Assembler::fcvtzu(const VRegister& vd, const VRegister& vn, int fbits) {
3246 DCHECK_GE(fbits, 0);
3247 if (fbits == 0) {
3248 NEONFP2RegMisc(vd, vn, NEON_FCVTZU, 0.0);
3249 } else {
3250 DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
3251 NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZU_imm);
3252 }
3253}
3254
3255void Assembler::NEONFP2RegMisc(const VRegister& vd, const VRegister& vn,
3256 Instr op) {
3257 DCHECK(AreSameFormat(vd, vn));
3258 Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd));
3259}
3260
3261void Assembler::NEON2RegMisc(const VRegister& vd, const VRegister& vn,
3262 NEON2RegMiscOp vop, int value) {
3263 DCHECK(AreSameFormat(vd, vn));
3264 DCHECK_EQ(value, 0);
3265 USE(value);
3266
3267 Instr format, op = vop;
3268 if (vd.IsScalar()) {
3269 op |= NEON_Q | NEONScalar;
3270 format = SFormat(vd);
3271 } else {
3272 format = VFormat(vd);
3273 }
3274
3275 Emit(format | op | Rn(vn) | Rd(vd));
3276}
3277
3278void Assembler::cmeq(const VRegister& vd, const VRegister& vn, int value) {
3279 DCHECK(vd.IsVector() || vd.Is1D());
3280 NEON2RegMisc(vd, vn, NEON_CMEQ_zero, value);
3281}
3282
3283void Assembler::cmge(const VRegister& vd, const VRegister& vn, int value) {
3284 DCHECK(vd.IsVector() || vd.Is1D());
3285 NEON2RegMisc(vd, vn, NEON_CMGE_zero, value);
3286}
3287
3288void Assembler::cmgt(const VRegister& vd, const VRegister& vn, int value) {
3289 DCHECK(vd.IsVector() || vd.Is1D());
3290 NEON2RegMisc(vd, vn, NEON_CMGT_zero, value);
3291}
3292
3293void Assembler::cmle(const VRegister& vd, const VRegister& vn, int value) {
3294 DCHECK(vd.IsVector() || vd.Is1D());
3295 NEON2RegMisc(vd, vn, NEON_CMLE_zero, value);
3296}
3297
3298void Assembler::cmlt(const VRegister& vd, const VRegister& vn, int value) {
3299 DCHECK(vd.IsVector() || vd.Is1D());
3300 NEON2RegMisc(vd, vn, NEON_CMLT_zero, value);
3301}
3302
3303#define NEON_3SAME_LIST(V) \
3304 V(add, NEON_ADD, vd.IsVector() || vd.Is1D()) \
3305 V(addp, NEON_ADDP, vd.IsVector() || vd.Is1D()) \
3306 V(sub, NEON_SUB, vd.IsVector() || vd.Is1D()) \
3307 V(cmeq, NEON_CMEQ, vd.IsVector() || vd.Is1D()) \
3308 V(cmge, NEON_CMGE, vd.IsVector() || vd.Is1D()) \
3309 V(cmgt, NEON_CMGT, vd.IsVector() || vd.Is1D()) \
3310 V(cmhi, NEON_CMHI, vd.IsVector() || vd.Is1D()) \
3311 V(cmhs, NEON_CMHS, vd.IsVector() || vd.Is1D()) \
3312 V(cmtst, NEON_CMTST, vd.IsVector() || vd.Is1D()) \
3313 V(sshl, NEON_SSHL, vd.IsVector() || vd.Is1D()) \
3314 V(ushl, NEON_USHL, vd.IsVector() || vd.Is1D()) \
3315 V(srshl, NEON_SRSHL, vd.IsVector() || vd.Is1D()) \
3316 V(urshl, NEON_URSHL, vd.IsVector() || vd.Is1D()) \
3317 V(sqdmulh, NEON_SQDMULH, vd.IsLaneSizeH() || vd.IsLaneSizeS()) \
3318 V(sqrdmulh, NEON_SQRDMULH, vd.IsLaneSizeH() || vd.IsLaneSizeS()) \
3319 V(shadd, NEON_SHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
3320 V(uhadd, NEON_UHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
3321 V(srhadd, NEON_SRHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
3322 V(urhadd, NEON_URHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
3323 V(shsub, NEON_SHSUB, vd.IsVector() && !vd.IsLaneSizeD()) \
3324 V(uhsub, NEON_UHSUB, vd.IsVector() && !vd.IsLaneSizeD()) \
3325 V(smax, NEON_SMAX, vd.IsVector() && !vd.IsLaneSizeD()) \
3326 V(smaxp, NEON_SMAXP, vd.IsVector() && !vd.IsLaneSizeD()) \
3327 V(smin, NEON_SMIN, vd.IsVector() && !vd.IsLaneSizeD()) \
3328 V(sminp, NEON_SMINP, vd.IsVector() && !vd.IsLaneSizeD()) \
3329 V(umax, NEON_UMAX, vd.IsVector() && !vd.IsLaneSizeD()) \
3330 V(umaxp, NEON_UMAXP, vd.IsVector() && !vd.IsLaneSizeD()) \
3331 V(umin, NEON_UMIN, vd.IsVector() && !vd.IsLaneSizeD()) \
3332 V(uminp, NEON_UMINP, vd.IsVector() && !vd.IsLaneSizeD()) \
3333 V(saba, NEON_SABA, vd.IsVector() && !vd.IsLaneSizeD()) \
3334 V(sabd, NEON_SABD, vd.IsVector() && !vd.IsLaneSizeD()) \
3335 V(uaba, NEON_UABA, vd.IsVector() && !vd.IsLaneSizeD()) \
3336 V(uabd, NEON_UABD, vd.IsVector() && !vd.IsLaneSizeD()) \
3337 V(mla, NEON_MLA, vd.IsVector() && !vd.IsLaneSizeD()) \
3338 V(mls, NEON_MLS, vd.IsVector() && !vd.IsLaneSizeD()) \
3339 V(mul, NEON_MUL, vd.IsVector() && !vd.IsLaneSizeD()) \
3340 V(and_, NEON_AND, vd.Is8B() || vd.Is16B()) \
3341 V(orr, NEON_ORR, vd.Is8B() || vd.Is16B()) \
3342 V(orn, NEON_ORN, vd.Is8B() || vd.Is16B()) \
3343 V(eor, NEON_EOR, vd.Is8B() || vd.Is16B()) \
3344 V(bic, NEON_BIC, vd.Is8B() || vd.Is16B()) \
3345 V(bit, NEON_BIT, vd.Is8B() || vd.Is16B()) \
3346 V(bif, NEON_BIF, vd.Is8B() || vd.Is16B()) \
3347 V(bsl, NEON_BSL, vd.Is8B() || vd.Is16B()) \
3348 V(pmul, NEON_PMUL, vd.Is8B() || vd.Is16B()) \
3349 V(uqadd, NEON_UQADD, true) \
3350 V(sqadd, NEON_SQADD, true) \
3351 V(uqsub, NEON_UQSUB, true) \
3352 V(sqsub, NEON_SQSUB, true) \
3353 V(sqshl, NEON_SQSHL, true) \
3354 V(uqshl, NEON_UQSHL, true) \
3355 V(sqrshl, NEON_SQRSHL, true) \
3356 V(uqrshl, NEON_UQRSHL, true)
3357
3358#define DEFINE_ASM_FUNC(FN, OP, AS) \
3359 void Assembler::FN(const VRegister& vd, const VRegister& vn, \
3360 const VRegister& vm) { \
3361 DCHECK(AS); \
3362 NEON3Same(vd, vn, vm, OP); \
3363 }
3364NEON_3SAME_LIST(DEFINE_ASM_FUNC)
3365#undef DEFINE_ASM_FUNC
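// Illustrative sketch (not part of the original source): for the first entry in
// NEON_3SAME_LIST, the DEFINE_ASM_FUNC macro above expands to roughly:
//
//   void Assembler::add(const VRegister& vd, const VRegister& vn,
//                       const VRegister& vm) {
//     DCHECK(vd.IsVector() || vd.Is1D());
//     NEON3Same(vd, vn, vm, NEON_ADD);
//   }
//
// with the third list argument supplying the DCHECK condition.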
3366
3367#define NEON_FP3SAME_LIST_V2(V) \
3368 V(fadd, NEON_FADD, FADD) \
3369 V(fsub, NEON_FSUB, FSUB) \
3370 V(fmul, NEON_FMUL, FMUL) \
3371 V(fdiv, NEON_FDIV, FDIV) \
3372 V(fmax, NEON_FMAX, FMAX) \
3373 V(fmaxnm, NEON_FMAXNM, FMAXNM) \
3374 V(fmin, NEON_FMIN, FMIN) \
3375 V(fminnm, NEON_FMINNM, FMINNM) \
3376 V(fmulx, NEON_FMULX, NEON_FMULX_scalar) \
3377 V(frecps, NEON_FRECPS, NEON_FRECPS_scalar) \
3378 V(frsqrts, NEON_FRSQRTS, NEON_FRSQRTS_scalar) \
3379 V(fabd, NEON_FABD, NEON_FABD_scalar) \
3380 V(fmla, NEON_FMLA, 0) \
3381 V(fmls, NEON_FMLS, 0) \
3382 V(facge, NEON_FACGE, NEON_FACGE_scalar) \
3383 V(facgt, NEON_FACGT, NEON_FACGT_scalar) \
3384 V(fcmeq, NEON_FCMEQ, NEON_FCMEQ_scalar) \
3385 V(fcmge, NEON_FCMGE, NEON_FCMGE_scalar) \
3386 V(fcmgt, NEON_FCMGT, NEON_FCMGT_scalar) \
3387 V(faddp, NEON_FADDP, 0) \
3388 V(fmaxp, NEON_FMAXP, 0) \
3389 V(fminp, NEON_FMINP, 0) \
3390 V(fmaxnmp, NEON_FMAXNMP, 0) \
3391 V(fminnmp, NEON_FMINNMP, 0)
3392
3393#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
3394 void Assembler::FN(const VRegister& vd, const VRegister& vn, \
3395 const VRegister& vm) { \
3396 Instr op; \
3397 if ((SCA_OP != 0) && vd.IsScalar()) { \
3398 DCHECK(vd.Is1S() || vd.Is1D()); \
3399 op = SCA_OP; \
3400 } else { \
3401 DCHECK(vd.IsVector()); \
3402 DCHECK(vd.Is2S() || vd.Is2D() || vd.Is4S() || vd.Is4H() || vd.Is8H()); \
3403 op = VEC_OP; \
3404 } \
3405 NEONFP3Same(vd, vn, vm, op); \
3406 }
3407NEON_FP3SAME_LIST_V2(DEFINE_ASM_FUNC)
3408#undef DEFINE_ASM_FUNC
3409
3410void Assembler::bcax(const VRegister& vd, const VRegister& vn,
3411 const VRegister& vm, const VRegister& va) {
3412 DCHECK(IsEnabled(SHA3));
3413 DCHECK(vd.Is16B() && vn.Is16B() && vm.Is16B());
3414 Emit(NEON_BCAX | Rd(vd) | Rn(vn) | Rm(vm) | Ra(va));
3415}
3416
3417void Assembler::eor3(const VRegister& vd, const VRegister& vn,
3418 const VRegister& vm, const VRegister& va) {
3419 DCHECK(IsEnabled(SHA3));
3420 DCHECK(vd.Is16B() && vn.Is16B() && vm.Is16B() && va.Is16B());
3421 Emit(NEON_EOR3 | Rd(vd) | Rn(vn) | Rm(vm) | Ra(va));
3422}
3423
3424void Assembler::addp(const VRegister& vd, const VRegister& vn) {
3425 DCHECK((vd.Is1D() && vn.Is2D()));
3426 Emit(SFormat(vd) | NEON_ADDP_scalar | Rn(vn) | Rd(vd));
3427}
3428
3429void Assembler::faddp(const VRegister& vd, const VRegister& vn) {
3430 DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
3431 Emit(FPFormat(vd) | NEON_FADDP_scalar | Rn(vn) | Rd(vd));
3432}
3433
3434void Assembler::fmaxp(const VRegister& vd, const VRegister& vn) {
3435 DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
3436 Emit(FPFormat(vd) | NEON_FMAXP_scalar | Rn(vn) | Rd(vd));
3437}
3438
3439void Assembler::fminp(const VRegister& vd, const VRegister& vn) {
3440 DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
3441 Emit(FPFormat(vd) | NEON_FMINP_scalar | Rn(vn) | Rd(vd));
3442}
3443
3444void Assembler::fmaxnmp(const VRegister& vd, const VRegister& vn) {
3445 DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
3446 Emit(FPFormat(vd) | NEON_FMAXNMP_scalar | Rn(vn) | Rd(vd));
3447}
3448
3449void Assembler::fminnmp(const VRegister& vd, const VRegister& vn) {
3450 DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
3451 Emit(FPFormat(vd) | NEON_FMINNMP_scalar | Rn(vn) | Rd(vd));
3452}
3453
3454void Assembler::orr(const VRegister& vd, const int imm8, const int left_shift) {
3455 NEONModifiedImmShiftLsl(vd, imm8, left_shift, NEONModifiedImmediate_ORR);
3456}
3457
3458void Assembler::mov(const VRegister& vd, const VRegister& vn) {
3459 DCHECK(AreSameFormat(vd, vn));
3460 if (vd.IsD()) {
3461 orr(vd.V8B(), vn.V8B(), vn.V8B());
3462 } else {
3463 DCHECK(vd.IsQ());
3464 orr(vd.V16B(), vn.V16B(), vn.V16B());
3465 }
3466}
3467
3468void Assembler::bic(const VRegister& vd, const int imm8, const int left_shift) {
3469 NEONModifiedImmShiftLsl(vd, imm8, left_shift, NEONModifiedImmediate_BIC);
3470}
3471
3472void Assembler::movi(const VRegister& vd, const uint64_t imm, Shift shift,
3473 const int shift_amount) {
3474 DCHECK((shift == LSL) || (shift == MSL));
3475 if (vd.Is2D() || vd.Is1D()) {
3476 DCHECK_EQ(shift_amount, 0);
3477 int imm8 = 0;
3478 for (int i = 0; i < 8; ++i) {
3479 int byte = (imm >> (i * 8)) & 0xFF;
3480 DCHECK((byte == 0) || (byte == 0xFF));
3481 if (byte == 0xFF) {
3482 imm8 |= (1 << i);
3483 }
3484 }
3485 Instr q = vd.Is2D() ? NEON_Q : 0;
3486 Emit(q | NEONModImmOp(1) | NEONModifiedImmediate_MOVI |
3487 ImmNEONabcdefgh(imm8) | NEONCmode(0xE) | Rd(vd));
3488 } else if (shift == LSL) {
3489 DCHECK(is_uint8(imm));
3490 NEONModifiedImmShiftLsl(vd, static_cast<int>(imm), shift_amount,
3491 NEONModifiedImmediate_MOVI);
3492 } else {
3493 DCHECK(is_uint8(imm));
3494 NEONModifiedImmShiftMsl(vd, static_cast<int>(imm), shift_amount,
3495 NEONModifiedImmediate_MOVI);
3496 }
3497}
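// Illustrative example (not part of the original source): in the 2D/1D path
// above, every byte of the 64-bit immediate must be 0x00 or 0xFF, and byte i
// maps to bit i of imm8. For instance, imm = 0x00FF00FF00FF00FF has bytes
// 0, 2, 4 and 6 equal to 0xFF, so imm8 = 0b01010101 = 0x55.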
3498
3499void Assembler::mvn(const VRegister& vd, const VRegister& vn) {
3500 DCHECK(AreSameFormat(vd, vn));
3501 if (vd.IsD()) {
3502 not_(vd.V8B(), vn.V8B());
3503 } else {
3504 DCHECK(vd.IsQ());
3505 not_(vd.V16B(), vn.V16B());
3506 }
3507}
3508
3509void Assembler::mvni(const VRegister& vd, const int imm8, Shift shift,
3510 const int shift_amount) {
3511 DCHECK((shift == LSL) || (shift == MSL));
3512 if (shift == LSL) {
3513 NEONModifiedImmShiftLsl(vd, imm8, shift_amount, NEONModifiedImmediate_MVNI);
3514 } else {
3515 NEONModifiedImmShiftMsl(vd, imm8, shift_amount, NEONModifiedImmediate_MVNI);
3516 }
3517}
3518
3519void Assembler::NEONFPByElement(const VRegister& vd, const VRegister& vn,
3520 const VRegister& vm, int vm_index,
3521 NEONByIndexedElementOp vop) {
3522 DCHECK(AreSameFormat(vd, vn));
3523 DCHECK((vd.Is2S() && vm.Is1S()) || (vd.Is4S() && vm.Is1S()) ||
3524 (vd.Is1S() && vm.Is1S()) || (vd.Is2D() && vm.Is1D()) ||
3525 (vd.Is1D() && vm.Is1D()));
3526 DCHECK((vm.Is1S() && (vm_index < 4)) || (vm.Is1D() && (vm_index < 2)));
3527
3528 Instr op = vop;
3529 int index_num_bits = vm.Is1S() ? 2 : 1;
3530 if (vd.IsScalar()) {
3531 op |= NEON_Q | NEONScalar;
3532 }
3533
3534 Emit(FPFormat(vd) | op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) |
3535 Rn(vn) | Rd(vd));
3536}
3537
3538void Assembler::NEONByElement(const VRegister& vd, const VRegister& vn,
3539 const VRegister& vm, int vm_index,
3540 NEONByIndexedElementOp vop) {
3541 DCHECK(AreSameFormat(vd, vn));
3542 DCHECK((vd.Is4H() && vm.Is1H()) || (vd.Is8H() && vm.Is1H()) ||
3543 (vd.Is1H() && vm.Is1H()) || (vd.Is2S() && vm.Is1S()) ||
3544 (vd.Is4S() && vm.Is1S()) || (vd.Is1S() && vm.Is1S()));
3545 DCHECK((vm.Is1H() && (vm.code() < 16) && (vm_index < 8)) ||
3546 (vm.Is1S() && (vm_index < 4)));
3547
3548 Instr format, op = vop;
3549 int index_num_bits = vm.Is1H() ? 3 : 2;
3550 if (vd.IsScalar()) {
3551 op |= NEONScalar | NEON_Q;
3552 format = SFormat(vn);
3553 } else {
3554 format = VFormat(vn);
3555 }
3556 Emit(format | op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) |
3557 Rd(vd));
3558}
3559
3560void Assembler::NEONByElementL(const VRegister& vd, const VRegister& vn,
3561 const VRegister& vm, int vm_index,
3562 NEONByIndexedElementOp vop) {
3563 DCHECK((vd.Is4S() && vn.Is4H() && vm.Is1H()) ||
3564 (vd.Is4S() && vn.Is8H() && vm.Is1H()) ||
3565 (vd.Is1S() && vn.Is1H() && vm.Is1H()) ||
3566 (vd.Is2D() && vn.Is2S() && vm.Is1S()) ||
3567 (vd.Is2D() && vn.Is4S() && vm.Is1S()) ||
3568 (vd.Is1D() && vn.Is1S() && vm.Is1S()));
3569
3570 DCHECK((vm.Is1H() && (vm.code() < 16) && (vm_index < 8)) ||
3571 (vm.Is1S() && (vm_index < 4)));
3572
3573 Instr format, op = vop;
3574 int index_num_bits = vm.Is1H() ? 3 : 2;
3575 if (vd.IsScalar()) {
3576 op |= NEONScalar | NEON_Q;
3577 format = SFormat(vn);
3578 } else {
3579 format = VFormat(vn);
3580 }
3581 Emit(format | op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) |
3582 Rd(vd));
3583}
3584
3585#define NEON_BYELEMENT_LIST(V) \
3586 V(mul, NEON_MUL_byelement, vn.IsVector()) \
3587 V(mla, NEON_MLA_byelement, vn.IsVector()) \
3588 V(mls, NEON_MLS_byelement, vn.IsVector()) \
3589 V(sqdmulh, NEON_SQDMULH_byelement, true) \
3590 V(sqrdmulh, NEON_SQRDMULH_byelement, true)
3591
3592#define DEFINE_ASM_FUNC(FN, OP, AS) \
3593 void Assembler::FN(const VRegister& vd, const VRegister& vn, \
3594 const VRegister& vm, int vm_index) { \
3595 DCHECK(AS); \
3596 NEONByElement(vd, vn, vm, vm_index, OP); \
3597 }
3598NEON_BYELEMENT_LIST(DEFINE_ASM_FUNC)
3599#undef DEFINE_ASM_FUNC
3600
3601#define NEON_FPBYELEMENT_LIST(V) \
3602 V(fmul, NEON_FMUL_byelement) \
3603 V(fmla, NEON_FMLA_byelement) \
3604 V(fmls, NEON_FMLS_byelement) \
3605 V(fmulx, NEON_FMULX_byelement)
3606
3607#define DEFINE_ASM_FUNC(FN, OP) \
3608 void Assembler::FN(const VRegister& vd, const VRegister& vn, \
3609 const VRegister& vm, int vm_index) { \
3610 NEONFPByElement(vd, vn, vm, vm_index, OP); \
3611 }
3612NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC)
3613#undef DEFINE_ASM_FUNC
3614
3615#define NEON_BYELEMENT_LONG_LIST(V) \
3616 V(sqdmull, NEON_SQDMULL_byelement, vn.IsScalar() || vn.IsD()) \
3617 V(sqdmull2, NEON_SQDMULL_byelement, vn.IsVector() && vn.IsQ()) \
3618 V(sqdmlal, NEON_SQDMLAL_byelement, vn.IsScalar() || vn.IsD()) \
3619 V(sqdmlal2, NEON_SQDMLAL_byelement, vn.IsVector() && vn.IsQ()) \
3620 V(sqdmlsl, NEON_SQDMLSL_byelement, vn.IsScalar() || vn.IsD()) \
3621 V(sqdmlsl2, NEON_SQDMLSL_byelement, vn.IsVector() && vn.IsQ()) \
3622 V(smull, NEON_SMULL_byelement, vn.IsVector() && vn.IsD()) \
3623 V(smull2, NEON_SMULL_byelement, vn.IsVector() && vn.IsQ()) \
3624 V(umull, NEON_UMULL_byelement, vn.IsVector() && vn.IsD()) \
3625 V(umull2, NEON_UMULL_byelement, vn.IsVector() && vn.IsQ()) \
3626 V(smlal, NEON_SMLAL_byelement, vn.IsVector() && vn.IsD()) \
3627 V(smlal2, NEON_SMLAL_byelement, vn.IsVector() && vn.IsQ()) \
3628 V(umlal, NEON_UMLAL_byelement, vn.IsVector() && vn.IsD()) \
3629 V(umlal2, NEON_UMLAL_byelement, vn.IsVector() && vn.IsQ()) \
3630 V(smlsl, NEON_SMLSL_byelement, vn.IsVector() && vn.IsD()) \
3631 V(smlsl2, NEON_SMLSL_byelement, vn.IsVector() && vn.IsQ()) \
3632 V(umlsl, NEON_UMLSL_byelement, vn.IsVector() && vn.IsD()) \
3633 V(umlsl2, NEON_UMLSL_byelement, vn.IsVector() && vn.IsQ())
3634
3635#define DEFINE_ASM_FUNC(FN, OP, AS) \
3636 void Assembler::FN(const VRegister& vd, const VRegister& vn, \
3637 const VRegister& vm, int vm_index) { \
3638 DCHECK(AS); \
3639 NEONByElementL(vd, vn, vm, vm_index, OP); \
3640 }
3641NEON_BYELEMENT_LONG_LIST(DEFINE_ASM_FUNC)
3642#undef DEFINE_ASM_FUNC
3643
3644void Assembler::suqadd(const VRegister& vd, const VRegister& vn) {
3645 NEON2RegMisc(vd, vn, NEON_SUQADD);
3646}
3647
3648void Assembler::usqadd(const VRegister& vd, const VRegister& vn) {
3649 NEON2RegMisc(vd, vn, NEON_USQADD);
3650}
3651
3652void Assembler::abs(const VRegister& vd, const VRegister& vn) {
3653 DCHECK(vd.IsVector() || vd.Is1D());
3654 NEON2RegMisc(vd, vn, NEON_ABS);
3655}
3656
3657void Assembler::sqabs(const VRegister& vd, const VRegister& vn) {
3658 NEON2RegMisc(vd, vn, NEON_SQABS);
3659}
3660
3661void Assembler::neg(const VRegister& vd, const VRegister& vn) {
3662 DCHECK(vd.IsVector() || vd.Is1D());
3663 NEON2RegMisc(vd, vn, NEON_NEG);
3664}
3665
3666void Assembler::sqneg(const VRegister& vd, const VRegister& vn) {
3667 NEON2RegMisc(vd, vn, NEON_SQNEG);
3668}
3669
3670void Assembler::NEONXtn(const VRegister& vd, const VRegister& vn,
3671 NEON2RegMiscOp vop) {
3672 Instr format, op = vop;
3673 if (vd.IsScalar()) {
3674 DCHECK((vd.Is1B() && vn.Is1H()) || (vd.Is1H() && vn.Is1S()) ||
3675 (vd.Is1S() && vn.Is1D()));
3676 op |= NEON_Q | NEONScalar;
3677 format = SFormat(vd);
3678 } else {
3679 DCHECK((vd.Is8B() && vn.Is8H()) || (vd.Is4H() && vn.Is4S()) ||
3680 (vd.Is2S() && vn.Is2D()) || (vd.Is16B() && vn.Is8H()) ||
3681 (vd.Is8H() && vn.Is4S()) || (vd.Is4S() && vn.Is2D()));
3682 format = VFormat(vd);
3683 }
3684 Emit(format | op | Rn(vn) | Rd(vd));
3685}
3686
3687void Assembler::xtn(const VRegister& vd, const VRegister& vn) {
3688 DCHECK(vd.IsVector() && vd.IsD());
3689 NEONXtn(vd, vn, NEON_XTN);
3690}
3691
3692void Assembler::xtn2(const VRegister& vd, const VRegister& vn) {
3693 DCHECK(vd.IsVector() && vd.IsQ());
3694 NEONXtn(vd, vn, NEON_XTN);
3695}
3696
3697void Assembler::sqxtn(const VRegister& vd, const VRegister& vn) {
3698 DCHECK(vd.IsScalar() || vd.IsD());
3699 NEONXtn(vd, vn, NEON_SQXTN);
3700}
3701
3702void Assembler::sqxtn2(const VRegister& vd, const VRegister& vn) {
3703 DCHECK(vd.IsVector() && vd.IsQ());
3704 NEONXtn(vd, vn, NEON_SQXTN);
3705}
3706
3707void Assembler::sqxtun(const VRegister& vd, const VRegister& vn) {
3708 DCHECK(vd.IsScalar() || vd.IsD());
3709 NEONXtn(vd, vn, NEON_SQXTUN);
3710}
3711
3712void Assembler::sqxtun2(const VRegister& vd, const VRegister& vn) {
3713 DCHECK(vd.IsVector() && vd.IsQ());
3714 NEONXtn(vd, vn, NEON_SQXTUN);
3715}
3716
3717void Assembler::uqxtn(const VRegister& vd, const VRegister& vn) {
3718 DCHECK(vd.IsScalar() || vd.IsD());
3719 NEONXtn(vd, vn, NEON_UQXTN);
3720}
3721
3722void Assembler::uqxtn2(const VRegister& vd, const VRegister& vn) {
3723 DCHECK(vd.IsVector() && vd.IsQ());
3724 NEONXtn(vd, vn, NEON_UQXTN);
3725}
3726
3727// NEON NOT and RBIT are distinguished by bit 22, the bottom bit of "size".
3728void Assembler::not_(const VRegister& vd, const VRegister& vn) {
3729 DCHECK(AreSameFormat(vd, vn));
3730 DCHECK(vd.Is8B() || vd.Is16B());
3731 Emit(VFormat(vd) | NEON_RBIT_NOT | Rn(vn) | Rd(vd));
3732}
3733
3734void Assembler::rbit(const VRegister& vd, const VRegister& vn) {
3735 DCHECK(AreSameFormat(vd, vn));
3736 DCHECK(vd.Is8B() || vd.Is16B());
3737 Emit(VFormat(vn) | (1 << NEONSize_offset) | NEON_RBIT_NOT | Rn(vn) | Rd(vd));
3738}
3739
3740void Assembler::ext(const VRegister& vd, const VRegister& vn,
3741 const VRegister& vm, int index) {
3742 DCHECK(AreSameFormat(vd, vn, vm));
3743 DCHECK(vd.Is8B() || vd.Is16B());
3744 DCHECK((0 <= index) && (index < vd.LaneCount()));
3745 Emit(VFormat(vd) | NEON_EXT | Rm(vm) | ImmNEONExt(index) | Rn(vn) | Rd(vd));
3746}
3747
3748void Assembler::dup(const VRegister& vd, const VRegister& vn, int vn_index) {
3749 Instr q, scalar;
3750
3751 // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
3752 // number of lanes, and T is b, h, s or d.
3753 int lane_size = vn.LaneSizeInBytes();
3754 NEONFormatField format;
3755 switch (lane_size) {
3756 case 1:
3757 format = NEON_16B;
3758 break;
3759 case 2:
3760 format = NEON_8H;
3761 break;
3762 case 4:
3763 format = NEON_4S;
3764 break;
3765 default:
3766 DCHECK_EQ(lane_size, 8);
3767 format = NEON_2D;
3768 break;
3769 }
3770
3771 if (vd.IsScalar()) {
3772 q = NEON_Q;
3773 scalar = NEONScalar;
3774 } else {
3775 DCHECK(!vd.Is1D());
3776 q = vd.IsD() ? 0 : NEON_Q;
3777 scalar = 0;
3778 }
3779 Emit(q | scalar | NEON_DUP_ELEMENT | ImmNEON5(format, vn_index) | Rn(vn) |
3780 Rd(vd));
3781}
3782
3783void Assembler::dcptr(Label* label) {
3784 BlockPoolsScope no_pool_inbetween(this);
3785 RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE);
3786 if (label->is_bound()) {
3787 // The label is bound, so it does not need to be updated and the internal
3788 // reference should be emitted.
3789 //
3790 // In this case, label->pos() returns the offset of the label from the
3791 // start of the buffer.
3792 internal_reference_positions_.push_back(pc_offset());
3793 dc64(reinterpret_cast<uintptr_t>(buffer_start_ + label->pos()));
3794 } else {
3795 int32_t offset;
3796 if (label->is_linked()) {
3797 // The label is linked, so the internal reference should be added
3798 // onto the end of the label's link chain.
3799 //
3800 // In this case, label->pos() returns the offset of the last linked
3801 // instruction from the start of the buffer.
3802 offset = label->pos() - pc_offset();
3803 DCHECK_NE(offset, kStartOfLabelLinkChain);
3804 } else {
3805 // The label is unused, so it now becomes linked and the internal
3806 // reference is at the start of the new link chain.
3807 offset = kStartOfLabelLinkChain;
3808 }
3809 // The instruction at pc is now the last link in the label's chain.
3810 label->link_to(pc_offset());
3811
3812 // Traditionally the offset to the previous instruction in the chain is
3813 // encoded in the instruction payload (e.g. branch range) but internal
3814 // references are not instructions so while unbound they are encoded as
3815 // two consecutive brk instructions. The two 16-bit immediates are used
3816 // to encode the offset.
3817 offset >>= kInstrSizeLog2;
3818 DCHECK(is_int32(offset));
3819 uint32_t high16 = unsigned_bitextract_32(31, 16, offset);
3820 uint32_t low16 = unsigned_bitextract_32(15, 0, offset);
3821
3822 brk(high16);
3823 brk(low16);
3824 }
3825}
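// Illustrative example (not part of the original source): if the 32-bit offset
// value computed above were 0x00012344, the unbound internal reference would be
// emitted as brk #0x0001 followed by brk #0x2344, since unsigned_bitextract_32
// takes bits [31:16] and [15:0] respectively.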
3826
3827// Below, a difference in case for the same letter indicates a
3828// negated bit. If b is 1, then B is 0.
3829uint32_t Assembler::FPToImm8(double imm) {
3830 uint64_t bits = base::bit_cast<uint64_t>(imm);
3831 DCHECK(IsImmFP64(bits));
3832 // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
3833 // 0000.0000.0000.0000.0000.0000.0000.0000
3834 // bit7: a000.0000
3835 uint64_t bit7 = ((bits >> 63) & 0x1) << 7;
3836 // bit6: 0b00.0000
3837 uint64_t bit6 = ((bits >> 61) & 0x1) << 6;
3838 // bit5_to_0: 00cd.efgh
3839 uint64_t bit5_to_0 = (bits >> 48) & 0x3F;
3840
3841 return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
3842}
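// Illustrative example (not part of the original source): for imm = 1.0 the raw
// bits are 0x3FF0000000000000, so bit7 = 0 (sign), bit6 = 0x40 (bit 61 of the
// exponent) and bit5_to_0 = 0x3FF0 & 0x3F = 0x30, giving imm8 = 0x70, the
// standard AArch64 FMOV imm8 encoding of 1.0.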
3843
3844Instr Assembler::ImmFP(double imm) { return FPToImm8(imm) << ImmFP_offset; }
3845Instr Assembler::ImmNEONFP(double imm) {
3846 return ImmNEONabcdefgh(FPToImm8(imm));
3847}
3848
3849// InstructionStream generation helpers.
3850void Assembler::MoveWide(const Register& rd, uint64_t imm, int shift,
3851 MoveWideImmediateOp mov_op) {
3852 // Ignore the top 32 bits of an immediate if we're moving to a W register.
3853 if (rd.Is32Bits()) {
3854 // Check that the top 32 bits are zero (a positive 32-bit number) or top
3855 // 33 bits are one (a negative 32-bit number, sign extended to 64 bits).
3856 DCHECK(((imm >> kWRegSizeInBits) == 0) ||
3857 ((imm >> (kWRegSizeInBits - 1)) == 0x1FFFFFFFF));
3858 imm &= kWRegMask;
3859 }
3860
3861 if (shift >= 0) {
3862 // Explicit shift specified.
3863 DCHECK((shift == 0) || (shift == 16) || (shift == 32) || (shift == 48));
3864 DCHECK(rd.Is64Bits() || (shift == 0) || (shift == 16));
3865 shift /= 16;
3866 } else {
3867 // Calculate a new immediate and shift combination to encode the immediate
3868 // argument.
3869 shift = 0;
3870 if ((imm & ~0xFFFFULL) == 0) {
3871 // Nothing to do.
3872 } else if ((imm & ~(0xFFFFULL << 16)) == 0) {
3873 imm >>= 16;
3874 shift = 1;
3875 } else if ((imm & ~(0xFFFFULL << 32)) == 0) {
3876 DCHECK(rd.Is64Bits());
3877 imm >>= 32;
3878 shift = 2;
3879 } else if ((imm & ~(0xFFFFULL << 48)) == 0) {
3880 DCHECK(rd.Is64Bits());
3881 imm >>= 48;
3882 shift = 3;
3883 }
3884 }
3885
3886 DCHECK(is_uint16(imm));
3887
3888 Emit(SF(rd) | MoveWideImmediateFixed | mov_op | Rd(rd) |
3889 ImmMoveWide(static_cast<int>(imm)) | ShiftMoveWide(shift));
3890}
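// Illustrative example (not part of the original source): when no explicit
// shift is given (shift < 0), the code above infers it. An immediate of
// 0x0000FEDC00000000 matches the (imm & ~(0xFFFFULL << 32)) == 0 case, so imm
// becomes 0xFEDC and shift becomes 2, i.e. a move-wide with LSL #32.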
3891
3892void Assembler::AddSub(const Register& rd, const Register& rn,
3893 const Operand& operand, FlagsUpdate S, AddSubOp op) {
3894 DCHECK_EQ(rd.SizeInBits(), rn.SizeInBits());
3895 DCHECK(!operand.NeedsRelocation(this));
3896 if (operand.IsImmediate()) {
3897 int64_t immediate = operand.ImmediateValue();
3898 DCHECK(IsImmAddSub(immediate));
3899 Instr dest_reg = (S == SetFlags) ? Rd(rd) : RdSP(rd);
3900 Emit(SF(rd) | AddSubImmediateFixed | op | Flags(S) |
3901 ImmAddSub(static_cast<int>(immediate)) | dest_reg | RnSP(rn));
3902 } else if (operand.IsShiftedRegister()) {
3903 DCHECK_EQ(operand.reg().SizeInBits(), rd.SizeInBits());
3904 DCHECK_NE(operand.shift(), ROR);
3905
3906 // For instructions of the form:
3907 // add/sub wsp, <Wn>, <Wm> [, LSL #0-3 ]
3908 // add/sub <Wd>, wsp, <Wm> [, LSL #0-3 ]
3909 // add/sub wsp, wsp, <Wm> [, LSL #0-3 ]
3910 // adds/subs <Wd>, wsp, <Wm> [, LSL #0-3 ]
3911 // or their 64-bit register equivalents, convert the operand from shifted to
3912 // extended register mode, and emit an add/sub extended instruction.
3913 if (rn.IsSP() || rd.IsSP()) {
3914 DCHECK(!(rd.IsSP() && (S == SetFlags)));
3915 DataProcExtendedRegister(rd, rn, operand.ToExtendedRegister(), S,
3916 AddSubExtendedFixed | op);
3917 } else {
3918 DataProcShiftedRegister(rd, rn, operand, S, AddSubShiftedFixed | op);
3919 }
3920 } else {
3921 DCHECK(operand.IsExtendedRegister());
3922 DataProcExtendedRegister(rd, rn, operand, S, AddSubExtendedFixed | op);
3923 }
3924}
3925
3926void Assembler::AddSubWithCarry(const Register& rd, const Register& rn,
3927 const Operand& operand, FlagsUpdate S,
3928 AddSubWithCarryOp op) {
3929 DCHECK_EQ(rd.SizeInBits(), rn.SizeInBits());
3930 DCHECK_EQ(rd.SizeInBits(), operand.reg().SizeInBits());
3931 DCHECK(operand.IsShiftedRegister() && (operand.shift_amount() == 0));
3932 DCHECK(!operand.NeedsRelocation(this));
3933 Emit(SF(rd) | op | Flags(S) | Rm(operand.reg()) | Rn(rn) | Rd(rd));
3934}
3935
3936void Assembler::hlt(int code) {
3937 DCHECK(is_uint16(code));
3938 Emit(HLT | ImmException(code));
3939}
3940
3941void Assembler::brk(int code) {
3942 DCHECK(is_uint16(code));
3943 Emit(BRK | ImmException(code));
3944}
3945
3946void Assembler::EmitStringData(const char* string) {
3947 size_t len = strlen(string) + 1;
3948 DCHECK_LE(RoundUp(len, kInstrSize), static_cast<size_t>(kGap));
3949 EmitData(string, static_cast<int>(len));
3950 // Pad with null characters until pc_ is aligned.
3951 const char pad[] = {'\0', '\0', '\0', '\0'};
3952 static_assert(sizeof(pad) == kInstrSize,
3953 "Size of padding must match instruction size.");
3954 EmitData(pad, RoundUp(pc_offset(), kInstrSize) - pc_offset());
3955}
3956
3957void Assembler::debug(const char* message, uint32_t code, Instr params) {
3958 if (options().enable_simulator_code) {
3959 size_t size_of_debug_sequence =
3960 4 * kInstrSize + RoundUp<kInstrSize>(strlen(message) + 1);
3961
3962 // The arguments to the debug marker need to be contiguous in memory, so
3963 // make sure we don't try to emit pools.
3964 BlockPoolsScope scope(this, size_of_debug_sequence);
3965
3966 Label start;
3967 bind(&start);
3968
3969 // Refer to instructions-arm64.h for a description of the marker and its
3970 // arguments.
3971 hlt(kImmExceptionIsDebug);
3972 DCHECK_EQ(SizeOfCodeGeneratedSince(&start), kDebugCodeOffset);
3973 dc32(code);
3974 DCHECK_EQ(SizeOfCodeGeneratedSince(&start), kDebugParamsOffset);
3975 dc32(params);
3976 DCHECK_EQ(SizeOfCodeGeneratedSince(&start), kDebugMessageOffset);
3977 EmitStringData(message);
3978 hlt(kImmExceptionIsUnreachable);
3979 DCHECK_EQ(SizeOfCodeGeneratedSince(&start), size_of_debug_sequence);
3980
3981 return;
3982 }
3983
3984 if (params & BREAK) {
3985 brk(0);
3986 }
3987}
3988
3989void Assembler::Logical(const Register& rd, const Register& rn,
3990 const Operand& operand, LogicalOp op) {
3991 DCHECK(rd.SizeInBits() == rn.SizeInBits());
3992 DCHECK(!operand.NeedsRelocation(this));
3993 if (operand.IsImmediate()) {
3994 int64_t immediate = operand.ImmediateValue();
3995 unsigned reg_size = rd.SizeInBits();
3996
3997 DCHECK_NE(immediate, 0);
3998 DCHECK_NE(immediate, -1);
3999 DCHECK(rd.Is64Bits() || is_uint32(immediate));
4000
4001 // If the operation is NOT, invert the operation and immediate.
4002 if ((op & NOT) == NOT) {
4003 op = static_cast<LogicalOp>(op & ~NOT);
4004 immediate = rd.Is64Bits() ? ~immediate : (~immediate & kWRegMask);
4005 }
4006
4007 unsigned n, imm_s, imm_r;
4008 if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) {
4009 // Immediate can be encoded in the instruction.
4010 LogicalImmediate(rd, rn, n, imm_s, imm_r, op);
4011 } else {
4012 // This case is handled in the macro assembler.
4013 UNREACHABLE();
4014 }
4015 } else {
4016 DCHECK(operand.IsShiftedRegister());
4017 DCHECK(operand.reg().SizeInBits() == rd.SizeInBits());
4018 Instr dp_op = static_cast<Instr>(op | LogicalShiftedFixed);
4019 DataProcShiftedRegister(rd, rn, operand, LeaveFlags, dp_op);
4020 }
4021}
4022
4023void Assembler::LogicalImmediate(const Register& rd, const Register& rn,
4024 unsigned n, unsigned imm_s, unsigned imm_r,
4025 LogicalOp op) {
4026 unsigned reg_size = rd.SizeInBits();
4027 Instr dest_reg = (op == ANDS) ? Rd(rd) : RdSP(rd);
4028 Emit(SF(rd) | LogicalImmediateFixed | op | BitN(n, reg_size) |
4029 ImmSetBits(imm_s, reg_size) | ImmRotate(imm_r, reg_size) | dest_reg |
4030 Rn(rn));
4031}
4032
4033void Assembler::ConditionalCompare(const Register& rn, const Operand& operand,
4034 StatusFlags nzcv, Condition cond,
4035 ConditionalCompareOp op) {
4036 Instr ccmpop;
4037 DCHECK(!operand.NeedsRelocation(this));
4038 if (operand.IsImmediate()) {
4039 int64_t immediate = operand.ImmediateValue();
4040 DCHECK(IsImmConditionalCompare(immediate));
4041 ccmpop = ConditionalCompareImmediateFixed | op |
4042 ImmCondCmp(static_cast<unsigned>(immediate));
4043 } else {
4044 DCHECK(operand.IsShiftedRegister() && (operand.shift_amount() == 0));
4045 ccmpop = ConditionalCompareRegisterFixed | op | Rm(operand.reg());
4046 }
4047 Emit(SF(rn) | ccmpop | Cond(cond) | Rn(rn) | Nzcv(nzcv));
4048}
4049
4050void Assembler::DataProcessing1Source(const Register& rd, const Register& rn,
4051 DataProcessing1SourceOp op) {
4052 DCHECK(rd.SizeInBits() == rn.SizeInBits());
4053 Emit(SF(rn) | op | Rn(rn) | Rd(rd));
4054}
4055
4056void Assembler::FPDataProcessing1Source(const VRegister& vd,
4057 const VRegister& vn,
4058 FPDataProcessing1SourceOp op) {
4059 Emit(FPType(vn) | op | Rn(vn) | Rd(vd));
4060}
4061
4062void Assembler::FPDataProcessing2Source(const VRegister& fd,
4063 const VRegister& fn,
4064 const VRegister& fm,
4065 FPDataProcessing2SourceOp op) {
4066 DCHECK(fd.SizeInBits() == fn.SizeInBits());
4067 DCHECK(fd.SizeInBits() == fm.SizeInBits());
4068 Emit(FPType(fd) | op | Rm(fm) | Rn(fn) | Rd(fd));
4069}
4070
4071void Assembler::FPDataProcessing3Source(const VRegister& fd,
4072 const VRegister& fn,
4073 const VRegister& fm,
4074 const VRegister& fa,
4075 FPDataProcessing3SourceOp op) {
4076 DCHECK(AreSameSizeAndType(fd, fn, fm, fa));
4077 Emit(FPType(fd) | op | Rm(fm) | Rn(fn) | Rd(fd) | Ra(fa));
4078}
4079
4080void Assembler::NEONModifiedImmShiftLsl(const VRegister& vd, const int imm8,
4081 const int left_shift,
4082 NEONModifiedImmediateOp op) {
4083 DCHECK(vd.Is8B() || vd.Is16B() || vd.Is4H() || vd.Is8H() || vd.Is2S() ||
4084 vd.Is4S());
4085 DCHECK((left_shift == 0) || (left_shift == 8) || (left_shift == 16) ||
4086 (left_shift == 24));
4087 DCHECK(is_uint8(imm8));
4088
4089 int cmode_1, cmode_2, cmode_3;
4090 if (vd.Is8B() || vd.Is16B()) {
4091 DCHECK_EQ(op, NEONModifiedImmediate_MOVI);
4092 cmode_1 = 1;
4093 cmode_2 = 1;
4094 cmode_3 = 1;
4095 } else {
4096 cmode_1 = (left_shift >> 3) & 1;
4097 cmode_2 = left_shift >> 4;
4098 cmode_3 = 0;
4099 if (vd.Is4H() || vd.Is8H()) {
4100 DCHECK((left_shift == 0) || (left_shift == 8));
4101 cmode_3 = 1;
4102 }
4103 }
4104 int cmode = (cmode_3 << 3) | (cmode_2 << 2) | (cmode_1 << 1);
4105
4106 Instr q = vd.IsQ() ? NEON_Q : 0;
4107
4108 Emit(q | op | ImmNEONabcdefgh(imm8) | NEONCmode(cmode) | Rd(vd));
4109}
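// Illustrative example (not part of the original source): for a 4S destination
// with left_shift == 16, cmode_1 = (16 >> 3) & 1 = 0, cmode_2 = 16 >> 4 = 1 and
// cmode_3 = 0, so cmode = 0b0100, the 32-bit "LSL #16" variant of the
// modified-immediate encoding.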
4110
4111void Assembler::NEONModifiedImmShiftMsl(const VRegister& vd, const int imm8,
4112 const int shift_amount,
4113 NEONModifiedImmediateOp op) {
4114 DCHECK(vd.Is2S() || vd.Is4S());
4115 DCHECK((shift_amount == 8) || (shift_amount == 16));
4116 DCHECK(is_uint8(imm8));
4117
4118 int cmode_0 = (shift_amount >> 4) & 1;
4119 int cmode = 0xC | cmode_0;
4120
4121 Instr q = vd.IsQ() ? NEON_Q : 0;
4122
4123 Emit(q | op | ImmNEONabcdefgh(imm8) | NEONCmode(cmode) | Rd(vd));
4124}
4125
4126void Assembler::EmitShift(const Register& rd, const Register& rn, Shift shift,
4127 unsigned shift_amount) {
4128 switch (shift) {
4129 case LSL:
4130 lsl(rd, rn, shift_amount);
4131 break;
4132 case LSR:
4133 lsr(rd, rn, shift_amount);
4134 break;
4135 case ASR:
4136 asr(rd, rn, shift_amount);
4137 break;
4138 case ROR:
4139 ror(rd, rn, shift_amount);
4140 break;
4141 default:
4142 UNREACHABLE();
4143 }
4144}
4145
4146void Assembler::EmitExtendShift(const Register& rd, const Register& rn,
4147 Extend extend, unsigned left_shift) {
4148 DCHECK(rd.SizeInBits() >= rn.SizeInBits());
4149 unsigned reg_size = rd.SizeInBits();
4150 // Use the correct size of register.
4151 Register rn_ = Register::Create(rn.code(), rd.SizeInBits());
4152 // Bits extracted are high_bit:0.
4153 unsigned high_bit = (8 << (extend & 0x3)) - 1;
4154 // Number of bits left in the result that are not introduced by the shift.
4155 unsigned non_shift_bits = (reg_size - left_shift) & (reg_size - 1);
4156
4157 if ((non_shift_bits > high_bit) || (non_shift_bits == 0)) {
4158 switch (extend) {
4159 case UXTB:
4160 case UXTH:
4161 case UXTW:
4162 ubfm(rd, rn_, non_shift_bits, high_bit);
4163 break;
4164 case SXTB:
4165 case SXTH:
4166 case SXTW:
4167 sbfm(rd, rn_, non_shift_bits, high_bit);
4168 break;
4169 case UXTX:
4170 case SXTX: {
4171 DCHECK_EQ(rn.SizeInBits(), kXRegSizeInBits);
4172 // Nothing to extend. Just shift.
4173 lsl(rd, rn_, left_shift);
4174 break;
4175 }
4176 default:
4177 UNREACHABLE();
4178 }
4179 } else {
4180 // No need to extend as the extended bits would be shifted away.
4181 lsl(rd, rn_, left_shift);
4182 }
4183}
4184
4185void Assembler::DataProcShiftedRegister(const Register& rd, const Register& rn,
4186 const Operand& operand, FlagsUpdate S,
4187 Instr op) {
4188 DCHECK(operand.IsShiftedRegister());
4189 DCHECK(rn.Is64Bits() || (rn.Is32Bits() && is_uint5(operand.shift_amount())));
4190 DCHECK(!operand.NeedsRelocation(this));
4191 Emit(SF(rd) | op | Flags(S) | ShiftDP(operand.shift()) |
4192 ImmDPShift(operand.shift_amount()) | Rm(operand.reg()) | Rn(rn) |
4193 Rd(rd));
4194}
4195
4196void Assembler::DataProcExtendedRegister(const Register& rd, const Register& rn,
4197 const Operand& operand, FlagsUpdate S,
4198 Instr op) {
4199 DCHECK(!operand.NeedsRelocation(this));
4200 Instr dest_reg = (S == SetFlags) ? Rd(rd) : RdSP(rd);
4201 Emit(SF(rd) | op | Flags(S) | Rm(operand.reg()) |
4202 ExtendMode(operand.extend()) | ImmExtendShift(operand.shift_amount()) |
4203 dest_reg | RnSP(rn));
4204}
4205
4206void Assembler::LoadStore(const CPURegister& rt, const MemOperand& addr,
4207 LoadStoreOp op) {
4208 Instr memop = op | Rt(rt) | RnSP(addr.base());
4209
4210 if (addr.IsImmediateOffset()) {
4211 unsigned size_log2 = CalcLSDataSizeLog2(op);
4212 int offset = static_cast<int>(addr.offset());
4213 if (IsImmLSScaled(addr.offset(), size_log2)) {
4214 LoadStoreScaledImmOffset(memop, offset, size_log2);
4215 } else {
4216 DCHECK(IsImmLSUnscaled(addr.offset()));
4217 LoadStoreUnscaledImmOffset(memop, offset);
4218 }
4219 } else if (addr.IsRegisterOffset()) {
4220 Extend ext = addr.extend();
4221 Shift shift = addr.shift();
4222 unsigned shift_amount = addr.shift_amount();
4223
4224 // LSL is encoded in the option field as UXTX.
4225 if (shift == LSL) {
4226 ext = UXTX;
4227 }
4228
4229 // Shifts are encoded in one bit, indicating a left shift by the memory
4230 // access size.
4231 DCHECK(shift_amount == 0 || shift_amount == CalcLSDataSizeLog2(op));
4232 Emit(LoadStoreRegisterOffsetFixed | memop | Rm(addr.regoffset()) |
4233 ExtendMode(ext) | ImmShiftLS((shift_amount > 0) ? 1 : 0));
4234 } else {
4235 // Pre-index and post-index modes.
4236 DCHECK(IsImmLSUnscaled(addr.offset()));
4237 DCHECK_NE(rt, addr.base());
4238 int offset = static_cast<int>(addr.offset());
4239 if (addr.IsPreIndex()) {
4240 Emit(LoadStorePreIndexFixed | memop | ImmLS(offset));
4241 } else {
4242 DCHECK(addr.IsPostIndex());
4243 Emit(LoadStorePostIndexFixed | memop | ImmLS(offset));
4244 }
4245 }
4246}
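// Illustrative note (not part of the original source): for the immediate-offset
// path above, AArch64 provides two forms. The scaled form uses an unsigned
// 12-bit field multiplied by the access size (e.g. multiples of 8 from 0 to
// 32760 for 64-bit accesses), while the unscaled (ldur/stur) form takes a
// signed 9-bit byte offset from -256 to +255.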
4247
4248void Assembler::pmull(const VRegister& vd, const VRegister& vn,
4249 const VRegister& vm) {
4250 DCHECK(AreSameFormat(vn, vm));
4251 DCHECK((vn.Is8B() && vd.Is8H()) || (vn.Is1D() && vd.Is1Q()));
4252 DCHECK(IsEnabled(PMULL1Q) || vd.Is8H());
4253 Emit(VFormat(vn) | NEON_PMULL | Rm(vm) | Rn(vn) | Rd(vd));
4254}
4255
4256void Assembler::pmull2(const VRegister& vd, const VRegister& vn,
4257 const VRegister& vm) {
4258 DCHECK(AreSameFormat(vn, vm));
4259 DCHECK((vn.Is16B() && vd.Is8H()) || (vn.Is2D() && vd.Is1Q()));
4260 DCHECK(IsEnabled(PMULL1Q) || vd.Is8H());
4261 Emit(VFormat(vn) | NEON_PMULL2 | Rm(vm) | Rn(vn) | Rd(vd));
4262}
4263
4264bool Assembler::IsImmLSPair(int64_t offset, unsigned size) {
4265 bool offset_is_size_multiple =
4266 (static_cast<int64_t>(static_cast<uint64_t>(offset >> size) << size) ==
4267 offset);
4268 return offset_is_size_multiple && is_int7(offset >> size);
4269}
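// Illustrative example (not part of the original source): for 64-bit registers
// size is 3, so a pair offset is valid if it is a multiple of 8 and offset / 8
// fits in a signed 7-bit field, i.e. offsets from -512 to +504 in steps of 8
// (the LDP/STP imm7 range).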
4270
4271bool Assembler::IsImmLLiteral(int64_t offset) {
4272 int inst_size = static_cast<int>(kInstrSizeLog2);
4273 bool offset_is_inst_multiple =
4274 (static_cast<int64_t>(static_cast<uint64_t>(offset >> inst_size)
4275 << inst_size) == offset);
4276 DCHECK_GT(offset, 0);
4277 offset >>= kLoadLiteralScaleLog2;
4278 return offset_is_inst_multiple && is_intn(offset, ImmLLiteral_width);
4279}
4280
4281// Test if a given value can be encoded in the immediate field of a logical
4282// instruction.
4283// If it can be encoded, the function returns true, and values pointed to by n,
4284// imm_s and imm_r are updated with immediates encoded in the format required
4285// by the corresponding fields in the logical instruction.
4286// If it can not be encoded, the function returns false, and the values pointed
4287// to by n, imm_s and imm_r are undefined.
4288bool Assembler::IsImmLogical(uint64_t value, unsigned width, unsigned* n,
4289 unsigned* imm_s, unsigned* imm_r) {
4290 DCHECK((n != nullptr) && (imm_s != nullptr) && (imm_r != nullptr));
4291 DCHECK((width == kWRegSizeInBits) || (width == kXRegSizeInBits));
4292
4293 bool negate = false;
4294
4295 // Logical immediates are encoded using parameters n, imm_s and imm_r using
4296 // the following table:
4297 //
4298 // N imms immr size S R
4299 // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
4300 // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
4301 // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
4302 // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
4303 // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
4304 // 0 11110s xxxxxr 2 UInt(s) UInt(r)
4305 // (s bits must not be all set)
4306 //
4307 // A pattern is constructed of size bits, where the least significant S+1 bits
4308 // are set. The pattern is rotated right by R, and repeated across a 32 or
4309 // 64-bit value, depending on destination register width.
4310 //
4311 // Put another way: the basic format of a logical immediate is a single
4312 // contiguous stretch of 1 bits, repeated across the whole word at intervals
4313 // given by a power of 2. To identify them quickly, we first locate the
4314 // lowest stretch of 1 bits, then the next 1 bit above that; that combination
4315 // is different for every logical immediate, so it gives us all the
4316 // information we need to identify the only logical immediate that our input
4317 // could be, and then we simply check if that's the value we actually have.
4318 //
4319 // (The rotation parameter does give the possibility of the stretch of 1 bits
4320 // going 'round the end' of the word. To deal with that, we observe that in
4321 // any situation where that happens the bitwise NOT of the value is also a
4322 // valid logical immediate. So we simply invert the input whenever its low bit
4323 // is set, and then we know that the rotated case can't arise.)
4324
4325 if (value & 1) {
4326 // If the low bit is 1, negate the value, and set a flag to remember that we
4327 // did (so that we can adjust the return values appropriately).
4328 negate = true;
4329 value = ~value;
4330 }
4331
4332 if (width == kWRegSizeInBits) {
4333 // To handle 32-bit logical immediates, the very easiest thing is to repeat
4334 // the input value twice to make a 64-bit word. The correct encoding of that
4335 // as a logical immediate will also be the correct encoding of the 32-bit
4336 // value.
4337
4338 // The most-significant 32 bits may not be zero (i.e. negate is true) so
4339 // shift the value left before duplicating it.
4340 value <<= kWRegSizeInBits;
4341 value |= value >> kWRegSizeInBits;
4342 }
4343
4344 // The basic analysis idea: imagine our input word looks like this.
4345 //
4346 // 0011111000111110001111100011111000111110001111100011111000111110
4347 // c b a
4348 // |<--d-->|
4349 //
4350 // We find the lowest set bit (as an actual power-of-2 value, not its index)
4351 // and call it a. Then we add a to our original number, which wipes out the
4352 // bottommost stretch of set bits and replaces it with a 1 carried into the
4353 // next zero bit. Then we look for the new lowest set bit, which is in
4354 // position b, and subtract it, so now our number is just like the original
4355 // but with the lowest stretch of set bits completely gone. Now we find the
4356 // lowest set bit again, which is position c in the diagram above. Then we'll
4357 // measure the distance d between bit positions a and c (using CLZ), and that
4358 // tells us that the only valid logical immediate that could possibly be equal
4359 // to this number is the one in which a stretch of bits running from a to just
4360 // below b is replicated every d bits.
4361 uint64_t a = LargestPowerOf2Divisor(value);
4362 uint64_t value_plus_a = value + a;
4363 uint64_t b = LargestPowerOf2Divisor(value_plus_a);
4364 uint64_t value_plus_a_minus_b = value_plus_a - b;
4365 uint64_t c = LargestPowerOf2Divisor(value_plus_a_minus_b);
4366
4367 int d, clz_a, out_n;
4368 uint64_t mask;
4369
4370 if (c != 0) {
4371 // The general case, in which there is more than one stretch of set bits.
4372 // Compute the repeat distance d, and set up a bitmask covering the basic
4373 // unit of repetition (i.e. a word with the bottom d bits set). Also, in all
4374 // of these cases the N bit of the output will be zero.
4375 clz_a = CountLeadingZeros(a, kXRegSizeInBits);
4376 int clz_c = CountLeadingZeros(c, kXRegSizeInBits);
4377 d = clz_a - clz_c;
4378 mask = ((uint64_t{1} << d) - 1);
4379 out_n = 0;
4380 } else {
4381 // Handle degenerate cases.
4382 //
4383 // If any of those 'find lowest set bit' operations didn't find a set bit at
4384 // all, then the word will have been zero thereafter, so in particular the
4385 // last lowest_set_bit operation will have returned zero. So we can test for
4386 // all the special case conditions in one go by seeing if c is zero.
4387 if (a == 0) {
4388 // The input was zero (or all 1 bits, which will come to here too after we
4389 // inverted it at the start of the function), for which we just return
4390 // false.
4391 return false;
4392 } else {
4393 // Otherwise, if c was zero but a was not, then there's just one stretch
4394 // of set bits in our word, meaning that we have the trivial case of
4395 // d == 64 and only one 'repetition'. Set up all the same variables as in
4396 // the general case above, and set the N bit in the output.
4397 clz_a = CountLeadingZeros(a, kXRegSizeInBits);
4398 d = 64;
4399 mask = ~uint64_t{0};
4400 out_n = 1;
4401 }
4402 }
4403
4404 // If the repeat period d is not a power of two, it can't be encoded.
4405 if (!base::bits::IsPowerOfTwo(d)) {
4406 return false;
4407 }
4408
4409 if (((b - a) & ~mask) != 0) {
4410 // If the bit stretch (b - a) does not fit within the mask derived from the
4411 // repeat period, then fail.
4412 return false;
4413 }
4414
4415 // The only possible option is b - a repeated every d bits. Now we're going to
4416 // actually construct the valid logical immediate derived from that
4417 // specification, and see if it equals our original input.
4418 //
4419 // To repeat a value every d bits, we multiply it by a number of the form
4420 // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
4421 // be derived using a table lookup on CLZ(d).
4422 static const uint64_t multipliers[] = {
4423 0x0000000000000001UL, 0x0000000100000001UL, 0x0001000100010001UL,
4424 0x0101010101010101UL, 0x1111111111111111UL, 0x5555555555555555UL,
4425 };
4426 int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57;
4427 // Ensure that the index to the multipliers array is within bounds.
4428 DCHECK((multiplier_idx >= 0) &&
4429 (static_cast<size_t>(multiplier_idx) < arraysize(multipliers)));
4430 uint64_t multiplier = multipliers[multiplier_idx];
4431 uint64_t candidate = (b - a) * multiplier;
4432
4433 if (value != candidate) {
4434 // The candidate pattern doesn't match our input value, so fail.
4435 return false;
4436 }
4437
4438 // We have a match! This is a valid logical immediate, so now we have to
4439 // construct the bits and pieces of the instruction encoding that generates
4440 // it.
4441
4442 // Count the set bits in our basic stretch. The special case of clz(0) == -1
4443 // makes the answer come out right for stretches that reach the very top of
4444 // the word (e.g. numbers like 0xFFFFC00000000000).
4445 int clz_b = (b == 0) ? -1 : CountLeadingZeros(b, kXRegSizeInBits);
4446 int s = clz_a - clz_b;
4447
4448 // Decide how many bits to rotate right by, to put the low bit of that basic
4449 // stretch in position a.
4450 int r;
4451 if (negate) {
4452 // If we inverted the input right at the start of this function, here's
4453 // where we compensate: the number of set bits becomes the number of clear
4454 // bits, and the rotation count is based on position b rather than position
4455 // a (since b is the location of the 'lowest' 1 bit after inversion).
4456 s = d - s;
4457 r = (clz_b + 1) & (d - 1);
4458 } else {
4459 r = (clz_a + 1) & (d - 1);
4460 }
4461
4462 // Now we're done, except for having to encode the S output in such a way that
4463 // it gives both the number of set bits and the length of the repeated
4464 // segment. The s field is encoded like this:
4465 //
4466 // imms size S
4467 // ssssss 64 UInt(ssssss)
4468 // 0sssss 32 UInt(sssss)
4469 // 10ssss 16 UInt(ssss)
4470 // 110sss 8 UInt(sss)
4471 // 1110ss 4 UInt(ss)
4472 // 11110s 2 UInt(s)
4473 //
4474 // So we 'or' (-d * 2) with our computed (s - 1) to form imms.
4475 *n = out_n;
4476 *imm_s = ((-d * 2) | (s - 1)) & 0x3F;
4477 *imm_r = r;
4478
4479 return true;
4480}
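// Illustrative worked example (not part of the original source): encoding
// 0x0F0F0F0F0F0F0F0F as a 64-bit logical immediate. The low bit is set, so the
// value is inverted to 0xF0F0F0F0F0F0F0F0 (negate = true). Then a = 0x10,
// b = 0x100 and c = 0x1000, giving a repeat distance d = 8 and candidate =
// 0xF0 * 0x0101010101010101, which matches. With s = 4 and the negate
// adjustment (s = d - s = 4, r = 0), the outputs are n = 0, imm_s = 0b110011
// and imm_r = 0: four consecutive ones in each 8-bit unit, not rotated.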
4481
4482bool Assembler::IsImmFP32(uint32_t bits) {
4483 // Valid values will have the form:
4484 // aBbb.bbbc.defg.h000.0000.0000.0000.0000
4485 // bits[19..0] are cleared.
4486 if ((bits & 0x7FFFF) != 0) {
4487 return false;
4488 }
4489
4490 // bits[29..25] are all set or all cleared.
4491 uint32_t b_pattern = (bits >> 16) & 0x3E00;
4492 if (b_pattern != 0 && b_pattern != 0x3E00) {
4493 return false;
4494 }
4495
4496 // bit[30] and bit[29] are opposite.
4497 if (((bits ^ (bits << 1)) & 0x40000000) == 0) {
4498 return false;
4499 }
4500
4501 return true;
4502}
4503
4504bool Assembler::IsImmFP64(uint64_t bits) {
4505 // Valid values will have the form:
4506 // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
4507 // 0000.0000.0000.0000.0000.0000.0000.0000
4508 // bits[47..0] are cleared.
4509 if ((bits & 0xFFFFFFFFFFFFL) != 0) {
4510 return false;
4511 }
4512
4513 // bits[61..54] are all set or all cleared.
4514 uint32_t b_pattern = (bits >> 48) & 0x3FC0;
4515 if (b_pattern != 0 && b_pattern != 0x3FC0) {
4516 return false;
4517 }
4518
4519 // bit[62] and bit[61] are opposite.
4520 if (((bits ^ (bits << 1)) & 0x4000000000000000L) == 0) {
4521 return false;
4522 }
4523
4524 return true;
4525}
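// Illustrative example (not part of the original source): 1.0 has bit pattern
// 0x3FF0000000000000, so bits[47:0] are zero, bits[61:54] are all ones
// (b_pattern == 0x3FC0) and bits 62 and 61 differ, making it encodable. By
// contrast, 0.1 has a non-zero low fraction and fails the first test.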
4526
4527void Assembler::GrowBuffer() {
4528 // Compute new buffer size.
4529 int old_size = buffer_->size();
4530 int new_size = std::min(2 * old_size, old_size + 1 * MB);
4531
4532 // Some internal data structures overflow for very large buffers,
4533 // so kMaximalBufferSize must be kept small enough to avoid that.
4534 if (new_size > kMaximalBufferSize) {
4535 V8::FatalProcessOutOfMemory(nullptr, "Assembler::GrowBuffer");
4536 }
4537
4538 // Set up new buffer.
4539 std::unique_ptr<AssemblerBuffer> new_buffer = buffer_->Grow(new_size);
4540 DCHECK_EQ(new_size, new_buffer->size());
4541 uint8_t* new_start = new_buffer->start();
4542
4543 // Copy the data.
4544 intptr_t pc_delta = new_start - buffer_start_;
4545 intptr_t rc_delta = (new_start + new_size) - (buffer_start_ + old_size);
4546 size_t reloc_size = (buffer_start_ + old_size) - reloc_info_writer.pos();
4547 memmove(new_start, buffer_start_, pc_offset());
4548 memmove(reloc_info_writer.pos() + rc_delta, reloc_info_writer.pos(),
4549 reloc_size);
4550
4551 // Switch buffers.
4552 buffer_ = std::move(new_buffer);
4553 buffer_start_ = new_start;
4554 pc_ += pc_delta;
4555 reloc_info_writer.Reposition(reloc_info_writer.pos() + rc_delta,
4556 reloc_info_writer.last_pc() + pc_delta);
4557
4558 // None of our relocation types are pc relative pointing outside the code
4559 // buffer nor pc absolute pointing inside the code buffer, so there is no need
4560 // to relocate any emitted relocation entries.
4561
4562 // Relocate internal references.
4563 for (auto pos : internal_reference_positions_) {
4564 Address address = reinterpret_cast<intptr_t>(buffer_start_) + pos;
4565 intptr_t internal_ref = ReadUnalignedValue<intptr_t>(address);
4566 internal_ref += pc_delta;
4567 WriteUnalignedValue<intptr_t>(address, internal_ref);
4568 }
4569
4570 // Pending relocation entries are also relative, no need to relocate.
4571}
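// Illustrative example (not part of the original source): the growth policy
// above doubles small buffers but adds at most 1 MB at a time, e.g. a 256 KB
// buffer grows to 512 KB while a 4 MB buffer grows to 5 MB, bounded overall by
// kMaximalBufferSize.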
4572
4573void Assembler::RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data,
4574 ConstantPoolMode constant_pool_mode) {
4575 if (rmode == RelocInfo::INTERNAL_REFERENCE ||
4576 rmode == RelocInfo::CONST_POOL || rmode == RelocInfo::VENEER_POOL ||
4577 rmode == RelocInfo::DEOPT_SCRIPT_OFFSET ||
4578 rmode == RelocInfo::DEOPT_INLINING_ID ||
4579 rmode == RelocInfo::DEOPT_REASON || rmode == RelocInfo::DEOPT_ID ||
4580 rmode == RelocInfo::DEOPT_NODE_ID) {
4581 // Adjust code for new modes.
4582 DCHECK(RelocInfo::IsDeoptReason(rmode) || RelocInfo::IsDeoptId(rmode) ||
4583 RelocInfo::IsDeoptNodeId(rmode) ||
4584 RelocInfo::IsDeoptPosition(rmode) ||
4585 RelocInfo::IsInternalReference(rmode) ||
4586 RelocInfo::IsConstPool(rmode) || RelocInfo::IsVeneerPool(rmode));
4587 // These modes do not need an entry in the constant pool.
4588 } else if (constant_pool_mode == NEEDS_POOL_ENTRY) {
4589 if (RelocInfo::IsEmbeddedObjectMode(rmode)) {
4590 Handle<HeapObject> handle(reinterpret_cast<Address*>(data));
4591 data = AddEmbeddedObject(handle);
4592 }
4593 if (rmode == RelocInfo::COMPRESSED_EMBEDDED_OBJECT) {
4594 if (constpool_.RecordEntry(static_cast<uint32_t>(data), rmode) ==
4595 RelocInfoStatus::kMustOmitForDuplicate) {
4596 return;
4597 }
4598 } else {
4599 if (constpool_.RecordEntry(static_cast<uint64_t>(data), rmode) ==
4600 RelocInfoStatus::kMustOmitForDuplicate) {
4601 return;
4602 }
4603 }
4604 }
4605 // For modes that cannot use the constant pool, a different sequence of
4606 // instructions will be emitted by this function's caller.
4607
4608 if (!ShouldRecordRelocInfo(rmode)) return;
4609
4610 // Callers should ensure that constant pool emission is blocked until the
4611 // instruction the reloc info is associated with has been emitted.
4612 DCHECK(constpool_.IsBlocked());
4613
4614 // We do not try to reuse pool constants.
4615 RelocInfo rinfo(reinterpret_cast<Address>(pc_), rmode, data);
4616 DCHECK_GE(buffer_space(), kMaxRelocSize); // too late to grow buffer here
4617 reloc_info_writer.Write(&rinfo);
4618}
4619
4620void Assembler::near_jump(int offset, RelocInfo::Mode rmode) {
4621 BlockPoolsScope no_pool_before_b_instr(this);
4622 if (!RelocInfo::IsNoInfo(rmode))
4623 RecordRelocInfo(rmode, offset, NO_POOL_ENTRY);
4624 b(offset);
4625}
4626
4627void Assembler::near_call(int offset, RelocInfo::Mode rmode) {
4628 BlockPoolsScope no_pool_before_bl_instr(this);
4629 if (!RelocInfo::IsNoInfo(rmode))
4630 RecordRelocInfo(rmode, offset, NO_POOL_ENTRY);
4631 bl(offset);
4632}
4633
4634void Assembler::near_call(HeapNumberRequest request) {
4635 BlockPoolsScope no_pool_before_bl_instr(this);
4636 RequestHeapNumber(request);
4637 EmbeddedObjectIndex index = AddEmbeddedObject(Handle<Code>());
4638 RecordRelocInfo(RelocInfo::CODE_TARGET, index, NO_POOL_ENTRY);
4639 DCHECK(is_int32(index));
4640 bl(static_cast<int>(index));
4641}
4642
4643// Constant Pool
4644
4645void ConstantPool::EmitPrologue(Alignment require_alignment) {
4646 // Recorded constant pool size is expressed in number of 32-bit words,
4647 // and includes prologue and alignment, but not the jump around the pool
4648 // or the size of the marker itself.
4649 const int marker_size = 1;
4650 int word_count =
4651 ComputeSize(Jump::kOmitted, require_alignment) / kInt32Size - marker_size;
4652 assm_->Emit(LDR_x_lit | Assembler::ImmLLiteral(word_count) |
4653 Assembler::Rt(xzr));
4654 assm_->EmitPoolGuard();
4655}
4656
4657int ConstantPool::PrologueSize(Jump require_jump) const {
4658 // Prologue is:
4659 // b over ;; if require_jump
4660 // ldr xzr, #pool_size
4661 // blr xzr
4662 int prologue_size = require_jump == Jump::kRequired ? kInstrSize : 0;
4663 prologue_size += 2 * kInstrSize;
4664 return prologue_size;
4665}
4666
4667void ConstantPool::SetLoadOffsetToConstPoolEntry(int load_offset,
4668 Instruction* entry_offset,
4669 const ConstantPoolKey& key) {
4670 Instruction* instr = assm_->InstructionAt(load_offset);
4671 // Instruction to patch must be 'ldr rd, [pc, #offset]' with offset == 0.
4672 DCHECK(instr->IsLdrLiteral() && instr->ImmLLiteral() == 0);
4673 instr->SetImmPCOffsetTarget(assm_->zone(), assm_->options(), entry_offset);
4674}
4675
4676void ConstantPool::Check(Emission force_emit, Jump require_jump,
4677 size_t margin) {
4678 // Some short sequences of instructions must not be broken up by constant
4679 // pool emission; such sequences are protected by a ConstPool::BlockScope.
4680 if (IsBlocked()) {
4681 // Something is wrong if emission is forced and blocked at the same time.
4682 DCHECK_EQ(force_emit, Emission::kIfNeeded);
4683 return;
4684 }
4685
4686 // We emit a constant pool only if:
4687 //  * it is not empty, and either
4688 //  * emission is forced by parameter force_emit (e.g. at function end), or
4689 //  * emission is mandatory or opportune according to {ShouldEmitNow}.
4690 if (!IsEmpty() && (force_emit == Emission::kForced ||
4691 ShouldEmitNow(require_jump, margin))) {
4692 // Emit veneers for branches that would go out of range during emission of
4693 // the constant pool.
4694 int worst_case_size = ComputeSize(Jump::kRequired, Alignment::kRequired);
4695 assm_->CheckVeneerPool(false, require_jump == Jump::kRequired,
4696 assm_->kVeneerDistanceMargin + worst_case_size +
4697 static_cast<int>(margin));
4698
4699 // Check that the code buffer is large enough before emitting the constant
4700 // pool (this includes the gap to the relocation information).
4701 int needed_space = worst_case_size + assm_->kGap;
4702 while (assm_->buffer_space() <= needed_space) {
4703 assm_->GrowBuffer();
4704 }
4705
4706 EmitAndClear(require_jump);
4707 }
4708 // Since a constant pool is (now) empty, move the check offset forward by
4709 // the standard interval.
4710 SetNextCheckIn(ConstantPool::kCheckInterval);
4711}
4712
4713 // Pool entries are accessed with a pc-relative load, so this cannot be more
4714 // than 1 * MB. Since constant pool emission checks are interval based, and we
4715// want to keep entries close to the code, we try to emit every 64KB.
4716const size_t ConstantPool::kMaxDistToPool32 = 1 * MB;
4717const size_t ConstantPool::kMaxDistToPool64 = 1 * MB;
4718const size_t ConstantPool::kCheckInterval = 128 * kInstrSize;
4719const size_t ConstantPool::kApproxDistToPool32 = 64 * KB;
4720const size_t ConstantPool::kApproxDistToPool64 = kApproxDistToPool32;
4721
4722const size_t ConstantPool::kOpportunityDistToPool32 = 64 * KB;
4723const size_t ConstantPool::kOpportunityDistToPool64 = 64 * KB;
4724const size_t ConstantPool::kApproxMaxEntryCount = 512;
4725
4726intptr_t Assembler::MaxPCOffsetAfterVeneerPoolIfEmittedNow(size_t margin) {
4727 // Account for the branch and guard around the veneers.
4728 static constexpr int kBranchSizeInBytes = kInstrSize;
4729 static constexpr int kGuardSizeInBytes = kInstrSize;
4730 const size_t max_veneer_size_in_bytes =
4731 unresolved_branches_.size() * kVeneerCodeSize;
4732 return static_cast<intptr_t>(pc_offset() + kBranchSizeInBytes +
4733 kGuardSizeInBytes + max_veneer_size_in_bytes +
4734 margin);
4735}
4736
4737void Assembler::RecordVeneerPool(int location_offset, int size) {
4738 Assembler::BlockPoolsScope block_pools(this, PoolEmissionCheck::kSkip);
4739 RelocInfo rinfo(reinterpret_cast<Address>(buffer_start_) + location_offset,
4740 RelocInfo::VENEER_POOL, static_cast<intptr_t>(size));
4741 reloc_info_writer.Write(&rinfo);
4742}
4743
4744void Assembler::EmitVeneers(bool force_emit, bool need_protection,
4745 size_t margin) {
4746 ASM_CODE_COMMENT(this);
4747 BlockPoolsScope scope(this, PoolEmissionCheck::kSkip);
4748
4749 // The exact size of the veneer pool must be recorded (see the comment at the
4750 // declaration site of RecordConstPool()), but computing the number of
4751 // veneers that will be generated is not obvious. So instead we remember the
4752 // current position and will record the size after the pool has been
4753 // generated.
4754 Label size_check;
4755 bind(&size_check);
4756 int veneer_pool_relocinfo_loc = pc_offset();
4757
4758 Label end;
4759 if (need_protection) {
4760 b(&end);
4761 }
4762
4763 EmitVeneersGuard();
4764
4765 // We only emit veneers if needed (unless emission is forced), i.e. when the
4766 // max-reachable-pc of the branch has been exhausted by the current codegen
4767 // state. Specifically, we emit when the max-reachable-pc of the branch <= the
4768 // max-pc-after-veneers (over-approximated).
4769 const intptr_t max_pc_after_veneers =
4770 MaxPCOffsetAfterVeneerPoolIfEmittedNow(margin);
4771
4772 {
4773 // The `unresolved_branches_` map is sorted by max-reachable-pc in ascending
4774 // order.
4775 auto it = unresolved_branches_.begin();
4776 while (it != unresolved_branches_.end()) {
4777 const int max_reachable_pc = it->first & ~1;
4778 if (!force_emit && max_reachable_pc > max_pc_after_veneers) break;
4779
4780 // Found a task. We'll emit a veneer for this.
4781
4782 // Calculate the branch location from the maximum reachable PC. Only
4783 // B.cond, CB[N]Z and TB[N]Z are veneered, and the first two branch types
4784 // have the same range. The LSB (branch type tag bit) is set for TB[N]Z,
4785 // clear otherwise.
4786 int pc_offset = it->first;
4787 if (pc_offset & 1) {
4788 pc_offset -= (Instruction::ImmBranchRange(TestBranchType) + 1);
4789 } else {
4790 static_assert(Instruction::ImmBranchRange(CondBranchType) ==
4791 Instruction::ImmBranchRange(CompareBranchType));
4792 pc_offset -= Instruction::ImmBranchRange(CondBranchType);
4793 }
4794#ifdef DEBUG
4795 Label veneer_size_check;
4796 bind(&veneer_size_check);
4797#endif
4798 Label* label = it->second;
4799 Instruction* veneer = reinterpret_cast<Instruction*>(pc_);
4800 Instruction* branch = InstructionAt(pc_offset);
4801 RemoveBranchFromLabelLinkChain(branch, label, veneer);
4802 branch->SetImmPCOffsetTarget(zone(), options(), veneer);
4803 b(label); // This may end up pointing at yet another veneer later on.
4804 DCHECK_EQ(SizeOfCodeGeneratedSince(&veneer_size_check),
4805 static_cast<uint64_t>(kVeneerCodeSize));
4806 it = unresolved_branches_.erase(it);
4807 }
4808 }
4809
4810 // Update next_veneer_pool_check_ (tightly coupled with unresolved_branches_).
4811 // This must happen after the calls to {RemoveBranchFromLabelLinkChain},
4812 // because that function can resolve additional branches.
4813 if (unresolved_branches_.empty()) {
4814 next_veneer_pool_check_ = kMaxInt;
4815 } else {
4816 next_veneer_pool_check_ =
4817 unresolved_branches_first_limit() - kVeneerDistanceCheckMargin;
4818 }
4819
4820 // Record the veneer pool size.
4821 int pool_size = static_cast<int>(SizeOfCodeGeneratedSince(&size_check));
4822 RecordVeneerPool(veneer_pool_relocinfo_loc, pool_size);
4823
4824 bind(&end);
4825}
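To make the key handling in the loop above concrete, here is a hedged, standalone sketch of decoding an unresolved_branches_ key: the key is the branch's maximum reachable pc, with the low bit used as a tag for TB[N]Z. The struct, helper name, and the range constants are illustrative assumptions; the real ranges come from Instruction::ImmBranchRange().

#include <cstdint>

// Illustrative decode of an unresolved_branches_ key. The range values are
// assumptions for this sketch; the assembler queries Instruction::ImmBranchRange().
struct DecodedBranchKey {
  int max_reachable_pc;  // key with the TB[N]Z tag bit cleared.
  int branch_pc_offset;  // pc offset of the veneered branch itself.
};

inline DecodedBranchKey DecodeUnresolvedBranchKey(int key) {
  constexpr int kCondOrCompareBranchRange = 1024 * 1024;  // assumed ~1MB range.
  constexpr int kTestBranchRange = 32 * 1024;             // assumed ~32KB range.
  DecodedBranchKey decoded;
  decoded.max_reachable_pc = key & ~1;
  // TB[N]Z keys carry the tag bit, so the extra 1 is subtracted together with
  // the shorter test-branch range, mirroring the loop above.
  decoded.branch_pc_offset = (key & 1) ? key - (kTestBranchRange + 1)
                                       : key - kCondOrCompareBranchRange;
  return decoded;
}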
4826
4827void Assembler::CheckVeneerPool(bool force_emit, bool require_jump,
4828 size_t margin) {
4829 // There is nothing to do if there are no pending veneer pool entries.
4830 if (unresolved_branches_.empty()) {
4831 DCHECK_EQ(next_veneer_pool_check_, kMaxInt);
4832 return;
4833 }
4834
4835 DCHECK(pc_offset() < unresolved_branches_first_limit());
4836
4837 // Some short sequences of instructions mustn't be broken up by veneer pool
4838 // emission; such sequences are protected by calls to BlockVeneerPoolFor and
4839 // BlockVeneerPoolScope.
4840 if (is_veneer_pool_blocked()) {
4841 DCHECK(!force_emit);
4842 return;
4843 }
4844
4845 if (!require_jump) {
4846 // Prefer emitting veneers protected by an existing instruction.
4847 margin *= kVeneerNoProtectionFactor;
4848 }
4849 if (force_emit || ShouldEmitVeneers(margin)) {
4850 EmitVeneers(force_emit, require_jump, margin);
4851 } else {
4852 next_veneer_pool_check_ =
4853 unresolved_branches_first_limit() - kVeneerDistanceCheckMargin;
4854 }
4855}
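The decision above can be summarized by the sketch below. The factor value and the exact comparison performed by ShouldEmitVeneers are assumptions used only to illustrate the design choice: when no protective jump is required, the margin is inflated so the pool is emitted earlier, at a point where it can piggyback on existing control flow.

#include <cstddef>

// Hedged sketch of the emission decision above; the factor value and the
// distance comparison are assumptions for illustration.
inline bool ShouldEmitVeneersNowSketch(bool force_emit, bool require_jump,
                                       size_t margin,
                                       size_t bytes_until_first_limit) {
  constexpr size_t kNoProtectionFactorSketch = 2;  // assumed value.
  if (!require_jump) margin *= kNoProtectionFactorSketch;
  return force_emit || bytes_until_first_limit <= margin;
}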
4856
4857int Assembler::buffer_space() const {
4858 return static_cast<int>(reloc_info_writer.pos() - pc_);
4859}
4860
4861void Assembler::RecordConstPool(int size) {
4862 // We only need this for debugger support, to correctly compute offsets in the
4863 // code.
4864 Assembler::BlockPoolsScope block_pools(this);
4865 RecordRelocInfo(RelocInfo::CONST_POOL, static_cast<intptr_t>(size));
4866}
4867
4868void PatchingAssembler::PatchAdrFar(int64_t target_offset) {
4869 // The code at the current instruction should be:
4870 // adr rd, 0
4871 // nop (adr_far)
4872 // nop (adr_far)
4873 // movz scratch, 0
4874
4875 // Verify the expected code.
4876 Instruction* expected_adr = InstructionAt(0);
4877 CHECK(expected_adr->IsAdr() && (expected_adr->ImmPCRel() == 0));
4878 int rd_code = expected_adr->Rd();
4879 for (int i = 0; i < kAdrFarPatchableNNops; ++i) {
4880 CHECK(InstructionAt((i + 1) * kInstrSize)->IsNop(ADR_FAR_NOP));
4881 }
4882 Instruction* expected_movz =
4883 InstructionAt((kAdrFarPatchableNInstrs - 1) * kInstrSize);
4884 CHECK(expected_movz->IsMovz() && (expected_movz->ImmMoveWide() == 0) &&
4885 (expected_movz->ShiftMoveWide() == 0));
4886 int scratch_code = expected_movz->Rd();
4887
4888 // Patch to load the correct address.
4889 Register rd = Register::XRegFromCode(rd_code);
4890 Register scratch = Register::XRegFromCode(scratch_code);
4891 // Addresses are only 48 bits.
4892 adr(rd, target_offset & 0xFFFF);
4893 movz(scratch, (target_offset >> 16) & 0xFFFF, 16);
4894 movk(scratch, (target_offset >> 32) & 0xFFFF, 32);
4895 DCHECK_EQ(target_offset >> 48, 0);
4896 add(rd, rd, scratch);
4897}
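As a pure-arithmetic illustration of the patch above (no assembler involved), the patched sequence reassembles a 48-bit offset from the adr instruction's pc: adr contributes the low 16 bits pc-relatively, movz/movk place bits 16-47 into the scratch register, and the final add combines the two. The helper name is hypothetical.

#include <cassert>
#include <cstdint>

// Arithmetic-only sketch of the adr_far patch above: mirrors
// adr + movz + movk + add for a non-negative 48-bit target_offset.
inline int64_t AdrFarResultSketch(int64_t adr_pc, int64_t target_offset) {
  assert((target_offset >> 48) == 0);              // addresses are only 48 bits.
  int64_t rd = adr_pc + (target_offset & 0xFFFF);  // adr rd, #lo16 (pc-relative).
  int64_t scratch =
      ((target_offset >> 16) & 0xFFFF) << 16;      // movz scratch, #mid16, lsl #16
  scratch |= ((target_offset >> 32) & 0xFFFF) << 32;  // movk scratch, #hi16, lsl #32
  return rd + scratch;                             // add rd, rd, scratch
}

For any such target_offset this evaluates to adr_pc + target_offset; the DCHECK in the original enforces the 48-bit limit that makes this decomposition sufficient.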
4898
4899void PatchingAssembler::PatchSubSp(uint32_t immediate) {
4900 // The code at the current instruction should be:
4901 // sub sp, sp, #0
4902
4903 // Verify the expected code.
4904 Instruction* expected_sub = InstructionAt(0);
4905 CHECK(expected_sub->IsAddSubImmediate());
4906 sub(sp, sp, immediate);
4907}
4908
4909#undef NEON_3DIFF_LONG_LIST
4910#undef NEON_3DIFF_HN_LIST
4911#undef NEON_ACROSSLANES_LIST
4912#undef NEON_FP2REGMISC_FCVT_LIST
4913#undef NEON_FP2REGMISC_LIST
4914#undef NEON_3SAME_LIST
4915#undef NEON_FP3SAME_LIST_V2
4916#undef NEON_BYELEMENT_LIST
4917#undef NEON_FPBYELEMENT_LIST
4918#undef NEON_BYELEMENT_LONG_LIST
4919
4920} // namespace internal
4921} // namespace v8
4922
4923#endif // V8_TARGET_ARCH_ARM64