v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
move-optimizer.cc
// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/move-optimizer.h"

#include "src/codegen/register-configuration.h"

namespace v8 {
namespace internal {
namespace compiler {

namespace {

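// A (source, destination) pair identifying a gap move. Used as a map key:
// operator< provides a total order and operator== compares both operands.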
struct MoveKey {
  InstructionOperand source;
  InstructionOperand destination;
  bool operator<(const MoveKey& other) const {
    if (this->source != other.source) {
      return this->source.Compare(other.source);
    }
    return this->destination.Compare(other.destination);
  }
  bool operator==(const MoveKey& other) const {
    return std::tie(this->source, this->destination) ==
           std::tie(other.source, other.destination);
  }
};

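// A small set of instruction operands backed by a caller-provided buffer.
// Membership tests compare canonicalized operands; on platforms with
// combined FP aliasing, ContainsOpOrAlias() additionally checks registers of
// other FP representations that alias the queried register.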
class OperandSet {
 public:
  explicit OperandSet(ZoneVector<InstructionOperand>* buffer)
      : set_(buffer), fp_reps_(0) {
    buffer->clear();
  }

  void InsertOp(const InstructionOperand& op) {
    set_->push_back(op);

    if (kFPAliasing == AliasingKind::kCombine && op.IsFPRegister())
      fp_reps_ |= RepresentationBit(LocationOperand::cast(op).representation());
  }

  bool Contains(const InstructionOperand& op) const {
    for (const InstructionOperand& elem : *set_) {
      if (elem.EqualsCanonicalized(op)) return true;
    }
    return false;
  }

  bool ContainsOpOrAlias(const InstructionOperand& op) const {
    if (Contains(op)) return true;

    if (kFPAliasing == AliasingKind::kCombine && op.IsFPRegister()) {
      // Platforms where FP registers have complex aliasing need extra checks.
      const LocationOperand& loc = LocationOperand::cast(op);
      MachineRepresentation rep = loc.representation();
      // If we haven't encountered mixed rep FP registers, skip the extra
      // checks.
      if (!HasMixedFPReps(fp_reps_ | RepresentationBit(rep))) return false;

      // Check register against aliasing registers of other FP representations.
      MachineRepresentation other_rep1, other_rep2;
      switch (rep) {
        case MachineRepresentation::kFloat32:
          other_rep1 = MachineRepresentation::kFloat64;
          other_rep2 = MachineRepresentation::kSimd128;
          break;
        case MachineRepresentation::kFloat64:
          other_rep1 = MachineRepresentation::kFloat32;
          other_rep2 = MachineRepresentation::kSimd128;
          break;
        case MachineRepresentation::kSimd128:
          other_rep1 = MachineRepresentation::kFloat32;
          other_rep2 = MachineRepresentation::kFloat64;
          break;
        default:
          UNREACHABLE();
      }
      const RegisterConfiguration* config = RegisterConfiguration::Default();
      int base = -1;
      int aliases =
          config->GetAliases(rep, loc.register_code(), other_rep1, &base);
      DCHECK(aliases > 0 || (aliases == 0 && base == -1));
      while (aliases--) {
        if (Contains(AllocatedOperand(LocationOperand::REGISTER, other_rep1,
                                      base + aliases))) {
          return true;
        }
      }
      aliases = config->GetAliases(rep, loc.register_code(), other_rep2, &base);
      DCHECK(aliases > 0 || (aliases == 0 && base == -1));
      while (aliases--) {
        if (Contains(AllocatedOperand(LocationOperand::REGISTER, other_rep2,
                                      base + aliases))) {
          return true;
        }
      }
    }
    return false;
  }

 private:
  static bool HasMixedFPReps(int reps) {
    return reps && !base::bits::IsPowerOfTwo(reps);
  }

  ZoneVector<InstructionOperand>* set_;
  int fp_reps_;
};

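// Returns the first gap position of {instr} that holds a non-redundant move,
// eliminating and clearing redundant moves on the way. Returns a value past
// LAST_GAP_POSITION if every gap is empty or fully redundant.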
int FindFirstNonEmptySlot(const Instruction* instr) {
  int i = Instruction::FIRST_GAP_POSITION;
  for (; i <= Instruction::LAST_GAP_POSITION; i++) {
    ParallelMove* moves = instr->parallel_moves()[i];
    if (moves == nullptr) continue;
    for (MoveOperands* move : *moves) {
      if (!move->IsRedundant()) return i;
      move->Eliminate();
    }
    moves->clear();  // Clear this redundant move.
  }
  return i;
}

}  // namespace

MoveOptimizer::MoveOptimizer(Zone* local_zone, InstructionSequence* code)
    : local_zone_(local_zone),
      code_(code),
      local_vector_(local_zone),
      operand_buffer1(local_zone),
      operand_buffer2(local_zone) {}

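// Entry point of the pass. A caller constructs the optimizer with a scratch
// zone and the InstructionSequence to optimize and then calls Run(); roughly
// (sketch of a call site, not taken from this file):
//   MoveOptimizer(local_zone, code).Run();
// Run() first compresses each instruction's two gap positions into one, then
// cleans up every block, then pushes moves common to all predecessors down
// into merge blocks, and finally splits repeated loads via FinalizeMoves().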
void MoveOptimizer::Run() {
  for (Instruction* instruction : code()->instructions()) {
    CompressGaps(instruction);
  }
  for (InstructionBlock* block : code()->instruction_blocks()) {
    CompressBlock(block);
  }
  for (InstructionBlock* block : code()->instruction_blocks()) {
    if (block->PredecessorCount() <= 1) continue;
    if (!block->IsDeferred()) {
      bool has_only_deferred = true;
      for (RpoNumber& pred_id : block->predecessors()) {
        if (!code()->InstructionBlockAt(pred_id)->IsDeferred()) {
          has_only_deferred = false;
          break;
        }
      }
      // This would pull down common moves. If the moves occur in deferred
      // blocks, and the closest common successor is not deferred, we lose the
      // optimization of just spilling/filling in deferred blocks, when the
      // current block is not deferred.
      if (has_only_deferred) continue;
    }
    OptimizeMerge(block);
  }
  for (Instruction* gap : code()->instructions()) {
    FinalizeMoves(gap);
  }
}

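// Eliminates moves in the first gap whose destination is overwritten by the
// instruction's outputs or temps, unless that destination also feeds one of
// the instruction's inputs. For returns and tail calls, any move that does
// not feed an input is eliminated as well.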
void MoveOptimizer::RemoveClobberedDestinations(Instruction* instruction) {
  if (instruction->IsCall()) return;
  ParallelMove* moves = instruction->parallel_moves()[0];
  if (moves == nullptr) return;

  DCHECK(instruction->parallel_moves()[1] == nullptr ||
         instruction->parallel_moves()[1]->empty());

  OperandSet outputs(&operand_buffer1);
  OperandSet inputs(&operand_buffer2);

  // Outputs and temps are treated together as potentially clobbering a
  // destination operand.
  for (size_t i = 0; i < instruction->OutputCount(); ++i) {
    outputs.InsertOp(*instruction->OutputAt(i));
  }
  for (size_t i = 0; i < instruction->TempCount(); ++i) {
    outputs.InsertOp(*instruction->TempAt(i));
  }

  // Input operands block elisions.
  for (size_t i = 0; i < instruction->InputCount(); ++i) {
    inputs.InsertOp(*instruction->InputAt(i));
  }

  // Elide moves made redundant by the instruction.
  for (MoveOperands* move : *moves) {
    if (outputs.ContainsOpOrAlias(move->destination()) &&
        !inputs.ContainsOpOrAlias(move->destination())) {
      move->Eliminate();
    }
  }

  // The ret instruction makes any assignment before it unnecessary, except for
  // the one for its input.
  if (instruction->IsRet() || instruction->IsTailCall()) {
    for (MoveOperands* move : *moves) {
      if (!inputs.ContainsOpOrAlias(move->destination())) {
        move->Eliminate();
      }
    }
  }
}

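// Migrates eligible moves from the first gap of {from} into the START gap of
// {to}. A move stays behind if its destination is an input of {from}, or if
// its source is an output or temp of {from} or the destination of another
// move that stays behind; the last condition is closed by a fixpoint loop.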
void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
  if (from->IsCall()) return;

  ParallelMove* from_moves = from->parallel_moves()[0];
  if (from_moves == nullptr || from_moves->empty()) return;

  OperandSet dst_cant_be(&operand_buffer1);
  OperandSet src_cant_be(&operand_buffer2);

  // If an operand is an input to the instruction, we cannot move assignments
  // where it appears on the LHS.
  for (size_t i = 0; i < from->InputCount(); ++i) {
    dst_cant_be.InsertOp(*from->InputAt(i));
  }
  // If an operand is output to the instruction, we cannot move assignments
  // where it appears on the RHS, because we would lose its value before the
  // instruction.
  // Same for temp operands.
  // The output can't appear on the LHS because we performed
  // RemoveClobberedDestinations for the "from" instruction.
  for (size_t i = 0; i < from->OutputCount(); ++i) {
    src_cant_be.InsertOp(*from->OutputAt(i));
  }
  for (size_t i = 0; i < from->TempCount(); ++i) {
    src_cant_be.InsertOp(*from->TempAt(i));
  }
  for (MoveOperands* move : *from_moves) {
    if (move->IsRedundant()) continue;
    // Assume dest has a value "V". If we have a "dest = y" move, then we can't
    // move "z = dest", because z would become y rather than "V".
    // We assume CompressMoves has happened before this, which means we don't
    // have more than one assignment to dest.
    src_cant_be.InsertOp(move->destination());
  }

  // This set is usually small, e.g., for JetStream2 it has 16 elements or less
  // in 99.99% of the cases, hence use inline storage and fast linear search.
  // It is encoded as a `SmallMap` to `Dummy` values, since we don't have an
  // equivalent `SmallSet` type.
  struct Dummy {};
  SmallZoneMap<MoveKey, Dummy, 16> move_candidates(local_zone());
  // We start with all the moves that don't have conflicting source or
  // destination operands; these are eligible for being moved down.
  for (MoveOperands* move : *from_moves) {
    if (move->IsRedundant()) continue;
    if (!dst_cant_be.ContainsOpOrAlias(move->destination())) {
      MoveKey key = {move->source(), move->destination()};
      move_candidates.emplace(key, Dummy{});
    }
  }
  if (move_candidates.empty()) return;

  // Stabilize the candidate set.
  bool changed = false;
  do {
    changed = false;
    for (auto iter = move_candidates.begin(); iter != move_candidates.end();) {
      auto [move, _] = *iter;
      if (src_cant_be.ContainsOpOrAlias(move.source)) {
        src_cant_be.InsertOp(move.destination);
        iter = move_candidates.erase(iter);
        changed = true;
      } else {
        ++iter;
      }
    }
  } while (changed);

  ParallelMove to_move(local_zone());
  for (MoveOperands* move : *from_moves) {
    if (move->IsRedundant()) continue;
    MoveKey key = {move->source(), move->destination()};
    if (move_candidates.find(key) != move_candidates.end()) {
      to_move.AddMove(move->source(), move->destination(), code_zone());
      move->Eliminate();
    }
  }
  if (to_move.empty()) return;

  ParallelMove* dest =
      to->GetOrCreateParallelMove(Instruction::GapPosition::START, code_zone());

  CompressMoves(&to_move, dest);
  DCHECK(dest->empty());
  for (MoveOperands* m : to_move) {
    dest->push_back(m);
  }
}

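// Merges the moves of {right} into {left}: assignments in {left} that would
// be killed by the merge are eliminated via PrepareInsertAfter(), the
// surviving moves of {right} are appended to {left}, and {right} is cleared.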
void MoveOptimizer::CompressMoves(ParallelMove* left, MoveOpVector* right) {
  if (right == nullptr) return;

  MoveOpVector& eliminated = local_vector();
  DCHECK(eliminated.empty());

  if (!left->empty()) {
    // Modify the right moves in place and collect moves that will be killed by
    // merging the two gaps.
    for (MoveOperands* move : *right) {
      if (move->IsRedundant()) continue;
      left->PrepareInsertAfter(move, &eliminated);
    }
    // Eliminate dead moves.
    for (MoveOperands* to_eliminate : eliminated) {
      to_eliminate->Eliminate();
    }
    eliminated.clear();
  }
  // Add all possibly modified moves from right side.
  for (MoveOperands* move : *right) {
    if (move->IsRedundant()) continue;
    left->push_back(move);
  }
  // Nuke right.
  right->clear();
  DCHECK(eliminated.empty());
}

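// Collapses an instruction's two gap positions into the first one: if only
// the last gap holds moves, the two slots are swapped; if both hold moves,
// the last gap is merged into the first via CompressMoves().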
void MoveOptimizer::CompressGaps(Instruction* instruction) {
  int i = FindFirstNonEmptySlot(instruction);
  bool has_moves = i <= Instruction::LAST_GAP_POSITION;
  USE(has_moves);

  if (i == Instruction::LAST_GAP_POSITION) {
    std::swap(instruction->parallel_moves()[Instruction::FIRST_GAP_POSITION],
              instruction->parallel_moves()[Instruction::LAST_GAP_POSITION]);
  } else if (i == Instruction::FIRST_GAP_POSITION) {
    CompressMoves(
        instruction->parallel_moves()[Instruction::FIRST_GAP_POSITION],
        instruction->parallel_moves()[Instruction::LAST_GAP_POSITION]);
  }
  // We either have no moves, or, after swapping or compressing, we have
  // all the moves in the first gap position, and none in the second/end gap
  // position.
  ParallelMove* first =
      instruction->parallel_moves()[Instruction::FIRST_GAP_POSITION];
  ParallelMove* last =
      instruction->parallel_moves()[Instruction::LAST_GAP_POSITION];
  USE(first);
  USE(last);

  DCHECK(!has_moves ||
         (first != nullptr && (last == nullptr || last->empty())));
}

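// Cleans up a single block: removes clobbered gap assignments of the first
// instruction, then, for each subsequent instruction, migrates eligible
// moves from the previous instruction's gap into its own gap and removes
// destinations clobbered by its outputs.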
void MoveOptimizer::CompressBlock(InstructionBlock* block) {
  int first_instr_index = block->first_instruction_index();
  int last_instr_index = block->last_instruction_index();

  // Start by removing gap assignments where the output of the subsequent
  // instruction appears on LHS, as long as they are not needed by its input.
  Instruction* prev_instr = code()->instructions()[first_instr_index];
  RemoveClobberedDestinations(prev_instr);

  for (int index = first_instr_index + 1; index <= last_instr_index; ++index) {
    Instruction* instr = code()->instructions()[index];
    // Migrate eligible moves from the gap of prev_instr into the gap of instr.
    MigrateMoves(instr, prev_instr);
    // Remove gap assignments clobbered by instr's output.
    RemoveClobberedDestinations(instr);
    prev_instr = instr;
  }
}

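// Returns the last instruction of {block}.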
const Instruction* MoveOptimizer::LastInstruction(
    const InstructionBlock* block) const {
  return code()->instructions()[block->last_instruction_index()];
}

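// For a block with several predecessors, finds the gap moves that occur at
// the end of every predecessor and pushes a single copy of them down into
// this block's first gap, eliminating the duplicates in the predecessors.
// Bails out if any predecessor ends in an instruction that gap moves cannot
// safely be carried across.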
void MoveOptimizer::OptimizeMerge(InstructionBlock* block) {
  DCHECK_LT(1, block->PredecessorCount());
  // Ensure that the last instruction in each incoming block doesn't contain
  // things that would prevent moving gap moves across it.
  for (RpoNumber& pred_index : block->predecessors()) {
    const InstructionBlock* pred = code()->InstructionBlockAt(pred_index);

    // If the predecessor has more than one successor, we shouldn't attempt to
    // move down to this block (one of the successors) any of the gap moves,
    // because their effect may be necessary to the other successors.
    if (pred->SuccessorCount() > 1) return;

    const Instruction* last_instr =
        code()->instructions()[pred->last_instruction_index()];
    if (last_instr->IsCall()) return;
    if (last_instr->TempCount() != 0) return;
    if (last_instr->OutputCount() != 0) return;
    for (size_t i = 0; i < last_instr->InputCount(); ++i) {
      const InstructionOperand* op = last_instr->InputAt(i);
      if (!op->IsConstant() && !op->IsImmediate()) return;
    }
  }

  // This map is usually small, e.g., for JetStream2 in 99.5% of the cases it
  // has 16 elements or less. Hence use a `SmallMap` with inline storage and
  // fast linear search in the common case.
  SmallZoneMap<MoveKey, /* count */ size_t, 16> move_map(local_zone());
  size_t correct_counts = 0;
  // Accumulate set of shared moves.
  for (RpoNumber& pred_index : block->predecessors()) {
    const InstructionBlock* pred = code()->InstructionBlockAt(pred_index);
    const Instruction* instr = LastInstruction(pred);
    if (instr->parallel_moves()[0] == nullptr ||
        instr->parallel_moves()[0]->empty()) {
      return;
    }
    for (const MoveOperands* move : *instr->parallel_moves()[0]) {
      if (move->IsRedundant()) continue;
      InstructionOperand src = move->source();
      InstructionOperand dst = move->destination();
      MoveKey key = {src, dst};
      auto [it, inserted] = move_map.emplace(key, 1);
      if (!inserted) {
        it->second++;
        if (it->second == block->PredecessorCount()) {
          correct_counts++;
        }
      }
    }
  }
  if (move_map.empty() || correct_counts == 0) return;

  // Find insertion point.
  Instruction* instr = code()->instructions()[block->first_instruction_index()];

  if (correct_counts != move_map.size()) {
    // Moves that are unique to each predecessor won't be pushed to the common
    // successor.
    OperandSet conflicting_srcs(&operand_buffer1);
    for (auto iter = move_map.begin(); iter != move_map.end();) {
      auto [move, count] = *iter;
      if (count != block->PredecessorCount()) {
        // Not all the moves in all the gaps are the same. Maybe some are. If
        // there are such moves, we could move them, but the destination of the
        // moves staying behind can't appear as a source of a common move,
        // because the move staying behind will clobber this destination.
        conflicting_srcs.InsertOp(move.destination);
        iter = move_map.erase(iter);
      } else {
        ++iter;
      }
    }

    bool changed = false;
    do {
      // If a common move can't be pushed to the common successor, then its
      // destination also can't appear as source to any move being pushed.
      changed = false;
      for (auto iter = move_map.begin(); iter != move_map.end();) {
        auto [move, count] = *iter;
        DCHECK_EQ(block->PredecessorCount(), count);
        USE(count);
        if (conflicting_srcs.ContainsOpOrAlias(move.source)) {
          conflicting_srcs.InsertOp(move.destination);
          iter = move_map.erase(iter);
          changed = true;
        } else {
          ++iter;
        }
      }
    } while (changed);
  }

  if (move_map.empty()) return;

  DCHECK_NOT_NULL(instr);
  bool gap_initialized = true;
  if (instr->parallel_moves()[0] != nullptr &&
      !instr->parallel_moves()[0]->empty()) {
    // Will compress after insertion.
    gap_initialized = false;
    std::swap(instr->parallel_moves()[0], instr->parallel_moves()[1]);
  }
  ParallelMove* moves = instr->GetOrCreateParallelMove(
      static_cast<Instruction::GapPosition>(0), code_zone());
  // Delete relevant entries in predecessors and move everything to block.
  bool first_iteration = true;
  for (RpoNumber& pred_index : block->predecessors()) {
    const InstructionBlock* pred = code()->InstructionBlockAt(pred_index);
    for (MoveOperands* move : *LastInstruction(pred)->parallel_moves()[0]) {
      if (move->IsRedundant()) continue;
      MoveKey key = {move->source(), move->destination()};
      auto it = move_map.find(key);
      if (it != move_map.end()) {
        if (first_iteration) {
          moves->AddMove(move->source(), move->destination());
        }
        move->Eliminate();
      }
    }
    first_iteration = false;
  }
  // Compress.
  if (!gap_initialized) {
    CompressMoves(instr->parallel_moves()[0], instr->parallel_moves()[1]);
  }
  CompressBlock(block);
}

namespace {

bool IsSlot(const InstructionOperand& op) {
  return op.IsStackSlot() || op.IsFPStackSlot();
}

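// Returns whether {op} occupies a full 64-bit slot, distinguishing
// compressed from uncompressed tagged representations when pointer
// compression is enabled.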
bool Is64BitsWide(const InstructionOperand& op) {
  MachineRepresentation rep = LocationOperand::cast(op).representation();
#if V8_COMPRESS_POINTERS
  // We can't use {ElementSizeInBytes} because it's made for on-heap object
  // slots and assumes that kTagged == kCompressed, whereas for the purpose
  // here we specifically need to distinguish those cases.
  return (rep == MachineRepresentation::kTagged ||
          rep == MachineRepresentation::kTaggedPointer ||
          rep == MachineRepresentation::kWord64);
#else
  return rep == MachineRepresentation::kWord64;
#endif
}

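// Ordering used to group loads by canonicalized source. Within a group,
// 64-bit-wide destinations sort before narrower ones and non-slot
// destinations sort before stack slots, so the preferred destination of each
// group comes first.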
bool LoadCompare(const MoveOperands* a, const MoveOperands* b) {
  if (!a->source().EqualsCanonicalized(b->source())) {
    return a->source().CompareCanonicalized(b->source());
  }
  // The replacements below are only safe if wider values are preferred.
  // In particular, replacing an uncompressed pointer with a compressed
  // pointer is disallowed.
  if (a->destination().IsLocationOperand() &&
      b->destination().IsLocationOperand()) {
    if (Is64BitsWide(a->destination()) && !Is64BitsWide(b->destination())) {
      return true;
    }
    if (!Is64BitsWide(a->destination()) && Is64BitsWide(b->destination())) {
      return false;
    }
  }
  if (IsSlot(a->destination()) && !IsSlot(b->destination())) return false;
  if (!IsSlot(a->destination()) && IsSlot(b->destination())) return true;
  return a->destination().CompareCanonicalized(b->destination());
}

}  // namespace

// Split multiple loads of the same constant or stack slot off into the second
// slot and keep remaining moves in the first slot.
void MoveOptimizer::FinalizeMoves(Instruction* instr) {
  MoveOpVector& loads = local_vector();
  DCHECK(loads.empty());

  ParallelMove* parallel_moves = instr->parallel_moves()[0];
  if (parallel_moves == nullptr) return;
  // Find all the loads.
  for (MoveOperands* move : *parallel_moves) {
    if (move->IsRedundant()) continue;
    if (move->source().IsConstant() || IsSlot(move->source())) {
      loads.push_back(move);
    }
  }
  if (loads.empty()) return;
  // Group the loads by source, moving the preferred destination to the
  // beginning of the group.
  std::sort(loads.begin(), loads.end(), LoadCompare);
  MoveOperands* group_begin = nullptr;
  for (MoveOperands* load : loads) {
    // New group.
    if (group_begin == nullptr ||
        !load->source().EqualsCanonicalized(group_begin->source())) {
      group_begin = load;
      continue;
    }
    // Nothing to be gained from splitting here. However, due to the sorting
    // scheme, there could be optimizable groups of loads later in the group,
    // so bump the {group_begin} along.
    if (IsSlot(group_begin->destination())) {
      group_begin = load;
      continue;
    }
    // Insert new move into slot 1.
    ParallelMove* slot_1 = instr->GetOrCreateParallelMove(
        static_cast<Instruction::GapPosition>(1), code_zone());
    slot_1->AddMove(group_begin->destination(), load->destination());
    load->Eliminate();
  }
  loads.clear();
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8