v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
instruction-scheduler.cc
// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/instruction-scheduler.h"

#include <optional>

#include "src/base/iterator.h"
#include "src/base/utils/random-number-generator.h"
#include "src/compiler/backend/instruction-codes.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}
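// For illustration: if the ready list currently holds nodes with total
// latencies {7, 4, 4, 1}, a new node with total latency 4 is inserted after
// the existing 4s, so nodes_ stays sorted in decreasing order of total
// latency.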

InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}
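// Because AddNode() keeps nodes_ sorted by decreasing total latency, the first
// entry whose start_cycle has been reached is the ready node on the longest
// remaining path. When nothing is ready at this cycle, nullptr is returned and
// the caller (Schedule() below) simply advances to the next cycle.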

InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}

InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone,
                                                           Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {}
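// total_latency_ and start_cycle_ start out as -1, meaning "not yet computed":
// ComputeTotalLatencies() and Schedule() fill them in later (note the
// DCHECK_NE(-1, ...) in ComputeTotalLatencies()).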

void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_or_trap_(nullptr),
      operands_map_(zone) {
  if (v8_flags.turbo_stress_instruction_scheduling) {
    random_number_generator_ =
        std::optional<base::RandomNumberGenerator>(v8_flags.random_seed);
  }
}
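// With the turbo_stress_instruction_scheduling flag the scheduler picks ready
// nodes at random (see StressSchedulerQueue above) instead of using the
// critical-path heuristic; presumably this is meant to expose missing
// dependency edges that a fixed schedule would hide.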

void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK_NULL(last_side_effect_instr_);
  DCHECK(pending_loads_.empty());
  DCHECK_NULL(last_live_in_reg_marker_);
  DCHECK_NULL(last_deopt_or_trap_);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}

void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (v8_flags.turbo_stress_instruction_scheduling) {
    Schedule<StressSchedulerQueue>();
  } else {
    Schedule<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
}

void InstructionScheduler::AddTerminator(Instruction* instr) {
  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);
  // Make sure that basic block terminators are not moved by adding them
  // as successor of every instruction.
  for (ScheduleGraphNode* node : graph_) {
    node->AddSuccessor(new_node);
  }
  graph_.push_back(new_node);
}

void InstructionScheduler::AddInstruction(Instruction* instr) {
  if (IsBarrier(instr)) {
    if (v8_flags.turbo_stress_instruction_scheduling) {
      Schedule<StressSchedulerQueue>();
    } else {
      Schedule<CriticalPathFirstQueue>();
    }
    sequence()->AddInstruction(instr);
    return;
  }

  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);

  // We should not have branches in the middle of a block.
  DCHECK_NE(instr->flags_mode(), kFlags_branch);

  if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization or trap point when they depend on it.
    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
      last_deopt_or_trap_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with side-effect instructions, but
      // independent loads can be reordered with respect to each other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall() || CanTrap(instr)) {
      // Ensure that deopts or traps are not reordered with respect to
      // side-effect instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
    }

    // Update last deoptimization or trap point.
    if (instr->IsDeoptimizeCall() || CanTrap(instr)) {
      last_deopt_or_trap_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}
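// As an illustration of the rules in AddInstruction() above: for a block
// containing {load1, load2, store, load3} in that order, load1 and load2 get
// no edge between them and may be reordered, the store gets incoming edges
// from both pending loads, and load3 gets an incoming edge from the store.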

template <typename QueueType>
void InstructionScheduler::Schedule() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(), cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }

  // Reset own state.
  graph_.clear();
  operands_map_.clear();
  pending_loads_.clear();
  last_deopt_or_trap_ = nullptr;
  last_live_in_reg_marker_ = nullptr;
  last_side_effect_instr_ = nullptr;
}
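// Schedule() is a simple list scheduler: each iteration of the while loop is
// one cycle, the queue hands back the best instruction whose operands are
// ready, and a successor becomes schedulable only once all of its
// predecessors have been emitted, no earlier than cycle + the producer's
// latency.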

int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchStackCheckOffset:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchStackSlot:  // Despite its name this opcode will produce a
                          // reference to a frame slot, so it is not affected
                          // by the arm64 dual stack issues mentioned below.
    case kArchComment:
    case kArchDeoptimize:
    case kArchJmp:
    case kArchBinarySearchSwitch:
    case kArchRet:
    case kArchTableSwitch:
    case kArchThrowTerminator:
      return kNoOpcodeFlags;

    case kArchTruncateDoubleToI:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointerGreaterThan:
      // The ArchStackPointerGreaterThan instruction loads the current stack
      // pointer value and must not be reordered with instructions with side
      // effects.
      return kIsLoadOperation;

#if V8_ENABLE_WEBASSEMBLY
    case kArchStackPointer:
    case kArchSetStackPointer:
      // Instructions that load or set the stack pointer must not be reordered
      // with instructions with side effects or with each other.
      return kHasSideEffect;
#endif  // V8_ENABLE_WEBASSEMBLY

    case kArchPrepareCallCFunction:
    case kArchPrepareTailCall:
    case kArchTailCallCodeObject:
    case kArchTailCallAddress:
#if V8_ENABLE_WEBASSEMBLY
    case kArchTailCallWasm:
    case kArchTailCallWasmIndirect:
#endif  // V8_ENABLE_WEBASSEMBLY
    case kArchAbortCSADcheck:
      return kHasSideEffect;

    case kArchDebugBreak:
      return kIsBarrier;

    case kArchSaveCallerRegisters:
    case kArchRestoreCallerRegisters:
      return kIsBarrier;

    case kArchCallCFunction:
    case kArchCallCFunctionWithFrameState:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
#if V8_ENABLE_WEBASSEMBLY
    case kArchCallWasmFunction:
    case kArchCallWasmFunctionIndirect:
#endif  // V8_ENABLE_WEBASSEMBLY
    case kArchCallBuiltinPointer:
      // Calls can cause GC and GC may relocate objects. If a pure instruction
      // operates on a tagged pointer that was cast to a word then it may be
      // incorrect to move the instruction across the call. Hence we mark all
      // (non-tail-)calls as barriers.
      return kIsBarrier;

    case kArchStoreWithWriteBarrier:
    case kArchAtomicStoreWithWriteBarrier:
    case kArchStoreIndirectWithWriteBarrier:
      return kHasSideEffect;

    case kAtomicLoadInt8:
    case kAtomicLoadUint8:
    case kAtomicLoadInt16:
    case kAtomicLoadUint16:
    case kAtomicLoadWord32:
      return kIsLoadOperation;

    case kAtomicStoreWord8:
    case kAtomicStoreWord16:
    case kAtomicStoreWord32:
      return kHasSideEffect;

    case kAtomicExchangeInt8:
    case kAtomicExchangeUint8:
    case kAtomicExchangeInt16:
    case kAtomicExchangeUint16:
    case kAtomicExchangeWord32:
    case kAtomicCompareExchangeInt8:
    case kAtomicCompareExchangeUint8:
    case kAtomicCompareExchangeInt16:
    case kAtomicCompareExchangeUint16:
    case kAtomicCompareExchangeWord32:
    case kAtomicAddInt8:
    case kAtomicAddUint8:
    case kAtomicAddInt16:
    case kAtomicAddUint16:
    case kAtomicAddWord32:
    case kAtomicSubInt8:
    case kAtomicSubUint8:
    case kAtomicSubInt16:
    case kAtomicSubUint16:
    case kAtomicSubWord32:
    case kAtomicAndInt8:
    case kAtomicAndUint8:
    case kAtomicAndInt16:
    case kAtomicAndUint16:
    case kAtomicAndWord32:
    case kAtomicOrInt8:
    case kAtomicOrUint8:
    case kAtomicOrInt16:
    case kAtomicOrUint16:
    case kAtomicOrWord32:
    case kAtomicXorInt8:
    case kAtomicXorUint8:
    case kAtomicXorInt16:
    case kAtomicXorUint16:
    case kAtomicXorWord32:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
      TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
}
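// Any opcode not handled explicitly above is architecture-specific: it falls
// into the TARGET_ARCH_OPCODE_LIST cases and is classified by the
// backend-specific GetTargetInstructionFlags().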

void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK_NE(-1, successor->total_latency());
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}
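// The graph is walked in reverse insertion order so that every successor's
// total latency is already known when a node is visited; a node's total
// latency is its own latency plus the longest latency chain through its
// successors.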

}  // namespace compiler
}  // namespace internal
}  // namespace v8
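
The code that drives this class is not part of this file. The sketch below is
a minimal illustration of the calling pattern implied by the methods above,
assuming a hypothetical driver loop; PendingBlock, blocks(), rpo_number(),
instructions() and IsTerminator() are placeholders for illustration, not V8
API.

  // Hypothetical driver, for illustration only.
  void ScheduleAllBlocks(InstructionScheduler* scheduler,
                         const std::vector<PendingBlock>& blocks) {
    for (const PendingBlock& block : blocks) {      // placeholder type
      scheduler->StartBlock(block.rpo_number());    // begin a new block
      for (Instruction* instr : block.instructions()) {
        if (block.IsTerminator(instr)) {
          // Terminators are pinned to the end of the block.
          scheduler->AddTerminator(instr);
        } else {
          // Non-terminators are added to the dependency graph.
          scheduler->AddInstruction(instr);
        }
      }
      // EndBlock() runs Schedule() and emits the reordered instructions
      // into the underlying InstructionSequence.
      scheduler->EndBlock(block.rpo_number());
    }
  }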