v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
js-inlining-heuristic.cc
// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/js-inlining-heuristic.h"

namespace v8 {
namespace internal {
namespace compiler {

#define TRACE(...)                                  \
  do {                                              \
    if (v8_flags.trace_turbo_inlining)              \
      StdoutStream{} << __VA_ARGS__ << std::endl;   \
  } while (false)

namespace {
bool IsSmall(int const size) {
  return size <= v8_flags.max_inlined_bytecode_size_small;
}

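// Checks whether the function behind {feedback_cell} is even a plausible
// inlining target: it must have a feedback vector that stays stable across
// the checks below, compiled (and now pinned) bytecode, and a
// SharedFunctionInfo that reports itself as inlineable.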
bool CanConsiderForInlining(JSHeapBroker* broker,
                            FeedbackCellRef feedback_cell) {
  OptionalFeedbackVectorRef feedback_vector =
      feedback_cell.feedback_vector(broker);
  if (!feedback_vector.has_value()) {
    TRACE("Cannot consider " << feedback_cell
                             << " for inlining (no feedback vector)");
    return false;
  }
  SharedFunctionInfoRef shared = feedback_vector->shared_function_info(broker);

  if (!shared.HasBytecodeArray()) {
    TRACE("Cannot consider " << shared << " for inlining (no bytecode)");
    return false;
  }
  // Ensure we have a persistent handle to the bytecode in order to avoid
  // flushing it during the remaining compilation.
  shared.GetBytecodeArray(broker);

  // Read feedback vector again in case it got flushed before we were able to
  // prevent flushing above.
  OptionalFeedbackVectorRef feedback_vector_again =
      feedback_cell.feedback_vector(broker);
  if (!feedback_vector_again.has_value()) {
    TRACE("Cannot consider " << shared << " for inlining (no feedback vector)");
    return false;
  }
  if (!feedback_vector_again->equals(*feedback_vector)) {
    // The new feedback vector likely contains lots of uninitialized slots, so
    // it doesn't make much sense to inline this function now.
    TRACE("Not considering " << shared
                             << " for inlining (feedback vector changed)");
    return false;
  }

  SharedFunctionInfo::Inlineability inlineability =
      shared.GetInlineability(broker);
  if (inlineability != SharedFunctionInfo::kIsInlineable) {
    TRACE("Cannot consider "
          << shared << " for inlining (reason: " << inlineability << ")");
    return false;
  }

  TRACE("Considering " << shared << " for inlining with " << *feedback_vector);
  return true;
}

bool CanConsiderForInlining(JSHeapBroker* broker, JSFunctionRef function) {
  FeedbackCellRef feedback_cell = function.raw_feedback_cell(broker);
  bool const result = CanConsiderForInlining(broker, feedback_cell);
  if (result) {
    CHECK(function.shared(broker).equals(
        feedback_cell.shared_function_info(broker).value()));
  }
  return result;
}

}  // namespace

JSInliningHeuristic::Candidate JSInliningHeuristic::CollectFunctions(
    Node* node, int functions_size) {
  DCHECK_NE(0, functions_size);
  Node* callee = node->InputAt(0);
  Candidate out;
  out.node = node;

  HeapObjectMatcher m(callee);
  if (m.HasResolvedValue() && m.Ref(broker()).IsJSFunction()) {
    JSFunctionRef function = m.Ref(broker()).AsJSFunction();
    out.functions[0] = function;
    if (CanConsiderForInlining(broker(), function)) {
      out.bytecode[0] = function.shared(broker()).GetBytecodeArray(broker());
      out.num_functions = 1;
      return out;
    }
  }
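  // A phi as the callee means the call site is polymorphic: every value input
  // of the phi is a potential target. Bail out if there are more targets than
  // {functions_size} or if any of them is not a known JSFunction.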
  if (m.IsPhi()) {
    int const value_input_count = m.node()->op()->ValueInputCount();
    if (value_input_count > functions_size) {
      out.num_functions = 0;
      return out;
    }
    for (int n = 0; n < value_input_count; ++n) {
      HeapObjectMatcher m2(callee->InputAt(n));
      if (!m2.HasResolvedValue() || !m2.Ref(broker()).IsJSFunction()) {
        out.num_functions = 0;
        return out;
      }

      out.functions[n] = m2.Ref(broker()).AsJSFunction();
      JSFunctionRef function = out.functions[n].value();
      if (CanConsiderForInlining(broker(), function)) {
        out.bytecode[n] = function.shared(broker()).GetBytecodeArray(broker());
      }
    }
    out.num_functions = value_input_count;
    return out;
  }
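  // For CheckClosure and JSCreateClosure callees only the SharedFunctionInfo
  // (reached through the feedback cell) is known, not a concrete JSFunction,
  // so {out.shared_info} is filled in instead of {out.functions}.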
  if (m.IsCheckClosure()) {
    DCHECK(!out.functions[0].has_value());
    FeedbackCellRef feedback_cell = MakeRef(broker(), FeedbackCellOf(m.op()));
    if (CanConsiderForInlining(broker(), feedback_cell)) {
      out.shared_info = feedback_cell.shared_function_info(broker()).value();
      out.bytecode[0] = out.shared_info->GetBytecodeArray(broker());
    }
    out.num_functions = 1;
    return out;
  }
  if (m.IsJSCreateClosure()) {
    DCHECK(!out.functions[0].has_value());
    JSCreateClosureNode n(callee);
    FeedbackCellRef feedback_cell = n.GetFeedbackCellRefChecked(broker());
    if (CanConsiderForInlining(broker(), feedback_cell)) {
      out.shared_info = feedback_cell.shared_function_info(broker()).value();
      out.bytecode[0] = out.shared_info->GetBytecodeArray(broker());
      CHECK(out.shared_info->equals(n.Parameters().shared_info()));
    }
    out.num_functions = 1;
    return out;
  }
  out.num_functions = 0;
  return out;
}

Reduction JSInliningHeuristic::Reduce(Node* node) {
#if V8_ENABLE_WEBASSEMBLY
  if (mode() == kWasmWrappersOnly || mode() == kWasmFullInlining) {
    if (node->opcode() == IrOpcode::kJSWasmCall) {
      return inliner_.ReduceJSWasmCall(node);
    }
    return NoChange();
  }
#endif  // V8_ENABLE_WEBASSEMBLY

  DCHECK_EQ(mode(), kJSOnly);
  if (!IrOpcode::IsInlineeOpcode(node->opcode())) return NoChange();

  if (total_inlined_bytecode_size_ >= max_inlined_bytecode_size_absolute_) {
    return NoChange();
  }

  // Check if we already saw that {node} before, and if so, just skip it.
  if (seen_.find(node->id()) != seen_.end()) return NoChange();

  // Check if the {node} is an appropriate candidate for inlining.
  Candidate candidate = CollectFunctions(node, kMaxCallPolymorphism);
  if (candidate.num_functions == 0) {
    return NoChange();
  } else if (candidate.num_functions > 1 && !v8_flags.polymorphic_inlining) {
    TRACE("Not considering call site #"
          << node->id() << ":" << node->op()->mnemonic()
          << ", because polymorphic inlining is disabled");
    return NoChange();
  }

  bool can_inline_candidate = false, candidate_is_small = true;
  candidate.total_size = 0;
  FrameState frame_state{NodeProperties::GetFrameStateInput(node)};
  FrameStateInfo const& frame_info = frame_state.frame_state_info();
  Handle<SharedFunctionInfo> frame_shared_info;
  for (int i = 0; i < candidate.num_functions; ++i) {
    if (!candidate.bytecode[i].has_value()) {
      candidate.can_inline_function[i] = false;
      continue;
    }

    SharedFunctionInfoRef shared =
        candidate.functions[i].has_value()
            ? candidate.functions[i].value().shared(broker())
            : candidate.shared_info.value();
    candidate.can_inline_function[i] = candidate.bytecode[i].has_value();
    // Because of concurrent optimization, optimization of the inlining
    // candidate could have been disabled meanwhile.
    // JSInliner will check this again and not actually inline the function in
    // this case.
    CHECK_IMPLIES(candidate.can_inline_function[i],
                  shared.IsInlineable(broker()) ||
                      shared.GetInlineability(broker()) ==
                          SharedFunctionInfo::kHasOptimizationDisabled);
    // Do not allow direct recursion i.e. f() -> f(). We still allow indirect
    // recursion like f() -> g() -> f(). The indirect recursion is helpful in
    // cases where f() is a small dispatch function that calls the appropriate
    // function. In the case of direct recursion, we only have some static
    // information for the first level of inlining and it may not be that useful
    // to just inline one level in recursive calls. In some cases like tail
    // recursion we may benefit from recursive inlining, if we have additional
    // analysis that converts them to iterative implementations. Though it is
    // not obvious if such an analysis is needed.
    if (frame_info.shared_info().ToHandle(&frame_shared_info) &&
        frame_shared_info.equals(shared.object())) {
      TRACE("Not considering call site #" << node->id() << ":"
                                          << node->op()->mnemonic()
                                          << ", because of recursive inlining");
      candidate.can_inline_function[i] = false;
    }
    if (candidate.can_inline_function[i]) {
      can_inline_candidate = true;
      BytecodeArrayRef bytecode = candidate.bytecode[i].value();
      candidate.total_size += bytecode.length();
      unsigned inlined_bytecode_size = 0;
      if (OptionalJSFunctionRef function = candidate.functions[i]) {
        if (OptionalCodeRef code = function->code(broker())) {
          inlined_bytecode_size = code->GetInlinedBytecodeSize();
          candidate.total_size += inlined_bytecode_size;
        }
      }
      candidate_is_small = candidate_is_small &&
                           IsSmall(bytecode.length() + inlined_bytecode_size);
    }
  }
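  // At this point candidate.total_size covers each target's own bytecode plus
  // any bytecode already inlined into that target's existing optimized code,
  // and candidate_is_small holds only if every target stayed under the
  // max_inlined_bytecode_size_small threshold.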
  if (!can_inline_candidate) return NoChange();

  // Gather feedback on how often this call site has been hit before.
  if (node->opcode() == IrOpcode::kJSCall) {
    CallParameters const p = CallParametersOf(node->op());
    candidate.frequency = p.frequency();
  } else {
    ConstructParameters const p = ConstructParametersOf(node->op());
    candidate.frequency = p.frequency();
  }

  // Don't consider a {candidate} whose frequency is below the
  // threshold, i.e. a call site that is only hit once every N
  // invocations of the caller.
  if (candidate.frequency.IsKnown() &&
      candidate.frequency.value() < v8_flags.min_inlining_frequency) {
    return NoChange();
  }

  // Found a candidate. Insert it into the set of seen nodes s.t. we don't
  // revisit in the future. Note this insertion happens here and not earlier in
  // order to make inlining decisions order-independent. A node may not be a
  // candidate when first seen, but later reductions may turn it into a valid
  // candidate. In that case, the node should be revisited by
  // JSInliningHeuristic.
  seen_.insert(node->id());

  // Forcibly inline small functions here. In the case of polymorphic inlining
  // candidate_is_small is set only when all functions are small.
  if (candidate_is_small) {
    TRACE("Inlining small function(s) at call site #"
          << node->id() << ":" << node->op()->mnemonic());
    return InlineCandidate(candidate, true);
  }

  // In the general case we remember the candidate for later.
  candidates_.insert(candidate);
  return NoChange();
}

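// Note: {candidates_} is ordered by CandidateCompare (defined further below),
// so candidates_.begin() always yields the candidate with the best
// frequency/size score among those remembered by Reduce().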
void JSInliningHeuristic::Finalize() {
  if (candidates_.empty()) return;  // Nothing to do without candidates.
  if (v8_flags.trace_turbo_inlining) PrintCandidates();

  // We inline at most one candidate in every iteration of the fixpoint.
  // This is to ensure that we don't consume the full inlining budget
  // on things that aren't called very often.
  // TODO(bmeurer): Use std::priority_queue instead of std::set here.
  while (!candidates_.empty()) {
    auto i = candidates_.begin();
    Candidate candidate = *i;
    candidates_.erase(i);

    // Ignore this candidate if it's no longer valid.
    if (!IrOpcode::IsInlineeOpcode(candidate.node->opcode())) continue;
    if (candidate.node->IsDead()) continue;

    // Make sure we have some extra budget left, so that any small functions
    // exposed by this function would be given a chance to inline.
    double size_of_candidate =
        candidate.total_size * v8_flags.reserve_inline_budget_scale_factor;
    int total_size =
        total_inlined_bytecode_size_ + static_cast<int>(size_of_candidate);
    if (total_size > max_inlined_bytecode_size_cumulative_) {
      info_->set_could_not_inline_all_candidates();
      // Try if any smaller functions are available to inline.
      continue;
    }

    Reduction const reduction = InlineCandidate(candidate, false);
    if (reduction.Changed()) return;
  }
}

namespace {

struct NodeAndIndex {
  Node* node;
  int index;
};

bool CollectStateValuesOwnedUses(Node* node, Node* state_values,
                                 NodeAndIndex* uses_buffer, size_t* use_count,
                                 size_t max_uses) {
  // Only accumulate states that are not shared with other users.
  if (state_values->UseCount() > 1) return true;
  for (int i = 0; i < state_values->InputCount(); i++) {
    Node* input = state_values->InputAt(i);
    if (input->opcode() == IrOpcode::kStateValues) {
      if (!CollectStateValuesOwnedUses(node, input, uses_buffer, use_count,
                                       max_uses)) {
        return false;
      }
    } else if (input == node) {
      if (*use_count >= max_uses) return false;
      uses_buffer[*use_count] = {state_values, i};
      (*use_count)++;
    }
  }
  return true;
}

}  // namespace

Node* JSInliningHeuristic::DuplicateStateValuesAndRename(Node* state_values,
                                                         Node* from, Node* to,
                                                         StateCloneMode mode) {
  // Only rename in states that are not shared with other users. This needs to
  // be in sync with the condition in {CollectStateValuesOwnedUses}.
  if (state_values->UseCount() > 1) return state_values;
  Node* copy = mode == kChangeInPlace ? state_values : nullptr;
  for (int i = 0; i < state_values->InputCount(); i++) {
    Node* input = state_values->InputAt(i);
    Node* processed;
    if (input->opcode() == IrOpcode::kStateValues) {
      processed = DuplicateStateValuesAndRename(input, from, to, mode);
    } else if (input == from) {
      processed = to;
    } else {
      processed = input;
    }
    if (processed != input) {
      if (!copy) {
        copy = graph()->CloneNode(state_values);
      }
      copy->ReplaceInput(i, processed);
    }
  }
  return copy ? copy : state_values;
}

namespace {

bool CollectFrameStateUniqueUses(Node* node, FrameState frame_state,
                                 NodeAndIndex* uses_buffer, size_t* use_count,
                                 size_t max_uses) {
  // Only accumulate states that are not shared with other users.
  if (frame_state->UseCount() > 1) return true;
  if (frame_state.stack() == node) {
    if (*use_count >= max_uses) return false;
    uses_buffer[*use_count] = {frame_state, FrameState::kFrameStateStackInput};
    (*use_count)++;
  }
  if (!CollectStateValuesOwnedUses(node, frame_state.locals(), uses_buffer,
                                   use_count, max_uses)) {
    return false;
  }
  return true;
}

}  // namespace

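// With kCloneState the renamed frame state is built on a fresh clone; with
// kChangeInPlace the existing nodes are patched directly (used for the last
// duplicated call, which can take ownership of the original state).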
FrameState JSInliningHeuristic::DuplicateFrameStateAndRename(
    FrameState frame_state, Node* from, Node* to, StateCloneMode mode) {
  // Only rename in states that are not shared with other users. This needs to
  // be in sync with the condition in {CollectFrameStateUniqueUses}.
  if (frame_state->UseCount() > 1) return frame_state;
  Node* copy =
      mode == kChangeInPlace ? static_cast<Node*>(frame_state) : nullptr;
  if (frame_state.stack() == from) {
    if (!copy) {
      copy = graph()->CloneNode(frame_state);
    }
    copy->ReplaceInput(FrameState::kFrameStateStackInput, to);
  }
  Node* locals = frame_state.locals();
  Node* new_locals = DuplicateStateValuesAndRename(locals, from, to, mode);
  if (new_locals != locals) {
    if (!copy) {
      copy = graph()->CloneNode(frame_state);
    }
    copy->ReplaceInput(FrameState::kFrameStateLocalsInput, new_locals);
  }
  return copy != nullptr ? FrameState{copy} : frame_state;
}

bool JSInliningHeuristic::TryReuseDispatch(Node* node, Node* callee,
                                           Node** if_successes, Node** calls,
                                           Node** inputs, int input_count,
                                           int* num_calls) {
  // We will try to reuse the control flow branch created for computing
  // the {callee} target of the call. We only reuse the branch if there
  // is no side-effect between the call and the branch, and if the callee is
  // only used as the target (and possibly also in the related frame states).

  // We are trying to match the following pattern:
  //
  // C1 C2
  // . .
  // | |
  // Merge(merge) <-----------------+
  // ^ ^ |
  // V1 V2 | | E1 E2 |
  // . . | +----+ . . |
  // | | | | | | |
  // Phi(callee) EffectPhi(effect_phi) |
  // ^ ^ |
  // | | |
  // +----+ | |
  // | | | |
  // | StateValues | |
  // | ^ | |
  // +----+ | | |
  // | | | | |
  // | FrameState | |
  // | ^ | |
  // | | | +---+
  // | | | | |
  // +----+ Checkpoint(checkpoint) |
  // | | ^ |
  // | StateValues | +-------------+
  // | | | |
  // +-----+ | | |
  // | | | | |
  // | FrameState | |
  // | ^ | |
  // +-----------+ | | |
  // Call(node)
  // |
  // |
  // .
  //
  // The {callee} here is a phi that merges the possible call targets, {node}
  // is the actual call that we will try to duplicate and connect to the
  // control that comes into {merge}. There can be a {checkpoint} between
  // the call and the callee phi.
  //
  // The idea is to get rid of the merge, effect phi and phi, then duplicate
  // the call (with all the frame states and such), and connect the duplicated
  // calls and states directly to the inputs of the ex-phi, ex-effect-phi and
  // ex-merge. The tricky part is to make sure that there is no interference
  // from the outside. In particular, there should not be any unaccounted uses
  // of the phi, effect-phi and merge because we will remove them from
  // the graph.
  //
  // V1 E1 C1 V2 E2 C2
  // . . . . . .
  // | | | | | |
  // +----+ | | +----+ |
  // | | | | | | |
  // | StateValues | | | StateValues |
  // | ^ | | | ^ |
  // +----+ | | | +----+ | |
  // | | | | | | | | |
  // | FrameState | | | FrameState |
  // | ^ | | | ^ |
  // | | | | | | |
  // | | | | | | |
  // +----+ Checkpoint | +----+ Checkpoint |
  // | | ^ | | | ^ |
  // | StateValues | | | StateValues | |
  // | | | | | | | |
  // +-----+ | | | +-----+ | | |
  // | | | | | | | | | |
  // | FrameState | | | FrameState | |
  // | ^ | | | ^ | |
  // +-------------+| | | +-------------+| | |
  // Call----+ Call----+
  // | |
  // +-------+ +------------+
  // | |
  // Merge
  // EffectPhi
  // Phi
  // |
  // ...

  // Bailout if the call is not polymorphic anymore (other reducers might
  // have replaced the callee phi with a constant).
  if (callee->opcode() != IrOpcode::kPhi) return false;

  // If there is a control node between the callee computation
  // and the call, bail out.
  Node* merge = NodeProperties::GetControlInput(callee);
  if (NodeProperties::GetControlInput(node) != merge) return false;

  // If there is a non-checkpoint effect node between the callee computation
  // and the call, bail out. We will drop any checkpoint between the call and
  // the callee phi because the callee computation should have its own
  // checkpoint that the call can fall back to.
  Node* checkpoint = nullptr;
  Node* effect = NodeProperties::GetEffectInput(node);
  if (effect->opcode() == IrOpcode::kCheckpoint) {
    checkpoint = effect;
    if (NodeProperties::GetControlInput(checkpoint) != merge) return false;
    effect = NodeProperties::GetEffectInput(effect);
  }
  if (effect->opcode() != IrOpcode::kEffectPhi) return false;
  if (NodeProperties::GetControlInput(effect) != merge) return false;
  Node* effect_phi = effect;

  // The effect phi, the callee, the call and the checkpoint must be the only
  // users of the merge.
  for (Node* merge_use : merge->uses()) {
    if (merge_use != effect_phi && merge_use != callee && merge_use != node &&
        merge_use != checkpoint) {
      return false;
    }
  }

  // The effect phi must be only used by the checkpoint or the call.
  for (Node* effect_phi_use : effect_phi->uses()) {
    if (effect_phi_use != node && effect_phi_use != checkpoint) return false;
  }

  // We must replace the callee phi with the appropriate constant in
  // the entire subgraph reachable by inputs from the call (terminating
  // at phis and merges). Since we do not want to walk (and later duplicate)
  // the subgraph here, we limit the possible uses to this set:
  //
  // 1. In the call (as a target).
  // 2. The checkpoint between the call and the callee computation merge.
  // 3. The lazy deoptimization frame state.
  //
  // This corresponds to the most common pattern, where the function is
  // called with only local variables or constants as arguments.
  //
  // To check the uses, we first collect all the occurrences of callee in 1, 2
  // and 3, and then we check that all uses of callee are in the collected
  // occurrences. If there is an unaccounted use, we do not try to rewire
  // the control flow.
  //
  // Note: With CFG, this would be much easier and more robust - we would just
  // duplicate all the nodes between the merge and the call, replacing all
  // occurrences of the {callee} phi with the appropriate constant.

  // First compute the set of uses that are only reachable from 2 and 3.
  const size_t kMaxUses = 8;
  NodeAndIndex replaceable_uses[kMaxUses];
  size_t replaceable_uses_count = 0;

  // Collect the uses to check case 2.
  Node* checkpoint_state = nullptr;
  if (checkpoint) {
    checkpoint_state = checkpoint->InputAt(0);
    if (!CollectFrameStateUniqueUses(callee, FrameState{checkpoint_state},
                                     replaceable_uses, &replaceable_uses_count,
                                     kMaxUses)) {
      return false;
    }
  }

  // Collect the uses to check case 3.
  FrameState frame_state{NodeProperties::GetFrameStateInput(node)};
  if (!CollectFrameStateUniqueUses(callee, frame_state, replaceable_uses,
                                   &replaceable_uses_count, kMaxUses)) {
    return false;
  }

  // Bail out if there is a use of {callee} that is not reachable from 1, 2
  // and 3.
  for (Edge edge : callee->use_edges()) {
    // Case 1 (use by the call as a target).
    if (edge.from() == node && edge.index() == 0) continue;
    // Case 2 and 3 - used in checkpoint and/or lazy deopt frame states.
    bool found = false;
    for (size_t i = 0; i < replaceable_uses_count; i++) {
      if (replaceable_uses[i].node == edge.from() &&
          replaceable_uses[i].index == edge.index()) {
        found = true;
        break;
      }
    }
    if (!found) return false;
  }

  *num_calls = callee->op()->ValueInputCount();

  // Clone the call and the framestate, including the uniquely reachable
  // state values, making sure that we replace the phi with the constant.
  for (int i = 0; i < *num_calls; ++i) {
    // Clone the calls for each branch.
    // We need to specialize the calls to the correct target, effect, and
    // control. We also need to duplicate the checkpoint and the lazy
    // frame state, and change all the uses of the callee to the constant
    // callee.
    Node* target = callee->InputAt(i);
    Node* effect_phi_effect = effect_phi->InputAt(i);
    Node* control = merge->InputAt(i);

    if (checkpoint) {
      // Duplicate the checkpoint.
      FrameState new_checkpoint_state = DuplicateFrameStateAndRename(
          FrameState{checkpoint_state}, callee, target,
          (i == *num_calls - 1) ? kChangeInPlace : kCloneState);
      effect_phi_effect = graph()->NewNode(
          checkpoint->op(), new_checkpoint_state, effect_phi_effect, control);
    }

    // Duplicate the call.
    FrameState new_lazy_frame_state = DuplicateFrameStateAndRename(
        frame_state, callee, target,
        (i == *num_calls - 1) ? kChangeInPlace : kCloneState);
    inputs[0] = target;
    inputs[input_count - 3] = new_lazy_frame_state;
    inputs[input_count - 2] = effect_phi_effect;
    inputs[input_count - 1] = control;
    calls[i] = if_successes[i] =
        graph()->NewNode(node->op(), input_count, inputs);
  }

  // Mark the control inputs dead, so that we can kill the merge.
  node->ReplaceInput(input_count - 1, jsgraph()->Dead());
  callee->ReplaceInput(*num_calls, jsgraph()->Dead());
  effect_phi->ReplaceInput(*num_calls, jsgraph()->Dead());
  if (checkpoint) {
    checkpoint->ReplaceInput(2, jsgraph()->Dead());
  }

  merge->Kill();
  return true;
}

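// If the existing branch feeding the callee phi cannot be reused, fall back to
// materializing an explicit dispatch: for each known target, a ReferenceEqual
// check against {callee} guards a specialized clone of the call; the last
// target takes the remaining fall-through control edge unconditionally.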
void JSInliningHeuristic::CreateOrReuseDispatch(
    Node* node, Node* callee, Candidate const& candidate, Node** if_successes,
    Node** calls, Node** inputs, int input_count, int* num_calls) {
  if (TryReuseDispatch(node, callee, if_successes, calls, inputs, input_count,
                       num_calls)) {
    return;
  }

  Node* fallthrough_control = NodeProperties::GetControlInput(node);
  *num_calls = candidate.num_functions;

  // Create the appropriate control flow to dispatch to the cloned calls.
  for (int i = 0; i < *num_calls; ++i) {
    // TODO(2206): Make comparison be based on underlying SharedFunctionInfo
    // instead of the target JSFunction reference directly.
    Node* target =
        jsgraph()->ConstantNoHole(candidate.functions[i].value(), broker());
    if (i != (*num_calls - 1)) {
      Node* check =
          graph()->NewNode(simplified()->ReferenceEqual(), callee, target);
      Node* branch =
          graph()->NewNode(common()->Branch(), check, fallthrough_control);
      fallthrough_control = graph()->NewNode(common()->IfFalse(), branch);
      if_successes[i] = graph()->NewNode(common()->IfTrue(), branch);
    } else {
      if_successes[i] = fallthrough_control;
    }

    // Clone the calls for each branch.
    // The first input to the call is the actual target (which we specialize
    // to the known {target}); the last input is the control dependency.
    // We also specialize the new.target of JSConstruct {node}s if it refers
    // to the same node as the {node}'s target input, so that we can later
    // properly inline the JSCreate operations.
    if (node->opcode() == IrOpcode::kJSConstruct) {
      // TODO(jgruber, v8:10675): This branch seems unreachable.
      JSConstructNode n(node);
      if (inputs[n.TargetIndex()] == inputs[n.NewTargetIndex()]) {
        inputs[n.NewTargetIndex()] = target;
      }
    }
    inputs[JSCallOrConstructNode::TargetIndex()] = target;
    inputs[input_count - 1] = if_successes[i];
    calls[i] = if_successes[i] =
        graph()->NewNode(node->op(), input_count, inputs);
  }
}

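// Monomorphic candidates are handed straight to JSInliner::ReduceJSCall below.
// Polymorphic candidates are first expanded into the per-target dispatch
// subgraph built above, after which each cloned call is inlined individually
// while the bytecode-size budgets permit.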
Reduction JSInliningHeuristic::InlineCandidate(Candidate const& candidate,
                                               bool small_function) {
  int num_calls = candidate.num_functions;
  Node* const node = candidate.node;
#if V8_ENABLE_WEBASSEMBLY
  DCHECK_NE(node->opcode(), IrOpcode::kJSWasmCall);
#endif  // V8_ENABLE_WEBASSEMBLY
  if (num_calls == 1) {
    Reduction const reduction = inliner_.ReduceJSCall(node);
    if (reduction.Changed()) {
      total_inlined_bytecode_size_ += candidate.bytecode[0].value().length();
    }
    return reduction;
  }

  // Expand the JSCall/JSConstruct node to a subgraph first if
  // we have multiple known target functions.
  DCHECK_LT(1, num_calls);
  Node* calls[kMaxCallPolymorphism + 1];
  Node* if_successes[kMaxCallPolymorphism];
  Node* callee = NodeProperties::GetValueInput(node, 0);

  // Setup the inputs for the cloned call nodes.
  int const input_count = node->InputCount();
  Node** inputs = graph()->zone()->AllocateArray<Node*>(input_count);
  for (int i = 0; i < input_count; ++i) {
    inputs[i] = node->InputAt(i);
  }

  // Create the appropriate control flow to dispatch to the cloned calls.
  CreateOrReuseDispatch(node, callee, candidate, if_successes, calls, inputs,
                        input_count, &num_calls);

  // Check if we have an exception projection for the call {node}.
  Node* if_exception = nullptr;
  if (NodeProperties::IsExceptionalCall(node, &if_exception)) {
    Node* if_exceptions[kMaxCallPolymorphism + 1];
    for (int i = 0; i < num_calls; ++i) {
      if_successes[i] = graph()->NewNode(common()->IfSuccess(), calls[i]);
      if_exceptions[i] =
          graph()->NewNode(common()->IfException(), calls[i], calls[i]);
    }

    // Morph the {if_exception} projection into a join.
    Node* exception_control =
        graph()->NewNode(common()->Merge(num_calls), num_calls, if_exceptions);
    if_exceptions[num_calls] = exception_control;
    Node* exception_effect = graph()->NewNode(common()->EffectPhi(num_calls),
                                              num_calls + 1, if_exceptions);
    Node* exception_value = graph()->NewNode(
        common()->Phi(MachineRepresentation::kTagged, num_calls), num_calls + 1,
        if_exceptions);
    ReplaceWithValue(if_exception, exception_value, exception_effect,
                     exception_control);
  }

  // Morph the original call site into a join of the dispatched call sites.
  Node* control =
      graph()->NewNode(common()->Merge(num_calls), num_calls, if_successes);
  calls[num_calls] = control;
  Node* effect =
      graph()->NewNode(common()->EffectPhi(num_calls), num_calls + 1, calls);
  Node* value =
      graph()->NewNode(common()->Phi(MachineRepresentation::kTagged, num_calls),
                       num_calls + 1, calls);
  ReplaceWithValue(node, value, effect, control);

  // Inline the individual, cloned call sites.
  for (int i = 0; i < num_calls && total_inlined_bytecode_size_ <
                                       max_inlined_bytecode_size_absolute_;
       ++i) {
    if (candidate.can_inline_function[i] &&
        (small_function || total_inlined_bytecode_size_ <
                               max_inlined_bytecode_size_cumulative_)) {
      Node* call = calls[i];
      Reduction const reduction = inliner_.ReduceJSCall(call);
      if (reduction.Changed()) {
        total_inlined_bytecode_size_ += candidate.bytecode[i]->length();
        // Killing the call node is not strictly necessary, but it is safer to
        // make sure we do not resurrect the node.
        call->Kill();
      }
    }
  }

  return Replace(value);
}

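// Comparator for the {candidates_} set: a candidate that should be inlined
// first compares as "less". Candidates with known call frequency are ranked by
// their frequency/size score and beat candidates with unknown frequency; node
// ids serve as the final tie breaker so this stays a strict weak ordering.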
bool JSInliningHeuristic::CandidateCompare::operator()(
    const Candidate& left, const Candidate& right) const {
  constexpr bool kInlineLeftFirst = true, kInlineRightFirst = false;
  if (right.frequency.IsUnknown()) {
    if (left.frequency.IsUnknown()) {
      // If left and right are both unknown then the ordering is indeterminate,
      // which breaks strict weak ordering requirements, so we fall back to the
      // node id as a tie breaker.
      if (left.total_size < right.total_size) {
        return kInlineLeftFirst;
      } else if (left.total_size > right.total_size) {
        return kInlineRightFirst;
      } else {
        return left.node->id() > right.node->id();
      }
    } else {
      return kInlineLeftFirst;
    }
  } else if (left.frequency.IsUnknown()) {
    return kInlineRightFirst;
  }

  float left_score = left.frequency.value() / left.total_size;
  float right_score = right.frequency.value() / right.total_size;

  if (left_score > right_score) {
    return kInlineLeftFirst;
  } else if (left_score < right_score) {
    return kInlineRightFirst;
  } else {
    return left.node->id() > right.node->id();
  }
}

void JSInliningHeuristic::PrintCandidates() {
  StdoutStream os;
  os << candidates_.size() << " candidate(s) for inlining:" << std::endl;
  for (const Candidate& candidate : candidates_) {
    os << "- candidate: " << candidate.node->op()->mnemonic() << " node #"
       << candidate.node->id() << " with frequency " << candidate.frequency
       << ", " << candidate.num_functions << " target(s):" << std::endl;
    for (int i = 0; i < candidate.num_functions; ++i) {
      SharedFunctionInfoRef shared =
          candidate.functions[i].has_value()
              ? candidate.functions[i]->shared(broker())
              : candidate.shared_info.value();
      os << " - target: " << shared;
      if (candidate.bytecode[i].has_value()) {
        os << ", bytecode size: " << candidate.bytecode[i]->length();
        if (OptionalJSFunctionRef function = candidate.functions[i]) {
          if (OptionalCodeRef code = function->code(broker())) {
            unsigned inlined_bytecode_size = code->GetInlinedBytecodeSize();
            if (inlined_bytecode_size > 0) {
              os << ", existing opt code's inlined bytecode size: "
                 << inlined_bytecode_size;
            }
          }
        }
      } else {
        os << ", no bytecode";
      }
      os << std::endl;
    }
  }
}

#undef TRACE

}  // namespace compiler
}  // namespace internal
}  // namespace v8