v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
sort-builtins.h
Go to the documentation of this file.
1// Copyright 2023 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_SNAPSHOT_SORT_BUILTINS_H_
6#define V8_SNAPSHOT_SORT_BUILTINS_H_
7
8#include <unordered_map>
9#include <vector>
10
13
14// The inputs were the builtin size, call graph and basic block execution count.
15// There are 3 steps in this sorting algorithm:
16// 1. Initializing cluster and sorting:
17// A cluster represents a group of functions. At the beginning, each
18// function was in an individual cluster, and we sort these clusters
19// by their density (which means how much probabilities this function was
20// invoked).
21//
22// 2. Merge the best predecessor:
23// After step 1, we will get lots of clusters which may contain only
24// one function. According to this order, we iterate each function
25// and merge cluster with some conditions, like:
26// 1) The most incoming probability.
27// 2) Incoming probability must be bigger than a threshold, like 0.1
28// 3) Merged cluster size couldn't be bigger than a threshold, like 1 mb.
29// 4) Predecessor cluster density couldn't be bigger N times than the new
30// merged cluster, N is 8 now.
31//
32// 3. Sorting clusters:
33// After step 2, we obtain lots of clusters which comprise several functions.
34// We will finally sort these clusters by their density.
35
36namespace v8 {
37namespace internal {
38
39class Cluster;
41 CallProbability(int32_t incoming = 0, int32_t outgoing = 0)
42 : incoming_(incoming), outgoing_(outgoing) {}
43
44 // There are a caller and a callee, we assume caller was invoked
45 // "caller-count" times, it calls callee "call-count" times, the callee was
46 // invoked "callee-count" times. imcoming_ means the possibity the callee
47 // calls from caller, it was calculted by call-count / callee-count. If
48 // callee-count is 0 (may not be compiled by TurboFan or normalized as 0 due
49 // to too small), we set imcoming_ as -1.
50 int32_t incoming_;
51 // outgoing_ means the possibity the caller
52 // calls to callee, it was calculted by call-count / caller-count. If
53 // caller-count is 0 (may not be compiled by TurboFan or normalized as 0 due
54 // to too small), we set outgoing_ as -1. We didn't use outgoing_ as condition
55 // for reordering builtins yet, but we could try to do some experiments with
56 // it later for obtaining a better order of builtins.
57 int32_t outgoing_;
58};
59// The key is the callee builtin, the value is call probabilities in percent
60// (mostly range in 0 ~ 100, except one call happend in a loop block which was
61// executed more times than block 0 of this builtin).
62using CallProbabilities = std::unordered_map<Builtin, CallProbability>;
63// The key is the caller builtin.
64using CallGraph = std::unordered_map<Builtin, CallProbabilities>;
65// The key is the builtin id, the value is density of builtin (range in 0 ~
66// 10000).
67using BuiltinDensityMap = std::unordered_map<Builtin, uint32_t>;
68// The index is the builtin id, the value is size of builtin (in bytes).
69using BuiltinSize = std::vector<uint32_t>;
70// The key is the builtin id, the value is the cluster which it was comprised.
71using BuiltinClusterMap = std::unordered_map<Builtin, Cluster*>;
72
74 const int32_t kMinEdgeProbabilityThreshold = 10;
75 const uint32_t kMaxClusterSize = 1 * MB;
76 const uint32_t kMaxDensityDecreaseThreshold = 8;
77
78 const std::string kBuiltinCallBlockDensityMarker = "block_count";
79 const std::string kBuiltinDensityMarker = "builtin_count";
80
81 // Pair of denstity of builtin and builtin id.
83 BuiltinDensitySlot(uint32_t density, Builtin builtin)
84 : density_(density), builtin_(builtin) {}
85
86 uint32_t density_;
88 };
89
90 public:
93 std::vector<Builtin> SortBuiltins(const char* profiling_file,
94 const std::vector<uint32_t>& builtin_size);
95
96 private:
97 void InitializeCallGraph(const char* profiling_file,
98 const std::vector<uint32_t>& size);
99 void InitializeClusters();
101 void SortClusters();
104 std::istringstream& line_stream,
105 std::unordered_map<std::string, Builtin>& name2id);
107 std::istringstream& line_stream,
108 std::unordered_map<std::string, Builtin>& name2id);
109
110 std::vector<Cluster*> clusters_;
111
112 std::vector<BuiltinDensitySlot> builtin_density_order_;
113
115
117
119
121
122 friend class Cluster;
123};
124
125class Cluster {
126 public:
127 Cluster(uint32_t density, uint32_t size, Builtin target,
128 BuiltinsSorter* sorter);
129 void Merge(Cluster* other);
130 uint64_t time_approximation();
131
132 private:
133 // Max initialized density was normalized as 10000.
134 uint32_t density_;
135 // Size of the cluster in bytes.
136 uint32_t size_;
137 std::vector<Builtin> targets_;
139
140 friend class BuiltinsSorter;
141};
142
143} // namespace internal
144
145} // namespace v8
146
147#endif // V8_SNAPSHOT_SORT_BUILTINS_H_
std::vector< BuiltinDensitySlot > builtin_density_order_
BuiltinClusterMap builtin_cluster_map_
void ProcessBlockCountLineInfo(std::istringstream &line_stream, std::unordered_map< std::string, Builtin > &name2id)
std::vector< Builtin > SortBuiltins(const char *profiling_file, const std::vector< uint32_t > &builtin_size)
std::vector< Cluster * > clusters_
void ProcessBuiltinDensityLineInfo(std::istringstream &line_stream, std::unordered_map< std::string, Builtin > &name2id)
const std::string kBuiltinCallBlockDensityMarker
void InitializeCallGraph(const char *profiling_file, const std::vector< uint32_t > &size)
const int32_t kMinEdgeProbabilityThreshold
Builtin FindBestPredecessorOf(Builtin callee)
const std::string kBuiltinDensityMarker
BuiltinDensityMap builtin_density_map_
const uint32_t kMaxDensityDecreaseThreshold
uint64_t time_approximation()
void Merge(Cluster *other)
std::vector< Builtin > targets_
Cluster(uint32_t density, uint32_t size, Builtin target, BuiltinsSorter *sorter)
BuiltinsSorter * sorter_
std::unordered_map< Builtin, CallProbabilities > CallGraph
std::unordered_map< Builtin, Cluster * > BuiltinClusterMap
std::vector< uint32_t > BuiltinSize
std::unordered_map< Builtin, uint32_t > BuiltinDensityMap
std::unordered_map< Builtin, CallProbability > CallProbabilities
too high values may cause the compiler to set high thresholds for inlining to as much as possible avoid inlined allocation of objects that cannot escape trace load stores from virtual maglev objects use TurboFan fast string builder analyze liveness of environment slots and zap dead values trace TurboFan load elimination emit data about basic block usage in builtins to this enable builtin reordering when run mksnapshot flag for emit warnings when applying builtin profile data verify register allocation in TurboFan randomly schedule instructions to stress dependency tracking enable store store elimination in TurboFan rewrite far to near simulate GC compiler thread race related to allow float parameters to be passed in simulator mode JS Wasm Run additional turbo_optimize_inlined_js_wasm_wrappers enable experimental feedback collection in generic lowering enable Turboshaft s WasmLoadElimination enable Turboshaft s low level load elimination for JS enable Turboshaft s escape analysis for string concatenation use enable Turbolev features that we want to ship in the not too far future trace individual Turboshaft reduction steps trace intermediate Turboshaft reduction steps invocation count threshold for early optimization Enables optimizations which favor memory size over execution speed Enables sampling allocation profiler with X as a sample interval min size of a semi the new space consists of two semi spaces max size of the Collect garbage after Collect garbage after keeps maps alive for< n > old space garbage collections print one detailed trace line in allocation gc speed threshold for starting incremental marking via a task in percent of available threshold for starting incremental marking immediately in percent of available Use a single schedule for determining a marking schedule between JS and C objects schedules the minor GC task with kUserVisible priority max worker number of concurrent for NumberOfWorkerThreads start background threads that allocate memory concurrent_array_buffer_sweeping use parallel threads to clear weak refs in the atomic pause trace progress of the incremental marking trace object counts and memory usage * MB
Definition flags.cc:2197
BuiltinDensitySlot(uint32_t density, Builtin builtin)
CallProbability(int32_t incoming=0, int32_t outgoing=0)