v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
sort-builtins.cc
Go to the documentation of this file.
1// Copyright 2023 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "sort-builtins.h"
6
7#include <algorithm>
8#include <fstream>
9
12
13namespace v8 {
14namespace internal {
15
16Cluster::Cluster(uint32_t density, uint32_t size, Builtin target,
17 BuiltinsSorter* sorter)
18 : density_(density), size_(size), sorter_(sorter) {
19 CHECK(size_);
20 targets_.push_back(target);
22}
23
25
27 for (Cluster* cls : clusters_) {
28 delete cls;
29 }
30}
31
32void Cluster::Merge(Cluster* other) {
33 for (Builtin builtin : other->targets_) {
34 targets_.push_back(builtin);
35 sorter_->builtin_cluster_map_.emplace(builtin, this);
36 }
37 density_ = static_cast<uint32_t>(
38 (time_approximation() + other->time_approximation()) /
39 (size_ + other->size_));
40 size_ += other->size_;
41 other->density_ = 0;
42 other->size_ = 0;
43 other->targets_.clear();
44}
45
47 return static_cast<uint64_t>(size_) * density_;
48}
49
51 for (uint32_t i = 0; i < static_cast<uint32_t>(builtin_size_.size()); i++) {
55 // CHECK there is no data for execution count for non TurboFan compiled
56 // builtin.
58 continue;
59 }
60 Cluster* cls =
61 new Cluster(builtin_density_map_[id], builtin_size_[i], id, this);
62 clusters_.push_back(cls);
63 builtin_density_order_.push_back(
65 }
66
67 std::sort(builtin_density_order_.begin(), builtin_density_order_.end(),
68 [](const BuiltinDensitySlot& x, const BuiltinDensitySlot& y) {
69 return x.density_ > y.density_;
70 });
71}
72
75 int32_t bestProb = 0;
76
77 for (auto caller_it = call_graph_.begin(); caller_it != call_graph_.end();
78 caller_it++) {
79 Builtin caller = caller_it->first;
80 const CallProbabilities& callees_prob = caller_it->second;
81 if (callees_prob.count(callee) > 0) {
82 int32_t incoming_prob = callees_prob.at(callee).incoming_;
83 if (incoming_prob == -1) {
84 // We dont want to merge any cluster with -1 prob, because it means it's
85 // either a non TurboFan compiled builtin or its execution count too
86 // small.
87 continue;
88 }
89 if (bestPred == Builtin::kNoBuiltinId || incoming_prob > bestProb) {
90 bestPred = caller;
91 bestProb = incoming_prob;
92 }
93 }
94
95 if (bestProb < kMinEdgeProbabilityThreshold ||
96 bestPred == Builtin::kNoBuiltinId)
97 continue;
98
99 Cluster* predCls = builtin_cluster_map_[bestPred];
100 Cluster* succCls = builtin_cluster_map_[callee];
101
102 // Don't merge if the caller and callee are already in same cluster.
103 if (predCls == succCls) continue;
104 // Don't merge clusters if the combined size is too big.
105 if (predCls->size_ + succCls->size_ > kMaxClusterSize) continue;
106 if (predCls->density_ == 0) {
107 // Some density of cluster after normalized may be 0, in that case we dont
108 // merge them.
109 continue;
110 }
111 CHECK(predCls->size_);
112
113 uint32_t new_density = static_cast<uint32_t>(
114 (predCls->time_approximation() + succCls->time_approximation()) /
115 (predCls->size_ + succCls->size_));
116
117 // Don't merge clusters if the new merged density is lower too many times
118 // than current cluster, to avoid a huge dropping in cluster density, it
119 // will harm locality of builtins.
120 if (predCls->density_ / kMaxDensityDecreaseThreshold > new_density)
121 continue;
122 }
123
124 return bestPred;
125}
126
128 for (size_t i = 0; i < builtin_density_order_.size(); i++) {
129 Builtin id = builtin_density_order_[i].builtin_;
130 Cluster* succ_cluster = builtin_cluster_map_[id];
131
132 Builtin bestPred = FindBestPredecessorOf(id);
133 if (bestPred != Builtin::kNoBuiltinId) {
134 Cluster* pred_cluster = builtin_cluster_map_[bestPred];
135 pred_cluster->Merge(succ_cluster);
136 }
137 }
138}
139
141 std::sort(clusters_.begin(), clusters_.end(),
142 [](const Cluster* x, const Cluster* y) {
143 return x->density_ > y->density_;
144 });
145
146 clusters_.erase(
147 std::remove_if(clusters_.begin(), clusters_.end(),
148 [](const Cluster* x) { return x->targets_.empty(); }),
149 clusters_.end());
150}
151
152bool AddBuiltinIfNotProcessed(Builtin builtin, std::vector<Builtin>& order,
153 std::unordered_set<Builtin>& processed_builtins) {
154 if (processed_builtins.count(builtin) == 0) {
155 order.push_back(builtin);
156 processed_builtins.emplace(builtin);
157 return true;
158 }
159 return false;
160}
161
163 std::istringstream& line_stream,
164 std::unordered_map<std::string, Builtin>& name2id) {
165 // Any line starting with kBuiltinCallBlockDensityMarker is a normalized
166 // execution count of block with call. The format is:
167 // literal kBuiltinCallBlockDensityMarker , caller , block ,
168 // normalized_count
169 std::string token;
170 std::string caller_name;
171 CHECK(std::getline(line_stream, caller_name, ','));
172 Builtin caller_id = name2id[caller_name];
173
175
176 char* end = nullptr;
177 errno = 0;
178 CHECK(std::getline(line_stream, token, ','));
179 int32_t block_id = static_cast<int32_t>(strtoul(token.c_str(), &end, 0));
180 CHECK(errno == 0 && end != token.c_str());
181
182 CHECK(std::getline(line_stream, token, ','));
183 int32_t normalized_count =
184 static_cast<int32_t>(strtoul(token.c_str(), &end, 0));
185 CHECK(errno == 0 && end != token.c_str());
186 CHECK(line_stream.eof());
187
188 const BuiltinCallees* block_callees = profiler->GetBuiltinCallees(caller_id);
189 if (block_callees) {
190 int32_t outgoing_prob = 0;
191 int32_t incoming_prob = 0;
192 int caller_density = 0;
193 int callee_density = 0;
194
195 CHECK(builtin_density_map_.count(caller_id));
196 caller_density = builtin_density_map_.at(caller_id);
197
198 // TODO(v8:13938): Remove the below if check when we just store
199 // interesting blocks (contain call other builtins) execution count into
200 // profiling file.
201 if (block_callees->count(block_id)) {
202 // If the line of block density make sense (means it contain call to
203 // other builtins in this block).
204 for (const auto& callee_id : block_callees->at(block_id)) {
205 if (caller_density != 0) {
206 outgoing_prob = normalized_count * 100 / caller_density;
207 } else {
208 // If the caller density was normalized as 0 but the block density
209 // was not, we set caller prob as 100, otherwise it's 0. Because in
210 // the normalization, we may loss fidelity.
211 // For example, a caller was executed 8 times, but after
212 // normalization, it may be 0 time. At that time, if the
213 // normalized_count of this block (it may be a loop body) is a
214 // positive number, we could think normalized_count is bigger than the
215 // execution count of caller, hence we set it as 100, otherwise it's
216 // smaller than execution count of caller, we could set it as 0.
217 outgoing_prob = normalized_count ? 100 : 0;
218 }
219
220 if (builtin_density_map_.count(callee_id)) {
221 callee_density = builtin_density_map_.at(callee_id);
222 if (callee_density != 0) {
223 incoming_prob = normalized_count * 100 / callee_density;
224 } else {
225 // Same as caller prob when callee density exists but is 0.
226 incoming_prob = normalized_count ? 100 : 0;
227 }
228
229 } else {
230 // If callee_density does not exist, it means the callee was not
231 // compiled by TurboFan or execution count is too small (0 after
232 // normalization), we couldn't get the callee count, so we set it as
233 // -1. In that case we could avoid merging this callee builtin into
234 // any other cluster.
235 incoming_prob = -1;
236 }
237
238 CallProbability probs = CallProbability(incoming_prob, outgoing_prob);
239 if (call_graph_.count(caller_id) == 0) {
240 call_graph_.emplace(caller_id, CallProbabilities());
241 }
242 CallProbabilities& call_probs = call_graph_.at(caller_id);
243 call_probs.emplace(callee_id, probs);
244 }
245 }
246 }
247 CHECK(line_stream.eof());
248}
249
251 std::istringstream& line_stream,
252 std::unordered_map<std::string, Builtin>& name2id) {
253 // Any line starting with kBuiltinDensityMarker is normalized execution count
254 // for block 0 of a builtin, we take it as density of this builtin. The format
255 // is:
256 // literal kBuiltinDensityMarker , builtin_name , density
257 std::string token;
258 std::string builtin_name;
259 CHECK(std::getline(line_stream, builtin_name, ','));
260 std::getline(line_stream, token, ',');
261 CHECK(line_stream.eof());
262 char* end = nullptr;
263 errno = 0;
264 int density = static_cast<int>(strtol(token.c_str(), &end, 0));
265 CHECK(errno == 0 && end != token.c_str());
266
267 Builtin builtin_id = name2id[builtin_name];
268 builtin_density_map_.emplace(builtin_id, density);
269}
270
271void BuiltinsSorter::InitializeCallGraph(const char* profiling_file,
272 const std::vector<uint32_t>& size) {
273 std::ifstream file(profiling_file);
274 CHECK_WITH_MSG(file.good(), "Can't read log file");
275
276 std::unordered_map<std::string, Builtin> name2id;
278 std::string name = Builtins::name(i);
279 name2id.emplace(name, i);
280 builtin_size_.push_back(size.at(static_cast<uint32_t>(i)));
281 }
282
283 for (std::string line; std::getline(file, line);) {
284 std::string token;
285 std::istringstream line_stream(line);
286 // We must put lines start with kBuiltinDensityMarker before lines start
287 // with kBuiltinCallBlockDensityMarker, because we have to density to
288 // calculate call prob.
289 if (!std::getline(line_stream, token, ',')) continue;
290 if (token == kBuiltinCallBlockDensityMarker) {
291 ProcessBlockCountLineInfo(line_stream, name2id);
292 } else if (token == kBuiltinDensityMarker) {
293 ProcessBuiltinDensityLineInfo(line_stream, name2id);
294 }
295 }
296}
297
298std::vector<Builtin> BuiltinsSorter::SortBuiltins(
299 const char* profiling_file, const std::vector<uint32_t>& builtin_size) {
300 InitializeCallGraph(profiling_file, builtin_size);
301
302 // Step 1: initialization.
304
305 // Step 2: Merge best predecessors.
307
308 // Step 3: Sort clusters again.
309 SortClusters();
310
311 std::unordered_set<Builtin> processed_builtins;
312 std::vector<Builtin> builtin_order;
313
314 // For functions in the sorted cluster from step 3.
315 for (size_t i = 0; i < clusters_.size(); i++) {
316 Cluster* cls = clusters_.at(i);
317 for (size_t j = 0; j < cls->targets_.size(); j++) {
318 Builtin builtin = cls->targets_[j];
319 CHECK(
320 AddBuiltinIfNotProcessed(builtin, builtin_order, processed_builtins));
321 }
322 }
323
324 // For the remaining builtins.
326 AddBuiltinIfNotProcessed(i, builtin_order, processed_builtins);
327 }
328
329 return builtin_order;
330}
331
332} // namespace internal
333} // namespace v8
Builtins::Kind kind
Definition builtins.cc:40
static BuiltinsCallGraph * Get()
std::vector< BuiltinDensitySlot > builtin_density_order_
BuiltinClusterMap builtin_cluster_map_
void ProcessBlockCountLineInfo(std::istringstream &line_stream, std::unordered_map< std::string, Builtin > &name2id)
std::vector< Builtin > SortBuiltins(const char *profiling_file, const std::vector< uint32_t > &builtin_size)
std::vector< Cluster * > clusters_
void ProcessBuiltinDensityLineInfo(std::istringstream &line_stream, std::unordered_map< std::string, Builtin > &name2id)
const std::string kBuiltinCallBlockDensityMarker
void InitializeCallGraph(const char *profiling_file, const std::vector< uint32_t > &size)
const int32_t kMinEdgeProbabilityThreshold
Builtin FindBestPredecessorOf(Builtin callee)
const std::string kBuiltinDensityMarker
BuiltinDensityMap builtin_density_map_
const uint32_t kMaxDensityDecreaseThreshold
static V8_EXPORT_PRIVATE Kind KindOf(Builtin builtin)
Definition builtins.cc:471
static constexpr Builtin kFirst
Definition builtins.h:112
static constexpr Builtin FromInt(int id)
Definition builtins.h:140
static constexpr Builtin kLast
Definition builtins.h:113
static V8_EXPORT_PRIVATE const char * name(Builtin builtin)
Definition builtins.cc:226
uint64_t time_approximation()
void Merge(Cluster *other)
std::vector< Builtin > targets_
Cluster(uint32_t density, uint32_t size, Builtin target, BuiltinsSorter *sorter)
BuiltinsSorter * sorter_
const int size_
Definition assembler.cc:132
int end
TNode< Object > target
int x
bool AddBuiltinIfNotProcessed(Builtin builtin, std::vector< Builtin > &order, std::unordered_set< Builtin > &processed_builtins)
std::unordered_map< int32_t, BlockCallees > BuiltinCallees
std::unordered_map< Builtin, CallProbability > CallProbabilities
#define CHECK(condition)
Definition logging.h:124
#define CHECK_WITH_MSG(condition, message)
Definition logging.h:118
#define CHECK_EQ(lhs, rhs)
Symbol file