v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
store-store-elimination-reducer-inl.h
// Copyright 2023 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_COMPILER_TURBOSHAFT_STORE_STORE_ELIMINATION_REDUCER_INL_H_
#define V8_COMPILER_TURBOSHAFT_STORE_STORE_ELIMINATION_REDUCER_INL_H_

#include <optional>

#include "src/base/logging.h"
#include "src/base/vector.h"
#include "src/common/globals.h"
#include "src/compiler/turboshaft/assembler.h"
#include "src/compiler/turboshaft/graph.h"
#include "src/compiler/turboshaft/operations.h"
#include "src/compiler/turboshaft/sidetable.h"
#include "src/compiler/turboshaft/snapshot-table.h"
#include "src/heap/heap-layout-inl.h"
#include "src/zone/zone-containers.h"

namespace v8::internal::compiler::turboshaft {

// 1. StoreStoreEliminationReducer tries to identify and remove redundant
// stores. E.g. for an input like
//
//   let o = {};
//   o.x = 2;
//   o.y = 3;
//   o.x = 4;
//   use(o.x);
//
// we don't need the first store to `o.x` because the value is overwritten
// before it can be observed.
//
// The analysis considers loads and stores as corresponding pairs of `base` (the
// OpIndex that defines `o` in the above example) and `offset` (that is the
// static offset at which the field is stored inside the object). We run the
// analysis backwards and track potentially redundant stores in a
// `MaybeRedundantStoresTable` roughly as follows:
//
// 1. When we see a `base`+`offset` store and
//    a. `base`+`offset` is observable in the table, we keep the store, but
//       track following `base`+`offset` stores as unobservable in the table.
//    b. `base`+`offset` is unobservable in the table, we can eliminate the
//       store and keep the table unchanged.
//    c. `base`+`offset` is gc-observable in the table, we eliminate it only
//       if it is not an initializing store; otherwise, we keep it and update
//       the table accordingly.
// 2. When we see a `base`+`offset` load, we mark all stores to this `offset`
//    as observable in the table. Notice that we do this regardless of the
//    `base`, because the bases might potentially alias.
// 3. When we see an allocation, we mark all stores that are currently
//    unobservable as gc-observable. The idea behind gc-observability is
//    that we do not observe the actual value stored, but we need to make
//    sure that the fields are written at least once, so that the GC does not
//    see uninitialized fields.
// 4. When we see another operation that can observe memory, we mark all
//    stores as observable.
//
// Notice that the table also tracks the `size` of the store, such that if
// fields are partially written, we don't incorrectly treat them as redundant
// to the full store (this can happen in strings for example).
//
// When the analysis reaches a branch, we combine values using the traditional
// least-upper-bound operation on the `StoreObservability` lattice. After
// processing a loop header, we revisit the loop if the resulting state has
// changed, until we reach a fixpoint.
//
//
// 2. StoreStoreEliminationReducer also tries to merge two consecutive 32-bit
// stores into a single 64-bit one.
// When V8 creates a new JS object, it initializes the object's in-object
// fields to some constant value (like `undefined`) right after allocation.
// When pointer compression is enabled, these are consecutive 32-bit stores,
// and the stored values are usually constants (heap objects). This reducer
// tries to merge two such consecutive 32-bit stores into a single 64-bit one.

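// As a worked illustration of the first transformation, running the backwards
// analysis over the example above (offsets written symbolically) evolves the
// table roughly like this:
//
//   use(o.x)  : load   -> mark offset `x` as Observable (for any base)
//   o.x = 4   : store  -> `o`+`x` is Observable: keep it, and mark `o`+`x`
//                         as Unobservable for the rest of the backward walk
//   o.y = 3   : store  -> `o`+`y` is Observable (the default): keep it
//   o.x = 2   : store  -> `o`+`x` is Unobservable: the store is redundant
//
// For the second transformation, a rough sketch (with made-up raw 32-bit
// values `low` and `high` for the two read-only constants) of the merge on a
// little-endian target:
//
//   Store(base, offset,     low )   // 32-bit compressed tagged store
//   Store(base, offset + 4, high)   // 32-bit compressed tagged store
//     ==>
//   Store(base, offset, make_uint64(high, low))   // single 64-bit store
//
// where make_uint64(high, low) builds (uint64_t{high} << 32) | low, so the
// low word lands at `offset` and the high word at `offset + 4`; on big-endian
// targets the operands are swapped (see V8_TARGET_BIG_ENDIAN below).
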
#include "src/compiler/turboshaft/define-assembler-macros.inc"

enum class StoreObservability {
  kUnobservable = 0,
  kGCObservable = 1,
  kObservable = 2,
};
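
// The enumerator values double as the lattice order used when merging
// observability states: the least upper bound of two states is simply the
// numerically larger value (e.g. the join of kUnobservable and kGCObservable
// is kGCObservable), which is why the merge functions below can just take
// std::max_element over the values being merged.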

inline std::ostream& operator<<(std::ostream& os,
                                StoreObservability observability) {
  switch (observability) {
    case StoreObservability::kUnobservable:
      return os << "Unobservable";
    case StoreObservability::kGCObservable:
      return os << "GCObservable";
    case StoreObservability::kObservable:
      return os << "Observable";
  }
}

struct MaybeRedundantStoresKeyData {
  OpIndex base;
  int32_t offset;
  uint8_t size;
  mutable base::IntrusiveSetIndex active_keys_index;
};

class MaybeRedundantStoresTable
    : public ChangeTrackingSnapshotTable<MaybeRedundantStoresTable,
                                         StoreObservability,
                                         MaybeRedundantStoresKeyData> {
  using super = ChangeTrackingSnapshotTable<MaybeRedundantStoresTable,
                                            StoreObservability,
                                            MaybeRedundantStoresKeyData>;

 public:
  explicit MaybeRedundantStoresTable(const Graph& graph, Zone* zone)
      : super(zone),
        graph_(graph),
        block_to_snapshot_mapping_(zone),
        key_mapping_(zone),
        active_keys_(zone),
        successor_snapshots_(zone) {}

  void OnNewKey(Key key, StoreObservability value) {
    if (value != StoreObservability::kObservable) {
      active_keys_.Add(key);
    }
  }

  void OnValueChange(Key key, StoreObservability old_value,
                     StoreObservability new_value) {
    DCHECK_NE(old_value, new_value);
    if (new_value == StoreObservability::kObservable) {
      active_keys_.Remove(key);
    } else if (old_value == StoreObservability::kObservable) {
      active_keys_.Add(key);
    }
  }

  void BeginBlock(const Block* block) {
    // Seal the current block first.
    if (IsSealed()) {
      DCHECK_NULL(current_block_);
    } else {
      // If we bind a new block while the previous one is still unsealed, we
      // finalize it.
      Seal();
    }

    // Collect the snapshots of all successors.
    {
      auto successors = SuccessorBlocks(block->LastOperation(graph_));
      successor_snapshots_.clear();
      for (const Block* s : successors) {
        std::optional<Snapshot> s_snapshot =
            block_to_snapshot_mapping_[s->index()];
        // When we visit the loop for the first time, the loop header hasn't
        // been visited yet, so we ignore it.
        DCHECK_IMPLIES(!s_snapshot.has_value(), s->IsLoop());
        if (!s_snapshot.has_value()) continue;
        successor_snapshots_.push_back(*s_snapshot);
      }
    }

    // Start a new snapshot for this block by merging information from
    // successors.
    StartNewSnapshot(
        base::VectorOf(successor_snapshots_),
        [](Key, base::Vector<const StoreObservability> successors) {
          return static_cast<StoreObservability>(
              *std::max_element(successors.begin(), successors.end()));
        });

    current_block_ = block;
  }

  StoreObservability GetObservability(OpIndex base, int32_t offset,
                                      uint8_t size) {
    Key key = map_to_key(base, offset, size);
    if (key.data().size < size) return StoreObservability::kObservable;
    return Get(key);
  }

  void MarkStoreAsUnobservable(OpIndex base, int32_t offset, uint8_t size) {
    // We can only shadow stores to the exact same `base`+`offset` and keep
    // everything else because they might or might not alias.
    Key key = map_to_key(base, offset, size);
    // If the `size` we want to mark unobservable here is less than the size we
    // have seen for this key before, we do not overwrite the entire field, so
    // preceding stores are not (fully) unobservable.
    if (size < key.data().size) return;
    Set(key, StoreObservability::kUnobservable);
  }

  void MarkPotentiallyAliasingStoresAsObservable(OpIndex base, int32_t offset) {
    // For now, we consider all stores to the same offset as potentially
    // aliasing. We might improve this to eliminate more precisely, if we have
    // some sort of aliasing information.
    for (Key key : active_keys_) {
      if (key.data().offset == offset)
        Set(key, StoreObservability::kObservable);
    }
  }

  void MarkAllStoresAsObservable() {
    for (Key key : active_keys_) {
      Set(key, StoreObservability::kObservable);
    }
  }

  void MarkAllStoresAsGCObservable() {
    for (Key key : active_keys_) {
      auto current = Get(key);
      DCHECK_NE(current, StoreObservability::kObservable);
      if (current == StoreObservability::kUnobservable) {
        Set(key, StoreObservability::kGCObservable);
      }
    }
  }

  void Seal(bool* snapshot_has_changed = nullptr) {
    DCHECK(!IsSealed());
    DCHECK_NOT_NULL(current_block_);
    DCHECK(current_block_->index().valid());
    auto& snapshot = block_to_snapshot_mapping_[current_block_->index()];
    if (!snapshot_has_changed) {
      snapshot = super::Seal();
    } else if (!snapshot.has_value()) {
      *snapshot_has_changed = true;
      snapshot = super::Seal();
    } else {
      auto new_snapshot = super::Seal();
      *snapshot_has_changed = false;
      StartNewSnapshot(
          base::VectorOf({snapshot.value(), new_snapshot}),
          [&](Key, base::Vector<const StoreObservability> successors) {
            DCHECK_LE(successors[0], successors[1]);
            if (successors[0] != successors[1]) *snapshot_has_changed = true;
            return static_cast<StoreObservability>(
                *std::max_element(successors.begin(), successors.end()));
          });
      snapshot = super::Seal();
    }
    current_block_ = nullptr;
  }

  void Print(std::ostream& os, const char* sep = "\n") const {
    bool first = true;
    for (Key key : active_keys_) {
      os << (first ? "" : sep) << key.data().base.id() << "@"
         << key.data().offset << ": " << Get(key);
      first = false;
    }
  }

 private:
  Key map_to_key(OpIndex base, int32_t offset, uint8_t size) {
    std::pair p{base, offset};
    auto it = key_mapping_.find(p);
    if (it != key_mapping_.end()) return it->second;
    Key new_key = NewKey(MaybeRedundantStoresKeyData{base, offset, size},
                         StoreObservability::kObservable);
    key_mapping_.emplace(p, new_key);
    return new_key;
  }

  struct GetActiveKeysIndex {
    base::IntrusiveSetIndex& operator()(Key key) const {
      return key.data().active_keys_index;
    }
  };

  const Graph& graph_;
  GrowingBlockSidetable<std::optional<Snapshot>> block_to_snapshot_mapping_;
  ZoneAbslFlatHashMap<std::pair<OpIndex, int32_t>, Key> key_mapping_;
  // In `active_keys_`, we track the keys of all stores that are gc-observable
  // or unobservable. Keys that are mapped to the default value (observable)
  // are removed from the `active_keys_`.
  ZoneIntrusiveSet<Key, GetActiveKeysIndex> active_keys_;
  const Block* current_block_ = nullptr;
  // {successor_snapshots_} and {temp_key_vector_} are used as temporary
  // vectors inside functions. We store them as members to avoid reallocation.
  ZoneVector<Snapshot> successor_snapshots_;
};

class RedundantStoreAnalysis {
 public:
  RedundantStoreAnalysis(const Graph& graph, Zone* phase_zone)
      : graph_(graph), table_(graph, phase_zone) {}

  void Run(ZoneSet<OpIndex>& eliminable_stores,
           ZoneMap<OpIndex, uint64_t>& mergeable_store_pairs) {
    eliminable_stores_ = &eliminable_stores;
    mergeable_store_pairs_ = &mergeable_store_pairs;
    for (uint32_t processed = graph_.block_count(); processed > 0;
         --processed) {
      BlockIndex block_index = static_cast<BlockIndex>(processed - 1);

      const Block& block = graph_.Get(block_index);
      ProcessBlock(block);

      // If this block is a loop header, check if this loop needs to be
      // revisited.
      if (block.IsLoop()) {
        bool needs_revisit = false;
        table_.Seal(&needs_revisit);
        if (needs_revisit) {
          Block* back_edge = block.LastPredecessor();
          DCHECK_GE(back_edge->index(), block_index);
          processed = back_edge->index().id() + 1;
        }
      }
    }
    eliminable_stores_ = nullptr;
    mergeable_store_pairs_ = nullptr;
  }

  void ProcessBlock(const Block& block) {
    table_.BeginBlock(&block);

    auto op_range = graph_.OperationIndices(block);
    for (auto it = op_range.end(); it != op_range.begin();) {
      --it;
      OpIndex index = *it;
      const Operation& op = graph_.Get(index);

      switch (op.opcode) {
        case Opcode::kStore: {
          const StoreOp& store = op.Cast<StoreOp>();
          // TODO(nicohartmann@): Use the new effect flags to distinguish heap
          // access once available.
          const bool is_on_heap_store = store.kind.tagged_base;
          const bool is_field_store = !store.index().valid();
          const uint8_t size = store.stored_rep.SizeInBytes();
          // For now we consider only stores of fields of objects on the heap.
          if (is_on_heap_store && is_field_store) {
            bool is_eliminable_store = false;
            switch (table_.GetObservability(store.base(), store.offset, size)) {
              case StoreObservability::kUnobservable:
                eliminable_stores_->insert(index);
                is_eliminable_store = true;
                break;
              case StoreObservability::kGCObservable:
                if (store.maybe_initializing_or_transitioning) {
                  // We cannot eliminate this store, but we mark all following
                  // stores to the same `base+offset` as unobservable.
                  table_.MarkStoreAsUnobservable(store.base(), store.offset,
                                                 size);
                } else {
                  eliminable_stores_->insert(index);
                  is_eliminable_store = true;
                }
                break;
              case StoreObservability::kObservable:
                // We cannot eliminate this store, but we mark all following
                // stores to the same `base+offset` as unobservable.
                table_.MarkStoreAsUnobservable(store.base(), store.offset,
                                               size);
                break;
            }

            // Try to merge 2 consecutive 32-bit stores into a single 64-bit
            // one.
            if (COMPRESS_POINTERS_BOOL && !is_eliminable_store &&
                store.maybe_initializing_or_transitioning &&
                store.kind == StoreOp::Kind::TaggedBase() &&
                store.write_barrier == WriteBarrierKind::kNoWriteBarrier &&
                store.stored_rep.IsCompressibleTagged()) {
              if (last_field_initialization_store_.valid() &&
                  graph_.NextIndex(index) == last_field_initialization_store_) {
                const StoreOp& store0 = store;
                const StoreOp& store1 =
                    graph_.Get(last_field_initialization_store_)
                        .Cast<StoreOp>();

                DCHECK(!store0.index().valid());
                DCHECK(!store1.index().valid());

                const ConstantOp* c0 =
                    graph_.Get(store0.value()).TryCast<ConstantOp>();
                const ConstantOp* c1 =
                    graph_.Get(store1.value()).TryCast<ConstantOp>();

                // TODO(dmercadier): for now, we only apply this optimization
                // when storing read-only values, because otherwise the GC will
                // lose track of Handles when we convert them to a raw Word64.
                // However, if we were to keep the reloc info up-to-date, then
                // this might work for any object. To do this, we might need to
                // delay this optimization to later (instruction selector for
                // instance).
                if (c0 && c1 && c0->kind == ConstantOp::Kind::kHeapObject &&
                    c1->kind == ConstantOp::Kind::kHeapObject &&
                    store1.offset - store0.offset == 4 &&
                    HeapLayout::InReadOnlySpace(*c0->handle()) &&
                    HeapLayout::InReadOnlySpace(*c1->handle())) {
                  uint32_t high = static_cast<uint32_t>(c1->handle()->ptr());
                  uint32_t low = static_cast<uint32_t>(c0->handle()->ptr());
#if V8_TARGET_BIG_ENDIAN
                  uint64_t merged = make_uint64(low, high);
#else
                  uint64_t merged = make_uint64(high, low);
#endif
                  mergeable_store_pairs_->insert({index, merged});

                  eliminable_stores_->insert(last_field_initialization_store_);
                  last_field_initialization_store_ = OpIndex::Invalid();
                }

              } else {
                last_field_initialization_store_ = index;
              }
            }
          }
          break;
        }
        case Opcode::kLoad: {
          const LoadOp& load = op.Cast<LoadOp>();
          // TODO(nicohartmann@): Use the new effect flags to distinguish heap
          // access once available.
          const bool is_on_heap_load = load.kind.tagged_base;
          const bool is_field_load = !load.index().valid();
          // For now we consider only loads of fields of objects on the heap.
          if (is_on_heap_load && is_field_load) {
            table_.MarkPotentiallyAliasingStoresAsObservable(load.base(),
                                                             load.offset);
          }
          break;
        }
        default: {
          OpEffects effects = op.Effects();
          if (effects.can_read_mutable_memory()) {
            table_.MarkAllStoresAsObservable();
          } else if (effects.requires_consistent_heap()) {
            table_.MarkAllStoresAsGCObservable();
          }
        } break;
      }
    }
  }

 private:
  const Graph& graph_;
  MaybeRedundantStoresTable table_;
  ZoneSet<OpIndex>* eliminable_stores_ = nullptr;
  ZoneMap<OpIndex, uint64_t>* mergeable_store_pairs_ = nullptr;
  OpIndex last_field_initialization_store_ = OpIndex::Invalid();
};

template <class Next>
class StoreStoreEliminationReducer : public Next {
 public:
  TURBOSHAFT_REDUCER_BOILERPLATE(StoreStoreElimination)

  void Analyze() {
    analysis_.Run(eliminable_stores_, mergeable_store_pairs_);
    Next::Analyze();
  }

  OpIndex REDUCE_INPUT_GRAPH(Store)(OpIndex ig_index, const StoreOp& store) {
    if (eliminable_stores_.count(ig_index) > 0) {
      return OpIndex::Invalid();
    } else if (mergeable_store_pairs_.count(ig_index) > 0) {
      OpIndex value = __ Word64Constant(mergeable_store_pairs_[ig_index]);
      __ Store(__ MapToNewGraph(store.base()), value, store.kind,
               MemoryRepresentation::Uint64(),
               WriteBarrierKind::kNoWriteBarrier, store.offset);
      return OpIndex::Invalid();
    }
    return Next::ReduceInputGraphStore(ig_index, store);
  }

 private:
  RedundantStoreAnalysis analysis_{Asm().input_graph(), Asm().phase_zone()};
  ZoneSet<OpIndex> eliminable_stores_{Asm().phase_zone()};
  ZoneMap<OpIndex, uint64_t> mergeable_store_pairs_{Asm().phase_zone()};
};

#include "src/compiler/turboshaft/undef-assembler-macros.inc"

}  // namespace v8::internal::compiler::turboshaft

#endif  // V8_COMPILER_TURBOSHAFT_STORE_STORE_ELIMINATION_REDUCER_INL_H_