v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
late-load-elimination-reducer.h
1// Copyright 2023 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_COMPILER_TURBOSHAFT_LATE_LOAD_ELIMINATION_REDUCER_H_
6#define V8_COMPILER_TURBOSHAFT_LATE_LOAD_ELIMINATION_REDUCER_H_
7
8#include <optional>
9
24#include "src/zone/zone.h"
25
27
29
30#ifdef DEBUG
31#define TRACE(x) \
32 do { \
33 if (v8_flags.turboshaft_trace_load_elimination) \
34 StdoutStream() << x << std::endl; \
35 } while (false)
36#else
37#define TRACE(x)
38#endif
39
40// Design doc:
41// https://docs.google.com/document/d/1AEl4dATNLu8GlLyUBQFXJoCxoAT5BeG7RCWxoEtIBJE/edit?usp=sharing
42
43// Load Elimination removes redundant loads. Loads can be redundant because:
44//
45// - they follow a store to the same address. For instance:
46//
47// x.a = 42;
48// y = x.a;
49//
50// - or, they follow the same load. For instance:
51//
52// y = x.a;
53// z = x.a;
54//
55// The "annoying" part of load elimination is that object can alias, and stores
56// to dynamically computed indices tend to invalidate the whole state. For
57// instance, if we don't know anything about aliasing regarding `a` and `b`,
58// then, in this situation:
59//
60// x.a = 42
61// y.a = 25
62// z = x.a
63//
64// We can't load-eliminate `z = x.a`, since `y` could alias with `x`, and `y.a =
65// 25` could have overwritten `x.a`. Similarly, if we have something like:
66//
67// x[0] = 42
68// y[i] = 25
69// z = x[0]
70//
71// We can't load-eliminate `z = x[0]`, since `y` could alias with `x`, and
72// `y[i]` could thus have overwritten `x[0]`.
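//
// The implementation below still recovers some of these cases: if `y` is
// known to be a freshly allocated object that hasn't escaped yet, or if `x`
// and `y` are known to have different maps, then `x` and `y` cannot alias,
// and
//
//   x.a = 42
//   y.a = 25
//   z = x.a
//
// can be load-eliminated after all (`z` reuses the stored value 42).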
73//
74//
75// Implementation:
76//
77// - In a `MemoryContentTable` (a SnapshotTable), we keep track of known
78// memory values.
79// * When we visit a Store:
80// + if it's to a constant offset, we invalidate all of the known values
81// at the same offset (for all bases).
82// + if it's to a dynamic index, we invalidate everything (because things
83// could alias).
84// We then update the known value at the address of the store.
85// * When we visit a Call, we invalidate everything (since the function
86// called could change any memory through aliases).
87// * When we visit a Load:
88// + if there is a known value at the address, we replace the Load by this
89// value.
90// + otherwise, the result of the Load becomes the known value at the load
91// address.
92//
93// - We keep track (using a SparseOpIndexSnapshotTable) of some objects that
94// are known to not alias with anything: freshly allocated objects, until
95// they are passed to a function call, stored in an object, or flow in a
96// Phi. When storing in a fresh object, we only need to invalidate things in
97// the same object, leaving the rest of the state untouched. When storing in
98// a non-fresh object, we don't invalidate the state for fresh objects.
99//
100// - We keep track (using a SparseOpIndexSnapshotTable) of the maps of some
101// objects (which we get from AssumeMap operations, which are inserted when
102// lowering CheckMaps). We use them to know if some objects can alias or
103// not: 2 objects with different maps cannot alias.
104//
105// - When a loop contains a Store or a Call, it could invalidate previously
106// eliminated loads in the beginning of the loop. Thus, once we reach the
107// end of a loop, we recompute the header's snapshot using {header,
108// backedge} as predecessors, and if anything is invalidated by the
109// backedge, we revisit the loop.
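//
// Roughly, the analysis therefore walks every block and dispatches on each
// operation (see ProcessBlock and the Process* methods below; this is only a
// sketch, the real handling has more cases):
//
//   Load      => reuse the known value at this address if there is one,
//                otherwise record the load's result for this address
//   Store     => invalidate whatever the store could overwrite, then record
//                the stored value at this address
//   Call      => invalidate everything that might alias
//   Allocate  => mark the result as non-aliasing (fresh)
//   AssumeMap => record the possible maps of the object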
110//
111// How we "keep track" of objects:
112//
113// We need the following operations:
114// 1. Load the value for a {base, index, offset}.
115// 2. Store that {base, index, offset} = value
116// 3. Invalidate everything at a given offset + everything at an index (for
117// when storing to a base that could alias with other things).
118// 4. Invalidate everything in a base (for when said base is passed to a
119// function, or when there is an indexed store in this base).
120// 5. Invalidate everything (for an indexed store into an arbitrary base)
121//
122// To have 1. in constant time, we maintain a global hashmap (`all_keys`) from
123// MemoryAddress (= {base, index, offset, element_size_log2, size}) to Keys, and
124// from these Keys, we have constant-time lookup in the SnapshotTable.
125// To have 3. efficiently, we maintain a Map from offsets to lists of every
126// MemoryAddress at this offset (`offset_keys_`).
127// To have 4. efficiently, we have a similar map from bases to lists of every
128// MemoryAddress at this base (`base_keys_`).
129// For 5., we can use either `offset_keys_` or `base_keys_`. In practice, we use
130// the latter because it allows us to efficiently skip bases that are known to
131// have no aliases.
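//
// For instance (with purely illustrative offsets), after
//
//   x.f = v1    // constant offset 12, no index
//   y[i] = v2   // dynamic index i
//
// there are two keys: k1 = {x, no index, 12} => v1 and k2 = {y, i, ...} => v2.
// `all_keys_` maps both MemoryAddresses to their keys, `base_keys_[x]` and
// `base_keys_[y]` hold them per base, k1 is in `offset_keys_[12]`, and k2 is
// in `index_keys_`. A later maybe-aliasing store at offset 12 walks
// `offset_keys_[12]` (operation 3.), while a maybe-aliasing indexed store or a
// Call goes through `base_keys_` (operation 5.), skipping non-aliasing bases.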
132
133// MapMask and related functions are an attempt to avoid having to store sets of
134// maps for each AssumeMap that we encounter by compressing all of the maps into
135// a single uint64_t.
136//
137// For each object, we keep in a MapMaskAndOr the "minimum" and "maximum" of
138// all of its potential maps, where
139// - "or_" is computed using the union (logical or) of all of its potential
140// maps.
141// - "and_" is computed using the intersection (logical and) of all of its
142// potential maps.
143//
144// Then, given two objects A and B, if A.and_ has a bit set that isn't set in
145// B.or_, it means that all of the maps of A have a bit that none of the maps of
146// B have, i.e., A and B are guaranteed not to have a map in common.
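//
// For example, with made-up 4-bit hashes: if A's possible maps hash to 0b0110
// and 0b0011, then A.or_ = 0b0111 and A.and_ = 0b0010; if B's only possible
// map hashes to 0b1001, then B.or_ = B.and_ = 0b1001. Here
// A.and_ & B.or_ == 0 != A.and_ and B.and_ & A.or_ == 0b0001 != B.and_, so
// CouldHaveSameMap (below) returns false: A and B cannot be the same object.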
147using MapMask = uint64_t;
148struct MapMaskAndOr {
149 MapMask or_ = 0;
150 MapMask and_ = -1ull;
151
152 bool operator==(const MapMaskAndOr& other) const {
153 return or_ == other.or_ && and_ == other.and_;
154 }
155
156 bool operator!=(const MapMaskAndOr& other) const { return !(*this == other); }
157};
158inline bool is_empty(MapMaskAndOr minmax) {
159 return minmax.or_ == 0 && minmax.and_ == -1ull;
160}
161inline MapMask ComputeMapHash(MapRef map) {
162 // `map.hash_value()` is probably not a good enough hash, since most user maps
163 // will have the same upper bits, so we re-hash. We're using xorshift64* (from
164 // "An experimental exploration of Marsaglia’s xorshift generators, scrambled"
165 // by Vigna in ACM Transactions on Mathematical Software, Volume 42).
166 MapMask hash = map.hash_value();
167 hash ^= hash >> 12;
168 hash ^= hash << 25;
169 hash ^= hash >> 27;
170 return hash * 0x2545f4914f6cdd1d;
171}
172inline MapMaskAndOr ComputeMinMaxHash(ZoneRefSet<Map> maps) {
173 MapMaskAndOr minmax;
174 for (MapRef map : maps) {
175 MapMask hash = ComputeMapHash(map);
176 minmax.or_ |= hash;
177 minmax.and_ &= hash;
178 }
179 return minmax;
180}
181inline MapMaskAndOr CombineMinMax(MapMaskAndOr a, MapMaskAndOr b) {
182 return {a.or_ | b.or_, a.and_ & b.and_};
183}
184// Returns true if {a} and {b} could have a map in common.
185inline bool CouldHaveSameMap(MapMaskAndOr a, MapMaskAndOr b) {
186 return ((a.and_ & b.or_) == a.and_) || ((b.and_ & a.or_) == b.and_);
187}
188
189struct MemoryAddress {
190 OpIndex base;
191 OptionalOpIndex index;
192 int32_t offset;
193 uint8_t element_size_log2;
194 uint8_t size;
195
196 bool operator==(const MemoryAddress& other) const {
197 return base == other.base && index == other.index &&
198 offset == other.offset &&
199 element_size_log2 == other.element_size_log2 && size == other.size;
200 }
201
202 template <typename H>
203 friend H AbslHashValue(H h, const MemoryAddress& mem) {
204 return H::combine(std::move(h), mem.base, mem.index, mem.offset,
205 mem.element_size_log2, mem.size);
206 }
207};
208std::ostream& operator<<(std::ostream& os, const MemoryAddress& mem);
209
210inline size_t hash_value(MemoryAddress const& mem) {
211 return fast_hash_combine(mem.base, mem.index, mem.offset,
212 mem.element_size_log2, mem.size);
213}
214
215struct KeyData {
216 using Key = SnapshotTableKey<OpIndex, KeyData>;
217 MemoryAddress mem;
218 // Pointers to the previous and the next Keys at the same base.
219 Key* prev_same_base = nullptr;
220 Key next_same_base = {};
221 // Pointers to either the next/previous Keys at the same offset or index.
222 Key* prev_same_offset = nullptr;
223 Key next_same_offset = {};
224};
225
226struct OffsetListTraits {
227 using T = SnapshotTable<OpIndex, KeyData>::Key;
228 static T** prev(T t) { return &(t.data().prev_same_offset); }
229 static T* next(T t) { return &(t.data().next_same_offset); }
230 static bool non_empty(T t) { return t.valid(); }
231};
232
233struct BaseListTraits {
234 using T = SnapshotTable<OpIndex, KeyData>::Key;
235 static T** prev(T t) { return &(t.data().prev_same_base); }
236 static T* next(T t) { return &(t.data().next_same_base); }
237 static bool non_empty(T t) { return t.valid(); }
238};
239
240struct BaseData {
241 using Key = SnapshotTable<OpIndex, KeyData>::Key;
242 // List of every value at this base that has an offset rather than an index.
243 v8::base::DoublyThreadedList<Key, BaseListTraits> with_offsets;
244 // List of every value at this base that has a valid index.
245 v8::base::DoublyThreadedList<Key, BaseListTraits> with_indices;
246};
247
248class LoadEliminationReplacement {
249 public:
250 enum class Kind {
251 kNone, // We don't replace the operation
252 kLoadElimination, // We load eliminate a load operation
253 // The following replacements are used for the special case optimization:
254 // TruncateWord64ToWord32(
255 // BitcastTaggedToWordPtrForTagAndSmiBits(Load(x, Tagged)))
256 // =>
257 // Load(x, Int32)
258 //
259 kTaggedLoadToInt32Load, // Turn a tagged load into a direct int32 load.
260 kTaggedBitcastElimination, // Remove this (now unused) bitcast.
261 kInt32TruncationElimination, // Replace truncation by the updated load.
262 };
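  // For the pattern above, the analyzer records three coordinated
  // replacements: kTaggedLoadToInt32Load on the Load, kTaggedBitcastElimination
  // on the bitcast and kInt32TruncationElimination on the truncation; they are
  // applied by REDUCE_INPUT_GRAPH(Load), REDUCE_INPUT_GRAPH(TaggedBitcast) and
  // REDUCE_INPUT_GRAPH(Change) in LateLoadEliminationReducer below.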
263
265
284
285 bool IsNone() const { return kind_ == Kind::kNone; }
289 }
296 OpIndex replacement() const { return replacement_; }
297
298 private:
299 LoadEliminationReplacement(Kind kind, OpIndex replacement)
300 : kind_(kind), replacement_(replacement) {}
301
302 Kind kind_;
303 OpIndex replacement_;
304};
305
306V8_EXPORT_PRIVATE bool IsInt32TruncatedLoadPattern(
307 const Graph& graph, OpIndex change_idx, const ChangeOp& change,
308 OpIndex* bitcast_idx = nullptr, OpIndex* load_idx = nullptr);
309
310class MemoryContentTable
311 : public ChangeTrackingSnapshotTable<MemoryContentTable, OpIndex, KeyData> {
312 public:
313 using Replacement = LoadEliminationReplacement;
314 explicit MemoryContentTable(
315 Zone* zone, SparseOpIndexSnapshotTable<bool>& non_aliasing_objects,
316 SparseOpIndexSnapshotTable<MapMaskAndOr>& object_maps,
317 FixedOpIndexSidetable<Replacement>& replacements)
318 : ChangeTrackingSnapshotTable(zone),
319 non_aliasing_objects_(non_aliasing_objects),
320 object_maps_(object_maps),
321 replacements_(replacements),
322 all_keys_(zone),
323 base_keys_(zone),
324 offset_keys_(zone) {}
325
326 void OnNewKey(Key key, OpIndex value) {
327 if (value.valid()) {
328 AddKeyInBaseOffsetMaps(key);
329 }
330 }
331
332 void OnValueChange(Key key, OpIndex old_value, OpIndex new_value) {
333 DCHECK_NE(old_value, new_value);
334 if (old_value.valid() && !new_value.valid()) {
335 RemoveKeyFromBaseOffsetMaps(key);
336 } else if (new_value.valid() && !old_value.valid()) {
337 AddKeyInBaseOffsetMaps(key);
338 } else {
339 DCHECK_EQ(new_value.valid(), old_value.valid());
340 }
341 }
342
343 // Invalidate all previous known memory that could alias with {store}.
344 void Invalidate(const StoreOp& store) {
345 Invalidate(store.base(), store.index(), store.offset);
346 }
347
348 void Invalidate(OpIndex base, OptionalOpIndex index, int32_t offset) {
349 TRACE("> MemoryContentTable: Invalidating based on "
350 << base << ", " << index << ", " << offset);
351 base = ResolveBase(base);
352
353 if (non_aliasing_objects_.Get(base)) {
354 TRACE(">> base is non-aliasing");
355 // Since {base} is non-aliasing, it's enough to just iterate the values at
356 // this base.
357 auto base_keys = base_keys_.find(base);
358 if (base_keys == base_keys_.end()) return;
359 for (auto it = base_keys->second.with_offsets.begin();
360 it != base_keys->second.with_offsets.end();) {
361 Key key = *it;
362 DCHECK_EQ(key.data().mem.base, base);
363 DCHECK(!key.data().mem.index.valid());
364 if (index.valid() || offset == key.data().mem.offset) {
365 // Overwrites {key}.
366 it = base_keys->second.with_offsets.RemoveAt(it);
367 TRACE(">>> invalidating " << key.data().mem);
369 } else {
370 ++it;
371 }
372 }
373 // Invalidate all of the values with a valid index at base {base}.
374 for (auto it = base_keys->second.with_indices.begin();
375 it != base_keys->second.with_indices.end();) {
376 Key key = *it;
377 DCHECK(key.data().mem.index.valid());
378 it = base_keys->second.with_indices.RemoveAt(it);
379 Set(key, OpIndex::Invalid());
380 }
381 } else {
382 TRACE(">> base is maybe-aliasing");
383 // {base} could alias with other things, so we iterate the whole state.
384 if (index.valid()) {
385 // {index} could be anything, so we invalidate everything.
386 TRACE(">> Invalidating everything because of valid index");
388 }
389
390 // Invalidating all of the values with valid Index.
391 // TODO(dmercadier): we could keep keys that don't alias here, but that
392 // would require doing a map lookup on the base of each key. A better
393 // alternative would probably be to have 2 {non_alias_index_keys_} and
394 // {maybe_alias_index_keys_} tables instead of just {index_keys_}. This
395 // has the downside that when a base stops being non-alias, all of its
396 // indexed memory cells have to be moved. This could be worked around by
397 // having these 2 tables contain BaseData.with_indices values instead of
398 // Keys, so that a whole BaseData.with_indices can be removed in a single
399 // operation from the global {non_alias_index_keys_}.
400 for (auto it = index_keys_.begin(); it != index_keys_.end();) {
401 Key key = *it;
402 it = index_keys_.RemoveAt(it);
403 TRACE(">>> Invalidating indexed memory " << key.data().mem);
405 }
406
407 TRACE(">>> Invalidating everything maybe-aliasing at offset " << offset);
409 }
410 }
411
412 // Invalidates all Keys that are not known as non-aliasing.
413 void InvalidateMaybeAliasing() {
414 TRACE(">> InvalidateMaybeAliasing");
415 // We find current active keys through {base_keys_} so that we can bail out
416 // for whole buckets of non-aliasing bases (if we had gone through
417 // {offset_keys_} instead, then for each key we would've had to check
418 // whether it was non-aliasing or not).
419 for (auto& base_keys : base_keys_) {
420 OpIndex base = base_keys.first;
421 if (non_aliasing_objects_.Get(base)) continue;
422 for (auto it = base_keys.second.with_offsets.begin();
423 it != base_keys.second.with_offsets.end();) {
424 Key key = *it;
425 // It's important to remove with RemoveAt before Setting the key to
426 // invalid, otherwise OnValueChange will remove {key} from {base_keys},
427 // which will invalidate {it}.
428 it = base_keys.second.with_offsets.RemoveAt(it);
429 TRACE(">>> Invalidating " << key.data().mem);
431 }
432 for (auto it = base_keys.second.with_indices.begin();
433 it != base_keys.second.with_indices.end();) {
434 Key key = *it;
435 it = base_keys.second.with_indices.RemoveAt(it);
436 TRACE(">>> Invalidating " << key.data().mem);
438 }
439 }
440 }
441
442 OpIndex Find(const LoadOp& load) {
443 OpIndex base = ResolveBase(load.base());
444 OptionalOpIndex index = load.index();
445 int32_t offset = load.offset;
446 uint8_t element_size_log2 = index.valid() ? load.element_size_log2 : 0;
447 uint8_t size = load.loaded_rep.SizeInBytes();
448
449 MemoryAddress mem{base, index, offset, element_size_log2, size};
450 auto key = all_keys_.find(mem);
451 if (key == all_keys_.end()) return OpIndex::Invalid();
452 return Get(key->second);
453 }
454
455 void Insert(const StoreOp& store) {
456 OpIndex base = ResolveBase(store.base());
457 OptionalOpIndex index = store.index();
458 int32_t offset = store.offset;
459 uint8_t element_size_log2 = index.valid() ? store.element_size_log2 : 0;
460 OpIndex value = store.value();
461 uint8_t size = store.stored_rep.SizeInBytes();
462
463 if (store.kind.is_immutable) {
464 InsertImmutable(base, index, offset, element_size_log2, size, value);
465 } else {
466 Insert(base, index, offset, element_size_log2, size, value);
467 }
468 }
469
470 void Insert(const LoadOp& load, OpIndex load_idx) {
471 OpIndex base = ResolveBase(load.base());
472 OptionalOpIndex index = load.index();
473 int32_t offset = load.offset;
474 uint8_t element_size_log2 = index.valid() ? load.element_size_log2 : 0;
475 uint8_t size = load.loaded_rep.SizeInBytes();
476
477 if (load.kind.is_immutable) {
478 InsertImmutable(base, index, offset, element_size_log2, size, load_idx);
479 } else {
480 Insert(base, index, offset, element_size_log2, size, load_idx);
481 }
482 }
483
484#ifdef DEBUG
485 void Print() {
486 std::cout << "MemoryContentTable:\n";
487 for (const auto& base_keys : base_keys_) {
488 for (Key key : base_keys.second.with_offsets) {
489 std::cout << " * " << key.data().mem.base << " - "
490 << key.data().mem.index << " - " << key.data().mem.offset
491 << " - " << key.data().mem.element_size_log2 << " ==> "
492 << Get(key) << "\n";
493 }
494 for (Key key : base_keys.second.with_indices) {
495 std::cout << " * " << key.data().mem.base << " - "
496 << key.data().mem.index << " - " << key.data().mem.offset
497 << " - " << key.data().mem.element_size_log2 << " ==> "
498 << Get(key) << "\n";
499 }
500 }
501 }
502#endif
503
504 private:
505 // To avoid pathological execution times, we cap the maximum number of
506 // keys we track. This is safe, because *not* tracking objects (even
507 // though we could) only makes us miss out on possible optimizations.
508 // TODO(dmercadier/jkummerow): Find a more elegant solution to keep
509 // execution time in check. One example of a test case can be found in
510 // crbug.com/v8/14370.
511 static constexpr size_t kMaxKeys = 10000;
512
513 void Insert(OpIndex base, OptionalOpIndex index, int32_t offset,
514 uint8_t element_size_log2, uint8_t size, OpIndex value) {
516
517 MemoryAddress mem{base, index, offset, element_size_log2, size};
518 TRACE("> MemoryContentTable: will insert " << mem
519 << " with value=" << value);
520 auto existing_key = all_keys_.find(mem);
521 if (existing_key != all_keys_.end()) {
522 TRACE(">> Reusing existing key");
523 Set(existing_key->second, value);
524 return;
525 }
526
527 if (all_keys_.size() > kMaxKeys) {
528 TRACE(">> Bailing out because too many keys");
529 return;
530 }
531
532 // Creating a new key.
533 Key key = NewKey({mem});
534 all_keys_.insert({mem, key});
535 Set(key, value);
536 }
537
538 void InsertImmutable(OpIndex base, OptionalOpIndex index, int32_t offset,
539 uint8_t element_size_log2, uint8_t size, OpIndex value) {
541
542 MemoryAddress mem{base, index, offset, element_size_log2, size};
543 TRACE("> MemoryContentTable: will insert immutable "
544 << mem << " with value=" << value);
545 auto existing_key = all_keys_.find(mem);
546 if (existing_key != all_keys_.end()) {
547 TRACE(">> Reusing existing key");
548 SetNoNotify(existing_key->second, value);
549 return;
550 }
551
552 if (all_keys_.size() > kMaxKeys) {
553 TRACE(">> Bailing out because too many keys");
554 return;
555 }
556
557 // Creating a new key.
558 Key key = NewKey({mem});
559 all_keys_.insert({mem, key});
560 // Call `SetNoNotify` to avoid calls to `OnNewKey` and `OnValueChanged`.
561 SetNoNotify(key, value);
562 }
563
564 void InvalidateAtOffset(int32_t offset, OpIndex base) {
565 MapMaskAndOr base_maps = object_maps_.Get(base);
566 auto offset_keys = offset_keys_.find(offset);
567 if (offset_keys == offset_keys_.end()) return;
568 for (auto it = offset_keys->second.begin();
569 it != offset_keys->second.end();) {
570 Key key = *it;
571 DCHECK_EQ(offset, key.data().mem.offset);
572 // It can overwrite previous stores to any base (except non-aliasing
573 // ones).
574 if (non_aliasing_objects_.Get(key.data().mem.base)) {
575 ++it;
576 continue;
577 }
578 MapMaskAndOr this_maps = key.data().mem.base == base
579 ? base_maps
580 : object_maps_.Get(key.data().mem.base);
581 if (!is_empty(base_maps) && !is_empty(this_maps) &&
582 !CouldHaveSameMap(base_maps, this_maps)) {
583 TRACE(">>>> InvalidateAtOffset: not invalidating thanks for maps: "
584 << key.data().mem);
585 ++it;
586 continue;
587 }
588 it = offset_keys->second.RemoveAt(it);
589 TRACE(">>>> InvalidateAtOffset: invalidating " << key.data().mem);
591 }
592 }
593
594 OpIndex ResolveBase(OpIndex base) {
595 while (replacements_[base].IsLoadElimination()) {
596 base = replacements_[base].replacement();
597 }
598 return base;
599 }
600
601 void AddKeyInBaseOffsetMaps(Key key) {
602 // Inserting in {base_keys_}.
603 OpIndex base = key.data().mem.base;
604 auto base_keys = base_keys_.find(base);
605 if (base_keys != base_keys_.end()) {
606 if (key.data().mem.index.valid()) {
607 base_keys->second.with_indices.PushFront(key);
608 } else {
609 base_keys->second.with_offsets.PushFront(key);
610 }
611 } else {
612 BaseData data;
613 if (key.data().mem.index.valid()) {
614 data.with_indices.PushFront(key);
615 } else {
616 data.with_offsets.PushFront(key);
617 }
618 base_keys_.insert({base, std::move(data)});
619 }
620
621 if (key.data().mem.index.valid()) {
622 // Inserting in {index_keys_}.
623 index_keys_.PushFront(key);
624 } else {
625 // Inserting in {offset_keys_}.
626 int offset = key.data().mem.offset;
627 auto offset_keys = offset_keys_.find(offset);
628 if (offset_keys != offset_keys_.end()) {
629 offset_keys->second.PushFront(key);
630 } else {
631 v8::base::DoublyThreadedList<Key, OffsetListTraits> list;
632 list.PushFront(key);
633 offset_keys_.insert({offset, std::move(list)});
634 }
635 }
636 }
637
638 void RemoveKeyFromBaseOffsetMaps(Key key) {
639 // Removing from {base_keys_} and {offset_keys_}.
640 v8::base::DoublyThreadedList<Key, BaseListTraits>::Remove(key);
641 v8::base::DoublyThreadedList<Key, OffsetListTraits>::Remove(key);
642 }
643
644 SparseOpIndexSnapshotTable<bool>& non_aliasing_objects_;
645 SparseOpIndexSnapshotTable<MapMaskAndOr>& object_maps_;
646 FixedOpIndexSidetable<Replacement>& replacements_;
647
648 // A map containing all of the keys, for fast lookup of a specific
649 // MemoryAddress.
650 ZoneAbslFlatHashMap<MemoryAddress, Key> all_keys_;
651 // Map from base OpIndex to keys associated with this base.
652 ZoneAbslFlatHashMap<OpIndex, BaseData> base_keys_;
653 // Map from offsets to keys associated with this offset.
654 ZoneAbslFlatHashMap<int, v8::base::DoublyThreadedList<Key, OffsetListTraits>>
655 offset_keys_;
656
657 // List of all of the keys that have a valid index.
658 v8::base::DoublyThreadedList<Key, OffsetListTraits> index_keys_;
659};
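// Sketch of how the analyzer below uses this table for the running example
// `x.a = 42; y = x.a;` (illustrative only; the real logic lives in
// LateLoadEliminationAnalyzer::ProcessStore/ProcessLoad):
//
//   memory_.Invalidate(store);  // drop entries the store could overwrite
//   memory_.Insert(store);      // remember {x, offset of a} => the 42 value
//   ...
//   OpIndex existing = memory_.Find(load);
//   if (existing.valid()) {
//     replacements_[load_idx] = Replacement::LoadElimination(existing);
//   } else {
//     memory_.Insert(load, load_idx);
//   }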
660
661class V8_EXPORT_PRIVATE LateLoadEliminationAnalyzer {
662 public:
663 using AliasTable = SparseOpIndexSnapshotTable<bool>;
664 using AliasKey = AliasTable::Key;
665 using AliasSnapshot = AliasTable::Snapshot;
666
667 using MapTable = SparseOpIndexSnapshotTable<MapMaskAndOr>;
668 using MapKey = MapTable::Key;
669 using MapSnapshot = MapTable::Snapshot;
670
671 using MemoryKey = MemoryContentTable::Key;
672 using MemorySnapshot = MemoryContentTable::Snapshot;
673
674 using Replacement = LoadEliminationReplacement;
675
676 enum class RawBaseAssumption {
677 kNoInnerPointer,
678 kMaybeInnerPointer,
679 };
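  // The reducer below picks kMaybeInnerPointer for the CSA pipeline
  // (TurboshaftPipelineKind::kCSA), i.e. when a "raw" base might point into
  // the middle of an object, and kNoInnerPointer otherwise.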
680
681 LateLoadEliminationAnalyzer(PipelineData* data, Graph& graph,
682 Zone* phase_zone, JSHeapBroker* broker,
683 RawBaseAssumption raw_base_assumption)
684 : data_(data),
685 graph_(graph),
686 phase_zone_(phase_zone),
687 broker_(broker),
688 raw_base_assumption_(raw_base_assumption),
689 replacements_(graph.op_id_count(), phase_zone, &graph),
690 non_aliasing_objects_(phase_zone),
691 object_maps_(phase_zone),
692 memory_(phase_zone, non_aliasing_objects_, object_maps_, replacements_),
693 block_to_snapshot_mapping_(graph.block_count(), phase_zone),
694 predecessor_alias_snapshots_(phase_zone),
695 predecessor_maps_snapshots_(phase_zone),
696 predecessor_memory_snapshots_(phase_zone) {
697 USE(data_);
698 }
699
700 void Run();
701
702 Replacement GetReplacement(OpIndex index) { return replacements_[index]; }
703
704 private:
705 void ProcessBlock(const Block& block, bool compute_start_snapshot);
706 void ProcessLoad(OpIndex op_idx, const LoadOp& op);
707 void ProcessStore(OpIndex op_idx, const StoreOp& op);
708 void ProcessAllocate(OpIndex op_idx, const AllocateOp& op);
709 void ProcessCall(OpIndex op_idx, const CallOp& op);
710 void ProcessAssumeMap(OpIndex op_idx, const AssumeMapOp& op);
711 void ProcessChange(OpIndex op_idx, const ChangeOp& change);
712
713 void DcheckWordBinop(OpIndex op_idx, const WordBinopOp& binop);
714
715 // BeginBlock initializes the various SnapshotTables for {block}, and returns
716 // true if {block} is a loop that should be revisited.
717 template <bool for_loop_revisit = false>
718 bool BeginBlock(const Block* block);
719 void FinishBlock(const Block* block);
720 // Seals the current snapshot, but discards it. This is used when considering
721 // whether a loop should be revisited or not: we recompute the loop header's
722 // snapshots, and then revisit the loop if the snapshots contain
723 // modifications. If the snapshots are unchanged, we discard them and don't
724 // revisit the loop.
725 void SealAndDiscard();
726 void StoreLoopSnapshotInForwardPredecessor(const Block& loop_header);
727
728 // Returns true if the loop's backedge already has snapshot data (meaning that
729 // it was already visited).
730 bool BackedgeHasSnapshot(const Block& loop_header) const;
731
732 void InvalidateAllNonAliasingInputs(const Operation& op);
733 void InvalidateIfAlias(OpIndex op_idx);
734
735 PipelineData* data_;
736 Graph& graph_;
737 Zone* phase_zone_;
738 JSHeapBroker* const broker_;
739 RawBaseAssumption raw_base_assumption_;
740
741#if V8_ENABLE_WEBASSEMBLY
742 bool is_wasm_ = data_->is_wasm();
743#endif
744
745 FixedOpIndexSidetable<Replacement> replacements_;
746 // We map: Load-index -> Change-index -> Bitcast-index
747 std::map<OpIndex, base::SmallMap<std::map<OpIndex, OpIndex>, 4>>
748 int32_truncated_loads_;
749
750 // TODO(dmercadier): {non_aliasing_objects_} tends to be weak for
751 // backing-stores, because they are often stored into an object right after
752 // being created, and often don't have other aliases throughout their
753 // lifetime. It would be more useful to have a more precise tracking of
754 // aliases. Storing a non-aliasing object into a potentially-aliasing one
755 // probably always means that the former becomes potentially-aliasing.
756 // However, storing a non-aliasing object into another non-aliasing object
757 // should be reasonably not-too-hard to track.
758 AliasTable non_aliasing_objects_;
759 MapTable object_maps_;
760 MemoryContentTable memory_;
761
762 struct Snapshot {
763 AliasSnapshot alias_snapshot;
764 MapSnapshot maps_snapshot;
765 MemorySnapshot memory_snapshot;
766 };
767 FixedBlockSidetable<std::optional<Snapshot>> block_to_snapshot_mapping_;
768
769 // {predecessor_alias_snapshots_}, {predecessor_maps_snapshots_} and
770 // {predecessor_memory_snapshots_} are used as temporary vectors when starting
771 // to process a block. We store them as members to avoid reallocation.
772 ZoneVector<AliasSnapshot> predecessor_alias_snapshots_;
773 ZoneVector<MapSnapshot> predecessor_maps_snapshots_;
774 ZoneVector<MemorySnapshot> predecessor_memory_snapshots_;
775};
776
777template <class Next>
778class LateLoadEliminationReducer : public Next {
779 public:
780 TURBOSHAFT_REDUCER_BOILERPLATE(LateLoadElimination)
782
783 void Analyze() {
784 if (is_wasm_ || v8_flags.turboshaft_load_elimination) {
785 DCHECK(AllowHandleDereference::IsAllowed());
786 analyzer_.Run();
787 }
788 Next::Analyze();
789 }
790
791 OpIndex REDUCE_INPUT_GRAPH(Load)(OpIndex ig_index, const LoadOp& load) {
792 if (is_wasm_ || v8_flags.turboshaft_load_elimination) {
793 Replacement replacement = analyzer_.GetReplacement(ig_index);
794 if (replacement.IsLoadElimination()) {
795 OpIndex replacement_ig_index = replacement.replacement();
796 OpIndex replacement_idx = Asm().MapToNewGraph(replacement_ig_index);
797 // The replacement might itself be a load that was int32-truncated.
798 if (analyzer_.GetReplacement(replacement_ig_index)
799 .IsTaggedLoadToInt32Load()) {
800 DCHECK_EQ(Asm().output_graph().Get(replacement_idx).outputs_rep()[0],
801 RegisterRepresentation::Word32());
802 } else {
803 DCHECK(Asm()
804 .output_graph()
805 .Get(replacement_idx)
806 .outputs_rep()[0]
807 .AllowImplicitRepresentationChangeTo(
808 load.outputs_rep()[0],
809 Asm().output_graph().IsCreatedFromTurbofan()));
810 }
811 return replacement_idx;
812 } else if (replacement.IsTaggedLoadToInt32Load()) {
813 auto loaded_rep = load.loaded_rep;
814 auto result_rep = load.result_rep;
815 DCHECK_EQ(result_rep, RegisterRepresentation::Tagged());
816 loaded_rep = MemoryRepresentation::Int32();
817 result_rep = RegisterRepresentation::Word32();
818 return Asm().Load(Asm().MapToNewGraph(load.base()),
819 Asm().MapToNewGraph(load.index()), load.kind,
820 loaded_rep, result_rep, load.offset,
821 load.element_size_log2);
822 }
823 }
824 return Next::ReduceInputGraphLoad(ig_index, load);
825 }
826
827 OpIndex REDUCE_INPUT_GRAPH(Change)(OpIndex ig_index, const ChangeOp& change) {
828 if (is_wasm_ || v8_flags.turboshaft_load_elimination) {
829 Replacement replacement = analyzer_.GetReplacement(ig_index);
830 if (replacement.IsInt32TruncationElimination()) {
831 DCHECK(
832 IsInt32TruncatedLoadPattern(Asm().input_graph(), ig_index, change));
833 return Asm().MapToNewGraph(replacement.replacement());
834 }
835 }
836 return Next::ReduceInputGraphChange(ig_index, change);
837 }
838
839 OpIndex REDUCE_INPUT_GRAPH(TaggedBitcast)(OpIndex ig_index,
840 const TaggedBitcastOp& bitcast) {
841 if (is_wasm_ || v8_flags.turboshaft_load_elimination) {
842 Replacement replacement = analyzer_.GetReplacement(ig_index);
843 if (replacement.IsTaggedBitcastElimination()) {
844 return OpIndex::Invalid();
845 }
846 }
847 return Next::ReduceInputGraphTaggedBitcast(ig_index, bitcast);
848 }
849
851 // AssumeMaps are currently not used after Load Elimination. We thus remove
852 // them now. If they ever become needed for later optimizations, we could
853 // consider leaving them in the graph and just ignoring them in the
854 // Instruction Selector.
855 return {};
856 }
857
858 private:
859 const bool is_wasm_ = __ data()->is_wasm();
860 using RawBaseAssumption = LateLoadEliminationAnalyzer::RawBaseAssumption;
861 RawBaseAssumption raw_base_assumption_ =
862 __ data()->pipeline_kind() == TurboshaftPipelineKind::kCSA
863 ? RawBaseAssumption::kMaybeInnerPointer
864 : RawBaseAssumption::kNoInnerPointer;
865 LateLoadEliminationAnalyzer analyzer_{__ data(), __ modifiable_input_graph(),
866 __ phase_zone(), __ data()->broker(),
867 raw_base_assumption_};
868};
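// Note: the analysis runs over the whole input graph in Analyze() (via
// LateLoadEliminationAnalyzer::Run), and the REDUCE_INPUT_GRAPH methods above
// then apply the recorded replacements while the graph is copied to the
// output graph. The reducer is meant to be composed into a Turboshaft reducer
// stack rather than used on its own.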
869
870#undef TRACE
871
873
874} // namespace v8::internal::compiler::turboshaft
875
876#endif // V8_COMPILER_TURBOSHAFT_LATE_LOAD_ELIMINATION_REDUCER_H_