wasm-load-elimination-reducer.h

// Copyright 2023 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_COMPILER_TURBOSHAFT_WASM_LOAD_ELIMINATION_REDUCER_H_
#define V8_COMPILER_TURBOSHAFT_WASM_LOAD_ELIMINATION_REDUCER_H_

#include <optional>

#if !V8_ENABLE_WEBASSEMBLY
#error This header should only be included if WebAssembly is enabled.
#endif  // !V8_ENABLE_WEBASSEMBLY

#include "src/base/doubly-threaded-list.h"
#include "src/compiler/turboshaft/analyzer-iterator.h"
#include "src/compiler/turboshaft/assembler.h"
#include "src/compiler/turboshaft/graph.h"
#include "src/compiler/turboshaft/loop-finder.h"
#include "src/compiler/turboshaft/operations.h"
#include "src/compiler/turboshaft/phase.h"
#include "src/compiler/turboshaft/sidetable.h"
#include "src/compiler/turboshaft/snapshot-table-opindex.h"
#include "src/compiler/turboshaft/utils.h"
#include "src/wasm/wasm-objects.h"
#include "src/wasm/wasm-subtyping.h"
#include "src/zone/zone.h"

namespace v8::internal::compiler::turboshaft {

#include "src/compiler/turboshaft/define-assembler-macros.inc"

// WLE is short for Wasm Load Elimination.
// We need the namespace because we reuse class names below that also exist
// in the LateLoadEliminationReducer, and in the same namespace that'd be
// an ODR violation, i.e. Undefined Behavior.
// TODO(jkummerow): Refactor the two Load Elimination implementations to
// reuse commonalities.
namespace wle {

// We model array length and string canonicalization as fields at negative
// indices.
static constexpr int kArrayLengthFieldIndex = -1;
static constexpr int kStringPrepareForGetCodeunitIndex = -2;
static constexpr int kStringAsWtf16Index = -3;
static constexpr int kAnyConvertExternIndex = -4;
static constexpr int kAssertNotNullIndex = -5;

// All "load-like" special cases use the same fake size and type. The specific
// values we use don't matter; for accurate alias analysis, the type should
// be "unrelated" to any struct type.
static constexpr wasm::ModuleTypeIndex kLoadLikeType{wasm::HeapType::kExtern};
static constexpr int kLoadLikeSize = 4;  // Chosen by fair dice roll.
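
// Illustrative sketch (not part of this header; mirrors the analyzer code
// further below): "load-like" operations such as array.length are cached as
// if they were field reads at one of the sentinel offsets above. {array} and
// {length_idx} are hypothetical OpIndex values.
#if 0
  // First array.length on {array}: remember its result under offset -1.
  memory_.InsertLoadLike(array, kArrayLengthFieldIndex, length_idx);
  // A later array.length on the same, not-yet-invalidated {array} finds the
  // earlier result and can be replaced by it.
  OpIndex cached = memory_.FindLoadLike(array, kArrayLengthFieldIndex);
#endif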

struct WasmMemoryAddress {
  OpIndex base;
  int32_t offset;
  wasm::ModuleTypeIndex type_index;
  uint8_t size;
  bool mutability;

  bool operator==(const WasmMemoryAddress& other) const {
    return base == other.base && offset == other.offset &&
           type_index == other.type_index && size == other.size &&
           mutability == other.mutability;
  }
};

inline size_t hash_value(WasmMemoryAddress const& mem) {
  return fast_hash_combine(mem.base, mem.offset, mem.type_index, mem.size,
                           mem.mutability);
}

struct KeyData {
  using Key = SnapshotTableKey<OpIndex, KeyData>;
  WasmMemoryAddress mem;
  // Pointers to the previous and the next Keys at the same base.
  Key* prev_same_base = nullptr;
  Key next_same_base = {};
  // Pointers to the previous and the next Keys at the same offset.
  Key* prev_same_offset = nullptr;
  Key next_same_offset = {};
};

struct OffsetListTraits {
  using T = SnapshotTable<OpIndex, KeyData>::Key;
  static T** prev(T t) { return &(t.data().prev_same_offset); }
  static T* next(T t) { return &(t.data().next_same_offset); }
  static bool non_empty(T t) { return t.valid(); }
};

struct BaseListTraits {
  using T = SnapshotTable<OpIndex, KeyData>::Key;
  static T** prev(T t) { return &(t.data().prev_same_base); }
  static T* next(T t) { return &(t.data().next_same_base); }
  static bool non_empty(T t) { return t.valid(); }
};

struct BaseData {
  using Key = SnapshotTable<OpIndex, KeyData>::Key;
  // List of every value at this base that has an offset rather than an index.
  v8::base::DoublyThreadedList<Key, BaseListTraits> with_offsets;
};

class WasmMemoryContentTable
    : public ChangeTrackingSnapshotTable<WasmMemoryContentTable, OpIndex,
                                         KeyData> {
 public:
  enum class EntriesWithOffsets { kInvalidate, kKeep };

  explicit WasmMemoryContentTable(
      PipelineData* data, Zone* zone,
      SparseOpIndexSnapshotTable<bool>& non_aliasing_objects,
      FixedOpIndexSidetable<OpIndex>& replacements, Graph& graph)
      : ChangeTrackingSnapshotTable(zone),
        non_aliasing_objects_(non_aliasing_objects),
        replacements_(replacements),
        data_(data),
        graph_(graph),
        all_keys_(zone),
        base_keys_(zone),
        offset_keys_(zone) {}

  void OnNewKey(Key key, OpIndex value) {
    if (value.valid()) {
      AddKeyInBaseOffsetMaps(key);
    }
  }

  void OnValueChange(Key key, OpIndex old_value, OpIndex new_value) {
    DCHECK_NE(old_value, new_value);
    if (old_value.valid() && !new_value.valid()) {
      RemoveKeyFromBaseOffsetMaps(key);
    } else if (new_value.valid() && !old_value.valid()) {
      AddKeyInBaseOffsetMaps(key);
    } else {
      DCHECK_EQ(new_value.valid(), old_value.valid());
    }
  }

  bool TypesUnrelated(wasm::ModuleTypeIndex type1,
                      wasm::ModuleTypeIndex type2) {
    return wasm::HeapTypesUnrelated(
        module_->heap_type(type1), module_->heap_type(type2), module_, module_);
  }

  void Invalidate(const StructSetOp& set) {
    // This is like LateLoadElimination's {InvalidateAtOffset}, but based
    // on Wasm types instead of tracked JS maps.
    int offset = field_offset(set.type, set.field_index);
    auto offset_keys = offset_keys_.find(offset);
    if (offset_keys == offset_keys_.end()) return;
    for (auto it = offset_keys->second.begin();
         it != offset_keys->second.end();) {
      Key key = *it;
      DCHECK_EQ(offset, key.data().mem.offset);
      OpIndex base = key.data().mem.base;

      // If the base is guaranteed non-aliasing, we don't need to clear any
      // other entries. Any existing entry for this base will be overwritten
      // by {Insert(set)}.
      if (non_aliasing_objects_.Get(base)) {
        ++it;
        continue;
      }

      if (TypesUnrelated(set.type_index, key.data().mem.type_index)) {
        ++it;
        continue;
      }

      it = offset_keys->second.RemoveAt(it);
      Set(key, OpIndex::Invalid());
    }
  }

  // Invalidates all Keys that are not known as non-aliasing.
  template <EntriesWithOffsets offsets = EntriesWithOffsets::kInvalidate>
  void InvalidateMaybeAliasing() {
    // We find the current active keys through {base_keys_} so that we can bail
    // out early for whole non-aliasing buckets (if we had gone through
    // {offset_keys_} instead, then for each key we would've had to check
    // whether it was non-aliasing or not).
    for (auto& base_keys : base_keys_) {
      OpIndex base = base_keys.first;
      if (non_aliasing_objects_.Get(base)) continue;
      if constexpr (offsets == EntriesWithOffsets::kInvalidate) {
        for (auto it = base_keys.second.with_offsets.begin();
             it != base_keys.second.with_offsets.end();) {
          Key key = *it;
          if (key.data().mem.mutability == false) {
            ++it;
            continue;
          }
          // It's important to remove with RemoveAt before setting the key to
          // invalid, otherwise OnValueChange will remove {key} from
          // {base_keys}, which would invalidate {it}.
          it = base_keys.second.with_offsets.RemoveAt(it);
          Set(key, OpIndex::Invalid());
        }
      }
    }
  }

  // TODO(jkummerow): Move this to the WasmStruct class?
  int field_offset(const wasm::StructType* type, int field_index) {
    return WasmStruct::kHeaderSize + type->field_offset(field_index);
  }

  OpIndex Find(const StructGetOp& get) {
    int32_t offset = field_offset(get.type, get.field_index);
    uint8_t size = get.type->field(get.field_index).value_kind_size();
    bool mutability = get.type->mutability(get.field_index);
    return FindImpl(ResolveBase(get.object()), offset, get.type_index, size,
                    mutability);
  }

  bool HasValueWithIncorrectMutability(const StructSetOp& set) {
    int32_t offset = field_offset(set.type, set.field_index);
    uint8_t size = set.type->field(set.field_index).value_kind_size();
    bool mutability = set.type->mutability(set.field_index);
    WasmMemoryAddress mem{ResolveBase(set.object()), offset, set.type_index,
                          size, !mutability};
    return all_keys_.find(mem) != all_keys_.end();
  }

  OpIndex FindLoadLike(OpIndex op_idx, int offset_sentinel) {
    static constexpr bool mutability = false;
    return FindImpl(ResolveBase(op_idx), offset_sentinel, kLoadLikeType,
                    kLoadLikeSize, mutability);
  }

  OpIndex FindImpl(OpIndex object, int offset, wasm::ModuleTypeIndex type_index,
                   uint8_t size, bool mutability,
                   OptionalOpIndex index = OptionalOpIndex::Nullopt()) {
    WasmMemoryAddress mem{object, offset, type_index, size, mutability};
    auto key = all_keys_.find(mem);
    if (key == all_keys_.end()) return OpIndex::Invalid();
    return Get(key->second);
  }

  void Insert(const StructSetOp& set) {
    OpIndex base = ResolveBase(set.object());
    int32_t offset = field_offset(set.type, set.field_index);
    uint8_t size = set.type->field(set.field_index).value_kind_size();
    bool mutability = set.type->mutability(set.field_index);
    Insert(base, offset, set.type_index, size, mutability, set.value());
  }

  void Insert(const StructGetOp& get, OpIndex get_idx) {
    OpIndex base = ResolveBase(get.object());
    int32_t offset = field_offset(get.type, get.field_index);
    uint8_t size = get.type->field(get.field_index).value_kind_size();
    bool mutability = get.type->mutability(get.field_index);
    Insert(base, offset, get.type_index, size, mutability, get_idx);
  }

  void InsertLoadLike(OpIndex base_idx, int offset_sentinel,
                      OpIndex value_idx) {
    OpIndex base = ResolveBase(base_idx);
    static constexpr bool mutability = false;
    Insert(base, offset_sentinel, kLoadLikeType, kLoadLikeSize, mutability,
           value_idx);
  }

#ifdef DEBUG
  void Print() {
    std::cout << "WasmMemoryContentTable:\n";
    for (const auto& base_keys : base_keys_) {
      for (Key key : base_keys.second.with_offsets) {
        std::cout << " * " << key.data().mem.base << " - "
                  << key.data().mem.offset << " ==> " << Get(key) << "\n";
      }
    }
  }
#endif  // DEBUG

 private:
  void Insert(OpIndex base, int32_t offset, wasm::ModuleTypeIndex type_index,
              uint8_t size, bool mutability, OpIndex value) {
    WasmMemoryAddress mem{base, offset, type_index, size, mutability};
    auto existing_key = all_keys_.find(mem);
    if (existing_key != all_keys_.end()) {
      if (mutability) {
        Set(existing_key->second, value);
      } else {
        SetNoNotify(existing_key->second, value);
      }
      return;
    }

    // Creating a new key.
    Key key = NewKey({mem});
    all_keys_.insert({mem, key});
    if (mutability) {
      Set(key, value);
    } else {
      // Call `SetNoNotify` to avoid calls to `OnNewKey` and `OnValueChange`.
      SetNoNotify(key, value);
    }
  }

 public:
  OpIndex ResolveBase(OpIndex base) {
    while (true) {
      if (replacements_[base].valid()) {
        base = replacements_[base];
        continue;
      }
      Operation& op = graph_.Get(base);
      if (AssertNotNullOp* check = op.TryCast<AssertNotNullOp>()) {
        base = check->object();
        continue;
      }
      if (WasmTypeCastOp* cast = op.TryCast<WasmTypeCastOp>()) {
        base = cast->object();
        continue;
      }
      break;  // Terminate if nothing happened.
    }
    return base;
  }
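
  // Illustrative sketch (not part of this header): why bases are resolved
  // through AssertNotNull/WasmTypeCast, in pseudo-IR with hypothetical values:
  //   s  = <some allocation>
  //   s2 = WasmTypeCast(s, ...)
  //   a  = StructGet(s,  field 0)   // recorded under the resolved base {s}
  //   b  = StructGet(s2, field 0)   // ResolveBase(s2) == ResolveBase(s), so
  //                                 // {b} can reuse the entry and become {a}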

  void AddKeyInBaseOffsetMaps(Key key) {
    // Inserting in {base_keys_}.
    OpIndex base = key.data().mem.base;
    auto base_keys = base_keys_.find(base);
    if (base_keys != base_keys_.end()) {
      base_keys->second.with_offsets.PushFront(key);
    } else {
      BaseData data;
      data.with_offsets.PushFront(key);
      base_keys_.insert({base, std::move(data)});
    }

    // Inserting in {offset_keys_}.
    int offset = key.data().mem.offset;
    auto offset_keys = offset_keys_.find(offset);
    if (offset_keys != offset_keys_.end()) {
      offset_keys->second.PushFront(key);
    } else {
      v8::base::DoublyThreadedList<Key, OffsetListTraits> list;
      list.PushFront(key);
      offset_keys_.insert({offset, std::move(list)});
    }
  }

  void RemoveKeyFromBaseOffsetMaps(Key key) {
    // Removing from {base_keys_}.
    v8::base::DoublyThreadedList<Key, BaseListTraits>::Remove(key);
    // Removing from {offset_keys_}.
    v8::base::DoublyThreadedList<Key, OffsetListTraits>::Remove(key);
  }

  SparseOpIndexSnapshotTable<bool>& non_aliasing_objects_;
  FixedOpIndexSidetable<OpIndex>& replacements_;

  PipelineData* data_;
  Graph& graph_;

  const wasm::WasmModule* module_ = data_->wasm_module();

  // TODO(dmercadier): consider using a faster datastructure than
  // ZoneUnorderedMap for {all_keys_}, {base_keys_} and {offset_keys_}.

  // A map containing all of the keys, for fast lookup of a specific
  // MemoryAddress.
  ZoneUnorderedMap<WasmMemoryAddress, Key> all_keys_;
  // Map from base OpIndex to keys associated with this base.
  ZoneUnorderedMap<OpIndex, BaseData> base_keys_;
  // Map from offsets to keys associated with this offset.
  ZoneUnorderedMap<int, v8::base::DoublyThreadedList<Key, OffsetListTraits>>
      offset_keys_;
};

}  // namespace wle

class WasmLoadEliminationAnalyzer {
 public:
  using AliasTable = SparseOpIndexSnapshotTable<bool>;
  using AliasKey = AliasTable::Key;
  using AliasSnapshot = AliasTable::Snapshot;

  using MemoryKey = wle::WasmMemoryContentTable::Key;
  using MemorySnapshot = wle::WasmMemoryContentTable::Snapshot;

  struct Snapshot {
    AliasSnapshot alias_snapshot;
    MemorySnapshot memory_snapshot;
  };

  WasmLoadEliminationAnalyzer(PipelineData* data, Graph& graph,
                              Zone* phase_zone)
      : graph_(graph),
        phase_zone_(phase_zone),
        replacements_(graph.op_id_count(), phase_zone, &graph),
        non_aliasing_objects_(phase_zone),
        memory_(data, phase_zone, non_aliasing_objects_, replacements_, graph),
        block_to_snapshot_mapping_(graph.block_count(), phase_zone),
        predecessor_alias_snapshots_(phase_zone),
        predecessor_memory_snapshots_(phase_zone) {}

  void Run() {
    LoopFinder loop_finder(phase_zone_, &graph_);
    AnalyzerIterator iterator(phase_zone_, graph_, loop_finder);

    bool compute_start_snapshot = true;
    while (iterator.HasNext()) {
      const Block* block = iterator.Next();

      ProcessBlock(*block, compute_start_snapshot);
      compute_start_snapshot = true;

      // Consider re-processing for loops.
      if (const GotoOp* last = block->LastOperation(graph_).TryCast<GotoOp>()) {
        if (last->destination->IsLoop() &&
            last->destination->LastPredecessor() == block) {
          const Block* loop_header = last->destination;
          // {block} is the backedge of a loop. We recompute the loop header's
          // initial snapshots, and if they differ from its original snapshots,
          // then we revisit the loop.
          if (BeginBlock<true>(loop_header)) {
            // We set the snapshot of the loop's 1st predecessor to the newly
            // computed snapshot. It's not quite correct, but this predecessor
            // is guaranteed to end with a Goto, and we are now visiting the
            // loop, which means that we don't really care about this
            // predecessor anymore.
            // The reason for saving this snapshot is to prevent infinite
            // looping, since the next time we reach this point, the backedge
            // snapshot could still invalidate things from the forward edge
            // snapshot. By restricting the forward edge snapshot, we prevent
            // this.
            const Block* loop_1st_pred =
                loop_header->LastPredecessor()->NeighboringPredecessor();
            FinishBlock(loop_1st_pred);
            // And we start a fresh snapshot from this predecessor.
            auto pred_snapshots =
                block_to_snapshot_mapping_[loop_1st_pred->index()];
            non_aliasing_objects_.StartNewSnapshot(
                pred_snapshots->alias_snapshot);
            memory_.StartNewSnapshot(pred_snapshots->memory_snapshot);

            iterator.MarkLoopForRevisit();
            compute_start_snapshot = false;
          } else {
            SealAndDiscard();
          }
        }
      }
    }
  }

  OpIndex Replacement(OpIndex index) { return replacements_[index]; }

 private:
  void ProcessBlock(const Block& block, bool compute_start_snapshot);
  void ProcessStructGet(OpIndex op_idx, const StructGetOp& op);
  void ProcessStructSet(OpIndex op_idx, const StructSetOp& op);
  void ProcessArrayLength(OpIndex op_idx, const ArrayLengthOp& op);
  void ProcessWasmAllocateArray(OpIndex op_idx, const WasmAllocateArrayOp& op);
  void ProcessStringAsWtf16(OpIndex op_idx, const StringAsWtf16Op& op);
  void ProcessStringPrepareForGetCodeUnit(
      OpIndex op_idx, const StringPrepareForGetCodeUnitOp& op);
  void ProcessAnyConvertExtern(OpIndex op_idx, const AnyConvertExternOp& op);
  void ProcessAssertNotNull(OpIndex op_idx, const AssertNotNullOp& op);
  void ProcessAllocate(OpIndex op_idx, const AllocateOp& op);
  void ProcessCall(OpIndex op_idx, const CallOp& op);
  void ProcessPhi(OpIndex op_idx, const PhiOp& op);

  void DcheckWordBinop(OpIndex op_idx, const WordBinopOp& binop);

  // BeginBlock initializes the various SnapshotTables for {block}, and returns
  // true if {block} is a loop that should be revisited.
  template <bool for_loop_revisit = false>
  bool BeginBlock(const Block* block);
  void FinishBlock(const Block* block);
  // Seals the current snapshot, but discards it. This is used when considering
  // whether a loop should be revisited or not: we recompute the loop header's
  // snapshots, and then revisit the loop if the snapshots contain
  // modifications. If the snapshots are unchanged, we discard them and don't
  // revisit the loop.
  void SealAndDiscard();
  void StoreLoopSnapshotInForwardPredecessor(const Block& loop_header);

  // Returns true if the loop's backedge already has snapshot data (meaning
  // that it was already visited).
  bool BackedgeHasSnapshot(const Block& loop_header) const;

  void InvalidateAllNonAliasingInputs(const Operation& op);
  void InvalidateIfAlias(OpIndex op_idx);

  Graph& graph_;
  Zone* phase_zone_;

  FixedOpIndexSidetable<OpIndex> replacements_;
  AliasTable non_aliasing_objects_;
  wle::WasmMemoryContentTable memory_;

  FixedBlockSidetable<std::optional<Snapshot>> block_to_snapshot_mapping_;

  // {predecessor_alias_snapshots_} and {predecessor_memory_snapshots_} are
  // used as temporary vectors when starting to process a block. We store them
  // as members to avoid reallocation.
  ZoneVector<AliasSnapshot> predecessor_alias_snapshots_;
  ZoneVector<MemorySnapshot> predecessor_memory_snapshots_;
};

template <class Next>
class WasmLoadEliminationReducer : public Next {
 public:
  TURBOSHAFT_REDUCER_BOILERPLATE(WasmLoadElimination)

  void Analyze() {
    if (v8_flags.turboshaft_wasm_load_elimination) {
      DCHECK(AllowHandleDereference::IsAllowed());
      analyzer_.Run();
    }
    Next::Analyze();
  }

#define EMIT_OP(Name)                                                       \
  OpIndex REDUCE_INPUT_GRAPH(Name)(OpIndex ig_index, const Name##Op& op) {  \
    if (v8_flags.turboshaft_wasm_load_elimination) {                        \
      OpIndex ig_replacement_index = analyzer_.Replacement(ig_index);       \
      if (ig_replacement_index.valid()) {                                   \
        OpIndex replacement = Asm().MapToNewGraph(ig_replacement_index);    \
        return replacement;                                                 \
      }                                                                     \
    }                                                                       \
    return Next::ReduceInputGraph##Name(ig_index, op);                      \
  }

  EMIT_OP(StructGet)
  EMIT_OP(ArrayLength)
  EMIT_OP(StringAsWtf16)
  EMIT_OP(StringPrepareForGetCodeUnit)
  EMIT_OP(AnyConvertExtern)

  OpIndex REDUCE_INPUT_GRAPH(StructSet)(OpIndex ig_index,
                                        const StructSetOp& op) {
    if (v8_flags.turboshaft_wasm_load_elimination) {
      OpIndex ig_replacement_index = analyzer_.Replacement(ig_index);
      if (ig_replacement_index.valid()) {
        // For struct.set, "replace with itself" is a sentinel for
        // "unreachable", and those are the only replacements we schedule for
        // this operation.
        DCHECK_EQ(ig_replacement_index, ig_index);
        __ Unreachable();
        return OpIndex::Invalid();
      }
    }
    return Next::ReduceInputGraphStructSet(ig_index, op);
  }

 private:
  WasmLoadEliminationAnalyzer analyzer_{
      Asm().data(), Asm().modifiable_input_graph(), Asm().phase_zone()};
};
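
// Illustrative sketch (not part of this header): a reducer like this is
// normally instantiated in a Turboshaft CopyingPhase stack; the exact stack
// lives in the corresponding phase implementation, not here, and the phase
// and companion reducer names below are assumptions for illustration only.
#if 0
void WasmLoadEliminationPhase::Run(PipelineData* data, Zone* temp_zone) {
  CopyingPhase<WasmLoadEliminationReducer, ValueNumberingReducer>::Run(
      data, temp_zone);
}
#endif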

void WasmLoadEliminationAnalyzer::ProcessBlock(const Block& block,
                                               bool compute_start_snapshot) {
  if (compute_start_snapshot) {
    BeginBlock(&block);
  }
  if (block.IsLoop() && BackedgeHasSnapshot(block)) {
    // Update the associated snapshot for the forward edge with the merged
    // snapshot information from the forward and backward edges.
    // This will make sure that when evaluating whether a loop needs to be
    // revisited, the inner loop compares the merged state with the backedge,
    // preventing exponential revisits for loops where the backedge
    // invalidates loads which are eliminable on the forward edge.
    StoreLoopSnapshotInForwardPredecessor(block);
  }

  for (OpIndex op_idx : graph_.OperationIndices(block)) {
    Operation& op = graph_.Get(op_idx);
    if (ShouldSkipOptimizationStep()) continue;
    if (ShouldSkipOperation(op)) continue;
    switch (op.opcode) {
      case Opcode::kStructGet:
        ProcessStructGet(op_idx, op.Cast<StructGetOp>());
        break;
      case Opcode::kStructSet:
        ProcessStructSet(op_idx, op.Cast<StructSetOp>());
        break;
      case Opcode::kArrayLength:
        ProcessArrayLength(op_idx, op.Cast<ArrayLengthOp>());
        break;
      case Opcode::kWasmAllocateArray:
        ProcessWasmAllocateArray(op_idx, op.Cast<WasmAllocateArrayOp>());
        break;
      case Opcode::kStringAsWtf16:
        ProcessStringAsWtf16(op_idx, op.Cast<StringAsWtf16Op>());
        break;
      case Opcode::kStringPrepareForGetCodeUnit:
        ProcessStringPrepareForGetCodeUnit(
            op_idx, op.Cast<StringPrepareForGetCodeUnitOp>());
        break;
      case Opcode::kAnyConvertExtern:
        ProcessAnyConvertExtern(op_idx, op.Cast<AnyConvertExternOp>());
        break;
      case Opcode::kAssertNotNull:
        // TODO(14108): We'll probably want to handle WasmTypeCast as
        // a "load-like" instruction too, to eliminate repeated casts.
        ProcessAssertNotNull(op_idx, op.Cast<AssertNotNullOp>());
        break;
      case Opcode::kArraySet:
        break;
      case Opcode::kAllocate:
        // Create new non-alias.
        ProcessAllocate(op_idx, op.Cast<AllocateOp>());
        break;
      case Opcode::kCall:
        // Invalidate state (+ maybe invalidate aliases).
        ProcessCall(op_idx, op.Cast<CallOp>());
        break;
      case Opcode::kPhi:
        // Invalidate aliases.
        ProcessPhi(op_idx, op.Cast<PhiOp>());
        break;
      case Opcode::kLoad:
        // Atomic loads have the "can_write" bit set, because they make
        // writes on other threads visible. At any rate, we have to
        // explicitly skip them here.
      case Opcode::kStore:
        // We rely on having no raw "Store" operations operating on Wasm
        // objects at this point in the pipeline.
        // TODO(jkummerow): Is there any way to DCHECK that?
      case Opcode::kAssumeMap:
      case Opcode::kCatchBlockBegin:
      case Opcode::kRetain:
      case Opcode::kDidntThrow:
      case Opcode::kCheckException:
      case Opcode::kAtomicRMW:
      case Opcode::kAtomicWord32Pair:
      case Opcode::kMemoryBarrier:
      case Opcode::kJSStackCheck:
      case Opcode::kWasmStackCheck:
      case Opcode::kSimd128LaneMemory:
      case Opcode::kGlobalSet:
      case Opcode::kParameter:
      case Opcode::kSetStackPointer:
        // We explicitly break for those operations that have can_write effects
        // but don't actually write, or that cannot interfere with load
        // elimination.
        break;

      case Opcode::kWordBinop:
        // A WordBinop should never invalidate aliases (since the only time
        // when it should take a non-aliasing object as input is for Smi
        // checks).
        DcheckWordBinop(op_idx, op.Cast<WordBinopOp>());
        break;

      case Opcode::kFrameState:
      case Opcode::kDeoptimizeIf:
      case Opcode::kComparison:
      case Opcode::kTrapIf:
        // We explicitly break for these opcodes so that we don't call
        // InvalidateAllNonAliasingInputs on their inputs, since they don't
        // really create aliases. (And also, they don't write, so it's
        // fine to break.)
        DCHECK(!op.Effects().can_write());
        break;

      case Opcode::kDeoptimize:
      case Opcode::kReturn:
        // We explicitly break for these opcodes so that we don't call
        // InvalidateAllNonAliasingInputs on their inputs, since they are block
        // terminators without successors, meaning that it's not useful for the
        // rest of the analysis to invalidate anything here.
        DCHECK(op.IsBlockTerminator() && SuccessorBlocks(op).empty());
        break;

      default:
        // Operations that `can_write` should invalidate the state. All such
        // operations should already be handled above, which means that we
        // don't need an `if (can_write) { Invalidate(); }` here.
        CHECK(!op.Effects().can_write());

        // Even if the operation doesn't write, it could create an alias to its
        // input by returning it. This happens for instance in Phis and in
        // Change. We are conservative here by calling
        // InvalidateAllNonAliasingInputs for all operations even though only
        // a few can actually create aliases to fresh allocations, the reason
        // being that missing such a case would be a security issue, and it
        // should be rare for fresh allocations to be used outside of
        // Call/Store/Load/Change anyway.
        InvalidateAllNonAliasingInputs(op);

        break;
    }
  }

  FinishBlock(&block);
}

// Returns true if replacing a load of a field with in-memory size
// {in_memory_size} and RegisterRepresentation {expected_reg_repr} by an
// already-computed operation with RegisterRepresentation {actual} is valid.
// For instance, replacing an operation that returns a Float64 by one that
// returns a Word64 is not valid. Similarly, replacing a Tagged with an
// untagged value is probably not valid because of the GC.
bool RepIsCompatible(RegisterRepresentation actual,
                     RegisterRepresentation expected_reg_repr,
                     uint8_t in_memory_size) {
  if (in_memory_size !=
      MemoryRepresentation::FromRegisterRepresentation(actual, true)
          .SizeInBytes()) {
    // The replacement was truncated when being stored or should be truncated
    // (or sign-extended) during the load. Since we don't have enough
    // truncation operators in Turboshaft (e.g., we don't have Int32-to-Int8
    // truncation), we just prevent load elimination in this case.

    // TODO(jkummerow): support eliminating repeated loads of the same i8/i16
    // field.
    return false;
  }

  return expected_reg_repr == actual;
}
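
// Illustrative sketch (not part of this header): why the size check matters.
// A struct.get of an i8 field produces a Word32, but the value that was
// stored into the field was truncated to one byte, so reusing the stored
// 32-bit value directly would be wrong (hypothetical arguments shown):
#if 0
  bool ok = RepIsCompatible(RegisterRepresentation::Word32(),  // actual
                            RegisterRepresentation::Word32(),  // expected
                            /*in_memory_size=*/1);             // i8 field
  // ok == false -> the load is not eliminated.
#endif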

void WasmLoadEliminationAnalyzer::ProcessStructGet(OpIndex op_idx,
                                                   const StructGetOp& get) {
  OpIndex existing = memory_.Find(get);
  if (existing.valid()) {
    const Operation& replacement = graph_.Get(existing);
    DCHECK_EQ(replacement.outputs_rep().size(), 1);
    DCHECK_EQ(get.outputs_rep().size(), 1);
    uint8_t size = get.type->field(get.field_index).value_kind_size();
    if (RepIsCompatible(replacement.outputs_rep()[0], get.outputs_rep()[0],
                        size)) {
      replacements_[op_idx] = existing;
      return;
    }
  }
  replacements_[op_idx] = OpIndex::Invalid();
  memory_.Insert(get, op_idx);
}
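
// Illustrative sketch (not part of this header): the effect of the analysis
// above at the Wasm level, for a hypothetical struct type $point with a field
// $x that is not invalidated in between:
//   (struct.get $point $x (local.get $p))  ;; recorded in {memory_}
//   ;; ... no aliasing struct.set to $x ...
//   (struct.get $point $x (local.get $p))  ;; {replacements_} maps this
//                                          ;; OpIndex to the first get, and
//                                          ;; the reducer rewires its uses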

void WasmLoadEliminationAnalyzer::ProcessStructSet(OpIndex op_idx,
                                                   const StructSetOp& set) {
  if (memory_.HasValueWithIncorrectMutability(set)) {
    // This struct.set is unreachable. We don't have a good way to annotate
    // it as such, so we use "replace with itself" as a sentinel.
    // TODO(jkummerow): Check how often this case is triggered in practice.
    replacements_[op_idx] = op_idx;
    return;
  }

  memory_.Invalidate(set);
  memory_.Insert(set);

  // Update aliases if the value stored was known to be non-aliasing.
  OpIndex value = set.value();
  if (non_aliasing_objects_.HasKeyFor(value)) {
    non_aliasing_objects_.Set(value, false);
  }
}

void WasmLoadEliminationAnalyzer::ProcessArrayLength(
    OpIndex op_idx, const ArrayLengthOp& length) {
  static constexpr int offset = wle::kArrayLengthFieldIndex;
  OpIndex existing = memory_.FindLoadLike(length.array(), offset);
  if (existing.valid()) {
#if DEBUG
    const Operation& replacement = graph_.Get(existing);
    DCHECK_EQ(replacement.outputs_rep().size(), 1);
    DCHECK_EQ(length.outputs_rep().size(), 1);
    DCHECK_EQ(replacement.outputs_rep()[0], length.outputs_rep()[0]);
#endif
    replacements_[op_idx] = existing;
    return;
  }
  replacements_[op_idx] = OpIndex::Invalid();
  memory_.InsertLoadLike(length.array(), offset, op_idx);
}

void WasmLoadEliminationAnalyzer::ProcessWasmAllocateArray(
    OpIndex op_idx, const WasmAllocateArrayOp& alloc) {
  non_aliasing_objects_.Set(op_idx, true);
  static constexpr int offset = wle::kArrayLengthFieldIndex;
  memory_.InsertLoadLike(op_idx, offset, alloc.length());
}

void WasmLoadEliminationAnalyzer::ProcessStringAsWtf16(
    OpIndex op_idx, const StringAsWtf16Op& op) {
  static constexpr int offset = wle::kStringAsWtf16Index;
  OpIndex existing = memory_.FindLoadLike(op.string(), offset);
  if (existing.valid()) {
    DCHECK_EQ(Opcode::kStringAsWtf16, graph_.Get(existing).opcode);
    replacements_[op_idx] = existing;
    return;
  }
  replacements_[op_idx] = OpIndex::Invalid();
  memory_.InsertLoadLike(op.string(), offset, op_idx);
}

void WasmLoadEliminationAnalyzer::ProcessStringPrepareForGetCodeUnit(
    OpIndex op_idx, const StringPrepareForGetCodeUnitOp& prep) {
  static constexpr int offset = wle::kStringPrepareForGetCodeunitIndex;
  OpIndex existing = memory_.FindLoadLike(prep.string(), offset);
  if (existing.valid()) {
    DCHECK_EQ(Opcode::kStringPrepareForGetCodeUnit,
              graph_.Get(existing).opcode);
    replacements_[op_idx] = existing;
    return;
  }
  replacements_[op_idx] = OpIndex::Invalid();
  memory_.InsertLoadLike(prep.string(), offset, op_idx);
}

void WasmLoadEliminationAnalyzer::ProcessAnyConvertExtern(
    OpIndex op_idx, const AnyConvertExternOp& convert) {
  static constexpr int offset = wle::kAnyConvertExternIndex;
  OpIndex existing = memory_.FindLoadLike(convert.object(), offset);
  if (existing.valid()) {
    DCHECK_EQ(Opcode::kAnyConvertExtern, graph_.Get(existing).opcode);
    replacements_[op_idx] = existing;
    return;
  }
  replacements_[op_idx] = OpIndex::Invalid();
  memory_.InsertLoadLike(convert.object(), offset, op_idx);
}

void WasmLoadEliminationAnalyzer::ProcessAssertNotNull(
    OpIndex op_idx, const AssertNotNullOp& assert) {
  static constexpr int offset = wle::kAssertNotNullIndex;
  OpIndex existing = memory_.FindLoadLike(assert.object(), offset);
  if (existing.valid()) {
    DCHECK_EQ(Opcode::kAssertNotNull, graph_.Get(existing).opcode);
    replacements_[op_idx] = existing;
    return;
  }
  replacements_[op_idx] = OpIndex::Invalid();
  memory_.InsertLoadLike(assert.object(), offset, op_idx);
}

// Since we only loosely keep track of what can or can't alias, we assume that
// anything that was guaranteed to not alias with anything (because it's in
// {non_aliasing_objects_}) can alias with anything when coming back from the
// call if it was an argument of the call.
void WasmLoadEliminationAnalyzer::ProcessCall(OpIndex op_idx,
                                              const CallOp& op) {
  // Some builtins do not create aliases and do not invalidate existing
  // memory, and some even return fresh objects. For such cases, we don't
  // invalidate the state, and record the non-alias if any.
  if (!op.Effects().can_write()) {
    return;
  }
  // TODO(jkummerow): Add special handling for builtins that are known not to
  // have relevant side effects. Alternatively, specify their effects to not
  // include `CanWriteMemory()`.
#if 0
  if (auto builtin_id = TryGetBuiltinId(
          graph_.Get(op.callee()).TryCast<ConstantOp>(), broker_)) {
    switch (*builtin_id) {
      case Builtin::kExample:
        // This builtin touches no Wasm objects, and calls no other functions.
        return;
      default:
        break;
    }
  }
#endif
  // Not a builtin call, or not a builtin that we know doesn't invalidate
  // memory.

  InvalidateAllNonAliasingInputs(op);

  // The call could modify arbitrary memory, so we invalidate every
  // potentially-aliasing object.
  memory_.InvalidateMaybeAliasing();
}

void WasmLoadEliminationAnalyzer::InvalidateAllNonAliasingInputs(
    const Operation& op) {
  for (OpIndex input : op.inputs()) {
    InvalidateIfAlias(input);
  }
}

void WasmLoadEliminationAnalyzer::InvalidateIfAlias(OpIndex op_idx) {
  if (auto key = non_aliasing_objects_.TryGetKeyFor(op_idx);
      key.has_value() && non_aliasing_objects_.Get(*key)) {
    // A known non-aliasing object was passed as input to the Call; the Call
    // could create aliases, so we have to consider going forward that this
    // object could actually have aliases.
    non_aliasing_objects_.Set(*key, false);
  }
}

// The only time an Allocate should flow into a WordBinop is for Smi checks
// (which, by the way, should be removed by MachineOptimizationReducer (since
// Allocate never returns a Smi), but there is no guarantee that this happens
// before load elimination). So, there is no need to invalidate non-aliases,
// and we just DCHECK in this function that indeed, nothing other than a Smi
// check happens on non-aliasing objects.
void WasmLoadEliminationAnalyzer::DcheckWordBinop(OpIndex op_idx,
                                                  const WordBinopOp& binop) {
#ifdef DEBUG
  auto check = [&](V<Word> left, V<Word> right) {
    if (auto key = non_aliasing_objects_.TryGetKeyFor(left);
        key.has_value() && non_aliasing_objects_.Get(*key)) {
      int64_t cst;
      DCHECK_EQ(binop.kind, WordBinopOp::Kind::kBitwiseAnd);
      DCHECK(OperationMatcher(graph_).MatchSignedIntegralConstant(right, &cst));
      DCHECK_EQ(cst, kSmiTagMask);
    }
  };
  check(binop.left(), binop.right());
  check(binop.right(), binop.left());
#endif
}

void WasmLoadEliminationAnalyzer::ProcessAllocate(OpIndex op_idx,
                                                  const AllocateOp&) {
  // In particular, this handles {struct.new}.
  non_aliasing_objects_.Set(op_idx, true);
}

void WasmLoadEliminationAnalyzer::ProcessPhi(OpIndex op_idx, const PhiOp& phi) {
  InvalidateAllNonAliasingInputs(phi);

  base::Vector<const OpIndex> inputs = phi.inputs();
  // This copies some of the functionality of {RequiredOptimizationReducer}:
  // Phis whose inputs are all the same value can be replaced by that value.
  // We need to have this logic here because interleaving it with other cases
  // of load elimination can unlock further optimizations: simplifying Phis
  // can allow elimination of more loads, which can then allow simplification
  // of even more Phis.
  if (inputs.size() > 0) {
    bool same_inputs = true;
    OpIndex first = memory_.ResolveBase(inputs.first());
    for (const OpIndex& input : inputs.SubVectorFrom(1)) {
      if (memory_.ResolveBase(input) != first) {
        same_inputs = false;
        break;
      }
    }
    if (same_inputs) {
      replacements_[op_idx] = first;
    }
  }
}
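
// Illustrative sketch (not part of this header): the Phi simplification above
// in pseudo-IR with hypothetical values. Both inputs resolve to the same
// base, so the Phi is scheduled for replacement by that value:
//   x  = <some op>
//   x2 = AssertNotNull(x)   // ResolveBase(x2) == x
//   p  = Phi(x, x2)         // replacements_[p] = x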

void WasmLoadEliminationAnalyzer::FinishBlock(const Block* block) {
  block_to_snapshot_mapping_[block->index()] =
      Snapshot{non_aliasing_objects_.Seal(), memory_.Seal()};
}

void WasmLoadEliminationAnalyzer::SealAndDiscard() {
  non_aliasing_objects_.Seal();
  memory_.Seal();
}

void WasmLoadEliminationAnalyzer::StoreLoopSnapshotInForwardPredecessor(
    const Block& loop_header) {
  auto non_aliasing_snapshot = non_aliasing_objects_.Seal();
  auto memory_snapshot = memory_.Seal();

  block_to_snapshot_mapping_
      [loop_header.LastPredecessor()->NeighboringPredecessor()->index()] =
          Snapshot{non_aliasing_snapshot, memory_snapshot};

  non_aliasing_objects_.StartNewSnapshot(non_aliasing_snapshot);
  memory_.StartNewSnapshot(memory_snapshot);
}

bool WasmLoadEliminationAnalyzer::BackedgeHasSnapshot(
    const Block& loop_header) const {
  DCHECK(loop_header.IsLoop());
  return block_to_snapshot_mapping_[loop_header.LastPredecessor()->index()]
      .has_value();
}

template <bool for_loop_revisit>
bool WasmLoadEliminationAnalyzer::BeginBlock(const Block* block) {
  DCHECK_IMPLIES(
      for_loop_revisit,
      block->IsLoop() &&
          block_to_snapshot_mapping_[block->LastPredecessor()->index()]
              .has_value());

  // Collect the snapshots of all predecessors.
  {
    predecessor_alias_snapshots_.clear();
    predecessor_memory_snapshots_.clear();
    for (const Block* p : block->PredecessorsIterable()) {
      auto pred_snapshots = block_to_snapshot_mapping_[p->index()];
      // When we visit the loop for the first time, the loop header hasn't
      // been visited yet, so we ignore it.
      DCHECK_IMPLIES(!pred_snapshots.has_value(),
                     block->IsLoop() && block->LastPredecessor() == p);
      if (!pred_snapshots.has_value()) {
        DCHECK(!for_loop_revisit);
        continue;
      }
      // Note that the backedge snapshot of an inner loop in kFirstVisit will
      // also be taken into account if we are in the kSecondVisit of an outer
      // loop. The data in the backedge snapshot could be out-dated, but if it
      // is, then it's fine: if the backedge of the outer loop was more
      // restrictive than its forward incoming edge, then the forward incoming
      // edge of the inner loop should reflect this restriction.
      predecessor_alias_snapshots_.push_back(pred_snapshots->alias_snapshot);
      predecessor_memory_snapshots_.push_back(pred_snapshots->memory_snapshot);
    }
  }

  // Note that predecessors are in reverse order, which means that the backedge
  // is at offset 0.
  constexpr int kBackedgeOffset = 0;
  constexpr int kForwardEdgeOffset = 1;

  bool loop_needs_revisit = false;
  // Start a new snapshot for this block by merging information from
  // predecessors.
  auto merge_aliases = [&](AliasKey key,
                           base::Vector<const bool> predecessors) -> bool {
    if (for_loop_revisit && predecessors[kForwardEdgeOffset] &&
        !predecessors[kBackedgeOffset]) {
      // The backedge doesn't think that {key} is no-alias, but the loop
      // header previously thought it was --> need to revisit.
      loop_needs_revisit = true;
    }
    return base::all_of(predecessors);
  };
  non_aliasing_objects_.StartNewSnapshot(
      base::VectorOf(predecessor_alias_snapshots_), merge_aliases);

  // Merging for {memory_} means setting values to Invalid unless all
  // predecessors have the same value.
  // TODO(dmercadier): we could insert Phis during the pass to merge existing
  // information. This is a bit hard, because we are currently in an analyzer
  // rather than a reducer. Still, we could "prepare" the insertion now and
  // then really insert them during the Reduce phase of the CopyingPhase.
  auto merge_memory = [&](MemoryKey key,
                          base::Vector<const OpIndex> predecessors) -> OpIndex {
    if (for_loop_revisit && predecessors[kForwardEdgeOffset].valid() &&
        predecessors[kBackedgeOffset] != predecessors[kForwardEdgeOffset]) {
      // {key} had a value in the loop header, but the backedge and the forward
      // edge don't agree on its value, which means that the loop invalidated
      // some memory data, and thus needs to be revisited.
      loop_needs_revisit = true;
    }
    return base::all_equal(predecessors) ? predecessors[0] : OpIndex::Invalid();
  };
  memory_.StartNewSnapshot(base::VectorOf(predecessor_memory_snapshots_),
                           merge_memory);

  if (block->IsLoop()) return loop_needs_revisit;
  return false;
}

#include "src/compiler/turboshaft/undefine-assembler-macros.inc"

}  // namespace v8::internal::compiler::turboshaft

#endif  // V8_COMPILER_TURBOSHAFT_WASM_LOAD_ELIMINATION_REDUCER_H_