memory-optimization-reducer.h
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_COMPILER_TURBOSHAFT_MEMORY_OPTIMIZATION_REDUCER_H_
#define V8_COMPILER_TURBOSHAFT_MEMORY_OPTIMIZATION_REDUCER_H_

#include <optional>

// (Project-internal #includes elided in this listing.)

namespace v8::internal::compiler::turboshaft {

#include "src/compiler/turboshaft/define-assembler-macros.inc"

const TSCallDescriptor* CreateAllocateBuiltinDescriptor(Zone* zone,
                                                        Isolate* isolate);

inline bool ValueNeedsWriteBarrier(const Graph* graph, const Operation& value,
                                   Isolate* isolate) {
  if (value.Is<Opmask::kBitcastWordPtrToSmi>()) {
    return false;
  } else if (const ConstantOp* constant = value.TryCast<ConstantOp>()) {
    if (constant->kind == ConstantOp::Kind::kHeapObject) {
      RootIndex root_index;
      if (isolate->roots_table().IsRootHandle(constant->handle(),
                                              &root_index) &&
          RootsTable::IsImmortalImmovable(root_index)) {
        return false;
      }
    }
  } else if (const PhiOp* phi = value.TryCast<PhiOp>()) {
    if (phi->rep == RegisterRepresentation::Tagged()) {
      return base::any_of(phi->inputs(), [graph, isolate](OpIndex input) {
        const Operation& input_op = graph->Get(input);
        // If we have a Phi as the Phi's input, we give up to avoid infinite
        // recursion.
        if (input_op.Is<PhiOp>()) return true;
        return ValueNeedsWriteBarrier(graph, input_op, isolate);
      });
    }
  }
  return true;
}

inline const AllocateOp* UnwrapAllocate(const Graph* graph,
                                        const Operation* op) {
  while (true) {
    if (const AllocateOp* allocate = op->TryCast<AllocateOp>()) {
      return allocate;
    } else if (const TaggedBitcastOp* bitcast =
                   op->TryCast<TaggedBitcastOp>()) {
      op = &graph->Get(bitcast->input());
    } else if (const WordBinopOp* binop = op->TryCast<WordBinopOp>();
               binop && binop->kind == any_of(WordBinopOp::Kind::kAdd,
                                              WordBinopOp::Kind::kSub)) {
      op = &graph->Get(binop->left());
    } else {
      return nullptr;
    }
  }
}

// The main purpose of memory optimization is folding multiple allocations
// into one. For this, the first allocation reserves additional space that is
// then consumed by subsequent allocations, which only move the allocation top
// pointer and are therefore guaranteed to succeed. Another nice side-effect
// of allocation folding is that more stores are performed on the most recent
// allocation, which allows us to eliminate the write barrier for the store.
//
// This analysis works by keeping track of the most recent non-folded
// allocation, as well as the number of bytes this allocation needs to reserve
// to satisfy all subsequent allocations.
// We can do write barrier elimination across loops if the loop does not
// contain any potentially allocating operations.
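//
// For example (an illustrative sketch; the sizes are arbitrary): given two
// consecutive young allocations
//
//   a = Allocate(16, kYoung);
//   b = Allocate(24, kYoung);
//
// the analysis lets the first allocation reserve 16 + 24 = 40 bytes. The
// second allocation then only bumps the already-reserved allocation top (a
// "folded" allocation), which cannot fail and needs no limit check, and
// stores of `b` into `a` can skip the write barrier because `a` is part of
// the most recent allocation group.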
struct MemoryAnalyzer {
  enum class AllocationFolding { kDoAllocationFolding, kDontAllocationFolding };

  PipelineData* data;
  Zone* phase_zone;
  const Graph& input_graph;
  Isolate* isolate_ = data->isolate();
  AllocationFolding allocation_folding;
  bool is_wasm;
  MemoryAnalyzer(PipelineData* data, Zone* phase_zone,
                 const Graph& input_graph,
                 AllocationFolding allocation_folding, bool is_wasm)
      : data(data),
        phase_zone(phase_zone),
        input_graph(input_graph),
        allocation_folding(allocation_folding),
        is_wasm(is_wasm) {}

  struct BlockState {
    const AllocateOp* last_allocation = nullptr;
    std::optional<uint32_t> reserved_size = std::nullopt;

    bool operator!=(const BlockState& other) {
      return last_allocation != other.last_allocation ||
             reserved_size != other.reserved_size;
    }
  };
  FixedBlockSidetable<std::optional<BlockState>> block_states{
      input_graph.block_count(), phase_zone};
  ZoneAbslFlatHashMap<const AllocateOp*, const AllocateOp*> folded_into{
      phase_zone};
  ZoneAbslFlatHashSet<V<None>> skipped_write_barriers{phase_zone};
  ZoneAbslFlatHashMap<const AllocateOp*, uint32_t> reserved_size{phase_zone};
  BlockState state;
  TurboshaftPipelineKind pipeline_kind = data->pipeline_kind();

  bool IsPartOfLastAllocation(const Operation* op) {
    const AllocateOp* allocation = UnwrapAllocate(&input_graph, op);
    if (allocation == nullptr) return false;
    if (state.last_allocation == nullptr) return false;
    if (state.last_allocation->type != AllocationType::kYoung) return false;
    if (state.last_allocation == allocation) return true;
    auto it = folded_into.find(allocation);
    if (it == folded_into.end()) return false;
    return it->second == state.last_allocation;
  }

  bool SkipWriteBarrier(const StoreOp& store) {
    const Operation& object = input_graph.Get(store.base());
    const Operation& value = input_graph.Get(store.value());

    WriteBarrierKind write_barrier_kind = store.write_barrier;
    if (write_barrier_kind != WriteBarrierKind::kAssertNoWriteBarrier) {
      // If we have {kAssertNoWriteBarrier}, we cannot skip elimination
      // checks.
      if (ShouldSkipOptimizationStep()) return false;
    }
    if (IsPartOfLastAllocation(&object)) return true;
    if (!ValueNeedsWriteBarrier(&input_graph, value, isolate_)) return true;
    if (v8_flags.disable_write_barriers) return true;
    if (write_barrier_kind == WriteBarrierKind::kAssertNoWriteBarrier) {
      std::stringstream str;
      str << "MemoryOptimizationReducer could not remove write barrier for "
             "operation\n #"
          << input_graph.Index(store) << ": " << store.ToString() << "\n";
      FATAL("%s", str.str().c_str());
    }
    return false;
  }

  bool IsFoldedAllocation(V<AnyOrNone> op) {
    return folded_into.count(
        input_graph.Get(op).template TryCast<AllocateOp>());
  }

  std::optional<uint32_t> ReservedSize(V<AnyOrNone> alloc) {
    if (auto it = reserved_size.find(
            input_graph.Get(alloc).template TryCast<AllocateOp>());
        it != reserved_size.end()) {
      return it->second;
    }
    return std::nullopt;
  }

  void Run();

  void Process(const Operation& op);
  void ProcessBlockTerminator(const Operation& op);
  void ProcessAllocation(const AllocateOp& alloc);
  void ProcessStore(const StoreOp& store);
  void MergeCurrentStateIntoSuccessor(const Block* successor);
};

template <class Next>
class MemoryOptimizationReducer : public Next {
 public:
  TURBOSHAFT_REDUCER_BOILERPLATE(MemoryOptimization)
  // TODO(dmercadier): Add a static_assert that this is run as part of a
  // CopyingPhase.
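  //
  // A minimal usage sketch (assuming the usual Turboshaft phase structure;
  // "MyMemoryOptimizationPhase" is a hypothetical name): this reducer is
  // meant to be instantiated as part of a CopyingPhase reducer stack, e.g.
  //
  //   void MyMemoryOptimizationPhase::Run(PipelineData* data, Zone* temp_zone) {
  //     CopyingPhase<MemoryOptimizationReducer>::Run(data, temp_zone);
  //   }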

  void Analyze() {
    auto* info = __ data()->info();
#if V8_ENABLE_WEBASSEMBLY
    bool is_wasm = info->IsWasm() || info->IsWasmBuiltin();
#else
    bool is_wasm = false;
#endif
    analyzer_.emplace(
        __ data(), __ phase_zone(), __ input_graph(),
        info->allocation_folding()
            ? MemoryAnalyzer::AllocationFolding::kDoAllocationFolding
            : MemoryAnalyzer::AllocationFolding::kDontAllocationFolding,
        is_wasm);
    analyzer_->Run();
    Next::Analyze();
  }

  V<None> REDUCE_INPUT_GRAPH(Store)(V<None> ig_index, const StoreOp& store) {
    if (store.write_barrier != WriteBarrierKind::kAssertNoWriteBarrier) {
      // We cannot skip this optimization if we have to eliminate a
      // {kAssertNoWriteBarrier}.
      if (ShouldSkipOptimizationStep()) {
        return Next::ReduceInputGraphStore(ig_index, store);
      }
    }
    if (analyzer_->skipped_write_barriers.count(ig_index)) {
      __ Store(__ MapToNewGraph(store.base()), __ MapToNewGraph(store.index()),
               __ MapToNewGraph(store.value()), store.kind, store.stored_rep,
               WriteBarrierKind::kNoWriteBarrier, store.offset,
               store.element_size_log2,
               store.maybe_initializing_or_transitioning,
               store.indirect_pointer_tag());
      return V<None>::Invalid();
    }
    return Next::ReduceInputGraphStore(ig_index, store);
  }

  V<HeapObject> REDUCE(Allocate)(V<WordPtr> size, AllocationType type) {
    if (v8_flags.single_generation && type == AllocationType::kYoung) {
      type = AllocationType::kOld;
    }

    V<WordPtr> top_address;
    if (isolate_ != nullptr) {
      top_address = __ ExternalConstant(
          type == AllocationType::kYoung
              ? ExternalReference::new_space_allocation_top_address(isolate_)
              : ExternalReference::old_space_allocation_top_address(isolate_));
    } else {
      // Wasm mode: producing isolate-independent code, loading the isolate
      // address at runtime.
#if V8_ENABLE_WEBASSEMBLY
      V<WasmTrustedInstanceData> instance_data = __ WasmInstanceDataParameter();
      int top_address_offset =
          type == AllocationType::kYoung
              ? WasmTrustedInstanceData::kNewAllocationTopAddressOffset
              : WasmTrustedInstanceData::kOldAllocationTopAddressOffset;
      top_address =
          __ Load(instance_data, LoadOp::Kind::TaggedBase(),
                  MemoryRepresentation::UintPtr(), top_address_offset);
#else
      UNREACHABLE();
#endif  // V8_ENABLE_WEBASSEMBLY
    }

    if (analyzer_->IsFoldedAllocation(__ current_operation_origin())) {
      DCHECK_NE(__ GetVariable(top(type)), V<WordPtr>::Invalid());
      V<WordPtr> obj_addr = __ GetVariable(top(type));
      __ SetVariable(top(type), __ WordPtrAdd(__ GetVariable(top(type)), size));
      __ StoreOffHeap(top_address, __ GetVariable(top(type)),
                      MemoryRepresentation::UintPtr());
      return __ BitcastWordPtrToHeapObject(
          __ WordPtrAdd(obj_addr, __ IntPtrConstant(kHeapObjectTag)));
    }

    __ SetVariable(top(type), __ LoadOffHeap(top_address,
                                             MemoryRepresentation::UintPtr()));

    V<CallTarget> allocate_builtin;
    if (!analyzer_->is_wasm) {
      if (type == AllocationType::kYoung) {
        allocate_builtin =
            __ BuiltinCode(Builtin::kAllocateInYoungGeneration, isolate_);
      } else {
        allocate_builtin =
            __ BuiltinCode(Builtin::kAllocateInOldGeneration, isolate_);
      }
    } else {
#if V8_ENABLE_WEBASSEMBLY
      // This lowering is used by Wasm, where we compile isolate-independent
      // code. Builtin calls simply encode the target builtin ID, which will
      // be patched to the builtin's address later.
      if (isolate_ == nullptr) {
        Builtin builtin;
        if (type == AllocationType::kYoung) {
          builtin = Builtin::kWasmAllocateInYoungGeneration;
        } else {
          builtin = Builtin::kWasmAllocateInOldGeneration;
        }
        static_assert(std::is_same<Smi, BuiltinPtr>(),
                      "BuiltinPtr must be Smi");
        allocate_builtin = __ NumberConstant(static_cast<int>(builtin));
      } else {
        if (type == AllocationType::kYoung) {
          allocate_builtin =
              __ BuiltinCode(Builtin::kWasmAllocateInYoungGeneration, isolate_);
        } else {
          allocate_builtin =
              __ BuiltinCode(Builtin::kWasmAllocateInOldGeneration, isolate_);
        }
      }
#else
      UNREACHABLE();
#endif
    }

    Block* call_runtime = __ NewBlock();
    Block* done = __ NewBlock();

    V<WordPtr> limit_address = GetLimitAddress(type);

    // If the allocation size is not statically known or is known to be larger
    // than kMaxRegularHeapObjectSize, do not update {top(type)} in case of a
    // runtime call. This is needed because we cannot allocation-fold large and
    // normal-sized objects.
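    // (Objects above kMaxRegularHeapObjectSize are allocated by the runtime in
    // large-object space, so they can never share a bump-pointer reservation
    // with normal-sized objects.)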
    uint64_t constant_size{};
    if (!__ matcher().MatchIntegralWordConstant(
            size, WordRepresentation::WordPtr(), &constant_size) ||
        constant_size > kMaxRegularHeapObjectSize) {
      Variable result =
          __ NewLoopInvariantVariable(RegisterRepresentation::Tagged());
      if (!constant_size) {
        // Check if we can do bump pointer allocation here.
        V<WordPtr> top_value = __ GetVariable(top(type));
        __ SetVariable(result,
                       __ BitcastWordPtrToHeapObject(__ WordPtrAdd(
                           top_value, __ IntPtrConstant(kHeapObjectTag))));
        V<WordPtr> new_top = __ WordPtrAdd(top_value, size);
        V<WordPtr> limit =
            __ LoadOffHeap(limit_address, MemoryRepresentation::UintPtr());
        __ GotoIfNot(LIKELY(__ UintPtrLessThan(new_top, limit)), call_runtime);
        __ GotoIfNot(LIKELY(__ UintPtrLessThan(
                         size, __ IntPtrConstant(kMaxRegularHeapObjectSize))),
                     call_runtime);
        __ SetVariable(top(type), new_top);
        __ StoreOffHeap(top_address, new_top, MemoryRepresentation::UintPtr());
        __ Goto(done);
      }
      if (constant_size || __ Bind(call_runtime)) {
        __ SetVariable(
            result, __ template Call<HeapObject>(allocate_builtin, {size},
                                                 AllocateBuiltinDescriptor()));
        __ Goto(done);
      }

      __ BindReachable(done);
      return __ GetVariable(result);
    }

    V<WordPtr> reservation_size;
    if (auto c = analyzer_->ReservedSize(__ current_operation_origin())) {
      reservation_size = __ UintPtrConstant(*c);
    } else {
      reservation_size = size;
    }
    // Check if we can do bump pointer allocation here.
    bool reachable =
        __ GotoIfNot(__ UintPtrLessThan(
                         size, __ IntPtrConstant(kMaxRegularHeapObjectSize)),
                     call_runtime, BranchHint::kTrue) !=
        ConditionalGotoStatus::kGotoDestination;
    if (reachable) {
      V<WordPtr> limit =
          __ LoadOffHeap(limit_address, MemoryRepresentation::UintPtr());
      __ Branch(__ UintPtrLessThan(
                    __ WordPtrAdd(__ GetVariable(top(type)), reservation_size),
                    limit),
                done, call_runtime, BranchHint::kTrue);
    }

    // Call the runtime if bump pointer area exhausted.
    if (__ Bind(call_runtime)) {
      V<HeapObject> allocated = __ template Call<HeapObject>(
          allocate_builtin, {reservation_size}, AllocateBuiltinDescriptor());
      __ SetVariable(top(type),
                     __ WordPtrSub(__ BitcastHeapObjectToWordPtr(allocated),
                                   __ IntPtrConstant(kHeapObjectTag)));
      __ Goto(done);
    }

    __ BindReachable(done);
    // Compute the new top and write it back.
    V<WordPtr> obj_addr = __ GetVariable(top(type));
    __ SetVariable(top(type), __ WordPtrAdd(__ GetVariable(top(type)), size));
    __ StoreOffHeap(top_address, __ GetVariable(top(type)),
                    MemoryRepresentation::UintPtr());
    return __ BitcastWordPtrToHeapObject(
        __ WordPtrAdd(obj_addr, __ IntPtrConstant(kHeapObjectTag)));
  }

  OpIndex REDUCE(DecodeExternalPointer)(OpIndex handle,
                                        ExternalPointerTagRange tag_range) {
#ifdef V8_ENABLE_SANDBOX
    // Decode loaded external pointer.
    V<WordPtr> table;
    if (isolate_ != nullptr) {
      // Here we access the external pointer table through an ExternalReference.
      // Alternatively, we could also hardcode the address of the table since it
      // is never reallocated. However, in that case we must be able to
      // guarantee that the generated code is never executed under a different
      // Isolate, as that would allow access to external objects from different
      // Isolates. It also would break if the code is serialized/deserialized at
      // some point.
      V<WordPtr> table_address =
          IsSharedExternalPointerType(tag_range)
              ? __
                LoadOffHeap(
                    __ ExternalConstant(
                        ExternalReference::
                            shared_external_pointer_table_address_address(
                                isolate_)),
                    MemoryRepresentation::UintPtr())
              : __ ExternalConstant(
                    ExternalReference::external_pointer_table_address(
                        isolate_));
      table = __ LoadOffHeap(table_address,
                             Internals::kExternalPointerTableBasePointerOffset,
                             MemoryRepresentation::UintPtr());
    } else {
#if V8_ENABLE_WEBASSEMBLY
      V<WordPtr> isolate_root = __ LoadRootRegister();
      if (IsSharedExternalPointerType(tag_range)) {
        V<WordPtr> table_address =
            __ Load(isolate_root, LoadOp::Kind::RawAligned(),
                    MemoryRepresentation::UintPtr(),
                    IsolateData::shared_external_pointer_table_offset());
        table = __ Load(table_address, LoadOp::Kind::RawAligned(),
                        MemoryRepresentation::UintPtr(),
                        Internals::kExternalPointerTableBasePointerOffset);
      } else {
        table = __ Load(isolate_root, LoadOp::Kind::RawAligned(),
                        MemoryRepresentation::UintPtr(),
                        IsolateData::external_pointer_table_offset() +
                            Internals::kExternalPointerTableBasePointerOffset);
      }
#else
      UNREACHABLE();
#endif
    }

    V<Word32> index =
        __ Word32ShiftRightLogical(handle, kExternalPointerIndexShift);
    V<Word64> pointer = __ LoadOffHeap(table, __ ChangeUint32ToUint64(index), 0,
                                       MemoryRepresentation::Uint64());

    // We don't expect to see empty fields here. If this is ever needed,
    // consider using a dedicated empty value entry for those tags instead
    // (i.e. an entry with the right tag and nullptr payload).
    DCHECK(!ExternalPointerCanBeEmpty(tag_range));

    Block* done = __ NewBlock();
    if (tag_range.Size() == 1) {
      // The common and simple case: we expect a specific tag.
      V<Word64> tag_bits = __ Word64BitwiseAnd(
          pointer, __ Word64Constant(kExternalPointerTagMask));
      tag_bits = __ Word64ShiftRightLogical(tag_bits, kExternalPointerTagShift);
      V<Word32> tag = __ TruncateWord64ToWord32(tag_bits);
      V<Word32> expected_tag = __ Word32Constant(tag_range.first);
      __ GotoIf(__ Word32Equal(tag, expected_tag), done, BranchHint::kTrue);
      // TODO(saelo): it would be nicer to abort here with
      // AbortReason::kExternalPointerTagMismatch. That might require adding a
      // builtin call here though, which is not currently available.
      __ Unreachable();
    } else {
      // Not currently supported. Implement once needed.
      UNREACHABLE();
    }
    __ BindReachable(done);
    return __ Word64BitwiseAnd(pointer, kExternalPointerPayloadMask);
#else   // V8_ENABLE_SANDBOX
    UNREACHABLE();
#endif  // V8_ENABLE_SANDBOX
  }

 private:
  std::optional<MemoryAnalyzer> analyzer_;
  Isolate* isolate_ = __ data()->isolate();
  const TSCallDescriptor* allocate_builtin_descriptor_ = nullptr;
  std::optional<Variable> top_[2];

  static_assert(static_cast<int>(AllocationType::kYoung) == 0);
  static_assert(static_cast<int>(AllocationType::kOld) == 1);

  Variable top(AllocationType type) {
    if (V8_UNLIKELY(!top_[static_cast<int>(type)].has_value())) {
      top_[static_cast<int>(type)].emplace(
          __ NewLoopInvariantVariable(RegisterRepresentation::WordPtr()));
    }
    return top_[static_cast<int>(type)].value();
  }

  const TSCallDescriptor* AllocateBuiltinDescriptor() {
    if (allocate_builtin_descriptor_ == nullptr) {
      allocate_builtin_descriptor_ =
          CreateAllocateBuiltinDescriptor(__ graph_zone(), isolate_);
    }
    return allocate_builtin_descriptor_;
  }

  V<WordPtr> GetLimitAddress(AllocationType type) {
    V<WordPtr> limit_address;
    if (isolate_ != nullptr) {
      limit_address = __ ExternalConstant(
          type == AllocationType::kYoung
              ? ExternalReference::new_space_allocation_limit_address(isolate_)
              : ExternalReference::old_space_allocation_limit_address(
                    isolate_));
    } else {
      // Wasm mode: producing isolate-independent code, loading the isolate
      // address at runtime.
#if V8_ENABLE_WEBASSEMBLY
      V<WasmTrustedInstanceData> instance_node = __ WasmInstanceDataParameter();
      int limit_address_offset =
          type == AllocationType::kYoung
              ? WasmTrustedInstanceData::kNewAllocationLimitAddressOffset
              : WasmTrustedInstanceData::kOldAllocationLimitAddressOffset;
      limit_address =
          __ Load(instance_node, LoadOp::Kind::TaggedBase(),
                  MemoryRepresentation::UintPtr(), limit_address_offset);
#else
      UNREACHABLE();
#endif  // V8_ENABLE_WEBASSEMBLY
    }
    return limit_address;
  }
};

#include "src/compiler/turboshaft/undef-assembler-macros.inc"

}  // namespace v8::internal::compiler::turboshaft

#endif  // V8_COMPILER_TURBOSHAFT_MEMORY_OPTIMIZATION_REDUCER_H_