// Copyright 2023 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_COMPILER_TURBOSHAFT_LOOP_UNROLLING_REDUCER_H_
#define V8_COMPILER_TURBOSHAFT_LOOP_UNROLLING_REDUCER_H_

#include <optional>

#include "src/base/logging.h"

namespace v8::internal::compiler::turboshaft {

#include "src/compiler/turboshaft/define-assembler-macros.inc"

// OVERVIEW:
//
// LoopUnrollingReducer fully unrolls small inner loops with a small
// statically-computable number of iterations, partially unrolls other small
// inner loops, and removes loops that we detect as always having 0 iterations.
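//
// Illustrative sketch (added for exposition; not part of the original header):
// a loop like `for (let i = 0; i < 3; i++) { f(i); }` has a statically-known
// trip count of 3 and can be fully unrolled into `f(0); f(1); f(2);`, with no
// loop left in the output graph. A loop whose bound isn't statically known can
// still be partially unrolled: each copy of the body keeps its exit check, so
// one iteration of the new loop performs several iterations of the old one.
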
#ifdef DEBUG
#define TRACE(x)                                                              \
  do {                                                                        \
    if (v8_flags.turboshaft_trace_unrolling) StdoutStream() << x << std::endl; \
  } while (false)
#else
#define TRACE(x)
#endif

class IterationCount {
  enum class Kind { kExact, kApprox, kUnknown };

 public:
  // Loops with an exact number of iterations can be unrolled.
  static IterationCount Exact(size_t count) {
    return IterationCount(Kind::kExact, count);
  }
  // We can remove stack checks from loops with a small number of iterations.
  static IterationCount Approx(size_t count) {
    return IterationCount(Kind::kApprox, count);
  }
  static IterationCount Unknown() { return IterationCount(Kind::kUnknown); }

  IterationCount() : kind_(Kind::kUnknown) {}
  explicit IterationCount(Kind kind) : kind_(kind) {
    DCHECK_NE(kind, Kind::kExact);
  }
  IterationCount(Kind kind, size_t count) : kind_(kind), count_(count) {
    DCHECK_EQ(kind, any_of(Kind::kExact, Kind::kApprox));
  }

  size_t exact_count() const {
    DCHECK_EQ(kind_, Kind::kExact);
    return count_;
  }
  size_t approx_count() const {
    DCHECK_EQ(kind_, Kind::kApprox);
    return count_;
  }

  bool IsExact() const { return kind_ == Kind::kExact; }
  bool IsApprox() const { return kind_ == Kind::kApprox; }
  bool IsUnknown() const { return kind_ == Kind::kUnknown; }

  bool IsSmallerThan(size_t max) {
    return (IsExact() || IsApprox()) && count_ < max;
  }

 private:
  Kind kind_;
  size_t count_;
};
std::ostream& operator<<(std::ostream& os, const IterationCount& count);
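// Usage sketch (added illustration, not original text): a loop whose trip
// count was computed as exactly 3 satisfies
//   IterationCount::Exact(3).IsExact()        == true
//   IterationCount::Exact(3).exact_count()    == 3
//   IterationCount::Exact(3).IsSmallerThan(4) == true
// whereas IterationCount::Unknown().IsSmallerThan(4) == false, so unknown
// counts never qualify for unrolling or stack check removal.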

class V8_EXPORT_PRIVATE StaticCanonicalForLoopMatcher {
  // In the context of this class, a "static canonical for-loop" is one of the
  // form `for (let i = cst; i cmp cst; i = i binop cst)`. That is, a fairly
  // simple for-loop for which we can statically compute the number of
  // iterations.
  //
  // There is an added constraint that this class can only match loops with few
  // iterations (controlled by the `max_iter_` parameter), for performance
  // reasons (because it's a bit tricky to compute how many iterations a loop
  // has, see the `HasFewerIterationsThan` method).
  //
  // This class and its methods are not in OperationMatcher, even though they
  // could fit there, because they seemed a bit too loop-unrolling specific.
  // However, if they ever become useful for something else, any of the
  // "MatchXXX" methods of this class could be moved to OperationMatcher.
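  //
  // Example (added illustration, not original text): `for (let i = 0; i != 8;
  // i = i + 2)` fits the canonical form, with init constant 0, comparison `!=`
  // against the constant 8, and binop `+ 2`; it runs for i = 0, 2, 4, 6, i.e.
  // exactly 4 iterations. `for (let i = 0; i < n; i++)` does not fit, because
  // its bound `n` is not a compile-time constant.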
 public:
  explicit StaticCanonicalForLoopMatcher(const OperationMatcher& matcher)
      : matcher_(matcher) {}

  IterationCount GetIterCountIfStaticCanonicalForLoop(
      const Block* header, OpIndex cond_idx, bool loop_if_cond_is) const;

  enum class CmpOp {
    kEqual,
    kSignedLessThan,
    kSignedLessThanOrEqual,
    kUnsignedLessThan,
    kUnsignedLessThanOrEqual,
    kSignedGreaterThan,
    kSignedGreaterThanOrEqual,
    kUnsignedGreaterThan,
    kUnsignedGreaterThanOrEqual,
  };
  static constexpr CmpOp ComparisonKindToCmpOp(ComparisonOp::Kind kind);
  static constexpr CmpOp InvertComparisonOp(CmpOp op);
  enum class BinOp {
    kAdd,
    kMul,
    kSub,
    kBitwiseAnd,
    kBitwiseOr,
    kBitwiseXor,
    kOverflowCheckedAdd,
    kOverflowCheckedMul,
    kOverflowCheckedSub
  };
  static constexpr BinOp BinopFromWordBinopKind(WordBinopOp::Kind kind);
  static constexpr BinOp BinopFromOverflowCheckedBinopKind(
      OverflowCheckedBinopOp::Kind kind);
  static constexpr bool BinopKindIsSupported(WordBinopOp::Kind binop_kind);

 private:
  bool MatchPhiCompareCst(OpIndex cond_idx, CmpOp* cmp_op, OpIndex* phi,
                          uint64_t* cst) const;
  bool MatchCheckedOverflowBinop(OpIndex idx, V<Word>* left, V<Word>* right,
                                 BinOp* binop_op,
                                 WordRepresentation* binop_rep) const;
  bool MatchWordBinop(OpIndex idx, V<Word>* left, V<Word>* right,
                      BinOp* binop_op, WordRepresentation* binop_rep) const;
  IterationCount CountIterations(uint64_t equal_cst, CmpOp cmp_op,
                                 uint64_t initial_input, uint64_t binop_cst,
                                 BinOp binop_op, WordRepresentation binop_rep,
                                 bool loop_if_cond_is) const;
  template <class Int>
  IterationCount CountIterationsImpl(Int init, Int max, CmpOp cmp_op,
                                     Int binop_cst, BinOp binop_op,
                                     WordRepresentation binop_rep,
                                     bool loop_if_cond_is) const;

  const OperationMatcher& matcher_;

  // When trying to compute the number of iterations of a loop, we simulate the
  // first {kMaxExactIter} iterations of the loop and check whether the loop
  // ends during these first few iterations. This is slightly inefficient,
  // hence the small value for {kMaxExactIter}, but it's simpler than using a
  // formula to compute the number of iterations (in particular because of
  // overflows).
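  //
  // Worked example (added for exposition): for `for (let i = 0; i < 3; i += 1)`
  // the simulation produces i = 0, 1, 2, 3 and sees the condition fail on the
  // 4th check, so the loop has an exact count of 3. A loop that is still
  // running after {kMaxExactIter} simulated iterations is not given an exact
  // count.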
  static constexpr size_t kMaxExactIter = 5;
};
std::ostream& operator<<(std::ostream& os,
                         const StaticCanonicalForLoopMatcher::CmpOp& cmp_op);
std::ostream& operator<<(std::ostream& os,
                         const StaticCanonicalForLoopMatcher::BinOp& binop_op);

class V8_EXPORT_PRIVATE LoopUnrollingAnalyzer {
  // LoopUnrollingAnalyzer analyzes the loops of the graph, and in particular
  // tries to figure out if some inner loops have a fixed (and known) number of
  // iterations. To do so, it tries to pattern-match loops like
  //
  //    for (let i = 0; i < 4; i++) { ... }
  //
  // where `i++` could alternatively be pretty much any WordBinopOp or
  // OverflowCheckedBinopOp, and `i < 4` could be any ComparisonOp.
  // Such loops, if small enough, can be fully unrolled.
  //
  // Loops that don't have statically-known bounds can still be partially
  // unrolled if they are small enough.
 public:
  LoopUnrollingAnalyzer(Zone* phase_zone, Graph* input_graph, bool is_wasm)
      : input_graph_(input_graph),
        matcher_(*input_graph),
        loop_finder_(phase_zone, input_graph),
        loop_iteration_count_(phase_zone),
        canonical_loop_matcher_(matcher_),
        is_wasm_(is_wasm),
        stack_checks_to_remove_(input_graph->stack_checks_to_remove()) {
    DetectUnrollableLoops();
  }

  bool ShouldFullyUnrollLoop(const Block* loop_header) const {
    DCHECK(loop_header->IsLoop());

    LoopFinder::LoopInfo header_info = loop_finder_.GetLoopInfo(loop_header);
    if (header_info.has_inner_loops) return false;
    if (header_info.op_count > kMaxLoopSizeForFullUnrolling) return false;

    auto iter_count = GetIterationCount(loop_header);
    return iter_count.IsExact() &&
           iter_count.exact_count() < kMaxLoopIterationsForFullUnrolling;
  }

  bool ShouldPartiallyUnrollLoop(const Block* loop_header) const {
    DCHECK(loop_header->IsLoop());
    LoopFinder::LoopInfo info = loop_finder_.GetLoopInfo(loop_header);
    return !info.has_inner_loops &&
           info.op_count < kMaxLoopSizeForPartialUnrolling;
  }

  // The returned unroll count is the total number of copies of the loop body
  // in the resulting graph, i.e., an unroll count of N means N-1 newly
  // unrolled copies of the body, plus the original/remaining body.
  size_t GetPartialUnrollCount(const Block* loop_header) const {
    // Don't unroll if the function is already huge: in the past this has led
    // to pathological runtimes or large memory usage, e.g., in register
    // allocation; see https://crbug.com/383661627 for an example / reproducer.
    // Even though we return an unroll count of one (i.e., don't really unroll
    // at all), running this phase can speed up subsequent optimizations,
    // probably because it produces loops in a "compact"/good block order for
    // analyses, namely <loop header>, <loop body>, <loop exit>, <rest of code>.
    // In principle, we should fix complexity problems in analyses, make sure
    // loops are already produced in this order, and not rely on the
    // "unrolling" here for the order alone, but this is a longer-standing
    // issue.
    if (input_graph_->op_id_count() > kMaxFunctionSizeForPartialUnrolling) {
      return 1;
    }
    if (is_wasm_) {
      LoopFinder::LoopInfo info = loop_finder_.GetLoopInfo(loop_header);
      return std::min(
          LoopUnrollingAnalyzer::kMaxPartialUnrollingCount,
          LoopUnrollingAnalyzer::kWasmMaxUnrolledLoopSize / info.op_count);
    }
    return LoopUnrollingAnalyzer::kMaxPartialUnrollingCount;
  }
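  //
  // Worked example (added for exposition, using the constants defined below):
  // a Wasm loop with op_count == 80 gets
  //   min(kMaxPartialUnrollingCount, kWasmMaxUnrolledLoopSize / 80)
  //     == min(4, 240 / 80) == 3
  // copies of its body, while a JS loop gets kMaxPartialUnrollingCount == 4,
  // provided the function is not over kMaxFunctionSizeForPartialUnrolling
  // operations.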

  bool ShouldRemoveLoop(const Block* loop_header) const {
    auto iter_count = GetIterationCount(loop_header);
    return iter_count.IsExact() && iter_count.exact_count() == 0;
  }

  IterationCount GetIterationCount(const Block* loop_header) const {
    DCHECK(loop_header->IsLoop());
    auto it = loop_iteration_count_.find(loop_header);
    if (it == loop_iteration_count_.end()) return IterationCount::Unknown();
    return it->second;
  }

  ZoneSet<const Block*, LoopFinder::BlockCmp> GetLoopBody(
      const Block* loop_header) {
    return loop_finder_.GetLoopBody(loop_header);
  }

  const Block* GetLoopHeader(const Block* block) {
    return loop_finder_.GetLoopHeader(block);
  }

  bool CanUnrollAtLeastOneLoop() const { return can_unroll_at_least_one_loop_; }

  // TODO(dmercadier): consider tweaking these values for a better size-speed
  // trade-off. In particular, having the number of iterations to unroll be a
  // function of the loop's size and a MaxLoopSize could make sense.
  static constexpr size_t kMaxLoopSizeForFullUnrolling = 150;
  // This function size limit is quite arbitrary. It is large enough that we
  // probably never hit it in JavaScript, and it is lower than the operation
  // count we have seen in some huge Wasm functions in the past, e.g., function
  // #21937 of https://crbug.com/383661627 (1.7M operations, 2.7MB wire bytes).
  static constexpr size_t kMaxFunctionSizeForPartialUnrolling = 1'000'000;
  static constexpr size_t kJSMaxLoopSizeForPartialUnrolling = 50;
  static constexpr size_t kWasmMaxLoopSizeForPartialUnrolling = 80;
  static constexpr size_t kWasmMaxUnrolledLoopSize = 240;
  static constexpr size_t kMaxLoopIterationsForFullUnrolling = 4;
  static constexpr size_t kMaxPartialUnrollingCount = 4;
  static constexpr size_t kMaxIterForStackCheckRemoval = 5000;

 private:
  void DetectUnrollableLoops();
  IterationCount GetLoopIterationCount(const LoopFinder::LoopInfo& info) const;

  Graph* input_graph_;
  OperationMatcher matcher_;
  LoopFinder loop_finder_;
  // {loop_iteration_count_} maps loop headers to number of iterations. It
  // doesn't contain entries for loops for which we don't know the number of
  // iterations.
  ZoneUnorderedMap<const Block*, IterationCount> loop_iteration_count_;
  const StaticCanonicalForLoopMatcher canonical_loop_matcher_;
  const bool is_wasm_;
  const size_t kMaxLoopSizeForPartialUnrolling =
      is_wasm_ ? kWasmMaxLoopSizeForPartialUnrolling
               : kJSMaxLoopSizeForPartialUnrolling;
  bool can_unroll_at_least_one_loop_ = false;

  ZoneAbslFlatHashSet<uint32_t>& stack_checks_to_remove_;
};

template <class Next>
class LoopUnrollingReducer;

template <class Next>
class LoopStackCheckElisionReducer : public Next {
 public:
  TURBOSHAFT_REDUCER_BOILERPLATE(LoopStackCheckElision)

  void Bind(Block* new_block) {
    Next::Bind(new_block);
    if (!remove_stack_checks_) return;

    if (new_block->IsLoop()) {
      const Block* origin = new_block->OriginForBlockEnd();
      if (origin) {
        if (stack_checks_to_remove_.contains(origin->index().id())) {
          skip_next_stack_check_ = true;
        }
      }
    }
  }
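  // Summary (added note): Bind() above sets {skip_next_stack_check_} when it
  // binds the output-graph copy of a loop header whose input-graph block id is
  // in {stack_checks_to_remove_}; the ReduceInputGraph methods below then
  // elide the next loop stack check they encounter.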

  V<AnyOrNone> REDUCE_INPUT_GRAPH(Call)(V<AnyOrNone> ig_idx,
                                        const CallOp& call) {
    LABEL_BLOCK(no_change) { return Next::ReduceInputGraphCall(ig_idx, call); }
    if (ShouldSkipOptimizationStep()) goto no_change;

    if (skip_next_stack_check_ &&
        call.IsStackCheck(__ input_graph(), broker_,
                          StackCheckKind::kJSIterationBody)) {
      skip_next_stack_check_ = false;
      return {};
    }

    goto no_change;
  }

  V<None> REDUCE_INPUT_GRAPH(JSStackCheck)(V<None> ig_idx,
                                           const JSStackCheckOp& stack_check) {
    if (skip_next_stack_check_ &&
        stack_check.kind == JSStackCheckOp::Kind::kLoop) {
      skip_next_stack_check_ = false;
      return {};
    }
    return Next::ReduceInputGraphJSStackCheck(ig_idx, stack_check);
  }

#if V8_ENABLE_WEBASSEMBLY
  V<None> REDUCE_INPUT_GRAPH(WasmStackCheck)(
      V<None> ig_idx, const WasmStackCheckOp& stack_check) {
    if (skip_next_stack_check_ &&
        stack_check.kind == WasmStackCheckOp::Kind::kLoop) {
      skip_next_stack_check_ = false;
      return {};
    }
    return Next::ReduceInputGraphWasmStackCheck(ig_idx, stack_check);
  }
#endif

 private:
  JSHeapBroker* broker_ = __ data()->broker();

  // The analysis should have run before the CopyingPhase starts, and should
  // have stored in `PipelineData::Get().stack_checks_to_remove()` the loops
  // whose stack checks should be removed.
  const ZoneAbslFlatHashSet<uint32_t>& stack_checks_to_remove_ =
      __ input_graph().stack_checks_to_remove();
  bool remove_stack_checks_ = !stack_checks_to_remove_.empty();

  bool skip_next_stack_check_ = false;
};

template <class Next>
class LoopUnrollingReducer : public Next {
 public:
  TURBOSHAFT_REDUCER_BOILERPLATE(LoopUnrolling)

#if defined(__clang__)
  // LoopUnrolling and LoopPeeling shouldn't be performed in the same phase,
  // see the comment in pipeline.cc where LoopUnrolling is triggered.
  static_assert(!reducer_list_contains<ReducerList, LoopPeelingReducer>::value);

  // TODO(dmercadier): Add a static_assert checking that this runs as part of a
  // CopyingPhase.
#endif

  V<None> REDUCE_INPUT_GRAPH(Goto)(V<None> ig_idx, const GotoOp& gto) {
    // Note that the "ShouldSkipOptimizationStep" checks are placed in the
    // parts of this Reduce method that trigger the unrolling rather than at
    // the beginning. This is because skipping the backedge is not an
    // optimization but a mandatory lowering when unrolling is being performed.
    LABEL_BLOCK(no_change) { return Next::ReduceInputGraphGoto(ig_idx, gto); }

    const Block* dst = gto.destination;
    if (unrolling_ == UnrollingStatus::kNotUnrolling && dst->IsLoop() &&
        !gto.is_backedge) {
      // We trigger unrolling when reaching the GotoOp that jumps to the loop
      // header (note that loop headers only have 2 predecessors, including the
      // backedge) and that isn't the backedge.
      if (ShouldSkipOptimizationStep()) goto no_change;
      if (analyzer_.ShouldRemoveLoop(dst)) {
        RemoveLoop(dst);
        return {};
      } else if (analyzer_.ShouldFullyUnrollLoop(dst)) {
        FullyUnrollLoop(dst);
        return {};
      } else if (analyzer_.ShouldPartiallyUnrollLoop(dst)) {
        PartiallyUnrollLoop(dst);
        return {};
      }
    } else if ((unrolling_ == UnrollingStatus::kUnrolling) &&
               dst == current_loop_header_) {
      // Skipping the backedge of the loop: FullyUnrollLoop and
      // PartiallyUnrollLoop will emit a Goto to the next unrolled iteration.
      return {};
    }
    goto no_change;
  }

  V<None> REDUCE_INPUT_GRAPH(Branch)(V<None> ig_idx, const BranchOp& branch) {
    LABEL_BLOCK(no_change) {
      return Next::ReduceInputGraphBranch(ig_idx, branch);
    }

    if (unrolling_ == UnrollingStatus::kRemoveLoop) {
      // We know that the branch of the final inlined header of a fully
      // unrolled loop never actually goes to the loop, so we can replace it
      // with a Goto (so that the non-unrolled loop doesn't get emitted). We
      // still need to figure out if we should Goto to the true or false side
      // of the BranchOp.
      const Block* header = __ current_block()->OriginForBlockEnd();
      bool is_true_in_loop = analyzer_.GetLoopHeader(branch.if_true) == header;
      bool is_false_in_loop =
          analyzer_.GetLoopHeader(branch.if_false) == header;

      if (is_true_in_loop && !is_false_in_loop) {
        __ Goto(__ MapToNewGraph(branch.if_false));
        return OpIndex::Invalid();
      } else if (is_false_in_loop && !is_true_in_loop) {
        __ Goto(__ MapToNewGraph(branch.if_true));
        return OpIndex::Invalid();
      } else {
        // Both the true and false destinations of this block are in the loop,
        // which means that the exit of the loop is later down the graph. We
        // thus still emit the branch, which will lead to the loop being
        // emitted (unless some other reducers in the stack manage to get rid
        // of the loop).
        DCHECK(is_true_in_loop && is_false_in_loop);
      }
    }
    goto no_change;
  }

  V<AnyOrNone> REDUCE_INPUT_GRAPH(Call)(V<AnyOrNone> ig_idx,
                                        const CallOp& call) {
    LABEL_BLOCK(no_change) { return Next::ReduceInputGraphCall(ig_idx, call); }
    if (ShouldSkipOptimizationStep()) goto no_change;

    if (skip_next_stack_check_) {
      if (call.IsStackCheck(__ input_graph(), broker_,
                            StackCheckKind::kJSIterationBody)) {
        // When we unroll a loop, we get rid of its stack checks. (Note that we
        // don't do this for the last folded body of partially unrolled loops,
        // so that the loop keeps one stack check.)
        return {};
      }
    }

    goto no_change;
  }

  V<None> REDUCE_INPUT_GRAPH(JSStackCheck)(V<None> ig_idx,
                                           const JSStackCheckOp& check) {
    if (ShouldSkipOptimizationStep() || !skip_next_stack_check_) {
      return Next::ReduceInputGraphJSStackCheck(ig_idx, check);
    }
    return V<None>::Invalid();
  }

#if V8_ENABLE_WEBASSEMBLY
  V<None> REDUCE_INPUT_GRAPH(WasmStackCheck)(V<None> ig_idx,
                                             const WasmStackCheckOp& check) {
    if (ShouldSkipOptimizationStep() || !skip_next_stack_check_) {
      return Next::ReduceInputGraphWasmStackCheck(ig_idx, check);
    }
    return V<None>::Invalid();
  }
#endif

 private:
  enum class UnrollingStatus {
    // Not currently unrolling a loop.
    kNotUnrolling,
    // Currently unrolling a loop.
    kUnrolling,
    // We use kRemoveLoop in 2 cases:
    //  - When unrolling is finished and we are currently emitting the header
    //    one last time, and should change its final branch into a Goto.
    //  - We decided to remove a loop and will just emit its header.
    // Both cases are fairly similar: we are currently emitting a loop header,
    // and would like to not emit the loop body that follows.
    kRemoveLoop,
  };
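  // Sketch of the state machine (added summary): RemoveLoop, FullyUnrollLoop
  // and PartiallyUnrollLoop (below) set {unrolling_} before cloning blocks and
  // reset it to kNotUnrolling when they are done; ReduceInputGraphGoto skips
  // the loop backedge while it is kUnrolling, and ReduceInputGraphBranch turns
  // the final header Branch into a Goto while it is kRemoveLoop.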
  void RemoveLoop(const Block* header);
  void FullyUnrollLoop(const Block* header);
  void PartiallyUnrollLoop(const Block* header);
  void FixLoopPhis(const Block* input_graph_loop, Block* output_graph_loop,
                   const Block* backedge_block);

  bool StopUnrollingIfUnreachable(
      std::optional<Block*> output_graph_header = std::nullopt) {
    if (__ generating_unreachable_operations()) {
      // By unrolling the loop, we realized that it was actually exiting early
      // (probably because a Branch inside the loop was using a loop Phi in a
      // condition, and unrolling showed that this loop Phi became true or
      // false), and that the last iterations were unreachable. We thus don't
      // bother unrolling the next iterations of the loop.
      if (output_graph_header.has_value()) {
        // The loop that we're unrolling has a header (which means that we're
        // only partially unrolling), which needs to be turned into a Merge
        // (and its PendingLoopPhis into regular Phis).
        __ FinalizeLoop(*output_graph_header);
      }
      return true;
    }
    return false;
  }

  // The analysis should be run ahead of time so that the LoopUnrollingPhase
  // doesn't trigger the CopyingPhase if there are no loops to unroll.
  LoopUnrollingAnalyzer& analyzer_ =
      *__ input_graph().loop_unrolling_analyzer();
  // {unrolling_} tracks whether a loop is currently being unrolled (and how).
  UnrollingStatus unrolling_ = UnrollingStatus::kNotUnrolling;
  bool skip_next_stack_check_ = false;

  const Block* current_loop_header_ = nullptr;
  JSHeapBroker* broker_ = __ data()->broker();
};

template <class Next>
void LoopUnrollingReducer<Next>::PartiallyUnrollLoop(const Block* header) {
  TRACE("LoopUnrolling: partially unrolling loop at " << header->index().id());
  DCHECK_EQ(unrolling_, UnrollingStatus::kNotUnrolling);
  DCHECK(!skip_next_stack_check_);
  unrolling_ = UnrollingStatus::kUnrolling;

  auto loop_body = analyzer_.GetLoopBody(header);
  current_loop_header_ = header;

  size_t unroll_count = analyzer_.GetPartialUnrollCount(header);
  DCHECK_GT(unroll_count, 0);
  TRACE("> UnrollCount: " << unroll_count);

  ScopedModification<bool> set_true(__ turn_loop_without_backedge_into_merge(),
                                    false);

  // Emitting the 1st iteration of the loop (with a proper loop header). We
  // remove the stack check of all iterations except the last one.
  ScopedModification<bool> skip_stack_checks(&skip_next_stack_check_, true);
  TRACE("> Emitting first iteration (with header)");
  Block* output_graph_header =
      __ CloneSubGraph(loop_body, /* keep_loop_kinds */ true);
  if (StopUnrollingIfUnreachable(output_graph_header)) {
    TRACE("> Next iteration is unreachable, stopping unrolling");
    return;
  }

  // Emitting the subsequent folded iterations. We set `unrolling_` to
  // kUnrolling so that stack checks are skipped.
  unrolling_ = UnrollingStatus::kUnrolling;
  for (size_t i = 0; i < unroll_count - 1; i++) {
    // We remove the stack check of all iterations but the last one.
    TRACE("> Emitting iteration " << i);
    bool is_last_iteration = i == unroll_count - 2;
    ScopedModification<bool> inner_skip_stack_checks(&skip_next_stack_check_,
                                                     !is_last_iteration);

    __ CloneSubGraph(loop_body, /* keep_loop_kinds */ false);
    if (StopUnrollingIfUnreachable(output_graph_header)) {
      TRACE("> Next iteration is unreachable, stopping unrolling");
      return;
    }
  }

  // ReduceInputGraphGoto ignores backedge Gotos while {unrolling_} is
  // kUnrolling, which means that the loop's backedge has not been emitted yet;
  // we thus emit it now.
  DCHECK(output_graph_header->IsLoop());
  Block* backedge_block = __ current_block();
  __ Goto(output_graph_header);
  // We use a custom `FixLoopPhis` because the mapping from old->new is a bit
  // "messed up" by having emitted the same block multiple times. See the
  // comments in `FixLoopPhis` for more details.
  TRACE("> Patching loop phis");
  FixLoopPhis(header, output_graph_header, backedge_block);

  unrolling_ = UnrollingStatus::kNotUnrolling;
  TRACE("> Finished partially unrolling loop " << header->index().id());
}
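
// Illustrative result (added sketch): for an unroll count of 3, the emitted
// blocks are roughly
//   <loop header + body copy #1>  (still a loop; stack check elided)
//   <body copy #2>                (stack check elided)
//   <body copy #3>                (stack check kept)
//   Goto back to the header       (the backedge emitted above)
// so the loop remains, but each iteration of the new loop performs up to three
// iterations of the original one.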

template <class Next>
void LoopUnrollingReducer<Next>::FixLoopPhis(const Block* input_graph_loop,
                                             Block* output_graph_loop,
                                             const Block* backedge_block) {
  // FixLoopPhis for partially unrolled loops is a bit tricky: the mapping from
  // input Loop Phis to output Loop Phis is in the Variable Snapshot of the
  // header (`output_graph_loop`), but the mapping from the 2nd input of the
  // input graph loop phis to the 2nd input of the output graph loop phis is in
  // the snapshot of the backedge (`backedge_block`).
  // VariableReducer::ReduceGotoOp (which was called right before this function
  // because we emitted the backedge Goto) already set the current snapshot to
  // be at the loop header. So, we start by computing the mapping input loop
  // phis -> output loop phis (using the loop header's snapshot). Then, we
  // restore the backedge snapshot to compute the mapping input graph 2nd phi
  // input to output graph 2nd phi input.
  DCHECK(input_graph_loop->IsLoop());
  DCHECK(output_graph_loop->IsLoop());

  // The mapping InputGraphPhi -> OutputGraphPendingPhi should be retrieved
  // from `output_graph_loop`'s snapshot (the current mapping is for the latest
  // folded loop iteration, not for the loop header).
  __ SealAndSaveVariableSnapshot();
  __ RestoreTemporaryVariableSnapshotAfter(output_graph_loop);
  base::SmallVector<std::pair<const PhiOp*, OpIndex>, 16> phis;
  for (const Operation& op : __ input_graph().operations(
           input_graph_loop->begin(), input_graph_loop->end())) {
    if (auto* input_phi = op.TryCast<PhiOp>()) {
      OpIndex phi_index =
          __ template MapToNewGraph<true>(__ input_graph().Index(*input_phi));
      if (!phi_index.valid() || !output_graph_loop->Contains(phi_index)) {
        // Unused phis are skipped, so they are not mapped to anything in the
        // new graph. If the phi is reduced to an operation from a different
        // block, then there is no loop phi in the current loop header to take
        // care of.
        continue;
      }
      phis.push_back({input_phi, phi_index});
    }
  }

  // The mapping for the InputGraphPhi 2nd input should however be retrieved
  // from the last block of the loop.
  __ CloseTemporaryVariableSnapshot();
  __ RestoreTemporaryVariableSnapshotAfter(backedge_block);

  for (auto [input_phi, output_phi_index] : phis) {
    __ FixLoopPhi(*input_phi, output_phi_index, output_graph_loop);
  }

  __ CloseTemporaryVariableSnapshot();
}

template <class Next>
void LoopUnrollingReducer<Next>::RemoveLoop(const Block* header) {
  TRACE("LoopUnrolling: removing loop at " << header->index().id());
  DCHECK_EQ(unrolling_, UnrollingStatus::kNotUnrolling);
  DCHECK(!skip_next_stack_check_);
  // When removing a loop, we still need to emit the header (since it always
  // has to be executed before the 1st iteration anyway), but by setting
  // {unrolling_} to `kRemoveLoop`, the final Branch of the loop will become a
  // Goto to outside the loop.
  unrolling_ = UnrollingStatus::kRemoveLoop;
  __ CloneAndInlineBlock(header);
  unrolling_ = UnrollingStatus::kNotUnrolling;
}

template <class Next>
void LoopUnrollingReducer<Next>::FullyUnrollLoop(const Block* header) {
  TRACE("LoopUnrolling: fully unrolling loop at " << header->index().id());
  DCHECK_EQ(unrolling_, UnrollingStatus::kNotUnrolling);
  DCHECK(!skip_next_stack_check_);
  ScopedModification<bool> skip_stack_checks(&skip_next_stack_check_, true);

  size_t iter_count = analyzer_.GetIterationCount(header).exact_count();
  TRACE("> iter_count: " << iter_count);

  auto loop_body = analyzer_.GetLoopBody(header);
  current_loop_header_ = header;

  unrolling_ = UnrollingStatus::kUnrolling;
  for (size_t i = 0; i < iter_count; i++) {
    TRACE("> Emitting iteration " << i);
    __ CloneSubGraph(loop_body, /* keep_loop_kinds */ false);
    if (StopUnrollingIfUnreachable()) {
      TRACE("> Next iteration is unreachable, stopping unrolling");
      return;
    }
  }

  // The loop actually finishes on the header rather than its last block. We
  // thus inline the header, and we'll replace its final BranchOp with a GotoOp
  // to outside of the loop.
  TRACE("> Emitting the final header");
  unrolling_ = UnrollingStatus::kRemoveLoop;
  __ CloneAndInlineBlock(header);

  unrolling_ = UnrollingStatus::kNotUnrolling;
  TRACE("> Finished fully unrolling loop " << header->index().id());
}
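
// Illustrative result (added sketch): for a loop with an exact iteration count
// of 2, the emitted code is roughly
//   <body copy #1>   (no loop header; stack check elided)
//   <body copy #2>   (stack check elided)
//   <final copy of the header>, whose Branch is rewritten into a Goto that
//   exits the loop (see ReduceInputGraphBranch and kRemoveLoop above)
// i.e., no loop remains in the output graph.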

#undef TRACE

#include "src/compiler/turboshaft/undef-assembler-macros.inc"

}  // namespace v8::internal::compiler::turboshaft

#endif  // V8_COMPILER_TURBOSHAFT_LOOP_UNROLLING_REDUCER_H_