v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
duplication-optimization-reducer.h
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_COMPILER_TURBOSHAFT_DUPLICATION_OPTIMIZATION_REDUCER_H_
#define V8_COMPILER_TURBOSHAFT_DUPLICATION_OPTIMIZATION_REDUCER_H_

#include "src/compiler/turboshaft/assembler.h"
#include "src/compiler/turboshaft/graph.h"
#include "src/compiler/turboshaft/index.h"
#include "src/compiler/turboshaft/operations.h"
#include "src/compiler/turboshaft/value-numbering-reducer.h"

namespace v8::internal::compiler::turboshaft {

// DuplicationOptimizationReducer introduces duplication where this can be
// beneficial for generated code. It should run late in the pipeline so that
// the duplication isn't optimized away by some other phases (such as GVN).
//
// In particular, it introduces duplication in 2 places:
//
// 1. Branch condition duplication: it tries to ensure that the condition nodes
// of branches are used only once (under some conditions). When it finds a
// branch node whose condition has multiple uses, this condition is duplicated.
//
// Doing this enables the InstructionSelector to generate more efficient code
// for branches. For instance, consider this code:
//
//    c = a + b;
//    if (c == 0) { /* some code */ }
//    if (c == 0) { /* more code */ }
//
// Then the generated code will be something like (using registers "ra" for "a"
// and "rb" for "b", and "rt" a temporary register):
//
//    add ra, rb   ; a + b
//    cmp ra, 0    ; a + b == 0
//    sete rt      ; rt = (a + b == 0)
//    cmp rt, 0    ; rt == 0
//    jz
//    ...
//    cmp rt, 0    ; rt == 0
//    jz
//
// As you can see, TurboFan materialized the == bit into a temporary register.
// However, since the "add" instruction sets the ZF flag (on x64), it can be
// used to determine whether the jump should be taken or not. The code we'd
// like to generate instead is thus:
//
//    add ra, rb
//    jnz
//    ...
//    add ra, rb
//    jnz
//
// However, this requires generating the "add ra, rb" instruction twice. Due to
// how virtual registers are assigned in TurboFan (there is a map from node ID
// to virtual registers), both "add" instructions would use the same virtual
// register as output, which would break SSA.
//
// In order to overcome this issue, this reducer duplicates branch conditions
// that are used more than once, so that they can be generated right before
// each branch without worrying about breaking SSA.
//
// 2. Load/Store flexible second operand duplication: on Arm64, it tries to
// duplicate the "index" input of Loads/Stores when it's a shift by a constant.
// This allows the Instruction Selector to compute said shift using a flexible
// second operand, which in most cases on recent Arm64 CPUs should come for
// free.
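//
// For instance, given this (illustrative) input, where the shifted index has
// other uses as well:
//
//    idx = x << 3;
//    val = mem[base + idx];
//
// duplicating the shift lets the load be selected as a single instruction
// (assuming registers "x1" for "base" and "x2" for "x"):
//
//    ldr x0, [x1, x2, lsl #3]
//
// instead of first materializing the shifted index in its own register.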

#include "src/compiler/turboshaft/define-assembler-macros.inc"

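// Like any Turboshaft reducer, this one is meant to be combined with others in
// a reducer stack. A pipeline phase could wire it up along these lines
// (hypothetical phase setup, for illustration only):
//
//   CopyingPhase<DuplicationOptimizationReducer,
//                ValueNumberingReducer>::Run(data, temp_zone);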
template <class Next>
class DuplicationOptimizationReducer : public Next {
 public:
  TURBOSHAFT_REDUCER_BOILERPLATE(DuplicationOptimization)

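  // When mapping a Branch from the input graph, try to give it a fresh private
  // copy of its condition (if that condition is duplicable and has multiple
  // uses), so that the Instruction Selector can fuse condition and branch.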
  V<None> REDUCE_INPUT_GRAPH(Branch)(V<None> ig_index, const BranchOp& branch) {
    LABEL_BLOCK(no_change) {
      return Next::ReduceInputGraphBranch(ig_index, branch);
    }
    if (ShouldSkipOptimizationStep()) goto no_change;

    const Operation& cond = __ input_graph().Get(branch.condition());
    V<Word32> new_cond;
    if (!MaybeDuplicateCond(cond, branch.condition(), &new_cond)) {
      goto no_change;
    }

    DCHECK(new_cond.valid());
    __ Branch(new_cond, __ MapToNewGraph(branch.if_true),
              __ MapToNewGraph(branch.if_false), branch.hint);
    return V<None>::Invalid();
  }

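  // Select conditions benefit from duplication for the same reason: a freshly
  // emitted condition can be consumed directly by a conditional instruction
  // (e.g. cmov on x64 or csel on Arm64).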
  V<Any> REDUCE_INPUT_GRAPH(Select)(V<Any> ig_index, const SelectOp& select) {
    LABEL_BLOCK(no_change) {
      return Next::ReduceInputGraphSelect(ig_index, select);
    }
    if (ShouldSkipOptimizationStep()) goto no_change;

    const Operation& cond = __ input_graph().Get(select.cond());
    V<Word32> new_cond;
    if (!MaybeDuplicateCond(cond, select.cond(), &new_cond)) goto no_change;

    DCHECK(new_cond.valid());
    return __ Select(new_cond, __ MapToNewGraph(select.vtrue()),
                     __ MapToNewGraph(select.vfalse()), select.rep, select.hint,
                     select.implem);
  }

#if V8_TARGET_ARCH_ARM64
  // TODO(dmercadier): duplicating a shift to use a flexible second operand is
  // not always worth it; this depends mostly on the CPU, the kind of shift,
  // and the size of the loaded/stored data. Ideally, we would have cost models
  // for all the CPUs we target, and use those to decide whether or not to
  // duplicate shifts.
  OpIndex REDUCE(Load)(OpIndex base, OptionalOpIndex index, LoadOp::Kind kind,
                       MemoryRepresentation loaded_rep,
                       RegisterRepresentation result_rep, int32_t offset,
                       uint8_t element_size_log2) {
    if (offset == 0 && element_size_log2 == 0 && index.valid()) {
      index = MaybeDuplicateOutputGraphShift(index.value());
    }
    return Next::ReduceLoad(base, index, kind, loaded_rep, result_rep, offset,
                            element_size_log2);
  }

  V<None> REDUCE(Store)(OpIndex base, OptionalOpIndex index, OpIndex value,
                        StoreOp::Kind kind, MemoryRepresentation stored_rep,
                        WriteBarrierKind write_barrier, int32_t offset,
                        uint8_t element_size_log2,
                        bool maybe_initializing_or_transitioning,
                        IndirectPointerTag maybe_indirect_pointer_tag) {
    if (offset == 0 && element_size_log2 == 0 && index.valid()) {
      index = MaybeDuplicateOutputGraphShift(index.value());
    }
    return Next::ReduceStore(base, index, value, kind, stored_rep,
                             write_barrier, offset, element_size_log2,
                             maybe_initializing_or_transitioning,
                             maybe_indirect_pointer_tag);
  }
#endif

 private:
  bool MaybeDuplicateCond(const Operation& cond, OpIndex input_idx,
                          V<Word32>* new_cond) {
    if (cond.saturated_use_count.IsOne()) return false;

    switch (cond.opcode) {
      case Opcode::kComparison:
        *new_cond =
            MaybeDuplicateComparison(cond.Cast<ComparisonOp>(), input_idx);
        break;
      case Opcode::kWordBinop:
        *new_cond =
            MaybeDuplicateWordBinop(cond.Cast<WordBinopOp>(), input_idx);
        break;
      case Opcode::kShift:
        *new_cond = MaybeDuplicateShift(cond.Cast<ShiftOp>(), input_idx);
        break;
      default:
        return false;
    }
    return new_cond->valid();
  }

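  // Shared heuristic for all duplicable condition kinds: duplication is only
  // worth it if it doesn't keep both inputs alive longer than before, and only
  // once the original operation has already been emitted in the output graph.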
  bool MaybeCanDuplicateGenericBinop(OpIndex input_idx, OpIndex left,
                                     OpIndex right) {
    if (__ input_graph().Get(left).saturated_use_count.IsOne() &&
        __ input_graph().Get(right).saturated_use_count.IsOne()) {
      // We don't duplicate binops when all of their inputs are used a single
      // time (this would increase register pressure by keeping 2 values alive
      // instead of 1).
      return false;
    }
    OpIndex binop_output_idx = __ MapToNewGraph(input_idx);
    if (__ Get(binop_output_idx).saturated_use_count.IsZero()) {
      // This is the 1st use of {binop} in the output graph, so there is no
      // need to duplicate it just yet.
      return false;
    }
    return true;
  }

  OpIndex MaybeDuplicateWordBinop(const WordBinopOp& binop, OpIndex input_idx) {
    if (!MaybeCanDuplicateGenericBinop(input_idx, binop.left(),
                                       binop.right())) {
      return OpIndex::Invalid();
    }

    switch (binop.kind) {
      case WordBinopOp::Kind::kSignedDiv:
      case WordBinopOp::Kind::kUnsignedDiv:
      case WordBinopOp::Kind::kSignedMod:
      case WordBinopOp::Kind::kUnsignedMod:
        // These operations are somewhat expensive, and duplicating them is
        // probably not worth it.
        return OpIndex::Invalid();
      default:
        break;
    }

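    // GVN would otherwise fold the copy right back into the original
    // operation, so value numbering is disabled while emitting the duplicate.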
    DisableValueNumbering disable_gvn(this);
    return __ WordBinop(__ MapToNewGraph(binop.left()),
                        __ MapToNewGraph(binop.right()), binop.kind, binop.rep);
  }

  V<Word32> MaybeDuplicateComparison(const ComparisonOp& comp,
                                     OpIndex input_idx) {
    if (!MaybeCanDuplicateGenericBinop(input_idx, comp.left(), comp.right())) {
      return {};
    }

    DisableValueNumbering disable_gvn(this);
    return __ Comparison(__ MapToNewGraph(comp.left()),
                         __ MapToNewGraph(comp.right()), comp.kind, comp.rep);
  }

  OpIndex MaybeDuplicateShift(const ShiftOp& shift, OpIndex input_idx) {
    if (!MaybeCanDuplicateGenericBinop(input_idx, shift.left(),
                                       shift.right())) {
      return OpIndex::Invalid();
    }

    DisableValueNumbering disable_gvn(this);
    return __ Shift(__ MapToNewGraph(shift.left()),
                    __ MapToNewGraph(shift.right()), shift.kind, shift.rep);
  }

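  // If the output-graph {index} is a shift by constant that still has other
  // uses, re-emit the shift so that this Load/Store gets its own copy, which
  // the Instruction Selector can then fold in as a flexible second operand.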
  OpIndex MaybeDuplicateOutputGraphShift(OpIndex index) {
    V<Word> shifted;
    int shifted_by;
    ShiftOp::Kind shift_kind;
    WordRepresentation shift_rep;
    if (__ matcher().MatchConstantShift(index, &shifted, &shift_kind,
                                        &shift_rep, &shifted_by) &&
        !__ matcher().Get(index).saturated_use_count.IsZero()) {
      // We don't check the use count of {shifted}, because it might have uses
      // in the future that haven't been emitted yet.
      DisableValueNumbering disable_gvn(this);
      return __ Shift(shifted, __ Word32Constant(shifted_by), shift_kind,
                      shift_rep);
    }
    return index;
  }
};

#include "src/compiler/turboshaft/undef-assembler-macros.inc"

}  // namespace v8::internal::compiler::turboshaft

#endif  // V8_COMPILER_TURBOSHAFT_DUPLICATION_OPTIMIZATION_REDUCER_H_