string-builder-optimizer_8h_source.html

// Copyright 2022 the V8 project authors. All rights reserved.

// Use of this source code is governed by a BSD-style license that can be

// found in the LICENSE file.


#ifndef V8_COMPILER_STRING_BUILDER_OPTIMIZER_H_

#define V8_COMPILER_STRING_BUILDER_OPTIMIZER_H_


#include <cstdint>

#include <optional>

#include <unordered_map>

#include <vector>


#include "src/base/macros.h"

#include "src/compiler/graph-assembler.h"

#include "src/compiler/graph-reducer.h"

#include "src/compiler/js-graph.h"

#include "src/compiler/machine-operator.h"

#include "src/compiler/node-marker.h"

#include "src/compiler/node.h"

#include "src/compiler/schedule.h"

#include "src/zone/zone-containers.h"


namespace v8 {

namespace internal {


class Zone;


namespace compiler {


class JSGraphAssembler;

class NodeOriginTable;

class Schedule;

class SourcePositionTable;

class JSHeapBroker;


// StringBuilderOptimizer aims at avoid ConsString for some loops that build

// strings, and instead update a mutable over-allocated backing store, while

// keeping a (mutable) SlicedString to the valid part of the backing store.

//

// StringBuilderOptimizer only does the analysis: it finds out which nodes could

// benefit from this optimization. Then, EffectControlLinearizer actually

// applies the optimization to the graph.

//

// A typical example of what the StringBuilderOptimizer can optimize is:

//

//    let s = "";

//    for (...) {

//        s += "...";

//    }

//

// In general, for a series of concatenations to be optimized, they need:

//   - To start on a single initial concatenation

//   - All the concatenations in the string builder must have constant strings

//     or String.FromCharCode on their right-hand side.

//   - At least one of the concatenation must be in a loop.

//

// Because everything is nicer with a picture, here is one of what kind of

// patterns the StringBuilderOptimizer tries to optimize:

//

//            +--------+

//            |kLiteral|

//            +--------+

//                |

//                |

//                v

//         +-------------+          +--------+

//         |kStringConcat| <------- |kLiteral|

//         +-------------+          +--------+

//                |

//                |

//                v

//           optionally,

//        more kStringConcat

//                |

//                |

//                v

//             +----+

//    -------->|kPhi|------------------------------------------

//    |        +----+                                         |

//    |           |                                           |

//    |           |                                           |

//    |           |                                           |

//    |           |                                           |

//    |           |                                           |

//    |           |                                           |

//    |           |                                           |

//    |           v                                           |

//    |    +-------------+          +--------+                |

//    |    |kStringConcat| <------- |kLiteral|                |

//    |    +-------------+          +--------+                |

//    |           |                                           |

//    |           |                                           |

//    |           v                                           |

//    |      optionally,                                      v

//    |   more kStringConcat                            optionally,

//    |           |                                   more kStringConcat

//    |           |                                   or more kPhi/loops

//    |           |                                           |

//    ------------|                                           |

//                                                            |

//                                                            |

//                                                            v

//

// Where "kLiteral" actually means "either a string literal (HeapConstant) or a

// StringFromSingleCharCode". And kStringConcat can also be kNewConsString (when

// the size of the concatenation is known to be more than 13 bytes, Turbofan's

// front-end generates kNewConsString opcodes rather than kStringConcat).

// The StringBuilder also supports merge phis. For instance:

//

//                                  +--------+

//                                  |kLiteral|

//                                  +--------+

//                                      |

//                                      |

//                                      v

//                               +-------------+          +--------+

//                               |kStringConcat| <------- |kLiteral|

//                               +-------------+          +--------+

//                                  |       |

//                                  |       |

//                                  |       |

//                  +---------------+       +---------------+

//                  |                                       |

//                  |                                       |

//                  v                                       v

//           +-------------+                         +-------------+

//           |kStringConcat|                         |kStringConcat|

//           +-------------+                         +-------------+

//                  |                                       |

//                  |                                       |

//                  |                                       |

//                  +---------------+       +---------------+

//                                  |       |

//                                  |       |

//                                  v       v

//                               +-------------+

//                               |    kPhi     |

//                               |   (merge)   |

//                               +-------------+

//                                      |

//                                      |

//                                      v

//

// (and, of course, loops and merge can be mixed).


class OneOrTwoByteAnalysis final {

  // The class OneOrTwoByteAnalysis is used to try to statically determine

  // whether a string constant or StringFromSingleCharCode is a 1-byte or a

  // 2-byte string.

  // If we succeed to do this analysis for all of the nodes in a string builder,

  // then we know statically whether this string builder is building a 1-byte or

  // a 2-byte string, and we can optimize the generated code to remove all

  // 1-byte/2-byte checks.

 public:


  OneOrTwoByteAnalysis(TFGraph* graph, Zone* zone, JSHeapBroker* broker)

      : states_(graph->NodeCount(), State::kUnknown, zone), broker_(broker) {}


  enum class State : uint8_t {

    kUnknown,  // Not yet determined if the string is 1 or 2-bytes

    kOneByte,  // Only 1-byte strings in the string builder

    kTwoByte,  // At least one 2-byte string in the string builder

    kCantKnow  // Cannot determine statically if the string will be 1 or 2-bytes


    // Lattice of possible transitions:

    //

    //      kUnknown

    //      /     | \

    //     /      |  \

    //    v       |   \

    //  kOneByte  |    |

    //  |   |     |    |

    //  |   |     |    |

    //  |   v     v    |

    //  |   kTwoByte   |

    //  |      |      /

    //   \     |     /

    //    v    v    v

    //     kCantKnow

    //

    // Which means that for instance it's possible to realize that a kUnknown

    // string builder will produce a 1-byte string, and we can later realize

    // that it will instead be a 2-byte string. Or, we could be in kOneByte

    // state, and then realize that the string may or may not end up being

    // 2-byte, so we'll move to kCantKnow state.

  };


  // Computes and returns a State reflecting whether {node} is a 1-byte or

  // 2-byte string.

  State OneOrTwoByte(Node* node);


  // Computes whether the string builder will be on 1-byte or 2-byte if it

  // contains two nodes that have states {a} and {b}. For instance, if both {a}

  // and {b} are kOneByte, ConcatResultIsOneOrTwoByte returns kOneByte.

  static State ConcatResultIsOneOrTwoByte(State a, State b);


 private:

  // Returns the positive integral range that {node} can take. If {node} can be

  // negative or is not a number, returns nullopt. If the range exceeds 2**32,

  // returns nullopt as well. The analysis of TryGetRange is not complete (some

  // operators are ignored), so if {node} isn't handled, then nullopt is

  // returned. If this function returns a range between 0 and 255, then we

  // assume that calling StringFromSingleCharCode on {node} will produce a

  // 1-byte string. The analysis is sound (it doesn't make mistake), but is not

  // complete (it bails out (returns nullopt) on operators that are not

  // handled).

  std::optional<std::pair<int64_t, int64_t>> TryGetRange(Node* node);


  JSHeapBroker* broker() { return broker_; }


  ZoneVector<State> states_;

  JSHeapBroker* broker_;

};


class V8_EXPORT_PRIVATE StringBuilderOptimizer final {

 public:

  StringBuilderOptimizer(JSGraph* jsgraph, Schedule* schedule, Zone* temp_zone,

                         JSHeapBroker* broker);


  // Returns true if some trimming code should be inserted at the beginning of

  // {block} to finalize some string builders.

  bool BlockShouldFinalizeStringBuilders(BasicBlock* block);

  // Returns which nodes should be trimmed at the beginning of {block} to

  // finalize some string builders.

  ZoneVector<Node*> GetStringBuildersToFinalize(BasicBlock* block);


  // Returns true if {node} is the last node of a StringBuilder (which means

  // that trimming code should be inserted after {node}).

  // Note that string builders cannot end in the middle of a loop (unless it was

  // started in the same loop). The way it's enforced is that when we first

  // visit a loop Phi that could be part of a String Builder, we set its status

  // to State::kPendingPhi. Only once we've visited the whole loop and the

  // backedge and that the use chain following the loop phi up to and including

  // the backedge are valid as part of a String Builder, then the loop phi

  // status is siwtched to State::kInStringBuilder. Then, in the final step

  // where we switch the status to State::kConfirmedInStringBuilder, we ignore

  // nodes that have a status that isn't kInStringBuilder, which means that we

  // ignore loop phis that still have the kPendingPhi status (and their

  // successors). The String Builders thus cannot end inside loops.

  bool IsStringBuilderEnd(Node* node);

  // Returns true if {node} is a the last node of a StringBuilder and is not a

  // loop phi. The "loop phi" distinction matters, because trimming for loop

  // phis is trickier (because we don't want to trim at every iteration of the

  // loop, but only once after the loop).

  bool IsNonLoopPhiStringBuilderEnd(Node* node);

  // Returns true if {node} is the input of a concatenation that is part of a

  // StringBuilder.

  bool IsStringBuilderConcatInput(Node* node);

  // Returns true if {node} is part of a StringBuilder.

  bool ConcatIsInStringBuilder(Node* node);

  // Returns true if {node} is the 1st node of a StringBuilder (which means that

  // when lowering {node}, we should allocate and initialize everything for this

  // particular StringBuilder).

  bool IsFirstConcatInStringBuilder(Node* node);


  // Returns an OneOrTwoByteAnalysis::State representing whether the

  // StringBuilder that contains {node} is building a 1-byte or a 2-byte.

  OneOrTwoByteAnalysis::State GetOneOrTwoByte(Node* node);


  void Run();


  JSGraph* jsgraph() const { return jsgraph_; }

  TFGraph* graph() const { return jsgraph_->graph(); }

  Schedule* schedule() const { return schedule_; }

  Zone* temp_zone() const { return temp_zone_; }

  JSHeapBroker* broker() const { return broker_; }


 private:


  enum class State : uint8_t {

    kUnvisited = 0,

    kBeginStringBuilder,        // A (potential) beginning of a StringBuilder

    kInStringBuilder,           // A node that could be in a StringBuilder

    kPendingPhi,                // A phi that could be in a StringBuilder

    kConfirmedInStringBuilder,  // A node that is definitely in a StringBuilder

    kEndStringBuilder,  // A node that ends definitely a StringBuilder, and that

                        // can be trimmed right away

    kEndStringBuilderLoopPhi,  // A phi that ends a StringBuilder, and whose

                               // trimming need to be done at the beginning of

                               // the following blocks.

    kInvalid,  // A node that we visited and that we can't optimize.

    kNumberOfState

  };


  struct Status {

    int id;       // The id of the StringBuilder that the node belongs to (or

                  // kInvalidId).

    State state;  // The state of the node.

  };


  static constexpr int kInvalidId = -1;


  Status GetStatus(Node* node) const {

    if (node->id() > status_.size()) {

      return Status{kInvalidId, State::kInvalid};

    } else {

      return status_[node->id()];

    }

  }


  void SetStatus(Node* node, State state, int id = kInvalidId) {

    DCHECK_NE(state, State::kUnvisited);

    DCHECK_IMPLIES(id != kInvalidId, state != State::kInvalid);

    if (node->id() >= status_.size()) {

      // We should really not allocate too many new nodes: the only new nodes we

      // allocate are constant inputs of nodes in the string builder that have

      // multiple uses. Thus, we use a slow exponential growth for {status_}.

      constexpr double growth_factor = 1.1;

      status_.resize(node->id() * growth_factor,

                     Status{kInvalidId, State::kUnvisited});

    }

    status_[node->id()] = Status{id, state};

  }


  void UpdateStatus(Node* node, State state) {

    int id = state == State::kInvalid ? kInvalidId : GetStatus(node).id;

    status_[node->id()] = Status{id, state};

  }


  struct StringBuilder {

    Node* start;

    int id;

    bool has_loop_phi;

    OneOrTwoByteAnalysis::State one_or_two_bytes;

  };


  const StringBuilder kInvalidStringBuilder = {

      nullptr, kInvalidId, false, OneOrTwoByteAnalysis::State::kUnknown};


#ifdef DEBUG

  bool StringBuilderIsValid(StringBuilder string_builder) {

    return string_builder.start != nullptr && string_builder.id != kInvalidId &&

           string_builder.has_loop_phi;

  }

#endif


  bool IsLoopPhi(Node* node) const {

    return node->opcode() == IrOpcode::kPhi &&

           schedule()->block(node)->IsLoopHeader();

  }


  bool LoopContains(Node* loop_phi, Node* node) {

    DCHECK(IsLoopPhi(loop_phi));

    return schedule()->block(loop_phi)->LoopContains(schedule()->block(node));

  }


  int GetStringBuilderIdForConcat(Node* node);

  void ReplaceConcatInputIfNeeded(Node* node, int input_idx);

  bool CheckNodeUses(Node* node, Node* concat_child, Status status);

  bool CheckPreviousNodeUses(Node* child, Status status,

                             int input_if_loop_phi = 0);

  int GetPhiPredecessorsCommonId(Node* node);


  void FinalizeStringBuilders();

  void VisitNode(Node* node, BasicBlock* block);

  void VisitGraph();


  static constexpr bool kAllowAnyStringOnTheRhs = false;


  JSGraph* jsgraph_;

  Schedule* schedule_;

  Zone* temp_zone_;

  JSHeapBroker* broker_;

  unsigned int string_builder_count_ = 0;

  // {blocks_to_trimmings_map_} is a map from block IDs to loop phi nodes that

  // end string builders. For each such node, a trimming should be inserted at

  // the beginning of the block (in EffectControlLinearizer) in order to

  // properly finish the string builder (well, most things will work if the

  // trimming is omitted, but adding this trimming save memory and removes the

  // SlicedString indirection; the only thing that would be an issue is that the

  // rest of the VM could have access to a SlicedString that is less than

  // SlicedString::kMinLength characters, which may or may not break things).

  ZoneVector<std::optional<ZoneVector<Node*>>> blocks_to_trimmings_map_;

  ZoneVector<Status> status_;

  ZoneVector<StringBuilder> string_builders_;

  // {loop_headers_} is used to keep track ot the start of each loop that the

  // block currently being visited is part of.

  ZoneVector<BasicBlock*> loop_headers_;

};


}  // namespace compiler

}  // namespace internal

}  // namespace v8


#endif  // V8_COMPILER_STRING_BUILDER_OPTIMIZER_H_

schedule
Schedule * schedule
Definition add-type-assertions-reducer.cc:17

jsgraph
JSGraph * jsgraph
Definition add-type-assertions-reducer.cc:16

Zone
friend Zone
Definition asm-types.cc:195

v8::internal::State
Definition cpp-snapshot.cc:203

v8::internal::ZoneVector
Definition zone-containers.h:53

v8::internal::Zone
Definition zone.h:43

v8::internal::compiler::BasicBlock
Definition schedule.h:30

v8::internal::compiler::JSGraph
Definition js-graph.h:27

v8::internal::compiler::JSHeapBroker
Definition js-heap-broker.h:95

v8::internal::compiler::Node
Definition node.h:41

v8::internal::compiler::Node::id
NodeId id() const
Definition node.h:57

v8::internal::compiler::OneOrTwoByteAnalysis
Definition string-builder-optimizer.h:146

v8::internal::compiler::OneOrTwoByteAnalysis::states_
ZoneVector< State > states_
Definition string-builder-optimizer.h:210

v8::internal::compiler::OneOrTwoByteAnalysis::broker_
JSHeapBroker * broker_
Definition string-builder-optimizer.h:211

v8::internal::compiler::OneOrTwoByteAnalysis::OneOrTwoByte
State OneOrTwoByte(Node *node)
Definition string-builder-optimizer.cc:228

v8::internal::compiler::OneOrTwoByteAnalysis::ConcatResultIsOneOrTwoByte
static State ConcatResultIsOneOrTwoByte(State a, State b)
Definition string-builder-optimizer.cc:71

v8::internal::compiler::OneOrTwoByteAnalysis::OneOrTwoByteAnalysis
OneOrTwoByteAnalysis(TFGraph *graph, Zone *zone, JSHeapBroker *broker)
Definition string-builder-optimizer.h:155

v8::internal::compiler::OneOrTwoByteAnalysis::TryGetRange
std::optional< std::pair< int64_t, int64_t > > TryGetRange(Node *node)
Definition string-builder-optimizer.cc:83

v8::internal::compiler::OneOrTwoByteAnalysis::State
State
Definition string-builder-optimizer.h:158

v8::internal::compiler::OneOrTwoByteAnalysis::State::kUnknown
@ kUnknown

v8::internal::compiler::OneOrTwoByteAnalysis::State::kOneByte
@ kOneByte

v8::internal::compiler::OneOrTwoByteAnalysis::State::kTwoByte
@ kTwoByte

v8::internal::compiler::OneOrTwoByteAnalysis::State::kCantKnow
@ kCantKnow

v8::internal::compiler::OneOrTwoByteAnalysis::broker
JSHeapBroker * broker()
Definition string-builder-optimizer.h:208

v8::internal::compiler::Schedule
Definition schedule.h:210

v8::internal::compiler::StringBuilderOptimizer
Definition string-builder-optimizer.h:214

v8::internal::compiler::StringBuilderOptimizer::UpdateStatus
void UpdateStatus(Node *node, State state)
Definition string-builder-optimizer.h:310

v8::internal::compiler::StringBuilderOptimizer::broker
JSHeapBroker * broker() const
Definition string-builder-optimizer.h:265

v8::internal::compiler::StringBuilderOptimizer::string_builders_
ZoneVector< StringBuilder > string_builders_
Definition string-builder-optimizer.h:368

v8::internal::compiler::StringBuilderOptimizer::SetStatus
void SetStatus(Node *node, State state, int id=kInvalidId)
Definition string-builder-optimizer.h:297

v8::internal::compiler::StringBuilderOptimizer::schedule_
Schedule * schedule_
Definition string-builder-optimizer.h:354

v8::internal::compiler::StringBuilderOptimizer::IsLoopPhi
bool IsLoopPhi(Node *node) const
Definition string-builder-optimizer.h:331

v8::internal::compiler::StringBuilderOptimizer::loop_headers_
ZoneVector< BasicBlock * > loop_headers_
Definition string-builder-optimizer.h:371

v8::internal::compiler::StringBuilderOptimizer::broker_
JSHeapBroker * broker_
Definition string-builder-optimizer.h:356

v8::internal::compiler::StringBuilderOptimizer::jsgraph
JSGraph * jsgraph() const
Definition string-builder-optimizer.h:261

v8::internal::compiler::StringBuilderOptimizer::blocks_to_trimmings_map_
ZoneVector< std::optional< ZoneVector< Node * > > > blocks_to_trimmings_map_
Definition string-builder-optimizer.h:366

v8::internal::compiler::StringBuilderOptimizer::jsgraph_
JSGraph * jsgraph_
Definition string-builder-optimizer.h:353

v8::internal::compiler::StringBuilderOptimizer::LoopContains
bool LoopContains(Node *loop_phi, Node *node)
Definition string-builder-optimizer.h:335

v8::internal::compiler::StringBuilderOptimizer::State
State
Definition string-builder-optimizer.h:268

v8::internal::compiler::StringBuilderOptimizer::status_
ZoneVector< Status > status_
Definition string-builder-optimizer.h:367

v8::internal::compiler::StringBuilderOptimizer::schedule
Schedule * schedule() const
Definition string-builder-optimizer.h:263

v8::internal::compiler::StringBuilderOptimizer::graph
TFGraph * graph() const
Definition string-builder-optimizer.h:262

v8::internal::compiler::StringBuilderOptimizer::temp_zone_
Zone * temp_zone_
Definition string-builder-optimizer.h:355

v8::internal::compiler::StringBuilderOptimizer::temp_zone
Zone * temp_zone() const
Definition string-builder-optimizer.h:264

v8::internal::compiler::StringBuilderOptimizer::GetStatus
Status GetStatus(Node *node) const
Definition string-builder-optimizer.h:290

v8::internal::compiler::TFGraph
Definition turbofan-graph.h:32

broker_
JSHeapBroker *const broker_
Definition compilation-dependencies.cc:497

graph-assembler.h

broker
JSHeapBroker * broker
Definition graph-builder.cc:68

graph-reducer.h

js-graph.h

block
RpoNumber block
Definition jump-threading.cc:62

schedule_
Schedule const  *const schedule_
Definition machine-graph-verifier.cc:317

machine-operator.h

v8::internal::compiler::kUnvisited
const int kUnvisited
Definition turbofan-graph-visualizer.cc:910

v8::internal::compiler::FirstMacroFusionInstKind::kInvalid
@ kInvalid

v8::internal::internal
internal
Definition wasm-objects-inl.h:458

v8
Definition api-arguments-inl.h:19

node-marker.h

node.h

schedule.h

DCHECK_IMPLIES
#define DCHECK_IMPLIES(v1, v2)
Definition logging.h:493

DCHECK_NE
#define DCHECK_NE(v1, v2)
Definition logging.h:486

DCHECK
#define DCHECK(condition)
Definition logging.h:482

macros.h

V8_EXPORT_PRIVATE
#define V8_EXPORT_PRIVATE
Definition macros.h:460

v8::internal::compiler::StringBuilderOptimizer::Status
Definition string-builder-optimizer.h:283

v8::internal::compiler::StringBuilderOptimizer::Status::state
State state
Definition string-builder-optimizer.h:286

v8::internal::compiler::StringBuilderOptimizer::Status::id
int id
Definition string-builder-optimizer.h:284

v8::internal::compiler::StringBuilderOptimizer::StringBuilder
Definition string-builder-optimizer.h:315

v8::internal::compiler::StringBuilderOptimizer::StringBuilder::has_loop_phi
bool has_loop_phi
Definition string-builder-optimizer.h:318

v8::internal::compiler::StringBuilderOptimizer::StringBuilder::start
Node * start
Definition string-builder-optimizer.h:316

v8::internal::compiler::StringBuilderOptimizer::StringBuilder::one_or_two_bytes
OneOrTwoByteAnalysis::State one_or_two_bytes
Definition string-builder-optimizer.h:319

v8::internal::compiler::StringBuilderOptimizer::StringBuilder::id
int id
Definition string-builder-optimizer.h:317

zone-containers.h