mul-schoolbook_8cc_source.html

// Copyright 2021 the V8 project authors. All rights reserved.

// Use of this source code is governed by a BSD-style license that can be

// found in the LICENSE file.


#include "src/bigint/bigint-internal.h"

#include "src/bigint/digit-arithmetic.h"

#include "src/bigint/vector-arithmetic.h"


namespace v8 {

namespace bigint {


// Z := X * y, where y is a single digit.


// Multiplies every digit of X by the single digit y and stores the result in
// Z. The multiplier must be nonzero (DCHECKed). Z is written at indices
// 0..X.len(), so it needs room for at least X.len() + 1 digits; any digits
// beyond that are set to zero.
void ProcessorImpl::MultiplySingle(RWDigits Z, Digits X, digit_t y) {
  DCHECK(y != 0);
  const int x_len = X.len();
  // Carry out of the previous digit's three-way addition.
  digit_t carry = 0;
  // High half of the previous digit's product; it is folded into the next
  // result digit.
  digit_t pending_high = 0;
  for (int pos = 0; pos < x_len; pos++) {
    digit_t product_high;
    const digit_t product_low = digit_mul(X[pos], y, &product_high);
    Z[pos] = digit_add3(product_low, pending_high, carry, &carry);
    pending_high = product_high;
  }
  AddWorkEstimate(x_len);
  // Top digit: whatever is still pending from the last product.
  Z[x_len] = carry + pending_high;
  // Clear any space in Z above the product.
  for (int pos = x_len + 1; pos < Z.len(); pos++) {
    Z[pos] = 0;
  }
}


// Adds the products X[j] * Y[i - j], for j in [min, max], into column i and
// then stores the column's digit in Z[i].
//
// The macro expects these names to be in scope where it is expanded: the
// operands X and Y, the result Z, the column index i, and the digit_t
// accumulators zi, next, carry and next_carry. For every product, the low
// half is added to zi (overflows are counted in carry) and the high half is
// added to next (overflows are counted in next_carry).
//
// The expansion ends without a semicolon, so use it as a statement:
// "BODY(lo, hi);".
//
// Note: each line of the definition except the last must end in a line
// continuation that is directly followed by the next macro line. A blank
// line in between ends the definition early and leaves the loop at
// namespace scope.
#define BODY(min, max)                              \
  for (int j = (min); j <= (max); j++) {            \
    digit_t high;                                   \
    digit_t low = digit_mul(X[j], Y[i - j], &high); \
    digit_t carrybit;                               \
    zi = digit_add2(zi, low, &carrybit);            \
    carry += carrybit;                              \
    next = digit_add2(next, high, &carrybit);       \
    next_carry += carrybit;                         \
  }                                                 \
  Z[i] = zi


// Z := X * Y.

// O(n²) "schoolbook" multiplication algorithm. Optimized to minimize

// bounds and overflow checks: rather than looping over X for every digit

// of Y (or vice versa), we loop over Z. The {BODY} macro above is what

// computes one of Z's digits as a sum of the products of relevant digits

// of X and Y. This yields a nearly 2x improvement compared to more obvious

// implementations.

// This method is *highly* performance sensitive even for the advanced

// algorithms, which use this as the base case of their recursive calls.


// Preconditions (checked with the DCHECKs below): X and Y satisfy
// IsDigitNormalized, X.len() >= Y.len(), and Z.len() >= X.len() + Y.len().
// If either input is empty, Z is cleared. Otherwise every digit of Z is
// written: the product digits first, then zeros for any remaining space.
//
// State shared with the {BODY} macro while column i is being computed:
//   zi         - running sum (modulo the digit width) that becomes Z[i].
//   carry      - number of overflows out of zi; belongs to column i + 1.
//   next       - running sum of the high product halves; belongs to
//                column i + 1.
//   next_carry - number of overflows out of next; belongs to column i + 2.
void ProcessorImpl::MultiplySchoolbook(RWDigits Z, Digits X, Digits Y) {

  DCHECK(IsDigitNormalized(X));

  DCHECK(IsDigitNormalized(Y));

  DCHECK(X.len() >= Y.len());

  DCHECK(Z.len() >= X.len() + Y.len());

  if (X.len() == 0 || Y.len() == 0) return Z.Clear();

  // {next} has no initializer here; it is handed to digit_mul just below as
  // the destination for the high half of the first product.
  digit_t next, next_carry = 0, carry = 0;

  // Unrolled first iteration: it's trivial.
  // Column 0 consists of the single product X[0] * Y[0].

  Z[0] = digit_mul(X[0], Y[0], &next);

  int i = 1;

  // Unrolled second iteration: a little less setup.
  // Column 1 is X[0] * Y[1] + X[1] * Y[0]; both indices are valid because
  // 1 < Y.len() <= X.len(). {carry} and {next_carry} are still zero, so the
  // column is seeded with just the high half of the first product.

  if (i < Y.len()) {

    digit_t zi = next;

    next = 0;

    BODY(0, 1);

    i++;

  }

  // Main part: since X.len() >= Y.len() > i, no bounds checks are needed.

  for (; i < Y.len(); i++) {

    // Seed this column with what the previous column pushed into it. The
    // overflow of that addition (left in {carry}) together with
    // {next_carry} seeds the column after this one.
    digit_t zi = digit_add2(next, carry, &carry);

    next = next_carry + carry;

    carry = 0;

    next_carry = 0;

    BODY(0, i);

    // NOTE(review): BODY(0, i) computes i + 1 products while the value
    // reported is i; presumably the estimate only needs to be approximate.
    AddWorkEstimate(i);

  }

  // Last part: i exceeds Y now, we have to be careful about bounds.
  // {loop_end} is the highest column that still receives a product, namely
  // X[X.len() - 1] * Y[Y.len() - 1].

  int loop_end = X.len() + Y.len() - 2;

  for (; i <= loop_end; i++) {

    // Restrict j so that X[j] and Y[i - j] both stay in range:
    // j <= X.len() - 1, and i - j <= Y.len() - 1.
    int max_x_index = std::min(i, X.len() - 1);

    int max_y_index = Y.len() - 1;

    int min_x_index = i - max_y_index;

    // Same column seeding as in the main part above.
    digit_t zi = digit_add2(next, carry, &carry);

    next = next_carry + carry;

    carry = 0;

    next_carry = 0;

    BODY(min_x_index, max_x_index);

    // NOTE(review): this is one less than the number of products computed
    // by the BODY expansion above; presumably fine for an estimate.
    AddWorkEstimate(max_x_index - min_x_index);

  }

  // Write the last digit, and zero out any extra space in Z.
  // The DCHECK asserts that this final addition does not overflow.

  Z[i++] = digit_add2(next, carry, &carry);

  DCHECK(carry == 0);

  for (; i < Z.len(); i++) Z[i] = 0;

}


// {BODY} is a helper for MultiplySchoolbook; drop the definition so that it
// is not visible to code following this point.
#undef BODY


}  // namespace bigint

}  // namespace v8

bigint-internal.h

v8::bigint::Digits
Definition bigint.h:57

v8::bigint::Digits::len
int len()
Definition bigint.h:113

v8::bigint::ProcessorImpl::MultiplySingle
void MultiplySingle(RWDigits Z, Digits X, digit_t y)
Definition mul-schoolbook.cc:13

v8::bigint::ProcessorImpl::MultiplySchoolbook
void MultiplySchoolbook(RWDigits Z, Digits X, Digits Y)
Definition mul-schoolbook.cc:49

v8::bigint::ProcessorImpl::AddWorkEstimate
void AddWorkEstimate(uintptr_t estimate)
Definition bigint-internal.h:86

v8::bigint::RWDigits
Definition bigint.h:138

v8::bigint::RWDigits::Clear
void Clear()
Definition bigint.h:193

digit-arithmetic.h

X
too high values may cause the compiler to set high thresholds for inlining to as much as possible avoid inlined allocation of objects that cannot escape trace load stores from virtual maglev objects use TurboFan fast string builder analyze liveness of environment slots and zap dead values trace TurboFan load elimination emit data about basic block usage in builtins to this enable builtin reordering when run mksnapshot flag for emit warnings when applying builtin profile data verify register allocation in TurboFan randomly schedule instructions to stress dependency tracking enable store store elimination in TurboFan rewrite far to near simulate GC compiler thread race related to allow float parameters to be passed in simulator mode JS Wasm Run additional turbo_optimize_inlined_js_wasm_wrappers enable experimental feedback collection in generic lowering enable Turboshaft s WasmLoadElimination enable Turboshaft s low level load elimination for JS enable Turboshaft s escape analysis for string concatenation use enable Turbolev features that we want to ship in the not too far future trace individual Turboshaft reduction steps trace intermediate Turboshaft reduction steps invocation count threshold for early optimization Enables optimizations which favor memory size over execution speed Enables sampling allocation profiler with X as a sample interval min size of a semi the new space consists of two semi spaces max size of the Collect garbage after Collect garbage after keeps maps alive for< n > old space garbage collections print one detailed trace line in allocation gc speed threshold for starting incremental marking via a task in percent of available threshold for starting incremental marking immediately in percent of available Use a single schedule for determining a marking schedule between JS and C objects schedules the minor GC task with kUserVisible priority max worker number of concurrent for NumberOfWorkerThreads start background threads that allocate memory 
concurrent_array_buffer_sweeping use parallel threads to clear weak refs in the atomic pause trace progress of the incremental marking trace object counts and memory usage report a tick only when allocated zone memory changes by this amount TracingFlags::gc_stats TracingFlags::gc_stats track native contexts that are expected to be garbage collected verify heap pointers before and after GC memory reducer runs GC with ReduceMemoryFootprint flag Maximum number of memory reducer GCs scheduled Old gen GC speed is computed directly from gc tracer counters Perform compaction on full GCs based on V8 s default heuristics Perform compaction on every full GC Perform code space compaction when finalizing a full GC with stack Stress GC compaction to flush out bugs with moving objects flush of baseline code when it has not been executed recently Use time base code flushing instead of age Use a progress bar to scan large objects in increments when incremental marking is active force incremental marking for small heaps and run it more often force marking at random points between and X(inclusive) percent " "of the regular marking start limit") DEFINE_INT(stress_scavenge

y
int y
Definition liveedit-diff.cc:60

BODY
#define BODY(min, max)
Definition mul-schoolbook.cc:28

v8::bigint::digit_add3
digit_t digit_add3(digit_t a, digit_t b, digit_t c, digit_t *carry)
Definition digit-arithmetic.h:37

v8::bigint::IsDigitNormalized
bool IsDigitNormalized(Digits X)
Definition vector-arithmetic.h:47

v8::bigint::digit_t
uintptr_t digit_t
Definition bigint.h:34

v8::bigint::digit_mul
digit_t digit_mul(digit_t a, digit_t b, digit_t *high)
Definition digit-arithmetic.h:82

v8::bigint::digit_add2
digit_t digit_add2(digit_t a, digit_t b, digit_t *carry)
Definition digit-arithmetic.h:23

v8::internal
Definition api-arguments-inl.h:20

v8
Definition api-arguments-inl.h:19

DCHECK
#define DCHECK(condition)
Definition logging.h:482

vector-arithmetic.h