div-barrett_8cc_source.html

// Copyright 2021 the V8 project authors. All rights reserved.

// Use of this source code is governed by a BSD-style license that can be

// found in the LICENSE file.


// Barrett division, finding the inverse with Newton's method.

// Reference: "Fast Division of Large Integers" by Karl Hasselström,

// found at https://treskal.com/s/masters-thesis.pdf


// Many thanks to Karl Wiberg, k@w5.se, for both writing up an

// understandable theoretical description of the algorithm and privately

// providing a demo implementation, on which the implementation in this file is

// based.


#include <algorithm>


#include "src/bigint/bigint-internal.h"

#include "src/bigint/digit-arithmetic.h"

#include "src/bigint/div-helpers.h"

#include "src/bigint/vector-arithmetic.h"


namespace v8 {

namespace bigint {


namespace {


// Debug-only sanity check: asserts that the most significant digit of {X}
// (which the callers in this file treat as the integer part of a fixed-point
// number) lies within the inclusive range [min, max].
// When DEBUG is not set, this function performs no checks at all.
void DcheckIntegerPartRange(Digits X, digit_t min, digit_t max) {
  // Mark every parameter as used up front, so that builds in which the
  // checks below are compiled out do not complain about unused parameters.
  USE(X);
  USE(min);
  USE(max);
#if DEBUG
  const digit_t integer_part = X.msd();
  DCHECK(integer_part >= min);
  DCHECK(integer_part <= max);
#endif
}


}  // namespace


// Z := (the fractional part of) 1/V, computed with an ordinary long division
// instead of Newton iteration.
// Requires Z.len > V.len, V.len > 0, and at least 2 * V.len digits of
// {scratch}, which are used to hold the dividend.
// See comments at {Invert} and {InvertNewton} below for details.
void ProcessorImpl::InvertBasecase(RWDigits Z, Digits V, RWDigits scratch) {
  DCHECK(Z.len() > V.len());
  DCHECK(V.len() > 0);
  DCHECK(scratch.len() >= 2 * V.len());
  const int n = V.len();

  // Build the 2n-digit dividend X in scratch space: the low n digits are
  // zero, the high n digits are (0 - V) computed with borrow propagation.
  RWDigits X(scratch, 0, 2 * n);
  for (int low = 0; low < n; low++) X[low] = 0;
  digit_t borrow = 0;
  for (int high = 0; high < n; high++) {
    X[n + high] = digit_sub2(0, V[high], borrow, &borrow);
  }
  // V is non-zero, so subtracting it from zero must leave a borrow.
  DCHECK(borrow == 1);

  // Only the quotient is of interest; pass an empty remainder.
  RWDigits R(nullptr, 0);
  if (n >= kBurnikelThreshold) {
    DivideBurnikelZiegler(Z, R, X, V);
  } else {
    DivideSchoolbook(Z, R, X, V);
  }
}


// This is Algorithm 4.2 from the paper.
// Computes the inverse of V, shifted by kDigitBits * 2 * V.len, accurate to
// V.len+1 digits. The V.len low digits of the result digits will be written
// to Z, plus there is an implicit top digit with value 1.
// Needs InvertNewtonScratchSpace(V.len) of scratch space.
// The result is either correct or off by one (about half the time it is
// correct, half the time it is one too much, and in the corner case where V is
// minimal and the implicit top digit would have to be 2 it is one too little).
// Barrett's division algorithm can handle that, so we don't care.
//
// Overall structure: a low-precision inverse of the top digits of V is
// obtained from InvertBasecase(), and then refined in a loop that computes
// Z := 2*Z - V*Z^2 on a growing number of digits until V.len fraction digits
// are available.
// Returns early, leaving Z incomplete, whenever should_terminate() reports
// true after one of the multiplications.
void ProcessorImpl::InvertNewton(RWDigits Z, Digits V, RWDigits scratch) {
  const int vn = V.len();
  DCHECK(Z.len() >= vn);
  DCHECK(scratch.len() >= InvertNewtonScratchSpace(vn));
  // Layout of {scratch}: S and W both start at offset 0, U starts behind
  // them. Sharing is possible because W is only created after S has been
  // consumed by the multiplication that produces U.
  const int kSOffset = 0;
  const int kWOffset = 0;  // S and W can share their scratch space.
  const int kUOffset = vn + kInvertNewtonExtraSpace;

  // The base case won't work otherwise.
  DCHECK(V.len() >= 3);

  constexpr int kBasecasePrecision = kNewtonInversionThreshold - 1;
  // V must have more digits than the basecase.
  DCHECK(V.len() > kBasecasePrecision);
  DCHECK(IsBitNormalized(V));

  // Step (1): Setup.
  // Calculate precision required at each step.
  // {k} is the number of fraction bits for the current iteration.
  int k = vn * kDigitBits;
  // One slot per bit of an int: {k} is (roughly) halved on every step below,
  // so the number of recorded steps cannot exceed the bit width of {k}.
  int target_fraction_bits[8 * sizeof(vn)];  // "k_i" in the paper.
  int iteration = -1;  // "i" in the paper, except inverted to run downwards.
  while (k > kBasecasePrecision * kDigitBits) {
    iteration++;
    target_fraction_bits[iteration] = k;
    k = DIV_CEIL(k, 2);
  }
  // At this point, k <= kBasecasePrecision*kDigitBits is the number of
  // fraction bits to use in the base case. {iteration} is the highest index
  // in use for target_fraction_bits[].

  // Step (2): Initial approximation.
  // Invert only the {initial_digits} most significant digits of V.
  int initial_digits = DIV_CEIL(k + 1, kDigitBits);
  Digits top_part_of_v(V, vn - initial_digits, initial_digits);
  InvertBasecase(Z, top_part_of_v, scratch);
  Z[initial_digits] = Z[initial_digits] + 1;  // Implicit top digit.
  // From now on, we'll keep Z.len updated to the part that's already computed.
  Z.set_len(initial_digits + 1);

  // Step (3): Precision doubling loop.
  // The loop is left via the {break} in the "last iteration" branch of (3f),
  // or via an early return on termination.
  while (true) {
    DcheckIntegerPartRange(Z, 1, 2);

    // (3b): S = Z^2
    RWDigits S(scratch, kSOffset, 2 * Z.len());
    Multiply(S, Z, Z);
    if (should_terminate()) return;
    S.TrimOne();  // Top digit of S is unused.
    DcheckIntegerPartRange(S, 1, 4);

    // (3c): T = V, truncated so that at least 2k+3 fraction bits remain.
    // T consists of the {t_len} most significant digits of V.
    int fraction_digits = DIV_CEIL(2 * k + 3, kDigitBits);
    int t_len = std::min(V.len(), fraction_digits);
    Digits T(V, V.len() - t_len, t_len);

    // (3d): U = T * S, truncated so that at least 2k+1 fraction bits remain
    // (U has one integer digit, which might be zero).
    fraction_digits = DIV_CEIL(2 * k + 1, kDigitBits);
    RWDigits U(scratch, kUOffset, S.len() + T.len());
    DCHECK(U.len() > fraction_digits);
    Multiply(U, S, T);
    if (should_terminate()) return;
    // Drop low digits of the product so that exactly {fraction_digits}
    // fraction digits plus the one integer digit remain.
    U = U + (U.len() - (1 + fraction_digits));
    DcheckIntegerPartRange(U, 0, 3);

    // (3e): W = 2 * Z, padded with "0" fraction bits so that it has the
    // same number of fraction bits as U.
    // W reuses the scratch area of S, which is no longer needed now that U
    // has been computed.
    DCHECK(U.len() >= Z.len());
    RWDigits W(scratch, kWOffset, U.len());
    int padding_digits = U.len() - Z.len();
    for (int i = 0; i < padding_digits; i++) W[i] = 0;
    LeftShift(W + padding_digits, Z, 1);
    DcheckIntegerPartRange(W, 2, 4);

    // (3f): Z = W - U.
    // This check is '<=' instead of '<' because U's top digit is its
    // integer part, and we want vn fraction digits.
    if (U.len() <= vn) {
      // Normal subtraction.
      // This is not the last iteration.
      DCHECK(iteration > 0);
      Z.set_len(U.len());
      digit_t borrow = SubtractAndReturnBorrow(Z, W, U);
      DCHECK(borrow == 0);
      USE(borrow);
      DcheckIntegerPartRange(Z, 1, 2);
    } else {
      // Truncate some least significant digits so that we get vn
      // fraction digits, and compute the integer digit separately.
      // This is the last iteration.
      DCHECK(iteration == 0);
      Z.set_len(vn);
      // The {vn} digits directly below the top (integer) digit of W and U.
      Digits W_part(W, W.len() - vn - 1, vn);
      Digits U_part(U, U.len() - vn - 1, vn);
      digit_t borrow = SubtractAndReturnBorrow(Z, W_part, U_part);
      digit_t integer_part = W.msd() - U.msd() - borrow;
      DCHECK(integer_part == 1 || integer_part == 2);
      if (integer_part == 2) {
        // This is the rare case where the correct result would be 2.0, but
        // since we can't express that by returning only the fractional part
        // with an implicit 1-digit, we have to return [1.]9999... instead.
        for (int i = 0; i < Z.len(); i++) Z[i] = ~digit_t{0};
      }
      break;
    }
    // (3g, 3h): Update local variables and loop.
    // Step to the next (higher) precision recorded during setup.
    k = target_fraction_bits[iteration];
    iteration--;
  }
}


// Computes the inverse of V, shifted by kDigitBits * 2 * V.len, accurate to
// V.len+1 digits. The V.len low digits of the result are written to Z; the
// top digit of the result is implicit and has the value 1.
// (Corner case: if V is minimal, the implicit digit should be 2; in that case
// we return one less than the correct answer. DivideBarrett can handle that.)
// Requires Z.len > V.len, V.len >= 1, and a bit-normalized V.
// Needs InvertScratchSpace(V.len) digits of scratch space.
void ProcessorImpl::Invert(RWDigits Z, Digits V, RWDigits scratch) {
  DCHECK(Z.len() > V.len());
  DCHECK(V.len() >= 1);
  DCHECK(IsBitNormalized(V));
  DCHECK(scratch.len() >= InvertScratchSpace(V.len()));

  const int vn = V.len();

  // Large inputs: Newton iteration.
  if (vn >= kNewtonInversionThreshold) {
    InvertNewton(Z, V, scratch);
    return;
  }

  // Medium inputs: plain division.
  if (vn != 1) {
    InvertBasecase(Z, V, scratch);
    if (Z[vn] != 1) return;
    // The division produced a 1 in the digit above the vn fraction digits.
    // That cannot be represented next to the implicit top digit, so store
    // the largest representable fraction (all bits set) instead.
    for (int i = 0; i < vn; i++) Z[i] = ~digit_t{0};
    Z[vn] = 0;
    return;
  }

  // Single-digit input: one double-digit-by-digit division suffices. The
  // dividend has high digit ~d and an all-ones low digit.
  const digit_t divisor = V[0];
  digit_t unused_remainder;
  Z[0] = digit_div(~divisor, ~digit_t{0}, divisor, &unused_remainder);
  Z[1] = 0;
}


// This is algorithm 3.5 from the paper.
// Computes Q(uotient) and R(emainder) for A/B using I, which is a
// precomputed approximation of 1/B (e.g. with Invert() above). I has an
// implicit high digit with value 1 that is not stored.
// Preconditions (DCHECKed below): B.len < A.len <= 2 * B.len, B is
// bit-normalized, I.len == A.len - B.len, Q.len > A.len - B.len, and
// R.len >= B.len.
// Needs DivideBarrettScratchSpace(A.len) scratch space.
// Returns early, without completing Q and R, if should_terminate() reports
// true after one of the multiplications.
void ProcessorImpl::DivideBarrett(RWDigits Q, RWDigits R, Digits A, Digits B,
                                  Digits I, RWDigits scratch) {
  DCHECK(Q.len() > A.len() - B.len());
  DCHECK(R.len() >= B.len());
  DCHECK(A.len() > B.len());  // Careful: This is *not* '>=' !
  DCHECK(A.len() <= 2 * B.len());
  DCHECK(B.len() > 0);
  DCHECK(IsBitNormalized(B));
  DCHECK(I.len() == A.len() - B.len());
  DCHECK(scratch.len() >= DivideBarrettScratchSpace(A.len()));

  const int b_len = B.len();
  const int inverse_len = I.len();
  // Remember how many quotient digits the caller asked for; Q's length is
  // temporarily shortened while the quotient is being computed.
  const int requested_q_len = Q.len();

  // (1): A1 = A with B.len fewer digits, i.e. the high part of A.
  Digits A1 = A + b_len;
  DCHECK(A1.len() == I.len());

  // (2): Q = A1*I with I.len fewer digits.
  // The stored digits of {I} omit its leading 1, so the contribution of that
  // digit is accounted for by adding {A1} to the high half of the product.
  RWDigits K(scratch, 0, 2 * inverse_len);
  Multiply(K, A1, I);
  if (should_terminate()) return;
  Q.set_len(inverse_len + 1);
  Add(Q, K + inverse_len, A1);

  // (3): R = A - B*Q (approximate remainder).
  // {K} is dead from here on, so {P} takes over the start of {scratch}.
  RWDigits P(scratch, 0, A.len() + 1);
  Multiply(P, B, Q);
  if (should_terminate()) return;
  digit_t borrow = SubtractAndReturnBorrow(R, A, Digits(P, 0, b_len));
  // The caller may have handed in an R that is wider than B; clear the rest.
  for (int i = b_len; i < R.len(); i++) R[i] = 0;
  // One extra digit above R, kept in a local: it absorbs the borrow and
  // reveals the sign of the approximate remainder.
  digit_t r_high = A[b_len] - P[b_len] - borrow;

  // Correct R and Q until 0 <= R < B. Only a very small, constant number of
  // correction steps is ever needed, unless the caller supplied a bad
  // approximate inverse.
  const bool remainder_is_negative = (r_high >> (kDigitBits - 1)) == 1;
  if (!remainder_is_negative) {
    digit_t q_add = 0;
    while (r_high != 0 || GreaterThanOrEqual(R, B)) {
      // (5c): R >= B, so R -= B
      r_high -= SubtractAndReturnBorrow(R, R, B);
      q_add++;
      DCHECK(q_add <= 5);
    }
    Add(Q, q_add);
  } else {
    // (5b): R < 0, so R += B
    digit_t q_sub = 0;
    do {
      r_high += AddAndReturnCarry(R, R, B);
      q_sub++;
      DCHECK(q_sub <= 5);
    } while (r_high != 0);
    Subtract(Q, q_sub);
  }

  // (5a): Return. Restore the caller-visible length of Q and clear all
  // digits beyond the ones that were computed above.
  const int computed_q_len = Q.len();
  Q.set_len(requested_q_len);
  for (int i = computed_q_len; i < requested_q_len; i++) Q[i] = 0;
}


// Computes Q(uotient) and R(emainder) for A/B, using Barrett division.
//
// This is the general entry point: it accepts any A with A.len > B.len.
// It normalizes the inputs, computes an approximate inverse of (the top
// digits of) B once, and then either calls the core six-argument
// DivideBarrett() directly (when A has at most twice as many digits as B),
// or splits A into B-sized chunks and divides them from the top down,
// reusing the same inverse for every chunk.
//
// Requires Q.len > A.len - B.len and R.len >= B.len.
// Returns early, leaving Q and R incomplete, if should_terminate() reports
// true after any of the expensive steps.
void ProcessorImpl::DivideBarrett(RWDigits Q, RWDigits R, Digits A, Digits B) {
  DCHECK(Q.len() > A.len() - B.len());
  DCHECK(R.len() >= B.len());
  DCHECK(A.len() > B.len());  // Careful: This is *not* '>=' !
  DCHECK(B.len() > 0);

  // Normalize B, and shift A by the same amount.
  ShiftedDigits b_normalized(B);
  ShiftedDigits a_normalized(A, b_normalized.shift());
  // Keep the code below more concise.
  B = b_normalized;
  A = a_normalized;

  // The core DivideBarrett function above only supports A having at most
  // twice as many digits as B. We generalize this to arbitrary inputs
  // similar to Burnikel-Ziegler division by performing a t-by-1 division
  // of B-sized chunks. It's easy to special-case the situation where we
  // don't need to bother.
  int barrett_dividend_length = std::min(A.len(), 2 * B.len());
  int i_len = barrett_dividend_length - B.len();
  ScratchDigits I(i_len + 1);  // +1 is for temporary use by Invert().
  // One scratch buffer serves both the inversion and all core divisions, so
  // it must be large enough for whichever of the two needs more.
  int scratch_len =
      std::max(InvertScratchSpace(i_len),
               DivideBarrettScratchSpace(barrett_dividend_length));
  ScratchDigits scratch(scratch_len);
  // Invert the {i_len} most significant digits of B.
  Invert(I, Digits(B, B.len() - i_len, i_len), scratch);
  if (should_terminate()) return;
  I.TrimOne();
  DCHECK(I.len() == i_len);
  if (A.len() > 2 * B.len()) {
    // This follows the variable names and algorithmic steps of
    // DivideBurnikelZiegler().
    int n = B.len();  // Chunk length.
    // (5): {t} is the number of B-sized chunks of A.
    int t = DIV_CEIL(A.len(), n);
    DCHECK(t >= 3);
    // (6)/(7): Z is used for the current 2-chunk block to be divided by B,
    // initialized to the two topmost chunks of A.
    int z_len = n * 2;
    ScratchDigits Z(z_len);
    PutAt(Z, A + n * (t - 2), z_len);
    // (8): For i from t-2 downto 0 do
    int qi_len = n + 1;
    ScratchDigits Qi(qi_len);
    ScratchDigits Ri(n);
    // First iteration unrolled and specialized.
    {
      int i = t - 2;
      DivideBarrett(Qi, Ri, Z, B, I, scratch);
      if (should_terminate()) return;
      RWDigits target = Q + n * i;
      // In the first iteration, all qi_len = n + 1 digits may be used.
      int to_copy = std::min(qi_len, target.len());
      for (int j = 0; j < to_copy; j++) target[j] = Qi[j];
      for (int j = to_copy; j < target.len(); j++) target[j] = 0;
#if DEBUG
      // Any quotient digits that did not fit into {target} must be zero.
      for (int j = to_copy; j < Qi.len(); j++) {
        DCHECK(Qi[j] == 0);
      }
#endif
    }
    // Now loop over any remaining iterations.
    for (int i = t - 3; i >= 0; i--) {
      // (8b): If i > 0, set Z_(i-1) = [Ri, A_(i-1)].
      // (De-duped with unrolled first iteration, hence reading A_(i).)
      PutAt(Z + n, Ri, n);
      PutAt(Z, A + n * i, n);
      // (8a): Compute Qi, Ri such that Zi = B*Qi + Ri.
      DivideBarrett(Qi, Ri, Z, B, I, scratch);
      // Check for termination before inspecting Qi: if the core division
      // bailed out early, Qi still holds stale or partially computed digits
      // (e.g. a non-zero top digit left over from the first iteration), and
      // asserting on them would fail spuriously.
      if (should_terminate()) return;
      // The high chunk of Z is a remainder of a division by B here, so the
      // quotient fits into n digits.
      DCHECK(Qi[qi_len - 1] == 0);
      // (9): Return Q = [Q_(t-2), ..., Q_0]...
      PutAt(Q + n * i, Qi, n);
    }
    Ri.Normalize();
    DCHECK(Ri.len() <= R.len());
    // (9): ...and R = R_0 * 2^(-leading_zeros).
    RightShift(R, Ri, b_normalized.shift());
  } else {
    DivideBarrett(Q, R, A, B, I, scratch);
    if (should_terminate()) return;
    // Undo the normalization shift on the remainder.
    RightShift(R, R, b_normalized.shift());
  }
}


}  // namespace bigint

}  // namespace v8

V
#define V(Name)

T
#define T

bigint-internal.h

v8::bigint::ProcessorImpl::DivideBurnikelZiegler
void DivideBurnikelZiegler(RWDigits Q, RWDigits R, Digits A, Digits B)
Definition div-burnikel.cc:197

v8::bigint::ProcessorImpl::should_terminate
bool should_terminate()
Definition bigint-internal.h:77

v8::bigint::ProcessorImpl::Multiply
void Multiply(RWDigits Z, Digits X, Digits Y)
Definition bigint-internal.cc:36

v8::bigint::ProcessorImpl::DivideSchoolbook
void DivideSchoolbook(RWDigits Q, RWDigits R, Digits A, Digits B)
Definition div-schoolbook.cc:94

digit-arithmetic.h

div-helpers.h

X
too high values may cause the compiler to set high thresholds for inlining to as much as possible avoid inlined allocation of objects that cannot escape trace load stores from virtual maglev objects use TurboFan fast string builder analyze liveness of environment slots and zap dead values trace TurboFan load elimination emit data about basic block usage in builtins to this enable builtin reordering when run mksnapshot flag for emit warnings when applying builtin profile data verify register allocation in TurboFan randomly schedule instructions to stress dependency tracking enable store store elimination in TurboFan rewrite far to near simulate GC compiler thread race related to allow float parameters to be passed in simulator mode JS Wasm Run additional turbo_optimize_inlined_js_wasm_wrappers enable experimental feedback collection in generic lowering enable Turboshaft s WasmLoadElimination enable Turboshaft s low level load elimination for JS enable Turboshaft s escape analysis for string concatenation use enable Turbolev features that we want to ship in the not too far future trace individual Turboshaft reduction steps trace intermediate Turboshaft reduction steps invocation count threshold for early optimization Enables optimizations which favor memory size over execution speed Enables sampling allocation profiler with X as a sample interval min size of a semi the new space consists of two semi spaces max size of the Collect garbage after Collect garbage after keeps maps alive for< n > old space garbage collections print one detailed trace line in allocation gc speed threshold for starting incremental marking via a task in percent of available threshold for starting incremental marking immediately in percent of available Use a single schedule for determining a marking schedule between JS and C objects schedules the minor GC task with kUserVisible priority max worker number of concurrent for NumberOfWorkerThreads start background threads that allocate memory 
concurrent_array_buffer_sweeping use parallel threads to clear weak refs in the atomic pause trace progress of the incremental marking trace object counts and memory usage report a tick only when allocated zone memory changes by this amount TracingFlags::gc_stats TracingFlags::gc_stats track native contexts that are expected to be garbage collected verify heap pointers before and after GC memory reducer runs GC with ReduceMemoryFootprint flag Maximum number of memory reducer GCs scheduled Old gen GC speed is computed directly from gc tracer counters Perform compaction on full GCs based on V8 s default heuristics Perform compaction on every full GC Perform code space compaction when finalizing a full GC with stack Stress GC compaction to flush out bugs with moving objects flush of baseline code when it has not been executed recently Use time base code flushing instead of age Use a progress bar to scan large objects in increments when incremental marking is active force incremental marking for small heaps and run it more often force marking at random points between and X(inclusive) percent " "of the regular marking start limit") DEFINE_INT(stress_scavenge

K
int K
Definition mul-fft.cc:295

n
int n
Definition mul-fft.cc:296

v8::bigint::digit_div
static digit_t digit_div(digit_t high, digit_t low, digit_t divisor, digit_t *remainder)
Definition digit-arithmetic.h:119

v8::bigint::PutAt
void PutAt(RWDigits Z, Digits A, int count)
Definition div-helpers.h:19

v8::bigint::IsBitNormalized
bool IsBitNormalized(Digits X)
Definition vector-arithmetic.h:48

v8::bigint::LeftShift
void LeftShift(RWDigits Z, Digits X, digit_t shift)
Definition bitwise.cc:136

v8::bigint::kDigitBits
static constexpr int kDigitBits
Definition bigint.h:51

v8::bigint::InvertNewtonScratchSpace
constexpr int InvertNewtonScratchSpace(int n)
Definition bigint-internal.h:109

v8::bigint::Add
void Add(RWDigits Z, Digits X, Digits Y)
Definition vector-arithmetic.cc:41

v8::bigint::GreaterThanOrEqual
bool GreaterThanOrEqual(Digits A, Digits B)
Definition vector-arithmetic.h:52

v8::bigint::kInvertNewtonExtraSpace
constexpr int kInvertNewtonExtraSpace
Definition bigint-internal.h:108

v8::bigint::Subtract
void Subtract(RWDigits Z, Digits X, Digits Y)
Definition vector-arithmetic.cc:59

v8::bigint::digit_sub2
digit_t digit_sub2(digit_t a, digit_t b, digit_t borrow_in, digit_t *borrow_out)
Definition digit-arithmetic.h:65

v8::bigint::SubtractAndReturnBorrow
digit_t SubtractAndReturnBorrow(RWDigits Z, Digits X, Digits Y)
Definition vector-arithmetic.cc:84

v8::bigint::RightShift
void RightShift(RWDigits Z, Digits X, digit_t shift, const RightShiftState &state)
Definition bitwise.cc:195

v8::bigint::AddAndReturnCarry
digit_t AddAndReturnCarry(RWDigits Z, Digits X, Digits Y)
Definition vector-arithmetic.cc:75

v8::bigint::kNewtonInversionThreshold
constexpr int kNewtonInversionThreshold
Definition bigint-internal.h:21

v8::bigint::kBurnikelThreshold
constexpr int kBurnikelThreshold
Definition bigint-internal.h:20

v8::bigint::InvertScratchSpace
constexpr int InvertScratchSpace(int n)
Definition bigint-internal.h:112

v8::bigint::DivideBarrettScratchSpace
constexpr int DivideBarrettScratchSpace(int n)
Definition bigint-internal.h:104

v8::bigint::digit_t
uintptr_t digit_t
Definition bigint.h:34

v8::internal
Definition api-arguments-inl.h:20

v8::internal::W
constexpr int W
Definition constants-arm.h:176

v8::internal::B
constexpr int B
Definition constants-arm.h:178

v8::internal::U
constexpr int U
Definition constants-arm.h:180

v8::internal::S
constexpr int S
Definition constants-arm.h:175

v8::internal::A
constexpr int A
Definition constants-arm.h:177

v8
Definition api-arguments-inl.h:19

P
#define P(name, number_of_args, result_size)
Definition runtime.cc:22

I
#define I(name, number_of_args, result_size)
Definition runtime.cc:36

DCHECK
#define DCHECK(condition)
Definition logging.h:482

USE
#define USE(...)
Definition macros.h:293

DIV_CEIL
#define DIV_CEIL(x, y)
Definition util.h:19

vector-arithmetic.h