div-burnikel_8cc_source.html

// Copyright 2021 the V8 project authors. All rights reserved.

// Use of this source code is governed by a BSD-style license that can be

// found in the LICENSE file.


// Burnikel-Ziegler division.

// Reference: "Fast Recursive Division" by Christoph Burnikel and Joachim

// Ziegler, found at http://cr.yp.to/bib/1998/burnikel.ps


#include <string.h>


#include "src/bigint/bigint-internal.h"

#include "src/bigint/digit-arithmetic.h"

#include "src/bigint/div-helpers.h"

#include "src/bigint/util.h"

#include "src/bigint/vector-arithmetic.h"


namespace v8 {

namespace bigint {


namespace {


// Compares [a_high, A] with B.

// Returns:

// - a value < 0 if [a_high, A] < B

// - 0           if [a_high, A] == B

// - a value > 0 if [a_high, A] > B.

int SpecialCompare(digit_t a_high, Digits A, Digits B) {

  B.Normalize();

  int a_len;

  if (a_high == 0) {

    A.Normalize();

    a_len = A.len();

  } else {

    a_len = A.len() + 1;

  }

  int diff = a_len - B.len();

  if (diff != 0) return diff;

  int i = a_len - 1;

  if (a_high != 0) {

    if (a_high > B[i]) return 1;

    if (a_high < B[i]) return -1;

    i--;

  }

  while (i >= 0 && A[i] == B[i]) i--;

  if (i < 0) return 0;

  return A[i] > B[i] ? 1 : -1;

}


void SetOnes(RWDigits X) {

  memset(X.digits(), 0xFF, X.len() * sizeof(digit_t));

}


// Since the Burnikel-Ziegler method is inherently recursive, we put

// non-changing data into a container object.

class BZ {

 public:

  BZ(ProcessorImpl* proc, int scratch_space)

      : proc_(proc),

        scratch_mem_(scratch_space >= kBurnikelThreshold ? scratch_space : 0) {}


  void DivideBasecase(RWDigits Q, RWDigits R, Digits A, Digits B);

  void D3n2n(RWDigits Q, RWDigits R, Digits A1A2, Digits A3, Digits B);

  void D2n1n(RWDigits Q, RWDigits R, Digits A, Digits B);


 private:

  ProcessorImpl* proc_;

  Storage scratch_mem_;

};


void BZ::DivideBasecase(RWDigits Q, RWDigits R, Digits A, Digits B) {

  A.Normalize();

  B.Normalize();

  DCHECK(B.len() > 0);

  int cmp = Compare(A, B);

  if (cmp <= 0) {

    Q.Clear();

    if (cmp == 0) {

      // If A == B, then Q=1, R=0.

      R.Clear();

      Q[0] = 1;

    } else {

      // If A < B, then Q=0, R=A.

      PutAt(R, A, R.len());

    }

    return;

  }

  if (B.len() == 1) {

    return proc_->DivideSingle(Q, R.digits(), A, B[0]);

  }

  return proc_->DivideSchoolbook(Q, R, A, B);

}


// Algorithm 2 from the paper. Variable names same as there.

// Returns Q(uotient) and R(emainder) for A/B, with B having two thirds

// the size of A = [A1, A2, A3].

void BZ::D3n2n(RWDigits Q, RWDigits R, Digits A1A2, Digits A3, Digits B) {

  DCHECK((B.len() & 1) == 0);

  int n = B.len() / 2;

  DCHECK(A1A2.len() == 2 * n);

  // Actual condition is stricter than length: A < B * 2^(kDigitBits * n)

  DCHECK(Compare(A1A2, B) < 0);

  DCHECK(A3.len() == n);

  DCHECK(Q.len() == n);

  DCHECK(R.len() == 2 * n);

  // 1. Split A into three parts A = [A1, A2, A3] with Ai < 2^(kDigitBits * n).

  Digits A1(A1A2, n, n);

  // 2. Split B into two parts B = [B1, B2] with Bi < 2^(kDigitBits * n).

  Digits B1(B, n, n);

  Digits B2(B, 0, n);

  // 3. Distinguish the cases A1 < B1 or A1 >= B1.

  RWDigits Qhat = Q;

  RWDigits R1(R, n, n);

  digit_t r1_high = 0;

  if (Compare(A1, B1) < 0) {

    // 3a. If A1 < B1, compute Qhat = floor([A1, A2] / B1) with remainder R1

    //     using algorithm D2n1n.

    D2n1n(Qhat, R1, A1A2, B1);

    if (proc_->should_terminate()) return;

  } else {

    // 3b. If A1 >= B1, set Qhat = 2^(kDigitBits * n) - 1 and set

    //     R1 = [A1, A2] - [B1, 0] + [0, B1]

    SetOnes(Qhat);

    // Step 1: compute A1 - B1, which can't underflow because of the comparison

    // guarding this else-branch, and always has a one-digit result because

    // of this function's preconditions.

    RWDigits temp = R1;

    Subtract(temp, A1, B1);

    temp.Normalize();

    DCHECK(temp.len() <= 1);

    if (temp.len() > 0) r1_high = temp[0];

    // Step 2: compute A2 + B1.

    Digits A2(A1A2, 0, n);

    r1_high += AddAndReturnCarry(R1, A2, B1);

  }

  // 4. Compute D = Qhat * B2 using (Karatsuba) multiplication.

  RWDigits D(scratch_mem_.get(), 2 * n);

  proc_->Multiply(D, Qhat, B2);

  if (proc_->should_terminate()) return;


  // 5. Compute Rhat = R1*2^(kDigitBits * n) + A3 - D = [R1, A3] - D.

  PutAt(R, A3, n);

  // 6. As long as Rhat < 0, repeat:

  while (SpecialCompare(r1_high, R, D) < 0) {

    // 6a. Rhat = Rhat + B

    r1_high += AddAndReturnCarry(R, R, B);

    // 6b. Qhat = Qhat - 1

    Subtract(Qhat, 1);

  }

  // 5. Compute Rhat = R1*2^(kDigitBits * n) + A3 - D = [R1, A3] - D.

  digit_t borrow = SubtractAndReturnBorrow(R, R, D);

  DCHECK(borrow == r1_high);

  DCHECK(Compare(R, B) < 0);

  (void)borrow;

  // 7. Return R = Rhat, Q = Qhat.

}


// Algorithm 1 from the paper. Variable names same as there.

// Returns Q(uotient) and (R)emainder for A/B, with A twice the size of B.

void BZ::D2n1n(RWDigits Q, RWDigits R, Digits A, Digits B) {

  int n = B.len();

  DCHECK(A.len() <= 2 * n);

  // A < B * 2^(kDigitsBits * n)

  DCHECK(Compare(Digits(A, n, n), B) < 0);

  DCHECK(Q.len() == n);

  DCHECK(R.len() == n);

  // 1. If n is odd or smaller than some convenient constant, compute Q and R

  //    by school division and return.

  if ((n & 1) == 1 || n < kBurnikelThreshold) {

    return DivideBasecase(Q, R, A, B);

  }

  // 2. Split A into four parts A = [A1, ..., A4] with

  //    Ai < 2^(kDigitBits * n / 2). Split B into two parts [B2, B1] with

  //    Bi < 2^(kDigitBits * n / 2).

  Digits A1A2(A, n, n);

  Digits A3(A, n / 2, n / 2);

  Digits A4(A, 0, n / 2);

  // 3. Compute the high part Q1 of floor(A/B) as

  //    Q1 = floor([A1, A2, A3] / [B1, B2]) with remainder R1 = [R11, R12],

  //    using algorithm D3n2n.

  RWDigits Q1(Q, n / 2, n / 2);

  ScratchDigits R1(n);

  D3n2n(Q1, R1, A1A2, A3, B);

  if (proc_->should_terminate()) return;

  // 4. Compute the low part Q2 of floor(A/B) as

  //    Q2 = floor([R11, R12, A4] / [B1, B2]) with remainder R, using

  //    algorithm D3n2n.

  RWDigits Q2(Q, 0, n / 2);

  D3n2n(Q2, R, R1, A4, B);

  // 5. Return Q = [Q1, Q2] and R.

}


}  // namespace


// Algorithm 3 from the paper. Variables names same as there.

// Returns Q(uotient) and R(emainder) for A/B (no size restrictions).

// R is optional, Q is not.


void ProcessorImpl::DivideBurnikelZiegler(RWDigits Q, RWDigits R, Digits A,

                                          Digits B) {

  DCHECK(A.len() >= B.len());

  DCHECK(R.len() == 0 || R.len() >= B.len());

  DCHECK(Q.len() > A.len() - B.len());

  int r = A.len();

  int s = B.len();

  // The requirements are:

  // - n >= s, n as small as possible.

  // - m must be a power of two.

  // 1. Set m = min {2^k | 2^k * kBurnikelThreshold > s}.

  int m = 1 << BitLength(s / kBurnikelThreshold);

  // 2. Set j = roundup(s/m) and n = j * m.

  int j = DIV_CEIL(s, m);

  int n = j * m;

  // 3. Set sigma = max{tao | 2^tao * B < 2^(kDigitBits * n)}.

  int sigma = CountLeadingZeros(B[s - 1]);

  int digit_shift = n - s;

  // 4. Set B = B * 2^sigma to normalize B. Shift A by the same amount.

  ScratchDigits B_shifted(n);

  LeftShift(B_shifted + digit_shift, B, sigma);

  for (int i = 0; i < digit_shift; i++) B_shifted[i] = 0;

  B = B_shifted;

  // We need an extra digit if A's top digit does not have enough space for

  // the left-shift by {sigma}. Additionally, the top bit of A must be 0

  // (see "-1" in step 5 below), which combined with B being normalized (i.e.

  // B's top bit is 1) ensures the preconditions of the helper functions.

  int extra_digit = CountLeadingZeros(A[r - 1]) < (sigma + 1) ? 1 : 0;

  r = A.len() + digit_shift + extra_digit;

  ScratchDigits A_shifted(r);

  LeftShift(A_shifted + digit_shift, A, sigma);

  for (int i = 0; i < digit_shift; i++) A_shifted[i] = 0;

  A = A_shifted;

  // 5. Set t = min{t >= 2 | A < 2^(kDigitBits * t * n - 1)}.

  int t = std::max(DIV_CEIL(r, n), 2);

  // 6. Split A conceptually into t blocks.

  // 7. Set Z_(t-2) = [A_(t-1), A_(t-2)].

  int z_len = n * 2;

  ScratchDigits Z(z_len);

  PutAt(Z, A + n * (t - 2), z_len);

  // 8. For i from t-2 downto 0 do:

  BZ bz(this, n);

  ScratchDigits Ri(n);

  {

    // First iteration unrolled and specialized.

    // We might not have n digits at the top of Q, so use temporary storage

    // for Qi...

    ScratchDigits Qi(n);

    bz.D2n1n(Qi, Ri, Z, B);

    if (should_terminate()) return;

    // ...but there *will* be enough space for any non-zero result digits!

    Qi.Normalize();

    RWDigits target = Q + n * (t - 2);

    DCHECK(Qi.len() <= target.len());

    PutAt(target, Qi, target.len());

  }

  // Now loop over any remaining iterations.

  for (int i = t - 3; i >= 0; i--) {

    // 8b. If i > 0, set Z_(i-1) = [Ri, A_(i-1)].

    // (De-duped with unrolled first iteration, hence reading A_(i).)

    PutAt(Z + n, Ri, n);

    PutAt(Z, A + n * i, n);

    // 8a. Using algorithm D2n1n compute Qi, Ri such that Zi = B*Qi + Ri.

    RWDigits Qi(Q, i * n, n);

    bz.D2n1n(Qi, Ri, Z, B);

    if (should_terminate()) return;

  }

  // 9. Return Q = [Q_(t-2), ..., Q_0] and R = R_0 * 2^(-sigma).

#if DEBUG

  for (int i = 0; i < digit_shift; i++) {

    DCHECK(Ri[i] == 0);

  }

#endif

  if (R.len() != 0) {

    Digits Ri_part(Ri, digit_shift, Ri.len());

    Ri_part.Normalize();

    DCHECK(Ri_part.len() <= R.len());

    RightShift(R, Ri_part, sigma);

  }

}


}  // namespace bigint

}  // namespace v8

bigint-internal.h

v8::bigint::Digits
Definition bigint.h:57

v8::bigint::Digits::len
int len()
Definition bigint.h:113

v8::bigint::Digits::Normalize
void Normalize()
Definition bigint.h:104

v8::bigint::ProcessorImpl::DivideBurnikelZiegler
void DivideBurnikelZiegler(RWDigits Q, RWDigits R, Digits A, Digits B)
Definition div-burnikel.cc:197

v8::bigint::RWDigits
Definition bigint.h:138

v8::bigint::ScratchDigits
Definition bigint-internal.h:143

digit-arithmetic.h

scratch_mem_
Storage scratch_mem_
Definition div-burnikel.cc:67

proc_
ProcessorImpl * proc_
Definition div-burnikel.cc:66

div-helpers.h

X
too high values may cause the compiler to set high thresholds for inlining to as much as possible avoid inlined allocation of objects that cannot escape trace load stores from virtual maglev objects use TurboFan fast string builder analyze liveness of environment slots and zap dead values trace TurboFan load elimination emit data about basic block usage in builtins to this enable builtin reordering when run mksnapshot flag for emit warnings when applying builtin profile data verify register allocation in TurboFan randomly schedule instructions to stress dependency tracking enable store store elimination in TurboFan rewrite far to near simulate GC compiler thread race related to allow float parameters to be passed in simulator mode JS Wasm Run additional turbo_optimize_inlined_js_wasm_wrappers enable experimental feedback collection in generic lowering enable Turboshaft s WasmLoadElimination enable Turboshaft s low level load elimination for JS enable Turboshaft s escape analysis for string concatenation use enable Turbolev features that we want to ship in the not too far future trace individual Turboshaft reduction steps trace intermediate Turboshaft reduction steps invocation count threshold for early optimization Enables optimizations which favor memory size over execution speed Enables sampling allocation profiler with X as a sample interval min size of a semi the new space consists of two semi spaces max size of the Collect garbage after Collect garbage after keeps maps alive for< n > old space garbage collections print one detailed trace line in allocation gc speed threshold for starting incremental marking via a task in percent of available threshold for starting incremental marking immediately in percent of available Use a single schedule for determining a marking schedule between JS and C objects schedules the minor GC task with kUserVisible priority max worker number of concurrent for NumberOfWorkerThreads start background threads that allocate memory concurrent_array_buffer_sweeping use parallel threads to clear weak refs in the atomic pause trace progress of the incremental marking trace object counts and memory usage report a tick only when allocated zone memory changes by this amount TracingFlags::gc_stats TracingFlags::gc_stats track native contexts that are expected to be garbage collected verify heap pointers before and after GC memory reducer runs GC with ReduceMemoryFootprint flag Maximum number of memory reducer GCs scheduled Old gen GC speed is computed directly from gc tracer counters Perform compaction on full GCs based on V8 s default heuristics Perform compaction on every full GC Perform code space compaction when finalizing a full GC with stack Stress GC compaction to flush out bugs with moving objects flush of baseline code when it has not been executed recently Use time base code flushing instead of age Use a progress bar to scan large objects in increments when incremental marking is active force incremental marking for small heaps and run it more often force marking at random points between and X(inclusive) percent " "of the regular marking start limit") DEFINE_INT(stress_scavenge

D
#define D(Name)
Definition maglev-ir.h:6426

s
int s
Definition mul-fft.cc:297

m
int m
Definition mul-fft.cc:294

r
int r
Definition mul-fft.cc:298

v8::bigint::BitLength
constexpr int BitLength(int n)
Definition util.h:65

v8::bigint::PutAt
void PutAt(RWDigits Z, Digits A, int count)
Definition div-helpers.h:19

v8::bigint::LeftShift
void LeftShift(RWDigits Z, Digits X, digit_t shift)
Definition bitwise.cc:136

v8::bigint::CountLeadingZeros
constexpr int CountLeadingZeros(T value)
Definition util.h:30

v8::bigint::Subtract
void Subtract(RWDigits Z, Digits X, Digits Y)
Definition vector-arithmetic.cc:59

v8::bigint::SubtractAndReturnBorrow
digit_t SubtractAndReturnBorrow(RWDigits Z, Digits X, Digits Y)
Definition vector-arithmetic.cc:84

v8::bigint::RightShift
void RightShift(RWDigits Z, Digits X, digit_t shift, const RightShiftState &state)
Definition bitwise.cc:195

v8::bigint::AddAndReturnCarry
digit_t AddAndReturnCarry(RWDigits Z, Digits X, Digits Y)
Definition vector-arithmetic.cc:75

v8::bigint::kBurnikelThreshold
constexpr int kBurnikelThreshold
Definition bigint-internal.h:20

v8::bigint::digit_t
uintptr_t digit_t
Definition bigint.h:34

v8::internal
Definition api-arguments-inl.h:20

v8::internal::B1
@ B1
Definition constants-ppc.h:2807

v8::internal::B
constexpr int B
Definition constants-arm.h:178

v8::internal::B2
@ B2
Definition constants-ppc.h:2808

v8::internal::A
constexpr int A
Definition constants-arm.h:177

v8
Definition api-arguments-inl.h:19

Compare
int Compare(const T &a, const T &b)
Definition simulator-loong64.h:18

DCHECK
#define DCHECK(condition)
Definition logging.h:482

string.h

util.h

DIV_CEIL
#define DIV_CEIL(x, y)
Definition util.h:19

vector-arithmetic.h