// mul-karatsuba.cc (text extracted from the HTML source listing
// mul-karatsuba_8cc_source.html)

// Copyright 2021 the V8 project authors. All rights reserved.

// Use of this source code is governed by a BSD-style license that can be

// found in the LICENSE file.


// Karatsuba multiplication. This is loosely based on Go's implementation

// found at https://golang.org/src/math/big/nat.go, licensed as follows:

//

// Copyright 2009 The Go Authors. All rights reserved.

// Use of this source code is governed by a BSD-style

// license that can be found in the LICENSE file [1].

//

// [1] https://golang.org/LICENSE


#include <algorithm>

#include <utility>


#include "src/bigint/bigint-internal.h"

#include "src/bigint/digit-arithmetic.h"

#include "src/bigint/util.h"

#include "src/bigint/vector-arithmetic.h"


namespace v8 {

namespace bigint {


// If Karatsuba is the best supported algorithm, then it must check for
// termination requests. If there are more advanced algorithms available
// for larger inputs, then Karatsuba will only be used for sufficiently
// small chunks that checking for termination requests is not necessary.
//
// The non-empty expansion is a complete `if (...) return;` statement, so the
// macro may only be used as a statement inside a function returning void in
// which should_terminate() is callable.
#if V8_ADVANCED_BIGINT_ALGORITHMS
#define MAYBE_TERMINATE
#else
// Deliberately kept on a single line: a backslash continuation followed by a
// blank line would leave the macro empty and strand the `if` at file scope.
#define MAYBE_TERMINATE if (should_terminate()) return;
#endif


namespace {


// Heuristic length rounding for Karatsuba: the algorithm sometimes finishes
// more quickly when the input length is padded a little. The constants used
// here have been determined experimentally.
int RoundUpLen(int len) {
  // Short inputs are just rounded up to a multiple of two.
  if (len <= 36) return RoundUp(len, 2);

  // Pick a granularity that keeps the 4 or 5 most significant non-zero bits
  // of {len}.
  int shift = BitLength(len) - 5;
  if ((len >> shift) >= 0x18) shift++;
  const int mask = (1 << shift) - 1;

  // Lengths that are only just above a multiple of the granularity are left
  // alone. This smoothes the steps by which time goes up as input size
  // increases.
  const int remainder = len & mask;
  const bool barely_above = shift >= 2 && remainder < (1 << (shift - 2));
  if (barely_above) return len;

  // Everything else is rounded up to the next multiple of (1 << shift).
  return ((len + mask) >> shift) << shift;
}


// Makes the final decision on how much to bump up the input size: the
// rounded length is halved until it no longer exceeds kKaratsubaThreshold,
// and the remaining value is then doubled back the same number of times.
int KaratsubaLength(int n) {
  int halvings = 0;
  for (n = RoundUpLen(n); n > kKaratsubaThreshold; n >>= 1) {
    halvings++;
  }
  return n << halvings;
}


// Performs the specific subtraction required by {KaratsubaMain}: after
// normalizing both inputs, subtracts the smaller operand (as ordered by
// GreaterThanOrEqual) from the larger one and stores the difference in
// {result}. {*sign} is negated when the operands had to be exchanged.
// Any remaining digits of {result} are set to zero.
void KaratsubaSubtractionHelper(RWDigits result, Digits X, Digits Y,
                                int* sign) {
  X.Normalize();
  Y.Normalize();
  // Make sure the minuend is the larger operand and remember the flip.
  if (!GreaterThanOrEqual(X, Y)) {
    std::swap(X, Y);
    *sign = -(*sign);
  }
  const int subtrahend_len = Y.len();
  const int minuend_len = X.len();
  digit_t borrow = 0;
  int pos = 0;
  // Digits present in both operands.
  while (pos < subtrahend_len) {
    result[pos] = digit_sub2(X[pos], Y[pos], borrow, &borrow);
    pos++;
  }
  // Remaining digits of the minuend; only the borrow is left to subtract.
  while (pos < minuend_len) {
    result[pos] = digit_sub(X[pos], borrow, &borrow);
    pos++;
  }
  DCHECK(borrow == 0);
  // Clear whatever is left of the output.
  while (pos < result.len()) {
    result[pos] = 0;
    pos++;
  }
}


}  // namespace


// Top-level Karatsuba multiplication of X and Y into Z. As DCHECKed below,
// X must be at least as long as Y, Y must have at least kKaratsubaThreshold
// digits, and Z must provide room for X.len() + Y.len() digits.
void ProcessorImpl::MultiplyKaratsuba(RWDigits Z, Digits X, Digits Y) {
  DCHECK(X.len() >= Y.len());
  DCHECK(Y.len() >= kKaratsubaThreshold);
  DCHECK(Z.len() >= X.len() + Y.len());
  const int k = KaratsubaLength(Y.len());
  // {KaratsubaMain} expects 4 * k digits of scratch space.
  ScratchDigits scratch(4 * k);
  KaratsubaStart(Z, X, Y, scratch, k);
}


// Entry point for Karatsuba-based multiplication. Handles inputs of unequal
// length by cutting them into chunks of {k} digits: the product of the lowest
// chunks is computed by {KaratsubaMain}, and the products of all remaining
// chunk pairs are computed by {KaratsubaChunk} and added into Z at the
// matching digit offset.
void ProcessorImpl::KaratsubaStart(RWDigits Z, Digits X, Digits Y,
                                   RWDigits scratch, int k) {
  KaratsubaMain(Z, X, Y, scratch, k);
  MAYBE_TERMINATE
  // Everything above the first 2 * k digits starts out as zero.
  for (int pos = 2 * k; pos < Z.len(); pos++) Z[pos] = 0;

  // Nothing is left to add when both inputs fit into a single chunk.
  const bool single_chunk = k >= Y.len() && X.len() == Y.len();
  if (single_chunk) return;

  ScratchDigits product(2 * k);
  // Add X0 * Y1 * b.
  Digits x_low(X, 0, k);
  Digits y_high = Y + std::min(k, Y.len());
  const bool has_y_high = y_high.len() > 0;
  if (has_y_high) {
    KaratsubaChunk(product, x_low, y_high, scratch);
    MAYBE_TERMINATE
    AddAndReturnOverflow(Z + k, product);  // Can't overflow.
  }

  // Add Xi * Y0 << i and Xi * Y1 * b << (i + k).
  Digits y_low(Y, 0, k);
  for (int offset = k; offset < X.len(); offset += k) {
    Digits x_chunk(X, offset, k);
    KaratsubaChunk(product, x_chunk, y_low, scratch);
    MAYBE_TERMINATE
    AddAndReturnOverflow(Z + offset, product);  // Can't overflow.
    if (has_y_high) {
      KaratsubaChunk(product, x_chunk, y_high, scratch);
      MAYBE_TERMINATE
      AddAndReturnOverflow(Z + (offset + k), product);  // Can't overflow.
    }
  }
}


// Entry point for chunk-wise multiplications. Normalizes both inputs, orders
// them so that the longer one comes first, and then picks the multiplication
// routine based on the length of the shorter one.
void ProcessorImpl::KaratsubaChunk(RWDigits Z, Digits X, Digits Y,
                                   RWDigits scratch) {
  X.Normalize();
  Y.Normalize();
  // A zero-length factor means there is nothing to multiply; clear Z.
  if (X.len() == 0 || Y.len() == 0) {
    Z.Clear();
    return;
  }
  if (Y.len() > X.len()) std::swap(X, Y);
  const int shorter = Y.len();
  if (shorter == 1) {
    MultiplySingle(Z, X, Y[0]);
  } else if (shorter < kKaratsubaThreshold) {
    MultiplySchoolbook(Z, X, Y);
  } else {
    const int k = KaratsubaLength(shorter);
    DCHECK(scratch.len() >= 4 * k);
    KaratsubaStart(Z, X, Y, scratch, k);
  }
}


// The main recursive Karatsuba method. Multiplies the low {n} digits of X
// and Y into Z, using {scratch} (at least 4 * n digits) for temporaries.
//
// With n2 = n / 2 and each operand split as X = X1 * b^n2 + X0:
//   P0 = X0 * Y0
//   P2 = X1 * Y1
//   P1 = (X1 - X0) * (Y0 - Y1)
//   Z  = P0 + (P0 + P2 + P1) * b^n2 + P2 * b^n
//
// Scratch layout: [0, n) holds P0 and is afterwards reused for the two
// differences; [n, 2n) holds P2 and afterwards P1; [2n, 4n) is handed to the
// recursive calls. The statement order below relies on this reuse.
void ProcessorImpl::KaratsubaMain(RWDigits Z, Digits X, Digits Y,
                                  RWDigits scratch, int n) {
  if (n < kKaratsubaThreshold) {
    // Base case: schoolbook multiplication, longer operand first.
    X.Normalize();
    Y.Normalize();
    if (X.len() < Y.len()) std::swap(X, Y);
    MultiplySchoolbook(RWDigits(Z, 0, 2 * n), X, Y);
    return;
  }
  DCHECK(scratch.len() >= 4 * n);
  DCHECK((n & 1) == 0);
  const int half = n >> 1;
  Digits x_lo(X, 0, half);
  Digits x_hi(X, half, half);
  Digits y_lo(Y, 0, half);
  Digits y_hi(Y, half, half);
  RWDigits inner_scratch(scratch, 2 * n, 2 * n);

  // P0 = X0 * Y0 becomes the low n digits of Z.
  RWDigits low_product(scratch, 0, n);
  KaratsubaMain(low_product, x_lo, y_lo, inner_scratch, half);
  MAYBE_TERMINATE
  for (int d = 0; d < n; d++) Z[d] = low_product[d];

  // P2 = X1 * Y1 becomes the digits of Z starting at n. Z may be too short
  // to take all of P2; the part that does not fit must be zero.
  RWDigits high_product(scratch, n, n);
  KaratsubaMain(high_product, x_hi, y_hi, inner_scratch, half);
  MAYBE_TERMINATE
  RWDigits z_high = Z + n;
  const int copy_len = std::min(z_high.len(), high_product.len());
  for (int d = 0; d < copy_len; d++) z_high[d] = high_product[d];
  for (int d = copy_len; d < n; d++) {
    DCHECK(high_product[d] == 0);
  }

  // Add P0 and P2 at offset n2. The intermediate result can be one digit
  // too large; the subtraction below will fix this.
  RWDigits z_mid = Z + half;
  digit_t overflow = AddAndReturnOverflow(z_mid, low_product);
  overflow += AddAndReturnOverflow(z_mid, high_product);

  // P1 = |X1 - X0| * |Y0 - Y1|, with {sign} tracking the sign of the product.
  // The differences overwrite the scratch area that held P0, and P1
  // overwrites the area that held P2.
  RWDigits x_diff(scratch, 0, half);
  RWDigits y_diff(scratch, half, half);
  int sign = 1;
  KaratsubaSubtractionHelper(x_diff, x_hi, x_lo, &sign);
  KaratsubaSubtractionHelper(y_diff, y_lo, y_hi, &sign);
  RWDigits mid_product(scratch, n, n);
  KaratsubaMain(mid_product, x_diff, y_diff, inner_scratch, half);
  if (sign <= 0) {
    overflow -= SubAndReturnBorrow(z_mid, mid_product);
  } else {
    overflow += AddAndReturnOverflow(z_mid, mid_product);
  }
  // The intermediate result may have been bigger, but the final result fits.
  DCHECK(overflow == 0);
  USE(overflow);
}


#undef MAYBE_TERMINATE


}  // namespace bigint

}  // namespace v8

// ---------------------------------------------------------------------------
// Cross-reference index left over from the HTML source listing. It is not
// part of the translation unit and is kept only as a comment.
//
// Referenced headers:
//   bigint-internal.h, digit-arithmetic.h, util.h, vector-arithmetic.h
//
// Symbols used in this file (declaration as shown in the listing, then the
// definition site):
//   v8::bigint::Digits                          bigint.h:57
//   v8::bigint::Digits::len                     int len()
//                                               bigint.h:113
//   v8::bigint::Digits::Normalize               void Normalize()
//                                               bigint.h:104
//   v8::bigint::RWDigits                        bigint.h:138
//   v8::bigint::RWDigits::Clear                 void Clear()
//                                               bigint.h:193
//   v8::bigint::ScratchDigits                   bigint-internal.h:143
//   v8::bigint::digit_t                         uintptr_t digit_t
//                                               bigint.h:34
//   v8::bigint::kKaratsubaThreshold             constexpr int kKaratsubaThreshold
//                                               bigint-internal.h:15
//   v8::bigint::ProcessorImpl::MultiplyKaratsuba
//       void MultiplyKaratsuba(RWDigits Z, Digits X, Digits Y)
//       mul-karatsuba.cc:91
//   v8::bigint::ProcessorImpl::KaratsubaStart
//       void KaratsubaStart(RWDigits Z, Digits X, Digits Y, RWDigits scratch,
//                           int k)
//       mul-karatsuba.cc:103
//   v8::bigint::ProcessorImpl::KaratsubaChunk
//       void KaratsubaChunk(RWDigits Z, Digits X, Digits Y, RWDigits scratch)
//       mul-karatsuba.cc:137
//   v8::bigint::ProcessorImpl::KaratsubaMain
//       void KaratsubaMain(RWDigits Z, Digits X, Digits Y, RWDigits scratch,
//                          int n)
//       mul-karatsuba.cc:151
//   v8::bigint::ProcessorImpl::MultiplySingle
//       void MultiplySingle(RWDigits Z, Digits X, digit_t y)
//       mul-schoolbook.cc:13
//   v8::bigint::ProcessorImpl::MultiplySchoolbook
//       void MultiplySchoolbook(RWDigits Z, Digits X, Digits Y)
//       mul-schoolbook.cc:49
//   v8::bigint::RoundUp
//       constexpr int RoundUp(int x, int y)                    util.h:25
//   v8::bigint::BitLength
//       constexpr int BitLength(int n)                         util.h:65
//   v8::bigint::GreaterThanOrEqual
//       bool GreaterThanOrEqual(Digits A, Digits B)
//       vector-arithmetic.h:52
//   v8::bigint::AddAndReturnOverflow
//       digit_t AddAndReturnOverflow(RWDigits Z, Digits X)
//       vector-arithmetic.cc:13
//   v8::bigint::SubAndReturnBorrow
//       digit_t SubAndReturnBorrow(RWDigits Z, Digits X)
//       vector-arithmetic.cc:27
//   v8::bigint::digit_sub
//       digit_t digit_sub(digit_t a, digit_t b, digit_t *borrow)
//       digit-arithmetic.h:52
//   v8::bigint::digit_sub2
//       digit_t digit_sub2(digit_t a, digit_t b, digit_t borrow_in,
//                          digit_t *borrow_out)
//       digit-arithmetic.h:65
//   MAYBE_TERMINATE     #define MAYBE_TERMINATE       mul-karatsuba.cc:32
//   DCHECK              #define DCHECK(condition)     logging.h:482
//   USE                 #define USE(...)              macros.h:293
//
// Entries that appear to be links from local names in this file to unrelated
// symbols elsewhere in the code base:
//   T            #define T
//   end          int end                              debug-coverage.cc:596
//   result       ZoneVector< RpoNumber > & result     jump-threading.cc:21
//   n            int n                                mul-fft.cc:296
//   v8::internal                                      api-arguments-inl.h:20
//   v8                                                api-arguments-inl.h:19
//   X            The tooltip text was a run of concatenated V8 flag
//                description strings (ending in `DEFINE_INT(stress_scavenge`)
//                with no relation to this file; it is not reproduced here.
// ---------------------------------------------------------------------------