v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
mul-karatsuba.cc
Go to the documentation of this file.
1// Copyright 2021 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Karatsuba multiplication. This is loosely based on Go's implementation
6// found at https://golang.org/src/math/big/nat.go, licensed as follows:
7//
8// Copyright 2009 The Go Authors. All rights reserved.
9// Use of this source code is governed by a BSD-style
10// license that can be found in the LICENSE file [1].
11//
12// [1] https://golang.org/LICENSE
13
14#include <algorithm>
15#include <utility>
16
19#include "src/bigint/util.h"
21
22namespace v8 {
23namespace bigint {
24
25// If Karatsuba is the best supported algorithm, then it must check for
26// termination requests. If there are more advanced algorithms available
27// for larger inputs, then Karatsuba will only be used for sufficiently
28// small chunks that checking for termination requests is not necessary.
29#if V8_ADVANCED_BIGINT_ALGORITHMS
30#define MAYBE_TERMINATE
31#else
32#define MAYBE_TERMINATE \
33 if (should_terminate()) return;
34#endif
35
36namespace {
37
38// The Karatsuba algorithm sometimes finishes more quickly when the
39// input length is rounded up a bit. This method encodes some heuristics
40// to accomplish this. The details have been determined experimentally.
41int RoundUpLen(int len) {
42 if (len <= 36) return RoundUp(len, 2);
43 // Keep the 4 or 5 most significant non-zero bits.
44 int shift = BitLength(len) - 5;
45 if ((len >> shift) >= 0x18) {
46 shift++;
47 }
48 // Round up, unless we're only just above the threshold. This smoothes
49 // the steps by which time goes up as input size increases.
50 int additive = ((1 << shift) - 1);
51 if (shift >= 2 && (len & additive) < (1 << (shift - 2))) {
52 return len;
53 }
54 return ((len + additive) >> shift) << shift;
55}
56
57// This method makes the final decision how much to bump up the input size.
58int KaratsubaLength(int n) {
59 n = RoundUpLen(n);
60 int i = 0;
61 while (n > kKaratsubaThreshold) {
62 n >>= 1;
63 i++;
64 }
65 return n << i;
66}
67
68// Performs the specific subtraction required by {KaratsubaMain} below.
69void KaratsubaSubtractionHelper(RWDigits result, Digits X, Digits Y,
70 int* sign) {
71 X.Normalize();
72 Y.Normalize();
73 digit_t borrow = 0;
74 int i = 0;
75 if (!GreaterThanOrEqual(X, Y)) {
76 *sign = -(*sign);
77 std::swap(X, Y);
78 }
79 for (; i < Y.len(); i++) {
80 result[i] = digit_sub2(X[i], Y[i], borrow, &borrow);
81 }
82 for (; i < X.len(); i++) {
83 result[i] = digit_sub(X[i], borrow, &borrow);
84 }
85 DCHECK(borrow == 0);
86 for (; i < result.len(); i++) result[i] = 0;
87}
88
89} // namespace
90
92 DCHECK(X.len() >= Y.len());
94 DCHECK(Z.len() >= X.len() + Y.len());
95 int k = KaratsubaLength(Y.len());
96 int scratch_len = 4 * k;
97 ScratchDigits scratch(scratch_len);
98 KaratsubaStart(Z, X, Y, scratch, k);
99}
100
101// Entry point for Karatsuba-based multiplication, takes care of inputs
102// with unequal lengths by chopping the larger into chunks.
104 RWDigits scratch, int k) {
105 KaratsubaMain(Z, X, Y, scratch, k);
107 for (int i = 2 * k; i < Z.len(); i++) Z[i] = 0;
108 if (k < Y.len() || X.len() != Y.len()) {
109 ScratchDigits T(2 * k);
110 // Add X0 * Y1 * b.
111 Digits X0(X, 0, k);
112 Digits Y1 = Y + std::min(k, Y.len());
113 if (Y1.len() > 0) {
114 KaratsubaChunk(T, X0, Y1, scratch);
116 AddAndReturnOverflow(Z + k, T); // Can't overflow.
117 }
118
119 // Add Xi * Y0 << i and Xi * Y1 * b << (i + k).
120 Digits Y0(Y, 0, k);
121 for (int i = k; i < X.len(); i += k) {
122 Digits Xi(X, i, k);
123 KaratsubaChunk(T, Xi, Y0, scratch);
125 AddAndReturnOverflow(Z + i, T); // Can't overflow.
126 if (Y1.len() > 0) {
127 KaratsubaChunk(T, Xi, Y1, scratch);
129 AddAndReturnOverflow(Z + (i + k), T); // Can't overflow.
130 }
131 }
132 }
133}
134
135// Entry point for chunk-wise multiplications, selects an appropriate
136// algorithm for the inputs based on their sizes.
138 RWDigits scratch) {
139 X.Normalize();
140 Y.Normalize();
141 if (X.len() == 0 || Y.len() == 0) return Z.Clear();
142 if (X.len() < Y.len()) std::swap(X, Y);
143 if (Y.len() == 1) return MultiplySingle(Z, X, Y[0]);
144 if (Y.len() < kKaratsubaThreshold) return MultiplySchoolbook(Z, X, Y);
145 int k = KaratsubaLength(Y.len());
146 DCHECK(scratch.len() >= 4 * k);
147 return KaratsubaStart(Z, X, Y, scratch, k);
148}
149
150// The main recursive Karatsuba method.
152 RWDigits scratch, int n) {
153 if (n < kKaratsubaThreshold) {
154 X.Normalize();
155 Y.Normalize();
156 if (X.len() >= Y.len()) {
157 return MultiplySchoolbook(RWDigits(Z, 0, 2 * n), X, Y);
158 } else {
159 return MultiplySchoolbook(RWDigits(Z, 0, 2 * n), Y, X);
160 }
161 }
162 DCHECK(scratch.len() >= 4 * n);
163 DCHECK((n & 1) == 0);
164 int n2 = n >> 1;
165 Digits X0(X, 0, n2);
166 Digits X1(X, n2, n2);
167 Digits Y0(Y, 0, n2);
168 Digits Y1(Y, n2, n2);
169 RWDigits scratch_for_recursion(scratch, 2 * n, 2 * n);
170 RWDigits P0(scratch, 0, n);
171 KaratsubaMain(P0, X0, Y0, scratch_for_recursion, n2);
173 for (int i = 0; i < n; i++) Z[i] = P0[i];
174 RWDigits P2(scratch, n, n);
175 KaratsubaMain(P2, X1, Y1, scratch_for_recursion, n2);
177 RWDigits Z2 = Z + n;
178 int end = std::min(Z2.len(), P2.len());
179 for (int i = 0; i < end; i++) Z2[i] = P2[i];
180 for (int i = end; i < n; i++) {
181 DCHECK(P2[i] == 0);
182 }
183 // The intermediate result can be one digit too large; the subtraction
184 // below will fix this.
185 digit_t overflow = AddAndReturnOverflow(Z + n2, P0);
186 overflow += AddAndReturnOverflow(Z + n2, P2);
187 RWDigits X_diff(scratch, 0, n2);
188 RWDigits Y_diff(scratch, n2, n2);
189 int sign = 1;
190 KaratsubaSubtractionHelper(X_diff, X1, X0, &sign);
191 KaratsubaSubtractionHelper(Y_diff, Y0, Y1, &sign);
192 RWDigits P1(scratch, n, n);
193 KaratsubaMain(P1, X_diff, Y_diff, scratch_for_recursion, n2);
194 if (sign > 0) {
195 overflow += AddAndReturnOverflow(Z + n2, P1);
196 } else {
197 overflow -= SubAndReturnBorrow(Z + n2, P1);
198 }
199 // The intermediate result may have been bigger, but the final result fits.
200 DCHECK(overflow == 0);
201 USE(overflow);
202}
203
204#undef MAYBE_TERMINATE
205
206} // namespace bigint
207} // namespace v8
#define T
void KaratsubaMain(RWDigits Z, Digits X, Digits Y, RWDigits scratch, int n)
void MultiplySingle(RWDigits Z, Digits X, digit_t y)
void MultiplySchoolbook(RWDigits Z, Digits X, Digits Y)
void MultiplyKaratsuba(RWDigits Z, Digits X, Digits Y)
void KaratsubaStart(RWDigits Z, Digits X, Digits Y, RWDigits scratch, int k)
void KaratsubaChunk(RWDigits Z, Digits X, Digits Y, RWDigits scratch)
int end
too high values may cause the compiler to set high thresholds for inlining to as much as possible avoid inlined allocation of objects that cannot escape trace load stores from virtual maglev objects use TurboFan fast string builder analyze liveness of environment slots and zap dead values trace TurboFan load elimination emit data about basic block usage in builtins to this enable builtin reordering when run mksnapshot flag for emit warnings when applying builtin profile data verify register allocation in TurboFan randomly schedule instructions to stress dependency tracking enable store store elimination in TurboFan rewrite far to near simulate GC compiler thread race related to allow float parameters to be passed in simulator mode JS Wasm Run additional turbo_optimize_inlined_js_wasm_wrappers enable experimental feedback collection in generic lowering enable Turboshaft s WasmLoadElimination enable Turboshaft s low level load elimination for JS enable Turboshaft s escape analysis for string concatenation use enable Turbolev features that we want to ship in the not too far future trace individual Turboshaft reduction steps trace intermediate Turboshaft reduction steps invocation count threshold for early optimization Enables optimizations which favor memory size over execution speed Enables sampling allocation profiler with X as a sample interval min size of a semi the new space consists of two semi spaces max size of the Collect garbage after Collect garbage after keeps maps alive for< n > old space garbage collections print one detailed trace line in allocation gc speed threshold for starting incremental marking via a task in percent of available threshold for starting incremental marking immediately in percent of available Use a single schedule for determining a marking schedule between JS and C objects schedules the minor GC task with kUserVisible priority max worker number of concurrent for NumberOfWorkerThreads start background threads that allocate memory concurrent_array_buffer_sweeping use parallel threads to clear weak refs in the atomic pause trace progress of the incremental marking trace object counts and memory usage report a tick only when allocated zone memory changes by this amount TracingFlags::gc_stats TracingFlags::gc_stats track native contexts that are expected to be garbage collected verify heap pointers before and after GC memory reducer runs GC with ReduceMemoryFootprint flag Maximum number of memory reducer GCs scheduled Old gen GC speed is computed directly from gc tracer counters Perform compaction on full GCs based on V8 s default heuristics Perform compaction on every full GC Perform code space compaction when finalizing a full GC with stack Stress GC compaction to flush out bugs with moving objects flush of baseline code when it has not been executed recently Use time base code flushing instead of age Use a progress bar to scan large objects in increments when incremental marking is active force incremental marking for small heaps and run it more often force marking at random points between and X(inclusive) percent " "of the regular marking start limit") DEFINE_INT(stress_scavenge
ZoneVector< RpoNumber > & result
int n
Definition mul-fft.cc:296
#define MAYBE_TERMINATE
constexpr int BitLength(int n)
Definition util.h:65
bool GreaterThanOrEqual(Digits A, Digits B)
digit_t AddAndReturnOverflow(RWDigits Z, Digits X)
digit_t SubAndReturnBorrow(RWDigits Z, Digits X)
digit_t digit_sub2(digit_t a, digit_t b, digit_t borrow_in, digit_t *borrow_out)
digit_t digit_sub(digit_t a, digit_t b, digit_t *borrow)
uintptr_t digit_t
Definition bigint.h:34
constexpr int kKaratsubaThreshold
constexpr int RoundUp(int x, int y)
Definition util.h:25
#define DCHECK(condition)
Definition logging.h:482
#define USE(...)
Definition macros.h:293