v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
mul-schoolbook.cc
Go to the documentation of this file.
1// Copyright 2021 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
8
9namespace v8 {
10namespace bigint {
11
12// Z := X * y, where y is a single digit.
// NOTE(review): the signature line for this body is absent from this listing.
// The symbol index at the end of the page gives it as
// MultiplySingle(RWDigits Z, Digits X, digit_t y) -- confirm against the file.
//
// Walks the digits of X once, multiplying each by {y} and writing one digit of
// Z per iteration, then writes one further digit and zeroes the rest of Z.
// NOTE(review): the body writes Z[X.len()], so Z needs at least X.len() + 1
// digits; no check for that is visible in the lines shown here.
14 DCHECK(y != 0);
// {carry} is threaded through digit_add3's out-parameter from one iteration to
// the next; {high} holds the second result of the previous digit_mul call.
15 digit_t carry = 0;
16 digit_t high = 0;
17 for (int i = 0; i < X.len(); i++) {
18 digit_t new_high;
// digit_mul returns {low} and fills {new_high}; presumably the two halves of
// the double-width product X[i] * y -- confirm in the digit arithmetic helpers.
19 digit_t low = digit_mul(X[i], y, &new_high);
// Combine this product's {low} with the previous product's {high} and the
// running {carry}; the same call overwrites {carry} for the next iteration.
20 Z[i] = digit_add3(low, high, carry, &carry);
21 high = new_high;
22 }
23 AddWorkEstimate(X.len());
// What is left over after the last digit of X goes into the following digit.
24 Z[X.len()] = carry + high;
// Any digits of Z beyond that are set to zero.
25 for (int i = X.len() + 1; i < Z.len(); i++) Z[i] = 0;
26}
27
// BODY(min, max): accumulates and stores one result digit, Z[i].
//
// The macro takes no operands besides the loop bounds; it uses these names
// from the expansion site: X, Y, Z, i, zi, next, carry, next_carry.
//
// For every j in [min, max] (both inclusive) it calls digit_mul on X[j] and
// Y[i - j], then:
//   - adds the returned {low} into {zi}, counting that addition's carry bit
//     in {carry};
//   - adds the out-parameter {high} into {next}, counting that addition's
//     carry bit in {next_carry}.
// After the loop it assigns {zi} to Z[i].
//
// The expansion ends without a semicolon, so the use site supplies it. The
// caller is responsible for picking min/max such that X[j] and Y[i - j] are
// valid indices; the macro itself performs no bounds checks.
28#define BODY(min, max) \
29 for (int j = min; j <= max; j++) { \
30 digit_t high; \
31 digit_t low = digit_mul(X[j], Y[i - j], &high); \
32 digit_t carrybit; \
33 zi = digit_add2(zi, low, &carrybit); \
34 carry += carrybit; \
35 next = digit_add2(next, high, &carrybit); \
36 next_carry += carrybit; \
37 } \
38 Z[i] = zi
39
40// Z := X * Y.
41// O(n²) "schoolbook" multiplication algorithm. Optimized to minimize
42// bounds and overflow checks: rather than looping over X for every digit
43// of Y (or vice versa), we loop over Z. The {BODY} macro above is what
44// computes one of Z's digits as a sum of the products of relevant digits
45// of X and Y. This yields a nearly 2x improvement compared to more obvious
46// implementations.
47// This method is *highly* performance sensitive even for the advanced
48// algorithms, which use this as the base case of their recursive calls.
// NOTE(review): listing lines 49-51 (which would hold the signature) are
// absent here. The symbol index at the end of the page gives the signature as
// MultiplySchoolbook(RWDigits Z, Digits X, Digits Y) -- confirm against the
// file before relying on this listing.
//
// Preconditions asserted below: X is at least as long as Y, and Z has room for
// X.len() + Y.len() digits.
52 DCHECK(X.len() >= Y.len());
53 DCHECK(Z.len() >= X.len() + Y.len());
// If either operand has no digits, the result of Z.Clear() is returned.
54 if (X.len() == 0 || Y.len() == 0) return Z.Clear();
// State shared with the BODY macro:
//   next       - value that seeds the following digit of Z
//   next_carry - count of carry bits produced while adding into {next}
//   carry      - count of carry bits produced while adding into the current zi
// {next} has no initializer; its first write is through digit_mul's
// out-parameter in the statement that computes Z[0].
55 digit_t next, next_carry = 0, carry = 0;
56 // Unrolled first iteration: it's trivial.
57 Z[0] = digit_mul(X[0], Y[0], &next);
58 int i = 1;
59 // Unrolled second iteration: a little less setup.
// Runs only when Y has at least two digits. {carry} and {next_carry} are still
// zero at this point, so {zi} starts directly from {next}.
60 if (i < Y.len()) {
61 digit_t zi = next;
62 next = 0;
63 BODY(0, 1);
64 i++;
65 }
66 // Main part: since X.len() >= Y.len() > i, no bounds checks are needed.
67 for (; i < Y.len(); i++) {
// Seed this digit from {next} plus the pending {carry}. The carry out of that
// addition, together with {next_carry}, becomes the new {next}. Both counters
// are then reset before BODY accumulates into them again.
68 digit_t zi = digit_add2(next, carry, &carry);
69 next = next_carry + carry;
70 carry = 0;
71 next_carry = 0;
72 BODY(0, i);
// NOTE(review): the listing numbering jumps from 72 to 74 here, so one source
// line is missing from this page -- check the real file.
74 }
75 // Last part: i exceeds Y now, we have to be careful about bounds.
// i + 1 is now at least Y.len(). X.len() + Y.len() - 2 is the largest index
// sum of one X digit and one Y digit, so this loop covers the remaining
// product columns.
76 int loop_end = X.len() + Y.len() - 2;
77 for (; i <= loop_end; i++) {
// Clamp the range of j so that both X[j] and Y[i - j] stay in bounds:
// j <= X.len() - 1 and j <= i (so i - j >= 0), and j >= i - (Y.len() - 1)
// (so i - j <= Y.len() - 1).
78 int max_x_index = std::min(i, X.len() - 1);
79 int max_y_index = Y.len() - 1;
80 int min_x_index = i - max_y_index;
// Same seeding and counter reset as in the main loop above.
81 digit_t zi = digit_add2(next, carry, &carry);
82 next = next_carry + carry;
83 carry = 0;
84 next_carry = 0;
85 BODY(min_x_index, max_x_index);
86 AddWorkEstimate(max_x_index - min_x_index);
87 }
88 // Write the last digit, and zero out any extra space in Z.
// Here i == loop_end + 1 == X.len() + Y.len() - 1; the final digit is
// {next} plus the pending {carry}, and that addition is asserted not to carry.
89 Z[i++] = digit_add2(next, carry, &carry);
90 DCHECK(carry == 0);
91 for (; i < Z.len(); i++) Z[i] = 0;
92}
93
94#undef BODY
95
96} // namespace bigint
97} // namespace v8
void MultiplySingle(RWDigits Z, Digits X, digit_t y)
void MultiplySchoolbook(RWDigits Z, Digits X, Digits Y)
void AddWorkEstimate(uintptr_t estimate)
too high values may cause the compiler to set high thresholds for inlining to as much as possible avoid inlined allocation of objects that cannot escape trace load stores from virtual maglev objects use TurboFan fast string builder analyze liveness of environment slots and zap dead values trace TurboFan load elimination emit data about basic block usage in builtins to this enable builtin reordering when run mksnapshot flag for emit warnings when applying builtin profile data verify register allocation in TurboFan randomly schedule instructions to stress dependency tracking enable store store elimination in TurboFan rewrite far to near simulate GC compiler thread race related to allow float parameters to be passed in simulator mode JS Wasm Run additional turbo_optimize_inlined_js_wasm_wrappers enable experimental feedback collection in generic lowering enable Turboshaft s WasmLoadElimination enable Turboshaft s low level load elimination for JS enable Turboshaft s escape analysis for string concatenation use enable Turbolev features that we want to ship in the not too far future trace individual Turboshaft reduction steps trace intermediate Turboshaft reduction steps invocation count threshold for early optimization Enables optimizations which favor memory size over execution speed Enables sampling allocation profiler with X as a sample interval min size of a semi the new space consists of two semi spaces max size of the Collect garbage after Collect garbage after keeps maps alive for< n > old space garbage collections print one detailed trace line in allocation gc speed threshold for starting incremental marking via a task in percent of available threshold for starting incremental marking immediately in percent of available Use a single schedule for determining a marking schedule between JS and C objects schedules the minor GC task with kUserVisible priority max worker number of concurrent for NumberOfWorkerThreads start background threads that allocate memory 
concurrent_array_buffer_sweeping use parallel threads to clear weak refs in the atomic pause trace progress of the incremental marking trace object counts and memory usage report a tick only when allocated zone memory changes by this amount TracingFlags::gc_stats TracingFlags::gc_stats track native contexts that are expected to be garbage collected verify heap pointers before and after GC memory reducer runs GC with ReduceMemoryFootprint flag Maximum number of memory reducer GCs scheduled Old gen GC speed is computed directly from gc tracer counters Perform compaction on full GCs based on V8 s default heuristics Perform compaction on every full GC Perform code space compaction when finalizing a full GC with stack Stress GC compaction to flush out bugs with moving objects flush of baseline code when it has not been executed recently Use time base code flushing instead of age Use a progress bar to scan large objects in increments when incremental marking is active force incremental marking for small heaps and run it more often force marking at random points between and X(inclusive) percent " "of the regular marking start limit") DEFINE_INT(stress_scavenge
int y
#define BODY(min, max)
digit_t digit_add3(digit_t a, digit_t b, digit_t c, digit_t *carry)
bool IsDigitNormalized(Digits X)
uintptr_t digit_t
Definition bigint.h:34
digit_t digit_mul(digit_t a, digit_t b, digit_t *high)
digit_t digit_add2(digit_t a, digit_t b, digit_t *carry)
#define DCHECK(condition)
Definition logging.h:482