30 for (
int i = 0;
i < len;
i++) {
32 if (borrow == 0)
break;
37 for (
int i = 0;
i < len;
i++) {
39 if (carry == 0)
break;
49 if (high == 0)
return;
50 ModFn_Helper(
x, len, high);
52 if (high == 0)
return;
53 DCHECK(high == 1 || high == -1);
54 ModFn_Helper(
x, len, high);
56 if (high == -1) ModFn_Helper(
x, len, high);
66 for (
int i = 0;
i <
K;
i++) {
81 for (
int i = 0;
i < len;
i++) {
94 int bits_shift,
int K) {
112 if (bits_shift == 0) {
114 for (
int i = 0;
i < digit_shift;
i++) {
118 for (
int i = digit_shift + 1;
i <
K;
i++) {
125 input[
K - digit_shift - 1] >> (
kDigitBits - bits_shift);
126 for (
int i = 0;
i < digit_shift;
i++) {
128 digit_t summand = (d << bits_shift) | input_carry;
135 digit_t iK_part = (d << bits_shift) | input_carry;
140 iK_carry += add_carry;
142 digit_t i0_part = d << bits_shift;
145 if (digit_shift + 1 <
K) {
147 digit_t subtrahend = (d << bits_shift) | input_carry;
149 digit_sub2(iK_carry, subtrahend, borrow, &borrow);
153 for (
int i = digit_shift + 2;
i <
K;
i++) {
155 digit_t subtrahend = (d << bits_shift) | input_carry;
166 for (
int i = 0;
i <
K;
i++) {
168 if (borrow == 0)
break;
181 int zero_above = 0x7FFFFFFF) {
193 while (digit_shift >= 2 *
K) digit_shift -= 2 *
K;
194 if (digit_shift >=
K) {
195 return ShiftModFn_Large(
result, input, digit_shift, bits_shift,
K);
198 if (bits_shift == 0) {
203 int cap = std::min(
K - digit_shift, zero_above);
204 for (;
i < cap;
i++) {
208 for (;
i <
K - digit_shift;
i++) {
214 cap = std::min(
K, zero_above);
215 for (;
i < cap;
i++) {
232 int cap = std::min(
K - digit_shift, zero_above);
233 for (;
i < cap;
i++) {
235 result[
i + digit_shift] = (d << bits_shift) | carry;
239 for (;
i <
K - digit_shift;
i++) {
245 cap = std::min(
K, zero_above);
246 for (;
i < cap;
i++) {
249 digit_sub2(0, (d << bits_shift) | carry, borrow, &borrow);
266 result[digit_shift], (d << bits_shift) | carry, borrow, &borrow);
271 for (
int i = digit_shift + 1; i <= K && borrow > 0;
i++) {
277 for (
int i = 0;
i <=
K;
i++) {
279 if (carry == 0)
break;
303void ComputeParameters(
int N,
int m, Parameters* params) {
307 int s = (
N + n - 1) >>
m;
309 int K =
m + 2 * s + 1;
311 int r =
K >> (
m - 1);
320 while (K_tz < threshold) {
335void ComputeParameters_Inner(
int N, Parameters* params) {
339 m = std::min(max_m,
m);
345 int K =
m + 2 * s + 1;
357int PredictInnerK(
int N) {
359 ComputeParameters_Inner(N, ¶ms);
365bool ShouldDecrementM(
const Parameters& current,
const Parameters& next,
366 const Parameters& after_next) {
368 if (current.K == 64 && next.K >= 112)
return false;
370 if (current.s < 6)
return true;
379 double factor =
static_cast<double>(after_next.K) / current.K;
380 if ((current.s == 6 && factor < 3.85) ||
381 (current.s == 7 && factor < 3.73) ||
382 (current.s == 8 && factor < 3.55) ||
383 (current.s == 9 && factor < 3.50) ||
391 if (current.K >= 160 && current.K < 250 && PredictInnerK(next.K) < 28) {
400int GetParameters(
int N, Parameters* params) {
402 int max_m = N_bits - 3;
403 max_m = std::max(kLog2DigitBits, max_m);
406 ComputeParameters(N,
m, ¤t);
408 ComputeParameters(N,
m - 1, &next);
410 Parameters after_next;
411 ComputeParameters(N,
m - 2, &after_next);
412 if (ShouldDecrementM(current, next, after_next)) {
431 FFTContainer(
int n,
int K, ProcessorImpl* processor)
441 FFTContainer() =
delete;
442 FFTContainer(
const FFTContainer&) =
delete;
443 FFTContainer& operator=(
const FFTContainer&) =
delete;
451 void Start_Default(Digits
X,
int chunk_size,
int theta,
int omega);
452 void Start(Digits
X,
int chunk_size,
int theta,
int omega);
454 void NormalizeAndRecombine(
int omega,
int m, RWDigits Z,
int chunk_size);
455 void CounterWeightAndRecombine(
int theta,
int m, RWDigits Z,
int chunk_size);
457 void FFT_ReturnShuffledThreadsafe(
int start,
int len,
int omega,
459 void FFT_Recurse(
int start,
int half,
int omega,
digit_t* temp);
461 void BackwardFFT(
int start,
int len,
int omega);
462 void BackwardFFT_Threadsafe(
int start,
int len,
int omega,
digit_t* temp);
464 void PointwiseMultiply(
const FFTContainer& other);
465 void DoPointwiseMultiplication(
const FFTContainer& other,
int start,
int end,
481 int digits_to_copy,
size_t total_bytes) {
482 size_t bytes_to_copy = digits_to_copy *
sizeof(
digit_t);
483 memcpy(dst,
static_cast<const void*
>(src), bytes_to_copy);
484 memset(dst + digits_to_copy, 0, total_bytes - bytes_to_copy);
489void FFTContainer::Start_Default(Digits
X,
int chunk_size,
int theta,
492 const digit_t* pointer =
X.digits();
494 int current_theta = 0;
496 for (; i < n_ && len > 0;
i++, current_theta += theta) {
497 chunk_size = std::min(chunk_size, len);
503 if (
i ==
n_ - 1 && len == chunk_size + 1) {
508 if (current_theta != 0) {
511 CopyAndZeroExtend(
temp_, pointer, chunk_size, part_length_in_bytes);
514 CopyAndZeroExtend(
part_[
i], pointer, chunk_size, part_length_in_bytes);
516 pointer += chunk_size;
520 for (;
i <
n_;
i++) {
521 memset(
part_[
i], 0, part_length_in_bytes);
523 FFT_ReturnShuffledThreadsafe(0,
n_, omega,
temp_);
528void FFTContainer::Start(Digits
X,
int chunk_size,
int theta,
int omega) {
530 if (len >
n_ * chunk_size / 2) {
531 return Start_Default(
X, chunk_size, theta, omega);
534 const digit_t* pointer =
X.digits();
538 CopyAndZeroExtend(
part_[0], pointer, chunk_size, part_length_in_bytes);
539 CopyAndZeroExtend(
part_[nhalf], pointer, chunk_size, part_length_in_bytes);
540 pointer += chunk_size;
543 for (; i < nhalf && len > 0;
i++) {
544 chunk_size = std::min(chunk_size, len);
545 CopyAndZeroExtend(
part_[
i], pointer, chunk_size, part_length_in_bytes);
548 pointer += chunk_size;
551 for (;
i < nhalf;
i++) {
552 memset(
part_[
i], 0, part_length_in_bytes);
553 memset(
part_[
i + nhalf], 0, part_length_in_bytes);
555 FFT_Recurse(0, nhalf, omega,
temp_);
562void FFTContainer::FFT_ReturnShuffledThreadsafe(
int start,
int len,
int omega,
568 for (
int k = 1; k < half; k++) {
574 FFT_Recurse(
start, half, omega, temp);
578void FFTContainer::FFT_Recurse(
int start,
int half,
int omega,
digit_t* temp) {
580 FFT_ReturnShuffledThreadsafe(
start, half, 2 * omega, temp);
581 FFT_ReturnShuffledThreadsafe(
start + half, half, 2 * omega, temp);
588void FFTContainer::BackwardFFT(
int start,
int len,
int omega) {
589 BackwardFFT_Threadsafe(
start, len, omega,
temp_);
592void FFTContainer::BackwardFFT_Threadsafe(
int start,
int len,
int omega,
599 BackwardFFT_Threadsafe(
start, half, 2 * omega, temp);
600 BackwardFFT_Threadsafe(
start + half, half, 2 * omega, temp);
604 for (
int k = 1; k < half; k++) {
605 int w = omega * (len - k);
613void FFTContainer::NormalizeAndRecombine(
int omega,
int m, RWDigits Z,
617 const int shift =
n_ * omega -
m;
618 for (
int i = 0;
i <
n_;
i++, z_index += chunk_size) {
620 ShiftModFn(
temp_, part, shift,
K_);
624 for (; j <
length_ && zi < Z.len(); j++, zi++) {
639 if (
x[2 * s] >= threshold)
return true;
640 for (
int i = 2 * s + 1;
i <
xlen;
i++) {
641 if (
x[
i] > 0)
return true;
649void FFTContainer::CounterWeightAndRecombine(
int theta,
int m, RWDigits Z,
653 for (
int k = 0; k <
n_; k++, z_index +=
s) {
654 int shift = -theta * k -
m;
655 if (shift < 0) shift += 2 *
n_ * theta;
658 ShiftModFn(
temp_, input, shift,
K_);
659 int remaining_z = Z.len() - z_index;
669 Z[z_index] =
digit_sub(Z[z_index], d, &borrow_z);
672 for (;
i <
K_ &&
i < remaining_z;
i++) {
674 Z[z_index +
i] =
digit_sub2(Z[z_index +
i], d, borrow_z, &borrow_z);
679 Z[z_index +
i] =
digit_sub2(Z[z_index +
i], d, borrow_z, &borrow_z);
682 for (; borrow_z > 0 &&
i < remaining_z;
i++) {
683 Z[z_index +
i] =
digit_sub(Z[z_index +
i], borrow_z, &borrow_z);
694 for (;
carry > 0 &&
i < remaining_z;
i++) {
695 Z[z_index +
i] =
digit_add2(Z[z_index +
i], carry, &carry);
703void MultiplyFFT_Inner(RWDigits Z, Digits
X, Digits Y,
const Parameters& params,
704 ProcessorImpl* processor) {
705 int omega = 2 * params.r;
706 int theta = params.r;
708 FFTContainer
a(params.n, params.K, processor);
709 a.Start_Default(
X, params.s, theta, omega);
710 FFTContainer b(params.n, params.K, processor);
711 b.Start_Default(Y, params.s, theta, omega);
713 a.PointwiseMultiply(b);
714 if (processor->should_terminate())
return;
717 c.BackwardFFT(0, params.n, omega);
719 c.CounterWeightAndRecombine(theta, params.m, Z, params.s);
723void FFTContainer::DoPointwiseMultiplication(
const FFTContainer& other,
730 if (use_fft) ComputeParameters_Inner(
K_, ¶ms);
751void FFTContainer::PointwiseMultiply(
const FFTContainer& other) {
753 DoPointwiseMultiplication(other, 0,
n_,
temp_);
767void ProcessorImpl::MultiplyFFT(RWDigits Z, Digits
X, Digits Y) {
769 int m = GetParameters(
X.len() + Y.len(), ¶ms);
770 int omega = params.r;
772 FFTContainer
a(params.n, params.K,
this);
773 a.Start(
X, params.s, 0, omega);
776 a.PointwiseMultiply(a);
778 FFTContainer b(params.n, params.K,
this);
779 b.Start(Y, params.s, 0, omega);
780 a.PointwiseMultiply(b);
782 if (should_terminate())
return;
784 a.BackwardFFT(0, params.n, omega);
785 a.NormalizeAndRecombine(omega,
m, Z, params.s);
too high values may cause the compiler to set high thresholds for inlining to as much as possible avoid inlined allocation of objects that cannot escape trace load stores from virtual maglev objects use TurboFan fast string builder analyze liveness of environment slots and zap dead values trace TurboFan load elimination emit data about basic block usage in builtins to this enable builtin reordering when run mksnapshot flag for emit warnings when applying builtin profile data verify register allocation in TurboFan randomly schedule instructions to stress dependency tracking enable store store elimination in TurboFan rewrite far to near simulate GC compiler thread race related to allow float parameters to be passed in simulator mode JS Wasm Run additional turbo_optimize_inlined_js_wasm_wrappers enable experimental feedback collection in generic lowering enable Turboshaft s WasmLoadElimination enable Turboshaft s low level load elimination for JS enable Turboshaft s escape analysis for string concatenation use enable Turbolev features that we want to ship in the not too far future trace individual Turboshaft reduction steps trace intermediate Turboshaft reduction steps invocation count threshold for early optimization Enables optimizations which favor memory size over execution speed Enables sampling allocation profiler with X as a sample interval min size of a semi the new space consists of two semi spaces max size of the Collect garbage after Collect garbage after keeps maps alive for< n > old space garbage collections print one detailed trace line in allocation gc speed threshold for starting incremental marking via a task in percent of available threshold for starting incremental marking immediately in percent of available Use a single schedule for determining a marking schedule between JS and C objects schedules the minor GC task with kUserVisible priority max worker number of concurrent for NumberOfWorkerThreads start background threads that allocate memory concurrent_array_buffer_sweeping use parallel threads to clear weak refs in the atomic pause trace progress of the incremental marking trace object counts and memory usage report a tick only when allocated zone memory changes by this amount TracingFlags::gc_stats TracingFlags::gc_stats track native contexts that are expected to be garbage collected verify heap pointers before and after GC memory reducer runs GC with ReduceMemoryFootprint flag Maximum number of memory reducer GCs scheduled Old gen GC speed is computed directly from gc tracer counters Perform compaction on full GCs based on V8 s default heuristics Perform compaction on every full GC Perform code space compaction when finalizing a full GC with stack Stress GC compaction to flush out bugs with moving objects flush of baseline code when it has not been executed recently Use time base code flushing instead of age Use a progress bar to scan large objects in increments when incremental marking is active force incremental marking for small heaps and run it more often force marking at random points between and X(inclusive) percent " "of the regular marking start limit") DEFINE_INT(stress_scavenge
std::optional< TNode< JSArray > > a
ZoneVector< RpoNumber > & result
ProcessorImpl * processor_
constexpr int BitLength(int n)
static constexpr int kDigitBits
constexpr int kFftInnerThreshold
digit_t digit_add3(digit_t a, digit_t b, digit_t c, digit_t *carry)
constexpr int CountTrailingZeros(uint32_t value)
digit_t digit_sub2(digit_t a, digit_t b, digit_t borrow_in, digit_t *borrow_out)
digit_t digit_sub(digit_t a, digit_t b, digit_t *borrow)
digit_t digit_add2(digit_t a, digit_t b, digit_t *carry)
constexpr int RoundUp(int x, int y)
#define DCHECK(condition)