Skip to content

Commit 5df4f92

Browse files
Merge pull request #87 from byeongkeunahn/reeds-sloane
Add Reeds-Sloane algorithm
2 parents bafccad + 701666d commit 5df4f92

File tree

2 files changed

+264
-0
lines changed

2 files changed

+264
-0
lines changed

Diff for: basm-std/src/math.rs

+2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ mod sieve;
44
pub use sieve::LinearSieve;
55
mod pollard_rho;
66
pub use pollard_rho::factorize;
7+
mod reeds_sloane;
8+
pub use reeds_sloane::{linear_fit, reeds_sloane};
79

810
pub mod ntt;
911
pub use ntt::*;

Diff for: basm-std/src/math/reeds_sloane.rs

+262
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
use alloc::{vec, vec::Vec};
2+
use core::cmp::max;
3+
use crate::math::{factorize, modadd, modinv, modmul, modsub};
4+
5+
fn reeds_sloane_prime_power(first_terms: &[u64], p: u64, e: usize) -> Vec<u64> {
6+
let n = first_terms.len();
7+
assert!(n >= 2);
8+
assert!(p >= 2);
9+
assert!(e >= 1);
10+
11+
let ppow: Vec<u64> = (0..=e).map(|x| p.wrapping_pow(x as u32)).collect();
12+
13+
let p_e = ppow[e];
14+
let s: Vec<u64> = first_terms.iter().map(|&x| if p_e > 0 { x % p_e } else { x }).collect();
15+
16+
// Utility functions (modulo operations)
17+
let modadd_p_e = |x: u64, y: u64| -> u64 {
18+
if p_e == 0 { x.wrapping_add(y) } else { modadd(x, y, p_e) }
19+
};
20+
let modsub_p_e = |x: u64, y: u64| -> u64 {
21+
if p_e == 0 { x.wrapping_sub(y) } else { modsub(x, y, p_e) }
22+
};
23+
let modmul_p_e = |x: u64, y: u64| -> u64 {
24+
if p_e == 0 { x.wrapping_mul(y) } else { modmul(x, y, p_e) }
25+
};
26+
let modinv_p_e = |x: u64| -> u64 {
27+
if p_e == 0 { modinv(x as u128, 1u128 << 64).unwrap() as u64 } else { modinv(x, p_e).unwrap() }
28+
};
29+
30+
// Utility functions (core logic)
31+
fn l(a: &[u64], b: &[u64]) -> usize {
32+
max(a.len() - 1, b.len())
33+
}
34+
let factor_by_p = |x: u64| -> (u64, usize) {
35+
// Returns (theta, u)
36+
let (mut lo, mut hi) = (0, e);
37+
while lo < hi {
38+
let mid = (lo + hi + 1) / 2;
39+
#[allow(clippy::collapsible_else_if)]
40+
if ppow[mid] == 0 {
41+
if x == 0 {
42+
lo = mid;
43+
} else {
44+
hi = mid - 1;
45+
}
46+
} else {
47+
if x % ppow[mid] == 0 {
48+
lo = mid;
49+
} else {
50+
hi = mid - 1;
51+
}
52+
}
53+
}
54+
(if ppow[lo] == 0 { 0 } else { x / ppow[lo]}, lo)
55+
};
56+
57+
// Step 0
58+
let mut a: Vec<Vec<u64>> = vec![];
59+
let mut b = vec![];
60+
let mut a_new = vec![];
61+
let mut b_new = vec![];
62+
let mut theta = vec![0; e];
63+
let mut u = vec![0; e];
64+
for eta in 0..e {
65+
let p_eta = ppow[eta];
66+
a.push(vec![p_eta]);
67+
b.push(vec![]);
68+
a_new.push(vec![p_eta]);
69+
let p_eta_s0 = modmul_p_e(p_eta, s[0]);
70+
b_new.push(if p_eta_s0 == 0 { vec![] } else { vec![p_eta_s0] } );
71+
let c = modmul_p_e(s[0], p_eta);
72+
(theta[eta], u[eta]) = factor_by_p(c);
73+
}
74+
75+
// Step k
76+
let mut a_old: Vec<Vec<u64>> = vec![vec![]; e];
77+
let mut b_old: Vec<Vec<u64>> = vec![vec![]; e];
78+
let mut u_old = vec![0; e];
79+
let mut theta_old = vec![0; e];
80+
let mut r = vec![0; e];
81+
for k in 1..n {
82+
// Part 1
83+
for g in 0..e {
84+
if l(&a_new[g], &b_new[g]) > l(&a[g], &b[g]) {
85+
let h = e - 1 - u[g];
86+
a_old[g].clone_from(&a[h]);
87+
b_old[g].clone_from(&b[h]);
88+
u_old[g] = u[h];
89+
theta_old[g] = theta[h];
90+
r[g] = k - 1;
91+
}
92+
}
93+
// Part 2
94+
a.clone_from(&a_new);
95+
b.clone_from(&b_new);
96+
// Part 3
97+
for eta in 0..e {
98+
let mut c = modsub_p_e(0, if b[eta].len() > k { b[eta][k] } else { 0 });
99+
for j in 0..=k {
100+
if a[eta].len() > j {
101+
c = modadd_p_e(c, modmul_p_e(s[k - j], a[eta][j]));
102+
}
103+
}
104+
(theta[eta], u[eta]) = factor_by_p(c);
105+
if u[eta] == e {
106+
// Case I
107+
a_new[eta].clone_from(&a[eta]);
108+
b_new[eta].clone_from(&b[eta]);
109+
} else {
110+
// Case II
111+
let g = e - 1 - u[eta];
112+
if l(&a[g], &b[g]) == 0 {
113+
// Case IIa
114+
a_new[eta].clone_from(&a[eta]);
115+
let mut tmp = b[eta].clone();
116+
if tmp.len() <= k {
117+
tmp.resize(k + 1, 0);
118+
}
119+
tmp[k] = modadd_p_e(tmp[k], modmul_p_e(theta[eta], ppow[eta]));
120+
b_new[eta] = tmp;
121+
} else {
122+
// Case IIb
123+
let theta_g_old_inv = modinv_p_e(theta_old[g]);
124+
let m = modmul_p_e(theta[eta], theta_g_old_inv);
125+
let m = modmul_p_e(m, ppow[u[eta] - u_old[g]]);
126+
let d = k - r[g];
127+
let mut tmp = a[eta].clone();
128+
if tmp.len() < a_old[g].len() + d {
129+
tmp.resize(a_old[g].len() + d, 0);
130+
}
131+
for j in 0..a_old[g].len() {
132+
tmp[j + d] = modsub_p_e(tmp[j + d], modmul_p_e(m, a_old[g][j]));
133+
}
134+
while tmp.last() == Some(&0) {
135+
tmp.pop();
136+
}
137+
a_new[eta] = tmp;
138+
let mut tmp = b[eta].clone();
139+
if tmp.len() < b_old[g].len() + d {
140+
tmp.resize(b_old[g].len() + d, 0);
141+
}
142+
for j in 0..b_old[g].len() {
143+
tmp[j + d] = modsub_p_e(tmp[j + d], modmul_p_e(m, b_old[g][j]));
144+
}
145+
while tmp.last() == Some(&0) {
146+
tmp.pop();
147+
}
148+
b_new[eta] = tmp;
149+
}
150+
}
151+
}
152+
}
153+
154+
// Extract output
155+
let mut out = vec![];
156+
for i in 1..a_new[0].len() {
157+
out.push(modsub_p_e(0, a_new[0][i]));
158+
}
159+
out
160+
}
161+
162+
/// Finds a minimal length linear recurrence for `first_terms`
163+
/// under modulo `modulo`, via the Reeds-Sloane algorithm.
164+
///
165+
/// Note that `modulo` of `0` is interpreted as `2**64`.
166+
pub fn reeds_sloane(first_terms: &[u64], modulo: u64) -> Vec<u64> {
167+
if first_terms.len() <= 1 {
168+
return vec![];
169+
}
170+
if modulo == 1 {
171+
return vec![0];
172+
}
173+
if modulo == 0 {
174+
// We deal with 2**64 first, to ensure modulo > 0 below.
175+
return reeds_sloane_prime_power(first_terms, 2, 64);
176+
}
177+
178+
let factors = {
179+
let factors = factorize(modulo);
180+
let mut out = vec![];
181+
let (mut prev, mut cnt) = (0, 0usize);
182+
for f in factors {
183+
if f == prev {
184+
cnt += 1;
185+
} else {
186+
if prev != 0 {
187+
out.push((prev, cnt));
188+
}
189+
(prev, cnt) = (f, 1);
190+
}
191+
}
192+
if prev != 0 {
193+
out.push((prev, cnt));
194+
}
195+
out
196+
};
197+
let mut out_prime = vec![];
198+
let mut max_len = 0;
199+
for &(p, e) in factors.iter() {
200+
let val = reeds_sloane_prime_power(first_terms, p, e);
201+
max_len = max(val.len(), max_len);
202+
out_prime.push((val, p.pow(e as u32)));
203+
}
204+
for v in out_prime.iter_mut() {
205+
v.0.resize(max_len, 0);
206+
}
207+
let mut out = vec![0; max_len];
208+
let mut cumul_mod = 1;
209+
for (v, cur_mod) in out_prime {
210+
if cumul_mod == 1 {
211+
out = v;
212+
} else {
213+
let (p, q) = (cumul_mod, cur_mod);
214+
let (pinv, qinv) = (modinv(p, q).unwrap(), modinv(q, p).unwrap());
215+
for i in 0..max_len {
216+
// out[i] mod cumul_mod, v[i] mod cur_mod
217+
// No overflow since we have ensured p*q < 2**64
218+
let mp = modmul(out[i], qinv, p);
219+
let mq = modmul(v[i], pinv, q);
220+
out[i] = modadd(q * mp, p * mq, p * q);
221+
}
222+
}
223+
cumul_mod *= cur_mod;
224+
}
225+
out
226+
}
227+
228+
/// This function is an alias for the function `reeds_sloane`.
229+
///
230+
/// Finds a minimal length linear recurrence for `first_terms`
231+
/// under modulo `modulo`, via the Reeds-Sloane algorithm.
232+
///
233+
/// Note that `modulo` of `0` is interpreted as `2**64`.
234+
pub fn linear_fit(first_terms: &[u64], modulo: u64) -> Vec<u64> {
235+
reeds_sloane(first_terms, modulo)
236+
}
237+
238+
#[cfg(test)]
239+
mod test {
240+
use super::*;
241+
242+
#[test]
243+
fn check_reeds_sloane_fibosqsum() {
244+
let mut first_terms = [0, 1, 1, 2*2, 3*3, 5*5, 8*8, 13*13, 21*21, 34*34, 55*55, 89*89, 144*144, 233*233, 377*377, 610*610, 987*987, 1597*1597];
245+
for i in 1..first_terms.len() {
246+
first_terms[i] += first_terms[i - 1];
247+
}
248+
let modulo = 1_000_000_007;
249+
let coeff = linear_fit(&first_terms, modulo);
250+
assert!(coeff == vec![2, 2, modulo - 1]);
251+
let coeff = linear_fit(&first_terms, 0);
252+
assert!(coeff == vec![2, 2, u64::MAX]);
253+
}
254+
255+
#[test]
256+
fn check_reeds_sloane_example_in_paper() {
257+
let first_terms = [6, 3, 1, 5, 6];
258+
let modulo = 9;
259+
let coeff = linear_fit(&first_terms, modulo);
260+
assert!(coeff == vec![5, 2, 8]);
261+
}
262+
}

0 commit comments

Comments
 (0)