From 2e44f6742eaf9c84fad9fe1c4dc883ee138a3be5 Mon Sep 17 00:00:00 2001 From: Xander van der Goot Date: Mon, 15 Dec 2025 19:29:20 +0800 Subject: [PATCH 01/12] M31: init --- Cargo.toml | 2 +- cm31/Cargo.toml | 14 ++++++++ cm31/src/lib.rs | 69 ++++++++++++++++++++++++++++++++++++ cm31/src/main.rs | 14 ++++++++ cm31/src/u32.rs | 92 ++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 190 insertions(+), 1 deletion(-) create mode 100644 cm31/Cargo.toml create mode 100644 cm31/src/lib.rs create mode 100644 cm31/src/main.rs create mode 100644 cm31/src/u32.rs diff --git a/Cargo.toml b/Cargo.toml index 72dfd4db..71e5202b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ members = [ "tooling/provekit-bench", "tooling/provekit-gnark", "tooling/verifier-server", - "ntt", + "ntt", "cm31", ] exclude = [ "playground/passport-input-gen", diff --git a/cm31/Cargo.toml b/cm31/Cargo.toml new file mode 100644 index 00000000..465dc292 --- /dev/null +++ b/cm31/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "cm31" +version = "0.1.0" +edition.workspace = true +rust-version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +repository.workspace = true + +[dependencies] + +[lints] +workspace = true diff --git a/cm31/src/lib.rs b/cm31/src/lib.rs new file mode 100644 index 00000000..b2cceb27 --- /dev/null +++ b/cm31/src/lib.rs @@ -0,0 +1,69 @@ +use std::ops::{Add, Mul, Neg, Shl, Sub}; + +struct M31I(T); + +impl> Add> for M31I { + type Output = M31I; + + fn add(self, rhs: M31I) -> Self::Output { + Self(self.0 + rhs.0.into()) + } +} + +trait Reduce { + fn reduce_u32(self) -> M31I; +} + +impl Reduce for M31I { + // Assumes bits(hi) < 31 + // Safety: this should only be called on M31I whose range is [61:0] with + // the top bits zero. + // The only way the upper bits can be set is due to additions. After a + // multiplication will go to 34 bits. + fn reduce_u32(self) -> M31I { + // TODO add debug assertion for top bits + // these operations are only intended for kernel + let reduced = reduce(self.0); + M31I(reduced as u32) + } +} + +impl Reduce for M31I { + #[inline] + fn reduce_u32(self) -> M31I { + self + } +} + +// Output bits +// max(lo, hi) + 1 +// where lo max 31 bits and hi max 33 bits +// so maximum is 34 bits +fn reduce(r: u64) -> u64 { + // What are the precedence rules for shift? + // Three cheap operations mask, shift, addition + let lo = r & ((1 << 31) - 1); + let hi = r >> 31; + hi + lo +} + +// Might not be the right type for the multiplication in a kernel. +impl Mul for M31I { + type Output = M31I; + + fn mul(self, rhs: T) -> Self::Output { + let lhs = self.reduce_u32(); + let rhs = rhs.reduce_u32(); + let res = lhs.0 as u64 * rhs.0 as u64; + // After reduction 34 bits + M31I(reduce(res)) + } +} + +impl Sub for M31I { + type Output = M31I; + + fn sub(self, rhs: Self) -> Self::Output { + todo!() + } +} diff --git a/cm31/src/main.rs b/cm31/src/main.rs new file mode 100644 index 00000000..059d6a3a --- /dev/null +++ b/cm31/src/main.rs @@ -0,0 +1,14 @@ +// Type your code here, or load an example. + +// As of Rust 1.75, small functions are automatically +// marked as `#[inline]` so they will not show up in +// the output when compiling with optimisations. Use +// `#[no_mangle]` or `#[inline(never)]` to work around +// this issue. +// See https://github.com/compiler-explorer/compiler-explorer/issues/5939 +use std::hint::black_box; +// If you use `main()`, declare it as `pub` to see it in the output: +pub fn main() { + let (lo, hi) = (u32::carrying_mul_add(black_box(5), black_box(6), 0, 0)); + let _res = black_box(2 * hi + lo); +} diff --git a/cm31/src/u32.rs b/cm31/src/u32.rs new file mode 100644 index 00000000..e976984b --- /dev/null +++ b/cm31/src/u32.rs @@ -0,0 +1,92 @@ +const P3: u64 = 3 * (1 << 31 - 1); + +// Tag can also be the internal type +// 64 byte, 32 byte, 31 byte, 31 byte zero check. De laatste twee is +// waarschijnlijk geen interessant ondersheid Aan de andere kant lijkt het geen +struct M31(u32); + +// Introduce tag to track in which state the M31 is? +// State +impl M31 { + // Needs to return a value if it's going to be tagged with the internal state. + fn reduce_u31(&mut self) { + let tmp = self.0; + // TODO: mask + let tmp = tmp >> 31 + ((tmp << 1) >> 1); + let tmp = tmp >> 31 + ((tmp << 1) >> 1); + self.0 = tmp; + } +} + +impl Add for M31 { + type Output = M31; + + // 32 bit sized because otherwise there might be a masking operations + // while adds W will zero it out. + #[inline] + fn add(self, rhs: Self) -> Self::Output { + // Different ways of presenting it + // 32 bytes + // 64 bytes + // adds w + let (tmp, o) = self.0.overflowing_add(rhs.0); + // can't use adcs as it needs to be shifted first + // so 64 bit might be better as it doesn't require moving the carry to another + // register. + // Interested in how the compiler takes this? + let (tmp, o) = tmp.overflowing_add(2 * o as u32); + let out = tmp + 2 * o as u32; + + Self(out) + } +} + +impl Sub for M31 { + type Output = M31; + + fn sub(self, rhs: Self) -> Self::Output { + // 33 bits + 32 bits -> 34 bits + let tmp = P3 + self.0 as u64 - rhs.0 as u64; + // 34 bits -> 32 bits + let hi = (tmp >> 32) as u32; + let lo = tmp as u32; + Self(2 * hi + lo) + } +} + +impl Neg for M31 { + type Output = M31; + + fn neg(self) -> Self::Output { + let tmp = P3 - self.0 as u64; + let hi = (tmp >> 32) as u32; + let lo = tmp as u32; + Self(2 * hi + lo) + } +} + +impl Mul for M31 { + type Output = M31; + + fn mul(self, rhs: Self) -> Self::Output { + let (lo, hi) = u32::carrying_mul_add(self.0, rhs.0, 0, 0); + // shift addition + // shift, shift, addition + // 64bits -> 34 bits + // 2 * hi -> 33 bits; 33 bits + 32 bits -> 34 bits + let tmp = 2 * hi as u64 + lo as u64; + // 34 bits -> 32 bits + let hi = (tmp >> 32) as u32; + let lo = tmp as u32; + Self(2 * hi + lo) + } +} + +impl Shl for M31 { + type Output = M31; + + fn shl(mut self, rhs: u8) -> Self::Output { + self.reduce_u31(); + todo!(); + } +} From 8eb50e99924e95e748c471fd1a419f0ba5a5a0c6 Mon Sep 17 00:00:00 2001 From: Xander van der Goot Date: Tue, 16 Dec 2025 10:36:13 +0800 Subject: [PATCH 02/12] M31: signed integers support --- cm31/src/lib.rs | 128 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 105 insertions(+), 23 deletions(-) diff --git a/cm31/src/lib.rs b/cm31/src/lib.rs index b2cceb27..d4281c53 100644 --- a/cm31/src/lib.rs +++ b/cm31/src/lib.rs @@ -1,58 +1,132 @@ use std::ops::{Add, Mul, Neg, Shl, Sub}; +#[derive(Copy, Clone)] struct M31I(T); -impl> Add> for M31I { - type Output = M31I; - - fn add(self, rhs: M31I) -> Self::Output { - Self(self.0 + rhs.0.into()) +trait Reduce { + fn reduce_u32(&self) -> M31I; + // Assumes >= 62 bits representation, "enforced" by the allowed trait + // implementations. And also not being i32. + fn full_reduce(&self) -> u32 { + let tmp = self.reduce_u32().0; + let tmp = (tmp >> 31) + (tmp & ((1 << 31) - 1)); + let tmp = (tmp >> 31) + (tmp & ((1 << 31) - 1)); + // branch should become CSEL + if tmp == ((1 << 31) - 1) { + 0 + } else { + tmp + } } } -trait Reduce { - fn reduce_u32(self) -> M31I; -} +// Working with two redundant forms i64 and u32 +// i64 to deal with subtractions +// u32 for storage, i32 is not interesting as we want to be able to add two 31 +// bits values together. i32 would only allow for a subtraction. +// internally u64 is also used + +// For unsigned value smaller than 61 bits conversion to i64 is problem free -impl Reduce for M31I { +// i signifies two complement form +// u signifies valid one complement form. +// for unsigned integers two complement and one complement is the same. Which +// means that conversion back and forth can be done. Whenever i is used we can +// expect the value to be negative so it first needs to be converted to +// one-complement form before reduction. That is why u64 exist internally. And +// why after reduction we move back to i64 again. +// +// i32 signifies fully reduced but that should maybe not be inside a M31, but +// just be a i32 return. i32 signifying that the upper bit is not used. + +impl Reduce for M31I { // Assumes bits(hi) < 31 // Safety: this should only be called on M31I whose range is [61:0] with // the top bits zero. // The only way the upper bits can be set is due to additions. After a // multiplication will go to 34 bits. - fn reduce_u32(self) -> M31I { + fn reduce_u32(&self) -> M31I { // TODO add debug assertion for top bits // these operations are only intended for kernel - let reduced = reduce(self.0); - M31I(reduced as u32) + let reduced = reduce(one_complement(self.0)) as u32; + M31I(reduced) } } impl Reduce for M31I { #[inline] - fn reduce_u32(self) -> M31I { - self + fn reduce_u32(&self) -> M31I { + *self } } +// switching between i64 and u64 because of the sign shift. + +// After multiplication this can use the full range. +// When coming from one complement this is u32. // Output bits // max(lo, hi) + 1 // where lo max 31 bits and hi max 33 bits // so maximum is 34 bits -fn reduce(r: u64) -> u64 { +fn reduce(r: u64) -> i64 { // What are the precedence rules for shift? // Three cheap operations mask, shift, addition let lo = r & ((1 << 31) - 1); let hi = r >> 31; - hi + lo + (hi + lo) as i64 +} + +// Other option is to keep everything in unsigned and only do on complement on +// subtraction and negation. That however would requiring three operations. not, +// and, add instead of just sub. + +// i64 signed representation taking full 64 bits +// u64 one complement representation. By going to one complement we have a valid +// mersenne number. So it actually does two things. +// It can potentially take up 64 bits but those will all be over. It works +// because we work on a Once in one complement form you can go back and forward +// between the two representations. But not when it's in two complement. + +// limit to 62 bits otherwise two reduction steps need to be done. +fn one_complement(r: i64) -> u64 { + // A shift of 61 and 62 might look strange at first, but that is because in + // contrary to other shifting operations there is no splitting into an upper + // and lower part. It's just for checking the MSB and perform an operation + // basedon that. + + // Would checking the N-flag + // be better? Does the compiler do this optimisation? + let sign = r >> 61; + // Turn into one complement and clear out the top two bits. These top bits would + // lead to wrong calculations as we only do not contain any information. + ((r + sign) & ((1 << 62) - 1)) as u64 } -// Might not be the right type for the multiplication in a kernel. -impl Mul for M31I { - type Output = M31I; +// The following traits are to make working with M31 across different +// representation easier. +// On ARM64 conversion from u32 to i64 is free as a u32 already takes up a full +// 64 bit register. +// Monomorphisation should optimise out when the reductions is +// not needed. + +// M31I and M31I are split up becaues rust otherwise requires a dyn +// which would break monomorphisation. +impl Mul for M31I { + type Output = M31I; fn mul(self, rhs: T) -> Self::Output { let lhs = self.reduce_u32(); + lhs * rhs + } +} + +impl Mul for M31I { + type Output = M31I; + + // Mul returns a partial reduced to 34 bits as to only have to do a single + // round. + fn mul(self, rhs: T) -> Self::Output { + let lhs = self; let rhs = rhs.reduce_u32(); let res = lhs.0 as u64 * rhs.0 as u64; // After reduction 34 bits @@ -60,10 +134,18 @@ impl Mul for M31I { } } -impl Sub for M31I { - type Output = M31I; +impl, K: Into> Sub> for M31I { + type Output = M31I; + + fn sub(self, rhs: M31I) -> Self::Output { + M31I(self.0.into() - rhs.0.into()) + } +} + +impl, K: Into> Add> for M31I { + type Output = M31I; - fn sub(self, rhs: Self) -> Self::Output { - todo!() + fn add(self, rhs: M31I) -> Self::Output { + M31I(self.0.into() + rhs.0.into()) } } From 25048cb6c275f4c633997c358d5c764a56240acd Mon Sep 17 00:00:00 2001 From: Xander van der Goot Date: Tue, 16 Dec 2025 13:38:55 +0800 Subject: [PATCH 03/12] M31: Add proptest and kani --- cm31/Cargo.toml | 3 +++ cm31/src/lib.rs | 67 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/cm31/Cargo.toml b/cm31/Cargo.toml index 465dc292..b6667d48 100644 --- a/cm31/Cargo.toml +++ b/cm31/Cargo.toml @@ -10,5 +10,8 @@ repository.workspace = true [dependencies] +[dev-dependencies] +proptest.workspace = true + [lints] workspace = true diff --git a/cm31/src/lib.rs b/cm31/src/lib.rs index d4281c53..b093560b 100644 --- a/cm31/src/lib.rs +++ b/cm31/src/lib.rs @@ -149,3 +149,70 @@ impl, K: Into> Add> for M31I { M31I(self.0.into() + rhs.0.into()) } } + +#[cfg(test)] +mod tests { + use { + crate::{reduce, Reduce, M31I}, + proptest::prelude::*, + }; + proptest! { + #[test] + fn reduce_u64(x: u64){ + assert_eq!(x.rem_euclid((1<<31) - 1) as u32, M31I(reduce(x)).full_reduce()) + } + } + proptest! { + // Use Kani + fn reduce_i64(x: i64){ + assert_eq!(x.rem_euclid((1<<31) - 1) as u32, M31I(x).full_reduce()) + } + } + proptest! { + fn reduce_u32(x: u32){ + assert_eq!(x.rem_euclid((1<<31) - 1) as u32, M31I(x).full_reduce()) + } + } +} + +// Takes a 62bit number and sign extends it into a valid i64 +fn sign_extend(x: u64) -> i64 { + let sign = x >> 61; + let sign = sign << 2 | sign << 1 | sign; + ((sign << 61) | x) as i64 +} + +#[kani::proof] +fn reduce_i64() { + let x: u64 = kani::any::() & ((1 << 61) - 1); + let x = sign_extend(x); + assert_eq!(x.rem_euclid((1 << 31) - 1) as u32, M31I(x).full_reduce()) +} + +// Checks the reduction inside the multiplier +#[kani::proof] +fn reduce_u64() { + let x: u64 = kani::any::(); + assert_eq!( + x.rem_euclid((1 << 31) - 1) as u32, + M31I(reduce(x)).full_reduce() + ) +} + +// TOO slow +// #[kani::proof] +// fn reduce_mul_u32() { +// let x: u32 = kani::any::(); +// let y: u32 = kani::any::(); +// let z = (x as u64) * (y as u64); +// assert_eq!( +// z.rem_euclid((1 << 31) - 1) as u32, +// (M31I(x) * M31I(y)).full_reduce() +// ) +// } + +#[kani::proof] +fn reduce_u32() { + let x = kani::any::(); + assert_eq!(x.rem_euclid((1 << 31) - 1) as u32, M31I(x).full_reduce()) +} From 4412ddeb04bef24e9351134c3b5acdce4640a786 Mon Sep 17 00:00:00 2001 From: Xander van der Goot Date: Tue, 16 Dec 2025 13:57:46 +0800 Subject: [PATCH 04/12] cm31: remove unnecessary reduction in i64 --- cm31/src/lib.rs | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/cm31/src/lib.rs b/cm31/src/lib.rs index b093560b..08bafdf0 100644 --- a/cm31/src/lib.rs +++ b/cm31/src/lib.rs @@ -7,17 +7,7 @@ trait Reduce { fn reduce_u32(&self) -> M31I; // Assumes >= 62 bits representation, "enforced" by the allowed trait // implementations. And also not being i32. - fn full_reduce(&self) -> u32 { - let tmp = self.reduce_u32().0; - let tmp = (tmp >> 31) + (tmp & ((1 << 31) - 1)); - let tmp = (tmp >> 31) + (tmp & ((1 << 31) - 1)); - // branch should become CSEL - if tmp == ((1 << 31) - 1) { - 0 - } else { - tmp - } - } + fn full_reduce(&self) -> u32; } // Working with two redundant forms i64 and u32 @@ -51,6 +41,17 @@ impl Reduce for M31I { let reduced = reduce(one_complement(self.0)) as u32; M31I(reduced) } + + fn full_reduce(&self) -> u32 { + let tmp = self.reduce_u32().0; + let tmp = (tmp >> 31) + (tmp & ((1 << 31) - 1)); + // branch should become CSEL + if tmp == ((1 << 31) - 1) { + 0 + } else { + tmp + } + } } impl Reduce for M31I { @@ -58,6 +59,18 @@ impl Reduce for M31I { fn reduce_u32(&self) -> M31I { *self } + + fn full_reduce(&self) -> u32 { + let tmp = self.0; + let tmp = (tmp >> 31) + (tmp & ((1 << 31) - 1)); + let tmp = (tmp >> 31) + (tmp & ((1 << 31) - 1)); + // branch should become CSEL + if tmp == ((1 << 31) - 1) { + 0 + } else { + tmp + } + } } // switching between i64 and u64 because of the sign shift. From b356893d96533200b7830701e32bfc2558939371 Mon Sep 17 00:00:00 2001 From: Xander van der Goot Date: Tue, 16 Dec 2025 14:27:17 +0800 Subject: [PATCH 05/12] m31: remove proptest kani provides better coverage than proptest --- cm31/Cargo.toml | 1 - cm31/src/lib.rs | 97 ++++++++++++++++++------------------------------- 2 files changed, 36 insertions(+), 62 deletions(-) diff --git a/cm31/Cargo.toml b/cm31/Cargo.toml index b6667d48..dee4cafc 100644 --- a/cm31/Cargo.toml +++ b/cm31/Cargo.toml @@ -11,7 +11,6 @@ repository.workspace = true [dependencies] [dev-dependencies] -proptest.workspace = true [lints] workspace = true diff --git a/cm31/src/lib.rs b/cm31/src/lib.rs index 08bafdf0..cf6cd720 100644 --- a/cm31/src/lib.rs +++ b/cm31/src/lib.rs @@ -1,3 +1,4 @@ +/// When modifying this file, rerun with Kani use std::ops::{Add, Mul, Neg, Shl, Sub}; #[derive(Copy, Clone)] @@ -43,6 +44,7 @@ impl Reduce for M31I { } fn full_reduce(&self) -> u32 { + // Only need two rounds to fully reduce down let tmp = self.reduce_u32().0; let tmp = (tmp >> 31) + (tmp & ((1 << 31) - 1)); // branch should become CSEL @@ -54,6 +56,8 @@ impl Reduce for M31I { } } +// u32 is mostly for storage. Staying within u32 has no benefit on 64 bit +// machines impl Reduce for M31I { #[inline] fn reduce_u32(&self) -> M31I { @@ -109,6 +113,7 @@ fn one_complement(r: i64) -> u64 { // Would checking the N-flag // be better? Does the compiler do this optimisation? + // Relies on the *arithmic* shift left to maintain the sign. let sign = r >> 61; // Turn into one complement and clear out the top two bits. These top bits would // lead to wrong calculations as we only do not contain any information. @@ -163,69 +168,39 @@ impl, K: Into> Add> for M31I { } } -#[cfg(test)] -mod tests { - use { - crate::{reduce, Reduce, M31I}, - proptest::prelude::*, - }; - proptest! { - #[test] - fn reduce_u64(x: u64){ - assert_eq!(x.rem_euclid((1<<31) - 1) as u32, M31I(reduce(x)).full_reduce()) - } - } - proptest! { - // Use Kani - fn reduce_i64(x: i64){ - assert_eq!(x.rem_euclid((1<<31) - 1) as u32, M31I(x).full_reduce()) - } +#[cfg(kani)] +mod verification { + use crate::{reduce, Reduce, M31I}; + // Takes a 62bit number and sign extends it into a valid i64 + fn sign_extend(x: u64) -> i64 { + let sign = x >> 61; + let sign = sign << 2 | sign << 1 | sign; + ((sign << 61) | x) as i64 } - proptest! { - fn reduce_u32(x: u32){ - assert_eq!(x.rem_euclid((1<<31) - 1) as u32, M31I(x).full_reduce()) - } - } -} -// Takes a 62bit number and sign extends it into a valid i64 -fn sign_extend(x: u64) -> i64 { - let sign = x >> 61; - let sign = sign << 2 | sign << 1 | sign; - ((sign << 61) | x) as i64 -} - -#[kani::proof] -fn reduce_i64() { - let x: u64 = kani::any::() & ((1 << 61) - 1); - let x = sign_extend(x); - assert_eq!(x.rem_euclid((1 << 31) - 1) as u32, M31I(x).full_reduce()) -} + #[kani::proof] + fn reduce_i64() { + let x: u64 = kani::any::() & ((1 << 62) - 1); + let x = sign_extend(x); + assert_eq!(x.rem_euclid((1 << 31) - 1) as u32, M31I(x).full_reduce()) + } -// Checks the reduction inside the multiplier -#[kani::proof] -fn reduce_u64() { - let x: u64 = kani::any::(); - assert_eq!( - x.rem_euclid((1 << 31) - 1) as u32, - M31I(reduce(x)).full_reduce() - ) -} + // The proof for the multiplier is too slow. Therefore we model check the inner + // part. The input slightly larger than what it would be within the multiplier. + #[kani::proof] + fn reduce_u64() { + let x: u64 = kani::any::(); + assert_eq!( + x.rem_euclid((1 << 31) - 1) as u32, + M31I(reduce(x)).full_reduce() + ) + } -// TOO slow -// #[kani::proof] -// fn reduce_mul_u32() { -// let x: u32 = kani::any::(); -// let y: u32 = kani::any::(); -// let z = (x as u64) * (y as u64); -// assert_eq!( -// z.rem_euclid((1 << 31) - 1) as u32, -// (M31I(x) * M31I(y)).full_reduce() -// ) -// } - -#[kani::proof] -fn reduce_u32() { - let x = kani::any::(); - assert_eq!(x.rem_euclid((1 << 31) - 1) as u32, M31I(x).full_reduce()) + // Checking the reduction of i64 is not enough as it will not cover the full + // range of u32. Other libraries use + #[kani::proof] + fn reduce_u32() { + let x = kani::any::(); + assert_eq!(x.rem_euclid((1 << 31) - 1) as u32, M31I(x).full_reduce()) + } } From 4b585f48fd4f3032fbd5e0b6b2e16ff59dbc8c51 Mon Sep 17 00:00:00 2001 From: Xander van der Goot Date: Tue, 16 Dec 2025 15:11:04 +0800 Subject: [PATCH 06/12] m31: Add left shift --- cm31/src/lib.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/cm31/src/lib.rs b/cm31/src/lib.rs index cf6cd720..43040fdb 100644 --- a/cm31/src/lib.rs +++ b/cm31/src/lib.rs @@ -36,6 +36,7 @@ impl Reduce for M31I { // the top bits zero. // The only way the upper bits can be set is due to additions. After a // multiplication will go to 34 bits. + #[inline(always)] fn reduce_u32(&self) -> M31I { // TODO add debug assertion for top bits // these operations are only intended for kernel @@ -43,6 +44,7 @@ impl Reduce for M31I { M31I(reduced) } + #[inline(always)] fn full_reduce(&self) -> u32 { // Only need two rounds to fully reduce down let tmp = self.reduce_u32().0; @@ -59,11 +61,12 @@ impl Reduce for M31I { // u32 is mostly for storage. Staying within u32 has no benefit on 64 bit // machines impl Reduce for M31I { - #[inline] + #[inline(always)] fn reduce_u32(&self) -> M31I { *self } + #[inline(always)] fn full_reduce(&self) -> u32 { let tmp = self.0; let tmp = (tmp >> 31) + (tmp & ((1 << 31) - 1)); @@ -85,6 +88,7 @@ impl Reduce for M31I { // max(lo, hi) + 1 // where lo max 31 bits and hi max 33 bits // so maximum is 34 bits +#[inline(always)] fn reduce(r: u64) -> i64 { // What are the precedence rules for shift? // Three cheap operations mask, shift, addition @@ -105,6 +109,7 @@ fn reduce(r: u64) -> i64 { // between the two representations. But not when it's in two complement. // limit to 62 bits otherwise two reduction steps need to be done. +// implicitly says that the result we take in is two complement fn one_complement(r: i64) -> u64 { // A shift of 61 and 62 might look strange at first, but that is because in // contrary to other shifting operations there is no splitting into an upper @@ -132,6 +137,7 @@ fn one_complement(r: i64) -> u64 { impl Mul for M31I { type Output = M31I; + #[inline(always)] fn mul(self, rhs: T) -> Self::Output { let lhs = self.reduce_u32(); lhs * rhs @@ -143,6 +149,7 @@ impl Mul for M31I { // Mul returns a partial reduced to 34 bits as to only have to do a single // round. + #[inline(always)] fn mul(self, rhs: T) -> Self::Output { let lhs = self; let rhs = rhs.reduce_u32(); @@ -168,6 +175,15 @@ impl, K: Into> Add> for M31I { } } +// TODO implement shift left +impl> Shl for M31I { + type Output = M31I; + + fn shl(self, rhs: u8) -> Self::Output { + M31I(self.0.into() << rhs) + } +} + #[cfg(kani)] mod verification { use crate::{reduce, Reduce, M31I}; From 1200733d500eb6951813b9754bda93707de28f42 Mon Sep 17 00:00:00 2001 From: Xander van der Goot Date: Tue, 16 Dec 2025 18:21:58 +0800 Subject: [PATCH 07/12] m31: Add documentation --- cm31/src/lib.rs | 195 ++++++++++++++++++------------------------------ 1 file changed, 72 insertions(+), 123 deletions(-) diff --git a/cm31/src/lib.rs b/cm31/src/lib.rs index 43040fdb..48d4b6eb 100644 --- a/cm31/src/lib.rs +++ b/cm31/src/lib.rs @@ -1,52 +1,35 @@ -/// When modifying this file, rerun with Kani -use std::ops::{Add, Mul, Neg, Shl, Sub}; +/// Safety: When modifying this file, rerun with Kani +use std::ops::{Add, Mul, Shl, Sub}; #[derive(Copy, Clone)] -struct M31I(T); +pub struct M31(T); -trait Reduce { - fn reduce_u32(&self) -> M31I; - // Assumes >= 62 bits representation, "enforced" by the allowed trait - // implementations. And also not being i32. - fn full_reduce(&self) -> u32; +trait M31Reduce { + fn reduce_u32(self) -> M31; + fn full_reduce(self) -> u32; } -// Working with two redundant forms i64 and u32 -// i64 to deal with subtractions -// u32 for storage, i32 is not interesting as we want to be able to add two 31 -// bits values together. i32 would only allow for a subtraction. -// internally u64 is also used - -// For unsigned value smaller than 61 bits conversion to i64 is problem free - -// i signifies two complement form -// u signifies valid one complement form. -// for unsigned integers two complement and one complement is the same. Which -// means that conversion back and forth can be done. Whenever i is used we can -// expect the value to be negative so it first needs to be converted to -// one-complement form before reduction. That is why u64 exist internally. And -// why after reduction we move back to i64 again. -// -// i32 signifies fully reduced but that should maybe not be inside a M31, but -// just be a i32 return. i32 signifying that the upper bit is not used. - -impl Reduce for M31I { - // Assumes bits(hi) < 31 - // Safety: this should only be called on M31I whose range is [61:0] with - // the top bits zero. - // The only way the upper bits can be set is due to additions. After a - // multiplication will go to 34 bits. +/// Code operating on M31 integers will mostly be operation on i62 (i64). +/// +/// All operations assume that i64 works is being treated as a i62 integer. No +/// newtype wrapper for i62 has been introduced to keep boilerplate to a +/// minimum. It is up to the user to make sure that the signed results stays +/// within 62 bits. As most operations will never reach 62 bits and using the +/// upper two bits would require an extra reduction round. +/// +/// One way to think of it is that the upper three bits act as the sign but and +/// therefore can not be different for each other. +/// This also allows for efficient implementation on 64 bit register machines +impl M31Reduce for i64 { #[inline(always)] - fn reduce_u32(&self) -> M31I { - // TODO add debug assertion for top bits - // these operations are only intended for kernel - let reduced = reduce(one_complement(self.0)) as u32; - M31I(reduced) + fn reduce_u32(self) -> M31 { + let reduced = reduce_round(one_complement(self)) as u32; + M31(reduced) } #[inline(always)] - fn full_reduce(&self) -> u32 { - // Only need two rounds to fully reduce down + fn full_reduce(self) -> u32 { + // Two rounds to fully reduce down let tmp = self.reduce_u32().0; let tmp = (tmp >> 31) + (tmp & ((1 << 31) - 1)); // branch should become CSEL @@ -58,17 +41,19 @@ impl Reduce for M31I { } } -// u32 is mostly for storage. Staying within u32 has no benefit on 64 bit -// machines -impl Reduce for M31I { +/// From an end user perspective u32 is for storage as staying within u32 has no +/// benefit on 64 bit machines. Internally it is used to reduce the operands +/// before multiplition such that only a single multiplication has to be done on +/// 64 bit machines. +impl M31Reduce for u32 { #[inline(always)] - fn reduce_u32(&self) -> M31I { - *self + fn reduce_u32(self) -> M31 { + M31(self) } #[inline(always)] - fn full_reduce(&self) -> u32 { - let tmp = self.0; + fn full_reduce(self) -> u32 { + let tmp = self; let tmp = (tmp >> 31) + (tmp & ((1 << 31) - 1)); let tmp = (tmp >> 31) + (tmp & ((1 << 31) - 1)); // branch should become CSEL @@ -80,113 +65,77 @@ impl Reduce for M31I { } } -// switching between i64 and u64 because of the sign shift. - -// After multiplication this can use the full range. -// When coming from one complement this is u32. -// Output bits -// max(lo, hi) + 1 -// where lo max 31 bits and hi max 33 bits -// so maximum is 34 bits +/// Performs a single reduction round +/// +/// Up to the caller to keep track of how many bits are actualy reduced. #[inline(always)] -fn reduce(r: u64) -> i64 { - // What are the precedence rules for shift? - // Three cheap operations mask, shift, addition +fn reduce_round(r: u64) -> i64 { let lo = r & ((1 << 31) - 1); let hi = r >> 31; (hi + lo) as i64 } -// Other option is to keep everything in unsigned and only do on complement on -// subtraction and negation. That however would requiring three operations. not, -// and, add instead of just sub. - -// i64 signed representation taking full 64 bits -// u64 one complement representation. By going to one complement we have a valid -// mersenne number. So it actually does two things. -// It can potentially take up 64 bits but those will all be over. It works -// because we work on a Once in one complement form you can go back and forward -// between the two representations. But not when it's in two complement. - -// limit to 62 bits otherwise two reduction steps need to be done. -// implicitly says that the result we take in is two complement +/// Assumes that the input fits in i62 +/// +/// By taking the one complement we can make use of the correspondence between +/// one complement and numbers modulo Mersenne numbers. Another way of seeing +/// this is going from signed 62 bit value to a unsigned 62 bit number +/// hence the type change. fn one_complement(r: i64) -> u64 { - // A shift of 61 and 62 might look strange at first, but that is because in - // contrary to other shifting operations there is no splitting into an upper - // and lower part. It's just for checking the MSB and perform an operation - // basedon that. - - // Would checking the N-flag - // be better? Does the compiler do this optimisation? - // Relies on the *arithmic* shift left to maintain the sign. + // Relies on the *arithmic* shift right to extend the three bit sign bits to an + // i64. let sign = r >> 61; - // Turn into one complement and clear out the top two bits. These top bits would - // lead to wrong calculations as we only do not contain any information. + // Use the sign information to turn into one complement and clear out the + // redudant sign bits as these would lead to an overcorrection. ((r + sign) & ((1 << 62) - 1)) as u64 } -// The following traits are to make working with M31 across different -// representation easier. -// On ARM64 conversion from u32 to i64 is free as a u32 already takes up a full -// 64 bit register. -// Monomorphisation should optimise out when the reductions is -// not needed. +// The following traits reduce the boiler plate when working with M31 by taking +// on some of the widening and reducing required. -// M31I and M31I are split up becaues rust otherwise requires a dyn -// which would break monomorphisation. -impl Mul for M31I { - type Output = M31I; +impl Mul> for M31 { + type Output = M31; #[inline(always)] - fn mul(self, rhs: T) -> Self::Output { - let lhs = self.reduce_u32(); - lhs * rhs - } -} - -impl Mul for M31I { - type Output = M31I; - - // Mul returns a partial reduced to 34 bits as to only have to do a single - // round. - #[inline(always)] - fn mul(self, rhs: T) -> Self::Output { - let lhs = self; - let rhs = rhs.reduce_u32(); + fn mul(self, rhs: M31) -> Self::Output { + let lhs = self.0.reduce_u32(); + let rhs = rhs.0.reduce_u32(); let res = lhs.0 as u64 * rhs.0 as u64; // After reduction 34 bits - M31I(reduce(res)) + M31(reduce_round(res)) } } -impl, K: Into> Sub> for M31I { - type Output = M31I; +impl, K: Into> Sub> for M31 { + type Output = M31; - fn sub(self, rhs: M31I) -> Self::Output { - M31I(self.0.into() - rhs.0.into()) + fn sub(self, rhs: M31) -> Self::Output { + M31(self.0.into() - rhs.0.into()) } } -impl, K: Into> Add> for M31I { - type Output = M31I; +impl, K: Into> Add> for M31 { + type Output = M31; - fn add(self, rhs: M31I) -> Self::Output { - M31I(self.0.into() + rhs.0.into()) + fn add(self, rhs: M31) -> Self::Output { + M31(self.0.into() + rhs.0.into()) } } // TODO implement shift left -impl> Shl for M31I { - type Output = M31I; - +impl> Shl for M31 { + type Output = M31; + // A fully reduced m31 has space for two 15 left shifts. So in practice there + // might only be space for one left shift. In that case adding a reduction + // might be the right thing to do. fn shl(self, rhs: u8) -> Self::Output { - M31I(self.0.into() << rhs) + M31(self.0.into() << rhs) } } #[cfg(kani)] mod verification { - use crate::{reduce, Reduce, M31I}; + use crate::{reduce_round, M31Reduce, M31}; // Takes a 62bit number and sign extends it into a valid i64 fn sign_extend(x: u64) -> i64 { let sign = x >> 61; @@ -198,7 +147,7 @@ mod verification { fn reduce_i64() { let x: u64 = kani::any::() & ((1 << 62) - 1); let x = sign_extend(x); - assert_eq!(x.rem_euclid((1 << 31) - 1) as u32, M31I(x).full_reduce()) + assert_eq!(x.rem_euclid((1 << 31) - 1) as u32, x.full_reduce()) } // The proof for the multiplier is too slow. Therefore we model check the inner @@ -208,7 +157,7 @@ mod verification { let x: u64 = kani::any::(); assert_eq!( x.rem_euclid((1 << 31) - 1) as u32, - M31I(reduce(x)).full_reduce() + reduce_round(x).full_reduce() ) } @@ -217,6 +166,6 @@ mod verification { #[kani::proof] fn reduce_u32() { let x = kani::any::(); - assert_eq!(x.rem_euclid((1 << 31) - 1) as u32, M31I(x).full_reduce()) + assert_eq!(x.rem_euclid((1 << 31) - 1) as u32, x.full_reduce()) } } From f7f822d5ea5ea01816f37e4aa756e87e6101c53a Mon Sep 17 00:00:00 2001 From: Xander van der Goot Date: Wed, 17 Dec 2025 10:22:53 +0800 Subject: [PATCH 08/12] cm31: Kani lint in Cargo --- Cargo.toml | 5 ++- cm31/src/lib.rs | 40 +++++++++++---------- cm31/src/main.rs | 14 -------- cm31/src/u32.rs | 92 ------------------------------------------------ 4 files changed, 26 insertions(+), 125 deletions(-) delete mode 100644 cm31/src/main.rs delete mode 100644 cm31/src/u32.rs diff --git a/Cargo.toml b/Cargo.toml index 71e5202b..07928ec4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,9 @@ license = "MIT" homepage = "https://github.com/worldfnd/ProveKit" repository = "https://github.com/worldfnd/ProveKit" +[workspace.lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(kani)'] } + [workspace.lints.clippy] cargo = "warn" perf = "warn" @@ -151,4 +154,4 @@ spongefish = { git = "https://github.com/arkworks-rs/spongefish", features = [ "arkworks-algebra", ], rev = "ecb4f08373ed930175585c856517efdb1851fb47" } spongefish-pow = { git = "https://github.com/arkworks-rs/spongefish", rev = "ecb4f08373ed930175585c856517efdb1851fb47" } -whir = { git = "https://github.com/WizardOfMenlo/whir/", features = ["tracing"], rev = "15cf6668e904ed2e80c9e6209dcce69f5bcf79b9" } \ No newline at end of file +whir = { git = "https://github.com/WizardOfMenlo/whir/", features = ["tracing"], rev = "15cf6668e904ed2e80c9e6209dcce69f5bcf79b9" } diff --git a/cm31/src/lib.rs b/cm31/src/lib.rs index 48d4b6eb..5ac614bd 100644 --- a/cm31/src/lib.rs +++ b/cm31/src/lib.rs @@ -2,24 +2,29 @@ use std::ops::{Add, Mul, Shl, Sub}; #[derive(Copy, Clone)] +/// pub struct M31(T); trait M31Reduce { + /// Performs the necessary rounds to get the result to fit inside 32 bits. fn reduce_u32(self) -> M31; - fn full_reduce(self) -> u32; + /// Performs the necessary rounds to get the result to fit inside 31 bits. + fn reduce_fully(self) -> u32; } -/// Code operating on M31 integers will mostly be operation on i62 (i64). +/// Code operating on M31 integers will mostly be operation on i62 (represented +/// as i64). /// -/// All operations assume that i64 works is being treated as a i62 integer. No -/// newtype wrapper for i62 has been introduced to keep boilerplate to a -/// minimum. It is up to the user to make sure that the signed results stays -/// within 62 bits. As most operations will never reach 62 bits and using the -/// upper two bits would require an extra reduction round. +/// All operations assume that i64 is as a i62. No newtype wrapper for i62 has +/// been introduced to keep boilerplate to a minimum. It is up to the user to +/// make sure that the signed results stays within 62 bits. As most operations +/// will never reach 62 bits and using the upper two bits would require an extra +/// reduction round. /// -/// One way to think of it is that the upper three bits act as the sign but and -/// therefore can not be different for each other. -/// This also allows for efficient implementation on 64 bit register machines +/// One way to think of it is that the upper three bits act as the sign bit and +/// therefore can not be different from each other. +/// This representation allows for efficient implementation of i62 on 64b +/// machines. impl M31Reduce for i64 { #[inline(always)] fn reduce_u32(self) -> M31 { @@ -28,7 +33,7 @@ impl M31Reduce for i64 { } #[inline(always)] - fn full_reduce(self) -> u32 { + fn reduce_fully(self) -> u32 { // Two rounds to fully reduce down let tmp = self.reduce_u32().0; let tmp = (tmp >> 31) + (tmp & ((1 << 31) - 1)); @@ -52,7 +57,7 @@ impl M31Reduce for u32 { } #[inline(always)] - fn full_reduce(self) -> u32 { + fn reduce_fully(self) -> u32 { let tmp = self; let tmp = (tmp >> 31) + (tmp & ((1 << 31) - 1)); let tmp = (tmp >> 31) + (tmp & ((1 << 31) - 1)); @@ -122,7 +127,6 @@ impl, K: Into> Add> for M31 { } } -// TODO implement shift left impl> Shl for M31 { type Output = M31; // A fully reduced m31 has space for two 15 left shifts. So in practice there @@ -135,8 +139,8 @@ impl> Shl for M31 { #[cfg(kani)] mod verification { - use crate::{reduce_round, M31Reduce, M31}; - // Takes a 62bit number and sign extends it into a valid i64 + use super::*; + // Takes a signed 62bit number and sign extends it into a valid i64 fn sign_extend(x: u64) -> i64 { let sign = x >> 61; let sign = sign << 2 | sign << 1 | sign; @@ -147,7 +151,7 @@ mod verification { fn reduce_i64() { let x: u64 = kani::any::() & ((1 << 62) - 1); let x = sign_extend(x); - assert_eq!(x.rem_euclid((1 << 31) - 1) as u32, x.full_reduce()) + assert_eq!(x.rem_euclid((1 << 31) - 1) as u32, x.reduce_fully()) } // The proof for the multiplier is too slow. Therefore we model check the inner @@ -157,7 +161,7 @@ mod verification { let x: u64 = kani::any::(); assert_eq!( x.rem_euclid((1 << 31) - 1) as u32, - reduce_round(x).full_reduce() + reduce_round(x).reduce_fully() ) } @@ -166,6 +170,6 @@ mod verification { #[kani::proof] fn reduce_u32() { let x = kani::any::(); - assert_eq!(x.rem_euclid((1 << 31) - 1) as u32, x.full_reduce()) + assert_eq!(x.rem_euclid((1 << 31) - 1) as u32, x.reduce_fully()) } } diff --git a/cm31/src/main.rs b/cm31/src/main.rs deleted file mode 100644 index 059d6a3a..00000000 --- a/cm31/src/main.rs +++ /dev/null @@ -1,14 +0,0 @@ -// Type your code here, or load an example. - -// As of Rust 1.75, small functions are automatically -// marked as `#[inline]` so they will not show up in -// the output when compiling with optimisations. Use -// `#[no_mangle]` or `#[inline(never)]` to work around -// this issue. -// See https://github.com/compiler-explorer/compiler-explorer/issues/5939 -use std::hint::black_box; -// If you use `main()`, declare it as `pub` to see it in the output: -pub fn main() { - let (lo, hi) = (u32::carrying_mul_add(black_box(5), black_box(6), 0, 0)); - let _res = black_box(2 * hi + lo); -} diff --git a/cm31/src/u32.rs b/cm31/src/u32.rs deleted file mode 100644 index e976984b..00000000 --- a/cm31/src/u32.rs +++ /dev/null @@ -1,92 +0,0 @@ -const P3: u64 = 3 * (1 << 31 - 1); - -// Tag can also be the internal type -// 64 byte, 32 byte, 31 byte, 31 byte zero check. De laatste twee is -// waarschijnlijk geen interessant ondersheid Aan de andere kant lijkt het geen -struct M31(u32); - -// Introduce tag to track in which state the M31 is? -// State -impl M31 { - // Needs to return a value if it's going to be tagged with the internal state. - fn reduce_u31(&mut self) { - let tmp = self.0; - // TODO: mask - let tmp = tmp >> 31 + ((tmp << 1) >> 1); - let tmp = tmp >> 31 + ((tmp << 1) >> 1); - self.0 = tmp; - } -} - -impl Add for M31 { - type Output = M31; - - // 32 bit sized because otherwise there might be a masking operations - // while adds W will zero it out. - #[inline] - fn add(self, rhs: Self) -> Self::Output { - // Different ways of presenting it - // 32 bytes - // 64 bytes - // adds w - let (tmp, o) = self.0.overflowing_add(rhs.0); - // can't use adcs as it needs to be shifted first - // so 64 bit might be better as it doesn't require moving the carry to another - // register. - // Interested in how the compiler takes this? - let (tmp, o) = tmp.overflowing_add(2 * o as u32); - let out = tmp + 2 * o as u32; - - Self(out) - } -} - -impl Sub for M31 { - type Output = M31; - - fn sub(self, rhs: Self) -> Self::Output { - // 33 bits + 32 bits -> 34 bits - let tmp = P3 + self.0 as u64 - rhs.0 as u64; - // 34 bits -> 32 bits - let hi = (tmp >> 32) as u32; - let lo = tmp as u32; - Self(2 * hi + lo) - } -} - -impl Neg for M31 { - type Output = M31; - - fn neg(self) -> Self::Output { - let tmp = P3 - self.0 as u64; - let hi = (tmp >> 32) as u32; - let lo = tmp as u32; - Self(2 * hi + lo) - } -} - -impl Mul for M31 { - type Output = M31; - - fn mul(self, rhs: Self) -> Self::Output { - let (lo, hi) = u32::carrying_mul_add(self.0, rhs.0, 0, 0); - // shift addition - // shift, shift, addition - // 64bits -> 34 bits - // 2 * hi -> 33 bits; 33 bits + 32 bits -> 34 bits - let tmp = 2 * hi as u64 + lo as u64; - // 34 bits -> 32 bits - let hi = (tmp >> 32) as u32; - let lo = tmp as u32; - Self(2 * hi + lo) - } -} - -impl Shl for M31 { - type Output = M31; - - fn shl(mut self, rhs: u8) -> Self::Output { - self.reduce_u31(); - todo!(); - } -} From cdba6b8c6ae711ae37da01b6066d1b49c40eede5 Mon Sep 17 00:00:00 2001 From: Xander van der Goot Date: Wed, 17 Dec 2025 11:24:52 +0800 Subject: [PATCH 09/12] cm31: improve documentation --- cm31/src/lib.rs | 114 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 37 deletions(-) diff --git a/cm31/src/lib.rs b/cm31/src/lib.rs index 5ac614bd..5796da02 100644 --- a/cm31/src/lib.rs +++ b/cm31/src/lib.rs @@ -1,30 +1,59 @@ -/// Safety: When modifying this file, rerun with Kani +//! Arithmetic over the Mersenne prime p = 2^31 - 1 (“M31”). +//! +//! This module provides fast, allocation-free helpers to work with values that +//! are intended to live in the finite field modulo \(2^{31}-1\). It relies on: +//! - a 62-bit signed accumulator representation (`i62` stored in `i64`) for +//! intermediate results on 64-bit machines, and +//! - reductions that add the high 31-bit chunk into the low 31-bit chunk until +//! the result fits within the desired bits. +//! +//! Safety: When modifying this file, rerun with Kani use std::ops::{Add, Mul, Shl, Sub}; +/// Newtype wrapper for values participating in arithmetic modulo 2^31 - 1. +/// +/// The generic parameter models the “shape” of the wrapped value: +/// - `u32`: a value known to fit in 32 bits. Primarily used for storage and to +/// tighten bounds when reasoning about sizes. +/// - `i64`: a 62-bit signed accumulator (stored in `i64`). This representation +/// is used for intermediate results. +/// +/// In general, operations widen into the accumulator form and should be +/// followed by an explicit reduction when a canonical representative is +/// required. #[derive(Copy, Clone)] -/// pub struct M31(T); +/// Trait for types that can be reduced modulo \(2^{31}-1\). trait M31Reduce { - /// Performs the necessary rounds to get the result to fit inside 32 bits. + /// Partially reduces the value so it fits in 32 bits. This is typically one + /// reduction round for Mersenne moduli. fn reduce_u32(self) -> M31; - /// Performs the necessary rounds to get the result to fit inside 31 bits. + /// Fully reduces into the canonical representative in 0..(2^31 - 1). + /// Typically requires two reduction rounds fn reduce_fully(self) -> u32; } -/// Code operating on M31 integers will mostly be operation on i62 (represented -/// as i64). +impl M31Reduce for M31 { + fn reduce_u32(self) -> M31 { + self.0.reduce_u32() + } + + fn reduce_fully(self) -> u32 { + self.0.reduce_fully() + } +} + +/// All operations assume that `i64` values are valid `i62`. No separate newtype +/// is introduced to keep boilerplate to a minimum. Callers must ensure signed +/// intermediate results stay within 62 bits. /// -/// All operations assume that i64 is as a i62. No newtype wrapper for i62 has -/// been introduced to keep boilerplate to a minimum. It is up to the user to -/// make sure that the signed results stays within 62 bits. As most operations -/// will never reach 62 bits and using the upper two bits would require an extra -/// reduction round. +/// Intuitively, the upper three bits act as a sign extension; they must not +/// disagree with each other. This enables an efficient `i62` representation on +/// 64-bit machines. /// -/// One way to think of it is that the upper three bits act as the sign bit and -/// therefore can not be different from each other. -/// This representation allows for efficient implementation of i62 on 64b -/// machines. +/// 62 bits has been chosen over 64 bits as 64 bits would necessitate another +/// round of reduction while most algorithms do not require that much space. impl M31Reduce for i64 { #[inline(always)] fn reduce_u32(self) -> M31 { @@ -46,10 +75,11 @@ impl M31Reduce for i64 { } } -/// From an end user perspective u32 is for storage as staying within u32 has no -/// benefit on 64 bit machines. Internally it is used to reduce the operands -/// before multiplition such that only a single multiplication has to be done on -/// 64 bit machines. +/// `u32` is the preferred storage type. +/// +/// On 64-bit machines, the computational benefit comes from using `i64` +/// accumulators and reducing operands to 32 bits before multiplication so +/// that only a single 64-bit multiplication is required. impl M31Reduce for u32 { #[inline(always)] fn reduce_u32(self) -> M31 { @@ -70,9 +100,9 @@ impl M31Reduce for u32 { } } -/// Performs a single reduction round -/// -/// Up to the caller to keep track of how many bits are actualy reduced. +/// Performs a single reduction round. +/// The caller is responsible for invoking a sufficient number of +/// rounds to reach the desired bound. #[inline(always)] fn reduce_round(r: u64) -> i64 { let lo = r & ((1 << 31) - 1); @@ -80,24 +110,28 @@ fn reduce_round(r: u64) -> i64 { (hi + lo) as i64 } -/// Assumes that the input fits in i62 +/// Converts an `i62` accumulator into its ones’ complement `u62` form. /// -/// By taking the one complement we can make use of the correspondence between -/// one complement and numbers modulo Mersenne numbers. Another way of seeing -/// this is going from signed 62 bit value to a unsigned 62 bit number -/// hence the type change. +/// By taking the ones’ complement we leverage the correspondence between ones’ +/// complement and arithmetic modulo Mersenne numbers. Equivalently, this maps a +/// signed 62-bit value to an unsigned 62-bit number while preserving the +/// residue class modulo \(2^{31}-1\). fn one_complement(r: i64) -> u64 { - // Relies on the *arithmic* shift right to extend the three bit sign bits to an + // Relies on the arithmetic shift right to extend the three sign bits to an // i64. let sign = r >> 61; // Use the sign information to turn into one complement and clear out the - // redudant sign bits as these would lead to an overcorrection. + // redundant sign bits as these would lead to an overcorrection. ((r + sign) & ((1 << 62) - 1)) as u64 } // The following traits reduce the boiler plate when working with M31 by taking // on some of the widening and reducing required. +/// Multiplication in M31 arithmetic. +/// +/// Operands are first reduced to 32 bits, multiplied, and then folded +/// once, yielding an 34 bit result. impl Mul> for M31 { type Output = M31; @@ -111,6 +145,7 @@ impl Mul> for M31 { } } +/// Subtraction in M31 arithmetic. Widens into a `i62`. impl, K: Into> Sub> for M31 { type Output = M31; @@ -119,6 +154,7 @@ impl, K: Into> Sub> for M31 { } } +/// Addition in M31 arithmetic. Widens into an `i64`. impl, K: Into> Add> for M31 { type Output = M31; @@ -127,11 +163,12 @@ impl, K: Into> Add> for M31 { } } +/// Left shift on an `i62` accumulator. +/// +/// A fully reduced residue has room for at most two 15-bit left shifts (8-th +/// root of unity). impl> Shl for M31 { type Output = M31; - // A fully reduced m31 has space for two 15 left shifts. So in practice there - // might only be space for one left shift. In that case adding a reduction - // might be the right thing to do. fn shl(self, rhs: u8) -> Self::Output { M31(self.0.into() << rhs) } @@ -140,13 +177,14 @@ impl> Shl for M31 { #[cfg(kani)] mod verification { use super::*; - // Takes a signed 62bit number and sign extends it into a valid i64 + // Takes a signed 62-bit number and sign-extends it into a valid i64. fn sign_extend(x: u64) -> i64 { let sign = x >> 61; let sign = sign << 2 | sign << 1 | sign; ((sign << 61) | x) as i64 } + /// Proves that reducing an `i62` accumulator matches modulo p semantics. #[kani::proof] fn reduce_i64() { let x: u64 = kani::any::() & ((1 << 62) - 1); @@ -154,8 +192,10 @@ mod verification { assert_eq!(x.rem_euclid((1 << 31) - 1) as u32, x.reduce_fully()) } - // The proof for the multiplier is too slow. Therefore we model check the inner - // part. The input slightly larger than what it would be within the multiplier. + // The proof for the multiplier is too slow. Therefore we model-check the + // inner part. The input is slightly larger than what it would be within the + // multiplier. + /// Proves that one fold plus full reduction matches modulo p on `u64`. #[kani::proof] fn reduce_u64() { let x: u64 = kani::any::(); @@ -165,8 +205,8 @@ mod verification { ) } - // Checking the reduction of i64 is not enough as it will not cover the full - // range of u32. Other libraries use + // Checking only `i64` does not cover the full `u32` range. + /// Proves full reduction on all `u32` inputs. #[kani::proof] fn reduce_u32() { let x = kani::any::(); From 6402359a7876f17bf6f4f78de0263452e4d9967b Mon Sep 17 00:00:00 2001 From: Xander van der Goot Date: Wed, 17 Dec 2025 12:15:36 +0800 Subject: [PATCH 10/12] CI: add kani support --- .github/workflows/kani.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 .github/workflows/kani.yml diff --git a/.github/workflows/kani.yml b/.github/workflows/kani.yml new file mode 100644 index 00000000..e2779470 --- /dev/null +++ b/.github/workflows/kani.yml @@ -0,0 +1,18 @@ +name: Kani model checking + +on: + workflow_dispatch: + push: + branches: ["main"] + pull_request: + branches: ["main"] + +jobs: + kani: + runs-on: ubuntu-latest + steps: + - name: Run Kani + uses: model-checking/kani-github-action@v1 + with: + # Whitelist the directories that use Kani to prevent it building the entire project. + working-directory: "./cm31" From f26cab6f153b8642d56cf365dc3472f74c9fe6b8 Mon Sep 17 00:00:00 2001 From: Xander van der Goot Date: Wed, 17 Dec 2025 12:20:39 +0800 Subject: [PATCH 11/12] fixup! CI: add kani support --- .github/workflows/kani.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/kani.yml b/.github/workflows/kani.yml index e2779470..53208f12 100644 --- a/.github/workflows/kani.yml +++ b/.github/workflows/kani.yml @@ -14,5 +14,5 @@ jobs: - name: Run Kani uses: model-checking/kani-github-action@v1 with: - # Whitelist the directories that use Kani to prevent it building the entire project. + # Whitelist directories that use Kani this prevents the entire project from being built. working-directory: "./cm31" From 06f3b18deb332b4e92572f7f5eb97cea447468c5 Mon Sep 17 00:00:00 2001 From: Xander van der Goot Date: Wed, 24 Dec 2025 15:41:16 +0800 Subject: [PATCH 12/12] CI: path trigger + checkout --- .github/workflows/kani.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/kani.yml b/.github/workflows/kani.yml index 53208f12..1daa5331 100644 --- a/.github/workflows/kani.yml +++ b/.github/workflows/kani.yml @@ -1,18 +1,20 @@ name: Kani model checking on: - workflow_dispatch: push: - branches: ["main"] + paths: + - "cm31/**" pull_request: - branches: ["main"] + paths: + - "cm31/**" jobs: kani: runs-on: ubuntu-latest steps: + - uses: actions/checkout@v4 - name: Run Kani uses: model-checking/kani-github-action@v1 with: # Whitelist directories that use Kani this prevents the entire project from being built. - working-directory: "./cm31" + working-directory: "cm31"