From 8df231379240b28736e4868f744a5ef357b2ee66 Mon Sep 17 00:00:00 2001
From: Jack Lloyd
Date: Wed, 11 Feb 2026 12:49:45 -0500
Subject: [PATCH] perf(crypto): Optimize hash to scalar operation for
 secp256k1 and secp256r1

In from_wide_bytes, use a single scalar multiplication rather than 256
doublings. This function is in turn called by hash_to_scalar. With this
change the cost of hash_to_scalar is reduced from ~2800 nanoseconds to
~330 nanoseconds.

Hashing to scalar is used for MEGa encryption and decryption, for
generating challenges in ZK proofs, and for rerandomizing the
presignature.
---
 .../canister_threshold_sig/benches/scalar.rs  |  8 ++++++++
 .../src/utils/group/secp256k1.rs              | 19 ++++++++++---------
 .../src/utils/group/secp256r1.rs              | 17 +++++++++--------
 3 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/canister_threshold_sig/benches/scalar.rs b/rs/crypto/internal/crypto_lib/threshold_sig/canister_threshold_sig/benches/scalar.rs
index 68111a637a33..e328e5015f26 100644
--- a/rs/crypto/internal/crypto_lib/threshold_sig/canister_threshold_sig/benches/scalar.rs
+++ b/rs/crypto/internal/crypto_lib/threshold_sig/canister_threshold_sig/benches/scalar.rs
@@ -65,6 +65,14 @@ fn scalar_math(c: &mut Criterion) {
         )
     });
 
+    group.bench_function("hash_to_scalar", |b| {
+        b.iter_batched_ref(
+            || rng.r#gen::<[u8; 32]>(),
+            |s| EccScalar::hash_to_scalar(curve_type, s, b"hash-to-scalar-bench"),
+            BatchSize::SmallInput,
+        )
+    });
+
     for n in [2, 4, 16, 32] {
         group.bench_function(format!("batch_invert_vartime_{}", n), |b| {
             b.iter_batched_ref(
diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/canister_threshold_sig/src/utils/group/secp256k1.rs b/rs/crypto/internal/crypto_lib/threshold_sig/canister_threshold_sig/src/utils/group/secp256k1.rs
index d7898e62374a..67d91a6a02c1 100644
--- a/rs/crypto/internal/crypto_lib/threshold_sig/canister_threshold_sig/src/utils/group/secp256k1.rs
+++ b/rs/crypto/internal/crypto_lib/threshold_sig/canister_threshold_sig/src/utils/group/secp256k1.rs
@@ -1,6 +1,6 @@
 use hex_literal::hex;
 use k256::elliptic_curve::{
-    Field, Group,
+    Group,
     group::{GroupEncoding, ff::PrimeField},
     ops::{Invert, LinearCombination, MulByGenerator, Reduce},
     scalar::IsHigh,
@@ -174,11 +174,11 @@ impl Scalar {
     /// group order.
     pub fn from_wide_bytes(bytes: &[u8]) -> Option<Self> {
         /*
-        As the k256 crates is lacking a native function that reduces an input
+        As the k256 crate is lacking a native function that reduces an input
         modulo the group order we have to synthesize it using other operations.
 
         Do so by splitting up the input into two parts each of which is at most
-        scalar_len bytes long. Then compute s0*2^X + s1
+        scalar_len bytes long. Then compute s0*2^256 + s1
         */
 
         if bytes.len() > Self::BYTES * 2 {
@@ -192,15 +192,16 @@ impl Scalar {
         let fb0 = k256::FieldBytes::from_slice(&extended[..Self::BYTES]);
         let fb1 = k256::FieldBytes::from_slice(&extended[Self::BYTES..]);
 
-        let mut s0 = <k256::Scalar as Reduce<k256::U256>>::reduce_bytes(fb0);
+        let s0 = <k256::Scalar as Reduce<k256::U256>>::reduce_bytes(fb0);
         let s1 = <k256::Scalar as Reduce<k256::U256>>::reduce_bytes(fb1);
 
-        for _bit in 1..=Self::BYTES * 8 {
-            s0 = s0.double();
-        }
-        s0 += s1;
+        // 2^256 mod n (secp256k1 group order)
+        let shift = k256::Scalar::from_repr(k256::FieldBytes::from(hex!(
+            "000000000000000000000000000000014551231950b75fc4402da1732fc9bebf"
+        )))
+        .unwrap();
 
-        Some(Self::new(s0))
+        Some(Self::new(s0 * shift + s1))
     }
 
     /// Return constant zero
diff --git a/rs/crypto/internal/crypto_lib/threshold_sig/canister_threshold_sig/src/utils/group/secp256r1.rs b/rs/crypto/internal/crypto_lib/threshold_sig/canister_threshold_sig/src/utils/group/secp256r1.rs
index 411552a53846..f27f2383dc68 100644
--- a/rs/crypto/internal/crypto_lib/threshold_sig/canister_threshold_sig/src/utils/group/secp256r1.rs
+++ b/rs/crypto/internal/crypto_lib/threshold_sig/canister_threshold_sig/src/utils/group/secp256r1.rs
@@ -76,11 +76,11 @@ impl Scalar {
     /// group order.
     pub fn from_wide_bytes(bytes: &[u8]) -> Option<Self> {
         /*
-        As the p256 crates is lacking a native function that reduces an input
+        As the p256 crate is lacking a native function that reduces an input
         modulo the group order we have to synthesize it using other operations.
 
         Do so by splitting up the input into two parts each of which is at most
-        scalar_len bytes long. Then compute s0*2^X + s1
+        scalar_len bytes long. Then compute s0*2^256 + s1
         */
 
         if bytes.len() > Self::BYTES * 2 {
@@ -94,15 +94,16 @@ impl Scalar {
         let fb0 = p256::FieldBytes::from_slice(&extended[..Self::BYTES]);
         let fb1 = p256::FieldBytes::from_slice(&extended[Self::BYTES..]);
 
-        let mut s0 = <p256::Scalar as Reduce<p256::U256>>::reduce_bytes(fb0);
+        let s0 = <p256::Scalar as Reduce<p256::U256>>::reduce_bytes(fb0);
         let s1 = <p256::Scalar as Reduce<p256::U256>>::reduce_bytes(fb1);
 
-        for _bit in 1..=Self::BYTES * 8 {
-            s0 = s0.double();
-        }
-        s0 += s1;
+        // 2^256 mod n (secp256r1 group order)
+        let shift = p256::Scalar::from_repr(p256::FieldBytes::from(hex!(
+            "00000000ffffffff00000000000000004319055258e8617b0c46353d039cdaaf"
+        )))
+        .unwrap();
 
-        Some(Self::new(s0))
+        Some(Self::new(s0 * shift + s1))
     }
 
     /// Return constant zero
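
Note, not part of the patch above: a minimal standalone sketch of the split-and-recombine
identity that the new from_wide_bytes code relies on, scaled down so it can be checked by
hand. The 32-bit "wide" value, the 16-bit halves, and the toy modulus below are made-up
stand-ins for the up-to-64-byte wide input, its two 32-byte halves s0/s1, and the curve
group order n; the precomputed shift plays the role of 2^256 mod n.

    // Illustrative sketch only: checks that (hi mod n)*(2^k mod n) + (lo mod n)
    // is congruent to the original wide value mod n, which is the identity the
    // patch exploits with k = 256 and n the group order.
    fn main() {
        let n: u64 = 0xfff1; // toy modulus, stand-in for the group order
        let wide: u64 = 0xdead_beef; // toy "wide" input, stand-in for the hash output

        let s0 = (wide >> 16) % n; // top half, reduced mod n
        let s1 = (wide & 0xffff) % n; // bottom half, reduced mod n

        let shift = (1u64 << 16) % n; // 2^16 mod n, analogue of 2^256 mod n
        let reduced = (s0 * shift + s1) % n;

        assert_eq!(reduced, wide % n);
        println!("0x{wide:x} mod 0x{n:x} = 0x{reduced:x}");
    }

A single multiplication by the fixed scalar shift is what replaces the loop of 256
doublings in the old code; together with one addition it yields the same reduced value,
which is where the drop from ~2800 ns to ~330 ns per hash_to_scalar comes from.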