From 627664a312de7ac7f224d62299ed83f794620ebf Mon Sep 17 00:00:00 2001 From: Davidson Souza Date: Wed, 11 Mar 2026 14:18:05 -0300 Subject: [PATCH] feat: use positions based on a forest with 63 rows When we compute the position of a node, any node not in row 0 has a position that depends on how many leaves there are. This happens because row 0 is allocated to the nearest power of two that can fit that many leaves. Therefore, in a forest with 6 leaves, the bottom row goes from 0 through 7, and row 1 goes from 8 through 11 (the size of each row halves as you move up). If you add three extra UTXOs, growing the forest to 9 leaves, adding the 9th will require allocating 16 row-0 leaves; row 1 therefore goes from 16 through 23, and so on. If leaves always stayed at the bottom, that would be fine. Nothing at the bottom ever needs to care about this, because there is no row below it whose growth would shift its positions. However, leaves **do** move up during deletions. For that reason, whenever the forest grows, all targets that are not at the bottom need to be updated. Now imagine that we want to keep a leaf map from leaf_hash to position within the forest: this works fine, and we know where a node must go when deleting by calling [`parent`] with its current position and `num_leaves`. But now imagine the forest has to grow: we need to go through the map and update all non-row-0 leaves. This could potentially involve going through millions of UTXOs and updating them one by one. Note that we can find the next position; it is not super efficient, but it works (see [`crate::proof::Proof::maybe_remap`] for more details). But doing this for every UTXO that is not at the bottom is too expensive. Even though it happens exponentially less frequently, when it does happen, it is going to take an absurd amount of time and could potentially stall the Utreexo network for hours. 
For that reason, we communicate positions as if the forest always had the maximum number of rows it can possibly have, which is 63 (room for 2^63 leaves). Therefore, those positions never need to be remapped. Internally, we still use the dynamic size, and use `util::translate` to translate between the two. --- src/lib.rs | 26 +++++++++++++++++++ src/mem_forest/mod.rs | 10 +++++++- src/pollard/mod.rs | 10 +++++++- src/proof/mod.rs | 21 ++++++++++++--- src/util/mod.rs | 60 ++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 120 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2c325c32..d39019f0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,6 +28,32 @@ extern crate alloc; +/// This is the maximum number of rows the forest is ever allowed to have; it caps how big +/// `num_leaves` can be (we use a [`u64`]) and is also used by the [`util::translate`] logic. +/// +/// # Calculations +/// +/// If you think: "but... is 63 rows enough space?" Well... assuming there's around 999,000 WUs +/// available on each block (let's account for header and coinbase), a non-segwit transaction's +/// size is: +/// `4 (version) + 1 (vin count) + 41 (input) + 5 (vout for many outputs) + 10N + 4 (locktime)` +/// +/// `N` is how many outputs we have (we are considering outputs with amount and a zero-sized +/// script), for 999,000 WUs we can fit: +/// - `55 + 10N <= 999,000` +/// - `N ~= 90k` outputs (a little over) +/// +/// Since `2^63 = 9,223,372,036,854,775,808`, if you divide this by 90,000 we get +/// 102,481,911,520,608 blocks. Even at one block per second, that takes 3,249,680 years to mine. +/// +/// For the poor soul in 3,249,682 A.D., who needs to fix this hard-fork, here's what you gotta do: +/// - Change the `leaf_data` type to u128, or q128 if Quantum Bits are the fashionable standard. +/// - Change `MAX_FOREST_ROWS` to 128 or higher in `lib.rs` +/// - Modify [`util::start_position_at_row`] to avoid overflows. +/// +/// That should save you the trouble. 
+pub(crate) const MAX_FOREST_ROWS: u8 = 63; + #[cfg(not(feature = "std"))] /// Re-exports `alloc` basics plus HashMap/HashSet and IO traits. pub mod prelude { diff --git a/src/mem_forest/mod.rs b/src/mem_forest/mod.rs index 1c342089..70e0102d 100644 --- a/src/mem_forest/mod.rs +++ b/src/mem_forest/mod.rs @@ -49,6 +49,8 @@ use super::util::right_child; use super::util::root_position; use super::util::tree_rows; use crate::prelude::*; +use crate::util::translate; +use crate::MAX_FOREST_ROWS; #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum NodeType { @@ -329,7 +331,13 @@ impl MemForest { .map(|pos| self.get_hash(*pos).unwrap()) .collect::>(); - Ok(Proof::new_with_hash(positions, proof)) + let tree_rows = tree_rows(self.leaves); + let translated_targets = positions + .into_iter() + .map(|pos| translate(pos, tree_rows, MAX_FOREST_ROWS)) + .collect(); + + Ok(Proof::new_with_hash(translated_targets, proof)) } /// Returns a reference to the roots in this MemForest. diff --git a/src/pollard/mod.rs b/src/pollard/mod.rs index a0f6842c..757b8cc9 100644 --- a/src/pollard/mod.rs +++ b/src/pollard/mod.rs @@ -63,6 +63,8 @@ use super::util::right_child; use super::util::root_position; use super::util::tree_rows; use crate::prelude::*; +use crate::util::translate; +use crate::MAX_FOREST_ROWS; #[derive(Default, Clone)] /// A node in the Pollard tree @@ -703,9 +705,15 @@ impl Pollard { proof_hashes.push(hash); } + let tree_rows = tree_rows(self.leaves); + let translated_targets = target_positions + .into_iter() + .map(|pos| translate(pos, tree_rows, MAX_FOREST_ROWS)) + .collect(); + Ok(Proof:: { hashes: proof_hashes, - targets: target_positions, + targets: translated_targets, }) } diff --git a/src/proof/mod.rs b/src/proof/mod.rs index 76ba4431..1964baaf 100644 --- a/src/proof/mod.rs +++ b/src/proof/mod.rs @@ -74,6 +74,8 @@ use super::util::get_proof_positions; use super::util::read_u64; use super::util::tree_rows; use crate::prelude::*; +use crate::util::translate; +use 
crate::MAX_FOREST_ROWS; #[derive(Clone, Debug, Eq, PartialEq)] #[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] @@ -446,11 +448,16 @@ impl Proof { Vec::<(Hash, Hash)>::with_capacity(util::num_roots(num_leaves)); // the positions that should be passed as a proof - let proof_positions = get_proof_positions(&self.targets, num_leaves, total_rows); + let translated: Vec<_> = self + .targets + .iter() + .copied() + .map(|pos| translate(pos, MAX_FOREST_ROWS, total_rows)) + .collect(); + let proof_positions = get_proof_positions(&translated, num_leaves, total_rows); // As we calculate nodes upwards, it accumulates here - let mut nodes: Vec<_> = self - .targets + let mut nodes: Vec<_> = translated .iter() .copied() .zip(del_hashes.to_owned()) @@ -527,7 +534,13 @@ impl Proof { let mut calculated_root_hashes = Vec::::with_capacity(util::num_roots(num_leaves)); // the positions that should be passed as a proof - let proof_positions = get_proof_positions(&self.targets, num_leaves, total_rows); + let translated: Vec<_> = self + .targets + .iter() + .copied() + .map(|pos| translate(pos, MAX_FOREST_ROWS, total_rows)) + .collect(); + let proof_positions = get_proof_positions(&translated, num_leaves, total_rows); // As we calculate nodes upwards, it accumulates here let mut nodes: Vec<_> = self diff --git a/src/util/mod.rs b/src/util/mod.rs index 9036b16d..e72bedd4 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -30,6 +30,50 @@ pub fn remove_bit(val: u64, bit: u64) -> u64 { (upper >> 1) | lower } + +/// Translates targets from a forest with `from_rows` to a forest with `to_rows`. +/// +/// When we compute the position of a node, any node not in row 0 has a position that depends +/// on how many leaves there are. This happens because row 0 is allocated to the nearest power of +/// two that can fit that many leaves. 
Therefore, in a forest with 6 leaves, the bottom row goes +/// from 0 through 7, and row 1 goes from 8 through 11 (the size of each row halves as you move +/// up). If you add three extra UTXOs, growing the forest to 9 leaves, adding the 9th will require +/// allocating 16 row-0 leaves; row 1 therefore goes from 16 through 23, and so on. +/// +/// If leaves always stayed at the bottom, that would be fine. Nothing at the bottom ever needs to +/// care about this, because there is no row below it whose growth would shift its positions. +/// However, leaves **do** move up during deletions. For that reason, whenever the forest grows, +/// all targets that are not at the bottom need to be updated. +/// +/// Now imagine that we want to keep a leaf map from `leaf_hash` to position within the forest: +/// this works fine, and we know where a node must go when deleting by calling [`parent`] with its +/// current position and `num_leaves`. But now imagine the forest has to grow: we need to go +/// through the map and update all non-row-0 leaves. This could potentially involve going through +/// millions of UTXOs and updating them one by one. Note that we can find the next position; it is +/// not super efficient, but it works (see [`crate::proof::Proof::maybe_remap`] for more details). +/// But doing this for every UTXO that is not at the bottom is too expensive. Even though it +/// happens exponentially less frequently, when it does happen, it is going to take an absurd +/// amount of time and could potentially stall the Utreexo network for hours. +/// +/// For that reason, we communicate positions as if the forest always had the maximum number of +/// rows it can possibly have, which is 63 (room for 2^63 leaves). Therefore, those positions never +/// need to be remapped. Internally, we still use the dynamic size, and use this function to +/// translate between the two. 
+/// +/// # Implementation +/// +/// Row-0 positions are returned unchanged. Otherwise, we compute how far `pos` is from the start +/// of its row under `from_rows` and apply the same offset to that row's start under `to_rows`. +pub fn translate(pos: u64, from_rows: u8, to_rows: u8) -> u64 { + let row = detect_row(pos, from_rows); + if row == 0 { + return pos; + } + + let offset = pos - start_position_at_row(row, from_rows); + offset + start_position_at_row(row, to_rows) +} + pub fn calc_next_pos(position: u64, del_pos: u64, forest_rows: u8) -> Result { let del_row = detect_row(del_pos, forest_rows); let pos_row = detect_row(position, forest_rows); @@ -93,7 +137,7 @@ pub fn start_position_at_row(row: u8, forest_rows: u8) -> u64 { // 2 << forest_rows is 2 more than the max position // to get the correct offset for a given row, // subtract (2 << `row complement of forest_rows`) from (2 << forest_rows) - (2 << forest_rows) - (2 << (forest_rows - row)) as u64 + ((2_u128 << forest_rows) - (2_u128 << (forest_rows - row))) as u64 } pub fn is_left_niece(position: u64) -> bool { @@ -359,6 +403,7 @@ mod tests { use super::roots_to_destroy; use crate::node_hash::BitcoinNodeHash; use crate::util::children; + use crate::util::start_position_at_row; use crate::util::tree_rows; #[test] @@ -501,4 +546,17 @@ mod tests { let res = super::calc_next_pos(1, 9, 3); assert_eq!(Ok(9), res); } + + #[test] + fn test_start_position_at_row() { + assert_eq!(start_position_at_row(1, 12), 4096); + + // Check that we don't overflow with the largest forests + assert_eq!(start_position_at_row(63, 63), 18446744073709551614); + assert_eq!(start_position_at_row(44, 63), 18446744073708503040); + + assert_eq!(start_position_at_row(0, 63), 0); + assert_eq!(start_position_at_row(0, 32), 0); + assert_eq!(start_position_at_row(1, 5), 32); + } }