diff --git a/src/lib.rs b/src/lib.rs index 2c325c32..d39019f0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,6 +28,32 @@ extern crate alloc; +/// This is the maximum number of rows the forest is ever allowed to have. It caps how big +/// `num_leaves` can be (we use a [`u64`]) and is also used by the [`util::translate`] logic. +/// +/// # Calculations +/// +/// If you think: "but... is 63 enough space"? Well... assuming there's around 999,000 WUs +/// available on each block (let's account for header and coinbase), a non-segwit transaction's +/// size is: +/// `4 (version) + 1 (vin count) + 41 (input) + 5 (vout for many outputs) + 10N + 4 (locktime)` +/// +/// `N` is how many outputs we have (we are considering outputs with amount and a zero-sized +/// script), for 999,000 WUs we can fit: +/// - `55 + 10N <= 999,000` +/// - `N ~= 90k` outputs (a little over) +/// +/// Since `2^63 = 9,223,372,036,854,775,808`, dividing this by 90,000 gives 102,481,911,520,608 +/// blocks. Even at one block per second, it would take 3,249,680 years to mine that many blocks. +/// +/// For the poor soul in 3,249,682 A.D., who needs to fix this hard-fork, here's what you gotta do: +/// - Change the `leaf_data` type to u128, or q128 if Quantum Bits are the fashionable standard. +/// - Change `MAX_FOREST_ROWS` to 128 or higher in `lib.rs` +/// - Modify [`util::start_position_at_row`] to avoid overflows. +/// +/// That should save you the trouble. +pub(crate) const MAX_FOREST_ROWS: u8 = 63; + #[cfg(not(feature = "std"))] /// Re-exports `alloc` basics plus HashMap/HashSet and IO traits. 
pub mod prelude { diff --git a/src/mem_forest/mod.rs b/src/mem_forest/mod.rs index 1c342089..70e0102d 100644 --- a/src/mem_forest/mod.rs +++ b/src/mem_forest/mod.rs @@ -49,6 +49,8 @@ use super::util::right_child; use super::util::root_position; use super::util::tree_rows; use crate::prelude::*; +use crate::util::translate; +use crate::MAX_FOREST_ROWS; #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum NodeType { @@ -329,7 +331,13 @@ impl MemForest { .map(|pos| self.get_hash(*pos).unwrap()) .collect::>(); - Ok(Proof::new_with_hash(positions, proof)) + let tree_rows = tree_rows(self.leaves); + let translated_targets = positions + .into_iter() + .map(|pos| translate(pos, tree_rows, MAX_FOREST_ROWS)) + .collect(); + + Ok(Proof::new_with_hash(translated_targets, proof)) } /// Returns a reference to the roots in this MemForest. diff --git a/src/pollard/mod.rs b/src/pollard/mod.rs index a0f6842c..757b8cc9 100644 --- a/src/pollard/mod.rs +++ b/src/pollard/mod.rs @@ -63,6 +63,8 @@ use super::util::right_child; use super::util::root_position; use super::util::tree_rows; use crate::prelude::*; +use crate::util::translate; +use crate::MAX_FOREST_ROWS; #[derive(Default, Clone)] /// A node in the Pollard tree @@ -703,9 +705,15 @@ impl Pollard { proof_hashes.push(hash); } + let tree_rows = tree_rows(self.leaves); + let translated_targets = target_positions + .into_iter() + .map(|pos| translate(pos, tree_rows, MAX_FOREST_ROWS)) + .collect(); + Ok(Proof:: { hashes: proof_hashes, - targets: target_positions, + targets: translated_targets, }) } diff --git a/src/proof/mod.rs b/src/proof/mod.rs index 76ba4431..1964baaf 100644 --- a/src/proof/mod.rs +++ b/src/proof/mod.rs @@ -74,6 +74,8 @@ use super::util::get_proof_positions; use super::util::read_u64; use super::util::tree_rows; use crate::prelude::*; +use crate::util::translate; +use crate::MAX_FOREST_ROWS; #[derive(Clone, Debug, Eq, PartialEq)] #[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] @@ -446,11 +448,16 @@ 
impl Proof { Vec::<(Hash, Hash)>::with_capacity(util::num_roots(num_leaves)); // the positions that should be passed as a proof - let proof_positions = get_proof_positions(&self.targets, num_leaves, total_rows); + let translated: Vec<_> = self + .targets + .iter() + .copied() + .map(|pos| translate(pos, MAX_FOREST_ROWS, total_rows)) + .collect(); + let proof_positions = get_proof_positions(&translated, num_leaves, total_rows); // As we calculate nodes upwards, it accumulates here - let mut nodes: Vec<_> = self - .targets + let mut nodes: Vec<_> = translated .iter() .copied() .zip(del_hashes.to_owned()) @@ -527,7 +534,13 @@ impl Proof { let mut calculated_root_hashes = Vec::::with_capacity(util::num_roots(num_leaves)); // the positions that should be passed as a proof - let proof_positions = get_proof_positions(&self.targets, num_leaves, total_rows); + let translated: Vec<_> = self + .targets + .iter() + .copied() + .map(|pos| translate(pos, MAX_FOREST_ROWS, total_rows)) + .collect(); + let proof_positions = get_proof_positions(&translated, num_leaves, total_rows); // As we calculate nodes upwards, it accumulates here let mut nodes: Vec<_> = self diff --git a/src/util/mod.rs b/src/util/mod.rs index 9036b16d..e72bedd4 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -30,6 +30,50 @@ pub fn remove_bit(val: u64, bit: u64) -> u64 { (upper >> 1) | lower } + +/// Translates targets from a forest with `from_rows` to a forest with `to_rows`. +/// +/// When we compute the position of a node, any node not in row 0 has a position that depends +/// on how many leaves there are. This happens because row 0 is allocated to the nearest power of +/// two that can fit that many leaves. Therefore, in a forest with 6 leaves, the bottom row goes +/// from 0 through 7, and row 1 goes from 8 through 11 (the size of each row halves as you move +/// up). 
If you add three extra UTXOs, growing the forest to 9 leaves, adding the 9th will require +/// allocating 16 row-0 leaves; row 1 therefore goes from 16 through 23, and so on. +/// +/// If leaves always stayed at the bottom, that's fine. Nothing at the bottom ever needs to care +/// about this, because there is no row below it whose growth would shift its positions. However, +/// leaves **do** move up during deletions. For that reason, whenever the forest grows, all targets +/// that are not at the bottom need to be updated. +/// +/// Now imagine that we want to keep a leaf map from `leaf_hash` to position within the forest: +/// this works fine, and we know where a node must go when deleting by calling [`parent`] with its +/// current position and `num_leaves`. But now imagine the forest has to grow: we need to go +/// through the map and update all non-row-0 leaves. This could potentially involve going through +/// millions of UTXOs and updating them one by one. Note that we can find the next position; it is +/// not super efficient, but it works (see [`crate::proof::Proof::maybe_remap`] for more details). +/// But doing this for every UTXO that is not at the bottom is too expensive. Even though it +/// happens exponentially less frequently, when it does happen, it is going to take an absurd +/// amount of time and could potentially stall the Utreexo network for hours. +/// +/// For that reason, we communicate positions as if the forest always had the maximum +/// number of rows we can possibly have, which is 63. Therefore, those positions never need to be +/// remapped. Internally, we still use the dynamic size, and use this function to translate between +/// the two. +/// +/// # Implementation +/// +/// This function simply computes how far away from the start of the row this leaf is, then uses +/// that to offset the same amount in the new structure. 
+pub fn translate(pos: u64, from_rows: u8, to_rows: u8) -> u64 { + let row = detect_row(pos, from_rows); + if row == 0 { + return pos; + } + + let offset = pos - start_position_at_row(row, from_rows); + offset + start_position_at_row(row, to_rows) +} + pub fn calc_next_pos(position: u64, del_pos: u64, forest_rows: u8) -> Result { let del_row = detect_row(del_pos, forest_rows); let pos_row = detect_row(position, forest_rows); @@ -93,7 +137,7 @@ pub fn start_position_at_row(row: u8, forest_rows: u8) -> u64 { // 2 << forest_rows is 2 more than the max position // to get the correct offset for a given row, // subtract (2 << `row complement of forest_rows`) from (2 << forest_rows) - (2 << forest_rows) - (2 << (forest_rows - row)) as u64 + ((2_u128 << forest_rows) - (2_u128 << (forest_rows - row))) as u64 } pub fn is_left_niece(position: u64) -> bool { @@ -359,6 +403,7 @@ mod tests { use super::roots_to_destroy; use crate::node_hash::BitcoinNodeHash; use crate::util::children; + use crate::util::start_position_at_row; use crate::util::tree_rows; #[test] @@ -501,4 +546,17 @@ mod tests { let res = super::calc_next_pos(1, 9, 3); assert_eq!(Ok(9), res); } + + #[test] + fn test_start_position_at_row() { + assert_eq!(start_position_at_row(1, 12), 4096); + + // Check if we don't overflow with bigger forests + assert_eq!(start_position_at_row(63, 63), 18446744073709551614); + assert_eq!(start_position_at_row(44, 63), 18446744073708503040); + + assert_eq!(start_position_at_row(0, 63), 0); + assert_eq!(start_position_at_row(0, 32), 0); + assert_eq!(start_position_at_row(1, 5), 32); + } }