
Commit 95384ad

feat: use positions based on a forest with 63 rows
When we compute the position of a node, any node not in row 0 has a position that depends on how many leaves there are. This happens because row 0's size is allocated to the nearest power of two that can fit that many leaves. Therefore, in a forest with 6 leaves, the bottom row goes from 0 through 7, and row 1 from 8 through 11 (the size of each row halves as you move up). If you add three extra UTXOs, growing the forest to nine leaves, adding the 9th will require allocating 16 row-0 leaves; row 1 therefore goes from 16 through 23, and so on.

If leaves always stayed at the bottom, that would be fine. Nothing at the bottom ever needs to care about this, because there's no row below it to grow and shift its positions. However, leaves **do** move up during deletions. For that reason, whenever the forest grows, all targets that aren't at the bottom need to be updated.

Now imagine that we want to keep a leaf map that maps leaf_hash -> position within the forest: this works fine, as we know where a node must go when deleting, by calling [`parent`] with its current position and `num_leaves`. But now imagine the forest has to grow: we need to go through the map and update all non-row-0 leaves. This could potentially involve going through millions of UTXOs and updating them one by one. Note that we can find the next position (it's not super efficient, but it works; see [`crate::proof::Proof::maybe_remap`] for more details), but doing this for every UTXO that isn't at the bottom is too expensive. Even though it happens exponentially less frequently, when it happens it's going to take an absurd amount of time and potentially stall the Utreexo network for hours.

For that reason, we communicate positions as if the forest always had the maximum number of rows it can possibly have, which is 63. Those positions never need to be remapped. Internally, we still use the dynamic size, and use this function to translate between the two.
1 parent 6b14fd0 commit 95384ad
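The row shift the commit message describes can be sketched directly. This is a minimal, self-contained reimplementation of `tree_rows` and `start_position_at_row` from their definitions in this diff (not calls into the crate), showing how row 1's start moves when the forest grows from 6 to 9 leaves:

```rust
// Sketch: how row-start positions shift as the forest grows. `tree_rows` and
// `start_position_at_row` are reimplemented here for illustration.
fn tree_rows(leaves: u64) -> u8 {
    // smallest number of rows whose bottom row can hold `leaves` leaves
    if leaves <= 1 {
        return 0;
    }
    (64 - (leaves - 1).leading_zeros()) as u8
}

fn start_position_at_row(row: u8, forest_rows: u8) -> u64 {
    // widen to u128 so that `forest_rows = 63` does not overflow the shift
    ((2_u128 << forest_rows) - (2_u128 << (forest_rows - row))) as u64
}

fn main() {
    // 6 leaves -> 3 rows: the bottom row spans 0..=7, so row 1 starts at 8.
    assert_eq!(tree_rows(6), 3);
    assert_eq!(start_position_at_row(1, 3), 8);

    // Growing to 9 leaves -> 4 rows: row 0 now spans 0..=15, row 1 starts at 16.
    assert_eq!(tree_rows(9), 4);
    assert_eq!(start_position_at_row(1, 4), 16);
}
```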

File tree

5 files changed

+113
-7
lines changed


src/lib.rs

Lines changed: 28 additions & 0 deletions

@@ -26,6 +26,34 @@
 extern crate alloc;
 
+/// This is the maximum size the forest is ever allowed to have; it caps how big `num_leaves` can
+/// be (we use a [`u64`]) and is also used by the [`util::translate`] logic.
+///
+/// # Calculations
+///
+/// If you think "but... is 63 enough space?": well, assuming there are around 999,000 WUs
+/// available in each block (let's account for the header and coinbase), a non-segwit
+/// transaction's size is:
+///     4 (version) + 1 (vin count) + 41 (input) + 5 (vout count for a large number of outputs)
+///     + 10N + 4 (locktime)
+///
+/// N is how many outputs we have (we are considering outputs with an amount and a zero-sized
+/// script); for 999,000 WU we can fit:
+///     55 + 10N <= 999,000
+///     N ~= 90k outputs (a little over)
+///
+/// 2^63 = 9,223,372,036,854,775,808
+/// Dividing this by 90,000 we get 102,481,911,520,608 blocks;
+/// it would take 3,249,680 years to mine that many blocks...
+///
+/// For the poor soul in 3,249,682 A.D. who needs to fix this hard-fork, here's what you gotta do:
+/// - Change the `leaf_data` type to a u128 (or a q128, if Quantum Bits are the fashionable standard)
+/// - Change `MAX_FOREST_ROWS` to 128 or higher in `lib.rs`
+/// - Modify [`start_position_at_row`] to avoid overflows
+///
+/// That should save you the trouble.
+pub(crate) const MAX_FOREST_ROWS: u8 = 63;
 
 #[cfg(not(feature = "std"))]
 /// Re-exports `alloc` basics plus HashMap/HashSet and IO traits.
 pub mod prelude {
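The doc comment's capacity arithmetic can be sanity-checked. A minimal sketch, assuming the figures from the comment (a ~999,000 WU block budget, a 55-WU fixed transaction overhead, and 10 WU per zero-script output):

```rust
// Sanity-check of the arithmetic in the MAX_FOREST_ROWS doc comment.
fn main() {
    // 55 + 10N <= 999,000  =>  N <= 99,894 outputs per block,
    // so the comment's "~90k" figure is a conservative round-down.
    let max_outputs = (999_000u128 - 55) / 10;
    assert_eq!(max_outputs, 99_894);

    // Blocks needed to create 2^63 leaves at 90,000 outputs per block,
    // matching the figure quoted in the comment:
    let blocks = (1u128 << 63) / 90_000;
    assert_eq!(blocks, 102_481_911_520_608);
}
```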

src/mem_forest/mod.rs

Lines changed: 9 additions & 1 deletion

@@ -47,6 +47,8 @@ use super::util::right_child;
 use super::util::root_position;
 use super::util::tree_rows;
 use crate::prelude::*;
+use crate::util::translate;
+use crate::MAX_FOREST_ROWS;
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 enum NodeType {
@@ -327,7 +329,13 @@ impl<Hash: AccumulatorHash> MemForest<Hash> {
             .map(|pos| self.get_hash(*pos).unwrap())
             .collect::<Vec<_>>();
 
-        Ok(Proof::new_with_hash(positions, proof))
+        let tree_rows = tree_rows(self.leaves);
+        let translated_targets = positions
+            .into_iter()
+            .map(|pos| translate(pos, tree_rows, MAX_FOREST_ROWS))
+            .collect();
+
+        Ok(Proof::new_with_hash(translated_targets, proof))
     }
 
     /// Returns a reference to the roots in this MemForest.
src/pollard/mod.rs

Lines changed: 9 additions & 1 deletion

@@ -61,6 +61,8 @@ use super::util::right_child;
 use super::util::root_position;
 use super::util::tree_rows;
 use crate::prelude::*;
+use crate::util::translate;
+use crate::MAX_FOREST_ROWS;
 
 #[derive(Default, Clone)]
 /// A node in the Pollard tree
@@ -701,9 +703,15 @@ impl<Hash: AccumulatorHash> Pollard<Hash> {
             proof_hashes.push(hash);
         }
 
+        let tree_rows = tree_rows(self.leaves);
+        let translated_targets = target_positions
+            .into_iter()
+            .map(|pos| translate(pos, tree_rows, MAX_FOREST_ROWS))
+            .collect();
+
         Ok(Proof::<Hash> {
             hashes: proof_hashes,
-            targets: target_positions,
+            targets: translated_targets,
         })
     }
src/proof/mod.rs

Lines changed: 16 additions & 4 deletions

@@ -72,6 +72,7 @@ use super::util::get_proof_positions;
 use super::util::read_u64;
 use super::util::tree_rows;
 use crate::prelude::*;
+use crate::util::translate;
 
 #[derive(Clone, Debug, Eq, PartialEq)]
 #[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
@@ -444,11 +445,16 @@ impl<Hash: AccumulatorHash> Proof<Hash> {
             Vec::<(Hash, Hash)>::with_capacity(util::num_roots(num_leaves));
 
         // the positions that should be passed as a proof
-        let proof_positions = get_proof_positions(&self.targets, num_leaves, total_rows);
+        let translated: Vec<_> = self
+            .targets
+            .iter()
+            .copied()
+            .map(|pos| translate(pos, 63, total_rows))
+            .collect();
+        let proof_positions = get_proof_positions(&translated, num_leaves, total_rows);
 
         // As we calculate nodes upwards, it accumulates here
-        let mut nodes: Vec<_> = self
-            .targets
+        let mut nodes: Vec<_> = translated
             .iter()
             .copied()
             .zip(del_hashes.to_owned())
@@ -525,7 +531,13 @@ impl<Hash: AccumulatorHash> Proof<Hash> {
         let mut calculated_root_hashes = Vec::<Hash>::with_capacity(util::num_roots(num_leaves));
 
         // the positions that should be passed as a proof
-        let proof_positions = get_proof_positions(&self.targets, num_leaves, total_rows);
+        let translated: Vec<_> = self
+            .targets
+            .iter()
+            .copied()
+            .map(|pos| translate(pos, 63, total_rows))
+            .collect();
+        let proof_positions = get_proof_positions(&translated, num_leaves, total_rows);
 
         // As we calculate nodes upwards, it accumulates here
         let mut nodes: Vec<_> = self
src/util/mod.rs

Lines changed: 51 additions & 1 deletion

@@ -28,6 +28,50 @@ pub fn remove_bit(val: u64, bit: u64) -> u64 {
 
     (upper >> 1) | lower
 }
+
+/// Translates targets from a forest with `from_rows` to a forest with `to_rows`.
+///
+/// When we compute the position of a node, any node not in row 0 has a position that depends
+/// on how many leaves there are. This happens because row 0 is allocated to the nearest power of
+/// two that can fit that many leaves. Therefore, in a forest with 6 leaves, the bottom row goes
+/// from 0 through 7, and row 1 goes from 8 through 11 (the size of each row halves as you
+/// move up). If you add three extra UTXOs, growing the forest to 9 leaves, adding the 9th
+/// will require allocating 16 row-0 leaves; row 1 therefore goes from 16 through 23, and so on.
+///
+/// If leaves always stayed at the bottom, that would be fine. Nothing at the bottom ever needs
+/// to care about this, because there is no row below it whose growth would shift its positions.
+/// However, leaves **do** move up during deletions. For that reason, whenever the forest grows,
+/// all targets that are not at the bottom need to be updated.
+///
+/// Now, imagine that we want to keep a leaf map from `leaf_hash` to position within the forest:
+/// this works fine, and we know where a node must go when deleting, by calling [`parent`] with
+/// its current position and `num_leaves`. But now imagine the forest has to grow: we need to go
+/// through the map and update all non-row-0 leaves. This could potentially involve going through
+/// millions of UTXOs and updating them one by one. Note that we can find the next position; it is
+/// not super efficient, but it works (see [`crate::proof::Proof::maybe_remap`] for more details).
+/// But doing this for every UTXO that is not at the bottom is too expensive. Even though it
+/// happens exponentially less frequently, when it happens, it is going to take an absurd amount
+/// of time and potentially stall the Utreexo network for hours.
+///
+/// For that reason, we communicate positions as if the forest always had the maximum number of
+/// rows it can possibly have, which is 63. Therefore, those positions never need to be remapped.
+/// Internally, we still use the dynamic size, and use this function to translate between the two.
+///
+/// # Implementation
+///
+/// This function simply computes how far away from the start of the row this leaf is, then uses
+/// that to offset the same amount in the new structure.
+pub fn translate(pos: u64, from_rows: u8, to_rows: u8) -> u64 {
+    let row = detect_row(pos, from_rows);
+    if row == 0 {
+        return pos;
+    }
+
+    let offset = pos - start_position_at_row(row, from_rows);
+    offset + start_position_at_row(row, to_rows)
+}
+
 pub fn calc_next_pos(position: u64, del_pos: u64, forest_rows: u8) -> Result<u64, String> {
     let del_row = detect_row(del_pos, forest_rows);
     let pos_row = detect_row(position, forest_rows);
@@ -91,7 +135,7 @@ pub fn start_position_at_row(row: u8, forest_rows: u8) -> u64 {
     // 2 << forest_rows is 2 more than the max position
     // to get the correct offset for a given row,
     // subtract (2 << `row complement of forest_rows`) from (2 << forest_rows)
-    (2 << forest_rows) - (2 << (forest_rows - row)) as u64
+    ((2_u128 << forest_rows) - (2_u128 << (forest_rows - row))) as u64
 }
 
 pub fn is_left_niece(position: u64) -> bool {
@@ -357,6 +401,7 @@ mod tests {
     use super::roots_to_destroy;
     use crate::node_hash::BitcoinNodeHash;
     use crate::util::children;
+    use crate::util::start_position_at_row;
     use crate::util::tree_rows;
 
     #[test]
@@ -499,4 +544,9 @@ mod tests {
         let res = super::calc_next_pos(1, 9, 3);
         assert_eq!(Ok(9), res);
     }
+
+    #[test]
+    fn test_start_position_at_row() {
+        assert_eq!(start_position_at_row(1, 12), 4096);
+    }
 }
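Here is the new helper exercised on concrete positions. This is a self-contained sketch: `detect_row` is reimplemented from its usual utreexo definition (it is not shown in this diff), and the other two functions mirror the versions above. Note that `start_position_at_row(1, 63)` equals 2^63, which is exactly why the function now computes in `u128` before casting back to `u64`:

```rust
// Count the leading 1-bits of `pos` within a `forest_rows`-row forest to find
// its row (reimplemented here; not part of this diff).
fn detect_row(pos: u64, forest_rows: u8) -> u8 {
    let mut marker = 1u64 << forest_rows;
    let mut row = 0;
    while pos & marker != 0 {
        marker >>= 1;
        row += 1;
    }
    row
}

fn start_position_at_row(row: u8, forest_rows: u8) -> u64 {
    // u128 arithmetic avoids the u64 overflow of `2 << 63`
    ((2_u128 << forest_rows) - (2_u128 << (forest_rows - row))) as u64
}

fn translate(pos: u64, from_rows: u8, to_rows: u8) -> u64 {
    let row = detect_row(pos, from_rows);
    if row == 0 {
        return pos;
    }
    let offset = pos - start_position_at_row(row, from_rows);
    offset + start_position_at_row(row, to_rows)
}

fn main() {
    // Row-0 positions are never remapped.
    assert_eq!(translate(5, 3, 63), 5);
    // Position 9 is the second node of row 1 in a 3-row forest (row 1 spans
    // 8..=11). In a 63-row forest, row 1 starts at 2^63, so 9 maps to 2^63 + 1.
    assert_eq!(translate(9, 3, 63), (1u64 << 63) + 1);
    // The value checked by the new unit test: row 1 of a 12-row forest starts at 4096.
    assert_eq!(start_position_at_row(1, 12), 4096);
}
```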
