From d548252ee80c8ec086a55f1b64490ada43cf6f42 Mon Sep 17 00:00:00 2001 From: Tomi Jaga Date: Sat, 19 Jul 2025 10:35:21 -0700 Subject: [PATCH 1/3] Update median_key to separator_key --- src/MemoryBTree/Base.mo | 12 +++---- src/MemoryBTree/modules/Branch.mo | 54 ++++++++++++++-------------- src/MemoryBTree/modules/Methods.mo | 56 +++++++++++++++--------------- 3 files changed, 61 insertions(+), 61 deletions(-) diff --git a/src/MemoryBTree/Base.mo b/src/MemoryBTree/Base.mo index 6c2e2ad..b8d9722 100644 --- a/src/MemoryBTree/Base.mo +++ b/src/MemoryBTree/Base.mo @@ -376,7 +376,7 @@ module { var right_index = Leaf.get_index(btree, right_node_address); let ?first_key_address = Leaf.get_kv_address(btree, right_node_address, 0) else Debug.trap("insert: first_key_address accessed a null value"); - var median_key_address = first_key_address; + var separator_key_address = first_key_address; // assert Leaf.get_count(btree, left_node_address) == (btree.node_capacity / 2) + 1; // assert Leaf.get_count(btree, right_node_address) == (btree.node_capacity / 2); @@ -392,7 +392,7 @@ module { // Debug.print("found branch with enough space"); // Debug.print("parent before insert: " # debug_show Branch.from_memory(btree, parent_address)); - Branch.insert(btree, parent_address, right_index, median_key_address, right_node_address); + Branch.insert(btree, parent_address, right_index, separator_key_address, right_node_address); update_count(btree, btree.count + 1); // Debug.print("parent after insert: " # debug_show Branch.from_memory(btree, parent_address)); @@ -402,12 +402,12 @@ module { // otherwise split parent left_node_address := parent_address; - right_node_address := Branch.split(btree, left_node_address, right_index, median_key_address, right_node_address); + right_node_address := Branch.split(btree, left_node_address, right_index, separator_key_address, right_node_address); update_branch_count(btree, btree.branch_count + 1); let ?first_key_address = Branch.get_key_address(btree, 
right_node_address, btree.node_capacity - 2) else Debug.trap("4. insert: accessed a null value in first key of branch"); Branch.set_key_address_to_null(btree, right_node_address, btree.node_capacity - 2); - median_key_address := first_key_address; + separator_key_address := first_key_address; right_index := Branch.get_index(btree, right_node_address); opt_parent := Branch.get_parent(btree, right_node_address); @@ -423,7 +423,7 @@ module { Branch.update_depth(btree, new_root, new_depth); assert Branch.get_depth(btree, new_root) == new_depth; - Branch.put_key_address(btree, new_root, 0, median_key_address); + Branch.put_key_address(btree, new_root, 0, separator_key_address); Branch.add_child(btree, new_root, left_node_address); Branch.add_child(btree, new_root, right_node_address); @@ -723,7 +723,7 @@ module { if (elem_index == 0) { // if the first element is removed then update the parent key let ?next_key_address = Leaf.get_kv_address(btree, leaf_address, 0) else Debug.trap("remove: next_key_block is null"); - Branch.update_median_key_address(btree, parent, leaf_index, next_key_address); + Branch.update_separator_key_address(btree, parent, leaf_index, next_key_address); }; let min_count = btree.node_capacity / 2; diff --git a/src/MemoryBTree/modules/Branch.mo b/src/MemoryBTree/modules/Branch.mo index 7fae09b..fd69db8 100644 --- a/src/MemoryBTree/modules/Branch.mo +++ b/src/MemoryBTree/modules/Branch.mo @@ -601,7 +601,7 @@ module Branch { MemoryRegion.storeNat64(btree.branches, branch_address + MC.PARENT_START, parent); }; - public func update_median_key_address(btree : MemoryBTree, parent_address : Nat, child_index : Nat, new_key_address : UniqueId) { + public func update_separator_key_address(btree : MemoryBTree, parent_address : Nat, child_index : Nat, new_key_address : UniqueId) { var curr_address = parent_address; var i = child_index; @@ -630,7 +630,7 @@ module Branch { // elements inserted are always nodes created as a result of split // so their index is 
always greater than one as new nodes created from // a split operation are always inserted at the right - // update_median_key_address(btree, branch, i, key); + // update_separator_key_address(btree, branch, i, key); // Debug.trap("Branch.insert(): inserting at index 0 is not allowed"); } else { let key_offset = get_node_key_offset(branch_address, i - 1); @@ -681,7 +681,7 @@ module Branch { let is_elem_added_to_right = child_index >= median; - var median_key_address = ?child_key_address; + var separator_key_address = ?child_key_address; var offset = if (is_elem_added_to_right) 0 else 1; var already_inserted = false; @@ -698,7 +698,7 @@ module Branch { if (not is_elem_added_to_right) { let j = i + median - offset : Nat; - median_key_address := Branch.get_key_address(btree, branch_address, j - 1); + separator_key_address := Branch.get_key_address(btree, branch_address, j - 1); let start_key = get_node_key_offset(branch_address, j); let end_key = get_node_key_offset(branch_address, arr_len - 1); @@ -751,7 +751,7 @@ module Branch { child; } else { if (i == 0) { - median_key_address := Branch.get_key_address(btree, branch_address, j - 1); + separator_key_address := Branch.get_key_address(btree, branch_address, j - 1); } else { let ?shifted_key_address = Branch.get_key_address(btree, branch_address, j - 1) else Debug.trap("Branch.split: accessed a null value"); @@ -799,8 +799,8 @@ module Branch { // store the first key of the right node at the end of the keys in left node // no need to delete as the value will get overwritten because it exceeds the count position - let ?_median_key_address = median_key_address else Debug.trap("Branch.split: median key_block is null"); - Branch.put_key_address(btree, right_address, btree.node_capacity - 2, _median_key_address); + let ?_separator_key_address = separator_key_address else Debug.trap("Branch.split: median key_block is null"); + Branch.put_key_address(btree, right_address, btree.node_capacity - 2, _separator_key_address); 
right_address; }; @@ -932,8 +932,8 @@ module Branch { if (neighbour_index < branch_index) { // Debug.print("redistribute: left neighbour"); // move data from the left neighbour to the right branch - let ?_median_key_address = Branch.get_key_address(btree, parent, neighbour_index) else return Debug.trap("Branch.redistribute: median_key_address should not be null"); - var median_key_address = _median_key_address; + let ?_separator_key_address = Branch.get_key_address(btree, parent, neighbour_index) else return Debug.trap("Branch.redistribute: separator_key_address should not be null"); + var separator_key_address = _separator_key_address; Branch.shift(btree, branch, 0, branch_count, data_to_move); @@ -945,39 +945,39 @@ module Branch { let ?child = Branch.get_child(btree, neighbour, j) else return Debug.trap("Branch.redistribute: child should not be null"); Branch.remove(btree, neighbour, j); - // Debug.print("median_key_address: " # debug_show median_key_address); + // Debug.print("separator_key_address: " # debug_show separator_key_address); let new_index = data_to_move - i - 1 : Nat; - Branch.put_key_address(btree, branch, new_index, median_key_address); + Branch.put_key_address(btree, branch, new_index, separator_key_address); Branch.put_child(btree, branch, new_index, child); let child_subtree_size = if (branch_has_leaves) Leaf.get_count(btree, child) else Branch.get_subtree_size(btree, child); moved_subtree_size += child_subtree_size; - median_key_address := key_address; + separator_key_address := key_address; i += 1; }; - // Debug.print("parent median_key_address: " # debug_show median_key_address); - // Debug.print("parent median_key_blob: " # debug_show median_key_blob); + // Debug.print("parent separator_key_address: " # debug_show separator_key_address); + // Debug.print("parent separator_key_blob: " # debug_show separator_key_blob); - Branch.put_key_address(btree, parent, neighbour_index, median_key_address); + Branch.put_key_address(btree, parent, 
neighbour_index, separator_key_address); } else { // Debug.print("redistribute: right neighbour"); // move data from the right neighbour to the left branch - let ?_median_key_address = Branch.get_key_address(btree, parent, branch_index) else return Debug.trap("Branch.redistribute: median_key_address should not be null"); - var median_key_address = _median_key_address; + let ?_separator_key_address = Branch.get_key_address(btree, parent, branch_index) else return Debug.trap("Branch.redistribute: separator_key_address should not be null"); + var separator_key_address = _separator_key_address; var i = 0; while (i < data_to_move) { - // Debug.print("median_key_address: " # debug_show median_key_address); + // Debug.print("separator_key_address: " # debug_show separator_key_address); let ?child = Branch.get_child(btree, neighbour, i) else return Debug.trap("Branch.redistribute: child should not be null"); - Branch.insert(btree, branch, branch_count + i, median_key_address, child); + Branch.insert(btree, branch, branch_count + i, separator_key_address, child); let child_subtree_size = if (branch_has_leaves) Leaf.get_count(btree, child) else Branch.get_subtree_size(btree, child); moved_subtree_size += child_subtree_size; @@ -985,12 +985,12 @@ module Branch { let ?key_block = Branch.get_key_address(btree, neighbour, i) else return Debug.trap("Branch.redistribute: key_block should not be null"); let ?key_blob = Branch.get_key_blob(btree, neighbour, i) else return Debug.trap("Branch.redistribute: key_blob should not be null"); - median_key_address := key_block; + separator_key_address := key_block; i += 1; }; - // Debug.print("parent median_key_address: " # debug_show median_key_address); + // Debug.print("parent separator_key_address: " # debug_show separator_key_address); // shift keys and children in the right neighbour // since we can't shift to the first child index, @@ -1001,7 +1001,7 @@ module Branch { Branch.put_child(btree, neighbour, 0, first_child); // update 
median key in parent - Branch.put_key_address(btree, parent, branch_index, median_key_address); + Branch.put_key_address(btree, parent, branch_index, separator_key_address); }; Branch.update_count(btree, branch, branch_count + data_to_move); @@ -1039,21 +1039,21 @@ module Branch { let left_subtree_size = Branch.get_subtree_size(btree, left); let right_subtree_size = Branch.get_subtree_size(btree, right); - let ?_median_key_address = Branch.get_key_address(btree, parent, right_index - 1) else Debug.trap("Branch.merge: median_key_address should not be null"); - var median_key_address = _median_key_address; + let ?_separator_key_address = Branch.get_key_address(btree, parent, right_index - 1) else Debug.trap("Branch.merge: separator_key_address should not be null"); + var separator_key_address = _separator_key_address; // Debug.print("left branch before merge: " # debug_show Branch.from_memory(btree, left)); // Debug.print("right branch before merge: " # debug_show Branch.from_memory(btree, right)); var i = 0; label while_loop while (i < right_count) { - // Debug.print("median_key_address: " # debug_show median_key_address); + // Debug.print("separator_key_address: " # debug_show separator_key_address); let ?child = Branch.get_child(btree, right, i) else return Debug.trap("Branch.merge: child should not be null"); - Branch.insert(btree, left, left_count + i, median_key_address, child); + Branch.insert(btree, left, left_count + i, separator_key_address, child); if (i < (right_count - 1 : Nat)) { let ?key_block = Branch.get_key_address(btree, right, i) else return Debug.trap("Branch.merge: key_block should not be null"); - median_key_address := key_block; + separator_key_address := key_block; }; i += 1; diff --git a/src/MemoryBTree/modules/Methods.mo b/src/MemoryBTree/modules/Methods.mo index 601aad1..1b9c584 100644 --- a/src/MemoryBTree/modules/Methods.mo +++ b/src/MemoryBTree/modules/Methods.mo @@ -733,26 +733,26 @@ module Methods { assert address == leaf.0 
[Leaf.AC.ADDRESS]; assert depth == 1; - let (left_median_key, right_median_key) = switch (Leaf.get_parent(btree, address)) { + let (left_separator_key, right_separator_key) = switch (Leaf.get_parent(btree, address)) { case (?parent) { - var left_median_key : ?Nat = null; - var right_median_key : ?Nat = null; + var left_separator_key : ?Nat = null; + var right_separator_key : ?Nat = null; if (index > 0) { - let ?left_median_key_blob = Branch.get_key_blob(btree, parent, index - 1) else Debug.trap("1. validate: accessed a null value"); - left_median_key := ?btree_utils.key.blobify.from_blob(left_median_key_blob); + let ?left_separator_key_blob = Branch.get_key_blob(btree, parent, index - 1) else Debug.trap("1. validate: accessed a null value"); + left_separator_key := ?btree_utils.key.blobify.from_blob(left_separator_key_blob); }; let parent_count = Branch.get_count(btree, parent); if (index + 1 < parent_count) { - let ?right_median_key_blob = Branch.get_key_blob(btree, parent, index) else Debug.trap("2. validate: accessed a null value"); - right_median_key := ?btree_utils.key.blobify.from_blob(right_median_key_blob); + let ?right_separator_key_blob = Branch.get_key_blob(btree, parent, index) else Debug.trap("2. 
validate: accessed a null value"); + right_separator_key := ?btree_utils.key.blobify.from_blob(right_separator_key_blob); }; - (left_median_key, right_median_key); + (left_separator_key, right_separator_key); }; case (null) (null, null); @@ -783,16 +783,16 @@ module Methods { }; }; - switch (left_median_key) { - case (?left_median_key) { - assert left_median_key <= key; + switch (left_separator_key) { + case (?left_separator_key) { + assert left_separator_key <= key; }; case (null) {}; }; - switch (right_median_key) { - case (?right_median_key) { - assert key < right_median_key; + switch (right_separator_key) { + case (?right_separator_key) { + assert key < right_separator_key; }; case (null) {}; }; @@ -820,26 +820,26 @@ module Methods { assert address == branch.0 [Branch.AC.ADDRESS]; assert subtree_size == branch.0 [Branch.AC.SUBTREE_SIZE]; - let (left_median_key, right_median_key) = switch (Branch.get_parent(btree, address)) { + let (left_separator_key, right_separator_key) = switch (Branch.get_parent(btree, address)) { case (?parent) { - var left_median_key : ?Nat = null; - var right_median_key : ?Nat = null; + var left_separator_key : ?Nat = null; + var right_separator_key : ?Nat = null; if (index > 0) { - let ?left_median_key_blob = Branch.get_key_blob(btree, parent, index - 1) else Debug.trap("7. validate: accessed a null value"); - left_median_key := ?btree_utils.key.blobify.from_blob(left_median_key_blob); + let ?left_separator_key_blob = Branch.get_key_blob(btree, parent, index - 1) else Debug.trap("7. validate: accessed a null value"); + left_separator_key := ?btree_utils.key.blobify.from_blob(left_separator_key_blob); }; let parent_count = Branch.get_count(btree, parent); if (index + 1 < parent_count) { - let ?right_median_key_blob = Branch.get_key_blob(btree, parent, index) else Debug.trap("8. 
validate: accessed a null value"); - right_median_key := ?btree_utils.key.blobify.from_blob(right_median_key_blob); + let ?right_separator_key_blob = Branch.get_key_blob(btree, parent, index) else Debug.trap("8. validate: accessed a null value"); + right_separator_key := ?btree_utils.key.blobify.from_blob(right_separator_key_blob); }; - (left_median_key, right_median_key); + (left_separator_key, right_separator_key); }; case (null) (null, null); @@ -868,16 +868,16 @@ module Methods { }; }; - switch (left_median_key) { - case (?left_median_key) { - assert left_median_key <= key; + switch (left_separator_key) { + case (?left_separator_key) { + assert left_separator_key <= key; }; case (null) {}; }; - switch (right_median_key) { - case (?right_median_key) { - assert key < right_median_key; + switch (right_separator_key) { + case (?right_separator_key) { + assert key < right_separator_key; }; case (null) {}; }; From 71e7b92ace7591776a21845e018f7254dcd3f2b5 Mon Sep 17 00:00:00 2001 From: Tomi Jaga Date: Sat, 19 Jul 2025 16:08:06 -0700 Subject: [PATCH 2/3] store branch keys independently in their own memory blocks --- src/MemoryBTree/Base.mo | 35 ++++++----- src/MemoryBTree/modules/Branch.mo | 85 +++++++++++++++++++++----- src/MemoryBTree/modules/MemoryBlock.mo | 81 +++++++++++++++++++++--- tests/MemoryBTree/MemoryBTree.Test.mo | 33 ++++------ 4 files changed, 174 insertions(+), 60 deletions(-) diff --git a/src/MemoryBTree/Base.mo b/src/MemoryBTree/Base.mo index b8d9722..a7ec731 100644 --- a/src/MemoryBTree/Base.mo +++ b/src/MemoryBTree/Base.mo @@ -375,8 +375,8 @@ module { var opt_parent = Leaf.get_parent(btree, right_node_address); var right_index = Leaf.get_index(btree, right_node_address); - let ?first_key_address = Leaf.get_kv_address(btree, right_node_address, 0) else Debug.trap("insert: first_key_address accessed a null value"); - var separator_key_address = first_key_address; + let ?first_key = Leaf.get_key_blob(btree, right_node_address, 0) else 
Debug.trap("insert: first_key_address accessed a null value"); + var separator_key = first_key; // assert Leaf.get_count(btree, left_node_address) == (btree.node_capacity / 2) + 1; // assert Leaf.get_count(btree, right_node_address) == (btree.node_capacity / 2); @@ -392,7 +392,7 @@ module { // Debug.print("found branch with enough space"); // Debug.print("parent before insert: " # debug_show Branch.from_memory(btree, parent_address)); - Branch.insert(btree, parent_address, right_index, separator_key_address, right_node_address); + Branch.insert_with_key_blob(btree, parent_address, right_index, separator_key, right_node_address); update_count(btree, btree.count + 1); // Debug.print("parent after insert: " # debug_show Branch.from_memory(btree, parent_address)); @@ -402,12 +402,12 @@ module { // otherwise split parent left_node_address := parent_address; - right_node_address := Branch.split(btree, left_node_address, right_index, separator_key_address, right_node_address); + right_node_address := Branch.split_with_key_blob(btree, left_node_address, right_index, separator_key, right_node_address); update_branch_count(btree, btree.branch_count + 1); - let ?first_key_address = Branch.get_key_address(btree, right_node_address, btree.node_capacity - 2) else Debug.trap("4. insert: accessed a null value in first key of branch"); + let ?first_key = Branch.get_key_blob(btree, right_node_address, btree.node_capacity - 2) else Debug.trap("4. 
insert: accessed a null value in first key of branch"); Branch.set_key_address_to_null(btree, right_node_address, btree.node_capacity - 2); - separator_key_address := first_key_address; + separator_key := first_key; right_index := Branch.get_index(btree, right_node_address); opt_parent := Branch.get_parent(btree, right_node_address); @@ -423,7 +423,7 @@ module { Branch.update_depth(btree, new_root, new_depth); assert Branch.get_depth(btree, new_root) == new_depth; - Branch.put_key_address(btree, new_root, 0, separator_key_address); + Branch.put_key(btree, new_root, 0, separator_key); Branch.add_child(btree, new_root, left_node_address); Branch.add_child(btree, new_root, right_node_address); @@ -722,8 +722,8 @@ module { if (elem_index == 0) { // if the first element is removed then update the parent key - let ?next_key_address = Leaf.get_kv_address(btree, leaf_address, 0) else Debug.trap("remove: next_key_block is null"); - Branch.update_separator_key_address(btree, parent, leaf_index, next_key_address); + let ?next_key = Leaf.get_key_blob(btree, leaf_address, 0) else Debug.trap("remove: next_key_block is null"); + Branch.update_separator_key(btree, parent, leaf_index, next_key); }; let min_count = btree.node_capacity / 2; @@ -752,8 +752,8 @@ module { if (Leaf.redistribute(btree, leaf_address, neighbour)) { - let ?key_address = Leaf.get_kv_address(btree, right, 0) else Debug.trap("remove: key_block is null"); - Branch.put_key_address(btree, parent, right_index - 1, key_address); + let ?key_blob = Leaf.get_key_blob(btree, right, 0) else Debug.trap("remove: key_block is null"); + Branch.replace_key(btree, parent, right_index - 1, key_blob); return ?prev_val; }; @@ -761,12 +761,15 @@ module { // Debug.print("merging leaf"); // remove merged leaf from parent - // Debug.print("remove merged index: " # debug_show right_index); + // Debug.print("remove merged index: " # debug_show right_index # ", address: " # debug_show right); // Debug.print("parent: " # debug_show 
Branch.from_memory(btree, parent)); // merge leaf with neighbour Leaf.merge(btree, left, right); - Branch.remove(btree, parent, right_index); + let removed_key_address = Branch.remove(btree, parent, right_index); + + // Deallocate the key that was separating the merged leaves + MemoryBlock.Branch.remove_key_blob(btree, removed_key_address); // deallocate right leaf that was merged into left Leaf.deallocate(btree, right); @@ -822,7 +825,11 @@ module { let merged_branch = Branch.merge(btree, branch, neighbour); let merged_branch_index = Branch.get_index(btree, merged_branch); - Branch.remove(btree, parent, merged_branch_index); + ignore Branch.remove(btree, parent, merged_branch_index); + + // Keep the removed separator key allocated: Branch.merge re-inserts the parent's separator key address into the surviving branch, so it is presumably still referenced (NOTE(review): confirm before enabling the call below) + // MemoryBlock.Branch.remove_key_blob(btree, removed_key_address); + Branch.deallocate(btree, merged_branch); update_branch_count(btree, btree.branch_count - 1); diff --git a/src/MemoryBTree/modules/Branch.mo b/src/MemoryBTree/modules/Branch.mo index fd69db8..31ba5c7 100644 --- a/src/MemoryBTree/modules/Branch.mo +++ b/src/MemoryBTree/modules/Branch.mo @@ -180,8 +180,8 @@ module Branch { continue while_loop; }; - let key_block = MemoryBlock.get_key_block(btree, key_address); - let key_blob = MemoryBlock.get_key_blob(btree, key_address); + let key_block = MemoryBlock.Branch.get_key_block(btree, key_address); + let key_blob = MemoryBlock.Branch.get_key_blob(btree, key_address); branch.2 [i] := ?key_block; branch.6 [i] := ?key_blob; @@ -287,8 +287,8 @@ module Branch { // func(key_block : ?MemoryBlock) : ?Nat { // switch (key_block) { // case (?key_block) { - // let blob = MemoryBlock.get_key(btree, key_block); - // let key = btree_utils.key.blobify.from_blob(MemoryBlock.get_key(btree, key_block)); + // let blob = MemoryBlock.Branch.get_key(btree, key_block); + // let key = btree_utils.key.blobify.from_blob(MemoryBlock.Branch.get_key(btree, key_block)); // key; // }; @@ -318,6 
+318,29 @@ module Branch { MemoryRegion.storeNat64(btree.branches, offset, Nat64.fromNat(key_address)); }; + public func put_key(btree : MemoryBTree, branch_address : Nat, i : Nat, key : Blob) { + assert i < (btree.node_capacity - 1 : Nat); + + let key_address = MemoryBlock.Branch.store_key_blob(btree, key); + Branch.put_key_address(btree, branch_address, i, key_address); + }; + + public func replace_key(btree : MemoryBTree, branch_address : Nat, i : Nat, key : Blob) { + assert i < (btree.node_capacity - 1 : Nat); + + let ?prev_key_address = Branch.get_key_address(btree, branch_address, i) else Debug.trap("Branch.replace_key: accessed a null value"); + + switch (MemoryBlock.Branch.replace_key_blob(btree, prev_key_address, key)) { + case (?new_key_address) { + Branch.put_key_address(btree, branch_address, i, new_key_address); + }; + case (null) { + // Key was replaced in-place, no address change needed + }; + }; + + }; + public func put_child(btree : MemoryBTree, branch_address : Nat, i : Nat, child_address : Nat) { assert i < btree.node_capacity; @@ -398,8 +421,8 @@ module Branch { let leaf_magic = MemoryRegion.loadBlob(btree.leaves, node_address, MC.MAGIC_SIZE); let branch_magic = MemoryRegion.loadBlob(btree.branches, node_address, MC.MAGIC_SIZE); - Debug.print("leaf_magic = " # debug_show leaf_magic); - Debug.print("branch_magic = " # debug_show branch_magic); + // Debug.print("leaf_magic = " # debug_show leaf_magic); + // Debug.print("branch_magic = " # debug_show branch_magic); let is_leaf = leaf_magic == MC.MAGIC; let is_branch = branch_magic == MC.MAGIC; @@ -415,11 +438,11 @@ module Branch { let leaf_depth = Leaf.get_depth(btree, node_address); let branch_depth = Branch.get_depth(btree, node_address); - Debug.print("leaf_depth = " # debug_show leaf_depth); - Debug.print("branch_depth = " # debug_show branch_depth); + // Debug.print("leaf_depth = " # debug_show leaf_depth); + // Debug.print("branch_depth = " # debug_show branch_depth); let mem_depth = 
MemoryRegion.loadNat8(btree.branches, node_address + MC.DEPTH_START) |> Nat8.toNat(_); - Debug.print("mem_depth = " # debug_show mem_depth); + // Debug.print("mem_depth = " # debug_show mem_depth); if (mem_depth == 1) { #leaf; @@ -457,7 +480,7 @@ module Branch { public func get_key_blob(btree : MemoryBTree, branch_address : Nat, i : Nat) : ?(Blob) { let ?kv_address = Branch.get_key_address(btree, branch_address, i) else return null; - ?MemoryBlock.get_key_blob(btree, kv_address); + ?MemoryBlock.Branch.get_key_blob(btree, kv_address); }; public func set_key_address_to_null(btree : MemoryBTree, branch_address : Nat, i : Nat) { @@ -614,6 +637,19 @@ module Branch { Branch.put_key_address(btree, curr_address, i - 1, new_key_address); }; + public func update_separator_key(btree : MemoryBTree, parent_address : Nat, child_index : Nat, new_key : Blob) { + var curr_address = parent_address; + var i = child_index; + + while (i == 0) { + i := Branch.get_index(btree, curr_address); + let ?parent_address = Branch.get_parent(btree, curr_address) else return; // occurs when key is the first key in the tree + curr_address := parent_address; + }; + + Branch.replace_key(btree, curr_address, i - 1, new_key); + }; + // inserts node but does not update the subtree size with the node's subtree size // because it's likely that the inserted node is a node split from a node // in this branch's subtree @@ -674,6 +710,11 @@ module Branch { }; + public func insert_with_key_blob(btree : MemoryBTree, branch_address : Nat, i : Nat, key_blob : Blob, child_address : Nat) { + let key_address = MemoryBlock.Branch.store_key_blob(btree, key_blob); + Branch.insert(btree, branch_address, i, key_address, child_address); + }; + public func split(btree : MemoryBTree, branch_address : Nat, child_index : Nat, child_key_address : UniqueId, child : Nat) : Nat { let arr_len = btree.node_capacity; @@ -805,6 +846,11 @@ module Branch { right_address; }; + public func split_with_key_blob(btree : MemoryBTree, 
branch_address : Nat, child_index : Nat, child_key_blob : Blob, child : Nat) : Nat { + let key_address = MemoryBlock.Branch.store_key_blob(btree, child_key_blob); + Branch.split(btree, branch_address, child_index, key_address, child); + }; + public func get_larger_neighbour(btree : MemoryBTree, parent_address : Address, index : Nat) : ?Address { let ?child = Branch.get_child(btree, parent_address, index) else Debug.trap("1. get_larger_neighbor: accessed a null value"); @@ -895,11 +941,19 @@ module Branch { // the right node is always merged into the left node so it unlikely // that we would need to remove the 0th index, which will cause issues // because the keys hold one less value than the children array - public func remove(btree : MemoryBTree, branch : Address, index : Nat) { + // Returns the key address that was removed so the caller can decide whether to deallocate it + public func remove(btree : MemoryBTree, branch : Address, index : Nat) : UniqueId { + assert index > 0; let count = Branch.get_count(btree, branch); + let ?key_address_at_index = Branch.get_key_address(btree, branch, index - 1) else Debug.trap("Branch.remove: accessed a null value"); + Branch.set_key_address_to_null(btree, branch, index - 1); + // Debug.print("key_address_at_index = " # debug_show key_address_at_index # ", branch = " # debug_show branch # ", index = " # debug_show index); + Branch.shift(btree, branch, index + 1, count, - 1); Branch.update_count(btree, branch, count - 1); + + key_address_at_index; }; public func redistribute(btree : MemoryBTree, branch : Address) : Bool { @@ -941,9 +995,8 @@ module Branch { while (i < data_to_move) { let j = neighbour_count - 1 - i : Nat; // Debug.print("neighbour: " # debug_show from_memory(btree, neighbour)); - let ?key_address = Branch.get_key_address(btree, neighbour, j - 1) else return Debug.trap("Branch.redistribute: key_address should not be null"); let ?child = Branch.get_child(btree, neighbour, j) else return 
Debug.trap("Branch.redistribute: child should not be null"); - Branch.remove(btree, neighbour, j); + let removed_key_address = Branch.remove(btree, neighbour, j); // Debug.print("separator_key_address: " # debug_show separator_key_address); @@ -954,7 +1007,7 @@ module Branch { let child_subtree_size = if (branch_has_leaves) Leaf.get_count(btree, child) else Branch.get_subtree_size(btree, child); moved_subtree_size += child_subtree_size; - separator_key_address := key_address; + separator_key_address := removed_key_address; i += 1; }; @@ -1052,8 +1105,8 @@ module Branch { Branch.insert(btree, left, left_count + i, separator_key_address, child); if (i < (right_count - 1 : Nat)) { - let ?key_block = Branch.get_key_address(btree, right, i) else return Debug.trap("Branch.merge: key_block should not be null"); - separator_key_address := key_block; + let ?key_address = Branch.get_key_address(btree, right, i) else return Debug.trap("Branch.merge: key_address should not be null"); + separator_key_address := key_address; }; i += 1; diff --git a/src/MemoryBTree/modules/MemoryBlock.mo b/src/MemoryBTree/modules/MemoryBlock.mo index 624ac7f..7244dc5 100644 --- a/src/MemoryBTree/modules/MemoryBlock.mo +++ b/src/MemoryBTree/modules/MemoryBlock.mo @@ -14,7 +14,78 @@ import T "Types"; module MemoryBlock { - // Memory Layout - (15 bytes) + type Address = Nat; + type MemoryRegion = MemoryRegion.MemoryRegion; + type RevIter = RevIter.RevIter; + + public type MemoryBTree = Migrations.MemoryBTree; + public type MemoryBlock = T.MemoryBlock; + type UniqueId = T.UniqueId; + + public module Branch { + + // Branch Key Memory Layout + // | Field | Size (bytes) | Description | + // |-----------------|--------------|-----------------------------------------| + // | key size | 2 | key size | + // | key blob | key size | serialized key | + // |-----------------|--------------|-----------------------------------------| + + public let KEY_SIZE_START = 0; // byte offset of the 2-byte key-size field inside a key block + public let KEY_BLOB_START = 2; // byte offset of the serialized key, directly after the size field + + public
func store_key_blob(btree : MemoryBTree, key : Blob) : UniqueId { + let key_address = MemoryRegion.allocate(btree.data, KEY_BLOB_START + key.size()); // size header + key bytes + + MemoryRegion.storeNat16(btree.data, key_address + KEY_SIZE_START, Nat16.fromNat(key.size())); // key length in bytes (header not counted); NOTE(review): Nat16.fromNat presumably traps for keys over 65535 bytes - confirm key size is bounded upstream + MemoryRegion.storeBlob(btree.data, key_address + KEY_BLOB_START, key); + + key_address; + }; + + public func get_key_blob(btree : MemoryBTree, key_address : UniqueId) : Blob { + let key_size = MemoryRegion.loadNat16(btree.data, key_address + KEY_SIZE_START) |> Nat16.toNat(_); + let blob = MemoryRegion.loadBlob(btree.data, key_address + KEY_BLOB_START, key_size); + + blob; + }; + + public func get_key_block(btree : MemoryBTree, key_address : UniqueId) : MemoryBlock { + let key_size = MemoryRegion.loadNat16(btree.data, key_address + KEY_SIZE_START) |> Nat16.toNat(_); + + (key_address + KEY_BLOB_START, key_size); // (address of the key bytes, key length) - the size header is excluded + }; + + public func remove_key_blob(btree : MemoryBTree, key_address : UniqueId) { + let key_size = MemoryRegion.loadNat16(btree.data, key_address + KEY_SIZE_START) |> Nat16.toNat(_); + MemoryRegion.deallocate(btree.data, key_address, KEY_BLOB_START + key_size); // same span store_key_blob allocates: size header + key bytes + }; + + // Replaces the key blob at 'prev_key_address' with 'new_key'. + // Returns null when the key block keeps its address: a same-size key is overwritten in place, and a resize may also leave the address unchanged. + // Otherwise, it returns the new memory block address.
+ public func replace_key_blob(btree : MemoryBTree, prev_key_address : UniqueId, new_key : Blob) : ?UniqueId { + + let prev_key_size = MemoryRegion.loadNat16(btree.data, prev_key_address + KEY_SIZE_START) |> Nat16.toNat(_); + + if (prev_key_size == new_key.size()) { + MemoryRegion.storeBlob(btree.data, prev_key_address + KEY_BLOB_START, new_key); // same length: overwrite in place, the stored size header is already correct + return null; + }; + + let new_key_address = MemoryRegion.resize(btree.data, prev_key_address, prev_key_size + KEY_BLOB_START, new_key.size() + KEY_BLOB_START); // old and new block sizes must include the KEY_BLOB_START-byte size header, matching what store_key_blob allocates and remove_key_blob frees + + MemoryRegion.storeNat16(btree.data, new_key_address + KEY_SIZE_START, Nat16.fromNat(new_key.size())); + MemoryRegion.storeBlob(btree.data, new_key_address + KEY_BLOB_START, new_key); + + if (new_key_address == prev_key_address) return null; + + ?new_key_address; + }; + + }; + + // Leaf Entry Memory Layout - (15 bytes) // // | Field | Size (bytes) | Description | // |-----------------|--------------|-----------------------------------------| @@ -26,14 +97,6 @@ module MemoryBlock { // | // └--> value blob of 'value size' stored at this address - type Address = Nat; - type MemoryRegion = MemoryRegion.MemoryRegion; - type RevIter = RevIter.RevIter; - - public type MemoryBTree = Migrations.MemoryBTree; - public type MemoryBlock = T.MemoryBlock; - type UniqueId = T.UniqueId; - let BLOCK_ENTRY_SIZE = 15; let REFERENCE_COUNT_START = 0; diff --git a/tests/MemoryBTree/MemoryBTree.Test.mo b/tests/MemoryBTree/MemoryBTree.Test.mo index 7ed3df8..d2871d3 100644 --- a/tests/MemoryBTree/MemoryBTree.Test.mo +++ b/tests/MemoryBTree/MemoryBTree.Test.mo @@ -26,25 +26,8 @@ type Order = Order.Order; type MemoryBlock = MemoryBTree.MemoryBlock; let { nhash } = Map; -func xorshift128plus(seed : Nat) : { next() : Nat } { - var state0 : Nat64 = Nat64.fromNat(seed); - var state1 : Nat64 = Nat64.fromNat(seed + 1); - if (state0 == 0) state0 := 1; - if (state1 == 0) state1 := 2; - - { - next = func() : Nat { - var s1 = state0; - let s0 = state1; - state0 := s0; - s1 ^= s1 << 23 : Nat64; -
state1 := s1 ^ s0 ^ (s1 >> 18 : Nat64) ^ (s0 >> 5 : Nat64); - Nat64.toNat(state1 +% s0); // Use wrapping addition - }; - }; -}; -let fuzz = Fuzz.create(xorshift128plus(0xdeadbeef)); +let fuzz = Fuzz.fromSeed(0xdeadbeef); let limit = 10_000; @@ -67,7 +50,7 @@ let random = Itertools.toBuffer<(Nat, Nat)>( let sorted = Buffer.clone(random); sorted.sort(func(a : (Nat, Nat), b : (Nat, Nat)) : Order = Nat.compare(a.0, b.0)); -let btree = MemoryBTree._new_with_options(?8, ?0, false); +let btree = MemoryBTree._new_with_options(?4, ?0, false); let btree_utils = MemoryBTree.createUtils(TypeUtils.Nat, TypeUtils.Nat); suite( @@ -525,16 +508,24 @@ suite( assert ?(1 + i * 10) == val; assert MemoryBTree.size(btree) == random.size() - i - 1; - // Debug.print("node keys: " # debug_show MemoryBTree.toNodeKeys(btree, btree_utils)); + // let node_keys = MemoryBTree.toNodeKeys(btree, btree_utils); + // Debug.print("node keys: " # debug_show node_keys); // Debug.print("leaf nodes: " # debug_show Iter.toArray(MemoryBTree.leafNodes(btree, btree_utils))); - }; + }; assert Methods.validate_memory(btree, btree_utils); }, ); + // test( + // "check for memory leaks", + // func() { + + // } + // ); + test( "clear()", func() { From ac3baa1f3de56d0bdf842a23fd93eb7baed349e1 Mon Sep 17 00:00:00 2001 From: Tomi Jaga Date: Wed, 30 Jul 2025 04:11:31 -0700 Subject: [PATCH 3/3] Fix memory leaks and add tests to check for leaks --- src/MemoryBTree/Base.mo | 26 +++++++++++++++++--------- src/MemoryBTree/modules/Branch.mo | 3 +++ tests/MemoryBTree/MemoryBTree.Test.mo | 27 ++++++++++++++++++++++----- 3 files changed, 42 insertions(+), 14 deletions(-) diff --git a/src/MemoryBTree/Base.mo b/src/MemoryBTree/Base.mo index a7ec731..133cf30 100644 --- a/src/MemoryBTree/Base.mo +++ b/src/MemoryBTree/Base.mo @@ -20,7 +20,7 @@ import MemoryBlock "modules/MemoryBlock"; import Branch "modules/Branch"; import Utils "../Utils"; import Migrations "Migrations"; -import Leaf "modules/Leaf"; +import LeafModule 
"modules/Leaf"; import T "modules/Types"; import TypeUtils "../TypeUtils"; @@ -45,6 +45,7 @@ module { let CACHE_LIMIT = 50_000; let DEFAULT_ORDER = 256; + public let Leaf = LeafModule; public func _new_with_options(node_capacity : ?Nat, opt_cache_size : ?Nat, is_set : Bool) : MemoryBTree { let cache_size = Option.get(opt_cache_size, CACHE_LIMIT); @@ -376,7 +377,7 @@ module { var right_index = Leaf.get_index(btree, right_node_address); let ?first_key = Leaf.get_key_blob(btree, right_node_address, 0) else Debug.trap("insert: first_key_address accessed a null value"); - var separator_key = first_key; + var separator_key_address = MemoryBlock.Branch.store_key_blob(btree, first_key); // assert Leaf.get_count(btree, left_node_address) == (btree.node_capacity / 2) + 1; // assert Leaf.get_count(btree, right_node_address) == (btree.node_capacity / 2); @@ -392,7 +393,7 @@ module { // Debug.print("found branch with enough space"); // Debug.print("parent before insert: " # debug_show Branch.from_memory(btree, parent_address)); - Branch.insert_with_key_blob(btree, parent_address, right_index, separator_key, right_node_address); + Branch.insert(btree, parent_address, right_index, separator_key_address, right_node_address); update_count(btree, btree.count + 1); // Debug.print("parent after insert: " # debug_show Branch.from_memory(btree, parent_address)); @@ -402,12 +403,14 @@ module { // otherwise split parent left_node_address := parent_address; - right_node_address := Branch.split_with_key_blob(btree, left_node_address, right_index, separator_key, right_node_address); + right_node_address := Branch.split(btree, left_node_address, right_index, separator_key_address, right_node_address); update_branch_count(btree, btree.branch_count + 1); - let ?first_key = Branch.get_key_blob(btree, right_node_address, btree.node_capacity - 2) else Debug.trap("4. 
insert: accessed a null value in first key of branch"); + // The separator key is temporarily stored in the right node at the last position. + // We need to move it to the left node and update the separator key address. + let ?first_key_address = Branch.get_key_address(btree, right_node_address, btree.node_capacity - 2) else Debug.trap("4. insert: accessed a null value in first key of branch"); Branch.set_key_address_to_null(btree, right_node_address, btree.node_capacity - 2); - separator_key := first_key; + separator_key_address := first_key_address; right_index := Branch.get_index(btree, right_node_address); opt_parent := Branch.get_parent(btree, right_node_address); @@ -423,7 +426,7 @@ module { Branch.update_depth(btree, new_root, new_depth); assert Branch.get_depth(btree, new_root) == new_depth; - Branch.put_key(btree, new_root, 0, separator_key); + Branch.put_key_address(btree, new_root, 0, separator_key_address); Branch.add_child(btree, new_root, left_node_address); Branch.add_child(btree, new_root, right_node_address); @@ -793,6 +796,10 @@ module { update_root(btree, child); update_is_root_a_leaf(btree, child_is_leaf); update_depth(btree, btree.depth - 1); + + Branch.deallocate(btree, parent); + update_branch_count(btree, btree.branch_count - 1); + return ?prev_val; } else { @@ -825,9 +832,10 @@ module { let merged_branch = Branch.merge(btree, branch, neighbour); let merged_branch_index = Branch.get_index(btree, merged_branch); - ignore Branch.remove(btree, parent, merged_branch_index); + let removed_key_address = Branch.remove(btree, parent, merged_branch_index); - // Deallocate the key that was separating the merged branches as the leaf stores its keys separately + // The separator key is transferred to the merged branch during merge, + // so it should not be deallocated here // MemoryBlock.Branch.remove_key_blob(btree, removed_key_address); Branch.deallocate(btree, merged_branch); diff --git a/src/MemoryBTree/modules/Branch.mo 
b/src/MemoryBTree/modules/Branch.mo index 31ba5c7..69c2d9d 100644 --- a/src/MemoryBTree/modules/Branch.mo +++ b/src/MemoryBTree/modules/Branch.mo @@ -1070,6 +1070,8 @@ module Branch { }; public func deallocate(btree : MemoryBTree, branch : Address) { + // Frees only the branch node's own block in btree.branches; the key blocks it references are not touched + // Keys should be explicitly released or transferred by the caller beforehand to avoid leaks and double-free errors let memory_size = Branch.get_memory_size(btree.node_capacity); MemoryRegion.deallocate(btree.branches, branch, memory_size); }; @@ -1106,6 +1108,7 @@ module Branch { if (i < (right_count - 1 : Nat)) { let ?key_address = Branch.get_key_address(btree, right, i) else return Debug.trap("Branch.merge: key_address should not be null"); + Branch.set_key_address_to_null(btree, right, i); separator_key_address := key_address; }; diff --git a/tests/MemoryBTree/MemoryBTree.Test.mo b/tests/MemoryBTree/MemoryBTree.Test.mo index d2871d3..5802a28 100644 --- a/tests/MemoryBTree/MemoryBTree.Test.mo +++ b/tests/MemoryBTree/MemoryBTree.Test.mo @@ -519,12 +519,23 @@ suite( ); - // test( - // "check for memory leaks", - // func() { + test( + "check for memory leaks", + func() { + + Debug.print("checking for memory leaks"); + Debug.print("data info: " # debug_show MemoryRegion.memoryInfo(btree.data)); + Debug.print("values info: " # debug_show MemoryRegion.memoryInfo(btree.values)); + Debug.print("leaves info: " # debug_show MemoryRegion.memoryInfo(btree.leaves)); + Debug.print("branches info: " # debug_show MemoryRegion.memoryInfo(btree.branches)); + + assert MemoryRegion.size(btree.data) == MemoryBTree.MC.REGION_HEADER_SIZE; // only the region header should remain in the data region + assert MemoryRegion.allocated(btree.values) == MemoryBTree.MC.REGION_HEADER_SIZE; // NOTE(review): checks allocated() while the other three regions check size() - confirm the difference is intentional + assert MemoryRegion.size(btree.leaves) == MemoryBTree.MC.REGION_HEADER_SIZE + MemoryBTree.Leaf.get_memory_size(btree.node_capacity); // header plus exactly one leaf-sized block - presumably the root leaf; confirm + assert MemoryRegion.size(btree.branches) == MemoryBTree.MC.REGION_HEADER_SIZE; // only the region header should remain in the branches region - // } - // ); + }, + ); test( "clear()", @@ -538,6 +549,12 @@ suite( assert
MemoryBTree.size(btree) == 0; assert Methods.validate_memory(btree, btree_utils); + + assert MemoryRegion.size(btree.data) == MemoryBTree.MC.REGION_HEADER_SIZE; + assert MemoryRegion.allocated(btree.values) == MemoryBTree.MC.REGION_HEADER_SIZE; // NOTE(review): allocated() here vs size() for the other regions - confirm intentional + assert MemoryRegion.size(btree.leaves) == MemoryBTree.MC.REGION_HEADER_SIZE + MemoryBTree.Leaf.get_memory_size(btree.node_capacity); + assert MemoryRegion.size(btree.branches) == MemoryBTree.MC.REGION_HEADER_SIZE; + }, );