From d0281cb49bc863db18a0442d98be0fe2d3cde4c3 Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Wed, 17 Nov 2021 00:30:26 +0900 Subject: [PATCH 01/19] Simple construct ART --- Cargo.toml | 1 + src/art/mod.rs | 112 +++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + 3 files changed, 114 insertions(+) create mode 100644 src/art/mod.rs diff --git a/Cargo.toml b/Cargo.toml index c46a9e2..c03633e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ edition = "2018" crossbeam-epoch = "0.9.5" crossbeam-utils = "0.8.5" rand = "0.8.4" +either = "1.6.1" [dev-dependencies] criterion = "0.3.4" diff --git a/src/art/mod.rs b/src/art/mod.rs new file mode 100644 index 0000000..cd32b84 --- /dev/null +++ b/src/art/mod.rs @@ -0,0 +1,112 @@ +use std::{ + marker::PhantomData, + mem, +}; + +use either::Either; + +use crate::map::SequentialMap; + +struct NodeHeader { + len: u8, // the len of prefix + prefix: [u8; 15], // prefix for path compression +} + +/// the child node type +/// This is used for bitflag on child pointer. +const NODETYPE_MASK: usize = 0b111; +#[repr(usize)] +enum NodeType { + Value = 0b000, + Node4 = 0b001, + Node16 = 0b010, + Node48 = 0b011, + Node256 = 0b100, +} + +trait NodeOps { + fn insert(&mut self, key: u8, node: Node); + fn lookup(&self, key: u8) -> &Node; + fn remove(&mut self, key: u8) -> Node; +} + +/// the pointer struct for Nodes or value +struct Node { + pointer: usize, + _marker: PhantomData>, +} + +impl Node { + fn deref(&self) -> Either<&dyn NodeOps, &V> { + unsafe { + let pointer = self.pointer & !NODETYPE_MASK; + let tag = mem::transmute(self.pointer & NODETYPE_MASK); + + match tag { + NodeType::Value => Either::Right(&*(pointer as *const V)), + NodeType::Node4 => Either::Left(&*(pointer as *const Node4)), + NodeType::Node16 => Either::Left(&*(pointer as *const Node16)), + NodeType::Node48 => Either::Left(&*(pointer as *const Node48)), + NodeType::Node256 => Either::Left(&*(pointer as *const Node256)), + } + } + } +} + +struct Node4 { + header: NodeHeader, + keys: [u8; 4], + children: [Node; 4], +} + +impl NodeOps for Node4 {} + +struct Node16 { + header: NodeHeader, + keys: [u8; 16], + children: [Node; 16], +} + +impl NodeOps for Node16 {} + +struct Node48 { + header: NodeHeader, + keys: [u8; 256], + children: [Node; 48], +} + +impl NodeOps for Node48 {} + +struct Node256 { + header: NodeHeader, + children: [Node; 256], +} + +impl NodeOps for Node256 {} + +pub struct ART { + root: Box>, + _marker: PhantomData, +} + +impl ART { + +} + +impl SequentialMap for ART { + fn new() -> Self { + todo!() + } + + fn insert(&mut self, key: &K, value: V) -> Result<(), V> { + todo!() + } + + fn lookup(&self, key: &K) -> Option<&V> { + todo!() + } + + fn remove(&mut self, key: &K) -> Result { + todo!() + } +} diff --git a/src/lib.rs b/src/lib.rs index fb35999..30a6c0e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ pub mod avltree; pub mod btree; +pub mod art; pub mod linkedlist; mod lock; pub mod map; From dfd50a7e77f484a86b59fc66e0b3d8d20fdc1d33 Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Wed, 17 Nov 2021 00:59:16 +0900 Subject: [PATCH 02/19] Add some --- src/art/mod.rs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/art/mod.rs b/src/art/mod.rs index cd32b84..7e3067e 100644 --- a/src/art/mod.rs +++ b/src/art/mod.rs @@ -1,7 +1,4 @@ -use std::{ - marker::PhantomData, - mem, -}; +use std::{iter::Peekable, marker::PhantomData, mem, ptr::NonNull}; use either::Either; @@ -84,8 +81,17 @@ struct Node256 { impl NodeOps for Node256 {} +trait Encodable { + fn encode(self) -> Vec; +} + +struct Cursor { + parent: Option>>, + current: NonNull>, +} + pub struct ART { - root: Box>, + root: NonNull>, _marker: PhantomData, } From cc3da120a188647de130baf0f32041157c98b9cc Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Thu, 18 Nov 2021 01:39:59 +0900 Subject: [PATCH 03/19] Just construct the outline of Nodes --- Cargo.toml | 1 + src/art/mod.rs | 420 +++++++++++++++++++++++++++++++++++++++++++++-- src/btree/mod.rs | 32 +--- src/util/mod.rs | 33 ++++ 4 files changed, 439 insertions(+), 47 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c03633e..285167c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ crossbeam-epoch = "0.9.5" crossbeam-utils = "0.8.5" rand = "0.8.4" either = "1.6.1" +arr_macro = "0.1.3" [dev-dependencies] criterion = "0.3.4" diff --git a/src/art/mod.rs b/src/art/mod.rs index 7e3067e..3eeef0c 100644 --- a/src/art/mod.rs +++ b/src/art/mod.rs @@ -1,12 +1,29 @@ -use std::{iter::Peekable, marker::PhantomData, mem, ptr::NonNull}; +use std::{ + cmp::Ordering, + marker::PhantomData, + mem, + ptr::{self, NonNull}, +}; use either::Either; -use crate::map::SequentialMap; +use crate::{map::SequentialMap, util::slice_insert}; struct NodeHeader { - len: u8, // the len of prefix - prefix: [u8; 15], // prefix for path compression + len: u32, // the len of prefix + prefix: [u8; 12], // prefix for path compression +} + +impl Default for NodeHeader { + #[allow(deprecated)] + fn default() -> Self { + unsafe { + Self { + len: 0, + prefix: mem::uninitialized(), + } + } + } } /// the child node type @@ -22,9 +39,12 @@ enum NodeType { } trait NodeOps { - fn insert(&mut self, key: u8, node: Node); - fn lookup(&self, key: u8) -> &Node; - fn remove(&mut self, key: u8) -> Node; + fn is_full(&self) -> bool; + fn is_shrinkable(&self) -> bool; + fn insert(&mut self, key: u8, node: Node) -> Result<(), Node>; + fn lookup(&self, key: u8) -> Option<&Node>; + fn update(&mut self, key: u8, node: Node) -> Result, Node>; + fn remove(&mut self, key: u8) -> Result, ()>; } /// the pointer struct for Nodes or value @@ -34,13 +54,13 @@ struct Node { } impl Node { - fn deref(&self) -> Either<&dyn NodeOps, &V> { + fn deref(&self) -> Either<&dyn NodeOps, &NodeV> { unsafe { let pointer = self.pointer & !NODETYPE_MASK; let tag = mem::transmute(self.pointer & NODETYPE_MASK); match tag { - NodeType::Value => Either::Right(&*(pointer as *const V)), + NodeType::Value => Either::Right(&*(pointer as *const NodeV)), NodeType::Node4 => Either::Left(&*(pointer as *const Node4)), NodeType::Node16 => Either::Left(&*(pointer as *const Node16)), NodeType::Node48 => Either::Left(&*(pointer as *const Node48)), @@ -48,38 +68,408 @@ impl Node { } } } + + fn deref_mut(&mut self) -> Either<&mut dyn NodeOps, &mut NodeV> { + unsafe { + let pointer = self.pointer & !NODETYPE_MASK; + let tag = mem::transmute(self.pointer & NODETYPE_MASK); + + match tag { + NodeType::Value => Either::Right(&mut *(pointer as *mut NodeV)), + NodeType::Node4 => Either::Left(&mut *(pointer as *mut Node4)), + NodeType::Node16 => Either::Left(&mut *(pointer as *mut Node16)), + NodeType::Node48 => Either::Left(&mut *(pointer as *mut Node48)), + NodeType::Node256 => Either::Left(&mut *(pointer as *mut Node256)), + } + } + } + + fn new(node: impl NodeOps, node_type: NodeType) -> Self { + let node = Box::into_raw(Box::new(node)); + + Self { + pointer: node as usize | node_type as usize, + _marker: PhantomData, + } + } + + const fn null() -> Self { + Self { + pointer: 0, + _marker: PhantomData, + } + } + + #[inline] + fn is_null(&self) -> bool { + self.pointer == 0 + } + + fn node_type(&self) -> NodeType { + unsafe { mem::transmute(self.pointer & NODETYPE_MASK) } + } + + /// extend node to bigger one only if necessary + fn extend(&mut self) {} + + /// shrink node to smaller one only if necessary + fn shrink(&mut self) {} +} + +struct NodeV { + key: Box<[u8]>, + value: V, } struct Node4 { header: NodeHeader, + len: usize, keys: [u8; 4], children: [Node; 4], } -impl NodeOps for Node4 {} +impl Default for Node4 { + #[allow(deprecated)] + fn default() -> Self { + unsafe { + Self { + header: Default::default(), + len: 0, + keys: mem::uninitialized(), + children: mem::uninitialized(), + } + } + } +} + +impl Node4 { + fn keys(&self) -> &[u8] { + unsafe { self.keys.get_unchecked(..self.len as usize) } + } + + fn mut_keys(&mut self) -> &mut [u8] { + unsafe { self.keys.get_unchecked_mut(..self.len as usize) } + } + + fn children(&self) -> &[Node] { + unsafe { self.children.get_unchecked(..self.len as usize) } + } + + fn mut_children(&mut self) -> &mut [Node] { + unsafe { self.children.get_unchecked_mut(..self.len as usize) } + } +} + +impl NodeOps for Node4 { + #[inline] + fn is_full(&self) -> bool { + self.len == 4 + } + + #[inline] + fn is_shrinkable(&self) -> bool { + false + } + + fn insert(&mut self, key: u8, node: Node) -> Result<(), Node> { + // since the &mut self is the pointer of Node4, not the pointer of Node, + // simple extension like this is impossble. + // if self.len == 4 { + // unsafe { + // let pointer = self as *const Node4 as *mut Node; + // let extended = Node::new( + // Node16::from(ptr::read(pointer as *const Node4)), + // NodeType::Node16, + // ); + // *(pointer as *mut Node) = extended; + // return (*pointer).deref_mut().left().unwrap().insert(key, node); + // } + // } + + for (index, k) in self.keys().iter().enumerate() { + match key.cmp(k) { + Ordering::Less => unsafe { + self.len += 1; + slice_insert(self.mut_keys(), index, key); + slice_insert(self.mut_children(), index, node); + return Ok(()); + }, + Ordering::Equal => return Err(node), + Ordering::Greater => {} + } + } + + Err(node) + } + + fn lookup(&self, key: u8) -> Option<&Node> { + for (index, k) in self.keys().iter().enumerate() { + if key == *k { + return unsafe { Some(self.children.get_unchecked(index)) }; + } + } + + None + } + + fn update(&mut self, key: u8, node: Node) -> Result, Node> { + for (index, k) in self.keys().iter().enumerate() { + match key.cmp(k) { + Ordering::Less => {} + Ordering::Equal => unsafe { + let node = mem::replace(self.children.get_unchecked_mut(index), node); + return Ok(node); + }, + Ordering::Greater => {} + } + } + + Err(node) + } + + fn remove(&mut self, key: u8) -> Result, ()> { + todo!() + } +} struct Node16 { header: NodeHeader, + len: usize, keys: [u8; 16], children: [Node; 16], } -impl NodeOps for Node16 {} +impl Default for Node16 { + #[allow(deprecated)] + fn default() -> Self { + unsafe { + Self { + header: Default::default(), + len: 0, + keys: mem::uninitialized(), + children: mem::uninitialized(), + } + } + } +} + +impl Node16 { + fn keys(&self) -> &[u8] { + unsafe { self.keys.get_unchecked(..self.len as usize) } + } + + fn mut_keys(&mut self) -> &mut [u8] { + unsafe { self.keys.get_unchecked_mut(..self.len as usize) } + } + + fn children(&self) -> &[Node] { + unsafe { self.children.get_unchecked(..self.len as usize) } + } + + fn mut_children(&mut self) -> &mut [Node] { + unsafe { self.children.get_unchecked_mut(..self.len as usize) } + } + + fn from(node: Node4) -> Self { + let mut new = Self::default(); + new.header = node.header; + new.len = node.len; + + unsafe { + ptr::copy_nonoverlapping(node.keys.as_ptr(), new.keys.as_mut_ptr(), node.len as usize); + ptr::copy_nonoverlapping( + node.children.as_ptr(), + new.children.as_mut_ptr(), + node.len as usize, + ); + } + + new + } +} + +impl NodeOps for Node16 { + #[inline] + fn is_full(&self) -> bool { + self.len == 16 + } + + #[inline] + fn is_shrinkable(&self) -> bool { + self.len <= 4 + } + + fn insert(&mut self, key: u8, node: Node) -> Result<(), Node> { + todo!() + } + + fn lookup(&self, key: u8) -> Option<&Node> { + todo!() + } + + fn update(&mut self, key: u8, node: Node) -> Result, Node> { + todo!() + } + + fn remove(&mut self, key: u8) -> Result, ()> { + todo!() + } +} struct Node48 { header: NodeHeader, + len: usize, keys: [u8; 256], children: [Node; 48], } -impl NodeOps for Node48 {} +impl Default for Node48 { + #[allow(deprecated)] + fn default() -> Self { + unsafe { + Self { + header: Default::default(), + len: 0, + keys: mem::uninitialized(), + children: mem::uninitialized(), + } + } + } +} + +impl Node48 { + fn keys(&self) -> &[u8] { + unsafe { self.keys.get_unchecked(..self.len as usize) } + } + + fn mut_keys(&mut self) -> &mut [u8] { + unsafe { self.keys.get_unchecked_mut(..self.len as usize) } + } + + fn children(&self) -> &[Node] { + unsafe { self.children.get_unchecked(..self.len as usize) } + } + + fn mut_children(&mut self) -> &mut [Node] { + unsafe { self.children.get_unchecked_mut(..self.len as usize) } + } + + fn from(node: Node16) -> Self { + let mut new = Self::default(); + + unsafe { + for (index, key) in node.keys().iter().enumerate() { + *new.keys.get_unchecked_mut(*key as usize) = index as u8; + } + + ptr::copy_nonoverlapping( + node.children.as_ptr(), + new.children.as_mut_ptr(), + node.len as usize, + ); + } + + new.header = node.header; + new.len = node.len; + + new + } +} + +impl NodeOps for Node48 { + #[inline] + fn is_full(&self) -> bool { + self.len == 48 + } + + #[inline] + fn is_shrinkable(&self) -> bool { + self.len <= 16 + } + + fn insert(&mut self, key: u8, node: Node) -> Result<(), Node> { + todo!() + } + + fn lookup(&self, key: u8) -> Option<&Node> { + todo!() + } + + fn update(&mut self, key: u8, node: Node) -> Result, Node> { + todo!() + } + + fn remove(&mut self, key: u8) -> Result, ()> { + todo!() + } +} struct Node256 { header: NodeHeader, + len: usize, children: [Node; 256], } -impl NodeOps for Node256 {} +impl Default for Node256 { + #[allow(deprecated)] + fn default() -> Self { + unsafe { + Self { + header: Default::default(), + len: 0, + children: mem::uninitialized(), + } + } + } +} + +impl Node256 { + fn from(node: Node48) -> Self { + let mut new = Self::default(); + + unsafe { + for key in node.keys() { + *new.children.get_unchecked_mut(*key as usize) = ptr::read( + node.children + .get_unchecked(*node.keys.get_unchecked(*key as usize) as usize), + ); + } + } + + new.header = node.header; + new.len = node.len; + + new + } +} + +impl NodeOps for Node256 { + #[inline] + fn is_full(&self) -> bool { + self.len == 256 + } + + #[inline] + fn is_shrinkable(&self) -> bool { + self.len <= 48 + } + + fn insert(&mut self, key: u8, node: Node) -> Result<(), Node> { + todo!() + } + + fn lookup(&self, key: u8) -> Option<&Node> { + todo!() + } + + fn update(&mut self, key: u8, node: Node) -> Result, Node> { + todo!() + } + + fn remove(&mut self, key: u8) -> Result, ()> { + todo!() + } +} trait Encodable { fn encode(self) -> Vec; @@ -95,9 +485,7 @@ pub struct ART { _marker: PhantomData, } -impl ART { - -} +impl ART {} impl SequentialMap for ART { fn new() -> Self { diff --git a/src/btree/mod.rs b/src/btree/mod.rs index 174c0db..43e2202 100644 --- a/src/btree/mod.rs +++ b/src/btree/mod.rs @@ -4,41 +4,11 @@ use std::ptr; use std::{cmp::Ordering, mem, ptr::NonNull}; use crate::map::SequentialMap; +use crate::util::{slice_insert, slice_remove}; const B_MAX_NODES: usize = 11; const B_MID_INDEX: usize = B_MAX_NODES / 2; -/// insert value into [T], which has one empty area on last. -/// ex) insert C at 1 into [A, B, uninit] => [A, C, B] -unsafe fn slice_insert(ptr: &mut [T], index: usize, value: T) { - let size = ptr.len(); - debug_assert!(size > index); - - let ptr = ptr.as_mut_ptr(); - - if size > index + 1 { - ptr::copy(ptr.add(index), ptr.add(index + 1), size - index - 1); - } - - ptr::write(ptr.add(index), value); -} - -/// remove value from [T] and remain last area without any init -/// ex) remove at 1 from [A, B, C] => [A, C, C(but you should not access here)] -unsafe fn slice_remove(ptr: &mut [T], index: usize) -> T { - let size = ptr.len(); - debug_assert!(size > index); - - let ptr = ptr.as_mut_ptr(); - let value = ptr::read(ptr.add(index)); - - if size > index + 1 { - ptr::copy(ptr.add(index + 1), ptr.add(index), size - index - 1); - } - - value -} - struct Node { size: usize, depth: usize, diff --git a/src/util/mod.rs b/src/util/mod.rs index cf6de98..100a442 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,5 +1,38 @@ +use std::ptr; + pub mod random; +/// insert value into [T], which has one empty area on last. +/// ex) insert C at 1 into [A, B, uninit] => [A, C, B] +pub unsafe fn slice_insert(ptr: &mut [T], index: usize, value: T) { + let size = ptr.len(); + debug_assert!(size > index); + + let ptr = ptr.as_mut_ptr(); + + if size > index + 1 { + ptr::copy(ptr.add(index), ptr.add(index + 1), size - index - 1); + } + + ptr::write(ptr.add(index), value); +} + +/// remove value from [T] and remain last area without any init +/// ex) remove at 1 from [A, B, C] => [A, C, C(but you should not access here)] +pub unsafe fn slice_remove(ptr: &mut [T], index: usize) -> T { + let size = ptr.len(); + debug_assert!(size > index); + + let ptr = ptr.as_mut_ptr(); + let value = ptr::read(ptr.add(index)); + + if size > index + 1 { + ptr::copy(ptr.add(index + 1), ptr.add(index), size - index - 1); + } + + value +} + #[macro_export] macro_rules! ok_or { ($e:expr, $err:expr) => {{ From 571c3a7940fafe6a5f5148008eef47c597b0b671 Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Fri, 19 Nov 2021 00:57:32 +0900 Subject: [PATCH 04/19] Impl From for extending and shrinking --- src/art/mod.rs | 238 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 190 insertions(+), 48 deletions(-) diff --git a/src/art/mod.rs b/src/art/mod.rs index 3eeef0c..ba49cd0 100644 --- a/src/art/mod.rs +++ b/src/art/mod.rs @@ -5,6 +5,7 @@ use std::{ ptr::{self, NonNull}, }; +use arr_macro::arr; use either::Either; use crate::{map::SequentialMap, util::slice_insert}; @@ -110,10 +111,72 @@ impl Node { } /// extend node to bigger one only if necessary - fn extend(&mut self) {} + fn extend(&mut self) { + if self.deref().is_right() { + panic!("NodeV cannot be extended.") + } + + if !self.deref().left().unwrap().is_full() { + return; + } + + let node_type = self.node_type(); + let node = self.deref_mut().left().unwrap(); + + match node_type { + NodeType::Value => unreachable!(), + NodeType::Node4 => unsafe { + let node = node as *const dyn NodeOps as *const Node4; + let new = Box::new(Node16::from(ptr::read(node))); + self.pointer = Box::into_raw(new) as usize | node_type as usize; + }, + NodeType::Node16 => unsafe { + let node = node as *const dyn NodeOps as *const Node16; + let new = Box::new(Node48::from(ptr::read(node))); + self.pointer = Box::into_raw(new) as usize | node_type as usize; + }, + NodeType::Node48 => unsafe { + let node = node as *const dyn NodeOps as *const Node48; + let new = Box::new(Node256::from(ptr::read(node))); + self.pointer = Box::into_raw(new) as usize | node_type as usize; + }, + NodeType::Node256 => panic!("Node256 cannot be extended."), + } + } /// shrink node to smaller one only if necessary - fn shrink(&mut self) {} + fn shrink(&mut self) { + if self.deref().is_right() { + panic!("NodeV cannot be shrinked.") + } + + if !self.deref().left().unwrap().is_shrinkable() { + return; + } + + let node_type = self.node_type(); + let node = self.deref_mut().left().unwrap(); + + match node_type { + NodeType::Value => unreachable!(), + NodeType::Node4 => panic!("Node4 cannot be shrinked."), + NodeType::Node16 => unsafe { + let node = node as *const dyn NodeOps as *const Node16; + let new = Box::new(Node4::from(ptr::read(node))); + self.pointer = Box::into_raw(new) as usize | node_type as usize; + }, + NodeType::Node48 => unsafe { + let node = node as *const dyn NodeOps as *const Node48; + let new = Box::new(Node16::from(ptr::read(node))); + self.pointer = Box::into_raw(new) as usize | node_type as usize; + }, + NodeType::Node256 => unsafe { + let node = node as *const dyn NodeOps as *const Node256; + let new = Box::new(Node48::from(ptr::read(node))); + self.pointer = Box::into_raw(new) as usize | node_type as usize; + }, + } + } } struct NodeV { @@ -142,6 +205,27 @@ impl Default for Node4 { } } +impl From> for Node4 { + fn from(node: Node16) -> Self { + debug_assert!(node.len <= 4); + + let mut new = Self::default(); + new.header = node.header; + new.len = node.len; + + unsafe { + ptr::copy_nonoverlapping(node.keys.as_ptr(), new.keys.as_mut_ptr(), node.len as usize); + ptr::copy_nonoverlapping( + node.children.as_ptr(), + new.children.as_mut_ptr(), + node.len as usize, + ); + } + + new + } +} + impl Node4 { fn keys(&self) -> &[u8] { unsafe { self.keys.get_unchecked(..self.len as usize) } @@ -253,24 +337,10 @@ impl Default for Node16 { } } -impl Node16 { - fn keys(&self) -> &[u8] { - unsafe { self.keys.get_unchecked(..self.len as usize) } - } - - fn mut_keys(&mut self) -> &mut [u8] { - unsafe { self.keys.get_unchecked_mut(..self.len as usize) } - } - - fn children(&self) -> &[Node] { - unsafe { self.children.get_unchecked(..self.len as usize) } - } - - fn mut_children(&mut self) -> &mut [Node] { - unsafe { self.children.get_unchecked_mut(..self.len as usize) } - } - +impl From> for Node16 { fn from(node: Node4) -> Self { + debug_assert!(node.len == 4); + let mut new = Self::default(); new.header = node.header; new.len = node.len; @@ -288,6 +358,48 @@ impl Node16 { } } +impl From> for Node16 { + fn from(node: Node48) -> Self { + debug_assert!(node.len <= 16); + + let mut new = Self::default(); + new.header = node.header; + new.len = node.len; + + unsafe { + let mut i = 0; + for (key, index) in node.keys.iter().enumerate() { + if *index != 0xff { + *new.keys.get_unchecked_mut(i) = key as u8; + *new.children.get_unchecked_mut(i) = + ptr::read(node.children.get_unchecked(*index as usize)); + i += 1; + } + } + } + + new + } +} + +impl Node16 { + fn keys(&self) -> &[u8] { + unsafe { self.keys.get_unchecked(..self.len as usize) } + } + + fn mut_keys(&mut self) -> &mut [u8] { + unsafe { self.keys.get_unchecked_mut(..self.len as usize) } + } + + fn children(&self) -> &[Node] { + unsafe { self.children.get_unchecked(..self.len as usize) } + } + + fn mut_children(&mut self) -> &mut [Node] { + unsafe { self.children.get_unchecked_mut(..self.len as usize) } + } +} + impl NodeOps for Node16 { #[inline] fn is_full(&self) -> bool { @@ -330,31 +442,17 @@ impl Default for Node48 { Self { header: Default::default(), len: 0, - keys: mem::uninitialized(), - children: mem::uninitialized(), + keys: arr![0xff; 256], // the invalid index is 0xff + children: arr![Node::null(); 48], } } } } -impl Node48 { - fn keys(&self) -> &[u8] { - unsafe { self.keys.get_unchecked(..self.len as usize) } - } - - fn mut_keys(&mut self) -> &mut [u8] { - unsafe { self.keys.get_unchecked_mut(..self.len as usize) } - } - - fn children(&self) -> &[Node] { - unsafe { self.children.get_unchecked(..self.len as usize) } - } - - fn mut_children(&mut self) -> &mut [Node] { - unsafe { self.children.get_unchecked_mut(..self.len as usize) } - } - +impl From> for Node48 { fn from(node: Node16) -> Self { + debug_assert!(node.len == 16); + let mut new = Self::default(); unsafe { @@ -376,6 +474,48 @@ impl Node48 { } } +impl From> for Node48 { + fn from(node: Node256) -> Self { + debug_assert!(node.len <= 48); + + let mut new = Self::default(); + + unsafe { + // TODO: child is dropping? + for (key, child) in node.children.iter().enumerate() { + if !child.is_null() { + new.len += 1; + *new.keys.get_unchecked_mut(key) = (new.len - 1) as u8; + *new.children.get_unchecked_mut(new.len - 1) = ptr::read(child); + } + } + } + + new.header = node.header; + new.len = node.len; + + new + } +} + +impl Node48 { + fn keys(&self) -> &[u8] { + unsafe { self.keys.get_unchecked(..self.len as usize) } + } + + fn mut_keys(&mut self) -> &mut [u8] { + unsafe { self.keys.get_unchecked_mut(..self.len as usize) } + } + + fn children(&self) -> &[Node] { + unsafe { self.children.get_unchecked(..self.len as usize) } + } + + fn mut_children(&mut self) -> &mut [Node] { + unsafe { self.children.get_unchecked_mut(..self.len as usize) } + } +} + impl NodeOps for Node48 { #[inline] fn is_full(&self) -> bool { @@ -413,18 +553,18 @@ struct Node256 { impl Default for Node256 { #[allow(deprecated)] fn default() -> Self { - unsafe { - Self { - header: Default::default(), - len: 0, - children: mem::uninitialized(), - } + Self { + header: Default::default(), + len: 0, + children: arr![Node::null(); 256], } } } -impl Node256 { +impl From> for Node256 { fn from(node: Node48) -> Self { + debug_assert!(node.len == 48); + let mut new = Self::default(); unsafe { @@ -443,6 +583,8 @@ impl Node256 { } } +impl Node256 {} + impl NodeOps for Node256 { #[inline] fn is_full(&self) -> bool { @@ -471,8 +613,8 @@ impl NodeOps for Node256 { } } -trait Encodable { - fn encode(self) -> Vec; +pub trait Encodable { + fn encode(&self) -> Vec; } struct Cursor { @@ -487,7 +629,7 @@ pub struct ART { impl ART {} -impl SequentialMap for ART { +impl SequentialMap for ART { fn new() -> Self { todo!() } From da529ba08ac85de1af364ebb408662f866233cff Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Sun, 21 Nov 2021 14:09:59 +0900 Subject: [PATCH 05/19] Complete implementing Nodes, not tested --- src/art/mod.rs | 210 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 161 insertions(+), 49 deletions(-) diff --git a/src/art/mod.rs b/src/art/mod.rs index ba49cd0..a4324c6 100644 --- a/src/art/mod.rs +++ b/src/art/mod.rs @@ -8,7 +8,10 @@ use std::{ use arr_macro::arr; use either::Either; -use crate::{map::SequentialMap, util::slice_insert}; +use crate::{ + map::SequentialMap, + util::{slice_insert, slice_remove}, +}; struct NodeHeader { len: u32, // the len of prefix @@ -256,19 +259,7 @@ impl NodeOps for Node4 { } fn insert(&mut self, key: u8, node: Node) -> Result<(), Node> { - // since the &mut self is the pointer of Node4, not the pointer of Node, - // simple extension like this is impossble. - // if self.len == 4 { - // unsafe { - // let pointer = self as *const Node4 as *mut Node; - // let extended = Node::new( - // Node16::from(ptr::read(pointer as *const Node4)), - // NodeType::Node16, - // ); - // *(pointer as *mut Node) = extended; - // return (*pointer).deref_mut().left().unwrap().insert(key, node); - // } - // } + debug_assert!(!self.is_full()); for (index, k) in self.keys().iter().enumerate() { match key.cmp(k) { @@ -312,7 +303,21 @@ impl NodeOps for Node4 { } fn remove(&mut self, key: u8) -> Result, ()> { - todo!() + debug_assert!(self.len != 0); + + for (index, k) in self.keys().iter().enumerate() { + match key.cmp(k) { + Ordering::Less => {} + Ordering::Equal => unsafe { + self.len -= 1; + let node = mem::replace(self.children.get_unchecked_mut(index), Node::null()); + return Ok(node); + }, + Ordering::Greater => {} + } + } + + Err(()) } } @@ -412,19 +417,65 @@ impl NodeOps for Node16 { } fn insert(&mut self, key: u8, node: Node) -> Result<(), Node> { - todo!() + debug_assert!(!self.is_full()); + + for (index, k) in self.keys().iter().enumerate() { + match key.cmp(k) { + Ordering::Less => unsafe { + self.len += 1; + slice_insert(self.mut_keys(), index, key); + slice_insert(self.mut_children(), index, node); + return Ok(()); + }, + Ordering::Equal => return Err(node), + Ordering::Greater => {} + } + } + + Err(node) } fn lookup(&self, key: u8) -> Option<&Node> { - todo!() + for (index, k) in self.keys().iter().enumerate() { + if key == *k { + return unsafe { Some(self.children.get_unchecked(index)) }; + } + } + + None } fn update(&mut self, key: u8, node: Node) -> Result, Node> { - todo!() + for (index, k) in self.keys().iter().enumerate() { + match key.cmp(k) { + Ordering::Less => {} + Ordering::Equal => unsafe { + let node = mem::replace(self.children.get_unchecked_mut(index), node); + return Ok(node); + }, + Ordering::Greater => {} + } + } + + Err(node) } fn remove(&mut self, key: u8) -> Result, ()> { - todo!() + debug_assert!(self.len != 0); + + for (index, k) in self.keys().iter().enumerate() { + match key.cmp(k) { + Ordering::Less => {} + Ordering::Equal => unsafe { + self.len -= 1; + let node = mem::replace(self.children.get_unchecked_mut(index), Node::null()); + return Ok(node); + }, + Ordering::Greater => {} + } + } + + Err(()) } } @@ -438,13 +489,11 @@ struct Node48 { impl Default for Node48 { #[allow(deprecated)] fn default() -> Self { - unsafe { - Self { - header: Default::default(), - len: 0, - keys: arr![0xff; 256], // the invalid index is 0xff - children: arr![Node::null(); 48], - } + Self { + header: Default::default(), + len: 0, + keys: arr![0xff; 256], // the invalid index is 0xff + children: arr![Node::null(); 48], } } } @@ -499,14 +548,6 @@ impl From> for Node48 { } impl Node48 { - fn keys(&self) -> &[u8] { - unsafe { self.keys.get_unchecked(..self.len as usize) } - } - - fn mut_keys(&mut self) -> &mut [u8] { - unsafe { self.keys.get_unchecked_mut(..self.len as usize) } - } - fn children(&self) -> &[Node] { unsafe { self.children.get_unchecked(..self.len as usize) } } @@ -528,19 +569,58 @@ impl NodeOps for Node48 { } fn insert(&mut self, key: u8, node: Node) -> Result<(), Node> { - todo!() + debug_assert!(!self.is_full()); + + let index = unsafe { self.keys.get_unchecked_mut(key as usize) }; + + if *index != 0xff { + Err(node) + } else { + unsafe { + *self.children.get_unchecked_mut(self.len) = node; + } + + *index = self.len as u8; + self.len += 1; + Ok(()) + } } fn lookup(&self, key: u8) -> Option<&Node> { - todo!() + let index = unsafe { self.keys.get_unchecked(key as usize) }; + + if *index == 0xff { + None + } else { + unsafe { Some(self.children.get_unchecked(*index as usize)) } + } } fn update(&mut self, key: u8, node: Node) -> Result, Node> { - todo!() + let index = unsafe { self.keys.get_unchecked_mut(key as usize) }; + + if *index == 0xff { + Err(node) + } else { + let child = unsafe { self.children.get_unchecked_mut(*index as usize) }; + let old = mem::replace(child, node); + Ok(old) + } } fn remove(&mut self, key: u8) -> Result, ()> { - todo!() + let index = unsafe { self.keys.get_unchecked(key as usize).clone() }; + + if index == 0xff { + Err(()) + } else { + unsafe { + let node = slice_remove(self.mut_children(), index as usize); + *self.keys.get_unchecked_mut(key as usize) = 0xff; + self.len -= 1; + Ok(node) + } + } } } @@ -568,11 +648,11 @@ impl From> for Node256 { let mut new = Self::default(); unsafe { - for key in node.keys() { - *new.children.get_unchecked_mut(*key as usize) = ptr::read( - node.children - .get_unchecked(*node.keys.get_unchecked(*key as usize) as usize), - ); + for (key, index) in node.keys.iter().enumerate() { + if *index != 0xff { + *new.children.get_unchecked_mut(key) = + ptr::read(node.children.get_unchecked(*index as usize)); + } } } @@ -597,19 +677,46 @@ impl NodeOps for Node256 { } fn insert(&mut self, key: u8, node: Node) -> Result<(), Node> { - todo!() + let child = unsafe { self.children.get_unchecked_mut(key as usize) }; + + if child.is_null() { + *child = node; + Ok(()) + } else { + Err(node) + } } fn lookup(&self, key: u8) -> Option<&Node> { - todo!() + let child = unsafe { self.children.get_unchecked(key as usize) }; + + if child.is_null() { + None + } else { + Some(child) + } } fn update(&mut self, key: u8, node: Node) -> Result, Node> { - todo!() + let child = unsafe { self.children.get_unchecked_mut(key as usize) }; + + if child.is_null() { + Err(node) + } else { + let old = mem::replace(child, node); + Ok(old) + } } fn remove(&mut self, key: u8) -> Result, ()> { - todo!() + let child = unsafe { self.children.get_unchecked_mut(key as usize) }; + + if child.is_null() { + Err(()) + } else { + let node = mem::replace(child, Node::null()); + Ok(node) + } } } @@ -623,7 +730,7 @@ struct Cursor { } pub struct ART { - root: NonNull>, + root: Node, _marker: PhantomData, } @@ -631,7 +738,12 @@ impl ART {} impl SequentialMap for ART { fn new() -> Self { - todo!() + let root = Node::new(Node256::default(), NodeType::Node256); + + Self { + root, + _marker: PhantomData, + } } fn insert(&mut self, key: &K, value: V) -> Result<(), V> { From 93ebe34e5463ad2944610503c36ed0ec0269b19a Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Sun, 21 Nov 2021 17:53:34 +0900 Subject: [PATCH 06/19] Add Debug foramt, start to debug the implementation --- src/art/mod.rs | 367 +++++++++++++++++++++++++++++++++++++++++++++-- src/util/mod.rs | 11 ++ tests/art/mod.rs | 12 ++ tests/tests.rs | 1 + 4 files changed, 379 insertions(+), 12 deletions(-) create mode 100644 tests/art/mod.rs diff --git a/src/art/mod.rs b/src/art/mod.rs index a4324c6..0f5537d 100644 --- a/src/art/mod.rs +++ b/src/art/mod.rs @@ -7,15 +7,19 @@ use std::{ use arr_macro::arr; use either::Either; +use std::fmt::Debug; use crate::{ + left_or, map::SequentialMap, util::{slice_insert, slice_remove}, }; +const PREFIX_LEN: usize = 12; +#[derive(Debug)] struct NodeHeader { - len: u32, // the len of prefix - prefix: [u8; 12], // prefix for path compression + len: u32, // the len of prefix + prefix: [u8; PREFIX_LEN], // prefix for path compression } impl Default for NodeHeader { @@ -43,10 +47,14 @@ enum NodeType { } trait NodeOps { + fn header(&self) -> &NodeHeader; + fn header_mut(&mut self) -> &mut NodeHeader; fn is_full(&self) -> bool; fn is_shrinkable(&self) -> bool; + fn get_any_child(&self) -> Option>; fn insert(&mut self, key: u8, node: Node) -> Result<(), Node>; fn lookup(&self, key: u8) -> Option<&Node>; + fn lookup_mut(&mut self, key: u8) -> Option<&mut Node>; fn update(&mut self, key: u8, node: Node) -> Result, Node>; fn remove(&mut self, key: u8) -> Result, ()>; } @@ -57,6 +65,23 @@ struct Node { _marker: PhantomData>, } +impl Debug for Node { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + unsafe { + let pointer = self.pointer & !NODETYPE_MASK; + let tag = mem::transmute(self.pointer & NODETYPE_MASK); + + match tag { + NodeType::Value => (&*(pointer as *const NodeV)).fmt(f), + NodeType::Node4 => (&*(pointer as *const Node4)).fmt(f), + NodeType::Node16 => (&*(pointer as *const Node16)).fmt(f), + NodeType::Node48 => (&*(pointer as *const Node48)).fmt(f), + NodeType::Node256 => (&*(pointer as *const Node256)).fmt(f), + } + } + } +} + impl Node { fn deref(&self) -> Either<&dyn NodeOps, &NodeV> { unsafe { @@ -73,7 +98,7 @@ impl Node { } } - fn deref_mut(&mut self) -> Either<&mut dyn NodeOps, &mut NodeV> { + fn deref_mut(&self) -> Either<&mut dyn NodeOps, &mut NodeV> { unsafe { let pointer = self.pointer & !NODETYPE_MASK; let tag = mem::transmute(self.pointer & NODETYPE_MASK); @@ -88,7 +113,7 @@ impl Node { } } - fn new(node: impl NodeOps, node_type: NodeType) -> Self { + fn new(node: T, node_type: NodeType) -> Self { let node = Box::into_raw(Box::new(node)); Self { @@ -116,7 +141,7 @@ impl Node { /// extend node to bigger one only if necessary fn extend(&mut self) { if self.deref().is_right() { - panic!("NodeV cannot be extended.") + return; } if !self.deref().left().unwrap().is_full() { @@ -150,7 +175,7 @@ impl Node { /// shrink node to smaller one only if necessary fn shrink(&mut self) { if self.deref().is_right() { - panic!("NodeV cannot be shrinked.") + return; } if !self.deref().left().unwrap().is_shrinkable() { @@ -180,6 +205,40 @@ impl Node { }, } } + + /// compare the keys from depth to header.len + fn prefix_match(keys: &[u8], node: &dyn NodeOps, depth: usize) -> Result<(), usize> { + let header = node.header(); + + for (index, prefix) in unsafe { + header + .prefix + .get_unchecked(..header.len as usize) + .iter() + .enumerate() + } { + if keys[depth + index] != *prefix { + return Err(depth + index); + } + } + + if header.len > PREFIX_LEN as u32 { + // check strictly by using leaf node + let any_child = node.get_any_child().unwrap(); + + let mut depth = depth + PREFIX_LEN; + + while depth < depth + header.len as usize { + if keys[depth] != any_child.key[depth] { + return Err(depth); + } + + depth += 1; + } + } + + Ok(()) + } } struct NodeV { @@ -187,6 +246,24 @@ struct NodeV { value: V, } +impl Debug for NodeV { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("NodeV") + .field("key", &self.key) + .field("value", &self.value) + .finish() + } +} + +impl NodeV { + fn new(key: Vec, value: V) -> Self { + Self { + key: key.into(), + value, + } + } +} + struct Node4 { header: NodeHeader, len: usize, @@ -194,6 +271,17 @@ struct Node4 { children: [Node; 4], } +impl Debug for Node4 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Node4") + .field("header", &self.header) + .field("len", &self.len) + .field("keys", &self.keys()) + .field("children", &self.children()) + .finish() + } +} + impl Default for Node4 { #[allow(deprecated)] fn default() -> Self { @@ -248,6 +336,16 @@ impl Node4 { } impl NodeOps for Node4 { + #[inline] + fn header(&self) -> &NodeHeader { + &self.header + } + + #[inline] + fn header_mut(&mut self) -> &mut NodeHeader { + &mut self.header + } + #[inline] fn is_full(&self) -> bool { self.len == 4 @@ -258,6 +356,10 @@ impl NodeOps for Node4 { false } + fn get_any_child(&self) -> Option> { + todo!() + } + fn insert(&mut self, key: u8, node: Node) -> Result<(), Node> { debug_assert!(!self.is_full()); @@ -287,6 +389,16 @@ impl NodeOps for Node4 { None } + fn lookup_mut(&mut self, key: u8) -> Option<&mut Node> { + for (index, k) in self.keys().iter().enumerate() { + if key == *k { + return unsafe { Some(self.children.get_unchecked_mut(index)) }; + } + } + + None + } + fn update(&mut self, key: u8, node: Node) -> Result, Node> { for (index, k) in self.keys().iter().enumerate() { match key.cmp(k) { @@ -328,6 +440,17 @@ struct Node16 { children: [Node; 16], } +impl Debug for Node16 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Node16") + .field("header", &self.header) + .field("len", &self.len) + .field("keys", &self.keys()) + .field("children", &self.children()) + .finish() + } +} + impl Default for Node16 { #[allow(deprecated)] fn default() -> Self { @@ -406,6 +529,16 @@ impl Node16 { } impl NodeOps for Node16 { + #[inline] + fn header(&self) -> &NodeHeader { + &self.header + } + + #[inline] + fn header_mut(&mut self) -> &mut NodeHeader { + &mut self.header + } + #[inline] fn is_full(&self) -> bool { self.len == 16 @@ -416,6 +549,10 @@ impl NodeOps for Node16 { self.len <= 4 } + fn get_any_child(&self) -> Option> { + todo!() + } + fn insert(&mut self, key: u8, node: Node) -> Result<(), Node> { debug_assert!(!self.is_full()); @@ -445,6 +582,16 @@ impl NodeOps for Node16 { None } + fn lookup_mut(&mut self, key: u8) -> Option<&mut Node> { + for (index, k) in self.keys().iter().enumerate() { + if key == *k { + return unsafe { Some(self.children.get_unchecked_mut(index)) }; + } + } + + None + } + fn update(&mut self, key: u8, node: Node) -> Result, Node> { for (index, k) in self.keys().iter().enumerate() { match key.cmp(k) { @@ -478,7 +625,6 @@ impl NodeOps for Node16 { Err(()) } } - struct Node48 { header: NodeHeader, len: usize, @@ -486,6 +632,25 @@ struct Node48 { children: [Node; 48], } +impl Debug for Node48 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let valid_keys = self + .keys + .iter() + .enumerate() + .filter(|(_, index)| **index != 0xff) + .map(|(key, _)| key) + .collect::>(); + + f.debug_struct("Node48") + .field("header", &self.header) + .field("len", &self.len) + .field("keys", &valid_keys) + .field("children", &self.children()) + .finish() + } +} + impl Default for Node48 { #[allow(deprecated)] fn default() -> Self { @@ -558,6 +723,16 @@ impl Node48 { } impl NodeOps for Node48 { + #[inline] + fn header(&self) -> &NodeHeader { + &self.header + } + + #[inline] + fn header_mut(&mut self) -> &mut NodeHeader { + &mut self.header + } + #[inline] fn is_full(&self) -> bool { self.len == 48 @@ -568,6 +743,10 @@ impl NodeOps for Node48 { self.len <= 16 } + fn get_any_child(&self) -> Option> { + todo!() + } + fn insert(&mut self, key: u8, node: Node) -> Result<(), Node> { debug_assert!(!self.is_full()); @@ -596,6 +775,16 @@ impl NodeOps for Node48 { } } + fn lookup_mut(&mut self, key: u8) -> Option<&mut Node> { + let index = unsafe { self.keys.get_unchecked(key as usize) }; + + if *index == 0xff { + None + } else { + unsafe { Some(self.children.get_unchecked_mut(*index as usize)) } + } + } + fn update(&mut self, key: u8, node: Node) -> Result, Node> { let index = unsafe { self.keys.get_unchecked_mut(key as usize) }; @@ -630,6 +819,23 @@ struct Node256 { children: [Node; 256], } +impl Debug for Node256 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let valid_children = self + .children + .iter() + .enumerate() + .filter(|(_, child)| !child.is_null()) + .collect::>(); + + f.debug_struct("Node256") + .field("header", &self.header) + .field("len", &self.len) + .field("children", &valid_children) + .finish() + } +} + impl Default for Node256 { #[allow(deprecated)] fn default() -> Self { @@ -663,9 +869,17 @@ impl From> for Node256 { } } -impl Node256 {} - impl NodeOps for Node256 { + #[inline] + fn header(&self) -> &NodeHeader { + &self.header + } + + #[inline] + fn header_mut(&mut self) -> &mut NodeHeader { + &mut self.header + } + #[inline] fn is_full(&self) -> bool { self.len == 256 @@ -676,6 +890,10 @@ impl NodeOps for Node256 { self.len <= 48 } + fn get_any_child(&self) -> Option> { + todo!() + } + fn insert(&mut self, key: u8, node: Node) -> Result<(), Node> { let child = unsafe { self.children.get_unchecked_mut(key as usize) }; @@ -697,6 +915,16 @@ impl NodeOps for Node256 { } } + fn lookup_mut(&mut self, key: u8) -> Option<&mut Node> { + let child = unsafe { self.children.get_unchecked_mut(key as usize) }; + + if child.is_null() { + None + } else { + Some(child) + } + } + fn update(&mut self, key: u8, node: Node) -> Result, Node> { let child = unsafe { self.children.get_unchecked_mut(key as usize) }; @@ -724,6 +952,12 @@ pub trait Encodable { fn encode(&self) -> Vec; } +impl Encodable for String { + fn encode(&self) -> Vec { + self.clone().into_bytes() + } +} + struct Cursor { parent: Option>>, current: NonNull>, @@ -734,11 +968,17 @@ pub struct ART { _marker: PhantomData, } +impl Debug for ART { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ART").field("root", &self.root).finish() + } +} + impl ART {} impl SequentialMap for ART { fn new() -> Self { - let root = Node::new(Node256::default(), NodeType::Node256); + let root = Node::new(Node256::::default(), NodeType::Node256); Self { root, @@ -747,11 +987,114 @@ impl SequentialMap for ART { } fn insert(&mut self, key: &K, value: V) -> Result<(), V> { - todo!() + let keys = key.encode(); + let mut depth = 0; + let mut prefix_len: u32 = 0; + let mut parent = None; + let mut current = NonNull::new(&mut self.root).unwrap(); + + while depth < keys.len() { + let current_ref = unsafe { current.as_mut() }; + let node = left_or!(current_ref.deref_mut(), break); + + if let Err(common_depth) = Node::prefix_match(&keys, node, depth) { + prefix_len = (common_depth - depth) as u32; + break; + } + + let prefix = node.header().len; + + if let Some(node) = node.lookup_mut(keys[depth]) { + depth += 1 + prefix as usize; + parent = Some(current); + current = NonNull::new(node).unwrap(); + } else { + prefix_len = prefix; + break; + } + } + + let current_ref = unsafe { current.as_mut() }; + current_ref.extend(); + + match current_ref.deref_mut() { + Either::Left(node) => { + let key = keys[depth]; + let new = NodeV::new(keys.clone(), value); + + if prefix_len == node.header().len { + // just insert value into this node + let insert = node.insert(key, Node::new(new, NodeType::Value)); + debug_assert!(insert.is_ok()); + } else { + // split prefix + let mut inter_node = Node4::::default(); + inter_node + .header + .prefix + .clone_from_slice(&keys[depth..(depth + prefix_len as usize)]); + inter_node.header.len = prefix_len; + + let mut inter_node_ptr = NonNull::new(&mut inter_node).unwrap(); + + // re-set the old's prefix + let header = node.header_mut(); + let prefix = header.prefix.clone(); + unsafe { + ptr::copy_nonoverlapping( + prefix.as_ptr(), + header.prefix.as_mut_ptr(), + (header.len - prefix_len) as usize, + ) + }; + header.len = header.len - prefix_len; + + let old = unsafe { + mem::replace(current.as_mut(), Node::new(inter_node, NodeType::Node4)) + }; + + let inter_node_ptr = unsafe { inter_node_ptr.as_mut() }; + let insert_old = inter_node_ptr + .insert(node.header().prefix[depth + prefix_len as usize], old); + debug_assert!(insert_old.is_ok()); + let insert_new = inter_node_ptr.insert(key, Node::new(new, NodeType::Value)); + debug_assert!(insert_new.is_ok()); + } + + Ok(()) + } + Either::Right(_) => Err(value), + } } fn lookup(&self, key: &K) -> Option<&V> { - todo!() + let keys = key.encode(); + let mut depth = 0; + + let mut current = &self.root; + + while depth < keys.len() { + let node = left_or!(current.deref(), return None); + depth += node.header().len as usize; + + if let Some(node) = node.lookup(keys[depth]) { + depth += 1; + current = node; + } else { + return None; + } + } + + match current.deref() { + Either::Left(_) => None, + Either::Right(nodev) => { + if *nodev.key == keys { + Some(&nodev.value) + } else { + None + } + } + } } fn remove(&mut self, key: &K) -> Result { diff --git a/src/util/mod.rs b/src/util/mod.rs index 100a442..4bb9d59 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,4 +1,5 @@ use std::ptr; +use either::Either; pub mod random; @@ -52,3 +53,13 @@ macro_rules! some_or { } }}; } + +#[macro_export] +macro_rules! left_or { + ($e:expr, $err:expr) => {{ + match $e { + Either::Left(l) => l, + Either::Right(_) => $err, + } + }}; +} diff --git a/tests/art/mod.rs b/tests/art/mod.rs new file mode 100644 index 0000000..404d273 --- /dev/null +++ b/tests/art/mod.rs @@ -0,0 +1,12 @@ +use cds::art::ART; +use cds::map::SequentialMap; + +#[test] +fn test_art() { + let mut art: ART = ART::new(); + + assert_eq!(art.insert(&"a".to_string(), 1), Ok(())); + assert_eq!(art.insert(&"aa".to_string(), 1), Ok(())); + + println!("{:?}", art); +} diff --git a/tests/tests.rs b/tests/tests.rs index a6d01cc..5d30181 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -7,6 +7,7 @@ use rand::{prelude::SliceRandom, thread_rng, Rng}; mod avltree; mod btree; +mod art; mod linkedlist; mod queue; mod stack; From 2071c87da431ac605b6bd1ba5b4b6fa3c686bc8c Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Sun, 21 Nov 2021 19:47:39 +0900 Subject: [PATCH 07/19] Finish simple test on insert, lookup --- src/art/mod.rs | 137 ++++++++++++++++++++++++++++++++++------------- tests/art/mod.rs | 11 +++- 2 files changed, 108 insertions(+), 40 deletions(-) diff --git a/src/art/mod.rs b/src/art/mod.rs index 0f5537d..f3f5691 100644 --- a/src/art/mod.rs +++ b/src/art/mod.rs @@ -2,6 +2,7 @@ use std::{ cmp::Ordering, marker::PhantomData, mem, + ops::Add, ptr::{self, NonNull}, }; @@ -16,12 +17,23 @@ use crate::{ }; const PREFIX_LEN: usize = 12; -#[derive(Debug)] +const KEY_ENDMARK: u8 = 0xff; struct NodeHeader { len: u32, // the len of prefix prefix: [u8; PREFIX_LEN], // prefix for path compression } +impl Debug for NodeHeader { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + unsafe { + f.debug_struct("NodeHeader") + .field("len", &self.len) + .field("prefix", &self.prefix.get_unchecked(..self.len as usize)) + .finish() + } + } +} + impl Default for NodeHeader { #[allow(deprecated)] fn default() -> Self { @@ -156,19 +168,19 @@ impl Node { NodeType::Node4 => unsafe { let node = node as *const dyn NodeOps as *const Node4; let new = Box::new(Node16::from(ptr::read(node))); - self.pointer = Box::into_raw(new) as usize | node_type as usize; + self.pointer = Box::into_raw(new) as usize | NodeType::Node16 as usize; }, NodeType::Node16 => unsafe { let node = node as *const dyn NodeOps as *const Node16; let new = Box::new(Node48::from(ptr::read(node))); - self.pointer = Box::into_raw(new) as usize | node_type as usize; + self.pointer = Box::into_raw(new) as usize | NodeType::Node48 as usize; }, NodeType::Node48 => unsafe { let node = node as *const dyn NodeOps as *const Node48; let new = Box::new(Node256::from(ptr::read(node))); - self.pointer = Box::into_raw(new) as usize | node_type as usize; + self.pointer = Box::into_raw(new) as usize | NodeType::Node256 as usize; }, - NodeType::Node256 => panic!("Node256 cannot be extended."), + NodeType::Node256 => {} } } @@ -187,7 +199,7 @@ impl Node { match node_type { NodeType::Value => unreachable!(), - NodeType::Node4 => panic!("Node4 cannot be shrinked."), + NodeType::Node4 => {} NodeType::Node16 => unsafe { let node = node as *const dyn NodeOps as *const Node16; let new = Box::new(Node4::from(ptr::read(node))); @@ -376,7 +388,14 @@ impl NodeOps for Node4 { } } - Err(node) + let index = self.len; + unsafe { + self.len += 1; + slice_insert(self.mut_keys(), index, key); + slice_insert(self.mut_children(), index, node); + } + + Ok(()) } fn lookup(&self, key: u8) -> Option<&Node> { @@ -569,7 +588,14 @@ impl NodeOps for Node16 { } } - Err(node) + let index = self.len; + unsafe { + self.len += 1; + slice_insert(self.mut_keys(), index, key); + slice_insert(self.mut_children(), index, node); + } + + Ok(()) } fn lookup(&self, key: u8) -> Option<&Node> { @@ -954,15 +980,12 @@ pub trait Encodable { impl Encodable for String { fn encode(&self) -> Vec { - self.clone().into_bytes() + let mut array = self.clone().into_bytes(); + array.push(KEY_ENDMARK); // prevent to certain string cannot be the prefix of another string + array } } -struct Cursor { - parent: Option>>, - current: NonNull>, -} - pub struct ART { root: Node, _marker: PhantomData, @@ -976,7 +999,7 @@ impl Debug for ART { impl ART {} -impl SequentialMap for ART { +impl SequentialMap for ART { fn new() -> Self { let root = Node::new(Node256::::default(), NodeType::Node256); @@ -989,8 +1012,7 @@ impl SequentialMap for ART { fn insert(&mut self, key: &K, value: V) -> Result<(), V> { let keys = key.encode(); let mut depth = 0; - let mut prefix_len: u32 = 0; - let mut parent = None; + let mut common_prefix: u32 = 0; let mut current = NonNull::new(&mut self.root).unwrap(); while depth < keys.len() { @@ -998,7 +1020,7 @@ impl SequentialMap for ART { let node = left_or!(current_ref.deref_mut(), break); if let Err(common_depth) = Node::prefix_match(&keys, node, depth) { - prefix_len = (common_depth - depth) as u32; + common_prefix = (common_depth - depth) as u32; break; } @@ -1006,10 +1028,9 @@ impl SequentialMap for ART { if let Some(node) = node.lookup_mut(keys[depth]) { depth += 1 + prefix as usize; - parent = Some(current); current = NonNull::new(node).unwrap(); } else { - prefix_len = prefix; + common_prefix = prefix; break; } } @@ -1022,20 +1043,23 @@ impl SequentialMap for ART { let key = keys[depth]; let new = NodeV::new(keys.clone(), value); - if prefix_len == node.header().len { + if common_prefix == node.header().len { // just insert value into this node let insert = node.insert(key, Node::new(new, NodeType::Value)); debug_assert!(insert.is_ok()); } else { // split prefix let mut inter_node = Node4::::default(); - inter_node - .header - .prefix - .clone_from_slice(&keys[depth..(depth + prefix_len as usize)]); - inter_node.header.len = prefix_len; - let mut inter_node_ptr = NonNull::new(&mut inter_node).unwrap(); + unsafe { + ptr::copy_nonoverlapping( + keys.as_ptr().add(depth), + inter_node.header.prefix.as_mut_ptr(), + common_prefix as usize, + ); + } + + inter_node.header.len = common_prefix; // re-set the old's prefix let header = node.header_mut(); @@ -1044,26 +1068,63 @@ impl SequentialMap for ART { ptr::copy_nonoverlapping( prefix.as_ptr(), header.prefix.as_mut_ptr(), - (header.len - prefix_len) as usize, + (header.len - common_prefix) as usize, ) }; - header.len = header.len - prefix_len; + header.len = header.len - common_prefix; - let old = unsafe { - mem::replace(current.as_mut(), Node::new(inter_node, NodeType::Node4)) - }; + let current = unsafe { current.as_mut() }; + let old = mem::replace(current, Node::new(inter_node, NodeType::Node4)); + let current = current.deref_mut().left().unwrap(); - let inter_node_ptr = unsafe { inter_node_ptr.as_mut() }; - let insert_old = inter_node_ptr - .insert(node.header().prefix[depth + prefix_len as usize], old); + let insert_old = + current.insert(node.header().prefix[depth + common_prefix as usize], old); debug_assert!(insert_old.is_ok()); - let insert_new = inter_node_ptr.insert(key, Node::new(new, NodeType::Value)); + let insert_new = current.insert(key, Node::new(new, NodeType::Value)); debug_assert!(insert_new.is_ok()); } Ok(()) } - Either::Right(_) => Err(value), + Either::Right(nodev) => { + if depth == keys.len() { + return Err(value); + } + + let new = NodeV::new(keys.clone(), value); + + // insert inter node with zero prefix + // ex) 'aE', 'aaE' + let key = keys[depth]; + + let mut common_prefix = 0; + + while keys[depth + common_prefix] == nodev.key[depth + common_prefix] { + common_prefix += 1; + } + + let mut inter_node = Node4::::default(); + unsafe { + ptr::copy_nonoverlapping( + keys.as_ptr().add(depth), + inter_node.header.prefix.as_mut_ptr(), + common_prefix, + ); + } + inter_node.header.len = common_prefix as u32; + + let current = unsafe { current.as_mut() }; + let old = mem::replace(current, Node::new(inter_node, NodeType::Node4)); + + let current = current.deref_mut().left().unwrap(); + + let insert_old = current.insert(nodev.key[depth], old); + debug_assert!(insert_old.is_ok()); + let insert_new = current.insert(key, Node::new(new, NodeType::Value)); + debug_assert!(insert_new.is_ok()); + + Ok(()) + } } } @@ -1074,7 +1135,7 @@ impl SequentialMap for ART { let mut current = &self.root; while depth < keys.len() { - let node = left_or!(current.deref(), return None); + let node = left_or!(current.deref(), break); depth += node.header().len as usize; if let Some(node) = node.lookup(keys[depth]) { diff --git a/tests/art/mod.rs b/tests/art/mod.rs index 404d273..b0179f4 100644 --- a/tests/art/mod.rs +++ b/tests/art/mod.rs @@ -6,7 +6,14 @@ fn test_art() { let mut art: ART = ART::new(); assert_eq!(art.insert(&"a".to_string(), 1), Ok(())); - assert_eq!(art.insert(&"aa".to_string(), 1), Ok(())); + assert_eq!(art.insert(&"ab".to_string(), 2), Ok(())); + assert_eq!(art.insert(&"ac".to_string(), 3), Ok(())); + assert_eq!(art.insert(&"ad".to_string(), 4), Ok(())); + assert_eq!(art.insert(&"acb".to_string(), 5), Ok(())); - println!("{:?}", art); + assert_eq!(art.lookup(&"a".to_string()), Some(&1)); + assert_eq!(art.lookup(&"ab".to_string()),Some(&2)); + assert_eq!(art.lookup(&"ac".to_string()),Some(&3)); + assert_eq!(art.lookup(&"ad".to_string()),Some(&4)); + assert_eq!(art.lookup(&"acb".to_string()),Some(&5)); } From 7c69e51d128b19c0608802f66e14243f5b4f48b0 Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Mon, 22 Nov 2021 23:35:50 +0900 Subject: [PATCH 08/19] Complete ART on small test case --- src/art/mod.rs | 119 +++++++++++++++++++++++++++++++++++++---------- tests/art/mod.rs | 14 ++++-- 2 files changed, 104 insertions(+), 29 deletions(-) diff --git a/src/art/mod.rs b/src/art/mod.rs index f3f5691..6de7959 100644 --- a/src/art/mod.rs +++ b/src/art/mod.rs @@ -2,7 +2,6 @@ use std::{ cmp::Ordering, marker::PhantomData, mem, - ops::Add, ptr::{self, NonNull}, }; @@ -50,6 +49,7 @@ impl Default for NodeHeader { /// This is used for bitflag on child pointer. const NODETYPE_MASK: usize = 0b111; #[repr(usize)] +#[derive(Debug, PartialEq)] enum NodeType { Value = 0b000, Node4 = 0b001, @@ -61,6 +61,7 @@ enum NodeType { trait NodeOps { fn header(&self) -> &NodeHeader; fn header_mut(&mut self) -> &mut NodeHeader; + fn is_empty(&self) -> bool; fn is_full(&self) -> bool; fn is_shrinkable(&self) -> bool; fn get_any_child(&self) -> Option>; @@ -125,6 +126,16 @@ impl Node { } } + fn inner(self) -> Box { + // TODO: how to improve this function safely(self.node_type() == T::node_type()) + unsafe { + let pointer = self.pointer & !NODETYPE_MASK; + // let tag = mem::transmute(self.pointer & NODETYPE_MASK); + + Box::from_raw(pointer as *mut T) + } + } + fn new(node: T, node_type: NodeType) -> Self { let node = Box::into_raw(Box::new(node)); @@ -238,14 +249,14 @@ impl Node { // check strictly by using leaf node let any_child = node.get_any_child().unwrap(); - let mut depth = depth + PREFIX_LEN; + let mut d = depth + PREFIX_LEN; - while depth < depth + header.len as usize { - if keys[depth] != any_child.key[depth] { - return Err(depth); + while d < depth + header.len as usize { + if keys[d] != any_child.key[d] { + return Err(d); } - depth += 1; + d += 1; } } @@ -348,22 +359,22 @@ impl Node4 { } impl NodeOps for Node4 { - #[inline] fn header(&self) -> &NodeHeader { &self.header } - #[inline] fn header_mut(&mut self) -> &mut NodeHeader { &mut self.header } - #[inline] + fn is_empty(&self) -> bool { + self.len == 0 + } + fn is_full(&self) -> bool { self.len == 4 } - #[inline] fn is_shrinkable(&self) -> bool { false } @@ -440,8 +451,9 @@ impl NodeOps for Node4 { match key.cmp(k) { Ordering::Less => {} Ordering::Equal => unsafe { + let _ = slice_remove(self.mut_keys(), index); + let node = slice_remove(self.mut_children(), index); self.len -= 1; - let node = mem::replace(self.children.get_unchecked_mut(index), Node::null()); return Ok(node); }, Ordering::Greater => {} @@ -548,22 +560,22 @@ impl Node16 { } impl NodeOps for Node16 { - #[inline] + fn is_empty(&self) -> bool { + self.len == 0 + } + fn header(&self) -> &NodeHeader { &self.header } - #[inline] fn header_mut(&mut self) -> &mut NodeHeader { &mut self.header } - #[inline] fn is_full(&self) -> bool { self.len == 16 } - #[inline] fn is_shrinkable(&self) -> bool { self.len <= 4 } @@ -640,8 +652,9 @@ impl NodeOps for Node16 { match key.cmp(k) { Ordering::Less => {} Ordering::Equal => unsafe { + let _ = slice_remove(self.mut_keys(), index); + let node = slice_remove(self.mut_children(), index); self.len -= 1; - let node = mem::replace(self.children.get_unchecked_mut(index), Node::null()); return Ok(node); }, Ordering::Greater => {} @@ -749,22 +762,22 @@ impl Node48 { } impl NodeOps for Node48 { - #[inline] + fn is_empty(&self) -> bool { + self.len == 0 + } + fn header(&self) -> &NodeHeader { &self.header } - #[inline] fn header_mut(&mut self) -> &mut NodeHeader { &mut self.header } - #[inline] fn is_full(&self) -> bool { self.len == 48 } - #[inline] fn is_shrinkable(&self) -> bool { self.len <= 16 } @@ -896,22 +909,22 @@ impl From> for Node256 { } impl NodeOps for Node256 { - #[inline] + fn is_empty(&self) -> bool { + self.len == 0 + } + fn header(&self) -> &NodeHeader { &self.header } - #[inline] fn header_mut(&mut self) -> &mut NodeHeader { &mut self.header } - #[inline] fn is_full(&self) -> bool { self.len == 256 } - #[inline] fn is_shrinkable(&self) -> bool { self.len <= 48 } @@ -1159,6 +1172,62 @@ impl SequentialMap for ART { } fn remove(&mut self, key: &K) -> Result { - todo!() + let keys = key.encode(); + let mut depth = 0; + + let mut parent = None; + let mut current = NonNull::new(&mut self.root).unwrap(); + + while depth < keys.len() { + let current_ref = unsafe { current.as_mut() }; + let node = current_ref.deref_mut().unwrap_left(); + depth += node.header().len as usize; + + if let Some(node) = node.lookup_mut(keys[depth]) { + // println!("{:?}, key: {}", node, keys[depth]); + + if node.node_type() == NodeType::Value { + if *node.deref().right().unwrap().key == keys { + break; + } else { + return Err(()); + } + } + + depth += 1; + parent = Some(current); + current = NonNull::new(node).unwrap(); + } else { + return Err(()); + } + } + + let current = unsafe { current.as_mut() }; + let current_ref = current.deref_mut().left().unwrap(); + let node = current_ref.remove(keys[depth]); + debug_assert!(node.is_ok()); + let node = node.unwrap().inner::>(); + + if let Some(mut parent) = parent { + if current_ref.is_empty() { + // remove the node + // println!("empty"); + let parent = unsafe { parent.as_mut() }; + let parent_ref = parent.deref_mut().left().unwrap(); + + let remove = parent_ref.remove(keys[depth - current_ref.header().len as usize - 1]); + debug_assert!(remove.is_ok()); + let remove = remove.unwrap(); + debug_assert!(remove.deref().left().unwrap().is_empty()); + debug_assert_eq!(remove.node_type(), NodeType::Node4); + remove.inner::>(); + } else if current_ref.is_shrinkable() { + // shrink the node + // println!("shrinkable"); + current.shrink(); + } + } + + Ok(node.value) } } diff --git a/tests/art/mod.rs b/tests/art/mod.rs index b0179f4..23359cf 100644 --- a/tests/art/mod.rs +++ b/tests/art/mod.rs @@ -12,8 +12,14 @@ fn test_art() { assert_eq!(art.insert(&"acb".to_string(), 5), Ok(())); assert_eq!(art.lookup(&"a".to_string()), Some(&1)); - assert_eq!(art.lookup(&"ab".to_string()),Some(&2)); - assert_eq!(art.lookup(&"ac".to_string()),Some(&3)); - assert_eq!(art.lookup(&"ad".to_string()),Some(&4)); - assert_eq!(art.lookup(&"acb".to_string()),Some(&5)); + assert_eq!(art.lookup(&"ab".to_string()), Some(&2)); + assert_eq!(art.lookup(&"ac".to_string()), Some(&3)); + assert_eq!(art.lookup(&"ad".to_string()), Some(&4)); + assert_eq!(art.lookup(&"acb".to_string()), Some(&5)); + + assert_eq!(art.remove(&"a".to_string()), Ok(1)); + assert_eq!(art.remove(&"ab".to_string()), Ok(2)); + assert_eq!(art.remove(&"ac".to_string()), Ok(3)); + assert_eq!(art.remove(&"ad".to_string()), Ok(4)); + assert_eq!(art.remove(&"acb".to_string()), Ok(5)); } From d6e8acb3f7cdfcf3be6d2044bf0adf2799dad59e Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Tue, 23 Nov 2021 00:09:32 +0900 Subject: [PATCH 09/19] Complete on large key test, but need to impl path compression on removal --- src/art/mod.rs | 66 ++++++++++++++++++++++++++++++------------------ tests/art/mod.rs | 24 ++++++++++++++++++ 2 files changed, 65 insertions(+), 25 deletions(-) diff --git a/src/art/mod.rs b/src/art/mod.rs index 6de7959..4f80d4a 100644 --- a/src/art/mod.rs +++ b/src/art/mod.rs @@ -1,5 +1,5 @@ use std::{ - cmp::Ordering, + cmp::{min, Ordering}, marker::PhantomData, mem, ptr::{self, NonNull}, @@ -61,7 +61,7 @@ enum NodeType { trait NodeOps { fn header(&self) -> &NodeHeader; fn header_mut(&mut self) -> &mut NodeHeader; - fn is_empty(&self) -> bool; + fn size(&self) -> usize; fn is_full(&self) -> bool; fn is_shrinkable(&self) -> bool; fn get_any_child(&self) -> Option>; @@ -367,8 +367,8 @@ impl NodeOps for Node4 { &mut self.header } - fn is_empty(&self) -> bool { - self.len == 0 + fn size(&self) -> usize { + self.len } fn is_full(&self) -> bool { @@ -560,10 +560,6 @@ impl Node16 { } impl NodeOps for Node16 { - fn is_empty(&self) -> bool { - self.len == 0 - } - fn header(&self) -> &NodeHeader { &self.header } @@ -572,6 +568,10 @@ impl NodeOps for Node16 { &mut self.header } + fn size(&self) -> usize { + self.len + } + fn is_full(&self) -> bool { self.len == 16 } @@ -762,10 +762,6 @@ impl Node48 { } impl NodeOps for Node48 { - fn is_empty(&self) -> bool { - self.len == 0 - } - fn header(&self) -> &NodeHeader { &self.header } @@ -774,6 +770,10 @@ impl NodeOps for Node48 { &mut self.header } + fn size(&self) -> usize { + self.len + } + fn is_full(&self) -> bool { self.len == 48 } @@ -909,10 +909,6 @@ impl From> for Node256 { } impl NodeOps for Node256 { - fn is_empty(&self) -> bool { - self.len == 0 - } - fn header(&self) -> &NodeHeader { &self.header } @@ -921,6 +917,10 @@ impl NodeOps for Node256 { &mut self.header } + fn size(&self) -> usize { + self.len + } + fn is_full(&self) -> bool { self.len == 256 } @@ -1033,16 +1033,18 @@ impl SequentialMap for ART { let node = left_or!(current_ref.deref_mut(), break); if let Err(common_depth) = Node::prefix_match(&keys, node, depth) { + // println!("same common prefix"); common_prefix = (common_depth - depth) as u32; break; } let prefix = node.header().len; - if let Some(node) = node.lookup_mut(keys[depth]) { + if let Some(node) = node.lookup_mut(keys[depth + prefix as usize]) { depth += 1 + prefix as usize; current = NonNull::new(node).unwrap(); } else { + // println!("cannot find {} on {:?}", keys[depth], current_ref); common_prefix = prefix; break; } @@ -1051,6 +1053,8 @@ impl SequentialMap for ART { let current_ref = unsafe { current.as_mut() }; current_ref.extend(); + // println!("current: {:?}", current_ref); + match current_ref.deref_mut() { Either::Left(node) => { let key = keys[depth]; @@ -1108,32 +1112,39 @@ impl SequentialMap for ART { // insert inter node with zero prefix // ex) 'aE', 'aaE' - let key = keys[depth]; - + // println!("split with same index {}", keys[depth]); let mut common_prefix = 0; while keys[depth + common_prefix] == nodev.key[depth + common_prefix] { common_prefix += 1; } + // println!( + // "{}, common_prefix: {}, {}", + // depth, + // common_prefix, + // nodev.key[depth + common_prefix] + // ); + let mut inter_node = Node4::::default(); unsafe { ptr::copy_nonoverlapping( keys.as_ptr().add(depth), inter_node.header.prefix.as_mut_ptr(), - common_prefix, + min(common_prefix, PREFIX_LEN), ); } - inter_node.header.len = common_prefix as u32; + inter_node.header.len = min(common_prefix, PREFIX_LEN) as u32; let current = unsafe { current.as_mut() }; let old = mem::replace(current, Node::new(inter_node, NodeType::Node4)); let current = current.deref_mut().left().unwrap(); - let insert_old = current.insert(nodev.key[depth], old); + let insert_old = current.insert(nodev.key[depth + common_prefix], old); debug_assert!(insert_old.is_ok()); - let insert_new = current.insert(key, Node::new(new, NodeType::Value)); + let insert_new = + current.insert(keys[depth + common_prefix], Node::new(new, NodeType::Value)); debug_assert!(insert_new.is_ok()); Ok(()) @@ -1208,8 +1219,13 @@ impl SequentialMap for ART { debug_assert!(node.is_ok()); let node = node.unwrap().inner::>(); + if current_ref.size() == 1 { + // path compression + todo!() + } + if let Some(mut parent) = parent { - if current_ref.is_empty() { + if current_ref.size() == 0 { // remove the node // println!("empty"); let parent = unsafe { parent.as_mut() }; @@ -1218,7 +1234,7 @@ impl SequentialMap for ART { let remove = parent_ref.remove(keys[depth - current_ref.header().len as usize - 1]); debug_assert!(remove.is_ok()); let remove = remove.unwrap(); - debug_assert!(remove.deref().left().unwrap().is_empty()); + debug_assert_eq!(remove.deref().left().unwrap().size(), 0); debug_assert_eq!(remove.node_type(), NodeType::Node4); remove.inner::>(); } else if current_ref.is_shrinkable() { diff --git a/tests/art/mod.rs b/tests/art/mod.rs index 23359cf..83a3d91 100644 --- a/tests/art/mod.rs +++ b/tests/art/mod.rs @@ -23,3 +23,27 @@ fn test_art() { assert_eq!(art.remove(&"ad".to_string()), Ok(4)); assert_eq!(art.remove(&"acb".to_string()), Ok(5)); } + +#[test] +fn test_large_key_art() { + let mut art: ART = ART::new(); + assert_eq!(art.insert(&"1234567890".to_string(), 1), Ok(())); + assert_eq!(art.insert(&"12345678901234567890".to_string(), 2), Ok(())); + assert_eq!(art.insert(&"123456789012345678901234567890".to_string(), 3), Ok(())); + assert_eq!(art.insert(&"1234567890123456789012345678901234567890".to_string(), 4), Ok(())); + assert_eq!(art.insert(&"12345678901234567890123456789012345678901234567890".to_string(), 5), Ok(())); + assert_eq!(art.insert(&"123456789012345678901234567890123456789012345678901234567890".to_string(), 6), Ok(())); + assert_eq!(art.lookup(&"1234567890".to_string()), Some(&1)); + assert_eq!(art.lookup(&"12345678901234567890".to_string()), Some(&2)); + assert_eq!(art.lookup(&"123456789012345678901234567890".to_string()), Some(&3)); + assert_eq!(art.lookup(&"1234567890123456789012345678901234567890".to_string()), Some(&4)); + assert_eq!(art.lookup(&"12345678901234567890123456789012345678901234567890".to_string()), Some(&5)); + assert_eq!(art.lookup(&"123456789012345678901234567890123456789012345678901234567890".to_string()), Some(&6)); + assert_eq!(art.remove(&"1234567890".to_string()), Ok(1)); + assert_eq!(art.remove(&"12345678901234567890".to_string()), Ok(2)); + assert_eq!(art.remove(&"123456789012345678901234567890".to_string()), Ok(3)); + assert_eq!(art.remove(&"1234567890123456789012345678901234567890".to_string()), Ok(4)); + assert_eq!(art.remove(&"12345678901234567890123456789012345678901234567890".to_string()), Ok(5)); + assert_eq!(art.remove(&"123456789012345678901234567890123456789012345678901234567890".to_string()), Ok(6)); + println!("{:?}", art); +} From 05f5b660cfcd80ec69952044f07fc40a016fd45d Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Tue, 23 Nov 2021 01:16:30 +0900 Subject: [PATCH 10/19] Impl path compression on removal --- src/art/mod.rs | 118 ++++++++++++++++++++++++++++++++++++----------- tests/art/mod.rs | 1 - 2 files changed, 92 insertions(+), 27 deletions(-) diff --git a/src/art/mod.rs b/src/art/mod.rs index 4f80d4a..6bac9f8 100644 --- a/src/art/mod.rs +++ b/src/art/mod.rs @@ -229,6 +229,69 @@ impl Node { } } + /// compress path if the node is Node4 with having one child + fn compress_path(&mut self) { + if self.deref().is_right() { + return; + } + + if self.node_type() != NodeType::Node4 { + return; + } + + unsafe { + if self.deref().left().unwrap().size() != 1 { + return; + } + + let node = Box::from_raw((self.pointer & !NODETYPE_MASK) as *mut Node4); + + let child_key = *node.keys.get_unchecked(0); + let child = ptr::read(node.children.get_unchecked(0)); + + // if the child is not NodeV, then move prefix from parent to child + if let Either::Left(child) = child.deref_mut() { + // push child key on front of child header prefix + let prefix_ptr = child.header_mut().prefix.as_mut_ptr(); + let prefix_len = child.header().len as usize; + + ptr::copy( + prefix_ptr, + prefix_ptr.add(1), + min(prefix_len, PREFIX_LEN - 1), + ); + *prefix_ptr = child_key; + + child.header_mut().len += 1; + + if node.header.len > 0 { + // println!("prefix move"); + let node_prefix_len = node.header.len as usize; + let prefix_len = child.header().len as usize; + + if PREFIX_LEN > node_prefix_len { + ptr::copy( + prefix_ptr, + prefix_ptr.add(node_prefix_len as usize), + min(prefix_len, PREFIX_LEN - node_prefix_len), + ); + } + + ptr::copy_nonoverlapping( + node.header.prefix.as_ptr(), + prefix_ptr, + min(node_prefix_len, PREFIX_LEN), + ); + + child.header_mut().len = (prefix_len + node_prefix_len) as u32; + } + } + + mem::forget(node); + *self = child; + } + } + /// compare the keys from depth to header.len fn prefix_match(keys: &[u8], node: &dyn NodeOps, depth: usize) -> Result<(), usize> { let header = node.header(); @@ -1195,12 +1258,13 @@ impl SequentialMap for ART { depth += node.header().len as usize; if let Some(node) = node.lookup_mut(keys[depth]) { - // println!("{:?}, key: {}", node, keys[depth]); + // println!("{:?}, key: {}, {}", node, keys[depth], depth); if node.node_type() == NodeType::Value { if *node.deref().right().unwrap().key == keys { break; } else { + // println!("dismatched key"); return Err(()); } } @@ -1209,38 +1273,40 @@ impl SequentialMap for ART { parent = Some(current); current = NonNull::new(node).unwrap(); } else { + // println!("fail to lookup"); return Err(()); } } - let current = unsafe { current.as_mut() }; - let current_ref = current.deref_mut().left().unwrap(); - let node = current_ref.remove(keys[depth]); + let current_ref = unsafe { current.as_mut() }; + let current_node = current_ref.deref_mut().left().unwrap(); + let node = current_node.remove(keys[depth]); debug_assert!(node.is_ok()); let node = node.unwrap().inner::>(); - if current_ref.size() == 1 { - // path compression - todo!() - } - - if let Some(mut parent) = parent { - if current_ref.size() == 0 { - // remove the node - // println!("empty"); - let parent = unsafe { parent.as_mut() }; - let parent_ref = parent.deref_mut().left().unwrap(); - - let remove = parent_ref.remove(keys[depth - current_ref.header().len as usize - 1]); - debug_assert!(remove.is_ok()); - let remove = remove.unwrap(); - debug_assert_eq!(remove.deref().left().unwrap().size(), 0); - debug_assert_eq!(remove.node_type(), NodeType::Node4); - remove.inner::>(); - } else if current_ref.is_shrinkable() { - // shrink the node - // println!("shrinkable"); - current.shrink(); + let current_ref = unsafe { current.as_mut() }; + current_ref.compress_path(); + + if let Either::Left(current_node) = current_ref.deref_mut() { + if let Some(mut parent) = parent { + if current_node.size() == 0 { + // remove the node + // println!("empty"); + let parent = unsafe { parent.as_mut() }; + let parent_ref = parent.deref_mut().left().unwrap(); + + let remove = + parent_ref.remove(keys[depth - current_node.header().len as usize - 1]); + debug_assert!(remove.is_ok()); + let remove = remove.unwrap(); + debug_assert_eq!(remove.deref().left().unwrap().size(), 0); + debug_assert_eq!(remove.node_type(), NodeType::Node4); + remove.inner::>(); + } else if current_node.is_shrinkable() { + // shrink the node + // println!("shrinkable"); + current_ref.shrink(); + } } } diff --git a/tests/art/mod.rs b/tests/art/mod.rs index 83a3d91..092f117 100644 --- a/tests/art/mod.rs +++ b/tests/art/mod.rs @@ -45,5 +45,4 @@ fn test_large_key_art() { assert_eq!(art.remove(&"1234567890123456789012345678901234567890".to_string()), Ok(4)); assert_eq!(art.remove(&"12345678901234567890123456789012345678901234567890".to_string()), Ok(5)); assert_eq!(art.remove(&"123456789012345678901234567890123456789012345678901234567890".to_string()), Ok(6)); - println!("{:?}", art); } From a329602d94bd75e088bf5dc6983a341754d98363 Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Wed, 24 Nov 2021 23:51:45 +0900 Subject: [PATCH 11/19] Fix on large prefix splitting --- src/art/mod.rs | 134 ++++++++++++++++++++++++++++++++++------------- src/util/mod.rs | 1 - tests/art/mod.rs | 25 +++++++++ 3 files changed, 124 insertions(+), 36 deletions(-) diff --git a/src/art/mod.rs b/src/art/mod.rs index 6bac9f8..67f99a2 100644 --- a/src/art/mod.rs +++ b/src/art/mod.rs @@ -27,7 +27,7 @@ impl Debug for NodeHeader { unsafe { f.debug_struct("NodeHeader") .field("len", &self.len) - .field("prefix", &self.prefix.get_unchecked(..self.len as usize)) + .field("prefix", &self.prefix.get_unchecked(..min(PREFIX_LEN, self.len as usize))) .finish() } } @@ -64,7 +64,7 @@ trait NodeOps { fn size(&self) -> usize; fn is_full(&self) -> bool; fn is_shrinkable(&self) -> bool; - fn get_any_child(&self) -> Option>; + fn get_any_child(&self) -> Option<&NodeV>; fn insert(&mut self, key: u8, node: Node) -> Result<(), Node>; fn lookup(&self, key: u8) -> Option<&Node>; fn lookup_mut(&mut self, key: u8) -> Option<&mut Node>; @@ -231,19 +231,15 @@ impl Node { /// compress path if the node is Node4 with having one child fn compress_path(&mut self) { - if self.deref().is_right() { + if self.node_type() != NodeType::Node4 { return; } - if self.node_type() != NodeType::Node4 { + if self.deref().left().unwrap().size() != 1 { return; } unsafe { - if self.deref().left().unwrap().size() != 1 { - return; - } - let node = Box::from_raw((self.pointer & !NODETYPE_MASK) as *mut Node4); let child_key = *node.keys.get_unchecked(0); @@ -299,7 +295,7 @@ impl Node { for (index, prefix) in unsafe { header .prefix - .get_unchecked(..header.len as usize) + .get_unchecked(..min(PREFIX_LEN, header.len as usize)) .iter() .enumerate() } { @@ -442,8 +438,13 @@ impl NodeOps for Node4 { false } - fn get_any_child(&self) -> Option> { - todo!() + fn get_any_child(&self) -> Option<&NodeV> { + debug_assert!(self.size() > 0); + + match unsafe { self.children.get_unchecked(0).deref() } { + Either::Left(node) => node.get_any_child(), + Either::Right(nodev) => return Some(nodev), + } } fn insert(&mut self, key: u8, node: Node) -> Result<(), Node> { @@ -643,8 +644,13 @@ impl NodeOps for Node16 { self.len <= 4 } - fn get_any_child(&self) -> Option> { - todo!() + fn get_any_child(&self) -> Option<&NodeV> { + debug_assert!(self.size() > 0); + + match unsafe { self.children.get_unchecked(0).deref() } { + Either::Left(node) => node.get_any_child(), + Either::Right(nodev) => Some(nodev), + } } fn insert(&mut self, key: u8, node: Node) -> Result<(), Node> { @@ -845,8 +851,13 @@ impl NodeOps for Node48 { self.len <= 16 } - fn get_any_child(&self) -> Option> { - todo!() + fn get_any_child(&self) -> Option<&NodeV> { + debug_assert!(self.size() > 0); + + match unsafe { self.children.get_unchecked(0).deref() } { + Either::Left(node) => node.get_any_child(), + Either::Right(nodev) => Some(nodev), + } } fn insert(&mut self, key: u8, node: Node) -> Result<(), Node> { @@ -992,8 +1003,19 @@ impl NodeOps for Node256 { self.len <= 48 } - fn get_any_child(&self) -> Option> { - todo!() + fn get_any_child(&self) -> Option<&NodeV> { + debug_assert!(self.size() > 0); + + for child in self.children.iter() { + if !child.is_null() { + return match child.deref() { + Either::Left(node) => node.get_any_child(), + Either::Right(nodev) => Some(nodev), + }; + } + } + + unreachable!() } fn insert(&mut self, key: u8, node: Node) -> Result<(), Node> { @@ -1120,17 +1142,27 @@ impl SequentialMap for ART { match current_ref.deref_mut() { Either::Left(node) => { - let key = keys[depth]; let new = NodeV::new(keys.clone(), value); if common_prefix == node.header().len { // just insert value into this node + let key = keys[depth]; let insert = node.insert(key, Node::new(new, NodeType::Value)); debug_assert!(insert.is_ok()); } else { + drop(node); + // split prefix + let key = keys[depth + common_prefix as usize]; let mut inter_node = Node4::::default(); + // println!( + // "split prefix: {}, {:?}, {}", + // common_prefix, + // keys.get(depth..(depth + common_prefix as usize)), + // key + // ); + unsafe { ptr::copy_nonoverlapping( keys.as_ptr().add(depth), @@ -1138,27 +1170,57 @@ impl SequentialMap for ART { common_prefix as usize, ); } - inter_node.header.len = common_prefix; - // re-set the old's prefix - let header = node.header_mut(); - let prefix = header.prefix.clone(); - unsafe { - ptr::copy_nonoverlapping( - prefix.as_ptr(), - header.prefix.as_mut_ptr(), - (header.len - common_prefix) as usize, - ) - }; - header.len = header.len - common_prefix; - + // replace with inter_node and get old node let current = unsafe { current.as_mut() }; let old = mem::replace(current, Node::new(inter_node, NodeType::Node4)); let current = current.deref_mut().left().unwrap(); - let insert_old = - current.insert(node.header().prefix[depth + common_prefix as usize], old); + // get old's key and re-set the old's prefix + let old_ref = old.deref_mut().left().unwrap(); + let header = old_ref.header(); + + let old_key; + + if header.len > PREFIX_LEN as u32 { + // need to get omitted prefix from any child + // println!("big long prefix"); + + let prefix = old_ref.get_any_child().unwrap().key.clone(); + let prefix_start = depth + common_prefix as usize + 1; + + let header = old_ref.header_mut(); + unsafe { + ptr::copy_nonoverlapping( + prefix.as_ptr().add(prefix_start), + header.prefix.as_mut_ptr(), + min(PREFIX_LEN, header.len as usize - (common_prefix + 1) as usize), + ) + }; + header.len -= common_prefix + 1; + + old_key = unsafe { *prefix.get_unchecked(depth + common_prefix as usize) }; + } else { + // just move prefix + // println!("just move prefix"); + + old_key = unsafe { *header.prefix.get_unchecked(common_prefix as usize) }; + + let header = old_ref.header_mut(); + unsafe { + ptr::copy( + header.prefix.as_ptr().add(common_prefix as usize + 1), + header.prefix.as_mut_ptr(), + (header.len - (common_prefix + 1)) as usize, + ) + }; + header.len -= common_prefix + 1; + } + + // println!("old key: {}", old_key); + + let insert_old = current.insert(old_key, old); debug_assert!(insert_old.is_ok()); let insert_new = current.insert(key, Node::new(new, NodeType::Value)); debug_assert!(insert_new.is_ok()); @@ -1176,6 +1238,8 @@ impl SequentialMap for ART { // insert inter node with zero prefix // ex) 'aE', 'aaE' // println!("split with same index {}", keys[depth]); + + let mut common_prefix = 0; while keys[depth + common_prefix] == nodev.key[depth + common_prefix] { @@ -1194,10 +1258,10 @@ impl SequentialMap for ART { ptr::copy_nonoverlapping( keys.as_ptr().add(depth), inter_node.header.prefix.as_mut_ptr(), - min(common_prefix, PREFIX_LEN), + min(PREFIX_LEN, common_prefix), ); } - inter_node.header.len = min(common_prefix, PREFIX_LEN) as u32; + inter_node.header.len = common_prefix as u32; let current = unsafe { current.as_mut() }; let old = mem::replace(current, Node::new(inter_node, NodeType::Node4)); diff --git a/src/util/mod.rs b/src/util/mod.rs index 4bb9d59..90a1913 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,5 +1,4 @@ use std::ptr; -use either::Either; pub mod random; diff --git a/tests/art/mod.rs b/tests/art/mod.rs index 092f117..35d6138 100644 --- a/tests/art/mod.rs +++ b/tests/art/mod.rs @@ -46,3 +46,28 @@ fn test_large_key_art() { assert_eq!(art.remove(&"12345678901234567890123456789012345678901234567890".to_string()), Ok(5)); assert_eq!(art.remove(&"123456789012345678901234567890123456789012345678901234567890".to_string()), Ok(6)); } + +#[test] +fn test_split_key_insert_art() { + let mut art: ART = ART::new(); + assert_eq!(art.insert(&"123456789012345678901234567890123456789012345678901234567890".to_string(), 6), Ok(())); + assert_eq!(art.insert(&"12345678901234567890123456789012345678901234567890".to_string(), 5), Ok(())); + assert_eq!(art.lookup(&"12345678901234567890123456789012345678901234567890".to_string()), Some(&5)); + assert_eq!(art.lookup(&"123456789012345678901234567890123456789012345678901234567890".to_string()), Some(&6)); + assert_eq!(art.insert(&"1234567890123456789012345678901234567890".to_string(), 4), Ok(())); + assert_eq!(art.insert(&"123456789012345678901234567890".to_string(), 3), Ok(())); + assert_eq!(art.insert(&"12345678901234567890".to_string(), 2), Ok(())); + assert_eq!(art.insert(&"1234567890".to_string(), 1), Ok(())); + assert_eq!(art.lookup(&"1234567890".to_string()), Some(&1)); + assert_eq!(art.lookup(&"12345678901234567890".to_string()), Some(&2)); + assert_eq!(art.lookup(&"123456789012345678901234567890".to_string()), Some(&3)); + assert_eq!(art.lookup(&"1234567890123456789012345678901234567890".to_string()), Some(&4)); + assert_eq!(art.lookup(&"12345678901234567890123456789012345678901234567890".to_string()), Some(&5)); + assert_eq!(art.lookup(&"123456789012345678901234567890123456789012345678901234567890".to_string()), Some(&6)); + assert_eq!(art.remove(&"123456789012345678901234567890123456789012345678901234567890".to_string()), Ok(6)); + assert_eq!(art.remove(&"12345678901234567890123456789012345678901234567890".to_string()), Ok(5)); + assert_eq!(art.remove(&"1234567890123456789012345678901234567890".to_string()), Ok(4)); + assert_eq!(art.remove(&"123456789012345678901234567890".to_string()), Ok(3)); + assert_eq!(art.remove(&"12345678901234567890".to_string()), Ok(2)); + assert_eq!(art.remove(&"1234567890".to_string()), Ok(1)); +} From d9d3ad8c70de1c5995ebc9d32883e0bc42c5077c Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Thu, 25 Nov 2021 00:14:20 +0900 Subject: [PATCH 12/19] Ony bug on remove some make drop another child...? --- src/art/mod.rs | 12 +++++++++--- src/linkedlist/mod.rs | 2 ++ tests/art/mod.rs | 12 ++++++++++++ tests/util/map.rs | 14 +++++++++----- 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/art/mod.rs b/src/art/mod.rs index 67f99a2..365bbed 100644 --- a/src/art/mod.rs +++ b/src/art/mod.rs @@ -1229,7 +1229,7 @@ impl SequentialMap for ART { Ok(()) } Either::Right(nodev) => { - if depth == keys.len() { + if *nodev.key == keys { return Err(value); } @@ -1238,8 +1238,6 @@ impl SequentialMap for ART { // insert inter node with zero prefix // ex) 'aE', 'aaE' // println!("split with same index {}", keys[depth]); - - let mut common_prefix = 0; while keys[depth + common_prefix] == nodev.key[depth + common_prefix] { @@ -1289,6 +1287,10 @@ impl SequentialMap for ART { let node = left_or!(current.deref(), break); depth += node.header().len as usize; + if depth >= keys.len() { + return None; + } + if let Some(node) = node.lookup(keys[depth]) { depth += 1; current = node; @@ -1321,6 +1323,10 @@ impl SequentialMap for ART { let node = current_ref.deref_mut().unwrap_left(); depth += node.header().len as usize; + if depth >= keys.len() { + return Err(()); + } + if let Some(node) = node.lookup_mut(keys[depth]) { // println!("{:?}, key: {}, {}", node, keys[depth], depth); diff --git a/src/linkedlist/mod.rs b/src/linkedlist/mod.rs index 7fd37e0..f7c8f88 100644 --- a/src/linkedlist/mod.rs +++ b/src/linkedlist/mod.rs @@ -1,10 +1,12 @@ use crate::map::SequentialMap; // simple sequential linked list +#[derive(Debug)] pub struct LinkedList { head: Node, // dummy node with key = Default, but the key is not considered on algorithm } +#[derive(Debug)] struct Node { key: K, value: V, diff --git a/tests/art/mod.rs b/tests/art/mod.rs index 35d6138..6df7b3e 100644 --- a/tests/art/mod.rs +++ b/tests/art/mod.rs @@ -1,6 +1,8 @@ use cds::art::ART; use cds::map::SequentialMap; +use crate::util::map::stress_sequential; + #[test] fn test_art() { let mut art: ART = ART::new(); @@ -71,3 +73,13 @@ fn test_split_key_insert_art() { assert_eq!(art.remove(&"12345678901234567890".to_string()), Ok(2)); assert_eq!(art.remove(&"1234567890".to_string()), Ok(1)); } + +#[test] +fn stress_art() { + stress_sequential::>(100_000); +} + +#[test] +fn test_fuzz_case_art() { + +} diff --git a/tests/util/map.rs b/tests/util/map.rs index 1b1d4c2..934b0f8 100644 --- a/tests/util/map.rs +++ b/tests/util/map.rs @@ -33,7 +33,7 @@ enum OperationType { pub fn stress_sequential(iter: u64) where K: Ord + Clone + Random + Debug, - M: SequentialMap, + M: SequentialMap + Debug, { // 10 times try to get not existing key, or return if failing let gen_not_existing_key = |rng: &mut ThreadRng, map: &BTreeMap| { @@ -121,6 +121,8 @@ where } Operation::Remove => { // should success + println!("{:?}", map); + let value = ref_map.remove(&existing_key); println!( @@ -132,15 +134,17 @@ where assert_eq!(map.remove(&existing_key).ok(), value); // early stop code if the remove has any problems - // for key in ref_map.keys().collect::>() { - // assert_eq!(map.lookup(key).is_some(), true, "the key {:?} is not found.", key); - // } + println!("{:?}", map); + for key in ref_map.keys().collect::>() { + assert_eq!(map.lookup(key).is_some(), true, "the key {:?} is not found.", key); + } } } } } } +#[derive(Debug)] struct Sequentialized where K: Eq, @@ -189,7 +193,7 @@ where pub fn stress_concurrent_as_sequential(iter: u64) where K: Ord + Clone + Random + Debug, - M: ConcurrentMap, + M: ConcurrentMap + Debug, { stress_sequential::>(iter) } From 90c9f2040a8e21fa0a22f4a794fdb88441564c71 Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Thu, 25 Nov 2021 00:48:44 +0900 Subject: [PATCH 13/19] Ony bug on insert node make drop another child...? --- src/art/mod.rs | 21 ++++++++++++++++----- tests/art/mod.rs | 7 +------ tests/util/map.rs | 19 +++++++++++-------- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/src/art/mod.rs b/src/art/mod.rs index 365bbed..8170af9 100644 --- a/src/art/mod.rs +++ b/src/art/mod.rs @@ -174,6 +174,8 @@ impl Node { let node_type = self.node_type(); let node = self.deref_mut().left().unwrap(); + println!("EXTEND!: {:?}", node_type); + match node_type { NodeType::Value => unreachable!(), NodeType::Node4 => unsafe { @@ -208,23 +210,25 @@ impl Node { let node_type = self.node_type(); let node = self.deref_mut().left().unwrap(); + println!("SHRINK!: {:?}", node_type); + match node_type { NodeType::Value => unreachable!(), NodeType::Node4 => {} NodeType::Node16 => unsafe { let node = node as *const dyn NodeOps as *const Node16; let new = Box::new(Node4::from(ptr::read(node))); - self.pointer = Box::into_raw(new) as usize | node_type as usize; + self.pointer = Box::into_raw(new) as usize | NodeType::Node4 as usize; }, NodeType::Node48 => unsafe { let node = node as *const dyn NodeOps as *const Node48; let new = Box::new(Node16::from(ptr::read(node))); - self.pointer = Box::into_raw(new) as usize | node_type as usize; + self.pointer = Box::into_raw(new) as usize | NodeType::Node16 as usize; }, NodeType::Node256 => unsafe { let node = node as *const dyn NodeOps as *const Node256; let new = Box::new(Node48::from(ptr::read(node))); - self.pointer = Box::into_raw(new) as usize | node_type as usize; + self.pointer = Box::into_raw(new) as usize | NodeType::Node48 as usize; }, } } @@ -382,6 +386,9 @@ impl From> for Node4 { fn from(node: Node16) -> Self { debug_assert!(node.len <= 4); + // println!("from Node16: {:?}", node); + + let mut new = Self::default(); new.header = node.header; new.len = node.len; @@ -395,6 +402,8 @@ impl From> for Node4 { ); } + // println!("to Node4: {:?}", new); + new } } @@ -599,6 +608,8 @@ impl From> for Node16 { i += 1; } } + + debug_assert!(i <= 16); } new @@ -1361,7 +1372,7 @@ impl SequentialMap for ART { if let Some(mut parent) = parent { if current_node.size() == 0 { // remove the node - // println!("empty"); + println!("empty"); let parent = unsafe { parent.as_mut() }; let parent_ref = parent.deref_mut().left().unwrap(); @@ -1374,7 +1385,7 @@ impl SequentialMap for ART { remove.inner::>(); } else if current_node.is_shrinkable() { // shrink the node - // println!("shrinkable"); + println!("shrinkable"); current_ref.shrink(); } } diff --git a/tests/art/mod.rs b/tests/art/mod.rs index 6df7b3e..e99c9d9 100644 --- a/tests/art/mod.rs +++ b/tests/art/mod.rs @@ -76,10 +76,5 @@ fn test_split_key_insert_art() { #[test] fn stress_art() { - stress_sequential::>(100_000); -} - -#[test] -fn test_fuzz_case_art() { - + stress_sequential::>(1_000_000); } diff --git a/tests/util/map.rs b/tests/util/map.rs index 934b0f8..11920d6 100644 --- a/tests/util/map.rs +++ b/tests/util/map.rs @@ -58,6 +58,8 @@ where let mut rng = thread_rng(); for i in 1..=iter { + let before = format!("{:?}", map); + let t = types.choose(&mut rng).unwrap(); let ref_map_keys = ref_map.keys().collect::>(); let existing_key = ref_map_keys.choose(&mut rng); @@ -121,8 +123,6 @@ where } Operation::Remove => { // should success - println!("{:?}", map); - let value = ref_map.remove(&existing_key); println!( @@ -132,15 +132,18 @@ where value.unwrap() ); assert_eq!(map.remove(&existing_key).ok(), value); - - // early stop code if the remove has any problems - println!("{:?}", map); - for key in ref_map.keys().collect::>() { - assert_eq!(map.lookup(key).is_some(), true, "the key {:?} is not found.", key); - } } } } + + // early stop code if the op has any problems + for key in ref_map.keys().collect::>() { + if map.lookup(key).is_none() { + println!("before: {}", before); + println!("after: {:?}", map); + panic!("the key {:?} is not found.", key); + } + } } } From 776376bd0427002e5e19cbc9cbd75a7d3ee46317 Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Thu, 25 Nov 2021 01:23:43 +0900 Subject: [PATCH 14/19] Need to test for extend and shrink --- src/art/mod.rs | 62 +++++++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/src/art/mod.rs b/src/art/mod.rs index 8170af9..e6a4b24 100644 --- a/src/art/mod.rs +++ b/src/art/mod.rs @@ -27,7 +27,12 @@ impl Debug for NodeHeader { unsafe { f.debug_struct("NodeHeader") .field("len", &self.len) - .field("prefix", &self.prefix.get_unchecked(..min(PREFIX_LEN, self.len as usize))) + .field( + "prefix", + &self + .prefix + .get_unchecked(..min(PREFIX_LEN, self.len as usize)), + ) .finish() } } @@ -388,7 +393,6 @@ impl From> for Node4 { // println!("from Node16: {:?}", node); - let mut new = Self::default(); new.header = node.header; new.len = node.len; @@ -761,11 +765,16 @@ impl Debug for Node48 { .map(|(key, _)| key) .collect::>(); + let valid_children = valid_keys + .iter() + .map(|key| &self.children[self.keys[*key] as usize]) + .collect::>(); + f.debug_struct("Node48") .field("header", &self.header) .field("len", &self.len) .field("keys", &valid_keys) - .field("children", &self.children()) + .field("children", &valid_children) .finish() } } @@ -831,16 +840,6 @@ impl From> for Node48 { } } -impl Node48 { - fn children(&self) -> &[Node] { - unsafe { self.children.get_unchecked(..self.len as usize) } - } - - fn mut_children(&mut self) -> &mut [Node] { - unsafe { self.children.get_unchecked_mut(..self.len as usize) } - } -} - impl NodeOps for Node48 { fn header(&self) -> &NodeHeader { &self.header @@ -879,13 +878,16 @@ impl NodeOps for Node48 { if *index != 0xff { Err(node) } else { - unsafe { - *self.children.get_unchecked_mut(self.len) = node; + for (idx, child) in self.children.iter_mut().enumerate() { + if !child.is_null() { + *child = node; + *index = idx as u8; + self.len += 1; + return Ok(()); + } } - *index = self.len as u8; - self.len += 1; - Ok(()) + unreachable!() } } @@ -928,7 +930,10 @@ impl NodeOps for Node48 { Err(()) } else { unsafe { - let node = slice_remove(self.mut_children(), index as usize); + let node = mem::replace( + self.children.get_unchecked_mut(index as usize), + Node::null(), + ); *self.keys.get_unchecked_mut(key as usize) = 0xff; self.len -= 1; Ok(node) @@ -1129,7 +1134,7 @@ impl SequentialMap for ART { let node = left_or!(current_ref.deref_mut(), break); if let Err(common_depth) = Node::prefix_match(&keys, node, depth) { - // println!("same common prefix"); + println!("same common prefix"); common_prefix = (common_depth - depth) as u32; break; } @@ -1157,9 +1162,11 @@ impl SequentialMap for ART { if common_prefix == node.header().len { // just insert value into this node - let key = keys[depth]; + println!("just insert"); + let key = keys[depth + common_prefix as usize]; let insert = node.insert(key, Node::new(new, NodeType::Value)); debug_assert!(insert.is_ok()); + // println!("result: {:?}", current_ref); } else { drop(node); @@ -1196,7 +1203,7 @@ impl SequentialMap for ART { if header.len > PREFIX_LEN as u32 { // need to get omitted prefix from any child - // println!("big long prefix"); + println!("big long prefix"); let prefix = old_ref.get_any_child().unwrap().key.clone(); let prefix_start = depth + common_prefix as usize + 1; @@ -1206,7 +1213,10 @@ impl SequentialMap for ART { ptr::copy_nonoverlapping( prefix.as_ptr().add(prefix_start), header.prefix.as_mut_ptr(), - min(PREFIX_LEN, header.len as usize - (common_prefix + 1) as usize), + min( + PREFIX_LEN, + header.len as usize - (common_prefix + 1) as usize, + ), ) }; header.len -= common_prefix + 1; @@ -1214,7 +1224,7 @@ impl SequentialMap for ART { old_key = unsafe { *prefix.get_unchecked(depth + common_prefix as usize) }; } else { // just move prefix - // println!("just move prefix"); + println!("just move prefix"); old_key = unsafe { *header.prefix.get_unchecked(common_prefix as usize) }; @@ -1229,7 +1239,7 @@ impl SequentialMap for ART { header.len -= common_prefix + 1; } - // println!("old key: {}", old_key); + println!("old key: {}", old_key); let insert_old = current.insert(old_key, old); debug_assert!(insert_old.is_ok()); @@ -1248,7 +1258,7 @@ impl SequentialMap for ART { // insert inter node with zero prefix // ex) 'aE', 'aaE' - // println!("split with same index {}", keys[depth]); + println!("split with same index {}", keys[depth]); let mut common_prefix = 0; while keys[depth + common_prefix] == nodev.key[depth + common_prefix] { From 44fc6439936bceb7c4139f372d74ead2c5d92eaf Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Thu, 25 Nov 2021 21:51:55 +0900 Subject: [PATCH 15/19] Fix some bugs --- src/art/mod.rs | 34 +++++++++++++--------------------- tests/art/mod.rs | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/src/art/mod.rs b/src/art/mod.rs index e6a4b24..c4e2257 100644 --- a/src/art/mod.rs +++ b/src/art/mod.rs @@ -179,8 +179,6 @@ impl Node { let node_type = self.node_type(); let node = self.deref_mut().left().unwrap(); - println!("EXTEND!: {:?}", node_type); - match node_type { NodeType::Value => unreachable!(), NodeType::Node4 => unsafe { @@ -215,8 +213,6 @@ impl Node { let node_type = self.node_type(); let node = self.deref_mut().left().unwrap(); - println!("SHRINK!: {:?}", node_type); - match node_type { NodeType::Value => unreachable!(), NodeType::Node4 => {} @@ -391,8 +387,6 @@ impl From> for Node4 { fn from(node: Node16) -> Self { debug_assert!(node.len <= 4); - // println!("from Node16: {:?}", node); - let mut new = Self::default(); new.header = node.header; new.len = node.len; @@ -406,8 +400,6 @@ impl From> for Node4 { ); } - // println!("to Node4: {:?}", new); - new } } @@ -823,18 +815,16 @@ impl From> for Node48 { let mut new = Self::default(); unsafe { - // TODO: child is dropping? for (key, child) in node.children.iter().enumerate() { if !child.is_null() { + *new.keys.get_unchecked_mut(key) = new.len as u8; + *new.children.get_unchecked_mut(new.len) = ptr::read(child); new.len += 1; - *new.keys.get_unchecked_mut(key) = (new.len - 1) as u8; - *new.children.get_unchecked_mut(new.len - 1) = ptr::read(child); } } } new.header = node.header; - new.len = node.len; new } @@ -879,7 +869,7 @@ impl NodeOps for Node48 { Err(node) } else { for (idx, child) in self.children.iter_mut().enumerate() { - if !child.is_null() { + if child.is_null() { *child = node; *index = idx as u8; self.len += 1; @@ -1038,6 +1028,7 @@ impl NodeOps for Node256 { let child = unsafe { self.children.get_unchecked_mut(key as usize) }; if child.is_null() { + self.len += 1; *child = node; Ok(()) } else { @@ -1083,6 +1074,7 @@ impl NodeOps for Node256 { Err(()) } else { let node = mem::replace(child, Node::null()); + self.len -= 1; Ok(node) } } @@ -1134,7 +1126,7 @@ impl SequentialMap for ART { let node = left_or!(current_ref.deref_mut(), break); if let Err(common_depth) = Node::prefix_match(&keys, node, depth) { - println!("same common prefix"); + // println!("same common prefix"); common_prefix = (common_depth - depth) as u32; break; } @@ -1162,7 +1154,7 @@ impl SequentialMap for ART { if common_prefix == node.header().len { // just insert value into this node - println!("just insert"); + // println!("just insert"); let key = keys[depth + common_prefix as usize]; let insert = node.insert(key, Node::new(new, NodeType::Value)); debug_assert!(insert.is_ok()); @@ -1203,7 +1195,7 @@ impl SequentialMap for ART { if header.len > PREFIX_LEN as u32 { // need to get omitted prefix from any child - println!("big long prefix"); + // println!("big long prefix"); let prefix = old_ref.get_any_child().unwrap().key.clone(); let prefix_start = depth + common_prefix as usize + 1; @@ -1224,7 +1216,7 @@ impl SequentialMap for ART { old_key = unsafe { *prefix.get_unchecked(depth + common_prefix as usize) }; } else { // just move prefix - println!("just move prefix"); + // println!("just move prefix"); old_key = unsafe { *header.prefix.get_unchecked(common_prefix as usize) }; @@ -1239,7 +1231,7 @@ impl SequentialMap for ART { header.len -= common_prefix + 1; } - println!("old key: {}", old_key); + // println!("old key: {}", old_key); let insert_old = current.insert(old_key, old); debug_assert!(insert_old.is_ok()); @@ -1258,7 +1250,7 @@ impl SequentialMap for ART { // insert inter node with zero prefix // ex) 'aE', 'aaE' - println!("split with same index {}", keys[depth]); + // println!("split with same index {}", keys[depth]); let mut common_prefix = 0; while keys[depth + common_prefix] == nodev.key[depth + common_prefix] { @@ -1382,7 +1374,7 @@ impl SequentialMap for ART { if let Some(mut parent) = parent { if current_node.size() == 0 { // remove the node - println!("empty"); + // println!("empty"); let parent = unsafe { parent.as_mut() }; let parent_ref = parent.deref_mut().left().unwrap(); @@ -1395,7 +1387,7 @@ impl SequentialMap for ART { remove.inner::>(); } else if current_node.is_shrinkable() { // shrink the node - println!("shrinkable"); + // println!("shrinkable"); current_ref.shrink(); } } diff --git a/tests/art/mod.rs b/tests/art/mod.rs index e99c9d9..2fbc8b0 100644 --- a/tests/art/mod.rs +++ b/tests/art/mod.rs @@ -1,5 +1,7 @@ use cds::art::ART; use cds::map::SequentialMap; +use rand::prelude::SliceRandom; +use rand::thread_rng; use crate::util::map::stress_sequential; @@ -27,6 +29,7 @@ fn test_art() { } #[test] +#[rustfmt::skip] fn test_large_key_art() { let mut art: ART = ART::new(); assert_eq!(art.insert(&"1234567890".to_string(), 1), Ok(())); @@ -50,6 +53,7 @@ fn test_large_key_art() { } #[test] +#[rustfmt::skip] fn test_split_key_insert_art() { let mut art: ART = ART::new(); assert_eq!(art.insert(&"123456789012345678901234567890123456789012345678901234567890".to_string(), 6), Ok(())); @@ -74,6 +78,41 @@ fn test_split_key_insert_art() { assert_eq!(art.remove(&"1234567890".to_string()), Ok(1)); } +#[test] +fn test_extend_shrink_art() { + let mut art: ART = ART::new(); + let mut keys = Vec::new(); + + for i in '0'..'z' { + let key = "a".to_string() + &i.to_string(); + assert_eq!(art.insert(&key, i as usize), Ok(())); + keys.push(key); + + for k in &keys { + assert!(art.lookup(k).is_some(), "key: {:?}", k); + } + } + + let mut rng = thread_rng(); + keys.shuffle(&mut rng); + + let mut removed_keys = Vec::new(); + + for _ in 0..keys.len() { + let key = keys.pop().unwrap(); + assert!(art.remove(&key).is_ok()); + removed_keys.push(key); + + for k in &keys { + assert!(art.lookup(k).is_some(), "key: {:?}", k); + } + + for k in &removed_keys { + assert!(art.lookup(k).is_none(), "key: {:?}", k); + } + } +} + #[test] fn stress_art() { stress_sequential::>(1_000_000); From c60f32d5293fd62e5fe6971c95a394d8d3b15128 Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Thu, 25 Nov 2021 22:54:53 +0900 Subject: [PATCH 16/19] Add bench for ART --- benches/sequential.rs | 38 ++++++++++++++++++++++---- benches/util/sequential.rs | 56 +++++++++++++++++++++----------------- src/art/mod.rs | 44 +++++++++++++++++++++++++++++- src/util/random.rs | 2 +- 4 files changed, 107 insertions(+), 33 deletions(-) diff --git a/benches/sequential.rs b/benches/sequential.rs index 7f5b38d..8ae7769 100644 --- a/benches/sequential.rs +++ b/benches/sequential.rs @@ -4,7 +4,7 @@ use criterion::{criterion_main, SamplingMode, Throughput}; mod util; -use cds::{avltree::AVLTree, btree::BTree}; +use cds::{art::ART, avltree::AVLTree, btree::BTree}; use criterion::{criterion_group, Criterion}; use util::sequential::fuzz_sequential_logs; @@ -23,7 +23,7 @@ const OPS_RATE: [(usize, usize, usize); 7] = [ (50, 0, 50), ]; -fn bench_vs_btreemap(c: &mut Criterion) { +fn bench_u64_vs_btreemap(c: &mut Criterion) { for (insert, lookup, remove) in OPS_RATE { let logs = fuzz_sequential_logs( 200, @@ -34,7 +34,7 @@ fn bench_vs_btreemap(c: &mut Criterion) { ); let mut group = c.benchmark_group(format!( - "Inserted {:+e}, Ops (I: {}%, L: {}%, R: {}%, total: {:+e})", + "[u64] Inserted {:+e}, Ops (I: {}%, L: {}%, R: {}%, total: {:+e})", MAP_ALREADY_INSERTED, insert, lookup, remove, MAP_TOTAL_OPS )); group.measurement_time(Duration::from_secs(15)); // Note: make almost same the measurement_time to iters * avg_op_time @@ -43,12 +43,38 @@ fn bench_vs_btreemap(c: &mut Criterion) { group.throughput(Throughput::Elements(MAP_TOTAL_OPS as u64)); bench_logs_btreemap(logs.clone(), &mut group); - bench_logs_sequential_map::>("BTree", logs.clone(), &mut group); - bench_logs_sequential_map::>("AVLTree", logs, &mut group); + bench_logs_sequential_map::>("BTree", logs.clone(), &mut group); + bench_logs_sequential_map::>("AVLTree", logs, &mut group); } } -criterion_group!(bench, bench_vs_btreemap); +fn bench_string_vs_btreemap(c: &mut Criterion) { + for (insert, lookup, remove) in OPS_RATE { + let logs = fuzz_sequential_logs( + 50, + MAP_ALREADY_INSERTED, + MAP_TOTAL_OPS * insert / 100, + MAP_TOTAL_OPS * lookup / 100, + MAP_TOTAL_OPS * remove / 100, + ); + + let mut group = c.benchmark_group(format!( + "[String] Inserted {:+e}, Ops (I: {}%, L: {}%, R: {}%, total: {:+e})", + MAP_ALREADY_INSERTED, insert, lookup, remove, MAP_TOTAL_OPS + )); + group.measurement_time(Duration::from_secs(15)); // Note: make almost same the measurement_time to iters * avg_op_time + group.sampling_mode(SamplingMode::Flat); + group.sample_size(10); + group.throughput(Throughput::Elements(MAP_TOTAL_OPS as u64)); + + bench_logs_btreemap(logs.clone(), &mut group); + bench_logs_sequential_map::>("ART", logs.clone(), &mut group); + bench_logs_sequential_map::>("BTree", logs.clone(), &mut group); + bench_logs_sequential_map::>("AVLTree", logs, &mut group); + } +} + +criterion_group!(bench, bench_string_vs_btreemap, bench_u64_vs_btreemap); criterion_main! { bench, } diff --git a/benches/util/sequential.rs b/benches/util/sequential.rs index f10163f..e1c4798 100644 --- a/benches/util/sequential.rs +++ b/benches/util/sequential.rs @@ -3,43 +3,48 @@ use std::{ time::{Duration, Instant}, }; -use cds::map::SequentialMap; +use cds::{map::SequentialMap, util::random::Random}; use criterion::{black_box, measurement::WallTime, BenchmarkGroup}; -use rand::{prelude::SliceRandom, thread_rng, Rng}; +use rand::{prelude::SliceRandom, thread_rng}; #[derive(Clone, Copy)] -pub enum Op { - Insert(u64), - Lookup(u64), - Remove(u64), +pub enum Op { + Insert(K, V), + Lookup(K), + Remove(K), } -pub fn fuzz_sequential_logs( +type Logs = Vec<(Vec<(K, V)>, Vec>)>; + +pub fn fuzz_sequential_logs( iters: u64, already_inserted: u64, insert: usize, lookup: usize, remove: usize, -) -> Vec<(Vec, Vec)> { +) -> Logs { let mut rng = thread_rng(); let mut result = Vec::new(); for _ in 0..iters { let mut logs = Vec::new(); - let mut pre_inserted: Vec = (0..already_inserted).collect(); - pre_inserted.shuffle(&mut rng); + let mut pre_inserted = Vec::new(); + + for _ in 0..already_inserted { + pre_inserted.push((K::gen(&mut rng), V::gen(&mut rng))); + } for _ in 0..insert { - logs.push(Op::Insert(rng.gen_range(already_inserted..u64::MAX))); + logs.push(Op::Insert(K::gen(&mut rng), V::gen(&mut rng))); } for _ in 0..lookup { - logs.push(Op::Lookup(rng.gen_range(0..already_inserted))); + logs.push(Op::Lookup(K::gen(&mut rng))); } for _ in 0..remove { - logs.push(Op::Remove(rng.gen_range(0..already_inserted))); + logs.push(Op::Remove(K::gen(&mut rng))); } logs.shuffle(&mut rng); @@ -49,7 +54,7 @@ pub fn fuzz_sequential_logs( result } -pub fn bench_logs_btreemap(mut logs: Vec<(Vec, Vec)>, c: &mut BenchmarkGroup) { +pub fn bench_logs_btreemap(mut logs: Logs, c: &mut BenchmarkGroup) { c.bench_function("std::BTreeMap", |b| { b.iter_custom(|iters| { let mut duration = Duration::ZERO; @@ -59,15 +64,15 @@ pub fn bench_logs_btreemap(mut logs: Vec<(Vec, Vec)>, c: &mut Benchmark let mut map = BTreeMap::new(); // pre-insert - for key in pre_inserted { - let _ = map.insert(key, key); + for (key, value) in pre_inserted { + let _ = map.insert(key, value); } let start = Instant::now(); for op in logs { match op { - Op::Insert(key) => { - let _ = black_box(map.insert(key, key)); + Op::Insert(key, value) => { + let _ = black_box(map.insert(key, value)); } Op::Lookup(key) => { let _ = black_box(map.get(&key)); @@ -85,12 +90,13 @@ pub fn bench_logs_btreemap(mut logs: Vec<(Vec, Vec)>, c: &mut Benchmark }); } -pub fn bench_logs_sequential_map( +pub fn bench_logs_sequential_map( name: &str, - mut logs: Vec<(Vec, Vec)>, + mut logs: Logs, c: &mut BenchmarkGroup, ) where - M: SequentialMap, + K: Eq + Random, + M: SequentialMap, { c.bench_function(name, |b| { b.iter_custom(|iters| { @@ -101,15 +107,15 @@ pub fn bench_logs_sequential_map( let mut map = M::new(); // pre-insert - for key in pre_inserted { - let _ = map.insert(&key, key); + for (key, value) in pre_inserted { + let _ = map.insert(&key, value); } let start = Instant::now(); for op in logs { match op { - Op::Insert(key) => { - let _ = black_box(map.insert(&key, key)); + Op::Insert(key, value) => { + let _ = black_box(map.insert(&key, value)); } Op::Lookup(key) => { let _ = black_box(map.lookup(&key)); diff --git a/src/art/mod.rs b/src/art/mod.rs index c4e2257..cf1c507 100644 --- a/src/art/mod.rs +++ b/src/art/mod.rs @@ -1103,7 +1103,49 @@ impl Debug for ART { } } -impl ART {} +impl Drop for ART { + fn drop(&mut self) { + fn clean(node: &Node) { + match node.node_type() { + NodeType::Value => unsafe { drop(ptr::read(node).inner::>())}, + NodeType::Node4 => { + let node4 = unsafe { ptr::read(node).inner::>() }; + + for child in node4.children() { + clean(child); + } + }, + NodeType::Node16 => { + let node16 = unsafe { ptr::read(node).inner::>() }; + + for child in node16.children() { + clean(child); + } + }, + NodeType::Node48 => { + let node48 = unsafe { ptr::read(node).inner::>() }; + + for child in &node48.children { + if !child.is_null() { + clean(child); + } + } + }, + NodeType::Node256 => { + let node256 = unsafe { ptr::read(node).inner::>() }; + + for child in &node256.children { + if !child.is_null() { + clean(child); + } + } + }, + } + } + + clean(&self.root); + } +} impl SequentialMap for ART { fn new() -> Self { diff --git a/src/util/random.rs b/src/util/random.rs index cb6b358..2856518 100644 --- a/src/util/random.rs +++ b/src/util/random.rs @@ -6,7 +6,7 @@ pub trait Random { } const RANDOM_STRING_MIN: usize = 0; -const RANDOM_STRING_MAX: usize = 10; +const RANDOM_STRING_MAX: usize = 128; impl Random for String { // get random string whose length is in [RANDOM_STRING_MIN, RANDOM_STRING_MAX) From b3f3b536d101f0f2be2a3a432979ed27d2e836da Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Thu, 25 Nov 2021 23:26:47 +0900 Subject: [PATCH 17/19] Fix generate logs --- benches/util/sequential.rs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/benches/util/sequential.rs b/benches/util/sequential.rs index e1c4798..75fd279 100644 --- a/benches/util/sequential.rs +++ b/benches/util/sequential.rs @@ -16,7 +16,7 @@ pub enum Op { type Logs = Vec<(Vec<(K, V)>, Vec>)>; -pub fn fuzz_sequential_logs( +pub fn fuzz_sequential_logs( iters: u64, already_inserted: u64, insert: usize, @@ -39,12 +39,20 @@ pub fn fuzz_sequential_logs( logs.push(Op::Insert(K::gen(&mut rng), V::gen(&mut rng))); } - for _ in 0..lookup { - logs.push(Op::Lookup(K::gen(&mut rng))); + for i in 0..lookup { + if i % 2 == 0 { + logs.push(Op::Lookup(K::gen(&mut rng))); + } else { + logs.push(Op::Lookup(pre_inserted.choose(&mut rng).cloned().unwrap().0)); + } } - for _ in 0..remove { - logs.push(Op::Remove(K::gen(&mut rng))); + for i in 0..remove { + if i % 2 == 0 { + logs.push(Op::Remove(K::gen(&mut rng))); + } else { + logs.push(Op::Remove(pre_inserted.choose(&mut rng).cloned().unwrap().0)); + } } logs.shuffle(&mut rng); From 5dffbb2ac35473cdc0a85de8a2470ac5676a89b9 Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Sat, 27 Nov 2021 19:23:01 +0900 Subject: [PATCH 18/19] Remove printf and add comments --- src/art/mod.rs | 76 ++++++++++++++++++++---------------------------- tests/art/mod.rs | 8 ++++- 2 files changed, 39 insertions(+), 45 deletions(-) diff --git a/src/art/mod.rs b/src/art/mod.rs index cf1c507..06b26d2 100644 --- a/src/art/mod.rs +++ b/src/art/mod.rs @@ -16,7 +16,7 @@ use crate::{ }; const PREFIX_LEN: usize = 12; -const KEY_ENDMARK: u8 = 0xff; +const KEY_ENDMARK: u8 = 0xff; // invalid on utf-8. Thus, use it for preventing that any key cannot be the prefix of another key. struct NodeHeader { len: u32, // the len of prefix prefix: [u8; PREFIX_LEN], // prefix for path compression @@ -235,6 +235,9 @@ impl Node { } /// compress path if the node is Node4 with having one child + /// If self's unique one child is not NodeV(internal node), then compress path from self.header to + /// self.header + self.key(of child) + child.header and set child on self. + /// If self's one is NodeV(external node), just set child on self.(not need to compress path on header). fn compress_path(&mut self) { if self.node_type() != NodeType::Node4 { return; @@ -266,7 +269,6 @@ impl Node { child.header_mut().len += 1; if node.header.len > 0 { - // println!("prefix move"); let node_prefix_len = node.header.len as usize; let prefix_len = child.header().len as usize; @@ -1107,21 +1109,21 @@ impl Drop for ART { fn drop(&mut self) { fn clean(node: &Node) { match node.node_type() { - NodeType::Value => unsafe { drop(ptr::read(node).inner::>())}, + NodeType::Value => unsafe { drop(ptr::read(node).inner::>()) }, NodeType::Node4 => { let node4 = unsafe { ptr::read(node).inner::>() }; for child in node4.children() { clean(child); } - }, + } NodeType::Node16 => { let node16 = unsafe { ptr::read(node).inner::>() }; for child in node16.children() { clean(child); } - }, + } NodeType::Node48 => { let node48 = unsafe { ptr::read(node).inner::>() }; @@ -1130,7 +1132,7 @@ impl Drop for ART { clean(child); } } - }, + } NodeType::Node256 => { let node256 = unsafe { ptr::read(node).inner::>() }; @@ -1139,7 +1141,7 @@ impl Drop for ART { clean(child); } } - }, + } } } @@ -1147,6 +1149,19 @@ impl Drop for ART { } } +impl ART { + pub fn print_debug_info(&self) { + println!("V is {:>5} byte.", mem::size_of::()); + println!("NodeV is {:>5} byte.", mem::size_of::>()); + println!("NodeHeader is {:>5} byte.", mem::size_of::()); + println!("Node is {:>5} byte.", mem::size_of::>()); + println!("Node4 is {:>5} byte.", mem::size_of::>()); + println!("Node16 is {:>5} byte.", mem::size_of::>()); + println!("Node48 is {:>5} byte.", mem::size_of::>()); + println!("Node256 is {:>5} byte.", mem::size_of::>()); + } +} + impl SequentialMap for ART { fn new() -> Self { let root = Node::new(Node256::::default(), NodeType::Node256); @@ -1168,7 +1183,6 @@ impl SequentialMap for ART { let node = left_or!(current_ref.deref_mut(), break); if let Err(common_depth) = Node::prefix_match(&keys, node, depth) { - // println!("same common prefix"); common_prefix = (common_depth - depth) as u32; break; } @@ -1179,7 +1193,6 @@ impl SequentialMap for ART { depth += 1 + prefix as usize; current = NonNull::new(node).unwrap(); } else { - // println!("cannot find {} on {:?}", keys[depth], current_ref); common_prefix = prefix; break; } @@ -1188,8 +1201,6 @@ impl SequentialMap for ART { let current_ref = unsafe { current.as_mut() }; current_ref.extend(); - // println!("current: {:?}", current_ref); - match current_ref.deref_mut() { Either::Left(node) => { let new = NodeV::new(keys.clone(), value); @@ -1200,21 +1211,13 @@ impl SequentialMap for ART { let key = keys[depth + common_prefix as usize]; let insert = node.insert(key, Node::new(new, NodeType::Value)); debug_assert!(insert.is_ok()); - // println!("result: {:?}", current_ref); } else { - drop(node); + drop(node); // since the current(ref of node) will be changed, drop it for safety not to use it. // split prefix let key = keys[depth + common_prefix as usize]; let mut inter_node = Node4::::default(); - // println!( - // "split prefix: {}, {:?}, {}", - // common_prefix, - // keys.get(depth..(depth + common_prefix as usize)), - // key - // ); - unsafe { ptr::copy_nonoverlapping( keys.as_ptr().add(depth), @@ -1237,15 +1240,13 @@ impl SequentialMap for ART { if header.len > PREFIX_LEN as u32 { // need to get omitted prefix from any child - // println!("big long prefix"); - - let prefix = old_ref.get_any_child().unwrap().key.clone(); + let full_key = old_ref.get_any_child().unwrap().key.clone(); let prefix_start = depth + common_prefix as usize + 1; let header = old_ref.header_mut(); unsafe { ptr::copy_nonoverlapping( - prefix.as_ptr().add(prefix_start), + full_key.as_ptr().add(prefix_start), header.prefix.as_mut_ptr(), min( PREFIX_LEN, @@ -1255,11 +1256,10 @@ impl SequentialMap for ART { }; header.len -= common_prefix + 1; - old_key = unsafe { *prefix.get_unchecked(depth + common_prefix as usize) }; + old_key = + unsafe { *full_key.get_unchecked(depth + common_prefix as usize) }; } else { // just move prefix - // println!("just move prefix"); - old_key = unsafe { *header.prefix.get_unchecked(common_prefix as usize) }; let header = old_ref.header_mut(); @@ -1273,8 +1273,6 @@ impl SequentialMap for ART { header.len -= common_prefix + 1; } - // println!("old key: {}", old_key); - let insert_old = current.insert(old_key, old); debug_assert!(insert_old.is_ok()); let insert_new = current.insert(key, Node::new(new, NodeType::Value)); @@ -1292,19 +1290,13 @@ impl SequentialMap for ART { // insert inter node with zero prefix // ex) 'aE', 'aaE' - // println!("split with same index {}", keys[depth]); let mut common_prefix = 0; while keys[depth + common_prefix] == nodev.key[depth + common_prefix] { common_prefix += 1; } - // println!( - // "{}, common_prefix: {}, {}", - // depth, - // common_prefix, - // nodev.key[depth + common_prefix] - // ); + drop(nodev); // since the nodev will be changed, drop it for safety not to use it. let mut inter_node = Node4::::default(); unsafe { @@ -1318,10 +1310,10 @@ impl SequentialMap for ART { let current = unsafe { current.as_mut() }; let old = mem::replace(current, Node::new(inter_node, NodeType::Node4)); - let current = current.deref_mut().left().unwrap(); - let insert_old = current.insert(nodev.key[depth + common_prefix], old); + let old_full_key = &old.deref().right().unwrap().key; + let insert_old = current.insert(old_full_key[depth + common_prefix], old); debug_assert!(insert_old.is_ok()); let insert_new = current.insert(keys[depth + common_prefix], Node::new(new, NodeType::Value)); @@ -1383,13 +1375,10 @@ impl SequentialMap for ART { } if let Some(node) = node.lookup_mut(keys[depth]) { - // println!("{:?}, key: {}, {}", node, keys[depth], depth); - if node.node_type() == NodeType::Value { if *node.deref().right().unwrap().key == keys { break; } else { - // println!("dismatched key"); return Err(()); } } @@ -1398,7 +1387,6 @@ impl SequentialMap for ART { parent = Some(current); current = NonNull::new(node).unwrap(); } else { - // println!("fail to lookup"); return Err(()); } } @@ -1409,14 +1397,15 @@ impl SequentialMap for ART { debug_assert!(node.is_ok()); let node = node.unwrap().inner::>(); + // if it can compress path for only one child, do it. let current_ref = unsafe { current.as_mut() }; current_ref.compress_path(); + // if it was not removed since it had have at least one child, then if let Either::Left(current_node) = current_ref.deref_mut() { if let Some(mut parent) = parent { if current_node.size() == 0 { // remove the node - // println!("empty"); let parent = unsafe { parent.as_mut() }; let parent_ref = parent.deref_mut().left().unwrap(); @@ -1429,7 +1418,6 @@ impl SequentialMap for ART { remove.inner::>(); } else if current_node.is_shrinkable() { // shrink the node - // println!("shrinkable"); current_ref.shrink(); } } diff --git a/tests/art/mod.rs b/tests/art/mod.rs index 2fbc8b0..0c44756 100644 --- a/tests/art/mod.rs +++ b/tests/art/mod.rs @@ -54,7 +54,7 @@ fn test_large_key_art() { #[test] #[rustfmt::skip] -fn test_split_key_insert_art() { +fn test_split_key_art() { let mut art: ART = ART::new(); assert_eq!(art.insert(&"123456789012345678901234567890123456789012345678901234567890".to_string(), 6), Ok(())); assert_eq!(art.insert(&"12345678901234567890123456789012345678901234567890".to_string(), 5), Ok(())); @@ -117,3 +117,9 @@ fn test_extend_shrink_art() { fn stress_art() { stress_sequential::>(1_000_000); } + +#[test] +fn debug_art() { + let art: ART = ART::new(); + art.print_debug_info(); +} From 6569352c26b09f5ed41c8f23633d02215d121717 Mon Sep 17 00:00:00 2001 From: Taewoo An Date: Sun, 25 Dec 2022 15:40:41 +0900 Subject: [PATCH 19/19] chore: refactoring... --- Cargo.toml | 1 + src/art/mod.rs | 2 ++ src/art/utf8art/mod.rs | 48 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+) create mode 100644 src/art/utf8art/mod.rs diff --git a/Cargo.toml b/Cargo.toml index f0f1e51..4640209 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ either = "1.6.1" parking_lot = "0.12.1" rand = "0.8.4" thread_local = "1.1.4" +uninit = "0.5.1" [dev-dependencies] criterion = "0.3.4" diff --git a/src/art/mod.rs b/src/art/mod.rs index 06b26d2..2cf9163 100644 --- a/src/art/mod.rs +++ b/src/art/mod.rs @@ -1,3 +1,5 @@ +mod utf8art; + use std::{ cmp::{min, Ordering}, marker::PhantomData, diff --git a/src/art/utf8art/mod.rs b/src/art/utf8art/mod.rs new file mode 100644 index 0000000..73397a1 --- /dev/null +++ b/src/art/utf8art/mod.rs @@ -0,0 +1,48 @@ +use std::{cmp::min, fmt::Debug, mem::MaybeUninit}; + +use uninit::uninit_array; + +const PREFIX_LEN: usize = 12; +const KEY_ENDMARK: u8 = 0xff; // invalid on utf-8. Thus, use it for preventing that any key cannot be the prefix of another key. +struct NodeHeader { + len: u32, // the len of prefix + prefix: [MaybeUninit; PREFIX_LEN], // prefix for path compression +} + +impl Debug for NodeHeader { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + unsafe { + f.debug_struct("NodeHeader") + .field("len", &self.len) + .field( + "prefix", + &self + .prefix + .get_unchecked(..min(PREFIX_LEN, self.len as usize)), + ) + .finish() + } + } +} + +impl Default for NodeHeader { + fn default() -> Self { + Self { + len: 0, + prefix: uninit_array![u8; PREFIX_LEN], + } + } +} + +/// the child node type +/// This is used for bitflag on child pointer. +const NODETYPE_MASK: usize = 0b111; +#[repr(usize)] +#[derive(Debug, PartialEq)] +enum NodeType { + Value = 0b000, + Node4 = 0b001, + Node16 = 0b010, + Node48 = 0b011, + Node256 = 0b100, +}