diff --git a/Cargo.toml b/Cargo.toml index e883ccf..4402bdb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,14 +17,19 @@ features = ["serde"] rustdoc-args = ["--cfg", "docsrs"] [features] -encode = ["dep:sha2", "dep:varint-simd"] +encode = ["dep:sha2", "dep:unsigned-varint", "dep:varint-simd"] serde = ["encode", "dep:serde"] [dependencies] serde = { version = "1.0", optional = true } sha2 = { version = "0.10", optional = true } + +[target.'cfg(any(target_arch = "x86", target_arch = "x86_64"))'.dependencies] varint-simd = { version = "0.4", optional = true } +[target.'cfg(not(any(target_arch = "x86", target_arch = "x86_64")))'.dependencies] +unsigned-varint = { version = "0.8", optional = true } + [dev-dependencies] bincode = { version = "2", features = ["serde"] } criterion = "0.6" diff --git a/src/encode.rs b/src/encode.rs index ce7e609..4cc71ed 100644 --- a/src/encode.rs +++ b/src/encode.rs @@ -1,5 +1,9 @@ use core::fmt::Display; +use crate::leb128; + +pub(crate) type IntDecodeError = leb128::DecodeError; + /// TODO: docs pub(crate) trait Encode { /// TODO: docs @@ -40,7 +44,6 @@ impl Encode for bool { impl Decode for bool { type Value = bool; - type Error = BoolDecodeError; #[inline] @@ -79,81 +82,55 @@ impl core::fmt::Display for BoolDecodeError { } } -impl_int_encode!(u16); -impl_int_encode!(u32); -impl_int_encode!(u64); - -impl_int_decode!(u16); -impl_int_decode!(u32); -impl_int_decode!(u64); - -impl Encode for usize { - #[inline(always)] - fn encode(&self, buf: &mut Vec) { - (*self as u64).encode(buf) - } -} - -impl Decode for usize { - type Value = usize; - - type Error = IntDecodeError; - - #[inline(always)] - fn decode(buf: &[u8]) -> Result<(usize, &[u8]), Self::Error> { - u64::decode(buf).map(|(value, rest)| (value as usize, rest)) - } -} - macro_rules! impl_int_encode { - ($ty:ty) => { + ($ty:ty, $encode_fn:ident) => { impl Encode for $ty { #[inline] fn encode(&self, buf: &mut Vec) { - let (array, len) = varint_simd::encode(*self); + let (array, len) = leb128::$encode_fn(*self); buf.extend_from_slice(&array[..len as usize]); } } }; } -use impl_int_encode; - macro_rules! impl_int_decode { - ($ty:ty) => { + ($ty:ty, $decode_fn:ident) => { impl Decode for $ty { type Value = Self; - - type Error = $crate::encode::IntDecodeError; + type Error = leb128::DecodeError; #[inline] fn decode(buf: &[u8]) -> Result<($ty, &[u8]), Self::Error> { - let (decoded, len) = varint_simd::decode::(buf) - .map_err(IntDecodeError)?; - - // TODO: this check shouldn't be necessary, `decode` should - // fail. Open an issue. - let Some(rest) = buf.get(len as usize..) else { - return Err(IntDecodeError( - varint_simd::VarIntDecodeError::NotEnoughBytes, - )); - }; - - Ok((decoded, rest)) + let (decoded, len) = leb128::$decode_fn(buf)?; + Ok((decoded, &buf[len as usize..])) } } }; } -use impl_int_decode; +impl_int_encode!(u16, encode_u16); +impl_int_encode!(u32, encode_u32); +impl_int_encode!(u64, encode_u64); -/// An error that can occur when decoding an [`Int`]. -pub(crate) struct IntDecodeError(varint_simd::VarIntDecodeError); +impl_int_decode!(u16, decode_u16); +impl_int_decode!(u32, decode_u32); +impl_int_decode!(u64, decode_u64); -impl Display for IntDecodeError { - #[inline] - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - Display::fmt(&self.0, f) +impl Encode for usize { + #[inline(always)] + fn encode(&self, buf: &mut Vec) { + (*self as u64).encode(buf) + } +} + +impl Decode for usize { + type Value = usize; + type Error = ::Error; + + #[inline(always)] + fn decode(buf: &[u8]) -> Result<(usize, &[u8]), Self::Error> { + u64::decode(buf).map(|(value, rest)| (value as usize, rest)) } } @@ -251,22 +228,6 @@ mod serde { mod tests { use super::*; - impl PartialEq for IntDecodeError { - fn eq(&self, other: &Self) -> bool { - use varint_simd::VarIntDecodeError::*; - matches!( - (&self.0, &other.0), - (Overflow, Overflow) | (NotEnoughBytes, NotEnoughBytes) - ) - } - } - - impl core::fmt::Debug for IntDecodeError { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - core::fmt::Display::fmt(self, f) - } - } - /// Tests that some integers can be encoded with a single byte. #[test] fn encode_int_single_byte() { @@ -323,7 +284,7 @@ mod tests { assert_eq!( u32::decode(&buf).unwrap_err(), - IntDecodeError(varint_simd::VarIntDecodeError::NotEnoughBytes), + leb128::DecodeError::NotEnoughBytes, ); } @@ -339,7 +300,7 @@ mod tests { assert_eq!( u32::decode(&buf).unwrap_err(), - IntDecodeError(varint_simd::VarIntDecodeError::NotEnoughBytes), + leb128::DecodeError::NotEnoughBytes, ); } } diff --git a/src/leb128.rs b/src/leb128.rs new file mode 100644 index 0000000..4d52266 --- /dev/null +++ b/src/leb128.rs @@ -0,0 +1,103 @@ +use core::fmt; + +macro_rules! encode { + ($fn_name:ident, $ty:ident, $max_encoded_bytes:expr) => { + #[inline] + pub(crate) fn $fn_name(value: $ty) -> ([u8; $max_encoded_bytes], u8) { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + let (long_array, len_u8) = varint_simd::encode(value); + let mut short_array = [0; $max_encoded_bytes]; + let len = len_u8 as usize; + short_array[..len].copy_from_slice(&long_array[..len]); + (short_array, len_u8) + } + #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] + { + let mut buf = [0; $max_encoded_bytes]; + let len = unsigned_varint::encode::$ty(value, &mut buf).len(); + (buf, len as u8) + } + } + }; +} + +macro_rules! decode { + ($fn_name:ident, $ty:ident) => { + #[inline] + pub(crate) fn $fn_name(buf: &[u8]) -> Result<($ty, u8), DecodeError> { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + let (decoded, len) = varint_simd::decode(buf)?; + // TODO: this check shouldn't be necessary, `decode` should + // fail. Open an issue. + if buf.len() < len { + return Err(DecodeError::NotEnoughBytes); + } + Ok((decoded, len as u8)) + } + #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] + { + let (decoded, rest) = unsigned_varint::decode::$ty(buf)?; + Ok((decoded, (buf.len() - rest.len()) as u8)) + } + } + }; +} + +encode!(encode_u16, u16, 3); +encode!(encode_u32, u32, 5); +encode!(encode_u64, u64, 10); + +decode!(decode_u16, u16); +decode!(decode_u32, u32); +decode!(decode_u64, u64); + +#[cfg_attr(test, derive(Debug, PartialEq))] +pub(crate) enum DecodeError { + NotEnoughBytes, + #[cfg_attr( + any(target_arch = "x86", target_arch = "x86_64"), + allow(dead_code) + )] + NotMinimal, + Overflow, +} + +impl fmt::Display for DecodeError { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::NotEnoughBytes => f.write_str("not enough input bytes"), + Self::NotMinimal => f.write_str("encoding is not minimal"), + Self::Overflow => f.write_str("input bytes exceed maximum"), + } + } +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +impl From for DecodeError { + #[inline] + fn from(err: varint_simd::VarIntDecodeError) -> Self { + match err { + varint_simd::VarIntDecodeError::Overflow => Self::Overflow, + varint_simd::VarIntDecodeError::NotEnoughBytes => { + Self::NotEnoughBytes + }, + } + } +} + +#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] +impl From for DecodeError { + #[inline] + fn from(err: unsigned_varint::decode::Error) -> Self { + match err { + unsigned_varint::decode::Error::Insufficient => { + Self::NotEnoughBytes + }, + unsigned_varint::decode::Error::Overflow => Self::Overflow, + unsigned_varint::decode::Error::NotMinimal => Self::NotMinimal, + } + } +} diff --git a/src/lib.rs b/src/lib.rs index f40c2dc..31bafc0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -135,6 +135,8 @@ mod encode; mod encoded_replica; mod gtree; mod insertion; +#[cfg(feature = "encode")] +mod leb128; mod replica; mod replica_id; mod run_indices;