diff --git a/.travis.yml b/.travis.yml index f8bfc42..cdcac7b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,3 +16,8 @@ matrix: - cd ruby - bundle - bundle exec rake + - language: rust + rust: stable + script: + - cd rust + - cargo test diff --git a/rust/CHANGES.md b/rust/CHANGES.md new file mode 100644 index 0000000..a9ce717 --- /dev/null +++ b/rust/CHANGES.md @@ -0,0 +1,35 @@ +## 0.4.1 (2017-01-16) + +* Clippy fixups + +## 0.4.0 (2017-01-16) + +* Rename `objecthash_struct_member!` to `objecthash_member!` +* Do not automatically create references from objecthash macro args +* Remove associated digest type from ObjectHasher trait. Always use + objecthash::Digest instead + +## 0.3.0 (2016-08-21) + +* `objecthash_struct_member!` macro + +## 0.2.2 (2016-08-21) + +* Coerce `objecthash_struct!` key names to Strings + +## 0.2.1 (2016-08-21) + +* Bugfix for `objecthash_struct!` macro + +## 0.2.0 (2016-08-21) + +* Add `objecthash_struct!` and `objecthash_dict_entry!` macros + +## 0.1.1 (2016-08-15) + +* Add the newly released *ring* crate as an optional dependency + +## 0.1.0 (2016-08-08) + +* Initial release + diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 index 0000000..c1f974b --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "objecthash" +version = "0.4.1" +description = "A content hashing algorithm which works across multiple encodings (JSON, Protobufs, etc)" +homepage = "https://github.com/cryptosphere/rust-objecthash" +repository = "https://github.com/cryptosphere/rust-objecthash" +readme = "README.md" +keywords = ["hash", "digest", "signatures", "Merkle", "blockchain"] +license = "Apache-2.0" +authors = ["Tony Arcieri "] + +[dependencies.unicode-normalization] +version = ">= 0.1.2" + +[dependencies.ring] +optional = true +version = ">= 0.2" + +[dev-dependencies.rustc-serialize] +version = ">= 0.3.19" + +[features] +default = ["objecthash-ring"] +objecthash-ring = ["ring"] +octet-strings = [] diff --git a/rust/README.md 
b/rust/README.md new file mode 100644 index 0000000..477b8b2 --- /dev/null +++ b/rust/README.md @@ -0,0 +1,119 @@ +# ObjectHash for Rust [![Latest Version][crate-image]][crate-link] [![Build Status][build-image]][build-link] [![Apache 2 licensed][license-image]][license-link] + +[crate-image]: https://img.shields.io/crates/v/objecthash.svg +[crate-link]: https://crates.io/crates/objecthash +[build-image]: https://travis-ci.org/cryptosphere/objecthash-rs.svg?branch=master +[build-link]: https://travis-ci.org/cryptosphere/objecthash-rs +[license-image]: https://img.shields.io/badge/license-Apache2-blue.svg +[license-link]: https://github.com/cryptosphere/objecthash-rs/blob/master/LICENSE + +A content hash algorithm which works across multiple encodings (JSON, Protobufs, etc). + +This crate provides a Rust implementation of an algorithm originally created by Ben Laurie: + +https://github.com/benlaurie/objecthash + +### Is it any good? + +[Yes.](http://news.ycombinator.com/item?id=3067434) + +### Is it "Production Ready™"? + +![DANGER: EXPERIMENTAL](https://raw.github.com/cryptosphere/cryptosphere/master/images/experimental.png) + +**No!** ObjectHash is an *experimental* algorithm, and is subject to change. Please do not depend on it yet. + +Additionally, this is a project of a cryptographic nature and has not received any expert review. + +Use at your own risk. + +## Installation + +You will need to select a supported cryptography library to use as ObjectHash's backend. The following backend libraries +are supported: + +* [ring]: A safe, fast, small Rust crypto library based on BoringSSL's cryptography primitives + +[ring]: https://github.com/briansmith/ring + +Please make sure to add a crypto backend crate or the `objecthash` crate will not work! + +## Usage + +ObjectHashes can be used to compute a content hash of a deeply nested structure. The intended use is to first +deserialize data into a nested structure, then perform an ObjectHash digest of its contents. 
This way, the same +content hash can be computed regardless of how the data is serialized, which allows the data to be transcoded between +formats without having to recompute the content hash. + +This crate defines a trait called ObjectHash: + +```rust +pub trait ObjectHash { + fn objecthash<H: ObjectHasher>(&self, hasher: &mut H); +} +``` + +There are built-in implementations of the `ObjectHash` trait for the +following types: + +* `Vec` +* `HashMap` +* `str` +* `String` +* **Integers:** + * `i8` + * `i16` + * `i32` + * `i64` + * `u8` + * `u16` + * `u32` + * `u64` + * `isize` + * `usize` + +To calculate the ObjectHash digest of some data, call the following: + +```rust +let digest: objecthash::Digest = objecthash::digest(&42); +``` + +This will compute a digest (using the SHA-256 algorithm) of the given value, provided the type of the value given +implements the ObjectHash trait. + + +## Macros + +The `objecthash_struct!` macro is designed to simplify implementing the ObjectHash trait on structs, producing +a dict-type hash across their keys and values: + +```rust +impl ObjectHash for MyStruct { + #[inline] + fn objecthash<H: ObjectHasher>(&self, hasher: &mut H) { + objecthash_struct!( + hasher, + "foo" => &self.foo, + "bar" => &self.bar, + "baz" => &self.baz + ) + } +} +``` + +## TODO + +* More types +* More test vectors +* Redaction support + +## Contributing + +* Fork this repository on GitHub +* Make your changes and send a pull request +* If your changes look good, we'll merge them + +## Copyright + +Copyright (c) 2016-2017 Tony Arcieri. Distributed under the Apache 2.0 License. +See LICENSE file for further details. 
diff --git a/rust/src/hasher/mod.rs b/rust/src/hasher/mod.rs new file mode 100644 index 0000000..1cbdf25 --- /dev/null +++ b/rust/src/hasher/mod.rs @@ -0,0 +1,9 @@ +#[cfg(feature = "objecthash-ring")] +pub mod ring; + +// TODO: Use std::hash::BuildHasherDefault or our own similar version +/// Returns the default hasher: the *ring* backend's `Hasher::default()` (SHA-256). +#[cfg(feature = "objecthash-ring")] +pub fn default() -> ring::Hasher { + ring::Hasher::default() +} diff --git a/rust/src/hasher/ring.rs b/rust/src/hasher/ring.rs new file mode 100644 index 0000000..0d5077f --- /dev/null +++ b/rust/src/hasher/ring.rs @@ -0,0 +1,69 @@ +extern crate ring; + +use Digest; +use ObjectHasher; + +/// `ObjectHasher` implementation backed by a *ring* digest context. +pub struct Hasher { + ctx: ring::digest::Context, +} + +impl Hasher { + /// Creates a hasher that uses the given *ring* digest algorithm. + pub fn new(alg: &'static ring::digest::Algorithm) -> Hasher { + Hasher { ctx: ring::digest::Context::new(alg) } + } +} + +impl Default for Hasher { + fn default() -> Self { + Self::new(&ring::digest::SHA256) + } +} + +impl ObjectHasher for Hasher { + #[inline] + fn output_len(&self) -> usize { + self.ctx.algorithm.output_len + } + + #[inline] + fn update(&mut self, bytes: &[u8]) { + self.ctx.update(bytes); + } + + #[inline] + fn update_nested<F>(&mut self, nested: F) + where F: Fn(&mut Self) + { + // Hash the nested value in a fresh context, then feed its digest into this one + let mut nested_hasher = Hasher::new(self.ctx.algorithm); + nested(&mut nested_hasher); + self.update(nested_hasher.finish().as_ref()); + } + + #[inline] + fn finish(self) -> Digest { + // `Digest::new` rejects output longer than 32 bytes, so such algorithms panic here + Digest::new(self.ctx.finish().as_ref()).expect("digest output exceeds Digest capacity") + } +} + +#[cfg(test)] +mod tests { + use super::Hasher; + use ObjectHasher; + use rustc_serialize::hex::ToHex; + + // From Project NESSIE + // https://www.cosic.esat.kuleuven.be/nessie/testvectors/hash/sha/Sha-2-256.unverified.test-vectors + const SHA256_VECTOR_STRING: &'static str = "abcdefghijklmnopqrstuvwxyz"; + const SHA256_VECTOR_DIGEST: &'static str = "71c480df93d6ae2f1efad1447c66c9525e316218cf51fc8d9ed832f2daf18b73"; + + #[test] + fn sha256() { + let mut hasher = Hasher::default(); + hasher.update(SHA256_VECTOR_STRING.as_bytes()); + 
assert_eq!(hasher.finish().as_ref().to_hex(), SHA256_VECTOR_DIGEST); + } +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs new file mode 100644 index 0000000..92f42f5 --- /dev/null +++ b/rust/src/lib.rs @@ -0,0 +1,79 @@ +extern crate unicode_normalization; + +#[cfg(test)] +extern crate rustc_serialize; + +#[macro_use] +pub mod macros; + +pub mod hasher; +pub mod types; + +/// Largest digest, in bytes, that a `Digest` can hold. +const MAX_OUTPUT_LEN: usize = 32; + +/// A digest of at most `MAX_OUTPUT_LEN` bytes, stored inline. +pub struct Digest { + output_len: usize, + value: [u8; MAX_OUTPUT_LEN], +} + +impl Digest { + /// Copies `bytes` into a new `Digest`; returns `Err(())` if `bytes` is longer than `MAX_OUTPUT_LEN`. + pub fn new(bytes: &[u8]) -> Result<Digest, ()> { + if bytes.len() > MAX_OUTPUT_LEN { + return Err(()); + } + + let mut digest_bytes = [0u8; MAX_OUTPUT_LEN]; + // `copy_from_slice` panics unless both lengths match, so copy into a prefix of the buffer + digest_bytes[..bytes.len()].copy_from_slice(bytes); + + Ok(Digest { + output_len: bytes.len(), + value: digest_bytes, + }) + } +} + +impl AsRef<[u8]> for Digest { + #[inline] + fn as_ref(&self) -> &[u8] { + &self.value[..self.output_len] + } +} + +/// Computes the ObjectHash digest of `msg` using the default hasher. +#[cfg(feature = "objecthash-ring")] +pub fn digest<T: ObjectHash + ?Sized>(msg: &T) -> Digest { + let mut hasher = hasher::default(); + msg.objecthash(&mut hasher); + hasher.finish() +} + +/// Hash backend that ObjectHash values are written into. +pub trait ObjectHasher { + fn output_len(&self) -> usize; + fn update(&mut self, bytes: &[u8]); + fn update_nested<F>(&mut self, nested: F) where F: Fn(&mut Self); + fn finish(self) -> Digest; +} + +/// Implemented by types that can write their ObjectHash representation into an `ObjectHasher`. +pub trait ObjectHash { + fn objecthash<H: ObjectHasher>(&self, hasher: &mut H); +} + +#[cfg(test)] +#[cfg(feature = "objecthash-ring")] +mod tests { + use digest; + use rustc_serialize::hex::ToHex; + + #[test] + fn digest_test() { + let result = digest(&1000); + assert_eq!(result.as_ref().to_hex(), + "a3346d18105ef801c3598fec426dcc5d4be9d0374da5343f6c8dcbdf24cb8e0b"); + } +} diff --git a/rust/src/macros.rs b/rust/src/macros.rs new file mode 100644 index 0000000..017ff2e --- /dev/null +++ b/rust/src/macros.rs @@ -0,0 +1,53 @@ +#[macro_export] +macro_rules! 
objecthash_member { + ($key:expr => $value:expr) => { + { + let key_digest = $crate::digest($key); + let value_digest = $crate::digest($value); + let mut result = Vec::with_capacity(key_digest.as_ref().len() + value_digest.as_ref().len()); + + result.extend_from_slice(key_digest.as_ref()); // member = key digest followed by value digest + result.extend_from_slice(value_digest.as_ref()); + result + } + } +} + +#[macro_export] +macro_rules! objecthash_struct( + { $hasher:expr, $($key:expr => $value:expr),+ } => { + { + let mut digests: Vec<Vec<u8>> = Vec::new(); + + $( + digests.push(objecthash_member!($key => $value)); + )+ + + digests.sort(); // sorted so the result does not depend on the order the members are listed in + + $hasher.update($crate::types::DICT_TAG); + for value in &digests { + $hasher.update(value); + } + } + }; +); + +#[cfg(test)] +#[cfg(feature = "objecthash-ring")] +mod tests { + use {hasher, ObjectHasher}; + use rustc_serialize::hex::ToHex; + + #[test] + fn objecthash_struct_test() { + let mut h = hasher::default(); + + objecthash_struct!(h, "foo" => &1); + + assert_eq!( + h.finish().as_ref().to_hex(), + "bf4c58f5e308e31e2cd64bdbf7a01b9b595a13602438be5e912c7d94f6d8177a" + ); + } +} diff --git a/rust/src/types.rs b/rust/src/types.rs new file mode 100644 index 0000000..a491bf7 --- /dev/null +++ b/rust/src/types.rs @@ -0,0 +1,194 @@ +use std; +use std::collections::HashMap; + +use {ObjectHash, ObjectHasher}; + +use unicode_normalization::UnicodeNormalization; + +pub const INTEGER_TAG: &'static [u8; 1] = b"i"; +pub const STRING_TAG: &'static [u8; 1] = b"u"; +pub const LIST_TAG: &'static [u8; 1] = b"l"; +pub const DICT_TAG: &'static [u8; 1] = b"d"; + +#[cfg(feature = "octet-strings")] +pub const OCTET_TAG: &'static [u8; 1] = b"o"; + +macro_rules! 
objecthash_digest { + ($hasher:expr, $tag:expr, $bytes:expr) => { + $hasher.update($tag); + $hasher.update($bytes); + }; +} + +impl<T: ObjectHash> ObjectHash for Vec<T> { + #[inline] + fn objecthash<H: ObjectHasher>(&self, hasher: &mut H) { + hasher.update(LIST_TAG); + + for value in self { + hasher.update_nested(|h| value.objecthash(h)); + } + } +} + +impl<K, V, S> ObjectHash for HashMap<K, V, S> + where K: ObjectHash + Eq + std::hash::Hash, + V: ObjectHash + PartialEq, + S: std::hash::BuildHasher +{ + #[inline] + fn objecthash<H: ObjectHasher>(&self, hasher: &mut H) { + hasher.update(DICT_TAG); + + let mut digests: Vec<Vec<u8>> = self.iter() + .map(|(k, v)| objecthash_member!(k => v)) // NOTE: entries are hashed via `$crate::digest` (the default hasher), not via `H` + .collect(); + + digests.sort(); // sorted so the result does not depend on HashMap iteration order + + for value in &digests { + hasher.update(value); + } + } +} + +impl ObjectHash for str { + #[inline] + fn objecthash<H: ObjectHasher>(&self, hasher: &mut H) { + let normalized = self.nfc().collect::<String>(); + objecthash_digest!(hasher, STRING_TAG, normalized.as_bytes()); + } +} + +impl ObjectHash for String { + #[inline] + fn objecthash<H: ObjectHasher>(&self, hasher: &mut H) { + let normalized = self.nfc().collect::<String>(); + objecthash_digest!(hasher, STRING_TAG, normalized.as_bytes()); + } +} + +// Technically ObjectHash does not define a representation for binary data +// For now this is a non-standard extension of ObjectHash +#[cfg(feature = "octet-strings")] +impl ObjectHash for [u8] { + #[inline] + fn objecthash<H: ObjectHasher>(&self, hasher: &mut H) { + objecthash_digest!(hasher, OCTET_TAG, self); + } +} + +macro_rules! 
impl_inttype (($inttype:ident) => ( + impl ObjectHash for $inttype { + #[inline] + fn objecthash<H: ObjectHasher>(&self, hasher: &mut H) { + objecthash_digest!(hasher, INTEGER_TAG, self.to_string().as_bytes()); // integers are hashed as their decimal string + } + } +)); + +impl_inttype!(i8); +impl_inttype!(i16); +impl_inttype!(i32); +impl_inttype!(i64); +impl_inttype!(u8); +impl_inttype!(u16); +impl_inttype!(u32); +impl_inttype!(u64); +impl_inttype!(isize); +impl_inttype!(usize); + +#[cfg(test)] +#[cfg(feature = "objecthash-ring")] +mod tests { + use std::collections::HashMap; + + use {hasher, ObjectHash, ObjectHasher}; + use rustc_serialize::hex::ToHex; + + macro_rules! h { + ($value:expr) => { + { + let mut hasher = hasher::default(); + $value.objecthash(&mut hasher); + hasher.finish().as_ref().to_hex() + } + }; + } + + #[test] + fn integers() { + assert_eq!(h!(-1), "f105b11df43d5d321f5c773ef904af979024887b4d2b0fab699387f59e2ff01e"); + assert_eq!(h!(0), "a4e167a76a05add8a8654c169b07b0447a916035aef602df103e8ae0fe2ff390"); + assert_eq!(h!(10), "73f6128db300f3751f2e509545be996d162d20f9e030864632f85e34fd0324ce"); + assert_eq!(h!(1000), "a3346d18105ef801c3598fec426dcc5d4be9d0374da5343f6c8dcbdf24cb8e0b"); + + assert_eq!(h!(-1 as i8), "f105b11df43d5d321f5c773ef904af979024887b4d2b0fab699387f59e2ff01e"); + assert_eq!(h!(-1 as i16), "f105b11df43d5d321f5c773ef904af979024887b4d2b0fab699387f59e2ff01e"); + assert_eq!(h!(-1 as i32), "f105b11df43d5d321f5c773ef904af979024887b4d2b0fab699387f59e2ff01e"); + assert_eq!(h!(-1 as i64), "f105b11df43d5d321f5c773ef904af979024887b4d2b0fab699387f59e2ff01e"); + assert_eq!(h!(-1 as isize), "f105b11df43d5d321f5c773ef904af979024887b4d2b0fab699387f59e2ff01e"); + + assert_eq!(h!(10 as u8), "73f6128db300f3751f2e509545be996d162d20f9e030864632f85e34fd0324ce"); + assert_eq!(h!(10 as u16), "73f6128db300f3751f2e509545be996d162d20f9e030864632f85e34fd0324ce"); + assert_eq!(h!(10 as u32), "73f6128db300f3751f2e509545be996d162d20f9e030864632f85e34fd0324ce"); + assert_eq!(h!(10 as u64), 
"73f6128db300f3751f2e509545be996d162d20f9e030864632f85e34fd0324ce"); + assert_eq!(h!(10 as usize), "73f6128db300f3751f2e509545be996d162d20f9e030864632f85e34fd0324ce"); + } + + #[test] + fn strings() { + let u1n = "\u{03D3}"; + let u1d = "\u{03D2}\u{0301}"; + + let digest = "f72826713a01881404f34975447bd6edcb8de40b191dc57097ebf4f5417a554d"; + assert_eq!(h!(u1n), digest); + assert_eq!(h!(&u1d), digest); + + assert_eq!(h!("ԱԲաբ"), "2a2a4485a4e338d8df683971956b1090d2f5d33955a81ecaad1a75125f7a316c"); + assert_eq!(h!(String::from("ԱԲաբ")), "2a2a4485a4e338d8df683971956b1090d2f5d33955a81ecaad1a75125f7a316c"); + } + + #[test] + fn vectors() { + assert_eq!(h!(vec![123]), "1b93f704451e1a7a1b8c03626ffcd6dec0bc7ace947ff60d52e1b69b4658ccaa"); + assert_eq!(h!(vec![1, 2, 3]), "157bf16c70bd4c9673ffb5030552df0ee2c40282042ccdf6167850edc9044ab7"); + assert_eq!(h!(vec![123456789012345u64]), "3488b9bc37cce8223a032760a9d4ef488cdfebddd9e1af0b31fcd1d7006369a4"); + assert_eq!(h!(vec![123456789012345u64, 678901234567890u64]), "031ef1aaeccea3bced3a1c6237a4fc00ed4d629c9511922c5a3f4e5c128b0ae4"); + } + + #[test] + fn hashmaps() { + { + let hashmap: HashMap<String, String> = HashMap::new(); + assert_eq!(h!(hashmap), "18ac3e7343f016890c510e93f935261169d9e3f565436429830faf0934f4f8e4"); + } + + { + let mut hashmap = HashMap::new(); + hashmap.insert(String::from("foo"), 1); + assert_eq!(h!(hashmap), "bf4c58f5e308e31e2cd64bdbf7a01b9b595a13602438be5e912c7d94f6d8177a"); + } + } + + #[test] + fn hashmap_ordering() { + { + let mut hashmap = HashMap::new(); + hashmap.insert(String::from("k1"), String::from("v1")); + hashmap.insert(String::from("k2"), String::from("v2")); + hashmap.insert(String::from("k3"), String::from("v3")); + + assert_eq!(h!(hashmap), "ddd65f1f7568269a30df7cafc26044537dc2f02a1a0d830da61762fc3e687057"); + } + + { + let mut hashmap = HashMap::new(); + hashmap.insert(String::from("k2"), String::from("v2")); + hashmap.insert(String::from("k1"), String::from("v1")); + hashmap.insert(String::from("k3"), 
String::from("v3")); + + assert_eq!(h!(hashmap), "ddd65f1f7568269a30df7cafc26044537dc2f02a1a0d830da61762fc3e687057"); + } + } +}