Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
File renamed without changes.
157 changes: 89 additions & 68 deletions objecthash.go
Original file line number Diff line number Diff line change
@@ -1,100 +1,114 @@
package objecthash

import "bytes"
import "crypto/sha256"
import "encoding/json"
import "fmt"
import "sort"
import (
"bytes"
"crypto/sha256"
"encoding/json"
"errors"
"fmt"
"sort"
)

// Sentinel errors returned by the hashing functions in this package.
var (
// ErrNormalizingFloat is returned by floatNormalize when a float64 cannot
// be turned into its normalized string form.
ErrNormalizingFloat = errors.New("ErrNormalizingFloat")
// ErrUnrecognizedObjectType is returned by ObjectHash when the value's
// dynamic type is not one of the types it knows how to hash.
ErrUnrecognizedObjectType = errors.New("ErrUnrecognizedObjectType")
)

//import "golang.org/x/text/unicode/norm"

const hashLength int = sha256.Size

// hash returns the SHA-256 digest of the one-byte type tag t followed by the
// payload b. The tag keeps values of different kinds (list, string, int, ...)
// from colliding when their payload bytes happen to be equal.
//
// The result is a freshly allocated slice of sha256.Size bytes.
func hash(t byte, b []byte) []byte {
	h := sha256.New()
	// hash.Hash.Write is documented to never return an error.
	h.Write([]byte{t})
	h.Write(b)
	return h.Sum(nil)
}

// Set is a slice of values that ObjectHash dispatches to hashSet, which
// sorts the element hashes and drops adjacent duplicates before hashing them
// under the 's' tag.
//
// Note: not actually safe to use as a set.
// FIXME: if What You Hash Is What You Get, then this needs to be safe
// to use as a set.
type Set []interface{}

// sortableHashes implements sort.Interface over a slice of digests, ordering
// them by plain byte-wise comparison.
type sortableHashes [][]byte

// Len reports the number of digests.
func (h sortableHashes) Len() int { return len(h) }

// Swap exchanges the digests at positions i and j.
func (h sortableHashes) Swap(i, j int) { h[i], h[j] = h[j], h[i] }

// Less reports whether digest i sorts strictly before digest j.
func (h sortableHashes) Less(i, j int) bool { return bytes.Compare(h[i], h[j]) < 0 }

func hashSet(s Set) [hashLength]byte {
h := make([][hashLength]byte, len(s))
func hashSet(s Set) ([]byte, error) {
h := make([][]byte, len(s))
for n, e := range s {
h[n] = ObjectHash(e)
var err error
if h[n], err = ObjectHash(e); err != nil {
return nil, err
}
}
sort.Sort(sortableHashes(h))
b := new(bytes.Buffer)
var prev [hashLength]byte
var prev []byte
for _, hh := range h {
if hh != prev {
b.Write(hh[:])
if !bytes.Equal(hh, prev) {
b.Write(hh)
}
prev = hh
}
return hash(`s`, b.Bytes())
return hash('s', b.Bytes()), nil
}

func hashList(l []interface{}) [hashLength]byte {
func hashList(l []interface{}) ([]byte, error) {
h := new(bytes.Buffer)
for _, o := range l {
b := ObjectHash(o)
h.Write(b[:])
var b []byte
var err error
if b, err = ObjectHash(o); err != nil {
return nil, err
}
h.Write(b)
}
return hash(`l`, h.Bytes())
return hash('l', h.Bytes()), nil
}

func hashUnicode(s string) [hashLength]byte {
func hashUnicode(s string) ([]byte, error) {
//return hash(`u`, norm.NFC.Bytes([]byte(s)))
return hash(`u`, []byte(s))
return hash('u', []byte(s)), nil
}

// hashEntry pairs the digest of a dictionary key with the digest of its value.
type hashEntry struct {
	khash []byte
	vhash []byte
}

// byKHash implements sort.Interface over hashEntry values, ordering them by
// byte-wise comparison of the key digest only.
type byKHash []hashEntry

// Len reports the number of entries.
func (h byKHash) Len() int { return len(h) }

// Swap exchanges the entries at positions i and j.
func (h byKHash) Swap(i, j int) { h[i], h[j] = h[j], h[i] }

// Less reports whether entry i's key digest sorts strictly before entry j's.
func (h byKHash) Less(i, j int) bool {
	return bytes.Compare(h[i].khash, h[j].khash) < 0
}

func hashDict(d map[string]interface {}) [hashLength]byte {
func hashDict(d map[string]interface{}) ([]byte, error) {
e := make([]hashEntry, len(d))
n := 0
for k, v := range d {
e[n].khash = ObjectHash(k)
e[n].vhash = ObjectHash(v)
var err error
if e[n].khash, err = ObjectHash(k); err != nil {
return nil, err
}
if e[n].vhash, err = ObjectHash(v); err != nil {
return nil, err
}
n++
}
sort.Sort(byKHash(e))
h := new(bytes.Buffer)
for _, ee := range e {
h.Write(ee.khash[:])
h.Write(ee.vhash[:])
h.Write(ee.khash)
h.Write(ee.vhash)
}
return hash(`d`, h.Bytes())
return hash('d', h.Bytes()), nil
}

func floatNormalize(f float64) (s string) {
func floatNormalize(f float64) (string, error) {
// sign
s = `+`
s := `+`
if f < 0 {
s = `-`
f = -f
Expand All @@ -112,7 +126,7 @@ func floatNormalize(f float64) (s string) {
s += fmt.Sprintf("%d:", e)
// mantissa
if f > 1 || f <= .5 {
panic(f)
return "", ErrNormalizingFloat
}
for f != 0 {
if f >= 1 {
Expand All @@ -121,60 +135,67 @@ func floatNormalize(f float64) (s string) {
} else {
s += `0`
}
if (f >= 1) {
panic(f)
if f >= 1 {
return "", ErrNormalizingFloat
}
if (len(s) >= 1000) {
panic(s)
if len(s) >= 1000 {
return "", ErrNormalizingFloat
}
f *= 2
}
return
return s, nil
}

func hashFloat(f float64) [hashLength]byte {
return hash(`f`, []byte(floatNormalize(f)))
func hashFloat(f float64) ([]byte, error) {
var n string
var err error
if n, err = floatNormalize(f); err != nil {
return nil, err
}
return hash('f', []byte(n)), nil
}

func hashInt(i int) [hashLength]byte {
return hash(`i`, []byte(fmt.Sprintf("%d", i)))
func hashInt(i int) ([]byte, error) {
return hash('i', []byte(fmt.Sprintf("%d", i))), nil
}

func hashBool(b bool) [hashLength]byte {
bb := []byte(`0`)
func hashBool(b bool) ([]byte, error) {
var bb []byte
if b {
bb = []byte(`1`)
bb = []byte{'1'}
} else {
bb = []byte{'0'}
}
return hash(`b`, bb)
return hash('b', bb), nil
}

func ObjectHash(o interface{}) [hashLength]byte {
func ObjectHash(o interface{}) ([]byte, error) {
switch v := o.(type) {
case []interface{}:
return hashList(v)
case string:
return hashUnicode(v)
case map[string]interface {}:
case map[string]interface{}:
return hashDict(v)
case float64:
return hashFloat(v)
case nil:
return hash(`n`, []byte(``))
return hash('n', nil), nil
case int:
return hashInt(v)
case Set:
return hashSet(v)
case bool:
return hashBool(v)
default:
panic(o)
return nil, ErrUnrecognizedObjectType
}
}

func CommonJSONHash(j string) [hashLength]byte {
func CommonJSONHash(j []byte) ([]byte, error) {
var f interface{}
if err := json.Unmarshal([]byte(j), &f); err != nil {
panic(err)
if err := json.Unmarshal(j, &f); err != nil {
return nil, err
}
return ObjectHash(f)
}
14 changes: 9 additions & 5 deletions objecthash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ import "os"
import "testing"

// commonJSON prints the hex form of CommonJSONHash(j) on its own line, for
// use by the Example functions. A hashing error is printed instead of being
// discarded, so a failing example shows its cause in the output diff.
func commonJSON(j string) {
	h, err := CommonJSONHash([]byte(j))
	if err != nil {
		fmt.Printf("error: %v\n", err)
		return
	}
	fmt.Printf("%x\n", h)
}

func ExampleCommonJSONHash_Common() {
Expand All @@ -16,7 +17,7 @@ func ExampleCommonJSONHash_Common() {

func ExampleCommonJSONHash_FloatAndInt() {
commonJSON(`["foo", {"bar":["baz", null, 1.0, 1.5, 0.0001, 1000.0, 2.0, -23.1234, 2.0]}]`)
// Integers and floats are the same in common JSON
// Integers and floats are the same in common JSON
commonJSON(`["foo", {"bar":["baz", null, 1, 1.5, 0.0001, 1000, 2, -23.1234, 2]}]`)
// Output:
// 783a423b094307bcb28d005bc2f026ff44204442ef3513585e7e73b66e3c2213
Expand All @@ -35,6 +36,7 @@ func ExampleCommonJSONHash_KeyOrderIndependence() {
// ddd65f1f7568269a30df7cafc26044537dc2f02a1a0d830da61762fc3e687057
// ddd65f1f7568269a30df7cafc26044537dc2f02a1a0d830da61762fc3e687057
}

/*
func ExampleCommonJSONHash_UnicodeNormalisation() {
commonJSON("\"\u03d3\"")
Expand All @@ -45,7 +47,8 @@ func ExampleCommonJSONHash_UnicodeNormalisation() {
}
*/
// objectHash prints the hex form of ObjectHash(o) on its own line, for use by
// the Example functions. A hashing error is printed instead of being
// discarded, so a failing example shows its cause in the output diff.
func objectHash(o interface{}) {
	h, err := ObjectHash(o)
	if err != nil {
		fmt.Printf("error: %v\n", err)
		return
	}
	fmt.Printf("%x\n", h)
}

func ExampleObjectHash_JSON() {
Expand All @@ -68,7 +71,7 @@ func ExampleObjectHash_JSON2() {
}

// ExampleObjectHash_Set hashes a nested dictionary that contains a Set
// alongside a float value.
func ExampleObjectHash_Set() {
	o := map[string]interface{}{
		`thing1`: map[string]interface{}{`thing2`: Set{1, 2, `s`}},
		`thing3`: 1234.567,
	}
	objectHash(o)
	// Output: 618cf0582d2e716a70e99c2f3079d74892fec335e3982eb926835967cb0c246c
}
Expand Down Expand Up @@ -109,7 +112,8 @@ func TestGolden(t *testing.T) {
return
}
h := s.Text()
hh := fmt.Sprintf("%x", CommonJSONHash(j))
jh, _ := CommonJSONHash([]byte(j))
hh := fmt.Sprintf("%x", jh)
if h != hh {
t.Errorf("Got %s expected %s", hh, h)
}
Expand Down