From e153340e5a356528261d9360b97d4ad0253e821c Mon Sep 17 00:00:00 2001 From: dignifiedquire Date: Thu, 3 May 2018 15:12:25 +0200 Subject: [PATCH 01/18] feat: use CIDs as their byte representation instead of a struct --- builder.go | 6 +-- cid.go | 146 ++++++++++++++++++++++++++-------------------------- cid_fuzz.go | 2 +- cid_test.go | 62 +++++++++++++--------- set.go | 14 ++--- set_test.go | 14 ++--- 6 files changed, 127 insertions(+), 117 deletions(-) diff --git a/builder.go b/builder.go index af33aac..5dea278 100644 --- a/builder.go +++ b/builder.go @@ -5,7 +5,7 @@ import ( ) type Builder interface { - Sum(data []byte) (*Cid, error) + Sum(data []byte) (Cid, error) GetCodec() uint64 WithCodec(uint64) Builder } @@ -33,7 +33,7 @@ func (p Prefix) WithCodec(c uint64) Builder { return p } -func (p V0Builder) Sum(data []byte) (*Cid, error) { +func (p V0Builder) Sum(data []byte) (Cid, error) { hash, err := mh.Sum(data, mh.SHA2_256, -1) if err != nil { return nil, err @@ -52,7 +52,7 @@ func (p V0Builder) WithCodec(c uint64) Builder { return V1Builder{Codec: c, MhType: mh.SHA2_256} } -func (p V1Builder) Sum(data []byte) (*Cid, error) { +func (p V1Builder) Sum(data []byte) (Cid, error) { mhLen := p.MhLength if mhLen <= 0 { mhLen = -1 diff --git a/cid.go b/cid.go index 6bf2aa5..9e1b950 100644 --- a/cid.go +++ b/cid.go @@ -132,36 +132,53 @@ var CodecToStr = map[uint64]string{ // They exist to allow IPFS to work with Cids while keeping // compatibility with the plain-multihash format used used in IPFS. // NewCidV1 should be used preferentially. -func NewCidV0(mhash mh.Multihash) *Cid { - return &Cid{ - version: 0, - codec: DagProtobuf, - hash: mhash, - } +func NewCidV0(mhash mh.Multihash) Cid { + return newCid(0, DagProtobuf, mhash) } // NewCidV1 returns a new Cid using the given multicodec-packed // content type. -func NewCidV1(codecType uint64, mhash mh.Multihash) *Cid { - return &Cid{ - version: 1, - codec: codecType, - hash: mhash, +func NewCidV1(codecType uint64, mhash mh.Multihash) Cid { + return newCid(1, codecType, mhash) +} + +func newCid(version, codecType uint64, mhash mh.Multihash) Cid { + hashlen := len(mhash) + // two 8 bytes (max) numbers plus hash + buf := make([]byte, 2*binary.MaxVarintLen64+hashlen) + n := binary.PutUvarint(buf, version) + n += binary.PutUvarint(buf[n:], codecType) + cn := copy(buf[n:], mhash) + if cn != hashlen { + panic("copy hash length is inconsistent") } + + return Cid(buf[:n+hashlen]) } // Cid represents a self-describing content adressed // identifier. It is formed by a Version, a Codec (which indicates // a multicodec-packed content type) and a Multihash. -type Cid struct { - version uint64 - codec uint64 - hash mh.Multihash +// Byte layout: [version, codec, multihash] +// - version uvarint +// - codec uvarint +// - hash mh.Multihash +type Cid []byte + +func (c Cid) version() uint64 { + v, _ := binary.Uvarint(c) + return v +} + +func (c Cid) codec() uint64 { + _, n := binary.Uvarint(c) + codec, _ := binary.Uvarint(c[n:]) + return codec } // Parse is a short-hand function to perform Decode, Cast etc... on // a generic interface{} type. -func Parse(v interface{}) (*Cid, error) { +func Parse(v interface{}) (Cid, error) { switch v2 := v.(type) { case string: if strings.Contains(v2, "/ipfs/") { @@ -172,7 +189,7 @@ func Parse(v interface{}) (*Cid, error) { return Cast(v2) case mh.Multihash: return NewCidV0(v2), nil - case *Cid: + case Cid: return v2, nil default: return nil, fmt.Errorf("can't parse %+v as Cid", v2) @@ -191,7 +208,7 @@ func Parse(v interface{}) (*Cid, error) { // Decode will also detect and parse CidV0 strings. Strings // starting with "Qm" are considered CidV0 and treated directly // as B58-encoded multihashes. -func Decode(v string) (*Cid, error) { +func Decode(v string) (Cid, error) { if len(v) < 2 { return nil, ErrCidTooShort } @@ -257,18 +274,14 @@ func uvError(read int) error { // // Please use decode when parsing a regular Cid string, as Cast does not // expect multibase-encoded data. Cast accepts the output of Cid.Bytes(). -func Cast(data []byte) (*Cid, error) { +func Cast(data []byte) (Cid, error) { if len(data) == 34 && data[0] == 18 && data[1] == 32 { h, err := mh.Cast(data) if err != nil { return nil, err } - return &Cid{ - codec: DagProtobuf, - version: 0, - hash: h, - }, nil + return NewCidV0(h), nil } vers, n := binary.Uvarint(data) @@ -280,7 +293,7 @@ func Cast(data []byte) (*Cid, error) { return nil, fmt.Errorf("invalid cid version number: %d", vers) } - codec, cn := binary.Uvarint(data[n:]) + _, cn := binary.Uvarint(data[n:]) if err := uvError(cn); err != nil { return nil, err } @@ -291,25 +304,21 @@ func Cast(data []byte) (*Cid, error) { return nil, err } - return &Cid{ - version: vers, - codec: codec, - hash: h, - }, nil + return Cid(data[0 : n+cn+len(h)]), nil } // Type returns the multicodec-packed content type of a Cid. -func (c *Cid) Type() uint64 { - return c.codec +func (c Cid) Type() uint64 { + return c.codec() } // String returns the default string representation of a // Cid. Currently, Base58 is used as the encoding for the // multibase string. -func (c *Cid) String() string { - switch c.version { +func (c Cid) String() string { + switch c.version() { case 0: - return c.hash.B58String() + return c.Hash().B58String() case 1: mbstr, err := mbase.Encode(mbase.Base58BTC, c.bytesV1()) if err != nil { @@ -324,13 +333,13 @@ func (c *Cid) String() string { // String returns the string representation of a Cid // encoded is selected base -func (c *Cid) StringOfBase(base mbase.Encoding) (string, error) { - switch c.version { +func (c Cid) StringOfBase(base mbase.Encoding) (string, error) { + switch c.version() { case 0: if base != mbase.Base58BTC { return "", ErrInvalidEncoding } - return c.hash.B58String(), nil + return c.Hash().B58String(), nil case 1: return mbase.Encode(base, c.bytesV1()) default: @@ -341,10 +350,10 @@ func (c *Cid) StringOfBase(base mbase.Encoding) (string, error) { // Encode return the string representation of a Cid in a given base // when applicable. Version 0 Cid's are always in Base58 as they do // not take a multibase prefix. -func (c *Cid) Encode(base mbase.Encoder) string { - switch c.version { +func (c Cid) Encode(base mbase.Encoder) string { + switch c.version() { case 0: - return c.hash.B58String() + return c.Hash().B58String() case 1: return base.Encode(c.bytesV1()) default: @@ -353,15 +362,20 @@ func (c *Cid) Encode(base mbase.Encoder) string { } // Hash returns the multihash contained by a Cid. -func (c *Cid) Hash() mh.Multihash { - return c.hash +func (c Cid) Hash() mh.Multihash { + // skip version length + _, n1 := binary.Uvarint(c) + // skip codec length + _, n2 := binary.Uvarint(c[n1:]) + + return mh.Multihash(c[n1+n2:]) } // Bytes returns the byte representation of a Cid. // The output of bytes can be parsed back into a Cid // with Cast(). -func (c *Cid) Bytes() []byte { - switch c.version { +func (c Cid) Bytes() []byte { + switch c.version() { case 0: return c.bytesV0() case 1: @@ -371,30 +385,19 @@ func (c *Cid) Bytes() []byte { } } -func (c *Cid) bytesV0() []byte { - return []byte(c.hash) +func (c Cid) bytesV0() []byte { + return []byte(c.Hash()) } -func (c *Cid) bytesV1() []byte { - // two 8 bytes (max) numbers plus hash - buf := make([]byte, 2*binary.MaxVarintLen64+len(c.hash)) - n := binary.PutUvarint(buf, c.version) - n += binary.PutUvarint(buf[n:], c.codec) - cn := copy(buf[n:], c.hash) - if cn != len(c.hash) { - panic("copy hash length is inconsistent") - } - - return buf[:n+len(c.hash)] +func (c Cid) bytesV1() []byte { + return []byte(c) } // Equals checks that two Cids are the same. // In order for two Cids to be considered equal, the // Version, the Codec and the Multihash must match. -func (c *Cid) Equals(o *Cid) bool { - return c.codec == o.codec && - c.version == o.version && - bytes.Equal(c.hash, o.hash) +func (c Cid) Equals(o Cid) bool { + return bytes.Equal(c, o) } // UnmarshalJSON parses the JSON representation of a Cid. @@ -419,9 +422,8 @@ func (c *Cid) UnmarshalJSON(b []byte) error { return err } - c.version = out.version - c.hash = out.hash - c.codec = out.codec + *c = out[:] + return nil } @@ -436,26 +438,26 @@ func (c Cid) MarshalJSON() ([]byte, error) { } // KeyString casts the result of cid.Bytes() as a string, and returns it. -func (c *Cid) KeyString() string { +func (c Cid) KeyString() string { return string(c.Bytes()) } // Loggable returns a Loggable (as defined by // https://godoc.org/github.com/ipfs/go-log). -func (c *Cid) Loggable() map[string]interface{} { +func (c Cid) Loggable() map[string]interface{} { return map[string]interface{}{ "cid": c, } } // Prefix builds and returns a Prefix out of a Cid. -func (c *Cid) Prefix() Prefix { - dec, _ := mh.Decode(c.hash) // assuming we got a valid multiaddr, this will not error +func (c Cid) Prefix() Prefix { + dec, _ := mh.Decode(c.Hash()) // assuming we got a valid multiaddr, this will not error return Prefix{ MhType: dec.Code, MhLength: dec.Length, - Version: c.version, - Codec: c.codec, + Version: c.version(), + Codec: c.codec(), } } @@ -474,7 +476,7 @@ type Prefix struct { // Sum uses the information in a prefix to perform a multihash.Sum() // and return a newly constructed Cid with the resulting multihash. -func (p Prefix) Sum(data []byte) (*Cid, error) { +func (p Prefix) Sum(data []byte) (Cid, error) { hash, err := mh.Sum(data, p.MhType, p.MhLength) if err != nil { return nil, err diff --git a/cid_fuzz.go b/cid_fuzz.go index 357e907..99842b5 100644 --- a/cid_fuzz.go +++ b/cid_fuzz.go @@ -23,7 +23,7 @@ func Fuzz(data []byte) int { if err != nil { panic(err.Error()) } - cid2 := &Cid{} + cid2 := Cid{} err = cid2.UnmarshalJSON(json) if err != nil { panic(err.Error()) diff --git a/cid_test.go b/cid_test.go index 97294c0..0b7f343 100644 --- a/cid_test.go +++ b/cid_test.go @@ -37,16 +37,16 @@ var tCodecs = map[uint64]string{ DecredTx: "decred-tx", } -func assertEqual(t *testing.T, a, b *Cid) { - if a.codec != b.codec { +func assertEqual(t *testing.T, a, b Cid) { + if a.codec() != b.codec() { t.Fatal("mismatch on type") } - if a.version != b.version { + if a.version() != b.version() { t.Fatal("mismatch on version") } - if !bytes.Equal(a.hash, b.hash) { + if !bytes.Equal(a.Hash(), b.Hash()) { t.Fatal("multihash mismatch") } } @@ -77,11 +77,7 @@ func TestBasicMarshaling(t *testing.T) { t.Fatal(err) } - cid := &Cid{ - codec: 7, - version: 1, - hash: h, - } + cid := newCid(1, 7, h) data := cid.Bytes() @@ -107,11 +103,7 @@ func TestBasesMarshaling(t *testing.T) { t.Fatal(err) } - cid := &Cid{ - codec: 7, - version: 1, - hash: h, - } + cid := newCid(1, 7, h) data := cid.Bytes() @@ -179,12 +171,12 @@ func TestV0Handling(t *testing.T) { t.Fatal(err) } - if cid.version != 0 { + if cid.version() != 0 { t.Fatal("should have gotten version 0 cid") } - if cid.hash.B58String() != old { - t.Fatal("marshaling roundtrip failed") + if cid.Hash().B58String() != old { + t.Fatalf("marshaling roundtrip failed: %s != %s", cid.Hash().B58String(), old) } if cid.String() != old { @@ -306,9 +298,7 @@ func TestPrefixRoundtrip(t *testing.T) { func Test16BytesVarint(t *testing.T) { data := []byte("this is some test content") hash, _ := mh.Sum(data, mh.SHA2_256, -1) - c := NewCidV1(DagCBOR, hash) - - c.codec = 1 << 63 + c := newCid(1, 1<<63, hash) _ = c.Bytes() } @@ -351,8 +341,8 @@ func TestParse(t *testing.T) { if err != nil { return err } - if cid.version != 0 { - return fmt.Errorf("expected version 0, got %s", string(cid.version)) + if cid.version() != 0 { + return fmt.Errorf("expected version 0, got %s", string(cid.version())) } actual := cid.Hash().B58String() if actual != expected { @@ -424,18 +414,18 @@ func TestJsonRoundTrip(t *testing.T) { } var actual Cid err = json.Unmarshal(enc, &actual) - if !exp.Equals(&actual) { + if !exp.Equals(actual) { t.Fatal("cids not equal for *Cid") } // Verify it works for a Cid. - enc, err = json.Marshal(*exp) + enc, err = json.Marshal(exp) if err != nil { t.Fatal(err) } var actual2 Cid err = json.Unmarshal(enc, &actual2) - if !exp.Equals(&actual2) { + if !exp.Equals(actual2) { t.Fatal("cids not equal for Cid") } } @@ -444,7 +434,10 @@ func BenchmarkStringV1(b *testing.B) { data := []byte("this is some test content") hash, _ := mh.Sum(data, mh.SHA2_256, -1) cid := NewCidV1(Raw, hash) + + b.ReportAllocs() b.ResetTimer() + count := 0 for i := 0; i < b.N; i++ { count += len(cid.String()) @@ -453,3 +446,22 @@ func BenchmarkStringV1(b *testing.B) { b.FailNow() } } + +// making sure we don't allocate when returning bytes +func BenchmarkBytesV1(b *testing.B) { + data := []byte("this is some test content") + hash, _ := mh.Sum(data, mh.SHA2_256, -1) + cid := NewCidV1(Raw, hash) + + b.ReportAllocs() + b.ResetTimer() + + count := 0 + for i := 0; i < b.N; i++ { + count += len(cid.Bytes()) + count += len([]byte(cid)) + } + if count != 36*2*b.N { + b.FailNow() + } +} diff --git a/set.go b/set.go index 7f68657..9d83ced 100644 --- a/set.go +++ b/set.go @@ -12,18 +12,18 @@ func NewSet() *Set { } // Add puts a Cid in the Set. -func (s *Set) Add(c *Cid) { +func (s *Set) Add(c Cid) { s.set[string(c.Bytes())] = struct{}{} } // Has returns if the Set contains a given Cid. -func (s *Set) Has(c *Cid) bool { +func (s *Set) Has(c Cid) bool { _, ok := s.set[string(c.Bytes())] return ok } // Remove deletes a Cid from the Set. -func (s *Set) Remove(c *Cid) { +func (s *Set) Remove(c Cid) { delete(s.set, string(c.Bytes())) } @@ -33,8 +33,8 @@ func (s *Set) Len() int { } // Keys returns the Cids in the set. -func (s *Set) Keys() []*Cid { - out := make([]*Cid, 0, len(s.set)) +func (s *Set) Keys() []Cid { + out := make([]Cid, 0, len(s.set)) for k := range s.set { c, _ := Cast([]byte(k)) out = append(out, c) @@ -44,7 +44,7 @@ func (s *Set) Keys() []*Cid { // Visit adds a Cid to the set only if it is // not in it already. -func (s *Set) Visit(c *Cid) bool { +func (s *Set) Visit(c Cid) bool { if !s.Has(c) { s.Add(c) return true @@ -55,7 +55,7 @@ func (s *Set) Visit(c *Cid) bool { // ForEach allows to run a custom function on each // Cid in the set. -func (s *Set) ForEach(f func(c *Cid) error) error { +func (s *Set) ForEach(f func(c Cid) error) error { for cs := range s.set { c, _ := Cast([]byte(cs)) err := f(c) diff --git a/set_test.go b/set_test.go index 38d7317..fa553d0 100644 --- a/set_test.go +++ b/set_test.go @@ -8,7 +8,7 @@ import ( mh "github.com/multiformats/go-multihash" ) -func makeRandomCid(t *testing.T) *Cid { +func makeRandomCid(t *testing.T) Cid { p := make([]byte, 256) _, err := rand.Read(p) if err != nil { @@ -20,11 +20,7 @@ func makeRandomCid(t *testing.T) *Cid { t.Fatal(err) } - cid := &Cid{ - codec: 7, - version: 1, - hash: h, - } + cid := NewCidV1(7, h) return cid } @@ -54,8 +50,8 @@ func TestSet(t *testing.T) { t.Error("visit should return false") } - foreach := []*Cid{} - foreachF := func(c *Cid) error { + foreach := []Cid{} + foreachF := func(c Cid) error { foreach = append(foreach, c) return nil } @@ -68,7 +64,7 @@ func TestSet(t *testing.T) { t.Error("ForEach should have visited 1 element") } - foreachErr := func(c *Cid) error { + foreachErr := func(c Cid) error { return errors.New("test") } From 92496b54940193fe7f121ef88c6e9c4c3a034e17 Mon Sep 17 00:00:00 2001 From: dignifiedquire Date: Thu, 3 May 2018 19:26:57 +0200 Subject: [PATCH 02/18] use string instead of []byte as underlying store --- builder.go | 4 ++-- cid.go | 43 ++++++++++++++++++++++++------------------- set.go | 13 ++++++------- 3 files changed, 32 insertions(+), 28 deletions(-) diff --git a/builder.go b/builder.go index 5dea278..661bcc3 100644 --- a/builder.go +++ b/builder.go @@ -36,7 +36,7 @@ func (p Prefix) WithCodec(c uint64) Builder { func (p V0Builder) Sum(data []byte) (Cid, error) { hash, err := mh.Sum(data, mh.SHA2_256, -1) if err != nil { - return nil, err + return EmptyCid, err } return NewCidV0(hash), nil } @@ -59,7 +59,7 @@ func (p V1Builder) Sum(data []byte) (Cid, error) { } hash, err := mh.Sum(data, p.MhType, mhLen) if err != nil { - return nil, err + return EmptyCid, err } return NewCidV1(p.Codec, hash), nil } diff --git a/cid.go b/cid.go index 9e1b950..72624ec 100644 --- a/cid.go +++ b/cid.go @@ -163,16 +163,19 @@ func newCid(version, codecType uint64, mhash mh.Multihash) Cid { // - version uvarint // - codec uvarint // - hash mh.Multihash -type Cid []byte +type Cid string + +var EmptyCid = Cid(string([]byte{})) func (c Cid) version() uint64 { - v, _ := binary.Uvarint(c) + v, _ := binary.Uvarint([]byte(c)) return v } func (c Cid) codec() uint64 { - _, n := binary.Uvarint(c) - codec, _ := binary.Uvarint(c[n:]) + bytes := []byte(c) + _, n := binary.Uvarint(bytes) + codec, _ := binary.Uvarint(bytes[n:]) return codec } @@ -192,7 +195,7 @@ func Parse(v interface{}) (Cid, error) { case Cid: return v2, nil default: - return nil, fmt.Errorf("can't parse %+v as Cid", v2) + return EmptyCid, fmt.Errorf("can't parse %+v as Cid", v2) } } @@ -210,13 +213,13 @@ func Parse(v interface{}) (Cid, error) { // as B58-encoded multihashes. func Decode(v string) (Cid, error) { if len(v) < 2 { - return nil, ErrCidTooShort + return EmptyCid, ErrCidTooShort } if len(v) == 46 && v[:2] == "Qm" { hash, err := mh.FromB58String(v) if err != nil { - return nil, err + return EmptyCid, err } return NewCidV0(hash), nil @@ -224,7 +227,7 @@ func Decode(v string) (Cid, error) { _, data, err := mbase.Decode(v) if err != nil { - return nil, err + return EmptyCid, err } return Cast(data) @@ -278,7 +281,7 @@ func Cast(data []byte) (Cid, error) { if len(data) == 34 && data[0] == 18 && data[1] == 32 { h, err := mh.Cast(data) if err != nil { - return nil, err + return EmptyCid, err } return NewCidV0(h), nil @@ -286,22 +289,22 @@ func Cast(data []byte) (Cid, error) { vers, n := binary.Uvarint(data) if err := uvError(n); err != nil { - return nil, err + return EmptyCid, err } if vers != 0 && vers != 1 { - return nil, fmt.Errorf("invalid cid version number: %d", vers) + return EmptyCid, fmt.Errorf("invalid cid version number: %d", vers) } _, cn := binary.Uvarint(data[n:]) if err := uvError(cn); err != nil { - return nil, err + return EmptyCid, err } rest := data[n+cn:] h, err := mh.Cast(rest) if err != nil { - return nil, err + return EmptyCid, err } return Cid(data[0 : n+cn+len(h)]), nil @@ -363,12 +366,13 @@ func (c Cid) Encode(base mbase.Encoder) string { // Hash returns the multihash contained by a Cid. func (c Cid) Hash() mh.Multihash { + bytes := []byte(c) // skip version length - _, n1 := binary.Uvarint(c) + _, n1 := binary.Uvarint(bytes) // skip codec length - _, n2 := binary.Uvarint(c[n1:]) + _, n2 := binary.Uvarint(bytes[n1:]) - return mh.Multihash(c[n1+n2:]) + return mh.Multihash(bytes[n1+n2:]) } // Bytes returns the byte representation of a Cid. @@ -397,7 +401,8 @@ func (c Cid) bytesV1() []byte { // In order for two Cids to be considered equal, the // Version, the Codec and the Multihash must match. func (c Cid) Equals(o Cid) bool { - return bytes.Equal(c, o) + // TODO: can we use regular string equality? + return bytes.Equal([]byte(c), []byte(o)) } // UnmarshalJSON parses the JSON representation of a Cid. @@ -479,7 +484,7 @@ type Prefix struct { func (p Prefix) Sum(data []byte) (Cid, error) { hash, err := mh.Sum(data, p.MhType, p.MhLength) if err != nil { - return nil, err + return EmptyCid, err } switch p.Version { @@ -488,7 +493,7 @@ func (p Prefix) Sum(data []byte) (Cid, error) { case 1: return NewCidV1(p.Codec, hash), nil default: - return nil, fmt.Errorf("invalid cid version") + return EmptyCid, fmt.Errorf("invalid cid version") } } diff --git a/set.go b/set.go index 9d83ced..f4015f3 100644 --- a/set.go +++ b/set.go @@ -3,28 +3,28 @@ package cid // Set is a implementation of a set of Cids, that is, a structure // to which holds a single copy of every Cids that is added to it. type Set struct { - set map[string]struct{} + set map[Cid]struct{} } // NewSet initializes and returns a new Set. func NewSet() *Set { - return &Set{set: make(map[string]struct{})} + return &Set{set: make(map[Cid]struct{})} } // Add puts a Cid in the Set. func (s *Set) Add(c Cid) { - s.set[string(c.Bytes())] = struct{}{} + s.set[c] = struct{}{} } // Has returns if the Set contains a given Cid. func (s *Set) Has(c Cid) bool { - _, ok := s.set[string(c.Bytes())] + _, ok := s.set[c] return ok } // Remove deletes a Cid from the Set. func (s *Set) Remove(c Cid) { - delete(s.set, string(c.Bytes())) + delete(s.set, c) } // Len returns how many elements the Set has. @@ -36,8 +36,7 @@ func (s *Set) Len() int { func (s *Set) Keys() []Cid { out := make([]Cid, 0, len(s.set)) for k := range s.set { - c, _ := Cast([]byte(k)) - out = append(out, c) + out = append(out, k) } return out } From 8009448a20591e767e6394be14fbde4b03820e54 Mon Sep 17 00:00:00 2001 From: dignifiedquire Date: Thu, 3 May 2018 19:30:05 +0200 Subject: [PATCH 03/18] fix KeyString() --- cid.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cid.go b/cid.go index 72624ec..099dd63 100644 --- a/cid.go +++ b/cid.go @@ -444,7 +444,7 @@ func (c Cid) MarshalJSON() ([]byte, error) { // KeyString casts the result of cid.Bytes() as a string, and returns it. func (c Cid) KeyString() string { - return string(c.Bytes()) + return string(c) } // Loggable returns a Loggable (as defined by From d7974d2277b030dab9c63a213c26c59c55656250 Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Sat, 25 Aug 2018 02:32:22 -0400 Subject: [PATCH 04/18] Export version() method, various other code cleanups. --- cid.go | 40 ++++++++++++++++++---------------------- cid_test.go | 29 +++++------------------------ 2 files changed, 23 insertions(+), 46 deletions(-) diff --git a/cid.go b/cid.go index 099dd63..12fa3bd 100644 --- a/cid.go +++ b/cid.go @@ -165,19 +165,7 @@ func newCid(version, codecType uint64, mhash mh.Multihash) Cid { // - hash mh.Multihash type Cid string -var EmptyCid = Cid(string([]byte{})) - -func (c Cid) version() uint64 { - v, _ := binary.Uvarint([]byte(c)) - return v -} - -func (c Cid) codec() uint64 { - bytes := []byte(c) - _, n := binary.Uvarint(bytes) - codec, _ := binary.Uvarint(bytes[n:]) - return codec -} +var EmptyCid = Cid("") // Parse is a short-hand function to perform Decode, Cast etc... on // a generic interface{} type. @@ -310,16 +298,25 @@ func Cast(data []byte) (Cid, error) { return Cid(data[0 : n+cn+len(h)]), nil } +// Version returns the Cid version. +func (c Cid) Version() uint64 { + v, _ := binary.Uvarint([]byte(c)) + return v +} + // Type returns the multicodec-packed content type of a Cid. func (c Cid) Type() uint64 { - return c.codec() + bytes := []byte(c) + _, n := binary.Uvarint(bytes) + codec, _ := binary.Uvarint(bytes[n:]) + return codec } // String returns the default string representation of a // Cid. Currently, Base58 is used as the encoding for the // multibase string. func (c Cid) String() string { - switch c.version() { + switch c.Version() { case 0: return c.Hash().B58String() case 1: @@ -337,7 +334,7 @@ func (c Cid) String() string { // String returns the string representation of a Cid // encoded is selected base func (c Cid) StringOfBase(base mbase.Encoding) (string, error) { - switch c.version() { + switch c.Version() { case 0: if base != mbase.Base58BTC { return "", ErrInvalidEncoding @@ -354,7 +351,7 @@ func (c Cid) StringOfBase(base mbase.Encoding) (string, error) { // when applicable. Version 0 Cid's are always in Base58 as they do // not take a multibase prefix. func (c Cid) Encode(base mbase.Encoder) string { - switch c.version() { + switch c.Version() { case 0: return c.Hash().B58String() case 1: @@ -379,7 +376,7 @@ func (c Cid) Hash() mh.Multihash { // The output of bytes can be parsed back into a Cid // with Cast(). func (c Cid) Bytes() []byte { - switch c.version() { + switch c.Version() { case 0: return c.bytesV0() case 1: @@ -401,8 +398,7 @@ func (c Cid) bytesV1() []byte { // In order for two Cids to be considered equal, the // Version, the Codec and the Multihash must match. func (c Cid) Equals(o Cid) bool { - // TODO: can we use regular string equality? - return bytes.Equal([]byte(c), []byte(o)) + return c == o } // UnmarshalJSON parses the JSON representation of a Cid. @@ -461,8 +457,8 @@ func (c Cid) Prefix() Prefix { return Prefix{ MhType: dec.Code, MhLength: dec.Length, - Version: c.version(), - Codec: c.codec(), + Version: c.Version(), + Codec: c.Type(), } } diff --git a/cid_test.go b/cid_test.go index 0b7f343..ab1e668 100644 --- a/cid_test.go +++ b/cid_test.go @@ -38,11 +38,11 @@ var tCodecs = map[uint64]string{ } func assertEqual(t *testing.T, a, b Cid) { - if a.codec() != b.codec() { + if a.Type() != b.Type() { t.Fatal("mismatch on type") } - if a.version() != b.version() { + if a.Version() != b.Version() { t.Fatal("mismatch on version") } @@ -171,7 +171,7 @@ func TestV0Handling(t *testing.T) { t.Fatal(err) } - if cid.version() != 0 { + if cid.Version() != 0 { t.Fatal("should have gotten version 0 cid") } @@ -341,8 +341,8 @@ func TestParse(t *testing.T) { if err != nil { return err } - if cid.version() != 0 { - return fmt.Errorf("expected version 0, got %s", string(cid.version())) + if cid.Version() != 0 { + return fmt.Errorf("expected version 0, got %s", string(cid.Version())) } actual := cid.Hash().B58String() if actual != expected { @@ -446,22 +446,3 @@ func BenchmarkStringV1(b *testing.B) { b.FailNow() } } - -// making sure we don't allocate when returning bytes -func BenchmarkBytesV1(b *testing.B) { - data := []byte("this is some test content") - hash, _ := mh.Sum(data, mh.SHA2_256, -1) - cid := NewCidV1(Raw, hash) - - b.ReportAllocs() - b.ResetTimer() - - count := 0 - for i := 0; i < b.N; i++ { - count += len(cid.Bytes()) - count += len([]byte(cid)) - } - if count != 36*2*b.N { - b.FailNow() - } -} From 9831436a6fd8a5737d698c437759ddb9412fc615 Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Sat, 25 Aug 2018 02:52:23 -0400 Subject: [PATCH 05/18] Change string representation to represent actual binary representation. --- cid.go | 48 +++++++++++++++++++----------------------------- cid_test.go | 6 +++--- 2 files changed, 22 insertions(+), 32 deletions(-) diff --git a/cid.go b/cid.go index 12fa3bd..8d71310 100644 --- a/cid.go +++ b/cid.go @@ -133,20 +133,16 @@ var CodecToStr = map[uint64]string{ // compatibility with the plain-multihash format used used in IPFS. // NewCidV1 should be used preferentially. func NewCidV0(mhash mh.Multihash) Cid { - return newCid(0, DagProtobuf, mhash) + return Cid(mhash) } // NewCidV1 returns a new Cid using the given multicodec-packed // content type. func NewCidV1(codecType uint64, mhash mh.Multihash) Cid { - return newCid(1, codecType, mhash) -} - -func newCid(version, codecType uint64, mhash mh.Multihash) Cid { hashlen := len(mhash) // two 8 bytes (max) numbers plus hash buf := make([]byte, 2*binary.MaxVarintLen64+hashlen) - n := binary.PutUvarint(buf, version) + n := binary.PutUvarint(buf, 1) n += binary.PutUvarint(buf[n:], codecType) cn := copy(buf[n:], mhash) if cn != hashlen { @@ -280,8 +276,8 @@ func Cast(data []byte) (Cid, error) { return EmptyCid, err } - if vers != 0 && vers != 1 { - return EmptyCid, fmt.Errorf("invalid cid version number: %d", vers) + if vers != 1 { + return EmptyCid, fmt.Errorf("expected 1 as the cid version number, got: %d", vers) } _, cn := binary.Uvarint(data[n:]) @@ -300,12 +296,17 @@ func Cast(data []byte) (Cid, error) { // Version returns the Cid version. func (c Cid) Version() uint64 { - v, _ := binary.Uvarint([]byte(c)) - return v + if len(c) == 34 && c[0] == 18 && c[1] == 32 { + return 0 + } + return 1 } // Type returns the multicodec-packed content type of a Cid. func (c Cid) Type() uint64 { + if c.Version() == 0 { + return DagProtobuf + } bytes := []byte(c) _, n := binary.Uvarint(bytes) codec, _ := binary.Uvarint(bytes[n:]) @@ -320,7 +321,7 @@ func (c Cid) String() string { case 0: return c.Hash().B58String() case 1: - mbstr, err := mbase.Encode(mbase.Base58BTC, c.bytesV1()) + mbstr, err := mbase.Encode(mbase.Base58BTC, c.Bytes()) if err != nil { panic("should not error with hardcoded mbase: " + err.Error()) } @@ -341,7 +342,7 @@ func (c Cid) StringOfBase(base mbase.Encoding) (string, error) { } return c.Hash().B58String(), nil case 1: - return mbase.Encode(base, c.bytesV1()) + return mbase.Encode(base, c.Bytes()) default: panic("not possible to reach this point") } @@ -355,7 +356,7 @@ func (c Cid) Encode(base mbase.Encoder) string { case 0: return c.Hash().B58String() case 1: - return base.Encode(c.bytesV1()) + return base.Encode(c.Bytes()) default: panic("not possible to reach this point") } @@ -363,6 +364,10 @@ func (c Cid) Encode(base mbase.Encoder) string { // Hash returns the multihash contained by a Cid. func (c Cid) Hash() mh.Multihash { + if c.Version() == 0 { + return mh.Multihash([]byte(c)) + } + bytes := []byte(c) // skip version length _, n1 := binary.Uvarint(bytes) @@ -376,21 +381,6 @@ func (c Cid) Hash() mh.Multihash { // The output of bytes can be parsed back into a Cid // with Cast(). func (c Cid) Bytes() []byte { - switch c.Version() { - case 0: - return c.bytesV0() - case 1: - return c.bytesV1() - default: - panic("not possible to reach this point") - } -} - -func (c Cid) bytesV0() []byte { - return []byte(c.Hash()) -} - -func (c Cid) bytesV1() []byte { return []byte(c) } @@ -438,7 +428,7 @@ func (c Cid) MarshalJSON() ([]byte, error) { return []byte(fmt.Sprintf("{\"/\":\"%s\"}", c.String())), nil } -// KeyString casts the result of cid.Bytes() as a string, and returns it. +// KeyString returns the binary representation of the Cid as a string func (c Cid) KeyString() string { return string(c) } diff --git a/cid_test.go b/cid_test.go index ab1e668..1b1e9c4 100644 --- a/cid_test.go +++ b/cid_test.go @@ -77,7 +77,7 @@ func TestBasicMarshaling(t *testing.T) { t.Fatal(err) } - cid := newCid(1, 7, h) + cid := NewCidV1(7, h) data := cid.Bytes() @@ -103,7 +103,7 @@ func TestBasesMarshaling(t *testing.T) { t.Fatal(err) } - cid := newCid(1, 7, h) + cid := NewCidV1(7, h) data := cid.Bytes() @@ -298,7 +298,7 @@ func TestPrefixRoundtrip(t *testing.T) { func Test16BytesVarint(t *testing.T) { data := []byte("this is some test content") hash, _ := mh.Sum(data, mh.SHA2_256, -1) - c := newCid(1, 1<<63, hash) + c := NewCidV1(1<<63, hash) _ = c.Bytes() } From b5a08dcaaac3e1aae3daf98fa500d2094fcce3d8 Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Sat, 25 Aug 2018 03:05:17 -0400 Subject: [PATCH 06/18] Change EmptyCid to just Nil. --- builder.go | 4 ++-- cid.go | 24 ++++++++++++------------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/builder.go b/builder.go index 661bcc3..a5a4806 100644 --- a/builder.go +++ b/builder.go @@ -36,7 +36,7 @@ func (p Prefix) WithCodec(c uint64) Builder { func (p V0Builder) Sum(data []byte) (Cid, error) { hash, err := mh.Sum(data, mh.SHA2_256, -1) if err != nil { - return EmptyCid, err + return Nil, err } return NewCidV0(hash), nil } @@ -59,7 +59,7 @@ func (p V1Builder) Sum(data []byte) (Cid, error) { } hash, err := mh.Sum(data, p.MhType, mhLen) if err != nil { - return EmptyCid, err + return Nil, err } return NewCidV1(p.Codec, hash), nil } diff --git a/cid.go b/cid.go index 8d71310..1b55b4b 100644 --- a/cid.go +++ b/cid.go @@ -161,7 +161,7 @@ func NewCidV1(codecType uint64, mhash mh.Multihash) Cid { // - hash mh.Multihash type Cid string -var EmptyCid = Cid("") +var Nil = Cid("") // Parse is a short-hand function to perform Decode, Cast etc... on // a generic interface{} type. @@ -179,7 +179,7 @@ func Parse(v interface{}) (Cid, error) { case Cid: return v2, nil default: - return EmptyCid, fmt.Errorf("can't parse %+v as Cid", v2) + return Nil, fmt.Errorf("can't parse %+v as Cid", v2) } } @@ -197,13 +197,13 @@ func Parse(v interface{}) (Cid, error) { // as B58-encoded multihashes. func Decode(v string) (Cid, error) { if len(v) < 2 { - return EmptyCid, ErrCidTooShort + return Nil, ErrCidTooShort } if len(v) == 46 && v[:2] == "Qm" { hash, err := mh.FromB58String(v) if err != nil { - return EmptyCid, err + return Nil, err } return NewCidV0(hash), nil @@ -211,7 +211,7 @@ func Decode(v string) (Cid, error) { _, data, err := mbase.Decode(v) if err != nil { - return EmptyCid, err + return Nil, err } return Cast(data) @@ -265,7 +265,7 @@ func Cast(data []byte) (Cid, error) { if len(data) == 34 && data[0] == 18 && data[1] == 32 { h, err := mh.Cast(data) if err != nil { - return EmptyCid, err + return Nil, err } return NewCidV0(h), nil @@ -273,22 +273,22 @@ func Cast(data []byte) (Cid, error) { vers, n := binary.Uvarint(data) if err := uvError(n); err != nil { - return EmptyCid, err + return Nil, err } if vers != 1 { - return EmptyCid, fmt.Errorf("expected 1 as the cid version number, got: %d", vers) + return Nil, fmt.Errorf("expected 1 as the cid version number, got: %d", vers) } _, cn := binary.Uvarint(data[n:]) if err := uvError(cn); err != nil { - return EmptyCid, err + return Nil, err } rest := data[n+cn:] h, err := mh.Cast(rest) if err != nil { - return EmptyCid, err + return Nil, err } return Cid(data[0 : n+cn+len(h)]), nil @@ -470,7 +470,7 @@ type Prefix struct { func (p Prefix) Sum(data []byte) (Cid, error) { hash, err := mh.Sum(data, p.MhType, p.MhLength) if err != nil { - return EmptyCid, err + return Nil, err } switch p.Version { @@ -479,7 +479,7 @@ func (p Prefix) Sum(data []byte) (Cid, error) { case 1: return NewCidV1(p.Codec, hash), nil default: - return EmptyCid, fmt.Errorf("invalid cid version") + return Nil, fmt.Errorf("invalid cid version") } } From cad52160a4d980c856f1a6464c680724f3879186 Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Sat, 25 Aug 2018 03:15:22 -0400 Subject: [PATCH 07/18] Ensure we always have a valid Cid by hiding the type in a struct. --- cid.go | 25 +++++++++++++------------ set.go | 2 +- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/cid.go b/cid.go index 1b55b4b..ec0ebdb 100644 --- a/cid.go +++ b/cid.go @@ -133,7 +133,7 @@ var CodecToStr = map[uint64]string{ // compatibility with the plain-multihash format used used in IPFS. // NewCidV1 should be used preferentially. func NewCidV0(mhash mh.Multihash) Cid { - return Cid(mhash) + return Cid{string(mhash)} } // NewCidV1 returns a new Cid using the given multicodec-packed @@ -149,7 +149,7 @@ func NewCidV1(codecType uint64, mhash mh.Multihash) Cid { panic("copy hash length is inconsistent") } - return Cid(buf[:n+hashlen]) + return Cid{string(buf[:n+hashlen])} } // Cid represents a self-describing content adressed @@ -159,9 +159,9 @@ func NewCidV1(codecType uint64, mhash mh.Multihash) Cid { // - version uvarint // - codec uvarint // - hash mh.Multihash -type Cid string +type Cid struct{ string } -var Nil = Cid("") +var Nil = Cid{} // Parse is a short-hand function to perform Decode, Cast etc... on // a generic interface{} type. @@ -291,12 +291,12 @@ func Cast(data []byte) (Cid, error) { return Nil, err } - return Cid(data[0 : n+cn+len(h)]), nil + return Cid{string(data[0 : n+cn+len(h)])}, nil } // Version returns the Cid version. func (c Cid) Version() uint64 { - if len(c) == 34 && c[0] == 18 && c[1] == 32 { + if len(c.string) == 34 && c.string[0] == 18 && c.string[1] == 32 { return 0 } return 1 @@ -307,7 +307,7 @@ func (c Cid) Type() uint64 { if c.Version() == 0 { return DagProtobuf } - bytes := []byte(c) + bytes := c.Bytes() _, n := binary.Uvarint(bytes) codec, _ := binary.Uvarint(bytes[n:]) return codec @@ -364,11 +364,12 @@ func (c Cid) Encode(base mbase.Encoder) string { // Hash returns the multihash contained by a Cid. func (c Cid) Hash() mh.Multihash { + bytes := c.Bytes() + if c.Version() == 0 { - return mh.Multihash([]byte(c)) + return mh.Multihash(bytes) } - bytes := []byte(c) // skip version length _, n1 := binary.Uvarint(bytes) // skip codec length @@ -381,7 +382,7 @@ func (c Cid) Hash() mh.Multihash { // The output of bytes can be parsed back into a Cid // with Cast(). func (c Cid) Bytes() []byte { - return []byte(c) + return []byte(c.string) } // Equals checks that two Cids are the same. @@ -413,7 +414,7 @@ func (c *Cid) UnmarshalJSON(b []byte) error { return err } - *c = out[:] + *c = Cid{out.string[:]} return nil } @@ -430,7 +431,7 @@ func (c Cid) MarshalJSON() ([]byte, error) { // KeyString returns the binary representation of the Cid as a string func (c Cid) KeyString() string { - return string(c) + return c.string } // Loggable returns a Loggable (as defined by diff --git a/set.go b/set.go index f4015f3..4591007 100644 --- a/set.go +++ b/set.go @@ -56,7 +56,7 @@ func (s *Set) Visit(c Cid) bool { // Cid in the set. func (s *Set) ForEach(f func(c Cid) error) error { for cs := range s.set { - c, _ := Cast([]byte(cs)) + c, _ := Cast(cs.Bytes()) err := f(c) if err != nil { return err From 426ebe9e5598f7b60a7f1acff7eb0195aa5c90ba Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Sat, 25 Aug 2018 15:26:16 -0400 Subject: [PATCH 08/18] Simplify assignment in UnmarshalJSON. --- cid.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cid.go b/cid.go index ec0ebdb..2cfdb1b 100644 --- a/cid.go +++ b/cid.go @@ -414,7 +414,7 @@ func (c *Cid) UnmarshalJSON(b []byte) error { return err } - *c = Cid{out.string[:]} + *c = out return nil } From 667c6a941860176a2f4e2fd623705c2e9b608c7d Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Sat, 25 Aug 2018 15:27:05 -0400 Subject: [PATCH 09/18] Avoid allocating memory in Type() method. --- cid.go | 13 ++++++------- varint.go | 34 ++++++++++++++++++++++++++++++++++ varint_test.go | 22 ++++++++++++++++++++++ 3 files changed, 62 insertions(+), 7 deletions(-) create mode 100644 varint.go create mode 100644 varint_test.go diff --git a/cid.go b/cid.go index 2cfdb1b..786dbf9 100644 --- a/cid.go +++ b/cid.go @@ -159,7 +159,7 @@ func NewCidV1(codecType uint64, mhash mh.Multihash) Cid { // - version uvarint // - codec uvarint // - hash mh.Multihash -type Cid struct{ string } +type Cid struct{ str string } var Nil = Cid{} @@ -296,7 +296,7 @@ func Cast(data []byte) (Cid, error) { // Version returns the Cid version. func (c Cid) Version() uint64 { - if len(c.string) == 34 && c.string[0] == 18 && c.string[1] == 32 { + if len(c.str) == 34 && c.str[0] == 18 && c.str[1] == 32 { return 0 } return 1 @@ -307,9 +307,8 @@ func (c Cid) Type() uint64 { if c.Version() == 0 { return DagProtobuf } - bytes := c.Bytes() - _, n := binary.Uvarint(bytes) - codec, _ := binary.Uvarint(bytes[n:]) + _, n := uvarint(c.str) + codec, _ := uvarint(c.str[n:]) return codec } @@ -382,7 +381,7 @@ func (c Cid) Hash() mh.Multihash { // The output of bytes can be parsed back into a Cid // with Cast(). func (c Cid) Bytes() []byte { - return []byte(c.string) + return []byte(c.str) } // Equals checks that two Cids are the same. @@ -431,7 +430,7 @@ func (c Cid) MarshalJSON() ([]byte, error) { // KeyString returns the binary representation of the Cid as a string func (c Cid) KeyString() string { - return c.string + return c.str } // Loggable returns a Loggable (as defined by diff --git a/varint.go b/varint.go new file mode 100644 index 0000000..391c1f4 --- /dev/null +++ b/varint.go @@ -0,0 +1,34 @@ +package cid + +// Version of varint function that work with a string rather than +// []byte to avoid unnecessary allocation + +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license as given at https://golang.org/LICENSE + +// uvarint decodes a uint64 from buf and returns that value and the +// number of characters read (> 0). If an error occurred, the value is 0 +// and the number of bytes n is <= 0 meaning: +// +// n == 0: buf too small +// n < 0: value larger than 64 bits (overflow) +// and -n is the number of bytes read +// +func uvarint(buf string) (uint64, int) { + var x uint64 + var s uint + // we have a binary string so we can't use a range loope + for i := 0; i < len(buf); i++ { + b := buf[i] + if b < 0x80 { + if i > 9 || i == 9 && b > 1 { + return 0, -(i + 1) // overflow + } + return x | uint64(b)< Date: Tue, 28 Aug 2018 22:30:41 -0400 Subject: [PATCH 10/18] Add IsNil() method. --- cid.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cid.go b/cid.go index 786dbf9..cf7fcc6 100644 --- a/cid.go +++ b/cid.go @@ -161,8 +161,17 @@ func NewCidV1(codecType uint64, mhash mh.Multihash) Cid { // - hash mh.Multihash type Cid struct{ str string } +// Nil can be used to represent a nil Cid, using Cid{} directly is +// also acceptable. var Nil = Cid{} +// Nil returns true if a Cid is uninitialized or the Nil value. +// Calling any other methods on an uninitialized Cid will result in +// undefined behavior. +func (c Cid) IsNil() bool { + return c.str == "" +} + // Parse is a short-hand function to perform Decode, Cast etc... on // a generic interface{} type. func Parse(v interface{}) (Cid, error) { From 440a1c1a5a7648ba66d4e2f8a0bda21668f44381 Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Wed, 29 Aug 2018 22:29:11 -0400 Subject: [PATCH 11/18] Removed description of layout of CID as it is not correct for CIDv0. --- cid.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cid.go b/cid.go index cf7fcc6..fb6496e 100644 --- a/cid.go +++ b/cid.go @@ -155,10 +155,6 @@ func NewCidV1(codecType uint64, mhash mh.Multihash) Cid { // Cid represents a self-describing content adressed // identifier. It is formed by a Version, a Codec (which indicates // a multicodec-packed content type) and a Multihash. -// Byte layout: [version, codec, multihash] -// - version uvarint -// - codec uvarint -// - hash mh.Multihash type Cid struct{ str string } // Nil can be used to represent a nil Cid, using Cid{} directly is From 7b4617fa6e62d7e1c7303ec51305668dae18c495 Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Sat, 1 Sep 2018 00:09:38 -0400 Subject: [PATCH 12/18] Eliminate unnecessary copy of Cid now that its an immutable string. --- set.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/set.go b/set.go index 4591007..eb3b3f0 100644 --- a/set.go +++ b/set.go @@ -55,8 +55,7 @@ func (s *Set) Visit(c Cid) bool { // ForEach allows to run a custom function on each // Cid in the set. func (s *Set) ForEach(f func(c Cid) error) error { - for cs := range s.set { - c, _ := Cast(cs.Bytes()) + for c := range s.set { err := f(c) if err != nil { return err @@ -64,4 +63,3 @@ func (s *Set) ForEach(f func(c Cid) error) error { } return nil } - From 643f78a8f906161860652632df7261616259debf Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Wed, 5 Sep 2018 03:26:26 -0400 Subject: [PATCH 13/18] Change 'IsNil' method to 'Defined'. --- cid.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cid.go b/cid.go index fb6496e..a9ef2cb 100644 --- a/cid.go +++ b/cid.go @@ -161,11 +161,11 @@ type Cid struct{ str string } // also acceptable. var Nil = Cid{} -// Nil returns true if a Cid is uninitialized or the Nil value. -// Calling any other methods on an uninitialized Cid will result in +// Defined returns true if a Cid is defined +// Calling any other methods on an undefined Cid will result in // undefined behavior. -func (c Cid) IsNil() bool { - return c.str == "" +func (c Cid) Defined() bool { + return c.str != "" } // Parse is a short-hand function to perform Decode, Cast etc... on From 67a2bcf7e774019f803036b667d9aeb9ce286a9a Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Wed, 5 Sep 2018 15:42:14 -0400 Subject: [PATCH 14/18] Change 'Nil' constant to 'Undef'. --- builder.go | 4 ++-- cid.go | 28 ++++++++++++++-------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/builder.go b/builder.go index a5a4806..a168832 100644 --- a/builder.go +++ b/builder.go @@ -36,7 +36,7 @@ func (p Prefix) WithCodec(c uint64) Builder { func (p V0Builder) Sum(data []byte) (Cid, error) { hash, err := mh.Sum(data, mh.SHA2_256, -1) if err != nil { - return Nil, err + return Undef, err } return NewCidV0(hash), nil } @@ -59,7 +59,7 @@ func (p V1Builder) Sum(data []byte) (Cid, error) { } hash, err := mh.Sum(data, p.MhType, mhLen) if err != nil { - return Nil, err + return Undef, err } return NewCidV1(p.Codec, hash), nil } diff --git a/cid.go b/cid.go index a9ef2cb..f04c95d 100644 --- a/cid.go +++ b/cid.go @@ -157,9 +157,9 @@ func NewCidV1(codecType uint64, mhash mh.Multihash) Cid { // a multicodec-packed content type) and a Multihash. type Cid struct{ str string } -// Nil can be used to represent a nil Cid, using Cid{} directly is -// also acceptable. -var Nil = Cid{} +// Undef can be used to represent a nil or undefined Cid, using Cid{} +// directly is also acceptable. +var Undef = Cid{} // Defined returns true if a Cid is defined // Calling any other methods on an undefined Cid will result in @@ -184,7 +184,7 @@ func Parse(v interface{}) (Cid, error) { case Cid: return v2, nil default: - return Nil, fmt.Errorf("can't parse %+v as Cid", v2) + return Undef, fmt.Errorf("can't parse %+v as Cid", v2) } } @@ -202,13 +202,13 @@ func Parse(v interface{}) (Cid, error) { // as B58-encoded multihashes. func Decode(v string) (Cid, error) { if len(v) < 2 { - return Nil, ErrCidTooShort + return Undef, ErrCidTooShort } if len(v) == 46 && v[:2] == "Qm" { hash, err := mh.FromB58String(v) if err != nil { - return Nil, err + return Undef, err } return NewCidV0(hash), nil @@ -216,7 +216,7 @@ func Decode(v string) (Cid, error) { _, data, err := mbase.Decode(v) if err != nil { - return Nil, err + return Undef, err } return Cast(data) @@ -270,7 +270,7 @@ func Cast(data []byte) (Cid, error) { if len(data) == 34 && data[0] == 18 && data[1] == 32 { h, err := mh.Cast(data) if err != nil { - return Nil, err + return Undef, err } return NewCidV0(h), nil @@ -278,22 +278,22 @@ func Cast(data []byte) (Cid, error) { vers, n := binary.Uvarint(data) if err := uvError(n); err != nil { - return Nil, err + return Undef, err } if vers != 1 { - return Nil, fmt.Errorf("expected 1 as the cid version number, got: %d", vers) + return Undef, fmt.Errorf("expected 1 as the cid version number, got: %d", vers) } _, cn := binary.Uvarint(data[n:]) if err := uvError(cn); err != nil { - return Nil, err + return Undef, err } rest := data[n+cn:] h, err := mh.Cast(rest) if err != nil { - return Nil, err + return Undef, err } return Cid{string(data[0 : n+cn+len(h)])}, nil @@ -475,7 +475,7 @@ type Prefix struct { func (p Prefix) Sum(data []byte) (Cid, error) { hash, err := mh.Sum(data, p.MhType, p.MhLength) if err != nil { - return Nil, err + return Undef, err } switch p.Version { @@ -484,7 +484,7 @@ func (p Prefix) Sum(data []byte) (Cid, error) { case 1: return NewCidV1(p.Codec, hash), nil default: - return Nil, fmt.Errorf("invalid cid version") + return Undef, fmt.Errorf("invalid cid version") } } From 46dd393ad15524de0ba15557e7cd3c4d691a3491 Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Fri, 7 Sep 2018 14:03:03 -0400 Subject: [PATCH 15/18] Handel undefined Cid is JSON representation. --- cid.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cid.go b/cid.go index f04c95d..ececb26 100644 --- a/cid.go +++ b/cid.go @@ -404,10 +404,15 @@ func (c *Cid) UnmarshalJSON(b []byte) error { obj := struct { CidTarget string `json:"/"` }{} - err := json.Unmarshal(b, &obj) + objptr := &obj + err := json.Unmarshal(b, &objptr) if err != nil { return err } + if objptr == nil { + *c = Cid{} + return nil + } if obj.CidTarget == "" { return fmt.Errorf("cid was incorrectly formatted") @@ -430,6 +435,9 @@ func (c *Cid) UnmarshalJSON(b []byte) error { // Note that this formatting comes from the IPLD specification // (https://github.com/ipld/specs/tree/master/ipld) func (c Cid) MarshalJSON() ([]byte, error) { + if !c.Defined() { + return []byte("null"), nil + } return []byte(fmt.Sprintf("{\"/\":\"%s\"}", c.String())), nil } From dfc48d3ec4812d9b0203edd8f0b517f2273604e0 Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Mon, 10 Sep 2018 05:10:23 -0400 Subject: [PATCH 16/18] Make sure we have a SHA2_256, length 32 hash when creating a CidV0. --- cid.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cid.go b/cid.go index ececb26..cfcbcde 100644 --- a/cid.go +++ b/cid.go @@ -133,6 +133,15 @@ var CodecToStr = map[uint64]string{ // compatibility with the plain-multihash format used used in IPFS. // NewCidV1 should be used preferentially. func NewCidV0(mhash mh.Multihash) Cid { + // Need to make sure hash is valid for CidV0 otherwise we will + // incorrectly detect it as CidV1 in the Version() method + dec, err := mh.Decode(mhash) + if err != nil { + panic(err) + } + if dec.Code != mh.SHA2_256 || dec.Length != 32 { + panic("invalid hash for cidv0") + } return Cid{string(mhash)} } From f0033600ca4804a159c8ad0646a5eb961943ddc9 Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Tue, 11 Sep 2018 14:18:23 -0400 Subject: [PATCH 17/18] Gx update go-multibase. --- package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 8b821c4..6c55611 100644 --- a/package.json +++ b/package.json @@ -15,9 +15,9 @@ }, { "author": "whyrusleeping", - "hash": "QmSbvata2WqNkqGtZNg8MR3SKwnB8iQ7vTPJgWqB8bC5kR", + "hash": "QmekxXDhCxCJRNuzmHreuaT3BsuJcsjcXWNrtV9C8DRHtd", "name": "go-multibase", - "version": "0.2.7" + "version": "0.3.0" } ], "gxVersion": "0.8.0", From 6e296c5c49ad84dc6a44af69fa1fe4e1245cd0cf Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Tue, 11 Sep 2018 19:18:20 -0400 Subject: [PATCH 18/18] gx publish 0.9.0 --- .gx/lastpubver | 2 +- package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gx/lastpubver b/.gx/lastpubver index 3d7db28..1401252 100644 --- a/.gx/lastpubver +++ b/.gx/lastpubver @@ -1 +1 @@ -0.8.0: QmZFbDTY9jfSBms2MchvYM9oYRbAF19K7Pby47yDBfpPrb +0.9.0: QmPSQnBKM9g7BaUcZCvswUJVscQ1ipjmwxN5PXCjkp9EQ7 diff --git a/package.json b/package.json index 6c55611..7d9a6c4 100644 --- a/package.json +++ b/package.json @@ -25,6 +25,6 @@ "license": "MIT", "name": "go-cid", "releaseCmd": "git commit -a -m \"gx publish $VERSION\"", - "version": "0.8.0" + "version": "0.9.0" }