From c38dd39b7b0b3cf6993c42586d85c8ab2ee99f70 Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Sat, 4 Aug 2018 05:40:15 -0400 Subject: [PATCH 1/5] Convert Cid type to an interface. --- cid-fmt/main.go | 10 ++++---- cid.go | 66 ++++++++++++++++++++++++++++++------------------- cid_test.go | 26 ++++++------------- set.go | 14 +++++------ 4 files changed, 60 insertions(+), 56 deletions(-) diff --git a/cid-fmt/main.go b/cid-fmt/main.go index 7dacb8a..394affe 100644 --- a/cid-fmt/main.go +++ b/cid-fmt/main.go @@ -46,7 +46,7 @@ func main() { usage() } newBase := mb.Encoding(-1) - var verConv func(cid *c.Cid) (*c.Cid, error) + var verConv func(cid c.Cid) (c.Cid, error) args := os.Args[1:] outer: for { @@ -132,7 +132,7 @@ func errorMsg(fmtStr string, a ...interface{}) { exitCode = 1 } -func decode(v string) (mb.Encoding, *c.Cid, error) { +func decode(v string) (mb.Encoding, c.Cid, error) { if len(v) < 2 { return 0, nil, c.ErrCidTooShort } @@ -158,7 +158,7 @@ func decode(v string) (mb.Encoding, *c.Cid, error) { const ERR_STR = "!ERROR!" -func fmtCid(fmtStr string, base mb.Encoding, cid *c.Cid) (string, error) { +func fmtCid(fmtStr string, base mb.Encoding, cid c.Cid) (string, error) { p := cid.Prefix() out := new(bytes.Buffer) var err error @@ -265,13 +265,13 @@ func encode(base mb.Encoding, data []byte, strip bool) string { return str } -func toCidV0(cid *c.Cid) (*c.Cid, error) { +func toCidV0(cid c.Cid) (c.Cid, error) { if cid.Type() != c.DagProtobuf { return nil, fmt.Errorf("can't convert non-protobuf nodes to cidv0") } return c.NewCidV0(cid.Hash()), nil } -func toCidV1(cid *c.Cid) (*c.Cid, error) { +func toCidV1(cid c.Cid) (c.Cid, error) { return c.NewCidV1(cid.Type(), cid.Hash()), nil } diff --git a/cid.go b/cid.go index 7859f75..487f633 100644 --- a/cid.go +++ b/cid.go @@ -31,6 +31,17 @@ import ( mh "github.com/multiformats/go-multihash" ) +type Cid interface { + Type() uint64 + String() string + StringOfBase(mbase.Encoding) (string, error) + Hash() mh.Multihash + Bytes() []byte + Equals(o Cid) bool + KeyString() string + Prefix() Prefix +} + // UnsupportedVersionString just holds an error message const UnsupportedVersionString = "" @@ -132,8 +143,8 @@ var CodecToStr = map[uint64]string{ // They exist to allow IPFS to work with Cids while keeping // compatibility with the plain-multihash format used used in IPFS. // NewCidV1 should be used preferentially. -func NewCidV0(mhash mh.Multihash) *Cid { - return &Cid{ +func NewCidV0(mhash mh.Multihash) *cid_ { + return &cid_{ version: 0, codec: DagProtobuf, hash: mhash, @@ -142,8 +153,8 @@ func NewCidV0(mhash mh.Multihash) *Cid { // NewCidV1 returns a new Cid using the given multicodec-packed // content type. -func NewCidV1(codecType uint64, mhash mh.Multihash) *Cid { - return &Cid{ +func NewCidV1(codecType uint64, mhash mh.Multihash) *cid_ { + return &cid_{ version: 1, codec: codecType, hash: mhash, @@ -174,7 +185,7 @@ func NewPrefixV1(codecType uint64, mhType uint64) Prefix { // Cid represents a self-describing content adressed // identifier. It is formed by a Version, a Codec (which indicates // a multicodec-packed content type) and a Multihash. -type Cid struct { +type cid_ struct { version uint64 codec uint64 hash mh.Multihash @@ -182,7 +193,7 @@ type Cid struct { // Parse is a short-hand function to perform Decode, Cast etc... on // a generic interface{} type. -func Parse(v interface{}) (*Cid, error) { +func Parse(v interface{}) (*cid_, error) { switch v2 := v.(type) { case string: if strings.Contains(v2, "/ipfs/") { @@ -193,7 +204,7 @@ func Parse(v interface{}) (*Cid, error) { return Cast(v2) case mh.Multihash: return NewCidV0(v2), nil - case *Cid: + case *cid_: return v2, nil default: return nil, fmt.Errorf("can't parse %+v as Cid", v2) @@ -212,7 +223,7 @@ func Parse(v interface{}) (*Cid, error) { // Decode will also detect and parse CidV0 strings. Strings // starting with "Qm" are considered CidV0 and treated directly // as B58-encoded multihashes. -func Decode(v string) (*Cid, error) { +func Decode(v string) (*cid_, error) { if len(v) < 2 { return nil, ErrCidTooShort } @@ -256,14 +267,14 @@ func uvError(read int) error { // // Please use decode when parsing a regular Cid string, as Cast does not // expect multibase-encoded data. Cast accepts the output of Cid.Bytes(). -func Cast(data []byte) (*Cid, error) { +func Cast(data []byte) (*cid_, error) { if len(data) == 34 && data[0] == 18 && data[1] == 32 { h, err := mh.Cast(data) if err != nil { return nil, err } - return &Cid{ + return &cid_{ codec: DagProtobuf, version: 0, hash: h, @@ -290,7 +301,7 @@ func Cast(data []byte) (*Cid, error) { return nil, err } - return &Cid{ + return &cid_{ version: vers, codec: codec, hash: h, @@ -298,14 +309,14 @@ func Cast(data []byte) (*Cid, error) { } // Type returns the multicodec-packed content type of a Cid. -func (c *Cid) Type() uint64 { +func (c *cid_) Type() uint64 { return c.codec } // String returns the default string representation of a // Cid. Currently, Base58 is used as the encoding for the // multibase string. -func (c *Cid) String() string { +func (c *cid_) String() string { switch c.version { case 0: return c.hash.B58String() @@ -323,7 +334,7 @@ func (c *Cid) String() string { // String returns the string representation of a Cid // encoded is selected base -func (c *Cid) StringOfBase(base mbase.Encoding) (string, error) { +func (c *cid_) StringOfBase(base mbase.Encoding) (string, error) { switch c.version { case 0: if base != mbase.Base58BTC { @@ -338,14 +349,14 @@ func (c *Cid) StringOfBase(base mbase.Encoding) (string, error) { } // Hash returns the multihash contained by a Cid. -func (c *Cid) Hash() mh.Multihash { +func (c *cid_) Hash() mh.Multihash { return c.hash } // Bytes returns the byte representation of a Cid. // The output of bytes can be parsed back into a Cid // with Cast(). -func (c *Cid) Bytes() []byte { +func (c *cid_) Bytes() []byte { switch c.version { case 0: return c.bytesV0() @@ -356,11 +367,11 @@ func (c *Cid) Bytes() []byte { } } -func (c *Cid) bytesV0() []byte { +func (c *cid_) bytesV0() []byte { return []byte(c.hash) } -func (c *Cid) bytesV1() []byte { +func (c *cid_) bytesV1() []byte { // two 8 bytes (max) numbers plus hash buf := make([]byte, 2*binary.MaxVarintLen64+len(c.hash)) n := binary.PutUvarint(buf, c.version) @@ -376,14 +387,19 @@ func (c *Cid) bytesV1() []byte { // Equals checks that two Cids are the same. // In order for two Cids to be considered equal, the // Version, the Codec and the Multihash must match. -func (c *Cid) Equals(o *Cid) bool { +func (c *cid_) Equals(o0 Cid) bool { + o := o0.(*cid_) return c.codec == o.codec && c.version == o.version && bytes.Equal(c.hash, o.hash) } +func EmptyCid() Cid { + return &cid_{} +} + // UnmarshalJSON parses the JSON representation of a Cid. -func (c *Cid) UnmarshalJSON(b []byte) error { +func (c *cid_) UnmarshalJSON(b []byte) error { if len(b) < 2 { return fmt.Errorf("invalid cid json blob") } @@ -416,25 +432,25 @@ func (c *Cid) UnmarshalJSON(b []byte) error { // // Note that this formatting comes from the IPLD specification // (https://github.com/ipld/specs/tree/master/ipld) -func (c Cid) MarshalJSON() ([]byte, error) { +func (c *cid_) MarshalJSON() ([]byte, error) { return []byte(fmt.Sprintf("{\"/\":\"%s\"}", c.String())), nil } // KeyString casts the result of cid.Bytes() as a string, and returns it. -func (c *Cid) KeyString() string { +func (c *cid_) KeyString() string { return string(c.Bytes()) } // Loggable returns a Loggable (as defined by // https://godoc.org/github.com/ipfs/go-log). -func (c *Cid) Loggable() map[string]interface{} { +func (c *cid_) Loggable() map[string]interface{} { return map[string]interface{}{ "cid": c, } } // Prefix builds and returns a Prefix out of a Cid. -func (c *Cid) Prefix() Prefix { +func (c *cid_) Prefix() Prefix { dec, _ := mh.Decode(c.hash) // assuming we got a valid multiaddr, this will not error return Prefix{ MhType: dec.Code, @@ -457,7 +473,7 @@ type Prefix struct { // Sum uses the information in a prefix to perform a multihash.Sum() // and return a newly constructed Cid with the resulting multihash. -func (p Prefix) Sum(data []byte) (*Cid, error) { +func (p Prefix) Sum(data []byte) (*cid_, error) { hash, err := mh.Sum(data, p.MhType, p.MhLength) if err != nil { return nil, err diff --git a/cid_test.go b/cid_test.go index ed690d8..b98b280 100644 --- a/cid_test.go +++ b/cid_test.go @@ -37,7 +37,7 @@ var tCodecs = map[uint64]string{ DecredTx: "decred-tx", } -func assertEqual(t *testing.T, a, b *Cid) { +func assertEqual(t *testing.T, a, b *cid_) { if a.codec != b.codec { t.Fatal("mismatch on type") } @@ -77,7 +77,7 @@ func TestBasicMarshaling(t *testing.T) { t.Fatal(err) } - cid := &Cid{ + cid := &cid_{ codec: 7, version: 1, hash: h, @@ -107,7 +107,7 @@ func TestBasesMarshaling(t *testing.T) { t.Fatal(err) } - cid := &Cid{ + cid := &cid_{ codec: 7, version: 1, hash: h, @@ -375,7 +375,7 @@ func ExampleDecode() { func TestFromJson(t *testing.T) { cval := "zb2rhhFAEMepUBbGyP1k8tGfz7BSciKXP6GHuUeUsJBaK6cqG" jsoncid := []byte(`{"/":"` + cval + `"}`) - var c Cid + c := EmptyCid() err := json.Unmarshal(jsoncid, &c) if err != nil { t.Fatal(err) @@ -392,25 +392,13 @@ func TestJsonRoundTrip(t *testing.T) { t.Fatal(err) } - // Verify it works for a *Cid. enc, err := json.Marshal(exp) if err != nil { t.Fatal(err) } - var actual Cid - err = json.Unmarshal(enc, &actual) - if !exp.Equals(&actual) { - t.Fatal("cids not equal for *Cid") - } - - // Verify it works for a Cid. - enc, err = json.Marshal(*exp) - if err != nil { - t.Fatal(err) - } - var actual2 Cid - err = json.Unmarshal(enc, &actual2) - if !exp.Equals(&actual2) { + actual := EmptyCid() + err = json.Unmarshal(enc, actual) + if !exp.Equals(actual) { t.Fatal("cids not equal for Cid") } } diff --git a/set.go b/set.go index b801ade..2ce8803 100644 --- a/set.go +++ b/set.go @@ -12,18 +12,18 @@ func NewSet() *Set { } // Add puts a Cid in the Set. -func (s *Set) Add(c *Cid) { +func (s *Set) Add(c Cid) { s.set[string(c.Bytes())] = struct{}{} } // Has returns if the Set contains a given Cid. -func (s *Set) Has(c *Cid) bool { +func (s *Set) Has(c Cid) bool { _, ok := s.set[string(c.Bytes())] return ok } // Remove deletes a Cid from the Set. -func (s *Set) Remove(c *Cid) { +func (s *Set) Remove(c Cid) { delete(s.set, string(c.Bytes())) } @@ -33,8 +33,8 @@ func (s *Set) Len() int { } // Keys returns the Cids in the set. -func (s *Set) Keys() []*Cid { - out := make([]*Cid, 0, len(s.set)) +func (s *Set) Keys() []Cid { + out := make([]Cid, 0, len(s.set)) for k := range s.set { c, _ := Cast([]byte(k)) out = append(out, c) @@ -44,7 +44,7 @@ func (s *Set) Keys() []*Cid { // Visit adds a Cid to the set only if it is // not in it already. -func (s *Set) Visit(c *Cid) bool { +func (s *Set) Visit(c Cid) bool { if !s.Has(c) { s.Add(c) return true @@ -55,7 +55,7 @@ func (s *Set) Visit(c *Cid) bool { // ForEach allows to run a custom function on each // Cid in the set. -func (s *Set) ForEach(f func(c *Cid) error) error { +func (s *Set) ForEach(f func(c Cid) error) error { for cs := range s.set { c, _ := Cast([]byte(cs)) err := f(c) From 8324872aef141d025c9ea4f88a382bdd56da843e Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Sat, 4 Aug 2018 15:04:55 -0400 Subject: [PATCH 2/5] Change internal representation of a Cid to a string. Authors: Steven Allen dignifiedquire@gmail.com Kevin Atkinson --- cid.go | 188 +++++++++++++++++++++++++--------------------------- cid_test.go | 34 ++++------ set_test.go | 90 +++++++++++++++++++++++++ 3 files changed, 191 insertions(+), 121 deletions(-) create mode 100644 set_test.go diff --git a/cid.go b/cid.go index 487f633..40b2728 100644 --- a/cid.go +++ b/cid.go @@ -31,17 +31,24 @@ import ( mh "github.com/multiformats/go-multihash" ) +// Cid represents a self-describing content adressed +// identifier. It is formed by a Version, a Codec (which indicates +// a multicodec-packed content type) and a Multihash. type Cid interface { + Version() int Type() uint64 String() string StringOfBase(mbase.Encoding) (string, error) Hash() mh.Multihash Bytes() []byte Equals(o Cid) bool - KeyString() string + KeyString() CidString Prefix() Prefix } +// CidString is a representation of a Cid as a binary string +type CidString string + // UnsupportedVersionString just holds an error message const UnsupportedVersionString = "" @@ -143,22 +150,24 @@ var CodecToStr = map[uint64]string{ // They exist to allow IPFS to work with Cids while keeping // compatibility with the plain-multihash format used used in IPFS. // NewCidV1 should be used preferentially. -func NewCidV0(mhash mh.Multihash) *cid_ { - return &cid_{ - version: 0, - codec: DagProtobuf, - hash: mhash, - } +func NewCidV0(mhash mh.Multihash) CidString { + return CidString(mhash) } // NewCidV1 returns a new Cid using the given multicodec-packed // content type. -func NewCidV1(codecType uint64, mhash mh.Multihash) *cid_ { - return &cid_{ - version: 1, - codec: codecType, - hash: mhash, +func NewCidV1(codecType uint64, mhash mh.Multihash) CidString { + hashlen := len(mhash) + // two 8 bytes (max) numbers plus hash + buf := make([]byte, 2*binary.MaxVarintLen64+hashlen) + n := binary.PutUvarint(buf, 1) + n += binary.PutUvarint(buf[n:], codecType) + cn := copy(buf[n:], mhash) + if cn != hashlen { + panic("copy hash length is inconsistent") } + + return CidString(buf[:n+hashlen]) } // NewPrefixV0 returns a CIDv0 prefix with the specified multihash type. @@ -182,18 +191,9 @@ func NewPrefixV1(codecType uint64, mhType uint64) Prefix { } } -// Cid represents a self-describing content adressed -// identifier. It is formed by a Version, a Codec (which indicates -// a multicodec-packed content type) and a Multihash. -type cid_ struct { - version uint64 - codec uint64 - hash mh.Multihash -} - // Parse is a short-hand function to perform Decode, Cast etc... on // a generic interface{} type. -func Parse(v interface{}) (*cid_, error) { +func Parse(v interface{}) (Cid, error) { switch v2 := v.(type) { case string: if strings.Contains(v2, "/ipfs/") { @@ -204,7 +204,7 @@ func Parse(v interface{}) (*cid_, error) { return Cast(v2) case mh.Multihash: return NewCidV0(v2), nil - case *cid_: + case CidString: return v2, nil default: return nil, fmt.Errorf("can't parse %+v as Cid", v2) @@ -223,7 +223,7 @@ func Parse(v interface{}) (*cid_, error) { // Decode will also detect and parse CidV0 strings. Strings // starting with "Qm" are considered CidV0 and treated directly // as B58-encoded multihashes. -func Decode(v string) (*cid_, error) { +func Decode(v string) (Cid, error) { if len(v) < 2 { return nil, ErrCidTooShort } @@ -267,61 +267,67 @@ func uvError(read int) error { // // Please use decode when parsing a regular Cid string, as Cast does not // expect multibase-encoded data. Cast accepts the output of Cid.Bytes(). -func Cast(data []byte) (*cid_, error) { +func Cast(data []byte) (CidString, error) { if len(data) == 34 && data[0] == 18 && data[1] == 32 { h, err := mh.Cast(data) if err != nil { - return nil, err + return "", err } - return &cid_{ - codec: DagProtobuf, - version: 0, - hash: h, - }, nil + return NewCidV0(h), nil } vers, n := binary.Uvarint(data) if err := uvError(n); err != nil { - return nil, err + return "", err } if vers != 0 && vers != 1 { - return nil, fmt.Errorf("invalid cid version number: %d", vers) + return "", fmt.Errorf("invalid cid version number: %d", vers) } - codec, cn := binary.Uvarint(data[n:]) + _, cn := binary.Uvarint(data[n:]) if err := uvError(cn); err != nil { - return nil, err + return "", err } rest := data[n+cn:] h, err := mh.Cast(rest) if err != nil { - return nil, err + return "", err } - return &cid_{ - version: vers, - codec: codec, - hash: h, - }, nil + return CidString(data[0 : n+cn+len(h)]), nil +} + +// Version returns the Cid version +func (c CidString) Version() int { + if len(c) == 34 && c[0] == 18 && c[1] == 32 { + return 0 + } + return 1 } // Type returns the multicodec-packed content type of a Cid. -func (c *cid_) Type() uint64 { - return c.codec +func (c CidString) Type() uint64 { + if c.Version() == 0 { + return DagProtobuf + } + bytes := []byte(c) + _, n := binary.Uvarint(bytes) + codec, _ := binary.Uvarint(bytes[n:]) + return codec } // String returns the default string representation of a // Cid. Currently, Base58 is used as the encoding for the // multibase string. -func (c *cid_) String() string { - switch c.version { +func (c CidString) String() string { + switch c.Version() { case 0: - return c.hash.B58String() + return c.Hash().B58String() case 1: - mbstr, err := mbase.Encode(mbase.Base58BTC, c.bytesV1()) + mbstr, err := mbase.Encode(mbase.Base58BTC, c.Bytes()) if err != nil { panic("should not error with hardcoded mbase: " + err.Error()) } @@ -334,72 +340,57 @@ func (c *cid_) String() string { // String returns the string representation of a Cid // encoded is selected base -func (c *cid_) StringOfBase(base mbase.Encoding) (string, error) { - switch c.version { +func (c CidString) StringOfBase(base mbase.Encoding) (string, error) { + switch c.Version() { case 0: if base != mbase.Base58BTC { return "", ErrInvalidEncoding } - return c.hash.B58String(), nil + return c.Hash().B58String(), nil case 1: - return mbase.Encode(base, c.bytesV1()) + return mbase.Encode(base, c.Bytes()) default: panic("not possible to reach this point") } } // Hash returns the multihash contained by a Cid. -func (c *cid_) Hash() mh.Multihash { - return c.hash +func (c CidString) Hash() mh.Multihash { + if c.Version() == 0 { + return mh.Multihash([]byte(c)) + } + + bytes := []byte(c) + // skip version length + _, n1 := binary.Uvarint(bytes) + // skip codec length + _, n2 := binary.Uvarint(bytes[n1:]) + + return mh.Multihash(bytes[n1+n2:]) } // Bytes returns the byte representation of a Cid. // The output of bytes can be parsed back into a Cid // with Cast(). -func (c *cid_) Bytes() []byte { - switch c.version { - case 0: - return c.bytesV0() - case 1: - return c.bytesV1() - default: - panic("not possible to reach this point") - } -} - -func (c *cid_) bytesV0() []byte { - return []byte(c.hash) -} - -func (c *cid_) bytesV1() []byte { - // two 8 bytes (max) numbers plus hash - buf := make([]byte, 2*binary.MaxVarintLen64+len(c.hash)) - n := binary.PutUvarint(buf, c.version) - n += binary.PutUvarint(buf[n:], c.codec) - cn := copy(buf[n:], c.hash) - if cn != len(c.hash) { - panic("copy hash length is inconsistent") - } - - return buf[:n+len(c.hash)] +func (c CidString) Bytes() []byte { + return []byte(c) } // Equals checks that two Cids are the same. // In order for two Cids to be considered equal, the // Version, the Codec and the Multihash must match. -func (c *cid_) Equals(o0 Cid) bool { - o := o0.(*cid_) - return c.codec == o.codec && - c.version == o.version && - bytes.Equal(c.hash, o.hash) +func (c CidString) Equals(c0 Cid) bool { + return c.KeyString() == c0.KeyString() } -func EmptyCid() Cid { - return &cid_{} +// Returns an empty (and invalid) Cid, used primary for UnmarshalJSON +func EmptyCid() *CidString { + c := CidString("") + return &c } // UnmarshalJSON parses the JSON representation of a Cid. -func (c *cid_) UnmarshalJSON(b []byte) error { +func (c *CidString) UnmarshalJSON(b []byte) error { if len(b) < 2 { return fmt.Errorf("invalid cid json blob") } @@ -420,9 +411,8 @@ func (c *cid_) UnmarshalJSON(b []byte) error { return err } - c.version = out.version - c.hash = out.hash - c.codec = out.codec + *c = out.KeyString() + return nil } @@ -432,31 +422,31 @@ func (c *cid_) UnmarshalJSON(b []byte) error { // // Note that this formatting comes from the IPLD specification // (https://github.com/ipld/specs/tree/master/ipld) -func (c *cid_) MarshalJSON() ([]byte, error) { +func (c CidString) MarshalJSON() ([]byte, error) { return []byte(fmt.Sprintf("{\"/\":\"%s\"}", c.String())), nil } // KeyString casts the result of cid.Bytes() as a string, and returns it. -func (c *cid_) KeyString() string { - return string(c.Bytes()) +func (c CidString) KeyString() CidString { + return c } // Loggable returns a Loggable (as defined by // https://godoc.org/github.com/ipfs/go-log). -func (c *cid_) Loggable() map[string]interface{} { +func (c CidString) Loggable() map[string]interface{} { return map[string]interface{}{ "cid": c, } } // Prefix builds and returns a Prefix out of a Cid. -func (c *cid_) Prefix() Prefix { - dec, _ := mh.Decode(c.hash) // assuming we got a valid multiaddr, this will not error +func (c CidString) Prefix() Prefix { + dec, _ := mh.Decode(c.Hash()) // assuming we got a valid multiaddr, this will not error return Prefix{ MhType: dec.Code, MhLength: dec.Length, - Version: c.version, - Codec: c.codec, + Version: uint64(c.Version()), + Codec: c.Type(), } } @@ -473,7 +463,7 @@ type Prefix struct { // Sum uses the information in a prefix to perform a multihash.Sum() // and return a newly constructed Cid with the resulting multihash. -func (p Prefix) Sum(data []byte) (*cid_, error) { +func (p Prefix) Sum(data []byte) (Cid, error) { hash, err := mh.Sum(data, p.MhType, p.MhLength) if err != nil { return nil, err diff --git a/cid_test.go b/cid_test.go index b98b280..21f4dcc 100644 --- a/cid_test.go +++ b/cid_test.go @@ -37,16 +37,16 @@ var tCodecs = map[uint64]string{ DecredTx: "decred-tx", } -func assertEqual(t *testing.T, a, b *cid_) { - if a.codec != b.codec { +func assertEqual(t *testing.T, a, b Cid) { + if a.Type() != b.Type() { t.Fatal("mismatch on type") } - if a.version != b.version { + if a.Version() != b.Version() { t.Fatal("mismatch on version") } - if !bytes.Equal(a.hash, b.hash) { + if !bytes.Equal(a.Hash(), b.Hash()) { t.Fatal("multihash mismatch") } } @@ -77,11 +77,7 @@ func TestBasicMarshaling(t *testing.T) { t.Fatal(err) } - cid := &cid_{ - codec: 7, - version: 1, - hash: h, - } + cid := NewCidV1(7, h) data := cid.Bytes() @@ -107,11 +103,7 @@ func TestBasesMarshaling(t *testing.T) { t.Fatal(err) } - cid := &cid_{ - codec: 7, - version: 1, - hash: h, - } + cid := NewCidV1(7, h) data := cid.Bytes() @@ -170,11 +162,11 @@ func TestV0Handling(t *testing.T) { t.Fatal(err) } - if cid.version != 0 { + if cid.Version() != 0 { t.Fatal("should have gotten version 0 cid") } - if cid.hash.B58String() != old { + if cid.Hash().B58String() != old { t.Fatal("marshaling roundtrip failed") } @@ -281,9 +273,7 @@ func TestPrefixRoundtrip(t *testing.T) { func Test16BytesVarint(t *testing.T) { data := []byte("this is some test content") hash, _ := mh.Sum(data, mh.SHA2_256, -1) - c := NewCidV1(DagCBOR, hash) - - c.codec = 1 << 63 + c := NewCidV1(1 <<63, hash) _ = c.Bytes() } @@ -326,8 +316,8 @@ func TestParse(t *testing.T) { if err != nil { return err } - if cid.version != 0 { - return fmt.Errorf("expected version 0, got %s", string(cid.version)) + if cid.Version() != 0 { + return fmt.Errorf("expected version 0, got %s", string(cid.Version())) } actual := cid.Hash().B58String() if actual != expected { @@ -376,7 +366,7 @@ func TestFromJson(t *testing.T) { cval := "zb2rhhFAEMepUBbGyP1k8tGfz7BSciKXP6GHuUeUsJBaK6cqG" jsoncid := []byte(`{"/":"` + cval + `"}`) c := EmptyCid() - err := json.Unmarshal(jsoncid, &c) + err := json.Unmarshal(jsoncid, c) if err != nil { t.Fatal(err) } diff --git a/set_test.go b/set_test.go new file mode 100644 index 0000000..abcf432 --- /dev/null +++ b/set_test.go @@ -0,0 +1,90 @@ +package cid + +import ( + "fmt" + "testing" + + mh "github.com/multiformats/go-multihash" +) + +func makeCid(i int) Cid { + data := []byte(fmt.Sprintf("this is some test content %d", i)) + hash, _ := mh.Sum(data, mh.SHA2_256, -1) + return NewCidV1(Raw, hash) +} + +func TestSetRemove(t *testing.T) { + s := NewSet() + + c1 := makeCid(1) + s.Add(c1) + + if !s.Has(c1) { + t.Fatal("failed to add cid") + } + + s.Remove(c1) + if s.Has(c1) { + t.Fatal("failed to remove cid") + } + + // make sure this doesn't fail, removing a removed one + s.Remove(c1) +} + +func BenchmarkSetVisit(b *testing.B) { + s := NewSet() + + cids := make([]Cid, b.N) + for i := 0; i < b.N; i++ { + cids[i] = makeCid(i) + } + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + s.Visit(cids[i]) + // twice to ensure we test the adding of an existing element + s.Visit(cids[i]) + } + if s.Len() != b.N { + b.FailNow() + } +} + +func BenchmarkStringV1(b *testing.B) { + data := []byte("this is some test content") + hash, _ := mh.Sum(data, mh.SHA2_256, -1) + cid := NewCidV1(Raw, hash) + + b.ReportAllocs() + b.ResetTimer() + + count := 0 + for i := 0; i < b.N; i++ { + count += len(cid.String()) + } + if count != 49*b.N { + b.FailNow() + } +} + +// making sure we don't allocate when returning bytes +func BenchmarkBytesV1(b *testing.B) { + data := []byte("this is some test content") + hash, _ := mh.Sum(data, mh.SHA2_256, -1) + cid := NewCidV1(Raw, hash) + + b.ReportAllocs() + b.ResetTimer() + + count := 0 + for i := 0; i < b.N; i++ { + count += len(cid.Bytes()) + count += len([]byte(cid)) + } + if count != 36*2*b.N { + b.FailNow() + } +} From 8bb13c5d8586a829e8d1bb022170764ac18299fc Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Fri, 27 Jul 2018 18:02:02 -0400 Subject: [PATCH 3/5] gx update go-multibase to 0.2.7 --- package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 4dcadb0..e6ea612 100644 --- a/package.json +++ b/package.json @@ -15,9 +15,9 @@ }, { "author": "whyrusleeping", - "hash": "QmexBtiTTEwwn42Yi6ouKt6VqzpA6wjJgiW1oh9VfaRrup", + "hash": "QmSbvata2WqNkqGtZNg8MR3SKwnB8iQ7vTPJgWqB8bC5kR", "name": "go-multibase", - "version": "0.2.6" + "version": "0.2.7" } ], "gxVersion": "0.8.0", From 601b3d8baf2aebc52a4beeb36b92f9e01fdbfa48 Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Sat, 4 Aug 2018 17:09:32 -0400 Subject: [PATCH 4/5] Add CidWithBase type that is a Cid with an associated multibase. --- cid.go | 95 +++++++++++++++++++++++++++++++++++++++++++++++++---- cid_test.go | 85 ++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 165 insertions(+), 15 deletions(-) diff --git a/cid.go b/cid.go index 40b2728..882e862 100644 --- a/cid.go +++ b/cid.go @@ -38,17 +38,26 @@ type Cid interface { Version() int Type() uint64 String() string + // StringOfBase is deprecated: use WithBase(encoder).String() StringOfBase(mbase.Encoding) (string, error) Hash() mh.Multihash Bytes() []byte Equals(o Cid) bool KeyString() CidString Prefix() Prefix + WithBase(mbase.Encoder) CidWithBase + Base() (mbase.Encoder, bool) } // CidString is a representation of a Cid as a binary string type CidString string +// CidWithBase is a representation of a Cid with a Multibase +type CidWithBase struct { + CidString + encoder mbase.Encoder +} + // UnsupportedVersionString just holds an error message const UnsupportedVersionString = "" @@ -237,12 +246,22 @@ func Decode(v string) (Cid, error) { return NewCidV0(hash), nil } - _, data, err := mbase.Decode(v) + base, data, err := mbase.Decode(v) + if err != nil { + return nil, err + } + + encoder, err := mbase.NewEncoder(base) + if err != nil { + panic(err) // should not happen + } + + c, err := Cast(data) if err != nil { return nil, err } - return Cast(data) + return CidWithBase{c, encoder}, nil } func uvError(read int) error { @@ -319,6 +338,23 @@ func (c CidString) Type() uint64 { return codec } +func (c CidString) WithBase(b mbase.Encoder) CidWithBase { + return CidWithBase{c, b} +} + +func (c CidString) Base() (mbase.Encoder, bool) { + encoder, _ := mbase.NewEncoder(mbase.Base58BTC) + return encoder, false +} + +func (c CidWithBase) Base() (mbase.Encoder, bool) { + if c.Version() == 0 { + encoder, _ := mbase.NewEncoder(mbase.Base58BTC) + return encoder, false + } + return c.encoder, true +} + // String returns the default string representation of a // Cid. Currently, Base58 is used as the encoding for the // multibase string. @@ -338,6 +374,20 @@ func (c CidString) String() string { } } +// String returns the default string representation of a +// Cid. Currently, Base58 is used as the encoding for the +// multibase string. +func (c CidWithBase) String() string { + switch c.Version() { + case 0: + return c.Hash().B58String() + case 1: + return c.encoder.Encode(c.Bytes()) + default: + panic("not possible to reach this point") + } +} + // String returns the string representation of a Cid // encoded is selected base func (c CidString) StringOfBase(base mbase.Encoding) (string, error) { @@ -383,14 +433,40 @@ func (c CidString) Equals(c0 Cid) bool { return c.KeyString() == c0.KeyString() } -// Returns an empty (and invalid) Cid, used primary for UnmarshalJSON -func EmptyCid() *CidString { - c := CidString("") - return &c +// CidPtr represents a Cid that is a pointer and can be used with +// UnmarshalJSON +type CidPtr interface { + Cid + Normalize() Cid +} + +// NewCidPtr returns a pointer to an empty Cid for use in +// json.Unmarshal, it is recommended you call Normalize() after +// json.Unmarshal to avoid unnecessary redirection +func NewCidPtr() CidPtr { + return &CidWithBase{} +} + +func (c *CidWithBase) Normalize() Cid { + if c.Version() == 0 { + return c.KeyString() + } + return *c } // UnmarshalJSON parses the JSON representation of a Cid. func (c *CidString) UnmarshalJSON(b []byte) error { + c2 := &CidWithBase{} + err := c2.UnmarshalJSON(b) + if err != nil { + return err + } + *c = c2.CidString + return nil +} + +// UnmarshalJSON parses the JSON representation of a Cid. +func (c *CidWithBase) UnmarshalJSON(b []byte) error { if len(b) < 2 { return fmt.Errorf("invalid cid json blob") } @@ -411,7 +487,8 @@ func (c *CidString) UnmarshalJSON(b []byte) error { return err } - *c = out.KeyString() + c.CidString = out.KeyString() + c.encoder, _ = out.Base() return nil } @@ -426,6 +503,10 @@ func (c CidString) MarshalJSON() ([]byte, error) { return []byte(fmt.Sprintf("{\"/\":\"%s\"}", c.String())), nil } +func (c CidWithBase) MarshalJSON() ([]byte, error) { + return []byte(fmt.Sprintf("{\"/\":\"%s\"}", c.String())), nil +} + // KeyString casts the result of cid.Bytes() as a string, and returns it. func (c CidString) KeyString() CidString { return c diff --git a/cid_test.go b/cid_test.go index 21f4dcc..95b46ca 100644 --- a/cid_test.go +++ b/cid_test.go @@ -144,6 +144,15 @@ func TestBasesMarshaling(t *testing.T) { } assertEqual(t, cid, out2) + + encoder, err := mbase.NewEncoder(b) + if err != nil { + t.Fatal(err) + } + s2 := cid.WithBase(encoder).String() + if s != s2 { + t.Fatalf("'%s' != '%s'", s, s2) + } } } @@ -173,6 +182,21 @@ func TestV0Handling(t *testing.T) { if cid.String() != old { t.Fatal("marshaling roundtrip failed") } + + new, err := cid.StringOfBase(mbase.Base58BTC) + if err != nil { + t.Fatal(err) + } + if new != old { + t.Fatal("StringOfBase roundtrip failed") + } + encoder, err := mbase.NewEncoder(mbase.Base58BTC) + if err != nil { + t.Fatal(err) + } + if cid.WithBase(encoder).String() != old { + t.Fatal("Encode roundtrip failed") + } } func TestV0ErrorCases(t *testing.T) { @@ -273,7 +297,7 @@ func TestPrefixRoundtrip(t *testing.T) { func Test16BytesVarint(t *testing.T) { data := []byte("this is some test content") hash, _ := mh.Sum(data, mh.SHA2_256, -1) - c := NewCidV1(1 <<63, hash) + c := NewCidV1(1<<63, hash) _ = c.Bytes() } @@ -345,7 +369,7 @@ func TestHexDecode(t *testing.T) { t.Fatal(err) } - if c.String() != "zb2rhhFAEMepUBbGyP1k8tGfz7BSciKXP6GHuUeUsJBaK6cqG" { + if c.String() != hexcid { t.Fatal("hash value failed to round trip decoding from hex") } } @@ -363,21 +387,60 @@ func ExampleDecode() { } func TestFromJson(t *testing.T) { - cval := "zb2rhhFAEMepUBbGyP1k8tGfz7BSciKXP6GHuUeUsJBaK6cqG" + t.Run("cidv0", func(t *testing.T) { + testFromJson(t, "QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n") + }) + t.Run("cidv1", func(t *testing.T) { // must be in default base + testFromJson(t, "zb2rhhFAEMepUBbGyP1k8tGfz7BSciKXP6GHuUeUsJBaK6cqG") + }) +} + +func testFromJson(t *testing.T, cval string) { jsoncid := []byte(`{"/":"` + cval + `"}`) - c := EmptyCid() - err := json.Unmarshal(jsoncid, c) + + c0 := NewCidPtr() + err := json.Unmarshal(jsoncid, c0) if err != nil { t.Fatal(err) } - + c := c0.Normalize() if c.String() != cval { t.Fatal("json parsing failed") } + + var c2 CidString + err = json.Unmarshal(jsoncid, &c2) + if err != nil { + t.Fatal(err) + } + if c2.String() != cval { + t.Fatal("json parsing failed (CidString)") + } + + var c3 CidWithBase + err = json.Unmarshal(jsoncid, &c3) + if err != nil { + t.Fatal(err) + } + if c3.String() != cval { + t.Fatal("json parsing failed (CidWithBase)") + } } func TestJsonRoundTrip(t *testing.T) { - exp, err := Decode("zb2rhhFAEMepUBbGyP1k8tGfz7BSciKXP6GHuUeUsJBaK6cqG") + t.Run("cidv0", func(t *testing.T) { + testJsonRoundTrip(t, "QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n") + }) + t.Run("cidv1", func(t *testing.T) { + testJsonRoundTrip(t, "zb2rhhFAEMepUBbGyP1k8tGfz7BSciKXP6GHuUeUsJBaK6cqG") + }) + t.Run("cidv1-base32", func(t *testing.T) { + testJsonRoundTrip(t, "bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm") + }) +} + +func testJsonRoundTrip(t *testing.T, cval string) { + exp, err := Decode(cval) if err != nil { t.Fatal(err) } @@ -386,9 +449,15 @@ func TestJsonRoundTrip(t *testing.T) { if err != nil { t.Fatal(err) } - actual := EmptyCid() + + actual := NewCidPtr() err = json.Unmarshal(enc, actual) + if !exp.Equals(actual) { t.Fatal("cids not equal for Cid") } + + if actual.String() != cval { + t.Fatal("cid String() does not return same value") + } } From 25758b046c064e5c68f7a776401e58ca8e8fdf0d Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Sat, 4 Aug 2018 21:34:07 -0400 Subject: [PATCH 5/5] Tweaks. --- cid.go | 72 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/cid.go b/cid.go index 882e862..65185f4 100644 --- a/cid.go +++ b/cid.go @@ -45,7 +45,10 @@ type Cid interface { Equals(o Cid) bool KeyString() CidString Prefix() Prefix + // WithBase associates a multibase with the Cid WithBase(mbase.Encoder) CidWithBase + // Base() returns the multibase associated with the Cid and true + // if is defined, otherwise it returns DefaultBase and false Base() (mbase.Encoder, bool) } @@ -55,12 +58,14 @@ type CidString string // CidWithBase is a representation of a Cid with a Multibase type CidWithBase struct { CidString - encoder mbase.Encoder + base mbase.Encoding // -1 if not defined } // UnsupportedVersionString just holds an error message const UnsupportedVersionString = "" +const DefaultBase = mbase.Base58BTC + var ( // ErrVarintBuffSmall means that a buffer passed to the cid parser was not // long enough, or did not contain an invalid cid @@ -232,36 +237,31 @@ func Parse(v interface{}) (Cid, error) { // Decode will also detect and parse CidV0 strings. Strings // starting with "Qm" are considered CidV0 and treated directly // as B58-encoded multihashes. -func Decode(v string) (Cid, error) { +func Decode(v string) (CidWithBase, error) { if len(v) < 2 { - return nil, ErrCidTooShort + return CidWithBase{}, ErrCidTooShort } if len(v) == 46 && v[:2] == "Qm" { hash, err := mh.FromB58String(v) if err != nil { - return nil, err + return CidWithBase{}, err } - return NewCidV0(hash), nil + return CidWithBase{NewCidV0(hash), mbase.Encoding(-1)}, nil } base, data, err := mbase.Decode(v) if err != nil { - return nil, err - } - - encoder, err := mbase.NewEncoder(base) - if err != nil { - panic(err) // should not happen + return CidWithBase{}, err } c, err := Cast(data) if err != nil { - return nil, err + return CidWithBase{}, err } - return CidWithBase{c, encoder}, nil + return CidWithBase{c, base}, nil } func uvError(read int) error { @@ -339,20 +339,23 @@ func (c CidString) Type() uint64 { } func (c CidString) WithBase(b mbase.Encoder) CidWithBase { - return CidWithBase{c, b} + return CidWithBase{c, b.Encoding()} } func (c CidString) Base() (mbase.Encoder, bool) { - encoder, _ := mbase.NewEncoder(mbase.Base58BTC) + encoder, _ := mbase.NewEncoder(DefaultBase) return encoder, false } func (c CidWithBase) Base() (mbase.Encoder, bool) { - if c.Version() == 0 { - encoder, _ := mbase.NewEncoder(mbase.Base58BTC) - return encoder, false + if c.base == -1 { + return c.CidString.Base() + } + encoder, err := mbase.NewEncoder(c.base) + if err != nil { + panic(err) // should not happen } - return c.encoder, true + return encoder, true } // String returns the default string representation of a @@ -363,29 +366,26 @@ func (c CidString) String() string { case 0: return c.Hash().B58String() case 1: - mbstr, err := mbase.Encode(mbase.Base58BTC, c.Bytes()) + mbstr, err := mbase.Encode(DefaultBase, c.Bytes()) if err != nil { - panic("should not error with hardcoded mbase: " + err.Error()) + panic("should not error with default mbase: " + err.Error()) } - return mbstr default: panic("not possible to reach this point") } } -// String returns the default string representation of a -// Cid. Currently, Base58 is used as the encoding for the -// multibase string. +// String returns the string representation of a Cid. func (c CidWithBase) String() string { - switch c.Version() { - case 0: - return c.Hash().B58String() - case 1: - return c.encoder.Encode(c.Bytes()) - default: - panic("not possible to reach this point") + if c.base == -1 || c.Version() == 0 { + return c.CidString.String() + } + mbstr, err := mbase.Encode(c.base, c.Bytes()) + if err != nil { + panic(err) // should not happen } + return mbstr } // String returns the string representation of a Cid @@ -448,8 +448,8 @@ func NewCidPtr() CidPtr { } func (c *CidWithBase) Normalize() Cid { - if c.Version() == 0 { - return c.KeyString() + if c.base == -1 { + return c.CidString } return *c } @@ -487,8 +487,8 @@ func (c *CidWithBase) UnmarshalJSON(b []byte) error { return err } - c.CidString = out.KeyString() - c.encoder, _ = out.Base() + c.CidString = out.CidString + c.base = out.base return nil }