From f48445894e0aa4a5c9f7f8fdf42ae8b5fef0ac74 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Fri, 29 Aug 2025 01:01:29 +0200 Subject: [PATCH 1/9] fix(verifcid): enforce size limit for identity CIDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Limit identity CIDs to 128 bytes maximum to prevent abuse. Identity CIDs inline data directly and without a limit could embed arbitrary amounts, potentially being used by poorly written clients to echo back big amounts of unnecessary data. The limit matches the existing MaxDigestSize for cryptographic functions. Identity CIDs lack integrity verification and should only be used in controlled contexts like raw leaves when the size of data is smaller than a regular CID. BREAKING CHANGE: CIDs with identity multihash digests longer than MaxDigestSize (128 bytes) now produce ErrAboveMaxDigestSize error. Also renamed errors for consistency (old names deprecated but still work): - ErrBelowMinimumHashLength → ErrBelowMinDigestSize - ErrAboveMaximumHashLength → ErrAboveMaxDigestSize --- CHANGELOG.md | 9 ++ bitswap/internal/defaults/defaults.go | 7 +- blockservice/blockservice_test.go | 30 +++++ gateway/gateway_test.go | 33 +++++- gateway/handler.go | 8 ++ verifcid/allowlist_test.go | 6 +- verifcid/cid.go | 27 +++-- verifcid/cid_test.go | 157 ++++++++++++++++++++++++++ 8 files changed, 259 insertions(+), 18 deletions(-) create mode 100644 verifcid/cid_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f83b5e59..b1085556f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,12 +18,21 @@ The following emojis are used to highlight certain changes: ### Changed +- `verifcid`: Made digest size constants public: `MinDigestSize` (20 bytes) and `MaxDigestSize` (128 bytes) + - Renamed errors for consistency (old names are deprecated but still available): + - `ErrBelowMinimumHashLength` → `ErrBelowMinDigestSize` (old name deprecated) + - `ErrAboveMaximumHashLength` → `ErrAboveMaxDigestSize` (old name deprecated) + ### Removed ### Fixed ### Security +- `verifcid`: Now enforces maximum size limit of 128 bytes for identity CIDs to prevent abuse. + - 🛠 Attempts to read CIDs with identity multihash digests longer than `MaxDigestSize` will now produce `ErrAboveMaxDigestSize` error. + - Identity CIDs can inline data directly, and without a size limit, they could embed arbitrary amounts of data. Limiting the size also protects gateways from poorly written clients that might send absurdly big data to the gateway encoded as identity CIDs only to retrieve it back. Note that identity CIDs do not provide integrity verification, making them vulnerable to bit flips. They should only be used in controlled contexts like raw leaves of a larger DAG. The limit matches the already existing `MaxDigestSize` that was enforced for regular cryptographic functions. + ## [v0.34.0] diff --git a/bitswap/internal/defaults/defaults.go b/bitswap/internal/defaults/defaults.go index 2c5cc3668..f61052ca3 100644 --- a/bitswap/internal/defaults/defaults.go +++ b/bitswap/internal/defaults/defaults.go @@ -3,6 +3,8 @@ package defaults import ( "encoding/binary" "time" + + "github.com/ipfs/boxo/verifcid" ) const ( @@ -27,9 +29,8 @@ const ( // Maximum size of the wantlist we are willing to keep in memory. MaxQueuedWantlistEntiresPerPeer = 1024 - // Copied from github.com/ipfs/go-verifcid#maximumHashLength - // FIXME: expose this in go-verifcid. - MaximumHashLength = 128 + // MaximumHashLength is now exposed from verifcid.MaxDigestSize + MaximumHashLength = verifcid.MaxDigestSize MaximumAllowedCid = binary.MaxVarintLen64*4 + MaximumHashLength // RebroadcastDelay is the default delay to trigger broadcast of diff --git a/blockservice/blockservice_test.go b/blockservice/blockservice_test.go index 39c4b6e4b..bbb249d1a 100644 --- a/blockservice/blockservice_test.go +++ b/blockservice/blockservice_test.go @@ -1,6 +1,7 @@ package blockservice import ( + "bytes" "context" "testing" @@ -288,6 +289,35 @@ func TestAllowlist(t *testing.T) { check(NewSession(ctx, blockservice).GetBlock) } +func TestIdentityHashSizeLimit(t *testing.T) { + a := assert.New(t) + ctx := context.Background() + bs := blockstore.NewBlockstore(dssync.MutexWrap(ds.NewMapDatastore())) + blockservice := New(bs, nil) + + // Create identity CID at the MaxDigestSize limit (should be valid) + validData := bytes.Repeat([]byte("a"), verifcid.MaxDigestSize) + validHash, err := multihash.Sum(validData, multihash.IDENTITY, -1) + a.NoError(err) + validCID := cid.NewCidV1(cid.Raw, validHash) + + // Create identity CID over the MaxDigestSize limit (should be rejected) + invalidData := bytes.Repeat([]byte("b"), verifcid.MaxDigestSize+1) + invalidHash, err := multihash.Sum(invalidData, multihash.IDENTITY, -1) + a.NoError(err) + invalidCID := cid.NewCidV1(cid.Raw, invalidHash) + + // Valid identity CID should work (though block won't be found) + _, err = blockservice.GetBlock(ctx, validCID) + a.Error(err) + a.True(ipld.IsNotFound(err), "expected not found error for valid identity CID") + + // Invalid identity CID should fail validation + _, err = blockservice.GetBlock(ctx, invalidCID) + a.Error(err) + a.ErrorIs(err, verifcid.ErrAboveMaxDigestSize) +} + type fakeIsNewSessionCreateExchange struct { ses exchange.Fetcher newSessionWasCalled bool diff --git a/gateway/gateway_test.go b/gateway/gateway_test.go index 41833ceca..f2f95e1ea 100644 --- a/gateway/gateway_test.go +++ b/gateway/gateway_test.go @@ -1,6 +1,7 @@ package gateway import ( + "bytes" "context" "errors" "fmt" @@ -13,8 +14,10 @@ import ( "github.com/ipfs/boxo/namesys" "github.com/ipfs/boxo/path" "github.com/ipfs/boxo/path/resolver" + "github.com/ipfs/boxo/verifcid" "github.com/ipfs/go-cid" ipld "github.com/ipfs/go-ipld-format" + mh "github.com/multiformats/go-multihash" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -51,6 +54,25 @@ func TestGatewayGet(t *testing.T) { // detection is platform dependent. backend.namesys["/ipns/example.man"] = newMockNamesysItem(k, 0) + // Create identity CIDs for testing + // MaxDigestSize bytes (at the limit, should be valid) + validIdentityData := bytes.Repeat([]byte("a"), verifcid.MaxDigestSize) + validIdentityHash, err := mh.Sum(validIdentityData, mh.IDENTITY, -1) + require.NoError(t, err) + validIdentityCID := cid.NewCidV1(cid.Raw, validIdentityHash) + + // MaxDigestSize+1 bytes (over the limit, should be rejected) + invalidIdentityData := bytes.Repeat([]byte("b"), verifcid.MaxDigestSize+1) + invalidIdentityHash, err := mh.Sum(invalidIdentityData, mh.IDENTITY, -1) + require.NoError(t, err) + invalidIdentityCID := cid.NewCidV1(cid.Raw, invalidIdentityHash) + + // Short identity CID (below MinDigestSize, should still be valid) + shortIdentityData := []byte("hello") + shortIdentityHash, err := mh.Sum(shortIdentityData, mh.IDENTITY, -1) + require.NoError(t, err) + shortIdentityCID := cid.NewCidV1(cid.Raw, shortIdentityHash) + for _, test := range []struct { host string path string @@ -62,6 +84,9 @@ func TestGatewayGet(t *testing.T) { {"127.0.0.1:8080", "/ipns", http.StatusBadRequest, "invalid path \"/ipns/\": path does not have enough components\n"}, {"127.0.0.1:8080", "/" + k.RootCid().String(), http.StatusNotFound, "404 page not found\n"}, {"127.0.0.1:8080", "/ipfs/this-is-not-a-cid", http.StatusBadRequest, "invalid path \"/ipfs/this-is-not-a-cid\": invalid cid: illegal base32 data at input byte 3\n"}, + {"127.0.0.1:8080", "/ipfs/" + validIdentityCID.String(), http.StatusOK, string(validIdentityData)}, // Valid identity CID returns the inlined data + {"127.0.0.1:8080", "/ipfs/" + invalidIdentityCID.String(), http.StatusBadRequest, "multihash digest must be at most 128 bytes long\n"}, // Invalid identity CID, over size limit + {"127.0.0.1:8080", "/ipfs/" + shortIdentityCID.String(), http.StatusOK, "hello"}, // Short identity CID (below MinDigestSize) should work {"127.0.0.1:8080", k.String(), http.StatusOK, "fnord"}, {"127.0.0.1:8080", "/ipns/nxdomain.example.com", http.StatusInternalServerError, "failed to resolve /ipns/nxdomain.example.com: " + namesys.ErrResolveFailed.Error() + "\n"}, {"127.0.0.1:8080", "/ipns/%0D%0A%0D%0Ahello", http.StatusInternalServerError, "failed to resolve /ipns/\\r\\n\\r\\nhello: " + namesys.ErrResolveFailed.Error() + "\n"}, @@ -87,8 +112,12 @@ func TestGatewayGet(t *testing.T) { require.Equal(t, "text/plain; charset=utf-8", resp.Header.Get("Content-Type")) body, err := io.ReadAll(resp.Body) require.NoError(t, err) - require.Equal(t, test.status, resp.StatusCode, "body", body) - require.Equal(t, test.text, string(body)) + require.Equal(t, test.status, resp.StatusCode, "body", string(body)) + + // Check body content if expected text is provided + if test.text != "" { + require.Equal(t, test.text, string(body)) + } }) } } diff --git a/gateway/handler.go b/gateway/handler.go index 03ff2a2c7..0145172dd 100644 --- a/gateway/handler.go +++ b/gateway/handler.go @@ -18,6 +18,7 @@ import ( "github.com/ipfs/boxo/gateway/assets" "github.com/ipfs/boxo/ipns" "github.com/ipfs/boxo/path" + "github.com/ipfs/boxo/verifcid" cid "github.com/ipfs/go-cid" logging "github.com/ipfs/go-log/v2" "github.com/libp2p/go-libp2p/core/peer" @@ -313,6 +314,13 @@ func (i *handler) getOrHeadHandler(w http.ResponseWriter, r *http.Request) { } } + // Validate the root CID before any backend calls + rootCid := rq.immutablePath.RootCid() + if err := verifcid.ValidateCid(verifcid.DefaultAllowlist, rootCid); err != nil { + i.webError(w, r, err, http.StatusBadRequest) + return + } + // CAR response format can be handled now, since (1) it explicitly needs the // full immutable path to include in the CAR, and (2) has custom If-None-Match // header handling due to custom ETag. diff --git a/verifcid/allowlist_test.go b/verifcid/allowlist_test.go index b6057502f..a3d3c7a77 100644 --- a/verifcid/allowlist_test.go +++ b/verifcid/allowlist_test.go @@ -42,7 +42,7 @@ func TestDefaultAllowList(t *testing.T) { err error }{ {mhcid(mh.SHA2_256, 32), nil}, - {mhcid(mh.SHA2_256, 16), ErrBelowMinimumHashLength}, + {mhcid(mh.SHA2_256, 16), ErrBelowMinDigestSize}, {mhcid(mh.MURMUR3X64_64, 4), ErrPossiblyInsecureHashFunction}, {mhcid(mh.BLAKE3, 32), nil}, {mhcid(mh.BLAKE3, 69), nil}, @@ -61,7 +61,7 @@ func TestDefaultAllowList(t *testing.T) { if err != nil { t.Fatalf("failed to produce a multihash from the long blake3 hash: %v", err) } - if ValidateCid(allowlist, cid.NewCidV1(cid.DagCBOR, longBlake3Mh)) != ErrAboveMaximumHashLength { - t.Errorf("a CID that was longer than the maximum hash length did not error with ErrAboveMaximumHashLength") + if ValidateCid(allowlist, cid.NewCidV1(cid.DagCBOR, longBlake3Mh)) != ErrAboveMaxDigestSize { + t.Errorf("a CID that was longer than the maximum hash length did not error with ErrAboveMaxDigestSize") } } diff --git a/verifcid/cid.go b/verifcid/cid.go index f3baf3343..756059a05 100644 --- a/verifcid/cid.go +++ b/verifcid/cid.go @@ -8,15 +8,22 @@ import ( mh "github.com/multiformats/go-multihash" ) +const ( + // MinDigestSize is the minimum size for hash digests (except for identity hashes) + MinDigestSize = 20 + // MaxDigestSize is the maximum size for all digest types (including identity) + MaxDigestSize = 128 +) + var ( ErrPossiblyInsecureHashFunction = errors.New("potentially insecure hash functions not allowed") - ErrBelowMinimumHashLength = fmt.Errorf("hashes must be at least %d bytes long", minimumHashLength) - ErrAboveMaximumHashLength = fmt.Errorf("hashes must be at most %d bytes long", maximumHashLength) -) + ErrBelowMinDigestSize = fmt.Errorf("multihash digest must be at least %d bytes long", MinDigestSize) + ErrAboveMaxDigestSize = fmt.Errorf("multihash digest must be at most %d bytes long", MaxDigestSize) -const ( - minimumHashLength = 20 - maximumHashLength = 128 + // Deprecated: Use ErrBelowMinDigestSize instead + ErrBelowMinimumHashLength = ErrBelowMinDigestSize + // Deprecated: Use ErrAboveMaxDigestSize instead + ErrAboveMaximumHashLength = ErrAboveMaxDigestSize ) // ValidateCid validates multihash allowance behind given CID. @@ -26,12 +33,12 @@ func ValidateCid(allowlist Allowlist, c cid.Cid) error { return ErrPossiblyInsecureHashFunction } - if pref.MhType != mh.IDENTITY && pref.MhLength < minimumHashLength { - return ErrBelowMinimumHashLength + if pref.MhType != mh.IDENTITY && pref.MhLength < MinDigestSize { + return ErrBelowMinDigestSize } - if pref.MhType != mh.IDENTITY && pref.MhLength > maximumHashLength { - return ErrAboveMaximumHashLength + if pref.MhLength > MaxDigestSize { + return ErrAboveMaxDigestSize } return nil diff --git a/verifcid/cid_test.go b/verifcid/cid_test.go new file mode 100644 index 000000000..6a744f67e --- /dev/null +++ b/verifcid/cid_test.go @@ -0,0 +1,157 @@ +package verifcid + +import ( + "bytes" + "testing" + + "github.com/ipfs/go-cid" + mh "github.com/multiformats/go-multihash" +) + +func TestValidateCid(t *testing.T) { + allowlist := DefaultAllowlist + + tests := []struct { + name string + data []byte + mhType uint64 + wantErr error + }{ + { + name: "identity at max size", + data: bytes.Repeat([]byte("a"), MaxDigestSize), + mhType: mh.IDENTITY, + wantErr: nil, + }, + { + name: "identity over max size", + data: bytes.Repeat([]byte("b"), MaxDigestSize+1), + mhType: mh.IDENTITY, + wantErr: ErrAboveMaxDigestSize, + }, + { + name: "identity at 64 bytes", + data: bytes.Repeat([]byte("c"), 64), + mhType: mh.IDENTITY, + wantErr: nil, + }, + { + name: "identity with 1KB data", + data: bytes.Repeat([]byte("d"), 1024), + mhType: mh.IDENTITY, + wantErr: ErrAboveMaxDigestSize, + }, + { + name: "small identity", + data: []byte("hello"), + mhType: mh.IDENTITY, + wantErr: nil, + }, + { + name: "empty identity", + data: []byte{}, + mhType: mh.IDENTITY, + wantErr: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + hash, err := mh.Sum(tt.data, tt.mhType, -1) + if err != nil { + t.Fatal(err) + } + c := cid.NewCidV1(cid.Raw, hash) + + err = ValidateCid(allowlist, c) + if err != tt.wantErr { + t.Errorf("ValidateCid() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } + + t.Run("identity hash extracts correct data", func(t *testing.T) { + // Verify we can extract the original data from identity CID + originalData := []byte("inline data in CID") + hash, err := mh.Sum(originalData, mh.IDENTITY, -1) + if err != nil { + t.Fatal(err) + } + c := cid.NewCidV1(cid.Raw, hash) + + // Validate the CID + err = ValidateCid(allowlist, c) + if err != nil { + t.Errorf("expected valid identity CID, got error: %v", err) + } + + // Extract and verify the data + decoded, err := mh.Decode(c.Hash()) + if err != nil { + t.Fatal(err) + } + if decoded.Code != mh.IDENTITY { + t.Errorf("expected identity hash code, got: %v", decoded.Code) + } + if !bytes.Equal(decoded.Digest, originalData) { + t.Errorf("extracted data doesn't match original") + } + }) + + t.Run("regular hash still respects minimum", func(t *testing.T) { + // Create a SHA256 hash with less than MinDigestSize bytes (truncated) + data := []byte("test data") + fullHash, err := mh.Sum(data, mh.SHA2_256, -1) + if err != nil { + t.Fatal(err) + } + + // Manually create a truncated hash (19 bytes) + decoded, err := mh.Decode(fullHash) + if err != nil { + t.Fatal(err) + } + truncatedDigest := decoded.Digest[:MinDigestSize-1] + truncatedHash, err := mh.Encode(truncatedDigest, mh.SHA2_256) + if err != nil { + t.Fatal(err) + } + + c := cid.NewCidV1(cid.Raw, truncatedHash) + + err = ValidateCid(allowlist, c) + if err != ErrBelowMinDigestSize { + t.Errorf("expected ErrBelowMinDigestSize for truncated SHA256, got: %v", err) + } + }) + + t.Run("identity hash exempt from minimum size", func(t *testing.T) { + // Test that identity hashes below MinDigestSize are still valid + // This confirms identity hashes are exempt from the minimum size requirement + smallData := []byte("tiny") // Only 4 bytes, well below MinDigestSize of 20 + hash, err := mh.Sum(smallData, mh.IDENTITY, -1) + if err != nil { + t.Fatal(err) + } + c := cid.NewCidV1(cid.Raw, hash) + + // Should pass validation despite being below MinDigestSize + err = ValidateCid(allowlist, c) + if err != nil { + t.Errorf("identity CID with %d bytes should be valid (exempt from minimum), got error: %v", len(smallData), err) + } + + // Also test with exactly MinDigestSize-1 bytes for clarity + dataAt19Bytes := bytes.Repeat([]byte("x"), MinDigestSize-1) + hash19, err := mh.Sum(dataAt19Bytes, mh.IDENTITY, -1) + if err != nil { + t.Fatal(err) + } + c19 := cid.NewCidV1(cid.Raw, hash19) + + err = ValidateCid(allowlist, c19) + if err != nil { + t.Errorf("identity CID with %d bytes should be valid (exempt from minimum), got error: %v", MinDigestSize-1, err) + } + }) +} From 8206238460d467be9977ec18511178900dfed3b6 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Fri, 29 Aug 2025 21:06:21 +0200 Subject: [PATCH 2/9] refactor(verifcid): rename digest size errors for clarity - add ErrDigestTooSmall and ErrDigestTooLarge as primary errors - add ErrIdentityDigestTooLarge for identity-specific overflow - update ValidateCid to return specific error for identity overflow - keep ErrBelowMinimumHashLength and ErrAboveMaximumHashLength as deprecated aliases --- CHANGELOG.md | 9 +++++---- gateway/gateway_test.go | 6 +++--- verifcid/allowlist_test.go | 6 +++--- verifcid/cid.go | 22 +++++++++++++--------- verifcid/cid_test.go | 8 ++++---- 5 files changed, 28 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b1085556f..8abc05d44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,9 +19,10 @@ The following emojis are used to highlight certain changes: ### Changed - `verifcid`: Made digest size constants public: `MinDigestSize` (20 bytes) and `MaxDigestSize` (128 bytes) - - Renamed errors for consistency (old names are deprecated but still available): - - `ErrBelowMinimumHashLength` → `ErrBelowMinDigestSize` (old name deprecated) - - `ErrAboveMaximumHashLength` → `ErrAboveMaxDigestSize` (old name deprecated) +- `verifcid`: 🛠 Renamed errors for clarity: + - Added `ErrDigestTooSmall` and `ErrDigestTooLarge` as the new primary errors + - Added `ErrIdentityDigestTooLarge` for identity-specific size violations + - `ErrBelowMinimumHashLength` and `ErrAboveMaximumHashLength` remain as deprecated aliases pointing to the new errors ### Removed @@ -30,7 +31,7 @@ The following emojis are used to highlight certain changes: ### Security - `verifcid`: Now enforces maximum size limit of 128 bytes for identity CIDs to prevent abuse. - - 🛠 Attempts to read CIDs with identity multihash digests longer than `MaxDigestSize` will now produce `ErrAboveMaxDigestSize` error. + - 🛠 Attempts to read CIDs with identity multihash digests longer than `MaxDigestSize` will now produce `ErrIdentityDigestTooLarge` error. - Identity CIDs can inline data directly, and without a size limit, they could embed arbitrary amounts of data. Limiting the size also protects gateways from poorly written clients that might send absurdly big data to the gateway encoded as identity CIDs only to retrieve it back. Note that identity CIDs do not provide integrity verification, making them vulnerable to bit flips. They should only be used in controlled contexts like raw leaves of a larger DAG. The limit matches the already existing `MaxDigestSize` that was enforced for regular cryptographic functions. diff --git a/gateway/gateway_test.go b/gateway/gateway_test.go index f2f95e1ea..f09638c9e 100644 --- a/gateway/gateway_test.go +++ b/gateway/gateway_test.go @@ -84,9 +84,9 @@ func TestGatewayGet(t *testing.T) { {"127.0.0.1:8080", "/ipns", http.StatusBadRequest, "invalid path \"/ipns/\": path does not have enough components\n"}, {"127.0.0.1:8080", "/" + k.RootCid().String(), http.StatusNotFound, "404 page not found\n"}, {"127.0.0.1:8080", "/ipfs/this-is-not-a-cid", http.StatusBadRequest, "invalid path \"/ipfs/this-is-not-a-cid\": invalid cid: illegal base32 data at input byte 3\n"}, - {"127.0.0.1:8080", "/ipfs/" + validIdentityCID.String(), http.StatusOK, string(validIdentityData)}, // Valid identity CID returns the inlined data - {"127.0.0.1:8080", "/ipfs/" + invalidIdentityCID.String(), http.StatusBadRequest, "multihash digest must be at most 128 bytes long\n"}, // Invalid identity CID, over size limit - {"127.0.0.1:8080", "/ipfs/" + shortIdentityCID.String(), http.StatusOK, "hello"}, // Short identity CID (below MinDigestSize) should work + {"127.0.0.1:8080", "/ipfs/" + validIdentityCID.String(), http.StatusOK, string(validIdentityData)}, // Valid identity CID returns the inlined data + {"127.0.0.1:8080", "/ipfs/" + invalidIdentityCID.String(), http.StatusBadRequest, "identity digest too large: must be at most 128 bytes\n"}, // Invalid identity CID, over size limit + {"127.0.0.1:8080", "/ipfs/" + shortIdentityCID.String(), http.StatusOK, "hello"}, // Short identity CID (below MinDigestSize) should work {"127.0.0.1:8080", k.String(), http.StatusOK, "fnord"}, {"127.0.0.1:8080", "/ipns/nxdomain.example.com", http.StatusInternalServerError, "failed to resolve /ipns/nxdomain.example.com: " + namesys.ErrResolveFailed.Error() + "\n"}, {"127.0.0.1:8080", "/ipns/%0D%0A%0D%0Ahello", http.StatusInternalServerError, "failed to resolve /ipns/\\r\\n\\r\\nhello: " + namesys.ErrResolveFailed.Error() + "\n"}, diff --git a/verifcid/allowlist_test.go b/verifcid/allowlist_test.go index a3d3c7a77..884c42b28 100644 --- a/verifcid/allowlist_test.go +++ b/verifcid/allowlist_test.go @@ -42,7 +42,7 @@ func TestDefaultAllowList(t *testing.T) { err error }{ {mhcid(mh.SHA2_256, 32), nil}, - {mhcid(mh.SHA2_256, 16), ErrBelowMinDigestSize}, + {mhcid(mh.SHA2_256, 16), ErrDigestTooSmall}, {mhcid(mh.MURMUR3X64_64, 4), ErrPossiblyInsecureHashFunction}, {mhcid(mh.BLAKE3, 32), nil}, {mhcid(mh.BLAKE3, 69), nil}, @@ -61,7 +61,7 @@ func TestDefaultAllowList(t *testing.T) { if err != nil { t.Fatalf("failed to produce a multihash from the long blake3 hash: %v", err) } - if ValidateCid(allowlist, cid.NewCidV1(cid.DagCBOR, longBlake3Mh)) != ErrAboveMaxDigestSize { - t.Errorf("a CID that was longer than the maximum hash length did not error with ErrAboveMaxDigestSize") + if ValidateCid(allowlist, cid.NewCidV1(cid.DagCBOR, longBlake3Mh)) != ErrDigestTooLarge { + t.Errorf("a CID that was longer than the maximum hash length did not error with ErrDigestTooLarge") } } diff --git a/verifcid/cid.go b/verifcid/cid.go index 756059a05..6e7effc56 100644 --- a/verifcid/cid.go +++ b/verifcid/cid.go @@ -17,13 +17,14 @@ const ( var ( ErrPossiblyInsecureHashFunction = errors.New("potentially insecure hash functions not allowed") - ErrBelowMinDigestSize = fmt.Errorf("multihash digest must be at least %d bytes long", MinDigestSize) - ErrAboveMaxDigestSize = fmt.Errorf("multihash digest must be at most %d bytes long", MaxDigestSize) - - // Deprecated: Use ErrBelowMinDigestSize instead - ErrBelowMinimumHashLength = ErrBelowMinDigestSize - // Deprecated: Use ErrAboveMaxDigestSize instead - ErrAboveMaximumHashLength = ErrAboveMaxDigestSize + ErrDigestTooSmall = fmt.Errorf("digest too small: must be at least %d bytes", MinDigestSize) + ErrDigestTooLarge = fmt.Errorf("digest too large: must be at most %d bytes", MaxDigestSize) + ErrIdentityDigestTooLarge = fmt.Errorf("identity digest too large: must be at most %d bytes", MaxDigestSize) + + // Deprecated: Use ErrDigestTooSmall instead + ErrBelowMinimumHashLength = ErrDigestTooSmall + // Deprecated: Use ErrDigestTooLarge instead + ErrAboveMaximumHashLength = ErrDigestTooLarge ) // ValidateCid validates multihash allowance behind given CID. @@ -34,11 +35,14 @@ func ValidateCid(allowlist Allowlist, c cid.Cid) error { } if pref.MhType != mh.IDENTITY && pref.MhLength < MinDigestSize { - return ErrBelowMinDigestSize + return ErrDigestTooSmall } if pref.MhLength > MaxDigestSize { - return ErrAboveMaxDigestSize + if pref.MhType == mh.IDENTITY { + return ErrIdentityDigestTooLarge + } + return ErrDigestTooLarge } return nil diff --git a/verifcid/cid_test.go b/verifcid/cid_test.go index 6a744f67e..df061fd7d 100644 --- a/verifcid/cid_test.go +++ b/verifcid/cid_test.go @@ -27,7 +27,7 @@ func TestValidateCid(t *testing.T) { name: "identity over max size", data: bytes.Repeat([]byte("b"), MaxDigestSize+1), mhType: mh.IDENTITY, - wantErr: ErrAboveMaxDigestSize, + wantErr: ErrIdentityDigestTooLarge, }, { name: "identity at 64 bytes", @@ -39,7 +39,7 @@ func TestValidateCid(t *testing.T) { name: "identity with 1KB data", data: bytes.Repeat([]byte("d"), 1024), mhType: mh.IDENTITY, - wantErr: ErrAboveMaxDigestSize, + wantErr: ErrIdentityDigestTooLarge, }, { name: "small identity", @@ -120,8 +120,8 @@ func TestValidateCid(t *testing.T) { c := cid.NewCidV1(cid.Raw, truncatedHash) err = ValidateCid(allowlist, c) - if err != ErrBelowMinDigestSize { - t.Errorf("expected ErrBelowMinDigestSize for truncated SHA256, got: %v", err) + if err != ErrDigestTooSmall { + t.Errorf("expected ErrDigestTooSmall for truncated SHA256, got: %v", err) } }) From 19189a5b8b9fd94ecbc2f81952623b9bdef181ec Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Sat, 30 Aug 2025 01:13:28 +0200 Subject: [PATCH 3/9] fix(unixfs/mod): prevent identity CID overflow in DagModifier - prevent creation of identity CIDs exceeding verifcid.MaxDigestSize - preserve raw node codec when modifying data under chunker threshold - convert RawNode to UnixFS structure for append operations - inherit full CID prefix from parent directories in MFS When modifying nodes with identity CIDs, automatically switch to cryptographic hash when data would exceed the size limit. RawNodes that need to grow beyond a single block are converted to UnixFS file structures with the original raw data as the first leaf. Fixes ipfs/kubo#6011 --- CHANGELOG.md | 6 + blockservice/blockservice_test.go | 2 +- ipld/unixfs/mod/dagmodifier.go | 144 +++++- ipld/unixfs/mod/dagmodifier_test.go | 666 ++++++++++++++++++++++++++++ mfs/file.go | 25 ++ 5 files changed, 835 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8abc05d44..5583b120d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,12 @@ The following emojis are used to highlight certain changes: ### Fixed +- `ipld/unixfs/mod`: + - `DagModifier` now correctly preserves raw node codec when modifying data under the chunker threshold, instead of incorrectly forcing everything to dag-pb + - `DagModifier` prevents creation of identity CIDs exceeding `verifcid.MaxDigestSize` limit when modifying data, automatically switching to proper cryptographic hash while preserving small identity CIDs + - `DagModifier` now supports appending data to a `RawNode` by automatically converting it into a UnixFS file structure where the original `RawNode` becomes the first leaf block, fixing previously impossible append operations that would fail with "expected protobuf dag node" errors +- `mfs`: Files with identity CIDs now properly inherit full CID prefix from parent directories (version, codec, hash type, length), not just hash type + ### Security - `verifcid`: Now enforces maximum size limit of 128 bytes for identity CIDs to prevent abuse. diff --git a/blockservice/blockservice_test.go b/blockservice/blockservice_test.go index bbb249d1a..c12d59d02 100644 --- a/blockservice/blockservice_test.go +++ b/blockservice/blockservice_test.go @@ -315,7 +315,7 @@ func TestIdentityHashSizeLimit(t *testing.T) { // Invalid identity CID should fail validation _, err = blockservice.GetBlock(ctx, invalidCID) a.Error(err) - a.ErrorIs(err, verifcid.ErrAboveMaxDigestSize) + a.ErrorIs(err, verifcid.ErrIdentityDigestTooLarge) } type fakeIsNewSessionCreateExchange struct { diff --git a/ipld/unixfs/mod/dagmodifier.go b/ipld/unixfs/mod/dagmodifier.go index e069587c5..1653921a8 100644 --- a/ipld/unixfs/mod/dagmodifier.go +++ b/ipld/unixfs/mod/dagmodifier.go @@ -1,5 +1,24 @@ // Package mod provides DAG modification utilities to, for example, // insert additional nodes in a unixfs DAG or truncate them. +// +// Identity CID Handling: +// +// This package automatically handles identity CIDs (multihash code 0x00) which +// inline data directly in the CID. When modifying nodes with identity CIDs, +// the package ensures the [verifcid.MaxDigestSize] limit is respected by +// automatically switching to a cryptographic hash function when the encoded +// data would exceed this limit. The replacement hash function is chosen from +// (in order): the configured Prefix if non-identity, or [util.DefaultIpfsHash] +// as a fallback. This prevents creation of unacceptably big identity CIDs +// while preserving them for small data that fits within the limit. +// +// RawNode Growth: +// +// When appending data to a RawNode that would require multiple blocks, the +// node is automatically converted to a UnixFS file structure. This is necessary +// because RawNodes cannot have child nodes. The original raw data remains +// accessible via its original CID, while the new structure provides full +// UnixFS capabilities. package mod import ( @@ -17,8 +36,11 @@ import ( help "github.com/ipfs/boxo/ipld/unixfs/importer/helpers" trickle "github.com/ipfs/boxo/ipld/unixfs/importer/trickle" uio "github.com/ipfs/boxo/ipld/unixfs/io" + "github.com/ipfs/boxo/util" + "github.com/ipfs/boxo/verifcid" cid "github.com/ipfs/go-cid" ipld "github.com/ipfs/go-ipld-format" + mh "github.com/multiformats/go-multihash" ) // Common errors @@ -54,8 +76,8 @@ type DagModifier struct { } // NewDagModifier returns a new DagModifier, the Cid prefix for newly -// created nodes will be inhered from the passed in node. If the Cid -// version if not 0 raw leaves will also be enabled. The Prefix and +// created nodes will be inherited from the passed in node. If the Cid +// version is not 0 raw leaves will also be enabled. The Prefix and // RawLeaves options can be overridden by changing them after the call. func NewDagModifier(ctx context.Context, from ipld.Node, serv ipld.DAGService, spl chunker.SplitterGen) (*DagModifier, error) { switch from.(type) { @@ -66,7 +88,11 @@ func NewDagModifier(ctx context.Context, from ipld.Node, serv ipld.DAGService, s } prefix := from.Cid().Prefix() - prefix.Codec = cid.DagProtobuf + // Preserve the original codec - don't force everything to DagProtobuf + // Only ProtoNodes with UnixFS data should use DagProtobuf + if _, ok := from.(*mdag.ProtoNode); ok { + prefix.Codec = cid.DagProtobuf + } rawLeaves := false if prefix.Version > 0 { rawLeaves = true @@ -244,6 +270,41 @@ func (dm *DagModifier) Sync() error { return nil } +// safePrefixForSize checks if using identity hash would exceed the size limit +// and returns an appropriate CID prefix. If the data fits within identity hash +// limits, returns the original prefix and false. Otherwise returns a prefix with +// a proper cryptographic hash function and true to indicate the prefix changed. +func (dm *DagModifier) safePrefixForSize(originalPrefix cid.Prefix, dataSize int) (cid.Prefix, bool) { + if originalPrefix.MhType != mh.IDENTITY || dataSize <= verifcid.MaxDigestSize { + return originalPrefix, false + } + + // Identity would overflow, use configured prefix + if dm.Prefix.MhType != mh.IDENTITY { + return dm.Prefix, true + } + + // Configured prefix is also identity, build a safe fallback + newPrefix := dm.Prefix // Copy all fields (Version, Codec, MhType, MhLength) + newPrefix.MhType = util.DefaultIpfsHash + newPrefix.MhLength = -1 // Only reset length when using fallback hash + return newPrefix, true +} + +// ensureSafeProtoNodeHash checks if a ProtoNode's identity hash would exceed +// the size limit and updates its CID builder if necessary. +func (dm *DagModifier) ensureSafeProtoNodeHash(node *mdag.ProtoNode) { + // CidBuilder() returns the cid.Builder which for V1CidPrefix is the Prefix itself + if prefix, ok := node.CidBuilder().(cid.Prefix); ok && prefix.MhType == mh.IDENTITY { + encodedData, _ := node.EncodeProtobuf(false) + if newPrefix, changed := dm.safePrefixForSize(prefix, len(encodedData)); changed { + // Only update the CID builder if the prefix changed + // to avoid unnecessary cache invalidation in ProtoNode + node.SetCidBuilder(newPrefix) + } + } +} + // modifyDag writes the data in 'dm.wrBuf' over the data in 'node' starting at 'offset' // returns the new key of the passed in node. func (dm *DagModifier) modifyDag(n ipld.Node, offset uint64) (cid.Cid, error) { @@ -256,6 +317,8 @@ func (dm *DagModifier) modifyDag(n ipld.Node, offset uint64) (cid.Cid, error) { return cid.Cid{}, err } + // For leaf ProtoNodes, we can only modify data within the existing size + // Expanding the data requires rebuilding the tree structure _, err = dm.wrBuf.Read(fsn.Data()[offset:]) if err != nil && err != io.EOF { return cid.Cid{}, err @@ -273,6 +336,10 @@ func (dm *DagModifier) modifyDag(n ipld.Node, offset uint64) (cid.Cid, error) { nd := new(mdag.ProtoNode) nd.SetData(b) nd.SetCidBuilder(nd0.CidBuilder()) + + // Check if using identity hash would exceed the size limit + dm.ensureSafeProtoNodeHash(nd) + err = dm.dagserv.Add(dm.ctx, nd) if err != nil { return cid.Cid{}, err @@ -312,7 +379,10 @@ func (dm *DagModifier) modifyDag(n ipld.Node, offset uint64) (cid.Cid, error) { copy(bytes[offsetPlusN:], origData[offsetPlusN:]) } - nd, err := mdag.NewRawNodeWPrefix(bytes, nd0.Cid().Prefix()) + // Check if using identity hash would exceed the size limit + prefix, _ := dm.safePrefixForSize(nd0.Cid().Prefix(), len(bytes)) + + nd, err := mdag.NewRawNodeWPrefix(bytes, prefix) if err != nil { return cid.Cid{}, err } @@ -366,14 +436,33 @@ func (dm *DagModifier) modifyDag(n ipld.Node, offset uint64) (cid.Cid, error) { cur += bs } + // Check if using identity hash would exceed the size limit for branch nodes + dm.ensureSafeProtoNodeHash(node) + err = dm.dagserv.Add(dm.ctx, node) return node.Cid(), err } -// appendData appends the blocks from the given chan to the end of this dag +// appendData appends blocks from the splitter to the end of this dag. +// +// For ProtoNodes: Uses trickle.Append directly to add new blocks to the +// existing UnixFS DAG structure. +// +// For RawNodes: Automatically converts to a UnixFS file structure when growth +// is needed. This conversion is necessary because RawNodes are pure data and +// cannot have child nodes. The conversion process: +// - Creates a ProtoNode with UnixFS file metadata +// - Adds the original RawNode as the first leaf block +// - Continues appending new data as raw leaves (if RawLeaves is set) +// +// This conversion is transparent to users but changes the node type from +// RawNode to ProtoNode. The original data remains accessible through the +// direct CID of the original raw block, and the new dag-pb CID makes +// post-append data accessible through the standard UnixFS APIs. func (dm *DagModifier) appendData(nd ipld.Node, spl chunker.Splitter) (ipld.Node, error) { switch nd := nd.(type) { - case *mdag.ProtoNode, *mdag.RawNode: + case *mdag.ProtoNode: + // ProtoNode can be directly passed to trickle.Append dbp := &help.DagBuilderParams{ Dagserv: dm.dagserv, Maxlinks: dm.MaxLinks, @@ -385,6 +474,45 @@ func (dm *DagModifier) appendData(nd ipld.Node, spl chunker.Splitter) (ipld.Node return nil, err } return trickle.Append(dm.ctx, nd, db) + + case *mdag.RawNode: + // RawNode needs to be converted to UnixFS structure for growth + // because RawNodes are just raw data and cannot have child nodes. + // We create a UnixFS file structure with the original RawNode as the first leaf. + fileNode := ft.NewFSNode(ft.TFile) + + // Add the original raw data size as the first block size + fileNode.AddBlockSize(uint64(len(nd.RawData()))) + + // Serialize the UnixFS node + fileNodeBytes, err := fileNode.GetBytes() + if err != nil { + return nil, fmt.Errorf("failed to serialize UnixFS metadata for RawNode conversion: %w", err) + } + + // Create a ProtoNode with the UnixFS metadata + protoNode := mdag.NodeWithData(fileNodeBytes) + protoNode.SetCidBuilder(dm.Prefix) + + // Add the RawNode as the first leaf + err = protoNode.AddNodeLink("", nd) + if err != nil { + return nil, fmt.Errorf("failed to add RawNode as leaf during conversion: %w", err) + } + + // Now we can append new data using trickle + dbp := &help.DagBuilderParams{ + Dagserv: dm.dagserv, + Maxlinks: dm.MaxLinks, + CidBuilder: dm.Prefix, + RawLeaves: true, // Ensure future leaves are raw for consistency + } + db, err := dbp.New(spl) + if err != nil { + return nil, fmt.Errorf("failed to create DAG builder for RawNode growth: %w", err) + } + return trickle.Append(dm.ctx, protoNode, db) + default: return nil, ErrNotUnixfs } @@ -560,7 +688,9 @@ func (dm *DagModifier) dagTruncate(ctx context.Context, n ipld.Node, size uint64 return mdag.NodeWithData(data), nil case *mdag.RawNode: - return mdag.NewRawNodeWPrefix(nd.RawData()[:size], nd.Cid().Prefix()) + data := nd.RawData()[:size] + prefix, _ := dm.safePrefixForSize(nd.Cid().Prefix(), len(data)) + return mdag.NewRawNodeWPrefix(data, prefix) } } diff --git a/ipld/unixfs/mod/dagmodifier_test.go b/ipld/unixfs/mod/dagmodifier_test.go index ab69374ef..8011f1e16 100644 --- a/ipld/unixfs/mod/dagmodifier_test.go +++ b/ipld/unixfs/mod/dagmodifier_test.go @@ -1,6 +1,7 @@ package mod import ( + "bytes" "context" "fmt" "io" @@ -11,7 +12,11 @@ import ( trickle "github.com/ipfs/boxo/ipld/unixfs/importer/trickle" uio "github.com/ipfs/boxo/ipld/unixfs/io" testu "github.com/ipfs/boxo/ipld/unixfs/test" + "github.com/ipfs/boxo/util" + "github.com/ipfs/boxo/verifcid" + "github.com/ipfs/go-cid" "github.com/ipfs/go-test/random" + mh "github.com/multiformats/go-multihash" ) func testModWrite(t *testing.T, beg, size uint64, orig []byte, dm *DagModifier, opts testu.NodeOpts) []byte { @@ -867,3 +872,664 @@ func getOffset(reader uio.DagReader) int64 { } return offset } + +// makeTestDataPattern generates test data with a repeating pattern +func makeTestDataPattern(size int, pattern string) []byte { + data := make([]byte, size) + patternBytes := []byte(pattern) + for i := range data { + data[i] = patternBytes[i%len(patternBytes)] + } + return data +} + +// makeIdentityNode creates a ProtoNode with identity CID for testing +func makeIdentityNode(data []byte) *dag.ProtoNode { + node := dag.NodeWithData(unixfs.FilePBData(data, uint64(len(data)))) + node.SetCidBuilder(cid.Prefix{ + Version: 1, + Codec: cid.DagProtobuf, + MhType: mh.IDENTITY, + MhLength: -1, + }) + return node +} + +// makeRawNodeWithIdentity creates a RawNode with identity CID for testing +func makeRawNodeWithIdentity(data []byte) (*dag.RawNode, error) { + return dag.NewRawNodeWPrefix(data, cid.Prefix{ + Version: 1, + Codec: cid.Raw, + MhType: mh.IDENTITY, + MhLength: -1, + }) +} + +func TestIdentityCIDHandling(t *testing.T) { + ctx := context.Background() + dserv := testu.GetDAGServ() + + // Test that DagModifier prevents creating overlarge identity CIDs + // This addresses https://github.com/ipfs/kubo/issues/6011 + t.Run("prevents identity CID overflow on modification", func(t *testing.T) { + // Create a UnixFS file node with identity hash near the size limit + // UnixFS overhead is approximately 8-10 bytes, so we use data that will + // encode to just under verifcid.MaxDigestSize when combined with metadata + initialData := makeTestDataPattern(verifcid.MaxDigestSize-10, "abcdefghijklmnopqrstuvwxyz") + + // Create a UnixFS file node with identity CID + node := makeIdentityNode(initialData) + + // Verify the encoded size is under limit + encoded, _ := node.EncodeProtobuf(false) + if len(encoded) > verifcid.MaxDigestSize { + t.Skipf("Initial node too large for identity: %d bytes", len(encoded)) + } + + // Store the node + err := dserv.Add(ctx, node) + if err != nil { + t.Fatal(err) + } + + // Create DagModifier + dmod, err := NewDagModifier(ctx, node, dserv, testu.SizeSplitterGen(512)) + if err != nil { + t.Fatal(err) + } + + // Configure fallback hash (simulating what MFS does) + dmod.Prefix.MhType = util.DefaultIpfsHash + dmod.Prefix.MhLength = -1 + + // Modify first few bytes - this won't change the size but validates + // that our check works for any modification + modData := []byte("MODIFIED") + n, err := dmod.WriteAt(modData, 0) + if err != nil { + t.Fatal(err) + } + if n != len(modData) { + t.Fatalf("Expected to write %d bytes, wrote %d", len(modData), n) + } + + // Sync the changes + err = dmod.Sync() + if err != nil { + t.Fatal(err) + } + + // Get result + resultNode, err := dmod.GetNode() + if err != nil { + t.Fatal(err) + } + + // The key verification: if it's a leaf node and would exceed verifcid.MaxDigestSize, + // it must not use identity hash + if len(resultNode.Links()) == 0 { + if protoNode, ok := resultNode.(*dag.ProtoNode); ok { + encodedData, _ := protoNode.EncodeProtobuf(false) + if len(encodedData) > verifcid.MaxDigestSize && resultNode.Cid().Prefix().MhType == mh.IDENTITY { + t.Errorf("Leaf node with %d bytes must not use identity hash", len(encodedData)) + } + } + } + }) + + t.Run("small identity CID remains identity", func(t *testing.T) { + // Create a small UnixFS file node with identity hash + // Keep it well under verifcid.MaxDigestSize (the identity hash limit) + smallData := []byte("hello world") + + // Create a UnixFS file node with identity CID + smallNode := makeIdentityNode(smallData) + + // Store the node + err := dserv.Add(ctx, smallNode) + if err != nil { + t.Fatal(err) + } + + // Create DagModifier with default splitter + dmod, err := NewDagModifier(ctx, smallNode, dserv, testu.SizeSplitterGen(512)) + if err != nil { + t.Fatal(err) + } + + // Write small amount of data (total still under verifcid.MaxDigestSize) + additionalData := []byte(" and more") + n, err := dmod.WriteAt(additionalData, int64(len(smallData))) + if err != nil { + t.Fatalf("WriteAt failed: %v", err) + } + if n != len(additionalData) { + t.Fatalf("expected to write %d bytes, wrote %d", len(additionalData), n) + } + + // Get the resulting node + resultNode, err := dmod.GetNode() + if err != nil { + t.Fatal(err) + } + + // Verify it still uses identity hash + resultCID := resultNode.Cid() + if resultCID.Prefix().MhType != mh.IDENTITY { + t.Fatalf("expected identity hash for small data, got %v", resultCID.Prefix().MhType) + } + }) + + t.Run("preserves all prefix fields when switching from identity", func(t *testing.T) { + // Create initial data that fits well within identity CID limit + // This needs to be large enough to trigger modifyDag but small enough + // to fit in identity with UnixFS overhead + initialData := make([]byte, 100) + for i := range initialData { + initialData[i] = byte('a' + i%26) + } + + // Create identity CID ProtoNode with UnixFS data + initialNode := dag.NodeWithData(unixfs.FilePBData(initialData, uint64(len(initialData)))) + initialNode.SetCidBuilder(cid.Prefix{ + Version: 1, + Codec: cid.DagProtobuf, + MhType: mh.IDENTITY, + MhLength: -1, + }) + + // Verify initial data fits in identity + encodedInitial, _ := initialNode.EncodeProtobuf(false) + if len(encodedInitial) > verifcid.MaxDigestSize { + t.Skipf("Initial data too large for identity: %d bytes", len(encodedInitial)) + } + + err := dserv.Add(ctx, initialNode) + if err != nil { + t.Fatal(err) + } + + // Create DagModifier + dmod, err := NewDagModifier(ctx, initialNode, dserv, testu.SizeSplitterGen(512)) + if err != nil { + t.Fatal(err) + } + + // Configure a custom prefix with specific values + dmod.Prefix = cid.Prefix{ + Version: 1, + Codec: cid.DagProtobuf, + MhType: mh.SHA2_512, + MhLength: 32, // Truncated SHA-512 + } + + // Write within the existing data bounds to trigger modifyDag + // Use enough data that would cause identity overflow + modData := make([]byte, 50) + for i := range modData { + modData[i] = 'X' + } + + // Write at offset 0, replacing first 50 bytes + n, err := dmod.WriteAt(modData, 0) + if err != nil { + t.Fatal(err) + } + if n != len(modData) { + t.Fatalf("expected to write %d bytes, wrote %d", len(modData), n) + } + + err = dmod.Sync() + if err != nil { + t.Fatal(err) + } + + // Get the modified node + modifiedNode, err := dmod.GetNode() + if err != nil { + t.Fatal(err) + } + + // The node should still be using identity since the data is small + // But let's test that prefix fields would be preserved if it switched + + // For this test, let's verify that our safePrefixForSize function + // correctly preserves fields when it would switch + testPrefix, changed := dmod.safePrefixForSize(initialNode.Cid().Prefix(), verifcid.MaxDigestSize+10) + + if !changed { + t.Errorf("safePrefixForSize: expected prefix to change for oversized identity") + } + if testPrefix.Version != 1 { + t.Errorf("safePrefixForSize: expected Version 1, got %d", testPrefix.Version) + } + if testPrefix.Codec != cid.DagProtobuf { + t.Errorf("safePrefixForSize: expected Codec DagProtobuf, got %d", testPrefix.Codec) + } + if testPrefix.MhType != mh.SHA2_512 { + t.Errorf("safePrefixForSize: expected MhType SHA2_512 (%d), got %d", mh.SHA2_512, testPrefix.MhType) + } + if testPrefix.MhLength != 32 { + t.Errorf("safePrefixForSize: expected MhLength 32, got %d", testPrefix.MhLength) + } + + // Also verify the actual node behavior (it should still be identity for this small data) + if modifiedNode.Cid().Prefix().MhType != mh.IDENTITY { + t.Logf("Note: Node switched from identity even though data is small") + } + }) + + t.Run("raw node preserves codec", func(t *testing.T) { + // Create a RawNode with identity CID that's near the limit + // This allows us to test overflow with small modifications + initialData := make([]byte, verifcid.MaxDigestSize-10) + for i := range initialData { + initialData[i] = byte('a' + i%26) + } + + rawNode, err := dag.NewRawNodeWPrefix(initialData, cid.Prefix{ + Version: 1, + Codec: cid.Raw, + MhType: mh.IDENTITY, + MhLength: -1, + }) + if err != nil { + t.Fatal(err) + } + + // Verify it's using Raw codec and identity hash + if rawNode.Cid().Prefix().Codec != cid.Raw { + t.Fatalf("expected Raw codec, got %d", rawNode.Cid().Prefix().Codec) + } + if rawNode.Cid().Prefix().MhType != mh.IDENTITY { + t.Fatalf("expected identity hash, got %d", rawNode.Cid().Prefix().MhType) + } + + err = dserv.Add(ctx, rawNode) + if err != nil { + t.Fatal(err) + } + + // Create DagModifier + dmod, err := NewDagModifier(ctx, rawNode, dserv, testu.SizeSplitterGen(512)) + if err != nil { + t.Fatal(err) + } + + // Verify DagModifier preserves the Raw codec (not forcing to DagProtobuf) + if dmod.Prefix.Codec != cid.Raw { + t.Errorf("BUG: DagModifier changed codec from Raw (%d) to %d", + cid.Raw, dmod.Prefix.Codec) + } + + // Configure fallback hash for when identity overflows + dmod.Prefix.MhType = util.DefaultIpfsHash + dmod.Prefix.MhLength = -1 + + // Modify a few bytes - the node should stay the same size + // but switch from identity to the configured hash + modData := []byte("MODIFIED") + _, err = dmod.WriteAt(modData, 0) + if err != nil { + t.Fatal(err) + } + + err = dmod.Sync() + if err != nil { + t.Fatal(err) + } + + modifiedNode, err := dmod.GetNode() + if err != nil { + t.Fatal(err) + } + + // The node should switch from identity to SHA256 but keep Raw codec + if modifiedNode.Cid().Prefix().MhType == mh.IDENTITY { + // It might still be identity if total size is under limit + t.Logf("Note: Node still uses identity hash") + } + + // CRITICAL: Must preserve Raw codec + if modifiedNode.Cid().Prefix().Codec != cid.Raw { + t.Errorf("BUG: Raw node changed codec! Expected Raw (%d), got %d", + cid.Raw, modifiedNode.Cid().Prefix().Codec) + } + }) + + t.Run("raw node identity overflow via truncate", func(t *testing.T) { + // Test that Truncate also handles identity overflow correctly for RawNodes + // Start with a RawNode using identity that's at max size + maxData := make([]byte, verifcid.MaxDigestSize) + for i := range maxData { + maxData[i] = byte('A' + i%26) + } + + rawNode, err := dag.NewRawNodeWPrefix(maxData, cid.Prefix{ + Version: 1, + Codec: cid.Raw, + MhType: mh.IDENTITY, + MhLength: -1, + }) + if err != nil { + t.Fatal(err) + } + + // Verify initial state + if rawNode.Cid().Prefix().Codec != cid.Raw { + t.Fatalf("expected Raw codec, got %d", rawNode.Cid().Prefix().Codec) + } + if rawNode.Cid().Prefix().MhType != mh.IDENTITY { + t.Fatalf("expected identity hash, got %d", rawNode.Cid().Prefix().MhType) + } + + err = dserv.Add(ctx, rawNode) + if err != nil { + t.Fatal(err) + } + + // Create DagModifier with non-identity fallback + dmod, err := NewDagModifier(ctx, rawNode, dserv, testu.SizeSplitterGen(512)) + if err != nil { + t.Fatal(err) + } + + // Even though we're truncating to smaller, the original uses identity + // and we should preserve that if it still fits + err = dmod.Truncate(100) + if err != nil { + t.Fatal(err) + } + + truncatedNode, err := dmod.GetNode() + if err != nil { + t.Fatal(err) + } + + // Should still use identity (100 bytes < verifcid.MaxDigestSize) + if truncatedNode.Cid().Prefix().MhType != mh.IDENTITY { + t.Errorf("Expected identity hash for 100 bytes, got %d", + truncatedNode.Cid().Prefix().MhType) + } + + // Must preserve Raw codec + if truncatedNode.Cid().Prefix().Codec != cid.Raw { + t.Errorf("Truncate changed codec! Expected Raw (%d), got %d", + cid.Raw, truncatedNode.Cid().Prefix().Codec) + } + + // Verify data was actually truncated + if rawTrunc, ok := truncatedNode.(*dag.RawNode); ok { + if len(rawTrunc.RawData()) != 100 { + t.Errorf("Expected 100 bytes after truncate, got %d", len(rawTrunc.RawData())) + } + if !bytes.Equal(rawTrunc.RawData(), maxData[:100]) { + t.Error("Truncated data doesn't match expected") + } + } else { + t.Error("Truncated node is not a RawNode") + } + }) + + t.Run("raw node switches from identity when modified to exceed limit", func(t *testing.T) { + // Create a RawNode with identity CID near the limit + // Use size that allows modification without triggering append + initialData := make([]byte, verifcid.MaxDigestSize-1) // 127 bytes + for i := range initialData { + initialData[i] = byte('a') + } + + rawNode, err := dag.NewRawNodeWPrefix(initialData, cid.Prefix{ + Version: 1, + Codec: cid.Raw, + MhType: mh.IDENTITY, + MhLength: -1, + }) + if err != nil { + t.Fatal(err) + } + + // Verify it starts with identity (127 bytes < verifcid.MaxDigestSize) + if rawNode.Cid().Prefix().MhType != mh.IDENTITY { + t.Fatalf("Expected identity hash for 127 bytes, got %d", rawNode.Cid().Prefix().MhType) + } + + err = dserv.Add(ctx, rawNode) + if err != nil { + t.Fatal(err) + } + + // Create DagModifier + dmod, err := NewDagModifier(ctx, rawNode, dserv, testu.SizeSplitterGen(512)) + if err != nil { + t.Fatal(err) + } + + // Configure fallback for identity overflow + dmod.Prefix.MhType = util.DefaultIpfsHash + dmod.Prefix.MhLength = -1 + + // Replace last byte with 2 bytes, pushing size to verifcid.MaxDigestSize + // This modifies within bounds but increases total size just enough + twoBytes := []byte("XX") + _, err = dmod.WriteAt(twoBytes, int64(len(initialData)-1)) + if err != nil { + t.Fatal(err) + } + + err = dmod.Sync() + if err != nil { + t.Fatal(err) + } + + modifiedNode, err := dmod.GetNode() + if err != nil { + t.Fatal(err) + } + + // When extending a RawNode past its end, it gets converted to UnixFS structure + // because RawNodes can't have child nodes - they're just raw data + if protoNode, ok := modifiedNode.(*dag.ProtoNode); ok { + // This is expected - RawNode was converted to UnixFS for growth + fsn, err := unixfs.FSNodeFromBytes(protoNode.Data()) + if err != nil { + t.Fatalf("Failed to parse UnixFS metadata: %v", err) + } + + expectedSize := uint64(verifcid.MaxDigestSize) // 127 original + 1 byte extension + if fsn.FileSize() != expectedSize { + t.Errorf("Expected file size %d, got %d", expectedSize, fsn.FileSize()) + } + + // Should have switched from identity hash + if modifiedNode.Cid().Prefix().MhType == mh.IDENTITY { + t.Error("Large node should not use identity hash") + } + } else if rawMod, ok := modifiedNode.(*dag.RawNode); ok { + // If it's still a RawNode, it means the write didn't extend past the end + actualSize := len(rawMod.RawData()) + t.Logf("Modified node size: %d bytes", actualSize) + + // If size > verifcid.MaxDigestSize, must not use identity + if actualSize > verifcid.MaxDigestSize { + if modifiedNode.Cid().Prefix().MhType == mh.IDENTITY { + t.Errorf("Node with %d bytes still uses identity (max %d)", + actualSize, verifcid.MaxDigestSize) + } + } + + // Must preserve Raw codec + if modifiedNode.Cid().Prefix().Codec != cid.Raw { + t.Errorf("BUG: Raw node changed codec! Expected Raw (%d), got %d", + cid.Raw, modifiedNode.Cid().Prefix().Codec) + } + } else { + t.Error("Node is neither RawNode nor ProtoNode") + } + }) +} + +func TestRawNodeGrowthConversion(t *testing.T) { + ctx := context.Background() + dserv := testu.GetDAGServ() + + t.Run("raw node converts to UnixFS when growing beyond single block", func(t *testing.T) { + // Create a small RawNode + initialData := []byte("small raw data") + rawNode, err := dag.NewRawNodeWPrefix(initialData, cid.Prefix{ + Version: 1, + Codec: cid.Raw, + MhType: mh.SHA2_256, + MhLength: -1, + }) + if err != nil { + t.Fatal(err) + } + + err = dserv.Add(ctx, rawNode) + if err != nil { + t.Fatal(err) + } + + // Create DagModifier with small chunk size to force multiple blocks + dmod, err := NewDagModifier(ctx, rawNode, dserv, testu.SizeSplitterGen(32)) + if err != nil { + t.Fatal(err) + } + + // Seek to end to append (not overwrite) + _, err = dmod.Seek(0, io.SeekEnd) + if err != nil { + t.Fatal(err) + } + + // Append data that will create multiple blocks + largeData := make([]byte, 100) + for i := range largeData { + largeData[i] = byte('A' + i%26) + } + + _, err = dmod.Write(largeData) + if err != nil { + t.Fatal(err) + } + + // Get the resulting node + resultNode, err := dmod.GetNode() + if err != nil { + t.Fatal(err) + } + + // Should now be a ProtoNode with UnixFS metadata + protoNode, ok := resultNode.(*dag.ProtoNode) + if !ok { + t.Fatal("Expected ProtoNode after growth, got RawNode") + } + + // Verify it has UnixFS metadata + fsn, err := unixfs.FSNodeFromBytes(protoNode.Data()) + if err != nil { + t.Fatalf("Failed to parse UnixFS metadata: %v", err) + } + + if fsn.Type() != unixfs.TFile { + t.Errorf("Expected file type, got %v", fsn.Type()) + } + + // Should have multiple links (blocks) + if len(protoNode.Links()) < 2 { + t.Errorf("Expected multiple blocks, got %d", len(protoNode.Links())) + } + + // First link should be the original RawNode + if len(protoNode.Links()) > 0 { + firstChild, err := protoNode.Links()[0].GetNode(ctx, dserv) + if err != nil { + t.Fatal(err) + } + + // First block should contain our original data + if rawChild, ok := firstChild.(*dag.RawNode); ok { + if !bytes.HasPrefix(rawChild.RawData(), initialData) { + t.Error("First block doesn't contain original data") + } + } + } + + // Verify the total size is correct + totalSize := uint64(len(initialData) + len(largeData)) + if fsn.FileSize() != totalSize { + t.Errorf("Expected file size %d, got %d", totalSize, fsn.FileSize()) + } + }) + + t.Run("identity raw node converts and switches hash when growing", func(t *testing.T) { + // Create a RawNode with identity hash + smallData := []byte("tiny") + rawNode, err := dag.NewRawNodeWPrefix(smallData, cid.Prefix{ + Version: 1, + Codec: cid.Raw, + MhType: mh.IDENTITY, + MhLength: -1, + }) + if err != nil { + t.Fatal(err) + } + + err = dserv.Add(ctx, rawNode) + if err != nil { + t.Fatal(err) + } + + // Create DagModifier with small chunk size + dmod, err := NewDagModifier(ctx, rawNode, dserv, testu.SizeSplitterGen(32)) + if err != nil { + t.Fatal(err) + } + + // Configure fallback for identity overflow + dmod.Prefix = cid.Prefix{ + Version: 1, + Codec: cid.DagProtobuf, + MhType: mh.SHA2_256, + MhLength: -1, + } + + // Seek to end to append (not overwrite) + _, err = dmod.Seek(0, io.SeekEnd) + if err != nil { + t.Fatal(err) + } + + // Append enough data to force multi-block structure + appendData := make([]byte, 200) + for i := range appendData { + appendData[i] = byte('X') + } + + _, err = dmod.Write(appendData) + if err != nil { + t.Fatal(err) + } + + resultNode, err := dmod.GetNode() + if err != nil { + t.Fatal(err) + } + + // Should be ProtoNode with non-identity hash + protoNode, ok := resultNode.(*dag.ProtoNode) + if !ok { + t.Fatal("Expected ProtoNode after growth") + } + + if protoNode.Cid().Prefix().MhType == mh.IDENTITY { + t.Error("Large multi-block node should not use identity hash") + } + + // Verify structure + if len(protoNode.Links()) < 2 { + t.Errorf("Expected multiple blocks, got %d", len(protoNode.Links())) + } + }) +} diff --git a/mfs/file.go b/mfs/file.go index 3789e0daf..f31ec47ee 100644 --- a/mfs/file.go +++ b/mfs/file.go @@ -11,8 +11,10 @@ import ( dag "github.com/ipfs/boxo/ipld/merkledag" ft "github.com/ipfs/boxo/ipld/unixfs" mod "github.com/ipfs/boxo/ipld/unixfs/mod" + "github.com/ipfs/boxo/util" ipld "github.com/ipfs/go-ipld-format" "github.com/libp2p/go-libp2p/core/routing" + mh "github.com/multiformats/go-multihash" ) // File represents a file in the MFS, its logic its mainly targeted @@ -110,6 +112,29 @@ func (fi *File) Open(flags Flags) (_ FileDescriptor, _retErr error) { } dmod.RawLeaves = fi.RawLeaves + // If the node uses identity hash, configure DagModifier with a fallback + // to avoid issues when the data grows beyond the identity hash size limit + if dmod.Prefix.MhType == mh.IDENTITY { + // Try to inherit full prefix from parent directory + if fi.parent != nil { + if dir, ok := fi.parent.(*Directory); ok { + if parentNode, err := dir.GetNode(); err == nil { + parentPrefix := parentNode.Cid().Prefix() + if parentPrefix.MhType != mh.IDENTITY { + // Use parent's full prefix (all fields) + dmod.Prefix = parentPrefix + } + } + } + } + + // If still identity (no suitable parent), set up fallback + if dmod.Prefix.MhType == mh.IDENTITY { + dmod.Prefix.MhType = util.DefaultIpfsHash + dmod.Prefix.MhLength = -1 // Use default length for the hash function + } + } + return &fileDescriptor{ inode: fi, flags: flags, From 955e7ef0e31080f636fb38a8c15d1faa35deec3c Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Sat, 30 Aug 2025 02:21:45 +0200 Subject: [PATCH 4/9] feat(verifcid): add MaxIdentityDigestSize constant - introduce MaxIdentityDigestSize for identity-specific size limits - update all identity CID checks to use new constant - improve safePrefixForSize logic to only check size for identity hashes This provides clearer semantic distinction between general digest limits and identity-specific limits, making the code more maintainable and future-proof. --- CHANGELOG.md | 14 +++++------ blockservice/blockservice_test.go | 8 +++--- gateway/gateway_test.go | 8 +++--- ipld/unixfs/mod/dagmodifier.go | 13 +++++++--- ipld/unixfs/mod/dagmodifier_test.go | 38 ++++++++++++++--------------- verifcid/cid.go | 24 +++++++++++------- verifcid/cid_test.go | 4 +-- 7 files changed, 61 insertions(+), 48 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5583b120d..0a5c4388d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,10 +18,10 @@ The following emojis are used to highlight certain changes: ### Changed -- `verifcid`: Made digest size constants public: `MinDigestSize` (20 bytes) and `MaxDigestSize` (128 bytes) -- `verifcid`: 🛠 Renamed errors for clarity: - - Added `ErrDigestTooSmall` and `ErrDigestTooLarge` as the new primary errors - - Added `ErrIdentityDigestTooLarge` for identity-specific size violations +- `verifcid`: 🛠 Improved digest size limit handling ([#1018](https://github.com/ipfs/boxo/pull/1018)) + - Made digest size constants public: `MinDigestSize` (20 bytes) and `MaxDigestSize` (128 bytes) + - Added `MaxIdentityDigestSize` (128 bytes) constant specifically for identity CID size limits + - Renamed errors for clarity: Added `ErrDigestTooSmall`, `ErrDigestTooLarge`, and `ErrIdentityDigestTooLarge` as the new primary errors - `ErrBelowMinimumHashLength` and `ErrAboveMaximumHashLength` remain as deprecated aliases pointing to the new errors ### Removed @@ -30,15 +30,15 @@ The following emojis are used to highlight certain changes: - `ipld/unixfs/mod`: - `DagModifier` now correctly preserves raw node codec when modifying data under the chunker threshold, instead of incorrectly forcing everything to dag-pb - - `DagModifier` prevents creation of identity CIDs exceeding `verifcid.MaxDigestSize` limit when modifying data, automatically switching to proper cryptographic hash while preserving small identity CIDs + - `DagModifier` prevents creation of identity CIDs exceeding `verifcid.MaxIdentityDigestSize` limit when modifying data, automatically switching to proper cryptographic hash while preserving small identity CIDs - `DagModifier` now supports appending data to a `RawNode` by automatically converting it into a UnixFS file structure where the original `RawNode` becomes the first leaf block, fixing previously impossible append operations that would fail with "expected protobuf dag node" errors - `mfs`: Files with identity CIDs now properly inherit full CID prefix from parent directories (version, codec, hash type, length), not just hash type ### Security - `verifcid`: Now enforces maximum size limit of 128 bytes for identity CIDs to prevent abuse. - - 🛠 Attempts to read CIDs with identity multihash digests longer than `MaxDigestSize` will now produce `ErrIdentityDigestTooLarge` error. - - Identity CIDs can inline data directly, and without a size limit, they could embed arbitrary amounts of data. Limiting the size also protects gateways from poorly written clients that might send absurdly big data to the gateway encoded as identity CIDs only to retrieve it back. Note that identity CIDs do not provide integrity verification, making them vulnerable to bit flips. They should only be used in controlled contexts like raw leaves of a larger DAG. The limit matches the already existing `MaxDigestSize` that was enforced for regular cryptographic functions. + - 🛠 Attempts to read CIDs with identity multihash digests longer than `MaxIdentityDigestSize` will now produce `ErrIdentityDigestTooLarge` error. + - Identity CIDs can inline data directly, and without a size limit, they could embed arbitrary amounts of data. Limiting the size also protects gateways from poorly written clients that might send absurdly big data to the gateway encoded as identity CIDs only to retrieve it back. Note that identity CIDs do not provide integrity verification, making them vulnerable to bit flips. They should only be used in controlled contexts like raw leaves of a larger DAG. The limit is explicitly defined as `MaxIdentityDigestSize` (128 bytes). ## [v0.34.0] diff --git a/blockservice/blockservice_test.go b/blockservice/blockservice_test.go index c12d59d02..a855e47e1 100644 --- a/blockservice/blockservice_test.go +++ b/blockservice/blockservice_test.go @@ -295,14 +295,14 @@ func TestIdentityHashSizeLimit(t *testing.T) { bs := blockstore.NewBlockstore(dssync.MutexWrap(ds.NewMapDatastore())) blockservice := New(bs, nil) - // Create identity CID at the MaxDigestSize limit (should be valid) - validData := bytes.Repeat([]byte("a"), verifcid.MaxDigestSize) + // Create identity CID at the MaxIdentityDigestSize limit (should be valid) + validData := bytes.Repeat([]byte("a"), verifcid.MaxIdentityDigestSize) validHash, err := multihash.Sum(validData, multihash.IDENTITY, -1) a.NoError(err) validCID := cid.NewCidV1(cid.Raw, validHash) - // Create identity CID over the MaxDigestSize limit (should be rejected) - invalidData := bytes.Repeat([]byte("b"), verifcid.MaxDigestSize+1) + // Create identity CID over the MaxIdentityDigestSize limit (should be rejected) + invalidData := bytes.Repeat([]byte("b"), verifcid.MaxIdentityDigestSize+1) invalidHash, err := multihash.Sum(invalidData, multihash.IDENTITY, -1) a.NoError(err) invalidCID := cid.NewCidV1(cid.Raw, invalidHash) diff --git a/gateway/gateway_test.go b/gateway/gateway_test.go index f09638c9e..6288e8372 100644 --- a/gateway/gateway_test.go +++ b/gateway/gateway_test.go @@ -55,14 +55,14 @@ func TestGatewayGet(t *testing.T) { backend.namesys["/ipns/example.man"] = newMockNamesysItem(k, 0) // Create identity CIDs for testing - // MaxDigestSize bytes (at the limit, should be valid) - validIdentityData := bytes.Repeat([]byte("a"), verifcid.MaxDigestSize) + // MaxIdentityDigestSize bytes (at the limit, should be valid) + validIdentityData := bytes.Repeat([]byte("a"), verifcid.MaxIdentityDigestSize) validIdentityHash, err := mh.Sum(validIdentityData, mh.IDENTITY, -1) require.NoError(t, err) validIdentityCID := cid.NewCidV1(cid.Raw, validIdentityHash) - // MaxDigestSize+1 bytes (over the limit, should be rejected) - invalidIdentityData := bytes.Repeat([]byte("b"), verifcid.MaxDigestSize+1) + // MaxIdentityDigestSize+1 bytes (over the limit, should be rejected) + invalidIdentityData := bytes.Repeat([]byte("b"), verifcid.MaxIdentityDigestSize+1) invalidIdentityHash, err := mh.Sum(invalidIdentityData, mh.IDENTITY, -1) require.NoError(t, err) invalidIdentityCID := cid.NewCidV1(cid.Raw, invalidIdentityHash) diff --git a/ipld/unixfs/mod/dagmodifier.go b/ipld/unixfs/mod/dagmodifier.go index 1653921a8..4344b77e3 100644 --- a/ipld/unixfs/mod/dagmodifier.go +++ b/ipld/unixfs/mod/dagmodifier.go @@ -5,7 +5,7 @@ // // This package automatically handles identity CIDs (multihash code 0x00) which // inline data directly in the CID. When modifying nodes with identity CIDs, -// the package ensures the [verifcid.MaxDigestSize] limit is respected by +// the package ensures the [verifcid.MaxIdentityDigestSize] limit is respected by // automatically switching to a cryptographic hash function when the encoded // data would exceed this limit. The replacement hash function is chosen from // (in order): the configured Prefix if non-identity, or [util.DefaultIpfsHash] @@ -275,11 +275,18 @@ func (dm *DagModifier) Sync() error { // limits, returns the original prefix and false. Otherwise returns a prefix with // a proper cryptographic hash function and true to indicate the prefix changed. func (dm *DagModifier) safePrefixForSize(originalPrefix cid.Prefix, dataSize int) (cid.Prefix, bool) { - if originalPrefix.MhType != mh.IDENTITY || dataSize <= verifcid.MaxDigestSize { + // If not identity hash, no size check needed - return as is + if originalPrefix.MhType != mh.IDENTITY { return originalPrefix, false } - // Identity would overflow, use configured prefix + // For identity hash, check if data fits within the limit + if dataSize <= verifcid.MaxIdentityDigestSize { + return originalPrefix, false + } + + // Identity would overflow, need to switch to a different hash + // Use configured prefix if it's not identity if dm.Prefix.MhType != mh.IDENTITY { return dm.Prefix, true } diff --git a/ipld/unixfs/mod/dagmodifier_test.go b/ipld/unixfs/mod/dagmodifier_test.go index 8011f1e16..d190f936e 100644 --- a/ipld/unixfs/mod/dagmodifier_test.go +++ b/ipld/unixfs/mod/dagmodifier_test.go @@ -914,15 +914,15 @@ func TestIdentityCIDHandling(t *testing.T) { t.Run("prevents identity CID overflow on modification", func(t *testing.T) { // Create a UnixFS file node with identity hash near the size limit // UnixFS overhead is approximately 8-10 bytes, so we use data that will - // encode to just under verifcid.MaxDigestSize when combined with metadata - initialData := makeTestDataPattern(verifcid.MaxDigestSize-10, "abcdefghijklmnopqrstuvwxyz") + // encode to just under verifcid.MaxIdentityDigestSize when combined with metadata + initialData := makeTestDataPattern(verifcid.MaxIdentityDigestSize-10, "abcdefghijklmnopqrstuvwxyz") // Create a UnixFS file node with identity CID node := makeIdentityNode(initialData) // Verify the encoded size is under limit encoded, _ := node.EncodeProtobuf(false) - if len(encoded) > verifcid.MaxDigestSize { + if len(encoded) > verifcid.MaxIdentityDigestSize { t.Skipf("Initial node too large for identity: %d bytes", len(encoded)) } @@ -965,12 +965,12 @@ func TestIdentityCIDHandling(t *testing.T) { t.Fatal(err) } - // The key verification: if it's a leaf node and would exceed verifcid.MaxDigestSize, + // The key verification: if it's a leaf node and would exceed verifcid.MaxIdentityDigestSize, // it must not use identity hash if len(resultNode.Links()) == 0 { if protoNode, ok := resultNode.(*dag.ProtoNode); ok { encodedData, _ := protoNode.EncodeProtobuf(false) - if len(encodedData) > verifcid.MaxDigestSize && resultNode.Cid().Prefix().MhType == mh.IDENTITY { + if len(encodedData) > verifcid.MaxIdentityDigestSize && resultNode.Cid().Prefix().MhType == mh.IDENTITY { t.Errorf("Leaf node with %d bytes must not use identity hash", len(encodedData)) } } @@ -979,7 +979,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Run("small identity CID remains identity", func(t *testing.T) { // Create a small UnixFS file node with identity hash - // Keep it well under verifcid.MaxDigestSize (the identity hash limit) + // Keep it well under verifcid.MaxIdentityDigestSize (the identity hash limit) smallData := []byte("hello world") // Create a UnixFS file node with identity CID @@ -997,7 +997,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Fatal(err) } - // Write small amount of data (total still under verifcid.MaxDigestSize) + // Write small amount of data (total still under verifcid.MaxIdentityDigestSize) additionalData := []byte(" and more") n, err := dmod.WriteAt(additionalData, int64(len(smallData))) if err != nil { @@ -1040,7 +1040,7 @@ func TestIdentityCIDHandling(t *testing.T) { // Verify initial data fits in identity encodedInitial, _ := initialNode.EncodeProtobuf(false) - if len(encodedInitial) > verifcid.MaxDigestSize { + if len(encodedInitial) > verifcid.MaxIdentityDigestSize { t.Skipf("Initial data too large for identity: %d bytes", len(encodedInitial)) } @@ -1095,7 +1095,7 @@ func TestIdentityCIDHandling(t *testing.T) { // For this test, let's verify that our safePrefixForSize function // correctly preserves fields when it would switch - testPrefix, changed := dmod.safePrefixForSize(initialNode.Cid().Prefix(), verifcid.MaxDigestSize+10) + testPrefix, changed := dmod.safePrefixForSize(initialNode.Cid().Prefix(), verifcid.MaxIdentityDigestSize+10) if !changed { t.Errorf("safePrefixForSize: expected prefix to change for oversized identity") @@ -1122,7 +1122,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Run("raw node preserves codec", func(t *testing.T) { // Create a RawNode with identity CID that's near the limit // This allows us to test overflow with small modifications - initialData := make([]byte, verifcid.MaxDigestSize-10) + initialData := make([]byte, verifcid.MaxIdentityDigestSize-10) for i := range initialData { initialData[i] = byte('a' + i%26) } @@ -1200,7 +1200,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Run("raw node identity overflow via truncate", func(t *testing.T) { // Test that Truncate also handles identity overflow correctly for RawNodes // Start with a RawNode using identity that's at max size - maxData := make([]byte, verifcid.MaxDigestSize) + maxData := make([]byte, verifcid.MaxIdentityDigestSize) for i := range maxData { maxData[i] = byte('A' + i%26) } @@ -1246,7 +1246,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Fatal(err) } - // Should still use identity (100 bytes < verifcid.MaxDigestSize) + // Should still use identity (100 bytes < verifcid.MaxIdentityDigestSize) if truncatedNode.Cid().Prefix().MhType != mh.IDENTITY { t.Errorf("Expected identity hash for 100 bytes, got %d", truncatedNode.Cid().Prefix().MhType) @@ -1274,7 +1274,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Run("raw node switches from identity when modified to exceed limit", func(t *testing.T) { // Create a RawNode with identity CID near the limit // Use size that allows modification without triggering append - initialData := make([]byte, verifcid.MaxDigestSize-1) // 127 bytes + initialData := make([]byte, verifcid.MaxIdentityDigestSize-1) // 127 bytes for i := range initialData { initialData[i] = byte('a') } @@ -1289,7 +1289,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Fatal(err) } - // Verify it starts with identity (127 bytes < verifcid.MaxDigestSize) + // Verify it starts with identity (127 bytes < verifcid.MaxIdentityDigestSize) if rawNode.Cid().Prefix().MhType != mh.IDENTITY { t.Fatalf("Expected identity hash for 127 bytes, got %d", rawNode.Cid().Prefix().MhType) } @@ -1309,7 +1309,7 @@ func TestIdentityCIDHandling(t *testing.T) { dmod.Prefix.MhType = util.DefaultIpfsHash dmod.Prefix.MhLength = -1 - // Replace last byte with 2 bytes, pushing size to verifcid.MaxDigestSize + // Replace last byte with 2 bytes, pushing size to verifcid.MaxIdentityDigestSize // This modifies within bounds but increases total size just enough twoBytes := []byte("XX") _, err = dmod.WriteAt(twoBytes, int64(len(initialData)-1)) @@ -1336,7 +1336,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Fatalf("Failed to parse UnixFS metadata: %v", err) } - expectedSize := uint64(verifcid.MaxDigestSize) // 127 original + 1 byte extension + expectedSize := uint64(verifcid.MaxIdentityDigestSize) // 127 original + 1 byte extension if fsn.FileSize() != expectedSize { t.Errorf("Expected file size %d, got %d", expectedSize, fsn.FileSize()) } @@ -1350,11 +1350,11 @@ func TestIdentityCIDHandling(t *testing.T) { actualSize := len(rawMod.RawData()) t.Logf("Modified node size: %d bytes", actualSize) - // If size > verifcid.MaxDigestSize, must not use identity - if actualSize > verifcid.MaxDigestSize { + // If size > verifcid.MaxIdentityDigestSize, must not use identity + if actualSize > verifcid.MaxIdentityDigestSize { if modifiedNode.Cid().Prefix().MhType == mh.IDENTITY { t.Errorf("Node with %d bytes still uses identity (max %d)", - actualSize, verifcid.MaxDigestSize) + actualSize, verifcid.MaxIdentityDigestSize) } } diff --git a/verifcid/cid.go b/verifcid/cid.go index 6e7effc56..ba97c67e2 100644 --- a/verifcid/cid.go +++ b/verifcid/cid.go @@ -11,15 +11,18 @@ import ( const ( // MinDigestSize is the minimum size for hash digests (except for identity hashes) MinDigestSize = 20 - // MaxDigestSize is the maximum size for all digest types (including identity) + // MaxDigestSize is the maximum size for cryptographic hash digests MaxDigestSize = 128 + // MaxIdentityDigestSize is the maximum size for identity CID digests + // Identity CIDs embed data directly, and this limit prevents abuse + MaxIdentityDigestSize = 128 ) var ( ErrPossiblyInsecureHashFunction = errors.New("potentially insecure hash functions not allowed") ErrDigestTooSmall = fmt.Errorf("digest too small: must be at least %d bytes", MinDigestSize) ErrDigestTooLarge = fmt.Errorf("digest too large: must be at most %d bytes", MaxDigestSize) - ErrIdentityDigestTooLarge = fmt.Errorf("identity digest too large: must be at most %d bytes", MaxDigestSize) + ErrIdentityDigestTooLarge = fmt.Errorf("identity digest too large: must be at most %d bytes", MaxIdentityDigestSize) // Deprecated: Use ErrDigestTooSmall instead ErrBelowMinimumHashLength = ErrDigestTooSmall @@ -34,15 +37,18 @@ func ValidateCid(allowlist Allowlist, c cid.Cid) error { return ErrPossiblyInsecureHashFunction } - if pref.MhType != mh.IDENTITY && pref.MhLength < MinDigestSize { - return ErrDigestTooSmall - } - - if pref.MhLength > MaxDigestSize { - if pref.MhType == mh.IDENTITY { + switch pref.MhType { + case mh.IDENTITY: + if pref.MhLength > MaxIdentityDigestSize { return ErrIdentityDigestTooLarge } - return ErrDigestTooLarge + default: + if pref.MhLength < MinDigestSize { + return ErrDigestTooSmall + } + if pref.MhLength > MaxDigestSize { + return ErrDigestTooLarge + } } return nil diff --git a/verifcid/cid_test.go b/verifcid/cid_test.go index df061fd7d..cb2301268 100644 --- a/verifcid/cid_test.go +++ b/verifcid/cid_test.go @@ -19,13 +19,13 @@ func TestValidateCid(t *testing.T) { }{ { name: "identity at max size", - data: bytes.Repeat([]byte("a"), MaxDigestSize), + data: bytes.Repeat([]byte("a"), MaxIdentityDigestSize), mhType: mh.IDENTITY, wantErr: nil, }, { name: "identity over max size", - data: bytes.Repeat([]byte("b"), MaxDigestSize+1), + data: bytes.Repeat([]byte("b"), MaxIdentityDigestSize+1), mhType: mh.IDENTITY, wantErr: ErrIdentityDigestTooLarge, }, From 9b014be0bccbb785aed6a6671305b214d54bca11 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Sat, 30 Aug 2025 03:44:43 +0200 Subject: [PATCH 5/9] fix(verifcid): use sentinel errors with detailed helpers addresses pr feedback by converting formatted errors to sentinel errors and adding private helper functions that provide detailed error context - sentinel errors are immutable and can be checked with errors.Is() - helper functions add size details when validation fails - maintains backward compatibility with error checking - update gateway test to match new error format Signed-off-by: Marcin Rataj --- gateway/gateway_test.go | 6 +-- ipld/unixfs/mod/dagmodifier_test.go | 10 ----- verifcid/allowlist_test.go | 63 +++++++++++++++++++++-------- verifcid/cid.go | 24 ++++++++--- verifcid/cid_test.go | 9 ++++- 5 files changed, 74 insertions(+), 38 deletions(-) diff --git a/gateway/gateway_test.go b/gateway/gateway_test.go index 6288e8372..ad098fe90 100644 --- a/gateway/gateway_test.go +++ b/gateway/gateway_test.go @@ -84,9 +84,9 @@ func TestGatewayGet(t *testing.T) { {"127.0.0.1:8080", "/ipns", http.StatusBadRequest, "invalid path \"/ipns/\": path does not have enough components\n"}, {"127.0.0.1:8080", "/" + k.RootCid().String(), http.StatusNotFound, "404 page not found\n"}, {"127.0.0.1:8080", "/ipfs/this-is-not-a-cid", http.StatusBadRequest, "invalid path \"/ipfs/this-is-not-a-cid\": invalid cid: illegal base32 data at input byte 3\n"}, - {"127.0.0.1:8080", "/ipfs/" + validIdentityCID.String(), http.StatusOK, string(validIdentityData)}, // Valid identity CID returns the inlined data - {"127.0.0.1:8080", "/ipfs/" + invalidIdentityCID.String(), http.StatusBadRequest, "identity digest too large: must be at most 128 bytes\n"}, // Invalid identity CID, over size limit - {"127.0.0.1:8080", "/ipfs/" + shortIdentityCID.String(), http.StatusOK, "hello"}, // Short identity CID (below MinDigestSize) should work + {"127.0.0.1:8080", "/ipfs/" + validIdentityCID.String(), http.StatusOK, string(validIdentityData)}, // Valid identity CID returns the inlined data + {"127.0.0.1:8080", "/ipfs/" + invalidIdentityCID.String(), http.StatusBadRequest, "identity digest too large: got 129 bytes, maximum 128\n"}, // Invalid identity CID, over size limit + {"127.0.0.1:8080", "/ipfs/" + shortIdentityCID.String(), http.StatusOK, "hello"}, // Short identity CID (below MinDigestSize) should work {"127.0.0.1:8080", k.String(), http.StatusOK, "fnord"}, {"127.0.0.1:8080", "/ipns/nxdomain.example.com", http.StatusInternalServerError, "failed to resolve /ipns/nxdomain.example.com: " + namesys.ErrResolveFailed.Error() + "\n"}, {"127.0.0.1:8080", "/ipns/%0D%0A%0D%0Ahello", http.StatusInternalServerError, "failed to resolve /ipns/\\r\\n\\r\\nhello: " + namesys.ErrResolveFailed.Error() + "\n"}, diff --git a/ipld/unixfs/mod/dagmodifier_test.go b/ipld/unixfs/mod/dagmodifier_test.go index d190f936e..6e161fab4 100644 --- a/ipld/unixfs/mod/dagmodifier_test.go +++ b/ipld/unixfs/mod/dagmodifier_test.go @@ -895,16 +895,6 @@ func makeIdentityNode(data []byte) *dag.ProtoNode { return node } -// makeRawNodeWithIdentity creates a RawNode with identity CID for testing -func makeRawNodeWithIdentity(data []byte) (*dag.RawNode, error) { - return dag.NewRawNodeWPrefix(data, cid.Prefix{ - Version: 1, - Codec: cid.Raw, - MhType: mh.IDENTITY, - MhLength: -1, - }) -} - func TestIdentityCIDHandling(t *testing.T) { ctx := context.Background() dserv := testu.GetDAGServ() diff --git a/verifcid/allowlist_test.go b/verifcid/allowlist_test.go index 884c42b28..6e5d47447 100644 --- a/verifcid/allowlist_test.go +++ b/verifcid/allowlist_test.go @@ -1,6 +1,7 @@ package verifcid import ( + "errors" "testing" "github.com/ipfs/go-cid" @@ -20,13 +21,6 @@ func TestDefaultAllowList(t *testing.T) { t.Fatal("expected failure") } } - mhcid := func(code uint64, length int) cid.Cid { - mhash, err := mh.Sum([]byte{}, code, length) - if err != nil { - t.Fatalf("%v: code: %x length: %d", err, code, length) - } - return cid.NewCidV1(cid.DagCBOR, mhash) - } allowlist := DefaultAllowlist assertTrue(allowlist.IsAllowed(mh.SHA2_256)) @@ -35,24 +29,35 @@ func TestDefaultAllowList(t *testing.T) { assertTrue(allowlist.IsAllowed(mh.KECCAK_256)) assertTrue(allowlist.IsAllowed(mh.SHA3)) assertTrue(allowlist.IsAllowed(mh.SHA1)) + assertTrue(allowlist.IsAllowed(mh.IDENTITY)) assertFalse(allowlist.IsAllowed(mh.BLAKE2B_MIN + 5)) cases := []struct { cid cid.Cid err error }{ - {mhcid(mh.SHA2_256, 32), nil}, - {mhcid(mh.SHA2_256, 16), ErrDigestTooSmall}, - {mhcid(mh.MURMUR3X64_64, 4), ErrPossiblyInsecureHashFunction}, - {mhcid(mh.BLAKE3, 32), nil}, - {mhcid(mh.BLAKE3, 69), nil}, - {mhcid(mh.BLAKE3, 128), nil}, + {mhcid(t, mh.SHA2_256, 32), nil}, + {mhcid(t, mh.SHA2_256, 16), ErrDigestTooSmall}, + {mhcid(t, mh.MURMUR3X64_64, 4), ErrPossiblyInsecureHashFunction}, + {mhcid(t, mh.BLAKE3, 32), nil}, + {mhcid(t, mh.BLAKE3, 69), nil}, + {mhcid(t, mh.BLAKE3, 128), nil}, + {identityCid(t, 19), nil}, // identity below MinDigestSize (exempt from minimum) + {identityCid(t, 64), nil}, // identity under MaxIdentityDigestSize + {identityCid(t, 128), nil}, // identity at MaxIdentityDigestSize + {identityCid(t, 129), ErrIdentityDigestTooLarge}, // identity above MaxIdentityDigestSize } for i, cas := range cases { - if ValidateCid(allowlist, cas.cid) != cas.err { + err := ValidateCid(allowlist, cas.cid) + if cas.err == nil { + if err != nil { + t.Errorf("wrong result in case of %s (index %d). Expected: nil, got %s", + cas.cid, i, err) + } + } else if !errors.Is(err, cas.err) { t.Errorf("wrong result in case of %s (index %d). Expected: %s, got %s", - cas.cid, i, cas.err, ValidateCid(DefaultAllowlist, cas.cid)) + cas.cid, i, cas.err, err) } } @@ -61,7 +66,31 @@ func TestDefaultAllowList(t *testing.T) { if err != nil { t.Fatalf("failed to produce a multihash from the long blake3 hash: %v", err) } - if ValidateCid(allowlist, cid.NewCidV1(cid.DagCBOR, longBlake3Mh)) != ErrDigestTooLarge { - t.Errorf("a CID that was longer than the maximum hash length did not error with ErrDigestTooLarge") + if err := ValidateCid(allowlist, cid.NewCidV1(cid.DagCBOR, longBlake3Mh)); !errors.Is(err, ErrDigestTooLarge) { + t.Errorf("a CID that was longer than the maximum hash length did not error with ErrDigestTooLarge, got: %v", err) + } +} + +// mhcid creates a CID with the specified multihash type and length +func mhcid(t *testing.T, code uint64, length int) cid.Cid { + t.Helper() + mhash, err := mh.Sum([]byte{}, code, length) + if err != nil { + t.Fatalf("%v: code: %x length: %d", err, code, length) + } + return cid.NewCidV1(cid.DagCBOR, mhash) +} + +// identityCid creates an identity CID with specific data size +func identityCid(t *testing.T, size int) cid.Cid { + t.Helper() + data := make([]byte, size) + for i := range data { + data[i] = byte(i % 256) + } + hash, err := mh.Sum(data, mh.IDENTITY, -1) + if err != nil { + t.Fatalf("failed to create identity hash: %v", err) } + return cid.NewCidV1(cid.Raw, hash) } diff --git a/verifcid/cid.go b/verifcid/cid.go index ba97c67e2..8943be0bb 100644 --- a/verifcid/cid.go +++ b/verifcid/cid.go @@ -20,9 +20,9 @@ const ( var ( ErrPossiblyInsecureHashFunction = errors.New("potentially insecure hash functions not allowed") - ErrDigestTooSmall = fmt.Errorf("digest too small: must be at least %d bytes", MinDigestSize) - ErrDigestTooLarge = fmt.Errorf("digest too large: must be at most %d bytes", MaxDigestSize) - ErrIdentityDigestTooLarge = fmt.Errorf("identity digest too large: must be at most %d bytes", MaxIdentityDigestSize) + ErrDigestTooSmall = errors.New("digest too small") + ErrDigestTooLarge = errors.New("digest too large") + ErrIdentityDigestTooLarge = errors.New("identity digest too large") // Deprecated: Use ErrDigestTooSmall instead ErrBelowMinimumHashLength = ErrDigestTooSmall @@ -40,16 +40,28 @@ func ValidateCid(allowlist Allowlist, c cid.Cid) error { switch pref.MhType { case mh.IDENTITY: if pref.MhLength > MaxIdentityDigestSize { - return ErrIdentityDigestTooLarge + return newErrIdentityDigestTooLarge(pref.MhLength) } default: if pref.MhLength < MinDigestSize { - return ErrDigestTooSmall + return newErrDigestTooSmall(pref.MhLength) } if pref.MhLength > MaxDigestSize { - return ErrDigestTooLarge + return newErrDigestTooLarge(pref.MhLength) } } return nil } + +func newErrDigestTooSmall(got int) error { + return fmt.Errorf("%w: got %d bytes, minimum %d", ErrDigestTooSmall, got, MinDigestSize) +} + +func newErrDigestTooLarge(got int) error { + return fmt.Errorf("%w: got %d bytes, maximum %d", ErrDigestTooLarge, got, MaxDigestSize) +} + +func newErrIdentityDigestTooLarge(got int) error { + return fmt.Errorf("%w: got %d bytes, maximum %d", ErrIdentityDigestTooLarge, got, MaxIdentityDigestSize) +} diff --git a/verifcid/cid_test.go b/verifcid/cid_test.go index cb2301268..5716ae372 100644 --- a/verifcid/cid_test.go +++ b/verifcid/cid_test.go @@ -2,6 +2,7 @@ package verifcid import ( "bytes" + "errors" "testing" "github.com/ipfs/go-cid" @@ -64,7 +65,11 @@ func TestValidateCid(t *testing.T) { c := cid.NewCidV1(cid.Raw, hash) err = ValidateCid(allowlist, c) - if err != tt.wantErr { + if tt.wantErr == nil { + if err != nil { + t.Errorf("ValidateCid() error = %v, wantErr nil", err) + } + } else if !errors.Is(err, tt.wantErr) { t.Errorf("ValidateCid() error = %v, wantErr %v", err, tt.wantErr) } }) @@ -120,7 +125,7 @@ func TestValidateCid(t *testing.T) { c := cid.NewCidV1(cid.Raw, truncatedHash) err = ValidateCid(allowlist, c) - if err != ErrDigestTooSmall { + if !errors.Is(err, ErrDigestTooSmall) { t.Errorf("expected ErrDigestTooSmall for truncated SHA256, got: %v", err) } }) From 3025889071e94f2638532b1bd1ea5f0f8e880596 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Sat, 30 Aug 2025 18:29:08 +0200 Subject: [PATCH 6/9] feat(verifcid): add Allowlist interface with per-hash size limits - expand Allowlist interface with MinDigestSize/MaxDigestSize methods - support different size limits per hash function type - identity CIDs use separate DefaultMaxIdentityDigestSize limit - cryptographic hashes use DefaultMaxDigestSize limit - replace magic numbers with named Default* constants throughout - update bitswap to reference verifcid defaults --- bitswap/internal/defaults/defaults.go | 29 +++++++++- bitswap/options.go | 2 +- bitswap/server/internal/decision/engine.go | 7 ++- .../server/internal/decision/engine_test.go | 2 +- bitswap/server/server.go | 8 +-- blockservice/blockservice_test.go | 10 ++-- gateway/gateway_test.go | 14 ++--- ipld/unixfs/mod/dagmodifier.go | 2 +- ipld/unixfs/mod/dagmodifier_test.go | 39 +++++++------- verifcid/allowlist.go | 46 ++++++++++++++++ verifcid/allowlist_test.go | 8 +-- verifcid/cid.go | 54 ++++++++++--------- verifcid/cid_test.go | 20 +++---- 13 files changed, 160 insertions(+), 81 deletions(-) diff --git a/bitswap/internal/defaults/defaults.go b/bitswap/internal/defaults/defaults.go index f61052ca3..f7184eb4c 100644 --- a/bitswap/internal/defaults/defaults.go +++ b/bitswap/internal/defaults/defaults.go @@ -29,8 +29,33 @@ const ( // Maximum size of the wantlist we are willing to keep in memory. MaxQueuedWantlistEntiresPerPeer = 1024 - // MaximumHashLength is now exposed from verifcid.MaxDigestSize - MaximumHashLength = verifcid.MaxDigestSize + // MaximumHashLength is the maximum size for hash digests we accept. + // This references the default from verifcid for consistency. + MaximumHashLength = verifcid.DefaultMaxDigestSize + + // MaximumAllowedCid is the maximum total CID size we accept in bitswap messages. + // Bitswap sends full CIDs (not just multihashes) on the wire, so we must + // limit the total size to prevent DoS attacks from maliciously large CIDs. + // + // The calculation is based on the CID binary format: + // - CIDv0: Just a multihash (hash type + hash length + hash digest) + // - CIDv1: + // - version: varint (usually 1 byte for version 1) + // - multicodec: varint (usually 1-2 bytes for common codecs) + // - multihash: + // - hash-type: varint (usually 1-2 bytes) + // - hash-length: varint (usually 1-2 bytes) + // - hash-digest: up to MaximumHashLength bytes + // + // We use binary.MaxVarintLen64*4 (40 bytes) to accommodate worst-case varint encoding: + // - 1 varint for CID version (max 10 bytes) + // - 1 varint for multicodec (max 10 bytes) + // - 1 varint for multihash type (max 10 bytes) + // - 1 varint for multihash length (max 10 bytes) + // Total: 40 bytes overhead + MaximumHashLength (128) = 168 bytes max + // + // This prevents peers from sending CIDs with pathologically large varint encodings + // that could exhaust memory or cause other issues. MaximumAllowedCid = binary.MaxVarintLen64*4 + MaximumHashLength // RebroadcastDelay is the default delay to trigger broadcast of diff --git a/bitswap/options.go b/bitswap/options.go index 7688c14e8..2d31168dd 100644 --- a/bitswap/options.go +++ b/bitswap/options.go @@ -33,7 +33,7 @@ func MaxQueuedWantlistEntriesPerPeer(count uint) Option { return Option{server.MaxQueuedWantlistEntriesPerPeer(count)} } -// MaxCidSize only affects the server. +// MaxCidSize limits the size of incoming CIDs in requests (server only). // If it is 0 no limit is applied. func MaxCidSize(n uint) Option { return Option{server.MaxCidSize(n)} diff --git a/bitswap/server/internal/decision/engine.go b/bitswap/server/internal/decision/engine.go index 28c60db26..5ac00b895 100644 --- a/bitswap/server/internal/decision/engine.go +++ b/bitswap/server/internal/decision/engine.go @@ -282,8 +282,11 @@ func WithMaxQueuedWantlistEntriesPerPeer(count uint) Option { } } -// WithMaxQueuedWantlistEntriesPerPeer limits how much individual entries each peer is allowed to send. -// If a peer send us more than this we will truncate newest entries. +// WithMaxCidSize limits the size of incoming CIDs that we are willing to accept. +// We will ignore requests for CIDs whose total encoded size exceeds this limit. +// This protects against malicious peers sending CIDs with pathologically large +// varint encodings that could exhaust memory. +// It defaults to [defaults.MaximumAllowedCid] func WithMaxCidSize(n uint) Option { return func(e *Engine) { e.maxCidSize = n diff --git a/bitswap/server/internal/decision/engine_test.go b/bitswap/server/internal/decision/engine_test.go index 60b4938a5..1a88e9260 100644 --- a/bitswap/server/internal/decision/engine_test.go +++ b/bitswap/server/internal/decision/engine_test.go @@ -1660,7 +1660,7 @@ func TestIgnoresCidsAboveLimit(t *testing.T) { wl := warsaw.Engine.WantlistForPeer(riga.Peer) if len(wl) != 1 { - t.Fatal("wantlist add a CID too big") + t.Fatalf("expected 1 entry in wantlist, got %d", len(wl)) } } diff --git a/bitswap/server/server.go b/bitswap/server/server.go index a0b7dd4c9..1e8dd3a31 100644 --- a/bitswap/server/server.go +++ b/bitswap/server/server.go @@ -190,9 +190,11 @@ func MaxQueuedWantlistEntriesPerPeer(count uint) Option { } } -// MaxCidSize limits how big CIDs we are willing to serve. -// We will ignore CIDs over this limit. -// It defaults to [defaults.MaxCidSize]. +// MaxCidSize limits the size of incoming CIDs in requests that we are willing to accept. +// We will ignore requests for CIDs whose total encoded size exceeds this limit. +// This protects against malicious peers sending CIDs with pathologically large +// varint encodings that could exhaust memory. +// It defaults to [defaults.MaximumAllowedCid]. // If it is 0 no limit is applied. func MaxCidSize(n uint) Option { o := decision.WithMaxCidSize(n) diff --git a/blockservice/blockservice_test.go b/blockservice/blockservice_test.go index a855e47e1..9af9488eb 100644 --- a/blockservice/blockservice_test.go +++ b/blockservice/blockservice_test.go @@ -295,14 +295,14 @@ func TestIdentityHashSizeLimit(t *testing.T) { bs := blockstore.NewBlockstore(dssync.MutexWrap(ds.NewMapDatastore())) blockservice := New(bs, nil) - // Create identity CID at the MaxIdentityDigestSize limit (should be valid) - validData := bytes.Repeat([]byte("a"), verifcid.MaxIdentityDigestSize) + // Create identity CID at the DefaultMaxIdentityDigestSize limit (should be valid) + validData := bytes.Repeat([]byte("a"), verifcid.DefaultMaxIdentityDigestSize) validHash, err := multihash.Sum(validData, multihash.IDENTITY, -1) a.NoError(err) validCID := cid.NewCidV1(cid.Raw, validHash) - // Create identity CID over the MaxIdentityDigestSize limit (should be rejected) - invalidData := bytes.Repeat([]byte("b"), verifcid.MaxIdentityDigestSize+1) + // Create identity CID over the DefaultMaxIdentityDigestSize limit (should be rejected) + invalidData := bytes.Repeat([]byte("b"), verifcid.DefaultMaxIdentityDigestSize+1) invalidHash, err := multihash.Sum(invalidData, multihash.IDENTITY, -1) a.NoError(err) invalidCID := cid.NewCidV1(cid.Raw, invalidHash) @@ -315,7 +315,7 @@ func TestIdentityHashSizeLimit(t *testing.T) { // Invalid identity CID should fail validation _, err = blockservice.GetBlock(ctx, invalidCID) a.Error(err) - a.ErrorIs(err, verifcid.ErrIdentityDigestTooLarge) + a.ErrorIs(err, verifcid.ErrDigestTooLarge) } type fakeIsNewSessionCreateExchange struct { diff --git a/gateway/gateway_test.go b/gateway/gateway_test.go index ad098fe90..ec0a99c82 100644 --- a/gateway/gateway_test.go +++ b/gateway/gateway_test.go @@ -55,14 +55,14 @@ func TestGatewayGet(t *testing.T) { backend.namesys["/ipns/example.man"] = newMockNamesysItem(k, 0) // Create identity CIDs for testing - // MaxIdentityDigestSize bytes (at the limit, should be valid) - validIdentityData := bytes.Repeat([]byte("a"), verifcid.MaxIdentityDigestSize) + // verifcid.DefaultMaxIdentityDigestSize bytes (at the identity limit, should be valid) + validIdentityData := bytes.Repeat([]byte("a"), verifcid.DefaultMaxIdentityDigestSize) validIdentityHash, err := mh.Sum(validIdentityData, mh.IDENTITY, -1) require.NoError(t, err) validIdentityCID := cid.NewCidV1(cid.Raw, validIdentityHash) - // MaxIdentityDigestSize+1 bytes (over the limit, should be rejected) - invalidIdentityData := bytes.Repeat([]byte("b"), verifcid.MaxIdentityDigestSize+1) + // verifcid.DefaultMaxIdentityDigestSize+1 bytes (over the identity limit, should be rejected) + invalidIdentityData := bytes.Repeat([]byte("b"), verifcid.DefaultMaxIdentityDigestSize+1) invalidIdentityHash, err := mh.Sum(invalidIdentityData, mh.IDENTITY, -1) require.NoError(t, err) invalidIdentityCID := cid.NewCidV1(cid.Raw, invalidIdentityHash) @@ -84,9 +84,9 @@ func TestGatewayGet(t *testing.T) { {"127.0.0.1:8080", "/ipns", http.StatusBadRequest, "invalid path \"/ipns/\": path does not have enough components\n"}, {"127.0.0.1:8080", "/" + k.RootCid().String(), http.StatusNotFound, "404 page not found\n"}, {"127.0.0.1:8080", "/ipfs/this-is-not-a-cid", http.StatusBadRequest, "invalid path \"/ipfs/this-is-not-a-cid\": invalid cid: illegal base32 data at input byte 3\n"}, - {"127.0.0.1:8080", "/ipfs/" + validIdentityCID.String(), http.StatusOK, string(validIdentityData)}, // Valid identity CID returns the inlined data - {"127.0.0.1:8080", "/ipfs/" + invalidIdentityCID.String(), http.StatusBadRequest, "identity digest too large: got 129 bytes, maximum 128\n"}, // Invalid identity CID, over size limit - {"127.0.0.1:8080", "/ipfs/" + shortIdentityCID.String(), http.StatusOK, "hello"}, // Short identity CID (below MinDigestSize) should work + {"127.0.0.1:8080", "/ipfs/" + validIdentityCID.String(), http.StatusOK, string(validIdentityData)}, // Valid identity CID returns the inlined data + {"127.0.0.1:8080", "/ipfs/" + invalidIdentityCID.String(), http.StatusBadRequest, "digest too large: identity digest got 129 bytes, maximum 128\n"}, // Invalid identity CID, over size limit + {"127.0.0.1:8080", "/ipfs/" + shortIdentityCID.String(), http.StatusOK, "hello"}, // Short identity CID (below MinDigestSize) should work {"127.0.0.1:8080", k.String(), http.StatusOK, "fnord"}, {"127.0.0.1:8080", "/ipns/nxdomain.example.com", http.StatusInternalServerError, "failed to resolve /ipns/nxdomain.example.com: " + namesys.ErrResolveFailed.Error() + "\n"}, {"127.0.0.1:8080", "/ipns/%0D%0A%0D%0Ahello", http.StatusInternalServerError, "failed to resolve /ipns/\\r\\n\\r\\nhello: " + namesys.ErrResolveFailed.Error() + "\n"}, diff --git a/ipld/unixfs/mod/dagmodifier.go b/ipld/unixfs/mod/dagmodifier.go index 4344b77e3..3db1fb23a 100644 --- a/ipld/unixfs/mod/dagmodifier.go +++ b/ipld/unixfs/mod/dagmodifier.go @@ -281,7 +281,7 @@ func (dm *DagModifier) safePrefixForSize(originalPrefix cid.Prefix, dataSize int } // For identity hash, check if data fits within the limit - if dataSize <= verifcid.MaxIdentityDigestSize { + if dataSize <= verifcid.DefaultMaxIdentityDigestSize { return originalPrefix, false } diff --git a/ipld/unixfs/mod/dagmodifier_test.go b/ipld/unixfs/mod/dagmodifier_test.go index 6e161fab4..3ae1ee29c 100644 --- a/ipld/unixfs/mod/dagmodifier_test.go +++ b/ipld/unixfs/mod/dagmodifier_test.go @@ -901,18 +901,19 @@ func TestIdentityCIDHandling(t *testing.T) { // Test that DagModifier prevents creating overlarge identity CIDs // This addresses https://github.com/ipfs/kubo/issues/6011 + t.Run("prevents identity CID overflow on modification", func(t *testing.T) { // Create a UnixFS file node with identity hash near the size limit // UnixFS overhead is approximately 8-10 bytes, so we use data that will - // encode to just under verifcid.MaxIdentityDigestSize when combined with metadata - initialData := makeTestDataPattern(verifcid.MaxIdentityDigestSize-10, "abcdefghijklmnopqrstuvwxyz") + // encode to just under DefaultMaxIdentityDigestSize when combined with metadata + initialData := makeTestDataPattern(verifcid.DefaultMaxIdentityDigestSize-10, "abcdefghijklmnopqrstuvwxyz") // Create a UnixFS file node with identity CID node := makeIdentityNode(initialData) // Verify the encoded size is under limit encoded, _ := node.EncodeProtobuf(false) - if len(encoded) > verifcid.MaxIdentityDigestSize { + if len(encoded) > verifcid.DefaultMaxIdentityDigestSize { t.Skipf("Initial node too large for identity: %d bytes", len(encoded)) } @@ -955,12 +956,12 @@ func TestIdentityCIDHandling(t *testing.T) { t.Fatal(err) } - // The key verification: if it's a leaf node and would exceed verifcid.MaxIdentityDigestSize, + // The key verification: if it's a leaf node and would exceed verifcid.DefaultMaxIdentityDigestSize, // it must not use identity hash if len(resultNode.Links()) == 0 { if protoNode, ok := resultNode.(*dag.ProtoNode); ok { encodedData, _ := protoNode.EncodeProtobuf(false) - if len(encodedData) > verifcid.MaxIdentityDigestSize && resultNode.Cid().Prefix().MhType == mh.IDENTITY { + if len(encodedData) > verifcid.DefaultMaxIdentityDigestSize && resultNode.Cid().Prefix().MhType == mh.IDENTITY { t.Errorf("Leaf node with %d bytes must not use identity hash", len(encodedData)) } } @@ -969,7 +970,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Run("small identity CID remains identity", func(t *testing.T) { // Create a small UnixFS file node with identity hash - // Keep it well under verifcid.MaxIdentityDigestSize (the identity hash limit) + // Keep it well under verifcid.DefaultMaxIdentityDigestSize (the identity hash limit) smallData := []byte("hello world") // Create a UnixFS file node with identity CID @@ -987,7 +988,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Fatal(err) } - // Write small amount of data (total still under verifcid.MaxIdentityDigestSize) + // Write small amount of data (total still under verifcid.DefaultMaxIdentityDigestSize) additionalData := []byte(" and more") n, err := dmod.WriteAt(additionalData, int64(len(smallData))) if err != nil { @@ -1030,7 +1031,7 @@ func TestIdentityCIDHandling(t *testing.T) { // Verify initial data fits in identity encodedInitial, _ := initialNode.EncodeProtobuf(false) - if len(encodedInitial) > verifcid.MaxIdentityDigestSize { + if len(encodedInitial) > verifcid.DefaultMaxIdentityDigestSize { t.Skipf("Initial data too large for identity: %d bytes", len(encodedInitial)) } @@ -1085,7 +1086,7 @@ func TestIdentityCIDHandling(t *testing.T) { // For this test, let's verify that our safePrefixForSize function // correctly preserves fields when it would switch - testPrefix, changed := dmod.safePrefixForSize(initialNode.Cid().Prefix(), verifcid.MaxIdentityDigestSize+10) + testPrefix, changed := dmod.safePrefixForSize(initialNode.Cid().Prefix(), verifcid.DefaultMaxIdentityDigestSize+10) if !changed { t.Errorf("safePrefixForSize: expected prefix to change for oversized identity") @@ -1112,7 +1113,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Run("raw node preserves codec", func(t *testing.T) { // Create a RawNode with identity CID that's near the limit // This allows us to test overflow with small modifications - initialData := make([]byte, verifcid.MaxIdentityDigestSize-10) + initialData := make([]byte, verifcid.DefaultMaxIdentityDigestSize-10) for i := range initialData { initialData[i] = byte('a' + i%26) } @@ -1190,7 +1191,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Run("raw node identity overflow via truncate", func(t *testing.T) { // Test that Truncate also handles identity overflow correctly for RawNodes // Start with a RawNode using identity that's at max size - maxData := make([]byte, verifcid.MaxIdentityDigestSize) + maxData := make([]byte, verifcid.DefaultMaxIdentityDigestSize) for i := range maxData { maxData[i] = byte('A' + i%26) } @@ -1236,7 +1237,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Fatal(err) } - // Should still use identity (100 bytes < verifcid.MaxIdentityDigestSize) + // Should still use identity (100 bytes < verifcid.DefaultMaxIdentityDigestSize) if truncatedNode.Cid().Prefix().MhType != mh.IDENTITY { t.Errorf("Expected identity hash for 100 bytes, got %d", truncatedNode.Cid().Prefix().MhType) @@ -1264,7 +1265,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Run("raw node switches from identity when modified to exceed limit", func(t *testing.T) { // Create a RawNode with identity CID near the limit // Use size that allows modification without triggering append - initialData := make([]byte, verifcid.MaxIdentityDigestSize-1) // 127 bytes + initialData := make([]byte, verifcid.DefaultMaxIdentityDigestSize-1) // 127 bytes for i := range initialData { initialData[i] = byte('a') } @@ -1279,7 +1280,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Fatal(err) } - // Verify it starts with identity (127 bytes < verifcid.MaxIdentityDigestSize) + // Verify it starts with identity (127 bytes < verifcid.DefaultMaxIdentityDigestSize) if rawNode.Cid().Prefix().MhType != mh.IDENTITY { t.Fatalf("Expected identity hash for 127 bytes, got %d", rawNode.Cid().Prefix().MhType) } @@ -1299,7 +1300,7 @@ func TestIdentityCIDHandling(t *testing.T) { dmod.Prefix.MhType = util.DefaultIpfsHash dmod.Prefix.MhLength = -1 - // Replace last byte with 2 bytes, pushing size to verifcid.MaxIdentityDigestSize + // Replace last byte with 2 bytes, pushing size to verifcid.DefaultMaxIdentityDigestSize // This modifies within bounds but increases total size just enough twoBytes := []byte("XX") _, err = dmod.WriteAt(twoBytes, int64(len(initialData)-1)) @@ -1326,7 +1327,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Fatalf("Failed to parse UnixFS metadata: %v", err) } - expectedSize := uint64(verifcid.MaxIdentityDigestSize) // 127 original + 1 byte extension + expectedSize := uint64(verifcid.DefaultMaxIdentityDigestSize) // 127 original + 1 byte extension if fsn.FileSize() != expectedSize { t.Errorf("Expected file size %d, got %d", expectedSize, fsn.FileSize()) } @@ -1340,11 +1341,11 @@ func TestIdentityCIDHandling(t *testing.T) { actualSize := len(rawMod.RawData()) t.Logf("Modified node size: %d bytes", actualSize) - // If size > verifcid.MaxIdentityDigestSize, must not use identity - if actualSize > verifcid.MaxIdentityDigestSize { + // If size > verifcid.DefaultMaxIdentityDigestSize, must not use identity + if actualSize > verifcid.DefaultMaxIdentityDigestSize { if modifiedNode.Cid().Prefix().MhType == mh.IDENTITY { t.Errorf("Node with %d bytes still uses identity (max %d)", - actualSize, verifcid.MaxIdentityDigestSize) + actualSize, verifcid.DefaultMaxIdentityDigestSize) } } diff --git a/verifcid/allowlist.go b/verifcid/allowlist.go index b572de3a6..5068908d8 100644 --- a/verifcid/allowlist.go +++ b/verifcid/allowlist.go @@ -11,6 +11,12 @@ var DefaultAllowlist defaultAllowlist type Allowlist interface { // IsAllowed checks for multihash allowance by the code. IsAllowed(code uint64) bool + + // MinDigestSize returns the minimum digest size for a given multihash code. + MinDigestSize(code uint64) int + + // MaxDigestSize returns the maximum digest size for a given multihash code. + MaxDigestSize(code uint64) int } // NewAllowlist constructs new [Allowlist] from the given map set. @@ -41,6 +47,24 @@ func (al allowlist) IsAllowed(code uint64) bool { return false } +func (al allowlist) MinDigestSize(code uint64) int { + // If we have an override, delegate to it + if al.override != nil { + return al.override.MinDigestSize(code) + } + // Otherwise use default behavior + return DefaultAllowlist.MinDigestSize(code) +} + +func (al allowlist) MaxDigestSize(code uint64) int { + // If we have an override, delegate to it + if al.override != nil { + return al.override.MaxDigestSize(code) + } + // Otherwise use default behavior + return DefaultAllowlist.MaxDigestSize(code) +} + type defaultAllowlist struct{} func (defaultAllowlist) IsAllowed(code uint64) bool { @@ -67,3 +91,25 @@ func (defaultAllowlist) IsAllowed(code uint64) bool { return false } } + +func (defaultAllowlist) MinDigestSize(code uint64) int { + switch code { + case mh.IDENTITY: + // Identity hashes are exempt from minimum size requirements + // as they embed data directly + return 0 + default: + return DefaultMinDigestSize + } +} + +func (defaultAllowlist) MaxDigestSize(code uint64) int { + switch code { + case mh.IDENTITY: + // Identity CIDs embed data directly, limit to prevent abuse + return DefaultMaxIdentityDigestSize + default: + // Maximum size for cryptographic hash digests + return DefaultMaxDigestSize + } +} diff --git a/verifcid/allowlist_test.go b/verifcid/allowlist_test.go index 6e5d47447..1532f2f35 100644 --- a/verifcid/allowlist_test.go +++ b/verifcid/allowlist_test.go @@ -42,10 +42,10 @@ func TestDefaultAllowList(t *testing.T) { {mhcid(t, mh.BLAKE3, 32), nil}, {mhcid(t, mh.BLAKE3, 69), nil}, {mhcid(t, mh.BLAKE3, 128), nil}, - {identityCid(t, 19), nil}, // identity below MinDigestSize (exempt from minimum) - {identityCid(t, 64), nil}, // identity under MaxIdentityDigestSize - {identityCid(t, 128), nil}, // identity at MaxIdentityDigestSize - {identityCid(t, 129), ErrIdentityDigestTooLarge}, // identity above MaxIdentityDigestSize + {identityCid(t, DefaultMinDigestSize-1), nil}, // identity below minimum (exempt from minimum) + {identityCid(t, 64), nil}, // identity under DefaultMaxIdentityDigestSize limit + {identityCid(t, DefaultMaxIdentityDigestSize), nil}, // identity at DefaultMaxIdentityDigestSize limit + {identityCid(t, DefaultMaxIdentityDigestSize+1), ErrDigestTooLarge}, // identity above DefaultMaxIdentityDigestSize limit } for i, cas := range cases { diff --git a/verifcid/cid.go b/verifcid/cid.go index 8943be0bb..c541a0876 100644 --- a/verifcid/cid.go +++ b/verifcid/cid.go @@ -9,20 +9,22 @@ import ( ) const ( - // MinDigestSize is the minimum size for hash digests (except for identity hashes) - MinDigestSize = 20 - // MaxDigestSize is the maximum size for cryptographic hash digests - MaxDigestSize = 128 - // MaxIdentityDigestSize is the maximum size for identity CID digests - // Identity CIDs embed data directly, and this limit prevents abuse - MaxIdentityDigestSize = 128 + // DefaultMinDigestSize is the default minimum size for hash digests (except for identity hashes) + DefaultMinDigestSize = 20 + // DefaultMaxDigestSize is the default maximum size for cryptographic hash digests. + // This does not apply to identity hashes which are not cryptographic and use DefaultMaxIdentityDigestSize instead. + DefaultMaxDigestSize = 128 + // DefaultMaxIdentityDigestSize is the default maximum size for identity CID digests. + // Identity CIDs (with multihash code 0x00) are not cryptographic hashes - they embed + // data directly in the CID. This separate limit prevents abuse while allowing + // different size constraints than cryptographic digests. + DefaultMaxIdentityDigestSize = 128 ) var ( ErrPossiblyInsecureHashFunction = errors.New("potentially insecure hash functions not allowed") ErrDigestTooSmall = errors.New("digest too small") ErrDigestTooLarge = errors.New("digest too large") - ErrIdentityDigestTooLarge = errors.New("identity digest too large") // Deprecated: Use ErrDigestTooSmall instead ErrBelowMinimumHashLength = ErrDigestTooSmall @@ -37,31 +39,31 @@ func ValidateCid(allowlist Allowlist, c cid.Cid) error { return ErrPossiblyInsecureHashFunction } - switch pref.MhType { - case mh.IDENTITY: - if pref.MhLength > MaxIdentityDigestSize { - return newErrIdentityDigestTooLarge(pref.MhLength) - } - default: - if pref.MhLength < MinDigestSize { - return newErrDigestTooSmall(pref.MhLength) - } - if pref.MhLength > MaxDigestSize { - return newErrDigestTooLarge(pref.MhLength) - } + minSize := allowlist.MinDigestSize(pref.MhType) + maxSize := allowlist.MaxDigestSize(pref.MhType) + + if pref.MhLength < minSize { + return newErrDigestTooSmall(pref.MhType, pref.MhLength, minSize) + } + if pref.MhLength > maxSize { + return newErrDigestTooLarge(pref.MhType, pref.MhLength, maxSize) } return nil } -func newErrDigestTooSmall(got int) error { - return fmt.Errorf("%w: got %d bytes, minimum %d", ErrDigestTooSmall, got, MinDigestSize) +func getHashName(code uint64) string { + name, ok := mh.Codes[code] + if !ok { + return fmt.Sprintf("unknown(%d)", code) + } + return name } -func newErrDigestTooLarge(got int) error { - return fmt.Errorf("%w: got %d bytes, maximum %d", ErrDigestTooLarge, got, MaxDigestSize) +func newErrDigestTooSmall(code uint64, got int, min int) error { + return fmt.Errorf("%w: %s digest got %d bytes, minimum %d", ErrDigestTooSmall, getHashName(code), got, min) } -func newErrIdentityDigestTooLarge(got int) error { - return fmt.Errorf("%w: got %d bytes, maximum %d", ErrIdentityDigestTooLarge, got, MaxIdentityDigestSize) +func newErrDigestTooLarge(code uint64, got int, max int) error { + return fmt.Errorf("%w: %s digest got %d bytes, maximum %d", ErrDigestTooLarge, getHashName(code), got, max) } diff --git a/verifcid/cid_test.go b/verifcid/cid_test.go index 5716ae372..738e0ace9 100644 --- a/verifcid/cid_test.go +++ b/verifcid/cid_test.go @@ -20,15 +20,15 @@ func TestValidateCid(t *testing.T) { }{ { name: "identity at max size", - data: bytes.Repeat([]byte("a"), MaxIdentityDigestSize), + data: bytes.Repeat([]byte("a"), DefaultMaxIdentityDigestSize), // max identity size mhType: mh.IDENTITY, wantErr: nil, }, { name: "identity over max size", - data: bytes.Repeat([]byte("b"), MaxIdentityDigestSize+1), + data: bytes.Repeat([]byte("b"), DefaultMaxIdentityDigestSize+1), // over max identity size mhType: mh.IDENTITY, - wantErr: ErrIdentityDigestTooLarge, + wantErr: ErrDigestTooLarge, }, { name: "identity at 64 bytes", @@ -40,7 +40,7 @@ func TestValidateCid(t *testing.T) { name: "identity with 1KB data", data: bytes.Repeat([]byte("d"), 1024), mhType: mh.IDENTITY, - wantErr: ErrIdentityDigestTooLarge, + wantErr: ErrDigestTooLarge, }, { name: "small identity", @@ -111,12 +111,12 @@ func TestValidateCid(t *testing.T) { t.Fatal(err) } - // Manually create a truncated hash (19 bytes) + // Manually create a truncated hash (DefaultMinDigestSize-1 bytes) decoded, err := mh.Decode(fullHash) if err != nil { t.Fatal(err) } - truncatedDigest := decoded.Digest[:MinDigestSize-1] + truncatedDigest := decoded.Digest[:DefaultMinDigestSize-1] // less than minimum truncatedHash, err := mh.Encode(truncatedDigest, mh.SHA2_256) if err != nil { t.Fatal(err) @@ -133,7 +133,7 @@ func TestValidateCid(t *testing.T) { t.Run("identity hash exempt from minimum size", func(t *testing.T) { // Test that identity hashes below MinDigestSize are still valid // This confirms identity hashes are exempt from the minimum size requirement - smallData := []byte("tiny") // Only 4 bytes, well below MinDigestSize of 20 + smallData := []byte("tiny") // Only 4 bytes, well below DefaultMinDigestSize hash, err := mh.Sum(smallData, mh.IDENTITY, -1) if err != nil { t.Fatal(err) @@ -146,8 +146,8 @@ func TestValidateCid(t *testing.T) { t.Errorf("identity CID with %d bytes should be valid (exempt from minimum), got error: %v", len(smallData), err) } - // Also test with exactly MinDigestSize-1 bytes for clarity - dataAt19Bytes := bytes.Repeat([]byte("x"), MinDigestSize-1) + // Also test with exactly DefaultMinDigestSize-1 bytes (less than normal minimum) + dataAt19Bytes := bytes.Repeat([]byte("x"), DefaultMinDigestSize-1) hash19, err := mh.Sum(dataAt19Bytes, mh.IDENTITY, -1) if err != nil { t.Fatal(err) @@ -156,7 +156,7 @@ func TestValidateCid(t *testing.T) { err = ValidateCid(allowlist, c19) if err != nil { - t.Errorf("identity CID with %d bytes should be valid (exempt from minimum), got error: %v", MinDigestSize-1, err) + t.Errorf("identity CID with %d bytes should be valid (exempt from minimum), got error: %v", DefaultMinDigestSize-1, err) } }) } From ce3536cc9f92ba9f9baa4214dfbbe34d9f6008aa Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Sat, 30 Aug 2025 18:38:36 +0200 Subject: [PATCH 7/9] docs: update changelog --- CHANGELOG.md | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a5c4388d..d1be3742b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,11 +18,14 @@ The following emojis are used to highlight certain changes: ### Changed -- `verifcid`: 🛠 Improved digest size limit handling ([#1018](https://github.com/ipfs/boxo/pull/1018)) - - Made digest size constants public: `MinDigestSize` (20 bytes) and `MaxDigestSize` (128 bytes) - - Added `MaxIdentityDigestSize` (128 bytes) constant specifically for identity CID size limits - - Renamed errors for clarity: Added `ErrDigestTooSmall`, `ErrDigestTooLarge`, and `ErrIdentityDigestTooLarge` as the new primary errors +- `verifcid`: 🛠 Enhanced Allowlist interface with per-hash size limits ([#1018](https://github.com/ipfs/boxo/pull/1018)) + - Expanded `Allowlist` interface with `MinDigestSize(code uint64)` and `MaxDigestSize(code uint64)` methods for per-hash function size validation + - Added public constants: `DefaultMinDigestSize` (20 bytes), `DefaultMaxDigestSize` (128 bytes for cryptographic hashes), and `DefaultMaxIdentityDigestSize` (128 bytes for identity CIDs) + - `DefaultAllowlist` implementation now uses these constants and supports different size limits per hash type + - Renamed errors for clarity: Added `ErrDigestTooSmall` and `ErrDigestTooLarge` as the new primary errors - `ErrBelowMinimumHashLength` and `ErrAboveMaximumHashLength` remain as deprecated aliases pointing to the new errors +- `bitswap`: Updated to use `verifcid.DefaultMaxDigestSize` for `MaximumHashLength` constant + - The default `MaximumAllowedCid` limit for incoming CIDs can be adjusted using `bitswap.MaxCidSize` or `server.MaxCidSize` options ### Removed @@ -30,15 +33,15 @@ The following emojis are used to highlight certain changes: - `ipld/unixfs/mod`: - `DagModifier` now correctly preserves raw node codec when modifying data under the chunker threshold, instead of incorrectly forcing everything to dag-pb - - `DagModifier` prevents creation of identity CIDs exceeding `verifcid.MaxIdentityDigestSize` limit when modifying data, automatically switching to proper cryptographic hash while preserving small identity CIDs + - `DagModifier` prevents creation of identity CIDs exceeding `verifcid.DefaultMaxIdentityDigestSize` limit when modifying data, automatically switching to proper cryptographic hash while preserving small identity CIDs - `DagModifier` now supports appending data to a `RawNode` by automatically converting it into a UnixFS file structure where the original `RawNode` becomes the first leaf block, fixing previously impossible append operations that would fail with "expected protobuf dag node" errors - `mfs`: Files with identity CIDs now properly inherit full CID prefix from parent directories (version, codec, hash type, length), not just hash type ### Security - `verifcid`: Now enforces maximum size limit of 128 bytes for identity CIDs to prevent abuse. - - 🛠 Attempts to read CIDs with identity multihash digests longer than `MaxIdentityDigestSize` will now produce `ErrIdentityDigestTooLarge` error. - - Identity CIDs can inline data directly, and without a size limit, they could embed arbitrary amounts of data. Limiting the size also protects gateways from poorly written clients that might send absurdly big data to the gateway encoded as identity CIDs only to retrieve it back. Note that identity CIDs do not provide integrity verification, making them vulnerable to bit flips. They should only be used in controlled contexts like raw leaves of a larger DAG. The limit is explicitly defined as `MaxIdentityDigestSize` (128 bytes). + - 🛠 Attempts to read CIDs with identity multihash digests longer than `DefaultMaxIdentityDigestSize` will now produce `ErrDigestTooLarge` error. + - Identity CIDs can inline data directly, and without a size limit, they could embed arbitrary amounts of data. Limiting the size also protects gateways from poorly written clients that might send absurdly big data to the gateway encoded as identity CIDs only to retrieve it back. Note that identity CIDs do not provide integrity verification, making them vulnerable to bit flips. They should only be used in controlled contexts like raw leaves of a larger DAG. The limit is explicitly defined as `DefaultMaxIdentityDigestSize` (128 bytes). ## [v0.34.0] From 373354403a530fc2fe499111ae51fb8b2baeba1b Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 2 Sep 2025 03:00:09 +0200 Subject: [PATCH 8/9] fix(gateway): remove redundant CID validation blockservice already validates CIDs, no need to duplicate in gateway per @aschmahmann feedback on #1018 --- gateway/gateway_test.go | 6 +++--- gateway/handler.go | 8 -------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/gateway/gateway_test.go b/gateway/gateway_test.go index ec0a99c82..d2798c36c 100644 --- a/gateway/gateway_test.go +++ b/gateway/gateway_test.go @@ -84,9 +84,9 @@ func TestGatewayGet(t *testing.T) { {"127.0.0.1:8080", "/ipns", http.StatusBadRequest, "invalid path \"/ipns/\": path does not have enough components\n"}, {"127.0.0.1:8080", "/" + k.RootCid().String(), http.StatusNotFound, "404 page not found\n"}, {"127.0.0.1:8080", "/ipfs/this-is-not-a-cid", http.StatusBadRequest, "invalid path \"/ipfs/this-is-not-a-cid\": invalid cid: illegal base32 data at input byte 3\n"}, - {"127.0.0.1:8080", "/ipfs/" + validIdentityCID.String(), http.StatusOK, string(validIdentityData)}, // Valid identity CID returns the inlined data - {"127.0.0.1:8080", "/ipfs/" + invalidIdentityCID.String(), http.StatusBadRequest, "digest too large: identity digest got 129 bytes, maximum 128\n"}, // Invalid identity CID, over size limit - {"127.0.0.1:8080", "/ipfs/" + shortIdentityCID.String(), http.StatusOK, "hello"}, // Short identity CID (below MinDigestSize) should work + {"127.0.0.1:8080", "/ipfs/" + validIdentityCID.String(), http.StatusOK, string(validIdentityData)}, // Valid identity CID returns the inlined data + {"127.0.0.1:8080", "/ipfs/" + invalidIdentityCID.String(), http.StatusInternalServerError, "failed to resolve /ipfs/" + invalidIdentityCID.String() + ": digest too large: identity digest got 129 bytes, maximum 128\n"}, // Invalid identity CID, over size limit + {"127.0.0.1:8080", "/ipfs/" + shortIdentityCID.String(), http.StatusOK, "hello"}, // Short identity CID (below MinDigestSize) should work {"127.0.0.1:8080", k.String(), http.StatusOK, "fnord"}, {"127.0.0.1:8080", "/ipns/nxdomain.example.com", http.StatusInternalServerError, "failed to resolve /ipns/nxdomain.example.com: " + namesys.ErrResolveFailed.Error() + "\n"}, {"127.0.0.1:8080", "/ipns/%0D%0A%0D%0Ahello", http.StatusInternalServerError, "failed to resolve /ipns/\\r\\n\\r\\nhello: " + namesys.ErrResolveFailed.Error() + "\n"}, diff --git a/gateway/handler.go b/gateway/handler.go index 0145172dd..03ff2a2c7 100644 --- a/gateway/handler.go +++ b/gateway/handler.go @@ -18,7 +18,6 @@ import ( "github.com/ipfs/boxo/gateway/assets" "github.com/ipfs/boxo/ipns" "github.com/ipfs/boxo/path" - "github.com/ipfs/boxo/verifcid" cid "github.com/ipfs/go-cid" logging "github.com/ipfs/go-log/v2" "github.com/libp2p/go-libp2p/core/peer" @@ -314,13 +313,6 @@ func (i *handler) getOrHeadHandler(w http.ResponseWriter, r *http.Request) { } } - // Validate the root CID before any backend calls - rootCid := rq.immutablePath.RootCid() - if err := verifcid.ValidateCid(verifcid.DefaultAllowlist, rootCid); err != nil { - i.webError(w, r, err, http.StatusBadRequest) - return - } - // CAR response format can be handled now, since (1) it explicitly needs the // full immutable path to include in the CAR, and (2) has custom If-None-Match // header handling due to custom ETag. From e3cfa3bf8a309094e047028d6c9272bf6b2eeddc Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 9 Sep 2025 18:14:07 +0200 Subject: [PATCH 9/9] test: address review feedback on TestIdentityCIDHandling - fail instead of skip when test setup is broken - simplify makeTestData helper function - use bytes.Repeat for single-character byte slices addresses PR review: https://github.com/ipfs/boxo/pull/1018#discussion_r2317126053 https://github.com/ipfs/boxo/pull/1018#discussion_r2317136965 https://github.com/ipfs/boxo/pull/1018#discussion_r2317186335 --- ipld/unixfs/mod/dagmodifier_test.go | 46 ++++++++--------------------- 1 file changed, 12 insertions(+), 34 deletions(-) diff --git a/ipld/unixfs/mod/dagmodifier_test.go b/ipld/unixfs/mod/dagmodifier_test.go index 3ae1ee29c..eee243750 100644 --- a/ipld/unixfs/mod/dagmodifier_test.go +++ b/ipld/unixfs/mod/dagmodifier_test.go @@ -873,12 +873,11 @@ func getOffset(reader uio.DagReader) int64 { return offset } -// makeTestDataPattern generates test data with a repeating pattern -func makeTestDataPattern(size int, pattern string) []byte { +// makeTestData generates test data with repeating lowercase ascii alphabet. +func makeTestData(size int) []byte { data := make([]byte, size) - patternBytes := []byte(pattern) for i := range data { - data[i] = patternBytes[i%len(patternBytes)] + data[i] = byte('a' + i%26) } return data } @@ -906,7 +905,7 @@ func TestIdentityCIDHandling(t *testing.T) { // Create a UnixFS file node with identity hash near the size limit // UnixFS overhead is approximately 8-10 bytes, so we use data that will // encode to just under DefaultMaxIdentityDigestSize when combined with metadata - initialData := makeTestDataPattern(verifcid.DefaultMaxIdentityDigestSize-10, "abcdefghijklmnopqrstuvwxyz") + initialData := makeTestData(verifcid.DefaultMaxIdentityDigestSize - 10) // Create a UnixFS file node with identity CID node := makeIdentityNode(initialData) @@ -914,7 +913,7 @@ func TestIdentityCIDHandling(t *testing.T) { // Verify the encoded size is under limit encoded, _ := node.EncodeProtobuf(false) if len(encoded) > verifcid.DefaultMaxIdentityDigestSize { - t.Skipf("Initial node too large for identity: %d bytes", len(encoded)) + t.Fatalf("Test setup error: initial node too large for identity: %d bytes (max %d)", len(encoded), verifcid.DefaultMaxIdentityDigestSize) } // Store the node @@ -1015,10 +1014,7 @@ func TestIdentityCIDHandling(t *testing.T) { // Create initial data that fits well within identity CID limit // This needs to be large enough to trigger modifyDag but small enough // to fit in identity with UnixFS overhead - initialData := make([]byte, 100) - for i := range initialData { - initialData[i] = byte('a' + i%26) - } + initialData := makeTestData(100) // Create identity CID ProtoNode with UnixFS data initialNode := dag.NodeWithData(unixfs.FilePBData(initialData, uint64(len(initialData)))) @@ -1056,10 +1052,7 @@ func TestIdentityCIDHandling(t *testing.T) { // Write within the existing data bounds to trigger modifyDag // Use enough data that would cause identity overflow - modData := make([]byte, 50) - for i := range modData { - modData[i] = 'X' - } + modData := bytes.Repeat([]byte{'X'}, 50) // Write at offset 0, replacing first 50 bytes n, err := dmod.WriteAt(modData, 0) @@ -1113,10 +1106,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Run("raw node preserves codec", func(t *testing.T) { // Create a RawNode with identity CID that's near the limit // This allows us to test overflow with small modifications - initialData := make([]byte, verifcid.DefaultMaxIdentityDigestSize-10) - for i := range initialData { - initialData[i] = byte('a' + i%26) - } + initialData := makeTestData(verifcid.DefaultMaxIdentityDigestSize - 10) rawNode, err := dag.NewRawNodeWPrefix(initialData, cid.Prefix{ Version: 1, @@ -1191,10 +1181,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Run("raw node identity overflow via truncate", func(t *testing.T) { // Test that Truncate also handles identity overflow correctly for RawNodes // Start with a RawNode using identity that's at max size - maxData := make([]byte, verifcid.DefaultMaxIdentityDigestSize) - for i := range maxData { - maxData[i] = byte('A' + i%26) - } + maxData := makeTestData(verifcid.DefaultMaxIdentityDigestSize) rawNode, err := dag.NewRawNodeWPrefix(maxData, cid.Prefix{ Version: 1, @@ -1265,10 +1252,7 @@ func TestIdentityCIDHandling(t *testing.T) { t.Run("raw node switches from identity when modified to exceed limit", func(t *testing.T) { // Create a RawNode with identity CID near the limit // Use size that allows modification without triggering append - initialData := make([]byte, verifcid.DefaultMaxIdentityDigestSize-1) // 127 bytes - for i := range initialData { - initialData[i] = byte('a') - } + initialData := bytes.Repeat([]byte{'a'}, verifcid.DefaultMaxIdentityDigestSize-1) // 127 bytes rawNode, err := dag.NewRawNodeWPrefix(initialData, cid.Prefix{ Version: 1, @@ -1395,10 +1379,7 @@ func TestRawNodeGrowthConversion(t *testing.T) { } // Append data that will create multiple blocks - largeData := make([]byte, 100) - for i := range largeData { - largeData[i] = byte('A' + i%26) - } + largeData := makeTestData(100) _, err = dmod.Write(largeData) if err != nil { @@ -1493,10 +1474,7 @@ func TestRawNodeGrowthConversion(t *testing.T) { } // Append enough data to force multi-block structure - appendData := make([]byte, 200) - for i := range appendData { - appendData[i] = byte('X') - } + appendData := bytes.Repeat([]byte{'X'}, 200) _, err = dmod.Write(appendData) if err != nil {