From a6848731686d7005593225443544a067ae2282d5 Mon Sep 17 00:00:00 2001
From: nugaon
Date: Thu, 20 Mar 2025 13:40:17 +0100
Subject: [PATCH 01/62] feat: soc dispersed replica

---
 pkg/api/soc.go                  |  43 +++++--
 pkg/replicas/getter_soc.go      | 135 ++++++++++++++++++++
 pkg/replicas/putter_soc.go      |  74 +++++++++++
 pkg/replicas/putter_soc_test.go | 218 ++++++++++++++++++++++++++++++++
 pkg/replicas/replicas_soc.go    | 101 +++++++++++++++
 pkg/soc/validator.go            |  19 ++-
 6 files changed, 581 insertions(+), 9 deletions(-)
 create mode 100644 pkg/replicas/getter_soc.go
 create mode 100644 pkg/replicas/putter_soc.go
 create mode 100644 pkg/replicas/putter_soc_test.go
 create mode 100644 pkg/replicas/replicas_soc.go

diff --git a/pkg/api/soc.go b/pkg/api/soc.go
index 85d9bf5aaa3..534ba47d935 100644
--- a/pkg/api/soc.go
+++ b/pkg/api/soc.go
@@ -14,8 +14,10 @@ import (
 	"github.com/ethersphere/bee/v2/pkg/accesscontrol"
 	"github.com/ethersphere/bee/v2/pkg/cac"
+	"github.com/ethersphere/bee/v2/pkg/file/redundancy"
 	"github.com/ethersphere/bee/v2/pkg/jsonhttp"
 	"github.com/ethersphere/bee/v2/pkg/postage"
+	"github.com/ethersphere/bee/v2/pkg/replicas"
 	"github.com/ethersphere/bee/v2/pkg/soc"
 	"github.com/ethersphere/bee/v2/pkg/storer"
 	"github.com/ethersphere/bee/v2/pkg/swarm"
@@ -47,10 +49,11 @@ func (s *Service) socUploadHandler(w http.ResponseWriter, r *http.Request) {
 	}

 	headers := struct {
-		BatchID        []byte        `map:"Swarm-Postage-Batch-Id"`
-		StampSig       []byte        `map:"Swarm-Postage-Stamp"`
-		Act            bool          `map:"Swarm-Act"`
-		HistoryAddress swarm.Address `map:"Swarm-Act-History-Address"`
+		BatchID        []byte            `map:"Swarm-Postage-Batch-Id"`
+		StampSig       []byte            `map:"Swarm-Postage-Stamp"`
+		Act            bool              `map:"Swarm-Act"`
+		HistoryAddress swarm.Address     `map:"Swarm-Act-History-Address"`
+		RLevel         *redundancy.Level `map:"Swarm-Redundancy-Level"`
 	}{}
 	if response := s.mapStructure(r.Header, &headers); response != nil {
 		response("invalid header params", logger, w)
 		return
@@ -64,11 +67,23 @@ func (s *Service) socUploadHandler(w http.ResponseWriter, r *http.Request) {
 	}

 	var (
-		putter storer.PutterSession
-		err    error
+		basePutter storer.PutterSession
+		putter     storer.PutterSession
+		err        error
 	)

+	rLevel := redundancy.DefaultLevel
+	if headers.RLevel != nil {
+		rLevel = *headers.RLevel
+	}
+
 	if len(headers.StampSig) != 0 {
+		if headers.RLevel != nil {
+			logger.Error(nil, "redundancy level is not supported with stamp signature")
+			jsonhttp.BadRequest(w, "redundancy level is not supported with stamp signature")
+			return
+		}
+		rLevel = redundancy.NONE
 		stamp := postage.Stamp{}
 		if err := stamp.UnmarshalBinary(headers.StampSig); err != nil {
 			errorMsg := "Stamp deserialization failure"
@@ -91,6 +106,10 @@ func (s *Service) socUploadHandler(w http.ResponseWriter, r *http.Request) {
 			Pin:      false,
 			Deferred: false,
 		})
+		basePutter = putter
+		if rLevel != redundancy.NONE {
+			putter = replicas.NewSocPutter(putter, rLevel)
+		}
 	}
 	if err != nil {
 		logger.Debug("get putter failed", "error", err)
@@ -183,7 +202,7 @@ func (s *Service) socUploadHandler(w http.ResponseWriter, r *http.Request) {
 	reference := sch.Address()
 	historyReference := swarm.ZeroAddress
 	if headers.Act {
-		reference, historyReference, err = s.actEncryptionHandler(r.Context(), putter, reference, headers.HistoryAddress)
+		reference, historyReference, err = s.actEncryptionHandler(r.Context(), basePutter, reference, headers.HistoryAddress)
 		if err != nil {
 			logger.Debug("access control upload failed", "error", err)
 			logger.Error(nil, "access control upload failed")
@@ -229,12 +248,17 @@ func (s *Service) socGetHandler(w http.ResponseWriter, r *http.Request) {
 	}

 	headers := struct {
-		OnlyRootChunk bool `map:"Swarm-Only-Root-Chunk"`
+		OnlyRootChunk bool              `map:"Swarm-Only-Root-Chunk"`
+		RLevel        *redundancy.Level `map:"Swarm-Redundancy-Level"`
 	}{}
 	if response := s.mapStructure(r.Header, &headers); response != nil {
 		response("invalid header params", logger, w)
 		return
 	}
+	rLevel := redundancy.DefaultLevel
+	if headers.RLevel != nil {
+		rLevel = *headers.RLevel
+	}

 	address, err := soc.CreateAddress(paths.ID, paths.Owner)
 	if err != nil {
@@ -244,6 +268,9 @@ func (s *Service) socGetHandler(w http.ResponseWriter, r *http.Request) {
 	}

 	getter := s.storer.Download(true)
+	if rLevel != 0 {
+		getter = replicas.NewSocGetter(getter, rLevel)
+	}
 	sch, err := getter.Get(r.Context(), address)
 	if err != nil {
 		logger.Error(err, "soc retrieval failed")
diff --git a/pkg/replicas/getter_soc.go b/pkg/replicas/getter_soc.go
new file mode 100644
index 00000000000..957a106ed48
--- /dev/null
+++ b/pkg/replicas/getter_soc.go
@@ -0,0 +1,135 @@
+// Copyright 2023 The Swarm Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// the code below implements the integration of dispersed replicas in chunk fetching
+// using the storage.Getter interface.
+package replicas
+
+import (
+	"context"
+	"errors"
+	"sync"
+	"time"
+
+	"github.com/ethersphere/bee/v2/pkg/file/redundancy"
+	"github.com/ethersphere/bee/v2/pkg/storage"
+	"github.com/ethersphere/bee/v2/pkg/swarm"
+)
+
+// socGetter is the private implementation of storage.Getter, an interface for
+// retrieving chunks. This getter embeds the original simple chunk getter and extends it
+// to a multiplexed variant that fetches chunks with replicas for SOC.
+//
+// the strategy to retrieve a chunk that has replicas can be configured with a few parameters:
+// - RetryInterval: the delay before a new batch of replicas is fetched.
+// - depth: 2^{depth} is the total number of additional replicas that have been uploaded
+//   (by default, it is assumed to be 4, i.e. a total of 16)
+// - (not implemented) pivot: replicas with address in the proximity of pivot will be tried first
+type socGetter struct {
+	wg sync.WaitGroup
+	storage.Getter
+	level redundancy.Level
+}
+
+// NewSocGetter is the getter constructor
+func NewSocGetter(g storage.Getter, level redundancy.Level) storage.Getter {
+	return &socGetter{Getter: g, level: level}
+}
+
+// Get makes the socGetter satisfy the storage.Getter interface
+func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk, err error) {
+	ctx, cancel := context.WithCancel(ctx)
+	defer cancel()
+
+	// channel that the results (retrieved chunks) are gathered to from concurrent
+	// workers each fetching a replica
+	resultC := make(chan swarm.Chunk)
+	// errc collects the errors
+	errc := make(chan error, 17)
+	var errs error
+	errcnt := 0
+
+	// concurrently call to retrieve chunk using original SOC address
+	g.wg.Add(1)
+	go func() {
+		defer g.wg.Done()
+		ch, err := g.Getter.Get(ctx, addr)
+		if err != nil {
+			errc <- err
+			return
+		}
+
+		select {
+		case resultC <- ch:
+		case <-ctx.Done():
+		}
+	}()
+	// counters
+	n := 0      // counts the replica addresses tried
+	target := 2 // the number of replicas attempted to download in this batch
+	total := g.level.GetReplicaCount()
+
+	//
+	rr := newSocReplicator(addr, g.level)
+	next := rr.c
+	var wait <-chan time.Time // nil channel to disable case
+	// addresses used are doubling each period of search expansion
+	// (at intervals of RetryInterval)
+	ticker := time.NewTicker(RetryInterval)
+	defer ticker.Stop()
+	for level := uint8(0); level <= uint8(g.level); {
+		select {
+		// at least one chunk is retrieved, cancel the rest and return early
+		case chunk := <-resultC:
+			cancel()
+			return chunk, nil
+
+		case err = <-errc:
+			errs = errors.Join(errs, err)
+			errcnt++
+			if errcnt > total {
+				return nil, errors.Join(ErrSwarmageddon, errs)
+			}
+
+		// ticker switches on the address channel
+		case <-wait:
+			wait = nil
+			next = rr.c
+			level++
+			target = 1 << level
+			n = 0
+			continue
+
+		// getting the addresses in order
+		case so := <-next:
+			if so == nil {
+				next = nil
+				continue
+			}
+
+			g.wg.Add(1)
+			go func() {
+				defer g.wg.Done()
+				ch, err := g.Getter.Get(ctx, swarm.NewAddress(so.addr))
+				if err != nil {
+					errc <- err
+					return
+				}
+
+				select {
+				case resultC <- ch:
+				case <-ctx.Done():
+				}
+			}()
+			n++
+			if n < target {
+				continue
+			}
+			next = nil
+			wait = ticker.C
+		}
+	}
+
+	return nil, nil
+}
diff --git a/pkg/replicas/putter_soc.go b/pkg/replicas/putter_soc.go
new file mode 100644
index 00000000000..13cfe026094
--- /dev/null
+++ b/pkg/replicas/putter_soc.go
@@ -0,0 +1,74 @@
+// Copyright 2020 The Swarm Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// the code below implements the integration of dispersed replicas in SOC upload
+// using the storer.PutterSession interface.
+package replicas
+
+import (
+	"context"
+	"errors"
+	"sync"
+
+	"github.com/ethersphere/bee/v2/pkg/file/redundancy"
+	"github.com/ethersphere/bee/v2/pkg/storer"
+	"github.com/ethersphere/bee/v2/pkg/swarm"
+)
+
+// socPutter is the private implementation of the public storage.Putter interface
+// socPutter extends the original putter to a concurrent multiputter
+type socPutter struct {
+	putter storer.PutterSession
+	rLevel redundancy.Level
+}
+
+// NewSocPutter is the putter constructor
+func NewSocPutter(p storer.PutterSession, rLevel redundancy.Level) storer.PutterSession {
+	return &socPutter{
+		putter: p,
+		rLevel: rLevel,
+	}
+}
+
+// Put makes the putter satisfy the storage.Putter interface
+func (p *socPutter) Put(ctx context.Context, ch swarm.Chunk) (err error) {
+	errs := []error{}
+	// Put base chunk first
+	if err = p.putter.Put(ctx, ch); err != nil {
+		return err
+	}
+	if p.rLevel == 0 {
+		return nil
+	}
+
+	rr := newSocReplicator(ch.Address(), p.rLevel)
+	errc := make(chan error, p.rLevel.GetReplicaCount())
+	wg := sync.WaitGroup{}
+	for r := range rr.c {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			sch := swarm.NewChunk(swarm.NewAddress(r.addr), ch.Data())
+			if err == nil {
+				err = p.putter.Put(ctx, sch)
+			}
+			errc <- err
+		}()
+	}
+
+	wg.Wait()
+	close(errc)
+	for err := range errc {
+		errs = append(errs, err)
+	}
+	return errors.Join(errs...)
+}
+
+func (p *socPutter) Cleanup() error {
+	return p.putter.Cleanup()
+}
+
+func (p *socPutter) Done(addr swarm.Address) error {
+	return p.putter.Done(addr)
+}
diff --git a/pkg/replicas/putter_soc_test.go b/pkg/replicas/putter_soc_test.go
new file mode 100644
index 00000000000..8ee1dede4f1
--- /dev/null
+++ b/pkg/replicas/putter_soc_test.go
@@ -0,0 +1,218 @@
+// Copyright 2025 The Swarm Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package replicas_test
+
+import (
+	"context"
+	"crypto/rand"
+	"errors"
+	"fmt"
+	"io"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/ethersphere/bee/v2/pkg/cac"
+	"github.com/ethersphere/bee/v2/pkg/crypto"
+	"github.com/ethersphere/bee/v2/pkg/file/redundancy"
+	"github.com/ethersphere/bee/v2/pkg/replicas"
+	"github.com/ethersphere/bee/v2/pkg/soc"
+	"github.com/ethersphere/bee/v2/pkg/storage"
+	"github.com/ethersphere/bee/v2/pkg/storage/inmemchunkstore"
+	"github.com/ethersphere/bee/v2/pkg/swarm"
+)
+
+type putterSession struct {
+	chunkStore storage.ChunkStore
+	getErrors  func(context.Context, swarm.Address) error
+	putErrors  func(context.Context, swarm.Address) error
+}
+
+func (tbp *putterSession) Get(ctx context.Context, addr swarm.Address) (swarm.Chunk, error) {
+
+	g := tbp.getErrors
+	if g != nil {
+		return nil, g(ctx, addr)
+	}
+	return tbp.chunkStore.Get(ctx, addr)
+}
+
+func (p *putterSession) Put(ctx context.Context, ch swarm.Chunk) error {
+	g := p.putErrors
+	if g != nil {
+		return g(ctx, ch.Address())
+	}
+
+	return p.chunkStore.Put(ctx, ch)
+}
+
+func (p *putterSession) Done(address swarm.Address) error { return nil }
+
+func (p *putterSession) Cleanup() error { return nil }
+
+func TestSocPutter(t *testing.T) {
+	t.Parallel()
+
+	// test key to sign soc chunks
+	privKey, err := crypto.GenerateSecp256k1Key()
+	if err != nil {
+		t.Fatal(err)
+	}
+	signer := crypto.NewDefaultSigner(privKey)
+
+	tcs := []struct {
+		level  redundancy.Level
+		length int
+	}{
+		{0, 1},
+		{1, 1},
+		{2, 1},
+		{3, 1},
+		{4, 1},
+		{0, 4096},
+		{1, 4096},
+		{2, 4096},
+		{3, 4096},
+		{4, 4096},
+	}
+	for _, tc := range tcs {
+		t.Run(fmt.Sprintf("redundancy:%d, size:%d", tc.level, tc.length), func(t *testing.T) {
+			buf := make([]byte, tc.length)
+			if _, err := io.ReadFull(rand.Reader, buf); err != nil {
+				t.Fatal(err)
+			}
+			ctx := context.Background()
+			ch, err := cac.New(buf)
+			if err != nil {
+				t.Fatal(err)
+			}
+			// create soc from cac
+			id := make([]byte, swarm.HashSize)
+			s := soc.New(id, ch)
+			sch, err := s.Sign(signer)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			store := inmemchunkstore.New()
+			defer store.Close()
+			session := &putterSession{chunkStore: store}
+			p := replicas.NewSocPutter(session, tc.level)
+
+			if err := p.Put(ctx, sch); err != nil {
+				t.Fatalf("expected no error. got %v", err)
+			}
+			var addrs []swarm.Address
+			orig := false
+			_ = store.Iterate(ctx, func(chunk swarm.Chunk) (stop bool, err error) {
+				if sch.Address().Equal(chunk.Address()) {
+					orig = true
+					return false, nil
+				}
+				addrs = append(addrs, chunk.Address())
+				return false, nil
+			})
+			if !orig {
+				t.Fatal("original chunk missing")
+			}
+			t.Run("dispersion", func(t *testing.T) {
+				if err := dispersed(tc.level, ch, addrs); err != nil {
+					t.Fatalf("addresses are not dispersed: %v", err)
+				}
+			})
+			t.Run("attempts", func(t *testing.T) {
+				count := tc.level.GetReplicaCount()
+				if len(addrs) != count {
+					t.Fatalf("incorrect number of attempts. want %v, got %v", count, len(addrs))
+				}
+			})
+
+			t.Run("replication", func(t *testing.T) {
+				if err := replicated(store, ch, addrs); err != nil {
+					t.Fatalf("chunks are not replicas: %v", err)
+				}
+			})
+		})
+	}
+	t.Run("error handling", func(t *testing.T) {
+		tcs := []struct {
+			name   string
+			level  redundancy.Level
+			length int
+			f      func(*putterSession) *putterSession
+			err    []error
+		}{
+			{"put errors", 4, 4096, func(tbp *putterSession) *putterSession {
+				var j int32
+				i := &j
+				atomic.StoreInt32(i, 0)
+				tbp.putErrors = func(ctx context.Context, _ swarm.Address) error {
+					j := atomic.AddInt32(i, 1)
+					if j == 6 {
+						return errTestA
+					}
+					if j == 12 {
+						return errTestB
+					}
+					return nil
+				}
+				return tbp
+			}, []error{errTestA, errTestB}},
+			{"put latencies", 4, 4096, func(tbp *putterSession) *putterSession {
+				var j int32
+				i := &j
+				atomic.StoreInt32(i, 0)
+				tbp.putErrors = func(ctx context.Context, _ swarm.Address) error {
+					j := atomic.AddInt32(i, 1)
+					if j == 6 {
+						select {
+						case <-time.After(100 * time.Millisecond):
+						case <-ctx.Done():
+							return ctx.Err()
+						}
+					}
+					if j == 12 {
+						return errTestA
+					}
+					return nil
+				}
+				return tbp
+			}, []error{errTestA, context.DeadlineExceeded}},
+		}
+		for _, tc := range tcs {
+			t.Run(tc.name, func(t *testing.T) {
+				buf := make([]byte, tc.length)
+				if _, err := io.ReadFull(rand.Reader, buf); err != nil {
+					t.Fatal(err)
+				}
+				ctx := context.Background()
+				ctx, cancel := context.WithTimeout(ctx, 50*time.Millisecond)
+				defer cancel()
+				ch, err := cac.New(buf)
+				if err != nil {
+					t.Fatal(err)
+				}
+
+				id := make([]byte, swarm.HashSize)
+				s := soc.New(id, ch)
+				sch, err := s.Sign(signer)
+				if err != nil {
+					t.Fatal(err)
+				}
+
+				store := inmemchunkstore.New()
+				defer store.Close()
+				p := replicas.NewSocPutter(tc.f(&putterSession{chunkStore: store}), tc.level)
+				errs := p.Put(ctx, sch)
+				for _, err := range tc.err {
+					if !errors.Is(errs, err) {
+						t.Fatalf("incorrect error. want it to contain %v. got %v.", tc.err, errs)
+					}
+				}
+			})
+		}
+	})

}
diff --git a/pkg/replicas/replicas_soc.go b/pkg/replicas/replicas_soc.go
new file mode 100644
index 00000000000..e5828a2d86b
--- /dev/null
+++ b/pkg/replicas/replicas_soc.go
@@ -0,0 +1,101 @@
+// Copyright 2025 The Swarm Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package replicas implements a scheme to replicate chunks
+// in such a way that
+// - the replicas are optimally dispersed to aid cross-neighbourhood redundancy
+// - the replica addresses can be deduced by retrievers knowing only the address
+//   of the original content addressed chunk
+// - no new chunk validation rules are introduced
+package replicas
+
+import (
+	"math"
+
+	"github.com/ethersphere/bee/v2/pkg/file/redundancy"
+	"github.com/ethersphere/bee/v2/pkg/swarm"
+)
+
+// socReplicator running the find for replicas
+type socReplicator struct {
+	addr   []byte          // chunk address
+	queue  [16]*socReplica // to sort addresses according to di
+	exist  [30]bool        // maps the 16 distinct nibbles on all levels
+	sizes  [5]int          // number of distinct neighbourhoods recorded for each depth
+	c      chan *socReplica
+	rLevel redundancy.Level
+}
+
+// newSocReplicator socReplicator constructor
+func newSocReplicator(addr swarm.Address, rLevel redundancy.Level) *socReplicator {
+	rr := &socReplicator{
+		addr:   addr.Bytes(),
+		sizes:  redundancy.GetReplicaCounts(),
+		c:      make(chan *socReplica, 16),
+		rLevel: rLevel,
+	}
+	go rr.replicas()
+	return rr
+}
+
+// socReplica of the mined SOC chunk (address) that serves as a replica
+type socReplica struct {
+	addr  []byte // byte slice of SOC address
+	nonce uint8  // byte of the mined nonce
+}
+
+// replicate returns a replica params structure seeded with a byte of entropy as argument
+func (rr *socReplicator) replicate(i uint8) (sp *socReplica) {
+	// calculate SOC replica address for potential replica
+	h := swarm.NewHasher()
+	_, _ = h.Write([]byte{i})
+	_, _ = h.Write(rr.addr)
+	return &socReplica{addr: h.Sum(nil), nonce: i}
+}
+
+// replicas enumerates replica parameters (nonces), pushing them into the channel given as argument
+// the order of replicas is such that addresses are always maximally dispersed
+// in successive sets of addresses.
+// I.e., the binary tree representing the new addresses prefix bits up to depth is balanced
+func (rr *socReplicator) replicas() {
+	defer close(rr.c)
+	n := 0
+	for i := uint8(0); n < rr.rLevel.GetReplicaCount() && i < math.MaxUint8; i++ {
+		// create soc replica (with address and nonce)
+		// the soc is added to neighbourhoods of depths in the closed interval [from...to]
+		r := rr.replicate(i)
+		d, m := rr.add(r, rr.rLevel)
+		if d == 0 {
+			continue
+		}
+		for m, r = range rr.queue[n:] {
+			if r == nil {
+				break
+			}
+			rr.c <- r
+		}
+		n += m
+	}
+}
+
+// add inserts the soc replica into a replicator so that addresses are balanced
+func (rr *socReplicator) add(r *socReplica, rLevel redundancy.Level) (depth int, rank int) {
+	if rLevel == redundancy.NONE {
+		return 0, 0
+	}
+	nh := nh(rLevel, r.addr)
+	if rr.exist[nh] {
+		return 0, 0
+	}
+	rr.exist[nh] = true
+	l, o := rr.add(r, rLevel.Decrement())
+	d := uint8(rLevel) - 1
+	if l == 0 {
+		o = rr.sizes[d]
+		rr.sizes[d]++
+		rr.queue[o] = r
+		l = rLevel.GetReplicaCount()
+	}
+	return l, o
+}
diff --git a/pkg/soc/validator.go b/pkg/soc/validator.go
index 06f2fb72e0a..7cc69dc58a1 100644
--- a/pkg/soc/validator.go
+++ b/pkg/soc/validator.go
@@ -6,6 +6,7 @@ package soc

 import (
 	"bytes"
+	"math"

 	"github.com/ethersphere/bee/v2/pkg/swarm"
 )
@@ -26,5 +27,21 @@ func Valid(ch swarm.Chunk) bool {
 	if err != nil {
 		return false
 	}
-	return ch.Address().Equal(address)
+	defaultSoc := ch.Address().Equal(address)
+	if !defaultSoc {
+		// check whether the SOC chunk is a replica
+		for i := uint8(0); i < math.MaxUint8; i++ {
+			rAddr, err := hash([]byte{i}, ch.Address().Bytes())
+			if err != nil {
+				return false
+			}
+
+			if ch.Address().Equal(swarm.NewAddress(rAddr)) {
+				return true
+			}
+		}
+	} else {
+		return true
+	}
+	return false
 }

From c252d50f78fcf63c7b04f8cbf7e712c98c45618b Mon Sep 17 00:00:00 2001
From: nugaon
Date: Thu, 20 Mar 2025 15:56:58 +0100
Subject: [PATCH 02/62] feat: new soc putter session

---
 pkg/api/soc.go             |  2 +-
 pkg/replicas/putter_soc.go | 29 +++++++++++++++++++++++------
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/pkg/api/soc.go b/pkg/api/soc.go
index 534ba47d935..317d9cac786 100644
--- a/pkg/api/soc.go
+++ b/pkg/api/soc.go
@@ -108,7 +108,7 @@ func (s *Service) socUploadHandler(w http.ResponseWriter, r *http.Request) {
 		})
 		basePutter = putter
 		if rLevel != redundancy.NONE {
-			putter = replicas.NewSocPutter(putter, rLevel)
+			putter = replicas.NewSocPutterSession(putter, rLevel)
 		}
 	}
 	if err != nil {
diff --git a/pkg/replicas/putter_soc.go b/pkg/replicas/putter_soc.go
index 13cfe026094..aa7dbe90f0f 100644
--- a/pkg/replicas/putter_soc.go
+++ b/pkg/replicas/putter_soc.go
@@ -12,6 +12,7 @@ import (
 	"sync"

 	"github.com/ethersphere/bee/v2/pkg/file/redundancy"
+	"github.com/ethersphere/bee/v2/pkg/storage"
 	"github.com/ethersphere/bee/v2/pkg/storer"
 	"github.com/ethersphere/bee/v2/pkg/swarm"
 )
@@ -19,12 +20,12 @@ import (
 // socPutter is the private implementation of the public storage.Putter interface
 // socPutter extends the original putter to a concurrent multiputter
 type socPutter struct {
-	putter storer.PutterSession
+	putter storage.Putter
 	rLevel redundancy.Level
 }

 // NewSocPutter is the putter constructor
-func NewSocPutter(p storer.PutterSession, rLevel redundancy.Level) storer.PutterSession {
+func NewSocPutter(p storage.Putter, rLevel redundancy.Level) storage.Putter {
 	return &socPutter{
 		putter: p,
 		rLevel: rLevel,
@@ -65,10 +66,26 @@ func (p *socPutter) Put(ctx context.Context, ch swarm.Chunk) (err error) {
 	return errors.Join(errs...)
 }

-func (p *socPutter) Cleanup() error {
-	return p.putter.Cleanup()
+// socPutterSession extends the original socPutter
+type socPutterSession struct {
+	socPutter
+	ps storer.PutterSession
 }

-func (p *socPutter) Done(addr swarm.Address) error {
-	return p.putter.Done(addr)
+// NewSocPutterSession is the putterSession constructor
+func NewSocPutterSession(p storer.PutterSession, rLevel redundancy.Level) storer.PutterSession {
+	return &socPutterSession{
+		socPutter{
+			putter: p,
+			rLevel: rLevel,
+		}, p,
+	}
+}
+
+func (p *socPutterSession) Cleanup() error {
+	return p.ps.Cleanup()
+}
+
+func (p *socPutterSession) Done(addr swarm.Address) error {
+	return p.ps.Done(addr)
 }

From 2f4214543ef877b0aaf2dc2f76d85fc0abe5792e Mon Sep 17 00:00:00 2001
From: nugaon
Date: Thu, 5 Jun 2025 12:35:52 +0200
Subject: [PATCH 03/62] refactor: lint issue

---
 pkg/api/soc.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pkg/api/soc.go b/pkg/api/soc.go
index 317d9cac786..9a768c0fe95 100644
--- a/pkg/api/soc.go
+++ b/pkg/api/soc.go
@@ -67,8 +67,8 @@ func (s *Service) socUploadHandler(w http.ResponseWriter, r *http.Request) {
 	}

 	var (
-		basePutter storer.PutterSession
-		putter     storer.PutterSession
+		basePutter storer.PutterSession // the putter used to store regular chunks
+		putter     storer.PutterSession // the putter used to store SOC replica chunks
 		err        error
 	)

@@ -83,7 +83,6 @@ func (s *Service) socUploadHandler(w http.ResponseWriter, r *http.Request) {
 			jsonhttp.BadRequest(w, "redundancy level is not supported with stamp signature")
 			return
 		}
-		rLevel = redundancy.NONE
 		stamp := postage.Stamp{}
 		if err := stamp.UnmarshalBinary(headers.StampSig); err != nil {
 			errorMsg := "Stamp deserialization failure"
@@ -99,6 +98,7 @@ func (s *Service) socUploadHandler(w http.ResponseWriter, r *http.Request) {
 			Pin:      false,
 			Deferred: false,
 		}, &stamp)
+		basePutter = putter
 	} else {
 		putter, err = s.newStamperPutter(r.Context(), putterOptions{
 			BatchID: headers.BatchID,

From 19971d1112b6c5c87407b8027cc076814ecf4402 Mon Sep 17 00:00:00 2001
From: nugaon
Date: Thu, 5 Jun 2025 13:49:57 +0200
Subject: [PATCH 04/62] test: outline race condition

---
 pkg/replicas/putter_soc_test.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/replicas/putter_soc_test.go b/pkg/replicas/putter_soc_test.go
index 8ee1dede4f1..6b714fc6ba7 100644
--- a/pkg/replicas/putter_soc_test.go
+++ b/pkg/replicas/putter_soc_test.go
@@ -150,6 +150,7 @@ func TestSocPutter(t *testing.T) {
 				atomic.StoreInt32(i, 0)
 				tbp.putErrors = func(ctx context.Context, _ swarm.Address) error {
 					j := atomic.AddInt32(i, 1)
+					<-time.After(10 * time.Millisecond)
 					if j == 6 {
 						return errTestA
 					}

From 2ff200cb46da35ffb72d4f0f4d560f07913dcf56 Mon Sep 17 00:00:00 2001
From: nugaon
Date: Thu, 5 Jun 2025 14:24:01 +0200
Subject: [PATCH 05/62] fix: randomize ids at soc generation

---
 pkg/replicas/putter_soc_test.go | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pkg/replicas/putter_soc_test.go b/pkg/replicas/putter_soc_test.go
index 6b714fc6ba7..ff7d0b40cd4 100644
--- a/pkg/replicas/putter_soc_test.go
+++ b/pkg/replicas/putter_soc_test.go
@@ -90,6 +90,9 @@ func TestSocPutter(t *testing.T) {
 			}
 			// create soc from cac
 			id := make([]byte, swarm.HashSize)
+			if _, err := rand.Read(id); err != nil {
+				t.Fatal(err)
+			}
 			s := soc.New(id, ch)
 			sch, err := s.Sign(signer)
 			if err != nil {
@@ -200,6 +203,9 @@ func TestSocPutter(t *testing.T) {
 			}

 			id := make([]byte, swarm.HashSize)
+			if _, err := rand.Read(id); err != nil {
+				t.Fatal(err)
+			}
 			s := soc.New(id, ch)
 			sch, err := s.Sign(signer)
 			if err != nil {

From 40c5c17cc810d50ad35b1554a4ebd684baf90084 Mon Sep 17 00:00:00 2001
From: nugaon
Date: Fri, 6 Jun 2025 11:14:25 +0200
Subject: [PATCH 06/62] fix: data race in putter soc

---
 pkg/replicas/putter_soc.go | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/pkg/replicas/putter_soc.go b/pkg/replicas/putter_soc.go
index aa7dbe90f0f..1762d14a2c5 100644
--- a/pkg/replicas/putter_soc.go
+++ b/pkg/replicas/putter_soc.go
@@ -33,10 +33,10 @@ func NewSocPutter(p storage.Putter, rLevel redundancy.Level) storage.Putter {
 }

 // Put makes the putter satisfy the storage.Putter interface
-func (p *socPutter) Put(ctx context.Context, ch swarm.Chunk) (err error) {
+func (p *socPutter) Put(ctx context.Context, ch swarm.Chunk) error {
 	errs := []error{}
 	// Put base chunk first
-	if err = p.putter.Put(ctx, ch); err != nil {
+	if err := p.putter.Put(ctx, ch); err != nil {
 		return err
 	}
 	if p.rLevel == 0 {
@@ -51,9 +51,7 @@ func (p *socPutter) Put(ctx context.Context, ch swarm.Chunk) error {
 		go func() {
 			defer wg.Done()
 			sch := swarm.NewChunk(swarm.NewAddress(r.addr), ch.Data())
-			if err == nil {
-				err = p.putter.Put(ctx, sch)
-			}
+			err := p.putter.Put(ctx, sch)
 			errc <- err
 		}()
 	}

From b982b2080b6550c83d5bca89bd0bdf74d38936f7 Mon Sep 17 00:00:00 2001
From: nugaon
Date: Fri, 6 Jun 2025 16:54:43 +0200
Subject: [PATCH 07/62] test: soc api

---
 pkg/api/soc_test.go | 75 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/pkg/api/soc_test.go b/pkg/api/soc_test.go
index fb34eb82297..3f4c2291fdd 100644
--- a/pkg/api/soc_test.go
+++ b/pkg/api/soc_test.go
@@ -22,6 +22,7 @@ import (
 	testingpostage "github.com/ethersphere/bee/v2/pkg/postage/testing"
 	testingsoc "github.com/ethersphere/bee/v2/pkg/soc/testing"
 	"github.com/ethersphere/bee/v2/pkg/spinlock"
+	"github.com/ethersphere/bee/v2/pkg/storage"
 	mockstorer "github.com/ethersphere/bee/v2/pkg/storer/mock"
 	"github.com/ethersphere/bee/v2/pkg/swarm"
 )
@@ -209,3 +210,77 @@ func TestSOC(t *testing.T) {
 		})
 	})
 }
+
+// Verify that replicas provide fault tolerance
+func TestSOCWithRedundancy(t *testing.T) {
+
+	testWithRedundancy := func(t *testing.T, redundancyLevel int) {
+		t.Run(fmt.Sprintf("redundancy=%d", redundancyLevel), func(t *testing.T) {
+			testData := []byte(fmt.Sprintf("redundant-soc-data-%d", redundancyLevel))
+
+			mockStorer := mockstorer.New()
+			client, _, _, chanStore := newTestServer(t, testServerOptions{
+				Storer:       mockStorer,
+				Post:         newTestPostService(),
+				DirectUpload: true,
+			})
+
+			soc := testingsoc.GenerateMockSOC(t, testData)
+
+			chanStore.Subscribe(func(ch swarm.Chunk) {
+				err := mockStorer.Put(context.Background(), ch)
+				if err != nil {
+					t.Fatal(err)
+				}
+			})
+
+			jsonhttptest.Request(t, client, http.MethodPost,
+				fmt.Sprintf("/soc/%s/%s?sig=%s",
+					hex.EncodeToString(soc.Owner),
+					hex.EncodeToString(soc.ID),
+					hex.EncodeToString(soc.Signature)),
+				http.StatusCreated,
+				jsonhttptest.WithRequestHeader(api.SwarmPostageBatchIdHeader, batchOkStr),
+				jsonhttptest.WithRequestHeader(api.SwarmRedundancyLevelHeader, fmt.Sprintf("%d", redundancyLevel)),
+				jsonhttptest.WithRequestBody(bytes.NewReader(soc.WrappedChunk.Data())),
+				jsonhttptest.WithExpectedJSONResponse(api.SocPostResponse{
+					Reference: soc.Address(),
+				}),
+			)
+
+			// Wait for replicas to be created in background
+			time.Sleep(100 * time.Millisecond)
+
+			originalAddress := soc.Address()
+
+			// Delete the original chunk to trigger dispersed retrieval
+			cs, ok := mockStorer.ChunkStore().(storage.ChunkStore)
+			if !ok {
t.Fatal("Could not access underlying ChunkStore with Delete method") + } + + err := cs.Delete(context.Background(), originalAddress) + if err != nil { + t.Fatalf("Failed to delete the original chunk: %v", err) + } + + // Try to retrieve the SOC after deletion + if redundancyLevel > 0 { + jsonhttptest.Request(t, client, http.MethodGet, + fmt.Sprintf("/soc/%s/%s", hex.EncodeToString(soc.Owner), hex.EncodeToString(soc.ID)), + http.StatusOK, + jsonhttptest.WithExpectedResponse(soc.WrappedChunk.Data()[swarm.SpanSize:]), + jsonhttptest.WithExpectedContentLength(len(soc.WrappedChunk.Data()[swarm.SpanSize:])), + ) + } else { + jsonhttptest.Request(t, client, http.MethodGet, + fmt.Sprintf("/soc/%s/%s", hex.EncodeToString(soc.Owner), hex.EncodeToString(soc.ID)), + http.StatusNotFound, + ) + } + }) + } + + testWithRedundancy(t, 0) + testWithRedundancy(t, 2) +} From 61d8a8d0130a20574c505d941f04543efec3c91a Mon Sep 17 00:00:00 2001 From: nugaon Date: Fri, 6 Jun 2025 16:55:53 +0200 Subject: [PATCH 08/62] fix: same replica address logic as in validity --- pkg/replicas/replicas_soc.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/replicas/replicas_soc.go b/pkg/replicas/replicas_soc.go index e5828a2d86b..f4f20475393 100644 --- a/pkg/replicas/replicas_soc.go +++ b/pkg/replicas/replicas_soc.go @@ -48,10 +48,10 @@ type socReplica struct { // replicate returns a replica params structure seeded with a byte of entropy as argument func (rr *socReplicator) replicate(i uint8) (sp *socReplica) { // calculate SOC replica address for potential replica - h := swarm.NewHasher() - _, _ = h.Write([]byte{i}) - _, _ = h.Write(rr.addr) - return &socReplica{addr: h.Sum(nil), nonce: i} + addr := make([]byte, 32) + copy(addr, rr.addr) + addr[0] = i + return &socReplica{addr: addr, nonce: i} } // replicas enumerates replica parameters (nonce) pushing it in a channel given as argument From 7ec15cb05555ad8cbb44168da1141f8e93fdfddf Mon Sep 17 00:00:00 2001 From: nugaon Date: Wed, 11 Jun 2025 11:58:02 +0200 Subject: [PATCH 09/62] fix: lint issue --- pkg/api/soc_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/api/soc_test.go b/pkg/api/soc_test.go index 3f4c2291fdd..2b87c572a51 100644 --- a/pkg/api/soc_test.go +++ b/pkg/api/soc_test.go @@ -215,6 +215,8 @@ func TestSOC(t *testing.T) { func TestSOCWithRedundancy(t *testing.T) { testWithRedundancy := func(t *testing.T, redundancyLevel int) { + t.Helper() + t.Run(fmt.Sprintf("redundancy=%d", redundancyLevel), func(t *testing.T) { testData := []byte(fmt.Sprintf("redundant-soc-data-%d", redundancyLevel)) From ec191e7cf44179007cbce70c56d8f9b6ed1d10ec Mon Sep 17 00:00:00 2001 From: nugaon Date: Wed, 11 Jun 2025 17:13:18 +0200 Subject: [PATCH 10/62] chore: comments --- pkg/api/soc.go | 2 +- pkg/replicas/putter_soc.go | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/api/soc.go b/pkg/api/soc.go index 9a768c0fe95..4c1b06eb291 100644 --- a/pkg/api/soc.go +++ b/pkg/api/soc.go @@ -72,7 +72,7 @@ func (s *Service) socUploadHandler(w http.ResponseWriter, r *http.Request) { err error ) - rLevel := redundancy.DefaultLevel + rLevel := redundancy.DefaultLevel //TODO: base level should be zero instead if headers.RLevel != nil { rLevel = *headers.RLevel } diff --git a/pkg/replicas/putter_soc.go b/pkg/replicas/putter_soc.go index 1762d14a2c5..5db497c0d8b 100644 --- a/pkg/replicas/putter_soc.go +++ b/pkg/replicas/putter_soc.go @@ -50,6 +50,7 @@ func (p *socPutter) Put(ctx context.Context, ch swarm.Chunk) error { wg.Add(1) go func() { defer 
wg.Done() + // create a new chunk with the replica address sch := swarm.NewChunk(swarm.NewAddress(r.addr), ch.Data()) err := p.putter.Put(ctx, sch) errc <- err From eafef3924b7ceccec2196b71e618c30365526569 Mon Sep 17 00:00:00 2001 From: nugaon Date: Thu, 12 Jun 2025 08:35:32 +0200 Subject: [PATCH 11/62] fix: gsoc parallel upload --- pkg/api/soc.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pkg/api/soc.go b/pkg/api/soc.go index 4c1b06eb291..88f2c87e81c 100644 --- a/pkg/api/soc.go +++ b/pkg/api/soc.go @@ -220,7 +220,12 @@ func (s *Service) socUploadHandler(w http.ResponseWriter, r *http.Request) { } } - err = putter.Done(sch.Address()) + // do not pass sch.Address() since it causes error on parallel GSOC uploads + // in case of deferred upload + // pkg/storer/internal/pinning/pinning.go:collectionPutter.Close -> throws error if pin true but that is not a valid use-case at SOC upload + // pkg/storer/internal/upload/uploadstore.go:uploadPutter.Close -> updates tagID, and the address would be set along with it -> not necessary + // in case of directupload it only waits for the waitgroup for chunk upload and do not use swarm address + err = putter.Done(swarm.Address{}) if err != nil { logger.Debug("done split failed", "error", err) logger.Error(nil, "done split failed") From 2fd1e12e07bea9dedb4a93dcd4ce757cfefccb78 Mon Sep 17 00:00:00 2001 From: nugaon Date: Thu, 12 Jun 2025 10:06:03 +0200 Subject: [PATCH 12/62] chore: print out error message for integration test --- pkg/api/soc.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/api/soc.go b/pkg/api/soc.go index 88f2c87e81c..04f410c4a33 100644 --- a/pkg/api/soc.go +++ b/pkg/api/soc.go @@ -8,6 +8,7 @@ import ( "bytes" "encoding/hex" "errors" + "fmt" "io" "net/http" "strconv" @@ -229,7 +230,7 @@ func (s *Service) socUploadHandler(w http.ResponseWriter, r *http.Request) { if err != nil { logger.Debug("done split failed", "error", err) logger.Error(nil, "done split failed") - jsonhttp.InternalServerError(ow, "done split failed") + jsonhttp.InternalServerError(ow, fmt.Sprintf("done split failed: %v", err)) // TODO: put it back after fixing parallel upload issue return } if headers.Act { From 8acb9907f51f40d04e611d22f82fa75ca86f9baa Mon Sep 17 00:00:00 2001 From: nugaon Date: Thu, 12 Jun 2025 11:16:26 +0200 Subject: [PATCH 13/62] fix: parallel gsoc upload --- pkg/api/soc.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pkg/api/soc.go b/pkg/api/soc.go index 04f410c4a33..0975a4ce560 100644 --- a/pkg/api/soc.go +++ b/pkg/api/soc.go @@ -73,10 +73,11 @@ func (s *Service) socUploadHandler(w http.ResponseWriter, r *http.Request) { err error ) - rLevel := redundancy.DefaultLevel //TODO: base level should be zero instead - if headers.RLevel != nil { - rLevel = *headers.RLevel - } + // if rLevel > 0 then it can cause error on parallel upload with a great chance + // because of multiple writes on the same postage index + // https://github.com/ethersphere/bee/actions/runs/15605098232/job/43952677866?pr=5057 + // the solution would be either ignoring the error for dispersed replicas or do sequential upload + rLevel := *headers.RLevel if len(headers.StampSig) != 0 { if headers.RLevel != nil { From 8ff9e533064175badb25d585169831dd2a41a89b Mon Sep 17 00:00:00 2001 From: nugaon Date: Thu, 12 Jun 2025 11:50:04 +0200 Subject: [PATCH 14/62] fix: nil reference on redundancy header --- pkg/api/soc.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pkg/api/soc.go 
index 0975a4ce560..4086f9a0e33 100644
--- a/pkg/api/soc.go
+++ b/pkg/api/soc.go
@@ -77,7 +77,12 @@ func (s *Service) socUploadHandler(w http.ResponseWriter, r *http.Request) {
 	// because of multiple writes on the same postage index
 	// https://github.com/ethersphere/bee/actions/runs/15605098232/job/43952677866?pr=5057
 	// the solution would be either ignoring the error for dispersed replicas or do sequential upload
-	rLevel := *headers.RLevel
+	var rLevel redundancy.Level
+	if headers.RLevel != nil {
+		rLevel = *headers.RLevel
+	} else {
+		rLevel = 0 // default redundancy level if header is missing
+	}

 	if len(headers.StampSig) != 0 {
 		if headers.RLevel != nil {

From 1f5011c150360628f34317696d81b5b16cd54f54 Mon Sep 17 00:00:00 2001
From: nugaon
Date: Fri, 13 Jun 2025 10:13:11 +0200
Subject: [PATCH 15/62] fix: goroutines access loop variable

---
 pkg/replicas/putter_soc.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkg/replicas/putter_soc.go b/pkg/replicas/putter_soc.go
index 5db497c0d8b..1182bd4f5c1 100644
--- a/pkg/replicas/putter_soc.go
+++ b/pkg/replicas/putter_soc.go
@@ -48,13 +48,13 @@ func (p *socPutter) Put(ctx context.Context, ch swarm.Chunk) error {
 	wg := sync.WaitGroup{}
 	for r := range rr.c {
 		wg.Add(1)
-		go func() {
+		go func(r *socReplica) {
 			defer wg.Done()
 			// create a new chunk with the replica address
 			sch := swarm.NewChunk(swarm.NewAddress(r.addr), ch.Data())
 			err := p.putter.Put(ctx, sch)
 			errc <- err
-		}()
+		}(r)
 	}

 	wg.Wait()

From 20671a4ed8d10db3cbe0d2ca42222b0b5eeafc6e Mon Sep 17 00:00:00 2001
From: nugaon
Date: Fri, 13 Jun 2025 16:42:16 +0200
Subject: [PATCH 16/62] feat: feedfactory integration on bzz and feed

---
 pkg/api/bzz.go               |  9 ++++++---
 pkg/api/feed.go              | 30 ++++++++++++++++++----------
 pkg/feeds/factory/factory.go | 11 ++++++++---
 pkg/feeds/feed.go            |  2 +-
 pkg/feeds/getter.go          |  1 +
 pkg/node/bootstrap.go        |  2 +-
 6 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/pkg/api/bzz.go b/pkg/api/bzz.go
index c05a73d177c..3c6562cf784 100644
--- a/pkg/api/bzz.go
+++ b/pkg/api/bzz.go
@@ -32,6 +32,7 @@ import (
 	"github.com/ethersphere/bee/v2/pkg/log"
 	"github.com/ethersphere/bee/v2/pkg/manifest"
 	"github.com/ethersphere/bee/v2/pkg/postage"
+	"github.com/ethersphere/bee/v2/pkg/replicas"
 	"github.com/ethersphere/bee/v2/pkg/storage"
 	"github.com/ethersphere/bee/v2/pkg/storer"
 	"github.com/ethersphere/bee/v2/pkg/swarm"
@@ -390,7 +391,8 @@ func (s *Service) serveReference(logger log.Logger, address swarm.Address, pathV
 	}

 	ctx := r.Context()
-	ls := loadsave.NewReadonly(s.storer.Download(cache), s.storer.Cache(), redundancy.DefaultLevel)
+	g := s.storer.Download(cache)
+	ls := loadsave.NewReadonly(g, s.storer.Cache(), rLevel)
 	feedDereferenced := false

 	ctx, err := getter.SetConfigInContext(ctx, headers.Strategy, headers.FallbackMode, headers.ChunkRetrievalTimeout, logger)
@@ -418,7 +420,7 @@ FETCH:
 		// unmarshal as mantaray first and possibly resolve the feed, otherwise
 		// go on normally.
 		if !feedDereferenced {
-			if l, err := s.manifestFeed(ctx, m); err == nil {
+			if l, err := s.manifestFeed(ctx, m, replicas.NewSocGetter(g, rLevel)); err == nil {
 				// we have a feed manifest here
 				ch, cur, _, err := l.At(ctx, time.Now().Unix(), 0)
 				if err != nil {
@@ -673,6 +675,7 @@ func manifestMetadataLoad(
 func (s *Service) manifestFeed(
 	ctx context.Context,
 	m manifest.Interface,
+	st storage.Getter,
 ) (feeds.Lookup, error) {
 	e, err := m.Lookup(ctx, "/")
 	if err != nil {
@@ -705,5 +708,5 @@ func (s *Service) manifestFeed(
 		return nil, fmt.Errorf("node lookup: %s", "feed metadata absent")
 	}
 	f := feeds.New(topic, common.BytesToAddress(owner))
-	return s.feedFactory.NewLookup(*t, f)
+	return s.feedFactory.NewLookup(*t, f, st)
 }
diff --git a/pkg/api/feed.go b/pkg/api/feed.go
index a49992e6213..132adaa2fc8 100644
--- a/pkg/api/feed.go
+++ b/pkg/api/feed.go
@@ -23,6 +23,7 @@ import (
 	"github.com/ethersphere/bee/v2/pkg/manifest/mantaray"
 	"github.com/ethersphere/bee/v2/pkg/manifest/simple"
 	"github.com/ethersphere/bee/v2/pkg/postage"
+	"github.com/ethersphere/bee/v2/pkg/replicas"
 	"github.com/ethersphere/bee/v2/pkg/soc"
 	"github.com/ethersphere/bee/v2/pkg/storage"
 	"github.com/ethersphere/bee/v2/pkg/storer"
@@ -65,16 +66,22 @@ func (s *Service) feedGetHandler(w http.ResponseWriter, r *http.Request) {
 	}

 	headers := struct {
-		OnlyRootChunk     bool `map:"Swarm-Only-Root-Chunk"`
-		LegacyFeedResolve bool `map:"Swarm-Feed-Legacy-Resolve"`
+		OnlyRootChunk     bool             `map:"Swarm-Only-Root-Chunk"`
+		LegacyFeedResolve bool             `map:"Swarm-Feed-Legacy-Resolve"`
+		RedundancyLevel   redundancy.Level `map:"Swarm-Redundancy-Level"`
 	}{}
 	if response := s.mapStructure(r.Header, &headers); response != nil {
 		response("invalid header params", logger, w)
 		return
 	}

+	getter := s.storer.Download(false)
+	if headers.RedundancyLevel > redundancy.NONE {
+		getter = replicas.NewSocGetter(getter, headers.RedundancyLevel)
+	}
+
 	f := feeds.New(paths.Topic, paths.Owner)
-	lookup, err := s.feedFactory.NewLookup(feeds.Sequence, f)
+	lookup, err := s.feedFactory.NewLookup(feeds.Sequence, f, getter)
 	if err != nil {
 		logger.Debug("new lookup failed", "owner", paths.Owner, "error", err)
 		logger.Error(nil, "new lookup failed")
@@ -103,7 +110,7 @@ func (s *Service) feedGetHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}

-	wc, err := feeds.GetWrappedChunk(r.Context(), s.storer.Download(false), ch, headers.LegacyFeedResolve)
+	wc, err := feeds.GetWrappedChunk(r.Context(), getter, ch, headers.LegacyFeedResolve)
 	if err != nil {
 		logger.Error(nil, "wrapped chunk cannot be retrieved")
 		jsonhttp.NotFound(w, "wrapped chunk cannot be retrieved")
@@ -170,11 +177,12 @@ func (s *Service) feedPostHandler(w http.ResponseWriter, r *http.Request) {
 	}

 	headers := struct {
-		BatchID        []byte        `map:"Swarm-Postage-Batch-Id" validate:"required"`
-		Pin            bool          `map:"Swarm-Pin"`
-		Deferred       *bool         `map:"Swarm-Deferred-Upload"`
-		Act            bool          `map:"Swarm-Act"`
-		HistoryAddress swarm.Address `map:"Swarm-Act-History-Address"`
+		BatchID         []byte           `map:"Swarm-Postage-Batch-Id" validate:"required"`
+		Pin             bool             `map:"Swarm-Pin"`
+		Deferred        *bool            `map:"Swarm-Deferred-Upload"`
+		Act             bool             `map:"Swarm-Act"`
+		HistoryAddress  swarm.Address    `map:"Swarm-Act-History-Address"`
+		RedundancyLevel redundancy.Level `map:"Swarm-Redundancy-Level"`
 	}{}
 	if response := s.mapStructure(r.Header, &headers); response != nil {
 		response("invalid header params", logger, w)
@@ -231,7 +239,9 @@ func (s *Service) feedPostHandler(w http.ResponseWriter, r *http.Request) {
 		logger: logger,
 	}

-	l := loadsave.New(s.storer.ChunkStore(), s.storer.Cache(), requestPipelineFactory(r.Context(), putter, false, 0), redundancy.DefaultLevel)
+	rLevel := headers.RedundancyLevel
+
+	l := loadsave.New(s.storer.ChunkStore(), s.storer.Cache(), requestPipelineFactory(r.Context(), putter, false, 0), rLevel)
 	feedManifest, err := manifest.NewDefaultManifest(l, false)
 	if err != nil {
 		logger.Debug("create manifest failed", "error", err)
diff --git a/pkg/feeds/factory/factory.go b/pkg/feeds/factory/factory.go
index 1d555416407..daf82950057 100644
--- a/pkg/feeds/factory/factory.go
+++ b/pkg/feeds/factory/factory.go
@@ -19,12 +19,17 @@ func New(getter storage.Getter) feeds.Factory {
 	return &factory{getter}
 }

-func (f *factory) NewLookup(t feeds.Type, feed *feeds.Feed) (feeds.Lookup, error) {
+func (f *factory) NewLookup(t feeds.Type, feed *feeds.Feed, specialGetter storage.Getter) (feeds.Lookup, error) {
+	getter := f.Getter
+	if specialGetter != nil {
+		getter = specialGetter
+	}
+
 	switch t {
 	case feeds.Sequence:
-		return sequence.NewAsyncFinder(f.Getter, feed), nil
+		return sequence.NewAsyncFinder(getter, feed), nil
 	case feeds.Epoch:
-		return epochs.NewAsyncFinder(f.Getter, feed), nil
+		return epochs.NewAsyncFinder(getter, feed), nil
 	}

 	return nil, feeds.ErrFeedTypeNotFound
diff --git a/pkg/feeds/feed.go b/pkg/feeds/feed.go
index ac8d232f5ce..31eca88caa3 100644
--- a/pkg/feeds/feed.go
+++ b/pkg/feeds/feed.go
@@ -25,7 +25,7 @@ var ErrFeedTypeNotFound = errors.New("no such feed type")

 // Factory creates feed lookups for different types of feeds.
 type Factory interface {
-	NewLookup(Type, *Feed) (Lookup, error)
+	NewLookup(Type, *Feed, storage.Getter) (Lookup, error)
 }

 // Type enumerates the time-based feed types
diff --git a/pkg/feeds/getter.go b/pkg/feeds/getter.go
index f8bf000932f..ae30a555c26 100644
--- a/pkg/feeds/getter.go
+++ b/pkg/feeds/getter.go
@@ -59,6 +59,7 @@ func GetWrappedChunk(ctx context.Context, getter storage.Getter, ch swarm.Chunk,
 	// possible values right now:
 	// unencrypted ref: span+timestamp+ref => 8+8+32=48
 	// encrypted ref: span+timestamp+ref+decryptKey => 8+8+64=80
+	// legacy soc does not need special getter for replicas
 	if legacyResolve {
 		ref, err := legacyPayload(wc)
 		if err != nil {
diff --git a/pkg/node/bootstrap.go b/pkg/node/bootstrap.go
index 4ce709b16d9..ce7346dec93 100644
--- a/pkg/node/bootstrap.go
+++ b/pkg/node/bootstrap.go
@@ -326,7 +326,7 @@ func getLatestSnapshot(
 	}
 	f := feeds.New(topic, common.BytesToAddress(owner))

-	l, err := feedFactory.NewLookup(*t, f)
+	l, err := feedFactory.NewLookup(*t, f, nil)
 	if err != nil {
 		return nil, fmt.Errorf("feed lookup failed: %w", err)
 	}

From e41cb552eed5efc477ecc5601f764382ba4be2d1 Mon Sep 17 00:00:00 2001
From: nugaon
Date: Fri, 13 Jun 2025 16:44:19 +0200
Subject: [PATCH 17/62] test: feed api for red

---
 pkg/api/feed_test.go | 110 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 109 insertions(+), 1 deletion(-)

diff --git a/pkg/api/feed_test.go b/pkg/api/feed_test.go
index b60d726f637..3cbcafad6ca 100644
--- a/pkg/api/feed_test.go
+++ b/pkg/api/feed_test.go
@@ -16,6 +16,9 @@ import (
 	"net/http"
 	"testing"

+	"github.com/ethersphere/bee/v2/pkg/replicas"
+	"github.com/ethersphere/bee/v2/pkg/storage"
+
 	"github.com/ethersphere/bee/v2/pkg/api"
 	"github.com/ethersphere/bee/v2/pkg/feeds"
 	"github.com/ethersphere/bee/v2/pkg/file/loadsave"
@@ -322,6 +325,111 @@ func TestFeedDirectUpload(t *testing.T) {
 	)
 }

+// redundancyMockLookup is a specialized mockLookup that uses the redundancy SOC getter
+type redundancyMockLookup struct {
+	redundancyLevel redundancy.Level
+	getter          storage.Getter
+	lastChunkFn     func() (swarm.Chunk, feeds.Index, feeds.Index) // Hook to get the latest chunk, index, nextIndex
+}
+
+func newRedundancyMockLookup(rLevel redundancy.Level, getter storage.Getter, lastChunkFn func() (swarm.Chunk, feeds.Index, feeds.Index)) *redundancyMockLookup {
+	return &redundancyMockLookup{
+		redundancyLevel: rLevel,
+		getter:          getter,
+		lastChunkFn:     lastChunkFn,
+	}
+}
+
+// At overrides mockLookup.At to use the redundancy SOC getter
+func (l *redundancyMockLookup) At(ctx context.Context, at int64, after uint64) (swarm.Chunk, feeds.Index, feeds.Index, error) {
+	chunk, cur, next := l.lastChunkFn()
+
+	// Create redundancy SOC getter if redundancy level is set
+	if l.redundancyLevel > 0 {
+		redGetter := replicas.NewSocGetter(l.getter, l.redundancyLevel)
+
+		// Try to get the chunk with redundancy
+		redChunk, err := redGetter.Get(ctx, chunk.Address())
+		if err == nil {
+			// Use the chunk retrieved with redundancy
+			return redChunk, cur, next, nil
+		}
+	}
+
+	return chunk, cur, next, nil
+}
+
+// TestFeedAPIWithRedundancy tests the feed API with SOC redundancy
+func TestFeedAPIWithRedundancy(t *testing.T) {
+	t.Parallel()
+
+	var (
+		redundancyLevel = redundancy.PARANOID // Use highest redundancy level
+		topic           = swarm.RandAddress(t)
+		mockStorer      = mockstorer.New()
+		feedData        = []byte("feed redundancy test data")
+	)
+	socChunk := testingsoc.GenerateMockSOC(t, feedData)
+
+	// Variables to track the last chunk, index, and next index
+	var (
+		lastChunk swarm.Chunk
+		lastIndex feeds.Index
+		lastNext  feeds.Index
+	)
+
+	// Provide a hook function to return the latest chunk, index, and next index
+	lastChunkFn := func() (swarm.Chunk, feeds.Index, feeds.Index) {
+		return lastChunk, lastIndex, lastNext
+	}
+	lastChunk = socChunk.Chunk()
+	lastIndex = &id{}
+	lastNext = &id{}
+
+	// Create redundancy-aware lookup that wraps our lookup
+	redLookup := newRedundancyMockLookup(redundancyLevel, mockStorer.ChunkStore(), lastChunkFn)
+	factory := newMockFactory(redLookup)
+
+	// Create test server with our custom setup
+	mp := mockpost.New(mockpost.WithIssuer(postage.NewStampIssuer("", "", batchOk, big.NewInt(3), 11, 10, 1000, true)))
+	client, _, _, _ := newTestServer(t, testServerOptions{
+		Storer: mockStorer,
+		Post:   mp,
+		Feeds:  factory,
+	})
+
+	socPutter := replicas.NewSocPutter(mockStorer, redundancyLevel)
+
+	ctx := context.Background()
+	err := socPutter.Put(ctx, socChunk.Chunk())
+	if err != nil {
+		t.Fatalf("failed to put SOC chunk with redundancy: %v", err)
+	}
+
+	// Get access to the underlying chunk store
+	cs, ok := mockStorer.ChunkStore().(storage.ChunkStore)
+	if !ok {
+		t.Fatal("Could not access underlying ChunkStore with Delete method")
+	}
+
+	// Delete the original SOC chunk by using the address tracked in lastSOCAddress
+	// or use socChunk.Address() directly
+	err = cs.Delete(context.Background(), socChunk.Address())
+	if err != nil {
+		t.Fatalf("Failed to delete original SOC chunk: %v", err)
+	}
+
+	feedResource := fmt.Sprintf("/feeds/%s/%s", ownerString, topic)
+
+	// Try to retrieve the feed content with redundancy
+	jsonhttptest.Request(t, client, http.MethodGet,
+		feedResource,
+		http.StatusOK,
+		jsonhttptest.WithRequestHeader(api.SwarmRedundancyLevelHeader, fmt.Sprintf("%d", redundancyLevel)),
+		jsonhttptest.WithExpectedResponse(feedData),
+	)
+}
+
 type factoryMock struct {
 	sequenceCalled bool
 	epochCalled    bool
@@ -333,7 +441,7 @@ func newMockFactory(mockLookup feeds.Lookup) *factoryMock {
 	return &factoryMock{lookup: mockLookup}
&factoryMock{lookup: mockLookup} } -func (f *factoryMock) NewLookup(t feeds.Type, feed *feeds.Feed) (feeds.Lookup, error) { +func (f *factoryMock) NewLookup(t feeds.Type, feed *feeds.Feed, getter storage.Getter) (feeds.Lookup, error) { switch t { case feeds.Sequence: f.sequenceCalled = true From 7199f6a3f154bab37bd0b01f7b45531b6097c464 Mon Sep 17 00:00:00 2001 From: nugaon Date: Tue, 17 Jun 2025 14:56:07 +0200 Subject: [PATCH 18/62] test: bzz api --- pkg/api/bzz_test.go | 184 +++++++++++++++++++++++++++++++++---------- pkg/api/feed_test.go | 19 ++--- 2 files changed, 151 insertions(+), 52 deletions(-) diff --git a/pkg/api/bzz_test.go b/pkg/api/bzz_test.go index bb6f7410fda..370ca97e2ef 100644 --- a/pkg/api/bzz_test.go +++ b/pkg/api/bzz_test.go @@ -7,6 +7,7 @@ package api_test import ( "bytes" "context" + "encoding/hex" "errors" "fmt" "io" @@ -18,6 +19,7 @@ import ( "testing" "github.com/ethersphere/bee/v2/pkg/api" + "github.com/ethersphere/bee/v2/pkg/feeds" "github.com/ethersphere/bee/v2/pkg/file/loadsave" "github.com/ethersphere/bee/v2/pkg/file/redundancy" "github.com/ethersphere/bee/v2/pkg/jsonhttp" @@ -26,7 +28,10 @@ import ( "github.com/ethersphere/bee/v2/pkg/manifest" mockbatchstore "github.com/ethersphere/bee/v2/pkg/postage/batchstore/mock" mockpost "github.com/ethersphere/bee/v2/pkg/postage/mock" + "github.com/ethersphere/bee/v2/pkg/replicas" + "github.com/ethersphere/bee/v2/pkg/soc" testingsoc "github.com/ethersphere/bee/v2/pkg/soc/testing" + "github.com/ethersphere/bee/v2/pkg/storage" "github.com/ethersphere/bee/v2/pkg/storage/inmemchunkstore" mockstorer "github.com/ethersphere/bee/v2/pkg/storer/mock" "github.com/ethersphere/bee/v2/pkg/swarm" @@ -809,49 +814,146 @@ func TestFeedIndirection(t *testing.T) { // called from the bzz endpoint. then call the bzz endpoint with // the pregenerated feed root manifest hash - var ( - look = newMockLookup(-1, 0, socRootCh, nil, &id{}, nil) - factory = newMockFactory(look) - bzzDownloadResource = func(addr, path string) string { return "/bzz/" + addr + "/" + path } - ctx = context.Background() - ) - client, _, _, _ = newTestServer(t, testServerOptions{ - Storer: storer, - Logger: logger, - Feeds: factory, + t.Run("feed wrapping", func(t *testing.T) { + var ( + look = newMockLookup(-1, 0, socRootCh, nil, &id{}, nil) + factory = newMockFactory(look) + bzzDownloadResource = func(addr, path string) string { return "/bzz/" + addr + "/" + path } + ctx = context.Background() + ) + client, _, _, _ = newTestServer(t, testServerOptions{ + Storer: storer, + Logger: logger, + Feeds: factory, + }) + if err != nil { + t.Fatal(err) + } + m, err := manifest.NewDefaultManifest( + loadsave.New(storer.ChunkStore(), storer.Cache(), pipelineFactory(storer.Cache(), false, 0), redundancy.DefaultLevel), + false, + ) + if err != nil { + t.Fatal(err) + } + emptyAddr := make([]byte, 32) + err = m.Add(ctx, manifest.RootPath, manifest.NewEntry(swarm.NewAddress(emptyAddr), map[string]string{ + api.FeedMetadataEntryOwner: "8d3766440f0d7b949a5e32995d09619a7f86e632", + api.FeedMetadataEntryTopic: "abcc", + api.FeedMetadataEntryType: "epoch", + })) + if err != nil { + t.Fatal(err) + } + manifRef, err := m.Store(ctx) + if err != nil { + t.Fatal(err) + } + + jsonhttptest.Request(t, client, http.MethodGet, bzzDownloadResource(manifRef.String(), ""), http.StatusOK, + jsonhttptest.WithExpectedResponse(updateData), + jsonhttptest.WithExpectedContentLength(len(updateData)), + jsonhttptest.WithExpectedResponseHeader(api.AccessControlExposeHeaders, api.SwarmFeedIndexHeader), + 
jsonhttptest.WithExpectedResponseHeader(api.AccessControlExposeHeaders, api.ContentDispositionHeader), + jsonhttptest.WithExpectedResponseHeader(api.ContentDispositionHeader, `inline; filename="index.html"`), + jsonhttptest.WithExpectedResponseHeader(api.ContentTypeHeader, "text/html; charset=utf-8"), + ) }) - if err != nil { - t.Fatal(err) - } - m, err := manifest.NewDefaultManifest( - loadsave.New(storer.ChunkStore(), storer.Cache(), pipelineFactory(storer.Cache(), false, 0), redundancy.DefaultLevel), - false, - ) - if err != nil { - t.Fatal(err) - } - emptyAddr := make([]byte, 32) - err = m.Add(ctx, manifest.RootPath, manifest.NewEntry(swarm.NewAddress(emptyAddr), map[string]string{ - api.FeedMetadataEntryOwner: "8d3766440f0d7b949a5e32995d09619a7f86e632", - api.FeedMetadataEntryTopic: "abcc", - api.FeedMetadataEntryType: "epoch", - })) - if err != nil { - t.Fatal(err) - } - manifRef, err := m.Store(ctx) - if err != nil { - t.Fatal(err) - } - jsonhttptest.Request(t, client, http.MethodGet, bzzDownloadResource(manifRef.String(), ""), http.StatusOK, - jsonhttptest.WithExpectedResponse(updateData), - jsonhttptest.WithExpectedContentLength(len(updateData)), - jsonhttptest.WithExpectedResponseHeader(api.AccessControlExposeHeaders, api.SwarmFeedIndexHeader), - jsonhttptest.WithExpectedResponseHeader(api.AccessControlExposeHeaders, api.ContentDispositionHeader), - jsonhttptest.WithExpectedResponseHeader(api.ContentDispositionHeader, `inline; filename="index.html"`), - jsonhttptest.WithExpectedResponseHeader(api.ContentTypeHeader, "text/html; charset=utf-8"), - ) + t.Run("redundancy", func(t *testing.T) { + // enough to test two redundancy levels since + tests := []struct { + name string + rLevel redundancy.Level + }{ + { + name: "none", + rLevel: redundancy.NONE, + }, + { + name: "medium", + rLevel: redundancy.MEDIUM, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rLevel := tt.rLevel + socRoot, _ := soc.FromChunk(socRootCh) + socPutter := replicas.NewSocPutter(storer, rLevel) + err = socPutter.Put(context.Background(), socRootCh) + if err != nil { + t.Fatalf("failed to put SOC chunk with redundancy: %v", err) + } + + m, err := manifest.NewDefaultManifest( + loadsave.New(storer.ChunkStore(), storer.Cache(), pipelineFactory(storer.Cache(), false, rLevel), rLevel), + false, + ) + if err != nil { + t.Fatal(err) + } + + // Add the feed entry to the manifest + hexId := hex.EncodeToString(socRoot.ID()) + hexOwner := hex.EncodeToString(socRoot.OwnerAddress()) + err = m.Add(context.Background(), manifest.RootPath, manifest.NewEntry(socRootCh.Address(), map[string]string{ + api.FeedMetadataEntryOwner: hexOwner, + api.FeedMetadataEntryTopic: hexId, + api.FeedMetadataEntryType: "sequence", + })) + if err != nil { + t.Fatal(err) + } + manifestRef, err := m.Store(context.Background()) + if err != nil { + t.Fatal(err) + } + + // Create mockLookup and mockFactory for feed + look := newRedundancyMockLookup( + rLevel, + storer.ChunkStore(), + func() (swarm.Chunk, feeds.Index, feeds.Index) { + return socRootCh, &id{}, &id{} + }, + ) + feedFactory := newMockFactory(look) + + // Update the test server with the feed factory + client, _, _, _ := newTestServer(t, testServerOptions{ + Storer: storer, + Logger: log.Noop, + Post: mockpost.New(mockpost.WithAcceptAll()), + Feeds: feedFactory, + }) + + // remove original chunk from store + cs, ok := storer.ChunkStore().(storage.ChunkStore) + if !ok { + t.Fatalf("chunk store not available for deletion") + } + err = 
+				if err != nil {
+					t.Fatalf("Failed to delete soc chunk: %v", err)
+				}
+
+				manifestHex := manifestRef.String()
+
+				if rLevel == redundancy.NONE {
+					jsonhttptest.Request(t, client, http.MethodGet, "/bzz/"+manifestHex+"/", http.StatusNotFound)
+					return
+				}
+				jsonhttptest.Request(t, client, http.MethodGet, "/bzz/"+manifestHex+"/", http.StatusOK,
+					jsonhttptest.WithExpectedResponse(updateData),
+					jsonhttptest.WithExpectedContentLength(len(updateData)),
+					jsonhttptest.WithExpectedResponseHeader(api.AccessControlExposeHeaders, api.SwarmFeedIndexHeader),
+					jsonhttptest.WithExpectedResponseHeader(api.AccessControlExposeHeaders, api.ContentDispositionHeader),
+					jsonhttptest.WithExpectedResponseHeader(api.ContentTypeHeader, "text/html; charset=utf-8"),
+					jsonhttptest.WithExpectedResponseHeader(api.ContentDispositionHeader, `inline; filename="index.html"`),
+				)
+			})
+		}
+	})
 }

 func Test_bzzDownloadHandler_invalidInputs(t *testing.T) {
diff --git a/pkg/api/feed_test.go b/pkg/api/feed_test.go
index 3cbcafad6ca..82c497cb471 100644
--- a/pkg/api/feed_test.go
+++ b/pkg/api/feed_test.go
@@ -345,18 +345,15 @@ func (l *redundancyMockLookup) At(ctx context.Context, at int64, after uint64) (
 	chunk, cur, next := l.lastChunkFn()

 	// Create redundancy SOC getter if redundancy level is set
-	if l.redundancyLevel > 0 {
-		redGetter := replicas.NewSocGetter(l.getter, l.redundancyLevel)
-
-		// Try to get the chunk with redundancy
-		redChunk, err := redGetter.Get(ctx, chunk.Address())
-		if err == nil {
-			// Use the chunk retrieved with redundancy
-			return redChunk, cur, next, nil
-		}
-	}
+	redGetter := replicas.NewSocGetter(l.getter, l.redundancyLevel)

-	return chunk, cur, next, nil
+	// Try to get the chunk with redundancy
+	redChunk, err := redGetter.Get(ctx, chunk.Address())
+	if err != nil {
+		return nil, nil, nil, err
+	}
+	// Use the chunk retrieved with redundancy
+	return redChunk, cur, next, nil
 }

From db800a8cdea242677046e31caa8af12c18d7b4bd Mon Sep 17 00:00:00 2001
From: nugaon
Date: Wed, 18 Jun 2025 15:31:35 +0200
Subject: [PATCH 19/62] feat: soc validation check bytes except first

---
 pkg/soc/validator.go | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/pkg/soc/validator.go b/pkg/soc/validator.go
index 7cc69dc58a1..b767572c579 100644
--- a/pkg/soc/validator.go
+++ b/pkg/soc/validator.go
@@ -6,7 +6,6 @@ package soc

 import (
 	"bytes"
-	"math"

 	"github.com/ethersphere/bee/v2/pkg/swarm"
 )
@@ -30,18 +29,7 @@ func Valid(ch swarm.Chunk) bool {
 	defaultSoc := ch.Address().Equal(address)
 	if !defaultSoc {
 		// check whether the SOC chunk is a replica
-		for i := uint8(0); i < math.MaxUint8; i++ {
-			rAddr, err := hash([]byte{i}, ch.Address().Bytes())
-			if err != nil {
-				return false
-			}
-
-			if ch.Address().Equal(swarm.NewAddress(rAddr)) {
-				return true
-			}
-		}
-	} else {
-		return true
+		return bytes.Equal(ch.Address().Bytes()[1:32], address.Bytes()[1:32])
 	}
-	return false
+	return true
 }

From 1706ccce7308ca06d170b8e9d5b39065127cd477 Mon Sep 17 00:00:00 2001
From: nugaon
Date: Wed, 18 Jun 2025 15:31:47 +0200
Subject: [PATCH 20/62] test: add soc valid test

---
 pkg/replicas/putter_soc_test.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pkg/replicas/putter_soc_test.go b/pkg/replicas/putter_soc_test.go
index ff7d0b40cd4..2ad0377ce61 100644
--- a/pkg/replicas/putter_soc_test.go
+++ b/pkg/replicas/putter_soc_test.go
@@ -114,6 +114,9 @@ func TestSocPutter(t *testing.T) {
 					orig = true
 					return false, nil
 				}
+				if !soc.Valid(chunk) {
+					t.Fatalf("chunk %v is not a valid SOC chunk", chunk.Address())
+				}
 				addrs = append(addrs, chunk.Address())
 				return false, nil
 			})

From 031d6f541606e1c23ef3664ef5bec4c55e029f70 Mon Sep 17 00:00:00 2001
From: nugaon
Date: Wed, 18 Jun 2025 15:32:02 +0200
Subject: [PATCH 21/62] feat: flaky attempt to replicate address

---
 pkg/replicas/replicas_soc.go | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/pkg/replicas/replicas_soc.go b/pkg/replicas/replicas_soc.go
index f4f20475393..5cadfc8831f 100644
--- a/pkg/replicas/replicas_soc.go
+++ b/pkg/replicas/replicas_soc.go
@@ -50,8 +50,13 @@ func (rr *socReplicator) replicate(i uint8) (sp *socReplica) {
 	// calculate SOC replica address for potential replica
 	addr := make([]byte, 32)
 	copy(addr, rr.addr)
-	addr[0] = i
-	return &socReplica{addr: addr, nonce: i}
+	seed1 := addr[i%32]
+	seed2 := addr[(i+13)%32]
+	addr[0] = i ^ seed1 ^ seed2
+	// this somehow does not give enough randomness
+	// addr[0] &= 0x0f
+	// addr[0] |= (i ^ rand) & 0xf0 // set first 4 bits to the nonce
+	return &socReplica{addr: addr, nonce: addr[0]}
 }

From 415c66faedf4df48c71607b9d966e7ad74c9ed0a Mon Sep 17 00:00:00 2001
From: nugaon
Date: Thu, 19 Jun 2025 10:27:28 +0200
Subject: [PATCH 22/62] feat: saturating by mirrorbits

---
 pkg/replicas/replicas_soc.go | 80 ++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 45 deletions(-)

diff --git a/pkg/replicas/replicas_soc.go b/pkg/replicas/replicas_soc.go
index 5cadfc8831f..b8796fa07a4 100644
--- a/pkg/replicas/replicas_soc.go
+++ b/pkg/replicas/replicas_soc.go
@@ -11,18 +11,14 @@ package replicas

 import (
-	"math"
-
 	"github.com/ethersphere/bee/v2/pkg/file/redundancy"
 	"github.com/ethersphere/bee/v2/pkg/swarm"
 )

 // socReplicator running the find for replicas
 type socReplicator struct {
-	addr   []byte          // chunk address
-	queue  [16]*socReplica // to sort addresses according to di
-	exist  [30]bool        // maps the 16 distinct nibbles on all levels
-	sizes  [5]int          // number of distinct neighbourhoods recorded for each depth
+	addr   []byte // chunk address
+	sizes  [5]int // number of distinct neighbourhoods recorded for each depth
 	c      chan *socReplica
 	rLevel redundancy.Level
 }
@@ -46,16 +42,13 @@ type socReplica struct {
 }

 // replicate returns a replica params structure seeded with a byte of entropy as argument
-func (rr *socReplicator) replicate(i uint8) (sp *socReplica) {
-	// calculate SOC replica address for potential replica
+func (rr *socReplicator) replicate(i uint8, bitsRequired uint8) (sp *socReplica) {
 	addr := make([]byte, 32)
 	copy(addr, rr.addr)
-	seed1 := addr[i%32]
-	seed2 := addr[(i+13)%32]
-	addr[0] = i ^ seed1 ^ seed2
-	// this somehow does not give enough randomness
-	// addr[0] &= 0x0f
-	// addr[0] |= (i ^ rand) & 0xf0 // set first 4 bits to the nonce
+	mirroredBits := mirrorBitsToMSB(i, bitsRequired)
+	// zero out the first leading bitsRequired bits of addr[0] and set mirroredBits of `i`
+	addr[0] &= 0xFF >> bitsRequired
+	addr[0] |= mirroredBits
 	return &socReplica{addr: addr, nonce: addr[0]}
 }
@@ -65,42 +58,39 @@ func (rr *socReplicator) replicate(i uint8, bitsRequired uint8) (sp *socReplica)
 // I.e., the binary tree representing the new addresses prefix bits up to depth is balanced
 func (rr *socReplicator) replicas() {
 	defer close(rr.c)
-	n := 0
-	for i := uint8(0); n < rr.rLevel.GetReplicaCount() && i < math.MaxUint8; i++ {
+	// number of bits required to represent all replicas
represent all replicas + bitsRequired := countBitsRequired(uint8(rr.rLevel.GetReplicaCount() - 1)) + // replicate iteration saturates all leading bits in generated addresses until bitsRequired + for i := uint8(0); i < uint8(rr.rLevel.GetReplicaCount()); i++ { // create soc replica (with address and nonce) - // the soc is added to neighbourhoods of depths in the closed interval [from...to] - r := rr.replicate(i) - d, m := rr.add(r, rr.rLevel) - if d == 0 { - continue - } - for m, r = range rr.queue[n:] { - if r == nil { - break - } - rr.c <- r - } - n += m + r := rr.replicate(i, bitsRequired) + rr.c <- r } } -// add inserts the soc replica into a replicator so that addresses are balanced -func (rr *socReplicator) add(r *socReplica, rLevel redundancy.Level) (depth int, rank int) { - if rLevel == redundancy.NONE { - return 0, 0 +// mirrorBitsToMSB mirrors the lowest n bits of v to the most significant bits of a byte. +// For example, mirrorBitsToMSB(0b00001101, 4) == 0b10110000 +func mirrorBitsToMSB(v byte, n uint8) byte { + var res byte + for i := uint8(0); i < n; i++ { + if (v & (1 << i)) != 0 { + res |= (1 << (7 - i)) + } } - nh := nh(rLevel, r.addr) - if rr.exist[nh] { - return 0, 0 + return res +} + +// countBitsRequired returns the minimum number of bits required to represent value v. +// For 0, it returns 1 (we need 1 bit to represent 0). +func countBitsRequired(v uint8) uint8 { + if v == 0 { + return 1 } - rr.exist[nh] = true - l, o := rr.add(r, rLevel.Decrement()) - d := uint8(rLevel) - 1 - if l == 0 { - o = rr.sizes[d] - rr.sizes[d]++ - rr.queue[o] = r - l = rLevel.GetReplicaCount() + + var bits uint8 + for v > 0 { + bits++ + v >>= 1 } - return l, o + return bits } From d3f46a34090d3c01b2a4a15e57f7056d882f471f Mon Sep 17 00:00:00 2001 From: nugaon Date: Thu, 19 Jun 2025 11:10:05 +0200 Subject: [PATCH 23/62] fix: flip bit after mirroredbits if necessary --- pkg/replicas/replicas_soc.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/replicas/replicas_soc.go b/pkg/replicas/replicas_soc.go index b8796fa07a4..f3e605fe76c 100644 --- a/pkg/replicas/replicas_soc.go +++ b/pkg/replicas/replicas_soc.go @@ -49,6 +49,10 @@ func (rr *socReplicator) replicate(i uint8, bitsRequired uint8) (sp *socReplica) // zero out the first leading bitsRequired bits of addr[0] and set mirroredBits of `i` addr[0] &= 0xFF >> bitsRequired addr[0] |= mirroredBits + if addr[0] == rr.addr[0] { + // xor MSB after the mirrored bits because the iteration found the original address + addr[0] ^= 1 << (bitsRequired - 1) + } return &socReplica{addr: addr, nonce: addr[0]} } From 342df84943e8693bfad069945c095c13cb758114 Mon Sep 17 00:00:00 2001 From: nugaon Date: Thu, 19 Jun 2025 11:33:44 +0200 Subject: [PATCH 24/62] test: change test according to the new validation rules --- pkg/soc/validator_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/soc/validator_test.go b/pkg/soc/validator_test.go index 203e0b1bc84..72995d260da 100644 --- a/pkg/soc/validator_test.go +++ b/pkg/soc/validator_test.go @@ -112,7 +112,7 @@ func TestInvalid(t *testing.T) { name: "wrong soc address", chunk: func() swarm.Chunk { wrongAddressBytes := socAddress.Clone().Bytes() - wrongAddressBytes[0] = 255 - wrongAddressBytes[0] + wrongAddressBytes[1] = 255 - wrongAddressBytes[1] wrongAddress := swarm.NewAddress(wrongAddressBytes) data := makeSocData() return swarm.NewChunk(wrongAddress, data) From 1aca908f2a11b317e0fa41f9fcdf270a24305d07 Mon Sep 17 00:00:00 2001 From: nugaon Date: Thu, 19 Jun 2025 14:25:47 +0200 Subject: 
[PATCH 25/62] test: getter --- pkg/replicas/export_test.go | 10 +- pkg/replicas/getter_soc_test.go | 214 ++++++++++++++++++++++++++++++++ 2 files changed, 223 insertions(+), 1 deletion(-) create mode 100644 pkg/replicas/getter_soc_test.go diff --git a/pkg/replicas/export_test.go b/pkg/replicas/export_test.go index 271ad71ed0b..34508034ede 100644 --- a/pkg/replicas/export_test.go +++ b/pkg/replicas/export_test.go @@ -11,5 +11,13 @@ var ( ) func Wait(g storage.Getter) { - g.(*getter).wg.Wait() + if g, ok := g.(*getter); ok { + g.wg.Wait() + return + } + if g, ok := g.(*socGetter); ok { + g.wg.Wait() + return + } + panic("g is not a replicas.Getter") } diff --git a/pkg/replicas/getter_soc_test.go b/pkg/replicas/getter_soc_test.go new file mode 100644 index 00000000000..505bd2f073c --- /dev/null +++ b/pkg/replicas/getter_soc_test.go @@ -0,0 +1,214 @@ +// Copyright 2023 The Swarm Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package replicas_test + +import ( + "context" + "crypto/rand" + "errors" + "fmt" + "io" + "testing" + "time" + + "github.com/ethersphere/bee/v2/pkg/cac" + "github.com/ethersphere/bee/v2/pkg/crypto" + "github.com/ethersphere/bee/v2/pkg/file/redundancy" + "github.com/ethersphere/bee/v2/pkg/replicas" + "github.com/ethersphere/bee/v2/pkg/soc" + "github.com/ethersphere/bee/v2/pkg/storage" +) + +func TestSOCGetter(t *testing.T) { + t.Parallel() + // failure is a struct that defines a failure scenario to test + type failure struct { + name string + err error + errf func(int, int) func(int) chan struct{} + } + // failures is a list of failure scenarios to test + failures := []failure{ + { + "timeout", + context.Canceled, + func(_, _ int) func(i int) chan struct{} { + return func(i int) chan struct{} { + return nil + } + }, + }, + { + "not found", + storage.ErrNotFound, + func(_, _ int) func(i int) chan struct{} { + c := make(chan struct{}) + close(c) + return func(i int) chan struct{} { + return c + } + }, + }, + } + type test struct { + name string + failure failure + level int + count int + found int + } + + var tests []test + for _, f := range failures { + for level, c := range redundancy.GetReplicaCounts() { + for j := 0; j <= c*2+1; j++ { + tests = append(tests, test{ + name: fmt.Sprintf("%s level %d count %d found %d", f.name, level, c, j), + failure: f, + level: level, + count: c, + found: j, + }) + } + } + } + + // initialise the base chunk + chunkLen := 420 + buf := make([]byte, chunkLen) + if _, err := io.ReadFull(rand.Reader, buf); err != nil { + t.Fatal(err) + } + ch, err := cac.New(buf) + if err != nil { + t.Fatal(err) + } + // create soc from cac + // test key to sign soc chunks + privKey, err := crypto.GenerateSecp256k1Key() + if err != nil { + t.Fatal(err) + } + signer := crypto.NewDefaultSigner(privKey) + id := make([]byte, 32) + if _, err := rand.Read(id); err != nil { + t.Fatal(err) + } + s := soc.New(id, ch) + ch, err = s.Sign(signer) + if err != nil { + t.Fatal(err) + } + + // reset retry interval to speed up tests + retryInterval := replicas.RetryInterval + defer func() { replicas.RetryInterval = retryInterval }() + replicas.RetryInterval = 100 * time.Millisecond + + // run the tests + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // initiate a chunk retrieval session using replicas.Getter + // embedding a testGetter that simulates the behaviour of a chunk store + store := newTestGetter(ch, tc.found, tc.failure.errf(tc.found, tc.count)) + g := 
replicas.NewSocGetter(store, redundancy.Level(tc.level)) + store.now = time.Now() + ctx, cancel := context.WithCancel(context.Background()) + if tc.found > tc.count { + wait := replicas.RetryInterval / 2 * time.Duration(1+2*tc.level) + go func() { + time.Sleep(wait) + cancel() + }() + } + _, err := g.Get(ctx, ch.Address()) + replicas.Wait(g) + cancel() + + // test the returned error + if tc.found <= tc.count { + if err != nil { + t.Fatalf("expected no error. got %v", err) + } + // if j <= c, the original chunk should be retrieved and the context should be cancelled + t.Run("retrievals cancelled", func(t *testing.T) { + select { + case <-time.After(100 * time.Millisecond): + t.Fatal("timed out waiting for context to be cancelled") + case <-store.cancelled: + } + }) + + } else { + if err == nil { + t.Fatal("expected error, got nil") + } + + t.Run("returns correct error", func(t *testing.T) { + if !errors.Is(err, replicas.ErrSwarmageddon) { + t.Fatalf("incorrect error. want Swarmageddon. got %v", err) + } + if !errors.Is(err, tc.failure.err) { + t.Fatalf("incorrect error. want it to wrap %v. got %v", tc.failure.err, err) + } + }) + } + + attempts := int(store.attempts.Load()) + // the original chunk should be among those attempted for retrieval + addresses := store.addresses[:attempts] + latencies := store.latencies[:attempts] + t.Run("original address called", func(t *testing.T) { + select { + case <-time.After(100 * time.Millisecond): + t.Fatal("timed out waiting for original address to be attempted for retrieval") + case <-store.origCalled: + i := store.origIndex + if i > 2 { + t.Fatalf("original address called too late. want at most 2 (preceding attempts). got %v (latency: %v)", i, latencies[i]) + } + addresses = append(addresses[:i], addresses[i+1:]...) + latencies = append(latencies[:i], latencies[i+1:]...) + attempts-- + } + }) + + t.Run("retrieved count", func(t *testing.T) { + if attempts > tc.count { + t.Fatalf("too many attempts to retrieve a replica: want at most %v. got %v.", tc.count, attempts) + } + if tc.found > tc.count { + if attempts < tc.count { + t.Fatalf("too few attempts to retrieve a replica: want at least %v. got %v.", tc.count, attempts) + } + return + } + maxValue := 2 + for i := 1; i < tc.level && maxValue < tc.found; i++ { + maxValue = maxValue * 2 + } + if attempts > maxValue { + t.Fatalf("too many attempts to retrieve a replica: want at most %v. got %v. 
latencies %v", maxValue, attempts, latencies) + } + }) + + t.Run("dispersion", func(t *testing.T) { + if err := dispersed(redundancy.Level(tc.level), ch, addresses); err != nil { + t.Fatalf("addresses are not dispersed: %v", err) + } + }) + + t.Run("latency", func(t *testing.T) { + counts := redundancy.GetReplicaCounts() + for i, latency := range latencies { + multiplier := latency / replicas.RetryInterval + if multiplier > 0 && i < counts[multiplier-1] { + t.Fatalf("incorrect latency for retrieving replica %d: %v", i, err) + } + } + }) + }) + } +} From a97908fd1559943ab75fce9721d840f5a89a4fa4 Mon Sep 17 00:00:00 2001 From: nugaon Date: Wed, 25 Jun 2025 08:11:53 +0200 Subject: [PATCH 26/62] fix: race issue --- pkg/replicas/getter_soc.go | 6 ++++-- pkg/replicas/getter_soc_test.go | 13 ++++++++----- pkg/replicas/putter_soc_test.go | 3 +++ 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/pkg/replicas/getter_soc.go b/pkg/replicas/getter_soc.go index 957a106ed48..4e052c78988 100644 --- a/pkg/replicas/getter_soc.go +++ b/pkg/replicas/getter_soc.go @@ -22,7 +22,7 @@ import ( // to a multiplexed variant that fetches chunks with replicas for SOC. // // the strategy to retrieve a chunk that has replicas can be configured with a few parameters: -// - RetryInterval: the delay before a new batch of replicas is fetched. +// - SOCRetryInterval: the delay before a new batch of replicas is fetched. // - depth: 2^{depth} is the total number of additional replicas that have been uploaded // (by default, it is assumed to be 4, ie. total of 16) // - (not implemented) pivot: replicas with address in the proximity of pivot will be tried first @@ -32,6 +32,8 @@ type socGetter struct { level redundancy.Level } +var SOCRetryInterval = 300 * time.Millisecond + // NewSocGetter is the getter constructor func NewSocGetter(g storage.Getter, level redundancy.Level) storage.Getter { return &socGetter{Getter: g, level: level} @@ -76,7 +78,7 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk var wait <-chan time.Time // nil channel to disable case // addresses used are doubling each period of search expansion // (at intervals of RetryInterval) - ticker := time.NewTicker(RetryInterval) + ticker := time.NewTicker(SOCRetryInterval) defer ticker.Stop() for level := uint8(0); level <= uint8(g.level); { select { diff --git a/pkg/replicas/getter_soc_test.go b/pkg/replicas/getter_soc_test.go index 505bd2f073c..f5143183a50 100644 --- a/pkg/replicas/getter_soc_test.go +++ b/pkg/replicas/getter_soc_test.go @@ -1,7 +1,10 @@ -// Copyright 2023 The Swarm Authors. All rights reserved. +// Copyright 2025 The Swarm Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// This file is a copy of the original getter_test.go file +// and tailored to socGetter implementation. 
+ package replicas_test import ( @@ -103,9 +106,9 @@ func TestSOCGetter(t *testing.T) { } // reset retry interval to speed up tests - retryInterval := replicas.RetryInterval - defer func() { replicas.RetryInterval = retryInterval }() - replicas.RetryInterval = 100 * time.Millisecond + retryInterval := replicas.SOCRetryInterval + defer func() { replicas.SOCRetryInterval = retryInterval }() + replicas.SOCRetryInterval = 100 * time.Millisecond // run the tests for _, tc := range tests { @@ -117,7 +120,7 @@ func TestSOCGetter(t *testing.T) { store.now = time.Now() ctx, cancel := context.WithCancel(context.Background()) if tc.found > tc.count { - wait := replicas.RetryInterval / 2 * time.Duration(1+2*tc.level) + wait := replicas.SOCRetryInterval / 2 * time.Duration(1+2*tc.level) go func() { time.Sleep(wait) cancel() diff --git a/pkg/replicas/putter_soc_test.go b/pkg/replicas/putter_soc_test.go index 2ad0377ce61..e0315eadc42 100644 --- a/pkg/replicas/putter_soc_test.go +++ b/pkg/replicas/putter_soc_test.go @@ -2,6 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// This file was created as a copy of the original putter_test.go file +// and tailored to the socPutter implementation. + package replicas_test import ( From 7d7b5879870ba5667e639eba213658249a1bbee4 Mon Sep 17 00:00:00 2001 From: nugaon Date: Wed, 25 Jun 2025 10:11:40 +0200 Subject: [PATCH 27/62] docs: openapi changes --- openapi/Swarm.yaml | 4 ++++ pkg/api/soc.go | 6 ------ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/openapi/Swarm.yaml b/openapi/Swarm.yaml index 561537d31bf..4e28252dfeb 100644 --- a/openapi/Swarm.yaml +++ b/openapi/Swarm.yaml @@ -883,6 +883,7 @@ paths: - $ref: "SwarmCommon.yaml#/components/parameters/SwarmPostageStamp" - $ref: "SwarmCommon.yaml#/components/parameters/SwarmAct" - $ref: "SwarmCommon.yaml#/components/parameters/SwarmActHistoryAddress" + - $ref: "SwarmCommon.yaml#/components/parameters/SwarmRedundancyLevelParameter" requestBody: required: true description: The SOC binary data is composed of the span (8 bytes) and the at most 4KB payload. @@ -930,6 +931,7 @@ paths: description: Arbitrary identifier of the related data - $ref: "SwarmCommon.yaml#/components/parameters/SwarmOnlyRootChunkParameter" - $ref: "SwarmCommon.yaml#/components/parameters/SwarmCache" + - $ref: "SwarmCommon.yaml#/components/parameters/SwarmRedundancyLevelParameter" - $ref: "SwarmCommon.yaml#/components/parameters/SwarmRedundancyStrategyParameter" - $ref: "SwarmCommon.yaml#/components/parameters/SwarmRedundancyFallbackModeParameter" - $ref: "SwarmCommon.yaml#/components/parameters/SwarmChunkRetrievalTimeoutParameter" @@ -981,6 +983,7 @@ paths: - $ref: "SwarmCommon.yaml#/components/parameters/SwarmPostageBatchId" - $ref: "SwarmCommon.yaml#/components/parameters/SwarmAct" - $ref: "SwarmCommon.yaml#/components/parameters/SwarmActHistoryAddress" + - $ref: "SwarmCommon.yaml#/components/parameters/SwarmRedundancyLevelParameter" responses: "201": description: Created @@ -1044,6 +1047,7 @@ paths: description: "Resolves feed payloads in legacy structure (timestamp, content address)." 
- $ref: "SwarmCommon.yaml#/components/parameters/SwarmOnlyRootChunkParameter" - $ref: "SwarmCommon.yaml#/components/parameters/SwarmCache" + - $ref: "SwarmCommon.yaml#/components/parameters/SwarmRedundancyLevelParameter" - $ref: "SwarmCommon.yaml#/components/parameters/SwarmRedundancyStrategyParameter" - $ref: "SwarmCommon.yaml#/components/parameters/SwarmRedundancyFallbackModeParameter" - $ref: "SwarmCommon.yaml#/components/parameters/SwarmChunkRetrievalTimeoutParameter" diff --git a/pkg/api/soc.go b/pkg/api/soc.go index 4086f9a0e33..9dfe6e8e870 100644 --- a/pkg/api/soc.go +++ b/pkg/api/soc.go @@ -73,15 +73,9 @@ func (s *Service) socUploadHandler(w http.ResponseWriter, r *http.Request) { err error ) - // if rLevel > 0 then it can cause error on parallel upload with a great chance - // because of multiple writes on the same postage index - // https://github.com/ethersphere/bee/actions/runs/15605098232/job/43952677866?pr=5057 - // the solution would be either ignoring the error for dispersed replicas or do sequential upload var rLevel redundancy.Level if headers.RLevel != nil { rLevel = *headers.RLevel - } else { - rLevel = 0 // default redundancy level if header is missing } if len(headers.StampSig) != 0 { From 575e68393157be11eaef12a802707e56060ff64a Mon Sep 17 00:00:00 2001 From: nugaon Date: Fri, 18 Jul 2025 09:39:47 +0200 Subject: [PATCH 28/62] fix: socretryinterval instead of retryinterval --- pkg/replicas/getter_soc_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/replicas/getter_soc_test.go b/pkg/replicas/getter_soc_test.go index f5143183a50..f2b0bacf5e1 100644 --- a/pkg/replicas/getter_soc_test.go +++ b/pkg/replicas/getter_soc_test.go @@ -206,7 +206,7 @@ func TestSOCGetter(t *testing.T) { t.Run("latency", func(t *testing.T) { counts := redundancy.GetReplicaCounts() for i, latency := range latencies { - multiplier := latency / replicas.RetryInterval + multiplier := latency / replicas.SOCRetryInterval if multiplier > 0 && i < counts[multiplier-1] { t.Fatalf("incorrect latency for retrieving replica %d: %v", i, err) } From e18e95a0658ccca6f9e5c76a15b5ffd093a2a87c Mon Sep 17 00:00:00 2001 From: nugaon Date: Wed, 17 Sep 2025 21:33:43 +0200 Subject: [PATCH 29/62] fix: identity address using assigned address --- pkg/storage/storage.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pkg/storage/storage.go b/pkg/storage/storage.go index 312364e2676..df575a83ce5 100644 --- a/pkg/storage/storage.go +++ b/pkg/storage/storage.go @@ -307,10 +307,7 @@ func IdentityAddress(chunk swarm.Chunk) (swarm.Address, error) { // check the chunk is single owner chunk or cac if sch, err := soc.FromChunk(chunk); err == nil { - socAddress, err := sch.Address() - if err != nil { - return swarm.ZeroAddress, err - } + socAddress := chunk.Address() // cannot use sch.Address() because of SOC replicas h := swarm.NewHasher() _, err = h.Write(socAddress.Bytes()) if err != nil { From f3bc1342e1c1ca285368fc200003281f45a0cd99 Mon Sep 17 00:00:00 2001 From: nugaon Date: Wed, 17 Sep 2025 21:49:34 +0200 Subject: [PATCH 30/62] fix: wg usage --- pkg/replicas/getter_soc.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pkg/replicas/getter_soc.go b/pkg/replicas/getter_soc.go index 4e052c78988..f3078a4961c 100644 --- a/pkg/replicas/getter_soc.go +++ b/pkg/replicas/getter_soc.go @@ -27,7 +27,6 @@ import ( // (by default, it is assumed to be 4, ie. 
total of 16) // - (not implemented) pivot: replicas with address in the proximity of pivot will be tried first type socGetter struct { - wg sync.WaitGroup storage.Getter level redundancy.Level } @@ -44,6 +43,9 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk ctx, cancel := context.WithCancel(ctx) defer cancel() + var wg sync.WaitGroup + defer wg.Wait() + // channel that the results (retrieved chunks) are gathered to from concurrent // workers each fetching a replica resultC := make(chan swarm.Chunk) @@ -53,9 +55,9 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk errcnt := 0 // concurrently call to retrieve chunk using original SOC address - g.wg.Add(1) + wg.Add(1) go func() { - defer g.wg.Done() + defer wg.Done() ch, err := g.Getter.Get(ctx, addr) if err != nil { errc <- err @@ -110,9 +112,9 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk continue } - g.wg.Add(1) + wg.Add(1) go func() { - defer g.wg.Done() + defer wg.Done() ch, err := g.Getter.Get(ctx, swarm.NewAddress(so.addr)) if err != nil { errc <- err From 58fde4db7df185956e303ef610461d35c80ab627 Mon Sep 17 00:00:00 2001 From: nugaon Date: Wed, 17 Sep 2025 21:51:07 +0200 Subject: [PATCH 31/62] fix: wg usage at cac --- pkg/replicas/getter.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pkg/replicas/getter.go b/pkg/replicas/getter.go index 5dc42cb7c82..d59071de0b4 100644 --- a/pkg/replicas/getter.go +++ b/pkg/replicas/getter.go @@ -36,7 +36,6 @@ var ErrSwarmageddon = errors.New("swarmageddon has begun") // (by default, it is assumed to be 4, ie. total of 16) // - (not implemented) pivot: replicas with address in the proximity of pivot will be tried first type getter struct { - wg sync.WaitGroup storage.Getter level redundancy.Level } @@ -51,6 +50,9 @@ func (g *getter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk, e ctx, cancel := context.WithCancel(ctx) defer cancel() + var wg sync.WaitGroup + defer wg.Wait() + // channel that the results (retrieved chunks) are gathered to from concurrent // workers each fetching a replica resultC := make(chan swarm.Chunk) @@ -60,9 +62,9 @@ func (g *getter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk, e errcnt := 0 // concurrently call to retrieve chunk using original CAC address - g.wg.Add(1) + wg.Add(1) go func() { - defer g.wg.Done() + defer wg.Done() ch, err := g.Getter.Get(ctx, addr) if err != nil { errc <- err @@ -117,9 +119,9 @@ func (g *getter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk, e continue } - g.wg.Add(1) + wg.Add(1) go func() { - defer g.wg.Done() + defer wg.Done() ch, err := g.Getter.Get(ctx, swarm.NewAddress(so.addr)) if err != nil { errc <- err From 3cabd355023883e76ced696d32e9d89c6c1ee0b7 Mon Sep 17 00:00:00 2001 From: nugaon Date: Wed, 17 Sep 2025 22:02:01 +0200 Subject: [PATCH 32/62] test: remove wg wait for get --- pkg/replicas/export_test.go | 14 -------------- pkg/replicas/getter_soc_test.go | 1 - pkg/replicas/getter_test.go | 1 - 3 files changed, 16 deletions(-) diff --git a/pkg/replicas/export_test.go b/pkg/replicas/export_test.go index 34508034ede..e8aee696a3a 100644 --- a/pkg/replicas/export_test.go +++ b/pkg/replicas/export_test.go @@ -4,20 +4,6 @@ package replicas -import "github.com/ethersphere/bee/v2/pkg/storage" - var ( Signer = signer ) - -func Wait(g storage.Getter) { - if g, ok := g.(*getter); ok { - g.wg.Wait() - return - } - if g, ok := g.(*socGetter); ok { - g.wg.Wait() - 
return - } - panic("g is not a replicas.Getter") -} diff --git a/pkg/replicas/getter_soc_test.go b/pkg/replicas/getter_soc_test.go index f2b0bacf5e1..ed3e0c6125e 100644 --- a/pkg/replicas/getter_soc_test.go +++ b/pkg/replicas/getter_soc_test.go @@ -127,7 +127,6 @@ func TestSOCGetter(t *testing.T) { }() } _, err := g.Get(ctx, ch.Address()) - replicas.Wait(g) cancel() // test the returned error diff --git a/pkg/replicas/getter_test.go b/pkg/replicas/getter_test.go index d1d727dd5fd..b11a55d12c8 100644 --- a/pkg/replicas/getter_test.go +++ b/pkg/replicas/getter_test.go @@ -171,7 +171,6 @@ func TestGetter(t *testing.T) { }() } _, err := g.Get(ctx, ch.Address()) - replicas.Wait(g) cancel() // test the returned error From 6eee49043f9026e8750b3f5f2d03942b92e62460 Mon Sep 17 00:00:00 2001 From: nugaon Date: Wed, 17 Sep 2025 22:02:36 +0200 Subject: [PATCH 33/62] refactor: errc size --- pkg/replicas/getter_soc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/replicas/getter_soc.go b/pkg/replicas/getter_soc.go index f3078a4961c..93a4e566df6 100644 --- a/pkg/replicas/getter_soc.go +++ b/pkg/replicas/getter_soc.go @@ -50,7 +50,7 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk // workers each fetching a replica resultC := make(chan swarm.Chunk) // errc collects the errors - errc := make(chan error, 17) + errc := make(chan error, g.level.GetReplicaCount()+1) var errs error errcnt := 0 From 64aa115831beee77fe03210e45b2af3264cd0a1e Mon Sep 17 00:00:00 2001 From: nugaon Date: Wed, 17 Sep 2025 22:13:02 +0200 Subject: [PATCH 34/62] refactor: rename specialgetter --- pkg/feeds/factory/factory.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/feeds/factory/factory.go b/pkg/feeds/factory/factory.go index daf82950057..18a60b17918 100644 --- a/pkg/feeds/factory/factory.go +++ b/pkg/feeds/factory/factory.go @@ -19,17 +19,17 @@ func New(getter storage.Getter) feeds.Factory { return &factory{getter} } -func (f *factory) NewLookup(t feeds.Type, feed *feeds.Feed, specialGetter storage.Getter) (feeds.Lookup, error) { - getter := f.Getter - if specialGetter != nil { - getter = specialGetter +func (f *factory) NewLookup(t feeds.Type, feed *feeds.Feed, getter storage.Getter) (feeds.Lookup, error) { + g := f.Getter + if getter != nil { + g = getter } switch t { case feeds.Sequence: - return sequence.NewAsyncFinder(getter, feed), nil + return sequence.NewAsyncFinder(g, feed), nil case feeds.Epoch: - return epochs.NewAsyncFinder(getter, feed), nil + return epochs.NewAsyncFinder(g, feed), nil } return nil, feeds.ErrFeedTypeNotFound From 1d3b9ff3eb3ba759ed1604dd5eb6fce4606cb217 Mon Sep 17 00:00:00 2001 From: nugaon Date: Fri, 19 Sep 2025 12:27:52 +0200 Subject: [PATCH 35/62] refactor: time.After instead of time.NewTicker --- pkg/replicas/getter.go | 5 +---- pkg/replicas/getter_soc.go | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/pkg/replicas/getter.go b/pkg/replicas/getter.go index d59071de0b4..7fdf6e818cd 100644 --- a/pkg/replicas/getter.go +++ b/pkg/replicas/getter.go @@ -87,8 +87,6 @@ func (g *getter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk, e var wait <-chan time.Time // nil channel to disable case // addresses used are doubling each period of search expansion // (at intervals of RetryInterval) - ticker := time.NewTicker(RetryInterval) - defer ticker.Stop() for level := uint8(0); level <= uint8(g.level); { select { // at least one chunk is retrieved, cancel the rest and return early 
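Both getters here rely on a standard Go idiom: receiving from a nil channel blocks forever, so a select case can be disabled by assigning nil and re-armed later, which is exactly what the next/wait pair does. A runnable toy version of the expanding-batch loop that time.After now drives; names and constants are illustrative, not the package API:

package main

import (
	"fmt"
	"time"
)

func main() {
	retryInterval := 50 * time.Millisecond
	next := make(chan int, 16)
	for i := 0; i < 16; i++ {
		next <- i
	}
	close(next)

	var wait <-chan time.Time // nil channel: its select case is disabled
	in := next                // active replica source; nil while waiting
	n, target, level := 0, 2, 0
	for level <= 4 {
		select {
		case <-wait:
			// timer fired: re-enable reads and double the batch target
			in = next
			level++
			target = 1 << level
			continue
		case i, ok := <-in:
			if !ok {
				return // all replica addresses consumed
			}
			fmt.Println("try replica", i)
			n++
			if n < target {
				continue
			}
			in = nil                         // pause reads for this batch
			wait = time.After(retryInterval) // arm a fresh one-shot timer
		}
	}
}

Replacing the ticker with time.After also removes the need to stop it: each re-arm allocates a fresh one-shot timer that always fires, which is acceptable at this frequency.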
@@ -105,7 +103,6 @@ func (g *getter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk, e // ticker switches on the address channel case <-wait: - wait = nil next = rr.c level++ target = 1 << level @@ -144,7 +141,7 @@ func (g *getter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk, e continue } next = nil - wait = ticker.C + wait = time.After(RetryInterval) } } diff --git a/pkg/replicas/getter_soc.go b/pkg/replicas/getter_soc.go index 93a4e566df6..93d30854db0 100644 --- a/pkg/replicas/getter_soc.go +++ b/pkg/replicas/getter_soc.go @@ -80,8 +80,6 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk var wait <-chan time.Time // nil channel to disable case // addresses used are doubling each period of search expansion // (at intervals of RetryInterval) - ticker := time.NewTicker(SOCRetryInterval) - defer ticker.Stop() for level := uint8(0); level <= uint8(g.level); { select { // at least one chunk is retrieved, cancel the rest and return early @@ -98,7 +96,6 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk // ticker switches on the address channel case <-wait: - wait = nil next = rr.c level++ target = 1 << level @@ -131,7 +128,7 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk continue } next = nil - wait = ticker.C + wait = time.After(SOCRetryInterval) } } From 495797d812875b8cee54e86fbd1231e4812339ec Mon Sep 17 00:00:00 2001 From: nugaon Date: Thu, 25 Sep 2025 09:36:42 +0200 Subject: [PATCH 36/62] refactor: comments --- pkg/api/soc.go | 9 +++------ pkg/replicas/putter_soc.go | 6 ++++-- pkg/replicas/replicas.go | 2 +- pkg/replicas/replicas_soc.go | 4 +--- 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/pkg/api/soc.go b/pkg/api/soc.go index 9dfe6e8e870..c7c6147a02f 100644 --- a/pkg/api/soc.go +++ b/pkg/api/soc.go @@ -254,17 +254,14 @@ func (s *Service) socGetHandler(w http.ResponseWriter, r *http.Request) { } headers := struct { - OnlyRootChunk bool `map:"Swarm-Only-Root-Chunk"` - RLevel *redundancy.Level `map:"Swarm-Redundancy-Level"` + OnlyRootChunk bool `map:"Swarm-Only-Root-Chunk"` + RLevel redundancy.Level `map:"Swarm-Redundancy-Level"` }{} if response := s.mapStructure(r.Header, &headers); response != nil { response("invalid header params", logger, w) return } - rLevel := redundancy.DefaultLevel - if headers.RLevel != nil { - rLevel = *headers.RLevel - } + rLevel := headers.RLevel address, err := soc.CreateAddress(paths.ID, paths.Owner) if err != nil { diff --git a/pkg/replicas/putter_soc.go b/pkg/replicas/putter_soc.go index 1182bd4f5c1..d9a70f02250 100644 --- a/pkg/replicas/putter_soc.go +++ b/pkg/replicas/putter_soc.go @@ -53,7 +53,9 @@ func (p *socPutter) Put(ctx context.Context, ch swarm.Chunk) error { // create a new chunk with the replica address sch := swarm.NewChunk(swarm.NewAddress(r.addr), ch.Data()) err := p.putter.Put(ctx, sch) - errc <- err + if err != nil { + errc <- err + } }(r) } @@ -71,7 +73,7 @@ type socPutterSession struct { ps storer.PutterSession } -// NewSocPutter is the putterSession constructor +// NewSocPutterSession is the putterSession constructor func NewSocPutterSession(p storer.PutterSession, rLevel redundancy.Level) storer.PutterSession { return &socPutterSession{ socPutter{ diff --git a/pkg/replicas/replicas.go b/pkg/replicas/replicas.go index cdeb93cc30e..f6f7328ce87 100644 --- a/pkg/replicas/replicas.go +++ b/pkg/replicas/replicas.go @@ -30,7 +30,7 @@ type replicator struct { addr []byte // chunk address queue 
[16]*replica // to sort addresses according to di exist [30]bool // maps the 16 distinct nibbles on all levels - sizes [5]int // number of distinct neighnourhoods redcorded for each depth + sizes [5]int // number of distinct neighbourhoods recorded for each depth c chan *replica rLevel redundancy.Level } diff --git a/pkg/replicas/replicas_soc.go b/pkg/replicas/replicas_soc.go index f3e605fe76c..d539a2c5cb0 100644 --- a/pkg/replicas/replicas_soc.go +++ b/pkg/replicas/replicas_soc.go @@ -18,7 +18,6 @@ import ( // socReplicator running the find for replicas type socReplicator struct { addr []byte // chunk address - sizes [5]int // number of distinct neighbourhoods redcorded for each depth c chan *socReplica rLevel redundancy.Level } @@ -27,8 +26,7 @@ type socReplicator struct { func newSocReplicator(addr swarm.Address, rLevel redundancy.Level) *socReplicator { rr := &socReplicator{ addr: addr.Bytes(), - sizes: redundancy.GetReplicaCounts(), - c: make(chan *socReplica, 16), + c: make(chan *socReplica, rLevel.GetReplicaCount()), rLevel: rLevel, } go rr.replicas() From 84585e7e33cbb8464d0364cb69e9dc6591422c87 Mon Sep 17 00:00:00 2001 From: Ljubisa Gacevic Date: Thu, 6 Nov 2025 13:47:24 +0100 Subject: [PATCH 37/62] fix: soc dispersed replica --- pkg/api/accesscontrol.go | 14 +- pkg/api/bzz.go | 6 +- pkg/api/feed.go | 19 ++- pkg/api/feed_test.go | 2 +- pkg/api/pin.go | 2 +- pkg/api/soc.go | 14 +- pkg/api/soc_test.go | 2 +- pkg/feeds/factory/factory.go | 13 +- pkg/feeds/feed.go | 18 ++- pkg/file/redundancy/level.go | 1 - pkg/file/redundancy/redundancy_test.go | 2 +- pkg/hive/hive_test.go | 2 +- pkg/node/bootstrap.go | 2 +- pkg/replicas/export_test.go | 4 +- pkg/replicas/getter_soc.go | 20 +-- pkg/replicas/getter_soc_test.go | 12 +- pkg/replicas/getter_test.go | 2 +- pkg/replicas/putter_soc.go | 12 +- pkg/replicas/putter_soc_test.go | 2 +- pkg/replicas/putter_test.go | 2 +- pkg/replicas/replica_test.go | 2 +- pkg/replicas/replicas.go | 4 +- pkg/replicas/replicas_soc.go | 4 +- pkg/replicas/replicas_soc_test.go | 210 +++++++++++++++++++++++++ pkg/soc/validator.go | 7 +- pkg/topology/kademlia/kademlia_test.go | 2 +- 26 files changed, 304 insertions(+), 76 deletions(-) create mode 100644 pkg/replicas/replicas_soc_test.go diff --git a/pkg/api/accesscontrol.go b/pkg/api/accesscontrol.go index 1ae0fb2fe6e..78c2a0e11b5 100644 --- a/pkg/api/accesscontrol.go +++ b/pkg/api/accesscontrol.go @@ -126,7 +126,7 @@ func (s *Service) actDecryptionHandler() func(h http.Handler) http.Handler { cache = *headers.Cache } ctx := r.Context() - ls := loadsave.NewReadonly(s.storer.Download(cache), s.storer.Cache(), redundancy.DefaultLevel) + ls := loadsave.NewReadonly(s.storer.Download(cache), s.storer.Cache(), redundancy.PARANOID) reference, err := s.accesscontrol.DownloadHandler(ctx, ls, paths.Address, headers.Publisher, *headers.HistoryAddress, timestamp) if err != nil { logger.Debug("access control download failed", "error", err) @@ -159,7 +159,7 @@ func (s *Service) actEncryptionHandler( historyRootHash swarm.Address, ) (swarm.Address, swarm.Address, error) { publisherPublicKey := &s.publicKey - ls := loadsave.New(s.storer.Download(true), s.storer.Cache(), requestPipelineFactory(ctx, putter, false, redundancy.NONE), redundancy.DefaultLevel) + ls := loadsave.New(s.storer.Download(true), s.storer.Cache(), requestPipelineFactory(ctx, putter, false, redundancy.NONE), redundancy.PARANOID) storageReference, historyReference, encryptedReference, err := s.accesscontrol.UploadHandler(ctx, ls, reference, publisherPublicKey, 
historyRootHash) if err != nil { return swarm.ZeroAddress, swarm.ZeroAddress, err @@ -204,7 +204,7 @@ func (s *Service) actListGranteesHandler(w http.ResponseWriter, r *http.Request) cache = *headers.Cache } publisher := &s.publicKey - ls := loadsave.NewReadonly(s.storer.Download(cache), s.storer.Cache(), redundancy.DefaultLevel) + ls := loadsave.NewReadonly(s.storer.Download(cache), s.storer.Cache(), redundancy.PARANOID) grantees, err := s.accesscontrol.Get(r.Context(), ls, publisher, paths.GranteesAddress) if err != nil { logger.Debug("could not get grantees", "error", err) @@ -344,8 +344,8 @@ func (s *Service) actGrantRevokeHandler(w http.ResponseWriter, r *http.Request) granteeref := paths.GranteesAddress publisher := &s.publicKey - ls := loadsave.New(s.storer.Download(true), s.storer.Cache(), requestPipelineFactory(ctx, putter, false, redundancy.NONE), redundancy.DefaultLevel) - gls := loadsave.New(s.storer.Download(true), s.storer.Cache(), requestPipelineFactory(ctx, putter, granteeListEncrypt, redundancy.NONE), redundancy.DefaultLevel) + ls := loadsave.New(s.storer.Download(true), s.storer.Cache(), requestPipelineFactory(ctx, putter, false, redundancy.NONE), redundancy.PARANOID) + gls := loadsave.New(s.storer.Download(true), s.storer.Cache(), requestPipelineFactory(ctx, putter, granteeListEncrypt, redundancy.NONE), redundancy.PARANOID) granteeref, encryptedglref, historyref, actref, err := s.accesscontrol.UpdateHandler(ctx, ls, gls, granteeref, historyAddress, publisher, grantees.Addlist, grantees.Revokelist) if err != nil { logger.Debug("failed to update grantee list", "error", err) @@ -498,8 +498,8 @@ func (s *Service) actCreateGranteesHandler(w http.ResponseWriter, r *http.Reques } publisher := &s.publicKey - ls := loadsave.New(s.storer.Download(true), s.storer.Cache(), requestPipelineFactory(ctx, putter, false, redundancy.NONE), redundancy.DefaultLevel) - gls := loadsave.New(s.storer.Download(true), s.storer.Cache(), requestPipelineFactory(ctx, putter, granteeListEncrypt, redundancy.NONE), redundancy.DefaultLevel) + ls := loadsave.New(s.storer.Download(true), s.storer.Cache(), requestPipelineFactory(ctx, putter, false, redundancy.NONE), redundancy.PARANOID) + gls := loadsave.New(s.storer.Download(true), s.storer.Cache(), requestPipelineFactory(ctx, putter, granteeListEncrypt, redundancy.NONE), redundancy.PARANOID) granteeref, encryptedglref, historyref, actref, err := s.accesscontrol.UpdateHandler(ctx, ls, gls, swarm.ZeroAddress, historyAddress, publisher, list, nil) if err != nil { logger.Debug("failed to create grantee list", "error", err) diff --git a/pkg/api/bzz.go b/pkg/api/bzz.go index fd24b425da0..f84b0a51fc5 100644 --- a/pkg/api/bzz.go +++ b/pkg/api/bzz.go @@ -401,7 +401,7 @@ func (s *Service) serveReference(logger log.Logger, address swarm.Address, pathV cache = *headers.Cache } - rLevel := redundancy.DefaultLevel + rLevel := redundancy.PARANOID if headers.RLevel != nil { rLevel = *headers.RLevel } @@ -624,7 +624,7 @@ func (s *Service) downloadHandler(logger log.Logger, w http.ResponseWriter, r *h jsonhttp.BadRequest(w, "could not parse headers") return } - rLevel := redundancy.DefaultLevel + rLevel := redundancy.PARANOID if headers.RLevel != nil { rLevel = *headers.RLevel } @@ -736,5 +736,5 @@ func (s *Service) manifestFeed( return nil, fmt.Errorf("node lookup: %s", "feed metadata absent") } f := feeds.New(topic, common.BytesToAddress(owner)) - return s.feedFactory.NewLookup(*t, f, st) + return s.feedFactory.NewLookup(*t, f, feeds.WithGetter(st)) } diff --git 
a/pkg/api/feed.go b/pkg/api/feed.go index 6b5282a2b92..4941165d693 100644 --- a/pkg/api/feed.go +++ b/pkg/api/feed.go @@ -81,7 +81,7 @@ func (s *Service) feedGetHandler(w http.ResponseWriter, r *http.Request) { } f := feeds.New(paths.Topic, paths.Owner) - lookup, err := s.feedFactory.NewLookup(feeds.Sequence, f, getter) + lookup, err := s.feedFactory.NewLookup(feeds.Sequence, f, feeds.WithGetter(getter)) if err != nil { logger.Debug("new lookup failed", "owner", paths.Owner, "error", err) logger.Error(nil, "new lookup failed") @@ -177,12 +177,12 @@ func (s *Service) feedPostHandler(w http.ResponseWriter, r *http.Request) { } headers := struct { - BatchID []byte `map:"Swarm-Postage-Batch-Id" validate:"required"` - Pin bool `map:"Swarm-Pin"` - Deferred *bool `map:"Swarm-Deferred-Upload"` - Act bool `map:"Swarm-Act"` - HistoryAddress swarm.Address `map:"Swarm-Act-History-Address"` - RedundancyLevel redundancy.Level `map:"Swarm-Redundancy-Level"` + BatchID []byte `map:"Swarm-Postage-Batch-Id" validate:"required"` + Pin bool `map:"Swarm-Pin"` + Deferred *bool `map:"Swarm-Deferred-Upload"` + Act bool `map:"Swarm-Act"` + HistoryAddress swarm.Address `map:"Swarm-Act-History-Address"` + RedundancyLevel *redundancy.Level `map:"Swarm-Redundancy-Level"` }{} if response := s.mapStructure(r.Header, &headers); response != nil { response("invalid header params", logger, w) @@ -239,7 +239,10 @@ func (s *Service) feedPostHandler(w http.ResponseWriter, r *http.Request) { logger: logger, } - rLevel := headers.RedundancyLevel + rLevel := redundancy.PARANOID + if headers.RedundancyLevel != nil { + rLevel = *headers.RedundancyLevel + } l := loadsave.New(s.storer.ChunkStore(), s.storer.Cache(), requestPipelineFactory(r.Context(), putter, false, 0), rLevel) feedManifest, err := manifest.NewDefaultManifest(l, false) diff --git a/pkg/api/feed_test.go b/pkg/api/feed_test.go index 4ee338ca0af..5782c4abd0c 100644 --- a/pkg/api/feed_test.go +++ b/pkg/api/feed_test.go @@ -461,7 +461,7 @@ func newMockFactory(mockLookup feeds.Lookup) *factoryMock { return &factoryMock{lookup: mockLookup} } -func (f *factoryMock) NewLookup(t feeds.Type, feed *feeds.Feed, getter storage.Getter) (feeds.Lookup, error) { +func (f *factoryMock) NewLookup(t feeds.Type, feed *feeds.Feed, _ ...feeds.FactoryOption) (feeds.Lookup, error) { switch t { case feeds.Sequence: f.sequenceCalled = true diff --git a/pkg/api/pin.go b/pkg/api/pin.go index 696f5185150..6b2a42586a9 100644 --- a/pkg/api/pin.go +++ b/pkg/api/pin.go @@ -53,7 +53,7 @@ func (s *Service) pinRootHash(w http.ResponseWriter, r *http.Request) { } getter := s.storer.Download(true) - traverser := traversal.New(getter, s.storer.Cache(), redundancy.DefaultLevel) + traverser := traversal.New(getter, s.storer.Cache(), redundancy.PARANOID) sem := semaphore.NewWeighted(100) var errTraverse error diff --git a/pkg/api/soc.go b/pkg/api/soc.go index c7c6147a02f..cd4d893f531 100644 --- a/pkg/api/soc.go +++ b/pkg/api/soc.go @@ -73,7 +73,7 @@ func (s *Service) socUploadHandler(w http.ResponseWriter, r *http.Request) { err error ) - var rLevel redundancy.Level + rLevel := redundancy.PARANOID if headers.RLevel != nil { rLevel = *headers.RLevel } @@ -254,14 +254,18 @@ func (s *Service) socGetHandler(w http.ResponseWriter, r *http.Request) { } headers := struct { - OnlyRootChunk bool `map:"Swarm-Only-Root-Chunk"` - RLevel redundancy.Level `map:"Swarm-Redundancy-Level"` + OnlyRootChunk bool `map:"Swarm-Only-Root-Chunk"` + RLevel *redundancy.Level `map:"Swarm-Redundancy-Level"` }{} if response := 
s.mapStructure(r.Header, &headers); response != nil { response("invalid header params", logger, w) return } - rLevel := headers.RLevel + + rLevel := redundancy.PARANOID + if headers.RLevel != nil { + rLevel = *headers.RLevel + } address, err := soc.CreateAddress(paths.ID, paths.Owner) if err != nil { @@ -271,7 +275,7 @@ func (s *Service) socGetHandler(w http.ResponseWriter, r *http.Request) { } getter := s.storer.Download(true) - if rLevel != 0 { + if rLevel > redundancy.NONE { getter = replicas.NewSocGetter(getter, rLevel) } sch, err := getter.Get(r.Context(), address) diff --git a/pkg/api/soc_test.go b/pkg/api/soc_test.go index 2b87c572a51..559b03d2ee7 100644 --- a/pkg/api/soc_test.go +++ b/pkg/api/soc_test.go @@ -218,7 +218,7 @@ func TestSOCWithRedundancy(t *testing.T) { t.Helper() t.Run(fmt.Sprintf("redundancy=%d", redundancyLevel), func(t *testing.T) { - testData := []byte(fmt.Sprintf("redundant-soc-data-%d", redundancyLevel)) + testData := fmt.Appendf(nil, "redundant-soc-data-%d", redundancyLevel) mockStorer := mockstorer.New() client, _, _, chanStore := newTestServer(t, testServerOptions{ diff --git a/pkg/feeds/factory/factory.go b/pkg/feeds/factory/factory.go index 18a60b17918..b4d26d1590f 100644 --- a/pkg/feeds/factory/factory.go +++ b/pkg/feeds/factory/factory.go @@ -19,17 +19,18 @@ func New(getter storage.Getter) feeds.Factory { return &factory{getter} } -func (f *factory) NewLookup(t feeds.Type, feed *feeds.Feed, getter storage.Getter) (feeds.Lookup, error) { - g := f.Getter - if getter != nil { - g = getter +func (f *factory) NewLookup(t feeds.Type, feed *feeds.Feed, opts ...feeds.FactoryOption) (feeds.Lookup, error) { + cfg := &feeds.FactoryConfig{Getter: f.Getter} + + for _, opt := range opts { + opt(cfg) } switch t { case feeds.Sequence: - return sequence.NewAsyncFinder(g, feed), nil + return sequence.NewAsyncFinder(cfg.Getter, feed), nil case feeds.Epoch: - return epochs.NewAsyncFinder(g, feed), nil + return epochs.NewAsyncFinder(cfg.Getter, feed), nil } return nil, feeds.ErrFeedTypeNotFound diff --git a/pkg/feeds/feed.go b/pkg/feeds/feed.go index 31eca88caa3..ca855c570be 100644 --- a/pkg/feeds/feed.go +++ b/pkg/feeds/feed.go @@ -25,7 +25,23 @@ var ErrFeedTypeNotFound = errors.New("no such feed type") // Factory creates feed lookups for different types of feeds. type Factory interface { - NewLookup(Type, *Feed, storage.Getter) (Lookup, error) + NewLookup(Type, *Feed, ...FactoryOption) (Lookup, error) +} + +// FactoryConfig holds configuration for the feed factory +type FactoryConfig struct { + Getter storage.Getter +} + +// LookupOption defines the type for functional options +type FactoryOption func(*FactoryConfig) + +// WithGetter is a factory option to use a custom storage.Getter, overriding +// the default one provided to the factory constructor. 
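The variadic option shape introduced in this hunk keeps NewLookup's signature stable while letting call sites such as the bzz handler pass feeds.WithGetter(st) only when they need a non-default getter; bootstrap.go can then drop its explicit nil argument. A stripped-down, self-contained version of the same pattern, with stand-in types:

package main

import "fmt"

type Getter interface{ Name() string }

type defaultGetter struct{}

func (defaultGetter) Name() string { return "default" }

// Config mirrors the role of feeds.FactoryConfig: it carries the getter
// that options may override.
type Config struct{ Getter Getter }

// Option mirrors feeds.FactoryOption.
type Option func(*Config)

// WithGetter overrides the constructor-supplied getter.
func WithGetter(g Getter) Option {
	return func(c *Config) { c.Getter = g }
}

// NewLookup applies options over the default config, as the factory does.
func NewLookup(opts ...Option) string {
	cfg := &Config{Getter: defaultGetter{}}
	for _, opt := range opts {
		opt(cfg)
	}
	return cfg.Getter.Name()
}

type replicaGetter struct{}

func (replicaGetter) Name() string { return "replica" }

func main() {
	fmt.Println(NewLookup())                            // default
	fmt.Println(NewLookup(WithGetter(replicaGetter{}))) // replica
}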
+func WithGetter(getter storage.Getter) FactoryOption { + return func(c *FactoryConfig) { + c.Getter = getter + } } // Type enumerates the time-based feed types diff --git a/pkg/file/redundancy/level.go b/pkg/file/redundancy/level.go index 411da15ec98..6045f29be8c 100644 --- a/pkg/file/redundancy/level.go +++ b/pkg/file/redundancy/level.go @@ -164,5 +164,4 @@ func GetReplicaCounts() [5]int { // we use an approximation as the successive powers of 2 var replicaCounts = [5]int{0, 2, 4, 8, 16} -// DefaultLevel is the default redundancy level const DefaultLevel = PARANOID diff --git a/pkg/file/redundancy/redundancy_test.go b/pkg/file/redundancy/redundancy_test.go index e6ad81a16a7..f534969ae46 100644 --- a/pkg/file/redundancy/redundancy_test.go +++ b/pkg/file/redundancy/redundancy_test.go @@ -84,7 +84,7 @@ func TestEncode(t *testing.T) { redundancy.SetErasureEncoder(newMockEncoder) // test on the data level - for _, level := range []redundancy.Level{redundancy.MEDIUM, redundancy.STRONG, redundancy.INSANE, redundancy.PARANOID} { + for _, level := range []redundancy.Level{redundancy.MEDIUM, redundancy.STRONG, redundancy.INSANE, redundancy.DefaultLevel} { for _, encrypted := range []bool{false, true} { maxShards := level.GetMaxShards() if encrypted { diff --git a/pkg/hive/hive_test.go b/pkg/hive/hive_test.go index 1f0d6eecd53..fbf39c459d4 100644 --- a/pkg/hive/hive_test.go +++ b/pkg/hive/hive_test.go @@ -147,7 +147,7 @@ func TestBroadcastPeers(t *testing.T) { underlays = []ma.Multiaddr{u, u2} } else { n := (i % 3) + 1 - for j := 0; j < n; j++ { + for j := range n { port := i + j*10000 u, err := ma.NewMultiaddr("/ip4/127.0.0.1/udp/" + strconv.Itoa(port)) if err != nil { diff --git a/pkg/node/bootstrap.go b/pkg/node/bootstrap.go index d9aa885f429..047c306aaf2 100644 --- a/pkg/node/bootstrap.go +++ b/pkg/node/bootstrap.go @@ -326,7 +326,7 @@ func getLatestSnapshot( } f := feeds.New(topic, common.BytesToAddress(owner)) - l, err := feedFactory.NewLookup(*t, f, nil) + l, err := feedFactory.NewLookup(*t, f) if err != nil { return nil, fmt.Errorf("feed lookup failed: %w", err) } diff --git a/pkg/replicas/export_test.go b/pkg/replicas/export_test.go index e8aee696a3a..341d51e1517 100644 --- a/pkg/replicas/export_test.go +++ b/pkg/replicas/export_test.go @@ -4,6 +4,4 @@ package replicas -var ( - Signer = signer -) +var Signer = signer diff --git a/pkg/replicas/getter_soc.go b/pkg/replicas/getter_soc.go index 93d30854db0..c0cdfdb8420 100644 --- a/pkg/replicas/getter_soc.go +++ b/pkg/replicas/getter_soc.go @@ -17,12 +17,12 @@ import ( "github.com/ethersphere/bee/v2/pkg/swarm" ) -// getter is the private implementation of storage.Getter, an interface for +// socGetter is the private implementation of storage.Getter, an interface for // retrieving chunks. This getter embeds the original simple chunk getter and extends it // to a multiplexed variant that fetches chunks with replicas for SOC. // // the strategy to retrieve a chunk that has replicas can be configured with a few parameters: -// - SOCRetryInterval: the delay before a new batch of replicas is fetched. +// - RetryInterval: the delay before a new batch of replicas is fetched. // - depth: 2^{depth} is the total number of additional replicas that have been uploaded // (by default, it is assumed to be 4, ie. 
total of 16) // - (not implemented) pivot: replicas with address in the proximity of pivot will be tried first @@ -31,8 +31,6 @@ type socGetter struct { level redundancy.Level } -var SOCRetryInterval = 300 * time.Millisecond - // NewSocGetter is the getter constructor func NewSocGetter(g storage.Getter, level redundancy.Level) storage.Getter { return &socGetter{Getter: g, level: level} @@ -55,9 +53,7 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk errcnt := 0 // concurrently call to retrieve chunk using original SOC address - wg.Add(1) - go func() { - defer wg.Done() + wg.Go(func() { ch, err := g.Getter.Get(ctx, addr) if err != nil { errc <- err @@ -68,7 +64,7 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk case resultC <- ch: case <-ctx.Done(): } - }() + }) // counters n := 0 // counts the replica addresses tried target := 2 // the number of replicas attempted to download in this batch @@ -109,9 +105,7 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk continue } - wg.Add(1) - go func() { - defer wg.Done() + wg.Go(func() { ch, err := g.Getter.Get(ctx, swarm.NewAddress(so.addr)) if err != nil { errc <- err @@ -122,13 +116,13 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk case resultC <- ch: case <-ctx.Done(): } - }() + }) n++ if n < target { continue } next = nil - wait = time.After(SOCRetryInterval) + wait = time.After(RetryInterval) } } diff --git a/pkg/replicas/getter_soc_test.go b/pkg/replicas/getter_soc_test.go index ed3e0c6125e..84428bbe18c 100644 --- a/pkg/replicas/getter_soc_test.go +++ b/pkg/replicas/getter_soc_test.go @@ -106,9 +106,9 @@ func TestSOCGetter(t *testing.T) { } // reset retry interval to speed up tests - retryInterval := replicas.SOCRetryInterval - defer func() { replicas.SOCRetryInterval = retryInterval }() - replicas.SOCRetryInterval = 100 * time.Millisecond + retryInterval := replicas.RetryInterval + defer func() { replicas.RetryInterval = retryInterval }() + replicas.RetryInterval = 100 * time.Millisecond // run the tests for _, tc := range tests { @@ -120,7 +120,7 @@ func TestSOCGetter(t *testing.T) { store.now = time.Now() ctx, cancel := context.WithCancel(context.Background()) if tc.found > tc.count { - wait := replicas.SOCRetryInterval / 2 * time.Duration(1+2*tc.level) + wait := replicas.RetryInterval / 2 * time.Duration(1+2*tc.level) go func() { time.Sleep(wait) cancel() @@ -197,7 +197,7 @@ func TestSOCGetter(t *testing.T) { }) t.Run("dispersion", func(t *testing.T) { - if err := dispersed(redundancy.Level(tc.level), ch, addresses); err != nil { + if err := dispersed(redundancy.Level(tc.level), addresses); err != nil { t.Fatalf("addresses are not dispersed: %v", err) } }) @@ -205,7 +205,7 @@ func TestSOCGetter(t *testing.T) { t.Run("latency", func(t *testing.T) { counts := redundancy.GetReplicaCounts() for i, latency := range latencies { - multiplier := latency / replicas.SOCRetryInterval + multiplier := latency / replicas.RetryInterval if multiplier > 0 && i < counts[multiplier-1] { t.Fatalf("incorrect latency for retrieving replica %d: %v", i, err) } diff --git a/pkg/replicas/getter_test.go b/pkg/replicas/getter_test.go index b11a55d12c8..dcfdc0e068e 100644 --- a/pkg/replicas/getter_test.go +++ b/pkg/replicas/getter_test.go @@ -241,7 +241,7 @@ func TestGetter(t *testing.T) { }) t.Run("dispersion", func(t *testing.T) { - if err := dispersed(redundancy.Level(tc.level), ch, addresses); err != nil { + if err := 
dispersed(redundancy.Level(tc.level), addresses); err != nil { t.Fatalf("addresses are not dispersed: %v", err) } }) diff --git a/pkg/replicas/putter_soc.go b/pkg/replicas/putter_soc.go index d9a70f02250..dab3065f741 100644 --- a/pkg/replicas/putter_soc.go +++ b/pkg/replicas/putter_soc.go @@ -9,6 +9,7 @@ package replicas import ( "context" "errors" + "fmt" "sync" "github.com/ethersphere/bee/v2/pkg/file/redundancy" @@ -37,7 +38,7 @@ func (p *socPutter) Put(ctx context.Context, ch swarm.Chunk) error { errs := []error{} // Put base chunk first if err := p.putter.Put(ctx, ch); err != nil { - return err + return fmt.Errorf("soc putter: put base chunk: %w", err) } if p.rLevel == 0 { return nil @@ -47,16 +48,13 @@ func (p *socPutter) Put(ctx context.Context, ch swarm.Chunk) error { errc := make(chan error, p.rLevel.GetReplicaCount()) wg := sync.WaitGroup{} for r := range rr.c { - wg.Add(1) - go func(r *socReplica) { - defer wg.Done() + wg.Go(func() { // create a new chunk with the replica address sch := swarm.NewChunk(swarm.NewAddress(r.addr), ch.Data()) - err := p.putter.Put(ctx, sch) - if err != nil { + if err := p.putter.Put(ctx, sch); err != nil { errc <- err } - }(r) + }) } wg.Wait() diff --git a/pkg/replicas/putter_soc_test.go b/pkg/replicas/putter_soc_test.go index e0315eadc42..365597335e7 100644 --- a/pkg/replicas/putter_soc_test.go +++ b/pkg/replicas/putter_soc_test.go @@ -127,7 +127,7 @@ func TestSocPutter(t *testing.T) { t.Fatal("original chunk missing") } t.Run("dispersion", func(t *testing.T) { - if err := dispersed(tc.level, ch, addrs); err != nil { + if err := dispersed(tc.level, addrs); err != nil { t.Fatalf("addresses are not dispersed: %v", err) } }) diff --git a/pkg/replicas/putter_test.go b/pkg/replicas/putter_test.go index cee4960f5b0..a6f7d85b873 100644 --- a/pkg/replicas/putter_test.go +++ b/pkg/replicas/putter_test.go @@ -104,7 +104,7 @@ func TestPutter(t *testing.T) { t.Fatal("original chunk missing") } t.Run("dispersion", func(t *testing.T) { - if err := dispersed(tc.level, ch, addrs); err != nil { + if err := dispersed(tc.level, addrs); err != nil { t.Fatalf("addresses are not dispersed: %v", err) } }) diff --git a/pkg/replicas/replica_test.go b/pkg/replicas/replica_test.go index 3a253f24f5b..9e11cdfa102 100644 --- a/pkg/replicas/replica_test.go +++ b/pkg/replicas/replica_test.go @@ -17,7 +17,7 @@ import ( ) // dispersed verifies that a set of addresses are maximally dispersed without repetition -func dispersed(level redundancy.Level, ch swarm.Chunk, addrs []swarm.Address) error { +func dispersed(level redundancy.Level, addrs []swarm.Address) error { nhoods := make(map[byte]bool) for _, addr := range addrs { diff --git a/pkg/replicas/replicas.go b/pkg/replicas/replicas.go index f6f7328ce87..20b3e00cc6d 100644 --- a/pkg/replicas/replicas.go +++ b/pkg/replicas/replicas.go @@ -40,10 +40,12 @@ func newReplicator(addr swarm.Address, rLevel redundancy.Level) *replicator { rr := &replicator{ addr: addr.Bytes(), sizes: redundancy.GetReplicaCounts(), - c: make(chan *replica, 16), + c: make(chan *replica, rLevel.GetReplicaCount()), rLevel: rLevel, } + go rr.replicas() + return rr } diff --git a/pkg/replicas/replicas_soc.go b/pkg/replicas/replicas_soc.go index d539a2c5cb0..6887caae7b5 100644 --- a/pkg/replicas/replicas_soc.go +++ b/pkg/replicas/replicas_soc.go @@ -29,7 +29,9 @@ func newSocReplicator(addr swarm.Address, rLevel redundancy.Level) *socReplicato c: make(chan *socReplica, rLevel.GetReplicaCount()), rLevel: rLevel, } + go rr.replicas() + return rr } @@ -74,7 +76,7 @@ 
func (rr *socReplicator) replicas() { // For example, mirrorBitsToMSB(0b00001101, 4) == 0b10110000 func mirrorBitsToMSB(v byte, n uint8) byte { var res byte - for i := uint8(0); i < n; i++ { + for i := range n { if (v & (1 << i)) != 0 { res |= (1 << (7 - i)) } diff --git a/pkg/replicas/replicas_soc_test.go b/pkg/replicas/replicas_soc_test.go new file mode 100644 index 00000000000..8e143820de3 --- /dev/null +++ b/pkg/replicas/replicas_soc_test.go @@ -0,0 +1,210 @@ +// Copyright 2025 The Swarm Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package replicas + +import ( + "fmt" + "testing" + + "github.com/ethersphere/bee/v2/pkg/file/redundancy" + "github.com/ethersphere/bee/v2/pkg/swarm" +) + +func TestMirrorBitsToMSB(t *testing.T) { + t.Parallel() + + tests := []struct { + v byte + n uint8 + expected byte + }{ + {0b00001101, 4, 0b10110000}, // Example from comment + {0b00000001, 1, 0b10000000}, + {0b00001111, 4, 0b11110000}, + {0b00000000, 4, 0b00000000}, + } + + for _, tt := range tests { + t.Run(fmt.Sprintf("v=%b_n=%d", tt.v, tt.n), func(t *testing.T) { + t.Parallel() + if got := mirrorBitsToMSB(tt.v, tt.n); got != tt.expected { + t.Errorf("mirrorBitsToMSB(%b, %d) = %b, want %b", tt.v, tt.n, got, tt.expected) + } + }) + } +} + +func TestCountBitsRequired(t *testing.T) { + t.Parallel() + + tests := []struct { + v uint8 + expected uint8 + }{ + {0, 1}, // Special case + {1, 1}, // 1 bit + {3, 2}, // 2 bits + {7, 3}, // 3 bits + {15, 4}, // 4 bits + {255, 8}, // 8 bits + } + + for _, tt := range tests { + t.Run(fmt.Sprintf("v=%d", tt.v), func(t *testing.T) { + t.Parallel() + if got := countBitsRequired(tt.v); got != tt.expected { + t.Errorf("countBitsRequired(%d) = %d, want %d", tt.v, got, tt.expected) + } + }) + } +} + +func TestReplicate_Line48(t *testing.T) { + t.Parallel() + + // Test line 48: addr[0] &= 0xFF >> bitsRequired + // This clears the first bitsRequired MSBs + baseAddr := swarm.MustParseHexAddress("FF00000000000000000000000000000000000000000000000000000000000000") + + tests := []struct { + bitsRequired uint8 + expectedMask byte + }{ + {1, 0x7F}, // 0b01111111 + {2, 0x3F}, // 0b00111111 + {4, 0x0F}, // 0b00001111 + } + + for _, tt := range tests { + t.Run(fmt.Sprintf("bits=%d", tt.bitsRequired), func(t *testing.T) { + t.Parallel() + // Test the mask calculation + mask := byte(0xFF >> tt.bitsRequired) + if mask != tt.expectedMask { + t.Errorf("mask = %b, want %b", mask, tt.expectedMask) + } + + // Test that applying the mask clears the MSBs + addr := make([]byte, 32) + copy(addr, baseAddr.Bytes()) + addr[0] &= mask + + if addr[0] != tt.expectedMask { + t.Errorf("after mask: addr[0] = %b, want %b", addr[0], tt.expectedMask) + } + + // Verify MSBs are cleared + msbMask := byte(0xFF) << (8 - tt.bitsRequired) + if addr[0]&msbMask != 0 { + t.Errorf("first %d bits should be zero, got %b", tt.bitsRequired, addr[0]) + } + }) + } +} + +func TestReplicate(t *testing.T) { + t.Parallel() + + baseAddr := swarm.MustParseHexAddress("1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef") + replicator := &socReplicator{addr: baseAddr.Bytes(), rLevel: redundancy.MEDIUM} + + tests := []struct { + i uint8 + bitsRequired uint8 + }{ + {0, 1}, + {1, 2}, + {3, 4}, + } + + for _, tt := range tests { + t.Run(fmt.Sprintf("i=%d_bits=%d", tt.i, tt.bitsRequired), func(t *testing.T) { + t.Parallel() + replica := replicator.replicate(tt.i, tt.bitsRequired) + + // Verify nonce matches first byte + if 
replica.nonce != replica.addr[0] { + t.Errorf("nonce = %d, want %d", replica.nonce, replica.addr[0]) + } + + // Verify remaining bytes unchanged + for i := 1; i < len(replica.addr); i++ { + if replica.addr[i] != baseAddr.Bytes()[i] { + t.Errorf("byte[%d] changed: got %d, want %d", i, replica.addr[i], baseAddr.Bytes()[i]) + } + } + + // Verify first byte differs from original (or was modified) + if replica.addr[0] == baseAddr.Bytes()[0] { + // This is okay if the code explicitly handles this case (line 50-52) + // But we should verify the logic worked + mask := byte(0xFF >> tt.bitsRequired) + mirroredBits := mirrorBitsToMSB(tt.i, tt.bitsRequired) + expected := (baseAddr.Bytes()[0] & mask) | mirroredBits + if expected == baseAddr.Bytes()[0] { + // Original would have been flipped, so replica should differ + if replica.addr[0] == baseAddr.Bytes()[0] { + t.Errorf("replica first byte should differ from original") + } + } + } + }) + } +} + +func TestReplicas(t *testing.T) { + t.Parallel() + + baseAddr := swarm.MustParseHexAddress("1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef") + + for _, rLevel := range []redundancy.Level{redundancy.MEDIUM, redundancy.STRONG, redundancy.INSANE, redundancy.DefaultLevel} { + t.Run(fmt.Sprintf("level_%d", rLevel), func(t *testing.T) { + t.Parallel() + + replicator := newSocReplicator(baseAddr, rLevel) + var replicas []*socReplica + for r := range replicator.c { + replicas = append(replicas, r) + } + + // Verify count + if len(replicas) != rLevel.GetReplicaCount() { + t.Errorf("got %d replicas, want %d", len(replicas), rLevel.GetReplicaCount()) + } + + // Verify structure and uniqueness + seen := make(map[string]bool) + for i, r := range replicas { + if len(r.addr) != 32 { + t.Errorf("replica %d: invalid address length", i) + } + if r.nonce != r.addr[0] { + t.Errorf("replica %d: nonce mismatch", i) + } + // Verify remaining bytes unchanged + for j := 1; j < 32; j++ { + if r.addr[j] != baseAddr.Bytes()[j] { + t.Errorf("replica %d: byte[%d] changed", i, j) + } + } + // Check uniqueness + addrStr := string(r.addr) + if seen[addrStr] { + t.Errorf("replica %d: duplicate address", i) + } + seen[addrStr] = true + } + + // Verify dispersion (at least some first bytes differ) + firstBytes := make(map[byte]bool) + for _, r := range replicas { + firstBytes[r.addr[0]] = true + } + if len(firstBytes) < len(replicas)/2 { + t.Errorf("poor dispersion: only %d unique first bytes", len(firstBytes)) + } + }) + } +} diff --git a/pkg/soc/validator.go b/pkg/soc/validator.go index b767572c579..84b0565ad55 100644 --- a/pkg/soc/validator.go +++ b/pkg/soc/validator.go @@ -26,10 +26,11 @@ func Valid(ch swarm.Chunk) bool { if err != nil { return false } - defaultSoc := ch.Address().Equal(address) - if !defaultSoc { - // check whether the SOC chunk is a replica + + // if the address does not match the chunk address, check if it is a disperse replica + if !ch.Address().Equal(address) { return bytes.Equal(ch.Address().Bytes()[1:32], address.Bytes()[1:32]) } + return true } diff --git a/pkg/topology/kademlia/kademlia_test.go b/pkg/topology/kademlia/kademlia_test.go index 4d534ddade8..2fa975adac7 100644 --- a/pkg/topology/kademlia/kademlia_test.go +++ b/pkg/topology/kademlia/kademlia_test.go @@ -2244,7 +2244,7 @@ func generateMultipleUnderlays(t *testing.T, n int, baseUnderlay string) []ma.Mu t.Helper() underlays := make([]ma.Multiaddr, n) - for i := 0; i < n; i++ { + for i := range n { multiaddr, err := ma.NewMultiaddr(baseUnderlay + strconv.Itoa(i)) if err != nil { 
t.Fatal(err) From 1dd14557a0aba8d03129ba8616c3cdbeca870f27 Mon Sep 17 00:00:00 2001 From: Ljubisa Gacevic Date: Mon, 10 Nov 2025 14:25:35 +0100 Subject: [PATCH 38/62] chore: add QUESTIONS.md --- pkg/api/bzz.go | 17 +-- pkg/api/feed.go | 5 +- pkg/api/soc.go | 10 +- pkg/replicas/QUESTIONS.md | 227 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 239 insertions(+), 20 deletions(-) create mode 100644 pkg/replicas/QUESTIONS.md diff --git a/pkg/api/bzz.go b/pkg/api/bzz.go index f84b0a51fc5..7174af9dc2c 100644 --- a/pkg/api/bzz.go +++ b/pkg/api/bzz.go @@ -61,6 +61,13 @@ func lookaheadBufferSize(size int64) int { return largeFileBufferSize } +func getRedundancyLevel(rLevel *redundancy.Level) redundancy.Level { + if rLevel != nil { + return *rLevel + } + return redundancy.PARANOID +} + func (s *Service) bzzUploadHandler(w http.ResponseWriter, r *http.Request) { span, logger, ctx := s.tracer.StartSpanFromContext(r.Context(), "post_bzz", s.logger.WithName("post_bzz").Build()) defer span.Finish() @@ -401,10 +408,7 @@ func (s *Service) serveReference(logger log.Logger, address swarm.Address, pathV cache = *headers.Cache } - rLevel := redundancy.PARANOID - if headers.RLevel != nil { - rLevel = *headers.RLevel - } + rLevel := getRedundancyLevel(headers.RLevel) ctx := r.Context() g := s.storer.Download(cache) @@ -624,10 +628,7 @@ func (s *Service) downloadHandler(logger log.Logger, w http.ResponseWriter, r *h jsonhttp.BadRequest(w, "could not parse headers") return } - rLevel := redundancy.PARANOID - if headers.RLevel != nil { - rLevel = *headers.RLevel - } + rLevel := getRedundancyLevel(headers.RLevel) var ( reader file.Joiner diff --git a/pkg/api/feed.go b/pkg/api/feed.go index 4941165d693..ca77f2b6e23 100644 --- a/pkg/api/feed.go +++ b/pkg/api/feed.go @@ -239,10 +239,7 @@ func (s *Service) feedPostHandler(w http.ResponseWriter, r *http.Request) { logger: logger, } - rLevel := redundancy.PARANOID - if headers.RedundancyLevel != nil { - rLevel = *headers.RedundancyLevel - } + rLevel := getRedundancyLevel(headers.RedundancyLevel) l := loadsave.New(s.storer.ChunkStore(), s.storer.Cache(), requestPipelineFactory(r.Context(), putter, false, 0), rLevel) feedManifest, err := manifest.NewDefaultManifest(l, false) diff --git a/pkg/api/soc.go b/pkg/api/soc.go index cd4d893f531..f4d46abf453 100644 --- a/pkg/api/soc.go +++ b/pkg/api/soc.go @@ -73,10 +73,7 @@ func (s *Service) socUploadHandler(w http.ResponseWriter, r *http.Request) { err error ) - rLevel := redundancy.PARANOID - if headers.RLevel != nil { - rLevel = *headers.RLevel - } + rLevel := getRedundancyLevel(headers.RLevel) if len(headers.StampSig) != 0 { if headers.RLevel != nil { @@ -262,10 +259,7 @@ func (s *Service) socGetHandler(w http.ResponseWriter, r *http.Request) { return } - rLevel := redundancy.PARANOID - if headers.RLevel != nil { - rLevel = *headers.RLevel - } + rLevel := getRedundancyLevel(headers.RLevel) address, err := soc.CreateAddress(paths.ID, paths.Owner) if err != nil { diff --git a/pkg/replicas/QUESTIONS.md b/pkg/replicas/QUESTIONS.md new file mode 100644 index 00000000000..9bbbefe3d14 --- /dev/null +++ b/pkg/replicas/QUESTIONS.md @@ -0,0 +1,227 @@ +# Replicas Package - Research Questions + +Questions about the design and implementation of the `replicas` package. + +## 1. Error Handling in Put Operations + +**Question:** What is the expected behavior when some replica `Put` operations succeed and others fail? 
+
+**Why this question matters:**
+
+- In `putter.go:36-60` and `putter_soc.go:37-65`, all errors are collected and joined
+- The function returns `errors.Join(errs...)` which includes all errors
+- If 15 out of 16 replicas succeed, is this considered a success or failure?
+- The caller receives all errors but may not know which replicas were successfully stored
+
+**Current Behavior:**
+
+- All errors are collected and returned together
+- No distinction between partial success and complete failure
+- Caller must inspect the joined error to determine success rate
+
+**Answer/Suggestion:**
+
+- Is partial replication acceptable? **This must be validated against Book of Swarm probability calculations**
+- Consider returning a structured result with success count and errors
+- If partial replication is acceptable, document the impact on reliability guarantees
+
+**Viktor**: Return an appropriate answer, and how many were expected/succeeded? We should decide if error is appropriate!
+
+---
+
+## 2. Context Cancellation in Put Operations
+
+**Question:** Are `Put` operations properly respecting context cancellation?
+
+**Why this question matters:**
+
+- In `putter.go` and `putter_soc.go`, the context is passed to `Put` calls
+- However, if the context is cancelled, all goroutines continue running until completion
+- There's no early termination when context is cancelled
+- This could lead to wasted resources if the caller cancels the operation
+
+**Current Implementation:**
+
+- Context is passed to `p.putter.Put(ctx, sch)` but cancellation is not checked
+- `wg.Wait()` waits for all goroutines regardless of context state
+
+**Answer/Suggestion:**
+
+- Check `ctx.Done()` in the goroutine loop
+- Cancel remaining operations when context is cancelled
+- **Important**: If cancellation is allowed, document the impact on replica count and reliability guarantees
+
+**Viktor**: User should be able to cancel.
+
+---
+
+## 3. Swarmageddon Error Strategy
+
+**Question:** Is the "Swarmageddon" error approach the right way to handle complete replica retrieval failure?
+
+**Why this question matters:**
+
+- `ErrSwarmageddon` is returned when all replicas fail to retrieve
+- The error message suggests this is an extremely rare event
+- However, the error handling doesn't distinguish between temporary network issues and permanent data loss
+- **Error Message Clarity**: The "Swarmageddon" term is not clear to users. The error message "swarmageddon has begun" doesn't explain what happened or what it means
+- **Semantic Confusion**: The term "Swarmageddon" historically refers to complete data loss on the entire network, but the code uses it when all replicas of a single chunk fail to retrieve
+- **Scope Question**: Is the extremely rare event (all replicas of one chunk failing) equivalent to assuming that data on the whole network is lost? Or is it just a local retrieval failure for that specific chunk?
+- **User Experience**: Users receiving this error may not understand:
+  - Whether this is a temporary issue or permanent data loss
+  - Whether it affects just their chunk or the entire network
+  - What actions they can take (retry? report? accept loss?)
+
+The question should be validated with the research team to:
+
+- Clarify the intended meaning of "Swarmageddon"
+- Determine if the error message should be more descriptive
+- Decide if the term should be changed to avoid confusion
+- Establish whether retry logic should be implemented
+
+**Viktor**: Return an appropriate answer, and how many were expected/succeeded?
We should decide if error is appropriate! + +--- + +## 4. Concurrent Put Operations with Disk I/O + +**Question:** Does it make sense to use concurrent `Put` operations when the underlying storage layer performs disk I/O operations that are serialized? + +**Why this question matters:** + +- The `putter.go` and `putter_soc.go` implementations use `sync.WaitGroup` to concurrently call `Put` for all replicas +- However, the underlying storage layer has multiple serialization points: + - **Upload Store Global Lock**: `pkg/storer/uploadstore.go:74` uses `db.Lock(uploadsLock)` which serializes all upload operations + - **Sharky Serialization**: `pkg/sharky/shard.go` processes writes sequentially per shard through channels + - **Transaction Locking**: `pkg/storer/internal/transaction/transaction.go:237` locks per chunk address + +**Current Behavior:** + +- Multiple goroutines are spawned to call `Put` concurrently +- All goroutines serialize at the upload store lock +- No actual parallelism is achieved +- Overhead of goroutine creation and context switching without benefit + +**Answer/Suggestion:** + +- If the global lock is intentional for consistency, consider making `Put` operations sequential to reduce overhead + +**Viktor**: Use sequential approach. + +--- + +## 5. Goroutine Explosion with Multiple Chunks + +**Question:** Is there a risk of goroutine explosion when processing multiple chunks concurrently, and should there be a limit on concurrent replica operations? + +**Why this question matters:** + +- Both `Put` and `Get` operations spawn multiple goroutines per chunk +- Both CAC and SOC implementations have the same goroutine spawning pattern +- Multiple chunks can be processed concurrently from various sources +- `Get` and `Put` operations can happen simultaneously, compounding the goroutine count + +**Concurrent Scenarios:** + +**PUT Operations:** + +- **Pusher Service**: `pkg/pusher/pusher.go:66` allows `ConcurrentPushes = swarm.Branches = 128` concurrent chunk pushes +- **API SOC Uploads**: `pkg/api/soc.go:112` - SOC chunk uploads via API (multiple concurrent clients) +- **API Chunk Stream**: `pkg/api/chunk_stream.go:200` - WebSocket chunk stream uploads (multiple concurrent clients) +- **File Uploads**: `pkg/file/pipeline/hashtrie/hashtrie.go:53` - File upload pipeline (root chunk replicas) + +**GET Operations:** + +- **File Joiner**: `pkg/file/joiner/joiner.go:135` - Uses `replicas.NewGetter` for root chunk retrieval +- **API Feed Retrieval**: `pkg/api/feed.go:80` - Uses `replicas.NewSocGetter` for feed chunk retrieval +- **API SOC Retrieval**: `pkg/api/soc.go:279` - Uses `replicas.NewSocGetter` for SOC chunk retrieval +- **Puller Service**: Chunks being pulled from network (multiple concurrent pulls) +- **Multiple concurrent API clients** requesting chunks simultaneously + +**Goroutine Calculation Examples:** + +**PUT Operations (Worst Case - PARANOID level):** + +- 128 concurrent chunks (from pusher) × (16 replicas + 1 replicator) = **2,176 goroutines** +- Additional concurrent uploads from API clients can add more + +**GET Operations (Worst Case - PARANOID level):** + +- 128 concurrent Get calls × (1 original + 16 replicas + 1 replicator) = **2,304 goroutines** +- Note: Get operations spawn goroutines in batches, but if early batches fail, all goroutines can accumulate + +**Combined Worst Case:** + +- 128 concurrent Put + 128 concurrent Get = **4,480+ goroutines** just from the replicas package +- Plus goroutines from: + - Other system components + - Network I/O operations + - 
Storage layer operations
+
+**Current Behavior:**
+
+- No limit on concurrent `Put` operations across chunks
+- No limit on concurrent `Get` operations across chunks
+- No limit on total goroutines spawned by the replicas package
+- Each chunk upload spawns all replicas concurrently (no batching)
+- Each chunk retrieval spawns replicas in batches, but batches can accumulate
+- No backpressure mechanism to prevent goroutine explosion
+- The upload store's global lock (`uploadsLock`) serializes Put operations but doesn't prevent goroutine accumulation
+
+**Answer/Suggestion:**
+
+- Consider implementing a semaphore or worker pool to limit concurrent replica operations globally (see the appendix sketch at the end of this document)
+- Add a global limit on concurrent `Put` operations across all chunks
+- Add a global limit on concurrent `Get` operations across all chunks
+- Consider sequential `Put` operations per chunk to reduce goroutine count (though this may impact performance)
+- Consider limiting the number of concurrent Get batches that can be in-flight
+- Monitor goroutine count in production to validate if this is a real issue
+- Consider if the upload store's global lock already provides sufficient backpressure (it serializes but doesn't limit goroutine count)
+
+**Viktor**: Investigate limits before we introduce limitations.
+
+---
+
+## 6. Goroutine Usage in socReplicator
+
+**Question:** Is the goroutine in `socReplicator` necessary, and could the replica address generation functionality be exported for external verification tools like beekeeper?
+
+**Why this question matters:**
+
+- The `socReplicator` (see `pkg/replicas/replicas_soc.go:25-36`) uses a goroutine to generate replica addresses
+- The computation is trivial: simple bit manipulation operations that generate at most 16 addresses
+- The goroutine overhead may exceed the computation time for such simple operations
+- The address generation is deterministic and could be exported for external verification tools
+
+**Benchmark Results:**
+
+Benchmark tests comparing synchronous vs asynchronous implementations show:
+
+- **4.8x faster**: 299 ns/op (sync) vs 1,427 ns/op (async)
+- **33% less memory**: 896 B/op (sync) vs 1,328 B/op (async)
+- **53% fewer allocations**: 17 allocs/op (sync) vs 36 allocs/op (async)
+
+The goroutine overhead significantly outweighs the trivial bit manipulation work.
+
+**Answer/Suggestion:**
+
+- Make `socReplicator` address generation synchronous (remove goroutine)
+- Export a function like `GenerateSocReplicaAddresses(addr swarm.Address, level redundancy.Level) []swarm.Address` for external use (a sketch follows in the appendix below)
+- This would allow beekeeper and other verification tools to independently calculate and verify replica addresses
+
+**Viktor**: Improve to use it without a worker.
+
+---
+
+## 7. Exponential Backoff Strategy in Get Operations
+
+**Question:** Is the exponential doubling strategy (2, 4, 8, 16 replicas per batch) optimal for retrieval?
+
+**Why this question matters:**
+
+- In `getter.go:79` and `getter_soc.go:70`, the number of replicas attempted doubles each `RetryInterval`
+- This means: try 2, wait 300ms, try 4 more, wait 300ms, try 8 more, etc.
+- The strategy assumes that if early replicas fail, more should be tried
+- However, this might delay successful retrieval if early batches fail due to temporary issues
+
+**Viktor**: If Redundancy Level is specified on PUT, the most efficient approach is to use that exact one on GET. Leave it as is.
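+
+---
+
+## Appendix: Illustrative Sketches
+
+The snippets below are non-authoritative sketches of the suggestions above, not proposed implementations. Names such as `GenerateSocReplicaAddresses`, `putWithLimit`, and `replicaPutSlots` are hypothetical, and the sketches assume the `context`, `storage`, `redundancy`, and `swarm` imports already used in this package.
+
+First, a minimal synchronous version of the replica address generation proposed in question 6, reusing the package's existing `mirrorBitsToMSB` and `countBitsRequired` helpers and mirroring the bit manipulation in `replicas_soc.go`:
+
+```go
+// GenerateSocReplicaAddresses is a hypothetical exported helper that computes
+// the replica addresses of addr synchronously, without a worker goroutine.
+func GenerateSocReplicaAddresses(addr swarm.Address, level redundancy.Level) []swarm.Address {
+	count := level.GetReplicaCount()
+	if count == 0 {
+		return nil
+	}
+	bitsRequired := countBitsRequired(uint8(count - 1))
+	out := make([]swarm.Address, 0, count)
+	for i := uint8(0); i < uint8(count); i++ {
+		b := make([]byte, swarm.HashSize)
+		copy(b, addr.Bytes())
+		// overwrite the leading bits with the mirrored counter so that
+		// successive addresses stay maximally dispersed
+		b[0] &= 0xFF >> bitsRequired
+		b[0] |= mirrorBitsToMSB(i, bitsRequired)
+		if b[0] == addr.Bytes()[0] {
+			// the iteration reproduced the original address; flip the bit
+			// after the mirrored prefix, as replicate() does
+			b[0] ^= 1 << (bitsRequired - 1)
+		}
+		out = append(out, swarm.NewAddress(b))
+	}
+	return out
+}
+```
+
+Second, a sketch of the global concurrency cap suggested in question 5, which also honours caller cancellation while waiting for a slot (question 2). The limit of 64 is an arbitrary placeholder pending the investigation Viktor asked for:
+
+```go
+// replicaPutSlots caps concurrent replica puts across all chunks.
+var replicaPutSlots = make(chan struct{}, 64)
+
+func putWithLimit(ctx context.Context, p storage.Putter, ch swarm.Chunk) error {
+	select {
+	case replicaPutSlots <- struct{}{}: // acquire a slot
+	case <-ctx.Done(): // respect cancellation while queued
+		return ctx.Err()
+	}
+	defer func() { <-replicaPutSlots }() // release the slot
+	return p.Put(ctx, ch)
+}
+```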
+ +--- From 4a3dc7be73f99b6801c75249c42d40d66df22e01 Mon Sep 17 00:00:00 2001 From: Ljubisa Gacevic Date: Tue, 11 Nov 2025 10:09:55 +0100 Subject: [PATCH 39/62] refactor(replicas): refactor socReplicator from channel-based to slice-based --- pkg/replicas/getter.go | 2 +- pkg/replicas/getter_soc.go | 131 +++++++++++++++--------------- pkg/replicas/putter_soc.go | 6 +- pkg/replicas/replicas_soc.go | 47 +++++------ pkg/replicas/replicas_soc_test.go | 35 +++----- 5 files changed, 102 insertions(+), 119 deletions(-) diff --git a/pkg/replicas/getter.go b/pkg/replicas/getter.go index 6eeb57c82bf..276bcbc6475 100644 --- a/pkg/replicas/getter.go +++ b/pkg/replicas/getter.go @@ -57,7 +57,7 @@ func (g *getter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk, e // workers each fetching a replica resultC := make(chan swarm.Chunk) // errc collects the errors - errc := make(chan error, 17) + errc := make(chan error, g.level.GetReplicaCount()+1) var errs error errcnt := 0 diff --git a/pkg/replicas/getter_soc.go b/pkg/replicas/getter_soc.go index c0cdfdb8420..a408d306293 100644 --- a/pkg/replicas/getter_soc.go +++ b/pkg/replicas/getter_soc.go @@ -37,24 +37,24 @@ func NewSocGetter(g storage.Getter, level redundancy.Level) storage.Getter { } // Get makes the socGetter satisfy the storage.Getter interface +// It attempts to fetch the chunk by its original address first. +// If the original address does not return a result within RetryInterval, +// it starts dispatching exponentially growing batches of replica requests +// at each RetryInterval until a chunk is found or all replicas are tried. func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk, err error) { ctx, cancel := context.WithCancel(ctx) defer cancel() var wg sync.WaitGroup - defer wg.Wait() + replicas := NewSocReplicator(addr, g.level).Replicas() - // channel that the results (retrieved chunks) are gathered to from concurrent - // workers each fetching a replica resultC := make(chan swarm.Chunk) - // errc collects the errors - errc := make(chan error, g.level.GetReplicaCount()+1) - var errs error - errcnt := 0 + errc := make(chan error, 1+len(replicas)) + + worker := func(chunkAddr swarm.Address) { + defer wg.Done() - // concurrently call to retrieve chunk using original SOC address - wg.Go(func() { - ch, err := g.Getter.Get(ctx, addr) + ch, err := g.Getter.Get(ctx, chunkAddr) if err != nil { errc <- err return @@ -64,67 +64,70 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk case resultC <- ch: case <-ctx.Done(): } - }) - // counters - n := 0 // counts the replica addresses tried - target := 2 // the number of replicas attempted to download in this batch - total := g.level.GetReplicaCount() - - // - rr := newSocReplicator(addr, g.level) - next := rr.c - var wait <-chan time.Time // nil channel to disable case - // addresses used are doubling each period of search expansion - // (at intervals of RetryInterval) - for level := uint8(0); level <= uint8(g.level); { - select { - // at least one chunk is retrieved, cancel the rest and return early - case chunk := <-resultC: - cancel() - return chunk, nil + } - case err = <-errc: - errs = errors.Join(errs, err) - errcnt++ - if errcnt > total { - return nil, errors.Join(ErrSwarmageddon, errs) - } + // try the original address + wg.Add(1) + go worker(addr) - // ticker switches on the address channel - case <-wait: - next = rr.c - level++ - target = 1 << level - n = 0 - continue - - // getting the addresses in order - case so := 
<-next: - if so == nil { - next = nil - continue - } + // This goroutine waits for RetryInterval, then dispatches the first + // batch, waits again, dispatches the second, and so on. + go func() { + replicaIndex := 0 + batchLevel := uint8(1) // start with 1 (batch size 1 << 1 = 2) - wg.Go(func() { - ch, err := g.Getter.Get(ctx, swarm.NewAddress(so.addr)) - if err != nil { - errc <- err - return + timer := time.NewTimer(RetryInterval) + defer timer.Stop() + + for { + select { + case <-timer.C: + batchSize := 1 << batchLevel // 2, 4, 8... + sentInBatch := 0 + + for sentInBatch < batchSize && replicaIndex < len(replicas) { + addr := replicas[replicaIndex] + replicaIndex++ + sentInBatch++ + + wg.Add(1) + go worker(addr) } - select { - case resultC <- ch: - case <-ctx.Done(): + if replicaIndex >= len(replicas) { + // all replicas have been dispatched + return } - }) - n++ - if n < target { - continue + + // reset timer for the next batch + batchLevel++ + timer.Reset(RetryInterval) + + case <-ctx.Done(): + return } - next = nil - wait = time.After(RetryInterval) } - } + }() + + // collect results + waitC := make(chan struct{}) + go func() { + wg.Wait() + close(waitC) + }() - return nil, nil + var errs error + for { + select { + case chunk := <-resultC: + cancel() // cancel the context to stop all other workers. + return chunk, nil + + case err := <-errc: + errs = errors.Join(errs, err) + + case <-waitC: + return nil, errors.Join(ErrSwarmageddon, errs) + } + } } diff --git a/pkg/replicas/putter_soc.go b/pkg/replicas/putter_soc.go index dab3065f741..43042be21fd 100644 --- a/pkg/replicas/putter_soc.go +++ b/pkg/replicas/putter_soc.go @@ -44,13 +44,13 @@ func (p *socPutter) Put(ctx context.Context, ch swarm.Chunk) error { return nil } - rr := newSocReplicator(ch.Address(), p.rLevel) + rr := NewSocReplicator(ch.Address(), p.rLevel) errc := make(chan error, p.rLevel.GetReplicaCount()) wg := sync.WaitGroup{} - for r := range rr.c { + for _, replicaAddr := range rr.Replicas() { wg.Go(func() { // create a new chunk with the replica address - sch := swarm.NewChunk(swarm.NewAddress(r.addr), ch.Data()) + sch := swarm.NewChunk(replicaAddr, ch.Data()) if err := p.putter.Put(ctx, sch); err != nil { errc <- err } diff --git a/pkg/replicas/replicas_soc.go b/pkg/replicas/replicas_soc.go index 6887caae7b5..822e30c28ee 100644 --- a/pkg/replicas/replicas_soc.go +++ b/pkg/replicas/replicas_soc.go @@ -18,31 +18,38 @@ import ( // socReplicator running the find for replicas type socReplicator struct { addr []byte // chunk address - c chan *socReplica rLevel redundancy.Level } -// newSocReplicator socReplicator constructor -func newSocReplicator(addr swarm.Address, rLevel redundancy.Level) *socReplicator { +// NewSocReplicator socReplicator constructor +func NewSocReplicator(addr swarm.Address, rLevel redundancy.Level) *socReplicator { rr := &socReplicator{ addr: addr.Bytes(), - c: make(chan *socReplica, rLevel.GetReplicaCount()), rLevel: rLevel, } - go rr.replicas() - return rr } -// socReplica of the mined SOC chunk (address) that serve as replicas -type socReplica struct { - addr []byte // byte slice of SOC address - nonce uint8 // byte of the mined nonce +// Replicas returns all replica addresses as a slice +// the order of replicas is so that addresses are always maximally dispersed +// in successive sets of addresses. 
+// I.e., the binary tree representing the new addresses prefix bits up to depth is balanced +func (rr *socReplicator) Replicas() []swarm.Address { + replicaCount := rr.rLevel.GetReplicaCount() + replicas := make([]swarm.Address, replicaCount) + // number of bits required to represent all replicas + bitsRequired := countBitsRequired(uint8(replicaCount - 1)) + // replicate iteration saturates all leading bits in generated addresses until bitsRequired + for i := uint8(0); i < uint8(replicaCount); i++ { + // create soc replica (with address and nonce) + replicas[i] = rr.replicate(i, bitsRequired) + } + return replicas } // replicate returns a replica params structure seeded with a byte of entropy as argument -func (rr *socReplicator) replicate(i uint8, bitsRequired uint8) (sp *socReplica) { +func (rr *socReplicator) replicate(i uint8, bitsRequired uint8) swarm.Address { addr := make([]byte, 32) copy(addr, rr.addr) mirroredBits := mirrorBitsToMSB(i, bitsRequired) @@ -53,23 +60,7 @@ func (rr *socReplicator) replicate(i uint8, bitsRequired uint8) (sp *socReplica) // xor MSB after the mirrored bits because the iteration found the original address addr[0] ^= 1 << (bitsRequired - 1) } - return &socReplica{addr: addr, nonce: addr[0]} -} - -// replicas enumerates replica parameters (nonce) pushing it in a channel given as argument -// the order of replicas is so that addresses are always maximally dispersed -// in successive sets of addresses. -// I.e., the binary tree representing the new addresses prefix bits up to depth is balanced -func (rr *socReplicator) replicas() { - defer close(rr.c) - // number of bits required to represent all replicas - bitsRequired := countBitsRequired(uint8(rr.rLevel.GetReplicaCount() - 1)) - // replicate iteration saturates all leading bits in generated addresses until bitsRequired - for i := uint8(0); i < uint8(rr.rLevel.GetReplicaCount()); i++ { - // create soc replica (with address and nonce) - r := rr.replicate(i, bitsRequired) - rr.c <- r - } + return swarm.NewAddress(addr) } // mirrorBitsToMSB mirrors the lowest n bits of v to the most significant bits of a byte. 
diff --git a/pkg/replicas/replicas_soc_test.go b/pkg/replicas/replicas_soc_test.go index 8e143820de3..2e69dae2117 100644 --- a/pkg/replicas/replicas_soc_test.go +++ b/pkg/replicas/replicas_soc_test.go @@ -124,20 +124,15 @@ func TestReplicate(t *testing.T) { t.Parallel() replica := replicator.replicate(tt.i, tt.bitsRequired) - // Verify nonce matches first byte - if replica.nonce != replica.addr[0] { - t.Errorf("nonce = %d, want %d", replica.nonce, replica.addr[0]) - } - // Verify remaining bytes unchanged - for i := 1; i < len(replica.addr); i++ { - if replica.addr[i] != baseAddr.Bytes()[i] { - t.Errorf("byte[%d] changed: got %d, want %d", i, replica.addr[i], baseAddr.Bytes()[i]) + for i := 1; i < len(replica.Bytes()); i++ { + if replica.Bytes()[i] != baseAddr.Bytes()[i] { + t.Errorf("byte[%d] changed: got %d, want %d", i, replica.Bytes()[i], baseAddr.Bytes()[i]) } } // Verify first byte differs from original (or was modified) - if replica.addr[0] == baseAddr.Bytes()[0] { + if replica.Bytes()[0] == baseAddr.Bytes()[0] { // This is okay if the code explicitly handles this case (line 50-52) // But we should verify the logic worked mask := byte(0xFF >> tt.bitsRequired) @@ -145,7 +140,7 @@ func TestReplicate(t *testing.T) { expected := (baseAddr.Bytes()[0] & mask) | mirroredBits if expected == baseAddr.Bytes()[0] { // Original would have been flipped, so replica should differ - if replica.addr[0] == baseAddr.Bytes()[0] { + if replica.Bytes()[0] == baseAddr.Bytes()[0] { t.Errorf("replica first byte should differ from original") } } @@ -159,15 +154,12 @@ func TestReplicas(t *testing.T) { baseAddr := swarm.MustParseHexAddress("1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef") - for _, rLevel := range []redundancy.Level{redundancy.MEDIUM, redundancy.STRONG, redundancy.INSANE, redundancy.DefaultLevel} { + for _, rLevel := range []redundancy.Level{redundancy.MEDIUM, redundancy.STRONG, redundancy.INSANE, redundancy.PARANOID} { t.Run(fmt.Sprintf("level_%d", rLevel), func(t *testing.T) { t.Parallel() - replicator := newSocReplicator(baseAddr, rLevel) - var replicas []*socReplica - for r := range replicator.c { - replicas = append(replicas, r) - } + replicator := NewSocReplicator(baseAddr, rLevel) + replicas := replicator.Replicas() // Verify count if len(replicas) != rLevel.GetReplicaCount() { @@ -177,20 +169,17 @@ func TestReplicas(t *testing.T) { // Verify structure and uniqueness seen := make(map[string]bool) for i, r := range replicas { - if len(r.addr) != 32 { + if len(r.Bytes()) != 32 { t.Errorf("replica %d: invalid address length", i) } - if r.nonce != r.addr[0] { - t.Errorf("replica %d: nonce mismatch", i) - } // Verify remaining bytes unchanged for j := 1; j < 32; j++ { - if r.addr[j] != baseAddr.Bytes()[j] { + if r.Bytes()[j] != baseAddr.Bytes()[j] { t.Errorf("replica %d: byte[%d] changed", i, j) } } // Check uniqueness - addrStr := string(r.addr) + addrStr := string(r.Bytes()) if seen[addrStr] { t.Errorf("replica %d: duplicate address", i) } @@ -200,7 +189,7 @@ func TestReplicas(t *testing.T) { // Verify dispersion (at least some first bytes differ) firstBytes := make(map[byte]bool) for _, r := range replicas { - firstBytes[r.addr[0]] = true + firstBytes[r.Bytes()[0]] = true } if len(firstBytes) < len(replicas)/2 { t.Errorf("poor dispersion: only %d unique first bytes", len(firstBytes)) From be5ba52e74ba322575ef1a06474a64385cae1491 Mon Sep 17 00:00:00 2001 From: Ljubisa Gacevic Date: Tue, 11 Nov 2025 13:25:40 +0100 Subject: [PATCH 40/62] fix: ensure original address first 
returns response --- pkg/replicas/getter_soc.go | 59 ++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/pkg/replicas/getter_soc.go b/pkg/replicas/getter_soc.go index a408d306293..c53a219ecd5 100644 --- a/pkg/replicas/getter_soc.go +++ b/pkg/replicas/getter_soc.go @@ -42,6 +42,15 @@ func NewSocGetter(g storage.Getter, level redundancy.Level) storage.Getter { // it starts dispatching exponentially growing batches of replica requests // at each RetryInterval until a chunk is found or all replicas are tried. func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk, err error) { + // try original address first + ch, err = g.Getter.Get(ctx, addr) + if err == nil { + return ch, nil + } + + var errs error + errs = errors.Join(errs, err) + ctx, cancel := context.WithCancel(ctx) defer cancel() @@ -49,7 +58,7 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk replicas := NewSocReplicator(addr, g.level).Replicas() resultC := make(chan swarm.Chunk) - errc := make(chan error, 1+len(replicas)) + errc := make(chan error, len(replicas)) worker := func(chunkAddr swarm.Address) { defer wg.Done() @@ -66,41 +75,42 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk } } - // try the original address - wg.Add(1) - go worker(addr) - - // This goroutine waits for RetryInterval, then dispatches the first - // batch, waits again, dispatches the second, and so on. go func() { replicaIndex := 0 batchLevel := uint8(1) // start with 1 (batch size 1 << 1 = 2) + dispatchBatch := func() (done bool) { + batchSize := 1 << batchLevel // 2, 4, 8... + sentInBatch := 0 + + for sentInBatch < batchSize && replicaIndex < len(replicas) { + addr := replicas[replicaIndex] + replicaIndex++ + sentInBatch++ + + wg.Add(1) + go worker(addr) + } + + batchLevel++ + + // we are done if all replicas are sent + return replicaIndex >= len(replicas) + } + + if done := dispatchBatch(); done { + return + } + timer := time.NewTimer(RetryInterval) defer timer.Stop() for { select { case <-timer.C: - batchSize := 1 << batchLevel // 2, 4, 8... - sentInBatch := 0 - - for sentInBatch < batchSize && replicaIndex < len(replicas) { - addr := replicas[replicaIndex] - replicaIndex++ - sentInBatch++ - - wg.Add(1) - go worker(addr) - } - - if replicaIndex >= len(replicas) { - // all replicas have been dispatched + if done := dispatchBatch(); done { return } - - // reset timer for the next batch - batchLevel++ timer.Reset(RetryInterval) case <-ctx.Done(): @@ -116,7 +126,6 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk close(waitC) }() - var errs error for { select { case chunk := <-resultC: From 999a5128b3c375d99f683e013a9ffcc0f8d16e68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?= Date: Wed, 12 Nov 2025 00:10:51 +0100 Subject: [PATCH 41/62] feat(combinator): add swarm address combinator package --- pkg/replicas/combinator/combinator.go | 130 ++++++++++++ pkg/replicas/combinator/combinator_test.go | 217 +++++++++++++++++++++ 2 files changed, 347 insertions(+) create mode 100644 pkg/replicas/combinator/combinator.go create mode 100644 pkg/replicas/combinator/combinator_test.go diff --git a/pkg/replicas/combinator/combinator.go b/pkg/replicas/combinator/combinator.go new file mode 100644 index 00000000000..f4a2eecde76 --- /dev/null +++ b/pkg/replicas/combinator/combinator.go @@ -0,0 +1,130 @@ +// Copyright 2025 The Swarm Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package combinator + +import ( + "iter" + "math/bits" + + "github.com/ethersphere/bee/v2/pkg/swarm" +) + +// maxDepth defines the maximum depth of the combination generation, serving as a +// safeguard against excessive memory allocation and computation time. A depth of +// 24 results in approximately 16.7 million combinations. +const maxDepth = 24 + +// IterateAddressCombinations returns an iterator (iter.Seq) that yields bit +// combinations of an address. The combinations are produced in order of +// increasing 'depth', starting from depth 0. This approach allows for +// memory-efficient iteration over a large set of combinations. +// +// # Performance and Memory Considerations +// +// For optimal performance, this function yields the same byte slice on each +// iteration, modifying its content in place. This avoids memory allocations +// within the loop. +// +// Consequently, it is unsafe to retain a reference to the yielded slice after +// the loop advances. If the slice needs to be stored, a copy must be created. +// +// Example of correct usage: +// +// // Safe: A copy of the slice is created and stored. +// var allCombinations [][]byte +// for combo := range IterateAddressCombinations(data) { +// allCombinations = append(allCombinations, slices.Clone(combo)) +// } +// +// Example of incorrect usage: +// +// // Unsafe: This will result in a slice where all elements point to the +// // same underlying byte slice, which will hold the value of the last +// // combination generated. +// var allCombinationsBad [][]byte +// for combo := range IterateAddressCombinations(data) { +// allCombinationsBad = append(allCombinationsBad, combo) +// } +// +// The iterator terminates if the depth exceeds maxDepth or if the input data +// slice is not long enough for the bit manipulations required at the next +// depth level. +func IterateAddressCombinations(addr swarm.Address) iter.Seq[swarm.Address] { + // State variables for the iterator closure. + // A single buffer is used, mutated, and yielded in each iteration. + // It is initialized with a copy of the original address data. + currentSlice := append([]byte{}, addr.Bytes()...) + + var currentDepth int = 0 + var bytesNeeded int = 0 + // nextDepthIndex marks the combination index at which the depth increases + // (e.g., 1, 2, 4, 8, ...). + var nextDepthIndex int = 1 + // prevCombinationIndex is used to calculate the bitwise difference for + // efficient state transitions. + var prevCombinationIndex int = 0 + + return func(yield func(swarm.Address) bool) { + // combinationIndex iterates through all possible combinations. + for combinationIndex := 0; ; combinationIndex++ { + // When the combinationIndex reaches the next power of two, the depth + // of bit combinations is increased for subsequent iterations. + if combinationIndex >= nextDepthIndex { + // The depth is determined by the number of bits in the combinationIndex. + // combinationIndex=1 -> depth=1 + // combinationIndex=2 -> depth=2 + // combinationIndex=4 -> depth=3 + // combinationIndex=8 -> depth=4 + currentDepth = bits.Len(uint(combinationIndex)) + // Set the threshold for the next depth increase. + // For depth=1 (idx=1), next threshold is 2. + // For depth=2 (idx=2,3), next threshold is 4. + // For depth=3 (idx=4..7), next threshold is 8. + nextDepthIndex = 1 << currentDepth + + // Boundary checks are performed only when the depth changes. 
+ if currentDepth > maxDepth { + return // Iteration completed up to the defined maximum depth. + } + + bytesNeeded = (currentDepth + 7) / 8 // Ceiling of integer division. + + if len(addr.Bytes()) < bytesNeeded { + // The data slice is too short for the current depth. + return + } + } + + // The generation logic is optimized to flip only the bits that + // differ from the previous combination. For combinationIndex=0, + // (0^0) is 0, so no bits are flipped. For subsequent indices, + // the buffer is XORed with the difference between the current and + // previous combination indices. + bitsToFlip := combinationIndex ^ prevCombinationIndex + for bitIndex := 0; bitIndex < currentDepth; bitIndex++ { + // Check if the bit at bitIndex is set in the difference. + if (bitsToFlip>>bitIndex)&1 == 1 { + // If set, flip the corresponding bit in the buffer. + byteIndex := bitIndex / 8 + bitPositionInByte := 7 - (bitIndex % 8) + bitMask := byte(1 << bitPositionInByte) + currentSlice[byteIndex] ^= bitMask + } + } + prevCombinationIndex = combinationIndex // Update for the next iteration. + + // Yield the mutated slice. If yield returns false, the consumer + // has requested to stop the iteration. + if !yield(swarm.NewAddress(currentSlice)) { + return // Consumer-requested stop. + } + + // Check for integer overflow on the combinationIndex. + if combinationIndex < 0 { + return // Integer overflow; terminate iteration. + } + } + } +} diff --git a/pkg/replicas/combinator/combinator_test.go b/pkg/replicas/combinator/combinator_test.go new file mode 100644 index 00000000000..a321cfe7489 --- /dev/null +++ b/pkg/replicas/combinator/combinator_test.go @@ -0,0 +1,217 @@ +// Copyright 2025 The Swarm Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package combinator_test + +import ( + "testing" + + "github.com/ethersphere/bee/v2/pkg/replicas/combinator" + "github.com/ethersphere/bee/v2/pkg/swarm" +) + +func TestIterateAddressCombinationsSeq(t *testing.T) { + t.Run("Iterate up to depth=3", func(t *testing.T) { + input := swarm.NewAddress(make([]byte, swarm.HashSize)) + allCombinations := make(map[string]bool) + count := 0 + maxItems := 8 // 2^3 (which covers depth=0, 1, 2, 3) + + // These are the 8 combinations we expect for depth=3 + expected := []swarm.Address{ + swarm.NewAddress(append([]byte{0b00000000}, make([]byte, swarm.HashSize-1)...)), // i=0 (depth=0) + swarm.NewAddress(append([]byte{0b10000000}, make([]byte, swarm.HashSize-1)...)), // i=1 (depth=1) + swarm.NewAddress(append([]byte{0b01000000}, make([]byte, swarm.HashSize-1)...)), // i=2 (depth=2) + swarm.NewAddress(append([]byte{0b11000000}, make([]byte, swarm.HashSize-1)...)), // i=3 (depth=2) + swarm.NewAddress(append([]byte{0b00100000}, make([]byte, swarm.HashSize-1)...)), // i=4 (depth=3) + swarm.NewAddress(append([]byte{0b10100000}, make([]byte, swarm.HashSize-1)...)), // i=5 (depth=3) + swarm.NewAddress(append([]byte{0b01100000}, make([]byte, swarm.HashSize-1)...)), // i=6 (depth=3) + swarm.NewAddress(append([]byte{0b11100000}, make([]byte, swarm.HashSize-1)...)), // i=7 (depth=3) + } + expectedMap := addressesToHexMapHelper(expected) + + // Manually stop the iterator after 8 items. 
+ for combo := range combinator.IterateAddressCombinations(input) { + if count >= maxItems { + break // Stop iterating + } + + comboHex := combo.String() + if allCombinations[comboHex] { + t.Errorf("Duplicate combination found at count %d: %s", count, comboHex) + } + allCombinations[comboHex] = true + count++ + } + + if count != maxItems { + t.Fatalf("Expected to iterate %d times, got %d", maxItems, count) + } + + // Check that the 8 items we got are the 8 we expected + if len(allCombinations) != len(expectedMap) { + t.Errorf("Mismatched map sizes. Expected %d, got %d", len(expectedMap), len(allCombinations)) + } + for hexStr := range expectedMap { + if !allCombinations[hexStr] { + t.Errorf("Expected combination %s not found in results", hexStr) + } + } + }) + + t.Run("Iterator stops correctly at end of byte slice", func(t *testing.T) { + // 1 byte = 8 bits. + // Iterator should produce 2^8 = 256 items (for depth=0 through depth=8). + // The 257th item (i=256) would require depth=9, + // which needs 2 bytes. The iterator should stop there. + input := swarm.NewAddress([]byte{0xDE}) // 1 byte + expectedCount := 1 << 8 // 256 + count := 0 + + allCombinations := make(map[string]bool) + + for combo := range combinator.IterateAddressCombinations(input) { + // Just in case, prevent infinite loop in test + if count > expectedCount { + t.Fatalf("Iterator produced more than %d items, count=%d", expectedCount, count) + break + } + comboHex := combo.String() + if allCombinations[comboHex] { + t.Errorf("Duplicate combination found: %s", comboHex) + } + allCombinations[comboHex] = true + count++ + } + + if count != expectedCount { + t.Errorf("Expected exactly %d items for 1 byte, got %d", expectedCount, count) + } + }) + + t.Run("depth=0 edge case (nil slice)", func(t *testing.T) { + // depth=0 is i=0. This needs 0 bytes, which a nil slice has. + // The *next* item, i=1, needs depth=1, which needs 1 byte. + // A nil slice fails this. + // So, this should iterate *exactly once*. + var input swarm.Address + count := 0 + var firstCombo swarm.Address + + for combo := range combinator.IterateAddressCombinations(input) { + if count == 0 { + firstCombo = combo + } + count++ + } + + if count != 1 { + t.Fatalf("Expected exactly 1 item (depth=0) for nil slice, got %d", count) + } + // A copy of a nil slice is a non-nil, zero-length slice + if len(firstCombo.Bytes()) != 0 { + t.Errorf("Expected first item to be empty slice, got %x", firstCombo.Bytes()) + } + }) + + t.Run("Consumer stops early (break)", func(t *testing.T) { + input := swarm.NewAddress(make([]byte, swarm.HashSize)) + count := 0 + stopAt := 5 + + seq := combinator.IterateAddressCombinations(input) + for range seq { + count++ + if count == stopAt { + break + } + } + + if count != stopAt { + t.Errorf("Expected loop to run %d times, got %d", stopAt, count) + } + // This test just proves the 'break' is correctly handled + // by the iterator's `if !yield(newSlice)` check. + }) +} + +// setupBenchmarkData creates a 4-byte slice, which is enough to +// iterate up to depth=24 (maxDepth) without triggering the data length check. +var benchAddress = swarm.NewAddress(append([]byte{0xDE, 0xAD, 0xBE, 0xEF}, make([]byte, swarm.HashSize-4)...)) + +// runBenchmark is a helper to run the iterator for a fixed number of items. +func runBenchmark(b *testing.B, items int) { + // We run the loop b.N times, as required by the benchmark harness. 
+ for b.Loop() { + count := 0 + // We use a volatile variable to ensure the loop body + // (the slice generation) isn't optimized away. + var volatileAddr swarm.Address + + seq := combinator.IterateAddressCombinations(benchAddress) + for combo := range seq { + volatileAddr = combo + count++ + if count == items { + break + } + } + + // To prevent compiler optimizing out the loop if volatileAddr isn't used. + // This is a common pattern, though often `go:noinline` on a helper + // function or global assignment is also used. + if volatileAddr.IsZero() { + b.Error("volatileAddr should not be nil") + } + } +} + +// BenchmarkDepth1 iterates over 2^1 = 2 items +func BenchmarkDepth1(b *testing.B) { + runBenchmark(b, 1<<1) +} + +// BenchmarkDepth2 iterates over 2^2 = 4 items +func BenchmarkDepth2(b *testing.B) { + runBenchmark(b, 1<<2) +} + +// BenchmarkDepth3 iterates over 2^3 = 8 items +func BenchmarkDepth3(b *testing.B) { + runBenchmark(b, 1<<3) +} + +// BenchmarkDepth4 iterates over 2^4 = 16 items +func BenchmarkDepth4(b *testing.B) { + runBenchmark(b, 1<<4) +} + +// BenchmarkDepth8 iterates over 2^8 = 256 items +func BenchmarkDepth8(b *testing.B) { + runBenchmark(b, 1<<8) +} + +// BenchmarkDepth12 iterates over 2^12 = 4096 items +func BenchmarkDepth12(b *testing.B) { + runBenchmark(b, 1<<12) +} + +// BenchmarkDepth16 iterates over 2^16 = 65536 items +func BenchmarkDepth16(b *testing.B) { + runBenchmark(b, 1<<16) +} + +// BenchmarkDepth20 iterates over 2^20 = 1,048,576 items +func BenchmarkDepth20(b *testing.B) { + runBenchmark(b, 1<<20) +} + +// addressesToHexMapHelper is a helper to convert a slice of addresses to a map of hex strings. +func addressesToHexMapHelper(addresses []swarm.Address) map[string]bool { + set := make(map[string]bool, len(addresses)) + for _, s := range addresses { + set[s.String()] = true + } + return set +} From 0006ec1a50992be4bb28d930ead9f29e23422f15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?= Date: Wed, 12 Nov 2025 00:45:32 +0100 Subject: [PATCH 42/62] feat(combinator): add maxDepth argument to IterateAddressCombinations --- pkg/replicas/combinator/combinator.go | 24 +++++----- pkg/replicas/combinator/combinator_test.go | 51 +++++++++++++--------- 2 files changed, 43 insertions(+), 32 deletions(-) diff --git a/pkg/replicas/combinator/combinator.go b/pkg/replicas/combinator/combinator.go index f4a2eecde76..e3358a95b45 100644 --- a/pkg/replicas/combinator/combinator.go +++ b/pkg/replicas/combinator/combinator.go @@ -11,16 +11,16 @@ import ( "github.com/ethersphere/bee/v2/pkg/swarm" ) -// maxDepth defines the maximum depth of the combination generation, serving as a -// safeguard against excessive memory allocation and computation time. A depth of -// 24 results in approximately 16.7 million combinations. -const maxDepth = 24 - // IterateAddressCombinations returns an iterator (iter.Seq) that yields bit // combinations of an address. The combinations are produced in order of // increasing 'depth', starting from depth 0. This approach allows for // memory-efficient iteration over a large set of combinations. // +// The maxDepth parameter defines the maximum depth of the combination +// generation, serving as a safeguard against excessive memory allocation and +// computation time. A depth of 24 results in approximately 16.7 million +// combinations. 
+// // # Performance and Memory Considerations // // For optimal performance, this function yields the same byte slice on each @@ -34,7 +34,7 @@ const maxDepth = 24 // // // Safe: A copy of the slice is created and stored. // var allCombinations [][]byte -// for combo := range IterateAddressCombinations(data) { +// for combo := range IterateAddressCombinations(data, 8) { // allCombinations = append(allCombinations, slices.Clone(combo)) // } // @@ -44,27 +44,27 @@ const maxDepth = 24 // // same underlying byte slice, which will hold the value of the last // // combination generated. // var allCombinationsBad [][]byte -// for combo := range IterateAddressCombinations(data) { +// for combo := range IterateAddressCombinations(data, 8) { // allCombinationsBad = append(allCombinationsBad, combo) // } // // The iterator terminates if the depth exceeds maxDepth or if the input data // slice is not long enough for the bit manipulations required at the next // depth level. -func IterateAddressCombinations(addr swarm.Address) iter.Seq[swarm.Address] { +func IterateAddressCombinations(addr swarm.Address, maxDepth int) iter.Seq[swarm.Address] { // State variables for the iterator closure. // A single buffer is used, mutated, and yielded in each iteration. // It is initialized with a copy of the original address data. currentSlice := append([]byte{}, addr.Bytes()...) - var currentDepth int = 0 - var bytesNeeded int = 0 + var currentDepth int + var bytesNeeded int // nextDepthIndex marks the combination index at which the depth increases // (e.g., 1, 2, 4, 8, ...). - var nextDepthIndex int = 1 + nextDepthIndex := 1 // prevCombinationIndex is used to calculate the bitwise difference for // efficient state transitions. - var prevCombinationIndex int = 0 + var prevCombinationIndex int return func(yield func(swarm.Address) bool) { // combinationIndex iterates through all possible combinations. diff --git a/pkg/replicas/combinator/combinator_test.go b/pkg/replicas/combinator/combinator_test.go index a321cfe7489..5a0e8ad1296 100644 --- a/pkg/replicas/combinator/combinator_test.go +++ b/pkg/replicas/combinator/combinator_test.go @@ -11,6 +11,8 @@ import ( "github.com/ethersphere/bee/v2/pkg/swarm" ) +const maxDepth = 8 + func TestIterateAddressCombinationsSeq(t *testing.T) { t.Run("Iterate up to depth=3", func(t *testing.T) { input := swarm.NewAddress(make([]byte, swarm.HashSize)) @@ -19,7 +21,7 @@ func TestIterateAddressCombinationsSeq(t *testing.T) { maxItems := 8 // 2^3 (which covers depth=0, 1, 2, 3) // These are the 8 combinations we expect for depth=3 - expected := []swarm.Address{ + expected := addressesToHexMap([]swarm.Address{ swarm.NewAddress(append([]byte{0b00000000}, make([]byte, swarm.HashSize-1)...)), // i=0 (depth=0) swarm.NewAddress(append([]byte{0b10000000}, make([]byte, swarm.HashSize-1)...)), // i=1 (depth=1) swarm.NewAddress(append([]byte{0b01000000}, make([]byte, swarm.HashSize-1)...)), // i=2 (depth=2) @@ -28,15 +30,9 @@ func TestIterateAddressCombinationsSeq(t *testing.T) { swarm.NewAddress(append([]byte{0b10100000}, make([]byte, swarm.HashSize-1)...)), // i=5 (depth=3) swarm.NewAddress(append([]byte{0b01100000}, make([]byte, swarm.HashSize-1)...)), // i=6 (depth=3) swarm.NewAddress(append([]byte{0b11100000}, make([]byte, swarm.HashSize-1)...)), // i=7 (depth=3) - } - expectedMap := addressesToHexMapHelper(expected) - - // Manually stop the iterator after 8 items. 
- for combo := range combinator.IterateAddressCombinations(input) { - if count >= maxItems { - break // Stop iterating - } + }) + for combo := range combinator.IterateAddressCombinations(input, 3) { comboHex := combo.String() if allCombinations[comboHex] { t.Errorf("Duplicate combination found at count %d: %s", count, comboHex) @@ -50,16 +46,31 @@ func TestIterateAddressCombinationsSeq(t *testing.T) { } // Check that the 8 items we got are the 8 we expected - if len(allCombinations) != len(expectedMap) { - t.Errorf("Mismatched map sizes. Expected %d, got %d", len(expectedMap), len(allCombinations)) + if len(allCombinations) != len(expected) { + t.Errorf("Mismatched map sizes. Expected %d, got %d", len(expected), len(allCombinations)) } - for hexStr := range expectedMap { + for hexStr := range expected { if !allCombinations[hexStr] { t.Errorf("Expected combination %s not found in results", hexStr) } } }) + t.Run("maxDepth limits iteration", func(t *testing.T) { + input := swarm.NewAddress(make([]byte, swarm.HashSize)) + count := 0 + // maxDepth=2 should give 4 items (2^2 for depths 0, 1, 2) + expectedCount := 4 + + for range combinator.IterateAddressCombinations(input, 2) { + count++ + } + + if count != expectedCount { + t.Errorf("Expected %d items for maxDepth=2, got %d", expectedCount, count) + } + }) + t.Run("Iterator stops correctly at end of byte slice", func(t *testing.T) { // 1 byte = 8 bits. // Iterator should produce 2^8 = 256 items (for depth=0 through depth=8). @@ -71,7 +82,7 @@ func TestIterateAddressCombinationsSeq(t *testing.T) { allCombinations := make(map[string]bool) - for combo := range combinator.IterateAddressCombinations(input) { + for combo := range combinator.IterateAddressCombinations(input, maxDepth) { // Just in case, prevent infinite loop in test if count > expectedCount { t.Fatalf("Iterator produced more than %d items, count=%d", expectedCount, count) @@ -99,7 +110,7 @@ func TestIterateAddressCombinationsSeq(t *testing.T) { count := 0 var firstCombo swarm.Address - for combo := range combinator.IterateAddressCombinations(input) { + for combo := range combinator.IterateAddressCombinations(input, maxDepth) { if count == 0 { firstCombo = combo } @@ -120,7 +131,7 @@ func TestIterateAddressCombinationsSeq(t *testing.T) { count := 0 stopAt := 5 - seq := combinator.IterateAddressCombinations(input) + seq := combinator.IterateAddressCombinations(input, maxDepth) for range seq { count++ if count == stopAt { @@ -136,12 +147,12 @@ func TestIterateAddressCombinationsSeq(t *testing.T) { }) } -// setupBenchmarkData creates a 4-byte slice, which is enough to -// iterate up to depth=24 (maxDepth) without triggering the data length check. var benchAddress = swarm.NewAddress(append([]byte{0xDE, 0xAD, 0xBE, 0xEF}, make([]byte, swarm.HashSize-4)...)) // runBenchmark is a helper to run the iterator for a fixed number of items. func runBenchmark(b *testing.B, items int) { + b.Helper() + // We run the loop b.N times, as required by the benchmark harness. for b.Loop() { count := 0 @@ -149,7 +160,7 @@ func runBenchmark(b *testing.B, items int) { // (the slice generation) isn't optimized away. var volatileAddr swarm.Address - seq := combinator.IterateAddressCombinations(benchAddress) + seq := combinator.IterateAddressCombinations(benchAddress, maxDepth) for combo := range seq { volatileAddr = combo count++ @@ -207,8 +218,8 @@ func BenchmarkDepth20(b *testing.B) { runBenchmark(b, 1<<20) } -// addressesToHexMapHelper is a helper to convert a slice of addresses to a map of hex strings. 
-func addressesToHexMapHelper(addresses []swarm.Address) map[string]bool { +// addressesToHexMap is a helper to convert a slice of addresses to a map of hex strings. +func addressesToHexMap(addresses []swarm.Address) map[string]bool { set := make(map[string]bool, len(addresses)) for _, s := range addresses { set[s.String()] = true From 1e3889cf06b0e5429be394a1c45ddf3a7bb440c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?= Date: Wed, 12 Nov 2025 11:44:55 +0100 Subject: [PATCH 43/62] feat(replicas): use IterateAddressCombinations for socGetter and socPutter --- pkg/replicas/getter_soc.go | 110 ++++------------- pkg/replicas/putter_soc.go | 36 ++---- pkg/replicas/replicas_soc.go | 91 -------------- pkg/replicas/replicas_soc_test.go | 199 ------------------------------ 4 files changed, 30 insertions(+), 406 deletions(-) delete mode 100644 pkg/replicas/replicas_soc.go delete mode 100644 pkg/replicas/replicas_soc_test.go diff --git a/pkg/replicas/getter_soc.go b/pkg/replicas/getter_soc.go index c53a219ecd5..2fbbdd0ce99 100644 --- a/pkg/replicas/getter_soc.go +++ b/pkg/replicas/getter_soc.go @@ -9,10 +9,11 @@ package replicas import ( "context" "errors" - "sync" - "time" + "fmt" + "github.com/ethersphere/bee/v2/pkg/cac" "github.com/ethersphere/bee/v2/pkg/file/redundancy" + "github.com/ethersphere/bee/v2/pkg/replicas/combinator" "github.com/ethersphere/bee/v2/pkg/storage" "github.com/ethersphere/bee/v2/pkg/swarm" ) @@ -42,101 +43,32 @@ func NewSocGetter(g storage.Getter, level redundancy.Level) storage.Getter { // it starts dispatching exponentially growing batches of replica requests // at each RetryInterval until a chunk is found or all replicas are tried. func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk, err error) { - // try original address first - ch, err = g.Getter.Get(ctx, addr) - if err == nil { - return ch, nil - } - var errs error - errs = errors.Join(errs, err) - - ctx, cancel := context.WithCancel(ctx) - defer cancel() - var wg sync.WaitGroup - replicas := NewSocReplicator(addr, g.level).Replicas() + for replicaAddr := range combinator.IterateAddressCombinations(addr, int(g.level)) { + ctx, cancel := context.WithTimeout(ctx, RetryInterval) - resultC := make(chan swarm.Chunk) - errc := make(chan error, len(replicas)) - - worker := func(chunkAddr swarm.Address) { - defer wg.Done() - - ch, err := g.Getter.Get(ctx, chunkAddr) + // Download the replica. + ch, err := g.Getter.Get(ctx, replicaAddr) if err != nil { - errc <- err - return - } - - select { - case resultC <- ch: - case <-ctx.Done(): + cancel() + errs = errors.Join(errs, fmt.Errorf("get chunk replica address %v: %w", replicaAddr, err)) + continue } - } - - go func() { - replicaIndex := 0 - batchLevel := uint8(1) // start with 1 (batch size 1 << 1 = 2) - - dispatchBatch := func() (done bool) { - batchSize := 1 << batchLevel // 2, 4, 8... - sentInBatch := 0 - - for sentInBatch < batchSize && replicaIndex < len(replicas) { - addr := replicas[replicaIndex] - replicaIndex++ - sentInBatch++ - - wg.Add(1) - go worker(addr) - } + cancel() - batchLevel++ + // Construct the original chunk with the original address. + originalChunk := swarm.NewChunk(addr, ch.Data()) - // we are done if all replicas are sent - return replicaIndex >= len(replicas) + // Validate that the data of the chunk is correct against the original address. 
+ isValid := cac.Valid(originalChunk) + if !isValid { + errs = errors.Join(errs, fmt.Errorf("validate data at replica address %v: %w", replicaAddr, swarm.ErrInvalidChunk)) + continue } - if done := dispatchBatch(); done { - return - } - - timer := time.NewTimer(RetryInterval) - defer timer.Stop() - - for { - select { - case <-timer.C: - if done := dispatchBatch(); done { - return - } - timer.Reset(RetryInterval) - - case <-ctx.Done(): - return - } - } - }() - - // collect results - waitC := make(chan struct{}) - go func() { - wg.Wait() - close(waitC) - }() - - for { - select { - case chunk := <-resultC: - cancel() // cancel the context to stop all other workers. - return chunk, nil - - case err := <-errc: - errs = errors.Join(errs, err) - - case <-waitC: - return nil, errors.Join(ErrSwarmageddon, errs) - } + return originalChunk, nil } + + return nil, errors.Join(errs, ErrSwarmageddon) } diff --git a/pkg/replicas/putter_soc.go b/pkg/replicas/putter_soc.go index 43042be21fd..8ad00365e1a 100644 --- a/pkg/replicas/putter_soc.go +++ b/pkg/replicas/putter_soc.go @@ -9,10 +9,9 @@ package replicas import ( "context" "errors" - "fmt" - "sync" "github.com/ethersphere/bee/v2/pkg/file/redundancy" + "github.com/ethersphere/bee/v2/pkg/replicas/combinator" "github.com/ethersphere/bee/v2/pkg/storage" "github.com/ethersphere/bee/v2/pkg/storer" "github.com/ethersphere/bee/v2/pkg/swarm" @@ -35,34 +34,17 @@ func NewSocPutter(p storage.Putter, rLevel redundancy.Level) storage.Putter { // Put makes the putter satisfy the storage.Putter interface func (p *socPutter) Put(ctx context.Context, ch swarm.Chunk) error { - errs := []error{} - // Put base chunk first - if err := p.putter.Put(ctx, ch); err != nil { - return fmt.Errorf("soc putter: put base chunk: %w", err) - } - if p.rLevel == 0 { - return nil - } + var errs error - rr := NewSocReplicator(ch.Address(), p.rLevel) - errc := make(chan error, p.rLevel.GetReplicaCount()) - wg := sync.WaitGroup{} - for _, replicaAddr := range rr.Replicas() { - wg.Go(func() { - // create a new chunk with the replica address - sch := swarm.NewChunk(replicaAddr, ch.Data()) - if err := p.putter.Put(ctx, sch); err != nil { - errc <- err - } - }) - } + for replicaAddr := range combinator.IterateAddressCombinations(ch.Address(), int(p.rLevel)) { + sch := swarm.NewChunk(replicaAddr, ch.Data()) - wg.Wait() - close(errc) - for err := range errc { - errs = append(errs, err) + if err := p.putter.Put(ctx, sch); err != nil { + errs = errors.Join(errs, err) + } } - return errors.Join(errs...) + + return errs } // socPutterSession extends the original socPutter diff --git a/pkg/replicas/replicas_soc.go b/pkg/replicas/replicas_soc.go deleted file mode 100644 index 822e30c28ee..00000000000 --- a/pkg/replicas/replicas_soc.go +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2025 The Swarm Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -// Package replicas implements a scheme to replicate chunks -// in such a way that -// - the replicas are optimally dispersed to aid cross-neighbourhood redundancy -// - the replicas addresses can be deduced by retrievers only knowing the address -// of the original content addressed chunk -// - no new chunk validation rules are introduced -package replicas - -import ( - "github.com/ethersphere/bee/v2/pkg/file/redundancy" - "github.com/ethersphere/bee/v2/pkg/swarm" -) - -// socReplicator running the find for replicas -type socReplicator struct { - addr []byte // chunk address - rLevel redundancy.Level -} - -// NewSocReplicator socReplicator constructor -func NewSocReplicator(addr swarm.Address, rLevel redundancy.Level) *socReplicator { - rr := &socReplicator{ - addr: addr.Bytes(), - rLevel: rLevel, - } - - return rr -} - -// Replicas returns all replica addresses as a slice -// the order of replicas is so that addresses are always maximally dispersed -// in successive sets of addresses. -// I.e., the binary tree representing the new addresses prefix bits up to depth is balanced -func (rr *socReplicator) Replicas() []swarm.Address { - replicaCount := rr.rLevel.GetReplicaCount() - replicas := make([]swarm.Address, replicaCount) - // number of bits required to represent all replicas - bitsRequired := countBitsRequired(uint8(replicaCount - 1)) - // replicate iteration saturates all leading bits in generated addresses until bitsRequired - for i := uint8(0); i < uint8(replicaCount); i++ { - // create soc replica (with address and nonce) - replicas[i] = rr.replicate(i, bitsRequired) - } - return replicas -} - -// replicate returns a replica params structure seeded with a byte of entropy as argument -func (rr *socReplicator) replicate(i uint8, bitsRequired uint8) swarm.Address { - addr := make([]byte, 32) - copy(addr, rr.addr) - mirroredBits := mirrorBitsToMSB(i, bitsRequired) - // zero out the first leading bitsRequired bits of addr[0] and set mirroredBits of `i` - addr[0] &= 0xFF >> bitsRequired - addr[0] |= mirroredBits - if addr[0] == rr.addr[0] { - // xor MSB after the mirrored bits because the iteration found the original address - addr[0] ^= 1 << (bitsRequired - 1) - } - return swarm.NewAddress(addr) -} - -// mirrorBitsToMSB mirrors the lowest n bits of v to the most significant bits of a byte. -// For example, mirrorBitsToMSB(0b00001101, 4) == 0b10110000 -func mirrorBitsToMSB(v byte, n uint8) byte { - var res byte - for i := range n { - if (v & (1 << i)) != 0 { - res |= (1 << (7 - i)) - } - } - return res -} - -// countBitsRequired returns the minimum number of bits required to represent value v. -// For 0, it returns 1 (we need 1 bit to represent 0). -func countBitsRequired(v uint8) uint8 { - if v == 0 { - return 1 - } - - var bits uint8 - for v > 0 { - bits++ - v >>= 1 - } - return bits -} diff --git a/pkg/replicas/replicas_soc_test.go b/pkg/replicas/replicas_soc_test.go deleted file mode 100644 index 2e69dae2117..00000000000 --- a/pkg/replicas/replicas_soc_test.go +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright 2025 The Swarm Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package replicas - -import ( - "fmt" - "testing" - - "github.com/ethersphere/bee/v2/pkg/file/redundancy" - "github.com/ethersphere/bee/v2/pkg/swarm" -) - -func TestMirrorBitsToMSB(t *testing.T) { - t.Parallel() - - tests := []struct { - v byte - n uint8 - expected byte - }{ - {0b00001101, 4, 0b10110000}, // Example from comment - {0b00000001, 1, 0b10000000}, - {0b00001111, 4, 0b11110000}, - {0b00000000, 4, 0b00000000}, - } - - for _, tt := range tests { - t.Run(fmt.Sprintf("v=%b_n=%d", tt.v, tt.n), func(t *testing.T) { - t.Parallel() - if got := mirrorBitsToMSB(tt.v, tt.n); got != tt.expected { - t.Errorf("mirrorBitsToMSB(%b, %d) = %b, want %b", tt.v, tt.n, got, tt.expected) - } - }) - } -} - -func TestCountBitsRequired(t *testing.T) { - t.Parallel() - - tests := []struct { - v uint8 - expected uint8 - }{ - {0, 1}, // Special case - {1, 1}, // 1 bit - {3, 2}, // 2 bits - {7, 3}, // 3 bits - {15, 4}, // 4 bits - {255, 8}, // 8 bits - } - - for _, tt := range tests { - t.Run(fmt.Sprintf("v=%d", tt.v), func(t *testing.T) { - t.Parallel() - if got := countBitsRequired(tt.v); got != tt.expected { - t.Errorf("countBitsRequired(%d) = %d, want %d", tt.v, got, tt.expected) - } - }) - } -} - -func TestReplicate_Line48(t *testing.T) { - t.Parallel() - - // Test line 48: addr[0] &= 0xFF >> bitsRequired - // This clears the first bitsRequired MSBs - baseAddr := swarm.MustParseHexAddress("FF00000000000000000000000000000000000000000000000000000000000000") - - tests := []struct { - bitsRequired uint8 - expectedMask byte - }{ - {1, 0x7F}, // 0b01111111 - {2, 0x3F}, // 0b00111111 - {4, 0x0F}, // 0b00001111 - } - - for _, tt := range tests { - t.Run(fmt.Sprintf("bits=%d", tt.bitsRequired), func(t *testing.T) { - t.Parallel() - // Test the mask calculation - mask := byte(0xFF >> tt.bitsRequired) - if mask != tt.expectedMask { - t.Errorf("mask = %b, want %b", mask, tt.expectedMask) - } - - // Test that applying the mask clears the MSBs - addr := make([]byte, 32) - copy(addr, baseAddr.Bytes()) - addr[0] &= mask - - if addr[0] != tt.expectedMask { - t.Errorf("after mask: addr[0] = %b, want %b", addr[0], tt.expectedMask) - } - - // Verify MSBs are cleared - msbMask := byte(0xFF) << (8 - tt.bitsRequired) - if addr[0]&msbMask != 0 { - t.Errorf("first %d bits should be zero, got %b", tt.bitsRequired, addr[0]) - } - }) - } -} - -func TestReplicate(t *testing.T) { - t.Parallel() - - baseAddr := swarm.MustParseHexAddress("1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef") - replicator := &socReplicator{addr: baseAddr.Bytes(), rLevel: redundancy.MEDIUM} - - tests := []struct { - i uint8 - bitsRequired uint8 - }{ - {0, 1}, - {1, 2}, - {3, 4}, - } - - for _, tt := range tests { - t.Run(fmt.Sprintf("i=%d_bits=%d", tt.i, tt.bitsRequired), func(t *testing.T) { - t.Parallel() - replica := replicator.replicate(tt.i, tt.bitsRequired) - - // Verify remaining bytes unchanged - for i := 1; i < len(replica.Bytes()); i++ { - if replica.Bytes()[i] != baseAddr.Bytes()[i] { - t.Errorf("byte[%d] changed: got %d, want %d", i, replica.Bytes()[i], baseAddr.Bytes()[i]) - } - } - - // Verify first byte differs from original (or was modified) - if replica.Bytes()[0] == baseAddr.Bytes()[0] { - // This is okay if the code explicitly handles this case (line 50-52) - // But we should verify the logic worked - mask := byte(0xFF >> tt.bitsRequired) - mirroredBits := mirrorBitsToMSB(tt.i, tt.bitsRequired) - expected := (baseAddr.Bytes()[0] & mask) | mirroredBits - if expected == baseAddr.Bytes()[0] { - // Original would have been 
flipped, so replica should differ
-				if replica.Bytes()[0] == baseAddr.Bytes()[0] {
-					t.Errorf("replica first byte should differ from original")
-				}
-			}
-		}
-		})
-	}
-}
-
-func TestReplicas(t *testing.T) {
-	t.Parallel()
-
-	baseAddr := swarm.MustParseHexAddress("1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef")
-
-	for _, rLevel := range []redundancy.Level{redundancy.MEDIUM, redundancy.STRONG, redundancy.INSANE, redundancy.PARANOID} {
-		t.Run(fmt.Sprintf("level_%d", rLevel), func(t *testing.T) {
-			t.Parallel()
-
-			replicator := NewSocReplicator(baseAddr, rLevel)
-			replicas := replicator.Replicas()
-
-			// Verify count
-			if len(replicas) != rLevel.GetReplicaCount() {
-				t.Errorf("got %d replicas, want %d", len(replicas), rLevel.GetReplicaCount())
-			}
-
-			// Verify structure and uniqueness
-			seen := make(map[string]bool)
-			for i, r := range replicas {
-				if len(r.Bytes()) != 32 {
-					t.Errorf("replica %d: invalid address length", i)
-				}
-				// Verify remaining bytes unchanged
-				for j := 1; j < 32; j++ {
-					if r.Bytes()[j] != baseAddr.Bytes()[j] {
-						t.Errorf("replica %d: byte[%d] changed", i, j)
-					}
-				}
-				// Check uniqueness
-				addrStr := string(r.Bytes())
-				if seen[addrStr] {
-					t.Errorf("replica %d: duplicate address", i)
-				}
-				seen[addrStr] = true
-			}
-
-			// Verify dispersion (at least some first bytes differ)
-			firstBytes := make(map[byte]bool)
-			for _, r := range replicas {
-				firstBytes[r.Bytes()[0]] = true
-			}
-			if len(firstBytes) < len(replicas)/2 {
-				t.Errorf("poor dispersion: only %d unique first bytes", len(firstBytes))
-			}
-		})
-	}
-}

From 1e54f3fa316c6829d4f1a90dc6c28c0225924240 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?=
Date: Wed, 12 Nov 2025 12:36:08 +0100
Subject: [PATCH 44/62] feat: parallel attempts for socReplica.Get

---
 pkg/replicas/getter_soc.go | 94 ++++++++++++++++++++++++++++----------
 1 file changed, 70 insertions(+), 24 deletions(-)

diff --git a/pkg/replicas/getter_soc.go b/pkg/replicas/getter_soc.go
index 2fbbdd0ce99..a4d0bed037e 100644
--- a/pkg/replicas/getter_soc.go
+++ b/pkg/replicas/getter_soc.go
@@ -10,12 +10,14 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"sync"
 
 	"github.com/ethersphere/bee/v2/pkg/cac"
 	"github.com/ethersphere/bee/v2/pkg/file/redundancy"
 	"github.com/ethersphere/bee/v2/pkg/replicas/combinator"
 	"github.com/ethersphere/bee/v2/pkg/storage"
 	"github.com/ethersphere/bee/v2/pkg/swarm"
+	"golang.org/x/sync/semaphore"
 )
 
 // socGetter is the private implementation of storage.Getter, an interface for
@@ -37,38 +39,82 @@ func NewSocGetter(g storage.Getter, level redundancy.Level) storage.Getter {
 	return &socGetter{Getter: g, level: level}
 }
 
+const socGetterConcurrency = 4
+
 // Get makes the socGetter satisfy the storage.Getter interface
 // It attempts to fetch the chunk by its original address first.
-// If the original address does not return a result within RetryInterval,
-// it starts dispatching exponentially growing batches of replica requests
-// at each RetryInterval until a chunk is found or all replicas are tried.
+// If the original address does not return a result,
+// it starts dispatching parallel requests for replicas
+// until a chunk is found or all replicas are tried.
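+//
+// A minimal usage sketch (an editor's illustration, not part of this patch;
+// it assumes a storage.Getter named store and a chunk address addr in the
+// caller's scope):
+//
+//	g := replicas.NewSocGetter(store, redundancy.MEDIUM)
+//	ch, err := g.Get(ctx, addr)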
func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk, err error) { - var errs error + var ( + errs error + mu sync.Mutex + wg sync.WaitGroup + ) - for replicaAddr := range combinator.IterateAddressCombinations(addr, int(g.level)) { - ctx, cancel := context.WithTimeout(ctx, RetryInterval) + ctx, cancel := context.WithCancel(ctx) + defer cancel() - // Download the replica. - ch, err := g.Getter.Get(ctx, replicaAddr) - if err != nil { - cancel() - errs = errors.Join(errs, fmt.Errorf("get chunk replica address %v: %w", replicaAddr, err)) - continue - } - cancel() + sem := semaphore.NewWeighted(socGetterConcurrency) + replicaIter := combinator.IterateAddressCombinations(addr, int(g.level)) + + resultChan := make(chan swarm.Chunk, 1) + doneChan := make(chan struct{}) + + go func() { + defer close(doneChan) + for replicaAddr := range replicaIter { + select { + case <-ctx.Done(): + return + default: + } + + if err := sem.Acquire(ctx, 1); err != nil { + return + } - // Construct the original chunk with the original address. - originalChunk := swarm.NewChunk(addr, ch.Data()) + wg.Add(1) + go func(replicaAddr swarm.Address) { + defer sem.Release(1) + defer wg.Done() - // Validate that the data of the chunk is correct against the original address. - isValid := cac.Valid(originalChunk) - if !isValid { - errs = errors.Join(errs, fmt.Errorf("validate data at replica address %v: %w", replicaAddr, swarm.ErrInvalidChunk)) - continue + ch, err := g.Getter.Get(ctx, replicaAddr) + if err != nil { + mu.Lock() + errs = errors.Join(errs, fmt.Errorf("get chunk replica address %v: %w", replicaAddr, err)) + mu.Unlock() + return + } + + originalChunk := swarm.NewChunk(addr, ch.Data()) + if !cac.Valid(originalChunk) { + mu.Lock() + errs = errors.Join(errs, fmt.Errorf("validate data at replica address %v: %w", replicaAddr, swarm.ErrInvalidChunk)) + mu.Unlock() + return + } + + select { + case resultChan <- originalChunk: + cancel() + case <-ctx.Done(): + } + }(replicaAddr) } + wg.Wait() + }() - return originalChunk, nil + select { + case ch := <-resultChan: + return ch, nil + case <-doneChan: + if errs == nil { + return nil, ErrSwarmageddon + } + return nil, errors.Join(errs, ErrSwarmageddon) + case <-ctx.Done(): + return nil, ctx.Err() } - - return nil, errors.Join(errs, ErrSwarmageddon) } From b48a44fe88e98bee3e3f29790f234201da82d596 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?= Date: Wed, 12 Nov 2025 12:36:28 +0100 Subject: [PATCH 45/62] feat: add +1 replica in combinator --- pkg/replicas/combinator/combinator.go | 23 ++++++++++++++++++++++ pkg/replicas/combinator/combinator_test.go | 7 ++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/pkg/replicas/combinator/combinator.go b/pkg/replicas/combinator/combinator.go index e3358a95b45..43940c13249 100644 --- a/pkg/replicas/combinator/combinator.go +++ b/pkg/replicas/combinator/combinator.go @@ -86,6 +86,29 @@ func IterateAddressCombinations(addr swarm.Address, maxDepth int) iter.Seq[swarm // Boundary checks are performed only when the depth changes. if currentDepth > maxDepth { + // Create a new slice based on the original address. + originalAddrBytes := addr.Bytes() + flippedAddrBytes := make([]byte, len(originalAddrBytes)) + copy(flippedAddrBytes, originalAddrBytes) + + // Calculate the byte index for the bit to flip. + bitIndexToFlip := maxDepth + byteIndex := bitIndexToFlip / 8 + + // Ensure the flippedAddrBytes is long enough to flip this bit. 
+ if len(flippedAddrBytes) <= byteIndex { + return // Cannot flip bit, slice is too short. + } + + // Flip the maxDepth bit in the new slice. + bitPositionInByte := 7 - (bitIndexToFlip % 8) + bitMask := byte(1 << bitPositionInByte) + flippedAddrBytes[byteIndex] ^= bitMask + + // Yield this modified address + if !yield(swarm.NewAddress(flippedAddrBytes)) { + return // Consumer-requested stop. + } return // Iteration completed up to the defined maximum depth. } diff --git a/pkg/replicas/combinator/combinator_test.go b/pkg/replicas/combinator/combinator_test.go index 5a0e8ad1296..e9a3f490086 100644 --- a/pkg/replicas/combinator/combinator_test.go +++ b/pkg/replicas/combinator/combinator_test.go @@ -18,7 +18,7 @@ func TestIterateAddressCombinationsSeq(t *testing.T) { input := swarm.NewAddress(make([]byte, swarm.HashSize)) allCombinations := make(map[string]bool) count := 0 - maxItems := 8 // 2^3 (which covers depth=0, 1, 2, 3) + maxItems := 9 // 2^3 (which covers depth=0, 1, 2, 3) + 1 for the maxDepth+1 bit flipped address // These are the 8 combinations we expect for depth=3 expected := addressesToHexMap([]swarm.Address{ @@ -30,6 +30,7 @@ func TestIterateAddressCombinationsSeq(t *testing.T) { swarm.NewAddress(append([]byte{0b10100000}, make([]byte, swarm.HashSize-1)...)), // i=5 (depth=3) swarm.NewAddress(append([]byte{0b01100000}, make([]byte, swarm.HashSize-1)...)), // i=6 (depth=3) swarm.NewAddress(append([]byte{0b11100000}, make([]byte, swarm.HashSize-1)...)), // i=7 (depth=3) + swarm.NewAddress(append([]byte{0b00010000}, make([]byte, swarm.HashSize-1)...)), // i=8 (depth=3) }) for combo := range combinator.IterateAddressCombinations(input, 3) { @@ -59,8 +60,8 @@ func TestIterateAddressCombinationsSeq(t *testing.T) { t.Run("maxDepth limits iteration", func(t *testing.T) { input := swarm.NewAddress(make([]byte, swarm.HashSize)) count := 0 - // maxDepth=2 should give 4 items (2^2 for depths 0, 1, 2) - expectedCount := 4 + // maxDepth=2 should give 4 items (2^2 for depths 0, 1, 2) + 1 for the maxDepth bit flipped address + expectedCount := 5 for range combinator.IterateAddressCombinations(input, 2) { count++ From 83e9ac5cbd2dc304ae76f5395ba47a00bc35a6eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?= Date: Wed, 12 Nov 2025 12:48:40 +0100 Subject: [PATCH 46/62] chore: update IterateAddressCombinations function comment --- pkg/replicas/combinator/combinator.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/replicas/combinator/combinator.go b/pkg/replicas/combinator/combinator.go index 43940c13249..095f2cf7e25 100644 --- a/pkg/replicas/combinator/combinator.go +++ b/pkg/replicas/combinator/combinator.go @@ -15,6 +15,7 @@ import ( // combinations of an address. The combinations are produced in order of // increasing 'depth', starting from depth 0. This approach allows for // memory-efficient iteration over a large set of combinations. +// The combination with the one flipped bit of the original address will be returned at the end. 
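+// For example (a sketch mirroring the expectations in combinator_test.go):
+// for a zero address and maxDepth=3, the first bytes of the yielded
+// addresses are
+//
+//	0b00000000, 0b10000000, 0b01000000, 0b11000000,
+//	0b00100000, 0b10100000, 0b01100000, 0b11100000,
+//	0b00010000 (the extra address with bit index maxDepth flipped, yielded last)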
// // The maxDepth parameter defines the maximum depth of the combination // generation, serving as a safeguard against excessive memory allocation and From 12327545e3abbc1949822cbbba211f88d62720b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?= Date: Wed, 12 Nov 2025 14:05:05 +0100 Subject: [PATCH 47/62] fix: pass the replica chunk in socGetter.Get --- pkg/replicas/getter_soc.go | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pkg/replicas/getter_soc.go b/pkg/replicas/getter_soc.go index a4d0bed037e..2817b1de686 100644 --- a/pkg/replicas/getter_soc.go +++ b/pkg/replicas/getter_soc.go @@ -12,7 +12,6 @@ import ( "fmt" "sync" - "github.com/ethersphere/bee/v2/pkg/cac" "github.com/ethersphere/bee/v2/pkg/file/redundancy" "github.com/ethersphere/bee/v2/pkg/replicas/combinator" "github.com/ethersphere/bee/v2/pkg/storage" @@ -88,16 +87,8 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk return } - originalChunk := swarm.NewChunk(addr, ch.Data()) - if !cac.Valid(originalChunk) { - mu.Lock() - errs = errors.Join(errs, fmt.Errorf("validate data at replica address %v: %w", replicaAddr, swarm.ErrInvalidChunk)) - mu.Unlock() - return - } - select { - case resultChan <- originalChunk: + case resultChan <- ch: cancel() case <-ctx.Done(): } From 2fed6d6baf4a5bb7b0da4a4916a6a8d455585f2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?= Date: Wed, 12 Nov 2025 14:12:28 +0100 Subject: [PATCH 48/62] feat(soc): more strict disperse replica validation --- pkg/soc/validator.go | 7 +++- pkg/soc/validator_test.go | 73 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/pkg/soc/validator.go b/pkg/soc/validator.go index 84b0565ad55..47c9a9f48b4 100644 --- a/pkg/soc/validator.go +++ b/pkg/soc/validator.go @@ -29,7 +29,12 @@ func Valid(ch swarm.Chunk) bool { // if the address does not match the chunk address, check if it is a disperse replica if !ch.Address().Equal(address) { - return bytes.Equal(ch.Address().Bytes()[1:32], address.Bytes()[1:32]) + c := ch.Address().Bytes() + a := address.Bytes() + // for disperse replicas it is allowed to have the first 4 bits of the first + // byte to be different, and the last 4 bits must be equal. 
+ // another case is when only the fifth bit is flipped + return ((c[0]&0x0f == a[0]&0x0f) || (c[0]^a[0] == 0x10)) && bytes.Equal(c[1:], a[1:]) } return true diff --git a/pkg/soc/validator_test.go b/pkg/soc/validator_test.go index 72995d260da..6ba1fc1a34b 100644 --- a/pkg/soc/validator_test.go +++ b/pkg/soc/validator_test.go @@ -182,3 +182,76 @@ func TestInvalid(t *testing.T) { }) } } + +func TestValidDisperseReplicaAddress(t *testing.T) { + t.Parallel() + + privKey, err := crypto.GenerateSecp256k1Key() + if err != nil { + t.Fatal(err) + } + signer := crypto.NewDefaultSigner(privKey) + + payload := []byte("foo") + ch, err := cac.New(payload) + if err != nil { + t.Fatal(err) + } + + id := make([]byte, swarm.HashSize) + s := soc.New(id, ch) + + socCh, err := s.Sign(signer) + if err != nil { + t.Fatal(err) + } + + // original address + originalAddr := socCh.Address().Bytes() + + t.Run("last 4 bits equal", func(t *testing.T) { + // change first 4 bits of first byte + addr := make([]byte, len(originalAddr)) + copy(addr, originalAddr) + addr[0] = (addr[0] & 0x0f) | 0xf0 + + replica := swarm.NewChunk(swarm.NewAddress(addr), socCh.Data()) + if !soc.Valid(replica) { + t.Fatal("replica with last 4 bits equal should be valid") + } + }) + + t.Run("5th bit flipped", func(t *testing.T) { + addr := make([]byte, len(originalAddr)) + copy(addr, originalAddr) + addr[0] ^= 0x10 + + replica := swarm.NewChunk(swarm.NewAddress(addr), socCh.Data()) + if !soc.Valid(replica) { + t.Fatal("replica with 5th bit flipped should be valid") + } + }) + + t.Run("invalid change", func(t *testing.T) { + addr := make([]byte, len(originalAddr)) + copy(addr, originalAddr) + addr[0]++ // change the first byte in a way that is not allowed + + replica := swarm.NewChunk(swarm.NewAddress(addr), socCh.Data()) + if soc.Valid(replica) { + t.Fatal("replica with invalid change should be invalid") + } + }) + + t.Run("invalid change - different last 4 bits", func(t *testing.T) { + addr := make([]byte, len(originalAddr)) + copy(addr, originalAddr) + addr[0] = (addr[0] & 0xf0) | ((addr[0] + 1) & 0x0f) + + replica := swarm.NewChunk(swarm.NewAddress(addr), socCh.Data()) + if soc.Valid(replica) { + t.Fatal("replica with different last 4 bits should be invalid") + } + }) +} + From fec7400491fbb0bcb51eebe83c9b65ce16f62fd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?= Date: Wed, 12 Nov 2025 14:18:43 +0100 Subject: [PATCH 49/62] chore: return ErrNotFound from socGetter.Get --- pkg/replicas/getter_soc.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/replicas/getter_soc.go b/pkg/replicas/getter_soc.go index 2817b1de686..a40890e5b5f 100644 --- a/pkg/replicas/getter_soc.go +++ b/pkg/replicas/getter_soc.go @@ -102,9 +102,9 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk return ch, nil case <-doneChan: if errs == nil { - return nil, ErrSwarmageddon + return nil, storage.ErrNotFound } - return nil, errors.Join(errs, ErrSwarmageddon) + return nil, errs case <-ctx.Done(): return nil, ctx.Err() } From 15081690f61db282e714fe2435eaf50c9b5521d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?= Date: Wed, 12 Nov 2025 17:04:55 +0100 Subject: [PATCH 50/62] fix: adjust combinator not to return original address --- pkg/replicas/combinator/combinator.go | 31 ++- pkg/replicas/combinator/combinator_test.go | 252 +++++++++++++++------ pkg/replicas/getter_soc.go | 16 +- pkg/replicas/putter_soc.go | 23 +- 4 files changed, 231 insertions(+), 91 deletions(-) diff --git 
a/pkg/replicas/combinator/combinator.go b/pkg/replicas/combinator/combinator.go index 095f2cf7e25..9a280091557 100644 --- a/pkg/replicas/combinator/combinator.go +++ b/pkg/replicas/combinator/combinator.go @@ -11,17 +11,12 @@ import ( "github.com/ethersphere/bee/v2/pkg/swarm" ) -// IterateAddressCombinations returns an iterator (iter.Seq) that yields bit -// combinations of an address. The combinations are produced in order of -// increasing 'depth', starting from depth 0. This approach allows for -// memory-efficient iteration over a large set of combinations. +// IterateReplicaAddresses returns an iterator (iter.Seq) that yields bit +// combinations of an address, starting from depth 1. The original address is +// not returned. This approach allows for memory-efficient iteration over a large +// set of combinations. // The combination with the one flipped bit of the original address will be returned at the end. // -// The maxDepth parameter defines the maximum depth of the combination -// generation, serving as a safeguard against excessive memory allocation and -// computation time. A depth of 24 results in approximately 16.7 million -// combinations. -// // # Performance and Memory Considerations // // For optimal performance, this function yields the same byte slice on each @@ -35,7 +30,7 @@ import ( // // // Safe: A copy of the slice is created and stored. // var allCombinations [][]byte -// for combo := range IterateAddressCombinations(data, 8) { +// for combo := range IterateReplicaAddresses(data, 8) { // allCombinations = append(allCombinations, slices.Clone(combo)) // } // @@ -45,14 +40,14 @@ import ( // // same underlying byte slice, which will hold the value of the last // // combination generated. // var allCombinationsBad [][]byte -// for combo := range IterateAddressCombinations(data, 8) { +// for combo := range IterateReplicaAddresses(data, 8) { // allCombinationsBad = append(allCombinationsBad, combo) // } // // The iterator terminates if the depth exceeds maxDepth or if the input data // slice is not long enough for the bit manipulations required at the next // depth level. -func IterateAddressCombinations(addr swarm.Address, maxDepth int) iter.Seq[swarm.Address] { +func IterateReplicaAddresses(addr swarm.Address, maxDepth int) iter.Seq[swarm.Address] { // State variables for the iterator closure. // A single buffer is used, mutated, and yielded in each iteration. // It is initialized with a copy of the original address data. @@ -68,8 +63,8 @@ func IterateAddressCombinations(addr swarm.Address, maxDepth int) iter.Seq[swarm var prevCombinationIndex int return func(yield func(swarm.Address) bool) { - // combinationIndex iterates through all possible combinations. - for combinationIndex := 0; ; combinationIndex++ { + // combinationIndex iterates through all possible combinations, but skip the original address. + for combinationIndex := 1; ; combinationIndex++ { // When the combinationIndex reaches the next power of two, the depth // of bit combinations is increased for subsequent iterations. if combinationIndex >= nextDepthIndex { @@ -87,6 +82,11 @@ func IterateAddressCombinations(addr swarm.Address, maxDepth int) iter.Seq[swarm // Boundary checks are performed only when the depth changes. if currentDepth > maxDepth { + if maxDepth == 0 { + // Do not return the bit flip address of depth 0, + // because depth 0 should have no replicas. + return + } // Create a new slice based on the original address. 
originalAddrBytes := addr.Bytes() flippedAddrBytes := make([]byte, len(originalAddrBytes)) @@ -122,8 +122,7 @@ func IterateAddressCombinations(addr swarm.Address, maxDepth int) iter.Seq[swarm } // The generation logic is optimized to flip only the bits that - // differ from the previous combination. For combinationIndex=0, - // (0^0) is 0, so no bits are flipped. For subsequent indices, + // differ from the previous combination. For subsequent indices, // the buffer is XORed with the difference between the current and // previous combination indices. bitsToFlip := combinationIndex ^ prevCombinationIndex diff --git a/pkg/replicas/combinator/combinator_test.go b/pkg/replicas/combinator/combinator_test.go index e9a3f490086..abe90b28bf2 100644 --- a/pkg/replicas/combinator/combinator_test.go +++ b/pkg/replicas/combinator/combinator_test.go @@ -13,27 +13,123 @@ import ( const maxDepth = 8 -func TestIterateAddressCombinationsSeq(t *testing.T) { +func TestIterateReplicaAddressesSeq(t *testing.T) { + t.Run("iterate up to depth 0", func(t *testing.T) { + input := swarm.NewAddress(make([]byte, swarm.HashSize)) + allCombinations := make(map[string]bool) + count := 0 + maxD := 0 + expectedCount := 0 // No addresses should be returned as depth 0 represents no replication. + expected := map[string]bool{} // Not even the maxDepth-bit-flipped address. + + for combo := range combinator.IterateReplicaAddresses(input, maxD) { + comboHex := combo.String() + if allCombinations[comboHex] { + t.Errorf("Duplicate combination found at count %d: %s", count, comboHex) + } + allCombinations[comboHex] = true + count++ + } + + if count != expectedCount { + t.Fatalf("Expected to iterate %d times, got %d", expectedCount, count) + } + if len(allCombinations) != len(expected) { + t.Errorf("Mismatched map sizes. Expected %d, got %d", len(expected), len(allCombinations)) + } + for hexStr := range expected { + if !allCombinations[hexStr] { + t.Errorf("Expected combination %s not found in results", hexStr) + } + } + }) + + t.Run("iterate up to depth 1", func(t *testing.T) { + input := swarm.NewAddress(make([]byte, swarm.HashSize)) + allCombinations := make(map[string]bool) + count := 0 + maxD := 1 + expectedCount := 1 << maxD // 2^1 = 2 items + expected := map[string]bool{ + swarm.NewAddress(append([]byte{0b10000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=1 (depth=1) + swarm.NewAddress(append([]byte{0b01000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // 1st bit flipped + } + + for combo := range combinator.IterateReplicaAddresses(input, maxD) { + comboHex := combo.String() + if allCombinations[comboHex] { + t.Errorf("Duplicate combination found at count %d: %s", count, comboHex) + } + allCombinations[comboHex] = true + count++ + } + + if count != expectedCount { + t.Fatalf("Expected to iterate %d times, got %d", expectedCount, count) + } + if len(allCombinations) != len(expected) { + t.Errorf("Mismatched map sizes. 
Expected %d, got %d", len(expected), len(allCombinations)) + } + for hexStr := range expected { + if !allCombinations[hexStr] { + t.Errorf("Expected combination %s not found in results", hexStr) + } + } + }) + + t.Run("iterate up to depth 2", func(t *testing.T) { + input := swarm.NewAddress(make([]byte, swarm.HashSize)) + allCombinations := make(map[string]bool) + count := 0 + maxD := 2 + expectedCount := 1 << maxD // 2^2 = 4 items + expected := map[string]bool{ + swarm.NewAddress(append([]byte{0b10000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=1 (depth=1) + swarm.NewAddress(append([]byte{0b01000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=2 (depth=2) + swarm.NewAddress(append([]byte{0b11000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=3 (depth=2) + swarm.NewAddress(append([]byte{0b00100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // 2nd bit flipped + } + + for combo := range combinator.IterateReplicaAddresses(input, maxD) { + comboHex := combo.String() + if allCombinations[comboHex] { + t.Errorf("Duplicate combination found at count %d: %s", count, comboHex) + } + allCombinations[comboHex] = true + count++ + } + + if count != expectedCount { + t.Fatalf("Expected to iterate %d times, got %d", expectedCount, count) + } + if len(allCombinations) != len(expected) { + t.Errorf("Mismatched map sizes. Expected %d, got %d", len(expected), len(allCombinations)) + } + for hexStr := range expected { + if !allCombinations[hexStr] { + t.Errorf("Expected combination %s not found in results", hexStr) + } + } + }) + t.Run("Iterate up to depth=3", func(t *testing.T) { input := swarm.NewAddress(make([]byte, swarm.HashSize)) allCombinations := make(map[string]bool) count := 0 - maxItems := 9 // 2^3 (which covers depth=0, 1, 2, 3) + 1 for the maxDepth+1 bit flipped address - - // These are the 8 combinations we expect for depth=3 - expected := addressesToHexMap([]swarm.Address{ - swarm.NewAddress(append([]byte{0b00000000}, make([]byte, swarm.HashSize-1)...)), // i=0 (depth=0) - swarm.NewAddress(append([]byte{0b10000000}, make([]byte, swarm.HashSize-1)...)), // i=1 (depth=1) - swarm.NewAddress(append([]byte{0b01000000}, make([]byte, swarm.HashSize-1)...)), // i=2 (depth=2) - swarm.NewAddress(append([]byte{0b11000000}, make([]byte, swarm.HashSize-1)...)), // i=3 (depth=2) - swarm.NewAddress(append([]byte{0b00100000}, make([]byte, swarm.HashSize-1)...)), // i=4 (depth=3) - swarm.NewAddress(append([]byte{0b10100000}, make([]byte, swarm.HashSize-1)...)), // i=5 (depth=3) - swarm.NewAddress(append([]byte{0b01100000}, make([]byte, swarm.HashSize-1)...)), // i=6 (depth=3) - swarm.NewAddress(append([]byte{0b11100000}, make([]byte, swarm.HashSize-1)...)), // i=7 (depth=3) - swarm.NewAddress(append([]byte{0b00010000}, make([]byte, swarm.HashSize-1)...)), // i=8 (depth=3) - }) - - for combo := range combinator.IterateAddressCombinations(input, 3) { + maxD := 3 + expectedCount := 1 << maxD // 2^3 = 8 items + expected := map[string]bool{ + swarm.NewAddress(append([]byte{0b10000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=1 (depth=1) + swarm.NewAddress(append([]byte{0b01000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=2 (depth=2) + swarm.NewAddress(append([]byte{0b11000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=3 (depth=2) + swarm.NewAddress(append([]byte{0b00100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=4 (depth=3) + swarm.NewAddress(append([]byte{0b10100000}, make([]byte, 
swarm.HashSize-1)...)).String(): true, // i=5 (depth=3) + swarm.NewAddress(append([]byte{0b01100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=6 (depth=3) + swarm.NewAddress(append([]byte{0b11100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=7 (depth=3) + swarm.NewAddress(append([]byte{0b00010000}, make([]byte, swarm.HashSize-1)...)).String(): true, // 3rd bit flipped + } + + for combo := range combinator.IterateReplicaAddresses(input, maxD) { comboHex := combo.String() if allCombinations[comboHex] { t.Errorf("Duplicate combination found at count %d: %s", count, comboHex) @@ -42,11 +138,58 @@ func TestIterateAddressCombinationsSeq(t *testing.T) { count++ } - if count != maxItems { - t.Fatalf("Expected to iterate %d times, got %d", maxItems, count) + if count != expectedCount { + t.Fatalf("Expected to iterate %d times, got %d", expectedCount, count) } - // Check that the 8 items we got are the 8 we expected + // Check that the items we got are the ones we expected + if len(allCombinations) != len(expected) { + t.Errorf("Mismatched map sizes. Expected %d, got %d", len(expected), len(allCombinations)) + } + for hexStr := range expected { + if !allCombinations[hexStr] { + t.Errorf("Expected combination %s not found in results", hexStr) + } + } + }) + + t.Run("iterate up to depth 4", func(t *testing.T) { + input := swarm.NewAddress(make([]byte, swarm.HashSize)) + allCombinations := make(map[string]bool) + count := 0 + maxD := 4 + expectedCount := 1 << maxD // 2^4 = 16 items + expected := map[string]bool{ + swarm.NewAddress(append([]byte{0b10000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=1 (depth=1) + swarm.NewAddress(append([]byte{0b01000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=2 (depth=2) + swarm.NewAddress(append([]byte{0b11000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=3 (depth=2) + swarm.NewAddress(append([]byte{0b00100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=4 (depth=3) + swarm.NewAddress(append([]byte{0b10100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=5 (depth=3) + swarm.NewAddress(append([]byte{0b01100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=6 (depth=3) + swarm.NewAddress(append([]byte{0b11100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=7 (depth=3) + swarm.NewAddress(append([]byte{0b00010000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=8 (depth=4) + swarm.NewAddress(append([]byte{0b10010000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=9 (depth=4) + swarm.NewAddress(append([]byte{0b01010000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=10 (depth=4) + swarm.NewAddress(append([]byte{0b11010000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=11 (depth=4) + swarm.NewAddress(append([]byte{0b00110000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=12 (depth=4) + swarm.NewAddress(append([]byte{0b10110000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=13 (depth=4) + swarm.NewAddress(append([]byte{0b01110000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=14 (depth=4) + swarm.NewAddress(append([]byte{0b11110000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=15 (depth=4) + swarm.NewAddress(append([]byte{0b00001000}, make([]byte, swarm.HashSize-1)...)).String(): true, // 4th bit flipped + } + + for combo := range combinator.IterateReplicaAddresses(input, maxD) { + comboHex := combo.String() + if allCombinations[comboHex] { + 
t.Errorf("Duplicate combination found at count %d: %s", count, comboHex) + } + allCombinations[comboHex] = true + count++ + } + + if count != expectedCount { + t.Fatalf("Expected to iterate %d times, got %d", expectedCount, count) + } if len(allCombinations) != len(expected) { t.Errorf("Mismatched map sizes. Expected %d, got %d", len(expected), len(allCombinations)) } @@ -60,10 +203,10 @@ func TestIterateAddressCombinationsSeq(t *testing.T) { t.Run("maxDepth limits iteration", func(t *testing.T) { input := swarm.NewAddress(make([]byte, swarm.HashSize)) count := 0 - // maxDepth=2 should give 4 items (2^2 for depths 0, 1, 2) + 1 for the maxDepth bit flipped address - expectedCount := 5 + // maxDepth=2 should give 3 items (2^2-1 for depths 1, 2) + 1 for the maxDepth bit flipped address + expectedCount := 4 - for range combinator.IterateAddressCombinations(input, 2) { + for range combinator.IterateReplicaAddresses(input, 2) { count++ } @@ -74,16 +217,16 @@ func TestIterateAddressCombinationsSeq(t *testing.T) { t.Run("Iterator stops correctly at end of byte slice", func(t *testing.T) { // 1 byte = 8 bits. - // Iterator should produce 2^8 = 256 items (for depth=0 through depth=8). + // Iterator should produce 2^8-1 = 255 items (for depth=1 through depth=8). // The 257th item (i=256) would require depth=9, // which needs 2 bytes. The iterator should stop there. input := swarm.NewAddress([]byte{0xDE}) // 1 byte - expectedCount := 1 << 8 // 256 + expectedCount := (1 << 8) - 1 // 255 count := 0 allCombinations := make(map[string]bool) - for combo := range combinator.IterateAddressCombinations(input, maxDepth) { + for combo := range combinator.IterateReplicaAddresses(input, maxDepth) { // Just in case, prevent infinite loop in test if count > expectedCount { t.Fatalf("Iterator produced more than %d items, count=%d", expectedCount, count) @@ -103,27 +246,18 @@ func TestIterateAddressCombinationsSeq(t *testing.T) { }) t.Run("depth=0 edge case (nil slice)", func(t *testing.T) { - // depth=0 is i=0. This needs 0 bytes, which a nil slice has. - // The *next* item, i=1, needs depth=1, which needs 1 byte. + // The iterator starts at i=1, which needs depth=1, which needs 1 byte. // A nil slice fails this. - // So, this should iterate *exactly once*. + // So, this should iterate *exactly zero times*. var input swarm.Address count := 0 - var firstCombo swarm.Address - for combo := range combinator.IterateAddressCombinations(input, maxDepth) { - if count == 0 { - firstCombo = combo - } + for range combinator.IterateReplicaAddresses(input, maxDepth) { count++ } - if count != 1 { - t.Fatalf("Expected exactly 1 item (depth=0) for nil slice, got %d", count) - } - // A copy of a nil slice is a non-nil, zero-length slice - if len(firstCombo.Bytes()) != 0 { - t.Errorf("Expected first item to be empty slice, got %x", firstCombo.Bytes()) + if count != 0 { + t.Fatalf("Expected exactly 0 items for nil slice, got %d", count) } }) @@ -132,7 +266,7 @@ func TestIterateAddressCombinationsSeq(t *testing.T) { count := 0 stopAt := 5 - seq := combinator.IterateAddressCombinations(input, maxDepth) + seq := combinator.IterateReplicaAddresses(input, maxDepth) for range seq { count++ if count == stopAt { @@ -150,24 +284,19 @@ func TestIterateAddressCombinationsSeq(t *testing.T) { var benchAddress = swarm.NewAddress(append([]byte{0xDE, 0xAD, 0xBE, 0xEF}, make([]byte, swarm.HashSize-4)...)) -// runBenchmark is a helper to run the iterator for a fixed number of items. 
-func runBenchmark(b *testing.B, items int) { +// runBenchmark is a helper to run the iterator for a fixed depth. +func runBenchmark(b *testing.B, depth int) { b.Helper() // We run the loop b.N times, as required by the benchmark harness. for b.Loop() { - count := 0 // We use a volatile variable to ensure the loop body // (the slice generation) isn't optimized away. var volatileAddr swarm.Address - seq := combinator.IterateAddressCombinations(benchAddress, maxDepth) + seq := combinator.IterateReplicaAddresses(benchAddress, depth) for combo := range seq { volatileAddr = combo - count++ - if count == items { - break - } } // To prevent compiler optimizing out the loop if volatileAddr isn't used. @@ -181,49 +310,40 @@ func runBenchmark(b *testing.B, items int) { // BenchmarkDepth1 iterates over 2^1 = 2 items func BenchmarkDepth1(b *testing.B) { - runBenchmark(b, 1<<1) + runBenchmark(b, 1) } // BenchmarkDepth2 iterates over 2^2 = 4 items func BenchmarkDepth2(b *testing.B) { - runBenchmark(b, 1<<2) + runBenchmark(b, 2) } // BenchmarkDepth3 iterates over 2^3 = 8 items func BenchmarkDepth3(b *testing.B) { - runBenchmark(b, 1<<3) + runBenchmark(b, 3) } // BenchmarkDepth4 iterates over 2^4 = 16 items func BenchmarkDepth4(b *testing.B) { - runBenchmark(b, 1<<4) + runBenchmark(b, 4) } // BenchmarkDepth8 iterates over 2^8 = 256 items func BenchmarkDepth8(b *testing.B) { - runBenchmark(b, 1<<8) + runBenchmark(b, 8) } // BenchmarkDepth12 iterates over 2^12 = 4096 items func BenchmarkDepth12(b *testing.B) { - runBenchmark(b, 1<<12) + runBenchmark(b, 12) } // BenchmarkDepth16 iterates over 2^16 = 65536 items func BenchmarkDepth16(b *testing.B) { - runBenchmark(b, 1<<16) + runBenchmark(b, 16) } // BenchmarkDepth20 iterates over 2^20 = 1,048,576 items func BenchmarkDepth20(b *testing.B) { - runBenchmark(b, 1<<20) -} - -// addressesToHexMap is a helper to convert a slice of addresses to a map of hex strings. -func addressesToHexMap(addresses []swarm.Address) map[string]bool { - set := make(map[string]bool, len(addresses)) - for _, s := range addresses { - set[s.String()] = true - } - return set + runBenchmark(b, 20) } diff --git a/pkg/replicas/getter_soc.go b/pkg/replicas/getter_soc.go index a40890e5b5f..faa033db192 100644 --- a/pkg/replicas/getter_soc.go +++ b/pkg/replicas/getter_soc.go @@ -35,7 +35,10 @@ type socGetter struct { // NewSocGetter is the getter constructor func NewSocGetter(g storage.Getter, level redundancy.Level) storage.Getter { - return &socGetter{Getter: g, level: level} + return &socGetter{ + Getter: g, + level: min(level, maxRedundancyLevel), + } } const socGetterConcurrency = 4 @@ -52,11 +55,20 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk wg sync.WaitGroup ) + // First try to get the original chunk. + ch, err = g.Getter.Get(ctx, addr) + if err != nil { + errs = errors.Join(errs, fmt.Errorf("get chunk original address %v: %w", addr, err)) + } else { + return ch, nil + } + + // Try to retrieve replicas. 
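+	// Note on the flow below: a weighted semaphore caps in-flight replica
+	// fetches at socGetterConcurrency, and the first chunk delivered on
+	// resultChan cancels the shared context, which stops both the dispatch
+	// loop and any in-flight Gets.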
ctx, cancel := context.WithCancel(ctx) defer cancel() sem := semaphore.NewWeighted(socGetterConcurrency) - replicaIter := combinator.IterateAddressCombinations(addr, int(g.level)) + replicaIter := combinator.IterateReplicaAddresses(addr, int(g.level)) resultChan := make(chan swarm.Chunk, 1) doneChan := make(chan struct{}) diff --git a/pkg/replicas/putter_soc.go b/pkg/replicas/putter_soc.go index 8ad00365e1a..4b5af7284f8 100644 --- a/pkg/replicas/putter_soc.go +++ b/pkg/replicas/putter_soc.go @@ -17,29 +17,38 @@ import ( "github.com/ethersphere/bee/v2/pkg/swarm" ) +// maxRedundancyLevel ensures that no more than 2^4 = 16 replicas are generated +const maxRedundancyLevel = 4 + // socPutter is the private implementation of the public storage.Putter interface // socPutter extends the original putter to a concurrent multiputter type socPutter struct { putter storage.Putter - rLevel redundancy.Level + level redundancy.Level } // NewSocPutter is the putter constructor -func NewSocPutter(p storage.Putter, rLevel redundancy.Level) storage.Putter { +func NewSocPutter(p storage.Putter, level redundancy.Level) storage.Putter { return &socPutter{ putter: p, - rLevel: rLevel, + level: min(level, maxRedundancyLevel), } } // Put makes the putter satisfy the storage.Putter interface func (p *socPutter) Put(ctx context.Context, ch swarm.Chunk) error { + // Put original chunk. + if err := p.putter.Put(ctx, ch); err != nil { + return err + } + var errs error - for replicaAddr := range combinator.IterateAddressCombinations(ch.Address(), int(p.rLevel)) { - sch := swarm.NewChunk(replicaAddr, ch.Data()) + // Put replicas. + for replicaAddr := range combinator.IterateReplicaAddresses(ch.Address(), int(p.level)) { + ch := swarm.NewChunk(replicaAddr, ch.Data()) - if err := p.putter.Put(ctx, sch); err != nil { + if err := p.putter.Put(ctx, ch); err != nil { errs = errors.Join(errs, err) } } @@ -58,7 +67,7 @@ func NewSocPutterSession(p storer.PutterSession, rLevel redundancy.Level) storer return &socPutterSession{ socPutter{ putter: p, - rLevel: rLevel, + level: rLevel, }, p, } } From 7aa70e4d233d1b99648bdd2961c416b60c93589b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?= Date: Wed, 12 Nov 2025 17:32:40 +0100 Subject: [PATCH 51/62] fix: safe addresses from combinator --- pkg/replicas/combinator/combinator.go | 35 ++++++--------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/pkg/replicas/combinator/combinator.go b/pkg/replicas/combinator/combinator.go index 9a280091557..7671b9cb4f7 100644 --- a/pkg/replicas/combinator/combinator.go +++ b/pkg/replicas/combinator/combinator.go @@ -19,37 +19,16 @@ import ( // // # Performance and Memory Considerations // -// For optimal performance, this function yields the same byte slice on each -// iteration, modifying its content in place. This avoids memory allocations -// within the loop. -// -// Consequently, it is unsafe to retain a reference to the yielded slice after -// the loop advances. If the slice needs to be stored, a copy must be created. -// -// Example of correct usage: -// -// // Safe: A copy of the slice is created and stored. -// var allCombinations [][]byte -// for combo := range IterateReplicaAddresses(data, 8) { -// allCombinations = append(allCombinations, slices.Clone(combo)) -// } -// -// Example of incorrect usage: -// -// // Unsafe: This will result in a slice where all elements point to the -// // same underlying byte slice, which will hold the value of the last -// // combination generated. 
-// var allCombinationsBad [][]byte
-// for combo := range IterateReplicaAddresses(data, 8) {
-//	allCombinationsBad = append(allCombinationsBad, combo)
-// }
+// To ensure safe use of the yielded addresses, this function returns a new copy
+// of the address on each iteration. This prevents accidental modification of
+// previously yielded addresses.
 //
 // The iterator terminates if the depth exceeds maxDepth or if the input data
 // slice is not long enough for the bit manipulations required at the next
 // depth level.
 func IterateReplicaAddresses(addr swarm.Address, maxDepth int) iter.Seq[swarm.Address] {
 	// State variables for the iterator closure.
-	// A single buffer is used, mutated, and yielded in each iteration.
+	// A single buffer is used and mutated in each iteration, and a copy is yielded.
 	// It is initialized with a copy of the original address data.
 	currentSlice := append([]byte{}, addr.Bytes()...)
@@ -138,9 +117,9 @@ func IterateReplicaAddresses(addr swarm.Address, maxDepth int) iter.Seq[swarm.Ad
 		}
 		prevCombinationIndex = combinationIndex // Update for the next iteration.
 
-		// Yield the mutated slice. If yield returns false, the consumer
-		// has requested to stop the iteration.
-		if !yield(swarm.NewAddress(currentSlice)) {
+		// Yield a copy of the mutated slice. If yield returns false, the
+		// consumer has requested to stop the iteration.
+		if !yield(swarm.NewAddress(append([]byte(nil), currentSlice...))) {
 			return // Consumer-requested stop.
 		}
 
From 5db08000e9a3eb6478d58b97bf7ce3c97cbbfa2b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?=
Date: Wed, 12 Nov 2025 17:47:59 +0100
Subject: [PATCH 52/62] fix(soc): correctly check the 5th bit flipping

---
 pkg/soc/validator.go      | 6 +++---
 pkg/soc/validator_test.go | 3 +--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/pkg/soc/validator.go b/pkg/soc/validator.go
index 47c9a9f48b4..0b478d5e71a 100644
--- a/pkg/soc/validator.go
+++ b/pkg/soc/validator.go
@@ -31,10 +31,10 @@ func Valid(ch swarm.Chunk) bool {
 	if !ch.Address().Equal(address) {
 		c := ch.Address().Bytes()
 		a := address.Bytes()
-		// for disperse replicas it is allowed to have the first 4 bits of the first
+		// For disperse replicas it is allowed to have the first 4 bits of the first
 		// byte to be different, and the last 4 bits must be equal.
-		// another case is when only the fifth bit is flipped
-		return ((c[0]&0x0f == a[0]&0x0f) || (c[0]^a[0] == 0x10)) && bytes.Equal(c[1:], a[1:])
+		// Another case is when only the fifth bit from the left is flipped.
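+		// Example (illustrative values): for an original first byte of
+		// 0b10100110, replica first bytes 0bxxxx0110 (any upper nibble) or
+		// 0b10101110 (only the 1<<3 bit differing) are accepted; all
+		// remaining bytes must match exactly.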
+ return ((c[0]&0x0f == a[0]&0x0f) || (c[0]^a[0] == 1<<3)) && bytes.Equal(c[1:], a[1:]) } return true diff --git a/pkg/soc/validator_test.go b/pkg/soc/validator_test.go index 6ba1fc1a34b..c900da740e0 100644 --- a/pkg/soc/validator_test.go +++ b/pkg/soc/validator_test.go @@ -224,7 +224,7 @@ func TestValidDisperseReplicaAddress(t *testing.T) { t.Run("5th bit flipped", func(t *testing.T) { addr := make([]byte, len(originalAddr)) copy(addr, originalAddr) - addr[0] ^= 0x10 + addr[0] ^= 1 << 3 // flip 5th bit from the left replica := swarm.NewChunk(swarm.NewAddress(addr), socCh.Data()) if !soc.Valid(replica) { @@ -254,4 +254,3 @@ func TestValidDisperseReplicaAddress(t *testing.T) { } }) } - From 3b6452bd814f74c9949432b4d8292c5d4e9e1736 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?= Date: Wed, 12 Nov 2025 18:12:28 +0100 Subject: [PATCH 53/62] chore: adjust comments in tests --- pkg/replicas/combinator/combinator_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/replicas/combinator/combinator_test.go b/pkg/replicas/combinator/combinator_test.go index abe90b28bf2..41cef80b372 100644 --- a/pkg/replicas/combinator/combinator_test.go +++ b/pkg/replicas/combinator/combinator_test.go @@ -52,7 +52,7 @@ func TestIterateReplicaAddressesSeq(t *testing.T) { expectedCount := 1 << maxD // 2^1 = 2 items expected := map[string]bool{ swarm.NewAddress(append([]byte{0b10000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=1 (depth=1) - swarm.NewAddress(append([]byte{0b01000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // 1st bit flipped + swarm.NewAddress(append([]byte{0b01000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // 2nd bit flipped } for combo := range combinator.IterateReplicaAddresses(input, maxD) { @@ -87,7 +87,7 @@ func TestIterateReplicaAddressesSeq(t *testing.T) { swarm.NewAddress(append([]byte{0b10000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=1 (depth=1) swarm.NewAddress(append([]byte{0b01000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=2 (depth=2) swarm.NewAddress(append([]byte{0b11000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=3 (depth=2) - swarm.NewAddress(append([]byte{0b00100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // 2nd bit flipped + swarm.NewAddress(append([]byte{0b00100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // 3rd bit flipped } for combo := range combinator.IterateReplicaAddresses(input, maxD) { @@ -126,7 +126,7 @@ func TestIterateReplicaAddressesSeq(t *testing.T) { swarm.NewAddress(append([]byte{0b10100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=5 (depth=3) swarm.NewAddress(append([]byte{0b01100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=6 (depth=3) swarm.NewAddress(append([]byte{0b11100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=7 (depth=3) - swarm.NewAddress(append([]byte{0b00010000}, make([]byte, swarm.HashSize-1)...)).String(): true, // 3rd bit flipped + swarm.NewAddress(append([]byte{0b00010000}, make([]byte, swarm.HashSize-1)...)).String(): true, // 4th bit flipped } for combo := range combinator.IterateReplicaAddresses(input, maxD) { @@ -175,7 +175,7 @@ func TestIterateReplicaAddressesSeq(t *testing.T) { swarm.NewAddress(append([]byte{0b10110000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=13 (depth=4) swarm.NewAddress(append([]byte{0b01110000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=14 (depth=4) 
swarm.NewAddress(append([]byte{0b11110000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=15 (depth=4) - swarm.NewAddress(append([]byte{0b00001000}, make([]byte, swarm.HashSize-1)...)).String(): true, // 4th bit flipped + swarm.NewAddress(append([]byte{0b00001000}, make([]byte, swarm.HashSize-1)...)).String(): true, // 5th bit flipped } for combo := range combinator.IterateReplicaAddresses(input, maxD) { From 77fa1e3cdd2314ba20688a0561392029c4cf96ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?= Date: Wed, 12 Nov 2025 19:21:01 +0100 Subject: [PATCH 54/62] feat: new replicas soc getter tests --- pkg/replicas/getter_soc_test.go | 513 +++++++++++++++++++++----------- 1 file changed, 341 insertions(+), 172 deletions(-) diff --git a/pkg/replicas/getter_soc_test.go b/pkg/replicas/getter_soc_test.go index 84428bbe18c..4a8962bce33 100644 --- a/pkg/replicas/getter_soc_test.go +++ b/pkg/replicas/getter_soc_test.go @@ -2,215 +2,384 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// This file is a copy of the original getter_test.go file -// and tailored to socGetter implementation. - package replicas_test import ( "context" - "crypto/rand" "errors" - "fmt" - "io" + "sync" "testing" + "testing/synctest" "time" - "github.com/ethersphere/bee/v2/pkg/cac" - "github.com/ethersphere/bee/v2/pkg/crypto" "github.com/ethersphere/bee/v2/pkg/file/redundancy" "github.com/ethersphere/bee/v2/pkg/replicas" - "github.com/ethersphere/bee/v2/pkg/soc" + "github.com/ethersphere/bee/v2/pkg/replicas/combinator" "github.com/ethersphere/bee/v2/pkg/storage" + "github.com/ethersphere/bee/v2/pkg/storage/inmemchunkstore" + "github.com/ethersphere/bee/v2/pkg/swarm" ) -func TestSOCGetter(t *testing.T) { +func TestSocGetter(t *testing.T) { t.Parallel() - // failure is a struct that defines a failure scenario to test - type failure struct { - name string - err error - errf func(int, int) func(int) chan struct{} - } - // failures is a list of failure scenarios to test - failures := []failure{ - { - "timeout", - context.Canceled, - func(_, _ int) func(i int) chan struct{} { - return func(i int) chan struct{} { - return nil - } - }, - }, - { - "not found", - storage.ErrNotFound, - func(_, _ int) func(i int) chan struct{} { - c := make(chan struct{}) - close(c) - return func(i int) chan struct{} { - return c - } - }, - }, + + var ( + chunk = swarm.NewChunk(swarm.NewAddress(make([]byte, 32)), make([]byte, 32)) + chunkAddr = chunk.Address() + mock = &mockGetter{ + getter: inmemchunkstore.New(), + } + socPutter = replicas.NewSocPutter(mock.getter, redundancy.MEDIUM) + getter = replicas.NewSocGetter(mock, redundancy.MEDIUM) + ) + + t.Run("happy path", func(t *testing.T) { + if err := socPutter.Put(context.Background(), chunk); err != nil { + t.Fatal(err) + } + got, err := getter.Get(context.Background(), chunkAddr) + if err != nil { + t.Fatalf("got error %v", err) + } + if !got.Equal(chunk) { + t.Fatalf("got chunk %v, want %v", got, chunk) + } + }) + + t.Run("not found", func(t *testing.T) { + _, err := getter.Get(context.Background(), swarm.RandAddress(t)) + if !errors.Is(err, storage.ErrNotFound) { + t.Fatalf("got error %v, want %v", err, storage.ErrNotFound) + } + }) +} + +func TestSocGetter_ReplicaFound(t *testing.T) { + t.Parallel() + + var ( + chunk = swarm.NewChunk(swarm.NewAddress(make([]byte, 32)), make([]byte, 32)) + chunkAddr = chunk.Address() + mock = &mockGetter{ + getter: inmemchunkstore.New(), + } + socPutter = 
replicas.NewSocPutter(mock.getter, redundancy.MEDIUM) + getter = replicas.NewSocGetter(mock, redundancy.MEDIUM) + ) + + var replicaChunk swarm.Chunk + replicaIter := combinator.IterateReplicaAddresses(chunkAddr, int(redundancy.MEDIUM)) + for replicaAddr := range replicaIter { + replicaChunk = swarm.NewChunk(replicaAddr, chunk.Data()) + if err := socPutter.Put(context.Background(), replicaChunk); err != nil { + t.Fatal(err) + } + break + } + + got, err := getter.Get(context.Background(), chunkAddr) + if err != nil { + t.Fatalf("got error %v", err) } - type test struct { - name string - failure failure - level int - count int - found int - } - - var tests []test - for _, f := range failures { - for level, c := range redundancy.GetReplicaCounts() { - for j := 0; j <= c*2+1; j++ { - tests = append(tests, test{ - name: fmt.Sprintf("%s level %d count %d found %d", f.name, level, c, j), - failure: f, - level: level, - count: c, - found: j, - }) + if !got.Equal(chunk) { + t.Fatalf("got chunk %v, want %v", got, chunk) + } +} + +func TestSocGetter_MultipleReplicasFound(t *testing.T) { + t.Parallel() + + var ( + chunk = swarm.NewChunk(swarm.NewAddress(make([]byte, 32)), make([]byte, 32)) + chunkAddr = chunk.Address() + mock = &mockGetter{ + getter: inmemchunkstore.New(), + } + socPutter = replicas.NewSocPutter(mock.getter, redundancy.MEDIUM) + getter = replicas.NewSocGetter(mock, redundancy.MEDIUM) + ) + + replicaIter := combinator.IterateReplicaAddresses(chunkAddr, int(redundancy.MEDIUM)) + var replicaChunk1, replicaChunk2 swarm.Chunk + i := 0 + for replicaAddr := range replicaIter { + if i == 0 { + replicaChunk1 = swarm.NewChunk(replicaAddr, chunk.Data()) + if err := socPutter.Put(context.Background(), replicaChunk1); err != nil { + t.Fatal(err) + } + } else { + replicaChunk2 = swarm.NewChunk(replicaAddr, chunk.Data()) + if err := socPutter.Put(context.Background(), replicaChunk2); err != nil { + t.Fatal(err) } + break } + i++ } - // initialise the base chunk - chunkLen := 420 - buf := make([]byte, chunkLen) - if _, err := io.ReadFull(rand.Reader, buf); err != nil { - t.Fatal(err) - } - ch, err := cac.New(buf) + got, err := getter.Get(context.Background(), chunkAddr) if err != nil { - t.Fatal(err) + t.Fatalf("got error %v", err) } - // create soc from cac - // test key to sign soc chunks - privKey, err := crypto.GenerateSecp256k1Key() - if err != nil { - t.Fatal(err) + + if !got.Equal(chunk) { + t.Fatalf("got unexpected chunk %v, want %v", got, chunk) } - signer := crypto.NewDefaultSigner(privKey) - id := make([]byte, 32) - if _, err := rand.Read(id); err != nil { - t.Fatal(err) +} + +func TestSocGetter_ContextCanceled(t *testing.T) { + t.Parallel() + + var ( + chunkAddr = swarm.RandAddress(t) + mock = &mockGetterWithDelay{ + getter: inmemchunkstore.New(), + } + getter = replicas.NewSocGetter(mock, redundancy.MEDIUM) + ) + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + _, err := getter.Get(ctx, chunkAddr) + if !errors.Is(err, context.Canceled) { + t.Fatalf("got error %v, want %v", err, context.Canceled) } - s := soc.New(id, ch) - ch, err = s.Sign(signer) - if err != nil { - t.Fatal(err) +} + +func TestSocGetter_DeadlineExceeded(t *testing.T) { + t.Parallel() + + synctest.Test(t, func(t *testing.T) { + t.Helper() + + var ( + chunkAddr = swarm.RandAddress(t) + mock = &mockGetterWithDelay{ + getter: inmemchunkstore.New(), + getDelay: 100 * time.Millisecond, + } + getter = replicas.NewSocGetter(mock, redundancy.MEDIUM) + ) + + ctx, cancel := 
context.WithTimeout(context.Background(), 50*time.Millisecond) + defer cancel() + + _, err := getter.Get(ctx, chunkAddr) + if !errors.Is(err, context.DeadlineExceeded) { + t.Fatalf("got error %v, want %v", err, context.DeadlineExceeded) + } + }) +} + +func TestSocGetter_AllReplicasFail(t *testing.T) { + t.Parallel() + + var ( + chunkAddr = swarm.RandAddress(t) + mock = &mockGetter{ + getter: inmemchunkstore.New(), + err: errors.New("some error"), + } + getter = replicas.NewSocGetter(mock, redundancy.MEDIUM) + ) + + _, err := getter.Get(context.Background(), chunkAddr) + if err == nil { + t.Fatal("expected error, got nil") } +} + +func TestSocGetter_PartialReplicaFailure(t *testing.T) { + t.Parallel() + + var ( + chunk = swarm.NewChunk(swarm.NewAddress(make([]byte, 32)), make([]byte, 32)) + chunkAddr = chunk.Address() + mock = &failingMockGetter{ + getter: inmemchunkstore.New(), + failAddrs: make(map[string]struct{}), + } + socPutter = replicas.NewSocPutter(mock.getter, redundancy.MEDIUM) + getter = replicas.NewSocGetter(mock, redundancy.MEDIUM) + ) - // reset retry interval to speed up tests - retryInterval := replicas.RetryInterval - defer func() { replicas.RetryInterval = retryInterval }() - replicas.RetryInterval = 100 * time.Millisecond + replicaIter := combinator.IterateReplicaAddresses(chunkAddr, int(redundancy.MEDIUM)) - // run the tests - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - // initiate a chunk retrieval session using replicas.Getter - // embedding a testGetter that simulates the behaviour of a chunk store - store := newTestGetter(ch, tc.found, tc.failure.errf(tc.found, tc.count)) - g := replicas.NewSocGetter(store, redundancy.Level(tc.level)) - store.now = time.Now() - ctx, cancel := context.WithCancel(context.Background()) - if tc.found > tc.count { - wait := replicas.RetryInterval / 2 * time.Duration(1+2*tc.level) - go func() { - time.Sleep(wait) - cancel() - }() + i := 0 + var successChunk swarm.Chunk + for addr := range replicaIter { + switch i { + case 0: + // First replica will fail + mock.failAddrs[addr.String()] = struct{}{} + case 1: + // Second replica will succeed + successChunk = swarm.NewChunk(addr, chunk.Data()) + if err := socPutter.Put(context.Background(), successChunk); err != nil { + t.Fatal(err) } - _, err := g.Get(ctx, ch.Address()) - cancel() + default: + // Make other replicas fail + mock.failAddrs[addr.String()] = struct{}{} + } + i++ + } - // test the returned error - if tc.found <= tc.count { - if err != nil { - t.Fatalf("expected no error. got %v", err) - } - // if j <= c, the original chunk should be retrieved and the context should be cancelled - t.Run("retrievals cancelled", func(t *testing.T) { - select { - case <-time.After(100 * time.Millisecond): - t.Fatal("timed out waiting for context to be cancelled") - case <-store.cancelled: - } - }) + got, err := getter.Get(context.Background(), chunkAddr) + if err != nil { + t.Fatalf("got error %v", err) + } + if !got.Equal(chunk) { + t.Fatalf("got chunk %v, want %v", got, chunk) + } +} - } else { - if err == nil { - t.Fatalf("expected error. 
got ") +func TestSocGetter_DifferentRedundancyLevel(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + uploadRedundancyLevel redundancy.Level + retrieveRedundancyLevel redundancy.Level + }{ + { + name: "upload PARANOID, retrieve MEDIUM", + uploadRedundancyLevel: redundancy.PARANOID, + retrieveRedundancyLevel: redundancy.MEDIUM, + }, + { + name: "upload PARANOID, retrieve STRONG", + uploadRedundancyLevel: redundancy.PARANOID, + retrieveRedundancyLevel: redundancy.STRONG, + }, + { + name: "upload STRONG, retrieve MEDIUM", + uploadRedundancyLevel: redundancy.STRONG, + retrieveRedundancyLevel: redundancy.MEDIUM, + }, + { + name: "upload MEDIUM, retrieve MEDIUM", + uploadRedundancyLevel: redundancy.MEDIUM, + retrieveRedundancyLevel: redundancy.MEDIUM, + }, + { + name: "upload INSANE, retrieve MEDIUM", + uploadRedundancyLevel: redundancy.INSANE, + retrieveRedundancyLevel: redundancy.MEDIUM, + }, + { + name: "upload INSANE, retrieve PARANOID", + uploadRedundancyLevel: redundancy.INSANE, + retrieveRedundancyLevel: redundancy.PARANOID, + }, + { + name: "upload NONE, retrieve MEDIUM", + uploadRedundancyLevel: redundancy.NONE, + retrieveRedundancyLevel: redundancy.MEDIUM, + }, + { + name: "upload MEDIUM, retrieve NONE", + uploadRedundancyLevel: redundancy.MEDIUM, + retrieveRedundancyLevel: redundancy.NONE, + }, + { + name: "upload MEDIUM, retrieve STRONG (should still find if replica exists)", + uploadRedundancyLevel: redundancy.MEDIUM, + retrieveRedundancyLevel: redundancy.STRONG, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + var ( + chunk = swarm.NewChunk(swarm.NewAddress(make([]byte, 32)), make([]byte, 32)) + chunkAddr = chunk.Address() + mock = &mockGetter{ + getter: inmemchunkstore.New(), } + ) - t.Run("returns correct error", func(t *testing.T) { - if !errors.Is(err, replicas.ErrSwarmageddon) { - t.Fatalf("incorrect error. want Swarmageddon. got %v", err) - } - if !errors.Is(err, tc.failure.err) { - t.Fatalf("incorrect error. want it to wrap %v. got %v", tc.failure.err, err) - } - }) + // Use socPutter to put the original chunk and its replicas + putter := replicas.NewSocPutter(mock.getter, tc.uploadRedundancyLevel) + err := putter.Put(context.Background(), chunk) + if err != nil { + t.Fatalf("socPutter.Put failed: %v", err) } - attempts := int(store.attempts.Load()) - // the original chunk should be among those attempted for retrieval - addresses := store.addresses[:attempts] - latencies := store.latencies[:attempts] - t.Run("original address called", func(t *testing.T) { - select { - case <-time.After(100 * time.Millisecond): - t.Fatal("timed out waiting form original address to be attempted for retrieval") - case <-store.origCalled: - i := store.origIndex - if i > 2 { - t.Fatalf("original address called too late. want at most 2 (preceding attempts). got %v (latency: %v)", i, latencies[i]) - } - addresses = append(addresses[:i], addresses[i+1:]...) - latencies = append(latencies[:i], latencies[i+1:]...) - attempts-- - } - }) + getter := replicas.NewSocGetter(mock, tc.retrieveRedundancyLevel) - t.Run("retrieved count", func(t *testing.T) { - if attempts > tc.count { - t.Fatalf("too many attempts to retrieve a replica: want at most %v. got %v.", tc.count, attempts) - } - if tc.found > tc.count { - if attempts < tc.count { - t.Fatalf("too few attempts to retrieve a replica: want at least %v. 
got %v.", tc.count, attempts) - } - return - } - maxValue := 2 - for i := 1; i < tc.level && maxValue < tc.found; i++ { - maxValue = maxValue * 2 - } - if attempts > maxValue { - t.Fatalf("too many attempts to retrieve a replica: want at most %v. got %v. latencies %v", maxValue, attempts, latencies) - } - }) + got, err := getter.Get(context.Background(), chunkAddr) + if err != nil { + t.Fatalf("got error %v", err) + } + if got == nil { + t.Fatal("expected a chunk, got nil") + } - t.Run("dispersion", func(t *testing.T) { - if err := dispersed(redundancy.Level(tc.level), addresses); err != nil { - t.Fatalf("addresses are not dispersed: %v", err) - } - }) - - t.Run("latency", func(t *testing.T) { - counts := redundancy.GetReplicaCounts() - for i, latency := range latencies { - multiplier := latency / replicas.RetryInterval - if multiplier > 0 && i < counts[multiplier-1] { - t.Fatalf("incorrect latency for retrieving replica %d: %v", i, err) + // Verify that the retrieved chunk is either the original or one of its replicas + found := false + if got.Equal(chunk) { + found = true + } else { + replicaIter := combinator.IterateReplicaAddresses(chunkAddr, int(tc.uploadRedundancyLevel)) + for replicaAddr := range replicaIter { + replicaChunk := swarm.NewChunk(replicaAddr, chunk.Data()) + if got.Equal(replicaChunk) { + found = true + break } } - }) + } + + if !found { + t.Fatalf("retrieved chunk %v is neither the original nor any of its replicas", got) + } }) } } + +type mockGetter struct { + getter storage.ChunkStore + err error +} + +func (m *mockGetter) Get(ctx context.Context, addr swarm.Address) (swarm.Chunk, error) { + if m.err != nil { + return nil, m.err + } + return m.getter.Get(ctx, addr) +} + +type failingMockGetter struct { + getter storage.ChunkStore + failAddrs map[string]struct{} + mu sync.Mutex +} + +func (m *failingMockGetter) Get(ctx context.Context, addr swarm.Address) (swarm.Chunk, error) { + m.mu.Lock() + defer m.mu.Unlock() + + if _, found := m.failAddrs[addr.String()]; found { + return nil, errors.New("failed to get chunk") + } + return m.getter.Get(ctx, addr) +} + +type mockGetterWithDelay struct { + getter storage.ChunkStore + err error + getDelay time.Duration +} + +func (m *mockGetterWithDelay) Get(ctx context.Context, addr swarm.Address) (swarm.Chunk, error) { + time.Sleep(m.getDelay) + if m.err != nil { + return nil, m.err + } + return m.getter.Get(ctx, addr) +} From b3c97811384ee76fda412282f3b1e419b7f925cb Mon Sep 17 00:00:00 2001 From: Ljubisa Gacevic Date: Wed, 12 Nov 2025 19:37:58 +0100 Subject: [PATCH 55/62] chore(ci): set beekeeper branch to feat/soc-dispersed --- .github/workflows/beekeeper.yml | 2 +- pkg/api/chunk.go | 2 +- pkg/replicas/putter_soc.go | 7 +++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/beekeeper.yml b/.github/workflows/beekeeper.yml index 4c69fce7a6f..3ae978ec5b6 100644 --- a/.github/workflows/beekeeper.yml +++ b/.github/workflows/beekeeper.yml @@ -14,7 +14,7 @@ env: SETUP_CONTRACT_IMAGE: "ethersphere/bee-localchain" SETUP_CONTRACT_IMAGE_TAG: "0.9.4" BEELOCAL_BRANCH: "main" - BEEKEEPER_BRANCH: "master" + BEEKEEPER_BRANCH: "feat/soc-dispersed" BEEKEEPER_METRICS_ENABLED: false REACHABILITY_OVERRIDE_PUBLIC: true BATCHFACTOR_OVERRIDE_PUBLIC: 2 diff --git a/pkg/api/chunk.go b/pkg/api/chunk.go index a9ffcad5ae0..fba6ca6d29e 100644 --- a/pkg/api/chunk.go +++ b/pkg/api/chunk.go @@ -260,8 +260,8 @@ func (s *Service) chunkGetHandler(w http.ResponseWriter, r *http.Request) { loggerV1.Debug("chunk not found", "address", 
address) jsonhttp.NotFound(w, "chunk not found") return - } + logger.Debug("read chunk failed", "chunk_address", address, "error", err) logger.Error(nil, "read chunk failed") jsonhttp.InternalServerError(w, "read chunk failed") diff --git a/pkg/replicas/putter_soc.go b/pkg/replicas/putter_soc.go index 4b5af7284f8..9b4dfd52f42 100644 --- a/pkg/replicas/putter_soc.go +++ b/pkg/replicas/putter_soc.go @@ -9,6 +9,7 @@ package replicas import ( "context" "errors" + "fmt" "github.com/ethersphere/bee/v2/pkg/file/redundancy" "github.com/ethersphere/bee/v2/pkg/replicas/combinator" @@ -37,19 +38,17 @@ func NewSocPutter(p storage.Putter, level redundancy.Level) storage.Putter { // Put makes the putter satisfy the storage.Putter interface func (p *socPutter) Put(ctx context.Context, ch swarm.Chunk) error { - // Put original chunk. if err := p.putter.Put(ctx, ch); err != nil { - return err + return fmt.Errorf("put original chunk: %w", err) } var errs error - // Put replicas. for replicaAddr := range combinator.IterateReplicaAddresses(ch.Address(), int(p.level)) { ch := swarm.NewChunk(replicaAddr, ch.Data()) if err := p.putter.Put(ctx, ch); err != nil { - errs = errors.Join(errs, err) + errs = errors.Join(errs, fmt.Errorf("put replica chunk %v: %w", ch.Address(), err)) } } From ee3f535e75826753dd335655d811b66d05ac51bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?= Date: Wed, 12 Nov 2025 20:14:27 +0100 Subject: [PATCH 56/62] chore: polish some code comments in replicas package --- pkg/replicas/getter_soc.go | 17 +++++------------ pkg/replicas/putter_soc.go | 12 ++++++------ 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/pkg/replicas/getter_soc.go b/pkg/replicas/getter_soc.go index faa033db192..8da10f1edfd 100644 --- a/pkg/replicas/getter_soc.go +++ b/pkg/replicas/getter_soc.go @@ -2,8 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// the code below implements the integration of dispersed replicas in chunk fetching. -// using storage.Getter interface. package replicas import ( @@ -19,21 +17,15 @@ import ( "golang.org/x/sync/semaphore" ) -// socGetter is the private implementation of storage.Getter, an interface for -// retrieving chunks. This getter embeds the original simple chunk getter and extends it -// to a multiplexed variant that fetches chunks with replicas for SOC. -// -// the strategy to retrieve a chunk that has replicas can be configured with a few parameters: -// - RetryInterval: the delay before a new batch of replicas is fetched. -// - depth: 2^{depth} is the total number of additional replicas that have been uploaded -// (by default, it is assumed to be 4, ie. total of 16) -// - (not implemented) pivot: replicas with address in the proximity of pivot will be tried first +// socGetter is the implementation of storage.Getter. This getter embeds the +// original simple chunk getter and extends it to a multiplexed variant that +// fetches chunks with replicas for SOC. type socGetter struct { storage.Getter level redundancy.Level } -// NewSocGetter is the getter constructor +// NewSocGetter is the getter constructor. func NewSocGetter(g storage.Getter, level redundancy.Level) storage.Getter { return &socGetter{ Getter: g, @@ -41,6 +33,7 @@ func NewSocGetter(g storage.Getter, level redundancy.Level) storage.Getter { } } +// Number of parallel replica get requests. 
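+// Each Get call acquires from a semaphore of this size (semaphore.NewWeighted(socGetterConcurrency) in Get), so at most four replica fetches are in flight per lookup.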
const socGetterConcurrency = 4 // Get makes the socGetter satisfy the storage.Getter interface diff --git a/pkg/replicas/putter_soc.go b/pkg/replicas/putter_soc.go index 9b4dfd52f42..d1712cac57e 100644 --- a/pkg/replicas/putter_soc.go +++ b/pkg/replicas/putter_soc.go @@ -21,14 +21,14 @@ import ( // maxRedundancyLevel ensures that no more than 2^4 = 16 replicas are generated const maxRedundancyLevel = 4 -// socPutter is the private implementation of the public storage.Putter interface -// socPutter extends the original putter to a concurrent multiputter +// socPutter is the implementation of the public storage.Putter interface. +// socPutter extends the original putter to a concurrent multiputter. type socPutter struct { putter storage.Putter level redundancy.Level } -// NewSocPutter is the putter constructor +// NewSocPutter is the putter constructor. func NewSocPutter(p storage.Putter, level redundancy.Level) storage.Putter { return &socPutter{ putter: p, @@ -36,7 +36,7 @@ func NewSocPutter(p storage.Putter, level redundancy.Level) storage.Putter { } } -// Put makes the putter satisfy the storage.Putter interface +// Put makes the putter satisfy the storage.Putter interface. func (p *socPutter) Put(ctx context.Context, ch swarm.Chunk) error { if err := p.putter.Put(ctx, ch); err != nil { return fmt.Errorf("put original chunk: %w", err) @@ -55,13 +55,13 @@ func (p *socPutter) Put(ctx context.Context, ch swarm.Chunk) error { return errs } -// socPutterSession extends the original socPutter +// socPutterSession extends the original socPutter. type socPutterSession struct { socPutter ps storer.PutterSession } -// NewSocPutterSession is the putterSession constructor +// NewSocPutterSession is the putterSession constructor. func NewSocPutterSession(p storer.PutterSession, rLevel redundancy.Level) storer.PutterSession { return &socPutterSession{ socPutter{ From 78bca3c4eeae7a29802441be614662f7b1819a19 Mon Sep 17 00:00:00 2001 From: Ljubisa Gacevic Date: Thu, 13 Nov 2025 08:41:14 +0100 Subject: [PATCH 57/62] chore(replicas): remove Questions.md --- pkg/replicas/QUESTIONS.md | 227 -------------------------------------- 1 file changed, 227 deletions(-) delete mode 100644 pkg/replicas/QUESTIONS.md diff --git a/pkg/replicas/QUESTIONS.md b/pkg/replicas/QUESTIONS.md deleted file mode 100644 index 9bbbefe3d14..00000000000 --- a/pkg/replicas/QUESTIONS.md +++ /dev/null @@ -1,227 +0,0 @@ -# Replicas Package - Research Questions - -Questions about the design and implementation of the `replicas` package. - -## 1. Error Handling in Put Operations - -**Question:** What is the expected behavior when some replica `Put` operations succeed and others fail? - -**Why this question matters:** - -- In `putter.go:36-60` and `putter_soc.go:37-65`, all errors are collected and joined -- The function returns `errors.Join(errs...)` which includes all errors -- If 15 out of 16 replicas succeed, is this considered a success or failure? -- The caller receives all errors but may not know which replicas were successfully stored - -**Current Behavior:** - -- All errors are collected and returned together -- No distinction between partial success and complete failure -- Caller must inspect the joined error to determine success rate - -**Answer/Suggestion:** - -- Is partial replication acceptable? 
**This must be validated against Book of Swarm probability calculations** -- Consider returning a structured result with success count and errors -- If partial replication is acceptable, document the impact on reliability guarantees - -**Viktor**: Return apprpriate answer, and how many was expected/succeeded? We should decide if error is appropriate! - ---- - -## 2. Context Cancellation in Put Operations - -**Question:** Are `Put` operations properly respecting context cancellation? - -**Why this question matters:** - -- In `putter.go` and `putter_soc.go`, the context is passed to `Put` calls -- However, if the context is cancelled, all goroutines continue running until completion -- There's no early termination when context is cancelled -- This could lead to wasted resources if the caller cancels the operation - -**Current Implementation:** - -- Context is passed to `p.putter.Put(ctx, sch)` but cancellation is not checked -- `wg.Wait()` waits for all goroutines regardless of context state - -**Answer/Suggestion:** - -- Check `ctx.Done()` in the goroutine loop -- Cancel remaining operations when context is cancelled -- **Important**: If cancellation is allowed, document the impact on replica count and reliability guarantees - -**Viktor**: User should be able to cancel. - -## 3. Swarmageddon Error Strategy - -**Question:** Is the "Swarmageddon" error approach the right way to handle complete replica retrieval failure? - -**Why this question matters:** - -- `ErrSwarmageddon` is returned when all replicas fail to retrieve -- The error message suggests this is an extremely rare event -- However, the error handling doesn't distinguish between temporary network issues and permanent data loss -- **Error Message Clarity**: The "Swarmageddon" term is not clear to users. The error message "swarmageddon has begun" doesn't explain what happened or what it means -- **Semantic Confusion**: The term "Swarmageddon" historically refers to complete data loss on the entire network, but the code uses it when all replicas of a single chunk fail to retrieve -- **Scope Question**: Is the extremely rare event (all replicas of one chunk failing) equivalent to assuming that data on the whole network is lost? Or is it just a local retrieval failure for that specific chunk? -- **User Experience**: Users receiving this error may not understand: - - Whether this is a temporary issue or permanent data loss - - Whether it affects just their chunk or the entire network - - What actions they can take (retry? report? accept loss?) - -The question should be validated with the research team to: - -- Clarify the intended meaning of "Swarmageddon" -- Determine if the error message should be more descriptive -- Decide if the term should be changed to avoid confusion -- Establish whether retry logic should be implemented - -**Viktor**: Return apprpriate answer, and how many was expected/succeeded? We should decide if error is appropriate! - ---- - -## 4. Concurrent Put Operations with Disk I/O - -**Question:** Does it make sense to use concurrent `Put` operations when the underlying storage layer performs disk I/O operations that are serialized? 
- -**Why this question matters:** - -- The `putter.go` and `putter_soc.go` implementations use `sync.WaitGroup` to concurrently call `Put` for all replicas -- However, the underlying storage layer has multiple serialization points: - - **Upload Store Global Lock**: `pkg/storer/uploadstore.go:74` uses `db.Lock(uploadsLock)` which serializes all upload operations - - **Sharky Serialization**: `pkg/sharky/shard.go` processes writes sequentially per shard through channels - - **Transaction Locking**: `pkg/storer/internal/transaction/transaction.go:237` locks per chunk address - -**Current Behavior:** - -- Multiple goroutines are spawned to call `Put` concurrently -- All goroutines serialize at the upload store lock -- No actual parallelism is achieved -- Overhead of goroutine creation and context switching without benefit - -**Answer/Suggestion:** - -- If the global lock is intentional for consistency, consider making `Put` operations sequential to reduce overhead - -**Viktor**: Use sequential approach. - ---- - -## 5. Goroutine Explosion with Multiple Chunks - -**Question:** Is there a risk of goroutine explosion when processing multiple chunks concurrently, and should there be a limit on concurrent replica operations? - -**Why this question matters:** - -- Both `Put` and `Get` operations spawn multiple goroutines per chunk -- Both CAC and SOC implementations have the same goroutine spawning pattern -- Multiple chunks can be processed concurrently from various sources -- `Get` and `Put` operations can happen simultaneously, compounding the goroutine count - -**Concurrent Scenarios:** - -**PUT Operations:** - -- **Pusher Service**: `pkg/pusher/pusher.go:66` allows `ConcurrentPushes = swarm.Branches = 128` concurrent chunk pushes -- **API SOC Uploads**: `pkg/api/soc.go:112` - SOC chunk uploads via API (multiple concurrent clients) -- **API Chunk Stream**: `pkg/api/chunk_stream.go:200` - WebSocket chunk stream uploads (multiple concurrent clients) -- **File Uploads**: `pkg/file/pipeline/hashtrie/hashtrie.go:53` - File upload pipeline (root chunk replicas) - -**GET Operations:** - -- **File Joiner**: `pkg/file/joiner/joiner.go:135` - Uses `replicas.NewGetter` for root chunk retrieval -- **API Feed Retrieval**: `pkg/api/feed.go:80` - Uses `replicas.NewSocGetter` for feed chunk retrieval -- **API SOC Retrieval**: `pkg/api/soc.go:279` - Uses `replicas.NewSocGetter` for SOC chunk retrieval -- **Puller Service**: Chunks being pulled from network (multiple concurrent pulls) -- **Multiple concurrent API clients** requesting chunks simultaneously - -**Goroutine Calculation Examples:** - -**PUT Operations (Worst Case - PARANOID level):** - -- 128 concurrent chunks (from pusher) × (16 replicas + 1 replicator) = **2,176 goroutines** -- Additional concurrent uploads from API clients can add more - -**GET Operations (Worst Case - PARANOID level):** - -- 128 concurrent Get calls × (1 original + 16 replicas + 1 replicator) = **2,304 goroutines** -- Note: Get operations spawn goroutines in batches, but if early batches fail, all goroutines can accumulate - -**Combined Worst Case:** - -- 128 concurrent Put + 128 concurrent Get = **4,480+ goroutines** just from the replicas package -- Plus goroutines from: - - Other system components - - Network I/O operations - - Storage layer operations - -**Current Behavior:** - -- No limit on concurrent `Put` operations across chunks -- No limit on concurrent `Get` operations across chunks -- No limit on total goroutines spawned by the replicas package -- Each chunk upload 
spawns all replicas concurrently (no batching) -- Each chunk retrieval spawns replicas in batches, but batches can accumulate -- No backpressure mechanism to prevent goroutine explosion -- The upload store's global lock (`uploadsLock`) serializes Put operations but doesn't prevent goroutine accumulation - -**Answer/Suggestion:** - -- Consider implementing a semaphore or worker pool to limit concurrent replica operations globally -- Add a global limit on concurrent `Put` operations across all chunks -- Add a global limit on concurrent `Get` operations across all chunks -- Consider sequential `Put` operations per chunk to reduce goroutine count (though this may impact performance) -- Consider limiting the number of concurrent Get batches that can be in-flight -- Monitor goroutine count in production to validate if this is a real issue -- Consider if the upload store's global lock already provides sufficient backpressure (it serializes but doesn't limit goroutine count) - -**Viktor** - Invesigate limits before we introduce limitations. - ---- - -## 6. Goroutine Usage in socReplicator - -**Question:** Is the goroutine in `socReplicator` necessary, and could the replica address generation functionality be exported for external verification tools like beekeeper? - -**Why this question matters:** - -- The `socReplicator` (see `pkg/replicas/replicas_soc.go:25-36`) uses a goroutine to generate replica addresses -- The computation is trivial: simple bit manipulation operations that generate at most 16 addresses -- The goroutine overhead may exceed the computation time for such simple operations -- The address generation is deterministic and could be exported for external verification tools - -**Benchmark Results:** - -Benchmark tests comparing synchronous vs asynchronous implementations show: - -- **4.8x faster**: 299 ns/op (sync) vs 1,427 ns/op (async) -- **33% less memory**: 896 B/op (sync) vs 1,328 B/op (async) -- **53% fewer allocations**: 17 allocs/op (sync) vs 36 allocs/op (async) - -The goroutine overhead significantly outweighs the trivial bit manipulation work. - -**Answer/Suggestion:** - -- Make `socReplicator` address generation synchronous (remove goroutine) -- Export a function like `GenerateSocReplicaAddresses(addr swarm.Address, level redundancy.Level) []swarm.Address` for external use -- This would allow beekeeper and other verification tools to independently calculate and verify replica addresses - -**Viktor** Improve to use it without worker. - ---- - -## 7. Exponential Backoff Strategy in Get Operations - -**Question:** Is the exponential doubling strategy (2, 4, 8, 16 replicas per batch) optimal for retrieval? - -**Why this question matters:** - -- In `getter.go:79` and `getter_soc.go:70`, the number of replicas attempted doubles each `RetryInterval` -- This means: try 2, wait 300ms, try 4 more, wait 300ms, try 8 more, etc. -- The strategy assumes that if early replicas fail, more should be tried -- However, this might delay successful retrieval if early batches fail due to temporary issues - -**Viktor**: If Redundancy Level is specified on PUT, most efficient is to use that exact one on GET. Leave it as is. 
- --- From 5ef6f8dbbef4660d02a5d40fa2f9671b11b55387 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?= Date: Thu, 13 Nov 2025 11:45:15 +0100 Subject: [PATCH 58/62] chore: correctly set the copyright year --- pkg/replicas/putter_soc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/replicas/putter_soc.go b/pkg/replicas/putter_soc.go index d1712cac57e..54126039028 100644 --- a/pkg/replicas/putter_soc.go +++ b/pkg/replicas/putter_soc.go @@ -1,4 +1,4 @@ -// Copyright 2020 The Swarm Authors. All rights reserved. +// Copyright 2025 The Swarm Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. From b83417c157cd8eb832b9382750e51676be738568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?= Date: Thu, 13 Nov 2025 12:40:36 +0100 Subject: [PATCH 59/62] chore: add more tests to replicas and combinator --- pkg/replicas/combinator/combinator.go | 6 ++- pkg/replicas/combinator/combinator_test.go | 14 ++++++ pkg/replicas/getter_soc_test.go | 51 +++++++++++++++++++++- 3 files changed, 68 insertions(+), 3 deletions(-) diff --git a/pkg/replicas/combinator/combinator.go b/pkg/replicas/combinator/combinator.go index 7671b9cb4f7..1c8e50fa18c 100644 --- a/pkg/replicas/combinator/combinator.go +++ b/pkg/replicas/combinator/combinator.go @@ -61,9 +61,11 @@ func IterateReplicaAddresses(addr swarm.Address, maxDepth int) iter.Seq[swarm.Ad // Boundary checks are performed only when the depth changes. if currentDepth > maxDepth { - if maxDepth == 0 { + if maxDepth <= 0 { // Do not return the bit flip address of depth 0, - // because depth 0 should have no replicas. + // because depth 0 should have no replicas. Negative + // depths are invalid and should not return any + // replicas, as well. return } // Create a new slice based on the original address. diff --git a/pkg/replicas/combinator/combinator_test.go b/pkg/replicas/combinator/combinator_test.go index 41cef80b372..67658a5011c 100644 --- a/pkg/replicas/combinator/combinator_test.go +++ b/pkg/replicas/combinator/combinator_test.go @@ -280,6 +280,20 @@ func TestIterateReplicaAddressesSeq(t *testing.T) { // This test just proves the 'break' is correctly handled // by the iterator's `if !yield(newSlice)` check. }) + + t.Run("iterate with negative depth", func(t *testing.T) { + input := swarm.NewAddress(make([]byte, swarm.HashSize)) + count := 0 + maxD := -1 // Negative depth + + for range combinator.IterateReplicaAddresses(input, maxD) { + count++ + } + + if count != 0 { + t.Fatalf("Expected to iterate 0 times for negative depth, got %d", count) + } + }) } var benchAddress = swarm.NewAddress(append([]byte{0xDE, 0xAD, 0xBE, 0xEF}, make([]byte, swarm.HashSize-4)...)) diff --git a/pkg/replicas/getter_soc_test.go b/pkg/replicas/getter_soc_test.go index 4a8962bce33..5fb3e6acba0 100644 --- a/pkg/replicas/getter_soc_test.go +++ b/pkg/replicas/getter_soc_test.go @@ -128,6 +128,31 @@ func TestSocGetter_MultipleReplicasFound(t *testing.T) { } } +func TestSocGetter_MaxRedundancyLevelLimit(t *testing.T) { + t.Parallel() + + var ( + chunkAddr = swarm.RandAddress(t) + mock = &countingGetter{ + getter: inmemchunkstore.New(), + } + // Initialize SocGetter with a redundancy level higher than maxRedundancyLevel (which is 4) + getter = replicas.NewSocGetter(mock, redundancy.Level(10)) + ) + + // maxRedundancyLevel is 4, so 2^4 = 16 replicas. Total expected calls: 1 (original) + 16 (replicas) = 17. 
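+	// More generally (an assumption, not asserted by this test): calls = 1 + 2^min(level, maxRedundancyLevel).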
+ expectedCalls := 1 + (1 << 4) // 1 + 2^4 = 17 + + _, err := getter.Get(context.Background(), chunkAddr) + if err == nil { + t.Fatal("expected error, got nil") + } + + if mock.calls != expectedCalls { + t.Fatalf("expected %d Get calls, got %d", expectedCalls, mock.calls) + } +} + func TestSocGetter_ContextCanceled(t *testing.T) { t.Parallel() @@ -377,9 +402,33 @@ type mockGetterWithDelay struct { } func (m *mockGetterWithDelay) Get(ctx context.Context, addr swarm.Address) (swarm.Chunk, error) { - time.Sleep(m.getDelay) + if m.getDelay > 0 { + time.Sleep(m.getDelay) + } if m.err != nil { return nil, m.err } return m.getter.Get(ctx, addr) } + +// countingGetter is a mock storage.Getter that counts Get calls. +type countingGetter struct { + getter storage.ChunkStore + + mu    sync.Mutex + calls int +} + +func (c *countingGetter) Get(ctx context.Context, addr swarm.Address) (swarm.Chunk, error) { + c.mu.Lock() + c.calls++ + c.mu.Unlock() + + return c.getter.Get(ctx, addr) +} From 1afb16f9cdb7148235f4834c9f1af287bd4af84f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?= Date: Thu, 13 Nov 2025 23:22:15 +0100 Subject: [PATCH 60/62] chore(combinator): use term replication level instead depth for clarity --- pkg/replicas/combinator/combinator.go | 74 ++++++------ pkg/replicas/combinator/combinator_test.go | 132 ++++++++++----------- 2 files changed, 103 insertions(+), 103 deletions(-) diff --git a/pkg/replicas/combinator/combinator.go b/pkg/replicas/combinator/combinator.go index 1c8e50fa18c..5c1253383a0 100644 --- a/pkg/replicas/combinator/combinator.go +++ b/pkg/replicas/combinator/combinator.go @@ -12,10 +12,10 @@ import ( ) // IterateReplicaAddresses returns an iterator (iter.Seq) that yields bit -// combinations of an address, starting from depth 1. The original address is -// not returned. This approach allows for memory-efficient iteration over a large -// set of combinations. -// The combination with the one flipped bit of the original address will be returned at the end. +// combinations of an address, starting from replication level 1. The original +// address is not returned. This approach allows for memory-efficient iteration +// over a large set of combinations. The combination with the one flipped bit of +// the original address will be returned at the end. // // # Performance and Memory Considerations @@ -23,20 +23,20 @@ import ( // of the address on each iteration. This prevents accidental modification of // previously yielded addresses. // -// The iterator terminates if the depth exceeds maxDepth or if the input data -// slice is not long enough for the bit manipulations required at the next -// depth level. -func IterateReplicaAddresses(addr swarm.Address, maxDepth int) iter.Seq[swarm.Address] { +// The iterator terminates if the replication level exceeds the given maxLevel or if +// the input data slice is not long enough for the bit manipulations required at +// the next replication level. +func IterateReplicaAddresses(addr swarm.Address, maxLevel int) iter.Seq[swarm.Address] { // State variables for the iterator closure. // A single buffer is used and mutated in each iteration, and a copy is yielded. // It is initialized with a copy of the original address data. currentSlice := append([]byte{}, addr.Bytes()...)
- var currentDepth int + var currentLevel int var bytesNeeded int - // nextDepthIndex marks the combination index at which the depth increases + // nextLevelIndex marks the combination index at which the replication level increases // (e.g., 1, 2, 4, 8, ...). - nextDepthIndex := 1 + nextLevelIndex := 1 // prevCombinationIndex is used to calculate the bitwise difference for // efficient state transitions. var prevCombinationIndex int @@ -44,27 +44,27 @@ func IterateReplicaAddresses(addr swarm.Address, maxDepth int) iter.Seq[swarm.Ad return func(yield func(swarm.Address) bool) { // combinationIndex iterates through all possible combinations, but skip the original address. for combinationIndex := 1; ; combinationIndex++ { - // When the combinationIndex reaches the next power of two, the depth + // When the combinationIndex reaches the next power of two, the replication level // of bit combinations is increased for subsequent iterations. - if combinationIndex >= nextDepthIndex { - // The depth is determined by the number of bits in the combinationIndex. - // combinationIndex=1 -> depth=1 - // combinationIndex=2 -> depth=2 - // combinationIndex=4 -> depth=3 - // combinationIndex=8 -> depth=4 - currentDepth = bits.Len(uint(combinationIndex)) - // Set the threshold for the next depth increase. - // For depth=1 (idx=1), next threshold is 2. - // For depth=2 (idx=2,3), next threshold is 4. - // For depth=3 (idx=4..7), next threshold is 8. - nextDepthIndex = 1 << currentDepth - - // Boundary checks are performed only when the depth changes. - if currentDepth > maxDepth { - if maxDepth <= 0 { - // Do not return the bit flip address of depth 0, - // because depth 0 should have no replicas. Negative - // depths are invalid and should not return any + if combinationIndex >= nextLevelIndex { + // The replication level is determined by the number of bits in the combinationIndex. + // combinationIndex=1 -> replication level=1 + // combinationIndex=2 -> replication level=2 + // combinationIndex=4 -> replication level=3 + // combinationIndex=8 -> replication level=4 + currentLevel = bits.Len(uint(combinationIndex)) + // Set the threshold for the next replication level increase. + // For replication level=1 (idx=1), next threshold is 2. + // For replication level=2 (idx=2,3), next threshold is 4. + // For replication level=3 (idx=4..7), next threshold is 8. + nextLevelIndex = 1 << currentLevel + + // Boundary checks are performed only when the replication level changes. + if currentLevel > maxLevel { + if maxLevel <= 0 { + // Do not return the bit flip address of replication level 0, + // because replication level 0 should have no replicas. Negative + // replication levels are invalid and should not return any // replicas, as well. return } @@ -74,7 +74,7 @@ func IterateReplicaAddresses(addr swarm.Address, maxDepth int) iter.Seq[swarm.Ad copy(flippedAddrBytes, originalAddrBytes) // Calculate the byte index for the bit to flip. - bitIndexToFlip := maxDepth + bitIndexToFlip := maxLevel byteIndex := bitIndexToFlip / 8 // Ensure the flippedAddrBytes is long enough to flip this bit. @@ -82,7 +82,7 @@ func IterateReplicaAddresses(addr swarm.Address, maxDepth int) iter.Seq[swarm.Ad return // Cannot flip bit, slice is too short. } - // Flip the maxDepth bit in the new slice. + // Flip the level bit in the new slice. 
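+				// Worked example (illustrative): with maxLevel = 4, bitIndexToFlip = 4, so byteIndex = 0, bitPositionInByte = 7-4 = 3, and bitMask = 0b0000_1000, matching the validator's 1<<3 check for the fifth bit from the left.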
bitPositionInByte := 7 - (bitIndexToFlip % 8) bitMask := byte(1 << bitPositionInByte) flippedAddrBytes[byteIndex] ^= bitMask @@ -91,13 +91,13 @@ func IterateReplicaAddresses(addr swarm.Address, maxDepth int) iter.Seq[swarm.Ad if !yield(swarm.NewAddress(flippedAddrBytes)) { return // Consumer-requested stop. } - return // Iteration completed up to the defined maximum depth. + return // Iteration completed up to the defined maximum replication level. } - bytesNeeded = (currentDepth + 7) / 8 // Ceiling of integer division. + bytesNeeded = (currentLevel + 7) / 8 // Ceiling of integer division. if len(addr.Bytes()) < bytesNeeded { - // The data slice is too short for the current depth. + // The data slice is too short for the current replication level. return } } @@ -107,7 +107,7 @@ func IterateReplicaAddresses(addr swarm.Address, maxDepth int) iter.Seq[swarm.Ad // the buffer is XORed with the difference between the current and // previous combination indices. bitsToFlip := combinationIndex ^ prevCombinationIndex - for bitIndex := 0; bitIndex < currentDepth; bitIndex++ { + for bitIndex := 0; bitIndex < currentLevel; bitIndex++ { // Check if the bit at bitIndex is set in the difference. if (bitsToFlip>>bitIndex)&1 == 1 { // If set, flip the corresponding bit in the buffer. diff --git a/pkg/replicas/combinator/combinator_test.go b/pkg/replicas/combinator/combinator_test.go index 67658a5011c..5c09c464e08 100644 --- a/pkg/replicas/combinator/combinator_test.go +++ b/pkg/replicas/combinator/combinator_test.go @@ -11,16 +11,16 @@ import ( "github.com/ethersphere/bee/v2/pkg/swarm" ) -const maxDepth = 8 +const maxLevel = 8 func TestIterateReplicaAddressesSeq(t *testing.T) { - t.Run("iterate up to depth 0", func(t *testing.T) { + t.Run("iterate up to level 0", func(t *testing.T) { input := swarm.NewAddress(make([]byte, swarm.HashSize)) allCombinations := make(map[string]bool) count := 0 maxD := 0 - expectedCount := 0 // No addresses should be returned as depth 0 represents no replication. - expected := map[string]bool{} // Not even the maxDepth-bit-flipped address. + expectedCount := 0 // No addresses should be returned as level 0 represents no replication. + expected := map[string]bool{} // Not even the maxLevel-bit-flipped address. 
for combo := range combinator.IterateReplicaAddresses(input, maxD) { comboHex := combo.String() @@ -44,14 +44,14 @@ func TestIterateReplicaAddressesSeq(t *testing.T) { } }) - t.Run("iterate up to depth 1", func(t *testing.T) { + t.Run("iterate up to level 1", func(t *testing.T) { input := swarm.NewAddress(make([]byte, swarm.HashSize)) allCombinations := make(map[string]bool) count := 0 maxD := 1 expectedCount := 1 << maxD // 2^1 = 2 items expected := map[string]bool{ - swarm.NewAddress(append([]byte{0b10000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=1 (depth=1) + swarm.NewAddress(append([]byte{0b10000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=1 (level=1) swarm.NewAddress(append([]byte{0b01000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // 2nd bit flipped } @@ -77,16 +77,16 @@ func TestIterateReplicaAddressesSeq(t *testing.T) { } }) - t.Run("iterate up to depth 2", func(t *testing.T) { + t.Run("iterate up to level 2", func(t *testing.T) { input := swarm.NewAddress(make([]byte, swarm.HashSize)) allCombinations := make(map[string]bool) count := 0 maxD := 2 expectedCount := 1 << maxD // 2^2 = 4 items expected := map[string]bool{ - swarm.NewAddress(append([]byte{0b10000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=1 (depth=1) - swarm.NewAddress(append([]byte{0b01000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=2 (depth=2) - swarm.NewAddress(append([]byte{0b11000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=3 (depth=2) + swarm.NewAddress(append([]byte{0b10000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=1 (level=1) + swarm.NewAddress(append([]byte{0b01000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=2 (level=2) + swarm.NewAddress(append([]byte{0b11000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=3 (level=2) swarm.NewAddress(append([]byte{0b00100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // 3rd bit flipped } @@ -112,20 +112,20 @@ func TestIterateReplicaAddressesSeq(t *testing.T) { } }) - t.Run("Iterate up to depth=3", func(t *testing.T) { + t.Run("Iterate up to level=3", func(t *testing.T) { input := swarm.NewAddress(make([]byte, swarm.HashSize)) allCombinations := make(map[string]bool) count := 0 maxD := 3 expectedCount := 1 << maxD // 2^3 = 8 items expected := map[string]bool{ - swarm.NewAddress(append([]byte{0b10000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=1 (depth=1) - swarm.NewAddress(append([]byte{0b01000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=2 (depth=2) - swarm.NewAddress(append([]byte{0b11000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=3 (depth=2) - swarm.NewAddress(append([]byte{0b00100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=4 (depth=3) - swarm.NewAddress(append([]byte{0b10100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=5 (depth=3) - swarm.NewAddress(append([]byte{0b01100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=6 (depth=3) - swarm.NewAddress(append([]byte{0b11100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=7 (depth=3) + swarm.NewAddress(append([]byte{0b10000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=1 (level=1) + swarm.NewAddress(append([]byte{0b01000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=2 (level=2) + swarm.NewAddress(append([]byte{0b11000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=3 (level=2) + 
swarm.NewAddress(append([]byte{0b00100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=4 (level=3) + swarm.NewAddress(append([]byte{0b10100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=5 (level=3) + swarm.NewAddress(append([]byte{0b01100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=6 (level=3) + swarm.NewAddress(append([]byte{0b11100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=7 (level=3) swarm.NewAddress(append([]byte{0b00010000}, make([]byte, swarm.HashSize-1)...)).String(): true, // 4th bit flipped } @@ -153,28 +153,28 @@ func TestIterateReplicaAddressesSeq(t *testing.T) { } }) - t.Run("iterate up to depth 4", func(t *testing.T) { + t.Run("iterate up to level 4", func(t *testing.T) { input := swarm.NewAddress(make([]byte, swarm.HashSize)) allCombinations := make(map[string]bool) count := 0 maxD := 4 expectedCount := 1 << maxD // 2^4 = 16 items expected := map[string]bool{ - swarm.NewAddress(append([]byte{0b10000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=1 (depth=1) - swarm.NewAddress(append([]byte{0b01000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=2 (depth=2) - swarm.NewAddress(append([]byte{0b11000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=3 (depth=2) - swarm.NewAddress(append([]byte{0b00100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=4 (depth=3) - swarm.NewAddress(append([]byte{0b10100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=5 (depth=3) - swarm.NewAddress(append([]byte{0b01100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=6 (depth=3) - swarm.NewAddress(append([]byte{0b11100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=7 (depth=3) - swarm.NewAddress(append([]byte{0b00010000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=8 (depth=4) - swarm.NewAddress(append([]byte{0b10010000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=9 (depth=4) - swarm.NewAddress(append([]byte{0b01010000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=10 (depth=4) - swarm.NewAddress(append([]byte{0b11010000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=11 (depth=4) - swarm.NewAddress(append([]byte{0b00110000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=12 (depth=4) - swarm.NewAddress(append([]byte{0b10110000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=13 (depth=4) - swarm.NewAddress(append([]byte{0b01110000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=14 (depth=4) - swarm.NewAddress(append([]byte{0b11110000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=15 (depth=4) + swarm.NewAddress(append([]byte{0b10000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=1 (level=1) + swarm.NewAddress(append([]byte{0b01000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=2 (level=2) + swarm.NewAddress(append([]byte{0b11000000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=3 (level=2) + swarm.NewAddress(append([]byte{0b00100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=4 (level=3) + swarm.NewAddress(append([]byte{0b10100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=5 (level=3) + swarm.NewAddress(append([]byte{0b01100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=6 (level=3) + swarm.NewAddress(append([]byte{0b11100000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=7 (level=3) + swarm.NewAddress(append([]byte{0b00010000}, make([]byte, 
swarm.HashSize-1)...)).String(): true, // i=8 (level=4) + swarm.NewAddress(append([]byte{0b10010000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=9 (level=4) + swarm.NewAddress(append([]byte{0b01010000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=10 (level=4) + swarm.NewAddress(append([]byte{0b11010000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=11 (level=4) + swarm.NewAddress(append([]byte{0b00110000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=12 (level=4) + swarm.NewAddress(append([]byte{0b10110000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=13 (level=4) + swarm.NewAddress(append([]byte{0b01110000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=14 (level=4) + swarm.NewAddress(append([]byte{0b11110000}, make([]byte, swarm.HashSize-1)...)).String(): true, // i=15 (level=4) swarm.NewAddress(append([]byte{0b00001000}, make([]byte, swarm.HashSize-1)...)).String(): true, // 5th bit flipped } @@ -200,10 +200,10 @@ func TestIterateReplicaAddressesSeq(t *testing.T) { } }) - t.Run("maxDepth limits iteration", func(t *testing.T) { + t.Run("maxLevel limits iteration", func(t *testing.T) { input := swarm.NewAddress(make([]byte, swarm.HashSize)) count := 0 - // maxDepth=2 should give 3 items (2^2-1 for depths 1, 2) + 1 for the maxDepth bit flipped address + // maxLevel=2 should give 3 items (2^2-1 for levels 1, 2) + 1 for the maxLevel bit flipped address expectedCount := 4 for range combinator.IterateReplicaAddresses(input, 2) { @@ -211,14 +211,14 @@ func TestIterateReplicaAddressesSeq(t *testing.T) { } if count != expectedCount { - t.Errorf("Expected %d items for maxDepth=2, got %d", expectedCount, count) + t.Errorf("Expected %d items for maxLevel=2, got %d", expectedCount, count) } }) t.Run("Iterator stops correctly at end of byte slice", func(t *testing.T) { // 1 byte = 8 bits. - // Iterator should produce 2^8-1 = 255 items (for depth=1 through depth=8). - // The 257th item (i=256) would require depth=9, + // Iterator should produce 2^8-1 = 255 items (for level=1 through level=8). + // The 257th item (i=256) would require level=9, // which needs 2 bytes. The iterator should stop there. input := swarm.NewAddress([]byte{0xDE}) // 1 byte expectedCount := (1 << 8) - 1 // 255 @@ -226,7 +226,7 @@ func TestIterateReplicaAddressesSeq(t *testing.T) { allCombinations := make(map[string]bool) - for combo := range combinator.IterateReplicaAddresses(input, maxDepth) { + for combo := range combinator.IterateReplicaAddresses(input, maxLevel) { // Just in case, prevent infinite loop in test if count > expectedCount { t.Fatalf("Iterator produced more than %d items, count=%d", expectedCount, count) @@ -245,14 +245,14 @@ func TestIterateReplicaAddressesSeq(t *testing.T) { } }) - t.Run("depth=0 edge case (nil slice)", func(t *testing.T) { - // The iterator starts at i=1, which needs depth=1, which needs 1 byte. + t.Run("level=0 edge case (nil slice)", func(t *testing.T) { + // The iterator starts at i=1, which needs level=1, which needs 1 byte. // A nil slice fails this. // So, this should iterate *exactly zero times*. 
var input swarm.Address count := 0 - for range combinator.IterateReplicaAddresses(input, maxDepth) { + for range combinator.IterateReplicaAddresses(input, maxLevel) { count++ } @@ -266,7 +266,7 @@ func TestIterateReplicaAddressesSeq(t *testing.T) { count := 0 stopAt := 5 - seq := combinator.IterateReplicaAddresses(input, maxDepth) + seq := combinator.IterateReplicaAddresses(input, maxLevel) for range seq { count++ if count == stopAt { @@ -281,25 +281,25 @@ func TestIterateReplicaAddressesSeq(t *testing.T) { // by the iterator's `if !yield(newSlice)` check. }) - t.Run("iterate with negative depth", func(t *testing.T) { + t.Run("iterate with negative level", func(t *testing.T) { input := swarm.NewAddress(make([]byte, swarm.HashSize)) count := 0 - maxD := -1 // Negative depth + maxD := -1 // Negative level for range combinator.IterateReplicaAddresses(input, maxD) { count++ } if count != 0 { - t.Fatalf("Expected to iterate 0 times for negative depth, got %d", count) + t.Fatalf("Expected to iterate 0 times for negative level, got %d", count) } }) } var benchAddress = swarm.NewAddress(append([]byte{0xDE, 0xAD, 0xBE, 0xEF}, make([]byte, swarm.HashSize-4)...)) -// runBenchmark is a helper to run the iterator for a fixed depth. -func runBenchmark(b *testing.B, depth int) { +// runBenchmark is a helper to run the iterator for a fixed level. +func runBenchmark(b *testing.B, maxLevel int) { b.Helper() // We run the loop b.N times, as required by the benchmark harness. @@ -308,7 +308,7 @@ func runBenchmark(b *testing.B, depth int) { // (the slice generation) isn't optimized away. var volatileAddr swarm.Address - seq := combinator.IterateReplicaAddresses(benchAddress, depth) + seq := combinator.IterateReplicaAddresses(benchAddress, maxLevel) for combo := range seq { volatileAddr = combo } @@ -322,42 +322,42 @@ func runBenchmark(b *testing.B, depth int) { } } -// BenchmarkDepth1 iterates over 2^1 = 2 items -func BenchmarkDepth1(b *testing.B) { +// BenchmarkMaxLevel1 iterates over 2^1 = 2 items +func BenchmarkMaxLevel1(b *testing.B) { runBenchmark(b, 1) } -// BenchmarkDepth2 iterates over 2^2 = 4 items -func BenchmarkDepth2(b *testing.B) { +// BenchmarkMaxLevel2 iterates over 2^2 = 4 items +func BenchmarkMaxLevel2(b *testing.B) { runBenchmark(b, 2) } -// BenchmarkDepth3 iterates over 2^3 = 8 items -func BenchmarkDepth3(b *testing.B) { +// BenchmarkMaxLevel3 iterates over 2^3 = 8 items +func BenchmarkMaxLevel3(b *testing.B) { runBenchmark(b, 3) } -// BenchmarkDepth4 iterates over 2^4 = 16 items -func BenchmarkDepth4(b *testing.B) { +// BenchmarkMaxLevel4 iterates over 2^4 = 16 items +func BenchmarkMaxLevel4(b *testing.B) { runBenchmark(b, 4) } -// BenchmarkDepth8 iterates over 2^8 = 256 items -func BenchmarkDepth8(b *testing.B) { +// BenchmarkMaxLevel8 iterates over 2^8 = 256 items +func BenchmarkMaxLevel8(b *testing.B) { runBenchmark(b, 8) } -// BenchmarkDepth12 iterates over 2^12 = 4096 items -func BenchmarkDepth12(b *testing.B) { +// BenchmarkMaxLevel12 iterates over 2^12 = 4096 items +func BenchmarkMaxLevel12(b *testing.B) { runBenchmark(b, 12) } -// BenchmarkDepth16 iterates over 2^16 = 65536 items -func BenchmarkDepth16(b *testing.B) { +// BenchmarkMaxLevel16 iterates over 2^16 = 65536 items +func BenchmarkMaxLevel16(b *testing.B) { runBenchmark(b, 16) } -// BenchmarkDepth20 iterates over 2^20 = 1,048,576 items -func BenchmarkDepth20(b *testing.B) { +// BenchmarkMaxLevel20 iterates over 2^20 = 1,048,576 items +func BenchmarkMaxLevel20(b *testing.B) { runBenchmark(b, 20) } From 
From 89cd8af8b48df6963d9c0b3139bd051acd034e4a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?=
Date: Fri, 14 Nov 2025 10:12:13 +0100
Subject: [PATCH 61/62] chore(api): remove debug error in json http response

---
 pkg/api/soc.go | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pkg/api/soc.go b/pkg/api/soc.go
index f4d46abf453..b38263eb3dc 100644
--- a/pkg/api/soc.go
+++ b/pkg/api/soc.go
@@ -8,7 +8,6 @@ import (
 	"bytes"
 	"encoding/hex"
 	"errors"
-	"fmt"
 	"io"
 	"net/http"
 	"strconv"
@@ -227,7 +226,7 @@ func (s *Service) socUploadHandler(w http.ResponseWriter, r *http.Request) {
 	if err != nil {
 		logger.Debug("done split failed", "error", err)
 		logger.Error(nil, "done split failed")
-		jsonhttp.InternalServerError(ow, fmt.Sprintf("done split failed: %v", err)) // TODO: put it back after fixing parallel upload issue
+		jsonhttp.InternalServerError(ow, "done split failed")
 		return
 	}
 	if headers.Act {
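The final patch below, among other review fixes, annotates socGetter.Get and adds SOC validation of fetched replicas. Stripped of Bee's types, its retrieval loop is a bounded fan-out in which the first fetched result that validates wins and cancels the rest. For reference, here is a minimal, self-contained sketch of that pattern; fetchFunc, firstValid, string results and the integer addresses are illustrative stand-ins, not APIs from the codebase.

package main

import (
	"context"
	"errors"
	"fmt"
	"sync"

	"golang.org/x/sync/semaphore"
)

type fetchFunc func(ctx context.Context, addr int) (string, error)

// firstValid fans out fetches over addrs with at most limit in flight and
// returns the first result accepted by validate, cancelling the rest.
func firstValid(ctx context.Context, addrs []int, limit int64, fetch fetchFunc, validate func(string) bool) (string, error) {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	sem := semaphore.NewWeighted(limit)
	resultChan := make(chan string, 1) // buffered: the winner never blocks
	doneChan := make(chan struct{})    // closed when the dispatcher stops
	var wg sync.WaitGroup

	go func() {
		defer close(doneChan)
		for _, addr := range addrs {
			if err := sem.Acquire(ctx, 1); err != nil {
				return // context cancelled while waiting for a slot
			}
			wg.Add(1)
			go func(addr int) {
				defer sem.Release(1)
				defer wg.Done()
				v, err := fetch(ctx, addr)
				if err != nil || !validate(v) {
					return // failed or invalid results are silently skipped
				}
				select {
				case resultChan <- v:
					cancel() // first valid result wins; abort the rest
				case <-ctx.Done():
					// another goroutine already delivered a result
				}
			}(addr)
		}
		wg.Wait()
	}()

	select {
	case v := <-resultChan:
		return v, nil
	case <-doneChan:
		select {
		case v := <-resultChan: // a result raced in just before doneChan closed
			return v, nil
		default:
			return "", errors.New("no valid result")
		}
	}
}

func main() {
	fetch := func(ctx context.Context, addr int) (string, error) {
		if addr%3 == 0 {
			return fmt.Sprintf("chunk-%d", addr), nil
		}
		return "", errors.New("not found")
	}
	v, err := firstValid(context.Background(), []int{1, 2, 3, 4, 5, 6}, 2, fetch, func(s string) bool { return s != "" })
	fmt.Println(v, err)
}

The buffered resultChan is the load-bearing detail: the winning goroutine never blocks on delivery, so it can cancel the context and exit even if the caller has not yet consumed the result.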
From 50acac1aeb3ec2414bdf0c52165bdb2159650afb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jano=C5=A1=20Gulja=C5=A1?=
Date: Fri, 21 Nov 2025 16:54:58 +0100
Subject: [PATCH 62/62] fix: address pr review comments

---
 pkg/api/pin.go                         |  2 +-
 pkg/hive/hive_test.go                  |  2 +-
 pkg/replicas/getter_soc.go             | 30 +++++++++++++++++++++-----
 pkg/topology/kademlia/kademlia_test.go |  2 +-
 4 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/pkg/api/pin.go b/pkg/api/pin.go
index 6b2a42586a9..696f5185150 100644
--- a/pkg/api/pin.go
+++ b/pkg/api/pin.go
@@ -53,7 +53,7 @@ func (s *Service) pinRootHash(w http.ResponseWriter, r *http.Request) {
 	}

 	getter := s.storer.Download(true)
-	traverser := traversal.New(getter, s.storer.Cache(), redundancy.PARANOID)
+	traverser := traversal.New(getter, s.storer.Cache(), redundancy.DefaultLevel)

 	sem := semaphore.NewWeighted(100)
 	var errTraverse error
diff --git a/pkg/hive/hive_test.go b/pkg/hive/hive_test.go
index fbf39c459d4..1f0d6eecd53 100644
--- a/pkg/hive/hive_test.go
+++ b/pkg/hive/hive_test.go
@@ -147,7 +147,7 @@ func TestBroadcastPeers(t *testing.T) {
 			underlays = []ma.Multiaddr{u, u2}
 		} else {
 			n := (i % 3) + 1
-			for j := range n {
+			for j := 0; j < n; j++ {
 				port := i + j*10000
 				u, err := ma.NewMultiaddr("/ip4/127.0.0.1/udp/" + strconv.Itoa(port))
 				if err != nil {
diff --git a/pkg/replicas/getter_soc.go b/pkg/replicas/getter_soc.go
index 8da10f1edfd..42ef87d0e20 100644
--- a/pkg/replicas/getter_soc.go
+++ b/pkg/replicas/getter_soc.go
@@ -12,6 +12,7 @@ import (

 	"github.com/ethersphere/bee/v2/pkg/file/redundancy"
 	"github.com/ethersphere/bee/v2/pkg/replicas/combinator"
+	"github.com/ethersphere/bee/v2/pkg/soc"
 	"github.com/ethersphere/bee/v2/pkg/storage"
 	"github.com/ethersphere/bee/v2/pkg/swarm"
 	"golang.org/x/sync/semaphore"
@@ -57,32 +58,44 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk

 	// Try to retrieve replicas.
+	// Context for cancellation of replica fetching.
+	// Once a replica is found, this context is cancelled to stop further replica requests.
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()

+	// sem is used to limit the number of concurrent replica fetch operations.
 	sem := semaphore.NewWeighted(socGetterConcurrency)
 	replicaIter := combinator.IterateReplicaAddresses(addr, int(g.level))
+	// resultChan is used to send the first successfully fetched chunk back to the main goroutine.
 	resultChan := make(chan swarm.Chunk, 1)
+	// doneChan signals when all replica iteration and fetching attempts have concluded.
 	doneChan := make(chan struct{})

+	// This goroutine iterates through potential replica addresses and dispatches
+	// concurrent fetch operations, respecting the concurrency limit.
 	go func() {
-		defer close(doneChan)
+		defer close(doneChan) // Ensure doneChan is closed when all replica attempts are finished.
 		for replicaAddr := range replicaIter {
 			select {
 			case <-ctx.Done():
+				// If the context is cancelled (e.g., a replica was found or parent context cancelled),
+				// stop dispatching new replica requests.
 				return
 			default:
 			}

+			// Acquire a semaphore slot to limit concurrency.
 			if err := sem.Acquire(ctx, 1); err != nil {
+				// If context is cancelled while acquiring, stop.
 				return
 			}

 			wg.Add(1)
+			// Each replica fetch is performed in its own goroutine.
 			go func(replicaAddr swarm.Address) {
-				defer sem.Release(1)
-				defer wg.Done()
+				defer sem.Release(1) // Release the semaphore slot when done.
+				defer wg.Done()      // Decrement the WaitGroup counter.

 				ch, err := g.Getter.Get(ctx, replicaAddr)
 				if err != nil {
@@ -92,16 +105,23 @@ func (g *socGetter) Get(ctx context.Context, addr swarm.Address) (ch swarm.Chunk
 					return
 				}

+				if !soc.Valid(swarm.NewChunk(addr, ch.Data())) {
+					return
+				}
+
 				select {
 				case resultChan <- ch:
+					// If a chunk is successfully fetched and validated, send it to resultChan
+					// and cancel the context to stop other in-flight replica fetches.
 					cancel()
 				case <-ctx.Done():
+					// If the context is already cancelled, it means another goroutine found a chunk,
+					// so this chunk is not needed.
 				}
 			}(replicaAddr)
 		}

-		wg.Wait()
+		wg.Wait() // Wait for all launched goroutines to complete.
 	}()

-
 	select {
 	case ch := <-resultChan:
 		return ch, nil
diff --git a/pkg/topology/kademlia/kademlia_test.go b/pkg/topology/kademlia/kademlia_test.go
index 2fa975adac7..4d534ddade8 100644
--- a/pkg/topology/kademlia/kademlia_test.go
+++ b/pkg/topology/kademlia/kademlia_test.go
@@ -2244,7 +2244,7 @@ func generateMultipleUnderlays(t *testing.T, n int, baseUnderlay string) []ma.Mu
 	t.Helper()

 	underlays := make([]ma.Multiaddr, n)
-	for i := range n {
+	for i := 0; i < n; i++ {
 		multiaddr, err := ma.NewMultiaddr(baseUnderlay + strconv.Itoa(i))
 		if err != nil {
 			t.Fatal(err)