From 9d2dd436fd9c644a79596a5e15ec4046bd61cc98 Mon Sep 17 00:00:00 2001 From: folbrich Date: Sat, 14 Mar 2026 14:44:54 +0100 Subject: [PATCH 01/11] Refactor assembly pipeline into plan-based architecture Introduce AssemblePlan that separates planning from execution in file assembly. The plan pre-computes all chunk placements (self-seed, file seeds, store fetches, skip-in-place) into a DAG of steps with explicit dependencies, replacing the interleaved sequencer approach. This lays the groundwork for #312 (destination-as-seed) by making assembly sources composable and the planning phase extensible. Key changes: - New AssemblePlan with functional options and step-based execution - Split assembly sources into separate files (fileseed, selfseed, store, skip) - Self-seed matching now uses longestMatchFrom for longer sequences - Plan validation detects stale file seeds before execution - Comprehensive tests for plan generation and in-place detection - Remove sequencer.go, selfseed.go in favor of new plan types Refs #312 --- assemble-fileseed.go | 24 +++ assemble-plan.go | 352 +++++++++++++++++++++++++++++++++++++ assemble-plan_test.go | 344 ++++++++++++++++++++++++++++++++++++ assemble-selfseed.go | 174 ++++++++++++++++++ assemble-skip.go | 20 +++ assemble-step.go | 62 +++++++ assemble-store.go | 31 ++++ assemble.go | 326 ++++++++++++++++------------------ assemble_test.go | 230 ++++++++---------------- cmd/desync/extract_test.go | 5 +- errors.go | 14 ++ extractstats.go | 4 +- fileseed.go | 24 +-- go.mod | 2 +- nullseed.go | 9 - seed.go | 29 --- selfseed.go | 93 ---------- selfseed_test.go | 136 -------------- sequencer.go | 176 ------------------- 19 files changed, 1251 insertions(+), 804 deletions(-) create mode 100644 assemble-fileseed.go create mode 100644 assemble-plan.go create mode 100644 assemble-plan_test.go create mode 100644 assemble-selfseed.go create mode 100644 assemble-skip.go create mode 100644 assemble-step.go create mode 100644 assemble-store.go delete
mode 100644 selfseed.go delete mode 100644 selfseed_test.go delete mode 100644 sequencer.go diff --git a/assemble-fileseed.go b/assemble-fileseed.go new file mode 100644 index 0000000..6c193d8 --- /dev/null +++ b/assemble-fileseed.go @@ -0,0 +1,24 @@ +package desync + +import ( + "fmt" + "os" +) + +type fileSeedSource struct { + segment SeedSegment + seed Seed + srcFile string + offset uint64 + length uint64 + isBlank bool +} + +func (s *fileSeedSource) Execute(f *os.File) (copied uint64, cloned uint64, err error) { + blocksize := blocksizeOfFile(f.Name()) + return s.segment.WriteInto(f, s.offset, s.length, blocksize, s.isBlank) +} + +func (s *fileSeedSource) String() string { + return fmt.Sprintf("FileSeed(%s): Copy to [%d:%d]", s.srcFile, s.offset, s.offset+s.length) +} diff --git a/assemble-plan.go b/assemble-plan.go new file mode 100644 index 0000000..1679980 --- /dev/null +++ b/assemble-plan.go @@ -0,0 +1,352 @@ +package desync + +import ( + "errors" + "fmt" + "os" + + "golang.org/x/sync/errgroup" +) + +type PlanOption func(*AssemblePlan) + +func PlanWithConcurrency(n int) PlanOption { + return func(p *AssemblePlan) { + p.concurrency = n + } +} + +func PlanWithSeeds(seeds []Seed) PlanOption { + return func(p *AssemblePlan) { + p.seeds = seeds + } +} + +func PlanWithTargetIsBlank(isBlank bool) PlanOption { + return func(p *AssemblePlan) { + p.targetIsBlank = isBlank + } +} + +// AssemblePlan holds a directed acyclic graph of steps. +type AssemblePlan struct { + idx Index + concurrency int + target string + store Store + seeds []Seed + targetIsBlank bool + + // Placements is an intermediate representation of the target index, + // capturing what source is used to populate each chunk. It mirrors the + // length of the index but a single step can span multiple chunks. 
+ placements []*placement + + selfSeed *selfSeed +} + +type assembleSource interface { + fmt.Stringer + Execute(f *os.File) (copied uint64, cloned uint64, err error) +} + +type placement struct { + source assembleSource + dependsOnStart int // index of another placement this one depends on + dependsOnSize int // number of sequential placements (from dependsOnStart) this depends on +} + +// NewPlan creates a fully populated AssemblePlan. +func NewPlan(name string, idx Index, s Store, opts ...PlanOption) (*AssemblePlan, error) { + p := &AssemblePlan{ + idx: idx, + concurrency: 1, + target: name, + store: s, + targetIsBlank: true, + placements: make([]*placement, len(idx.Chunks)), + } + for _, opt := range opts { + opt(p) + } + + ss, err := newSelfSeed(p.target, p.idx, p.concurrency) + if err != nil { + return nil, err + } + p.selfSeed = ss + + if err := p.generate(); err != nil { + p.Close() + return nil, err + } + return p, nil +} + +// Close releases resources held by the plan. +func (p *AssemblePlan) Close() { + if p.selfSeed != nil { + p.selfSeed.Close() + } +} + +// Validate checks that all file seed placements still match their underlying +// data. Returns a SeedInvalid error if a seed file was modified after its +// index was created. +// TODO: run the verification steps in parallel. 
+func (p *AssemblePlan) Validate() error { + seen := make(map[*placement]struct{}) + fileMap := make(map[string]*os.File) + defer func() { + for _, f := range fileMap { + f.Close() + } + }() + + invalidSeeds := make(map[Seed]error) + failedFiles := make(map[string]struct{}) + + for _, pl := range p.placements { + if _, ok := seen[pl]; ok { + continue + } + seen[pl] = struct{}{} + + fs, ok := pl.source.(*fileSeedSource) + if !ok || fs.srcFile == "" { + continue + } + + // Skip seeds and files already known to be invalid + if _, ok := invalidSeeds[fs.seed]; ok { + continue + } + if _, ok := failedFiles[fs.srcFile]; ok { + invalidSeeds[fs.seed] = fmt.Errorf("seed file %s could not be opened", fs.srcFile) + continue + } + + if _, ok := fileMap[fs.srcFile]; !ok { + f, err := os.Open(fs.srcFile) + if err != nil { + failedFiles[fs.srcFile] = struct{}{} + invalidSeeds[fs.seed] = err + continue + } + fileMap[fs.srcFile] = f + } + + if err := fs.segment.Validate(fileMap[fs.srcFile]); err != nil { + invalidSeeds[fs.seed] = err + } + } + + if len(invalidSeeds) > 0 { + seeds := make([]Seed, 0, len(invalidSeeds)) + errs := make([]error, 0, len(invalidSeeds)) + for seed, err := range invalidSeeds { + seeds = append(seeds, seed) + errs = append(errs, err) + } + return SeedInvalid{Seeds: seeds, Err: errors.Join(errs...)} + } + return nil +} + +func (p *AssemblePlan) generate() error { + // Mark chunks that are already correct in the target file so they can + // be skipped during assembly. 
+ if !p.targetIsBlank { + f, err := os.Open(p.target) + if err == nil { + var g errgroup.Group + g.SetLimit(p.concurrency) + for i, chunk := range p.idx.Chunks { + g.Go(func() error { + b := make([]byte, chunk.Size) + if _, err := f.ReadAt(b, int64(chunk.Start)); err != nil { + return nil + } + if Digest.Sum(b) == chunk.ID { + p.placements[i] = &placement{source: &skipInPlace{ + start: chunk.Start, + end: chunk.Start + chunk.Size, + }} + } + return nil + }) + } + g.Wait() + f.Close() + + // Merge consecutive in-place chunks into a single placement + // so that Steps() produces one step per run instead of one + // per chunk. This works because Steps() deduplicates by + // pointer identity. + var run *placement + for i, pl := range p.placements { + if pl == nil { + run = nil + continue + } + if _, ok := pl.source.(*skipInPlace); !ok { + run = nil + continue + } + if run == nil { + run = pl + continue + } + // Extend the existing run and share the pointer + run.source.(*skipInPlace).end = p.idx.Chunks[i].Start + p.idx.Chunks[i].Size + p.placements[i] = run + } + } + } + + // Find all matches in file itself. As it's populated, sections can be + // copied to other chunks. This involves depending on earlier steps + // before chunks can be copied within the file. + for i := 0; i < len(p.idx.Chunks); i++ { + if p.placements[i] != nil { + continue // Already filled + } + + start, n := p.selfSeed.longestMatchFrom(p.idx.Chunks, i) + if n < 1 { + continue + } + + // Repeat the same placement for all chunks in the sequence. + // We dedup sequences later. + pl := &placement{} + + // We can use up to n chunks from the seed, find out how much + // we can actually use without overwriting any existing placements + // in the list. 
+ var ( + to = i + size int + ) + for range n { + if p.placements[i] != nil { + break + } + + p.placements[i] = pl + i++ + size++ + } + i-- // compensate for the outer loop's i++ + + // Update the step with the potentially adjusted length + pl.source = p.selfSeed.getSegment(start, to, size) + pl.dependsOnStart = start + pl.dependsOnSize = size + } + + // Check file seeds for matches in unfilled positions. + for _, seed := range p.seeds { + for i := 0; i < len(p.idx.Chunks); { + if p.placements[i] != nil { + i++ + continue + } + + // Count consecutive unfilled positions to bound the match. + available := 0 + for j := i; j < len(p.idx.Chunks) && p.placements[j] == nil; j++ { + available++ + } + + n, segment := seed.LongestMatchWith(p.idx.Chunks[i : i+available]) + if n < 1 { + i++ + continue + } + + offset := p.idx.Chunks[i].Start + last := p.idx.Chunks[i+n-1] + length := last.Start + last.Size - offset + + pl := &placement{ + source: &fileSeedSource{ + segment: segment, + seed: seed, + srcFile: segment.FileName(), + offset: offset, + length: length, + isBlank: p.targetIsBlank, + }, + } + + for j := i; j < i+n; j++ { + p.placements[j] = pl + } + i += n + } + } + + // Fill any gaps in the file by copying from the store. + for i := range p.placements { + if p.placements[i] != nil { + continue + } + p.placements[i] = &placement{ + source: ©FromStore{ + store: p.store, + chunk: p.idx.Chunks[i], + }, + } + } + + // We now have a fully populated list of placements. Some are + // duplicates, spanning multiple chunks. Dependencies are only defined + // forward, like chunk-A needs chunk-B to be written first, etc. + + return nil +} + +func (p *AssemblePlan) Steps() []*PlanStep { + // Create a step for every unique placement, counting how many + // index chunks each step covers. 
+ stepsPerPlacement := make(map[*placement]*PlanStep) + for _, pl := range p.placements { + step, ok := stepsPerPlacement[pl] + if !ok { + step = &PlanStep{ + source: pl.source, + } + stepsPerPlacement[pl] = step + } + step.numChunks++ + } + + // Link the steps together. Use a seen set to avoid redundant work + // when the same placement pointer spans multiple chunks. + linked := make(map[*placement]struct{}, len(stepsPerPlacement)) + for _, pl := range p.placements { + if _, ok := linked[pl]; ok { + continue + } + linked[pl] = struct{}{} + + for i := pl.dependsOnStart; i < pl.dependsOnStart+pl.dependsOnSize; i++ { + stepsPerPlacement[pl].addDependency(stepsPerPlacement[p.placements[i]]) + stepsPerPlacement[p.placements[i]].addDependent(stepsPerPlacement[pl]) + } + } + + // Make a slice of steps, preserving the order + steps := make([]*PlanStep, 0, len(stepsPerPlacement)) + for _, pl := range p.placements { + s, ok := stepsPerPlacement[pl] + if !ok { + continue + } + steps = append(steps, s) + delete(stepsPerPlacement, pl) + } + + return steps +} diff --git a/assemble-plan_test.go b/assemble-plan_test.go new file mode 100644 index 0000000..238f532 --- /dev/null +++ b/assemble-plan_test.go @@ -0,0 +1,344 @@ +package desync + +import ( + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestSelfSeedPlanSteps(t *testing.T) { + tests := map[string]struct { + index Index + expected []string + }{ + "all unique chunks": { + index: indexSequence(0x01, 0x02, 0x03), + expected: []string{ + "Store: Copy 0100000000000000000000000000000000000000000000000000000000000000 to [0:100]", + "Store: Copy 0200000000000000000000000000000000000000000000000000000000000000 to [100:200]", + "Store: Copy 0300000000000000000000000000000000000000000000000000000000000000 to [200:300]", + }, + }, + "single chunk": { + index: indexSequence(0x01), + expected: []string{ + "Store: Copy 0100000000000000000000000000000000000000000000000000000000000000 to 
[0:100]", + }, + }, + "repeated pair at end": { + // Sequence: 01 02 03 01 02 01 02 + // Positions 0,1 copy from 5,6; positions 3,4 copy from 5,6; + // positions 2,5,6 come from store. + index: indexSequence(0x01, 0x02, 0x03, 0x01, 0x02, 0x01, 0x02), + expected: []string{ + "SelfSeed: Copy [500:700] to [0:200]", + "Store: Copy 0300000000000000000000000000000000000000000000000000000000000000 to [200:300]", + "SelfSeed: Copy [500:700] to [300:500]", + "Store: Copy 0100000000000000000000000000000000000000000000000000000000000000 to [500:600]", + "Store: Copy 0200000000000000000000000000000000000000000000000000000000000000 to [600:700]", + }, + }, + "full duplicate sequence": { + // Sequence: 01 02 03 01 02 03 + // Positions 0-2 copy from 3-5; positions 3-5 come from store. + index: indexSequence(0x01, 0x02, 0x03, 0x01, 0x02, 0x03), + expected: []string{ + "SelfSeed: Copy [300:600] to [0:300]", + "Store: Copy 0100000000000000000000000000000000000000000000000000000000000000 to [300:400]", + "Store: Copy 0200000000000000000000000000000000000000000000000000000000000000 to [400:500]", + "Store: Copy 0300000000000000000000000000000000000000000000000000000000000000 to [500:600]", + }, + }, + "same chunk repeated": { + // Sequence: 01 01 01 + // Position 2 comes from store, then positions 0 and 1 each + // self-seed from position 2. + index: indexSequence(0x01, 0x01, 0x01), + expected: []string{ + "SelfSeed: Copy [200:300] to [0:100]", + "SelfSeed: Copy [200:300] to [100:200]", + "Store: Copy 0100000000000000000000000000000000000000000000000000000000000000 to [200:300]", + }, + }, + "single repeated chunk": { + // Sequence: 01 02 01 + // Position 0 copies from 2; positions 1,2 come from store. 
+ index: indexSequence(0x01, 0x02, 0x01), + expected: []string{ + "SelfSeed: Copy [200:300] to [0:100]", + "Store: Copy 0200000000000000000000000000000000000000000000000000000000000000 to [100:200]", + "Store: Copy 0100000000000000000000000000000000000000000000000000000000000000 to [200:300]", + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + plan, err := NewPlan("test", test.index, nil) + require.NoError(t, err) + defer plan.Close() + + steps := plan.Steps() + got := make([]string, len(steps)) + for i, s := range steps { + got[i] = s.source.String() + } + require.Equal(t, test.expected, got) + }) + } +} + +func TestInPlaceChunkDetection(t *testing.T) { + // Create chunk data and compute their IDs + data1 := make([]byte, 100) + data1[0] = 0x01 + id1 := Digest.Sum(data1) + + data2 := make([]byte, 100) + data2[0] = 0x02 + id2 := Digest.Sum(data2) + + data3 := make([]byte, 100) + data3[0] = 0x03 + id3 := Digest.Sum(data3) + + idx := Index{ + Chunks: []IndexChunk{ + {ID: id1, Start: 0, Size: 100}, + {ID: id2, Start: 100, Size: 100}, + {ID: id3, Start: 200, Size: 100}, + }, + } + + // Create a target file where chunks 0 and 2 match but chunk 1 does not + target := filepath.Join(t.TempDir(), "target") + f, err := os.Create(target) + require.NoError(t, err) + + _, err = f.Write(data1) // chunk 0: correct + require.NoError(t, err) + _, err = f.Write(make([]byte, 100)) // chunk 1: wrong data + require.NoError(t, err) + _, err = f.Write(data3) // chunk 2: correct + require.NoError(t, err) + f.Close() + + plan, err := NewPlan(target, idx, nil, PlanWithTargetIsBlank(false)) + require.NoError(t, err) + defer plan.Close() + + steps := plan.Steps() + got := make([]string, len(steps)) + for i, s := range steps { + got[i] = s.source.String() + } + + cid2 := ChunkID(id2) + expected := []string{ + "InPlace: Skip [0:100]", + fmt.Sprintf("Store: Copy %s to [100:200]", &cid2), + "InPlace: Skip [200:300]", + } + require.Equal(t, expected, got) + + // 
Subtest: all chunks match in-place — consecutive ones should merge + t.Run("consecutive merge", func(t *testing.T) { + target2 := filepath.Join(t.TempDir(), "target2") + f2, err := os.Create(target2) + require.NoError(t, err) + _, err = f2.Write(data1) + require.NoError(t, err) + _, err = f2.Write(data2) + require.NoError(t, err) + _, err = f2.Write(data3) + require.NoError(t, err) + f2.Close() + + plan2, err := NewPlan(target2, idx, nil, PlanWithTargetIsBlank(false)) + require.NoError(t, err) + defer plan2.Close() + + steps2 := plan2.Steps() + got2 := make([]string, len(steps2)) + for i, s := range steps2 { + got2[i] = s.source.String() + } + + expected2 := []string{ + "InPlace: Skip [0:300]", + } + require.Equal(t, expected2, got2) + }) +} + +func TestFileSeedPlanSteps(t *testing.T) { + tests := map[string]struct { + target Index + seed Index + expected []string + }{ + "basic matching": { + // Target: 01, 02, 03, 04 + // Seed: 02, 03 + // Chunks 1-2 from seed, 0 and 3 from store. + target: indexSequence(0x01, 0x02, 0x03, 0x04), + seed: indexSequence(0x02, 0x03), + expected: []string{ + "Store: Copy 0100000000000000000000000000000000000000000000000000000000000000 to [0:100]", + "FileSeed(seed): Copy to [100:300]", + "Store: Copy 0400000000000000000000000000000000000000000000000000000000000000 to [300:400]", + }, + }, + "all from seed": { + // Target: 01, 02, 03 + // Seed: 01, 02, 03 + // One seed step covering all. + target: indexSequence(0x01, 0x02, 0x03), + seed: indexSequence(0x01, 0x02, 0x03), + expected: []string{ + "FileSeed(seed): Copy to [0:300]", + }, + }, + "no match": { + // Target: 01, 02 + // Seed: 05, 06 + // Both from store. 
+ target: indexSequence(0x01, 0x02), + seed: indexSequence(0x05, 0x06), + expected: []string{ + "Store: Copy 0100000000000000000000000000000000000000000000000000000000000000 to [0:100]", + "Store: Copy 0200000000000000000000000000000000000000000000000000000000000000 to [100:200]", + }, + }, + "self-seed priority": { + // Target: 01, 02, 01 + // Seed: 01, 02, 01 + // Self-seed fills position 0 (copy from position 2), + // seed fills positions 1-2 (matching seed chunks 02, 01). + target: indexSequence(0x01, 0x02, 0x01), + seed: indexSequence(0x01, 0x02, 0x01), + expected: []string{ + "SelfSeed: Copy [200:300] to [0:100]", + "FileSeed(seed): Copy to [100:300]", + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + seed, err := NewIndexSeed("test", "seed", test.seed) + require.NoError(t, err) + + plan, err := NewPlan("test", test.target, nil, PlanWithSeeds([]Seed{seed})) + require.NoError(t, err) + defer plan.Close() + + steps := plan.Steps() + got := make([]string, len(steps)) + for i, s := range steps { + got[i] = s.source.String() + } + require.Equal(t, test.expected, got) + }) + } +} + +func TestFileSeedValidation(t *testing.T) { + // Create two chunks with known data and compute their IDs + data1 := make([]byte, 100) + data1[0] = 0xAA + id1 := Digest.Sum(data1) + + data2 := make([]byte, 100) + data2[0] = 0xBB + id2 := Digest.Sum(data2) + + seedIndex := Index{ + Chunks: []IndexChunk{ + {ID: id1, Start: 0, Size: 100}, + {ID: id2, Start: 100, Size: 100}, + }, + } + + // Target index matches the seed exactly + targetIndex := Index{ + Chunks: []IndexChunk{ + {ID: id1, Start: 0, Size: 100}, + {ID: id2, Start: 100, Size: 100}, + }, + } + + t.Run("valid seed", func(t *testing.T) { + seedFile := filepath.Join(t.TempDir(), "seed") + f, err := os.Create(seedFile) + require.NoError(t, err) + _, err = f.Write(data1) + require.NoError(t, err) + _, err = f.Write(data2) + require.NoError(t, err) + f.Close() + + seed, err := NewIndexSeed("target", 
seedFile, seedIndex) + require.NoError(t, err) + + plan, err := NewPlan("target", targetIndex, nil, PlanWithSeeds([]Seed{seed})) + require.NoError(t, err) + defer plan.Close() + + require.NoError(t, plan.Validate()) + }) + + t.Run("invalid seed", func(t *testing.T) { + seedFile := filepath.Join(t.TempDir(), "seed") + f, err := os.Create(seedFile) + require.NoError(t, err) + _, err = f.Write(data1) + require.NoError(t, err) + _, err = f.Write(data2) + require.NoError(t, err) + f.Close() + + seed, err := NewIndexSeed("target", seedFile, seedIndex) + require.NoError(t, err) + + plan, err := NewPlan("target", targetIndex, nil, PlanWithSeeds([]Seed{seed})) + require.NoError(t, err) + defer plan.Close() + + // Corrupt the seed file after the plan was created + err = os.WriteFile(seedFile, make([]byte, 200), 0644) + require.NoError(t, err) + + err = plan.Validate() + require.Error(t, err) + + var seedErr SeedInvalid + require.ErrorAs(t, err, &seedErr) + require.Equal(t, []Seed{seed}, seedErr.Seeds) + }) + + t.Run("null seed skipped", func(t *testing.T) { + // Create a null chunk index — data is all zeros + nullData := make([]byte, 100) + nullID := Digest.Sum(nullData) + + nullTargetIndex := Index{ + Chunks: []IndexChunk{ + {ID: nullID, Start: 0, Size: 100}, + }, + } + + // Use a null seed (FileName() returns "", so Validate skips it) + ns := &nullChunkSeed{id: nullID} + defer ns.close() + + plan, err := NewPlan("target", nullTargetIndex, nil, PlanWithSeeds([]Seed{ns})) + require.NoError(t, err) + defer plan.Close() + + require.NoError(t, plan.Validate()) + }) +} diff --git a/assemble-selfseed.go b/assemble-selfseed.go new file mode 100644 index 0000000..d86ce2d --- /dev/null +++ b/assemble-selfseed.go @@ -0,0 +1,174 @@ +package desync + +import ( + "fmt" + "io" + "os" +) + +type selfSeed struct { + file string + index Index + pos map[ChunkID][]int + canReflink bool + readers chan *os.File +} + +func newSelfSeed(file string, index Index, n int) (*selfSeed, error) { + s := 
&selfSeed{ + file: file, + pos: make(map[ChunkID][]int), + index: index, + canReflink: CanClone(file, file), + readers: make(chan *os.File, n), + } + for i, c := range s.index.Chunks { + s.pos[c.ID] = append(s.pos[c.ID], i) + } + // Only open read handles if the file exists. If it doesn't, self-seed + // segments won't be created since there's nothing to match. + if _, err := os.Stat(file); err == nil { + for range n { + f, err := os.Open(file) + if err != nil { + s.Close() + return nil, err + } + s.readers <- f + } + } + return s, nil +} + +func (s *selfSeed) Close() { + if s.readers == nil { + return + } + close(s.readers) + for f := range s.readers { + f.Close() + } + s.readers = nil +} + +// longestMatchFrom returns the longest sequence of matching chunks after a +// given starting position. +func (s *selfSeed) longestMatchFrom(chunks []IndexChunk, startPos int) (int, int) { + if len(chunks) <= startPos || len(s.index.Chunks) == 0 { + return 0, 0 + } + pos, ok := s.pos[chunks[startPos].ID] + if !ok { + return 0, 0 + } + // From every position of chunks[startPos] in the source, find a slice of + // matching chunks. Then return the longest of those slices. + var ( + maxStart int + maxLen int + limit int + ) + if !s.canReflink { + // Limit the maximum number of chunks, in a single sequence, to + // avoid having jobs that are too unbalanced. However, if + // reflinks are supported, we don't limit it to make it faster + // and take less space. 
+ limit = 100 + } + for _, p := range pos { + if p <= startPos { + continue + } + start, n := s.maxMatchFrom(chunks[startPos:], p, limit) + // Clamp to prevent source [p, p+n) overlapping destination [startPos, startPos+n) + if max := p - startPos; n > max { + n = max + } + if n >= maxLen { // Using >= here to get the last (longest) match + maxStart = start + maxLen = n + } + if limit != 0 && limit == maxLen { + break + } + } + return maxStart, maxLen +} + +func (s *selfSeed) maxMatchFrom(chunks []IndexChunk, p int, limit int) (int, int) { + if len(chunks) == 0 { + return 0, 0 + } + var ( + sp int + dp = p + ) + for { + if limit != 0 && sp == limit { + break + } + if dp >= len(s.index.Chunks) || sp >= len(chunks) { + break + } + if chunks[sp].ID != s.index.Chunks[dp].ID { + break + } + dp++ + sp++ + } + return p, dp - p +} + +func (s *selfSeed) getSegment(from, to, length int) *selfSeedSegment { + return &selfSeedSegment{ + seed: s, + from: from, + to: to, + length: length, + } +} + +type selfSeedSegment struct { + seed *selfSeed + from int // Index of the first chunk to copy from + to int // Index of the first chunk to copy to + length int // Number of chunks to copy +} + +func (s *selfSeedSegment) Execute(f *os.File) (copied uint64, cloned uint64, err error) { + srcStart := s.seed.index.Chunks[s.from].Start + dstStart := s.seed.index.Chunks[s.to].Start + lastFrom := s.from + s.length - 1 + length := s.seed.index.Chunks[lastFrom].Start + s.seed.index.Chunks[lastFrom].Size - srcStart + + blocksize := blocksizeOfFile(f.Name()) + + // Use reflinks if supported and blocks are aligned + if s.seed.canReflink && srcStart%blocksize == dstStart%blocksize { + return 0, length, CloneRange(f, f, srcStart, length, dstStart) + } + + // Borrow a read handle from the pool + src := <-s.seed.readers + defer func() { s.seed.readers <- src }() + + if _, err := src.Seek(int64(srcStart), io.SeekStart); err != nil { + return 0, 0, err + } + if _, err := f.Seek(int64(dstStart), 
io.SeekStart); err != nil { + return 0, 0, err + } + _, err = io.CopyBuffer(f, io.LimitReader(src, int64(length)), make([]byte, 64*1024)) + return length, 0, err +} + +func (s *selfSeedSegment) String() string { + fromStart := s.seed.index.Chunks[s.from].Start + toStart := s.seed.index.Chunks[s.to].Start + lastFromChunkIndex := s.from + s.length - 1 + lastToChunkIndex := s.to + s.length - 1 + fromEnd := s.seed.index.Chunks[lastFromChunkIndex].Start + s.seed.index.Chunks[lastFromChunkIndex].Size + toEnd := s.seed.index.Chunks[lastToChunkIndex].Start + s.seed.index.Chunks[lastToChunkIndex].Size + + return fmt.Sprintf("SelfSeed: Copy [%d:%d] to [%d:%d]", fromStart, fromEnd, toStart, toEnd) +} diff --git a/assemble-skip.go b/assemble-skip.go new file mode 100644 index 0000000..52cdb0a --- /dev/null +++ b/assemble-skip.go @@ -0,0 +1,20 @@ +package desync + +import ( + "fmt" + "os" +) + +// skipInPlace skips data chunks that are already in place. +type skipInPlace struct { + start uint64 + end uint64 +} + +func (s *skipInPlace) Execute(f *os.File) (copied uint64, cloned uint64, err error) { + return 0, 0, nil +} + +func (s *skipInPlace) String() string { + return fmt.Sprintf("InPlace: Skip [%d:%d]", s.start, s.end) +} diff --git a/assemble-step.go b/assemble-step.go new file mode 100644 index 0000000..9701b62 --- /dev/null +++ b/assemble-step.go @@ -0,0 +1,62 @@ +package desync + +import ( + "iter" + "maps" +) + +type PlanStep struct { + source assembleSource + + // numChunks is the number of index chunks this step covers. + numChunks int + + // Steps that depend on this one. + dependents stepSet + + // Steps that this one depends on. + dependencies stepSet +} + +// addDependent adds a step that depends on this one. +func (n *PlanStep) addDependent(other *PlanStep) { + if n.dependents == nil { + n.dependents = newStepSet() + } + n.dependents.add(other) +} + +// addDependency adds a step that this one depends on. 
+func (n *PlanStep) addDependency(other *PlanStep) { + if n.dependencies == nil { + n.dependencies = newStepSet() + } + n.dependencies.add(other) +} + +// ready returns true when all dependencies have been resolved. +func (n *PlanStep) ready() bool { + return n.dependencies.len() == 0 +} + +type stepSet map[*PlanStep]struct{} + +func newStepSet() stepSet { + return make(stepSet) +} + +func (s stepSet) add(n *PlanStep) { + s[n] = struct{}{} +} + +func (s stepSet) remove(n *PlanStep) { + delete(s, n) +} + +func (s stepSet) Each() iter.Seq[*PlanStep] { + return maps.Keys(s) +} + +func (s stepSet) len() int { + return len(s) +} diff --git a/assemble-store.go b/assemble-store.go new file mode 100644 index 0000000..2e9ae64 --- /dev/null +++ b/assemble-store.go @@ -0,0 +1,31 @@ +package desync + +import ( + "fmt" + "os" +) + +type copyFromStore struct { + store Store + chunk IndexChunk +} + +func (s *copyFromStore) Execute(f *os.File) (copied uint64, cloned uint64, err error) { + chunk, err := s.store.GetChunk(s.chunk.ID) + if err != nil { + return 0, 0, err + } + b, err := chunk.Data() + if err != nil { + return 0, 0, err + } + if s.chunk.Size != uint64(len(b)) { + return 0, 0, fmt.Errorf("unexpected size for chunk %s", s.chunk.ID) + } + _, err = f.WriteAt(b, int64(s.chunk.Start)) + return 0, 0, err +} + +func (s *copyFromStore) String() string { + return fmt.Sprintf("Store: Copy %v to [%d:%d]", s.chunk.ID.String(), s.chunk.Start, s.chunk.Start+s.chunk.Size) +} diff --git a/assemble.go b/assemble.go index ca13980..48e1843 100644 --- a/assemble.go +++ b/assemble.go @@ -2,9 +2,13 @@ package desync import ( "context" + "errors" "fmt" - "golang.org/x/sync/errgroup" "os" + "slices" + "sync" + + "golang.org/x/sync/errgroup" ) // InvalidSeedAction represents the action that we will take if a seed @@ -25,61 +29,6 @@ type AssembleOptions struct { InvalidSeedAction InvalidSeedAction } -// writeChunk tries to write a chunk by looking at the self seed, if it is already existing in 
the -// destination file or by taking it from the store. The in-place check runs first to avoid unnecessary -// writes. If the target already has the correct data, no write is performed. -func writeChunk(c IndexChunk, ss *selfSeed, f *os.File, blocksize uint64, s Store, stats *ExtractStats, isBlank bool) error { - // If we operate on an existing file there's a good chance we already - // have the data written for this chunk. Let's read it from disk and - // compare to what is expected. This is checked first to avoid rewriting - // data that is already correct, even for chunks available in the selfSeed. - if !isBlank { - b := make([]byte, c.Size) - if _, err := f.ReadAt(b, int64(c.Start)); err != nil { - return err - } - sum := Digest.Sum(b) - if sum == c.ID { - // Record we kept this chunk in the file (when using in-place extract) - stats.incChunksInPlace() - return nil - } - } - - // If we already took this chunk from the store we can reuse it by looking - // into the selfSeed. - if segment := ss.getChunk(c.ID); segment != nil { - copied, cloned, err := segment.WriteInto(f, c.Start, c.Size, blocksize, isBlank) - if err != nil { - return err - } - stats.addBytesCopied(copied) - stats.addBytesCloned(cloned) - return nil - } - - // Record this chunk having been pulled from the store - stats.incChunksFromStore() - // Pull the (compressed) chunk from the store - chunk, err := s.GetChunk(c.ID) - if err != nil { - return err - } - b, err := chunk.Data() - if err != nil { - return err - } - // Might as well verify the chunk size while we're at it - if c.Size != uint64(len(b)) { - return fmt.Errorf("unexpected size for chunk %s", c.ID.String()) - } - // Write the decompressed chunk into the file at the right position - if _, err = f.WriteAt(b, int64(c.Start)); err != nil { - return err - } - return nil -} - // AssembleFile re-assembles a file based on a list of index chunks. 
It runs n // goroutines, creating one filehandle for the file "name" per goroutine // and writes to the file simultaneously. If progress is provided, it'll be @@ -89,18 +38,11 @@ func writeChunk(c IndexChunk, ss *selfSeed, f *os.File, blocksize uint64, s Stor // differ from the expected content. This can be used to complete partly // written files. func AssembleFile(ctx context.Context, name string, idx Index, s Store, seeds []Seed, options AssembleOptions) (*ExtractStats, error) { - type Job struct { - segment IndexSegment - source SeedSegment - } var ( - attempt = 1 - in = make(chan Job) isBlank bool isBlkDevice bool - pb ProgressBar + attempt = 1 ) - g, ctx := errgroup.WithContext(ctx) // Initialize stats to be gathered during extraction stats := &ExtractStats{ @@ -147,138 +89,178 @@ func AssembleFile(ctx context.Context, name string, idx Index, s Store, seeds [] defer ns.close() seeds = append([]Seed{ns}, seeds...) - // Start a self-seed which will become usable once chunks are written contiguously - // beginning at position 0. There is no need to add this to the seeds list because - // when we create a plan it will be empty. - ss, err := newSelfSeed(name, idx) - if err != nil { - return stats, err - } - // Record the total number of seeds and blocksize in the stats stats.Seeds = len(seeds) stats.Blocksize = blocksize - // Start the workers, each having its own filehandle to write concurrently - for i := 0; i < options.N; i++ { - f, err := os.OpenFile(name, os.O_RDWR, 0666) - if err != nil { - return stats, fmt.Errorf("unable to open file %s, %s", name, err) - } - defer f.Close() - g.Go(func() error { - for job := range in { - pb.Add(job.segment.lengthChunks()) - if job.source != nil { - // If we have a seedSegment we expect 1 or more chunks between - // the start and the end of this segment. 
- stats.addChunksFromSeed(uint64(job.segment.lengthChunks())) - offset := job.segment.start() - length := job.segment.lengthBytes() - copied, cloned, err := job.source.WriteInto(f, offset, length, blocksize, isBlank) - if err != nil { - return err - } - - // Validate that the written chunks are exactly what we were expecting. - // Because the seed might point to a RW location, if the data changed - // while we were extracting an index, we might end up writing to the - // destination some unexpected values. - for _, c := range job.segment.chunks() { - b := make([]byte, c.Size) - if _, err := f.ReadAt(b, int64(c.Start)); err != nil { - return err - } - sum := Digest.Sum(b) - if sum != c.ID { - if options.InvalidSeedAction == InvalidSeedActionRegenerate { - // Try harder before giving up and aborting - Log.WithField("ID", c.ID.String()).Info("The seed may have changed during processing, trying to take the chunk from the self seed or the store") - if err := writeChunk(c, ss, f, blocksize, s, stats, isBlank); err != nil { - return err - } - } else { - return fmt.Errorf("written data in %s doesn't match its expected hash value, seed may have changed during processing", name) - } - } - } - - stats.addBytesCopied(copied) - stats.addBytesCloned(cloned) - // Record this segment's been written in the self-seed to make it - // available going forward - ss.add(job.segment) - continue - } + // Create the plan +retry: + plan, err := NewPlan(name, idx, s, + PlanWithConcurrency(options.N), + PlanWithSeeds(seeds), + PlanWithTargetIsBlank(isBlank), + ) + if err != nil { + return stats, err + } - // If we don't have a seedSegment we expect an IndexSegment with just - // a single chunk, that we can take from either the selfSeed, from the - // destination file, or from the store. 
- if len(job.segment.chunks()) != 1 { - panic("Received an unexpected segment that doesn't contain just a single chunk") - } - c := job.segment.chunks()[0] + // Validate the seed indexes provided and potentially regenerate them + if err := plan.Validate(); err != nil { + // Close the invalid plan + plan.Close() - if err := writeChunk(c, ss, f, blocksize, s, stats, isBlank); err != nil { - return err - } + var seedError SeedInvalid + if errors.As(err, &seedError) { - // Record this chunk's been written in the self-seed. - // Even if we already confirmed that this chunk is present in the - // self-seed, we still need to record it as being written, otherwise - // the self-seed position pointer doesn't advance as we expect. - ss.add(job.segment) - } - return nil - }) - } - - // Let the sequencer break up the index into segments, create and validate a plan, - // feed the workers, and stop if there are any errors - seq := NewSeedSequencer(idx, seeds...) - plan := seq.Plan() - for { - validatingPrefix := fmt.Sprintf("Attempt %d: Validating ", attempt) - if err := plan.Validate(ctx, options.N, NewProgressBar(validatingPrefix)); err != nil { - // This plan has at least one invalid seed switch options.InvalidSeedAction { case InvalidSeedActionBailOut: return stats, err case InvalidSeedActionRegenerate: - Log.WithError(err).Info("Unable to use one of the chosen seeds, regenerating it") - if err := seq.RegenerateInvalidSeeds(ctx, options.N, attempt); err != nil { - return stats, err + Log.WithError(err).Info("Unable to use one or more seeds, regenerating them") + for i, s := range seedError.Seeds { + if err := s.RegenerateIndex(ctx, options.N, attempt, i+1); err != nil { + return stats, err + } } + attempt++ + goto retry case InvalidSeedActionSkip: - // Recreate the plan. 
This time the seed marked as invalid will be skipped - Log.WithError(err).Info("Unable to use one of the chosen seeds, skipping it") + Log.WithError(err).Infof("Unable to use one or more seeds, skipping them") + seeds = slices.DeleteFunc(seeds, func(s Seed) bool { + return slices.Contains(seedError.Seeds, s) + }) + goto retry default: panic("Unhandled InvalidSeedAction") } + } + return stats, err + } + defer plan.Close() - attempt += 1 - seq.Rewind() - plan = seq.Plan() - continue + // Generate the plan steps necessary to build the target + steps := plan.Steps() + if len(steps) == 0 { + return stats, nil + } + + // Split the steps into those that are independent and those that + // require other steps to complete first. + var ( + ready []*PlanStep + delayed = make(stepSet) + ) + for _, step := range steps { + if step.ready() { + ready = append(ready, step) + } else { + delayed.add(step) } - // Found a valid plan - break } - pb = NewProgressBar(fmt.Sprintf("Attempt %d: Assembling ", attempt)) + // Set up progress bar + pb := NewProgressBar(fmt.Sprintf("Attempt %d: Assembling ", attempt)) pb.SetTotal(len(idx.Chunks)) pb.Start() defer pb.Finish() -loop: - for _, segment := range plan { - select { - case <-ctx.Done(): - break loop - case in <- Job{segment.indexSegment, segment.source}: - } + // Create two channels, one for steps that can run and one for those + // that are complete. 
+ var ( + inProgress = make(chan *PlanStep, len(steps)) + completed = make(chan *PlanStep, options.N) + ) + + g, ctx := errgroup.WithContext(ctx) + g.SetLimit(options.N) + + // Bring up the workers + for range options.N { + g.Go(func() error { + f, err := os.OpenFile(name, os.O_RDWR, 0666) + if err != nil { + return fmt.Errorf("unable to open file %s, %s", name, err) + } + defer f.Close() + for { + select { + case step, ok := <-inProgress: + if !ok { + return nil + } + copied, cloned, err := step.source.Execute(f) + if err != nil { + return err + } + // Update byte-level stats + stats.addBytesCopied(copied) + stats.addBytesCloned(cloned) + // Update chunk-level stats based on source type + switch step.source.(type) { + case *copyFromStore: + stats.incChunksFromStore() + case *skipInPlace: + stats.addChunksInPlace(uint64(step.numChunks)) + case *fileSeedSource, *selfSeedSegment: + stats.addChunksFromSeed(uint64(step.numChunks)) + } + select { + case completed <- step: + case <-ctx.Done(): + return ctx.Err() + } + case <-ctx.Done(): + return ctx.Err() + } + } + }) + } + + // Populate all steps that are ready to be executed + for _, step := range ready { + inProgress <- step } - close(in) - return stats, g.Wait() + // Start the dispatch goroutine which runs the plan. This should be + // outside the errgroup as it'll only be stopped once the workers are + // done. + var wg sync.WaitGroup + wg.Go(func() { + for step := range completed { + pb.Add(step.numChunks) + + // Go through all the steps that are blocked by this + // one and remove the dependency. If all deps have been + // removed, send them for processing and remove them + // from the ready list. + for b := range step.dependents.Each() { + b.dependencies.remove(step) + if b.ready() { + delayed.remove(b) + inProgress <- b + } + } + + // If there are no more delayed steps, close the work queue. 
+ if delayed.len() == 0 { + close(inProgress) + break + } + } + + // Drain the completed queue, updating the progress bar for any + // steps that finished after the work queue was closed. + for step := range completed { + pb.Add(step.numChunks) + } + }) + + // Wait for the workers to complete + err = g.Wait() + + // Stop the dispatch goroutine + close(completed) + wg.Wait() + + return stats, err } diff --git a/assemble_test.go b/assemble_test.go index 88e2cb7..b28b3fa 100644 --- a/assemble_test.go +++ b/assemble_test.go @@ -10,27 +10,37 @@ import ( "path/filepath" "testing" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) +// Build an index with a pre-determined set of (potentially repeated) chunks +func indexSequence(ids ...uint8) Index { + var ( + chunks = make([]IndexChunk, len(ids)) + start uint64 = 0 + size uint64 = 100 + ) + for i, id := range ids { + chunks[i] = IndexChunk{Start: start, Size: size, ID: ChunkID{id}} + start += size + } + return Index{Chunks: chunks} +} + func TestExtract(t *testing.T) { // Make a test file that's guaranteed to have duplicate chunks. b, err := os.ReadFile("testdata/chunker.input") - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) for range 4 { // Replicate it a few times to make sure we get dupes b = append(b, b...) } b = append(b, make([]byte, 2*ChunkSizeMaxDefault)...) 
// want to have at least one null-chunk in the input in, err := os.CreateTemp("", "in") - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer os.RemoveAll(in.Name()) - if _, err := io.Copy(in, bytes.NewReader(b)); err != nil { - t.Fatal(err) - } + _, err = io.Copy(in, bytes.NewReader(b)) + require.NoError(t, err) in.Close() // Record the checksum of the input file, used to compare to the output later @@ -44,76 +54,50 @@ func TestExtract(t *testing.T) { ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault, NewProgressBar(""), ) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) // Chop up the input file into a (temporary) local store store := t.TempDir() s, err := NewLocalStore(store, StoreOptions{}) - if err != nil { - t.Fatal(err) - } - if err := ChopFile(context.Background(), in.Name(), index.Chunks, s, 10, NewProgressBar("")); err != nil { - t.Fatal(err) - } + require.NoError(t, err) + + err = ChopFile(context.Background(), in.Name(), index.Chunks, s, 10, NewProgressBar("")) + require.NoError(t, err) // Make a blank store - used to test a case where no chunk *should* be requested blankstore := t.TempDir() bs, err := NewLocalStore(blankstore, StoreOptions{}) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) // Prepare output files for each test - first a non-existing one - out1, err := os.CreateTemp("", "out1") - if err != nil { - t.Fatal(err) - } - os.Remove(out1.Name()) + outdir := t.TempDir() + out1 := filepath.Join(outdir, "out1") // This one is a complete file matching what we expect at the end out2, err := os.CreateTemp("", "out2") - if err != nil { - t.Fatal(err) - } - if _, err := io.Copy(out2, bytes.NewReader(b)); err != nil { - t.Fatal(err) - } + require.NoError(t, err) + _, err = io.Copy(out2, bytes.NewReader(b)) + require.NoError(t, err) out2.Close() defer os.Remove(out2.Name()) // Incomplete or damaged file that has most but not all data out3, err := os.CreateTemp("", "out3") - if err != nil { - 
t.Fatal(err) - } + require.NoError(t, err) b[0] ^= 0xff // flip some bits b[len(b)-1] ^= 0xff b = append(b, 0) // make it longer - if _, err := io.Copy(out3, bytes.NewReader(b)); err != nil { - t.Fatal(err) - } + _, err = io.Copy(out3, bytes.NewReader(b)) + require.NoError(t, err) out3.Close() defer os.Remove(out3.Name()) - // At this point we have the data needed for the test setup - // in - Temp file that represents the original input file - // inSub - MD5 of the input file - // index - Index file for the input file - // s - Local store containing the chunks needed to rebuild the input file - // bs - A blank local store, all GetChunk fail on it - // out1 - Just a non-existing file that gets assembled - // out2 - The output file already fully complete, no GetChunk should be needed - // out3 - Partial/damaged file with most, but not all data correct - // seedIndex + seedFile - Seed file to help assemble the input tests := map[string]struct { outfile string store Store - seed []Seed }{ - "extract to new file": {outfile: out1.Name(), store: s}, + "extract to new file": {outfile: out1, store: s}, "extract to complete file": {outfile: out2.Name(), store: bs}, "extract to incomplete file": {outfile: out3.Name(), store: s}, } @@ -121,19 +105,16 @@ func TestExtract(t *testing.T) { for name, test := range tests { t.Run(name, func(t *testing.T) { defer os.Remove(test.outfile) - if _, err := AssembleFile(context.Background(), test.outfile, index, test.store, nil, + _, err := AssembleFile(context.Background(), test.outfile, index, test.store, nil, AssembleOptions{10, InvalidSeedActionBailOut}, - ); err != nil { - t.Fatal(err) - } - b, err := os.ReadFile(test.outfile) - if err != nil { - t.Fatal(err) - } - outSum := md5.Sum(b) - if inSum != outSum { - t.Fatal("checksum of extracted file doesn't match expected") - } + ) + require.NoError(t, err) + + outBytes, err := os.ReadFile(test.outfile) + require.NoError(t, err) + + outSum := md5.Sum(outBytes) + assert.Equal(t, inSum, 
outSum, "checksum of extracted file doesn't match expected") }) } } @@ -142,9 +123,7 @@ func TestSeed(t *testing.T) { // Prepare different types of data slices that'll be used to assemble target // and seed files with varying amount of duplication data1, err := os.ReadFile("testdata/chunker.input") - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) null := make([]byte, 4*ChunkSizeMaxDefault) rand1 := make([]byte, 4*ChunkSizeMaxDefault) rand.Read(rand1) @@ -155,9 +134,7 @@ func TestSeed(t *testing.T) { store := t.TempDir() s, err := NewLocalStore(store, StoreOptions{}) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) // Define tests with files with different content, by building files out // of sets of byte slices to create duplication or not between the target and @@ -201,13 +178,10 @@ func TestSeed(t *testing.T) { t.Run(name, func(t *testing.T) { // Build the destination file so we can chunk it dst, err := os.CreateTemp("", "dst") - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) dstBytes := join(test.target...) 
- if _, err := io.Copy(dst, bytes.NewReader(dstBytes)); err != nil { - t.Fatal(err) - } + _, err = io.Copy(dst, bytes.NewReader(dstBytes)) + require.NoError(t, err) dst.Close() defer os.Remove(dst.Name()) @@ -222,25 +196,19 @@ func TestSeed(t *testing.T) { ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault, NewProgressBar(""), ) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) // Chop up the input file into the store - if err := ChopFile(context.Background(), dst.Name(), dstIndex.Chunks, s, 10, NewProgressBar("")); err != nil { - t.Fatal(err) - } + err = ChopFile(context.Background(), dst.Name(), dstIndex.Chunks, s, 10, NewProgressBar("")) + require.NoError(t, err) // Build the seed files and indexes then populate the array of seeds var seeds []Seed for _, f := range test.seeds { seedFile, err := os.CreateTemp("", "seed") - if err != nil { - t.Fatal(err) - } - if _, err := io.Copy(seedFile, bytes.NewReader(join(f...))); err != nil { - t.Fatal(err) - } + require.NoError(t, err) + _, err = io.Copy(seedFile, bytes.NewReader(join(f...))) + require.NoError(t, err) seedFile.Close() defer os.Remove(seedFile.Name()) seedIndex, _, err := IndexFromFile( @@ -250,29 +218,20 @@ func TestSeed(t *testing.T) { ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault, NewProgressBar(""), ) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) seed, err := NewIndexSeed(dst.Name(), seedFile.Name(), seedIndex) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) seeds = append(seeds, seed) } - if _, err := AssembleFile(context.Background(), dst.Name(), dstIndex, s, seeds, + _, err = AssembleFile(context.Background(), dst.Name(), dstIndex, s, seeds, AssembleOptions{10, InvalidSeedActionBailOut}, - ); err != nil { - t.Fatal(err) - } + ) + require.NoError(t, err) b, err := os.ReadFile(dst.Name()) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) outSum := md5.Sum(b) - if dstSum != outSum { - t.Fatal("checksum of extracted file 
doesn't match expected") - } + assert.Equal(t, dstSum, outSum, "checksum of extracted file doesn't match expected") }) } @@ -286,9 +245,7 @@ func TestSelfSeedInPlace(t *testing.T) { store := t.TempDir() s, err := NewLocalStore(store, StoreOptions{}) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) // Build a number of fake chunks that can then be used in the test in any order type rawChunk struct { @@ -303,9 +260,7 @@ func TestSelfSeedInPlace(t *testing.T) { b := make([]byte, size) rand.Read(b) chunk := NewChunk(b) - if err = s.StoreChunk(chunk); err != nil { - t.Fatal(err) - } + require.NoError(t, s.StoreChunk(chunk)) chunks[i] = rawChunk{chunk.ID(), b} } @@ -354,40 +309,28 @@ func TestSelfSeedInPlace(t *testing.T) { // Build a temp target file pre-populated with the correct content dst, err := os.CreateTemp("", "dst") - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer os.Remove(dst.Name()) _, err = dst.Write(b) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) dst.Close() // Extract the file stats, err := AssembleFile(context.Background(), dst.Name(), idx, s, nil, AssembleOptions{1, InvalidSeedActionBailOut}, ) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) // Compare the checksums to that of the input data b, err = os.ReadFile(dst.Name()) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) outSum := md5.Sum(b) - if sum != outSum { - t.Fatal("checksum of extracted file doesn't match expected") - } + assert.Equal(t, sum, outSum, "checksum of extracted file doesn't match expected") // All chunks must be in-place. The in-place check in writeChunk // runs before the self-seed lookup, so repeated chunks are not // re-written from the self-seed. 
- if stats.ChunksInPlace != uint64(len(test.index)) { - t.Fatalf("expected all %d chunks in-place, got %d", len(test.index), stats.ChunksInPlace) - } + assert.Equal(t, uint64(len(test.index)), stats.ChunksInPlace, "expected all chunks in-place") }) } @@ -410,38 +353,3 @@ func readCaibxFile(t *testing.T, indexLocation string) (idx Index) { require.NoError(t, err) return idx } - -func TestExtractWithNonStaticSeeds(t *testing.T) { - n := 10 - outDir := t.TempDir() - out := filepath.Join(outDir, "out") - - // Test a seed that is initially valid, but becomes corrupted halfway through - // the extraction operation - MockValidate = true - - store, err := NewLocalStore("testdata/blob2.store", StoreOptions{}) - require.NoError(t, err) - defer store.Close() - - index := readCaibxFile(t, "testdata/blob2.caibx") - - var seeds []Seed - srcIndex := readCaibxFile(t, "testdata/blob2_corrupted.caibx") - seed, err := NewIndexSeed(out, "testdata/blob2_corrupted", srcIndex) - seeds = append(seeds, seed) - - // Test that the MockValidate works as expected - seq := NewSeedSequencer(index, seeds...) - plan := seq.Plan() - err = plan.Validate(context.Background(), n, NullProgressBar{}) - require.NoError(t, err) - - options := AssembleOptions{n, InvalidSeedActionRegenerate} - _, err = AssembleFile(context.Background(), out, index, store, seeds, options) - require.NoError(t, err) - - //Test the output - err = VerifyIndex(context.Background(), out, index, n, NullProgressBar{}) - require.NoError(t, err) -} diff --git a/cmd/desync/extract_test.go b/cmd/desync/extract_test.go index d857ac4..105ee1d 100644 --- a/cmd/desync/extract_test.go +++ b/cmd/desync/extract_test.go @@ -68,9 +68,10 @@ func TestExtractCommand(t *testing.T) { // Explicitly set blob1 seed because seed-dir skips a seed if it's the same index file we gave in input. 
{"extract with seed directory without skipping invalid seeds", []string{"-s", "testdata/blob1.store", "--seed-dir", "testdata", "--seed", "testdata/blob1.caibx", "testdata/blob1.caibx"}, out1}, - // Same as above, no need for `--skip-invalid-seeds` + // The plan generator processes seeds in order, so the corrupted seed + // may get placements that fail validation. Use --skip-invalid-seeds. {"extract with multiple corrupted seeds", - []string{"--store", "testdata/empty.store", "--seed", "testdata/blob2_corrupted.caibx", "--seed", "testdata/blob1.caibx", "testdata/blob1.caibx"}, out1}, + []string{"--store", "testdata/empty.store", "--seed", "testdata/blob2_corrupted.caibx", "--seed", "testdata/blob1.caibx", "--skip-invalid-seeds", "testdata/blob1.caibx"}, out1}, {"extract with single seed that has all the expected chunks", []string{"--store", "testdata/empty.store", "--seed", "testdata/blob1.caibx", "testdata/blob1.caibx"}, out1}, // blob2_corrupted is a corrupted blob that doesn't match its seed index. We regenerate the seed index to match diff --git a/errors.go b/errors.go index 45425c6..5d0fa12 100644 --- a/errors.go +++ b/errors.go @@ -44,3 +44,17 @@ func (e InvalidFormat) Error() string { type Interrupted struct{} func (e Interrupted) Error() string { return "interrupted" } + +// SeedInvalid is returned when a seed's data doesn't match its index. 
+type SeedInvalid struct { + Seeds []Seed + Err error +} + +func (e SeedInvalid) Error() string { + return fmt.Sprintf("invalid seed: %s", e.Err) +} + +func (e SeedInvalid) Unwrap() error { + return e.Err +} diff --git a/extractstats.go b/extractstats.go index 9deefcf..229f7b9 100644 --- a/extractstats.go +++ b/extractstats.go @@ -22,8 +22,8 @@ func (s *ExtractStats) incChunksFromStore() { atomic.AddUint64(&s.ChunksFromStore, 1) } -func (s *ExtractStats) incChunksInPlace() { - atomic.AddUint64(&s.ChunksInPlace, 1) +func (s *ExtractStats) addChunksInPlace(n uint64) { + atomic.AddUint64(&s.ChunksInPlace, n) } func (s *ExtractStats) addChunksFromSeed(n uint64) { diff --git a/fileseed.go b/fileseed.go index be1c8a6..10791ab 100644 --- a/fileseed.go +++ b/fileseed.go @@ -5,7 +5,6 @@ import ( "fmt" "io" "os" - "sync" ) // FileSeed is used to copy or clone blocks from an existing index+blob during @@ -15,8 +14,6 @@ type FileSeed struct { index Index pos map[ChunkID][]int canReflink bool - isInvalid bool - mu sync.RWMutex } // NewIndexSeed initializes a new seed that uses an existing index and its blob @@ -26,7 +23,6 @@ func NewIndexSeed(dstFile string, srcFile string, index Index) (*FileSeed, error pos: make(map[ChunkID][]int), index: index, canReflink: CanClone(dstFile, srcFile), - isInvalid: false, } for i, c := range s.index.Chunks { s.pos[c.ID] = append(s.pos[c.ID], i) @@ -39,12 +35,7 @@ func NewIndexSeed(dstFile string, srcFile string, index Index) (*FileSeed, error // if reflinks are not supported. If there is no match, it returns a length of zero // and a nil SeedSegment. func (s *FileSeed) LongestMatchWith(chunks []IndexChunk) (int, SeedSegment) { - s.mu.RLock() - isInvalid := s.isInvalid - s.mu.RUnlock() - - // isInvalid can be concurrently read or written. 
Use a mutex to avoid a race - if len(chunks) == 0 || len(s.index.Chunks) == 0 || isInvalid { + if len(chunks) == 0 || len(s.index.Chunks) == 0 { return 0, nil } pos, ok := s.pos[chunks[0].ID] @@ -87,7 +78,6 @@ func (s *FileSeed) RegenerateIndex(ctx context.Context, n int, attempt int, seed } s.index = index - s.SetInvalid(false) s.pos = make(map[ChunkID][]int, len(s.index.Chunks)) for i, c := range s.index.Chunks { s.pos[c.ID] = append(s.pos[c.ID], i) @@ -96,18 +86,6 @@ func (s *FileSeed) RegenerateIndex(ctx context.Context, n int, attempt int, seed return nil } -func (s *FileSeed) SetInvalid(value bool) { - s.mu.Lock() - defer s.mu.Unlock() - s.isInvalid = value -} - -func (s *FileSeed) IsInvalid() bool { - s.mu.Lock() - defer s.mu.Unlock() - return s.isInvalid -} - // Returns a slice of chunks from the seed. Compares chunks from position 0 // with seed chunks starting at p. A "limit" value of zero means that there is no limit. func (s *FileSeed) maxMatchFrom(chunks []IndexChunk, p int, limit int) []IndexChunk { diff --git a/go.mod b/go.mod index 50d0a7e..81d97b5 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/folbricht/desync -go 1.24.0 +go 1.25.0 require ( cloud.google.com/go/storage v1.30.1 diff --git a/nullseed.go b/nullseed.go index 0cc45a9..92d363b 100644 --- a/nullseed.go +++ b/nullseed.go @@ -77,15 +77,6 @@ func (s *nullChunkSeed) RegenerateIndex(ctx context.Context, n int, attempt int, panic("A nullseed can't be regenerated") } -func (s *nullChunkSeed) SetInvalid(value bool) { - panic("A nullseed is never expected to be invalid") -} - -func (s *nullChunkSeed) IsInvalid() bool { - // A nullseed is never expected to be invalid - return false -} - type nullChunkSection struct { from, to uint64 blockfile *os.File diff --git a/seed.go b/seed.go index ffc49f3..b5fa88c 100644 --- a/seed.go +++ b/seed.go @@ -14,8 +14,6 @@ const DefaultBlockSize = 4096 type Seed interface { LongestMatchWith(chunks []IndexChunk) (int, SeedSegment) 
RegenerateIndex(ctx context.Context, n int, attempt int, seedNumber int) error - SetInvalid(value bool) - IsInvalid() bool } // SeedSegment represents a matching range between a Seed and a file being @@ -27,30 +25,3 @@ type SeedSegment interface { Validate(file *os.File) error WriteInto(dst *os.File, offset, end, blocksize uint64, isBlank bool) (copied uint64, cloned uint64, err error) } - -// IndexSegment represents a contiguous section of an index which is used when -// assembling a file from seeds. first/last are positions in the index. -type IndexSegment struct { - index Index - first, last int -} - -func (s IndexSegment) lengthChunks() int { - return s.last - s.first + 1 -} - -func (s IndexSegment) lengthBytes() uint64 { - return s.end() - s.start() -} - -func (s IndexSegment) start() uint64 { - return s.index.Chunks[s.first].Start -} - -func (s IndexSegment) end() uint64 { - return s.index.Chunks[s.last].Start + s.index.Chunks[s.last].Size -} - -func (s IndexSegment) chunks() []IndexChunk { - return s.index.Chunks[s.first : s.last+1] -} diff --git a/selfseed.go b/selfseed.go deleted file mode 100644 index 86a5220..0000000 --- a/selfseed.go +++ /dev/null @@ -1,93 +0,0 @@ -package desync - -import ( - "context" - "sync" -) - -// FileSeed is used to populate a contiguous seed during extraction in order -// to copy/clone ranges that were written to the output file earlier. This is -// to potentially dedup/reflink duplicate chunks or ranges of chunks within the -// same file. 
-type selfSeed struct { - file string - index Index - pos map[ChunkID][]int - canReflink bool - written int - mu sync.RWMutex - cache map[int]int -} - -// newSelfSeed initializes a new seed based on the file being extracted -func newSelfSeed(file string, index Index) (*selfSeed, error) { - s := selfSeed{ - file: file, - pos: make(map[ChunkID][]int), - index: index, - canReflink: CanClone(file, file), - cache: make(map[int]int), - } - return &s, nil -} - -// add records a new segment that's been written to the file. Since only contiguous -// ranges of chunks are considered and writing happens concurrently, the segment -// written here will not be usable until all earlier chunks have been written as -// well. -func (s *selfSeed) add(segment IndexSegment) { - s.mu.Lock() - defer s.mu.Unlock() - - // Make a record of this segment in the cache since those could come in - // out-of-order - s.cache[segment.first] = segment.last + 1 - - // Advance pos until we find a chunk we don't yet have recorded while recording - // the chunk positions we do have in the position map used to find seed matches. - // Since it's guaranteed that the numbers are only increasing, we drop old numbers - // from the cache map to keep its size to a minimum and only store out-of-sequence - // numbers - for { - // See if we can advance the write pointer in the self-seed which requires - // consecutive chunks. If we don't have the next segment yet, just keep it - // in the cache until we do. - next, ok := s.cache[s.written] - if !ok { - break - } - // Record all chunks in this segment as written by adding them to the position map - for i := s.written; i < next; i++ { - chunk := s.index.Chunks[i] - s.pos[chunk.ID] = append(s.pos[chunk.ID], i) - } - delete(s.cache, s.written) - s.written = next - } -} - -// getChunk returns a segment with the requested chunk ID. If selfSeed doesn't -// have the requested chunk, nil will be returned. 
-func (s *selfSeed) getChunk(id ChunkID) SeedSegment { - s.mu.RLock() - pos, ok := s.pos[id] - s.mu.RUnlock() - if !ok { - return nil - } - first := pos[0] - return newFileSeedSegment(s.file, s.index.Chunks[first:first+1], s.canReflink) -} - -func (s *selfSeed) RegenerateIndex(ctx context.Context, n int, attempt int, seedNumber int) error { - panic("A selfSeed can't be regenerated") -} - -func (s *selfSeed) SetInvalid(value bool) { - panic("A selfSeed is never expected to be invalid") -} - -func (s *selfSeed) IsInvalid() bool { - // A selfSeed is never expected to be invalid - return false -} diff --git a/selfseed_test.go b/selfseed_test.go deleted file mode 100644 index 93c38e8..0000000 --- a/selfseed_test.go +++ /dev/null @@ -1,136 +0,0 @@ -package desync - -import ( - "context" - "crypto/md5" - "crypto/rand" - "os" - "testing" -) - -func TestSelfSeed(t *testing.T) { - // Setup a temporary store - store := t.TempDir() - - s, err := NewLocalStore(store, StoreOptions{}) - if err != nil { - t.Fatal(err) - } - - // Build a number of fake chunks that can then be used in the test in any order - type rawChunk struct { - id ChunkID - data []byte - } - size := 1024 - numChunks := 10 - chunks := make([]rawChunk, numChunks) - - for i := range numChunks { - b := make([]byte, size) - rand.Read(b) - chunk := NewChunk(b) - if err = s.StoreChunk(chunk); err != nil { - t.Fatal(err) - } - chunks[i] = rawChunk{chunk.ID(), b} - } - - // Define tests with files with different content, by building files out - // of sets of byte slices to create duplication or not between the target and - // its seeds. Also define a min/max of bytes that should be cloned (from the - // self-seed). That number can vary since even with 1 worker goroutine there - // another feeder goroutine which can influence timings/results a little. 
- tests := map[string]struct { - index []int - minCloned int - maxCloned int - }{ - "single chunk": { - index: []int{0}, - minCloned: 0, - maxCloned: 0, - }, - "repeating single chunk": { - index: []int{0, 0, 0, 0, 0}, - minCloned: 3 * size, - maxCloned: 4 * size, - }, - "repeating chunk sequence": { - index: []int{0, 1, 2, 0, 1, 2, 2}, - minCloned: 4 * size, - maxCloned: 4 * size, - }, - "repeating chunk sequence mid file": { - index: []int{1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}, - minCloned: 7 * size, - maxCloned: 7 * size, - }, - "repeating chunk sequence reversed": { - index: []int{0, 1, 2, 2, 1, 0}, - minCloned: 2 * size, - maxCloned: 3 * size, - }, - "non-repeating chunks": { - index: []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, - minCloned: 0, - maxCloned: 0, - }, - } - - for name, test := range tests { - t.Run(name, func(t *testing.T) { - // Build an index from the target chunks - var idx Index - var b []byte - for i, p := range test.index { - chunk := IndexChunk{ - ID: chunks[p].id, - Start: uint64(i * size), - Size: uint64(size), - } - b = append(b, chunks[p].data...) 
- idx.Chunks = append(idx.Chunks, chunk) - } - - // Calculate the expected checksum - sum := md5.Sum(b) - - // Build a temp target file to extract into - dst, err := os.CreateTemp("", "dst") - if err != nil { - t.Fatal(err) - } - defer os.Remove(dst.Name()) - defer dst.Close() - - // Extract the file - stats, err := AssembleFile(context.Background(), dst.Name(), idx, s, nil, - AssembleOptions{1, InvalidSeedActionBailOut}, - ) - if err != nil { - t.Fatal(err) - } - - // Compare the checksums to that of the input data - b, err = os.ReadFile(dst.Name()) - if err != nil { - t.Fatal(err) - } - outSum := md5.Sum(b) - if sum != outSum { - t.Fatal("checksum of extracted file doesn't match expected") - } - - // Compare to the expected number of bytes copied or cloned from the seed - fromSeed := int(stats.BytesCopied + stats.BytesCloned) - if fromSeed < test.minCloned { - t.Fatalf("expected min %d bytes copied/cloned from self-seed, got %d", test.minCloned, fromSeed) - } - if fromSeed > test.maxCloned { - t.Fatalf("expected max %d bytes copied/cloned from self-seed, got %d", test.maxCloned, fromSeed) - } - }) - } - -} diff --git a/sequencer.go b/sequencer.go deleted file mode 100644 index 1248a06..0000000 --- a/sequencer.go +++ /dev/null @@ -1,176 +0,0 @@ -package desync - -import ( - "context" - "golang.org/x/sync/errgroup" - "os" -) - -// SeedSequencer is used to find sequences of chunks from seed files when assembling -// a file from an index. Using seeds reduces the need to download and decompress chunks -// from chunk stores. It also enables the use of reflinking/cloning of sections of -// files from a seed file where supported to reduce disk usage. 
-type SeedSequencer struct { - seeds []Seed - index Index - current int -} - -// SeedSegmentCandidate represent a single segment that we expect to use -// in a Plan -type SeedSegmentCandidate struct { - seed Seed - source SeedSegment - indexSegment IndexSegment -} - -type Plan []SeedSegmentCandidate - -var MockValidate = false - -// NewSeedSequencer initializes a new sequencer from a number of seeds. -func NewSeedSequencer(idx Index, src ...Seed) *SeedSequencer { - return &SeedSequencer{ - seeds: src, - index: idx, - } -} - -// Plan returns a new possible plan, representing an ordered list of -// segments that can be used to re-assemble the requested file -func (r *SeedSequencer) Plan() (plan Plan) { - for { - seed, segment, source, done := r.Next() - plan = append(plan, SeedSegmentCandidate{seed, source, segment}) - if done { - break - } - } - return plan -} - -// Next returns a sequence of index chunks (from the target index) and the -// longest matching segment from one of the seeds. If source is nil, no -// match was found in the seeds and the chunk needs to be retrieved from a -// store. If done is true, the sequencer is complete. -func (r *SeedSequencer) Next() (seed Seed, segment IndexSegment, source SeedSegment, done bool) { - var ( - max uint64 - advance = 1 - ) - for _, s := range r.seeds { - n, m := s.LongestMatchWith(r.index.Chunks[r.current:]) - if n > 0 && m.Size() > max { - seed = s - source = m - advance = n - max = m.Size() - } - } - - segment = IndexSegment{index: r.index, first: r.current, last: r.current + advance - 1} - r.current += advance - return seed, segment, source, r.current >= len(r.index.Chunks) -} - -// Rewind resets the current target index to the beginning. 
-func (r *SeedSequencer) Rewind() { - r.current = 0 -} - -// isFileSeed returns true if this segment is pointing to a fileSeed -func (s SeedSegmentCandidate) isFileSeed() bool { - // We expect an empty filename when using nullSeeds - return s.source != nil && s.source.FileName() != "" -} - -// RegenerateInvalidSeeds regenerates the index to match the unexpected seed content -func (r *SeedSequencer) RegenerateInvalidSeeds(ctx context.Context, n int, attempt int) error { - seedNumber := 1 - for _, s := range r.seeds { - if s.IsInvalid() { - if err := s.RegenerateIndex(ctx, n, attempt, seedNumber); err != nil { - return err - } - seedNumber += 1 - } - } - return nil -} - -// Validate validates a proposed plan by checking if all the chosen chunks -// are correctly provided from the seeds. In case a seed has invalid chunks, the -// entire seed is marked as invalid and an error is returned. -func (p Plan) Validate(ctx context.Context, n int, pb ProgressBar) (err error) { - type Job struct { - candidate SeedSegmentCandidate - file *os.File - } - var ( - in = make(chan Job) - fileMap = make(map[string]*os.File) - ) - if MockValidate { - // This is used in the automated tests to mock a plan that is valid - return nil - } - length := 0 - for _, s := range p { - if !s.isFileSeed() { - continue - } - length += s.indexSegment.lengthChunks() - } - pb.SetTotal(length) - pb.Start() - defer pb.Finish() - // Share a single file descriptor per seed for all the goroutines - for _, s := range p { - if !s.isFileSeed() { - continue - } - name := s.source.FileName() - if _, present := fileMap[name]; present { - continue - } else { - file, err := os.Open(name) - if err != nil { - // We were not able to open the seed. 
Mark it as invalid and return - s.seed.SetInvalid(true) - return err - } - fileMap[name] = file - defer file.Close() - } - } - g, ctx := errgroup.WithContext(ctx) - // Concurrently validate all the chunks in this plan - for range n { - g.Go(func() error { - for job := range in { - if err := job.candidate.source.Validate(job.file); err != nil { - job.candidate.seed.SetInvalid(true) - return err - } - pb.Add(job.candidate.indexSegment.lengthChunks()) - } - return nil - }) - } - -loop: - for _, s := range p { - if !s.isFileSeed() { - // This is not a fileSeed, we have nothing to validate - continue - } - select { - case <-ctx.Done(): - break loop - case in <- Job{s, fileMap[s.source.FileName()]}: - } - } - close(in) - - return g.Wait() -} From 757ae03508b352921ad0d015cc325d26e72a18a0 Mon Sep 17 00:00:00 2001 From: folbrich Date: Sat, 14 Mar 2026 15:33:38 +0100 Subject: [PATCH 02/11] Run validation concurrently --- assemble-plan.go | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/assemble-plan.go b/assemble-plan.go index 1679980..469cdab 100644 --- a/assemble-plan.go +++ b/assemble-plan.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "os" + "sync" "golang.org/x/sync/errgroup" ) @@ -93,8 +94,14 @@ func (p *AssemblePlan) Close() { // Validate checks that all file seed placements still match their underlying // data. Returns a SeedInvalid error if a seed file was modified after its // index was created. -// TODO: run the verification steps in parallel. func (p *AssemblePlan) Validate() error { + // Phase 1 — Sequential: collect unique fileSeedSource placements, open + // their backing files, and build a list of items to validate. 
+ type validateItem struct { + fs *fileSeedSource + file *os.File + } + seen := make(map[*placement]struct{}) fileMap := make(map[string]*os.File) defer func() { @@ -106,6 +113,7 @@ func (p *AssemblePlan) Validate() error { invalidSeeds := make(map[Seed]error) failedFiles := make(map[string]struct{}) + var items []validateItem for _, pl := range p.placements { if _, ok := seen[pl]; ok { continue @@ -136,11 +144,26 @@ func (p *AssemblePlan) Validate() error { fileMap[fs.srcFile] = f } - if err := fs.segment.Validate(fileMap[fs.srcFile]); err != nil { - invalidSeeds[fs.seed] = err - } + items = append(items, validateItem{fs: fs, file: fileMap[fs.srcFile]}) + } + + // Phase 2 — Concurrent: validate each segment in parallel. + var mu sync.Mutex + var g errgroup.Group + g.SetLimit(p.concurrency) + for _, item := range items { + g.Go(func() error { + if err := item.fs.segment.Validate(item.file); err != nil { + mu.Lock() + invalidSeeds[item.fs.seed] = err + mu.Unlock() + } + return nil + }) } + g.Wait() + // Phase 3 — Sequential: build the error result. 
if len(invalidSeeds) > 0 { seeds := make([]Seed, 0, len(invalidSeeds)) errs := make([]error, 0, len(invalidSeeds)) From dc2f0303798375a696d6da5aecca80aa7dd0e89f Mon Sep 17 00:00:00 2001 From: folbrich Date: Wed, 18 Mar 2026 13:18:59 +0100 Subject: [PATCH 03/11] Simplify seed logic and improve plan performance --- assemble-plan.go | 69 +++++++++++++++++++++++++------------------ assemble-selfseed.go | 50 +++++++++++++------------------ fileseed.go | 70 ++++++++++++++++++++++++++++---------------- nullseed.go | 24 +++++++-------- seed.go | 3 +- 5 files changed, 120 insertions(+), 96 deletions(-) diff --git a/assemble-plan.go b/assemble-plan.go index 469cdab..f4db61d 100644 --- a/assemble-plan.go +++ b/assemble-plan.go @@ -235,7 +235,7 @@ func (p *AssemblePlan) generate() error { continue // Already filled } - start, n := p.selfSeed.longestMatchFrom(p.idx.Chunks, i) + start, n := p.selfSeed.LongestMatchFrom(p.idx.Chunks, i) if n < 1 { continue } @@ -263,50 +263,63 @@ func (p *AssemblePlan) generate() error { i-- // compensate for the outer loop's i++ // Update the step with the potentially adjusted length - pl.source = p.selfSeed.getSegment(start, to, size) + seedOffset := p.idx.Chunks[start].Start + last := p.idx.Chunks[start+size-1] + length := last.Start + last.Size - seedOffset + offset := p.idx.Chunks[to].Start + + pl.source = p.selfSeed.GetSegment(seedOffset, offset, length) pl.dependsOnStart = start pl.dependsOnSize = size } // Check file seeds for matches in unfilled positions. for _, seed := range p.seeds { - for i := 0; i < len(p.idx.Chunks); { + for i := 0; i < len(p.idx.Chunks); i++ { if p.placements[i] != nil { - i++ continue } - // Count consecutive unfilled positions to bound the match. 
- available := 0 - for j := i; j < len(p.idx.Chunks) && p.placements[j] == nil; j++ { - available++ - } - - n, segment := seed.LongestMatchWith(p.idx.Chunks[i : i+available]) + seedOffset, n := seed.LongestMatchFrom(p.idx.Chunks, i) if n < 1 { - i++ continue } - offset := p.idx.Chunks[i].Start - last := p.idx.Chunks[i+n-1] - length := last.Start + last.Size - offset - - pl := &placement{ - source: &fileSeedSource{ - segment: segment, - seed: seed, - srcFile: segment.FileName(), - offset: offset, - length: length, - isBlank: p.targetIsBlank, - }, + // Repeat the same placement for all chunks in the sequence. + // We dedup sequences later. + pl := &placement{} + + // We can use up to n chunks from the seed, find out how much + // we can actually use without overwriting any existing placements + // in the list. + var ( + to = i + size int + ) + for range n { + if p.placements[i] != nil { + break + } + p.placements[i] = pl + i++ + size++ } + i-- // compensate for the outer loop's i++ - for j := i; j < i+n; j++ { - p.placements[j] = pl + // Update the step with the potentially adjusted length + offset := p.idx.Chunks[to].Start + last := p.idx.Chunks[to+size-1] + length := last.Start + last.Size - offset + segment := seed.GetSegment(seedOffset, length) + + pl.source = &fileSeedSource{ + segment: segment, + seed: seed, + srcFile: segment.FileName(), + offset: offset, + length: length, + isBlank: p.targetIsBlank, } - i += n } } diff --git a/assemble-selfseed.go b/assemble-selfseed.go index d86ce2d..7005b68 100644 --- a/assemble-selfseed.go +++ b/assemble-selfseed.go @@ -51,9 +51,9 @@ func (s *selfSeed) Close() { s.readers = nil } -// longestMatchFrom returns the longest sequence of matching chunks after a +// LongestMatchFrom returns the longest sequence of matching chunks after a // given starting position. 
-func (s *selfSeed) longestMatchFrom(chunks []IndexChunk, startPos int) (int, int) { +func (s *selfSeed) LongestMatchFrom(chunks []IndexChunk, startPos int) (int, int) { if len(chunks) <= startPos || len(s.index.Chunks) == 0 { return 0, 0 } @@ -119,56 +119,46 @@ func (s *selfSeed) maxMatchFrom(chunks []IndexChunk, p int, limit int) (int, int return p, dp - p } -func (s *selfSeed) getSegment(from, to, length int) *selfSeedSegment { +func (s *selfSeed) GetSegment(srcOffset, dstOffset, size uint64) *selfSeedSegment { return &selfSeedSegment{ - seed: s, - from: from, - to: to, - length: length, + seed: s, + srcOffset: srcOffset, + dstOffset: dstOffset, + size: size, } } type selfSeedSegment struct { - seed *selfSeed - from int // Index of the first chunk to copy from - to int // Index of the first chunk to copy to - length int // Number of chunks to copy + seed *selfSeed + srcOffset uint64 + dstOffset uint64 + size uint64 } func (s *selfSeedSegment) Execute(f *os.File) (copied uint64, cloned uint64, err error) { - srcStart := s.seed.index.Chunks[s.from].Start - dstStart := s.seed.index.Chunks[s.to].Start - lastFrom := s.from + s.length - 1 - length := s.seed.index.Chunks[lastFrom].Start + s.seed.index.Chunks[lastFrom].Size - srcStart - blocksize := blocksizeOfFile(f.Name()) // Use reflinks if supported and blocks are aligned - if s.seed.canReflink && srcStart%blocksize == dstStart%blocksize { - return 0, length, CloneRange(f, f, srcStart, length, dstStart) + if s.seed.canReflink && s.srcOffset%blocksize == s.dstOffset%blocksize { + return 0, s.size, CloneRange(f, f, s.srcOffset, s.size, s.dstOffset) } // Borrow a read handle from the pool src := <-s.seed.readers defer func() { s.seed.readers <- src }() - if _, err := src.Seek(int64(srcStart), io.SeekStart); err != nil { + if _, err := src.Seek(int64(s.srcOffset), io.SeekStart); err != nil { return 0, 0, err } - if _, err := f.Seek(int64(dstStart), io.SeekStart); err != nil { + if _, err := f.Seek(int64(s.dstOffset), 
io.SeekStart); err != nil { return 0, 0, err } - _, err = io.CopyBuffer(f, io.LimitReader(src, int64(length)), make([]byte, 64*1024)) - return length, 0, err + _, err = io.CopyBuffer(f, io.LimitReader(src, int64(s.size)), make([]byte, 64*1024)) + return s.size, 0, err } func (s *selfSeedSegment) String() string { - fromStart := s.seed.index.Chunks[s.from].Start - toStart := s.seed.index.Chunks[s.to].Start - lastFromChunkIndex := s.from + s.length - 1 - lastToChunkIndex := s.to + s.length - 1 - fromEnd := s.seed.index.Chunks[lastFromChunkIndex].Start + s.seed.index.Chunks[lastFromChunkIndex].Size - toEnd := s.seed.index.Chunks[lastToChunkIndex].Start + s.seed.index.Chunks[lastToChunkIndex].Size - - return fmt.Sprintf("SelfSeed: Copy [%d:%d] to [%d:%d]", fromStart, fromEnd, toStart, toEnd) + return fmt.Sprintf("SelfSeed: Copy [%d:%d] to [%d:%d]", + s.srcOffset, s.srcOffset+s.size, + s.dstOffset, s.dstOffset+s.size) } diff --git a/fileseed.go b/fileseed.go index 10791ab..bb2408f 100644 --- a/fileseed.go +++ b/fileseed.go @@ -5,6 +5,7 @@ import ( "fmt" "io" "os" + "sort" ) // FileSeed is used to copy or clone blocks from an existing index+blob during @@ -30,24 +31,24 @@ func NewIndexSeed(dstFile string, srcFile string, index Index) (*FileSeed, error return &s, nil } -// LongestMatchWith returns the longest sequence of chunks anywhere in Source -// that match `chunks` starting at chunks[0], limiting the maximum number of chunks -// if reflinks are not supported. If there is no match, it returns a length of zero -// and a nil SeedSegment. -func (s *FileSeed) LongestMatchWith(chunks []IndexChunk) (int, SeedSegment) { - if len(chunks) == 0 || len(s.index.Chunks) == 0 { - return 0, nil +// LongestMatchFrom returns the longest sequence of chunks anywhere in the seed +// that match chunks starting at chunks[startPos]. It returns the byte offset +// of the match in the seed and the number of matching chunks. Returns (0, 0) +// if there is no match. 
+func (s *FileSeed) LongestMatchFrom(chunks []IndexChunk, startPos int) (uint64, int) { + if startPos >= len(chunks) || len(s.index.Chunks) == 0 { + return 0, 0 } - pos, ok := s.pos[chunks[0].ID] + pos, ok := s.pos[chunks[startPos].ID] if !ok { - return 0, nil + return 0, 0 } - // From every position of chunks[0] in the source, find a slice of - // matching chunks. Then return the longest of those slices. + // From every position of chunks[startPos] in the source, find a run of + // matching chunks. Then return the longest of those runs. var ( - match []IndexChunk - max int - limit int + bestSeedPos int + maxLen int + limit int ) if !s.canReflink { // Limit the maximum number of chunks, in a single sequence, to avoid @@ -57,16 +58,34 @@ func (s *FileSeed) LongestMatchWith(chunks []IndexChunk) (int, SeedSegment) { limit = 100 } for _, p := range pos { - m := s.maxMatchFrom(chunks, p, limit) - if len(m) > max { - match = m - max = len(m) + seedPos, n := s.maxMatchFrom(chunks[startPos:], p, limit) + if n > maxLen { + bestSeedPos = seedPos + maxLen = n } - if limit != 0 && limit == max { + if limit != 0 && limit == maxLen { break } } - return max, newFileSeedSegment(s.srcFile, match, s.canReflink) + if maxLen == 0 { + return 0, 0 + } + return s.index.Chunks[bestSeedPos].Start, maxLen +} + +// GetSegment constructs a SeedSegment for a matched range identified by its +// byte offset and size in the seed. 
+func (s *FileSeed) GetSegment(offset, size uint64) SeedSegment { + i := sort.Search(len(s.index.Chunks), func(j int) bool { + return s.index.Chunks[j].Start >= offset + }) + var covered uint64 + end := i + for end < len(s.index.Chunks) && covered < size { + covered += s.index.Chunks[end].Size + end++ + } + return newFileSeedSegment(s.srcFile, s.index.Chunks[i:end], s.canReflink) } func (s *FileSeed) RegenerateIndex(ctx context.Context, n int, attempt int, seedNumber int) error { @@ -86,11 +105,12 @@ func (s *FileSeed) RegenerateIndex(ctx context.Context, n int, attempt int, seed return nil } -// Returns a slice of chunks from the seed. Compares chunks from position 0 -// with seed chunks starting at p. A "limit" value of zero means that there is no limit. -func (s *FileSeed) maxMatchFrom(chunks []IndexChunk, p int, limit int) []IndexChunk { +// maxMatchFrom compares chunks from position 0 with seed chunks starting at p. +// Returns (p, count) where p is the seed start and count is the number of +// matching chunks. A "limit" value of zero means that there is no limit. 
+func (s *FileSeed) maxMatchFrom(chunks []IndexChunk, p int, limit int) (int, int) { if len(chunks) == 0 { - return nil + return 0, 0 } var ( sp int @@ -109,7 +129,7 @@ func (s *FileSeed) maxMatchFrom(chunks []IndexChunk, p int, limit int) []IndexCh dp++ sp++ } - return s.index.Chunks[p:dp] + return p, dp - p } type fileSeedSegment struct { diff --git a/nullseed.go b/nullseed.go index 92d363b..464b19d 100644 --- a/nullseed.go +++ b/nullseed.go @@ -42,9 +42,9 @@ func (s *nullChunkSeed) close() error { return nil } -func (s *nullChunkSeed) LongestMatchWith(chunks []IndexChunk) (int, SeedSegment) { - if len(chunks) == 0 { - return 0, nil +func (s *nullChunkSeed) LongestMatchFrom(chunks []IndexChunk, startPos int) (uint64, int) { + if startPos >= len(chunks) { + return 0, 0 } var ( n int @@ -53,7 +53,7 @@ func (s *nullChunkSeed) LongestMatchWith(chunks []IndexChunk) (int, SeedSegment) if !s.canReflink { limit = 100 } - for _, c := range chunks { + for _, c := range chunks[startPos:] { if limit != 0 && limit == n { break } @@ -62,12 +62,12 @@ func (s *nullChunkSeed) LongestMatchWith(chunks []IndexChunk) (int, SeedSegment) } n++ } - if n == 0 { - return 0, nil - } - return n, &nullChunkSection{ - from: chunks[0].Start, - to: chunks[n-1].Start + chunks[n-1].Size, + return 0, n +} + +func (s *nullChunkSeed) GetSegment(offset, size uint64) SeedSegment { + return &nullChunkSection{ + size: size, blockfile: s.blockfile, canReflink: s.canReflink, } @@ -78,7 +78,7 @@ func (s *nullChunkSeed) RegenerateIndex(ctx context.Context, n int, attempt int, } type nullChunkSection struct { - from, to uint64 + size uint64 blockfile *os.File canReflink bool } @@ -92,7 +92,7 @@ func (s *nullChunkSection) FileName() string { return "" } -func (s *nullChunkSection) Size() uint64 { return s.to - s.from } +func (s *nullChunkSection) Size() uint64 { return s.size } func (s *nullChunkSection) WriteInto(dst *os.File, offset, length, blocksize uint64, isBlank bool) (uint64, uint64, error) { if 
length != s.Size() { diff --git a/seed.go b/seed.go index b5fa88c..2692e87 100644 --- a/seed.go +++ b/seed.go @@ -12,7 +12,8 @@ const DefaultBlockSize = 4096 // another index+blob that present on disk already and is used to copy or clone // existing chunks or blocks into the target from. type Seed interface { - LongestMatchWith(chunks []IndexChunk) (int, SeedSegment) + LongestMatchFrom(chunks []IndexChunk, startPos int) (uint64, int) + GetSegment(offset, size uint64) SeedSegment RegenerateIndex(ctx context.Context, n int, attempt int, seedNumber int) error } From f2ddded52d2a90f016f7c7de7976911a22989f9f Mon Sep 17 00:00:00 2001 From: folbrich Date: Wed, 18 Mar 2026 13:34:14 +0100 Subject: [PATCH 04/11] reorg --- assemble-fileseed.go | 233 +++++++++++++++++++++++++++++++++++++++++ fileseed.go | 239 ------------------------------------------- 2 files changed, 233 insertions(+), 239 deletions(-) delete mode 100644 fileseed.go diff --git a/assemble-fileseed.go b/assemble-fileseed.go index 6c193d8..149cd5d 100644 --- a/assemble-fileseed.go +++ b/assemble-fileseed.go @@ -1,10 +1,243 @@ package desync import ( + "context" "fmt" + "io" "os" + "sort" ) +// FileSeed is used to copy or clone blocks from an existing index+blob during +// file extraction. +type FileSeed struct { + srcFile string + index Index + pos map[ChunkID][]int + canReflink bool +} + +// NewIndexSeed initializes a new seed that uses an existing index and its blob +func NewIndexSeed(dstFile string, srcFile string, index Index) (*FileSeed, error) { + s := FileSeed{ + srcFile: srcFile, + pos: make(map[ChunkID][]int), + index: index, + canReflink: CanClone(dstFile, srcFile), + } + for i, c := range s.index.Chunks { + s.pos[c.ID] = append(s.pos[c.ID], i) + } + return &s, nil +} + +// LongestMatchFrom returns the longest sequence of chunks anywhere in the seed +// that match chunks starting at chunks[startPos]. It returns the byte offset +// of the match in the seed and the number of matching chunks. 
Returns (0, 0) +// if there is no match. +func (s *FileSeed) LongestMatchFrom(chunks []IndexChunk, startPos int) (uint64, int) { + if startPos >= len(chunks) || len(s.index.Chunks) == 0 { + return 0, 0 + } + pos, ok := s.pos[chunks[startPos].ID] + if !ok { + return 0, 0 + } + // From every position of chunks[startPos] in the source, find a run of + // matching chunks. Then return the longest of those runs. + var ( + bestSeedPos int + maxLen int + limit int + ) + if !s.canReflink { + // Limit the maximum number of chunks, in a single sequence, to avoid + // having jobs that are too unbalanced. + // However, if reflinks are supported, we don't limit it to make it faster and + // take less space. + limit = 100 + } + for _, p := range pos { + seedPos, n := s.maxMatchFrom(chunks[startPos:], p, limit) + if n > maxLen { + bestSeedPos = seedPos + maxLen = n + } + if limit != 0 && limit == maxLen { + break + } + } + if maxLen == 0 { + return 0, 0 + } + return s.index.Chunks[bestSeedPos].Start, maxLen +} + +// GetSegment constructs a SeedSegment for a matched range identified by its +// byte offset and size in the seed. 
+func (s *FileSeed) GetSegment(offset, size uint64) SeedSegment { + i := sort.Search(len(s.index.Chunks), func(j int) bool { + return s.index.Chunks[j].Start >= offset + }) + var covered uint64 + end := i + for end < len(s.index.Chunks) && covered < size { + covered += s.index.Chunks[end].Size + end++ + } + return newFileSeedSegment(s.srcFile, s.index.Chunks[i:end], s.canReflink) +} + +func (s *FileSeed) RegenerateIndex(ctx context.Context, n int, attempt int, seedNumber int) error { + chunkingPrefix := fmt.Sprintf("Attempt %d: Chunking Seed %d ", attempt, seedNumber) + index, _, err := IndexFromFile(ctx, s.srcFile, n, s.index.Index.ChunkSizeMin, s.index.Index.ChunkSizeAvg, + s.index.Index.ChunkSizeMax, NewProgressBar(chunkingPrefix)) + if err != nil { + return err + } + + s.index = index + s.pos = make(map[ChunkID][]int, len(s.index.Chunks)) + for i, c := range s.index.Chunks { + s.pos[c.ID] = append(s.pos[c.ID], i) + } + + return nil +} + +// maxMatchFrom compares chunks from position 0 with seed chunks starting at p. +// Returns (p, count) where p is the seed start and count is the number of +// matching chunks. A "limit" value of zero means that there is no limit. 
+func (s *FileSeed) maxMatchFrom(chunks []IndexChunk, p int, limit int) (int, int) { + if len(chunks) == 0 { + return 0, 0 + } + var ( + sp int + dp = p + ) + for { + if limit != 0 && sp == limit { + break + } + if dp >= len(s.index.Chunks) || sp >= len(chunks) { + break + } + if chunks[sp].ID != s.index.Chunks[dp].ID { + break + } + dp++ + sp++ + } + return p, dp - p +} + +type fileSeedSegment struct { + file string + chunks []IndexChunk + canReflink bool + needValidation bool +} + +func newFileSeedSegment(file string, chunks []IndexChunk, canReflink bool) *fileSeedSegment { + return &fileSeedSegment{ + canReflink: canReflink, + file: file, + chunks: chunks, + } +} + +func (s *fileSeedSegment) FileName() string { + return s.file +} + +func (s *fileSeedSegment) Size() uint64 { + if len(s.chunks) == 0 { + return 0 + } + last := s.chunks[len(s.chunks)-1] + return last.Start + last.Size - s.chunks[0].Start +} + +func (s *fileSeedSegment) WriteInto(dst *os.File, offset, length, blocksize uint64, isBlank bool) (uint64, uint64, error) { + if length != s.Size() { + return 0, 0, fmt.Errorf("unable to copy %d bytes from %s to %s : wrong size", length, s.file, dst.Name()) + } + src, err := os.Open(s.file) + if err != nil { + return 0, 0, err + } + defer src.Close() + + // Do a straight copy if reflinks are not supported or blocks aren't aligned + if !s.canReflink || s.chunks[0].Start%blocksize != offset%blocksize { + return s.copy(dst, src, s.chunks[0].Start, length, offset) + } + return s.clone(dst, src, s.chunks[0].Start, length, offset, blocksize) +} + +// Validate compares all chunks in this slice of the seed index to the underlying data +// and fails if they don't match. 
+func (s *fileSeedSegment) Validate(file *os.File) error { + for _, c := range s.chunks { + b := make([]byte, c.Size) + if _, err := file.ReadAt(b, int64(c.Start)); err != nil { + return err + } + sum := Digest.Sum(b) + if sum != c.ID { + return fmt.Errorf("seed index for %s doesn't match its data", s.file) + } + } + return nil +} + +// Performs a plain copy of everything in the seed to the target, not cloning +// of blocks. +func (s *fileSeedSegment) copy(dst, src *os.File, srcOffset, length, dstOffset uint64) (uint64, uint64, error) { + if _, err := dst.Seek(int64(dstOffset), os.SEEK_SET); err != nil { + return 0, 0, err + } + if _, err := src.Seek(int64(srcOffset), os.SEEK_SET); err != nil { + return 0, 0, err + } + + // Copy using a fixed buffer. Using io.Copy() with a LimitReader will make it + // create a buffer matching N of the LimitReader which can be too large + copied, err := io.CopyBuffer(dst, io.LimitReader(src, int64(length)), make([]byte, 64*1024)) + return uint64(copied), 0, err +} + +// Reflink the overlapping blocks in the two ranges and copy the bit before and +// after the blocks. 
+func (s *fileSeedSegment) clone(dst, src *os.File, srcOffset, srcLength, dstOffset, blocksize uint64) (uint64, uint64, error) { + if srcOffset%blocksize != dstOffset%blocksize { + return 0, 0, fmt.Errorf("reflink ranges not aligned between %s and %s", src.Name(), dst.Name()) + } + + srcAlignStart := (srcOffset/blocksize + 1) * blocksize + srcAlignEnd := (srcOffset + srcLength) / blocksize * blocksize + dstAlignStart := (dstOffset/blocksize + 1) * blocksize + alignLength := srcAlignEnd - srcAlignStart + dstAlignEnd := dstAlignStart + alignLength + + // fill the area before the first aligned block + var copied uint64 + c1, _, err := s.copy(dst, src, srcOffset, srcAlignStart-srcOffset, dstOffset) + if err != nil { + return c1, 0, err + } + copied += c1 + // fill the area after the last aligned block + c2, _, err := s.copy(dst, src, srcAlignEnd, srcOffset+srcLength-srcAlignEnd, dstAlignEnd) + if err != nil { + return copied + c2, 0, err + } + copied += c2 + // close the aligned blocks + return copied, alignLength, CloneRange(dst, src, srcAlignStart, alignLength, dstAlignStart) +} + type fileSeedSource struct { segment SeedSegment seed Seed diff --git a/fileseed.go b/fileseed.go deleted file mode 100644 index bb2408f..0000000 --- a/fileseed.go +++ /dev/null @@ -1,239 +0,0 @@ -package desync - -import ( - "context" - "fmt" - "io" - "os" - "sort" -) - -// FileSeed is used to copy or clone blocks from an existing index+blob during -// file extraction. 
-type FileSeed struct { - srcFile string - index Index - pos map[ChunkID][]int - canReflink bool -} - -// NewIndexSeed initializes a new seed that uses an existing index and its blob -func NewIndexSeed(dstFile string, srcFile string, index Index) (*FileSeed, error) { - s := FileSeed{ - srcFile: srcFile, - pos: make(map[ChunkID][]int), - index: index, - canReflink: CanClone(dstFile, srcFile), - } - for i, c := range s.index.Chunks { - s.pos[c.ID] = append(s.pos[c.ID], i) - } - return &s, nil -} - -// LongestMatchFrom returns the longest sequence of chunks anywhere in the seed -// that match chunks starting at chunks[startPos]. It returns the byte offset -// of the match in the seed and the number of matching chunks. Returns (0, 0) -// if there is no match. -func (s *FileSeed) LongestMatchFrom(chunks []IndexChunk, startPos int) (uint64, int) { - if startPos >= len(chunks) || len(s.index.Chunks) == 0 { - return 0, 0 - } - pos, ok := s.pos[chunks[startPos].ID] - if !ok { - return 0, 0 - } - // From every position of chunks[startPos] in the source, find a run of - // matching chunks. Then return the longest of those runs. - var ( - bestSeedPos int - maxLen int - limit int - ) - if !s.canReflink { - // Limit the maximum number of chunks, in a single sequence, to avoid - // having jobs that are too unbalanced. - // However, if reflinks are supported, we don't limit it to make it faster and - // take less space. - limit = 100 - } - for _, p := range pos { - seedPos, n := s.maxMatchFrom(chunks[startPos:], p, limit) - if n > maxLen { - bestSeedPos = seedPos - maxLen = n - } - if limit != 0 && limit == maxLen { - break - } - } - if maxLen == 0 { - return 0, 0 - } - return s.index.Chunks[bestSeedPos].Start, maxLen -} - -// GetSegment constructs a SeedSegment for a matched range identified by its -// byte offset and size in the seed. 
-func (s *FileSeed) GetSegment(offset, size uint64) SeedSegment { - i := sort.Search(len(s.index.Chunks), func(j int) bool { - return s.index.Chunks[j].Start >= offset - }) - var covered uint64 - end := i - for end < len(s.index.Chunks) && covered < size { - covered += s.index.Chunks[end].Size - end++ - } - return newFileSeedSegment(s.srcFile, s.index.Chunks[i:end], s.canReflink) -} - -func (s *FileSeed) RegenerateIndex(ctx context.Context, n int, attempt int, seedNumber int) error { - chunkingPrefix := fmt.Sprintf("Attempt %d: Chunking Seed %d ", attempt, seedNumber) - index, _, err := IndexFromFile(ctx, s.srcFile, n, s.index.Index.ChunkSizeMin, s.index.Index.ChunkSizeAvg, - s.index.Index.ChunkSizeMax, NewProgressBar(chunkingPrefix)) - if err != nil { - return err - } - - s.index = index - s.pos = make(map[ChunkID][]int, len(s.index.Chunks)) - for i, c := range s.index.Chunks { - s.pos[c.ID] = append(s.pos[c.ID], i) - } - - return nil -} - -// maxMatchFrom compares chunks from position 0 with seed chunks starting at p. -// Returns (p, count) where p is the seed start and count is the number of -// matching chunks. A "limit" value of zero means that there is no limit. 
-func (s *FileSeed) maxMatchFrom(chunks []IndexChunk, p int, limit int) (int, int) { - if len(chunks) == 0 { - return 0, 0 - } - var ( - sp int - dp = p - ) - for { - if limit != 0 && sp == limit { - break - } - if dp >= len(s.index.Chunks) || sp >= len(chunks) { - break - } - if chunks[sp].ID != s.index.Chunks[dp].ID { - break - } - dp++ - sp++ - } - return p, dp - p -} - -type fileSeedSegment struct { - file string - chunks []IndexChunk - canReflink bool - needValidation bool -} - -func newFileSeedSegment(file string, chunks []IndexChunk, canReflink bool) *fileSeedSegment { - return &fileSeedSegment{ - canReflink: canReflink, - file: file, - chunks: chunks, - } -} - -func (s *fileSeedSegment) FileName() string { - return s.file -} - -func (s *fileSeedSegment) Size() uint64 { - if len(s.chunks) == 0 { - return 0 - } - last := s.chunks[len(s.chunks)-1] - return last.Start + last.Size - s.chunks[0].Start -} - -func (s *fileSeedSegment) WriteInto(dst *os.File, offset, length, blocksize uint64, isBlank bool) (uint64, uint64, error) { - if length != s.Size() { - return 0, 0, fmt.Errorf("unable to copy %d bytes from %s to %s : wrong size", length, s.file, dst.Name()) - } - src, err := os.Open(s.file) - if err != nil { - return 0, 0, err - } - defer src.Close() - - // Do a straight copy if reflinks are not supported or blocks aren't aligned - if !s.canReflink || s.chunks[0].Start%blocksize != offset%blocksize { - return s.copy(dst, src, s.chunks[0].Start, length, offset) - } - return s.clone(dst, src, s.chunks[0].Start, length, offset, blocksize) -} - -// Validate compares all chunks in this slice of the seed index to the underlying data -// and fails if they don't match. 
-func (s *fileSeedSegment) Validate(file *os.File) error { - for _, c := range s.chunks { - b := make([]byte, c.Size) - if _, err := file.ReadAt(b, int64(c.Start)); err != nil { - return err - } - sum := Digest.Sum(b) - if sum != c.ID { - return fmt.Errorf("seed index for %s doesn't match its data", s.file) - } - } - return nil -} - -// Performs a plain copy of everything in the seed to the target, not cloning -// of blocks. -func (s *fileSeedSegment) copy(dst, src *os.File, srcOffset, length, dstOffset uint64) (uint64, uint64, error) { - if _, err := dst.Seek(int64(dstOffset), os.SEEK_SET); err != nil { - return 0, 0, err - } - if _, err := src.Seek(int64(srcOffset), os.SEEK_SET); err != nil { - return 0, 0, err - } - - // Copy using a fixed buffer. Using io.Copy() with a LimitReader will make it - // create a buffer matching N of the LimitReader which can be too large - copied, err := io.CopyBuffer(dst, io.LimitReader(src, int64(length)), make([]byte, 64*1024)) - return uint64(copied), 0, err -} - -// Reflink the overlapping blocks in the two ranges and copy the bit before and -// after the blocks. 
-func (s *fileSeedSegment) clone(dst, src *os.File, srcOffset, srcLength, dstOffset, blocksize uint64) (uint64, uint64, error) { - if srcOffset%blocksize != dstOffset%blocksize { - return 0, 0, fmt.Errorf("reflink ranges not aligned between %s and %s", src.Name(), dst.Name()) - } - - srcAlignStart := (srcOffset/blocksize + 1) * blocksize - srcAlignEnd := (srcOffset + srcLength) / blocksize * blocksize - dstAlignStart := (dstOffset/blocksize + 1) * blocksize - alignLength := srcAlignEnd - srcAlignStart - dstAlignEnd := dstAlignStart + alignLength - - // fill the area before the first aligned block - var copied uint64 - c1, _, err := s.copy(dst, src, srcOffset, srcAlignStart-srcOffset, dstOffset) - if err != nil { - return c1, 0, err - } - copied += c1 - // fill the area after the last aligned block - c2, _, err := s.copy(dst, src, srcAlignEnd, srcOffset+srcLength-srcAlignEnd, dstAlignEnd) - if err != nil { - return copied + c2, 0, err - } - copied += c2 - // close the aligned blocks - return copied, alignLength, CloneRange(dst, src, srcAlignStart, alignLength, dstAlignStart) -} From 701aa34547dd9924f8be265ad2a9c85fff545109 Mon Sep 17 00:00:00 2001 From: folbrich Date: Fri, 20 Mar 2026 10:38:34 +0100 Subject: [PATCH 05/11] Implement in-place seed logic --- assemble-fileseed.go | 26 +++++++---- assemble-inplaceseed.go | 18 ++++++++ assemble-plan.go | 98 +++++++++++++++++++++++++++++++++-------- assemble-plan_test.go | 57 ++++++++++++++++++++++-- assemble-selfseed.go | 14 ++++-- assemble.go | 8 ++++ assemble_test.go | 2 +- cmd/desync/extract.go | 31 ++++++++++++- nullseed.go | 11 +++-- seed.go | 6 ++- 10 files changed, 230 insertions(+), 41 deletions(-) create mode 100644 assemble-inplaceseed.go diff --git a/assemble-fileseed.go b/assemble-fileseed.go index 149cd5d..4c3e40c 100644 --- a/assemble-fileseed.go +++ b/assemble-fileseed.go @@ -18,7 +18,7 @@ type FileSeed struct { } // NewIndexSeed initializes a new seed that uses an existing index and its blob -func 
NewIndexSeed(dstFile string, srcFile string, index Index) (*FileSeed, error) { +func NewFileSeed(dstFile string, srcFile string, index Index) (*FileSeed, error) { s := FileSeed{ srcFile: srcFile, pos: make(map[ChunkID][]int), @@ -33,15 +33,15 @@ func NewIndexSeed(dstFile string, srcFile string, index Index) (*FileSeed, error // LongestMatchFrom returns the longest sequence of chunks anywhere in the seed // that match chunks starting at chunks[startPos]. It returns the byte offset -// of the match in the seed and the number of matching chunks. Returns (0, 0) -// if there is no match. -func (s *FileSeed) LongestMatchFrom(chunks []IndexChunk, startPos int) (uint64, int) { +// and byte length of the match in the seed, plus the chunk offset and chunk +// length. Returns (0, 0, 0, 0) if there is no match. +func (s *FileSeed) LongestMatchFrom(chunks []IndexChunk, startPos int) (uint64, uint64, int, int) { if startPos >= len(chunks) || len(s.index.Chunks) == 0 { - return 0, 0 + return 0, 0, 0, 0 } pos, ok := s.pos[chunks[startPos].ID] if !ok { - return 0, 0 + return 0, 0, 0, 0 } // From every position of chunks[startPos] in the source, find a run of // matching chunks. Then return the longest of those runs. 
@@ -68,9 +68,12 @@ func (s *FileSeed) LongestMatchFrom(chunks []IndexChunk, startPos int) (uint64, } } if maxLen == 0 { - return 0, 0 + return 0, 0, 0, 0 } - return s.index.Chunks[bestSeedPos].Start, maxLen + byteOffset := s.index.Chunks[bestSeedPos].Start + last := s.index.Chunks[bestSeedPos+maxLen-1] + byteLength := last.Start + last.Size - byteOffset + return byteOffset, byteLength, bestSeedPos, maxLen } // GetSegment constructs a SeedSegment for a matched range identified by its @@ -252,6 +255,13 @@ func (s *fileSeedSource) Execute(f *os.File) (copied uint64, cloned uint64, err return s.segment.WriteInto(f, s.offset, s.length, blocksize, s.isBlank) } +func (s *fileSeedSource) Seed() Seed { return s.seed } +func (s *fileSeedSource) File() string { return s.srcFile } + +func (s *fileSeedSource) Validate(file *os.File) error { + return s.segment.Validate(file) +} + func (s *fileSeedSource) String() string { return fmt.Sprintf("FileSeed(%s): Copy to [%d:%d]", s.srcFile, s.offset, s.offset+s.length) } diff --git a/assemble-inplaceseed.go b/assemble-inplaceseed.go new file mode 100644 index 0000000..6763c50 --- /dev/null +++ b/assemble-inplaceseed.go @@ -0,0 +1,18 @@ +package desync + +// InPlaceSeed is a FileSeed where the source and destination are the same file. +// This makes the relationship explicit when desync extract is used with seeds +// that resolve to the same path as the extraction target. +type InPlaceSeed struct { + *FileSeed +} + +// NewInPlaceSeed initializes a seed where the source and destination are the +// same file. It passes the file path as both src and dst to NewFileSeed. 
+func NewInPlaceSeed(file string, index Index) (*InPlaceSeed, error) { + fs, err := NewFileSeed(file, file, index) + if err != nil { + return nil, err + } + return &InPlaceSeed{FileSeed: fs}, nil +} diff --git a/assemble-plan.go b/assemble-plan.go index f4db61d..9d2ba14 100644 --- a/assemble-plan.go +++ b/assemble-plan.go @@ -43,6 +43,13 @@ type AssemblePlan struct { // length of the index but a single step can span multiple chunks. placements []*placement + // InPlaceReads is a list of placements with sources that read sections + // from the target file. This needs to happen before any steps that + // overwrite the in-place source data. This is sparsely populated and + // used to express a dependency in the form "don't write to this chunk + // until these chunks are read from the in-place target". + inPlaceReads []*placement + selfSeed *selfSeed } @@ -51,6 +58,13 @@ type assembleSource interface { Execute(f *os.File) (copied uint64, cloned uint64, err error) } +type assembleSeedSource interface { + assembleSource + Seed() Seed + File() string + Validate(file *os.File) error +} + type placement struct { source assembleSource dependsOnStart int // index of another placement this one depends on @@ -66,6 +80,7 @@ func NewPlan(name string, idx Index, s Store, opts ...PlanOption) (*AssemblePlan store: s, targetIsBlank: true, placements: make([]*placement, len(idx.Chunks)), + inPlaceReads: make([]*placement, len(idx.Chunks)), } for _, opt := range opts { opt(p) @@ -98,7 +113,7 @@ func (p *AssemblePlan) Validate() error { // Phase 1 — Sequential: collect unique fileSeedSource placements, open // their backing files, and build a list of items to validate. 
type validateItem struct { - fs *fileSeedSource + fs assembleSeedSource file *os.File } @@ -120,31 +135,31 @@ func (p *AssemblePlan) Validate() error { } seen[pl] = struct{}{} - fs, ok := pl.source.(*fileSeedSource) - if !ok || fs.srcFile == "" { + fs, ok := pl.source.(assembleSeedSource) + if !ok || fs.File() == "" { continue } // Skip seeds and files already known to be invalid - if _, ok := invalidSeeds[fs.seed]; ok { + if _, ok := invalidSeeds[fs.Seed()]; ok { continue } - if _, ok := failedFiles[fs.srcFile]; ok { - invalidSeeds[fs.seed] = fmt.Errorf("seed file %s could not be opened", fs.srcFile) + if _, ok := failedFiles[fs.File()]; ok { + invalidSeeds[fs.Seed()] = fmt.Errorf("seed file %s could not be opened", fs.File()) continue } - if _, ok := fileMap[fs.srcFile]; !ok { - f, err := os.Open(fs.srcFile) + if _, ok := fileMap[fs.File()]; !ok { + f, err := os.Open(fs.File()) if err != nil { - failedFiles[fs.srcFile] = struct{}{} - invalidSeeds[fs.seed] = err + failedFiles[fs.File()] = struct{}{} + invalidSeeds[fs.Seed()] = err continue } - fileMap[fs.srcFile] = f + fileMap[fs.File()] = f } - items = append(items, validateItem{fs: fs, file: fileMap[fs.srcFile]}) + items = append(items, validateItem{fs: fs, file: fileMap[fs.File()]}) } // Phase 2 — Concurrent: validate each segment in parallel. @@ -153,9 +168,9 @@ func (p *AssemblePlan) Validate() error { g.SetLimit(p.concurrency) for _, item := range items { g.Go(func() error { - if err := item.fs.segment.Validate(item.file); err != nil { + if err := item.fs.Validate(item.file); err != nil { mu.Lock() - invalidSeeds[item.fs.seed] = err + invalidSeeds[item.fs.Seed()] = err mu.Unlock() } return nil @@ -227,15 +242,52 @@ func (p *AssemblePlan) generate() error { } } - // Find all matches in file itself. As it's populated, sections can be - // copied to other chunks. This involves depending on earlier steps - // before chunks can be copied within the file. 
+ // If we have an in-place seed, use it to find matches in the file + // before anything gets overwritten by subsequent steps. We schedule + // steps that re-arrange chunks that already exist in other places in + // the target file before they get overwritten by subsequent steps like + // copying from other seeds or the store. + for _, seed := range p.seeds { + inPlaceSeed, ok := seed.(*InPlaceSeed) + if !ok { + continue + } + + _ = inPlaceSeed + + // TODO: Implement finding chunk slices in the existing file. + // Create placements with sources that consist of multiple + // operations, such as "Copy to other spot in the file", "Read + // to memory", "Write memory to file". Each dependency cycle is + // represented as multiple operations. Each cycle can be + // executed independently of other (disconnected) cycles. + // Update inPlaceReads placements so subsequent steps that + // write to the same sections in the file from other seeds or + // store happen after the selfseed operations are done. + for i := 0; i < len(p.idx.Chunks); i++ { + byteOffset, byteLength, seedOffset, n := inPlaceSeed.LongestMatchFrom(p.idx.Chunks, i) + if n < 1 { + continue + } + + _ = byteOffset + _ = byteLength + _ = seedOffset + } + + break // There can only be one in-place seed + } + + // Find all matches in file itself as they're written. As it's + // populated, sections can be copied to other chunks. This involves + // depending on earlier steps before chunks can be copied within the + // file. for i := 0; i < len(p.idx.Chunks); i++ { if p.placements[i] != nil { continue // Already filled } - start, n := p.selfSeed.LongestMatchFrom(p.idx.Chunks, i) + _, _, start, n := p.selfSeed.LongestMatchFrom(p.idx.Chunks, i) if n < 1 { continue } @@ -275,12 +327,16 @@ func (p *AssemblePlan) generate() error { // Check file seeds for matches in unfilled positions. 
for _, seed := range p.seeds { + if _, ok := seed.(*InPlaceSeed); ok { // Skip the in-place seed, it's already handled + continue + } + for i := 0; i < len(p.idx.Chunks); i++ { if p.placements[i] != nil { continue } - seedOffset, n := seed.LongestMatchFrom(p.idx.Chunks, i) + seedOffset, _, _, n := seed.LongestMatchFrom(p.idx.Chunks, i) if n < 1 { continue } @@ -371,6 +427,10 @@ func (p *AssemblePlan) Steps() []*PlanStep { stepsPerPlacement[pl].addDependency(stepsPerPlacement[p.placements[i]]) stepsPerPlacement[p.placements[i]].addDependent(stepsPerPlacement[pl]) } + + // TODO: setup dependencies on inPlaceReads to make sure + // in-seed operations on chunks are done before subsequent + // writes overwrite the data. } // Make a slice of steps, preserving the order diff --git a/assemble-plan_test.go b/assemble-plan_test.go index 238f532..ee3567d 100644 --- a/assemble-plan_test.go +++ b/assemble-plan_test.go @@ -229,7 +229,58 @@ func TestFileSeedPlanSteps(t *testing.T) { for name, test := range tests { t.Run(name, func(t *testing.T) { - seed, err := NewIndexSeed("test", "seed", test.seed) + seed, err := NewFileSeed("test", "seed", test.seed) + require.NoError(t, err) + + plan, err := NewPlan("test", test.target, nil, PlanWithSeeds([]Seed{seed})) + require.NoError(t, err) + defer plan.Close() + + steps := plan.Steps() + got := make([]string, len(steps)) + for i, s := range steps { + got[i] = s.source.String() + } + require.Equal(t, test.expected, got) + }) + } +} + +func TestInPlaceSeedPlanSteps(t *testing.T) { + tests := map[string]struct { + target Index + seed Index + expected []string + }{ + "basic matching": { + target: indexSequence(0x01, 0x02, 0x03, 0x04), + seed: indexSequence(0x02, 0x03), + expected: []string{ + "Store: Copy 0100000000000000000000000000000000000000000000000000000000000000 to [0:100]", + "FileSeed(test): Copy to [100:300]", + "Store: Copy 0400000000000000000000000000000000000000000000000000000000000000 to [300:400]", + }, + }, + "all from 
seed": { + target: indexSequence(0x01, 0x02, 0x03), + seed: indexSequence(0x01, 0x02, 0x03), + expected: []string{ + "FileSeed(test): Copy to [0:300]", + }, + }, + "no match": { + target: indexSequence(0x01, 0x02), + seed: indexSequence(0x05, 0x06), + expected: []string{ + "Store: Copy 0100000000000000000000000000000000000000000000000000000000000000 to [0:100]", + "Store: Copy 0200000000000000000000000000000000000000000000000000000000000000 to [100:200]", + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + seed, err := NewInPlaceSeed("test", test.seed) require.NoError(t, err) plan, err := NewPlan("test", test.target, nil, PlanWithSeeds([]Seed{seed})) @@ -281,7 +332,7 @@ func TestFileSeedValidation(t *testing.T) { require.NoError(t, err) f.Close() - seed, err := NewIndexSeed("target", seedFile, seedIndex) + seed, err := NewFileSeed("target", seedFile, seedIndex) require.NoError(t, err) plan, err := NewPlan("target", targetIndex, nil, PlanWithSeeds([]Seed{seed})) @@ -301,7 +352,7 @@ func TestFileSeedValidation(t *testing.T) { require.NoError(t, err) f.Close() - seed, err := NewIndexSeed("target", seedFile, seedIndex) + seed, err := NewFileSeed("target", seedFile, seedIndex) require.NoError(t, err) plan, err := NewPlan("target", targetIndex, nil, PlanWithSeeds([]Seed{seed})) diff --git a/assemble-selfseed.go b/assemble-selfseed.go index 7005b68..a51a1e9 100644 --- a/assemble-selfseed.go +++ b/assemble-selfseed.go @@ -53,13 +53,13 @@ func (s *selfSeed) Close() { // LongestMatchFrom returns the longest sequence of matching chunks after a // given starting position. 
-func (s *selfSeed) LongestMatchFrom(chunks []IndexChunk, startPos int) (int, int) {
+func (s *selfSeed) LongestMatchFrom(chunks []IndexChunk, startPos int) (uint64, uint64, int, int) {
 	if len(chunks) <= startPos || len(s.index.Chunks) == 0 {
-		return 0, 0
+		return 0, 0, 0, 0
 	}
 	pos, ok := s.pos[chunks[startPos].ID]
 	if !ok {
-		return 0, 0
+		return 0, 0, 0, 0
 	}
 	// From every position of chunks[startPos] in the source, find a slice of
 	// matching chunks. Then return the longest of those slices.
@@ -92,7 +92,13 @@ func (s *selfSeed) LongestMatchFrom(chunks []IndexChunk, startPos int) (int, int
 			break
 		}
 	}
-	return maxStart, maxLen
+	if maxLen == 0 {
+		return 0, 0, 0, 0
+	}
+	byteOffset := s.index.Chunks[maxStart].Start
+	last := s.index.Chunks[maxStart+maxLen-1]
+	byteLength := last.Start + last.Size - byteOffset
+	return byteOffset, byteLength, maxStart, maxLen
 }
 
 func (s *selfSeed) maxMatchFrom(chunks []IndexChunk, p int, limit int) (int, int) {
diff --git a/assemble.go b/assemble.go
index 48e1843..1d5e308 100644
--- a/assemble.go
+++ b/assemble.go
@@ -68,6 +68,14 @@ func AssembleFile(ctx context.Context, name string, idx Index, s Store, seeds []
 		isBlank = true
 	}
 
+	// TODO: Update to account for in-place seeds. If the in-place seed is
+	// longer than the file we probably want to truncate the file down
+	// after executing all steps. If the in-place seed is smaller than the
+	// target file, we can truncate here. Note that it's possible the
+	// in-place seed is recalculated below. If we truncated the target file
+	// down, that in-place seed's chunk list may need to be truncated as
+	// well.
+
 	// Truncate the output file to the full expected size.
Not only does this // confirm there's enough disk space, but it allows for an optimization // when dealing with the Null Chunk diff --git a/assemble_test.go b/assemble_test.go index b28b3fa..13f0d46 100644 --- a/assemble_test.go +++ b/assemble_test.go @@ -219,7 +219,7 @@ func TestSeed(t *testing.T) { NewProgressBar(""), ) require.NoError(t, err) - seed, err := NewIndexSeed(dst.Name(), seedFile.Name(), seedIndex) + seed, err := NewFileSeed(dst.Name(), seedFile.Name(), seedIndex) require.NoError(t, err) seeds = append(seeds, seed) } diff --git a/cmd/desync/extract.go b/cmd/desync/extract.go index e7e17b4..ca06657 100644 --- a/cmd/desync/extract.go +++ b/cmd/desync/extract.go @@ -170,6 +170,10 @@ func writeInplace(ctx context.Context, name string, idx desync.Index, s desync.S func readSeeds(dstFile string, seedsInfo []string, opts cmdStoreOptions) ([]desync.Seed, error) { var seeds []desync.Seed + absDst, err := filepath.Abs(dstFile) + if err != nil { + return nil, err + } for _, seedInfo := range seedsInfo { var ( srcIndexFile string @@ -196,7 +200,17 @@ func readSeeds(dstFile string, seedsInfo []string, opts cmdStoreOptions) ([]desy return nil, err } - seed, err := desync.NewIndexSeed(dstFile, srcFile, srcIndex) + absSrc, err := filepath.Abs(srcFile) + if err != nil { + return nil, err + } + + var seed desync.Seed + if absSrc == absDst { + seed, err = desync.NewInPlaceSeed(srcFile, srcIndex) + } else { + seed, err = desync.NewFileSeed(dstFile, srcFile, srcIndex) + } if err != nil { return nil, err } @@ -211,6 +225,10 @@ func readSeedDirs(dstFile, dstIdxFile string, dirs []string, opts cmdStoreOption if err != nil { return nil, err } + absDst, err := filepath.Abs(dstFile) + if err != nil { + return nil, err + } for _, dir := range dirs { err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { if err != nil { @@ -240,7 +258,16 @@ func readSeedDirs(dstFile, dstIdxFile string, dirs []string, opts cmdStoreOption if err != nil { return err } - 
seed, err := desync.NewIndexSeed(dstFile, srcFile, srcIndex) + absSrc, err := filepath.Abs(srcFile) + if err != nil { + return err + } + var seed desync.Seed + if absSrc == absDst { + seed, err = desync.NewInPlaceSeed(srcFile, srcIndex) + } else { + seed, err = desync.NewFileSeed(dstFile, srcFile, srcIndex) + } if err != nil { return err } diff --git a/nullseed.go b/nullseed.go index 464b19d..a784542 100644 --- a/nullseed.go +++ b/nullseed.go @@ -42,9 +42,9 @@ func (s *nullChunkSeed) close() error { return nil } -func (s *nullChunkSeed) LongestMatchFrom(chunks []IndexChunk, startPos int) (uint64, int) { +func (s *nullChunkSeed) LongestMatchFrom(chunks []IndexChunk, startPos int) (uint64, uint64, int, int) { if startPos >= len(chunks) { - return 0, 0 + return 0, 0, 0, 0 } var ( n int @@ -62,7 +62,12 @@ func (s *nullChunkSeed) LongestMatchFrom(chunks []IndexChunk, startPos int) (uin } n++ } - return 0, n + if n == 0 { + return 0, 0, 0, 0 + } + last := chunks[startPos+n-1] + byteLength := last.Start + last.Size - chunks[startPos].Start + return 0, byteLength, 0, n } func (s *nullChunkSeed) GetSegment(offset, size uint64) SeedSegment { diff --git a/seed.go b/seed.go index 2692e87..8a1550d 100644 --- a/seed.go +++ b/seed.go @@ -12,7 +12,11 @@ const DefaultBlockSize = 4096 // another index+blob that present on disk already and is used to copy or clone // existing chunks or blocks into the target from. type Seed interface { - LongestMatchFrom(chunks []IndexChunk, startPos int) (uint64, int) + // LongestMatchFrom returns the longest sequence of chunks anywhere in the seed + // that match chunks starting at chunks[startPos]. It returns the byte offset + // and byte length of the match in the seed, plus the chunk offset and chunk + // length. Returns (0, 0, 0, 0) if there is no match. 
+ LongestMatchFrom(chunks []IndexChunk, startPos int) (byteOffset uint64, byteLength uint64, chunkOffset int, chunkLength int) GetSegment(offset, size uint64) SeedSegment RegenerateIndex(ctx context.Context, n int, attempt int, seedNumber int) error } From 072061543e8ca6adff1e352b75533ee766068b4a Mon Sep 17 00:00:00 2001 From: folbrich Date: Fri, 20 Mar 2026 10:53:28 +0100 Subject: [PATCH 06/11] dev --- assemble-plan_test.go | 177 ++++++++++++++++++++++++++++++++---------- 1 file changed, 134 insertions(+), 43 deletions(-) diff --git a/assemble-plan_test.go b/assemble-plan_test.go index ee3567d..d53b061 100644 --- a/assemble-plan_test.go +++ b/assemble-plan_test.go @@ -247,54 +247,145 @@ func TestFileSeedPlanSteps(t *testing.T) { } func TestInPlaceSeedPlanSteps(t *testing.T) { - tests := map[string]struct { - target Index - seed Index - expected []string - }{ - "basic matching": { - target: indexSequence(0x01, 0x02, 0x03, 0x04), - seed: indexSequence(0x02, 0x03), - expected: []string{ - "Store: Copy 0100000000000000000000000000000000000000000000000000000000000000 to [0:100]", - "FileSeed(test): Copy to [100:300]", - "Store: Copy 0400000000000000000000000000000000000000000000000000000000000000 to [300:400]", - }, - }, - "all from seed": { - target: indexSequence(0x01, 0x02, 0x03), - seed: indexSequence(0x01, 0x02, 0x03), - expected: []string{ - "FileSeed(test): Copy to [0:300]", - }, - }, - "no match": { - target: indexSequence(0x01, 0x02), - seed: indexSequence(0x05, 0x06), - expected: []string{ - "Store: Copy 0100000000000000000000000000000000000000000000000000000000000000 to [0:100]", - "Store: Copy 0200000000000000000000000000000000000000000000000000000000000000 to [100:200]", - }, - }, + // Create variable-size chunks with known data and compute real ChunkIDs. + // Each chunk is filled with a distinct byte so the SHA512/256 hash is unique. 
+ type chunk struct { + id ChunkID + size uint64 + } + newChunk := func(size int, fill byte) chunk { + data := make([]byte, size) + for i := range data { + data[i] = fill + } + return chunk{id: ChunkID(Digest.Sum(data)), size: uint64(size)} } - for name, test := range tests { - t.Run(name, func(t *testing.T) { - seed, err := NewInPlaceSeed("test", test.seed) - require.NoError(t, err) + A := newChunk(200, 0xAA) + B := newChunk(150, 0xBB) + C := newChunk(100, 0xCC) + D := newChunk(50, 0xDD) + E := newChunk(180, 0xEE) // only appears in store + F := newChunk(120, 0xFF) // only appears in file seed + + // buildIndex creates an Index with contiguous chunk offsets. + buildIndex := func(chunks ...chunk) Index { + ic := make([]IndexChunk, len(chunks)) + var start uint64 + for i, c := range chunks { + ic[i] = IndexChunk{ID: c.id, Start: start, Size: c.size} + start += c.size + } + return Index{Chunks: ic} + } - plan, err := NewPlan("test", test.target, nil, PlanWithSeeds([]Seed{seed})) - require.NoError(t, err) - defer plan.Close() + // planSteps is a helper that creates a plan and returns its step strings. + planSteps := func(t *testing.T, target Index, opts ...PlanOption) []string { + t.Helper() + plan, err := NewPlan("test", target, nil, opts...) + require.NoError(t, err) + t.Cleanup(func() { plan.Close() }) + steps := plan.Steps() + got := make([]string, len(steps)) + for i, s := range steps { + got[i] = s.source.String() + } + return got + } - steps := plan.Steps() - got := make([]string, len(steps)) - for i, s := range steps { - got[i] = s.source.String() - } - require.Equal(t, test.expected, got) - }) + // storeStep formats a store-sourced step string. + storeStep := func(c chunk, start uint64) string { + id := c.id + return fmt.Sprintf("Store: Copy %s to [%d:%d]", &id, start, start+c.size) } + + t.Run("swap two chunks", func(t *testing.T) { + // In-place: [A:200][B:150] + // Target: [B:150][A:200] + // One cycle: A↔B. 
+ inPlace, err := NewInPlaceSeed("test", buildIndex(A, B)) + require.NoError(t, err) + + got := planSteps(t, buildIndex(B, A), PlanWithSeeds([]Seed{inPlace})) + expected := []string{ + "InPlace: Copy [0:200] to [150:350]", + "InPlace: Copy [200:350] to [0:150]", + } + require.Equal(t, expected, got) + }) + + t.Run("two independent cycles", func(t *testing.T) { + // In-place: [A:200][B:150][C:100][D:50] + // Target: [B:150][A:200][D:50][C:100] + // Cycle 1: A↔B in byte range [0,350) + // Cycle 2: C↔D in byte range [350,500) + inPlace, err := NewInPlaceSeed("test", buildIndex(A, B, C, D)) + require.NoError(t, err) + + got := planSteps(t, buildIndex(B, A, D, C), PlanWithSeeds([]Seed{inPlace})) + expected := []string{ + "InPlace: Copy [0:200] to [150:350]", + "InPlace: Copy [200:350] to [0:150]", + "InPlace: Copy [350:450] to [400:500]", + "InPlace: Copy [450:500] to [350:400]", + } + require.Equal(t, expected, got) + }) + + t.Run("rearrange with store chunks", func(t *testing.T) { + // In-place: [A:200][B:150] + // Target: [B:150][A:200][E:180] + // A↔B cycle, E from store (not in seed). + inPlace, err := NewInPlaceSeed("test", buildIndex(A, B)) + require.NoError(t, err) + + got := planSteps(t, buildIndex(B, A, E), PlanWithSeeds([]Seed{inPlace})) + expected := []string{ + "InPlace: Copy [0:200] to [150:350]", + "InPlace: Copy [200:350] to [0:150]", + storeStep(E, 350), + } + require.Equal(t, expected, got) + }) + + t.Run("partial rearrangement with skip", func(t *testing.T) { + // In-place: [A:200][B:150][C:100] + // Target: [A:200][C:100][B:150] + // A already at [0:200] in both indexes → skip. + // B↔C cycle: B [200:350]→[300:450], C [350:450]→[200:300]. 
+ inPlace, err := NewInPlaceSeed("test", buildIndex(A, B, C)) + require.NoError(t, err) + + got := planSteps(t, buildIndex(A, C, B), PlanWithSeeds([]Seed{inPlace})) + expected := []string{ + "InPlace: Skip [0:200]", + "InPlace: Copy [200:350] to [300:450]", + "InPlace: Copy [350:450] to [200:300]", + } + require.Equal(t, expected, got) + }) + + t.Run("mixed in-place and file seed", func(t *testing.T) { + // In-place: [A:200][B:150] + // File seed "seedfile": [F:120] + // Target: [A:200][F:120][B:150] + // A at same offset → skip. + // B moves [200:350]→[320:470] (B must read before F writes to [200:320]). + // F from file seed at [200:320]. + inPlaceSeed, err := NewInPlaceSeed("test", buildIndex(A, B)) + require.NoError(t, err) + fileSeed, err := NewFileSeed("test", "seedfile", buildIndex(F)) + require.NoError(t, err) + + got := planSteps(t, buildIndex(A, F, B), + PlanWithSeeds([]Seed{inPlaceSeed, fileSeed})) + expected := []string{ + "InPlace: Skip [0:200]", + "InPlace: Copy [200:350] to [320:470]", + "FileSeed(seedfile): Copy to [200:320]", + } + require.Equal(t, expected, got) + }) } func TestFileSeedValidation(t *testing.T) { From 9e04e4f9ed7d60626ad866ac7e3d5ed7450b3fef Mon Sep 17 00:00:00 2001 From: folbrich Date: Fri, 20 Mar 2026 14:45:41 +0100 Subject: [PATCH 07/11] dev --- assemble-fileseed.go | 2 +- assemble-inplacecopy.go | 57 ++++++ assemble-plan.go | 390 +++++++++++++++++++++++++++++++++++++--- assemble-plan_test.go | 52 ++++-- assemble.go | 35 ++-- assemble_test.go | 240 +++++++++++++++++++++++++ 6 files changed, 724 insertions(+), 52 deletions(-) create mode 100644 assemble-inplacecopy.go diff --git a/assemble-fileseed.go b/assemble-fileseed.go index 4c3e40c..5ace80f 100644 --- a/assemble-fileseed.go +++ b/assemble-fileseed.go @@ -255,7 +255,7 @@ func (s *fileSeedSource) Execute(f *os.File) (copied uint64, cloned uint64, err return s.segment.WriteInto(f, s.offset, s.length, blocksize, s.isBlank) } -func (s *fileSeedSource) Seed() Seed { return s.seed 
} +func (s *fileSeedSource) Seed() Seed { return s.seed } func (s *fileSeedSource) File() string { return s.srcFile } func (s *fileSeedSource) Validate(file *os.File) error { diff --git a/assemble-inplacecopy.go b/assemble-inplacecopy.go new file mode 100644 index 0000000..b436a59 --- /dev/null +++ b/assemble-inplacecopy.go @@ -0,0 +1,57 @@ +package desync + +import ( + "fmt" + "os" +) + +// inPlaceCopy copies a chunk from one position to another within the same file. +// It uses ReadAt/WriteAt (pread/pwrite) which are position-independent and safe +// for concurrent use on the same file handle. +type inPlaceCopy struct { + srcOffset uint64 + srcSize uint64 + dstOffset uint64 + dstSize uint64 + + // Cycle-breaking: the first mover in a cycle pre-reads the buffered + // operation's source before executing its own copy. + preBuffers []*inPlaceCopy // targets whose writeBuf to populate before own copy + writeBuf []byte // non-nil → write from this buffer, skip file read +} + +func (s *inPlaceCopy) Execute(f *os.File) (copied uint64, cloned uint64, err error) { + // Step 1: Pre-read sources for cycle-broken chunks before our own copy + // overwrites their data. + for _, pb := range s.preBuffers { + pb.writeBuf = make([]byte, pb.srcSize) + if _, err := f.ReadAt(pb.writeBuf, int64(pb.srcOffset)); err != nil { + return 0, 0, fmt.Errorf("inPlaceCopy pre-buffer read at %d: %w", pb.srcOffset, err) + } + } + + // Step 2: If this chunk was cycle-broken, write from the pre-read buffer. + if s.writeBuf != nil { + if _, err := f.WriteAt(s.writeBuf, int64(s.dstOffset)); err != nil { + return 0, 0, fmt.Errorf("inPlaceCopy buffer write at %d: %w", s.dstOffset, err) + } + return s.dstSize, 0, nil + } + + // Step 3: Normal copy — read source into a temp buffer, then write to dest. + // Always buffer first to handle overlapping ranges safely. 
+ buf := make([]byte, s.srcSize) + if _, err := f.ReadAt(buf, int64(s.srcOffset)); err != nil { + return 0, 0, fmt.Errorf("inPlaceCopy read at %d: %w", s.srcOffset, err) + } + if _, err := f.WriteAt(buf, int64(s.dstOffset)); err != nil { + return 0, 0, fmt.Errorf("inPlaceCopy write at %d: %w", s.dstOffset, err) + } + return s.dstSize, 0, nil +} + +func (s *inPlaceCopy) String() string { + return fmt.Sprintf("InPlace: Copy [%d:%d] to [%d:%d]", + s.srcOffset, s.srcOffset+s.srcSize, + s.dstOffset, s.dstOffset+s.dstSize) +} diff --git a/assemble-plan.go b/assemble-plan.go index 9d2ba14..ab60201 100644 --- a/assemble-plan.go +++ b/assemble-plan.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "os" + "slices" "sync" "golang.org/x/sync/errgroup" @@ -50,9 +51,22 @@ type AssemblePlan struct { // until these chunks are read from the in-place target". inPlaceReads []*placement + // inPlaceDeps records ordering constraints between in-place copy + // placements produced by Tarjan's SCC linearization. Each entry + // says placement[from] must complete before placement[to] starts. + inPlaceDeps []inPlaceDep + + // inPlaceOrder lists placements from generateInPlace in their + // desired step output order: skips first, then copies in + // linearized cycle order. Steps() iterates this before + // p.placements so in-place operations precede other sources. + inPlaceOrder []*placement + selfSeed *selfSeed } +type inPlaceDep struct{ from, to int } + type assembleSource interface { fmt.Stringer Execute(f *os.File) (copied uint64, cloned uint64, err error) @@ -253,28 +267,7 @@ func (p *AssemblePlan) generate() error { continue } - _ = inPlaceSeed - - // TODO: Implement finding chunk slices in the existing file. - // Create placements with sources that consist of multiple - // operations, such as "Copy to other spot in the file", "Read - // to memory", "Write memory to file". Each dependency cycle is - // represented as multiple operations. 
Each cycle can be - // executed independently of other (disconnected) cycles. - // Update inPlaceReads placements so subsequent steps that - // write to the same sections in the file from other seeds or - // store happen after the selfseed operations are done. - for i := 0; i < len(p.idx.Chunks); i++ { - byteOffset, byteLength, seedOffset, n := inPlaceSeed.LongestMatchFrom(p.idx.Chunks, i) - if n < 1 { - continue - } - - _ = byteOffset - _ = byteLength - _ = seedOffset - } - + p.generateInPlace(inPlaceSeed) break // There can only be one in-place seed } @@ -428,13 +421,52 @@ func (p *AssemblePlan) Steps() []*PlanStep { stepsPerPlacement[p.placements[i]].addDependent(stepsPerPlacement[pl]) } - // TODO: setup dependencies on inPlaceReads to make sure - // in-seed operations on chunks are done before subsequent - // writes overwrite the data. + // Link in-place read dependencies: if a subsequent step (store + // copy, file seed) writes to a byte range that an in-place + // copy needs to read, the in-place copy must execute first. + for i, inPlaceRead := range p.inPlaceReads { + if inPlaceRead == nil { + continue + } + target := p.placements[i] + if target == inPlaceRead { + continue + } + ipStep := stepsPerPlacement[inPlaceRead] + step := stepsPerPlacement[target] + if step != ipStep { + step.addDependency(ipStep) + ipStep.addDependent(step) + } + } } - // Make a slice of steps, preserving the order + // Link in-place inter-operation dependencies from Tarjan + // linearization. These ensure cycle members and cross-SCC + // operations execute in the correct order. + for _, dep := range p.inPlaceDeps { + from := stepsPerPlacement[p.placements[dep.from]] + to := stepsPerPlacement[p.placements[dep.to]] + if from != to { + to.addDependency(from) + from.addDependent(to) + } + } + + // Make a slice of steps, preserving the order. Iterate + // inPlaceOrder first so in-place seed placements (skips + copies) + // precede other sources. 
Then iterate p.placements for everything + // else. Deduplication by pointer identity ensures each step + // appears exactly once. steps := make([]*PlanStep, 0, len(stepsPerPlacement)) + for _, pl := range p.inPlaceOrder { + s, ok := stepsPerPlacement[pl] + if !ok { + continue + } + steps = append(steps, s) + delete(stepsPerPlacement, pl) + } for _, pl := range p.placements { s, ok := stepsPerPlacement[pl] if !ok { @@ -446,3 +478,309 @@ func (p *AssemblePlan) Steps() []*PlanStep { return steps } + +// generateInPlace processes an in-place seed to find chunks that exist at +// different offsets in the file and creates placements that rearrange them. +// It handles dependency cycles using Tarjan's SCC algorithm. +func (p *AssemblePlan) generateInPlace(seed *InPlaceSeed) { + // Stage 1: Source mapping — index every chunk in the seed by its + // ChunkID, recording all byte ranges where it appears. + type byteRange struct{ start, size uint64 } + srcOf := make(map[ChunkID][]byteRange) + for _, c := range seed.index.Chunks { + srcOf[c.ID] = append(srcOf[c.ID], byteRange{c.Start, c.Size}) + } + + // Stage 2: Operation list — walk target index and classify each chunk. + type moveOp struct { + targetIdx int // index into p.idx.Chunks + srcStart uint64 // byte offset in old file + srcSize uint64 + dstStart uint64 // byte offset in target file + dstSize uint64 + } + var moves []moveOp + + // Collect skip placements from the initial scan that correspond to + // seed chunks into inPlaceOrder so they precede other sources in + // the step list. + skipSeen := make(map[*placement]bool) + for i, c := range p.idx.Chunks { + pl := p.placements[i] + if pl == nil || skipSeen[pl] { + continue + } + if _, ok := pl.source.(*skipInPlace); !ok { + continue + } + if _, ok := srcOf[c.ID]; !ok { + continue + } + skipSeen[pl] = true + p.inPlaceOrder = append(p.inPlaceOrder, pl) + } + + for i, c := range p.idx.Chunks { + if p.placements[i] != nil { + continue // Already placed (e.g. 
skipInPlace from initial scan) + } + + sources := srcOf[c.ID] + if len(sources) == 0 { + continue // Not in seed; will be filled by store or file seed later + } + + // Use the first available copy as the move source. + src := sources[0] + moves = append(moves, moveOp{ + targetIdx: i, + srcStart: src.start, + srcSize: src.size, + dstStart: c.Start, + dstSize: c.Size, + }) + } + + if len(moves) == 0 { + return + } + + // Stage 3: Dependency graph — edge from i to j when move i's source + // overlaps move j's destination (i must read before j writes). + n := len(moves) + succ := make([][]int, n) + for i := range moves { + for j := range moves { + if i != j && overlaps(moves[i].srcStart, moves[i].srcSize, moves[j].dstStart, moves[j].dstSize) { + succ[i] = append(succ[i], j) + } + } + } + + // Stage 4: Tarjan's SCC + linearization + sccs := tarjanSCC(n, succ) + slices.Reverse(sccs) // topological order + + // Stable-sort independent SCCs by minimum target index so the + // output order is deterministic and follows the target layout. + slices.SortStableFunc(sccs, func(a, b []int) int { + minA := moves[a[0]].targetIdx + for _, i := range a[1:] { + if moves[i].targetIdx < minA { + minA = moves[i].targetIdx + } + } + minB := moves[b[0]].targetIdx + for _, i := range b[1:] { + if moves[i].targetIdx < minB { + minB = moves[i].targetIdx + } + } + return minA - minB + }) + + for _, scc := range sccs { + if len(scc) == 1 { + // Non-cyclic: single placement. + m := moves[scc[0]] + pl := &placement{source: &inPlaceCopy{ + srcOffset: m.srcStart, + srcSize: m.srcSize, + dstOffset: m.dstStart, + dstSize: m.dstSize, + }} + p.placements[m.targetIdx] = pl + p.inPlaceOrder = append(p.inPlaceOrder, pl) + continue + } + + // Cycle: pick the member with smallest srcSize as buffer-break. 
+ bufIdx := scc[0] + for _, i := range scc[1:] { + if moves[i].srcSize < moves[bufIdx].srcSize { + bufIdx = i + } + } + + // Remove bufIdx's outgoing edges and topologically sort the + // remaining cycle members. + localSucc := make([][]int, n) + localInDeg := make(map[int]int) + for _, i := range scc { + if i == bufIdx { + continue // exclude buffer-break from topo sort + } + localInDeg[i] = 0 + } + for _, i := range scc { + if i == bufIdx { + continue + } + for _, j := range succ[i] { + // Only consider edges within this SCC (excluding bufIdx). + if _, ok := localInDeg[j]; ok { + localSucc[i] = append(localSucc[i], j) + localInDeg[j]++ + } + } + } + + // Kahn's algorithm for topological sort within the cycle. + var queue []int + for _, i := range scc { + if i == bufIdx { + continue + } + if localInDeg[i] == 0 { + queue = append(queue, i) + } + } + var order []int + for len(queue) > 0 { + cur := queue[0] + queue = queue[1:] + order = append(order, cur) + for _, j := range localSucc[cur] { + localInDeg[j]-- + if localInDeg[j] == 0 { + queue = append(queue, j) + } + } + } + + // Build the inPlaceCopy sources. The first element in order + // gets preBuffers pointing to the buffer-break target. The + // buffer-break target writes from writeBuf. + bufMove := moves[bufIdx] + bufCopy := &inPlaceCopy{ + srcOffset: bufMove.srcStart, + srcSize: bufMove.srcSize, + dstOffset: bufMove.dstStart, + dstSize: bufMove.dstSize, + } + + // Create placements in order, with the first one pre-buffering + // the cycle-break target. + var prevIdx int + for k, i := range order { + m := moves[i] + ipc := &inPlaceCopy{ + srcOffset: m.srcStart, + srcSize: m.srcSize, + dstOffset: m.dstStart, + dstSize: m.dstSize, + } + if k == 0 { + ipc.preBuffers = []*inPlaceCopy{bufCopy} + } + pl := &placement{source: ipc} + p.placements[m.targetIdx] = pl + p.inPlaceOrder = append(p.inPlaceOrder, pl) + + // Record ordering dependencies between consecutive cycle members. 
+ if k > 0 { + p.inPlaceDeps = append(p.inPlaceDeps, inPlaceDep{ + from: moves[prevIdx].targetIdx, + to: m.targetIdx, + }) + } + prevIdx = i + } + + // The buffer-break target is placed last and depends on the + // last member in order. + bufPl := &placement{source: bufCopy} + p.placements[bufMove.targetIdx] = bufPl + p.inPlaceOrder = append(p.inPlaceOrder, bufPl) + if len(order) > 0 { + p.inPlaceDeps = append(p.inPlaceDeps, inPlaceDep{ + from: moves[order[len(order)-1]].targetIdx, + to: bufMove.targetIdx, + }) + } + } + + // Stage 5: Populate inPlaceReads — for each move, find all target + // chunks whose byte range overlaps the move's source range and + // record the dependency so subsequent writes wait for the read. + // Only record dependencies for positions not yet placed (nil). + // Non-nil positions at this point are all in-place sources whose + // ordering is already handled by inPlaceDeps above. + for _, m := range moves { + pl := p.placements[m.targetIdx] + for j, c := range p.idx.Chunks { + if p.placements[j] != nil { + continue // in-place source ordering handled by inPlaceDeps + } + if overlaps(m.srcStart, m.srcSize, c.Start, c.Size) { + p.inPlaceReads[j] = pl + } + } + } +} + +// overlaps returns true if byte ranges [aStart, aStart+aSize) and +// [bStart, bStart+bSize) overlap. +func overlaps(aStart, aSize, bStart, bSize uint64) bool { + if aSize == 0 || bSize == 0 { + return false + } + return aStart < bStart+bSize && bStart < aStart+aSize +} + +// tarjanSCC finds all strongly connected components of a directed graph. +// adj[v] lists the successors of node v. Returns SCCs in reverse +// topological order (sinks first). 
+func tarjanSCC(n int, adj [][]int) [][]int { + index := make([]int, n) + lowlink := make([]int, n) + onStack := make([]bool, n) + for i := range index { + index[i] = -1 + } + + var ( + stack []int + sccs [][]int + idx int + ) + + var visit func(v int) + visit = func(v int) { + index[v] = idx + lowlink[v] = idx + idx++ + stack = append(stack, v) + onStack[v] = true + + for _, w := range adj[v] { + if index[w] == -1 { + visit(w) + lowlink[v] = min(lowlink[v], lowlink[w]) + } else if onStack[w] { + lowlink[v] = min(lowlink[v], index[w]) + } + } + + if lowlink[v] == index[v] { + var scc []int + for { + w := stack[len(stack)-1] + stack = stack[:len(stack)-1] + onStack[w] = false + scc = append(scc, w) + if w == v { + break + } + } + sccs = append(sccs, scc) + } + } + + for v := range n { + if index[v] == -1 { + visit(v) + } + } + return sccs +} diff --git a/assemble-plan_test.go b/assemble-plan_test.go index d53b061..bc87b07 100644 --- a/assemble-plan_test.go +++ b/assemble-plan_test.go @@ -251,6 +251,7 @@ func TestInPlaceSeedPlanSteps(t *testing.T) { // Each chunk is filled with a distinct byte so the SHA512/256 hash is unique. type chunk struct { id ChunkID + data []byte size uint64 } newChunk := func(size int, fill byte) chunk { @@ -258,7 +259,7 @@ func TestInPlaceSeedPlanSteps(t *testing.T) { for i := range data { data[i] = fill } - return chunk{id: ChunkID(Digest.Sum(data)), size: uint64(size)} + return chunk{id: ChunkID(Digest.Sum(data)), data: data, size: uint64(size)} } A := newChunk(200, 0xAA) @@ -279,10 +280,22 @@ func TestInPlaceSeedPlanSteps(t *testing.T) { return Index{Chunks: ic} } + // writeFile writes concatenated chunk data to a temp file and returns its path. + writeFile := func(t *testing.T, chunks ...chunk) string { + t.Helper() + f := filepath.Join(t.TempDir(), "target") + var content []byte + for _, c := range chunks { + content = append(content, c.data...) 
+ } + require.NoError(t, os.WriteFile(f, content, 0644)) + return f + } + // planSteps is a helper that creates a plan and returns its step strings. - planSteps := func(t *testing.T, target Index, opts ...PlanOption) []string { + planSteps := func(t *testing.T, name string, target Index, opts ...PlanOption) []string { t.Helper() - plan, err := NewPlan("test", target, nil, opts...) + plan, err := NewPlan(name, target, nil, opts...) require.NoError(t, err) t.Cleanup(func() { plan.Close() }) steps := plan.Steps() @@ -303,10 +316,12 @@ func TestInPlaceSeedPlanSteps(t *testing.T) { // In-place: [A:200][B:150] // Target: [B:150][A:200] // One cycle: A↔B. - inPlace, err := NewInPlaceSeed("test", buildIndex(A, B)) + f := writeFile(t, A, B) + inPlace, err := NewInPlaceSeed(f, buildIndex(A, B)) require.NoError(t, err) - got := planSteps(t, buildIndex(B, A), PlanWithSeeds([]Seed{inPlace})) + got := planSteps(t, f, buildIndex(B, A), + PlanWithSeeds([]Seed{inPlace}), PlanWithTargetIsBlank(false)) expected := []string{ "InPlace: Copy [0:200] to [150:350]", "InPlace: Copy [200:350] to [0:150]", @@ -319,10 +334,12 @@ func TestInPlaceSeedPlanSteps(t *testing.T) { // Target: [B:150][A:200][D:50][C:100] // Cycle 1: A↔B in byte range [0,350) // Cycle 2: C↔D in byte range [350,500) - inPlace, err := NewInPlaceSeed("test", buildIndex(A, B, C, D)) + f := writeFile(t, A, B, C, D) + inPlace, err := NewInPlaceSeed(f, buildIndex(A, B, C, D)) require.NoError(t, err) - got := planSteps(t, buildIndex(B, A, D, C), PlanWithSeeds([]Seed{inPlace})) + got := planSteps(t, f, buildIndex(B, A, D, C), + PlanWithSeeds([]Seed{inPlace}), PlanWithTargetIsBlank(false)) expected := []string{ "InPlace: Copy [0:200] to [150:350]", "InPlace: Copy [200:350] to [0:150]", @@ -336,10 +353,12 @@ func TestInPlaceSeedPlanSteps(t *testing.T) { // In-place: [A:200][B:150] // Target: [B:150][A:200][E:180] // A↔B cycle, E from store (not in seed). 
- inPlace, err := NewInPlaceSeed("test", buildIndex(A, B)) + f := writeFile(t, A, B) + inPlace, err := NewInPlaceSeed(f, buildIndex(A, B)) require.NoError(t, err) - got := planSteps(t, buildIndex(B, A, E), PlanWithSeeds([]Seed{inPlace})) + got := planSteps(t, f, buildIndex(B, A, E), + PlanWithSeeds([]Seed{inPlace}), PlanWithTargetIsBlank(false)) expected := []string{ "InPlace: Copy [0:200] to [150:350]", "InPlace: Copy [200:350] to [0:150]", @@ -353,10 +372,12 @@ func TestInPlaceSeedPlanSteps(t *testing.T) { // Target: [A:200][C:100][B:150] // A already at [0:200] in both indexes → skip. // B↔C cycle: B [200:350]→[300:450], C [350:450]→[200:300]. - inPlace, err := NewInPlaceSeed("test", buildIndex(A, B, C)) + f := writeFile(t, A, B, C) + inPlace, err := NewInPlaceSeed(f, buildIndex(A, B, C)) require.NoError(t, err) - got := planSteps(t, buildIndex(A, C, B), PlanWithSeeds([]Seed{inPlace})) + got := planSteps(t, f, buildIndex(A, C, B), + PlanWithSeeds([]Seed{inPlace}), PlanWithTargetIsBlank(false)) expected := []string{ "InPlace: Skip [0:200]", "InPlace: Copy [200:350] to [300:450]", @@ -372,13 +393,14 @@ func TestInPlaceSeedPlanSteps(t *testing.T) { // A at same offset → skip. // B moves [200:350]→[320:470] (B must read before F writes to [200:320]). // F from file seed at [200:320]. 
- inPlaceSeed, err := NewInPlaceSeed("test", buildIndex(A, B)) + f := writeFile(t, A, B) + inPlaceSeed, err := NewInPlaceSeed(f, buildIndex(A, B)) require.NoError(t, err) - fileSeed, err := NewFileSeed("test", "seedfile", buildIndex(F)) + fileSeed, err := NewFileSeed(f, "seedfile", buildIndex(F)) require.NoError(t, err) - got := planSteps(t, buildIndex(A, F, B), - PlanWithSeeds([]Seed{inPlaceSeed, fileSeed})) + got := planSteps(t, f, buildIndex(A, F, B), + PlanWithSeeds([]Seed{inPlaceSeed, fileSeed}), PlanWithTargetIsBlank(false)) expected := []string{ "InPlace: Skip [0:200]", "InPlace: Copy [200:350] to [320:470]", diff --git a/assemble.go b/assemble.go index 1d5e308..341a3ee 100644 --- a/assemble.go +++ b/assemble.go @@ -68,20 +68,25 @@ func AssembleFile(ctx context.Context, name string, idx Index, s Store, seeds [] isBlank = true } - // TODO: Update to account for inplace-seeds. If the inplace-seed is - // longer than the file we probably want to truncate the file down - // after execution all steps. If the in-place-seed is smaller than the - // target file, we can truncate here. Note, it's possible that the - // in-place seed is recalculated below. If we truncated the target file - // down, that in-place seed's chunk list may need to be truncated as - // well. + // Find the in-place seed size (if any) to decide truncation strategy. + var inPlaceSeedSize int64 + for _, seed := range seeds { + if ips, ok := seed.(*InPlaceSeed); ok { + inPlaceSeedSize = ips.index.Length() + break + } + } // Truncate the output file to the full expected size. Not only does this // confirm there's enough disk space, but it allows for an optimization - // when dealing with the Null Chunk + // when dealing with the Null Chunk. If the in-place seed is larger than + // the target, defer truncation until after assembly so in-place reads + // can access the tail data. 
if !isBlkDevice { - if err := os.Truncate(name, idx.Length()); err != nil { - return stats, err + if inPlaceSeedSize <= idx.Length() { + if err := os.Truncate(name, idx.Length()); err != nil { + return stats, err + } } } @@ -209,6 +214,8 @@ retry: stats.incChunksFromStore() case *skipInPlace: stats.addChunksInPlace(uint64(step.numChunks)) + case *inPlaceCopy: + stats.addChunksInPlace(uint64(step.numChunks)) case *fileSeedSource, *selfSeedSegment: stats.addChunksFromSeed(uint64(step.numChunks)) } @@ -270,5 +277,13 @@ retry: close(completed) wg.Wait() + // If the in-place seed was larger than the target, truncate now that + // all in-place reads are complete. + if err == nil && inPlaceSeedSize > idx.Length() && !isBlkDevice { + if err := os.Truncate(name, idx.Length()); err != nil { + return stats, err + } + } + return stats, err } diff --git a/assemble_test.go b/assemble_test.go index 13f0d46..b56bb27 100644 --- a/assemble_test.go +++ b/assemble_test.go @@ -344,6 +344,246 @@ func join(slices ...[]byte) []byte { return out } +// TestAssembleIntegration exercises the full assembly pipeline end-to-end, +// combining all source types in a single reconstruction: in-place skips, +// in-place copies (including cycle detection with buffer-break), self-seed, +// file seeds, and store fetches. It uses variable-size chunks so that +// byte-offset calculations, overlap detection, and buffer sizing are tested +// with non-uniform boundaries. +// +// Each scenario writes an "old" file (the in-place seed), then calls +// AssembleFile to reconstruct a different target layout. The test verifies +// both the output content (md5 checksum + file size) and the per-source +// chunk statistics reported by ExtractStats. +func TestAssembleIntegration(t *testing.T) { + // Create 10 chunks of different sizes filled with random data. 
+ // Variable sizes ensure offset math in overlaps(), inPlaceCopy.Execute() + // (buffer sizing), and Tarjan cycle detection are exercised with + // non-trivial byte boundaries. + type rawChunk struct { + id ChunkID + data []byte + } + chunkSizes := []int{1024, 768, 512, 896, 640, 1152, 384, 1280, 576, 704} + chunks := make([]rawChunk, len(chunkSizes)) + for i, size := range chunkSizes { + b := make([]byte, size) + rand.Read(b) + id := Digest.Sum(b) + chunks[i] = rawChunk{id: id, data: b} + } + + // Named constants for chunk indices to make scenario definitions readable. + const ( + A = 0 // 1024 bytes + B = 1 // 768 bytes + C = 2 // 512 bytes + D = 3 // 896 bytes + E = 4 // 640 bytes + F = 5 // 1152 bytes + G = 6 // 384 bytes + H = 7 // 1280 bytes + X = 8 // 576 bytes + Y = 9 // 704 bytes + ) + + // buildIndex constructs an Index from chunk references, laying them out + // contiguously. It also sets ChunkSizeMax to the largest chunk in the + // index, which is required by newNullChunkSeed inside AssembleFile. + buildIndex := func(indices ...int) Index { + ic := make([]IndexChunk, len(indices)) + var offset uint64 + var maxSize uint64 + for i, idx := range indices { + size := uint64(len(chunks[idx].data)) + ic[i] = IndexChunk{ID: chunks[idx].id, Start: offset, Size: size} + offset += size + if size > maxSize { + maxSize = size + } + } + return Index{ + Index: FormatIndex{ChunkSizeMax: maxSize}, + Chunks: ic, + } + } + + // buildContent returns the raw bytes for a sequence of chunks, + // used both as file content and as the expected output for verification. + buildContent := func(indices ...int) []byte { + var out []byte + for _, idx := range indices { + out = append(out, chunks[idx].data...) + } + return out + } + + // buildStore creates a TestStore containing only the specified chunks. 
+ // Limiting the store to the minimum required set means that if the + // planner incorrectly routes a chunk to the store (instead of a seed + // or in-place source), the test fails with ChunkMissing rather than + // silently succeeding. + buildStore := func(indices ...int) *TestStore { + s := &TestStore{Chunks: make(map[ChunkID][]byte)} + for _, idx := range indices { + s.Chunks[chunks[idx].id] = chunks[idx].data + } + return s + } + + type scenario struct { + name string + inPlaceIndices []int // Chunks written to target file before assembly (the "old" content) + targetIndices []int // Desired output layout + fileSeedIndices []int // External file seed content (nil = no file seed) + storeIndices []int // Chunks available in the store + wantInPlace uint64 + wantFromSeeds uint64 + wantFromStore uint64 + } + + scenarios := []scenario{ + // Scenario 1: exercises every source type in one assembly. + // + // In-place seed (old file): [A][B][C][D][E] = 3840 bytes + // Target: [B][A][C][F][G][G][D][H] = 6400 bytes + // File seed: [F][X][X] + // Store: G, H + // + // After truncation to 6400 bytes the file is: + // [A:1024][B:768][C:512][D:896][E:640][zeros:2560] + // + // Source analysis per target position: + // Pos 0 (B): in-place copy — B exists at seed offset 1024, target offset 0. + // Part of A↔B cycle (asymmetric sizes: 1024 vs 768). + // Pos 1 (A): in-place copy — A exists at seed offset 0, target offset 768. + // Part of A↔B cycle. Buffer-break picks B (smaller src). + // Pos 2 (C): skip in-place — C is at offset 1792 in both seed and target. + // Pos 3 (F): file seed — F is not in the in-place seed, found in file seed. + // D's in-place read [2304:3200] overlaps F's write [2304:3456], + // so D's read must complete first (enforced by inPlaceReads). + // Pos 4 (G): self-seed — G appears at both pos 4 and 5. Self-seed copies + // from pos 5 (requires source position > target position). + // Pos 5 (G): store — self-seed can't source from itself (p <= startPos). 
+ // Pos 6 (D): in-place copy — D at seed offset 2304, target offset 4224. + // Independent move, no cycle. + // Pos 7 (H): store — H is not in any seed. + { + name: "all source types combined", + inPlaceIndices: []int{A, B, C, D, E}, + targetIndices: []int{B, A, C, F, G, G, D, H}, + fileSeedIndices: []int{F, X, X}, + storeIndices: []int{G, H}, + wantInPlace: 4, // B (cycle), A (cycle), C (skip), D (independent move) + wantFromSeeds: 2, // F (file seed), G at pos 4 (self-seed) + wantFromStore: 2, // G at pos 5, H + }, + + // Scenario 2: in-place seed is larger than the target. + // + // In-place seed: [A][B][C][D] = 3200 bytes + // Target: [B][A] = 1792 bytes + // + // Since the seed (3200) is larger than the target (1792), truncation + // is deferred until after assembly so that in-place reads can access + // the full seed data. A↔B form a swap cycle. After assembly, the + // file is truncated to 1792 bytes. + { + name: "in-place seed larger than target", + inPlaceIndices: []int{A, B, C, D}, + targetIndices: []int{B, A}, + storeIndices: nil, + wantInPlace: 2, // A↔B swap cycle + wantFromSeeds: 0, + wantFromStore: 0, + }, + + // Scenario 3: in-place seed is smaller than the target. + // + // In-place seed: [A][B] = 1792 bytes + // Target: [A][B][C][D] = 3200 bytes + // + // The file is extended (truncated up) to 3200 bytes. A and B are + // already at the correct offsets and detected by the initial scan. + // C and D are beyond the seed data and must come from the store. 
+ { + name: "in-place seed smaller than target", + inPlaceIndices: []int{A, B}, + targetIndices: []int{A, B, C, D}, + storeIndices: []int{C, D}, + wantInPlace: 2, // A, B detected in-place by initial scan + wantFromSeeds: 0, + wantFromStore: 2, // C, D fetched from store + }, + } + + for _, sc := range scenarios { + t.Run(sc.name, func(t *testing.T) { + dir := t.TempDir() + targetPath := filepath.Join(dir, "target") + + // Write the "old" file content — this is what the in-place seed + // describes. AssembleFile will detect it as non-empty, run the + // initial scan, and use the in-place seed to rearrange chunks. + inPlaceContent := buildContent(sc.inPlaceIndices...) + require.NoError(t, os.WriteFile(targetPath, inPlaceContent, 0644)) + + // Create the in-place seed. This wraps a FileSeed where source + // and destination are the same file. + inPlaceIdx := buildIndex(sc.inPlaceIndices...) + inPlaceSeed, err := NewInPlaceSeed(targetPath, inPlaceIdx) + require.NoError(t, err) + seeds := []Seed{inPlaceSeed} + + // If the scenario includes a file seed, write it to a separate + // file and create a FileSeed that maps its chunks by ID. + if sc.fileSeedIndices != nil { + seedPath := filepath.Join(dir, "fileseed") + seedContent := buildContent(sc.fileSeedIndices...) + require.NoError(t, os.WriteFile(seedPath, seedContent, 0644)) + seedIdx := buildIndex(sc.fileSeedIndices...) + fs, err := NewFileSeed(targetPath, seedPath, seedIdx) + require.NoError(t, err) + seeds = append(seeds, fs) + } + + // Build the target index (desired output layout) and compute + // the expected content for verification. + targetIdx := buildIndex(sc.targetIndices...) + expected := buildContent(sc.targetIndices...) + expectedSum := md5.Sum(expected) + + // Build the store with only the chunks that should be fetched + // from it. Any chunk incorrectly routed here will succeed; + // any chunk missing from here will fail with ChunkMissing. + store := buildStore(sc.storeIndices...) 
+ + // Run the full assembly pipeline with 4 concurrent workers. + stats, err := AssembleFile( + context.Background(), targetPath, targetIdx, store, seeds, + AssembleOptions{N: 4, InvalidSeedAction: InvalidSeedActionBailOut}, + ) + require.NoError(t, err) + + // Verify the output file matches the expected content. + output, err := os.ReadFile(targetPath) + require.NoError(t, err) + assert.Equal(t, int64(len(expected)), int64(len(output)), "output file size mismatch") + outSum := md5.Sum(output) + assert.Equal(t, expectedSum, outSum, "output checksum mismatch") + + // Verify that chunks were sourced from the expected places. + // This catches planner bugs where the output is correct but + // chunks were fetched from the wrong source (e.g. store + // instead of in-place copy). + assert.Equal(t, len(sc.targetIndices), stats.ChunksTotal, "ChunksTotal") + assert.Equal(t, sc.wantInPlace, stats.ChunksInPlace, "ChunksInPlace") + assert.Equal(t, sc.wantFromSeeds, stats.ChunksFromSeeds, "ChunksFromSeeds") + assert.Equal(t, sc.wantFromStore, stats.ChunksFromStore, "ChunksFromStore") + }) + } +} + func readCaibxFile(t *testing.T, indexLocation string) (idx Index) { is, err := NewLocalIndexStore(filepath.Dir(indexLocation)) require.NoError(t, err) From 292f30e66cc926352e7f958f059e601fc59ed4ad Mon Sep 17 00:00:00 2001 From: folbrich Date: Fri, 20 Mar 2026 17:35:57 +0100 Subject: [PATCH 08/11] use binary search --- assemble-plan.go | 41 ++++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/assemble-plan.go b/assemble-plan.go index ab60201..ce142d8 100644 --- a/assemble-plan.go +++ b/assemble-plan.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "slices" + "sort" "sync" "golang.org/x/sync/errgroup" @@ -549,9 +550,29 @@ func (p *AssemblePlan) generateInPlace(seed *InPlaceSeed) { // overlaps move j's destination (i must read before j writes). 
n := len(moves) succ := make([][]int, n) + + // Build a sorted index of moves by destination start for O(n log n) overlap search. + sortedByDst := make([]int, n) + for i := range sortedByDst { + sortedByDst[i] = i + } + sort.Slice(sortedByDst, func(a, b int) bool { + return moves[sortedByDst[a]].dstStart < moves[sortedByDst[b]].dstStart + }) + for i := range moves { - for j := range moves { - if i != j && overlaps(moves[i].srcStart, moves[i].srcSize, moves[j].dstStart, moves[j].dstSize) { + srcEnd := moves[i].srcStart + moves[i].srcSize + // First move whose dstStart+dstSize > srcStart + lo := sort.Search(len(sortedByDst), func(k int) bool { + m := moves[sortedByDst[k]] + return m.dstStart+m.dstSize > moves[i].srcStart + }) + for k := lo; k < len(sortedByDst); k++ { + j := sortedByDst[k] + if moves[j].dstStart >= srcEnd { + break + } + if i != j { succ[i] = append(succ[i], j) } } @@ -707,14 +728,20 @@ func (p *AssemblePlan) generateInPlace(seed *InPlaceSeed) { // Non-nil positions at this point are all in-place sources whose // ordering is already handled by inPlaceDeps above. for _, m := range moves { + srcEnd := m.srcStart + m.srcSize pl := p.placements[m.targetIdx] - for j, c := range p.idx.Chunks { - if p.placements[j] != nil { - continue // in-place source ordering handled by inPlaceDeps + // Binary search for first chunk where Start+Size > srcStart. 
+ lo := sort.Search(len(p.idx.Chunks), func(j int) bool { + return p.idx.Chunks[j].Start+p.idx.Chunks[j].Size > m.srcStart + }) + for j := lo; j < len(p.idx.Chunks); j++ { + if p.idx.Chunks[j].Start >= srcEnd { + break } - if overlaps(m.srcStart, m.srcSize, c.Start, c.Size) { - p.inPlaceReads[j] = pl + if p.placements[j] != nil { + continue } + p.inPlaceReads[j] = pl } } } From 9825a6abd36928add1e4ffa37351479fe1f261d0 Mon Sep 17 00:00:00 2001 From: folbrich Date: Fri, 20 Mar 2026 18:04:06 +0100 Subject: [PATCH 09/11] cleanup --- assemble-plan.go | 9 --------- cmd/desync/extract.go | 7 ++++--- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/assemble-plan.go b/assemble-plan.go index ce142d8..065ca6d 100644 --- a/assemble-plan.go +++ b/assemble-plan.go @@ -746,15 +746,6 @@ func (p *AssemblePlan) generateInPlace(seed *InPlaceSeed) { } } -// overlaps returns true if byte ranges [aStart, aStart+aSize) and -// [bStart, bStart+bSize) overlap. -func overlaps(aStart, aSize, bStart, bSize uint64) bool { - if aSize == 0 || bSize == 0 { - return false - } - return aStart < bStart+bSize && bStart < aStart+aSize -} - // tarjanSCC finds all strongly connected components of a directed graph. // adj[v] lists the successors of node v. Returns SCCs in reverse // topological order (sinks first). diff --git a/cmd/desync/extract.go b/cmd/desync/extract.go index ca06657..ae0fff0 100644 --- a/cmd/desync/extract.go +++ b/cmd/desync/extract.go @@ -45,9 +45,10 @@ the index from STDIN. If a seed is invalid, by default the extract operation wil aborted. With --skip-invalid-seeds, the invalid seeds will be discarded and the extraction will continue without them. Otherwise with --regenerate-invalid-seeds, any invalid seed indexes will be regenerated, in memory, by using the -available data, and neither data nor indexes will be changed on disk. 
Also, if the seed changes -while processing, its invalid chunks will be taken from the self seed, or the store, instead -of aborting.`, +available data, and neither data nor indexes will be changed on disk. +Seeds are validated once before extraction begins. If a seed file is modified while +the extraction is running, it could result in a corrupted target file. In such cases, +use the verify-index command to check the integrity of the target afterwards.`, Example: ` desync extract -s http://192.168.1.1/ -c /path/to/local file.caibx largefile.bin desync extract -s /mnt/store -s /tmp/other/store file.tar.caibx file.tar desync extract -s /mnt/store --seed /mnt/v1.caibx v2.caibx v2.vmdk From c3e9dd9cc8050445563eb8310168001f45b99f39 Mon Sep 17 00:00:00 2001 From: folbrich Date: Fri, 20 Mar 2026 20:19:02 +0100 Subject: [PATCH 10/11] simplify --- assemble-fileseed.go | 34 ++------------------ assemble-plan.go | 76 ++++++++++++++++++++++++-------------------- assemble-selfseed.go | 26 +-------------- seed.go | 27 ++++++++++++++++ 4 files changed, 73 insertions(+), 90 deletions(-) diff --git a/assemble-fileseed.go b/assemble-fileseed.go index 5ace80f..8ed0f57 100644 --- a/assemble-fileseed.go +++ b/assemble-fileseed.go @@ -58,7 +58,7 @@ func (s *FileSeed) LongestMatchFrom(chunks []IndexChunk, startPos int) (uint64, limit = 100 } for _, p := range pos { - seedPos, n := s.maxMatchFrom(chunks[startPos:], p, limit) + seedPos, n := maxMatchFrom(chunks[startPos:], s.index.Chunks, p, limit) if n > maxLen { bestSeedPos = seedPos maxLen = n @@ -108,33 +108,6 @@ func (s *FileSeed) RegenerateIndex(ctx context.Context, n int, attempt int, seed return nil } -// maxMatchFrom compares chunks from position 0 with seed chunks starting at p. -// Returns (p, count) where p is the seed start and count is the number of -// matching chunks. A "limit" value of zero means that there is no limit. 
-func (s *FileSeed) maxMatchFrom(chunks []IndexChunk, p int, limit int) (int, int) { - if len(chunks) == 0 { - return 0, 0 - } - var ( - sp int - dp = p - ) - for { - if limit != 0 && sp == limit { - break - } - if dp >= len(s.index.Chunks) || sp >= len(chunks) { - break - } - if chunks[sp].ID != s.index.Chunks[dp].ID { - break - } - dp++ - sp++ - } - return p, dp - p -} - type fileSeedSegment struct { file string chunks []IndexChunk @@ -244,7 +217,6 @@ func (s *fileSeedSegment) clone(dst, src *os.File, srcOffset, srcLength, dstOffs type fileSeedSource struct { segment SeedSegment seed Seed - srcFile string offset uint64 length uint64 isBlank bool @@ -256,12 +228,12 @@ func (s *fileSeedSource) Execute(f *os.File) (copied uint64, cloned uint64, err } func (s *fileSeedSource) Seed() Seed { return s.seed } -func (s *fileSeedSource) File() string { return s.srcFile } +func (s *fileSeedSource) File() string { return s.segment.FileName() } func (s *fileSeedSource) Validate(file *os.File) error { return s.segment.Validate(file) } func (s *fileSeedSource) String() string { - return fmt.Sprintf("FileSeed(%s): Copy to [%d:%d]", s.srcFile, s.offset, s.offset+s.length) + return fmt.Sprintf("FileSeed(%s): Copy to [%d:%d]", s.segment.FileName(), s.offset, s.offset+s.length) } diff --git a/assemble-plan.go b/assemble-plan.go index 065ca6d..8a559eb 100644 --- a/assemble-plan.go +++ b/assemble-plan.go @@ -365,7 +365,6 @@ func (p *AssemblePlan) generate() error { pl.source = &fileSeedSource{ segment: segment, seed: seed, - srcFile: segment.FileName(), offset: offset, length: length, isBlank: p.targetIsBlank, @@ -421,24 +420,24 @@ func (p *AssemblePlan) Steps() []*PlanStep { stepsPerPlacement[pl].addDependency(stepsPerPlacement[p.placements[i]]) stepsPerPlacement[p.placements[i]].addDependent(stepsPerPlacement[pl]) } + } - // Link in-place read dependencies: if a subsequent step (store - // copy, file seed) writes to a byte range that an in-place - // copy needs to read, the in-place 
copy must execute first. - for i, inPlaceRead := range p.inPlaceReads { - if inPlaceRead == nil { - continue - } - target := p.placements[i] - if target == inPlaceRead { - continue - } - ipStep := stepsPerPlacement[inPlaceRead] - step := stepsPerPlacement[target] - if step != ipStep { - step.addDependency(ipStep) - ipStep.addDependent(step) - } + // Link in-place read dependencies: if a subsequent step (store + // copy, file seed) writes to a byte range that an in-place + // copy needs to read, the in-place copy must execute first. + for i, inPlaceRead := range p.inPlaceReads { + if inPlaceRead == nil { + continue + } + target := p.placements[i] + if target == inPlaceRead { + continue + } + ipStep := stepsPerPlacement[inPlaceRead] + step := stepsPerPlacement[target] + if step != ipStep { + step.addDependency(ipStep) + ipStep.addDependent(step) } } @@ -582,23 +581,32 @@ func (p *AssemblePlan) generateInPlace(seed *InPlaceSeed) { sccs := tarjanSCC(n, succ) slices.Reverse(sccs) // topological order - // Stable-sort independent SCCs by minimum target index so the - // output order is deterministic and follows the target layout. - slices.SortStableFunc(sccs, func(a, b []int) int { - minA := moves[a[0]].targetIdx - for _, i := range a[1:] { - if moves[i].targetIdx < minA { - minA = moves[i].targetIdx - } - } - minB := moves[b[0]].targetIdx - for _, i := range b[1:] { - if moves[i].targetIdx < minB { - minB = moves[i].targetIdx + // Pre-compute minimum target index per SCC for deterministic sorting. + sccMin := make([]int, len(sccs)) + for si, scc := range sccs { + m := moves[scc[0]].targetIdx + for _, i := range scc[1:] { + if moves[i].targetIdx < m { + m = moves[i].targetIdx } } - return minA - minB + sccMin[si] = m + } + + // Stable-sort independent SCCs by minimum target index so the + // output order is deterministic and follows the target layout. 
+ indices := make([]int, len(sccs)) + for i := range indices { + indices[i] = i + } + slices.SortStableFunc(indices, func(a, b int) int { + return sccMin[a] - sccMin[b] }) + sorted := make([][]int, len(sccs)) + for i, idx := range indices { + sorted[i] = sccs[idx] + } + sccs = sorted for _, scc := range sccs { if len(scc) == 1 { @@ -625,8 +633,8 @@ func (p *AssemblePlan) generateInPlace(seed *InPlaceSeed) { // Remove bufIdx's outgoing edges and topologically sort the // remaining cycle members. - localSucc := make([][]int, n) - localInDeg := make(map[int]int) + localSucc := make(map[int][]int, len(scc)) + localInDeg := make(map[int]int, len(scc)) for _, i := range scc { if i == bufIdx { continue // exclude buffer-break from topo sort diff --git a/assemble-selfseed.go b/assemble-selfseed.go index a51a1e9..ccb3e92 100644 --- a/assemble-selfseed.go +++ b/assemble-selfseed.go @@ -79,7 +79,7 @@ func (s *selfSeed) LongestMatchFrom(chunks []IndexChunk, startPos int) (uint64, if p <= startPos { continue } - start, n := s.maxMatchFrom(chunks[startPos:], p, limit) + start, n := maxMatchFrom(chunks[startPos:], s.index.Chunks, p, limit) // Clamp to prevent source [p, p+n) overlapping destination [startPos, startPos+n) if max := p - startPos; n > max { n = max @@ -101,30 +101,6 @@ func (s *selfSeed) LongestMatchFrom(chunks []IndexChunk, startPos int) (uint64, return byteOffset, byteLength, maxStart, maxLen } -func (s *selfSeed) maxMatchFrom(chunks []IndexChunk, p int, limit int) (int, int) { - if len(chunks) == 0 { - return 0, 0 - } - var ( - sp int - dp = p - ) - for { - if limit != 0 && sp == limit { - break - } - if dp >= len(s.index.Chunks) || sp >= len(chunks) { - break - } - if chunks[sp].ID != s.index.Chunks[dp].ID { - break - } - dp++ - sp++ - } - return p, dp - p -} - func (s *selfSeed) GetSegment(srcOffset, dstOffset, size uint64) *selfSeedSegment { return &selfSeedSegment{ seed: s, diff --git a/seed.go b/seed.go index 8a1550d..1e8f08a 100644 --- a/seed.go +++ 
b/seed.go @@ -21,6 +21,33 @@ type Seed interface { RegenerateIndex(ctx context.Context, n int, attempt int, seedNumber int) error } +// maxMatchFrom compares chunks starting at position 0 with seedChunks starting +// at position p. Returns (p, count) where count is the number of consecutive +// matching chunks. A limit of zero means no limit. +func maxMatchFrom(chunks, seedChunks []IndexChunk, p, limit int) (int, int) { + if len(chunks) == 0 { + return 0, 0 + } + var ( + sp int + dp = p + ) + for { + if limit != 0 && sp == limit { + break + } + if dp >= len(seedChunks) || sp >= len(chunks) { + break + } + if chunks[sp].ID != seedChunks[dp].ID { + break + } + dp++ + sp++ + } + return p, dp - p +} + // SeedSegment represents a matching range between a Seed and a file being // assembled from an Index. It's used to copy or reflink data from seeds into // a target file during an extract operation. From 8afe60728412397ad82b84e8c4c82de42d3f5a6b Mon Sep 17 00:00:00 2001 From: folbrich Date: Mon, 23 Mar 2026 07:05:43 -0700 Subject: [PATCH 11/11] Split inPlace scan from inPlace scan with seed --- assemble-plan.go | 169 +++++++++++++++++++++++------------------- assemble-plan_test.go | 26 +++++++ assemble-skip.go | 32 ++++++++ assemble.go | 4 +- 4 files changed, 152 insertions(+), 79 deletions(-) diff --git a/assemble-plan.go b/assemble-plan.go index 8a559eb..16a9bf9 100644 --- a/assemble-plan.go +++ b/assemble-plan.go @@ -207,69 +207,25 @@ func (p *AssemblePlan) Validate() error { } func (p *AssemblePlan) generate() error { - // Mark chunks that are already correct in the target file so they can - // be skipped during assembly. 
- if !p.targetIsBlank { - f, err := os.Open(p.target) - if err == nil { - var g errgroup.Group - g.SetLimit(p.concurrency) - for i, chunk := range p.idx.Chunks { - g.Go(func() error { - b := make([]byte, chunk.Size) - if _, err := f.ReadAt(b, int64(chunk.Start)); err != nil { - return nil - } - if Digest.Sum(b) == chunk.ID { - p.placements[i] = &placement{source: &skipInPlace{ - start: chunk.Start, - end: chunk.Start + chunk.Size, - }} - } - return nil - }) - } - g.Wait() - f.Close() - - // Merge consecutive in-place chunks into a single placement - // so that Steps() produces one step per run instead of one - // per chunk. This works because Steps() deduplicates by - // pointer identity. - var run *placement - for i, pl := range p.placements { - if pl == nil { - run = nil - continue - } - if _, ok := pl.source.(*skipInPlace); !ok { - run = nil - continue - } - if run == nil { - run = pl - continue - } - // Extend the existing run and share the pointer - run.source.(*skipInPlace).end = p.idx.Chunks[i].Start + p.idx.Chunks[i].Size - p.placements[i] = run - } + // Find the in-place seed, if any. There can only be one. + var inPlaceSeed *InPlaceSeed + for _, seed := range p.seeds { + if ips, ok := seed.(*InPlaceSeed); ok { + inPlaceSeed = ips + break } } - // If we have an in-place seed, use it to find matches in the file - // before anything gets overwritten by subsequent steps. We schedule - // steps that re-arrange chunks that already exist in other places in - // the target file before they get overwritten by subsequent steps like - // copying from other seeds or the store. - for _, seed := range p.seeds { - inPlaceSeed, ok := seed.(*InPlaceSeed) - if !ok { - continue + // When the target file already exists, mark chunks that are already + // correct so they can be skipped during assembly. If we have an + // in-place seed, its index tells us what's already in place without + // any file I/O. Otherwise fall back to reading and hashing each chunk. 
+ if !p.targetIsBlank { + if inPlaceSeed != nil { + p.generateInPlace(inPlaceSeed) + } else { + p.generateSkips() } - - p.generateInPlace(inPlaceSeed) - break // There can only be one in-place seed } // Find all matches in file itself as they're written. As it's @@ -479,9 +435,64 @@ func (p *AssemblePlan) Steps() []*PlanStep { return steps } -// generateInPlace processes an in-place seed to find chunks that exist at -// different offsets in the file and creates placements that rearrange them. -// It handles dependency cycles using Tarjan's SCC algorithm. +// generateSkips reads the target file and marks chunks that are already in +// the correct position so they can be skipped during assembly. Consecutive +// matching chunks are merged into a single placement for efficiency. +func (p *AssemblePlan) generateSkips() { + f, err := os.Open(p.target) + if err != nil { + return + } + + var g errgroup.Group + g.SetLimit(p.concurrency) + for i, chunk := range p.idx.Chunks { + g.Go(func() error { + b := make([]byte, chunk.Size) + if _, err := f.ReadAt(b, int64(chunk.Start)); err != nil { + return nil + } + if Digest.Sum(b) == chunk.ID { + p.placements[i] = &placement{source: &skipInPlace{ + start: chunk.Start, + end: chunk.Start + chunk.Size, + }} + } + return nil + }) + } + g.Wait() + f.Close() + + // Merge consecutive in-place chunks into a single placement + // so that Steps() produces one step per run instead of one + // per chunk. This works because Steps() deduplicates by + // pointer identity. + var run *placement + for i, pl := range p.placements { + if pl == nil { + run = nil + continue + } + if _, ok := pl.source.(*skipInPlace); !ok { + run = nil + continue + } + if run == nil { + run = pl + continue + } + // Extend the existing run and share the pointer + run.source.(*skipInPlace).end = p.idx.Chunks[i].Start + p.idx.Chunks[i].Size + p.placements[i] = run + } +} + +// generateInPlace processes an in-place seed to classify each target chunk. 
+// Chunks already at the correct position get skipInPlace placements (detected +// by comparing the seed and target indexes, with no file I/O). Chunks that +// exist at different offsets get inPlaceCopy placements, with dependency +// cycles resolved using Tarjan's SCC algorithm. func (p *AssemblePlan) generateInPlace(seed *InPlaceSeed) { // Stage 1: Source mapping — index every chunk in the seed by its // ChunkID, recording all byte ranges where it appears. @@ -501,28 +512,34 @@ func (p *AssemblePlan) generateInPlace(seed *InPlaceSeed) { } var moves []moveOp - // Collect skip placements from the initial scan that correspond to - // seed chunks into inPlaceOrder so they precede other sources in - // the step list. - skipSeen := make(map[*placement]bool) + // Create inPlaceSeedSkip placements for chunks that are already at + // the correct position. A chunk is "in place" when its ChunkID + // appears in the seed index at the same byte offset and size as in + // the target index. This is a pure index comparison — no file I/O. + // Unlike skipInPlace (created by generateSkips after hashing), + // these carry validation info so Validate() can verify the data. for i, c := range p.idx.Chunks { - pl := p.placements[i] - if pl == nil || skipSeen[pl] { - continue - } - if _, ok := pl.source.(*skipInPlace); !ok { + sources, ok := srcOf[c.ID] + if !ok { continue } - if _, ok := srcOf[c.ID]; !ok { - continue + for _, src := range sources { + if src.start == c.Start && src.size == c.Size { + pl := &placement{source: &inPlaceSeedSkip{ + chunk: c, + seed: seed, + file: p.target, + }} + p.placements[i] = pl + p.inPlaceOrder = append(p.inPlaceOrder, pl) + break + } } - skipSeen[pl] = true - p.inPlaceOrder = append(p.inPlaceOrder, pl) } for i, c := range p.idx.Chunks { if p.placements[i] != nil { - continue // Already placed (e.g. 
skipInPlace from initial scan) + continue // Already placed (skipInPlace or other) } sources := srcOf[c.ID] diff --git a/assemble-plan_test.go b/assemble-plan_test.go index bc87b07..6107951 100644 --- a/assemble-plan_test.go +++ b/assemble-plan_test.go @@ -484,6 +484,32 @@ func TestFileSeedValidation(t *testing.T) { require.Equal(t, []Seed{seed}, seedErr.Seeds) }) + t.Run("invalid in-place seed skip", func(t *testing.T) { + // Create a file with two chunks where the first is already at the + // correct position (will be an inPlaceSeedSkip). After plan + // creation, corrupt the file and verify Validate catches it. + f := filepath.Join(t.TempDir(), "target") + err := os.WriteFile(f, append(data1, data2...), 0644) + require.NoError(t, err) + + inPlace, err := NewInPlaceSeed(f, seedIndex) + require.NoError(t, err) + + plan, err := NewPlan(f, targetIndex, nil, + PlanWithSeeds([]Seed{inPlace}), PlanWithTargetIsBlank(false)) + require.NoError(t, err) + defer plan.Close() + + // Corrupt the target file after the plan was created + require.NoError(t, os.WriteFile(f, make([]byte, 200), 0644)) + + err = plan.Validate() + require.Error(t, err) + + var seedErr SeedInvalid + require.ErrorAs(t, err, &seedErr) + }) + t.Run("null seed skipped", func(t *testing.T) { // Create a null chunk index — data is all zeros nullData := make([]byte, 100) diff --git a/assemble-skip.go b/assemble-skip.go index 52cdb0a..5e9e394 100644 --- a/assemble-skip.go +++ b/assemble-skip.go @@ -18,3 +18,35 @@ func (s *skipInPlace) Execute(f *os.File) (copied uint64, cloned uint64, err err func (s *skipInPlace) String() string { return fmt.Sprintf("InPlace: Skip [%d:%d]", s.start, s.end) } + +// inPlaceSeedSkip skips a chunk that is already in the correct position +// according to an in-place seed's index. Unlike skipInPlace (which is +// created after hashing the data), this is based on index comparison +// and carries validation info so Validate() can verify the data. 
+type inPlaceSeedSkip struct { + chunk IndexChunk + seed Seed + file string +} + +func (s *inPlaceSeedSkip) Execute(f *os.File) (uint64, uint64, error) { + return 0, 0, nil +} + +func (s *inPlaceSeedSkip) String() string { + return fmt.Sprintf("InPlace: Skip [%d:%d]", s.chunk.Start, s.chunk.Start+s.chunk.Size) +} + +func (s *inPlaceSeedSkip) Seed() Seed { return s.seed } +func (s *inPlaceSeedSkip) File() string { return s.file } + +func (s *inPlaceSeedSkip) Validate(file *os.File) error { + b := make([]byte, s.chunk.Size) + if _, err := file.ReadAt(b, int64(s.chunk.Start)); err != nil { + return err + } + if Digest.Sum(b) != s.chunk.ID { + return fmt.Errorf("in-place seed index for %s doesn't match its data", s.file) + } + return nil +} diff --git a/assemble.go b/assemble.go index 341a3ee..d053783 100644 --- a/assemble.go +++ b/assemble.go @@ -212,9 +212,7 @@ retry: switch step.source.(type) { case *copyFromStore: stats.incChunksFromStore() - case *skipInPlace: - stats.addChunksInPlace(uint64(step.numChunks)) - case *inPlaceCopy: + case *skipInPlace, *inPlaceSeedSkip, *inPlaceCopy: stats.addChunksInPlace(uint64(step.numChunks)) case *fileSeedSource, *selfSeedSegment: stats.addChunksFromSeed(uint64(step.numChunks))