Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 67 additions & 69 deletions fileseed.go → assemble-fileseed.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"fmt"
"io"
"os"
"sync"
"sort"
)

// FileSeed is used to copy or clone blocks from an existing index+blob during
Expand All @@ -15,48 +15,40 @@ type FileSeed struct {
index Index
pos map[ChunkID][]int
canReflink bool
isInvalid bool
mu sync.RWMutex
}

// NewIndexSeed initializes a new seed that uses an existing index and its blob
func NewIndexSeed(dstFile string, srcFile string, index Index) (*FileSeed, error) {
func NewFileSeed(dstFile string, srcFile string, index Index) (*FileSeed, error) {
s := FileSeed{
srcFile: srcFile,
pos: make(map[ChunkID][]int),
index: index,
canReflink: CanClone(dstFile, srcFile),
isInvalid: false,
}
for i, c := range s.index.Chunks {
s.pos[c.ID] = append(s.pos[c.ID], i)
}
return &s, nil
}

// LongestMatchWith returns the longest sequence of chunks anywhere in Source
// that match `chunks` starting at chunks[0], limiting the maximum number of chunks
// if reflinks are not supported. If there is no match, it returns a length of zero
// and a nil SeedSegment.
func (s *FileSeed) LongestMatchWith(chunks []IndexChunk) (int, SeedSegment) {
s.mu.RLock()
isInvalid := s.isInvalid
s.mu.RUnlock()

// isInvalid can be concurrently read or written. Use a mutex to avoid a race
if len(chunks) == 0 || len(s.index.Chunks) == 0 || isInvalid {
return 0, nil
// LongestMatchFrom returns the longest sequence of chunks anywhere in the seed
// that match chunks starting at chunks[startPos]. It returns the byte offset
// and byte length of the match in the seed, plus the chunk offset and chunk
// length. Returns (0, 0, 0, 0) if there is no match.
func (s *FileSeed) LongestMatchFrom(chunks []IndexChunk, startPos int) (uint64, uint64, int, int) {
if startPos >= len(chunks) || len(s.index.Chunks) == 0 {
return 0, 0, 0, 0
}
pos, ok := s.pos[chunks[0].ID]
pos, ok := s.pos[chunks[startPos].ID]
if !ok {
return 0, nil
return 0, 0, 0, 0
}
// From every position of chunks[0] in the source, find a slice of
// matching chunks. Then return the longest of those slices.
// From every position of chunks[startPos] in the source, find a run of
// matching chunks. Then return the longest of those runs.
var (
match []IndexChunk
max int
limit int
bestSeedPos int
maxLen int
limit int
)
if !s.canReflink {
// Limit the maximum number of chunks, in a single sequence, to avoid
Expand All @@ -66,16 +58,37 @@ func (s *FileSeed) LongestMatchWith(chunks []IndexChunk) (int, SeedSegment) {
limit = 100
}
for _, p := range pos {
m := s.maxMatchFrom(chunks, p, limit)
if len(m) > max {
match = m
max = len(m)
seedPos, n := maxMatchFrom(chunks[startPos:], s.index.Chunks, p, limit)
if n > maxLen {
bestSeedPos = seedPos
maxLen = n
}
if limit != 0 && limit == max {
if limit != 0 && limit == maxLen {
break
}
}
return max, newFileSeedSegment(s.srcFile, match, s.canReflink)
if maxLen == 0 {
return 0, 0, 0, 0
}
byteOffset := s.index.Chunks[bestSeedPos].Start
last := s.index.Chunks[bestSeedPos+maxLen-1]
byteLength := last.Start + last.Size - byteOffset
return byteOffset, byteLength, bestSeedPos, maxLen
}

// GetSegment constructs a SeedSegment for a matched range identified by its
// byte offset and size in the seed.
func (s *FileSeed) GetSegment(offset, size uint64) SeedSegment {
	chunks := s.index.Chunks
	// Find the first chunk starting at (or after) the requested offset.
	first := sort.Search(len(chunks), func(j int) bool {
		return chunks[j].Start >= offset
	})
	// Extend the range chunk by chunk until at least `size` bytes are covered.
	end := first
	for total := uint64(0); end < len(chunks) && total < size; end++ {
		total += chunks[end].Size
	}
	return newFileSeedSegment(s.srcFile, chunks[first:end], s.canReflink)
}

func (s *FileSeed) RegenerateIndex(ctx context.Context, n int, attempt int, seedNumber int) error {
Expand All @@ -87,7 +100,6 @@ func (s *FileSeed) RegenerateIndex(ctx context.Context, n int, attempt int, seed
}

s.index = index
s.SetInvalid(false)
s.pos = make(map[ChunkID][]int, len(s.index.Chunks))
for i, c := range s.index.Chunks {
s.pos[c.ID] = append(s.pos[c.ID], i)
Expand All @@ -96,44 +108,6 @@ func (s *FileSeed) RegenerateIndex(ctx context.Context, n int, attempt int, seed
return nil
}

// SetInvalid marks the seed valid or invalid, guarded by the write lock.
func (s *FileSeed) SetInvalid(value bool) {
	s.mu.Lock()
	s.isInvalid = value
	s.mu.Unlock()
}

// IsInvalid reports whether the seed has been marked invalid.
// This is a read-only access, so take the read lock of the RWMutex instead of
// the exclusive write lock; concurrent readers don't serialize on each other.
func (s *FileSeed) IsInvalid() bool {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.isInvalid
}

// maxMatchFrom returns the run of seed chunks, starting at seed position p,
// that matches `chunks` starting at position 0. A limit of zero means the
// run length is unbounded.
func (s *FileSeed) maxMatchFrom(chunks []IndexChunk, p int, limit int) []IndexChunk {
	if len(chunks) == 0 {
		return nil
	}
	// Walk both sequences in lockstep until the limit is hit, either side
	// runs out, or the chunk IDs diverge.
	end := p
	for n := 0; n < len(chunks) && end < len(s.index.Chunks); n++ {
		if limit != 0 && n == limit {
			break
		}
		if chunks[n].ID != s.index.Chunks[end].ID {
			break
		}
		end++
	}
	return s.index.Chunks[p:end]
}

type fileSeedSegment struct {
file string
chunks []IndexChunk
Expand Down Expand Up @@ -239,3 +213,27 @@ func (s *fileSeedSegment) clone(dst, src *os.File, srcOffset, srcLength, dstOffs
// close the aligned blocks
return copied, alignLength, CloneRange(dst, src, srcAlignStart, alignLength, dstAlignStart)
}

// fileSeedSource describes one unit of assembly work: writing a matched seed
// segment into a destination byte range of the output file.
type fileSeedSource struct {
	segment SeedSegment // matched chunk run in the seed file
	seed    Seed        // the seed the segment was found in
	offset  uint64      // destination byte offset the segment is written to
	length  uint64      // number of bytes written starting at offset
	isBlank bool        // forwarded to WriteInto; NOTE(review): presumably flags an all-zero segment — confirm against WriteInto
}

// Execute writes the matched segment into f at the configured destination
// range, using the blocksize of the target file.
func (s *fileSeedSource) Execute(f *os.File) (copied uint64, cloned uint64, err error) {
	bs := blocksizeOfFile(f.Name())
	return s.segment.WriteInto(f, s.offset, s.length, bs, s.isBlank)
}

// Seed returns the seed this source belongs to.
func (s *fileSeedSource) Seed() Seed { return s.seed }

// File returns the name of the seed file backing the matched segment.
func (s *fileSeedSource) File() string { return s.segment.FileName() }

// Validate checks the matched segment against the given file by delegating to
// the underlying SeedSegment implementation.
func (s *fileSeedSource) Validate(file *os.File) error {
	return s.segment.Validate(file)
}

// String describes the copy this source performs, as a half-open destination
// byte range.
func (s *fileSeedSource) String() string {
	end := s.offset + s.length
	return fmt.Sprintf("FileSeed(%s): Copy to [%d:%d]", s.segment.FileName(), s.offset, end)
}
57 changes: 57 additions & 0 deletions assemble-inplacecopy.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package desync

import (
"fmt"
"os"
)

// inPlaceCopy copies a chunk from one position to another within the same file.
// It uses ReadAt/WriteAt (pread/pwrite) which are position-independent and safe
// for concurrent use on the same file handle.
type inPlaceCopy struct {
srcOffset uint64
srcSize uint64
dstOffset uint64
dstSize uint64

// Cycle-breaking: the first mover in a cycle pre-reads the buffered
// operation's source before executing its own copy.
preBuffers []*inPlaceCopy // targets whose writeBuf to populate before own copy
writeBuf []byte // non-nil → write from this buffer, skip file read
}

func (s *inPlaceCopy) Execute(f *os.File) (copied uint64, cloned uint64, err error) {
// Step 1: Pre-read sources for cycle-broken chunks before our own copy
// overwrites their data.
for _, pb := range s.preBuffers {
pb.writeBuf = make([]byte, pb.srcSize)
if _, err := f.ReadAt(pb.writeBuf, int64(pb.srcOffset)); err != nil {
return 0, 0, fmt.Errorf("inPlaceCopy pre-buffer read at %d: %w", pb.srcOffset, err)
}
}

// Step 2: If this chunk was cycle-broken, write from the pre-read buffer.
if s.writeBuf != nil {
if _, err := f.WriteAt(s.writeBuf, int64(s.dstOffset)); err != nil {
return 0, 0, fmt.Errorf("inPlaceCopy buffer write at %d: %w", s.dstOffset, err)
}
return s.dstSize, 0, nil
}

// Step 3: Normal copy — read source into a temp buffer, then write to dest.
// Always buffer first to handle overlapping ranges safely.
buf := make([]byte, s.srcSize)
if _, err := f.ReadAt(buf, int64(s.srcOffset)); err != nil {
return 0, 0, fmt.Errorf("inPlaceCopy read at %d: %w", s.srcOffset, err)
}
if _, err := f.WriteAt(buf, int64(s.dstOffset)); err != nil {
return 0, 0, fmt.Errorf("inPlaceCopy write at %d: %w", s.dstOffset, err)
}
return s.dstSize, 0, nil
}

func (s *inPlaceCopy) String() string {
return fmt.Sprintf("InPlace: Copy [%d:%d] to [%d:%d]",
s.srcOffset, s.srcOffset+s.srcSize,
s.dstOffset, s.dstOffset+s.dstSize)
}
18 changes: 18 additions & 0 deletions assemble-inplaceseed.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package desync

// InPlaceSeed is a FileSeed where the source and destination are the same file.
// This makes the relationship explicit when desync extract is used with seeds
// that resolve to the same path as the extraction target.
type InPlaceSeed struct {
	*FileSeed // embedded: InPlaceSeed inherits all FileSeed behavior unchanged
}

// NewInPlaceSeed initializes a seed where the source and destination are the
// same file, by handing the path to NewFileSeed as both src and dst.
func NewInPlaceSeed(file string, index Index) (*InPlaceSeed, error) {
	base, err := NewFileSeed(file, file, index)
	if err != nil {
		return nil, err
	}
	return &InPlaceSeed{FileSeed: base}, nil
}
Loading
Loading