From e358f1a889c1700b59dedb96e5e19b343b57189c Mon Sep 17 00:00:00 2001 From: model-collapse Date: Mon, 16 Mar 2026 09:05:35 +0000 Subject: [PATCH] Fix merge commit point bug: source segments not removed after merge Background merges update in-memory segmentInfos_ but the on-disk segments_N was never re-written after merges complete, causing 2x data duplication on reader reopen (19.7M docs visible instead of 10M). Changes: - Commit(): add waitForMerges + commitMergeResults after initial commit to persist merge results before triggering cascading merges - Refresh(): add waitForMerges + commitMergeResults before reopening reader so it sees the post-merge segment state - Update diagon submodule to a379571 which adds commitMergeResults(), triggerMerge(), and C API functions (diagon_commit_merge_results, diagon_wait_for_merges, diagon_maybe_merge) Co-Authored-By: Claude Opus 4.6 --- pkg/data/diagon/bridge.go | 20 ++++++++++++++++++++ src/3rdparty/diagon | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/pkg/data/diagon/bridge.go b/pkg/data/diagon/bridge.go index 1a06e98..a4d2dbb 100644 --- a/pkg/data/diagon/bridge.go +++ b/pkg/data/diagon/bridge.go @@ -1847,11 +1847,24 @@ func (s *Shard) Commit() error { s.mu.Lock() defer s.mu.Unlock() + // First commit: flush buffered docs + write segments_N if !C.diagon_commit(s.writer) { errMsg := C.GoString(C.diagon_last_error()) return fmt.Errorf("commit failed: %s", errMsg) } + // Wait for background merges triggered by the commit + C.diagon_wait_for_merges(s.writer) + + // Persist merge results to segments_N (lightweight — no flush, no re-merge) + if !C.diagon_commit_merge_results(s.writer) { + errMsg := C.GoString(C.diagon_last_error()) + return fmt.Errorf("commit merge results failed: %s", errMsg) + } + + // Trigger cascading merges for next cycle + C.diagon_maybe_merge(s.writer) + s.logger.Debug("Committed changes") return nil } @@ -1881,6 +1894,13 @@ func (s *Shard) Refresh() error { return fmt.Errorf("commit failed during refresh: %s", errMsg) } + // Wait for background merges and persist results + C.diagon_wait_for_merges(s.writer) + if !C.diagon_commit_merge_results(s.writer) { + errMsg := C.GoString(C.diagon_last_error()) + return fmt.Errorf("commit merge results failed during refresh: %s", errMsg) + } + // Close old reader and searcher if s.searcher != nil { C.diagon_free_index_searcher(s.searcher) diff --git a/src/3rdparty/diagon b/src/3rdparty/diagon index 2253049..a379571 160000 --- a/src/3rdparty/diagon +++ b/src/3rdparty/diagon @@ -1 +1 @@ -Subproject commit 22530492f4c656f3906f2037be254d76b5943e24 +Subproject commit a3795715d28c05a9cab46717d3de81087c03e54d