From 60c8665f33beca1e87d5c9d5290b4d80588b075c Mon Sep 17 00:00:00 2001 From: githubzilla Date: Wed, 18 Mar 2026 22:59:38 +0800 Subject: [PATCH] fix: reorder BackFill operations to prevent dirty_data_key_count_ underflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BackFill() in TemplateCcMap called SetCkptTs() (which sets the flush bit) before SetCommitTsPayloadStatus() (which overwrites the entire commit_ts_and_status_ field, clearing the flush bit). This left backfilled entries dirty without a corresponding dirty-count increment. When those entries were later flushed by checkpoint, the decrement caused dirty_data_key_count_ to underflow, triggering the assertion in AdjustDataKeyStats. Fix: 1. template_cc_map.h BackFill(): call SetCommitTsPayloadStatus() before SetCkptTs() so the flush bit set by SetCkptTs() is preserved. Add OnCommittedUpdate() to reconcile dirty-key stats. 2. template_cc_map.h RemoteReadOutsideCc(): add OnCommittedUpdate() for consistency with other backfill paths. 3. cc_req_misc.cpp UpdateCceCkptTsCc::Execute(): relax 4 assertions that checked !IsPersistent() — BackFill can legitimately mark entries as persistent before the checkpoint callback fires. The existing was_dirty/OnEntryFlushed logic already handles this case correctly. --- tx_service/include/cc/template_cc_map.h | 15 +++++++++++++-- tx_service/src/cc/cc_req_misc.cpp | 12 ++++++++---- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/tx_service/include/cc/template_cc_map.h b/tx_service/include/cc/template_cc_map.h index 136b9b00..310a2e68 100644 --- a/tx_service/include/cc/template_cc_map.h +++ b/tx_service/include/cc/template_cc_map.h @@ -2290,6 +2290,7 @@ class TemplateCcMap : public CcMap // backfill version. cce->SetCkptTs(req.CommitTs()); OnFlushed(cce, was_dirty); + OnCommittedUpdate(cce, was_dirty); // Refill mvcc archives. if (shard_->EnableMvcc()) @@ -10042,8 +10043,6 @@ class TemplateCcMap : public CcMap const uint64_t cce_version = cce->CommitTs(); bool was_dirty = cce->IsDirty(); - cce->SetCkptTs(commit_ts); - OnFlushed(cce, was_dirty); if (cce_version < commit_ts) { @@ -10062,6 +10061,11 @@ class TemplateCcMap : public CcMap return false; } } + // Update the payload first, then mark as flushed via + // SetCkptTs. The previous order (SetCkptTs before + // SetCommitTsPayloadStatus) caused the flush bit to be + // set and then immediately cleared, leaving the entry + // dirty with no corresponding dirty-count increment. cce->SetCommitTsPayloadStatus(commit_ts, status); if (status == RecordStatus::Deleted) @@ -10086,6 +10090,13 @@ class TemplateCcMap : public CcMap payload->Deserialize(rec_str.c_str(), offset); cce->AddArchiveRecord(payload, status, commit_ts); } + // Mark the backfilled version as flushed and reconcile + // dirty-key stats. SetCkptTs must run after + // SetCommitTsPayloadStatus so that the flush bit it sets is + // not immediately cleared by the payload update. + cce->SetCkptTs(commit_ts); + OnFlushed(cce, was_dirty); + OnCommittedUpdate(cce, was_dirty); if (RangePartitioned && cce->entry_info_.DataStoreSize() == INT32_MAX) { cce->entry_info_.SetDataStoreSize( diff --git a/tx_service/src/cc/cc_req_misc.cpp b/tx_service/src/cc/cc_req_misc.cpp index 2d6dbf31..8a1f87bc 100644 --- a/tx_service/src/cc/cc_req_misc.cpp +++ b/tx_service/src/cc/cc_req_misc.cpp @@ -1164,7 +1164,7 @@ bool UpdateCceCkptTsCc::Execute(CcShard &ccs) VersionedLruEntry *v_entry = static_cast *>(ref.cce_); - assert(v_entry->CommitTs() > 1 && !v_entry->IsPersistent()); + assert(v_entry->CommitTs() > 1); bool was_dirty = v_entry->IsDirty(); v_entry->entry_info_.SetDataStoreSize(ref.post_flush_size_); @@ -1176,7 +1176,7 @@ bool UpdateCceCkptTsCc::Execute(CcShard &ccs) { VersionedLruEntry *v_entry = static_cast *>(ref.cce_); - assert(v_entry->CommitTs() > 1 && !v_entry->IsPersistent()); + assert(v_entry->CommitTs() > 1); bool was_dirty = v_entry->IsDirty(); v_entry->entry_info_.SetDataStoreSize(ref.post_flush_size_); @@ -1192,7 +1192,7 @@ bool UpdateCceCkptTsCc::Execute(CcShard &ccs) VersionedLruEntry *v_entry = static_cast *>(ref.cce_); - assert(v_entry->CommitTs() > 1 && !v_entry->IsPersistent()); + assert(v_entry->CommitTs() > 1); bool was_dirty = v_entry->IsDirty(); v_entry->SetCkptTs(ref.commit_ts_); v_entry->ClearBeingCkpt(); @@ -1203,7 +1203,11 @@ bool UpdateCceCkptTsCc::Execute(CcShard &ccs) VersionedLruEntry *v_entry = static_cast *>(ref.cce_); - assert(v_entry->CommitTs() > 1 && !v_entry->IsPersistent()); + assert(v_entry->CommitTs() > 1); + // The entry may already be persistent if a concurrent + // BackFill marked it as flushed before this checkpoint + // callback ran. That is benign — proceed to reconcile + // the dirty-key stats. bool was_dirty = v_entry->IsDirty(); v_entry->SetCkptTs(ref.commit_ts_); v_entry->ClearBeingCkpt();