From 77734307b2ec6502386e7d6ec2f39ea3dc6f8ab8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Feb 2026 07:15:39 +0000 Subject: [PATCH 01/15] Initial plan From 0a352b40de2613a7eec4829e1a114f097af52110 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Feb 2026 07:38:16 +0000 Subject: [PATCH 02/15] Add payload-size-aware cache eviction policy - Add txservice_large_value_threshold and txservice_large_value_eviction_age to tx_service_common.h. When threshold > 0, entries whose PayloadSize() exceeds the threshold are protected from LRU eviction until the page's LRU age (access_counter_ - last_access_ts_) reaches the eviction age. - Expose AccessCounter() getter on CcShard for use by the clean guard. - Modify CcPageCleanGuardWithoutKickoutCc::CanBeCleaned() to implement the protection logic, cooperating with existing LRU-based eviction. - Add optional configuration keys large_value_threshold and large_value_eviction_age to TxService::Init(). - Add BulkEmplaceFreeForTest() and SetPayloadForTest() helpers to TemplateCcMap for deterministic test setup. - Add test case validating that large-value entries are protected when the policy is active and evicted normally when protection is disabled. Co-authored-by: xiexiaoy <5558602+xiexiaoy@users.noreply.github.com> --- tx_service/include/cc/cc_page_clean_guard.h | 22 +++- tx_service/include/cc/cc_shard.h | 5 + tx_service/include/cc/template_cc_map.h | 51 ++++++++ tx_service/include/tx_service.h | 10 ++ tx_service/include/tx_service_common.h | 9 ++ tx_service/tests/CcPage-Test.cpp | 135 ++++++++++++++++++++ 6 files changed, 231 insertions(+), 1 deletion(-) diff --git a/tx_service/include/cc/cc_page_clean_guard.h b/tx_service/include/cc/cc_page_clean_guard.h index 39c1c316..04efd0a3 100644 --- a/tx_service/include/cc/cc_page_clean_guard.h +++ b/tx_service/include/cc/cc_page_clean_guard.h @@ -399,7 +399,27 @@ struct CcPageCleanGuardWithoutKickoutCc return {false, false}; } - return {(cce->IsFree() && !cce->GetBeingCkpt()), false}; + if (!cce->IsFree() || cce->GetBeingCkpt()) + { + return {false, false}; + } + + // Payload-size-aware eviction: protect large-value entries from early + // eviction. An entry whose payload size exceeds the configured + // threshold is kept in memory until the page's LRU age is at least + // txservice_large_value_eviction_age access-counter units old. + if (txservice_large_value_threshold > 0 && + cce->PayloadSize() > txservice_large_value_threshold) + { + uint64_t age = + this->cc_shard_->AccessCounter() - this->page_->last_access_ts_; + if (age < txservice_large_value_eviction_age) + { + return {false, false}; + } + } + + return {true, false}; } bool IsCleanTarget( diff --git a/tx_service/include/cc/cc_shard.h b/tx_service/include/cc/cc_shard.h index 20023351..0670d34d 100644 --- a/tx_service/include/cc/cc_shard.h +++ b/tx_service/include/cc/cc_shard.h @@ -1010,6 +1010,11 @@ class CcShard return clean_start_ccp_ != nullptr && clean_start_ccp_ == &tail_ccp_; } + uint64_t AccessCounter() const + { + return access_counter_; + } + SystemHandler *GetSystemHandler() { return system_handler_; diff --git a/tx_service/include/cc/template_cc_map.h b/tx_service/include/cc/template_cc_map.h index 4e4da192..1f5c5ef4 100644 --- a/tx_service/include/cc/template_cc_map.h +++ b/tx_service/include/cc/template_cc_map.h @@ -9180,6 +9180,57 @@ class TemplateCcMap : public CcMap return true; } + /** + * @brief Inserts keys with all entries guaranteed to be free (persistent, + * unlocked). Used in tests for large-value eviction protection. + * + * @param keys Keys to insert. + * @return true on success. + */ + bool BulkEmplaceFreeForTest(std::vector &keys) + { + for (auto key : keys) + { + bool emplace = false; + auto it = FindEmplace(*key, &emplace, false, false); + if (!emplace) + { + assert(false); + return false; + } + CcEntry *cce = + it->second; + CcPage *ccp = + it.GetPage(); + // Set commit_ts = 1, ckpt_ts = UINT64_MAX so entry is persistent. + bool was_dirty = cce->IsDirty(); + cce->SetCommitTsPayloadStatus(1, RecordStatus::Normal); + cce->SetCkptTs(UINT64_MAX); + OnFlushed(cce, was_dirty); + OnCommittedUpdate(cce, was_dirty); + ccp->last_dirty_commit_ts_ = + std::max(cce->CommitTs(), ccp->last_dirty_commit_ts_); + } + return true; + } + + /** + * @brief Sets the payload of all entries in the cc map to the given shared + * pointer. Used in tests for large-value eviction protection. + * + * @param payload The payload to assign to every entry. + */ + void SetPayloadForTest(std::shared_ptr payload) + { + for (auto &[key, page_ptr] : ccmp_) + { + for (auto &cce : page_ptr->entries_) + { + cce->payload_.cur_payload_ = payload; + } + } + } + protected: void OnCommittedUpdate( const CcEntry *cce, diff --git a/tx_service/include/tx_service.h b/tx_service/include/tx_service.h index 007d9dbe..5b83596a 100644 --- a/tx_service/include/tx_service.h +++ b/tx_service/include/tx_service.h @@ -1207,6 +1207,16 @@ class TxService conf.at("enable_key_cache") && !enable_mvcc; } + if (conf.find("large_value_threshold") != conf.end()) + { + txservice_large_value_threshold = conf.at("large_value_threshold"); + } + if (conf.find("large_value_eviction_age") != conf.end()) + { + txservice_large_value_eviction_age = + conf.at("large_value_eviction_age"); + } + if (txservice_skip_kv) { if (txservice_enable_cache_replacement) diff --git a/tx_service/include/tx_service_common.h b/tx_service/include/tx_service_common.h index ee4cc9b0..877c4c86 100644 --- a/tx_service/include/tx_service_common.h +++ b/tx_service/include/tx_service_common.h @@ -42,6 +42,15 @@ inline uint64_t txservice_max_standby_lag = 400000; // If checkpointed data can be evicted from memory if memory is full. If this is // off, all data will be cached in memory. inline bool txservice_enable_cache_replacement = true; +// Payload size threshold in bytes for large-value protection during cache +// eviction. Entries whose payload size exceeds this threshold are protected +// from eviction until their LRU age reaches txservice_large_value_eviction_age. +// A value of 0 disables large-value protection (default behaviour). +inline size_t txservice_large_value_threshold = 0; +// Minimum LRU access age (measured in access counter units) that a page must +// have before its large-value entries become eligible for eviction. +// Only relevant when txservice_large_value_threshold > 0. +inline uint64_t txservice_large_value_eviction_age = 1000; // Whether to automatically redirect redis command to the leader node when the // data is not on the local node. diff --git a/tx_service/tests/CcPage-Test.cpp b/tx_service/tests/CcPage-Test.cpp index 56c4e189..7e30ecbc 100644 --- a/tx_service/tests/CcPage-Test.cpp +++ b/tx_service/tests/CcPage-Test.cpp @@ -29,6 +29,7 @@ #include "template_cc_map.h" #include "tx_key.h" // CompositeKey #include "tx_record.h" // CompositeRecord +#include "tx_service_common.h" #include "type.h" namespace txservice @@ -177,6 +178,140 @@ TEST_CASE("CcPage clean tests", "[cc-page]") REQUIRE(total_remain + total_free == MAP_NUM * MAP_SIZE); } +// A CompositeRecord subclass that reports an artificially large payload +// size. Used to test the payload-size-aware cache eviction protection. +struct LargeCompositeRecord : public CompositeRecord +{ + explicit LargeCompositeRecord(int val, size_t reported_size) + : CompositeRecord(val), reported_size_(reported_size) + { + } + + size_t Size() const override + { + return reported_size_; + } + + TxRecord::Uptr Clone() const override + { + return std::make_unique(*this); + } + + size_t reported_size_; +}; + +TEST_CASE("Large-value eviction protection test", "[cc-page]") +{ + std::unordered_map> ng_configs{ + {0, {NodeConfig(0, "127.0.0.1", 8600)}}}; + std::map tx_cnf{{"node_memory_limit_mb", 1000}, + {"enable_key_cache", 0}, + {"reltime_sampling", 0}, + {"range_split_worker_num", 1}, + {"core_num", 1}, + {"realtime_sampling", 0}, + {"checkpointer_interval", 10}, + {"enable_shard_heap_defragment", 0}, + {"node_log_limit_mb", 1000}}; + LocalCcShards local_cc_shards( + 0, 0, tx_cnf, nullptr, nullptr, &ng_configs, 2, nullptr, nullptr, true); + CcShard shard(0, + 1, + 10000, + 10000, + false, + 0, + local_cc_shards, + nullptr, + nullptr, + &ng_configs, + 2); + shard.Init(); + std::string raft_path(""); + Sharder::Instance(0, + &ng_configs, + 0, + nullptr, + nullptr, + &local_cc_shards, + nullptr, + &raft_path); + + const size_t MAP_SIZE = 500; + const size_t LARGE_PAYLOAD_SIZE = 1024; // 1 KB per entry + + std::string table_name = "large_val_test"; + TableName tname(table_name, TableType::Primary, TableEngine::EloqSql); + auto cc_map = std::make_unique, + CompositeRecord, + true, + true>>( + &shard, 0, tname, 1, nullptr, true); + + // Insert entries that are guaranteed to be free (persistent, no locks). + std::vector> keys; + for (size_t i = 0; i < MAP_SIZE; i++) + { + keys.emplace_back(std::make_tuple(table_name, static_cast(i))); + } + std::vector *> key_ptrs; + for (auto &k : keys) + { + key_ptrs.push_back(&k); + } + REQUIRE(cc_map->BulkEmplaceFreeForTest(key_ptrs)); + REQUIRE(cc_map->VerifyOrdering() == MAP_SIZE); + + // Assign a shared large-payload object to every entry so that + // PayloadSize() > LARGE_PAYLOAD_SIZE / 2 (the threshold we will use). + auto large_payload = + std::make_shared(42, LARGE_PAYLOAD_SIZE); + cc_map->SetPayloadForTest(large_payload); + + // PART 1: Protection active – entries must NOT be evicted. + txservice_large_value_threshold = LARGE_PAYLOAD_SIZE / 2; + txservice_large_value_eviction_age = UINT64_MAX; + + size_t freed_with_protection = 0; + while (true) + { + auto [free_cnt, yield] = shard.Clean(); + shard.VerifyLruList(); + if (free_cnt == 0) + { + break; + } + freed_with_protection += free_cnt; + } + REQUIRE(freed_with_protection == 0); + REQUIRE(cc_map->VerifyOrdering() == MAP_SIZE); + + // Reset the clean cursor so Part 2 rescans from the beginning. + shard.ResetCleanStart(); + + // PART 2: Protection disabled – all free entries must be evicted. + txservice_large_value_threshold = 0; + + size_t freed_without_protection = 0; + while (true) + { + auto [free_cnt, yield] = shard.Clean(); + shard.VerifyLruList(); + if (free_cnt == 0) + { + break; + } + freed_without_protection += free_cnt; + } + REQUIRE(freed_without_protection == MAP_SIZE); + + // Restore global defaults so other tests are not affected. + txservice_large_value_threshold = 0; + txservice_large_value_eviction_age = 1000; + + local_cc_shards.Terminate(); +} + } // namespace txservice int main(int argc, char **argv) From cd8212825914f06f672c0e3e43d92a2342bdf8f2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Feb 2026 08:27:53 +0000 Subject: [PATCH 03/15] Document access_counter_ semantics and last_access_ts_ invariants - Expand the access_counter_ comment in cc_shard.h to cover both its primary use (O(1) relative-order comparison of two LRU pages during merge/redistribute) and its secondary use (measuring LRU age for the payload-size-aware eviction guard). - Add a comment to last_access_ts_ in cc_entry.h explaining that it stores the snapshot of access_counter_ at last-access time, that the difference (access_counter_ - last_access_ts_) is always >= 0 and a valid LRU-age proxy. - Enrich the comment in CcPageCleanGuardWithoutKickoutCc::CanBeCleaned() to explain the invariant that makes the age calculation correct and why it cooperates naturally with standard LRU eviction. Co-authored-by: xiexiaoy <5558602+xiexiaoy@users.noreply.github.com> --- tx_service/include/cc/cc_entry.h | 8 +++++++ tx_service/include/cc/cc_page_clean_guard.h | 19 ++++++++++++++-- tx_service/include/cc/cc_shard.h | 24 ++++++++++++++++----- 3 files changed, 44 insertions(+), 7 deletions(-) diff --git a/tx_service/include/cc/cc_entry.h b/tx_service/include/cc/cc_entry.h index 725a5a43..d27bea3b 100644 --- a/tx_service/include/cc/cc_entry.h +++ b/tx_service/include/cc/cc_entry.h @@ -2030,6 +2030,14 @@ struct LruPage CcMap *parent_map_{nullptr}; + // The value of CcShard::access_counter_ at the time this page was last + // moved to the LRU tail by UpdateLruList(). Comparing two pages' + // last_access_ts_ values determines which was accessed more recently + // (the larger value is more recent). The difference + // (shard.access_counter_ - last_access_ts_) + // is always non-negative and represents the number of global page-access + // events that have occurred since this page was last touched — a valid + // proxy for LRU age used by the payload-size-aware eviction guard. uint64_t last_access_ts_{0}; // The largest commit ts of dirty cc entries on this page. This value might diff --git a/tx_service/include/cc/cc_page_clean_guard.h b/tx_service/include/cc/cc_page_clean_guard.h index 04efd0a3..23afcd30 100644 --- a/tx_service/include/cc/cc_page_clean_guard.h +++ b/tx_service/include/cc/cc_page_clean_guard.h @@ -406,8 +406,23 @@ struct CcPageCleanGuardWithoutKickoutCc // Payload-size-aware eviction: protect large-value entries from early // eviction. An entry whose payload size exceeds the configured - // threshold is kept in memory until the page's LRU age is at least - // txservice_large_value_eviction_age access-counter units old. + // threshold is kept in memory until the page's LRU age reaches + // txservice_large_value_eviction_age. + // + // LRU age is computed as: + // age = access_counter_ - page->last_access_ts_ + // + // access_counter_ is a shard-wide monotonic counter incremented on + // every call to UpdateLruList() (i.e. every page access). When a page + // is moved to the LRU tail, last_access_ts_ is set to the current + // value of access_counter_. Because access_counter_ only ever + // increases and last_access_ts_ is always assigned from + // access_counter_, the difference is always >= 0 and measures the + // number of page-access events that have occurred *since* this page + // was last touched — a valid proxy for LRU age. A recently accessed + // page has age ≈ 0; a page sitting near the LRU head (cold) has a + // large age. The protection is therefore lifted naturally as the page + // ages through the LRU list, cooperating with the standard LRU policy. if (txservice_large_value_threshold > 0 && cce->PayloadSize() > txservice_large_value_threshold) { diff --git a/tx_service/include/cc/cc_shard.h b/tx_service/include/cc/cc_shard.h index 0670d34d..fbfdff20 100644 --- a/tx_service/include/cc/cc_shard.h +++ b/tx_service/include/cc/cc_shard.h @@ -1278,12 +1278,26 @@ class CcShard // simplifies handling of empty and one-element lists. LruPage head_ccp_, tail_ccp_; /** - * @brief Each time a page is accessed and moved to the tail of the LRU - * list, the counter is incremented and assigned to the page. Since in a - * double-linked list there is no way to determine the relative order of two - * pages, we use the number to indicate if a page precedes or succeeds the - * other in the list. + * @brief A monotonically-increasing shard-wide counter. It is + * incremented and assigned to a page's last_access_ts_ every time + * UpdateLruList() is called, i.e. whenever any page in this shard is + * moved to the LRU tail on a read, write, or emplace operation. * + * Primary use: since a doubly-linked list provides no O(1) way to compare + * the positions of two arbitrary nodes, comparing page1.last_access_ts_ + * against page2.last_access_ts_ lets the merge/redistribute code determine + * which of the two pages was accessed more recently without traversing the + * list. + * + * Secondary use: the expression (access_counter_ - page->last_access_ts_) + * is a valid measure of the page's LRU age — it equals the number of + * global page-access events that have occurred since this page was last + * moved to the LRU tail. Because access_counter_ is monotonically + * increasing and last_access_ts_ is always set to the current value of + * access_counter_ when the page is accessed, the difference is always ≥ 0; + * a recently-accessed page has age ≈ 0 while a page sitting near the LRU + * head has a large age. This property is exploited by the payload-size- + * aware eviction guard (CcPageCleanGuardWithoutKickoutCc::CanBeCleaned). */ uint64_t access_counter_{0}; From 30ba06e73f4b984a756d97475ee97c316321a266 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Feb 2026 09:17:53 +0000 Subject: [PATCH 04/15] Changes before error encountered Co-authored-by: xiexiaoy <5558602+xiexiaoy@users.noreply.github.com> --- tx_service/include/cc/cc_shard.h | 26 ++++++++++++++++++++++++++ tx_service/include/tx_service_common.h | 9 +++------ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/tx_service/include/cc/cc_shard.h b/tx_service/include/cc/cc_shard.h index fbfdff20..a1bc8a3d 100644 --- a/tx_service/include/cc/cc_shard.h +++ b/tx_service/include/cc/cc_shard.h @@ -1015,6 +1015,32 @@ class CcShard return access_counter_; } + /** + * @brief Returns the last_access_ts_ of the oldest page in the LRU list + * (i.e. the page immediately after the sentinel head). This is the + * smallest last_access_ts_ among all data pages currently in the list. + * + * The difference (access_counter_ - LruOldestTs()) equals the total span + * of last_access_ts_ values across all LRU pages, which is used as the + * denominator when computing a page's *relative* LRU age: + * + * relative_age_pct = (access_counter_ - page->last_access_ts_) + * / (access_counter_ - LruOldestTs()) + * + * Returns access_counter_ when the LRU list is empty (making total_span 0, + * which bypasses the large-value protection check). + */ + uint64_t LruOldestTs() const + { + const LruPage *oldest = head_ccp_.lru_next_; + if (oldest == &tail_ccp_) + { + // Empty LRU list: return access_counter_ so total_span == 0. + return access_counter_; + } + return oldest->last_access_ts_; + } + SystemHandler *GetSystemHandler() { return system_handler_; diff --git a/tx_service/include/tx_service_common.h b/tx_service/include/tx_service_common.h index 877c4c86..2e8a8a31 100644 --- a/tx_service/include/tx_service_common.h +++ b/tx_service/include/tx_service_common.h @@ -43,14 +43,11 @@ inline uint64_t txservice_max_standby_lag = 400000; // off, all data will be cached in memory. inline bool txservice_enable_cache_replacement = true; // Payload size threshold in bytes for large-value protection during cache -// eviction. Entries whose payload size exceeds this threshold are protected -// from eviction until their LRU age reaches txservice_large_value_eviction_age. +// eviction. Entries whose payload size exceeds this threshold are considered +// "large values" and are protected from eviction when their page is in the +// recent half of the LRU list (see CcPageCleanGuardWithoutKickoutCc). // A value of 0 disables large-value protection (default behaviour). inline size_t txservice_large_value_threshold = 0; -// Minimum LRU access age (measured in access counter units) that a page must -// have before its large-value entries become eligible for eviction. -// Only relevant when txservice_large_value_threshold > 0. -inline uint64_t txservice_large_value_eviction_age = 1000; // Whether to automatically redirect redis command to the leader node when the // data is not on the local node. From 55925be6a504023677db25efc52272ed52ca906d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Feb 2026 09:22:32 +0000 Subject: [PATCH 05/15] Replace absolute eviction-age threshold with self-calibrating relative LRU check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem 1: access_counter_ grows at unpredictable rates with hot keys, making txservice_large_value_eviction_age impossible to configure. Problem 2: cold large-value pages near the LRU head accumulate large absolute age but still smaller than the threshold, causing useless scan work on entries that cannot be evicted. Fix: replace the absolute threshold with a self-calibrating relative LRU-position check: page_age * 2 < total_span → page in recent half → protect page_age * 2 >= total_span → page in old half → evict where: page_age = access_counter_ - page->last_access_ts_ total_span = access_counter_ - LruOldestTs() This requires no user configuration and automatically adapts to any access rate. Cold large-value pages (near the head) always satisfy page_age * 2 >= total_span, so they are correctly evicted instead of blocking the scan. Hot pages (near the tail) always satisfy page_age * 2 < total_span, so they are correctly protected. - Add CcShard::LruOldestTs() returning last_access_ts_ of the LRU-head page. - Remove txservice_large_value_eviction_age global and config parsing. - Update comments in cc_shard.h, cc_entry.h, cc_page_clean_guard.h. - Update test: use two maps (old vs new) to verify old-half entries are evicted while new-half entries are retained under the relative check. Co-authored-by: xiexiaoy <5558602+xiexiaoy@users.noreply.github.com> --- tx_service/include/cc/cc_entry.h | 10 +- tx_service/include/cc/cc_page_clean_guard.h | 39 +++---- tx_service/include/cc/cc_shard.h | 19 ++-- tx_service/include/tx_service.h | 5 - tx_service/tests/CcPage-Test.cpp | 114 +++++++++++++------- 5 files changed, 112 insertions(+), 75 deletions(-) diff --git a/tx_service/include/cc/cc_entry.h b/tx_service/include/cc/cc_entry.h index d27bea3b..755ab291 100644 --- a/tx_service/include/cc/cc_entry.h +++ b/tx_service/include/cc/cc_entry.h @@ -2034,10 +2034,12 @@ struct LruPage // moved to the LRU tail by UpdateLruList(). Comparing two pages' // last_access_ts_ values determines which was accessed more recently // (the larger value is more recent). The difference - // (shard.access_counter_ - last_access_ts_) - // is always non-negative and represents the number of global page-access - // events that have occurred since this page was last touched — a valid - // proxy for LRU age used by the payload-size-aware eviction guard. + // page_age = (shard.access_counter_ - last_access_ts_) + // is always non-negative. The ratio + // page_age * 2 / (shard.access_counter_ - oldest_page.last_access_ts_) + // indicates relative LRU position: < 1 means the page is in the recent + // half of the list (protected from large-value eviction); >= 1 means it + // is in the old half (eligible for eviction). uint64_t last_access_ts_{0}; // The largest commit ts of dirty cc entries on this page. This value might diff --git a/tx_service/include/cc/cc_page_clean_guard.h b/tx_service/include/cc/cc_page_clean_guard.h index 23afcd30..e9a30b32 100644 --- a/tx_service/include/cc/cc_page_clean_guard.h +++ b/tx_service/include/cc/cc_page_clean_guard.h @@ -405,30 +405,31 @@ struct CcPageCleanGuardWithoutKickoutCc } // Payload-size-aware eviction: protect large-value entries from early - // eviction. An entry whose payload size exceeds the configured - // threshold is kept in memory until the page's LRU age reaches - // txservice_large_value_eviction_age. + // eviction while they are still in the recent half of the LRU list. // - // LRU age is computed as: - // age = access_counter_ - page->last_access_ts_ + // A page's LRU age is: + // page_age = access_counter_ - page->last_access_ts_ // - // access_counter_ is a shard-wide monotonic counter incremented on - // every call to UpdateLruList() (i.e. every page access). When a page - // is moved to the LRU tail, last_access_ts_ is set to the current - // value of access_counter_. Because access_counter_ only ever - // increases and last_access_ts_ is always assigned from - // access_counter_, the difference is always >= 0 and measures the - // number of page-access events that have occurred *since* this page - // was last touched — a valid proxy for LRU age. A recently accessed - // page has age ≈ 0; a page sitting near the LRU head (cold) has a - // large age. The protection is therefore lifted naturally as the page - // ages through the LRU list, cooperating with the standard LRU policy. + // The total span of the LRU list (distance between the oldest and + // newest page) is: + // total_span = access_counter_ - LruOldestTs() + // + // A large-value entry is protected (not evictable) when its page is + // in the *recent half* of the list, i.e.: + // page_age * 2 < total_span → protect (return false) + // page_age * 2 >= total_span → allow eviction (fall through) + // + // This is self-calibrating: it does not depend on any user-configured + // threshold and automatically adapts to the actual access rate. + // When the LRU list is empty (total_span == 0) the condition + // "0 * 2 < 0" is false, so the entry is always evictable. if (txservice_large_value_threshold > 0 && cce->PayloadSize() > txservice_large_value_threshold) { - uint64_t age = - this->cc_shard_->AccessCounter() - this->page_->last_access_ts_; - if (age < txservice_large_value_eviction_age) + uint64_t now = this->cc_shard_->AccessCounter(); + uint64_t page_age = now - this->page_->last_access_ts_; + uint64_t total_span = now - this->cc_shard_->LruOldestTs(); + if (page_age * 2 < total_span) { return {false, false}; } diff --git a/tx_service/include/cc/cc_shard.h b/tx_service/include/cc/cc_shard.h index a1bc8a3d..096b857c 100644 --- a/tx_service/include/cc/cc_shard.h +++ b/tx_service/include/cc/cc_shard.h @@ -1315,15 +1315,16 @@ class CcShard * which of the two pages was accessed more recently without traversing the * list. * - * Secondary use: the expression (access_counter_ - page->last_access_ts_) - * is a valid measure of the page's LRU age — it equals the number of - * global page-access events that have occurred since this page was last - * moved to the LRU tail. Because access_counter_ is monotonically - * increasing and last_access_ts_ is always set to the current value of - * access_counter_ when the page is accessed, the difference is always ≥ 0; - * a recently-accessed page has age ≈ 0 while a page sitting near the LRU - * head has a large age. This property is exploited by the payload-size- - * aware eviction guard (CcPageCleanGuardWithoutKickoutCc::CanBeCleaned). + * Secondary use: the expressions + * page_age = access_counter_ - page->last_access_ts_ + * total_span = access_counter_ - oldest_page->last_access_ts_ + * together indicate the *relative* LRU position of a page. The test + * page_age * 2 < total_span + * is true for pages in the recent half of the list and false for pages + * in the old half. This self-calibrating check is used by the + * payload-size-aware eviction guard + * (CcPageCleanGuardWithoutKickoutCc::CanBeCleaned) to protect large-value + * entries from being evicted while they are still "hot". */ uint64_t access_counter_{0}; diff --git a/tx_service/include/tx_service.h b/tx_service/include/tx_service.h index 5b83596a..771fc071 100644 --- a/tx_service/include/tx_service.h +++ b/tx_service/include/tx_service.h @@ -1211,11 +1211,6 @@ class TxService { txservice_large_value_threshold = conf.at("large_value_threshold"); } - if (conf.find("large_value_eviction_age") != conf.end()) - { - txservice_large_value_eviction_age = - conf.at("large_value_eviction_age"); - } if (txservice_skip_kv) { diff --git a/tx_service/tests/CcPage-Test.cpp b/tx_service/tests/CcPage-Test.cpp index 7e30ecbc..403193d9 100644 --- a/tx_service/tests/CcPage-Test.cpp +++ b/tx_service/tests/CcPage-Test.cpp @@ -237,42 +237,79 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") nullptr, &raft_path); - const size_t MAP_SIZE = 500; + const size_t MAP_SIZE = 100; const size_t LARGE_PAYLOAD_SIZE = 1024; // 1 KB per entry - std::string table_name = "large_val_test"; - TableName tname(table_name, TableType::Primary, TableEngine::EloqSql); - auto cc_map = std::make_unique, - CompositeRecord, - true, - true>>( - &shard, 0, tname, 1, nullptr, true); - - // Insert entries that are guaranteed to be free (persistent, no locks). - std::vector> keys; - for (size_t i = 0; i < MAP_SIZE; i++) - { - keys.emplace_back(std::make_tuple(table_name, static_cast(i))); - } - std::vector *> key_ptrs; - for (auto &k : keys) + // "Old" cc map: inserted first, so its pages have older last_access_ts_. + std::string old_table_name = "large_val_old"; + TableName old_tname( + old_table_name, TableType::Primary, TableEngine::EloqSql); + auto old_map = + std::make_unique, + CompositeRecord, + true, + true>>( + &shard, 0, old_tname, 1, nullptr, true); + + // "New" cc map: inserted after, so its pages have newer last_access_ts_. + std::string new_table_name = "large_val_new"; + TableName new_tname( + new_table_name, TableType::Primary, TableEngine::EloqSql); + auto new_map = + std::make_unique, + CompositeRecord, + true, + true>>( + &shard, 0, new_tname, 1, nullptr, true); + + auto make_keys = + [](const std::string &tname, + size_t cnt, + std::vector> &key_storage) + -> std::vector *> { - key_ptrs.push_back(&k); - } - REQUIRE(cc_map->BulkEmplaceFreeForTest(key_ptrs)); - REQUIRE(cc_map->VerifyOrdering() == MAP_SIZE); + for (size_t i = 0; i < cnt; i++) + { + key_storage.emplace_back( + std::make_tuple(tname, static_cast(i))); + } + std::vector *> ptrs; + for (auto &k : key_storage) + { + ptrs.push_back(&k); + } + return ptrs; + }; + + std::vector> old_key_storage; + auto old_ptrs = make_keys(old_table_name, MAP_SIZE, old_key_storage); + // Insert old map first (its pages will have smaller last_access_ts_). + REQUIRE(old_map->BulkEmplaceFreeForTest(old_ptrs)); + REQUIRE(old_map->VerifyOrdering() == MAP_SIZE); - // Assign a shared large-payload object to every entry so that - // PayloadSize() > LARGE_PAYLOAD_SIZE / 2 (the threshold we will use). + std::vector> new_key_storage; + auto new_ptrs = make_keys(new_table_name, MAP_SIZE, new_key_storage); + // Insert new map after (its pages will have larger last_access_ts_). + REQUIRE(new_map->BulkEmplaceFreeForTest(new_ptrs)); + REQUIRE(new_map->VerifyOrdering() == MAP_SIZE); + + // Give every entry a large payload (> threshold). auto large_payload = std::make_shared(42, LARGE_PAYLOAD_SIZE); - cc_map->SetPayloadForTest(large_payload); + old_map->SetPayloadForTest(large_payload); + new_map->SetPayloadForTest(large_payload); - // PART 1: Protection active – entries must NOT be evicted. + // Enable large-value protection with threshold = LARGE_PAYLOAD_SIZE / 2. txservice_large_value_threshold = LARGE_PAYLOAD_SIZE / 2; - txservice_large_value_eviction_age = UINT64_MAX; - size_t freed_with_protection = 0; + // The LRU list now has old_map pages near the head and new_map pages near + // the tail. For a page in old_map: + // page_age ≈ total_span (near the oldest end) + // page_age * 2 >= total_span → evictable + // For a page in new_map: + // page_age ≈ 0 (near the newest end) + // page_age * 2 < total_span → protected + size_t freed = 0; while (true) { auto [free_cnt, yield] = shard.Clean(); @@ -281,18 +318,20 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") { break; } - freed_with_protection += free_cnt; + freed += free_cnt; } - REQUIRE(freed_with_protection == 0); - REQUIRE(cc_map->VerifyOrdering() == MAP_SIZE); - // Reset the clean cursor so Part 2 rescans from the beginning. - shard.ResetCleanStart(); + // Old-map entries (cold, in the older half) must all be evicted. + REQUIRE(freed == MAP_SIZE); + // New-map entries (hot, in the newer half) must all be retained. + REQUIRE(new_map->VerifyOrdering() == MAP_SIZE); - // PART 2: Protection disabled – all free entries must be evicted. + // PART 2: With protection disabled, the remaining new_map entries must be + // evicted normally. + shard.ResetCleanStart(); txservice_large_value_threshold = 0; - size_t freed_without_protection = 0; + size_t freed2 = 0; while (true) { auto [free_cnt, yield] = shard.Clean(); @@ -301,13 +340,12 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") { break; } - freed_without_protection += free_cnt; + freed2 += free_cnt; } - REQUIRE(freed_without_protection == MAP_SIZE); + REQUIRE(freed2 == MAP_SIZE); - // Restore global defaults so other tests are not affected. + // Restore global defaults. txservice_large_value_threshold = 0; - txservice_large_value_eviction_age = 1000; local_cc_shards.Terminate(); } From ebf1654dea981c1c66759195b5fd02721155c4e9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Feb 2026 11:18:44 +0000 Subject: [PATCH 06/15] Replace broken relative-position check with LRU-boost eviction guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous relative-position check (page_age * 2 < total_span) was fundamentally flawed: when a large-value page drifts to the LRU head it IS the oldest page, so total_span == page_age and page_age * 2 >= total_span is always true — the page was always evictable at the head. New approach: LRU boost on protection. - CanBeCleaned (WithoutKickoutCc) protects any free entry whose PayloadSize() > txservice_large_value_threshold and sets a new mutable flag has_blocked_large_value_ in the guard. - CleanPage propagates the flag via new out_has_blocked_large_value parameter. - CleanPageAndReBalance: after RebalancePage captures next_page, if the flag is set and the page is non-empty and still in the LRU list, call UpdateLruList(page, false) to move it to the tail. After the boost page->lru_next_ == &tail_ccp_, so next_page = page->lru_next_ naturally stops the current scan. Effect: - Large-value pages are never evicted by the regular scan (they keep getting boosted whenever the scan visits them). - Small-value pages at the head are evicted normally (scan progresses past them before hitting the first large-value page). - The scan never loops: after boosting a page and stopping, the next scan starts from the head which no longer contains that large-value page (it was just moved to the tail). - No user-visible configuration change: txservice_large_value_threshold is still the only knob. Removes LruOldestTs() (no longer needed) from cc_shard.h. Restores single-map test (no more two-map approach). Co-authored-by: xiexiaoy <5558602+xiexiaoy@users.noreply.github.com> --- tx_service/include/cc/cc_entry.h | 8 +- tx_service/include/cc/cc_page_clean_guard.h | 45 ++++---- tx_service/include/cc/cc_shard.h | 41 +------- tx_service/include/cc/template_cc_map.h | 33 +++++- tx_service/include/tx_service_common.h | 9 +- tx_service/tests/CcPage-Test.cpp | 111 ++++++-------------- 6 files changed, 96 insertions(+), 151 deletions(-) diff --git a/tx_service/include/cc/cc_entry.h b/tx_service/include/cc/cc_entry.h index 755ab291..adec3ef4 100644 --- a/tx_service/include/cc/cc_entry.h +++ b/tx_service/include/cc/cc_entry.h @@ -2033,13 +2033,7 @@ struct LruPage // The value of CcShard::access_counter_ at the time this page was last // moved to the LRU tail by UpdateLruList(). Comparing two pages' // last_access_ts_ values determines which was accessed more recently - // (the larger value is more recent). The difference - // page_age = (shard.access_counter_ - last_access_ts_) - // is always non-negative. The ratio - // page_age * 2 / (shard.access_counter_ - oldest_page.last_access_ts_) - // indicates relative LRU position: < 1 means the page is in the recent - // half of the list (protected from large-value eviction); >= 1 means it - // is in the old half (eligible for eviction). + // (the larger value is more recent). uint64_t last_access_ts_{0}; // The largest commit ts of dirty cc entries on this page. This value might diff --git a/tx_service/include/cc/cc_page_clean_guard.h b/tx_service/include/cc/cc_page_clean_guard.h index e9a30b32..934dfe3d 100644 --- a/tx_service/include/cc/cc_page_clean_guard.h +++ b/tx_service/include/cc/cc_page_clean_guard.h @@ -157,6 +157,14 @@ struct CcPageCleanGuard return clean_obj_cnt_; } + // Returns true if CanBeCleaned refused to evict at least one entry because + // it has a large payload. When this is set, CleanPageAndReBalance will + // boost the page to the LRU tail so it does not accumulate at the head. + bool HasBlockedLargeValue() const + { + return has_blocked_large_value_; + } + protected: struct CanBeCleanedResult { @@ -343,6 +351,9 @@ struct CcPageCleanGuard uint64_t dirty_freed_cnt_{0}; bool evicted_valid_key_{false}; uint64_t clean_obj_cnt_{0}; + // Set by CanBeCleaned when a large-value entry is protected. Mutable so it + // can be set from the const CanBeCleaned override. + mutable bool has_blocked_large_value_{false}; private: std::bitset:: @@ -404,35 +415,17 @@ struct CcPageCleanGuardWithoutKickoutCc return {false, false}; } - // Payload-size-aware eviction: protect large-value entries from early - // eviction while they are still in the recent half of the LRU list. - // - // A page's LRU age is: - // page_age = access_counter_ - page->last_access_ts_ - // - // The total span of the LRU list (distance between the oldest and - // newest page) is: - // total_span = access_counter_ - LruOldestTs() - // - // A large-value entry is protected (not evictable) when its page is - // in the *recent half* of the list, i.e.: - // page_age * 2 < total_span → protect (return false) - // page_age * 2 >= total_span → allow eviction (fall through) - // - // This is self-calibrating: it does not depend on any user-configured - // threshold and automatically adapts to the actual access rate. - // When the LRU list is empty (total_span == 0) the condition - // "0 * 2 < 0" is false, so the entry is always evictable. + // Payload-size-aware eviction: protect large-value entries from + // eviction. When this guard refuses to evict an entry because of its + // payload size, it sets has_blocked_large_value_ so that + // CleanPageAndReBalance can boost the page to the LRU tail, preventing + // it from accumulating at the head and being repeatedly visited by the + // scan without making progress. if (txservice_large_value_threshold > 0 && cce->PayloadSize() > txservice_large_value_threshold) { - uint64_t now = this->cc_shard_->AccessCounter(); - uint64_t page_age = now - this->page_->last_access_ts_; - uint64_t total_span = now - this->cc_shard_->LruOldestTs(); - if (page_age * 2 < total_span) - { - return {false, false}; - } + this->has_blocked_large_value_ = true; + return {false, false}; } return {true, false}; diff --git a/tx_service/include/cc/cc_shard.h b/tx_service/include/cc/cc_shard.h index 096b857c..f8c16e68 100644 --- a/tx_service/include/cc/cc_shard.h +++ b/tx_service/include/cc/cc_shard.h @@ -1015,32 +1015,6 @@ class CcShard return access_counter_; } - /** - * @brief Returns the last_access_ts_ of the oldest page in the LRU list - * (i.e. the page immediately after the sentinel head). This is the - * smallest last_access_ts_ among all data pages currently in the list. - * - * The difference (access_counter_ - LruOldestTs()) equals the total span - * of last_access_ts_ values across all LRU pages, which is used as the - * denominator when computing a page's *relative* LRU age: - * - * relative_age_pct = (access_counter_ - page->last_access_ts_) - * / (access_counter_ - LruOldestTs()) - * - * Returns access_counter_ when the LRU list is empty (making total_span 0, - * which bypasses the large-value protection check). - */ - uint64_t LruOldestTs() const - { - const LruPage *oldest = head_ccp_.lru_next_; - if (oldest == &tail_ccp_) - { - // Empty LRU list: return access_counter_ so total_span == 0. - return access_counter_; - } - return oldest->last_access_ts_; - } - SystemHandler *GetSystemHandler() { return system_handler_; @@ -1315,16 +1289,11 @@ class CcShard * which of the two pages was accessed more recently without traversing the * list. * - * Secondary use: the expressions - * page_age = access_counter_ - page->last_access_ts_ - * total_span = access_counter_ - oldest_page->last_access_ts_ - * together indicate the *relative* LRU position of a page. The test - * page_age * 2 < total_span - * is true for pages in the recent half of the list and false for pages - * in the old half. This self-calibrating check is used by the - * payload-size-aware eviction guard - * (CcPageCleanGuardWithoutKickoutCc::CanBeCleaned) to protect large-value - * entries from being evicted while they are still "hot". + * Secondary use: when the large-value eviction guard protects a page + * (CcPageCleanGuardWithoutKickoutCc::CanBeCleaned), CleanPageAndReBalance + * calls UpdateLruList to boost the page to the tail. This keeps large- + * value pages away from the LRU head, preventing both premature eviction + * and wasted scan work. */ uint64_t access_counter_{0}; diff --git a/tx_service/include/cc/template_cc_map.h b/tx_service/include/cc/template_cc_map.h index 1f5c5ef4..34cc6fe2 100644 --- a/tx_service/include/cc/template_cc_map.h +++ b/tx_service/include/cc/template_cc_map.h @@ -8721,7 +8721,15 @@ class TemplateCcMap : public CcMap CcPage *>( lru_page); auto page_it = ccmp_.end(); - bool success = CleanPage(page, page_it, free_cnt, kickout_cc); + + // Only collect the large-value block flag for regular LRU scans + // (no kickout_cc). Forced evictions (range migration etc.) should + // not be affected by the large-value protection. + bool has_blocked_large_value = false; + bool *out_flag = + (kickout_cc == nullptr) ? &has_blocked_large_value : nullptr; + + bool success = CleanPage(page, page_it, free_cnt, kickout_cc, out_flag); // Output the operation result if the caller care it. if (is_success != nullptr) @@ -8732,6 +8740,21 @@ class TemplateCcMap : public CcMap LruPage *next_page = RebalancePage(page, page_it, success, kickout_cc == nullptr); + // Large-value boost: if CanBeCleaned refused to evict at least one + // entry because of its payload size, move this page to the LRU tail. + // This prevents large-value pages from accumulating at the LRU head + // and being repeatedly scanned without progress. After calling + // UpdateLruList the page is at the tail (page->lru_next_ == &tail), + // so returning page->lru_next_ as the next scan page naturally stops + // the current scan. The next scan starts from the head, which will + // no longer contain this large-value page. + if (has_blocked_large_value && !page->Empty() && + page->lru_next_ != nullptr) + { + shard_->UpdateLruList(page, false); + next_page = page->lru_next_; // == &tail_ccp_ after the boost + } + return {free_cnt, next_page}; } @@ -11609,7 +11632,8 @@ class TemplateCcMap : public CcMap CcPage *page, BtreeMapIterator &page_it, size_t &free_cnt, - KickoutCcEntryCc *kickout_cc = nullptr) + KickoutCcEntryCc *kickout_cc = nullptr, + bool *out_has_blocked_large_value = nullptr) { bool success; @@ -11666,6 +11690,11 @@ class TemplateCcMap : public CcMap success = clean_guard->CleanSuccess(); + if (out_has_blocked_large_value != nullptr) + { + *out_has_blocked_large_value = clean_guard->HasBlockedLargeValue(); + } + std::destroy_at(buffer); return success; } diff --git a/tx_service/include/tx_service_common.h b/tx_service/include/tx_service_common.h index 2e8a8a31..714ed824 100644 --- a/tx_service/include/tx_service_common.h +++ b/tx_service/include/tx_service_common.h @@ -43,10 +43,11 @@ inline uint64_t txservice_max_standby_lag = 400000; // off, all data will be cached in memory. inline bool txservice_enable_cache_replacement = true; // Payload size threshold in bytes for large-value protection during cache -// eviction. Entries whose payload size exceeds this threshold are considered -// "large values" and are protected from eviction when their page is in the -// recent half of the LRU list (see CcPageCleanGuardWithoutKickoutCc). -// A value of 0 disables large-value protection (default behaviour). +// eviction. When a free entry's payload size exceeds this threshold, the +// regular LRU scan will not evict it; instead it boosts the page to the LRU +// tail so it stays away from the head and is not repeatedly visited by the +// scan without making progress. A value of 0 disables large-value protection +// (default behaviour). inline size_t txservice_large_value_threshold = 0; // Whether to automatically redirect redis command to the leader node when the diff --git a/tx_service/tests/CcPage-Test.cpp b/tx_service/tests/CcPage-Test.cpp index 403193d9..9dfbea1c 100644 --- a/tx_service/tests/CcPage-Test.cpp +++ b/tx_service/tests/CcPage-Test.cpp @@ -237,79 +237,42 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") nullptr, &raft_path); - const size_t MAP_SIZE = 100; + const size_t MAP_SIZE = 500; const size_t LARGE_PAYLOAD_SIZE = 1024; // 1 KB per entry - // "Old" cc map: inserted first, so its pages have older last_access_ts_. - std::string old_table_name = "large_val_old"; - TableName old_tname( - old_table_name, TableType::Primary, TableEngine::EloqSql); - auto old_map = - std::make_unique, - CompositeRecord, - true, - true>>( - &shard, 0, old_tname, 1, nullptr, true); - - // "New" cc map: inserted after, so its pages have newer last_access_ts_. - std::string new_table_name = "large_val_new"; - TableName new_tname( - new_table_name, TableType::Primary, TableEngine::EloqSql); - auto new_map = - std::make_unique, - CompositeRecord, - true, - true>>( - &shard, 0, new_tname, 1, nullptr, true); - - auto make_keys = - [](const std::string &tname, - size_t cnt, - std::vector> &key_storage) - -> std::vector *> + std::string table_name = "large_val_test"; + TableName tname(table_name, TableType::Primary, TableEngine::EloqSql); + auto cc_map = std::make_unique, + CompositeRecord, + true, + true>>( + &shard, 0, tname, 1, nullptr, true); + + // Insert free (persistent, unlocked) entries. + std::vector> keys; + for (size_t i = 0; i < MAP_SIZE; i++) { - for (size_t i = 0; i < cnt; i++) - { - key_storage.emplace_back( - std::make_tuple(tname, static_cast(i))); - } - std::vector *> ptrs; - for (auto &k : key_storage) - { - ptrs.push_back(&k); - } - return ptrs; - }; - - std::vector> old_key_storage; - auto old_ptrs = make_keys(old_table_name, MAP_SIZE, old_key_storage); - // Insert old map first (its pages will have smaller last_access_ts_). - REQUIRE(old_map->BulkEmplaceFreeForTest(old_ptrs)); - REQUIRE(old_map->VerifyOrdering() == MAP_SIZE); - - std::vector> new_key_storage; - auto new_ptrs = make_keys(new_table_name, MAP_SIZE, new_key_storage); - // Insert new map after (its pages will have larger last_access_ts_). - REQUIRE(new_map->BulkEmplaceFreeForTest(new_ptrs)); - REQUIRE(new_map->VerifyOrdering() == MAP_SIZE); + keys.emplace_back(std::make_tuple(table_name, static_cast(i))); + } + std::vector *> key_ptrs; + for (auto &k : keys) + { + key_ptrs.push_back(&k); + } + REQUIRE(cc_map->BulkEmplaceFreeForTest(key_ptrs)); + REQUIRE(cc_map->VerifyOrdering() == MAP_SIZE); - // Give every entry a large payload (> threshold). + // Give every entry a large payload (> threshold we will use). auto large_payload = std::make_shared(42, LARGE_PAYLOAD_SIZE); - old_map->SetPayloadForTest(large_payload); - new_map->SetPayloadForTest(large_payload); + cc_map->SetPayloadForTest(large_payload); - // Enable large-value protection with threshold = LARGE_PAYLOAD_SIZE / 2. + // PART 1: Protection enabled. + // The scan visits a page with large values, boosts it to the LRU tail, + // and stops (returns tail_ccp_ as next). No entries are evicted. txservice_large_value_threshold = LARGE_PAYLOAD_SIZE / 2; - // The LRU list now has old_map pages near the head and new_map pages near - // the tail. For a page in old_map: - // page_age ≈ total_span (near the oldest end) - // page_age * 2 >= total_span → evictable - // For a page in new_map: - // page_age ≈ 0 (near the newest end) - // page_age * 2 < total_span → protected - size_t freed = 0; + size_t freed_with_protection = 0; while (true) { auto [free_cnt, yield] = shard.Clean(); @@ -318,20 +281,16 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") { break; } - freed += free_cnt; + freed_with_protection += free_cnt; } + REQUIRE(freed_with_protection == 0); + REQUIRE(cc_map->VerifyOrdering() == MAP_SIZE); - // Old-map entries (cold, in the older half) must all be evicted. - REQUIRE(freed == MAP_SIZE); - // New-map entries (hot, in the newer half) must all be retained. - REQUIRE(new_map->VerifyOrdering() == MAP_SIZE); - - // PART 2: With protection disabled, the remaining new_map entries must be - // evicted normally. + // PART 2: Protection disabled – all free entries must be evicted normally. shard.ResetCleanStart(); txservice_large_value_threshold = 0; - size_t freed2 = 0; + size_t freed_without_protection = 0; while (true) { auto [free_cnt, yield] = shard.Clean(); @@ -340,11 +299,11 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") { break; } - freed2 += free_cnt; + freed_without_protection += free_cnt; } - REQUIRE(freed2 == MAP_SIZE); + REQUIRE(freed_without_protection == MAP_SIZE); - // Restore global defaults. + // Restore global defaults so other tests are not affected. txservice_large_value_threshold = 0; local_cc_shards.Terminate(); From 8ced98f295d0ea2f4e769fa92bb4f9bba3c0b03a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Feb 2026 03:11:11 +0000 Subject: [PATCH 07/15] Replace LRU-boost with zone-separation for large-value eviction protection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous approach (boost to tail when scan encounters a large-value page) was reactive: the page had to reach the LRU head before being boosted back. The user asked for a proactive approach: large-value pages should be harder to reach the head in the first place. New approach: zone separation in UpdateLruList. The LRU list is logically divided into two zones: head ← [small-value pages] ← lru_large_value_zone_head_ ← [large-value pages] ← tail * Large-value pages (has_large_value_ == true) are inserted at the true tail (most-recently-used end) by UpdateLruList. * Small-value pages are inserted just before lru_large_value_zone_head_ (the boundary). When no large-value pages exist the boundary is &tail_ccp_, so behaviour is identical to standard tail insertion. This means: * Large-value pages naturally stay near the tail regardless of how many small-value pages are accessed, because every small-value insertion goes before the large-value zone, not inside it. * The LRU eviction scan (head → tail) evicts small-value pages first. Large-value pages are only evicted after all small-value pages are gone, or when memory pressure is severe enough that the scan reaches the large-value zone. * No blocking in CanBeCleaned: large-value pages are still evictable — protection is positional, not absolute. Changes: - cc_entry.h: add has_large_value_ flag to LruPage. - cc_shard.h/cpp: add lru_large_value_zone_head_ pointer; maintain it in DetachLru, ReplaceLru, and UpdateLruList; add LruHead() / LruLargeValueZoneHead() getters for testing. - cc_page_clean_guard.h: CanBeCleaned sets page->has_large_value_ = true and signals CleanPageAndReBalance to re-zone the page; no longer returns {false, false} for large-value entries (no blocking). - template_cc_map.h: CleanPageAndReBalance calls UpdateLruList to re-zone a page when has_large_value_ was freshly set; scan continues from the pre-rezone next_page so no pages are skipped. Add RezoneAsLargeValueForTest(). - CcPage-Test.cpp: rewrite test to verify LRU list structure after re-zoning (small-value pages before zone_head, large-value pages from zone_head to tail) and that a new small-value insertion stays before the zone head. Co-authored-by: xiexiaoy <5558602+xiexiaoy@users.noreply.github.com> --- tx_service/include/cc/cc_entry.h | 10 ++ tx_service/include/cc/cc_page_clean_guard.h | 28 ++-- tx_service/include/cc/cc_shard.h | 38 ++++- tx_service/include/cc/template_cc_map.h | 56 ++++--- tx_service/include/tx_service_common.h | 10 +- tx_service/src/cc/cc_shard.cpp | 57 ++++++-- tx_service/tests/CcPage-Test.cpp | 153 ++++++++++++++------ 7 files changed, 256 insertions(+), 96 deletions(-) diff --git a/tx_service/include/cc/cc_entry.h b/tx_service/include/cc/cc_entry.h index adec3ef4..a3107399 100644 --- a/tx_service/include/cc/cc_entry.h +++ b/tx_service/include/cc/cc_entry.h @@ -2036,6 +2036,16 @@ struct LruPage // (the larger value is more recent). uint64_t last_access_ts_{0}; + // True when at least one entry on this page has a payload whose size + // exceeds txservice_large_value_threshold. Set lazily by CanBeCleaned when + // a large payload is first detected. Once set it is never cleared (even if + // the large entries are later evicted) because the flag is only used as a + // cheap signal to keep the page in the large-value zone. + // + // Large-value pages are kept in the tail (recent) zone of the LRU list so + // that they are evicted only after all small-value pages have been evicted. + bool has_large_value_{false}; + // The largest commit ts of dirty cc entries on this page. This value might // be larger than the actual max commit ts of cc entries. Currently used to // decide if this page has dirty data after a given ts. diff --git a/tx_service/include/cc/cc_page_clean_guard.h b/tx_service/include/cc/cc_page_clean_guard.h index 934dfe3d..42d2e9c4 100644 --- a/tx_service/include/cc/cc_page_clean_guard.h +++ b/tx_service/include/cc/cc_page_clean_guard.h @@ -157,9 +157,11 @@ struct CcPageCleanGuard return clean_obj_cnt_; } - // Returns true if CanBeCleaned refused to evict at least one entry because - // it has a large payload. When this is set, CleanPageAndReBalance will - // boost the page to the LRU tail so it does not accumulate at the head. + // Returns true if CanBeCleaned freshly set the has_large_value_ flag on + // the page during this clean pass (i.e. the page was just discovered to + // have a large-value entry for the first time). When this is set, + // CleanPageAndReBalance will call UpdateLruList to move the page from the + // small-value zone into the large-value zone immediately. bool HasBlockedLargeValue() const { return has_blocked_large_value_; @@ -415,17 +417,21 @@ struct CcPageCleanGuardWithoutKickoutCc return {false, false}; } - // Payload-size-aware eviction: protect large-value entries from - // eviction. When this guard refuses to evict an entry because of its - // payload size, it sets has_blocked_large_value_ so that - // CleanPageAndReBalance can boost the page to the LRU tail, preventing - // it from accumulating at the head and being repeatedly visited by the - // scan without making progress. + // Payload-size-aware eviction: mark the page as a large-value page so + // that UpdateLruList places it in the large-value zone (tail end) of + // the LRU list. The page is still evictable here — protection is + // enforced positionally (large-value pages are evicted only after all + // small-value pages). Setting has_blocked_large_value_ signals + // CleanPageAndReBalance to re-zone the page immediately via + // UpdateLruList in case it was in the small-value zone. if (txservice_large_value_threshold > 0 && cce->PayloadSize() > txservice_large_value_threshold) { - this->has_blocked_large_value_ = true; - return {false, false}; + if (!this->page_->has_large_value_) + { + this->page_->has_large_value_ = true; + this->has_blocked_large_value_ = true; + } } return {true, false}; diff --git a/tx_service/include/cc/cc_shard.h b/tx_service/include/cc/cc_shard.h index f8c16e68..091c85f0 100644 --- a/tx_service/include/cc/cc_shard.h +++ b/tx_service/include/cc/cc_shard.h @@ -1005,6 +1005,13 @@ class CcShard clean_start_ccp_ = ccp; } + /// Returns the sentinel head of the LRU list. The first real page is + /// LruHead()->lru_next_. Used by tests to traverse the list. + const LruPage *LruHead() const + { + return &head_ccp_; + } + bool OutOfMemory() { return clean_start_ccp_ != nullptr && clean_start_ccp_ == &tail_ccp_; @@ -1015,6 +1022,18 @@ class CcShard return access_counter_; } + /** + * @brief Returns the current head of the large-value zone — the first + * large-value page in the LRU list (the one closest to the sentinel head). + * + * Used for testing: verifies that the large-value zone is non-empty and + * correctly maintained. + */ + const LruPage *LruLargeValueZoneHead() const + { + return lru_large_value_zone_head_; + } + SystemHandler *GetSystemHandler() { return system_handler_; @@ -1281,25 +1300,30 @@ class CcShard * @brief A monotonically-increasing shard-wide counter. It is * incremented and assigned to a page's last_access_ts_ every time * UpdateLruList() is called, i.e. whenever any page in this shard is - * moved to the LRU tail on a read, write, or emplace operation. + * moved to its target position in the LRU list. * * Primary use: since a doubly-linked list provides no O(1) way to compare * the positions of two arbitrary nodes, comparing page1.last_access_ts_ * against page2.last_access_ts_ lets the merge/redistribute code determine * which of the two pages was accessed more recently without traversing the * list. - * - * Secondary use: when the large-value eviction guard protects a page - * (CcPageCleanGuardWithoutKickoutCc::CanBeCleaned), CleanPageAndReBalance - * calls UpdateLruList to boost the page to the tail. This keeps large- - * value pages away from the LRU head, preventing both premature eviction - * and wasted scan work. */ uint64_t access_counter_{0}; // Page to start looking for cc entries to kick out on LRU chain. LruPage *clean_start_ccp_; + // Head of the large-value zone in the LRU list. Large-value pages are + // clustered at the tail (recent) end of the list so they are evicted only + // after all small-value pages have been evicted. This pointer points to the + // first (oldest) large-value page, i.e. the boundary between the two zones: + // + // head ← [small-value pages] ← lru_large_value_zone_head_ ← [large-value + // pages] ← tail + // + // It equals &tail_ccp_ when no large-value pages are in the list. + LruPage *lru_large_value_zone_head_; + // The number of ccentry in all the ccmap of this ccshard. uint64_t size_; diff --git a/tx_service/include/cc/template_cc_map.h b/tx_service/include/cc/template_cc_map.h index 34cc6fe2..ec8c4b03 100644 --- a/tx_service/include/cc/template_cc_map.h +++ b/tx_service/include/cc/template_cc_map.h @@ -8722,12 +8722,11 @@ class TemplateCcMap : public CcMap lru_page); auto page_it = ccmp_.end(); - // Only collect the large-value block flag for regular LRU scans - // (no kickout_cc). Forced evictions (range migration etc.) should - // not be affected by the large-value protection. - bool has_blocked_large_value = false; - bool *out_flag = - (kickout_cc == nullptr) ? &has_blocked_large_value : nullptr; + // Only collect the re-zone flag for regular LRU scans (no kickout_cc). + // Forced evictions (range migration etc.) should not be affected by + // the large-value protection. + bool needs_rezoning = false; + bool *out_flag = (kickout_cc == nullptr) ? &needs_rezoning : nullptr; bool success = CleanPage(page, page_it, free_cnt, kickout_cc, out_flag); @@ -8737,22 +8736,21 @@ class TemplateCcMap : public CcMap *is_success = success; } + // Capture next_page BEFORE potentially moving page (re-zoning changes + // page->lru_next_). LruPage *next_page = RebalancePage(page, page_it, success, kickout_cc == nullptr); - // Large-value boost: if CanBeCleaned refused to evict at least one - // entry because of its payload size, move this page to the LRU tail. - // This prevents large-value pages from accumulating at the LRU head - // and being repeatedly scanned without progress. After calling - // UpdateLruList the page is at the tail (page->lru_next_ == &tail), - // so returning page->lru_next_ as the next scan page naturally stops - // the current scan. The next scan starts from the head, which will - // no longer contain this large-value page. - if (has_blocked_large_value && !page->Empty() && - page->lru_next_ != nullptr) + // Re-zone: CanBeCleaned just discovered that this page has a large- + // value entry (has_large_value_ was freshly set). Move the page into + // the large-value zone (LRU tail end) via UpdateLruList so that future + // small-value insertions are placed before it and it is only evicted + // after all small-value pages are gone. + if (needs_rezoning && !page->Empty() && page->lru_next_ != nullptr) { shard_->UpdateLruList(page, false); - next_page = page->lru_next_; // == &tail_ccp_ after the boost + // The scan should continue from the page we had lined up before + // the re-zone, not from the page's new position at the tail. } return {free_cnt, next_page}; @@ -9254,6 +9252,30 @@ class TemplateCcMap : public CcMap } } + /** + * @brief Marks all pages in this cc map as large-value pages and + * immediately re-zones them via UpdateLruList so that they are placed in + * the large-value zone (tail end) of the LRU list. Used in tests for the + * zone-separation eviction policy. + */ + void RezoneAsLargeValueForTest() + { + for (auto &[key, page_ptr] : ccmp_) + { + CcPage *page = + page_ptr.get(); + if (!page->has_large_value_) + { + page->has_large_value_ = true; + } + // Move page to large-value zone. + if (page->lru_next_ != nullptr) + { + shard_->UpdateLruList(page, false); + } + } + } + protected: void OnCommittedUpdate( const CcEntry *cce, diff --git a/tx_service/include/tx_service_common.h b/tx_service/include/tx_service_common.h index 714ed824..824f0f33 100644 --- a/tx_service/include/tx_service_common.h +++ b/tx_service/include/tx_service_common.h @@ -43,11 +43,11 @@ inline uint64_t txservice_max_standby_lag = 400000; // off, all data will be cached in memory. inline bool txservice_enable_cache_replacement = true; // Payload size threshold in bytes for large-value protection during cache -// eviction. When a free entry's payload size exceeds this threshold, the -// regular LRU scan will not evict it; instead it boosts the page to the LRU -// tail so it stays away from the head and is not repeatedly visited by the -// scan without making progress. A value of 0 disables large-value protection -// (default behaviour). +// eviction. When an entry's payload size exceeds this threshold its page is +// marked as a large-value page (has_large_value_ = true). Large-value pages +// are clustered at the tail (most-recently-used) end of the LRU list by +// UpdateLruList, so they are evicted only after all small-value pages have +// been evicted. A value of 0 disables large-value protection (default). inline size_t txservice_large_value_threshold = 0; // Whether to automatically redirect redis command to the leader node when the diff --git a/tx_service/src/cc/cc_shard.cpp b/tx_service/src/cc/cc_shard.cpp index b21df686..9ae140d3 100644 --- a/tx_service/src/cc/cc_shard.cpp +++ b/tx_service/src/cc/cc_shard.cpp @@ -94,6 +94,7 @@ CcShard::CcShard( head_ccp_(nullptr), tail_ccp_(nullptr), clean_start_ccp_(nullptr), + lru_large_value_zone_head_(nullptr), size_(0), ckpter_(nullptr), catalog_factory_{catalog_factory[0], @@ -127,6 +128,7 @@ CcShard::CcShard( head_ccp_.lru_next_ = &tail_ccp_; tail_ccp_.lru_prev_ = &head_ccp_; tail_ccp_.lru_next_ = nullptr; + lru_large_value_zone_head_ = &tail_ccp_; thd_token_.reserve((size_t) core_cnt_ + 1); for (size_t idx = 0; idx < core_cnt_; ++idx) @@ -845,6 +847,11 @@ void CcShard::DetachLru(LruPage *page) { clean_start_ccp_ = page->lru_next_; } + // If page is the head of the large-value zone, advance the zone head. + if (lru_large_value_zone_head_ == page) + { + lru_large_value_zone_head_ = page->lru_next_; + } assert(prev != nullptr && next != nullptr); prev->lru_next_ = next; next->lru_prev_ = prev; @@ -866,6 +873,12 @@ void CcShard::ReplaceLru(LruPage *old_page, LruPage *new_page) { clean_start_ccp_ = new_page; } + // The replacement page inherits the large-value flag and the zone head. + new_page->has_large_value_ = old_page->has_large_value_; + if (lru_large_value_zone_head_ == old_page) + { + lru_large_value_zone_head_ = new_page; + } lru_prev->lru_next_ = new_page; lru_next->lru_prev_ = new_page; new_page->lru_next_ = lru_next; @@ -883,30 +896,52 @@ void CcShard::UpdateLruList(LruPage *page, bool is_emplace) assert(page->lru_next_ == nullptr && page->lru_prev_ == nullptr); return; } - // page already at the tail, do nothing - if (page->lru_next_ == &tail_ccp_ && tail_ccp_.lru_prev_ == page) + + // Determine insertion point depending on whether the page has large values. + // + // Large-value pages (has_large_value_ == true) always go at the true tail + // (most-recently-used end). This clusters them in the tail zone and ensures + // they are evicted only after all small-value pages have been evicted. + // + // Small-value pages (has_large_value_ == false) are inserted just before + // the large-value zone (at lru_large_value_zone_head_->lru_prev_). When + // there are no large-value pages lru_large_value_zone_head_ == &tail_ccp_, + // so the behaviour is identical to the standard tail insertion. + LruPage *insert_before = + page->has_large_value_ ? &tail_ccp_ : lru_large_value_zone_head_; + + // page already at the correct insertion position, just update the counter. + if (page->lru_next_ == insert_before && insert_before->lru_prev_ == page) { ++access_counter_; page->last_access_ts_ = access_counter_; return; } - // Removes the page from the list, if it's already in the list. This is - // used to keep the updated page at the end(tail) of the LRU list. A - // page's prev and post are both not-null when the page is in the - // list. This is because we have a reserved head and tail for the list. + + // Remove the page from its current position in the list (if present). if (page->lru_next_ != nullptr) { DetachLru(page); } - LruPage *second_tail = tail_ccp_.lru_prev_; - second_tail->lru_next_ = page; - tail_ccp_.lru_prev_ = page; - page->lru_next_ = &tail_ccp_; - page->lru_prev_ = second_tail; + + // Insert page immediately before insert_before. + LruPage *insert_after = insert_before->lru_prev_; + insert_after->lru_next_ = page; + insert_before->lru_prev_ = page; + page->lru_next_ = insert_before; + page->lru_prev_ = insert_after; ++access_counter_; page->last_access_ts_ = access_counter_; + // Maintain lru_large_value_zone_head_: when a large-value page is inserted + // and the zone was empty (lru_large_value_zone_head_ == &tail_ccp_), the + // new page becomes the zone head. + if (page->has_large_value_ && lru_large_value_zone_head_ == &tail_ccp_) + { + lru_large_value_zone_head_ = page; + } + // If the update is a emplace update, these new loaded data might be // kickable from cc map. Usually if the clean_start_page is at tail we're // not able to load new data into memory, except some special case where we diff --git a/tx_service/tests/CcPage-Test.cpp b/tx_service/tests/CcPage-Test.cpp index 9dfbea1c..d102d62f 100644 --- a/tx_service/tests/CcPage-Test.cpp +++ b/tx_service/tests/CcPage-Test.cpp @@ -237,75 +237,138 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") nullptr, &raft_path); - const size_t MAP_SIZE = 500; - const size_t LARGE_PAYLOAD_SIZE = 1024; // 1 KB per entry - - std::string table_name = "large_val_test"; - TableName tname(table_name, TableType::Primary, TableEngine::EloqSql); - auto cc_map = std::make_unique, - CompositeRecord, - true, - true>>( - &shard, 0, tname, 1, nullptr, true); - - // Insert free (persistent, unlocked) entries. - std::vector> keys; - for (size_t i = 0; i < MAP_SIZE; i++) - { - keys.emplace_back(std::make_tuple(table_name, static_cast(i))); - } - std::vector *> key_ptrs; - for (auto &k : keys) + const size_t MAP_SIZE = 200; + const size_t LARGE_PAYLOAD_SIZE = 1024; + + using TestCcMap = TemplateCcMap, + CompositeRecord, + true, + true>; + + // Small-value map – pages stay in the head (small-value) zone. + std::string small_table = "small_val_test"; + TableName small_tname( + small_table, TableType::Primary, TableEngine::EloqSql); + auto small_map = + std::make_unique(&shard, 0, small_tname, 1, nullptr, true); + + // Large-value map – pages will be re-zoned to the tail (large-value) zone. + std::string large_table = "large_val_test"; + TableName large_tname( + large_table, TableType::Primary, TableEngine::EloqSql); + auto large_map = + std::make_unique(&shard, 0, large_tname, 1, nullptr, true); + + auto make_keys = [](const std::string &tname, + size_t cnt, + std::vector> &storage) + -> std::vector *> { - key_ptrs.push_back(&k); - } - REQUIRE(cc_map->BulkEmplaceFreeForTest(key_ptrs)); - REQUIRE(cc_map->VerifyOrdering() == MAP_SIZE); + for (size_t i = 0; i < cnt; i++) + { + storage.emplace_back(std::make_tuple(tname, static_cast(i))); + } + std::vector *> ptrs; + for (auto &k : storage) + { + ptrs.push_back(&k); + } + return ptrs; + }; + + // Insert entries for both maps. Both maps start in the small-value zone + // because has_large_value_ is false on insertion. + std::vector> small_keys; + REQUIRE(small_map->BulkEmplaceFreeForTest( + make_keys(small_table, MAP_SIZE, small_keys))); + REQUIRE(small_map->VerifyOrdering() == MAP_SIZE); + + std::vector> large_keys; + REQUIRE(large_map->BulkEmplaceFreeForTest( + make_keys(large_table, MAP_SIZE, large_keys))); + REQUIRE(large_map->VerifyOrdering() == MAP_SIZE); - // Give every entry a large payload (> threshold we will use). + // Assign large payloads to the large-value map entries. auto large_payload = std::make_shared(42, LARGE_PAYLOAD_SIZE); - cc_map->SetPayloadForTest(large_payload); + large_map->SetPayloadForTest(large_payload); - // PART 1: Protection enabled. - // The scan visits a page with large values, boosts it to the LRU tail, - // and stops (returns tail_ccp_ as next). No entries are evicted. txservice_large_value_threshold = LARGE_PAYLOAD_SIZE / 2; - size_t freed_with_protection = 0; - while (true) + // PART 1: Verify zone-separation structure. + // + // Use RezoneAsLargeValueForTest() to set has_large_value_ on large_map + // pages and call UpdateLruList to move them into the large-value zone. + // This simulates what happens in production when those pages are accessed + // after CanBeCleaned has set has_large_value_ on them. + large_map->RezoneAsLargeValueForTest(); + shard.VerifyLruList(); + + // lru_large_value_zone_head_ must now point into the large-value zone. + const LruPage *zone_head = shard.LruLargeValueZoneHead(); + REQUIRE(zone_head != nullptr); + REQUIRE(zone_head->parent_map_ != nullptr); // not a sentinel + + // Walk the LRU list and verify: + // head → [small_map pages] → zone_head → [large_map pages] → tail { - auto [free_cnt, yield] = shard.Clean(); - shard.VerifyLruList(); - if (free_cnt == 0) + bool in_large_zone = false; + for (const LruPage *p = shard.LruHead()->lru_next_; + p->parent_map_ != nullptr; // sentinel tail has parent_map_==null + p = p->lru_next_) { - break; + if (p == zone_head) + { + in_large_zone = true; + } + if (in_large_zone) + { + REQUIRE(p->parent_map_ == large_map.get()); + } + else + { + REQUIRE(p->parent_map_ == small_map.get()); + } } - freed_with_protection += free_cnt; + // We must have entered the large zone. + REQUIRE(in_large_zone); } - REQUIRE(freed_with_protection == 0); - REQUIRE(cc_map->VerifyOrdering() == MAP_SIZE); - // PART 2: Protection disabled – all free entries must be evicted normally. - shard.ResetCleanStart(); - txservice_large_value_threshold = 0; + // PART 2: Verify a small-value page access stays in the small-value zone. + // + // Access the first small_map page (simulate a read by looking up a key via + // VerifyOrdering which scans ccmp_; UpdateLruList is not called there, so + // we simulate by checking the zone directly). The zone_head must not + // change after inserting a new small-value page. + const LruPage *zone_head_before = shard.LruLargeValueZoneHead(); + + // Insert one more small-value key and verify it lands BEFORE zone_head. + CompositeKey extra_key = + std::make_tuple(small_table, static_cast(MAP_SIZE + 1)); + std::vector *> extra_ptr = {&extra_key}; + REQUIRE(small_map->BulkEmplaceFreeForTest(extra_ptr)); + shard.VerifyLruList(); + + // The zone head must not have changed (the new small-value page is before + // the zone head, not inside the large-value zone). + REQUIRE(shard.LruLargeValueZoneHead() == zone_head_before); - size_t freed_without_protection = 0; + // PART 3: Full scan – all pages are evicted (CTEST mode scans everything). + size_t total_freed = 0; while (true) { auto [free_cnt, yield] = shard.Clean(); shard.VerifyLruList(); + total_freed += free_cnt; if (free_cnt == 0) { break; } - freed_without_protection += free_cnt; } - REQUIRE(freed_without_protection == MAP_SIZE); + // Small pages + the extra key + large pages must all be gone. + REQUIRE(total_freed == MAP_SIZE + 1 + MAP_SIZE); - // Restore global defaults so other tests are not affected. txservice_large_value_threshold = 0; - local_cc_shards.Terminate(); } From f0ab3b9ba20a01c302d0aadc1d50d29a915080e0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Feb 2026 03:56:31 +0000 Subject: [PATCH 08/15] Add zone-ratio control analogous to SLRU protected-segment cap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The zone-separation algorithm is analogous to SLRU in the following way: SLRU | Our algorithm --------------------------------|------------------------------ Classification: access freq | Classification: payload size Protected tail ← LRU re-hit | LV zone tail ← large payload detected Probationary head ← demotion | SV zone boundary ← demotion (zone head++) Eviction: probationary head | Eviction: SV zone head (LRU order) Fixed segment size | Configurable txservice_large_value_zone_max_ratio Key difference: SLRU promotes/demotes based on whether a page is re-accessed; we promote/demote based on payload size. The ratio- enforcement mechanism is structurally identical. Without a ratio limit, the LV zone can grow unboundedly (e.g. if every entry has a large payload, the entire LRU list becomes the LV zone). This change adds: - txservice_large_value_zone_max_ratio (default 1.0 = no limit): when the LV zone would exceed max_ratio * total_lru_page_count_, the oldest LV page (lru_large_value_zone_head_) is demoted to the SV zone by advancing the zone head pointer (O(1), no list surgery). The demoted page stays in the list unchanged; it is re-promoted if accessed again. - bool in_large_value_zone_ in LruPage: tracks current zone membership for O(1) count maintenance in DetachLru without list traversal. - uint64_t large_value_zone_page_count_ and total_lru_page_count_ in CcShard: maintained by UpdateLruList (++total on insert, ++lv on LV insert; demotion decrements lv) and DetachLru (--total, --lv if in LV zone). LargeValueZonePageCount() / TotalLruPageCount() getters expose these for testing. - size_t PageCount() const added to TemplateCcMap for test convenience. - Test extended: PART 1 verifies zone structure + page counts with max_ratio=1.0 (no limit). PART 2 verifies ratio enforcement: inserting 1 extra LV page with max_ratio=0.5 triggers exactly 1 demotion, and the LV zone count satisfies the ratio invariant afterwards. Co-authored-by: xiexiaoy <5558602+xiexiaoy@users.noreply.github.com> --- tx_service/include/cc/cc_entry.h | 7 ++ tx_service/include/cc/cc_shard.h | 25 +++++++ tx_service/include/cc/template_cc_map.h | 7 ++ tx_service/include/tx_service_common.h | 24 +++++++ tx_service/src/cc/cc_shard.cpp | 56 ++++++++++++++-- tx_service/tests/CcPage-Test.cpp | 88 ++++++++++++++++++++----- 6 files changed, 183 insertions(+), 24 deletions(-) diff --git a/tx_service/include/cc/cc_entry.h b/tx_service/include/cc/cc_entry.h index a3107399..3fdc45f8 100644 --- a/tx_service/include/cc/cc_entry.h +++ b/tx_service/include/cc/cc_entry.h @@ -2046,6 +2046,13 @@ struct LruPage // that they are evicted only after all small-value pages have been evicted. bool has_large_value_{false}; + // True when this page is currently residing in the large-value zone + // (i.e. at or after lru_large_value_zone_head_ in the LRU list). Maintained + // by UpdateLruList and cleared by DetachLru. Used by DetachLru to update + // CcShard::large_value_zone_page_count_ in O(1) without traversing the + // list. + bool in_large_value_zone_{false}; + // The largest commit ts of dirty cc entries on this page. This value might // be larger than the actual max commit ts of cc entries. Currently used to // decide if this page has dirty data after a given ts. diff --git a/tx_service/include/cc/cc_shard.h b/tx_service/include/cc/cc_shard.h index 091c85f0..5449cdab 100644 --- a/tx_service/include/cc/cc_shard.h +++ b/tx_service/include/cc/cc_shard.h @@ -1034,6 +1034,20 @@ class CcShard return lru_large_value_zone_head_; } + /// Number of pages currently in the large-value zone. Used in tests to + /// verify zone-ratio enforcement. + uint64_t LargeValueZonePageCount() const + { + return large_value_zone_page_count_; + } + + /// Total number of data pages currently in the LRU list (both zones). + /// Used in tests to verify zone-ratio enforcement. + uint64_t TotalLruPageCount() const + { + return total_lru_page_count_; + } + SystemHandler *GetSystemHandler() { return system_handler_; @@ -1324,6 +1338,17 @@ class CcShard // It equals &tail_ccp_ when no large-value pages are in the list. LruPage *lru_large_value_zone_head_; + // Number of data pages currently in the large-value zone (between + // lru_large_value_zone_head_ and the tail sentinel, inclusive of zone + // head). + uint64_t large_value_zone_page_count_{0}; + + // Total number of data pages currently in the LRU list (both zones + // combined). Maintained by UpdateLruList (increment on new insertion) and + // DetachLru (decrement on removal). Together with + // large_value_zone_page_count_ this enables O(1) zone-ratio enforcement. + uint64_t total_lru_page_count_{0}; + // The number of ccentry in all the ccmap of this ccshard. uint64_t size_; diff --git a/tx_service/include/cc/template_cc_map.h b/tx_service/include/cc/template_cc_map.h index ec8c4b03..d5e40562 100644 --- a/tx_service/include/cc/template_cc_map.h +++ b/tx_service/include/cc/template_cc_map.h @@ -9276,6 +9276,13 @@ class TemplateCcMap : public CcMap } } + /// Returns the number of CcPage objects (btree nodes) in this cc map. + /// Used in tests that verify zone-page-count invariants. + size_t PageCount() const + { + return ccmp_.size(); + } + protected: void OnCommittedUpdate( const CcEntry *cce, diff --git a/tx_service/include/tx_service_common.h b/tx_service/include/tx_service_common.h index 824f0f33..237140cc 100644 --- a/tx_service/include/tx_service_common.h +++ b/tx_service/include/tx_service_common.h @@ -50,6 +50,30 @@ inline bool txservice_enable_cache_replacement = true; // been evicted. A value of 0 disables large-value protection (default). inline size_t txservice_large_value_threshold = 0; +// Maximum fraction (0.0 < max_ratio <= 1.0) of all LRU pages that the +// large-value zone may occupy at any time. +// +// The algorithm is analogous to SLRU's protected-segment capacity: +// +// SLRU | Our zone-separation algorithm +// -------------------------------|------------------------------ +// Classification: access freq | Classification: payload size +// Protected tail ← LRU re-hit | LV zone tail ← large payload detected +// Probationary head ← demotion | SV zone boundary ← demotion +// Fixed segment size | Configurable max_ratio +// +// When a large-value page is inserted into the LV zone and the zone would +// exceed max_ratio * total_lru_page_count_, the oldest LV page (zone head) +// is demoted to the SV zone: the zone head pointer is advanced one step +// forward (O(1), no list surgery). The demoted page remains in the list at +// its current position, now in the SV zone. On next access it tries to +// re-enter the LV zone; if the zone is still full it will be demoted again, +// matching classic SLRU promotion/demotion semantics. +// +// Set to 1.0 (default) to impose no ratio limit. Set to 0.5 to allow at +// most 50% of LRU pages in the large-value zone. +inline double txservice_large_value_zone_max_ratio = 1.0; + // Whether to automatically redirect redis command to the leader node when the // data is not on the local node. inline bool txservice_auto_redirect_redis_cmd = true; diff --git a/tx_service/src/cc/cc_shard.cpp b/tx_service/src/cc/cc_shard.cpp index 9ae140d3..6ae3dfd0 100644 --- a/tx_service/src/cc/cc_shard.cpp +++ b/tx_service/src/cc/cc_shard.cpp @@ -852,6 +852,16 @@ void CcShard::DetachLru(LruPage *page) { lru_large_value_zone_head_ = page->lru_next_; } + // Update zone-page counts before unlinking so the flags are still valid. + if (page->in_large_value_zone_) + { + assert(large_value_zone_page_count_ > 0); + --large_value_zone_page_count_; + page->in_large_value_zone_ = false; + } + assert(total_lru_page_count_ > 0); + --total_lru_page_count_; + assert(prev != nullptr && next != nullptr); prev->lru_next_ = next; next->lru_prev_ = prev; @@ -873,8 +883,10 @@ void CcShard::ReplaceLru(LruPage *old_page, LruPage *new_page) { clean_start_ccp_ = new_page; } - // The replacement page inherits the large-value flag and the zone head. + // The replacement page inherits the large-value flag, zone membership, and + // the zone-head role. Page counts do not change (1:1 replacement). new_page->has_large_value_ = old_page->has_large_value_; + new_page->in_large_value_zone_ = old_page->in_large_value_zone_; if (lru_large_value_zone_head_ == old_page) { lru_large_value_zone_head_ = new_page; @@ -919,6 +931,8 @@ void CcShard::UpdateLruList(LruPage *page, bool is_emplace) } // Remove the page from its current position in the list (if present). + // DetachLru also decrements total_lru_page_count_ and, if the page was in + // the LV zone, decrements large_value_zone_page_count_. if (page->lru_next_ != nullptr) { DetachLru(page); @@ -934,12 +948,42 @@ void CcShard::UpdateLruList(LruPage *page, bool is_emplace) ++access_counter_; page->last_access_ts_ = access_counter_; - // Maintain lru_large_value_zone_head_: when a large-value page is inserted - // and the zone was empty (lru_large_value_zone_head_ == &tail_ccp_), the - // new page becomes the zone head. - if (page->has_large_value_ && lru_large_value_zone_head_ == &tail_ccp_) + // Update page counts after re-insertion. + ++total_lru_page_count_; + if (page->has_large_value_) + { + page->in_large_value_zone_ = true; + ++large_value_zone_page_count_; + + // Maintain lru_large_value_zone_head_: when a large-value page is + // inserted and the zone was empty, the new page becomes the zone head. + if (lru_large_value_zone_head_ == &tail_ccp_) + { + lru_large_value_zone_head_ = page; + } + + // Zone-ratio enforcement (analogous to SLRU protected-segment cap): + // if the large-value zone now exceeds + // txservice_large_value_zone_max_ratio of all LRU pages, demote the + // oldest LV page(s) until the zone is within the limit. Each demotion + // advances lru_large_value_zone_head_ one step (O(1), no list surgery). + // The demoted page stays in the list at its current position, now + // belonging to the SV zone; it will be re-promoted if accessed again. + while (txservice_large_value_zone_max_ratio < 1.0 && + lru_large_value_zone_head_ != &tail_ccp_ && + large_value_zone_page_count_ > + static_cast(total_lru_page_count_ * + txservice_large_value_zone_max_ratio)) + { + LruPage *demote = lru_large_value_zone_head_; + lru_large_value_zone_head_ = demote->lru_next_; + demote->in_large_value_zone_ = false; + --large_value_zone_page_count_; + } + } + else { - lru_large_value_zone_head_ = page; + page->in_large_value_zone_ = false; } // If the update is a emplace update, these new loaded data might be diff --git a/tx_service/tests/CcPage-Test.cpp b/tx_service/tests/CcPage-Test.cpp index d102d62f..21000b11 100644 --- a/tx_service/tests/CcPage-Test.cpp +++ b/tx_service/tests/CcPage-Test.cpp @@ -295,8 +295,9 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") txservice_large_value_threshold = LARGE_PAYLOAD_SIZE / 2; - // PART 1: Verify zone-separation structure. - // + // ----------------------------------------------------------------------- + // PART 1: Zone-separation structure (no ratio limit, max_ratio == 1.0). + // ----------------------------------------------------------------------- // Use RezoneAsLargeValueForTest() to set has_large_value_ on large_map // pages and call UpdateLruList to move them into the large-value zone. // This simulates what happens in production when those pages are accessed @@ -309,6 +310,12 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") REQUIRE(zone_head != nullptr); REQUIRE(zone_head->parent_map_ != nullptr); // not a sentinel + // With max_ratio == 1.0 (no limit), ALL MAP_SIZE large-value pages are in + // the LV zone. + REQUIRE(shard.LargeValueZonePageCount() == small_map->PageCount()); + REQUIRE(shard.TotalLruPageCount() == + small_map->PageCount() + large_map->PageCount()); + // Walk the LRU list and verify: // head → [small_map pages] → zone_head → [large_map pages] → tail { @@ -334,26 +341,69 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") REQUIRE(in_large_zone); } - // PART 2: Verify a small-value page access stays in the small-value zone. + // ----------------------------------------------------------------------- + // PART 2: Zone-ratio enforcement. // - // Access the first small_map page (simulate a read by looking up a key via - // VerifyOrdering which scans ccmp_; UpdateLruList is not called there, so - // we simulate by checking the zone directly). The zone_head must not - // change after inserting a new small-value page. - const LruPage *zone_head_before = shard.LruLargeValueZoneHead(); + // Analogous to SLRU's protected-segment cap: when the LV zone would exceed + // max_ratio * total_pages, the oldest LV page is demoted to the SV zone + // (zone head pointer advanced one step, O(1), no list surgery). + // + // We now restrict max_ratio to 0.5 and re-zone the large_map pages again. + // With MAP_SIZE SV pages and MAP_SIZE LV pages (total 2*MAP_SIZE), the LV + // zone is capped at floor(2*MAP_SIZE * 0.5) = MAP_SIZE pages. Since the + // LV pages equal exactly the cap, no demotion is expected (strict > check). + // Then insert one additional LV page: the cap is exceeded by 1, triggering + // exactly 1 demotion. + // ----------------------------------------------------------------------- + txservice_large_value_zone_max_ratio = 0.5; + const uint64_t total_pages = shard.TotalLruPageCount(); + const uint64_t max_lv = static_cast(total_pages * 0.5); + + // Current LV count must be <= max_lv (no demotion happened yet with 1.0 + // ratio; re-zoning was already done above). + REQUIRE(shard.LargeValueZonePageCount() <= max_lv); + + // Now add one more large-value entry to a new large-value map, which + // will cause the LV zone to go over max_lv and trigger a demotion. + std::string extra_large_table = "extra_large_val"; + TableName extra_large_tname( + extra_large_table, TableType::Primary, TableEngine::EloqSql); + auto extra_large_map = std::make_unique( + &shard, 0, extra_large_tname, 1, nullptr, true); + + CompositeKey extra_lv_key = + std::make_tuple(extra_large_table, 0); + std::vector *> extra_lv_ptr = { + &extra_lv_key}; + REQUIRE(extra_large_map->BulkEmplaceFreeForTest(extra_lv_ptr)); + + // Give it a large payload so it gets re-zoned to the LV zone. + extra_large_map->SetPayloadForTest(large_payload); + extra_large_map->RezoneAsLargeValueForTest(); + shard.VerifyLruList(); + + // After the demotion the LV zone must satisfy the ratio constraint. + const uint64_t new_total = shard.TotalLruPageCount(); + const uint64_t new_lv = shard.LargeValueZonePageCount(); + REQUIRE(new_lv <= static_cast( + new_total * txservice_large_value_zone_max_ratio)); - // Insert one more small-value key and verify it lands BEFORE zone_head. - CompositeKey extra_key = + // ----------------------------------------------------------------------- + // PART 3: Small-value insertion stays before the zone head. + // ----------------------------------------------------------------------- + const LruPage *zone_head_before = shard.LruLargeValueZoneHead(); + CompositeKey extra_sv_key = std::make_tuple(small_table, static_cast(MAP_SIZE + 1)); - std::vector *> extra_ptr = {&extra_key}; - REQUIRE(small_map->BulkEmplaceFreeForTest(extra_ptr)); + std::vector *> extra_sv_ptr = { + &extra_sv_key}; + REQUIRE(small_map->BulkEmplaceFreeForTest(extra_sv_ptr)); shard.VerifyLruList(); - - // The zone head must not have changed (the new small-value page is before - // the zone head, not inside the large-value zone). + // SV insertion must not change the zone head. REQUIRE(shard.LruLargeValueZoneHead() == zone_head_before); - // PART 3: Full scan – all pages are evicted (CTEST mode scans everything). + // ----------------------------------------------------------------------- + // PART 4: Full scan – all pages are evicted. + // ----------------------------------------------------------------------- size_t total_freed = 0; while (true) { @@ -365,10 +415,12 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") break; } } - // Small pages + the extra key + large pages must all be gone. - REQUIRE(total_freed == MAP_SIZE + 1 + MAP_SIZE); + REQUIRE(total_freed == + MAP_SIZE + MAP_SIZE + 1 /* extra sv */ + 1 /* extra lv */); + // Restore global defaults. txservice_large_value_threshold = 0; + txservice_large_value_zone_max_ratio = 1.0; local_cc_shards.Terminate(); } From 991a12ff98ee5271468a4f3549c9cc94c5bd1ed3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 27 Feb 2026 03:21:44 +0000 Subject: [PATCH 09/15] Remove fixed-capacity zone-ratio control; keep zone-separation pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The system uses mimalloc heap for memory statistics and pages are not fixed size, so a fixed-capacity ratio (large_value_zone_max_ratio) based on page count does not map to actual memory usage and is removed. The zone-separation approach is retained unchanged: head ← [small-value pages] ← lru_large_value_zone_head_ ← [large-value pages] ← tail Small-value pages are inserted before lru_large_value_zone_head_. Large-value pages are inserted before &tail_ccp_ (true MRU end). In the extreme case where all entries are large-value, the SV zone will be empty and SV pages will be evicted immediately even if accessed frequently — this is explicitly accepted. Removed: - txservice_large_value_zone_max_ratio global (tx_service_common.h) - bool in_large_value_zone_ from LruPage (cc_entry.h) - uint64_t large_value_zone_page_count_ / total_lru_page_count_ from CcShard (cc_shard.h) - LargeValueZonePageCount() / TotalLruPageCount() getters (cc_shard.h) - Count maintenance (++/-- in UpdateLruList, DetachLru) (cc_shard.cpp) - in_large_value_zone_ inheritance in ReplaceLru (cc_shard.cpp) - Ratio-enforcement while loop from UpdateLruList (cc_shard.cpp) - PART 2 ratio-enforcement test section (CcPage-Test.cpp) - References to removed globals/methods in test (CcPage-Test.cpp) Co-authored-by: xiexiaoy <5558602+xiexiaoy@users.noreply.github.com> --- tx_service/include/cc/cc_entry.h | 7 --- tx_service/include/cc/cc_shard.h | 25 ---------- tx_service/include/tx_service_common.h | 24 ---------- tx_service/src/cc/cc_shard.cpp | 57 +++-------------------- tx_service/tests/CcPage-Test.cpp | 63 ++------------------------ 5 files changed, 11 insertions(+), 165 deletions(-) diff --git a/tx_service/include/cc/cc_entry.h b/tx_service/include/cc/cc_entry.h index 3fdc45f8..a3107399 100644 --- a/tx_service/include/cc/cc_entry.h +++ b/tx_service/include/cc/cc_entry.h @@ -2046,13 +2046,6 @@ struct LruPage // that they are evicted only after all small-value pages have been evicted. bool has_large_value_{false}; - // True when this page is currently residing in the large-value zone - // (i.e. at or after lru_large_value_zone_head_ in the LRU list). Maintained - // by UpdateLruList and cleared by DetachLru. Used by DetachLru to update - // CcShard::large_value_zone_page_count_ in O(1) without traversing the - // list. - bool in_large_value_zone_{false}; - // The largest commit ts of dirty cc entries on this page. This value might // be larger than the actual max commit ts of cc entries. Currently used to // decide if this page has dirty data after a given ts. diff --git a/tx_service/include/cc/cc_shard.h b/tx_service/include/cc/cc_shard.h index 5449cdab..091c85f0 100644 --- a/tx_service/include/cc/cc_shard.h +++ b/tx_service/include/cc/cc_shard.h @@ -1034,20 +1034,6 @@ class CcShard return lru_large_value_zone_head_; } - /// Number of pages currently in the large-value zone. Used in tests to - /// verify zone-ratio enforcement. - uint64_t LargeValueZonePageCount() const - { - return large_value_zone_page_count_; - } - - /// Total number of data pages currently in the LRU list (both zones). - /// Used in tests to verify zone-ratio enforcement. - uint64_t TotalLruPageCount() const - { - return total_lru_page_count_; - } - SystemHandler *GetSystemHandler() { return system_handler_; @@ -1338,17 +1324,6 @@ class CcShard // It equals &tail_ccp_ when no large-value pages are in the list. LruPage *lru_large_value_zone_head_; - // Number of data pages currently in the large-value zone (between - // lru_large_value_zone_head_ and the tail sentinel, inclusive of zone - // head). - uint64_t large_value_zone_page_count_{0}; - - // Total number of data pages currently in the LRU list (both zones - // combined). Maintained by UpdateLruList (increment on new insertion) and - // DetachLru (decrement on removal). Together with - // large_value_zone_page_count_ this enables O(1) zone-ratio enforcement. - uint64_t total_lru_page_count_{0}; - // The number of ccentry in all the ccmap of this ccshard. uint64_t size_; diff --git a/tx_service/include/tx_service_common.h b/tx_service/include/tx_service_common.h index 237140cc..824f0f33 100644 --- a/tx_service/include/tx_service_common.h +++ b/tx_service/include/tx_service_common.h @@ -50,30 +50,6 @@ inline bool txservice_enable_cache_replacement = true; // been evicted. A value of 0 disables large-value protection (default). inline size_t txservice_large_value_threshold = 0; -// Maximum fraction (0.0 < max_ratio <= 1.0) of all LRU pages that the -// large-value zone may occupy at any time. -// -// The algorithm is analogous to SLRU's protected-segment capacity: -// -// SLRU | Our zone-separation algorithm -// -------------------------------|------------------------------ -// Classification: access freq | Classification: payload size -// Protected tail ← LRU re-hit | LV zone tail ← large payload detected -// Probationary head ← demotion | SV zone boundary ← demotion -// Fixed segment size | Configurable max_ratio -// -// When a large-value page is inserted into the LV zone and the zone would -// exceed max_ratio * total_lru_page_count_, the oldest LV page (zone head) -// is demoted to the SV zone: the zone head pointer is advanced one step -// forward (O(1), no list surgery). The demoted page remains in the list at -// its current position, now in the SV zone. On next access it tries to -// re-enter the LV zone; if the zone is still full it will be demoted again, -// matching classic SLRU promotion/demotion semantics. -// -// Set to 1.0 (default) to impose no ratio limit. Set to 0.5 to allow at -// most 50% of LRU pages in the large-value zone. -inline double txservice_large_value_zone_max_ratio = 1.0; - // Whether to automatically redirect redis command to the leader node when the // data is not on the local node. inline bool txservice_auto_redirect_redis_cmd = true; diff --git a/tx_service/src/cc/cc_shard.cpp b/tx_service/src/cc/cc_shard.cpp index 6ae3dfd0..b20a3a9c 100644 --- a/tx_service/src/cc/cc_shard.cpp +++ b/tx_service/src/cc/cc_shard.cpp @@ -852,16 +852,6 @@ void CcShard::DetachLru(LruPage *page) { lru_large_value_zone_head_ = page->lru_next_; } - // Update zone-page counts before unlinking so the flags are still valid. - if (page->in_large_value_zone_) - { - assert(large_value_zone_page_count_ > 0); - --large_value_zone_page_count_; - page->in_large_value_zone_ = false; - } - assert(total_lru_page_count_ > 0); - --total_lru_page_count_; - assert(prev != nullptr && next != nullptr); prev->lru_next_ = next; next->lru_prev_ = prev; @@ -883,10 +873,9 @@ void CcShard::ReplaceLru(LruPage *old_page, LruPage *new_page) { clean_start_ccp_ = new_page; } - // The replacement page inherits the large-value flag, zone membership, and - // the zone-head role. Page counts do not change (1:1 replacement). + // The replacement page inherits the large-value flag and the zone-head + // role. new_page->has_large_value_ = old_page->has_large_value_; - new_page->in_large_value_zone_ = old_page->in_large_value_zone_; if (lru_large_value_zone_head_ == old_page) { lru_large_value_zone_head_ = new_page; @@ -931,8 +920,6 @@ void CcShard::UpdateLruList(LruPage *page, bool is_emplace) } // Remove the page from its current position in the list (if present). - // DetachLru also decrements total_lru_page_count_ and, if the page was in - // the LV zone, decrements large_value_zone_page_count_. if (page->lru_next_ != nullptr) { DetachLru(page); @@ -948,42 +935,12 @@ void CcShard::UpdateLruList(LruPage *page, bool is_emplace) ++access_counter_; page->last_access_ts_ = access_counter_; - // Update page counts after re-insertion. - ++total_lru_page_count_; - if (page->has_large_value_) - { - page->in_large_value_zone_ = true; - ++large_value_zone_page_count_; - - // Maintain lru_large_value_zone_head_: when a large-value page is - // inserted and the zone was empty, the new page becomes the zone head. - if (lru_large_value_zone_head_ == &tail_ccp_) - { - lru_large_value_zone_head_ = page; - } - - // Zone-ratio enforcement (analogous to SLRU protected-segment cap): - // if the large-value zone now exceeds - // txservice_large_value_zone_max_ratio of all LRU pages, demote the - // oldest LV page(s) until the zone is within the limit. Each demotion - // advances lru_large_value_zone_head_ one step (O(1), no list surgery). - // The demoted page stays in the list at its current position, now - // belonging to the SV zone; it will be re-promoted if accessed again. - while (txservice_large_value_zone_max_ratio < 1.0 && - lru_large_value_zone_head_ != &tail_ccp_ && - large_value_zone_page_count_ > - static_cast(total_lru_page_count_ * - txservice_large_value_zone_max_ratio)) - { - LruPage *demote = lru_large_value_zone_head_; - lru_large_value_zone_head_ = demote->lru_next_; - demote->in_large_value_zone_ = false; - --large_value_zone_page_count_; - } - } - else + // Maintain lru_large_value_zone_head_: when a large-value page is inserted + // and the zone was empty (lru_large_value_zone_head_ == &tail_ccp_), the + // new page becomes the zone head. + if (page->has_large_value_ && lru_large_value_zone_head_ == &tail_ccp_) { - page->in_large_value_zone_ = false; + lru_large_value_zone_head_ = page; } // If the update is a emplace update, these new loaded data might be diff --git a/tx_service/tests/CcPage-Test.cpp b/tx_service/tests/CcPage-Test.cpp index 21000b11..bbc4d0e4 100644 --- a/tx_service/tests/CcPage-Test.cpp +++ b/tx_service/tests/CcPage-Test.cpp @@ -296,7 +296,7 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") txservice_large_value_threshold = LARGE_PAYLOAD_SIZE / 2; // ----------------------------------------------------------------------- - // PART 1: Zone-separation structure (no ratio limit, max_ratio == 1.0). + // PART 1: Zone-separation structure. // ----------------------------------------------------------------------- // Use RezoneAsLargeValueForTest() to set has_large_value_ on large_map // pages and call UpdateLruList to move them into the large-value zone. @@ -310,12 +310,6 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") REQUIRE(zone_head != nullptr); REQUIRE(zone_head->parent_map_ != nullptr); // not a sentinel - // With max_ratio == 1.0 (no limit), ALL MAP_SIZE large-value pages are in - // the LV zone. - REQUIRE(shard.LargeValueZonePageCount() == small_map->PageCount()); - REQUIRE(shard.TotalLruPageCount() == - small_map->PageCount() + large_map->PageCount()); - // Walk the LRU list and verify: // head → [small_map pages] → zone_head → [large_map pages] → tail { @@ -342,54 +336,7 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") } // ----------------------------------------------------------------------- - // PART 2: Zone-ratio enforcement. - // - // Analogous to SLRU's protected-segment cap: when the LV zone would exceed - // max_ratio * total_pages, the oldest LV page is demoted to the SV zone - // (zone head pointer advanced one step, O(1), no list surgery). - // - // We now restrict max_ratio to 0.5 and re-zone the large_map pages again. - // With MAP_SIZE SV pages and MAP_SIZE LV pages (total 2*MAP_SIZE), the LV - // zone is capped at floor(2*MAP_SIZE * 0.5) = MAP_SIZE pages. Since the - // LV pages equal exactly the cap, no demotion is expected (strict > check). - // Then insert one additional LV page: the cap is exceeded by 1, triggering - // exactly 1 demotion. - // ----------------------------------------------------------------------- - txservice_large_value_zone_max_ratio = 0.5; - const uint64_t total_pages = shard.TotalLruPageCount(); - const uint64_t max_lv = static_cast(total_pages * 0.5); - - // Current LV count must be <= max_lv (no demotion happened yet with 1.0 - // ratio; re-zoning was already done above). - REQUIRE(shard.LargeValueZonePageCount() <= max_lv); - - // Now add one more large-value entry to a new large-value map, which - // will cause the LV zone to go over max_lv and trigger a demotion. - std::string extra_large_table = "extra_large_val"; - TableName extra_large_tname( - extra_large_table, TableType::Primary, TableEngine::EloqSql); - auto extra_large_map = std::make_unique( - &shard, 0, extra_large_tname, 1, nullptr, true); - - CompositeKey extra_lv_key = - std::make_tuple(extra_large_table, 0); - std::vector *> extra_lv_ptr = { - &extra_lv_key}; - REQUIRE(extra_large_map->BulkEmplaceFreeForTest(extra_lv_ptr)); - - // Give it a large payload so it gets re-zoned to the LV zone. - extra_large_map->SetPayloadForTest(large_payload); - extra_large_map->RezoneAsLargeValueForTest(); - shard.VerifyLruList(); - - // After the demotion the LV zone must satisfy the ratio constraint. - const uint64_t new_total = shard.TotalLruPageCount(); - const uint64_t new_lv = shard.LargeValueZonePageCount(); - REQUIRE(new_lv <= static_cast( - new_total * txservice_large_value_zone_max_ratio)); - - // ----------------------------------------------------------------------- - // PART 3: Small-value insertion stays before the zone head. + // PART 2: Small-value insertion stays before the zone head. // ----------------------------------------------------------------------- const LruPage *zone_head_before = shard.LruLargeValueZoneHead(); CompositeKey extra_sv_key = @@ -402,7 +349,7 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") REQUIRE(shard.LruLargeValueZoneHead() == zone_head_before); // ----------------------------------------------------------------------- - // PART 4: Full scan – all pages are evicted. + // PART 3: Full scan – all pages are evicted. // ----------------------------------------------------------------------- size_t total_freed = 0; while (true) @@ -415,12 +362,10 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") break; } } - REQUIRE(total_freed == - MAP_SIZE + MAP_SIZE + 1 /* extra sv */ + 1 /* extra lv */); + REQUIRE(total_freed == MAP_SIZE + MAP_SIZE + 1 /* extra sv */); // Restore global defaults. txservice_large_value_threshold = 0; - txservice_large_value_zone_max_ratio = 1.0; local_cc_shards.Terminate(); } From 42d757e8fa8edecc16b9f0d05e41e38fa5715447 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 27 Feb 2026 09:03:40 +0000 Subject: [PATCH 10/15] Replace lru_large_value_zone_head_ pointer with head_large_ccp_ dummy sentinel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Evaluation: dummy sentinel is strictly better than the pointer approach. Pointer approach (old): - DetachLru must advance lru_large_value_zone_head_ when the zone-head page is removed (3 extra lines). - ReplaceLru must update lru_large_value_zone_head_ when the zone-head page is replaced by defrag (3 extra lines). - UpdateLruList must initialize lru_large_value_zone_head_ on first LV page insertion and handle the sentinel-vs-data-page duality (lru_large_value_zone_head_ == &tail_ccp_ for empty zone, data page otherwise) (3 extra lines + conceptual complexity). Dummy sentinel approach (new): - head_large_ccp_ is always in the list as the permanent boundary: head_ccp_ ← [SV pages] ← head_large_ccp_ ← [LV pages] ← tail_ccp_ - DetachLru: no zone-boundary update needed (sentinel never moves). - ReplaceLru: no zone-boundary update needed (sentinel never moves). - UpdateLruList: insert_before = has_large_value_ ? &tail_ccp_ : &head_large_ccp_. No empty-zone check needed. - Clean(): skip sentinel pages (parent_map_ == nullptr) in the scan loop (4 extra lines) — a small, clean addition consistent with how the loop already skips head_ccp_ (via start-offset) and tail_ccp_ (via loop condition). - Consistent with head_ccp_ / tail_ccp_ design. Renamed: lru_large_value_zone_head_ → head_large_ccp_ LruLargeValueZoneHead() returns &head_large_ccp_ (always valid, parent_map_ == nullptr indicating it is a sentinel). Updated test: zone_head → zone_sentinel with parent_map_ == nullptr check; zone-structure walk split into SV walk (up to sentinel) and LV walk (after sentinel); added assertion that newly inserted SV page lands immediately before the sentinel. Co-authored-by: xiexiaoy <5558602+xiexiaoy@users.noreply.github.com> --- tx_service/include/cc/cc_shard.h | 29 +++++++++------- tx_service/src/cc/cc_shard.cpp | 59 ++++++++++++++------------------ tx_service/tests/CcPage-Test.cpp | 59 +++++++++++++++++--------------- 3 files changed, 72 insertions(+), 75 deletions(-) diff --git a/tx_service/include/cc/cc_shard.h b/tx_service/include/cc/cc_shard.h index 091c85f0..152752c5 100644 --- a/tx_service/include/cc/cc_shard.h +++ b/tx_service/include/cc/cc_shard.h @@ -1023,15 +1023,16 @@ class CcShard } /** - * @brief Returns the current head of the large-value zone — the first - * large-value page in the LRU list (the one closest to the sentinel head). + * @brief Returns the dummy sentinel that permanently marks the boundary + * between the small-value zone and the large-value zone in the LRU list. * - * Used for testing: verifies that the large-value zone is non-empty and - * correctly maintained. + * Used for testing: verify zone structure. + * head_large_ccp_.parent_map_ is always nullptr (it is a sentinel, not a + * data page). */ const LruPage *LruLargeValueZoneHead() const { - return lru_large_value_zone_head_; + return &head_large_ccp_; } SystemHandler *GetSystemHandler() @@ -1313,16 +1314,18 @@ class CcShard // Page to start looking for cc entries to kick out on LRU chain. LruPage *clean_start_ccp_; - // Head of the large-value zone in the LRU list. Large-value pages are - // clustered at the tail (recent) end of the list so they are evicted only - // after all small-value pages have been evicted. This pointer points to the - // first (oldest) large-value page, i.e. the boundary between the two zones: + // Dummy sentinel that permanently marks the boundary between the + // small-value zone (SV) and the large-value zone (LV) in the LRU list. + // The LRU list always has the structure: // - // head ← [small-value pages] ← lru_large_value_zone_head_ ← [large-value - // pages] ← tail + // head_ccp_ ← [SV pages] ← head_large_ccp_ ← [LV pages] ← tail_ccp_ // - // It equals &tail_ccp_ when no large-value pages are in the list. - LruPage *lru_large_value_zone_head_; + // When the LV zone is empty head_large_ccp_.lru_next_ == &tail_ccp_. + // Small-value pages are inserted before head_large_ccp_ (SV MRU end); + // large-value pages are inserted before tail_ccp_ (LV MRU end). + // Because head_large_ccp_ is immovable, DetachLru and ReplaceLru need no + // zone-boundary maintenance, unlike the previous pointer-based approach. + LruPage head_large_ccp_; // The number of ccentry in all the ccmap of this ccshard. uint64_t size_; diff --git a/tx_service/src/cc/cc_shard.cpp b/tx_service/src/cc/cc_shard.cpp index b20a3a9c..d5a8633b 100644 --- a/tx_service/src/cc/cc_shard.cpp +++ b/tx_service/src/cc/cc_shard.cpp @@ -94,7 +94,7 @@ CcShard::CcShard( head_ccp_(nullptr), tail_ccp_(nullptr), clean_start_ccp_(nullptr), - lru_large_value_zone_head_(nullptr), + head_large_ccp_(nullptr), size_(0), ckpter_(nullptr), catalog_factory_{catalog_factory[0], @@ -125,10 +125,11 @@ CcShard::CcShard( (uint64_t) MB(node_memory_limit_mb) * 0.1 / core_cnt_; head_ccp_.lru_prev_ = nullptr; - head_ccp_.lru_next_ = &tail_ccp_; - tail_ccp_.lru_prev_ = &head_ccp_; + head_ccp_.lru_next_ = &head_large_ccp_; + head_large_ccp_.lru_prev_ = &head_ccp_; + head_large_ccp_.lru_next_ = &tail_ccp_; + tail_ccp_.lru_prev_ = &head_large_ccp_; tail_ccp_.lru_next_ = nullptr; - lru_large_value_zone_head_ = &tail_ccp_; thd_token_.reserve((size_t) core_cnt_ + 1); for (size_t idx = 0; idx < core_cnt_; ++idx) @@ -847,11 +848,6 @@ void CcShard::DetachLru(LruPage *page) { clean_start_ccp_ = page->lru_next_; } - // If page is the head of the large-value zone, advance the zone head. - if (lru_large_value_zone_head_ == page) - { - lru_large_value_zone_head_ = page->lru_next_; - } assert(prev != nullptr && next != nullptr); prev->lru_next_ = next; next->lru_prev_ = prev; @@ -867,19 +863,12 @@ void CcShard::ReplaceLru(LruPage *old_page, LruPage *new_page) old_page->lru_prev_ = nullptr; LruPage *lru_next = old_page->lru_next_; old_page->lru_next_ = nullptr; - // If page is the head to start looking for cc entry to kickout, move - // the clean head to the next page + // The replacement page inherits the large-value flag. + new_page->has_large_value_ = old_page->has_large_value_; if (clean_start_ccp_ == old_page) { clean_start_ccp_ = new_page; } - // The replacement page inherits the large-value flag and the zone-head - // role. - new_page->has_large_value_ = old_page->has_large_value_; - if (lru_large_value_zone_head_ == old_page) - { - lru_large_value_zone_head_ = new_page; - } lru_prev->lru_next_ = new_page; lru_next->lru_prev_ = new_page; new_page->lru_next_ = lru_next; @@ -900,16 +889,14 @@ void CcShard::UpdateLruList(LruPage *page, bool is_emplace) // Determine insertion point depending on whether the page has large values. // - // Large-value pages (has_large_value_ == true) always go at the true tail - // (most-recently-used end). This clusters them in the tail zone and ensures - // they are evicted only after all small-value pages have been evicted. + // Large-value pages (has_large_value_ == true) are inserted before + // tail_ccp_ (MRU end of the LV zone). Small-value pages are inserted + // before head_large_ccp_ (MRU end of the SV zone). This keeps the + // permanent list structure: // - // Small-value pages (has_large_value_ == false) are inserted just before - // the large-value zone (at lru_large_value_zone_head_->lru_prev_). When - // there are no large-value pages lru_large_value_zone_head_ == &tail_ccp_, - // so the behaviour is identical to the standard tail insertion. + // head_ccp_ ← [SV pages] ← head_large_ccp_ ← [LV pages] ← tail_ccp_ LruPage *insert_before = - page->has_large_value_ ? &tail_ccp_ : lru_large_value_zone_head_; + page->has_large_value_ ? &tail_ccp_ : &head_large_ccp_; // page already at the correct insertion position, just update the counter. if (page->lru_next_ == insert_before && insert_before->lru_prev_ == page) @@ -935,14 +922,6 @@ void CcShard::UpdateLruList(LruPage *page, bool is_emplace) ++access_counter_; page->last_access_ts_ = access_counter_; - // Maintain lru_large_value_zone_head_: when a large-value page is inserted - // and the zone was empty (lru_large_value_zone_head_ == &tail_ccp_), the - // new page becomes the zone head. - if (page->has_large_value_ && lru_large_value_zone_head_ == &tail_ccp_) - { - lru_large_value_zone_head_ = page; - } - // If the update is a emplace update, these new loaded data might be // kickable from cc map. Usually if the clean_start_page is at tail we're // not able to load new data into memory, except some special case where we @@ -1223,6 +1202,12 @@ std::pair CcShard::Clean() .count(); while (scan_page_cnt < CcShard::freeBatchSize && ccp != &tail_ccp_) { + // Skip sentinel pages (e.g., head_large_ccp_). + if (ccp->parent_map_ == nullptr) + { + ccp = ccp->lru_next_; + continue; + } // merge and removal might happen during Clean so ccp and ccp->lru_next_ // might change auto [freed, next] = ccp->parent_map_->CleanPageAndReBalance(ccp); @@ -1249,6 +1234,12 @@ std::pair CcShard::Clean() ccp = head_ccp_.lru_next_; while (ccp != &tail_ccp_) { + // Skip sentinel pages (e.g., head_large_ccp_). + if (ccp->parent_map_ == nullptr) + { + ccp = ccp->lru_next_; + continue; + } // merge and removal might happen during Clean so ccp and ccp->lru_next_ // might change auto [freed, next] = ccp->parent_map_->CleanPageAndReBalance(ccp); diff --git a/tx_service/tests/CcPage-Test.cpp b/tx_service/tests/CcPage-Test.cpp index bbc4d0e4..da87fa83 100644 --- a/tx_service/tests/CcPage-Test.cpp +++ b/tx_service/tests/CcPage-Test.cpp @@ -305,48 +305,51 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") large_map->RezoneAsLargeValueForTest(); shard.VerifyLruList(); - // lru_large_value_zone_head_ must now point into the large-value zone. - const LruPage *zone_head = shard.LruLargeValueZoneHead(); - REQUIRE(zone_head != nullptr); - REQUIRE(zone_head->parent_map_ != nullptr); // not a sentinel - - // Walk the LRU list and verify: - // head → [small_map pages] → zone_head → [large_map pages] → tail + // LruLargeValueZoneHead() now returns the permanent dummy sentinel + // head_large_ccp_ (parent_map_ == nullptr). The sentinel is always in the + // list as the immovable boundary between SV and LV zones. + const LruPage *zone_sentinel = shard.LruLargeValueZoneHead(); + REQUIRE(zone_sentinel != nullptr); + REQUIRE(zone_sentinel->parent_map_ == nullptr); // IS a sentinel + + // Walk the LRU list and verify zone structure: + // head_ccp_ → [SV pages] → zone_sentinel → [LV pages] → tail_ccp_ { - bool in_large_zone = false; - for (const LruPage *p = shard.LruHead()->lru_next_; - p->parent_map_ != nullptr; // sentinel tail has parent_map_==null + // SV zone: every page between head_ccp_ and zone_sentinel belongs to + // small_map. + for (const LruPage *p = shard.LruHead()->lru_next_; p != zone_sentinel; + p = p->lru_next_) + { + REQUIRE(p->parent_map_ == small_map.get()); + } + + // LV zone: every data page after zone_sentinel belongs to large_map. + bool has_lv_pages = false; + for (const LruPage *p = zone_sentinel->lru_next_; + p->parent_map_ != nullptr; // tail_ccp_ has parent_map_==nullptr p = p->lru_next_) { - if (p == zone_head) - { - in_large_zone = true; - } - if (in_large_zone) - { - REQUIRE(p->parent_map_ == large_map.get()); - } - else - { - REQUIRE(p->parent_map_ == small_map.get()); - } + REQUIRE(p->parent_map_ == large_map.get()); + has_lv_pages = true; } - // We must have entered the large zone. - REQUIRE(in_large_zone); + REQUIRE(has_lv_pages); } // ----------------------------------------------------------------------- - // PART 2: Small-value insertion stays before the zone head. + // PART 2: Small-value insertion stays before the zone sentinel. // ----------------------------------------------------------------------- - const LruPage *zone_head_before = shard.LruLargeValueZoneHead(); + // zone_sentinel is the permanent dummy page and must never move. + REQUIRE(shard.LruLargeValueZoneHead() == zone_sentinel); CompositeKey extra_sv_key = std::make_tuple(small_table, static_cast(MAP_SIZE + 1)); std::vector *> extra_sv_ptr = { &extra_sv_key}; REQUIRE(small_map->BulkEmplaceFreeForTest(extra_sv_ptr)); shard.VerifyLruList(); - // SV insertion must not change the zone head. - REQUIRE(shard.LruLargeValueZoneHead() == zone_head_before); + // Sentinel must not have moved after SV insertion. + REQUIRE(shard.LruLargeValueZoneHead() == zone_sentinel); + // The newly inserted SV page must be immediately before the sentinel. + REQUIRE(zone_sentinel->lru_prev_->parent_map_ == small_map.get()); // ----------------------------------------------------------------------- // PART 3: Full scan – all pages are evicted. From 741a80da74636746b3adb1d74e20a979ee6f27f3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 27 Feb 2026 09:15:22 +0000 Subject: [PATCH 11/15] Revert dummy sentinel: restore pointer-based lru_large_value_zone_head_ After comparing the two approaches the pointer-based zone boundary is preferred over the dummy-sentinel approach (head_large_ccp_). Restores cc_shard.h, cc_shard.cpp, and CcPage-Test.cpp to the state they had before the sentinel commit (991a12f), using: git checkout 42d757e~1 -- Co-authored-by: xiexiaoy <5558602+xiexiaoy@users.noreply.github.com> --- tx_service/include/cc/cc_shard.h | 29 +++++++--------- tx_service/src/cc/cc_shard.cpp | 59 ++++++++++++++++++-------------- tx_service/tests/CcPage-Test.cpp | 59 +++++++++++++++----------------- 3 files changed, 75 insertions(+), 72 deletions(-) diff --git a/tx_service/include/cc/cc_shard.h b/tx_service/include/cc/cc_shard.h index 152752c5..091c85f0 100644 --- a/tx_service/include/cc/cc_shard.h +++ b/tx_service/include/cc/cc_shard.h @@ -1023,16 +1023,15 @@ class CcShard } /** - * @brief Returns the dummy sentinel that permanently marks the boundary - * between the small-value zone and the large-value zone in the LRU list. + * @brief Returns the current head of the large-value zone — the first + * large-value page in the LRU list (the one closest to the sentinel head). * - * Used for testing: verify zone structure. - * head_large_ccp_.parent_map_ is always nullptr (it is a sentinel, not a - * data page). + * Used for testing: verifies that the large-value zone is non-empty and + * correctly maintained. */ const LruPage *LruLargeValueZoneHead() const { - return &head_large_ccp_; + return lru_large_value_zone_head_; } SystemHandler *GetSystemHandler() @@ -1314,18 +1313,16 @@ class CcShard // Page to start looking for cc entries to kick out on LRU chain. LruPage *clean_start_ccp_; - // Dummy sentinel that permanently marks the boundary between the - // small-value zone (SV) and the large-value zone (LV) in the LRU list. - // The LRU list always has the structure: + // Head of the large-value zone in the LRU list. Large-value pages are + // clustered at the tail (recent) end of the list so they are evicted only + // after all small-value pages have been evicted. This pointer points to the + // first (oldest) large-value page, i.e. the boundary between the two zones: // - // head_ccp_ ← [SV pages] ← head_large_ccp_ ← [LV pages] ← tail_ccp_ + // head ← [small-value pages] ← lru_large_value_zone_head_ ← [large-value + // pages] ← tail // - // When the LV zone is empty head_large_ccp_.lru_next_ == &tail_ccp_. - // Small-value pages are inserted before head_large_ccp_ (SV MRU end); - // large-value pages are inserted before tail_ccp_ (LV MRU end). - // Because head_large_ccp_ is immovable, DetachLru and ReplaceLru need no - // zone-boundary maintenance, unlike the previous pointer-based approach. - LruPage head_large_ccp_; + // It equals &tail_ccp_ when no large-value pages are in the list. + LruPage *lru_large_value_zone_head_; // The number of ccentry in all the ccmap of this ccshard. uint64_t size_; diff --git a/tx_service/src/cc/cc_shard.cpp b/tx_service/src/cc/cc_shard.cpp index d5a8633b..b20a3a9c 100644 --- a/tx_service/src/cc/cc_shard.cpp +++ b/tx_service/src/cc/cc_shard.cpp @@ -94,7 +94,7 @@ CcShard::CcShard( head_ccp_(nullptr), tail_ccp_(nullptr), clean_start_ccp_(nullptr), - head_large_ccp_(nullptr), + lru_large_value_zone_head_(nullptr), size_(0), ckpter_(nullptr), catalog_factory_{catalog_factory[0], @@ -125,11 +125,10 @@ CcShard::CcShard( (uint64_t) MB(node_memory_limit_mb) * 0.1 / core_cnt_; head_ccp_.lru_prev_ = nullptr; - head_ccp_.lru_next_ = &head_large_ccp_; - head_large_ccp_.lru_prev_ = &head_ccp_; - head_large_ccp_.lru_next_ = &tail_ccp_; - tail_ccp_.lru_prev_ = &head_large_ccp_; + head_ccp_.lru_next_ = &tail_ccp_; + tail_ccp_.lru_prev_ = &head_ccp_; tail_ccp_.lru_next_ = nullptr; + lru_large_value_zone_head_ = &tail_ccp_; thd_token_.reserve((size_t) core_cnt_ + 1); for (size_t idx = 0; idx < core_cnt_; ++idx) @@ -848,6 +847,11 @@ void CcShard::DetachLru(LruPage *page) { clean_start_ccp_ = page->lru_next_; } + // If page is the head of the large-value zone, advance the zone head. + if (lru_large_value_zone_head_ == page) + { + lru_large_value_zone_head_ = page->lru_next_; + } assert(prev != nullptr && next != nullptr); prev->lru_next_ = next; next->lru_prev_ = prev; @@ -863,12 +867,19 @@ void CcShard::ReplaceLru(LruPage *old_page, LruPage *new_page) old_page->lru_prev_ = nullptr; LruPage *lru_next = old_page->lru_next_; old_page->lru_next_ = nullptr; - // The replacement page inherits the large-value flag. - new_page->has_large_value_ = old_page->has_large_value_; + // If page is the head to start looking for cc entry to kickout, move + // the clean head to the next page if (clean_start_ccp_ == old_page) { clean_start_ccp_ = new_page; } + // The replacement page inherits the large-value flag and the zone-head + // role. + new_page->has_large_value_ = old_page->has_large_value_; + if (lru_large_value_zone_head_ == old_page) + { + lru_large_value_zone_head_ = new_page; + } lru_prev->lru_next_ = new_page; lru_next->lru_prev_ = new_page; new_page->lru_next_ = lru_next; @@ -889,14 +900,16 @@ void CcShard::UpdateLruList(LruPage *page, bool is_emplace) // Determine insertion point depending on whether the page has large values. // - // Large-value pages (has_large_value_ == true) are inserted before - // tail_ccp_ (MRU end of the LV zone). Small-value pages are inserted - // before head_large_ccp_ (MRU end of the SV zone). This keeps the - // permanent list structure: + // Large-value pages (has_large_value_ == true) always go at the true tail + // (most-recently-used end). This clusters them in the tail zone and ensures + // they are evicted only after all small-value pages have been evicted. // - // head_ccp_ ← [SV pages] ← head_large_ccp_ ← [LV pages] ← tail_ccp_ + // Small-value pages (has_large_value_ == false) are inserted just before + // the large-value zone (at lru_large_value_zone_head_->lru_prev_). When + // there are no large-value pages lru_large_value_zone_head_ == &tail_ccp_, + // so the behaviour is identical to the standard tail insertion. LruPage *insert_before = - page->has_large_value_ ? &tail_ccp_ : &head_large_ccp_; + page->has_large_value_ ? &tail_ccp_ : lru_large_value_zone_head_; // page already at the correct insertion position, just update the counter. if (page->lru_next_ == insert_before && insert_before->lru_prev_ == page) @@ -922,6 +935,14 @@ void CcShard::UpdateLruList(LruPage *page, bool is_emplace) ++access_counter_; page->last_access_ts_ = access_counter_; + // Maintain lru_large_value_zone_head_: when a large-value page is inserted + // and the zone was empty (lru_large_value_zone_head_ == &tail_ccp_), the + // new page becomes the zone head. + if (page->has_large_value_ && lru_large_value_zone_head_ == &tail_ccp_) + { + lru_large_value_zone_head_ = page; + } + // If the update is a emplace update, these new loaded data might be // kickable from cc map. Usually if the clean_start_page is at tail we're // not able to load new data into memory, except some special case where we @@ -1202,12 +1223,6 @@ std::pair CcShard::Clean() .count(); while (scan_page_cnt < CcShard::freeBatchSize && ccp != &tail_ccp_) { - // Skip sentinel pages (e.g., head_large_ccp_). - if (ccp->parent_map_ == nullptr) - { - ccp = ccp->lru_next_; - continue; - } // merge and removal might happen during Clean so ccp and ccp->lru_next_ // might change auto [freed, next] = ccp->parent_map_->CleanPageAndReBalance(ccp); @@ -1234,12 +1249,6 @@ std::pair CcShard::Clean() ccp = head_ccp_.lru_next_; while (ccp != &tail_ccp_) { - // Skip sentinel pages (e.g., head_large_ccp_). - if (ccp->parent_map_ == nullptr) - { - ccp = ccp->lru_next_; - continue; - } // merge and removal might happen during Clean so ccp and ccp->lru_next_ // might change auto [freed, next] = ccp->parent_map_->CleanPageAndReBalance(ccp); diff --git a/tx_service/tests/CcPage-Test.cpp b/tx_service/tests/CcPage-Test.cpp index da87fa83..bbc4d0e4 100644 --- a/tx_service/tests/CcPage-Test.cpp +++ b/tx_service/tests/CcPage-Test.cpp @@ -305,51 +305,48 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") large_map->RezoneAsLargeValueForTest(); shard.VerifyLruList(); - // LruLargeValueZoneHead() now returns the permanent dummy sentinel - // head_large_ccp_ (parent_map_ == nullptr). The sentinel is always in the - // list as the immovable boundary between SV and LV zones. - const LruPage *zone_sentinel = shard.LruLargeValueZoneHead(); - REQUIRE(zone_sentinel != nullptr); - REQUIRE(zone_sentinel->parent_map_ == nullptr); // IS a sentinel - - // Walk the LRU list and verify zone structure: - // head_ccp_ → [SV pages] → zone_sentinel → [LV pages] → tail_ccp_ - { - // SV zone: every page between head_ccp_ and zone_sentinel belongs to - // small_map. - for (const LruPage *p = shard.LruHead()->lru_next_; p != zone_sentinel; - p = p->lru_next_) - { - REQUIRE(p->parent_map_ == small_map.get()); - } + // lru_large_value_zone_head_ must now point into the large-value zone. + const LruPage *zone_head = shard.LruLargeValueZoneHead(); + REQUIRE(zone_head != nullptr); + REQUIRE(zone_head->parent_map_ != nullptr); // not a sentinel - // LV zone: every data page after zone_sentinel belongs to large_map. - bool has_lv_pages = false; - for (const LruPage *p = zone_sentinel->lru_next_; - p->parent_map_ != nullptr; // tail_ccp_ has parent_map_==nullptr + // Walk the LRU list and verify: + // head → [small_map pages] → zone_head → [large_map pages] → tail + { + bool in_large_zone = false; + for (const LruPage *p = shard.LruHead()->lru_next_; + p->parent_map_ != nullptr; // sentinel tail has parent_map_==null p = p->lru_next_) { - REQUIRE(p->parent_map_ == large_map.get()); - has_lv_pages = true; + if (p == zone_head) + { + in_large_zone = true; + } + if (in_large_zone) + { + REQUIRE(p->parent_map_ == large_map.get()); + } + else + { + REQUIRE(p->parent_map_ == small_map.get()); + } } - REQUIRE(has_lv_pages); + // We must have entered the large zone. + REQUIRE(in_large_zone); } // ----------------------------------------------------------------------- - // PART 2: Small-value insertion stays before the zone sentinel. + // PART 2: Small-value insertion stays before the zone head. // ----------------------------------------------------------------------- - // zone_sentinel is the permanent dummy page and must never move. - REQUIRE(shard.LruLargeValueZoneHead() == zone_sentinel); + const LruPage *zone_head_before = shard.LruLargeValueZoneHead(); CompositeKey extra_sv_key = std::make_tuple(small_table, static_cast(MAP_SIZE + 1)); std::vector *> extra_sv_ptr = { &extra_sv_key}; REQUIRE(small_map->BulkEmplaceFreeForTest(extra_sv_ptr)); shard.VerifyLruList(); - // Sentinel must not have moved after SV insertion. - REQUIRE(shard.LruLargeValueZoneHead() == zone_sentinel); - // The newly inserted SV page must be immediately before the sentinel. - REQUIRE(zone_sentinel->lru_prev_->parent_map_ == small_map.get()); + // SV insertion must not change the zone head. + REQUIRE(shard.LruLargeValueZoneHead() == zone_head_before); // ----------------------------------------------------------------------- // PART 3: Full scan – all pages are evicted. From af3e67af7bb6a1b811511b4502dbe9800348ded2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 27 Feb 2026 11:02:38 +0000 Subject: [PATCH 12/15] Add eager re-zone via MaybeMarkAndRezoneAsLargeValue at all payload-set sites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Feasibility evaluation: FEASIBLE. All 7 payload-assignment paths already have the LruPage* in scope and execute on the single-threaded shard coroutine, so there are no concurrency or scoping issues. New private helper MaybeMarkAndRezoneAsLargeValue(LruPage*, size_t): - No-op when txservice_large_value_threshold == 0 (disabled), page is null, has_large_value_ already set, or payload_size <= threshold. - Otherwise sets page->has_large_value_ = true and calls UpdateLruList to move the page into the large-value zone immediately. Called eagerly at all 7 payload-assignment sites: 1. PostWriteCc — after SetCurrentPayload / DeserializeCurrentPayload 2. AcquireAllCc — after SetCurrentPayload(payload) 3. ReadCc (outside) — after PassInCurrentPayload 4. RemoteReadOutside — after DeserializeCurrentPayload (Normal path) 5. ReplayLogCc — after DeserializeCurrentPayload (Insert/Update) 6. UploadBatchCc — after SetCurrentPayload(commit_val) for Normal 7. BackFill — after DeserializeCurrentPayload (non-Deleted path) The lazy fallback in CanBeCleaned (has_blocked_large_value_ / needs_rezoning) is retained as a safety net for any path not listed above. Also: - TriggerEagerRezoneForTest() public test helper: invokes MaybeMarkAndRezoneAsLargeValue for every page in the map using its actual max entry PayloadSize(). Simulates what production commit / backfill paths do without going through Execute handlers. - New test case "Eager re-zone on large-value payload": inserts two maps (SV + LV), assigns large payloads, calls TriggerEagerRezoneForTest WITHOUT any clean scan, then asserts correct zone structure. Confirms the eager path fires before CanBeCleaned is ever invoked. Co-authored-by: xiexiaoy <5558602+xiexiaoy@users.noreply.github.com> --- _codeql_detected_source_root | 1 + tx_service/include/cc/template_cc_map.h | 73 ++++++++++ tx_service/tests/CcPage-Test.cpp | 168 ++++++++++++++++++++++++ 3 files changed, 242 insertions(+) create mode 120000 _codeql_detected_source_root diff --git a/_codeql_detected_source_root b/_codeql_detected_source_root new file mode 120000 index 00000000..945c9b46 --- /dev/null +++ b/_codeql_detected_source_root @@ -0,0 +1 @@ +. \ No newline at end of file diff --git a/tx_service/include/cc/template_cc_map.h b/tx_service/include/cc/template_cc_map.h index d5e40562..391e248c 100644 --- a/tx_service/include/cc/template_cc_map.h +++ b/tx_service/include/cc/template_cc_map.h @@ -605,6 +605,9 @@ class TemplateCcMap : public CcMap cce->payload_.DeserializeCurrentPayload(payload_str->data(), offset); } + // Eagerly move to the large-value zone when the committed + // payload exceeds the threshold. + MaybeMarkAndRezoneAsLargeValue(cc_page, cce->PayloadSize()); RecordStatus cce_old_status = cce->PayloadStatus(); RecordStatus new_status = @@ -1116,6 +1119,9 @@ class TemplateCcMap : public CcMap req.CommitType() != PostWriteType::DowngradeLock) { cce_ptr->payload_.SetCurrentPayload(payload); + // Eagerly move to the large-value zone if needed. + MaybeMarkAndRezoneAsLargeValue(cce_ptr->GetCcPage(), + cce_ptr->PayloadSize()); // A prepare commit request only installs the dirty value, // and does not change the record status and commit_ts. if (req.CommitType() == PostWriteType::Commit || @@ -2030,6 +2036,9 @@ class TemplateCcMap : public CcMap if (cce->PayloadStatus() == RecordStatus::Unknown) { cce->payload_.PassInCurrentPayload(std::move(tmp_payload)); + // Eagerly move to the large-value zone if needed. + MaybeMarkAndRezoneAsLargeValue(cce->GetCcPage(), + cce->PayloadSize()); cce->SetCommitTsPayloadStatus(req.ReadTimestamp(), tmp_payload_status); } @@ -2231,6 +2240,9 @@ class TemplateCcMap : public CcMap size_t offset = 0; cce->payload_.DeserializeCurrentPayload(req.rec_str_->data(), offset); + // Eagerly move to the large-value zone if needed. + MaybeMarkAndRezoneAsLargeValue(cce->GetCcPage(), + cce->PayloadSize()); } cce->SetCommitTsPayloadStatus(req.CommitTs(), req.RecordStatus()); } @@ -7002,6 +7014,8 @@ class TemplateCcMap : public CcMap { cce->payload_.DeserializeCurrentPayload(log_blob.data(), offset); + // Eagerly move to the large-value zone if needed. + MaybeMarkAndRezoneAsLargeValue(ccp, cce->PayloadSize()); rec_status = RecordStatus::Normal; } else @@ -7758,6 +7772,8 @@ class TemplateCcMap : public CcMap if (rec_status == RecordStatus::Normal) { cce->payload_.SetCurrentPayload(commit_val); + // Eagerly move to the large-value zone if needed. + MaybeMarkAndRezoneAsLargeValue(cc_page, cce->PayloadSize()); } else { @@ -9276,6 +9292,30 @@ class TemplateCcMap : public CcMap } } + /** + * @brief Iterates over all pages and calls MaybeMarkAndRezoneAsLargeValue + * with the maximum PayloadSize() found among each page's entries. + * + * This simulates the eager re-zone that happens in production when a write + * commits a large payload via PostWriteCc / BackFill / ReplayLogCc etc. + * Used in tests to verify that the eager path works without going through + * a clean-page scan. + */ + void TriggerEagerRezoneForTest() + { + for (auto &[key, page_ptr] : ccmp_) + { + CcPage *page = + page_ptr.get(); + size_t max_payload = 0; + for (const auto &cce : page->entries_) + { + max_payload = std::max(max_payload, cce->PayloadSize()); + } + MaybeMarkAndRezoneAsLargeValue(page, max_payload); + } + } + /// Returns the number of CcPage objects (btree nodes) in this cc map. /// Used in tests that verify zone-page-count invariants. size_t PageCount() const @@ -10603,6 +10643,8 @@ class TemplateCcMap : public CcMap size_t offset = 0; cce->payload_.DeserializeCurrentPayload(rec_str.c_str(), offset); + // Eagerly move to the large-value zone if needed. + MaybeMarkAndRezoneAsLargeValue(ccp, cce->PayloadSize()); } } else if (cce_version > 1 && commit_ts < cce_version && @@ -12019,6 +12061,37 @@ class TemplateCcMap : public CcMap return &pos_inf_page_; } + /** + * @brief Eagerly marks a page as a large-value page and moves it into the + * large-value zone of the LRU list when the installed payload exceeds + * txservice_large_value_threshold. + * + * Called from every payload-assignment path (PostWriteCc, BackFill, + * ReplayLogCc, UploadBatchCc, etc.) so that large-value pages are + * immediately clustered near the LRU tail and are only evicted after all + * small-value pages have been evicted. + * + * The lazy fallback in CanBeCleaned (has_blocked_large_value_) is kept as + * a safety net for any path not covered here. + * + * @param page The LRU page that owns the updated entry. + * @param payload_size The serialized size of the newly installed payload. + */ + void MaybeMarkAndRezoneAsLargeValue(LruPage *page, size_t payload_size) + { + if (txservice_large_value_threshold == 0 || page == nullptr || + page->has_large_value_ || + payload_size <= txservice_large_value_threshold) + { + return; + } + page->has_large_value_ = true; + if (page->lru_next_ != nullptr) + { + shard_->UpdateLruList(page, false); + } + } + absl::btree_map< KeyT, std::unique_ptr< diff --git a/tx_service/tests/CcPage-Test.cpp b/tx_service/tests/CcPage-Test.cpp index bbc4d0e4..afa6ae04 100644 --- a/tx_service/tests/CcPage-Test.cpp +++ b/tx_service/tests/CcPage-Test.cpp @@ -369,6 +369,174 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") local_cc_shards.Terminate(); } +// --------------------------------------------------------------------------- +// Test that MaybeMarkAndRezoneAsLargeValue fires eagerly at payload-set time, +// BEFORE any clean-page scan takes place. +// --------------------------------------------------------------------------- +TEST_CASE("Eager re-zone on large-value payload", "[cc-page]") +{ + std::unordered_map> ng_configs{ + {0, {NodeConfig(0, "127.0.0.1", 8600)}}}; + std::map tx_cnf{{"node_memory_limit_mb", 1000}, + {"enable_key_cache", 0}, + {"reltime_sampling", 0}, + {"range_split_worker_num", 1}, + {"core_num", 1}, + {"realtime_sampling", 0}, + {"checkpointer_interval", 10}, + {"enable_shard_heap_defragment", 0}, + {"node_log_limit_mb", 1000}}; + LocalCcShards local_cc_shards( + 0, 0, tx_cnf, nullptr, nullptr, &ng_configs, 2, nullptr, nullptr, true); + CcShard shard(0, + 1, + 10000, + 10000, + false, + 0, + local_cc_shards, + nullptr, + nullptr, + &ng_configs, + 2); + shard.Init(); + std::string raft_path(""); + Sharder::Instance(0, + &ng_configs, + 0, + nullptr, + nullptr, + &local_cc_shards, + nullptr, + &raft_path); + + const size_t MAP_SIZE = 200; + const size_t LARGE_PAYLOAD_SIZE = 1024; + + using TestCcMap = TemplateCcMap, + CompositeRecord, + true, + true>; + + // Small-value map — pages stay in the SV zone at all times. + std::string small_table = "eager_sv_test"; + TableName small_tname( + small_table, TableType::Primary, TableEngine::EloqSql); + auto small_map = + std::make_unique(&shard, 0, small_tname, 1, nullptr, true); + + // Large-value map — pages will be re-zoned eagerly once we assign a large + // payload and call TriggerEagerRezoneForTest(). + std::string large_table = "eager_lv_test"; + TableName large_tname( + large_table, TableType::Primary, TableEngine::EloqSql); + auto large_map = + std::make_unique(&shard, 0, large_tname, 1, nullptr, true); + + auto make_keys = [](const std::string &tname, + size_t cnt, + std::vector> &storage) + -> std::vector *> + { + for (size_t i = 0; i < cnt; i++) + { + storage.emplace_back(std::make_tuple(tname, static_cast(i))); + } + std::vector *> ptrs; + for (auto &k : storage) + { + ptrs.push_back(&k); + } + return ptrs; + }; + + std::vector> small_keys; + REQUIRE(small_map->BulkEmplaceFreeForTest( + make_keys(small_table, MAP_SIZE, small_keys))); + std::vector> large_keys; + REQUIRE(large_map->BulkEmplaceFreeForTest( + make_keys(large_table, MAP_SIZE, large_keys))); + + // Threshold = half the payload size so all large_map entries qualify + // (LARGE_PAYLOAD_SIZE > LARGE_PAYLOAD_SIZE / 2). + txservice_large_value_threshold = LARGE_PAYLOAD_SIZE / 2; + + // ----------------------------------------------------------------------- + // Before re-zone: both maps are in the SV zone (has_large_value_ == false). + // The LRU large-value zone is empty: zone head points to the tail sentinel + // (parent_map_ == nullptr). + // ----------------------------------------------------------------------- + { + const LruPage *zone_head_before = shard.LruLargeValueZoneHead(); + REQUIRE(zone_head_before != nullptr); + REQUIRE(zone_head_before->parent_map_ == nullptr); // tail sentinel = empty zone + } + + // Assign large payloads to the large-value map entries. + auto large_payload = + std::make_shared(42, LARGE_PAYLOAD_SIZE); + large_map->SetPayloadForTest(large_payload); + shard.VerifyLruList(); + + // ----------------------------------------------------------------------- + // Eager re-zone: simulate what happens when the payload is installed via + // PostWriteCc / BackFill / ReplayLogCc. No clean scan is involved. + // ----------------------------------------------------------------------- + large_map->TriggerEagerRezoneForTest(); + shard.VerifyLruList(); + + // After TriggerEagerRezoneForTest the zone must be non-empty. + const LruPage *zone_head = shard.LruLargeValueZoneHead(); + REQUIRE(zone_head != nullptr); + REQUIRE(zone_head->parent_map_ != nullptr); // must point to a real page + + // All pages of large_map must be in the LV zone (at or after zone_head), + // and all pages of small_map must be in the SV zone (before zone_head). + { + bool in_large_zone = false; + for (const LruPage *p = shard.LruHead()->lru_next_; + p->parent_map_ != nullptr; + p = p->lru_next_) + { + if (p == zone_head) + { + in_large_zone = true; + } + if (in_large_zone) + { + INFO("page parent_map should be large_map in LV zone"); + REQUIRE(p->parent_map_ == large_map.get()); + } + else + { + INFO("page parent_map should be small_map in SV zone"); + REQUIRE(p->parent_map_ == small_map.get()); + } + } + REQUIRE(in_large_zone); + } + + // ----------------------------------------------------------------------- + // Clean scan: all entries should be evictable (all IsFree). + // ----------------------------------------------------------------------- + size_t total_freed = 0; + while (true) + { + auto [free_cnt, yield] = shard.Clean(); + shard.VerifyLruList(); + total_freed += free_cnt; + if (free_cnt == 0) + { + break; + } + } + REQUIRE(total_freed == MAP_SIZE + MAP_SIZE); + + // Restore global defaults. + txservice_large_value_threshold = 0; + local_cc_shards.Terminate(); +} + } // namespace txservice int main(int argc, char **argv) From aaeb9981c1f8d22c13084ee1192dca2bc7a9e69c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 27 Feb 2026 11:02:55 +0000 Subject: [PATCH 13/15] Remove accidentally committed _codeql_detected_source_root symlink Co-authored-by: xiexiaoy <5558602+xiexiaoy@users.noreply.github.com> --- .gitignore | 1 + _codeql_detected_source_root | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 120000 _codeql_detected_source_root diff --git a/.gitignore b/.gitignore index de9ce42c..c0813a19 100644 --- a/.gitignore +++ b/.gitignore @@ -358,3 +358,4 @@ compile_commands.json cmake-build-debug/ .cache +_codeql_detected_source_root diff --git a/_codeql_detected_source_root b/_codeql_detected_source_root deleted file mode 120000 index 945c9b46..00000000 --- a/_codeql_detected_source_root +++ /dev/null @@ -1 +0,0 @@ -. \ No newline at end of file From 5e5ab610ff45f212af146266de0b898a7ea4ecb0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 28 Feb 2026 07:17:09 +0000 Subject: [PATCH 14/15] Scope large-value zone eviction policy to ObjectCcMap (EloqKV) only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The payload-size-aware large-value zone policy is only intended for EloqKV (ObjectCcMap). It must not affect EloqSQL / EloqDoc tables served by RangeCcMap, CatalogCcMap, or ClusterConfigCcMap. Add virtual bool CcMap::IsLargeValueZoneEnabled() const { return false; }. Default: disabled for every CcMap subclass. Override in ObjectCcMap: return txservice_large_value_threshold > 0. Only ObjectCcMap / EloqKV enables the zone. Update MaybeMarkAndRezoneAsLargeValue in TemplateCcMap: replace: txservice_large_value_threshold == 0 with: !IsLargeValueZoneEnabled() Virtual dispatch on 'this' → no-op for RangeCcMap instances. Update CcPageCleanGuardWithoutKickoutCc::CanBeCleaned: replace: txservice_large_value_threshold > 0 with: page_->parent_map_ != nullptr && page_->parent_map_->IsLargeValueZoneEnabled() Virtual dispatch via parent_map_ → no-op for non-ObjectCcMap pages. Update CcPage-Test.cpp: The tests use TemplateCcMap<..., true, true> (not ObjectCcMap). Add LargeValueTestCcMap thin test subclass that overrides IsLargeValueZoneEnabled() to return txservice_large_value_threshold > 0. Use it in the two large-value test cases. Co-authored-by: xiexiaoy <5558602+xiexiaoy@users.noreply.github.com> --- tx_service/include/cc/cc_map.h | 9 +++++++ tx_service/include/cc/cc_page_clean_guard.h | 21 +++++++++------ tx_service/include/cc/object_cc_map.h | 6 +++++ tx_service/include/cc/template_cc_map.h | 5 +++- tx_service/tests/CcPage-Test.cpp | 30 +++++++++++++++------ 5 files changed, 54 insertions(+), 17 deletions(-) diff --git a/tx_service/include/cc/cc_map.h b/tx_service/include/cc/cc_map.h index 0d1434b6..59910dec 100644 --- a/tx_service/include/cc/cc_map.h +++ b/tx_service/include/cc/cc_map.h @@ -219,6 +219,15 @@ class CcMap { } + // Returns true if the payload-size-aware large-value zone eviction policy + // is active for this map. Only ObjectCcMap (EloqKV) overrides this to + // return true; all other maps (RangeCcMap, CatalogCcMap, etc.) return false + // so the policy has no effect on EloqSQL / EloqDoc tables. + virtual bool IsLargeValueZoneEnabled() const + { + return false; + } + virtual std::pair CleanPageAndReBalance( LruPage *page, KickoutCcEntryCc *kickout_cc = nullptr, diff --git a/tx_service/include/cc/cc_page_clean_guard.h b/tx_service/include/cc/cc_page_clean_guard.h index 42d2e9c4..251cacac 100644 --- a/tx_service/include/cc/cc_page_clean_guard.h +++ b/tx_service/include/cc/cc_page_clean_guard.h @@ -417,14 +417,19 @@ struct CcPageCleanGuardWithoutKickoutCc return {false, false}; } - // Payload-size-aware eviction: mark the page as a large-value page so - // that UpdateLruList places it in the large-value zone (tail end) of - // the LRU list. The page is still evictable here — protection is - // enforced positionally (large-value pages are evicted only after all - // small-value pages). Setting has_blocked_large_value_ signals - // CleanPageAndReBalance to re-zone the page immediately via - // UpdateLruList in case it was in the small-value zone. - if (txservice_large_value_threshold > 0 && + // Payload-size-aware eviction (ObjectCcMap / EloqKV only): mark the + // page as a large-value page so that UpdateLruList places it in the + // large-value zone (tail end) of the LRU list. The page is still + // evictable here — protection is positional (large-value pages are + // evicted only after all small-value pages). Setting + // has_blocked_large_value_ signals CleanPageAndReBalance to re-zone + // the page immediately via UpdateLruList in case it was in the + // small-value zone. + // IsLargeValueZoneEnabled() returns false for all non-ObjectCcMap + // types (RangeCcMap, CatalogCcMap, etc.), so this block is a no-op + // for EloqSQL and EloqDoc tables. + if (this->page_->parent_map_ != nullptr && + this->page_->parent_map_->IsLargeValueZoneEnabled() && cce->PayloadSize() > txservice_large_value_threshold) { if (!this->page_->has_large_value_) diff --git a/tx_service/include/cc/object_cc_map.h b/tx_service/include/cc/object_cc_map.h index fca0f59b..08ebda4f 100644 --- a/tx_service/include/cc/object_cc_map.h +++ b/tx_service/include/cc/object_cc_map.h @@ -104,6 +104,12 @@ class ObjectCcMap : public TemplateCcMap using TemplateCcMap::Type; using TemplateCcMap::CleanEntry; + // Enable the payload-size-aware large-value zone policy for EloqKV. + bool IsLargeValueZoneEnabled() const override + { + return txservice_large_value_threshold > 0; + } + bool Execute(ApplyCc &req) override { TX_TRACE_ACTION_WITH_CONTEXT( diff --git a/tx_service/include/cc/template_cc_map.h b/tx_service/include/cc/template_cc_map.h index 391e248c..bcbe3a26 100644 --- a/tx_service/include/cc/template_cc_map.h +++ b/tx_service/include/cc/template_cc_map.h @@ -12066,6 +12066,9 @@ class TemplateCcMap : public CcMap * large-value zone of the LRU list when the installed payload exceeds * txservice_large_value_threshold. * + * Only active when IsLargeValueZoneEnabled() returns true (i.e. for + * ObjectCcMap / EloqKV). Has no effect on RangeCcMap (EloqSQL / EloqDoc). + * * Called from every payload-assignment path (PostWriteCc, BackFill, * ReplayLogCc, UploadBatchCc, etc.) so that large-value pages are * immediately clustered near the LRU tail and are only evicted after all @@ -12079,7 +12082,7 @@ class TemplateCcMap : public CcMap */ void MaybeMarkAndRezoneAsLargeValue(LruPage *page, size_t payload_size) { - if (txservice_large_value_threshold == 0 || page == nullptr || + if (!IsLargeValueZoneEnabled() || page == nullptr || page->has_large_value_ || payload_size <= txservice_large_value_threshold) { diff --git a/tx_service/tests/CcPage-Test.cpp b/tx_service/tests/CcPage-Test.cpp index afa6ae04..0dfd612a 100644 --- a/tx_service/tests/CcPage-Test.cpp +++ b/tx_service/tests/CcPage-Test.cpp @@ -200,6 +200,20 @@ struct LargeCompositeRecord : public CompositeRecord size_t reported_size_; }; +// Test-only subclass of TemplateCcMap that enables the large-value zone +// (simulating ObjectCcMap behaviour for tests that use different template +// parameters such as VersionedRecord=true / RangePartitioned=true). +template +class LargeValueTestCcMap : public TemplateCcMap +{ +public: + using TemplateCcMap::TemplateCcMap; + bool IsLargeValueZoneEnabled() const override + { + return txservice_large_value_threshold > 0; + } +}; + TEST_CASE("Large-value eviction protection test", "[cc-page]") { std::unordered_map> ng_configs{ @@ -240,10 +254,10 @@ TEST_CASE("Large-value eviction protection test", "[cc-page]") const size_t MAP_SIZE = 200; const size_t LARGE_PAYLOAD_SIZE = 1024; - using TestCcMap = TemplateCcMap, - CompositeRecord, - true, - true>; + using TestCcMap = LargeValueTestCcMap, + CompositeRecord, + true, + true>; // Small-value map – pages stay in the head (small-value) zone. std::string small_table = "small_val_test"; @@ -413,10 +427,10 @@ TEST_CASE("Eager re-zone on large-value payload", "[cc-page]") const size_t MAP_SIZE = 200; const size_t LARGE_PAYLOAD_SIZE = 1024; - using TestCcMap = TemplateCcMap, - CompositeRecord, - true, - true>; + using TestCcMap = LargeValueTestCcMap, + CompositeRecord, + true, + true>; // Small-value map — pages stay in the SV zone at all times. std::string small_table = "eager_sv_test"; From 1b1380f27cfda6fb22c03810942200b1fbef97ef Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 28 Feb 2026 07:33:45 +0000 Subject: [PATCH 15/15] Move eager re-zone calls from TemplateCcMap to ObjectCcMap EloqKV uses ApplyCc (not ReadCc/AcquireCc) to read and write ObjectCcMap. The previous implementation placed MaybeMarkAndRezoneAsLargeValue calls in TemplateCcMap's Execute overloads (PostWriteCc, AcquireAllCc, ReadCc, ReplayLogCc, UploadBatchCc, BackFill). However, ObjectCcMap overrides all of these, so those calls were dead code for EloqKV. Remove all 7 MaybeMarkAndRezoneAsLargeValue calls from TemplateCcMap: - Dead code for EloqKV (ObjectCcMap overrides every method they were in) - No-ops for EloqSQL/EloqDoc (IsLargeValueZoneEnabled() returns false) Add MaybeMarkAndRezoneAsLargeValue at 6 correct payload-set sites in ObjectCcMap: 1. Execute(ApplyCc): apply_and_commit_ path after payload commit (status == Normal) 2. Execute(PostWriteCc): after dirty/pending-command commit when commit_ts > 0 3. Execute(UploadBatchCc): after PassInCurrentPayload for Normal records (migration) 4. Execute(KeyObjectStandbyForwardCc): after commands committed to payload (standby) 5. Execute(ReplayLogCc): after EmplaceAndCommitBufferedTxnCommand, payload == Normal 6. BackFill: after DeserializeCurrentPayload for Normal records (KV load path) Update MaybeMarkAndRezoneAsLargeValue doc comment to list the correct sites. Co-authored-by: xiexiaoy <5558602+xiexiaoy@users.noreply.github.com> --- tx_service/include/cc/object_cc_map.h | 22 ++++++++++++++++++ tx_service/include/cc/template_cc_map.h | 30 +++++-------------------- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/tx_service/include/cc/object_cc_map.h b/tx_service/include/cc/object_cc_map.h index 08ebda4f..4da6f294 100644 --- a/tx_service/include/cc/object_cc_map.h +++ b/tx_service/include/cc/object_cc_map.h @@ -1207,6 +1207,10 @@ class ObjectCcMap : public TemplateCcMap CommitCommandOnPayload( cce->payload_.cur_payload_, status, *cmd); } + if (status == RecordStatus::Normal) + { + MaybeMarkAndRezoneAsLargeValue(ccp, cce->PayloadSize()); + } // Reset the dirty status. cce->SetDirtyPayload(nullptr); @@ -1447,6 +1451,10 @@ class ObjectCcMap : public TemplateCcMap cce->ReleaseForwardEntry(); shard_->ForwardStandbyMessage(entry_ptr.release()); } + if (payload_status == RecordStatus::Normal) + { + MaybeMarkAndRezoneAsLargeValue(ccp, cce->PayloadSize()); + } bool was_dirty = cce->IsDirty(); cce->SetCommitTsPayloadStatus(commit_ts, payload_status); this->OnCommittedUpdate(cce, was_dirty); @@ -1659,6 +1667,7 @@ class ObjectCcMap : public TemplateCcMap } cce->payload_.PassInCurrentPayload(std::move(object_uptr)); object_uptr = nullptr; + MaybeMarkAndRezoneAsLargeValue(cc_page, cce->PayloadSize()); } else { @@ -1987,6 +1996,10 @@ class ObjectCcMap : public TemplateCcMap cce->payload_.cur_payload_ == nullptr ? RecordStatus::Deleted : RecordStatus::Normal; + if (payload_status == RecordStatus::Normal) + { + MaybeMarkAndRezoneAsLargeValue(ccp, cce->PayloadSize()); + } bool was_dirty = cce->IsDirty(); cce->SetCommitTsPayloadStatus(commit_ts, payload_status); this->OnCommittedUpdate(cce, was_dirty); @@ -2459,6 +2472,11 @@ class ObjectCcMap : public TemplateCcMap ++TemplateCcMap::normal_obj_sz_; } + if (payload_status == RecordStatus::Normal) + { + MaybeMarkAndRezoneAsLargeValue(ccp, cce->PayloadSize()); + } + this->OnCommittedUpdate(cce, was_dirty); // Must update dirty_commit_ts. Otherwise, this entry may be @@ -2589,6 +2607,10 @@ class ObjectCcMap : public TemplateCcMap { size_t offset = 0; cce->payload_.DeserializeCurrentPayload(rec_str.data(), offset); + if (status == RecordStatus::Normal) + { + MaybeMarkAndRezoneAsLargeValue(ccp, cce->PayloadSize()); + } } else { diff --git a/tx_service/include/cc/template_cc_map.h b/tx_service/include/cc/template_cc_map.h index bcbe3a26..d6d8bcf9 100644 --- a/tx_service/include/cc/template_cc_map.h +++ b/tx_service/include/cc/template_cc_map.h @@ -605,10 +605,6 @@ class TemplateCcMap : public CcMap cce->payload_.DeserializeCurrentPayload(payload_str->data(), offset); } - // Eagerly move to the large-value zone when the committed - // payload exceeds the threshold. - MaybeMarkAndRezoneAsLargeValue(cc_page, cce->PayloadSize()); - RecordStatus cce_old_status = cce->PayloadStatus(); RecordStatus new_status = is_del ? RecordStatus::Deleted : RecordStatus::Normal; @@ -1119,9 +1115,6 @@ class TemplateCcMap : public CcMap req.CommitType() != PostWriteType::DowngradeLock) { cce_ptr->payload_.SetCurrentPayload(payload); - // Eagerly move to the large-value zone if needed. - MaybeMarkAndRezoneAsLargeValue(cce_ptr->GetCcPage(), - cce_ptr->PayloadSize()); // A prepare commit request only installs the dirty value, // and does not change the record status and commit_ts. if (req.CommitType() == PostWriteType::Commit || @@ -2036,9 +2029,6 @@ class TemplateCcMap : public CcMap if (cce->PayloadStatus() == RecordStatus::Unknown) { cce->payload_.PassInCurrentPayload(std::move(tmp_payload)); - // Eagerly move to the large-value zone if needed. - MaybeMarkAndRezoneAsLargeValue(cce->GetCcPage(), - cce->PayloadSize()); cce->SetCommitTsPayloadStatus(req.ReadTimestamp(), tmp_payload_status); } @@ -2240,9 +2230,6 @@ class TemplateCcMap : public CcMap size_t offset = 0; cce->payload_.DeserializeCurrentPayload(req.rec_str_->data(), offset); - // Eagerly move to the large-value zone if needed. - MaybeMarkAndRezoneAsLargeValue(cce->GetCcPage(), - cce->PayloadSize()); } cce->SetCommitTsPayloadStatus(req.CommitTs(), req.RecordStatus()); } @@ -7014,8 +7001,6 @@ class TemplateCcMap : public CcMap { cce->payload_.DeserializeCurrentPayload(log_blob.data(), offset); - // Eagerly move to the large-value zone if needed. - MaybeMarkAndRezoneAsLargeValue(ccp, cce->PayloadSize()); rec_status = RecordStatus::Normal; } else @@ -7772,8 +7757,6 @@ class TemplateCcMap : public CcMap if (rec_status == RecordStatus::Normal) { cce->payload_.SetCurrentPayload(commit_val); - // Eagerly move to the large-value zone if needed. - MaybeMarkAndRezoneAsLargeValue(cc_page, cce->PayloadSize()); } else { @@ -10643,8 +10626,6 @@ class TemplateCcMap : public CcMap size_t offset = 0; cce->payload_.DeserializeCurrentPayload(rec_str.c_str(), offset); - // Eagerly move to the large-value zone if needed. - MaybeMarkAndRezoneAsLargeValue(ccp, cce->PayloadSize()); } } else if (cce_version > 1 && commit_ts < cce_version && @@ -12069,13 +12050,14 @@ class TemplateCcMap : public CcMap * Only active when IsLargeValueZoneEnabled() returns true (i.e. for * ObjectCcMap / EloqKV). Has no effect on RangeCcMap (EloqSQL / EloqDoc). * - * Called from every payload-assignment path (PostWriteCc, BackFill, - * ReplayLogCc, UploadBatchCc, etc.) so that large-value pages are - * immediately clustered near the LRU tail and are only evicted after all - * small-value pages have been evicted. + * Called from ObjectCcMap at every payload-assignment site (ApplyCc, + * PostWriteCc, UploadBatchCc, KeyObjectStandbyForwardCc, ReplayLogCc, + * BackFill) so that large-value pages are immediately clustered near the + * LRU tail and are only evicted after all small-value pages have been + * evicted. * * The lazy fallback in CanBeCleaned (has_blocked_large_value_) is kept as - * a safety net for any path not covered here. + * a safety net for any path not covered above. * * @param page The LRU page that owns the updated entry. * @param payload_size The serialized size of the newly installed payload.