diff --git a/.gitignore b/.gitignore index de9ce42c..c0813a19 100644 --- a/.gitignore +++ b/.gitignore @@ -358,3 +358,4 @@ compile_commands.json cmake-build-debug/ .cache +_codeql_detected_source_root diff --git a/tx_service/include/cc/cc_entry.h b/tx_service/include/cc/cc_entry.h index 725a5a43..a3107399 100644 --- a/tx_service/include/cc/cc_entry.h +++ b/tx_service/include/cc/cc_entry.h @@ -2030,8 +2030,22 @@ struct LruPage CcMap *parent_map_{nullptr}; + // The value of CcShard::access_counter_ at the time this page was last + // moved to the LRU tail by UpdateLruList(). Comparing two pages' + // last_access_ts_ values determines which was accessed more recently + // (the larger value is more recent). uint64_t last_access_ts_{0}; + // True when at least one entry on this page has a payload whose size + // exceeds txservice_large_value_threshold. Set lazily by CanBeCleaned when + // a large payload is first detected. Once set it is never cleared (even if + // the large entries are later evicted) because the flag is only used as a + // cheap signal to keep the page in the large-value zone. + // + // Large-value pages are kept in the tail (recent) zone of the LRU list so + // that they are evicted only after all small-value pages have been evicted. + bool has_large_value_{false}; + // The largest commit ts of dirty cc entries on this page. This value might // be larger than the actual max commit ts of cc entries. Currently used to // decide if this page has dirty data after a given ts. diff --git a/tx_service/include/cc/cc_map.h b/tx_service/include/cc/cc_map.h index 0d1434b6..59910dec 100644 --- a/tx_service/include/cc/cc_map.h +++ b/tx_service/include/cc/cc_map.h @@ -219,6 +219,15 @@ class CcMap { } + // Returns true if the payload-size-aware large-value zone eviction policy + // is active for this map. Only ObjectCcMap (EloqKV) overrides this to + // return true; all other maps (RangeCcMap, CatalogCcMap, etc.) return false + // so the policy has no effect on EloqSQL / EloqDoc tables. + virtual bool IsLargeValueZoneEnabled() const + { + return false; + } + virtual std::pair CleanPageAndReBalance( LruPage *page, KickoutCcEntryCc *kickout_cc = nullptr, diff --git a/tx_service/include/cc/cc_page_clean_guard.h b/tx_service/include/cc/cc_page_clean_guard.h index 39c1c316..251cacac 100644 --- a/tx_service/include/cc/cc_page_clean_guard.h +++ b/tx_service/include/cc/cc_page_clean_guard.h @@ -157,6 +157,16 @@ struct CcPageCleanGuard return clean_obj_cnt_; } + // Returns true if CanBeCleaned freshly set the has_large_value_ flag on + // the page during this clean pass (i.e. the page was just discovered to + // have a large-value entry for the first time). When this is set, + // CleanPageAndReBalance will call UpdateLruList to move the page from the + // small-value zone into the large-value zone immediately. + bool HasBlockedLargeValue() const + { + return has_blocked_large_value_; + } + protected: struct CanBeCleanedResult { @@ -343,6 +353,9 @@ struct CcPageCleanGuard uint64_t dirty_freed_cnt_{0}; bool evicted_valid_key_{false}; uint64_t clean_obj_cnt_{0}; + // Set by CanBeCleaned when a large-value entry is protected. Mutable so it + // can be set from the const CanBeCleaned override. + mutable bool has_blocked_large_value_{false}; private: std::bitset:: @@ -399,7 +412,34 @@ struct CcPageCleanGuardWithoutKickoutCc return {false, false}; } - return {(cce->IsFree() && !cce->GetBeingCkpt()), false}; + if (!cce->IsFree() || cce->GetBeingCkpt()) + { + return {false, false}; + } + + // Payload-size-aware eviction (ObjectCcMap / EloqKV only): mark the + // page as a large-value page so that UpdateLruList places it in the + // large-value zone (tail end) of the LRU list. The page is still + // evictable here — protection is positional (large-value pages are + // evicted only after all small-value pages). Setting + // has_blocked_large_value_ signals CleanPageAndReBalance to re-zone + // the page immediately via UpdateLruList in case it was in the + // small-value zone. + // IsLargeValueZoneEnabled() returns false for all non-ObjectCcMap + // types (RangeCcMap, CatalogCcMap, etc.), so this block is a no-op + // for EloqSQL and EloqDoc tables. + if (this->page_->parent_map_ != nullptr && + this->page_->parent_map_->IsLargeValueZoneEnabled() && + cce->PayloadSize() > txservice_large_value_threshold) + { + if (!this->page_->has_large_value_) + { + this->page_->has_large_value_ = true; + this->has_blocked_large_value_ = true; + } + } + + return {true, false}; } bool IsCleanTarget( diff --git a/tx_service/include/cc/cc_shard.h b/tx_service/include/cc/cc_shard.h index 20023351..091c85f0 100644 --- a/tx_service/include/cc/cc_shard.h +++ b/tx_service/include/cc/cc_shard.h @@ -1005,11 +1005,35 @@ class CcShard clean_start_ccp_ = ccp; } + /// Returns the sentinel head of the LRU list. The first real page is + /// LruHead()->lru_next_. Used by tests to traverse the list. + const LruPage *LruHead() const + { + return &head_ccp_; + } + bool OutOfMemory() { return clean_start_ccp_ != nullptr && clean_start_ccp_ == &tail_ccp_; } + uint64_t AccessCounter() const + { + return access_counter_; + } + + /** + * @brief Returns the current head of the large-value zone — the first + * large-value page in the LRU list (the one closest to the sentinel head). + * + * Used for testing: verifies that the large-value zone is non-empty and + * correctly maintained. + */ + const LruPage *LruLargeValueZoneHead() const + { + return lru_large_value_zone_head_; + } + SystemHandler *GetSystemHandler() { return system_handler_; @@ -1273,18 +1297,33 @@ class CcShard // simplifies handling of empty and one-element lists. LruPage head_ccp_, tail_ccp_; /** - * @brief Each time a page is accessed and moved to the tail of the LRU - * list, the counter is incremented and assigned to the page. Since in a - * double-linked list there is no way to determine the relative order of two - * pages, we use the number to indicate if a page precedes or succeeds the - * other in the list. + * @brief A monotonically-increasing shard-wide counter. It is + * incremented and assigned to a page's last_access_ts_ every time + * UpdateLruList() is called, i.e. whenever any page in this shard is + * moved to its target position in the LRU list. * + * Primary use: since a doubly-linked list provides no O(1) way to compare + * the positions of two arbitrary nodes, comparing page1.last_access_ts_ + * against page2.last_access_ts_ lets the merge/redistribute code determine + * which of the two pages was accessed more recently without traversing the + * list. */ uint64_t access_counter_{0}; // Page to start looking for cc entries to kick out on LRU chain. LruPage *clean_start_ccp_; + // Head of the large-value zone in the LRU list. Large-value pages are + // clustered at the tail (recent) end of the list so they are evicted only + // after all small-value pages have been evicted. This pointer points to the + // first (oldest) large-value page, i.e. the boundary between the two zones: + // + // head ← [small-value pages] ← lru_large_value_zone_head_ ← [large-value + // pages] ← tail + // + // It equals &tail_ccp_ when no large-value pages are in the list. + LruPage *lru_large_value_zone_head_; + // The number of ccentry in all the ccmap of this ccshard. uint64_t size_; diff --git a/tx_service/include/cc/object_cc_map.h b/tx_service/include/cc/object_cc_map.h index fca0f59b..4da6f294 100644 --- a/tx_service/include/cc/object_cc_map.h +++ b/tx_service/include/cc/object_cc_map.h @@ -104,6 +104,12 @@ class ObjectCcMap : public TemplateCcMap using TemplateCcMap::Type; using TemplateCcMap::CleanEntry; + // Enable the payload-size-aware large-value zone policy for EloqKV. + bool IsLargeValueZoneEnabled() const override + { + return txservice_large_value_threshold > 0; + } + bool Execute(ApplyCc &req) override { TX_TRACE_ACTION_WITH_CONTEXT( @@ -1201,6 +1207,10 @@ class ObjectCcMap : public TemplateCcMap CommitCommandOnPayload( cce->payload_.cur_payload_, status, *cmd); } + if (status == RecordStatus::Normal) + { + MaybeMarkAndRezoneAsLargeValue(ccp, cce->PayloadSize()); + } // Reset the dirty status. cce->SetDirtyPayload(nullptr); @@ -1441,6 +1451,10 @@ class ObjectCcMap : public TemplateCcMap cce->ReleaseForwardEntry(); shard_->ForwardStandbyMessage(entry_ptr.release()); } + if (payload_status == RecordStatus::Normal) + { + MaybeMarkAndRezoneAsLargeValue(ccp, cce->PayloadSize()); + } bool was_dirty = cce->IsDirty(); cce->SetCommitTsPayloadStatus(commit_ts, payload_status); this->OnCommittedUpdate(cce, was_dirty); @@ -1653,6 +1667,7 @@ class ObjectCcMap : public TemplateCcMap } cce->payload_.PassInCurrentPayload(std::move(object_uptr)); object_uptr = nullptr; + MaybeMarkAndRezoneAsLargeValue(cc_page, cce->PayloadSize()); } else { @@ -1981,6 +1996,10 @@ class ObjectCcMap : public TemplateCcMap cce->payload_.cur_payload_ == nullptr ? RecordStatus::Deleted : RecordStatus::Normal; + if (payload_status == RecordStatus::Normal) + { + MaybeMarkAndRezoneAsLargeValue(ccp, cce->PayloadSize()); + } bool was_dirty = cce->IsDirty(); cce->SetCommitTsPayloadStatus(commit_ts, payload_status); this->OnCommittedUpdate(cce, was_dirty); @@ -2453,6 +2472,11 @@ class ObjectCcMap : public TemplateCcMap ++TemplateCcMap::normal_obj_sz_; } + if (payload_status == RecordStatus::Normal) + { + MaybeMarkAndRezoneAsLargeValue(ccp, cce->PayloadSize()); + } + this->OnCommittedUpdate(cce, was_dirty); // Must update dirty_commit_ts. Otherwise, this entry may be @@ -2583,6 +2607,10 @@ class ObjectCcMap : public TemplateCcMap { size_t offset = 0; cce->payload_.DeserializeCurrentPayload(rec_str.data(), offset); + if (status == RecordStatus::Normal) + { + MaybeMarkAndRezoneAsLargeValue(ccp, cce->PayloadSize()); + } } else { diff --git a/tx_service/include/cc/template_cc_map.h b/tx_service/include/cc/template_cc_map.h index 4e4da192..d6d8bcf9 100644 --- a/tx_service/include/cc/template_cc_map.h +++ b/tx_service/include/cc/template_cc_map.h @@ -605,7 +605,6 @@ class TemplateCcMap : public CcMap cce->payload_.DeserializeCurrentPayload(payload_str->data(), offset); } - RecordStatus cce_old_status = cce->PayloadStatus(); RecordStatus new_status = is_del ? RecordStatus::Deleted : RecordStatus::Normal; @@ -8721,7 +8720,14 @@ class TemplateCcMap : public CcMap CcPage *>( lru_page); auto page_it = ccmp_.end(); - bool success = CleanPage(page, page_it, free_cnt, kickout_cc); + + // Only collect the re-zone flag for regular LRU scans (no kickout_cc). + // Forced evictions (range migration etc.) should not be affected by + // the large-value protection. + bool needs_rezoning = false; + bool *out_flag = (kickout_cc == nullptr) ? &needs_rezoning : nullptr; + + bool success = CleanPage(page, page_it, free_cnt, kickout_cc, out_flag); // Output the operation result if the caller care it. if (is_success != nullptr) @@ -8729,9 +8735,23 @@ class TemplateCcMap : public CcMap *is_success = success; } + // Capture next_page BEFORE potentially moving page (re-zoning changes + // page->lru_next_). LruPage *next_page = RebalancePage(page, page_it, success, kickout_cc == nullptr); + // Re-zone: CanBeCleaned just discovered that this page has a large- + // value entry (has_large_value_ was freshly set). Move the page into + // the large-value zone (LRU tail end) via UpdateLruList so that future + // small-value insertions are placed before it and it is only evicted + // after all small-value pages are gone. + if (needs_rezoning && !page->Empty() && page->lru_next_ != nullptr) + { + shard_->UpdateLruList(page, false); + // The scan should continue from the page we had lined up before + // the re-zone, not from the page's new position at the tail. + } + return {free_cnt, next_page}; } @@ -9180,6 +9200,112 @@ class TemplateCcMap : public CcMap return true; } + /** + * @brief Inserts keys with all entries guaranteed to be free (persistent, + * unlocked). Used in tests for large-value eviction protection. + * + * @param keys Keys to insert. + * @return true on success. + */ + bool BulkEmplaceFreeForTest(std::vector &keys) + { + for (auto key : keys) + { + bool emplace = false; + auto it = FindEmplace(*key, &emplace, false, false); + if (!emplace) + { + assert(false); + return false; + } + CcEntry *cce = + it->second; + CcPage *ccp = + it.GetPage(); + // Set commit_ts = 1, ckpt_ts = UINT64_MAX so entry is persistent. + bool was_dirty = cce->IsDirty(); + cce->SetCommitTsPayloadStatus(1, RecordStatus::Normal); + cce->SetCkptTs(UINT64_MAX); + OnFlushed(cce, was_dirty); + OnCommittedUpdate(cce, was_dirty); + ccp->last_dirty_commit_ts_ = + std::max(cce->CommitTs(), ccp->last_dirty_commit_ts_); + } + return true; + } + + /** + * @brief Sets the payload of all entries in the cc map to the given shared + * pointer. Used in tests for large-value eviction protection. + * + * @param payload The payload to assign to every entry. + */ + void SetPayloadForTest(std::shared_ptr payload) + { + for (auto &[key, page_ptr] : ccmp_) + { + for (auto &cce : page_ptr->entries_) + { + cce->payload_.cur_payload_ = payload; + } + } + } + + /** + * @brief Marks all pages in this cc map as large-value pages and + * immediately re-zones them via UpdateLruList so that they are placed in + * the large-value zone (tail end) of the LRU list. Used in tests for the + * zone-separation eviction policy. + */ + void RezoneAsLargeValueForTest() + { + for (auto &[key, page_ptr] : ccmp_) + { + CcPage *page = + page_ptr.get(); + if (!page->has_large_value_) + { + page->has_large_value_ = true; + } + // Move page to large-value zone. + if (page->lru_next_ != nullptr) + { + shard_->UpdateLruList(page, false); + } + } + } + + /** + * @brief Iterates over all pages and calls MaybeMarkAndRezoneAsLargeValue + * with the maximum PayloadSize() found among each page's entries. + * + * This simulates the eager re-zone that happens in production when a write + * commits a large payload via PostWriteCc / BackFill / ReplayLogCc etc. + * Used in tests to verify that the eager path works without going through + * a clean-page scan. + */ + void TriggerEagerRezoneForTest() + { + for (auto &[key, page_ptr] : ccmp_) + { + CcPage *page = + page_ptr.get(); + size_t max_payload = 0; + for (const auto &cce : page->entries_) + { + max_payload = std::max(max_payload, cce->PayloadSize()); + } + MaybeMarkAndRezoneAsLargeValue(page, max_payload); + } + } + + /// Returns the number of CcPage objects (btree nodes) in this cc map. + /// Used in tests that verify zone-page-count invariants. + size_t PageCount() const + { + return ccmp_.size(); + } + protected: void OnCommittedUpdate( const CcEntry *cce, @@ -11558,7 +11684,8 @@ class TemplateCcMap : public CcMap CcPage *page, BtreeMapIterator &page_it, size_t &free_cnt, - KickoutCcEntryCc *kickout_cc = nullptr) + KickoutCcEntryCc *kickout_cc = nullptr, + bool *out_has_blocked_large_value = nullptr) { bool success; @@ -11615,6 +11742,11 @@ class TemplateCcMap : public CcMap success = clean_guard->CleanSuccess(); + if (out_has_blocked_large_value != nullptr) + { + *out_has_blocked_large_value = clean_guard->HasBlockedLargeValue(); + } + std::destroy_at(buffer); return success; } @@ -11910,6 +12042,41 @@ class TemplateCcMap : public CcMap return &pos_inf_page_; } + /** + * @brief Eagerly marks a page as a large-value page and moves it into the + * large-value zone of the LRU list when the installed payload exceeds + * txservice_large_value_threshold. + * + * Only active when IsLargeValueZoneEnabled() returns true (i.e. for + * ObjectCcMap / EloqKV). Has no effect on RangeCcMap (EloqSQL / EloqDoc). + * + * Called from ObjectCcMap at every payload-assignment site (ApplyCc, + * PostWriteCc, UploadBatchCc, KeyObjectStandbyForwardCc, ReplayLogCc, + * BackFill) so that large-value pages are immediately clustered near the + * LRU tail and are only evicted after all small-value pages have been + * evicted. + * + * The lazy fallback in CanBeCleaned (has_blocked_large_value_) is kept as + * a safety net for any path not covered above. + * + * @param page The LRU page that owns the updated entry. + * @param payload_size The serialized size of the newly installed payload. + */ + void MaybeMarkAndRezoneAsLargeValue(LruPage *page, size_t payload_size) + { + if (!IsLargeValueZoneEnabled() || page == nullptr || + page->has_large_value_ || + payload_size <= txservice_large_value_threshold) + { + return; + } + page->has_large_value_ = true; + if (page->lru_next_ != nullptr) + { + shard_->UpdateLruList(page, false); + } + } + absl::btree_map< KeyT, std::unique_ptr< diff --git a/tx_service/include/tx_service.h b/tx_service/include/tx_service.h index 007d9dbe..771fc071 100644 --- a/tx_service/include/tx_service.h +++ b/tx_service/include/tx_service.h @@ -1207,6 +1207,11 @@ class TxService conf.at("enable_key_cache") && !enable_mvcc; } + if (conf.find("large_value_threshold") != conf.end()) + { + txservice_large_value_threshold = conf.at("large_value_threshold"); + } + if (txservice_skip_kv) { if (txservice_enable_cache_replacement) diff --git a/tx_service/include/tx_service_common.h b/tx_service/include/tx_service_common.h index ee4cc9b0..824f0f33 100644 --- a/tx_service/include/tx_service_common.h +++ b/tx_service/include/tx_service_common.h @@ -42,6 +42,13 @@ inline uint64_t txservice_max_standby_lag = 400000; // If checkpointed data can be evicted from memory if memory is full. If this is // off, all data will be cached in memory. inline bool txservice_enable_cache_replacement = true; +// Payload size threshold in bytes for large-value protection during cache +// eviction. When an entry's payload size exceeds this threshold its page is +// marked as a large-value page (has_large_value_ = true). Large-value pages +// are clustered at the tail (most-recently-used) end of the LRU list by +// UpdateLruList, so they are evicted only after all small-value pages have +// been evicted. A value of 0 disables large-value protection (default). +inline size_t txservice_large_value_threshold = 0; // Whether to automatically redirect redis command to the leader node when the // data is not on the local node. diff --git a/tx_service/src/cc/cc_shard.cpp b/tx_service/src/cc/cc_shard.cpp index b21df686..b20a3a9c 100644 --- a/tx_service/src/cc/cc_shard.cpp +++ b/tx_service/src/cc/cc_shard.cpp @@ -94,6 +94,7 @@ CcShard::CcShard( head_ccp_(nullptr), tail_ccp_(nullptr), clean_start_ccp_(nullptr), + lru_large_value_zone_head_(nullptr), size_(0), ckpter_(nullptr), catalog_factory_{catalog_factory[0], @@ -127,6 +128,7 @@ CcShard::CcShard( head_ccp_.lru_next_ = &tail_ccp_; tail_ccp_.lru_prev_ = &head_ccp_; tail_ccp_.lru_next_ = nullptr; + lru_large_value_zone_head_ = &tail_ccp_; thd_token_.reserve((size_t) core_cnt_ + 1); for (size_t idx = 0; idx < core_cnt_; ++idx) @@ -845,6 +847,11 @@ void CcShard::DetachLru(LruPage *page) { clean_start_ccp_ = page->lru_next_; } + // If page is the head of the large-value zone, advance the zone head. + if (lru_large_value_zone_head_ == page) + { + lru_large_value_zone_head_ = page->lru_next_; + } assert(prev != nullptr && next != nullptr); prev->lru_next_ = next; next->lru_prev_ = prev; @@ -866,6 +873,13 @@ void CcShard::ReplaceLru(LruPage *old_page, LruPage *new_page) { clean_start_ccp_ = new_page; } + // The replacement page inherits the large-value flag and the zone-head + // role. + new_page->has_large_value_ = old_page->has_large_value_; + if (lru_large_value_zone_head_ == old_page) + { + lru_large_value_zone_head_ = new_page; + } lru_prev->lru_next_ = new_page; lru_next->lru_prev_ = new_page; new_page->lru_next_ = lru_next; @@ -883,30 +897,52 @@ void CcShard::UpdateLruList(LruPage *page, bool is_emplace) assert(page->lru_next_ == nullptr && page->lru_prev_ == nullptr); return; } - // page already at the tail, do nothing - if (page->lru_next_ == &tail_ccp_ && tail_ccp_.lru_prev_ == page) + + // Determine insertion point depending on whether the page has large values. + // + // Large-value pages (has_large_value_ == true) always go at the true tail + // (most-recently-used end). This clusters them in the tail zone and ensures + // they are evicted only after all small-value pages have been evicted. + // + // Small-value pages (has_large_value_ == false) are inserted just before + // the large-value zone (at lru_large_value_zone_head_->lru_prev_). When + // there are no large-value pages lru_large_value_zone_head_ == &tail_ccp_, + // so the behaviour is identical to the standard tail insertion. + LruPage *insert_before = + page->has_large_value_ ? &tail_ccp_ : lru_large_value_zone_head_; + + // page already at the correct insertion position, just update the counter. + if (page->lru_next_ == insert_before && insert_before->lru_prev_ == page) { ++access_counter_; page->last_access_ts_ = access_counter_; return; } - // Removes the page from the list, if it's already in the list. This is - // used to keep the updated page at the end(tail) of the LRU list. A - // page's prev and post are both not-null when the page is in the - // list. This is because we have a reserved head and tail for the list. + + // Remove the page from its current position in the list (if present). if (page->lru_next_ != nullptr) { DetachLru(page); } - LruPage *second_tail = tail_ccp_.lru_prev_; - second_tail->lru_next_ = page; - tail_ccp_.lru_prev_ = page; - page->lru_next_ = &tail_ccp_; - page->lru_prev_ = second_tail; + + // Insert page immediately before insert_before. + LruPage *insert_after = insert_before->lru_prev_; + insert_after->lru_next_ = page; + insert_before->lru_prev_ = page; + page->lru_next_ = insert_before; + page->lru_prev_ = insert_after; ++access_counter_; page->last_access_ts_ = access_counter_; + // Maintain lru_large_value_zone_head_: when a large-value page is inserted + // and the zone was empty (lru_large_value_zone_head_ == &tail_ccp_), the + // new page becomes the zone head. + if (page->has_large_value_ && lru_large_value_zone_head_ == &tail_ccp_) + { + lru_large_value_zone_head_ = page; + } + // If the update is a emplace update, these new loaded data might be // kickable from cc map. Usually if the clean_start_page is at tail we're // not able to load new data into memory, except some special case where we diff --git a/tx_service/tests/CcPage-Test.cpp b/tx_service/tests/CcPage-Test.cpp index 56c4e189..0dfd612a 100644 --- a/tx_service/tests/CcPage-Test.cpp +++ b/tx_service/tests/CcPage-Test.cpp @@ -29,6 +29,7 @@ #include "template_cc_map.h" #include "tx_key.h" // CompositeKey #include "tx_record.h" // CompositeRecord +#include "tx_service_common.h" #include "type.h" namespace txservice @@ -177,6 +178,379 @@ TEST_CASE("CcPage clean tests", "[cc-page]") REQUIRE(total_remain + total_free == MAP_NUM * MAP_SIZE); } +// A CompositeRecord subclass that reports an artificially large payload +// size. Used to test the payload-size-aware cache eviction protection. +struct LargeCompositeRecord : public CompositeRecord +{ + explicit LargeCompositeRecord(int val, size_t reported_size) + : CompositeRecord(val), reported_size_(reported_size) + { + } + + size_t Size() const override + { + return reported_size_; + } + + TxRecord::Uptr Clone() const override + { + return std::make_unique(*this); + } + + size_t reported_size_; +}; + +// Test-only subclass of TemplateCcMap that enables the large-value zone +// (simulating ObjectCcMap behaviour for tests that use different template +// parameters such as VersionedRecord=true / RangePartitioned=true). +template +class LargeValueTestCcMap : public TemplateCcMap +{ +public: + using TemplateCcMap::TemplateCcMap; + bool IsLargeValueZoneEnabled() const override + { + return txservice_large_value_threshold > 0; + } +}; + +TEST_CASE("Large-value eviction protection test", "[cc-page]") +{ + std::unordered_map> ng_configs{ + {0, {NodeConfig(0, "127.0.0.1", 8600)}}}; + std::map tx_cnf{{"node_memory_limit_mb", 1000}, + {"enable_key_cache", 0}, + {"reltime_sampling", 0}, + {"range_split_worker_num", 1}, + {"core_num", 1}, + {"realtime_sampling", 0}, + {"checkpointer_interval", 10}, + {"enable_shard_heap_defragment", 0}, + {"node_log_limit_mb", 1000}}; + LocalCcShards local_cc_shards( + 0, 0, tx_cnf, nullptr, nullptr, &ng_configs, 2, nullptr, nullptr, true); + CcShard shard(0, + 1, + 10000, + 10000, + false, + 0, + local_cc_shards, + nullptr, + nullptr, + &ng_configs, + 2); + shard.Init(); + std::string raft_path(""); + Sharder::Instance(0, + &ng_configs, + 0, + nullptr, + nullptr, + &local_cc_shards, + nullptr, + &raft_path); + + const size_t MAP_SIZE = 200; + const size_t LARGE_PAYLOAD_SIZE = 1024; + + using TestCcMap = LargeValueTestCcMap, + CompositeRecord, + true, + true>; + + // Small-value map – pages stay in the head (small-value) zone. + std::string small_table = "small_val_test"; + TableName small_tname( + small_table, TableType::Primary, TableEngine::EloqSql); + auto small_map = + std::make_unique(&shard, 0, small_tname, 1, nullptr, true); + + // Large-value map – pages will be re-zoned to the tail (large-value) zone. + std::string large_table = "large_val_test"; + TableName large_tname( + large_table, TableType::Primary, TableEngine::EloqSql); + auto large_map = + std::make_unique(&shard, 0, large_tname, 1, nullptr, true); + + auto make_keys = [](const std::string &tname, + size_t cnt, + std::vector> &storage) + -> std::vector *> + { + for (size_t i = 0; i < cnt; i++) + { + storage.emplace_back(std::make_tuple(tname, static_cast(i))); + } + std::vector *> ptrs; + for (auto &k : storage) + { + ptrs.push_back(&k); + } + return ptrs; + }; + + // Insert entries for both maps. Both maps start in the small-value zone + // because has_large_value_ is false on insertion. + std::vector> small_keys; + REQUIRE(small_map->BulkEmplaceFreeForTest( + make_keys(small_table, MAP_SIZE, small_keys))); + REQUIRE(small_map->VerifyOrdering() == MAP_SIZE); + + std::vector> large_keys; + REQUIRE(large_map->BulkEmplaceFreeForTest( + make_keys(large_table, MAP_SIZE, large_keys))); + REQUIRE(large_map->VerifyOrdering() == MAP_SIZE); + + // Assign large payloads to the large-value map entries. + auto large_payload = + std::make_shared(42, LARGE_PAYLOAD_SIZE); + large_map->SetPayloadForTest(large_payload); + + txservice_large_value_threshold = LARGE_PAYLOAD_SIZE / 2; + + // ----------------------------------------------------------------------- + // PART 1: Zone-separation structure. + // ----------------------------------------------------------------------- + // Use RezoneAsLargeValueForTest() to set has_large_value_ on large_map + // pages and call UpdateLruList to move them into the large-value zone. + // This simulates what happens in production when those pages are accessed + // after CanBeCleaned has set has_large_value_ on them. + large_map->RezoneAsLargeValueForTest(); + shard.VerifyLruList(); + + // lru_large_value_zone_head_ must now point into the large-value zone. + const LruPage *zone_head = shard.LruLargeValueZoneHead(); + REQUIRE(zone_head != nullptr); + REQUIRE(zone_head->parent_map_ != nullptr); // not a sentinel + + // Walk the LRU list and verify: + // head → [small_map pages] → zone_head → [large_map pages] → tail + { + bool in_large_zone = false; + for (const LruPage *p = shard.LruHead()->lru_next_; + p->parent_map_ != nullptr; // sentinel tail has parent_map_==null + p = p->lru_next_) + { + if (p == zone_head) + { + in_large_zone = true; + } + if (in_large_zone) + { + REQUIRE(p->parent_map_ == large_map.get()); + } + else + { + REQUIRE(p->parent_map_ == small_map.get()); + } + } + // We must have entered the large zone. + REQUIRE(in_large_zone); + } + + // ----------------------------------------------------------------------- + // PART 2: Small-value insertion stays before the zone head. + // ----------------------------------------------------------------------- + const LruPage *zone_head_before = shard.LruLargeValueZoneHead(); + CompositeKey extra_sv_key = + std::make_tuple(small_table, static_cast(MAP_SIZE + 1)); + std::vector *> extra_sv_ptr = { + &extra_sv_key}; + REQUIRE(small_map->BulkEmplaceFreeForTest(extra_sv_ptr)); + shard.VerifyLruList(); + // SV insertion must not change the zone head. + REQUIRE(shard.LruLargeValueZoneHead() == zone_head_before); + + // ----------------------------------------------------------------------- + // PART 3: Full scan – all pages are evicted. + // ----------------------------------------------------------------------- + size_t total_freed = 0; + while (true) + { + auto [free_cnt, yield] = shard.Clean(); + shard.VerifyLruList(); + total_freed += free_cnt; + if (free_cnt == 0) + { + break; + } + } + REQUIRE(total_freed == MAP_SIZE + MAP_SIZE + 1 /* extra sv */); + + // Restore global defaults. + txservice_large_value_threshold = 0; + local_cc_shards.Terminate(); +} + +// --------------------------------------------------------------------------- +// Test that MaybeMarkAndRezoneAsLargeValue fires eagerly at payload-set time, +// BEFORE any clean-page scan takes place. +// --------------------------------------------------------------------------- +TEST_CASE("Eager re-zone on large-value payload", "[cc-page]") +{ + std::unordered_map> ng_configs{ + {0, {NodeConfig(0, "127.0.0.1", 8600)}}}; + std::map tx_cnf{{"node_memory_limit_mb", 1000}, + {"enable_key_cache", 0}, + {"reltime_sampling", 0}, + {"range_split_worker_num", 1}, + {"core_num", 1}, + {"realtime_sampling", 0}, + {"checkpointer_interval", 10}, + {"enable_shard_heap_defragment", 0}, + {"node_log_limit_mb", 1000}}; + LocalCcShards local_cc_shards( + 0, 0, tx_cnf, nullptr, nullptr, &ng_configs, 2, nullptr, nullptr, true); + CcShard shard(0, + 1, + 10000, + 10000, + false, + 0, + local_cc_shards, + nullptr, + nullptr, + &ng_configs, + 2); + shard.Init(); + std::string raft_path(""); + Sharder::Instance(0, + &ng_configs, + 0, + nullptr, + nullptr, + &local_cc_shards, + nullptr, + &raft_path); + + const size_t MAP_SIZE = 200; + const size_t LARGE_PAYLOAD_SIZE = 1024; + + using TestCcMap = LargeValueTestCcMap, + CompositeRecord, + true, + true>; + + // Small-value map — pages stay in the SV zone at all times. + std::string small_table = "eager_sv_test"; + TableName small_tname( + small_table, TableType::Primary, TableEngine::EloqSql); + auto small_map = + std::make_unique(&shard, 0, small_tname, 1, nullptr, true); + + // Large-value map — pages will be re-zoned eagerly once we assign a large + // payload and call TriggerEagerRezoneForTest(). + std::string large_table = "eager_lv_test"; + TableName large_tname( + large_table, TableType::Primary, TableEngine::EloqSql); + auto large_map = + std::make_unique(&shard, 0, large_tname, 1, nullptr, true); + + auto make_keys = [](const std::string &tname, + size_t cnt, + std::vector> &storage) + -> std::vector *> + { + for (size_t i = 0; i < cnt; i++) + { + storage.emplace_back(std::make_tuple(tname, static_cast(i))); + } + std::vector *> ptrs; + for (auto &k : storage) + { + ptrs.push_back(&k); + } + return ptrs; + }; + + std::vector> small_keys; + REQUIRE(small_map->BulkEmplaceFreeForTest( + make_keys(small_table, MAP_SIZE, small_keys))); + std::vector> large_keys; + REQUIRE(large_map->BulkEmplaceFreeForTest( + make_keys(large_table, MAP_SIZE, large_keys))); + + // Threshold = half the payload size so all large_map entries qualify + // (LARGE_PAYLOAD_SIZE > LARGE_PAYLOAD_SIZE / 2). + txservice_large_value_threshold = LARGE_PAYLOAD_SIZE / 2; + + // ----------------------------------------------------------------------- + // Before re-zone: both maps are in the SV zone (has_large_value_ == false). + // The LRU large-value zone is empty: zone head points to the tail sentinel + // (parent_map_ == nullptr). + // ----------------------------------------------------------------------- + { + const LruPage *zone_head_before = shard.LruLargeValueZoneHead(); + REQUIRE(zone_head_before != nullptr); + REQUIRE(zone_head_before->parent_map_ == nullptr); // tail sentinel = empty zone + } + + // Assign large payloads to the large-value map entries. + auto large_payload = + std::make_shared(42, LARGE_PAYLOAD_SIZE); + large_map->SetPayloadForTest(large_payload); + shard.VerifyLruList(); + + // ----------------------------------------------------------------------- + // Eager re-zone: simulate what happens when the payload is installed via + // PostWriteCc / BackFill / ReplayLogCc. No clean scan is involved. + // ----------------------------------------------------------------------- + large_map->TriggerEagerRezoneForTest(); + shard.VerifyLruList(); + + // After TriggerEagerRezoneForTest the zone must be non-empty. + const LruPage *zone_head = shard.LruLargeValueZoneHead(); + REQUIRE(zone_head != nullptr); + REQUIRE(zone_head->parent_map_ != nullptr); // must point to a real page + + // All pages of large_map must be in the LV zone (at or after zone_head), + // and all pages of small_map must be in the SV zone (before zone_head). + { + bool in_large_zone = false; + for (const LruPage *p = shard.LruHead()->lru_next_; + p->parent_map_ != nullptr; + p = p->lru_next_) + { + if (p == zone_head) + { + in_large_zone = true; + } + if (in_large_zone) + { + INFO("page parent_map should be large_map in LV zone"); + REQUIRE(p->parent_map_ == large_map.get()); + } + else + { + INFO("page parent_map should be small_map in SV zone"); + REQUIRE(p->parent_map_ == small_map.get()); + } + } + REQUIRE(in_large_zone); + } + + // ----------------------------------------------------------------------- + // Clean scan: all entries should be evictable (all IsFree). + // ----------------------------------------------------------------------- + size_t total_freed = 0; + while (true) + { + auto [free_cnt, yield] = shard.Clean(); + shard.VerifyLruList(); + total_freed += free_cnt; + if (free_cnt == 0) + { + break; + } + } + REQUIRE(total_freed == MAP_SIZE + MAP_SIZE); + + // Restore global defaults. + txservice_large_value_threshold = 0; + local_cc_shards.Terminate(); +} + } // namespace txservice int main(int argc, char **argv)