-
Notifications
You must be signed in to change notification settings - Fork 9
Dirty memory ratio and size can trigger ckpt #406
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
108bca1
d88a237
4235db2
7ecd512
13dbcc1
df9bfb6
4e454e2
a34a21d
87efd56
bd6508c
5619eb6
9b336eb
4aabbe8
75885c1
51c683e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1499,8 +1499,9 @@ bool ShardCleanCc::Execute(CcShard &ccs) | |
| if (free_count_ == 0 && !shard_heap->IsDefragHeapCcOnFly() && | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should keep this as a safety check. This should not trigger, but if the user sets some extreme parameters — e.g. check interval = 1000000 and dirty ratio = 99% — we might get stuck at memory full and be unable to trigger a checkpoint. |
||
| !Sharder::Instance().GetCheckpointer()->IsOngoingDataSync()) | ||
| { | ||
| ccs.NotifyCkpt(); | ||
| ccs.NotifyCkpt(true); | ||
| } | ||
|
|
||
| free_count_ = 0; | ||
| // Return true will set the request as free, which means the | ||
| // request is not in working state. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,6 +27,7 @@ | |
|
|
||
| #include <chrono> // std::chrono | ||
| #include <cstdint> | ||
| #include <iomanip> // std::setprecision | ||
| #include <string> | ||
|
|
||
| #include "cc/catalog_cc_map.h" | ||
|
|
@@ -72,7 +73,9 @@ CcShard::CcShard( | |
| uint64_t cluster_config_version, | ||
| metrics::MetricsRegistry *metrics_registry, | ||
| metrics::CommonLabels common_labels, | ||
| uint32_t range_slice_memory_limit_percent) | ||
| uint32_t range_slice_memory_limit_percent, | ||
| uint64_t dirty_memory_check_interval, | ||
| uint64_t dirty_memory_size_threshold_mb) | ||
| : core_id_(core_id), | ||
| core_cnt_(core_cnt), | ||
| ng_id_(ng_id), | ||
|
|
@@ -102,7 +105,8 @@ CcShard::CcShard( | |
| catalog_factory[3], | ||
| catalog_factory[4]}, | ||
| system_handler_(system_handler), | ||
| active_si_txs_() | ||
| active_si_txs_(), | ||
| dirty_memory_check_interval_(dirty_memory_check_interval) | ||
| { | ||
| // Reserve range_slice_memory_limit_percent% for range slice info. | ||
| // We update this to dynamically reserve the configured range slice | ||
|
|
@@ -117,6 +121,23 @@ CcShard::CcShard( | |
| static_cast<uint64_t>(MB(node_memory_limit_mb) * memory_usage_ratio); | ||
| memory_limit_ /= core_cnt_; | ||
|
|
||
| // Pre-calculate dirty memory threshold in bytes | ||
| if (dirty_memory_size_threshold_mb == 0) | ||
| { | ||
| // Default: 10% of memory limit per shard, with minimum floor of 1 MB | ||
| dirty_memory_threshold_bytes_ = | ||
| static_cast<uint64_t>(memory_limit_ * 0.1); | ||
| if (dirty_memory_threshold_bytes_ == 0) | ||
| { | ||
| dirty_memory_threshold_bytes_ = 1024 * 1024; // 1 MB minimum | ||
| } | ||
| } | ||
| else | ||
| { | ||
| dirty_memory_threshold_bytes_ = | ||
| dirty_memory_size_threshold_mb * 1024 * 1024; | ||
| } | ||
|
|
||
| // Calculate standby buffer memory limit: 10% of node memory limit per | ||
| // shard. These part of memory is calculated together with shard memory so | ||
| // no need to subtract it from memory_limit_. | ||
|
|
@@ -419,6 +440,14 @@ void CcShard::AdjustDataKeyStats(const TableName &table_name, | |
| { | ||
| dirty_data_key_count_ = static_cast<size_t>(new_dirty); | ||
| } | ||
|
|
||
| // Check dirty memory thresholds periodically | ||
| if (dirty_memory_check_interval_ > 0 && | ||
| ++adjust_stats_call_count_ >= dirty_memory_check_interval_) | ||
| { | ||
| adjust_stats_call_count_ = 0; | ||
| CheckAndTriggerCkptByDirtyMemory(); | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -427,6 +456,45 @@ std::pair<size_t, size_t> CcShard::GetDataKeyStats() const | |
| return {data_key_count_, dirty_data_key_count_}; | ||
| } | ||
|
|
||
| void CcShard::CheckAndTriggerCkptByDirtyMemory() | ||
| { | ||
| if (memory_limit_ == 0 || data_key_count_ == 0 || ckpter_ == nullptr) | ||
| { | ||
| return; | ||
| } | ||
|
|
||
| // Get current memory usage | ||
| if (GetShardHeap() == nullptr) | ||
| { | ||
| return; | ||
| } | ||
| int64_t allocated = 0, committed = 0; | ||
| GetShardHeap()->Full(&allocated, &committed); | ||
|
|
||
| if (allocated <= 0) | ||
| { | ||
| return; | ||
| } | ||
|
|
||
| // Calculate dirty memory | ||
| double dirty_key_ratio = static_cast<double>(dirty_data_key_count_) / | ||
| static_cast<double>(data_key_count_); | ||
| uint64_t dirty_memory = static_cast<uint64_t>(allocated * dirty_key_ratio); | ||
|
|
||
| // Trigger checkpoint when dirty memory exceeds threshold | ||
| if (dirty_memory > dirty_memory_threshold_bytes_) | ||
| { | ||
| DLOG(INFO) << "Shard " << core_id_ | ||
| << " triggering checkpoint - dirty_memory=" | ||
| << (dirty_memory / (1024 * 1024)) << "MB (threshold=" | ||
| << (dirty_memory_threshold_bytes_ / (1024 * 1024)) | ||
| << "MB), dirty_keys=" << dirty_data_key_count_ << "/" | ||
| << data_key_count_; | ||
|
|
||
| NotifyCkpt(true); // Request immediate checkpoint | ||
| } | ||
| } | ||
|
Comment on lines
+459
to
+496
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Null-check `GetShardHeap()` before calling `Full()` on it.
Additionally, skip the dirty-memory calculation when `allocated` is not positive. Proposed fix: void CcShard::CheckAndTriggerCkptByDirtyMemory()
{
if (memory_limit_ == 0 || data_key_count_ == 0 || ckpter_ == nullptr)
{
return;
}
+ if (GetShardHeap() == nullptr)
+ {
+ return;
+ }
+
// Get current memory usage
int64_t allocated = 0, committed = 0;
GetShardHeap()->Full(&allocated, &committed);
+ if (allocated <= 0)
+ {
+ return;
+ }
+
    // Calculate dirty memory
🤖 Prompt for AI Agents |
||
|
|
||
| void CcShard::Enqueue(uint32_t thd_id, CcRequestBase *req) | ||
| { | ||
| // The memory order in enqueue() of the concurrent queue ensures that the | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this changed to atomic? Atomics and a mutex + condition variable do not work well together.