Skip to content

Commit d4dcc78

Browse files
committed
fix(common): add cleanup for stale file locks to prevent memory leak
Fixes #5145 and #5142 — the file-lock HashMaps grow without bound. Problem: lock entries accumulate forever and are never removed. Solution: add a stale-lock cleanup mechanism that removes entries with no external reference (Arc strong_count == 1); cleanup triggers automatically whenever a map reaches MAX_LOCK_ENTRIES (10,000) entries.
1 parent d201070 commit d4dcc78

File tree

2 files changed

+54
-0
lines changed

2 files changed

+54
-0
lines changed

src/cortex-common/src/file_locking.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,9 @@ pub async fn atomic_write_async(
557557
.map_err(|e| FileLockError::AtomicWriteFailed(format!("spawn_blocking failed: {}", e)))?
558558
}
559559

560+
/// Maximum number of lock entries before triggering cleanup.
///
/// `get_lock` sweeps entries held only by the manager (Arc strong
/// count == 1) whenever the map reaches this size.
const MAX_LOCK_ENTRIES: usize = 10_000;
562+
560563
/// A file lock manager for coordinating access across multiple operations.
561564
///
562565
/// This is useful when you need to perform multiple operations on a file
@@ -577,15 +580,47 @@ impl FileLockManager {
577580
///
578581
/// This is in addition to the filesystem-level advisory lock and helps
579582
/// coordinate access within the same process.
583+
///
584+
/// Automatically cleans up stale lock entries when the map grows too large.
580585
pub fn get_lock(&self, path: impl AsRef<Path>) -> Arc<std::sync::Mutex<()>> {
581586
let path = path.as_ref().to_path_buf();
582587
let mut locks = self.locks.lock().unwrap();
588+
589+
// Clean up stale entries if the map is getting large
590+
if locks.len() >= MAX_LOCK_ENTRIES {
591+
Self::cleanup_stale_entries(&mut locks);
592+
}
593+
583594
locks
584595
.entry(path)
585596
.or_insert_with(|| Arc::new(std::sync::Mutex::new(())))
586597
.clone()
587598
}
588599

600+
/// Drop lock entries that no caller currently holds.
///
/// A stale entry is one owned solely by the map itself, i.e. its `Arc`
/// strong count is exactly 1; anything above that is still in use by a
/// caller and must be kept.
fn cleanup_stale_entries(
    locks: &mut std::collections::HashMap<PathBuf, Arc<std::sync::Mutex<()>>>,
) {
    locks.retain(|_path, entry| Arc::strong_count(entry) != 1);
}
609+
610+
/// Manually trigger cleanup of stale lock entries.
611+
///
612+
/// This removes entries where no external reference exists (only the
613+
/// manager holds the Arc). Useful for periodic maintenance.
614+
pub fn cleanup(&self) {
615+
let mut locks = self.locks.lock().unwrap();
616+
Self::cleanup_stale_entries(&mut locks);
617+
}
618+
619+
/// Returns the current number of lock entries in the manager.
620+
pub fn lock_count(&self) -> usize {
621+
self.locks.lock().unwrap().len()
622+
}
623+
589624
/// Execute an operation with both process-local and file-system locks.
590625
pub fn with_lock<T, F>(&self, path: impl AsRef<Path>, mode: LockMode, f: F) -> FileLockResult<T>
591626
where

src/cortex-resume/src/session_store.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,34 @@ use tokio::fs;
1313
use tokio::sync::{Mutex as AsyncMutex, RwLock};
1414
use tracing::{debug, info};
1515

16+
/// Maximum number of lock entries before triggering cleanup.
///
/// `get_file_lock` sweeps stale entries (Arc strong count == 1)
/// whenever the map reaches this size.
const MAX_LOCK_ENTRIES: usize = 10_000;
18+
1619
/// Global file lock manager for session store operations.
/// Prevents concurrent modifications to the same file within the process.
///
/// The outer `std::sync::Mutex` only guards the map itself and is held
/// briefly; the per-path `AsyncMutex` is the handle `get_file_lock`
/// hands out to callers.
static FILE_LOCKS: once_cell::sync::Lazy<std::sync::Mutex<HashMap<PathBuf, Arc<AsyncMutex<()>>>>> =
    once_cell::sync::Lazy::new(|| std::sync::Mutex::new(HashMap::new()));
2023

24+
/// Remove lock entries that are no longer in use.
25+
///
26+
/// An entry is considered stale when only the HashMap holds a reference
27+
/// to it (strong_count == 1), meaning no caller is currently using the lock.
28+
fn cleanup_stale_file_locks(locks: &mut HashMap<PathBuf, Arc<AsyncMutex<()>>>) {
29+
locks.retain(|_, arc| Arc::strong_count(arc) > 1);
30+
}
31+
2132
/// Acquire an async lock for a specific file path.
33+
///
34+
/// Automatically cleans up stale lock entries when the map grows too large.
2235
fn get_file_lock(path: &Path) -> Arc<AsyncMutex<()>> {
2336
let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
2437
let mut locks = FILE_LOCKS.lock().unwrap();
38+
39+
// Clean up stale entries if the map is getting large
40+
if locks.len() >= MAX_LOCK_ENTRIES {
41+
cleanup_stale_file_locks(&mut locks);
42+
}
43+
2544
locks
2645
.entry(canonical)
2746
.or_insert_with(|| Arc::new(AsyncMutex::new(())))

0 commit comments

Comments
 (0)