From 3c980744bd21aeff5903eb0790c2246edabd8852 Mon Sep 17 00:00:00 2001 From: multiplex55 <6619098+multiplex55@users.noreply.github.com> Date: Sat, 14 Mar 2026 17:37:27 -0400 Subject: [PATCH 1/2] Refactor indexing to batched streaming with dedupe and limits --- src/gui/mod.rs | 30 +++++--- src/indexer.rs | 160 ++++++++++++++++++++++++++++++++++++----- src/main.rs | 5 +- src/settings.rs | 5 ++ src/settings_editor.rs | 1 + tests/indexer.rs | 35 ++++++++- 6 files changed, 205 insertions(+), 31 deletions(-) diff --git a/src/gui/mod.rs b/src/gui/mod.rs index 5ae8c4ad..2d59b8a8 100644 --- a/src/gui/mod.rs +++ b/src/gui/mod.rs @@ -483,6 +483,7 @@ pub struct LauncherApp { bookmark_aliases_lc: HashMap>, plugin_dirs: Option>, index_paths: Option>, + max_indexed_items: Option, enabled_plugins: Option>, enabled_capabilities: Option>>, visible_flag: Arc, @@ -785,20 +786,30 @@ impl LauncherApp { WatchEvent::Actions => { if let Ok(mut acts) = load_actions(&self.actions_path) { let custom_len = acts.len(); + self.custom_len = custom_len; if let Some(paths) = &self.index_paths { - match indexer::index_paths(paths) { - Ok(idx) => acts.extend(idx), - Err(e) => { - tracing::error!(error = %e, "failed to index paths"); - self.report_error_message( - "launcher", - format!("Failed to index paths: {e}"), - ); + let options = + indexer::IndexOptions::with_max_items(self.max_indexed_items); + for batch in indexer::index_paths_batched(paths, options) { + match batch { + Ok(idx) => { + acts.extend(idx); + self.actions = Arc::new(acts.clone()); + self.update_action_cache(); + self.search(); + } + Err(e) => { + tracing::error!(error = %e, "failed to index paths"); + self.report_error_message( + "launcher", + format!("Failed to index paths: {e}"), + ); + break; + } } } } self.actions = Arc::new(acts); - self.custom_len = custom_len; self.update_action_cache(); self.search(); crate::actions::bump_actions_version(); @@ -1538,6 +1549,7 @@ impl LauncherApp { bookmark_aliases_lc, plugin_dirs, index_paths, + max_indexed_items: settings.max_indexed_items, enabled_plugins, enabled_capabilities, visible_flag: visible_flag.clone(), diff --git a/src/indexer.rs b/src/indexer.rs index c66f5a69..4a3fb0a7 100644 --- a/src/indexer.rs +++ b/src/indexer.rs @@ -1,32 +1,154 @@ use crate::actions::Action; -use walkdir::WalkDir; +use std::collections::HashSet; +use std::fs; +use std::path::PathBuf; +use walkdir::{IntoIter as WalkDirIter, WalkDir}; -/// Index the provided filesystem paths and return a list of [`Action`]s. +const DEFAULT_BATCH_SIZE: usize = 512; +const DEFAULT_MAX_ITEMS: usize = 100_000; + +#[derive(Debug, Clone, Copy)] +pub struct IndexOptions { + pub batch_size: usize, + pub max_items: usize, +} + +impl Default for IndexOptions { + fn default() -> Self { + Self { + batch_size: DEFAULT_BATCH_SIZE, + max_items: DEFAULT_MAX_ITEMS, + } + } +} + +impl IndexOptions { + pub fn with_max_items(max_items: Option) -> Self { + Self { + max_items: max_items.unwrap_or(DEFAULT_MAX_ITEMS), + ..Self::default() + } + } +} + +/// Lazily indexes files from one or more roots and yields actions in batches. /// -/// Any errors encountered while traversing the directory tree are logged and -/// returned to the caller. -pub fn index_paths(paths: &[String]) -> anyhow::Result> { - let mut results = Vec::new(); - for p in paths { - for entry in WalkDir::new(p).into_iter() { - let entry = match entry { - Ok(e) => e, - Err(e) => { - tracing::error!(path = %p, error = %e, "failed to read directory entry"); - return Err(e.into()); - } +/// Duplicate files are skipped by canonical path. Traversal errors stop +/// iteration and are returned to the caller. +pub struct IndexBatchIter { + roots: Vec, + root_idx: usize, + current: Option, + seen: HashSet, + options: IndexOptions, + produced: usize, +} + +impl IndexBatchIter { + fn new(paths: &[String], options: IndexOptions) -> Self { + let options = IndexOptions { + batch_size: options.batch_size.max(1), + max_items: options.max_items.max(1), + }; + Self { + roots: paths.to_vec(), + root_idx: 0, + current: None, + seen: HashSet::new(), + options, + produced: 0, + } + } + + fn next_root(&mut self) -> Option { + let root = self.roots.get(self.root_idx).cloned(); + if root.is_some() { + self.root_idx += 1; + } + root + } +} + +impl Iterator for IndexBatchIter { + type Item = anyhow::Result>; + + fn next(&mut self) -> Option { + if self.produced >= self.options.max_items { + return None; + } + + let mut batch = Vec::with_capacity(self.options.batch_size); + while self.produced < self.options.max_items && batch.len() < self.options.batch_size { + if self.current.is_none() { + let root = self.next_root()?; + self.current = Some(WalkDir::new(root).into_iter()); + } + + let Some(iter) = self.current.as_mut() else { + continue; }; - if entry.file_type().is_file() { - if let Some(name) = entry.path().file_name().and_then(|n| n.to_str()) { - results.push(Action { + + match iter.next() { + Some(Ok(entry)) => { + if !entry.file_type().is_file() { + continue; + } + let canonical = match fs::canonicalize(entry.path()) { + Ok(path) => path, + Err(err) => { + tracing::error!( + path = %entry.path().display(), + error = %err, + "failed to canonicalize indexed path" + ); + return Some(Err(err.into())); + } + }; + if !self.seen.insert(canonical.clone()) { + continue; + } + let Some(name) = canonical.file_name().and_then(|n| n.to_str()) else { + continue; + }; + let display = canonical.display().to_string(); + batch.push(Action { label: name.to_string(), - desc: entry.path().display().to_string(), - action: entry.path().display().to_string(), + desc: display.clone(), + action: display, args: None, }); + self.produced += 1; + } + Some(Err(err)) => { + tracing::error!(error = %err, "failed to read directory entry"); + return Some(Err(err.into())); + } + None => { + self.current = None; } } } + + if batch.is_empty() { + None + } else { + Some(Ok(batch)) + } + } +} + +pub fn index_paths_batched(paths: &[String], options: IndexOptions) -> IndexBatchIter { + IndexBatchIter::new(paths, options) +} + +/// Index the provided filesystem paths and return a list of [`Action`]s. +/// +/// This compatibility helper exhausts the batched iterator into a single +/// vector; prefer [`index_paths_batched`] when possible. +pub fn index_paths(paths: &[String]) -> anyhow::Result> { + let mut results = Vec::new(); + for batch in index_paths_batched(paths, IndexOptions::default()) { + results.extend(batch?); } Ok(results) } diff --git a/src/main.rs b/src/main.rs index 82421c6d..8ffe16ff 100644 --- a/src/main.rs +++ b/src/main.rs @@ -199,7 +199,10 @@ fn main() -> anyhow::Result<()> { } if let Some(paths) = &settings.index_paths { - actions_vec.extend(indexer::index_paths(paths)?); + let options = indexer::IndexOptions::with_max_items(settings.max_indexed_items); + for batch in indexer::index_paths_batched(paths, options) { + actions_vec.extend(batch?); + } } let actions = Arc::new(actions_vec); diff --git a/src/settings.rs b/src/settings.rs index 0de414ad..b494f3f0 100644 --- a/src/settings.rs +++ b/src/settings.rs @@ -328,6 +328,10 @@ pub struct Settings { /// Hotkey to show the quick help overlay. If `None`, the overlay is disabled. pub help_hotkey: Option, pub index_paths: Option>, + /// Maximum number of filesystem entries to index from `index_paths`. + /// + /// When missing, a conservative default is applied to protect memory. + pub max_indexed_items: Option, pub plugin_dirs: Option>, /// Set of plugin names which should be enabled. If `None`, all loaded /// plugins are enabled. @@ -606,6 +610,7 @@ impl Default for Settings { quit_hotkey: None, help_hotkey: Some("F1".into()), index_paths: None, + max_indexed_items: None, plugin_dirs: None, enabled_plugins: None, enabled_capabilities: None, diff --git a/src/settings_editor.rs b/src/settings_editor.rs index 31bba7e6..eb5fc396 100644 --- a/src/settings_editor.rs +++ b/src/settings_editor.rs @@ -335,6 +335,7 @@ impl SettingsEditor { Some(self.help_hotkey.clone()) }, index_paths: current.index_paths.clone(), + max_indexed_items: current.max_indexed_items, plugin_dirs: current.plugin_dirs.clone(), enabled_plugins: current.enabled_plugins.clone(), enabled_capabilities: current.enabled_capabilities.clone(), diff --git a/tests/indexer.rs b/tests/indexer.rs index 182b5bbf..053d1879 100644 --- a/tests/indexer.rs +++ b/tests/indexer.rs @@ -21,8 +21,9 @@ fn indexer_indexes_files_recursively() { let expected = [file1, file2, file3]; for path in expected.iter() { - let label = path.file_name().unwrap().to_str().unwrap(); - let display = path.display().to_string(); + let canonical = fs::canonicalize(path).expect("canonical path"); + let label = canonical.file_name().unwrap().to_str().unwrap(); + let display = canonical.display().to_string(); assert!(actions.iter().any(|a| a.label == label && a.action == display && a.desc == display @@ -30,6 +31,36 @@ fn indexer_indexes_files_recursively() { } } +#[test] +fn indexer_batches_dedupes_and_honors_max_items() { + let dir = tempdir().expect("failed to create temp dir"); + let one = dir.path().join("one.txt"); + let two = dir.path().join("two.txt"); + let three = dir.path().join("three.txt"); + fs::write(&one, b"1").expect("write one"); + fs::write(&two, b"2").expect("write two"); + fs::write(&three, b"3").expect("write three"); + + let same_root = dir.path().to_string_lossy().to_string(); + let paths = vec![same_root.clone(), same_root]; + let mut iter = multi_launcher::indexer::index_paths_batched( + &paths, + multi_launcher::indexer::IndexOptions { + batch_size: 2, + max_items: 2, + }, + ); + + let first = iter.next().expect("first batch").expect("first ok"); + assert_eq!(first.len(), 2); + assert!(iter.next().is_none(), "max_items should stop iteration"); + + let mut seen = std::collections::HashSet::new(); + for action in first { + assert!(seen.insert(action.action), "deduped paths only"); + } +} + // Ensure indexing a missing path returns an error #[test] fn indexer_errors_on_missing_path() { From b9863b357aed668a764b204a7f70125258cfa387 Mon Sep 17 00:00:00 2001 From: multiplex55 <6619098+multiplex55@users.noreply.github.com> Date: Sat, 14 Mar 2026 21:39:03 -0400 Subject: [PATCH 2/2] Fix batched indexer dropping final partial batch --- src/indexer.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/indexer.rs b/src/indexer.rs index 4a3fb0a7..76d6ff09 100644 --- a/src/indexer.rs +++ b/src/indexer.rs @@ -80,8 +80,11 @@ impl Iterator for IndexBatchIter { let mut batch = Vec::with_capacity(self.options.batch_size); while self.produced < self.options.max_items && batch.len() < self.options.batch_size { if self.current.is_none() { - let root = self.next_root()?; - self.current = Some(WalkDir::new(root).into_iter()); + if let Some(root) = self.next_root() { + self.current = Some(WalkDir::new(root).into_iter()); + } else { + break; + } } let Some(iter) = self.current.as_mut() else {