Skip to content

Commit 3d56184

Browse files
authored
Merge pull request #932 from multiplex55/codex/refactor-indexer-for-streaming-api
Streamed batched indexer with dedupe and max-index limit
2 parents 4f16b48 + b9863b3 commit 3d56184

6 files changed

Lines changed: 207 additions & 30 deletions

File tree

src/gui/mod.rs

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,7 @@ pub struct LauncherApp {
483483
bookmark_aliases_lc: HashMap<String, Option<String>>,
484484
plugin_dirs: Option<Vec<String>>,
485485
index_paths: Option<Vec<String>>,
486+
max_indexed_items: Option<usize>,
486487
enabled_plugins: Option<HashSet<String>>,
487488
enabled_capabilities: Option<std::collections::HashMap<String, Vec<String>>>,
488489
visible_flag: Arc<AtomicBool>,
@@ -785,20 +786,30 @@ impl LauncherApp {
785786
WatchEvent::Actions => {
786787
if let Ok(mut acts) = load_actions(&self.actions_path) {
787788
let custom_len = acts.len();
789+
self.custom_len = custom_len;
788790
if let Some(paths) = &self.index_paths {
789-
match indexer::index_paths(paths) {
790-
Ok(idx) => acts.extend(idx),
791-
Err(e) => {
792-
tracing::error!(error = %e, "failed to index paths");
793-
self.report_error_message(
794-
"launcher",
795-
format!("Failed to index paths: {e}"),
796-
);
791+
let options =
792+
indexer::IndexOptions::with_max_items(self.max_indexed_items);
793+
for batch in indexer::index_paths_batched(paths, options) {
794+
match batch {
795+
Ok(idx) => {
796+
acts.extend(idx);
797+
self.actions = Arc::new(acts.clone());
798+
self.update_action_cache();
799+
self.search();
800+
}
801+
Err(e) => {
802+
tracing::error!(error = %e, "failed to index paths");
803+
self.report_error_message(
804+
"launcher",
805+
format!("Failed to index paths: {e}"),
806+
);
807+
break;
808+
}
797809
}
798810
}
799811
}
800812
self.actions = Arc::new(acts);
801-
self.custom_len = custom_len;
802813
self.update_action_cache();
803814
self.search();
804815
crate::actions::bump_actions_version();
@@ -1538,6 +1549,7 @@ impl LauncherApp {
15381549
bookmark_aliases_lc,
15391550
plugin_dirs,
15401551
index_paths,
1552+
max_indexed_items: settings.max_indexed_items,
15411553
enabled_plugins,
15421554
enabled_capabilities,
15431555
visible_flag: visible_flag.clone(),

src/indexer.rs

Lines changed: 143 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,157 @@
11
use crate::actions::Action;
2-
use walkdir::WalkDir;
2+
use std::collections::HashSet;
3+
use std::fs;
4+
use std::path::PathBuf;
5+
use walkdir::{IntoIter as WalkDirIter, WalkDir};
36

4-
/// Index the provided filesystem paths and return a list of [`Action`]s.
7+
/// Batch size used by `IndexOptions::default()`.
const DEFAULT_BATCH_SIZE: usize = 512;
8+
/// Item cap used by `IndexOptions::default()` and as the fallback in
/// `IndexOptions::with_max_items` when no explicit maximum is supplied.
const DEFAULT_MAX_ITEMS: usize = 100_000;
9+
10+
/// Tuning knobs for the batched indexer.
///
/// `IndexBatchIter::new` raises either field to 1 if a smaller value is given.
#[derive(Debug, Clone, Copy)]
11+
pub struct IndexOptions {
12+
/// Largest number of actions placed in a single yielded batch.
pub batch_size: usize,
13+
/// Cap on the total number of actions produced across all batches.
pub max_items: usize,
14+
}
15+
16+
impl Default for IndexOptions {
17+
/// Returns options built from `DEFAULT_BATCH_SIZE` and `DEFAULT_MAX_ITEMS`.
fn default() -> Self {
18+
Self {
19+
batch_size: DEFAULT_BATCH_SIZE,
20+
max_items: DEFAULT_MAX_ITEMS,
21+
}
22+
}
23+
}
24+
25+
impl IndexOptions {
26+
/// Builds options with an optional item cap.
///
/// `None` falls back to `DEFAULT_MAX_ITEMS`; every other field takes its
/// value from `Self::default()`.
pub fn with_max_items(max_items: Option<usize>) -> Self {
27+
Self {
28+
max_items: max_items.unwrap_or(DEFAULT_MAX_ITEMS),
29+
..Self::default()
30+
}
31+
}
32+
}
33+
34+
/// Lazily indexes files from one or more roots and yields actions in batches.
535
///
6-
/// Any errors encountered while traversing the directory tree are logged and
7-
/// returned to the caller.
8-
pub fn index_paths(paths: &[String]) -> anyhow::Result<Vec<Action>> {
9-
let mut results = Vec::new();
10-
for p in paths {
11-
for entry in WalkDir::new(p).into_iter() {
12-
let entry = match entry {
13-
Ok(e) => e,
14-
Err(e) => {
15-
tracing::error!(path = %p, error = %e, "failed to read directory entry");
16-
return Err(e.into());
36+
/// Duplicate files are skipped by canonical path. A traversal error is
37+
/// yielded to the caller as an `Err` item, discarding the partial batch.
38+
pub struct IndexBatchIter {
39+
/// Root paths to walk, in the order they were supplied.
roots: Vec<String>,
40+
/// Index into `roots` of the next root that has not been started yet.
root_idx: usize,
41+
/// Walker for the root currently being traversed; `None` before the first
/// root is opened and after a root has been exhausted.
current: Option<WalkDirIter>,
42+
/// Canonical paths already encountered, used to skip duplicates.
seen: HashSet<PathBuf>,
43+
/// Batch size and item cap in effect for this iterator.
options: IndexOptions,
44+
/// Number of actions pushed into batches so far.
produced: usize,
45+
}
46+
47+
impl IndexBatchIter {
48+
/// Creates an iterator over `paths` without touching the filesystem yet.
///
/// The walker for the first root is only created on the first call to
/// `next`, because `current` starts out as `None`.
fn new(paths: &[String], options: IndexOptions) -> Self {
49+
// Clamp both limits to at least 1, so a zero value is treated as 1.
let options = IndexOptions {
50+
batch_size: options.batch_size.max(1),
51+
max_items: options.max_items.max(1),
52+
};
53+
Self {
54+
roots: paths.to_vec(),
55+
root_idx: 0,
56+
current: None,
57+
seen: HashSet::new(),
58+
options,
59+
produced: 0,
60+
}
61+
}
62+
63+
/// Returns a clone of the next unvisited root and advances `root_idx`,
/// or `None` (leaving `root_idx` unchanged) once every root has been handed out.
fn next_root(&mut self) -> Option<String> {
64+
let root = self.roots.get(self.root_idx).cloned();
65+
if root.is_some() {
66+
self.root_idx += 1;
67+
}
68+
root
69+
}
70+
}
71+
72+
impl Iterator for IndexBatchIter {
73+
// Each item is either one batch of actions or the error that interrupted it.
type Item = anyhow::Result<Vec<Action>>;
74+
75+
/// Collects up to `batch_size` actions, walking the roots in order.
///
/// Returns `None` once the item cap has been reached or when no further
/// files are found; returns `Some(Err(..))` when a directory entry cannot
/// be read or a path cannot be canonicalized.
///
/// NOTE(review): nothing here marks the iterator as finished after an
/// error, so a later `next()` call would resume the walk. On the error
/// returns below, actions already pushed into the local `batch` are
/// dropped even though they were counted in `produced` and recorded in `seen`.
fn next(&mut self) -> Option<Self::Item> {
76+
// Item cap already reached by earlier batches: end of iteration.
if self.produced >= self.options.max_items {
77+
return None;
78+
}
79+
80+
let mut batch = Vec::with_capacity(self.options.batch_size);
81+
// Keep pulling entries until the batch is full or the overall cap is hit.
while self.produced < self.options.max_items && batch.len() < self.options.batch_size {
82+
// No walker in progress: open the next root, or stop when none remain.
if self.current.is_none() {
83+
if let Some(root) = self.next_root() {
84+
self.current = Some(WalkDir::new(root).into_iter());
85+
} else {
86+
break;
1787
}
88+
}
89+
90+
// `current` was populated above, so the `else` arm is not expected to run.
let Some(iter) = self.current.as_mut() else {
91+
continue;
1892
};
19-
if entry.file_type().is_file() {
20-
if let Some(name) = entry.path().file_name().and_then(|n| n.to_str()) {
21-
results.push(Action {
93+
94+
match iter.next() {
95+
Some(Ok(entry)) => {
96+
// Only files become actions; every other entry type is skipped.
if !entry.file_type().is_file() {
97+
continue;
98+
}
99+
// Canonicalize so the same file reached through different roots
// maps to a single key in `seen`.
let canonical = match fs::canonicalize(entry.path()) {
100+
Ok(path) => path,
101+
Err(err) => {
102+
tracing::error!(
103+
path = %entry.path().display(),
104+
error = %err,
105+
"failed to canonicalize indexed path"
106+
);
107+
return Some(Err(err.into()));
108+
}
109+
};
110+
// `insert` returns false when the path was already present: a duplicate.
if !self.seen.insert(canonical.clone()) {
111+
continue;
112+
}
113+
// Skip paths with no final component or a non-UTF-8 file name; such a
// path stays in `seen` but is not counted in `produced`.
let Some(name) = canonical.file_name().and_then(|n| n.to_str()) else {
114+
continue;
115+
};
116+
let display = canonical.display().to_string();
117+
// The label is the file name; `desc` and `action` both carry the
// canonical path string.
batch.push(Action {
22118
label: name.to_string(),
23-
desc: entry.path().display().to_string(),
24-
action: entry.path().display().to_string(),
119+
desc: display.clone(),
120+
action: display,
25121
args: None,
26122
});
123+
self.produced += 1;
124+
}
125+
Some(Err(err)) => {
126+
// The walker failed to read an entry: log it and hand the error to the caller.
tracing::error!(error = %err, "failed to read directory entry");
127+
return Some(Err(err.into()));
128+
}
129+
None => {
130+
// Current root exhausted; clearing `current` makes the next loop
// iteration open the following root.
self.current = None;
27131
}
28132
}
29133
}
134+
135+
// An empty batch means nothing more was found, which ends the iteration.
if batch.is_empty() {
136+
None
137+
} else {
138+
Some(Ok(batch))
139+
}
140+
}
141+
}
142+
143+
/// Returns a lazy iterator that indexes files under `paths` in batches.
///
/// Each item is `Ok(batch)` with at most `options.batch_size` actions, or an
/// `Err` when traversal or canonicalization fails. No directory is walked
/// until the iterator is first advanced.
pub fn index_paths_batched(paths: &[String], options: IndexOptions) -> IndexBatchIter {
144+
IndexBatchIter::new(paths, options)
145+
}
146+
147+
/// Index the provided filesystem paths and return a list of [`Action`]s.
148+
///
149+
/// This compatibility helper exhausts the batched iterator into a single
150+
/// vector; prefer [`index_paths_batched`] when possible.
151+
pub fn index_paths(paths: &[String]) -> anyhow::Result<Vec<Action>> {
152+
let mut results = Vec::new();
153+
// Default options apply here, so the result is capped at `DEFAULT_MAX_ITEMS`.
for batch in index_paths_batched(paths, IndexOptions::default()) {
154+
// `?` returns the first error and discards anything gathered so far.
results.extend(batch?);
30155
}
31156
Ok(results)
32157
}

src/main.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,10 @@ fn main() -> anyhow::Result<()> {
199199
}
200200

201201
if let Some(paths) = &settings.index_paths {
202-
actions_vec.extend(indexer::index_paths(paths)?);
202+
let options = indexer::IndexOptions::with_max_items(settings.max_indexed_items);
203+
for batch in indexer::index_paths_batched(paths, options) {
204+
actions_vec.extend(batch?);
205+
}
203206
}
204207
let actions = Arc::new(actions_vec);
205208

src/settings.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,10 @@ pub struct Settings {
328328
/// Hotkey to show the quick help overlay. If `None`, the overlay is disabled.
329329
pub help_hotkey: Option<String>,
330330
pub index_paths: Option<Vec<String>>,
331+
/// Maximum number of files to index from `index_paths`.
332+
///
333+
/// When missing, a conservative default is applied to protect memory.
334+
pub max_indexed_items: Option<usize>,
331335
pub plugin_dirs: Option<Vec<String>>,
332336
/// Set of plugin names which should be enabled. If `None`, all loaded
333337
/// plugins are enabled.
@@ -606,6 +610,7 @@ impl Default for Settings {
606610
quit_hotkey: None,
607611
help_hotkey: Some("F1".into()),
608612
index_paths: None,
613+
max_indexed_items: None,
609614
plugin_dirs: None,
610615
enabled_plugins: None,
611616
enabled_capabilities: None,

src/settings_editor.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,7 @@ impl SettingsEditor {
335335
Some(self.help_hotkey.clone())
336336
},
337337
index_paths: current.index_paths.clone(),
338+
max_indexed_items: current.max_indexed_items,
338339
plugin_dirs: current.plugin_dirs.clone(),
339340
enabled_plugins: current.enabled_plugins.clone(),
340341
enabled_capabilities: current.enabled_capabilities.clone(),

tests/indexer.rs

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,46 @@ fn indexer_indexes_files_recursively() {
2121

2222
let expected = [file1, file2, file3];
2323
for path in expected.iter() {
24-
let label = path.file_name().unwrap().to_str().unwrap();
25-
let display = path.display().to_string();
24+
let canonical = fs::canonicalize(path).expect("canonical path");
25+
let label = canonical.file_name().unwrap().to_str().unwrap();
26+
let display = canonical.display().to_string();
2627
assert!(actions.iter().any(|a| a.label == label
2728
&& a.action == display
2829
&& a.desc == display
2930
&& a.args.is_none()));
3031
}
3132
}
3233

34+
// Checks that a batch never exceeds `batch_size`, that iteration ends once
// `max_items` actions have been produced, and that a batch holds no repeated path.
//
// NOTE(review): with three files and `max_items: 2`, the cap is reached inside
// the first root, so the duplicate second root is never walked; cross-root
// dedupe is therefore not exercised by this test — confirm whether that is intended.
#[test]
35+
fn indexer_batches_dedupes_and_honors_max_items() {
36+
let dir = tempdir().expect("failed to create temp dir");
37+
let one = dir.path().join("one.txt");
38+
let two = dir.path().join("two.txt");
39+
let three = dir.path().join("three.txt");
40+
fs::write(&one, b"1").expect("write one");
41+
fs::write(&two, b"2").expect("write two");
42+
fs::write(&three, b"3").expect("write three");
43+
44+
// The same directory is listed twice as an index root.
let same_root = dir.path().to_string_lossy().to_string();
45+
let paths = vec![same_root.clone(), same_root];
46+
let mut iter = multi_launcher::indexer::index_paths_batched(
47+
&paths,
48+
multi_launcher::indexer::IndexOptions {
49+
batch_size: 2,
50+
max_items: 2,
51+
},
52+
);
53+
54+
// One full batch of two, then the cap ends the iteration.
let first = iter.next().expect("first batch").expect("first ok");
55+
assert_eq!(first.len(), 2);
56+
assert!(iter.next().is_none(), "max_items should stop iteration");
57+
58+
// Every action path in the batch must be unique.
let mut seen = std::collections::HashSet::new();
59+
for action in first {
60+
assert!(seen.insert(action.action), "deduped paths only");
61+
}
62+
}
63+
3364
// Ensure indexing a missing path returns an error
3465
#[test]
3566
fn indexer_errors_on_missing_path() {

0 commit comments

Comments
 (0)