From 2e1a053a23db65ccb28519bb611edec8f303776e Mon Sep 17 00:00:00 2001 From: Alistair Israel Date: Sat, 14 Mar 2026 23:33:47 -0400 Subject: [PATCH 1/2] Optimize REPL read |> count() to use metadata for Parquet/ORC Made-with: Cursor --- src/cli/repl.rs | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/src/cli/repl.rs b/src/cli/repl.rs index 3ddf60a..70b43b9 100644 --- a/src/cli/repl.rs +++ b/src/cli/repl.rs @@ -502,6 +502,7 @@ fn plan_pipeline_with_state(exprs: Vec) -> crate::Result<(Vec>>()?; + optimize_read_then_count(&mut stages); let statement_incomplete = stages.last().is_some_and(PipelineStage::is_non_terminal); if let Some(implicit_stage) = stages .last() @@ -512,6 +513,18 @@ fn plan_pipeline_with_state(exprs: Vec) -> crate::Result<(Vec) { + if let [PipelineStage::Read { path }, PipelineStage::Count { path: None }] = + stages.as_slice() + { + *stages = vec![PipelineStage::Count { + path: Some(path.clone()), + }]; + } +} + /// Extracts a single positive integer argument from a function call's args. fn extract_usize_arg(func_name: &str, args: &[Expr]) -> crate::Result { match args { @@ -1608,14 +1621,31 @@ mod tests { let mut exprs = Vec::new(); collect_pipe_stages(expr, &mut exprs); let pipeline = plan_pipeline(exprs).unwrap(); - assert_eq!(pipeline.len(), 2); + // read(path) |> count() is optimized to count(path) so Parquet/ORC use metadata + assert_eq!(pipeline.len(), 1); + assert_eq!( + pipeline[0], + PipelineStage::Count { + path: Some("a.parquet".to_string()) + } + ); + } + + #[test] + fn test_plan_pipeline_read_select_count_not_optimized() { + let expr = parse(r#"read("a.parquet") |> select(:x) |> count()"#); + let mut exprs = Vec::new(); + collect_pipe_stages(expr, &mut exprs); + let pipeline = plan_pipeline(exprs).unwrap(); + assert_eq!(pipeline.len(), 3); assert_eq!( pipeline[0], PipelineStage::Read { path: "a.parquet".to_string() } ); - assert_eq!(pipeline[1], PipelineStage::Count { path: None }); + assert!(matches!(&pipeline[1], PipelineStage::Select { .. })); + assert_eq!(pipeline[2], PipelineStage::Count { path: None }); } // ── eval_count ───────────────────────────────────────────── From 0d690d215e7e68b3ab38cae2ab31c643fa08ca1d Mon Sep 17 00:00:00 2001 From: Alistair Israel Date: Sat, 14 Mar 2026 23:44:47 -0400 Subject: [PATCH 2/2] cargo fmt --- src/cli/repl.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cli/repl.rs b/src/cli/repl.rs index 70b43b9..6095028 100644 --- a/src/cli/repl.rs +++ b/src/cli/repl.rs @@ -516,8 +516,10 @@ fn plan_pipeline_with_state(exprs: Vec) -> crate::Result<(Vec) { - if let [PipelineStage::Read { path }, PipelineStage::Count { path: None }] = - stages.as_slice() + if let [ + PipelineStage::Read { path }, + PipelineStage::Count { path: None }, + ] = stages.as_slice() { *stages = vec![PipelineStage::Count { path: Some(path.clone()),