diff --git a/src/cli/repl.rs b/src/cli/repl.rs index 3ddf60a..6095028 100644 --- a/src/cli/repl.rs +++ b/src/cli/repl.rs @@ -502,6 +502,7 @@ fn plan_pipeline_with_state(exprs: Vec) -> crate::Result<(Vec>>()?; + optimize_read_then_count(&mut stages); let statement_incomplete = stages.last().is_some_and(PipelineStage::is_non_terminal); if let Some(implicit_stage) = stages .last() @@ -512,6 +513,20 @@ fn plan_pipeline_with_state(exprs: Vec) -> crate::Result<(Vec) { + if let [ + PipelineStage::Read { path }, + PipelineStage::Count { path: None }, + ] = stages.as_slice() + { + *stages = vec![PipelineStage::Count { + path: Some(path.clone()), + }]; + } +} + /// Extracts a single positive integer argument from a function call's args. fn extract_usize_arg(func_name: &str, args: &[Expr]) -> crate::Result { match args { @@ -1608,14 +1623,31 @@ mod tests { let mut exprs = Vec::new(); collect_pipe_stages(expr, &mut exprs); let pipeline = plan_pipeline(exprs).unwrap(); - assert_eq!(pipeline.len(), 2); + // read(path) |> count() is optimized to count(path) so Parquet/ORC use metadata + assert_eq!(pipeline.len(), 1); + assert_eq!( + pipeline[0], + PipelineStage::Count { + path: Some("a.parquet".to_string()) + } + ); + } + + #[test] + fn test_plan_pipeline_read_select_count_not_optimized() { + let expr = parse(r#"read("a.parquet") |> select(:x) |> count()"#); + let mut exprs = Vec::new(); + collect_pipe_stages(expr, &mut exprs); + let pipeline = plan_pipeline(exprs).unwrap(); + assert_eq!(pipeline.len(), 3); assert_eq!( pipeline[0], PipelineStage::Read { path: "a.parquet".to_string() } ); - assert_eq!(pipeline[1], PipelineStage::Count { path: None }); + assert!(matches!(&pipeline[1], PipelineStage::Select { .. })); + assert_eq!(pipeline[2], PipelineStage::Count { path: None }); } // ── eval_count ─────────────────────────────────────────────