From d30bc854f33999122a48ce4e0dfd724dbff6ad68 Mon Sep 17 00:00:00 2001 From: Alistair Israel Date: Sun, 15 Mar 2026 10:37:38 -0400 Subject: [PATCH 01/18] Rewording --- features/cli/convert.feature | 2 +- features/repl/conversion.feature | 6 +++--- features/repl/head.feature | 2 +- features/repl/select.feature | 2 +- features/repl/tail.feature | 2 +- tests/repl.rs | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/features/cli/convert.feature b/features/cli/convert.feature index 9a8b956..d47f7a7 100644 --- a/features/cli/convert.feature +++ b/features/cli/convert.feature @@ -70,7 +70,7 @@ Feature: Convert Then the command should succeed And the output should contain "Converted fixtures/no_header.csv to $TEMPDIR/no_header.parquet" And the file "$TEMPDIR/no_header.parquet" should exist - And that file should be valid Parquet + And that file should be a valid Parquet file And that file should have 3 records Scenario: Avro to CSV diff --git a/features/repl/conversion.feature b/features/repl/conversion.feature index 20e2461..fcb4643 100644 --- a/features/repl/conversion.feature +++ b/features/repl/conversion.feature @@ -79,7 +79,7 @@ Feature: Conversion read("fixtures/userdata5.avro") |> write("$TEMPDIR/userdata5.parquet") ``` Then the file "$TEMPDIR/userdata5.parquet" should exist - And that file should be valid Parquet + And that file should be a valid Parquet file Scenario: Avro to ORC When the REPL is ran and the user types: @@ -103,7 +103,7 @@ Feature: Conversion read("fixtures/table.csv") |> write("$TEMPDIR/table_from_csv.parquet") ``` Then the file "$TEMPDIR/table_from_csv.parquet" should exist - And that file should be valid Parquet + And that file should be a valid Parquet file Scenario: CSV to JSON When the REPL is ran and the user types: @@ -155,7 +155,7 @@ Feature: Conversion read("fixtures/userdata.orc") |> write("$TEMPDIR/userdata_orc.parquet") ``` Then the file "$TEMPDIR/userdata_orc.parquet" should exist - And that file should be valid 
Parquet + And that file should be a valid Parquet file Scenario: ORC to Avro When the REPL is ran and the user types: diff --git a/features/repl/head.feature b/features/repl/head.feature index da889e7..c213502 100644 --- a/features/repl/head.feature +++ b/features/repl/head.feature @@ -100,7 +100,7 @@ Feature: Head read("fixtures/userdata5.avro") |> head(10) |> write("$TEMPDIR/head.parquet") ``` Then the file "$TEMPDIR/head.parquet" should exist - And that file should be valid Parquet + And that file should be a valid Parquet file And that file should have 10 records Scenario: Head to Avro diff --git a/features/repl/select.feature b/features/repl/select.feature index f7657ab..2e115b0 100644 --- a/features/repl/select.feature +++ b/features/repl/select.feature @@ -121,4 +121,4 @@ Feature: Select read("fixtures/userdata5.avro") |> select(:id, :first_name, :email) |> write("$TEMPDIR/select.parquet") ``` Then the file "$TEMPDIR/select.parquet" should exist - And that file should be valid Parquet + And that file should be a valid Parquet file diff --git a/features/repl/tail.feature b/features/repl/tail.feature index e2b641b..ef3128a 100644 --- a/features/repl/tail.feature +++ b/features/repl/tail.feature @@ -99,7 +99,7 @@ Feature: Tail read("fixtures/userdata5.avro") |> tail(10) |> write("$TEMPDIR/tail.parquet") ``` Then the file "$TEMPDIR/tail.parquet" should exist - And that file should be valid Parquet + And that file should be a valid Parquet file Scenario: Tail to Avro When the REPL is ran and the user types: diff --git a/tests/repl.rs b/tests/repl.rs index 76383a7..ab98175 100644 --- a/tests/repl.rs +++ b/tests/repl.rs @@ -221,7 +221,7 @@ fn that_file_should_be_valid_avro(world: &mut ReplWorld) { ); } -#[then(regex = r#"^that file should be valid Parquet$"#)] +#[then(regex = r#"^that file should be a valid Parquet file$"#)] fn that_file_should_be_valid_parquet(world: &mut ReplWorld) { let path = world .last_file From 7ab5def1cff30963bb0fdf6ed697dcb85e64f166 Mon 
Sep 17 00:00:00 2001 From: Alistair Israel Date: Sun, 15 Mar 2026 11:02:19 -0400 Subject: [PATCH 02/18] actions-rust-lang/setup-rust-toolchain@v1 --- .github/workflows/ci.yml | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 59e2e62..2c76422 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,4 +1,4 @@ -name: CI +cname: CI on: push: @@ -11,6 +11,11 @@ on: - "README.md" - ".github/workflows/ci.yml" +env: + # We use environment variables to specify the Rust version and other settings once + RUST_TOOLCHAIN: 1.91.1 + CARGO_TERM_COLOR: always + jobs: check: name: Lint & Test @@ -19,17 +24,10 @@ jobs: - name: Checkout uses: actions/checkout@v4 - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@stable + - uses: actions-rust-lang/setup-rust-toolchain@v1 with: - toolchain: nightly - components: clippy - - - name: Install rustfmt - run: rustup component add rustfmt - - - name: Cache cargo registry and target - uses: Swatinem/rust-cache@v2 + toolchain: ${{ env.RUST_TOOLCHAIN }} + components: rustfmt, clippy - name: Check formatting run: cargo +nightly fmt -- --check From 69352cb63b2ed28e2302ad76f74ad7e4bcce5f72 Mon Sep 17 00:00:00 2001 From: Alistair Israel Date: Sun, 15 Mar 2026 11:02:56 -0400 Subject: [PATCH 03/18] Use 1.94.0 --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2c76422..ecc2ce1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ on: env: # We use environment variables to specify the Rust version and other settings once - RUST_TOOLCHAIN: 1.91.1 + RUST_TOOLCHAIN: 1.94.0 CARGO_TERM_COLOR: always jobs: From 547cfe66f575dfa9be2598ea027f89217df4c40f Mon Sep 17 00:00:00 2001 From: Alistair Israel Date: Sun, 15 Mar 2026 11:04:16 -0400 Subject: [PATCH 04/18] typo --- .github/workflows/ci.yml | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ecc2ce1..8ed289e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,4 +1,4 @@ -cname: CI +name: CI on: push: From fe05ee19c6afbfc91b0b5bc45a7316274f26c6b2 Mon Sep 17 00:00:00 2001 From: Alistair Israel Date: Sun, 15 Mar 2026 11:12:36 -0400 Subject: [PATCH 05/18] Don't use nightly --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8ed289e..de798f6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,7 +30,7 @@ jobs: components: rustfmt, clippy - name: Check formatting - run: cargo +nightly fmt -- --check + run: cargo fmt -- --check - name: Run clippy run: cargo clippy --all-targets -- -D warnings From 5a1226f6cb3fed2b2337a81ebc52feca9ba287e1 Mon Sep 17 00:00:00 2001 From: Alistair Israel Date: Sun, 15 Mar 2026 12:00:22 -0400 Subject: [PATCH 06/18] Implement DataFrame API support in convert command for streamlined file processing - Added support for reading and writing JSON files using DataFusion's DataFrame API. - Introduced a new function `is_datafusion_native` to determine if the input format is compatible with DataFusion. - Refactored the `convert` function to utilize either the DataFrame API or a fallback method based on the input and output file types. - Enhanced error handling for unsupported file types in the DataFrameReader. - Updated tests to reflect the new JSON support and error messages. 
--- src/bin/datu/commands/convert.rs | 177 +++++++++++++++++++++++++------ src/pipeline/dataframe.rs | 16 ++- 2 files changed, 157 insertions(+), 36 deletions(-) diff --git a/src/bin/datu/commands/convert.rs b/src/bin/datu/commands/convert.rs index cc3390f..c784915 100644 --- a/src/bin/datu/commands/convert.rs +++ b/src/bin/datu/commands/convert.rs @@ -5,6 +5,12 @@ use arrow::array::RecordBatchReader; use arrow::datatypes::Schema; use arrow::record_batch::RecordBatch; use clap::Args; +use datafusion::dataframe::DataFrameWriteOptions; +use datafusion::execution::context::SessionContext; +use datafusion::prelude::AvroReadOptions; +use datafusion::prelude::CsvReadOptions; +use datafusion::prelude::NdJsonReadOptions; +use datafusion::prelude::ParquetReadOptions; use datu::Error; use datu::FileType; use datu::pipeline::Source; @@ -13,6 +19,7 @@ use datu::pipeline::VecRecordBatchReader; use datu::pipeline::dataframe::DataFrameReader; use datu::pipeline::dataframe::write_record_batches_from_reader; use datu::resolve_input_file_type; +use datu::utils::parse_select_columns; use indicatif::ProgressBar; use indicatif::ProgressStyle; @@ -66,6 +73,14 @@ pub struct ConvertArgs { pub input_headers: Option, } +/// Returns true if the format is supported by DataFusion's DataFrame read/write API. +fn is_datafusion_native(file_type: FileType) -> bool { + matches!( + file_type, + FileType::Parquet | FileType::Avro | FileType::Csv | FileType::Json + ) +} + /// Returns the total number of rows from file metadata, if available. 
fn get_total_rows(path: &str, file_type: FileType) -> Option { datu::get_total_rows_result(path, file_type) @@ -103,7 +118,14 @@ pub async fn convert(args: ConvertArgs) -> anyhow::Result<()> { let input_file_type = resolve_input_file_type(args.input, &args.input_path)?; let output_file_type = resolve_input_file_type(args.output, &args.output_path)?; - let total_rows = get_total_rows(&args.input_path, input_file_type); + let use_dataframe_api = is_datafusion_native(input_file_type) + && (output_file_type == FileType::Parquet || output_file_type == FileType::Csv); + + let total_rows = if !use_dataframe_api || input_file_type == FileType::Parquet { + get_total_rows(&args.input_path, input_file_type) + } else { + None + }; let progress = match total_rows { Some(total) => { @@ -133,36 +155,11 @@ pub async fn convert(args: ConvertArgs) -> anyhow::Result<()> { } }; - let reader_step = DataFrameReader::new( - &args.input_path, - input_file_type, - args.select, - args.limit, - args.input_headers, - ); - - let result: Result<(), datu::Error> = async { - let mut source = reader_step.execute(()).await?; - let df = source.get()?; - - let handle = tokio::runtime::Handle::current(); - let batches = tokio::task::block_in_place(|| handle.block_on(df.collect())) - .map_err(|e| Error::GenericError(e.to_string()))?; - - let mut reader = ProgressRecordBatchReader { - inner: VecRecordBatchReader::new(batches), - progress: progress.clone(), - }; - - write_record_batches_from_reader( - &mut reader, - &args.output_path, - output_file_type, - args.sparse, - args.json_pretty, - ) - } - .await; + let result: Result<(), datu::Error> = if use_dataframe_api { + convert_via_dataframe_api(&args, input_file_type, output_file_type, &progress).await + } else { + convert_via_fallback(&args, input_file_type, output_file_type, &progress).await + }; match result { Ok(()) => { @@ -181,6 +178,124 @@ pub async fn convert(args: ConvertArgs) -> anyhow::Result<()> { } } +/// Streamlined path: DataFusion 
DataFrame read → select/limit → write. No collect or RecordBatchReader. +async fn convert_via_dataframe_api( + args: &ConvertArgs, + input_file_type: FileType, + output_file_type: FileType, + _progress: &ProgressBar, +) -> Result<(), datu::Error> { + let ctx = SessionContext::new(); + + let mut df = match input_file_type { + FileType::Parquet => ctx + .read_parquet(&args.input_path, ParquetReadOptions::default()) + .await + .map_err(|e| Error::GenericError(e.to_string()))?, + FileType::Avro => ctx + .read_avro(&args.input_path, AvroReadOptions::default()) + .await + .map_err(|e| Error::GenericError(e.to_string()))?, + FileType::Csv => { + let has_header = args.input_headers.unwrap_or(true); + ctx.read_csv( + &args.input_path, + CsvReadOptions::new().has_header(has_header), + ) + .await + .map_err(|e| Error::GenericError(e.to_string()))? + } + FileType::Json => ctx + .read_json(&args.input_path, NdJsonReadOptions::default()) + .await + .map_err(|e| Error::GenericError(e.to_string()))?, + _ => { + return Err(Error::GenericError( + "Streamlined path only supports Parquet, Avro, CSV, JSON input".to_string(), + )); + } + }; + + if let Some(columns) = &args.select { + let parsed = parse_select_columns(columns); + if !parsed.is_empty() { + let col_refs: Vec<&str> = parsed.iter().map(String::as_str).collect(); + df = df + .select_columns(&col_refs) + .map_err(|e| Error::GenericError(e.to_string()))?; + } + } + + if let Some(n) = args.limit { + df = df + .limit(0, Some(n)) + .map_err(|e| Error::GenericError(e.to_string()))?; + } + + let write_opts = DataFrameWriteOptions::new(); + + match output_file_type { + FileType::Parquet => { + df.write_parquet(&args.output_path, write_opts, None) + .await + .map_err(|e| Error::GenericError(e.to_string()))?; + } + FileType::Csv => { + df.write_csv(&args.output_path, write_opts, None) + .await + .map_err(|e| Error::GenericError(e.to_string()))?; + } + FileType::Json => { + df.write_json(&args.output_path, write_opts, None) + .await + 
.map_err(|e| Error::GenericError(e.to_string()))?; + } + _ => { + return Err(Error::GenericError( + "Streamlined path only supports Parquet, Avro, CSV, JSON output".to_string(), + )); + } + } + + Ok(()) +} + +/// Fallback path: DataFrameReader → collect → write_record_batches_from_reader. +async fn convert_via_fallback( + args: &ConvertArgs, + input_file_type: FileType, + output_file_type: FileType, + progress: &ProgressBar, +) -> Result<(), datu::Error> { + let reader_step = DataFrameReader::new( + &args.input_path, + input_file_type, + args.select.clone(), + args.limit, + args.input_headers, + ); + + let mut source = reader_step.execute(()).await?; + let df = source.get()?; + + let handle = tokio::runtime::Handle::current(); + let batches = tokio::task::block_in_place(|| handle.block_on(df.collect())) + .map_err(|e| Error::GenericError(e.to_string()))?; + + let mut reader = ProgressRecordBatchReader { + inner: VecRecordBatchReader::new(batches), + progress: progress.clone(), + }; + + write_record_batches_from_reader( + &mut reader, + &args.output_path, + output_file_type, + args.sparse, + args.json_pretty, + ) +} + #[cfg(test)] mod tests { use arrow::array::RecordBatchReader; diff --git a/src/pipeline/dataframe.rs b/src/pipeline/dataframe.rs index d054778..20699e2 100644 --- a/src/pipeline/dataframe.rs +++ b/src/pipeline/dataframe.rs @@ -3,6 +3,7 @@ use async_trait::async_trait; use datafusion::execution::context::SessionContext; use datafusion::prelude::AvroReadOptions; use datafusion::prelude::CsvReadOptions; +use datafusion::prelude::NdJsonReadOptions; use datafusion::prelude::ParquetReadOptions; use crate::Error; @@ -170,6 +171,10 @@ impl DataFrameReader { .await .map_err(|e| Error::GenericError(e.to_string()))? 
} + FileType::Json => ctx + .read_json(&self.input_path, NdJsonReadOptions::default()) + .await + .map_err(|e| Error::GenericError(e.to_string()))?, FileType::Orc => { let batches = read_orc_to_batches(&self.input_path)?; if batches.is_empty() { @@ -182,7 +187,7 @@ impl DataFrameReader { } _ => { return Err(Error::GenericError( - "Only Parquet, Avro, CSV, and ORC are supported as input file types" + "Only Parquet, Avro, CSV, JSON, and ORC are supported as input file types" .to_string(), )); } @@ -383,15 +388,16 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn test_read_dataframe_unsupported_type() { - let result = DataFrameReader::new("fixtures/data.json", FileType::Json, None, None, None) - .execute(()) - .await; + let result = + DataFrameReader::new("fixtures/table.parquet", FileType::Xlsx, None, None, None) + .execute(()) + .await; assert!(result.is_err()); assert!( result .unwrap_err() .to_string() - .contains("Only Parquet, Avro, CSV, and ORC") + .contains("Only Parquet, Avro, CSV, JSON, and ORC") ); } From 5a319054d930904f580657ed0740125b4c8b375d Mon Sep 17 00:00:00 2001 From: Alistair Israel Date: Sun, 15 Mar 2026 12:01:15 -0400 Subject: [PATCH 07/18] resolve_input_file_type -> resolve_file_type --- src/bin/datu/commands/convert.rs | 6 +++--- src/bin/datu/commands/count.rs | 4 ++-- src/bin/datu/commands/head.rs | 4 ++-- src/bin/datu/commands/sample.rs | 4 ++-- src/bin/datu/commands/schema.rs | 4 ++-- src/bin/datu/commands/tail.rs | 4 ++-- src/lib.rs | 2 +- src/utils.rs | 2 +- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/bin/datu/commands/convert.rs b/src/bin/datu/commands/convert.rs index c784915..60bc839 100644 --- a/src/bin/datu/commands/convert.rs +++ b/src/bin/datu/commands/convert.rs @@ -18,7 +18,7 @@ use datu::pipeline::Step; use datu::pipeline::VecRecordBatchReader; use datu::pipeline::dataframe::DataFrameReader; use datu::pipeline::dataframe::write_record_batches_from_reader; -use datu::resolve_input_file_type; 
+use datu::resolve_file_type; use datu::utils::parse_select_columns; use indicatif::ProgressBar; use indicatif::ProgressStyle; @@ -115,8 +115,8 @@ impl RecordBatchReader for ProgressRecordBatchReader { /// Converts between file formats; reads from input and writes to output, optionally selecting columns. pub async fn convert(args: ConvertArgs) -> anyhow::Result<()> { - let input_file_type = resolve_input_file_type(args.input, &args.input_path)?; - let output_file_type = resolve_input_file_type(args.output, &args.output_path)?; + let input_file_type = resolve_file_type(args.input, &args.input_path)?; + let output_file_type = resolve_file_type(args.output, &args.output_path)?; let use_dataframe_api = is_datafusion_native(input_file_type) && (output_file_type == FileType::Parquet || output_file_type == FileType::Csv); diff --git a/src/bin/datu/commands/count.rs b/src/bin/datu/commands/count.rs index 137542f..48a8a64 100644 --- a/src/bin/datu/commands/count.rs +++ b/src/bin/datu/commands/count.rs @@ -2,12 +2,12 @@ use datu::cli::CountArgs; use datu::pipeline::count_rows; -use datu::resolve_input_file_type; +use datu::resolve_file_type; /// The `datu count` command. Uses metadata for Parquet and ORC (no data read); /// streams batches for Avro and CSV. 
pub async fn count(args: CountArgs) -> anyhow::Result<()> { - let file_type = resolve_input_file_type(args.input, &args.input_path)?; + let file_type = resolve_file_type(args.input, &args.input_path)?; let total = count_rows(&args.input_path, file_type, args.input_headers)?; println!("{total}"); Ok(()) diff --git a/src/bin/datu/commands/head.rs b/src/bin/datu/commands/head.rs index e00f8b2..445ad5b 100644 --- a/src/bin/datu/commands/head.rs +++ b/src/bin/datu/commands/head.rs @@ -5,11 +5,11 @@ use datu::cli::HeadsOrTails; use datu::pipeline::build_reader; use datu::pipeline::display::apply_select_and_display; use datu::pipeline::record_batch_filter::parse_select_step; -use datu::resolve_input_file_type; +use datu::resolve_file_type; /// head command implementation: print the first N lines of an Avro, CSV, Parquet, or ORC file. pub async fn head(args: HeadsOrTails) -> Result<()> { - let input_file_type = resolve_input_file_type(args.input, &args.input_path)?; + let input_file_type = resolve_file_type(args.input, &args.input_path)?; match input_file_type { FileType::Parquet | FileType::Avro | FileType::Csv | FileType::Orc => {} _ => bail!("Only Parquet, Avro, CSV, and ORC are supported for head"), diff --git a/src/bin/datu/commands/sample.rs b/src/bin/datu/commands/sample.rs index 8a5222f..292e04e 100644 --- a/src/bin/datu/commands/sample.rs +++ b/src/bin/datu/commands/sample.rs @@ -13,11 +13,11 @@ use datu::pipeline::read_to_batches; use datu::pipeline::record_batch_filter::parse_select_step; use datu::pipeline::reservoir_sample_from_reader; use datu::pipeline::sample_from_reader; -use datu::resolve_input_file_type; +use datu::resolve_file_type; /// sample command implementation: print N random rows from an Avro, CSV, Parquet, or ORC file. 
pub async fn sample(args: HeadsOrTails) -> Result<()> { - let input_file_type = resolve_input_file_type(args.input, &args.input_path)?; + let input_file_type = resolve_file_type(args.input, &args.input_path)?; match input_file_type { FileType::Parquet => sample_seekable_format(args, FileType::Parquet).await, FileType::Avro => sample_avro(args).await, diff --git a/src/bin/datu/commands/schema.rs b/src/bin/datu/commands/schema.rs index 8c8eb4c..17a7375 100644 --- a/src/bin/datu/commands/schema.rs +++ b/src/bin/datu/commands/schema.rs @@ -4,11 +4,11 @@ use anyhow::Result; use datu::cli::SchemaArgs; use datu::pipeline::schema::get_schema_fields; use datu::pipeline::schema::print_schema_fields; -use datu::resolve_input_file_type; +use datu::resolve_file_type; /// The `datu schema` command pub async fn schema(args: SchemaArgs) -> Result<()> { - let file_type = resolve_input_file_type(args.input, &args.input_path)?; + let file_type = resolve_file_type(args.input, &args.input_path)?; let fields = get_schema_fields(&args.input_path, file_type, args.input_headers)?; print_schema_fields(&fields, args.output, args.sparse) } diff --git a/src/bin/datu/commands/tail.rs b/src/bin/datu/commands/tail.rs index a4c9dfb..4ceea8e 100644 --- a/src/bin/datu/commands/tail.rs +++ b/src/bin/datu/commands/tail.rs @@ -12,11 +12,11 @@ use datu::pipeline::display::apply_select_and_display; use datu::pipeline::read_to_batches; use datu::pipeline::record_batch_filter::parse_select_step; use datu::pipeline::tail_batches; -use datu::resolve_input_file_type; +use datu::resolve_file_type; /// tail command implementation: print the last N lines of an Avro, CSV, Parquet, or ORC file. 
pub async fn tail(args: HeadsOrTails) -> Result<()> { - let input_file_type = resolve_input_file_type(args.input, &args.input_path)?; + let input_file_type = resolve_file_type(args.input, &args.input_path)?; match input_file_type { FileType::Parquet => tail_seekable_format(args, FileType::Parquet).await, FileType::Avro => tail_avro(args).await, diff --git a/src/lib.rs b/src/lib.rs index 2e98673..262538d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,7 +11,7 @@ pub mod utils; pub use errors::Error; pub use utils::FileType; pub use utils::get_total_rows_result; -pub use utils::resolve_input_file_type; +pub use utils::resolve_file_type; /// Result type alias for datu operations. pub type Result = std::result::Result; diff --git a/src/utils.rs b/src/utils.rs index cd6be6e..ad88dff 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -113,7 +113,7 @@ impl TryFrom<&str> for FileType { } /// Resolve the input file type: use the explicit override if provided, otherwise infer from the file path extension. -pub fn resolve_input_file_type( +pub fn resolve_file_type( input_override: Option, path: &str, ) -> crate::Result { From 96bf0d831931469e6264f01add283ca17774d47f Mon Sep 17 00:00:00 2001 From: Alistair Israel Date: Sun, 15 Mar 2026 12:05:57 -0400 Subject: [PATCH 08/18] cargo fmt --- src/utils.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/utils.rs b/src/utils.rs index ad88dff..851bf28 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -113,10 +113,7 @@ impl TryFrom<&str> for FileType { } /// Resolve the input file type: use the explicit override if provided, otherwise infer from the file path extension. 
-pub fn resolve_file_type( - input_override: Option, - path: &str, -) -> crate::Result { +pub fn resolve_file_type(input_override: Option, path: &str) -> crate::Result { match input_override { Some(ft) => Ok(ft), None => FileType::try_from(path), From b7bb128a6b9b97ebef26423c63e8c4be91440f8d Mon Sep 17 00:00:00 2001 From: Alistair Israel Date: Sun, 15 Mar 2026 12:10:27 -0400 Subject: [PATCH 09/18] Make this more explicit --- src/pipeline.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pipeline.rs b/src/pipeline.rs index 63969c4..405b688 100644 --- a/src/pipeline.rs +++ b/src/pipeline.rs @@ -162,8 +162,8 @@ pub fn build_reader( /// Counts rows in a file. Uses metadata for Parquet and ORC (no data read); /// streams batches for Avro and CSV. pub fn count_rows(path: &str, file_type: FileType, csv_has_header: Option) -> Result { - if let Ok(total) = crate::get_total_rows_result(path, file_type) { - return Ok(total); + if matches!(file_type, FileType::Parquet | FileType::Orc) { + return crate::get_total_rows_result(path, file_type); } let mut reader_step = build_reader(path, file_type, None, None, csv_has_header)?; let reader = reader_step.get()?; From 59510c9002da54af4c89e5729f8bfecf759eb738 Mon Sep 17 00:00:00 2001 From: Alistair Israel Date: Sun, 15 Mar 2026 12:12:41 -0400 Subject: [PATCH 10/18] Update README with JSON support --- README.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 53ff28c..11c0cd6 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,8 @@ cargo install --git https://github.com/aisrael/datu | Avro (`.avro`) | ✓ | ✓ | — | | ORC (`.orc`) | ✓ | ✓ | — | | CSV (`.csv`) | ✓ | ✓ | ✓ | +| JSON (`.json`) | ✓ | ✓ | ✓ | | XLSX (`.xlsx`) | — | ✓ | — | -| JSON (`.json`) | — | ✓ | ✓ | | JSON (pretty) | — | — | ✓ | | YAML | — | — | ✓ | @@ -148,7 +148,7 @@ datu count data.csv --input-headers=false Convert data between supported formats. 
Input and output formats are inferred from file extensions, or can be specified explicitly with `--input` and `--output`. -**Supported input formats:** Parquet (`.parquet`, `.parq`), Avro (`.avro`), CSV (`.csv`), ORC (`.orc`). +**Supported input formats:** Parquet (`.parquet`, `.parq`), Avro (`.avro`), CSV (`.csv`), JSON (`.json`), ORC (`.orc`). **Supported output formats:** CSV (`.csv`), JSON (`.json`), Parquet (`.parquet`, `.parq`), Avro (`.avro`), ORC (`.orc`), XLSX (`.xlsx`). @@ -191,13 +191,17 @@ datu convert data.csv output.json --input-headers=false # Parquet to Parquet with column subset datu convert input.parq output.parquet --select one,two,three -# Parquet, Avro, CSV, or ORC to Excel (.xlsx) +# JSON to CSV or Parquet +datu convert data.json data.csv +datu convert data.json data.parquet + +# Parquet, Avro, CSV, JSON, or ORC to Excel (.xlsx) datu convert data.parquet report.xlsx -# Parquet or Avro to ORC +# Parquet, Avro, or JSON to ORC datu convert data.parquet data.orc -# Parquet or Avro to JSON +# Parquet, Avro, or JSON to JSON datu convert data.parquet data.json ``` @@ -361,7 +365,7 @@ read("input") |> ... |> write("output") #### `read(path)` -Read a data file. Supported formats: Parquet (`.parquet`, `.parq`), Avro (`.avro`), CSV (`.csv`), ORC (`.orc`). CSV files are assumed to have a header row by default. +Read a data file. Supported formats: Parquet (`.parquet`, `.parq`), Avro (`.avro`), CSV (`.csv`), JSON (`.json`), ORC (`.orc`). CSV files are assumed to have a header row by default. 
```text > read("data.parquet") |> write("data.csv") From 0a3364c1af3d2133524d850784b71cf38e1134ad Mon Sep 17 00:00:00 2001 From: Alistair Israel Date: Sun, 15 Mar 2026 12:16:33 -0400 Subject: [PATCH 11/18] Updated with REPL equivalents --- README.md | 56 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 11c0cd6..70d75c9 100644 --- a/README.md +++ b/README.md @@ -68,12 +68,18 @@ Display the schema of a Parquet, Avro, CSV, or ORC file (column names, types, an **Supported input formats:** Parquet (`.parquet`, `.parq`), Avro (`.avro`), CSV (`.csv`), ORC (`.orc`). -**Usage:** +**Usage (CLI):** ```sh datu schema [OPTIONS] ``` +**Usage (REPL):** + +> `read("file") |> schema()` +> +> Use `schema()` after a read to print the schema of the data in the pipeline. For a single file, `read("data.parquet") |> schema()` is equivalent. + **Options:** | Option | Description | @@ -114,12 +120,18 @@ Return the number of rows in a Parquet, Avro, CSV, or ORC file. **Supported input formats:** Parquet (`.parquet`, `.parq`), Avro (`.avro`), CSV (`.csv`), ORC (`.orc`). -**Usage:** +**Usage (CLI):** ```sh datu count [OPTIONS] ``` +**Usage (REPL):** + +> `count("file")` +> +> Count rows in a file directly. Or use `read("file") |> count()` after other steps, e.g. `read("data.parquet") |> select(:id) |> count()`. + **Options:** | Option | Description | @@ -152,12 +164,18 @@ Convert data between supported formats. Input and output formats are inferred fr **Supported output formats:** CSV (`.csv`), JSON (`.json`), Parquet (`.parquet`, `.parq`), Avro (`.avro`), ORC (`.orc`), XLSX (`.xlsx`). -**Usage:** +**Usage (CLI):** ```sh datu convert [OPTIONS] ``` +**Usage (REPL):** + +> `read("input") |> ... |> write("output")` +> +> Example: `read("table.parquet") |> select(:id, :email) |> write("table.csv")`. Add `select(...)`, `head(n)`, `tail(n)`, or `sample(n)` as needed. 
+ **Options:** | Option | Description | @@ -213,12 +231,18 @@ Print N randomly sampled rows from a Parquet, Avro, CSV, or ORC file to stdout ( **Supported input formats:** Parquet (`.parquet`, `.parq`), Avro (`.avro`), CSV (`.csv`), ORC (`.orc`). -**Usage:** +**Usage (CLI):** ```sh datu sample [OPTIONS] ``` +**Usage (REPL):** + +> `read("file") |> sample(n)` +> +> Prints _n_ random rows to stdout. Chain `|> write("out.csv")` to write to a file, e.g. `read("data.parquet") |> sample(5) |> write("sample.csv")`. + **Options:** | Option | Description | @@ -254,12 +278,18 @@ Print the first N rows of a Parquet, Avro, CSV, or ORC file to stdout (default C **Supported input formats:** Parquet (`.parquet`, `.parq`), Avro (`.avro`), CSV (`.csv`), ORC (`.orc`). -**Usage:** +**Usage (CLI):** ```sh datu head [OPTIONS] ``` +**Usage (REPL):** + +> `read("file") |> head(n)` +> +> Prints the first _n_ rows to stdout. Chain `|> write("out.csv")` to write to a file, e.g. `read("data.parquet") |> head(10) |> write("first10.csv")`. + **Options:** | Option | Description | @@ -300,12 +330,18 @@ Print the last N rows of a Parquet, Avro, CSV, or ORC file to stdout (default CS > **Note:** For Avro and CSV files, `tail` requires a full file scan since these formats do not support random access to the end of the file. -**Usage:** +**Usage (CLI):** ```sh datu tail [OPTIONS] ``` +**Usage (REPL):** + +> `read("file") |> tail(n)` +> +> Prints the last _n_ rows to stdout. Chain `|> write("out.csv")` to write to a file, e.g. `read("data.parquet") |> tail(10) |> write("last10.csv")`. + **Options:** | Option | Description | @@ -340,12 +376,18 @@ datu tail data.parquet -n 1000 > last1000.csv ### Version -Print the installed `datu` version: +Print the installed `datu` version. + +**Usage (CLI):** ```sh datu version ``` +**Usage (REPL):** + +> No equivalent; run `datu version` from the shell. 
+ ## Interactive Mode (REPL) Running `datu` without any command starts an interactive REPL (Read-Eval-Print Loop): From c9d17c22db6757fc01bca1d695fb1b1cf11de672 Mon Sep 17 00:00:00 2001 From: Alistair Israel Date: Sun, 15 Mar 2026 13:01:59 -0400 Subject: [PATCH 12/18] Refactor REPL evaluation methods to use exec_* naming convention and improve test structure - Removed async evaluation methods from ReplPipelineBuilder and replaced them with exec_* methods. - Updated corresponding tests to reflect the new method names and improved error handling. - Consolidated path extraction tests for read operations. --- src/cli/repl.rs | 383 ++++++++++++------------------------------------ 1 file changed, 93 insertions(+), 290 deletions(-) diff --git a/src/cli/repl.rs b/src/cli/repl.rs index 58418ff..77348a1 100644 --- a/src/cli/repl.rs +++ b/src/cli/repl.rs @@ -366,57 +366,6 @@ fn extract_column_specs(args: &[Expr]) -> crate::Result> { .collect() } -#[cfg(test)] -impl ReplPipelineBuilder { - async fn eval_stage(&mut self, expr: Expr) -> crate::Result<()> { - let stage = plan_stage(expr)?; - self.execute_stage(stage).await - } - - async fn eval_read(&mut self, args: Vec) -> crate::Result<()> { - let path = extract_path_from_args("read", &args)?; - self.exec_read(&path).await - } - - async fn eval_select(&mut self, args: Vec) -> crate::Result<()> { - let columns = extract_column_specs(&args)?; - if columns.is_empty() { - return Err(Error::UnsupportedFunctionCall( - "select expects at least one column name".to_string(), - )); - } - self.exec_select(&columns).await - } - - fn eval_head(&mut self, args: Vec) -> crate::Result<()> { - let n = extract_head_n(&args)?; - self.exec_head(n) - } - - fn eval_tail(&mut self, args: Vec) -> crate::Result<()> { - let n = extract_tail_n(&args)?; - self.exec_tail(n) - } - - fn eval_sample(&mut self, args: Vec) -> crate::Result<()> { - let n = extract_sample_n(&args)?; - self.exec_sample(n) - } - - async fn eval_write(&mut self, args: Vec) -> 
crate::Result<()> { - let path = extract_path_from_args("write", &args)?; - self.exec_write(&path).await - } - - fn eval_count(&mut self) -> crate::Result<()> { - self.exec_count() - } - - fn eval_schema(&mut self) -> crate::Result<()> { - self.exec_schema() - } -} - #[cfg(test)] fn is_head_call(expr: Option<&Expr>) -> bool { if let Some(Expr::FunctionCall(name, _)) = expr { @@ -957,50 +906,22 @@ mod tests { assert!(ctx.pending_exprs.is_empty()); } - // ── eval_stage: error paths ───────────────────────────────────── - - #[tokio::test(flavor = "multi_thread")] - async fn test_eval_stage_unsupported_function() { - let mut ctx = new_context(); - let expr = Expr::FunctionCall(Identifier("unknown".into()), vec![]); - let result = ctx.eval_stage(expr).await; - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - Error::UnsupportedFunctionCall(_) - )); - } - - #[tokio::test(flavor = "multi_thread")] - async fn test_eval_stage_non_function_expr() { - let mut ctx = new_context(); - let expr = Expr::Ident("x".into()); - let result = ctx.eval_stage(expr).await; - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - Error::UnsupportedExpression(_) - )); - } - - // ── eval_read ─────────────────────────────────────────────────── + // ── exec_read / extract_path_from_args ────────────────────────── #[tokio::test(flavor = "multi_thread")] - async fn test_eval_read_success() { + async fn test_exec_read_success() { let mut ctx = new_context(); - let args = vec![Expr::Literal(Literal::String( - "fixtures/table.parquet".into(), - ))]; - ctx.eval_read(args).await.expect("eval_read"); + ctx.exec_read("fixtures/table.parquet") + .await + .expect("exec_read"); assert!(ctx.batches.is_some()); assert!(!ctx.batches.as_ref().unwrap().is_empty()); } - #[tokio::test(flavor = "multi_thread")] - async fn test_eval_read_bad_args() { - let mut ctx = new_context(); + #[test] + fn test_extract_path_from_args_read_bad_args() { let args = 
vec![Expr::Ident("not_a_string".into())]; - let result = ctx.eval_read(args).await; + let result = extract_path_from_args("read", &args); assert!(result.is_err()); assert!(matches!( result.unwrap_err(), @@ -1008,40 +929,34 @@ mod tests { )); } - #[tokio::test(flavor = "multi_thread")] - async fn test_eval_read_no_args() { - let mut ctx = new_context(); - let result = ctx.eval_read(vec![]).await; + #[test] + fn test_extract_path_from_args_read_no_args() { + let result = extract_path_from_args("read", &[]); assert!(result.is_err()); } - #[tokio::test(flavor = "multi_thread")] - async fn test_eval_read_too_many_args() { - let mut ctx = new_context(); + #[test] + fn test_extract_path_from_args_read_too_many_args() { let args = vec![ Expr::Literal(Literal::String("a.parquet".into())), Expr::Literal(Literal::String("b.parquet".into())), ]; - let result = ctx.eval_read(args).await; + let result = extract_path_from_args("read", &args); assert!(result.is_err()); } - // ── eval_select ───────────────────────────────────────────────── + // ── exec_select ───────────────────────────────────────────────── #[tokio::test(flavor = "multi_thread")] - async fn test_eval_select_success() { + async fn test_exec_select_success() { let mut ctx = new_context(); - ctx.eval_read(vec![Expr::Literal(Literal::String( - "fixtures/table.parquet".into(), - ))]) - .await - .expect("read"); + ctx.exec_read("fixtures/table.parquet").await.expect("read"); - let args = vec![ - Expr::Literal(Literal::Symbol("one".into())), - Expr::Literal(Literal::Symbol("two".into())), + let columns = vec![ + ColumnSpec::CaseInsensitive("one".into()), + ColumnSpec::CaseInsensitive("two".into()), ]; - ctx.eval_select(args).await.expect("select"); + ctx.exec_select(&columns).await.expect("select"); let batches = ctx.batches.as_ref().expect("batches after select"); let schema = batches[0].schema(); let col_names: Vec<&str> = schema.fields().iter().map(|f| f.name().as_str()).collect(); @@ -1049,24 +964,18 @@ mod tests { 
} #[tokio::test(flavor = "multi_thread")] - async fn test_eval_select_no_preceding_read() { + async fn test_exec_select_no_preceding_read() { let mut ctx = new_context(); - let args = vec![Expr::Literal(Literal::Symbol("one".into()))]; - let result = ctx.eval_select(args).await; + let columns = vec![ColumnSpec::CaseInsensitive("one".into())]; + let result = ctx.exec_select(&columns).await; assert!(result.is_err()); assert!(matches!(result.unwrap_err(), Error::GenericError(_))); } - #[tokio::test(flavor = "multi_thread")] - async fn test_eval_select_empty_columns() { - let mut ctx = new_context(); - ctx.eval_read(vec![Expr::Literal(Literal::String( - "fixtures/table.parquet".into(), - ))]) - .await - .expect("read"); - - let result = ctx.eval_select(vec![]).await; + #[test] + fn test_plan_stage_select_empty_columns_rejected() { + let expr = parse("select()"); + let result = plan_stage(expr); assert!(result.is_err()); assert!(matches!( result.unwrap_err(), @@ -1074,42 +983,35 @@ mod tests { )); } - // ── eval_write ────────────────────────────────────────────────── + // ── exec_write ────────────────────────────────────────────────── #[tokio::test(flavor = "multi_thread")] - async fn test_eval_write_success() { + async fn test_exec_write_success() { let temp_dir = tempfile::tempdir().expect("tempdir"); let output_path = temp_dir.path().join("output.parquet"); let output_str = output_path.to_str().unwrap().to_string(); let mut ctx = new_context(); - ctx.eval_read(vec![Expr::Literal(Literal::String( - "fixtures/table.parquet".into(), - ))]) - .await - .expect("read"); + ctx.exec_read("fixtures/table.parquet").await.expect("read"); - let args = vec![Expr::Literal(Literal::String(output_str.clone()))]; - ctx.eval_write(args).await.expect("write"); + ctx.exec_write(&output_str).await.expect("write"); assert!(output_path.exists()); assert_eq!(ctx.writer.as_deref(), Some(output_str.as_str())); } #[tokio::test(flavor = "multi_thread")] - async fn 
test_eval_write_no_preceding_read() { + async fn test_exec_write_no_preceding_read() { let mut ctx = new_context(); - let args = vec![Expr::Literal(Literal::String("out.csv".into()))]; - let result = ctx.eval_write(args).await; + let result = ctx.exec_write("out.csv").await; assert!(result.is_err()); assert!(matches!(result.unwrap_err(), Error::GenericError(_))); } - #[tokio::test(flavor = "multi_thread")] - async fn test_eval_write_bad_args() { - let mut ctx = new_context(); + #[test] + fn test_extract_path_from_args_write_bad_args() { let args = vec![Expr::Ident("not_a_string".into())]; - let result = ctx.eval_write(args).await; + let result = extract_path_from_args("write", &args); assert!(result.is_err()); assert!(matches!( result.unwrap_err(), @@ -1188,17 +1090,13 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn test_repl_pipeline_select_symbol_case_insensitive() { let mut ctx = new_context(); - ctx.eval_read(vec![Expr::Literal(Literal::String( - "fixtures/table.parquet".into(), - ))]) - .await - .expect("read"); + ctx.exec_read("fixtures/table.parquet").await.expect("read"); - let args = vec![ - Expr::Literal(Literal::Symbol("ONE".into())), - Expr::Literal(Literal::Symbol("TWO".into())), + let columns = vec![ + ColumnSpec::CaseInsensitive("ONE".into()), + ColumnSpec::CaseInsensitive("TWO".into()), ]; - ctx.eval_select(args).await.expect("select"); + ctx.exec_select(&columns).await.expect("select"); let batches = ctx.batches.as_ref().expect("batches after select"); let schema = batches[0].schema(); let col_names: Vec<&str> = schema.fields().iter().map(|f| f.name().as_str()).collect(); @@ -1208,14 +1106,10 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn test_repl_pipeline_select_string_exact_match_fails_on_wrong_case() { let mut ctx = new_context(); - ctx.eval_read(vec![Expr::Literal(Literal::String( - "fixtures/table.parquet".into(), - ))]) - .await - .expect("read"); - - let args = 
vec![Expr::Literal(Literal::String("ONE".into()))]; - let result = ctx.eval_select(args).await; + ctx.exec_read("fixtures/table.parquet").await.expect("read"); + + let columns = vec![ColumnSpec::Exact("ONE".into())]; + let result = ctx.exec_select(&columns).await; assert!(result.is_err()); } @@ -1254,7 +1148,7 @@ mod tests { assert!(!is_head_call(None)); } - // ── eval_head ────────────────────────────────────────────────── + // ── exec_head ────────────────────────────────────────────────── fn parse_fn_args(input: &str) -> Vec { match parse(input) { @@ -1264,16 +1158,11 @@ mod tests { } #[tokio::test(flavor = "multi_thread")] - async fn test_eval_head_success() { + async fn test_exec_head_success() { let mut ctx = new_context(); - ctx.eval_read(vec![Expr::Literal(Literal::String( - "fixtures/table.parquet".into(), - ))]) - .await - .expect("read"); + ctx.exec_read("fixtures/table.parquet").await.expect("read"); - let args = parse_fn_args("head(2)"); - ctx.eval_head(args).expect("head"); + ctx.exec_head(2).expect("head"); let batches = ctx.batches.as_ref().expect("batches after head"); let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); @@ -1281,55 +1170,26 @@ mod tests { } #[tokio::test(flavor = "multi_thread")] - async fn test_eval_head_preserves_schema() { + async fn test_exec_head_preserves_schema() { let mut ctx = new_context(); - ctx.eval_read(vec![Expr::Literal(Literal::String( - "fixtures/table.parquet".into(), - ))]) - .await - .expect("read"); + ctx.exec_read("fixtures/table.parquet").await.expect("read"); let original_schema = ctx.batches.as_ref().unwrap()[0].schema(); - let args = parse_fn_args("head(1)"); - ctx.eval_head(args).expect("head"); + ctx.exec_head(1).expect("head"); let batches = ctx.batches.as_ref().expect("batches"); assert_eq!(batches[0].schema(), original_schema); } #[test] - fn test_eval_head_no_preceding_read() { + fn test_exec_head_no_preceding_read() { let mut ctx = new_context(); - let args = 
parse_fn_args("head(5)"); - let result = ctx.eval_head(args); + let result = ctx.exec_head(5); assert!(result.is_err()); assert!(matches!(result.unwrap_err(), Error::GenericError(_))); } - #[test] - fn test_eval_head_bad_args_string() { - let mut ctx = new_context(); - let args = vec![Expr::Literal(Literal::String("not_a_number".into()))]; - let result = ctx.eval_head(args); - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - Error::UnsupportedFunctionCall(_) - )); - } - - #[test] - fn test_eval_head_no_args() { - let mut ctx = new_context(); - let result = ctx.eval_head(vec![]); - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - Error::UnsupportedFunctionCall(_) - )); - } - - // ── eval_head: pipeline integration ──────────────────────────── + // ── exec_head: pipeline integration ────────────────────────────── #[tokio::test(flavor = "multi_thread")] async fn test_repl_pipeline_read_head_write() { @@ -1484,19 +1344,14 @@ mod tests { assert!(extract_tail_n(&[]).is_err()); } - // ── eval_tail ─────────────────────────────────────────────── + // ── exec_tail ─────────────────────────────────────────────── #[tokio::test(flavor = "multi_thread")] - async fn test_eval_tail_success() { + async fn test_exec_tail_success() { let mut ctx = new_context(); - ctx.eval_read(vec![Expr::Literal(Literal::String( - "fixtures/table.parquet".into(), - ))]) - .await - .expect("read"); + ctx.exec_read("fixtures/table.parquet").await.expect("read"); - let args = parse_fn_args("tail(2)"); - ctx.eval_tail(args).expect("tail"); + ctx.exec_tail(2).expect("tail"); let batches = ctx.batches.as_ref().expect("batches after tail"); let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); @@ -1504,55 +1359,26 @@ mod tests { } #[tokio::test(flavor = "multi_thread")] - async fn test_eval_tail_preserves_schema() { + async fn test_exec_tail_preserves_schema() { let mut ctx = new_context(); - ctx.eval_read(vec![Expr::Literal(Literal::String( - 
"fixtures/table.parquet".into(), - ))]) - .await - .expect("read"); + ctx.exec_read("fixtures/table.parquet").await.expect("read"); let original_schema = ctx.batches.as_ref().unwrap()[0].schema(); - let args = parse_fn_args("tail(1)"); - ctx.eval_tail(args).expect("tail"); + ctx.exec_tail(1).expect("tail"); let batches = ctx.batches.as_ref().expect("batches"); assert_eq!(batches[0].schema(), original_schema); } #[test] - fn test_eval_tail_no_preceding_read() { + fn test_exec_tail_no_preceding_read() { let mut ctx = new_context(); - let args = parse_fn_args("tail(5)"); - let result = ctx.eval_tail(args); + let result = ctx.exec_tail(5); assert!(result.is_err()); assert!(matches!(result.unwrap_err(), Error::GenericError(_))); } - #[test] - fn test_eval_tail_bad_args_string() { - let mut ctx = new_context(); - let args = vec![Expr::Literal(Literal::String("not_a_number".into()))]; - let result = ctx.eval_tail(args); - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - Error::UnsupportedFunctionCall(_) - )); - } - - #[test] - fn test_eval_tail_no_args() { - let mut ctx = new_context(); - let result = ctx.eval_tail(vec![]); - assert!(result.is_err()); - assert!(matches!( - result.unwrap_err(), - Error::UnsupportedFunctionCall(_) - )); - } - - // ── eval_tail: pipeline integration ───────────────────────── + // ── exec_tail: pipeline integration ────────────────────────── #[tokio::test(flavor = "multi_thread")] async fn test_repl_pipeline_read_tail_write() { @@ -1650,30 +1476,26 @@ mod tests { assert_eq!(pipeline[2], PipelineStage::Count { path: None }); } - // ── eval_count ───────────────────────────────────────────── + // ── exec_count ───────────────────────────────────────────── #[tokio::test(flavor = "multi_thread")] - async fn test_eval_count_success() { + async fn test_exec_count_success() { let mut ctx = new_context(); - ctx.eval_read(vec![Expr::Literal(Literal::String( - "fixtures/table.parquet".into(), - ))]) - .await - .expect("read"); + 
ctx.exec_read("fixtures/table.parquet").await.expect("read"); - ctx.eval_count().expect("count"); + ctx.exec_count().expect("count"); assert!(ctx.batches.is_none(), "batches consumed by count"); } #[test] - fn test_eval_count_no_preceding_read() { + fn test_exec_count_no_preceding_read() { let mut ctx = new_context(); - let result = ctx.eval_count(); + let result = ctx.exec_count(); assert!(result.is_err()); assert!(matches!(result.unwrap_err(), Error::GenericError(_))); } - // ── eval_count: pipeline integration ──────────────────────── + // ── exec_count: pipeline integration ──────────────────────── #[tokio::test(flavor = "multi_thread")] async fn test_repl_pipeline_read_count() { @@ -1775,30 +1597,26 @@ mod tests { assert_eq!(pipeline[1], PipelineStage::Schema); } - // ── eval_schema ───────────────────────────────────────────── + // ── exec_schema ───────────────────────────────────────────── #[tokio::test(flavor = "multi_thread")] - async fn test_eval_schema_success() { + async fn test_exec_schema_success() { let mut ctx = new_context(); - ctx.eval_read(vec![Expr::Literal(Literal::String( - "fixtures/table.parquet".into(), - ))]) - .await - .expect("read"); + ctx.exec_read("fixtures/table.parquet").await.expect("read"); - ctx.eval_schema().expect("schema"); + ctx.exec_schema().expect("schema"); assert!(ctx.batches.is_none(), "batches consumed by schema"); } #[test] - fn test_eval_schema_no_preceding_read() { + fn test_exec_schema_no_preceding_read() { let mut ctx = new_context(); - let result = ctx.eval_schema(); + let result = ctx.exec_schema(); assert!(result.is_err()); assert!(matches!(result.unwrap_err(), Error::GenericError(_))); } - // ── eval_schema: pipeline integration ──────────────────────── + // ── exec_schema: pipeline integration ──────────────────────── #[tokio::test(flavor = "multi_thread")] async fn test_repl_pipeline_read_schema() { @@ -1920,19 +1738,14 @@ mod tests { assert!(extract_sample_n(&args).is_err()); } - // ── eval_sample 
───────────────────────────────────────────── + // ── exec_sample ───────────────────────────────────────────── #[tokio::test(flavor = "multi_thread")] - async fn test_eval_sample_success() { + async fn test_exec_sample_success() { let mut ctx = new_context(); - ctx.eval_read(vec![Expr::Literal(Literal::String( - "fixtures/table.parquet".into(), - ))]) - .await - .expect("read"); + ctx.exec_read("fixtures/table.parquet").await.expect("read"); - let args = parse_fn_args("sample(2)"); - ctx.eval_sample(args).expect("sample"); + ctx.exec_sample(2).expect("sample"); let batches = ctx.batches.as_ref().expect("batches after sample"); let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); @@ -1940,15 +1753,11 @@ mod tests { } #[tokio::test(flavor = "multi_thread")] - async fn test_eval_sample_default_n() { + async fn test_exec_sample_default_n() { let mut ctx = new_context(); - ctx.eval_read(vec![Expr::Literal(Literal::String( - "fixtures/table.parquet".into(), - ))]) - .await - .expect("read"); + ctx.exec_read("fixtures/table.parquet").await.expect("read"); - ctx.eval_sample(vec![]).expect("sample with default n"); + ctx.exec_sample(10).expect("sample with n=10"); let batches = ctx.batches.as_ref().expect("batches after sample"); let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); @@ -1956,32 +1765,26 @@ mod tests { } #[tokio::test(flavor = "multi_thread")] - async fn test_eval_sample_preserves_schema() { + async fn test_exec_sample_preserves_schema() { let mut ctx = new_context(); - ctx.eval_read(vec![Expr::Literal(Literal::String( - "fixtures/table.parquet".into(), - ))]) - .await - .expect("read"); + ctx.exec_read("fixtures/table.parquet").await.expect("read"); let original_schema = ctx.batches.as_ref().unwrap()[0].schema(); - let args = parse_fn_args("sample(1)"); - ctx.eval_sample(args).expect("sample"); + ctx.exec_sample(1).expect("sample"); let batches = ctx.batches.as_ref().expect("batches"); assert_eq!(batches[0].schema(), 
original_schema); } #[test] - fn test_eval_sample_no_preceding_read() { + fn test_exec_sample_no_preceding_read() { let mut ctx = new_context(); - let args = parse_fn_args("sample(5)"); - let result = ctx.eval_sample(args); + let result = ctx.exec_sample(5); assert!(result.is_err()); assert!(matches!(result.unwrap_err(), Error::GenericError(_))); } - // ── eval_sample: pipeline integration ─────────────────────── + // ── exec_sample: pipeline integration ──────────────────────── #[tokio::test(flavor = "multi_thread")] async fn test_repl_pipeline_read_sample_write() { From d70e1e911e4142a152c2305a7a3b6e114919ac6b Mon Sep 17 00:00:00 2001 From: Alistair Israel Date: Sun, 15 Mar 2026 13:07:01 -0400 Subject: [PATCH 13/18] Just updating the comment --- src/cli.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cli.rs b/src/cli.rs index 4294eab..7d88b86 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -111,7 +111,7 @@ pub struct CountArgs { pub input_headers: Option, } -/// Arguments for the `datu head` and `datu tail` commands. +/// Arguments for the `head`, `sample`, and `tail` commands. #[derive(Args)] pub struct HeadsOrTails { /// Path to the Parquet, Avro, ORC, or CSV file From 68297930aa6294f9ec73f89d607470f215a18735 Mon Sep 17 00:00:00 2001 From: Alistair Israel Date: Sun, 15 Mar 2026 13:33:36 -0400 Subject: [PATCH 14/18] Update dependencies and versions in Cargo files - Updated various dependencies in Cargo.toml, including `anyhow`, `clap`, `datafusion`, and `tokio`, to their latest compatible versions. - Adjusted version specifications for `anstyle`, `console`, and other packages in Cargo.lock to reflect new releases. - Improved dependency management by ensuring consistent versioning across related packages. 
--- Cargo.lock | 389 +++++++++++++++++++++++++++-------------------------- Cargo.toml | 34 ++--- 2 files changed, 216 insertions(+), 207 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c69f88b..376f122 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -74,7 +74,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" dependencies = [ "anstyle", - "anstyle-parse", + "anstyle-parse 0.2.7", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse 1.0.0", "anstyle-query", "anstyle-wincon", "colorchoice", @@ -84,9 +99,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" [[package]] name = "anstyle-parse" @@ -97,6 +112,15 @@ dependencies = [ "utf8parse", ] +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + [[package]] name = "anstyle-query" version = "1.1.5" @@ -330,7 +354,7 @@ dependencies = [ "arrow-schema", "arrow-select", "flatbuffers", - "lz4_flex 0.12.0", + "lz4_flex 0.12.1", "zstd", ] @@ -427,9 +451,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.40" +version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d67d43201f4d20c78bcda740c142ca52482d81da80681533d33bf3f0596c8e2" 
+checksum = "d0f9ee0f6e02ffd7ad5816e9464499fba7b3effd01123b515c41d1697c43dad1" dependencies = [ "compression-codecs", "compression-core", @@ -529,9 +553,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.9.0" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d13a61f2963b88eef9c1be03df65d42f6996dfeac1054870d950fcf66686f83" +checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" dependencies = [ "bon-macros", "rustversion", @@ -539,9 +563,9 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.9.0" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d314cc62af2b6b0c65780555abb4d02a03dd3b799cd42419044f0c38d99738c0" +checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" dependencies = [ "darling", "ident_case", @@ -630,9 +654,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.56" +version = "1.2.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" dependencies = [ "find-msvc-tools", "jobserver", @@ -705,9 +729,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.60" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" dependencies = [ "clap_builder", "clap_derive", @@ -715,11 +739,11 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.60" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +checksum = 
"714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ - "anstream", + "anstream 1.0.0", "anstyle", "clap_lex", "strsim", @@ -728,9 +752,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.55" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a" dependencies = [ "heck", "proc-macro2", @@ -740,9 +764,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "clipboard-win" @@ -755,9 +779,9 @@ dependencies = [ [[package]] name = "colorchoice" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" [[package]] name = "comfy-table" @@ -801,26 +825,12 @@ dependencies = [ [[package]] name = "console" -version = "0.15.11" +version = "0.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +checksum = "d64e8af5551369d19cf50138de61f1c42074ab970f74e99be916646777f8fc87" dependencies = [ "encode_unicode", "libc", - "once_cell", - "unicode-width", - "windows-sys 0.59.0", -] - -[[package]] -name = "console" -version = "0.16.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03e45a4a8926227e4197636ba97a9fc9b00477e9f4bd711395687c5f0734bec4" -dependencies = [ - "encode_unicode", - "libc", - "once_cell", "unicode-width", "windows-sys 0.61.2", ] @@ -1005,7 
+1015,7 @@ checksum = "16cbb27bc2064274afa3a3d8bc9a0e71333589850573aa632ec4520e4af14d94" dependencies = [ "anyhow", "clap", - "console 0.16.2", + "console", "cucumber-codegen", "cucumber-expressions", "derive_more", @@ -1056,9 +1066,9 @@ dependencies = [ [[package]] name = "darling" -version = "0.20.11" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" dependencies = [ "darling_core", "darling_macro", @@ -1066,11 +1076,10 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.20.11" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" dependencies = [ - "fnv", "ident_case", "proc-macro2", "quote", @@ -1080,9 +1089,9 @@ dependencies = [ [[package]] name = "darling_macro" -version = "0.20.11" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ "darling_core", "quote", @@ -1105,9 +1114,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d12ee9fdc6cdb5898c7691bb994f0ba606c4acc93a2258d78bb9f26ff8158bb3" +checksum = "ea28305c211e3541c9cfcf06a23d0d8c7c824b4502ed1fdf0a6ff4ad24ee531c" dependencies = [ "arrow", "arrow-schema", @@ -1161,9 +1170,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"462dc9ef45e5d688aeaae49a7e310587e81b6016b9d03bace5626ad0043e5a9e" +checksum = "78ab99b6df5f60a6ddbc515e4c05caee1192d395cf3cb67ce5d1c17e3c9b9b74" dependencies = [ "arrow", "async-trait", @@ -1186,9 +1195,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b96dbf1d728fc321817b744eb5080cdd75312faa6980b338817f68f3caa4208" +checksum = "77ae3d14912c0d779ada98d30dc60f3244f3c26c2446b87394629ea5c076a31c" dependencies = [ "arrow", "async-trait", @@ -1209,9 +1218,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3237a6ff0d2149af4631290074289cae548c9863c885d821315d54c6673a074a" +checksum = "ea2df29b9592a5d55b8238eaf67d2f21963d5a08cd1a8b7670134405206caabd" dependencies = [ "ahash", "apache-avro", @@ -1234,9 +1243,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70b5e34026af55a1bfccb1ef0a763cf1f64e77c696ffcf5a128a278c31236528" +checksum = "42639baa0049d5fffd7e283504b9b5e7b9b2e7a2dea476eed60ab0d40d999b85" dependencies = [ "futures", "log", @@ -1245,9 +1254,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b2a6be734cc3785e18bbf2a7f2b22537f6b9fb960d79617775a51568c281842" +checksum = "25951b617bb22a9619e1520450590cb2004bfcad10bcb396b961f4a1a10dcec5" dependencies = [ "arrow", "async-compression", @@ -1280,9 +1289,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1739b9b07c9236389e09c74f770e88aff7055250774e9def7d3f4f56b3dcc7be" +checksum = "dc0b28226960ba99c50d78ac6f736ebe09eb5cb3bb9bb58194266278000ca41f" dependencies = [ "arrow", "arrow-ipc", @@ -1304,9 +1313,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "828088c2fb681cc0e06fb42f541f76c82a0c10278f9fd6334e22c8d1e3574ee7" +checksum = "18de2e914c2c9ed4b31a4920940b181b0957bc164eec4fc04c294533219bf0a7" dependencies = [ "apache-avro", "arrow", @@ -1324,9 +1333,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61c73bc54b518bbba7c7650299d07d58730293cfba4356f6f428cc94c20b7600" +checksum = "f538b57b052a678b1ce860181c65d3ace5a8486312dc50b41c01dd585a773a51" dependencies = [ "arrow", "async-trait", @@ -1347,9 +1356,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37812c8494c698c4d889374ecfabbff780f1f26d9ec095dd1bddfc2a8ca12559" +checksum = "89fbc1d32b1b03c9734e27c0c5f041232b68621c8455f22769838634750a196c" dependencies = [ "arrow", "async-trait", @@ -1369,9 +1378,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2210937ecd9f0e824c397e73f4b5385c97cd1aff43ab2b5836fcfd2d321523fb" +checksum = "203271d31fe5613a5943181db70ec98162121d1de94a9a300d5e5f19f9500a32" dependencies = [ "arrow", "async-trait", @@ -1399,15 +1408,15 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2c825f969126bc2ef6a6a02d94b3c07abff871acf4d6dd759ce1255edb7923ce" +checksum = "5b6450dc702b3d39e8ced54c3356abb453bd2f3cea86d90d555a4b92f7a38462" [[package]] name = "datafusion-execution" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa03ef05a2c2f90dd6c743e3e111078e322f4b395d20d4b4d431a245d79521ae" +checksum = "e66a02fa601de49da5181dbdcf904a18b16a184db2b31f5e5534552ea2d5e660" dependencies = [ "arrow", "async-trait", @@ -1426,9 +1435,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef33934c1f98ee695cc51192cc5f9ed3a8febee84fdbcd9131bf9d3a9a78276f" +checksum = "cdf59a9b308a1a07dc2eb2f85e6366bc0226dc390b40f3aa0a72d79f1cfe2465" dependencies = [ "arrow", "async-trait", @@ -1449,9 +1458,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "000c98206e3dd47d2939a94b6c67af4bfa6732dd668ac4fafdbde408fd9134ea" +checksum = "bd99eac4c6538c708638db43e7a3bd88e0e57955ddb722d420fb9a6d38dfc28f" dependencies = [ "arrow", "datafusion-common", @@ -1462,9 +1471,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "379b01418ab95ca947014066248c22139fe9af9289354de10b445bd000d5d276" +checksum = "11aa2c492ac046397b36d57c62a72982aad306495bbcbcdbcabd424d4a2fe245" dependencies = [ "arrow", "arrow-buffer", @@ -1493,9 +1502,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd00d5454ba4c3f8ebbd04bd6a6a9dc7ced7c56d883f70f2076c188be8459e4c" +checksum = 
"325a00081898945d48d6194d9ca26120e523c993be3bb7c084061a5a2a72e787" dependencies = [ "ahash", "arrow", @@ -1514,9 +1523,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aec06b380729a87210a4e11f555ec2d729a328142253f8d557b87593622ecc9f" +checksum = "809bbcb1e0dbec5d0ce30d493d135aea7564f1ba4550395f7f94321223df2dae" dependencies = [ "ahash", "arrow", @@ -1527,9 +1536,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "904f48d45e0f1eb7d0eb5c0f80f2b5c6046a85454364a6b16a2e0b46f62e7dff" +checksum = "29ebaa5d7024ef45973e0a7db1e9aeaa647936496f4d4061c0448f23d77d6320" dependencies = [ "arrow", "arrow-ord", @@ -1550,9 +1559,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9a0d20e2b887e11bee24f7734d780a2588b925796ac741c3118dd06d5aa77f0" +checksum = "60eab6f39df9ee49a2c7fa38eddc01fa0086ee31b29c7d19f38e72f479609752" dependencies = [ "arrow", "async-trait", @@ -1566,9 +1575,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3414b0a07e39b6979fe3a69c7aa79a9f1369f1d5c8e52146e66058be1b285ee" +checksum = "e00b2c15e342a90e65a846199c9e49293dd09fe1bcd63d8be2544604892f7eb8" dependencies = [ "arrow", "datafusion-common", @@ -1584,9 +1593,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bf2feae63cd4754e31add64ce75cae07d015bce4bb41cd09872f93add32523a" +checksum = 
"493e2e1d1f4753dfc139a5213f1b5d0b97eea46a82d9bda3c7908aa96981b74b" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1594,9 +1603,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4fe888aeb6a095c4bcbe8ac1874c4b9a4c7ffa2ba849db7922683ba20875aaf" +checksum = "ba01c55ade8278a791b429f7bf5cb1de64de587a342d084b18245edfae7096e2" dependencies = [ "datafusion-doc", "quote", @@ -1605,9 +1614,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a6527c063ae305c11be397a86d8193936f4b84d137fe40bd706dfc178cf733c" +checksum = "a80c6dfbba6a2163a9507f6353ac78c69d8deb26232c9e419160e58ff7c3e047" dependencies = [ "arrow", "chrono", @@ -1625,9 +1634,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bb028323dd4efd049dd8a78d78fe81b2b969447b39c51424167f973ac5811d9" +checksum = "5d3a86264bb9163e7360b6622e789bc7fcbb43672e78a8493f0bc369a41a57c6" dependencies = [ "ahash", "arrow", @@ -1649,9 +1658,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78fe0826aef7eab6b4b61533d811234a7a9e5e458331ebbf94152a51fc8ab433" +checksum = "3f5e00e524ac33500be6c5eeac940bd3f6b984ba9b7df0cd5f6c34a8a2cc4d6b" dependencies = [ "arrow", "datafusion-common", @@ -1664,9 +1673,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfccd388620734c661bd8b7ca93c44cdd59fecc9b550eea416a78ffcbb29475f" +checksum = 
"2ae769ea5d688b4e74e9be5cad6f9d9f295b540825355868a3ab942380dd97ce" dependencies = [ "ahash", "arrow", @@ -1681,9 +1690,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bde5fa10e73259a03b705d5fddc136516814ab5f441b939525618a4070f5a059" +checksum = "f3588753ab2b47b0e43cd823fe5e7944df6734dabd6dafb72e2cc1c2a22f1944" dependencies = [ "arrow", "datafusion-common", @@ -1700,9 +1709,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e1098760fb29127c24cc9ade3277051dc73c9ed0ac0131bd7bcd742e0ad7470" +checksum = "79949cbb109c2a45c527bfe0d956b9f2916807c05d4d2e66f3fd0af827ac2b61" dependencies = [ "ahash", "arrow", @@ -1731,9 +1740,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64d0fef4201777b52951edec086c21a5b246f3c82621569ddb4a26f488bc38a9" +checksum = "6434e2ee8a39d04b95fed688ff34dc251af6e4a0c2e1714716b6e3846690d589" dependencies = [ "arrow", "datafusion-common", @@ -1748,9 +1757,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f71f1e39e8f2acbf1c63b0e93756c2e970a64729dab70ac789587d6237c4fde0" +checksum = "c91efb8302b4877d499c37e9a71886b90236ab27d9cc42fd51112febf341abd6" dependencies = [ "async-trait", "datafusion-common", @@ -1762,9 +1771,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "52.1.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f44693cfcaeb7a9f12d71d1c576c3a6dc025a12cef209375fa2d16fb3b5670ee" +checksum = 
"3f01eef7bcf4d00e87305b55f1b75792384e130fe0258bac02cd48378ae5ff87" dependencies = [ "arrow", "bigdecimal", @@ -1926,7 +1935,7 @@ version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2daee4ea451f429a58296525ddf28b45a3b64f1acf6587e2067437bb11e218d" dependencies = [ - "anstream", + "anstream 0.6.21", "anstyle", "env_filter", "jiff", @@ -2037,12 +2046,6 @@ dependencies = [ "thiserror 2.0.18", ] -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - [[package]] name = "foldhash" version = "0.1.5" @@ -2183,19 +2186,19 @@ checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", - "r-efi", + "r-efi 5.3.0", "wasip2", ] [[package]] name = "getrandom" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" dependencies = [ "cfg-if", "libc", - "r-efi", + "r-efi 6.0.0", "wasip2", "wasip3", ] @@ -2505,14 +2508,14 @@ dependencies = [ [[package]] name = "indicatif" -version = "0.17.11" +version = "0.18.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" +checksum = "25470f23803092da7d239834776d653104d551bc4d7eacaf31e6837854b8e9eb" dependencies = [ - "console 0.15.11", - "number_prefix", + "console", "portable-atomic", "unicode-width", + "unit-prefix", "web-time", ] @@ -2580,9 +2583,9 @@ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jiff" -version = "0.2.21" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b3e3d65f018c6ae946ab16e80944b97096ed73c35b221d1c478a6c81d8f57940" +checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" dependencies = [ "jiff-static", "log", @@ -2593,9 +2596,9 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.21" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a17c2b211d863c7fde02cbea8a3c1a439b98e109286554f2860bdded7ff83818" +checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" dependencies = [ "proc-macro2", "quote", @@ -2614,9 +2617,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.88" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7e709f3e3d22866f9c25b3aff01af289b18422cc8b4262fb19103ee80fe513d" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" dependencies = [ "once_cell", "wasm-bindgen", @@ -2693,9 +2696,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.182" +version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] name = "liblzma" @@ -2767,18 +2770,18 @@ checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "lz4_flex" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +checksum = "373f5eceeeab7925e0c1098212f2fbc4d416adec9d35051a6ab251e824c1854a" dependencies = [ "twox-hash", ] [[package]] name = "lz4_flex" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" 
+checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" dependencies = [ "twox-hash", ] @@ -2973,12 +2976,6 @@ dependencies = [ "libm", ] -[[package]] -name = "number_prefix" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" - [[package]] name = "object" version = "0.37.3" @@ -3014,9 +3011,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "once_cell_polyfill" @@ -3046,7 +3043,7 @@ dependencies = [ "flate2", "futures", "futures-util", - "lz4_flex 0.11.5", + "lz4_flex 0.11.6", "lzokay-native", "num", "prost", @@ -3119,7 +3116,7 @@ dependencies = [ "futures", "half", "hashbrown 0.16.1", - "lz4_flex 0.12.0", + "lz4_flex 0.12.1", "num-bigint", "num-integer", "num-traits", @@ -3205,18 +3202,18 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.10" +version = "1.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" +checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.10" +version = "1.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" +checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" dependencies = [ "proc-macro2", "quote", @@ -3225,9 +3222,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.16" +version = "0.2.17" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" [[package]] name = "pin-utils" @@ -3277,9 +3274,9 @@ checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" +checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3" dependencies = [ "portable-atomic", ] @@ -3371,9 +3368,9 @@ checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" [[package]] name = "quote" -version = "1.0.44" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -3384,6 +3381,12 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "radix_trie" version = "0.2.1" @@ -3564,9 +3567,9 @@ checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" [[package]] name = "regex-syntax" -version = "0.8.9" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "reservoir-sampling" @@ -3581,9 +3584,9 @@ 
dependencies = [ [[package]] name = "rust_xlsxwriter" -version = "0.93.0" +version = "0.94.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84205e093885c6b4e2d93e225a148df2e7fe67a668c27e3322e56e0d93721848" +checksum = "efc4a0f1f7b425669996977016152b2939be9be44d40df252a5051c9c6b3b859" dependencies = [ "chrono", "zip", @@ -3997,12 +4000,12 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.25.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom 0.4.1", + "getrandom 0.4.2", "once_cell", "rustix", "windows-sys 0.61.2", @@ -4111,9 +4114,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.49.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" dependencies = [ "bytes", "pin-project-lite", @@ -4122,9 +4125,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.6.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" dependencies = [ "proc-macro2", "quote", @@ -4243,6 +4246,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unit-prefix" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" + [[package]] name = "unsafe-libyaml" version = "0.2.11" @@ 
-4275,11 +4284,11 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.21.0" +version = "1.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" +checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" dependencies = [ - "getrandom 0.4.1", + "getrandom 0.4.2", "js-sys", "serde_core", "wasm-bindgen", @@ -4327,9 +4336,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.111" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec1adf1535672f5b7824f817792b1afd731d7e843d2d04ec8f27e8cb51edd8ac" +checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" dependencies = [ "cfg-if", "once_cell", @@ -4340,9 +4349,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.61" +version = "0.4.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe88540d1c934c4ec8e6db0afa536876c5441289d7f9f9123d4f065ac1250a6b" +checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" dependencies = [ "cfg-if", "futures-util", @@ -4354,9 +4363,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.111" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19e638317c08b21663aed4d2b9a2091450548954695ff4efa75bff5fa546b3b1" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -4364,9 +4373,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.111" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c64760850114d03d5f65457e96fc988f11f01d38fbaa51b254e4ab5809102af" +checksum = 
"03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" dependencies = [ "bumpalo", "proc-macro2", @@ -4377,9 +4386,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.111" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60eecd4fe26177cfa3339eb00b4a36445889ba3ad37080c2429879718e20ca41" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" dependencies = [ "unicode-ident", ] @@ -4420,9 +4429,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.88" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d6bb20ed2d9572df8584f6dc81d68a41a625cadc6f15999d649a70ce7e3597a" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" dependencies = [ "js-sys", "wasm-bindgen", @@ -4869,18 +4878,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.39" +version = "0.8.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" +checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.39" +version = "0.8.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" +checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" dependencies = [ "proc-macro2", "quote", @@ -4957,9 +4966,9 @@ dependencies = [ [[package]] name = "zlib-rs" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c745c48e1007337ed136dc99df34128b9faa6ed542d80a1c673cf55a6d7236c8" +checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513" [[package]] name = "zmij" diff --git a/Cargo.toml b/Cargo.toml index f0305ac..ecd6387 100644 
--- a/Cargo.toml +++ b/Cargo.toml @@ -11,33 +11,33 @@ exclude = [ ] [dependencies] -anyhow = "1.0.100" +anyhow = "1.0" async-trait = "0.1" -arrow = "57.2.0" +arrow = "57.2" log = "0.4" -arrow-avro = "57.2.0" -arrow-json = "57.2.0" -clap = { version = "4.5.54", features = ["cargo", "derive"] } +arrow-avro = "57.2" +arrow-json = "57.2" +clap = { version = "4.6", features = ["cargo", "derive"] } chrono = "0.4" -datafusion = { version = "52.1.0", features = ["avro"] } -parquet = "57.2.0" -orc-rust = "0.7" +datafusion = { version = "52.3", features = ["avro"] } +parquet = "57.2" +orc-rust = "0.7.1" futures = "0.3" -tokio = { version = "1.48", features = ["macros", "rt", "sync"] } -thiserror = "2.0.18" -csv = "1.4.0" +tokio = { version = "1.50", features = ["macros", "rt", "sync"] } +thiserror = "2.0" +csv = "1.4" hashlink = "0.10" saphyr = "0.0.6" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" -rust_xlsxwriter = { version = "0.93", features = ["chrono"] } +rust_xlsxwriter = { version = "0.94", features = ["chrono"] } rustc-literal-escaper = "0.0.7" -rustyline = "17.0" -flt = { version = "0.0.2" } -dirs = { version = "2.0.0", package = "dirs-next" } +rustyline = "17" +flt = "0.0.2" +dirs = { version = "2.0", package = "dirs-next" } rand = "0.8" -reservoir-sampling = "0.5" -indicatif = "0.17" +reservoir-sampling = "0.5.1" +indicatif = "0.18" [dev-dependencies] criterion = "0.5" From b499ea4f91b1ac9f6711540c6881b40c944a9d19 Mon Sep 17 00:00:00 2001 From: Alistair Israel Date: Sun, 15 Mar 2026 13:44:40 -0400 Subject: [PATCH 15/18] Add Avro compatibility for Int16 fields in record batches - Introduced functions to check for Int16 fields in schemas and convert them to Int32 for Avro compatibility. - Implemented a wrapper for record batch readers that casts Int16 columns to Int32. - Updated the `write_record_batches` function to handle schema adjustments before writing to Avro files. 
- Added a test to verify that Int16 values are correctly upcast to Int32 in the written Avro output. --- benches/parquet_to_avro.rs | 5 +- src/pipeline/avro.rs | 129 ++++++++++++++++++++++++++++++++++++- 2 files changed, 132 insertions(+), 2 deletions(-) diff --git a/benches/parquet_to_avro.rs b/benches/parquet_to_avro.rs index b6a4bad..2b88d91 100644 --- a/benches/parquet_to_avro.rs +++ b/benches/parquet_to_avro.rs @@ -12,10 +12,13 @@ fn parquet_to_avro_benchmark(c: &mut Criterion) { let output_path = temp_dir.path().join("output.avro"); let output = output_path.to_str().expect("Path to string").to_string(); + let input_path = + std::env::var("DATU_BENCH_PARQUET_PATH").unwrap_or("fixtures/userdata.parquet".to_string()); + c.bench_function("parquet_to_avro_userdata", |b| { b.iter(|| { let result = Command::new(&datu_path) - .args(["convert", "fixtures/userdata.parquet", black_box(&output)]) + .args(["convert", &input_path, black_box(&output)]) .output() .expect("Failed to execute datu"); assert!( diff --git a/src/pipeline/avro.rs b/src/pipeline/avro.rs index 046f660..67b34a0 100644 --- a/src/pipeline/avro.rs +++ b/src/pipeline/avro.rs @@ -1,6 +1,12 @@ use std::io::BufReader; +use std::sync::Arc; use arrow::array::RecordBatchReader; +use arrow::compute; +use arrow::datatypes::DataType; +use arrow::datatypes::Field; +use arrow::datatypes::Schema; +use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use arrow_avro::reader::ReaderBuilder; use arrow_avro::writer::AvroWriter; @@ -16,6 +22,71 @@ use crate::pipeline::WriteArgs; use crate::pipeline::batch_write::BatchWriteSink; use crate::pipeline::batch_write::write_record_batches_with_sink; +/// Returns true if the schema has any Int16 field (avro writer does not support Int16). +fn schema_has_int16(schema: &Schema) -> bool { + schema + .fields() + .iter() + .any(|f| f.data_type() == &DataType::Int16) +} + +/// Builds a schema suitable for the Avro writer: Int16 fields are replaced with Int32. 
+fn schema_for_avro_writer(schema: &Schema) -> SchemaRef { + let fields: Vec = schema + .fields() + .iter() + .map(|f| { + let dt = if f.data_type() == &DataType::Int16 { + DataType::Int32 + } else { + f.data_type().clone() + }; + Field::new(f.name(), dt, f.is_nullable()) + }) + .collect(); + Arc::new(Schema::new(fields)) +} + +/// Casts Int16 columns in the batch to Int32 so the batch matches the Avro-compat schema. +fn cast_record_batch_for_avro( + batch: &RecordBatch, + compat_schema: &SchemaRef, +) -> arrow::error::Result { + let mut columns: Vec> = Vec::with_capacity(batch.num_columns()); + for (i, field) in compat_schema.fields().iter().enumerate() { + let col = batch.column(i); + let target_type = field.data_type(); + let cast_col = if col.data_type() == &DataType::Int16 && target_type == &DataType::Int32 { + compute::cast(col.as_ref(), target_type)? + } else { + col.clone() + }; + columns.push(cast_col); + } + RecordBatch::try_new((*compat_schema).clone(), columns) +} + +/// Wraps a record batch reader and exposes an Avro-compat schema (Int16 → Int32), casting each batch. +struct AvroCompatRecordBatchReader<'a> { + reader: &'a mut dyn RecordBatchReader, + compat_schema: SchemaRef, +} + +impl RecordBatchReader for AvroCompatRecordBatchReader<'_> { + fn schema(&self) -> SchemaRef { + self.compat_schema.clone() + } +} + +impl Iterator for AvroCompatRecordBatchReader<'_> { + type Item = arrow::error::Result; + + fn next(&mut self) -> Option { + let batch = self.reader.next()?; + Some(batch.and_then(|b| cast_record_batch_for_avro(&b, &self.compat_schema))) + } +} + /// Pipeline step that reads an Avro file and produces a record batch reader. pub struct ReadAvroStep { pub args: ReadArgs, @@ -145,8 +216,19 @@ pub struct WriteAvroStep { pub struct WriteAvroResult {} /// Write record batches from a reader to an Avro file. +/// Int16 columns are upcast to Int32 so the arrow-avro writer can write them. 
pub fn write_record_batches(path: &str, reader: &mut dyn RecordBatchReader) -> Result<()> { - write_record_batches_with_sink(path, reader, AvroSink::new) + let schema = reader.schema(); + if schema_has_int16(schema.as_ref()) { + let compat_schema = schema_for_avro_writer(schema.as_ref()); + let mut wrapper = AvroCompatRecordBatchReader { + reader, + compat_schema, + }; + write_record_batches_with_sink(path, &mut wrapper, AvroSink::new) + } else { + write_record_batches_with_sink(path, reader, AvroSink::new) + } } struct AvroSink { @@ -186,9 +268,54 @@ impl Step for WriteAvroStep { #[cfg(test)] mod tests { + use std::io::BufReader; + use std::sync::Arc; + + use arrow::array::Int16Array; + use arrow::datatypes::DataType; + use arrow::datatypes::Field; + use arrow::datatypes::Schema; + use super::*; use crate::Error; use crate::pipeline::ReadArgs; + use crate::pipeline::VecRecordBatchReader; + + #[test] + fn test_write_avro_int16_upcast_to_int32() { + let schema = Schema::new(vec![Field::new("x", DataType::Int16, false)]); + let batch = RecordBatch::try_new( + Arc::new(schema), + vec![Arc::new(Int16Array::from(vec![1_i16, 2, 3]))], + ) + .unwrap(); + let temp_dir = tempfile::tempdir().unwrap(); + let path = temp_dir.path().join("out.avro"); + let mut reader = VecRecordBatchReader::new(vec![batch]); + write_record_batches(path.to_str().unwrap(), &mut reader).unwrap(); + let file = std::fs::File::open(&path).unwrap(); + let avro_reader = ReaderBuilder::new() + .build(BufReader::new(file)) + .expect("written file should be valid Avro"); + let batches: Vec<_> = avro_reader.collect(); + assert_eq!(batches.len(), 1, "expected one batch"); + let batch = batches.into_iter().next().unwrap().unwrap(); + assert_eq!(batch.num_rows(), 3); + assert_eq!(batch.num_columns(), 1); + let col = batch.column(0); + assert_eq!( + col.data_type(), + &DataType::Int32, + "Avro int is read as Int32" + ); + let ints = col + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(ints.value(0), 
1); + assert_eq!(ints.value(1), 2); + assert_eq!(ints.value(2), 3); + } #[test] fn test_read_avro() { From b4abc0c674ab29109a997f42fe437ad5ed0ada00 Mon Sep 17 00:00:00 2001 From: Alistair Israel Date: Sun, 15 Mar 2026 14:02:27 -0400 Subject: [PATCH 16/18] Added .clocignore --- .clocignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .clocignore diff --git a/.clocignore b/.clocignore new file mode 100644 index 0000000..9eed7b0 --- /dev/null +++ b/.clocignore @@ -0,0 +1,2 @@ +fixtures/ +target/ From 2810db232ab5f0cc54b0fb62286adffd2adc4171 Mon Sep 17 00:00:00 2001 From: Alistair Israel Date: Sun, 15 Mar 2026 14:03:09 -0400 Subject: [PATCH 17/18] Use eyre instead of anyhow --- Cargo.lock | 18 +++++++++++++++++- Cargo.toml | 2 +- benches/parquet_to_avro.rs | 32 ++++++++++++++++++++++++++++---- src/bin/datu/commands/convert.rs | 2 +- src/bin/datu/commands/count.rs | 2 +- src/bin/datu/commands/head.rs | 4 ++-- src/bin/datu/commands/sample.rs | 4 ++-- src/bin/datu/commands/schema.rs | 2 +- src/bin/datu/commands/tail.rs | 4 ++-- src/bin/datu/main.rs | 2 +- src/bin/datu/repl.rs | 2 +- src/pipeline.rs | 10 ++++------ src/pipeline/schema.rs | 6 +++--- 13 files changed, 64 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 376f122..5fd2eb1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1791,7 +1791,6 @@ dependencies = [ name = "datu" version = "0.3.1" dependencies = [ - "anyhow", "arrow", "arrow-avro", "arrow-json", @@ -1804,6 +1803,7 @@ dependencies = [ "datafusion", "dirs-next", "expectrl", + "eyre", "flt", "futures", "gherkin", @@ -1976,6 +1976,16 @@ dependencies = [ "regex", ] +[[package]] +name = "eyre" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd915d99f24784cdc19fd37ef22b97e3ff0ae756c7e492e9fbfe897d61e2aec" +dependencies = [ + "indenter", + "once_cell", +] + [[package]] name = "fallible-streaming-iterator" version = "0.1.9" @@ -2494,6 +2504,12 @@ dependencies = [ "winapi-util", ] 
+[[package]] +name = "indenter" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "964de6e86d545b246d84badc0fef527924ace5134f30641c203ef52ba83f58d5" + [[package]] name = "indexmap" version = "2.13.0" diff --git a/Cargo.toml b/Cargo.toml index ecd6387..c85ccc1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ exclude = [ ] [dependencies] -anyhow = "1.0" +eyre = "0.6" async-trait = "0.1" arrow = "57.2" log = "0.4" diff --git a/benches/parquet_to_avro.rs b/benches/parquet_to_avro.rs index 2b88d91..4ce0ea8 100644 --- a/benches/parquet_to_avro.rs +++ b/benches/parquet_to_avro.rs @@ -4,6 +4,20 @@ use criterion::Criterion; use criterion::black_box; use criterion::criterion_group; use criterion::criterion_main; +use dirs::home_dir; + +/// Expand ~ in the path to the home directory +fn expand_path(path: &str) -> eyre::Result { + if path.starts_with("~") { + let home = home_dir().ok_or(eyre::eyre!("Failed to get home directory"))?; + Ok(home + .join(path.trim_start_matches("~")) + .to_string_lossy() + .to_string()) + } else { + Ok(path.to_string()) + } +} fn parquet_to_avro_benchmark(c: &mut Criterion) { let datu_path = std::env::var("CARGO_BIN_EXE_datu") @@ -12,10 +26,20 @@ fn parquet_to_avro_benchmark(c: &mut Criterion) { let output_path = temp_dir.path().join("output.avro"); let output = output_path.to_str().expect("Path to string").to_string(); - let input_path = - std::env::var("DATU_BENCH_PARQUET_PATH").unwrap_or("fixtures/userdata.parquet".to_string()); - - c.bench_function("parquet_to_avro_userdata", |b| { + let mut id = String::from("parquet_to_avro"); + let input_path = if let Ok(value) = std::env::var("DATU_BENCH_PARQUET_PATH") { + let path = expand_path(&value).unwrap_or_else(|e| { + panic!("Failed to expand path: {}", e); + }); + if !std::path::Path::new(&path).exists() { + panic!("DATU_BENCH_PARQUET_PATH does not exist: {}", value); + } + id = format!("parquet_to_avro ({})", value); + path + } else { + 
"fixtures/userdata.parquet".to_string() + }; + c.bench_function(id.as_str(), |b| { b.iter(|| { let result = Command::new(&datu_path) .args(["convert", &input_path, black_box(&output)]) diff --git a/src/bin/datu/commands/convert.rs b/src/bin/datu/commands/convert.rs index 60bc839..e28feaa 100644 --- a/src/bin/datu/commands/convert.rs +++ b/src/bin/datu/commands/convert.rs @@ -114,7 +114,7 @@ impl RecordBatchReader for ProgressRecordBatchReader { } /// Converts between file formats; reads from input and writes to output, optionally selecting columns. -pub async fn convert(args: ConvertArgs) -> anyhow::Result<()> { +pub async fn convert(args: ConvertArgs) -> eyre::Result<()> { let input_file_type = resolve_file_type(args.input, &args.input_path)?; let output_file_type = resolve_file_type(args.output, &args.output_path)?; diff --git a/src/bin/datu/commands/count.rs b/src/bin/datu/commands/count.rs index 48a8a64..55da118 100644 --- a/src/bin/datu/commands/count.rs +++ b/src/bin/datu/commands/count.rs @@ -6,7 +6,7 @@ use datu::resolve_file_type; /// The `datu count` command. Uses metadata for Parquet and ORC (no data read); /// streams batches for Avro and CSV. 
-pub async fn count(args: CountArgs) -> anyhow::Result<()> { +pub async fn count(args: CountArgs) -> eyre::Result<()> { let file_type = resolve_file_type(args.input, &args.input_path)?; let total = count_rows(&args.input_path, file_type, args.input_headers)?; println!("{total}"); diff --git a/src/bin/datu/commands/head.rs b/src/bin/datu/commands/head.rs index 445ad5b..a520f1d 100644 --- a/src/bin/datu/commands/head.rs +++ b/src/bin/datu/commands/head.rs @@ -1,11 +1,11 @@ -use anyhow::Result; -use anyhow::bail; use datu::FileType; use datu::cli::HeadsOrTails; use datu::pipeline::build_reader; use datu::pipeline::display::apply_select_and_display; use datu::pipeline::record_batch_filter::parse_select_step; use datu::resolve_file_type; +use eyre::Result; +use eyre::bail; /// head command implementation: print the first N lines of an Avro, CSV, Parquet, or ORC file. pub async fn head(args: HeadsOrTails) -> Result<()> { diff --git a/src/bin/datu/commands/sample.rs b/src/bin/datu/commands/sample.rs index 292e04e..6f37ee2 100644 --- a/src/bin/datu/commands/sample.rs +++ b/src/bin/datu/commands/sample.rs @@ -1,5 +1,3 @@ -use anyhow::Result; -use anyhow::bail; use datu::FileType; use datu::cli::HeadsOrTails; use datu::get_total_rows_result; @@ -14,6 +12,8 @@ use datu::pipeline::record_batch_filter::parse_select_step; use datu::pipeline::reservoir_sample_from_reader; use datu::pipeline::sample_from_reader; use datu::resolve_file_type; +use eyre::Result; +use eyre::bail; /// sample command implementation: print N random rows from an Avro, CSV, Parquet, or ORC file. pub async fn sample(args: HeadsOrTails) -> Result<()> { diff --git a/src/bin/datu/commands/schema.rs b/src/bin/datu/commands/schema.rs index 17a7375..3edbd72 100644 --- a/src/bin/datu/commands/schema.rs +++ b/src/bin/datu/commands/schema.rs @@ -1,10 +1,10 @@ //! 
`datu schema` - display the schema of a Parquet, Avro, or ORC file -use anyhow::Result; use datu::cli::SchemaArgs; use datu::pipeline::schema::get_schema_fields; use datu::pipeline::schema::print_schema_fields; use datu::resolve_file_type; +use eyre::Result; /// The `datu schema` command pub async fn schema(args: SchemaArgs) -> Result<()> { diff --git a/src/bin/datu/commands/tail.rs b/src/bin/datu/commands/tail.rs index 4ceea8e..9f92d02 100644 --- a/src/bin/datu/commands/tail.rs +++ b/src/bin/datu/commands/tail.rs @@ -1,5 +1,3 @@ -use anyhow::Result; -use anyhow::bail; use datu::Error; use datu::FileType; use datu::cli::HeadsOrTails; @@ -13,6 +11,8 @@ use datu::pipeline::read_to_batches; use datu::pipeline::record_batch_filter::parse_select_step; use datu::pipeline::tail_batches; use datu::resolve_file_type; +use eyre::Result; +use eyre::bail; /// tail command implementation: print the last N lines of an Avro, CSV, Parquet, or ORC file. pub async fn tail(args: HeadsOrTails) -> Result<()> { diff --git a/src/bin/datu/main.rs b/src/bin/datu/main.rs index e436f9b..55da0ea 100644 --- a/src/bin/datu/main.rs +++ b/src/bin/datu/main.rs @@ -45,7 +45,7 @@ pub enum Command { /// Application entry point; parses CLI args and dispatches to the appropriate command. 
#[tokio::main] -async fn main() -> anyhow::Result<()> { +async fn main() -> eyre::Result<()> { let cli = Cli::parse(); match cli.command { None => repl::run().await, diff --git a/src/bin/datu/repl.rs b/src/bin/datu/repl.rs index 495251b..6cf1874 100644 --- a/src/bin/datu/repl.rs +++ b/src/bin/datu/repl.rs @@ -2,8 +2,8 @@ use std::path::PathBuf; -use anyhow::Result; use datu::cli::repl::ReplPipelineBuilder; +use eyre::Result; use flt::parser::parse_expr; use rustyline::DefaultEditor; use rustyline::error::ReadlineError; diff --git a/src/pipeline.rs b/src/pipeline.rs index 405b688..7e4104f 100644 --- a/src/pipeline.rs +++ b/src/pipeline.rs @@ -228,7 +228,7 @@ pub async fn read_to_batches( select: &Option<Vec<String>>, limit: Option<usize>, csv_has_header: Option<bool>, -) -> anyhow::Result<Vec<RecordBatch>> { +) -> eyre::Result<Vec<RecordBatch>> { let source = { let select = select.clone(); DataFrameReader::new(input_path, input_file_type, select, limit, csv_has_header) @@ -237,7 +237,7 @@ pub async fn read_to_batches( .await?; let reader = DataFrameToBatchReader::try_new(source) .await - .map_err(|e| anyhow::anyhow!("{e}"))?; + .map_err(|e| eyre::eyre!("{e}"))?; Ok(reader.into_batches()) } @@ -390,11 +390,9 @@ pub async fn write_batches( output_file_type: FileType, sparse: bool, json_pretty: bool, -) -> anyhow::Result<()> { +) -> eyre::Result<()> { let ctx = datafusion::execution::context::SessionContext::new(); - let df = ctx - .read_batches(batches) - .map_err(|e| anyhow::anyhow!("{e}"))?; + let df = ctx.read_batches(batches).map_err(|e| eyre::eyre!("{e}"))?; let source = crate::pipeline::dataframe::DataFrameSource::new(df); let writer_step = crate::pipeline::dataframe::DataFrameWriter::new( diff --git a/src/pipeline/schema.rs b/src/pipeline/schema.rs index 00cbb6c..31ff698 100644 --- a/src/pipeline/schema.rs +++ b/src/pipeline/schema.rs @@ -7,13 +7,13 @@ use std::fs::File; use std::io::BufReader; use std::sync::Arc; -use anyhow::Result; -use anyhow::bail; use arrow::array::RecordBatchReader; use arrow::datatypes::Schema
as ArrowSchema; use arrow_avro::reader::ReaderBuilder; use datafusion::execution::context::SessionContext; use datafusion::prelude::CsvReadOptions; +use eyre::Result; +use eyre::bail; use orc_rust::arrow_reader::ArrowReaderBuilder; use parquet::basic::ConvertedType; use parquet::file::metadata::ParquetMetaDataReader; @@ -202,7 +202,7 @@ fn get_schema_fields_csv(path: &str, has_header: bool) -> Result Date: Sun, 15 Mar 2026 14:14:26 -0400 Subject: [PATCH 18/18] 0.3.2 --- CHANGELOG.md | 39 +++++++++++++++++++++++++++++++++++---- Cargo.lock | 2 +- Cargo.toml | 2 +- features/cli/cli.feature | 2 +- 4 files changed, 38 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 604acb7..4c2c2b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,41 @@ # datu Version Notes -## v0.3.1 +## v0.3.2 + +### Highlights + +- **Convert command** now uses the DataFrame API for streamlined file processing; internal `resolve_input_file_type` renamed to `resolve_file_type`. +- **Avro**: Added compatibility for Int16 fields in record batches. +- **Error handling**: Replaced `anyhow` with `eyre`. +- **REPL**: Refactored evaluation to use `exec_*` naming and improved test structure; feature files updated with REPL equivalents. +- **Docs**: README updates for JSON support and REPL usage. +- **CI**: Switched to `actions-rust-lang/setup-rust-toolchain@v1`, Rust 1.94.0, no nightly. + +### Improvements + +- **Convert** + - Implemented DataFrame API support for file processing in the convert command. + +- **Avro pipeline** + - Int16 fields in record batches are now handled correctly when writing Avro. -Compare range: `0.3.0...0.3.1` +- **REPL** + - Evaluation methods refactored to `exec_*` naming convention. + - Test structure and REPL feature scenarios updated. + +- **Project** + - Added `.clocignore`. + - Dependency and version updates in Cargo files. + - CI workflow and toolchain adjustments. 
+ +### Changelog Stats + +- 16 commits +- 28 files changed +- 754 insertions +- 610 deletions + +## v0.3.1 ### Highlights @@ -42,8 +75,6 @@ Compare range: `0.3.0...0.3.1` ## v0.3.0 -Compare range: `0.2.4...0.3.0` - ### Highlights - Added an interactive REPL mode: running `datu` without a subcommand now starts an interactive pipeline shell. diff --git a/Cargo.lock b/Cargo.lock index 5fd2eb1..2ea77e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1789,7 +1789,7 @@ dependencies = [ [[package]] name = "datu" -version = "0.3.1" +version = "0.3.2" dependencies = [ "arrow", "arrow-avro", diff --git a/Cargo.toml b/Cargo.toml index c85ccc1..5e154fa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "datu" -version = "0.3.1" +version = "0.3.2" edition = "2024" description = "datu - a data file utility" license = "MIT" diff --git a/features/cli/cli.feature b/features/cli/cli.feature index a76f54e..153205a 100644 --- a/features/cli/cli.feature +++ b/features/cli/cli.feature @@ -2,7 +2,7 @@ Feature: CLI Scenario: Print version When I run `datu --version` - Then the first line of the output should be: datu 0.3.1 + Then the first line of the output should be: datu 0.3.2 Scenario: Print help with help subcommand When I run `datu help`