From e8f6f0f0e2e86bfa130d7f0bd257b6e0e4b3759c Mon Sep 17 00:00:00 2001
From: Jeremy Dyer <jdye64@gmail.com>
Date: Fri, 6 Mar 2026 15:03:03 -0500
Subject: [PATCH 1/4] Add ingest batch and ingest inprocess commands to the new
 'nr' cli command

---
 nemo_retriever/README.md                      |  26 +-
 nemo_retriever/chart_stage_config.yaml        |   4 +-
 nemo_retriever/embedding_stage_config.yaml    |   2 +-
 nemo_retriever/harness/HANDOFF.md             |  22 +-
 nemo_retriever/infographic_stage_config.yaml  |   4 +-
 nemo_retriever/pdf_stage_config.yaml          |   4 +-
 nemo_retriever/pyproject.toml                 |   2 +-
 .../src/nemo_retriever/adapters/cli/main.py   |  12 +-
 .../src/nemo_retriever/audio/cli.py           |   2 +-
 .../src/nemo_retriever/audio/stage.py         |   4 +-
 .../nemo_retriever/examples/batch_pipeline.py |   2 +-
 .../examples/online_pipeline.py               |   2 +-
 .../src/nemo_retriever/html/__main__.py       |   6 +-
 .../src/nemo_retriever/ingest-config.yaml     |  22 +-
 .../ingest_modes/inprocess_cli.py             | 291 ++++++++++++++++++
 .../local/stages/stage1_pdf_extraction.py     |   4 +-
 .../local/stages/stage5_text_embeddings.py    |   2 +-
 .../local/stages/stage6_vdb_upload.py         |   2 +-
 .../local/stages/stage7_vdb_query.py          |   2 +-
 .../stages/stage999_post_mortem_analysis.py   |   4 +-
 .../src/nemo_retriever/txt/__main__.py        |   6 +-
 .../src/nemo_retriever/vector_store/stage.py  |   2 +-
 nemo_retriever/table_stage_config.yaml        |   4 +-
 nemo_retriever/tests/test_audio_stage.py      |   2 +-
 24 files changed, 366 insertions(+), 67 deletions(-)
 create mode 100644 nemo_retriever/src/nemo_retriever/ingest_modes/inprocess_cli.py
diff --git a/nemo_retriever/README.md b/nemo_retriever/README.md
index d51d20bdf..c48a56b11 100644
--- a/nemo_retriever/README.md
+++ b/nemo_retriever/README.md
@@ -16,8 +16,8 @@ From the repo root:
 
 ```bash
 cd /path/to/nv-ingest
-uv venv .retriever
-source .retriever/bin/activate
+uv venv .nr
+source .nr/bin/activate
 uv pip install -e ./nemo_retriever
 ```
 
@@ -52,7 +52,7 @@ uv run python nemo_retriever/src/nemo_retriever/examples/batch_pipeline.py /path
 
 Pass the directory that contains your PDFs as the first argument (`input-dir`). For recall evaluation, the pipeline uses `bo767_query_gt.csv` in the current directory by default; override with `--query-csv <path>`. For document-level recall, use `--recall-match-mode pdf_only` with `query,expected_pdf` data. Recall is skipped if the query file does not exist. By default, per-query details (query, gold, hits) are printed; use `--no-recall-details` to print only the missed-gold summary and recall metrics. To use an existing Ray cluster, pass `--ray-address auto`. If OCR fails with a missing `libcudart.so.13`, install the CUDA 13 runtime and set `LD_LIBRARY_PATH` as shown above.
 
-For **HTML** or **text** ingestion, use `--input-type html` or `--input-type txt` with the same examples (e.g. `batch_pipeline.py <dir> --input-type html`). HTML files are converted to markdown via markitdown, then chunked with the same tokenizer as .txt. Staged CLI: `retriever html run --input-dir <dir>` writes `*.html_extraction.json`; then `retriever local stage5 run --input-dir <dir> --pattern "*.html_extraction.json"` and `retriever local stage6 run --input-dir <dir>`.
+For **HTML** or **text** ingestion, use `--input-type html` or `--input-type txt` with the same examples (e.g. `batch_pipeline.py <dir> --input-type html`). HTML files are converted to markdown via markitdown, then chunked with the same tokenizer as .txt. Staged CLI: `nr html run --input-dir <dir>` writes `*.html_extraction.json`; then `nr local stage5 run --input-dir <dir> --pattern "*.html_extraction.json"` and `nr local stage6 run --input-dir <dir>`.
 
 ## Harness (run, sweep, nightly)
 
@@ -61,7 +61,7 @@ For **HTML** or **text** ingestion, use `--input-type html` or `--input-type txt
 - Config files:
   - `nemo_retriever/harness/test_configs.yaml`
   - `nemo_retriever/harness/nightly_config.yaml`
-- CLI entrypoint is nested under `retriever harness`.
+- CLI entrypoint is nested under `nr harness`.
 - First pass is LanceDB-only and enforces recall-required pass/fail by default.
 - Single-run artifact directories default to `<dataset>_<timestamp>`.
 - Dataset-specific recall adapters are supported via config:
@@ -77,37 +77,37 @@ For **HTML** or **text** ingestion, use `--input-type html` or `--input-type txt
 
 ```bash
 # Dataset preset from test_configs.yaml (recall-required example)
-retriever harness run --dataset jp20 --preset single_gpu
+nr harness run --dataset jp20 --preset single_gpu
 
 # Direct dataset path
-retriever harness run --dataset /datasets/nv-ingest/bo767 --preset single_gpu
+nr harness run --dataset /datasets/nv-ingest/bo767 --preset single_gpu
 
 # Add repeatable run or session tags for later review
-retriever harness run --dataset jp20 --preset single_gpu --tag nightly --tag candidate
+nr harness run --dataset jp20 --preset single_gpu --tag nightly --tag candidate
 ```
 
 ### Sweep runs (explicit runs list)
 
 ```bash
-retriever harness sweep --runs-config nemo_retriever/harness/nightly_config.yaml
+nr harness sweep --runs-config nemo_retriever/harness/nightly_config.yaml
 ```
 
 ### Nightly session
 
 ```bash
-retriever harness nightly --runs-config nemo_retriever/harness/nightly_config.yaml
-retriever harness nightly --dry-run
-retriever harness nightly --runs-config nemo_retriever/harness/nightly_config.yaml --tag nightly
+nr harness nightly --runs-config nemo_retriever/harness/nightly_config.yaml
+nr harness nightly --dry-run
+nr harness nightly --runs-config nemo_retriever/harness/nightly_config.yaml --tag nightly
 ```
 
 ### Session inspection
 
 ```bash
 # Print a compact table from a completed sweep/nightly session
-retriever harness summary nemo_retriever/artifacts/nightly_20260305_010203_UTC
+nr harness summary nemo_retriever/artifacts/nightly_20260305_010203_UTC
 
 # Compare two session summaries by run name
-retriever harness compare \
+nr harness compare \
   nemo_retriever/artifacts/nightly_20260305_010203_UTC \
   nemo_retriever/artifacts/nightly_20260306_010204_UTC
 ```
diff --git a/nemo_retriever/chart_stage_config.yaml b/nemo_retriever/chart_stage_config.yaml
index fada15b68..00abf3c51 100644
--- a/nemo_retriever/chart_stage_config.yaml
+++ b/nemo_retriever/chart_stage_config.yaml
@@ -1,8 +1,8 @@
 # Example config for chart extraction.
 #
 # Intended usage (once the chart stage CLI is wired up similarly to table stage):
-#   - `retriever chart stage run --config <this.yaml> --input <primitives.parquet>`
-#   - `retriever local stage4 run --config <this.yaml> --input <primitives.parquet>`
+#   - `nr chart stage run --config <this.yaml> --input <primitives.parquet>`
+#   - `nr local stage4 run --config <this.yaml> --input <primitives.parquet>`
 #
 # This YAML is parsed into `nv_ingest_api.internal.schemas.extract.extract_chart_schema.ChartExtractorSchema`
 # via `nemo_retriever.chart.config.load_chart_extractor_schema_from_dict`.
diff --git a/nemo_retriever/embedding_stage_config.yaml b/nemo_retriever/embedding_stage_config.yaml
index 46c45d170..df9de3567 100644
--- a/nemo_retriever/embedding_stage_config.yaml
+++ b/nemo_retriever/embedding_stage_config.yaml
@@ -11,7 +11,7 @@
 api_key: ""  # e.g. $NGC_API_KEY or $NVIDIA_API_KEY
 
 # Embedding service settings
-# If set to null/empty, `retriever local stage5` will fall back to local HF embeddings
+# If set to null/empty, `nr local stage5` will fall back to local HF embeddings
 # via `nemo_retriever.model.local.llama_nemotron_embed_1b_v2_embedder`.
 embedding_nim_endpoint: null
 # embedding_nim_endpoint: "http://localhost:8012/v1"
diff --git a/nemo_retriever/harness/HANDOFF.md b/nemo_retriever/harness/HANDOFF.md
index e724307b1..ac992ad79 100644
--- a/nemo_retriever/harness/HANDOFF.md
+++ b/nemo_retriever/harness/HANDOFF.md
@@ -49,36 +49,36 @@ From repo root:
 
 ```bash
 source ~/setup_env.sh
-source .retriever/bin/activate
+source .nr/bin/activate
 uv pip install -e ./nemo_retriever
 ```
 
 Single run:
 
 ```bash
-retriever harness run --dataset jp20 --preset single_gpu
-retriever harness run --dataset jp20 --preset single_gpu --tag nightly --tag candidate
+nr harness run --dataset jp20 --preset single_gpu
+nr harness run --dataset jp20 --preset single_gpu --tag nightly --tag candidate
 ```
 
 Sweep:
 
 ```bash
-retriever harness sweep --runs-config nemo_retriever/harness/nightly_config.yaml
+nr harness sweep --runs-config nemo_retriever/harness/nightly_config.yaml
 ```
 
 Nightly:
 
 ```bash
-retriever harness nightly --runs-config nemo_retriever/harness/nightly_config.yaml
-retriever harness nightly --dry-run
-retriever harness nightly --runs-config nemo_retriever/harness/nightly_config.yaml --tag nightly
+nr harness nightly --runs-config nemo_retriever/harness/nightly_config.yaml
+nr harness nightly --dry-run
+nr harness nightly --runs-config nemo_retriever/harness/nightly_config.yaml --tag nightly
 ```
 
 Session inspection:
 
 ```bash
-retriever harness summary nemo_retriever/artifacts/nightly_20260305_010203_UTC
-retriever harness compare \
+nr harness summary nemo_retriever/artifacts/nightly_20260305_010203_UTC
+nr harness compare \
   nemo_retriever/artifacts/nightly_20260305_010203_UTC \
   nemo_retriever/artifacts/nightly_20260306_010204_UTC
 ```
@@ -148,9 +148,9 @@ Notes:
   - `financebench` now defaults to `data/financebench_train.json` with recall enabled.
 - Session UX improvements:
   - Runs, sweeps, and nightly sessions accept repeatable `--tag` values persisted into artifacts.
-  - `retriever harness summary` prints a compact table from `session_summary.json`.
+  - `nr harness summary` prints a compact table from `session_summary.json`.
 - Comparison utility:
-  - `retriever harness compare` prints pages/sec and recall deltas by run name for two sessions.
+  - `nr harness compare` prints pages/sec and recall deltas by run name for two sessions.
 
 ## Current Validation Status
 
diff --git a/nemo_retriever/infographic_stage_config.yaml b/nemo_retriever/infographic_stage_config.yaml
index 67ac68546..d945aa586 100644
--- a/nemo_retriever/infographic_stage_config.yaml
+++ b/nemo_retriever/infographic_stage_config.yaml
@@ -1,6 +1,6 @@
 # Example config for:
-#   - `retriever infographic stage run --config <this.yaml> --input <primitives.parquet>`
-#   - `retriever local stage2 run --config <this.yaml> --input <primitives.parquet>`
+#   - `nr infographic stage run --config <this.yaml> --input <primitives.parquet>`
+#   - `nr local stage2 run --config <this.yaml> --input <primitives.parquet>`
 #
 # This YAML is parsed into `nv_ingest_api.internal.schemas.extract.extract_infographic_schema.InfographicExtractorSchema`
 # via `nemo_retriever.infographic.config.load_infographic_extractor_schema_from_dict`.
diff --git a/nemo_retriever/pdf_stage_config.yaml b/nemo_retriever/pdf_stage_config.yaml
index 33126ba61..7e9a7352e 100644
--- a/nemo_retriever/pdf_stage_config.yaml
+++ b/nemo_retriever/pdf_stage_config.yaml
@@ -1,11 +1,11 @@
-# Example config for: `retriever pdf stage page-elements --config <this.yaml>`
+# Example config for: `nr pdf stage page-elements --config <this.yaml>`
 #
 # CLI override rule:
 # - If you pass an option explicitly on the CLI, it wins.
 # - Otherwise the value from this YAML file is used.
 #
 # You can run repeatedly:
-#   retriever pdf stage page-elements --config nemo_retriever/pdf_stage_config.yaml
+#   nr pdf stage page-elements --config nemo_retriever/pdf_stage_config.yaml
 #
 
 # Directory containing PDFs (scanned recursively for *.pdf)
diff --git a/nemo_retriever/pyproject.toml b/nemo_retriever/pyproject.toml
index b9d1ac476..54de81697 100644
--- a/nemo_retriever/pyproject.toml
+++ b/nemo_retriever/pyproject.toml
@@ -78,7 +78,7 @@ dev = [
 ]
 
 [project.scripts]
-retriever = "nemo_retriever.__main__:main"
+nr = "nemo_retriever.__main__:main"
 
 [tool.setuptools.dynamic]
 version = {attr = "nemo_retriever.version.get_build_version"}
diff --git a/nemo_retriever/src/nemo_retriever/adapters/cli/main.py b/nemo_retriever/src/nemo_retriever/adapters/cli/main.py
index 32d8012de..72ec2fe96 100644
--- a/nemo_retriever/src/nemo_retriever/adapters/cli/main.py
+++ b/nemo_retriever/src/nemo_retriever/adapters/cli/main.py
@@ -7,12 +7,14 @@
 import typer
 
 from nemo_retriever.audio import app as audio_app
+from nemo_retriever.examples.batch_pipeline import app as batch_app
 from nemo_retriever.utils.benchmark import app as benchmark_app
 from nemo_retriever.chart import app as chart_app
 from nemo_retriever.utils.compare import app as compare_app
 from nemo_retriever.harness import app as harness_app
 from nemo_retriever.html import __main__ as html_main
 from nemo_retriever.utils.image import app as image_app
+from nemo_retriever.ingest_modes.inprocess_cli import app as inprocess_app
 from nemo_retriever.local import app as local_app
 from nemo_retriever.online import __main__ as online_main
 from nemo_retriever.pdf import app as pdf_app
@@ -21,7 +23,13 @@
 from nemo_retriever.vector_store import app as vector_store_app
 from nemo_retriever.version import get_version_info
 
-app = typer.Typer(help="Retriever")
+app = typer.Typer(help="NeMo Retriever – RAG ingestion pipeline CLI.")
+
+ingest_app = typer.Typer(help="Run ingestion pipelines (batch or in-process).")
+ingest_app.add_typer(batch_app, name="batch")
+ingest_app.add_typer(inprocess_app, name="inprocess")
+app.add_typer(ingest_app, name="ingest")
+
 app.add_typer(audio_app, name="audio")
 app.add_typer(image_app, name="image")
 app.add_typer(pdf_app, name="pdf")
@@ -54,7 +62,7 @@ def _callback(
     version: bool = typer.Option(
         False,
         "--version",
-        help="Show retriever version metadata and exit.",
+        help="Show nr version metadata and exit.",
         callback=_version_callback,
         is_eager=True,
     )
diff --git a/nemo_retriever/src/nemo_retriever/audio/cli.py b/nemo_retriever/src/nemo_retriever/audio/cli.py
index 1dbb6ee6d..af91777e8 100644
--- a/nemo_retriever/src/nemo_retriever/audio/cli.py
+++ b/nemo_retriever/src/nemo_retriever/audio/cli.py
@@ -8,7 +8,7 @@
 This module intentionally contains **no configuration logic**. It simply re-exports the
 `nemo_retriever.audio.stage` Typer application so any arguments provided to:
 
-  `retriever audio ...`
+  `nr audio ...`
 
 are handled exactly the same as the stage commands (e.g. `extract`, `discover`).
 """
diff --git a/nemo_retriever/src/nemo_retriever/audio/stage.py b/nemo_retriever/src/nemo_retriever/audio/stage.py
index 000ae99c9..7284ff1cb 100644
--- a/nemo_retriever/src/nemo_retriever/audio/stage.py
+++ b/nemo_retriever/src/nemo_retriever/audio/stage.py
@@ -5,8 +5,8 @@
 """
 Audio extraction stage: chunk + ASR only, write *.audio_extraction.json sidecars.
 
-Invoked as `retriever audio extract` / `retriever audio discover` (or
-`python -m nemo_retriever.audio extract` / `discover`). Analogous to `retriever pdf stage page-elements`.
+Invoked as `nr audio extract` / `nr audio discover` (or
+`python -m nemo_retriever.audio extract` / `discover`). Analogous to `nr pdf stage page-elements`.
 """
 
 from __future__ import annotations
diff --git a/nemo_retriever/src/nemo_retriever/examples/batch_pipeline.py b/nemo_retriever/src/nemo_retriever/examples/batch_pipeline.py
index 6c8574577..1999c6437 100644
--- a/nemo_retriever/src/nemo_retriever/examples/batch_pipeline.py
+++ b/nemo_retriever/src/nemo_retriever/examples/batch_pipeline.py
@@ -4,7 +4,7 @@
 
 """
 Batch ingestion pipeline with optional recall evaluation.
-Run with: uv run python -m nemo_retriever.examples.batch_pipeline <input-dir>
+Run with: nr batch <input-dir>
 """
 
 import json
diff --git a/nemo_retriever/src/nemo_retriever/examples/online_pipeline.py b/nemo_retriever/src/nemo_retriever/examples/online_pipeline.py
index 9808e29d6..d23a82225 100644
--- a/nemo_retriever/src/nemo_retriever/examples/online_pipeline.py
+++ b/nemo_retriever/src/nemo_retriever/examples/online_pipeline.py
@@ -7,7 +7,7 @@
 
 - Inprocess: runs the full pipeline locally (no server).
 - Online: submits each document to the online ingest REST service (start with
-  `retriever online serve`). Uses the same LanceDB for recall evaluation.
+  `nr online serve`). Uses the same LanceDB for recall evaluation.
 
 Run with:
   uv run python -m nemo_retriever.examples.online_pipeline <input-dir>
diff --git a/nemo_retriever/src/nemo_retriever/html/__main__.py b/nemo_retriever/src/nemo_retriever/html/__main__.py
index 28d4e9a37..748f5f361 100644
--- a/nemo_retriever/src/nemo_retriever/html/__main__.py
+++ b/nemo_retriever/src/nemo_retriever/html/__main__.py
@@ -5,7 +5,7 @@
 """
 CLI for .html extraction: markitdown -> markdown -> tokenizer split, write *.html_extraction.json.
 
-Use with: retriever local stage5 run --pattern "*.html_extraction.json" then stage6.
+Use with: nr local stage5 run --pattern "*.html_extraction.json" then stage6.
 """
 
 from __future__ import annotations
@@ -63,8 +63,8 @@ def run(
     Scan input_dir for *.html, convert to markdown and chunk each, write <stem>.html_extraction.json.
 
     Output JSON has the same primitives-like shape as stage5 input (text, path, page_number, metadata).
-    Then run: retriever local stage5 run --input-dir <dir> --pattern "*.html_extraction.json"
-    and retriever local stage6 run --input-dir <dir>.
+    Then run: nr local stage5 run --input-dir <dir> --pattern "*.html_extraction.json"
+    and nr local stage6 run --input-dir <dir>.
     """
     input_dir = Path(input_dir)
     html_files = sorted(input_dir.glob("*.html"))
diff --git a/nemo_retriever/src/nemo_retriever/ingest-config.yaml b/nemo_retriever/src/nemo_retriever/ingest-config.yaml
index 59fb95303..b35c1b573 100644
--- a/nemo_retriever/src/nemo_retriever/ingest-config.yaml
+++ b/nemo_retriever/src/nemo_retriever/ingest-config.yaml
@@ -1,4 +1,4 @@
-# nv-ingest retriever consolidated configuration
+# nv-ingest nr consolidated configuration
 #
 # This single file replaces the older per-stage YAML configs:
 #   - pdf_stage_config.yaml
@@ -17,7 +17,7 @@
 # - Sections below are consumed by their respective stages.
 
 pdf:
-  # Example config for: `retriever pdf stage page-elements --config <this.yaml>`
+  # Example config for: `nr pdf stage page-elements --config <this.yaml>`
   #
   # Directory containing PDFs (scanned recursively for *.pdf)
   input_dir: /home/local/jdyer/datasets/jp20
@@ -59,14 +59,14 @@ pdf:
   # Optionally limit number of PDFs processed
   limit: null
 
-# Optional config for `retriever txt run` and .extract_txt() API
+# Optional config for `nr txt run` and .extract_txt() API
 txt:
   max_tokens: 512
   overlap_tokens: 0
   tokenizer_model_id: nvidia/llama-3.2-nv-embedqa-1b-v2
   encoding: utf-8
 
-# Optional config for `retriever html run` and .extract_html() API
+# Optional config for `nr html run` and .extract_html() API
 html:
   max_tokens: 512
   overlap_tokens: 0
@@ -94,8 +94,8 @@ audio_asr:
 
 table:
   # Example config for:
-  #   - `retriever table stage run --config <this.yaml> --input <primitives.parquet>`
-  #   - `retriever local stage3 run --config <this.yaml> --input <primitives.parquet>`
+  #   - `nr table stage run --config <this.yaml> --input <primitives.parquet>`
+  #   - `nr local stage3 run --config <this.yaml> --input <primitives.parquet>`
   #
   # This YAML is parsed into `nv_ingest_api.internal.schemas.extract.extract_table_schema.TableExtractorSchema`
   # via `nemo_retriever.table.config.load_table_extractor_schema_from_dict`.
@@ -133,8 +133,8 @@ table:
 
 chart:
   # Example config for:
-  #   - `retriever chart stage run --config <this.yaml> --input <primitives.parquet>`
-  #   - `retriever local stage4 run --config <this.yaml> --input <primitives.parquet>`
+  #   - `nr chart stage run --config <this.yaml> --input <primitives.parquet>`
+  #   - `nr local stage4 run --config <this.yaml> --input <primitives.parquet>`
   #
   # This YAML is parsed into `nv_ingest_api.internal.schemas.extract.extract_chart_schema.ChartExtractorSchema`
   # via `nemo_retriever.chart.config.load_chart_extractor_schema_from_dict`.
@@ -185,8 +185,8 @@ chart:
 
 infographic:
   # Example config for:
-  #   - `retriever infographic stage run --config <this.yaml> --input <primitives.parquet>`
-  #   - `retriever local stage2 run --config <this.yaml> --input <primitives.parquet>`
+  #   - `nr infographic stage run --config <this.yaml> --input <primitives.parquet>`
+  #   - `nr local stage2 run --config <this.yaml> --input <primitives.parquet>`
   #
   # This YAML is parsed into `nv_ingest_api.internal.schemas.extract.extract_infographic_schema.InfographicExtractorSchema`
   # via `nemo_retriever.infographic.config.load_infographic_extractor_schema_from_dict`.
@@ -231,7 +231,7 @@ embedding:
   api_key: ""  # e.g. $NGC_API_KEY or $NVIDIA_API_KEY
 
   # Embedding service settings
-  # If set to null/empty, `retriever local stage5` will fall back to local HF embeddings
+  # If set to null/empty, `nr local stage5` will fall back to local HF embeddings
   # via `nemo_retriever.model.local.llama_nemotron_embed_1b_v2_embedder`.
   embedding_nim_endpoint: null
   # embedding_nim_endpoint: "http://localhost:8012/v1"
diff --git a/nemo_retriever/src/nemo_retriever/ingest_modes/inprocess_cli.py b/nemo_retriever/src/nemo_retriever/ingest_modes/inprocess_cli.py
new file mode 100644
index 000000000..db9f0ef99
--- /dev/null
+++ b/nemo_retriever/src/nemo_retriever/ingest_modes/inprocess_cli.py
@@ -0,0 +1,291 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+In-process ingestion pipeline CLI.
+
+Run with: nr inprocess <input-path>
+"""
+
+from __future__ import annotations
+
+import logging
+import time
+from pathlib import Path
+from typing import Optional
+
+import typer
+
+logger = logging.getLogger(__name__)
+
+app = typer.Typer(help="Run the in-process ingestion pipeline (no Ray required).")
+
+LANCEDB_URI = "lancedb"
+LANCEDB_TABLE = "nv-ingest"
+
+
+@app.command()
+def main(
+    input_path: Path = typer.Argument(
+        ...,
+        help="File or directory containing PDFs, .txt, .html, or .doc/.pptx files to ingest.",
+        path_type=Path,
+    ),
+    debug: bool = typer.Option(
+        False,
+        "--debug/--no-debug",
+        help="Enable debug-level logging.",
+    ),
+    input_type: str = typer.Option(
+        "pdf",
+        "--input-type",
+        help="Input format: 'pdf', 'txt', 'html', or 'doc'. "
+        "Use 'txt' for .txt, 'html' for .html (markitdown -> chunks), "
+        "'doc' for .docx/.pptx (converted to PDF via LibreOffice).",
+    ),
+    # -- Extract params --------------------------------------------------------
+    extract_text: bool = typer.Option(
+        True,
+        "--extract-text/--no-extract-text",
+        help="Extract text from PDF pages.",
+    ),
+    extract_tables: bool = typer.Option(
+        True,
+        "--extract-tables/--no-extract-tables",
+        help="Extract tables from PDF pages.",
+    ),
+    extract_charts: bool = typer.Option(
+        True,
+        "--extract-charts/--no-extract-charts",
+        help="Extract charts from PDF pages.",
+    ),
+    extract_infographics: bool = typer.Option(
+        False,
+        "--extract-infographics/--no-extract-infographics",
+        help="Extract infographics from PDF pages.",
+    ),
+    use_table_structure: bool = typer.Option(
+        False,
+        "--use-table-structure",
+        help="Enable the combined table-structure + OCR stage for tables.",
+    ),
+    table_output_format: Optional[str] = typer.Option(
+        None,
+        "--table-output-format",
+        help="Table output format: 'pseudo_markdown' (OCR-only) or 'markdown' (table-structure + OCR).",
+    ),
+    inference_batch_size: Optional[int] = typer.Option(
+        None,
+        "--inference-batch-size",
+        help="Inference batch size for detection models.",
+    ),
+    page_elements_invoke_url: Optional[str] = typer.Option(
+        None,
+        "--page-elements-invoke-url",
+        help="Optional remote endpoint URL for page-elements model inference.",
+    ),
+    ocr_invoke_url: Optional[str] = typer.Option(
+        None,
+        "--ocr-invoke-url",
+        help="Optional remote endpoint URL for OCR model inference.",
+    ),
+    table_structure_invoke_url: Optional[str] = typer.Option(
+        None,
+        "--table-structure-invoke-url",
+        help="Optional remote endpoint URL for table-structure model inference.",
+    ),
+    # -- Embed params ----------------------------------------------------------
+    embed_model_name: str = typer.Option(
+        "nvidia/llama-3.2-nv-embedqa-1b-v2",
+        "--embed-model-name",
+        help="Embedding model name passed to .embed().",
+    ),
+    embed_invoke_url: Optional[str] = typer.Option(
+        None,
+        "--embed-invoke-url",
+        help="Optional remote endpoint URL for embedding model inference.",
+    ),
+    embed_modality: str = typer.Option(
+        "text",
+        "--embed-modality",
+        help="Default embedding modality: 'text', 'image', or 'text_image'.",
+    ),
+    embed_granularity: str = typer.Option(
+        "element",
+        "--embed-granularity",
+        help="Embedding granularity: 'element' (per table/chart/text) or 'page' (per page).",
+    ),
+    text_elements_modality: Optional[str] = typer.Option(
+        None,
+        "--text-elements-modality",
+        help="Embedding modality override for page-text rows. Falls back to --embed-modality.",
+    ),
+    structured_elements_modality: Optional[str] = typer.Option(
+        None,
+        "--structured-elements-modality",
+        help="Embedding modality override for table/chart/infographic rows. Falls back to --embed-modality.",
+    ),
+    # -- VDB upload params -----------------------------------------------------
+    lancedb_uri: str = typer.Option(
+        LANCEDB_URI,
+        "--lancedb-uri",
+        help="LanceDB URI/path for this run.",
+    ),
+    lancedb_table: str = typer.Option(
+        LANCEDB_TABLE,
+        "--lancedb-table",
+        help="LanceDB table name.",
+    ),
+    hybrid: bool = typer.Option(
+        False,
+        "--hybrid/--no-hybrid",
+        help="Enable LanceDB hybrid mode (dense + FTS text).",
+    ),
+    overwrite: bool = typer.Option(
+        True,
+        "--overwrite/--no-overwrite",
+        help="Overwrite existing LanceDB table (False to append).",
+    ),
+    # -- Output ----------------------------------------------------------------
+    output_directory: Optional[str] = typer.Option(
+        None,
+        "--output-dir",
+        help="Optional directory to write per-document JSON results.",
+    ),
+    # -- Execution params ------------------------------------------------------
+    parallel: bool = typer.Option(
+        False,
+        "--parallel/--no-parallel",
+        help="Enable parallel CPU extraction via ProcessPoolExecutor.",
+    ),
+    max_workers: Optional[int] = typer.Option(
+        None,
+        "--max-workers",
+        help="Max workers for parallel CPU extraction (default: CPU count).",
+    ),
+    gpu_devices: Optional[str] = typer.Option(
+        None,
+        "--gpu-devices",
+        help="Comma-separated GPU device IDs for multi-GPU pipelined execution (e.g. '0,1').",
+    ),
+    page_chunk_size: int = typer.Option(
+        32,
+        "--page-chunk-size",
+        help="Number of pages per chunk for parallel processing.",
+    ),
+    show_progress: bool = typer.Option(
+        True,
+        "--show-progress/--no-show-progress",
+        help="Show progress bars during ingestion.",
+    ),
+) -> None:
+    """Run the in-process ingestion pipeline on local documents (no Ray required)."""
+    from nemo_retriever.ingest_modes.inprocess import InProcessIngestor
+    from nemo_retriever.params import (
+        EmbedParams,
+        ExtractParams,
+        HtmlChunkParams,
+        IngestExecuteParams,
+        TextChunkParams,
+        VdbUploadParams,
+    )
+
+    log_level = logging.DEBUG if debug else logging.INFO
+    logging.basicConfig(
+        level=log_level,
+        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
+        force=True,
+    )
+
+    lancedb_uri_abs = str(Path(lancedb_uri).expanduser().resolve())
+
+    input_path = Path(input_path)
+    if input_path.is_file():
+        file_patterns = [str(input_path)]
+    elif input_path.is_dir():
+        ext_map = {
+            "txt": ["*.txt"],
+            "html": ["*.html"],
+            "doc": ["*.docx", "*.pptx"],
+        }
+        exts = ext_map.get(input_type, ["*.pdf"])
+        file_patterns = [str(input_path / e) for e in exts]
+    else:
+        raise typer.BadParameter(f"Path does not exist: {input_path}")
+
+    ingestor = InProcessIngestor()
+    ingestor = ingestor.files(file_patterns)
+
+    if input_type == "txt":
+        ingestor = ingestor.extract_txt(TextChunkParams(max_tokens=512, overlap_tokens=0))
+    elif input_type == "html":
+        ingestor = ingestor.extract_html(HtmlChunkParams(max_tokens=512, overlap_tokens=0))
+    else:
+        extract_kw: dict = dict(
+            extract_text=extract_text,
+            extract_tables=extract_tables,
+            extract_charts=extract_charts,
+            extract_infographics=extract_infographics,
+            use_table_structure=use_table_structure,
+            table_output_format=table_output_format,
+        )
+        if inference_batch_size is not None:
+            extract_kw["inference_batch_size"] = inference_batch_size
+        if page_elements_invoke_url:
+            extract_kw["page_elements_invoke_url"] = page_elements_invoke_url
+        if ocr_invoke_url:
+            extract_kw["ocr_invoke_url"] = ocr_invoke_url
+        if table_structure_invoke_url:
+            extract_kw["table_structure_invoke_url"] = table_structure_invoke_url
+        ingestor = ingestor.extract(ExtractParams(**extract_kw))
+
+    ingestor = ingestor.embed(
+        EmbedParams(
+            model_name=str(embed_model_name),
+            embed_invoke_url=embed_invoke_url,
+            embed_modality=embed_modality,
+            text_elements_modality=text_elements_modality,
+            structured_elements_modality=structured_elements_modality,
+            embed_granularity=embed_granularity,
+        )
+    )
+
+    ingestor = ingestor.vdb_upload(
+        VdbUploadParams(
+            lancedb={
+                "lancedb_uri": lancedb_uri_abs,
+                "table_name": lancedb_table,
+                "overwrite": overwrite,
+                "create_index": True,
+                "hybrid": hybrid,
+            }
+        )
+    )
+
+    if output_directory:
+        ingestor = ingestor.save_to_disk(output_directory=output_directory)
+
+    parsed_gpu_devices = None
+    if gpu_devices:
+        parsed_gpu_devices = [int(d.strip()) for d in gpu_devices.split(",") if d.strip()]
+
+    logger.info("Running in-process ingestion...")
+    start = time.perf_counter()
+
+    ingestor.ingest(
+        params=IngestExecuteParams(
+            show_progress=show_progress,
+            parallel=parallel,
+            max_workers=max_workers,
+            gpu_devices=parsed_gpu_devices,
+            page_chunk_size=page_chunk_size,
+        )
+    )
+
+    elapsed = time.perf_counter() - start
+    logger.info(f"In-process ingestion complete in {elapsed:.2f}s")
+
+
+if __name__ == "__main__":
+    app()
diff --git a/nemo_retriever/src/nemo_retriever/local/stages/stage1_pdf_extraction.py b/nemo_retriever/src/nemo_retriever/local/stages/stage1_pdf_extraction.py
index 06bc08a4a..863bea08c 100644
--- a/nemo_retriever/src/nemo_retriever/local/stages/stage1_pdf_extraction.py
+++ b/nemo_retriever/src/nemo_retriever/local/stages/stage1_pdf_extraction.py
@@ -10,11 +10,11 @@
 This module intentionally contains **no configuration logic**. It simply re-exports the
 `nemo_retriever.pdf.stage` Typer application so any arguments provided to:
 
-  `retriever local stage1 ...`
+  `nr local stage1 ...`
 
 are handled exactly the same as:
 
-  `retriever pdf ...`
+  `nr pdf ...`
 """
 
 from nemo_retriever.pdf.stage import app as app
diff --git a/nemo_retriever/src/nemo_retriever/local/stages/stage5_text_embeddings.py b/nemo_retriever/src/nemo_retriever/local/stages/stage5_text_embeddings.py
index 3ea038f3d..5cdae2f2a 100644
--- a/nemo_retriever/src/nemo_retriever/local/stages/stage5_text_embeddings.py
+++ b/nemo_retriever/src/nemo_retriever/local/stages/stage5_text_embeddings.py
@@ -10,7 +10,7 @@
 This module intentionally contains no configuration logic. It re-exports the
 `nemo_retriever.text_embed.stage` Typer application so arguments provided to:
 
-  `retriever local stage5 ...`
+  `nr local stage5 ...`
 
 are handled by `nemo_retriever.text_embed.stage`, including local-HF fallback options
 when an embedding endpoint is not configured.
diff --git a/nemo_retriever/src/nemo_retriever/local/stages/stage6_vdb_upload.py b/nemo_retriever/src/nemo_retriever/local/stages/stage6_vdb_upload.py
index 17c18b45f..a61808957 100644
--- a/nemo_retriever/src/nemo_retriever/local/stages/stage6_vdb_upload.py
+++ b/nemo_retriever/src/nemo_retriever/local/stages/stage6_vdb_upload.py
@@ -10,7 +10,7 @@
 This module intentionally contains no configuration logic. It re-exports the
 `nemo_retriever.vector_store.stage` Typer application so arguments provided to:
 
-  `retriever local stage6 ...`
+  `nr local stage6 ...`
 
 are handled by `nemo_retriever.vector_store.stage`.
 """
diff --git a/nemo_retriever/src/nemo_retriever/local/stages/stage7_vdb_query.py b/nemo_retriever/src/nemo_retriever/local/stages/stage7_vdb_query.py
index 451befd0d..e11be2c5d 100644
--- a/nemo_retriever/src/nemo_retriever/local/stages/stage7_vdb_query.py
+++ b/nemo_retriever/src/nemo_retriever/local/stages/stage7_vdb_query.py
@@ -10,7 +10,7 @@
 This module intentionally contains no configuration logic. It re-exports the
 `nemo_retriever.recall.vdb_recall` Typer application so arguments provided to:
 
-  `retriever local stage7 ...`
+  `nr local stage7 ...`
 
 are handled by `nemo_retriever.recall.vdb_recall`, including a local-HF fallback path
 when embedding HTTP/gRPC endpoints are not configured.
diff --git a/nemo_retriever/src/nemo_retriever/local/stages/stage999_post_mortem_analysis.py b/nemo_retriever/src/nemo_retriever/local/stages/stage999_post_mortem_analysis.py
index e4f6ba1ae..951d9dfe5 100644
--- a/nemo_retriever/src/nemo_retriever/local/stages/stage999_post_mortem_analysis.py
+++ b/nemo_retriever/src/nemo_retriever/local/stages/stage999_post_mortem_analysis.py
@@ -659,7 +659,7 @@ def _arc_for(p: Path) -> str:
 
         # Include a small readme to guide recipients.
         readme = (
-            "retriever stage999 gathered results\n"
+            "nr stage999 gathered results\n"
             "\n"
             "Contents:\n"
             "- bo767_query_gt.csv: original query->pdf_page mapping\n"
@@ -1697,7 +1697,7 @@ def _short(s: str, n: int = 90) -> str:
     summary_cache: Dict[int, Dict[str, Any]] = {}
 
     root = tk.Tk()
-    root.title("retriever stage999 post-mortem analysis")
+    root.title("nr stage999 post-mortem analysis")
 
     # Layout: left search+list, right detail panel.
     root.columnconfigure(0, weight=0)
diff --git a/nemo_retriever/src/nemo_retriever/txt/__main__.py b/nemo_retriever/src/nemo_retriever/txt/__main__.py
index b769c774e..fb60cb962 100644
--- a/nemo_retriever/src/nemo_retriever/txt/__main__.py
+++ b/nemo_retriever/src/nemo_retriever/txt/__main__.py
@@ -5,7 +5,7 @@
 """
 CLI for .txt extraction: tokenizer-based split, write *.txt_extraction.json.
 
-Use with: retriever local stage5 run --pattern "*.txt_extraction.json" then stage6.
+Use with: nr local stage5 run --pattern "*.txt_extraction.json" then stage6.
 """
 
 from __future__ import annotations
@@ -63,8 +63,8 @@ def run(
     Scan input_dir for *.txt, tokenizer-split each into chunks, write <stem>.txt_extraction.json.
 
     Output JSON has the same primitives-like shape as stage5 input (text, path, page_number, metadata).
-    Then run: retriever local stage5 run --input-dir <dir> --pattern "*.txt_extraction.json"
-    and retriever local stage6 run --input-dir <dir>.
+    Then run: nr local stage5 run --input-dir <dir> --pattern "*.txt_extraction.json"
+    and nr local stage6 run --input-dir <dir>.
     """
     input_dir = Path(input_dir)
     txt_files = sorted(input_dir.glob("*.txt"))
diff --git a/nemo_retriever/src/nemo_retriever/vector_store/stage.py b/nemo_retriever/src/nemo_retriever/vector_store/stage.py
index ba3f994a1..94c535272 100644
--- a/nemo_retriever/src/nemo_retriever/vector_store/stage.py
+++ b/nemo_retriever/src/nemo_retriever/vector_store/stage.py
@@ -24,7 +24,7 @@ def run(
         exists=True,
         file_okay=False,
         dir_okay=True,
-        help="Directory containing `*.text_embeddings.json` files (from `retriever local stage5`).",
+        help="Directory containing `*.text_embeddings.json` files (from `nr local stage5`).",
     ),
     recursive: bool = typer.Option(False, "--recursive/--no-recursive", help="Scan subdirectories too."),
     limit: Optional[int] = typer.Option(None, "--limit", min=1, help="Optionally limit number of input files."),
diff --git a/nemo_retriever/table_stage_config.yaml b/nemo_retriever/table_stage_config.yaml
index e268c8ba7..8a7b80152 100644
--- a/nemo_retriever/table_stage_config.yaml
+++ b/nemo_retriever/table_stage_config.yaml
@@ -1,6 +1,6 @@
 # Example config for:
-#   - `retriever table stage run --config <this.yaml> --input <primitives.parquet>`
-#   - `retriever local stage3 run --config <this.yaml> --input <primitives.parquet>`
+#   - `nr table stage run --config <this.yaml> --input <primitives.parquet>`
+#   - `nr local stage3 run --config <this.yaml> --input <primitives.parquet>`
 #
 # This YAML is parsed into `nv_ingest_api.internal.schemas.extract.extract_table_schema.TableExtractorSchema`
 # via `nemo_retriever.table.config.load_table_extractor_schema_from_dict`.
diff --git a/nemo_retriever/tests/test_audio_stage.py b/nemo_retriever/tests/test_audio_stage.py
index 33124adbf..b23209513 100644
--- a/nemo_retriever/tests/test_audio_stage.py
+++ b/nemo_retriever/tests/test_audio_stage.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 """
-Tests for the audio extraction-only stage (retriever audio extract).
+Tests for the audio extraction-only stage (nr audio extract).
 """
 
 from pathlib import Path

From 2f94f14f3b9a1f75420de03e355e9f4fc5343cf5 Mon Sep 17 00:00:00 2001
From: Jeremy Dyer <jdye64@gmail.com>
Date: Fri, 6 Mar 2026 15:22:33 -0500
Subject: [PATCH 2/4] updates and unit tests

---
 nemo_retriever/pyproject.toml                 |   1 +
 .../src/nemo_retriever/adapters/cli/main.py   |   2 +-
 .../examples/inprocess_pipeline.py            |   2 +-
 .../ingest_modes/inprocess_cli.py             | 291 ------------------
 nemo_retriever/tests/test_nr_cli.py           | 153 +++++++++
 5 files changed, 156 insertions(+), 293 deletions(-)
 delete mode 100644 nemo_retriever/src/nemo_retriever/ingest_modes/inprocess_cli.py
 create mode 100644 nemo_retriever/tests/test_nr_cli.py

diff --git a/nemo_retriever/pyproject.toml b/nemo_retriever/pyproject.toml
index 54de81697..24c400f43 100644
--- a/nemo_retriever/pyproject.toml
+++ b/nemo_retriever/pyproject.toml
@@ -69,6 +69,7 @@ dependencies = [
   "soundfile>=0.12.0",
   "scipy>=1.11.0",
   "nvidia-ml-py",
+  "pytest"
 ]
 
 [project.optional-dependencies]
diff --git a/nemo_retriever/src/nemo_retriever/adapters/cli/main.py b/nemo_retriever/src/nemo_retriever/adapters/cli/main.py
index 72ec2fe96..e533892e8 100644
--- a/nemo_retriever/src/nemo_retriever/adapters/cli/main.py
+++ b/nemo_retriever/src/nemo_retriever/adapters/cli/main.py
@@ -14,7 +14,7 @@
 from nemo_retriever.harness import app as harness_app
 from nemo_retriever.html import __main__ as html_main
 from nemo_retriever.utils.image import app as image_app
-from nemo_retriever.ingest_modes.inprocess_cli import app as inprocess_app
+from nemo_retriever.examples.inprocess_pipeline import app as inprocess_app
 from nemo_retriever.local import app as local_app
 from nemo_retriever.online import __main__ as online_main
 from nemo_retriever.pdf import app as pdf_app
diff --git a/nemo_retriever/src/nemo_retriever/examples/inprocess_pipeline.py b/nemo_retriever/src/nemo_retriever/examples/inprocess_pipeline.py
index ea6295c2b..37835af99 100644
--- a/nemo_retriever/src/nemo_retriever/examples/inprocess_pipeline.py
+++ b/nemo_retriever/src/nemo_retriever/examples/inprocess_pipeline.py
@@ -4,7 +4,7 @@
 
 """
 In-process ingestion pipeline (no Ray) with optional recall evaluation.
-Run with: uv run python -m nemo_retriever.examples.inprocess_pipeline <input-dir>
+Run with: nr ingest inprocess <input-dir>
 """
 
 import json
diff --git a/nemo_retriever/src/nemo_retriever/ingest_modes/inprocess_cli.py b/nemo_retriever/src/nemo_retriever/ingest_modes/inprocess_cli.py
deleted file mode 100644
index db9f0ef99..000000000
--- a/nemo_retriever/src/nemo_retriever/ingest_modes/inprocess_cli.py
+++ /dev/null
@@ -1,291 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
-# All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-
-"""
-In-process ingestion pipeline CLI.
-
-Run with: nr inprocess <input-path>
-"""
-
-from __future__ import annotations
-
-import logging
-import time
-from pathlib import Path
-from typing import Optional
-
-import typer
-
-logger = logging.getLogger(__name__)
-
-app = typer.Typer(help="Run the in-process ingestion pipeline (no Ray required).")
-
-LANCEDB_URI = "lancedb"
-LANCEDB_TABLE = "nv-ingest"
-
-
-@app.command()
-def main(
-    input_path: Path = typer.Argument(
-        ...,
-        help="File or directory containing PDFs, .txt, .html, or .doc/.pptx files to ingest.",
-        path_type=Path,
-    ),
-    debug: bool = typer.Option(
-        False,
-        "--debug/--no-debug",
-        help="Enable debug-level logging.",
-    ),
-    input_type: str = typer.Option(
-        "pdf",
-        "--input-type",
-        help="Input format: 'pdf', 'txt', 'html', or 'doc'. "
-        "Use 'txt' for .txt, 'html' for .html (markitdown -> chunks), "
-        "'doc' for .docx/.pptx (converted to PDF via LibreOffice).",
-    ),
-    # -- Extract params --------------------------------------------------------
-    extract_text: bool = typer.Option(
-        True,
-        "--extract-text/--no-extract-text",
-        help="Extract text from PDF pages.",
-    ),
-    extract_tables: bool = typer.Option(
-        True,
-        "--extract-tables/--no-extract-tables",
-        help="Extract tables from PDF pages.",
-    ),
-    extract_charts: bool = typer.Option(
-        True,
-        "--extract-charts/--no-extract-charts",
-        help="Extract charts from PDF pages.",
-    ),
-    extract_infographics: bool = typer.Option(
-        False,
-        "--extract-infographics/--no-extract-infographics",
-        help="Extract infographics from PDF pages.",
-    ),
-    use_table_structure: bool = typer.Option(
-        False,
-        "--use-table-structure",
-        help="Enable the combined table-structure + OCR stage for tables.",
-    ),
-    table_output_format: Optional[str] = typer.Option(
-        None,
-        "--table-output-format",
-        help="Table output format: 'pseudo_markdown' (OCR-only) or 'markdown' (table-structure + OCR).",
-    ),
-    inference_batch_size: Optional[int] = typer.Option(
-        None,
-        "--inference-batch-size",
-        help="Inference batch size for detection models.",
-    ),
-    page_elements_invoke_url: Optional[str] = typer.Option(
-        None,
-        "--page-elements-invoke-url",
-        help="Optional remote endpoint URL for page-elements model inference.",
-    ),
-    ocr_invoke_url: Optional[str] = typer.Option(
-        None,
-        "--ocr-invoke-url",
-        help="Optional remote endpoint URL for OCR model inference.",
-    ),
-    table_structure_invoke_url: Optional[str] = typer.Option(
-        None,
-        "--table-structure-invoke-url",
-        help="Optional remote endpoint URL for table-structure model inference.",
-    ),
-    # -- Embed params ----------------------------------------------------------
-    embed_model_name: str = typer.Option(
-        "nvidia/llama-3.2-nv-embedqa-1b-v2",
-        "--embed-model-name",
-        help="Embedding model name passed to .embed().",
-    ),
-    embed_invoke_url: Optional[str] = typer.Option(
-        None,
-        "--embed-invoke-url",
-        help="Optional remote endpoint URL for embedding model inference.",
-    ),
-    embed_modality: str = typer.Option(
-        "text",
-        "--embed-modality",
-        help="Default embedding modality: 'text', 'image', or 'text_image'.",
-    ),
-    embed_granularity: str = typer.Option(
-        "element",
-        "--embed-granularity",
-        help="Embedding granularity: 'element' (per table/chart/text) or 'page' (per page).",
-    ),
-    text_elements_modality: Optional[str] = typer.Option(
-        None,
-        "--text-elements-modality",
-        help="Embedding modality override for page-text rows. Falls back to --embed-modality.",
-    ),
-    structured_elements_modality: Optional[str] = typer.Option(
-        None,
-        "--structured-elements-modality",
-        help="Embedding modality override for table/chart/infographic rows. Falls back to --embed-modality.",
-    ),
-    # -- VDB upload params -----------------------------------------------------
-    lancedb_uri: str = typer.Option(
-        LANCEDB_URI,
-        "--lancedb-uri",
-        help="LanceDB URI/path for this run.",
-    ),
-    lancedb_table: str = typer.Option(
-        LANCEDB_TABLE,
-        "--lancedb-table",
-        help="LanceDB table name.",
-    ),
-    hybrid: bool = typer.Option(
-        False,
-        "--hybrid/--no-hybrid",
-        help="Enable LanceDB hybrid mode (dense + FTS text).",
-    ),
-    overwrite: bool = typer.Option(
-        True,
-        "--overwrite/--no-overwrite",
-        help="Overwrite existing LanceDB table (False to append).",
-    ),
-    # -- Output ----------------------------------------------------------------
-    output_directory: Optional[str] = typer.Option(
-        None,
-        "--output-dir",
-        help="Optional directory to write per-document JSON results.",
-    ),
-    # -- Execution params ------------------------------------------------------
-    parallel: bool = typer.Option(
-        False,
-        "--parallel/--no-parallel",
-        help="Enable parallel CPU extraction via ProcessPoolExecutor.",
-    ),
-    max_workers: Optional[int] = typer.Option(
-        None,
-        "--max-workers",
-        help="Max workers for parallel CPU extraction (default: CPU count).",
-    ),
-    gpu_devices: Optional[str] = typer.Option(
-        None,
-        "--gpu-devices",
-        help="Comma-separated GPU device IDs for multi-GPU pipelined execution (e.g. '0,1').",
-    ),
-    page_chunk_size: int = typer.Option(
-        32,
-        "--page-chunk-size",
-        help="Number of pages per chunk for parallel processing.",
-    ),
-    show_progress: bool = typer.Option(
-        True,
-        "--show-progress/--no-show-progress",
-        help="Show progress bars during ingestion.",
-    ),
-) -> None:
-    """Run the in-process ingestion pipeline on local documents (no Ray required)."""
-    from nemo_retriever.ingest_modes.inprocess import InProcessIngestor
-    from nemo_retriever.params import (
-        EmbedParams,
-        ExtractParams,
-        HtmlChunkParams,
-        IngestExecuteParams,
-        TextChunkParams,
-        VdbUploadParams,
-    )
-
-    log_level = logging.DEBUG if debug else logging.INFO
-    logging.basicConfig(
-        level=log_level,
-        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
-        force=True,
-    )
-
-    lancedb_uri_abs = str(Path(lancedb_uri).expanduser().resolve())
-
-    input_path = Path(input_path)
-    if input_path.is_file():
-        file_patterns = [str(input_path)]
-    elif input_path.is_dir():
-        ext_map = {
-            "txt": ["*.txt"],
-            "html": ["*.html"],
-            "doc": ["*.docx", "*.pptx"],
-        }
-        exts = ext_map.get(input_type, ["*.pdf"])
-        file_patterns = [str(input_path / e) for e in exts]
-    else:
-        raise typer.BadParameter(f"Path does not exist: {input_path}")
-
-    ingestor = InProcessIngestor()
-    ingestor = ingestor.files(file_patterns)
-
-    if input_type == "txt":
-        ingestor = ingestor.extract_txt(TextChunkParams(max_tokens=512, overlap_tokens=0))
-    elif input_type == "html":
-        ingestor = ingestor.extract_html(HtmlChunkParams(max_tokens=512, overlap_tokens=0))
-    else:
-        extract_kw: dict = dict(
-            extract_text=extract_text,
-            extract_tables=extract_tables,
-            extract_charts=extract_charts,
-            extract_infographics=extract_infographics,
-            use_table_structure=use_table_structure,
-            table_output_format=table_output_format,
-        )
-        if inference_batch_size is not None:
-            extract_kw["inference_batch_size"] = inference_batch_size
-        if page_elements_invoke_url:
-            extract_kw["page_elements_invoke_url"] = page_elements_invoke_url
-        if ocr_invoke_url:
-            extract_kw["ocr_invoke_url"] = ocr_invoke_url
-        if table_structure_invoke_url:
-            extract_kw["table_structure_invoke_url"] = table_structure_invoke_url
-        ingestor = ingestor.extract(ExtractParams(**extract_kw))
-
-    ingestor = ingestor.embed(
-        EmbedParams(
-            model_name=str(embed_model_name),
-            embed_invoke_url=embed_invoke_url,
-            embed_modality=embed_modality,
-            text_elements_modality=text_elements_modality,
-            structured_elements_modality=structured_elements_modality,
-            embed_granularity=embed_granularity,
-        )
-    )
-
-    ingestor = ingestor.vdb_upload(
-        VdbUploadParams(
-            lancedb={
-                "lancedb_uri": lancedb_uri_abs,
-                "table_name": lancedb_table,
-                "overwrite": overwrite,
-                "create_index": True,
-                "hybrid": hybrid,
-            }
-        )
-    )
-
-    if output_directory:
-        ingestor = ingestor.save_to_disk(output_directory=output_directory)
-
-    parsed_gpu_devices = None
-    if gpu_devices:
-        parsed_gpu_devices = [int(d.strip()) for d in gpu_devices.split(",") if d.strip()]
-
-    logger.info("Running in-process ingestion...")
-    start = time.perf_counter()
-
-    ingestor.ingest(
-        params=IngestExecuteParams(
-            show_progress=show_progress,
-            parallel=parallel,
-            max_workers=max_workers,
-            gpu_devices=parsed_gpu_devices,
-            page_chunk_size=page_chunk_size,
-        )
-    )
-
-    elapsed = time.perf_counter() - start
-    logger.info(f"In-process ingestion complete in {elapsed:.2f}s")
-
-
-if __name__ == "__main__":
-    app()
diff --git a/nemo_retriever/tests/test_nr_cli.py b/nemo_retriever/tests/test_nr_cli.py
new file mode 100644
index 000000000..6542c3dfa
--- /dev/null
+++ b/nemo_retriever/tests/test_nr_cli.py
@@ -0,0 +1,153 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Tests for the ``nr`` CLI entrypoint, the ``nr ingest`` command group,
+and the ``nr ingest inprocess`` / ``nr ingest batch`` subcommands.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+try:
+    from nemo_retriever.adapters.cli.main import app
+except ImportError as _exc:
+    pytest.skip(f"CLI dependencies not available: {_exc}", allow_module_level=True)
+
+from typer.testing import CliRunner
+
+RUNNER = CliRunner()
+
+
+# ---------------------------------------------------------------------------
+# pyproject.toml entrypoint
+# ---------------------------------------------------------------------------
+
+
+def test_pyproject_entrypoint_is_nr() -> None:
+    """The installed script name must be ``nr``, not ``retriever``."""
+    pyproject = Path(__file__).resolve().parents[1] / "pyproject.toml"
+    text = pyproject.read_text()
+    assert "\nnr = " in text
+    assert "\nretriever = " not in text
+
+
+# ---------------------------------------------------------------------------
+# Top-level app
+# ---------------------------------------------------------------------------
+
+
+def test_top_level_help_succeeds() -> None:
+    result = RUNNER.invoke(app, ["--help"])
+    assert result.exit_code == 0
+
+
+def test_top_level_help_lists_ingest() -> None:
+    result = RUNNER.invoke(app, ["--help"])
+    assert "ingest" in result.output
+
+
+def test_version_flag() -> None:
+    result = RUNNER.invoke(app, ["--version"])
+    assert result.exit_code == 0
+    assert result.output.strip()
+
+
+def test_existing_subcommands_still_registered() -> None:
+    """All pre-existing subcommands must still appear in top-level help."""
+    result = RUNNER.invoke(app, ["--help"])
+    for name in ("audio", "pdf", "local", "harness", "recall", "benchmark"):
+        assert name in result.output, f"Expected subcommand {name!r} in top-level help"
+
+
+# ---------------------------------------------------------------------------
+# ``nr ingest`` command group
+# ---------------------------------------------------------------------------
+
+
+def test_ingest_help_succeeds() -> None:
+    result = RUNNER.invoke(app, ["ingest", "--help"])
+    assert result.exit_code == 0
+
+
+def test_ingest_help_lists_batch_and_inprocess() -> None:
+    result = RUNNER.invoke(app, ["ingest", "--help"])
+    assert "batch" in result.output
+    assert "inprocess" in result.output
+
+
+# ---------------------------------------------------------------------------
+# ``nr ingest batch`` subcommand
+# ---------------------------------------------------------------------------
+
+
+def test_ingest_batch_help_succeeds() -> None:
+    result = RUNNER.invoke(app, ["ingest", "batch", "--help"])
+    assert result.exit_code == 0
+
+
+def test_ingest_batch_help_shows_key_options() -> None:
+    result = RUNNER.invoke(app, ["ingest", "batch", "--help"])
+    for flag in ("--embed-model-name", "--lancedb-uri", "--input-type", "--debug"):
+        assert flag in result.output, f"Expected {flag!r} in batch --help output"
+
+
+# ---------------------------------------------------------------------------
+# ``nr ingest inprocess`` subcommand – help / option presence
+# ---------------------------------------------------------------------------
+
+
+def test_ingest_inprocess_help_succeeds() -> None:
+    result = RUNNER.invoke(app, ["ingest", "inprocess", "--help"])
+    assert result.exit_code == 0
+
+
+def test_ingest_inprocess_help_shows_extract_options() -> None:
+    result = RUNNER.invoke(app, ["ingest", "inprocess", "--help"])
+    for flag in (
+        "--use-table-structure",
+        "--table-output-format",
+        "--table-structure-invoke-url",
+        "--page-elements-invoke-url",
+        "--ocr-invoke-url",
+    ):
+        assert flag in result.output, f"Expected {flag!r} in inprocess --help"
+
+
+def test_ingest_inprocess_help_shows_embed_options() -> None:
+    result = RUNNER.invoke(app, ["ingest", "inprocess", "--help"])
+    for flag in (
+        "--embed-model-name",
+        "--embed-invoke-url",
+        "--embed-modality",
+        "--embed-granularity",
+        "--text-elements-modality",
+        "--structured-elements-modality",
+    ):
+        assert flag in result.output, f"Expected {flag!r} in inprocess --help"
+
+
+def test_ingest_inprocess_help_shows_execution_options() -> None:
+    result = RUNNER.invoke(app, ["ingest", "inprocess", "--help"])
+    for flag in ("--max-workers", "--gpu-devices", "--num-gpus"):
+        assert flag in result.output, f"Expected {flag!r} in inprocess --help"
+
+
+def test_ingest_inprocess_help_shows_recall_options() -> None:
+    result = RUNNER.invoke(app, ["ingest", "inprocess", "--help"])
+    for flag in ("--query-csv", "--no-recall-details"):
+        assert flag in result.output, f"Expected {flag!r} in inprocess --help"
+
+
+# ---------------------------------------------------------------------------
+# ``nr ingest inprocess`` subcommand – argument validation
+# ---------------------------------------------------------------------------
+
+
+def test_ingest_inprocess_rejects_missing_path() -> None:
+    result = RUNNER.invoke(app, ["ingest", "inprocess"])
+    assert result.exit_code != 0

From 775fcf4ac2148b65340c31c5e24984689a1bffa3 Mon Sep 17 00:00:00 2001
From: Jeremy Dyer <jdye64@gmail.com>
Date: Fri, 6 Mar 2026 15:25:25 -0500
Subject: [PATCH 3/4] updates and unit tests

---
 .../nemo_retriever/examples/batch_pipeline.py |  2 +-
 .../examples/inprocess_pipeline.py            |  2 +-
 nemo_retriever/tests/test_nr_cli.py           | 28 +++++++++----------
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/nemo_retriever/src/nemo_retriever/examples/batch_pipeline.py b/nemo_retriever/src/nemo_retriever/examples/batch_pipeline.py
index 1999c6437..c11e96b2f 100644
--- a/nemo_retriever/src/nemo_retriever/examples/batch_pipeline.py
+++ b/nemo_retriever/src/nemo_retriever/examples/batch_pipeline.py
@@ -345,7 +345,7 @@ def _hit_key_and_distance(hit: dict) -> tuple[str | None, float | None]:
     return key, dist
 
 
-@app.command()
+@app.command(name="pipeline")
 def main(
     ctx: typer.Context,
     debug: bool = typer.Option(
diff --git a/nemo_retriever/src/nemo_retriever/examples/inprocess_pipeline.py b/nemo_retriever/src/nemo_retriever/examples/inprocess_pipeline.py
index 37835af99..930505da8 100644
--- a/nemo_retriever/src/nemo_retriever/examples/inprocess_pipeline.py
+++ b/nemo_retriever/src/nemo_retriever/examples/inprocess_pipeline.py
@@ -96,7 +96,7 @@ def _hit_key_and_distance(hit: dict) -> tuple[str | None, float | None]:
     return key, dist
 
 
-@app.command()
+@app.command(name="pipeline")
 def main(
     input_path: Path = typer.Argument(
         ...,
diff --git a/nemo_retriever/tests/test_nr_cli.py b/nemo_retriever/tests/test_nr_cli.py
index 6542c3dfa..24d37ec3c 100644
--- a/nemo_retriever/tests/test_nr_cli.py
+++ b/nemo_retriever/tests/test_nr_cli.py
@@ -86,14 +86,14 @@ def test_ingest_help_lists_batch_and_inprocess() -> None:
 
 
 def test_ingest_batch_help_succeeds() -> None:
-    result = RUNNER.invoke(app, ["ingest", "batch", "--help"])
+    result = RUNNER.invoke(app, ["ingest", "batch", "pipeline", "--help"])
     assert result.exit_code == 0
 
 
 def test_ingest_batch_help_shows_key_options() -> None:
-    result = RUNNER.invoke(app, ["ingest", "batch", "--help"])
+    result = RUNNER.invoke(app, ["ingest", "batch", "pipeline", "--help"])
     for flag in ("--embed-model-name", "--lancedb-uri", "--input-type", "--debug"):
-        assert flag in result.output, f"Expected {flag!r} in batch --help output"
+        assert flag in result.output, f"Expected {flag!r} in batch pipeline --help output"
 
 
 # ---------------------------------------------------------------------------
@@ -102,12 +102,12 @@ def test_ingest_batch_help_shows_key_options() -> None:
 
 
 def test_ingest_inprocess_help_succeeds() -> None:
-    result = RUNNER.invoke(app, ["ingest", "inprocess", "--help"])
+    result = RUNNER.invoke(app, ["ingest", "inprocess", "pipeline", "--help"])
     assert result.exit_code == 0
 
 
 def test_ingest_inprocess_help_shows_extract_options() -> None:
-    result = RUNNER.invoke(app, ["ingest", "inprocess", "--help"])
+    result = RUNNER.invoke(app, ["ingest", "inprocess", "pipeline", "--help"])
     for flag in (
         "--use-table-structure",
         "--table-output-format",
@@ -115,11 +115,11 @@ def test_ingest_inprocess_help_shows_extract_options() -> None:
         "--page-elements-invoke-url",
         "--ocr-invoke-url",
     ):
-        assert flag in result.output, f"Expected {flag!r} in inprocess --help"
+        assert flag in result.output, f"Expected {flag!r} in inprocess pipeline --help"
 
 
 def test_ingest_inprocess_help_shows_embed_options() -> None:
-    result = RUNNER.invoke(app, ["ingest", "inprocess", "--help"])
+    result = RUNNER.invoke(app, ["ingest", "inprocess", "pipeline", "--help"])
     for flag in (
         "--embed-model-name",
         "--embed-invoke-url",
@@ -128,26 +128,26 @@ def test_ingest_inprocess_help_shows_embed_options() -> None:
         "--text-elements-modality",
         "--structured-elements-modality",
     ):
-        assert flag in result.output, f"Expected {flag!r} in inprocess --help"
+        assert flag in result.output, f"Expected {flag!r} in inprocess pipeline --help"
 
 
 def test_ingest_inprocess_help_shows_execution_options() -> None:
-    result = RUNNER.invoke(app, ["ingest", "inprocess", "--help"])
+    result = RUNNER.invoke(app, ["ingest", "inprocess", "pipeline", "--help"])
     for flag in ("--max-workers", "--gpu-devices", "--num-gpus"):
-        assert flag in result.output, f"Expected {flag!r} in inprocess --help"
+        assert flag in result.output, f"Expected {flag!r} in inprocess pipeline --help"
 
 
 def test_ingest_inprocess_help_shows_recall_options() -> None:
-    result = RUNNER.invoke(app, ["ingest", "inprocess", "--help"])
+    result = RUNNER.invoke(app, ["ingest", "inprocess", "pipeline", "--help"])
     for flag in ("--query-csv", "--no-recall-details"):
-        assert flag in result.output, f"Expected {flag!r} in inprocess --help"
+        assert flag in result.output, f"Expected {flag!r} in inprocess pipeline --help"
 
 
 # ---------------------------------------------------------------------------
-# ``nr ingest inprocess`` subcommand – argument validation
+# ``nr ingest inprocess pipeline`` – argument validation
 # ---------------------------------------------------------------------------
 
 
 def test_ingest_inprocess_rejects_missing_path() -> None:
-    result = RUNNER.invoke(app, ["ingest", "inprocess"])
+    result = RUNNER.invoke(app, ["ingest", "inprocess", "pipeline"])
     assert result.exit_code != 0

From 8b2e7b71fbdb2f312489418119ba3e705ef9f770 Mon Sep 17 00:00:00 2001
From: Jeremy Dyer <jdye64@gmail.com>
Date: Fri, 6 Mar 2026 15:33:09 -0500
Subject: [PATCH 4/4] Fix syntax issue

---
 .../src/nemo_retriever/examples/batch_pipeline.py           | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/nemo_retriever/src/nemo_retriever/examples/batch_pipeline.py b/nemo_retriever/src/nemo_retriever/examples/batch_pipeline.py
index c11e96b2f..2392f964a 100644
--- a/nemo_retriever/src/nemo_retriever/examples/batch_pipeline.py
+++ b/nemo_retriever/src/nemo_retriever/examples/batch_pipeline.py
@@ -855,11 +855,11 @@ def main(
         )
 
         ingest_elapsed_s = time.perf_counter() - ingest_start
+        num_rows = ingest_results.groupby("source_id").count().count()
         logger.info(
-            f"Ingestion complete. {len(ingest_results)} rows procesed in "
-            f"{ingest_elapsed_s:.2f} seconds. {len(ingest_results)/ingest_elapsed_s:.2f} PPS"
+            f"Ingestion complete. {num_rows} rows procesed in "
+            f"{ingest_elapsed_s:.2f} seconds. {num_rows/ingest_elapsed_s:.2f} PPS"
         )
-        logger.info(f"Ingestion Dataset: {ingestor.get_dataset()}")
 
         if isinstance(ingestor, BatchIngestor):
             error_rows = ingestor.get_error_rows(dataset=ingest_results).materialize()