From 050943c068449ea4466ce34c3edb86759ac51725 Mon Sep 17 00:00:00 2001 From: Kurt Heiss Date: Fri, 13 Mar 2026 13:04:39 -0700 Subject: [PATCH] updated per bug review --- docs/docs/extraction/audio.md | 2 +- docs/docs/extraction/benchmarking.md | 74 +++++++++---------- docs/docs/extraction/cli-reference.md | 2 +- docs/docs/extraction/content-metadata.md | 6 +- docs/docs/extraction/custom-metadata.md | 22 +++--- docs/docs/extraction/faq.md | 3 +- docs/docs/extraction/nimclient.md | 18 ++--- docs/docs/extraction/overview.md | 2 +- docs/docs/extraction/prerequisites.md | 3 +- docs/docs/extraction/python-api-reference.md | 8 +- docs/docs/extraction/quickstart-guide.md | 17 +++-- .../extraction/quickstart-library-mode.md | 31 ++++---- docs/docs/extraction/support-matrix.md | 40 ++++++---- .../docs/extraction/user-defined-functions.md | 14 ++-- docs/docs/extraction/user-defined-stages.md | 10 +-- docs/docs/extraction/v2-api-guide.md | 4 +- docs/docs/extraction/vlm-embed.md | 2 +- 17 files changed, 139 insertions(+), 119 deletions(-) diff --git a/docs/docs/extraction/audio.md b/docs/docs/extraction/audio.md index 4be7ee8ac..192c4ecf4 100644 --- a/docs/docs/extraction/audio.md +++ b/docs/docs/extraction/audio.md @@ -27,7 +27,7 @@ to transcribe speech to text, which is then embedded by using the Nemotron embed !!! important - Due to limitations in available VRAM controls in the current release, the RIVA ASR NIM microservice must run on a [dedicated additional GPU](support-matrix.md). For the full list of requirements, refer to [Support Matrix](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/support-matrix.html). + Due to limitations in available VRAM controls in the current release, the RIVA ASR NIM microservice must run on a [dedicated additional GPU](support-matrix.md). For the full list of requirements, refer to [Support Matrix](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/support-matrix/support-matrix.html). 
This pipeline enables users to retrieve speech files at the segment level. diff --git a/docs/docs/extraction/benchmarking.md b/docs/docs/extraction/benchmarking.md index 62abbf302..54e6eb171 100644 --- a/docs/docs/extraction/benchmarking.md +++ b/docs/docs/extraction/benchmarking.md @@ -35,20 +35,20 @@ Before you use this documentation, you need the following: ### Run Your First Test ```bash -# 1. Navigate to the nemo-retriever-bench directory +# 1. Navigate to the harness directory cd tools/harness # 2. Install dependencies uv sync # 3. Run with a pre-configured dataset (assumes services are running) -uv run nemo-retriever-bench --case=e2e --dataset=bo767 +uv run nv-ingest-harness-run --case=e2e --dataset=bo767 # Or use a custom path that uses the "active" configuration -uv run nemo-retriever-bench --case=e2e --dataset=/path/to/your/data +uv run nv-ingest-harness-run --case=e2e --dataset=/path/to/your/data # With managed infrastructure (starts/stops services) -uv run nemo-retriever-bench --case=e2e --dataset=bo767 --managed +uv run nv-ingest-harness-run --case=e2e --dataset=bo767 --managed ``` ## Configuration System @@ -144,13 +144,13 @@ datasets: **Usage:** ```bash # Single dataset - configs applied automatically -uv run nemo-retriever-bench --case=e2e --dataset=bo767 +uv run nv-ingest-harness-run --case=e2e --dataset=bo767 # Multiple datasets (sweeping) - each gets its own config -uv run nemo-retriever-bench --case=e2e --dataset=bo767,earnings,bo20 +uv run nv-ingest-harness-run --case=e2e --dataset=bo767,earnings,bo20 # Custom path still works (uses active section config) -uv run nemo-retriever-bench --case=e2e --dataset=/custom/path +uv run nv-ingest-harness-run --case=e2e --dataset=/custom/path ``` **Dataset Extraction Settings:** @@ -176,7 +176,7 @@ Example: # YAML active section has api_version: v1 # Dataset bo767 has extract_images: false # Override via environment variable (highest priority) -EXTRACT_IMAGES=true API_VERSION=v2 uv run nemo-retriever-bench 
--case=e2e --dataset=bo767 +EXTRACT_IMAGES=true API_VERSION=v2 uv run nv-ingest-harness-run --case=e2e --dataset=bo767 # Result: Uses bo767 path, but extract_images=true (env override) and api_version=v2 (env override) ``` @@ -240,13 +240,13 @@ Configuration is validated on load with helpful error messages. ```bash # Run with default YAML configuration (assumes services are running) -uv run nemo-retriever-bench --case=e2e --dataset=bo767 +uv run nv-ingest-harness-run --case=e2e --dataset=bo767 # With document-level analysis -uv run nemo-retriever-bench --case=e2e --dataset=bo767 --doc-analysis +uv run nv-ingest-harness-run --case=e2e --dataset=bo767 --doc-analysis # With managed infrastructure (starts/stops services) -uv run nemo-retriever-bench --case=e2e --dataset=bo767 --managed +uv run nv-ingest-harness-run --case=e2e --dataset=bo767 --managed ``` ### Dataset Sweeping @@ -255,7 +255,7 @@ Run multiple datasets in a single command - each dataset automatically gets its ```bash # Sweep multiple datasets -uv run nemo-retriever-bench --case=e2e --dataset=bo767,earnings,bo20 +uv run nv-ingest-harness-run --case=e2e --dataset=bo767,earnings,bo20 # Each dataset runs sequentially with its own: # - Extraction settings (from dataset config) @@ -263,13 +263,13 @@ uv run nemo-retriever-bench --case=e2e --dataset=bo767,earnings,bo20 # - Results summary at the end # With managed infrastructure (services start once, shared across all datasets) -uv run nemo-retriever-bench --case=e2e --dataset=bo767,earnings,bo20 --managed +uv run nv-ingest-harness-run --case=e2e --dataset=bo767,earnings,bo20 --managed # E2E+Recall sweep (each dataset ingests then evaluates recall) -uv run nemo-retriever-bench --case=e2e_recall --dataset=bo767,earnings +uv run nv-ingest-harness-run --case=e2e_recall --dataset=bo767,earnings # Recall-only sweep (evaluates existing collections) -uv run nemo-retriever-bench --case=recall --dataset=bo767,earnings +uv run nv-ingest-harness-run --case=recall 
--dataset=bo767,earnings ``` **Sweep Behavior:** @@ -283,10 +283,10 @@ uv run nemo-retriever-bench --case=recall --dataset=bo767,earnings ```bash # Override via environment (useful for CI/CD) -API_VERSION=v2 EXTRACT_TABLES=false uv run nemo-retriever-bench --case=e2e +API_VERSION=v2 EXTRACT_TABLES=false uv run nv-ingest-harness-run --case=e2e # Temporary changes without editing YAML -DATASET_DIR=/custom/path uv run nemo-retriever-bench --case=e2e +DATASET_DIR=/custom/path uv run nv-ingest-harness-run --case=e2e ``` ## Test Scenarios @@ -472,23 +472,23 @@ recall: ```bash # Evaluate existing bo767 collections (no reranker) # recall_dataset automatically set from dataset config -uv run nemo-retriever-bench --case=recall --dataset=bo767 +uv run nv-ingest-harness-run --case=recall --dataset=bo767 # With reranker only (set reranker_mode in YAML recall section) -uv run nemo-retriever-bench --case=recall --dataset=bo767 +uv run nv-ingest-harness-run --case=recall --dataset=bo767 # Sweep multiple datasets for recall evaluation -uv run nemo-retriever-bench --case=recall --dataset=bo767,earnings +uv run nv-ingest-harness-run --case=recall --dataset=bo767,earnings ``` **E2E + Recall (fresh ingestion):** ```bash # Fresh ingestion with recall evaluation # recall_dataset automatically set from dataset config -uv run nemo-retriever-bench --case=e2e_recall --dataset=bo767 +uv run nv-ingest-harness-run --case=e2e_recall --dataset=bo767 # Sweep multiple datasets (each ingests then evaluates) -uv run nemo-retriever-bench --case=e2e_recall --dataset=bo767,earnings +uv run nv-ingest-harness-run --case=e2e_recall --dataset=bo767,earnings ``` **Dataset configuration:** @@ -536,7 +536,7 @@ The easiest way to test multiple datasets is using dataset sweeping: ```bash # Test multiple datasets - each gets its native config automatically -uv run nemo-retriever-bench --case=e2e --dataset=bo767,earnings,bo20 +uv run nv-ingest-harness-run --case=e2e --dataset=bo767,earnings,bo20 # Each dataset 
runs with its pre-configured extraction settings # Results are organized in separate artifact directories @@ -547,7 +547,7 @@ uv run nemo-retriever-bench --case=e2e --dataset=bo767,earnings,bo20 To sweep through different parameter values: 1. **Edit** `test_configs.yaml` - Update values in the `active` section -2. **Run** the test: `uv run nemo-retriever-bench --case=e2e --dataset=` +2. **Run** the test: `uv run nv-ingest-harness-run --case=e2e --dataset=` 3. **Analyze** results in `artifacts/_/` 4. **Repeat** steps 1-3 for next parameter combination @@ -555,18 +555,18 @@ Example parameter sweep workflow: ```bash # Test 1: Baseline V1 vim test_configs.yaml # Set: api_version=v1, extract_tables=true -uv run nemo-retriever-bench --case=e2e --dataset=bo767 +uv run nv-ingest-harness-run --case=e2e --dataset=bo767 # Test 2: V2 with 32-page splitting vim test_configs.yaml # Set: api_version=v2, pdf_split_page_count=32 -uv run nemo-retriever-bench --case=e2e --dataset=bo767 +uv run nv-ingest-harness-run --case=e2e --dataset=bo767 # Test 3: V2 with 8-page splitting vim test_configs.yaml # Set: pdf_split_page_count=8 -uv run nemo-retriever-bench --case=e2e --dataset=bo767 +uv run nv-ingest-harness-run --case=e2e --dataset=bo767 # Test 4: Tables disabled (override via env var) -EXTRACT_TABLES=false uv run nemo-retriever-bench --case=e2e --dataset=bo767 +EXTRACT_TABLES=false uv run nv-ingest-harness-run --case=e2e --dataset=bo767 ``` **Note**: Each test run creates a new timestamped artifact directory, so you can compare results across sweeps. 
@@ -576,7 +576,7 @@ EXTRACT_TABLES=false uv run nemo-retriever-bench --case=e2e --dataset=bo767 ### Attach Mode (Default) ```bash -uv run nemo-retriever-bench --case=e2e --dataset=bo767 +uv run nv-ingest-harness-run --case=e2e --dataset=bo767 ``` - **Default behavior**: Assumes services are already running @@ -588,7 +588,7 @@ uv run nemo-retriever-bench --case=e2e --dataset=bo767 ### Managed Mode ```bash -uv run nemo-retriever-bench --case=e2e --dataset=bo767 --managed +uv run nv-ingest-harness-run --case=e2e --dataset=bo767 --managed ``` - Starts Docker services automatically @@ -600,10 +600,10 @@ uv run nemo-retriever-bench --case=e2e --dataset=bo767 --managed **Managed mode options:** ```bash # Skip Docker image rebuild (faster startup) -uv run nemo-retriever-bench --case=e2e --dataset=bo767 --managed --no-build +uv run nv-ingest-harness-run --case=e2e --dataset=bo767 --managed --no-build # Keep services running after test (useful for multi-test scenarios) -uv run nemo-retriever-bench --case=e2e --dataset=bo767 --managed --keep-up +uv run nv-ingest-harness-run --case=e2e --dataset=bo767 --managed --keep-up ``` ## Artifacts and Logging @@ -631,7 +631,7 @@ tools/harness/artifacts/__UTC/ Enable per-document element breakdown: ```bash -uv run nemo-retriever-bench --case=e2e --doc-analysis +uv run nv-ingest-harness-run --case=e2e --doc-analysis ``` **Sample Output:** @@ -812,7 +812,7 @@ The framework is dataset-agnostic and supports multiple approaches: **Option 1: Use pre-configured dataset (Recommended)** ```bash # Dataset configs automatically applied -uv run nemo-retriever-bench --case=e2e --dataset=bo767 +uv run nv-ingest-harness-run --case=e2e --dataset=bo767 ``` **Option 2: Add new dataset to YAML** @@ -827,17 +827,17 @@ datasets: extract_infographics: false recall_dataset: null # or set to evaluator name if applicable ``` -Then use: `uv run nemo-retriever-bench --case=e2e --dataset=my_dataset` +Then use: `uv run nv-ingest-harness-run --case=e2e 
--dataset=my_dataset` **Option 3: Use custom path (uses active section config)** ```bash -uv run nemo-retriever-bench --case=e2e --dataset=/path/to/your/dataset +uv run nv-ingest-harness-run --case=e2e --dataset=/path/to/your/dataset ``` **Option 4: Environment variable override** ```bash # Override specific settings via env vars -EXTRACT_IMAGES=true uv run nemo-retriever-bench --case=e2e --dataset=bo767 +EXTRACT_IMAGES=true uv run nv-ingest-harness-run --case=e2e --dataset=bo767 ``` **Best Practice**: For repeated testing, add your dataset to the `datasets` section with its native extraction settings. This ensures consistent configuration and enables dataset sweeping. diff --git a/docs/docs/extraction/cli-reference.md b/docs/docs/extraction/cli-reference.md index 5bee4f046..6ce311402 100644 --- a/docs/docs/extraction/cli-reference.md +++ b/docs/docs/extraction/cli-reference.md @@ -203,7 +203,7 @@ nemo-retriever \ To submit a .pdf file with both a splitting task and an extraction task, run the following code. !!! note - Currently, `split` only works for pdfium, nemotron-parse, and Unstructured.io. + Currently, `split` only works for pdfium and nemotron-parse. ```bash nemo-retriever \ diff --git a/docs/docs/extraction/content-metadata.md b/docs/docs/extraction/content-metadata.md index c02aa8c46..df16242fa 100644 --- a/docs/docs/extraction/content-metadata.md +++ b/docs/docs/extraction/content-metadata.md @@ -164,7 +164,7 @@ Describes the structural location of content within a document. | `span` | `int` | `-1` | Span identifier within a line, for finer granularity. | | `nearby_objects` | `NearbyObjectsSchema` | `NearbyObjectsSchema()` | Information about objects (text, images, structured data) near the current content. See [NearbyObjectsSchema](#nearbyobjectsschema). | -### `NearbyObjectsSchema` (Currently Unused) +### `NearbyObjectsSchema` (Currently Unused) {#nearbyobjectsschema} Container for different types of nearby objects. 
| Field | Type | Default Value | Description | @@ -243,7 +243,7 @@ Specific metadata for audio content. | `audio_transcript` | `str` | `""` | Transcript of the audio content. | | `audio_type` | `str` | `""` | Type or format of the audio (e.g., `mp3`, `wav`). | -### `ErrorMetadataSchema` (Currently Unused) +### `ErrorMetadataSchema` (Currently Unused) {#errormetadataschema} Metadata describing errors encountered during processing. | Field | Type | Default Value | Description | @@ -253,7 +253,7 @@ Metadata describing errors encountered during processing. | `source_id` | `str` | `""` | Identifier of the source item that caused the error, if applicable. | | `error_msg` | `str` | *Required* | The error message. | -### `InfoMessageMetadataSchema` (Currently Unused) +### `InfoMessageMetadataSchema` (Currently Unused) {#infomessagemetadataschema} Informational messages related to processing. | Field | Type | Default Value | Description | diff --git a/docs/docs/extraction/custom-metadata.md b/docs/docs/extraction/custom-metadata.md index 1ac644243..613443e93 100644 --- a/docs/docs/extraction/custom-metadata.md +++ b/docs/docs/extraction/custom-metadata.md @@ -60,7 +60,7 @@ For more information about the `Ingestor` class, see [Use the NeMo Retriever Lib For more information about the `vdb_upload` method, see [Upload Data](data-store.md). ```python -from nemo_retriever.client import Ingestor +from nv_ingest_client.client.interface import Ingestor hostname="localhost" collection_name = "nemo_retriever_collection" @@ -142,7 +142,7 @@ you can use the `content_metadata` field to filter search results. The following example uses a filter expression to narrow results by department. 
```python -from nemo_retriever.util.milvus import query +from nv_ingest_client.util.vdb.milvus import nvingest_retrieval hostname="localhost" collection_name = "nemo_retriever_collection" @@ -156,15 +156,15 @@ queries = ["this is expensive"] q_results = [] for que in queries: q_results.append( - query( - [que], - collection_name, - milvus_uri=f"http://{hostname}:19530", - embedding_endpoint=f"http://{hostname}:8012/v1", - hybrid=sparse, - top_k=top_k, - model_name=model_name, - gpu_search=False, + nvingest_retrieval( + [que], + collection_name=collection_name, + milvus_uri=f"http://{hostname}:19530", + embedding_endpoint=f"http://{hostname}:8012/v1", + hybrid=sparse, + top_k=top_k, + model_name=model_name, + gpu_search=False, _filter=filter_expr ) ) diff --git a/docs/docs/extraction/faq.md b/docs/docs/extraction/faq.md index d7eabd490..a83d2c20c 100644 --- a/docs/docs/extraction/faq.md +++ b/docs/docs/extraction/faq.md @@ -76,12 +76,11 @@ For more information, refer to [Extract Specific Elements from PDFs](python-api- ```python Ingestor(client=client) .files("data/multimodal_test.pdf") - .extract( + .extract( extract_text=True, extract_tables=True, extract_charts=True, extract_images=True, - paddle_output_format="markdown", extract_infographics=True, text_depth="page" ) diff --git a/docs/docs/extraction/nimclient.md b/docs/docs/extraction/nimclient.md index cc1c402f2..9d4a5fe42 100644 --- a/docs/docs/extraction/nimclient.md +++ b/docs/docs/extraction/nimclient.md @@ -12,7 +12,7 @@ The NimClient architecture consists of two main components: 1. **NimClient**: The client class that handles communication with NIM endpoints via gRPC or HTTP protocols 2. **ModelInterface**: An abstract base class that defines how to format input data, parse output responses, and process inference results for specific models -For advanced usage patterns, see the existing model interfaces in `api/src/nemo_retriever/internal/primitives/nim/model_interface/`. 
+For advanced usage patterns, see the existing model interfaces in `api/src/nv_ingest_api/internal/primitives/nim/model_interface/`. ## Quick Start @@ -20,8 +20,8 @@ For advanced usage patterns, see the existing model interfaces in `api/src/nemo_ ### Basic NimClient Creation ```python -from nemo_retriever.util.nim import create_inference_client -from nemo_retriever.internal.primitives.nim import ModelInterface +from nv_ingest_api.util.nim import create_inference_client +from nv_ingest_api.internal.primitives.nim import ModelInterface # Create a custom model interface (see examples below) model_interface = MyCustomModelInterface() @@ -48,7 +48,7 @@ results = client.infer(data, model_name="your-model-name") ```python import os -from nemo_retriever.util.nim import create_inference_client +from nv_ingest_api.util.nim import create_inference_client # Use environment variables for configuration auth_token = os.getenv("NGC_API_KEY") @@ -71,7 +71,7 @@ To integrate a new NIM, you need to create a custom `ModelInterface` subclass th ```python from typing import Dict, Any, List, Tuple, Optional import numpy as np -from nemo_retriever.internal.primitives.nim import ModelInterface +from nv_ingest_api.internal.primitives.nim import ModelInterface class MyCustomModelInterface(ModelInterface): """ @@ -305,7 +305,7 @@ class TextGenerationModelInterface(ModelInterface): ```python import base64 -from nemo_retriever.util.image_processing.transforms import numpy_to_base64 +from nv_ingest_api.util.image_processing.transforms import numpy_to_base64 class ImageAnalysisModelInterface(ModelInterface): """Interface for image analysis NIMs (e.g., vision models).""" @@ -382,8 +382,8 @@ class ImageAnalysisModelInterface(ModelInterface): ### Basic UDF with NimClient ```python -from nemo_retriever.internal.primitives.control_message import IngestControlMessage -from nemo_retriever.util.nim import create_inference_client +from nv_ingest_api.internal.primitives.control_message import 
IngestControlMessage +from nv_ingest_api.util.nim import create_inference_client import os def analyze_document_with_nim(control_message: IngestControlMessage) -> IngestControlMessage: @@ -570,7 +570,7 @@ If memory issues persist, you can reduce the `NIM_TRITON_RATE_LIMIT` value — e import logging # Enable debug logging -logging.getLogger("nemo_retriever.internal.primitives.nim").setLevel(logging.DEBUG) +logging.getLogger("nv_ingest_api.internal.primitives.nim").setLevel(logging.DEBUG) # Test your model interface separately model_interface = MyCustomModelInterface() diff --git a/docs/docs/extraction/overview.md b/docs/docs/extraction/overview.md index 263204ddc..3c2d390d9 100644 --- a/docs/docs/extraction/overview.md +++ b/docs/docs/extraction/overview.md @@ -28,7 +28,7 @@ NeMo Retriever Library is a microservice service that does the following: - Accept a JSON job description, containing a document payload, and a set of ingestion tasks to perform on that payload. - Allow the results of a job to be retrieved. The result is a JSON dictionary that contains a list of metadata describing objects extracted from the base document, and processing annotations and timing/trace data. -- Support multiple methods of extraction for each document type to balance trade-offs between throughput and accuracy. For example, for .pdf documents, extraction is performed by using pdfium, [nemotron-parse](https://build.nvidia.com/nvidia/nemotron-parse), Unstructured.io, and Adobe Content Extraction Services. +- Support multiple methods of extraction for each document type to balance trade-offs between throughput and accuracy. For example, for .pdf documents, extraction is performed by using pdfium and [nemotron-parse](https://build.nvidia.com/nvidia/nemotron-parse). - Support various types of pre- and post- processing operations, including text splitting and chunking, transform and filtering, embedding generation, and image offloading to storage. 
NeMo Retriever Library supports the following file types: diff --git a/docs/docs/extraction/prerequisites.md b/docs/docs/extraction/prerequisites.md index 902c499c8..c96af36fa 100644 --- a/docs/docs/extraction/prerequisites.md +++ b/docs/docs/extraction/prerequisites.md @@ -11,6 +11,7 @@ Before you begin using [NeMo Retriever Library](overview.md), ensure the followi ## Software Requirements - Linux operating systems (Ubuntu 22.04 or later recommended) +- **Python 3.12 or later** (required for NeMo Retriever Library packages; see note below) - [Docker](https://docs.docker.com/engine/install/) - [Docker Compose](https://docs.docker.com/compose/install/) - [Docker Buildx](https://docs.docker.com/build/concepts/overview/#buildx) `>= 0.17` (Compose 2.40+ enforces this) @@ -21,7 +22,7 @@ Before you begin using [NeMo Retriever Library](overview.md), ensure the followi !!! note - You install Python later. + Install **Python 3.12 or later** before creating your environment. Using Python 3.10 or 3.11 will cause dependency resolution failures when installing NeMo Retriever Library packages. diff --git a/docs/docs/extraction/python-api-reference.md b/docs/docs/extraction/python-api-reference.md index b9d914649..e5908d028 100644 --- a/docs/docs/extraction/python-api-reference.md +++ b/docs/docs/extraction/python-api-reference.md @@ -80,7 +80,7 @@ The caption task can call a vision-language model (VLM) with the following optio Example: ```python -from nemo_retriever.client.interface import Ingestor +from nv_ingest_client.client.interface import Ingestor ingestor = ( Ingestor() @@ -224,7 +224,7 @@ The `extract` method enables different types of data to be extracted. Use the following code to extract a single PDF file. 
```python -from nemo_retriever.client.interface import Ingestor +from nv_ingest_client.client.interface import Ingestor # Initialize Ingestor with a local PDF file ingestor = Ingestor().files("path/to/document.pdf") @@ -527,7 +527,7 @@ The caption task can call a VLM with optional prompt and system prompt overrides Example: ```python -from nemo_retriever.client.interface import Ingestor +from nv_ingest_client.client.interface import Ingestor ingestor = ( Ingestor() @@ -662,7 +662,7 @@ For more information on environment variables, refer to [Environment Variables]( Use the following code to extract mp3 audio content. ```python -from nemo_retriever.client import Ingestor +from nv_ingest_client.client.interface import Ingestor ingestor = Ingestor().files("audio_file.mp3") diff --git a/docs/docs/extraction/quickstart-guide.md b/docs/docs/extraction/quickstart-guide.md index a996d4f21..015094216 100644 --- a/docs/docs/extraction/quickstart-guide.md +++ b/docs/docs/extraction/quickstart-guide.md @@ -82,6 +82,12 @@ h. Run the command `docker ps`. You should see output similar to the following. ``` CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES + ... + ``` + +To run the NeMo Retriever Library Python client from your host machine, **Python 3.12 or later is required**. Create a virtual environment and install the client packages: + +```shell uv venv --python 3.12 nv-ingest-dev source nv-ingest-dev/bin/activate uv pip install nv-ingest==26.03.0-RC2 nv-ingest-api==26.03.0-RC2 nv-ingest-client==26.03.0-RC2 @@ -89,7 +95,7 @@ uv pip install nv-ingest==26.03.0-RC2 nv-ingest-api==26.03.0-RC2 nv-ingest-clien !!! tip - To confirm that you have activated your Conda environment, run `which pip` and `which python`, and confirm that you see `nemo_retriever` in the result. You can do this before any pip or python command that you run. 
+ To confirm that you have activated your virtual environment, run `which pip` and `which python`, and confirm that you see `nv-ingest-dev` or your venv path in the result. You can do this before any pip or python command that you run. !!! note @@ -131,9 +137,10 @@ The following examples demonstrate how to extract text, charts, tables, and imag ```python import logging, os, time -from nemo_retriever.client import Ingestor, NemoRetrieverClient -from nemo_retriever.util.process_json_files import ingest_json_results_to_blob -client = NemoRetrieverClient( +from nv_ingest_client.client.interface import Ingestor +from nv_ingest_client.client import NvIngestClient +from nv_ingest_client.util.process_json_files import ingest_json_results_to_blob +client = NvIngestClient( message_client_port=7670, message_client_hostname="localhost" ) @@ -459,7 +466,7 @@ docker compose \ ## Specify MIG slices for NIM models -When you deploy NeMo Retriever Library with NIM models on MIG‑enabled GPUs, MIG device slices are requested and scheduled through the `values.yaml` file for the corresponding NIM microservice. For IBM Content-Aware Storage (CAS) deployments, this allows NeMo Retriever Library NIM pods to land only on nodes that expose the desired MIG profiles [raw.githubusercontent](https://raw.githubusercontent.com/NVIDIA/NeMo-Retriever/main/helm/README.md%E2%80%8B).​ +When you deploy NeMo Retriever Library with NIM models on MIG‑enabled GPUs, MIG device slices are requested and scheduled through the `values.yaml` file for the corresponding NIM microservice. 
For IBM Content-Aware Storage (CAS) deployments, this allows NeMo Retriever Library NIM pods to land only on nodes that expose the desired MIG profiles [raw.githubusercontent](https://raw.githubusercontent.com/NVIDIA/NeMo-Retriever/main/helm/README.md).​ To target a specific MIG profile—for example, a 3g.20gb slice on an A100, which is a hardware-partitioned virtual GPU instance that gives your workload a fixed mid-sized share of the A100’s compute plus 20 GB of dedicated GPU memory and behaves like a smaller independent GPU—for a given NIM, configure the `resources` and `nodeSelector` under that NIM’s values path in `values.yaml`. diff --git a/docs/docs/extraction/quickstart-library-mode.md b/docs/docs/extraction/quickstart-library-mode.md index b9e6ca371..866b50dd8 100644 --- a/docs/docs/extraction/quickstart-library-mode.md +++ b/docs/docs/extraction/quickstart-library-mode.md @@ -81,10 +81,11 @@ On a 4 CPU core low end laptop, the following code should take about 10 seconds. ```python import time -from nemo_retriever.framework.orchestration.ray.util.pipeline.pipeline_runners import run_pipeline -from nemo_retriever.client import Ingestor, NemoRetrieverClient -from nemo_retriever.util.message_brokers.simple_message_broker import SimpleClient -from nemo_retriever.util.process_json_files import ingest_json_results_to_blob +from nv_ingest.framework.orchestration.ray.util.pipeline.pipeline_runners import run_pipeline +from nv_ingest_client.client.interface import Ingestor +from nv_ingest_client.client import NvIngestClient +from nv_ingest_api.util.message_brokers.simple_message_broker import SimpleClient +from nv_ingest_client.util.process_json_files import ingest_json_results_to_blob def main(): # Start the pipeline subprocess for library mode @@ -190,7 +191,7 @@ To query for relevant snippets of the ingested content, and use them with an LLM ```python import os from openai import OpenAI -from nemo_retriever.util.milvus import query +from 
nv_ingest_client.util.vdb.milvus import nvingest_retrieval milvus_uri = "milvus.db" collection_name = "test" @@ -198,16 +199,16 @@ sparse=False queries = ["Which animal is responsible for the typos?"] -retrieved_docs = query( +retrieved_docs = nvingest_retrieval( queries, - collection_name, + collection_name=collection_name, milvus_uri=milvus_uri, hybrid=sparse, top_k=1, ) # simple generation example -extract = retrieved_docs[0][0]["entity"]["text"] +extract = retrieved_docs[0][0].get("entity", retrieved_docs[0][0]).get("text", "") client = OpenAI( base_url = "https://integrate.api.nvidia.com/v1", api_key = os.environ["NVIDIA_API_KEY"] @@ -307,8 +308,8 @@ It listens for ingestion requests on port `7671` from an external client. import logging import os -from nemo_retriever.framework.orchestration.ray.util.pipeline.pipeline_runners import run_pipeline -from nemo_retriever.util.logging.configuration import configure_logging as configure_local_logging +from nv_ingest.framework.orchestration.ray.util.pipeline.pipeline_runners import run_pipeline +from nv_ingest_api.util.logging.configuration import configure_logging as configure_local_logging # Configure the logger logger = logging.getLogger(__name__) @@ -353,11 +354,11 @@ import logging import os import time -from nemo_retriever.framework.orchestration.ray.util.pipeline.pipeline_runners import run_pipeline -from nemo_retriever.util.logging.configuration import configure_logging as configure_local_logging -from nemo_retriever.util.message_brokers.simple_message_broker import SimpleClient -from nemo_retriever.client import Ingestor -from nemo_retriever.client import NemoRetrieverClient +from nv_ingest.framework.orchestration.ray.util.pipeline.pipeline_runners import run_pipeline +from nv_ingest_api.util.logging.configuration import configure_logging as configure_local_logging +from nv_ingest_api.util.message_brokers.simple_message_broker import SimpleClient +from nv_ingest_client.client.interface import Ingestor +from 
nv_ingest_client.client import NvIngestClient # Configure the logger logger = logging.getLogger(__name__) diff --git a/docs/docs/extraction/support-matrix.md b/docs/docs/extraction/support-matrix.md index eec709b8c..7873e014c 100644 --- a/docs/docs/extraction/support-matrix.md +++ b/docs/docs/extraction/support-matrix.md @@ -7,12 +7,17 @@ Before you begin using [NeMo Retriever Library](overview.md), ensure that you ha NVIDIA Ingest (nv-ingest) has been renamed to the NeMo Retriever Library. +## Software Requirements + +- **Python**: 3.12 or later. The NeMo Retriever Library core and harness require Python 3.12+; the client supports Python 3.11+. Using Python 3.10 or earlier will cause dependency resolution failures. For details, see [Prerequisites](prerequisites.md). + + ## Core and Advanced Pipeline Features The NeMo Retriever Library core pipeline features run on a single A10G or better GPU. The core pipeline features include the following: -- llama3.2-nv-embedqa-1b-v2 — Embedding model for converting text chunks into vectors. +- llama-nemotron-embed-1b-v2 — Embedding model for converting text chunks into vectors. - nemotron-page-elements-v3 — Detects and classifies images on a page as a table, chart or infographic. - nemotron-table-structure-v1 — Detects rows, columns, and cells within a table to preserve table structure and convert to Markdown format. - nemotron-graphic-elements-v1 — Detects graphic elements within chart images such as titles, legends, axes, and numerical values. @@ -39,6 +44,7 @@ This includes the following: NeMo Retriever Library supports the following GPU hardware. 
- [RTX Pro 6000 Blackwell Server Edition](https://www.nvidia.com/en-us/data-center/rtx-pro-6000-blackwell-server-edition/) +- [RTX PRO 4500 Blackwell](https://www.nvidia.com/en-us/products/workstations/professional-desktop-gpus/rtx-pro-4500/) - [DGX B200](https://www.nvidia.com/en-us/data-center/dgx-b200/) - [H200 NVL](https://www.nvidia.com/en-us/data-center/h200/) - [H100 Tensor Core GPU](https://www.nvidia.com/en-us/data-center/h100/) @@ -49,24 +55,30 @@ NeMo Retriever Library supports the following GPU hardware. The following are the hardware requirements to run NeMo Retriever Library. -|Feature | GPU Option | RTX Pro 6000 | B200 | H200 NVL | H100 | A100 80GB | A100 40GB | A10G | L40S | -|----------------|---------------------------|---------------|---------------|---------------|-------------|-------------|---------------|---------------|--------| -| GPU | Memory | 96GB | 180GB | 141GB | 80GB | 80GB | 40GB | 24GB | 48GB | -| Core Features | Total GPUs | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | -| Core Features | Total Disk Space | ~150GB | ~150GB | ~150GB | ~150GB | ~150GB | ~150GB | ~150GB | ~150GB | -| Audio | Additional Dedicated GPUs | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | -| Audio | Additional Disk Space | ~37GB | ~37GB | ~37GB | ~37GB | ~37GB | ~37GB | ~37GB | ~37GB | -| nemotron-parse | Additional Dedicated GPUs | Not supported | Not supported | Not supported | 1 | 1 | 1 | 1 | 1 | -| nemotron-parse | Additional Disk Space | Not supported | Not supported | Not supported | ~16GB | ~16GB | ~16GB | ~16GB | ~16GB | -| VLM | Additional Dedicated GPUs | 1 | 1 | 1 | 1 | 1 | Not supported | Not supported | 1 | -| VLM | Additional Disk Space | ~16GB | ~16GB | ~16GB | ~16GB | ~16GB | Not supported | Not supported | ~16GB | -| Reranker | With Core Pipeline | Yes | Yes | Yes | Yes | Yes | No* | No* | No* | -| Reranker | Standalone (recall only) | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | +|Feature | GPU Option | RTX Pro 6000 | RTX PRO 4500 | B200 | H200 NVL | H100 | A100 
80GB | A100 40GB | A10G | L40S | +|----------------|---------------------------|---------------|---------------|---------------|---------------|-------------|-------------|---------------|---------------|--------| +| GPU | Memory | 96GB | 32GB | 180GB | 141GB | 80GB | 80GB | 40GB | 24GB | 48GB | +| Core Features | Total GPUs | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| Core Features | Total Disk Space | ~150GB | ~150GB | ~150GB | ~150GB | ~150GB | ~150GB | ~150GB | ~150GB | ~150GB | +| Audio | Additional Dedicated GPUs | 1 | 1† | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| Audio | Additional Disk Space | ~37GB | ~37GB | ~37GB | ~37GB | ~37GB | ~37GB | ~37GB | ~37GB | ~37GB | +| nemotron-parse | Additional Dedicated GPUs | Not supported | Not supported‡| Not supported | Not supported | 1 | 1 | 1 | 1 | 1 | +| nemotron-parse | Additional Disk Space | Not supported | Not supported | Not supported | Not supported | ~16GB | ~16GB | ~16GB | ~16GB | ~16GB | +| VLM | Additional Dedicated GPUs | 1 | Not supported§| 1 | 1 | 1 | 1 | Not supported | Not supported | 1 | +| VLM | Additional Disk Space | ~16GB | Not supported | ~16GB | ~16GB | ~16GB | ~16GB | Not supported | Not supported | ~16GB | +| Reranker | With Core Pipeline | Yes | No* | Yes | Yes | Yes | Yes | No* | No* | No* | +| Reranker | Standalone (recall only) | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | \* GPUs with less than 80GB VRAM cannot run the reranker concurrently with the core pipeline. To perform recall testing with the reranker on these GPUs, shut down the core pipeline NIM microservices and run only the embedder, reranker, and your vector database. +† Audio (Parakeet) runs but requires a runtime engine build — no pre-defined model profile for this GPU. Dev team to confirm official support status. + +‡ Nemotron Parse fails to start on 32GB despite being supported on A10G (24GB). Pending engineering investigation — may be Blackwell architecture compatibility issue (see related bug). 
+ +§ VLM (nemotron-nano-12b-v2-vl) fails to load on 32GB, consistent with "Not supported" on A100-40GB (40GB). 32GB is below the threshold. + ## Related Topics diff --git a/docs/docs/extraction/user-defined-functions.md b/docs/docs/extraction/user-defined-functions.md index 62013d1d8..d225df447 100644 --- a/docs/docs/extraction/user-defined-functions.md +++ b/docs/docs/extraction/user-defined-functions.md @@ -16,7 +16,7 @@ This guide covers how to write, validate, and submit UDFs using both the CLI and Create a Python function that accepts an `IngestControlMessage` and returns a modified `IngestControlMessage`: ```python -from nemo_retriever.internal.primitives.ingest_control_message import IngestControlMessage +from nv_ingest_api.internal.primitives.ingest_control_message import IngestControlMessage def my_custom_processor(control_message: IngestControlMessage) -> IngestControlMessage: """Add custom metadata to all documents.""" @@ -77,7 +77,7 @@ nemo-retriever \ ### 3. Submit via Python Client ```python -from nemo_retriever.client.interface import Ingestor +from nv_ingest_client.client.interface import Ingestor # Create an Ingestor instance with default client ingestor = Ingestor() @@ -305,7 +305,7 @@ UDFs can be executed at different stages of the pipeline by specifying the `targ - `broker_response` - Response message handling - `otel_tracer` - OpenTelemetry tracing -> **Note:** For the complete and up-to-date list of pipeline stages, see the [default_pipeline.yaml](../../../config/default_pipeline.yaml) configuration file. +> **Note:** For the complete and up-to-date list of pipeline stages, see the [default_pipeline.yaml](https://github.com/NVIDIA/nv-ingest/blob/main/config/default_pipeline.yaml) configuration file. 
#### Target Stage Selection Examples @@ -461,9 +461,9 @@ NVIDIA Inference Microservices (NIMs) provide powerful AI capabilities that can ### Quick NIM Integration ```python -from nemo_retriever.internal.primitives.control_message import IngestControlMessage -from nemo_retriever.util.nim import create_inference_client -from nemo_retriever.internal.primitives.nim.model_interface.vlm import VLMModelInterface +from nv_ingest_api.internal.primitives.ingest_control_message import IngestControlMessage +from nv_ingest_api.util.nim import create_inference_client +from nv_ingest_api.internal.primitives.nim.model_interface.vlm import VLMModelInterface import os def document_analysis_with_nim(control_message: IngestControlMessage) -> IngestControlMessage: @@ -873,7 +873,7 @@ Test your UDF functions in isolation before deploying them to the pipeline: ```python import pandas as pd -from nemo_retriever.internal.primitives.ingest_control_message import IngestControlMessage +from nv_ingest_api.internal.primitives.ingest_control_message import IngestControlMessage def test_my_udf(): # Create test data diff --git a/docs/docs/extraction/user-defined-stages.md b/docs/docs/extraction/user-defined-stages.md index a20e17673..54dd8edb8 100644 --- a/docs/docs/extraction/user-defined-stages.md +++ b/docs/docs/extraction/user-defined-stages.md @@ -44,8 +44,8 @@ The following example demonstrates how to create a valid Lambda function and con ```python import pandas as pd from pydantic import BaseModel -from nemo_retriever.internal.primitives.ingest_control_message import IngestControlMessage -from nemo_retriever.internal.schemas.meta.metadata_schema import validate_metadata +from nv_ingest_api.internal.primitives.ingest_control_message import IngestControlMessage +from nv_ingest_api.internal.schemas.meta.metadata_schema import validate_metadata # Config schema for your stage class MyToyConfig(BaseModel): @@ -166,7 +166,7 @@ After you change any metadata, you can validate it by using the
`validate_metada as demonstrated in the following code example. ```python -from nemo_retriever.internal.schemas.meta.metadata_schema import validate_metadata +from nv_ingest_api.internal.schemas.meta.metadata_schema import validate_metadata def edit_metadata(control_message: IngestControlMessage, stage_config: MyToyConfig) -> IngestControlMessage: df = control_message.payload() @@ -235,8 +235,8 @@ The following example adds user-defined stages to your NeMo Retriever Library p ```python # my_pipeline/stages.py from pydantic import BaseModel - from nemo_retriever.internal.primitives.ingest_control_message import IngestControlMessage - from nemo_retriever.internal.schemas.meta.metadata_schema import validate_metadata + from nv_ingest_api.internal.primitives.ingest_control_message import IngestControlMessage + from nv_ingest_api.internal.schemas.meta.metadata_schema import validate_metadata class DoubleConfig(BaseModel): multiply_by: int = 2 diff --git a/docs/docs/extraction/v2-api-guide.md b/docs/docs/extraction/v2-api-guide.md index 1ac15d216..52f7ae22e 100644 --- a/docs/docs/extraction/v2-api-guide.md +++ b/docs/docs/extraction/v2-api-guide.md @@ -30,7 +30,7 @@ The V2 API automatically splits large PDFs into smaller chunks before processing ### Minimal Example ```python -from nemo_retriever.client import Ingestor +from nv_ingest_client.client.interface import Ingestor # Two-step configuration ingestor = Ingestor( @@ -432,7 +432,7 @@ For test scripts like `tools/harness/src/nemo_retriever_harness/cases/e2e.py`: ```python import os -from nemo_retriever.client import Ingestor +from nv_ingest_client.client.interface import Ingestor # Read from environment api_version = os.getenv("API_VERSION", "v1") diff --git a/docs/docs/extraction/vlm-embed.md b/docs/docs/extraction/vlm-embed.md index 331379ab3..2e493675c 100644 --- a/docs/docs/extraction/vlm-embed.md +++ b/docs/docs/extraction/vlm-embed.md @@ -1,6 +1,6 @@ # Use Multimodal Embedding with NeMo Retriever Library -This 
guide explains how to use the [NeMo Retriever Library](https://www.perplexity.ai/search/overview.md) with the multimodal embedding model [Llama Nemotron Embed VL 1B v2](https://build.nvidia.com/nvidia/llama-nemotron-embed-vl-1b-v2). +This guide explains how to use the [NeMo Retriever Library](overview.md) with the multimodal embedding model [Llama Nemotron Embed VL 1B v2](https://build.nvidia.com/nvidia/llama-nemotron-embed-vl-1b-v2). The `Llama Nemotron Embed VL 1B v2` model is optimized for multimodal question-answering and retrieval tasks. It can embed documents as text, images, or paired text-image combinations.