diff --git a/client/client_examples/examples/cli_client_usage.ipynb b/client/client_examples/examples/cli_client_usage.ipynb index 315ea888d..8216f5020 100644 --- a/client/client_examples/examples/cli_client_usage.ipynb +++ b/client/client_examples/examples/cli_client_usage.ipynb @@ -15,6 +15,22 @@ "- View NV-Ingest job outputs" ] }, + { + "cell_type": "markdown", + "id": "70343d26-9f65-4fdb-9f49-6fb0546d8080", + "metadata": {}, + "source": [ + "## Container setup before you run this notebook\n", + "\n", + "If you opened this notebook directly from the quickstart docs, make sure the NV-Ingest client container is running first. From `client/client_examples/`, build and start it with the commands shown in `client/client_examples/README.md`.\n", + "\n", + "The container clones this repository into `/workspace/nv-ingest` and copies the example notebooks into `/workspace/client_examples/examples`.\n", + "\n", + "- The default sample files below come from `/workspace/nv-ingest/data` inside the container.\n", + "- If you mounted your own dataset with `-v ${DATASET_ROOT}:/workspace/client_examples/data`, set the `SAMPLE_PDF0` and `SAMPLE_PDF1` environment variables (for example with `%env` in an earlier cell) to files under `/workspace/client_examples/data` before running the configuration cell below.\n", + "- Set the `REDIS_HOST`, `REDIS_PORT`, and `TASK_QUEUE` environment variables if your NV-Ingest services are not reachable at the defaults (`localhost`, `7670`, and `ingest_task_queue`).\n" + ] + }, { "cell_type": "markdown", "id": "6e0a6279-e78f-412a-94e7-a64ce5c0e4df", @@ -33,17 +49,20 @@ "import os\n", "\n", "# sample input file and output directories\n", - "SAMPLE_PDF0 = \"/workspace/nv-ingest/data/multimodal_test.pdf\"\n", + "SAMPLE_PDF0 = os.environ.get(\"SAMPLE_PDF0\", \"/workspace/nv-ingest/data/multimodal_test.pdf\")\n", "os.environ[\"SAMPLE_PDF0\"] = SAMPLE_PDF0\n", - "SAMPLE_PDF1 = \"/workspace/nv-ingest/data/functional_validation.pdf\"\n", - "BATCH_FILE = \"/workspace/client_examples/examples/dataset.json\"\n", + "SAMPLE_PDF1 = os.environ.get(\"SAMPLE_PDF1\", \"/workspace/nv-ingest/data/functional_validation.pdf\")\n", + "os.environ[\"SAMPLE_PDF1\"] = 
SAMPLE_PDF1\n", + "BATCH_FILE = os.environ.get(\"BATCH_FILE\", \"/workspace/client_examples/examples/dataset.json\")\n", "os.environ[\"BATCH_FILE\"] = BATCH_FILE\n", - "OUTPUT_DIRECTORY_SINGLE = \"/workspace/client_examples/examples/processed_docs_single\"\n", - "OUTPUT_DIRECTORY_BATCH = \"/workspace/client_examples/examples/processed_docs_batch\"\n", + "OUTPUT_DIRECTORY_SINGLE = os.environ.get(\"OUTPUT_DIRECTORY_SINGLE\", \"/workspace/client_examples/examples/processed_docs_single\")\n", + "OUTPUT_DIRECTORY_BATCH = os.environ.get(\"OUTPUT_DIRECTORY_BATCH\", \"/workspace/client_examples/examples/processed_docs_batch\")\n", "os.environ[\"OUTPUT_DIRECTORY_SINGLE\"] = OUTPUT_DIRECTORY_SINGLE\n", "os.environ[\"OUTPUT_DIRECTORY_BATCH\"] = OUTPUT_DIRECTORY_BATCH\n", - "REDIS_HOST = \"localhost\"\n", - "REDIS_PORT = \"7670\"" + "TASK_QUEUE = os.environ.get(\"TASK_QUEUE\", \"ingest_task_queue\")\n", + "os.environ[\"TASK_QUEUE\"] = TASK_QUEUE\n", + "REDIS_HOST = os.environ.get(\"REDIS_HOST\", \"localhost\")\n", + "REDIS_PORT = os.environ.get(\"REDIS_PORT\", \"7670\")" ] }, { diff --git a/client/client_examples/examples/python_client_usage.ipynb b/client/client_examples/examples/python_client_usage.ipynb index 46bde9f80..fdb4d2f49 100644 --- a/client/client_examples/examples/python_client_usage.ipynb +++ b/client/client_examples/examples/python_client_usage.ipynb @@ -14,6 +14,23 @@ "- Investigate the multimodal extractions" ] }, + { + "cell_type": "markdown", + "id": "0e63106b", + "metadata": {}, + "source": [ + "## Container setup before you run this notebook\n", + "\n", + "If you opened this notebook directly from the quickstart docs, make sure the NV-Ingest client container is running first. 
From `client/client_examples/`, build and start it with the commands shown in `client/client_examples/README.md`.\n", + "\n", + "The container clones this repository into `/workspace/nv-ingest` and copies the example notebooks into `/workspace/client_examples/examples`.\n", + "\n", + "- If you want to use the repository sample PDFs, keep the default `SAMPLE_PDF` path below.\n", + "- If you mounted your own dataset with `-v ${DATASET_ROOT}:/workspace/client_examples/data`, set the `SAMPLE_PDF` environment variable (for example with `%env` in an earlier cell) to a file under `/workspace/client_examples/data` before running the configuration cell that defines `SAMPLE_PDF`.\n", + "- Set `HTTP_HOST`, `HTTP_PORT`, and `TASK_QUEUE` if your NV-Ingest services are not reachable at the defaults.\n", + "- Set `MINIO_ACCESS_KEY` and `MINIO_SECRET_KEY` only if your workflow needs object storage access.\n" + ] + }, { "cell_type": "markdown", "id": "c14fe242", @@ -44,7 +61,7 @@ "DEFAULT_JOB_TIMEOUT = 90\n", "\n", "# sample input file and output directory\n", - "SAMPLE_PDF = \"/workspace/data/multimodal_test.pdf\"" + "SAMPLE_PDF = os.environ.get('SAMPLE_PDF', \"/workspace/nv-ingest/data/multimodal_test.pdf\")" ] }, {