From 7d5a3a91039b98e9b7de933cd8dedae0d5abe77b Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 2 Mar 2026 15:47:31 +0000 Subject: [PATCH 01/13] Drop pull_request_target from PR CI workflow --- .github/workflows/ci-pull-request.yml | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/.github/workflows/ci-pull-request.yml b/.github/workflows/ci-pull-request.yml index 47667eace..51c3c23da 100644 --- a/.github/workflows/ci-pull-request.yml +++ b/.github/workflows/ci-pull-request.yml @@ -2,11 +2,6 @@ name: Pull Request Validation on: pull_request: - types: - - opened - - synchronize - - reopened - pull_request_target: types: - opened - synchronize @@ -50,7 +45,6 @@ jobs: # Library mode integration tests (requires approval for external contributors) library-mode-check-access: name: Check Library Mode Test Access - if: github.event_name == 'pull_request_target' runs-on: ubuntu-latest outputs: has-access: ${{ steps.check.outputs.has-access }} @@ -70,9 +64,7 @@ jobs: library-mode-test: name: Test Library Mode needs: library-mode-check-access - if: | - github.event_name == 'pull_request_target' && - needs.library-mode-check-access.outputs.has-access == 'true' + if: needs.library-mode-check-access.outputs.has-access == 'true' uses: ./.github/workflows/reusable-integration-test.yml with: runner: 'ubuntu-latest' From 80ea4fa65f4a3f953584bca305c6f0494495a1da Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 2 Mar 2026 15:55:17 +0000 Subject: [PATCH 02/13] Are 26.1.2 libmode tests still running? From 94a31685f25c10d42f504fdcffdf328e2cb74f0b Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 2 Mar 2026 15:59:24 +0000 Subject: [PATCH 03/13] Fix comment in reusable integration test workflow --- .github/workflows/reusable-integration-test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/reusable-integration-test.yml b/.github/workflows/reusable-integration-test.yml index f82b0606e..9e16936c8 100644 --- a/.github/workflows/reusable-integration-test.yml +++ b/.github/workflows/reusable-integration-test.yml @@ -80,7 +80,8 @@ jobs: 'milvus-lite==2.4.12' \ 'nvidia-riva-client==2.20.0' \ 'unstructured-client>=0.25.9' \ - 'pypdfium2<5' \ # block pulling in breaking changes in pypdfium2 v5 + # block pulling in breaking changes in pypdfium2 v5 + 'pypdfium2<5' \ tritonclient \ markitdown \ glom From d9332b7720c838f60fef48aa55d7546176f2154b Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 2 Mar 2026 16:01:51 +0000 Subject: [PATCH 04/13] Actually fix comment --- .github/workflows/reusable-integration-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/reusable-integration-test.yml b/.github/workflows/reusable-integration-test.yml index 9e16936c8..23c7520f9 100644 --- a/.github/workflows/reusable-integration-test.yml +++ b/.github/workflows/reusable-integration-test.yml @@ -72,6 +72,7 @@ jobs: uv venv .venv source .venv/bin/activate uv pip install -e ./src -e ./api -e ./client + # block pulling in breaking changes in pypdfium2 v5 uv pip install \ opencv-python \ llama-index-embeddings-nvidia \ @@ -80,7 +81,6 @@ jobs: 'milvus-lite==2.4.12' \ 'nvidia-riva-client==2.20.0' \ 'unstructured-client>=0.25.9' \ - # block pulling in breaking changes in pypdfium2 v5 'pypdfium2<5' \ tritonclient \ markitdown \ From e50557105e03d7c519ba658338ba200a59d7c148 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 2 Mar 2026 16:08:53 +0000 Subject: [PATCH 05/13] Disable default integration pytest filter --- .github/workflows/reusable-integration-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/reusable-integration-test.yml b/.github/workflows/reusable-integration-test.yml index 23c7520f9..9b79004dd 100644 --- a/.github/workflows/reusable-integration-test.yml +++ b/.github/workflows/reusable-integration-test.yml @@ -110,4 +110,4 @@ jobs: source .venv/bin/activate echo 'Running integration tests...' unset LD_LIBRARY_PATH - python -m pytest -rsx tests/integration + python -m pytest -rsx -m integration tests/integration From 8425b1299e2a7d4cebed4ecd4ff0856c44a72f06 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 2 Mar 2026 16:16:48 +0000 Subject: [PATCH 06/13] Don't run retriever unit tests on all pushes --- .github/workflows/retriever-unit-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/retriever-unit-tests.yml b/.github/workflows/retriever-unit-tests.yml index 14eb87daf..0f52addcc 100644 --- a/.github/workflows/retriever-unit-tests.yml +++ b/.github/workflows/retriever-unit-tests.yml @@ -3,7 +3,7 @@ name: Retriever Unit Tests on: push: branches: - - "**" + - main pull_request: jobs: From 4e39477aac13daceb314c983baf0a20a893403e2 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 2 Mar 2026 16:35:21 +0000 Subject: [PATCH 07/13] Add YAML string processing for env vars that are set as empty string --- api/api_tests/util/string_processing/test_yaml.py | 8 ++++++++ api/src/nv_ingest_api/util/string_processing/yaml.py | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/api/api_tests/util/string_processing/test_yaml.py b/api/api_tests/util/string_processing/test_yaml.py index a5628d80f..8b91e3be4 100644 --- a/api/api_tests/util/string_processing/test_yaml.py +++ b/api/api_tests/util/string_processing/test_yaml.py @@ -52,6 +52,14 @@ def test_env_var_overrides_default(self): result = substitute_env_vars_in_yaml_content("key: $TEST_VAR|default_value") assert result == "key: env_value" + def test_empty_string_treated_as_unset_uses_default(self): + """When primary var is set but empty, treat as unset and use default (avoids invalid YAML in lists).""" + with patch.dict(os.environ, {"YOLOX_HTTP_ENDPOINT": ""}, clear=True): + result = substitute_env_vars_in_yaml_content( + 'yolox_endpoints: [$GRPC|"page-elements:8001", $YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer"]' + ) + assert result == 'yolox_endpoints: ["page-elements:8001", "http://page-elements:8000/v1/infer"]' + def test_missing_var_no_default(self): """Test that missing variables without defaults become empty strings.""" with patch.dict(os.environ, {}, clear=True): diff --git a/api/src/nv_ingest_api/util/string_processing/yaml.py b/api/src/nv_ingest_api/util/string_processing/yaml.py index af274a3f9..9ad7a4786 100644 --- a/api/src/nv_ingest_api/util/string_processing/yaml.py +++ b/api/src/nv_ingest_api/util/string_processing/yaml.py @@ -41,9 +41,9 @@ def _replacer(match: re.Match) -> str: var_name = match.group("braced") or match.group("named") default_val = match.group("braced_default") or match.group("named_default") - # First try the primary env var + # First try the primary env var (treat empty string as unset so default is used) value = os.environ.get(var_name) - if value is not None: + if value is not None and value != "": return _quote_if_needed(value) # If primary is missing, try the default. From 933f19cfae29ff49b6550d66d92f392b0e42dfd6 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 2 Mar 2026 16:37:28 +0000 Subject: [PATCH 08/13] Linting --- api/api_tests/util/string_processing/test_yaml.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/api_tests/util/string_processing/test_yaml.py b/api/api_tests/util/string_processing/test_yaml.py index 8b91e3be4..bd19491cd 100644 --- a/api/api_tests/util/string_processing/test_yaml.py +++ b/api/api_tests/util/string_processing/test_yaml.py @@ -56,7 +56,8 @@ def test_empty_string_treated_as_unset_uses_default(self): """When primary var is set but empty, treat as unset and use default (avoids invalid YAML in lists).""" with patch.dict(os.environ, {"YOLOX_HTTP_ENDPOINT": ""}, clear=True): result = substitute_env_vars_in_yaml_content( - 'yolox_endpoints: [$GRPC|"page-elements:8001", $YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer"]' + "yolox_endpoints: " + '[$GRPC|"page-elements:8001", $YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer"]' ) assert result == 'yolox_endpoints: ["page-elements:8001", "http://page-elements:8000/v1/infer"]' From c020eddcd3902ffe2d0e365ac2491f2e0e673068 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 2 Mar 2026 17:48:36 +0000 Subject: [PATCH 09/13] Try setting only pull_request_target trigger --- .github/workflows/ci-pull-request.yml | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci-pull-request.yml b/.github/workflows/ci-pull-request.yml index 51c3c23da..3f863e18d 100644 --- a/.github/workflows/ci-pull-request.yml +++ b/.github/workflows/ci-pull-request.yml @@ -1,7 +1,7 @@ name: Pull Request Validation on: - pull_request: + pull_request_target: types: - opened - synchronize @@ -9,7 +9,7 @@ on: - labeled concurrency: - group: ${{ github.workflow }}-pr-${{ github.event.pull_request.number }} + group: ${{ github.workflow }}-pr-${{ github.event.pull_request_target.number }} cancel-in-progress: true jobs: @@ -32,7 +32,7 @@ jobs: with: platform: 'linux/amd64' target: 'test' - tags: 'nv-ingest:pr-${{ github.event.pull_request.number }}' + tags: 'nv-ingest:pr-${{ github.event.pull_request_target.number }}' base-image: 'ubuntu' base-image-tag: 'jammy-20250415.1' test-selection: 'full' @@ -45,6 +45,7 @@ jobs: # Library mode integration tests (requires approval for external contributors) library-mode-check-access: name: Check Library Mode Test Access + if: github.event_name == 'pull_request_target' runs-on: ubuntu-latest outputs: has-access: ${{ steps.check.outputs.has-access }} @@ -53,10 +54,10 @@ jobs: id: check run: | HAS_ACCESS="false" - if [[ "${{ github.event.pull_request.author_association }}" == "MEMBER" ]] || \ - [[ "${{ github.event.pull_request.author_association }}" == "COLLABORATOR" ]] || \ - [[ "${{ github.event.pull_request.author_association }}" == "OWNER" ]] || \ - [[ "${{ contains(github.event.pull_request.labels.*.name, 'ok-to-test') }}" == "true" ]]; then + if [[ "${{ github.event.pull_request_target.author_association }}" == "MEMBER" ]] || \ + [[ "${{ github.event.pull_request_target.author_association }}" == "COLLABORATOR" ]] || \ + [[ "${{ github.event.pull_request_target.author_association }}" == "OWNER" ]] || \ + [[ "${{ contains(github.event.pull_request_target.labels.*.name, 'ok-to-test') }}" == "true" ]]; then HAS_ACCESS="true" fi echo "has-access=$HAS_ACCESS" >> $GITHUB_OUTPUT @@ -64,12 +65,14 @@ jobs: library-mode-test: name: Test Library Mode needs: library-mode-check-access - if: needs.library-mode-check-access.outputs.has-access == 'true' + if: | + github.event_name == 'pull_request_target' && + needs.library-mode-check-access.outputs.has-access == 'true' uses: ./.github/workflows/reusable-integration-test.yml with: runner: 'ubuntu-latest' timeout-minutes: 60 - source-ref: ${{ github.event.pull_request.head.sha }} + source-ref: ${{ github.event.pull_request_target.head.sha }} secrets: AUDIO_FUNCTION_ID: ${{ secrets.AUDIO_FUNCTION_ID }} EMBEDDING_NIM_MODEL_NAME: ${{ secrets.EMBEDDING_NIM_MODEL_NAME }} From 552f84e3830ee02e81ae4f666de23d0c653102fe Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 2 Mar 2026 19:17:15 +0000 Subject: [PATCH 10/13] Bump ci From bebe16cbdb6e405b0ed076c304425c25b14d1581 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 2 Mar 2026 20:03:31 +0000 Subject: [PATCH 11/13] Bump ci From c5b56e6f3bf6ec1372e93e90c18ed42cec32e6d3 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 2 Mar 2026 20:24:55 +0000 Subject: [PATCH 12/13] Bump ci From 95f98b371d73d4eeef3df5b1796f858bab938d26 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Tue, 3 Mar 2026 13:52:36 +0000 Subject: [PATCH 13/13] Undo retriever unit tests diff --- .github/workflows/retriever-unit-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/retriever-unit-tests.yml b/.github/workflows/retriever-unit-tests.yml index 0f52addcc..14eb87daf 100644 --- a/.github/workflows/retriever-unit-tests.yml +++ b/.github/workflows/retriever-unit-tests.yml @@ -3,7 +3,7 @@ name: Retriever Unit Tests on: push: branches: - - main + - "**" pull_request: jobs: