From 55984c0cf6ada85b7d0b48519bb92ab41d108c12 Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Mon, 14 Jul 2025 16:36:52 +0000 Subject: [PATCH 1/5] .github+sdks: run ml tests that require non self-hosted env --- .github/workflows/beam_PostCommit_Python.yml | 10 +++++++-- .../workflows/beam_PreCommit_Python_ML.yml | 11 +++++++--- sdks/python/pytest.ini | 1 + sdks/python/test-suites/direct/common.gradle | 22 +++++++++++++++++++ 4 files changed, 39 insertions(+), 5 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Python.yml b/.github/workflows/beam_PostCommit_Python.yml index 2a98ccb0efb0..6a452494628b 100644 --- a/.github/workflows/beam_PostCommit_Python.yml +++ b/.github/workflows/beam_PostCommit_Python.yml @@ -54,7 +54,7 @@ env: jobs: beam_PostCommit_Python: name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) - runs-on: [self-hosted, ubuntu-20.04, highmem22] + runs-on: ${{ matrix.os }} timeout-minutes: 240 strategy: fail-fast: false @@ -62,6 +62,7 @@ jobs: job_name: [beam_PostCommit_Python] job_phrase: [Run Python PostCommit] python_version: ['3.9', '3.10', '3.11', '3.12'] + os: [[self-hosted, ubuntu-20.04, highmem22], [ubuntu-latest]] if: | github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request_target' || @@ -99,6 +100,11 @@ jobs: arguments: | -Pjava21Home=$JAVA_HOME_21_X64 \ -PuseWheelDistribution \ + -Pposargs="${{ + contains(matrix.os, 'self-hosted') && + '-m ''not require_non_self_hosted''' || + '-m ''require_non_self_hosted''' + }}" \ -PpythonVersion=${{ matrix.python_version }} \ env: CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} @@ -115,4 +121,4 @@ jobs: commit: '${{ env.prsha || env.GITHUB_SHA }}' comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/pytest*.xml' - large_files: true \ No newline at end of file + large_files: true diff --git a/.github/workflows/beam_PreCommit_Python_ML.yml b/.github/workflows/beam_PreCommit_Python_ML.yml 
index 50ae079d3db3..7faaa3b5ae6a 100644 --- a/.github/workflows/beam_PreCommit_Python_ML.yml +++ b/.github/workflows/beam_PreCommit_Python_ML.yml @@ -58,7 +58,7 @@ env: jobs: beam_PreCommit_Python_ML: name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) - runs-on: [self-hosted, ubuntu-20.04, main] + runs-on: ${{ matrix.os }} timeout-minutes: 180 strategy: fail-fast: false @@ -66,6 +66,7 @@ jobs: job_name: ['beam_PreCommit_Python_ML'] job_phrase: ['Run Python_ML PreCommit'] python_version: ['3.9','3.10','3.11','3.12'] + os: [[self-hosted, ubuntu-20.04, main], [ubuntu-latest]] if: | github.event_name == 'push' || github.event_name == 'pull_request_target' || @@ -96,7 +97,11 @@ jobs: with: gradle-command: :sdks:python:test-suites:tox:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:testPy${{steps.set_py_ver_clean.outputs.py_ver_clean}}ML arguments: | - -Pposargs=apache_beam/ml/ \ + -Pposargs="${{ + contains(matrix.os, 'self-hosted') && + 'apache_beam/ml/ -m ''not require_non_self_hosted''' || + 'apache_beam/ml/ -m ''require_non_self_hosted''' + }}" \ -PpythonVersion=${{ matrix.python_version }} - name: Archive Python Test Results uses: actions/upload-artifact@v4 @@ -111,4 +116,4 @@ jobs: commit: '${{ env.prsha || env.GITHUB_SHA }}' comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/pytest*.xml' - large_files: true \ No newline at end of file + large_files: true diff --git a/sdks/python/pytest.ini b/sdks/python/pytest.ini index 2b53441927d9..a3223ca0a246 100644 --- a/sdks/python/pytest.ini +++ b/sdks/python/pytest.ini @@ -70,6 +70,7 @@ markers = uses_mock_api: tests that uses the mock API cluster. uses_feast: tests that uses feast in some way gemini_postcommit: gemini postcommits that need additional deps. + require_non_self_hosted: tests requiring specialized environments or infrastructure not present in self-hosted setups. # Default timeout intended for unit tests. 
# If certain tests need a different value, please see the docs on how to diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle index 1dd15ecb09f9..cb81c6c8d672 100644 --- a/sdks/python/test-suites/direct/common.gradle +++ b/sdks/python/test-suites/direct/common.gradle @@ -419,6 +419,27 @@ task feastIntegrationTest { } } +// Integration tests that runs on non self-hosted environments. +task nonSelfHostedIntegrationTest { + dependsOn 'installGcpTest' + dependsOn ':sdks:python:sdist' + + doLast { + def testOpts = basicTestOpts + def argMap = [ + "test_opts": testOpts, + "suite": "postCommitIT-direct-py${pythonVersionSuffix}", + "collect": "require_non_self_hosted", + "runner": "TestDirectRunner", + ] + def cmdArgs = mapToArgString(argMap) + exec { + executable 'sh' + args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs" + } + } +} + // Add all the RunInference framework IT tests to this gradle task that runs on Direct Runner Post commit suite. 
project.tasks.register("inferencePostCommitIT") { dependsOn = [ @@ -429,6 +450,7 @@ project.tasks.register("inferencePostCommitIT") { 'transformersInferenceTest', 'testcontainersTest', 'feastIntegrationTest', + 'nonSelfHostedIntegrationTest', // (TODO) https://github.com/apache/beam/issues/25799 // uncomment tfx bsl tests once tfx supports protobuf 4.x // 'tfxInferenceTest', From 4c75ab8326f334353fb2ffdf6c5e84d5d0a33e4d Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Mon, 14 Jul 2025 16:38:41 +0000 Subject: [PATCH 2/5] .github: trigger post commit python --- .github/trigger_files/beam_PostCommit_Python.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/trigger_files/beam_PostCommit_Python.json b/.github/trigger_files/beam_PostCommit_Python.json index 2934a91b84b1..13a309763b58 100644 --- a/.github/trigger_files/beam_PostCommit_Python.json +++ b/.github/trigger_files/beam_PostCommit_Python.json @@ -1,5 +1,5 @@ { "comment": "Modify this file in a trivial way to cause this test suite to run.", - "modification": 1 + "modification": 11 } From 7f9f38e6655e59a6aa54db7d8d35f18c96307221 Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Mon, 14 Jul 2025 21:41:36 +0300 Subject: [PATCH 3/5] Update sdks/python/pytest.ini Co-authored-by: Danny McCormick --- sdks/python/pytest.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/pytest.ini b/sdks/python/pytest.ini index a3223ca0a246..868760b3888f 100644 --- a/sdks/python/pytest.ini +++ b/sdks/python/pytest.ini @@ -70,7 +70,7 @@ markers = uses_mock_api: tests that uses the mock API cluster. uses_feast: tests that uses feast in some way gemini_postcommit: gemini postcommits that need additional deps. - require_non_self_hosted: tests requiring specialized environments or infrastructure not present in self-hosted setups. + require_non_self_hosted: tests which can't be run on Beam's self hosted GitHub Actions runners. 
Usually this is because of issues with docker-in-docker or other environmental challenges. Context: https://github.com/apache/beam/pull/35585 # Default timeout intended for unit tests. # If certain tests need a different value, please see the docs on how to From 81319fb529a5afe2489143ad4f7f36cbc23ad4af Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Mon, 14 Jul 2025 18:49:12 +0000 Subject: [PATCH 4/5] .github+sdks: add descriptive comments about runner changes --- .github/workflows/beam_PostCommit_Python.yml | 5 +++++ .github/workflows/beam_PreCommit_Python_ML.yml | 5 +++++ sdks/python/test-suites/direct/common.gradle | 7 ++++++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/.github/workflows/beam_PostCommit_Python.yml b/.github/workflows/beam_PostCommit_Python.yml index 6a452494628b..75d8df570296 100644 --- a/.github/workflows/beam_PostCommit_Python.yml +++ b/.github/workflows/beam_PostCommit_Python.yml @@ -62,6 +62,11 @@ jobs: job_name: [beam_PostCommit_Python] job_phrase: [Run Python PostCommit] python_version: ['3.9', '3.10', '3.11', '3.12'] + # Run on both self-hosted and GitHub-hosted runners. + # Some tests (marked require_non_self_hosted) can't run on Beam's + # self-hosted runners due to Docker-in-Docker or other environment + # limitations. These tests will only execute on ubuntu-latest + # (GitHub-hosted). Context: https://github.com/apache/beam/pull/35585 os: [[self-hosted, ubuntu-20.04, highmem22], [ubuntu-latest]] if: | github.event_name == 'workflow_dispatch' || diff --git a/.github/workflows/beam_PreCommit_Python_ML.yml b/.github/workflows/beam_PreCommit_Python_ML.yml index 7faaa3b5ae6a..dac177d79c17 100644 --- a/.github/workflows/beam_PreCommit_Python_ML.yml +++ b/.github/workflows/beam_PreCommit_Python_ML.yml @@ -66,6 +66,11 @@ jobs: job_name: ['beam_PreCommit_Python_ML'] job_phrase: ['Run Python_ML PreCommit'] python_version: ['3.9','3.10','3.11','3.12'] + # Run on both self-hosted and GitHub-hosted runners. 
+ # Some tests (marked require_non_self_hosted) can't run on Beam's + # self-hosted runners due to Docker-in-Docker or other environment + # limitations. These tests will only execute on ubuntu-latest + # (GitHub-hosted). Context: https://github.com/apache/beam/pull/35585 os: [[self-hosted, ubuntu-20.04, main], [ubuntu-latest]] if: | github.event_name == 'push' || diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle index cb81c6c8d672..770ad333f096 100644 --- a/sdks/python/test-suites/direct/common.gradle +++ b/sdks/python/test-suites/direct/common.gradle @@ -419,7 +419,12 @@ task feastIntegrationTest { } } -// Integration tests that runs on non self-hosted environments. +// Integration tests that must run on non self-hosted environments. +// These tests are marked with the `require_non_self_hosted` pytest marker +// because they rely on features like Docker-in-Docker or other system-level +// configurations that are not supported on Beam's self-hosted GitHub Actions +// runners. They will be executed on GitHub-hosted runners +// (e.g., ubuntu-latest). 
Context: https://github.com/apache/beam/pull/35585 task nonSelfHostedIntegrationTest { dependsOn 'installGcpTest' dependsOn ':sdks:python:sdist' From 1e03a43f6fcba17d20e6fd3bf29cf76deecf46a5 Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Mon, 14 Jul 2025 20:09:42 +0000 Subject: [PATCH 5/5] workflows+gradle: use `require_docker_in_docker` marker --- .github/workflows/beam_PostCommit_Python.yml | 8 ++++---- .../workflows/beam_PreCommit_Python_ML.yml | 8 ++++---- sdks/python/pytest.ini | 2 +- sdks/python/test-suites/direct/common.gradle | 19 ++++++++++--------- 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Python.yml b/.github/workflows/beam_PostCommit_Python.yml index 75d8df570296..546cb312fabe 100644 --- a/.github/workflows/beam_PostCommit_Python.yml +++ b/.github/workflows/beam_PostCommit_Python.yml @@ -63,10 +63,10 @@ jobs: job_phrase: [Run Python PostCommit] python_version: ['3.9', '3.10', '3.11', '3.12'] # Run on both self-hosted and GitHub-hosted runners. - # Some tests (marked require_non_self_hosted) can't run on Beam's - # self-hosted runners due to Docker-in-Docker or other environment - # limitations. These tests will only execute on ubuntu-latest - # (GitHub-hosted). Context: https://github.com/apache/beam/pull/35585 + # Some tests (marked require_docker_in_docker) can't run on Beam's + # self-hosted runners due to Docker-in-Docker environment constraints. + # These tests will only execute on ubuntu-latest (GitHub-hosted). 
+ # Context: https://github.com/apache/beam/pull/35585 os: [[self-hosted, ubuntu-20.04, highmem22], [ubuntu-latest]] if: | github.event_name == 'workflow_dispatch' || diff --git a/.github/workflows/beam_PreCommit_Python_ML.yml b/.github/workflows/beam_PreCommit_Python_ML.yml index dac177d79c17..0dcdc88ef164 100644 --- a/.github/workflows/beam_PreCommit_Python_ML.yml +++ b/.github/workflows/beam_PreCommit_Python_ML.yml @@ -67,10 +67,10 @@ jobs: job_phrase: ['Run Python_ML PreCommit'] python_version: ['3.9','3.10','3.11','3.12'] # Run on both self-hosted and GitHub-hosted runners. - # Some tests (marked require_non_self_hosted) can't run on Beam's - # self-hosted runners due to Docker-in-Docker or other environment - # limitations. These tests will only execute on ubuntu-latest - # (GitHub-hosted). Context: https://github.com/apache/beam/pull/35585 + # Some tests (marked require_docker_in_docker) can't run on Beam's + # self-hosted runners due to Docker-in-Docker environment constraints. + # These tests will only execute on ubuntu-latest (GitHub-hosted). + # Context: https://github.com/apache/beam/pull/35585 os: [[self-hosted, ubuntu-20.04, main], [ubuntu-latest]] if: | github.event_name == 'push' || diff --git a/sdks/python/pytest.ini b/sdks/python/pytest.ini index 868760b3888f..cb244025812d 100644 --- a/sdks/python/pytest.ini +++ b/sdks/python/pytest.ini @@ -70,7 +70,7 @@ markers = uses_mock_api: tests that uses the mock API cluster. uses_feast: tests that uses feast in some way gemini_postcommit: gemini postcommits that need additional deps. - require_non_self_hosted: tests which can't be run on Beam's self hosted GitHub Actions runners. Usually this is because of issues with docker-in-docker or other environmental challenges. Context: https://github.com/apache/beam/pull/35585 + require_docker_in_docker: tests that require running Docker inside Docker (Docker-in-Docker), which is not supported on Beam's self-hosted runners. 
Context: https://github.com/apache/beam/pull/35585 # Default timeout intended for unit tests. # If certain tests need a different value, please see the docs on how to diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle index 770ad333f096..3ca4591bc16f 100644 --- a/sdks/python/test-suites/direct/common.gradle +++ b/sdks/python/test-suites/direct/common.gradle @@ -419,13 +419,14 @@ task feastIntegrationTest { } } -// Integration tests that must run on non self-hosted environments. -// These tests are marked with the `require_non_self_hosted` pytest marker -// because they rely on features like Docker-in-Docker or other system-level -// configurations that are not supported on Beam's self-hosted GitHub Actions -// runners. They will be executed on GitHub-hosted runners -// (e.g., ubuntu-latest). Context: https://github.com/apache/beam/pull/35585 -task nonSelfHostedIntegrationTest { +// Integration tests that require Docker-in-Docker capabilities. +// These tests are marked with the `require_docker_in_docker` pytest marker +// because they rely on Docker-in-Docker configurations that are not supported +// on Beam's self-hosted GitHub Actions runners. Docker-in-Docker works on +// ubuntu-latest GitHub-hosted runners but not on self-hosted environments due +// to containerization architecture differences. 
+// Context: https://github.com/apache/beam/pull/35585 +task dockerInDockerIntegrationTest { dependsOn 'installGcpTest' dependsOn ':sdks:python:sdist' @@ -434,7 +435,7 @@ task nonSelfHostedIntegrationTest { def argMap = [ "test_opts": testOpts, "suite": "postCommitIT-direct-py${pythonVersionSuffix}", - "collect": "require_non_self_hosted", + "collect": "require_docker_in_docker", "runner": "TestDirectRunner", ] def cmdArgs = mapToArgString(argMap) @@ -455,7 +456,7 @@ project.tasks.register("inferencePostCommitIT") { 'transformersInferenceTest', 'testcontainersTest', 'feastIntegrationTest', - 'nonSelfHostedIntegrationTest', + 'dockerInDockerIntegrationTest', // (TODO) https://github.com/apache/beam/issues/25799 // uncomment tfx bsl tests once tfx supports protobuf 4.x // 'tfxInferenceTest',