diff --git a/.github/workflows/rocm-ci-dispatch.yml b/.github/workflows/rocm-ci-dispatch.yml
new file mode 100644
index 000000000..e78270f8c
--- /dev/null
+++ b/.github/workflows/rocm-ci-dispatch.yml
@@ -0,0 +1,202 @@
+# Copyright (c) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.
+#
+# See LICENSE for license information.
+
+name: PR Automatic CI
+
+on:
+  pull_request:
+    branches:
+      - 'dev'
+      - 'release_v2.*_rocm'
+    types: [ labeled, unlabeled, synchronize, reopened ]
+
+permissions:
+  contents: read
+  actions: write
+
+jobs:
+  determine_level:
+    runs-on: ubuntu-latest
+    outputs:
+      test_level: ${{ steps.set_level.outputs.test_level }}
+      current_level: ${{ steps.set_level.outputs.current_level }}
+      should_cancel_others: ${{ steps.set_level.outputs.should_cancel_others }}
+    steps:
+      - name: Determine CI dispatch from labels
+        id: set_level
+        uses: actions/github-script@v7
+        with:
+          script: |
+            // Map a label name to its CI level (1-3); 0 means "not a ci-level label".
+            const parseLevelLabel = (labelName) => {
+              const match = /^ci-level ([1-3])$/.exec((labelName || '').toLowerCase());
+              return match ? Number(match[1]) : 0;
+            };
+
+            const { action, label: changedLabel, pull_request: pr } = context.payload;
+            const { owner, repo } = context.repo;
+            const prNumber = pr.number;
+            const headSha = pr.head.sha;
+
+            // Level of the label that triggered this event (0 for non ci-level labels
+            // and for events that are not label changes).
+            const changedLevel = parseLevelLabel(changedLabel?.name);
+            const addedLevel = action === 'labeled' ? changedLevel : 0;
+            const removedLevel = action === 'unlabeled' ? changedLevel : 0;
+
+            // Highest ci-level label still attached to the PR (0 when there is none).
+            const currentLevel = Math.max(0, ...(pr.labels || []).map((l) => parseLevelLabel(l.name)));
+            const removedWasHighest = removedLevel > currentLevel;
+
+            // Decide whether this event should start a CI run at currentLevel:
+            // - labeled: only when the added label is a ci-level label
+            // - unlabeled: only when the removed label outranked every remaining one
+            // - synchronize / reopened: whenever a ci-level label is present
+            let requiresDispatch = currentLevel > 0;
+            if (action === 'labeled') {
+              requiresDispatch &&= addedLevel > 0;
+            } else if (action === 'unlabeled') {
+              requiresDispatch &&= removedWasHighest;
+            }
+
+            // On a downgrade, skip the dispatch when a completed run for this commit
+            // already covered the remaining level.
+            // NOTE(review): the level is parsed from run.display_title, which only
+            // contains "CI Level N" if the run title carries it (e.g. via `run-name`
+            // or the PR title) - confirm this matches how runs are titled.
+            if (action === 'unlabeled' && requiresDispatch) {
+              const runs = await github.paginate(github.rest.actions.listWorkflowRuns, {
+                owner,
+                repo,
+                workflow_id: 'rocm-ci-dispatch.yml',
+                event: 'pull_request',
+                // Server-side filter; only runs for this commit can satisfy the check below.
+                head_sha: headSha,
+                per_page: 100,
+              });
+
+              const alreadyCovered = runs.some((run) => {
+                const prMatch = (run.pull_requests || []).some((p) => p.number === prNumber);
+                const match = (run.display_title || '').match(/CI Level (\d+)/i);
+                const runLevel = match ? Number(match[1]) : 0;
+                return prMatch
+                  && run.head_sha === headSha
+                  && run.status === 'completed'
+                  && runLevel >= currentLevel;
+              });
+              if (alreadyCovered) {
+                requiresDispatch = false;
+              }
+            }
+
+            // test_level is empty when nothing should be dispatched.
+            const testLevel = requiresDispatch ? String(currentLevel) : '';
+            core.setOutput('test_level', testLevel);
+            core.setOutput('current_level', String(currentLevel));
+            // Older runs may need cancelling when a run is dispatched, or when the
+            // highest ci-level label was removed (even if nothing new is dispatched).
+            const shouldCancelOthers = testLevel !== '' || (action === 'unlabeled' && removedWasHighest);
+            core.setOutput('should_cancel_others', shouldCancelOthers ? 'true' : 'false');
+
+  cancel_others:
+    # Run this job if we might need to cancel running workflows, which happens when:
+    # - We determined that we need to dispatch a workflow
+    # - The highest CI-level label was removed
+    if: ${{ needs.determine_level.outputs.should_cancel_others == 'true' }}
+    needs: determine_level
+    runs-on: ubuntu-latest
+    steps:
+      - name: Cancel queued/in-progress runs
+        uses: actions/github-script@v7
+        env:
+          CURRENT_LEVEL: ${{ needs.determine_level.outputs.current_level }}
+        with:
+          script: |
+            const { action, label: changedLabel, pull_request: pr } = context.payload;
+            // Reopening a PR never cancels anything.
+            if (action === 'reopened') {
+              return;
+            }
+
+            // Map a label name to its CI level (1-3); 0 means "not a ci-level label".
+            const parseLevelLabel = (labelName) => {
+              const match = /^ci-level ([1-3])$/.exec((labelName || '').toLowerCase());
+              return match ? Number(match[1]) : 0;
+            };
+
+            const { owner, repo } = context.repo;
+            const prNumber = pr.number;
+            const currentRunId = context.runId;
+            // Highest remaining ci-level on the PR, as computed by determine_level.
+            const currentLevel = Number(process.env.CURRENT_LEVEL || 0);
+            const changedLevel = parseLevelLabel(changedLabel?.name);
+            const addedLevel = action === 'labeled' ? changedLevel : 0;
+            const removedLevel = action === 'unlabeled' ? changedLevel : 0;
+
+            // Page through every run of this workflow (same approach as determine_level);
+            // a single page of 100 could miss active runs on a busy repository.
+            const runs = await github.paginate(github.rest.actions.listWorkflowRuns, {
+              owner,
+              repo,
+              workflow_id: 'rocm-ci-dispatch.yml',
+              event: 'pull_request',
+              per_page: 100,
+            });
+
+            // Pick the other queued/in-progress runs of this PR that the current
+            // event supersedes.
+            const toCancel = runs.filter((run) => {
+              const prMatch = (run.pull_requests || []).some((p) => p.number === prNumber);
+              const active = run.status === 'queued' || run.status === 'in_progress';
+              // Never cancel this run, finished runs, or runs of other PRs.
+              if (run.id === currentRunId || !prMatch || !active) {
+                return false;
+              }
+              // A new commit supersedes every older run.
+              if (action === 'synchronize') {
+                return true;
+              }
+              // NOTE(review): the level is parsed from run.display_title; confirm the
+              // run title actually carries "CI Level N" (runs without it count as 0).
+              const match = (run.display_title || '').match(/CI Level (\d+)/i);
+              const runLevel = match ? Number(match[1]) : 0;
+              // Label added: cancel runs below the added level.
+              if (action === 'labeled') {
+                return runLevel < addedLevel;
+              }
+              // Last ci-level label removed: cancel every run that has a level.
+              if (action === 'unlabeled' && currentLevel === 0 && removedLevel > 0) {
+                return runLevel > 0;
+              }
+              // Otherwise cancel runs below the highest remaining level.
+              return runLevel < currentLevel;
+            });
+
+            for (const run of toCancel) {
+              core.info(`Canceling dispatch run ${run.id} for PR #${prNumber}`);
+              try {
+                await github.rest.actions.cancelWorkflowRun({ owner, repo, run_id: run.id });
+              } catch (err) {
+                // 409 means the run finished between listing and cancelling; that is
+                // fine and must not fail this job, since dispatch depends on it.
+                if (err.status !== 409) {
+                  throw err;
+                }
+                core.warning(`Run ${run.id} could not be canceled (already finished): ${err.message}`);
+              }
+            }
+
+  dispatch:
+    # Run this job if there is a valid level to test, which requires
+    # that any of the following are true:
+    # - A ci-level label was added
+    # - A commit was pushed with existing ci-level label(s)
+    # - The PR was reopened with existing ci-level label(s)
+    # - The highest ci-level label was removed with remaining ci-level label(s)
+    if: ${{ needs.determine_level.outputs.test_level != '' }}
+    needs: [determine_level, cancel_others]
+    name: CI Level ${{ needs.determine_level.outputs.test_level }}
+    uses: ./.github/workflows/rocm-ci.yml
+    with:
+      test_level: ${{ needs.determine_level.outputs.test_level }}
diff --git a/.github/workflows/rocm-ci.yml b/.github/workflows/rocm-ci.yml
index b8a08bfca..25541c18c 100644
--- a/.github/workflows/rocm-ci.yml
+++ b/.github/workflows/rocm-ci.yml
@@ -2,19 +2,29 @@
 #
 # See LICENSE for license information.
 
-name: TransformerEngine CI
+name: Build and Test Branch
 
 on:
   push:
     branches:
       - 'dev'
-      - 'release_v1.*_rocm'
       - 'release_v2.*_rocm'
-  pull_request:
-    branches:
-      - 'dev'
-      - 'release_v1.**_rocm'
-      - 'release_v2.**_rocm'
+  workflow_call:
+    inputs:
+      test_level:
+        description: 'Test Level (1-3)'
+        required: false
+        default: '1'
+        type: string
+      docker_image_override:
+        description: 'Manual Docker Image (Leave empty to use config file value)'
+        required: false
+        type: string
+      test_config_from_source:
+        description: 'DEBUG: Use config.json from current source branch instead of dev'
+        required: false
+        default: false
+        type: boolean
   workflow_dispatch:
     inputs:
       test_level:
@@ -32,11 +42,13 @@ on:
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
+  # When called through workflow_call the github context is the caller's, so
+  # event_name is never 'workflow_call'; only auto-cancel direct triggers.
+  cancel-in-progress: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }}
 
 jobs:
   build_and_test:
-    name: Build and Test on GPU (${{ matrix.runner }})
+    name: Build and Test on GPU (${{ matrix.runner }}) - Level ${{ inputs.test_level || '1' }}
     timeout-minutes: 720
     runs-on: ${{ matrix.runner }}
     strategy: