Skip to content

e2e Test - Test H200 DGXC data dir fix (PR #902) - dsr1 sglang #1063

e2e Test - Test H200 DGXC data dir fix (PR #902) - dsr1 sglang

e2e Test - Test H200 DGXC data dir fix (PR #902) - dsr1 sglang #1063

Workflow file for this run

# End-to-end test entry point. The workflow can be started by hand
# (workflow_dispatch) or invoked from another workflow (workflow_call);
# both triggers declare the same three inputs.
name: End-to-End Tests
# Run title: the explicit test name when given, otherwise the CLI command.
run-name: e2e Test - ${{ inputs.test-name || inputs.generate-cli-command || github.event.inputs.generate-cli-command }}
on:
  workflow_dispatch:
    inputs:
      generate-cli-command:
        type: string
        required: true
        description: 'Command passed to generate matrix script'
      test-name:
        type: string
        required: false
        description: 'Name for this test run'
      ref:
        type: string
        required: false
        description: 'Ref (branch/sha) to checkout for generating configs'
  workflow_call:
    inputs:
      generate-cli-command:
        type: string
        required: true
        description: 'Command passed to generate matrix script'
      test-name:
        type: string
        required: false
        description: 'Name for this test run'
      ref:
        type: string
        required: false
        description: 'Ref (branch/sha) to checkout for generating configs'
jobs:
# Builds the sweep matrix with utils/matrix_logic/generate_sweep_configs.py
# and publishes it as two job outputs: entries without a 'prefill' key
# (single-node-config) and entries with one (multi-node-config).
get-jobs:
  runs-on: ubuntu-latest
  outputs:
    single-node-config: ${{ steps.get-jobs.outputs.single-node-config }}
    multi-node-config: ${{ steps.get-jobs.outputs.multi-node-config }}
  steps:
    # Exactly one of the two checkout steps runs, depending on whether the
    # caller supplied a ref.
    - name: Checkout code (ref)
      if: ${{ inputs.ref && inputs.ref != '' }}
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      with:
        ref: ${{ inputs.ref }}
    - name: Checkout code (default)
      if: ${{ !inputs.ref || inputs.ref == '' }}
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
    - id: get-jobs
      env:
        # The caller-supplied command reaches the script as data, never as
        # script text: expanding the expression inside `run:` would let the
        # input inject arbitrary shell code.
        GENERATE_CLI_COMMAND: ${{ inputs.generate-cli-command || github.event.inputs.generate-cli-command }}
      run: |
        pip install pydantic
        # xargs splits the command into arguments and honours quotes and
        # backslashes, but executes nothing. Shell expansions (globs, $VAR,
        # command substitution) inside the command are NOT interpreted.
        CONFIG_JSON=$(printf '%s\n' "$GENERATE_CLI_COMMAND" \
          | xargs python3 "${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py")
        # Partition the generated list on the presence of a 'prefill' key.
        SINGLE=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' not in x]))")
        MULTI=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' in x]))")
        echo "single-node-config=$SINGLE" >> "$GITHUB_OUTPUT"
        echo "multi-node-config=$MULTI" >> "$GITHUB_OUTPUT"
# Calls benchmark-multinode-tmpl.yml once per entry of the multi-node matrix
# produced by get-jobs.
test-sweep-multi-node:
  name: multi-node /
  needs: [get-jobs]
  # Nothing to run when get-jobs emitted an empty multi-node list.
  if: needs.get-jobs.outputs.multi-node-config != '[]'
  uses: ./.github/workflows/benchmark-multinode-tmpl.yml
  secrets: inherit
  strategy:
    # One failing matrix entry does not cancel the others.
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.get-jobs.outputs.multi-node-config) }}
  with:
    # Values read from the top level of the matrix entry.
    exp-name: ${{ matrix.config.exp-name }}
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    model: ${{ matrix.config.model }}
    model-prefix: ${{ matrix.config.model-prefix }}
    framework: ${{ matrix.config.framework }}
    precision: ${{ matrix.config.precision }}
    isl: ${{ matrix.config.isl }}
    osl: ${{ matrix.config.osl }}
    max-model-len: ${{ matrix.config.max-model-len }}
    # conc is serialised to a JSON string before being handed over.
    conc-list: ${{ toJson(matrix.config.conc) }}
    spec-decoding: ${{ matrix.config.spec-decoding }}
    disagg: ${{ matrix.config.disagg }}
    # Values read from matrix.config.prefill.
    prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
    prefill-tp: ${{ matrix.config.prefill.tp }}
    prefill-ep: ${{ matrix.config.prefill.ep }}
    prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
    prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
    # Values read from matrix.config.decode.
    decode-num-worker: ${{ matrix.config.decode.num-worker }}
    decode-tp: ${{ matrix.config.decode.tp }}
    decode-ep: ${{ matrix.config.decode.ep }}
    decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
    decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
    # Forward the workflow's own ref input unchanged.
    ref: ${{ inputs.ref }}
# Calls benchmark-tmpl.yml once per entry of the single-node matrix produced
# by get-jobs.
test-sweep-single-node:
  name: single-node /
  needs: [get-jobs]
  # Nothing to run when get-jobs emitted an empty single-node list.
  if: needs.get-jobs.outputs.single-node-config != '[]'
  uses: ./.github/workflows/benchmark-tmpl.yml
  secrets: inherit
  strategy:
    # One failing matrix entry does not cancel the others.
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.get-jobs.outputs.single-node-config) }}
  with:
    # Every value except ref is read from the matrix entry.
    exp-name: ${{ matrix.config.exp-name }}
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    model: ${{ matrix.config.model }}
    model-prefix: ${{ matrix.config.model-prefix }}
    framework: ${{ matrix.config.framework }}
    precision: ${{ matrix.config.precision }}
    isl: ${{ matrix.config.isl }}
    osl: ${{ matrix.config.osl }}
    max-model-len: ${{ matrix.config.max-model-len }}
    tp: ${{ matrix.config.tp }}
    ep: ${{ matrix.config.ep }}
    dp-attn: ${{ matrix.config.dp-attn }}
    conc: ${{ matrix.config.conc }}
    spec-decoding: ${{ matrix.config.spec-decoding }}
    disagg: ${{ matrix.config.disagg }}
    run-eval: ${{ matrix.config.run-eval }}
    # Forward the workflow's own ref input unchanged.
    ref: ${{ inputs.ref }}
# Calls collect-results.yml with the "bmk" result prefix once both sweep jobs
# have finished; always() lets it run regardless of how they ended.
collect-results:
  needs:
    - test-sweep-multi-node
    - test-sweep-single-node
  if: always()
  uses: ./.github/workflows/collect-results.yml
  with:
    result-prefix: bmk
  secrets: inherit
# Calls collect-evals.yml once both sweep jobs have finished; always() lets
# it run regardless of how they ended.
collect-evals:
  needs:
    - test-sweep-multi-node
    - test-sweep-single-node
  if: always()
  uses: ./.github/workflows/collect-evals.yml
  secrets: inherit
# Downloads the results_* artifacts, runs utils/calc_success_rate.py and
# uploads the resulting <STATS_FILENAME>.json as the "run-stats" artifact.
# always() lets the job run regardless of how the collect jobs ended.
calc-success-rate:
  runs-on: ubuntu-latest
  needs:
    - collect-results
    - collect-evals
  if: always()
  env:
    GITHUB_TOKEN: ${{ secrets.REPO_PAT }}
    RESULTS_DIR: 'results/'
    STATS_FILENAME: 'run_stats'
  steps:
    # Full-history checkout authenticated with the REPO_PAT secret.
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      with:
        fetch-depth: 0
        token: ${{ secrets.REPO_PAT }}
    - name: Download results artifacts
      uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3  # v8.0.0
      with:
        pattern: 'results_*'
        path: ${{ env.RESULTS_DIR }}
    - name: Install python dependencies
      run: pip install PyGithub
    - name: Calculate success rate
      run: python3 utils/calc_success_rate.py $STATS_FILENAME
    # Publish the stats file named after STATS_FILENAME.
    - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f  # v7.0.0
      with:
        name: 'run-stats'
        path: ${{ env.STATS_FILENAME }}.json