Skip to content

Run Sweep - Add MiniMax-M2.5 FP8 vLLM benchmark for B200 #1131

Run Sweep - Add MiniMax-M2.5 FP8 vLLM benchmark for B200

Run Sweep - Add MiniMax-M2.5 FP8 vLLM benchmark for B200 #1131

Workflow file for this run

# Benchmark sweep workflow. Triggered by changes to perf-changelog.yaml, either
# on a push to main or on a pull request that targets main.
name: 'Run Sweep'
run-name: "Run Sweep - ${{ github.event.pull_request.title || github.event.head_commit.message }}"

# Runs are grouped per pull request number (or per commit SHA when there is no
# pull request); a newer run in the same group cancels the one in progress.
concurrency:
  group: "sweep-${{ github.event.pull_request.number || github.sha }}"
  cancel-in-progress: true

on:
  push:
    branches: [main]
    paths: ['perf-changelog.yaml']
  pull_request:
    branches: [main]
    # No 'opened' type here: only these three PR activities start a run.
    types: [ready_for_review, synchronize, labeled]
    paths: ['perf-changelog.yaml']
jobs:
check-newline:
  # Lint job for non-draft pull requests: fails when perf-changelog.yaml does
  # not end with a newline character.
  if: ${{ github.event_name == 'pull_request' && !github.event.pull_request.draft }}
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

    - name: Check perf-changelog.yaml ends with newline
      run: |
        # Command substitution strips a trailing newline, so an empty result
        # means the last byte of the file is a newline.
        if [ -z "$(tail -c 1 perf-changelog.yaml)" ]; then
          exit 0
        fi
        echo "::error::perf-changelog.yaml must end with a newline character"
        echo "Please add a newline at the end of the file to avoid diff issues in subsequent PRs."
        exit 1
setup:
  # Produces the `search-space-config` output consumed by every sweep job.
  #
  # Runs for non-draft pull requests labelled 'sweep-enabled', and for any
  # non-pull-request event whose head commit message lacks '[skip-sweep]'.
  runs-on: ubuntu-latest
  if: >-
    (github.event_name == 'pull_request' && !github.event.pull_request.draft && contains(github.event.pull_request.labels.*.name, 'sweep-enabled')) ||
    (github.event_name != 'pull_request' && !contains(github.event.head_commit.message, '[skip-sweep]'))
  outputs:
    search-space-config: ${{ steps.setup.outputs.search-space-config }}
  steps:
    - name: Checkout code
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        # Full history; presumably needed so both refs passed to
        # process_changelog.py resolve locally - TODO confirm.
        fetch-depth: 0

    - id: setup
      # GitHub context values are handed to the script through environment
      # variables rather than being interpolated into the script text, so
      # their content is never parsed as shell code.
      env:
        EVENT_NAME: ${{ github.event_name }}
        PR_BASE_REF: ${{ github.base_ref }}
        PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        PUSH_BEFORE_SHA: ${{ github.event.before }}
        PUSH_AFTER_SHA: ${{ github.event.after }}
      run: |
        pip install pydantic

        # Pull requests use base branch vs. PR head; other events use the
        # before/after SHAs from the event payload.
        if [ "$EVENT_NAME" == "pull_request" ]; then
          BASE_REF="origin/${PR_BASE_REF}"
          HEAD_REF="${PR_HEAD_SHA}"
        else
          BASE_REF="${PUSH_BEFORE_SHA}"
          HEAD_REF="${PUSH_AFTER_SHA}"
        fi

        CONFIG_JSON=$(python3 "${GITHUB_WORKSPACE}/utils/process_changelog.py" \
          --changelog-file "${GITHUB_WORKSPACE}/perf-changelog.yaml" \
          --base-ref "$BASE_REF" \
          --head-ref "$HEAD_REF")

        # The script's stdout becomes the step output as-is.
        echo "search-space-config=$CONFIG_JSON" >> "$GITHUB_OUTPUT"
sweep-multi-node-1k1k:
  # Multi-node sweep over the '1k1k' entries of the search-space config.
  # One reusable-workflow call per matrix entry; the job is skipped when the
  # setup output has no multi_node['1k1k'] value.
  name: "multi-node 1k1k /"
  needs: setup
  if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null' }}
  uses: ./.github/workflows/benchmark-multinode-tmpl.yml
  secrets: inherit
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }}
  # Anchored so the 1k8k and 8k1k multi-node jobs can alias the same inputs.
  with: &multi-node-inputs
    # Experiment identity
    exp-name: ${{ matrix.config.exp-name }}
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    framework: ${{ matrix.config.framework }}
    # Model
    model: ${{ matrix.config.model }}
    model-prefix: ${{ matrix.config.model-prefix }}
    precision: ${{ matrix.config.precision }}
    # Workload
    isl: ${{ matrix.config.isl }}
    osl: ${{ matrix.config.osl }}
    max-model-len: ${{ matrix.config.max-model-len }}
    conc-list: ${{ toJson(matrix.config.conc) }}
    spec-decoding: ${{ matrix.config.spec-decoding }}
    disagg: ${{ matrix.config.disagg }}
    # Prefill workers
    prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
    prefill-tp: ${{ matrix.config.prefill.tp }}
    prefill-ep: ${{ matrix.config.prefill.ep }}
    prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
    prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
    # Decode workers
    decode-num-worker: ${{ matrix.config.decode.num-worker }}
    decode-tp: ${{ matrix.config.decode.tp }}
    decode-ep: ${{ matrix.config.decode.ep }}
    decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
    decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
sweep-multi-node-1k8k:
  # Multi-node sweep over the '1k8k' entries of the search-space config;
  # skipped when the setup output has no multi_node['1k8k'] value.
  name: "multi-node 1k8k /"
  needs: setup
  if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k8k']) != 'null' }}
  uses: ./.github/workflows/benchmark-multinode-tmpl.yml
  secrets: inherit
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k8k'] }}
  # Same input mapping as sweep-multi-node-1k1k, via its anchor.
  with: *multi-node-inputs
sweep-multi-node-8k1k:
  # Multi-node sweep over the '8k1k' entries of the search-space config;
  # skipped when the setup output has no multi_node['8k1k'] value.
  name: "multi-node 8k1k /"
  needs: setup
  if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null' }}
  uses: ./.github/workflows/benchmark-multinode-tmpl.yml
  secrets: inherit
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k'] }}
  # Same input mapping as sweep-multi-node-1k1k, via its anchor.
  with: *multi-node-inputs
sweep-single-node-1k1k:
  # Single-node sweep over the '1k1k' entries of the search-space config.
  # One reusable-workflow call per matrix entry; the job is skipped when the
  # setup output has no single_node['1k1k'] value.
  name: "single-node 1k1k /"
  needs: setup
  if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' }}
  uses: ./.github/workflows/benchmark-tmpl.yml
  secrets: inherit
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}
  # Anchored so the 1k8k and 8k1k single-node jobs can alias the same inputs.
  with: &single-node-inputs
    # Experiment identity
    exp-name: ${{ matrix.config.exp-name }}
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    framework: ${{ matrix.config.framework }}
    # Model
    model: ${{ matrix.config.model }}
    model-prefix: ${{ matrix.config.model-prefix }}
    precision: ${{ matrix.config.precision }}
    # Workload
    isl: ${{ matrix.config.isl }}
    osl: ${{ matrix.config.osl }}
    max-model-len: ${{ matrix.config.max-model-len }}
    conc: ${{ matrix.config.conc }}
    # Parallelism
    tp: ${{ matrix.config.tp }}
    ep: ${{ matrix.config.ep }}
    dp-attn: ${{ matrix.config.dp-attn }}
    # Feature toggles
    spec-decoding: ${{ matrix.config.spec-decoding }}
    disagg: ${{ matrix.config.disagg }}
    run-eval: ${{ matrix.config.run-eval }}
sweep-single-node-1k8k:
  # Single-node sweep over the '1k8k' entries of the search-space config;
  # skipped when the setup output has no single_node['1k8k'] value.
  name: "single-node 1k8k /"
  needs: setup
  if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k8k']) != 'null' }}
  uses: ./.github/workflows/benchmark-tmpl.yml
  secrets: inherit
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k8k'] }}
  # Same input mapping as sweep-single-node-1k1k, via its anchor.
  with: *single-node-inputs
sweep-single-node-8k1k:
  # Single-node sweep over the '8k1k' entries of the search-space config;
  # skipped when the setup output has no single_node['8k1k'] value.
  name: "single-node 8k1k /"
  needs: setup
  if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' }}
  uses: ./.github/workflows/benchmark-tmpl.yml
  secrets: inherit
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['8k1k'] }}
  # Same input mapping as sweep-single-node-1k1k, via its anchor.
  with: *single-node-inputs
collect-results:
  # Calls the collect-results reusable workflow after all sweep jobs finish.
  # always() lets it run even when some sweeps failed or were skipped; it is
  # only suppressed when setup itself was skipped.
  needs:
    - sweep-single-node-1k1k
    - sweep-single-node-1k8k
    - sweep-single-node-8k1k
    - sweep-multi-node-1k1k
    - sweep-multi-node-1k8k
    - sweep-multi-node-8k1k
    - setup
  if: ${{ always() && needs.setup.result != 'skipped' }}
  uses: ./.github/workflows/collect-results.yml
  secrets: inherit
  with:
    result-prefix: "bmk"
collect-evals:
  # Calls the collect-evals reusable workflow after all sweep jobs finish.
  # always() lets it run even when some sweeps failed or were skipped; it is
  # only suppressed when setup itself was skipped.
  needs:
    - sweep-single-node-1k1k
    - sweep-single-node-1k8k
    - sweep-single-node-8k1k
    - sweep-multi-node-1k1k
    - sweep-multi-node-1k8k
    - sweep-multi-node-8k1k
    - setup
  if: ${{ always() && needs.setup.result != 'skipped' }}
  uses: ./.github/workflows/collect-evals.yml
  secrets: inherit
upload-changelog-metadata:
  # Extracts the `changelog_metadata` field from the setup output and uploads
  # it as the `changelog-metadata` artifact. Runs whenever setup was not
  # skipped, regardless of how collect-results ended.
  runs-on: ubuntu-latest
  needs:
    - setup
    - collect-results
  if: ${{ always() && needs.setup.result != 'skipped' }}
  steps:
    - name: Extract and save changelog metadata
      env:
        CONFIG_JSON: ${{ needs.setup.outputs.search-space-config }}
      run: |
        jq '.changelog_metadata' <<< "$CONFIG_JSON" > changelog_metadata.json

    - name: Upload changelog artifact
      uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
      with:
        name: changelog-metadata
        path: changelog_metadata.json
calc-success-rate:
  # Downloads the `results_*` artifacts, runs utils/calc_success_rate.py and
  # uploads the resulting `<STATS_FILENAME>.json` as the `run-stats` artifact.
  # Runs unless collect-results was skipped.
  runs-on: ubuntu-latest
  needs: collect-results
  if: ${{ always() && needs.collect-results.result != 'skipped' }}
  env:
    RESULTS_DIR: "results/"
    STATS_FILENAME: "run_stats"
    # Job-wide token override with the repository PAT; the checkout step below
    # uses the same secret.
    GITHUB_TOKEN: ${{ secrets.REPO_PAT }}
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        fetch-depth: 0
        token: ${{ secrets.REPO_PAT }}

    - name: Download results artifacts
      uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0
      with:
        pattern: results_*
        path: ${{ env.RESULTS_DIR }}

    - name: Install python dependencies
      run: pip install PyGithub

    - name: Calculate success rate
      run: python3 utils/calc_success_rate.py $STATS_FILENAME

    - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
      with:
        name: "run-stats"
        path: ${{ env.STATS_FILENAME }}.json
trigger-ingest:
  # Sends a `repository_dispatch` event (type `ingest-results`) to the
  # inferencemax-app repository, carrying this run's id and attempt number.
  #
  # Only runs for pushes to main, and only when at least one of
  # collect-results / collect-evals was not skipped.
  needs:
    - collect-results
    - collect-evals
    - calc-success-rate
    - upload-changelog-metadata
  if: >-
    always() &&
    github.event_name == 'push' &&
    github.ref == 'refs/heads/main' &&
    (needs.collect-results.result != 'skipped' || needs.collect-evals.result != 'skipped')
  runs-on: ubuntu-latest
  steps:
    - name: Trigger database ingest
      env:
        # Passed through the environment so the token is not substituted into
        # the script text itself.
        INGEST_TOKEN: ${{ secrets.INFX_FRONTEND_PAT }}
      run: |
        # -f makes curl exit non-zero on an HTTP error so the step fails.
        # The Accept value is the media type documented for the REST API.
        curl -sSf -X POST \
          -H "Authorization: Bearer ${INGEST_TOKEN}" \
          -H "Accept: application/vnd.github+json" \
          https://api.github.com/repos/SemiAnalysisAI/inferencemax-app/dispatches \
          -d '{
            "event_type": "ingest-results",
            "client_payload": {
              "run-id": "${{ github.run_id }}",
              "run-attempt": "${{ github.run_attempt }}"
            }
          }'