# Multi-Turn Sweep - tp=[1, 2, 4, 8] users=[8, 16, 32, 64, 128, 256, 512, 1024, 2048] offload=["on", "off"] #26
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Manually dispatched sweep of the multi-turn benchmark.
# The three *_values inputs are JSON arrays passed as strings; the `sweep`
# job expands each of them with fromJson() into one matrix axis.
name: Multi-Turn Benchmark Sweep
run-name: >-
  Multi-Turn Sweep -
  tp=${{ inputs.tp_values }}
  users=${{ inputs.user_values }}
  offload=${{ inputs.offload_values }}

on:
  # Push trigger is currently disabled; only manual dispatch is active.
  # push:
  #   branches:
  #     - experimental/multi-turn-benchmark
  #   paths:
  #     - .github/workflows/multiturn-sweep.yml
  workflow_dispatch:
    inputs:
      # --- Matrix axes (JSON arrays encoded as strings) ---------------------
      tp_values:
        description: 'TP sizes (JSON array)'
        type: string
        required: true
        default: '[1, 2, 4, 8]'
      user_values:
        description: 'Concurrent user counts (JSON array)'
        type: string
        required: true
        default: '[8, 16, 32, 64, 128, 256, 512, 1024, 2048]'
      offload_values:
        description: 'Offload modes (JSON array: on/off/noprefix)'
        type: string
        required: true
        default: '["on", "off", "noprefix"]'

      # --- Per-run benchmark settings, forwarded unchanged to every cell ----
      duration:
        description: 'Benchmark duration in seconds'
        type: string
        required: true
        default: '300'
      request_rate:
        description: 'Request rate per client (Poisson, req/s). 0 = no delay.'
        type: string
        required: false
        default: '0'
      total_cpu_dram_gb:
        description: 'Total CPU DRAM for KV offload (GB)'
        type: string
        required: true
        default: '100'

      # --- What to run ------------------------------------------------------
      image:
        description: 'Container image'
        type: string
        required: true
        default: 'vllm/vllm-openai:v0.16.0'
      model:
        description: 'Model name'
        type: string
        required: true
        default: 'nvidia/Llama-3.3-70B-Instruct-FP4'
      # Optional; the collect job falls back to github.ref when this is empty.
      ref:
        description: 'Git ref (branch/sha) to checkout'
        type: string
        required: false
jobs:
  # ---------------------------------------------------------------------------
  # Matrix benchmark jobs — each cell calls the multiturn template
  # ---------------------------------------------------------------------------
  sweep:
    name: sweep /
    uses: ./.github/workflows/benchmark-multiturn-tmpl.yml
    secrets: inherit
    strategy:
      # Let the remaining cells finish even if one combination fails.
      fail-fast: false
      # One cell per (tp, users, offload) combination; each axis is decoded
      # from the corresponding JSON-array dispatch input.
      matrix:
        tp: ${{ fromJson(inputs.tp_values) }}
        users: ${{ fromJson(inputs.user_values) }}
        offload: ${{ fromJson(inputs.offload_values) }}
    with:
      # Fixed for every cell.
      runner: b200
      image: ${{ inputs.image }}
      model: ${{ inputs.model }}
      # Per-cell values derived from the matrix.
      # NOTE(review): the collect job downloads artifacts matching
      # 'multiturn_*'; presumably the template names its artifact after
      # exp-name — confirm against benchmark-multiturn-tmpl.yml.
      exp-name: "multiturn_tp${{ matrix.tp }}_users${{ matrix.users }}_offload${{ matrix.offload }}"
      tp: "${{ matrix.tp }}"
      users: "${{ matrix.users }}"
      offload-mode: ${{ matrix.offload }}
      # Dispatch inputs passed through unchanged.
      duration: ${{ inputs.duration }}
      request-rate: ${{ inputs.request_rate }}
      total-cpu-dram-gb: ${{ inputs.total_cpu_dram_gb }}
      ref: ${{ inputs.ref }}

  # ---------------------------------------------------------------------------
  # Collect & aggregate results
  # ---------------------------------------------------------------------------
  collect:
    name: Collect results
    runs-on: ubuntu-latest
    needs: sweep
    # Run regardless of the sweep outcome so that results from the cells that
    # did finish are still aggregated.
    if: always()
    # Least privilege for GITHUB_TOKEN: checkout authenticates with REPO_PAT
    # and the remaining steps only install packages and move artifacts, so
    # read-only repository access is sufficient here.
    permissions:
      contents: read
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          token: ${{ secrets.REPO_PAT }}
          fetch-depth: 1
          # Use the requested ref, or the ref the workflow was dispatched on.
          ref: ${{ inputs.ref || github.ref }}

      # NOTE(review): setup-python and download-artifact are referenced by
      # mutable tag, unlike the SHA-pinned checkout and upload-artifact steps;
      # consider pinning them to a commit SHA for consistency.
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: pip install pandas matplotlib numpy

      # Fetch every artifact whose name starts with "multiturn_" into results/.
      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          pattern: 'multiturn_*'
          path: results/

      # Reads the downloaded results/ tree and writes its output to aggregated/.
      - name: Run aggregation
        run: |
          python experimental/multiturn/vllm_benchmark/scripts/collect_sweep_results.py results/ aggregated/

      - name: Upload aggregated results
        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f  # v6.0.0
        with:
          name: multiturn_aggregated
          path: aggregated/