From 7f78fad74765c63cb4f62996d39e9d2c311d7273 Mon Sep 17 00:00:00 2001 From: Chun Fang Date: Wed, 25 Feb 2026 08:49:40 +0000 Subject: [PATCH 1/3] Patch aiter/sgl-kernel versions for MI300X FP8 DSR1 sglang benchmark - Pin aiter and sgl-kernel to specific commits required by the v0.5.8-rocm700-mi30x image. - This patch is expected to work only with the image lmsysorg/sglang:v0.5.8-rocm700-mi30x. - Joint work with Zhentao Chen. --- benchmarks/single_node/dsr1_fp8_mi300x.sh | 32 +++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/benchmarks/single_node/dsr1_fp8_mi300x.sh b/benchmarks/single_node/dsr1_fp8_mi300x.sh index 757b19dc5..f10fe9d1b 100644 --- a/benchmarks/single_node/dsr1_fp8_mi300x.sh +++ b/benchmarks/single_node/dsr1_fp8_mi300x.sh @@ -15,6 +15,38 @@ if [[ -n "$SLURM_JOB_ID" ]]; then echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME" fi +patch_sgl_components() { + local aiter_ref="9046b6f446e35c5d712ca092b9d84a5db8319ef8" + local sgl_kernel_ref="d40cb2f72551c4a597108dda16507e8188b666b7" + + if [[ ! -d /sgl-workspace ]]; then + echo "/sgl-workspace not found; assuming image ships correct versions." + return 0 + fi + + ( + set -e + + cd /sgl-workspace/aiter + git fetch && git checkout "$aiter_ref" + python setup.py develop + echo "aiter ($aiter_ref) installed." + + cd /sgl-workspace/sgl-kernel + git fetch && git checkout "$sgl_kernel_ref" + python setup_rocm.py install + echo "sgl-kernel ($sgl_kernel_ref) installed." + + cd /sgl-workspace/sglang + rm -f python/pyproject.toml + cp python/pyproject_other.toml python/pyproject.toml + pip install -e "python[all_hip]" + echo "sglang reinstalled."
+ ) +} +# Apply patch_sgl_components for lmsysorg/sglang:v0.5.8-rocm700-mi30x ONLY +patch_sgl_components + hf download "$MODEL" # Reference From db5eb71d8b1c2df60e039df3c7c9e5847a188523 Mon Sep 17 00:00:00 2001 From: Chun Fang Date: Wed, 25 Feb 2026 14:04:10 +0000 Subject: [PATCH 2/3] Fix aiter/sgl-kernel API mismatch causing cuda graph capture failure The previous aiter ref (9046b6f) changed get_mla_metadata_v1 to expect a Tensor for kv_last_page_lens, but the image's sglang still passed an int, crashing during cuda graph capture. Fix by fresh-cloning aiter at d2ca5a89, pinning sgl-kernel to 8bd6447 (now at sglang/sgl-kernel), and uninstalling stale packages before rebuilding to avoid leftover C extension conflicts. --- benchmarks/single_node/dsr1_fp8_mi300x.sh | 29 ++++++++++++++--------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/benchmarks/single_node/dsr1_fp8_mi300x.sh b/benchmarks/single_node/dsr1_fp8_mi300x.sh index f10fe9d1b..46e8fed35 100644 --- a/benchmarks/single_node/dsr1_fp8_mi300x.sh +++ b/benchmarks/single_node/dsr1_fp8_mi300x.sh @@ -16,28 +16,35 @@ if [[ -n "$SLURM_JOB_ID" ]]; then fi patch_sgl_components() { - local aiter_ref="9046b6f446e35c5d712ca092b9d84a5db8319ef8" - local sgl_kernel_ref="d40cb2f72551c4a597108dda16507e8188b666b7" + local work_dir="/sgl-workspace" + local aiter_ref="d2ca5a897" + local sgl_kernel_ref="8bd644765" - if [[ ! -d /sgl-workspace ]]; then - echo "/sgl-workspace not found; assuming image ships correct versions." + if [[ ! -d "$work_dir" ]]; then + echo "$work_dir not found; assuming image ships correct versions." 
return 0 fi ( set -e - cd /sgl-workspace/aiter - git fetch && git checkout "$aiter_ref" - python setup.py develop + pip uninstall sglang sgl-kernel amd-aiter -y + + cd "$work_dir" + rm -rf aiter + git clone --recursive https://github.com/ROCm/aiter.git + cd aiter + git fetch && git reset --hard "$aiter_ref" + rm -rf aiter/jit/**.so + PREBUILD_KERNELS=0 python setup.py develop echo "aiter ($aiter_ref) installed." - cd /sgl-workspace/sgl-kernel - git fetch && git checkout "$sgl_kernel_ref" - python setup_rocm.py install + cd "$work_dir/sglang/sgl-kernel" + git fetch && git reset --hard "$sgl_kernel_ref" + python setup_rocm.py develop echo "sgl-kernel ($sgl_kernel_ref) installed." - cd /sgl-workspace/sglang + cd "$work_dir/sglang" rm -f python/pyproject.toml cp python/pyproject_other.toml python/pyproject.toml pip install -e "python[all_hip]" From 1514e4c034febf7d10b46f294f6b2514b0ec816e Mon Sep 17 00:00:00 2001 From: Chun Fang Date: Thu, 26 Feb 2026 09:50:26 +0000 Subject: [PATCH 3/3] Update perf changelog --- perf-changelog.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 9c4c9e438..71248b67f 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -734,3 +734,11 @@ - "Fix MTP 1k8k conc-start from 256 to 4 to enable full concurrency sweep" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/699 +- config-keys: + - dsr1-fp8-mi300x-sglang + description: + - "patching aiter/sgl-kernel versions for MI300X FP8 DSR1 SGLang" + - "Include configuration files for three GEMM operations: https://github.com/ROCm/aiter/pull/2024" + - "Improve TPOT by using fp8 bmm in MLA and MI300X for DSR1/V3: https://github.com/sgl-project/sglang/pull/18624" + - "Broaden the optimized paths to all HIP platforms and add tuned FP8 GEMM configs: https://github.com/sgl-project/sglang/pull/18242" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/811