From 7f78fad74765c63cb4f62996d39e9d2c311d7273 Mon Sep 17 00:00:00 2001
From: Chun Fang <chun.fang@amd.com>
Date: Wed, 25 Feb 2026 08:49:40 +0000
Subject: [PATCH 1/3] Patch aiter/sgl-kernel versions for MI300X FP8 DSR1
 sglang benchmark

- Pin aiter and sgl-kernel to the specific commits required by the
  v0.5.8-rocm700-mi30x image.
- This patch is intended only for the image
    lmsysorg/sglang:v0.5.8-rocm700-mi30x
- Joint work with Zhentao Chen
---
 benchmarks/single_node/dsr1_fp8_mi300x.sh | 32 +++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/benchmarks/single_node/dsr1_fp8_mi300x.sh b/benchmarks/single_node/dsr1_fp8_mi300x.sh
index 757b19dc5..f10fe9d1b 100644
--- a/benchmarks/single_node/dsr1_fp8_mi300x.sh
+++ b/benchmarks/single_node/dsr1_fp8_mi300x.sh
@@ -15,6 +15,38 @@ if [[ -n "$SLURM_JOB_ID" ]]; then
   echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME"
 fi
 
+patch_sgl_components() {
+    local aiter_ref="9046b6f446e35c5d712ca092b9d84a5db8319ef8"
+    local sgl_kernel_ref="d40cb2f72551c4a597108dda16507e8188b666b7"
+
+    if [[ ! -d /sgl-workspace ]]; then
+        echo "/sgl-workspace not found; assuming image ships correct versions."
+        return 0
+    fi
+
+    (
+        set -e
+
+        cd /sgl-workspace/aiter
+        git fetch && git checkout "$aiter_ref"
+        python setup.py develop
+        echo "aiter ($aiter_ref) installed."
+
+        cd /sgl-workspace/sgl-kernel
+        git fetch && git checkout "$sgl_kernel_ref"
+        python setup_rocm.py install
+        echo "sgl-kernel ($sgl_kernel_ref) installed."
+
+        cd /sgl-workspace/sglang
+        rm -f python/pyproject.toml
+        cp python/pyproject_other.toml python/pyproject.toml
+        pip install -e "python[all_hip]"
+        echo "sglang reinstalled."
+    )
+}
+# Apply patch_sgl_components for lmsysorg/sglang:v0.5.8-rocm700-mi30x ONLY
+patch_sgl_components
+
 hf download "$MODEL"
 
 # Reference

From db5eb71d8b1c2df60e039df3c7c9e5847a188523 Mon Sep 17 00:00:00 2001
From: Chun Fang <chun.fang@amd.com>
Date: Wed, 25 Feb 2026 14:04:10 +0000
Subject: [PATCH 2/3] Fix aiter/sgl-kernel API mismatch causing cuda graph
 capture failure

The previous aiter ref (9046b6f) changed get_mla_metadata_v1 to expect
a Tensor for kv_last_page_lens, but the image's sglang still passes an
int, which crashes during CUDA graph capture.

Fix by fresh-cloning aiter at d2ca5a89, pinning sgl-kernel to 8bd6447
(which now lives under sglang/sgl-kernel), and uninstalling the stale
packages before rebuilding to avoid leftover C-extension conflicts.
---
 benchmarks/single_node/dsr1_fp8_mi300x.sh | 29 ++++++++++++++---------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/benchmarks/single_node/dsr1_fp8_mi300x.sh b/benchmarks/single_node/dsr1_fp8_mi300x.sh
index f10fe9d1b..46e8fed35 100644
--- a/benchmarks/single_node/dsr1_fp8_mi300x.sh
+++ b/benchmarks/single_node/dsr1_fp8_mi300x.sh
@@ -16,28 +16,35 @@ if [[ -n "$SLURM_JOB_ID" ]]; then
 fi
 
 patch_sgl_components() {
-    local aiter_ref="9046b6f446e35c5d712ca092b9d84a5db8319ef8"
-    local sgl_kernel_ref="d40cb2f72551c4a597108dda16507e8188b666b7"
+    local work_dir="/sgl-workspace"
+    local aiter_ref="d2ca5a897"
+    local sgl_kernel_ref="8bd644765"
 
-    if [[ ! -d /sgl-workspace ]]; then
-        echo "/sgl-workspace not found; assuming image ships correct versions."
+    if [[ ! -d "$work_dir" ]]; then
+        echo "$work_dir not found; assuming image ships correct versions."
         return 0
     fi
 
     (
         set -e
 
-        cd /sgl-workspace/aiter
-        git fetch && git checkout "$aiter_ref"
-        python setup.py develop
+        pip uninstall sglang sgl-kernel amd-aiter -y
+
+        cd "$work_dir"
+        rm -rf aiter
+        git clone --recursive https://github.com/ROCm/aiter.git
+        cd aiter
+        git fetch && git reset --hard "$aiter_ref"
+        rm -rf aiter/jit/**.so
+        PREBUILD_KERNELS=0 python setup.py develop
         echo "aiter ($aiter_ref) installed."
 
-        cd /sgl-workspace/sgl-kernel
-        git fetch && git checkout "$sgl_kernel_ref"
-        python setup_rocm.py install
+        cd "$work_dir/sglang/sgl-kernel"
+        git fetch && git reset --hard "$sgl_kernel_ref"
+        python setup_rocm.py develop
         echo "sgl-kernel ($sgl_kernel_ref) installed."
 
-        cd /sgl-workspace/sglang
+        cd "$work_dir/sglang"
         rm -f python/pyproject.toml
         cp python/pyproject_other.toml python/pyproject.toml
         pip install -e "python[all_hip]"

From 1514e4c034febf7d10b46f294f6b2514b0ec816e Mon Sep 17 00:00:00 2001
From: Chun Fang <chun.fang@amd.com>
Date: Thu, 26 Feb 2026 09:50:26 +0000
Subject: [PATCH 3/3] Update perf changelog

---
 perf-changelog.yaml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 9c4c9e438..71248b67f 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -734,3 +734,11 @@
     - "Fix MTP 1k8k conc-start from 256 to 4 to enable full concurrency sweep"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/699
   
+- config-keys:
+    - dsr1-fp8-mi300x-sglang
+  description:
+    - "Patch aiter/sgl-kernel versions for MI300X FP8 DSR1 SGLang"
+    - "Include configuration files for three GEMM operations: https://github.com/ROCm/aiter/pull/2024"
+    - "Improve TPOT by using fp8 bmm in MLA and MI300X for DSR1/V3: https://github.com/sgl-project/sglang/pull/18624"
+    - "Broaden the optimized paths to all HIP platforms and add tuned FP8 GEMM configs: https://github.com/sgl-project/sglang/pull/18242"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/811