#######################################
# Rebuild aiter, sgl-kernel and sglang from pinned commits inside the image's
# source workspace.
#
# When the workspace directory does not exist nothing is touched and the
# function returns 0. Otherwise the rebuild runs in a subshell under `set -e`,
# so the caller's working directory and shell options are left alone and the
# first failing step aborts the rest of the rebuild.
#
# The perf-changelog entry added together with this function cites
# ROCm/aiter PR 2024 and sgl-project/sglang PRs 18624 and 18242 as the changes
# being picked up.
# NOTE(review): which pin carries which PR is not visible here - confirm.
#
# Arguments (all optional; the defaults are the originally hard-coded values):
#   $1 - workspace directory holding the sources (default: /sgl-workspace)
#   $2 - aiter commit to check out and build     (default: d2ca5a897)
#   $3 - commit checked out from within sglang/sgl-kernel before building
#        (default: 8bd644765)
# Outputs:
#   Progress messages on stdout, plus whatever pip/git/python print.
# Returns:
#   0 on success or when the workspace is absent; otherwise the non-zero exit
#   status of the subshell.
# Caveat:
#   Call it as a plain statement. Bash ignores `set -e` inside the function
#   when the call is part of an `if`/`while` condition, a `!` pipeline, or a
#   non-final position of an `&&`/`||` list.
#######################################
patch_sgl_components() {
    local work_dir="${1:-/sgl-workspace}"
    local aiter_ref="${2:-d2ca5a897}"
    local sgl_kernel_ref="${3:-8bd644765}"

    if [[ ! -d "$work_dir" ]]; then
        echo "$work_dir not found; assuming image ships correct versions."
        return 0
    fi

    (
        set -e

        pip uninstall sglang sgl-kernel amd-aiter -y

        # Start from a fresh aiter clone so no stale checkout is reused.
        cd "$work_dir"
        rm -rf aiter
        git clone --recursive https://github.com/ROCm/aiter.git
        cd aiter
        # Keep fetch and reset as separate statements: in `a && b` a failure
        # of `a` is exempt from `set -e`, which would silently skip the reset
        # and build whatever commit happens to be checked out.
        git fetch
        git reset --hard "$aiter_ref"
        # NOTE(review): `git reset --hard` does not move submodule checkouts.
        # If "$aiter_ref" pins different submodule commits than the cloned
        # HEAD, a `git submodule update --init --recursive` is needed here -
        # confirm.
        # Remove shared objects sitting directly in aiter/jit. (`**.so` in the
        # earlier revision globbed exactly like `*.so`.)
        rm -rf aiter/jit/*.so
        PREBUILD_KERNELS=0 python setup.py develop
        echo "aiter ($aiter_ref) installed."

        cd "$work_dir/sglang/sgl-kernel"
        git fetch
        git reset --hard "$sgl_kernel_ref"
        python setup_rocm.py develop
        echo "sgl-kernel ($sgl_kernel_ref) installed."

        # Swap in the alternate pyproject before the editable install.
        cd "$work_dir/sglang"
        rm -f python/pyproject.toml
        cp python/pyproject_other.toml python/pyproject.toml
        pip install -e "python[all_hip]"
        echo "sglang reinstalled."
    )
}
# Apply patch_sgl_components for lmsysorg/sglang:v0.5.8-rocm700-mi30x ONLY
patch_sgl_components