Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions benchmarks/single_node/dsr1_fp8_mi300x.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,45 @@ if [[ -n "$SLURM_JOB_ID" ]]; then
echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME"
fi

#######################################
# Rebuild aiter, sgl-kernel and sglang from pinned commits inside the image's
# source workspace.
#
# If the workspace directory does not exist the function prints a notice and
# returns 0 without changing anything. Otherwise all work happens in a
# subshell, so the caller's working directory and shell options are untouched.
#
# Arguments:
#   $1 - workspace directory (optional, default: /sgl-workspace)
#   $2 - aiter commit to check out (optional, default: d2ca5a897)
#   $3 - sgl-kernel commit to check out (optional, default: 8bd644765)
# Outputs:
#   Progress messages on stdout, plus whatever pip/git/python print.
# Returns:
#   0 on success or when the workspace is absent; otherwise the exit status
#   of the first step that failed.
#######################################
patch_sgl_components() {
  local work_dir="${1:-/sgl-workspace}"
  local aiter_ref="${2:-d2ca5a897}"
  local sgl_kernel_ref="${3:-8bd644765}"

  if [[ ! -d "$work_dir" ]]; then
    echo "$work_dir not found; assuming image ships correct versions."
    return 0
  fi

  (
    # Backstop only: bash ignores -e when this function is invoked from a
    # condition or an &&/|| list, so every step below also checks explicitly.
    set -e

    pip uninstall sglang sgl-kernel amd-aiter -y || exit

    cd "$work_dir" || exit
    rm -rf -- aiter || exit
    git clone --recursive https://github.com/ROCm/aiter.git || exit
    cd aiter || exit
    # Keep fetch and reset as separate statements: in `fetch && reset` a failed
    # fetch would not abort, and the build would continue at the wrong commit.
    git fetch || exit
    git reset --hard "$aiter_ref" || exit
    # Drop stale JIT artifacts at any depth (a plain `**.so` glob is not
    # recursive unless globstar is enabled).
    if [[ -d aiter/jit ]]; then
      find aiter/jit -type f -name '*.so' -delete || exit
    fi
    PREBUILD_KERNELS=0 python setup.py develop || exit
    echo "aiter ($aiter_ref) installed."

    cd "$work_dir/sglang/sgl-kernel" || exit
    git fetch || exit
    git reset --hard "$sgl_kernel_ref" || exit
    python setup_rocm.py develop || exit
    echo "sgl-kernel ($sgl_kernel_ref) installed."

    cd "$work_dir/sglang" || exit
    # Swap in the alternate pyproject before the editable install.
    rm -f -- python/pyproject.toml || exit
    cp -- python/pyproject_other.toml python/pyproject.toml || exit
    pip install -e "python[all_hip]" || exit
    echo "sglang reinstalled."
  )
}
# Apply patch_sgl_components for lmsysorg/sglang:v0.5.8-rocm700-mi30x ONLY.
# NOTE(review): the function only checks that its workspace directory exists,
# not the image tag, so it also runs on any other image shipping that
# directory — confirm this is intended.
patch_sgl_components

# Download the model named by $MODEL.
# NOTE(review): presumably `hf` is the Hugging Face CLI and MODEL is set by the
# caller — confirm.
hf download "$MODEL"

# Reference
Expand Down
8 changes: 8 additions & 0 deletions perf-changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -734,3 +734,11 @@
- "Fix MTP 1k8k conc-start from 256 to 4 to enable full concurrency sweep"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/699

- config-keys:
- dsr1-fp8-mi300x-sglang
description:
- "Patch aiter and sgl-kernel to pinned commits for MI300X FP8 DSR1 SGLang"
- "Include configuration files for three GEMM operations: https://github.com/ROCm/aiter/pull/2024"
- "Improve TPOT by using fp8 bmm in MLA and MI300X for DSR1/V3: https://github.com/sgl-project/sglang/pull/18624"
- "Broaden the optimized paths to all HIP platforms and add tuned FP8 GEMM configs: https://github.com/sgl-project/sglang/pull/18242"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/811