Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

🏆 Grand Prize Winner Project for AMD Developer Challenge 2025

- [2026/02] All2All, GEMM-RS, and AG-GEMM kernels are now open-sourced for [AMD Developer Challenge 2025: Distributed Inference](https://amdchallenge2025.datamonsters.com/)
### News 🔥

- [2025/11] [Easily Build and Share ROCm Kernels with Hugging Face](https://huggingface.co/blog/build-rocm-kernels) – Discover how to easily build and share RadeonFlow_Kernels using [Hugging Face's kernel-builder](https://github.com/huggingface/kernel-builder).

- [2025/06] [AMD Developer Cloud](https://www.amd.com/en/developer/resources/cloud-access/amd-developer-cloud.html) now provides free access to AMD Instinct accelerators, so you can try out our project on their MI300X.
Expand Down
3 changes: 3 additions & 0 deletions dist-infer/.clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Use 4 columns per indentation level.
IndentWidth: 4
# When arguments do not fit on one line, break after the opening bracket and
# place the closing bracket on its own line.
AlignAfterOpenBracket: BlockIndent
# Keep all constructor initializers on the current line if they fit;
# otherwise put each initializer on its own line.
PackConstructorInitializers: CurrentLine
3 changes: 3 additions & 0 deletions dist-infer/.clangd
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# clangd settings: parse every translation unit as HIP against the ROCm
# installation under /opt/rocm.
CompileFlags:
  # Force HIP language mode and tell clang where ROCm is installed.
  Add: ["-xhip", "--rocm-path=/opt/rocm"]
  # Drop any -x<language> flag that comes from the compile command so it
  # cannot conflict with the -xhip added above.
  Remove: ["-x*"]
11 changes: 11 additions & 0 deletions dist-infer/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Log and build output directories (build/ is the Makefile's default BUILD_DIR).
logs/
build/
# Local tooling / editor state.
.cache/
.vscode-server/
.popcorn.yaml
# NOTE(review): *.cu / *.hip / *.lock are presumably generated or scratch
# files in this tree — confirm before relying on this.
*.cu
*.hip
*.lock
# NOTE(review): presumably written by submit.py — confirm.
submission.py
torch-build/
# Python bytecode caches.
__pycache__
21 changes: 21 additions & 0 deletions dist-infer/ag-gemm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Builds the `ag_gemm` shared module: a HIP source compiled for the selected
# GPU architecture and linked against PyTorch (including its Python bindings).
cmake_minimum_required(VERSION 3.21)
cmake_policy(VERSION 3.21.3...3.27)
set(PROJECT_NAME "ag_gemm")
project(${PROJECT_NAME} LANGUAGES HIP CXX)

find_package(Python3 REQUIRED COMPONENTS Development Interpreter)
find_package(Torch CONFIG REQUIRED)
find_package(HIP CONFIG REQUIRED)

# required for python binding
# Use the explicit NAMES/PATHS keywords (a bare `PATH` is not a keyword and is
# treated as a literal search directory) and fail at configure time when the
# library is missing instead of passing a *-NOTFOUND value to the linker.
find_library(TORCH_PYTHON_LIBRARY
    NAMES torch_python
    PATHS "${TORCH_INSTALL_PREFIX}/lib"
    REQUIRED
)

# The file listed in add_library() and the file marked as HIP must be the same
# one; otherwise the built source never gets the HIP language property.
# NOTE(review): assumes the kernel source is ag_gemm.cpp — confirm the file name.
add_library(${PROJECT_NAME} SHARED ag_gemm.cpp)
set_source_files_properties(ag_gemm.cpp PROPERTIES LANGUAGE HIP)
target_link_libraries(${PROJECT_NAME}
    PRIVATE
        ${TORCH_PYTHON_LIBRARY}
        ${TORCH_LIBRARIES}
        hip::device
        Python3::Python
)
# No "lib" prefix: the output is <name>.so so the file name matches the module name.
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_20)

# for host compile
# Defines __<arch>__ (e.g. __gfx942__). A leading -D is unnecessary here because
# target_compile_definitions() adds it itself.
# NOTE(review): assumes CMAKE_HIP_ARCHITECTURES holds a single architecture.
target_compile_definitions(${PROJECT_NAME} PRIVATE __${CMAKE_HIP_ARCHITECTURES}__)
# Keep the compiler's intermediate files in the build tree.
target_compile_options(${PROJECT_NAME} PRIVATE -save-temps)
39 changes: 39 additions & 0 deletions dist-infer/ag-gemm/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Convenience wrapper around CMake and the submission scripts for ag_gemm.
# Every target below is a command, not a file, so all of them are phony.
.PHONY: all config build test clean local submit dis

# GPU architecture passed to CMake and PyTorch.
TARGET ?= gfx942

BUILD_TYPE ?= RelWithDebInfo
BUILD_DIR ?= build

# First site-packages directory of the active Python; added to
# CMAKE_PREFIX_PATH so find_package() can search it.
PYTHON_DIR ?= $(shell python -c "import site; print(site.getsitepackages()[0])")

all: config build submit

# Configure the build tree with the Ninja generator for $(TARGET).
config:
	PYTORCH_ROCM_ARCH=$(TARGET) cmake -B $(BUILD_DIR) . \
		-DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \
		-DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
		-DCMAKE_PREFIX_PATH="/opt/rocm;$(PYTHON_DIR)" \
		-DCMAKE_HIP_ARCHITECTURES=$(TARGET) \
		-DGPU_TARGETS=$(TARGET) \
		-DAMDGPU_TARGETS=$(TARGET) \
		-G Ninja

# Compile with 8 parallel jobs.
build:
	cmake --build $(BUILD_DIR) -j8

# Run the smoke test with tools/ and the build directory on PYTHONPATH.
test: build
	PYTHONPATH=$(PYTHONPATH):$(realpath tools):$(realpath $(BUILD_DIR)) python tools/smoke_test.py

# Remove the build tree; -f keeps this from failing when it does not exist.
clean:
	rm -rf $(BUILD_DIR)

# Run submit.py in local_test mode, then the benchmark via eval.py.
local:
	python submit.py local_test
	POPCORN_GPUS=2 POPCORN_FD=2 python eval.py benchmark benchmark.txt

submit:
	python submit.py

# Run roc-obj -d on the built module, honouring a custom BUILD_DIR.
dis:
	roc-obj -d $(BUILD_DIR)/ag_gemm.so
Loading