ROCm · dhonnappa-amd · Sep 17, 2024 · Sep 18, 2024 · Sep 18, 2024 · Sep 18, 2024
diff --git a/.buildkite/run-amd-test.sh b/.buildkite/run-amd-test.sh
@@ -7,7 +7,7 @@ set -o pipefail
 echo "--- Confirming Clean Initial State"
 while true; do
         sleep 3
-        if grep -q clean /opt/amdgpu/etc/gpu_state; then
+        if grep -q clean "${BUILDKITE_AGENT_META_DATA_RESET_TARGET:?}"; then # quoted; :? aborts instead of polling forever when unset
                 echo "GPUs state is \"clean\""
                 break
         fi
@@ -46,11 +46,11 @@ cleanup_docker
 
 echo "--- Resetting GPUs"
 
-echo "reset" > /opt/amdgpu/etc/gpu_state
+echo "reset" > "${BUILDKITE_AGENT_META_DATA_RESET_TARGET:?}" # quoted; :? aborts if the agent did not provide a reset target
 
 while true; do
         sleep 3
-        if grep -q clean /opt/amdgpu/etc/gpu_state; then
+        if grep -q clean "${BUILDKITE_AGENT_META_DATA_RESET_TARGET}"; then
                 echo "GPUs state is \"clean\""
                 break
         fi
@@ -141,8 +141,9 @@ if [[ $commands == *"--shard-id="* ]]; then
     fi
   done
 else
+  echo "Render devices: $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES" # spliced unquoted into docker run below; presumably pre-built "--device ..." args that rely on word splitting - TODO confirm
   docker run \
-          --device /dev/kfd --device /dev/dri \
+          --device /dev/kfd $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES \
           --network host \
           --shm-size=16gb \
           --rm \

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
@@ -92,7 +92,9 @@ steps:
   - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
 
 - label: Core Test # 10min
+  working_dir: "/vllm-workspace/tests"
   mirror_hardwares: [amd]
+  amd_gpus: 4   # Just for the sake of queue testing
   fast_check: true
   source_file_dependencies:
   - vllm/core
@@ -105,6 +107,7 @@ steps:
   working_dir: "/vllm-workspace/tests"
   fast_check: true
   mirror_hardwares: [amd]
+  amd_gpus: 1   # Just for the sake of queue testing
   source_file_dependencies:
   - vllm/
   commands:
@@ -158,6 +161,7 @@ steps:
 
 - label: Regression Test # 5min
   mirror_hardwares: [amd]
+  amd_gpus: 1
   source_file_dependencies:
   - vllm/
   - tests/test_regression
@@ -168,6 +172,7 @@ steps:
 
 - label: Engine Test # 10min
   mirror_hardwares: [amd]
+  amd_gpus: 1
   source_file_dependencies:
   - vllm/
   - tests/engine
@@ -176,6 +181,7 @@ steps:
   - pytest -v -s engine test_sequence.py test_config.py test_logger.py
   # OOM in the CI unless we run this separately
   - pytest -v -s tokenization
+  working_dir: "/vllm-workspace/tests" # optional
 
 - label: V1 Test
   #mirror_hardwares: [amd]
@@ -217,7 +223,9 @@ steps:
     - python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2
 
 - label: Prefix Caching Test # 9min
+  working_dir: "/vllm-workspace/tests"
   mirror_hardwares: [amd]
+  amd_gpus: 1
   source_file_dependencies:
   - vllm/
   - tests/prefix_caching
@@ -235,7 +243,9 @@ steps:
     - VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers
 
 - label: LogitsProcessor Test # 5min
+  working_dir: "/vllm-workspace/tests"
   mirror_hardwares: [amd]
+  amd_gpus: 1
   source_file_dependencies:
   - vllm/model_executor/layers
   - vllm/model_executor/guided_decoding
@@ -256,7 +266,9 @@ steps:
     - pytest -v -s spec_decode/e2e/test_eagle_correctness.py
 
 - label: LoRA Test %N # 15min each
+  working_dir: "/vllm-workspace/tests"
   mirror_hardwares: [amd]
+  amd_gpus: 8
   source_file_dependencies:
   - vllm/lora
   - tests/lora
@@ -282,7 +294,9 @@ steps:
   - pytest -v -s compile/test_full_graph.py
 
 - label: Kernels Test %N # 1h each
+  working_dir: "/vllm-workspace/tests"
   mirror_hardwares: [amd]
+  amd_gpus: 8
   source_file_dependencies:
   - csrc/
   - vllm/attention
@@ -292,8 +306,10 @@ steps:
   parallelism: 4
 
 - label: Tensorizer Test # 11min
+  working_dir: "/vllm-workspace/tests"
   mirror_hardwares: [amd]
   soft_fail: true
+  amd_gpus: 1
   source_file_dependencies:
   - vllm/model_executor/model_loader
   - tests/tensorizer_loader
@@ -305,6 +321,7 @@ steps:
 - label: Benchmarks # 9min
   working_dir: "/vllm-workspace/.buildkite"
   mirror_hardwares: [amd]
+  amd_gpus: 1
   source_file_dependencies:
   - benchmarks/
   commands:
@@ -334,8 +351,10 @@ steps:
     - pytest -v -s encoder_decoder
 
 - label: OpenAI-Compatible Tool Use # 20 min
+  working_dir: "/vllm-workspace/tests"
   fast_check: false
   mirror_hardwares: [ amd ]
+  amd_gpus: 1
   source_file_dependencies:
     - vllm/
     - tests/tool_use

diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2
@@ -0,0 +1,46 @@
+{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %}{# not referenced in the visible part of this template #}
+{% set docker_image_amd = "rocm/vllm-ci:$BUILDKITE_COMMIT" %}{# tag used by the docker build and docker push commands below #}
+{% set default_working_dir = "vllm/tests" %}{# fallback when a step defines no working_dir #}
+{% set hf_home = "/root/.cache/huggingface" %}{# not referenced in the visible part of this template #}
+
+steps:
+  - label: ":docker: build image"
+    depends_on: ~  # YAML null
+    commands:
+      - "docker build --build-arg max_jobs=16 --tag {{ docker_image_amd }} -f Dockerfile.rocm  --target test --progress plain ."
+      - "docker push {{ docker_image_amd }}"
+    key: "amd-build"  # the generated "AMD: ..." steps list this key under depends_on
+    env:
+      DOCKER_BUILDKIT: "1"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 5
+        - exit_status: -10  # Agent was lost
+          limit: 5
+    agents:
+      queue: amd-cpu
+
+{% for step in steps %}{# emit one "AMD: ..." step for each pipeline step whose mirror_hardwares lists amd #}
+{% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
+  - label: "AMD: {{ step.label }}"
+    depends_on:
+      - "amd-build"
+    agents:
+{% if step.amd_gpus == 8 %}
+      queue: amd_gpu_8
+{% elif step.amd_gpus == 4 %}
+      queue: amd_gpu_4
+{% elif step.amd_gpus == 2 %}{# NOTE(review): 2-GPU steps are routed to the 4-GPU queue - confirm this is intended #}
+      queue: amd_gpu_4
+{% else %}{# amd_gpus missing or any other value #}
+      queue: amd_gpu_1
+{% endif %}
+    commands:{# filters bind tighter than "or", so the command expression is parenthesised before applying safe #}
+      - bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ (step.command or (step.commands | join(" && "))) | safe }}"
+    env:
+      DOCKER_BUILDKIT: "1"
+    priority: 100
+    soft_fail: true
+{% endif %}
+{% endfor %}
diff --git a/.github/workflows/lint-and-deploy.yaml b/.github/workflows/lint-and-deploy.yaml
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -16,7 +16,9 @@ jobs:
   release:
     # Retrieve tag and create release
     name: Create Release
-    runs-on: ubuntu-latest
+    runs-on: self-hosted
+    container:
+      image: rocm/pytorch:rocm6.2_ubuntu20.04_py3.9_pytorch_release_2.3.0
     outputs:
       upload_url: ${{ steps.create_release.outputs.upload_url }}
     steps:
@@ -39,73 +41,42 @@ jobs:
             const script = require('.github/workflows/scripts/create_release.js')
             await script(github, context, core)
 
-  # NOTE(simon): No longer build wheel using Github Actions. See buildkite's release workflow. 
-  # wheel:
-  #   name: Build Wheel
-  #   runs-on: ${{ matrix.os }}
-  #   needs: release
+  wheel:
+    name: Build Wheel
+    runs-on: self-hosted
+    container:
+      image: rocm/pytorch:rocm6.2_ubuntu20.04_py3.9_pytorch_release_2.3.0
+    needs: release
 
-  #   strategy:
-  #     fail-fast: false
-  #     matrix:
-  #         os: ['ubuntu-20.04']
-  #         python-version: ['3.9', '3.10', '3.11', '3.12']
-  #         pytorch-version: ['2.4.0']  # Must be the most recent version that meets requirements-cuda.txt.
-  #         cuda-version: ['11.8', '12.1']
+    strategy:
+      fail-fast: false
 
-  #   steps:
-  #     - name: Checkout
-  #       uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-
-  #     - name: Setup ccache
-  #       uses: hendrikmuhs/ccache-action@ed74d11c0b343532753ecead8a951bb09bb34bc9 # v1.2.14
-  #       with:
-  #         create-symlink: true
-  #         key: ${{ github.job }}-${{ matrix.python-version }}-${{ matrix.cuda-version }}
-
-  #     - name: Set up Linux Env
-  #       if: ${{ runner.os == 'Linux' }}
-  #       run: |
-  #         bash -x .github/workflows/scripts/env.sh
-
-  #     - name: Set up Python
-  #       uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
-  #       with:
-  #           python-version: ${{ matrix.python-version }}
-
-  #     - name: Install CUDA ${{ matrix.cuda-version }}
-  #       run: |
-  #         bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ${{ matrix.os }}
-
-  #     - name: Install PyTorch ${{ matrix.pytorch-version }} with CUDA ${{ matrix.cuda-version }}
-  #       run: |
-  #         bash -x .github/workflows/scripts/pytorch-install.sh ${{ matrix.python-version }} ${{ matrix.pytorch-version }} ${{ matrix.cuda-version }}
-
-  #     - name: Build wheel
-  #       shell: bash
-  #       env:
-  #         CMAKE_BUILD_TYPE: Release # do not compile with debug symbol to reduce wheel size
-  #       run: |
-  #         bash -x .github/workflows/scripts/build.sh ${{ matrix.python-version }} ${{ matrix.cuda-version }}
-  #         wheel_name=$(find dist -name "*whl" -print0 | xargs -0 -n 1 basename)
-  #         asset_name=${wheel_name//"linux"/"manylinux1"}
-  #         echo "wheel_name=${wheel_name}" >> "$GITHUB_ENV"
-  #         echo "asset_name=${asset_name}" >> "$GITHUB_ENV"
+    steps:
+      - name: Prepare
+        run: |
+          pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2
+          pip3 install -U triton
 
-  #     - name: Upload Release Asset
-  #       uses: actions/upload-release-asset@e8f9f06c4b078e705bd2ea027f0926603fc9b4d5 # v1.0.2
-  #       env:
-  #         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-  #       with:
-  #         upload_url: ${{ needs.release.outputs.upload_url }}
-  #         asset_path: ./dist/${{ env.wheel_name }}
-  #         asset_name: ${{ env.asset_name }}
-  #         asset_content_type: application/*
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
 
-      # (Danielkinz): This last step will publish the .whl to pypi. Warning: untested
-      # - name: Publish package
-      #   uses: pypa/gh-action-pypi-publish@release/v1.8
-      #   with:
-      #     repository-url: https://test.pypi.org/legacy/
-      #     password: ${{ secrets.PYPI_API_TOKEN }}
-      #     skip-existing: true
+      - name: Build wheel
+        shell: bash
+        env:
+          CMAKE_BUILD_TYPE: Release # do not compile with debug symbol to reduce wheel size
+        run: |
+          bash -x .github/workflows/scripts/build.sh  # called without arguments; the removed job passed python and cuda versions
+          wheel_name=$(find dist -name "*whl" -print0 | xargs -0 -n 1 basename)  # basename of each match under dist/; NOTE(review): more than one wheel would give a multi-line value
+          asset_name=${wheel_name//"linux"/"manylinux1"}  # every "linux" in the file name becomes "manylinux1"
+          echo "wheel_name=${wheel_name}" >> "$GITHUB_ENV"  # read back as env.wheel_name by the upload step
+          echo "asset_name=${asset_name}" >> "$GITHUB_ENV"  # read back as env.asset_name by the upload step
+
+      - name: Upload vllm Release Asset  # uploads ./dist/<wheel_name> to the upload_url output of the release job
+        uses: actions/upload-release-asset@e8f9f06c4b078e705bd2ea027f0926603fc9b4d5 # v1.0.2
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          upload_url: ${{ needs.release.outputs.upload_url }}
+          asset_path: ./dist/${{ env.wheel_name }}
+          asset_name: ${{ env.asset_name }}
+          asset_content_type: application/*