SystemPanic
diff --git a/‎.clang-format‎
Lines changed: 7 additions & 0 deletions b/‎.clang-format‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎.devcontainer/Dockerfile‎
Lines changed: 58 additions & 0 deletions b/‎.devcontainer/Dockerfile‎
Lines changed: 58 additions & 0 deletions
diff --git a/‎.devcontainer/devcontainer.json‎
Lines changed: 17 additions & 0 deletions b/‎.devcontainer/devcontainer.json‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎.github/workflows/build-doc.yml‎
Lines changed: 53 additions & 0 deletions b/‎.github/workflows/build-doc.yml‎
Lines changed: 53 additions & 0 deletions
diff --git a/‎.github/workflows/release_wheel.yml‎
Lines changed: 113 additions & 0 deletions b/‎.github/workflows/release_wheel.yml‎
Lines changed: 113 additions & 0 deletions
diff --git a/‎.github/workflows/release_wheel_sglang.yml‎
Lines changed: 97 additions & 0 deletions b/‎.github/workflows/release_wheel_sglang.yml‎
Lines changed: 97 additions & 0 deletions
@@ -0,0 +1,7 @@
+---
+BasedOnStyle: Google  # start from the Google preset; the keys below override it
+DerivePointerAlignment: false  # do not infer pointer alignment per file; always use PointerAlignment
+ColumnLimit:     100  # maximum line width in columns
+PointerAlignment: Left  # attach `*` / `&` to the type: `int* p`
+# InsertNewlineAtEOF: true  (left disabled; presumably because it needs clang-format 16+ -- TODO confirm)
+...
@@ -0,0 +1,58 @@
+FROM nvidia/cuda:12.4.0-devel-ubuntu22.04
+
+# Install dev tooling (curl, git, clang-format, vim, zsh), RDMA/NUMA libraries and OpenMPI, then drop the apt lists to keep the layer small
+RUN apt-get update && apt-get install -y \
+    curl \
+    git \
+    clang-format \
+    libibverbs-dev \
+    librdmacm-dev \
+    rdma-core \
+    libnuma-dev \
+    vim \
+    openmpi-bin \
+    libopenmpi-dev \
+    zsh \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install oh-my-zsh for root without interactive prompts
+RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" "" --unattended
+
+# Install powerlevel10k theme (shallow clone) into the oh-my-zsh custom themes directory
+RUN git clone --depth=1 https://github.com/romkatv/powerlevel10k.git ${ZSH_CUSTOM:-$HOME/.oh-my-zsh/custom}/themes/powerlevel10k
+
+# Install zsh-autosuggestions (shallow clone) into the oh-my-zsh custom plugins directory
+RUN git clone --depth=1 https://github.com/zsh-users/zsh-autosuggestions ${ZSH_CUSTOM:-$HOME/.oh-my-zsh/custom}/plugins/zsh-autosuggestions
+
+# Configure zsh: switch the theme and enable the plugin in root's ~/.zshrc
+RUN sed -i 's/ZSH_THEME="robbyrussell"/ZSH_THEME="powerlevel10k\/powerlevel10k"/' ~/.zshrc && \
+    sed -i 's/plugins=(git)/plugins=(git zsh-autosuggestions)/' ~/.zshrc
+
+# Build arguments for the non-root user; USER_GID defaults to USER_UID
+ARG USERNAME=devuser
+ARG USER_UID=1003
+ARG USER_GID=$USER_UID
+
+# Create the group and the user (with a home directory), then grant passwordless sudo
+RUN groupadd --gid $USER_GID $USERNAME \
+    && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \
+    # [Optional] Add sudo support: install sudo and write a NOPASSWD sudoers entry for the user
+    && apt-get update \
+    && apt-get install -y sudo \
+    && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \
+    && chmod 0440 /etc/sudoers.d/$USERNAME \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy root's oh-my-zsh install and .zshrc to the new user's home and hand over ownership
+RUN cp -r /root/.oh-my-zsh /home/$USERNAME/.oh-my-zsh && \
+    cp /root/.zshrc /home/$USERNAME/.zshrc && \
+    chown -R $USERNAME:$USERNAME /home/$USERNAME/.oh-my-zsh && \
+    chown $USERNAME:$USERNAME /home/$USERNAME/.zshrc
+
+# Switch to the non-root user and start in its home directory
+USER $USERNAME
+WORKDIR /home/$USERNAME
+
+# Export SHELL=/bin/zsh and run zsh as the container's default command
+ENV SHELL=/bin/zsh
+CMD [ "zsh" ]
@@ -0,0 +1,17 @@
+{
+    "name": "CUDA Development Container",
+    "build": {
+        "dockerfile": "Dockerfile", // the Dockerfile added next to this file in .devcontainer/
+        "context": "."
+    },
+    "runArgs": [
+        "--gpus=all" // extra container run argument requesting all host GPUs
+    ],
+    "customizations": {
+        "vscode": {
+            "extensions": [
+            ]
+        }
+    },
+    "remoteUser": "devuser" // same name as the Dockerfile's USERNAME build-arg default
+}
@@ -0,0 +1,53 @@
+name: Build FlashInfer Docs
+
+on:
+  push:
+    branches:
+      - main
+
+# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
+# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
+concurrency:
+  group: "pages"
+  cancel-in-progress: false
+
+jobs:
+  test_linux:  # despite the id, this job builds the docs and deploys them to GitHub Pages
+    name: Deploy Docs
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        submodules: recursive
+
+    - name: Configuring build Environment
+      run: |
+        sudo apt-get update
+        python -m pip install -U pip wheel
+
+    - name: Installing dependencies
+      run: |
+        python -m pip install -r docs/requirements.txt
+
+    - name: Build Documentation
+      if: github.ref == 'refs/heads/main'  # redundant with the push-to-main trigger; kept as a guard
+      run: |
+        cd docs
+        make html
+
+    - name: Upload artifact
+      uses: actions/upload-pages-artifact@v3
+      with:
+        # Upload only the generated HTML site
+        path: 'docs/_build/html'
+
+    - name: Deploy to GitHub Pages
+      id: deployment
+      uses: actions/deploy-pages@v4  # NOTE(review): upstream examples set `environment: github-pages` on the job -- confirm whether it is needed here
@@ -0,0 +1,113 @@
+# Adapted from https://github.com/punica-ai/punica/blob/591b59899f0a20760821785d06b331c8a2e5cb86/.github/workflows/release_wheel.yml
+name: Release
+on:
+  workflow_dispatch:  # manual trigger
+    inputs:
+      tag_name:  # release tag the built artifacts are attached to
+        required: true
+        type: string
+  workflow_call:  # reusable-workflow trigger; same input plus the index-repo token
+    inputs:
+      tag_name:
+        required: true
+        type: string
+    secrets:
+      WHL_TOKEN:  # used by the release job to clone the flashinfer-ai/whl index repository
+        required: true
+      # PYPI_TEST_TOKEN:
+      #   required: true
+
+env:
+  TORCH_CUDA_ARCH_LIST: "7.5 8.0 8.9 9.0+PTX"  # forwarded into the build container by the build job
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false  # keep building the other matrix combinations if one fails
+      matrix:
+        cuda: ["11.8", "12.1", "12.4"]
+        torch: ["2.4", "2.5", "2.6"]
+        exclude: # cuda 11.8/12.4 + torch 2.5 are built by release_wheel_sglang.yml, which is run first for faster release and verification; this workflow is triggered once that is okay. NOTE(review): the cuda 12.1 + torch 2.6 exclusion is not explained -- presumably no such torch build exists; confirm.
+          - cuda: "12.4"
+            torch: "2.5"
+          - cuda: "11.8"
+            torch: "2.5"
+          - cuda: "12.1"
+            torch: "2.6"
+
+    runs-on: [self-hosted]
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+
+      - name: Build wheel  # runs scripts/run-ci-build-wheel.sh inside the pytorch/manylinux-builder image for this CUDA version
+        run: |
+          chown -R $CI_UID:$CI_GID "$GITHUB_WORKSPACE"
+          docker run --rm -t \
+              -v "$CI_RUNNER_CACHE_DIR":/ci-cache \
+              -v "$GITHUB_WORKSPACE":/app \
+              -e FLASHINFER_CI_CACHE=/ci-cache \
+              -e FLASHINFER_CI_CUDA_VERSION=${{ matrix.cuda }} \
+              -e FLASHINFER_CI_TORCH_VERSION=${{ matrix.torch }} \
+              -e FLASHINFER_CI_PYTHON_VERSION=3.10 \
+              -e TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" \
+              -e MAX_JOBS=128 \
+              --user $CI_UID:$CI_GID \
+              pytorch/manylinux-builder:cuda${{ matrix.cuda }} \
+              bash /app/scripts/run-ci-build-wheel.sh
+        timeout-minutes: 120
+      - run: du -h dist/*  # log the size of each built artifact
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: wheel-cuda${{ matrix.cuda }}-torch${{ matrix.torch }}  # matched by the release job's wheel-* download pattern
+          path: dist/*
+
+  release:
+    needs: build  # runs after the build matrix has completed
+    runs-on: [self-hosted]
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          path: dist/
+          merge-multiple: true  # place the files of every matching artifact directly in dist/
+          pattern: wheel-*
+
+      - run: ls -lah dist/
+
+      - uses: softprops/action-gh-release@v1  # attach the wheels to the release for tag_name
+        with:
+          tag_name: ${{ inputs.tag_name }}
+          files: |
+            dist/flashinfer*.whl
+
+      - uses: softprops/action-gh-release@v1  # attach the source tarballs to the same release
+        with:
+          tag_name: ${{ inputs.tag_name }}
+          files: |
+            dist/flashinfer-*.tar.gz
+
+      - name: Clone wheel index
+        run: git clone https://oauth2:${WHL_TOKEN}@github.com/flashinfer-ai/whl.git flashinfer-whl
+        env:
+          WHL_TOKEN: ${{ secrets.WHL_TOKEN }}  # passed through the environment, expanded by the shell
+
+      - name: Update wheel index
+        run: python3 scripts/update_whl_index.py  # NOTE(review): this job has no checkout step, so scripts/ must already exist in the runner workspace -- confirm
+
+      - name: Push wheel index
+        run: |
+          cd flashinfer-whl
+          git config --local user.name "github-actions[bot]"
+          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git add -A
+          git commit -m "update whl"
+          git push
+
+      # Disabled: upload of the sdist tarballs to TestPyPI via twine
+      # - name: Upload sdist to pypi
+      #   run: |
+      #     pip install twine
+      #     python -m twine upload --repository testpypi --username=__token__ dist/*.tar.gz
+      #   env:
+      #     TWINE_PASSWORD: ${{ secrets.PYPI_TEST_TOKEN }}
@@ -0,0 +1,97 @@
+name: Release Wheel
+on:
+  workflow_dispatch:  # manual trigger
+    inputs:
+      tag_name:  # release tag the built artifacts are attached to
+        required: true
+        type: string
+  workflow_call:  # reusable-workflow trigger; same input plus the index-repo token
+    inputs:
+      tag_name:
+        required: true
+        type: string
+    secrets:
+      WHL_TOKEN:  # used by the release job to clone the flashinfer-ai/whl index repository
+        required: true
+
+env:
+  TORCH_CUDA_ARCH_LIST: "7.5 8.0 8.9 9.0+PTX"  # forwarded into the build container by the build job
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false  # keep building the other matrix combination if one fails
+      matrix:
+        cuda: ["11.8", "12.4"]  # with torch 2.5, these are the combinations release_wheel.yml excludes
+        torch: ["2.5"]
+
+    runs-on: [self-hosted]
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+
+      - name: Build wheel  # same container build as release_wheel.yml, with FLASHINFER_HEAD_DIMS additionally set
+        run: |
+          chown -R $CI_UID:$CI_GID "$GITHUB_WORKSPACE"
+          docker run --rm -t \
+              -v "$CI_RUNNER_CACHE_DIR":/ci-cache \
+              -v "$GITHUB_WORKSPACE":/app \
+              -e FLASHINFER_CI_CACHE=/ci-cache \
+              -e FLASHINFER_CI_CUDA_VERSION=${{ matrix.cuda }} \
+              -e FLASHINFER_CI_TORCH_VERSION=${{ matrix.torch }} \
+              -e FLASHINFER_CI_PYTHON_VERSION=3.10 \
+              -e FLASHINFER_HEAD_DIMS="64,128,256" \
+              -e TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" \
+              -e MAX_JOBS=128 \
+              --user $CI_UID:$CI_GID \
+              pytorch/manylinux-builder:cuda${{ matrix.cuda }} \
+              bash /app/scripts/run-ci-build-wheel.sh
+        timeout-minutes: 120
+      - run: du -h dist/*  # log the size of each built artifact
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: wheel-cuda${{ matrix.cuda }}-torch${{ matrix.torch }}  # matched by the release job's wheel-* download pattern
+          path: dist/*
+
+  release:
+    needs: build  # runs after the build matrix has completed
+    runs-on: [self-hosted]
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          path: dist/
+          merge-multiple: true  # place the files of every matching artifact directly in dist/
+          pattern: wheel-*
+
+      - run: ls -lah dist/
+
+      - uses: softprops/action-gh-release@v1  # attach the wheels to the release for tag_name
+        with:
+          tag_name: ${{ inputs.tag_name }}
+          files: |
+            dist/flashinfer*.whl
+
+      - uses: softprops/action-gh-release@v1  # attach the source tarballs to the same release
+        with:
+          tag_name: ${{ inputs.tag_name }}
+          files: |
+            dist/flashinfer-*.tar.gz
+
+      - name: Clone wheel index
+        run: git clone https://oauth2:${WHL_TOKEN}@github.com/flashinfer-ai/whl.git flashinfer-whl
+        env:
+          WHL_TOKEN: ${{ secrets.WHL_TOKEN }}  # passed through the environment, expanded by the shell
+
+      - name: Update wheel index
+        run: python3 scripts/update_whl_index.py  # NOTE(review): this job has no checkout step, so scripts/ must already exist in the runner workspace -- confirm
+
+      - name: Push wheel index
+        run: |
+          cd flashinfer-whl
+          git config --local user.name "github-actions[bot]"
+          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git add -A
+          git commit -m "update whl"
+          git push