diff --git a/README.md b/README.md index ba8bf6c8..39ede523 100644 --- a/README.md +++ b/README.md @@ -52,17 +52,21 @@ This is not a product. Use it at your own risk and discretion. We provide both Apptainer and Docker images for easy setup: -#### Using Apptainer -```bash -./apptainer/build.sh -./apptainer/run.sh -``` -#### Using Docker -```bash -./docker/build.sh -./docker/run.sh +```console +$ ./containers/run.sh +Must specify either --docker or --apptainer. +Usage: ./containers/run.sh [--docker|-d] [--apptainer|-a] + --docker Run using Docker container + --apptainer Run using Apptainer container ``` +That's it! The run script will: + +- ✅ Automatically build the container if it doesn't exist +- ✅ Mount your project directory to the container +- ✅ Start an interactive session where you can run IntelliPerf +- ✅ Persist all output files to your host machine + Or use our prebuilt Docker image: ```bash docker pull audacioussw/intelliperf:latest diff --git a/VERSION b/VERSION new file mode 100644 index 00000000..6c6aa7cb --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.1.0 \ No newline at end of file diff --git a/apptainer/build.sh b/apptainer/build.sh deleted file mode 100755 index 6fc89cca..00000000 --- a/apptainer/build.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/bash -################################################################################ -# MIT License - -# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
- -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -################################################################################ - -debug=0 - -while [[ $# -gt 0 ]]; do - case $1 in - -d|--debug) - debug=1 - shift - ;; - *) - echo "Usage: $0 [-d|--debug]" - exit 1 - ;; - esac -done - - -# Auto config SSH agent -if [ ! -S ~/.ssh/ssh_auth_sock ]; then - eval `ssh-agent` > /dev/null - ln -sf "$SSH_AUTH_SOCK" ~/.ssh/ssh_auth_sock -fi -export SSH_AUTH_SOCK=~/.ssh/ssh_auth_sock -[ -f ~/.ssh/id_rsa ] && ssh-add ~/.ssh/id_rsa -[ -f ~/.ssh/id_ed25519 ] && ssh-add ~/.ssh/id_ed25519 - -ssh_auth_sock_path=$(readlink -f "$SSH_AUTH_SOCK") -# Build the Singularity container -# --build-arg SSH_AUTH_SOCK=$SSH_AUTH_SOCK is used to pass the SSH agent socket to the container -# (advantage of this method is that the key is at no point copied to the container image.) 
-# If your SSH_AUTH_SOCK will not already bound to the container, and is available at /run/..., add `--bind /run` to the build command -definition="apptainer/intelliperf.def" - -if [[ $debug -eq 1 ]]; then - image="apptainer/intelliperf_debug.sif" - cmake_build_type="Debug" -else - image="apptainer/intelliperf.sif" - cmake_build_type="Release" -fi - -apptainer build \ - --build-arg SSH_AUTH_SOCK=${ssh_auth_sock_path} \ - --build-arg CMAKE_BUILD_TYPE=${cmake_build_type}\ - $image $definition \ No newline at end of file diff --git a/apptainer/intelliperf.def b/apptainer/intelliperf.def deleted file mode 100644 index 28c6d4af..00000000 --- a/apptainer/intelliperf.def +++ /dev/null @@ -1,98 +0,0 @@ -Bootstrap: docker -From: ubuntu:22.04 - -%environment - # Locale - export LANG=en_US.UTF-8 - - # ROCm globals - export PATH=/opt/rocm/bin:$PATH - export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH - export ROCM_PATH=/opt/rocm - - # Misc globals - export GT_TUNING=/root/guided-tuning - export PATH=/opt/omniprobe/bin/logDuration:$PATH - export PATH=/root/rocprofiler-compute/src:$PATH - -%files - examples/bank_conflict/llm.c/requirements.txt /examples/bank_conflict/llm.c/requirements.txt - -%post - # Set locale - apt-get -y update - apt-get install -y locales - locale-gen en_US.UTF-8 - export LANG=en_US.UTF-8 - - # Install dependencies - apt-get -y update - apt-get install -y software-properties-common - apt-get upgrade -y - apt-get install -y build-essential python3 python3-pip python3-setuptools python3-wheel git wget clang lld libzstd-dev libomp-dev vim libdwarf-dev - apt-get install -y locales - locale-gen en_US.UTF-8 - python3 -m pip install --upgrade pip - python3 -m pip install 'cmake==3.22' - - # Add GitHub trusted host - mkdir -p ~/.ssh - touch ~/.ssh/known_hosts - ssh-keyscan github.com >> ~/.ssh/known_hosts - chmod 700 ~/.ssh - chmod 644 ~/.ssh/known_hosts - - # Install ROCm - apt-get -y update - wget 
https://repo.radeon.com/amdgpu-install/6.3.3/ubuntu/jammy/amdgpu-install_6.3.60303-1_all.deb - apt-get -y install ./amdgpu-install_6.3.60303-1_all.deb - apt-get -y update - apt-get install -y rocm-dev rocm-llvm-dev rocm-hip-runtime-dev rocm-smi-lib rocminfo rocthrust-dev rocprofiler-compute rocblas rocm-gdb gdb tmux - export PATH=/opt/rocm/bin:$PATH - export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH - export ROCM_PATH=/opt/rocm - - # Install rocprof-compute (via package manager) - # python3 -m pip install --ignore-installed blinker - # python3 -m pip install -r /opt/rocm/libexec/rocprofiler-compute/requirements.txt - # Install rocprof-compute (from feature branch) - export SSH_AUTH_SOCK={{ SSH_AUTH_SOCK }} - cd /root - git clone -v https://github.com/ROCm/rocprofiler-compute.git - cd rocprofiler-compute - git checkout 41e73650d5cfc3dbd98e007d6279235578f8529a - python3 -m pip install --ignore-installed blinker - python3 -m pip install -r requirements.txt - cd src - export PATH=$PWD:$PATH - - # Install Triton (version pinned) - cd /root - export TRITON_HOME=/root - git clone -v https://github.com/triton-lang/triton.git - cd triton - git checkout 6fa33ef1eecc97348d056688df84845db7d22507 - python3 -m pip install ninja wheel pybind11 - python3 -m pip install -e python - - # Install omniprobe - echo "Building with CMAKE_BUILD_TYPE={{ CMAKE_BUILD_TYPE }}" - cd /root - git clone -v git@github.com:AARInternal/omniprobe.git - cd omniprobe - git checkout 9083730ab0da50114c767773df49cb1d2165ba7f - git submodule update --init --recursive - mkdir -p build - cmake -DCMAKE_INSTALL_PREFIX=/opt/omniprobe\ - -DCMAKE_PREFIX_PATH=${ROCM_PATH}\ - -DTRITON_LLVM=/root/.triton/llvm/llvm-ubuntu-x64\ - -DCMAKE_BUILD_TYPE={{ CMAKE_BUILD_TYPE }}\ - -DCMAKE_VERBOSE_MAKEFILE=ON -S . 
-B build - cmake --build build --target install - export PATH=/opt/omniprobe/bin/logDuration:$PATH - - # Install agents dependencies - python3 -m pip install openai - - # Install examples dependencies - pip3 install --no-cache-dir -r /examples/bank_conflict/llm.c/requirements.txt diff --git a/apptainer/run.sh b/apptainer/run.sh deleted file mode 100755 index 9eb9f0d7..00000000 --- a/apptainer/run.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/bash -################################################################################ -# MIT License - -# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
-################################################################################ - -script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -parent_dir="$(dirname "$script_dir")" -working_dir=$(pwd) - -cd $parent_dir - -size=2048 -debug=0 - -while [[ $# -gt 0 ]]; do - case $1 in - -s) - size=$2 - shift 2 - ;; - -d|--debug) - debug=1 - shift - ;; - *) - echo "Usage: $0 [-s size] [-d|--debug]" - exit 1 - ;; - esac -done -workload=$(date +"%Y%m%d%H%M%S") -overlay="/tmp/intelliperf_overlay_$(whoami)_$workload.img" -if [ ! -f $overlay ]; then - echo "[Log] Overlay image ${overlay} does not exist. Creating overlay of ${size} MiB..." - apptainer overlay create --size ${size} --create-dir /var/cache/intelliperf ${overlay} -else - echo "[Log] Overlay image ${overlay} already exists. Using this one." -fi -echo "[Log] Utilize the directory /var/cache/intelliperf as a sandbox to store data you'd like to persist between container runs." - -# Run the container -if [[ $debug -eq 1 ]]; then - image="apptainer/intelliperf_debug.sif" -else - image="apptainer/intelliperf.sif" -fi -apptainer exec --bind $HOME/.ssh:/root/.ssh:ro --overlay ${overlay} --pwd "$working_dir" --cleanenv --env OPENAI_API_KEY=$OPENAI_API_KEY $image bash --rcfile /etc/bash.bashrc diff --git a/apptainer/run_cmd.sh b/apptainer/run_cmd.sh deleted file mode 100755 index 928eab38..00000000 --- a/apptainer/run_cmd.sh +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/bash -################################################################################ -# MIT License - -# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
- -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -################################################################################ - -script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -parent_dir="$(dirname "$script_dir")" - -cd $parent_dir - -size=2048 -cmd="" -debug=0 - -while [[ $# -gt 0 ]]; do - case $1 in - -s) - size=$2 - shift 2 - ;; - --cmd) - cmd=$2 - shift 2 - ;; - -d|--debug) - debug=1 - shift - ;; - *) - echo "Usage: $0 [-s size] --cmd '' [-d|--debug]" - exit 1 - ;; - esac -done - -workload=$(date +"%Y%m%d%H%M%S") - -# Create filesystem image overlay, if it doesn't exist -overlay="/tmp/intelliperf_overlay_$(whoami)_$workload.img" -if [ ! -f $overlay ]; then - echo "[Log] Overlay image ${overlay} does not exist. Creating overlay of ${size} MiB..." - apptainer overlay create --size ${size} --create-dir /var/cache/intelliperf ${overlay} -else - echo "[Log] Overlay image ${overlay} already exists. Using this one." 
-fi -echo "[Log] Utilize the directory /var/cache/intelliperf as a sandbox to store data you'd like to persist between container runs." - -# Run the container -if [[ $debug -eq 1 ]]; then - image="apptainer/intelliperf_debug.sif" -else - image="apptainer/intelliperf.sif" -fi -echo "cmd: $cmd" -apptainer exec --overlay "${overlay}"\ - --cleanenv --env OPENAI_API_KEY="$OPENAI_API_KEY"\ - "$image" bash --rcfile /etc/bash.bashrc\ - -c "cd src && eval \"$cmd\"" \ No newline at end of file diff --git a/containers/build.sh b/containers/build.sh new file mode 100755 index 00000000..0a614c61 --- /dev/null +++ b/containers/build.sh @@ -0,0 +1,74 @@ +#!/bin/bash + +# Container name +name="intelliperf" + +# Script directories +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +parent_dir="$(dirname "$script_dir")" +cur_dir=$(pwd) + +# Parse arguments +build_docker=false +build_apptainer=false + +while [[ $# -gt 0 ]]; do + case $1 in + --docker|-d) + build_docker=true + shift + ;; + --apptainer|-a) + build_apptainer=true + shift + ;; + *) + echo "Unknown option: $1" + echo "Usage: $0 [--docker|-d] [--apptainer|-a] -- Exactly one option is required." + exit 1 + ;; + esac +done + +if [ "$build_docker" = false ] && [ "$build_apptainer" = false ]; then + echo "Error: At least one of the options --docker or --apptainer is required." + echo "Usage: $0 [--docker] [--apptainer]" + echo " --docker Build Docker container" + echo " --apptainer Build Apptainer container" + exit 1 +fi + +pushd "$parent_dir" + +if [ "$build_docker" = true ]; then + echo "Building Docker container..." + + # Enable BuildKit and build the Docker image + export DOCKER_BUILDKIT=1 + docker build \ + -t "$name:$(cat "$parent_dir/VERSION")" \ + -f "$script_dir/intelliperf.Dockerfile" \ + . + + echo "Docker build complete!" +fi + +if [ "$build_apptainer" = true ]; then + echo "Building Apptainer container..." + + # Check if apptainer is installed + if ! 
command -v apptainer &> /dev/null; then + echo "Error: Apptainer is not installed or not in PATH" + echo "Please install Apptainer first: https://apptainer.org/docs/admin/main/installation.html" + exit 1 + fi + + # Build the Apptainer container with ROCm version + export ROCM_VERSION="$rocm_version" + apptainer build \ + "${script_dir}/${name}_$(cat "$parent_dir/VERSION").sif" "$script_dir/intelliperf.def" + + echo "Apptainer build complete!" +fi + +popd \ No newline at end of file diff --git a/docker/intelliperf.Dockerfile b/containers/intelliperf.Dockerfile similarity index 51% rename from docker/intelliperf.Dockerfile rename to containers/intelliperf.Dockerfile index b7a3adc2..1a596cdc 100644 --- a/docker/intelliperf.Dockerfile +++ b/containers/intelliperf.Dockerfile @@ -19,19 +19,10 @@ RUN apt-get update && apt-get install -y \ gdb \ && locale-gen en_US.UTF-8 -# Add GitHub trusted host -RUN mkdir -p ~/.ssh && \ - touch ~/.ssh/known_hosts && \ - ssh-keyscan github.com >> ~/.ssh/known_hosts && \ - chmod 700 ~/.ssh && \ - chmod 644 ~/.ssh/known_hosts - # Set the working directory WORKDIR $INTELLIPERF_HOME +COPY ../ $INTELLIPERF_HOME -# Clone IntelliPerf only in non-dev mode -RUN --mount=type=ssh bash -c 'if [ "$DEV_MODE" = "false" ]; then \ - git clone git@github.com:AMDResearch/intelliperf.git . ; \ - pip install -e .; \ - python3 scripts/install_tool.py --all; \ - fi' +# Install tool +RUN pip install -e . && \ + python3 scripts/install_tool.py --all diff --git a/containers/intelliperf.def b/containers/intelliperf.def new file mode 100644 index 00000000..077a2cdb --- /dev/null +++ b/containers/intelliperf.def @@ -0,0 +1,37 @@ +Bootstrap: docker +From: rocm/vllm-dev:nightly_aiter_integration_final_20250325 + +%arguments + INTELLIPERF_HOME=/intelliperf + +%environment + # Runtime environment inside the finished SIF + export LANG=en_US.UTF-8 + export PATH=/opt/rocm/bin:$PATH + export INTELLIPERF_HOME={{ INTELLIPERF_HOME }} + +%files + . 
/intelliperf + +%post + PATH=/opt/rocm/bin:$PATH + INTELLIPERF_HOME={{ INTELLIPERF_HOME }} + + mkdir -p "${INTELLIPERF_HOME}" + + apt-get update + apt-get install -y --no-install-recommends \ + libzstd-dev \ + python3-setuptools \ + python3-wheel \ + libdwarf-dev \ + rocm-llvm-dev \ + locales \ + gdb \ + ssh + locale-gen en_US.UTF-8 + rm -rf /var/lib/apt/lists/* + + cd "${INTELLIPERF_HOME}" + pip install -e . + python3 scripts/install_tool.py --all \ No newline at end of file diff --git a/containers/run.sh b/containers/run.sh new file mode 100755 index 00000000..39bee16d --- /dev/null +++ b/containers/run.sh @@ -0,0 +1,146 @@ +#!/bin/bash + +# Supports both Docker and Apptainer with automatic building + +# Container name +name="intelliperf" + +# Script directories +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +parent_dir="$(dirname "$script_dir")" + +# Parse arguments +use_docker=false +use_apptainer=false +overlay_size=2048 + +while [[ $# -gt 0 ]]; do + case $1 in + --docker|-d) + use_docker=true + shift + ;; + --apptainer|-a) + use_apptainer=true + shift + ;; + -s|--overlay-size) + overlay_size=$2 + shift 2 + ;; + *) + echo "Unknown option: $1" + echo "Usage: $0 [--docker|-d] [--apptainer|-a] [-s|--overlay-size SIZE] -- Exactly one option is required." + echo " -s, --overlay-size SIZE Size of overlay filesystem in MiB (default: 2048, Apptainer only)" + exit 1 + ;; + esac +done + +# Validate arguments +if [ "$use_docker" = true ] && [ "$use_apptainer" = true ]; then + echo "Error: Cannot use both --docker and --apptainer simultaneously." + echo "Usage: $0 [--docker] [--apptainer]" + exit 1 +elif [ "$use_docker" = false ] && [ "$use_apptainer" = false ]; then + echo "Error: Must specify either --docker or --apptainer." + echo "Usage: $0 [--docker] [--apptainer]" + echo " --docker Run using Docker container" + echo " --apptainer Run using Apptainer container" + exit 1 +fi + +echo "Starting intelliperf container..." 
+echo "Project directory will be mounted at $(pwd)" +echo "Any files you create/modify will persist after the container closes." +echo "" + +if [ "$use_docker" = true ]; then + echo "Using Docker containerization..." + + # Check if the Docker image exists + if ! docker image inspect "$name:$(cat "$parent_dir/VERSION")" > /dev/null 2>&1; then + echo "Docker image $name:$(cat "$parent_dir/VERSION") not found." + echo "Building Docker image..." + echo "" + + if ! "$script_dir/build.sh" --docker; then + echo "Error: Failed to build Docker image." + exit 1 + fi + + echo "" + echo "Docker image built successfully!" + else + echo "Docker image found." + fi + + # Run the Docker container + echo "Running Docker container with project directory mounted..." + docker run -it --rm \ + --device=/dev/kfd \ + --device=/dev/dri \ + --group-add video \ + --cap-add=SYS_PTRACE \ + --security-opt seccomp=unconfined \ + -e LLM_GATEWAY_KEY="$LLM_GATEWAY_KEY" \ + -v $(pwd):$(pwd) \ + -w $(pwd) \ + "$name:$(cat "$parent_dir/VERSION")" + +elif [ "$use_apptainer" = true ]; then + echo "Using Apptainer containerization..." + + # Check if apptainer is installed + if ! command -v apptainer &> /dev/null; then + echo "Error: Apptainer is not installed or not in PATH" + echo "Please install Apptainer first: https://apptainer.org/docs/admin/main/installation.html" + exit 1 + fi + + # Apptainer image filename + apptainer_image="$script_dir/${name}_$(cat "$parent_dir/VERSION").sif" + + # Check if the Apptainer image exists + if [ ! -f "$apptainer_image" ]; then + echo "Apptainer image $apptainer_image not found." + echo "Building Apptainer image automatically..." + echo "" + + if ! "$script_dir/build.sh" --apptainer; then + echo "Error: Failed to build Apptainer image." + exit 1 + fi + + echo "" + echo "Apptainer image built successfully!" + else + echo "Apptainer image found." 
+ fi + + # Create overlay filesystem for writable areas + workload=$(date +"%Y%m%d%H%M%S") + overlay="/tmp/intelliperf_overlay_$(whoami)_$workload.img" + if [ ! -f "$overlay" ]; then + echo "[Log] Overlay image ${overlay} does not exist. Creating overlay of ${overlay_size} MiB..." + apptainer overlay create --size ${overlay_size} --create-dir /var/cache/intelliperf ${overlay} + else + echo "[Log] Overlay image ${overlay} already exists. Using this one." + fi + echo "[Log] Overlay filesystem provides writable areas for profiling tools." + echo "[Log] Use /var/cache/intelliperf as a sandbox for persistent data between container runs." + + # Run the Apptainer container + echo "Running Apptainer container with project directory mounted..." + cd "$parent_dir" + apptainer exec \ + --cleanenv \ + --pwd $(pwd) \ + --overlay ${overlay} \ + --env LLM_GATEWAY_KEY=$LLM_GATEWAY_KEY \ + "$apptainer_image" \ + /bin/bash \ + --rcfile /etc/bashrc +fi + +echo "Container session ended." \ No newline at end of file diff --git a/docker/build.sh b/docker/build.sh deleted file mode 100755 index c91ac2cd..00000000 --- a/docker/build.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/bash -################################################################################ -# MIT License - -# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. 
- -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -################################################################################ - -# Parse command line arguments -dev_mode=false -while [[ $# -gt 0 ]]; do - case $1 in - --dev|-d) - dev_mode=true - shift - ;; - *) - echo "Unknown option: $1" - exit 1 - ;; - esac -done - -# Container name -name="intelliperf" - -# Script directories -script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -parent_dir="$(dirname "$script_dir")" -cur_dir=$(pwd) - -# Set INTELLIPERF_HOME based on dev mode -if [ "$dev_mode" = true ]; then - intelliperf_home="$cur_dir" -else - intelliperf_home="/intelliperf" -fi - -pushd "$script_dir" - -# Auto-configure SSH agent -if [ ! -S ~/.ssh/ssh_auth_sock ]; then - eval "$(ssh-agent)" > /dev/null - ln -sf "$SSH_AUTH_SOCK" ~/.ssh/ssh_auth_sock -fi -export SSH_AUTH_SOCK=~/.ssh/ssh_auth_sock - -# Add default keys if they exist -[ -f ~/.ssh/id_rsa ] && ssh-add ~/.ssh/id_rsa -[ -f ~/.ssh/id_ed25519 ] && ssh-add ~/.ssh/id_ed25519 -[ -f ~/.ssh/id_github ] && ssh-add ~/.ssh/id_github - -# Enable BuildKit and build the Docker image -export DOCKER_BUILDKIT=1 -docker build \ - --ssh default \ - -t "$name" \ - --build-arg DEV_MODE="$dev_mode" \ - --build-arg INTELLIPERF_HOME="$intelliperf_home" \ - -f "$script_dir/intelliperf.Dockerfile" \ - . 
- -popd diff --git a/docker/run.sh b/docker/run.sh deleted file mode 100755 index 4f4ecdae..00000000 --- a/docker/run.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -################################################################################ -# MIT License - -# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
-################################################################################ - -name="intelliperf" - -docker run -it --rm \ - --name "$name" \ - --device=/dev/kfd \ - --device=/dev/dri \ - --group-add video \ - -v $HOME/.ssh:/tmp/ssh:ro \ - -v $(pwd):$(pwd) \ - -w $(pwd) \ - -e LLM_GATEWAY_KEY="$LLM_GATEWAY_KEY" \ - -e SSH_AUTH_SOCK="$SSH_AUTH_SOCK" \ - -v $SSH_AUTH_SOCK:$SSH_AUTH_SOCK \ - "$name" \ - bash -c "cp -r /tmp/ssh/* /root/.ssh/ 2>/dev/null || true && chown -R root:root /root/.ssh && chmod 700 /root/.ssh && chmod 600 /root/.ssh/config /root/.ssh/id_* /root/.ssh/known_hosts 2>/dev/null || true; exec bash" diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index f47f1d2d..f92eb12c 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -35,14 +35,12 @@ We provide both Apptainer and Docker images containing all the dependencies. To #### Using Apptainer ```bash -./apptainer/build.sh -./apptainer/run.sh +./containers/run.sh --apptainer ``` #### Using Docker ```bash -./docker/build.sh -./docker/run.sh +./containers/run.sh --docker ``` ### Setting Up Development Environment diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 16a0d784..0ab44fb6 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -31,7 +31,10 @@ function(add_example name source_file) add_executable(${name} ${source_file}) # CMake instrumentation integration example if(INSTRUMENT) - set(OMNIPROBE_PATH ${CMAKE_SOURCE_DIR}/../external/omniprobe/install) + # Use provided OMNIPROBE_PATH or default to the standard location + if(NOT DEFINED OMNIPROBE_PATH) + set(OMNIPROBE_PATH ${CMAKE_SOURCE_DIR}/../external/omniprobe/install) + endif() # Only use omniprobe if the path exists if(EXISTS ${OMNIPROBE_PATH}/lib/libAMDGCNSubmitAddressMessages-rocm.so) # Use the plugin to instrument the code diff --git a/examples/scripts/build_examples.sh b/examples/scripts/build_examples.sh index 8cdb1aca..a5dae7c3 100755 --- a/examples/scripts/build_examples.sh +++ 
b/examples/scripts/build_examples.sh @@ -32,14 +32,16 @@ parallel=8 build_dir=build verbose=false instrument=false +omniprobe_path="" print_usage() { echo "Usage: $0 [options]" echo "Options:" - echo " -c, --clean Clean build directory" - echo " -v, --verbose Print verbose output" - echo " -i, --instrument Instrument the code with Omniprobe" - echo " -j, --jobs Set number of parallel jobs" + echo " -c, --clean Clean build directory" + echo " -v, --verbose Print verbose output" + echo " -i, --instrument Instrument the code with Omniprobe" + echo " -o, --omniprobe Specify path to omniprobe tool" + echo " -j, --jobs Set number of parallel jobs" } # Parse arguments @@ -57,6 +59,16 @@ while [[ $# -gt 0 ]]; do instrument=true shift ;; + -o|--omniprobe) + if [[ -n "$2" ]]; then + omniprobe_path="$2" + shift 2 + else + echo "Error: --omniprobe requires a path argument" + print_usage + exit 1 + fi + ;; -j|--jobs) if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then parallel="$2" @@ -88,6 +100,9 @@ if [ "$instrument" = true ]; then cmake_config_args+=(-DINSTRUMENT=ON) fi +if [ -n "$omniprobe_path" ]; then + cmake_config_args+=(-DOMNIPROBE_PATH="$omniprobe_path") +fi config_command="cmake -B "$build_dir"\ diff --git a/pyproject.toml b/pyproject.toml index f066944d..c6ccae4f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,8 +38,31 @@ python3 -m pip install -r requirements.txt """ + +[tool.omniprobe] +git = "https://github.com/AMDResearch/omniprobe.git" +branch = "4eb1b3d49b83f2d50823525e073eea356661e871" +build_command = """ +export CC=${ROCM_PATH}/bin/hipcc +export CXX=${ROCM_PATH}/bin/hipcc +git submodule update --init --recursive +python3 -m pip install -r omniprobe/requirements.txt +#TODO: search for a Triton LLVM. 
+if [ -d "/root/.triton/llvm/llvm-ubuntu-x64" ]; then + export TRITON_LLVM_PATH="/root/.triton/llvm/llvm-ubuntu-x64" +else + export TRITON_LLVM_PATH="${ROCM_PATH}/llvm/" +fi +cmake -DCMAKE_PREFIX_PATH=${ROCM_PATH} \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=install \ + -DCMAKE_VERBOSE_MAKEFILE=ON -S . -B build && +cmake --build build --parallel 1 && +cmake --install build +""" + [tool.nexus] -git = "git@github.com:AMDResearch/nexus.git" +git = "https://github.com/AMDResearch/nexus" branch = "main" build_command = """ export CC=${ROCM_PATH}/bin/hipcc @@ -59,6 +82,9 @@ dev = [ [tool.ruff] line-length = 120 target-version = "py38" +exclude = [ + "external/guided-tuning/" +] [tool.ruff.lint] select = [ diff --git a/scripts/build_examples.sh b/scripts/build_examples.sh index 9962aa52..8f41c88e 100755 --- a/scripts/build_examples.sh +++ b/scripts/build_examples.sh @@ -32,14 +32,16 @@ parallel=8 build_dir=build verbose=false instrument=false +omniprobe_path="" print_usage() { echo "Usage: $0 [options]" echo "Options:" - echo " -c, --clean Clean build directory" - echo " -v, --verbose Print verbose output" - echo " -i, --instrument Instrument the code with Omniprobe" - echo " -j, --jobs Set number of parallel jobs" + echo " -c, --clean Clean build directory" + echo " -v, --verbose Print verbose output" + echo " -i, --instrument Instrument the code with Omniprobe" + echo " -o, --omniprobe Specify path to omniprobe tool" + echo " -j, --jobs Set number of parallel jobs" } # Parse arguments @@ -57,6 +59,16 @@ while [[ $# -gt 0 ]]; do instrument=true shift ;; + -o|--omniprobe) + if [[ -n "$2" ]]; then + omniprobe_path="$2" + shift 2 + else + echo "Error: --omniprobe requires a path argument" + print_usage + exit 1 + fi + ;; -j|--jobs) if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then parallel="$2" @@ -88,6 +100,9 @@ if [ "$instrument" = true ]; then cmake_config_args+=(-DINSTRUMENT=ON) fi +if [ -n "$omniprobe_path" ]; then + 
cmake_config_args+=(-DOMNIPROBE_PATH="$omniprobe_path") +fi config_command="cmake -B "$build_dir"\ diff --git a/src/intelliperf/core/application.py b/src/intelliperf/core/application.py index 7accae92..4a763fac 100644 --- a/src/intelliperf/core/application.py +++ b/src/intelliperf/core/application.py @@ -160,7 +160,7 @@ def profile(self, top_n: int): "-w", str(last_matching_id), "-k", - f"{'|'.join(top_n_kernels)}", + f'"{"|".join(top_n_kernels)}"', "--separate", "--save", f"{get_guided_tuning_path()}/intelliperf_report_card.json", diff --git a/src/intelliperf/formulas/bank_conflict.py b/src/intelliperf/formulas/bank_conflict.py index 57ff6bbe..f87f9a33 100644 --- a/src/intelliperf/formulas/bank_conflict.py +++ b/src/intelliperf/formulas/bank_conflict.py @@ -22,11 +22,9 @@ # SOFTWARE. ################################################################################ -import glob import json import logging import os -import shutil from intelliperf.core.llm import LLM from intelliperf.formulas.formula_base import ( @@ -35,7 +33,10 @@ filter_json_field, get_kernel_name, ) -from intelliperf.utils.env import get_llm_api_key +from intelliperf.utils.env import ( + get_llm_api_key, + get_omniprobe_path, +) from intelliperf.utils.process import capture_subprocess_output from intelliperf.utils.regex import generate_ecma_regex_from_list @@ -131,17 +132,6 @@ def instrument_pass(self) -> Result: """ super().instrument_pass() - # Log instrumentation completion - self.get_logger().record( - "instrument_pass_complete", {"success": True, "note": "Instrumentation pass completed via parent class"} - ) - - return Result( - success=False, - asset=self._instrumentation_results, - error_report="Instrumentation pass not implemented for bank conflict.", - ) - # Always instrument the first kernel kernel_to_instrument = self.get_top_kernel() if kernel_to_instrument is None: @@ -150,11 +140,11 @@ def instrument_pass(self) -> Result: error_report="No source code found. 
Please compile your code with -g.", ) - omniprobe_output_dir = os.path.join(self._application.get_project_directory(), "memory_analysis_output") + omniprobe_output_file = os.path.join(self._application.get_project_directory(), "memory_analysis_output.json") - # Remove directory if it exists and create a new one - if os.path.exists(omniprobe_output_dir): - shutil.rmtree(omniprobe_output_dir) + # Remove file if it exists before running omniprobe + if os.path.exists(omniprobe_output_file): + os.remove(omniprobe_output_file) ecma_regex = generate_ecma_regex_from_list([kernel_to_instrument]) logging.debug(f"ECMA Regex for kernel names: {ecma_regex}") @@ -162,12 +152,16 @@ def instrument_pass(self) -> Result: logging.debug(f"Omniprobe profiling command is: {cmd}") success, output = capture_subprocess_output( [ - "omniprobe", + str(get_omniprobe_path()), "--instrumented", "--analyzers", "MemoryAnalysis", "--kernels", - ecma_regex, + f'"{ecma_regex}"', + "--log-format", + "json", + "--log-location", + omniprobe_output_file, "--", " ".join(self._application.get_app_cmd()), ], @@ -182,10 +176,9 @@ def instrument_pass(self) -> Result: # Try loading the memory analysis output # Find all files in the memory_analysis_output directory - output_files = glob.glob(os.path.join(omniprobe_output_dir, "memory_analysis_*.json")) - if len(output_files) == 0: - return Result(success=False, error_report="No memory analysis output files found.") - output_file = output_files[0] + output_file = omniprobe_output_file + if not os.path.exists(output_file): + return Result(success=False, error_report="Memory analysis output file not found.") try: with open(output_file, "r") as f: self._instrumentation_results = json.load(f) @@ -230,13 +223,6 @@ def optimize_pass(self, temperature: float = 0.0, max_tokens: int = 3000) -> Res api_key=llm_key, system_prompt=system_prompt, model=model, provider=provider, logger=self.get_logger() ) - kernel_to_optimize = self.get_top_kernel() - if kernel_to_optimize 
is None: - return Result( - success=False, - error_report="No source code or bank conflicts found. Please compile your code with -g.", - ) - kernel = None kernel_file = None @@ -301,7 +287,52 @@ def optimize_pass(self, temperature: float = 0.0, max_tokens: int = 3000) -> Res f"access the same memory bank simultaneously, causing serialization and performance degradation." ) else: - pass + kernel = self._instrumentation_results[0]["kernel_analysis"]["kernel_info"]["name"] + kernel_name = get_kernel_name(kernel) + files = {} # supposed to be a dict of file_path -> list of line numbers + line_numbers = [] + + # Extract files from bank conflict accesses + bank_conflicts = self._instrumentation_results[0]["kernel_analysis"]["bank_conflicts"]["accesses"] + for access in bank_conflicts: + file_path = access["source_location"]["file"] + line_number = access["source_location"]["line"] + if file_path not in files: + files[file_path] = [] + files[file_path].append(line_number) + + kernel_file = None + for file in files.keys(): + if os.path.exists(file): + with open(file, "r") as f: + unoptimized_file_content = f.read() + if kernel_name in unoptimized_file_content: + kernel_file = file + line_numbers.append(files[file]) + break + if kernel_file is None: + return Result(success=False, error_report="Kernel file not found.") + + # Create user prompt and required reports + user_prompt = ( + f"There is a bank conflict in the kernel {kernel} at line(s) numbered {line_numbers} in the source code {unoptimized_file_content}." + f" Please fix the conflict but do not change the semantics of the program." + " Do not remove any comments or licenses." + " Do not include any markdown code blocks or text other than the code." 
+ ) + if self.current_summary is not None: + user_prompt += f"\n\nThe current summary is: {self.current_summary}" + cur_diff = self.compute_diff([kernel_file]) + user_prompt += f"\nThe diff between the current and initial code is: {cur_diff}" + + self.previous_source_code = unoptimized_file_content + + args = kernel.split("(")[1].split(")")[0] + self.bottleneck_report = ( + f"Bank Conflict Detection: IntelliPerf identified shared memory bank conflicts in kernel " + f"`{kernel_name}` with arguments `{args}`. Bank conflicts occur when multiple threads " + f"access the same memory bank simultaneously, causing serialization and performance degradation." + ) if kernel is None: return Result(success=False, error_report="Failed to extract the kernel name.") @@ -363,10 +394,13 @@ def correctness_validation_pass(self, accordo_absolute_tolerance: float = 1e-6) return result def performance_validation_pass(self) -> Result: + # Currently, Omniprobe appends the '[clone .kd]' suffix to the kernel name + # this needs to be adjusted to reflect the actual kernel name + kernel_signature_non_cloned = self.current_kernel_signature.split(" [clone .kd]")[0] unoptimized_results = filter_json_field( self._initial_profiler_results, field="kernel", - comparison_func=lambda x: x == self.current_kernel_signature, + comparison_func=lambda x: x == kernel_signature_non_cloned, ) unoptimized_time = unoptimized_results[0]["durations"]["ns"] @@ -378,7 +412,7 @@ def performance_validation_pass(self) -> Result: optimized_results = filter_json_field( self._optimization_results, field="kernel", - comparison_func=lambda x: x == self.current_kernel_signature, + comparison_func=lambda x: x == kernel_signature_non_cloned, ) optimized_time = optimized_results[0]["durations"]["ns"] diff --git a/src/intelliperf/utils/env.py b/src/intelliperf/utils/env.py index 996d21ed..f728e855 100644 --- a/src/intelliperf/utils/env.py +++ b/src/intelliperf/utils/env.py @@ -28,6 +28,12 @@ from intelliperf.utils.process 
import exit_on_fail +def get_omniprobe_path(): + if os.environ.get("OMNIPROBE_PATH"): + return Path(os.environ["OMNIPROBE_PATH"]).resolve() + return (Path(__file__).resolve().parent / "../../../external/omniprobe/install/bin/logDuration/omniprobe").resolve() + + def get_guided_tuning_path(): if os.environ.get("GT_TUNING"): return Path(os.environ["GT_TUNING"]).resolve() diff --git a/src/intelliperf/utils/process.py b/src/intelliperf/utils/process.py index ae4befa4..2bcdb32b 100644 --- a/src/intelliperf/utils/process.py +++ b/src/intelliperf/utils/process.py @@ -45,7 +45,7 @@ def capture_subprocess_output( """ verbose = logging.getLogger().getEffectiveLevel() <= logging.DEBUG - logging.debug(f"Running the command: {' '.join(subprocess_args)}") + # logging.debug(f"Running the command: {' '.join(subprocess_args)}") if working_directory is not None: logging.debug(f"Working directory: {working_directory}") @@ -58,7 +58,15 @@ def capture_subprocess_output( if additional_path is not None: env["PATH"] = str(additional_path) + ":" + env["PATH"] + if working_directory is not None: + # Convert to absolute path to be safe + abs_working_dir = os.path.abspath(working_directory) + shell_cmd = f'cd "{abs_working_dir}" && {" ".join(subprocess_args)}' + subprocess_args = ["bash", "-c", shell_cmd] + working_directory = None # Let shell handle it + logging.debug(f"PATH: {env['PATH']}") + logging.debug(f"Running the command: {' '.join(subprocess_args)}") # Run the process and wait for completion try: diff --git a/src/intelliperf/utils/regex.py b/src/intelliperf/utils/regex.py index 30fc8bf9..25fb9633 100644 --- a/src/intelliperf/utils/regex.py +++ b/src/intelliperf/utils/regex.py @@ -38,11 +38,10 @@ def generate_ecma_regex_from_list(kernel_names: set) -> str: # Note: Temporary fix, but until bug in omniprobe is fixed we need to also # add the name of the instrumented kernel clone to the regex, otherwise we'll skip it # and exclude it from the memory analysis report - # 
duplicate_kernel_str = f"__amd_crk_{i.replace(')', ', void*)', 1)}" - # duplicate_kernel_str = f"__amd_crk_{i.replace(")", ", void*)", 1)}" - # escaped_string = re.escape(duplicate_kernel_str) - # regex_string = r"^" + escaped_string + r"$" - # res.append(regex_string) + duplicate_kernel_str = f"__amd_crk_{i.replace(')', ', void*)', 1)}" + escaped_string = re.escape(duplicate_kernel_str) + regex_string = r"^" + escaped_string + r"$" + res.append(regex_string) regex = f"({'|'.join(res)})" return regex