From a8c0a580e087f92a77022dc9152128735c44a606 Mon Sep 17 00:00:00 2001 From: Guy Shaibi <39763067+gshaibi@users.noreply.github.com> Date: Sun, 7 Dec 2025 11:48:27 +0200 Subject: [PATCH 1/8] chore(ci): move Docker data and image cache to /mnt for more disk space --- .github/workflows/on-pr.yaml | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/.github/workflows/on-pr.yaml b/.github/workflows/on-pr.yaml index 422492386..fff0253b4 100644 --- a/.github/workflows/on-pr.yaml +++ b/.github/workflows/on-pr.yaml @@ -92,23 +92,25 @@ jobs: uses: docker/setup-buildx-action@v3 - name: Create image cache directory - run: mkdir images + run: | + sudo mkdir -p /mnt/images + sudo chown -R $USER:$USER /mnt/images - name: Cache for docker images and helm chart uses: actions/cache@v4 with: - path: images + path: /mnt/images key: images-${{ github.sha }} - name: Build docker images run: | make build DOCKER_BUILDX_ADDITIONAL_ARGS=--load VERSION=$PACKAGE_VERSION - docker save $(docker images --format '{{.Repository}}:{{.Tag}}' | grep $PACKAGE_VERSION) | gzip > images/docker_images.tgz + docker save $(docker images --format '{{.Repository}}:{{.Tag}}' | grep $PACKAGE_VERSION) | gzip > /mnt/images/docker_images.tgz - name: Build helm chart run: | helm package ./deployments/kai-scheduler -d ./charts --app-version $PACKAGE_VERSION --version $PACKAGE_VERSION - cp charts/kai-scheduler-$PACKAGE_VERSION.tgz images/ + cp charts/kai-scheduler-$PACKAGE_VERSION.tgz /mnt/images/ @@ -120,10 +122,23 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Move Docker Data to /mnt + run: | + sudo systemctl stop docker + sudo mkdir -p /mnt/docker-data + echo '{"data-root": "/mnt/docker-data"}' | sudo tee /etc/docker/daemon.json + sudo systemctl start docker + docker info | grep "Docker Root Dir" + + - name: Create images directory + run: | + sudo mkdir -p /mnt/images + sudo chown -R $USER:$USER /mnt/images + - name: Cache restore uses: actions/cache/restore@v4 with: - path: images + path: /mnt/images key: images-${{ github.sha }} - name: Create k8s Kind Cluster @@ -134,11 +149,10 @@ jobs: config: ./hack/e2e-kind-config.yaml - name: Load docker images to kind - working-directory: images env: PACKAGE_VERSION: ${{ needs.build.outputs.package_version }} run: | - docker load < docker_images.tgz + docker load < /mnt/images/docker_images.tgz for image in $(docker images --format '{{.Repository}}:{{.Tag}}' | grep $PACKAGE_VERSION); do kind load docker-image $image --name kind done @@ -152,7 +166,7 @@ jobs: env: PACKAGE_VERSION: ${{ needs.build.outputs.package_version }} run: | - helm upgrade -i kai-scheduler ./images/kai-scheduler-$PACKAGE_VERSION.tgz -n kai-scheduler --create-namespace \ + helm upgrade -i kai-scheduler ./mnt/images/kai-scheduler-$PACKAGE_VERSION.tgz -n kai-scheduler --create-namespace \ --set "global.gpuSharing=true" --debug --wait - name: Set up Go uses: actions/setup-go@v2 @@ -168,7 +182,7 @@ jobs: PACKAGE_VERSION: ${{ needs.build.outputs.package_version }} run: | docker images --format '{{.Repository}}:{{.Tag}}' | grep $PACKAGE_VERSION | xargs docker rmi -f - rm -rf images + sudo rm -rf /mnt/images - name: Run e2e tests run: | From 77eb9557b6ff5e016152ad396b799fab2d966370 Mon Sep 17 00:00:00 2001 From: gshaibi Date: Sun, 7 Dec 2025 11:52:11 +0200 Subject: [PATCH 2/8] . --- .github/workflows/on-pr.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/on-pr.yaml b/.github/workflows/on-pr.yaml index fff0253b4..3c9b15be5 100644 --- a/.github/workflows/on-pr.yaml +++ b/.github/workflows/on-pr.yaml @@ -83,6 +83,14 @@ jobs: echo "PACKAGE_VERSION=$PACKAGE_VERSION" >> $GITHUB_OUTPUT echo $PACKAGE_VERSION + - name: Move Docker Data to /mnt + run: | + sudo systemctl stop docker + sudo mkdir -p /mnt/docker-data + echo '{"data-root": "/mnt/docker-data"}' | sudo tee /etc/docker/daemon.json + sudo systemctl start docker + docker info | grep "Docker Root Dir" + - name: Set up Go uses: actions/setup-go@v5 with: From 917136fcc283c8b11b02bd0c26a8beff96b26b83 Mon Sep 17 00:00:00 2001 From: gshaibi Date: Sun, 7 Dec 2025 11:55:02 +0200 Subject: [PATCH 3/8] . --- .github/workflows/on-pr.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/on-pr.yaml b/.github/workflows/on-pr.yaml index 3c9b15be5..16e8a273c 100644 --- a/.github/workflows/on-pr.yaml +++ b/.github/workflows/on-pr.yaml @@ -83,6 +83,11 @@ jobs: echo "PACKAGE_VERSION=$PACKAGE_VERSION" >> $GITHUB_OUTPUT echo $PACKAGE_VERSION + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.24.4' + - name: Move Docker Data to /mnt run: | sudo systemctl stop docker @@ -91,11 +96,6 @@ jobs: sudo systemctl start docker docker info | grep "Docker Root Dir" - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version: '1.24.4' - - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 From 83ef1b99a11b8d5a028df8fa92448e37cd827556 Mon Sep 17 00:00:00 2001 From: gshaibi Date: Sun, 7 Dec 2025 12:16:49 +0200 Subject: [PATCH 4/8] Debug - to revert --- .github/workflows/on-pr.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/on-pr.yaml b/.github/workflows/on-pr.yaml index 16e8a273c..276ac85bc 100644 --- a/.github/workflows/on-pr.yaml +++ b/.github/workflows/on-pr.yaml @@ -120,6 +120,11 @@ jobs: helm package ./deployments/kai-scheduler -d ./charts --app-version $PACKAGE_VERSION --version $PACKAGE_VERSION cp charts/kai-scheduler-$PACKAGE_VERSION.tgz /mnt/images/ + # GuyDebug: ls /mnt/images + - name: List images in /mnt/images + run: | + ls -la /mnt/images + e2e-tests: @@ -149,6 +154,11 @@ jobs: path: /mnt/images key: images-${{ github.sha }} + # GuyDebug: ls /mnt/images + - name: List images in /mnt/images + run: | + ls -la /mnt/images + - name: Create k8s Kind Cluster uses: helm/kind-action@v1.10.0 with: From 54f47b7c559e661ca5ea75917789eaf2d8384b0f Mon Sep 17 00:00:00 2001 From: gshaibi Date: Sun, 7 Dec 2025 12:18:16 +0200 Subject: [PATCH 5/8] . --- .github/workflows/debug.yaml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/debug.yaml diff --git a/.github/workflows/debug.yaml b/.github/workflows/debug.yaml new file mode 100644 index 000000000..fb3226dd8 --- /dev/null +++ b/.github/workflows/debug.yaml @@ -0,0 +1,34 @@ +name: Debug Cache Content + +on: + pull_request: + +jobs: + debug-cache: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Restore cache + uses: actions/cache/restore@v4 + with: + path: | + ~/.cache + ./cache + key: images-f86783a7559573ab7a83e5af247d1a02a565f96c + + - name: List cache contents + run: | + echo "=== Contents of ~/.cache ===" + ls -laR ~/.cache || echo "~/.cache does not exist" + echo "" + echo "=== Contents of ./cache ===" + ls -laR ./cache || echo "./cache does not exist" + + - name: Find all cached files + run: | + echo "=== Searching for all files in potential cache locations ===" + find ~/.cache -type f 2>/dev/null || echo "No files in ~/.cache" + find ./cache -type f 2>/dev/null || echo "No files in ./cache" + From a727aa465aaec7e74094dd85306b5455ee220f9e Mon Sep 17 00:00:00 2001 From: gshaibi Date: Sun, 7 Dec 2025 12:47:15 +0200 Subject: [PATCH 6/8] Revert "." This reverts commit 54f47b7c559e661ca5ea75917789eaf2d8384b0f. --- .github/workflows/debug.yaml | 34 ---------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 .github/workflows/debug.yaml diff --git a/.github/workflows/debug.yaml b/.github/workflows/debug.yaml deleted file mode 100644 index fb3226dd8..000000000 --- a/.github/workflows/debug.yaml +++ /dev/null @@ -1,34 +0,0 @@ -name: Debug Cache Content - -on: - pull_request: - -jobs: - debug-cache: - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Restore cache - uses: actions/cache/restore@v4 - with: - path: | - ~/.cache - ./cache - key: images-f86783a7559573ab7a83e5af247d1a02a565f96c - - - name: List cache contents - run: | - echo "=== Contents of ~/.cache ===" - ls -laR ~/.cache || echo "~/.cache does not exist" - echo "" - echo "=== Contents of ./cache ===" - ls -laR ./cache || echo "./cache does not exist" - - - name: Find all cached files - run: | - echo "=== Searching for all files in potential cache locations ===" - find ~/.cache -type f 2>/dev/null || echo "No files in ~/.cache" - find ./cache -type f 2>/dev/null || echo "No files in ./cache" - From 2c1b3de7661fc047ebfe299e673600d82e6c163d Mon Sep 17 00:00:00 2001 From: gshaibi Date: Sun, 7 Dec 2025 12:47:26 +0200 Subject: [PATCH 7/8] Revert "Debug - to revert" This reverts commit 83ef1b99a11b8d5a028df8fa92448e37cd827556. --- .github/workflows/on-pr.yaml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/.github/workflows/on-pr.yaml b/.github/workflows/on-pr.yaml index 276ac85bc..16e8a273c 100644 --- a/.github/workflows/on-pr.yaml +++ b/.github/workflows/on-pr.yaml @@ -120,11 +120,6 @@ jobs: helm package ./deployments/kai-scheduler -d ./charts --app-version $PACKAGE_VERSION --version $PACKAGE_VERSION cp charts/kai-scheduler-$PACKAGE_VERSION.tgz /mnt/images/ - # GuyDebug: ls /mnt/images - - name: List images in /mnt/images - run: | - ls -la /mnt/images - e2e-tests: @@ -154,11 +149,6 @@ jobs: path: /mnt/images key: images-${{ github.sha }} - # GuyDebug: ls /mnt/images - - name: List images in /mnt/images - run: | - ls -la /mnt/images - - name: Create k8s Kind Cluster uses: helm/kind-action@v1.10.0 with: From 7d7b560208ea200a97e6cf17c91e03cb95da7359 Mon Sep 17 00:00:00 2001 From: gshaibi Date: Sun, 7 Dec 2025 12:47:38 +0200 Subject: [PATCH 8/8] . --- .github/workflows/on-pr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/on-pr.yaml b/.github/workflows/on-pr.yaml index 16e8a273c..4865e227c 100644 --- a/.github/workflows/on-pr.yaml +++ b/.github/workflows/on-pr.yaml @@ -174,7 +174,7 @@ jobs: env: PACKAGE_VERSION: ${{ needs.build.outputs.package_version }} run: | - helm upgrade -i kai-scheduler ./mnt/images/kai-scheduler-$PACKAGE_VERSION.tgz -n kai-scheduler --create-namespace \ + helm upgrade -i kai-scheduler /mnt/images/kai-scheduler-$PACKAGE_VERSION.tgz -n kai-scheduler --create-namespace \ --set "global.gpuSharing=true" --debug --wait - name: Set up Go uses: actions/setup-go@v2