From 3539785587318ac268d9875debceee2861206c93 Mon Sep 17 00:00:00 2001 From: Tim Pietrusky Date: Thu, 8 May 2025 11:27:52 +0200 Subject: [PATCH 1/4] feat: update to infinity 0.0.76; added test_input.json; improved error handling when MODEL_NAMES not set; added docker-compose --- .gitignore | 3 ++ .pre-commit-config.yaml | 6 --- Dockerfile | 40 ++++++++++++-------- docker-bake.hcl | 21 ++-------- docker-compose.yml | 18 +++++++++ builder/requirements.txt => requirements.txt | 2 +- src/config.py | 10 ++++- src/handler.py | 10 ++++- test_input.json | 6 +++ 9 files changed, 73 insertions(+), 43 deletions(-) delete mode 100644 .pre-commit-config.yaml create mode 100644 docker-compose.yml rename builder/requirements.txt => requirements.txt (57%) create mode 100644 test_input.json diff --git a/.gitignore b/.gitignore index 68bc17f..b69f0a6 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,6 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + + +data \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 25ca567..0000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,6 +0,0 @@ -repos: -- repo: https://github.com/ambv/black - rev: 24.4.2 - hooks: - - id: black - language_version: python3.10 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 15a8b6d..0155c94 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,22 +1,32 @@ -ARG WORKER_CUDA_VERSION=12.4.1 -FROM runpod/pytorch:2.4.0-py3.11-cuda${WORKER_CUDA_VERSION}-devel-ubuntu22.04 +FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base -#Reinitialize, as its lost after the FROM command -ARG WORKER_CUDA_VERSION=12.4.1 +ENV HF_HOME=/runpod-volume -# Python dependencies -COPY builder/requirements.txt /requirements.txt -RUN python3.11 -m pip install --upgrade pip && \ - python3.11 -m pip install -r /requirements.txt --no-cache-dir && \ - rm /requirements.txt +# install python and other packages +RUN apt-get update && apt-get install -y \ + python3.11 \ + python3-pip \ + git \ + wget \ + libgl1 \ + && ln -sf /usr/bin/python3.11 /usr/bin/python \ + && ln -sf /usr/bin/pip3 /usr/bin/pip -RUN pip uninstall torch -y && \ - CUDA_VERSION_SHORT=$(echo ${WORKER_CUDA_VERSION} | cut -d. -f1,2 | tr -d .) && \ - pip install torch==2.5.1 --index-url https://download.pytorch.org/whl/test/cu${CUDA_VERSION_SHORT} --no-cache-dir +# install uv +RUN pip install uv -ENV HF_HOME=/runpod-volume +# install python dependencies +COPY requirements.txt /requirements.txt +RUN uv pip install -r /requirements.txt --system -# Add src files (Worker Template) +# install torch +RUN pip install torch==2.5.1+cu124 --index-url https://download.pytorch.org/whl/test/cu124 --no-cache-dir + +# Add src files ADD src . -CMD python3.11 -u /handler.py +# Add test input +COPY test_input.json /test_input.json + +# start the handler +CMD python -u /handler.py diff --git a/docker-bake.hcl b/docker-bake.hcl index 3d61ccc..df5baac 100644 --- a/docker-bake.hcl +++ b/docker-bake.hcl @@ -11,27 +11,12 @@ variable "WORKER_VERSION" { } group "all" { - targets = ["worker-1180", "worker-1210"] + targets = ["worker-1241"] } -target "worker-1180" { - tags = ["${REPOSITORY}/worker-infinity-text-embedding:${WORKER_VERSION}-cuda11.8.0"] +target "worker-1241" { + tags = ["${REPOSITORY}/worker-infinity-embedding:${WORKER_VERSION}-cuda12.4.1"] context = "." dockerfile = "Dockerfile" - args = { - WORKER_VERSION = "${WORKER_VERSION}" - WORKER_CUDA_VERSION = "11.8.0" - } - output = ["type=docker,push=${PUSH}"] -} - -target "worker-1210" { - tags = ["${REPOSITORY}/worker-infinity-text-embedding:${WORKER_VERSION}-cuda12.1.0"] - context = "." - dockerfile = "Dockerfile" - args = { - WORKER_VERSION = "${WORKER_VERSION}" - WORKER_CUDA_VERSION = "12.1.0" - } output = ["type=docker,push=${PUSH}"] } \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..9ca9087 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,18 @@ +services: + embedding-worker: + build: + context: . + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + environment: + MODEL_NAMES: "BAAI/bge-small-en-v1.5" + NVIDIA_VISIBLE_DEVICES: "all" + volumes: + - ./data/runpod-volume:/runpod-volume + ports: + - "9000:9000" diff --git a/builder/requirements.txt b/requirements.txt similarity index 57% rename from builder/requirements.txt rename to requirements.txt index 4b2c0e9..9812a81 100644 --- a/builder/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ runpod~=1.7.0 -infinity-emb[all,onnxruntime-gpu,cache,ct2,logging,optimum,server,tensorrt,torch]==0.0.73 +infinity-emb[all]==0.0.76 einops # deployment of custom code with nomic git+https://github.com/pytorch-labs/float8_experimental.git diff --git a/src/config.py b/src/config.py index 39e8d48..6266ed5 100644 --- a/src/config.py +++ b/src/config.py @@ -31,7 +31,13 @@ def backend(self): def model_names(self) -> list[str]: model_names = os.environ.get("MODEL_NAMES") if not model_names: - raise ValueError("MODEL_NAMES environment variable is required") + raise ValueError( + "Missing required environment variable 'MODEL_NAMES'.\n" + "Please provide at least one HuggingFace model ID, or multiple IDs separated by a semicolon.\n" + "Examples:\n" + " MODEL_NAMES=BAAI/bge-small-en-v1.5\n" + " MODEL_NAMES=BAAI/bge-small-en-v1.5;intfloat/e5-large-v2\n" + ) model_names = model_names.split(";") model_names = [model_name for model_name in model_names if model_name] return model_names @@ -46,7 +52,7 @@ def batch_sizes(self) -> list[int]: def dtypes(self) -> list[str]: dtypes = self._get_no_required_multi("DTYPES", "auto") return dtypes - + @cached_property def runpod_max_concurrency(self) -> int: return int(os.environ.get("RUNPOD_MAX_CONCURRENCY", 300)) diff --git a/src/handler.py b/src/handler.py index a7bb04b..8f38e78 100644 --- a/src/handler.py +++ b/src/handler.py @@ -3,7 +3,15 @@ from typing import Any from embedding_service import EmbeddingService -embedding_service = EmbeddingService() +# Gracefully catch configuration errors (e.g. missing env vars) so the user sees +# a clean message instead of a full Python traceback when the container starts. +try: + embedding_service = EmbeddingService() +except Exception as e: # noqa: BLE001 (intercept everything on startup) + import sys + + sys.stderr.write(f"\nstartup failed: {e}\n") + sys.exit(1) async def async_generator_handler(job: dict[str, Any]): diff --git a/test_input.json b/test_input.json new file mode 100644 index 0000000..c2aa8e6 --- /dev/null +++ b/test_input.json @@ -0,0 +1,6 @@ +{ + "input": { + "model": "BAAI/bge-small-en-v1.5", + "input": "Hello, world!" + } +} From 4e7cbfad527ca899f4191d044c9fbb1f8b3b7332 Mon Sep 17 00:00:00 2001 From: Tim Pietrusky Date: Thu, 8 May 2025 13:31:49 +0200 Subject: [PATCH 2/4] ci: remove broken workflows --- .github/workflows/CI-runpod_dep.yml | 4 +- .github/workflows/CI-test_e2e.yml | 44 ---------------- .github/workflows/CI-test_handler.yml | 73 --------------------------- 3 files changed, 2 insertions(+), 119 deletions(-) delete mode 100644 .github/workflows/CI-test_e2e.yml delete mode 100644 .github/workflows/CI-test_handler.yml diff --git a/.github/workflows/CI-runpod_dep.yml b/.github/workflows/CI-runpod_dep.yml index fa63c75..18c8729 100644 --- a/.github/workflows/CI-runpod_dep.yml +++ b/.github/workflows/CI-runpod_dep.yml @@ -22,7 +22,7 @@ jobs: echo "Fetching the current runpod version from requirements.txt..." # Get current version (supports '~=' versioning) - current_version=$(grep -oP 'runpod~=\K[^ ]+' ./builder/requirements.txt) + current_version=$(grep -oP 'runpod~=\K[^ ]+' ./requirements.txt) echo "Current version: $current_version" # Get new version from PyPI @@ -51,7 +51,7 @@ jobs: echo "New major/minor detected ($new_major_minor). Updating runpod version..." # Update requirements.txt with the new version while keeping '~=' - sed -i "s/runpod~=.*/runpod~=$new_version/" ./builder/requirements.txt + sed -i "s/runpod~=.*/runpod~=$new_version/" ./requirements.txt echo "requirements.txt has been updated." - name: Create Pull Request diff --git a/.github/workflows/CI-test_e2e.yml b/.github/workflows/CI-test_e2e.yml deleted file mode 100644 index 1fcffb4..0000000 --- a/.github/workflows/CI-test_e2e.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: CD | Test End-to-End - -on: - push: - branches-ignore: - - "refs/tags/*" - -jobs: - docker: - runs-on: ubuntu-latest - steps: - - name: Clear Space - run: | - rm -rf /usr/share/dotnet - rm -rf /opt/ghc - rm -rf "/usr/local/share/boost" - rm -rf "$AGENT_TOOLSDIRECTORY" - - - name: Set up QEMU - uses: docker/setup-qemu-action@v2 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - - name: Login to Docker Hub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Determine Docker tag - id: docker-tag - run: | - if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then - echo "DOCKER_TAG=dev" >> $GITHUB_ENV - else - echo "DOCKER_TAG=${{ github.sha }}" >> $GITHUB_ENV - fi - - - name: Build and push - uses: docker/build-push-action@v4 - with: - push: true - tags: ${{ vars.DOCKERHUB_REPO }}/${{ vars.DOCKERHUB_IMG }}:${{ env.DOCKER_TAG }} diff --git a/.github/workflows/CI-test_handler.yml b/.github/workflows/CI-test_handler.yml deleted file mode 100644 index e67d683..0000000 --- a/.github/workflows/CI-test_handler.yml +++ /dev/null @@ -1,73 +0,0 @@ -name: CI | Test Handler - -on: - push: - branches: - - main - - pull_request: - branches: - - main - - workflow_dispatch: - -jobs: - launch_runner_worker: - runs-on: ubuntu-latest - - outputs: - id: ${{ steps.extract_id.outputs.runpod_job_id }} - - steps: - - name: Deploy Worker - uses: fjogeleit/http-request-action@v1 - id: deploy - with: - url: "https://api.runpod.ai/v2/${{ vars.RUNNER_24GB }}/run" - method: "POST" - customHeaders: '{"Content-Type": "application/json"}' - bearerToken: ${{ secrets.RUNPOD_API_KEY }} - data: '{"input":{"github_pat": "${{ secrets.GH_PAT }}", "github_org":"${{ vars.GH_ORG }}"}}' - - - name: Extract Job ID - id: extract_id - run: | - ID=$(echo '${{ steps.deploy.outputs.response }}' | jq -r '.id') - echo "::set-output name=runpod_job_id::$ID" - - run_tests: - needs: launch_runner_worker - runs-on: runpod - - steps: - - uses: actions/checkout@v3 - - - name: Set up Python 3.11 & install dependencies - uses: actions/setup-python@v4 - with: - python-version: "3.11" - - - name: Install Dependencies - env: - PIP_ROOT_USER_ACTION: "ignore" - run: | - python -m pip install --upgrade pip - pip install -r builder/requirements.txt - - - name: Execute Tests - run: | - python src/handler.py --test_input='{"input": {"key": "value"}}' - - cleanup: - if: ${{ always() && !success() }} - needs: launch_runner_worker - runs-on: ubuntu-latest - - steps: - - name: Terminate and Shutdown Worker - uses: fjogeleit/http-request-action@v1 - with: - url: "https://api.runpod.ai/v2/${{ vars.RUNNER_24GB }}/cancel/${{ needs.launch_runner_worker.outputs.id }}" - method: "POST" - customHeaders: '{"Content-Type": "application/json"}' - bearerToken: ${{ secrets.RUNPOD_API_KEY }} From d73de90eedce4b0b81febee186b58b8d80a8632c Mon Sep 17 00:00:00 2001 From: Tim Pietrusky Date: Thu, 8 May 2025 13:51:04 +0200 Subject: [PATCH 3/4] ci: simplified release --- .../{build-test-release.yml => release.yml} | 34 +------------------ 1 file changed, 1 insertion(+), 33 deletions(-) rename .github/workflows/{build-test-release.yml => release.yml} (51%) diff --git a/.github/workflows/build-test-release.yml b/.github/workflows/release.yml similarity index 51% rename from .github/workflows/build-test-release.yml rename to .github/workflows/release.yml index 41f1a99..76dfe06 100644 --- a/.github/workflows/build-test-release.yml +++ b/.github/workflows/release.yml @@ -1,4 +1,4 @@ -name: CD | Build-Test-Release +name: release on: push: @@ -38,35 +38,3 @@ jobs: with: push: true tags: ${{ vars.DOCKERHUB_REPO }}/${{ vars.DOCKERHUB_IMG }}:${{ (github.event_name == 'release' && github.event.release.tag_name) || (github.event_name == 'workflow_dispatch' && github.event.inputs.image_tag) || 'dev' }} - - dev-test: - needs: docker-build - runs-on: ubuntu-latest - - steps: - # Checkout - - uses: actions/checkout@v4 - - # Tests - - name: Run Tests - if: github.event_name != 'release' - id: run-tests - uses: direlines/runpod-test-runner@v1.7 - with: - image-tag: ${{ vars.DOCKERHUB_REPO }}/${{ vars.DOCKERHUB_IMG }}:${{ (github.event_name == 'release' && github.event.release.tag_name) || (github.event_name == 'workflow_dispatch' && github.event.inputs.image_tag) || 'dev' }} - runpod-api-key: ${{ secrets.RUNPOD_API_KEY }} - request-timeout: 600 - - # Pass/Fail - - name: Verify Tests - env: - TOTAL_TESTS: ${{ steps.run-tests.outputs.total-tests }} - SUCCESSFUL_TESTS: ${{ steps.run-tests.outputs.succeeded }} - RESULTS: ${{ steps.run-tests.outputs.results }} - run: | - echo "Total tests: $TOTAL_TESTS" - echo "Successful tests: $SUCCESSFUL_TESTS" - echo "Full results: $RESULTS" - if [ "$TOTAL_TESTS" != "$SUCCESSFUL_TESTS" ]; then - exit 1 - fi From 48a9c011e43edbf4bccf4006bde22bb551340bb7 Mon Sep 17 00:00:00 2001 From: Tim Pietrusky Date: Thu, 8 May 2025 13:51:32 +0200 Subject: [PATCH 4/4] ci: removed wrong test --- .github/tests.json | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 .github/tests.json diff --git a/.github/tests.json b/.github/tests.json deleted file mode 100644 index 62b4069..0000000 --- a/.github/tests.json +++ /dev/null @@ -1,23 +0,0 @@ -[ - { - "hardwareConfig": { - "endpointConfig": { - "gpuIds": "ADA_24,AMPERE_16,AMPERE_24,AMPERE_48,AMPERE_80" - } - }, - "input": { - "name": "Dolly" - }, - "expectedOutput": "Hello, Dolly!" - }, - { - "hardwareConfig": { - "endpointConfig": { - "gpuIds": "ADA_24,AMPERE_16,AMPERE_24,AMPERE_48,AMPERE_80" - } - }, - "input": { - "name": "DALL-E" - } - } -]