runpod-workers · TimPietrusky · May 8, 2025 · May 8, 2025 · May 8, 2025 · May 8, 2025
diff --git a/.github/tests.json b/.github/tests.json
diff --git a/.github/workflows/CI-runpod_dep.yml b/.github/workflows/CI-runpod_dep.yml
@@ -22,7 +22,7 @@ jobs:
           echo "Fetching the current runpod version from requirements.txt..."
 
           # Get current version (supports '~=' versioning)
-          current_version=$(grep -oP 'runpod~=\K[^ ]+' ./builder/requirements.txt)
+          current_version=$(grep -oP 'runpod~=\K[^ ]+' ./requirements.txt)
           echo "Current version: $current_version"
 
           # Get new version from PyPI
@@ -51,7 +51,7 @@ jobs:
           echo "New major/minor detected ($new_major_minor). Updating runpod version..."
 
           # Update requirements.txt with the new version while keeping '~='
-          sed -i "s/runpod~=.*/runpod~=$new_version/" ./builder/requirements.txt
+          sed -i "s/runpod~=.*/runpod~=$new_version/" ./requirements.txt
           echo "requirements.txt has been updated."
 
       - name: Create Pull Request

diff --git a/.github/workflows/CI-test_e2e.yml b/.github/workflows/CI-test_e2e.yml
diff --git a/.github/workflows/CI-test_handler.yml b/.github/workflows/CI-test_handler.yml
diff --git a/.github/workflows/build-test-release.yml → .github/workflows/release.yml b/.github/workflows/build-test-release.yml → .github/workflows/release.yml
@@ -1,4 +1,4 @@
-name: CD | Build-Test-Release
+name: release
 
 on:
   push:
@@ -38,35 +38,3 @@ jobs:
         with:
           push: true
           tags: ${{ vars.DOCKERHUB_REPO }}/${{ vars.DOCKERHUB_IMG }}:${{ (github.event_name == 'release' && github.event.release.tag_name) || (github.event_name == 'workflow_dispatch' && github.event.inputs.image_tag) || 'dev' }}
-
-  dev-test:
-    needs: docker-build
-    runs-on: ubuntu-latest
-
-    steps:
-      # Checkout
-      - uses: actions/checkout@v4
-
-      # Tests
-      - name: Run Tests
-        if: github.event_name != 'release'
-        id: run-tests
-        uses: direlines/runpod-test-runner@v1.7
-        with:
-          image-tag: ${{ vars.DOCKERHUB_REPO }}/${{ vars.DOCKERHUB_IMG }}:${{ (github.event_name == 'release' && github.event.release.tag_name) || (github.event_name == 'workflow_dispatch' && github.event.inputs.image_tag) || 'dev' }}
-          runpod-api-key: ${{ secrets.RUNPOD_API_KEY }}
-          request-timeout: 600
-
-      # Pass/Fail
-      - name: Verify Tests
-        env:
-          TOTAL_TESTS: ${{ steps.run-tests.outputs.total-tests }}
-          SUCCESSFUL_TESTS: ${{ steps.run-tests.outputs.succeeded }}
-          RESULTS: ${{ steps.run-tests.outputs.results }}
-        run: |
-          echo "Total tests: $TOTAL_TESTS"
-          echo "Successful tests: $SUCCESSFUL_TESTS"
-          echo "Full results: $RESULTS"
-          if [ "$TOTAL_TESTS" != "$SUCCESSFUL_TESTS" ]; then
-              exit 1
-          fi
diff --git a/.gitignore b/.gitignore
@@ -158,3 +158,6 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+
+data
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
diff --git a/Dockerfile b/Dockerfile
@@ -1,22 +1,33 @@
-ARG WORKER_CUDA_VERSION=12.4.1
-FROM runpod/pytorch:2.4.0-py3.11-cuda${WORKER_CUDA_VERSION}-devel-ubuntu22.04
+FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base
 
-#Reinitialize, as its lost after the FROM command
-ARG WORKER_CUDA_VERSION=12.4.1
+ENV HF_HOME=/runpod-volume
 
-# Python dependencies
-COPY builder/requirements.txt /requirements.txt
-RUN python3.11 -m pip install --upgrade pip && \
-    python3.11 -m pip install -r /requirements.txt --no-cache-dir && \
-    rm /requirements.txt
+# install python and other packages; the apt package lists are removed in the same layer to keep the image small
+RUN apt-get update && apt-get install -y \
+    python3.11 \
+    python3-pip \
+    git \
+    wget \
+    libgl1 \
+    && ln -sf /usr/bin/python3.11 /usr/bin/python \
+    && ln -sf /usr/bin/pip3 /usr/bin/pip \
+    && rm -rf /var/lib/apt/lists/*
 
-RUN pip uninstall torch -y && \
-    CUDA_VERSION_SHORT=$(echo ${WORKER_CUDA_VERSION} | cut -d. -f1,2 | tr -d .) && \
-    pip install torch==2.5.1 --index-url https://download.pytorch.org/whl/test/cu${CUDA_VERSION_SHORT} --no-cache-dir
+# install uv (no pip download cache left in the layer)
+RUN pip install uv --no-cache-dir
 
-ENV HF_HOME=/runpod-volume
+# install python dependencies (no uv cache left in the layer)
+COPY requirements.txt /requirements.txt
+RUN uv pip install -r /requirements.txt --system --no-cache
 
-# Add src files (Worker Template)
+# install torch (NOTE(review): /usr/bin/pip links to pip3 but /usr/bin/python links to python3.11 - confirm both target the same interpreter)
+RUN pip install torch==2.5.1+cu124 --index-url https://download.pytorch.org/whl/test/cu124 --no-cache-dir
+
+# Add src files
 ADD src .
 
-CMD python3.11 -u /handler.py
+# Add test input
+COPY test_input.json /test_input.json
+
+# start the handler (exec form: no wrapping shell, so python receives stop signals directly)
+CMD ["python", "-u", "/handler.py"]
diff --git a/docker-bake.hcl b/docker-bake.hcl
@@ -11,27 +11,12 @@ variable "WORKER_VERSION" {
 }
 
 group "all" {
-  targets = ["worker-1180", "worker-1210"]
+  targets = ["worker-1241"]
 }
 
-target "worker-1180" {
-  tags = ["${REPOSITORY}/worker-infinity-text-embedding:${WORKER_VERSION}-cuda11.8.0"]
+target "worker-1241" {
+  tags = ["${REPOSITORY}/worker-infinity-embedding:${WORKER_VERSION}-cuda12.4.1"]
   context = "."
   dockerfile = "Dockerfile"
-  args = {
-    WORKER_VERSION = "${WORKER_VERSION}"
-    WORKER_CUDA_VERSION = "11.8.0"
-  }
-  output = ["type=docker,push=${PUSH}"]
-}
-
-target "worker-1210" {
-  tags = ["${REPOSITORY}/worker-infinity-text-embedding:${WORKER_VERSION}-cuda12.1.0"]
-  context = "."
-  dockerfile = "Dockerfile"
-  args = {
-    WORKER_VERSION = "${WORKER_VERSION}"
-    WORKER_CUDA_VERSION = "12.1.0"
-  }
   output = ["type=docker,push=${PUSH}"]
 }
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -0,0 +1,18 @@
+services:
+  embedding-worker:  # local run of the worker image built from this repository
+    build:
+      context: .  # the Dockerfile sits next to this compose file at the repository root
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia  # reserve NVIDIA devices for the container
+              count: all
+              capabilities: [gpu]
+    environment:
+      MODEL_NAMES: "BAAI/bge-small-en-v1.5"  # required by src/config.py; separate multiple model IDs with ';'
+      NVIDIA_VISIBLE_DEVICES: "all"
+    volumes:
+      - ./data/runpod-volume:/runpod-volume  # /runpod-volume is HF_HOME in the Dockerfile; ./data is git-ignored
+    ports:
+      - "9000:9000"  # NOTE(review): nothing visible in this diff sets the listening port - confirm 9000
diff --git a/builder/requirements.txt → requirements.txt b/builder/requirements.txt → requirements.txt
@@ -1,4 +1,4 @@
 runpod~=1.7.0
-infinity-emb[all,onnxruntime-gpu,cache,ct2,logging,optimum,server,tensorrt,torch]==0.0.73
+infinity-emb[all]==0.0.76
 einops # deployment of custom code with nomic
 git+https://github.com/pytorch-labs/float8_experimental.git
diff --git a/src/config.py b/src/config.py
@@ -31,7 +31,13 @@ def backend(self):
     def model_names(self) -> list[str]:
         model_names = os.environ.get("MODEL_NAMES")
         if not model_names:
-            raise ValueError("MODEL_NAMES environment variable is required")
+            raise ValueError(
+                "Missing required environment variable 'MODEL_NAMES'.\n"
+                "Please provide at least one HuggingFace model ID, or multiple IDs separated by a semicolon.\n"
+                "Examples:\n"
+                "  MODEL_NAMES=BAAI/bge-small-en-v1.5\n"
+                "  MODEL_NAMES=BAAI/bge-small-en-v1.5;intfloat/e5-large-v2\n"
+            )
         model_names = model_names.split(";")
         model_names = [model_name for model_name in model_names if model_name]
         return model_names
@@ -46,7 +52,7 @@ def batch_sizes(self) -> list[int]:
     def dtypes(self) -> list[str]:
         dtypes = self._get_no_required_multi("DTYPES", "auto")
         return dtypes
-    
+
     @cached_property
     def runpod_max_concurrency(self) -> int:
         return int(os.environ.get("RUNPOD_MAX_CONCURRENCY", 300))
diff --git a/src/handler.py b/src/handler.py
@@ -3,7 +3,15 @@
 from typing import Any
 from embedding_service import EmbeddingService
 
-embedding_service = EmbeddingService()
+# Gracefully catch configuration errors (e.g. missing env vars) so the user sees
+# a clean message instead of a full Python traceback when the container starts.
+try:
+    embedding_service = EmbeddingService()
+except Exception as e:  # noqa: BLE001  (intercept everything on startup)
+    import sys
+
+    sys.stderr.write(f"\nstartup failed: {e}\n")
+    sys.exit(1)
 
 
 async def async_generator_handler(job: dict[str, Any]):

diff --git a/test_input.json b/test_input.json
@@ -0,0 +1,6 @@
+{
+  "input": {
+    "model": "BAAI/bge-small-en-v1.5",
+    "input": "Hello, world!"
+  }
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -158,3 +158,6 @@ cython_debug/ @@
     #  and can be added to the global gitignore or merged into this file.  For a more nuclear
     #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
     #.idea/
+    data