Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 0 additions & 23 deletions .github/tests.json

This file was deleted.

4 changes: 2 additions & 2 deletions .github/workflows/CI-runpod_dep.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
echo "Fetching the current runpod version from requirements.txt..."

# Get current version (supports '~=' versioning)
current_version=$(grep -oP 'runpod~=\K[^ ]+' ./builder/requirements.txt)
current_version=$(grep -oP 'runpod~=\K[^ ]+' ./requirements.txt)
echo "Current version: $current_version"

# Get new version from PyPI
Expand Down Expand Up @@ -51,7 +51,7 @@ jobs:
echo "New major/minor detected ($new_major_minor). Updating runpod version..."

# Update requirements.txt with the new version while keeping '~='
sed -i "s/runpod~=.*/runpod~=$new_version/" ./builder/requirements.txt
sed -i "s/runpod~=.*/runpod~=$new_version/" ./requirements.txt
echo "requirements.txt has been updated."

- name: Create Pull Request
Expand Down
44 changes: 0 additions & 44 deletions .github/workflows/CI-test_e2e.yml

This file was deleted.

73 changes: 0 additions & 73 deletions .github/workflows/CI-test_handler.yml

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: CD | Build-Test-Release
name: release

on:
push:
Expand Down Expand Up @@ -38,35 +38,3 @@ jobs:
with:
push: true
tags: ${{ vars.DOCKERHUB_REPO }}/${{ vars.DOCKERHUB_IMG }}:${{ (github.event_name == 'release' && github.event.release.tag_name) || (github.event_name == 'workflow_dispatch' && github.event.inputs.image_tag) || 'dev' }}

dev-test:
needs: docker-build
runs-on: ubuntu-latest

steps:
# Checkout
- uses: actions/checkout@v4

# Tests
- name: Run Tests
if: github.event_name != 'release'
id: run-tests
uses: direlines/runpod-test-runner@v1.7
with:
image-tag: ${{ vars.DOCKERHUB_REPO }}/${{ vars.DOCKERHUB_IMG }}:${{ (github.event_name == 'release' && github.event.release.tag_name) || (github.event_name == 'workflow_dispatch' && github.event.inputs.image_tag) || 'dev' }}
runpod-api-key: ${{ secrets.RUNPOD_API_KEY }}
request-timeout: 600

# Pass/Fail
- name: Verify Tests
env:
TOTAL_TESTS: ${{ steps.run-tests.outputs.total-tests }}
SUCCESSFUL_TESTS: ${{ steps.run-tests.outputs.succeeded }}
RESULTS: ${{ steps.run-tests.outputs.results }}
run: |
echo "Total tests: $TOTAL_TESTS"
echo "Successful tests: $SUCCESSFUL_TESTS"
echo "Full results: $RESULTS"
if [ "$TOTAL_TESTS" != "$SUCCESSFUL_TESTS" ]; then
exit 1
fi
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,6 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/


data
6 changes: 0 additions & 6 deletions .pre-commit-config.yaml

This file was deleted.

40 changes: 25 additions & 15 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,22 +1,32 @@
ARG WORKER_CUDA_VERSION=12.4.1
FROM runpod/pytorch:2.4.0-py3.11-cuda${WORKER_CUDA_VERSION}-devel-ubuntu22.04
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base

#Reinitialize, as its lost after the FROM command
ARG WORKER_CUDA_VERSION=12.4.1
ENV HF_HOME=/runpod-volume

# Python dependencies
COPY builder/requirements.txt /requirements.txt
RUN python3.11 -m pip install --upgrade pip && \
python3.11 -m pip install -r /requirements.txt --no-cache-dir && \
rm /requirements.txt
# install python and other packages
RUN apt-get update && apt-get install -y \
python3.11 \
python3-pip \
git \
wget \
libgl1 \
&& ln -sf /usr/bin/python3.11 /usr/bin/python \
&& ln -sf /usr/bin/pip3 /usr/bin/pip

RUN pip uninstall torch -y && \
CUDA_VERSION_SHORT=$(echo ${WORKER_CUDA_VERSION} | cut -d. -f1,2 | tr -d .) && \
pip install torch==2.5.1 --index-url https://download.pytorch.org/whl/test/cu${CUDA_VERSION_SHORT} --no-cache-dir
# install uv
RUN pip install uv

ENV HF_HOME=/runpod-volume
# install python dependencies
COPY requirements.txt /requirements.txt
RUN uv pip install -r /requirements.txt --system

# Add src files (Worker Template)
# install torch
RUN pip install torch==2.5.1+cu124 --index-url https://download.pytorch.org/whl/test/cu124 --no-cache-dir

# Add src files
ADD src .

CMD python3.11 -u /handler.py
# Add test input
COPY test_input.json /test_input.json

# start the handler
CMD python -u /handler.py
21 changes: 3 additions & 18 deletions docker-bake.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,12 @@ variable "WORKER_VERSION" {
}

group "all" {
targets = ["worker-1180", "worker-1210"]
targets = ["worker-1241"]
}

target "worker-1180" {
tags = ["${REPOSITORY}/worker-infinity-text-embedding:${WORKER_VERSION}-cuda11.8.0"]
target "worker-1241" {
tags = ["${REPOSITORY}/worker-infinity-embedding:${WORKER_VERSION}-cuda12.4.1"]
context = "."
dockerfile = "Dockerfile"
args = {
WORKER_VERSION = "${WORKER_VERSION}"
WORKER_CUDA_VERSION = "11.8.0"
}
output = ["type=docker,push=${PUSH}"]
}

target "worker-1210" {
tags = ["${REPOSITORY}/worker-infinity-text-embedding:${WORKER_VERSION}-cuda12.1.0"]
context = "."
dockerfile = "Dockerfile"
args = {
WORKER_VERSION = "${WORKER_VERSION}"
WORKER_CUDA_VERSION = "12.1.0"
}
output = ["type=docker,push=${PUSH}"]
}
18 changes: 18 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
services:
embedding-worker:
build:
context: .
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
environment:
MODEL_NAMES: "BAAI/bge-small-en-v1.5"
NVIDIA_VISIBLE_DEVICES: "all"
volumes:
- ./data/runpod-volume:/runpod-volume
ports:
- "9000:9000"
2 changes: 1 addition & 1 deletion builder/requirements.txt → requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
runpod~=1.7.0
infinity-emb[all,onnxruntime-gpu,cache,ct2,logging,optimum,server,tensorrt,torch]==0.0.73
infinity-emb[all]==0.0.76
einops # deployment of custom code with nomic
git+https://github.com/pytorch-labs/float8_experimental.git
10 changes: 8 additions & 2 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,13 @@ def backend(self):
def model_names(self) -> list[str]:
model_names = os.environ.get("MODEL_NAMES")
if not model_names:
raise ValueError("MODEL_NAMES environment variable is required")
raise ValueError(
"Missing required environment variable 'MODEL_NAMES'.\n"
"Please provide at least one HuggingFace model ID, or multiple IDs separated by a semicolon.\n"
"Examples:\n"
" MODEL_NAMES=BAAI/bge-small-en-v1.5\n"
" MODEL_NAMES=BAAI/bge-small-en-v1.5;intfloat/e5-large-v2\n"
)
model_names = model_names.split(";")
model_names = [model_name for model_name in model_names if model_name]
return model_names
Expand All @@ -46,7 +52,7 @@ def batch_sizes(self) -> list[int]:
def dtypes(self) -> list[str]:
dtypes = self._get_no_required_multi("DTYPES", "auto")
return dtypes

@cached_property
def runpod_max_concurrency(self) -> int:
return int(os.environ.get("RUNPOD_MAX_CONCURRENCY", 300))
10 changes: 9 additions & 1 deletion src/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,15 @@
from typing import Any
from embedding_service import EmbeddingService

embedding_service = EmbeddingService()
# Gracefully catch configuration errors (e.g. missing env vars) so the user sees
# a clean message instead of a full Python traceback when the container starts.
try:
embedding_service = EmbeddingService()
except Exception as e: # noqa: BLE001 (intercept everything on startup)
import sys

sys.stderr.write(f"\nstartup failed: {e}\n")
sys.exit(1)


async def async_generator_handler(job: dict[str, Any]):
Expand Down
6 changes: 6 additions & 0 deletions test_input.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"input": {
"model": "BAAI/bge-small-en-v1.5",
"input": "Hello, world!"
}
}