# ==============================================================================
# Dockerfile — Semantic Search API
# ==============================================================================
#
# DESIGN DECISIONS:
#
# Base image: python:3.11-slim
# - Python 3.11 chosen over 3.12 because scikit-fuzzy has known compatibility
# issues with 3.12 (imp module removal). 3.11 is the latest stable version
# where all dependencies work without patching.
# - slim variant removes unnecessary system packages — final image ~200MB
# smaller than the full python image with no functionality loss for our use case.
#
# Single-stage build (why not multi-stage):
# We use a single stage because our dependencies (torch CPU, sentence-transformers)
# are large regardless. A multi-stage build would not meaningfully reduce image
# size since we need all packages at runtime, not just build time.
#
# Model pre-download:
# The embedding model (~90MB) is downloaded and cached INSIDE the image layer.
# This means the container starts with the model already on disk and makes
# no network downloads at runtime. Without this, the first request after
# container start would trigger a model download, adding 30-60 seconds of latency.
#
# Data volumes:
# chroma_db/, embeddings/, clustering/ are NOT baked into the image.
# They are mounted as volumes at runtime (see docker-compose.yml).
# Reason: these are generated artefacts that can be hundreds of MB.
# Baking them into the image would make the image impractically large
# and would require a full rebuild every time the corpus is re-indexed.
# The correct pattern is: run the pipeline locally once, then mount the
# generated data into the container (see the example run command below).
# ==============================================================================
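#
# Example run command (an illustrative sketch, not part of the build): the
# image tag "semantic-search-api" is an assumption; use whatever tag your
# build or docker-compose.yml produces. Container-side paths assume the
# WORKDIR /app set below.
#
#   docker run -p 8000:8000 \
#     -v "$(pwd)/chroma_db:/app/chroma_db" \
#     -v "$(pwd)/embeddings:/app/embeddings" \
#     -v "$(pwd)/clustering:/app/clustering" \
#     semantic-search-api
#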
FROM python:3.11-slim
# Set working directory
WORKDIR /app
# Install system dependencies needed for numpy/scipy compilation
# gcc and g++ are needed for some Python packages that compile C extensions
# --no-install-recommends keeps the layer small
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*
# Copy requirements first — Docker layer caching means pip install only
# re-runs when requirements.txt changes, not on every code change.
# This is the single most important Dockerfile optimisation for dev iteration.
COPY requirements.txt .
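# (Concrete effect of the caching, as a sketch: a rebuild after editing only
# files under src/ reuses this layer and the pip layer below from cache;
# editing requirements.txt invalidates both, and pip install runs again.)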
# Install Python dependencies
# --no-cache-dir reduces image size by not storing the pip download cache
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt
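# Note: keeping the image CPU-only relies on requirements.txt resolving a CPU
# torch wheel. One common pattern (an assumption here, not verified against
# this repo's requirements.txt) is to point pip at PyTorch's CPU wheel index:
#   pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu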
# Pre-download the embedding model into the image
# This bakes the model weights into a Docker layer so the container
# starts with the model ready — no runtime download needed.
RUN python -c "from sentence_transformers import SentenceTransformer; \
    SentenceTransformer('all-MiniLM-L6-v2'); \
    print('Model cached successfully.')"
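# The weights land in the default Hugging Face cache inside the image
# (typically under /root/.cache). If a deterministic path is needed, one
# option (an assumption, not something this image requires) is an ENV line
# placed before the RUN above:
#   ENV SENTENCE_TRANSFORMERS_HOME=/app/models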
# Copy application source code
# Done AFTER pip install so code changes don't invalidate the dependency layer
COPY src/ ./src/
COPY api/ ./api/
# Expose the port uvicorn will listen on
EXPOSE 8000
# Health check — Docker will mark the container unhealthy if this fails
# --start-period=60s gives the app time to load model + ChromaDB on first start
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD python -c "import urllib.request; \
        urllib.request.urlopen('http://localhost:8000/health')" \
        || exit 1
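# To check the reported status from the host, the standard Docker CLI works
# (substitute your container name):
#   docker inspect --format '{{.State.Health.Status}}' <container-name>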
# Single uvicorn command as required by the brief
# --host 0.0.0.0 makes the server accessible outside the container
# --port 8000 matches the EXPOSE directive above
CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8000"]
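#
# Usage sketch (image tag assumed, as in the header example; add the volume
# mounts shown there to serve a populated index):
#   docker build -t semantic-search-api .
#   docker run -p 8000:8000 semantic-search-api
#   curl http://localhost:8000/health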