forked from stonesalltheway1/Epstein-Pipeline
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
47 lines (35 loc) · 1.59 KB
/
Dockerfile
File metadata and controls
47 lines (35 loc) · 1.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# ── Stage 1: Builder ──────────────────────────────────────────────────────────
# Installs the project and its Python dependencies under the /install prefix so
# a later stage can copy the result without carrying the compiler toolchain.
FROM python:3.12-slim AS builder

WORKDIR /build

# Build-time system packages: C toolchain plus libpq development files.
# The apt package index is removed in the same layer so it never lands in the
# image.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        build-essential \
        libpq-dev \
    && rm -rf /var/lib/apt/lists/*

# Project metadata and sources that pip needs in order to build the package.
COPY pyproject.toml README.md LICENSE ./
COPY src/ src/

# Install the project together with the selected optional extras
# (GPU-only extras are intentionally left out).
RUN pip install \
        --no-cache-dir \
        --prefix=/install \
        ".[ocr,ocr-surya,pymupdf,nlp,nlp-gliner,ai,vision,embeddings,neon,classify]"
# ── Stage 2: Runtime ─────────────────────────────────────────────────────────
# Final image: slim Python base, runtime shared libraries, the Python packages
# produced by the builder stage, and the project sources.
FROM python:3.12-slim
WORKDIR /app

# Runtime system deps: poppler (pdf2image for Surya), libpq (psycopg), and the
# libGL / GLib shared libraries.
# NOTE(review): libGL and GLib are presumably needed by an imaging wheel pulled
# in through the extras — confirm which one.
# libgl1 is used instead of libgl1-mesa-glx: the latter was only a transitional
# package and is no longer shipped by current Debian releases, so requesting it
# makes this layer fail with "no installation candidate".
RUN apt-get update && apt-get install -y --no-install-recommends \
        libgl1 \
        libglib2.0-0 \
        libpq5 \
        poppler-utils \
    && rm -rf /var/lib/apt/lists/*

# Copy installed Python packages from builder
COPY --from=builder /install /usr/local

# Copy source
COPY pyproject.toml README.md LICENSE ./
COPY src/ src/
COPY data/ data/

# Install the package itself in editable mode (deps already present from
# builder, hence --no-deps).
RUN pip install --no-cache-dir --no-deps -e .

# Download spaCy model (sm for smaller image; trf available via extras).
# Best-effort: a failed download must not fail the build, but it is reported on
# stderr instead of being swallowed silently.
RUN python -m spacy download en_core_web_sm \
    || echo "WARNING: spaCy model en_core_web_sm could not be downloaded; continuing without it" >&2

# NOTE(review): no USER is set, so the container runs as root by default.
# Consider a dedicated non-root user once the paths the pipeline must write to
# are confirmed.
ENTRYPOINT ["epstein-pipeline"]
CMD ["--help"]