-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
69 lines (56 loc) · 2.36 KB
/
Dockerfile
File metadata and controls
69 lines (56 loc) · 2.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# GGM Pipeline Docker Image
# Build: docker build -t ggm-pipeline .
# Run: docker run --rm --network host ggm-pipeline --dest postgres
#
# This image provides an alternative to running with local Python/uv.
# It packages all dependencies and can run the full pipeline.
# Base image: Python 3.12.10 on the slim image variant, pinned to an exact patch release.
FROM python:3.12.10-slim
# System packages for the database drivers and for the later build steps:
#   libpq-dev              - PostgreSQL
#   unixodbc-dev, gcc, g++ - MSSQL (pyodbc)
#   wget, unzip            - used by the Oracle Instant Client download step below
#   curl                   - general utility
# libaio ships under two package names; prefer libaio1t64 when the package
# index offers it and fall back to libaio1 otherwise.
# The apt lists are removed in the same layer so they do not end up in the image.
RUN apt-get update \
    && if apt-cache show libaio1t64 >/dev/null 2>&1; then \
           libaio_pkg=libaio1t64; \
       else \
           libaio_pkg=libaio1; \
       fi \
    && apt-get install -y --no-install-recommends \
        curl \
        g++ \
        gcc \
        libpq-dev \
        unixodbc-dev \
        unzip \
        wget \
        "$libaio_pkg" \
    && rm -rf /var/lib/apt/lists/*
# Oracle Instant Client 23.5 (basic package), required for oracledb thick mode.
# Download the archive, extract it under /opt/oracle, drop the archive in the
# same layer, then register the client directory with the dynamic linker.
# The archive is expected to unpack to /opt/oracle/instantclient_23_5.
RUN wget --quiet \
        --output-document=/tmp/instantclient.zip \
        https://download.oracle.com/otn_software/linux/instantclient/2350000/instantclient-basic-linux.x64-23.5.0.24.07.zip \
    && unzip /tmp/instantclient.zip -d /opt/oracle \
    && rm /tmp/instantclient.zip \
    && printf '%s\n' /opt/oracle/instantclient_23_5 > /etc/ld.so.conf.d/oracle-instantclient.conf \
    && ldconfig
# Also expose the client libraries through LD_LIBRARY_PATH at runtime.
ENV LD_LIBRARY_PATH=/opt/oracle/instantclient_23_5
# Install uv for fast Python package management.
# The uv binary is copied from the published uv image. The tag is pinned to an
# explicit release instead of `latest` so rebuilds always get the same uv;
# bump it deliberately when upgrading.
COPY --from=ghcr.io/astral-sh/uv:0.7.2 /uv /usr/local/bin/uv
# Set working directory; relative paths in the COPY/RUN instructions below and
# in the runtime command resolve against /app.
WORKDIR /app
# Copy only the dependency manifests first so the install layer below is reused
# from the build cache until pyproject.toml or uv.lock changes.
COPY pyproject.toml uv.lock ./
# Install the locked, non-dev dependencies into a virtual environment
# (the project itself is not installed).
# `--no-cache` keeps uv's download/build cache out of the image layer, since no
# cache mount is used here.
RUN uv sync --frozen --no-dev --no-install-project --no-cache
# Copy the rest of the application (the whole build context) into /app.
# This comes after the dependency install so source-only changes do not
# invalidate the dependency layer.
# NOTE(review): no .dockerignore is visible from here — confirm one excludes
# .git, any local .venv, and env/secret files from the build context.
COPY . .
# Default environment variables (can be overridden at run time,
# e.g. `docker run -e GGM_DESTINATION=...`)
# Gateway is auto-detected from destination
ENV GGM_DESTINATION=postgres
# Healthcheck - verify Python and key modules are available
# Use `-P` to avoid current working directory shadowing installed packages
# (this repo contains top-level `ingest/` and `transform/` project folders).
# `--no-sync` keeps the probe read-only: it runs against the environment that
# was installed at build time instead of letting `uv run` re-sync it on every
# probe.
# `|| exit 1` maps any failure to exit status 1 (unhealthy).
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD uv run --no-sync python -P -c "from dlt.sources.sql_database import sql_database; from sqlmesh import Context; print('OK')" || exit 1
# Default entrypoint runs the pipeline script through uv, relative to the
# working directory (/app). Arguments passed to `docker run <image> ...` are
# appended after scripts/pipeline.py.
# NOTE(review): `uv run` without `--no-sync` may re-sync the environment at
# container start — confirm this is intended.
# NOTE(review): no USER instruction is visible in this file, so the process
# presumably runs as root — confirm this is acceptable.
ENTRYPOINT ["uv", "run", "python", "scripts/pipeline.py"]
# Default command shows help when no arguments are supplied
CMD ["--help"]