forked from LearningCircuit/local-deep-research
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
259 lines (219 loc) · 10.1 KB
/
Dockerfile
File metadata and controls
259 lines (219 loc) · 10.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
####
# Used for building the LDR service dependencies.
####
FROM python:3.14-slim@sha256:9b81fe9acff79e61affb44aaf3b6ff234392e8ca477cb86c9f7fd11732ce9b6a AS builder-base
# Set shell to bash with pipefail for safer pipe handling
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Install system dependencies for SQLCipher and Node.js for frontend build
# Using Acquire::Retries to handle transient Debian mirror errors during CI
RUN apt-get update -o Acquire::Retries=3 && apt-get upgrade -y -o Acquire::Retries=3 \
&& apt-get install -y --no-install-recommends -o Acquire::Retries=3 \
libsqlcipher-dev \
sqlcipher \
libsqlcipher1 \
build-essential \
pkg-config \
curl \
ca-certificates \
gnupg \
# Add NodeSource GPG key and repository directly (pinned to Node.js 22.x LTS)
# GPG key fingerprint verification for supply chain security
# Key: NSolid <nsolid-gpg@nodesource.com> (RSA 2048-bit, created 2016-05-23)
# Fingerprint verified from: https://github.com/nodesource/distributions
# If key rotates, update NODESOURCE_GPG_FINGERPRINT and verify new key at:
# https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key
&& NODESOURCE_GPG_FINGERPRINT="6F71F525282841EEDAF851B42F59B5F99B1BE0B4" \
&& curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key -o /tmp/nodesource.gpg.key \
&& ACTUAL_FINGERPRINT=$(gpg --with-fingerprint --with-colons --show-keys /tmp/nodesource.gpg.key 2>/dev/null | grep "^fpr" | head -1 | cut -d: -f10) \
&& if [ "$ACTUAL_FINGERPRINT" != "$NODESOURCE_GPG_FINGERPRINT" ]; then \
echo "ERROR: NodeSource GPG key fingerprint mismatch!" >&2; \
echo "Expected: $NODESOURCE_GPG_FINGERPRINT" >&2; \
echo "Actual: $ACTUAL_FINGERPRINT" >&2; \
echo "The NodeSource signing key may have been rotated or compromised." >&2; \
echo "Verify the new key and update NODESOURCE_GPG_FINGERPRINT if valid." >&2; \
exit 1; \
fi \
&& gpg --batch --dearmor -o /usr/share/keyrings/nodesource.gpg /tmp/nodesource.gpg.key \
&& rm /tmp/nodesource.gpg.key \
&& echo "deb [signed-by=/usr/share/keyrings/nodesource.gpg] https://deb.nodesource.com/node_22.x nodistro main" > /etc/apt/sources.list.d/nodesource.list \
&& apt-get update \
&& apt-get install -y --no-install-recommends nodejs \
&& rm -rf /var/lib/apt/lists/*
# Install dependencies and tools (pinned versions for reproducibility)
# Pin pip, pdm, and playwright to specific versions for OSSF Scorecard compliance
# Note: hishel<1.0.0 is required due to https://github.com/pdm-project/pdm/issues/3657
# Note: wheel>=0.46.2 is required for CVE-2026-24049 fix (path traversal)
RUN pip3 install --no-cache-dir pip==24.3.1 \
&& pip install --no-cache-dir pdm==2.26.2 "hishel<1.0.0" playwright==1.57.0 "wheel>=0.46.2"
# disable update check
ENV PDM_CHECK_UPDATE=false
# Increase PDM request timeout from default 15s to 120s for large packages (numpy, torch)
# This helps prevent httpcore.ReadTimeout errors during CI network congestion
ENV PDM_REQUEST_TIMEOUT=120
# Build argument to invalidate cache when dependencies change
ARG DEPS_HASH
WORKDIR /install
COPY pyproject.toml pyproject.toml
COPY pdm.lock pdm.lock
COPY src/ src
COPY LICENSE LICENSE
COPY README.md README.md
# Copy frontend build files
COPY package.json package.json
COPY package-lock.json* package-lock.json
COPY vite.config.js vite.config.js
####
# Builds the LDR service dependencies used in production.
####
FROM builder-base AS builder
# Install npm dependencies, build frontend, and install Python dependencies
# PDM will automatically select the correct SQLCipher package based on platform
# Using npm ci for reproducible builds with lockfile integrity verification
RUN npm ci \
&& npm run build \
&& for i in 1 2 3; do \
if pdm install --prod --no-editable; then \
break; \
else \
echo "PDM install attempt $i failed, retrying in 15s..."; \
sleep 15; \
fi; \
done
####
# Container for running tests.
####
FROM builder-base AS ldr-test
# Set shell to bash with pipefail for safer pipe handling
# Note: Explicitly set even though inherited from builder-base for hadolint static analysis
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Install additional runtime dependencies for testing tools
# Note: Node.js is already installed from builder-base
# Using Acquire::Retries to handle transient Debian mirror errors during CI
RUN apt-get update -o Acquire::Retries=3 && apt-get upgrade -y -o Acquire::Retries=3 \
&& apt-get install -y --no-install-recommends -o Acquire::Retries=3 \
xauth \
xvfb \
# Dependencies for Chromium
fonts-liberation \
libasound2 \
libatk-bridge2.0-0 \
libatk1.0-0 \
libatspi2.0-0 \
libcups2 \
libdbus-1-3 \
libdrm2 \
libgbm1 \
libgtk-3-0 \
libnspr4 \
libnss3 \
libxcomposite1 \
libxdamage1 \
libxfixes3 \
libxkbcommon0 \
libxrandr2 \
xdg-utils \
&& rm -rf /var/lib/apt/lists/*
# Set up Puppeteer environment
ENV PUPPETEER_CACHE_DIR=/app/puppeteer-cache
ENV DOCKER_ENV=true
# Don't skip Chrome download - let Puppeteer download its own Chrome as fallback
# ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
# Create puppeteer cache directory with proper permissions
RUN mkdir -p /app/puppeteer-cache && chmod -R 755 /app/puppeteer-cache
# Install Playwright with Chromium first (before npm packages)
RUN playwright install --with-deps chromium || echo "Playwright install failed, will use Puppeteer's Chrome"
# Copy test package files and lockfiles for npm ci
COPY tests/api_tests_with_login/package.json tests/api_tests_with_login/package-lock.json /install/tests/api_tests_with_login/
COPY tests/ui_tests/package.json tests/ui_tests/package-lock.json /install/tests/ui_tests/
# Install npm packages - Skip Puppeteer Chrome download since we have Playwright's Chrome
WORKDIR /install/tests/api_tests_with_login
ENV PUPPETEER_SKIP_DOWNLOAD=true
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
RUN npm ci
WORKDIR /install/tests/ui_tests
RUN npm ci
# Set CHROME_BIN to help Puppeteer find Chrome from Playwright
# Try to find and set Chrome binary path from Playwright's installation
RUN CHROME_PATH=$(find /root/.cache/ms-playwright -name chrome -type f 2>/dev/null | head -1) && \
if [ -n "$CHROME_PATH" ]; then \
echo "export CHROME_BIN=$CHROME_PATH" >> /etc/profile.d/chrome.sh; \
echo "export PUPPETEER_EXECUTABLE_PATH=$CHROME_PATH" >> /etc/profile.d/chrome.sh; \
fi || true
# Set environment variables for Puppeteer to use Playwright's Chrome
ENV PUPPETEER_SKIP_DOWNLOAD=true
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
ENV PUPPETEER_EXECUTABLE_PATH=/root/.cache/ms-playwright/chromium-1181/chrome-linux/chrome
# Copy test files to /app where they will be run from
RUN mkdir -p /app && cp -r /install/tests /app/
# Ensure Chrome binaries have correct permissions
RUN chmod -R 755 /app/puppeteer-cache
WORKDIR /install
# Install the package using PDM
# PDM will automatically select the correct SQLCipher package based on platform
RUN pdm install --no-editable
# Configure path to default to the venv python.
ENV PATH="/install/.venv/bin:$PATH"
# Note: Test container runs as root because CI workflows mount source code
# volumes that are owned by root. The production container (ldr) runs as
# non-root user for security.
####
# Runs the LDR service.
###
FROM python:3.14-slim@sha256:9b81fe9acff79e61affb44aaf3b6ff234392e8ca477cb86c9f7fd11732ce9b6a AS ldr
# Set shell to bash with pipefail for safer pipe handling
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Install runtime dependencies for SQLCipher and WeasyPrint
RUN apt-get update && apt-get upgrade -y \
&& apt-get install -y --no-install-recommends \
sqlcipher \
libsqlcipher1 \
# gosu for safe user switching in entrypoint
gosu \
# WeasyPrint dependencies for PDF generation
libcairo2 \
libpango-1.0-0 \
libpangocairo-1.0-0 \
libgdk-pixbuf-2.0-0 \
libffi-dev \
shared-mime-info \
# GLib and GObject dependencies (libgobject is included in libglib2.0-0)
libglib2.0-0 \
&& rm -rf /var/lib/apt/lists/*
# Create non-root user for running service (security best practice)
RUN groupadd -r ldruser && useradd -r -g ldruser -u 1000 -m -d /home/ldruser ldruser
# Create directories with proper permissions for non-root user
RUN mkdir -p /app/.config/local_deep_research /home/ldruser/.local/share && \
chown -R ldruser:ldruser /app /home/ldruser && \
chmod -R 755 /app /home/ldruser
# retrieve packages from build stage
COPY --from=builder /install/.venv/ /install/.venv
ENV PATH="/install/.venv/bin:$PATH"
# Verify SQLCipher is available after copy using compatibility module
# and install browser automation tools
RUN python -c "from local_deep_research.database.sqlcipher_compat import get_sqlcipher_module; \
sqlcipher = get_sqlcipher_module(); \
print(f'✓ SQLCipher module loaded successfully: {sqlcipher}')" \
&& playwright install
# Create volume for persistent configuration
# Use /app for configuration to support non-root user
VOLUME /app/.config/local_deep_research
# Create volume for Ollama start script
VOLUME /scripts/
# Copy the Ollama entrypoint script
COPY scripts/ollama_entrypoint.sh /scripts/ollama_entrypoint.sh
# Copy LDR entrypoint script to handle volume permissions
COPY scripts/ldr_entrypoint.sh /usr/local/bin/ldr_entrypoint.sh
# Set permissions and ownership for scripts and directories
RUN chmod +x /scripts/ollama_entrypoint.sh \
&& chmod +x /usr/local/bin/ldr_entrypoint.sh \
&& chown -R ldruser:ldruser /install /scripts /home/ldruser
EXPOSE 5000
# Health check for container orchestration (Docker, Kubernetes, etc.)
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:5000/api/v1/health')" || exit 1
STOPSIGNAL SIGINT
# Use entrypoint to fix volume permissions, then switch to ldruser
# The entrypoint runs as root to fix /data permissions, then drops to ldruser
ENTRYPOINT ["/usr/local/bin/ldr_entrypoint.sh"]
# Use PDM to run the application (passed to entrypoint as $@)
CMD [ "ldr-web" ]