forked from infiniflow/ragflow
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
258 lines (223 loc) · 11.2 KB
/
Dockerfile
File metadata and controls
258 lines (223 loc) · 11.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# base stage
FROM ubuntu:24.04 AS base
USER root
SHELL ["/bin/bash", "-c"]
ARG NEED_MIRROR=0
WORKDIR /ragflow
# Copy models downloaded via scripts/download_deps.py
RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
tar --exclude='.*' -cf - \
/huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
/huggingface.co/InfiniFlow/deepdoc \
| tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
# https://github.com/chrismattmann/tika-python
# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
cp -r /deps/nltk_data /root/ && \
cp /deps/tika-server-standard-3.2.3.jar /deps/tika-server-standard-3.2.3.jar.md5 /ragflow/ && \
cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.2.3.jar"
ENV DEBIAN_FRONTEND=noninteractive
# Setup apt
# Python package and implicit dependencies:
# opencv-python: libglib2.0-0 libglx-mesa0 libgl1
# python-pptx: default-jdk tika-server-standard-3.2.3.jar
# selenium: libatk-bridge2.0-0 chrome-linux64-121-0-6167-85
# Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
apt update && \
apt --no-install-recommends install -y ca-certificates; \
if [ "$NEED_MIRROR" == "1" ]; then \
sed -i 's|http://archive.ubuntu.com/ubuntu|https://mirrors.tuna.tsinghua.edu.cn/ubuntu|g' /etc/apt/sources.list.d/ubuntu.sources; \
sed -i 's|http://security.ubuntu.com/ubuntu|https://mirrors.tuna.tsinghua.edu.cn/ubuntu|g' /etc/apt/sources.list.d/ubuntu.sources; \
fi; \
rm -f /etc/apt/apt.conf.d/docker-clean && \
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
chmod 1777 /tmp && \
apt update && \
apt install -y libglib2.0-0 libglx-mesa0 libgl1 && \
apt install -y pkg-config libicu-dev libgdiplus && \
apt install -y default-jdk && \
apt install -y libatk-bridge2.0-0 && \
apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \
apt install -y libjemalloc-dev && \
apt install -y nginx unzip curl wget git vim less && \
apt install -y ghostscript && \
apt install -y pandoc && \
apt install -y texlive && \
apt install -y gettext-base && \
apt install -y fonts-freefont-ttf fonts-noto-cjk
# Install uv
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
if [ "$NEED_MIRROR" == "1" ]; then \
mkdir -p /etc/uv && \
echo 'python-install-mirror = "https://registry.npmmirror.com/-/binary/python-build-standalone/"' > /etc/uv/uv.toml && \
echo '[[index]]' >> /etc/uv/uv.toml && \
echo 'url = "https://pypi.tuna.tsinghua.edu.cn/simple"' >> /etc/uv/uv.toml && \
echo 'default = true' >> /etc/uv/uv.toml; \
fi; \
arch="$(uname -m)"; \
if [ "$arch" = "x86_64" ]; then uv_arch="x86_64"; else uv_arch="aarch64"; fi; \
tar xzf "/deps/uv-${uv_arch}-unknown-linux-gnu.tar.gz" \
&& cp "uv-${uv_arch}-unknown-linux-gnu/"* /usr/local/bin/ \
&& rm -rf "uv-${uv_arch}-unknown-linux-gnu" \
&& uv python install 3.12
ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
ENV PATH=/root/.local/bin:$PATH
# nodejs 12.22 on Ubuntu 22.04 is too old
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
apt purge -y nodejs npm cargo && \
apt autoremove -y && \
apt update && \
apt install -y nodejs
# A modern version of cargo is needed for the latest version of the Rust compiler.
RUN apt update && apt install -y curl build-essential \
&& if [ "$NEED_MIRROR" == "1" ]; then \
# Use TUNA mirrors for rustup/rust dist files \
export RUSTUP_DIST_SERVER="https://mirrors.tuna.tsinghua.edu.cn/rustup"; \
export RUSTUP_UPDATE_ROOT="https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup"; \
echo "Using TUNA mirrors for Rustup."; \
fi; \
# Force curl to use HTTP/1.1 \
curl --proto '=https' --tlsv1.2 --http1.1 -sSf https://sh.rustup.rs | bash -s -- -y --profile minimal \
&& echo 'export PATH="/root/.cargo/bin:${PATH}"' >> /root/.bashrc
ENV PATH="/root/.cargo/bin:${PATH}"
RUN cargo --version && rustc --version
# Add msssql ODBC driver
# macOS ARM64 environment, install msodbcsql18.
# general x86_64 environment, install msodbcsql17.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
curl https://packages.microsoft.com/config/ubuntu/22.04/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
apt update && \
arch="$(uname -m)"; \
if [ "$arch" = "arm64" ] || [ "$arch" = "aarch64" ]; then \
# ARM64 (macOS/Apple Silicon or Linux aarch64) \
ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql18; \
else \
# x86_64 or others \
ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql17; \
fi || \
{ echo "Failed to install ODBC driver"; exit 1; }
# Add dependencies of selenium
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
unzip /chrome-linux64.zip && \
mv chrome-linux64 /opt/chrome && \
ln -s /opt/chrome/chrome /usr/local/bin/
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \
mv chromedriver /usr/local/bin/ && \
rm -f /usr/bin/google-chrome
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
if [ "$(uname -m)" = "x86_64" ]; then \
dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \
elif [ "$(uname -m)" = "aarch64" ]; then \
dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \
fi
# builder stage
FROM base AS builder
USER root
WORKDIR /ragflow
# install dependencies from uv.lock file
COPY pyproject.toml uv.lock ./
# Build argument for optional dependencies
# Default: "all" installs everything for backward compatibility
# Examples:
# - "all" - Full installation (default, backward compatible)
# - "docling-sidecar" - Core + S3 + Elasticsearch (for Docling sidecar users)
# - "db-postgres,storage-s3,vectorstore-elasticsearch,deepdoc" - Custom selection
ARG RAGFLOW_EXTRAS="all"
# https://github.com/astral-sh/uv/issues/10462
# uv records index url into uv.lock but doesn't failover among multiple indexes
RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
if [ "$NEED_MIRROR" == "1" ]; then \
sed -i 's|pypi.org|pypi.tuna.tsinghua.edu.cn|g' uv.lock; \
else \
sed -i 's|pypi.tuna.tsinghua.edu.cn|pypi.org|g' uv.lock; \
fi; \
if [ "$RAGFLOW_EXTRAS" = "all" ] || [ -z "$RAGFLOW_EXTRAS" ]; then \
echo "Using extras: all-extras"; \
uv sync --python 3.12 --frozen --all-extras; \
else \
# Define allowed extras (matches pyproject.toml [project.optional-dependencies] keys).
# IMPORTANT: This list must be kept in sync with pyproject.toml manually.
ALLOWED_EXTRAS="db-postgres db-mysql storage-minio storage-s3 storage-azure storage-opendal storage-webdav vectorstore-elasticsearch vectorstore-opensearch vectorstore-infinity vectorstore-postgres vectorstore-oceanbase deepdoc llm-anthropic llm-google llm-cohere llm-mistral llm-baidu llm-alibaba llm-volcengine llm-zhipu llm-groq llm-replicate llm-voyage llm-ollama llm-hunyuan llm-all grpc integrations-jira integrations-slack integrations-discord integrations-github integrations-gitlab integrations-google integrations-box integrations-dropbox integrations-asana integrations-airtable integrations-office365 integrations-moodle integrations-all search-tavily search-serp search-duckduckgo search-scholar search-crawl search-wikipedia search-all graphrag agent-tools agent-sql agent-translation embeddings finance-china finance-global observability docling-sidecar full-lite test dev"; \
# Build --extra flags for each comma-separated extra \
EXTRA_FLAGS=""; \
INVALID_EXTRAS=""; \
IFS=',' read -ra EXTRAS <<< "$RAGFLOW_EXTRAS"; \
for extra in "${EXTRAS[@]}"; do \
# Trim leading/trailing whitespace \
extra="${extra#"${extra%%[![:space:]]*}"}"; \
extra="${extra%"${extra##*[![:space:]]}"}"; \
# Skip empty elements \
[ -z "$extra" ] && continue; \
# Check if the extra is allowed \
is_allowed=0; \
for allowed in $ALLOWED_EXTRAS; do \
if [ "$extra" = "$allowed" ]; then is_allowed=1; break; fi; \
done; \
if [ "$is_allowed" = "0" ]; then \
INVALID_EXTRAS="$INVALID_EXTRAS $extra"; \
else \
EXTRA_FLAGS="$EXTRA_FLAGS --extra $extra"; \
fi; \
done; \
if [ -n "$INVALID_EXTRAS" ]; then \
echo "ERROR: Unknown RAGFLOW_EXTRAS provided:$INVALID_EXTRAS"; \
echo "Allowed extras are: $ALLOWED_EXTRAS"; \
exit 1; \
fi; \
echo "Using extras: $EXTRA_FLAGS"; \
uv sync --python 3.12 --frozen $EXTRA_FLAGS; \
fi && \
# Install pip for runtime use by entrypoint.sh ensure_pip_dependency() function
# which installs optional dependencies (e.g., docling) at container startup
uv pip install pip==24.3.1
COPY web web
COPY docs docs
RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
cd web && npm install && npm run build
COPY .git /ragflow/.git
RUN version_info=$(git describe --tags --match=v* --first-parent --always); \
version_info="$version_info"; \
echo "RAGFlow version: $version_info"; \
echo $version_info > /ragflow/VERSION
# production stage
FROM base AS production
USER root
WORKDIR /ragflow
# Copy Python environment and packages
ENV VIRTUAL_ENV=/ragflow/.venv
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
ENV PYTHONPATH=/ragflow/
COPY web web
COPY admin admin
COPY api api
COPY conf conf
# Copy deepdoc if it exists.
# We use the glob pattern deepdo[c] to make this COPY a no-op if the directory is missing
# (e.g., in full-lite builds or when using Docling sidecar).
COPY deepdo[c] deepdoc/
COPY rag rag
# Ensure deepdoc is available as a top-level package via symlink if it was not copied
# directly but exists within rag/parsers/deepdoc (the standard location in lite/all builds).
RUN if [ -d "rag/parsers/deepdoc" ] && [ ! -d "deepdoc" ]; then ln -s rag/parsers/deepdoc deepdoc; fi
COPY agent agent
COPY pyproject.toml uv.lock ./
COPY mcp mcp
COPY common common
COPY memory memory
COPY docker/nginx/ragflow.conf /etc/nginx/conf.d/ragflow.conf
COPY docker/nginx/proxy.conf /etc/nginx/proxy.conf
COPY docker/nginx/nginx.conf /etc/nginx/nginx.conf
COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
COPY docker/entrypoint.sh ./
RUN chmod +x ./entrypoint*.sh
# Copy compiled web pages
COPY --from=builder /ragflow/web/dist /ragflow/web/dist
COPY --from=builder /ragflow/VERSION /ragflow/VERSION
ENTRYPOINT ["./entrypoint.sh"]