20 changes: 19 additions & 1 deletion .gitignore
@@ -204,4 +204,22 @@ conf/local.service_conf.yaml
docker/.env
docker/launch_backend_service.sh
docker/.env.oceanbase
local.service_conf.yaml

# Generated by scripts/deploy.sh (runtime configs)
conf/service_conf_ragflow_*.yaml
nginx_conf/

logs/
pods/
upload_wiki_json.pid
.ragflow_secret_key
setup_tools_venv.sh
build_tools_bundle.sh
8 changes: 6 additions & 2 deletions api/ragflow_server.py
@@ -18,11 +18,15 @@
# from beartype.claw import beartype_all # <-- you didn't sign up for this
# beartype_all(conf=BeartypeConf(violation_type=UserWarning)) # <-- emit warnings from all code

import os
import logging

from common.log_utils import init_root_logger
from plugin import GlobalPluginManager

import logging
import os
# Initialize logging as early as possible so show_configs() (INFO logs) won't be dropped.
# deploy.sh sets RAGFLOW_LOG_BASENAME=ragflow_server_<port> for multi-instance runs.
init_root_logger(os.environ.get("RAGFLOW_LOG_BASENAME", "ragflow_server"))
import signal
import sys
Comment on lines +21 to 31

Copilot AI Jan 9, 2026

Import statement moved after init_root_logger() call but before the imports it depends on. Lines 30-31 import signal and sys, which come after the init_root_logger() call on line 29. While this works, it creates an unusual import order. If init_root_logger() internally depends on these modules or if there are circular dependencies, this could cause issues. Consider keeping all imports together at the top of the file, or add a comment explaining why logging must be initialized before other imports.

import traceback
5 changes: 3 additions & 2 deletions api/utils/api_utils.py
@@ -33,6 +33,7 @@
)

from peewee import OperationalError
from werkzeug.exceptions import NotFound

Copilot AI Jan 9, 2026

Import of 'NotFound' is not used.

Suggested change
from werkzeug.exceptions import NotFound


from common.constants import ActiveEnum
from api.db.db_models import APIToken
@@ -107,7 +108,7 @@ def serialize_for_json(obj):


def get_data_error_result(code=RetCode.DATA_ERROR, message="Sorry! Data missing!"):
logging.exception(Exception(message))
logging.exception(f"Data error: {message}")

Copilot AI Jan 9, 2026

The logging.exception() call here will log an exception that wasn't caught - it's not inside an exception handler. Use logging.error() instead of logging.exception() when you're not in an exception context. logging.exception() should only be used within an except block to include the traceback.

Suggested change
logging.exception(f"Data error: {message}")
logging.error(f"Data error: {message}")

result_dict = {"code": code, "message": message}
response = {}
for key, value in result_dict.items():
@@ -556,7 +557,7 @@ def verify_embedding_availability(embd_id: str, tenant_id: str) -> tuple[bool, R
if not (is_builtin_model or is_tenant_model):
return False, get_error_argument_result(f"Unauthorized model: <{embd_id}>")
except OperationalError as e:
logging.exception(e)
logging.exception(f"Database operation failed: {e}")
return False, get_error_data_result(message="Database operation failed")

return True, None
11 changes: 10 additions & 1 deletion common/constants.py
@@ -14,10 +14,19 @@
# limitations under the License.
#

import os
from enum import Enum, IntEnum
from strenum import StrEnum

SERVICE_CONF = "service_conf.yaml"
#
# Allow overriding the service config filename per process.
# This is useful for running multiple ragflow_server processes in one container,
# each reading its own `conf/<name>` (and optional `conf/local.<name>`).
#
# Example:
# RAGFLOW_SERVICE_CONF=service_conf_ragflow_1.yaml python3 api/ragflow_server.py
#
SERVICE_CONF = os.getenv("RAGFLOW_SERVICE_CONF", "service_conf.yaml")
RAG_FLOW_SERVICE_NAME = "ragflow"

class CustomEnum(Enum):
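As a reading aid (not part of this diff), a minimal sketch of how the RAGFLOW_SERVICE_CONF override combines with the RAGFLOW_LOG_BASENAME variable introduced in api/ragflow_server.py above to run two servers from source. It assumes conf/service_conf.yaml and conf/service_conf_ragflow_9400.yaml both already exist and declare different http ports; adjust the names to whatever is actually under conf/.

# Sketch only: start two ragflow_server processes, each with its own config file and log file.
# The two YAML config names below are assumptions, not files created by this PR.
RAGFLOW_SERVICE_CONF=service_conf.yaml \
RAGFLOW_LOG_BASENAME=ragflow_server_9380 \
  python3 api/ragflow_server.py &

RAGFLOW_SERVICE_CONF=service_conf_ragflow_9400.yaml \
RAGFLOW_LOG_BASENAME=ragflow_server_9400 \
  python3 api/ragflow_server.py &

wait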
9 changes: 5 additions & 4 deletions common/log_utils.py
@@ -20,13 +20,14 @@
from logging.handlers import RotatingFileHandler
from common.file_utils import get_project_base_directory

initialized_root_logger = False
_initialized_loggers = set()

def init_root_logger(logfile_basename: str, log_format: str = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"):
global initialized_root_logger
if initialized_root_logger:
global _initialized_loggers
# Allow re-initialization for different log file names (e.g., multi-instance servers)
if logfile_basename in _initialized_loggers:
return
initialized_root_logger = True
_initialized_loggers.add(logfile_basename)
Comment on lines +26 to +30

Copilot AI Jan 9, 2026

The re-initialization logic changed from a simple boolean flag to a set-based check, but the implementation may not work as intended. If the same logfile_basename is used by different processes/instances, each process will think logging is already initialized and skip configuration. The set is process-local, not shared across instances. If the goal is to prevent multiple init calls within the same process, this works. However, if init_root_logger() is called with different basenames (e.g., "ragflow_server_9380" then "ragflow_server_9400"), the second call will configure a new handler because line 33 does logger.handlers.clear(), potentially removing handlers configured by the first call. Consider clarifying the intended behavior or using per-basename handler tracking.


logger = logging.getLogger()
logger.handlers.clear()
9 changes: 9 additions & 0 deletions docker/.env.example
@@ -47,6 +47,15 @@ ADMIN_SVR_HTTP_PORT=9381
SVR_MCP_PORT=9382
POWERRAG_SVR_HTTP_PORT=6000


# ------------------------------
# Multi ragflow_server (multiple processes in one container)
# ------------------------------
# Instance 0 listens on SVR_HTTP_PORT.
# Instances 1.. listen on RAGFLOW_SERVER_EXTRA_BASE_PORT + (idx-1).
SVR_COUNT=1
SVR_EXTRA_BASE_HTTP_PORT=9400

# PowerRAG server url, used to generate image links.
# Format: 'http://<HOST-IP>:$POWERRAG_SVR_HTTP_PORT'
PUBLIC_SERVER_URL=
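For readers checking the port scheme above, here is a small hedged sketch (not part of the diff) that prints the port each instance would listen on; the arithmetic mirrors the SVR_EXTRA_BASE_HTTP_PORT + (idx-1) rule stated in the comment.

#!/usr/bin/env bash
# Illustration only: instance 0 listens on SVR_HTTP_PORT, instance idx>=1 on
# SVR_EXTRA_BASE_HTTP_PORT + idx - 1. SVR_COUNT=3 is used here purely for
# illustration; .env.example ships with SVR_COUNT=1.
SVR_COUNT="${SVR_COUNT:-3}"
SVR_HTTP_PORT="${SVR_HTTP_PORT:-9380}"
SVR_EXTRA_BASE_HTTP_PORT="${SVR_EXTRA_BASE_HTTP_PORT:-9400}"

for (( idx=0; idx<SVR_COUNT; idx++ )); do
  if (( idx == 0 )); then
    port="${SVR_HTTP_PORT}"
  else
    port=$(( SVR_EXTRA_BASE_HTTP_PORT + idx - 1 ))
  fi
  echo "instance ${idx} -> 127.0.0.1:${port}"
done
# With SVR_COUNT=3 this prints 9380, 9400 and 9401.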
5 changes: 5 additions & 0 deletions docker/docker-compose-self-hosted-ob.yml
@@ -30,7 +30,12 @@ services:
- "host.docker.internal:host-gateway"

# Gotenberg service for document conversion
# Optional (disabled by default). Enable with:
# docker compose --profile gotenberg up -d
# or:
# COMPOSE_PROFILES=gotenberg docker compose up -d
gotenberg:
profiles: ["gotenberg"]
image: gotenberg/gotenberg:8
env_file: .env
environment:
3 changes: 3 additions & 0 deletions docker/docker-compose.yml
@@ -34,6 +34,9 @@ services:
- ${SVR_HTTP_PORT}:9380
- ${ADMIN_SVR_HTTP_PORT}:9381
- ${SVR_MCP_PORT}:9382 # entry for MCP (host_port:docker_port). The docker_port must match the value you set for `mcp-port` above.
- ${POWERRAG_SVR_HTTP_PORT:-6000}:6000 # entry for PowerRAG server (host_port:docker_port). The docker_port must match the value you set for `powerrag-port` above.
# Optional: expose extra ragflow_server instances (default ports: 9400..)
# - 9400-9403:9400-9403
volumes:
- ./ragflow-logs:/ragflow/logs
- ./nginx/ragflow.conf:/etc/nginx/conf.d/ragflow.conf
173 changes: 152 additions & 21 deletions docker/entrypoint.sh
@@ -29,15 +29,35 @@ function usage() {
exit 1
}

ENABLE_WEBSERVER=1 # Default to enable web server
ENABLE_TASKEXECUTOR=1 # Default to enable task executor
ENABLE_DATASYNC=1
ENABLE_MCP_SERVER=0
ENABLE_ADMIN_SERVER=0 # Default close admin server
INIT_SUPERUSER_ARGS="" # Default to not initialize superuser
CONSUMER_NO_BEG=0
CONSUMER_NO_END=0
WORKERS=1
ENABLE_WEBSERVER=${ENABLE_WEBSERVER:-1} # Default to enable web server
ENABLE_TASKEXECUTOR=${ENABLE_TASKEXECUTOR:-1} # Default to enable task executor
ENABLE_DATASYNC=${ENABLE_DATASYNC:-1}
ENABLE_MCP_SERVER=${ENABLE_MCP_SERVER:-0}
ENABLE_ADMIN_SERVER=${ENABLE_ADMIN_SERVER:-0} # Default close admin server
ENABLE_POWERRAG_SERVER=${ENABLE_POWERRAG_SERVER:-1} # Default close PowerRAG server

Copilot AI Jan 9, 2026

The default value for ENABLE_POWERRAG_SERVER is inconsistent. Line 37 sets it to 1 (enabled by default), but this contradicts the comment on line 36 which says "Default close PowerRAG server" and the pattern established by other optional components (MCP and ADMIN_SERVER) which default to 0. The default should likely be 0 to match the comment and the "optional component" pattern.

Suggested change
ENABLE_POWERRAG_SERVER=${ENABLE_POWERRAG_SERVER:-1} # Default close PowerRAG server
ENABLE_POWERRAG_SERVER=${ENABLE_POWERRAG_SERVER:-0} # Default close PowerRAG server

CONSUMER_NO_BEG=${CONSUMER_NO_BEG:-0}
CONSUMER_NO_END=${CONSUMER_NO_END:-0}
WORKERS=${WORKERS:-1}

# -----------------------------------------------------------------------------
# Multi ragflow_server support (multiple processes in one container)
#
# Notes:
# - ragflow_server reads its listen port from conf/${RAGFLOW_SERVICE_CONF:-service_conf.yaml}
# - We generate multiple config files (service_conf_ragflow_<idx>.yaml) with different ports
# - We start multiple ragflow_server processes, each with its own RAGFLOW_SERVICE_CONF
# -----------------------------------------------------------------------------
#
# Env vars:
# - SVR_COUNT
# - SVR_HTTP_PORT
# - SVR_EXTRA_BASE_HTTP_PORT
# - ADMIN_SVR_HTTP_PORT
SVR_COUNT="${SVR_COUNT:-1}"
SVR_HTTP_PORT="${SVR_HTTP_PORT:-9380}"
# Extra instances will listen on: SVR_EXTRA_BASE_HTTP_PORT + (idx-1)
SVR_EXTRA_BASE_HTTP_PORT="${SVR_EXTRA_BASE_HTTP_PORT:-9400}"
ADMIN_SVR_HTTP_PORT="${ADMIN_SVR_HTTP_PORT:-9381}"

MCP_HOST="127.0.0.1"
MCP_PORT=9382
@@ -149,16 +169,64 @@ for arg in "$@"; do
done

# -----------------------------------------------------------------------------
# Replace env variables in the service_conf.yaml file
# Render service config(s) from template
# -----------------------------------------------------------------------------
CONF_DIR="/ragflow/conf"
TEMPLATE_FILE="${CONF_DIR}/service_conf.yaml.template"
CONF_FILE="${CONF_DIR}/service_conf.yaml"

rm -f "${CONF_FILE}"
while IFS= read -r line || [[ -n "$line" ]]; do
eval "echo \"$line\"" >> "${CONF_FILE}"
done < "${TEMPLATE_FILE}"
#
# -----------------------------------------------------------------------------
# Ensure a stable SECRET_KEY across multiple ragflow_server processes.
#
# Why:
# - Auth tokens are signed with settings.SECRET_KEY (derived from RAGFLOW_SECRET_KEY
# or conf ragflow.secret_key). If multiple ragflow_server instances in the same
# container auto-generate different keys, nginx load-balancing will cause:
# "Signature ... does not match" -> 401 -> frontend jumps back to login.
#
# Strategy:
# - If user didn't provide a strong RAGFLOW_SECRET_KEY (>=32 chars), generate ONE
# and export it so all child processes share it.
# - Persist it under /ragflow/conf so restarts inside the same volume keep stable.
# -----------------------------------------------------------------------------
#
function ensure_ragflow_secret_key() {
local key_file="${CONF_DIR}/.ragflow_secret_key"

if [[ -n "${RAGFLOW_SECRET_KEY:-}" && ${#RAGFLOW_SECRET_KEY} -ge 32 ]]; then
export RAGFLOW_SECRET_KEY
return 0
fi

if [[ -f "${key_file}" ]]; then
RAGFLOW_SECRET_KEY="$(cat "${key_file}")"
else
RAGFLOW_SECRET_KEY="$("$PY" -c 'import secrets; print(secrets.token_hex(32))')"
echo -n "${RAGFLOW_SECRET_KEY}" > "${key_file}"
chmod 600 "${key_file}" || true
fi

if [[ ${#RAGFLOW_SECRET_KEY} -lt 32 ]]; then
echo "ERROR: failed to initialize a strong RAGFLOW_SECRET_KEY" >&2
return 1
fi

export RAGFLOW_SECRET_KEY
}

function render_service_conf() {
local out_file="$1"
local ragflow_port="$2"
local admin_port="$3"

rm -f "${out_file}"
while IFS= read -r line || [[ -n "$line" ]]; do
# shellcheck disable=SC2034
SVR_HTTP_PORT="${ragflow_port}" ADMIN_SVR_HTTP_PORT="${admin_port}" \
eval "echo \"$line\"" >> "${out_file}"
done < "${TEMPLATE_FILE}"
Comment on lines +223 to +228

Copilot AI Jan 9, 2026

The render_service_conf function uses eval "echo \"$line\"" to expand environment variables from service_conf.yaml.template, which introduces a shell code injection risk. If an attacker can influence any of the environment variables used in the template (e.g., via orchestrator/UI that lets them set REDIS_PASSWORD, RAGFLOW_HOST, etc.), they can embed command substitutions like $(...) so that when eval re-parses the expanded line, arbitrary commands execute inside the container with the entrypoint's privileges. Replace the eval-based templating with a non-evaluating mechanism (e.g., envsubst or a simple variable substitution script that does not invoke the shell parser on untrusted data) so that environment values are treated as data, not executable shell code.

Suggested change
rm -f "${out_file}"
while IFS= read -r line || [[ -n "$line" ]]; do
# shellcheck disable=SC2034
SVR_HTTP_PORT="${ragflow_port}" ADMIN_SVR_HTTP_PORT="${admin_port}" \
eval "echo \"$line\"" >> "${out_file}"
done < "${TEMPLATE_FILE}"
if ! command -v envsubst >/dev/null 2>&1; then
echo "ERROR: envsubst is required to render service_conf from template." >&2
return 1
fi
rm -f "${out_file}"
SVR_HTTP_PORT="${ragflow_port}" ADMIN_SVR_HTTP_PORT="${admin_port}" \
envsubst < "${TEMPLATE_FILE}" > "${out_file}"

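A further hardening option, offered only as a hedged sketch: GNU envsubst also accepts a SHELL-FORMAT argument naming exactly which variables to substitute, so any other $... text in the template stays literal. The allow-list would need to enumerate every variable the template legitimately references, not just the two shown here.

# Sketch: substitute only an explicit allow-list of variables (extend the list to
# cover everything service_conf.yaml.template actually uses, e.g. DB/Redis settings).
SVR_HTTP_PORT="${ragflow_port}" ADMIN_SVR_HTTP_PORT="${admin_port}" \
  envsubst '$SVR_HTTP_PORT $ADMIN_SVR_HTTP_PORT' \
  < "${TEMPLATE_FILE}" > "${out_file}"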
}

export LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu/"
PY=python3
@@ -193,6 +261,71 @@ function start_mcp_server() {
"${MCP_JSON_RESPONSE_FLAG}" &
}

function start_powerrag_server() {
echo "Starting PowerRAG Server on ${POWERRAG_PORT}..."
while true; do
"$PY" powerrag/server/powerrag_server.py \
--port="${POWERRAG_PORT}"
done &
}

function _prepare_multi_ragflow_confs() {
# Render base service_conf.yaml (used by other processes that don't set RAGFLOW_SERVICE_CONF)
render_service_conf "${CONF_FILE}" "${SVR_HTTP_PORT}" "${ADMIN_SVR_HTTP_PORT}"

# Create per-instance configs
local idx port conf_name conf_path
for (( idx=0; idx<${SVR_COUNT}; idx++ )); do

if [[ "${idx}" -eq 0 ]]; then
port="${SVR_HTTP_PORT}"
conf_name="local.service_conf.yaml"

Copilot AI Jan 9, 2026

Inconsistent configuration file naming between deploy.sh and entrypoint.sh. In deploy.sh (line 647), the main instance uses GLOBAL_SERVICE_CONF (defaults to "service_conf.yaml"), while in entrypoint.sh (lines 282, 319), it uses "local.service_conf.yaml". This inconsistency could cause configuration issues when switching between deployment modes. Consider using a consistent naming scheme or documenting this intentional difference.

Suggested change
conf_name="local.service_conf.yaml"
conf_name="service_conf.yaml"

else
port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1))
conf_name="service_conf_ragflow_${port}.yaml"
fi
conf_path="${CONF_DIR}/${conf_name}"
render_service_conf "${conf_path}" "${port}" "${ADMIN_SVR_HTTP_PORT}"
done
}

function _start_ragflow_instance() {
local idx="$1"
local port="$2"
local conf_name="$3"

echo "Starting ragflow_server[${idx}] on ${port} using conf/${conf_name} ..."
# Align with scripts/deploy.sh:
# - run without restart loop (process supervision is external to entrypoint)
# - set per-instance logfile basename so logs are split by port
RAGFLOW_SERVICE_CONF="${conf_name}" \
RAGFLOW_LOG_BASENAME="ragflow_server_${port}" \
"$PY" api/ragflow_server.py ${INIT_SUPERUSER_ARGS} &

Copilot AI Jan 9, 2026

The variable INIT_SUPERUSER_ARGS is used on line 303 but is not defined anywhere in the visible changes. If this variable was previously defined and is still needed, it should be initialized with a default value (e.g., INIT_SUPERUSER_ARGS="${INIT_SUPERUSER_ARGS:-}") to prevent potential undefined variable errors when running with "set -u".

}

function start_ragflow_servers() {
ensure_ragflow_secret_key
_prepare_multi_ragflow_confs

# Generate nginx upstream include files so nginx can proxy/load-balance to all instances
: > /etc/nginx/conf.d/ragflow_upstream.conf
: > /etc/nginx/conf.d/admin_upstream.conf
echo "server 127.0.0.1:${ADMIN_SVR_HTTP_PORT};" >> /etc/nginx/conf.d/admin_upstream.conf

local idx port conf_name
for (( idx=0; idx<${SVR_COUNT}; idx++ )); do
if [[ "${idx}" -eq 0 ]]; then
port="${SVR_HTTP_PORT}"
conf_name="local.service_conf.yaml"
else
port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1))
conf_name="service_conf_ragflow_${port}.yaml"
fi
echo "server 127.0.0.1:${port};" >> /etc/nginx/conf.d/ragflow_upstream.conf
_start_ragflow_instance "${idx}" "${port}" "${conf_name}"
done
}

function ensure_docling() {
[[ "${USE_DOCLING}" == "true" ]] || { echo "[docling] disabled by USE_DOCLING"; return 0; }
python3 -c 'import pip' >/dev/null 2>&1 || python3 -m ensurepip --upgrade || true
@@ -207,15 +340,13 @@ function ensure_docling() {
ensure_docling

if [[ "${ENABLE_WEBSERVER}" -eq 1 ]]; then
echo "Starting ragflow_server..."
start_ragflow_servers

# nginx upstream include files are generated by start_ragflow_servers;
# start nginx after generation so it picks them up (no reload needed).
echo "Starting nginx..."
/usr/sbin/nginx

echo "Starting ragflow_server..."
while true; do
"$PY" api/ragflow_server.py ${INIT_SUPERUSER_ARGS} &
wait;
sleep 1;
done &
fi
Comment on lines 342 to 350

Copilot AI Jan 9, 2026

Missing start logic for PowerRAG server. ENABLE_POWERRAG_SERVER is defined and set to 1 by default on line 37, and the start_powerrag_server() function is defined on lines 264-270, but the visible component startup section (lines 342-350) doesn't include a check to start the PowerRAG server. Based on the pattern for MCP server and admin server, there should be a corresponding if block checking ENABLE_POWERRAG_SERVER and calling start_powerrag_server.

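A minimal sketch of the missing guard, assuming POWERRAG_PORT is derived elsewhere in entrypoint.sh and that the block should mirror the ENABLE_MCP_SERVER/ENABLE_ADMIN_SERVER pattern:

# Sketch only: start PowerRAG just like the other optional components.
if [[ "${ENABLE_POWERRAG_SERVER}" -eq 1 ]]; then
    start_powerrag_server
fi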

if [[ "${ENABLE_DATASYNC}" -eq 1 ]]; then
14 changes: 12 additions & 2 deletions docker/nginx/ragflow.conf
@@ -1,3 +1,13 @@
upstream ragflow_upstream {
# generated by entrypoint.sh
include /etc/nginx/conf.d/ragflow_upstream.conf;
}

upstream admin_upstream {
# generated by entrypoint.sh
include /etc/nginx/conf.d/admin_upstream.conf;
}

server {
listen 80;
server_name _;
@@ -11,12 +21,12 @@ server {
gzip_disable "MSIE [1-6]\.";

location ~ ^/api/v1/admin {
proxy_pass http://localhost:9381;
proxy_pass http://admin_upstream;
include proxy.conf;
}

location ~ ^/(v1|api) {
proxy_pass http://localhost:9380;
proxy_pass http://ragflow_upstream;
include proxy.conf;
}

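For reference, a hedged illustration of what start_ragflow_servers in entrypoint.sh would write into the include files these upstream blocks pull in; the values assume SVR_COUNT=3 and the default ports, and nginx then round-robins matching requests across the listed backends.

# Illustration only, mirroring the echo loop shown in the entrypoint.sh diff above.
: > /etc/nginx/conf.d/ragflow_upstream.conf
for port in 9380 9400 9401; do
  echo "server 127.0.0.1:${port};" >> /etc/nginx/conf.d/ragflow_upstream.conf
done
echo "server 127.0.0.1:9381;" > /etc/nginx/conf.d/admin_upstream.conf
# Resulting ragflow_upstream.conf:
#   server 127.0.0.1:9380;
#   server 127.0.0.1:9400;
#   server 127.0.0.1:9401;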