From bd7bd5b601701260d1d57e18cc87bc9418676c37 Mon Sep 17 00:00:00 2001 From: "keyang.lk" Date: Mon, 22 Dec 2025 21:26:01 +0800 Subject: [PATCH 1/3] feat: add deploy.sh for startup from source code; support start multi ragflow_server in dcoker and deploy.sh # Conflicts: # .gitignore # scripts/README.md # scripts/deploy.sh --- .gitignore | 14 +- api/ragflow_server.py | 16 +- api/utils/api_utils.py | 15 +- common/constants.py | 11 +- common/log_utils.py | 9 +- docker/.env.example | 9 + docker/docker-compose-self-hosted-ob.yml | 5 + docker/docker-compose.yml | 2 + docker/entrypoint.sh | 162 +++- docker/nginx/ragflow.conf | 14 +- docker/service_conf.yaml.template | 7 +- mcp/server/server.py | 43 +- scripts/README.md | 222 +++++ scripts/deploy.sh | 1084 ++++++++++++++++++++++ 14 files changed, 1555 insertions(+), 58 deletions(-) create mode 100644 scripts/README.md create mode 100755 scripts/deploy.sh diff --git a/.gitignore b/.gitignore index 1e4b1642b..b720bc966 100644 --- a/.gitignore +++ b/.gitignore @@ -201,4 +201,16 @@ conf/local.service_conf.yaml docker/.env docker/launch_backend_service.sh docker/.env.oceanbase -local.service_conf.yaml \ No newline at end of file +<<<<<<< HEAD +local.service_conf.yaml +======= +local.service_conf.yaml + +# Generated by scripts/deploy.sh (runtime configs) +conf/service_conf_ragflow_*.yaml +nginx_conf/ + +logs/ +pods/ +upload_wiki_json.pid +>>>>>>> d641c4e6 (feat: add deploy.sh for startup from source code; support start multi ragflow_server in dcoker and deploy.sh) diff --git a/api/ragflow_server.py b/api/ragflow_server.py index 84e996f97..20f778dc2 100644 --- a/api/ragflow_server.py +++ b/api/ragflow_server.py @@ -18,12 +18,15 @@ # from beartype.claw import beartype_all # <-- you didn't sign up for this # beartype_all(conf=BeartypeConf(violation_type=UserWarning)) # <-- emit warnings from all code +import os +import logging + from common.log_utils import init_root_logger from plugin import GlobalPluginManager -init_root_logger("ragflow_server") -import logging -import os +# Initialize logging as early as possible so show_configs() (INFO logs) won't be dropped. +# deploy.sh sets RAGFLOW_LOG_BASENAME=ragflow_server_ for multi-instance runs. +init_root_logger(os.environ.get("RAGFLOW_LOG_BASENAME", "ragflow_server")) import signal import sys import time @@ -73,6 +76,10 @@ def signal_handler(sig, frame): sys.exit(0) if __name__ == '__main__': + show_configs() + settings.init_settings() + settings.print_rag_settings() + logging.info(r""" ____ ___ ______ ______ __ / __ \ / | / ____// ____// /____ _ __ @@ -87,9 +94,6 @@ def signal_handler(sig, frame): logging.info( f'project base: {get_project_base_directory()}' ) - show_configs() - settings.init_settings() - settings.print_rag_settings() if RAGFLOW_DEBUGPY_LISTEN > 0: logging.info(f"debugpy listen on {RAGFLOW_DEBUGPY_LISTEN}") diff --git a/api/utils/api_utils.py b/api/utils/api_utils.py index 34a7c6f84..5656a086a 100644 --- a/api/utils/api_utils.py +++ b/api/utils/api_utils.py @@ -32,6 +32,7 @@ ) from peewee import OperationalError +from werkzeug.exceptions import NotFound from common.constants import ActiveEnum from api.db.db_models import APIToken @@ -77,7 +78,7 @@ def serialize_for_json(obj): def get_data_error_result(code=RetCode.DATA_ERROR, message="Sorry! 
Data missing!"): - logging.exception(Exception(message)) + logging.exception(f"Data error: {message}") result_dict = {"code": code, "message": message} response = {} for key, value in result_dict.items(): @@ -89,7 +90,15 @@ def get_data_error_result(code=RetCode.DATA_ERROR, message="Sorry! Data missing! def server_error_response(e): - logging.exception(e) + # Handle 404 NotFound errors separately with lower log level + is_404 = isinstance(e, NotFound) or getattr(e, "code", None) == 404 + if is_404: + logging.warning(f"404 Not Found: {getattr(e, 'description', str(e))}") + return get_json_result(code=RetCode.NOT_FOUND, message=str(e)) + + # For other errors, log at exception level + logging.exception(f"Server error: {e}") + try: msg = repr(e).lower() if getattr(e, "code", None) == 401 or ("unauthorized" in msg) or ("401" in msg): @@ -517,7 +526,7 @@ def verify_embedding_availability(embd_id: str, tenant_id: str) -> tuple[bool, R if not (is_builtin_model or is_tenant_model): return False, get_error_argument_result(f"Unauthorized model: <{embd_id}>") except OperationalError as e: - logging.exception(e) + logging.exception(f"Database operation failed: {e}") return False, get_error_data_result(message="Database operation failed") return True, None diff --git a/common/constants.py b/common/constants.py index 6f1fc3e8d..5f678f77f 100644 --- a/common/constants.py +++ b/common/constants.py @@ -14,10 +14,19 @@ # limitations under the License. # +import os from enum import Enum, IntEnum from strenum import StrEnum -SERVICE_CONF = "service_conf.yaml" +# +# Allow overriding the service config filename per process. +# This is useful for running multiple ragflow_server processes in one container, +# each reading its own `conf/` (and optional `conf/local.`). +# +# Example: +# RAGFLOW_SERVICE_CONF=service_conf_ragflow_1.yaml python3 api/ragflow_server.py +# +SERVICE_CONF = os.getenv("RAGFLOW_SERVICE_CONF", "service_conf.yaml") RAG_FLOW_SERVICE_NAME = "ragflow" class CustomEnum(Enum): diff --git a/common/log_utils.py b/common/log_utils.py index abbcd286b..bd4d50436 100644 --- a/common/log_utils.py +++ b/common/log_utils.py @@ -20,13 +20,14 @@ from logging.handlers import RotatingFileHandler from common.file_utils import get_project_base_directory -initialized_root_logger = False +_initialized_loggers = set() def init_root_logger(logfile_basename: str, log_format: str = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"): - global initialized_root_logger - if initialized_root_logger: + global _initialized_loggers + # Allow re-initialization for different log file names (e.g., multi-instance servers) + if logfile_basename in _initialized_loggers: return - initialized_root_logger = True + _initialized_loggers.add(logfile_basename) logger = logging.getLogger() logger.handlers.clear() diff --git a/docker/.env.example b/docker/.env.example index 4de7e38cc..6eaf2a3d0 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -47,6 +47,15 @@ ADMIN_SVR_HTTP_PORT=9381 SVR_MCP_PORT=9382 POWERRAG_SVR_HTTP_PORT=6000 + +# ------------------------------ +# Multi ragflow_server (multiple processes in one container) +# ------------------------------ +# Instance 0 listens on SVR_HTTP_PORT. +# Instances 1.. listen on RAGFLOW_SERVER_EXTRA_BASE_PORT + (idx-1). +SVR_COUNT=1 +SVR_EXTRA_BASE_HTTP_PORT=9400 + # PowerRAG server url, used to generate image links. 
# Format: 'http://:$POWERRAG_SVR_HTTP_PORT' PUBLIC_SERVER_URL= diff --git a/docker/docker-compose-self-hosted-ob.yml b/docker/docker-compose-self-hosted-ob.yml index 2c7607fed..f68ea96bd 100644 --- a/docker/docker-compose-self-hosted-ob.yml +++ b/docker/docker-compose-self-hosted-ob.yml @@ -30,7 +30,12 @@ services: - "host.docker.internal:host-gateway" # Gotenberg service for document conversion + # Optional (disabled by default). Enable with: + # docker compose --profile gotenberg up -d + # or: + # COMPOSE_PROFILES=gotenberg docker compose up -d gotenberg: + profiles: ["gotenberg"] image: gotenberg/gotenberg:8 env_file: .env environment: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 6e38ed641..e8ee7e557 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -11,6 +11,8 @@ services: - ${ADMIN_SVR_HTTP_PORT}:9381 - ${SVR_MCP_PORT}:9382 # entry for MCP (host_port:docker_port). The docker_port must match the value you set for `mcp-port` above. - ${POWERRAG_SVR_HTTP_PORT:-6000}:6000 # entry for PowerRAG server (host_port:docker_port). The docker_port must match the value you set for `powerrag-port` above. + # Optional: expose extra ragflow_server instances (default ports: 9400..) + # - 9400-9403:9400-9403 volumes: - ./ragflow-logs:/ragflow/logs - ./nginx/ragflow.conf:/etc/nginx/conf.d/ragflow.conf diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 7883337ec..d251490cb 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -30,15 +30,35 @@ function usage() { exit 1 } -ENABLE_WEBSERVER=1 # Default to enable web server -ENABLE_TASKEXECUTOR=1 # Default to enable task executor -ENABLE_DATASYNC=1 -ENABLE_MCP_SERVER=0 -ENABLE_ADMIN_SERVER=0 # Default close admin server -ENABLE_POWERRAG_SERVER=1 # Default close PowerRAG server -CONSUMER_NO_BEG=0 -CONSUMER_NO_END=0 -WORKERS=1 +ENABLE_WEBSERVER=${ENABLE_WEBSERVER:-1} # Default to enable web server +ENABLE_TASKEXECUTOR=${ENABLE_TASKEXECUTOR:-1} # Default to enable task executor +ENABLE_DATASYNC=${ENABLE_DATASYNC:-1} +ENABLE_MCP_SERVER=${ENABLE_MCP_SERVER:-0} +ENABLE_ADMIN_SERVER=${ENABLE_ADMIN_SERVER:-0} # Default close admin server +ENABLE_POWERRAG_SERVER=${ENABLE_POWERRAG_SERVER:-1} # Default close PowerRAG server +CONSUMER_NO_BEG=${CONSUMER_NO_BEG:-0} +CONSUMER_NO_END=${CONSUMER_NO_END:-0} +WORKERS=${WORKERS:-1} + +# ----------------------------------------------------------------------------- +# Multi ragflow_server support (multiple processes in one container) +# +# Notes: +# - ragflow_server reads its listen port from conf/${RAGFLOW_SERVICE_CONF:-service_conf.yaml} +# - We generate multiple config files (service_conf_ragflow_.yaml) with different ports +# - We start multiple ragflow_server processes, each with its own RAGFLOW_SERVICE_CONF +# ----------------------------------------------------------------------------- +# +# Env vars: +# - SVR_COUNT +# - SVR_HTTP_PORT +# - SVR_EXTRA_BASE_HTTP_PORT +# - ADMIN_SVR_HTTP_PORT +SVR_COUNT="${SVR_COUNT:-1}" +SVR_HTTP_PORT="${SVR_HTTP_PORT:-9380}" +# Extra instances will listen on: SVR_EXTRA_BASE_HTTP_PORT + (idx-1) +SVR_EXTRA_BASE_HTTP_PORT="${SVR_EXTRA_BASE_HTTP_PORT:-9400}" +ADMIN_SVR_HTTP_PORT="${ADMIN_SVR_HTTP_PORT:-9381}" MCP_HOST="127.0.0.1" MCP_PORT=9382 @@ -156,16 +176,64 @@ for arg in "$@"; do done # ----------------------------------------------------------------------------- -# Replace env variables in the service_conf.yaml file +# Render service config(s) from template # 
----------------------------------------------------------------------------- CONF_DIR="/ragflow/conf" TEMPLATE_FILE="${CONF_DIR}/service_conf.yaml.template" CONF_FILE="${CONF_DIR}/service_conf.yaml" -rm -f "${CONF_FILE}" -while IFS= read -r line || [[ -n "$line" ]]; do - eval "echo \"$line\"" >> "${CONF_FILE}" -done < "${TEMPLATE_FILE}" +# +# ----------------------------------------------------------------------------- +# Ensure a stable SECRET_KEY across multiple ragflow_server processes. +# +# Why: +# - Auth tokens are signed with settings.SECRET_KEY (derived from RAGFLOW_SECRET_KEY +# or conf ragflow.secret_key). If multiple ragflow_server instances in the same +# container auto-generate different keys, nginx load-balancing will cause: +# "Signature ... does not match" -> 401 -> frontend jumps back to login. +# +# Strategy: +# - If user didn't provide a strong RAGFLOW_SECRET_KEY (>=32 chars), generate ONE +# and export it so all child processes share it. +# - Persist it under /ragflow/conf so restarts inside the same volume keep stable. +# ----------------------------------------------------------------------------- +# +function ensure_ragflow_secret_key() { + local key_file="${CONF_DIR}/.ragflow_secret_key" + + if [[ -n "${RAGFLOW_SECRET_KEY:-}" && ${#RAGFLOW_SECRET_KEY} -ge 32 ]]; then + export RAGFLOW_SECRET_KEY + return 0 + fi + + if [[ -f "${key_file}" ]]; then + RAGFLOW_SECRET_KEY="$(cat "${key_file}")" + else + RAGFLOW_SECRET_KEY="$("$PY" -c 'import secrets; print(secrets.token_hex(32))')" + echo -n "${RAGFLOW_SECRET_KEY}" > "${key_file}" + chmod 600 "${key_file}" || true + fi + + if [[ ${#RAGFLOW_SECRET_KEY} -lt 32 ]]; then + echo "ERROR: failed to initialize a strong RAGFLOW_SECRET_KEY" >&2 + return 1 + fi + + export RAGFLOW_SECRET_KEY +} + +function render_service_conf() { + local out_file="$1" + local ragflow_port="$2" + local admin_port="$3" + + rm -f "${out_file}" + while IFS= read -r line || [[ -n "$line" ]]; do + # shellcheck disable=SC2034 + SVR_HTTP_PORT="${ragflow_port}" ADMIN_SVR_HTTP_PORT="${admin_port}" \ + eval "echo \"$line\"" >> "${out_file}" + done < "${TEMPLATE_FILE}" +} export LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu/" PY=python3 @@ -208,6 +276,60 @@ function start_powerrag_server() { done & } +function _prepare_multi_ragflow_confs() { + # Render base service_conf.yaml (used by other processes that don't set RAGFLOW_SERVICE_CONF) + render_service_conf "${CONF_FILE}" "${SVR_HTTP_PORT}" "${ADMIN_SVR_HTTP_PORT}" + + # Create per-instance configs + local idx port conf_name conf_path + for (( idx=0; idx<${SVR_COUNT}; idx++ )); do + conf_name="service_conf_ragflow_${idx}.yaml" + conf_path="${CONF_DIR}/${conf_name}" + if [[ "${idx}" -eq 0 ]]; then + port="${SVR_HTTP_PORT}" + else + port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1)) + fi + render_service_conf "${conf_path}" "${port}" "${ADMIN_SVR_HTTP_PORT}" + done +} + +function _start_ragflow_instance() { + local idx="$1" + local port="$2" + local conf_name="$3" + + echo "Starting ragflow_server[${idx}] on ${port} using conf/${conf_name} ..." 
+ # Align with scripts/deploy.sh: + # - run without restart loop (process supervision is external to entrypoint) + # - set per-instance logfile basename so logs are split by port + RAGFLOW_SERVICE_CONF="${conf_name}" \ + RAGFLOW_LOG_BASENAME="ragflow_server_${port}" \ + "$PY" api/ragflow_server.py & +} + +function start_ragflow_servers() { + ensure_ragflow_secret_key + _prepare_multi_ragflow_confs + + # Generate nginx upstream include files so nginx can proxy/load-balance to all instances + : > /etc/nginx/conf.d/ragflow_upstream.conf + : > /etc/nginx/conf.d/admin_upstream.conf + echo "server 127.0.0.1:${ADMIN_SVR_HTTP_PORT};" >> /etc/nginx/conf.d/admin_upstream.conf + + local idx port conf_name + for (( idx=0; idx<${SVR_COUNT}; idx++ )); do + conf_name="service_conf_ragflow_${idx}.yaml" + if [[ "${idx}" -eq 0 ]]; then + port="${SVR_HTTP_PORT}" + else + port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1)) + fi + echo "server 127.0.0.1:${port};" >> /etc/nginx/conf.d/ragflow_upstream.conf + _start_ragflow_instance "${idx}" "${port}" "${conf_name}" + done +} + function ensure_docling() { [[ "${USE_DOCLING}" == "true" ]] || { echo "[docling] disabled by USE_DOCLING"; return 0; } python3 -c 'import pip' >/dev/null 2>&1 || python3 -m ensurepip --upgrade || true @@ -257,15 +379,13 @@ ensure_docling ensure_mineru if [[ "${ENABLE_WEBSERVER}" -eq 1 ]]; then + echo "Starting ragflow_server..." + start_ragflow_servers + + # nginx upstream include files are generated by start_ragflow_servers; + # start nginx after generation so it picks them up (no reload needed). echo "Starting nginx..." /usr/sbin/nginx - - echo "Starting ragflow_server..." - while true; do - "$PY" api/ragflow_server.py & - wait; - sleep 1; - done & fi if [[ "${ENABLE_DATASYNC}" -eq 1 ]]; then diff --git a/docker/nginx/ragflow.conf b/docker/nginx/ragflow.conf index a06098276..e44ae21cc 100644 --- a/docker/nginx/ragflow.conf +++ b/docker/nginx/ragflow.conf @@ -1,3 +1,13 @@ +upstream ragflow_upstream { + # generated by entrypoint.sh + include /etc/nginx/conf.d/ragflow_upstream.conf; +} + +upstream admin_upstream { + # generated by entrypoint.sh + include /etc/nginx/conf.d/admin_upstream.conf; +} + server { listen 80; server_name _; @@ -11,12 +21,12 @@ server { gzip_disable "MSIE [1-6]\."; location ~ ^/api/v1/admin { - proxy_pass http://localhost:9381; + proxy_pass http://admin_upstream; include proxy.conf; } location ~ ^/(v1|api) { - proxy_pass http://localhost:9380; + proxy_pass http://ragflow_upstream; include proxy.conf; } diff --git a/docker/service_conf.yaml.template b/docker/service_conf.yaml.template index a46484043..208f8bc1c 100644 --- a/docker/service_conf.yaml.template +++ b/docker/service_conf.yaml.template @@ -1,9 +1,10 @@ ragflow: host: ${RAGFLOW_HOST:-0.0.0.0} - http_port: 9380 + http_port: ${SVR_HTTP_PORT:-9380} + secret_key: '${RAGFLOW_SECRET_KEY}' admin: host: ${RAGFLOW_HOST:-0.0.0.0} - http_port: 9381 + http_port: ${ADMIN_SVR_HTTP_PORT:-9381} mysql: name: '${MYSQL_DBNAME:-rag_flow}' user: '${MYSQL_USER:-root}' @@ -31,7 +32,7 @@ infinity: redis: db: 1 password: '${REDIS_PASSWORD:-infini_rag_flow}' - host: '${REDIS_HOST:-redis}:6379' + host: '${REDIS_HOST:-redis}:${REDIS_PORT:-6379}' oceanbase: scheme: 'mysql' # set 'mysql' to create connection using mysql config config: diff --git a/mcp/server/server.py b/mcp/server/server.py index 8d0d12c25..1cbeb29b0 100644 --- a/mcp/server/server.py +++ b/mcp/server/server.py @@ -34,6 +34,11 @@ import mcp.types as types from mcp.server.lowlevel import Server +try: + # In this repo, 
PYTHONPATH is set to project root so this import works. + from common.log_utils import init_root_logger +except Exception: # pragma: no cover + init_root_logger = None class LaunchMode(StrEnum): SELF_HOST = "self-host" @@ -629,6 +634,11 @@ def parse_bool_flag(key: str, default: bool) -> bool: TRANSPORT_STREAMABLE_HTTP_ENABLED = parse_bool_flag("RAGFLOW_MCP_TRANSPORT_STREAMABLE_ENABLED", transport_streamable_http_enabled) JSON_RESPONSE = parse_bool_flag("RAGFLOW_MCP_JSON_RESPONSE", json_response) + # Initialize file logging (avoid relying on nohup stdout redirection). + # Use port in basename for multi-instance clarity. + if init_root_logger is not None: + init_root_logger(f"mcp_server_{PORT}") + if MODE == LaunchMode.SELF_HOST and not HOST_API_KEY: raise click.UsageError("--api-key is required when --mode is 'self-host'") @@ -638,42 +648,41 @@ def parse_bool_flag(key: str, default: bool) -> bool: if not TRANSPORT_STREAMABLE_HTTP_ENABLED and JSON_RESPONSE: JSON_RESPONSE = False - print( + logging.info( r""" __ __ ____ ____ ____ _____ ______ _______ ____ | \/ |/ ___| _ \ / ___|| ____| _ \ \ / / ____| _ \ | |\/| | | | |_) | \___ \| _| | |_) \ \ / /| _| | |_) | | | | | |___| __/ ___) | |___| _ < \ V / | |___| _ < |_| |_|\____|_| |____/|_____|_| \_\ \_/ |_____|_| \_\ - """, - flush=True, + """ ) - print(f"MCP launch mode: {MODE}", flush=True) - print(f"MCP host: {HOST}", flush=True) - print(f"MCP port: {PORT}", flush=True) - print(f"MCP base_url: {BASE_URL}", flush=True) + logging.info("MCP launch mode: %s", MODE) + logging.info("MCP host: %s", HOST) + logging.info("MCP port: %s", PORT) + logging.info("MCP base_url: %s", BASE_URL) if not any([TRANSPORT_SSE_ENABLED, TRANSPORT_STREAMABLE_HTTP_ENABLED]): - print("At least one transport should be enabled, enable streamable-http automatically", flush=True) + logging.warning("At least one transport should be enabled, enable streamable-http automatically") TRANSPORT_STREAMABLE_HTTP_ENABLED = True if TRANSPORT_SSE_ENABLED: - print("SSE transport enabled: yes", flush=True) - print("SSE endpoint available at /sse", flush=True) + logging.info("SSE transport enabled: yes") + logging.info("SSE endpoint available at /sse") else: - print("SSE transport enabled: no", flush=True) + logging.info("SSE transport enabled: no") if TRANSPORT_STREAMABLE_HTTP_ENABLED: - print("Streamable HTTP transport enabled: yes", flush=True) - print("Streamable HTTP endpoint available at /mcp", flush=True) + logging.info("Streamable HTTP transport enabled: yes") + logging.info("Streamable HTTP endpoint available at /mcp") if JSON_RESPONSE: - print("Streamable HTTP mode: JSON response enabled", flush=True) + logging.info("Streamable HTTP mode: JSON response enabled") else: - print("Streamable HTTP mode: SSE over HTTP enabled", flush=True) + logging.info("Streamable HTTP mode: SSE over HTTP enabled") else: - print("Streamable HTTP transport enabled: no", flush=True) + logging.info("Streamable HTTP transport enabled: no") if JSON_RESPONSE: - print("Warning: --json-response ignored because streamable transport is disabled.", flush=True) + logging.warning("--json-response ignored because streamable transport is disabled.") uvicorn.run( create_starlette_app(), diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 000000000..af33b5a5a --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,222 @@ +# RAGFlow 脚本使用指南 + +本目录包含 RAGFlow 的运维部署脚本和工具脚本,用于管理服务部署和数据处理任务。 + +## 脚本说明 + +### 1. 
`deploy.sh` - Operations deployment script (restructured after `docker/entrypoint.sh`)
+
+`deploy.sh` now uses **entrypoint-style component switches**: `start/stop/status` are the actions, and `--enable-*/--disable-*/--xxx=<value>` options are the configuration.
+
+**Supported actions:**
+- `start` - Start components (defaults to `webserver + taskexecutor`; datasync is not started by default)
+- `stop` - Stop the components started by this script (based on `pids/`)
+- `restart` - stop + start
+- `status` - Show status
+- `force-stop` - Forcefully stop related processes (does not rely on PID files; use with caution)
+- `clear` - Stop, then remove the runtime files generated by the script (`logs/`, `pids/`, `nginx_conf/`, `conf/service_conf_ragflow_*.yaml`)
+- `help` - Show help
+
+**Main component switches (aligned with `docker/entrypoint.sh`):**
+- `--enable-webserver` - Start the WebServer (`ragflow_server` + optional nginx frontend)
+- `--disable-webserver` - Do not start the WebServer
+- `--enable-taskexecutor` - Start the task executor
+- `--disable-taskexecutor` - Do not start the task executor
+- `--enable-datasync` - Start datasource sync (off by default)
+- `--disable-datasync` - Do not start datasource sync
+- `--enable-mcpserver` - Start the MCP Server
+- `--enable-adminserver` - Start the Admin Server
+- `--enable-powerragserver` - Start the PowerRAG Server
+
+> Rule: if `start` is given any `--enable-*` option, the script enters **enable-only mode**: only the explicitly enabled components start, and everything else stays off.
+> Without any `--enable-*` option, the default mode applies: `webserver + taskexecutor` start. See the sketch after this section.
+
+**Multiple ragflow_server instances (aligned with `docker/entrypoint.sh`):**
+- `--svr-count=<count>`: number of instances (default 1)
+- `--svr-http-port=<port>`: port of instance 0 (default 9380)
+- `--svr-extra-base-http-port=<port>`: base port for instances 1..N-1 (default 9400, i.e. 9400, 9401, ...)
+- `--admin-svr-http-port=<port>`: written into the generated configs as `admin.http_port` (default 9381)
+- `--service-conf=<file>`: base config file (default `conf/local.service_conf.yaml`)
+
+**Note**: ragflow_server ports must not collide with admin-svr-http-port, mcp-port, powerrag-port, or web-port. The script checks for port conflicts before starting and aborts with an error.
+
+> The script generates `service_conf_ragflow_<idx>.yaml` under `conf/` and starts each instance with the matching `RAGFLOW_SERVICE_CONF` environment variable.
+
+**Task executor (consumer) options:**
+- `--consumer-no-beg=<n>`
+- `--consumer-no-end=<n>`: half-open range `[beg, end)`
+- `--workers=<n>`: if no range is given, start a fixed number of workers (default 1)
+- `--host-id=<id>`: defaults to `hostname` (md5 of the hostname if it is longer than 32 characters)
+
+**MCP options:**
+- `--mcp-host=<host>`
+- `--mcp-port=<port>`
+- `--mcp-base-url=<url>`
+- `--mcp-mode=<mode>`
+- `--mcp-host-api-key=<key>`
+- `--no-transport-sse-enabled`
+- `--no-transport-streamable-http-enabled`
+- `--no-json-response`
+
+**PowerRAG options:**
+- `--powerrag-port=<port>` (default 6000)
+
+**Compatibility:**
+- The separate `start-web/stop-web` commands are gone; the frontend nginx (static files + API reverse proxy) starts together with `webserver`.
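+
+The two start modes are easiest to see side by side. A minimal sketch (component semantics exactly as documented above; ports and defaults as listed):
+
+```bash
+# Default mode: no --enable-* flag -> webserver + taskexecutor start,
+# datasync / MCP / Admin / PowerRAG stay off.
+./scripts/deploy.sh start
+
+# Enable-only mode: any --enable-* flag means ONLY the listed components start.
+# Here only the MCP server starts; webserver and taskexecutor do NOT.
+./scripts/deploy.sh start --enable-mcpserver
+
+# Enable-only mode with several components:
+./scripts/deploy.sh start --enable-webserver --enable-taskexecutor --enable-datasync
+```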
+### 2. `tools.sh` - Utility script
+
+Runs data upload and processing tool tasks.
+
+**Supported commands:**
+- `upload-wiki` - Upload Wiki JSON data (runs in the background; supports resume)
+- `stop-upload-wiki` - Stop the Wiki JSON upload task
+- `reparse-failed` - Re-parse the failed documents of a given dataset
+- `stop-reparse-failed` - Stop the re-parse task
+- `status` - Show tool task status
+
+## Quick start
+
+### Deployment (deploy.sh)
+
+```bash
+# Start the default components: webserver + taskexecutor (datasync is off by default)
+./scripts/deploy.sh start
+
+# Show status
+./scripts/deploy.sh status
+
+# Stop
+./scripts/deploy.sh stop
+
+# Force stop (does not rely on pid files; use with caution)
+./scripts/deploy.sh force-stop
+
+# Clean up generated runtime files (stops first, then removes logs/, pids/,
+# nginx_conf/ and conf/service_conf_ragflow_*.yaml)
+./scripts/deploy.sh clear
+```
+
+#### Start multiple ragflow_server instances and multiple task executors
+
+```bash
+./scripts/deploy.sh start \
+  --svr-count=2 \
+  --svr-http-port=9380 \
+  --svr-extra-base-http-port=9400 \
+  --workers=2
+```
+
+#### Start only ragflow_server (no workers/datasync)
+
+```bash
+./scripts/deploy.sh start --enable-webserver
+```
+
+#### Start multiple ragflow_server instances (multiple ports)
+
+```bash
+./scripts/deploy.sh start \
+  --enable-webserver \
+  --svr-count=3 \
+  --svr-http-port=9380 \
+  --svr-extra-base-http-port=9400
+```
+
+#### Start task executors (fixed count)
+
+```bash
+./scripts/deploy.sh start --enable-taskexecutor --workers=2
+```
+
+#### Start task executors (range mode)
+
+```bash
+./scripts/deploy.sh start --enable-taskexecutor \
+  --consumer-no-beg=0 --consumer-no-end=5 \
+  --host-id=myhost123
+```
+
+#### Start MCP / Admin / PowerRAG
+
+```bash
+./scripts/deploy.sh start --enable-mcpserver --enable-adminserver --enable-powerragserver
+./scripts/deploy.sh start --enable-powerragserver --powerrag-port=6000
+```
+
+### Utility tasks (tools.sh)
+
+```bash
+# Upload Wiki JSON data
+./scripts/tools.sh upload-wiki
+
+# Upload with custom parameters
+API_KEY=xxx HOST=xxx WIKI_DATA_DIR=xxx BATCH_SIZE=1000 ./scripts/tools.sh upload-wiki
+WIKI_ENABLE_RESUME=false ./scripts/tools.sh upload-wiki
+
+# Stop the upload task
+./scripts/tools.sh stop-upload-wiki
+
+# Re-parse failed documents (requires a dataset ID)
+API_KEY=xxx HOST=xxx DATASET_ID=xxx BATCH_SIZE=1000 ./scripts/tools.sh reparse-failed
+
+# Stop the re-parse task
+./scripts/tools.sh stop-reparse-failed
+
+# Show tool task status
+./scripts/tools.sh status
+```
+
+## Logs and PIDs
+
+### Service logs (default location: `logs/`)
+
+**Note**: each service manages its own log file through `init_root_logger()`; the script does not duplicate logging.
+
+- `logs/ragflow_server_{port}.log` - RAGFlow server log, one per port (e.g. `logs/ragflow_server_9380.log`)
+- `logs/task_executor_{id}.log` - Task executor log (e.g. `logs/task_executor_0.log`)
+- `logs/data_sync_{consumer_no}.log` - Data sync log
+- `logs/admin_service.log` - Admin server log
+- `logs/powerrag_server.log` - PowerRAG server log
+- `logs/nginx_access.log` - Nginx access log
+- `logs/nginx_error.log` - Nginx error log
+- `logs/web_frontend.log` - Nginx startup log (startup output only)
+
+### PID files (default location: `pids/`)
+
+- `pids/ragflow_server_<port>.pid`
+- `pids/task_executor_<id>.pid`
+- `pids/datasync.pid`
+- `pids/admin_server.pid`
+- `pids/mcp_server.pid`
+- `pids/powerrag_server.pid`
+- `pids/web_frontend.pid`
+
+## Viewing logs
+
+```bash
+# RAGFlow server (by port)
+tail -f logs/ragflow_server_9380.log
+tail -f logs/ragflow_server_9400.log  # with multiple instances
+
+# Task executor (by worker id)
+tail -f logs/task_executor_0.log
+
+# Data sync
+tail -f logs/data_sync_0.log
+
+# Admin server
+tail -f logs/admin_service.log
+
+# PowerRAG server
+tail -f logs/powerrag_server.log
+
+# Nginx logs
+tail -f logs/nginx_access.log
+tail -f logs/nginx_error.log
+```
+
+## Notes
+
+1. Run from the project root: `./scripts/deploy.sh ...`
+2. `force-stop` kills the related processes forcefully; use with caution
+3. Multi-instance `ragflow_server` is driven by `RAGFLOW_SERVICE_CONF`; replacing `local.service_conf.yaml` is no longer needed
+4. **Port configuration**: when picking ragflow_server ports, keep admin-svr-http-port (default 9381), mcp-port (default 9382), and the other service ports free to avoid conflicts. The script checks before starting and aborts on conflict.
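+
+If `start` aborts with a port conflict, the current listener can be inspected before retrying. A small sketch (the same commands the script prints in its hints; `ss` may need elevated privileges to show the owning process, and 9390/9410 are just example ports):
+
+```bash
+# Who is listening on the planned ragflow_server port?
+ss -ltnp '( sport = :9380 )'
+
+# Alternative via lsof, if installed:
+lsof -nP -iTCP:9380 -sTCP:LISTEN
+
+# Then stop that service, or pick other ports:
+./scripts/deploy.sh start --svr-http-port=9390 --svr-extra-base-http-port=9410
+```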
diff --git a/scripts/deploy.sh b/scripts/deploy.sh
new file mode 100755
index 000000000..84cc6ac2f
--- /dev/null
+++ b/scripts/deploy.sh
@@ -0,0 +1,1084 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# -----------------------------------------------------------------------------
+# RAGFlow service deployment script (operations)
+#
+# Goal: restructured after the "component switches + multi-instance + port
+# options" pattern of docker/entrypoint.sh.
+#
+# - Default: start webserver (ragflow_server + optional nginx) and taskexecutor
+# - Optional: MCP / Admin / PowerRAG
+# - Supported: multiple ragflow_server instances (RAGFLOW_SERVICE_CONF points
+#   each instance at its own conf file)
+# -----------------------------------------------------------------------------
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+WORKSPACE_FOLDER="$(cd "${SCRIPT_DIR}/.." && pwd)"
+
+PYTHON="${PYTHON:-${WORKSPACE_FOLDER}/.venv/bin/python}"
+
+RAGFLOW_SERVER_PY="${WORKSPACE_FOLDER}/api/ragflow_server.py"
+TASK_EXECUTOR_PY="${WORKSPACE_FOLDER}/rag/svr/task_executor.py"
+DATASYNC_PY="${WORKSPACE_FOLDER}/rag/svr/sync_data_source.py"
+ADMIN_SERVER_PY="${WORKSPACE_FOLDER}/admin/server/admin_server.py"
+MCP_SERVER_PY="${WORKSPACE_FOLDER}/mcp/server/server.py"
+POWERRAG_SERVER_PY="${WORKSPACE_FOLDER}/powerrag/server/powerrag_server.py"
+
+CONF_DIR="${WORKSPACE_FOLDER}/conf"
+GLOBAL_SERVICE_CONF="${GLOBAL_SERVICE_CONF:-local.service_conf.yaml}"
+
+WEB_DIR="${WORKSPACE_FOLDER}/web"
+NGINX_CONF_DIR="${WORKSPACE_FOLDER}/nginx_conf"
+
+LOG_DIR="${WORKSPACE_FOLDER}/logs"
+PID_DIR="${WORKSPACE_FOLDER}/pids"
+mkdir -p "${LOG_DIR}" "${PID_DIR}" "${NGINX_CONF_DIR}"
+
+# -----------------------------------------------------------------------------
+# Stable SECRET_KEY for auth token signing across multiple ragflow_server instances
+#
+# Why:
+# - Login returns a signed token in the `Authorization` header.
+# - If multiple ragflow_server processes use different SECRET_KEYs, nginx/clients
+#   will hit different instances and get "Signature ... does not match" -> 401,
+#   bouncing the frontend back to the login page.
+#
+# Strategy:
+# - Prefer an externally provided RAGFLOW_SECRET_KEY env var (>= 32 chars).
+# - Otherwise generate ONE, persist it to conf/.ragflow_secret_key, and export it
+#   so all child processes started by this script share it.
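+#
+# Example (illustrative; `openssl rand -hex 32` emits a 64-char key, which
+# satisfies the >= 32 chars rule above):
+#   RAGFLOW_SECRET_KEY="$(openssl rand -hex 32)" ./scripts/deploy.sh start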
+# -----------------------------------------------------------------------------
+function ensure_ragflow_secret_key() {
+  local key_file="${CONF_DIR}/.ragflow_secret_key"
+
+  if [[ -n "${RAGFLOW_SECRET_KEY:-}" && ${#RAGFLOW_SECRET_KEY} -ge 32 ]]; then
+    export RAGFLOW_SECRET_KEY
+    return 0
+  fi
+
+  if [[ -f "${key_file}" ]]; then
+    RAGFLOW_SECRET_KEY="$(cat "${key_file}")"
+  else
+    _require_python
+    RAGFLOW_SECRET_KEY="$("${PYTHON}" -c 'import secrets; print(secrets.token_hex(32))')"
+    echo -n "${RAGFLOW_SECRET_KEY}" > "${key_file}"
+    chmod 600 "${key_file}" 2>/dev/null || true
+  fi
+
+  if [[ ${#RAGFLOW_SECRET_KEY} -lt 32 ]]; then
+    echo "ERROR: failed to initialize a strong RAGFLOW_SECRET_KEY" >&2
+    return 1
+  fi
+
+  export RAGFLOW_SECRET_KEY
+}
+
+# -----------------------------------------------------------------------------
+# Defaults (aligned with docker/entrypoint.sh)
+# -----------------------------------------------------------------------------
+ENABLE_WEBSERVER="${ENABLE_WEBSERVER:-1}"
+ENABLE_TASKEXECUTOR="${ENABLE_TASKEXECUTOR:-1}"
+ENABLE_DATASYNC="${ENABLE_DATASYNC:-0}"
+ENABLE_MCP_SERVER="${ENABLE_MCP_SERVER:-0}"
+ENABLE_ADMIN_SERVER="${ENABLE_ADMIN_SERVER:-0}"
+ENABLE_POWERRAG_SERVER="${ENABLE_POWERRAG_SERVER:-0}"
+
+CONSUMER_NO_BEG="${CONSUMER_NO_BEG:-0}"
+CONSUMER_NO_END="${CONSUMER_NO_END:-0}"
+WORKERS="${WORKERS:-1}"
+
+# Overridable env vars:
+# - SVR_COUNT
+# - SVR_HTTP_PORT
+# - SVR_EXTRA_BASE_HTTP_PORT
+# - ADMIN_SVR_HTTP_PORT
+SVR_COUNT="${SVR_COUNT:-1}"
+SVR_HTTP_PORT="${SVR_HTTP_PORT:-9380}"
+SVR_EXTRA_BASE_HTTP_PORT="${SVR_EXTRA_BASE_HTTP_PORT:-9400}"
+ADMIN_SVR_HTTP_PORT="${ADMIN_SVR_HTTP_PORT:-9381}"
+
+MCP_HOST="${MCP_HOST:-127.0.0.1}"
+MCP_PORT="${MCP_PORT:-9382}"
+MCP_BASE_URL="${MCP_BASE_URL:-http://127.0.0.1:${SVR_HTTP_PORT}}"
+MCP_MODE="${MCP_MODE:-self-host}"
+MCP_HOST_API_KEY="${MCP_HOST_API_KEY:-}"
+MCP_TRANSPORT_SSE_FLAG="${MCP_TRANSPORT_SSE_FLAG:---transport-sse-enabled}"
+MCP_TRANSPORT_STREAMABLE_HTTP_FLAG="${MCP_TRANSPORT_STREAMABLE_HTTP_FLAG:---transport-streamable-http-enabled}"
+MCP_JSON_RESPONSE_FLAG="${MCP_JSON_RESPONSE_FLAG:---json-response}"
+
+POWERRAG_PORT="${POWERRAG_PORT:-6000}"
+
+# Web frontend (nginx), optional
+WEB_PORT="${WEB_PORT:-9222}"
+SERVER_HOST_FOR_WEB="${SERVER_HOST_FOR_WEB:-127.0.0.1}"
+ADMIN_HOST_FOR_WEB="${ADMIN_HOST_FOR_WEB:-127.0.0.1}"
+
+# -----------------------------------------------------------------------------
+# Utilities
+# -----------------------------------------------------------------------------
+function usage() {
+  cat <<'EOF'
+Usage:
+  ./scripts/deploy.sh [start|stop|restart|status|force-stop|clear|help] [options]
+
+Description:
+  - start: start components
+    - default mode: without any --enable-*, starts webserver + taskexecutor
+      (datasync is off by default)
+    - enable-only mode: with any --enable-*, only the enabled components start
+  - stop: stop components started by this script (based on pids/)
+  - restart: stop + start
+  - status: show status
+  - force-stop: kill processes forcefully (does not rely on pid files; use with caution)
+  - clear: stop, then remove generated runtime files
+    (logs/, pids/, nginx_conf/, conf/service_conf_ragflow_*.yaml)
+
+Core options (see docker/entrypoint.sh):
+  --enable-webserver
+  --disable-webserver
+  --enable-taskexecutor
+  --disable-taskexecutor
+  --enable-datasync
+  --disable-datasync
+  --enable-mcpserver
+  --enable-adminserver
+  --enable-powerragserver
+
+Task executor options:
+  --consumer-no-beg=<n>
+  --consumer-no-end=<n>              # half-open range [beg, end)
+  --workers=<n>                      # without a range, start <n> workers
+  --host-id=<id>                     # default: hostname (<=32 chars), else md5(hostname)
+
+Multi ragflow_server:
+  --svr-count=<count>                # SVR_COUNT
+  --svr-http-port=<port>             # SVR_HTTP_PORT (port of instance 0)
+  --svr-extra-base-http-port=<port>  # SVR_EXTRA_BASE_HTTP_PORT (idx>=1: base+(idx-1))
+  --admin-svr-http-port=<port>       # ADMIN_SVR_HTTP_PORT (admin.http_port in per-instance confs)
+  --service-conf=<file>              # base conf file (default: local.service_conf.yaml)
+
+MCP options:
+  --mcp-host=<host>
+  --mcp-port=<port>
+  --mcp-base-url=<url>
+  --mcp-mode=<mode>
+  --mcp-host-api-key=<key>
+  --no-transport-sse-enabled
+  --no-transport-streamable-http-enabled
+  --no-json-response
+
+PowerRAG options:
+  --powerrag-port=<port>
+
+Notes:
+  - The webserver component starts ragflow_server and, if the web/ directory
+    exists, nginx (static frontend + API reverse proxy).
+EOF
+}
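+
+# Example invocations (illustrative only; the port math assumes the defaults above):
+#   ./scripts/deploy.sh start --svr-count=2 --workers=4
+#     -> ragflow_server instances on ports 9380 and 9400, plus 4 task executors
+#   ./scripts/deploy.sh start --enable-mcpserver
+#     -> enable-only mode: only the MCP server starts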
+
+function is_process_running() {
+  local pid="${1:-}"
+  [[ -n "${pid}" ]] && ps -p "${pid}" >/dev/null 2>&1
+}
+
+function _default_host_id() {
+  local hn
+  hn="$(hostname)"
+  if [[ ${#hn} -le 32 ]]; then
+    echo "${hn}"
+    return 0
+  fi
+  if command -v md5sum >/dev/null 2>&1; then
+    echo -n "${hn}" | md5sum | awk '{print $1}'
+    return 0
+  fi
+  "${PYTHON}" - "${hn}" <<'PY'
+import hashlib
+import sys
+
+print(hashlib.md5(sys.argv[1].encode("utf-8")).hexdigest())
+PY
+}
+
+function _require_python() {
+  if [[ ! -x "${PYTHON}" ]]; then
+    echo "ERROR: python not found: ${PYTHON}" >&2
+    echo "Hint: run ./scripts/setup_venv.sh or set PYTHON=/path/to/python" >&2
+    exit 1
+  fi
+}
+
+function _jemalloc_preload_env() {
+  # best-effort: return "LD_PRELOAD=..." if available
+  if command -v pkg-config >/dev/null 2>&1 && pkg-config --exists jemalloc >/dev/null 2>&1; then
+    local libdir
+    libdir="$(pkg-config --variable=libdir jemalloc 2>/dev/null || true)"
+    if [[ -n "${libdir}" && -f "${libdir}/libjemalloc.so" ]]; then
+      echo "LD_PRELOAD=${libdir}/libjemalloc.so"
+      return 0
+    fi
+  fi
+  if [[ -f "/usr/lib64/libjemalloc.so" ]]; then
+    echo "LD_PRELOAD=/usr/lib64/libjemalloc.so"
+    return 0
+  fi
+  echo ""
+}
+
+function _common_env_kv() {
+  # keep previous local defaults; can be overridden externally
+  local jemalloc_kv
+  jemalloc_kv="$(_jemalloc_preload_env)"
+
+  # Ensure a stable secret key for all python processes started by this script.
+  ensure_ragflow_secret_key
+  echo "RAGFLOW_SECRET_KEY=${RAGFLOW_SECRET_KEY}"
+
+  echo "PYTHONPATH=${WORKSPACE_FOLDER}"
+  echo "DOC_ENGINE=${DOC_ENGINE:-oceanbase}"
+  echo "CACHE_TYPE=${CACHE_TYPE:-redis}"
+  echo "STORAGE_IMPL=${STORAGE_IMPL:-OPENDAL}"
+  echo "NLTK_DATA=${WORKSPACE_FOLDER}/nltk_data"
+  echo "CHROME_DIR=${WORKSPACE_FOLDER}/chrome-linux64"
+  echo "CHROMEDRIVER_DIR=${WORKSPACE_FOLDER}/chromedriver-linux64"
+  echo "TIKA_SERVER_JAR=${WORKSPACE_FOLDER}/tika-server-standard-3.0.0.jar"
+  echo "HUGGINGFACE_DIR=${WORKSPACE_FOLDER}/huggingface.co"
+  echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-/usr/lib/x86_64-linux-gnu/:/usr/lib64/}"
+  echo "TIKTOKEN_CACHE_DIR=${WORKSPACE_FOLDER}"
+  echo "LIGHTEN=${LIGHTEN:-1}"
+  echo "http_proxy="
+  echo "https_proxy="
+  echo "no_proxy="
+  echo "HTTP_PROXY="
+  echo "HTTPS_PROXY="
+  echo "NO_PROXY="
+  if [[ -n "${jemalloc_kv}" ]]; then
+    echo "${jemalloc_kv}"
+  fi
+}
+
+function _start_process() {
+  local name="$1"; shift
+  local pid_file="$1"; shift
+  local workdir="$1"; shift
+  local -a cmd=( "$@" )
+
+  if [[ -f "${pid_file}" ]]; then
+    local pid
+    pid="$(cat "${pid_file}" 2>/dev/null || true)"
+    if is_process_running "${pid}"; then
+      echo "[skip] ${name} already running (PID: ${pid})"
+      return 0
+    fi
+  fi
+
+  mkdir -p "$(dirname "${pid_file}")"
+
+  # quote cmd for bash -c
+  local cmd_quoted=""
+  local arg
+  for arg in "${cmd[@]}"; do
+    cmd_quoted+="$(printf '%q ' "${arg}")"
+  done
+
+  # Run in background without a restart loop.
+  # Note: most services manage their own logs via init_root_logger() (file + stream).
+  # We discard stdout/stderr here to avoid duplicate logging (when stdout is redirected
+  # to another file) and keep only the service-managed logs under logs/.
+ nohup bash -c " + set -euo pipefail + cd $(printf '%q' "${workdir}") + ${cmd_quoted} + " >/dev/null 2>&1 & + + local bg_pid=$! + echo "${bg_pid}" > "${pid_file}" + echo "[ok] started ${name} (PID: ${bg_pid})" +} + +function _stop_by_pidfile() { + local name="$1" + local pid_file="$2" + if [[ ! -f "${pid_file}" ]]; then + echo "[skip] ${name} not running (no pid file)" + return 0 + fi + local pid + pid="$(cat "${pid_file}" 2>/dev/null || true)" + if is_process_running "${pid}"; then + echo "[stop] ${name} (PID: ${pid})" + kill "${pid}" 2>/dev/null || true + sleep 0.5 + if is_process_running "${pid}"; then + kill -9 "${pid}" 2>/dev/null || true + fi + else + echo "[skip] ${name} not running (stale pid: ${pid})" + fi + rm -f "${pid_file}" +} + +function _validate_port() { + local port="$1" + [[ "${port}" =~ ^[0-9]+$ ]] && [[ "${port}" -ge 1 ]] && [[ "${port}" -le 65535 ]] +} + +function _check_port_conflicts() { + local -a reserved_ports=() + local -a ragflow_ports=() + local port idx + + # Collect reserved ports (admin, mcp, powerrag, web) + reserved_ports+=("${ADMIN_SVR_HTTP_PORT}") + reserved_ports+=("${MCP_PORT}") + reserved_ports+=("${POWERRAG_PORT}") + reserved_ports+=("${WEB_PORT}") + + # Collect ragflow_server ports + ragflow_ports+=("${SVR_HTTP_PORT}") + for (( idx=1; idx<${SVR_COUNT}; idx++ )); do + port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1)) + ragflow_ports+=("${port}") + done + + # Check ragflow ports against reserved ports + for port in "${ragflow_ports[@]}"; do + for reserved in "${reserved_ports[@]}"; do + if [[ "${port}" -eq "${reserved}" ]]; then + echo "ERROR: Port conflict detected: ragflow_server port ${port} conflicts with reserved port ${reserved}" >&2 + if [[ "${port}" -eq "${ADMIN_SVR_HTTP_PORT}" ]]; then + echo " Hint: SVR_HTTP_PORT or SVR_EXTRA_BASE_HTTP_PORT should not equal ADMIN_SVR_HTTP_PORT (${ADMIN_SVR_HTTP_PORT})" >&2 + elif [[ "${port}" -eq "${MCP_PORT}" ]]; then + echo " Hint: ragflow_server port conflicts with MCP_PORT (${MCP_PORT})" >&2 + elif [[ "${port}" -eq "${POWERRAG_PORT}" ]]; then + echo " Hint: ragflow_server port conflicts with POWERRAG_PORT (${POWERRAG_PORT})" >&2 + elif [[ "${port}" -eq "${WEB_PORT}" ]]; then + echo " Hint: ragflow_server port conflicts with WEB_PORT (${WEB_PORT})" >&2 + fi + return 1 + fi + done + done + + # Check for duplicates within ragflow ports + local -a seen=() + for port in "${ragflow_ports[@]}"; do + for seen_port in "${seen[@]}"; do + if [[ "${port}" -eq "${seen_port}" ]]; then + echo "ERROR: Duplicate ragflow_server port detected: ${port}" >&2 + echo " Hint: SVR_COUNT=${SVR_COUNT}, SVR_HTTP_PORT=${SVR_HTTP_PORT}, SVR_EXTRA_BASE_HTTP_PORT=${SVR_EXTRA_BASE_HTTP_PORT}" >&2 + return 1 + fi + done + seen+=("${port}") + done + + return 0 +} + +# ----------------------------------------------------------------------------- +# Config generation (per-instance service conf) +# ----------------------------------------------------------------------------- +function _render_service_conf_copy() { + local out_file="$1" + local ragflow_port="$2" + local admin_port="$3" + local base_file="${CONF_DIR}/${GLOBAL_SERVICE_CONF}" + + if [[ ! 
-f "${base_file}" ]]; then + echo "ERROR: base service conf not found: ${base_file}" >&2 + exit 1 + fi + + "${PYTHON}" - </dev/null || true + fi + + # env args (newline-separated key=value) + local -a env_args + mapfile -t env_args < <(_common_env_kv) + + echo "[start] ragflow_server port=${port} conf=${conf_name}" + _start_process \ + "ragflow_server:${port}" \ + "${pid_file}" \ + "${WORKSPACE_FOLDER}" \ + env "${env_args[@]}" "RAGFLOW_SERVICE_CONF=${conf_name}" "RAGFLOW_LOG_BASENAME=ragflow_server_${port}" \ + "${PYTHON}" "${RAGFLOW_SERVER_PY}" + done +} + +function start_datasync() { + _require_python + local pid_file="${PID_DIR}/datasync.pid" + local -a env_args + mapfile -t env_args < <(_common_env_kv) + _start_process \ + "datasync" \ + "${pid_file}" \ + "${WORKSPACE_FOLDER}" \ + env "${env_args[@]}" "${PYTHON}" "${DATASYNC_PY}" +} + +function start_admin_server() { + _require_python + local pid_file="${PID_DIR}/admin_server.pid" + local -a env_args + mapfile -t env_args < <(_common_env_kv) + _start_process \ + "admin_server" \ + "${pid_file}" \ + "${WORKSPACE_FOLDER}" \ + env "${env_args[@]}" "${PYTHON}" "${ADMIN_SERVER_PY}" +} + +function start_mcp_server() { + _require_python + local pid_file="${PID_DIR}/mcp_server.pid" + local -a env_args + mapfile -t env_args < <(_common_env_kv) + _start_process \ + "mcp_server" \ + "${pid_file}" \ + "${WORKSPACE_FOLDER}" \ + env "${env_args[@]}" \ + "${PYTHON}" "${MCP_SERVER_PY}" \ + --host="${MCP_HOST}" \ + --port="${MCP_PORT}" \ + --base-url="${MCP_BASE_URL}" \ + --mode="${MCP_MODE}" \ + --api-key="${MCP_HOST_API_KEY}" \ + "${MCP_TRANSPORT_SSE_FLAG}" \ + "${MCP_TRANSPORT_STREAMABLE_HTTP_FLAG}" \ + "${MCP_JSON_RESPONSE_FLAG}" +} + +function start_powerrag_server() { + _require_python + local pid_file="${PID_DIR}/powerrag_server.pid" + local -a env_args + mapfile -t env_args < <(_common_env_kv) + _start_process \ + "powerrag_server" \ + "${pid_file}" \ + "${WORKSPACE_FOLDER}" \ + env "${env_args[@]}" "${PYTHON}" "${POWERRAG_SERVER_PY}" --port="${POWERRAG_PORT}" +} + +function start_task_executor() { + _require_python + local consumer_id="$1" + local pid_file="${PID_DIR}/task_executor_${consumer_id}.pid" + local -a env_args + mapfile -t env_args < <(_common_env_kv) + _start_process \ + "task_executor[${consumer_id}]" \ + "${pid_file}" \ + "${WORKSPACE_FOLDER}" \ + env "${env_args[@]}" "${PYTHON}" "${TASK_EXECUTOR_PY}" "${consumer_id}" +} + +function start_task_executors() { + if [[ "${CONSUMER_NO_END}" -gt "${CONSUMER_NO_BEG}" ]]; then + echo "[start] task executors range=[${CONSUMER_NO_BEG},${CONSUMER_NO_END})" + local i + for (( i=CONSUMER_NO_BEG; i/dev/null || true)" + if is_process_running "${pid}"; then + echo "[skip] web frontend already running (PID: ${pid}, PORT: ${WEB_PORT})" + return 0 + fi + fi + + if [[ ! -d "${WEB_DIR}" ]]; then + echo "[warn] web dir not found: ${WEB_DIR}; skip nginx frontend/proxy" >&2 + return 0 + fi + if ! _validate_port "${WEB_PORT}"; then + echo "ERROR: invalid WEB_PORT=${WEB_PORT}" >&2 + return 1 + fi + if ! command -v nginx >/dev/null 2>&1; then + echo "[warn] nginx not found in PATH; skip nginx frontend/proxy" >&2 + return 0 + fi + + # If dist missing, try build (best-effort) + if [[ ! -d "${WEB_DIR}/dist" ]]; then + if command -v npm >/dev/null 2>&1; then + echo "[web] dist missing, running build..." + if ! 
(cd "${WEB_DIR}" && npm install && npm run build); then + echo "[warn] web build failed; skip nginx frontend/proxy" >&2 + return 0 + fi + else + echo "[warn] ${WEB_DIR}/dist not found and npm not available; skip nginx frontend/proxy" >&2 + return 0 + fi + fi + + local server_port_for_web="${SVR_HTTP_PORT}" + local admin_port_for_web="${ADMIN_SVR_HTTP_PORT}" + + # nginx temp dirs (must be writable for non-root runs) + local nginx_tmp_dir="${NGINX_CONF_DIR}/tmp" + mkdir -p "${nginx_tmp_dir}/client_body" "${nginx_tmp_dir}/proxy" "${nginx_tmp_dir}/fastcgi" "${nginx_tmp_dir}/uwsgi" "${nginx_tmp_dir}/scgi" + + cat > "${NGINX_CONF_DIR}/ragflow.conf" < "${NGINX_CONF_DIR}/nginx.conf" </dev/null 2>&1; then + echo "[warn] nginx config invalid; skip nginx frontend/proxy" >&2 + nginx -t -c "${NGINX_CONF_DIR}/nginx.conf" -e "${LOG_DIR}/nginx_error.log" -g "pid ${PID_DIR}/nginx.pid;" >&2 || true + return 0 + fi + + echo "[start] web frontend nginx (PORT: ${WEB_PORT})" + nginx -c "${NGINX_CONF_DIR}/nginx.conf" -e "${LOG_DIR}/nginx_error.log" -g "pid ${PID_DIR}/nginx.pid;" > "${LOG_DIR}/web_frontend.log" 2>&1 + + if [[ -f "${PID_DIR}/nginx.pid" ]]; then + local web_pid + web_pid="$(cat "${PID_DIR}/nginx.pid")" + echo "${web_pid}" > "${pid_file}" + echo "[ok] web frontend started (PID: ${web_pid})" + else + echo "[warn] nginx.pid not found; check ${LOG_DIR}/web_frontend.log" >&2 + return 0 + fi +} + +function stop_web() { + _stop_by_pidfile "web_frontend" "${PID_DIR}/web_frontend.pid" + # also cleanup nginx pid if present + if [[ -f "${PID_DIR}/nginx.pid" ]]; then + local pid + pid="$(cat "${PID_DIR}/nginx.pid" 2>/dev/null || true)" + if is_process_running "${pid}"; then + nginx -s quit -c "${NGINX_CONF_DIR}/nginx.conf" 2>/dev/null || kill "${pid}" 2>/dev/null || true + fi + rm -f "${PID_DIR}/nginx.pid" + fi +} + +# ----------------------------------------------------------------------------- +# Stop/Status/Force-stop +# ----------------------------------------------------------------------------- +function stop_all() { + # reverse-ish order + _stop_by_pidfile "powerrag_server" "${PID_DIR}/powerrag_server.pid" + _stop_by_pidfile "mcp_server" "${PID_DIR}/mcp_server.pid" + _stop_by_pidfile "admin_server" "${PID_DIR}/admin_server.pid" + _stop_by_pidfile "datasync" "${PID_DIR}/datasync.pid" + + # task executors + local f + for f in "${PID_DIR}"/task_executor_*.pid; do + [[ -f "${f}" ]] || continue + _stop_by_pidfile "task_executor" "${f}" + done + + # ragflow servers + for f in "${PID_DIR}"/ragflow_server_*.pid; do + [[ -f "${f}" ]] || continue + _stop_by_pidfile "ragflow_server" "${f}" + done + rm -f "${PID_DIR}/ragflow_server.pid" 2>/dev/null || true + + # optional web frontend nginx + stop_web || true +} + +function force_stop_all() { + echo "=== force-stop: killing related processes (best-effort) ===" + pkill -f "${RAGFLOW_SERVER_PY}" 2>/dev/null || true + pkill -f "${TASK_EXECUTOR_PY}" 2>/dev/null || true + pkill -f "${DATASYNC_PY}" 2>/dev/null || true + pkill -f "${ADMIN_SERVER_PY}" 2>/dev/null || true + pkill -f "${MCP_SERVER_PY}" 2>/dev/null || true + pkill -f "${POWERRAG_SERVER_PY}" 2>/dev/null || true + pkill -f "${NGINX_CONF_DIR}/ragflow.conf" 2>/dev/null || true + rm -f "${PID_DIR}"/*.pid 2>/dev/null || true +} + +function clear_runtime_files() { + echo "=== clear: stop services and remove generated logs/configs/pids (best-effort) ===" + + # stop services started by this script (based on pids/) + stop_all || true + + # generated per-instance service confs + rm -f 
"${CONF_DIR}"/service_conf_ragflow_*.yaml 2>/dev/null || true + + # remove runtime dirs entirely (user expectation for clear) + rm -rf "${NGINX_CONF_DIR}" 2>/dev/null || true + rm -rf "${PID_DIR}" 2>/dev/null || true + rm -rf "${LOG_DIR}" 2>/dev/null || true + + echo "[ok] cleared: logs/, pids/, nginx_conf/, conf/service_conf_ragflow_*.yaml" +} + +function status() { + echo "=== status ===" + + echo "config:" + echo " - service_conf(base) = conf/${GLOBAL_SERVICE_CONF}" + echo " - ragflow main port = ${SVR_HTTP_PORT}" + echo " - ragflow extra base = ${SVR_EXTRA_BASE_HTTP_PORT}" + echo " - admin port = ${ADMIN_SVR_HTTP_PORT}" + echo " - mcp port = ${MCP_PORT}" + echo " - web port = ${WEB_PORT}" + + # ragflow + echo "ragflow_server:" + local any=0 + local f pid port idx conf_name conf_path log_path + for f in "${PID_DIR}"/ragflow_server_*.pid; do + [[ -f "${f}" ]] || continue + port="$(basename "${f}" | sed 's/ragflow_server_\(.*\)\.pid/\1/')" + pid="$(cat "${f}" 2>/dev/null || true)" + + # best-effort infer conf name from port + conf_name="(unknown)" + if [[ "${port}" == "${SVR_HTTP_PORT}" ]]; then + conf_name="service_conf_ragflow_0.yaml" + elif [[ "${port}" =~ ^[0-9]+$ ]]; then + idx=$(( port - SVR_EXTRA_BASE_HTTP_PORT + 1 )) + if [[ "${idx}" -ge 1 ]]; then + conf_name="service_conf_ragflow_${idx}.yaml" + fi + fi + conf_path="conf/${conf_name}" + log_path="logs/ragflow_server_${port}.log" + + if is_process_running "${pid}"; then + any=1 + echo " - [ok] port=${port} pid=${pid} conf=${conf_path} log=${log_path}" + else + echo " - [down] port=${port} pid=${pid} conf=${conf_path} log=${log_path}" + fi + done + if [[ "${any}" -eq 0 ]]; then + echo " - (none)" + fi + + # web frontend (nginx) + echo "web_frontend:" + local web_pf="${PID_DIR}/web_frontend.pid" + if [[ -f "${web_pf}" ]]; then + pid="$(cat "${web_pf}" 2>/dev/null || true)" + if is_process_running "${pid}"; then + echo " - [ok] pid=${pid} port=${WEB_PORT} conf=nginx_conf/nginx.conf access_log=logs/nginx_access.log error_log=logs/nginx_error.log" + else + echo " - [down] pid=${pid} port=${WEB_PORT} conf=nginx_conf/nginx.conf access_log=logs/nginx_access.log error_log=logs/nginx_error.log" + fi + else + echo " - (disabled/not started)" + fi + + # task executors + echo "task_executor:" + any=0 + local args consumer_arg logf + for f in "${PID_DIR}"/task_executor_*.pid; do + [[ -f "${f}" ]] || continue + pid="$(cat "${f}" 2>/dev/null || true)" + if is_process_running "${pid}"; then + any=1 + args="$(ps -p "${pid}" -o args= 2>/dev/null || true)" + consumer_arg="$(echo "${args}" | awk '{print $NF}')" + # If no consumer arg provided, fallback to pid-file id. 
+ if [[ -z "${consumer_arg}" || "${consumer_arg}" == *".py" ]]; then + consumer_arg="$(basename "${f}" | sed 's/task_executor_\(.*\)\.pid/\1/')" + fi + logf="logs/task_executor_${consumer_arg}.log" + echo " - [ok] id=$(basename "${f}") pid=${pid} log=${logf}" + else + echo " - [down] id=$(basename "${f}") pid=${pid}" + fi + done + if [[ "${any}" -eq 0 ]]; then + echo " - (none)" + fi + + # datasync + echo "datasync:" + local ds_pf="${PID_DIR}/datasync.pid" + if [[ -f "${ds_pf}" ]]; then + pid="$(cat "${ds_pf}" 2>/dev/null || true)" + if is_process_running "${pid}"; then + args="$(ps -p "${pid}" -o args= 2>/dev/null || true)" + consumer_arg="$(echo "${args}" | awk '{print $NF}')" + if [[ -z "${consumer_arg}" || "${consumer_arg}" == *".py" ]]; then + consumer_arg="0" + fi + logf="logs/data_sync_${consumer_arg}.log" + echo " - [ok] pid=${pid} log=${logf}" + else + echo " - [down] pid=${pid}" + fi + else + echo " - (disabled/not started)" + fi + + # admin + echo "admin_server:" + local ad_pf="${PID_DIR}/admin_server.pid" + if [[ -f "${ad_pf}" ]]; then + pid="$(cat "${ad_pf}" 2>/dev/null || true)" + if is_process_running "${pid}"; then + echo " - [ok] pid=${pid} port=${ADMIN_SVR_HTTP_PORT} log=logs/admin_service.log" + else + echo " - [down] pid=${pid} port=${ADMIN_SVR_HTTP_PORT} log=logs/admin_service.log" + fi + else + echo " - (disabled/not started)" + fi + + # mcp + echo "mcp_server:" + local mcp_pf="${PID_DIR}/mcp_server.pid" + if [[ -f "${mcp_pf}" ]]; then + pid="$(cat "${mcp_pf}" 2>/dev/null || true)" + if is_process_running "${pid}"; then + # port is from script args by default + echo " - [ok] pid=${pid} port=${MCP_PORT} log=logs/mcp_server_${MCP_PORT}.log" + else + echo " - [down] pid=${pid} port=${MCP_PORT} log=logs/mcp_server_${MCP_PORT}.log" + fi + else + echo " - (disabled/not started)" + fi + + # powerrag + echo "powerrag_server:" + local pr_pf="${PID_DIR}/powerrag_server.pid" + if [[ -f "${pr_pf}" ]]; then + pid="$(cat "${pr_pf}" 2>/dev/null || true)" + if is_process_running "${pid}"; then + echo " - [ok] pid=${pid} port=${POWERRAG_PORT} log=logs/powerrag_server.log" + else + echo " - [down] pid=${pid} port=${POWERRAG_PORT} log=logs/powerrag_server.log" + fi + else + echo " - (disabled/not started)" + fi +} + +# ----------------------------------------------------------------------------- +# Arg parsing (entrypoint-like) +# ----------------------------------------------------------------------------- +ACTION="${1:-start}" +shift || true + +case "${ACTION}" in + start|stop|restart|status|force-stop|clear|help) ;; + *) + # allow calling with only options: ./deploy.sh --disable-taskexecutor ... + if [[ "${ACTION}" == --* ]]; then + set -- "${ACTION}" "$@" + ACTION="start" + else + echo "Unknown action: ${ACTION}" >&2 + usage + exit 1 + fi + ;; +esac + +# If any --enable-* option is provided, switch to "enable-only" mode: +# only explicitly enabled components will be started. 
+HAS_ENABLE_FLAGS=0 +for arg in "$@"; do + if [[ "${arg}" == --enable-* ]]; then + HAS_ENABLE_FLAGS=1 + break + fi +done +if [[ "${HAS_ENABLE_FLAGS}" -eq 1 ]]; then + ENABLE_WEBSERVER=0 + ENABLE_TASKEXECUTOR=0 + ENABLE_DATASYNC=0 + ENABLE_MCP_SERVER=0 + ENABLE_ADMIN_SERVER=0 + ENABLE_POWERRAG_SERVER=0 +fi + +for arg in "$@"; do + case "${arg}" in + --enable-webserver) ENABLE_WEBSERVER=1 ;; + --disable-webserver) ENABLE_WEBSERVER=0 ;; + --enable-taskexecutor) ENABLE_TASKEXECUTOR=1 ;; + --disable-taskexecutor) ENABLE_TASKEXECUTOR=0 ;; + --disable-datasync) ENABLE_DATASYNC=0 ;; + --enable-datasync) ENABLE_DATASYNC=1 ;; + --enable-mcpserver) ENABLE_MCP_SERVER=1 ;; + --enable-adminserver) ENABLE_ADMIN_SERVER=1 ;; + --enable-powerragserver) ENABLE_POWERRAG_SERVER=1 ;; + --consumer-no-beg=*) CONSUMER_NO_BEG="${arg#*=}" ;; + --consumer-no-end=*) CONSUMER_NO_END="${arg#*=}" ;; + --workers=*) WORKERS="${arg#*=}" ;; + --host-id=*) HOST_ID="${arg#*=}" ;; + --svr-count=*) SVR_COUNT="${arg#*=}" ;; + --svr-http-port=*) SVR_HTTP_PORT="${arg#*=}" ;; + --svr-extra-base-http-port=*) SVR_EXTRA_BASE_HTTP_PORT="${arg#*=}" ;; + --admin-svr-http-port=*) ADMIN_SVR_HTTP_PORT="${arg#*=}" ;; + + --service-conf=*) GLOBAL_SERVICE_CONF="${arg#*=}" ;; + --mcp-host=*) MCP_HOST="${arg#*=}" ;; + --mcp-port=*) MCP_PORT="${arg#*=}" ;; + --mcp-base-url=*) MCP_BASE_URL="${arg#*=}" ;; + --mcp-mode=*) MCP_MODE="${arg#*=}" ;; + --mcp-host-api-key=*) MCP_HOST_API_KEY="${arg#*=}" ;; + --no-transport-sse-enabled) MCP_TRANSPORT_SSE_FLAG="--no-transport-sse-enabled" ;; + --no-transport-streamable-http-enabled) MCP_TRANSPORT_STREAMABLE_HTTP_FLAG="--no-transport-streamable-http-enabled" ;; + --no-json-response) MCP_JSON_RESPONSE_FLAG="--no-json-response" ;; + --powerrag-port=*) POWERRAG_PORT="${arg#*=}" ;; + *) echo "Unknown option: ${arg}" >&2; usage; exit 1 ;; + esac +done + +# Validate ports early (best-effort) +for p in "${SVR_HTTP_PORT}" "${SVR_EXTRA_BASE_HTTP_PORT}" "${ADMIN_SVR_HTTP_PORT}" "${MCP_PORT}" "${POWERRAG_PORT}" "${WEB_PORT}"; do + if ! _validate_port "${p}"; then + echo "ERROR: invalid port: ${p}" >&2 + exit 1 + fi +done + +# Check for port conflicts +if ! 
_check_port_conflicts; then + exit 1 +fi + +# ----------------------------------------------------------------------------- +# Main +# ----------------------------------------------------------------------------- +case "${ACTION}" in + help) + usage + ;; + + start) + if [[ "${ENABLE_WEBSERVER}" -eq 1 ]]; then + echo "[component] webserver enabled (ragflow_server + nginx frontend/proxy)" + start_ragflow_servers + # start nginx frontend/proxy when web/ exists (build dist if needed) + if [[ -d "${WEB_DIR}" ]]; then + start_web + else + echo "[warn] ${WEB_DIR} not found; skip nginx frontend/proxy" + fi + else + echo "[component] webserver disabled" + fi + + if [[ "${ENABLE_DATASYNC}" -eq 1 ]]; then + echo "[component] datasync enabled" + start_datasync + else + echo "[component] datasync disabled" + fi + + if [[ "${ENABLE_ADMIN_SERVER}" -eq 1 ]]; then + echo "[component] admin_server enabled" + start_admin_server + fi + + if [[ "${ENABLE_MCP_SERVER}" -eq 1 ]]; then + echo "[component] mcp_server enabled" + start_mcp_server + fi + + if [[ "${ENABLE_POWERRAG_SERVER}" -eq 1 ]]; then + echo "[component] powerrag_server enabled" + start_powerrag_server + fi + + if [[ "${ENABLE_TASKEXECUTOR}" -eq 1 ]]; then + echo "[component] taskexecutor enabled" + start_task_executors + else + echo "[component] taskexecutor disabled" + fi + ;; + + stop) + stop_all + ;; + + restart) + stop_all + sleep 1 + "${SCRIPT_DIR}/deploy.sh" start "$@" + ;; + + force-stop) + force_stop_all + ;; + + clear) + clear_runtime_files + ;; + + status) + status + ;; +esac From 1070dabcac057beaeca67098c3a7bba45914ad86 Mon Sep 17 00:00:00 2001 From: "keyang.lk" Date: Fri, 9 Jan 2026 19:40:25 +0800 Subject: [PATCH 2/3] fix: fix several bugs in deploy.sh and entrypoint.sh --- .gitignore | 6 + docker/entrypoint.sh | 9 +- scripts/deploy.sh | 498 +++++++++++++++++++++++++++++++++++++++---- scripts/tools.sh | 243 +++++++++++++++++++++ 4 files changed, 707 insertions(+), 49 deletions(-) create mode 100755 scripts/tools.sh diff --git a/.gitignore b/.gitignore index b720bc966..80a9f7512 100644 --- a/.gitignore +++ b/.gitignore @@ -213,4 +213,10 @@ nginx_conf/ logs/ pods/ upload_wiki_json.pid +<<<<<<< HEAD >>>>>>> d641c4e6 (feat: add deploy.sh for startup from source code; support start multi ragflow_server in dcoker and deploy.sh) +======= +.ragflow_secret_key +setup_tools_venv.sh +build_tools_bundle.sh +>>>>>>> fc88e390 (feat: optimize upload_document using batch; fix sevral bugs in deploy.sh and entrypoint.sh) diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index d251490cb..e43af6f77 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -283,13 +283,15 @@ function _prepare_multi_ragflow_confs() { # Create per-instance configs local idx port conf_name conf_path for (( idx=0; idx<${SVR_COUNT}; idx++ )); do - conf_name="service_conf_ragflow_${idx}.yaml" - conf_path="${CONF_DIR}/${conf_name}" + if [[ "${idx}" -eq 0 ]]; then port="${SVR_HTTP_PORT}" + conf_name="local.service_conf.yaml" else port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1)) + conf_name="service_conf_ragflow_${port}.yaml" fi + conf_path="${CONF_DIR}/${conf_name}" render_service_conf "${conf_path}" "${port}" "${ADMIN_SVR_HTTP_PORT}" done } @@ -319,11 +321,12 @@ function start_ragflow_servers() { local idx port conf_name for (( idx=0; idx<${SVR_COUNT}; idx++ )); do - conf_name="service_conf_ragflow_${idx}.yaml" if [[ "${idx}" -eq 0 ]]; then port="${SVR_HTTP_PORT}" + conf_name="local.service_conf.yaml" else port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1)) + 
conf_name="service_conf_ragflow_${port}.yaml" fi echo "server 127.0.0.1:${port};" >> /etc/nginx/conf.d/ragflow_upstream.conf _start_ragflow_instance "${idx}" "${port}" "${conf_name}" diff --git a/scripts/deploy.sh b/scripts/deploy.sh index 84cc6ac2f..883e6b64b 100755 --- a/scripts/deploy.sh +++ b/scripts/deploy.sh @@ -179,6 +179,61 @@ function is_process_running() { [[ -n "${pid}" ]] && ps -p "${pid}" >/dev/null 2>&1 } +function _pids_listening_on_port() { + local port="$1" + local pids="" + if command -v lsof >/dev/null 2>&1; then + # -t: pids only; LISTEN only + pids="$(lsof -tiTCP:"${port}" -sTCP:LISTEN 2>/dev/null | tr '\n' ' ' | xargs echo 2>/dev/null || true)" + elif command -v fuser >/dev/null 2>&1; then + # fuser output format varies; best-effort + pids="$(fuser -n tcp "${port}" 2>/dev/null | tr '\n' ' ' | xargs echo 2>/dev/null || true)" + fi + echo "${pids}" +} + +function _pid_cwd_is_workspace() { + local pid="$1" + local cwd="" + if [[ -r "/proc/${pid}/cwd" ]]; then + cwd="$(readlink -f "/proc/${pid}/cwd" 2>/dev/null || true)" + fi + [[ -n "${cwd}" ]] && [[ "${cwd}" == "${WORKSPACE_FOLDER}"* ]] +} + +function _kill_port_if_matches_cmd() { + local port="$1" + local must_contain="$2" # substring to match in cmdline + local name="${3:-}" + + local pids + pids="$(_pids_listening_on_port "${port}")" + [[ -n "${pids}" ]] || return 0 + + local pid args + for pid in ${pids}; do + args="$(ps -p "${pid}" -o args= 2>/dev/null || true)" + if [[ -z "${args}" ]]; then + continue + fi + # Kill only when we're confident it's our workspace process. + # Some environments may already have other ragflow_server processes running as root. + local match=0 + if [[ "${args}" == *"${must_contain}"* ]]; then + match=1 + elif _pid_cwd_is_workspace "${pid}" && [[ "${args}" == *"api/ragflow_server.py"* ]]; then + match=1 + fi + [[ "${match}" -eq 1 ]] || continue + echo "[stop] ${name:-port ${port}}: killing listener pid=${pid} (matched: ${must_contain})" + kill "${pid}" 2>/dev/null || true + sleep 0.3 + if is_process_running "${pid}"; then + kill -9 "${pid}" 2>/dev/null || true + fi + done +} + function _default_host_id() { local hn hn="$(hostname)" @@ -244,6 +299,24 @@ function _common_env_kv() { echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-/usr/lib/x86_64-linux-gnu/:/usr/lib64/}" echo "TIKTOKEN_CACHE_DIR=${WORKSPACE_FOLDER}" echo "LIGHTEN=${LIGHTEN:-1}" + + # Enable HTTP access logs from Flask/Werkzeug by default. 
+ # - If LOG_LEVELS is empty: set root=INFO,werkzeug=INFO + # - If LOG_LEVELS exists but has no werkzeug: append werkzeug=INFO + # - If LOG_LEVELS exists but has no root: prepend root=INFO + local _log_levels="${LOG_LEVELS:-}" + if [[ -z "${_log_levels}" ]]; then + _log_levels="root=INFO,werkzeug=INFO" + else + if [[ "${_log_levels}" != *"werkzeug="* ]]; then + _log_levels="${_log_levels},werkzeug=INFO" + fi + if [[ "${_log_levels}" != *"root="* ]]; then + _log_levels="root=INFO,${_log_levels}" + fi + fi + echo "LOG_LEVELS=${_log_levels}" + echo "http_proxy=" echo "https_proxy=" echo "no_proxy=" @@ -294,6 +367,8 @@ function _start_process() { echo "[ok] started ${name} (PID: ${bg_pid})" } + + function _stop_by_pidfile() { local name="$1" local pid_file="$2" @@ -303,6 +378,12 @@ function _stop_by_pidfile() { fi local pid pid="$(cat "${pid_file}" 2>/dev/null || true)" + local port="" + # Extract port from pidfile name for ragflow_server (e.g., ragflow_server_9390.pid -> 9390) + if [[ "${name}" == "ragflow_server" ]] && [[ "${pid_file}" =~ ragflow_server_([0-9]+)\.pid ]]; then + port="${BASH_REMATCH[1]}" + fi + if is_process_running "${pid}"; then echo "[stop] ${name} (PID: ${pid})" kill "${pid}" 2>/dev/null || true @@ -313,6 +394,31 @@ function _stop_by_pidfile() { else echo "[skip] ${name} not running (stale pid: ${pid})" fi + + # For ragflow_server, check if port is still listening (child process may have outlived parent) + if [[ -n "${port}" ]] && _port_is_listening "${port}"; then + local listening_pids + listening_pids="$(_pids_listening_on_port "${port}")" + if [[ -n "${listening_pids}" ]]; then + local child_pid + for child_pid in ${listening_pids}; do + # Only kill processes from our workspace + if _pid_cwd_is_workspace "${child_pid}"; then + local args + args="$(ps -p "${child_pid}" -o args= 2>/dev/null || true)" + if [[ "${args}" == *"api/ragflow_server.py"* ]]; then + echo "[stop] ${name} (child PID: ${child_pid} on port ${port})" + kill "${child_pid}" 2>/dev/null || true + sleep 0.5 + if is_process_running "${child_pid}"; then + kill -9 "${child_pid}" 2>/dev/null || true + fi + fi + fi + done + fi + fi + rm -f "${pid_file}" } @@ -321,6 +427,123 @@ function _validate_port() { [[ "${port}" =~ ^[0-9]+$ ]] && [[ "${port}" -ge 1 ]] && [[ "${port}" -le 65535 ]] } +function _port_is_listening() { + local port="$1" + # ss without -p doesn't require extra privileges + ss -ltn "( sport = :${port} )" 2>/dev/null | grep -q ":${port} " +} + +function _pid_from_pidfile() { + local pid_file="$1" + [[ -f "${pid_file}" ]] || return 1 + cat "${pid_file}" 2>/dev/null | tr -d '[:space:]' +} + +function _pidfile_is_running() { + local pid_file="$1" + local pid + pid="$(_pid_from_pidfile "${pid_file}")" + [[ -n "${pid}" ]] && is_process_running "${pid}" +} + +function _preflight_port_or_running() { + # If pidfile indicates the component is already running, treat as OK (will be skipped by start_*). + # Otherwise, the port must be free; we do NOT stop/kill anything in start. + local name="$1" + local pid_file="$2" + local port="$3" + local hint="$4" + + if _pidfile_is_running "${pid_file}"; then + return 0 + fi + + if _port_is_listening "${port}"; then + echo "ERROR: ${name} port ${port} is already in use. start will not stop existing processes." 
>&2 + echo "Hint: inspect listener: ss -ltnp '( sport = :${port} )' (or lsof -nP -iTCP:${port} -sTCP:LISTEN)" >&2 + [[ -n "${hint}" ]] && echo "Hint: ${hint}" >&2 + return 1 + fi + return 0 +} + +function _preflight_start_all() { + # Goal: if anything would fail to start due to port conflicts, fail BEFORE starting any new process. + local fail=0 + + # ragflow_server instances + local idx port pid_file + for (( idx=0; idx<${SVR_COUNT}; idx++ )); do + if [[ "${idx}" -eq 0 ]]; then + port="${SVR_HTTP_PORT}" + else + port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1)) + fi + pid_file="${PID_DIR}/ragflow_server_${port}.pid" + if ! _preflight_port_or_running "ragflow_server" "${pid_file}" "${port}" "pick another port: --svr-http-port / --svr-extra-base-http-port (or stop/restart first)"; then + fail=1 + fi + done + + # nginx web frontend/proxy (if enabled) + if [[ "${ENABLE_WEBSERVER}" -eq 1 ]]; then + if ! _preflight_port_or_running "nginx(web)" "${PID_DIR}/web_frontend.pid" "${WEB_PORT}" "pick another port: --web-port= (or stop/restart first)"; then + fail=1 + fi + fi + + # admin_server (if enabled) + if [[ "${ENABLE_ADMIN_SERVER}" -eq 1 ]]; then + if ! _preflight_port_or_running "admin_server" "${PID_DIR}/admin_server.pid" "${ADMIN_SVR_HTTP_PORT}" "pick another port: --admin-svr-http-port= (or stop/restart first)"; then + fail=1 + fi + fi + + # mcp_server (if enabled) + if [[ "${ENABLE_MCP_SERVER}" -eq 1 ]]; then + if ! _preflight_port_or_running "mcp_server" "${PID_DIR}/mcp_server.pid" "${MCP_PORT}" "pick another port: --mcp-port= (or disable mcp_server)"; then + fail=1 + fi + fi + + # powerrag_server (if enabled) + if [[ "${ENABLE_POWERRAG_SERVER}" -eq 1 ]]; then + if ! _preflight_port_or_running "powerrag_server" "${PID_DIR}/powerrag_server.pid" "${POWERRAG_PORT}" "pick another port: --powerrag-port= (or disable powerrag_server)"; then + fail=1 + fi + fi + + [[ "${fail}" -eq 0 ]] +} + +function _check_ports_available() { + # Fail-fast if any target port is already in use by another service. + # We consider it "available" only if nothing is listening. + local -a ports=("$@") + local port + for port in "${ports[@]}"; do + if ! _port_is_listening "${port}"; then + continue + fi + + echo "ERROR: port ${port} is already in use by another service." >&2 + echo "Hint: check with: ss -ltnp '( sport = :${port} )' (or run as root to see process)" >&2 + if [[ "${port}" -eq "${ADMIN_SVR_HTTP_PORT}" ]]; then + echo "Hint: ${port} is the admin_server default port (ADMIN_SVR_HTTP_PORT). Use: --admin-svr-http-port=" >&2 + elif [[ "${port}" -eq "${WEB_PORT}" ]]; then + echo "Hint: ${port} is the nginx web port (WEB_PORT). Use: --web-port=" >&2 + elif [[ "${port}" -eq "${MCP_PORT}" ]]; then + echo "Hint: ${port} is the mcp_server port (MCP_PORT). Use: --mcp-port= or disable mcp_server" >&2 + elif [[ "${port}" -eq "${POWERRAG_PORT}" ]]; then + echo "Hint: ${port} is the powerrag_server port (POWERRAG_PORT). 
Use: --powerrag-port= or disable powerrag_server" >&2 + else + echo "Hint: if you intend to run multiple ragflow instances, use different ports: --svr-http-port / --svr-extra-base-http-port (and also consider --admin-svr-http-port)" >&2 + fi + return 1 + done + return 0 +} + function _check_port_conflicts() { local -a reserved_ports=() local -a ragflow_ports=() @@ -418,7 +641,13 @@ PY function _prepare_multi_ragflow_confs() { local idx port conf_name conf_path for (( idx=0; idx<${SVR_COUNT}; idx++ )); do - conf_name="service_conf_ragflow_${idx}.yaml" + # Align with docker/entrypoint.sh: main instance uses base service conf directly; + # extra instances use generated per-instance confs. + if [[ "${idx}" -eq 0 ]]; then + conf_name="${GLOBAL_SERVICE_CONF}" + else + conf_name="service_conf_ragflow_${idx}.yaml" + fi conf_path="${CONF_DIR}/${conf_name}" if [[ "${idx}" -eq 0 ]]; then port="${SVR_HTTP_PORT}" @@ -439,7 +668,11 @@ function start_ragflow_servers() { local idx port conf_name pid_file for (( idx=0; idx<${SVR_COUNT}; idx++ )); do - conf_name="service_conf_ragflow_${idx}.yaml" + if [[ "${idx}" -eq 0 ]]; then + conf_name="${GLOBAL_SERVICE_CONF}" + else + conf_name="service_conf_ragflow_${idx}.yaml" + fi if [[ "${idx}" -eq 0 ]]; then port="${SVR_HTTP_PORT}" else @@ -595,14 +828,53 @@ function start_web() { fi fi - local server_port_for_web="${SVR_HTTP_PORT}" local admin_port_for_web="${ADMIN_SVR_HTTP_PORT}" # nginx temp dirs (must be writable for non-root runs) local nginx_tmp_dir="${NGINX_CONF_DIR}/tmp" mkdir -p "${nginx_tmp_dir}/client_body" "${nginx_tmp_dir}/proxy" "${nginx_tmp_dir}/fastcgi" "${nginx_tmp_dir}/uwsgi" "${nginx_tmp_dir}/scgi" + # Align with docker/entrypoint.sh nginx logic: + # - generate upstream include files so nginx can proxy/load-balance to all instances + # - generate proxy.conf snippet for consistent proxy headers/settings + : > "${NGINX_CONF_DIR}/ragflow_upstream.conf" + : > "${NGINX_CONF_DIR}/admin_upstream.conf" + echo "server ${ADMIN_HOST_FOR_WEB}:${admin_port_for_web};" >> "${NGINX_CONF_DIR}/admin_upstream.conf" + + local idx port + for (( idx=0; idx<${SVR_COUNT}; idx++ )); do + if [[ "${idx}" -eq 0 ]]; then + port="${SVR_HTTP_PORT}" + else + port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1)) + fi + echo "server ${SERVER_HOST_FOR_WEB}:${port};" >> "${NGINX_CONF_DIR}/ragflow_upstream.conf" + done + + cat > "${NGINX_CONF_DIR}/proxy.conf" <<'EOF' +proxy_set_header Host $host; +proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; +proxy_set_header X-Forwarded-Proto $scheme; +proxy_http_version 1.1; +proxy_set_header Connection ""; +proxy_buffering off; +proxy_read_timeout 3600s; +proxy_send_timeout 3600s; +proxy_buffer_size 1024k; +proxy_buffers 16 1024k; +proxy_busy_buffers_size 2048k; +proxy_temp_file_write_size 2048k; +EOF + cat > "${NGINX_CONF_DIR}/ragflow.conf" </dev/null || true + # If pidfiles were stale, best-effort kill listeners by port (only if cmd matches our scripts). 
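+    # Matching is deliberately conservative: _kill_port_if_matches_cmd only kills a
+    # listener whose cmdline contains the given pattern, or whose cwd is inside
+    # WORKSPACE_FOLDER while it is running api/ragflow_server.py.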
+    local idx port
+    for (( idx=0; idx<${SVR_COUNT}; idx++ )); do
+        if [[ "${idx}" -eq 0 ]]; then
+            port="${SVR_HTTP_PORT}"
+        else
+            port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1))
+        fi
+        _kill_port_if_matches_cmd "${port}" "${RAGFLOW_SERVER_PY}" "ragflow_server:${port}"
+    done
+    _kill_port_if_matches_cmd "${ADMIN_SVR_HTTP_PORT}" "${ADMIN_SERVER_PY}" "admin_server"
+
     # optional web frontend nginx
     stop_web || true
 }
@@ -762,17 +1033,21 @@ function clear_runtime_files() {
     echo "=== clear: stop services and remove generated logs/configs/pids (best-effort) ==="
 
     # stop services started by this script (based on pids/)
+    # Only processes with pidfiles in PID_DIR are managed by this deploy.sh instance.
+    # Other ragflow_server processes on the same machine may be managed by other deploy.sh instances.
     stop_all || true
 
     # generated per-instance service confs
     rm -f "${CONF_DIR}"/service_conf_ragflow_*.yaml 2>/dev/null || true
+    # generated secret key file (align with docker/entrypoint.sh)
+    rm -f "${CONF_DIR}/.ragflow_secret_key" 2>/dev/null || true
 
     # remove runtime dirs entirely (user expectation for clear)
     rm -rf "${NGINX_CONF_DIR}" 2>/dev/null || true
     rm -rf "${PID_DIR}" 2>/dev/null || true
     rm -rf "${LOG_DIR}" 2>/dev/null || true
 
-    echo "[ok] cleared: logs/, pids/, nginx_conf/, conf/service_conf_ragflow_*.yaml"
+    echo "[ok] cleared: logs/, pids/, nginx_conf/, conf/service_conf_ragflow_*.yaml, conf/.ragflow_secret_key"
 }
 
 function status() {
@@ -780,42 +1055,164 @@
 
     echo "config:"
     echo " - service_conf(base) = conf/${GLOBAL_SERVICE_CONF}"
-    echo " - ragflow main port = ${SVR_HTTP_PORT}"
+    # Best-effort show ports from the base service conf (more accurate than defaults when status is run without flags).
+    local base_ragflow_port="${SVR_HTTP_PORT}"
+    local base_admin_port="${ADMIN_SVR_HTTP_PORT}"
+    if [[ -f "${CONF_DIR}/${GLOBAL_SERVICE_CONF}" ]] && [[ -x "${PYTHON}" ]]; then
+        local _ports
+        _ports="$("${PYTHON}" - <<PY 2>/dev/null || true
+import os
+from ruamel.yaml import YAML
+conf = os.path.join(${CONF_DIR@Q}, ${GLOBAL_SERVICE_CONF@Q})
+yaml = YAML(typ="safe")
+with open(conf, "r", encoding="utf-8") as f:
+    data = yaml.load(f) or {}
+rag = (data.get("ragflow") or {}).get("http_port")
+adm = (data.get("admin") or {}).get("http_port")
+print(f"{rag if rag is not None else ''}\\t{adm if adm is not None else ''}")
+PY
+)"
+        if [[ -n "${_ports}" ]]; then
+            base_ragflow_port="$(echo "${_ports}" | awk -F'\t' '{print $1}')"
+            base_admin_port="$(echo "${_ports}" | awk -F'\t' '{print $2}')"
+            [[ -n "${base_ragflow_port}" ]] || base_ragflow_port="${SVR_HTTP_PORT}"
+            [[ -n "${base_admin_port}" ]] || base_admin_port="${ADMIN_SVR_HTTP_PORT}"
+        fi
+    fi
+
+    echo " - ragflow main port = ${base_ragflow_port}"
     echo " - ragflow extra base = ${SVR_EXTRA_BASE_HTTP_PORT}"
-    echo " - admin port = ${ADMIN_SVR_HTTP_PORT}"
+    echo " - admin port = ${base_admin_port}"
     echo " - mcp port = ${MCP_PORT}"
     echo " - web port = ${WEB_PORT}"
 
+    # Build a port -> conf filename map from existing conf files (robust even when status is run with different flags).
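+    # NOTE: associative arrays (declare -A) require bash 4+, and the ${VAR@Q}
+    # quoting used by the embedded Python snippets below requires bash 4.4+.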
+    declare -A _ragflow_port_to_conf=()
+    if [[ -x "${PYTHON}" ]]; then
+        while IFS=$'\t' read -r _p _c; do
+            [[ -n "${_p}" && -n "${_c}" ]] || continue
+            _ragflow_port_to_conf["${_p}"]="${_c}"
+        done < <("${PYTHON}" - <<PY 2>/dev/null || true
+import glob, os
+from ruamel.yaml import YAML
+
+conf_dir = ${CONF_DIR@Q}
+base = os.path.join(conf_dir, ${GLOBAL_SERVICE_CONF@Q})
+files = []
+if os.path.isfile(base):
+    files.append(base)
+files.extend(sorted(glob.glob(os.path.join(conf_dir, "service_conf_ragflow_*.yaml"))))
+
+yaml = YAML(typ="safe")
+for f in files:
+    try:
+        with open(f, "r", encoding="utf-8") as fh:
+            data = yaml.load(fh) or {}
+        port = (data.get("ragflow") or {}).get("http_port")
+        if port is None:
+            continue
+        print(f"{int(port)}\t{os.path.basename(f)}")
+    except Exception:
+        continue
+PY
+)
+    fi
+
     # ragflow
     echo "ragflow_server:"
-    local any=0
+    local found=0
     local f pid port idx conf_name conf_path log_path
     for f in "${PID_DIR}"/ragflow_server_*.pid; do
         [[ -f "${f}" ]] || continue
+        found=1
         port="$(basename "${f}" | sed 's/ragflow_server_\(.*\)\.pid/\1/')"
         pid="$(cat "${f}" 2>/dev/null || true)"
         # best-effort infer conf name from port
-        conf_name="(unknown)"
-        if [[ "${port}" == "${SVR_HTTP_PORT}" ]]; then
-            conf_name="service_conf_ragflow_0.yaml"
-        elif [[ "${port}" =~ ^[0-9]+$ ]]; then
-            idx=$(( port - SVR_EXTRA_BASE_HTTP_PORT + 1 ))
-            if [[ "${idx}" -ge 1 ]]; then
-                conf_name="service_conf_ragflow_${idx}.yaml"
+        conf_name="${_ragflow_port_to_conf[${port}]:-(unknown)}"
+        # Backward compatible fallback when conf map isn't available
+        if [[ "${conf_name}" == "(unknown)" ]]; then
+            if [[ "${port}" == "${SVR_HTTP_PORT}" ]]; then
+                conf_name="${GLOBAL_SERVICE_CONF}"
+            elif [[ "${port}" =~ ^[0-9]+$ ]]; then
+                idx=$(( port - SVR_EXTRA_BASE_HTTP_PORT + 1 ))
+                if [[ "${idx}" -ge 1 ]]; then
+                    conf_name="service_conf_ragflow_${idx}.yaml"
+                fi
             fi
         fi
         conf_path="conf/${conf_name}"
         log_path="logs/ragflow_server_${port}.log"
 
+        # Try to get actual listening port from process's service conf file
+        actual_port="${port}"
+        if is_process_running "${pid}" && [[ -f "${CONF_DIR}/${conf_name}" ]] && [[ -x "${PYTHON}" ]]; then
+            actual_port="$("${PYTHON}" - <<PY 2>/dev/null || echo "${port}"
+import os
+from ruamel.yaml import YAML
+conf = os.path.join(${CONF_DIR@Q}, ${conf_name@Q})
+yaml = YAML(typ="safe")
+try:
+    with open(conf, "r", encoding="utf-8") as f:
+        data = yaml.load(f) or {}
+    p = (data.get("ragflow") or {}).get("http_port")
+    if p is not None:
+        print(int(p))
+except Exception:
+    pass
+PY
+)"
+            [[ -n "${actual_port}" ]] || actual_port="${port}"
+        fi
+
+        # Get actual process listening on the port (may be different from pidfile PID if it's a child process)
+        local actual_pid="${pid}"
+        local listening_pids
+        listening_pids="$(_pids_listening_on_port "${actual_port}")"
+        if [[ -n "${listening_pids}" ]]; then
+            # Prefer the PID that matches our workspace and is a ragflow_server process
+            local candidate_pid
+            for candidate_pid in ${listening_pids}; do
+                if _pid_cwd_is_workspace "${candidate_pid}"; then
+                    local args
+                    args="$(ps -p "${candidate_pid}" -o args= 2>/dev/null || true)"
+                    if [[ "${args}" == *"api/ragflow_server.py"* ]]; then
+                        actual_pid="${candidate_pid}"
+                        break
+                    fi
+                fi
+            done
+            # If no match found, use first listening PID
+            if [[ "${actual_pid}" == "${pid}" ]] && [[ -n "${listening_pids}" ]]; then
+                actual_pid="$(echo "${listening_pids}" | awk '{print $1}')"
+            fi
+        fi
+
+        # Check if pidfile process or actual listening process is running
+        local pidfile_running=0
+        local listening_running=0
         if is_process_running "${pid}"; then
-            any=1
-            echo " - [ok] port=${port} pid=${pid} conf=${conf_path} log=${log_path}"
+            pidfile_running=1
+        fi
+        if [[ "${actual_pid}" != "${pid}" ]] && is_process_running "${actual_pid}"; then
+            listening_running=1
+        fi
+
+        if [[ "${pidfile_running}" -eq 1 ]] || [[ "${listening_running}" -eq 1 ]]; then
+            local port_info="${actual_port}"
+            if [[ "${actual_port}" != "${port}" ]]; then
+                port_info="${actual_port} (pidfile=${port})"
+            fi
+            local pid_info="${actual_pid}"
+            if [[ "${actual_pid}" != "${pid}" ]]; then
+                pid_info="${actual_pid} (pidfile=${pid})"
+            fi
+            echo " - [ok] port=${port_info} pid=${pid_info} conf=${conf_path} log=${log_path}"
         else
             echo " - [down] port=${port} pid=${pid} conf=${conf_path} log=${log_path}"
         fi
     done
-    if [[ "${any}" -eq 0 ]]; then
+    if [[ "${found}" -eq 0 ]]; then
         echo " - (none)"
     fi
 
@@ -835,13 +1232,13 @@ function status() {
 
     # task executors
     echo "task_executor:"
-    any=0
+    found=0
     local args consumer_arg logf
     for f in "${PID_DIR}"/task_executor_*.pid; do
         [[ -f "${f}" ]] || continue
+        found=1
         pid="$(cat "${f}" 2>/dev/null || true)"
         if is_process_running "${pid}"; then
-            any=1
             args="$(ps -p "${pid}" -o args= 2>/dev/null || true)"
             consumer_arg="$(echo "${args}" | awk '{print $NF}')"
             # If no consumer arg provided, fallback to pid-file id.
@@ -854,7 +1251,7 @@ function status() {
             echo " - [down] id=$(basename "${f}") pid=${pid}"
         fi
     done
-    if [[ "${any}" -eq 0 ]]; then
+    if [[ "${found}" -eq 0 ]]; then
         echo " - (none)"
     fi
 
@@ -995,17 +1392,26 @@ for arg in "$@"; do
     esac
 done
 
-# Validate ports early (best-effort)
-for p in "${SVR_HTTP_PORT}" "${SVR_EXTRA_BASE_HTTP_PORT}" "${ADMIN_SVR_HTTP_PORT}" "${MCP_PORT}" "${POWERRAG_PORT}" "${WEB_PORT}"; do
-    if ! _validate_port "${p}"; then
-        echo "ERROR: invalid port: ${p}" >&2
+# Port validations / conflict checks / occupancy preflight should only block `start`.
+# Other actions (stop/status/clear/help) must not fail just because some default ports are occupied by unrelated services.
+if [[ "${ACTION}" == "start" ]]; then
+    # Validate ports early (best-effort)
+    for p in "${SVR_HTTP_PORT}" "${SVR_EXTRA_BASE_HTTP_PORT}" "${ADMIN_SVR_HTTP_PORT}" "${MCP_PORT}" "${POWERRAG_PORT}" "${WEB_PORT}"; do
+        if ! _validate_port "${p}"; then
+            echo "ERROR: invalid port: ${p}" >&2
+            exit 1
+        fi
+    done
+
+    # Check for duplicates/conflicts within our configured ports
+    if ! _check_port_conflicts; then
         exit 1
     fi
-done
 
-# Check for port conflicts
-if ! _check_port_conflicts; then
-    exit 1
+    # Preflight all components (atomic start): if anything would fail, don't start anything new.
+    if ! _preflight_start_all; then
+        exit 1
+    fi
 fi
 
 # -----------------------------------------------------------------------------
 # Main
 # -----------------------------------------------------------------------------
diff --git a/scripts/tools.sh b/scripts/tools.sh
new file mode 100755
index 000000000..e4bdb231c
--- /dev/null
+++ b/scripts/tools.sh
@@ -0,0 +1,243 @@
+#!/bin/bash
+
+# RAGFlow tools script (data upload and processing tasks)
+# Usage: ./scripts/tools.sh [upload-wiki|stop-upload-wiki|reparse-failed|stop-reparse-failed]
+
+# Resolve this script's parent directory (the project root)
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+WORKSPACE_FOLDER="$(cd "${SCRIPT_DIR}/.." && pwd)"
&& pwd)" + +PYTHON="${WORKSPACE_FOLDER}/.venv/bin/python" +UPLOAD_WIKI_JSON="${WORKSPACE_FOLDER}/scripts/upload_wiki_json.py" +REPARSE_FAILED_DOCS="${WORKSPACE_FOLDER}/scripts/reparse_failed_documents.py" + +# 日志目录 +LOG_DIR="${WORKSPACE_FOLDER}/logs" +mkdir -p "${LOG_DIR}" + +# PID 文件目录 +PID_DIR="${WORKSPACE_FOLDER}/pids" +mkdir -p "${PID_DIR}" + +# Ensure runtime exists +if [ ! -x "${PYTHON}" ]; then + echo "[tools][ERROR] Python venv not found: ${PYTHON}" >&2 + echo "[tools][ERROR] Please run: ${WORKSPACE_FOLDER}/scripts/setup_tools_venv.sh" >&2 + exit 1 +fi + +# 上传 Wiki JSON 数据 +upload_wiki_json() { + local pid_file="${PID_DIR}/upload_wiki_json.pid" + + # 检查是否已经在运行 + if [ -f "$pid_file" ]; then + PID=$(cat "$pid_file") + if ps -p $PID > /dev/null 2>&1; then + echo "Wiki JSON 上传任务已经在运行 (PID: $PID)" + return 1 + fi + fi + + echo "启动 Wiki JSON 上传任务..." + cd "${WORKSPACE_FOLDER}" + + # 默认参数(可通过环境变量覆盖) + local api_key="${API_KEY:-}" + local host="${HOST:-http://127.0.0.1:9380}" + local data_dir="${WIKI_DATA_DIR:-}" + local dataset_id="${DATASET_ID:-}" + local batch_size="${BATCH_SIZE:-1000}" + local snapshot_file="${WIKI_SNAPSHOT_FILE:-${LOG_DIR}/upload_snapshot.json}" + local enable_resume="${WIKI_ENABLE_RESUME:-true}" + + # 构建命令参数 + local resume_args="" + if [ "$enable_resume" = "true" ]; then + resume_args="--resume -s ${snapshot_file}" + fi + + nohup env \ + PYTHONPATH="${WORKSPACE_FOLDER}" \ + "${PYTHON}" "${UPLOAD_WIKI_JSON}" \ + -k "${api_key}" \ + -H "${host}" \ + -d "${data_dir}" \ + -i "${dataset_id}" \ + -b "${batch_size}" \ + ${resume_args} > "${LOG_DIR}/upload_wiki_json.log" 2>&1 & + + UPLOAD_PID=$! + echo $UPLOAD_PID > "$pid_file" + echo "Wiki JSON 上传任务已启动 (PID: $UPLOAD_PID)" + echo "日志文件: ${LOG_DIR}/upload_wiki_json.log" + if [ "$enable_resume" = "true" ]; then + echo "快照文件: ${snapshot_file}" + echo "任务支持断点续传" + fi +} + +# 停止 Wiki JSON 上传任务 +stop_upload_wiki_json() { + local pid_file="${PID_DIR}/upload_wiki_json.pid" + + if [ ! -f "$pid_file" ]; then + echo "Wiki JSON 上传任务未运行" + return 1 + fi + + PID=$(cat "$pid_file") + if ps -p $PID > /dev/null 2>&1; then + echo "停止 Wiki JSON 上传任务 (PID: $PID)..." + kill $PID + rm "$pid_file" + echo "Wiki JSON 上传任务已停止" + else + echo "Wiki JSON 上传任务未运行" + rm "$pid_file" + fi +} + +# 重新解析失败的文档 +reparse_failed_documents() { + local pid_file="${PID_DIR}/reparse_failed_docs.pid" + + # 检查是否已经在运行 + if [ -f "$pid_file" ]; then + PID=$(cat "$pid_file") + if ps -p $PID > /dev/null 2>&1; then + echo "重新解析失败文档任务已经在运行 (PID: $PID)" + return 1 + fi + fi + + echo "启动重新解析失败文档任务..." + cd "${WORKSPACE_FOLDER}" + + # 默认参数(可通过环境变量覆盖) + local api_key="${API_KEY:-}" + local host="${HOST:-http://127.0.0.1:9380}" + local dataset_id="${DATASET_ID:-}" + local batch_size="${BATCH_SIZE:-1000}" + + if [ -z "$dataset_id" ]; then + echo "错误: 必须设置 DATASET_ID 环境变量" + return 1 + fi + + nohup env \ + PYTHONPATH="${WORKSPACE_FOLDER}" \ + "${PYTHON}" "${REPARSE_FAILED_DOCS}" \ + -k "${api_key}" \ + -H "${host}" \ + -i "${dataset_id}" \ + -b "${batch_size}" > "${LOG_DIR}/reparse_failed_docs.log" 2>&1 & + + REPARSE_PID=$! + echo $REPARSE_PID > "$pid_file" + echo "重新解析失败文档任务已启动 (PID: $REPARSE_PID)" + echo "日志文件: ${LOG_DIR}/reparse_failed_docs.log" +} + +# 停止重新解析失败文档任务 +stop_reparse_failed_documents() { + local pid_file="${PID_DIR}/reparse_failed_docs.pid" + + if [ ! -f "$pid_file" ]; then + echo "重新解析失败文档任务未运行" + return 1 + fi + + PID=$(cat "$pid_file") + if ps -p $PID > /dev/null 2>&1; then + echo "停止重新解析失败文档任务 (PID: $PID)..." 
+        kill $PID
+        rm "$pid_file"
+        echo "Reparse-failed-documents task stopped"
+    else
+        echo "Reparse-failed-documents task is not running"
+        rm "$pid_file"
+    fi
+}
+
+# Show tool task status
+status() {
+    echo "=== Tool task status ==="
+
+    # Wiki JSON Upload
+    local upload_pid_file="${PID_DIR}/upload_wiki_json.pid"
+    if [ -f "$upload_pid_file" ]; then
+        PID=$(cat "$upload_pid_file")
+        if ps -p $PID > /dev/null 2>&1; then
+            echo "Wiki JSON Upload: running (PID: $PID)"
+        else
+            echo "Wiki JSON Upload: not running"
+        fi
+    else
+        echo "Wiki JSON Upload: not running"
+    fi
+
+    # Reparse Failed Documents
+    echo ""
+    local reparse_pid_file="${PID_DIR}/reparse_failed_docs.pid"
+    if [ -f "$reparse_pid_file" ]; then
+        PID=$(cat "$reparse_pid_file")
+        if ps -p $PID > /dev/null 2>&1; then
+            echo "Reparse Failed Documents: running (PID: $PID)"
+        else
+            echo "Reparse Failed Documents: not running"
+        fi
+    else
+        echo "Reparse Failed Documents: not running"
+    fi
+}
+
+# Main entry
+case "$1" in
+    upload-wiki)
+        upload_wiki_json
+        ;;
+    stop-upload-wiki)
+        stop_upload_wiki_json
+        ;;
+    reparse-failed)
+        reparse_failed_documents
+        ;;
+    stop-reparse-failed)
+        stop_reparse_failed_documents
+        ;;
+    status)
+        status
+        ;;
+    *)
+        echo "Usage: $0 {upload-wiki|stop-upload-wiki|reparse-failed|stop-reparse-failed|status}"
+        echo ""
+        echo "Environment variables:"
+        echo ""
+        echo "  Wiki upload:"
+        echo "    API_KEY            - API key"
+        echo "    HOST               - server address (default: http://127.0.0.1:9380)"
+        echo "    WIKI_DATA_DIR      - data directory"
+        echo "    DATASET_ID         - dataset ID (optional)"
+        echo "    BATCH_SIZE         - batch size (default: 1000)"
+        echo "    WIKI_SNAPSHOT_FILE - snapshot file path (default: \${LOG_DIR}/upload_snapshot.json)"
+        echo "    WIKI_ENABLE_RESUME - enable checkpoint resume (default: true)"
+        echo ""
+        echo "  Reparse failed documents:"
+        echo "    API_KEY            - API key"
+        echo "    HOST               - server address (default: http://127.0.0.1:9380)"
+        echo "    DATASET_ID         - dataset ID (required)"
+        echo "    BATCH_SIZE         - batch size (default: 1000)"
+        echo ""
+        echo "Examples:"
+        echo "  $0 upload-wiki                           # upload Wiki JSON data (background, supports checkpoint resume)"
+        echo "  $0 stop-upload-wiki                      # stop the Wiki JSON upload task"
+        echo "  BATCH_SIZE=2000 $0 upload-wiki           # upload with a custom batch size"
+        echo "  WIKI_ENABLE_RESUME=false $0 upload-wiki  # disable checkpoint resume"
+        echo "  DATASET_ID=xxx $0 reparse-failed         # reparse failed documents in the given dataset"
+        echo "  $0 stop-reparse-failed                   # stop the reparse-failed-documents task"
+        echo "  $0 status                                # show tool task status"
+        exit 1
+        ;;
+esac
+

From e0786c3ce2a0dcccc1d0db3b980727f848051bd2 Mon Sep 17 00:00:00 2001
From: "keyang.lk" 
Date: Fri, 9 Jan 2026 20:01:39 +0800
Subject: [PATCH 3/3] feat: remove scripts tools

---
 scripts/README.md |  36 +------
 scripts/tools.sh  | 243 ----------------------------------------------
 2 files changed, 1 insertion(+), 278 deletions(-)
 delete mode 100755 scripts/tools.sh

diff --git a/scripts/README.md b/scripts/README.md
index af33b5a5a..a941ded97 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -1,6 +1,6 @@
 # RAGFlow Scripts Guide
 
-This directory contains RAGFlow's deployment scripts and tool scripts, used to manage service deployment and data-processing tasks.
+This directory contains RAGFlow's deployment scripts, used to manage service deployment.
 
 ## Script Overview
 
@@ -63,17 +63,6 @@
 **Compatibility:**
 - Separate `start-web/stop-web` commands are no longer provided; the frontend nginx (static assets + API reverse proxy) starts together with `webserver`.
 
-### 2. `tools.sh` - Tools Script
-
-Used to run tool tasks related to data upload and processing.
-
-**Supported commands:**
-- `upload-wiki` - upload Wiki JSON data (runs in the background, supports checkpoint resume)
-- `stop-upload-wiki` - stop the Wiki JSON upload task
-- `reparse-failed` - reparse failed documents in the given dataset
-- `stop-reparse-failed` - stop the reparse-failed-documents task
-- `status` - show tool task status
-
 ## Quick Start
 
 ### Deployment (deploy.sh)
 
@@ -141,29 +130,6 @@
 ./scripts/deploy.sh start --enable-powerragserver --powerrag-port=6000
 ```
 
-### Tools script (tools.sh)
-
-```bash
-# Upload Wiki JSON data
-./scripts/tools.sh upload-wiki
-
-# Upload with custom parameters
-API_KEY=xxx HOST=xxx WIKI_DATA_DIR=xxx BATCH_SIZE=1000 ./scripts/tools.sh upload-wiki
-WIKI_ENABLE_RESUME=false ./scripts/tools.sh upload-wiki
-
-# Stop the upload task
-./scripts/tools.sh stop-upload-wiki
-
-# Reparse failed documents (requires a dataset ID)
-API_KEY=xxx HOST=xxx DATASET_ID=xxx BATCH_SIZE=1000 ./scripts/tools.sh reparse-failed
-
-# Stop the reparse task
-./scripts/tools.sh stop-reparse-failed
-
-# Show tool task status
-./scripts/tools.sh status
-```
-
 ## Logs and PIDs
 
 ### Service logs (default: `logs/`)
diff --git a/scripts/tools.sh b/scripts/tools.sh
deleted file mode 100755
index e4bdb231c..000000000
--- a/scripts/tools.sh
+++ /dev/null
@@ -1,243 +0,0 @@
-#!/bin/bash
-
-# RAGFlow tools script (data upload and processing tasks)
-# Usage: ./scripts/tools.sh [upload-wiki|stop-upload-wiki|reparse-failed|stop-reparse-failed]
-
-# Resolve this script's parent directory (the project root)
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-WORKSPACE_FOLDER="$(cd "${SCRIPT_DIR}/.." && pwd)"
-
-PYTHON="${WORKSPACE_FOLDER}/.venv/bin/python"
-UPLOAD_WIKI_JSON="${WORKSPACE_FOLDER}/scripts/upload_wiki_json.py"
-REPARSE_FAILED_DOCS="${WORKSPACE_FOLDER}/scripts/reparse_failed_documents.py"
-
-# Log directory
-LOG_DIR="${WORKSPACE_FOLDER}/logs"
-mkdir -p "${LOG_DIR}"
-
-# PID file directory
-PID_DIR="${WORKSPACE_FOLDER}/pids"
-mkdir -p "${PID_DIR}"
-
-# Ensure runtime exists
-if [ ! -x "${PYTHON}" ]; then
-    echo "[tools][ERROR] Python venv not found: ${PYTHON}" >&2
-    echo "[tools][ERROR] Please run: ${WORKSPACE_FOLDER}/scripts/setup_tools_venv.sh" >&2
-    exit 1
-fi
-
-# Upload Wiki JSON data
-upload_wiki_json() {
-    local pid_file="${PID_DIR}/upload_wiki_json.pid"
-
-    # Check whether the task is already running
-    if [ -f "$pid_file" ]; then
-        PID=$(cat "$pid_file")
-        if ps -p $PID > /dev/null 2>&1; then
-            echo "Wiki JSON upload task is already running (PID: $PID)"
-            return 1
-        fi
-    fi
-
-    echo "Starting Wiki JSON upload task..."
-    cd "${WORKSPACE_FOLDER}"
-
-    # Defaults (override via environment variables)
-    local api_key="${API_KEY:-}"
-    local host="${HOST:-http://127.0.0.1:9380}"
-    local data_dir="${WIKI_DATA_DIR:-}"
-    local dataset_id="${DATASET_ID:-}"
-    local batch_size="${BATCH_SIZE:-1000}"
-    local snapshot_file="${WIKI_SNAPSHOT_FILE:-${LOG_DIR}/upload_snapshot.json}"
-    local enable_resume="${WIKI_ENABLE_RESUME:-true}"
-
-    # Build command-line arguments
-    local resume_args=""
-    if [ "$enable_resume" = "true" ]; then
-        resume_args="--resume -s ${snapshot_file}"
-    fi
-
-    nohup env \
-        PYTHONPATH="${WORKSPACE_FOLDER}" \
-        "${PYTHON}" "${UPLOAD_WIKI_JSON}" \
-        -k "${api_key}" \
-        -H "${host}" \
-        -d "${data_dir}" \
-        -i "${dataset_id}" \
-        -b "${batch_size}" \
-        ${resume_args} > "${LOG_DIR}/upload_wiki_json.log" 2>&1 &
-
-    UPLOAD_PID=$!
-    echo $UPLOAD_PID > "$pid_file"
-    echo "Wiki JSON upload task started (PID: $UPLOAD_PID)"
-    echo "Log file: ${LOG_DIR}/upload_wiki_json.log"
-    if [ "$enable_resume" = "true" ]; then
-        echo "Snapshot file: ${snapshot_file}"
-        echo "The task supports checkpoint resume"
-    fi
-}
-
-# Stop the Wiki JSON upload task
-stop_upload_wiki_json() {
-    local pid_file="${PID_DIR}/upload_wiki_json.pid"
-
-    if [ ! -f "$pid_file" ]; then
-        echo "Wiki JSON upload task is not running"
-        return 1
-    fi
-
-    PID=$(cat "$pid_file")
-    if ps -p $PID > /dev/null 2>&1; then
-        echo "Stopping Wiki JSON upload task (PID: $PID)..."
-        kill $PID
-        rm "$pid_file"
-        echo "Wiki JSON upload task stopped"
-    else
-        echo "Wiki JSON upload task is not running"
-        rm "$pid_file"
-    fi
-}
-
-# Reparse failed documents
-reparse_failed_documents() {
-    local pid_file="${PID_DIR}/reparse_failed_docs.pid"
-
-    # Check whether the task is already running
-    if [ -f "$pid_file" ]; then
-        PID=$(cat "$pid_file")
-        if ps -p $PID > /dev/null 2>&1; then
-            echo "Reparse-failed-documents task is already running (PID: $PID)"
-            return 1
-        fi
-    fi
-
-    echo "Starting reparse-failed-documents task..."
-    cd "${WORKSPACE_FOLDER}"
-
-    # Defaults (override via environment variables)
-    local api_key="${API_KEY:-}"
-    local host="${HOST:-http://127.0.0.1:9380}"
-    local dataset_id="${DATASET_ID:-}"
-    local batch_size="${BATCH_SIZE:-1000}"
-
-    if [ -z "$dataset_id" ]; then
-        echo "ERROR: the DATASET_ID environment variable must be set"
-        return 1
-    fi
-
-    nohup env \
-        PYTHONPATH="${WORKSPACE_FOLDER}" \
-        "${PYTHON}" "${REPARSE_FAILED_DOCS}" \
-        -k "${api_key}" \
-        -H "${host}" \
-        -i "${dataset_id}" \
-        -b "${batch_size}" > "${LOG_DIR}/reparse_failed_docs.log" 2>&1 &
-
-    REPARSE_PID=$!
-    echo $REPARSE_PID > "$pid_file"
-    echo "Reparse-failed-documents task started (PID: $REPARSE_PID)"
-    echo "Log file: ${LOG_DIR}/reparse_failed_docs.log"
-}
-
-# Stop the reparse-failed-documents task
-stop_reparse_failed_documents() {
-    local pid_file="${PID_DIR}/reparse_failed_docs.pid"
-
-    if [ ! -f "$pid_file" ]; then
-        echo "Reparse-failed-documents task is not running"
-        return 1
-    fi
-
-    PID=$(cat "$pid_file")
-    if ps -p $PID > /dev/null 2>&1; then
-        echo "Stopping reparse-failed-documents task (PID: $PID)..."
-        kill $PID
-        rm "$pid_file"
-        echo "Reparse-failed-documents task stopped"
-    else
-        echo "Reparse-failed-documents task is not running"
-        rm "$pid_file"
-    fi
-}
-
-# Show tool task status
-status() {
-    echo "=== Tool task status ==="
-
-    # Wiki JSON Upload
-    local upload_pid_file="${PID_DIR}/upload_wiki_json.pid"
-    if [ -f "$upload_pid_file" ]; then
-        PID=$(cat "$upload_pid_file")
-        if ps -p $PID > /dev/null 2>&1; then
-            echo "Wiki JSON Upload: running (PID: $PID)"
-        else
-            echo "Wiki JSON Upload: not running"
-        fi
-    else
-        echo "Wiki JSON Upload: not running"
-    fi
-
-    # Reparse Failed Documents
-    echo ""
-    local reparse_pid_file="${PID_DIR}/reparse_failed_docs.pid"
-    if [ -f "$reparse_pid_file" ]; then
-        PID=$(cat "$reparse_pid_file")
-        if ps -p $PID > /dev/null 2>&1; then
-            echo "Reparse Failed Documents: running (PID: $PID)"
-        else
-            echo "Reparse Failed Documents: not running"
-        fi
-    else
-        echo "Reparse Failed Documents: not running"
-    fi
-}
-
-# Main entry
-case "$1" in
-    upload-wiki)
-        upload_wiki_json
-        ;;
-    stop-upload-wiki)
-        stop_upload_wiki_json
-        ;;
-    reparse-failed)
-        reparse_failed_documents
-        ;;
-    stop-reparse-failed)
-        stop_reparse_failed_documents
-        ;;
-    status)
-        status
-        ;;
-    *)
-        echo "Usage: $0 {upload-wiki|stop-upload-wiki|reparse-failed|stop-reparse-failed|status}"
-        echo ""
-        echo "Environment variables:"
-        echo ""
-        echo "  Wiki upload:"
-        echo "    API_KEY            - API key"
-        echo "    HOST               - server address (default: http://127.0.0.1:9380)"
-        echo "    WIKI_DATA_DIR      - data directory"
-        echo "    DATASET_ID         - dataset ID (optional)"
-        echo "    BATCH_SIZE         - batch size (default: 1000)"
-        echo "    WIKI_SNAPSHOT_FILE - snapshot file path (default: \${LOG_DIR}/upload_snapshot.json)"
-        echo "    WIKI_ENABLE_RESUME - enable checkpoint resume (default: true)"
-        echo ""
-        echo "  Reparse failed documents:"
-        echo "    API_KEY            - API key"
-        echo "    HOST               - server address (default: http://127.0.0.1:9380)"
-        echo "    DATASET_ID         - dataset ID (required)"
-        echo "    BATCH_SIZE         - batch size (default: 1000)"
-        echo ""
-        echo "Examples:"
-        echo "  $0 upload-wiki                           # upload Wiki JSON data (background, supports checkpoint resume)"
-        echo "  $0 stop-upload-wiki                      # stop the Wiki JSON upload task"
-        echo "  BATCH_SIZE=2000 $0 upload-wiki           # upload with a custom batch size"
-        echo "  WIKI_ENABLE_RESUME=false $0 upload-wiki  # disable checkpoint resume"
-        echo "  DATASET_ID=xxx $0 reparse-failed         # reparse failed documents in the given dataset"
-        echo "  $0 stop-reparse-failed                   # stop the reparse-failed-documents task"
-        echo "  $0 status                                # show tool task status"
-        exit 1
-        ;;
-esac
-
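As a companion to the patches above, the following minimal sketch (illustrative only, not part of any patch) shows the instance-to-port and instance-to-conf mapping that both docker/entrypoint.sh and scripts/deploy.sh implement. It assumes the SVR_COUNT, SVR_HTTP_PORT, and SVR_EXTRA_BASE_HTTP_PORT variables used throughout the series; the default values below are placeholders for the demo, not values taken from a real .env. Conf names follow the entrypoint.sh convention (port-based); deploy.sh names extra-instance confs by index (service_conf_ragflow_<idx>.yaml) instead.

```bash
#!/usr/bin/env bash
# Illustrative sketch of the multi-instance mapping used in this patch series.
SVR_COUNT="${SVR_COUNT:-3}"
SVR_HTTP_PORT="${SVR_HTTP_PORT:-9380}"
SVR_EXTRA_BASE_HTTP_PORT="${SVR_EXTRA_BASE_HTTP_PORT:-9400}"

for (( idx=0; idx<SVR_COUNT; idx++ )); do
    if [[ "${idx}" -eq 0 ]]; then
        port="${SVR_HTTP_PORT}"                       # instance 0: main port, base conf
        conf="local.service_conf.yaml"
    else
        port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1))  # instances 1..: extra base + (idx - 1)
        conf="service_conf_ragflow_${port}.yaml"
    fi
    echo "instance ${idx}: port=${port} conf=conf/${conf}"
    # This is the line the scripts append to the nginx upstream include,
    # so nginx load-balances API requests across all instances.
    echo "upstream entry: server 127.0.0.1:${port};"
done
```

With SVR_COUNT=3 and the placeholder defaults, this prints ports 9380, 9400, and 9401, matching the upstream entries the scripts generate for nginx.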