From bd7bd5b601701260d1d57e18cc87bc9418676c37 Mon Sep 17 00:00:00 2001 From: "keyang.lk" Date: Mon, 22 Dec 2025 21:26:01 +0800 Subject: [PATCH 1/3] feat: add deploy.sh for startup from source code; support start multi ragflow_server in dcoker and deploy.sh # Conflicts: # .gitignore # scripts/README.md # scripts/deploy.sh --- .gitignore | 14 +- api/ragflow_server.py | 16 +- api/utils/api_utils.py | 15 +- common/constants.py | 11 +- common/log_utils.py | 9 +- docker/.env.example | 9 + docker/docker-compose-self-hosted-ob.yml | 5 + docker/docker-compose.yml | 2 + docker/entrypoint.sh | 162 +++- docker/nginx/ragflow.conf | 14 +- docker/service_conf.yaml.template | 7 +- mcp/server/server.py | 43 +- scripts/README.md | 222 +++++ scripts/deploy.sh | 1084 ++++++++++++++++++++++ 14 files changed, 1555 insertions(+), 58 deletions(-) create mode 100644 scripts/README.md create mode 100755 scripts/deploy.sh diff --git a/.gitignore b/.gitignore index 1e4b1642b..b720bc966 100644 --- a/.gitignore +++ b/.gitignore @@ -201,4 +201,16 @@ conf/local.service_conf.yaml docker/.env docker/launch_backend_service.sh docker/.env.oceanbase -local.service_conf.yaml \ No newline at end of file +<<<<<<< HEAD +local.service_conf.yaml +======= +local.service_conf.yaml + +# Generated by scripts/deploy.sh (runtime configs) +conf/service_conf_ragflow_*.yaml +nginx_conf/ + +logs/ +pods/ +upload_wiki_json.pid +>>>>>>> d641c4e6 (feat: add deploy.sh for startup from source code; support start multi ragflow_server in dcoker and deploy.sh) diff --git a/api/ragflow_server.py b/api/ragflow_server.py index 84e996f97..20f778dc2 100644 --- a/api/ragflow_server.py +++ b/api/ragflow_server.py @@ -18,12 +18,15 @@ # from beartype.claw import beartype_all # <-- you didn't sign up for this # beartype_all(conf=BeartypeConf(violation_type=UserWarning)) # <-- emit warnings from all code +import os +import logging + from common.log_utils import init_root_logger from plugin import GlobalPluginManager -init_root_logger("ragflow_server") -import logging -import os +# Initialize logging as early as possible so show_configs() (INFO logs) won't be dropped. +# deploy.sh sets RAGFLOW_LOG_BASENAME=ragflow_server_ for multi-instance runs. +init_root_logger(os.environ.get("RAGFLOW_LOG_BASENAME", "ragflow_server")) import signal import sys import time @@ -73,6 +76,10 @@ def signal_handler(sig, frame): sys.exit(0) if __name__ == '__main__': + show_configs() + settings.init_settings() + settings.print_rag_settings() + logging.info(r""" ____ ___ ______ ______ __ / __ \ / | / ____// ____// /____ _ __ @@ -87,9 +94,6 @@ def signal_handler(sig, frame): logging.info( f'project base: {get_project_base_directory()}' ) - show_configs() - settings.init_settings() - settings.print_rag_settings() if RAGFLOW_DEBUGPY_LISTEN > 0: logging.info(f"debugpy listen on {RAGFLOW_DEBUGPY_LISTEN}") diff --git a/api/utils/api_utils.py b/api/utils/api_utils.py index 34a7c6f84..5656a086a 100644 --- a/api/utils/api_utils.py +++ b/api/utils/api_utils.py @@ -32,6 +32,7 @@ ) from peewee import OperationalError +from werkzeug.exceptions import NotFound from common.constants import ActiveEnum from api.db.db_models import APIToken @@ -77,7 +78,7 @@ def serialize_for_json(obj): def get_data_error_result(code=RetCode.DATA_ERROR, message="Sorry! 
Data missing!"): - logging.exception(Exception(message)) + logging.exception(f"Data error: {message}") result_dict = {"code": code, "message": message} response = {} for key, value in result_dict.items(): @@ -89,7 +90,15 @@ def get_data_error_result(code=RetCode.DATA_ERROR, message="Sorry! Data missing! def server_error_response(e): - logging.exception(e) + # Handle 404 NotFound errors separately with lower log level + is_404 = isinstance(e, NotFound) or getattr(e, "code", None) == 404 + if is_404: + logging.warning(f"404 Not Found: {getattr(e, 'description', str(e))}") + return get_json_result(code=RetCode.NOT_FOUND, message=str(e)) + + # For other errors, log at exception level + logging.exception(f"Server error: {e}") + try: msg = repr(e).lower() if getattr(e, "code", None) == 401 or ("unauthorized" in msg) or ("401" in msg): @@ -517,7 +526,7 @@ def verify_embedding_availability(embd_id: str, tenant_id: str) -> tuple[bool, R if not (is_builtin_model or is_tenant_model): return False, get_error_argument_result(f"Unauthorized model: <{embd_id}>") except OperationalError as e: - logging.exception(e) + logging.exception(f"Database operation failed: {e}") return False, get_error_data_result(message="Database operation failed") return True, None diff --git a/common/constants.py b/common/constants.py index 6f1fc3e8d..5f678f77f 100644 --- a/common/constants.py +++ b/common/constants.py @@ -14,10 +14,19 @@ # limitations under the License. # +import os from enum import Enum, IntEnum from strenum import StrEnum -SERVICE_CONF = "service_conf.yaml" +# +# Allow overriding the service config filename per process. +# This is useful for running multiple ragflow_server processes in one container, +# each reading its own `conf/` (and optional `conf/local.`). +# +# Example: +# RAGFLOW_SERVICE_CONF=service_conf_ragflow_1.yaml python3 api/ragflow_server.py +# +SERVICE_CONF = os.getenv("RAGFLOW_SERVICE_CONF", "service_conf.yaml") RAG_FLOW_SERVICE_NAME = "ragflow" class CustomEnum(Enum): diff --git a/common/log_utils.py b/common/log_utils.py index abbcd286b..bd4d50436 100644 --- a/common/log_utils.py +++ b/common/log_utils.py @@ -20,13 +20,14 @@ from logging.handlers import RotatingFileHandler from common.file_utils import get_project_base_directory -initialized_root_logger = False +_initialized_loggers = set() def init_root_logger(logfile_basename: str, log_format: str = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"): - global initialized_root_logger - if initialized_root_logger: + global _initialized_loggers + # Allow re-initialization for different log file names (e.g., multi-instance servers) + if logfile_basename in _initialized_loggers: return - initialized_root_logger = True + _initialized_loggers.add(logfile_basename) logger = logging.getLogger() logger.handlers.clear() diff --git a/docker/.env.example b/docker/.env.example index 4de7e38cc..6eaf2a3d0 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -47,6 +47,15 @@ ADMIN_SVR_HTTP_PORT=9381 SVR_MCP_PORT=9382 POWERRAG_SVR_HTTP_PORT=6000 + +# ------------------------------ +# Multi ragflow_server (multiple processes in one container) +# ------------------------------ +# Instance 0 listens on SVR_HTTP_PORT. +# Instances 1.. listen on RAGFLOW_SERVER_EXTRA_BASE_PORT + (idx-1). +SVR_COUNT=1 +SVR_EXTRA_BASE_HTTP_PORT=9400 + # PowerRAG server url, used to generate image links. 
# Format: 'http://:$POWERRAG_SVR_HTTP_PORT' PUBLIC_SERVER_URL= diff --git a/docker/docker-compose-self-hosted-ob.yml b/docker/docker-compose-self-hosted-ob.yml index 2c7607fed..f68ea96bd 100644 --- a/docker/docker-compose-self-hosted-ob.yml +++ b/docker/docker-compose-self-hosted-ob.yml @@ -30,7 +30,12 @@ services: - "host.docker.internal:host-gateway" # Gotenberg service for document conversion + # Optional (disabled by default). Enable with: + # docker compose --profile gotenberg up -d + # or: + # COMPOSE_PROFILES=gotenberg docker compose up -d gotenberg: + profiles: ["gotenberg"] image: gotenberg/gotenberg:8 env_file: .env environment: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 6e38ed641..e8ee7e557 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -11,6 +11,8 @@ services: - ${ADMIN_SVR_HTTP_PORT}:9381 - ${SVR_MCP_PORT}:9382 # entry for MCP (host_port:docker_port). The docker_port must match the value you set for `mcp-port` above. - ${POWERRAG_SVR_HTTP_PORT:-6000}:6000 # entry for PowerRAG server (host_port:docker_port). The docker_port must match the value you set for `powerrag-port` above. + # Optional: expose extra ragflow_server instances (default ports: 9400..) + # - 9400-9403:9400-9403 volumes: - ./ragflow-logs:/ragflow/logs - ./nginx/ragflow.conf:/etc/nginx/conf.d/ragflow.conf diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 7883337ec..d251490cb 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -30,15 +30,35 @@ function usage() { exit 1 } -ENABLE_WEBSERVER=1 # Default to enable web server -ENABLE_TASKEXECUTOR=1 # Default to enable task executor -ENABLE_DATASYNC=1 -ENABLE_MCP_SERVER=0 -ENABLE_ADMIN_SERVER=0 # Default close admin server -ENABLE_POWERRAG_SERVER=1 # Default close PowerRAG server -CONSUMER_NO_BEG=0 -CONSUMER_NO_END=0 -WORKERS=1 +ENABLE_WEBSERVER=${ENABLE_WEBSERVER:-1} # Default to enable web server +ENABLE_TASKEXECUTOR=${ENABLE_TASKEXECUTOR:-1} # Default to enable task executor +ENABLE_DATASYNC=${ENABLE_DATASYNC:-1} +ENABLE_MCP_SERVER=${ENABLE_MCP_SERVER:-0} +ENABLE_ADMIN_SERVER=${ENABLE_ADMIN_SERVER:-0} # Default close admin server +ENABLE_POWERRAG_SERVER=${ENABLE_POWERRAG_SERVER:-1} # Default close PowerRAG server +CONSUMER_NO_BEG=${CONSUMER_NO_BEG:-0} +CONSUMER_NO_END=${CONSUMER_NO_END:-0} +WORKERS=${WORKERS:-1} + +# ----------------------------------------------------------------------------- +# Multi ragflow_server support (multiple processes in one container) +# +# Notes: +# - ragflow_server reads its listen port from conf/${RAGFLOW_SERVICE_CONF:-service_conf.yaml} +# - We generate multiple config files (service_conf_ragflow_.yaml) with different ports +# - We start multiple ragflow_server processes, each with its own RAGFLOW_SERVICE_CONF +# ----------------------------------------------------------------------------- +# +# Env vars: +# - SVR_COUNT +# - SVR_HTTP_PORT +# - SVR_EXTRA_BASE_HTTP_PORT +# - ADMIN_SVR_HTTP_PORT +SVR_COUNT="${SVR_COUNT:-1}" +SVR_HTTP_PORT="${SVR_HTTP_PORT:-9380}" +# Extra instances will listen on: SVR_EXTRA_BASE_HTTP_PORT + (idx-1) +SVR_EXTRA_BASE_HTTP_PORT="${SVR_EXTRA_BASE_HTTP_PORT:-9400}" +ADMIN_SVR_HTTP_PORT="${ADMIN_SVR_HTTP_PORT:-9381}" MCP_HOST="127.0.0.1" MCP_PORT=9382 @@ -156,16 +176,64 @@ for arg in "$@"; do done # ----------------------------------------------------------------------------- -# Replace env variables in the service_conf.yaml file +# Render service config(s) from template # 
----------------------------------------------------------------------------- CONF_DIR="/ragflow/conf" TEMPLATE_FILE="${CONF_DIR}/service_conf.yaml.template" CONF_FILE="${CONF_DIR}/service_conf.yaml" -rm -f "${CONF_FILE}" -while IFS= read -r line || [[ -n "$line" ]]; do - eval "echo \"$line\"" >> "${CONF_FILE}" -done < "${TEMPLATE_FILE}" +# +# ----------------------------------------------------------------------------- +# Ensure a stable SECRET_KEY across multiple ragflow_server processes. +# +# Why: +# - Auth tokens are signed with settings.SECRET_KEY (derived from RAGFLOW_SECRET_KEY +# or conf ragflow.secret_key). If multiple ragflow_server instances in the same +# container auto-generate different keys, nginx load-balancing will cause: +# "Signature ... does not match" -> 401 -> frontend jumps back to login. +# +# Strategy: +# - If user didn't provide a strong RAGFLOW_SECRET_KEY (>=32 chars), generate ONE +# and export it so all child processes share it. +# - Persist it under /ragflow/conf so restarts inside the same volume keep stable. +# ----------------------------------------------------------------------------- +# +function ensure_ragflow_secret_key() { + local key_file="${CONF_DIR}/.ragflow_secret_key" + + if [[ -n "${RAGFLOW_SECRET_KEY:-}" && ${#RAGFLOW_SECRET_KEY} -ge 32 ]]; then + export RAGFLOW_SECRET_KEY + return 0 + fi + + if [[ -f "${key_file}" ]]; then + RAGFLOW_SECRET_KEY="$(cat "${key_file}")" + else + RAGFLOW_SECRET_KEY="$("$PY" -c 'import secrets; print(secrets.token_hex(32))')" + echo -n "${RAGFLOW_SECRET_KEY}" > "${key_file}" + chmod 600 "${key_file}" || true + fi + + if [[ ${#RAGFLOW_SECRET_KEY} -lt 32 ]]; then + echo "ERROR: failed to initialize a strong RAGFLOW_SECRET_KEY" >&2 + return 1 + fi + + export RAGFLOW_SECRET_KEY +} + +function render_service_conf() { + local out_file="$1" + local ragflow_port="$2" + local admin_port="$3" + + rm -f "${out_file}" + while IFS= read -r line || [[ -n "$line" ]]; do + # shellcheck disable=SC2034 + SVR_HTTP_PORT="${ragflow_port}" ADMIN_SVR_HTTP_PORT="${admin_port}" \ + eval "echo \"$line\"" >> "${out_file}" + done < "${TEMPLATE_FILE}" +} export LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu/" PY=python3 @@ -208,6 +276,60 @@ function start_powerrag_server() { done & } +function _prepare_multi_ragflow_confs() { + # Render base service_conf.yaml (used by other processes that don't set RAGFLOW_SERVICE_CONF) + render_service_conf "${CONF_FILE}" "${SVR_HTTP_PORT}" "${ADMIN_SVR_HTTP_PORT}" + + # Create per-instance configs + local idx port conf_name conf_path + for (( idx=0; idx<${SVR_COUNT}; idx++ )); do + conf_name="service_conf_ragflow_${idx}.yaml" + conf_path="${CONF_DIR}/${conf_name}" + if [[ "${idx}" -eq 0 ]]; then + port="${SVR_HTTP_PORT}" + else + port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1)) + fi + render_service_conf "${conf_path}" "${port}" "${ADMIN_SVR_HTTP_PORT}" + done +} + +function _start_ragflow_instance() { + local idx="$1" + local port="$2" + local conf_name="$3" + + echo "Starting ragflow_server[${idx}] on ${port} using conf/${conf_name} ..." 
+ # Align with scripts/deploy.sh: + # - run without restart loop (process supervision is external to entrypoint) + # - set per-instance logfile basename so logs are split by port + RAGFLOW_SERVICE_CONF="${conf_name}" \ + RAGFLOW_LOG_BASENAME="ragflow_server_${port}" \ + "$PY" api/ragflow_server.py & +} + +function start_ragflow_servers() { + ensure_ragflow_secret_key + _prepare_multi_ragflow_confs + + # Generate nginx upstream include files so nginx can proxy/load-balance to all instances + : > /etc/nginx/conf.d/ragflow_upstream.conf + : > /etc/nginx/conf.d/admin_upstream.conf + echo "server 127.0.0.1:${ADMIN_SVR_HTTP_PORT};" >> /etc/nginx/conf.d/admin_upstream.conf + + local idx port conf_name + for (( idx=0; idx<${SVR_COUNT}; idx++ )); do + conf_name="service_conf_ragflow_${idx}.yaml" + if [[ "${idx}" -eq 0 ]]; then + port="${SVR_HTTP_PORT}" + else + port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1)) + fi + echo "server 127.0.0.1:${port};" >> /etc/nginx/conf.d/ragflow_upstream.conf + _start_ragflow_instance "${idx}" "${port}" "${conf_name}" + done +} + function ensure_docling() { [[ "${USE_DOCLING}" == "true" ]] || { echo "[docling] disabled by USE_DOCLING"; return 0; } python3 -c 'import pip' >/dev/null 2>&1 || python3 -m ensurepip --upgrade || true @@ -257,15 +379,13 @@ ensure_docling ensure_mineru if [[ "${ENABLE_WEBSERVER}" -eq 1 ]]; then + echo "Starting ragflow_server..." + start_ragflow_servers + + # nginx upstream include files are generated by start_ragflow_servers; + # start nginx after generation so it picks them up (no reload needed). echo "Starting nginx..." /usr/sbin/nginx - - echo "Starting ragflow_server..." - while true; do - "$PY" api/ragflow_server.py & - wait; - sleep 1; - done & fi if [[ "${ENABLE_DATASYNC}" -eq 1 ]]; then diff --git a/docker/nginx/ragflow.conf b/docker/nginx/ragflow.conf index a06098276..e44ae21cc 100644 --- a/docker/nginx/ragflow.conf +++ b/docker/nginx/ragflow.conf @@ -1,3 +1,13 @@ +upstream ragflow_upstream { + # generated by entrypoint.sh + include /etc/nginx/conf.d/ragflow_upstream.conf; +} + +upstream admin_upstream { + # generated by entrypoint.sh + include /etc/nginx/conf.d/admin_upstream.conf; +} + server { listen 80; server_name _; @@ -11,12 +21,12 @@ server { gzip_disable "MSIE [1-6]\."; location ~ ^/api/v1/admin { - proxy_pass http://localhost:9381; + proxy_pass http://admin_upstream; include proxy.conf; } location ~ ^/(v1|api) { - proxy_pass http://localhost:9380; + proxy_pass http://ragflow_upstream; include proxy.conf; } diff --git a/docker/service_conf.yaml.template b/docker/service_conf.yaml.template index a46484043..208f8bc1c 100644 --- a/docker/service_conf.yaml.template +++ b/docker/service_conf.yaml.template @@ -1,9 +1,10 @@ ragflow: host: ${RAGFLOW_HOST:-0.0.0.0} - http_port: 9380 + http_port: ${SVR_HTTP_PORT:-9380} + secret_key: '${RAGFLOW_SECRET_KEY}' admin: host: ${RAGFLOW_HOST:-0.0.0.0} - http_port: 9381 + http_port: ${ADMIN_SVR_HTTP_PORT:-9381} mysql: name: '${MYSQL_DBNAME:-rag_flow}' user: '${MYSQL_USER:-root}' @@ -31,7 +32,7 @@ infinity: redis: db: 1 password: '${REDIS_PASSWORD:-infini_rag_flow}' - host: '${REDIS_HOST:-redis}:6379' + host: '${REDIS_HOST:-redis}:${REDIS_PORT:-6379}' oceanbase: scheme: 'mysql' # set 'mysql' to create connection using mysql config config: diff --git a/mcp/server/server.py b/mcp/server/server.py index 8d0d12c25..1cbeb29b0 100644 --- a/mcp/server/server.py +++ b/mcp/server/server.py @@ -34,6 +34,11 @@ import mcp.types as types from mcp.server.lowlevel import Server +try: + # In this repo, 
PYTHONPATH is set to project root so this import works. + from common.log_utils import init_root_logger +except Exception: # pragma: no cover + init_root_logger = None class LaunchMode(StrEnum): SELF_HOST = "self-host" @@ -629,6 +634,11 @@ def parse_bool_flag(key: str, default: bool) -> bool: TRANSPORT_STREAMABLE_HTTP_ENABLED = parse_bool_flag("RAGFLOW_MCP_TRANSPORT_STREAMABLE_ENABLED", transport_streamable_http_enabled) JSON_RESPONSE = parse_bool_flag("RAGFLOW_MCP_JSON_RESPONSE", json_response) + # Initialize file logging (avoid relying on nohup stdout redirection). + # Use port in basename for multi-instance clarity. + if init_root_logger is not None: + init_root_logger(f"mcp_server_{PORT}") + if MODE == LaunchMode.SELF_HOST and not HOST_API_KEY: raise click.UsageError("--api-key is required when --mode is 'self-host'") @@ -638,42 +648,41 @@ def parse_bool_flag(key: str, default: bool) -> bool: if not TRANSPORT_STREAMABLE_HTTP_ENABLED and JSON_RESPONSE: JSON_RESPONSE = False - print( + logging.info( r""" __ __ ____ ____ ____ _____ ______ _______ ____ | \/ |/ ___| _ \ / ___|| ____| _ \ \ / / ____| _ \ | |\/| | | | |_) | \___ \| _| | |_) \ \ / /| _| | |_) | | | | | |___| __/ ___) | |___| _ < \ V / | |___| _ < |_| |_|\____|_| |____/|_____|_| \_\ \_/ |_____|_| \_\ - """, - flush=True, + """ ) - print(f"MCP launch mode: {MODE}", flush=True) - print(f"MCP host: {HOST}", flush=True) - print(f"MCP port: {PORT}", flush=True) - print(f"MCP base_url: {BASE_URL}", flush=True) + logging.info("MCP launch mode: %s", MODE) + logging.info("MCP host: %s", HOST) + logging.info("MCP port: %s", PORT) + logging.info("MCP base_url: %s", BASE_URL) if not any([TRANSPORT_SSE_ENABLED, TRANSPORT_STREAMABLE_HTTP_ENABLED]): - print("At least one transport should be enabled, enable streamable-http automatically", flush=True) + logging.warning("At least one transport should be enabled, enable streamable-http automatically") TRANSPORT_STREAMABLE_HTTP_ENABLED = True if TRANSPORT_SSE_ENABLED: - print("SSE transport enabled: yes", flush=True) - print("SSE endpoint available at /sse", flush=True) + logging.info("SSE transport enabled: yes") + logging.info("SSE endpoint available at /sse") else: - print("SSE transport enabled: no", flush=True) + logging.info("SSE transport enabled: no") if TRANSPORT_STREAMABLE_HTTP_ENABLED: - print("Streamable HTTP transport enabled: yes", flush=True) - print("Streamable HTTP endpoint available at /mcp", flush=True) + logging.info("Streamable HTTP transport enabled: yes") + logging.info("Streamable HTTP endpoint available at /mcp") if JSON_RESPONSE: - print("Streamable HTTP mode: JSON response enabled", flush=True) + logging.info("Streamable HTTP mode: JSON response enabled") else: - print("Streamable HTTP mode: SSE over HTTP enabled", flush=True) + logging.info("Streamable HTTP mode: SSE over HTTP enabled") else: - print("Streamable HTTP transport enabled: no", flush=True) + logging.info("Streamable HTTP transport enabled: no") if JSON_RESPONSE: - print("Warning: --json-response ignored because streamable transport is disabled.", flush=True) + logging.warning("--json-response ignored because streamable transport is disabled.") uvicorn.run( create_starlette_app(), diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 000000000..af33b5a5a --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,222 @@ +# RAGFlow 脚本使用指南 + +本目录包含 RAGFlow 的运维部署脚本和工具脚本,用于管理服务部署和数据处理任务。 + +## 脚本说明 + +### 1. 
`deploy.sh` - Operations deployment script (restructured after `docker/entrypoint.sh`)
+
+`deploy.sh` now uses **entrypoint-style component switches**: `start/stop/status` are the actions, and `--enable-*/--disable-*/--xxx=<value>` options are the configuration.
+
+**Supported actions:**
+- `start` - Start components (defaults to `webserver + taskexecutor`; datasync is not started by default)
+- `stop` - Stop the components started by this script (based on `pids/`)
+- `restart` - stop + start
+- `status` - Show status
+- `force-stop` - Forcefully stop related processes (does not rely on PID files; use with caution)
+- `clear` - Stop, then remove the runtime files generated by the script (`logs/`, `pids/`, `nginx_conf/`, `conf/service_conf_ragflow_*.yaml`)
+- `help` - Show help
+
+**Main component switches (aligned with `docker/entrypoint.sh`):**
+- `--enable-webserver` - Start the WebServer (`ragflow_server` + optional nginx frontend)
+- `--disable-webserver` - Do not start the WebServer
+- `--enable-taskexecutor` - Start the task executor
+- `--disable-taskexecutor` - Do not start the task executor
+- `--enable-datasync` - Start datasource sync (off by default)
+- `--disable-datasync` - Do not start datasource sync
+- `--enable-mcpserver` - Start the MCP Server
+- `--enable-adminserver` - Start the Admin Server
+- `--enable-powerragserver` - Start the PowerRAG Server
+
+> Rule: if `start` is given any `--enable-*` option, the script enters **enable-only mode**: only the explicitly enabled components start, and everything else stays off.
+> Without any `--enable-*` option, the default mode applies: `webserver + taskexecutor` start. See the sketch after this section.
+
+**Multiple ragflow_server instances (aligned with `docker/entrypoint.sh`):**
+- `--svr-count=<count>`: number of instances (default 1)
+- `--svr-http-port=<port>`: port of instance 0 (default 9380)
+- `--svr-extra-base-http-port=<port>`: base port for instances 1..N-1 (default 9400, i.e. 9400, 9401, ...)
+- `--admin-svr-http-port=<port>`: written into the generated configs as `admin.http_port` (default 9381)
+- `--service-conf=<file>`: base config file (default `conf/local.service_conf.yaml`)
+
+**Note**: ragflow_server ports must not collide with admin-svr-http-port, mcp-port, powerrag-port, or web-port. The script checks for port conflicts before starting and aborts with an error.
+
+> The script generates `service_conf_ragflow_<idx>.yaml` under `conf/` and starts each instance with the matching `RAGFLOW_SERVICE_CONF` environment variable.
+
+**Task executor (consumer) options:**
+- `--consumer-no-beg=<n>`
+- `--consumer-no-end=<n>`: half-open range `[beg, end)`
+- `--workers=<n>`: if no range is given, start a fixed number of workers (default 1)
+- `--host-id=<id>`: defaults to `hostname` (md5 of the hostname if it is longer than 32 characters)
+
+**MCP options:**
+- `--mcp-host=<host>`
+- `--mcp-port=<port>`
+- `--mcp-base-url=<url>`
+- `--mcp-mode=<mode>`
+- `--mcp-host-api-key=<key>`
+- `--no-transport-sse-enabled`
+- `--no-transport-streamable-http-enabled`
+- `--no-json-response`
+
+**PowerRAG options:**
+- `--powerrag-port=<port>` (default 6000)
+
+**Compatibility:**
+- The separate `start-web/stop-web` commands are gone; the frontend nginx (static files + API reverse proxy) starts together with `webserver`.
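+
+The two start modes are easiest to see side by side. A minimal sketch (component semantics exactly as documented above; ports and defaults as listed):
+
+```bash
+# Default mode: no --enable-* flag -> webserver + taskexecutor start,
+# datasync / MCP / Admin / PowerRAG stay off.
+./scripts/deploy.sh start
+
+# Enable-only mode: any --enable-* flag means ONLY the listed components start.
+# Here only the MCP server starts; webserver and taskexecutor do NOT.
+./scripts/deploy.sh start --enable-mcpserver
+
+# Enable-only mode with several components:
+./scripts/deploy.sh start --enable-webserver --enable-taskexecutor --enable-datasync
+```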
+### 2. `tools.sh` - Utility script
+
+Runs data upload and processing tool tasks.
+
+**Supported commands:**
+- `upload-wiki` - Upload Wiki JSON data (runs in the background; supports resume)
+- `stop-upload-wiki` - Stop the Wiki JSON upload task
+- `reparse-failed` - Re-parse the failed documents of a given dataset
+- `stop-reparse-failed` - Stop the re-parse task
+- `status` - Show tool task status
+
+## Quick start
+
+### Deployment (deploy.sh)
+
+```bash
+# Start the default components: webserver + taskexecutor (datasync is off by default)
+./scripts/deploy.sh start
+
+# Show status
+./scripts/deploy.sh status
+
+# Stop
+./scripts/deploy.sh stop
+
+# Force stop (does not rely on pid files; use with caution)
+./scripts/deploy.sh force-stop
+
+# Clean up generated runtime files (stops first, then removes logs/, pids/,
+# nginx_conf/ and conf/service_conf_ragflow_*.yaml)
+./scripts/deploy.sh clear
+```
+
+#### Start multiple ragflow_server instances and multiple task executors
+
+```bash
+./scripts/deploy.sh start \
+  --svr-count=2 \
+  --svr-http-port=9380 \
+  --svr-extra-base-http-port=9400 \
+  --workers=2
+```
+
+#### Start only ragflow_server (no workers/datasync)
+
+```bash
+./scripts/deploy.sh start --enable-webserver
+```
+
+#### Start multiple ragflow_server instances (multiple ports)
+
+```bash
+./scripts/deploy.sh start \
+  --enable-webserver \
+  --svr-count=3 \
+  --svr-http-port=9380 \
+  --svr-extra-base-http-port=9400
+```
+
+#### Start task executors (fixed count)
+
+```bash
+./scripts/deploy.sh start --enable-taskexecutor --workers=2
+```
+
+#### Start task executors (range mode)
+
+```bash
+./scripts/deploy.sh start --enable-taskexecutor \
+  --consumer-no-beg=0 --consumer-no-end=5 \
+  --host-id=myhost123
+```
+
+#### Start MCP / Admin / PowerRAG
+
+```bash
+./scripts/deploy.sh start --enable-mcpserver --enable-adminserver --enable-powerragserver
+./scripts/deploy.sh start --enable-powerragserver --powerrag-port=6000
+```
+
+### Utility tasks (tools.sh)
+
+```bash
+# Upload Wiki JSON data
+./scripts/tools.sh upload-wiki
+
+# Upload with custom parameters
+API_KEY=xxx HOST=xxx WIKI_DATA_DIR=xxx BATCH_SIZE=1000 ./scripts/tools.sh upload-wiki
+WIKI_ENABLE_RESUME=false ./scripts/tools.sh upload-wiki
+
+# Stop the upload task
+./scripts/tools.sh stop-upload-wiki
+
+# Re-parse failed documents (requires a dataset ID)
+API_KEY=xxx HOST=xxx DATASET_ID=xxx BATCH_SIZE=1000 ./scripts/tools.sh reparse-failed
+
+# Stop the re-parse task
+./scripts/tools.sh stop-reparse-failed
+
+# Show tool task status
+./scripts/tools.sh status
+```
+
+## Logs and PIDs
+
+### Service logs (default location: `logs/`)
+
+**Note**: each service manages its own log file through `init_root_logger()`; the script does not duplicate logging.
+
+- `logs/ragflow_server_{port}.log` - RAGFlow server log, one per port (e.g. `logs/ragflow_server_9380.log`)
+- `logs/task_executor_{id}.log` - Task executor log (e.g. `logs/task_executor_0.log`)
+- `logs/data_sync_{consumer_no}.log` - Data sync log
+- `logs/admin_service.log` - Admin server log
+- `logs/powerrag_server.log` - PowerRAG server log
+- `logs/nginx_access.log` - Nginx access log
+- `logs/nginx_error.log` - Nginx error log
+- `logs/web_frontend.log` - Nginx startup log (startup output only)
+
+### PID files (default location: `pids/`)
+
+- `pids/ragflow_server_<port>.pid`
+- `pids/task_executor_<id>.pid`
+- `pids/datasync.pid`
+- `pids/admin_server.pid`
+- `pids/mcp_server.pid`
+- `pids/powerrag_server.pid`
+- `pids/web_frontend.pid`
+
+## Viewing logs
+
+```bash
+# RAGFlow server (by port)
+tail -f logs/ragflow_server_9380.log
+tail -f logs/ragflow_server_9400.log  # with multiple instances
+
+# Task executor (by worker id)
+tail -f logs/task_executor_0.log
+
+# Data sync
+tail -f logs/data_sync_0.log
+
+# Admin server
+tail -f logs/admin_service.log
+
+# PowerRAG server
+tail -f logs/powerrag_server.log
+
+# Nginx logs
+tail -f logs/nginx_access.log
+tail -f logs/nginx_error.log
+```
+
+## Notes
+
+1. Run from the project root: `./scripts/deploy.sh ...`
+2. `force-stop` kills the related processes forcefully; use with caution
+3. Multi-instance `ragflow_server` is driven by `RAGFLOW_SERVICE_CONF`; replacing `local.service_conf.yaml` is no longer needed
+4. **Port configuration**: when picking ragflow_server ports, keep admin-svr-http-port (default 9381), mcp-port (default 9382), and the other service ports free to avoid conflicts. The script checks before starting and aborts on conflict.
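+
+If `start` aborts with a port conflict, the current listener can be inspected before retrying. A small sketch (the same commands the script prints in its hints; `ss` may need elevated privileges to show the owning process, and 9390/9410 are just example ports):
+
+```bash
+# Who is listening on the planned ragflow_server port?
+ss -ltnp '( sport = :9380 )'
+
+# Alternative via lsof, if installed:
+lsof -nP -iTCP:9380 -sTCP:LISTEN
+
+# Then stop that service, or pick other ports:
+./scripts/deploy.sh start --svr-http-port=9390 --svr-extra-base-http-port=9410
+```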
diff --git a/scripts/deploy.sh b/scripts/deploy.sh
new file mode 100755
index 000000000..84cc6ac2f
--- /dev/null
+++ b/scripts/deploy.sh
@@ -0,0 +1,1084 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# -----------------------------------------------------------------------------
+# RAGFlow service deployment script (operations)
+#
+# Goal: restructured after the "component switches + multi-instance + port
+# options" pattern of docker/entrypoint.sh.
+#
+# - Default: start webserver (ragflow_server + optional nginx) and taskexecutor
+# - Optional: MCP / Admin / PowerRAG
+# - Supported: multiple ragflow_server instances (RAGFLOW_SERVICE_CONF points
+#   each instance at its own conf file)
+# -----------------------------------------------------------------------------
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+WORKSPACE_FOLDER="$(cd "${SCRIPT_DIR}/.." && pwd)"
+
+PYTHON="${PYTHON:-${WORKSPACE_FOLDER}/.venv/bin/python}"
+
+RAGFLOW_SERVER_PY="${WORKSPACE_FOLDER}/api/ragflow_server.py"
+TASK_EXECUTOR_PY="${WORKSPACE_FOLDER}/rag/svr/task_executor.py"
+DATASYNC_PY="${WORKSPACE_FOLDER}/rag/svr/sync_data_source.py"
+ADMIN_SERVER_PY="${WORKSPACE_FOLDER}/admin/server/admin_server.py"
+MCP_SERVER_PY="${WORKSPACE_FOLDER}/mcp/server/server.py"
+POWERRAG_SERVER_PY="${WORKSPACE_FOLDER}/powerrag/server/powerrag_server.py"
+
+CONF_DIR="${WORKSPACE_FOLDER}/conf"
+GLOBAL_SERVICE_CONF="${GLOBAL_SERVICE_CONF:-local.service_conf.yaml}"
+
+WEB_DIR="${WORKSPACE_FOLDER}/web"
+NGINX_CONF_DIR="${WORKSPACE_FOLDER}/nginx_conf"
+
+LOG_DIR="${WORKSPACE_FOLDER}/logs"
+PID_DIR="${WORKSPACE_FOLDER}/pids"
+mkdir -p "${LOG_DIR}" "${PID_DIR}" "${NGINX_CONF_DIR}"
+
+# -----------------------------------------------------------------------------
+# Stable SECRET_KEY for auth token signing across multiple ragflow_server instances
+#
+# Why:
+# - Login returns a signed token in the `Authorization` header.
+# - If multiple ragflow_server processes use different SECRET_KEYs, nginx/clients
+#   will hit different instances and get "Signature ... does not match" -> 401,
+#   bouncing the frontend back to the login page.
+#
+# Strategy:
+# - Prefer an externally provided RAGFLOW_SECRET_KEY env var (>= 32 chars).
+# - Otherwise generate ONE, persist it to conf/.ragflow_secret_key, and export it
+#   so all child processes started by this script share it.
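+#
+# Example (illustrative; `openssl rand -hex 32` emits a 64-char key, which
+# satisfies the >= 32 chars rule above):
+#   RAGFLOW_SECRET_KEY="$(openssl rand -hex 32)" ./scripts/deploy.sh start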
+# -----------------------------------------------------------------------------
+function ensure_ragflow_secret_key() {
+  local key_file="${CONF_DIR}/.ragflow_secret_key"
+
+  if [[ -n "${RAGFLOW_SECRET_KEY:-}" && ${#RAGFLOW_SECRET_KEY} -ge 32 ]]; then
+    export RAGFLOW_SECRET_KEY
+    return 0
+  fi
+
+  if [[ -f "${key_file}" ]]; then
+    RAGFLOW_SECRET_KEY="$(cat "${key_file}")"
+  else
+    _require_python
+    RAGFLOW_SECRET_KEY="$("${PYTHON}" -c 'import secrets; print(secrets.token_hex(32))')"
+    echo -n "${RAGFLOW_SECRET_KEY}" > "${key_file}"
+    chmod 600 "${key_file}" 2>/dev/null || true
+  fi
+
+  if [[ ${#RAGFLOW_SECRET_KEY} -lt 32 ]]; then
+    echo "ERROR: failed to initialize a strong RAGFLOW_SECRET_KEY" >&2
+    return 1
+  fi
+
+  export RAGFLOW_SECRET_KEY
+}
+
+# -----------------------------------------------------------------------------
+# Defaults (aligned with docker/entrypoint.sh)
+# -----------------------------------------------------------------------------
+ENABLE_WEBSERVER="${ENABLE_WEBSERVER:-1}"
+ENABLE_TASKEXECUTOR="${ENABLE_TASKEXECUTOR:-1}"
+ENABLE_DATASYNC="${ENABLE_DATASYNC:-0}"
+ENABLE_MCP_SERVER="${ENABLE_MCP_SERVER:-0}"
+ENABLE_ADMIN_SERVER="${ENABLE_ADMIN_SERVER:-0}"
+ENABLE_POWERRAG_SERVER="${ENABLE_POWERRAG_SERVER:-0}"
+
+CONSUMER_NO_BEG="${CONSUMER_NO_BEG:-0}"
+CONSUMER_NO_END="${CONSUMER_NO_END:-0}"
+WORKERS="${WORKERS:-1}"
+
+# Overridable env vars:
+# - SVR_COUNT
+# - SVR_HTTP_PORT
+# - SVR_EXTRA_BASE_HTTP_PORT
+# - ADMIN_SVR_HTTP_PORT
+SVR_COUNT="${SVR_COUNT:-1}"
+SVR_HTTP_PORT="${SVR_HTTP_PORT:-9380}"
+SVR_EXTRA_BASE_HTTP_PORT="${SVR_EXTRA_BASE_HTTP_PORT:-9400}"
+ADMIN_SVR_HTTP_PORT="${ADMIN_SVR_HTTP_PORT:-9381}"
+
+MCP_HOST="${MCP_HOST:-127.0.0.1}"
+MCP_PORT="${MCP_PORT:-9382}"
+MCP_BASE_URL="${MCP_BASE_URL:-http://127.0.0.1:${SVR_HTTP_PORT}}"
+MCP_MODE="${MCP_MODE:-self-host}"
+MCP_HOST_API_KEY="${MCP_HOST_API_KEY:-}"
+MCP_TRANSPORT_SSE_FLAG="${MCP_TRANSPORT_SSE_FLAG:---transport-sse-enabled}"
+MCP_TRANSPORT_STREAMABLE_HTTP_FLAG="${MCP_TRANSPORT_STREAMABLE_HTTP_FLAG:---transport-streamable-http-enabled}"
+MCP_JSON_RESPONSE_FLAG="${MCP_JSON_RESPONSE_FLAG:---json-response}"
+
+POWERRAG_PORT="${POWERRAG_PORT:-6000}"
+
+# Web frontend (nginx), optional
+WEB_PORT="${WEB_PORT:-9222}"
+SERVER_HOST_FOR_WEB="${SERVER_HOST_FOR_WEB:-127.0.0.1}"
+ADMIN_HOST_FOR_WEB="${ADMIN_HOST_FOR_WEB:-127.0.0.1}"
+
+# -----------------------------------------------------------------------------
+# Utilities
+# -----------------------------------------------------------------------------
+function usage() {
+  cat <<'EOF'
+Usage:
+  ./scripts/deploy.sh [start|stop|restart|status|force-stop|clear|help] [options]
+
+Description:
+  - start: start components
+    - default mode: without any --enable-*, starts webserver + taskexecutor
+      (datasync is off by default)
+    - enable-only mode: with any --enable-*, only the enabled components start
+  - stop: stop components started by this script (based on pids/)
+  - restart: stop + start
+  - status: show status
+  - force-stop: kill processes forcefully (does not rely on pid files; use with caution)
+  - clear: stop, then remove generated runtime files
+    (logs/, pids/, nginx_conf/, conf/service_conf_ragflow_*.yaml)
+
+Core options (see docker/entrypoint.sh):
+  --enable-webserver
+  --disable-webserver
+  --enable-taskexecutor
+  --disable-taskexecutor
+  --enable-datasync
+  --disable-datasync
+  --enable-mcpserver
+  --enable-adminserver
+  --enable-powerragserver
+
+Task executor options:
+  --consumer-no-beg=<n>
+  --consumer-no-end=<n>              # half-open range [beg, end)
+  --workers=<n>                      # without a range, start <n> workers
+  --host-id=<id>                     # default: hostname (<=32 chars), else md5(hostname)
+
+Multi ragflow_server:
+  --svr-count=<count>                # SVR_COUNT
+  --svr-http-port=<port>             # SVR_HTTP_PORT (port of instance 0)
+  --svr-extra-base-http-port=<port>  # SVR_EXTRA_BASE_HTTP_PORT (idx>=1: base+(idx-1))
+  --admin-svr-http-port=<port>       # ADMIN_SVR_HTTP_PORT (admin.http_port in per-instance confs)
+  --service-conf=<file>              # base conf file (default: local.service_conf.yaml)
+
+MCP options:
+  --mcp-host=<host>
+  --mcp-port=<port>
+  --mcp-base-url=<url>
+  --mcp-mode=<mode>
+  --mcp-host-api-key=<key>
+  --no-transport-sse-enabled
+  --no-transport-streamable-http-enabled
+  --no-json-response
+
+PowerRAG options:
+  --powerrag-port=<port>
+
+Notes:
+  - The webserver component starts ragflow_server and, if the web/ directory
+    exists, nginx (static frontend + API reverse proxy).
+EOF
+}
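+
+# Example invocations (illustrative only; the port math assumes the defaults above):
+#   ./scripts/deploy.sh start --svr-count=2 --workers=4
+#     -> ragflow_server instances on ports 9380 and 9400, plus 4 task executors
+#   ./scripts/deploy.sh start --enable-mcpserver
+#     -> enable-only mode: only the MCP server starts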
+
+function is_process_running() {
+  local pid="${1:-}"
+  [[ -n "${pid}" ]] && ps -p "${pid}" >/dev/null 2>&1
+}
+
+function _default_host_id() {
+  local hn
+  hn="$(hostname)"
+  if [[ ${#hn} -le 32 ]]; then
+    echo "${hn}"
+    return 0
+  fi
+  if command -v md5sum >/dev/null 2>&1; then
+    echo -n "${hn}" | md5sum | awk '{print $1}'
+    return 0
+  fi
+  "${PYTHON}" - "${hn}" <<'PY'
+import hashlib
+import sys
+
+print(hashlib.md5(sys.argv[1].encode("utf-8")).hexdigest())
+PY
+}
+
+function _require_python() {
+  if [[ ! -x "${PYTHON}" ]]; then
+    echo "ERROR: python not found: ${PYTHON}" >&2
+    echo "Hint: run ./scripts/setup_venv.sh or set PYTHON=/path/to/python" >&2
+    exit 1
+  fi
+}
+
+function _jemalloc_preload_env() {
+  # best-effort: return "LD_PRELOAD=..." if available
+  if command -v pkg-config >/dev/null 2>&1 && pkg-config --exists jemalloc >/dev/null 2>&1; then
+    local libdir
+    libdir="$(pkg-config --variable=libdir jemalloc 2>/dev/null || true)"
+    if [[ -n "${libdir}" && -f "${libdir}/libjemalloc.so" ]]; then
+      echo "LD_PRELOAD=${libdir}/libjemalloc.so"
+      return 0
+    fi
+  fi
+  if [[ -f "/usr/lib64/libjemalloc.so" ]]; then
+    echo "LD_PRELOAD=/usr/lib64/libjemalloc.so"
+    return 0
+  fi
+  echo ""
+}
+
+function _common_env_kv() {
+  # keep previous local defaults; can be overridden externally
+  local jemalloc_kv
+  jemalloc_kv="$(_jemalloc_preload_env)"
+
+  # Ensure a stable secret key for all python processes started by this script.
+  ensure_ragflow_secret_key
+  echo "RAGFLOW_SECRET_KEY=${RAGFLOW_SECRET_KEY}"
+
+  echo "PYTHONPATH=${WORKSPACE_FOLDER}"
+  echo "DOC_ENGINE=${DOC_ENGINE:-oceanbase}"
+  echo "CACHE_TYPE=${CACHE_TYPE:-redis}"
+  echo "STORAGE_IMPL=${STORAGE_IMPL:-OPENDAL}"
+  echo "NLTK_DATA=${WORKSPACE_FOLDER}/nltk_data"
+  echo "CHROME_DIR=${WORKSPACE_FOLDER}/chrome-linux64"
+  echo "CHROMEDRIVER_DIR=${WORKSPACE_FOLDER}/chromedriver-linux64"
+  echo "TIKA_SERVER_JAR=${WORKSPACE_FOLDER}/tika-server-standard-3.0.0.jar"
+  echo "HUGGINGFACE_DIR=${WORKSPACE_FOLDER}/huggingface.co"
+  echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-/usr/lib/x86_64-linux-gnu/:/usr/lib64/}"
+  echo "TIKTOKEN_CACHE_DIR=${WORKSPACE_FOLDER}"
+  echo "LIGHTEN=${LIGHTEN:-1}"
+  echo "http_proxy="
+  echo "https_proxy="
+  echo "no_proxy="
+  echo "HTTP_PROXY="
+  echo "HTTPS_PROXY="
+  echo "NO_PROXY="
+  if [[ -n "${jemalloc_kv}" ]]; then
+    echo "${jemalloc_kv}"
+  fi
+}
+
+function _start_process() {
+  local name="$1"; shift
+  local pid_file="$1"; shift
+  local workdir="$1"; shift
+  local -a cmd=( "$@" )
+
+  if [[ -f "${pid_file}" ]]; then
+    local pid
+    pid="$(cat "${pid_file}" 2>/dev/null || true)"
+    if is_process_running "${pid}"; then
+      echo "[skip] ${name} already running (PID: ${pid})"
+      return 0
+    fi
+  fi
+
+  mkdir -p "$(dirname "${pid_file}")"
+
+  # quote cmd for bash -c
+  local cmd_quoted=""
+  local arg
+  for arg in "${cmd[@]}"; do
+    cmd_quoted+="$(printf '%q ' "${arg}")"
+  done
+
+  # Run in background without a restart loop.
+  # Note: most services manage their own logs via init_root_logger() (file + stream).
+  # We discard stdout/stderr here to avoid duplicate logging (when stdout is redirected
+  # to another file) and keep only the service-managed logs under logs/.
+ nohup bash -c " + set -euo pipefail + cd $(printf '%q' "${workdir}") + ${cmd_quoted} + " >/dev/null 2>&1 & + + local bg_pid=$! + echo "${bg_pid}" > "${pid_file}" + echo "[ok] started ${name} (PID: ${bg_pid})" +} + +function _stop_by_pidfile() { + local name="$1" + local pid_file="$2" + if [[ ! -f "${pid_file}" ]]; then + echo "[skip] ${name} not running (no pid file)" + return 0 + fi + local pid + pid="$(cat "${pid_file}" 2>/dev/null || true)" + if is_process_running "${pid}"; then + echo "[stop] ${name} (PID: ${pid})" + kill "${pid}" 2>/dev/null || true + sleep 0.5 + if is_process_running "${pid}"; then + kill -9 "${pid}" 2>/dev/null || true + fi + else + echo "[skip] ${name} not running (stale pid: ${pid})" + fi + rm -f "${pid_file}" +} + +function _validate_port() { + local port="$1" + [[ "${port}" =~ ^[0-9]+$ ]] && [[ "${port}" -ge 1 ]] && [[ "${port}" -le 65535 ]] +} + +function _check_port_conflicts() { + local -a reserved_ports=() + local -a ragflow_ports=() + local port idx + + # Collect reserved ports (admin, mcp, powerrag, web) + reserved_ports+=("${ADMIN_SVR_HTTP_PORT}") + reserved_ports+=("${MCP_PORT}") + reserved_ports+=("${POWERRAG_PORT}") + reserved_ports+=("${WEB_PORT}") + + # Collect ragflow_server ports + ragflow_ports+=("${SVR_HTTP_PORT}") + for (( idx=1; idx<${SVR_COUNT}; idx++ )); do + port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1)) + ragflow_ports+=("${port}") + done + + # Check ragflow ports against reserved ports + for port in "${ragflow_ports[@]}"; do + for reserved in "${reserved_ports[@]}"; do + if [[ "${port}" -eq "${reserved}" ]]; then + echo "ERROR: Port conflict detected: ragflow_server port ${port} conflicts with reserved port ${reserved}" >&2 + if [[ "${port}" -eq "${ADMIN_SVR_HTTP_PORT}" ]]; then + echo " Hint: SVR_HTTP_PORT or SVR_EXTRA_BASE_HTTP_PORT should not equal ADMIN_SVR_HTTP_PORT (${ADMIN_SVR_HTTP_PORT})" >&2 + elif [[ "${port}" -eq "${MCP_PORT}" ]]; then + echo " Hint: ragflow_server port conflicts with MCP_PORT (${MCP_PORT})" >&2 + elif [[ "${port}" -eq "${POWERRAG_PORT}" ]]; then + echo " Hint: ragflow_server port conflicts with POWERRAG_PORT (${POWERRAG_PORT})" >&2 + elif [[ "${port}" -eq "${WEB_PORT}" ]]; then + echo " Hint: ragflow_server port conflicts with WEB_PORT (${WEB_PORT})" >&2 + fi + return 1 + fi + done + done + + # Check for duplicates within ragflow ports + local -a seen=() + for port in "${ragflow_ports[@]}"; do + for seen_port in "${seen[@]}"; do + if [[ "${port}" -eq "${seen_port}" ]]; then + echo "ERROR: Duplicate ragflow_server port detected: ${port}" >&2 + echo " Hint: SVR_COUNT=${SVR_COUNT}, SVR_HTTP_PORT=${SVR_HTTP_PORT}, SVR_EXTRA_BASE_HTTP_PORT=${SVR_EXTRA_BASE_HTTP_PORT}" >&2 + return 1 + fi + done + seen+=("${port}") + done + + return 0 +} + +# ----------------------------------------------------------------------------- +# Config generation (per-instance service conf) +# ----------------------------------------------------------------------------- +function _render_service_conf_copy() { + local out_file="$1" + local ragflow_port="$2" + local admin_port="$3" + local base_file="${CONF_DIR}/${GLOBAL_SERVICE_CONF}" + + if [[ ! 
-f "${base_file}" ]]; then + echo "ERROR: base service conf not found: ${base_file}" >&2 + exit 1 + fi + + "${PYTHON}" - </dev/null || true + fi + + # env args (newline-separated key=value) + local -a env_args + mapfile -t env_args < <(_common_env_kv) + + echo "[start] ragflow_server port=${port} conf=${conf_name}" + _start_process \ + "ragflow_server:${port}" \ + "${pid_file}" \ + "${WORKSPACE_FOLDER}" \ + env "${env_args[@]}" "RAGFLOW_SERVICE_CONF=${conf_name}" "RAGFLOW_LOG_BASENAME=ragflow_server_${port}" \ + "${PYTHON}" "${RAGFLOW_SERVER_PY}" + done +} + +function start_datasync() { + _require_python + local pid_file="${PID_DIR}/datasync.pid" + local -a env_args + mapfile -t env_args < <(_common_env_kv) + _start_process \ + "datasync" \ + "${pid_file}" \ + "${WORKSPACE_FOLDER}" \ + env "${env_args[@]}" "${PYTHON}" "${DATASYNC_PY}" +} + +function start_admin_server() { + _require_python + local pid_file="${PID_DIR}/admin_server.pid" + local -a env_args + mapfile -t env_args < <(_common_env_kv) + _start_process \ + "admin_server" \ + "${pid_file}" \ + "${WORKSPACE_FOLDER}" \ + env "${env_args[@]}" "${PYTHON}" "${ADMIN_SERVER_PY}" +} + +function start_mcp_server() { + _require_python + local pid_file="${PID_DIR}/mcp_server.pid" + local -a env_args + mapfile -t env_args < <(_common_env_kv) + _start_process \ + "mcp_server" \ + "${pid_file}" \ + "${WORKSPACE_FOLDER}" \ + env "${env_args[@]}" \ + "${PYTHON}" "${MCP_SERVER_PY}" \ + --host="${MCP_HOST}" \ + --port="${MCP_PORT}" \ + --base-url="${MCP_BASE_URL}" \ + --mode="${MCP_MODE}" \ + --api-key="${MCP_HOST_API_KEY}" \ + "${MCP_TRANSPORT_SSE_FLAG}" \ + "${MCP_TRANSPORT_STREAMABLE_HTTP_FLAG}" \ + "${MCP_JSON_RESPONSE_FLAG}" +} + +function start_powerrag_server() { + _require_python + local pid_file="${PID_DIR}/powerrag_server.pid" + local -a env_args + mapfile -t env_args < <(_common_env_kv) + _start_process \ + "powerrag_server" \ + "${pid_file}" \ + "${WORKSPACE_FOLDER}" \ + env "${env_args[@]}" "${PYTHON}" "${POWERRAG_SERVER_PY}" --port="${POWERRAG_PORT}" +} + +function start_task_executor() { + _require_python + local consumer_id="$1" + local pid_file="${PID_DIR}/task_executor_${consumer_id}.pid" + local -a env_args + mapfile -t env_args < <(_common_env_kv) + _start_process \ + "task_executor[${consumer_id}]" \ + "${pid_file}" \ + "${WORKSPACE_FOLDER}" \ + env "${env_args[@]}" "${PYTHON}" "${TASK_EXECUTOR_PY}" "${consumer_id}" +} + +function start_task_executors() { + if [[ "${CONSUMER_NO_END}" -gt "${CONSUMER_NO_BEG}" ]]; then + echo "[start] task executors range=[${CONSUMER_NO_BEG},${CONSUMER_NO_END})" + local i + for (( i=CONSUMER_NO_BEG; i/dev/null || true)" + if is_process_running "${pid}"; then + echo "[skip] web frontend already running (PID: ${pid}, PORT: ${WEB_PORT})" + return 0 + fi + fi + + if [[ ! -d "${WEB_DIR}" ]]; then + echo "[warn] web dir not found: ${WEB_DIR}; skip nginx frontend/proxy" >&2 + return 0 + fi + if ! _validate_port "${WEB_PORT}"; then + echo "ERROR: invalid WEB_PORT=${WEB_PORT}" >&2 + return 1 + fi + if ! command -v nginx >/dev/null 2>&1; then + echo "[warn] nginx not found in PATH; skip nginx frontend/proxy" >&2 + return 0 + fi + + # If dist missing, try build (best-effort) + if [[ ! -d "${WEB_DIR}/dist" ]]; then + if command -v npm >/dev/null 2>&1; then + echo "[web] dist missing, running build..." + if ! 
(cd "${WEB_DIR}" && npm install && npm run build); then + echo "[warn] web build failed; skip nginx frontend/proxy" >&2 + return 0 + fi + else + echo "[warn] ${WEB_DIR}/dist not found and npm not available; skip nginx frontend/proxy" >&2 + return 0 + fi + fi + + local server_port_for_web="${SVR_HTTP_PORT}" + local admin_port_for_web="${ADMIN_SVR_HTTP_PORT}" + + # nginx temp dirs (must be writable for non-root runs) + local nginx_tmp_dir="${NGINX_CONF_DIR}/tmp" + mkdir -p "${nginx_tmp_dir}/client_body" "${nginx_tmp_dir}/proxy" "${nginx_tmp_dir}/fastcgi" "${nginx_tmp_dir}/uwsgi" "${nginx_tmp_dir}/scgi" + + cat > "${NGINX_CONF_DIR}/ragflow.conf" < "${NGINX_CONF_DIR}/nginx.conf" </dev/null 2>&1; then + echo "[warn] nginx config invalid; skip nginx frontend/proxy" >&2 + nginx -t -c "${NGINX_CONF_DIR}/nginx.conf" -e "${LOG_DIR}/nginx_error.log" -g "pid ${PID_DIR}/nginx.pid;" >&2 || true + return 0 + fi + + echo "[start] web frontend nginx (PORT: ${WEB_PORT})" + nginx -c "${NGINX_CONF_DIR}/nginx.conf" -e "${LOG_DIR}/nginx_error.log" -g "pid ${PID_DIR}/nginx.pid;" > "${LOG_DIR}/web_frontend.log" 2>&1 + + if [[ -f "${PID_DIR}/nginx.pid" ]]; then + local web_pid + web_pid="$(cat "${PID_DIR}/nginx.pid")" + echo "${web_pid}" > "${pid_file}" + echo "[ok] web frontend started (PID: ${web_pid})" + else + echo "[warn] nginx.pid not found; check ${LOG_DIR}/web_frontend.log" >&2 + return 0 + fi +} + +function stop_web() { + _stop_by_pidfile "web_frontend" "${PID_DIR}/web_frontend.pid" + # also cleanup nginx pid if present + if [[ -f "${PID_DIR}/nginx.pid" ]]; then + local pid + pid="$(cat "${PID_DIR}/nginx.pid" 2>/dev/null || true)" + if is_process_running "${pid}"; then + nginx -s quit -c "${NGINX_CONF_DIR}/nginx.conf" 2>/dev/null || kill "${pid}" 2>/dev/null || true + fi + rm -f "${PID_DIR}/nginx.pid" + fi +} + +# ----------------------------------------------------------------------------- +# Stop/Status/Force-stop +# ----------------------------------------------------------------------------- +function stop_all() { + # reverse-ish order + _stop_by_pidfile "powerrag_server" "${PID_DIR}/powerrag_server.pid" + _stop_by_pidfile "mcp_server" "${PID_DIR}/mcp_server.pid" + _stop_by_pidfile "admin_server" "${PID_DIR}/admin_server.pid" + _stop_by_pidfile "datasync" "${PID_DIR}/datasync.pid" + + # task executors + local f + for f in "${PID_DIR}"/task_executor_*.pid; do + [[ -f "${f}" ]] || continue + _stop_by_pidfile "task_executor" "${f}" + done + + # ragflow servers + for f in "${PID_DIR}"/ragflow_server_*.pid; do + [[ -f "${f}" ]] || continue + _stop_by_pidfile "ragflow_server" "${f}" + done + rm -f "${PID_DIR}/ragflow_server.pid" 2>/dev/null || true + + # optional web frontend nginx + stop_web || true +} + +function force_stop_all() { + echo "=== force-stop: killing related processes (best-effort) ===" + pkill -f "${RAGFLOW_SERVER_PY}" 2>/dev/null || true + pkill -f "${TASK_EXECUTOR_PY}" 2>/dev/null || true + pkill -f "${DATASYNC_PY}" 2>/dev/null || true + pkill -f "${ADMIN_SERVER_PY}" 2>/dev/null || true + pkill -f "${MCP_SERVER_PY}" 2>/dev/null || true + pkill -f "${POWERRAG_SERVER_PY}" 2>/dev/null || true + pkill -f "${NGINX_CONF_DIR}/ragflow.conf" 2>/dev/null || true + rm -f "${PID_DIR}"/*.pid 2>/dev/null || true +} + +function clear_runtime_files() { + echo "=== clear: stop services and remove generated logs/configs/pids (best-effort) ===" + + # stop services started by this script (based on pids/) + stop_all || true + + # generated per-instance service confs + rm -f 
"${CONF_DIR}"/service_conf_ragflow_*.yaml 2>/dev/null || true + + # remove runtime dirs entirely (user expectation for clear) + rm -rf "${NGINX_CONF_DIR}" 2>/dev/null || true + rm -rf "${PID_DIR}" 2>/dev/null || true + rm -rf "${LOG_DIR}" 2>/dev/null || true + + echo "[ok] cleared: logs/, pids/, nginx_conf/, conf/service_conf_ragflow_*.yaml" +} + +function status() { + echo "=== status ===" + + echo "config:" + echo " - service_conf(base) = conf/${GLOBAL_SERVICE_CONF}" + echo " - ragflow main port = ${SVR_HTTP_PORT}" + echo " - ragflow extra base = ${SVR_EXTRA_BASE_HTTP_PORT}" + echo " - admin port = ${ADMIN_SVR_HTTP_PORT}" + echo " - mcp port = ${MCP_PORT}" + echo " - web port = ${WEB_PORT}" + + # ragflow + echo "ragflow_server:" + local any=0 + local f pid port idx conf_name conf_path log_path + for f in "${PID_DIR}"/ragflow_server_*.pid; do + [[ -f "${f}" ]] || continue + port="$(basename "${f}" | sed 's/ragflow_server_\(.*\)\.pid/\1/')" + pid="$(cat "${f}" 2>/dev/null || true)" + + # best-effort infer conf name from port + conf_name="(unknown)" + if [[ "${port}" == "${SVR_HTTP_PORT}" ]]; then + conf_name="service_conf_ragflow_0.yaml" + elif [[ "${port}" =~ ^[0-9]+$ ]]; then + idx=$(( port - SVR_EXTRA_BASE_HTTP_PORT + 1 )) + if [[ "${idx}" -ge 1 ]]; then + conf_name="service_conf_ragflow_${idx}.yaml" + fi + fi + conf_path="conf/${conf_name}" + log_path="logs/ragflow_server_${port}.log" + + if is_process_running "${pid}"; then + any=1 + echo " - [ok] port=${port} pid=${pid} conf=${conf_path} log=${log_path}" + else + echo " - [down] port=${port} pid=${pid} conf=${conf_path} log=${log_path}" + fi + done + if [[ "${any}" -eq 0 ]]; then + echo " - (none)" + fi + + # web frontend (nginx) + echo "web_frontend:" + local web_pf="${PID_DIR}/web_frontend.pid" + if [[ -f "${web_pf}" ]]; then + pid="$(cat "${web_pf}" 2>/dev/null || true)" + if is_process_running "${pid}"; then + echo " - [ok] pid=${pid} port=${WEB_PORT} conf=nginx_conf/nginx.conf access_log=logs/nginx_access.log error_log=logs/nginx_error.log" + else + echo " - [down] pid=${pid} port=${WEB_PORT} conf=nginx_conf/nginx.conf access_log=logs/nginx_access.log error_log=logs/nginx_error.log" + fi + else + echo " - (disabled/not started)" + fi + + # task executors + echo "task_executor:" + any=0 + local args consumer_arg logf + for f in "${PID_DIR}"/task_executor_*.pid; do + [[ -f "${f}" ]] || continue + pid="$(cat "${f}" 2>/dev/null || true)" + if is_process_running "${pid}"; then + any=1 + args="$(ps -p "${pid}" -o args= 2>/dev/null || true)" + consumer_arg="$(echo "${args}" | awk '{print $NF}')" + # If no consumer arg provided, fallback to pid-file id. 
+ if [[ -z "${consumer_arg}" || "${consumer_arg}" == *".py" ]]; then + consumer_arg="$(basename "${f}" | sed 's/task_executor_\(.*\)\.pid/\1/')" + fi + logf="logs/task_executor_${consumer_arg}.log" + echo " - [ok] id=$(basename "${f}") pid=${pid} log=${logf}" + else + echo " - [down] id=$(basename "${f}") pid=${pid}" + fi + done + if [[ "${any}" -eq 0 ]]; then + echo " - (none)" + fi + + # datasync + echo "datasync:" + local ds_pf="${PID_DIR}/datasync.pid" + if [[ -f "${ds_pf}" ]]; then + pid="$(cat "${ds_pf}" 2>/dev/null || true)" + if is_process_running "${pid}"; then + args="$(ps -p "${pid}" -o args= 2>/dev/null || true)" + consumer_arg="$(echo "${args}" | awk '{print $NF}')" + if [[ -z "${consumer_arg}" || "${consumer_arg}" == *".py" ]]; then + consumer_arg="0" + fi + logf="logs/data_sync_${consumer_arg}.log" + echo " - [ok] pid=${pid} log=${logf}" + else + echo " - [down] pid=${pid}" + fi + else + echo " - (disabled/not started)" + fi + + # admin + echo "admin_server:" + local ad_pf="${PID_DIR}/admin_server.pid" + if [[ -f "${ad_pf}" ]]; then + pid="$(cat "${ad_pf}" 2>/dev/null || true)" + if is_process_running "${pid}"; then + echo " - [ok] pid=${pid} port=${ADMIN_SVR_HTTP_PORT} log=logs/admin_service.log" + else + echo " - [down] pid=${pid} port=${ADMIN_SVR_HTTP_PORT} log=logs/admin_service.log" + fi + else + echo " - (disabled/not started)" + fi + + # mcp + echo "mcp_server:" + local mcp_pf="${PID_DIR}/mcp_server.pid" + if [[ -f "${mcp_pf}" ]]; then + pid="$(cat "${mcp_pf}" 2>/dev/null || true)" + if is_process_running "${pid}"; then + # port is from script args by default + echo " - [ok] pid=${pid} port=${MCP_PORT} log=logs/mcp_server_${MCP_PORT}.log" + else + echo " - [down] pid=${pid} port=${MCP_PORT} log=logs/mcp_server_${MCP_PORT}.log" + fi + else + echo " - (disabled/not started)" + fi + + # powerrag + echo "powerrag_server:" + local pr_pf="${PID_DIR}/powerrag_server.pid" + if [[ -f "${pr_pf}" ]]; then + pid="$(cat "${pr_pf}" 2>/dev/null || true)" + if is_process_running "${pid}"; then + echo " - [ok] pid=${pid} port=${POWERRAG_PORT} log=logs/powerrag_server.log" + else + echo " - [down] pid=${pid} port=${POWERRAG_PORT} log=logs/powerrag_server.log" + fi + else + echo " - (disabled/not started)" + fi +} + +# ----------------------------------------------------------------------------- +# Arg parsing (entrypoint-like) +# ----------------------------------------------------------------------------- +ACTION="${1:-start}" +shift || true + +case "${ACTION}" in + start|stop|restart|status|force-stop|clear|help) ;; + *) + # allow calling with only options: ./deploy.sh --disable-taskexecutor ... + if [[ "${ACTION}" == --* ]]; then + set -- "${ACTION}" "$@" + ACTION="start" + else + echo "Unknown action: ${ACTION}" >&2 + usage + exit 1 + fi + ;; +esac + +# If any --enable-* option is provided, switch to "enable-only" mode: +# only explicitly enabled components will be started. 
+HAS_ENABLE_FLAGS=0 +for arg in "$@"; do + if [[ "${arg}" == --enable-* ]]; then + HAS_ENABLE_FLAGS=1 + break + fi +done +if [[ "${HAS_ENABLE_FLAGS}" -eq 1 ]]; then + ENABLE_WEBSERVER=0 + ENABLE_TASKEXECUTOR=0 + ENABLE_DATASYNC=0 + ENABLE_MCP_SERVER=0 + ENABLE_ADMIN_SERVER=0 + ENABLE_POWERRAG_SERVER=0 +fi + +for arg in "$@"; do + case "${arg}" in + --enable-webserver) ENABLE_WEBSERVER=1 ;; + --disable-webserver) ENABLE_WEBSERVER=0 ;; + --enable-taskexecutor) ENABLE_TASKEXECUTOR=1 ;; + --disable-taskexecutor) ENABLE_TASKEXECUTOR=0 ;; + --disable-datasync) ENABLE_DATASYNC=0 ;; + --enable-datasync) ENABLE_DATASYNC=1 ;; + --enable-mcpserver) ENABLE_MCP_SERVER=1 ;; + --enable-adminserver) ENABLE_ADMIN_SERVER=1 ;; + --enable-powerragserver) ENABLE_POWERRAG_SERVER=1 ;; + --consumer-no-beg=*) CONSUMER_NO_BEG="${arg#*=}" ;; + --consumer-no-end=*) CONSUMER_NO_END="${arg#*=}" ;; + --workers=*) WORKERS="${arg#*=}" ;; + --host-id=*) HOST_ID="${arg#*=}" ;; + --svr-count=*) SVR_COUNT="${arg#*=}" ;; + --svr-http-port=*) SVR_HTTP_PORT="${arg#*=}" ;; + --svr-extra-base-http-port=*) SVR_EXTRA_BASE_HTTP_PORT="${arg#*=}" ;; + --admin-svr-http-port=*) ADMIN_SVR_HTTP_PORT="${arg#*=}" ;; + + --service-conf=*) GLOBAL_SERVICE_CONF="${arg#*=}" ;; + --mcp-host=*) MCP_HOST="${arg#*=}" ;; + --mcp-port=*) MCP_PORT="${arg#*=}" ;; + --mcp-base-url=*) MCP_BASE_URL="${arg#*=}" ;; + --mcp-mode=*) MCP_MODE="${arg#*=}" ;; + --mcp-host-api-key=*) MCP_HOST_API_KEY="${arg#*=}" ;; + --no-transport-sse-enabled) MCP_TRANSPORT_SSE_FLAG="--no-transport-sse-enabled" ;; + --no-transport-streamable-http-enabled) MCP_TRANSPORT_STREAMABLE_HTTP_FLAG="--no-transport-streamable-http-enabled" ;; + --no-json-response) MCP_JSON_RESPONSE_FLAG="--no-json-response" ;; + --powerrag-port=*) POWERRAG_PORT="${arg#*=}" ;; + *) echo "Unknown option: ${arg}" >&2; usage; exit 1 ;; + esac +done + +# Validate ports early (best-effort) +for p in "${SVR_HTTP_PORT}" "${SVR_EXTRA_BASE_HTTP_PORT}" "${ADMIN_SVR_HTTP_PORT}" "${MCP_PORT}" "${POWERRAG_PORT}" "${WEB_PORT}"; do + if ! _validate_port "${p}"; then + echo "ERROR: invalid port: ${p}" >&2 + exit 1 + fi +done + +# Check for port conflicts +if ! 
_check_port_conflicts; then + exit 1 +fi + +# ----------------------------------------------------------------------------- +# Main +# ----------------------------------------------------------------------------- +case "${ACTION}" in + help) + usage + ;; + + start) + if [[ "${ENABLE_WEBSERVER}" -eq 1 ]]; then + echo "[component] webserver enabled (ragflow_server + nginx frontend/proxy)" + start_ragflow_servers + # start nginx frontend/proxy when web/ exists (build dist if needed) + if [[ -d "${WEB_DIR}" ]]; then + start_web + else + echo "[warn] ${WEB_DIR} not found; skip nginx frontend/proxy" + fi + else + echo "[component] webserver disabled" + fi + + if [[ "${ENABLE_DATASYNC}" -eq 1 ]]; then + echo "[component] datasync enabled" + start_datasync + else + echo "[component] datasync disabled" + fi + + if [[ "${ENABLE_ADMIN_SERVER}" -eq 1 ]]; then + echo "[component] admin_server enabled" + start_admin_server + fi + + if [[ "${ENABLE_MCP_SERVER}" -eq 1 ]]; then + echo "[component] mcp_server enabled" + start_mcp_server + fi + + if [[ "${ENABLE_POWERRAG_SERVER}" -eq 1 ]]; then + echo "[component] powerrag_server enabled" + start_powerrag_server + fi + + if [[ "${ENABLE_TASKEXECUTOR}" -eq 1 ]]; then + echo "[component] taskexecutor enabled" + start_task_executors + else + echo "[component] taskexecutor disabled" + fi + ;; + + stop) + stop_all + ;; + + restart) + stop_all + sleep 1 + "${SCRIPT_DIR}/deploy.sh" start "$@" + ;; + + force-stop) + force_stop_all + ;; + + clear) + clear_runtime_files + ;; + + status) + status + ;; +esac From 1070dabcac057beaeca67098c3a7bba45914ad86 Mon Sep 17 00:00:00 2001 From: "keyang.lk" Date: Fri, 9 Jan 2026 19:40:25 +0800 Subject: [PATCH 2/3] fix: fix several bugs in deploy.sh and entrypoint.sh --- .gitignore | 6 + docker/entrypoint.sh | 9 +- scripts/deploy.sh | 498 +++++++++++++++++++++++++++++++++++++++---- scripts/tools.sh | 243 +++++++++++++++++++++ 4 files changed, 707 insertions(+), 49 deletions(-) create mode 100755 scripts/tools.sh diff --git a/.gitignore b/.gitignore index b720bc966..80a9f7512 100644 --- a/.gitignore +++ b/.gitignore @@ -213,4 +213,10 @@ nginx_conf/ logs/ pods/ upload_wiki_json.pid +<<<<<<< HEAD >>>>>>> d641c4e6 (feat: add deploy.sh for startup from source code; support start multi ragflow_server in dcoker and deploy.sh) +======= +.ragflow_secret_key +setup_tools_venv.sh +build_tools_bundle.sh +>>>>>>> fc88e390 (feat: optimize upload_document using batch; fix sevral bugs in deploy.sh and entrypoint.sh) diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index d251490cb..e43af6f77 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -283,13 +283,15 @@ function _prepare_multi_ragflow_confs() { # Create per-instance configs local idx port conf_name conf_path for (( idx=0; idx<${SVR_COUNT}; idx++ )); do - conf_name="service_conf_ragflow_${idx}.yaml" - conf_path="${CONF_DIR}/${conf_name}" + if [[ "${idx}" -eq 0 ]]; then port="${SVR_HTTP_PORT}" + conf_name="local.service_conf.yaml" else port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1)) + conf_name="service_conf_ragflow_${port}.yaml" fi + conf_path="${CONF_DIR}/${conf_name}" render_service_conf "${conf_path}" "${port}" "${ADMIN_SVR_HTTP_PORT}" done } @@ -319,11 +321,12 @@ function start_ragflow_servers() { local idx port conf_name for (( idx=0; idx<${SVR_COUNT}; idx++ )); do - conf_name="service_conf_ragflow_${idx}.yaml" if [[ "${idx}" -eq 0 ]]; then port="${SVR_HTTP_PORT}" + conf_name="local.service_conf.yaml" else port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1)) + 
conf_name="service_conf_ragflow_${port}.yaml" fi echo "server 127.0.0.1:${port};" >> /etc/nginx/conf.d/ragflow_upstream.conf _start_ragflow_instance "${idx}" "${port}" "${conf_name}" diff --git a/scripts/deploy.sh b/scripts/deploy.sh index 84cc6ac2f..883e6b64b 100755 --- a/scripts/deploy.sh +++ b/scripts/deploy.sh @@ -179,6 +179,61 @@ function is_process_running() { [[ -n "${pid}" ]] && ps -p "${pid}" >/dev/null 2>&1 } +function _pids_listening_on_port() { + local port="$1" + local pids="" + if command -v lsof >/dev/null 2>&1; then + # -t: pids only; LISTEN only + pids="$(lsof -tiTCP:"${port}" -sTCP:LISTEN 2>/dev/null | tr '\n' ' ' | xargs echo 2>/dev/null || true)" + elif command -v fuser >/dev/null 2>&1; then + # fuser output format varies; best-effort + pids="$(fuser -n tcp "${port}" 2>/dev/null | tr '\n' ' ' | xargs echo 2>/dev/null || true)" + fi + echo "${pids}" +} + +function _pid_cwd_is_workspace() { + local pid="$1" + local cwd="" + if [[ -r "/proc/${pid}/cwd" ]]; then + cwd="$(readlink -f "/proc/${pid}/cwd" 2>/dev/null || true)" + fi + [[ -n "${cwd}" ]] && [[ "${cwd}" == "${WORKSPACE_FOLDER}"* ]] +} + +function _kill_port_if_matches_cmd() { + local port="$1" + local must_contain="$2" # substring to match in cmdline + local name="${3:-}" + + local pids + pids="$(_pids_listening_on_port "${port}")" + [[ -n "${pids}" ]] || return 0 + + local pid args + for pid in ${pids}; do + args="$(ps -p "${pid}" -o args= 2>/dev/null || true)" + if [[ -z "${args}" ]]; then + continue + fi + # Kill only when we're confident it's our workspace process. + # Some environments may already have other ragflow_server processes running as root. + local match=0 + if [[ "${args}" == *"${must_contain}"* ]]; then + match=1 + elif _pid_cwd_is_workspace "${pid}" && [[ "${args}" == *"api/ragflow_server.py"* ]]; then + match=1 + fi + [[ "${match}" -eq 1 ]] || continue + echo "[stop] ${name:-port ${port}}: killing listener pid=${pid} (matched: ${must_contain})" + kill "${pid}" 2>/dev/null || true + sleep 0.3 + if is_process_running "${pid}"; then + kill -9 "${pid}" 2>/dev/null || true + fi + done +} + function _default_host_id() { local hn hn="$(hostname)" @@ -244,6 +299,24 @@ function _common_env_kv() { echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-/usr/lib/x86_64-linux-gnu/:/usr/lib64/}" echo "TIKTOKEN_CACHE_DIR=${WORKSPACE_FOLDER}" echo "LIGHTEN=${LIGHTEN:-1}" + + # Enable HTTP access logs from Flask/Werkzeug by default. 
+ # - If LOG_LEVELS is empty: set root=INFO,werkzeug=INFO + # - If LOG_LEVELS exists but has no werkzeug: append werkzeug=INFO + # - If LOG_LEVELS exists but has no root: prepend root=INFO + local _log_levels="${LOG_LEVELS:-}" + if [[ -z "${_log_levels}" ]]; then + _log_levels="root=INFO,werkzeug=INFO" + else + if [[ "${_log_levels}" != *"werkzeug="* ]]; then + _log_levels="${_log_levels},werkzeug=INFO" + fi + if [[ "${_log_levels}" != *"root="* ]]; then + _log_levels="root=INFO,${_log_levels}" + fi + fi + echo "LOG_LEVELS=${_log_levels}" + echo "http_proxy=" echo "https_proxy=" echo "no_proxy=" @@ -294,6 +367,8 @@ function _start_process() { echo "[ok] started ${name} (PID: ${bg_pid})" } + + function _stop_by_pidfile() { local name="$1" local pid_file="$2" @@ -303,6 +378,12 @@ function _stop_by_pidfile() { fi local pid pid="$(cat "${pid_file}" 2>/dev/null || true)" + local port="" + # Extract port from pidfile name for ragflow_server (e.g., ragflow_server_9390.pid -> 9390) + if [[ "${name}" == "ragflow_server" ]] && [[ "${pid_file}" =~ ragflow_server_([0-9]+)\.pid ]]; then + port="${BASH_REMATCH[1]}" + fi + if is_process_running "${pid}"; then echo "[stop] ${name} (PID: ${pid})" kill "${pid}" 2>/dev/null || true @@ -313,6 +394,31 @@ function _stop_by_pidfile() { else echo "[skip] ${name} not running (stale pid: ${pid})" fi + + # For ragflow_server, check if port is still listening (child process may have outlived parent) + if [[ -n "${port}" ]] && _port_is_listening "${port}"; then + local listening_pids + listening_pids="$(_pids_listening_on_port "${port}")" + if [[ -n "${listening_pids}" ]]; then + local child_pid + for child_pid in ${listening_pids}; do + # Only kill processes from our workspace + if _pid_cwd_is_workspace "${child_pid}"; then + local args + args="$(ps -p "${child_pid}" -o args= 2>/dev/null || true)" + if [[ "${args}" == *"api/ragflow_server.py"* ]]; then + echo "[stop] ${name} (child PID: ${child_pid} on port ${port})" + kill "${child_pid}" 2>/dev/null || true + sleep 0.5 + if is_process_running "${child_pid}"; then + kill -9 "${child_pid}" 2>/dev/null || true + fi + fi + fi + done + fi + fi + rm -f "${pid_file}" } @@ -321,6 +427,123 @@ function _validate_port() { [[ "${port}" =~ ^[0-9]+$ ]] && [[ "${port}" -ge 1 ]] && [[ "${port}" -le 65535 ]] } +function _port_is_listening() { + local port="$1" + # ss without -p doesn't require extra privileges + ss -ltn "( sport = :${port} )" 2>/dev/null | grep -q ":${port} " +} + +function _pid_from_pidfile() { + local pid_file="$1" + [[ -f "${pid_file}" ]] || return 1 + cat "${pid_file}" 2>/dev/null | tr -d '[:space:]' +} + +function _pidfile_is_running() { + local pid_file="$1" + local pid + pid="$(_pid_from_pidfile "${pid_file}")" + [[ -n "${pid}" ]] && is_process_running "${pid}" +} + +function _preflight_port_or_running() { + # If pidfile indicates the component is already running, treat as OK (will be skipped by start_*). + # Otherwise, the port must be free; we do NOT stop/kill anything in start. + local name="$1" + local pid_file="$2" + local port="$3" + local hint="$4" + + if _pidfile_is_running "${pid_file}"; then + return 0 + fi + + if _port_is_listening "${port}"; then + echo "ERROR: ${name} port ${port} is already in use. start will not stop existing processes." 
>&2 + echo "Hint: inspect listener: ss -ltnp '( sport = :${port} )' (or lsof -nP -iTCP:${port} -sTCP:LISTEN)" >&2 + [[ -n "${hint}" ]] && echo "Hint: ${hint}" >&2 + return 1 + fi + return 0 +} + +function _preflight_start_all() { + # Goal: if anything would fail to start due to port conflicts, fail BEFORE starting any new process. + local fail=0 + + # ragflow_server instances + local idx port pid_file + for (( idx=0; idx<${SVR_COUNT}; idx++ )); do + if [[ "${idx}" -eq 0 ]]; then + port="${SVR_HTTP_PORT}" + else + port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1)) + fi + pid_file="${PID_DIR}/ragflow_server_${port}.pid" + if ! _preflight_port_or_running "ragflow_server" "${pid_file}" "${port}" "pick another port: --svr-http-port / --svr-extra-base-http-port (or stop/restart first)"; then + fail=1 + fi + done + + # nginx web frontend/proxy (if enabled) + if [[ "${ENABLE_WEBSERVER}" -eq 1 ]]; then + if ! _preflight_port_or_running "nginx(web)" "${PID_DIR}/web_frontend.pid" "${WEB_PORT}" "pick another port: --web-port= (or stop/restart first)"; then + fail=1 + fi + fi + + # admin_server (if enabled) + if [[ "${ENABLE_ADMIN_SERVER}" -eq 1 ]]; then + if ! _preflight_port_or_running "admin_server" "${PID_DIR}/admin_server.pid" "${ADMIN_SVR_HTTP_PORT}" "pick another port: --admin-svr-http-port= (or stop/restart first)"; then + fail=1 + fi + fi + + # mcp_server (if enabled) + if [[ "${ENABLE_MCP_SERVER}" -eq 1 ]]; then + if ! _preflight_port_or_running "mcp_server" "${PID_DIR}/mcp_server.pid" "${MCP_PORT}" "pick another port: --mcp-port= (or disable mcp_server)"; then + fail=1 + fi + fi + + # powerrag_server (if enabled) + if [[ "${ENABLE_POWERRAG_SERVER}" -eq 1 ]]; then + if ! _preflight_port_or_running "powerrag_server" "${PID_DIR}/powerrag_server.pid" "${POWERRAG_PORT}" "pick another port: --powerrag-port= (or disable powerrag_server)"; then + fail=1 + fi + fi + + [[ "${fail}" -eq 0 ]] +} + +function _check_ports_available() { + # Fail-fast if any target port is already in use by another service. + # We consider it "available" only if nothing is listening. + local -a ports=("$@") + local port + for port in "${ports[@]}"; do + if ! _port_is_listening "${port}"; then + continue + fi + + echo "ERROR: port ${port} is already in use by another service." >&2 + echo "Hint: check with: ss -ltnp '( sport = :${port} )' (or run as root to see process)" >&2 + if [[ "${port}" -eq "${ADMIN_SVR_HTTP_PORT}" ]]; then + echo "Hint: ${port} is the admin_server default port (ADMIN_SVR_HTTP_PORT). Use: --admin-svr-http-port=" >&2 + elif [[ "${port}" -eq "${WEB_PORT}" ]]; then + echo "Hint: ${port} is the nginx web port (WEB_PORT). Use: --web-port=" >&2 + elif [[ "${port}" -eq "${MCP_PORT}" ]]; then + echo "Hint: ${port} is the mcp_server port (MCP_PORT). Use: --mcp-port= or disable mcp_server" >&2 + elif [[ "${port}" -eq "${POWERRAG_PORT}" ]]; then + echo "Hint: ${port} is the powerrag_server port (POWERRAG_PORT). 
Use: --powerrag-port= or disable powerrag_server" >&2 + else + echo "Hint: if you intend to run multiple ragflow instances, use different ports: --svr-http-port / --svr-extra-base-http-port (and also consider --admin-svr-http-port)" >&2 + fi + return 1 + done + return 0 +} + function _check_port_conflicts() { local -a reserved_ports=() local -a ragflow_ports=() @@ -418,7 +641,13 @@ PY function _prepare_multi_ragflow_confs() { local idx port conf_name conf_path for (( idx=0; idx<${SVR_COUNT}; idx++ )); do - conf_name="service_conf_ragflow_${idx}.yaml" + # Align with docker/entrypoint.sh: main instance uses base service conf directly; + # extra instances use generated per-instance confs. + if [[ "${idx}" -eq 0 ]]; then + conf_name="${GLOBAL_SERVICE_CONF}" + else + conf_name="service_conf_ragflow_${idx}.yaml" + fi conf_path="${CONF_DIR}/${conf_name}" if [[ "${idx}" -eq 0 ]]; then port="${SVR_HTTP_PORT}" @@ -439,7 +668,11 @@ function start_ragflow_servers() { local idx port conf_name pid_file for (( idx=0; idx<${SVR_COUNT}; idx++ )); do - conf_name="service_conf_ragflow_${idx}.yaml" + if [[ "${idx}" -eq 0 ]]; then + conf_name="${GLOBAL_SERVICE_CONF}" + else + conf_name="service_conf_ragflow_${idx}.yaml" + fi if [[ "${idx}" -eq 0 ]]; then port="${SVR_HTTP_PORT}" else @@ -595,14 +828,53 @@ function start_web() { fi fi - local server_port_for_web="${SVR_HTTP_PORT}" local admin_port_for_web="${ADMIN_SVR_HTTP_PORT}" # nginx temp dirs (must be writable for non-root runs) local nginx_tmp_dir="${NGINX_CONF_DIR}/tmp" mkdir -p "${nginx_tmp_dir}/client_body" "${nginx_tmp_dir}/proxy" "${nginx_tmp_dir}/fastcgi" "${nginx_tmp_dir}/uwsgi" "${nginx_tmp_dir}/scgi" + # Align with docker/entrypoint.sh nginx logic: + # - generate upstream include files so nginx can proxy/load-balance to all instances + # - generate proxy.conf snippet for consistent proxy headers/settings + : > "${NGINX_CONF_DIR}/ragflow_upstream.conf" + : > "${NGINX_CONF_DIR}/admin_upstream.conf" + echo "server ${ADMIN_HOST_FOR_WEB}:${admin_port_for_web};" >> "${NGINX_CONF_DIR}/admin_upstream.conf" + + local idx port + for (( idx=0; idx<${SVR_COUNT}; idx++ )); do + if [[ "${idx}" -eq 0 ]]; then + port="${SVR_HTTP_PORT}" + else + port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1)) + fi + echo "server ${SERVER_HOST_FOR_WEB}:${port};" >> "${NGINX_CONF_DIR}/ragflow_upstream.conf" + done + + cat > "${NGINX_CONF_DIR}/proxy.conf" <<'EOF' +proxy_set_header Host $host; +proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; +proxy_set_header X-Forwarded-Proto $scheme; +proxy_http_version 1.1; +proxy_set_header Connection ""; +proxy_buffering off; +proxy_read_timeout 3600s; +proxy_send_timeout 3600s; +proxy_buffer_size 1024k; +proxy_buffers 16 1024k; +proxy_busy_buffers_size 2048k; +proxy_temp_file_write_size 2048k; +EOF + cat > "${NGINX_CONF_DIR}/ragflow.conf" </dev/null || true + # If pidfiles were stale, best-effort kill listeners by port (only if cmd matches our scripts). 
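+    # Matching is deliberately conservative: _kill_port_if_matches_cmd only kills a
+    # listener whose cmdline contains the given pattern, or whose cwd is inside
+    # WORKSPACE_FOLDER while it is running api/ragflow_server.py.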
+    local idx port
+    for (( idx=0; idx<${SVR_COUNT}; idx++ )); do
+        if [[ "${idx}" -eq 0 ]]; then
+            port="${SVR_HTTP_PORT}"
+        else
+            port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1))
+        fi
+        _kill_port_if_matches_cmd "${port}" "${RAGFLOW_SERVER_PY}" "ragflow_server:${port}"
+    done
+    _kill_port_if_matches_cmd "${ADMIN_SVR_HTTP_PORT}" "${ADMIN_SERVER_PY}" "admin_server"
+
     # optional web frontend nginx
     stop_web || true
 }
@@ -762,17 +1033,21 @@ function clear_runtime_files() {
     echo "=== clear: stop services and remove generated logs/configs/pids (best-effort) ==="
 
     # stop services started by this script (based on pids/)
+    # Only processes with pidfiles in PID_DIR are managed by this deploy.sh instance.
+    # Other ragflow_server processes on the same machine may be managed by other deploy.sh instances.
     stop_all || true
 
     # generated per-instance service confs
     rm -f "${CONF_DIR}"/service_conf_ragflow_*.yaml 2>/dev/null || true
+    # generated secret key file (align with docker/entrypoint.sh)
+    rm -f "${CONF_DIR}/.ragflow_secret_key" 2>/dev/null || true
 
     # remove runtime dirs entirely (user expectation for clear)
     rm -rf "${NGINX_CONF_DIR}" 2>/dev/null || true
     rm -rf "${PID_DIR}" 2>/dev/null || true
     rm -rf "${LOG_DIR}" 2>/dev/null || true
 
-    echo "[ok] cleared: logs/, pids/, nginx_conf/, conf/service_conf_ragflow_*.yaml"
+    echo "[ok] cleared: logs/, pids/, nginx_conf/, conf/service_conf_ragflow_*.yaml, conf/.ragflow_secret_key"
 }
 
 function status() {
@@ -780,42 +1055,164 @@
 
     echo "config:"
     echo " - service_conf(base) = conf/${GLOBAL_SERVICE_CONF}"
-    echo " - ragflow main port = ${SVR_HTTP_PORT}"
+    # Best-effort show ports from the base service conf (more accurate than defaults when status is run without flags).
+    local base_ragflow_port="${SVR_HTTP_PORT}"
+    local base_admin_port="${ADMIN_SVR_HTTP_PORT}"
+    if [[ -f "${CONF_DIR}/${GLOBAL_SERVICE_CONF}" ]] && [[ -x "${PYTHON}" ]]; then
+        local _ports
+        _ports="$("${PYTHON}" - <<PY 2>/dev/null || true
+import os
+from ruamel.yaml import YAML
+conf = os.path.join(${CONF_DIR@Q}, ${GLOBAL_SERVICE_CONF@Q})
+yaml = YAML(typ="safe")
+with open(conf, "r", encoding="utf-8") as f:
+    data = yaml.load(f) or {}
+rag = (data.get("ragflow") or {}).get("http_port")
+adm = (data.get("admin") or {}).get("http_port")
+print(f"{rag if rag is not None else ''}\\t{adm if adm is not None else ''}")
+PY
+)"
+        if [[ -n "${_ports}" ]]; then
+            base_ragflow_port="$(echo "${_ports}" | awk -F'\t' '{print $1}')"
+            base_admin_port="$(echo "${_ports}" | awk -F'\t' '{print $2}')"
+            [[ -n "${base_ragflow_port}" ]] || base_ragflow_port="${SVR_HTTP_PORT}"
+            [[ -n "${base_admin_port}" ]] || base_admin_port="${ADMIN_SVR_HTTP_PORT}"
+        fi
+    fi
+
+    echo " - ragflow main port = ${base_ragflow_port}"
     echo " - ragflow extra base = ${SVR_EXTRA_BASE_HTTP_PORT}"
-    echo " - admin port = ${ADMIN_SVR_HTTP_PORT}"
+    echo " - admin port = ${base_admin_port}"
     echo " - mcp port = ${MCP_PORT}"
     echo " - web port = ${WEB_PORT}"
 
+    # Build a port -> conf filename map from existing conf files (robust even when status is run with different flags).
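+    # NOTE: associative arrays (declare -A) require bash 4+, and the ${VAR@Q}
+    # quoting used by the embedded Python snippets below requires bash 4.4+.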
+    declare -A _ragflow_port_to_conf=()
+    if [[ -x "${PYTHON}" ]]; then
+        while IFS=$'\t' read -r _p _c; do
+            [[ -n "${_p}" && -n "${_c}" ]] || continue
+            _ragflow_port_to_conf["${_p}"]="${_c}"
+        done < <("${PYTHON}" - <<PY 2>/dev/null || true
+import glob, os
+from ruamel.yaml import YAML
+
+conf_dir = ${CONF_DIR@Q}
+base = os.path.join(conf_dir, ${GLOBAL_SERVICE_CONF@Q})
+files = []
+if os.path.isfile(base):
+    files.append(base)
+files.extend(sorted(glob.glob(os.path.join(conf_dir, "service_conf_ragflow_*.yaml"))))
+
+yaml = YAML(typ="safe")
+for f in files:
+    try:
+        with open(f, "r", encoding="utf-8") as fh:
+            data = yaml.load(fh) or {}
+        port = (data.get("ragflow") or {}).get("http_port")
+        if port is None:
+            continue
+        print(f"{int(port)}\t{os.path.basename(f)}")
+    except Exception:
+        continue
+PY
+)
+    fi
+
     # ragflow
     echo "ragflow_server:"
-    local any=0
+    local found=0
     local f pid port idx conf_name conf_path log_path
     for f in "${PID_DIR}"/ragflow_server_*.pid; do
         [[ -f "${f}" ]] || continue
+        found=1
         port="$(basename "${f}" | sed 's/ragflow_server_\(.*\)\.pid/\1/')"
         pid="$(cat "${f}" 2>/dev/null || true)"
         # best-effort infer conf name from port
-        conf_name="(unknown)"
-        if [[ "${port}" == "${SVR_HTTP_PORT}" ]]; then
-            conf_name="service_conf_ragflow_0.yaml"
-        elif [[ "${port}" =~ ^[0-9]+$ ]]; then
-            idx=$(( port - SVR_EXTRA_BASE_HTTP_PORT + 1 ))
-            if [[ "${idx}" -ge 1 ]]; then
-                conf_name="service_conf_ragflow_${idx}.yaml"
+        conf_name="${_ragflow_port_to_conf[${port}]:-(unknown)}"
+        # Backward compatible fallback when conf map isn't available
+        if [[ "${conf_name}" == "(unknown)" ]]; then
+            if [[ "${port}" == "${SVR_HTTP_PORT}" ]]; then
+                conf_name="${GLOBAL_SERVICE_CONF}"
+            elif [[ "${port}" =~ ^[0-9]+$ ]]; then
+                idx=$(( port - SVR_EXTRA_BASE_HTTP_PORT + 1 ))
+                if [[ "${idx}" -ge 1 ]]; then
+                    conf_name="service_conf_ragflow_${idx}.yaml"
+                fi
             fi
         fi
         conf_path="conf/${conf_name}"
         log_path="logs/ragflow_server_${port}.log"
 
+        # Try to get actual listening port from process's service conf file
+        actual_port="${port}"
+        if is_process_running "${pid}" && [[ -f "${CONF_DIR}/${conf_name}" ]] && [[ -x "${PYTHON}" ]]; then
+            actual_port="$("${PYTHON}" - <<PY 2>/dev/null || echo "${port}"
+import os
+from ruamel.yaml import YAML
+conf = os.path.join(${CONF_DIR@Q}, ${conf_name@Q})
+yaml = YAML(typ="safe")
+try:
+    with open(conf, "r", encoding="utf-8") as f:
+        data = yaml.load(f) or {}
+    p = (data.get("ragflow") or {}).get("http_port")
+    if p is not None:
+        print(int(p))
+except Exception:
+    pass
+PY
+)"
+            [[ -n "${actual_port}" ]] || actual_port="${port}"
+        fi
+
+        # Get actual process listening on the port (may be different from pidfile PID if it's a child process)
+        local actual_pid="${pid}"
+        local listening_pids
+        listening_pids="$(_pids_listening_on_port "${actual_port}")"
+        if [[ -n "${listening_pids}" ]]; then
+            # Prefer the PID that matches our workspace and is a ragflow_server process
+            local candidate_pid
+            for candidate_pid in ${listening_pids}; do
+                if _pid_cwd_is_workspace "${candidate_pid}"; then
+                    local args
+                    args="$(ps -p "${candidate_pid}" -o args= 2>/dev/null || true)"
+                    if [[ "${args}" == *"api/ragflow_server.py"* ]]; then
+                        actual_pid="${candidate_pid}"
+                        break
+                    fi
+                fi
+            done
+            # If no match found, use first listening PID
+            if [[ "${actual_pid}" == "${pid}" ]] && [[ -n "${listening_pids}" ]]; then
+                actual_pid="$(echo "${listening_pids}" | awk '{print $1}')"
+            fi
+        fi
+
+        # Check if pidfile process or actual listening process is running
+        local pidfile_running=0
+        local listening_running=0
         if is_process_running "${pid}"; then
-            any=1
-            echo " - [ok] port=${port} pid=${pid} conf=${conf_path} log=${log_path}"
+            pidfile_running=1
+        fi
+        if [[ "${actual_pid}" != "${pid}" ]] && is_process_running "${actual_pid}"; then
+            listening_running=1
+        fi
+
+        if [[ "${pidfile_running}" -eq 1 ]] || [[ "${listening_running}" -eq 1 ]]; then
+            local port_info="${actual_port}"
+            if [[ "${actual_port}" != "${port}" ]]; then
+                port_info="${actual_port} (pidfile=${port})"
+            fi
+            local pid_info="${actual_pid}"
+            if [[ "${actual_pid}" != "${pid}" ]]; then
+                pid_info="${actual_pid} (pidfile=${pid})"
+            fi
+            echo " - [ok] port=${port_info} pid=${pid_info} conf=${conf_path} log=${log_path}"
         else
             echo " - [down] port=${port} pid=${pid} conf=${conf_path} log=${log_path}"
         fi
     done
-    if [[ "${any}" -eq 0 ]]; then
+    if [[ "${found}" -eq 0 ]]; then
         echo " - (none)"
     fi
 
@@ -835,13 +1232,13 @@ function status() {
 
     # task executors
     echo "task_executor:"
-    any=0
+    found=0
     local args consumer_arg logf
     for f in "${PID_DIR}"/task_executor_*.pid; do
         [[ -f "${f}" ]] || continue
+        found=1
         pid="$(cat "${f}" 2>/dev/null || true)"
         if is_process_running "${pid}"; then
-            any=1
             args="$(ps -p "${pid}" -o args= 2>/dev/null || true)"
             consumer_arg="$(echo "${args}" | awk '{print $NF}')"
             # If no consumer arg provided, fallback to pid-file id.
@@ -854,7 +1251,7 @@ function status() {
             echo " - [down] id=$(basename "${f}") pid=${pid}"
         fi
     done
-    if [[ "${any}" -eq 0 ]]; then
+    if [[ "${found}" -eq 0 ]]; then
         echo " - (none)"
     fi
 
@@ -995,17 +1392,26 @@ for arg in "$@"; do
     esac
 done
 
-# Validate ports early (best-effort)
-for p in "${SVR_HTTP_PORT}" "${SVR_EXTRA_BASE_HTTP_PORT}" "${ADMIN_SVR_HTTP_PORT}" "${MCP_PORT}" "${POWERRAG_PORT}" "${WEB_PORT}"; do
-    if ! _validate_port "${p}"; then
-        echo "ERROR: invalid port: ${p}" >&2
+# Port validations / conflict checks / occupancy preflight should only block `start`.
+# Other actions (stop/status/clear/help) must not fail just because some default ports are occupied by unrelated services.
+if [[ "${ACTION}" == "start" ]]; then
+    # Validate ports early (best-effort)
+    for p in "${SVR_HTTP_PORT}" "${SVR_EXTRA_BASE_HTTP_PORT}" "${ADMIN_SVR_HTTP_PORT}" "${MCP_PORT}" "${POWERRAG_PORT}" "${WEB_PORT}"; do
+        if ! _validate_port "${p}"; then
+            echo "ERROR: invalid port: ${p}" >&2
+            exit 1
+        fi
+    done
+
+    # Check for duplicates/conflicts within our configured ports
+    if ! _check_port_conflicts; then
         exit 1
     fi
-done
 
-# Check for port conflicts
-if ! _check_port_conflicts; then
-    exit 1
+    # Preflight all components (atomic start): if anything would fail, don't start anything new.
+    if ! _preflight_start_all; then
+        exit 1
+    fi
 fi
 
 # -----------------------------------------------------------------------------
 # Main
 # -----------------------------------------------------------------------------
diff --git a/scripts/tools.sh b/scripts/tools.sh
new file mode 100755
index 000000000..e4bdb231c
--- /dev/null
+++ b/scripts/tools.sh
@@ -0,0 +1,243 @@
+#!/bin/bash
+
+# RAGFlow tools script (data upload and processing tasks)
+# Usage: ./scripts/tools.sh [upload-wiki|stop-upload-wiki|reparse-failed|stop-reparse-failed]
+
+# Resolve this script's parent directory (the project root)
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+WORKSPACE_FOLDER="$(cd "${SCRIPT_DIR}/.." && pwd)"
&& pwd)" + +PYTHON="${WORKSPACE_FOLDER}/.venv/bin/python" +UPLOAD_WIKI_JSON="${WORKSPACE_FOLDER}/scripts/upload_wiki_json.py" +REPARSE_FAILED_DOCS="${WORKSPACE_FOLDER}/scripts/reparse_failed_documents.py" + +# 日志目录 +LOG_DIR="${WORKSPACE_FOLDER}/logs" +mkdir -p "${LOG_DIR}" + +# PID 文件目录 +PID_DIR="${WORKSPACE_FOLDER}/pids" +mkdir -p "${PID_DIR}" + +# Ensure runtime exists +if [ ! -x "${PYTHON}" ]; then + echo "[tools][ERROR] Python venv not found: ${PYTHON}" >&2 + echo "[tools][ERROR] Please run: ${WORKSPACE_FOLDER}/scripts/setup_tools_venv.sh" >&2 + exit 1 +fi + +# 上传 Wiki JSON 数据 +upload_wiki_json() { + local pid_file="${PID_DIR}/upload_wiki_json.pid" + + # 检查是否已经在运行 + if [ -f "$pid_file" ]; then + PID=$(cat "$pid_file") + if ps -p $PID > /dev/null 2>&1; then + echo "Wiki JSON 上传任务已经在运行 (PID: $PID)" + return 1 + fi + fi + + echo "启动 Wiki JSON 上传任务..." + cd "${WORKSPACE_FOLDER}" + + # 默认参数(可通过环境变量覆盖) + local api_key="${API_KEY:-}" + local host="${HOST:-http://127.0.0.1:9380}" + local data_dir="${WIKI_DATA_DIR:-}" + local dataset_id="${DATASET_ID:-}" + local batch_size="${BATCH_SIZE:-1000}" + local snapshot_file="${WIKI_SNAPSHOT_FILE:-${LOG_DIR}/upload_snapshot.json}" + local enable_resume="${WIKI_ENABLE_RESUME:-true}" + + # 构建命令参数 + local resume_args="" + if [ "$enable_resume" = "true" ]; then + resume_args="--resume -s ${snapshot_file}" + fi + + nohup env \ + PYTHONPATH="${WORKSPACE_FOLDER}" \ + "${PYTHON}" "${UPLOAD_WIKI_JSON}" \ + -k "${api_key}" \ + -H "${host}" \ + -d "${data_dir}" \ + -i "${dataset_id}" \ + -b "${batch_size}" \ + ${resume_args} > "${LOG_DIR}/upload_wiki_json.log" 2>&1 & + + UPLOAD_PID=$! + echo $UPLOAD_PID > "$pid_file" + echo "Wiki JSON 上传任务已启动 (PID: $UPLOAD_PID)" + echo "日志文件: ${LOG_DIR}/upload_wiki_json.log" + if [ "$enable_resume" = "true" ]; then + echo "快照文件: ${snapshot_file}" + echo "任务支持断点续传" + fi +} + +# 停止 Wiki JSON 上传任务 +stop_upload_wiki_json() { + local pid_file="${PID_DIR}/upload_wiki_json.pid" + + if [ ! -f "$pid_file" ]; then + echo "Wiki JSON 上传任务未运行" + return 1 + fi + + PID=$(cat "$pid_file") + if ps -p $PID > /dev/null 2>&1; then + echo "停止 Wiki JSON 上传任务 (PID: $PID)..." + kill $PID + rm "$pid_file" + echo "Wiki JSON 上传任务已停止" + else + echo "Wiki JSON 上传任务未运行" + rm "$pid_file" + fi +} + +# 重新解析失败的文档 +reparse_failed_documents() { + local pid_file="${PID_DIR}/reparse_failed_docs.pid" + + # 检查是否已经在运行 + if [ -f "$pid_file" ]; then + PID=$(cat "$pid_file") + if ps -p $PID > /dev/null 2>&1; then + echo "重新解析失败文档任务已经在运行 (PID: $PID)" + return 1 + fi + fi + + echo "启动重新解析失败文档任务..." + cd "${WORKSPACE_FOLDER}" + + # 默认参数(可通过环境变量覆盖) + local api_key="${API_KEY:-}" + local host="${HOST:-http://127.0.0.1:9380}" + local dataset_id="${DATASET_ID:-}" + local batch_size="${BATCH_SIZE:-1000}" + + if [ -z "$dataset_id" ]; then + echo "错误: 必须设置 DATASET_ID 环境变量" + return 1 + fi + + nohup env \ + PYTHONPATH="${WORKSPACE_FOLDER}" \ + "${PYTHON}" "${REPARSE_FAILED_DOCS}" \ + -k "${api_key}" \ + -H "${host}" \ + -i "${dataset_id}" \ + -b "${batch_size}" > "${LOG_DIR}/reparse_failed_docs.log" 2>&1 & + + REPARSE_PID=$! + echo $REPARSE_PID > "$pid_file" + echo "重新解析失败文档任务已启动 (PID: $REPARSE_PID)" + echo "日志文件: ${LOG_DIR}/reparse_failed_docs.log" +} + +# 停止重新解析失败文档任务 +stop_reparse_failed_documents() { + local pid_file="${PID_DIR}/reparse_failed_docs.pid" + + if [ ! -f "$pid_file" ]; then + echo "重新解析失败文档任务未运行" + return 1 + fi + + PID=$(cat "$pid_file") + if ps -p $PID > /dev/null 2>&1; then + echo "停止重新解析失败文档任务 (PID: $PID)..." 
+        kill $PID
+        rm "$pid_file"
+        echo "Reparse-failed-documents task stopped"
+    else
+        echo "Reparse-failed-documents task is not running"
+        rm "$pid_file"
+    fi
+}
+
+# Show tool task status
+status() {
+    echo "=== Tool task status ==="
+
+    # Wiki JSON Upload
+    local upload_pid_file="${PID_DIR}/upload_wiki_json.pid"
+    if [ -f "$upload_pid_file" ]; then
+        PID=$(cat "$upload_pid_file")
+        if ps -p $PID > /dev/null 2>&1; then
+            echo "Wiki JSON Upload: running (PID: $PID)"
+        else
+            echo "Wiki JSON Upload: not running"
+        fi
+    else
+        echo "Wiki JSON Upload: not running"
+    fi
+
+    # Reparse Failed Documents
+    echo ""
+    local reparse_pid_file="${PID_DIR}/reparse_failed_docs.pid"
+    if [ -f "$reparse_pid_file" ]; then
+        PID=$(cat "$reparse_pid_file")
+        if ps -p $PID > /dev/null 2>&1; then
+            echo "Reparse Failed Documents: running (PID: $PID)"
+        else
+            echo "Reparse Failed Documents: not running"
+        fi
+    else
+        echo "Reparse Failed Documents: not running"
+    fi
+}
+
+# Main entry
+case "$1" in
+    upload-wiki)
+        upload_wiki_json
+        ;;
+    stop-upload-wiki)
+        stop_upload_wiki_json
+        ;;
+    reparse-failed)
+        reparse_failed_documents
+        ;;
+    stop-reparse-failed)
+        stop_reparse_failed_documents
+        ;;
+    status)
+        status
+        ;;
+    *)
+        echo "Usage: $0 {upload-wiki|stop-upload-wiki|reparse-failed|stop-reparse-failed|status}"
+        echo ""
+        echo "Environment variables:"
+        echo ""
+        echo "  Wiki upload:"
+        echo "    API_KEY            - API key"
+        echo "    HOST               - server address (default: http://127.0.0.1:9380)"
+        echo "    WIKI_DATA_DIR      - data directory"
+        echo "    DATASET_ID         - dataset ID (optional)"
+        echo "    BATCH_SIZE         - batch size (default: 1000)"
+        echo "    WIKI_SNAPSHOT_FILE - snapshot file path (default: \${LOG_DIR}/upload_snapshot.json)"
+        echo "    WIKI_ENABLE_RESUME - enable checkpoint resume (default: true)"
+        echo ""
+        echo "  Reparse failed documents:"
+        echo "    API_KEY            - API key"
+        echo "    HOST               - server address (default: http://127.0.0.1:9380)"
+        echo "    DATASET_ID         - dataset ID (required)"
+        echo "    BATCH_SIZE         - batch size (default: 1000)"
+        echo ""
+        echo "Examples:"
+        echo "  $0 upload-wiki                           # upload Wiki JSON data (background, supports checkpoint resume)"
+        echo "  $0 stop-upload-wiki                      # stop the Wiki JSON upload task"
+        echo "  BATCH_SIZE=2000 $0 upload-wiki           # upload with a custom batch size"
+        echo "  WIKI_ENABLE_RESUME=false $0 upload-wiki  # disable checkpoint resume"
+        echo "  DATASET_ID=xxx $0 reparse-failed         # reparse failed documents in the given dataset"
+        echo "  $0 stop-reparse-failed                   # stop the reparse-failed-documents task"
+        echo "  $0 status                                # show tool task status"
+        exit 1
+        ;;
+esac
+

From e0786c3ce2a0dcccc1d0db3b980727f848051bd2 Mon Sep 17 00:00:00 2001
From: "keyang.lk" 
Date: Fri, 9 Jan 2026 20:01:39 +0800
Subject: [PATCH 3/3] feat: remove scripts tools

---
 scripts/README.md |  36 +------
 scripts/tools.sh  | 243 ----------------------------------------------
 2 files changed, 1 insertion(+), 278 deletions(-)
 delete mode 100755 scripts/tools.sh

diff --git a/scripts/README.md b/scripts/README.md
index af33b5a5a..a941ded97 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -1,6 +1,6 @@
 # RAGFlow Scripts Guide
 
-This directory contains RAGFlow's deployment scripts and tool scripts, used to manage service deployment and data-processing tasks.
+This directory contains RAGFlow's deployment scripts, used to manage service deployment.
 
 ## Script Overview
 
@@ -63,17 +63,6 @@
 **Compatibility:**
 - Separate `start-web/stop-web` commands are no longer provided; the frontend nginx (static assets + API reverse proxy) starts together with `webserver`.
 
-### 2. `tools.sh` - Tools Script
-
-Used to run tool tasks related to data upload and processing.
-
-**Supported commands:**
-- `upload-wiki` - upload Wiki JSON data (runs in the background, supports checkpoint resume)
-- `stop-upload-wiki` - stop the Wiki JSON upload task
-- `reparse-failed` - reparse failed documents in the given dataset
-- `stop-reparse-failed` - stop the reparse-failed-documents task
-- `status` - show tool task status
-
 ## Quick Start
 
 ### Deployment (deploy.sh)
 
@@ -141,29 +130,6 @@
 ./scripts/deploy.sh start --enable-powerragserver --powerrag-port=6000
 ```
 
-### Tools script (tools.sh)
-
-```bash
-# Upload Wiki JSON data
-./scripts/tools.sh upload-wiki
-
-# Upload with custom parameters
-API_KEY=xxx HOST=xxx WIKI_DATA_DIR=xxx BATCH_SIZE=1000 ./scripts/tools.sh upload-wiki
-WIKI_ENABLE_RESUME=false ./scripts/tools.sh upload-wiki
-
-# Stop the upload task
-./scripts/tools.sh stop-upload-wiki
-
-# Reparse failed documents (requires a dataset ID)
-API_KEY=xxx HOST=xxx DATASET_ID=xxx BATCH_SIZE=1000 ./scripts/tools.sh reparse-failed
-
-# Stop the reparse task
-./scripts/tools.sh stop-reparse-failed
-
-# Show tool task status
-./scripts/tools.sh status
-```
-
 ## Logs and PIDs
 
 ### Service logs (default: `logs/`)
diff --git a/scripts/tools.sh b/scripts/tools.sh
deleted file mode 100755
index e4bdb231c..000000000
--- a/scripts/tools.sh
+++ /dev/null
@@ -1,243 +0,0 @@
-#!/bin/bash
-
-# RAGFlow tools script (data upload and processing tasks)
-# Usage: ./scripts/tools.sh [upload-wiki|stop-upload-wiki|reparse-failed|stop-reparse-failed]
-
-# Resolve this script's parent directory (the project root)
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-WORKSPACE_FOLDER="$(cd "${SCRIPT_DIR}/.." && pwd)"
-
-PYTHON="${WORKSPACE_FOLDER}/.venv/bin/python"
-UPLOAD_WIKI_JSON="${WORKSPACE_FOLDER}/scripts/upload_wiki_json.py"
-REPARSE_FAILED_DOCS="${WORKSPACE_FOLDER}/scripts/reparse_failed_documents.py"
-
-# Log directory
-LOG_DIR="${WORKSPACE_FOLDER}/logs"
-mkdir -p "${LOG_DIR}"
-
-# PID file directory
-PID_DIR="${WORKSPACE_FOLDER}/pids"
-mkdir -p "${PID_DIR}"
-
-# Ensure runtime exists
-if [ ! -x "${PYTHON}" ]; then
-    echo "[tools][ERROR] Python venv not found: ${PYTHON}" >&2
-    echo "[tools][ERROR] Please run: ${WORKSPACE_FOLDER}/scripts/setup_tools_venv.sh" >&2
-    exit 1
-fi
-
-# Upload Wiki JSON data
-upload_wiki_json() {
-    local pid_file="${PID_DIR}/upload_wiki_json.pid"
-
-    # Check whether the task is already running
-    if [ -f "$pid_file" ]; then
-        PID=$(cat "$pid_file")
-        if ps -p $PID > /dev/null 2>&1; then
-            echo "Wiki JSON upload task is already running (PID: $PID)"
-            return 1
-        fi
-    fi
-
-    echo "Starting Wiki JSON upload task..."
-    cd "${WORKSPACE_FOLDER}"
-
-    # Defaults (override via environment variables)
-    local api_key="${API_KEY:-}"
-    local host="${HOST:-http://127.0.0.1:9380}"
-    local data_dir="${WIKI_DATA_DIR:-}"
-    local dataset_id="${DATASET_ID:-}"
-    local batch_size="${BATCH_SIZE:-1000}"
-    local snapshot_file="${WIKI_SNAPSHOT_FILE:-${LOG_DIR}/upload_snapshot.json}"
-    local enable_resume="${WIKI_ENABLE_RESUME:-true}"
-
-    # Build command-line arguments
-    local resume_args=""
-    if [ "$enable_resume" = "true" ]; then
-        resume_args="--resume -s ${snapshot_file}"
-    fi
-
-    nohup env \
-        PYTHONPATH="${WORKSPACE_FOLDER}" \
-        "${PYTHON}" "${UPLOAD_WIKI_JSON}" \
-        -k "${api_key}" \
-        -H "${host}" \
-        -d "${data_dir}" \
-        -i "${dataset_id}" \
-        -b "${batch_size}" \
-        ${resume_args} > "${LOG_DIR}/upload_wiki_json.log" 2>&1 &
-
-    UPLOAD_PID=$!
-    echo $UPLOAD_PID > "$pid_file"
-    echo "Wiki JSON upload task started (PID: $UPLOAD_PID)"
-    echo "Log file: ${LOG_DIR}/upload_wiki_json.log"
-    if [ "$enable_resume" = "true" ]; then
-        echo "Snapshot file: ${snapshot_file}"
-        echo "The task supports checkpoint resume"
-    fi
-}
-
-# Stop the Wiki JSON upload task
-stop_upload_wiki_json() {
-    local pid_file="${PID_DIR}/upload_wiki_json.pid"
-
-    if [ ! -f "$pid_file" ]; then
-        echo "Wiki JSON upload task is not running"
-        return 1
-    fi
-
-    PID=$(cat "$pid_file")
-    if ps -p $PID > /dev/null 2>&1; then
-        echo "Stopping Wiki JSON upload task (PID: $PID)..."
-        kill $PID
-        rm "$pid_file"
-        echo "Wiki JSON upload task stopped"
-    else
-        echo "Wiki JSON upload task is not running"
-        rm "$pid_file"
-    fi
-}
-
-# Reparse failed documents
-reparse_failed_documents() {
-    local pid_file="${PID_DIR}/reparse_failed_docs.pid"
-
-    # Check whether the task is already running
-    if [ -f "$pid_file" ]; then
-        PID=$(cat "$pid_file")
-        if ps -p $PID > /dev/null 2>&1; then
-            echo "Reparse-failed-documents task is already running (PID: $PID)"
-            return 1
-        fi
-    fi
-
-    echo "Starting reparse-failed-documents task..."
-    cd "${WORKSPACE_FOLDER}"
-
-    # Defaults (override via environment variables)
-    local api_key="${API_KEY:-}"
-    local host="${HOST:-http://127.0.0.1:9380}"
-    local dataset_id="${DATASET_ID:-}"
-    local batch_size="${BATCH_SIZE:-1000}"
-
-    if [ -z "$dataset_id" ]; then
-        echo "ERROR: the DATASET_ID environment variable must be set"
-        return 1
-    fi
-
-    nohup env \
-        PYTHONPATH="${WORKSPACE_FOLDER}" \
-        "${PYTHON}" "${REPARSE_FAILED_DOCS}" \
-        -k "${api_key}" \
-        -H "${host}" \
-        -i "${dataset_id}" \
-        -b "${batch_size}" > "${LOG_DIR}/reparse_failed_docs.log" 2>&1 &
-
-    REPARSE_PID=$!
-    echo $REPARSE_PID > "$pid_file"
-    echo "Reparse-failed-documents task started (PID: $REPARSE_PID)"
-    echo "Log file: ${LOG_DIR}/reparse_failed_docs.log"
-}
-
-# Stop the reparse-failed-documents task
-stop_reparse_failed_documents() {
-    local pid_file="${PID_DIR}/reparse_failed_docs.pid"
-
-    if [ ! -f "$pid_file" ]; then
-        echo "Reparse-failed-documents task is not running"
-        return 1
-    fi
-
-    PID=$(cat "$pid_file")
-    if ps -p $PID > /dev/null 2>&1; then
-        echo "Stopping reparse-failed-documents task (PID: $PID)..."
-        kill $PID
-        rm "$pid_file"
-        echo "Reparse-failed-documents task stopped"
-    else
-        echo "Reparse-failed-documents task is not running"
-        rm "$pid_file"
-    fi
-}
-
-# Show tool task status
-status() {
-    echo "=== Tool task status ==="
-
-    # Wiki JSON Upload
-    local upload_pid_file="${PID_DIR}/upload_wiki_json.pid"
-    if [ -f "$upload_pid_file" ]; then
-        PID=$(cat "$upload_pid_file")
-        if ps -p $PID > /dev/null 2>&1; then
-            echo "Wiki JSON Upload: running (PID: $PID)"
-        else
-            echo "Wiki JSON Upload: not running"
-        fi
-    else
-        echo "Wiki JSON Upload: not running"
-    fi
-
-    # Reparse Failed Documents
-    echo ""
-    local reparse_pid_file="${PID_DIR}/reparse_failed_docs.pid"
-    if [ -f "$reparse_pid_file" ]; then
-        PID=$(cat "$reparse_pid_file")
-        if ps -p $PID > /dev/null 2>&1; then
-            echo "Reparse Failed Documents: running (PID: $PID)"
-        else
-            echo "Reparse Failed Documents: not running"
-        fi
-    else
-        echo "Reparse Failed Documents: not running"
-    fi
-}
-
-# Main entry
-case "$1" in
-    upload-wiki)
-        upload_wiki_json
-        ;;
-    stop-upload-wiki)
-        stop_upload_wiki_json
-        ;;
-    reparse-failed)
-        reparse_failed_documents
-        ;;
-    stop-reparse-failed)
-        stop_reparse_failed_documents
-        ;;
-    status)
-        status
-        ;;
-    *)
-        echo "Usage: $0 {upload-wiki|stop-upload-wiki|reparse-failed|stop-reparse-failed|status}"
-        echo ""
-        echo "Environment variables:"
-        echo ""
-        echo "  Wiki upload:"
-        echo "    API_KEY            - API key"
-        echo "    HOST               - server address (default: http://127.0.0.1:9380)"
-        echo "    WIKI_DATA_DIR      - data directory"
-        echo "    DATASET_ID         - dataset ID (optional)"
-        echo "    BATCH_SIZE         - batch size (default: 1000)"
-        echo "    WIKI_SNAPSHOT_FILE - snapshot file path (default: \${LOG_DIR}/upload_snapshot.json)"
-        echo "    WIKI_ENABLE_RESUME - enable checkpoint resume (default: true)"
-        echo ""
-        echo "  Reparse failed documents:"
-        echo "    API_KEY            - API key"
-        echo "    HOST               - server address (default: http://127.0.0.1:9380)"
-        echo "    DATASET_ID         - dataset ID (required)"
-        echo "    BATCH_SIZE         - batch size (default: 1000)"
-        echo ""
-        echo "Examples:"
-        echo "  $0 upload-wiki                           # upload Wiki JSON data (background, supports checkpoint resume)"
-        echo "  $0 stop-upload-wiki                      # stop the Wiki JSON upload task"
-        echo "  BATCH_SIZE=2000 $0 upload-wiki           # upload with a custom batch size"
-        echo "  WIKI_ENABLE_RESUME=false $0 upload-wiki  # disable checkpoint resume"
-        echo "  DATASET_ID=xxx $0 reparse-failed         # reparse failed documents in the given dataset"
-        echo "  $0 stop-reparse-failed                   # stop the reparse-failed-documents task"
-        echo "  $0 status                                # show tool task status"
-        exit 1
-        ;;
-esac
-
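As a companion to the patches above, the following minimal sketch (illustrative only, not part of any patch) shows the instance-to-port and instance-to-conf mapping that both docker/entrypoint.sh and scripts/deploy.sh implement. It assumes the SVR_COUNT, SVR_HTTP_PORT, and SVR_EXTRA_BASE_HTTP_PORT variables used throughout the series; the default values below are placeholders for the demo, not values taken from a real .env. Conf names follow the entrypoint.sh convention (port-based); deploy.sh names extra-instance confs by index (service_conf_ragflow_<idx>.yaml) instead.

```bash
#!/usr/bin/env bash
# Illustrative sketch of the multi-instance mapping used in this patch series.
SVR_COUNT="${SVR_COUNT:-3}"
SVR_HTTP_PORT="${SVR_HTTP_PORT:-9380}"
SVR_EXTRA_BASE_HTTP_PORT="${SVR_EXTRA_BASE_HTTP_PORT:-9400}"

for (( idx=0; idx<SVR_COUNT; idx++ )); do
    if [[ "${idx}" -eq 0 ]]; then
        port="${SVR_HTTP_PORT}"                       # instance 0: main port, base conf
        conf="local.service_conf.yaml"
    else
        port=$((SVR_EXTRA_BASE_HTTP_PORT + idx - 1))  # instances 1..: extra base + (idx - 1)
        conf="service_conf_ragflow_${port}.yaml"
    fi
    echo "instance ${idx}: port=${port} conf=conf/${conf}"
    # This is the line the scripts append to the nginx upstream include,
    # so nginx load-balances API requests across all instances.
    echo "upstream entry: server 127.0.0.1:${port};"
done
```

With SVR_COUNT=3 and the placeholder defaults, this prints ports 9380, 9400, and 9401, matching the upstream entries the scripts generate for nginx.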