diff --git a/CLIENT_DEBUG_API.md b/CLIENT_DEBUG_API.md new file mode 100644 index 0000000..13da3df --- /dev/null +++ b/CLIENT_DEBUG_API.md @@ -0,0 +1,359 @@ +# 客户端调试 API 使用指南 + +## 概述 + +现在你可以通过 HTTP API 从客户端查看数据库操作日志和系统信息,无需登录服务器! + +所有调试 API 都**免 Token 认证**,方便快速调试。 + +--- + +## 🔍 API 端点列表 + +### 1. 查看日志 `/api/debug/logs` + +查看最近的 Flask 应用日志,包括所有数据库操作。 + +**注意:** Celery Worker 的日志在单独的文件中:`{DATA_DIR}/logs/celery_worker.log` + +**请求:** +```bash +# 查看最近 100 行日志 +curl http://localhost:5000/api/debug/logs + +# 查看最近 500 行 +curl http://localhost:5000/api/debug/logs?lines=500 + +# 只看数据库相关的日志 +curl http://localhost:5000/api/debug/logs?filter=数据库 + +# 只看错误日志 +curl http://localhost:5000/api/debug/logs?filter=ERROR + +# 格式化输出 +curl http://localhost:5000/api/debug/logs | jq . +``` + +**响应示例:** +```json +{ + "log_file": "/home/user/remoteCI/data/logs/app.log", + "total_lines": 523, + "returned_lines": 100, + "filter": "数据库", + "logs": [ + "2024-01-15 10:23:45 [INFO] remoteCI.database: [数据库初始化] 路径: /home/user/remoteCI/data/jobs.db\n", + "2024-01-15 10:23:45 [INFO] remoteCI.database: [数据库初始化] 文件存在: True\n", + "2024-01-15 10:25:12 [INFO] remoteCI.database: [数据库写入] 准备创建任务记录: job_id=abc123, mode=git, user_id=test\n", + "2024-01-15 10:25:12 [INFO] remoteCI.database: ✓ 任务记录创建成功: job_id=abc123, 验证=1条, 文件大小=12345B\n" + ] +} +``` + +--- + +### 2. 数据库信息 `/api/debug/db-info` + +查看数据库路径、大小、任务统计和最近的数据库操作。 + +**请求:** +```bash +curl http://localhost:5000/api/debug/db-info | jq . 
+``` + +**响应示例:** +```json +{ + "database_path": "/home/user/remoteCI/data/jobs.db", + "data_dir": "/home/user/remoteCI/data", + "file_exists": true, + "file_size": 45056, + "file_size_mb": 0.04, + "last_modified": "2024-01-15T10:25:12", + "total_jobs": 15, + "jobs_by_status": { + "success": 10, + "running": 2, + "failed": 3 + }, + "recent_db_operations": [ + "2024-01-15 10:25:12 [INFO] [数据库写入] 准备创建任务记录: job_id=abc123", + "2024-01-15 10:25:12 [INFO] ✓ 任务记录创建成功: job_id=abc123, 验证=1条", + "2024-01-15 10:25:15 [INFO] [数据库查询] 查询任务列表: limit=20, offset=0" + ] +} +``` + +--- + +### 3. 配置信息 `/api/debug/config` + +查看环境变量、配置路径和进程信息。 + +**请求:** +```bash +curl http://localhost:5000/api/debug/config | jq . +``` + +**响应示例:** +```json +{ + "environment": { + "CI_DATA_DIR": "/home/user/remoteCI/data", + "CI_WORK_DIR": "/tmp/remote-ci", + "CI_WORKSPACE_DIR": "/var/ci-workspace", + "PYTHONUNBUFFERED": "1" + }, + "config": { + "DATA_DIR": "/home/user/remoteCI/data", + "WORKSPACE_DIR": "/var/ci-workspace", + "API_HOST": "0.0.0.0", + "API_PORT": 5000 + }, + "system": { + "python_version": "3.10.12 ...", + "hostname": "remoteCI-server", + "pid": 12345, + "cwd": "/home/user/remoteCI" + }, + "database": { + "path": "/home/user/remoteCI/data/jobs.db", + "exists": true + }, + "log_file": "/home/user/remoteCI/data/logs/app.log" +} +``` + +--- + +## 🎯 典型使用场景 + +### 场景 1: 检查数据库路径是否正确 + +```bash +# 查看配置 +curl http://localhost:5000/api/debug/config | jq '.database' + +# 输出: +# { +# "path": "/home/user/remoteCI/data/jobs.db", +# "exists": true +# } +``` + +### 场景 2: 追踪任务提交流程 + +```bash +# 1. 提交任务 +JOB_ID=$(curl -s -X POST http://localhost:5000/api/jobs/git \ + -H "Authorization: Bearer your-token" \ + -H "Content-Type: application/json" \ + -d '{ + "repo": "https://github.com/octocat/Hello-World.git", + "branch": "master", + "script": "echo test", + "user_id": "test-user" + }' | jq -r '.job_id') + +echo "任务ID: $JOB_ID" + +# 2. 等待几秒 +sleep 3 + +# 3. 
查看日志,追踪这个任务的操作 +curl "http://localhost:5000/api/debug/logs?filter=$JOB_ID&lines=50" | jq -r '.logs[]' +``` + +你会看到: +``` +[INFO] [数据库写入] 准备创建任务记录: job_id=abc-123-xyz, mode=git, user_id=test-user +[INFO] ✓ 任务记录创建成功: job_id=abc-123-xyz, 验证=1条, 文件大小=12345B +[INFO] [数据库更新] 更新任务开始状态: job_id=abc-123-xyz +[INFO] ✓ 任务状态更新为 running: job_id=abc-123-xyz, 影响1行 +[INFO] ✓ 任务状态更新为 success: job_id=abc-123-xyz, 影响1行 +``` + +### 场景 3: 诊断任务无法查询的问题 + +```bash +# 1. 查看数据库信息 +curl http://localhost:5000/api/debug/db-info | jq . + +# 检查: +# - file_exists: 应该是 true +# - total_jobs: 应该 > 0 +# - jobs_by_status: 查看任务分布 + +# 2. 查看最近的数据库操作 +curl "http://localhost:5000/api/debug/logs?filter=数据库&lines=50" | jq -r '.logs[]' + +# 3. 检查写入和查询的数据库路径是否一致 +curl http://localhost:5000/api/debug/logs | jq -r '.logs[]' | grep "数据库路径" +``` + +### 场景 4: 查看错误日志 + +```bash +# 只看错误和警告 +curl "http://localhost:5000/api/debug/logs?filter=ERROR&lines=100" | jq -r '.logs[]' +curl "http://localhost:5000/api/debug/logs?filter=WARNING&lines=100" | jq -r '.logs[]' + +# 查看失败的任务创建 +curl "http://localhost:5000/api/debug/logs?filter=创建任务记录失败" | jq -r '.logs[]' +``` + +### 场景 5: 实时监控日志(轮询) + +创建一个简单的监控脚本: + +```bash +#!/bin/bash +# monitor_logs.sh - 每5秒刷新日志 + +while true; do + clear + echo "=== Remote CI 实时日志 (Ctrl+C 退出) ===" + echo "时间: $(date)" + echo + curl -s "http://localhost:5000/api/debug/logs?filter=数据库&lines=20" | \ + jq -r '.logs[]' | tail -20 + sleep 5 +done +``` + +--- + +## 📝 完整调试工作流 + +```bash +# 1. 检查系统配置 +echo "=== 1. 检查配置 ===" +curl -s http://localhost:5000/api/debug/config | jq '{ + database: .database, + data_dir: .config.DATA_DIR, + env_data_dir: .environment.CI_DATA_DIR +}' + +# 2. 检查数据库状态 +echo -e "\n=== 2. 数据库状态 ===" +curl -s http://localhost:5000/api/debug/db-info | jq '{ + path: .database_path, + exists: .file_exists, + size_mb: .file_size_mb, + total_jobs: .total_jobs, + status: .jobs_by_status +}' + +# 3. 提交测试任务 +echo -e "\n=== 3. 
提交测试任务 ===" +JOB_RESPONSE=$(curl -s -X POST http://localhost:5000/api/jobs/git \ + -H "Authorization: Bearer your-token" \ + -H "Content-Type: application/json" \ + -d '{ + "repo": "https://github.com/octocat/Hello-World.git", + "branch": "master", + "script": "echo test", + "user_id": "debug-test" + }') + +JOB_ID=$(echo $JOB_RESPONSE | jq -r '.job_id') +echo "任务ID: $JOB_ID" + +# 4. 等待任务执行 +echo -e "\n=== 4. 等待执行... ===" +sleep 5 + +# 5. 查看任务相关的日志 +echo -e "\n=== 5. 任务日志 ===" +curl -s "http://localhost:5000/api/debug/logs?filter=$JOB_ID&lines=50" | \ + jq -r '.logs[]' | grep -E "(数据库|$JOB_ID)" + +# 6. 验证任务是否可查询 +echo -e "\n=== 6. 查询任务 ===" +curl -s "http://localhost:5000/api/jobs/history?user_id=debug-test" | \ + jq '.jobs[] | {job_id, status, mode}' + +# 7. 查看数据库操作统计 +echo -e "\n=== 7. 最近数据库操作 ===" +curl -s http://localhost:5000/api/debug/db-info | \ + jq -r '.recent_db_operations[]' | tail -10 +``` + +保存为 `full_debug.sh`,然后运行: +```bash +chmod +x full_debug.sh +./full_debug.sh +``` + +--- + +## 🛠️ 浏览器中查看 + +你也可以在浏览器中直接访问(不需要 jq): + +``` +http://localhost:5000/api/debug/logs?lines=100 +http://localhost:5000/api/debug/db-info +http://localhost:5000/api/debug/config +``` + +浏览器会显示 JSON 格式的响应。 + +--- + +## ⚠️ 安全提示 + +这些调试 API 目前**不需要 Token 认证**,方便调试。 + +在生产环境中,建议: +1. 添加 IP 白名单限制 +2. 或者添加 Token 认证 +3. 或者只在调试模式下启用 + +临时禁用方法(在 app.py 中添加): +```python +DEBUG_MODE = os.getenv('DEBUG_MODE', 'false').lower() == 'true' + +@app.route('/api/debug/logs', methods=['GET']) +def get_debug_logs(): + if not DEBUG_MODE: + return jsonify({'error': 'Debug mode disabled'}), 403 + # ... 
原有代码 +``` + +--- + +## 📊 日志格式说明 + +日志格式: +``` +时间 [级别] 模块名: 消息 +``` + +例如: +``` +2024-01-15 10:25:12 [INFO] remoteCI.database: [数据库写入] 准备创建任务记录: job_id=abc123 +``` + +- **时间**: `2024-01-15 10:25:12` +- **级别**: `INFO`, `WARNING`, `ERROR` +- **模块**: `remoteCI.database`, `remoteCI.app` +- **消息**: 具体的操作信息 + +--- + +## 🎉 总结 + +有了这些 API,你现在可以: + +✅ 从客户端查看所有数据库操作日志 +✅ 实时追踪任务的完整流程 +✅ 检查数据库路径配置 +✅ 诊断数据不一致问题 +✅ 无需登录服务器查看日志 + +**下一步:** +1. 启动服务 +2. 提交一个测试任务 +3. 用这些 API 追踪任务流程 +4. 如果发现问题,立即能看到详细日志! diff --git a/DATABASE_DIAGNOSIS.md b/DATABASE_DIAGNOSIS.md new file mode 100644 index 0000000..2bb53e0 --- /dev/null +++ b/DATABASE_DIAGNOSIS.md @@ -0,0 +1,268 @@ +# 数据库路径不一致问题诊断指南 + +## 问题描述 +任务提交后,在查询时看不到该任务记录,怀疑写入任务的数据库与查询所用的数据库不是同一个文件。 + +## 问题根源分析 + +### 1. 数据库初始化位置 + +系统在两个不同的模块中初始化了数据库实例: + +- **server/app.py:30** (Flask 进程) + ```python + job_db = JobDatabase(f"{DATA_DIR}/jobs.db") + ``` + +- **server/tasks.py:26** (Celery Worker 进程) + ```python + job_db = JobDatabase(f"{DATA_DIR}/jobs.db") + ``` + +### 2. DATA_DIR 的计算逻辑 + +在 **server/config.py:16**: +```python +BASE_DIR = Path(__file__).parent.parent +DATA_DIR = os.getenv('CI_DATA_DIR', str(BASE_DIR / 'data')) +``` + +**问题点:** +1. 如果没有设置 `CI_DATA_DIR` 环境变量,`DATA_DIR` 将基于 `BASE_DIR` 计算 +2. `BASE_DIR = Path(__file__).parent.parent` 依赖于当前文件的位置 +3. **Flask 和 Celery Worker 可能从不同的目录启动**,导致 `BASE_DIR` 不同 +4. 最终导致两个进程使用不同的数据库文件路径 + +### 3. 
数据流程 + +``` +用户提交任务 + ↓ +Flask (app.py) → job_db.create_job() → 写入数据库A + ↓ +Celery Worker (tasks.py) → job_db.update_job_started() → 写入数据库B (可能不同) + ↓ +Flask (app.py) → job_db.get_jobs() → 从数据库A读取 (没有更新的数据) +``` + +## 诊断步骤 + +### 步骤 1: 检查当前数据库文件位置 + +```bash +# 查找所有数据库文件 +find /home/user/remoteCI -name 'jobs.db' +find /tmp -name 'jobs.db' +find /var -name 'jobs.db' 2>/dev/null + +# 查看当前 data 目录 +ls -la /home/user/remoteCI/data/ +``` + +### 步骤 2: 检查环境变量 + +```bash +# 查看当前环境变量 +echo "CI_DATA_DIR: $CI_DATA_DIR" + +# 如果使用了 systemd 服务,检查服务配置 +systemctl show -p Environment celery +systemctl show -p Environment remote-ci + +# 如果使用了 supervisor,检查配置文件 +grep CI_DATA_DIR /etc/supervisor/conf.d/*.conf +``` + +### 步骤 3: 查看进程实际使用的路径 + +创建测试脚本来验证每个进程看到的路径: + +**test_flask_db_path.py:** +```python +#!/usr/bin/env python3 +import sys +sys.path.insert(0, '/home/user/remoteCI') + +from server.config import DATA_DIR, BASE_DIR +print(f"Flask 进程看到的路径:") +print(f" BASE_DIR: {BASE_DIR}") +print(f" DATA_DIR: {DATA_DIR}") +print(f" 数据库路径: {DATA_DIR}/jobs.db") +``` + +**test_celery_db_path.py:** +```python +#!/usr/bin/env python3 +import sys +import os +sys.path.insert(0, '/home/user/remoteCI') + +# 模拟 Celery Worker 的环境 +os.chdir('/home/user/remoteCI') # 或者 Celery 实际的工作目录 + +from server.config import DATA_DIR, BASE_DIR +print(f"Celery Worker 进程看到的路径:") +print(f" BASE_DIR: {BASE_DIR}") +print(f" DATA_DIR: {DATA_DIR}") +print(f" 数据库路径: {DATA_DIR}/jobs.db") +``` + +### 步骤 4: 查看数据库内容 + +```bash +# 如果找到了数据库文件,查看内容 +sqlite3 /path/to/jobs.db "SELECT COUNT(*) FROM ci_jobs;" +sqlite3 /path/to/jobs.db "SELECT job_id, status, mode, created_at FROM ci_jobs ORDER BY created_at DESC LIMIT 10;" +``` + +### 步骤 5: 检查进程日志 + +```bash +# Flask 日志 +journalctl -u remote-ci -n 50 --no-pager | grep -i "data_dir\|database" + +# Celery 日志 +journalctl -u celery -n 50 --no-pager | grep -i "data_dir\|database" + +# 或者查看应用日志文件 +tail -100 /var/log/remote-ci/*.log | grep -i "data_dir\|database" +``` + +## 解决方案 + +### 方案 1: 
设置环境变量(推荐) + +在系统启动配置中明确设置 `CI_DATA_DIR` 为绝对路径: + +**1. 创建或编辑 .env 文件**(如果使用了): +```bash +cat > /home/user/remoteCI/.env << 'EOF' +CI_DATA_DIR=/home/user/remoteCI/data +CI_WORK_DIR=/tmp/remote-ci +CI_WORKSPACE_DIR=/var/ci-workspace +CI_API_HOST=0.0.0.0 +CI_API_PORT=5000 +CI_API_TOKEN=your-secure-token +CI_BROKER_URL=redis://localhost:6379/0 +CI_RESULT_BACKEND=redis://localhost:6379/0 +EOF +``` + +**2. 如果使用 systemd,编辑服务文件**: + +编辑 `/etc/systemd/system/remote-ci.service`: +```ini +[Service] +Environment="CI_DATA_DIR=/home/user/remoteCI/data" +Environment="CI_WORK_DIR=/tmp/remote-ci" +Environment="CI_WORKSPACE_DIR=/var/ci-workspace" +``` + +编辑 `/etc/systemd/system/celery.service`(或 celery worker 的服务文件): +```ini +[Service] +Environment="CI_DATA_DIR=/home/user/remoteCI/data" +Environment="CI_WORK_DIR=/tmp/remote-ci" +Environment="CI_WORKSPACE_DIR=/var/ci-workspace" +``` + +然后重新加载并重启服务: +```bash +sudo systemctl daemon-reload +sudo systemctl restart remote-ci +sudo systemctl restart celery +``` + +**3. 
如果使用 supervisor,编辑配置文件**: + +编辑 `/etc/supervisor/conf.d/remote-ci.conf`: +```ini +[program:remote-ci] +environment=CI_DATA_DIR="/home/user/remoteCI/data",CI_WORK_DIR="/tmp/remote-ci" +``` + +编辑 `/etc/supervisor/conf.d/celery.conf`: +```ini +[program:celery] +environment=CI_DATA_DIR="/home/user/remoteCI/data",CI_WORK_DIR="/tmp/remote-ci" +``` + +然后重启: +```bash +sudo supervisorctl reread +sudo supervisorctl update +sudo supervisorctl restart all +``` + +### 方案 2: 修改代码使用固定的绝对路径(备选) + +如果无法统一环境变量,可以修改代码确保所有进程使用同一个硬编码路径: + +**修改 server/config.py:** +```python +# 使用绝对路径,不依赖 __file__ +DATA_DIR = os.getenv('CI_DATA_DIR', '/home/user/remoteCI/data') +WORK_DIR = os.getenv('CI_WORK_DIR', '/tmp/remote-ci') +WORKSPACE_DIR = os.getenv('CI_WORKSPACE_DIR', '/var/ci-workspace') +``` + +### 方案 3: 添加日志记录数据库路径(调试用) + +在 server/app.py 和 server/tasks.py 的开头添加日志: + +```python +# server/app.py +job_db = JobDatabase(f"{DATA_DIR}/jobs.db") +print(f"[Flask] 数据库路径: {DATA_DIR}/jobs.db") + +# server/tasks.py +job_db = JobDatabase(f"{DATA_DIR}/jobs.db") +print(f"[Celery Worker] 数据库路径: {DATA_DIR}/jobs.db") +``` + +这样可以在启动时看到每个进程实际使用的路径。 + +## 验证修复 + +修复后,执行以下步骤验证: + +```bash +# 1. 重启所有服务 +sudo systemctl restart remote-ci celery +# 或 +sudo supervisorctl restart all + +# 2. 提交一个测试任务 +curl -X POST http://localhost:5000/api/jobs/git \ + -H "Authorization: Bearer your-token" \ + -H "Content-Type: application/json" \ + -d '{ + "repo": "https://github.com/your-repo.git", + "branch": "main", + "script": "echo test", + "user_id": "test-user" + }' + +# 3. 查看任务列表 +curl http://localhost:5000/api/jobs/history | jq . + +# 4. 检查数据库 +sqlite3 /home/user/remoteCI/data/jobs.db "SELECT job_id, status, mode FROM ci_jobs ORDER BY created_at DESC LIMIT 5;" +``` + +## 预防措施 + +1. **始终使用环境变量设置路径**,不依赖相对路径计算 +2. **在部署文档中明确说明环境变量要求** +3. **添加健康检查**,定期验证数据库连接 +4. **添加启动时的路径日志**,方便排查问题 +5. 
**使用容器化部署**(Docker),确保环境一致性 + +## 快速检查清单 + +- [ ] 确认 `CI_DATA_DIR` 环境变量已设置 +- [ ] Flask 和 Celery Worker 使用相同的环境变量 +- [ ] 数据库文件只有一个,位于正确的位置 +- [ ] 两个进程都有权限读写数据库文件 +- [ ] 提交任务后能在历史记录中看到 +- [ ] 任务状态能正确更新 diff --git a/DEBUG_WITH_LOGS.md b/DEBUG_WITH_LOGS.md new file mode 100644 index 0000000..b3d22d3 --- /dev/null +++ b/DEBUG_WITH_LOGS.md @@ -0,0 +1,349 @@ +# 使用日志调试数据库问题 + +## 已添加的日志功能 + +我在 `server/database.py` 中添加了详细的日志输出,可以追踪: + +### 1. 数据库初始化 +``` +[数据库初始化] 路径: /home/user/remoteCI/data/jobs.db +[数据库初始化] 文件存在: True/False +``` + +### 2. 创建任务记录 +``` +[数据库写入] 准备创建任务记录 + 数据库路径: /home/user/remoteCI/data/jobs.db + 任务ID: abc123... + 模式: git/upload/rsync + 用户ID: user123 +✓ 任务记录创建成功 + 验证查询: 找到 1 条记录 + 数据库文件大小: 12345 字节 +``` + +### 3. 更新任务状态 +``` +[数据库更新] 更新任务开始状态 + 数据库路径: /home/user/remoteCI/data/jobs.db + 任务ID: abc123... +✓ 任务状态更新为 running,影响 1 行 +``` + +### 4. 查询任务列表 +``` +[数据库查询] 查询任务列表 + 数据库路径: /home/user/remoteCI/data/jobs.db + limit=50, offset=0, filters=None + 执行SQL: SELECT * FROM ci_jobs ORDER BY created_at DESC LIMIT ? OFFSET ? + 参数: [50, 0] +✓ 查询完成,返回 10 条记录 + 前3条记录: + 1. abc123... | success | git + 2. def456... | running | upload + 3. ghi789... 
| queued | rsync +``` + +## 查看日志的方法 + +### 方法 1: 实时查看日志(推荐) + +**如果使用 systemd 服务:** + +```bash +# Flask 日志(实时) +sudo journalctl -u remote-ci -f + +# Celery Worker 日志(实时) +sudo journalctl -u celery -f + +# 同时查看两个服务的日志 +sudo journalctl -u remote-ci -u celery -f +``` + +**如果手动启动:** + +```bash +# 终端 1: 启动 Flask(会看到日志输出) +cd /home/user/remoteCI +python3 -m server.app + +# 终端 2: 启动 Celery Worker(会看到日志输出) +cd /home/user/remoteCI +celery -A server.celery_app worker --loglevel=info +``` + +### 方法 2: 查看历史日志 + +```bash +# Flask 最近 100 行日志 +sudo journalctl -u remote-ci -n 100 --no-pager + +# Celery Worker 最近 100 行日志 +sudo journalctl -u celery -n 100 --no-pager + +# 搜索特定关键词 +sudo journalctl -u remote-ci -u celery | grep -E "\[数据库|Database" + +# 只看数据库相关的日志 +sudo journalctl -u remote-ci -u celery | grep "数据库" +``` + +### 方法 3: 保存日志到文件 + +```bash +# 保存最近的日志 +sudo journalctl -u remote-ci -u celery --since "1 hour ago" > debug_logs.txt + +# 查看保存的日志 +less debug_logs.txt +``` + +## 调试步骤 + +### 步骤 1: 清空并重启 + +为了获得干净的日志,先重启服务: + +```bash +# 重启服务 +sudo systemctl restart remote-ci +sudo systemctl restart celery + +# 等待几秒让服务启动 +sleep 3 + +# 检查服务状态 +sudo systemctl status remote-ci +sudo systemctl status celery +``` + +启动时你应该看到: +``` +[数据库初始化] 路径: /home/user/remoteCI/data/jobs.db +[数据库初始化] 文件存在: True +``` + +**如果看到不同的路径,这就是问题所在!** + +### 步骤 2: 提交测试任务 + +在另一个终端提交任务: + +```bash +# 提交一个简单的测试任务 +curl -X POST http://localhost:5000/api/jobs/git \ + -H "Authorization: Bearer your-token" \ + -H "Content-Type: application/json" \ + -d '{ + "repo": "https://github.com/octocat/Hello-World.git", + "branch": "master", + "script": "echo test", + "user_id": "debug-user" + }' | jq . 
+``` + +### 步骤 3: 观察日志 + +在日志中你应该看到: + +**Flask 日志(提交任务时):** +``` +[数据库写入] 准备创建任务记录 + 数据库路径: /home/user/remoteCI/data/jobs.db + 任务ID: xxxxxxxxxx + 模式: git + 用户ID: debug-user +✓ 任务记录创建成功 + 验证查询: 找到 1 条记录 + 数据库文件大小: xxxxx 字节 +``` + +**Celery Worker 日志(执行任务时):** +``` +[数据库更新] 更新任务开始状态 + 数据库路径: /home/user/remoteCI/data/jobs.db + 任务ID: xxxxxxxxxx +✓ 任务状态更新为 running,影响 1 行 + +[数据库更新] 更新任务完成状态 + 数据库路径: /home/user/remoteCI/data/jobs.db + 任务ID: xxxxxxxxxx + 最终状态: success +✓ 任务状态更新为 success,影响 1 行 +``` + +### 步骤 4: 查询任务 + +```bash +# 查询任务历史 +curl http://localhost:5000/api/jobs/history | jq . +``` + +在 Flask 日志中你应该看到: +``` +[数据库查询] 查询任务列表 + 数据库路径: /home/user/remoteCI/data/jobs.db + limit=20, offset=0, filters=None + 执行SQL: SELECT * FROM ci_jobs ORDER BY created_at DESC LIMIT ? OFFSET ? + 参数: [20, 0] +✓ 查询完成,返回 1 条记录 + 前3条记录: + 1. xxxxxxxxxx... | success | git +``` + +## 问题诊断 + +### 问题 A: 数据库路径不一致 + +**症状:** +- Flask 日志显示: `数据库路径: /home/user/remoteCI/data/jobs.db` +- Celery 日志显示: `数据库路径: /tmp/data/jobs.db` (不同!) + +**原因:** +Flask 和 Celery Worker 使用了不同的数据库文件。 + +**解决方案:** +```bash +# 1. 创建 .env 文件 +cat > /home/user/remoteCI/.env << 'EOF' +CI_DATA_DIR=/home/user/remoteCI/data +EOF + +# 2. 更新 systemd 服务配置 +sudo systemctl edit remote-ci +# 添加: +[Service] +Environment="CI_DATA_DIR=/home/user/remoteCI/data" + +sudo systemctl edit celery +# 添加: +[Service] +Environment="CI_DATA_DIR=/home/user/remoteCI/data" + +# 3. 重启服务 +sudo systemctl daemon-reload +sudo systemctl restart remote-ci celery + +# 4. 验证 - 两个日志中的路径应该相同 +sudo journalctl -u remote-ci -u celery -n 50 | grep "数据库路径" +``` + +### 问题 B: 写入成功但查询为空 + +**症状:** +- 看到 `✓ 任务记录创建成功` +- 看到 `验证查询: 找到 1 条记录` +- 但查询时返回 `✓ 查询完成,返回 0 条记录` + +**可能原因:** +1. 数据被意外删除 +2. 查询条件过滤掉了所有记录 +3. 时间或顺序问题 + +**解决方案:** +```bash +# 1. 直接查询数据库 +sqlite3 /home/user/remoteCI/data/jobs.db "SELECT COUNT(*) FROM ci_jobs;" + +# 2. 查看所有记录 +sqlite3 /home/user/remoteCI/data/jobs.db "SELECT job_id, status, created_at FROM ci_jobs;" + +# 3. 
检查是否有清理任务在运行 +sudo journalctl -u remote-ci -u celery | grep -i "cleanup\|delete\|clear" +``` + +### 问题 C: 写入失败 + +**症状:** +- 看到 `✗ 创建任务记录失败` +- 有错误堆栈信息 + +**可能原因:** +1. 数据库文件权限问题 +2. 磁盘空间不足 +3. 数据库文件损坏 + +**解决方案:** +```bash +# 1. 检查权限 +ls -la /home/user/remoteCI/data/jobs.db + +# 2. 检查磁盘空间 +df -h /home/user/remoteCI/data + +# 3. 检查数据库完整性 +sqlite3 /home/user/remoteCI/data/jobs.db "PRAGMA integrity_check;" + +# 4. 如果权限有问题 +sudo chown $USER:$USER /home/user/remoteCI/data/jobs.db +chmod 644 /home/user/remoteCI/data/jobs.db +``` + +## 完整调试命令集 + +将以下命令保存为脚本,一键查看所有关键信息: + +```bash +#!/bin/bash +# debug_database_issue.sh + +echo "==========================================" +echo "数据库问题调试信息收集" +echo "==========================================" +echo + +echo "1. 检查服务状态" +echo "---" +systemctl is-active remote-ci && echo "Flask: 运行中" || echo "Flask: 未运行" +systemctl is-active celery && echo "Celery: 运行中" || echo "Celery: 未运行" +echo + +echo "2. 查看数据库初始化日志(路径)" +echo "---" +sudo journalctl -u remote-ci -u celery | grep "数据库初始化" | tail -5 +echo + +echo "3. 最近的数据库操作" +echo "---" +sudo journalctl -u remote-ci -u celery --since "10 minutes ago" | grep -E "\[数据库" +echo + +echo "4. 数据库文件信息" +echo "---" +find /home/user/remoteCI -name 'jobs.db' -exec ls -lh {} \; +echo + +echo "5. 数据库内容统计" +echo "---" +DB_PATH="/home/user/remoteCI/data/jobs.db" +if [ -f "$DB_PATH" ]; then + echo "总任务数: $(sqlite3 $DB_PATH 'SELECT COUNT(*) FROM ci_jobs;')" + echo "按状态统计:" + sqlite3 $DB_PATH "SELECT status, COUNT(*) FROM ci_jobs GROUP BY status;" -header -column +else + echo "数据库文件不存在: $DB_PATH" +fi +echo + +echo "==========================================" +echo "完成" +echo "==========================================" +``` + +保存并运行: +```bash +chmod +x debug_database_issue.sh +./debug_database_issue.sh +``` + +## 下一步 + +运行上述调试后,请提供: + +1. **数据库初始化时的路径**(Flask 和 Celery 各自的) +2. **任务提交时的日志**(是否显示"创建成功") +3. **任务查询时的日志**(返回多少条记录) +4. **`debug_database_issue.sh` 的完整输出** + +有了这些信息,我们就能精确定位问题! 
diff --git a/SQL_QUERIES.md b/SQL_QUERIES.md new file mode 100644 index 0000000..05c5cc9 --- /dev/null +++ b/SQL_QUERIES.md @@ -0,0 +1,407 @@ +# Remote CI 数据库查询参考 + +## 连接数据库 + +```bash +# 假设数据库在默认位置 +DB_PATH="/home/user/remoteCI/data/jobs.db" + +# 交互式模式 +sqlite3 $DB_PATH + +# 执行单条查询 +sqlite3 $DB_PATH "SELECT * FROM ci_jobs LIMIT 10;" + +# 格式化输出 +sqlite3 $DB_PATH "SELECT * FROM ci_jobs LIMIT 10;" -header -column +``` + +## 常用查询 + +### 1. 基础查询 + +```sql +-- 查看所有表 +.tables + +-- 查看表结构 +.schema ci_jobs +.schema special_users + +-- 总任务数 +SELECT COUNT(*) as total_jobs FROM ci_jobs; + +-- 最近10条任务 +SELECT + job_id, + status, + mode, + user_id, + created_at +FROM ci_jobs +ORDER BY created_at DESC +LIMIT 10; +``` + +### 2. 按条件查询 + +```sql +-- 查询特定状态的任务 +SELECT * FROM ci_jobs WHERE status = 'failed'; +SELECT * FROM ci_jobs WHERE status = 'success'; +SELECT * FROM ci_jobs WHERE status IN ('running', 'queued'); + +-- 查询特定用户的任务 +SELECT * FROM ci_jobs WHERE user_id = 'your-user-id'; + +-- 查询特定项目的任务 +SELECT * FROM ci_jobs WHERE project_name = 'your-project'; + +-- 查询特定模式的任务 +SELECT * FROM ci_jobs WHERE mode = 'upload'; +SELECT * FROM ci_jobs WHERE mode = 'rsync'; +SELECT * FROM ci_jobs WHERE mode = 'git'; + +-- 查询特定时间范围的任务 +SELECT * FROM ci_jobs +WHERE created_at >= '2024-01-01' + AND created_at < '2024-02-01'; + +-- 查询今天的任务 +SELECT * FROM ci_jobs +WHERE date(created_at) = date('now'); + +-- 查询最近24小时的任务 +SELECT * FROM ci_jobs +WHERE datetime(created_at) >= datetime('now', '-1 day'); +``` + +### 3. 
统计查询 + +```sql +-- 按状态统计 +SELECT + status, + COUNT(*) as count +FROM ci_jobs +GROUP BY status +ORDER BY count DESC; + +-- 按模式统计 +SELECT + mode, + COUNT(*) as count +FROM ci_jobs +GROUP BY mode; + +-- 按用户统计 +SELECT + user_id, + COUNT(*) as count +FROM ci_jobs +GROUP BY user_id +ORDER BY count DESC; + +-- 按项目统计 +SELECT + project_name, + COUNT(*) as count, + SUM(CASE WHEN status = 'success' THEN 1 ELSE 0 END) as success_count, + SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed_count +FROM ci_jobs +GROUP BY project_name +ORDER BY count DESC; + +-- 成功率统计 +SELECT + COUNT(*) as total, + SUM(CASE WHEN status = 'success' THEN 1 ELSE 0 END) as success, + SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed, + ROUND(100.0 * SUM(CASE WHEN status = 'success' THEN 1 ELSE 0 END) / COUNT(*), 2) as success_rate +FROM ci_jobs; + +-- 每日任务统计 +SELECT + date(created_at) as date, + COUNT(*) as count, + SUM(CASE WHEN status = 'success' THEN 1 ELSE 0 END) as success, + SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed +FROM ci_jobs +GROUP BY date(created_at) +ORDER BY date DESC +LIMIT 30; +``` + +### 4. 性能分析 + +```sql +-- 平均执行时间 +SELECT + AVG(duration) as avg_duration, + MIN(duration) as min_duration, + MAX(duration) as max_duration +FROM ci_jobs +WHERE duration IS NOT NULL; + +-- 按模式查看平均执行时间 +SELECT + mode, + AVG(duration) as avg_duration, + COUNT(*) as count +FROM ci_jobs +WHERE duration IS NOT NULL +GROUP BY mode; + +-- 最慢的10个任务 +SELECT + job_id, + mode, + project_name, + duration, + created_at +FROM ci_jobs +WHERE duration IS NOT NULL +ORDER BY duration DESC +LIMIT 10; + +-- 最快的10个任务 +SELECT + job_id, + mode, + project_name, + duration, + created_at +FROM ci_jobs +WHERE duration IS NOT NULL +ORDER BY duration ASC +LIMIT 10; +``` + +### 5. 
错误分析 + +```sql +-- 失败的任务 +SELECT + job_id, + mode, + user_id, + error_message, + created_at +FROM ci_jobs +WHERE status = 'failed' +ORDER BY created_at DESC; + +-- 超时的任务 +SELECT + job_id, + mode, + user_id, + created_at +FROM ci_jobs +WHERE status = 'timeout' +ORDER BY created_at DESC; + +-- 按错误信息分组 +SELECT + error_message, + COUNT(*) as count +FROM ci_jobs +WHERE status = 'failed' AND error_message IS NOT NULL +GROUP BY error_message +ORDER BY count DESC; + +-- 失败率最高的项目 +SELECT + project_name, + COUNT(*) as total, + SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed, + ROUND(100.0 * SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) / COUNT(*), 2) as failure_rate +FROM ci_jobs +WHERE project_name IS NOT NULL +GROUP BY project_name +HAVING COUNT(*) >= 5 +ORDER BY failure_rate DESC; +``` + +### 6. 文件大小分析 + +```sql +-- 磁盘使用统计 +SELECT + SUM(log_size) as total_log_size, + SUM(artifacts_size) as total_artifacts_size, + SUM(code_archive_size) as total_code_size, + SUM(log_size + artifacts_size + code_archive_size) as total_size +FROM ci_jobs; + +-- 按用户统计磁盘使用 +SELECT + user_id, + SUM(log_size + artifacts_size + code_archive_size) as total_size, + COUNT(*) as job_count +FROM ci_jobs +WHERE user_id IS NOT NULL +GROUP BY user_id +ORDER BY total_size DESC; + +-- 最大的任务 +SELECT + job_id, + project_name, + log_size, + artifacts_size, + code_archive_size, + (log_size + artifacts_size + code_archive_size) as total_size +FROM ci_jobs +ORDER BY total_size DESC +LIMIT 10; +``` + +### 7. 用户活跃度 + +```sql +-- 活跃用户排名 +SELECT + user_id, + COUNT(*) as total_jobs, + SUM(CASE WHEN status = 'success' THEN 1 ELSE 0 END) as success_jobs, + MAX(created_at) as last_activity +FROM ci_jobs +WHERE user_id IS NOT NULL +GROUP BY user_id +ORDER BY total_jobs DESC; + +-- 最近活跃的用户 +SELECT + user_id, + COUNT(*) as jobs_today +FROM ci_jobs +WHERE user_id IS NOT NULL + AND date(created_at) = date('now') +GROUP BY user_id +ORDER BY jobs_today DESC; +``` + +### 8. 
特殊用户配额查询 + +```sql +-- 查看所有特殊用户 +SELECT + user_id, + ROUND(quota_bytes / 1024.0 / 1024.0 / 1024.0, 2) as quota_gb, + created_at, + updated_at +FROM special_users; + +-- 特殊用户的使用情况 +SELECT + s.user_id, + ROUND(s.quota_bytes / 1024.0 / 1024.0 / 1024.0, 2) as quota_gb, + ROUND(SUM(j.log_size + j.artifacts_size + j.code_archive_size) / 1024.0 / 1024.0 / 1024.0, 2) as used_gb, + ROUND(100.0 * SUM(j.log_size + j.artifacts_size + j.code_archive_size) / s.quota_bytes, 2) as usage_percent +FROM special_users s +LEFT JOIN ci_jobs j ON s.user_id = j.user_id AND j.is_expired = 0 +GROUP BY s.user_id; +``` + +### 9. 数据清理 + +```sql +-- 查询可以清理的旧任务(超过30天) +SELECT + job_id, + created_at, + status +FROM ci_jobs +WHERE datetime(created_at) < datetime('now', '-30 day'); + +-- 统计需要清理的任务数量 +SELECT + COUNT(*) as cleanable_jobs +FROM ci_jobs +WHERE datetime(created_at) < datetime('now', '-30 day'); + +-- 查询已过期的任务 +SELECT + job_id, + status, + created_at +FROM ci_jobs +WHERE is_expired = 1; +``` + +### 10. 完整任务详情 + +```sql +-- 查看特定任务的所有信息 +SELECT * FROM ci_jobs WHERE job_id = 'your-job-id'; + +-- 格式化显示单个任务 +SELECT + 'Job ID: ' || job_id || char(10) || + 'Status: ' || status || char(10) || + 'Mode: ' || mode || char(10) || + 'User: ' || COALESCE(user_id, 'N/A') || char(10) || + 'Project: ' || COALESCE(project_name, 'N/A') || char(10) || + 'Script: ' || script || char(10) || + 'Created: ' || created_at || char(10) || + 'Started: ' || COALESCE(started_at, 'N/A') || char(10) || + 'Finished: ' || COALESCE(finished_at, 'N/A') || char(10) || + 'Duration: ' || COALESCE(CAST(duration AS TEXT), 'N/A') || 's' || char(10) || + 'Exit Code: ' || COALESCE(CAST(exit_code AS TEXT), 'N/A') || char(10) || + 'Log File: ' || COALESCE(log_file, 'N/A') +FROM ci_jobs +WHERE job_id = 'your-job-id'; +``` + +## 导出数据 + +```bash +# 导出为 CSV +sqlite3 -header -csv $DB_PATH "SELECT * FROM ci_jobs;" > jobs.csv + +# 导出特定查询结果 +sqlite3 -header -csv $DB_PATH "SELECT job_id, status, mode, created_at FROM ci_jobs WHERE 
status='failed';" > failed_jobs.csv + +# 导出为 JSON(需要 jq) +sqlite3 -json $DB_PATH "SELECT * FROM ci_jobs LIMIT 10;" | jq . + +# 备份整个数据库 +cp $DB_PATH jobs.db.backup.$(date +%Y%m%d_%H%M%S) +``` + +## 交互式模式的有用命令 + +```sql +-- 在 sqlite3 交互模式下 +.help -- 显示帮助 +.tables -- 列出所有表 +.schema -- 显示所有表的结构 +.schema TABLE -- 显示特定表的结构 +.mode column -- 列模式显示 +.headers on -- 显示列名 +.width 10 20 30 -- 设置列宽 +.output file.txt -- 输出到文件 +.output stdout -- 恢复输出到屏幕 +.quit -- 退出 +``` + +## Python 脚本示例 + +```python +import sqlite3 + +# 连接数据库 +conn = sqlite3.connect('/home/user/remoteCI/data/jobs.db') +conn.row_factory = sqlite3.Row +cursor = conn.cursor() + +# 查询 +cursor.execute('SELECT * FROM ci_jobs WHERE status = ?', ('failed',)) +rows = cursor.fetchall() + +for row in rows: + print(f"Job: {row['job_id']}, Status: {row['status']}") + +conn.close() +``` diff --git a/TROUBLESHOOTING_GUIDE.md b/TROUBLESHOOTING_GUIDE.md new file mode 100644 index 0000000..aa9d9b1 --- /dev/null +++ b/TROUBLESHOOTING_GUIDE.md @@ -0,0 +1,299 @@ +# 任务提交后无法查询的问题排查指南 + +## 问题现象 +- 任务提交后,通过查询接口无法看到任务记录 +- 查询数据库发现既没有特殊用户也没有任务记录 + +## 排查步骤 + +### 第一步:确认数据库文件位置 + +请告诉我以下信息: + +1. **你是如何查询数据库的?** + - [ ] 通过 API 查询 (`curl http://localhost:5000/api/jobs/history`) + - [ ] 直接用 sqlite3 查询数据库文件 + - [ ] 使用我提供的脚本 (`./read_db.sh` 或 `python3 read_db.py`) + - [ ] 其他方式:___________ + +2. **如果是直接查询数据库文件,文件路径是什么?** + ```bash + # 请提供完整路径 + 数据库路径: ___________ + ``` + +3. **系统是否正在运行?** + ```bash + # 检查服务状态 + systemctl status remote-ci + systemctl status celery + # 或检查进程 + ps aux | grep flask + ps aux | grep celery + ``` + +### 第二步:定位问题 + +#### 情况 A:系统还未运行过 + +**特征:** +- `/home/user/remoteCI/data/` 目录不存在 +- 找不到任何 `.db` 文件 +- Flask 和 Celery 进程未运行 + +**解决方案:** +```bash +# 1. 运行修复脚本,创建必要的目录和配置 +./fix_db_path.sh + +# 2. 
启动服务 +# 如果使用 systemd +sudo systemctl start remote-ci +sudo systemctl start celery + +# 如果手动启动 +# 终端 1: 启动 Flask +cd /home/user/remoteCI +export CI_DATA_DIR=/home/user/remoteCI/data +python3 -m server.app + +# 终端 2: 启动 Celery Worker +cd /home/user/remoteCI +export CI_DATA_DIR=/home/user/remoteCI/data +celery -A server.celery_app worker --loglevel=info + +# 3. 提交测试任务 +curl -X POST http://localhost:5000/api/jobs/git \ + -H "Authorization: Bearer your-token" \ + -H "Content-Type: application/json" \ + -d '{ + "repo": "https://github.com/octocat/Hello-World.git", + "branch": "master", + "script": "echo test", + "user_id": "test-user" + }' + +# 4. 查询任务历史 +curl http://localhost:5000/api/jobs/history +``` + +#### 情况 B:数据库存在但为空 + +**特征:** +- 数据库文件存在(你能查询到表结构) +- 但是 `ci_jobs` 和 `special_users` 表都是空的 +- 系统正在运行 + +**可能原因:** +1. **Flask 和 Celery 使用了不同的数据库文件** + - Flask 写入数据库 A + - Celery Worker 写入数据库 B + - 你查询的是数据库 C(空的) + +2. **任务提交失败了** + - 检查 Flask 日志 + - 检查 Celery Worker 日志 + +**解决方案:** + +**方案 1:追踪实际使用的数据库文件** + +```bash +# 1. 启动系统监控(需要 inotify-tools) +sudo apt-get install inotify-tools + +# 2. 监控所有 .db 文件的访问 +find /home/user/remoteCI -name '*.db' -o -name 'jobs.db' 2>/dev/null | while read file; do + inotifywait -m "$file" & +done + +# 3. 提交一个任务,观察哪个数据库文件被修改 + +# 或者使用 lsof 查看进程打开的文件 +# 找到 Flask 进程 PID +flask_pid=$(pgrep -f "flask|app.py" | head -1) +echo "Flask PID: $flask_pid" + +# 查看打开的文件 +lsof -p $flask_pid | grep -E '\.db|database' + +# 找到 Celery Worker PID +celery_pid=$(pgrep -f "celery.*worker" | head -1) +echo "Celery PID: $celery_pid" + +# 查看打开的文件 +lsof -p $celery_pid | grep -E '\.db|database' +``` + +**方案 2:添加调试日志** + +在 `server/app.py` 和 `server/tasks.py` 中添加日志: + +```python +# 在文件顶部添加 +import logging +logging.basicConfig(level=logging.DEBUG) + +# 在数据库初始化后添加 +from server.config import DATA_DIR +print(f"[DEBUG] 当前进程使用的数据库路径: {DATA_DIR}/jobs.db") +logging.info(f"数据库路径: {DATA_DIR}/jobs.db") +``` + +**方案 3:使用统一的数据库路径** + +```bash +# 1. 
创建 .env 文件 +cat > /home/user/remoteCI/.env << 'EOF' +CI_DATA_DIR=/home/user/remoteCI/data +EOF + +# 2. 创建目录 +mkdir -p /home/user/remoteCI/data/{logs,uploads,artifacts} + +# 3. 如果使用 systemd,更新服务配置 +# 编辑 /etc/systemd/system/remote-ci.service 和 celery.service +[Service] +Environment="CI_DATA_DIR=/home/user/remoteCI/data" +WorkingDirectory=/home/user/remoteCI + +# 4. 重启服务 +sudo systemctl daemon-reload +sudo systemctl restart remote-ci celery + +# 5. 验证 +./check_db_consistency.sh +``` + +#### 情况 C:任务提交失败 + +**检查方法:** + +```bash +# 1. 查看 Flask 日志 +journalctl -u remote-ci -f +# 或 +tail -f /var/log/remote-ci/app.log + +# 2. 查看 Celery Worker 日志 +journalctl -u celery -f +# 或 +tail -f /var/log/celery/worker.log + +# 3. 检查 Redis 连接 +redis-cli ping +# 应该返回 PONG + +# 4. 手动测试任务提交 +curl -X POST http://localhost:5000/api/jobs/git \ + -H "Authorization: Bearer your-api-token" \ + -H "Content-Type: application/json" \ + -d '{ + "repo": "https://github.com/octocat/Hello-World.git", + "branch": "master", + "script": "echo test", + "user_id": "debug-test" + }' -v + +# 观察返回的响应和日志 +``` + +### 第三步:验证修复 + +修复后,执行以下验证: + +```bash +# 1. 检查数据库一致性 +./check_db_consistency.sh + +# 2. 提交测试任务 +TEST_JOB_ID=$(curl -s -X POST http://localhost:5000/api/jobs/git \ + -H "Authorization: Bearer your-token" \ + -H "Content-Type: application/json" \ + -d '{ + "repo": "https://github.com/octocat/Hello-World.git", + "branch": "master", + "script": "echo test", + "user_id": "test-user" + }' | jq -r '.job_id') + +echo "测试任务 ID: $TEST_JOB_ID" + +# 3. 等待几秒 +sleep 5 + +# 4. 查询任务历史 +curl -s http://localhost:5000/api/jobs/history | jq . + +# 5. 查询数据库 +./read_db.sh + +# 6. 验证数据库中有任务记录 +sqlite3 /home/user/remoteCI/data/jobs.db \ + "SELECT job_id, status, mode FROM ci_jobs ORDER BY created_at DESC LIMIT 5;" \ + -header -column +``` + +### 第四步:持久化配置 + +确保配置持久化,避免重启后问题复现: + +```bash +# 1. 
环境变量配置 +cat >> /home/user/remoteCI/.env << 'EOF' +CI_DATA_DIR=/home/user/remoteCI/data +CI_WORK_DIR=/tmp/remote-ci +CI_WORKSPACE_DIR=/var/ci-workspace +EOF + +# 2. systemd 服务配置 +# /etc/systemd/system/remote-ci.service +[Service] +EnvironmentFile=/home/user/remoteCI/.env +Environment="CI_DATA_DIR=/home/user/remoteCI/data" + +# /etc/systemd/system/celery.service +[Service] +EnvironmentFile=/home/user/remoteCI/.env +Environment="CI_DATA_DIR=/home/user/remoteCI/data" + +# 3. 重新加载配置 +sudo systemctl daemon-reload +``` + +## 快速诊断命令 + +请运行以下命令并告诉我结果: + +```bash +# 命令 1: 搜索数据库文件 +echo "=== 搜索数据库文件 ===" +find /home/user/remoteCI /tmp /var -name '*.db' 2>/dev/null + +# 命令 2: 检查 data 目录 +echo -e "\n=== 检查 data 目录 ===" +ls -la /home/user/remoteCI/data/ 2>/dev/null || echo "目录不存在" + +# 命令 3: 检查环境变量 +echo -e "\n=== 环境变量 ===" +echo "CI_DATA_DIR: ${CI_DATA_DIR:-未设置}" + +# 命令 4: 检查进程 +echo -e "\n=== 运行中的进程 ===" +ps aux | grep -E "flask|celery" | grep -v grep + +# 命令 5: 测试 API +echo -e "\n=== 测试 API ===" +curl -s http://localhost:5000/api/jobs/history | head -20 +``` + +## 我需要的信息 + +为了帮你更准确地定位问题,请提供: + +1. **你查询的数据库文件完整路径** +2. **系统运行状态**(进程是否在运行) +3. **如何提交的任务**(命令或截图) +4. **上述快速诊断命令的输出** + +有了这些信息,我可以更精确地帮你解决问题! diff --git a/WHERE_ARE_LOGS.md b/WHERE_ARE_LOGS.md new file mode 100644 index 0000000..8b46f26 --- /dev/null +++ b/WHERE_ARE_LOGS.md @@ -0,0 +1,382 @@ +# 日志输出位置说明 + +## database.py 中的 print 语句会在哪里显示? 
+ +### 场景 1: 手动在终端启动(最直观)✅ 推荐用于调试 + +**启动方式:** +```bash +# 终端 1: 启动 Flask +cd /home/user/remoteCI +python3 -m server.app + +# 终端 2: 启动 Celery Worker +cd /home/user/remoteCI +celery -A server.celery_app worker --loglevel=info +``` + +**日志显示位置:** +- ✅ **直接显示在当前终端窗口** +- 所有 print 输出和 logging 日志都会实时显示 +- Flask 日志同时保存到:`{DATA_DIR}/logs/app.log` +- Celery 日志同时保存到:`{DATA_DIR}/logs/celery_worker.log` + +**示例输出:** +``` +[数据库初始化] 路径: /home/user/remoteCI/data/jobs.db +[数据库初始化] 文件存在: False + * Serving Flask app 'server.app' + * Debug mode: off +[数据库写入] 准备创建任务记录 + 数据库路径: /home/user/remoteCI/data/jobs.db + 任务ID: abc-123-xyz +✓ 任务记录创建成功 +``` + +--- + +### 场景 2: 使用 systemd 服务启动 + +**启动方式:** +```bash +sudo systemctl start remote-ci +sudo systemctl start celery +``` + +**日志显示位置:** +- ✅ **systemd journal 日志系统** +- 通过 `journalctl` 命令查看 + +**查看方法:** + +```bash +# 实时查看 Flask 日志(推荐) +sudo journalctl -u remote-ci -f + +# 实时查看 Celery 日志 +sudo journalctl -u celery -f + +# 同时查看两个服务的日志 +sudo journalctl -u remote-ci -u celery -f + +# 查看最近 100 行 +sudo journalctl -u remote-ci -n 100 + +# 查看最近 1 小时的日志 +sudo journalctl -u remote-ci --since "1 hour ago" + +# 只看数据库相关的日志 +sudo journalctl -u remote-ci -u celery | grep "数据库" + +# 保存日志到文件 +sudo journalctl -u remote-ci -u celery --since "1 hour ago" > debug_logs.txt +``` + +--- + +### 场景 3: 使用 supervisor 启动 + +**启动方式:** +```bash +sudo supervisorctl start remote-ci +sudo supervisorctl start celery +``` + +**日志显示位置:** +- ✅ **supervisor 配置的日志文件** +- 通常在 `/var/log/supervisor/` 目录下 + +**查看方法:** + +```bash +# 查看 Flask 日志 +tail -f /var/log/supervisor/remote-ci-stdout.log +tail -f /var/log/supervisor/remote-ci-stderr.log + +# 查看 Celery 日志 +tail -f /var/log/supervisor/celery-stdout.log +tail -f /var/log/supervisor/celery-stderr.log + +# 搜索数据库相关日志 +grep "数据库" /var/log/supervisor/*.log +``` + +**注意:** 日志文件路径取决于 supervisor 配置文件中的 `stdout_logfile` 和 `stderr_logfile` 设置。 + +--- + +### 场景 4: 后台运行(使用 nohup 或 &) + +**启动方式:** +```bash +nohup python3 -m server.app > 
flask.log 2>&1 & +nohup celery -A server.celery_app worker > celery.log 2>&1 & +``` + +**日志显示位置:** +- ✅ **指定的日志文件**(上例中是 `flask.log` 和 `celery.log`) + +**查看方法:** +```bash +# 实时查看 +tail -f flask.log +tail -f celery.log + +# 搜索数据库日志 +grep "数据库" flask.log celery.log +``` + +--- + +## 🎯 推荐的调试方法 + +### 方法 1: 终端直接运行(最简单)✨ + +打开 2 个终端窗口: + +**终端 1 - Flask:** +```bash +cd /home/user/remoteCI +export CI_DATA_DIR=/home/user/remoteCI/data +python3 -m server.app +``` + +**终端 2 - Celery:** +```bash +cd /home/user/remoteCI +export CI_DATA_DIR=/home/user/remoteCI/data +celery -A server.celery_app worker --loglevel=info +``` + +**终端 3 - 提交测试任务:** +```bash +# 提交任务 +curl -X POST http://localhost:5000/api/jobs/git \ + -H "Authorization: Bearer your-token" \ + -H "Content-Type: application/json" \ + -d '{ + "repo": "https://github.com/octocat/Hello-World.git", + "branch": "master", + "script": "echo test", + "user_id": "test-user" + }' +``` + +然后观察终端 1 和终端 2 的输出! + +--- + +### 方法 2: systemd + journalctl + +如果系统已经在运行: + +```bash +# 1. 重启服务(获得新的日志) +sudo systemctl restart remote-ci celery + +# 2. 打开实时日志窗口 +sudo journalctl -u remote-ci -u celery -f + +# 3. 在另一个终端提交测试任务 +curl -X POST http://localhost:5000/api/jobs/git \ + -H "Authorization: Bearer your-token" \ + -H "Content-Type: application/json" \ + -d '{ + "repo": "https://github.com/octocat/Hello-World.git", + "branch": "master", + "script": "echo test", + "user_id": "test-user" + }' +``` + +--- + +## 🔍 如何验证日志工作正常 + +启动服务后,你应该立即看到: + +``` +[数据库初始化] 路径: /home/user/remoteCI/data/jobs.db +[数据库初始化] 文件存在: True +``` + +如果没看到这些日志,可能的原因: +1. ❌ Python 输出缓冲(解决:添加 `PYTHONUNBUFFERED=1` 环境变量) +2. ❌ 日志被重定向到其他地方 +3. ❌ 使用了不同的启动脚本 + +--- + +## 💡 增强日志输出 + +如果日志输出被缓冲,添加环境变量: + +```bash +# 临时设置(当前会话) +export PYTHONUNBUFFERED=1 + +# 或在启动命令前添加 +PYTHONUNBUFFERED=1 python3 -m server.app + +# systemd 服务中添加 +[Service] +Environment="PYTHONUNBUFFERED=1" +``` + +--- + +## 📝 完整示例:从零开始调试 + +```bash +# 1. 
停止现有服务(如果有) +sudo systemctl stop remote-ci celery +# 或 +pkill -f "flask" +pkill -f "celery.*worker" + +# 2. 设置环境变量 +export CI_DATA_DIR=/home/user/remoteCI/data +export PYTHONUNBUFFERED=1 + +# 3. 终端 1: 启动 Flask(保持打开) +cd /home/user/remoteCI +python3 -m server.app + +# 你应该立即看到: +# [数据库初始化] 路径: /home/user/remoteCI/data/jobs.db +# [数据库初始化] 文件存在: True/False + +# 4. 终端 2: 启动 Celery(保持打开) +cd /home/user/remoteCI +export CI_DATA_DIR=/home/user/remoteCI/data +export PYTHONUNBUFFERED=1 +celery -A server.celery_app worker --loglevel=info + +# 你应该也看到数据库初始化日志 + +# 5. 终端 3: 提交任务并观察 +curl -X POST http://localhost:5000/api/jobs/git \ + -H "Authorization: Bearer your-token" \ + -H "Content-Type: application/json" \ + -d '{ + "repo": "https://github.com/octocat/Hello-World.git", + "branch": "master", + "script": "echo test", + "user_id": "test-user" + }' + +# 观察终端 1(Flask)的输出: +# [数据库写入] 准备创建任务记录 +# ... + +# 观察终端 2(Celery)的输出: +# [数据库更新] 更新任务开始状态 +# ... + +# 6. 查询任务 +curl http://localhost:5000/api/jobs/history + +# 观察终端 1 的输出: +# [数据库查询] 查询任务列表 +# ✓ 查询完成,返回 X 条记录 +``` + +--- + +## 🎯 关键检查点 + +当你提交任务后,应该看到: + +### ✅ 在 Flask 日志中: +``` +[数据库写入] 准备创建任务记录 + 数据库路径: /home/user/remoteCI/data/jobs.db + 任务ID: xxx + 模式: git + 用户ID: test-user +✓ 任务记录创建成功 + 验证查询: 找到 1 条记录 + 数据库文件大小: 12345 字节 +``` + +### ✅ 在 Celery 日志中: +``` +[数据库更新] 更新任务开始状态 + 数据库路径: /home/user/remoteCI/data/jobs.db ← 必须与 Flask 相同! + 任务ID: xxx +✓ 任务状态更新为 running,影响 1 行 +``` + +### ⚠️ 如果路径不同,问题找到了! + +比如: +- Flask: `/home/user/remoteCI/data/jobs.db` +- Celery: `/tmp/data/jobs.db` ❌ 不同! + +这就是导致任务无法查询的原因! 
+ +--- + +## 🛠️ 故障排除 + +### 问题 1: 看不到任何日志 + +**原因:** Python 输出缓冲 + +**解决:** +```bash +export PYTHONUNBUFFERED=1 +python3 -m server.app +``` + +### 问题 2: systemd 日志为空 + +**检查:** +```bash +# 查看服务状态 +sudo systemctl status remote-ci + +# 查看最近的错误 +sudo journalctl -u remote-ci -xe + +# 确认服务正在运行 +ps aux | grep flask +``` + +### 问题 3: 日志输出到了其他地方 + +**查找:** +```bash +# 搜索日志文件 +find /var/log -name "*remote*" -o -name "*celery*" 2>/dev/null + +# 检查 supervisor 配置 +cat /etc/supervisor/conf.d/*.conf | grep -E "stdout_logfile|stderr_logfile" + +# 检查 systemd 服务配置 +systemctl cat remote-ci | grep -E "StandardOutput|StandardError" +``` + +--- + +## 📚 相关命令速查 + +```bash +# 实时查看 systemd 日志 +sudo journalctl -u remote-ci -f + +# 查看最近的日志 +sudo journalctl -u remote-ci -n 100 + +# 只看数据库相关 +sudo journalctl -u remote-ci | grep "数据库" + +# 保存到文件 +sudo journalctl -u remote-ci --since "1 hour ago" > logs.txt + +# 查看进程 +ps aux | grep -E "flask|celery" + +# 查看进程打开的文件 +lsof -p $(pgrep -f flask) | grep ".db" +``` diff --git a/check_db_consistency.sh b/check_db_consistency.sh new file mode 100755 index 0000000..c6c9247 --- /dev/null +++ b/check_db_consistency.sh @@ -0,0 +1,127 @@ +#!/bin/bash +# 数据库一致性检查脚本 + +echo "=======================================================================" +echo "数据库路径一致性检查工具" +echo "=======================================================================" +echo + +# 颜色定义 +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +echo "1. 查找所有 jobs.db 文件..." 
#!/bin/bash
# Database path consistency check.
#
# Locates every jobs.db on the host, prints the CI_* environment of the
# current shell, reports how the Flask and Celery services are running and
# whether their systemd units set CI_DATA_DIR, checks the data directories,
# and finally summarises whether more than one database file is in play.

echo "======================================================================="
echo "数据库路径一致性检查工具"
echo "======================================================================="
echo

# ANSI colour codes consumed by `echo -e` below.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# check_service HEADING UNIT LABEL PGREP_PATTERN
# Report whether UNIT is active under systemd (and whether the unit defines
# CI_DATA_DIR) or, failing that, whether a plain process matches
# PGREP_PATTERN.
check_service() {
    local heading=$1 unit=$2 label=$3 pattern=$4

    echo "$heading"
    if systemctl is-active --quiet "$unit" 2>/dev/null; then
        echo -e "${GREEN}✓ $unit 服务正在运行${NC}"
        systemctl show -p Environment "$unit" 2>/dev/null | grep CI_DATA_DIR || echo " 未找到 CI_DATA_DIR 环境变量"
    elif pgrep -f "$pattern" > /dev/null; then
        echo -e "${GREEN}✓ $label 进程正在运行 (非 systemd)${NC}"
        pgrep -af "$pattern"
    else
        echo -e "${YELLOW}⚠ $label 服务未运行${NC}"
    fi
    echo
}

echo "1. 查找所有 jobs.db 文件..."
echo "-------------------------------------------------------------------"
# Let find match the name itself. Piping the whole tree through grep printed
# every path under /tmp and /var and also accepted names that merely end in
# "jobs.db" (for example old_jobs.db).
DB_FILES=$(find /home/user/remoteCI /tmp /var -type f -name 'jobs.db' 2>/dev/null || true)

if [ -z "$DB_FILES" ]; then
    echo -e "${YELLOW}⚠ 没有找到任何数据库文件${NC}"
    echo " 这可能意味着系统还未运行过,或者数据库在其他位置"
else
    echo -e "${GREEN}✓ 找到以下数据库文件:${NC}"
    while IFS= read -r file; do
        [ -f "$file" ] || continue
        size=$(ls -lh "$file" | awk '{print $5}')
        count=$(sqlite3 "$file" "SELECT COUNT(*) FROM ci_jobs;" 2>/dev/null || echo "无法访问")
        echo " - $file (大小: $size, 任务数: $count)"
    done <<< "$DB_FILES"
fi
echo

echo "2. 检查环境变量..."
echo "-------------------------------------------------------------------"
for var in CI_DATA_DIR CI_WORK_DIR CI_WORKSPACE_DIR CI_API_HOST CI_API_PORT; do
    value=$(printenv "$var" || true)
    if [ -z "$value" ]; then
        echo -e "${YELLOW}⚠ $var: 未设置${NC}"
    else
        echo -e "${GREEN}✓ $var: $value${NC}"
    fi
done
echo

echo "3. 检查进程使用的数据库路径..."
echo "-------------------------------------------------------------------"
# The Flask pattern also covers the documented manual launch command
# `python3 -m server.app`, which "flask.*app.py" never matched.
check_service "检查 Flask (remote-ci) 进程:" remote-ci "Flask" 'flask|server\.app'
check_service "检查 Celery Worker 进程:" celery "Celery Worker" 'celery.*worker'

echo "4. 检查目录权限..."
echo "-------------------------------------------------------------------"
for dir in "/home/user/remoteCI/data" "/tmp/remote-ci" "/var/ci-workspace"; do
    if [ -d "$dir" ]; then
        perms=$(ls -ld "$dir" | awk '{print $1, $3, $4}')
        echo -e "${GREEN}✓ $dir${NC}"
        echo " 权限: $perms"
    else
        echo -e "${YELLOW}⚠ $dir (不存在)${NC}"
    fi
done
echo

echo "5. 诊断结果总结..."
echo "-------------------------------------------------------------------"

# Count non-empty lines. grep -c exits 1 when the count is 0, so only its
# exit status is neutralised, not its output.
db_count=$(printf '%s' "$DB_FILES" | grep -c . || true)

if [ "$db_count" -eq 0 ]; then
    echo -e "${YELLOW}⚠ 问题: 没有找到数据库文件${NC}"
    echo " 建议: 系统可能还未初始化,请先运行一次 Flask 或提交一个任务"
elif [ "$db_count" -eq 1 ]; then
    echo -e "${GREEN}✓ 正常: 只找到一个数据库文件${NC}"
    echo " 位置: $DB_FILES"
else
    echo -e "${RED}✗ 警告: 找到多个数据库文件!${NC}"
    echo " 这可能导致数据不一致问题"
    echo " 建议: 设置 CI_DATA_DIR 环境变量,确保所有进程使用同一个数据库"
fi
echo

# Only the invoking shell's environment is visible here; the per-service
# values are reported in step 3.
if [ -z "$CI_DATA_DIR" ]; then
    echo -e "${RED}✗ 警告: CI_DATA_DIR 环境变量未设置${NC}"
    echo " 建议: 在 systemd 服务文件或 .env 文件中设置 CI_DATA_DIR"
    echo " 示例: CI_DATA_DIR=/home/user/remoteCI/data"
else
    echo -e "${GREEN}✓ CI_DATA_DIR 已设置: $CI_DATA_DIR${NC}"
fi
echo

echo "======================================================================="
echo "完成"
echo "======================================================================="
echo
echo "如需详细诊断和修复指南,请查看: DATABASE_DIAGNOSIS.md"
#!/bin/bash
# Quick debugging helper for database problems.
#
# Collects, in order: service state, the database paths logged by Flask and
# Celery at start-up, recent database write/query log lines, every jobs.db on
# the host with its size and job count, the CI_DATA_DIR environment, per
# database statistics, and finally a list of diagnostic hints.

RULE="=========================================="
SEP="----------------------------------------"

# show_init_logs UNIT FALLBACK_PATTERN
# Print the last two database-initialisation journal lines of UNIT. When the
# unit has none, search the whole journal with FALLBACK_PATTERN instead.
# The result is captured first: testing the exit status of `... | tail` never
# triggers a fallback because tail succeeds even on empty input.
show_init_logs() {
    local unit=$1 fallback_pattern=$2 lines

    lines=$(sudo journalctl -u "$unit" --since "1 hour ago" 2>/dev/null | grep "数据库初始化" | tail -2)
    if [ -z "$lines" ]; then
        echo "(没有找到 systemd 日志,尝试搜索所有日志)"
        lines=$(sudo journalctl --since "1 hour ago" 2>/dev/null | grep "$fallback_pattern" | tail -2)
    fi
    if [ -n "$lines" ]; then
        printf '%s\n' "$lines"
    fi
}

# show_recent_ops MARKER EMPTY_MESSAGE
# Print the last ten journal lines of both services containing the literal
# MARKER, or EMPTY_MESSAGE when there are none.
show_recent_ops() {
    local marker=$1 empty_message=$2 lines

    lines=$(sudo journalctl -u remote-ci -u celery --since "30 minutes ago" 2>/dev/null | grep -F "$marker" | tail -10)
    if [ -n "$lines" ]; then
        printf '%s\n' "$lines"
    else
        echo "$empty_message"
    fi
}

# is_running UNIT PGREP_PATTERN
# Succeed when UNIT is active under systemd or a process matches the pattern.
is_running() {
    systemctl is-active "$1" &>/dev/null || pgrep -f "$2" &>/dev/null
}

echo "$RULE"
echo "数据库问题调试信息收集"
echo "$RULE"
echo

echo "1. 检查服务状态"
echo "$SEP"
if systemctl is-active remote-ci &>/dev/null; then
    echo "✓ Flask (remote-ci): 运行中"
else
    echo "✗ Flask (remote-ci): 未运行"
fi

if systemctl is-active celery &>/dev/null; then
    echo "✓ Celery Worker: 运行中"
else
    echo "✗ Celery Worker: 未运行"
fi

# Processes started outside systemd.
FLASK_PIDS=$(pgrep -f "flask|server.app" || true)
if [ -n "$FLASK_PIDS" ]; then
    echo "✓ Flask 进程 (PID: $FLASK_PIDS)"
fi

CELERY_PIDS=$(pgrep -f "celery.*worker" || true)
if [ -n "$CELERY_PIDS" ]; then
    echo "✓ Celery Worker 进程 (PID: $CELERY_PIDS)"
fi
echo

echo "2. 数据库初始化日志(路径)"
echo "$SEP"
echo "Flask 使用的数据库路径:"
show_init_logs remote-ci "数据库初始化.*server.app"

echo
echo "Celery Worker 使用的数据库路径:"
show_init_logs celery "数据库初始化.*celery"
echo

echo "3. 最近的数据库写入操作"
echo "$SEP"
show_recent_ops "[数据库写入]" "(没有找到数据库写入日志)"
echo

echo "4. 最近的数据库查询操作"
echo "$SEP"
show_recent_ops "[数据库查询]" "(没有找到数据库查询日志)"
echo

echo "5. 数据库文件位置"
echo "$SEP"
DB_FILES=$(find /home/user/remoteCI /tmp /var -type f -name 'jobs.db' 2>/dev/null || true)
if [ -n "$DB_FILES" ]; then
    while IFS= read -r file; do
        [ -f "$file" ] || continue
        size=$(ls -lh "$file" | awk '{print $5}')
        # GNU stat first, BSD stat as fallback; the fallback has to hang off
        # stat itself, not off a pipeline that ends in cut.
        mtime=$(stat -c %y "$file" 2>/dev/null || stat -f "%Sm" "$file" 2>/dev/null)
        mtime=${mtime%%.*}   # drop the fractional seconds GNU stat prints
        count=$(sqlite3 "$file" "SELECT COUNT(*) FROM ci_jobs;" 2>/dev/null || echo "?")
        echo " • $file"
        echo " 大小: $size, 修改时间: $mtime, 任务数: $count"
    done <<< "$DB_FILES"
else
    echo " (没有找到数据库文件)"
fi
echo

echo "6. 环境变量"
echo "$SEP"
echo "CI_DATA_DIR: ${CI_DATA_DIR:-未设置}"

# Values configured on the systemd units, if any.
if systemctl show -p Environment remote-ci 2>/dev/null | grep -q CI_DATA_DIR; then
    echo "remote-ci 服务环境变量:"
    systemctl show -p Environment remote-ci 2>/dev/null | grep CI_DATA_DIR
fi

if systemctl show -p Environment celery 2>/dev/null | grep -q CI_DATA_DIR; then
    echo "celery 服务环境变量:"
    systemctl show -p Environment celery 2>/dev/null | grep CI_DATA_DIR
fi
echo

echo "7. 数据库内容统计"
echo "$SEP"
# Well-known locations plus everything find discovered, de-duplicated so a
# database that appears in both lists is reported once.
CANDIDATES=$(printf '%s\n' "/home/user/remoteCI/data/jobs.db" "/tmp/data/jobs.db" "$DB_FILES" | awk 'NF && !seen[$0]++')
# Read the list on fd 3 so commands inside the loop keep their own stdin.
while IFS= read -r DB_PATH <&3; do
    [ -f "$DB_PATH" ] || continue
    echo "数据库: $DB_PATH"
    total=$(sqlite3 "$DB_PATH" 'SELECT COUNT(*) FROM ci_jobs;' 2>/dev/null || echo "0")
    echo " 总任务数: $total"

    if [ "$total" -gt 0 ] 2>/dev/null; then
        echo " 按状态统计:"
        sqlite3 "$DB_PATH" "SELECT status, COUNT(*) FROM ci_jobs GROUP BY status;" 2>/dev/null | while read -r line; do
            echo " $line"
        done

        echo " 最近3条任务:"
        sqlite3 "$DB_PATH" "SELECT substr(job_id,1,30), status, mode, created_at FROM ci_jobs ORDER BY created_at DESC LIMIT 3;" 2>/dev/null | while read -r line; do
            echo " $line"
        done
    fi
    echo
done 3<<< "$CANDIDATES"
echo

echo "8. 诊断建议"
echo "$SEP"

# grep -c prints "0" AND exits 1 when nothing matches; appending `|| echo 0`
# produced the two-line value "0\n0" and broke the numeric test below.
db_count=$(printf '%s' "$DB_FILES" | grep -c . || true)
if [ "$db_count" -gt 1 ]; then
    echo "⚠ 警告: 发现多个数据库文件!这可能导致数据不一致"
    echo " 建议: 设置 CI_DATA_DIR 环境变量"
fi

if [ -z "$CI_DATA_DIR" ]; then
    echo "⚠ 警告: CI_DATA_DIR 环境变量未设置"
    echo " 建议: 运行 ./fix_db_path.sh 修复配置"
fi

if ! is_running remote-ci "flask|server.app"; then
    echo "⚠ 警告: Flask 服务未运行"
    echo " 建议: 启动服务后再测试"
fi

if ! is_running celery "celery.*worker"; then
    echo "⚠ 警告: Celery Worker 未运行"
    echo " 建议: 启动服务后再测试"
fi

echo
echo "$RULE"
echo "完成"
echo "$RULE"
echo
echo "下一步:"
echo "1. 如果发现数据库路径不一致,运行: ./fix_db_path.sh"
echo "2. 重启服务后查看实时日志:"
echo " sudo journalctl -u remote-ci -u celery -f"
echo "3. 提交测试任务并观察日志输出"
echo "4. 查看详细调试指南: cat DEBUG_WITH_LOGS.md"
#!/usr/bin/env python3
"""
Database path diagnostics.

Checks whether Flask and the Celery worker would resolve the same SQLite
database path: it replays the path computation of ``server/config.py`` for a
few candidate locations, lists the relevant environment variables, then
imports the real configuration and inspects the database it points to.
"""

import os
import sqlite3
import sys
import traceback
from contextlib import closing
from datetime import datetime
from pathlib import Path

PROJECT_ROOT = "/home/user/remoteCI"
ENV_VARS = ("CI_DATA_DIR", "CI_WORK_DIR", "CI_WORKSPACE_DIR", "CI_API_HOST", "CI_API_PORT")
RULE = "=" * 80


def _load_dotenv():
    """Load a .env file when python-dotenv is installed; otherwise say so."""
    try:
        from dotenv import load_dotenv
    except ImportError:
        print("注意: dotenv 模块未安装,跳过 .env 文件加载\n")
    else:
        load_dotenv()


def _describe_file(db_path):
    """Print existence, size and mtime of *db_path*; return True if it exists."""
    exists = os.path.exists(db_path)
    print(f" 文件存在: {exists}")
    if exists:
        print(f" 文件大小: {os.path.getsize(db_path)} 字节")
        print(f" 修改时间: {datetime.fromtimestamp(os.path.getmtime(db_path))}")
    return exists


def _query_jobs(db_path):
    """Return ``(job_count, latest_rows)`` for the ``ci_jobs`` table.

    Each row is ``(job_id, status, mode, user_id, created_at)``. The
    connection is closed even when a query raises.
    """
    with closing(sqlite3.connect(db_path)) as conn:
        count = conn.execute("SELECT COUNT(*) FROM ci_jobs").fetchone()[0]
        rows = conn.execute(
            "SELECT job_id, status, mode, user_id, created_at "
            "FROM ci_jobs ORDER BY created_at DESC LIMIT 5"
        ).fetchall()
    return count, rows


def _simulate_config_paths():
    """Replay the DATA_DIR computation for each candidate config.py location."""
    print("2. 模拟 server/config.py 中的路径计算:")
    print()

    candidates = [
        f"{PROJECT_ROOT}/server/config.py",              # installed location
        os.path.join(os.getcwd(), "server/config.py"),   # relative to cwd
    ]
    # Both entries coincide when run from the project root; report them once.
    for config_path in dict.fromkeys(candidates):
        print(f" 假设 config.py 在: {config_path}")
        base_dir = Path(config_path).parent.parent
        print(f" BASE_DIR = {base_dir}")

        env_data_dir = os.getenv('CI_DATA_DIR')
        if env_data_dir:
            data_dir = env_data_dir
            print(f" DATA_DIR (from env) = {data_dir}")
        else:
            data_dir = str(base_dir / 'data')
            print(f" DATA_DIR (from BASE_DIR) = {data_dir}")

        db_path = f"{data_dir}/jobs.db"
        print(f" 数据库路径: {db_path}")

        if _describe_file(db_path):
            try:
                count, rows = _query_jobs(db_path)
            except sqlite3.Error as e:
                print(f" 查询数据库失败: {e}")
            else:
                print(f" 任务数量: {count}")
                if rows:
                    print(f" 最近的任务:")
                    for job_id, status, mode, _user, created_at in rows:
                        print(f" - {job_id}: {status} ({mode}) @ {created_at}")
        print()


def _report_environment():
    """Print the CI_* environment variables visible to this process."""
    print("3. 环境变量检查:")
    for var in ENV_VARS:
        value = os.getenv(var)
        if value:
            print(f" {var} = {value}")
        else:
            print(f" {var} = (未设置)")
    print()


def _report_actual_config():
    """Import the real server configuration and inspect its database."""
    print("4. 实际使用的数据库路径:")
    print()

    sys.path.insert(0, PROJECT_ROOT)
    try:
        from server.config import DATA_DIR, BASE_DIR
    except Exception as e:  # config import may fail in arbitrary ways
        print(f" ✗ 导入配置失败: {e}")
        traceback.print_exc()
        return

    print(f" BASE_DIR = {BASE_DIR}")
    print(f" DATA_DIR = {DATA_DIR}")
    db_path = f"{DATA_DIR}/jobs.db"
    print(f" 数据库路径: {db_path}")

    if not _describe_file(db_path):
        return
    try:
        count, rows = _query_jobs(db_path)
    except sqlite3.Error as e:
        print(f" ✗ 查询数据库失败: {e}")
        traceback.print_exc()
        return

    print(f" ✓ 任务总数: {count}")
    if not rows:
        print(f" ⚠ 数据库为空")
        return
    print(f" ✓ 最近的 {len(rows)} 条任务:")
    for job_id, status, mode, user_id, created_at in rows:
        # NULL columns come back as None, which rejects width format specs.
        print(
            f" - {(job_id or 'N/A')[:20]}... | {status or 'N/A':8} | "
            f"{mode or 'N/A':6} | {user_id or 'N/A':15} | {created_at}"
        )


def main():
    """Run every diagnostic section in order."""
    _load_dotenv()

    print(RULE)
    print("数据库路径诊断工具")
    print(RULE)
    print()

    print("1. 当前工作目录:")
    print(f" {os.getcwd()}")
    print()

    _simulate_config_paths()
    _report_environment()
    _report_actual_config()

    print()
    print(RULE)
    print("诊断建议:")
    print(RULE)
    print()
    print("1. 检查 Flask 和 Celery Worker 是否使用相同的启动目录")
    print("2. 确保环境变量 CI_DATA_DIR 已设置为绝对路径")
    print("3. 查看是否有多个数据库文件存在于不同路径")
    print("4. 运行以下命令查找所有数据库文件:")
    print(" find /home/user/remoteCI -name 'jobs.db'")
    print(" find /tmp -name 'jobs.db'")
    print()


if __name__ == '__main__':
    main()
#!/bin/bash
# Quick fix for inconsistent database paths.
#
# Writes a .env file that pins CI_DATA_DIR (backing up an existing one after
# confirmation), creates the data/work directories, sets their permissions
# and prints the follow-up steps for systemd, supervisor and manual starts.

echo "======================================================================="
echo "数据库路径修复工具"
echo "======================================================================="
echo

DEFAULT_DATA_DIR="/home/user/remoteCI/data"
ENV_FILE="/home/user/remoteCI/.env"

echo "此脚本将帮助你修复数据库路径不一致的问题"
echo
echo "建议的数据库路径: $DEFAULT_DATA_DIR"
echo

if [ -f "$ENV_FILE" ]; then
    echo "⚠ .env 文件已存在: $ENV_FILE"
    echo "是否备份并覆盖? (y/N)"
    read -r answer
    if [ "$answer" != "y" ] && [ "$answer" != "Y" ]; then
        echo "取消操作"
        exit 0
    fi
    # Compute the name once so the message names the file that was written;
    # two separate `date` calls can straddle a second boundary.
    backup="$ENV_FILE.bak.$(date +%Y%m%d_%H%M%S)"
    if ! cp "$ENV_FILE" "$backup"; then
        echo "✗ 备份失败: $backup"
        exit 1
    fi
    echo "✓ 已备份到: $backup"
fi

echo "创建 .env 文件..."
# The file carries CI_API_TOKEN, so keep it private: the umask covers a fresh
# file, the chmod covers overwriting an existing one.
if ! ( umask 077; cat > "$ENV_FILE" << EOF
# Remote CI 配置文件
# 数据库和日志存储路径(必须设置为绝对路径)
CI_DATA_DIR=$DEFAULT_DATA_DIR

# 临时工作目录
CI_WORK_DIR=/tmp/remote-ci

# 用户工作空间目录(rsync模式使用)
CI_WORKSPACE_DIR=/var/ci-workspace

# API 配置
CI_API_HOST=0.0.0.0
CI_API_PORT=5000
CI_API_TOKEN=change-me-in-production

# Redis 配置(Celery 使用)
CI_BROKER_URL=redis://localhost:6379/0
CI_RESULT_BACKEND=redis://localhost:6379/0

# 任务配置
CI_MAX_CONCURRENT=2
CI_JOB_TIMEOUT=3600
CI_LOG_RETENTION_DAYS=7
EOF
); then
    echo "✗ 无法写入 .env 文件: $ENV_FILE"
    exit 1
fi
chmod 600 "$ENV_FILE"

echo "✓ 已创建 .env 文件: $ENV_FILE"
echo

DATA_DIRS=(
    "$DEFAULT_DATA_DIR"
    "$DEFAULT_DATA_DIR/logs"
    "$DEFAULT_DATA_DIR/uploads"
    "$DEFAULT_DATA_DIR/artifacts"
)

echo "创建必要的目录..."
failed=0
# /var/ci-workspace usually needs root; report the failure instead of
# printing success unconditionally.
for dir in "${DATA_DIRS[@]}" "/tmp/remote-ci" "/var/ci-workspace"; do
    if ! mkdir -p "$dir" 2>/dev/null; then
        echo "✗ 无法创建目录: $dir (可能需要 sudo)"
        failed=1
    fi
done
if [ "$failed" -eq 0 ]; then
    echo "✓ 目录已创建"
fi
echo

echo "设置目录权限..."
failed=0
for dir in "${DATA_DIRS[@]}"; do
    if ! chmod 755 "$dir" 2>/dev/null; then
        echo "✗ 无法设置权限: $dir"
        failed=1
    fi
done
if [ "$failed" -eq 0 ]; then
    echo "✓ 权限已设置"
fi
echo

# Follow-up instructions; the service files themselves are not modified.
cat << EOF
=======================================================================
后续步骤:
=======================================================================

1. 如果使用 systemd 服务,请更新服务文件添加环境变量:

 编辑 /etc/systemd/system/remote-ci.service:
 [Service]
 Environment="CI_DATA_DIR=$DEFAULT_DATA_DIR"
 Environment="CI_WORK_DIR=/tmp/remote-ci"

 编辑 /etc/systemd/system/celery.service:
 [Service]
 Environment="CI_DATA_DIR=$DEFAULT_DATA_DIR"
 Environment="CI_WORK_DIR=/tmp/remote-ci"

 然后执行:
 sudo systemctl daemon-reload
 sudo systemctl restart remote-ci celery

2. 如果使用 supervisor,请更新配置文件添加环境变量

3. 如果手动启动,确保先导入环境变量:
 export CI_DATA_DIR=$DEFAULT_DATA_DIR
 export CI_WORK_DIR=/tmp/remote-ci

4. 验证修复:
 ./check_db_consistency.sh

=======================================================================
完成
=======================================================================
EOF
#!/usr/bin/env python3
"""
Database content viewer (Python version).

Reads a Remote CI SQLite database and prints job statistics, the most recent
jobs, the latest job in detail, queued/running jobs and special-user quotas.
"""

import os
import sys
import sqlite3
from contextlib import closing
from pathlib import Path
from datetime import datetime
import json

PROJECT_ROOT = "/home/user/remoteCI"
RULE = "=" * 80

STATUS_ICONS = {
    'success': '✓',
    'failed': '✗',
    'running': '▶',
    'queued': '⏳',
    'timeout': '⏱',
    'error': '⚠',
}


def find_database(search_paths=None, project_root=PROJECT_ROOT):
    """Locate a ``jobs.db`` file.

    Args:
        search_paths: Candidate file paths tried in order. Defaults to the
            well-known Remote CI locations.
        project_root: Directory tree searched when no candidate exists.

    Returns:
        The path of the first database found, or ``None``.
    """
    if search_paths is None:
        search_paths = [
            "/home/user/remoteCI/data/jobs.db",
            "/tmp/remote-ci/jobs.db",
            "/var/lib/remote-ci/jobs.db",
            os.path.expanduser("~/.remote-ci/jobs.db"),
        ]

    for candidate in search_paths:
        if os.path.isfile(candidate):
            return candidate

    # os.walk silently skips unreadable directories, so no guard is needed.
    for root, _dirs, files in os.walk(project_root):
        if "jobs.db" in files:
            return os.path.join(root, "jobs.db")

    return None


def format_size(size_bytes):
    """Format a byte count as a human-readable string (``"N/A"`` for None)."""
    if size_bytes is None:
        return "N/A"
    for unit in ['B', 'KB', 'MB', 'GB']:
        if size_bytes < 1024.0:
            return f"{size_bytes:.2f} {unit}"
        size_bytes /= 1024.0
    return f"{size_bytes:.2f} TB"


def _clip(text, width):
    """Return *text* shortened to *width* chars with a trailing ellipsis.

    Empty or missing values become ``"N/A"``; values that already fit are
    returned unchanged, so no ellipsis is added unless something was cut.
    """
    if not text:
        return "N/A"
    text = str(text)
    return text if len(text) <= width else text[:width - 3] + "..."


def _section(title):
    """Print a section banner."""
    print(RULE)
    print(title)
    print(RULE)


def _print_statistics(cursor):
    """Print job counts grouped by status, mode and user."""
    print("按状态统计:")
    cursor.execute("""
        SELECT status, COUNT(*) as count
        FROM ci_jobs
        GROUP BY status
        ORDER BY count DESC
    """)
    for row in cursor.fetchall():
        icon = STATUS_ICONS.get(row['status'], '•')
        print(f" {icon} {row['status'] or 'N/A':10} : {row['count']:5}")
    print()

    print("按模式统计:")
    cursor.execute("""
        SELECT mode, COUNT(*) as count
        FROM ci_jobs
        GROUP BY mode
        ORDER BY count DESC
    """)
    for row in cursor.fetchall():
        print(f" • {row['mode'] or 'N/A':10} : {row['count']:5}")
    print()

    print("按用户统计 (Top 10):")
    cursor.execute("""
        SELECT COALESCE(user_id, '(未设置)') as user_id, COUNT(*) as count
        FROM ci_jobs
        GROUP BY user_id
        ORDER BY count DESC
        LIMIT 10
    """)
    for row in cursor.fetchall():
        print(f" • {row['user_id']:20} : {row['count']:5}")
    print()


def _print_recent_jobs(cursor):
    """Print a table of the ten most recently created jobs."""
    _section("最近的任务 (最新10条)")
    cursor.execute("""
        SELECT job_id, status, mode, user_id, project_name, created_at
        FROM ci_jobs
        ORDER BY created_at DESC
        LIMIT 10
    """)

    print(f"{'Job ID':<25} {'状态':<10} {'模式':<8} {'用户':<15} {'项目':<15} {'创建时间':<20}")
    print("-" * 110)

    for row in cursor.fetchall():
        created = row['created_at'][:19] if row['created_at'] else 'N/A'
        print(
            f"{_clip(row['job_id'], 25):<25} {row['status'] or 'N/A':<10} "
            f"{row['mode'] or 'N/A':<8} {_clip(row['user_id'], 15):<15} "
            f"{_clip(row['project_name'], 15):<15} {created:<20}"
        )
    print()


def _print_latest_job(cursor):
    """Print every stored field of the most recently created job."""
    _section("最新任务详情")
    cursor.execute("SELECT * FROM ci_jobs ORDER BY created_at DESC LIMIT 1")
    row = cursor.fetchone()

    if row:
        script = row['script']
        if script and len(script) > 60:
            script = script[:60] + "..."
        duration = row['duration']
        exit_code = row['exit_code']

        print(f"任务ID: {row['job_id']}")
        print(f"状态: {row['status']}")
        print(f"模式: {row['mode']}")
        print(f"用户ID: {row['user_id'] or 'N/A'}")
        print(f"项目名: {row['project_name'] or 'N/A'}")
        print(f"脚本: {script}")
        print(f"创建时间: {row['created_at']}")
        print(f"开始时间: {row['started_at'] or 'N/A'}")
        print(f"结束时间: {row['finished_at'] or 'N/A'}")
        # Compare against None: a legitimate 0 must not be shown as missing.
        print(f"耗时: {duration:.2f}秒" if duration is not None else "耗时: N/A")
        print(f"退出码: {exit_code if exit_code is not None else 'N/A'}")
        print(f"日志文件: {row['log_file'] or 'N/A'}")
        print(f"日志大小: {format_size(row['log_size'])}")

        if row['artifacts_path']:
            print(f"产物路径: {row['artifacts_path']}")
            print(f"产物大小: {format_size(row['artifacts_size'])}")

        if row['error_message']:
            print(f"错误信息: {row['error_message']}")
    print()


def _print_active_jobs(cursor):
    """Print the jobs that are still queued or running."""
    _section("运行中的任务")
    cursor.execute("""
        SELECT job_id, status, mode, user_id, created_at
        FROM ci_jobs
        WHERE status IN ('queued', 'running')
        ORDER BY created_at DESC
    """)

    active = cursor.fetchall()
    if active:
        print(f"{'Job ID':<25} {'状态':<10} {'模式':<8} {'用户':<15} {'创建时间':<20}")
        print("-" * 80)
        for row in active:
            created = row['created_at'][:19] if row['created_at'] else 'N/A'
            print(
                f"{_clip(row['job_id'], 25):<25} {row['status'] or 'N/A':<10} "
                f"{row['mode'] or 'N/A':<8} {_clip(row['user_id'], 15):<15} {created:<20}"
            )
    else:
        print("(无运行中的任务)")
    print()


def _print_special_users(cursor):
    """Print the quota table of special users."""
    _section("特殊用户配额")
    cursor.execute("SELECT * FROM special_users ORDER BY created_at DESC")
    special_users = cursor.fetchall()

    if special_users:
        print(f"{'用户ID':<20} {'配额':<15} {'创建时间':<20}")
        print("-" * 60)
        for row in special_users:
            quota = row['quota_bytes']
            quota_text = f"{quota / (1024 * 1024 * 1024):.2f} GB" if quota is not None else "N/A"
            created = (row['created_at'] or 'N/A')[:19]
            print(f"{row['user_id'] or 'N/A':<20} {quota_text}{'':<8} {created}")
    else:
        print("(无特殊用户)")
    print()


def _print_query_hints(db_path):
    """Print how to run ad-hoc queries against the database."""
    _section("交互式查询")
    print(f"如需进行自定义查询,可以运行:")
    print(f" sqlite3 {db_path}")
    print()
    print("或者使用 Python:")
    print(" import sqlite3")
    print(f" conn = sqlite3.connect('{db_path}')")
    print(" cursor = conn.cursor()")
    print(" cursor.execute('SELECT * FROM ci_jobs WHERE status = \"failed\"')")
    print()


def read_database(db_path):
    """Print a full report of the database at *db_path*.

    Errors are reported on stdout rather than raised. The connection is
    closed on every exit path, including the early returns for a missing
    ``ci_jobs`` table or an empty database.
    """
    if not os.path.exists(db_path):
        print(f"✗ 错误: 数据库文件不存在: {db_path}")
        return

    print(RULE)
    print("Remote CI 数据库内容查看工具 (Python)")
    print(RULE)
    print()

    stat = os.stat(db_path)
    print("数据库信息:")
    print(f" 文件路径: {db_path}")
    print(f" 文件大小: {format_size(stat.st_size)}")
    print(f" 修改时间: {datetime.fromtimestamp(stat.st_mtime)}")
    print()

    try:
        with closing(sqlite3.connect(db_path)) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()

            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            tables = [row[0] for row in cursor.fetchall()]
            print(f"数据库表: {', '.join(tables)}")
            print()

            if 'ci_jobs' not in tables:
                print("✗ 错误: ci_jobs 表不存在")
                return

            _section("任务统计")
            cursor.execute("SELECT COUNT(*) FROM ci_jobs")
            total = cursor.fetchone()[0]
            print(f"总任务数: {total}")
            print()

            if total == 0:
                print("数据库为空,没有任务记录")
                return

            _print_statistics(cursor)
            _print_recent_jobs(cursor)
            _print_latest_job(cursor)
            _print_active_jobs(cursor)
            if 'special_users' in tables:
                _print_special_users(cursor)

        _print_query_hints(db_path)

    except sqlite3.Error as e:
        print(f"✗ 数据库错误: {e}")
    except Exception as e:  # top-level boundary of a diagnostic tool
        print(f"✗ 错误: {e}")
        import traceback
        traceback.print_exc()


def main():
    """Entry point: use the path given on the command line or search for one."""
    if len(sys.argv) > 1:
        db_path = sys.argv[1]
    else:
        print("正在搜索数据库文件...")
        db_path = find_database()

        if not db_path:
            print("✗ 未找到数据库文件")
            print()
            print("使用方法:")
            print(f" {sys.argv[0]} /path/to/jobs.db")
            print()
            print("或者将数据库放在以下位置之一:")
            print(" - /home/user/remoteCI/data/jobs.db")
            print(" - /tmp/remote-ci/jobs.db")
            sys.exit(1)

        print(f"✓ 找到数据库: {db_path}")
        print()

    read_database(db_path)


if __name__ == '__main__':
    main()
--- /dev/null +++ b/read_db.sh @@ -0,0 +1,223 @@ +#!/bin/bash +# 数据库内容查看工具 + +# 使用方法: +# ./read_db.sh [数据库文件路径] +# 如果不提供路径,会自动搜索 + +DB_PATH="$1" + +echo "=======================================================================" +echo "Remote CI 数据库内容查看工具" +echo "=======================================================================" +echo + +# 如果没有提供路径,尝试查找 +if [ -z "$DB_PATH" ]; then + echo "正在搜索数据库文件..." + + # 常见位置 + SEARCH_PATHS=( + "/home/user/remoteCI/data/jobs.db" + "/tmp/remote-ci/jobs.db" + "/var/lib/remote-ci/jobs.db" + "$HOME/.remote-ci/jobs.db" + ) + + for path in "${SEARCH_PATHS[@]}"; do + if [ -f "$path" ]; then + DB_PATH="$path" + echo "✓ 找到数据库: $DB_PATH" + break + fi + done + + # 如果还没找到,搜索整个项目目录 + if [ -z "$DB_PATH" ]; then + FOUND=$(find /home/user/remoteCI -name 'jobs.db' 2>/dev/null | head -1) + if [ -n "$FOUND" ]; then + DB_PATH="$FOUND" + echo "✓ 找到数据库: $DB_PATH" + fi + fi +fi + +# 检查数据库文件是否存在 +if [ -z "$DB_PATH" ] || [ ! -f "$DB_PATH" ]; then + echo "✗ 错误: 找不到数据库文件" + echo + echo "使用方法:" + echo " ./read_db.sh /path/to/jobs.db" + echo + echo "或者手动指定位置后再运行" + exit 1 +fi + +# 检查 sqlite3 是否安装 +if ! command -v sqlite3 &> /dev/null; then + echo "✗ 错误: sqlite3 未安装" + echo "请先安装: sudo apt-get install sqlite3" + exit 1 +fi + +echo +echo "=======================================================================" +echo "数据库信息" +echo "=======================================================================" +echo "文件路径: $DB_PATH" +echo "文件大小: $(ls -lh "$DB_PATH" | awk '{print $5}')" +echo "修改时间: $(stat -c %y "$DB_PATH" 2>/dev/null || stat -f %Sm "$DB_PATH" 2>/dev/null)" +echo + +echo "=======================================================================" +echo "1. 数据库表结构" +echo "=======================================================================" +sqlite3 "$DB_PATH" ".schema" 2>/dev/null || echo "无法读取表结构" +echo + +echo "=======================================================================" +echo "2. 
任务统计" +echo "=======================================================================" +echo "总任务数:" +sqlite3 "$DB_PATH" "SELECT COUNT(*) as total FROM ci_jobs;" 2>/dev/null || echo "0" +echo + +echo "按状态统计:" +sqlite3 "$DB_PATH" " +SELECT + status, + COUNT(*) as count +FROM ci_jobs +GROUP BY status +ORDER BY count DESC; +" -header -column 2>/dev/null || echo "无数据" +echo + +echo "按模式统计:" +sqlite3 "$DB_PATH" " +SELECT + mode, + COUNT(*) as count +FROM ci_jobs +GROUP BY mode +ORDER BY count DESC; +" -header -column 2>/dev/null || echo "无数据" +echo + +echo "按用户统计:" +sqlite3 "$DB_PATH" " +SELECT + COALESCE(user_id, '(未设置)') as user_id, + COUNT(*) as count +FROM ci_jobs +GROUP BY user_id +ORDER BY count DESC +LIMIT 10; +" -header -column 2>/dev/null || echo "无数据" +echo + +echo "=======================================================================" +echo "3. 最近的任务(最新10条)" +echo "=======================================================================" +sqlite3 "$DB_PATH" " +SELECT + substr(job_id, 1, 20) || '...' as job_id, + status, + mode, + COALESCE(user_id, 'N/A') as user_id, + substr(created_at, 1, 19) as created_at +FROM ci_jobs +ORDER BY created_at DESC +LIMIT 10; +" -header -column 2>/dev/null || echo "无数据" +echo + +echo "=======================================================================" +echo "4. 
任务详情(选择最新的一条)" +echo "=======================================================================" +LATEST_JOB=$(sqlite3 "$DB_PATH" "SELECT job_id FROM ci_jobs ORDER BY created_at DESC LIMIT 1;" 2>/dev/null) + +if [ -n "$LATEST_JOB" ]; then + echo "最新任务: $LATEST_JOB" + echo "-------------------------------------------------------------------" + sqlite3 "$DB_PATH" " +SELECT + 'job_id: ' || job_id || char(10) || + 'status: ' || status || char(10) || + 'mode: ' || mode || char(10) || + 'user_id: ' || COALESCE(user_id, 'N/A') || char(10) || + 'project_name: ' || COALESCE(project_name, 'N/A') || char(10) || + 'created_at: ' || created_at || char(10) || + 'started_at: ' || COALESCE(started_at, 'N/A') || char(10) || + 'finished_at: ' || COALESCE(finished_at, 'N/A') || char(10) || + 'duration: ' || COALESCE(CAST(duration AS TEXT), 'N/A') || 's' || char(10) || + 'exit_code: ' || COALESCE(CAST(exit_code AS TEXT), 'N/A') || char(10) || + 'log_file: ' || COALESCE(log_file, 'N/A') +FROM ci_jobs +WHERE job_id = '$LATEST_JOB'; +" 2>/dev/null +else + echo "数据库为空" +fi +echo + +echo "=======================================================================" +echo "5. 运行中的任务" +echo "=======================================================================" +sqlite3 "$DB_PATH" " +SELECT + substr(job_id, 1, 20) || '...' as job_id, + status, + mode, + COALESCE(user_id, 'N/A') as user_id, + substr(created_at, 1, 19) as created_at +FROM ci_jobs +WHERE status IN ('queued', 'running') +ORDER BY created_at DESC; +" -header -column 2>/dev/null + +COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM ci_jobs WHERE status IN ('queued', 'running');" 2>/dev/null) +if [ "$COUNT" = "0" ]; then + echo "(无运行中的任务)" +fi +echo + +echo "=======================================================================" +echo "6. 
#!/usr/bin/env python3
"""
Scan the machine for job database files and compare them.

Every ``*.db`` file whose name contains "jobs" under the search roots is
opened, its tables and row counts are printed, and a summary states which
database (if any) actually holds data. Finally the running Flask and Celery
worker processes are inspected with ``pgrep``/``lsof`` to show which database
files they have open.
"""

import os
import sqlite3
import subprocess
import sys
from contextlib import closing
from datetime import datetime
from pathlib import Path

# Roots that are walked when looking for database files. They may overlap
# (e.g. "~" usually contains the project directory); find_db_files() removes
# the resulting duplicates.
SEARCH_LOCATIONS = [
    "/home/user/remoteCI",
    "/tmp",
    "/var",
    "/root",
    os.path.expanduser("~"),
]

# Directory names that are never descended into.
SKIP_DIR_NAMES = {'proc', 'sys', 'dev', 'snap', '.git'}


def _in_skipped_dir(path):
    """Return True when any component of *path* is a skipped directory name."""
    return any(part in SKIP_DIR_NAMES for part in path.split(os.sep))


def find_db_files(search_locations):
    """Return the job database files found under *search_locations*.

    A file matches when its name ends in ``.db`` and contains "jobs"
    (case-insensitive). Each physical file is reported once even when the
    search roots overlap or reach it through a symlink; the first path it was
    seen under is kept. Skipped directories are pruned from the walk instead
    of being traversed and ignored.
    """
    found = []
    seen_real_paths = set()

    for location in search_locations:
        if _in_skipped_dir(location):
            continue
        try:
            for root, dirs, files in os.walk(location):
                # Prune in place so os.walk never descends into these trees.
                dirs[:] = [d for d in dirs if d not in SKIP_DIR_NAMES]

                for file_name in files:
                    if not (file_name.endswith('.db') and 'jobs' in file_name.lower()):
                        continue
                    full_path = os.path.join(root, file_name)
                    real_path = os.path.realpath(full_path)
                    if real_path in seen_real_paths:
                        # Same file reached again through an overlapping root.
                        continue
                    seen_real_paths.add(real_path)
                    found.append(full_path)
                    print(f" 找到: {full_path}")
        except OSError:
            # Unreadable roots are simply not searched.
            pass

    return found


def analyze_database(db_path):
    """Print details of one database file and return its statistics.

    Returns a dict with ``path``, ``size``, ``mtime``, ``job_count``,
    ``special_user_count`` and ``has_data``, or ``None`` when the file cannot
    be inspected (the error is printed, and the scan goes on with the next
    file).
    """
    print(f"分析: {db_path}")
    print("-" * 80)

    try:
        stat = os.stat(db_path)
        size = stat.st_size
        mtime = datetime.fromtimestamp(stat.st_mtime)

        print(f" 文件大小: {size:,} bytes ({size / 1024:.2f} KB)")
        print(f" 修改时间: {mtime}")

        job_count = 0
        special_user_count = 0

        # closing() guarantees the connection is released even if a query fails.
        with closing(sqlite3.connect(db_path)) as conn:
            cursor = conn.cursor()

            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            tables = [row[0] for row in cursor.fetchall()]
            print(f" 表: {', '.join(tables) if tables else '(无)'}")

            if 'ci_jobs' in tables:
                cursor.execute("SELECT COUNT(*) FROM ci_jobs")
                job_count = cursor.fetchone()[0]
                print(f" 任务数量: {job_count}")

                if job_count > 0:
                    cursor.execute("SELECT status, COUNT(*) FROM ci_jobs GROUP BY status")
                    print(f" 按状态统计:")
                    for status, count in cursor.fetchall():
                        print(f" - {status}: {count}")

                    cursor.execute(
                        "SELECT job_id, status, created_at FROM ci_jobs "
                        "ORDER BY created_at DESC LIMIT 1"
                    )
                    latest = cursor.fetchone()
                    if latest:
                        # Columns may be NULL; never slice None.
                        job_id = str(latest[0]) if latest[0] is not None else 'N/A'
                        created = str(latest[2]) if latest[2] is not None else 'N/A'
                        print(f" 最新任务: {job_id[:30]}... ({latest[1]}) @ {created[:19]}")

            if 'special_users' in tables:
                cursor.execute("SELECT COUNT(*) FROM special_users")
                special_user_count = cursor.fetchone()[0]
                print(f" 特殊用户数: {special_user_count}")

    except Exception as e:
        # Per-file boundary: one unreadable or corrupt file must not abort the scan.
        print(f" ✗ 错误: {e}")
        print()
        return None

    print()
    return {
        'path': db_path,
        'size': size,
        'mtime': mtime,
        'job_count': job_count,
        'special_user_count': special_user_count,
        'has_data': job_count > 0 or special_user_count > 0,
    }


def print_summary(results):
    """Print which of the analysed databases hold data and what to do next."""
    print("=" * 80)
    print("总结")
    print("=" * 80)
    print()

    has_data_dbs = [r for r in results if r['has_data']]
    empty_dbs = [r for r in results if not r['has_data']]

    if not has_data_dbs:
        print("✗ 所有数据库都是空的!")
        print()
        print("这说明:")
        print(" 1. 可能还没有提交过任务")
        print(" 2. 或者任务数据在未被发现的数据库文件中")
        print()
        print("建议:")
        print(" 1. 提交一个测试任务")
        print(" 2. 检查 Flask 和 Celery 的启动日志,看它们使用的数据库路径")
        print(" 3. 使用 strace 或 lsof 追踪进程打开的文件")
        print()

    elif len(has_data_dbs) == 1:
        active = has_data_dbs[0]
        print(f"✓ 找到包含数据的数据库: {active['path']}")
        print(f" 任务数: {active['job_count']}")
        print(f" 特殊用户数: {active['special_user_count']}")
        print(f" 最后修改: {active['mtime']}")
        print()

        if empty_dbs:
            print(f"⚠ 同时发现 {len(empty_dbs)} 个空数据库:")
            for empty in empty_dbs:
                print(f" - {empty['path']}")
            print()

        print("建议:")
        print(f" 1. 设置环境变量 CI_DATA_DIR={os.path.dirname(active['path'])}")
        print(" 2. 确保 Flask 和 Celery Worker 都使用此环境变量")
        if empty_dbs:
            # Only meaningful when stray empty files actually exist.
            print(" 3. 考虑删除空的数据库文件以避免混淆")
        print()

    else:
        print(f"✗ 警告: 发现 {len(has_data_dbs)} 个包含数据的数据库!")
        print()
        print("这可能导致数据不一致问题。数据库列表:")
        print()
        for i, db in enumerate(has_data_dbs, 1):
            print(f"{i}. {db['path']}")
            print(f" 任务数: {db['job_count']}, 最后修改: {db['mtime']}")
            print()

        print("建议:")
        print(" 1. 确定哪个是正确的数据库")
        print(" 2. 设置 CI_DATA_DIR 环境变量指向正确的目录")
        print(" 3. 考虑合并或清理其他数据库")
        print()


def _report_process(label, pattern):
    """Report whether a process matching *pattern* runs and which job DBs it has open."""
    result = subprocess.run(['pgrep', '-f', pattern], capture_output=True, text=True)
    if result.returncode != 0:
        print(f"⚠ {label} 进程未运行")
        return

    pids = result.stdout.strip().split('\n')
    print(f"✓ {label} 进程运行中 (PID: {', '.join(pids)})")

    for pid in pids:
        try:
            lsof_result = subprocess.run(
                ['lsof', '-p', pid], capture_output=True, text=True, timeout=5
            )
        except (OSError, subprocess.SubprocessError):
            # lsof missing, timed out, or the process vanished: best effort only.
            continue

        open_dbs = [
            line for line in lsof_result.stdout.split('\n')
            if '.db' in line and 'jobs' in line.lower()
        ]
        if open_dbs:
            print(f" 进程 {pid} 打开的数据库文件:")
            for line in open_dbs:
                print(f" {line}")


def check_processes():
    """Print the state of the Flask and Celery worker processes."""
    print("=" * 80)
    print("检查运行中的进程")
    print("=" * 80)
    print()

    try:
        _report_process("Flask", 'flask')
        print()
        _report_process("Celery Worker", 'celery.*worker')
    except Exception as e:
        # Typically pgrep is not installed on this system.
        print(f"无法检查进程: {e}")


def main(search_locations=None):
    """Run the full scan; exits with status 1 when no database file is found."""
    print("=" * 80)
    print("全面扫描所有数据库文件")
    print("=" * 80)
    print()

    print("正在搜索数据库文件...")
    db_files = find_db_files(SEARCH_LOCATIONS if search_locations is None else search_locations)
    print()

    if not db_files:
        print("✗ 没有找到任何数据库文件")
        print()
        print("可能的原因:")
        print(" 1. 系统还未初始化运行过")
        print(" 2. 数据库在搜索范围之外")
        print(" 3. 数据库名称不包含 'jobs'")
        print()
        sys.exit(1)

    print(f"✓ 共找到 {len(db_files)} 个相关数据库文件")
    print()

    print("=" * 80)
    print("分析每个数据库文件")
    print("=" * 80)
    print()

    results = []
    for db_path in db_files:
        info = analyze_database(db_path)
        if info is not None:
            results.append(info)

    print_summary(results)
    check_processes()

    print()
    print("=" * 80)
    print("完成")
    print("=" * 80)


if __name__ == '__main__':
    main()
@app.route('/api/debug/logs', methods=['GET'])
def get_debug_logs():
    """
    Return the most recent log lines (no token required, for quick debugging).

    Query parameters:
    - lines: how many trailing lines to return (default 100, clamped to 1..1000)
    - filter: keyword a line must contain to be returned (optional)
    - source: which log to read (flask|celery|all, default all)

    Responds with 404 when none of the selected log files exists and with 500
    (carrying the error text) on any unexpected failure.

    NOTE(review): this endpoint exposes server logs without authentication;
    confirm it is never reachable from untrusted networks.
    """
    from collections import deque

    try:
        requested = request.args.get('lines', 100, type=int)
        # Clamp on both sides: 0 or a negative value would otherwise turn the
        # tail slice into "everything" and bypass the 1000-line cap.
        lines = max(1, min(requested, 1000))
        filter_keyword = request.args.get('filter', '')
        source = request.args.get('source', 'all')

        celery_log = Path(DATA_DIR) / 'logs' / 'celery_worker.log'
        log_files = []

        # Pick the log files that match the requested source and exist.
        if source in ('flask', 'all') and os.path.exists(LOG_FILE):
            log_files.append(('flask', LOG_FILE))

        if source in ('celery', 'all') and os.path.exists(celery_log):
            log_files.append(('celery', str(celery_log)))

        if not log_files:
            return jsonify({
                'error': '日志文件不存在',
                'flask_log': str(LOG_FILE),
                'celery_log': str(celery_log)
            }), 404

        # Stream the files and keep only the last `lines` matches, so memory
        # use is bounded by the cap rather than by the size of the logs.
        tail = deque(maxlen=lines)
        matched_total = 0
        for log_source, log_path in log_files:
            with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
                for line in f:
                    tagged = f"[{log_source}] {line}"
                    # The keyword is matched against the tagged line, prefix included.
                    if filter_keyword and filter_keyword not in tagged:
                        continue
                    matched_total += 1
                    tail.append(tagged)

        return jsonify({
            'sources': [src for src, _ in log_files],
            'total_lines': matched_total,
            'returned_lines': len(tail),
            'filter': filter_keyword if filter_keyword else None,
            'logs': list(tail)
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500
@app.route('/api/debug/config', methods=['GET'])
def get_config_info():
    """
    Return configuration details (no token required).

    The JSON body contains:
    - environment: selected environment variables as seen by this process
    - config: the configured data/workspace directories and API host/port
    - system: Python version, hostname, process id and working directory
    - database: the database path and whether the file exists
    - log_file: path of the application log file

    Any failure is reported as a 500 response carrying the error text.
    """
    try:
        import socket
        import sys

        env_names = (
            'CI_DATA_DIR',
            'CI_WORK_DIR',
            'CI_WORKSPACE_DIR',
            'PYTHONUNBUFFERED',
        )
        environment = {name: os.getenv(name) for name in env_names}

        config = {
            'DATA_DIR': DATA_DIR,
            'WORKSPACE_DIR': WORKSPACE_DIR,
            'API_HOST': API_HOST,
            'API_PORT': API_PORT,
        }

        system = {
            'python_version': sys.version,
            'hostname': socket.gethostname(),
            'pid': os.getpid(),
            'cwd': os.getcwd(),
        }

        db_file = f"{DATA_DIR}/jobs.db"
        database = {
            'path': db_file,
            'exists': os.path.exists(db_file),
        }

        payload = {
            'environment': environment,
            'config': config,
            'system': system,
            'database': database,
            'log_file': str(LOG_FILE),
        }
        return jsonify(payload)

    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
__init__(self, db_path: str): self._local = threading.local() # 确保数据库文件的父目录存在 Path(db_path).parent.mkdir(parents=True, exist_ok=True) + + # 记录数据库路径 - 用于调试 + logger.info(f"[数据库初始化] 路径: {self.db_path}") + logger.info(f"[数据库初始化] 文件存在: {Path(db_path).exists()}") + print(f"[数据库初始化] 路径: {self.db_path}") # 保留 print 用于控制台 + print(f"[数据库初始化] 文件存在: {Path(db_path).exists()}") + self._init_db() def _get_conn(self): @@ -126,6 +138,13 @@ def create_job(self, job_id: str, job_data: Dict[str, Any]) -> bool: Returns: bool: 是否创建成功 """ + logger.info(f"[数据库写入] 准备创建任务记录: job_id={job_id}, mode={job_data.get('mode')}, user_id={job_data.get('user_id')}") + print(f"[数据库写入] 准备创建任务记录") + print(f" 数据库路径: {self.db_path}") + print(f" 任务ID: {job_id}") + print(f" 模式: {job_data.get('mode', 'unknown')}") + print(f" 用户ID: {job_data.get('user_id', 'N/A')}") + try: conn = self._get_conn() cursor = conn.cursor() @@ -151,10 +170,23 @@ def create_job(self, job_id: str, job_data: Dict[str, Any]) -> bool: )) conn.commit() + + # 验证写入 + cursor.execute('SELECT COUNT(*) FROM ci_jobs WHERE job_id = ?', (job_id,)) + count = cursor.fetchone()[0] + + logger.info(f"✓ 任务记录创建成功: job_id={job_id}, 验证={count}条, 文件大小={Path(self.db_path).stat().st_size}B") + print(f"✓ 任务记录创建成功") + print(f" 验证查询: 找到 {count} 条记录") + print(f" 数据库文件大小: {Path(self.db_path).stat().st_size} 字节") + return True except Exception as e: + logger.error(f"✗ 创建任务记录失败: job_id={job_id}, error={e}") print(f"✗ 创建任务记录失败: {e}") + import traceback + traceback.print_exc() return False def update_job_started(self, job_id: str) -> bool: @@ -167,6 +199,11 @@ def update_job_started(self, job_id: str) -> bool: Returns: bool: 是否更新成功 """ + logger.info(f"[数据库更新] 更新任务开始状态: job_id={job_id}") + print(f"[数据库更新] 更新任务开始状态") + print(f" 数据库路径: {self.db_path}") + print(f" 任务ID: {job_id}") + try: conn = self._get_conn() cursor = conn.cursor() @@ -177,11 +214,18 @@ def update_job_started(self, job_id: str) -> bool: WHERE job_id = ? 
''', (datetime.now(UTC).replace(tzinfo=None).isoformat() + 'Z', job_id)) + rows_affected = cursor.rowcount conn.commit() + + logger.info(f"✓ 任务状态更新为 running: job_id={job_id}, 影响{rows_affected}行") + print(f"✓ 任务状态更新为 running,影响 {rows_affected} 行") + return True except Exception as e: print(f"✗ 更新任务开始状态失败: {e}") + import traceback + traceback.print_exc() return False def update_job_finished(self, job_id: str, status: str, result: Optional[Dict[str, Any]] = None) -> bool: @@ -196,6 +240,12 @@ def update_job_finished(self, job_id: str, status: str, result: Optional[Dict[st Returns: bool: 是否更新成功 """ + logger.info(f"[数据库更新] 更新任务完成状态: job_id={job_id}, status={status}") + print(f"[数据库更新] 更新任务完成状态") + print(f" 数据库路径: {self.db_path}") + print(f" 任务ID: {job_id}") + print(f" 最终状态: {status}") + try: conn = self._get_conn() cursor = conn.cursor() @@ -219,11 +269,18 @@ def update_job_finished(self, job_id: str, status: str, result: Optional[Dict[st job_id )) + rows_affected = cursor.rowcount conn.commit() + + logger.info(f"✓ 任务状态更新为 {status}: job_id={job_id}, 影响{rows_affected}行") + print(f"✓ 任务状态更新为 {status},影响 {rows_affected} 行") + return True except Exception as e: print(f"✗ 更新任务完成状态失败: {e}") + import traceback + traceback.print_exc() return False def get_job(self, job_id: str) -> Optional[Dict[str, Any]]: @@ -263,6 +320,11 @@ def get_jobs(self, limit: int = 50, offset: int = 0, filters: Optional[Dict[str, Returns: 任务列表 """ + logger.info(f"[数据库查询] 查询任务列表: limit={limit}, offset={offset}, filters={filters}") + print(f"[数据库查询] 查询任务列表") + print(f" 数据库路径: {self.db_path}") + print(f" limit={limit}, offset={offset}, filters={filters}") + try: conn = self._get_conn() cursor = conn.cursor() @@ -295,9 +357,21 @@ def get_jobs(self, limit: int = 50, offset: int = 0, filters: Optional[Dict[str, query += ' ORDER BY created_at DESC LIMIT ? OFFSET ?' 
def setup_celery_logging():
    """Configure and return the 'remoteCI.celery' logger.

    Creates ``{DATA_DIR}/logs`` if needed and attaches a rotating file handler
    (``celery_worker.log``, 10 MB per file, 5 backups) plus a console handler,
    both at INFO level with the same timestamped format, then logs a startup
    banner with the data directory, database path and log file.

    The function is idempotent: if the logger already has handlers it is
    returned unchanged, so a repeated call does not attach a second pair of
    handlers and duplicate every log record.

    Returns:
        logging.Logger: the configured 'remoteCI.celery' logger.
    """
    log_dir = Path(DATA_DIR) / 'logs'
    log_dir.mkdir(parents=True, exist_ok=True)

    log_file = log_dir / 'celery_worker.log'

    logger = logging.getLogger('remoteCI.celery')
    logger.setLevel(logging.INFO)

    # Already configured by an earlier call in this process.
    if logger.handlers:
        return logger

    formatter = logging.Formatter(
        '%(asctime)s [%(levelname)s] %(name)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # Rotating file handler
    file_handler = RotatingFileHandler(
        log_file,
        maxBytes=10*1024*1024,  # 10MB
        backupCount=5,
        encoding='utf-8'
    )
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)

    # Console handler
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    # Startup banner, written once per process.
    logger.info("=" * 60)
    logger.info("Celery Worker 启动")
    logger.info(f"数据目录: {DATA_DIR}")
    logger.info(f"数据库路径: {DATA_DIR}/jobs.db")
    logger.info(f"日志文件: {log_file}")
    logger.info("=" * 60)

    return logger
初始化数据库连接 job_db = JobDatabase(f"{DATA_DIR}/jobs.db") @@ -93,6 +142,9 @@ def update_progress(state, meta): self.update_state(state=state, meta=meta) try: + # 记录任务开始 + celery_logger.info(f"[任务开始] task_id={task_id}, mode={job_data.get('mode')}, user_id={job_data.get('user_id')}") + # 更新数据库状态为运行中 job_db.update_job_started(task_id) @@ -278,6 +330,9 @@ def update_progress(state, meta): # 更新数据库状态为完成 job_db.update_job_finished(task_id, status, result) + # 记录任务完成 + celery_logger.info(f"[任务完成] task_id={task_id}, status={status}, duration={duration:.2f}s, exit_code={build_result.returncode}") + # 更新文件大小信息 log_size = 0 if os.path.exists(log_file): @@ -316,10 +371,13 @@ def update_progress(state, meta): log(f"超时限制: {JOB_TIMEOUT} 秒") log("=" * 70) + duration = (datetime.now(UTC8) - start_time).total_seconds() + celery_logger.error(f"[任务超时] task_id={task_id}, duration={duration:.2f}s, timeout={JOB_TIMEOUT}s") + result = { 'status': 'timeout', 'exit_code': -1, - 'duration': (datetime.now(UTC8) - start_time).total_seconds(), + 'duration': duration, 'error': 'Task timeout' } @@ -333,10 +391,13 @@ def update_progress(state, meta): log(f"✗ 任务执行错误: {str(e)}") log("=" * 70) + duration = (datetime.now(UTC8) - start_time).total_seconds() + celery_logger.error(f"[任务错误] task_id={task_id}, error={str(e)}, duration={duration:.2f}s") + result = { 'status': 'error', 'exit_code': -2, - 'duration': (datetime.now(UTC8) - start_time).total_seconds(), + 'duration': duration, 'error': str(e) }