From df10362bbfc94e3d995335b1a62a1b74b3ff5aab Mon Sep 17 00:00:00 2001 From: liukaiwen Date: Mon, 27 Oct 2025 17:29:10 +0800 Subject: [PATCH 1/2] feat: add api record mysql --- llm_web_kit/api/DATABASE_README.md | 358 ++++++++++++++++++++++++++++ llm_web_kit/api/database.py | 118 +++++++++ llm_web_kit/api/database_setup.sql | 87 +++++++ llm_web_kit/api/dependencies.py | 5 + llm_web_kit/api/main.py | 22 ++ llm_web_kit/api/models/db_models.py | 31 +++ llm_web_kit/api/models/response.py | 1 + llm_web_kit/api/requirements.txt | 5 + llm_web_kit/api/routers/htmls.py | 114 ++++++++- 9 files changed, 733 insertions(+), 8 deletions(-) create mode 100644 llm_web_kit/api/DATABASE_README.md create mode 100644 llm_web_kit/api/database.py create mode 100644 llm_web_kit/api/database_setup.sql create mode 100644 llm_web_kit/api/models/db_models.py diff --git a/llm_web_kit/api/DATABASE_README.md b/llm_web_kit/api/DATABASE_README.md new file mode 100644 index 00000000..2cc42999 --- /dev/null +++ b/llm_web_kit/api/DATABASE_README.md @@ -0,0 +1,358 @@ +# 数据库配置说明 + +## 功能概述 + +本项目新增了 MySQL 请求日志功能,用于记录每次 HTML 解析请求的详细信息,包括: + +- **任务ID** (`task_id`): 用于关联同一任务的多个请求 +- **请求ID** (`request_id`): 每次请求的唯一标识符(自动生成UUID) +- **输入类型** (`input_type`): html_content(HTML字符串)、url(URL地址)、file(文件上传) +- **输入HTML** (`input_html`): 输入的HTML字符串内容 +- **URL** (`url`): 输入的URL地址 +- **输出Markdown** (`output_markdown`): 解析后输出的Markdown格式内容 +- **成功状态** (`is_success`): 请求是否成功 +- **错误信息** (`error_message`): 失败时的详细错误信息 +- **创建时间** (`created_at`): 请求创建时间 +- **更新时间** (`updated_at`): 记录最后更新时间 + +## 数据库设置 + +### 1. 安装 MySQL + +确保已安装 MySQL 5.7+ 或 MariaDB 10.2+。 + +### 2. 创建数据库和表 + +执行 `database_setup.sql` 文件中的 SQL 语句: + +```bash +mysql -u root -p < database_setup.sql +``` + +或者登录 MySQL 后执行: + +```sql +source /path/to/database_setup.sql; +``` + +### 3. 
配置环境变量 + +复制 `.env.example` 为 `.env`: + +```bash +cp .env.example .env +``` + +编辑 `.env` 文件,配置数据库连接: + +```env +DATABASE_URL=mysql+aiomysql://root:your_password@localhost:3306/llm_web_kit +``` + +**连接字符串格式说明:** + +``` +mysql+aiomysql://用户名:密码@主机:端口/数据库名 +``` + +**示例:** + +- 本地开发: `mysql+aiomysql://root:123456@localhost:3306/llm_web_kit` +- 远程服务器: `mysql+aiomysql://user:pass@192.168.1.100:3306/llm_web_kit` + +### 4. 安装依赖 + +```bash +pip install -r requirements.txt +``` + +新增的依赖包括: + +- `sqlalchemy>=2.0.0` - ORM框架 +- `aiomysql>=0.2.0` - 异步MySQL驱动 +- `pymysql>=1.1.0` - MySQL客户端库 + +## 使用说明 + +### 启动服务 + +```bash +python llm_web_kit/api/run_server.py +``` + +或者: + +```bash +python -m llm_web_kit.api.main +``` + +### API 调用示例 + +#### 1. 解析 HTML 内容 + +```bash +curl -X POST "http://127.0.0.1:8000/api/v1/html/parse" \ + -H "Content-Type: application/json" \ + -d '{ + "html_content": "

<html><body><h1>Hello World</h1></body></html>", + "url": "https://example.com", + "options": { + "task_id": "task_001", + "clean_html": true + } + }' +``` + +**响应示例:** + +```json +{ + "success": true, + "message": "HTML 解析成功", + "timestamp": "2025-10-27T15:30:00.123456", + "request_id": "550e8400-e29b-41d4-a716-446655440000", + "data": { + "markdown": "# Hello World", + ... + } +} +``` + +#### 2. 上传 HTML 文件 + +```bash +curl -X POST "http://127.0.0.1:8000/api/v1/html/upload" \ + -F "file=@/path/to/file.html" +``` + +### 传递任务ID + +如果需要关联多个请求到同一任务,可以在 `options` 中传递 `task_id`: + +```json +{ + "html_content": "...", + "options": { + "task_id": "my_task_123" + } +} +``` + +## 数据库查询示例 + +### 查询最近的请求记录 + +```sql +SELECT * FROM request_logs +ORDER BY created_at DESC +LIMIT 100; +``` + +### 查询某个任务的所有请求 + +```sql +SELECT * FROM request_logs +WHERE task_id = 'task_001' +ORDER BY created_at; +``` + +### 查询失败的请求 + +```sql +SELECT request_id, input_type, error_message, created_at +FROM request_logs +WHERE status = 'fail' +ORDER BY created_at DESC; +``` + +### 统计成功率 + +```sql +SELECT + COUNT(*) as total_requests, + SUM(status = 'success') as success_count, + ROUND(SUM(status = 'success') / COUNT(*) * 100, 2) as success_rate +FROM request_logs; +``` + +### 按日期统计请求量 + +```sql +SELECT + DATE(created_at) as date, + COUNT(*) as total, + SUM(status = 'success') as success, + SUM(status = 'fail') as failed +FROM request_logs +GROUP BY DATE(created_at) +ORDER BY date DESC; +``` + +### 查询特定请求的详细信息 + +```sql +SELECT * FROM request_logs +WHERE request_id = '550e8400-e29b-41d4-a716-446655440000'; +``` + +## 功能特性 + +### 1. 自动日志记录 + +每次调用 `/api/v1/html/parse` 或 `/api/v1/html/upload` 接口时,系统会自动: + +- 生成唯一的 `request_id` +- 记录请求开始时间 +- 保存输入参数(HTML内容、URL等) +- 记录解析结果(Markdown输出) +- 记录成功/失败状态和错误信息 + +### 2. 异步数据库操作 + +使用 SQLAlchemy 异步引擎和 aiomysql 驱动,不会阻塞 API 请求处理。 + +### 3. 优雅降级 + +如果数据库未配置或连接失败: + +- API 服务仍然正常运行 +- 只是不记录请求日志 +- 不影响 HTML 解析功能 + +### 4. 
连接池管理 + +使用数据库连接池,提高性能: + +- 默认池大小: 5 +- 最大溢出: 10 +- 可通过环境变量配置 + +## 故障排查 + +### 问题1: 数据库连接失败 + +**错误信息:** + +``` +数据库连接初始化失败: (2003, "Can't connect to MySQL server...") +``` + +**解决方案:** + +1. 检查 MySQL 服务是否运行 +2. 验证 `DATABASE_URL` 配置是否正确 +3. 确认数据库用户权限 +4. 检查防火墙设置 + +### 问题2: 表不存在 + +**错误信息:** + +``` +Table 'llm_web_kit.request_logs' doesn't exist +``` + +**解决方案:** +执行 `database_setup.sql` 创建表: + +```bash +mysql -u root -p llm_web_kit < database_setup.sql +``` + +### 问题3: 依赖包缺失 + +**错误信息:** + +``` +ModuleNotFoundError: No module named 'aiomysql' +``` + +**解决方案:** +安装依赖包: + +```bash +pip install sqlalchemy aiomysql pymysql +``` + +### 问题4: 字符编码问题 + +**解决方案:** +确保数据库和表使用 `utf8mb4` 字符集: + +```sql +ALTER DATABASE llm_web_kit CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; +ALTER TABLE request_logs CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; +``` + +## 性能优化建议 + +### 1. 定期清理历史数据 + +```sql +-- 删除30天前的日志 +DELETE FROM request_logs +WHERE created_at < DATE_SUB(NOW(), INTERVAL 30 DAY); +``` + +### 2. 添加分区表(可选) + +对于大量数据,可以按月分区: + +```sql +ALTER TABLE request_logs +PARTITION BY RANGE (TO_DAYS(created_at)) ( + PARTITION p202501 VALUES LESS THAN (TO_DAYS('2025-02-01')), + PARTITION p202502 VALUES LESS THAN (TO_DAYS('2025-03-01')), + ... +); +``` + +### 3. 监控慢查询 + +启用 MySQL 慢查询日志,优化查询性能。 + +### 4. 调整连接池大小 + +根据并发量调整 `.env` 中的配置: + +```env +DB_POOL_SIZE=10 +DB_MAX_OVERFLOW=20 +``` + +## 安全建议 + +1. **不要提交 .env 文件到版本控制** +2. **使用强密码** +3. **限制数据库用户权限**(只授予必要的权限) +4. **定期备份数据库** +5. 
**在生产环境使用 SSL 连接** + +## 技术架构 + +``` +FastAPI Application + ↓ +Router (htmls.py) + ↓ +RequestLogService (request_log_service.py) + ↓ +DatabaseManager (database.py) + ↓ +SQLAlchemy + aiomysql + ↓ +MySQL Database +``` + +## 相关文件 + +- `models/db_models.py` - 数据库模型定义 +- `database.py` - 数据库连接管理 +- `services/request_log_service.py` - 请求日志服务 +- `routers/htmls.py` - API 路由(集成日志记录) +- `database_setup.sql` - 数据库建表语句 +- `.env.example` - 环境变量配置示例 + +## 联系支持 + +如有问题,请查看项目文档或提交 Issue。 diff --git a/llm_web_kit/api/database.py b/llm_web_kit/api/database.py new file mode 100644 index 00000000..f58e1752 --- /dev/null +++ b/llm_web_kit/api/database.py @@ -0,0 +1,118 @@ +"""数据库连接管理. + +提供数据库连接池和会话管理功能。 +""" + +from contextlib import asynccontextmanager +from typing import AsyncGenerator, Optional + +from sqlalchemy.ext.asyncio import (AsyncSession, async_sessionmaker, + create_async_engine) + +from .dependencies import get_logger, get_settings +from .models.db_models import Base + +logger = get_logger(__name__) +settings = get_settings() + + +class DatabaseManager: + """数据库管理器.""" + + def __init__(self): + """初始化数据库管理器.""" + self._engine = None + self._async_session_maker = None + self._initialized = False + + def initialize(self): + """初始化数据库连接.""" + if self._initialized: + return + + try: + # 检查是否配置了数据库连接 + if not settings.database_url: + logger.warning("未配置数据库连接,请求日志功能将被禁用") + return + + # 创建异步引擎 + self._engine = create_async_engine( + settings.database_url, + echo=settings.debug, + pool_pre_ping=True, + pool_size=settings.db_pool_size, + max_overflow=settings.db_max_overflow, + ) + + # 创建异步会话工厂 + self._async_session_maker = async_sessionmaker( + self._engine, + class_=AsyncSession, + expire_on_commit=False, + ) + + self._initialized = True + logger.info("数据库连接初始化成功") + + except Exception as e: + logger.error(f"数据库连接初始化失败: {e}") + self._initialized = False + + async def create_tables(self): + """创建数据库表.""" + if not self._engine: + logger.warning("数据库引擎未初始化,跳过创建表") + return + + try: 
+ async with self._engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + logger.info("数据库表创建成功") + except Exception as e: + logger.error(f"创建数据库表失败: {e}") + + @asynccontextmanager + async def get_session(self) -> AsyncGenerator[Optional[AsyncSession], None]: + """获取数据库会话上下文管理器.""" + if not self._async_session_maker: + logger.warning("数据库会话工厂未初始化") + yield None + return + + session = self._async_session_maker() + try: + yield session + await session.commit() + except Exception as e: + await session.rollback() + logger.error(f"数据库会话错误: {e}") + raise + finally: + await session.close() + + async def close(self): + """关闭数据库连接.""" + if self._engine: + await self._engine.dispose() + logger.info("数据库连接已关闭") + + +# 全局数据库管理器实例 +_db_manager: Optional[DatabaseManager] = None + + +def get_db_manager() -> DatabaseManager: + """获取数据库管理器单例.""" + global _db_manager + if _db_manager is None: + _db_manager = DatabaseManager() + _db_manager.initialize() + return _db_manager + + +async def get_db_session() -> AsyncGenerator[Optional[AsyncSession], None]: + """FastAPI 依赖项:获取数据库会话.""" + db_manager = get_db_manager() + async with db_manager.get_session() as session: + yield session diff --git a/llm_web_kit/api/database_setup.sql b/llm_web_kit/api/database_setup.sql new file mode 100644 index 00000000..5736a8bf --- /dev/null +++ b/llm_web_kit/api/database_setup.sql @@ -0,0 +1,87 @@ +-- ============================================ +-- LLM Web Kit API - 数据库建表语句 +-- ============================================ +-- 数据库: mineru_ai +-- 字符集: utf8mb4 +-- 排序规则: utf8mb4_unicode_ci +-- ============================================ + +-- 创建数据库(如果不存在) +CREATE DATABASE IF NOT EXISTS mineru_ai + DEFAULT CHARACTER SET utf8mb4 + DEFAULT COLLATE utf8mb4_unicode_ci; + +USE mineru_ai; + +-- ============================================ +-- 请求日志表 +-- ============================================ +-- 用于记录每次 HTML 解析请求的详细信息 +-- ============================================ + +DROP TABLE IF EXISTS 
`request_logs`; + +CREATE TABLE `request_logs` ( + `id` INT UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '主键ID,自增', + `request_id` VARCHAR(64) NOT NULL COMMENT '请求ID,每次请求的唯一标识符', + `input_type` VARCHAR(32) NOT NULL COMMENT '输入类型: html_content(HTML字符串), url(URL地址), file(文件上传)', + `input_html` LONGTEXT DEFAULT NULL COMMENT '输入的HTML字符串内容', + `url` TEXT DEFAULT NULL COMMENT '输入的URL地址', + `output_markdown` LONGTEXT DEFAULT NULL COMMENT '输出的Markdown格式内容', + `status` VARCHAR(32) NOT NULL DEFAULT 'processing' COMMENT '状态: processing-处理中, success-成功, fail-失败', + `error_message` TEXT DEFAULT NULL COMMENT '错误信息,失败时记录详细错误', + `created_at` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + + PRIMARY KEY (`id`), + UNIQUE KEY `uk_request_id` (`request_id`), + KEY `idx_created_at` (`created_at`), + KEY `idx_status` (`status`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='请求日志表'; + +-- ============================================ +-- 索引说明 +-- ============================================ +-- 1. PRIMARY KEY (id): 主键索引,自增ID,用于快速定位记录 +-- 2. UNIQUE KEY (request_id): 唯一索引,确保请求ID唯一性 +-- 3. INDEX (created_at): 普通索引,用于按时间范围查询 +-- 4. INDEX (status): 普通索引,用于按状态查询和统计 +-- ============================================ + +-- ============================================ +-- 示例查询语句 +-- ============================================ + +-- 1. 查询最近100条请求记录 +-- SELECT * FROM request_logs ORDER BY created_at DESC LIMIT 100; + +-- 2. 查询处理中的请求 +-- SELECT * FROM request_logs WHERE status = 'processing' ORDER BY created_at DESC; + +-- 3. 查询失败的请求 +-- SELECT * FROM request_logs WHERE status = 'fail' ORDER BY created_at DESC; + +-- 4. 查询成功的请求 +-- SELECT * FROM request_logs WHERE status = 'success' ORDER BY created_at DESC; + +-- 5. 
统计各状态的请求数量 +-- SELECT +-- status, +-- COUNT(*) as count, +-- ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM request_logs), 2) as percentage +-- FROM request_logs +-- GROUP BY status; + +-- 6. 按日期统计请求量 +-- SELECT +-- DATE(created_at) as date, +-- COUNT(*) as total, +-- SUM(CASE WHEN status = 'success' THEN 1 ELSE 0 END) as success, +-- SUM(CASE WHEN status = 'fail' THEN 1 ELSE 0 END) as failed, +-- SUM(CASE WHEN status = 'processing' THEN 1 ELSE 0 END) as processing +-- FROM request_logs +-- GROUP BY DATE(created_at) +-- ORDER BY date DESC; + +-- 7. 查询某个请求的详细信息 +-- SELECT * FROM request_logs WHERE request_id = 'your_request_id'; diff --git a/llm_web_kit/api/dependencies.py b/llm_web_kit/api/dependencies.py index a2814fce..60adae7f 100644 --- a/llm_web_kit/api/dependencies.py +++ b/llm_web_kit/api/dependencies.py @@ -36,6 +36,11 @@ class Settings(BaseSettings): # 缓存配置 cache_ttl: int = 3600 # 1小时 + # 数据库配置 + database_url: Optional[str] = "mysql+aiomysql://mineru_ai:8L5D2_#W6tu9QWT@rm-uf6btdj3pp7idm669oo.mysql.rds.aliyuncs.com:3306/mineru_ai" + db_pool_size: int = 5 + db_max_overflow: int = 10 + # pydantic v2 配置写法 model_config = SettingsConfigDict( env_file=".env", diff --git a/llm_web_kit/api/main.py b/llm_web_kit/api/main.py index 18f71663..9271e058 100644 --- a/llm_web_kit/api/main.py +++ b/llm_web_kit/api/main.py @@ -56,6 +56,16 @@ async def health_check(): @app.on_event("startup") async def app_startup(): """应用启动时预热模型,避免首个请求冷启动延迟.""" + # 初始化数据库 + try: + from .database import get_db_manager + db_manager = get_db_manager() + await db_manager.create_tables() + logger.info("数据库初始化完成") + except Exception as e: + logger.warning(f"数据库初始化失败(服务仍可运行,但请求日志功能将被禁用): {e}") + + # 预热模型 try: service = get_inference_service() await service.warmup() @@ -64,6 +74,18 @@ async def app_startup(): logger.warning(f"InferenceService 预热失败(服务仍可运行,将在首次请求时再初始化): {e}") +@app.on_event("shutdown") +async def app_shutdown(): + """应用关闭时清理资源.""" + try: + from .database import get_db_manager + 
db_manager = get_db_manager() + await db_manager.close() + logger.info("数据库连接已关闭") + except Exception as e: + logger.error(f"关闭数据库连接失败: {e}") + + @app.exception_handler(Exception) async def global_exception_handler(request, exc): """全局异常处理器.""" diff --git a/llm_web_kit/api/models/db_models.py b/llm_web_kit/api/models/db_models.py new file mode 100644 index 00000000..2a15b6c2 --- /dev/null +++ b/llm_web_kit/api/models/db_models.py @@ -0,0 +1,31 @@ +"""数据库模型定义. + +定义请求日志等数据库表的 ORM 模型。 +""" + +from datetime import datetime + +from sqlalchemy import Column, DateTime, Integer, String, Text +from sqlalchemy.ext.declarative import declarative_base + +Base = declarative_base() + + +class RequestLog(Base): + """请求日志表模型.""" + + __tablename__ = 'request_logs' + + id = Column(Integer, primary_key=True, autoincrement=True, comment='主键ID') + request_id = Column(String(64), nullable=False, unique=True, index=True, comment='请求ID') + input_type = Column(String(32), nullable=False, comment='输入类型: html_content, url, file') + input_html = Column(Text, nullable=True, comment='输入HTML字符串') + url = Column(Text, nullable=True, comment='输入URL地址') + output_markdown = Column(Text, nullable=True, comment='输出Markdown内容') + status = Column(String(32), default='processing', nullable=False, comment='状态: processing-处理中, success-成功, fail-失败') + error_message = Column(Text, nullable=True, comment='错误信息') + created_at = Column(DateTime, default=datetime.now, nullable=False, comment='创建时间') + updated_at = Column(DateTime, default=datetime.now, onupdate=datetime.now, nullable=False, comment='更新时间') + + def __repr__(self): + return f"" diff --git a/llm_web_kit/api/models/response.py b/llm_web_kit/api/models/response.py index 1ec6f491..34545fb8 100644 --- a/llm_web_kit/api/models/response.py +++ b/llm_web_kit/api/models/response.py @@ -53,6 +53,7 @@ class HTMLParseResponse(BaseResponse): data: Optional[HTMLParseData] = Field(None, description="解析结果数据") metadata: Optional[Dict[str, Any]] = Field(None, 
description="元数据信息") + request_id: Optional[str] = Field(None, description="请求ID") model_config = ConfigDict( json_schema_extra={ diff --git a/llm_web_kit/api/requirements.txt b/llm_web_kit/api/requirements.txt index fc428808..c643ca51 100644 --- a/llm_web_kit/api/requirements.txt +++ b/llm_web_kit/api/requirements.txt @@ -1,17 +1,22 @@ # HTTP 客户端 aiohttp>=3.9.0 +aiomysql>=0.2.0 # FastAPI 相关依赖 fastapi>=0.104.0 httpx>=0.27.0 pydantic>=2.0.0 pydantic-settings>=2.0.0 +pymysql>=1.1.0 # 日志和配置 python-dotenv>=1.0.0 # 数据处理 python-multipart>=0.0.6 + +# 数据库支持 +sqlalchemy>=2.0.0 torch==2.6.0 transformers==4.52.4 diff --git a/llm_web_kit/api/routers/htmls.py b/llm_web_kit/api/routers/htmls.py index 0f69074a..0a505ba6 100644 --- a/llm_web_kit/api/routers/htmls.py +++ b/llm_web_kit/api/routers/htmls.py @@ -3,12 +3,17 @@ 提供 HTML 解析、内容提取等功能的 API 端点。 """ +from typing import Optional + from fastapi import APIRouter, Depends, File, HTTPException, UploadFile +from sqlalchemy.ext.asyncio import AsyncSession +from ..database import get_db_session from ..dependencies import get_logger, get_settings from ..models.request import HTMLParseRequest from ..models.response import HTMLParseResponse from ..services.html_service import HTMLService +from ..services.request_log_service import RequestLogService logger = get_logger(__name__) settings = get_settings() @@ -19,14 +24,42 @@ @router.post('/html/parse', response_model=HTMLParseResponse) async def parse_html( request: HTMLParseRequest, - html_service: HTMLService = Depends(HTMLService) + html_service: HTMLService = Depends(HTMLService), + db_session: Optional[AsyncSession] = Depends(get_db_session) ): """解析 HTML 内容. 
接收 HTML 字符串并返回解析后的结构化内容。 """ + # 生成请求ID + request_id = RequestLogService.generate_request_id() + + # 确定输入类型 + if request.html_content: + input_type = 'html_content' + elif request.url: + input_type = 'url' + else: + input_type = 'unknown' + + # 创建请求日志 + await RequestLogService.create_log( + session=db_session, + request_id=request_id, + input_type=input_type, + input_html=request.html_content, + url=request.url, + ) + + # 立即提交,使 processing 状态在数据库中可见 + if db_session: + try: + await db_session.commit() + except Exception as commit_error: + logger.error(f'提交初始日志时出错: {commit_error}') + try: - logger.info(f'开始解析 HTML,内容长度: {len(request.html_content) if request.html_content else 0}') + logger.info(f'开始解析 HTML [request_id={request_id}],内容长度: {len(request.html_content) if request.html_content else 0}') result = await html_service.parse_html( html_content=request.html_content, @@ -34,25 +67,52 @@ async def parse_html( options=request.options ) + # 更新日志为成功 + await RequestLogService.update_log_success( + session=db_session, + request_id=request_id, + output_markdown=result.get('markdown'), + ) + return HTMLParseResponse( success=True, data=result, - message='HTML 解析成功' + message='HTML 解析成功', + request_id=request_id ) except Exception as e: - logger.error(f'HTML 解析失败: {str(e)}') + logger.error(f'HTML 解析失败 [request_id={request_id}]: {str(e)}') + + # 更新日志为失败 + await RequestLogService.update_log_failure( + session=db_session, + request_id=request_id, + error_message=str(e), + ) + + # 手动提交事务,确保失败日志被保存 + if db_session: + try: + await db_session.commit() + except Exception as commit_error: + logger.error(f'提交失败日志时出错: {commit_error}') + raise HTTPException(status_code=500, detail=f'HTML 解析失败: {str(e)}') @router.post('/html/upload') async def upload_html_file( file: UploadFile = File(...), - html_service: HTMLService = Depends(HTMLService) + html_service: HTMLService = Depends(HTMLService), + db_session: Optional[AsyncSession] = Depends(get_db_session) ): """上传 HTML 文件进行解析. 
支持上传 HTML 文件,自动解析并返回结果。 """ + # 生成请求ID + request_id = RequestLogService.generate_request_id() + try: if not file.filename.endswith(('.html', '.htm')): raise HTTPException(status_code=400, detail='只支持 HTML 文件') @@ -60,18 +120,56 @@ async def upload_html_file( content = await file.read() html_content = content.decode('utf-8') - logger.info(f'上传 HTML 文件: {file.filename}, 大小: {len(content)} bytes') + logger.info(f'上传 HTML 文件 [request_id={request_id}]: {file.filename}, 大小: {len(content)} bytes') + + # 创建请求日志 + await RequestLogService.create_log( + session=db_session, + request_id=request_id, + input_type='file', + input_html=html_content, + url=None, + ) + + # 立即提交,使 processing 状态在数据库中可见 + if db_session: + try: + await db_session.commit() + except Exception as commit_error: + logger.error(f'提交初始日志时出错: {commit_error}') result = await html_service.parse_html(html_content=html_content) + # 更新日志为成功 + await RequestLogService.update_log_success( + session=db_session, + request_id=request_id, + output_markdown=result.get('markdown'), + ) + return HTMLParseResponse( success=True, data=result, message='HTML 文件解析成功', - filename=file.filename + request_id=request_id ) except Exception as e: - logger.error(f'HTML 文件解析失败: {str(e)}') + logger.error(f'HTML 文件解析失败 [request_id={request_id}]: {str(e)}') + + # 更新日志为失败 + await RequestLogService.update_log_failure( + session=db_session, + request_id=request_id, + error_message=str(e), + ) + + # 手动提交事务,确保失败日志被保存 + if db_session: + try: + await db_session.commit() + except Exception as commit_error: + logger.error(f'提交失败日志时出错: {commit_error}') + raise HTTPException(status_code=500, detail=f'HTML 文件解析失败: {str(e)}') From bbc9271303d5afbdda62c8952d1508d51589a249 Mon Sep 17 00:00:00 2001 From: liukaiwen Date: Tue, 28 Oct 2025 11:59:30 +0800 Subject: [PATCH 2/2] feat: add api record mysql --- llm_web_kit/api/dependencies.py | 2 +- llm_web_kit/api/main.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git 
a/llm_web_kit/api/dependencies.py b/llm_web_kit/api/dependencies.py index 60adae7f..d949e305 100644 --- a/llm_web_kit/api/dependencies.py +++ b/llm_web_kit/api/dependencies.py @@ -37,7 +37,7 @@ class Settings(BaseSettings): cache_ttl: int = 3600 # 1小时 # 数据库配置 - database_url: Optional[str] = "mysql+aiomysql://mineru_ai:8L5D2_#W6tu9QWT@rm-uf6btdj3pp7idm669oo.mysql.rds.aliyuncs.com:3306/mineru_ai" + database_url: Optional[str] = None # 从环境变量 DATABASE_URL 读取 db_pool_size: int = 5 db_max_overflow: int = 10 diff --git a/llm_web_kit/api/main.py b/llm_web_kit/api/main.py index 9271e058..561e8df3 100644 --- a/llm_web_kit/api/main.py +++ b/llm_web_kit/api/main.py @@ -55,7 +55,17 @@ async def health_check(): @app.on_event("startup") async def app_startup(): - """应用启动时预热模型,避免首个请求冷启动延迟.""" + """应用启动时初始化资源.""" + logger.info("应用启动中...") + + # 显示数据库配置(隐藏密码) + if settings.database_url: + # 隐藏密码部分用于日志输出 + db_url_safe = settings.database_url.split('@')[1] if '@' in settings.database_url else settings.database_url + logger.info(f"数据库配置已加载: ...@{db_url_safe}") + else: + logger.info("未配置数据库连接,请求日志功能将被禁用") + # 初始化数据库 try: from .database import get_db_manager