From 1ca8123bb59cdad2040977910f8deb805cfc98a1 Mon Sep 17 00:00:00 2001
From: Yanggq <1041206149@qq.com>
Date: Mon, 13 Oct 2025 16:09:35 +0800
Subject: [PATCH 1/4] Move the LLM API configuration into config.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 config.py                           |  1 +
 examples/multi_extractor_compare.py | 12 +++---------
 webmainbench/metrics/base.py        |  2 +-
 3 files changed, 5 insertions(+), 10 deletions(-)
 create mode 100644 config.py

diff --git a/config.py b/config.py
new file mode 100644
index 0000000..9671046
--- /dev/null
+++ b/config.py
@@ -0,0 +1 @@
+""" Global configuration file """ # LLM configuration, used to refine the extraction results of the extractors LLM_CONFIG = { 'llm_base_url': '', 'llm_api_key': '', 'llm_model': 'deepseek-chat', 'use_llm': True }
\ No newline at end of file
diff --git a/examples/multi_extractor_compare.py b/examples/multi_extractor_compare.py
index c02bae2..4d16c7a 100644
--- a/examples/multi_extractor_compare.py
+++ b/examples/multi_extractor_compare.py
@@ -1,13 +1,7 @@
 from webmainbench import DataLoader, Evaluator, ExtractorFactory, DataSaver
 from pathlib import Path
 
-# Global LLM configuration
-LLM_CONFIG = {
-    'llm_base_url': '',
-    'llm_api_key': '',
-    'llm_model': '',
-    'use_llm': True
-}
+# To have an LLM refine the extraction results, configure the LLM API in config.py
 
 def all_extractor_comparison():
     """Demonstrate multi-extractor comparison"""
@@ -15,7 +9,7 @@ def all_extractor_comparison():
     print("\n=== Multi-extractor comparison demo ===\n")
 
     # Create the dataset
-    dataset_path = Path("../data/test_math.jsonl")
+    dataset_path = Path("../data/WebMainBench_llm-webkit_v1_WebMainBench_7887_within_formula.jsonl")
     dataset = DataLoader.load_jsonl(dataset_path)
 
     # Create the webkit extractor
@@ -35,7 +29,7 @@ def all_extractor_comparison():
     # Run the comparison
     evaluator = Evaluator()
     extractors = [webkit_extractor, magic_extractor, trafilatura_extractor, resiliparse_extractor]
-    # extractors = [webkit_extractor]
+    extractors = [webkit_extractor]
 
 
     results = evaluator.compare_extractors(
diff --git a/webmainbench/metrics/base.py b/webmainbench/metrics/base.py
index 46b23c6..cf974e2 100644
--- a/webmainbench/metrics/base.py
+++ b/webmainbench/metrics/base.py
@@ -197,7 +197,7 @@ def _extract_from_markdown(text: str, field_name: str = None) -> Dict[str, str]:
             return {'code': '', 'formula': '', 'table': '', 'text': ''}
 
         # Load the LLM configuration
-        from examples.multi_extractor_compare import LLM_CONFIG
+        from config import LLM_CONFIG
        # Directly create the concrete extractor instances
         from .code_extractor import CodeSplitter
         from .formula_extractor import FormulaSplitter

From e2c7f6a90c8c3b1aebacbfde03a19d5ec5e79283 Mon Sep 17 00:00:00 2001
From: Yanggq <1041206149@qq.com>
Date: Mon, 13 Oct 2025 16:10:33 +0800
Subject: [PATCH 2/4] 1

---
 config.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/config.py b/config.py
index 9671046..d04ddd0 100644
--- a/config.py
+++ b/config.py
@@ -1 +1,11 @@
-""" 全局配置文件 """ # LLM配置,用于修正抽取工具的抽取结果 LLM_CONFIG = { 'llm_base_url': '', 'llm_api_key': '', 'llm_model': 'deepseek-chat', 'use_llm': True } \ No newline at end of file +""" +全局配置文件 +""" + +# LLM配置,用于修正抽取工具的抽取结果 +LLM_CONFIG = { + 'llm_base_url': '', + 'llm_api_key': '', + 'llm_model': 'deepseek-chat', + 'use_llm': True +} diff --git a/examples/multi_extractor_compare.py b/examples/multi_extractor_compare.py index 4d16c7a..46cbc52 100644 --- a/examples/multi_extractor_compare.py +++ b/examples/multi_extractor_compare.py @@ -29,7 +29,7 @@ def all_extractor_comparison(): # 运行对比 evaluator = Evaluator() extractors = [webkit_extractor, magic_extractor, trafilatura_extractor, resiliparse_extractor] - extractors = [webkit_extractor] + # extractors = [webkit_extractor] results = evaluator.compare_extractors( From e45c82aabe08a7c51b51c901b56dd60dc44feed9 Mon Sep 17 00:00:00 2001 From: Yanggq <1041206149@qq.com> Date: Tue, 14 Oct 2025 10:57:54 +0800 Subject: [PATCH 4/4] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E7=94=A8=E6=88=B7?= =?UTF-8?q?=E7=94=A8=E5=A5=BD=E7=9A=84api=E9=85=8D=E7=BD=AE=E6=8F=90?= =?UTF-8?q?=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- webmainbench/evaluator/evaluator.py | 74 ++++++++++++++++++- webmainbench/metrics/base_content_splitter.py | 1 - 2 files changed, 71 insertions(+), 4 deletions(-) diff --git a/webmainbench/evaluator/evaluator.py b/webmainbench/evaluator/evaluator.py index 3a6cc76..a66fbc3 100644 --- a/webmainbench/evaluator/evaluator.py +++ b/webmainbench/evaluator/evaluator.py @@ -4,7 +4,7 @@ from dataclasses import dataclass from typing import Dict, Any, List, Optional, Union, Iterator -import time +import time, sys import itertools from datetime import datetime from pathlib import Path @@ -85,10 +85,78 @@ def __init__(self, metric_config: Dict[str, Any] = None): Args: metric_config: Configuration for metrics """ + + self._validate_llm_config() + self.metric_calculator = MetricCalculator(metric_config) self.metric_config = metric_config or {} - - def evaluate(self, + + def _validate_llm_config(self): + """验证LLM配置的完整性和有效性""" + import time + from ..config import LLM_CONFIG + + if LLM_CONFIG.get('use_llm', False): + # 检查配置完整性 + if not LLM_CONFIG.get('llm_base_url') or not LLM_CONFIG.get('llm_api_key'): + print("\n" + "=" * 60) + print("❌ 错误:LLM配置不完整!") + print("-" * 60) + print("当前 use_llm = True,但缺少必要的API配置。") + print("\n请在 webmainbench/config.py 中完成以下配置:") + print(" 1. llm_base_url (例如: 'https://api.deepseek.com')") + print(" 2. llm_api_key (例如: 'sk-xxxxxxxxxxxx')") + print("\n或者设置 use_llm = False 来禁用LLM功能。") + print("=" * 60 + "\n") + sys.exit(1) + + # 验证API有效性 + try: + from openai import OpenAI + + print("正在验证LLM API配置...") + client = OpenAI( + base_url=LLM_CONFIG.get('llm_base_url'), + api_key=LLM_CONFIG.get('llm_api_key') + ) + + # 发送测试请求 + response = client.chat.completions.create( + model=LLM_CONFIG.get('llm_model', 'deepseek-chat'), + messages=[{"role": "user", "content": "test"}], + max_tokens=5, + temperature=0 + ) + + print("✅ LLM API配置验证成功!\n使用 基础方案➕LLM增强提取效果 进行评测。") + + except Exception as e: + print("\n" + "=" * 60) + print("❌ 错误:LLM API配置无效!") + print("-" * 60) + print(f"验证失败原因: {str(e)}") + print("\n请检查 webmainbench/config.py 中的配置:") + print(" 1. llm_base_url 是否正确") + print(" 2. llm_api_key 是否有效") + print(" 3. llm_model 是否支持") + print(" 4. 
网络连接是否正常") + print("\n或者设置 use_llm = False 来禁用LLM功能。") + print("=" * 60 + "\n") + sys.exit(1) + else: + # 未启用LLM的提示 + print("\n" + "=" * 60) + print("⚠️ 注意:当前未启用LLM增强提取效果功能") + print(" 如需启用LLM增强提取效果,请在 webmainbench/config.py 中配置:") + print(" - 设置 use_llm = True") + print(" - 填写 llm_base_url") + print(" - 填写 llm_api_key") + print("=" * 60) + print(" (5秒后使用基础方案进行对比...)") + time.sleep(5) + print() + + def evaluate(self, dataset: BenchmarkDataset, extractor: Union[BaseExtractor, str], extractor_config: Dict[str, Any] = None, diff --git a/webmainbench/metrics/base_content_splitter.py b/webmainbench/metrics/base_content_splitter.py index 0b9ced6..c0685a9 100644 --- a/webmainbench/metrics/base_content_splitter.py +++ b/webmainbench/metrics/base_content_splitter.py @@ -52,7 +52,6 @@ def should_use_llm(self, field_name: str) -> bool: # 默认逻辑:对groundtruth内容不使用LLM,对其他内容使用 if field_name == "groundtruth_content": - print(f"[DEBUG] 检测到groundtruth内容,不使用LLM") return False return True