1 change: 1 addition & 0 deletions .gitignore
@@ -216,3 +216,4 @@ scripts/oneclick/.env.example
__pycache__/
*.pyc
*.pyo
run.sh
64 changes: 64 additions & 0 deletions config/custom/omnidocbench_qwen_mllm.yaml
@@ -0,0 +1,64 @@
api_version: gage/v1alpha1
kind: PipelineConfig
metadata:
  name: omnidocbench_qwen_mllm
  description: A multi-modal evaluation for OmniDocBench using a Qwen MLLM.

custom:
  steps:
    - step: inference
    - step: auto_eval

# 1. Dataset Configuration
datasets:
  - dataset_id: omnidocbench_val
    loader: jsonl
    params:
      path: /mnt/sdb1/ywt/OmniDocBench-main/OmniDocBench1_5/omnidocbench15_gage_r.jsonl
      preprocess: omnidoc_image_standardizer
      preprocess_kwargs:
        question_field: prompt
        content_field: image
        content_root: /mnt/sdb1/ywt/OmniDocBench-main/OmniDocBench1_5/images
      doc_to_visual: gage_eval.assets.datasets.utils.multimodal:embed_local_message_images
      # doc_to_visual_kwargs automatically inherits fields of the same name from preprocess_kwargs

# 2. Backend Configuration
backends:
  - backend_id: omnidocbench_qwen_mllm_backend
    type: litellm
    config:
      provider: openai
      api_base: http://127.0.0.1:8685/v1  # replace with the actual Vision-LLM service address
      model: Qwen/Qwen3-Omni-30B-A3B-Instruct
      generation_parameters:
        max_new_tokens: 4096  # keep outputs short to limit memory use and latency
        temperature: 0.1
      async_max_concurrency: 2  # light concurrency for local MacBook runs

# 3. Role Adapter Configuration
role_adapters:
  - adapter_id: omnidocbench_qwen_vl
    role_type: dut_model
    backend_id: omnidocbench_qwen_mllm_backend
    # Capability must match what the backend/adapter supports for multi-modal input
    capabilities:
      - vision_chat

# 4. Metric Configuration
metrics:
  - metric_id: omnidocbench_all_metric
    # This implementation needs to be created as per test-1113.md
    implementation: gage_eval.metrics.builtin.ominidoc_all_metric:OmniDocBenchMetric
    # implementation: OmniDocBenchMetric
    aggregation: omnidoclazycalc

# 5. Task Configuration
tasks:
  - task_id: doc_parsing_eval
    dataset_id: omnidocbench_val
    max_samples: 20  # quick smoke run on a MacBook; can be overridden via CLI
    reporting:
      sinks:
        - type: console
        - type: file
          params:
            output_path: ${GAGE_EVAL_SAVE_DIR:-./runs}/omnidocbench_events.jsonl
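
For orientation, this is what a single record in the JSONL file above is assumed to look like, given `question_field: prompt` and `content_field: image` in `preprocess_kwargs`; the real OmniDocBench export likely carries additional ground-truth fields. A minimal sketch:

```python
# make_smoke_record.py: writes one illustrative record (hypothetical schema,
# inferred from the question_field/content_field mapping in the config above).
import json

record = {
    "prompt": "Parse this document page into Markdown.",  # question_field
    "image": "page_0001.jpg",  # content_field, resolved against content_root
}

with open("omnidocbench15_gage_r.jsonl", "a", encoding="utf-8") as fh:
    fh.write(json.dumps(record, ensure_ascii=False) + "\n")
```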
2 changes: 2 additions & 0 deletions src/gage_eval/assets/datasets/loaders/jsonl_loader.py
@@ -54,6 +54,8 @@ def load(self, hub_handle: Optional[DatasetHubHandle], *, trace=None) -> DataSou
        doc_to_text = resolve_doc_to_callable(self.spec, "doc_to_text")
        doc_to_visual = resolve_doc_to_callable(self.spec, "doc_to_visual")
        doc_to_audio = resolve_doc_to_callable(self.spec, "doc_to_audio")
        # apply_preprocess requires registered preprocessors, but importing
        # preprocessors.builtin at module load time fails because of a circular
        # registry import, so the import is deferred to call time.
        from gage_eval.assets.datasets import preprocessors  # noqa: F401
        records = apply_preprocess(
            raw_records,
            self.spec,
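
The deferred import added above works around a circular import between the loader and the registry-populating builtin module. A self-contained sketch of the pattern, with hypothetical module names standing in for the real gage_eval layout:

```python
# circular_import_demo.py: why the import happens inside load() rather than
# at module top level. Module names here are illustrative.
import pathlib
import sys
import tempfile
import textwrap

pkg = pathlib.Path(tempfile.mkdtemp())

# loader.py defines a helper that builtin.py needs, and imports builtin
# lazily, inside the function that needs the registry populated.
(pkg / "loader.py").write_text(textwrap.dedent("""
    HELPER = "shared by builtin"

    def load():
        import builtin  # deferred: loader is fully initialized by now
        return builtin.REGISTERED
"""))

# builtin.py imports loader at the top: the back-edge of the cycle.
(pkg / "builtin.py").write_text(textwrap.dedent("""
    import loader
    REGISTERED = ["omnidoc_image_standardizer", loader.HELPER]
"""))

sys.path.insert(0, str(pkg))
import loader

# Works. If loader.py instead imported builtin at the top, builtin's
# loader.HELPER lookup would hit a partially initialized loader module.
print(loader.load())
```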
3 changes: 2 additions & 1 deletion src/gage_eval/assets/datasets/preprocessors/__init__.py
@@ -1,3 +1,4 @@
"""Preprocessor utilities."""
from . import builtin

__all__ = []
__all__ = ["builtin"]
9 changes: 9 additions & 0 deletions src/gage_eval/assets/datasets/preprocessors/builtin.py
@@ -6,6 +6,7 @@
from gage_eval.assets.datasets.preprocessors.default_preprocessor import DefaultPreprocessor
from gage_eval.assets.datasets.preprocessors.multi_choice_preprocessor import MultiChoicePreprocessor as NewMultiChoice
from gage_eval.assets.datasets.preprocessors.docvqa_preprocessor import DocVQAPreprocessor as NewDocVQA
from gage_eval.assets.datasets.preprocessors.omnidoc_preprocessor import OmniDocPreprocessor as NewOmniDoc
from gage_eval.assets.datasets.preprocessors.mathvista_preprocessor import (
    MathVistaPreprocessor as NewMathVista,
    MathVistaStructOnlyPreprocessor as NewMathVistaStructOnly,
@@ -72,6 +73,14 @@ class MultiChoicePreprocessor(NewMultiChoice):
class DocVQAPreprocessor(NewDocVQA):
    pass

@registry.asset(
    "dataset_preprocessors",
    "omnidoc_image_standardizer",
    desc="OmniDocBench multimodal preprocessor (new)",
    tags=("prompt", "vision", "omnidoc"),
)
class OmniDocPreprocessor(NewOmniDoc):
    pass

@registry.asset(
    "dataset_preprocessors",
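
The string passed to `registry.asset` is what the YAML config references via `preprocess: omnidoc_image_standardizer`. A minimal sketch of that name-to-class resolution, with hypothetical names in place of the real registry API:

```python
# registry_sketch.py: illustrative only; the real gage_eval registry differs,
# but the string-to-class mapping works along these lines.
from typing import Dict, Type

_ASSETS: Dict[str, Type] = {}

def asset(kind: str, name: str, **meta):
    """Register a class under (kind, name) so configs can reference it by string."""
    def deco(cls: Type) -> Type:
        _ASSETS[f"{kind}/{name}"] = cls
        return cls
    return deco

@asset("dataset_preprocessors", "omnidoc_image_standardizer")
class OmniDocPreprocessor:
    pass

# A config value like `preprocess: omnidoc_image_standardizer` resolves here:
cls = _ASSETS["dataset_preprocessors/omnidoc_image_standardizer"]
print(cls.__name__)  # OmniDocPreprocessor
```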
121 changes: 121 additions & 0 deletions src/gage_eval/assets/datasets/preprocessors/omnidoc_preprocessor.py
@@ -0,0 +1,121 @@
"""Class-based DocVQA preprocessor (new implementation)."""

from __future__ import annotations

from pathlib import Path
from typing import Any, Dict, List
import base64
import mimetypes

from gage_eval.assets.datasets.utils.multimodal import (
    _derive_root,
    collect_content_fragments,
    embed_local_image_as_data_url,
)
from gage_eval.assets.datasets.preprocessors.base import BasePreprocessor
from gage_eval.assets.datasets.utils.mapping import extract_field
from gage_eval.assets.datasets.utils.normalization import ensure_chat_template_flags

def encode_image_to_data_uri(image_path: str) -> str:
    """Read a local image file and return it as a base64 data URI."""
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None:
        mime_type = "image/jpeg"

    with open(image_path, "rb") as image_file:
        base64_data = base64.b64encode(image_file.read()).decode("utf-8")

    return f"data:{mime_type};base64,{base64_data}"

class OmniDocPreprocessor(BasePreprocessor):
    """Normalize OmniDocBench samples with text + image content."""

    def to_sample(
        self,
        record: Dict[str, Any],
        *,
        question_field: str = "question",
        content_field: str = "image",
        content_root: str | None = None,
        data_path: str | None = None,
        system_prompt: str | None = None,
        instruction: str | None = None,
        **kwargs: Any,
    ) -> Dict[str, Any]:

        sample = dict(record)
        question = extract_field(sample, question_field)
        if question is None:
            raise ValueError(f"OmniDoc sample missing question field '{question_field}'")

        # 1. Resolve Root Path. Record data_path in the dataset metadata so
        # that _derive_root can fall back to it.
        if data_path:
            sample.setdefault("_dataset_metadata", {})["path"] = data_path

        resolved_root = _derive_root(sample, content_root)
        if resolved_root and isinstance(resolved_root, str) and not resolved_root.startswith(("http://", "https://", "data:")):
            try:
                resolved_root = str(Path(resolved_root).expanduser().resolve())
            except Exception:
                resolved_root = str(Path(resolved_root).expanduser())

        # 2. Construct Content
        text_content = str(question).strip()
        if instruction:
            text_content = f"{text_content}\n\n{instruction.strip()}"

        user_content_parts = [{"type": "text", "text": text_content}]

        # 3. Embed Local Images. embed_local_image_as_data_url is expected to
        # rewrite sample[content_field] into a base64 data URL in place, so its
        # return value is not needed here.
        fragments = collect_content_fragments(sample, content_field=content_field, content_root=resolved_root)

        embed_local_image_as_data_url(
            sample,
            image_field=content_field,
            strict=False,
            cache_dir=None,
            content_root=content_root,
        )

        # 4. Build Messages
        messages: List[Dict[str, Any]] = []
        if system_prompt:
            messages.append({"role": "system", "content": [{"type": "text", "text": system_prompt.strip()}]})

        visual_fragments = [{"type": "image_url", "image_url": sample[content_field]}]
        user_content_parts.extend(visual_fragments)
        messages.append({"role": "user", "content": user_content_parts})

sample["messages"] = messages
sample["prompt"] = question
sample["chat_template_mode"] = "preprocess"
sample["rendered_by"] = "preprocess"
sample["template_source"] = "manual"
sample["cache_suffix"] = "-converted"

ensure_chat_template_flags(sample)

        # 5. Finalize Metadata
        final_image_name = fragments
        metadata = dict(sample.get("metadata") or {})
        metadata.pop("image_root", None)
        metadata.update({
            "question_field": question_field,
            "content_field": content_field,
        })
        if final_image_name:
            metadata["image_name"] = final_image_name
        if resolved_root:
            metadata["content_root"] = resolved_root

        sample["metadata"] = metadata
        sample["inputs"] = sample.get("inputs") or {"prompt": question}
        return sample


__all__ = ["OmniDocPreprocessor"]
1 change: 0 additions & 1 deletion src/gage_eval/evaluation/sample_loop.py
@@ -123,7 +123,6 @@ def run(self, planner: TaskPlanner, role_manager: RoleManager, trace: Observabil
            daemon=True,
        )
        producer.start()

        if ff_mode:
            self._run_fire_and_forget(
                sample_queue,