From e24291c0432d35e3d26c6a58d4a228c8f162992a Mon Sep 17 00:00:00 2001 From: Ryuichiro Hataya Date: Thu, 12 Feb 2026 13:48:57 +0900 Subject: [PATCH 01/15] Introduce parse_input_utterance and preprocessor --- flexeval/core/chat_dataset/template_based.py | 25 +++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/flexeval/core/chat_dataset/template_based.py b/flexeval/core/chat_dataset/template_based.py index 55b6a3ee..7ecc664d 100644 --- a/flexeval/core/chat_dataset/template_based.py +++ b/flexeval/core/chat_dataset/template_based.py @@ -2,9 +2,10 @@ import json from ast import literal_eval +from collections.abc import Callable from os import PathLike from pathlib import Path -from typing import Any +from typing import Any, Literal import datasets from jinja2 import Template @@ -50,6 +51,9 @@ class TemplateChatDataset(ChatDataset): The key is a Jinja2 template string to embed the item into a string, and the value is the value to keep. remove_conditions: A dictionary to indicate the condition to remove certain items. The key is a Jinja2 template string to embed the item into a string, and the value is the value to remove. + parse_input_utterance: If specified, parse the rendered `input_utterance` string using the given method, + `ast.literal_eval` if "literal_eval" or `json.loads` if "json_loads". If None, do not parse. + preprocessor: A function to preprocess each item. """ def __init__( @@ -64,6 +68,8 @@ def __init__( data_range: tuple[int, int] | None = None, keep_conditions: dict[str, str] | None = None, remove_conditions: dict[str, str] | None = None, + parse_input_utterance: Literal["literal_eval", "json_loads"] | None = None, + preprocessor: Callable[[dict[str, Any]], dict[str, Any]] | None = None, ) -> None: if reference_template and reference_list_template: msg = "Only one of reference_template and reference_list_template can be set." @@ -100,12 +106,21 @@ def __init__( load_jinja2_template(system_message_template) if system_message_template else None ) + self.parse_input_utterance = parse_input_utterance + self.preprocessor = preprocessor + def __len__(self) -> int: return len(self.items) def __getitem__(self, i: int) -> ChatInstance: item = self.items[i] + if self.preprocessor: + item = self.preprocessor(item) input_utterance = self.input_template.render(**item) + if self.parse_input_utterance == "literal_eval": + input_utterance = literal_eval(input_utterance) + elif self.parse_input_utterance == "json_loads": + input_utterance = json.loads(input_utterance, strict=False) messages = [{"role": "user", "content": input_utterance}] if self._system_message_template: @@ -166,6 +181,8 @@ def __init__( data_range: tuple[int, int] | None = None, keep_conditions: dict[str, str] | None = None, remove_conditions: dict[str, str] | None = None, + parse_input_utterance: Literal["literal_eval", "json_loads"] | None = None, + preprocessor: Callable[[dict[str, Any]], dict[str, Any]] | None = None, ) -> None: dataset_kwargs = dataset_kwargs or {} dataset = datasets.load_dataset(path, name=subset, split=split, **dataset_kwargs) @@ -182,6 +199,8 @@ def __init__( data_range=data_range, keep_conditions=keep_conditions, remove_conditions=remove_conditions, + parse_input_utterance=parse_input_utterance, + preprocessor=preprocessor, ) @@ -205,6 +224,8 @@ def __init__( data_range: tuple[int, int] | None = None, keep_conditions: dict[str, str] | None = None, remove_conditions: dict[str, str] | None = None, + parse_input_utterance: Literal["literal_eval", "json_loads"] | None = None, + preprocessor: Callable[[dict[str, Any]], dict[str, Any]] | None = None, ) -> None: with open(path) as f: items = [json.loads(line) for line in f] @@ -220,4 +241,6 @@ def __init__( data_range=data_range, keep_conditions=keep_conditions, remove_conditions=remove_conditions, + parse_input_utterance=parse_input_utterance, + preprocessor=preprocessor, ) From d8b329b8053af4443859308a03f26b8ed2e71046 Mon Sep 17 00:00:00 2001 From: Ryuichiro Hataya Date: Thu, 12 Feb 2026 14:34:03 +0900 Subject: [PATCH 02/15] change preprocessor to class --- flexeval/core/chat_dataset/template_based.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/flexeval/core/chat_dataset/template_based.py b/flexeval/core/chat_dataset/template_based.py index 7ecc664d..e739750b 100644 --- a/flexeval/core/chat_dataset/template_based.py +++ b/flexeval/core/chat_dataset/template_based.py @@ -1,5 +1,6 @@ from __future__ import annotations +import abc import json from ast import literal_eval from collections.abc import Callable @@ -16,6 +17,13 @@ from .base import ChatDataset, ChatInstance +class Preprocessor(abc.ABC): + # An abstract base class for preprocessors + @abc.abstractmethod + def __call__(self, item: dict[str, Any]) -> dict[str, Any]: + pass + + def load_jinja2_template(template: str | PathLike[str]) -> Template: path = Path(template) try: @@ -69,7 +77,7 @@ def __init__( keep_conditions: dict[str, str] | None = None, remove_conditions: dict[str, str] | None = None, parse_input_utterance: Literal["literal_eval", "json_loads"] | None = None, - preprocessor: Callable[[dict[str, Any]], dict[str, Any]] | None = None, + preprocessor: list[Preprocessor] | None = None, ) -> None: if reference_template and reference_list_template: msg = "Only one of reference_template and reference_list_template can be set." @@ -115,7 +123,8 @@ def __len__(self) -> int: def __getitem__(self, i: int) -> ChatInstance: item = self.items[i] if self.preprocessor: - item = self.preprocessor(item) + for preprocessor in self.preprocessor: + item = preprocessor(item) input_utterance = self.input_template.render(**item) if self.parse_input_utterance == "literal_eval": input_utterance = literal_eval(input_utterance) From 34b13a93a30638dd0baff0d64e2c4a6c19a02c3b Mon Sep 17 00:00:00 2001 From: Ryuichiro Hataya Date: Thu, 12 Feb 2026 14:34:13 +0900 Subject: [PATCH 03/15] add preprocessor impl --- flexeval/multimodal/__init__.py | 1 + flexeval/multimodal/image_preprocessor.py | 67 +++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 flexeval/multimodal/__init__.py create mode 100644 flexeval/multimodal/image_preprocessor.py diff --git a/flexeval/multimodal/__init__.py b/flexeval/multimodal/__init__.py new file mode 100644 index 00000000..9303d2bc --- /dev/null +++ b/flexeval/multimodal/__init__.py @@ -0,0 +1 @@ +from .image_preprocessor import ConvertImageToBase64 diff --git a/flexeval/multimodal/image_preprocessor.py b/flexeval/multimodal/image_preprocessor.py new file mode 100644 index 00000000..aaff2e78 --- /dev/null +++ b/flexeval/multimodal/image_preprocessor.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import base64 +from io import BytesIO +from typing import Any, Literal, Protocol + +from loguru import logger + +from flexeval.core.chat_dataset.base import Preprocessor + + +class Image(Protocol): + # PIL image + def resize(self, size: tuple[int, int], resample: int) -> Image: ... + def convert(self, mode: str) -> Image: ... + def save(self, fp: BytesIO, format: str) -> None: ... # noqa: A002 + + +class ConvertImageToBase64(Preprocessor): + """ + Preprocessor to convert image to base64 string. + + Args: + key: The key in the input data that contains the image. + image_format: The image format to use for encoding. Either "png" or "jpeg". + max_length: The maximum length of the base64 string. Images will be resized + to fit within this length if specified. + """ + + def __init__(self, key: str, image_format: Literal["png", "jpeg"] = "png", max_length: int | None = None) -> None: + self.key = key + self.image_format = image_format + self.max_length = max_length + + def __call__(self, item: dict[str, Any]) -> dict[str, Any]: + image = item[self.key] + if image is None: + base64_image = None + elif isinstance(image, Image): + base64_image = self.encode_image_to_base64(image, image_format=self.format, max_length=self.max_length) + else: + raise NotImplementedError("Unsupported image type: " + str(type(image))) + + item[f"{self.key}_base64"] = base64_image + return item + + @staticmethod + def encode_image_to_base64( + image: Image, + image_format: Literal["png", "jpeg"], + max_length: int | None, + ) -> str: + def to_base64(img: Image) -> str: + buffered = BytesIO() + img.convert("RGB").save(buffered, format=image_format.upper()) + return base64.b64encode(buffered.getvalue()).decode("utf-8") + + base64_image = to_base64(image) + if max_length is not None and len(base64_image) > max_length: + factor = 1 + while len(base64_image) > max_length: + factor *= 0.9 + new_size = (int(image.width * factor), int(image.height * factor)) + resized_image = image.resize(new_size, resample=Image.Resampling.BILINEAR) + base64_image = to_base64(resized_image) + logger.debug(f"Image size reduced to {new_size} to fit max_length {max_length}.") + return f"data:image/{image_format.lower()};base64,{base64_image}" From 4f12613ac4e81205ffc25884bfe698abd6c52660 Mon Sep 17 00:00:00 2001 From: Ryuichiro Hataya Date: Thu, 12 Feb 2026 14:53:49 +0900 Subject: [PATCH 04/15] add test for parse_input_utterance --- .../core/chat_dataset/test_template_based.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/core/chat_dataset/test_template_based.py b/tests/core/chat_dataset/test_template_based.py index 942eea2d..13fc8854 100644 --- a/tests/core/chat_dataset/test_template_based.py +++ b/tests/core/chat_dataset/test_template_based.py @@ -226,3 +226,33 @@ def test_load_jinja2_template(dummy_template_file: Path) -> None: embed_result = template_from_string.render() assert isinstance(template_from_string, Template) assert embed_result == "a" * 1000 + + +@pytest.mark.parametrize( + "parse_input_utterance", + ["literal_eval", "json_loads", None], +) +def test_parse_input_utterance(parse_input_utterance: str) -> None: + dataset = TemplateChatDataset( + items=[ + { + "question": "Describe the color of this object.", + "answer": "red", + "image_url": "http://example.com/image1.jpg", + }, + ], + input_template='[{ "type": "image_url", "image_url": {"url": "{{ image_url }}"}},{ "type": "text", "text": "{{ question }}"}]', + parse_input_utterance=parse_input_utterance, + ) + + input_utterance = dataset[0].messages[1]["content"] + + if parse_input_utterance is None: + assert isinstance(input_utterance, str) + + else: + assert isinstance(input_utterance, list) + assert input_utterance[0]["type"] == "image_url" + assert input_utterance[0]["image_url"]["url"] == "http://example.com/image1.jpg" + assert input_utterance[1]["type"] == "text" + assert input_utterance[1]["text"] == "Describe the color of this object." From 28b65c1dcd850573e8e7c338b90e9ea38aec30c3 Mon Sep 17 00:00:00 2001 From: Ryuichiro Hataya Date: Thu, 12 Feb 2026 15:11:18 +0900 Subject: [PATCH 05/15] Update flexeval/multimodal/image_preprocessor.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- flexeval/multimodal/image_preprocessor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexeval/multimodal/image_preprocessor.py b/flexeval/multimodal/image_preprocessor.py index aaff2e78..f4537e99 100644 --- a/flexeval/multimodal/image_preprocessor.py +++ b/flexeval/multimodal/image_preprocessor.py @@ -6,7 +6,7 @@ from loguru import logger -from flexeval.core.chat_dataset.base import Preprocessor +from flexeval.core.chat_dataset.template_based import Preprocessor class Image(Protocol): From 7b4bd70d3b7d764c1f6010a7a3c8e493f1ff5df7 Mon Sep 17 00:00:00 2001 From: Ryuichiro Hataya Date: Thu, 12 Feb 2026 15:11:45 +0900 Subject: [PATCH 06/15] Update flexeval/core/chat_dataset/template_based.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- flexeval/core/chat_dataset/template_based.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexeval/core/chat_dataset/template_based.py b/flexeval/core/chat_dataset/template_based.py index e739750b..3c49e2a5 100644 --- a/flexeval/core/chat_dataset/template_based.py +++ b/flexeval/core/chat_dataset/template_based.py @@ -234,7 +234,7 @@ def __init__( keep_conditions: dict[str, str] | None = None, remove_conditions: dict[str, str] | None = None, parse_input_utterance: Literal["literal_eval", "json_loads"] | None = None, - preprocessor: Callable[[dict[str, Any]], dict[str, Any]] | None = None, + preprocessor: list[Preprocessor] | None = None, ) -> None: with open(path) as f: items = [json.loads(line) for line in f] From 9e6ec0c28736b98c384195fa652ae426c9460465 Mon Sep 17 00:00:00 2001 From: Ryuichiro Hataya Date: Thu, 12 Feb 2026 15:17:59 +0900 Subject: [PATCH 07/15] fix --- flexeval/core/chat_dataset/template_based.py | 5 ++--- tests/core/chat_dataset/test_template_based.py | 9 +++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/flexeval/core/chat_dataset/template_based.py b/flexeval/core/chat_dataset/template_based.py index 3c49e2a5..f5928dae 100644 --- a/flexeval/core/chat_dataset/template_based.py +++ b/flexeval/core/chat_dataset/template_based.py @@ -3,7 +3,6 @@ import abc import json from ast import literal_eval -from collections.abc import Callable from os import PathLike from pathlib import Path from typing import Any, Literal @@ -61,7 +60,7 @@ class TemplateChatDataset(ChatDataset): The key is a Jinja2 template string to embed the item into a string, and the value is the value to remove. parse_input_utterance: If specified, parse the rendered `input_utterance` string using the given method, `ast.literal_eval` if "literal_eval" or `json.loads` if "json_loads". If None, do not parse. - preprocessor: A function to preprocess each item. + preprocessor: A list of Preprocessor instances to preprocess each item. """ def __init__( @@ -191,7 +190,7 @@ def __init__( keep_conditions: dict[str, str] | None = None, remove_conditions: dict[str, str] | None = None, parse_input_utterance: Literal["literal_eval", "json_loads"] | None = None, - preprocessor: Callable[[dict[str, Any]], dict[str, Any]] | None = None, + preprocessor: list[Preprocessor] | None = None, ) -> None: dataset_kwargs = dataset_kwargs or {} dataset = datasets.load_dataset(path, name=subset, split=split, **dataset_kwargs) diff --git a/tests/core/chat_dataset/test_template_based.py b/tests/core/chat_dataset/test_template_based.py index 13fc8854..4b82fabc 100644 --- a/tests/core/chat_dataset/test_template_based.py +++ b/tests/core/chat_dataset/test_template_based.py @@ -233,6 +233,11 @@ def test_load_jinja2_template(dummy_template_file: Path) -> None: ["literal_eval", "json_loads", None], ) def test_parse_input_utterance(parse_input_utterance: str) -> None: + input_template = [ + {"type": "image_url", "image_url": {"url": "{{ image_url }}"}}, + {"type": "text", "text": "{{ question }}"}, + ] + input_template = str(input_template) dataset = TemplateChatDataset( items=[ { @@ -241,11 +246,11 @@ def test_parse_input_utterance(parse_input_utterance: str) -> None: "image_url": "http://example.com/image1.jpg", }, ], - input_template='[{ "type": "image_url", "image_url": {"url": "{{ image_url }}"}},{ "type": "text", "text": "{{ question }}"}]', + input_template=input_template, parse_input_utterance=parse_input_utterance, ) - input_utterance = dataset[0].messages[1]["content"] + input_utterance = dataset[0].messages[0]["content"] if parse_input_utterance is None: assert isinstance(input_utterance, str) From c7994ea5d5bdcb7ead209ca284cacc6dcc09a956 Mon Sep 17 00:00:00 2001 From: Ryuichiro Hataya Date: Thu, 12 Feb 2026 15:52:49 +0900 Subject: [PATCH 08/15] fix test --- tests/core/chat_dataset/test_template_based.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/core/chat_dataset/test_template_based.py b/tests/core/chat_dataset/test_template_based.py index 4b82fabc..1673eeaa 100644 --- a/tests/core/chat_dataset/test_template_based.py +++ b/tests/core/chat_dataset/test_template_based.py @@ -233,11 +233,10 @@ def test_load_jinja2_template(dummy_template_file: Path) -> None: ["literal_eval", "json_loads", None], ) def test_parse_input_utterance(parse_input_utterance: str) -> None: - input_template = [ + input_template = """[ {"type": "image_url", "image_url": {"url": "{{ image_url }}"}}, {"type": "text", "text": "{{ question }}"}, - ] - input_template = str(input_template) + ]""" dataset = TemplateChatDataset( items=[ { From 7bad58235e98d6e3bf35ce59e2f4f9cd1cd4dd55 Mon Sep 17 00:00:00 2001 From: Ryuichiro Hataya Date: Thu, 12 Feb 2026 20:07:01 +0900 Subject: [PATCH 09/15] fix --- tests/core/chat_dataset/test_template_based.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/core/chat_dataset/test_template_based.py b/tests/core/chat_dataset/test_template_based.py index 1673eeaa..fbbc2625 100644 --- a/tests/core/chat_dataset/test_template_based.py +++ b/tests/core/chat_dataset/test_template_based.py @@ -235,7 +235,7 @@ def test_load_jinja2_template(dummy_template_file: Path) -> None: def test_parse_input_utterance(parse_input_utterance: str) -> None: input_template = """[ {"type": "image_url", "image_url": {"url": "{{ image_url }}"}}, - {"type": "text", "text": "{{ question }}"}, + {"type": "text", "text": "{{ question }}"} ]""" dataset = TemplateChatDataset( items=[ From 39ee3b61f01e9e1619eaddd5dc9a16eb7c3b2401 Mon Sep 17 00:00:00 2001 From: Ryuichiro Hataya Date: Mon, 16 Feb 2026 16:53:23 +0900 Subject: [PATCH 10/15] remove multimodal --- flexeval/multimodal/__init__.py | 1 - flexeval/multimodal/image_preprocessor.py | 67 ----------------------- 2 files changed, 68 deletions(-) delete mode 100644 flexeval/multimodal/__init__.py delete mode 100644 flexeval/multimodal/image_preprocessor.py diff --git a/flexeval/multimodal/__init__.py b/flexeval/multimodal/__init__.py deleted file mode 100644 index 9303d2bc..00000000 --- a/flexeval/multimodal/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .image_preprocessor import ConvertImageToBase64 diff --git a/flexeval/multimodal/image_preprocessor.py b/flexeval/multimodal/image_preprocessor.py deleted file mode 100644 index f4537e99..00000000 --- a/flexeval/multimodal/image_preprocessor.py +++ /dev/null @@ -1,67 +0,0 @@ -from __future__ import annotations - -import base64 -from io import BytesIO -from typing import Any, Literal, Protocol - -from loguru import logger - -from flexeval.core.chat_dataset.template_based import Preprocessor - - -class Image(Protocol): - # PIL image - def resize(self, size: tuple[int, int], resample: int) -> Image: ... - def convert(self, mode: str) -> Image: ... - def save(self, fp: BytesIO, format: str) -> None: ... # noqa: A002 - - -class ConvertImageToBase64(Preprocessor): - """ - Preprocessor to convert image to base64 string. - - Args: - key: The key in the input data that contains the image. - image_format: The image format to use for encoding. Either "png" or "jpeg". - max_length: The maximum length of the base64 string. Images will be resized - to fit within this length if specified. - """ - - def __init__(self, key: str, image_format: Literal["png", "jpeg"] = "png", max_length: int | None = None) -> None: - self.key = key - self.image_format = image_format - self.max_length = max_length - - def __call__(self, item: dict[str, Any]) -> dict[str, Any]: - image = item[self.key] - if image is None: - base64_image = None - elif isinstance(image, Image): - base64_image = self.encode_image_to_base64(image, image_format=self.format, max_length=self.max_length) - else: - raise NotImplementedError("Unsupported image type: " + str(type(image))) - - item[f"{self.key}_base64"] = base64_image - return item - - @staticmethod - def encode_image_to_base64( - image: Image, - image_format: Literal["png", "jpeg"], - max_length: int | None, - ) -> str: - def to_base64(img: Image) -> str: - buffered = BytesIO() - img.convert("RGB").save(buffered, format=image_format.upper()) - return base64.b64encode(buffered.getvalue()).decode("utf-8") - - base64_image = to_base64(image) - if max_length is not None and len(base64_image) > max_length: - factor = 1 - while len(base64_image) > max_length: - factor *= 0.9 - new_size = (int(image.width * factor), int(image.height * factor)) - resized_image = image.resize(new_size, resample=Image.Resampling.BILINEAR) - base64_image = to_base64(resized_image) - logger.debug(f"Image size reduced to {new_size} to fit max_length {max_length}.") - return f"data:image/{image_format.lower()};base64,{base64_image}" From ca57e45bf5ea4f208f8006d1e6254c64e93305bf Mon Sep 17 00:00:00 2001 From: Ryuichiro Hataya Date: Mon, 16 Feb 2026 16:53:33 +0900 Subject: [PATCH 11/15] test preprocessor --- .../core/chat_dataset/test_template_based.py | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/tests/core/chat_dataset/test_template_based.py b/tests/core/chat_dataset/test_template_based.py index fbbc2625..dd1e5e95 100644 --- a/tests/core/chat_dataset/test_template_based.py +++ b/tests/core/chat_dataset/test_template_based.py @@ -234,7 +234,7 @@ def test_load_jinja2_template(dummy_template_file: Path) -> None: ) def test_parse_input_utterance(parse_input_utterance: str) -> None: input_template = """[ - {"type": "image_url", "image_url": {"url": "{{ image_url }}"}}, + {"type": "image_url", "image_url": {"url": "{{ image }}"}}, {"type": "text", "text": "{{ question }}"} ]""" dataset = TemplateChatDataset( @@ -242,7 +242,7 @@ def test_parse_input_utterance(parse_input_utterance: str) -> None: { "question": "Describe the color of this object.", "answer": "red", - "image_url": "http://example.com/image1.jpg", + "image": "http://example.com/image1.jpg", }, ], input_template=input_template, @@ -260,3 +260,29 @@ def test_parse_input_utterance(parse_input_utterance: str) -> None: assert input_utterance[0]["image_url"]["url"] == "http://example.com/image1.jpg" assert input_utterance[1]["type"] == "text" assert input_utterance[1]["text"] == "Describe the color of this object." + + +def test_preprocessor() -> None: + from flexeval.core.chat_dataset.template_based import Preprocessor + + class ToBase64(Preprocessor): + def __call__(self, item: dict) -> dict: + image = item["image"] + item["image_base64"] = "data:image/jpeg;base64,..." + return item + + input_template = "{{ image_base64 }}" + dataset = TemplateChatDataset( + items=[ + { + "question": "Describe the color of this object.", + "answer": "red", + "image": "http://example.com/image1.jpg", + }, + ], + input_template=input_template, + parse_input_utterance="literal_eval", + preprocessor=[ToBase64()], + ) + input_utterance = dataset[0].messages[0]["content"] + assert input_utterance == "data:image/jpeg;base64,..." From 740b5fbcb825137c525725b68fd254175fa95524 Mon Sep 17 00:00:00 2001 From: Ryuichiro Hataya Date: Mon, 16 Feb 2026 16:59:56 +0900 Subject: [PATCH 12/15] test preprocessor --- tests/core/chat_dataset/test_template_based.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/core/chat_dataset/test_template_based.py b/tests/core/chat_dataset/test_template_based.py index dd1e5e95..e781ee44 100644 --- a/tests/core/chat_dataset/test_template_based.py +++ b/tests/core/chat_dataset/test_template_based.py @@ -267,7 +267,7 @@ def test_preprocessor() -> None: class ToBase64(Preprocessor): def __call__(self, item: dict) -> dict: - image = item["image"] + image = item["image"] # noqa: F841 # simulate using the image for conversion item["image_base64"] = "data:image/jpeg;base64,..." return item From 4a8a31e61c9b97bf656e16a35ad4ef36f36e883f Mon Sep 17 00:00:00 2001 From: Ryuichiro Hataya Date: Mon, 16 Feb 2026 17:32:58 +0900 Subject: [PATCH 13/15] fix --- tests/core/chat_dataset/test_template_based.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/core/chat_dataset/test_template_based.py b/tests/core/chat_dataset/test_template_based.py index e781ee44..80449827 100644 --- a/tests/core/chat_dataset/test_template_based.py +++ b/tests/core/chat_dataset/test_template_based.py @@ -281,7 +281,6 @@ def __call__(self, item: dict) -> dict: }, ], input_template=input_template, - parse_input_utterance="literal_eval", preprocessor=[ToBase64()], ) input_utterance = dataset[0].messages[0]["content"] From c4cb43a27f1f795a18ea15b205903ac29b319341 Mon Sep 17 00:00:00 2001 From: Ryuichiro Hataya Date: Tue, 17 Feb 2026 17:22:17 +0900 Subject: [PATCH 14/15] https://github.com/sbintuitions/flexeval/pull/278#pullrequestreview-3812290261 --- flexeval/core/chat_dataset/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexeval/core/chat_dataset/__init__.py b/flexeval/core/chat_dataset/__init__.py index be31bd26..9192142b 100644 --- a/flexeval/core/chat_dataset/__init__.py +++ b/flexeval/core/chat_dataset/__init__.py @@ -2,4 +2,4 @@ from .chatbot_bench import ChatbotBench from .openai_messages import OpenAIMessagesDataset from .sacrebleu_dataset import SacreBleuChatDataset -from .template_based import HFChatDataset, JsonlChatDataset, TemplateChatDataset, load_jinja2_template +from .template_based import HFChatDataset, JsonlChatDataset, Preprocessor, TemplateChatDataset, load_jinja2_template From 0d830e3542c28cab1824af4dd077484b9971b7dd Mon Sep 17 00:00:00 2001 From: Ryuichiro Hataya Date: Tue, 17 Feb 2026 22:22:49 +0900 Subject: [PATCH 15/15] processor -> processors --- flexeval/core/chat_dataset/template_based.py | 18 +++++++++--------- tests/core/chat_dataset/test_template_based.py | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/flexeval/core/chat_dataset/template_based.py b/flexeval/core/chat_dataset/template_based.py index f5928dae..c6a4de0b 100644 --- a/flexeval/core/chat_dataset/template_based.py +++ b/flexeval/core/chat_dataset/template_based.py @@ -60,7 +60,7 @@ class TemplateChatDataset(ChatDataset): The key is a Jinja2 template string to embed the item into a string, and the value is the value to remove. parse_input_utterance: If specified, parse the rendered `input_utterance` string using the given method, `ast.literal_eval` if "literal_eval" or `json.loads` if "json_loads". If None, do not parse. - preprocessor: A list of Preprocessor instances to preprocess each item. + preprocessors: A list of Preprocessor instances to preprocess each item. """ def __init__( @@ -76,7 +76,7 @@ def __init__( keep_conditions: dict[str, str] | None = None, remove_conditions: dict[str, str] | None = None, parse_input_utterance: Literal["literal_eval", "json_loads"] | None = None, - preprocessor: list[Preprocessor] | None = None, + preprocessors: list[Preprocessor] | None = None, ) -> None: if reference_template and reference_list_template: msg = "Only one of reference_template and reference_list_template can be set." @@ -114,15 +114,15 @@ def __init__( ) self.parse_input_utterance = parse_input_utterance - self.preprocessor = preprocessor + self.preprocessors = preprocessors def __len__(self) -> int: return len(self.items) def __getitem__(self, i: int) -> ChatInstance: item = self.items[i] - if self.preprocessor: - for preprocessor in self.preprocessor: + if self.preprocessors: + for preprocessor in self.preprocessors: item = preprocessor(item) input_utterance = self.input_template.render(**item) if self.parse_input_utterance == "literal_eval": @@ -190,7 +190,7 @@ def __init__( keep_conditions: dict[str, str] | None = None, remove_conditions: dict[str, str] | None = None, parse_input_utterance: Literal["literal_eval", "json_loads"] | None = None, - preprocessor: list[Preprocessor] | None = None, + preprocessors: list[Preprocessor] | None = None, ) -> None: dataset_kwargs = dataset_kwargs or {} dataset = datasets.load_dataset(path, name=subset, split=split, **dataset_kwargs) @@ -208,7 +208,7 @@ def __init__( keep_conditions=keep_conditions, remove_conditions=remove_conditions, parse_input_utterance=parse_input_utterance, - preprocessor=preprocessor, + preprocessors=preprocessors, ) @@ -233,7 +233,7 @@ def __init__( keep_conditions: dict[str, str] | None = None, remove_conditions: dict[str, str] | None = None, parse_input_utterance: Literal["literal_eval", "json_loads"] | None = None, - preprocessor: list[Preprocessor] | None = None, + preprocessors: list[Preprocessor] | None = None, ) -> None: with open(path) as f: items = [json.loads(line) for line in f] @@ -250,5 +250,5 @@ def __init__( keep_conditions=keep_conditions, remove_conditions=remove_conditions, parse_input_utterance=parse_input_utterance, - preprocessor=preprocessor, + preprocessors=preprocessors, ) diff --git a/tests/core/chat_dataset/test_template_based.py b/tests/core/chat_dataset/test_template_based.py index 80449827..fbcc3f23 100644 --- a/tests/core/chat_dataset/test_template_based.py +++ b/tests/core/chat_dataset/test_template_based.py @@ -262,7 +262,7 @@ def test_parse_input_utterance(parse_input_utterance: str) -> None: assert input_utterance[1]["text"] == "Describe the color of this object." -def test_preprocessor() -> None: +def test_preprocessors() -> None: from flexeval.core.chat_dataset.template_based import Preprocessor class ToBase64(Preprocessor): @@ -281,7 +281,7 @@ def __call__(self, item: dict) -> dict: }, ], input_template=input_template, - preprocessor=[ToBase64()], + preprocessors=[ToBase64()], ) input_utterance = dataset[0].messages[0]["content"] assert input_utterance == "data:image/jpeg;base64,..."