From 8d0f28eb9ef96dce75420e6ac1410b23c3417853 Mon Sep 17 00:00:00 2001 From: Bernhard Grill Date: Mon, 23 Jun 2025 13:10:59 +0200 Subject: [PATCH 1/2] fix when loading benchmark and adding description field to question --- lmeval/benchmark.py | 3 ++- lmeval/question.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lmeval/benchmark.py b/lmeval/benchmark.py index 885cca6..2d3a0bb 100644 --- a/lmeval/benchmark.py +++ b/lmeval/benchmark.py @@ -490,7 +490,8 @@ def load_benchmark(path: str, archive = None, use_tempfile: bool | None = None) media_to_load = [] for category in benchmark.categories: for task in category.tasks: - if isinstance(task.scorer.type, ScorerType): + scorer_values = set(item.value for item in ScorerType) + if task.scorer.type in scorer_values: scorer_name = str(task.scorer.type) stype = ScorerType[scorer_name] else: diff --git a/lmeval/question.py b/lmeval/question.py index 1db0c07..d87dfbb 100644 --- a/lmeval/question.py +++ b/lmeval/question.py @@ -37,6 +37,7 @@ class Question(CustomModel): # we need it for (de)serialization - automated added by Task.add() id: int = Field(default=-1) question: str | None = Field(default=None) + description: str = Field(default="") language: str = Field(default="en") # answer @@ -124,4 +125,4 @@ def _compute_file_hash(self, path: Path, class GroupedQuestion(Question): metadata: Dict[str, Any] - question_set: List[Question] \ No newline at end of file + question_set: List[Question] From 2a2d869f1a40981156f5604bc8d90b4757254695 Mon Sep 17 00:00:00 2001 From: Bernhard Grill Date: Thu, 27 Nov 2025 15:05:58 +0100 Subject: [PATCH 2/2] adding answer_old and html modality --- lmeval/question.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lmeval/question.py b/lmeval/question.py index d87dfbb..1970e6e 100644 --- a/lmeval/question.py +++ b/lmeval/question.py @@ -42,6 +42,8 @@ class Question(CustomModel): # answer answer: str | None = Field(default=None) + # old answer (e.g. 
in case the answer (label) changed due to a manual review) + answer_old: str | None = Field(default=None) # additional answers e.g. for multiple choice questions additional_answers: List[str] = Field(default_factory=list) @@ -95,6 +97,7 @@ def add_media(self, path: str| Path, filetype: FileType = FileType.auto, ".mp4": [FileType.mp4, Modality.video], ".py": [FileType.python, Modality.code], ".pdf": [FileType.pdf, Modality.document], + ".html": [FileType.html, Modality.code], } # auto detection if needed