From d3e42c2857a621d2abe329b4f74483a94f537525 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 24 Nov 2025 20:56:08 +0000 Subject: [PATCH 1/2] Initial plan From ebb5cd25a6610f429076cb763e123ab8d339850b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 24 Nov 2025 21:02:03 +0000 Subject: [PATCH 2/2] Remove duplicate criteria parameter from GEval instantiation According to deepeval docs, only criteria OR evaluation_steps should be provided. When both are provided, evaluation_steps takes priority and criteria is ignored. This fix removes the unused criteria parameter. Co-authored-by: justaddcoffee <150311+justaddcoffee@users.noreply.github.com> --- src/metacoder/evals/runner.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/metacoder/evals/runner.py b/src/metacoder/evals/runner.py index 67a9619..d7bbf7d 100644 --- a/src/metacoder/evals/runner.py +++ b/src/metacoder/evals/runner.py @@ -64,8 +64,6 @@ def get_default_metrics() -> Dict[str, BaseMetric]: return { "CorrectnessMetric": GEval( name="Correctness", - criteria="Determine whether the actual output is factually correct based on the expected output.", - # NOTE: you can only provide either criteria or evaluation_steps, and not both evaluation_steps=[ "Check whether the facts in 'actual output' contradicts any facts in 'expected output'", "You should also heavily penalize omission of detail",