From d3e42c2857a621d2abe329b4f74483a94f537525 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 24 Nov 2025 20:56:08 +0000
Subject: [PATCH 1/2] Initial plan


From ebb5cd25a6610f429076cb763e123ab8d339850b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 24 Nov 2025 21:02:03 +0000
Subject: [PATCH 2/2] Remove redundant criteria parameter from GEval
 instantiation

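According to the deepeval docs, GEval should be given either criteria or
evaluation_steps, but not both. When both are provided, evaluation_steps
takes priority and criteria is ignored. This change removes the ignored
criteria argument, along with its accompanying NOTE comment, from the
CorrectnessMetric GEval in get_default_metrics().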

Co-authored-by: justaddcoffee <150311+justaddcoffee@users.noreply.github.com>
---
 src/metacoder/evals/runner.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/metacoder/evals/runner.py b/src/metacoder/evals/runner.py
index 67a9619..d7bbf7d 100644
--- a/src/metacoder/evals/runner.py
+++ b/src/metacoder/evals/runner.py
@@ -64,8 +64,6 @@ def get_default_metrics() -> Dict[str, BaseMetric]:
     return {
         "CorrectnessMetric": GEval(
             name="Correctness",
-            criteria="Determine whether the actual output is factually correct based on the expected output.",
-            # NOTE: you can only provide either criteria or evaluation_steps, and not both
             evaluation_steps=[
                 "Check whether the facts in 'actual output' contradicts any facts in 'expected output'",
                 "You should also heavily penalize omission of detail",