From 5d1675bcb080f506d4eba39ad04bad039b9f83cc Mon Sep 17 00:00:00 2001
From: Karol Charchut <59970980+Quarol@users.noreply.github.com>
Date: Mon, 29 Dec 2025 13:43:28 +0100
Subject: [PATCH] Added --limit flag to evaluation

Add a `parse_limit` argparse type converter and a `--limit` option to the
evaluation command; the parsed value is forwarded to `evaluate()` as
`limit`.

The literal percent sign in the `--limit` help text is written as `%%`
because argparse %-formats help strings; a bare `%` followed by `)` makes
rendering `--help` fail with "unsupported format character".
---
 llmsql/_cli/evaluation.py | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/llmsql/_cli/evaluation.py b/llmsql/_cli/evaluation.py
index a32fb85..4e2f08f 100644
--- a/llmsql/_cli/evaluation.py
+++ b/llmsql/_cli/evaluation.py
@@ -5,9 +5,37 @@
 from llmsql.evaluation.evaluate import evaluate
 
 
+def parse_limit(value: str | None) -> int | float | None:
+    """Parse the value given to the ``--limit`` CLI option.
+
+    Returns ``None`` for ``None``, a float in ``(0.0, 1.0]`` when *value*
+    contains a ``.``, and a positive ``int`` otherwise.
+
+    Raises:
+        argparse.ArgumentTypeError: if *value* is not a valid limit.
+    """
+    if value is None:
+        return None
+
+    try:
+        if "." in value:
+            limit = float(value)
+            if not (0.0 < limit <= 1.0):
+                raise ValueError
+            return limit
+        else:
+            limit = int(value)
+            if limit <= 0:
+                raise ValueError
+            return limit
+    except ValueError as e:
+        raise argparse.ArgumentTypeError(
+            "--limit must be a positive integer or a float between 0.0 and 1.0"
+        ) from e
+
+
 class EvaluationCommand:
     """CLI wrapper for the `evaluate()` function."""
-
     @staticmethod
     def register(subparsers: argparse._SubParsersAction) -> None:
         eval_parser = subparsers.add_parser(
@@ -28,6 +56,18 @@ def register(subparsers: argparse._SubParsersAction) -> None:
             ),
         )
 
+        eval_parser.add_argument(
+            "--limit",
+            required=False,
+            type=parse_limit,
+            default=None,
+            help=(
+                "Optional. Limit the number of evaluated samples.\n"
+                "Accepts an integer (e.g. 100) or a float between 0.0 and 1.0 (e.g. 0.1 for 10%%).\n"
+                "Useful for debugging."
+            ),
+        )
+
         eval_parser.add_argument(
             "--workdir-path",
             type=str,
@@ -93,6 +133,8 @@ def execute(args: argparse.Namespace) -> None:
         except Exception:
             outputs = args.outputs
 
+        limit = args.limit
+
         result = evaluate(
             outputs=outputs,
             workdir_path=args.workdir_path,
@@ -101,6 +143,7 @@ def execute(args: argparse.Namespace) -> None:
             save_report=args.save_report,
             show_mismatches=args.show_mismatches,
             max_mismatches=args.max_mismatches,
+            limit=limit,
         )
 
         print(json.dumps(result, indent=2))