From d40ef9e03a8dd96f6dd2487e5acb1c1448ee1eca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Mon, 16 Feb 2026 23:42:15 +0100 Subject: [PATCH 1/4] group output by rule --- PythonScripts/audit_translations/auditor.py | 205 ++++++++++++------ .../rich/structure_diff_nonverbose.golden | 8 +- .../golden/rich/structure_diff_verbose.golden | 12 +- 3 files changed, 148 insertions(+), 77 deletions(-) diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py index 593475f6..ef9fae24 100644 --- a/PythonScripts/audit_translations/auditor.py +++ b/PythonScripts/audit_translations/auditor.py @@ -9,7 +9,7 @@ import os import sys from pathlib import Path -from typing import Iterable, List, Optional, TextIO, Tuple +from typing import Any, Dict, Iterable, List, Optional, TextIO, Tuple from rich.console import Console from rich.markup import escape @@ -154,24 +154,38 @@ def rule_label(rule: RuleInfo) -> str: if rule.name is None: return f"[yellow]\"{escape(rule.key)}\"[/]" tag = rule.tag or "unknown" - return f"[cyan]{escape(rule.name)}[/] [dim][{escape(tag)}][/]" + return f"[cyan]{escape(rule.name)}[/] [dim]({escape(tag)})[/]" -def print_rule_item(rule: RuleInfo, issue_line: int, context: str = ""): - console.print(f" [dim]•[/] {rule_label(rule)} [dim](line {issue_line}{context})[/]") - +def issue_type_sort_key(issue_type: str) -> Tuple[int, str]: + """ + Stable ordering for per-rule issue groups. -def print_diff_item(diff: RuleDifference, line_en: int, line_tr: int, verbose: bool = False): - """Print a single rule difference""" - rule = diff.english_rule - console.print( - f" [dim]•[/] {rule_label(rule)} " - f"[dim](line {line_en} en, {line_tr} tr)[/]" - ) - console.print(f" [dim]{diff.description}[/]") - if verbose: - console.print(f" [green]en:[/] {escape(diff.english_snippet)}") - console.print(f" [red]tr:[/] {escape(diff.translated_snippet)}") + Differences are grouped by diff type under the same rule. + """ + order = { + "missing_rule": 0, + "untranslated_text": 1, + "rule_difference:match": 2, + "rule_difference:condition": 3, + "rule_difference:variables": 4, + "rule_difference:structure": 5, + "extra_rule": 6, + } + return order.get(issue_type, 99), issue_type + + +def issue_type_label(issue_type: str) -> str: + labels = { + "missing_rule": "Missing in Translation", + "untranslated_text": "Untranslated Text", + "rule_difference:match": "Match Pattern Differences", + "rule_difference:condition": "Condition Differences", + "rule_difference:variables": "Variable Differences", + "rule_difference:structure": "Structure Differences", + "extra_rule": "Extra in Translation", + } + return labels.get(issue_type, issue_type) def issue_base(rule: RuleInfo, file_name: str, language: str) -> dict: @@ -386,60 +400,113 @@ def print_warnings(result: ComparisonResult, file_name: str, verbose: bool = Fal console.print(f" [dim]English: {result.english_rule_count} rules → Translated: {result.translated_rule_count} rules[/]") console.rule(style="cyan") - if result.missing_rules: - console.print(f"\n [red]✗[/] [bold]Missing Rules[/] [[red]{len(result.missing_rules)}[/]] [dim](in English but not in translation)[/]") - for rule in result.missing_rules: - print_rule_item(rule, issue_line=rule.line_number, context=" in English") - issues += 1 - - if result.untranslated_text: - untranslated_count = sum(len(entries) for _, entries in result.untranslated_text) - console.print(f"\n [yellow]⚠[/] [bold]Untranslated Text[/] [[yellow]{untranslated_count}[/]] [dim](lowercase t/ot/ct keys)[/]") - for rule, entries in result.untranslated_text: - for _, text, line in entries: - issue_line = line or rule.line_number - print_rule_item(rule, issue_line=issue_line) - console.print(f" [dim]→[/] [yellow]\"{escape(text)}\"[/]") - issues += 1 - - if result.rule_differences: - # Count only diffs that will actually be displayed - displayable_diffs = [] - for diff in result.rule_differences: - if diff.diff_type == "structure": - en_tokens = extract_structure_elements(diff.english_rule.data) - tr_tokens = extract_structure_elements(diff.translated_rule.data) - en_token, tr_token, mismatch_pos = first_structure_mismatch(en_tokens, tr_tokens) - - # Skip reporting when tokens are misaligned (both exist but differ) - # This avoids misleading line numbers when entire blocks are missing/added - if en_token is not None and tr_token is not None and en_token != tr_token: - continue - - line_en = resolve_issue_line(diff.english_rule, "structure", en_token) - line_tr = resolve_issue_line(diff.translated_rule, "structure", tr_token) - # Skip structure diffs where we can't find both tokens - if line_en is None or line_tr is None: - continue - else: - line_en = resolve_issue_line(diff.english_rule, diff.diff_type) - line_tr = resolve_issue_line(diff.translated_rule, diff.diff_type) - displayable_diffs.append((diff, line_en, line_tr)) - - if displayable_diffs: - console.print( - f"\n [magenta]≠[/] [bold]Rule Differences[/] " - f"[[magenta]{len(displayable_diffs)}[/]] [dim](structural differences between en and translation)[/]" + grouped_issues: Dict[str, Dict[str, Any]] = {} + + def add_issue(rule: RuleInfo, issue_type: str, payload: Dict[str, Any]) -> None: + if rule.key not in grouped_issues: + grouped_issues[rule.key] = { + "rule": rule, + "by_type": {}, + } + type_map: Dict[str, List[Dict[str, Any]]] = grouped_issues[rule.key]["by_type"] + type_map.setdefault(issue_type, []).append(payload) + + for rule in result.missing_rules: + add_issue( + rule, + "missing_rule", + {"line_en": rule.line_number}, + ) + + for rule, entries in result.untranslated_text: + for _, text, line in entries: + issue_line = line or rule.line_number + add_issue( + rule, + "untranslated_text", + {"line_tr": issue_line, "text": text}, ) - for diff, line_en, line_tr in displayable_diffs: - print_diff_item(diff, line_en=line_en, line_tr=line_tr, verbose=verbose) - issues += 1 - - if result.extra_rules: - console.print(f"\n [blue]ℹ[/] [bold]Extra Rules[/] [[blue]{len(result.extra_rules)}[/]] [dim](may be intentional)[/]") - for rule in result.extra_rules: - print_rule_item(rule, issue_line=rule.line_number) - issues += 1 + + for diff in result.rule_differences: + if diff.diff_type == "structure": + en_tokens = extract_structure_elements(diff.english_rule.data) + tr_tokens = extract_structure_elements(diff.translated_rule.data) + en_token, tr_token, mismatch_pos = first_structure_mismatch(en_tokens, tr_tokens) + + # Skip reporting when tokens are misaligned (both exist but differ) + # This avoids misleading line numbers when entire blocks are missing/added + if en_token is not None and tr_token is not None and en_token != tr_token: + continue + + line_en = resolve_issue_line(diff.english_rule, "structure", en_token) + line_tr = resolve_issue_line(diff.translated_rule, "structure", tr_token) + # Skip structure diffs where we can't find both tokens + if line_en is None or line_tr is None: + continue + else: + line_en = resolve_issue_line(diff.english_rule, diff.diff_type) + line_tr = resolve_issue_line(diff.translated_rule, diff.diff_type) + + add_issue( + diff.english_rule, + f"rule_difference:{diff.diff_type}", + {"line_en": line_en, "line_tr": line_tr, "diff": diff}, + ) + + for rule in result.extra_rules: + add_issue( + rule, + "extra_rule", + {"line_tr": rule.line_number}, + ) + + if grouped_issues: + total_grouped_issues = sum( + len(entries) + for group in grouped_issues.values() + for entries in group["by_type"].values() + ) + console.print( + f"\n [magenta]≠[/] [bold]Rule Issues[/] " + f"[[magenta]{total_grouped_issues}[/]] [dim](grouped by rule and issue type)[/]" + ) + for group in grouped_issues.values(): + rule = group["rule"] + by_type: Dict[str, List[Dict[str, Any]]] = group["by_type"] + console.print(f" [dim]•[/] {rule_label(rule)}") + for issue_type in sorted(by_type.keys(), key=issue_type_sort_key): + entries = by_type[issue_type] + console.print( + f" [dim]{issue_type_label(issue_type)} " + f"[{len(entries)}][/]" + ) + for entry in entries: + if issue_type == "missing_rule": + console.print( + f" [dim]•[/] [dim](line {entry['line_en']} in English)[/]" + ) + issues += 1 + elif issue_type == "extra_rule": + console.print( + f" [dim]•[/] [dim](line {entry['line_tr']} in translation)[/]" + ) + issues += 1 + elif issue_type == "untranslated_text": + console.print( + f" [dim]•[/] [dim](line {entry['line_tr']} tr)[/] " + f"[yellow]\"{escape(entry['text'])}\"[/]" + ) + issues += 1 + else: + diff: RuleDifference = entry["diff"] + console.print( + f" [dim]•[/] [dim](line {entry['line_en']} en, {entry['line_tr']} tr)[/]" + ) + console.print(f" [dim]{diff.description}[/]") + if verbose: + console.print(f" [green]en:[/] {escape(diff.english_snippet)}") + console.print(f" [red]tr:[/] {escape(diff.translated_snippet)}") + issues += 1 return issues diff --git a/PythonScripts/audit_translations/tests/golden/rich/structure_diff_nonverbose.golden b/PythonScripts/audit_translations/tests/golden/rich/structure_diff_nonverbose.golden index 40fd15c6..314ad234 100644 --- a/PythonScripts/audit_translations/tests/golden/rich/structure_diff_nonverbose.golden +++ b/PythonScripts/audit_translations/tests/golden/rich/structure_diff_nonverbose.golden @@ -4,6 +4,8 @@ English: 1 rules → Translated: 1 rules ──────────────────────────────────────────────────────────────────────────────── - ≠ Rule Differences [1] (structural differences between en and translation) - • struct-rule (line 9 en, 1 tr) - Rule structure differs (test/if/then/else blocks) + ≠ Rule Issues [1] (grouped by rule and issue type) + • struct-rule (mi) + Structure Differences [1] + • (line 9 en, 1 tr) + Rule structure differs (test/if/then/else blocks) diff --git a/PythonScripts/audit_translations/tests/golden/rich/structure_diff_verbose.golden b/PythonScripts/audit_translations/tests/golden/rich/structure_diff_verbose.golden index 3af596bd..ec624426 100644 --- a/PythonScripts/audit_translations/tests/golden/rich/structure_diff_verbose.golden +++ b/PythonScripts/audit_translations/tests/golden/rich/structure_diff_verbose.golden @@ -4,8 +4,10 @@ English: 1 rules → Translated: 1 rules ──────────────────────────────────────────────────────────────────────────────── - ≠ Rule Differences [1] (structural differences between en and translation) - • struct-rule (line 9 en, 1 tr) - Rule structure differs (test/if/then/else blocks) - en: replace: test: if: then: else: - tr: replace: test: if: then: + ≠ Rule Issues [1] (grouped by rule and issue type) + • struct-rule (mi) + Structure Differences [1] + • (line 9 en, 1 tr) + Rule structure differs (test/if/then/else blocks) + en: replace: test: if: then: else: + tr: replace: test: if: then: From f299d7fda6b428724422575206aaaa1048a25d66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Tue, 17 Feb 2026 00:09:23 +0100 Subject: [PATCH 2/4] Group rich audit output by rule/type and expand coverage - Render rich warnings as rule groups with per-issue-type subgroups - Add auditor tests for subgroup ordering, mixed issue types, and verbose snippet behavior - Add CLI rich end-to-end grouping checks plus calculus golden snapshot - Improve docstrings and compact test argument setup --- PythonScripts/audit_translations/auditor.py | 10 +- .../golden/rich/cli_calculus_verbose.golden | 68 ++++++++ .../audit_translations/tests/test_auditor.py | 160 ++++++++++++++++++ .../tests/test_cli_end_to_end.py | 105 ++++++++++-- 4 files changed, 326 insertions(+), 17 deletions(-) create mode 100644 PythonScripts/audit_translations/tests/golden/rich/cli_calculus_verbose.golden diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py index ef9fae24..0e0547fa 100644 --- a/PythonScripts/audit_translations/auditor.py +++ b/PythonScripts/audit_translations/auditor.py @@ -161,7 +161,9 @@ def issue_type_sort_key(issue_type: str) -> Tuple[int, str]: """ Stable ordering for per-rule issue groups. - Differences are grouped by diff type under the same rule. + The first tuple element defines user-facing priority (missing/untranslated/ + match/condition/variables/structure/extra). The second element keeps sorting + deterministic for unknown keys. """ order = { "missing_rule": 0, @@ -176,6 +178,12 @@ def issue_type_sort_key(issue_type: str) -> Tuple[int, str]: def issue_type_label(issue_type: str) -> str: + """ + Return the display label used in rich grouped output. + + Unknown issue types fall back to their raw key so renderer behavior remains + robust when new categories are introduced. + """ labels = { "missing_rule": "Missing in Translation", "untranslated_text": "Untranslated Text", diff --git a/PythonScripts/audit_translations/tests/golden/rich/cli_calculus_verbose.golden b/PythonScripts/audit_translations/tests/golden/rich/cli_calculus_verbose.golden new file mode 100644 index 00000000..949f58c4 --- /dev/null +++ b/PythonScripts/audit_translations/tests/golden/rich/cli_calculus_verbose.golden @@ -0,0 +1,68 @@ +╭──────────────────────────────────────────────────────────────────────────────╮ +│ MathCAT Translation Audit: ES │ +╰──────────────────────────────────────────────────────────────────────────────╯ + + Comparing against English (en) reference files + Files to check: 1 + +──────────────────────────────────────────────────────────────────────────────── +⚠ SharedRules/calculus.yaml + English: 4 rules → Translated: 3 rules +──────────────────────────────────────────────────────────────────────────────── + + ≠ Rule Issues [13] (grouped by rule and issue type) + • laplacian (laplacian) + Missing in Translation [1] + • (line 4 in English) + • divergence (divergence) + Untranslated Text [3] + • (line 10 tr) "divergence" + • (line 11 tr) "div" + • (line 12 tr) "of" + Match Pattern Differences [1] + • (line 22 en, 6 tr) + Match pattern differs + en: count(*) = 1 + tr: . + Condition Differences [1] + • (line 25 en, 9 tr) + Conditions differ + en: $Verbosity='Terse', not(IsNode(*[1], 'leaf')) + tr: $Verbosity='Verbose', not(IsNode(*[1], 'leaf')) + • curl (curl) + Untranslated Text [1] + • (line 22 tr) "curl of" + Match Pattern Differences [1] + • (line 35 en, 20 tr) + Match pattern differs + en: count(*) = 1 + tr: . + Condition Differences [1] + • (line 39 en, 24 tr) + Conditions differ + en: $Verbosity!='Terse', not(IsNode(*[1], 'leaf')) + tr: not(IsNode(*[1], 'leaf')) + Structure Differences [1] + • (line 38 en, 18 tr) + Rule structure differs (test/if/then/else blocks) + en: replace: test: if: then: test: if: then: + tr: replace: test: if: then: + • gradient (gradient) + Untranslated Text [2] + • (line 34 tr) "gradient of" + • (line 35 tr) "del" + Match Pattern Differences [1] + • (line 48 en, 30 tr) + Match pattern differs + en: count(*) = 1 + tr: . +╭──────────────────────────────────────────────────────────────────────────────╮ +│ SUMMARY │ +│ Files checked 1 │ +│ Files with issues 1 │ +│ Files OK 0 │ +│ Missing rules 1 │ +│ Untranslated text 6 │ +│ Rule differences 6 │ +│ Extra rules 0 │ +╰──────────────────────────────────────────────────────────────────────────────╯ diff --git a/PythonScripts/audit_translations/tests/test_auditor.py b/PythonScripts/audit_translations/tests/test_auditor.py index 8ed22a96..82923311 100644 --- a/PythonScripts/audit_translations/tests/test_auditor.py +++ b/PythonScripts/audit_translations/tests/test_auditor.py @@ -367,3 +367,163 @@ def test_print_warnings_still_shows_missing_else() -> None: # Should report exactly 1 issue (the structure difference) assert issues_count == 1, f"Expected 1 issue but got {issues_count}" + + +def test_print_warnings_groups_multiple_subgroups_for_single_rule(fixed_console_width) -> None: + """ + Ensure one rule can render multiple subgroup types in stable order. + + This covers the new grouped renderer path where a single rule can include + untranslated entries plus multiple diff types. It validates both subgroup + labels and the expected ordering policy. + """ + en = make_rule("grouped-rule", "mi", 10, "en raw") + tr = make_rule("grouped-rule", "mi", 20, "tr raw") + en.line_map = {"match": [11], "condition": [12], "variables": [13]} + tr.line_map = {"match": [21], "condition": [22], "variables": [23]} + + diffs = [ + RuleDifference( + english_rule=en, + translated_rule=tr, + diff_type="match", + description="Match pattern differs", + english_snippet="en-match", + translated_snippet="tr-match", + ), + RuleDifference( + english_rule=en, + translated_rule=tr, + diff_type="condition", + description="Conditions differ", + english_snippet="en-cond", + translated_snippet="tr-cond", + ), + RuleDifference( + english_rule=en, + translated_rule=tr, + diff_type="variables", + description="Variable definitions differ", + english_snippet="en-var", + translated_snippet="tr-var", + ), + ] + result = ComparisonResult( + missing_rules=[], + extra_rules=[], + untranslated_text=[(tr, [("t", "first", 24), ("ct", "second", 25)])], + rule_differences=diffs, + file_path="", + english_rule_count=1, + translated_rule_count=1, + ) + + with console.capture() as capture: + issues_count = print_warnings(result, "grouped.yaml", verbose=False) + output = capture.get() + + assert output.count("• grouped-rule (mi)") == 1 + assert "Untranslated Text [2]" in output + assert "Match Pattern Differences [1]" in output + assert "Condition Differences [1]" in output + assert "Variable Differences [1]" in output + + untranslated_index = output.index("Untranslated Text [2]") + match_index = output.index("Match Pattern Differences [1]") + condition_index = output.index("Condition Differences [1]") + variable_index = output.index("Variable Differences [1]") + assert untranslated_index < match_index < condition_index < variable_index + + assert issues_count == 5 + + +def test_print_warnings_groups_missing_and_extra_by_rule(fixed_console_width) -> None: + """ + Ensure missing, extra, and diff issues are grouped under their own rule headers. + + This verifies grouping across multiple rules: each rule should appear once, + with only its relevant subgroup block(s), and issue counting should remain + aligned with rendered leaf items. + """ + missing = make_rule("missing-rule", "mn", 30, "missing raw") + extra = make_rule("extra-rule", "mo", 40, "extra raw") + en = make_rule("diff-rule", "mrow", 50, "diff en") + tr = make_rule("diff-rule", "mrow", 60, "diff tr") + en.line_map = {"condition": [51]} + tr.line_map = {"condition": [61]} + diff = RuleDifference( + english_rule=en, + translated_rule=tr, + diff_type="condition", + description="Conditions differ", + english_snippet="en-only", + translated_snippet="tr-only", + ) + + result = ComparisonResult( + missing_rules=[missing], + extra_rules=[extra], + untranslated_text=[], + rule_differences=[diff], + file_path="", + english_rule_count=2, + translated_rule_count=2, + ) + + with console.capture() as capture: + issues_count = print_warnings(result, "mixed.yaml", verbose=False) + output = capture.get() + + assert output.count("• missing-rule (mn)") == 1 + assert output.count("• extra-rule (mo)") == 1 + assert output.count("• diff-rule (mrow)") == 1 + assert "Missing in Translation [1]" in output + assert "Extra in Translation [1]" in output + assert "Condition Differences [1]" in output + assert issues_count == 3 + + +def test_print_warnings_verbose_shows_snippets_only_for_differences(fixed_console_width) -> None: + """ + Ensure verbose snippet lines are printed only for rule differences. + + Missing and untranslated groups should not emit en/tr snippet lines in + verbose mode; only diff subgroups should include these details. + """ + missing = make_rule("missing-rule", "mn", 10, "missing raw") + tr_untranslated = make_rule("untranslated-rule", "mi", 20, "untranslated raw") + en = make_rule("diff-rule", "mrow", 30, "diff en") + tr = make_rule("diff-rule", "mrow", 40, "diff tr") + en.line_map = {"match": [31]} + tr.line_map = {"match": [41]} + + diff = RuleDifference( + english_rule=en, + translated_rule=tr, + diff_type="match", + description="Match pattern differs", + english_snippet="en-snippet", + translated_snippet="tr-snippet", + ) + result = ComparisonResult( + missing_rules=[missing], + extra_rules=[], + untranslated_text=[(tr_untranslated, [("t", "leave me", 21)])], + rule_differences=[diff], + file_path="", + english_rule_count=2, + translated_rule_count=2, + ) + + with console.capture() as capture: + issues_count = print_warnings(result, "verbose.yaml", verbose=True) + output = capture.get() + + assert "Missing in Translation [1]" in output + assert "Untranslated Text [1]" in output + assert "Match Pattern Differences [1]" in output + assert output.count("en:") == 1 + assert output.count("tr:") == 1 + assert "en-snippet" in output + assert "tr-snippet" in output + assert issues_count == 3 diff --git a/PythonScripts/audit_translations/tests/test_cli_end_to_end.py b/PythonScripts/audit_translations/tests/test_cli_end_to_end.py index ce1e325c..a0596457 100644 --- a/PythonScripts/audit_translations/tests/test_cli_end_to_end.py +++ b/PythonScripts/audit_translations/tests/test_cli_end_to_end.py @@ -12,6 +12,7 @@ from pathlib import Path from .. import cli as audit_cli +from ..auditor import console def fixture_rules_dir() -> Path: @@ -37,14 +38,7 @@ def test_cli_main_jsonl_output_matches_fixture(capsys, monkeypatch) -> None: This validates argparse wiring and output formatting without spawning a new process. """ - rules_dir = fixture_rules_dir() - args = [ - "es", - "--format", - "jsonl", - "--rules-dir", - str(rules_dir), - ] + args = ["es", "--format", "jsonl", "--rules-dir", str(fixture_rules_dir())] monkeypatch.setattr(sys, "argv", ["audit_translations", *args]) audit_cli.main() @@ -58,14 +52,7 @@ def test_cli_module_jsonl_output_matches_fixture() -> None: This validates module execution, environment wiring, and exit behavior. """ - rules_dir = fixture_rules_dir() - args = [ - "es", - "--format", - "jsonl", - "--rules-dir", - str(rules_dir), - ] + args = ["es", "--format", "jsonl", "--rules-dir", str(fixture_rules_dir())] python_scripts_dir = Path(__file__).resolve().parents[2] env = os.environ.copy() @@ -82,3 +69,89 @@ def test_cli_module_jsonl_output_matches_fixture() -> None: check=True, ) assert_issue_counts(parse_jsonl(result.stdout)) + + +def test_cli_main_rich_output_groups_by_rule_and_type(capsys, monkeypatch) -> None: + """ + Ensure rich CLI output is grouped by rule and subgrouped by issue type. + + This is a behavioral assertion test (not snapshot-based): it checks that + core grouping markers and subgroup ordering are visible in user-facing CLI + output for a representative fixture file. + """ + args = ["es", "--rules-dir", str(fixture_rules_dir()), "--file", "SharedRules/calculus.yaml", "--verbose"] + + old_width = console.width + console.width = 80 + try: + monkeypatch.setattr(sys, "argv", ["audit_translations", *args]) + audit_cli.main() + output = capsys.readouterr().out + finally: + console.width = old_width + + assert "≠ Rule Issues [13] (grouped by rule and issue type)" in output + assert "• divergence (divergence)" in output + assert "Untranslated Text [3]" in output + assert "Match Pattern Differences [1]" in output + assert "Condition Differences [1]" in output + + untranslated_idx = output.index("Untranslated Text [3]") + match_idx = output.index("Match Pattern Differences [1]") + condition_idx = output.index("Condition Differences [1]") + assert untranslated_idx < match_idx < condition_idx + + +def test_cli_main_rich_output_matches_grouped_golden(capsys, monkeypatch) -> None: + """ + Ensure rich CLI grouped rendering stays stable for a multi-rule fixture. + + The golden file captures overall visual layout so formatting regressions in + grouped sections are caught even when functional issue counts stay the same. + """ + args = ["es", "--rules-dir", str(fixture_rules_dir()), "--file", "SharedRules/calculus.yaml", "--verbose"] + golden_path = Path(__file__).resolve().parent / "golden" / "rich" / "cli_calculus_verbose.golden" + + old_width = console.width + console.width = 80 + + try: + monkeypatch.setattr(sys, "argv", ["audit_translations", *args]) + audit_cli.main() + output = capsys.readouterr().out + finally: + console.width = old_width + + assert output == golden_path.read_text(encoding="utf-8") + + +def test_cli_module_rich_output_groups_by_rule_and_type() -> None: + """ + Ensure `python -m audit_translations` rich output also shows grouped sections. + + This complements the in-process CLI test by validating module execution in + a subprocess with environment wiring and terminal width constraints. + """ + args = ["es", "--rules-dir", str(fixture_rules_dir()), "--file", "SharedRules/calculus.yaml", "--verbose"] + + python_scripts_dir = Path(__file__).resolve().parents[2] + env = os.environ.copy() + env["PYTHONPATH"] = os.pathsep.join( + [str(python_scripts_dir), env.get("PYTHONPATH", "")] + ).strip(os.pathsep) + env["COLUMNS"] = "80" + + result = subprocess.run( + [sys.executable, "-m", "audit_translations", *args], + capture_output=True, + text=True, + cwd=str(python_scripts_dir), + env=env, + check=True, + ) + + output = result.stdout + assert "≠ Rule Issues [13] (grouped by rule and issue type)" in output + assert "• laplacian (laplacian)" in output + assert "• divergence (divergence)" in output + assert "Structure Differences [1]" in output From 87e7794a878ff22ddeef3b5821fd1a94ae5d6206 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Tue, 17 Feb 2026 01:27:00 +0100 Subject: [PATCH 3/4] Improve --only CLI coverage and fix rich Path filename crash --- PythonScripts/audit_translations/README.md | 3 +- PythonScripts/audit_translations/auditor.py | 3 +- .../tests/test_cli_end_to_end.py | 98 +++++++++++++++++++ 3 files changed, 101 insertions(+), 3 deletions(-) diff --git a/PythonScripts/audit_translations/README.md b/PythonScripts/audit_translations/README.md index 7104cc74..ef084922 100644 --- a/PythonScripts/audit_translations/README.md +++ b/PythonScripts/audit_translations/README.md @@ -56,7 +56,6 @@ The tool automatically adjusts its matching logic based on the file type: **Syntax:** ```bash -# Preferred: console script (no -m needed) uv run audit-translations [--file ] uv run audit-translations --list @@ -111,4 +110,4 @@ uv run --project PythonScripts audit-translations --list ### Testing -```uv run python -m pytest``` +```uv run pytest``` diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py index 0e0547fa..b26a71fa 100644 --- a/PythonScripts/audit_translations/auditor.py +++ b/PythonScripts/audit_translations/auditor.py @@ -395,6 +395,7 @@ def write(self, issue: dict) -> None: def print_warnings(result: ComparisonResult, file_name: str, verbose: bool = False) -> int: """Print warnings to console. Returns count of issues found.""" issues = 0 + display_name = Path(file_name).as_posix() has_issues = result.missing_rules or result.untranslated_text or result.extra_rules or result.rule_differences if not has_issues: @@ -404,7 +405,7 @@ def print_warnings(result: ComparisonResult, file_name: str, verbose: bool = Fal ("red", "✗") if result.translated_rule_count == 0 else ("yellow", "⚠") console.print() console.rule(style="cyan") - console.print(f"[{style}]{icon}[/] [bold]{escape(file_name)}[/]") + console.print(f"[{style}]{icon}[/] [bold]{escape(display_name)}[/]") console.print(f" [dim]English: {result.english_rule_count} rules → Translated: {result.translated_rule_count} rules[/]") console.rule(style="cyan") diff --git a/PythonScripts/audit_translations/tests/test_cli_end_to_end.py b/PythonScripts/audit_translations/tests/test_cli_end_to_end.py index a0596457..b9b49790 100644 --- a/PythonScripts/audit_translations/tests/test_cli_end_to_end.py +++ b/PythonScripts/audit_translations/tests/test_cli_end_to_end.py @@ -11,6 +11,8 @@ from collections import Counter from pathlib import Path +import pytest + from .. import cli as audit_cli from ..auditor import console @@ -71,6 +73,52 @@ def test_cli_module_jsonl_output_matches_fixture() -> None: assert_issue_counts(parse_jsonl(result.stdout)) +def test_cli_main_jsonl_only_filters_issue_types(capsys, monkeypatch) -> None: + """ + Ensure --only limits JSONL output to the requested categories. + + Uses in-process CLI invocation so argparse parsing and filter plumbing + are both exercised without subprocess overhead. + """ + args = ["es", "--format", "jsonl", "--rules-dir", str(fixture_rules_dir()), "--only", "missing,extra"] + + monkeypatch.setattr(sys, "argv", ["audit_translations", *args]) + audit_cli.main() + issues = parse_jsonl(capsys.readouterr().out) + + counts = Counter(issue["issue_type"] for issue in issues) + assert set(counts) == {"missing_rule", "extra_rule"} + assert counts["missing_rule"] == 4 + assert counts["extra_rule"] == 3 + + +def test_cli_main_rich_only_filters_issue_groups(capsys, monkeypatch) -> None: + """ + Ensure --only also filters visible rich subgroup sections. + + We expect missing/extra groups to remain while untranslated and all diff + subgroup labels are omitted from the rendered output. + """ + args = ["es", "--rules-dir", str(fixture_rules_dir()), "--only", "missing,extra"] + + old_width = console.width + console.width = 80 + try: + monkeypatch.setattr(sys, "argv", ["audit_translations", *args]) + audit_cli.main() + output = capsys.readouterr().out + finally: + console.width = old_width + + assert "Missing in Translation" in output + assert "Extra in Translation" in output + assert "Untranslated Text" not in output + assert "Match Pattern Differences" not in output + assert "Condition Differences" not in output + assert "Variable Differences" not in output + assert "Structure Differences" not in output + + def test_cli_main_rich_output_groups_by_rule_and_type(capsys, monkeypatch) -> None: """ Ensure rich CLI output is grouped by rule and subgrouped by issue type. @@ -125,6 +173,56 @@ def test_cli_main_rich_output_matches_grouped_golden(capsys, monkeypatch) -> Non assert output == golden_path.read_text(encoding="utf-8") +def test_cli_main_requires_language_or_list(capsys, monkeypatch) -> None: + """ + Ensure CLI exits with a clear error when neither language nor --list is set. + + This protects the expected help/error UX for accidental empty invocations. + """ + monkeypatch.setattr(sys, "argv", ["audit_translations"]) + + with pytest.raises(SystemExit) as exc: + audit_cli.main() + output = capsys.readouterr().out + + assert exc.value.code == 1 + assert "Please specify a language code or use --list" in output + + +def test_cli_main_rejects_unknown_only_token(capsys, monkeypatch) -> None: + """ + Ensure unsupported --only tokens are rejected before audit execution. + + This keeps filter behavior explicit and prevents silently ignored typos. + """ + args = ["es", "--rules-dir", str(fixture_rules_dir()), "--only", "missing,bogus"] + monkeypatch.setattr(sys, "argv", ["audit_translations", *args]) + + with pytest.raises(SystemExit) as exc: + audit_cli.main() + output = capsys.readouterr().out + + assert exc.value.code == 1 + assert "Unknown issue types: bogus" in output + + +def test_cli_main_reports_missing_region_directory(capsys, monkeypatch) -> None: + """ + Ensure region variants fail fast when the requested subdirectory is absent. + + This validates the error path for languages like es-mx when only es exists. + """ + args = ["es-mx", "--rules-dir", str(fixture_rules_dir())] + monkeypatch.setattr(sys, "argv", ["audit_translations", *args]) + + with pytest.raises(SystemExit) as exc: + audit_cli.main() + output = capsys.readouterr().out + + assert exc.value.code == 1 + assert "Region directory not found" in output + + def test_cli_module_rich_output_groups_by_rule_and_type() -> None: """ Ensure `python -m audit_translations` rich output also shows grouped sections. From ea58cdff0e243532c9e330c7e4a0f09ce48244de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Tue, 17 Feb 2026 01:34:05 +0100 Subject: [PATCH 4/4] Test that audit-ignore suppresses untranslated and diff findings in compare_files --- .../audit_translations/tests/test_auditor.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/PythonScripts/audit_translations/tests/test_auditor.py b/PythonScripts/audit_translations/tests/test_auditor.py index 82923311..d757f5b4 100644 --- a/PythonScripts/audit_translations/tests/test_auditor.py +++ b/PythonScripts/audit_translations/tests/test_auditor.py @@ -149,6 +149,45 @@ def test_compare_files_merges_region_rules(tmp_path) -> None: assert result.extra_rules == [] +def test_compare_files_skips_untranslated_and_diffs_when_audit_ignored(tmp_path) -> None: + """ + Ensure audit-ignore suppresses untranslated and diff findings for a rule. + + The translated rule intentionally contains both a lowercase text key and a + match mismatch. With an audit-ignore marker present, neither should be + surfaced by compare_files. + """ + english_file = tmp_path / "en.yaml" + translated_file = tmp_path / "de.yaml" + + english_file.write_text( + """- name: ignored-rule + tag: mo + match: "self::m:mo" + replace: + - T: "english" +""", + encoding="utf-8", + ) + translated_file.write_text( + """- name: ignored-rule + tag: mo # audit-ignore + match: "self::m:mi" + replace: + - t: "nicht uebersetzt" +""", + encoding="utf-8", + ) + + result = compare_files(str(english_file), str(translated_file)) + + assert result.missing_rules == [] + assert result.extra_rules == [] + assert result.untranslated_text == [] + assert result.rule_differences == [] + assert collect_issues(result, "de.yaml", "de") == [] + + def test_get_yaml_files_includes_region(tmp_path) -> None: """ Ensures get_yaml_files merges base and region file lists.