From d40ef9e03a8dd96f6dd2487e5acb1c1448ee1eca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Moritz=20Gro=C3=9F?= <hi@mgross.dev>
Date: Mon, 16 Feb 2026 23:42:15 +0100
Subject: [PATCH 1/4] Group rich audit output by rule and issue type

---
 PythonScripts/audit_translations/auditor.py   | 205 ++++++++++++------
 .../rich/structure_diff_nonverbose.golden     |   8 +-
 .../golden/rich/structure_diff_verbose.golden |  12 +-
 3 files changed, 148 insertions(+), 77 deletions(-)

diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py
index 593475f6..ef9fae24 100644
--- a/PythonScripts/audit_translations/auditor.py
+++ b/PythonScripts/audit_translations/auditor.py
@@ -9,7 +9,7 @@
 import os
 import sys
 from pathlib import Path
-from typing import Iterable, List, Optional, TextIO, Tuple
+from typing import Any, Dict, Iterable, List, Optional, TextIO, Tuple
 
 from rich.console import Console
 from rich.markup import escape
@@ -154,24 +154,38 @@ def rule_label(rule: RuleInfo) -> str:
     if rule.name is None:
         return f"[yellow]\"{escape(rule.key)}\"[/]"
     tag = rule.tag or "unknown"
-    return f"[cyan]{escape(rule.name)}[/] [dim][{escape(tag)}][/]"
+    return f"[cyan]{escape(rule.name)}[/] [dim]({escape(tag)})[/]"
 
 
-def print_rule_item(rule: RuleInfo, issue_line: int, context: str = ""):
-    console.print(f"      [dim]•[/] {rule_label(rule)} [dim](line {issue_line}{context})[/]")
-
+def issue_type_sort_key(issue_type: str) -> Tuple[int, str]:
+    """
+    Stable ordering for per-rule issue groups.
 
-def print_diff_item(diff: RuleDifference, line_en: int, line_tr: int, verbose: bool = False):
-    """Print a single rule difference"""
-    rule = diff.english_rule
-    console.print(
-        f"      [dim]•[/] {rule_label(rule)} "
-        f"[dim](line {line_en} en, {line_tr} tr)[/]"
-    )
-    console.print(f"          [dim]{diff.description}[/]")
-    if verbose:
-        console.print(f"          [green]en:[/] {escape(diff.english_snippet)}")
-        console.print(f"          [red]tr:[/] {escape(diff.translated_snippet)}")
+    Differences are grouped by diff type under the same rule.
+    """
+    order = {
+        "missing_rule": 0,
+        "untranslated_text": 1,
+        "rule_difference:match": 2,
+        "rule_difference:condition": 3,
+        "rule_difference:variables": 4,
+        "rule_difference:structure": 5,
+        "extra_rule": 6,
+    }
+    return order.get(issue_type, 99), issue_type
+
+
+def issue_type_label(issue_type: str) -> str:
+    labels = {
+        "missing_rule": "Missing in Translation",
+        "untranslated_text": "Untranslated Text",
+        "rule_difference:match": "Match Pattern Differences",
+        "rule_difference:condition": "Condition Differences",
+        "rule_difference:variables": "Variable Differences",
+        "rule_difference:structure": "Structure Differences",
+        "extra_rule": "Extra in Translation",
+    }
+    return labels.get(issue_type, issue_type)
 
 
 def issue_base(rule: RuleInfo, file_name: str, language: str) -> dict:
@@ -386,60 +400,113 @@ def print_warnings(result: ComparisonResult, file_name: str, verbose: bool = Fal
     console.print(f"  [dim]English: {result.english_rule_count} rules  →  Translated: {result.translated_rule_count} rules[/]")
     console.rule(style="cyan")
 
-    if result.missing_rules:
-        console.print(f"\n  [red]✗[/] [bold]Missing Rules[/] [[red]{len(result.missing_rules)}[/]] [dim](in English but not in translation)[/]")
-        for rule in result.missing_rules:
-            print_rule_item(rule, issue_line=rule.line_number, context=" in English")
-            issues += 1
-
-    if result.untranslated_text:
-        untranslated_count = sum(len(entries) for _, entries in result.untranslated_text)
-        console.print(f"\n  [yellow]⚠[/] [bold]Untranslated Text[/] [[yellow]{untranslated_count}[/]] [dim](lowercase t/ot/ct keys)[/]")
-        for rule, entries in result.untranslated_text:
-            for _, text, line in entries:
-                issue_line = line or rule.line_number
-                print_rule_item(rule, issue_line=issue_line)
-                console.print(f"          [dim]→[/] [yellow]\"{escape(text)}\"[/]")
-                issues += 1
-
-    if result.rule_differences:
-        # Count only diffs that will actually be displayed
-        displayable_diffs = []
-        for diff in result.rule_differences:
-            if diff.diff_type == "structure":
-                en_tokens = extract_structure_elements(diff.english_rule.data)
-                tr_tokens = extract_structure_elements(diff.translated_rule.data)
-                en_token, tr_token, mismatch_pos = first_structure_mismatch(en_tokens, tr_tokens)
-
-                # Skip reporting when tokens are misaligned (both exist but differ)
-                # This avoids misleading line numbers when entire blocks are missing/added
-                if en_token is not None and tr_token is not None and en_token != tr_token:
-                    continue
-
-                line_en = resolve_issue_line(diff.english_rule, "structure", en_token)
-                line_tr = resolve_issue_line(diff.translated_rule, "structure", tr_token)
-                # Skip structure diffs where we can't find both tokens
-                if line_en is None or line_tr is None:
-                    continue
-            else:
-                line_en = resolve_issue_line(diff.english_rule, diff.diff_type)
-                line_tr = resolve_issue_line(diff.translated_rule, diff.diff_type)
-            displayable_diffs.append((diff, line_en, line_tr))
-
-        if displayable_diffs:
-            console.print(
-                f"\n  [magenta]≠[/] [bold]Rule Differences[/] "
-                f"[[magenta]{len(displayable_diffs)}[/]] [dim](structural differences between en and translation)[/]"
+    grouped_issues: Dict[str, Dict[str, Any]] = {}
+
+    def add_issue(rule: RuleInfo, issue_type: str, payload: Dict[str, Any]) -> None:
+        if rule.key not in grouped_issues:
+            grouped_issues[rule.key] = {
+                "rule": rule,
+                "by_type": {},
+            }
+        type_map: Dict[str, List[Dict[str, Any]]] = grouped_issues[rule.key]["by_type"]
+        type_map.setdefault(issue_type, []).append(payload)
+
+    for rule in result.missing_rules:
+        add_issue(
+            rule,
+            "missing_rule",
+            {"line_en": rule.line_number},
+        )
+
+    for rule, entries in result.untranslated_text:
+        for _, text, line in entries:
+            issue_line = line or rule.line_number
+            add_issue(
+                rule,
+                "untranslated_text",
+                {"line_tr": issue_line, "text": text},
             )
-            for diff, line_en, line_tr in displayable_diffs:
-                print_diff_item(diff, line_en=line_en, line_tr=line_tr, verbose=verbose)
-                issues += 1
-
-    if result.extra_rules:
-        console.print(f"\n  [blue]ℹ[/] [bold]Extra Rules[/] [[blue]{len(result.extra_rules)}[/]] [dim](may be intentional)[/]")
-        for rule in result.extra_rules:
-            print_rule_item(rule, issue_line=rule.line_number)
-            issues += 1
+
+    for diff in result.rule_differences:
+        if diff.diff_type == "structure":
+            en_tokens = extract_structure_elements(diff.english_rule.data)
+            tr_tokens = extract_structure_elements(diff.translated_rule.data)
+            en_token, tr_token, mismatch_pos = first_structure_mismatch(en_tokens, tr_tokens)
+
+            # Skip reporting when tokens are misaligned (both exist but differ)
+            # This avoids misleading line numbers when entire blocks are missing/added
+            if en_token is not None and tr_token is not None and en_token != tr_token:
+                continue
+
+            line_en = resolve_issue_line(diff.english_rule, "structure", en_token)
+            line_tr = resolve_issue_line(diff.translated_rule, "structure", tr_token)
+            # Skip structure diffs where we can't find both tokens
+            if line_en is None or line_tr is None:
+                continue
+        else:
+            line_en = resolve_issue_line(diff.english_rule, diff.diff_type)
+            line_tr = resolve_issue_line(diff.translated_rule, diff.diff_type)
+
+        add_issue(
+            diff.english_rule,
+            f"rule_difference:{diff.diff_type}",
+            {"line_en": line_en, "line_tr": line_tr, "diff": diff},
+        )
+
+    for rule in result.extra_rules:
+        add_issue(
+            rule,
+            "extra_rule",
+            {"line_tr": rule.line_number},
+        )
+
+    if grouped_issues:
+        total_grouped_issues = sum(
+            len(entries)
+            for group in grouped_issues.values()
+            for entries in group["by_type"].values()
+        )
+        console.print(
+            f"\n  [magenta]≠[/] [bold]Rule Issues[/] "
+            f"[[magenta]{total_grouped_issues}[/]] [dim](grouped by rule and issue type)[/]"
+        )
+        for group in grouped_issues.values():
+            rule = group["rule"]
+            by_type: Dict[str, List[Dict[str, Any]]] = group["by_type"]
+            console.print(f"      [dim]•[/] {rule_label(rule)}")
+            for issue_type in sorted(by_type.keys(), key=issue_type_sort_key):
+                entries = by_type[issue_type]
+                console.print(
+                    f"          [dim]{issue_type_label(issue_type)} "
+                    f"[{len(entries)}][/]"
+                )
+                for entry in entries:
+                    if issue_type == "missing_rule":
+                        console.print(
+                            f"              [dim]•[/] [dim](line {entry['line_en']} in English)[/]"
+                        )
+                        issues += 1
+                    elif issue_type == "extra_rule":
+                        console.print(
+                            f"              [dim]•[/] [dim](line {entry['line_tr']} in translation)[/]"
+                        )
+                        issues += 1
+                    elif issue_type == "untranslated_text":
+                        console.print(
+                            f"              [dim]•[/] [dim](line {entry['line_tr']} tr)[/] "
+                            f"[yellow]\"{escape(entry['text'])}\"[/]"
+                        )
+                        issues += 1
+                    else:
+                        diff: RuleDifference = entry["diff"]
+                        console.print(
+                            f"              [dim]•[/] [dim](line {entry['line_en']} en, {entry['line_tr']} tr)[/]"
+                        )
+                        console.print(f"                  [dim]{diff.description}[/]")
+                        if verbose:
+                            console.print(f"                  [green]en:[/] {escape(diff.english_snippet)}")
+                            console.print(f"                  [red]tr:[/] {escape(diff.translated_snippet)}")
+                        issues += 1
 
     return issues
 
diff --git a/PythonScripts/audit_translations/tests/golden/rich/structure_diff_nonverbose.golden b/PythonScripts/audit_translations/tests/golden/rich/structure_diff_nonverbose.golden
index 40fd15c6..314ad234 100644
--- a/PythonScripts/audit_translations/tests/golden/rich/structure_diff_nonverbose.golden
+++ b/PythonScripts/audit_translations/tests/golden/rich/structure_diff_nonverbose.golden
@@ -4,6 +4,8 @@
   English: 1 rules  →  Translated: 1 rules
 ────────────────────────────────────────────────────────────────────────────────
 
-  ≠ Rule Differences [1] (structural differences between en and translation)
-      • struct-rule  (line 9 en, 1 tr)
-          Rule structure differs (test/if/then/else blocks)
+  ≠ Rule Issues [1] (grouped by rule and issue type)
+      • struct-rule (mi)
+          Structure Differences [1]
+              • (line 9 en, 1 tr)
+                  Rule structure differs (test/if/then/else blocks)
diff --git a/PythonScripts/audit_translations/tests/golden/rich/structure_diff_verbose.golden b/PythonScripts/audit_translations/tests/golden/rich/structure_diff_verbose.golden
index 3af596bd..ec624426 100644
--- a/PythonScripts/audit_translations/tests/golden/rich/structure_diff_verbose.golden
+++ b/PythonScripts/audit_translations/tests/golden/rich/structure_diff_verbose.golden
@@ -4,8 +4,10 @@
   English: 1 rules  →  Translated: 1 rules
 ────────────────────────────────────────────────────────────────────────────────
 
-  ≠ Rule Differences [1] (structural differences between en and translation)
-      • struct-rule  (line 9 en, 1 tr)
-          Rule structure differs (test/if/then/else blocks)
-          en: replace: test: if: then: else:
-          tr: replace: test: if: then:
+  ≠ Rule Issues [1] (grouped by rule and issue type)
+      • struct-rule (mi)
+          Structure Differences [1]
+              • (line 9 en, 1 tr)
+                  Rule structure differs (test/if/then/else blocks)
+                  en: replace: test: if: then: else:
+                  tr: replace: test: if: then:

From f299d7fda6b428724422575206aaaa1048a25d66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Moritz=20Gro=C3=9F?= <hi@mgross.dev>
Date: Tue, 17 Feb 2026 00:09:23 +0100
Subject: [PATCH 2/4] Group rich audit output by rule/type and expand coverage

- Render rich warnings as rule groups with per-issue-type subgroups

- Add auditor tests for subgroup ordering, mixed issue types, and verbose snippet behavior

- Add CLI rich end-to-end grouping checks plus calculus golden snapshot

- Improve docstrings and compact test argument setup
---
 PythonScripts/audit_translations/auditor.py   |  10 +-
 .../golden/rich/cli_calculus_verbose.golden   |  68 ++++++++
 .../audit_translations/tests/test_auditor.py  | 160 ++++++++++++++++++
 .../tests/test_cli_end_to_end.py              | 105 ++++++++++--
 4 files changed, 326 insertions(+), 17 deletions(-)
 create mode 100644 PythonScripts/audit_translations/tests/golden/rich/cli_calculus_verbose.golden

diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py
index ef9fae24..0e0547fa 100644
--- a/PythonScripts/audit_translations/auditor.py
+++ b/PythonScripts/audit_translations/auditor.py
@@ -161,7 +161,9 @@ def issue_type_sort_key(issue_type: str) -> Tuple[int, str]:
     """
     Stable ordering for per-rule issue groups.
 
-    Differences are grouped by diff type under the same rule.
+    The first tuple element defines user-facing priority (missing/untranslated/
+    match/condition/variables/structure/extra). The second element keeps sorting
+    deterministic for unknown keys.
     """
     order = {
         "missing_rule": 0,
@@ -176,6 +178,12 @@ def issue_type_sort_key(issue_type: str) -> Tuple[int, str]:
 
 
 def issue_type_label(issue_type: str) -> str:
+    """
+    Return the display label used in rich grouped output.
+
+    Unknown issue types fall back to their raw key so renderer behavior remains
+    robust when new categories are introduced.
+    """
     labels = {
         "missing_rule": "Missing in Translation",
         "untranslated_text": "Untranslated Text",
diff --git a/PythonScripts/audit_translations/tests/golden/rich/cli_calculus_verbose.golden b/PythonScripts/audit_translations/tests/golden/rich/cli_calculus_verbose.golden
new file mode 100644
index 00000000..949f58c4
--- /dev/null
+++ b/PythonScripts/audit_translations/tests/golden/rich/cli_calculus_verbose.golden
@@ -0,0 +1,68 @@
+╭──────────────────────────────────────────────────────────────────────────────╮
+│ MathCAT Translation Audit: ES                                                │
+╰──────────────────────────────────────────────────────────────────────────────╯
+
+  Comparing against English (en) reference files
+  Files to check: 1
+
+────────────────────────────────────────────────────────────────────────────────
+⚠ SharedRules/calculus.yaml
+  English: 4 rules  →  Translated: 3 rules
+────────────────────────────────────────────────────────────────────────────────
+
+  ≠ Rule Issues [13] (grouped by rule and issue type)
+      • laplacian (laplacian)
+          Missing in Translation [1]
+              • (line 4 in English)
+      • divergence (divergence)
+          Untranslated Text [3]
+              • (line 10 tr) "divergence"
+              • (line 11 tr) "div"
+              • (line 12 tr) "of"
+          Match Pattern Differences [1]
+              • (line 22 en, 6 tr)
+                  Match pattern differs
+                  en: count(*) = 1
+                  tr: .
+          Condition Differences [1]
+              • (line 25 en, 9 tr)
+                  Conditions differ
+                  en: $Verbosity='Terse', not(IsNode(*[1], 'leaf'))
+                  tr: $Verbosity='Verbose', not(IsNode(*[1], 'leaf'))
+      • curl (curl)
+          Untranslated Text [1]
+              • (line 22 tr) "curl of"
+          Match Pattern Differences [1]
+              • (line 35 en, 20 tr)
+                  Match pattern differs
+                  en: count(*) = 1
+                  tr: .
+          Condition Differences [1]
+              • (line 39 en, 24 tr)
+                  Conditions differ
+                  en: $Verbosity!='Terse', not(IsNode(*[1], 'leaf'))
+                  tr: not(IsNode(*[1], 'leaf'))
+          Structure Differences [1]
+              • (line 38 en, 18 tr)
+                  Rule structure differs (test/if/then/else blocks)
+                  en: replace: test: if: then: test: if: then:
+                  tr: replace: test: if: then:
+      • gradient (gradient)
+          Untranslated Text [2]
+              • (line 34 tr) "gradient of"
+              • (line 35 tr) "del"
+          Match Pattern Differences [1]
+              • (line 48 en, 30 tr)
+                  Match pattern differs
+                  en: count(*) = 1
+                  tr: .
+╭──────────────────────────────────────────────────────────────────────────────╮
+│                 SUMMARY                                                      │
+│   Files checked                     1                                        │
+│   Files with issues                 1                                        │
+│   Files OK                          0                                        │
+│   Missing rules                     1                                        │
+│   Untranslated text                 6                                        │
+│   Rule differences                  6                                        │
+│   Extra rules                       0                                        │
+╰──────────────────────────────────────────────────────────────────────────────╯
diff --git a/PythonScripts/audit_translations/tests/test_auditor.py b/PythonScripts/audit_translations/tests/test_auditor.py
index 8ed22a96..82923311 100644
--- a/PythonScripts/audit_translations/tests/test_auditor.py
+++ b/PythonScripts/audit_translations/tests/test_auditor.py
@@ -367,3 +367,163 @@ def test_print_warnings_still_shows_missing_else() -> None:
 
     # Should report exactly 1 issue (the structure difference)
     assert issues_count == 1, f"Expected 1 issue but got {issues_count}"
+
+
+def test_print_warnings_groups_multiple_subgroups_for_single_rule(fixed_console_width) -> None:
+    """
+    Ensure one rule can render multiple subgroup types in stable order.
+
+    This covers the new grouped renderer path where a single rule can include
+    untranslated entries plus multiple diff types. It validates both subgroup
+    labels and the expected ordering policy.
+    """
+    en = make_rule("grouped-rule", "mi", 10, "en raw")
+    tr = make_rule("grouped-rule", "mi", 20, "tr raw")
+    en.line_map = {"match": [11], "condition": [12], "variables": [13]}
+    tr.line_map = {"match": [21], "condition": [22], "variables": [23]}
+
+    diffs = [
+        RuleDifference(
+            english_rule=en,
+            translated_rule=tr,
+            diff_type="match",
+            description="Match pattern differs",
+            english_snippet="en-match",
+            translated_snippet="tr-match",
+        ),
+        RuleDifference(
+            english_rule=en,
+            translated_rule=tr,
+            diff_type="condition",
+            description="Conditions differ",
+            english_snippet="en-cond",
+            translated_snippet="tr-cond",
+        ),
+        RuleDifference(
+            english_rule=en,
+            translated_rule=tr,
+            diff_type="variables",
+            description="Variable definitions differ",
+            english_snippet="en-var",
+            translated_snippet="tr-var",
+        ),
+    ]
+    result = ComparisonResult(
+        missing_rules=[],
+        extra_rules=[],
+        untranslated_text=[(tr, [("t", "first", 24), ("ct", "second", 25)])],
+        rule_differences=diffs,
+        file_path="",
+        english_rule_count=1,
+        translated_rule_count=1,
+    )
+
+    with console.capture() as capture:
+        issues_count = print_warnings(result, "grouped.yaml", verbose=False)
+    output = capture.get()
+
+    assert output.count("• grouped-rule (mi)") == 1
+    assert "Untranslated Text [2]" in output
+    assert "Match Pattern Differences [1]" in output
+    assert "Condition Differences [1]" in output
+    assert "Variable Differences [1]" in output
+
+    untranslated_index = output.index("Untranslated Text [2]")
+    match_index = output.index("Match Pattern Differences [1]")
+    condition_index = output.index("Condition Differences [1]")
+    variable_index = output.index("Variable Differences [1]")
+    assert untranslated_index < match_index < condition_index < variable_index
+
+    assert issues_count == 5
+
+
+def test_print_warnings_groups_missing_and_extra_by_rule(fixed_console_width) -> None:
+    """
+    Ensure missing, extra, and diff issues are grouped under their own rule headers.
+
+    This verifies grouping across multiple rules: each rule should appear once,
+    with only its relevant subgroup block(s), and issue counting should remain
+    aligned with rendered leaf items.
+    """
+    missing = make_rule("missing-rule", "mn", 30, "missing raw")
+    extra = make_rule("extra-rule", "mo", 40, "extra raw")
+    en = make_rule("diff-rule", "mrow", 50, "diff en")
+    tr = make_rule("diff-rule", "mrow", 60, "diff tr")
+    en.line_map = {"condition": [51]}
+    tr.line_map = {"condition": [61]}
+    diff = RuleDifference(
+        english_rule=en,
+        translated_rule=tr,
+        diff_type="condition",
+        description="Conditions differ",
+        english_snippet="en-only",
+        translated_snippet="tr-only",
+    )
+
+    result = ComparisonResult(
+        missing_rules=[missing],
+        extra_rules=[extra],
+        untranslated_text=[],
+        rule_differences=[diff],
+        file_path="",
+        english_rule_count=2,
+        translated_rule_count=2,
+    )
+
+    with console.capture() as capture:
+        issues_count = print_warnings(result, "mixed.yaml", verbose=False)
+    output = capture.get()
+
+    assert output.count("• missing-rule (mn)") == 1
+    assert output.count("• extra-rule (mo)") == 1
+    assert output.count("• diff-rule (mrow)") == 1
+    assert "Missing in Translation [1]" in output
+    assert "Extra in Translation [1]" in output
+    assert "Condition Differences [1]" in output
+    assert issues_count == 3
+
+
+def test_print_warnings_verbose_shows_snippets_only_for_differences(fixed_console_width) -> None:
+    """
+    Ensure verbose snippet lines are printed only for rule differences.
+
+    Missing and untranslated groups should not emit en/tr snippet lines in
+    verbose mode; only diff subgroups should include these details.
+    """
+    missing = make_rule("missing-rule", "mn", 10, "missing raw")
+    tr_untranslated = make_rule("untranslated-rule", "mi", 20, "untranslated raw")
+    en = make_rule("diff-rule", "mrow", 30, "diff en")
+    tr = make_rule("diff-rule", "mrow", 40, "diff tr")
+    en.line_map = {"match": [31]}
+    tr.line_map = {"match": [41]}
+
+    diff = RuleDifference(
+        english_rule=en,
+        translated_rule=tr,
+        diff_type="match",
+        description="Match pattern differs",
+        english_snippet="en-snippet",
+        translated_snippet="tr-snippet",
+    )
+    result = ComparisonResult(
+        missing_rules=[missing],
+        extra_rules=[],
+        untranslated_text=[(tr_untranslated, [("t", "leave me", 21)])],
+        rule_differences=[diff],
+        file_path="",
+        english_rule_count=2,
+        translated_rule_count=2,
+    )
+
+    with console.capture() as capture:
+        issues_count = print_warnings(result, "verbose.yaml", verbose=True)
+    output = capture.get()
+
+    assert "Missing in Translation [1]" in output
+    assert "Untranslated Text [1]" in output
+    assert "Match Pattern Differences [1]" in output
+    assert output.count("en:") == 1
+    assert output.count("tr:") == 1
+    assert "en-snippet" in output
+    assert "tr-snippet" in output
+    assert issues_count == 3
diff --git a/PythonScripts/audit_translations/tests/test_cli_end_to_end.py b/PythonScripts/audit_translations/tests/test_cli_end_to_end.py
index ce1e325c..a0596457 100644
--- a/PythonScripts/audit_translations/tests/test_cli_end_to_end.py
+++ b/PythonScripts/audit_translations/tests/test_cli_end_to_end.py
@@ -12,6 +12,7 @@
 from pathlib import Path
 
 from .. import cli as audit_cli
+from ..auditor import console
 
 
 def fixture_rules_dir() -> Path:
@@ -37,14 +38,7 @@ def test_cli_main_jsonl_output_matches_fixture(capsys, monkeypatch) -> None:
 
     This validates argparse wiring and output formatting without spawning a new process.
     """
-    rules_dir = fixture_rules_dir()
-    args = [
-        "es",
-        "--format",
-        "jsonl",
-        "--rules-dir",
-        str(rules_dir),
-    ]
+    args = ["es", "--format", "jsonl", "--rules-dir", str(fixture_rules_dir())]
 
     monkeypatch.setattr(sys, "argv", ["audit_translations", *args])
     audit_cli.main()
@@ -58,14 +52,7 @@ def test_cli_module_jsonl_output_matches_fixture() -> None:
 
     This validates module execution, environment wiring, and exit behavior.
     """
-    rules_dir = fixture_rules_dir()
-    args = [
-        "es",
-        "--format",
-        "jsonl",
-        "--rules-dir",
-        str(rules_dir),
-    ]
+    args = ["es", "--format", "jsonl", "--rules-dir", str(fixture_rules_dir())]
 
     python_scripts_dir = Path(__file__).resolve().parents[2]
     env = os.environ.copy()
@@ -82,3 +69,89 @@ def test_cli_module_jsonl_output_matches_fixture() -> None:
         check=True,
     )
     assert_issue_counts(parse_jsonl(result.stdout))
+
+
+def test_cli_main_rich_output_groups_by_rule_and_type(capsys, monkeypatch) -> None:
+    """
+    Ensure rich CLI output is grouped by rule and subgrouped by issue type.
+
+    This is a behavioral assertion test (not snapshot-based): it checks that
+    core grouping markers and subgroup ordering are visible in user-facing CLI
+    output for a representative fixture file.
+    """
+    args = ["es", "--rules-dir", str(fixture_rules_dir()), "--file", "SharedRules/calculus.yaml", "--verbose"]
+
+    old_width = console.width
+    console.width = 80
+    try:
+        monkeypatch.setattr(sys, "argv", ["audit_translations", *args])
+        audit_cli.main()
+        output = capsys.readouterr().out
+    finally:
+        console.width = old_width
+
+    assert "≠ Rule Issues [13] (grouped by rule and issue type)" in output
+    assert "• divergence (divergence)" in output
+    assert "Untranslated Text [3]" in output
+    assert "Match Pattern Differences [1]" in output
+    assert "Condition Differences [1]" in output
+
+    untranslated_idx = output.index("Untranslated Text [3]")
+    match_idx = output.index("Match Pattern Differences [1]")
+    condition_idx = output.index("Condition Differences [1]")
+    assert untranslated_idx < match_idx < condition_idx
+
+
+def test_cli_main_rich_output_matches_grouped_golden(capsys, monkeypatch) -> None:
+    """
+    Ensure rich CLI grouped rendering stays stable for a multi-rule fixture.
+
+    The golden file captures overall visual layout so formatting regressions in
+    grouped sections are caught even when functional issue counts stay the same.
+    """
+    args = ["es", "--rules-dir", str(fixture_rules_dir()), "--file", "SharedRules/calculus.yaml", "--verbose"]
+    golden_path = Path(__file__).resolve().parent / "golden" / "rich" / "cli_calculus_verbose.golden"
+
+    old_width = console.width
+    console.width = 80
+
+    try:
+        monkeypatch.setattr(sys, "argv", ["audit_translations", *args])
+        audit_cli.main()
+        output = capsys.readouterr().out
+    finally:
+        console.width = old_width
+
+    assert output == golden_path.read_text(encoding="utf-8")
+
+
+def test_cli_module_rich_output_groups_by_rule_and_type() -> None:
+    """
+    Ensure `python -m audit_translations` rich output also shows grouped sections.
+
+    This complements the in-process CLI test by validating module execution in
+    a subprocess with environment wiring and terminal width constraints.
+    """
+    args = ["es", "--rules-dir", str(fixture_rules_dir()), "--file", "SharedRules/calculus.yaml", "--verbose"]
+
+    python_scripts_dir = Path(__file__).resolve().parents[2]
+    env = os.environ.copy()
+    env["PYTHONPATH"] = os.pathsep.join(
+        [str(python_scripts_dir), env.get("PYTHONPATH", "")]
+    ).strip(os.pathsep)
+    env["COLUMNS"] = "80"
+
+    result = subprocess.run(
+        [sys.executable, "-m", "audit_translations", *args],
+        capture_output=True,
+        text=True,
+        cwd=str(python_scripts_dir),
+        env=env,
+        check=True,
+    )
+
+    output = result.stdout
+    assert "≠ Rule Issues [13] (grouped by rule and issue type)" in output
+    assert "• laplacian (laplacian)" in output
+    assert "• divergence (divergence)" in output
+    assert "Structure Differences [1]" in output

From 87e7794a878ff22ddeef3b5821fd1a94ae5d6206 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Moritz=20Gro=C3=9F?= <hi@mgross.dev>
Date: Tue, 17 Feb 2026 01:27:00 +0100
Subject: [PATCH 3/4] Improve --only CLI coverage and fix rich Path filename
 crash

---
 PythonScripts/audit_translations/README.md    |  3 +-
 PythonScripts/audit_translations/auditor.py   |  3 +-
 .../tests/test_cli_end_to_end.py              | 98 +++++++++++++++++++
 3 files changed, 101 insertions(+), 3 deletions(-)

diff --git a/PythonScripts/audit_translations/README.md b/PythonScripts/audit_translations/README.md
index 7104cc74..ef084922 100644
--- a/PythonScripts/audit_translations/README.md
+++ b/PythonScripts/audit_translations/README.md
@@ -56,7 +56,6 @@ The tool automatically adjusts its matching logic based on the file type:
 
 **Syntax:**
 ```bash
-# Preferred: console script (no -m needed)
 uv run audit-translations <language> [--file <specific_file>]
 uv run audit-translations --list
 
@@ -111,4 +110,4 @@ uv run --project PythonScripts audit-translations --list
 
 ### Testing
 
-```uv run python -m pytest```
+```uv run pytest```
diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py
index 0e0547fa..b26a71fa 100644
--- a/PythonScripts/audit_translations/auditor.py
+++ b/PythonScripts/audit_translations/auditor.py
@@ -395,6 +395,7 @@ def write(self, issue: dict) -> None:
 def print_warnings(result: ComparisonResult, file_name: str, verbose: bool = False) -> int:
     """Print warnings to console. Returns count of issues found."""
     issues = 0
+    display_name = Path(file_name).as_posix()
 
     has_issues = result.missing_rules or result.untranslated_text or result.extra_rules or result.rule_differences
     if not has_issues:
@@ -404,7 +405,7 @@ def print_warnings(result: ComparisonResult, file_name: str, verbose: bool = Fal
                   ("red", "✗") if result.translated_rule_count == 0 else ("yellow", "⚠")
     console.print()
     console.rule(style="cyan")
-    console.print(f"[{style}]{icon}[/] [bold]{escape(file_name)}[/]")
+    console.print(f"[{style}]{icon}[/] [bold]{escape(display_name)}[/]")
     console.print(f"  [dim]English: {result.english_rule_count} rules  →  Translated: {result.translated_rule_count} rules[/]")
     console.rule(style="cyan")
 
diff --git a/PythonScripts/audit_translations/tests/test_cli_end_to_end.py b/PythonScripts/audit_translations/tests/test_cli_end_to_end.py
index a0596457..b9b49790 100644
--- a/PythonScripts/audit_translations/tests/test_cli_end_to_end.py
+++ b/PythonScripts/audit_translations/tests/test_cli_end_to_end.py
@@ -11,6 +11,8 @@
 from collections import Counter
 from pathlib import Path
 
+import pytest
+
 from .. import cli as audit_cli
 from ..auditor import console
 
@@ -71,6 +73,52 @@ def test_cli_module_jsonl_output_matches_fixture() -> None:
     assert_issue_counts(parse_jsonl(result.stdout))
 
 
+def test_cli_main_jsonl_only_filters_issue_types(capsys, monkeypatch) -> None:
+    """
+    Ensure --only limits JSONL output to the requested categories.
+
+    Uses in-process CLI invocation so argparse parsing and filter plumbing
+    are both exercised without subprocess overhead.
+    """
+    args = ["es", "--format", "jsonl", "--rules-dir", str(fixture_rules_dir()), "--only", "missing,extra"]
+
+    monkeypatch.setattr(sys, "argv", ["audit_translations", *args])
+    audit_cli.main()
+    issues = parse_jsonl(capsys.readouterr().out)
+
+    counts = Counter(issue["issue_type"] for issue in issues)
+    assert set(counts) == {"missing_rule", "extra_rule"}
+    assert counts["missing_rule"] == 4
+    assert counts["extra_rule"] == 3
+
+
+def test_cli_main_rich_only_filters_issue_groups(capsys, monkeypatch) -> None:
+    """
+    Ensure --only also filters visible rich subgroup sections.
+
+    We expect the missing/extra groups to remain, while the untranslated-text
+    group and every diff subgroup label are omitted from the rendered output.
+    """
+    args = ["es", "--rules-dir", str(fixture_rules_dir()), "--only", "missing,extra"]
+
+    old_width = console.width
+    console.width = 80
+    try:
+        monkeypatch.setattr(sys, "argv", ["audit_translations", *args])
+        audit_cli.main()
+        output = capsys.readouterr().out
+    finally:
+        console.width = old_width
+
+    assert "Missing in Translation" in output
+    assert "Extra in Translation" in output
+    assert "Untranslated Text" not in output
+    assert "Match Pattern Differences" not in output
+    assert "Condition Differences" not in output
+    assert "Variable Differences" not in output
+    assert "Structure Differences" not in output
+
+
 def test_cli_main_rich_output_groups_by_rule_and_type(capsys, monkeypatch) -> None:
     """
     Ensure rich CLI output is grouped by rule and subgrouped by issue type.
@@ -125,6 +173,56 @@ def test_cli_main_rich_output_matches_grouped_golden(capsys, monkeypatch) -> Non
     assert output == golden_path.read_text(encoding="utf-8")
 
 
+def test_cli_main_requires_language_or_list(capsys, monkeypatch) -> None:
+    """
+    Ensure the CLI exits with a clear error when neither a language nor --list is given.
+
+    This protects the expected help/error UX for accidental empty invocations.
+    """
+    monkeypatch.setattr(sys, "argv", ["audit_translations"])
+
+    with pytest.raises(SystemExit) as exc:
+        audit_cli.main()
+    output = capsys.readouterr().out
+
+    assert exc.value.code == 1
+    assert "Please specify a language code or use --list" in output
+
+
+def test_cli_main_rejects_unknown_only_token(capsys, monkeypatch) -> None:
+    """
+    Ensure unsupported --only tokens are rejected before audit execution.
+
+    This keeps filter behavior explicit and prevents silently ignored typos.
+    """
+    args = ["es", "--rules-dir", str(fixture_rules_dir()), "--only", "missing,bogus"]
+    monkeypatch.setattr(sys, "argv", ["audit_translations", *args])
+
+    with pytest.raises(SystemExit) as exc:
+        audit_cli.main()
+    output = capsys.readouterr().out
+
+    assert exc.value.code == 1
+    assert "Unknown issue types: bogus" in output
+
+
+def test_cli_main_reports_missing_region_directory(capsys, monkeypatch) -> None:
+    """
+    Ensure region variants fail fast when the requested subdirectory is absent.
+
+    This validates the error path for languages like es-mx when only es exists.
+    """
+    args = ["es-mx", "--rules-dir", str(fixture_rules_dir())]
+    monkeypatch.setattr(sys, "argv", ["audit_translations", *args])
+
+    with pytest.raises(SystemExit) as exc:
+        audit_cli.main()
+    output = capsys.readouterr().out
+
+    assert exc.value.code == 1
+    assert "Region directory not found" in output
+
+
 def test_cli_module_rich_output_groups_by_rule_and_type() -> None:
     """
     Ensure `python -m audit_translations` rich output also shows grouped sections.

From ea58cdff0e243532c9e330c7e4a0f09ce48244de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Moritz=20Gro=C3=9F?= <hi@mgross.dev>
Date: Tue, 17 Feb 2026 01:34:05 +0100
Subject: [PATCH 4/4] Test that audit-ignore suppresses untranslated and diff
 findings in compare_files

---
 .../audit_translations/tests/test_auditor.py  | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/PythonScripts/audit_translations/tests/test_auditor.py b/PythonScripts/audit_translations/tests/test_auditor.py
index 82923311..d757f5b4 100644
--- a/PythonScripts/audit_translations/tests/test_auditor.py
+++ b/PythonScripts/audit_translations/tests/test_auditor.py
@@ -149,6 +149,45 @@ def test_compare_files_merges_region_rules(tmp_path) -> None:
     assert result.extra_rules == []
 
 
+def test_compare_files_skips_untranslated_and_diffs_when_audit_ignored(tmp_path) -> None:
+    """
+    Ensure audit-ignore suppresses untranslated and diff findings for a rule.
+
+    The translated rule intentionally contains both a lowercase `t:` text key
+    and a `match` value that differs from the English rule. With an audit-ignore
+    marker present, neither should be surfaced by compare_files.
+    """
+    english_file = tmp_path / "en.yaml"
+    translated_file = tmp_path / "de.yaml"
+
+    english_file.write_text(
+        """- name: ignored-rule
+  tag: mo
+  match: "self::m:mo"
+  replace:
+    - T: "english"
+""",
+        encoding="utf-8",
+    )
+    translated_file.write_text(
+        """- name: ignored-rule
+  tag: mo  # audit-ignore
+  match: "self::m:mi"
+  replace:
+    - t: "nicht uebersetzt"
+""",
+        encoding="utf-8",
+    )
+
+    result = compare_files(str(english_file), str(translated_file))
+
+    assert result.missing_rules == []
+    assert result.extra_rules == []
+    assert result.untranslated_text == []
+    assert result.rule_differences == []
+    assert collect_issues(result, "de.yaml", "de") == []
+
+
 def test_get_yaml_files_includes_region(tmp_path) -> None:
     """
     Ensures get_yaml_files merges base and region file lists.