Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions PythonScripts/audit_translations/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ The tool automatically adjusts its matching logic based on the file type:

**Syntax:**
```bash
# Preferred: console script (no -m needed)
uv run audit-translations <language> [--file <specific_file>]
uv run audit-translations --list

Expand Down Expand Up @@ -111,4 +110,4 @@ uv run --project PythonScripts audit-translations --list

### Testing

```uv run python -m pytest```
```uv run pytest```
214 changes: 145 additions & 69 deletions PythonScripts/audit_translations/auditor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import os
import sys
from pathlib import Path
from typing import Iterable, List, Optional, TextIO, Tuple
from typing import Any, Dict, Iterable, List, Optional, TextIO, Tuple

from rich.console import Console
from rich.markup import escape
Expand Down Expand Up @@ -154,24 +154,46 @@ def rule_label(rule: RuleInfo) -> str:
if rule.name is None:
return f"[yellow]\"{escape(rule.key)}\"[/]"
tag = rule.tag or "unknown"
return f"[cyan]{escape(rule.name)}[/] [dim][{escape(tag)}][/]"
return f"[cyan]{escape(rule.name)}[/] [dim]({escape(tag)})[/]"


def print_rule_item(rule: RuleInfo, issue_line: int, context: str = ""):
    """
    Print one bullet line for a rule: its label followed by a line reference.

    Args:
        rule: Rule to display; rendered through ``rule_label``.
        issue_line: Line number shown inside the parenthesised suffix.
        context: Optional text appended directly after the line number
            (inserted verbatim, so include any leading space yourself).
    """
    console.print(f" [dim]•[/] {rule_label(rule)} [dim](line {issue_line}{context})[/]")
def issue_type_sort_key(issue_type: str) -> Tuple[int, str]:
    """
    Stable ordering for per-rule issue groups.

    The first tuple element defines user-facing priority (missing/untranslated/
    match/condition/variables/structure/extra). The second element keeps sorting
    deterministic for unknown keys.

    Args:
        issue_type: Issue category key, e.g. ``"missing_rule"`` or
            ``"rule_difference:match"``.

    Returns:
        A ``(rank, issue_type)`` tuple suitable for use as a sort ``key``.
    """
    order = {
        "missing_rule": 0,
        "untranslated_text": 1,
        "rule_difference:match": 2,
        "rule_difference:condition": 3,
        "rule_difference:variables": 4,
        "rule_difference:structure": 5,
        "extra_rule": 6,
    }
    # Unrecognised categories share one rank larger than every known rank, so
    # they sort last and are then ordered alphabetically by the raw key.
    unknown_rank = 99
    return order.get(issue_type, unknown_rank), issue_type

def print_diff_item(diff: RuleDifference, line_en: int, line_tr: int, verbose: bool = False):
    """
    Print a single rule difference.

    Args:
        diff: The difference to report; its ``english_rule`` supplies the label.
        line_en: Line number reported for the English side.
        line_tr: Line number reported for the translated side.
        verbose: When true, also print the English and translated snippets.
    """
    rule = diff.english_rule
    console.print(
        f" [dim]•[/] {rule_label(rule)} "
        f"[dim](line {line_en} en, {line_tr} tr)[/]"
    )
    console.print(f" [dim]{diff.description}[/]")
    if verbose:
        # Snippets are passed through escape() before being placed in markup.
        console.print(f" [green]en:[/] {escape(diff.english_snippet)}")
        console.print(f" [red]tr:[/] {escape(diff.translated_snippet)}")

def issue_type_label(issue_type: str) -> str:
    """
    Return the display label used in rich grouped output.

    Unknown issue types fall back to their raw key so renderer behavior remains
    robust when new categories are introduced.

    Args:
        issue_type: Issue category key, e.g. ``"missing_rule"`` or
            ``"rule_difference:match"``.

    Returns:
        The human-readable heading for the category, or ``issue_type`` itself
        when no heading is registered for it.
    """
    labels = {
        "missing_rule": "Missing in Translation",
        "untranslated_text": "Untranslated Text",
        "rule_difference:match": "Match Pattern Differences",
        "rule_difference:condition": "Condition Differences",
        "rule_difference:variables": "Variable Differences",
        "rule_difference:structure": "Structure Differences",
        "extra_rule": "Extra in Translation",
    }
    return labels.get(issue_type, issue_type)


def issue_base(rule: RuleInfo, file_name: str, language: str) -> dict:
Expand Down Expand Up @@ -373,6 +395,7 @@ def write(self, issue: dict) -> None:
def print_warnings(result: ComparisonResult, file_name: str, verbose: bool = False) -> int:
"""Print warnings to console. Returns count of issues found."""
issues = 0
display_name = Path(file_name).as_posix()

has_issues = result.missing_rules or result.untranslated_text or result.extra_rules or result.rule_differences
if not has_issues:
Expand All @@ -382,64 +405,117 @@ def print_warnings(result: ComparisonResult, file_name: str, verbose: bool = Fal
("red", "✗") if result.translated_rule_count == 0 else ("yellow", "⚠")
console.print()
console.rule(style="cyan")
console.print(f"[{style}]{icon}[/] [bold]{escape(file_name)}[/]")
console.print(f"[{style}]{icon}[/] [bold]{escape(display_name)}[/]")
console.print(f" [dim]English: {result.english_rule_count} rules → Translated: {result.translated_rule_count} rules[/]")
console.rule(style="cyan")

if result.missing_rules:
console.print(f"\n [red]✗[/] [bold]Missing Rules[/] [[red]{len(result.missing_rules)}[/]] [dim](in English but not in translation)[/]")
for rule in result.missing_rules:
print_rule_item(rule, issue_line=rule.line_number, context=" in English")
issues += 1

if result.untranslated_text:
untranslated_count = sum(len(entries) for _, entries in result.untranslated_text)
console.print(f"\n [yellow]⚠[/] [bold]Untranslated Text[/] [[yellow]{untranslated_count}[/]] [dim](lowercase t/ot/ct keys)[/]")
for rule, entries in result.untranslated_text:
for _, text, line in entries:
issue_line = line or rule.line_number
print_rule_item(rule, issue_line=issue_line)
console.print(f" [dim]→[/] [yellow]\"{escape(text)}\"[/]")
issues += 1

if result.rule_differences:
# Count only diffs that will actually be displayed
displayable_diffs = []
for diff in result.rule_differences:
if diff.diff_type == "structure":
en_tokens = extract_structure_elements(diff.english_rule.data)
tr_tokens = extract_structure_elements(diff.translated_rule.data)
en_token, tr_token, mismatch_pos = first_structure_mismatch(en_tokens, tr_tokens)

# Skip reporting when tokens are misaligned (both exist but differ)
# This avoids misleading line numbers when entire blocks are missing/added
if en_token is not None and tr_token is not None and en_token != tr_token:
continue

line_en = resolve_issue_line(diff.english_rule, "structure", en_token)
line_tr = resolve_issue_line(diff.translated_rule, "structure", tr_token)
# Skip structure diffs where we can't find both tokens
if line_en is None or line_tr is None:
continue
else:
line_en = resolve_issue_line(diff.english_rule, diff.diff_type)
line_tr = resolve_issue_line(diff.translated_rule, diff.diff_type)
displayable_diffs.append((diff, line_en, line_tr))

if displayable_diffs:
console.print(
f"\n [magenta]≠[/] [bold]Rule Differences[/] "
f"[[magenta]{len(displayable_diffs)}[/]] [dim](structural differences between en and translation)[/]"
grouped_issues: Dict[str, Dict[str, Any]] = {}

def add_issue(rule: RuleInfo, issue_type: str, payload: Dict[str, Any]) -> None:
if rule.key not in grouped_issues:
grouped_issues[rule.key] = {
"rule": rule,
"by_type": {},
}
type_map: Dict[str, List[Dict[str, Any]]] = grouped_issues[rule.key]["by_type"]
type_map.setdefault(issue_type, []).append(payload)

for rule in result.missing_rules:
add_issue(
rule,
"missing_rule",
{"line_en": rule.line_number},
)

for rule, entries in result.untranslated_text:
for _, text, line in entries:
issue_line = line or rule.line_number
add_issue(
rule,
"untranslated_text",
{"line_tr": issue_line, "text": text},
)
for diff, line_en, line_tr in displayable_diffs:
print_diff_item(diff, line_en=line_en, line_tr=line_tr, verbose=verbose)
issues += 1

if result.extra_rules:
console.print(f"\n [blue]ℹ[/] [bold]Extra Rules[/] [[blue]{len(result.extra_rules)}[/]] [dim](may be intentional)[/]")
for rule in result.extra_rules:
print_rule_item(rule, issue_line=rule.line_number)
issues += 1

for diff in result.rule_differences:
if diff.diff_type == "structure":
en_tokens = extract_structure_elements(diff.english_rule.data)
tr_tokens = extract_structure_elements(diff.translated_rule.data)
en_token, tr_token, mismatch_pos = first_structure_mismatch(en_tokens, tr_tokens)

# Skip reporting when tokens are misaligned (both exist but differ)
# This avoids misleading line numbers when entire blocks are missing/added
if en_token is not None and tr_token is not None and en_token != tr_token:
continue

line_en = resolve_issue_line(diff.english_rule, "structure", en_token)
line_tr = resolve_issue_line(diff.translated_rule, "structure", tr_token)
# Skip structure diffs where we can't find both tokens
if line_en is None or line_tr is None:
continue
else:
line_en = resolve_issue_line(diff.english_rule, diff.diff_type)
line_tr = resolve_issue_line(diff.translated_rule, diff.diff_type)

add_issue(
diff.english_rule,
f"rule_difference:{diff.diff_type}",
{"line_en": line_en, "line_tr": line_tr, "diff": diff},
)

for rule in result.extra_rules:
add_issue(
rule,
"extra_rule",
{"line_tr": rule.line_number},
)

if grouped_issues:
total_grouped_issues = sum(
len(entries)
for group in grouped_issues.values()
for entries in group["by_type"].values()
)
console.print(
f"\n [magenta]≠[/] [bold]Rule Issues[/] "
f"[[magenta]{total_grouped_issues}[/]] [dim](grouped by rule and issue type)[/]"
)
for group in grouped_issues.values():
rule = group["rule"]
by_type: Dict[str, List[Dict[str, Any]]] = group["by_type"]
console.print(f" [dim]•[/] {rule_label(rule)}")
for issue_type in sorted(by_type.keys(), key=issue_type_sort_key):
entries = by_type[issue_type]
console.print(
f" [dim]{issue_type_label(issue_type)} "
f"[{len(entries)}][/]"
)
for entry in entries:
if issue_type == "missing_rule":
console.print(
f" [dim]•[/] [dim](line {entry['line_en']} in English)[/]"
)
issues += 1
elif issue_type == "extra_rule":
console.print(
f" [dim]•[/] [dim](line {entry['line_tr']} in translation)[/]"
)
issues += 1
elif issue_type == "untranslated_text":
console.print(
f" [dim]•[/] [dim](line {entry['line_tr']} tr)[/] "
f"[yellow]\"{escape(entry['text'])}\"[/]"
)
issues += 1
else:
diff: RuleDifference = entry["diff"]
console.print(
f" [dim]•[/] [dim](line {entry['line_en']} en, {entry['line_tr']} tr)[/]"
)
console.print(f" [dim]{diff.description}[/]")
if verbose:
console.print(f" [green]en:[/] {escape(diff.english_snippet)}")
console.print(f" [red]tr:[/] {escape(diff.translated_snippet)}")
issues += 1

return issues

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
╭──────────────────────────────────────────────────────────────────────────────╮
│ MathCAT Translation Audit: ES │
╰──────────────────────────────────────────────────────────────────────────────╯

Comparing against English (en) reference files
Files to check: 1

────────────────────────────────────────────────────────────────────────────────
⚠ SharedRules/calculus.yaml
English: 4 rules → Translated: 3 rules
────────────────────────────────────────────────────────────────────────────────

≠ Rule Issues [13] (grouped by rule and issue type)
• laplacian (laplacian)
Missing in Translation [1]
• (line 4 in English)
• divergence (divergence)
Untranslated Text [3]
• (line 10 tr) "divergence"
• (line 11 tr) "div"
• (line 12 tr) "of"
Match Pattern Differences [1]
• (line 22 en, 6 tr)
Match pattern differs
en: count(*) = 1
tr: .
Condition Differences [1]
• (line 25 en, 9 tr)
Conditions differ
en: $Verbosity='Terse', not(IsNode(*[1], 'leaf'))
tr: $Verbosity='Verbose', not(IsNode(*[1], 'leaf'))
• curl (curl)
Untranslated Text [1]
• (line 22 tr) "curl of"
Match Pattern Differences [1]
• (line 35 en, 20 tr)
Match pattern differs
en: count(*) = 1
tr: .
Condition Differences [1]
• (line 39 en, 24 tr)
Conditions differ
en: $Verbosity!='Terse', not(IsNode(*[1], 'leaf'))
tr: not(IsNode(*[1], 'leaf'))
Structure Differences [1]
• (line 38 en, 18 tr)
Rule structure differs (test/if/then/else blocks)
en: replace: test: if: then: test: if: then:
tr: replace: test: if: then:
• gradient (gradient)
Untranslated Text [2]
• (line 34 tr) "gradient of"
• (line 35 tr) "del"
Match Pattern Differences [1]
• (line 48 en, 30 tr)
Match pattern differs
en: count(*) = 1
tr: .
╭──────────────────────────────────────────────────────────────────────────────╮
│ SUMMARY │
│ Files checked 1 │
│ Files with issues 1 │
│ Files OK 0 │
│ Missing rules 1 │
│ Untranslated text 6 │
│ Rule differences 6 │
│ Extra rules 0 │
╰──────────────────────────────────────────────────────────────────────────────╯
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
English: 1 rules → Translated: 1 rules
────────────────────────────────────────────────────────────────────────────────

≠ Rule Differences [1] (structural differences between en and translation)
• struct-rule (line 9 en, 1 tr)
Rule structure differs (test/if/then/else blocks)
≠ Rule Issues [1] (grouped by rule and issue type)
• struct-rule (mi)
Structure Differences [1]
• (line 9 en, 1 tr)
Rule structure differs (test/if/then/else blocks)
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
English: 1 rules → Translated: 1 rules
────────────────────────────────────────────────────────────────────────────────

≠ Rule Differences [1] (structural differences between en and translation)
• struct-rule (line 9 en, 1 tr)
Rule structure differs (test/if/then/else blocks)
en: replace: test: if: then: else:
tr: replace: test: if: then:
≠ Rule Issues [1] (grouped by rule and issue type)
• struct-rule (mi)
Structure Differences [1]
• (line 9 en, 1 tr)
Rule structure differs (test/if/then/else blocks)
en: replace: test: if: then: else:
tr: replace: test: if: then:
Loading