diff --git a/PythonScripts/audit_translations/parsers.py b/PythonScripts/audit_translations/parsers.py index 1553a2b0..7f1c3165 100644 --- a/PythonScripts/audit_translations/parsers.py +++ b/PythonScripts/audit_translations/parsers.py @@ -296,6 +296,19 @@ def normalize_match(value: Any) -> str: def normalize_xpath(value: str) -> str: return " ".join(value.split()) +def dedup_list(values: List[str]) -> List[str]: + """ + Return a list without duplicates while preserving first-seen order. + Originally, rule differences were stored as sets, which lost their original order. + That was not helpful, so this function is used instead to keep the order. + + Example: + >>> dedup_list(["if:a", "if:b", "if:a"]) + ['if:a', 'if:b'] + """ + + return list(dict.fromkeys(values)) # dict preserves insertion order (guaranteed in Python 3.7+) + def extract_match_pattern(rule_data: Any) -> str: if isinstance(rule_data, dict): @@ -403,8 +416,8 @@ def diff_rules(english_rule: RuleInfo, translated_rule: RuleInfo) -> List[RuleDi translated_rule=translated_rule, diff_type='condition', description='Conditions differ', - english_snippet=', '.join(sorted(en_set)) or '(none)', - translated_snippet=', '.join(sorted(tr_set)) or '(none)' + english_snippet=', '.join(dedup_list(en_conditions)) or '(none)', + translated_snippet=', '.join(dedup_list(tr_conditions)) or '(none)' )) # Check variable differences diff --git a/PythonScripts/audit_translations/tests/golden/jsonl/de.json b/PythonScripts/audit_translations/tests/golden/jsonl/de.json index 66cb0214..9ac62086 100644 --- a/PythonScripts/audit_translations/tests/golden/jsonl/de.json +++ b/PythonScripts/audit_translations/tests/golden/jsonl/de.json @@ -158,8 +158,8 @@ "issue_type": "rule_difference", "diff_type": "condition", "description": "Conditions differ", - "english_snippet": "$Setting = 'Value', $Verbosity!='Terse', *[2][.='2'], parent::m:minus", - "translated_snippet": "$Setting = 'Value', *[2][.='2'], parent::m:minus", + "english_snippet": 
"$Verbosity!='Terse', $Setting = 'Value', parent::m:minus, *[2][.='2']", + "translated_snippet": "$Setting = 'Value', parent::m:minus, *[2][.='2']", "untranslated_texts": [], "_explanation": "structure_misaligned.yaml: English has extra test block causing misalignment. Fix filters out misleading structure differences but reports condition difference." }, diff --git a/PythonScripts/audit_translations/tests/test_parsers.py b/PythonScripts/audit_translations/tests/test_parsers.py index d500ced2..52d3b6aa 100644 --- a/PythonScripts/audit_translations/tests/test_parsers.py +++ b/PythonScripts/audit_translations/tests/test_parsers.py @@ -1,12 +1,13 @@ """ Tests for parsers.py. """ +from typing import List import pytest from ruamel.yaml import YAML from ruamel.yaml.scanner import ScannerError -from ..dataclasses import RuleInfo +from ..dataclasses import RuleInfo, RuleDifference from ..parsers import ( diff_rules, extract_conditions, @@ -383,6 +384,69 @@ def test_detects_condition_difference(self): diffs = diff_rules(en, tr) assert any(d.diff_type == "condition" for d in diffs) + def test_condition_snippet_preserves_rule_order(self): + """ + Condition snippets should preserve the order seen in each rule. + Originally, alphabetical order was used, which is not very helpful. + """ + en = make_rule( + "test", + "mo", + { + "test": { + "if": "condition_b", + "then": [ + { + "test": { + "if": "condition_a", + "then": [{"T": "x"}], + } + } + ], + } + }, + ) + tr = make_rule("test", "mo", {"if": "condition_c"}) + diffs: List[RuleDifference] = diff_rules(en, tr) + cond_diff: RuleDifference = [d for d in diffs if d.diff_type == "condition"][0] + assert cond_diff.english_snippet == "condition_b, condition_a" + assert cond_diff.translated_snippet == "condition_c" + + def test_condition_snippet_deduplicates_repeated_conditions(self): + """ + Repeated conditions should be shown once, in first-seen order. 
+ """ + en = make_rule( + "test", + "mo", + { + "test": { + "if": "condition_a", + "then": [ + { + "test": { + "if": "condition_a", + "then": [{"T": "x"}], + } + }, + { + "test": { + "if": "condition_b", + "then": [{"T": "y"}], + } + }, + ], + } + }, + ) + tr = make_rule("test", "mo", {"if": "condition_c"}) + diffs: List[RuleDifference] = diff_rules(en, tr) + cond_diff: RuleDifference = [d for d in diffs if d.diff_type == "condition"][0] + + # without deduplication, we'd have "condition_a" repeated. + assert cond_diff.english_snippet == "condition_a, condition_b" + assert cond_diff.translated_snippet == "condition_c" + def test_detects_missing_condition(self): """Ensure detects missing condition.""" en = make_rule("test", "mo", {"if": "condition1"})