Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions PythonScripts/audit_translations/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,19 @@ def normalize_match(value: Any) -> str:
def normalize_xpath(value: str) -> str:
    """Collapse every run of whitespace in ``value`` into a single space.

    Splitting on whitespace also drops any leading or trailing whitespace,
    so the result never starts or ends with a space.
    """
    tokens = value.split()
    return " ".join(tokens)

def dedup_list(values: List[str]) -> List[str]:
    """
    Drop repeated entries from ``values``, keeping each at its first position.

    Rule differences were previously collected into sets, which discarded the
    order in which they appeared in the rule. This helper removes duplicates
    without losing that order.

    Example:
    >>> dedup_list(["if:a", "if:b", "if:a"])
    ['if:a', 'if:b']
    """
    seen = set()
    unique: List[str] = []
    for item in values:
        # Only the first occurrence is kept; later repeats are skipped.
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique


def extract_match_pattern(rule_data: Any) -> str:
if isinstance(rule_data, dict):
Expand Down Expand Up @@ -403,8 +416,8 @@ def diff_rules(english_rule: RuleInfo, translated_rule: RuleInfo) -> List[RuleDi
translated_rule=translated_rule,
diff_type='condition',
description='Conditions differ',
english_snippet=', '.join(sorted(en_set)) or '(none)',
translated_snippet=', '.join(sorted(tr_set)) or '(none)'
english_snippet=', '.join(dedup_list(en_conditions)) or '(none)',
translated_snippet=', '.join(dedup_list(tr_conditions)) or '(none)'
))

# Check variable differences
Expand Down
4 changes: 2 additions & 2 deletions PythonScripts/audit_translations/tests/golden/jsonl/de.json
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,8 @@
"issue_type": "rule_difference",
"diff_type": "condition",
"description": "Conditions differ",
"english_snippet": "$Setting = 'Value', $Verbosity!='Terse', *[2][.='2'], parent::m:minus",
"translated_snippet": "$Setting = 'Value', *[2][.='2'], parent::m:minus",
"english_snippet": "$Verbosity!='Terse', $Setting = 'Value', parent::m:minus, *[2][.='2']",
"translated_snippet": "$Setting = 'Value', parent::m:minus, *[2][.='2']",
"untranslated_texts": [],
"_explanation": "structure_misaligned.yaml: English has extra test block causing misalignment. Fix filters out misleading structure differences but reports condition difference."
},
Expand Down
66 changes: 65 additions & 1 deletion PythonScripts/audit_translations/tests/test_parsers.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""
Tests for parsers.py.
"""
from typing import List

import pytest
from ruamel.yaml import YAML
from ruamel.yaml.scanner import ScannerError

from ..dataclasses import RuleInfo
from ..dataclasses import RuleInfo, RuleDifference
from ..parsers import (
diff_rules,
extract_conditions,
Expand Down Expand Up @@ -383,6 +384,69 @@ def test_detects_condition_difference(self):
diffs = diff_rules(en, tr)
assert any(d.diff_type == "condition" for d in diffs)

def test_condition_snippet_preserves_rule_order(self):
    """
    Condition snippets list conditions in the order each rule declares them.

    The snippets used to be sorted alphabetically, which hid the rule's own order.
    """
    # Outer condition "condition_b" wraps inner "condition_a", so rule order
    # (b, a) is the reverse of alphabetical order.
    inner_test = {"test": {"if": "condition_a", "then": [{"T": "x"}]}}
    outer_test = {"test": {"if": "condition_b", "then": [inner_test]}}
    en = make_rule("test", "mo", outer_test)
    tr = make_rule("test", "mo", {"if": "condition_c"})

    diffs: List[RuleDifference] = diff_rules(en, tr)
    condition_diffs = [d for d in diffs if d.diff_type == "condition"]
    cond_diff: RuleDifference = condition_diffs[0]

    assert cond_diff.english_snippet == "condition_b, condition_a"
    assert cond_diff.translated_snippet == "condition_c"

def test_condition_snippet_deduplicates_repeated_conditions(self):
    """
    A condition that occurs more than once appears once, at its first position.
    """
    # "condition_a" occurs at the outer level and again in the first nested test.
    repeated_inner = {"test": {"if": "condition_a", "then": [{"T": "x"}]}}
    distinct_inner = {"test": {"if": "condition_b", "then": [{"T": "y"}]}}
    outer_test = {
        "test": {
            "if": "condition_a",
            "then": [repeated_inner, distinct_inner],
        }
    }
    en = make_rule("test", "mo", outer_test)
    tr = make_rule("test", "mo", {"if": "condition_c"})

    diffs: List[RuleDifference] = diff_rules(en, tr)
    condition_diffs = [d for d in diffs if d.diff_type == "condition"]
    cond_diff: RuleDifference = condition_diffs[0]

    # Without deduplication, "condition_a" would be listed twice.
    assert cond_diff.english_snippet == "condition_a, condition_b"
    assert cond_diff.translated_snippet == "condition_c"

def test_detects_missing_condition(self):
"""Ensure detects missing condition."""
en = make_rule("test", "mo", {"if": "condition1"})
Expand Down