Skip to content

Commit a73a3c4

Browse files
committed
fix(core): make normalize_text idempotent after mix-keyword removal
1 parent f150d68 commit a73a3c4

2 files changed

Lines changed: 9 additions & 0 deletions

File tree

src/cuepoint/core/text_processing.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,9 @@ def normalize_text(s: str) -> str:
106106
s,
107107
)
108108
s = re.sub(r"(?i)(originalmix|extendedmix|radioedit|clubmix)$", " ", s)
109+
# Mix-keyword replacements can leave whitespace-only strings; strip so
110+
# normalize_text(normalize_text(s)) == normalize_text(s) (e.g. "vip" alone → "" not " ").
111+
s = re.sub(r"\s+", " ", s).strip()
109112
return s
110113

111114

src/tests/unit/core/test_text_processing.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@ def test_normalize_text_empty(self):
4848
assert normalize_text("") == ""
4949
assert normalize_text(None) == ""
5050

51+
def test_normalize_text_idempotent_after_mix_keyword_strip(self) -> None:
52+
"""Regression (Design 3.16): lone token 'vip' must not normalize to a bare space."""
53+
# NFKD + lower → "vip"; \bvip\b replacement leaves whitespace only → final strip → "".
54+
once = normalize_text("vÌP")
55+
assert normalize_text(once) == once
56+
5157
@pytest.mark.unit
5258
@pytest.mark.skipif(
5359
sys.platform == "win32",

0 commit comments

Comments
 (0)