diff --git a/TODO.md b/TODO.md
index 15c58d0..3aa4bf9 100644
--- a/TODO.md
+++ b/TODO.md
@@ -16,6 +16,8 @@
 - Suggest some good initial configuration for Jupyter notebook, e.g. `print_auto=True` and `ignore_result_overwrite=True`.
 - Naming: we call it "uncertainty". Give a hint that others might also call it "error" interchangeably.
 - Jupyter Notebook tip to avoid
+- Add warning that users should not rely on console stringified output for further processing since that might change more often in the future, e.g. better formatting of units or whitespaces etc. Only rely on the final LaTeX output written to the external file.
+
 ```
 
 
diff --git a/src/api/console_stringifier.py b/src/api/console_stringifier.py
index e3a77db..ca8f67f 100644
--- a/src/api/console_stringifier.py
+++ b/src/api/console_stringifier.py
@@ -1,3 +1,4 @@
+import re
 from domain.result import Result
 from application.stringifier import Stringifier
 
@@ -33,16 +34,87 @@ def _modify_unit(self, unit: str) -> str:
         """
         Returns the modified unit.
         """
-        unit = (
-            unit.replace(r"\squared", "^2")
-            .replace(r"\cubed", "^3")
-            .replace("\\per\\", "/")
-            .replace(r"\per", "/")
-            .replace("\\", " ")
-            .strip()
-        )
-
-        if unit[0] == "/":
-            unit = f"1{unit}"
-
-        return unit
+
+        # Remove all whitespace characters (space, tab, newline etc.)
+        unit = "".join(unit.split())
+
+        # Detect "\squared" etc.
+        unit = unit.replace(r"\squared", "^2").replace(r"\cubed", "^3")
+
+        # Detect special units
+        unit = unit.replace(r"\percent", r"\%").replace(r"\degree", r"\°")
+
+        # Detect "/"
+        unit = unit.replace("/", " / ")
+
+        # Split the unit into its parts at backslashes and whitespace
+        unit_parts = re.split(r"[\\\s]+", unit)
+        numerator_parts = []
+        denominator_parts = []
+        is_next_part_in_denominator = False
+
+        for unit_part in unit_parts:
+            # Skip empty parts
+            if unit_part == "":
+                continue
+
+            # If next part is a denominator part
+            if unit_part in ("/", "per"):
+                is_next_part_in_denominator = True
+                continue
+
+            # Add part to numerator or denominator (`per` only affects the next part)
+            if is_next_part_in_denominator:
+                denominator_parts.append(unit_part)
+                is_next_part_in_denominator = False
+            else:
+                numerator_parts.append(unit_part)
+
+        # Assemble unit
+        modified_unit = ""
+
+        # Handle empty unit
+        if not numerator_parts and not denominator_parts:
+            return ""
+
+        # Numerator
+        if not numerator_parts:
+            modified_unit += "1"
+        elif len(numerator_parts) == 1 or not denominator_parts:
+            modified_unit += " ".join(numerator_parts)
+        else:
+            modified_unit += f"({' '.join(numerator_parts)})"
+
+        # Denominator
+        if denominator_parts:
+            modified_unit += "/"
+            if len(denominator_parts) == 1:
+                modified_unit += denominator_parts[0]
+            else:
+                modified_unit += f"({' '.join(denominator_parts)})"
+
+        modified_unit = self.strip_whitespaces_around_parentheses(modified_unit)
+        modified_unit = self.replace_per_by_symbol(modified_unit)
+
+        return modified_unit
+
+    def strip_whitespaces_around_parentheses(self, string: str) -> str:
+        """
+        Removes the space directly before and after each parenthesis.
+        """
+        return string.replace(" (", "(").replace("( ", "(").replace(" )", ")").replace(") ", ")")
+
+    def replace_per_by_symbol(self, string: str) -> str:
+        """
+        Replaces `per` with `/` where it directly precedes a parenthesis.
+
+        This is necessary due to limitations of the above parsing method
+        where `per(` is recognized as a single token. For a proper parser, we
+        would have to deal with parentheses in a more sophisticated way. As this
+        is not the scope of this project for now, we only rewrite the `per(`
+        tokens that slipped through the above logic.
+
+        Only a `per` at a word boundary followed by `(` is replaced, so units
+        that merely contain these letters (e.g. `ampere`) are left untouched.
+        """
+        return re.sub(r"\s*\bper(?=\()", "/", string)
diff --git a/tests/playground.py b/tests/playground.py
index 6d7f52a..fd9bc80 100644
--- a/tests/playground.py
+++ b/tests/playground.py
@@ -34,6 +34,13 @@
 
 wiz.res("a911", 1.05, unit=r"\mm\s\per\N\kg")
 # wiz.res("a911", "1.052", 0.25, r"\mm\s\per\N\kg")
+wiz.res("a911_2", 1.05, unit=r"\mm\s\per(\N\kg)")
+wiz.res("more parentheses", 1.05, unit=r"\mm\s\per((\N\kg))")
+wiz.res("wrong parentheses", 1.05, unit=r"\mm\s\per(((\N\kg)\T")
+wiz.res("a_unit_parsing", "1.0", unit=r"\per\percent")
+wiz.res("a_unit_parsing_only_numerator", "1.0", unit=r"\m\N\kg")
+wiz.res("a_unit_squared", "1.0", unit=r"\m \squared")
+wiz.res("a_unit_custom_slash", "1.0", unit=r"\m\squared/\s")
 
 wiz.res("1 b", 1.0, 0.01, unit=r"\per\mm\cubed")
 
@@ -54,7 +61,7 @@
     Decimal("42.0e-30"),
     sys=Decimal("0.1e-31"),
     stat=Decimal("0.05e-31"),
-    unit=r"\m\per\s\squared",
+    unit=r"\m\per\s\squared\newton\per\kg",
 )
 wiz.res("j", 0.009, None, "", 2)  # really bad, but this is valid
 # wiz.res("k", 1.55, 0.0, unit=r"\tesla")  # -> uncertainty must be positive