diff --git a/CHANGELOG.md b/CHANGELOG.md index be20265..901f888 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Unreleased +- Parse raw values to values for all fields (#98). - Add constraints and tests for Direction (#97). - Improve support for Distance, Direction and Elapsed Time (#96). - Refactor exceptions (#94). diff --git a/src/euring/codes.py b/src/euring/codes.py index 445ad0e..fa57eac 100644 --- a/src/euring/codes.py +++ b/src/euring/codes.py @@ -182,21 +182,22 @@ def parse_longitude(value: str) -> float: return _parse_decimal_coordinate(value, max_abs=180, max_decimals=4, field_name="Longitude") -def parse_direction(value: str) -> str: +def parse_direction(value: str) -> int | None: """Parse and validate a direction in degrees (000-359) or hyphen placeholder.""" if value is None: raise EuringConstraintException(f'Value "{value}" is not a valid direction.') - if value and set(value) == {"-"}: - return value - if value.startswith("-"): + value_str = f"{value}" + if value_str and set(value_str) == {"-"}: + return None + if value_str.startswith("-"): raise EuringConstraintException(f'Value "{value}" is not a valid direction.') try: - parsed = int(value) + parsed = int(value_str) except (TypeError, ValueError): raise EuringConstraintException(f'Value "{value}" is not a valid direction.') if parsed < 0 or parsed > 359: raise EuringConstraintException("Direction must be between 0 and 359 degrees.") - return value + return parsed def _parse_decimal_coordinate(value: str, *, max_abs: int, max_decimals: int, field_name: str) -> float: @@ -263,12 +264,15 @@ def lookup_place_details(value: str | int) -> dict[str, Any]: raise EuringLookupException(f'Value "{value}" is not a valid EURING place code.') -def lookup_date(value: str) -> date: +def lookup_date(value: str | int) -> date: """Parse a EURING date string into a datetime.date.""" + value_str = f"{value}" + if value_str.isdigit() and len(value_str) < 8: + value_str = value_str.zfill(8) try: - day = int(value[0:2]) - month = int(value[2:4]) - year = int(value[4:8]) + day = int(value_str[0:2]) + month = int(value_str[2:4]) + year = int(value_str[4:8]) return date(year, month, day) except (IndexError, ValueError): raise EuringConstraintException(f'Value "{value}" is not a valid EURING date.') diff --git a/src/euring/field_schema.py b/src/euring/field_schema.py index 4acda37..20d86c1 100644 --- a/src/euring/field_schema.py +++ b/src/euring/field_schema.py @@ -6,7 +6,15 @@ from .codes import lookup_description from .exceptions import EuringConstraintException, EuringTypeException -from .types import is_valid_type +from .types import ( + TYPE_ALPHABETIC, + TYPE_ALPHANUMERIC, + TYPE_INTEGER, + TYPE_NUMERIC, + TYPE_NUMERIC_SIGNED, + TYPE_TEXT, + is_valid_type, +) __all__ = [ "EuringField", @@ -70,8 +78,7 @@ def _validate_length(self, raw: str) -> None: f'Value "{raw}" is length {value_length}, should be at most {self.max_length}.' ) - def parse(self, raw: str) -> Any | None: - """Parse raw text into a Python value.""" + def _validate_raw(self, raw: str) -> str | None: if raw == "": if not self._is_required(): return None @@ -81,6 +88,26 @@ def parse(self, raw: str) -> Any | None: raise EuringTypeException(f'Value "{raw}" is not valid for type {self.type_name}.') return raw + def _coerce_type(self, raw: str) -> Any: + if self.type_name == TYPE_INTEGER: + if set(raw) == {"-"}: + return None + return int(raw) + if self.type_name == TYPE_NUMERIC: + return float(raw) + if self.type_name == TYPE_NUMERIC_SIGNED: + return float(raw) + if self.type_name in {TYPE_ALPHABETIC, TYPE_ALPHANUMERIC, TYPE_TEXT}: + return raw + return raw + + def parse(self, raw: str) -> Any | None: + """Parse raw text into a Python value.""" + validated = self._validate_raw(raw) + if validated is None: + return None + return self._coerce_type(validated) + def encode(self, value: Any | None) -> str: """Encode a Python value to raw text.""" if value is None or value == "": @@ -134,12 +161,12 @@ def _mapping(self) -> dict[str, Any]: return mapping def parse(self, raw: str) -> Any | None: - value = super().parse(raw) - if value is None: + validated = self._validate_raw(raw) + if validated is None: return None if self.parser is None: - return value - return self.parser(value) + return self._coerce_type(validated) + return self.parser(validated) def describe(self, value: Any | None) -> Any | None: if self.lookup is None or value is None: diff --git a/src/euring/parsing.py b/src/euring/parsing.py index 9cda13c..ba67e87 100644 --- a/src/euring/parsing.py +++ b/src/euring/parsing.py @@ -30,8 +30,11 @@ def euring_decode_value( parsed = field.parse(value) if parsed is None: return None - results: dict[str, Any] = {"value": value} + results: dict[str, Any] = {"raw_value": value, "value": parsed} if parser: results["parsed_value"] = parsed - results["description"] = field.describe(parsed) + description_value = parsed + if lookup and not parser and value != "": + description_value = value + results["description"] = field.describe(description_value) return results diff --git a/src/euring/record.py b/src/euring/record.py index 38b6252..4d9f859 100644 --- a/src/euring/record.py +++ b/src/euring/record.py @@ -52,9 +52,11 @@ def set(self, key: str, value: object) -> EuringRecord: field = _FIELD_MAP.get(key) if field is None: raise ValueError(f'Unknown field key "{key}".') + raw_value = "" if value is None else str(value) self._fields[key] = { "name": field["name"], - "value": "" if value is None else str(value), + "raw_value": raw_value, + "value": value, "order": field["order"], } return self @@ -64,9 +66,11 @@ def _set_raw_value(self, key: str, value: object) -> None: field = _FIELD_MAP.get(key) if field is None: return + raw_value = "" if value is None else f"{value}" self._fields[key] = { "name": field["name"], - "value": "" if value is None else value, + "raw_value": raw_value, + "value": raw_value, "order": field["order"], } @@ -135,18 +139,23 @@ def _validate_fields(self) -> list[dict[str, object]]: variable_length_keys = {"distance", "direction", "elapsed_time"} for index, field in enumerate(fields): key = field["key"] - value = self._fields.get(key, {}).get("value", "") - value = "" if value is None else value + field_state = self._fields.get(key, {}) + raw_value = field_state.get("raw_value", field_state.get("value", "")) + raw_value = "" if raw_value is None else raw_value try: field_def = field if self.format != FORMAT_EURING2000 and key in variable_length_keys and field.get("length"): field_def = {**field, "max_length": field["length"]} field_def.pop("length", None) field_obj = coerce_field(field_def) - parsed_value = field_obj.parse(value) - description = field_obj.describe(parsed_value) + parsed_value = field_obj.parse(raw_value) + description_value = parsed_value + if field_obj.get("lookup") is not None and field_obj.get("parser") is None and raw_value != "": + description_value = raw_value + description = field_obj.describe(description_value) if key in self._fields: - if field_obj.get("parser") is not None and parsed_value is not None: + self._fields[key]["value"] = parsed_value + if field_obj.get("parser") is not None: self._fields[key]["parsed_value"] = parsed_value if description is not None: self._fields[key]["description"] = description @@ -154,7 +163,7 @@ def _validate_fields(self) -> list[dict[str, object]]: payload = { "field": field["name"], "message": f"{exc}", - "value": "" if value is None else f"{value}", + "value": "" if raw_value is None else f"{raw_value}", "key": key, "index": index, } @@ -177,7 +186,7 @@ def _has_non_optional_errors(self, errors: dict[str, list]) -> bool: def _validate_record_rules(self) -> list[dict[str, object]]: """Validate multi-field and record-level rules.""" - values_by_key = {key: field.get("value", "") for key, field in self._fields.items()} + values_by_key = {key: field.get("raw_value", field.get("value", "")) for key, field in self._fields.items()} errors: list[dict[str, object]] = [] for error in record_rule_errors(self.format, values_by_key): errors.append(_record_error_for_key(error["key"], error["message"], value=error["value"])) @@ -196,10 +205,22 @@ def _serialize(self) -> str: """Serialize current field values without strict completeness checks.""" fields = _fields_for_format(self.format) values_by_key: dict[str, str] = {} + hyphen_required_keys = {"distance", "direction", "elapsed_time"} for field in fields: key = field["key"] - value = self._fields.get(key, {}).get("value", "") - values_by_key[key] = "" if value is None else value + raw_value = self._fields.get(key, {}).get("raw_value") + value = self._fields.get(key, {}).get("value") + if raw_value is None: + raw_value = self._fields.get(key, {}).get("value", "") + raw_value = "" if raw_value is None else f"{raw_value}" + if self.format == FORMAT_EURING2000 and (value is None or value == ""): + raw_value = "" + if self.format in {FORMAT_EURING2000PLUS, FORMAT_EURING2020} and key in hyphen_required_keys: + if value is None or value == "": + length = field.get("length") or field.get("max_length") + if length: + raw_value = "-" * int(length) + values_by_key[key] = raw_value if self.format == FORMAT_EURING2000: return _format_fixed_width(values_by_key, _fixed_width_fields()) return "|".join(values_by_key.get(field["key"], "") for field in fields) diff --git a/tests/test_decoding.py b/tests/test_decoding.py index a9db8c0..3a09f2b 100644 --- a/tests/test_decoding.py +++ b/tests/test_decoding.py @@ -106,7 +106,8 @@ def test_decode_euring2020_format_rejects_2000_plus(self): def test_decode_value_with_lookup(self): result = euring_decode_value("01012024", TYPE_INTEGER, length=8, lookup=lookup_date) - assert result["value"] == "01012024" + assert result["raw_value"] == "01012024" + assert result["value"] == 1012024 assert result["description"].isoformat() == "2024-01-01" def test_decode_value_invalid_type(self): @@ -148,7 +149,7 @@ def test_decode_euring2000plus_allows_short_elapsed_time(self): encoded = "|".join(record) decoded = EuringRecord.decode(encoded, format="euring2000plus") assert decoded.errors["record"] == [] - assert decoded.fields["elapsed_time"]["value"] == "1" + assert decoded.fields["elapsed_time"]["value"] == 1 def test_decode_euring2020_allows_short_distance(self): record = _make_euring2000_plus_record(accuracy="A").split("|") @@ -160,7 +161,7 @@ def test_decode_euring2020_allows_short_distance(self): encoded = "|".join(record) decoded = EuringRecord.decode(encoded, format="euring2020") assert decoded.errors["record"] == [] - assert decoded.fields["distance"]["value"] == "18" + assert decoded.fields["distance"]["value"] == 18 def test_parse_old_greater_coverts_valid(self): assert parse_old_greater_coverts("0") == "0" @@ -172,12 +173,12 @@ def test_parse_old_greater_coverts_invalid(self): parse_old_greater_coverts("B") def test_parse_direction_allows_degrees(self): - assert parse_direction("0") == "0" - assert parse_direction("359") == "359" + assert parse_direction("0") == 0 + assert parse_direction("359") == 359 def test_parse_direction_allows_hyphens(self): - assert parse_direction("---") == "---" - assert parse_direction("-") == "-" + assert parse_direction("---") is None + assert parse_direction("-") is None def test_parse_direction_rejects_out_of_range(self): with pytest.raises(EuringConstraintException): diff --git a/tests/test_record.py b/tests/test_record.py index a6c93a3..45c4e82 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -16,10 +16,10 @@ def _values_from_record(record: str) -> dict[str, str]: decoded = EuringRecord.decode(record) values: dict[str, str] = {} for key, field in decoded.fields.items(): - value = field.get("value") - if value is None: + raw_value = field.get("raw_value") + if raw_value is None: continue - values[key] = value + values[key] = raw_value return values