From af7e5910f0234f44a42f9f2bd90afff9c853f478 Mon Sep 17 00:00:00 2001 From: Dylan Verheul Date: Sat, 24 Jan 2026 11:10:47 +0100 Subject: [PATCH 01/12] Improve and document serialization --- docs/examples.rst | 6 +++ docs/python_reference.rst | 6 +++ src/euring/parsing.py | 4 +- src/euring/record.py | 109 +++++++++++++++++++++++++++++--------- src/euring/utils.py | 6 +++ tests/test_decoding.py | 2 +- tests/test_record.py | 21 ++++++-- 7 files changed, 123 insertions(+), 31 deletions(-) diff --git a/docs/examples.rst b/docs/examples.rst index 90ed4e6..ac6248a 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -25,6 +25,12 @@ If you want to allow missing optional values and keep placeholders, pass ``strict=False`` to the record. ``serialize()`` raises ``ValueError`` when a field fails validation. +Serialization always re-encodes from the current typed values. For EURING2000, +fixed-width output uses hyphens for empty values and zero-pads integers to the +declared length. For EURING2000+/EURING2020, empty values remain empty strings +except for fields that explicitly use hyphen placeholders (for example Elapsed +Time, Distance, and Direction). + Exporting records ----------------- diff --git a/docs/python_reference.rst b/docs/python_reference.rst index 96e49d3..fa5ce88 100644 --- a/docs/python_reference.rst +++ b/docs/python_reference.rst @@ -34,3 +34,9 @@ Build a EURING record: fails validation. Use ``EuringRecord("euring2000plus", strict=False)`` to allow missing optional values and keep placeholders in the output. Use ``export()`` to convert to other EURING string formats. + +Serialization always re-encodes from the current values. For EURING2000, +fixed-width output uses hyphens for empty values and zero-pads integers to the +declared length. For EURING2000+/EURING2020, empty values remain empty strings +except for fields that explicitly use hyphen placeholders (for example Elapsed +Time, Distance, and Direction). diff --git a/src/euring/parsing.py b/src/euring/parsing.py index ba67e87..6b81255 100644 --- a/src/euring/parsing.py +++ b/src/euring/parsing.py @@ -30,11 +30,11 @@ def euring_decode_value( parsed = field.parse(value) if parsed is None: return None - results: dict[str, Any] = {"raw_value": value, "value": parsed} + results: dict[str, Any] = {"source_raw_value": value, "value": parsed} if parser: results["parsed_value"] = parsed description_value = parsed - if lookup and not parser and value != "": + if lookup and not parser and value != "" and parsed is not None: description_value = value results["description"] = field.describe(description_value) return results diff --git a/src/euring/record.py b/src/euring/record.py index 4d9f859..4c0b957 100644 --- a/src/euring/record.py +++ b/src/euring/record.py @@ -3,7 +3,7 @@ import json import warnings -from .exceptions import EuringConstraintException, EuringException +from .exceptions import EuringConstraintException, EuringException, EuringTypeException from .field_schema import coerce_field from .fields import EURING_FIELDS from .formats import ( @@ -17,6 +17,7 @@ ) from .rules import record_rule_errors, requires_euring2020 from .utils import euring_lat_to_dms, euring_lng_to_dms +from .types import TYPE_INTEGER, TYPE_NUMERIC, TYPE_NUMERIC_SIGNED class EuringRecord: @@ -52,10 +53,8 @@ def set(self, key: str, value: object) -> EuringRecord: field = _FIELD_MAP.get(key) if field is None: raise ValueError(f'Unknown field key "{key}".') - raw_value = "" if value is None else str(value) self._fields[key] = { "name": field["name"], - "raw_value": raw_value, "value": value, "order": field["order"], } @@ -66,11 +65,10 @@ def _set_raw_value(self, key: str, value: object) -> None: field = _FIELD_MAP.get(key) if field is None: return - raw_value = "" if value is None else f"{value}" self._fields[key] = { "name": field["name"], - "raw_value": raw_value, - "value": raw_value, + "source_raw_value": "" if value is None else f"{value}", + "value": "" if value is None else f"{value}", "order": field["order"], } @@ -136,21 +134,36 @@ def _validate_fields(self) -> list[dict[str, object]]: errors: list[dict[str, object]] = [] fields = _fields_for_format(self.format) positions = _field_positions(fields) if self.format == FORMAT_EURING2000 else {} + needs_geo_dots = False + if self.format == FORMAT_EURING2020: + lat_value = self._fields.get("latitude", {}).get("value") + lng_value = self._fields.get("longitude", {}).get("value") + needs_geo_dots = lat_value not in (None, "") or lng_value not in (None, "") variable_length_keys = {"distance", "direction", "elapsed_time"} for index, field in enumerate(fields): key = field["key"] field_state = self._fields.get(key, {}) - raw_value = field_state.get("raw_value", field_state.get("value", "")) - raw_value = "" if raw_value is None else raw_value + value = field_state.get("value", "") + had_empty_value = value in (None, "") try: field_def = field if self.format != FORMAT_EURING2000 and key in variable_length_keys and field.get("length"): field_def = {**field, "max_length": field["length"]} field_def.pop("length", None) field_obj = coerce_field(field_def) + raw_value = _serialize_field_value(field, value, self.format) + if key == "geographical_coordinates" and had_empty_value and needs_geo_dots: + raw_value = "." * 15 parsed_value = field_obj.parse(raw_value) + if had_empty_value and raw_value: + parsed_value = None description_value = parsed_value - if field_obj.get("lookup") is not None and field_obj.get("parser") is None and raw_value != "": + if ( + field_obj.get("lookup") is not None + and field_obj.get("parser") is None + and raw_value != "" + and parsed_value is not None + ): description_value = raw_value description = field_obj.describe(description_value) if key in self._fields: @@ -163,7 +176,7 @@ def _validate_fields(self) -> list[dict[str, object]]: payload = { "field": field["name"], "message": f"{exc}", - "value": "" if raw_value is None else f"{raw_value}", + "value": "" if value is None else f"{value}", "key": key, "index": index, } @@ -186,7 +199,24 @@ def _has_non_optional_errors(self, errors: dict[str, list]) -> bool: def _validate_record_rules(self) -> list[dict[str, object]]: """Validate multi-field and record-level rules.""" - values_by_key = {key: field.get("raw_value", field.get("value", "")) for key, field in self._fields.items()} + values_by_key: dict[str, str] = {} + for field in _fields_for_format(self.format): + key = field["key"] + field_state = self._fields.get(key, {}) + source_raw = field_state.get("source_raw_value") + if source_raw is not None: + values_by_key[key] = source_raw + continue + value = field_state.get("value", "") + try: + values_by_key[key] = _serialize_field_value(field, value, self.format) + except EuringException: + values_by_key[key] = "" + if self.format == FORMAT_EURING2020: + lat_value = values_by_key.get("latitude", "") + lng_value = values_by_key.get("longitude", "") + if (lat_value or lng_value) and not values_by_key.get("geographical_coordinates"): + values_by_key["geographical_coordinates"] = "." * 15 errors: list[dict[str, object]] = [] for error in record_rule_errors(self.format, values_by_key): errors.append(_record_error_for_key(error["key"], error["message"], value=error["value"])) @@ -205,22 +235,19 @@ def _serialize(self) -> str: """Serialize current field values without strict completeness checks.""" fields = _fields_for_format(self.format) values_by_key: dict[str, str] = {} - hyphen_required_keys = {"distance", "direction", "elapsed_time"} + geo_placeholder = None + if self.format == FORMAT_EURING2020: + lat_value = self._fields.get("latitude", {}).get("value") + lng_value = self._fields.get("longitude", {}).get("value") + if lat_value not in (None, "") or lng_value not in (None, ""): + geo_placeholder = "." * 15 for field in fields: key = field["key"] - raw_value = self._fields.get(key, {}).get("raw_value") value = self._fields.get(key, {}).get("value") - if raw_value is None: - raw_value = self._fields.get(key, {}).get("value", "") - raw_value = "" if raw_value is None else f"{raw_value}" - if self.format == FORMAT_EURING2000 and (value is None or value == ""): - raw_value = "" - if self.format in {FORMAT_EURING2000PLUS, FORMAT_EURING2020} and key in hyphen_required_keys: - if value is None or value == "": - length = field.get("length") or field.get("max_length") - if length: - raw_value = "-" * int(length) - values_by_key[key] = raw_value + if key == "geographical_coordinates" and value in (None, "") and geo_placeholder: + values_by_key[key] = geo_placeholder + continue + values_by_key[key] = _serialize_field_value(field, value, self.format) if self.format == FORMAT_EURING2000: return _format_fixed_width(values_by_key, _fixed_width_fields()) return "|".join(values_by_key.get(field["key"], "") for field in fields) @@ -268,6 +295,40 @@ def _format_fixed_width(values_by_key: dict[str, str], fields: list[dict[str, ob return "".join(parts) +def _serialize_field_value(field: dict[str, object], value: object, format: str) -> str: + """Encode a typed field value into a EURING raw string.""" + key = field["key"] + length = field.get("length") or field.get("max_length") + variable_length_keys = {"distance", "direction", "elapsed_time"} + if value in (None, ""): + type_name = field.get("type") or field.get("type_name") or "" + if type_name == TYPE_INTEGER and length: + return "-" * int(length) + if format == FORMAT_EURING2000 and length: + return "-" * int(length) + if format in {FORMAT_EURING2000PLUS, FORMAT_EURING2020} and key in variable_length_keys and length: + return "-" * int(length) + return "" + if key == "geographical_coordinates" and isinstance(value, dict): + if "lat" not in value or "lng" not in value: + raise EuringConstraintException("Geographical coordinates require both lat and lng values.") + return f"{euring_lat_to_dms(float(value['lat']))}{euring_lng_to_dms(float(value['lng']))}" + value_str = f"{value}" + type_name = field.get("type") or field.get("type_name") or "" + if type_name == TYPE_INTEGER: + if isinstance(value, str) and value and set(value) == {"-"}: + return _serialize_field_value(field, None, format) + if not value_str.isdigit(): + raise EuringTypeException(f'Value "{value}" is not valid for type {TYPE_INTEGER}.') + is_variable = format != FORMAT_EURING2000 and key in variable_length_keys + if length and not is_variable: + value_str = value_str.zfill(int(length)) + return value_str + if type_name in {TYPE_NUMERIC, TYPE_NUMERIC_SIGNED}: + return value_str + return value_str + + def _convert_record_string( value: str, *, diff --git a/src/euring/utils.py b/src/euring/utils.py index f0627ba..269e3f6 100644 --- a/src/euring/utils.py +++ b/src/euring/utils.py @@ -52,6 +52,12 @@ def euring_float_to_dms(value: float, round_seconds: bool = False) -> dict[str, quadrant = "+" # includes 0 if round_seconds: seconds = int(round(seconds)) + if seconds == 60: + seconds = 0 + minutes += 1 + if minutes == 60: + minutes = 0 + degrees = degrees + 1 if degrees >= 0 else degrees - 1 return {"quadrant": quadrant, "degrees": degrees, "minutes": minutes, "seconds": seconds} diff --git a/tests/test_decoding.py b/tests/test_decoding.py index ee04a7b..3f28118 100644 --- a/tests/test_decoding.py +++ b/tests/test_decoding.py @@ -107,7 +107,7 @@ def test_decode_euring2020_format_rejects_2000_plus(self): def test_decode_value_with_lookup(self): result = euring_decode_value("01012024", TYPE_INTEGER, length=8, lookup=lookup_date) - assert result["raw_value"] == "01012024" + assert result["source_raw_value"] == "01012024" assert result["value"] == 1012024 assert result["description"].isoformat() == "2024-01-01" diff --git a/tests/test_record.py b/tests/test_record.py index 45c4e82..ad3f0cd 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -16,7 +16,7 @@ def _values_from_record(record: str) -> dict[str, str]: decoded = EuringRecord.decode(record) values: dict[str, str] = {} for key, field in decoded.fields.items(): - raw_value = field.get("raw_value") + raw_value = field.get("source_raw_value") if raw_value is None: continue values[key] = raw_value @@ -34,7 +34,12 @@ def test_record_euring2000_round_trip(): values = _values_from_record(record_str) record = EuringRecord("euring2000") record.update(values) - assert record.serialize() == record_str + serialized = record.serialize() + assert len(serialized) == 94 + decoded = EuringRecord.decode(serialized) + assert decoded.display_format == "EURING2000" + assert not decoded.errors["record"] + assert not decoded.errors["fields"] def test_record_euring2000plus_round_trip(): @@ -48,7 +53,11 @@ def test_record_euring2000plus_round_trip(): values = _values_from_record(record_str) record = EuringRecord("euring2000plus") record.update(values) - assert record.serialize() == record_str + serialized = record.serialize() + decoded = EuringRecord.decode(serialized) + assert decoded.display_format == "EURING2000+" + assert not decoded.errors["record"] + assert not decoded.errors["fields"] def test_record_euring2020_round_trip(): @@ -62,7 +71,11 @@ def test_record_euring2020_round_trip(): values = _values_from_record(record_str) record = EuringRecord("euring2020") record.update(values) - assert record.serialize() == record_str + serialized = record.serialize() + decoded = EuringRecord.decode(serialized) + assert decoded.display_format == "EURING2020" + assert not decoded.errors["record"] + assert not decoded.errors["fields"] def test_record_missing_required_field_raises(): From 9ca550bef120c8b6aba98923b9c612a42929edcb Mon Sep 17 00:00:00 2001 From: Dylan Verheul Date: Sat, 24 Jan 2026 11:24:40 +0100 Subject: [PATCH 02/12] wip --- CHANGELOG.md | 1 + src/euring/record.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d1cfb5c..ae6ed0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Unreleased +- Improve and document serialization (#100). - Add stricter Place Code validation (#99). - Parse raw values to values for all fields (#98). - Add constraints and tests for Direction (#97). diff --git a/src/euring/record.py b/src/euring/record.py index 4c0b957..62d2d7e 100644 --- a/src/euring/record.py +++ b/src/euring/record.py @@ -16,8 +16,8 @@ unknown_format_error, ) from .rules import record_rule_errors, requires_euring2020 -from .utils import euring_lat_to_dms, euring_lng_to_dms from .types import TYPE_INTEGER, TYPE_NUMERIC, TYPE_NUMERIC_SIGNED +from .utils import euring_lat_to_dms, euring_lng_to_dms class EuringRecord: From 5766fd55e01f1dae617139305bc130a0608ffbcc Mon Sep 17 00:00:00 2001 From: Dylan Verheul Date: Sat, 24 Jan 2026 11:26:36 +0100 Subject: [PATCH 03/12] Use raw_value --- src/euring/parsing.py | 2 +- src/euring/record.py | 4 ++-- tests/test_decoding.py | 2 +- tests/test_record.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/euring/parsing.py b/src/euring/parsing.py index 6b81255..518191b 100644 --- a/src/euring/parsing.py +++ b/src/euring/parsing.py @@ -30,7 +30,7 @@ def euring_decode_value( parsed = field.parse(value) if parsed is None: return None - results: dict[str, Any] = {"source_raw_value": value, "value": parsed} + results: dict[str, Any] = {"raw_value": value, "value": parsed} if parser: results["parsed_value"] = parsed description_value = parsed diff --git a/src/euring/record.py b/src/euring/record.py index 62d2d7e..499716b 100644 --- a/src/euring/record.py +++ b/src/euring/record.py @@ -67,7 +67,7 @@ def _set_raw_value(self, key: str, value: object) -> None: return self._fields[key] = { "name": field["name"], - "source_raw_value": "" if value is None else f"{value}", + "raw_value": "" if value is None else f"{value}", "value": "" if value is None else f"{value}", "order": field["order"], } @@ -203,7 +203,7 @@ def _validate_record_rules(self) -> list[dict[str, object]]: for field in _fields_for_format(self.format): key = field["key"] field_state = self._fields.get(key, {}) - source_raw = field_state.get("source_raw_value") + source_raw = field_state.get("raw_value") if source_raw is not None: values_by_key[key] = source_raw continue diff --git a/tests/test_decoding.py b/tests/test_decoding.py index 3f28118..ee04a7b 100644 --- a/tests/test_decoding.py +++ b/tests/test_decoding.py @@ -107,7 +107,7 @@ def test_decode_euring2020_format_rejects_2000_plus(self): def test_decode_value_with_lookup(self): result = euring_decode_value("01012024", TYPE_INTEGER, length=8, lookup=lookup_date) - assert result["source_raw_value"] == "01012024" + assert result["raw_value"] == "01012024" assert result["value"] == 1012024 assert result["description"].isoformat() == "2024-01-01" diff --git a/tests/test_record.py b/tests/test_record.py index ad3f0cd..b6f2c49 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -16,7 +16,7 @@ def _values_from_record(record: str) -> dict[str, str]: decoded = EuringRecord.decode(record) values: dict[str, str] = {} for key, field in decoded.fields.items(): - raw_value = field.get("source_raw_value") + raw_value = field.get("raw_value") if raw_value is None: continue values[key] = raw_value From 7310e4b5c618354dbc877c94b4245f3aa6c82f2b Mon Sep 17 00:00:00 2001 From: Dylan Verheul Date: Sat, 24 Jan 2026 13:27:59 +0100 Subject: [PATCH 04/12] wip --- docs/examples.rst | 3 +- docs/python_reference.rst | 3 +- src/euring/field_schema.py | 56 ++++++++++++++++++------------------ src/euring/fields.py | 22 ++++++++++++-- src/euring/parsing.py | 8 ++---- src/euring/record.py | 59 +++++++++++++++++++++++++------------- src/euring/rules.py | 2 +- tests/test_decoding.py | 6 ++-- tests/test_fields.py | 13 ++++----- tests/test_record.py | 1 + 10 files changed, 105 insertions(+), 68 deletions(-) diff --git a/docs/examples.rst b/docs/examples.rst index ac6248a..01c25cf 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -29,7 +29,8 @@ Serialization always re-encodes from the current typed values. For EURING2000, fixed-width output uses hyphens for empty values and zero-pads integers to the declared length. For EURING2000+/EURING2020, empty values remain empty strings except for fields that explicitly use hyphen placeholders (for example Elapsed -Time, Distance, and Direction). +Time, Distance, and Direction); these placeholders are defined per-field via +the ``empty_value`` schema attribute. Exporting records ----------------- diff --git a/docs/python_reference.rst b/docs/python_reference.rst index fa5ce88..aafbf25 100644 --- a/docs/python_reference.rst +++ b/docs/python_reference.rst @@ -39,4 +39,5 @@ Serialization always re-encodes from the current values. For EURING2000, fixed-width output uses hyphens for empty values and zero-pads integers to the declared length. For EURING2000+/EURING2020, empty values remain empty strings except for fields that explicitly use hyphen placeholders (for example Elapsed -Time, Distance, and Direction). +Time, Distance, and Direction); these placeholders are defined per-field via +the ``empty_value`` schema attribute. diff --git a/src/euring/field_schema.py b/src/euring/field_schema.py index 20d86c1..40efcf1 100644 --- a/src/euring/field_schema.py +++ b/src/euring/field_schema.py @@ -33,22 +33,22 @@ class EuringField(Mapping[str, Any]): type_name: str = "" required: bool = True length: int | None = None - min_length: int | None = None - max_length: int | None = None + variable_length: bool = False + empty_value: str | None = None def _mapping(self) -> dict[str, Any]: mapping: dict[str, Any] = { "key": self.key, "name": self.name, - "type": self.type_name, + "type_name": self.type_name, "required": self.required, } if self.length is not None: mapping["length"] = self.length - if self.min_length is not None: - mapping["min_length"] = self.min_length - if self.max_length is not None: - mapping["max_length"] = self.max_length + if self.variable_length: + mapping["variable_length"] = True + if self.empty_value is not None: + mapping["empty_value"] = self.empty_value return mapping def __getitem__(self, key: str) -> Any: @@ -61,22 +61,20 @@ def __len__(self) -> int: return len(self._mapping()) def _is_required(self) -> bool: - if self.min_length == 0: - return False return self.required def _validate_length(self, raw: str) -> None: value_length = len(raw) - if self.length is not None and value_length != self.length: - raise EuringConstraintException(f'Value "{raw}" is length {value_length} instead of {self.length}.') - if self.min_length is not None and value_length < self.min_length: - raise EuringConstraintException( - f'Value "{raw}" is length {value_length}, should be at least {self.min_length}.' - ) - if self.max_length is not None and value_length > self.max_length: - raise EuringConstraintException( - f'Value "{raw}" is length {value_length}, should be at most {self.max_length}.' - ) + if self.length is not None: + if self.variable_length: + if value_length > self.length: + raise EuringConstraintException( + f'Value "{raw}" is length {value_length}, should be at most {self.length}.' + ) + elif value_length != self.length: + raise EuringConstraintException(f'Value "{raw}" is length {value_length} instead of {self.length}.') + if self.length is None and self.variable_length: + raise EuringConstraintException("Variable-length fields require a length limit.") def _validate_raw(self, raw: str) -> str | None: if raw == "": @@ -182,11 +180,13 @@ def coerce_field(definition: Mapping[str, Any]) -> EuringField: return definition key = definition.get("key", "") name = definition.get("name", key) - type_name = definition.get("type") or definition.get("type_name") or "" + if "type" in definition and "type_name" not in definition: + raise ValueError('Field definitions must use "type_name" instead of legacy "type".') + type_name = definition.get("type_name") or "" required = definition.get("required", True) length = definition.get("length") - min_length = definition.get("min_length") - max_length = definition.get("max_length") + variable_length = bool(definition.get("variable_length", False)) + empty_value = definition.get("empty_value") parser = definition.get("parser") lookup = definition.get("lookup") if parser is not None: @@ -196,8 +196,8 @@ def coerce_field(definition: Mapping[str, Any]) -> EuringField: type_name=type_name, required=required, length=length, - min_length=min_length, - max_length=max_length, + variable_length=variable_length, + empty_value=empty_value, parser=parser, lookup=lookup, ) @@ -208,8 +208,8 @@ def coerce_field(definition: Mapping[str, Any]) -> EuringField: type_name=type_name, required=required, length=length, - min_length=min_length, - max_length=max_length, + variable_length=variable_length, + empty_value=empty_value, lookup=lookup, ) return EuringField( @@ -218,6 +218,6 @@ def coerce_field(definition: Mapping[str, Any]) -> EuringField: type_name=type_name, required=required, length=length, - min_length=min_length, - max_length=max_length, + variable_length=variable_length, + empty_value=empty_value, ) diff --git a/src/euring/fields.py b/src/euring/fields.py index a8cb96d..640055f 100644 --- a/src/euring/fields.py +++ b/src/euring/fields.py @@ -220,13 +220,24 @@ key="distance", type_name=TYPE_INTEGER, length=5, + variable_length=True, + empty_value="-----", + ), + EuringFormattedField( + name="Direction", + key="direction", + type_name=TYPE_INTEGER, + length=3, + empty_value="---", + parser=parse_direction, ), - EuringFormattedField(name="Direction", key="direction", type_name=TYPE_INTEGER, length=3, parser=parse_direction), EuringField( name="Elapsed Time", key="elapsed_time", type_name=TYPE_INTEGER, length=5, + variable_length=True, + empty_value="-----", ), # Starting with Wing Length, fields are no longer required. Source: EURING Exchange Code 2020 v202 (13 Nov 2024). EuringField(name="Wing Length", key="wing_length", type_name=TYPE_NUMERIC, required=False), @@ -303,7 +314,14 @@ required=False, lookup=LOOKUP_BROOD_PATCH, ), - EuringField(name="Primary Score", key="primary_score", type_name=TYPE_INTEGER, max_length=2, required=False), + EuringField( + name="Primary Score", + key="primary_score", + type_name=TYPE_INTEGER, + length=2, + variable_length=True, + required=False, + ), EuringField(name="Primary Moult", key="primary_moult", type_name=TYPE_ALPHANUMERIC, length=10, required=False), EuringFormattedField( name="Old Greater Coverts", diff --git a/src/euring/parsing.py b/src/euring/parsing.py index 518191b..6444e0e 100644 --- a/src/euring/parsing.py +++ b/src/euring/parsing.py @@ -9,8 +9,7 @@ def euring_decode_value( type: str, required: bool = True, length: int | None = None, - min_length: int | None = None, - max_length: int | None = None, + variable_length: bool = False, parser: Callable[[str], Any] | None = None, lookup: Mapping[str, str] | Callable[[str], str] | None = None, ) -> dict[str, Any] | None: @@ -18,11 +17,10 @@ def euring_decode_value( definition = { "name": "Value", "key": "value", - "type": type, + "type_name": type, "required": required, "length": length, - "min_length": min_length, - "max_length": max_length, + "variable_length": variable_length, "parser": parser, "lookup": lookup, } diff --git a/src/euring/record.py b/src/euring/record.py index 499716b..e5fdd09 100644 --- a/src/euring/record.py +++ b/src/euring/record.py @@ -139,7 +139,6 @@ def _validate_fields(self) -> list[dict[str, object]]: lat_value = self._fields.get("latitude", {}).get("value") lng_value = self._fields.get("longitude", {}).get("value") needs_geo_dots = lat_value not in (None, "") or lng_value not in (None, "") - variable_length_keys = {"distance", "direction", "elapsed_time"} for index, field in enumerate(fields): key = field["key"] field_state = self._fields.get(key, {}) @@ -147,9 +146,8 @@ def _validate_fields(self) -> list[dict[str, object]]: had_empty_value = value in (None, "") try: field_def = field - if self.format != FORMAT_EURING2000 and key in variable_length_keys and field.get("length"): - field_def = {**field, "max_length": field["length"]} - field_def.pop("length", None) + if self.format == FORMAT_EURING2000 and field.get("variable_length"): + field_def = {**field, "variable_length": False} field_obj = coerce_field(field_def) raw_value = _serialize_field_value(field, value, self.format) if key == "geographical_coordinates" and had_empty_value and needs_geo_dots: @@ -271,7 +269,7 @@ def _fixed_width_fields() -> list[dict[str, object]]: for field in EURING_FIELDS: if start >= 94: break - length = field.get("length", field.get("max_length")) + length = field.get("length") if not length: break fields.append({**field, "length": length}) @@ -295,37 +293,58 @@ def _format_fixed_width(values_by_key: dict[str, str], fields: list[dict[str, ob return "".join(parts) +def _is_empty(value: object) -> bool: + """Return whether a value should be treated as empty.""" + return value in (None, "") + + +def _hyphens(length: int) -> str: + """Return a hyphen placeholder string of the given length.""" + return "-" * length + + def _serialize_field_value(field: dict[str, object], value: object, format: str) -> str: """Encode a typed field value into a EURING raw string.""" key = field["key"] - length = field.get("length") or field.get("max_length") - variable_length_keys = {"distance", "direction", "elapsed_time"} - if value in (None, ""): - type_name = field.get("type") or field.get("type_name") or "" - if type_name == TYPE_INTEGER and length: - return "-" * int(length) - if format == FORMAT_EURING2000 and length: - return "-" * int(length) - if format in {FORMAT_EURING2000PLUS, FORMAT_EURING2020} and key in variable_length_keys and length: - return "-" * int(length) + length = field.get("length") + length = 0 if length is None else int(length) + + # Empty fields + if _is_empty(value): + empty_value = field.get("empty_value") + if empty_value: + return f"{empty_value}" + if length: + if format == FORMAT_EURING2000: + return _hyphens(length) + if field.get("required", True) and field.get("type_name") == TYPE_INTEGER: + return _hyphens(length) return "" + + # Special case: geographical_coordinates if key == "geographical_coordinates" and isinstance(value, dict): if "lat" not in value or "lng" not in value: raise EuringConstraintException("Geographical coordinates require both lat and lng values.") return f"{euring_lat_to_dms(float(value['lat']))}{euring_lng_to_dms(float(value['lng']))}" + + # Non-empty fields value_str = f"{value}" - type_name = field.get("type") or field.get("type_name") or "" + type_name = field.get("type_name") or "" + if type_name in {TYPE_NUMERIC, TYPE_NUMERIC_SIGNED}: + # Remove zeroes on the right, remove decimal separator if no decimals + if "." in value_str: + value_str = value_str.rstrip("0").rstrip(".") if type_name == TYPE_INTEGER: if isinstance(value, str) and value and set(value) == {"-"}: return _serialize_field_value(field, None, format) if not value_str.isdigit(): raise EuringTypeException(f'Value "{value}" is not valid for type {TYPE_INTEGER}.') - is_variable = format != FORMAT_EURING2000 and key in variable_length_keys - if length and not is_variable: - value_str = value_str.zfill(int(length)) + if length and (format == FORMAT_EURING2000 or not field.get("variable_length", False)): + value_str = value_str.zfill(length) return value_str if type_name in {TYPE_NUMERIC, TYPE_NUMERIC_SIGNED}: - return value_str + if format == FORMAT_EURING2000 or (length and not field.get("variable_length", False)): + value_str = value_str.zfill(length) return value_str diff --git a/src/euring/rules.py b/src/euring/rules.py index b051d83..aed93d5 100644 --- a/src/euring/rules.py +++ b/src/euring/rules.py @@ -12,7 +12,7 @@ for _field in EURING_FIELDS: if _start >= 94: break - _length = _field.get("length", _field.get("max_length")) + _length = _field.get("length") if not _length: break _fixed_width_keys.append(_field["key"]) diff --git a/tests/test_decoding.py b/tests/test_decoding.py index ee04a7b..535b697 100644 --- a/tests/test_decoding.py +++ b/tests/test_decoding.py @@ -116,7 +116,7 @@ def test_decode_value_invalid_type(self): euring_decode_value("ABC", TYPE_INTEGER, length=3) def test_decode_value_optional_empty(self): - result = euring_decode_value("", TYPE_INTEGER, min_length=0) + result = euring_decode_value("", TYPE_INTEGER, required=False) assert result is None def test_decode_value_length_mismatch(self): @@ -125,11 +125,11 @@ def test_decode_value_length_mismatch(self): def test_decode_value_min_length_error(self): with pytest.raises(EuringConstraintException): - euring_decode_value("1", TYPE_INTEGER, min_length=2) + euring_decode_value("1", TYPE_INTEGER, variable_length=True) def test_decode_value_max_length_error(self): with pytest.raises(EuringConstraintException): - euring_decode_value("123", TYPE_INTEGER, max_length=2) + euring_decode_value("123", TYPE_INTEGER, length=2, variable_length=True) def test_decode_value_with_parser(self): result = euring_decode_value( diff --git a/tests/test_fields.py b/tests/test_fields.py index 033f818..5456b41 100644 --- a/tests/test_fields.py +++ b/tests/test_fields.py @@ -35,21 +35,21 @@ def test_field_shape_and_types(): for field in EURING_FIELDS: assert field["name"] assert field["key"] - assert field["type"] in allowed_types + assert field["type_name"] in allowed_types assert re.match(r"^[a-z0-9_]+$", field["key"]) is not None if "length" in field: assert isinstance(field["length"], int) assert field["length"] > 0 - for bound in ("min_length", "max_length"): - if bound in field: - assert isinstance(field[bound], int) - assert field[bound] >= 0 + if "variable_length" in field: + assert isinstance(field["variable_length"], bool) if "required" in field: assert isinstance(field["required"], bool) def test_field_length_exclusivity(): for field in EURING_FIELDS: + if field.get("variable_length"): + assert "length" in field if "length" in field: assert "min_length" not in field assert "max_length" not in field @@ -57,5 +57,4 @@ def test_field_length_exclusivity(): def test_field_min_length_zero_not_required(): for field in EURING_FIELDS: - if field.get("min_length") == 0: - assert field.get("required", True) is False + assert field.get("min_length") is None diff --git a/tests/test_record.py b/tests/test_record.py index b6f2c49..bb3a52a 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -35,6 +35,7 @@ def test_record_euring2000_round_trip(): record = EuringRecord("euring2000") record.update(values) serialized = record.serialize() + assert len(serialized) == 94 decoded = EuringRecord.decode(serialized) assert decoded.display_format == "EURING2000" From a961b38b621086baf471554a7deafe4893c8a347 Mon Sep 17 00:00:00 2001 From: Dylan Verheul Date: Sat, 24 Jan 2026 13:37:47 +0100 Subject: [PATCH 05/12] wip --- src/euring/record.py | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/src/euring/record.py b/src/euring/record.py index e5fdd09..58b8ea5 100644 --- a/src/euring/record.py +++ b/src/euring/record.py @@ -3,7 +3,7 @@ import json import warnings -from .exceptions import EuringConstraintException, EuringException, EuringTypeException +from .exceptions import EuringConstraintException, EuringException from .field_schema import coerce_field from .fields import EURING_FIELDS from .formats import ( @@ -242,7 +242,7 @@ def _serialize(self) -> str: for field in fields: key = field["key"] value = self._fields.get(key, {}).get("value") - if key == "geographical_coordinates" and value in (None, "") and geo_placeholder: + if key == "geographical_coordinates" and _is_empty(value) and geo_placeholder: values_by_key[key] = geo_placeholder continue values_by_key[key] = _serialize_field_value(field, value, self.format) @@ -308,6 +308,19 @@ def _serialize_field_value(field: dict[str, object], value: object, format: str) key = field["key"] length = field.get("length") length = 0 if length is None else int(length) + type_name = field.get("type_name") or "" + pad_integer = ( + type_name == TYPE_INTEGER + and length + and (format == FORMAT_EURING2000 or not field.get("variable_length", False)) + ) + pad_numeric = type_name in {TYPE_NUMERIC, TYPE_NUMERIC_SIGNED} and length and format == FORMAT_EURING2000 + field_def = field + if format == FORMAT_EURING2000 and field.get("variable_length"): + field_def = {**field, "variable_length": False} + if pad_integer or pad_numeric: + field_def = {**field_def, "variable_length": True} + field_obj = coerce_field(field_def) # Empty fields if _is_empty(value): @@ -328,23 +341,20 @@ def _serialize_field_value(field: dict[str, object], value: object, format: str) return f"{euring_lat_to_dms(float(value['lat']))}{euring_lng_to_dms(float(value['lng']))}" # Non-empty fields - value_str = f"{value}" - type_name = field.get("type_name") or "" + if type_name == TYPE_INTEGER and isinstance(value, str) and value and set(value) == {"-"}: + return _serialize_field_value(field, None, format) + value_str = field_obj.encode(value) if type_name in {TYPE_NUMERIC, TYPE_NUMERIC_SIGNED}: # Remove zeroes on the right, remove decimal separator if no decimals if "." in value_str: value_str = value_str.rstrip("0").rstrip(".") - if type_name == TYPE_INTEGER: - if isinstance(value, str) and value and set(value) == {"-"}: - return _serialize_field_value(field, None, format) - if not value_str.isdigit(): - raise EuringTypeException(f'Value "{value}" is not valid for type {TYPE_INTEGER}.') - if length and (format == FORMAT_EURING2000 or not field.get("variable_length", False)): + if pad_numeric: value_str = value_str.zfill(length) return value_str - if type_name in {TYPE_NUMERIC, TYPE_NUMERIC_SIGNED}: - if format == FORMAT_EURING2000 or (length and not field.get("variable_length", False)): + if type_name == TYPE_INTEGER: + if pad_integer: value_str = value_str.zfill(length) + return value_str return value_str From 646b2d9ee35b6bdeb3faf6818f32ab30b2e16f94 Mon Sep 17 00:00:00 2001 From: Dylan Verheul Date: Sat, 24 Jan 2026 14:05:27 +0100 Subject: [PATCH 06/12] wip --- src/euring/field_schema.py | 27 +++++++++++++++++++-------- src/euring/fields.py | 22 ++++++++++++++++++++++ src/euring/record.py | 37 ++++++++++++++++++++++--------------- tests/test_record.py | 8 ++++---- 4 files changed, 67 insertions(+), 27 deletions(-) diff --git a/src/euring/field_schema.py b/src/euring/field_schema.py index 40efcf1..2af92ec 100644 --- a/src/euring/field_schema.py +++ b/src/euring/field_schema.py @@ -4,6 +4,8 @@ from dataclasses import dataclass from typing import Any +from euring.utils import euring_lat_to_dms, euring_lng_to_dms + from .codes import lookup_description from .exceptions import EuringConstraintException, EuringTypeException from .types import ( @@ -108,15 +110,24 @@ def parse(self, raw: str) -> Any | None: def encode(self, value: Any | None) -> str: """Encode a Python value to raw text.""" - if value is None or value == "": - if self._is_required(): - raise EuringConstraintException('Required field, empty value "" is not permitted.') + if value in (None, ""): return "" - raw = str(value) - self._validate_length(raw) - if self.type_name and not is_valid_type(raw, self.type_name): - raise EuringTypeException(f'Value "{raw}" is not valid for type {self.type_name}.') - return raw + + if self.key == "geographical_coordinates" and isinstance(value, dict): + if "lat" not in value or "lng" not in value: + raise EuringConstraintException("Geographical coordinates require both lat and lng values.") + return f"{euring_lat_to_dms(float(value['lat']))}{euring_lng_to_dms(float(value['lng']))}" + + str_value = f"{value}" + if self.type_name in {TYPE_NUMERIC, TYPE_NUMERIC_SIGNED}: + str_value = str_value.rstrip("0").rstrip(".") + if ( + self.type_name in {TYPE_INTEGER, TYPE_NUMERIC, TYPE_NUMERIC_SIGNED} + and self.length + and not self.variable_length + ): + str_value = str_value.zfill(self.length) + return str_value def describe(self, value: Any | None) -> Any | None: """Return a display description for a parsed value.""" diff --git a/src/euring/fields.py b/src/euring/fields.py index 640055f..e2d515f 100644 --- a/src/euring/fields.py +++ b/src/euring/fields.py @@ -372,3 +372,25 @@ ), EuringField(name="More Other Marks", key="more_other_marks", type_name=TYPE_ALPHABETIC, required=False), ] + +EURING2020_FIELDS = EURING_FIELDS + +_reference_index = None +for _index, _field in enumerate(EURING_FIELDS): + if _field.get("key") == "reference": + _reference_index = _index + break +EURING2000PLUS_FIELDS = EURING_FIELDS if _reference_index is None else EURING_FIELDS[: _reference_index + 1] + +EURING2000_FIELDS = [] +_start = 0 +for _field in EURING_FIELDS: + if _start >= 94: + break + _length = _field.get("length") + if not _length: + break + EURING2000_FIELDS.append(_field) + _start += _length + +del _field, _index, _length, _reference_index, _start diff --git a/src/euring/record.py b/src/euring/record.py index 58b8ea5..c03a254 100644 --- a/src/euring/record.py +++ b/src/euring/record.py @@ -5,7 +5,7 @@ from .exceptions import EuringConstraintException, EuringException from .field_schema import coerce_field -from .fields import EURING_FIELDS +from .fields import EURING2000_FIELDS, EURING2000PLUS_FIELDS, EURING2020_FIELDS from .formats import ( FORMAT_EURING2000, FORMAT_EURING2000PLUS, @@ -242,10 +242,19 @@ def _serialize(self) -> str: for field in fields: key = field["key"] value = self._fields.get(key, {}).get("value") - if key == "geographical_coordinates" and _is_empty(value) and geo_placeholder: - values_by_key[key] = geo_placeholder + if key == "geographical_coordinates": + if _is_empty(value) and geo_placeholder: + values_by_key[key] = geo_placeholder + continue + raw_value = _serialize_field_value(field, value, self.format) + if raw_value == "": + values_by_key[key] = raw_value continue - values_by_key[key] = _serialize_field_value(field, value, self.format) + field_def = field + if self.format == FORMAT_EURING2000 and field.get("variable_length"): + field_def = {**field, "variable_length": False} + field_obj = coerce_field(field_def) + values_by_key[key] = field_obj.encode(raw_value) if self.format == FORMAT_EURING2000: return _format_fixed_width(values_by_key, _fixed_width_fields()) return "|".join(values_by_key.get(field["key"], "") for field in fields) @@ -254,25 +263,23 @@ def _serialize(self) -> str: def _fields_for_format(format: str) -> list[dict[str, object]]: """Return the field list for the target format.""" if format == FORMAT_EURING2000: - return _fixed_width_fields() + return EURING2000_FIELDS if format == FORMAT_EURING2000PLUS: - for index, field in enumerate(EURING_FIELDS): - if field.get("key") == "reference": - return EURING_FIELDS[: index + 1] - return EURING_FIELDS + return EURING2000PLUS_FIELDS + return EURING2020_FIELDS def _fixed_width_fields() -> list[dict[str, object]]: """Return field definitions for the EURING2000 fixed-width layout.""" fields: list[dict[str, object]] = [] start = 0 - for field in EURING_FIELDS: + for field in EURING2000_FIELDS: if start >= 94: break length = field.get("length") if not length: break - fields.append({**field, "length": length}) + fields.append(field) start += length return fields @@ -541,7 +548,7 @@ def _normalize_source_format(source_format: str | None, value: str) -> str: def _field_index(key: str) -> int: """Return the field index for a given key.""" - for index, field in enumerate(EURING_FIELDS): + for index, field in enumerate(EURING2020_FIELDS): if field.get("key") == key: return index raise ValueError(f'Unknown field key "{key}".') @@ -589,16 +596,16 @@ def _decode_raw_record(value: object, format: str | None) -> tuple[str, dict[str ) current_format = normalized or FORMAT_EURING2000PLUS for index, raw_value in enumerate(fields): - if index >= len(EURING_FIELDS): + if index >= len(EURING2020_FIELDS): break - values_by_key[EURING_FIELDS[index]["key"]] = raw_value + values_by_key[EURING2020_FIELDS[index]["key"]] = raw_value if normalized is None and current_format in {FORMAT_EURING2000PLUS, FORMAT_EURING2020}: if requires_euring2020(values_by_key): current_format = FORMAT_EURING2020 return current_format, values_by_key, record_errors -_FIELD_MAP = {field["key"]: {**field, "order": index} for index, field in enumerate(EURING_FIELDS)} +_FIELD_MAP = {field["key"]: {**field, "order": index} for index, field in enumerate(EURING2020_FIELDS)} def _field_positions(fields: list[dict[str, object]]) -> dict[str, dict[str, int]]: diff --git a/tests/test_record.py b/tests/test_record.py index bb3a52a..b8c111d 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -247,14 +247,14 @@ def test_record_validate_without_record_uses_fixed_width(): def test_fields_for_format_euring2000plus_without_reference(monkeypatch): """EURING2000PLUS should return all fields when reference is missing.""" fields = [{"key": "alpha", "length": 1}, {"key": "beta", "length": 1}] - monkeypatch.setattr(record_module, "EURING_FIELDS", fields) + monkeypatch.setattr(record_module, "EURING2000PLUS_FIELDS", fields) assert _fields_for_format("euring2000plus") == fields def test_fixed_width_fields_breaks_on_missing_length(monkeypatch): """Fixed-width fields should stop when length metadata is missing.""" fields = [{"key": "alpha", "length": 1}, {"key": "beta"}] - monkeypatch.setattr(record_module, "EURING_FIELDS", fields) + monkeypatch.setattr(record_module, "EURING2000_FIELDS", fields) result = _fixed_width_fields() assert result == [{"key": "alpha", "length": 1}] @@ -262,7 +262,7 @@ def test_fixed_width_fields_breaks_on_missing_length(monkeypatch): def test_fixed_width_fields_breaks_at_cutoff(monkeypatch): """Fixed-width fields should stop once reaching 94 characters.""" fields = [{"key": "alpha", "length": 94}, {"key": "beta", "length": 1}] - monkeypatch.setattr(record_module, "EURING_FIELDS", fields) + monkeypatch.setattr(record_module, "EURING2000_FIELDS", fields) result = _fixed_width_fields() assert result == [{"key": "alpha", "length": 94}] @@ -270,6 +270,6 @@ def test_fixed_width_fields_breaks_at_cutoff(monkeypatch): def test_fixed_width_fields_complete_without_break(monkeypatch): """Fixed-width fields should include all fields when under the cutoff.""" fields = [{"key": "alpha", "length": 1}, {"key": "beta", "length": 2}] - monkeypatch.setattr(record_module, "EURING_FIELDS", fields) + monkeypatch.setattr(record_module, "EURING2000_FIELDS", fields) result = _fixed_width_fields() assert result == [{"key": "alpha", "length": 1}, {"key": "beta", "length": 2}] From 51e74a5eb1de2f72a8faf82110d9a0857b2783eb Mon Sep 17 00:00:00 2001 From: Dylan Verheul Date: Sat, 24 Jan 2026 14:11:47 +0100 Subject: [PATCH 07/12] Add tests --- src/euring/fields.py | 16 ++++++---------- tests/test_record.py | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/src/euring/fields.py b/src/euring/fields.py index e2d515f..2d9cdb9 100644 --- a/src/euring/fields.py +++ b/src/euring/fields.py @@ -382,15 +382,11 @@ break EURING2000PLUS_FIELDS = EURING_FIELDS if _reference_index is None else EURING_FIELDS[: _reference_index + 1] -EURING2000_FIELDS = [] -_start = 0 -for _field in EURING_FIELDS: - if _start >= 94: - break - _length = _field.get("length") - if not _length: +_elapsed_time_index = None +for _index, _field in enumerate(EURING_FIELDS): + if _field.get("key") == "elapsed_time": + _elapsed_time_index = _index break - EURING2000_FIELDS.append(_field) - _start += _length +EURING2000_FIELDS = EURING_FIELDS if _elapsed_time_index is None else EURING_FIELDS[: _elapsed_time_index + 1] -del _field, _index, _length, _reference_index, _start +del _field, _index, _elapsed_time_index, _reference_index diff --git a/tests/test_record.py b/tests/test_record.py index b8c111d..00560d5 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -6,6 +6,7 @@ import pytest +from euring.fields import EURING2000_FIELDS, EURING2000PLUS_FIELDS, EURING2020_FIELDS, EURING_FIELDS import euring.record as record_module from euring import EuringRecord from euring.formats import FORMAT_JSON @@ -273,3 +274,23 @@ def test_fixed_width_fields_complete_without_break(monkeypatch): monkeypatch.setattr(record_module, "EURING2000_FIELDS", fields) result = _fixed_width_fields() assert result == [{"key": "alpha", "length": 1}, {"key": "beta", "length": 2}] + + +def test_euring_fields(): + assert len(EURING_FIELDS) == 64 + + +def test_euring2020_fields(): + assert len(EURING2020_FIELDS) == len(EURING_FIELDS) + assert len(EURING2020_FIELDS) == 64 + assert EURING2020_FIELDS == EURING_FIELDS + + +def test_euring2000plus_fields(): + assert len(EURING2000PLUS_FIELDS) == 60 + assert EURING2000PLUS_FIELDS == EURING_FIELDS[:60] + + +def test_euring2000_fields(): + assert len(EURING2000_FIELDS) == 33 + assert EURING2000_FIELDS == EURING_FIELDS[:33] From 5d4a97bfe2ee57f56f7cdaa1b60e553442dbfd2c Mon Sep 17 00:00:00 2001 From: Dylan Verheul Date: Sat, 24 Jan 2026 14:14:30 +0100 Subject: [PATCH 08/12] Add tests --- src/euring/fields.py | 18 ++---------------- tests/test_fields.py | 22 +++++++++++++++++++++- tests/test_record.py | 21 --------------------- 3 files changed, 23 insertions(+), 38 deletions(-) diff --git a/src/euring/fields.py b/src/euring/fields.py index 2d9cdb9..52fead8 100644 --- a/src/euring/fields.py +++ b/src/euring/fields.py @@ -374,19 +374,5 @@ ] EURING2020_FIELDS = EURING_FIELDS - -_reference_index = None -for _index, _field in enumerate(EURING_FIELDS): - if _field.get("key") == "reference": - _reference_index = _index - break -EURING2000PLUS_FIELDS = EURING_FIELDS if _reference_index is None else EURING_FIELDS[: _reference_index + 1] - -_elapsed_time_index = None -for _index, _field in enumerate(EURING_FIELDS): - if _field.get("key") == "elapsed_time": - _elapsed_time_index = _index - break -EURING2000_FIELDS = EURING_FIELDS if _elapsed_time_index is None else EURING_FIELDS[: _elapsed_time_index + 1] - -del _field, _index, _elapsed_time_index, _reference_index +EURING2000PLUS_FIELDS = EURING_FIELDS[:60] +EURING2000_FIELDS = EURING_FIELDS[:33] diff --git a/tests/test_fields.py b/tests/test_fields.py index 5456b41..c556579 100644 --- a/tests/test_fields.py +++ b/tests/test_fields.py @@ -2,7 +2,7 @@ import re -from euring.fields import EURING_FIELDS +from euring.fields import EURING2000_FIELDS, EURING2000PLUS_FIELDS, EURING2020_FIELDS, EURING_FIELDS from euring.types import ( TYPE_ALPHABETIC, TYPE_ALPHANUMERIC, @@ -13,6 +13,26 @@ ) +def test_euring_fields(): + assert len(EURING_FIELDS) == 64 + + +def test_euring2020_fields(): + assert len(EURING2020_FIELDS) == len(EURING_FIELDS) + assert len(EURING2020_FIELDS) == 64 + assert EURING2020_FIELDS == EURING_FIELDS + + +def test_euring2000plus_fields(): + assert len(EURING2000PLUS_FIELDS) == 60 + assert EURING2000PLUS_FIELDS == EURING_FIELDS[:60] + + +def test_euring2000_fields(): + assert len(EURING2000_FIELDS) == 33 + assert EURING2000_FIELDS == EURING_FIELDS[:33] + + def test_field_uniqueness(): keys = [field["key"] for field in EURING_FIELDS] names = [field["name"] for field in EURING_FIELDS] diff --git a/tests/test_record.py b/tests/test_record.py index 00560d5..b8c111d 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -6,7 +6,6 @@ import pytest -from euring.fields import EURING2000_FIELDS, EURING2000PLUS_FIELDS, EURING2020_FIELDS, EURING_FIELDS import euring.record as record_module from euring import EuringRecord from euring.formats import FORMAT_JSON @@ -274,23 +273,3 @@ def test_fixed_width_fields_complete_without_break(monkeypatch): monkeypatch.setattr(record_module, "EURING2000_FIELDS", fields) result = _fixed_width_fields() assert result == [{"key": "alpha", "length": 1}, {"key": "beta", "length": 2}] - - -def test_euring_fields(): - assert len(EURING_FIELDS) == 64 - - -def test_euring2020_fields(): - assert len(EURING2020_FIELDS) == len(EURING_FIELDS) - assert len(EURING2020_FIELDS) == 64 - assert EURING2020_FIELDS == EURING_FIELDS - - -def test_euring2000plus_fields(): - assert len(EURING2000PLUS_FIELDS) == 60 - assert EURING2000PLUS_FIELDS == EURING_FIELDS[:60] - - -def test_euring2000_fields(): - assert len(EURING2000_FIELDS) == 33 - assert EURING2000_FIELDS == EURING_FIELDS[:33] From a4f61f3873bfc83bf3ddc67bf28039b799a590c5 Mon Sep 17 00:00:00 2001 From: Dylan Verheul Date: Sat, 24 Jan 2026 14:24:30 +0100 Subject: [PATCH 09/12] wip --- src/euring/fields.py | 3 ++- src/euring/record.py | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/euring/fields.py b/src/euring/fields.py index 52fead8..5cf0d82 100644 --- a/src/euring/fields.py +++ b/src/euring/fields.py @@ -373,6 +373,7 @@ EuringField(name="More Other Marks", key="more_other_marks", type_name=TYPE_ALPHABETIC, required=False), ] -EURING2020_FIELDS = EURING_FIELDS +# These are the field definitions per format as per the EURING Code Manual +EURING2020_FIELDS = EURING_FIELDS # 64 fields EURING2000PLUS_FIELDS = EURING_FIELDS[:60] EURING2000_FIELDS = EURING_FIELDS[:33] diff --git a/src/euring/record.py b/src/euring/record.py index c03a254..ad8ff4b 100644 --- a/src/euring/record.py +++ b/src/euring/record.py @@ -266,7 +266,9 @@ def _fields_for_format(format: str) -> list[dict[str, object]]: return EURING2000_FIELDS if format == FORMAT_EURING2000PLUS: return EURING2000PLUS_FIELDS - return EURING2020_FIELDS + if format == FORMAT_EURING2020: + return EURING2020_FIELDS + raise EuringException(f"Unknown EuringRecord format: {format}.") def _fixed_width_fields() -> list[dict[str, object]]: From 7b3c9ba47e1f3b7144f92773293222ffcd92592d Mon Sep 17 00:00:00 2001 From: Dylan Verheul Date: Sat, 24 Jan 2026 14:27:36 +0100 Subject: [PATCH 10/12] wip --- src/euring/rules.py | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/src/euring/rules.py b/src/euring/rules.py index aed93d5..9489dcb 100644 --- a/src/euring/rules.py +++ b/src/euring/rules.py @@ -2,32 +2,15 @@ from __future__ import annotations -from .fields import EURING_FIELDS +from .fields import EURING2000_FIELDS, EURING2000PLUS_FIELDS, EURING2020_FIELDS, EURING_FIELDS from .formats import FORMAT_EURING2000, FORMAT_EURING2000PLUS, FORMAT_EURING2020 _FIELD_NAME_BY_KEY = {field["key"]: field["name"] for field in EURING_FIELDS} -_fixed_width_keys: list[str] = [] -_start = 0 -for _field in EURING_FIELDS: - if _start >= 94: - break - _length = _field.get("length") - if not _length: - break - _fixed_width_keys.append(_field["key"]) - _start += _length - -_plus_keys: list[str] = [] -for _field in EURING_FIELDS: - _plus_keys.append(_field["key"]) - if _field.get("key") == "reference": - break - -EURING2000_KEYS = tuple(_fixed_width_keys) -EURING2000PLUS_KEYS = tuple(_plus_keys) -EURING2020_KEYS = tuple(field["key"] for field in EURING_FIELDS) -EURING2020_ONLY_KEYS = ("latitude", "longitude", "current_place_code", "more_other_marks") +EURING2000_KEYS = tuple(field["key"] for field in EURING2000_FIELDS) +EURING2000PLUS_KEYS = tuple(field["key"] for field in EURING2000PLUS_FIELDS) +EURING2020_KEYS = tuple(field["key"] for field in EURING2020_FIELDS) +EURING2020_ONLY_KEYS = tuple(set(EURING2020_KEYS).difference(EURING2000PLUS_KEYS)) NON_EURING2000_KEYS = tuple(set(EURING2000PLUS_KEYS + EURING2020_ONLY_KEYS).difference(EURING2000_KEYS)) From 7e7b525c7d7f81ca3db741f7fa2bffe478e8d639 Mon Sep 17 00:00:00 2001 From: Dylan Verheul Date: Sat, 24 Jan 2026 14:41:23 +0100 Subject: [PATCH 11/12] wip --- src/euring/field_schema.py | 5 +++++ src/euring/record.py | 43 ++++---------------------------------- tests/test_converters.py | 6 +++--- tests/test_record.py | 41 +----------------------------------- 4 files changed, 13 insertions(+), 82 deletions(-) diff --git a/src/euring/field_schema.py b/src/euring/field_schema.py index 2af92ec..7dc2225 100644 --- a/src/euring/field_schema.py +++ b/src/euring/field_schema.py @@ -111,6 +111,8 @@ def parse(self, raw: str) -> Any | None: def encode(self, value: Any | None) -> str: """Encode a Python value to raw text.""" if value in (None, ""): + if self._is_required(): + raise EuringConstraintException('Required field, empty value "" is not permitted.') return "" if self.key == "geographical_coordinates" and isinstance(value, dict): @@ -127,6 +129,9 @@ def encode(self, value: Any | None) -> str: and not self.variable_length ): str_value = str_value.zfill(self.length) + self._validate_length(str_value) + if self.type_name and not is_valid_type(str_value, self.type_name): + raise EuringTypeException(f'Value "{str_value}" is not valid for type {self.type_name}.') return str_value def describe(self, value: Any | None) -> Any | None: diff --git a/src/euring/record.py b/src/euring/record.py index ad8ff4b..dd95062 100644 --- a/src/euring/record.py +++ b/src/euring/record.py @@ -16,7 +16,7 @@ unknown_format_error, ) from .rules import record_rule_errors, requires_euring2020 -from .types import TYPE_INTEGER, TYPE_NUMERIC, TYPE_NUMERIC_SIGNED +from .types import TYPE_INTEGER from .utils import euring_lat_to_dms, euring_lng_to_dms @@ -246,15 +246,7 @@ def _serialize(self) -> str: if _is_empty(value) and geo_placeholder: values_by_key[key] = geo_placeholder continue - raw_value = _serialize_field_value(field, value, self.format) - if raw_value == "": - values_by_key[key] = raw_value - continue - field_def = field - if self.format == FORMAT_EURING2000 and field.get("variable_length"): - field_def = {**field, "variable_length": False} - field_obj = coerce_field(field_def) - values_by_key[key] = field_obj.encode(raw_value) + values_by_key[key] = _serialize_field_value(field, value, self.format) if self.format == FORMAT_EURING2000: return _format_fixed_width(values_by_key, _fixed_width_fields()) return "|".join(values_by_key.get(field["key"], "") for field in fields) @@ -314,21 +306,11 @@ def _hyphens(length: int) -> str: def _serialize_field_value(field: dict[str, object], value: object, format: str) -> str: """Encode a typed field value into a EURING raw string.""" - key = field["key"] length = field.get("length") length = 0 if length is None else int(length) - type_name = field.get("type_name") or "" - pad_integer = ( - type_name == TYPE_INTEGER - and length - and (format == FORMAT_EURING2000 or not field.get("variable_length", False)) - ) - pad_numeric = type_name in {TYPE_NUMERIC, TYPE_NUMERIC_SIGNED} and length and format == FORMAT_EURING2000 field_def = field if format == FORMAT_EURING2000 and field.get("variable_length"): field_def = {**field, "variable_length": False} - if pad_integer or pad_numeric: - field_def = {**field_def, "variable_length": True} field_obj = coerce_field(field_def) # Empty fields @@ -343,28 +325,11 @@ def _serialize_field_value(field: dict[str, object], value: object, format: str) return _hyphens(length) return "" - # Special case: geographical_coordinates - if key == "geographical_coordinates" and isinstance(value, dict): - if "lat" not in value or "lng" not in value: - raise EuringConstraintException("Geographical coordinates require both lat and lng values.") - return f"{euring_lat_to_dms(float(value['lat']))}{euring_lng_to_dms(float(value['lng']))}" - # Non-empty fields + type_name = field.get("type_name") or "" if type_name == TYPE_INTEGER and isinstance(value, str) and value and set(value) == {"-"}: return _serialize_field_value(field, None, format) - value_str = field_obj.encode(value) - if type_name in {TYPE_NUMERIC, TYPE_NUMERIC_SIGNED}: - # Remove zeroes on the right, remove decimal separator if no decimals - if "." in value_str: - value_str = value_str.rstrip("0").rstrip(".") - if pad_numeric: - value_str = value_str.zfill(length) - return value_str - if type_name == TYPE_INTEGER: - if pad_integer: - value_str = value_str.zfill(length) - return value_str - return value_str + return field_obj.encode(value) def _convert_record_string( diff --git a/tests/test_converters.py b/tests/test_converters.py index 5298b29..e2fc24a 100644 --- a/tests/test_converters.py +++ b/tests/test_converters.py @@ -49,17 +49,17 @@ def test_convert_unknown_source_format(): convert_euring_record("value", source_format="bad", target_format="euring2000plus") -def test_convert_fixed_width_rejects_pipe(): +def test_convert_euring2000_rejects_pipe(): with pytest.raises(ValueError): convert_euring_record("A|B", source_format="euring2000", target_format="euring2000plus") -def test_convert_fixed_width_too_short(): +def test_convert_euring2000_too_short(): with pytest.raises(ValueError): convert_euring_record("A" * 10, source_format="euring2000", target_format="euring2000plus") -def test_convert_fixed_width_extra_data(): +def test_convert_euring2000_extra_data(): with pytest.raises(ValueError): convert_euring_record("A" * 94 + "X", source_format="euring2000", target_format="euring2000plus") diff --git a/tests/test_record.py b/tests/test_record.py index b8c111d..e0b2139 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -6,10 +6,9 @@ import pytest -import euring.record as record_module from euring import EuringRecord from euring.formats import FORMAT_JSON -from euring.record import _fields_for_format, _fixed_width_fields, _format_fixed_width +from euring.record import _fields_for_format, _format_fixed_width def _values_from_record(record: str) -> dict[str, str]: @@ -222,13 +221,6 @@ def test_fields_for_format_euring2000plus_truncates(): assert fields[-1]["key"] == "reference" -def test_fixed_width_fields_respects_max_length(): - """Fixed-width fields should not exceed the 94-character cutoff.""" - fields = _fixed_width_fields() - total_length = sum(field["length"] for field in fields) - assert total_length <= 94 - - def test_format_fixed_width_handles_empty_and_padding(): """Fixed-width formatting should pad and fill empty fields.""" fields = [{"key": "alpha", "length": 2}, {"key": "beta", "length": 3}] @@ -242,34 +234,3 @@ def test_record_validate_without_record_uses_fixed_width(): record.set("ringing_scheme", "GBB") errors = record.validate() assert isinstance(errors, dict) - - -def test_fields_for_format_euring2000plus_without_reference(monkeypatch): - """EURING2000PLUS should return all fields when reference is missing.""" - fields = [{"key": "alpha", "length": 1}, {"key": "beta", "length": 1}] - monkeypatch.setattr(record_module, "EURING2000PLUS_FIELDS", fields) - assert _fields_for_format("euring2000plus") == fields - - -def test_fixed_width_fields_breaks_on_missing_length(monkeypatch): - """Fixed-width fields should stop when length metadata is missing.""" - fields = [{"key": "alpha", "length": 1}, {"key": "beta"}] - monkeypatch.setattr(record_module, "EURING2000_FIELDS", fields) - result = _fixed_width_fields() - assert result == [{"key": "alpha", "length": 1}] - - -def test_fixed_width_fields_breaks_at_cutoff(monkeypatch): - """Fixed-width fields should stop once reaching 94 characters.""" - fields = [{"key": "alpha", "length": 94}, {"key": "beta", "length": 1}] - monkeypatch.setattr(record_module, "EURING2000_FIELDS", fields) - result = _fixed_width_fields() - assert result == [{"key": "alpha", "length": 94}] - - -def test_fixed_width_fields_complete_without_break(monkeypatch): - """Fixed-width fields should include all fields when under the cutoff.""" - fields = [{"key": "alpha", "length": 1}, {"key": "beta", "length": 2}] - monkeypatch.setattr(record_module, "EURING2000_FIELDS", fields) - result = _fixed_width_fields() - assert result == [{"key": "alpha", "length": 1}, {"key": "beta", "length": 2}] From 33aa6098958062edb4c3746c97ea2ba9ecacbc98 Mon Sep 17 00:00:00 2001 From: Dylan Verheul Date: Sat, 24 Jan 2026 15:09:34 +0100 Subject: [PATCH 12/12] wip --- src/euring/field_schema.py | 45 ++++++++++++++++++++++++--- src/euring/record.py | 64 +++++--------------------------------- tests/test_decoding.py | 4 --- 3 files changed, 48 insertions(+), 65 deletions(-) diff --git a/src/euring/field_schema.py b/src/euring/field_schema.py index 7dc2225..dd1c76a 100644 --- a/src/euring/field_schema.py +++ b/src/euring/field_schema.py @@ -8,6 +8,7 @@ from .codes import lookup_description from .exceptions import EuringConstraintException, EuringTypeException +from .formats import FORMAT_EURING2000 from .types import ( TYPE_ALPHABETIC, TYPE_ALPHANUMERIC, @@ -65,18 +66,16 @@ def __len__(self) -> int: def _is_required(self) -> bool: return self.required - def _validate_length(self, raw: str) -> None: - value_length = len(raw) + def _validate_length(self, raw: str, ignore_variable_length: bool = False) -> None: if self.length is not None: - if self.variable_length: + value_length = len(raw) + if self.variable_length and not ignore_variable_length: if value_length > self.length: raise EuringConstraintException( f'Value "{raw}" is length {value_length}, should be at most {self.length}.' ) elif value_length != self.length: raise EuringConstraintException(f'Value "{raw}" is length {value_length} instead of {self.length}.') - if self.length is None and self.variable_length: - raise EuringConstraintException("Variable-length fields require a length limit.") def _validate_raw(self, raw: str) -> str | None: if raw == "": @@ -134,6 +133,42 @@ def encode(self, value: Any | None) -> str: raise EuringTypeException(f'Value "{str_value}" is not valid for type {self.type_name}.') return str_value + def encode_for_format(self, value: Any | None, *, format: str) -> str: + """Encode a Python value to raw text for a specific EURING format.""" + if value in (None, ""): + if self.empty_value: + return self.empty_value + if self.length and format == FORMAT_EURING2000: + return "-" * self.length + if self.length and self.required and self.type_name == TYPE_INTEGER: + return "-" * self.length + return "" + + if self.key == "geographical_coordinates" and isinstance(value, dict): + if "lat" not in value or "lng" not in value: + raise EuringConstraintException("Geographical coordinates require both lat and lng values.") + return f"{euring_lat_to_dms(float(value['lat']))}{euring_lng_to_dms(float(value['lng']))}" + + if self.type_name == TYPE_INTEGER and isinstance(value, str) and value and set(value) == {"-"}: + return self.encode_for_format(None, format=format) + + str_value = f"{value}" + if self.type_name in {TYPE_NUMERIC, TYPE_NUMERIC_SIGNED}: + str_value = str_value.rstrip("0").rstrip(".") + + ignore_variable_length = format == FORMAT_EURING2000 + + if self.type_name in {TYPE_INTEGER, TYPE_NUMERIC, TYPE_NUMERIC_SIGNED} and self.length: + str_value = str_value.zfill(self.length) + + if self.variable_length and not ignore_variable_length: + str_value = str_value.lstrip("0") or "0" + + self._validate_length(str_value, ignore_variable_length=ignore_variable_length) + if self.type_name and not is_valid_type(str_value, self.type_name): + raise EuringTypeException(f'Value "{str_value}" is not valid for type {self.type_name}.') + return str_value + def describe(self, value: Any | None) -> Any | None: """Return a display description for a parsed value.""" return None diff --git a/src/euring/record.py b/src/euring/record.py index dd95062..13bfa3a 100644 --- a/src/euring/record.py +++ b/src/euring/record.py @@ -16,7 +16,6 @@ unknown_format_error, ) from .rules import record_rule_errors, requires_euring2020 -from .types import TYPE_INTEGER from .utils import euring_lat_to_dms, euring_lng_to_dms @@ -243,12 +242,12 @@ def _serialize(self) -> str: key = field["key"] value = self._fields.get(key, {}).get("value") if key == "geographical_coordinates": - if _is_empty(value) and geo_placeholder: + if value in (None, "") and geo_placeholder: values_by_key[key] = geo_placeholder continue values_by_key[key] = _serialize_field_value(field, value, self.format) if self.format == FORMAT_EURING2000: - return _format_fixed_width(values_by_key, _fixed_width_fields()) + return _format_fixed_width(values_by_key, EURING2000_FIELDS) return "|".join(values_by_key.get(field["key"], "") for field in fields) @@ -263,21 +262,6 @@ def _fields_for_format(format: str) -> list[dict[str, object]]: raise EuringException(f"Unknown EuringRecord format: {format}.") -def _fixed_width_fields() -> list[dict[str, object]]: - """Return field definitions for the EURING2000 fixed-width layout.""" - fields: list[dict[str, object]] = [] - start = 0 - for field in EURING2000_FIELDS: - if start >= 94: - break - length = field.get("length") - if not length: - break - fields.append(field) - start += length - return fields - - def _format_fixed_width(values_by_key: dict[str, str], fields: list[dict[str, object]]) -> str: """Serialize values into a fixed-width record.""" parts: list[str] = [] @@ -294,42 +278,10 @@ def _format_fixed_width(values_by_key: dict[str, str], fields: list[dict[str, ob return "".join(parts) -def _is_empty(value: object) -> bool: - """Return whether a value should be treated as empty.""" - return value in (None, "") - - -def _hyphens(length: int) -> str: - """Return a hyphen placeholder string of the given length.""" - return "-" * length - - def _serialize_field_value(field: dict[str, object], value: object, format: str) -> str: """Encode a typed field value into a EURING raw string.""" - length = field.get("length") - length = 0 if length is None else int(length) - field_def = field - if format == FORMAT_EURING2000 and field.get("variable_length"): - field_def = {**field, "variable_length": False} - field_obj = coerce_field(field_def) - - # Empty fields - if _is_empty(value): - empty_value = field.get("empty_value") - if empty_value: - return f"{empty_value}" - if length: - if format == FORMAT_EURING2000: - return _hyphens(length) - if field.get("required", True) and field.get("type_name") == TYPE_INTEGER: - return _hyphens(length) - return "" - - # Non-empty fields - type_name = field.get("type_name") or "" - if type_name == TYPE_INTEGER and isinstance(value, str) and value and set(value) == {"-"}: - return _serialize_field_value(field, None, format) - return field_obj.encode(value) + field_obj = coerce_field(field) + return field_obj.encode_for_format(value, format=format) def _convert_record_string( @@ -362,7 +314,7 @@ def _convert_record_data( if normalized_source == FORMAT_EURING2000: fields = _split_fixed_width(value) - source_fields = _fixed_width_fields() + source_fields = EURING2000_FIELDS else: fields = _split_pipe_delimited(value) source_fields = _fields_for_format(normalized_source) @@ -391,7 +343,7 @@ def _split_fixed_width(value: str) -> list[str]: raise ValueError(f"{FORMAT_EURING2000} record contains extra data beyond position 94.") fields: list[str] = [] start = 0 - for field in _fixed_width_fields(): + for field in EURING2000_FIELDS: length = field["length"] end = start + length chunk = value[start:end] @@ -427,7 +379,7 @@ def _require_force_on_loss(values_by_key: dict[str, str], source_format: str, ta if accuracy.isalpha(): reasons.append("alphabetic coordinate accuracy") if target_format == FORMAT_EURING2000: - fixed_keys = {field["key"] for field in _fixed_width_fields()} + fixed_keys = {field["key"] for field in EURING2000_FIELDS} for key, value in values_by_key.items(): if key not in fixed_keys and value: reasons.append(f"drop {key}") @@ -547,7 +499,7 @@ def _decode_raw_record(value: object, format: str | None) -> tuple[str, dict[str {"message": f'Format "{format_display_name(normalized)}" conflicts with fixed-width EURING2000 data.'} ) start = 0 - for field in _fixed_width_fields(): + for field in EURING2000_FIELDS: length = field["length"] end = start + length values_by_key[field["key"]] = value[start:end] diff --git a/tests/test_decoding.py b/tests/test_decoding.py index 535b697..f687988 100644 --- a/tests/test_decoding.py +++ b/tests/test_decoding.py @@ -123,10 +123,6 @@ def test_decode_value_length_mismatch(self): with pytest.raises(EuringConstraintException): euring_decode_value("123", TYPE_INTEGER, length=2) - def test_decode_value_min_length_error(self): - with pytest.raises(EuringConstraintException): - euring_decode_value("1", TYPE_INTEGER, variable_length=True) - def test_decode_value_max_length_error(self): with pytest.raises(EuringConstraintException): euring_decode_value("123", TYPE_INTEGER, length=2, variable_length=True)