diff --git a/CHANGELOG.md b/CHANGELOG.md index d1cfb5c..ae6ed0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Unreleased +- Improve and document serialization (#100). - Add stricter Place Code validation (#99). - Parse raw values to values for all fields (#98). - Add constraints and tests for Direction (#97). diff --git a/docs/examples.rst b/docs/examples.rst index 90ed4e6..01c25cf 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -25,6 +25,13 @@ If you want to allow missing optional values and keep placeholders, pass ``strict=False`` to the record. ``serialize()`` raises ``ValueError`` when a field fails validation. +Serialization always re-encodes from the current typed values. For EURING2000, +fixed-width output uses hyphens for empty values and zero-pads integers to the +declared length. For EURING2000+/EURING2020, empty values remain empty strings +except for fields that explicitly use hyphen placeholders (for example Elapsed +Time, Distance, and Direction); these placeholders are defined per-field via +the ``empty_value`` schema attribute. + Exporting records ----------------- diff --git a/docs/python_reference.rst b/docs/python_reference.rst index 96e49d3..aafbf25 100644 --- a/docs/python_reference.rst +++ b/docs/python_reference.rst @@ -34,3 +34,10 @@ Build a EURING record: fails validation. Use ``EuringRecord("euring2000plus", strict=False)`` to allow missing optional values and keep placeholders in the output. Use ``export()`` to convert to other EURING string formats. + +Serialization always re-encodes from the current values. For EURING2000, +fixed-width output uses hyphens for empty values and zero-pads integers to the +declared length. For EURING2000+/EURING2020, empty values remain empty strings +except for fields that explicitly use hyphen placeholders (for example Elapsed +Time, Distance, and Direction); these placeholders are defined per-field via +the ``empty_value`` schema attribute. diff --git a/src/euring/field_schema.py b/src/euring/field_schema.py index 20d86c1..dd1c76a 100644 --- a/src/euring/field_schema.py +++ b/src/euring/field_schema.py @@ -4,8 +4,11 @@ from dataclasses import dataclass from typing import Any +from euring.utils import euring_lat_to_dms, euring_lng_to_dms + from .codes import lookup_description from .exceptions import EuringConstraintException, EuringTypeException +from .formats import FORMAT_EURING2000 from .types import ( TYPE_ALPHABETIC, TYPE_ALPHANUMERIC, @@ -33,22 +36,22 @@ class EuringField(Mapping[str, Any]): type_name: str = "" required: bool = True length: int | None = None - min_length: int | None = None - max_length: int | None = None + variable_length: bool = False + empty_value: str | None = None def _mapping(self) -> dict[str, Any]: mapping: dict[str, Any] = { "key": self.key, "name": self.name, - "type": self.type_name, + "type_name": self.type_name, "required": self.required, } if self.length is not None: mapping["length"] = self.length - if self.min_length is not None: - mapping["min_length"] = self.min_length - if self.max_length is not None: - mapping["max_length"] = self.max_length + if self.variable_length: + mapping["variable_length"] = True + if self.empty_value is not None: + mapping["empty_value"] = self.empty_value return mapping def __getitem__(self, key: str) -> Any: @@ -61,22 +64,18 @@ def __len__(self) -> int: return len(self._mapping()) def _is_required(self) -> bool: - if self.min_length == 0: - return False return self.required - def _validate_length(self, raw: str) -> None: - value_length = len(raw) - if self.length is not None and value_length != self.length: - raise EuringConstraintException(f'Value "{raw}" is length {value_length} instead of {self.length}.') - if self.min_length is not None and value_length < self.min_length: - raise EuringConstraintException( - f'Value "{raw}" is length {value_length}, should be at least {self.min_length}.' - ) - if self.max_length is not None and value_length > self.max_length: - raise EuringConstraintException( - f'Value "{raw}" is length {value_length}, should be at most {self.max_length}.' - ) + def _validate_length(self, raw: str, ignore_variable_length: bool = False) -> None: + if self.length is not None: + value_length = len(raw) + if self.variable_length and not ignore_variable_length: + if value_length > self.length: + raise EuringConstraintException( + f'Value "{raw}" is length {value_length}, should be at most {self.length}.' + ) + elif value_length != self.length: + raise EuringConstraintException(f'Value "{raw}" is length {value_length} instead of {self.length}.') def _validate_raw(self, raw: str) -> str | None: if raw == "": @@ -110,15 +109,65 @@ def parse(self, raw: str) -> Any | None: def encode(self, value: Any | None) -> str: """Encode a Python value to raw text.""" - if value is None or value == "": + if value in (None, ""): if self._is_required(): raise EuringConstraintException('Required field, empty value "" is not permitted.') return "" - raw = str(value) - self._validate_length(raw) - if self.type_name and not is_valid_type(raw, self.type_name): - raise EuringTypeException(f'Value "{raw}" is not valid for type {self.type_name}.') - return raw + + if self.key == "geographical_coordinates" and isinstance(value, dict): + if "lat" not in value or "lng" not in value: + raise EuringConstraintException("Geographical coordinates require both lat and lng values.") + return f"{euring_lat_to_dms(float(value['lat']))}{euring_lng_to_dms(float(value['lng']))}" + + str_value = f"{value}" + if self.type_name in {TYPE_NUMERIC, TYPE_NUMERIC_SIGNED}: + str_value = str_value.rstrip("0").rstrip(".") + if ( + self.type_name in {TYPE_INTEGER, TYPE_NUMERIC, TYPE_NUMERIC_SIGNED} + and self.length + and not self.variable_length + ): + str_value = str_value.zfill(self.length) + self._validate_length(str_value) + if self.type_name and not is_valid_type(str_value, self.type_name): + raise EuringTypeException(f'Value "{str_value}" is not valid for type {self.type_name}.') + return str_value + + def encode_for_format(self, value: Any | None, *, format: str) -> str: + """Encode a Python value to raw text for a specific EURING format.""" + if value in (None, ""): + if self.empty_value: + return self.empty_value + if self.length and format == FORMAT_EURING2000: + return "-" * self.length + if self.length and self.required and self.type_name == TYPE_INTEGER: + return "-" * self.length + return "" + + if self.key == "geographical_coordinates" and isinstance(value, dict): + if "lat" not in value or "lng" not in value: + raise EuringConstraintException("Geographical coordinates require both lat and lng values.") + return f"{euring_lat_to_dms(float(value['lat']))}{euring_lng_to_dms(float(value['lng']))}" + + if self.type_name == TYPE_INTEGER and isinstance(value, str) and value and set(value) == {"-"}: + return self.encode_for_format(None, format=format) + + str_value = f"{value}" + if self.type_name in {TYPE_NUMERIC, TYPE_NUMERIC_SIGNED}: + str_value = str_value.rstrip("0").rstrip(".") + + ignore_variable_length = format == FORMAT_EURING2000 + + if self.type_name in {TYPE_INTEGER, TYPE_NUMERIC, TYPE_NUMERIC_SIGNED} and self.length: + str_value = str_value.zfill(self.length) + + if self.variable_length and not ignore_variable_length: + str_value = str_value.lstrip("0") or "0" + + self._validate_length(str_value, ignore_variable_length=ignore_variable_length) + if self.type_name and not is_valid_type(str_value, self.type_name): + raise EuringTypeException(f'Value "{str_value}" is not valid for type {self.type_name}.') + return str_value def describe(self, value: Any | None) -> Any | None: """Return a display description for a parsed value.""" @@ -182,11 +231,13 @@ def coerce_field(definition: Mapping[str, Any]) -> EuringField: return definition key = definition.get("key", "") name = definition.get("name", key) - type_name = definition.get("type") or definition.get("type_name") or "" + if "type" in definition and "type_name" not in definition: + raise ValueError('Field definitions must use "type_name" instead of legacy "type".') + type_name = definition.get("type_name") or "" required = definition.get("required", True) length = definition.get("length") - min_length = definition.get("min_length") - max_length = definition.get("max_length") + variable_length = bool(definition.get("variable_length", False)) + empty_value = definition.get("empty_value") parser = definition.get("parser") lookup = definition.get("lookup") if parser is not None: @@ -196,8 +247,8 @@ def coerce_field(definition: Mapping[str, Any]) -> EuringField: type_name=type_name, required=required, length=length, - min_length=min_length, - max_length=max_length, + variable_length=variable_length, + empty_value=empty_value, parser=parser, lookup=lookup, ) @@ -208,8 +259,8 @@ def coerce_field(definition: Mapping[str, Any]) -> EuringField: type_name=type_name, required=required, length=length, - min_length=min_length, - max_length=max_length, + variable_length=variable_length, + empty_value=empty_value, lookup=lookup, ) return EuringField( @@ -218,6 +269,6 @@ def coerce_field(definition: Mapping[str, Any]) -> EuringField: type_name=type_name, required=required, length=length, - min_length=min_length, - max_length=max_length, + variable_length=variable_length, + empty_value=empty_value, ) diff --git a/src/euring/fields.py b/src/euring/fields.py index a8cb96d..5cf0d82 100644 --- a/src/euring/fields.py +++ b/src/euring/fields.py @@ -220,13 +220,24 @@ key="distance", type_name=TYPE_INTEGER, length=5, + variable_length=True, + empty_value="-----", + ), + EuringFormattedField( + name="Direction", + key="direction", + type_name=TYPE_INTEGER, + length=3, + empty_value="---", + parser=parse_direction, ), - EuringFormattedField(name="Direction", key="direction", type_name=TYPE_INTEGER, length=3, parser=parse_direction), EuringField( name="Elapsed Time", key="elapsed_time", type_name=TYPE_INTEGER, length=5, + variable_length=True, + empty_value="-----", ), # Starting with Wing Length, fields are no longer required. Source: EURING Exchange Code 2020 v202 (13 Nov 2024). EuringField(name="Wing Length", key="wing_length", type_name=TYPE_NUMERIC, required=False), @@ -303,7 +314,14 @@ required=False, lookup=LOOKUP_BROOD_PATCH, ), - EuringField(name="Primary Score", key="primary_score", type_name=TYPE_INTEGER, max_length=2, required=False), + EuringField( + name="Primary Score", + key="primary_score", + type_name=TYPE_INTEGER, + length=2, + variable_length=True, + required=False, + ), EuringField(name="Primary Moult", key="primary_moult", type_name=TYPE_ALPHANUMERIC, length=10, required=False), EuringFormattedField( name="Old Greater Coverts", @@ -354,3 +372,8 @@ ), EuringField(name="More Other Marks", key="more_other_marks", type_name=TYPE_ALPHABETIC, required=False), ] + +# These are the field definitions per format as per the EURING Code Manual +EURING2020_FIELDS = EURING_FIELDS # 64 fields +EURING2000PLUS_FIELDS = EURING_FIELDS[:60] +EURING2000_FIELDS = EURING_FIELDS[:33] diff --git a/src/euring/parsing.py b/src/euring/parsing.py index ba67e87..6444e0e 100644 --- a/src/euring/parsing.py +++ b/src/euring/parsing.py @@ -9,8 +9,7 @@ def euring_decode_value( type: str, required: bool = True, length: int | None = None, - min_length: int | None = None, - max_length: int | None = None, + variable_length: bool = False, parser: Callable[[str], Any] | None = None, lookup: Mapping[str, str] | Callable[[str], str] | None = None, ) -> dict[str, Any] | None: @@ -18,11 +17,10 @@ def euring_decode_value( definition = { "name": "Value", "key": "value", - "type": type, + "type_name": type, "required": required, "length": length, - "min_length": min_length, - "max_length": max_length, + "variable_length": variable_length, "parser": parser, "lookup": lookup, } @@ -34,7 +32,7 @@ def euring_decode_value( if parser: results["parsed_value"] = parsed description_value = parsed - if lookup and not parser and value != "": + if lookup and not parser and value != "" and parsed is not None: description_value = value results["description"] = field.describe(description_value) return results diff --git a/src/euring/record.py b/src/euring/record.py index 4d9f859..13bfa3a 100644 --- a/src/euring/record.py +++ b/src/euring/record.py @@ -5,7 +5,7 @@ from .exceptions import EuringConstraintException, EuringException from .field_schema import coerce_field -from .fields import EURING_FIELDS +from .fields import EURING2000_FIELDS, EURING2000PLUS_FIELDS, EURING2020_FIELDS from .formats import ( FORMAT_EURING2000, FORMAT_EURING2000PLUS, @@ -52,10 +52,8 @@ def set(self, key: str, value: object) -> EuringRecord: field = _FIELD_MAP.get(key) if field is None: raise ValueError(f'Unknown field key "{key}".') - raw_value = "" if value is None else str(value) self._fields[key] = { "name": field["name"], - "raw_value": raw_value, "value": value, "order": field["order"], } @@ -66,11 +64,10 @@ def _set_raw_value(self, key: str, value: object) -> None: field = _FIELD_MAP.get(key) if field is None: return - raw_value = "" if value is None else f"{value}" self._fields[key] = { "name": field["name"], - "raw_value": raw_value, - "value": raw_value, + "raw_value": "" if value is None else f"{value}", + "value": "" if value is None else f"{value}", "order": field["order"], } @@ -136,21 +133,34 @@ def _validate_fields(self) -> list[dict[str, object]]: errors: list[dict[str, object]] = [] fields = _fields_for_format(self.format) positions = _field_positions(fields) if self.format == FORMAT_EURING2000 else {} - variable_length_keys = {"distance", "direction", "elapsed_time"} + needs_geo_dots = False + if self.format == FORMAT_EURING2020: + lat_value = self._fields.get("latitude", {}).get("value") + lng_value = self._fields.get("longitude", {}).get("value") + needs_geo_dots = lat_value not in (None, "") or lng_value not in (None, "") for index, field in enumerate(fields): key = field["key"] field_state = self._fields.get(key, {}) - raw_value = field_state.get("raw_value", field_state.get("value", "")) - raw_value = "" if raw_value is None else raw_value + value = field_state.get("value", "") + had_empty_value = value in (None, "") try: field_def = field - if self.format != FORMAT_EURING2000 and key in variable_length_keys and field.get("length"): - field_def = {**field, "max_length": field["length"]} - field_def.pop("length", None) + if self.format == FORMAT_EURING2000 and field.get("variable_length"): + field_def = {**field, "variable_length": False} field_obj = coerce_field(field_def) + raw_value = _serialize_field_value(field, value, self.format) + if key == "geographical_coordinates" and had_empty_value and needs_geo_dots: + raw_value = "." * 15 parsed_value = field_obj.parse(raw_value) + if had_empty_value and raw_value: + parsed_value = None description_value = parsed_value - if field_obj.get("lookup") is not None and field_obj.get("parser") is None and raw_value != "": + if ( + field_obj.get("lookup") is not None + and field_obj.get("parser") is None + and raw_value != "" + and parsed_value is not None + ): description_value = raw_value description = field_obj.describe(description_value) if key in self._fields: @@ -163,7 +173,7 @@ def _validate_fields(self) -> list[dict[str, object]]: payload = { "field": field["name"], "message": f"{exc}", - "value": "" if raw_value is None else f"{raw_value}", + "value": "" if value is None else f"{value}", "key": key, "index": index, } @@ -186,7 +196,24 @@ def _has_non_optional_errors(self, errors: dict[str, list]) -> bool: def _validate_record_rules(self) -> list[dict[str, object]]: """Validate multi-field and record-level rules.""" - values_by_key = {key: field.get("raw_value", field.get("value", "")) for key, field in self._fields.items()} + values_by_key: dict[str, str] = {} + for field in _fields_for_format(self.format): + key = field["key"] + field_state = self._fields.get(key, {}) + source_raw = field_state.get("raw_value") + if source_raw is not None: + values_by_key[key] = source_raw + continue + value = field_state.get("value", "") + try: + values_by_key[key] = _serialize_field_value(field, value, self.format) + except EuringException: + values_by_key[key] = "" + if self.format == FORMAT_EURING2020: + lat_value = values_by_key.get("latitude", "") + lng_value = values_by_key.get("longitude", "") + if (lat_value or lng_value) and not values_by_key.get("geographical_coordinates"): + values_by_key["geographical_coordinates"] = "." * 15 errors: list[dict[str, object]] = [] for error in record_rule_errors(self.format, values_by_key): errors.append(_record_error_for_key(error["key"], error["message"], value=error["value"])) @@ -205,51 +232,34 @@ def _serialize(self) -> str: """Serialize current field values without strict completeness checks.""" fields = _fields_for_format(self.format) values_by_key: dict[str, str] = {} - hyphen_required_keys = {"distance", "direction", "elapsed_time"} + geo_placeholder = None + if self.format == FORMAT_EURING2020: + lat_value = self._fields.get("latitude", {}).get("value") + lng_value = self._fields.get("longitude", {}).get("value") + if lat_value not in (None, "") or lng_value not in (None, ""): + geo_placeholder = "." * 15 for field in fields: key = field["key"] - raw_value = self._fields.get(key, {}).get("raw_value") value = self._fields.get(key, {}).get("value") - if raw_value is None: - raw_value = self._fields.get(key, {}).get("value", "") - raw_value = "" if raw_value is None else f"{raw_value}" - if self.format == FORMAT_EURING2000 and (value is None or value == ""): - raw_value = "" - if self.format in {FORMAT_EURING2000PLUS, FORMAT_EURING2020} and key in hyphen_required_keys: - if value is None or value == "": - length = field.get("length") or field.get("max_length") - if length: - raw_value = "-" * int(length) - values_by_key[key] = raw_value + if key == "geographical_coordinates": + if value in (None, "") and geo_placeholder: + values_by_key[key] = geo_placeholder + continue + values_by_key[key] = _serialize_field_value(field, value, self.format) if self.format == FORMAT_EURING2000: - return _format_fixed_width(values_by_key, _fixed_width_fields()) + return _format_fixed_width(values_by_key, EURING2000_FIELDS) return "|".join(values_by_key.get(field["key"], "") for field in fields) def _fields_for_format(format: str) -> list[dict[str, object]]: """Return the field list for the target format.""" if format == FORMAT_EURING2000: - return _fixed_width_fields() + return EURING2000_FIELDS if format == FORMAT_EURING2000PLUS: - for index, field in enumerate(EURING_FIELDS): - if field.get("key") == "reference": - return EURING_FIELDS[: index + 1] - return EURING_FIELDS - - -def _fixed_width_fields() -> list[dict[str, object]]: - """Return field definitions for the EURING2000 fixed-width layout.""" - fields: list[dict[str, object]] = [] - start = 0 - for field in EURING_FIELDS: - if start >= 94: - break - length = field.get("length", field.get("max_length")) - if not length: - break - fields.append({**field, "length": length}) - start += length - return fields + return EURING2000PLUS_FIELDS + if format == FORMAT_EURING2020: + return EURING2020_FIELDS + raise EuringException(f"Unknown EuringRecord format: {format}.") def _format_fixed_width(values_by_key: dict[str, str], fields: list[dict[str, object]]) -> str: @@ -268,6 +278,12 @@ def _format_fixed_width(values_by_key: dict[str, str], fields: list[dict[str, ob return "".join(parts) +def _serialize_field_value(field: dict[str, object], value: object, format: str) -> str: + """Encode a typed field value into a EURING raw string.""" + field_obj = coerce_field(field) + return field_obj.encode_for_format(value, format=format) + + def _convert_record_string( value: str, *, @@ -298,7 +314,7 @@ def _convert_record_data( if normalized_source == FORMAT_EURING2000: fields = _split_fixed_width(value) - source_fields = _fixed_width_fields() + source_fields = EURING2000_FIELDS else: fields = _split_pipe_delimited(value) source_fields = _fields_for_format(normalized_source) @@ -327,7 +343,7 @@ def _split_fixed_width(value: str) -> list[str]: raise ValueError(f"{FORMAT_EURING2000} record contains extra data beyond position 94.") fields: list[str] = [] start = 0 - for field in _fixed_width_fields(): + for field in EURING2000_FIELDS: length = field["length"] end = start + length chunk = value[start:end] @@ -363,7 +379,7 @@ def _require_force_on_loss(values_by_key: dict[str, str], source_format: str, ta if accuracy.isalpha(): reasons.append("alphabetic coordinate accuracy") if target_format == FORMAT_EURING2000: - fixed_keys = {field["key"] for field in _fixed_width_fields()} + fixed_keys = {field["key"] for field in EURING2000_FIELDS} for key, value in values_by_key.items(): if key not in fixed_keys and value: reasons.append(f"drop {key}") @@ -451,7 +467,7 @@ def _normalize_source_format(source_format: str | None, value: str) -> str: def _field_index(key: str) -> int: """Return the field index for a given key.""" - for index, field in enumerate(EURING_FIELDS): + for index, field in enumerate(EURING2020_FIELDS): if field.get("key") == key: return index raise ValueError(f'Unknown field key "{key}".') @@ -483,7 +499,7 @@ def _decode_raw_record(value: object, format: str | None) -> tuple[str, dict[str {"message": f'Format "{format_display_name(normalized)}" conflicts with fixed-width EURING2000 data.'} ) start = 0 - for field in _fixed_width_fields(): + for field in EURING2000_FIELDS: length = field["length"] end = start + length values_by_key[field["key"]] = value[start:end] @@ -499,16 +515,16 @@ def _decode_raw_record(value: object, format: str | None) -> tuple[str, dict[str ) current_format = normalized or FORMAT_EURING2000PLUS for index, raw_value in enumerate(fields): - if index >= len(EURING_FIELDS): + if index >= len(EURING2020_FIELDS): break - values_by_key[EURING_FIELDS[index]["key"]] = raw_value + values_by_key[EURING2020_FIELDS[index]["key"]] = raw_value if normalized is None and current_format in {FORMAT_EURING2000PLUS, FORMAT_EURING2020}: if requires_euring2020(values_by_key): current_format = FORMAT_EURING2020 return current_format, values_by_key, record_errors -_FIELD_MAP = {field["key"]: {**field, "order": index} for index, field in enumerate(EURING_FIELDS)} +_FIELD_MAP = {field["key"]: {**field, "order": index} for index, field in enumerate(EURING2020_FIELDS)} def _field_positions(fields: list[dict[str, object]]) -> dict[str, dict[str, int]]: diff --git a/src/euring/rules.py b/src/euring/rules.py index b051d83..9489dcb 100644 --- a/src/euring/rules.py +++ b/src/euring/rules.py @@ -2,32 +2,15 @@ from __future__ import annotations -from .fields import EURING_FIELDS +from .fields import EURING2000_FIELDS, EURING2000PLUS_FIELDS, EURING2020_FIELDS, EURING_FIELDS from .formats import FORMAT_EURING2000, FORMAT_EURING2000PLUS, FORMAT_EURING2020 _FIELD_NAME_BY_KEY = {field["key"]: field["name"] for field in EURING_FIELDS} -_fixed_width_keys: list[str] = [] -_start = 0 -for _field in EURING_FIELDS: - if _start >= 94: - break - _length = _field.get("length", _field.get("max_length")) - if not _length: - break - _fixed_width_keys.append(_field["key"]) - _start += _length - -_plus_keys: list[str] = [] -for _field in EURING_FIELDS: - _plus_keys.append(_field["key"]) - if _field.get("key") == "reference": - break - -EURING2000_KEYS = tuple(_fixed_width_keys) -EURING2000PLUS_KEYS = tuple(_plus_keys) -EURING2020_KEYS = tuple(field["key"] for field in EURING_FIELDS) -EURING2020_ONLY_KEYS = ("latitude", "longitude", "current_place_code", "more_other_marks") +EURING2000_KEYS = tuple(field["key"] for field in EURING2000_FIELDS) +EURING2000PLUS_KEYS = tuple(field["key"] for field in EURING2000PLUS_FIELDS) +EURING2020_KEYS = tuple(field["key"] for field in EURING2020_FIELDS) +EURING2020_ONLY_KEYS = tuple(set(EURING2020_KEYS).difference(EURING2000PLUS_KEYS)) NON_EURING2000_KEYS = tuple(set(EURING2000PLUS_KEYS + EURING2020_ONLY_KEYS).difference(EURING2000_KEYS)) diff --git a/src/euring/utils.py b/src/euring/utils.py index f0627ba..269e3f6 100644 --- a/src/euring/utils.py +++ b/src/euring/utils.py @@ -52,6 +52,12 @@ def euring_float_to_dms(value: float, round_seconds: bool = False) -> dict[str, quadrant = "+" # includes 0 if round_seconds: seconds = int(round(seconds)) + if seconds == 60: + seconds = 0 + minutes += 1 + if minutes == 60: + minutes = 0 + degrees = degrees + 1 if degrees >= 0 else degrees - 1 return {"quadrant": quadrant, "degrees": degrees, "minutes": minutes, "seconds": seconds} diff --git a/tests/test_converters.py b/tests/test_converters.py index 5298b29..e2fc24a 100644 --- a/tests/test_converters.py +++ b/tests/test_converters.py @@ -49,17 +49,17 @@ def test_convert_unknown_source_format(): convert_euring_record("value", source_format="bad", target_format="euring2000plus") -def test_convert_fixed_width_rejects_pipe(): +def test_convert_euring2000_rejects_pipe(): with pytest.raises(ValueError): convert_euring_record("A|B", source_format="euring2000", target_format="euring2000plus") -def test_convert_fixed_width_too_short(): +def test_convert_euring2000_too_short(): with pytest.raises(ValueError): convert_euring_record("A" * 10, source_format="euring2000", target_format="euring2000plus") -def test_convert_fixed_width_extra_data(): +def test_convert_euring2000_extra_data(): with pytest.raises(ValueError): convert_euring_record("A" * 94 + "X", source_format="euring2000", target_format="euring2000plus") diff --git a/tests/test_decoding.py b/tests/test_decoding.py index ee04a7b..f687988 100644 --- a/tests/test_decoding.py +++ b/tests/test_decoding.py @@ -116,20 +116,16 @@ def test_decode_value_invalid_type(self): euring_decode_value("ABC", TYPE_INTEGER, length=3) def test_decode_value_optional_empty(self): - result = euring_decode_value("", TYPE_INTEGER, min_length=0) + result = euring_decode_value("", TYPE_INTEGER, required=False) assert result is None def test_decode_value_length_mismatch(self): with pytest.raises(EuringConstraintException): euring_decode_value("123", TYPE_INTEGER, length=2) - def test_decode_value_min_length_error(self): - with pytest.raises(EuringConstraintException): - euring_decode_value("1", TYPE_INTEGER, min_length=2) - def test_decode_value_max_length_error(self): with pytest.raises(EuringConstraintException): - euring_decode_value("123", TYPE_INTEGER, max_length=2) + euring_decode_value("123", TYPE_INTEGER, length=2, variable_length=True) def test_decode_value_with_parser(self): result = euring_decode_value( diff --git a/tests/test_fields.py b/tests/test_fields.py index 033f818..c556579 100644 --- a/tests/test_fields.py +++ b/tests/test_fields.py @@ -2,7 +2,7 @@ import re -from euring.fields import EURING_FIELDS +from euring.fields import EURING2000_FIELDS, EURING2000PLUS_FIELDS, EURING2020_FIELDS, EURING_FIELDS from euring.types import ( TYPE_ALPHABETIC, TYPE_ALPHANUMERIC, @@ -13,6 +13,26 @@ ) +def test_euring_fields(): + assert len(EURING_FIELDS) == 64 + + +def test_euring2020_fields(): + assert len(EURING2020_FIELDS) == len(EURING_FIELDS) + assert len(EURING2020_FIELDS) == 64 + assert EURING2020_FIELDS == EURING_FIELDS + + +def test_euring2000plus_fields(): + assert len(EURING2000PLUS_FIELDS) == 60 + assert EURING2000PLUS_FIELDS == EURING_FIELDS[:60] + + +def test_euring2000_fields(): + assert len(EURING2000_FIELDS) == 33 + assert EURING2000_FIELDS == EURING_FIELDS[:33] + + def test_field_uniqueness(): keys = [field["key"] for field in EURING_FIELDS] names = [field["name"] for field in EURING_FIELDS] @@ -35,21 +55,21 @@ def test_field_shape_and_types(): for field in EURING_FIELDS: assert field["name"] assert field["key"] - assert field["type"] in allowed_types + assert field["type_name"] in allowed_types assert re.match(r"^[a-z0-9_]+$", field["key"]) is not None if "length" in field: assert isinstance(field["length"], int) assert field["length"] > 0 - for bound in ("min_length", "max_length"): - if bound in field: - assert isinstance(field[bound], int) - assert field[bound] >= 0 + if "variable_length" in field: + assert isinstance(field["variable_length"], bool) if "required" in field: assert isinstance(field["required"], bool) def test_field_length_exclusivity(): for field in EURING_FIELDS: + if field.get("variable_length"): + assert "length" in field if "length" in field: assert "min_length" not in field assert "max_length" not in field @@ -57,5 +77,4 @@ def test_field_length_exclusivity(): def test_field_min_length_zero_not_required(): for field in EURING_FIELDS: - if field.get("min_length") == 0: - assert field.get("required", True) is False + assert field.get("min_length") is None diff --git a/tests/test_record.py b/tests/test_record.py index 45c4e82..e0b2139 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -6,10 +6,9 @@ import pytest -import euring.record as record_module from euring import EuringRecord from euring.formats import FORMAT_JSON -from euring.record import _fields_for_format, _fixed_width_fields, _format_fixed_width +from euring.record import _fields_for_format, _format_fixed_width def _values_from_record(record: str) -> dict[str, str]: @@ -34,7 +33,13 @@ def test_record_euring2000_round_trip(): values = _values_from_record(record_str) record = EuringRecord("euring2000") record.update(values) - assert record.serialize() == record_str + serialized = record.serialize() + + assert len(serialized) == 94 + decoded = EuringRecord.decode(serialized) + assert decoded.display_format == "EURING2000" + assert not decoded.errors["record"] + assert not decoded.errors["fields"] def test_record_euring2000plus_round_trip(): @@ -48,7 +53,11 @@ def test_record_euring2000plus_round_trip(): values = _values_from_record(record_str) record = EuringRecord("euring2000plus") record.update(values) - assert record.serialize() == record_str + serialized = record.serialize() + decoded = EuringRecord.decode(serialized) + assert decoded.display_format == "EURING2000+" + assert not decoded.errors["record"] + assert not decoded.errors["fields"] def test_record_euring2020_round_trip(): @@ -62,7 +71,11 @@ def test_record_euring2020_round_trip(): values = _values_from_record(record_str) record = EuringRecord("euring2020") record.update(values) - assert record.serialize() == record_str + serialized = record.serialize() + decoded = EuringRecord.decode(serialized) + assert decoded.display_format == "EURING2020" + assert not decoded.errors["record"] + assert not decoded.errors["fields"] def test_record_missing_required_field_raises(): @@ -208,13 +221,6 @@ def test_fields_for_format_euring2000plus_truncates(): assert fields[-1]["key"] == "reference" -def test_fixed_width_fields_respects_max_length(): - """Fixed-width fields should not exceed the 94-character cutoff.""" - fields = _fixed_width_fields() - total_length = sum(field["length"] for field in fields) - assert total_length <= 94 - - def test_format_fixed_width_handles_empty_and_padding(): """Fixed-width formatting should pad and fill empty fields.""" fields = [{"key": "alpha", "length": 2}, {"key": "beta", "length": 3}] @@ -228,34 +234,3 @@ def test_record_validate_without_record_uses_fixed_width(): record.set("ringing_scheme", "GBB") errors = record.validate() assert isinstance(errors, dict) - - -def test_fields_for_format_euring2000plus_without_reference(monkeypatch): - """EURING2000PLUS should return all fields when reference is missing.""" - fields = [{"key": "alpha", "length": 1}, {"key": "beta", "length": 1}] - monkeypatch.setattr(record_module, "EURING_FIELDS", fields) - assert _fields_for_format("euring2000plus") == fields - - -def test_fixed_width_fields_breaks_on_missing_length(monkeypatch): - """Fixed-width fields should stop when length metadata is missing.""" - fields = [{"key": "alpha", "length": 1}, {"key": "beta"}] - monkeypatch.setattr(record_module, "EURING_FIELDS", fields) - result = _fixed_width_fields() - assert result == [{"key": "alpha", "length": 1}] - - -def test_fixed_width_fields_breaks_at_cutoff(monkeypatch): - """Fixed-width fields should stop once reaching 94 characters.""" - fields = [{"key": "alpha", "length": 94}, {"key": "beta", "length": 1}] - monkeypatch.setattr(record_module, "EURING_FIELDS", fields) - result = _fixed_width_fields() - assert result == [{"key": "alpha", "length": 94}] - - -def test_fixed_width_fields_complete_without_break(monkeypatch): - """Fixed-width fields should include all fields when under the cutoff.""" - fields = [{"key": "alpha", "length": 1}, {"key": "beta", "length": 2}] - monkeypatch.setattr(record_module, "EURING_FIELDS", fields) - result = _fixed_width_fields() - assert result == [{"key": "alpha", "length": 1}, {"key": "beta", "length": 2}]