Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## Unreleased

- Improve and document serialization (#100).
- Add stricter Place Code validation (#99).
- Parse raw values to values for all fields (#98).
- Add constraints and tests for Direction (#97).
Expand Down
7 changes: 7 additions & 0 deletions docs/examples.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@ If you want to allow missing optional values and keep placeholders, pass
``strict=False`` to the record. ``serialize()`` raises ``ValueError`` when a field
fails validation.

Serialization always re-encodes from the current typed values. For EURING2000,
fixed-width output uses hyphens for empty values and zero-pads integers to the
declared length. For EURING2000+/EURING2020, empty values remain empty strings
except for fields that explicitly use hyphen placeholders (for example Elapsed
Time, Distance, and Direction); these placeholders are defined per-field via
the ``empty_value`` schema attribute. Required integer fields with a declared
length but no ``empty_value`` are also written as hyphens of that length.

Exporting records
-----------------

Expand Down
7 changes: 7 additions & 0 deletions docs/python_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,10 @@ Build a EURING record:
fails validation. Use ``EuringRecord("euring2000plus", strict=False)``
to allow missing optional values and keep placeholders in the output. Use
``export()`` to convert to other EURING string formats.

Serialization always re-encodes from the current typed values. For EURING2000,
fixed-width output uses hyphens for empty values and zero-pads integers to the
declared length. For EURING2000+/EURING2020, empty values remain empty strings
except for fields that explicitly use hyphen placeholders (for example Elapsed
Time, Distance, and Direction); these placeholders are defined per-field via
the ``empty_value`` schema attribute. Required integer fields with a declared
length but no ``empty_value`` are also written as hyphens of that length.
123 changes: 87 additions & 36 deletions src/euring/field_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@
from dataclasses import dataclass
from typing import Any

from euring.utils import euring_lat_to_dms, euring_lng_to_dms

from .codes import lookup_description
from .exceptions import EuringConstraintException, EuringTypeException
from .formats import FORMAT_EURING2000
from .types import (
TYPE_ALPHABETIC,
TYPE_ALPHANUMERIC,
Expand Down Expand Up @@ -33,22 +36,22 @@ class EuringField(Mapping[str, Any]):
type_name: str = ""
required: bool = True
length: int | None = None
min_length: int | None = None
max_length: int | None = None
variable_length: bool = False
empty_value: str | None = None

def _mapping(self) -> dict[str, Any]:
mapping: dict[str, Any] = {
"key": self.key,
"name": self.name,
"type": self.type_name,
"type_name": self.type_name,
"required": self.required,
}
if self.length is not None:
mapping["length"] = self.length
if self.min_length is not None:
mapping["min_length"] = self.min_length
if self.max_length is not None:
mapping["max_length"] = self.max_length
if self.variable_length:
mapping["variable_length"] = True
if self.empty_value is not None:
mapping["empty_value"] = self.empty_value
return mapping

def __getitem__(self, key: str) -> Any:
Expand All @@ -61,22 +64,18 @@ def __len__(self) -> int:
return len(self._mapping())

def _is_required(self) -> bool:
if self.min_length == 0:
return False
return self.required

def _validate_length(self, raw: str) -> None:
value_length = len(raw)
if self.length is not None and value_length != self.length:
raise EuringConstraintException(f'Value "{raw}" is length {value_length} instead of {self.length}.')
if self.min_length is not None and value_length < self.min_length:
raise EuringConstraintException(
f'Value "{raw}" is length {value_length}, should be at least {self.min_length}.'
)
if self.max_length is not None and value_length > self.max_length:
raise EuringConstraintException(
f'Value "{raw}" is length {value_length}, should be at most {self.max_length}.'
)
def _validate_length(self, raw: str, ignore_variable_length: bool = False) -> None:
if self.length is not None:
value_length = len(raw)
if self.variable_length and not ignore_variable_length:
if value_length > self.length:
raise EuringConstraintException(
f'Value "{raw}" is length {value_length}, should be at most {self.length}.'
)
elif value_length != self.length:
raise EuringConstraintException(f'Value "{raw}" is length {value_length} instead of {self.length}.')

def _validate_raw(self, raw: str) -> str | None:
if raw == "":
Expand Down Expand Up @@ -110,15 +109,65 @@ def parse(self, raw: str) -> Any | None:

def encode(self, value: Any | None) -> str:
"""Encode a Python value to raw text."""
if value is None or value == "":
if value in (None, ""):
if self._is_required():
raise EuringConstraintException('Required field, empty value "" is not permitted.')
return ""
raw = str(value)
self._validate_length(raw)
if self.type_name and not is_valid_type(raw, self.type_name):
raise EuringTypeException(f'Value "{raw}" is not valid for type {self.type_name}.')
return raw

if self.key == "geographical_coordinates" and isinstance(value, dict):
if "lat" not in value or "lng" not in value:
raise EuringConstraintException("Geographical coordinates require both lat and lng values.")
return f"{euring_lat_to_dms(float(value['lat']))}{euring_lng_to_dms(float(value['lng']))}"

str_value = f"{value}"
if self.type_name in {TYPE_NUMERIC, TYPE_NUMERIC_SIGNED}:
str_value = str_value.rstrip("0").rstrip(".")
if (
self.type_name in {TYPE_INTEGER, TYPE_NUMERIC, TYPE_NUMERIC_SIGNED}
and self.length
and not self.variable_length
):
str_value = str_value.zfill(self.length)
self._validate_length(str_value)
if self.type_name and not is_valid_type(str_value, self.type_name):
raise EuringTypeException(f'Value "{str_value}" is not valid for type {self.type_name}.')
return str_value

def encode_for_format(self, value: Any | None, *, format: str) -> str:
    """Encode a Python value to raw text for a specific EURING format.

    Empty values (``None`` or ``""``) resolve to a placeholder in this order:

    1. the field's ``empty_value`` when one is set;
    2. hyphens of the declared length when ``format`` is EURING2000;
    3. hyphens of the declared length for required integer fields;
    4. the empty string otherwise.

    Args:
        value: The typed value to encode. A dict with ``lat`` and ``lng``
            keys is accepted for the ``geographical_coordinates`` field.
        format: The target EURING format identifier.

    Returns:
        The raw text for this field in the requested format.

    Raises:
        EuringConstraintException: If a coordinates dict lacks ``lat`` or
            ``lng``, or the encoded text violates the length constraint.
        EuringTypeException: If the encoded text is not valid for the
            field's type.
    """
    if value in (None, ""):
        if self.empty_value:
            return self.empty_value
        if self.length and format == FORMAT_EURING2000:
            return "-" * self.length
        if self.length and self.required and self.type_name == TYPE_INTEGER:
            return "-" * self.length
        return ""

    if self.key == "geographical_coordinates" and isinstance(value, dict):
        if "lat" not in value or "lng" not in value:
            raise EuringConstraintException("Geographical coordinates require both lat and lng values.")
        return f"{euring_lat_to_dms(float(value['lat']))}{euring_lng_to_dms(float(value['lng']))}"

    # An all-hyphen string on an integer field is a placeholder, not a value:
    # re-run the empty-value logic so the placeholder matches the target format.
    if self.type_name == TYPE_INTEGER and isinstance(value, str) and value and set(value) == {"-"}:
        return self.encode_for_format(None, format=format)

    str_value = f"{value}"
    if (
        self.type_name in {TYPE_NUMERIC, TYPE_NUMERIC_SIGNED}
        and "." in str_value
        and "e" not in str_value.lower()
    ):
        # Drop insignificant trailing zeros of the fractional part only.
        # Without the decimal-point guard, rstrip("0") would turn the
        # integral value "10" into "1"; without the exponent guard it would
        # eat exponent digits ("1.5e+20" -> "1.5e+2").
        str_value = str_value.rstrip("0").rstrip(".")

    # EURING2000 is fixed-width, so variable-length fields are padded and
    # validated against the exact declared length there.
    ignore_variable_length = format == FORMAT_EURING2000

    if self.type_name in {TYPE_INTEGER, TYPE_NUMERIC, TYPE_NUMERIC_SIGNED} and self.length:
        str_value = str_value.zfill(self.length)

    if self.variable_length and not ignore_variable_length:
        # Variable-length output carries no zero padding; keep a lone "0".
        str_value = str_value.lstrip("0") or "0"

    self._validate_length(str_value, ignore_variable_length=ignore_variable_length)
    if self.type_name and not is_valid_type(str_value, self.type_name):
        raise EuringTypeException(f'Value "{str_value}" is not valid for type {self.type_name}.')
    return str_value

def describe(self, value: Any | None) -> Any | None:
"""Return a display description for a parsed value."""
Expand Down Expand Up @@ -182,11 +231,13 @@ def coerce_field(definition: Mapping[str, Any]) -> EuringField:
return definition
key = definition.get("key", "")
name = definition.get("name", key)
type_name = definition.get("type") or definition.get("type_name") or ""
if "type" in definition and "type_name" not in definition:
raise ValueError('Field definitions must use "type_name" instead of legacy "type".')
type_name = definition.get("type_name") or ""
required = definition.get("required", True)
length = definition.get("length")
min_length = definition.get("min_length")
max_length = definition.get("max_length")
variable_length = bool(definition.get("variable_length", False))
empty_value = definition.get("empty_value")
parser = definition.get("parser")
lookup = definition.get("lookup")
if parser is not None:
Expand All @@ -196,8 +247,8 @@ def coerce_field(definition: Mapping[str, Any]) -> EuringField:
type_name=type_name,
required=required,
length=length,
min_length=min_length,
max_length=max_length,
variable_length=variable_length,
empty_value=empty_value,
parser=parser,
lookup=lookup,
)
Expand All @@ -208,8 +259,8 @@ def coerce_field(definition: Mapping[str, Any]) -> EuringField:
type_name=type_name,
required=required,
length=length,
min_length=min_length,
max_length=max_length,
variable_length=variable_length,
empty_value=empty_value,
lookup=lookup,
)
return EuringField(
Expand All @@ -218,6 +269,6 @@ def coerce_field(definition: Mapping[str, Any]) -> EuringField:
type_name=type_name,
required=required,
length=length,
min_length=min_length,
max_length=max_length,
variable_length=variable_length,
empty_value=empty_value,
)
27 changes: 25 additions & 2 deletions src/euring/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,13 +220,24 @@
key="distance",
type_name=TYPE_INTEGER,
length=5,
variable_length=True,
empty_value="-----",
),
EuringFormattedField(
name="Direction",
key="direction",
type_name=TYPE_INTEGER,
length=3,
empty_value="---",
parser=parse_direction,
),
EuringFormattedField(name="Direction", key="direction", type_name=TYPE_INTEGER, length=3, parser=parse_direction),
EuringField(
name="Elapsed Time",
key="elapsed_time",
type_name=TYPE_INTEGER,
length=5,
variable_length=True,
empty_value="-----",
),
# Starting with Wing Length, fields are no longer required. Source: EURING Exchange Code 2020 v202 (13 Nov 2024).
EuringField(name="Wing Length", key="wing_length", type_name=TYPE_NUMERIC, required=False),
Expand Down Expand Up @@ -303,7 +314,14 @@
required=False,
lookup=LOOKUP_BROOD_PATCH,
),
EuringField(name="Primary Score", key="primary_score", type_name=TYPE_INTEGER, max_length=2, required=False),
EuringField(
name="Primary Score",
key="primary_score",
type_name=TYPE_INTEGER,
length=2,
variable_length=True,
required=False,
),
EuringField(name="Primary Moult", key="primary_moult", type_name=TYPE_ALPHANUMERIC, length=10, required=False),
EuringFormattedField(
name="Old Greater Coverts",
Expand Down Expand Up @@ -354,3 +372,8 @@
),
EuringField(name="More Other Marks", key="more_other_marks", type_name=TYPE_ALPHABETIC, required=False),
]

# Field definitions for each format, following the EURING Code Manual.
EURING2020_FIELDS = EURING_FIELDS # 64 fields
EURING2000PLUS_FIELDS = EURING_FIELDS[:60]
EURING2000_FIELDS = EURING_FIELDS[:33]
10 changes: 4 additions & 6 deletions src/euring/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,18 @@ def euring_decode_value(
type: str,
required: bool = True,
length: int | None = None,
min_length: int | None = None,
max_length: int | None = None,
variable_length: bool = False,
parser: Callable[[str], Any] | None = None,
lookup: Mapping[str, str] | Callable[[str], str] | None = None,
) -> dict[str, Any] | None:
"""Decode a single EURING field value with type checks, parsing, and lookup."""
definition = {
"name": "Value",
"key": "value",
"type": type,
"type_name": type,
"required": required,
"length": length,
"min_length": min_length,
"max_length": max_length,
"variable_length": variable_length,
"parser": parser,
"lookup": lookup,
}
Expand All @@ -34,7 +32,7 @@ def euring_decode_value(
if parser:
results["parsed_value"] = parsed
description_value = parsed
if lookup and not parser and value != "":
if lookup and not parser and value != "" and parsed is not None:
description_value = value
results["description"] = field.describe(description_value)
return results
Loading