From 60e61cbf0c68bafcaa83f60cd28e7ab03e418794 Mon Sep 17 00:00:00 2001 From: mishaschwartz <4380924+mishaschwartz@users.noreply.github.com> Date: Tue, 4 Nov 2025 12:00:24 -0500 Subject: [PATCH] collapse geojson --- marble_api/utils/geojson.py | 77 ++++++++- marble_api/versions/v1/data_request/models.py | 21 ++- test/faker_providers.py | 160 ++++++++++++++---- .../versions/v1/data_request/test_routes.py | 17 +- test/unit/utils/test_utils_geojson.py | 134 ++++++++++++++- .../versions/v1/data_request/test_models.py | 7 +- 6 files changed, 379 insertions(+), 37 deletions(-) diff --git a/marble_api/utils/geojson.py b/marble_api/utils/geojson.py index 1ed5802..de90e18 100644 --- a/marble_api/utils/geojson.py +++ b/marble_api/utils/geojson.py @@ -1,7 +1,17 @@ from collections.abc import Iterable from itertools import zip_longest -from geojson_pydantic import LineString, MultiLineString, MultiPoint, MultiPolygon, Point, Polygon +from geojson_pydantic import ( + Feature, + FeatureCollection, + GeometryCollection, + LineString, + MultiLineString, + MultiPoint, + MultiPolygon, + Point, + Polygon, +) from geojson_pydantic.types import ( BBox, LineStringCoords, @@ -12,7 +22,9 @@ Position, ) +# Note: STAC Geometry differs from the GeoJSON Geometry definition (GeometryCollection not included) type Geometry = LineString | MultiLineString | MultiPoint | MultiPolygon | Point | Polygon +type GeoJSON = Geometry | FeatureCollection | Feature | GeometryCollection type Coordinates = ( LineStringCoords | MultiLineStringCoords | MultiPointCoords | MultiPolygonCoords | PolygonCoords | Position ) @@ -33,3 +45,66 @@ def bbox_from_coordinates(coordinates: Coordinates) -> BBox: real_values = [v or 0 for v in values] # coordinates without elevation are considered to be at elevation 0 min_max.append((min(real_values), max(real_values))) return [v for val in min_max for v in val] + + +def _validate_geometries(geometries: list[Geometry], geojson_type: str) -> None: + geometry_types = 
frozenset({geo.type for geo in geometries}) + if len(geometry_types) != 1 and geometry_types not in { + frozenset(), + frozenset(("Point", "MultiPoint")), + frozenset(("LineString", "MultiLineString")), + frozenset(("Polygon", "MultiPolygon")), + }: + raise ValueError(f"GeoJSON of type '{geojson_type}' is not convertable to a STAC compliant geometry.") + + +def _extract_geometries(geojson: GeoJSON | None) -> list[Geometry]: + """Return all geometries present in the geojson as a flat list (empty if the geojson is None).""" + if geojson is None: + return [] + if geojson.type == "FeatureCollection": + return [geo for feature in geojson.features for geo in _extract_geometries(feature.geometry) if geo] + if geojson.type == "GeometryCollection": + return [geo for member in geojson.geometries for geo in _extract_geometries(member)] + if geojson.type == "Feature": + return _extract_geometries(geojson.geometry) + return [geojson] + + +def validate_collapsible(geojson: GeoJSON) -> None: + """Raise a ValueError if the geojson cannot be collapsed to a STAC compatible geometry.""" + _validate_geometries(_extract_geometries(geojson), geojson.type) + + +def collapse_geometries(geojson: GeoJSON, check: bool = True) -> Geometry | None: + """ + Return a single geometry that represents the same geo-spatial data as the geojson. + + This will collapse Features, FeatureCollections, and GeometryCollections into other + geometry types that represent the same points, lines, or polygons. The converted geometries + are compatible with STAC. + + If check is False, this will not validate that the geojson can be collapsed before attempting + to collapse it. This may result in undefined behaviour. It is strongly recommended that you + call validate_collapsible(geojson) prior to calling this function with check=False. 
+ """ + geometries = _extract_geometries(geojson) + if check: + _validate_geometries(geometries, geojson.type) + if not geometries: + return None + if len(geometries) == 1: + return geometries[0] + coordinates = [] + for geo in geometries: + if geo.type in ("Point", "LineString", "Polygon"): + coordinates.append(geo.coordinates) + else: + coordinates.extend(geo.coordinates) + if geo.type in ("Point", "MultiPoint"): + geo_type = MultiPoint + elif geo.type in ("LineString", "MultiLineString"): + geo_type = MultiLineString + else: + geo_type = MultiPolygon + return geo_type(coordinates=coordinates, type=geo_type.__name__) diff --git a/marble_api/versions/v1/data_request/models.py b/marble_api/versions/v1/data_request/models.py index 0def7b2..262fbf7 100644 --- a/marble_api/versions/v1/data_request/models.py +++ b/marble_api/versions/v1/data_request/models.py @@ -18,7 +18,12 @@ from stac_pydantic.links import Links from typing_extensions import Annotated -from marble_api.utils.geojson import Geometry, bbox_from_coordinates +from marble_api.utils.geojson import ( + GeoJSON, + bbox_from_coordinates, + collapse_geometries, + validate_collapsible, +) from marble_api.utils.models import partial_model PyObjectId = Annotated[str, BeforeValidator(str)] @@ -45,7 +50,7 @@ class DataRequest(BaseModel): title: str description: str | None = None authors: list[Author] - geometry: Geometry | None + geometry: GeoJSON | None temporal: Temporal links: Links path: str @@ -62,6 +67,14 @@ def min_length_if_set(cls, value: Sized | None, info: ValidationInfo) -> Sized | assert value is None or len(value), f"{info.field_name} must be None or non-empty" return value + @field_validator("geometry") + @classmethod + def validate_geometries(cls, value: GeoJSON | None) -> dict | None: + """Check whether a GeoJSON can be collapsed to a STAC compliant geometry.""" + if value is not None: + validate_collapsible(value) + return value + @partial_model class DataRequestUpdate(DataRequest): @@ -91,7 
+104,7 @@ def stac_item(self) -> Item: item = { "type": "Feature", "stac_version": "1.1.0", - "geometry": self.geometry and self.geometry.model_dump(), + "geometry": self.geometry and collapse_geometries(self.geometry, check=False).model_dump(), "stac_extensions": [], # TODO "id": self.id, # TODO "bbox": None, @@ -110,7 +123,7 @@ def stac_item(self) -> Item: ] if self.geometry: - item["bbox"] = item["geometry"].get("bbox") or bbox_from_coordinates(self.geometry.coordinates) + item["bbox"] = item["geometry"].get("bbox") or bbox_from_coordinates(item["geometry"]["coordinates"]) return item diff --git a/test/faker_providers.py b/test/faker_providers.py index cbe20f1..8396a33 100644 --- a/test/faker_providers.py +++ b/test/faker_providers.py @@ -4,6 +4,17 @@ import pytest from faker import Faker from faker.providers import BaseProvider +from geojson_pydantic import ( + Feature, + FeatureCollection, + GeometryCollection, + LineString, + MultiLineString, + MultiPoint, + MultiPolygon, + Point, + Polygon, +) from marble_api.versions.v1.data_request.models import DataRequest, DataRequestPublic, DataRequestUpdate @@ -35,50 +46,141 @@ def _geo_base(self): return base def geo_point(self, dimensions=None): - return {**self._geo_base(), "type": "Point", "coordinates": self.point(dimensions)} + return Point(type="Point", coordinates=self.point(dimensions), **self._geo_base()) def geo_multipoint(self, dimensions=None): - return { + return MultiPoint( + type="MultiPoint", + coordinates=[self.point(dimensions) for _ in range(self.generator.pyint(min_value=1, max_value=12))], **self._geo_base(), - "type": "MultiPoint", - "coordinates": [self.point(dimensions) for _ in range(self.generator.pyint(min_value=1, max_value=12))], - } + ) def geo_linestring(self, dimensions=None): - return {**self._geo_base(), "type": "LineString", "coordinates": self.line(dimensions)} + return LineString(type="LineString", coordinates=self.line(dimensions), **self._geo_base()) def geo_multilinestring(self, 
dimensions=None): - return { + return MultiLineString( + type="MultiLineString", + coordinates=[self.line(dimensions) for _ in range(self.generator.pyint(min_value=1, max_value=12))], **self._geo_base(), - "type": "MultiLineString", - "coordinates": [self.line(dimensions) for _ in range(self.generator.pyint(min_value=1, max_value=12))], - } + ) def geo_polygon(self, dimensions=None): - return {**self._geo_base(), "type": "Polygon", "coordinates": [self.linear_ring(dimensions)]} + return Polygon(type="Polygon", coordinates=[self.linear_ring(dimensions)], **self._geo_base()) def geo_multipolygon(self, dimensions=None): - return { - **self._geo_base(), - "type": "MultiPolygon", - "coordinates": [ + return MultiPolygon( + type="MultiPolygon", + coordinates=[ [self.linear_ring(dimensions) for _ in range(self.generator.pyint(min_value=1, max_value=12))] ], - } + **self._geo_base(), + ) + + def stac_geometries(self, dimensions=None): + return [ + self.geo_point(dimensions=dimensions), + self.geo_multipoint(dimensions=dimensions), + self.geo_linestring(dimensions=dimensions), + self.geo_multilinestring(dimensions=dimensions), + self.geo_polygon(dimensions=dimensions), + self.geo_multipolygon(dimensions=dimensions), + ] + + def collapsible_geometry_combos(self, dimensions=None): + stac_geometries = self.stac_geometries(dimensions=dimensions) + return [ + combo + for i in range(0, len(stac_geometries), 2) + for combo in ([stac_geometries[i]], [stac_geometries[i + 1]], stac_geometries[i : i + 2]) + ] + + def uncollapsible_geometry_combos(self, dimensions=None): + stac_geometries = self.stac_geometries(dimensions=dimensions) + combos = [] + for i in range(0, len(stac_geometries), 2): + for j in range(i + 2, len(stac_geometries)): + combos.append([stac_geometries[i], stac_geometries[j]]) + combos.append([stac_geometries[i + 1], stac_geometries[j]]) + return combos + + def collapsible_geometry_collections(self, dimensions=None): + collapsible_geometry_combos = 
self.collapsible_geometry_combos(dimensions=dimensions) + return [ + GeometryCollection(type="GeometryCollection", geometries=geos) + for geos in collapsible_geometry_combos + if len(geos) > 1 + ] + + def uncollapsible_geometry_collections(self, dimensions=None): + uncollapsible_geometry_combos = self.uncollapsible_geometry_combos(dimensions=dimensions) + return [ + GeometryCollection(type="GeometryCollection", geometries=geos) for geos in uncollapsible_geometry_combos + ] + + def collapsible_features(self, dimensions=None): + stac_geometries = self.stac_geometries(dimensions=dimensions) + collapsible_geometry_collections = self.collapsible_geometry_collections(dimensions=dimensions) + return [ + Feature(type="Feature", geometry=geo, properties={}) + for geo in stac_geometries + collapsible_geometry_collections + ] + + def uncollapsible_features(self, dimensions=None): + uncollapsible_geometry_collections = self.uncollapsible_geometry_collections(dimensions=dimensions) + return [Feature(type="Feature", geometry=geo, properties={}) for geo in uncollapsible_geometry_collections] + + def collapsible_feature_collections(self, dimensions=None): + collapsible_geometry_combos = self.collapsible_geometry_combos(dimensions=dimensions) + collapsible_features = self.collapsible_features(dimensions=dimensions) + collections = [] + for combo in collapsible_geometry_combos: + collections.append( + FeatureCollection( + type="FeatureCollection", + features=[Feature(type="Feature", geometry=geo, properties={}) for geo in combo], + ) + ) + for feature in collapsible_features: + collections.append(FeatureCollection(type="FeatureCollection", features=[feature])) + return collections + + def uncollapsible_feature_collections(self, dimensions=None): + uncollapsible_geometry_combos = self.uncollapsible_geometry_combos(dimensions=dimensions) + uncollapsible_features = self.uncollapsible_features(dimensions=dimensions) + collections = [] + for combo in uncollapsible_geometry_combos: + 
collections.append( + FeatureCollection( + type="FeatureCollection", + features=[Feature(type="Feature", geometry=geo, properties={}) for geo in combo], + ) + ) + for feature in uncollapsible_features: + collections.append(FeatureCollection(type="FeatureCollection", features=[feature])) + return collections + + def collapsible_geojsons(self, dimensions=None): + return ( + self.stac_geometries(dimensions=dimensions) + + self.collapsible_geometry_collections(dimensions=dimensions) + + self.collapsible_feature_collections(dimensions=dimensions) + ) + + def uncollapsible_geojsons(self, dimensions=None): + return self.uncollapsible_geometry_collections(dimensions=dimensions) + self.uncollapsible_feature_collections( + dimensions=dimensions + ) + + def collapsible_geojson(self, dimensions=None): + if dimensions is None: + dimensions = self.generator.random.choice([3, 2]) + return self.generator.random.choice(self.collapsible_geojsons(dimensions)) - def geometry(self, dimensions=None): + def uncollapsible_geojson(self, dimensions=None): if dimensions is None: - dimensions = self.generator.random.choice([3, 2, None]) - return self.generator.random.choice( - [ - self.geo_point, - self.geo_multipoint, - self.geo_linestring, - self.geo_multilinestring, - self.geo_polygon, - self.geo_multipolygon, - ] - )(dimensions) + dimensions = self.generator.random.choice([3, 2]) + return self.generator.random.choice(self.uncollapsible_geojsons(dimensions)) class DataRequestProvider(GeoJsonProvider): @@ -117,7 +219,7 @@ def _data_request_inputs(self, unset=None): title=self.generator.sentence(), description=(None if self.generator.pybool(30) else self.generator.paragraph()), authors=[self.author() for _ in range(self.generator.random.randint(1, 10))], - geometry=self.geometry(), + geometry=self.collapsible_geojson(), temporal=self.temporal(), links=[self.link() for _ in range(self.generator.random.randint(0, 10))], path=self.generator.file_path(), diff --git 
a/test/integration/versions/v1/data_request/test_routes.py b/test/integration/versions/v1/data_request/test_routes.py index 4bdde76..cf15ec9 100644 --- a/test/integration/versions/v1/data_request/test_routes.py +++ b/test/integration/versions/v1/data_request/test_routes.py @@ -142,12 +142,20 @@ async def test_valid(self, fake, async_client): bson.ObjectId(id_) # check that the id is a valid object id assert json.loads(data) == json.loads(DataRequest(**response.json()).model_dump_json()) - async def test_invalid(self, fake, async_client): + async def test_invalid_authors(self, fake, async_client): data = json.loads(fake.data_request().model_dump_json()) data["authors"] = [] response = await async_client.post("/v1/data-requests/", json=data) assert response.status_code == 422 + async def test_invalid_uncollapsible_geometry(self, fake, async_client): + data = { + **json.loads(fake.data_request().model_dump_json()), + "geometry": json.loads(fake.uncollapsible_geojson().model_dump_json()), + } + response = await async_client.post("/v1/data-requests/", json=data) + assert response.status_code == 422 + class _TestUpdate: @pytest.fixture @@ -205,6 +213,13 @@ async def test_invalid_bad_type(self, loaded_data, async_client): response = await async_client.patch(f"/v1/data-requests/{loaded_data['id']}", json={"title": 10}) assert response.status_code == 422 + async def test_invalid_uncollapsible_geometry(self, fake, loaded_data, async_client): + response = await async_client.patch( + f"/v1/data-requests/{loaded_data['id']}", + json={"geometry": json.loads(fake.uncollapsible_geojson().model_dump_json())}, + ) + assert response.status_code == 422 + async def test_bad_id(self, async_client): resp = await async_client.patch("/v1/data-requests/id-does-not-exist", json={}) assert resp.status_code == 404, resp.json() diff --git a/test/unit/utils/test_utils_geojson.py b/test/unit/utils/test_utils_geojson.py index e2d5b71..5adb14a 100644 --- a/test/unit/utils/test_utils_geojson.py +++ 
b/test/unit/utils/test_utils_geojson.py @@ -1,4 +1,22 @@ -from marble_api.utils.geojson import bbox_from_coordinates +import pytest +from faker import Faker +from geojson_pydantic import ( + Feature, + FeatureCollection, + GeometryCollection, + MultiLineString, + MultiPoint, + MultiPolygon, +) + +from marble_api.utils.geojson import bbox_from_coordinates, collapse_geometries, validate_collapsible + + +@pytest.fixture(scope="session") +def fake(faker_providers) -> Faker: + fake_ = Faker() + fake_.add_provider(faker_providers["GeoJsonProvider"]) + return fake_ class TestBboxFromCoordinates: @@ -22,3 +40,117 @@ def test_deeply_nested(self): def test_different_nested(self): assert bbox_from_coordinates([[1, 2], [[[-1, -3, 33]]]]) == [-1, 1, -3, 2, 0, 33] + + +@pytest.mark.parametrize("dimensions", [2, 3]) +class TestValidateCollapsible: + def test_collapsible(self, fake, dimensions): + errors = [] + for geo in fake.collapsible_geojsons(dimensions): + try: + validate_collapsible(geo) + except ValueError: + errors.append(geo) + assert not errors, "These geojsons should be collapsible but weren't properly collapsed" + + def test_uncollapsible(self, fake, dimensions): + errors = [] + for geo in fake.uncollapsible_geojsons(dimensions): + try: + validate_collapsible(geo) + except ValueError: + pass + else: + errors.append(geo) + assert not errors, "These geojsons should be uncollapsible but were properly collapsed" + + +@pytest.mark.parametrize("dimensions", [2, 3]) +class TestCollapseGeometries: + def test_no_change_to_stac_geometries(self, fake, dimensions): + geometries = fake.stac_geometries(dimensions) + assert [collapse_geometries(geo) for geo in geometries] == geometries + + def test_cannot_collapse_uncollapsible(self, fake, dimensions): + for geo in fake.uncollapsible_geojsons(dimensions): + with pytest.raises(ValueError): + collapse_geometries(geo) + + def test_collapsible_geojson_changed(self, fake, dimensions): + not_changed = [] + stac_geometry_types = [geo.type 
for geo in fake.stac_geometries()] + for geo in fake.collapsible_geojsons(dimensions): + if geo.type not in stac_geometry_types: + if collapse_geometries(geo) == geo: + not_changed.append(geo) + assert not not_changed, "These geojsons should have been collapsed/changed but they weren't" + + def test_feature_changed(self, fake, dimensions): + not_changed = [] + for feat in fake.collapsible_features(dimensions): + if feat.geometry.type != "GeometryCollection": + if collapse_geometries(feat) != feat.geometry: + not_changed.append(feat) + assert not not_changed, "These features should have been collapsed to their geometry but they weren't" + + @pytest.mark.parametrize( + "geometries", + [ + {"geos": ["point", "multipoint"], "result": MultiPoint}, + {"geos": ["linestring", "multilinestring"], "result": MultiLineString}, + {"geos": ["polygon", "multipolygon"], "result": MultiPolygon}, + ], + ids=lambda val: val["result"].__name__, + ) + @pytest.mark.parametrize( + "geo_factory", + [ + ( + "GeometryCollection(Geometry)", + lambda geos: GeometryCollection(type="GeometryCollection", geometries=geos), + ), + ( + "Feature(GeometryCollection(Geometry))", + lambda geos: Feature( + type="Feature", + properties={}, + geometry=GeometryCollection(type="GeometryCollection", geometries=geos), + ), + ), + ( + "FeatureCollection(Feature(GeometryCollection(Geometry)))", + lambda geos: FeatureCollection( + type="FeatureCollection", + features=[ + Feature( + type="Feature", + properties={}, + geometry=GeometryCollection(type="GeometryCollection", geometries=geos), + ) + ], + ), + ), + ( + "FeatureCollection(Feature(Geometry))", + lambda geos: FeatureCollection( + type="FeatureCollection", + features=[ + Feature( + type="Feature", + properties={}, + geometry=geo, + ) + for geo in geos + ], + ), + ), + ], + ids=lambda val: val[0], + ) + def test_complex_collapsible(self, fake, dimensions, geometries, geo_factory): + geos = [getattr(fake, f"geo_{geo}")(dimensions) for geo in 
geometries["geos"]] + result = geometries["result"] + factory_name, factory = geo_factory + assert collapse_geometries(factory(geos)) == result( + type=result.__name__, coordinates=[geos[0].coordinates, *geos[1].coordinates] + ), f"Unable to collapse {factory_name} into a {result.__name__}" diff --git a/test/unit/versions/v1/data_request/test_models.py b/test/unit/versions/v1/data_request/test_models.py index bcbe1d6..20fd6ed 100644 --- a/test/unit/versions/v1/data_request/test_models.py +++ b/test/unit/versions/v1/data_request/test_models.py @@ -4,6 +4,7 @@ from pydantic import TypeAdapter, ValidationError from pystac import Item +from marble_api.utils.geojson import collapse_geometries from marble_api.versions.v1.data_request.models import Author, DataRequestUpdate @@ -82,6 +83,10 @@ def test_temporal_tzaware(self, fake_class): with pytest.raises(ValidationError): fake_class(temporal=[datetime.datetime.now()]) + def test_uncollapsible_geometry(self, fake, fake_class): + with pytest.raises(ValueError): + fake_class(geometry=fake.uncollapsible_geojson()) + class TestDataRequestPublic(TestDataRequest): @pytest.fixture @@ -97,7 +102,7 @@ def test_valid(self, fake_class): def test_geometry(self, fake_class): request = fake_class() - assert request.stac_item["geometry"] == request.geometry.model_dump() + assert request.stac_item["geometry"] == collapse_geometries(request.geometry).model_dump() assert request.stac_item["bbox"] def test_null_geometry(self, fake_class):