Skip to content

Commit 873fc2d

Browse files
authored
Merge pull request #1 from digital-land/fix/improve-downloads
make changes to entity downloads
2 parents 795299b + eb3a23e commit 873fc2d

2 files changed

Lines changed: 25 additions & 23 deletions

File tree

Makefile

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,6 @@ init::;
1111
test-unit \
1212
test-acceptance
1313

14-
15-
init ::
16-
pip install --upgrade pip
17-
pip3 install --upgrade -r requirements.txt
18-
19-
2014
make clobber:
2115
rm -rf data/reporting
2216

src/duplicate_geometry_expectations.py

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,25 @@
22
import ast
33
import argparse
44
import os
5+
import logging
6+
logger = logging.getLogger(__name__)
57

8+
FILES_URL = os.environ.get("FILES_URL", "https://files.planning.data.gov.uk")
69

710
# Load expectations table
811
EXPECTATIONS_URL = "https://datasette.planning.data.gov.uk/digital-land/expectation.csv?_stream=on"
912

1013
# Entity tables to enrich A/B sides
1114
ENTITY_URLS = {
12-
"conservation-area": "https://datasette.planning.data.gov.uk/conservation-area/entity.csv?_stream=on",
13-
"article-4-direction-area": "https://datasette.planning.data.gov.uk/article-4-direction-area/entity.csv?_stream=on",
14-
"listed-building-outline": "https://datasette.planning.data.gov.uk/listed-building-outline/entity.csv?_stream=on",
15-
"tree-preservation-zone": "https://datasette.planning.data.gov.uk/tree-preservation-zone/entity.csv?_stream=on",
16-
"tree": "https://datasette.planning.data.gov.uk/tree/entity.csv?_stream=on",
15+
"conservation-area": f"{FILES_URL}/dataset/conservation-area.parquet",
16+
"article-4-direction-area": f"{FILES_URL}/dataset/article-4-direction-area.parquet",
17+
"listed-building-outline": f"{FILES_URL}/dataset/listed-building-outline.parquet",
18+
"tree-preservation-zone": f"{FILES_URL}/dataset/tree-preservation-zone.parquet",
19+
"tree": f"{FILES_URL}/dataset/tree.parquet",
1720
}
1821

1922
# Orgs lookup
20-
ORGS_URL = "https://datasette.planning.data.gov.uk/digital-land/organisation.csv?_stream=on"
23+
ORGS_URL = "https://files.planning.data.gov.uk/organisation-collection/dataset/organisation.csv"
2124

2225

2326
def parse_details(val):
@@ -98,17 +101,22 @@ def main(output_dir: str):
98101
]
99102
entity_tbls = []
100103
for dataset_name, entity_url in ENTITY_URLS.items():
101-
t = pd.read_csv(entity_url, low_memory=False)
102-
# Ensure required columns exist (skip missing datasets)
103-
missing = [c for c in ["entity", "end_date", "entry_date", "geometry", "name", "organisation_entity"] if c not in t.columns]
104-
if missing:
105-
continue
106-
t = t[["entity", "end_date", "entry_date", "geometry", "name", "organisation_entity"]].copy()
107-
t["dataset"] = dataset_name
108-
# Normalize key types
109-
t["entity"] = pd.to_numeric(t["entity"], errors="coerce").astype("Int64")
110-
t["organisation_entity"] = pd.to_numeric(t["organisation_entity"], errors="coerce").astype("Int64")
111-
entity_tbls.append(t[cols].copy())
104+
try:
105+
t = pd.read_parquet(entity_url)
106+
t.columns = t.columns.str.replace('-', '_')
107+
# Ensure required columns exist (skip missing datasets)
108+
missing = [c for c in ["entity", "end_date", "entry_date", "geometry", "name", "organisation_entity"] if c not in t.columns]
109+
if missing:
110+
continue
111+
t = t[["entity", "end_date", "entry_date", "geometry", "name", "organisation_entity","dataset"]]
112+
# Normalize key types (this could arguably be done at import time instead)
113+
t["entity"] = pd.to_numeric(t["entity"], errors="coerce").astype("Int64")
114+
t["organisation_entity"] = pd.to_numeric(t["organisation_entity"], errors="coerce").astype("Int64")
115+
entity_tbls.append(t[cols].copy())
116+
except Exception as e:
117+
logger.error(f"Failed to load entity table for dataset: {dataset_name} from {entity_url}")
118+
raise e
119+
112120

113121
if not entity_tbls:
114122
# No enrichment possible, just save what we have

0 commit comments

Comments
 (0)