Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions application/blueprints/datamanager/controllers/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
from ..services.dataset import (
get_dataset_name,
)
from ..services.dataset_field import (
get_field_names_for_dataset,
)
from ..services.organisation import (
get_organisation_name,
)
Expand Down Expand Up @@ -117,8 +120,8 @@ def handle_check_results(request_id, result):
boundary_geojson_url = ""
try:
if ":" in organisation_code:
dataset_id, lpa_id = organisation_code.split(":", 1)
resp = requests.get(get_entity_search_url(dataset_id, lpa_id))
lpa_prefix, lpa_id = organisation_code.split(":", 1)
resp = requests.get(get_entity_search_url(lpa_prefix, lpa_id))
resp.raise_for_status()
d = resp.json()
entity = d.get("entities", [])[0] if d and d.get("entities") else None
Expand Down Expand Up @@ -164,6 +167,9 @@ def handle_check_results(request_id, result):
# Build column mapping rows for inline configure UI
unmapped_columns = converted_table.get("unmapped_columns", set())
mapping_rows = build_column_mapping_rows(column_field_log, unmapped_columns)
# Merge spec fields with all dataset fields so the mapping dropdown includes
# fields that aren't present in this check's column-field-log
spec_fields = spec_fields | set(get_field_names_for_dataset(dataset_id))

# Checks: must_fix is missing columns in column_field_log, passed_checks is columns that exist
# (even if missing values exist still passes), fixable is everything in error_summary (issue_logs combined)
Expand Down
10 changes: 10 additions & 0 deletions application/blueprints/datamanager/controllers/form.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,16 @@ def handle_dashboard_get():

session.pop("import_csv_data", None)

if (
not form.get("start_day")
and not form.get("start_month")
and not form.get("start_year")
):
today = date.today()
form["start_day"] = str(today.day)
form["start_month"] = str(today.month)
form["start_year"] = str(today.year)

return render_template(
"datamanager/dashboard_add.html",
dataset_input=dataset_input,
Expand Down
67 changes: 67 additions & 0 deletions application/blueprints/datamanager/services/dataset_field.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import csv
import logging
import time
from collections import defaultdict
from io import StringIO

import requests
from flask import current_app

from ..utils import REQUESTS_TIMEOUT

logger = logging.getLogger(__name__)

# Module-level single-entry cache for the parsed dataset-field mapping.
# "data" holds the parsed dict (None until the first successful fetch);
# "expires_at" is a time.monotonic() deadline after which the entry is stale.
_cache = {
    "data": None,
    "expires_at": 0,
}
CACHE_TTL_SECONDS = 300  # 5 minutes


def _get_dataset_fields() -> dict[str, list[dict]]:
    """Fetch and cache the dataset-field mapping from the specification CSV.

    Downloads the CSV at ``DATASET_FIELD_CSV_URL`` and groups its rows by
    the ``dataset`` column.

    Returns:
        A dict keyed by dataset ID; each value is a list of field dicts
        (the CSV row with the ``dataset`` column removed).

    Raises:
        Exception: re-raises the underlying fetch/parse error, but only when
        there is no previously cached data to fall back on.
    """
    now = time.monotonic()
    # Serve from cache while it is still fresh.
    if _cache["data"] is not None and now < _cache["expires_at"]:
        return _cache["data"]

    url = current_app.config.get("DATASET_FIELD_CSV_URL")

    try:
        response = requests.get(
            url,
            timeout=REQUESTS_TIMEOUT,
            headers={"User-Agent": "Planning Data - Manage"},
        )
        response.raise_for_status()

        reader = csv.DictReader(StringIO(response.text))
        result: dict[str, list[dict]] = defaultdict(list)
        for row in reader:
            # csv.DictReader fills missing columns of short rows with None
            # (the key is present, so .get's default never applies) — guard
            # before calling .strip() to avoid an AttributeError.
            dataset = (row.get("dataset") or "").strip()
            if dataset:
                result[dataset].append({k: v for k, v in row.items() if k != "dataset"})

    except Exception:
        logger.exception("Error fetching dataset-field CSV")
        # Best-effort fallback: prefer stale data over a hard failure.
        if _cache["data"] is not None:
            logger.warning("Returning stale dataset-field cache after fetch failure")
            return _cache["data"]
        raise

    _cache["data"] = dict(result)
    _cache["expires_at"] = now + CACHE_TTL_SECONDS

    return _cache["data"]


def get_fields_for_dataset(dataset_id: str) -> list[dict]:
    """Look up the field rows defined for *dataset_id*.

    Returns an empty list when the dataset is not present in the mapping.
    """
    mapping = _get_dataset_fields()
    return mapping.get(dataset_id, [])


def get_field_names_for_dataset(dataset_id: str) -> list[str]:
    """Return the field names for *dataset_id*, sorted alphabetically."""
    names = [row["field"] for row in get_fields_for_dataset(dataset_id)]
    names.sort()
    return names
8 changes: 7 additions & 1 deletion config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ class Config:
"https://raw.githubusercontent.com/digital-land/specification/refs/heads/main/specification/provision.csv",
)

# Dataset field specification
DATASET_FIELD_CSV_URL = os.getenv(
"DATASET_FIELD_CSV_URL",
"https://raw.githubusercontent.com/digital-land/specification/refs/heads/main/specification/dataset-field.csv",
)


class DevelopmentConfig(Config):
DEBUG = True
Expand Down Expand Up @@ -81,7 +87,7 @@ def get_request_api_endpoint():
"local": "http://localhost:8000",
"development": "http://development-pub-async-api-lb-69142969.eu-west-2.elb.amazonaws.com",
"staging": "http://staging-pub-async-api-lb-12493311.eu-west-2.elb.amazonaws.com",
"production": "http://development-pub-async-api-lb-69142969.eu-west-2.elb.amazonaws.com",
"production": "http://staging-pub-async-api-lb-12493311.eu-west-2.elb.amazonaws.com",
}

return mapping.get(env, mapping["local"])
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,11 @@ def test_check_results_shows_unmapped_geom_column(self, client):
"application.blueprints.datamanager.controllers.check.get_dataset_name",
return_value="Article 4 Direction Area",
):
response = client.get("/datamanager/check-results/check-id-1")
with patch(
"application.blueprints.datamanager.controllers.check.get_field_names_for_dataset",
return_value=[],
):
response = client.get("/datamanager/check-results/check-id-1")
assert response.status_code == 200
assert b"geom" in response.data
assert b"map[geom]" in response.data # column mapping select input name
Expand Down
Loading