Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 45 additions & 2 deletions application/blueprints/base/views.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,56 @@
from flask import Blueprint, current_app, render_template
from datetime import datetime

from flask import (
Blueprint,
current_app,
redirect,
render_template,
request,
session,
url_for,
)

from application.db.models import ServiceLock
from application.extensions import db

base = Blueprint("base", __name__)

ADD_DATA_LOCK = "add_data"


@base.route("/")
@base.route("/index")
def index():
authentication_on = current_app.config.get("AUTHENTICATION_ON", True)
return render_template("index.html", authentication_on=authentication_on)
add_data_blocked_by = request.args.get("add_data_blocked_by")
try:
add_data_lock = db.session.get(ServiceLock, ADD_DATA_LOCK)
except Exception:
add_data_lock = None
return render_template(
"index.html",
authentication_on=authentication_on,
add_data_lock=add_data_lock,
add_data_blocked_by=add_data_blocked_by,
)


@base.route("/process-lock/add-data/toggle", methods=["POST"])
def toggle_add_data_lock():
user = session.get("user", {})
username = user.get("login", "unknown")

lock = db.session.get(ServiceLock, ADD_DATA_LOCK)
if lock:
db.session.delete(lock)
else:
db.session.add(
ServiceLock(
name=ADD_DATA_LOCK, locked_by=username, locked_at=datetime.utcnow()
)
)
db.session.commit()
return redirect(url_for("base.index"))


@base.route("/health", strict_slashes=False)
Expand Down
10 changes: 9 additions & 1 deletion application/blueprints/datamanager/controllers/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from datetime import datetime

import requests
from flask import redirect, render_template, request, url_for
from flask import redirect, render_template, request, session, url_for
from shapely import wkt
from shapely.geometry import mapping

Expand All @@ -25,6 +25,7 @@
)
from ..utils import (
build_check_tables,
get_allowed_override_users,
)
from ..utils.configure import (
build_column_mapping_rows,
Expand Down Expand Up @@ -176,6 +177,12 @@ def handle_check_results(request_id, result):
must_fix.append(f"Missing required field: {col.get('field')}")
allow_add_data = len(must_fix) == 0

can_override = False
if not allow_add_data:
current_user = (session.get("user") or {}).get("login", "")
allowed = get_allowed_override_users()
can_override = current_user.lower() in allowed

return render_template(
"datamanager/check-results.html",
result=result,
Expand All @@ -184,6 +191,7 @@ def handle_check_results(request_id, result):
fixable=fixable,
passed_checks=passed_checks,
allow_add_data=allow_add_data,
can_override=can_override,
converted_table=converted_table,
transformed_table=transformed_table,
issue_log_table=issue_log_table,
Expand Down
41 changes: 34 additions & 7 deletions application/blueprints/datamanager/controllers/form.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,20 +124,17 @@ def handle_dashboard_add():
licence = (form.get("licence") or "ogl3").strip().lower()
authoritative = form.get("authoritative", "").strip().lower() or None

# start_date defaults to today if blank; partial dates are an error
day = (form.get("start_day") or "").strip()
month = (form.get("start_month") or "").strip()
year = (form.get("start_year") or "").strip()
start_date_str = None

if not any([day, month, year]):
start_date_str = date.today().isoformat()
elif all([day, month, year]):
if all([day, month, year]):
try:
start_date_str = date(int(year), int(month), int(day)).isoformat()
except (ValueError, TypeError):
errors["start_date"] = True
else:
elif any([day, month, year]):
errors["start_date"] = True

org_warning = form.get("org_warning", "false") == "true"
Expand All @@ -163,14 +160,19 @@ def handle_dashboard_add():
)

# Submit Data and set session with add data variables
geom_type = form.get("geom_type", "").strip() or None

if not any(errors.values()):
column_mapping = {"WKT": "geometry"} if geom_type == "polygon" else {}
payload = {
"params": {
"type": "check_url",
"collection": collection_id,
"dataset": dataset_id,
"url": endpoint_url,
"organisationName": org_code_input,
"geom_type": geom_type,
"column_mapping": column_mapping or None,
}
}
session["add_data_fields"] = {
Expand Down Expand Up @@ -213,9 +215,17 @@ def handle_dashboard_add_import():

if request.method == "POST":
mode = request.form.get("mode", "").strip()
csv_data = request.form.get("csv_data", "").strip()

if mode == "parse":
uploaded_file = request.files.get("csv_file")
if uploaded_file and uploaded_file.filename:
try:
csv_data = uploaded_file.read().decode("utf-8").strip()
except Exception as e:
errors["csv_data"] = f"Could not read uploaded file: {str(e)}"
else:
csv_data = request.form.get("csv_data", "").strip()

if mode == "parse" and not errors:
try:
reader = csv.DictReader(StringIO(csv_data))
rows = list(reader)
Expand Down Expand Up @@ -280,6 +290,7 @@ def _submit_add_data_preview(request_id, add_data_fields):
"licence": add_data_fields["licence"],
"start_date": add_data_fields["start_date"],
"authoritative": add_data_fields["authoritative"],
"geom_type": check_params.get("geom_type"),
"github_branch": (
current_app.config.get("CONFIG_REPO_BRANCH") or None
if not add_data_fields.get("github_new", True)
Expand All @@ -302,6 +313,21 @@ def _has_all_add_data_fields(add_data_fields):
)


def _parse_start_date(start_date_str):
"""Split an ISO date string into start_day, start_month, start_year form fields."""
if not start_date_str:
return {}
try:
d = date.fromisoformat(start_date_str)
return {
"start_day": str(d.day),
"start_month": str(d.month),
"start_year": str(d.year),
}
except (ValueError, TypeError):
return {}


def handle_add_data(request_id):
"""
This will only show a form if required fields for add_data are not already in session.
Expand All @@ -326,6 +352,7 @@ def handle_add_data(request_id):
"github_new": (
"false" if add_data_fields.get("github_new") is False else "true"
),
**(_parse_start_date(add_data_fields.get("start_date"))),
},
)

Expand Down
10 changes: 10 additions & 0 deletions application/blueprints/datamanager/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
session,
)

from application.db.models import ServiceLock
from application.extensions import db

from .controllers.form import (
handle_dashboard_get,
handle_dashboard_add,
Expand Down Expand Up @@ -49,6 +52,13 @@ def require_login():
if session.get("user") is None:
return redirect(url_for("auth.login", next=request.url))

try:
lock = db.session.get(ServiceLock, "add_data")
except Exception:
lock = None
if lock:
return redirect(url_for("base.index", add_data_blocked_by=lock.locked_by))


# TODO: remove these view functions and move logic entirely into controllers

Expand Down
66 changes: 53 additions & 13 deletions application/blueprints/datamanager/services/dataset.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import csv
import logging
import time
from io import StringIO

import requests
from flask import current_app

from ..config import get_datasets_url
from ..utils import REQUESTS_TIMEOUT
Expand All @@ -16,32 +19,69 @@


def _get_datasets():
"""Internal: fetch and cache the dataset maps."""
"""Internal: fetch and cache dataset maps.

Dataset list is sourced from the provision CSV (ground truth for supported
datasets). Display name and collection are enriched from the planning API.
"""
now = time.monotonic()
if _cache["data"] is not None and now < _cache["expires_at"]:
return _cache["data"]

try:
ds_response = requests.get(
# Step 1: get unique dataset IDs from the provision CSV
provision_url = current_app.config.get("PROVISION_CSV_URL")
prov_response = requests.get(
provision_url,
timeout=REQUESTS_TIMEOUT,
headers={"User-Agent": "Planning Data - Manage"},
)
prov_response.raise_for_status()
reader = csv.DictReader(StringIO(prov_response.text))
provision_dataset_ids = {
row["dataset"].strip() for row in reader if row.get("dataset", "").strip()
}

# Step 2: enrich with name + collection from the planning API
planning_response = requests.get(
get_datasets_url(),
timeout=REQUESTS_TIMEOUT,
headers={"User-Agent": "Planning Data - Manage"},
).json()

planning_lookup = {
d["dataset"]: d
for d in planning_response.get("datasets", [])
if "dataset" in d
}

name_to_dataset_id = {}
name_to_collection_id = {}
dataset_id_to_name = {}

for dataset_id in provision_dataset_ids:
planning_entry = planning_lookup.get(dataset_id)
if planning_entry:
name = planning_entry.get("name") or dataset_id
collection = planning_entry.get("collection") or dataset_id
else:
name = dataset_id
collection = dataset_id

name_to_dataset_id[name] = dataset_id
name_to_collection_id[name] = collection
dataset_id_to_name[dataset_id] = name

dataset_options = sorted(name_to_dataset_id.keys())

except Exception as e:
logger.exception("Error fetching datasets")
if _cache["data"] is not None:
logger.warning("Returning stale dataset cache after fetch failure")
return _cache["data"]
raise Exception("Failed to fetch dataset list") from e

datasets = [d for d in ds_response.get("datasets", []) if "collection" in d]
dataset_options = sorted([d["name"] for d in datasets])
name_to_dataset_id = {d["name"]: d["dataset"] for d in datasets}
name_to_collection_id = {d["name"]: d["collection"] for d in datasets}
dataset_id_to_name = {d["dataset"]: d["name"] for d in datasets}

result = (
datasets,
dataset_options,
name_to_dataset_id,
name_to_collection_id,
Expand All @@ -56,22 +96,22 @@ def _get_datasets():

def get_dataset_options() -> list:
    """Return sorted list of dataset names for autocomplete."""
    # Index 0 of the cached tuple is the sorted list of display names.
    return _get_datasets()[0]


def get_dataset_id(name: str) -> str | None:
    """Look up the dataset ID for a given dataset name."""
    # Index 1 of the cached tuple maps display name -> dataset ID.
    return _get_datasets()[1].get(name)


def get_collection_id(name: str) -> str | None:
    """Look up the collection ID for a given dataset name."""
    # Index 2 of the cached tuple maps display name -> collection ID.
    return _get_datasets()[2].get(name)


def get_dataset_name(dataset_id: str, default: str = None) -> str | None:
    """Look up the dataset name for a given dataset ID."""
    # Index 3 of the cached tuple maps dataset ID -> display name.
    return _get_datasets()[3].get(dataset_id, default)


def search_datasets(query: str, limit: int = 10) -> list:
Expand Down
18 changes: 18 additions & 0 deletions application/blueprints/datamanager/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import csv
import logging
import os
import re
from datetime import datetime
from io import StringIO

Expand All @@ -15,6 +17,22 @@
REQUESTS_TIMEOUT = 20 # seconds


def get_allowed_override_users() -> set:
    """Read config/allowed-users.md and return a set of GitHub usernames.

    Usernames are taken from markdown list items ("- name" or "* name"),
    lower-cased for case-insensitive comparison. Returns an empty set when
    the file is missing or unreadable, which denies override to everyone.
    """
    # The config directory sits alongside the application package root.
    project_root = os.path.dirname(current_app.root_path)
    path = os.path.join(project_root, "config", "allowed-users.md")
    try:
        # Explicit encoding so parsing does not depend on the host locale.
        with open(path, "r", encoding="utf-8") as f:
            content = f.read()
        return {
            m.group(1).strip().lower()
            for m in re.finditer(r"^[-*]\s+(\S+)", content, re.MULTILINE)
        }
    except Exception as e:
        logger.warning(f"Could not read allowed-users.md: {e}")
        return set()


def handle_error(e):
logger.exception(f"Error: {e}")
return render_template("datamanager/error.html", message=str(e)), 500
Expand Down
Loading
Loading