Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ endif
endif
ifndef SPATIAL
ifeq ($(UNAME),Darwin)
$(error GDAL tools not found in PATH)
# $(error GDAL tools not found in PATH)
endif
sudo apt-get install libsqlite3-mod-spatialite -y
# sudo apt-get install libsqlite3-mod-spatialite -y
endif
pyproj sync --file uk_os_OSTN15_NTv2_OSGBtoETRS.tif -v
# install pre-commits
Expand Down
47 changes: 28 additions & 19 deletions digital_land/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from packaging.version import Version
import pandas as pd
from pathlib import Path
from datetime import datetime

# from datetime import datetime
from distutils.dir_util import copy_tree
import geojson
from requests import HTTPError
Expand All @@ -18,7 +19,7 @@

from digital_land.package.organisation import OrganisationPackage
from digital_land.specification import Specification
from digital_land.collect import Collector
from digital_land.collect import Collector, FetchStatus
from digital_land.collection import Collection, resource_path
from digital_land.log import (
DatasetResourceLog,
Expand Down Expand Up @@ -852,31 +853,36 @@ def validate_and_add_data_input(
)
endpoint_resource_info = {}
for endpoint in endpoints:
status, log = collector.fetch(
fetch_status, log = collector.fetch(
url=endpoint["endpoint-url"],
endpoint=endpoint["endpoint"],
end_date=endpoint["end-date"],
plugin=endpoint["plugin"],
refill_todays_logs=True,
)
try:
# log is already returned from fetch, but read from file if needed for verification
log_path = collector.log_path(datetime.utcnow(), endpoint["endpoint"])
if os.path.isfile(log_path):
with open(log_path, "r") as f:
log = json.load(f)
except Exception as e:
print(
f"Error: The log file for {endpoint} could not be read from path {log_path}.\n{e}"
)
break

status = log.get("status", None)
# try:
# # log is already returned from fetch, but read from file if needed for verification
# log_path = collector.log_path(datetime.utcnow(), endpoint["endpoint"])
# if os.path.isfile(log_path):
# with open(log_path, "r") as f:
# log = json.load(f)
# except Exception as e:
# print(
# f"Error: The log file for {endpoint} could not be read from path {log_path}.\n{e}"
# )
# break

log_status = log.get("status", None)
exception = log.get("exception", None)
if fetch_status not in [FetchStatus.OK, FetchStatus.ALREADY_FETCHED]:
raise HTTPError(
f"Failed to collect from URL. fetch status: {fetch_status}, log status: {log_status}, exception: {exception}"
)
# Raise exception if status is not 200
if not status or status != "200":
exception = log.get("exception", None)
if not log_status or log_status != "200":
raise HTTPError(
f"Failed to collect from URL with status: {status if status else exception}"
f"Failed to collect from URL with status: {log_status if log_status else exception}"
)

# Resource and path will only be printed if downloaded successfully but should only happen if status is 200
Expand All @@ -892,7 +898,7 @@ def validate_and_add_data_input(
resource_path,
)

print(f"Log Status for {endpoint['endpoint']}: The status is {status}")
print(f"Log Status for {endpoint['endpoint']}: The status is {log_status}")
endpoint_resource_info.update(
{
"endpoint": endpoint["endpoint"],
Expand All @@ -902,6 +908,9 @@ def validate_and_add_data_input(
"organisation": row["organisation"],
"entry-date": row["entry-date"],
}
# elif:
# raise Error(
# f"No resource available: {log_status if log_status else exception}"
)

return collection, endpoint_resource_info
Expand Down
3 changes: 2 additions & 1 deletion digital_land/plugins/arcgis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def get(collector, url, log={}, plugin="arcgis"):

response = dumper._request("GET", url)
dumper.get_metadata()
log["status"] = str(response.status_code)
response_status = str(response.status_code)

content = '{"type":"FeatureCollection","features":['
sep = "\n"
Expand All @@ -23,6 +23,7 @@ def get(collector, url, log={}, plugin="arcgis"):
content += "]}"

content = str.encode(content)
log["status"] = response_status

except Exception as exception:
logging.warning(exception)
Expand Down
199 changes: 199 additions & 0 deletions tests/unit/test_commands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
import csv
import logging
import os
import tempfile
from unittest.mock import Mock
import pytest
from requests import HTTPError

from digital_land.commands import validate_and_add_data_input
from tests.acceptance.conftest import copy_latest_specification_files_to

# Unit tests for digital_land.commands.validate_and_add_data_input


@pytest.fixture(scope="module")
def specification_dir(tmp_path_factory):
    """Module-scoped temp directory populated with the latest specification files."""
    spec_dir = tmp_path_factory.mktemp("specification")
    copy_latest_specification_files_to(spec_dir)
    return spec_dir


@pytest.fixture(scope="function")
def collection_dir(tmp_path_factory):
    """Function-scoped collection directory seeded with header-only source/endpoint CSVs."""
    directory = tmp_path_factory.mktemp("collection")

    # Column headers for the two registers a Collection expects to find.
    headers = {
        "source.csv": [
            "attribution",
            "collection",
            "documentation-url",
            "endpoint",
            "licence",
            "organisation",
            "pipelines",
            "entry-date",
            "start-date",
            "end-date",
        ],
        "endpoint.csv": [
            "endpoint",
            "endpoint-url",
            "parameters",
            "plugin",
            "entry-date",
            "start-date",
            "end-date",
        ],
    }

    # Write header-only files so the collection loader sees valid, empty registers.
    for filename, fieldnames in headers.items():
        with open(os.path.join(directory, filename), "w") as f:
            csv.DictWriter(f, fieldnames=fieldnames).writeheader()

    return directory


@pytest.fixture(scope="module")
def organisation_csv():
    """Write a one-row organisation CSV to a temp file and return its path.

    Uses ``tempfile.mkstemp`` instead of ``tempfile.NamedTemporaryFile().name``:
    with the latter, the file is deleted as soon as the wrapper object is
    garbage-collected, so the returned name is race-prone, and on Windows the
    path cannot be reopened while the original handle is still open.
    """
    fd, organisation_path = tempfile.mkstemp(suffix=".csv")
    os.close(fd)  # reopened by path below; don't leak the descriptor

    organisation_fieldnames = [
        "dataset",
        "end-date",
        "entity",
        "entry-date",
        "name",
        "organisation",
        "prefix",
        "reference",
        "start-date",
    ]
    organisation_row = {
        "dataset": "local-authority",
        "end-date": "",
        "entity": 314,
        "entry-date": "2023-11-19",
        "name": "South Staffordshire Council",
        "organisation": "local-authority:SST",
        "prefix": "local-authority",
        "reference": "SST",
        "start-date": "",
    }

    # newline="" is the documented way to open files for the csv module.
    with open(organisation_path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=organisation_fieldnames)
        writer.writeheader()
        writer.writerow(organisation_row)

    return organisation_path


@pytest.fixture
def mock_request_get(mocker):
    """Patch requests.Session.get to return a canned 200 response with a small body."""
    body = str({"reference": "1", "value": "test"}).encode("utf-8")

    response = Mock()
    response.status_code = 200
    response.request.headers = {"test": "test"}
    response.headers = {"test": "test"}
    response.content = body
    mocker.patch(
        "requests.Session.get",
        return_value=response,
    )


def create_input_csv(
    data,
    fieldnames=(
        "organisation",
        "documentation-url",
        "endpoint-url",
        "start-date",
        "pipelines",
        "plugin",
        "licence",
    ),
):
    """Write *data* as a single-row CSV to a fresh temp file and return its path.

    Args:
        data: mapping of column name -> value for the single row.
        fieldnames: column order for the CSV; defaults to the columns expected
            by the validate-and-add input file. A tuple is used so the default
            is immutable (the original list default was a mutable-default-
            argument hazard).

    Returns:
        str: path of the written temp file; the caller owns cleanup.
    """
    # mkstemp instead of NamedTemporaryFile().name: the latter creates a file
    # that is deleted when the wrapper object is garbage-collected, which is
    # race-prone and fails on Windows while the handle is still open.
    fd, tmp_input_path = tempfile.mkstemp(suffix=".csv")
    with os.fdopen(fd, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerow(data)

    return tmp_input_path


def test_validate_and_add_data_input_no_error(
    collection_dir,
    specification_dir,
    organisation_csv,
    caplog,
    mock_request_get,
):
    """Happy path: a valid input row is processed without emitting any ERROR logs."""
    input_row = {
        "organisation": "local-authority:SST",
        "documentation-url": "https://www.sstaffs.gov.uk/planning/conservation-and-heritage/south-staffordshires-conservation-areas",
        "endpoint-url": "https://www.sstaffs.gov.uk/sites/default/files/2024-11/South Staffs Conservation Area document dataset_1.csv",
        "start-date": "",
        "pipelines": "conservation-area",
        "plugin": "",
        "licence": "ogl3",
    }
    input_path = create_input_csv(input_row)

    with caplog.at_level(logging.ERROR):
        validate_and_add_data_input(
            input_path,
            "conservation-area",
            collection_dir,
            specification_dir,
            organisation_csv,
        )
        assert not caplog.text


def test_validate_and_add_data_input_error_thrown_when_no_resource_downloaded(
    collection_dir, specification_dir, organisation_csv, mocker
):
    """A 200 response with an empty body must raise HTTPError (no resource saved)."""
    mock_response = Mock()
    mock_response.status_code = 200
    mock_response.request.headers = {"test": "test"}
    mock_response.headers = {"test": "test"}
    # Empty body: the fetch "succeeds" but yields no resource to save.
    mock_response.content = ""
    mocker.patch(
        "requests.Session.get",
        return_value=mock_response,
    )
    collection_name = "conservation-area"
    no_error_input_data = {
        "organisation": "local-authority:SST",
        "documentation-url": "https://www.westoxon.gov.uk/planning-and-building/digital-planning-data/",
        "endpoint-url": "https://services5.arcgis.com/z8GJkxrWic0alJoM/arcgis/rest/services/WODC_Conservation_Areas_WGS/FeatureServer",
        "start-date": "",
        "pipelines": "conservation-area",
        "plugin": "",
        "licence": "ogl3",
    }

    tmp_input_path = create_input_csv(no_error_input_data)

    with pytest.raises(HTTPError) as error:
        validate_and_add_data_input(
            tmp_input_path,
            collection_name,
            collection_dir,
            specification_dir,
            organisation_csv,
        )

    # The code under test raises "Failed to collect from URL ..."; the previous
    # expectation ("Failed to collect resource from URL") was never a substring
    # of that message, so this assertion could not pass. Also check the
    # exception value itself rather than the ExceptionInfo wrapper.
    assert "Failed to collect from URL" in str(error.value)
Loading