Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ endif
endif
ifndef SPATIAL
ifeq ($(UNAME),Darwin)
$(error GDAL tools not found in PATH)
# $(error GDAL tools not found in PATH)
endif
sudo apt-get install libsqlite3-mod-spatialite -y
# sudo apt-get install libsqlite3-mod-spatialite -y
endif
pyproj sync --file uk_os_OSTN15_NTv2_OSGBtoETRS.tif -v
# install pre-commits
Expand Down
47 changes: 28 additions & 19 deletions digital_land/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from packaging.version import Version
import pandas as pd
from pathlib import Path
from datetime import datetime

# from datetime import datetime
from distutils.dir_util import copy_tree
import geojson
from requests import HTTPError
Expand All @@ -18,7 +19,7 @@

from digital_land.package.organisation import OrganisationPackage
from digital_land.specification import Specification
from digital_land.collect import Collector
from digital_land.collect import Collector, FetchStatus
from digital_land.collection import Collection, resource_path
from digital_land.log import (
DatasetResourceLog,
Expand Down Expand Up @@ -852,31 +853,36 @@ def validate_and_add_data_input(
)
endpoint_resource_info = {}
for endpoint in endpoints:
status, log = collector.fetch(
fetch_status, log = collector.fetch(
url=endpoint["endpoint-url"],
endpoint=endpoint["endpoint"],
end_date=endpoint["end-date"],
plugin=endpoint["plugin"],
refill_todays_logs=True,
)
try:
# log is already returned from fetch, but read from file if needed for verification
log_path = collector.log_path(datetime.utcnow(), endpoint["endpoint"])
if os.path.isfile(log_path):
with open(log_path, "r") as f:
log = json.load(f)
except Exception as e:
print(
f"Error: The log file for {endpoint} could not be read from path {log_path}.\n{e}"
)
break

status = log.get("status", None)
# try:
# # log is already returned from fetch, but read from file if needed for verification
# log_path = collector.log_path(datetime.utcnow(), endpoint["endpoint"])
# if os.path.isfile(log_path):
# with open(log_path, "r") as f:
# log = json.load(f)
# except Exception as e:
# print(
# f"Error: The log file for {endpoint} could not be read from path {log_path}.\n{e}"
# )
# break

log_status = log.get("status", None)
exception = log.get("exception", None)
if fetch_status not in [FetchStatus.OK, FetchStatus.ALREADY_FETCHED]:
raise HTTPError(
f"Failed to collect from URL. fetch status: {fetch_status}, log status: {log_status}, exception: {exception}"
)
# Raise exception if status is not 200
if not status or status != "200":
exception = log.get("exception", None)
if not log_status or log_status != "200":
raise HTTPError(
f"Failed to collect from URL with status: {status if status else exception}"
f"Failed to collect from URL with status: {log_status if log_status else exception}"
)

# Resource and path will only be printed if downloaded successfully but should only happen if status is 200
Expand All @@ -892,7 +898,7 @@ def validate_and_add_data_input(
resource_path,
)

print(f"Log Status for {endpoint['endpoint']}: The status is {status}")
print(f"Log Status for {endpoint['endpoint']}: The status is {log_status}")
endpoint_resource_info.update(
{
"endpoint": endpoint["endpoint"],
Expand All @@ -902,6 +908,9 @@ def validate_and_add_data_input(
"organisation": row["organisation"],
"entry-date": row["entry-date"],
}
# elif:
# raise Error(
# f"No resource available: {log_status if log_status else exception}"
)

return collection, endpoint_resource_info
Expand Down
3 changes: 2 additions & 1 deletion digital_land/plugins/arcgis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def get(collector, url, log={}, plugin="arcgis"):

response = dumper._request("GET", url)
dumper.get_metadata()
log["status"] = str(response.status_code)
response_status = str(response.status_code)

content = '{"type":"FeatureCollection","features":['
sep = "\n"
Expand All @@ -23,6 +23,7 @@ def get(collector, url, log={}, plugin="arcgis"):
content += "]}"

content = str.encode(content)
log["status"] = response_status

except Exception as exception:
logging.warning(exception)
Expand Down
199 changes: 199 additions & 0 deletions tests/unit/test_commands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
import csv
import logging
import os
import tempfile
from unittest.mock import Mock
import pytest
from requests import HTTPError

from digital_land.commands import validate_and_add_data_input
from tests.acceptance.conftest import copy_latest_specification_files_to

# Unit tests for digital_land.commands.validate_and_add_data_input


@pytest.fixture(scope="module")
def specification_dir(tmp_path_factory):
    """Module-scoped temp directory populated with the latest specification files."""
    spec_dir = tmp_path_factory.mktemp("specification")
    copy_latest_specification_files_to(spec_dir)
    return spec_dir


@pytest.fixture(scope="function")
def collection_dir(tmp_path_factory):
    """Function-scoped collection directory seeded with header-only source/endpoint CSVs."""
    directory = tmp_path_factory.mktemp("collection")

    # Column headers for the two registers a Collection expects to find.
    headers = {
        "source.csv": [
            "attribution",
            "collection",
            "documentation-url",
            "endpoint",
            "licence",
            "organisation",
            "pipelines",
            "entry-date",
            "start-date",
            "end-date",
        ],
        "endpoint.csv": [
            "endpoint",
            "endpoint-url",
            "parameters",
            "plugin",
            "entry-date",
            "start-date",
            "end-date",
        ],
    }

    # Write header-only files so the collection loader sees valid, empty registers.
    for filename, fieldnames in headers.items():
        with open(os.path.join(directory, filename), "w") as f:
            csv.DictWriter(f, fieldnames=fieldnames).writeheader()

    return directory


@pytest.fixture(scope="module")
def organisation_csv():
    """Write a one-row organisation CSV to a temp file and return its path.

    Uses ``tempfile.mkstemp`` instead of ``tempfile.NamedTemporaryFile().name``:
    with the latter, the file is deleted as soon as the wrapper object is
    garbage-collected, so the returned name is race-prone, and on Windows the
    path cannot be reopened while the original handle is still open.
    """
    fd, organisation_path = tempfile.mkstemp(suffix=".csv")
    os.close(fd)  # reopened by path below; don't leak the descriptor

    organisation_fieldnames = [
        "dataset",
        "end-date",
        "entity",
        "entry-date",
        "name",
        "organisation",
        "prefix",
        "reference",
        "start-date",
    ]
    organisation_row = {
        "dataset": "local-authority",
        "end-date": "",
        "entity": 314,
        "entry-date": "2023-11-19",
        "name": "South Staffordshire Council",
        "organisation": "local-authority:SST",
        "prefix": "local-authority",
        "reference": "SST",
        "start-date": "",
    }

    # newline="" is the documented way to open files for the csv module.
    with open(organisation_path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=organisation_fieldnames)
        writer.writeheader()
        writer.writerow(organisation_row)

    return organisation_path


@pytest.fixture
def mock_request_get(mocker):
    """Patch requests.Session.get to return a canned 200 response with a small body."""
    body = str({"reference": "1", "value": "test"}).encode("utf-8")

    response = Mock()
    response.status_code = 200
    response.request.headers = {"test": "test"}
    response.headers = {"test": "test"}
    response.content = body
    mocker.patch(
        "requests.Session.get",
        return_value=response,
    )


def create_input_csv(
    data,
    fieldnames=(
        "organisation",
        "documentation-url",
        "endpoint-url",
        "start-date",
        "pipelines",
        "plugin",
        "licence",
    ),
):
    """Write *data* as a single-row CSV to a fresh temp file and return its path.

    Args:
        data: mapping of column name -> value for the single row.
        fieldnames: column order for the CSV; defaults to the columns expected
            by the validate-and-add input file. A tuple is used so the default
            is immutable (the original list default was a mutable-default-
            argument hazard).

    Returns:
        str: path of the written temp file; the caller owns cleanup.
    """
    # mkstemp instead of NamedTemporaryFile().name: the latter creates a file
    # that is deleted when the wrapper object is garbage-collected, which is
    # race-prone and fails on Windows while the handle is still open.
    fd, tmp_input_path = tempfile.mkstemp(suffix=".csv")
    with os.fdopen(fd, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerow(data)

    return tmp_input_path


def test_validate_and_add_data_input_no_error(
    collection_dir,
    specification_dir,
    organisation_csv,
    caplog,
    mock_request_get,
):
    """Happy path: a valid input row is processed without emitting any ERROR logs."""
    input_row = {
        "organisation": "local-authority:SST",
        "documentation-url": "https://www.sstaffs.gov.uk/planning/conservation-and-heritage/south-staffordshires-conservation-areas",
        "endpoint-url": "https://www.sstaffs.gov.uk/sites/default/files/2024-11/South Staffs Conservation Area document dataset_1.csv",
        "start-date": "",
        "pipelines": "conservation-area",
        "plugin": "",
        "licence": "ogl3",
    }
    input_path = create_input_csv(input_row)

    with caplog.at_level(logging.ERROR):
        validate_and_add_data_input(
            input_path,
            "conservation-area",
            collection_dir,
            specification_dir,
            organisation_csv,
        )
        assert not caplog.text


def test_validate_and_add_data_input_error_thrown_when_no_resource_downloaded(
    collection_dir, specification_dir, organisation_csv, mocker
):
    """A 200 response with an empty body must raise HTTPError (no resource saved)."""
    mock_response = Mock()
    mock_response.status_code = 200
    mock_response.request.headers = {"test": "test"}
    mock_response.headers = {"test": "test"}
    # Empty body: the fetch "succeeds" but yields no resource to save.
    mock_response.content = ""
    mocker.patch(
        "requests.Session.get",
        return_value=mock_response,
    )
    collection_name = "conservation-area"
    no_error_input_data = {
        "organisation": "local-authority:SST",
        "documentation-url": "https://www.westoxon.gov.uk/planning-and-building/digital-planning-data/",
        "endpoint-url": "https://services5.arcgis.com/z8GJkxrWic0alJoM/arcgis/rest/services/WODC_Conservation_Areas_WGS/FeatureServer",
        "start-date": "",
        "pipelines": "conservation-area",
        "plugin": "",
        "licence": "ogl3",
    }

    tmp_input_path = create_input_csv(no_error_input_data)

    with pytest.raises(HTTPError) as error:
        validate_and_add_data_input(
            tmp_input_path,
            collection_name,
            collection_dir,
            specification_dir,
            organisation_csv,
        )

    # The code under test raises "Failed to collect from URL ..."; the previous
    # expectation ("Failed to collect resource from URL") was never a substring
    # of that message, so this assertion could not pass. Also check the
    # exception value itself rather than the ExceptionInfo wrapper.
    assert "Failed to collect from URL" in str(error.value)
Loading