From 3f0765984eba39e54541d2dc23a5c03bbda1bae0 Mon Sep 17 00:00:00 2001 From: eveleighoj <35256612+eveleighoj@users.noreply.github.com> Date: Thu, 29 Jan 2026 15:20:52 +0000 Subject: [PATCH 1/3] correct how arcgis status is reflected --- digital_land/commands.py | 19 +++++++++++-------- digital_land/plugins/arcgis.py | 3 ++- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/digital_land/commands.py b/digital_land/commands.py index 463f0f452..9c819d87b 100644 --- a/digital_land/commands.py +++ b/digital_land/commands.py @@ -19,7 +19,7 @@ from digital_land.package.organisation import OrganisationPackage from digital_land.check import duplicate_reference_check from digital_land.specification import Specification -from digital_land.collect import Collector +from digital_land.collect import Collector, FetchStatus from digital_land.collection import Collection, resource_path from digital_land.log import ( DatasetResourceLog, @@ -954,7 +954,7 @@ def validate_and_add_data_input( ) endpoint_resource_info = {} for endpoint in endpoints: - status, log = collector.fetch( + fetch_status, log = collector.fetch( url=endpoint["endpoint-url"], endpoint=endpoint["endpoint"], end_date=endpoint["end-date"], @@ -972,13 +972,16 @@ def validate_and_add_data_input( f"Error: The log file for {endpoint} could not be read from path {log_path}.\n{e}" ) break - - status = log.get("status", None) + log_status = log.get("status", None) + exception = log.get("exception", None) + if fetch_status not in [FetchStatus.OK, FetchStatus.ALREADY_FETCHED]: + raise HTTPError( + f"Failed to collect from URL. 
fetch status: {fetch_status}, log status: {log_status}, exception: {exception}" + ) # Raise exception if status is not 200 - if not status or status != "200": - exception = log.get("exception", None) + if not log_status or log_status != "200": raise HTTPError( - f"Failed to collect from URL with status: {status if status else exception}" + f"Failed to collect from URL with status: {log_status if log_status else exception}" ) # Resource and path will only be printed if downloaded successfully but should only happen if status is 200 @@ -994,7 +997,7 @@ def validate_and_add_data_input( resource_path, ) - print(f"Log Status for {endpoint['endpoint']}: The status is {status}") + print(f"Log Status for {endpoint['endpoint']}: The status is {log_status}") endpoint_resource_info.update( { "endpoint": endpoint["endpoint"], diff --git a/digital_land/plugins/arcgis.py b/digital_land/plugins/arcgis.py index 09a614b19..5f64efb59 100644 --- a/digital_land/plugins/arcgis.py +++ b/digital_land/plugins/arcgis.py @@ -11,7 +11,7 @@ def get(collector, url, log={}, plugin="arcgis"): response = dumper._request("GET", url) dumper.get_metadata() - log["status"] = str(response.status_code) + response_status = str(response.status_code) content = '{"type":"FeatureCollection","features":[' sep = "\n" @@ -23,6 +23,7 @@ def get(collector, url, log={}, plugin="arcgis"): content += "]}" content = str.encode(content) + log["status"] = response_status except Exception as exception: logging.warning(exception) From 6a84c76f6ec3289efb333b922615abcfd9f382ac Mon Sep 17 00:00:00 2001 From: jandums Date: Thu, 26 Mar 2026 18:33:39 +0000 Subject: [PATCH 2/3] w.i.p --- digital_land/commands.py | 2 + tests/unit/test_commands.py | 342 ++++++++++++++++++++++++++++++++++++ 2 files changed, 344 insertions(+) create mode 100644 tests/unit/test_commands.py diff --git a/digital_land/commands.py b/digital_land/commands.py index 5700a9d55..6773b1a5c 100644 --- a/digital_land/commands.py +++ b/digital_land/commands.py 
@@ -859,6 +859,7 @@ def validate_and_add_data_input( plugin=endpoint["plugin"], refill_todays_logs=True, ) + ''' try: # log is already returned from fetch, but read from file if needed for verification log_path = collector.log_path(datetime.utcnow(), endpoint["endpoint"]) @@ -870,6 +871,7 @@ def validate_and_add_data_input( f"Error: The log file for {endpoint} could not be read from path {log_path}.\n{e}" ) break + ''' log_status = log.get("status", None) exception = log.get("exception", None) if fetch_status not in [FetchStatus.OK, FetchStatus.ALREADY_FETCHED]: diff --git a/tests/unit/test_commands.py b/tests/unit/test_commands.py new file mode 100644 index 000000000..a2b07711f --- /dev/null +++ b/tests/unit/test_commands.py @@ -0,0 +1,342 @@ +def test_validate_and_add_data_input_error_thrown_whhen_no_resource_downloaded( + csv_file_path, collection_name, collection_dir, specification_dir, organisation_path +): + expected_cols = [ + "pipelines", + "organisation", + "documentation-url", + "endpoint-url", + "start-date", + "licence", + ] + + specification = Specification(specification_dir) + organisation = Organisation(organisation_path=organisation_path) + + collection = Collection(name=collection_name, directory=collection_dir) + collection.load() + # ===== FIRST VALIDATION BASED ON IMPORT.CSV INFO + # - Check licence, url, date, organisation + + # read and process each record of the new endpoints csv at csv_file_path i.e import.csv + + with open(csv_file_path) as new_endpoints_file: + reader = csv.DictReader(new_endpoints_file) + csv_columns = reader.fieldnames + + # validate the columns in input .csv + for expected_col in expected_cols: + if expected_col not in csv_columns: + raise Exception(f"required column ({expected_col}) not found in csv") + + for row in reader: + # validate licence + if row["licence"] == "": + raise ValueError("Licence is blank") + elif not specification.licence.get(row["licence"], None): + raise ValueError( + f"Licence '{row['licence']}' 
is not a valid licence according to the specification." + ) + # check if urls are not blank and valid urls + is_endpoint_valid, endpoint_valid_error = is_url_valid( + row["endpoint-url"], "endpoint_url" + ) + is_documentation_valid, documentation_valid_error = is_url_valid( + row["documentation-url"], "documentation_url" + ) + if not is_endpoint_valid or not is_documentation_valid: + raise ValueError( + f"{endpoint_valid_error} \n {documentation_valid_error}" + ) + + # if there is no start-date, do we want to populate it with today's date? + if row["start-date"]: + valid_date, error = is_date_valid(row["start-date"], "start-date") + if not valid_date: + raise ValueError(error) + + # validate organisation + if row["organisation"] == "": + raise ValueError("The organisation must not be blank") + elif not organisation.lookup(row["organisation"]): + raise ValueError( + f"The given organisation '{row['organisation']}' is not in our valid organisations" + ) + + # validate pipeline(s) - do they exist and are they in the collection + pipelines = row["pipelines"].split(";") + for pipeline in pipelines: + if not specification.dataset.get(pipeline, None): + raise ValueError( + f"'{pipeline}' is not a valid dataset in the specification" + ) + collection_in_specification = specification.dataset.get( + pipeline, None + ).get("collection") + if collection_name != collection_in_specification: + raise ValueError( + f"'{pipeline}' does not belong to provided collection {collection_name}" + ) + + # VALIDATION DONE, NOW ADD TO COLLECTION + print("======================================================================") + print("Endpoint and source details") + print("======================================================================") + print("Endpoint URL: ", row["endpoint-url"]) + print("Endpoint Hash:", hash_value(row["endpoint-url"])) + print("Documentation URL: ", row["documentation-url"]) + print() + + endpoints = [] + # if endpoint already exists, it will indicate it and quit 
function here + if collection.add_source_endpoint(row): + endpoint = { + "endpoint-url": row["endpoint-url"], + "endpoint": hash_value(row["endpoint-url"]), + "end-date": row.get("end-date", ""), + "plugin": row.get("plugin"), + "licence": row["licence"], + } + endpoints.append(endpoint) + else: + # We rely on the add_source_endpoint function to log why it couldn't be added + raise Exception( + "Endpoint and source could not be added - is this a duplicate endpoint?" + ) + + # if successfully added we can now attempt to fetch from endpoint + collector = Collector( + resource_dir=str(Path(collection_dir) / "resource"), + log_dir=str(Path(collection_dir) / "log"), + ) + endpoint_resource_info = {} + for endpoint in endpoints: + fetch_status, log = collector.fetch( + url=endpoint["endpoint-url"], + endpoint=endpoint["endpoint"], + end_date=endpoint["end-date"], + plugin=endpoint["plugin"], + refill_todays_logs=True, + ) + try: + # log is already returned from fetch, but read from file if needed for verification + log_path = collector.log_path(datetime.utcnow(), endpoint["endpoint"]) + if os.path.isfile(log_path): + with open(log_path, "r") as f: + log = json.load(f) + except Exception as e: + print( + f"Error: The log file for {endpoint} could not be read from path {log_path}.\n{e}" + ) + break + log_status = log.get("status", None) + exception = log.get("exception", None) + if fetch_status not in [FetchStatus.OK, FetchStatus.ALREADY_FETCHED]: + raise HTTPError( + f"Failed to collect from URL. 
fetch status: {fetch_status}, log status: {log_status}, exception: {exception}" + ) + # Raise exception if status is not 200 + if not log_status or log_status != "200": + raise HTTPError( + f"Failed to collect from URL with status: {log_status if log_status else exception}" + ) + + # Resource and path will only be printed if downloaded successfully but should only happen if status is 200 + resource = log.get("resource", None) + if resource: + resource_path = Path(collection_dir) / "resource" / resource + print( + "Resource collected: ", + resource, + ) + print( + "Resource Path is: ", + resource_path, + ) + + print(f"Log Status for {endpoint['endpoint']}: The status is {log_status}") + endpoint_resource_info.update( + { + "endpoint": endpoint["endpoint"], + "resource": log.get("resource"), + "resource_path": resource_path, + "pipelines": row["pipelines"].split(";"), + "organisation": row["organisation"], + "entry-date": row["entry-date"], + } + ) + + return collection, endpoint_resource_info + + + + +def validate_and_add_data_input_non_200( + csv_file_path, collection_name, collection_dir, specification_dir, organisation_path +): + expected_cols = [ + "pipelines", + "organisation", + "documentation-url", + "endpoint-url", + "start-date", + "licence", + ] + + specification = Specification(specification_dir) + organisation = Organisation(organisation_path=organisation_path) + + collection = Collection(name=collection_name, directory=collection_dir) + collection.load() + # ===== FIRST VALIDATION BASED ON IMPORT.CSV INFO + # - Check licence, url, date, organisation + + # read and process each record of the new endpoints csv at csv_file_path i.e import.csv + + with open(csv_file_path) as new_endpoints_file: + reader = csv.DictReader(new_endpoints_file) + csv_columns = reader.fieldnames + + # validate the columns in input .csv + for expected_col in expected_cols: + if expected_col not in csv_columns: + raise Exception(f"required column ({expected_col}) not found in csv") 
+ + for row in reader: + # validate licence + if row["licence"] == "": + raise ValueError("Licence is blank") + elif not specification.licence.get(row["licence"], None): + raise ValueError( + f"Licence '{row['licence']}' is not a valid licence according to the specification." + ) + # check if urls are not blank and valid urls + is_endpoint_valid, endpoint_valid_error = is_url_valid( + row["endpoint-url"], "endpoint_url" + ) + is_documentation_valid, documentation_valid_error = is_url_valid( + row["documentation-url"], "documentation_url" + ) + if not is_endpoint_valid or not is_documentation_valid: + raise ValueError( + f"{endpoint_valid_error} \n {documentation_valid_error}" + ) + + # if there is no start-date, do we want to populate it with today's date? + if row["start-date"]: + valid_date, error = is_date_valid(row["start-date"], "start-date") + if not valid_date: + raise ValueError(error) + + # validate organisation + if row["organisation"] == "": + raise ValueError("The organisation must not be blank") + elif not organisation.lookup(row["organisation"]): + raise ValueError( + f"The given organisation '{row['organisation']}' is not in our valid organisations" + ) + + # validate pipeline(s) - do they exist and are they in the collection + pipelines = row["pipelines"].split(";") + for pipeline in pipelines: + if not specification.dataset.get(pipeline, None): + raise ValueError( + f"'{pipeline}' is not a valid dataset in the specification" + ) + collection_in_specification = specification.dataset.get( + pipeline, None + ).get("collection") + if collection_name != collection_in_specification: + raise ValueError( + f"'{pipeline}' does not belong to provided collection {collection_name}" + ) + + # VALIDATION DONE, NOW ADD TO COLLECTION + print("======================================================================") + print("Endpoint and source details") + print("======================================================================") + print("Endpoint URL: ", 
row["endpoint-url"]) + print("Endpoint Hash:", hash_value(row["endpoint-url"])) + print("Documentation URL: ", row["documentation-url"]) + print() + + endpoints = [] + # if endpoint already exists, it will indicate it and quit function here + if collection.add_source_endpoint(row): + endpoint = { + "endpoint-url": row["endpoint-url"], + "endpoint": hash_value(row["endpoint-url"]), + "end-date": row.get("end-date", ""), + "plugin": row.get("plugin"), + "licence": row["licence"], + } + endpoints.append(endpoint) + else: + # We rely on the add_source_endpoint function to log why it couldn't be added + raise Exception( + "Endpoint and source could not be added - is this a duplicate endpoint?" + ) + + # if successfully added we can now attempt to fetch from endpoint + collector = Collector( + resource_dir=str(Path(collection_dir) / "resource"), + log_dir=str(Path(collection_dir) / "log"), + ) + endpoint_resource_info = {} + for endpoint in endpoints: + fetch_status, log = collector.fetch( + url=endpoint["endpoint-url"], + endpoint=endpoint["endpoint"], + end_date=endpoint["end-date"], + plugin=endpoint["plugin"], + refill_todays_logs=True, + ) + try: + # log is already returned from fetch, but read from file if needed for verification + log_path = collector.log_path(datetime.utcnow(), endpoint["endpoint"]) + if os.path.isfile(log_path): + with open(log_path, "r") as f: + log = json.load(f) + except Exception as e: + print( + f"Error: The log file for {endpoint} could not be read from path {log_path}.\n{e}" + ) + break + log_status = log.get("status", None) + exception = log.get("exception", None) + if fetch_status not in [FetchStatus.OK, FetchStatus.ALREADY_FETCHED]: + raise HTTPError( + f"Failed to collect from URL. 
fetch status: {fetch_status}, log status: {log_status}, exception: {exception}" + ) + # Raise exception if status is not 200 + if not log_status or log_status != "200": + raise HTTPError( + f"Failed to collect from URL with status: {log_status if log_status else exception}" + ) + + # Resource and path will only be printed if downloaded successfully but should only happen if status is 200 + resource = log.get("resource", None) + if resource: + resource_path = Path(collection_dir) / "resource" / resource + print( + "Resource collected: ", + resource, + ) + print( + "Resource Path is: ", + resource_path, + ) + + print(f"Log Status for {endpoint['endpoint']}: The status is {log_status}") + endpoint_resource_info.update( + { + "endpoint": endpoint["endpoint"], + "resource": log.get("resource"), + "resource_path": resource_path, + "pipelines": row["pipelines"].split(";"), + "organisation": row["organisation"], + "entry-date": row["entry-date"], + } + ) + + return collection, endpoint_resource_info \ No newline at end of file From 67cd393ce386bd0c1506ae6b906586be57cb55b8 Mon Sep 17 00:00:00 2001 From: jandums Date: Tue, 7 Apr 2026 16:24:35 +0100 Subject: [PATCH 3/3] W.I.P3 --- Makefile | 4 +- digital_land/commands.py | 32 +-- tests/unit/test_commands.py | 485 +++++++++++++----------------------- 3 files changed, 191 insertions(+), 330 deletions(-) diff --git a/Makefile b/Makefile index 22037acb8..0479332b3 100644 --- a/Makefile +++ b/Makefile @@ -35,9 +35,9 @@ endif endif ifndef SPATIAL ifeq ($(UNAME),Darwin) - $(error GDAL tools not found in PATH) +# $(error GDAL tools not found in PATH) endif - sudo apt-get install libsqlite3-mod-spatialite -y +# sudo apt-get install libsqlite3-mod-spatialite -y endif pyproj sync --file uk_os_OSTN15_NTv2_OSGBtoETRS.tif -v # install pre-commits diff --git a/digital_land/commands.py b/digital_land/commands.py index 6773b1a5c..683749da3 100644 --- a/digital_land/commands.py +++ b/digital_land/commands.py @@ -9,7 +9,8 @@ from 
packaging.version import Version import pandas as pd from pathlib import Path -from datetime import datetime + +# from datetime import datetime from distutils.dir_util import copy_tree import geojson from requests import HTTPError @@ -859,19 +860,19 @@ def validate_and_add_data_input( plugin=endpoint["plugin"], refill_todays_logs=True, ) - ''' - try: - # log is already returned from fetch, but read from file if needed for verification - log_path = collector.log_path(datetime.utcnow(), endpoint["endpoint"]) - if os.path.isfile(log_path): - with open(log_path, "r") as f: - log = json.load(f) - except Exception as e: - print( - f"Error: The log file for {endpoint} could not be read from path {log_path}.\n{e}" - ) - break - ''' + + # try: + # # log is already returned from fetch, but read from file if needed for verification + # log_path = collector.log_path(datetime.utcnow(), endpoint["endpoint"]) + # if os.path.isfile(log_path): + # with open(log_path, "r") as f: + # log = json.load(f) + # except Exception as e: + # print( + # f"Error: The log file for {endpoint} could not be read from path {log_path}.\n{e}" + # ) + # break + log_status = log.get("status", None) exception = log.get("exception", None) if fetch_status not in [FetchStatus.OK, FetchStatus.ALREADY_FETCHED]: @@ -907,6 +908,9 @@ def validate_and_add_data_input( "organisation": row["organisation"], "entry-date": row["entry-date"], } + # elif: + # raise Error( + # f"No resource avaible: {log_status if log_status else exception}" ) return collection, endpoint_resource_info diff --git a/tests/unit/test_commands.py b/tests/unit/test_commands.py index a2b07711f..a938e8405 100644 --- a/tests/unit/test_commands.py +++ b/tests/unit/test_commands.py @@ -1,342 +1,199 @@ -def test_validate_and_add_data_input_error_thrown_whhen_no_resource_downloaded( - csv_file_path, collection_name, collection_dir, specification_dir, organisation_path -): - expected_cols = [ - "pipelines", - "organisation", - "documentation-url", - 
"endpoint-url", - "start-date", - "licence", - ] - - specification = Specification(specification_dir) - organisation = Organisation(organisation_path=organisation_path) +import csv +import logging +import os +import tempfile +from unittest.mock import Mock +import pytest +from requests import HTTPError - collection = Collection(name=collection_name, directory=collection_dir) - collection.load() - # ===== FIRST VALIDATION BASED ON IMPORT.CSV INFO - # - Check licence, url, date, organisation +from digital_land.commands import validate_and_add_data_input +from tests.acceptance.conftest import copy_latest_specification_files_to - # read and process each record of the new endpoints csv at csv_file_path i.e import.csv +# import from digital_land validate_and_add_data_input_error_thrown_when_no_resource_downloaded - with open(csv_file_path) as new_endpoints_file: - reader = csv.DictReader(new_endpoints_file) - csv_columns = reader.fieldnames - # validate the columns in input .csv - for expected_col in expected_cols: - if expected_col not in csv_columns: - raise Exception(f"required column ({expected_col}) not found in csv") +@pytest.fixture(scope="module") +def specification_dir(tmp_path_factory): + specification_dir = tmp_path_factory.mktemp("specification") + copy_latest_specification_files_to(specification_dir) + return specification_dir - for row in reader: - # validate licence - if row["licence"] == "": - raise ValueError("Licence is blank") - elif not specification.licence.get(row["licence"], None): - raise ValueError( - f"Licence '{row['licence']}' is not a valid licence according to the specification." 
- ) - # check if urls are not blank and valid urls - is_endpoint_valid, endpoint_valid_error = is_url_valid( - row["endpoint-url"], "endpoint_url" - ) - is_documentation_valid, documentation_valid_error = is_url_valid( - row["documentation-url"], "documentation_url" - ) - if not is_endpoint_valid or not is_documentation_valid: - raise ValueError( - f"{endpoint_valid_error} \n {documentation_valid_error}" - ) - # if there is no start-date, do we want to populate it with today's date? - if row["start-date"]: - valid_date, error = is_date_valid(row["start-date"], "start-date") - if not valid_date: - raise ValueError(error) +@pytest.fixture(scope="function") +def collection_dir(tmp_path_factory): + collection_dir = tmp_path_factory.mktemp("collection") - # validate organisation - if row["organisation"] == "": - raise ValueError("The organisation must not be blank") - elif not organisation.lookup(row["organisation"]): - raise ValueError( - f"The given organisation '{row['organisation']}' is not in our valid organisations" - ) - - # validate pipeline(s) - do they exist and are they in the collection - pipelines = row["pipelines"].split(";") - for pipeline in pipelines: - if not specification.dataset.get(pipeline, None): - raise ValueError( - f"'{pipeline}' is not a valid dataset in the specification" - ) - collection_in_specification = specification.dataset.get( - pipeline, None - ).get("collection") - if collection_name != collection_in_specification: - raise ValueError( - f"'{pipeline}' does not belong to provided collection {collection_name}" - ) + # create source csv + source_fieldnames = [ + "attribution", + "collection", + "documentation-url", + "endpoint", + "licence", + "organisation", + "pipelines", + "entry-date", + "start-date", + "end-date", + ] - # VALIDATION DONE, NOW ADD TO COLLECTION - print("======================================================================") - print("Endpoint and source details") - 
print("======================================================================") - print("Endpoint URL: ", row["endpoint-url"]) - print("Endpoint Hash:", hash_value(row["endpoint-url"])) - print("Documentation URL: ", row["documentation-url"]) - print() + with open(os.path.join(collection_dir, "source.csv"), "w") as f: + dictwriter = csv.DictWriter(f, fieldnames=source_fieldnames) + dictwriter.writeheader() - endpoints = [] - # if endpoint already exists, it will indicate it and quit function here - if collection.add_source_endpoint(row): - endpoint = { - "endpoint-url": row["endpoint-url"], - "endpoint": hash_value(row["endpoint-url"]), - "end-date": row.get("end-date", ""), - "plugin": row.get("plugin"), - "licence": row["licence"], - } - endpoints.append(endpoint) - else: - # We rely on the add_source_endpoint function to log why it couldn't be added - raise Exception( - "Endpoint and source could not be added - is this a duplicate endpoint?" - ) + # create endpoint csv + endpoint_fieldnames = [ + "endpoint", + "endpoint-url", + "parameters", + "plugin", + "entry-date", + "start-date", + "end-date", + ] - # if successfully added we can now attempt to fetch from endpoint - collector = Collector( - resource_dir=str(Path(collection_dir) / "resource"), - log_dir=str(Path(collection_dir) / "log"), + with open(os.path.join(collection_dir, "endpoint.csv"), "w") as f: + dictwriter = csv.DictWriter(f, fieldnames=endpoint_fieldnames) + dictwriter.writeheader() + return collection_dir + + +@pytest.fixture(scope="module") +def organisation_csv(): + organisation_path = tempfile.NamedTemporaryFile().name + organisation_fieldnames = [ + "dataset", + "end-date", + "entity", + "entry-date", + "name", + "organisation", + "prefix", + "reference", + "start-date", + ] + organisation_row = { + "dataset": "local-authority", + "end-date": "", + "entity": 314, + "entry-date": "2023-11-19", + "name": "South Staffordshire Council", + "organisation": "local-authority:SST", + "prefix": 
"local-authority", + "reference": "SST", + "start-date": "", + } + + with open(organisation_path, "w") as f: + writer = csv.DictWriter(f, fieldnames=organisation_fieldnames) + writer.writeheader() + writer.writerow(organisation_row) + + return organisation_path + + +@pytest.fixture +def mock_request_get(mocker): + data = {"reference": "1", "value": "test"} + csv_content = str(data).encode("utf-8") + + mock_response = Mock() + mock_response.status_code = 200 + mock_response.request.headers = {"test": "test"} + mock_response.headers = {"test": "test"} + mock_response.content = csv_content + mocker.patch( + "requests.Session.get", + return_value=mock_response, ) - endpoint_resource_info = {} - for endpoint in endpoints: - fetch_status, log = collector.fetch( - url=endpoint["endpoint-url"], - endpoint=endpoint["endpoint"], - end_date=endpoint["end-date"], - plugin=endpoint["plugin"], - refill_todays_logs=True, - ) - try: - # log is already returned from fetch, but read from file if needed for verification - log_path = collector.log_path(datetime.utcnow(), endpoint["endpoint"]) - if os.path.isfile(log_path): - with open(log_path, "r") as f: - log = json.load(f) - except Exception as e: - print( - f"Error: The log file for {endpoint} could not be read from path {log_path}.\n{e}" - ) - break - log_status = log.get("status", None) - exception = log.get("exception", None) - if fetch_status not in [FetchStatus.OK, FetchStatus.ALREADY_FETCHED]: - raise HTTPError( - f"Failed to collect from URL. 
fetch status: {fetch_status}, log status: {log_status}, exception: {exception}" - ) - # Raise exception if status is not 200 - if not log_status or log_status != "200": - raise HTTPError( - f"Failed to collect from URL with status: {log_status if log_status else exception}" - ) - # Resource and path will only be printed if downloaded successfully but should only happen if status is 200 - resource = log.get("resource", None) - if resource: - resource_path = Path(collection_dir) / "resource" / resource - print( - "Resource collected: ", - resource, - ) - print( - "Resource Path is: ", - resource_path, - ) - print(f"Log Status for {endpoint['endpoint']}: The status is {log_status}") - endpoint_resource_info.update( - { - "endpoint": endpoint["endpoint"], - "resource": log.get("resource"), - "resource_path": resource_path, - "pipelines": row["pipelines"].split(";"), - "organisation": row["organisation"], - "entry-date": row["entry-date"], - } - ) - - return collection, endpoint_resource_info - - - - -def validate_and_add_data_input_non_200( - csv_file_path, collection_name, collection_dir, specification_dir, organisation_path -): - expected_cols = [ - "pipelines", +def create_input_csv( + data, + fieldnames=[ "organisation", "documentation-url", "endpoint-url", "start-date", + "pipelines", + "plugin", "licence", - ] - - specification = Specification(specification_dir) - organisation = Organisation(organisation_path=organisation_path) - - collection = Collection(name=collection_name, directory=collection_dir) - collection.load() - # ===== FIRST VALIDATION BASED ON IMPORT.CSV INFO - # - Check licence, url, date, organisation - - # read and process each record of the new endpoints csv at csv_file_path i.e import.csv - - with open(csv_file_path) as new_endpoints_file: - reader = csv.DictReader(new_endpoints_file) - csv_columns = reader.fieldnames - - # validate the columns in input .csv - for expected_col in expected_cols: - if expected_col not in csv_columns: - raise 
Exception(f"required column ({expected_col}) not found in csv") - - for row in reader: - # validate licence - if row["licence"] == "": - raise ValueError("Licence is blank") - elif not specification.licence.get(row["licence"], None): - raise ValueError( - f"Licence '{row['licence']}' is not a valid licence according to the specification." - ) - # check if urls are not blank and valid urls - is_endpoint_valid, endpoint_valid_error = is_url_valid( - row["endpoint-url"], "endpoint_url" - ) - is_documentation_valid, documentation_valid_error = is_url_valid( - row["documentation-url"], "documentation_url" - ) - if not is_endpoint_valid or not is_documentation_valid: - raise ValueError( - f"{endpoint_valid_error} \n {documentation_valid_error}" - ) - - # if there is no start-date, do we want to populate it with today's date? - if row["start-date"]: - valid_date, error = is_date_valid(row["start-date"], "start-date") - if not valid_date: - raise ValueError(error) + ], +): + tmp_input_path = tempfile.NamedTemporaryFile().name - # validate organisation - if row["organisation"] == "": - raise ValueError("The organisation must not be blank") - elif not organisation.lookup(row["organisation"]): - raise ValueError( - f"The given organisation '{row['organisation']}' is not in our valid organisations" - ) + with open(tmp_input_path, "w") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + writer.writerow(data) - # validate pipeline(s) - do they exist and are they in the collection - pipelines = row["pipelines"].split(";") - for pipeline in pipelines: - if not specification.dataset.get(pipeline, None): - raise ValueError( - f"'{pipeline}' is not a valid dataset in the specification" - ) - collection_in_specification = specification.dataset.get( - pipeline, None - ).get("collection") - if collection_name != collection_in_specification: - raise ValueError( - f"'{pipeline}' does not belong to provided collection {collection_name}" - ) + return 
tmp_input_path - # VALIDATION DONE, NOW ADD TO COLLECTION - print("======================================================================") - print("Endpoint and source details") - print("======================================================================") - print("Endpoint URL: ", row["endpoint-url"]) - print("Endpoint Hash:", hash_value(row["endpoint-url"])) - print("Documentation URL: ", row["documentation-url"]) - print() - endpoints = [] - # if endpoint already exists, it will indicate it and quit function here - if collection.add_source_endpoint(row): - endpoint = { - "endpoint-url": row["endpoint-url"], - "endpoint": hash_value(row["endpoint-url"]), - "end-date": row.get("end-date", ""), - "plugin": row.get("plugin"), - "licence": row["licence"], - } - endpoints.append(endpoint) - else: - # We rely on the add_source_endpoint function to log why it couldn't be added - raise Exception( - "Endpoint and source could not be added - is this a duplicate endpoint?" +def test_validate_and_add_data_input_no_error( + collection_dir, + specification_dir, + organisation_csv, + caplog, + mock_request_get, +): + collection_name = "conservation-area" + no_error_input_data = { + "organisation": "local-authority:SST", + "documentation-url": "https://www.sstaffs.gov.uk/planning/conservation-and-heritage/south-staffordshires-conservation-areas", + "endpoint-url": "https://www.sstaffs.gov.uk/sites/default/files/2024-11/South Staffs Conservation Area document dataset_1.csv", + "start-date": "", + "pipelines": "conservation-area", + "plugin": "", + "licence": "ogl3", + } + + tmp_input_path = create_input_csv(no_error_input_data) + + with caplog.at_level(logging.ERROR): + validate_and_add_data_input( + tmp_input_path, + collection_name, + collection_dir, + specification_dir, + organisation_csv, ) + assert len(caplog.text) == 0 - # if successfully added we can now attempt to fetch from endpoint - collector = Collector( - resource_dir=str(Path(collection_dir) / "resource"), - 
log_dir=str(Path(collection_dir) / "log"), - ) - endpoint_resource_info = {} - for endpoint in endpoints: - fetch_status, log = collector.fetch( - url=endpoint["endpoint-url"], - endpoint=endpoint["endpoint"], - end_date=endpoint["end-date"], - plugin=endpoint["plugin"], - refill_todays_logs=True, - ) - try: - # log is already returned from fetch, but read from file if needed for verification - log_path = collector.log_path(datetime.utcnow(), endpoint["endpoint"]) - if os.path.isfile(log_path): - with open(log_path, "r") as f: - log = json.load(f) - except Exception as e: - print( - f"Error: The log file for {endpoint} could not be read from path {log_path}.\n{e}" - ) - break - log_status = log.get("status", None) - exception = log.get("exception", None) - if fetch_status not in [FetchStatus.OK, FetchStatus.ALREADY_FETCHED]: - raise HTTPError( - f"Failed to collect from URL. fetch status: {fetch_status}, log status: {log_status}, exception: {exception}" - ) - # Raise exception if status is not 200 - if not log_status or log_status != "200": - raise HTTPError( - f"Failed to collect from URL with status: {log_status if log_status else exception}" - ) - # Resource and path will only be printed if downloaded successfully but should only happen if status is 200 - resource = log.get("resource", None) - if resource: - resource_path = Path(collection_dir) / "resource" / resource - print( - "Resource collected: ", - resource, - ) - print( - "Resource Path is: ", - resource_path, - ) +def test_validate_and_add_data_input_error_thrown_when_no_resource_downloaded( + collection_dir, specification_dir, organisation_csv, mocker +): - print(f"Log Status for {endpoint['endpoint']}: The status is {log_status}") - endpoint_resource_info.update( - { - "endpoint": endpoint["endpoint"], - "resource": log.get("resource"), - "resource_path": resource_path, - "pipelines": row["pipelines"].split(";"), - "organisation": row["organisation"], - "entry-date": row["entry-date"], - } + 
mock_response = Mock()
+    mock_response.status_code = 200
+    mock_response.request.headers = {"test": "test"}
+    mock_response.headers = {"test": "test"}
+    mock_response.content = ""
+    mocker.patch(
+        "requests.Session.get",
+        return_value=mock_response,
+    )
+    collection_name = "conservation-area"
+    no_error_input_data = {
+        "organisation": "local-authority:SST",
+        "documentation-url": "https://www.westoxon.gov.uk/planning-and-building/digital-planning-data/",
+        "endpoint-url": "https://services5.arcgis.com/z8GJkxrWic0alJoM/arcgis/rest/services/WODC_Conservation_Areas_WGS/FeatureServer",
+        "start-date": "",
+        "pipelines": "conservation-area",
+        "plugin": "",
+        "licence": "ogl3",
+    }
+
+    tmp_input_path = create_input_csv(no_error_input_data)
+
+    with pytest.raises(HTTPError) as error:
+        validate_and_add_data_input(
+            tmp_input_path,
+            collection_name,
+            collection_dir,
+            specification_dir,
+            organisation_csv,
+        )
+    assert "Failed to collect from URL" in str(error)