Skip to content

Commit 74fe933

Browse files
Jandums and eveleighoj
authored
Msj/fix refill logs (#417)
* Refill Logs on Fetch * Tests * add test structure * test for overwriting * wip * remove random file * run black on new files * fix test and mock responses --------- Co-authored-by: eveleighoj <35256612+eveleighoj@users.noreply.github.com>
1 parent da143ec commit 74fe933

2 files changed

Lines changed: 100 additions & 0 deletions

File tree

digital_land/commands.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -959,6 +959,7 @@ def validate_and_add_data_input(
959959
endpoint=endpoint["endpoint"],
960960
end_date=endpoint["end-date"],
961961
plugin=endpoint["plugin"],
962+
refill_todays_logs=True,
962963
)
963964
try:
964965
# log is already returned from fetch, but read from file if needed for verification

tests/integration/test_collect.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,15 @@
33
import urllib.request
44
import pytest
55

6+
from pathlib import Path
7+
68
from datetime import datetime
79
from click.testing import CliRunner
10+
from digital_land.collect import FetchStatus
811
from digital_land.cli import cli
912

13+
from digital_land.collect import Collector
14+
1015
from tests.utils.helpers import hash_digest
1116

1217
ENDPOINT = "https://raw.githubusercontent.com/digital-land/digital-land-python/main/tests/data/resource_examples/csv.csv"
@@ -64,3 +69,97 @@ def resource_collected(collection_dir, resource):
6469
raw = urllib.request.urlopen(ENDPOINT).read().decode("utf-8")
6570
downloaded = "\n".join(raw.splitlines()) # Convert CRLF to LF
6671
return saved == downloaded
72+
73+
74+
def create_mock_response(mocker, status_code=200, content=b"", content_type="text/csv"):
    """Build a stand-in for the response object returned by an HTTP GET.

    Args:
        mocker: object exposing a ``Mock()`` factory (the tests in this file
            pass the ``mocker`` fixture).
        status_code: value exposed as ``response.status_code``.
        content: bytes exposed as ``response.content``.
        content_type: value stored under the ``Content-Type`` response header.

    Returns:
        The mock response, with ``status_code``, ``content``, ``headers`` and a
        nested ``request`` mock whose ``headers`` carry a fixed ``User-Agent``.
    """
    mock_response = mocker.Mock()
    mock_response.status_code = status_code
    mock_response.content = content
    mock_response.headers = {"Content-Type": content_type}

    # The request side gets its own mock so ``response.request.headers`` is a
    # real dict rather than an auto-generated attribute.
    mock_response.request = mocker.Mock()
    mock_response.request.headers = {"User-Agent": "test-agent"}
    return mock_response
83+
84+
85+
class TestCollector:
    """Tests for ``Collector.fetch`` with ``requests.Session.get`` patched out."""

    def test_fetch_overwrite_endpoint_logs(self, collection_dir, tmp_path, mocker):
        """Check that today's log is only rewritten when ``refill_todays_logs`` is set.

        Sequence exercised against one URL:
          1. a 500 response yields ``FetchStatus.FAILED``;
          2. after the mock switches to 200, fetching with
             ``refill_todays_logs=False`` yields ``FetchStatus.ALREADY_FETCHED``;
          3. fetching with ``refill_todays_logs=True`` yields ``FetchStatus.OK``
             and exactly one JSON log file exists, recording status "200".
        """
        # -- Arrange --
        url = "https://example.com/test-endpoint.csv"
        mock_content = b"reference,name\n1,Test Name\n2,Another Name"

        mock_response = create_mock_response(
            mocker, status_code=500, content=mock_content
        )

        mocker.patch(
            "digital_land.collect.requests.Session.get",
            return_value=mock_response,
        )

        log_dir = Path(collection_dir) / "log"
        resource_dir = Path(collection_dir) / "resource"
        collector = Collector(resource_dir=str(resource_dir), log_dir=str(log_dir))

        # -- Act / Assert --
        # run initial fetch to create log (the mocked endpoint returns 500)
        fetch_status, log = collector.fetch(url=url, refill_todays_logs=True)

        assert fetch_status == FetchStatus.FAILED, "initial mock should fail"

        # update to a successful response; the same mock object is returned by
        # the patched ``get`` so mutating it changes subsequent fetches
        mock_response.status_code = 200

        # now run without refill_todays_logs to ensure log is not overwritten
        fetch_status, log = collector.fetch(url=url, refill_todays_logs=False)

        assert (
            fetch_status == FetchStatus.ALREADY_FETCHED
        ), "log should not be overwritten"

        # with refill_todays_logs the existing log is expected to be replaced
        fetch_status, log = collector.fetch(url=url, refill_todays_logs=True)

        assert fetch_status == FetchStatus.OK
        assert log["endpoint-url"] == url
        assert log["status"] == "200"
        assert "resource" in log
        assert log["resource"] is not None

        # Check log file was created (one file, not one per fetch)
        log_files = list(log_dir.rglob("*.json"))
        assert len(log_files) == 1

        # Read and verify log file content
        with open(log_files[0], "r") as f:
            saved_log = json.load(f)
        assert saved_log["endpoint-url"] == url
        assert saved_log["status"] == "200"

    def test_fetch_handles_non_200_status(self, collection_dir, tmp_path, mocker):
        """Test that fetch handles non-200 status codes correctly.

        A 404 response must give ``FetchStatus.FAILED``, record status "404" in
        the returned log, and leave the log without a ``resource`` key.
        """
        # -- Arrange --
        url = "https://example.com/not-found.csv"

        mock_response = create_mock_response(
            mocker, status_code=404, content=b"Not Found", content_type="text/html"
        )

        mocker.patch(
            "digital_land.collect.requests.Session.get",
            return_value=mock_response,
        )

        log_dir = Path(collection_dir) / "log"
        resource_dir = Path(collection_dir) / "resource"

        collector = Collector(resource_dir=str(resource_dir), log_dir=str(log_dir))

        # -- Act --
        fetch_status, log = collector.fetch(url=url)

        # -- Assert --
        assert fetch_status == FetchStatus.FAILED
        assert log["status"] == "404"
        assert "resource" not in log

0 commit comments

Comments
 (0)