|
3 | 3 | import urllib.request |
4 | 4 | import pytest |
5 | 5 |
|
| 6 | +from pathlib import Path |
| 7 | + |
6 | 8 | from datetime import datetime |
7 | 9 | from click.testing import CliRunner |
| 10 | +from digital_land.collect import FetchStatus |
8 | 11 | from digital_land.cli import cli |
9 | 12 |
|
| 13 | +from digital_land.collect import Collector |
| 14 | + |
10 | 15 | from tests.utils.helpers import hash_digest |
11 | 16 |
|
12 | 17 | ENDPOINT = "https://raw.githubusercontent.com/digital-land/digital-land-python/main/tests/data/resource_examples/csv.csv" |
@@ -64,3 +69,97 @@ def resource_collected(collection_dir, resource): |
64 | 69 | raw = urllib.request.urlopen(ENDPOINT).read().decode("utf-8") |
65 | 70 | downloaded = "\n".join(raw.splitlines()) # Convert CRLF to LF |
66 | 71 | return saved == downloaded |
| 72 | + |
| 73 | + |
def create_mock_response(mocker, status_code=200, content=b"", content_type="text/csv"):
    """Build a stand-in for a ``requests`` response.

    Args:
        mocker: Object exposing a ``Mock`` factory (the pytest-mock fixture in
            the tests below).
        status_code: Value stored on the response's ``status_code``.
        content: Raw body stored on the response's ``content``.
        content_type: Value of the ``Content-Type`` response header.

    Returns:
        The mock response, whose ``request.headers`` holds a fixed
        ``User-Agent`` of ``"test-agent"``.
    """
    response = mocker.Mock()

    # The originating request is a separate stub carrying only its headers.
    request_stub = mocker.Mock()
    request_stub.headers = {"User-Agent": "test-agent"}

    response.request = request_stub
    response.headers = {"Content-Type": content_type}
    response.content = content
    response.status_code = status_code
    return response
| 83 | + |
| 84 | + |
class TestCollector:
    """Tests for ``Collector.fetch`` run against a patched ``requests`` session."""

    @staticmethod
    def _make_collector(collection_dir):
        """Return a ``Collector`` rooted in ``collection_dir`` and its log directory."""
        log_dir = Path(collection_dir) / "log"
        resource_dir = Path(collection_dir) / "resource"
        collector = Collector(resource_dir=str(resource_dir), log_dir=str(log_dir))
        return collector, log_dir

    def test_fetch_overwrite_endpoint_logs(self, collection_dir, tmp_path, mocker):
        """Today's log is only replaced when ``refill_todays_logs`` is True."""
        # -- Arrange --
        url = "https://example.com/test-endpoint.csv"
        mock_content = b"reference,name\n1,Test Name\n2,Another Name"

        mock_response = create_mock_response(
            mocker, status_code=500, content=mock_content
        )
        mocker.patch(
            "digital_land.collect.requests.Session.get",
            return_value=mock_response,
        )
        collector, log_dir = self._make_collector(collection_dir)

        # -- Act / Assert --
        # Initial fetch against the 500 response creates today's log.
        fetch_status, log = collector.fetch(url=url, refill_todays_logs=True)
        assert fetch_status == FetchStatus.FAILED, "initial mock should fail"

        # The patch hands back this same object on every call, so mutating it
        # switches all later fetches to a successful response.
        mock_response.status_code = 200

        # Without refill_todays_logs the existing log must be left alone.
        fetch_status, log = collector.fetch(url=url, refill_todays_logs=False)
        assert (
            fetch_status == FetchStatus.ALREADY_FETCHED
        ), "log should not be overwritten"

        # With refill_todays_logs the fetch runs again and replaces the log.
        fetch_status, log = collector.fetch(url=url, refill_todays_logs=True)
        assert fetch_status == FetchStatus.OK
        assert log["endpoint-url"] == url
        assert log["status"] == "200"
        assert "resource" in log
        assert log["resource"] is not None

        # Exactly one log file exists: it was overwritten, not duplicated.
        log_files = list(log_dir.rglob("*.json"))
        assert len(log_files) == 1

        # The log on disk reflects the successful refetch.
        with open(log_files[0], encoding="utf-8") as f:
            saved_log = json.load(f)
        assert saved_log["endpoint-url"] == url
        assert saved_log["status"] == "200"

    def test_fetch_handles_non_200_status(self, collection_dir, tmp_path, mocker):
        """A non-200 response is reported as FAILED and records no resource."""
        # -- Arrange --
        url = "https://example.com/not-found.csv"

        mock_response = create_mock_response(
            mocker, status_code=404, content=b"Not Found", content_type="text/html"
        )
        mocker.patch(
            "digital_land.collect.requests.Session.get",
            return_value=mock_response,
        )
        collector, _ = self._make_collector(collection_dir)

        # -- Act --
        fetch_status, log = collector.fetch(url=url)

        # -- Assert --
        assert fetch_status == FetchStatus.FAILED
        assert log["status"] == "404"
        assert "resource" not in log
0 commit comments