Skip to content

Commit a079ccb

Browse files
committed
Added resource end_date
1 parent 2f9000e commit a079ccb

5 files changed

Lines changed: 28 additions & 3 deletions

File tree

digital_land/cli.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ def convert_cmd(input_path, output_path):
142142
@dataset_resource_dir
143143
@issue_dir
144144
@click.option("--cache-dir", type=click.Path(), default="var/cache/parquet")
145+
@click.option("--resource-path", type=click.Path(), default="collection/resource.csv")
145146
@click.argument("input-paths", nargs=-1, type=click.Path(exists=True))
146147
@click.pass_context
147148
def dataset_create_cmd(
@@ -153,6 +154,7 @@ def dataset_create_cmd(
153154
dataset_resource_dir,
154155
issue_dir,
155156
cache_dir,
157+
resource_path,
156158
):
157159
return dataset_create(
158160
input_paths=input_paths,
@@ -165,6 +167,7 @@ def dataset_create_cmd(
165167
dataset_resource_dir=dataset_resource_dir,
166168
issue_dir=issue_dir,
167169
cache_dir=cache_dir,
170+
resource_path=resource_path,
168171
)
169172

170173

digital_land/commands.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,7 @@ def dataset_create(
361361
column_field_dir="var/column-field",
362362
dataset_resource_dir="var/dataset-resource",
363363
cache_dir="var/cache/parquet",
364+
resource_path="collection/resource.csv",
364365
):
365366
cache_dir = os.path.join(cache_dir, dataset)
366367

@@ -409,6 +410,7 @@ def dataset_create(
409410
organisation=organisation,
410411
path=output_path,
411412
cache_dir=cache_dir,
413+
resource_path=resource_path,
412414
specification_dir=None, # TBD: package should use this specification object
413415
)
414416
pqpackage.create_temp_table(input_paths)

digital_land/package/datasetparquet.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,14 @@
2626

2727

2828
class DatasetParquetPackage(Package):
29-
def __init__(self, dataset, organisation, cache_dir, **kwargs):
29+
def __init__(self, dataset, organisation, cache_dir, resource_path, **kwargs):
3030
self.suffix = ".parquet"
3131
super().__init__(dataset, tables=tables, indexes=indexes, **kwargs)
3232
self.dataset = dataset
3333
self.organisation = organisation
3434
self.cache_dir = cache_dir
3535
self._spatialite = None
36+
self.resource_path = resource_path
3637
# Persistent connection for the class. Given name to ensure that table is stored on disk (not purely in memory)
3738
os.makedirs(cache_dir, exist_ok=True)
3839
self.duckdb_file = os.path.join(cache_dir, f"{dataset}.duckdb")
@@ -192,7 +193,7 @@ def load_entities(self):
192193
SELECT {fields_str} FROM (
193194
SELECT {fields_str}, CASE WHEN resource_csv."end-date" IS NULL THEN '2999-12-31' ELSE resource_csv."end-date" END AS resource_end_date
194195
FROM temp_table
195-
LEFT JOIN read_csv_auto('collection/resource.csv') resource_csv
196+
LEFT JOIN read_csv_auto('{self.resource_path}') resource_csv
196197
ON temp_table.resource = resource_csv.resource
197198
QUALIFY ROW_NUMBER() OVER (
198199
PARTITION BY entity, field

tests/acceptance/test_dataset_create.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,23 @@ def issue_dir(session_tmp_path):
6868
return issue_dir
6969

7070

71+
@pytest.fixture
72+
def resource_path(session_tmp_path):
73+
resource_path = session_tmp_path / "resource.csv"
74+
columns = ["resource", "end-date"]
75+
with open(resource_path, "w") as f:
76+
f.write(",".join(columns) + "\n")
77+
return resource_path
78+
79+
7180
def test_acceptance_dataset_create(
7281
session_tmp_path,
7382
organisation_path,
7483
input_paths,
7584
issue_dir,
7685
cache_path,
7786
dataset_dir,
87+
resource_path,
7888
):
7989
output_path = dataset_dir / f"{test_dataset}.sqlite3"
8090

@@ -99,6 +109,8 @@ def test_acceptance_dataset_create(
99109
str(issue_dir),
100110
"--cache-dir",
101111
str(cache_path),
112+
"--resource-path",
113+
str(resource_path),
102114
]
103115
+ input_paths,
104116
catch_exceptions=False,

tests/integration/test_package_datasetparquet.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -518,14 +518,21 @@ def test_dataset_parquet_package(temp_dir):
518518

519519
# Leave hash3.csv empty except for the headers (to test that an empty csv doesn't screw things up).
520520
with open(input_paths[2], "w") as f:
521-
f.write(",".join(map(lambda x: str(x) if x is not np.nan else "", row)) + "\n")
521+
f.write(",".join(columns) + "\n")
522+
# f.write(",".join(map(lambda x: str(x) if x is not np.nan else "", row)) + "\n")
523+
524+
resource_path = str(temp_dir / "resource.csv")
525+
resource_columns = ["resource", "end-date"]
526+
with open(resource_path, "w") as f:
527+
f.write(",".join(resource_columns) + "\n")
522528

523529
# Instantiate the DatasetParquetPackage with temp_dir input paths and a mock schema
524530
package = DatasetParquetPackage(
525531
dataset="conservation-area",
526532
organisation=MockOrganisation(os.path.join(temp_dir, "organisation.csv")),
527533
path=os.path.join(temp_dir, "integration_test.sqlite3"),
528534
cache_dir=temp_dir,
535+
resource_path=resource_path,
529536
specification_dir=None,
530537
)
531538
package.create_temp_table(input_paths)

0 commit comments

Comments
 (0)