Skip to content

Commit f96e9fa

Browse files
authored
Merge pull request #11 from digital-land/st/standardise-scripts
Standardise filename and columns
2 parents 3a9009b + 5aa8da2 commit f96e9fa

10 files changed

+39
-37
lines changed

Makefile

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@ data/reporting/deleted_entities.csv: data/reporting
2323
data/reporting/duplicate_entity_expectation.csv: data/reporting
2424
python src/duplicate_geometry_expectations.py --output-dir data/reporting
2525

26-
data/reporting/endpoint-dataset-issue-type-summary.csv: data/reporting
26+
data/reporting/endpoint_dataset_issue_type_summary.csv: data/reporting
2727
python src/endpoint_dataset_issue_type_summary.py --output-dir data/reporting
2828

29-
data/reporting/all-endpoints-and-documentation-urls.csv: data/reporting
29+
data/reporting/all_endpoints_and_documentation_urls.csv: data/reporting
3030
python src/endpoints_missing_doc_urls.py --output-dir data/reporting
3131

3232
# produces two files but leave for now
@@ -38,26 +38,26 @@ data/reporting/flagged_failed_resources.csv: data/reporting
3838

3939
# src/generate_odp_conformance_csv.py <- fix this one
4040

41-
data/reporting/odp-issue.csv:
41+
data/reporting/odp_issue.csv:
4242
python src/generate_odp_issues_csv.py --output-dir data/reporting
4343

44-
data/reporting/odp-status.csv:
44+
data/reporting/odp_status.csv:
4545
python src/generate_odp_status_csv.py --output-dir data/reporting
4646

47-
data/reporting/listed-building-end-date.csv:
47+
data/reporting/listed_building_end_date.csv:
4848
python src/listed_building_end_date.py --output-dir data/reporting
4949

50-
data/reporting/logs-by-week.csv:
50+
data/reporting/logs_by_week.csv:
5151
python src/logs_by_week.py --output-dir data/reporting
5252

53-
data/reporting/odp-conformance.csv:
53+
data/reporting/odp_conformance.csv:
5454
python src/generate_odp_conformance_csv.py --output-dir data/reporting --specification-dir data/specification
5555

5656
data/reporting/quality_ODP_dataset_scores_by_LPA.csv data/reporting/quality_ODP_dataset_quality_detail.csv: data/reporting
5757
python src/measure_odp_data_quality.py --output-dir data/reporting
5858
# src/operational_issues.py <- fix this one
5959

60-
# data/reporting/operational-issues.csv: data/reporting
60+
# data/reporting/operational_issues.csv: data/reporting
6161
# python src/operational_issues.py --output-dir data/reporting
6262

6363
data/reporting/entities_with_ended_orgs.csv:
@@ -72,16 +72,16 @@ data/reporting/runaway_resources.csv: data/reporting
7272
.PHONY: all
7373
all: data/reporting/deleted_entities.csv \
7474
data/reporting/duplicate_entity_expectation.csv \
75-
data/reporting/endpoint-dataset-issue-type-summary.csv \
76-
data/reporting/all-endpoints-and-documentation-urls.csv \
75+
data/reporting/endpoint_dataset_issue_type_summary.csv \
76+
data/reporting/all_endpoints_and_documentation_urls.csv \
7777
data/reporting/flag_endpoints_no_provision.csv \
7878
data/reporting/flagged_failed_resources.csv \
79-
data/reporting/odp-issue.csv \
80-
data/reporting/odp-status.csv \
81-
data/reporting/listed-building-end-date.csv \
82-
data/reporting/logs-by-week.csv \
79+
data/reporting/odp_issue.csv \
80+
data/reporting/odp_status.csv \
81+
data/reporting/listed_building_end_date.csv \
82+
data/reporting/logs_by_week.csv \
8383
data/reporting/runaway_resources.csv\
84-
data/reporting/odp-conformance.csv\
84+
data/reporting/odp_conformance.csv\
8585
data/reporting/quality_ODP_dataset_scores_by_LPA.csv\
8686
data/reporting/quality_ODP_dataset_quality_detail.csv\
8787
data/reporting/entities_with_ended_orgs.csv\

src/check_deleted_entities.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ def main(output_dir: str):
4646
df_org = df_org[['entity', 'organisation', 'name']].copy()
4747
df_org = df_org.rename(
4848
columns={
49-
'entity': 'organisation-entity',
50-
'name': 'organisation-name'
49+
'entity': 'organisation_entity',
50+
'name': 'organisation_name'
5151
}
5252
)
5353

@@ -127,8 +127,8 @@ def main(output_dir: str):
127127
'dataset',
128128
'entity',
129129
'organisation',
130-
'organisation-name',
131-
'organisation-entity',
130+
'organisation_name',
131+
'organisation_entity',
132132
'name',
133133
'reference'
134134
]

src/duplicate_geometry_expectations.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -230,15 +230,15 @@ def main(output_dir: str):
230230

231231
# Merge for entity_a
232232
df_subset = df_subset.merge(
233-
df_lookup.rename(columns={"organisation": "lookup-org-a"}),
233+
df_lookup.rename(columns={"organisation": "lookup_org_a"}),
234234
how="left",
235235
left_on="entity_a",
236236
right_on="entity",
237237
validate="m:1",
238238
)
239239
# Merge for entity_b
240240
df_subset = df_subset.merge(
241-
df_lookup.rename(columns={"organisation": "lookup-org-b"}),
241+
df_lookup.rename(columns={"organisation": "lookup_org_b"}),
242242
how="left",
243243
left_on="entity_b",
244244
right_on="entity",
@@ -267,7 +267,7 @@ def main(output_dir: str):
267267
# ------------------------------------------------------------
268268
# Create comparison column
269269
# ------------------------------------------------------------
270-
df_matches["lookup-same-org"] = df_matches["lookup-org-a"] == df_matches["lookup-org-b"]
270+
df_matches["lookup_same_org"] = df_matches["lookup_org_a"] == df_matches["lookup_org_b"]
271271

272272
# ------------------------------------------------------------
273273
# Check if entity B organisation is in ODP
@@ -276,10 +276,10 @@ def main(output_dir: str):
276276
df_provision = pd.read_csv(ODP_URL, low_memory=False)
277277
# Get organisations that are in the open-digital-planning project
278278
odp_orgs = set(df_provision[df_provision["project"] == "open-digital-planning"]["organisation"].unique())
279-
df_matches["in-odp"] = df_matches["lookup-org-b"].isin(odp_orgs)
279+
df_matches["in_odp"] = df_matches["lookup_org_b"].isin(odp_orgs)
280280
except Exception as e:
281281
logger.error(f"Failed to load ODP provision data: {e}")
282-
df_matches["in-odp"] = False
282+
df_matches["in_odp"] = False
283283

284284
# ------------------------------------------------------------
285285
# Final column order (only keep those that exist)
@@ -304,10 +304,10 @@ def main(output_dir: str):
304304
"entity_b_geometry",
305305
"organisation_entity_a", # keep originals for auditing
306306
"organisation_entity_b", # keep originals for auditing
307-
"lookup-org-a",
308-
"lookup-org-b",
309-
"lookup-same-org",
310-
"in-odp"
307+
"lookup_org_a",
308+
"lookup_org_b",
309+
"lookup_same_org",
310+
"in_odp"
311311
]
312312
ordered = [c for c in ordered if c in df_matches.columns]
313313
df_matches = df_matches[ordered].copy()

src/endpoint_dataset_issue_type_summary.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def parse_args():
4545

4646
# Dictionary of table names and their Datasette URLs
4747
tables = {
48-
"endpoint-dataset-issue-type-summary":
48+
"endpoint_dataset_issue_type_summary":
4949
"https://datasette.planning.data.gov.uk/performance/endpoint_dataset_issue_type_summary"
5050
}
5151

src/endpoints_missing_doc_urls.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def save_results(df, output_dir):
124124
"""
125125
os.makedirs(output_dir, exist_ok=True)
126126
#filtered = df.query("documentation_missing and is_active")
127-
output_path = os.path.join(output_dir, "all-endpoints-and-documentation-urls.csv")
127+
output_path = os.path.join(output_dir, "all_endpoints_and_documentation_urls.csv")
128128
df.to_csv(output_path, index=False)
129129
print(f"CSV saved: {output_path}")
130130

src/generate_odp_conformance_csv.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -657,7 +657,7 @@ def get_dataset_field(specification_path):
657657
else:
658658
logger.info(f"Specification file found at {str(specification_path)} so no download is needed")
659659

660-
output_path = os.path.join(output_dir, "odp-conformance.csv")
660+
output_path = os.path.join(output_dir, "odp_conformance.csv")
661661

662662
# Run summary function and filter invalid cohort rows
663663
_, df = get_odp_conformance_summary(dataset_types=["spatial", "document"], cohorts=["ODP-Track1", "ODP-Track2", "ODP-Track3", "ODP-Track4"],specification_path=specification_path)

src/generate_odp_issues_csv.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ def generate_detailed_issue_csv(output_dir: str, dataset_type="all") -> str:
175175

176176
print("[INFO] Saving CSV...")
177177
os.makedirs(output_dir, exist_ok=True)
178-
output_path = os.path.join(output_dir, "odp-issue.csv")
178+
output_path = os.path.join(output_dir, "odp_issue.csv")
179179
merged[
180180
[
181181
"organisation",

src/generate_odp_status_csv.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ def generate_odp_summary_csv(output_dir: str) -> str:
230230
# Convert output to DataFrame and save as CSV
231231
df_final = pd.DataFrame(output_rows)
232232
os.makedirs(output_dir, exist_ok=True)
233-
output_path = os.path.join(output_dir, "odp-status.csv")
233+
output_path = os.path.join(output_dir, "odp_status.csv")
234234
df_final.to_csv(output_path, index=False)
235235
print(f"CSV generated at {output_path} with {len(df_final)} rows")
236236
return output_path

src/listed_building_end_date.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def main(output_dir: str):
4545
logger.error(f"Failed to load listed building data: {e}")
4646
os.makedirs(output_dir, exist_ok=True)
4747
pd.DataFrame().to_csv(
48-
os.path.join(output_dir, "listed-building-end-date.csv"), index=False
48+
os.path.join(output_dir, "listed_building_end_date.csv"), index=False
4949
)
5050
return
5151

@@ -98,8 +98,10 @@ def main(output_dir: str):
9898
df_final = df_final.sort_values("organisation")
9999

100100
os.makedirs(output_dir, exist_ok=True)
101-
output_file = os.path.join(output_dir, "listed-building-end-date.csv")
102-
df_final[['reference', 'entity', 'end-date', 'organisation-entity', 'organisation']].to_csv(output_file, index=False)
101+
output_file = os.path.join(output_dir, "listed_building_end_date.csv")
102+
df_final[['reference', 'entity', 'end-date', 'organisation-entity', 'organisation']].rename(
103+
columns={'end-date': 'end_date', 'organisation-entity': 'organisation_entity'}
104+
).to_csv(output_file, index=False)
103105
logger.info(f"Saved output to {output_file} with {len(df_final)} rows")
104106

105107

src/logs_by_week.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def parse_args():
7676

7777
# Define URLs and SQL queries to export
7878
urls = {
79-
"logs-by-week": "https://datasette.planning.data.gov.uk/digital-land"
79+
"logs_by_week": "https://datasette.planning.data.gov.uk/digital-land"
8080
}
8181

8282
sqls = [

Comments (0)