From bada6c1ea09b58ff6441847687051753fce4b1ba Mon Sep 17 00:00:00 2001
From: Sian Teesdale <43341988+sianteesdale@users.noreply.github.com>
Date: Wed, 8 Apr 2026 15:12:42 +0100
Subject: [PATCH 1/2] Replace hyphens with underscores in output filenames and column names

---
 Makefile                                   | 28 +++++++++++-----------
 src/check_deleted_entities.py              |  8 +++----
 src/duplicate_geometry_expectations.py     | 18 +++++++-------
 src/endpoint_dataset_issue_type_summary.py |  2 +-
 src/endpoints_missing_doc_urls.py          |  2 +-
 src/generate_odp_conformance_csv.py        |  2 +-
 src/generate_odp_issues_csv.py             |  2 +-
 src/generate_odp_status_csv.py             |  2 +-
 src/listed_building_end_date.py            |  8 ++++---
 src/logs_by_week.py                        |  2 +-
 10 files changed, 38 insertions(+), 36 deletions(-)

diff --git a/Makefile b/Makefile
index 1ef9d6c..0d090e7 100644
--- a/Makefile
+++ b/Makefile
@@ -23,10 +23,10 @@ data/reporting/deleted_entities.csv: data/reporting
 data/reporting/duplicate_entity_expectation.csv: data/reporting
 	python src/duplicate_geometry_expectations.py --output-dir data/reporting
 
-data/reporting/endpoint-dataset-issue-type-summary.csv: data/reporting
+data/reporting/endpoint_dataset_issue_type_summary.csv: data/reporting
 	python src/endpoint_dataset_issue_type_summary.py --output-dir data/reporting
 	
-data/reporting/all-endpoints-and-documentation-urls.csv: data/reporting
+data/reporting/all_endpoints_and_documentation_urls.csv: data/reporting
 	python src/endpoints_missing_doc_urls.py --output-dir data/reporting
 
 # produces two files but leave for now
@@ -38,19 +38,19 @@ data/reporting/flagged_failed_resources.csv: data/reporting
 
 # src/generate_odp_conformance_csv.py <- fix this one
 
-data/reporting/odp-issue.csv: 
+data/reporting/odp_issue.csv: 
 	python src/generate_odp_issues_csv.py --output-dir data/reporting
 
-data/reporting/odp-status.csv: 
+data/reporting/odp_status.csv: 
 	python src/generate_odp_status_csv.py --output-dir data/reporting
 
-data/reporting/listed-building-end-date.csv:
+data/reporting/listed_building_end_date.csv:
 	python src/listed_building_end_date.py --output-dir data/reporting
 
-data/reporting/logs-by-week.csv:
+data/reporting/logs_by_week.csv:
 	python src/logs_by_week.py --output-dir data/reporting
 
-data/reporting/odp-conformance.csv:
+data/reporting/odp_conformance.csv:
 	python src/generate_odp_conformance_csv.py --output-dir data/reporting --specification-dir data/specification
 
 data/reporting/quality_ODP_dataset_scores_by_LPA.csv data/reporting/quality_ODP_dataset_quality_detail.csv: data/reporting
@@ -72,16 +72,16 @@ data/reporting/runaway_resources.csv: data/reporting
 .PHONY: all
 all: data/reporting/deleted_entities.csv \
 	data/reporting/duplicate_entity_expectation.csv \
-	data/reporting/endpoint-dataset-issue-type-summary.csv \
-	data/reporting/all-endpoints-and-documentation-urls.csv \
+	data/reporting/endpoint_dataset_issue_type_summary.csv \
+	data/reporting/all_endpoints_and_documentation_urls.csv \
 	data/reporting/flag_endpoints_no_provision.csv \
 	data/reporting/flagged_failed_resources.csv \
-	data/reporting/odp-issue.csv \
-	data/reporting/odp-status.csv \
-	data/reporting/listed-building-end-date.csv \
-	data/reporting/logs-by-week.csv \
+	data/reporting/odp_issue.csv \
+	data/reporting/odp_status.csv \
+	data/reporting/listed_building_end_date.csv \
+	data/reporting/logs_by_week.csv \
 	data/reporting/runaway_resources.csv\
-	data/reporting/odp-conformance.csv\
+	data/reporting/odp_conformance.csv\
 	data/reporting/quality_ODP_dataset_scores_by_LPA.csv\
 	data/reporting/quality_ODP_dataset_quality_detail.csv\
 	data/reporting/entities_with_ended_orgs.csv\
diff --git a/src/check_deleted_entities.py b/src/check_deleted_entities.py
index c1fd95c..5f9195e 100644
--- a/src/check_deleted_entities.py
+++ b/src/check_deleted_entities.py
@@ -46,8 +46,8 @@ def main(output_dir: str):
     df_org = df_org[['entity', 'organisation', 'name']].copy()
     df_org = df_org.rename(
         columns={
-            'entity': 'organisation-entity',
-            'name': 'organisation-name'
+            'entity': 'organisation_entity',
+            'name': 'organisation_name'
         }
     )
 
@@ -127,8 +127,8 @@ def main(output_dir: str):
         'dataset',
         'entity',
         'organisation',
-        'organisation-name',
-        'organisation-entity',
+        'organisation_name',
+        'organisation_entity',
         'name',
         'reference'
     ]
diff --git a/src/duplicate_geometry_expectations.py b/src/duplicate_geometry_expectations.py
index 8011306..7485a5e 100644
--- a/src/duplicate_geometry_expectations.py
+++ b/src/duplicate_geometry_expectations.py
@@ -230,7 +230,7 @@ def main(output_dir: str):
                 
                 # Merge for entity_a
                 df_subset = df_subset.merge(
-                    df_lookup.rename(columns={"organisation": "lookup-org-a"}),
+                    df_lookup.rename(columns={"organisation": "lookup_org_a"}),
                     how="left",
                     left_on="entity_a",
                     right_on="entity",
@@ -238,7 +238,7 @@ def main(output_dir: str):
                 )
                 # Merge for entity_b
                 df_subset = df_subset.merge(
-                    df_lookup.rename(columns={"organisation": "lookup-org-b"}),
+                    df_lookup.rename(columns={"organisation": "lookup_org_b"}),
                     how="left",
                     left_on="entity_b",
                     right_on="entity",
@@ -267,7 +267,7 @@ def main(output_dir: str):
     # ------------------------------------------------------------
     # Create comparison column
     # ------------------------------------------------------------
-    df_matches["lookup-same-org"] = df_matches["lookup-org-a"] == df_matches["lookup-org-b"]
+    df_matches["lookup_same_org"] = df_matches["lookup_org_a"] == df_matches["lookup_org_b"]
 
     # ------------------------------------------------------------
     # Check if entity B organisation is in ODP
@@ -276,10 +276,10 @@ def main(output_dir: str):
         df_provision = pd.read_csv(ODP_URL, low_memory=False)
         # Get organisations that are in the open-digital-planning project
         odp_orgs = set(df_provision[df_provision["project"] == "open-digital-planning"]["organisation"].unique())
-        df_matches["in-odp"] = df_matches["lookup-org-b"].isin(odp_orgs)
+        df_matches["in_odp"] = df_matches["lookup_org_b"].isin(odp_orgs)
     except Exception as e:
         logger.error(f"Failed to load ODP provision data: {e}")
-        df_matches["in-odp"] = False
+        df_matches["in_odp"] = False
 
     # ------------------------------------------------------------
     # Final column order (only keep those that exist)
@@ -304,10 +304,10 @@ def main(output_dir: str):
         "entity_b_geometry",
         "organisation_entity_a", # keep originals for auditing
         "organisation_entity_b", # keep originals for auditing
-        "lookup-org-a",
-        "lookup-org-b",
-        "lookup-same-org",
-        "in-odp"
+        "lookup_org_a",
+        "lookup_org_b",
+        "lookup_same_org",
+        "in_odp"
     ]
     ordered = [c for c in ordered if c in df_matches.columns]
     df_matches = df_matches[ordered].copy()
diff --git a/src/endpoint_dataset_issue_type_summary.py b/src/endpoint_dataset_issue_type_summary.py
index af41462..88a11b7 100644
--- a/src/endpoint_dataset_issue_type_summary.py
+++ b/src/endpoint_dataset_issue_type_summary.py
@@ -45,7 +45,7 @@ def parse_args():
 
     # Dictionary of table names and their Datasette URLs
     tables = {
-        "endpoint-dataset-issue-type-summary":
+        "endpoint_dataset_issue_type_summary":
             "https://datasette.planning.data.gov.uk/performance/endpoint_dataset_issue_type_summary"
     }
 
diff --git a/src/endpoints_missing_doc_urls.py b/src/endpoints_missing_doc_urls.py
index ea37f6e..ed565d1 100644
--- a/src/endpoints_missing_doc_urls.py
+++ b/src/endpoints_missing_doc_urls.py
@@ -124,7 +124,7 @@ def save_results(df, output_dir):
     """
     os.makedirs(output_dir, exist_ok=True)
     #filtered = df.query("documentation_missing and is_active")
-    output_path = os.path.join(output_dir, "all-endpoints-and-documentation-urls.csv")
+    output_path = os.path.join(output_dir, "all_endpoints_and_documentation_urls.csv")
     df.to_csv(output_path, index=False)
     print(f"CSV saved: {output_path}")
 
diff --git a/src/generate_odp_conformance_csv.py b/src/generate_odp_conformance_csv.py
index 57511a7..70a8897 100644
--- a/src/generate_odp_conformance_csv.py
+++ b/src/generate_odp_conformance_csv.py
@@ -657,7 +657,7 @@ def get_dataset_field(specification_path):
     else:
         logger.info(f"Specification file found at {str(specification_path)} so no download is needed")
 
-    output_path = os.path.join(output_dir, "odp-conformance.csv")
+    output_path = os.path.join(output_dir, "odp_conformance.csv")
 
     # Run summary function and filter invalid cohort rows
     _, df = get_odp_conformance_summary(dataset_types=["spatial", "document"], cohorts=["ODP-Track1", "ODP-Track2", "ODP-Track3", "ODP-Track4"],specification_path=specification_path)
diff --git a/src/generate_odp_issues_csv.py b/src/generate_odp_issues_csv.py
index de8818f..4c74066 100644
--- a/src/generate_odp_issues_csv.py
+++ b/src/generate_odp_issues_csv.py
@@ -175,7 +175,7 @@ def generate_detailed_issue_csv(output_dir: str, dataset_type="all") -> str:
 
     print("[INFO] Saving CSV...")
     os.makedirs(output_dir, exist_ok=True)
-    output_path = os.path.join(output_dir, "odp-issue.csv")
+    output_path = os.path.join(output_dir, "odp_issue.csv")
     merged[
         [
             "organisation",
diff --git a/src/generate_odp_status_csv.py b/src/generate_odp_status_csv.py
index 8bae225..0ad181e 100644
--- a/src/generate_odp_status_csv.py
+++ b/src/generate_odp_status_csv.py
@@ -230,7 +230,7 @@ def generate_odp_summary_csv(output_dir: str) -> str:
     # Convert output to DataFrame and save as CSV
     df_final = pd.DataFrame(output_rows)
     os.makedirs(output_dir, exist_ok=True)
-    output_path = os.path.join(output_dir, "odp-status.csv")
+    output_path = os.path.join(output_dir, "odp_status.csv")
     df_final.to_csv(output_path, index=False)
     print(f"CSV generated at {output_path} with {len(df_final)} rows")
     return output_path
diff --git a/src/listed_building_end_date.py b/src/listed_building_end_date.py
index 274119a..61a440a 100644
--- a/src/listed_building_end_date.py
+++ b/src/listed_building_end_date.py
@@ -45,7 +45,7 @@ def main(output_dir: str):
         logger.error(f"Failed to load listed building data: {e}")
         os.makedirs(output_dir, exist_ok=True)
         pd.DataFrame().to_csv(
-            os.path.join(output_dir, "listed-building-end-date.csv"), index=False
+            os.path.join(output_dir, "listed_building_end_date.csv"), index=False
         )
         return
 
@@ -98,8 +98,10 @@ def main(output_dir: str):
     df_final = df_final.sort_values("organisation")
 
     os.makedirs(output_dir, exist_ok=True)
-    output_file = os.path.join(output_dir, "listed-building-end-date.csv")
-    df_final[['reference', 'entity', 'end-date', 'organisation-entity', 'organisation']].to_csv(output_file, index=False)
+    output_file = os.path.join(output_dir, "listed_building_end_date.csv")
+    df_final[['reference', 'entity', 'end-date', 'organisation-entity', 'organisation']].rename(
+        columns={'end-date': 'end_date', 'organisation-entity': 'organisation_entity'}
+    ).to_csv(output_file, index=False)
     logger.info(f"Saved output to {output_file} with {len(df_final)} rows")
 
 
diff --git a/src/logs_by_week.py b/src/logs_by_week.py
index c02d128..d09c555 100644
--- a/src/logs_by_week.py
+++ b/src/logs_by_week.py
@@ -76,7 +76,7 @@ def parse_args():
 
     # Define URLs and SQL queries to export
     urls = {
-        "logs-by-week": "https://datasette.planning.data.gov.uk/digital-land"
+        "logs_by_week": "https://datasette.planning.data.gov.uk/digital-land"
     }
 
     sqls = [

From 5aa8da2bf1cd1abcb419acf373641dda5537d5c7 Mon Sep 17 00:00:00 2001
From: Sian Teesdale <43341988+sianteesdale@users.noreply.github.com>
Date: Wed, 8 Apr 2026 15:13:06 +0100
Subject: [PATCH 2/2] Use underscores in commented-out operational_issues Makefile target

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 0d090e7..7ff4084 100644
--- a/Makefile
+++ b/Makefile
@@ -57,7 +57,7 @@ data/reporting/quality_ODP_dataset_scores_by_LPA.csv data/reporting/quality_ODP_
 	python src/measure_odp_data_quality.py --output-dir data/reporting
 # src/operational_issues.py <- fix this one
 
-# data/reporting/operational-issues.csv: data/reporting
+# data/reporting/operational_issues.csv: data/reporting
 # 	python src/operational_issues.py --output-dir data/reporting
 
 data/reporting/entities_with_ended_orgs.csv: