Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 15 additions & 15 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ data/reporting/deleted_entities.csv: data/reporting
data/reporting/duplicate_entity_expectation.csv: data/reporting
python src/duplicate_geometry_expectations.py --output-dir data/reporting

-data/reporting/endpoint-dataset-issue-type-summary.csv: data/reporting
+data/reporting/endpoint_dataset_issue_type_summary.csv: data/reporting
python src/endpoint_dataset_issue_type_summary.py --output-dir data/reporting

-data/reporting/all-endpoints-and-documentation-urls.csv: data/reporting
+data/reporting/all_endpoints_and_documentation_urls.csv: data/reporting
python src/endpoints_missing_doc_urls.py --output-dir data/reporting

# produces two files but leave for now
Expand All @@ -38,26 +38,26 @@ data/reporting/flagged_failed_resources.csv: data/reporting

# src/generate_odp_conformance_csv.py <- fix this one

-data/reporting/odp-issue.csv:
+data/reporting/odp_issue.csv:
python src/generate_odp_issues_csv.py --output-dir data/reporting

-data/reporting/odp-status.csv:
+data/reporting/odp_status.csv:
python src/generate_odp_status_csv.py --output-dir data/reporting

-data/reporting/listed-building-end-date.csv:
+data/reporting/listed_building_end_date.csv:
python src/listed_building_end_date.py --output-dir data/reporting

-data/reporting/logs-by-week.csv:
+data/reporting/logs_by_week.csv:
python src/logs_by_week.py --output-dir data/reporting

-data/reporting/odp-conformance.csv:
+data/reporting/odp_conformance.csv:
python src/generate_odp_conformance_csv.py --output-dir data/reporting --specification-dir data/specification

data/reporting/quality_ODP_dataset_scores_by_LPA.csv data/reporting/quality_ODP_dataset_quality_detail.csv: data/reporting
python src/measure_odp_data_quality.py --output-dir data/reporting
# src/operational_issues.py <- fix this one

-# data/reporting/operational-issues.csv: data/reporting
+# data/reporting/operational_issues.csv: data/reporting
# python src/operational_issues.py --output-dir data/reporting

data/reporting/entities_with_ended_orgs.csv:
Expand All @@ -72,16 +72,16 @@ data/reporting/runaway_resources.csv: data/reporting
.PHONY: all
all: data/reporting/deleted_entities.csv \
data/reporting/duplicate_entity_expectation.csv \
-data/reporting/endpoint-dataset-issue-type-summary.csv \
-data/reporting/all-endpoints-and-documentation-urls.csv \
+data/reporting/endpoint_dataset_issue_type_summary.csv \
+data/reporting/all_endpoints_and_documentation_urls.csv \
data/reporting/flag_endpoints_no_provision.csv \
data/reporting/flagged_failed_resources.csv \
-data/reporting/odp-issue.csv \
-data/reporting/odp-status.csv \
-data/reporting/listed-building-end-date.csv \
-data/reporting/logs-by-week.csv \
+data/reporting/odp_issue.csv \
+data/reporting/odp_status.csv \
+data/reporting/listed_building_end_date.csv \
+data/reporting/logs_by_week.csv \
data/reporting/runaway_resources.csv\
-data/reporting/odp-conformance.csv\
+data/reporting/odp_conformance.csv\
data/reporting/quality_ODP_dataset_scores_by_LPA.csv\
data/reporting/quality_ODP_dataset_quality_detail.csv\
data/reporting/entities_with_ended_orgs.csv\
Expand Down
8 changes: 4 additions & 4 deletions src/check_deleted_entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ def main(output_dir: str):
df_org = df_org[['entity', 'organisation', 'name']].copy()
df_org = df_org.rename(
columns={
-'entity': 'organisation-entity',
-'name': 'organisation-name'
+'entity': 'organisation_entity',
+'name': 'organisation_name'
}
)

Expand Down Expand Up @@ -127,8 +127,8 @@ def main(output_dir: str):
'dataset',
'entity',
'organisation',
-'organisation-name',
-'organisation-entity',
+'organisation_name',
+'organisation_entity',
'name',
'reference'
]
Expand Down
18 changes: 9 additions & 9 deletions src/duplicate_geometry_expectations.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,15 +230,15 @@ def main(output_dir: str):

# Merge for entity_a
df_subset = df_subset.merge(
-df_lookup.rename(columns={"organisation": "lookup-org-a"}),
+df_lookup.rename(columns={"organisation": "lookup_org_a"}),
how="left",
left_on="entity_a",
right_on="entity",
validate="m:1",
)
# Merge for entity_b
df_subset = df_subset.merge(
-df_lookup.rename(columns={"organisation": "lookup-org-b"}),
+df_lookup.rename(columns={"organisation": "lookup_org_b"}),
how="left",
left_on="entity_b",
right_on="entity",
Expand Down Expand Up @@ -267,7 +267,7 @@ def main(output_dir: str):
# ------------------------------------------------------------
# Create comparison column
# ------------------------------------------------------------
-df_matches["lookup-same-org"] = df_matches["lookup-org-a"] == df_matches["lookup-org-b"]
+df_matches["lookup_same_org"] = df_matches["lookup_org_a"] == df_matches["lookup_org_b"]

# ------------------------------------------------------------
# Check if entity B organisation is in ODP
Expand All @@ -276,10 +276,10 @@ def main(output_dir: str):
df_provision = pd.read_csv(ODP_URL, low_memory=False)
# Get organisations that are in the open-digital-planning project
odp_orgs = set(df_provision[df_provision["project"] == "open-digital-planning"]["organisation"].unique())
-df_matches["in-odp"] = df_matches["lookup-org-b"].isin(odp_orgs)
+df_matches["in_odp"] = df_matches["lookup_org_b"].isin(odp_orgs)
except Exception as e:
logger.error(f"Failed to load ODP provision data: {e}")
-df_matches["in-odp"] = False
+df_matches["in_odp"] = False

# ------------------------------------------------------------
# Final column order (only keep those that exist)
Expand All @@ -304,10 +304,10 @@ def main(output_dir: str):
"entity_b_geometry",
"organisation_entity_a", # keep originals for auditing
"organisation_entity_b", # keep originals for auditing
-"lookup-org-a",
-"lookup-org-b",
-"lookup-same-org",
-"in-odp"
+"lookup_org_a",
+"lookup_org_b",
+"lookup_same_org",
+"in_odp"
]
ordered = [c for c in ordered if c in df_matches.columns]
df_matches = df_matches[ordered].copy()
Expand Down
2 changes: 1 addition & 1 deletion src/endpoint_dataset_issue_type_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def parse_args():

# Dictionary of table names and their Datasette URLs
tables = {
-"endpoint-dataset-issue-type-summary":
+"endpoint_dataset_issue_type_summary":
"https://datasette.planning.data.gov.uk/performance/endpoint_dataset_issue_type_summary"
}

Expand Down
2 changes: 1 addition & 1 deletion src/endpoints_missing_doc_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def save_results(df, output_dir):
"""
os.makedirs(output_dir, exist_ok=True)
#filtered = df.query("documentation_missing and is_active")
-output_path = os.path.join(output_dir, "all-endpoints-and-documentation-urls.csv")
+output_path = os.path.join(output_dir, "all_endpoints_and_documentation_urls.csv")
df.to_csv(output_path, index=False)
print(f"CSV saved: {output_path}")

Expand Down
2 changes: 1 addition & 1 deletion src/generate_odp_conformance_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -657,7 +657,7 @@ def get_dataset_field(specification_path):
else:
logger.info(f"Specification file found at {str(specification_path)} so no download is needed")

-output_path = os.path.join(output_dir, "odp-conformance.csv")
+output_path = os.path.join(output_dir, "odp_conformance.csv")

# Run summary function and filter invalid cohort rows
_, df = get_odp_conformance_summary(dataset_types=["spatial", "document"], cohorts=["ODP-Track1", "ODP-Track2", "ODP-Track3", "ODP-Track4"],specification_path=specification_path)
Expand Down
2 changes: 1 addition & 1 deletion src/generate_odp_issues_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def generate_detailed_issue_csv(output_dir: str, dataset_type="all") -> str:

print("[INFO] Saving CSV...")
os.makedirs(output_dir, exist_ok=True)
-output_path = os.path.join(output_dir, "odp-issue.csv")
+output_path = os.path.join(output_dir, "odp_issue.csv")
merged[
[
"organisation",
Expand Down
2 changes: 1 addition & 1 deletion src/generate_odp_status_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def generate_odp_summary_csv(output_dir: str) -> str:
# Convert output to DataFrame and save as CSV
df_final = pd.DataFrame(output_rows)
os.makedirs(output_dir, exist_ok=True)
-output_path = os.path.join(output_dir, "odp-status.csv")
+output_path = os.path.join(output_dir, "odp_status.csv")
df_final.to_csv(output_path, index=False)
print(f"CSV generated at {output_path} with {len(df_final)} rows")
return output_path
Expand Down
8 changes: 5 additions & 3 deletions src/listed_building_end_date.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def main(output_dir: str):
logger.error(f"Failed to load listed building data: {e}")
os.makedirs(output_dir, exist_ok=True)
pd.DataFrame().to_csv(
-os.path.join(output_dir, "listed-building-end-date.csv"), index=False
+os.path.join(output_dir, "listed_building_end_date.csv"), index=False
)
return

Expand Down Expand Up @@ -98,8 +98,10 @@ def main(output_dir: str):
df_final = df_final.sort_values("organisation")

os.makedirs(output_dir, exist_ok=True)
-output_file = os.path.join(output_dir, "listed-building-end-date.csv")
-df_final[['reference', 'entity', 'end-date', 'organisation-entity', 'organisation']].to_csv(output_file, index=False)
+output_file = os.path.join(output_dir, "listed_building_end_date.csv")
+df_final[['reference', 'entity', 'end-date', 'organisation-entity', 'organisation']].rename(
+columns={'end-date': 'end_date', 'organisation-entity': 'organisation_entity'}
+).to_csv(output_file, index=False)
logger.info(f"Saved output to {output_file} with {len(df_final)} rows")


Expand Down
2 changes: 1 addition & 1 deletion src/logs_by_week.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def parse_args():

# Define URLs and SQL queries to export
urls = {
-"logs-by-week": "https://datasette.planning.data.gov.uk/digital-land"
+"logs_by_week": "https://datasette.planning.data.gov.uk/digital-land"
}

sqls = [
Expand Down