digital-land · sianteesdale · Apr 8, 2026 · Apr 7, 2026 · Apr 7, 2026 · Apr 8, 2026
diff --git a/.github/scripts/standardise_csvs.py b/.github/scripts/standardise_csvs.py
@@ -46,6 +46,17 @@ def standardise_csv(file_path, expected_columns, sort_cols=None):
             reader = csv.DictReader(f)
             rows = list(reader)
 
+        # Check for unexpected columns
+        actual_cols = list(reader.fieldnames or [])
+        unexpected = [col for col in actual_cols if col not in expected_cols]
+        if unexpected:
+            return f"✗ {file_path}: unexpected column(s) found that would be removed: {', '.join(unexpected)}"
+
+        # Check for extra values beyond the header (e.g. stray trailing commas)
+        for row in rows:
+            if None in row:
+                return f"✗ {file_path}: one or more rows have more values than columns in the header"
+
         # Sort rows if sort columns are specified
         if sort_cols:
             rows.sort(key=lambda row: _sort_key(row, sort_cols))
@@ -80,6 +91,7 @@ def main():
     """Standardise all CSVs in all datasets across pipeline and collection."""
     # Get the root directory (two levels up from this script)
     base_dir = os.path.join(os.path.dirname(__file__), '../..')
+    errors = []
 
     for folder_type in ["collection", "pipeline"]:
         folder_path = os.path.join(base_dir, folder_type)
@@ -103,8 +115,12 @@ def main():
                     result = standardise_csv(file_path, expected_columns, sort_cols)
                     if result:
                         print(result)
+                        errors.append(result)
                 else:
                     print(f"⊘ {filename} (not found)")
 
+    if errors:
+        sys.exit(1)
+
 if __name__ == "__main__":
     main()
diff --git a/.github/workflows/standardise_csvs.yml b/.github/workflows/standardise_csvs.yml
@@ -0,0 +1,87 @@
+name: Standardise CSVs  # upload_to_S3.yml's workflow_run trigger refers to this exact name
+
+on:
+  schedule:
+    - cron: "00 22 * * *"  # every day at 22:00 (GitHub evaluates cron schedules in UTC)
+  workflow_dispatch:  # also allows the workflow to be started manually
+
+concurrency:
+  group: standardise-csvs  # all runs of this workflow share one concurrency group
+  cancel-in-progress: true  # a newly started run cancels a run of this group that is still in progress
+
+jobs:
+  standardise:  # runs the standardise script and pushes any resulting CSV changes to main
+    runs-on: ubuntu-latest
+    steps:
+      - name: Generate GitHub App token  # output is consumed by the checkout step as steps.app-token.outputs.token
+        id: app-token
+        uses: actions/create-github-app-token@v1
+        with:
+          app-id: ${{ secrets.APP_ID }}
+          private-key: ${{ secrets.APP_PRIVATE_KEY }}
+
+      - name: Checkout repository  # checks out with the App token; presumably so the later push authenticates as the App (confirm)
+        uses: actions/checkout@v4
+        with:
+          token: ${{ steps.app-token.outputs.token }}
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+
+      - name: Configure git  # sets the author identity used by the commit step
+        run: |
+          git config user.name "github-actions-bot"
+          git config user.email "noreply@github.com"
+
+      - name: Run standardise script  # the script exits 1 if any file returned a message, which fails this job before the commit step
+        run: python .github/scripts/standardise_csvs.py
+
+      - name: Commit and push if changed  # stages only collection/ and pipeline/; exits cleanly when nothing is staged
+        run: |
+          git add collection/ pipeline/
+          if git diff --staged --quiet; then
+            echo "No changes to commit"
+            exit 0
+          fi
+          git commit -m "Standardise row ordering and line endings in CSVs"
+          git push origin main
+
+  check-standardise-failure:  # posts a Slack alert when a scheduled run of the standardise job fails
+    runs-on: ubuntu-latest
+    needs:  # always() in the condition below is required: without a status function GitHub implies success() and skips this job when a needed job fails
+      - standardise
+    if: always() && github.event_name == 'schedule' && contains(join(needs.*.result, ','), 'failure')
+    steps:
+      - name: Send failure notification
+        uses: slackapi/slack-github-action@v1
+        env:
+          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+        with:
+          channel-id: "planning-data-alerts"
+          payload: |
+            {
+              "text": "Standardise CSVs has failed",
+              "icon_emoji": ":warning:",
+              "username": "Standardise CSVs",
+              "blocks": [
+                {
+                  "type": "header",
+                  "text": {
+                    "type": "plain_text",
+                    "text": "Standardise CSVs has failed"
+                  }
+                },
+                {
+                  "type": "divider"
+                },
+                {
+                  "type": "section",
+                  "text": {
+                    "type": "mrkdwn",
+                    "text": "The report is available on <https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub>"
+                  }
+                }
+              ]
+            }
diff --git a/.github/workflows/upload_to_S3.yml b/.github/workflows/upload_to_S3.yml
@@ -1,8 +1,10 @@
 name: Upload to S3
 
 on:
-  schedule:
-    - cron: '30 22 * * *'
+  workflow_run:
+    workflows: ["Standardise CSVs"]
+    types:
+      - completed
   workflow_dispatch:
     inputs:
       environment: