From 44cf33763b0692f39e6111f7e8a487a0f5965c11 Mon Sep 17 00:00:00 2001 From: eveleighoj <35256612+eveleighoj@users.noreply.github.com> Date: Wed, 1 Apr 2026 18:16:25 +0100 Subject: [PATCH] print count of facts when running dataset create command to help debug --- digital_land/commands.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/digital_land/commands.py b/digital_land/commands.py index 37166354..08b6934b 100644 --- a/digital_land/commands.py +++ b/digital_land/commands.py @@ -422,6 +422,11 @@ def dataset_create( if pqpackage.strategy != "direct": pqpackage.group_parquet_files(input_dir, target_mb=256) pqpackage.load_facts(input_dir) + # temporary diagnostic to verify fact count before sqlite load + fact_parquet_count = pqpackage.conn.execute( + f"SELECT COUNT(*) FROM parquet_scan('{pqpackage.fact_path}')" + ).fetchone()[0] + logger.info(f"fact.parquet row count before sqlite load: {fact_parquet_count}") pqpackage.load_fact_resource(input_dir) pqpackage.load_entities(input_dir, resource_path, organisation_path)