From cd288e50a687aea1b4f23597d607d803c1453476 Mon Sep 17 00:00:00 2001
From: Tomic Riedel <tomicriedel07@gmail.com>
Date: Tue, 24 Mar 2026 20:54:31 +0100
Subject: [PATCH] feat: add full restaurants demo with dynamic metric discovery

---
 .gitattributes                |  1 +
 README.md                     | 15 +++++++++++++++
 data/restaurants.csv          |  3 +++
 data/restaurants.json         |  9 +++++++++
 demo/configs/consistency.json |  4 ++++
 demo/configs/sqlite.json      |  5 +++++
 demo/run_demo.py              | 18 ++++++++++++++++++
 7 files changed, 55 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 data/restaurants.csv
 create mode 100644 data/restaurants.json
 create mode 100644 demo/configs/consistency.json
 create mode 100644 demo/configs/sqlite.json
 create mode 100644 demo/run_demo.py

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..3010086
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+data/restaurants.csv filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
index 264cca2..0d66586 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,21 @@ Metis is a framework to automatically assess the quality of tabular data across
 python -m demo.getting_started
 ```
 
+## Full demo (all metrics)
+
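+To run every registered metric against the full TripAdvisor European Restaurants dataset, use the extended demo. The dataset file is tracked with Git LFS, so fetch it first with `git lfs pull`. **Note: running all metrics on the full dataset will take some time.**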
+
+```
+python -m demo.run_demo
+```
+
+The demo uses `data/restaurants.csv`, the full
+[TripAdvisor European Restaurants dataset from Kaggle](https://www.kaggle.com/datasets/stefanoleone992/tripadvisor-european-restaurants)
+(~1.08 M rows). The CSV includes 42 original columns (ratings, cuisines,
+location, price level, …) plus two synthetic timestamp columns
+(`first_review_date` and `last_review_date`) with ~10 % intentional nulls
+to surface interesting completeness findings.
+
 ## How to implement new metrics
 
 To extend the Metis framework and add new data quality metrics, please check our interface for easy integration. 
diff --git a/data/restaurants.csv b/data/restaurants.csv
new file mode 100644
index 0000000..7bd0bd5
--- /dev/null
+++ b/data/restaurants.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17bcb14f67996d50b77a8077539713424bc47b9e29b1150f79ac71a1fa74e498
+size 700264775
diff --git a/data/restaurants.json b/data/restaurants.json
new file mode 100644
index 0000000..5154c1a
--- /dev/null
+++ b/data/restaurants.json
@@ -0,0 +1,9 @@
+{
+    "loader": "CSV",
+    "name": "Restaurants",
+    "file_name": "restaurants.csv",
+    "delimiter": ",",
+    "encoding": "utf-8",
+    "header": 0,
+    "nrows": null
+}
diff --git a/demo/configs/consistency.json b/demo/configs/consistency.json
new file mode 100644
index 0000000..994d1ed
--- /dev/null
+++ b/demo/configs/consistency.json
@@ -0,0 +1,4 @@
+{
+    "province": ["region"],
+    "country": ["default_language"]
+}
diff --git a/demo/configs/sqlite.json b/demo/configs/sqlite.json
new file mode 100644
index 0000000..3c76885
--- /dev/null
+++ b/demo/configs/sqlite.json
@@ -0,0 +1,5 @@
+{
+    "writer_name": "sqlite",
+    "table_name": "dqresults",
+    "db_name": "dq_repository/demo.db"
+}
diff --git a/demo/run_demo.py b/demo/run_demo.py
new file mode 100644
index 0000000..e2b6a81
--- /dev/null
+++ b/demo/run_demo.py
@@ -0,0 +1,18 @@
+from metis.dq_orchestrator import DQOrchestrator
+from metis.metric import Metric
+
+_METRIC_CONFIGS = {  # metric name -> config file path; unlisted metrics get None
+    "consistency_countFDViolations": "demo/configs/consistency.json",
+}
+# NOTE(review): runs at import time (no __main__ guard); paths look cwd-relative - confirm.
+orchestrator = DQOrchestrator(writer_config_path="demo/configs/sqlite.json")
+orchestrator.load(data_loader_configs=["data/restaurants.json"])
+# Assess each entry of Metric.registry in its own call so one failure does not stop the rest.
+for metric_name in Metric.registry:  # presumably yields metric names - confirm
+    try:
+        orchestrator.assess(
+            metrics=[metric_name],
+            metric_configs=[_METRIC_CONFIGS.get(metric_name)],  # None when no config
+        )
+    except Exception as exc:
+        print(f"Metric {metric_name} failed: {exc}")