From 1ff7d5cdba8dd6a72403e366fa087e55cacaa8af Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Sun, 1 Mar 2026 15:44:01 +0000
Subject: [PATCH 01/11] repo aggiornata per toolkit

---
 .github/seed-issues/00_kickoff.md             |  43 +++++
 .github/seed-issues/01_civic_questions.md     |  42 +++++
 .github/seed-issues/01_setup.md               |  17 --
 .github/seed-issues/02_dataset_scoping.md     |  17 --
 .github/seed-issues/02_sources.md             |  43 +++++
 .github/seed-issues/03_raw.md                 |  42 +++++
 .github/seed-issues/03_raw_ingestion.md       |  17 --
 .github/seed-issues/04_clean.md               |  46 +++++
 .github/seed-issues/04_raw_to_clean.md        |  17 --
 .github/seed-issues/05_clean_to_mart.md       |  17 --
 .github/seed-issues/05_mart.md                |  44 +++++
 .github/seed-issues/06_qa_checks.md           |  17 --
 .github/seed-issues/06_validation_qa.md       |  44 +++++
 .github/seed-issues/07_dashboard.md           |  42 +++++
 .github/seed-issues/07_viz_dashboard.md       |  17 --
 .github/seed-issues/08_docs_release.md        |  17 --
 .github/seed-issues/08_release.md             |  57 ++++++
 .../09_docs_decisions_dictionary.md           |  44 +++++
 .github/seed-issues/10_maintenance.md         |  46 +++++
 .github/workflows/ci.yml                      |  89 +++++++++
 .gitignore                                    |  41 +++--
 README.md                                     | 128 ++++++++++---
 WORKFLOW.md                                   | 124 ++-----------
 dashboard/README.md                           |  43 +----
 dataset.yml                                   |  70 +++++++
 docs/METHOD.md                                |  23 ---
 docs/README.md                                |  16 ++
 docs/_archive/INDEX.md                        |   4 +
 docs/contributing.md                          |  48 +++++
 docs/data_dictionary.md                       |  30 +++
 docs/decisions.md                             |  22 +++
 docs/definition-of-done.md                    |  37 ----
 docs/lab_links.md                             |  15 ++
 docs/overview.md                              |  36 ++++
 docs/roles.md                                 |  21 ---
 docs/sources.md                               |  23 +++
 notebooks/00_quickstart.ipynb                 | 172 ++++++++++++++++++
 notebooks/01_explore_mart.ipynb               | 140 ++++++++++++++
 notebooks/02_quality_checks.ipynb             | 134 ++++++++++++++
 notebooks/03_dashboard_export.ipynb           | 131 +++++++++++++
 notebooks/README.md                           |  40 ++--
 queries/README.md                             |  11 --
 scripts/smoke.sh                              |  97 ++++++++++
 sql/README.md                                 |  15 ++
 sql/clean.sql                                 |  23 +++
 sql/mart/project_summary.sql                  |  25 +++
 tests/test_contract.py                        | 121 ++++++++++++
 47 files changed, 1863 insertions(+), 445 deletions(-)
 create mode 100644 .github/seed-issues/00_kickoff.md
 create mode 100644 .github/seed-issues/01_civic_questions.md
 delete mode 100644 .github/seed-issues/01_setup.md
 delete mode 100644 .github/seed-issues/02_dataset_scoping.md
 create mode 100644 .github/seed-issues/02_sources.md
 create mode 100644 .github/seed-issues/03_raw.md
 delete mode 100644 .github/seed-issues/03_raw_ingestion.md
 create mode 100644 .github/seed-issues/04_clean.md
 delete mode 100644 .github/seed-issues/04_raw_to_clean.md
 delete mode 100644 .github/seed-issues/05_clean_to_mart.md
 create mode 100644 .github/seed-issues/05_mart.md
 delete mode 100644 .github/seed-issues/06_qa_checks.md
 create mode 100644 .github/seed-issues/06_validation_qa.md
 create mode 100644 .github/seed-issues/07_dashboard.md
 delete mode 100644 .github/seed-issues/07_viz_dashboard.md
 delete mode 100644 .github/seed-issues/08_docs_release.md
 create mode 100644 .github/seed-issues/08_release.md
 create mode 100644 .github/seed-issues/09_docs_decisions_dictionary.md
 create mode 100644 .github/seed-issues/10_maintenance.md
 create mode 100644 .github/workflows/ci.yml
 create mode 100644 dataset.yml
 delete mode 100644 docs/METHOD.md
 create mode 100644 docs/README.md
 create mode 100644 docs/_archive/INDEX.md
 create mode 100644 docs/contributing.md
 create mode 100644 docs/data_dictionary.md
 create mode 100644 docs/decisions.md
 delete mode 100644 docs/definition-of-done.md
 create mode 100644 docs/lab_links.md
 create mode 100644 docs/overview.md
 delete mode 100644 docs/roles.md
 create mode 100644 docs/sources.md
 create mode 100644 notebooks/00_quickstart.ipynb
 create mode 100644 notebooks/01_explore_mart.ipynb
 create mode 100644 notebooks/02_quality_checks.ipynb
 create mode 100644 notebooks/03_dashboard_export.ipynb
 delete mode 100644 queries/README.md
 create mode 100644 scripts/smoke.sh
 create mode 100644 sql/README.md
 create mode 100644 sql/clean.sql
 create mode 100644 sql/mart/project_summary.sql
 create mode 100644 tests/test_contract.py

diff --git a/.github/seed-issues/00_kickoff.md b/.github/seed-issues/00_kickoff.md
new file mode 100644
index 0000000..282ac78
--- /dev/null
+++ b/.github/seed-issues/00_kickoff.md
@@ -0,0 +1,43 @@
+---
+title: "[Kickoff] Definire domanda civica, perimetro e contratto iniziale del dataset"
+labels: ["LEAD", "METODO"]
+assignees: []
+---
+## Perché questa fase conta
+
+Qui si decide se il progetto ha una domanda utile, comprensibile e davvero sostenibile nel tempo.
+Un kickoff fatto bene evita di costruire dati che poi non rispondono alla domanda iniziale.
+
+## Output visibile al pubblico
+
+Una spiegazione chiara di cosa vuole capire il progetto e di quali dati userà.
+
+## Obiettivo
+
+Avviare il progetto dataset con perimetro chiaro, domanda civica misurabile e contratto toolkit-first coerente con il template.
+
+## Checklist
+
+- [ ] Definire una sola domanda civica, chiara e misurabile
+- [ ] Compilare `dataset.yml` con `dataset.name`, `dataset.years` e `root`
+- [ ] Verificare che i path in config siano root-relative POSIX
+- [ ] Confermare la struttura canonica `sql/clean.sql` e `sql/mart/<table>.sql`
+- [ ] Allineare ruoli e ownership con `docs/lab_links.md`
+- [ ] Verificare che `tests/test_contract.py` sia verde in locale
+
+## Output atteso
+
+Progetto inizializzato con contratto di base valido e documentazione minima pronta per il source onboarding.
+
+## File da toccare
+
+- `dataset.yml`
+- `README.md`
+- `docs/lab_links.md`
+
+## Acceptance criteria
+
+- `dataset.yml` esiste in root ed è coerente con il contratto smoke reale
+- il perimetro del progetto è documentato in modo comprensibile
+- `pytest tests/test_contract.py` passa
+- il progetto può passare alla fase Source onboarding senza ambiguità su nome dataset, anni e scope
diff --git a/.github/seed-issues/01_civic_questions.md b/.github/seed-issues/01_civic_questions.md
new file mode 100644
index 0000000..9487f1f
--- /dev/null
+++ b/.github/seed-issues/01_civic_questions.md
@@ -0,0 +1,42 @@
+---
+title: "[Questions] Esplicitare domande civiche, metriche e ipotesi iniziali"
+labels: ["METODO", "LEAD"]
+assignees: []
+---
+## Perché questa fase conta
+
+Un progetto dati utile nasce da domande chiare, non da una pipeline costruita nel vuoto.
+Questa fase serve a collegare il lavoro tecnico a un problema pubblico leggibile.
+
+## Output visibile al pubblico
+
+Un set di domande guida che spiega cosa il progetto prova a capire e con quali misure.
+
+## Obiettivo
+
+Definire le domande civiche che guideranno fonti, metriche, unità di analisi e output del progetto.
+
+## Checklist
+
+- [ ] 3-5 domande civiche esplicite
+- [ ] 3-5 metriche collegate
+- [ ] Ipotesi dichiarate
+- [ ] Coerenza con unità di analisi
+
+## Output atteso
+
+Una base pubblica e metodologica chiara da cui far discendere le scelte su fonti, CLEAN, MART e output finali.
+
+## File da toccare
+
+- `README.md`
+- `docs/overview.md`
+- `docs/lab_links.md`
+- `docs/decisions.md`
+
+## Acceptance criteria
+
+- le domande guida sono scritte in linguaggio semplice
+- le metriche proposte sono coerenti con le domande
+- le ipotesi iniziali sono esplicitate
+- c'è coerenza fra domande, metriche e unità di analisi
diff --git a/.github/seed-issues/01_setup.md b/.github/seed-issues/01_setup.md
deleted file mode 100644
index b57ede7..0000000
--- a/.github/seed-issues/01_setup.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: "M1-01 — Setup progetto (repo, ruoli, board, convenzioni)"
-labels: ["LEAD"]
-assignees: []
----
-## 🎯 Obiettivo
-Rendere il progetto operabile in modo replicabile.
-
-## ✅ Task
-- [ ] Verificare struttura cartelle (data/, notebooks/, docs/, src/ se serve)
-- [ ] Collegare repo alla Board (Roadmap/Open) e creare views minime
-- [ ] Definire ruoli (Data/Method/Viz/QA/Doc) e owners
-- [ ] Allineare naming: dataset_id, paths, convenzioni file
-- [ ] Inserire link a WORKFLOW / CONTRIBUTING / GOVERNANCE (Lab)
-
-## 📦 Output atteso
-Repo pronta per partire con la pipeline e PR cadence.
\ No newline at end of file
diff --git a/.github/seed-issues/02_dataset_scoping.md b/.github/seed-issues/02_dataset_scoping.md
deleted file mode 100644
index 661e846..0000000
--- a/.github/seed-issues/02_dataset_scoping.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: "M1-02 — Scoping dataset (fonte, perimetro, limiti, chiavi)"
-labels: ["METODO"]
-assignees: []
----
-## 🎯 Obiettivo
-Mettere nero su bianco cosa stiamo usando e cosa NON stiamo facendo.
-
-## ✅ Task
-- [ ] Link fonte ufficiale + eventuali mirror
-- [ ] Definire unità di analisi (comune/provincia/anno/voce ecc.)
-- [ ] Identificare chiavi e campi core (ID, anno, territorio, misura)
-- [ ] Elencare limiti noti (mancanti, revisioni, definizioni ambigue)
-- [ ] Definire assunzioni minime (es. normalizzazioni, conversioni)
-
-## 📦 Output atteso
-Bozza `docs/dataset.md` o sezione README con scoping chiaro.
\ No newline at end of file
diff --git a/.github/seed-issues/02_sources.md b/.github/seed-issues/02_sources.md
new file mode 100644
index 0000000..2373e62
--- /dev/null
+++ b/.github/seed-issues/02_sources.md
@@ -0,0 +1,43 @@
+---
+title: "[Sources] Onboarding fonti, licenza, refresh e decisioni di ingestione"
+labels: ["DATA", "METODO"]
+assignees: []
+---
+## Perché questa fase conta
+
+Se la fonte non è chiara, tutto il resto del progetto diventa fragile.
+Questa fase serve a capire da dove arrivano i dati e con quali limiti.
+
+## Output visibile al pubblico
+
+Una scheda semplice delle fonti usate, con link e note di contesto.
+
+## Obiettivo
+
+Qualificare la fonte e codificare in modo riproducibile come il toolkit deve leggerla.
+
+## Checklist
+
+- [ ] Identificare fonte primaria, URL canonico e frequenza di aggiornamento
+- [ ] Aggiornare `raw.source.type` e `raw.source.args` in `dataset.yml`
+- [ ] Documentare licenza, coverage, refresh cadence e note in `docs/sources.md`
+- [ ] Registrare trade-off e assunzioni di ingestione in `docs/decisions.md`
+- [ ] Verificare che non esistano path assoluti o riferimenti locali
+- [ ] Rieseguire i contract tests
+
+## Output atteso
+
+Fonte verificata e configurata in `dataset.yml`, con documentazione sufficiente per procedere al layer RAW.
+
+## File da toccare
+
+- `dataset.yml`
+- `docs/sources.md`
+- `docs/decisions.md`
+
+## Acceptance criteria
+
+- la fonte è verificabile e documentata
+- `raw.source` è compilato con campi sufficienti all'esecuzione
+- `docs/sources.md` contiene note su licenza, refresh e limiti noti
+- `pytest tests/test_contract.py` passa
diff --git a/.github/seed-issues/03_raw.md b/.github/seed-issues/03_raw.md
new file mode 100644
index 0000000..fd27aa8
--- /dev/null
+++ b/.github/seed-issues/03_raw.md
@@ -0,0 +1,42 @@
+---
+title: "[RAW] Rendere riproducibile l'ingestione RAW con il toolkit"
+labels: ["DATA"]
+assignees: []
+---
+## Perché questa fase conta
+
+Questa è la base del progetto: se il dato in ingresso non è stabile o tracciabile, anche le analisi finali diventano deboli.
+
+## Output visibile al pubblico
+
+Una fonte acquisita in modo ripetibile, con traccia di cosa è stato usato e quando.
+
+## Obiettivo
+
+Ottenere un layer RAW eseguibile e ripetibile, senza committare output in repo.
+
+## Checklist
+
+- [ ] Verificare `raw.source` e eventuale extractor in `dataset.yml`
+- [ ] Eseguire `toolkit run raw --config dataset.yml --year <year>`
+- [ ] Eseguire `toolkit validate --config dataset.yml --year <year>` oppure documentare il blocco
+- [ ] Controllare metadata, manifest e validation report del RAW
+- [ ] Confermare che `data/` non contenga output committati
+- [ ] Aggiornare `docs/decisions.md` con eventuali eccezioni o failure modes
+
+## Output atteso
+
+RAW eseguibile con report minimi di validazione e metadata disponibili negli artifact di run.
+
+## File da toccare
+
+- `dataset.yml`
+- `docs/decisions.md`
+- `docs/sources.md`
+
+## Acceptance criteria
+
+- il run RAW completa o il blocco è documentato in modo riproducibile
+- nessun output RAW viene aggiunto sotto `data/`
+- gli artifact minimi del RAW sono attesi sotto `_runs/`
+- il progetto può passare a CLEAN con input RAW deterministico
diff --git a/.github/seed-issues/03_raw_ingestion.md b/.github/seed-issues/03_raw_ingestion.md
deleted file mode 100644
index 5e50dfa..0000000
--- a/.github/seed-issues/03_raw_ingestion.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: "M1-03 — RAW ingestion (download, snapshot, metadata)"
-labels: ["DATA"]
-assignees: []
----
-## 🎯 Obiettivo
-Acquisire RAW in modo ripetibile e tracciabile.
-
-## ✅ Task
-- [ ] Definire path RAW standard (Drive/FS) + naming files
-- [ ] Script/notebook per download o import (no trasformazioni)
-- [ ] Salvare metadata: url, timestamp, checksum/size, note versione
-- [ ] Gestire encoding e separatore (senza “pulire” i dati)
-- [ ] Log minimo (righe, colonne, errori)
-
-## 📦 Output atteso
-RAW disponibile + manifest/metadata (anche solo JSON/MD).
\ No newline at end of file
diff --git a/.github/seed-issues/04_clean.md b/.github/seed-issues/04_clean.md
new file mode 100644
index 0000000..d6a3ba4
--- /dev/null
+++ b/.github/seed-issues/04_clean.md
@@ -0,0 +1,46 @@
+---
+title: "[CLEAN] Implementare normalizzazione, required columns e validazioni CLEAN"
+labels: ["DATA"]
+assignees: []
+---
+## Perché questa fase conta
+
+Qui il dato diventa davvero leggibile e confrontabile.
+Una buona fase CLEAN riduce errori, ambiguità e lavoro manuale futuro.
+
+## Output visibile al pubblico
+
+Un dataset più chiaro, con colonne coerenti e significato documentato.
+
+## Obiettivo
+
+Portare il dataset da RAW a CLEAN con SQL esplicita, schema documentato e validazioni minime.
+
+## Checklist
+
+- [ ] Implementare o aggiornare `sql/clean.sql`
+- [ ] Allineare `clean.read`, `clean.required_columns` e `clean.validate` in `dataset.yml`
+- [ ] Verificare chiavi logiche, `not_null`, `min_rows` e duplicati
+- [ ] Eseguire `toolkit run clean --config dataset.yml --year <year>`
+- [ ] Eseguire `toolkit validate --config dataset.yml --year <year>`
+- [ ] Aggiornare `docs/data_dictionary.md` per il layer CLEAN
+- [ ] Loggare assunzioni e mapping in `docs/decisions.md`
+
+## Output atteso
+
+Layer CLEAN riproducibile, con schema e regole di validazione sufficienti per alimentare i mart.
+
+## File da toccare
+
+- `sql/clean.sql`
+- `dataset.yml`
+- `docs/data_dictionary.md`
+- `docs/decisions.md`
+
+## Acceptance criteria
+
+- `sql/clean.sql` legge da `raw_input`
+- `clean.required_columns` è aggiornato
+- `clean.validate` copre almeno chiavi, `not_null` e `min_rows` quando applicabile
+- rowcount sanity e duplicate check sono stati eseguiti
+- il progetto può passare a MART senza ambiguità sullo schema CLEAN
diff --git a/.github/seed-issues/04_raw_to_clean.md b/.github/seed-issues/04_raw_to_clean.md
deleted file mode 100644
index 60a20cc..0000000
--- a/.github/seed-issues/04_raw_to_clean.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: "M1-04 — RAW → CLEAN (standard colonne, parsing, validazioni base)"
-labels: ["DATA"]
-assignees: []
----
-## 🎯 Obiettivo
-Produrre CLEAN coerente multi-anno e pronto per analisi.
-
-## ✅ Task
-- [ ] Standardizzare nomi colonne (snake_case) + dizionario
-- [ ] Parsing numeri IT (., , , %, -, null)
-- [ ] Tipi coerenti (string/int/float/date) e regole di casting
-- [ ] Gestire valori speciali (n.d., 0, vuoti) con policy esplicita
-- [ ] Validazioni base: righe attese, colonne obbligatorie, duplicati chiave
-
-## 📦 Output atteso
-File CLEAN (parquet/csv) + mini data dictionary.
\ No newline at end of file
diff --git a/.github/seed-issues/05_clean_to_mart.md b/.github/seed-issues/05_clean_to_mart.md
deleted file mode 100644
index 377a9fb..0000000
--- a/.github/seed-issues/05_clean_to_mart.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: "M1-05 — CLEAN → MART (metriche, aggregazioni, modelli per dashboard)"
-labels: ["DATA"]
-assignees: []
----
-## 🎯 Obiettivo
-Creare dataset MART orientato a domande e dashboard.
-
-## ✅ Task
-- [ ] Definire “domande base” (3–5) che il MART deve supportare
-- [ ] Selezionare metriche e dimensioni (tempo, territorio, categoria)
-- [ ] Creare tabelle MART (fact + dimensioni se serve) o dataset wide
-- [ ] Documentare formule (KPI, percentuali, normalizzazioni)
-- [ ] Export su Drive / BigQuery (se previsto) con naming standard
-
-## 📦 Output atteso
-MART pronto per Looker/PowerBI + note KPI.
\ No newline at end of file
diff --git a/.github/seed-issues/05_mart.md b/.github/seed-issues/05_mart.md
new file mode 100644
index 0000000..9f8574e
--- /dev/null
+++ b/.github/seed-issues/05_mart.md
@@ -0,0 +1,44 @@
+---
+title: "[MART] Costruire tabelle analitiche e regole di validazione MART"
+labels: ["DATA", "METODO"]
+assignees: []
+---
+## Perché questa fase conta
+
+Qui il progetto inizia a produrre risposte utilizzabili.
+I mart sono la parte che alimenta analisi, dashboard e insight condivisibili.
+
+## Output visibile al pubblico
+
+Tabelle finali leggibili, da cui ricavare indicatori e confronti.
+
+## Obiettivo
+
+Produrre uno o più mart orientati a KPI e output finali, con tabella/e e validation rules esplicite.
+
+## Checklist
+
+- [ ] Creare o aggiornare `sql/mart/<table>.sql` per ogni tabella dichiarata
+- [ ] Allineare `mart.tables`, `mart.required_tables` e `mart.validate` in `dataset.yml`
+- [ ] Eseguire `toolkit run mart --config dataset.yml --year <year>`
+- [ ] Eseguire `toolkit validate --config dataset.yml --year <year>`
+- [ ] Verificare required columns, chiavi, `not_null`, `min_rows` e KPI sanity
+- [ ] Aggiornare `docs/data_dictionary.md` con granularità, KPI e semantica dei mart
+
+## Output atteso
+
+Mart pronti per dashboard o report, con SQL separata per tabella e regole di validazione chiare.
+
+## File da toccare
+
+- `sql/mart/project_summary.sql`
+- `dataset.yml`
+- `docs/data_dictionary.md`
+- `docs/decisions.md`
+
+## Acceptance criteria
+
+- ogni tabella dichiarata in `mart.tables[]` ha un file SQL dedicato
+- `mart.required_tables` e `mart.validate.table_rules` sono coerenti con le tabelle pubblicate
+- rowcount sanity e duplicate check sono stati eseguiti
+- il progetto può passare a QA con mart leggibili e validabili
diff --git a/.github/seed-issues/06_qa_checks.md b/.github/seed-issues/06_qa_checks.md
deleted file mode 100644
index 65423ab..0000000
--- a/.github/seed-issues/06_qa_checks.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: "M1-06 — QA (controlli qualità, coerenza, regressioni)"
-labels: ["QA"]
-assignees: []
----
-## 🎯 Obiettivo
-Evitare che il progetto “sembri ok” ma sia sbagliato.
-
-## ✅ Task
-- [ ] Check coerenza per anno/territorio (buchi, outlier grossi)
-- [ ] Check somme e percentuali (range 0–100, totali coerenti)
-- [ ] Confronto RAW vs CLEAN: righe perse/aggiunte spiegate
-- [ ] Regole di regressione (se rifacciamo pipeline, cosa non deve cambiare)
-- [ ] Output QA: report breve (MD) con esito e anomalie note
-
-## 📦 Output atteso
-`docs/qa.md` + checklist QA riusabile per dataset futuri.
\ No newline at end of file
diff --git a/.github/seed-issues/06_validation_qa.md b/.github/seed-issues/06_validation_qa.md
new file mode 100644
index 0000000..564aca9
--- /dev/null
+++ b/.github/seed-issues/06_validation_qa.md
@@ -0,0 +1,44 @@
+---
+title: "[QA] Chiudere validazioni, contract tests e smoke opzionale"
+labels: ["QA", "DATA"]
+assignees: []
+---
+## Perché questa fase conta
+
+È il momento in cui si verifica se il progetto regge davvero.
+Serve a evitare output convincenti ma fragili o poco affidabili.
+
+## Output visibile al pubblico
+
+Un dataset più affidabile, con controlli espliciti e anomalie residue tracciate.
+
+## Obiettivo
+
+Chiudere il gate tecnico di qualità con contract tests verdi, validazioni dataset e smoke opzionale documentato.
+
+## Checklist
+
+- [ ] Eseguire `pytest tests/test_contract.py`
+- [ ] Verificare che la CI `contract` sia verde
+- [ ] Verificare che la CI `smoke` sia documentata e attivabile con `RUN_SMOKE=1`
+- [ ] Rieseguire `toolkit validate --config dataset.yml --year <year>` se disponibile
+- [ ] Controllare outlier, rowcount sanity, duplicates e coerenza dei KPI
+- [ ] Aprire issue residue per anomalie non bloccanti
+
+## Output atteso
+
+Gate QA superato, con standard minimo del Lab rispettato e stato di qualità esplicito.
+
+## File da toccare
+
+- `tests/test_contract.py`
+- `.github/workflows/ci.yml`
+- `README.md`
+- `docs/decisions.md`
+
+## Acceptance criteria
+
+- i contract tests passano
+- il job `contract` in CI è sempre eseguibile senza dipendere da toolkit su PyPI
+- il job `smoke` resta opzionale e documentato
+- le anomalie residue sono documentate o trasformate in issue
diff --git a/.github/seed-issues/07_dashboard.md b/.github/seed-issues/07_dashboard.md
new file mode 100644
index 0000000..c7b6e4e
--- /dev/null
+++ b/.github/seed-issues/07_dashboard.md
@@ -0,0 +1,42 @@
+---
+title: "[Dashboard] Dashboard o output pubblico collegato ai MART"
+labels: ["VIZ", "OPTIONAL"]
+assignees: []
+---
+## Perché questa fase conta
+
+È la fase in cui il lavoro diventa leggibile anche fuori dal team tecnico.
+Serve a trasformare tabelle finali in un output che aiuti davvero chi legge.
+
+## Output visibile al pubblico
+
+Una dashboard, un report o una pagina che spiega cosa emerge dai dati.
+
+## Obiettivo
+
+Preparare un output pubblico che consumi i mart prodotti dal progetto.
+
+## Checklist
+
+- [ ] Identificare il mart sorgente e i KPI che alimentano l'output
+- [ ] Documentare limiti, assunzioni e ultimo aggiornamento in `dashboard/README.md`
+- [ ] Verificare coerenza fra dashboard e `docs/data_dictionary.md`
+- [ ] Esplicitare cosa emerge e cosa non emerge dall'output
+- [ ] Collegare l'output nel `README.md`
+
+## Output atteso
+
+Dashboard, report o pagina pubblica leggibile e coerente con i mart del progetto.
+
+## File da toccare
+
+- `dashboard/README.md`
+- `README.md`
+- `docs/data_dictionary.md`
+
+## Acceptance criteria
+
+- l'output usa solo mart documentati
+- KPI e limiti sono spiegati in modo leggibile
+- il collegamento con il dataset rilasciato è tracciabile
+- l'issue non blocca la release del dataset se l'output pubblico non è previsto
diff --git a/.github/seed-issues/07_viz_dashboard.md b/.github/seed-issues/07_viz_dashboard.md
deleted file mode 100644
index 8cd5cad..0000000
--- a/.github/seed-issues/07_viz_dashboard.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: "M1-07 — Visualizzazione (dashboard MVP + scelte di comunicazione)"
-labels: ["VIZ"]
-assignees: []
----
-## 🎯 Obiettivo
-Trasformare il MART in una dashboard MVP utile e condivisibile.
-
-## ✅ Task
-- [ ] Definire pubblico target + 1 frase “cosa ci portiamo a casa”
-- [ ] Wireframe: 1 pagina (KPI, trend, mappa/tabella, filtri)
-- [ ] Scelte viz: colori, scale, note interpretative, unità misura
-- [ ] Implementazione MVP (Looker/PowerBI/altro) con filtri minimi
-- [ ] Sezione “Come leggere” (3 bullet) + limiti dati
-
-## 📦 Output atteso
-Link dashboard + screenshot + note di lettura.
\ No newline at end of file
diff --git a/.github/seed-issues/08_docs_release.md b/.github/seed-issues/08_docs_release.md
deleted file mode 100644
index 43b7f6e..0000000
--- a/.github/seed-issues/08_docs_release.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: "M1-08 — Documentazione & Release (README, riproducibilità, share)"
-labels: ["DOCS"]
-assignees: []
----
-## 🎯 Obiettivo
-Rendere il progetto replicabile e “pubblicabile” in 10 minuti.
-
-## ✅ Task
-- [ ] README: obiettivo, dataset, output, come eseguire
-- [ ] Struttura “metodo”: assunzioni, limiti, scelte, non-coperto
-- [ ] Link a notebook/script in ordine di esecuzione
-- [ ] Dataset catalog (input/output paths) + versione
-- [ ] Tag/release v0.1 + changelog breve (cosa c’è / cosa manca)
-
-## 📦 Output atteso
-Repo presentabile + release v0.1 pronta per essere condivisa.
\ No newline at end of file
diff --git a/.github/seed-issues/08_release.md b/.github/seed-issues/08_release.md
new file mode 100644
index 0000000..be37dd2
--- /dev/null
+++ b/.github/seed-issues/08_release.md
@@ -0,0 +1,57 @@
+---
+title: "[Release] Preparare release del dataset, README e artifacts minimi"
+labels: ["DOCS", "LEAD"]
+assignees: []
+---
+## Perché questa fase conta
+
+Questa fase rende il progetto condivisibile anche con chi arriva da fuori.
+Una buona release rende chiaro cosa esiste, cosa si può usare e con quali limiti.
+
+## Output visibile al pubblico
+
+Una homepage chiara, una overview leggibile e una release spiegata bene.
+
+## Obiettivo
+
+Portare il progetto a una release riproducibile, spiegabile e pronta per handoff.
+
+## Checklist
+
+- [ ] Aggiornare `README.md` con scopo, metodo, output e limiti
+- [ ] Verificare `docs/lab_links.md` per release policy, DoD e riferimenti Lab-wide
+- [ ] Confermare che `output.artifacts` resti su `minimal` o motivare eccezioni
+- [ ] Collegare eventuale dashboard o report ai mart corretti
+- [ ] Verificare che documentazione e artifact minimi siano coerenti
+- [ ] Preparare PR o release notes interne
+
+## Public-facing readiness
+
+- [ ] README pubblico-first ok
+- [ ] `docs/overview.md` presente
+- [ ] almeno 3 domande o insight presenti nel README
+- [ ] `docs/data_dictionary.md` aggiornato almeno al minimo
+
+## Civic clarity check
+
+- [ ] Le domande guida sono ancora rilevanti?
+- [ ] Le metriche rispondono davvero alle domande?
+- [ ] Ci sono insight scritti in linguaggio semplice?
+
+## Output atteso
+
+Release interna o pubblica con documentazione finale coerente con i dati e con i mart prodotti.
+
+## File da toccare
+
+- `README.md`
+- `docs/overview.md`
+- `docs/lab_links.md`
+- `docs/data_dictionary.md`
+
+## Acceptance criteria
+
+- README e docs descrivono chiaramente cosa è stato rilasciato
+- il progetto è riprendibile da terzi senza conoscenza implicita
+- gli artifact minimi attesi sono coerenti con la policy del template
+- i gate precedenti sono chiusi o le eccezioni sono documentate
diff --git a/.github/seed-issues/09_docs_decisions_dictionary.md b/.github/seed-issues/09_docs_decisions_dictionary.md
new file mode 100644
index 0000000..1a56619
--- /dev/null
+++ b/.github/seed-issues/09_docs_decisions_dictionary.md
@@ -0,0 +1,44 @@
+---
+title: "[Docs] Aggiornare decision log e data dictionary del progetto"
+labels: ["DOCS", "METODO"]
+assignees: []
+---
+## Perché questa fase conta
+
+Un progetto dati è utile solo se altre persone riescono a capirlo e riprenderlo.
+Questa fase rende visibili scelte, significato dei campi e limiti.
+
+## Output visibile al pubblico
+
+Documentazione leggibile che spiega cosa significano i dati e come interpretarli.
+
+## Obiettivo
+
+Tenere allineata la documentazione strutturata del dataset durante tutto il lifecycle del progetto.
+
+## Checklist
+
+- [ ] Aggiornare `docs/decisions.md` con decisioni, eccezioni e trade-off
+- [ ] Aggiornare `docs/data_dictionary.md` per RAW, CLEAN e MART
+- [ ] Verificare coerenza con `docs/sources.md`
+- [ ] Verificare coerenza con `docs/lab_links.md`
+- [ ] Allineare esempi e naming in README e workflow dataset
+
+## Output atteso
+
+Decision log e data dictionary completi, utili per review, handoff e manutenzione futura.
+
+## File da toccare
+
+- `docs/decisions.md`
+- `docs/data_dictionary.md`
+- `docs/sources.md`
+- `docs/lab_links.md`
+- `README.md`
+
+## Acceptance criteria
+
+- il decision log spiega le scelte non ovvie
+- il data dictionary descrive i campi essenziali di CLEAN e MART
+- la documentazione è coerente con `dataset.yml` e con la SQL canonica
+- il progetto può passare review metodologica senza knowledge transfer verbale
diff --git a/.github/seed-issues/10_maintenance.md b/.github/seed-issues/10_maintenance.md
new file mode 100644
index 0000000..c1df356
--- /dev/null
+++ b/.github/seed-issues/10_maintenance.md
@@ -0,0 +1,46 @@
+---
+title: "[Maintenance] Gestire nuove annualità, cambi schema e regressioni"
+labels: ["DATA", "MAINTENANCE"]
+assignees: []
+---
+## Perché questa fase conta
+
+I dataset non restano fermi: cambiano fonti, anni, regole e definizioni.
+Questa fase serve a mantenere il progetto utile anche dopo la prima release.
+
+## Output visibile al pubblico
+
+Un progetto che resta aggiornabile e non si rompe al primo cambiamento di fonte.
+
+## Obiettivo
+
+Definire il lavoro necessario per mantenere il dataset nel tempo quando cambiano fonte, schema o regole.
+
+## Checklist
+
+- [ ] Aggiornare `dataset.yml` per nuove annualità o sorgenti
+- [ ] Verificare impatto su `sql/clean.sql` e `sql/mart/*.sql`
+- [ ] Rieseguire contract tests
+- [ ] Rieseguire smoke opzionale in caso di cambio sostanziale
+- [ ] Aggiornare `docs/sources.md`, `docs/data_dictionary.md` e `docs/decisions.md`
+- [ ] Documentare regressioni o incompatibilità
+
+## Output atteso
+
+Piano di manutenzione chiaro e procedimento ripetibile per evolvere il dataset senza rompere il contratto del template.
+
+## File da toccare
+
+- `dataset.yml`
+- `sql/clean.sql`
+- `sql/mart/project_summary.sql`
+- `docs/sources.md`
+- `docs/data_dictionary.md`
+- `docs/decisions.md`
+
+## Acceptance criteria
+
+- i cambi sono tracciati nei documenti corretti
+- i contract tests restano verdi
+- la manutenzione non introduce path assoluti o output committati in `data/`
+- il progetto può essere rieseguito per un nuovo anno o schema senza lavoro manuale implicito
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..a78e7ee
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,89 @@
+name: CI
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+  workflow_dispatch:
+    inputs:
+      run_smoke:
+        description: "Run optional smoke e2e job"
+        required: false
+        type: boolean
+        default: false
+
+env:
+  PYTHON_VERSION: "3.11"
+  TOOLKIT_PIP_PACKAGE: ""  # empty: the smoke job falls back to a local ./toolkit or ../toolkit directory
+  TOOLKIT_BIN: "toolkit"  # not referenced in this workflow; presumably read by scripts/smoke.sh (TODO confirm)
+
+jobs:
+  contract:  # unconditional job: installs only pytest and pyyaml, never the toolkit
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Install contract test dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install pytest pyyaml
+
+      - name: Verify required files
+        run: |
+          test -f dataset.yml
+          test -f sql/clean.sql
+          test -f sql/mart/project_summary.sql
+          test -f scripts/smoke.sh
+
+      - name: Run contract tests
+        run: pytest tests/test_contract.py
+
+  smoke:  # optional job: runs when repo variable RUN_SMOKE is '1' or on manual dispatch with run_smoke enabled
+    if: ${{ vars.RUN_SMOKE == '1' || (github.event_name == 'workflow_dispatch' && inputs.run_smoke) }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Install smoke dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install pytest pyyaml
+          if [ -n "${TOOLKIT_PIP_PACKAGE}" ]; then
+            python -m pip install "${TOOLKIT_PIP_PACKAGE}"
+          elif [ -d "./toolkit" ]; then
+            python -m pip install -e "./toolkit"
+          elif [ -d "../toolkit" ]; then
+            python -m pip install -e "../toolkit"
+          else
+            echo "Unable to install toolkit: set TOOLKIT_PIP_PACKAGE or provide ./toolkit or ../toolkit" >&2
+            exit 1
+          fi
+
+      - name: Export DCL_ROOT  # written to GITHUB_ENV so the following steps see DCL_ROOT
+        run: echo "DCL_ROOT=${GITHUB_WORKSPACE}" >> "${GITHUB_ENV}"
+
+      - name: Smoke run raw clean mart validate
+        run: sh scripts/smoke.sh
+
+      - name: Upload minimal artifacts
+        if: always()  # upload whatever was produced even when an earlier step failed
+        uses: actions/upload-artifact@v4
+        with:
+          name: toolkit-artifacts
+          path: |
+            _runs/**/*.json
+            _runs/**/logs/**
+          if-no-files-found: warn  # no matching files produces a warning, not a job failure
diff --git a/.gitignore b/.gitignore
index 860a1f2..2d1ba3f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,21 +1,24 @@
 # =========================
-# 🔒 DATI (NON VERSIONARE)
+# DATI (NON VERSIONARE)
 # =========================
-data/raw/**
-data/clean/**
-data/mart/**
+data/**
+!data/**/
 !data/**/README.md
+!data/_examples/
+!data/_examples/**
 
-# eventuali export temporanei
-*.csv
-*.tsv
-*.xlsx
-*.xls
-*.parquet
-*.json
+# output di run e artifact locali
+# ignore the contents, not `_runs/` itself: git cannot re-include a file whose parent directory is excluded
+_runs/*
+!_runs/.gitkeep
+
+# eventuali export temporanei generati dal toolkit
+_runs/**/*.csv
+_runs/**/*.tsv
+_runs/**/*.parquet
 
 # =========================
-# 🧠 NOTEBOOK / PYTHON
+# NOTEBOOK / PYTHON
 # =========================
 __pycache__/
 *.py[cod]
@@ -26,17 +29,19 @@ venv/
 .envrc
 
 # =========================
-# 🧪 TOOL / TEMP
+# TOOL / TEMP
 # =========================
 .tmp/
 .tmp/**
+_tmp/
+_tmp/**
 .cache/
 .cache/**
 logs/
 *.log
 
 # =========================
-# 💻 IDE / EDITOR
+# IDE / EDITOR
 # =========================
 .vscode/*
 !.vscode/settings.json
@@ -46,14 +51,14 @@ logs/
 *.code-workspace
 
 # =========================
-# 🖥️ OS
+# OS
 # =========================
 .DS_Store
 Thumbs.db
 desktop.ini
 
 # =========================
-# 🔑 CREDENTIALS (MAI!)
+# CREDENTIALS (MAI)
 # =========================
 credentials.json
 service_account.json
@@ -61,13 +66,13 @@ service_account.json
 *.pem
 
 # =========================
-# 📦 ARCHIVI
+# ARCHIVI
 # =========================
 *.zip
 *.tar
 *.gz
 
 # =========================
-# 🔍 ALTRO
+# ALTRO
 # =========================
 nohup.out
diff --git a/README.md b/README.md
index 3dd4bed..f5e87c7 100644
--- a/README.md
+++ b/README.md
@@ -1,39 +1,109 @@
-# 📊 Titolo del progetto
+# 📊 [Nome dataset] — DataCivicLab
 
-## Domanda civica
-(Una sola frase, chiara, misurabile)
+Questo progetto analizza **[fenomeno pubblico]** per rispondere a una domanda semplice:
+**cosa sta succedendo, dove e con quali differenze nel tempo?**
 
-## Perché questo progetto
-(2–3 righe: perché questa domanda è rilevante ora)
+È pensato per chi vuole orientarsi in fretta:
+capire cosa mostrano i dati, dove sono solidi, quali limiti hanno e quali domande aiutano ad approfondire.
 
-## Ruoli
-- Project Lead:
-- Data:
-- Metodo:
-- Viz:
-- QA:
-- Docs:
+* **Stato:** [alpha | beta | stable]
+* **Copertura:** [anni], [territorio]
+* **Unità di analisi:** [Comune / ASL / Provincia / …]
 
-## Dataset utilizzati
-- Nome dataset — Fonte ufficiale — Periodo — Livello
-- Nome dataset — Fonte ufficiale — Periodo — Livello
+## 🎯 La domanda civica
 
-## Output pubblico
-- Tipo: (dashboard / report / pagina)
-- Link: (quando disponibile)
+**[Scrivi qui la domanda chiave in una frase chiara.]**
 
-## Stato progetto
-🟢 Attivo | 🟡 In revisione | 🔴 Bloccato | ✅ Chiuso
+Esempio:
 
-## Come si contribuisce
+* Come varia [fenomeno] tra territori?
+* Dove si osservano miglioramenti o peggioramenti?
+* Il mio territorio è sopra o sotto la media?
 
-1. **Discussion** per idee / contesto
-2. **Issue** per task concreti
-3. **Branch** per lavorare
-4. **Pull Request** per revisione e merge
+## 🔎 Cosa puoi capire con questi dati
 
-Dettagli in `WORKFLOW.md`.
+* come cambia il fenomeno nel tempo
+* quali territori mostrano differenze significative
+* se il tuo territorio è sopra o sotto la media
+* se emergono anomalie o salti improvvisi
+* quali aree meritano un approfondimento mirato
 
-## Link utili
-- Spreadsheet progetto ([Template](https://docs.google.com/spreadsheets/d/17EmTUVLzimppd70kckX2r2UxPQIwGDMU_Fzsqec4idg/edit?gid=1775469119#gid=1775469119))
-- [Metodo DataCivicLab](https://github.com/dataciviclab/dataciviclab/blob/main/METHOD.md)
\ No newline at end of file
+Non è solo un dataset: è una base per confronto e monitoraggio.
+
+## 📦 Output disponibili
+
+Le tabelle finali sono pronte per dashboard, grafici e analisi.
+
+* `mart.[tabella_1]` — confronti territoriali o temporali
+* `mart.[tabella_2]` — indicatori sintetici, ranking o riepiloghi
+
+Definizioni dettagliate di colonne e metriche:
+👉 `docs/data_dictionary.md`
+
+
+## ✅ Perché fidarsi
+
+La fiducia si costruisce su trasparenza e metodo.
+
+* fonti ufficiali o verificabili (`docs/sources.md`)
+* trasformazioni documentate (`docs/decisions.md`)
+* controlli automatici prima della pubblicazione
+* standard condivisi del DataCivicLab
+
+Ogni scelta che cambia il significato dei dati viene esplicitata.
+
+
+## 💬 Partecipa
+
+Questo repository distingue chiaramente:
+
+* **Discussions** → domande civiche, interpretazioni, proposte di metriche
+* **Issues** → bug, problemi tecnici, miglioramenti della pipeline
+
+Se non sei tecnico, parti da una **Discussion**:
+spiega il contesto, il territorio o l’anno che ti interessa e cosa vuoi capire.
+
+
+## 📚 Documentazione del dataset
+
+* `docs/overview.md` — contesto, copertura, limiti
+* `docs/sources.md` — fonti ufficiali
+* `docs/data_dictionary.md` — colonne e metriche
+* `docs/decisions.md` — scelte progettuali
+* `docs/contributing.md` — come contribuire
+
+
+## 🧭 Roadmap
+
+La roadmap è gestita con **issue + milestone**.
+
+
+## 🔁 Clonabilità
+
+Questo repository è un modello per progetti dataset DataCivicLab.
+
+Per adattarlo a un nuovo dataset:
+
+1. aggiorna la domanda civica e gli esempi di insight
+2. sostituisci fonti, copertura e unità di analisi
+3. definisci metriche e tabelle finali
+4. documenta le decisioni specifiche del dataset
+
+La struttura resta invariata.
+
+
+## 🧪 Esecuzione tecnica (per contributor)
+
+```bash
+pip install dataciviclab-toolkit
+toolkit run --dataset dataset.yml
+```
+
+Per dettagli tecnici (CLI, configurazione, validazioni, run metadata)
+vedi il repository **Toolkit DataCivicLab**.
+
+
+## 🌍 DataCivicLab
+
+Parte del progetto DataCivicLab.
+Costruiamo infrastruttura open per analisi pubbliche riproducibili.
diff --git a/WORKFLOW.md b/WORKFLOW.md
index 4ca385e..ac62d54 100644
--- a/WORKFLOW.md
+++ b/WORKFLOW.md
@@ -1,116 +1,22 @@
-# 🔁 WORKFLOW – come si lavora
+# Workflow
 
-Questo documento descrive il workflow standard per lavorare su un progetto DataCivicLab.
+Come contribuire in modo semplice a un progetto dataset DataCivicLab.
 
-Obiettivo: **collaborazione semplice, asincrona, scalabile**.
+## Percorsi
 
----
+- feedback o idee: apri una Discussion o una Issue
+- avanzamento: usa le issue e la Board
+- insight o visual: parti da `sql/` o `dashboard/` se il progetto li prevede
 
-## 🧭 Principio base
+## Dove andare
 
-```text
-Tutto parte da una Discussion.
-Tutto finisce in una Pull Request.
-```
+- setup e contributo rapido: [docs/contributing.md](docs/contributing.md)
+- standard Lab, DoD e release policy: [docs/lab_links.md](docs/lab_links.md)
+- indice docs locali: [docs/README.md](docs/README.md)
 
----
+## Flusso minimo
 
-## 1) Discussion → idee, domande, contesto
-
-Le **Discussions** servono per:
-- proporre una domanda civica
-- discutere dataset e fonti
-- fare scelte metodologiche (KPI, perimetro, definizioni)
-- allineare rapidamente il team
-
-👉 Nessun lavoro “pesante” parte senza almeno **una discussion iniziale**.
-
----
-
-## 2) Issue → task concreti
-
-Le **Issue** rappresentano lavoro reale.
-
-Una issue dovrebbe:
-- avere un obiettivo chiaro
-- essere limitata (no mega-task)
-- avere criteri di chiusura (Definition of Done)
-
-Esempi:
-- ingestione dataset X (raw)
-- pulizia e normalizzazione (clean)
-- costruzione mart + KPI base
-- prima dashboard (MVP)
-
-👉 Una issue = una cosa fatta.
-
----
-
-## 3) Branch → lavorare senza rompere `main`
-
-Ogni issue si lavora su un **branch dedicato**.
-
-Naming consigliato:
-```text
-issue-12-clean-dataset-rifiuti
-```
-
-Regole:
-- non lavorare direttamente su `main`
-- PR piccole e frequenti > PR gigante
-
----
-
-## 4) Pull Request → revisione e qualità
-
-La **Pull Request (PR)** serve a:
-- mostrare cosa è stato fatto
-- permettere review e miglioramenti
-- lasciare traccia delle decisioni
-
-Una PR è pronta quando:
-- il task è completo
-- README / docs coinvolte sono aggiornate
-- non sono stati caricati dati sul repo
-
----
-
-## 5) Review → Merge
-
-La review verifica:
-- coerenza metodologica
-- qualità dei notebook / query
-- chiarezza della documentazione
-
-Chi può approvare:
-- **Project Lead** o **Maintainer** (vedi `GOVERNANCE.md`)
-
-Dopo approvazione → merge su `main`.
-
----
-
-## 📦 Dati e Drive (importante)
-
-```text
-GitHub = codice + metodo + documentazione
-Drive  = dati
-```
-
-Nelle PR:
-- **non** caricare CSV/XLS/Parquet “pesanti”
-- inserire **link Drive** e schema/README aggiornati
-- spiegare cosa è cambiato e perché
-
----
-
-## ✅ Definition of Done (DoD)
-
-Per chiudere un task, in generale:
-- output prodotto (notebook/query/dashboard)
-- documentazione aggiornata (almeno README rilevanti)
-- link Drive inseriti correttamente
-- controlli qualità minimi eseguiti
-
-(Se vuoi una DoD più dettagliata: `docs/definition-of-done.md`.)
-
----
+1. apri una domanda, un feedback o una correzione
+2. scegli una issue o aprine una nuova
+3. lavora su branch dedicato
+4. apri una PR piccola e leggibile
diff --git a/dashboard/README.md b/dashboard/README.md
index 093711b..9512581 100644
--- a/dashboard/README.md
+++ b/dashboard/README.md
@@ -1,41 +1,16 @@
-# 📊 /dashboards – Output pubblico
+# /dashboard - Output pubblico opzionale
 
-Questa cartella raccoglie le informazioni sugli **output pubblici** del progetto (dashboard, report, mappe).
+Questa cartella raccoglie materiali per output pubblici del progetto, come dashboard, report o mappe.
+Qui vanno link, note di lettura, screenshot e limiti dell'output, non i dati.
 
-Qui non ci sono “file dati”: qui ci sono **link, screenshot e spiegazioni**.
+## Cosa inserire qui
 
----
-
-## ✅ Cosa inserire qui
-
-- **Link pubblico** alla dashboard (Looker Studio / Superset / altro)
+- link pubblico alla dashboard
 - breve descrizione dei KPI principali
 - note su come leggere i grafici
-- limiti e assunzioni (cosa NON si può dedurre)
-- data ultimo aggiornamento
-
----
-
-## 🧾 Template consigliato
-
-```text
-Tipo: Dashboard (Looker Studio)
-Link: https://...
-KPI principali:
-- ...
-Come leggere:
-- ...
-Limiti:
-- ...
-Ultimo aggiornamento: YYYY-MM-DD
-```
-
----
-
-## 🔁 Coerenza con i mart
-
-Ogni dashboard dovrebbe essere legata a uno o più dataset in `/data/mart`
-(con schema documentato e link Drive aggiornato).
+- limiti e assunzioni
+- data di ultimo aggiornamento
 
----
+## Coerenza con i mart
 
+Ogni dashboard dovrebbe essere collegata ai mart documentati e aggiornati del progetto.
diff --git a/dataset.yml b/dataset.yml
new file mode 100644
index 0000000..ecd4266
--- /dev/null
+++ b/dataset.yml
@@ -0,0 +1,70 @@
+# Toolkit-first dataset contract aligned with the current smoke suite.
+# Path policy:
+# - use POSIX separators
+# - keep every path root-relative
+# - no absolute paths
+# Root resolution:
+# 1. dataset.yml root
+# 2. DCL_ROOT
+# 3. base_dir
+
+root: "./_runs"
+
+dataset:
+  name: "project_template"
+  years: [2024]
+
+validation:
+  fail_on_error: true
+
+raw:
+  source:
+    type: "http_file"
+    args:
+      url: "https://example.org/datasets/project_template_2024.csv"
+      filename: "project_template_{year}.csv"
+
+clean:
+  sql: "sql/clean.sql"
+  required_columns:
+    - year
+    - entity_id
+    - metric_value
+  read:
+    source: config_only
+    mode: latest
+    delim: ","
+    header: true
+    encoding: "utf-8"
+    trim_whitespace: true
+    columns: null
+  validate:
+    primary_key:
+      - entity_id
+      - year
+    not_null:
+      - year
+      - entity_id
+    min_rows: 1
+
+mart:
+  required_tables:
+    - project_summary
+  tables:
+    - name: "project_summary"
+      sql: "sql/mart/project_summary.sql"
+  validate:
+    table_rules:
+      project_summary:
+        required_columns:
+          - year
+          - rows_in_year
+          - total_metric_value
+        not_null:
+          - year
+        primary_key:
+          - year
+        min_rows: 1
+
+output:
+  artifacts: minimal
diff --git a/docs/METHOD.md b/docs/METHOD.md
deleted file mode 100644
index 4ea39b5..0000000
--- a/docs/METHOD.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# 🧠 Metodo del progetto
-
-## Obiettivo
-(Perché questo progetto esiste)
-
-## Assunzioni
-- Assunzione 1
-- Assunzione 2
-
-## Limiti dei dati
-- Limite 1
-- Limite 2
-
-## Scelte metodologiche
-- Perché abbiamo fatto X invece di Y
-
-## Cosa NON copre questo progetto
-(Esplicito)
-
-## Come replicare
-- Dataset
-- Query / notebook
-- Passaggi principali
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000..91e5184
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,16 @@
+# Docs
+
+Questa cartella contiene i documenti locali, specifici di questo dataset.
+Per standard del Lab vedi [lab_links.md](lab_links.md).
+
+## Essenziali
+
+- [overview.md](overview.md)
+- [sources.md](sources.md)
+- [data_dictionary.md](data_dictionary.md)
+- [decisions.md](decisions.md)
+- [contributing.md](contributing.md)
+
+## Archive
+
+- [Archived Lab-wide docs](_archive/INDEX.md)
diff --git a/docs/_archive/INDEX.md b/docs/_archive/INDEX.md
new file mode 100644
index 0000000..249a970
--- /dev/null
+++ b/docs/_archive/INDEX.md
@@ -0,0 +1,4 @@
+# Archive
+
+Questa cartella raccoglie documenti storici o di riferimento che non fanno parte del set minimo del template.
+Usala solo come archivio leggero: i documenti locali correnti restano indicizzati in [../README.md](../README.md).
diff --git a/docs/contributing.md b/docs/contributing.md
new file mode 100644
index 0000000..1f9094a
--- /dev/null
+++ b/docs/contributing.md
@@ -0,0 +1,48 @@
+# Contributing
+
+Guida rapida per contribuire ai dati senza dover leggere tutta la documentazione tecnica del progetto.
+
+## Setup minimo
+
+Prerequisiti:
+
+- avere accesso al repo
+- avere Python e il toolkit disponibili nel proprio ambiente, oppure un checkout locale del toolkit
+- lavorare sempre dalla root del progetto
+
+## Contract tests
+
+Esegui sempre prima:
+
+```sh
+pytest tests/test_contract.py
+```
+
+## Smoke locale
+
+Per uno smoke test end-to-end:
+
+```sh
+sh scripts/smoke.sh 2024
+```
+
+Se il toolkit non è nel `PATH`, usa il fallback documentato nello script.
+
+## Regole veloci
+
+- non committare output sotto `data/`, salvo sample piccoli in `data/_examples`
+- aggiorna `docs/decisions.md` quando cambi scelte o trade-off
+- aggiorna `docs/data_dictionary.md` quando cambia il significato dei campi
+- usa path root-relative e POSIX nella documentazione tecnica
+
+## Come aiutare in 15 minuti
+
+- controlla che `docs/sources.md` e `docs/overview.md` siano coerenti
+- migliora una descrizione in `docs/data_dictionary.md`
+- esegui `pytest tests/test_contract.py` e segnala eventuali problemi
+
+## Poi dove vado?
+
+- workflow umano: [../WORKFLOW.md](../WORKFLOW.md)
+- docs locali: [README.md](README.md)
+- standard Lab: [lab_links.md](lab_links.md)
diff --git a/docs/data_dictionary.md b/docs/data_dictionary.md
new file mode 100644
index 0000000..dcdb30b
--- /dev/null
+++ b/docs/data_dictionary.md
@@ -0,0 +1,30 @@
+# Data Dictionary
+
+Audience: Maintainers
+
+Schema sintetico dei dataset gestiti dal progetto.
+
+## Raw
+
+| Column | Type | Description | Nullable | Notes |
+| --- | --- | --- | --- | --- |
+| year | int | Reference year | no | Example placeholder |
+| entity_id | string | Stable entity identifier | no | Replace with project key |
+| metric_value | float | Raw metric value | yes | Replace with actual meaning |
+
+## Clean
+
+| Column | Type | Description | Nullable | Validation |
+| --- | --- | --- | --- | --- |
+| year | int | Reference year | no | not_null, primary_key (entity_id, year) |
+| entity_id | string | Stable entity identifier | no | not_null, primary_key (entity_id, year) |
+| metric_value | float | Normalized metric value | yes | range TBD |
+
+## Mart
+
+| Column | Type | Description | Nullable | Consumer |
+| --- | --- | --- | --- | --- |
+| year | int | Reference year | no | dashboard/report |
+| entity_id | string | Stable entity identifier | no | dashboard/report |
+| total_metric_value | float | Total metric value by year | yes | dashboard/report |
+| rows_in_year | int | Record count by year | no | QA/dashboard |
diff --git a/docs/decisions.md b/docs/decisions.md
new file mode 100644
index 0000000..16928b6
--- /dev/null
+++ b/docs/decisions.md
@@ -0,0 +1,22 @@
+# Decision log - [Nome dataset]
+
+Qui registriamo scelte tecniche che cambiano il significato dei dati, come mapping, filtri o definizioni delle metriche.
+
+## D-001 - Standardizzazione chiavi territoriali
+
+- decisione: [...]
+- motivo: [...]
+- impatto: [...]
+- alternative valutate: [...]
+
+## D-002 - Policy valori mancanti
+
+- decisione: [...]
+- motivo: [...]
+- impatto: [...]
+
+## D-003 - Definizione metrica [X]
+
+- decisione: [...]
+- formula: [...]
+- motivo: [...]
diff --git a/docs/definition-of-done.md b/docs/definition-of-done.md
deleted file mode 100644
index 2a49b5b..0000000
--- a/docs/definition-of-done.md
+++ /dev/null
@@ -1,37 +0,0 @@
-# Definition of Done (DoD)
-
-Un progetto è “Done” quando:
-
-## Domanda civica
-- La domanda è una sola, chiara e misurabile
-- L’output risponde davvero alla domanda (anche con limiti espliciti)
-
-## Dati
-- Raw immutato
-- Clean/Mart riproducibili
-- Tipi coerenti e naming standard (snake_case)
-- Nessuna logica “magica” nascosta
-
-## Metodo
-- Assunzioni dichiarate
-- Limiti dichiarati
-- Confronti omogenei (perimetro, periodo, definizioni)
-
-## Viz
-- Dashboard leggibile da non tecnico
-- Titoli “parlanti”
-- Unità di misura sempre visibili
-- Filtri essenziali (no overload)
-
-## QA
-- Sanity check superati (totali, outlier, ordini di grandezza)
-- Cross-check con fonte o riferimenti dove possibile
-- Issue aperte per anomalie residue + severità
-
-## Doc
-- README aggiornato con: scopo, dataset, metodo, output, limiti
-- Link a dashboard + dataset + query/notebook principali
-- “Cosa emerge” e “cosa non emerge” scritto chiaramente
-
-
-> Nota: una sintesi è in `WORKFLOW.md`.
\ No newline at end of file
diff --git a/docs/lab_links.md b/docs/lab_links.md
new file mode 100644
index 0000000..5c75687
--- /dev/null
+++ b/docs/lab_links.md
@@ -0,0 +1,15 @@
+# Lab Links
+
+Gli standard del Lab sono centralizzati e non vengono duplicati in questo template.
+Usa questa pagina come ponte verso handbook e repository org-wide.
+
+## Handbook
+
+- Handbook: Method — TODO: link repo dataciviclab/handbook
+- Handbook: Definition of Done — TODO: link repo dataciviclab/handbook
+- Handbook: Release policy — TODO: link repo dataciviclab/handbook
+- Handbook: Roles — TODO: link repo dataciviclab/handbook
+
+## Org-wide
+
+- dataciviclab/.github: Issue and PR templates — TODO: link repo dataciviclab/.github
diff --git a/docs/overview.md b/docs/overview.md
new file mode 100644
index 0000000..3d8b0ce
--- /dev/null
+++ b/docs/overview.md
@@ -0,0 +1,36 @@
+# Overview - [Nome dataset]
+
+## Cos'è il dataset
+
+Questo dataset organizza informazioni pubbliche per aiutare a leggere un fenomeno civico in modo chiaro, confrontabile e verificabile. Serve a trasformare dati sparsi o poco leggibili in una base utile per capire meglio cosa succede e come cambia nel tempo.
+
+## Unità di analisi e chiavi
+
+- unità di analisi: [Comune / ASL / Provincia / Regione / altro]
+- granularità temporale: [anno / mese / trimestre / altro]
+- chiavi principali: [es. codice_istat, anno]
+
+## Copertura
+
+- anni coperti: [...]
+- territorio coperto: [...]
+- note sulla copertura: [...]
+
+## Domande civiche che puoi esplorare
+
+- come varia questo fenomeno tra territori diversi
+- quali aree mostrano i cambiamenti più forti nel tempo
+- dove emergono possibili squilibri o criticità
+- quali territori risultano più esposti o più resilienti
+- dove servono approfondimenti o verifiche ulteriori
+
+## Limiti e caveat
+
+- il dataset dipende dalla qualità e dalla tempestività delle fonti ufficiali
+- alcune definizioni possono cambiare nel tempo
+- copertura e granularità non sempre coincidono per tutti gli anni
+- i dati descrivono il fenomeno solo entro i limiti delle fonti disponibili
+
+## Metodo
+
+Il progetto organizza i dati in tre passaggi semplici: raccolta del dato di partenza, pulizia e normalizzazione, produzione di tabelle finali per analisi e output pubblici. Per dettagli tecnici, vedi il repository Toolkit DataCivicLab.
diff --git a/docs/roles.md b/docs/roles.md
deleted file mode 100644
index e3bcf02..0000000
--- a/docs/roles.md
+++ /dev/null
@@ -1,21 +0,0 @@
-# Ruoli del progetto
-
-Questi ruoli servono a rendere il lavoro replicabile, verificabile e chiudibile.
-
-## Project Lead
-Facilita, coordina, mantiene la direzione, decide quando chiudere.
-
-## Data
-Pulizia dati, query, metriche, output clean/mart.
-
-## Metodo
-Assunzioni, limiti, coerenza definizioni, perimetri e confronti.
-
-## Viz
-Dashboard/visual: struttura, leggibilità, filtri, UX.
-
-## QA
-Controllo qualità: sanity check, cross-check, anomalie, regressioni.
-
-## Doc
-Spiegazioni pubbliche: README, note metodologiche, cosa emerge / non emerge.
diff --git a/docs/sources.md b/docs/sources.md
new file mode 100644
index 0000000..d3d9aa6
--- /dev/null
+++ b/docs/sources.md
@@ -0,0 +1,23 @@
+# Fonti ufficiali - [Nome dataset]
+
+## Fonte primaria
+
+- ente: [...]
+- pagina ufficiale: [...]
+- download: [...]
+- licenza o note d'uso: [...]
+
+## Mirror
+
+- [mirror 1]
+- [mirror 2]
+
+## Versioning
+
+- logica versioni: [per anno / per release / per aggiornamenti]
+- campi che cambiano nel tempo: [se noti]
+
+## Note su qualità e pubblicazione
+
+- [encoding, delimitatore, header, se rilevante]
+- [note su valori mancanti]
diff --git a/notebooks/00_quickstart.ipynb b/notebooks/00_quickstart.ipynb
new file mode 100644
index 0000000..261f93f
--- /dev/null
+++ b/notebooks/00_quickstart.ipynb
@@ -0,0 +1,172 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 00 Quickstart\n",
+    "\n",
+    "## Cosa fa / Cosa NON fa\n",
+    "\n",
+    "- esegue la pipeline solo se abiliti esplicitamente `RUN_TOOLKIT = True`\n",
+    "- cerca un mart leggibile e mostra schema, anteprima e controlli minimi\n",
+    "- non scrive file e non assume output già presenti in una cartella specifica"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "import subprocess\n",
+    "import duckdb\n",
+    "\n",
+    "ROOT = Path('.').resolve()\n",
+    "DATASET_YML = (ROOT / '..' / 'dataset.yml').resolve()\n",
+    "MART_DIRS = [\n",
+    "    (ROOT / '..' / 'data' / 'mart').resolve(),\n",
+    "    (ROOT / '..' / '_runs').resolve(),\n",
+    "]\n",
+    "TABLE_NAME = 'project_summary'\n",
+    "RUN_TOOLKIT = False\n",
+    "ROOT, DATASET_YML"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cmd = ['toolkit', 'run', '--dataset', str(DATASET_YML)]\n",
+    "if RUN_TOOLKIT:\n",
+    "    print('Running:', ' '.join(cmd))\n",
+    "    subprocess.run(cmd, check=True)\n",
+    "else:\n",
+    "    print('Toolkit run disabled.')\n",
+    "    print('Set RUN_TOOLKIT = True to execute the pipeline from this notebook.')\n",
+    "    print('Expected command:', ' '.join(cmd))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def find_mart_candidates(table_name):\n",
+    "    \"\"\"List parquet files under MART_DIRS that match table_name, each path once.\n",
+    "\n",
+    "    Within every existing base directory the exact-name hits come before the\n",
+    "    looser *name* hits; a path seen twice keeps its first position.\n",
+    "    \"\"\"\n",
+    "    globs = (\n",
+    "        f'**/{table_name}.parquet',\n",
+    "        f'**/*{table_name}*.parquet',\n",
+    "    )\n",
+    "    # Maps str(path) -> path. Dicts keep insertion order, so setdefault\n",
+    "    # drops repeated paths while leaving the first occurrence in place.\n",
+    "    ordered = {}\n",
+    "    for base in MART_DIRS:\n",
+    "        if base.exists():\n",
+    "            for pattern in globs:\n",
+    "                for hit in sorted(base.glob(pattern)):\n",
+    "                    ordered.setdefault(str(hit), hit)\n",
+    "    return list(ordered.values())\n",
+    "\n",
+    "mart_files = find_mart_candidates(TABLE_NAME)\n",
+    "mart_files[:5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "con = duckdb.connect()\n",
+    "mart_path = str(mart_files[0]) if mart_files else None\n",
+    "if mart_path:\n",
+    "    schema_df = con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{mart_path}')\").df()\n",
+    "    head_df = con.execute(f\"SELECT * FROM read_parquet('{mart_path}') LIMIT 10\").df()\n",
+    "    display(schema_df)\n",
+    "    display(head_df)\n",
+    "else:\n",
+    "    print('No mart parquet found. Run the pipeline first or update TABLE_NAME.')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_schema(path):\n",
+    "    \"\"\"Run DESCRIBE on the parquet file at `path` and return the result with lower-cased column labels.\"\"\"\n",
+    "    described = con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{path}')\").df()\n",
+    "    return described.rename(columns=lambda label: str(label).lower())\n",
+    "\n",
+    "def choose_columns(schema):\n",
+    "    \"\"\"Guess (year_col, metric_col) from a DESCRIBE result with lower-cased labels; either may be None.\"\"\"\n",
+    "    name_col = 'column_name' if 'column_name' in schema.columns else schema.columns[0]\n",
+    "    type_col = 'column_type' if 'column_type' in schema.columns else schema.columns[1]\n",
+    "    rows = [{'name': str(row[name_col]), 'type': str(row[type_col]).upper()} for _, row in schema.iterrows()]\n",
+    "    year_col = next((r['name'] for r in rows if r['name'].lower() == 'year' or 'anno' in r['name'].lower()), None)\n",
+    "    # Match the base type name exactly (a bare 'INT' substring also hits INTERVAL and INTEGER[]) and never reuse the year column as the metric.\n",
+    "    numeric_types = {'TINYINT', 'SMALLINT', 'INTEGER', 'BIGINT', 'HUGEINT', 'UTINYINT', 'USMALLINT', 'UINTEGER', 'UBIGINT', 'UHUGEINT', 'DECIMAL', 'DOUBLE', 'FLOAT', 'REAL'}\n",
+    "    numeric_rows = [r for r in rows if r['name'] != year_col and r['type'].split('(')[0].strip() in numeric_types]\n",
+    "    metric_col = next((r['name'] for r in numeric_rows if any(token in r['name'].lower() for token in ['value', 'tot', 'importo', 'ammontare', 'pct', 'percent'])), None)\n",
+    "    if metric_col is None and numeric_rows:\n",
+    "        metric_col = numeric_rows[0]['name']\n",
+    "    return year_col, metric_col\n",
+    "\n",
+    "if mart_path:\n",
+    "    schema_df = read_schema(mart_path)\n",
+    "    YEAR_COL, METRIC_COL = choose_columns(schema_df)\n",
+    "    print({'YEAR_COL': YEAR_COL, 'METRIC_COL': METRIC_COL})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if mart_path:\n",
+    "    row_count = con.execute(f\"SELECT COUNT(*) AS row_count FROM read_parquet('{mart_path}')\").df()\n",
+    "    display(row_count)\n",
+    "\n",
+    "    if YEAR_COL:\n",
+    "        distinct_count = con.execute(\n",
+    "            f\"SELECT COUNT(DISTINCT {YEAR_COL}) AS distinct_key_count FROM read_parquet('{mart_path}')\"\n",
+    "        ).df()\n",
+    "        display(distinct_count)\n",
+    "    else:\n",
+    "        print('No year-like key detected. Update the helper or inspect schema_df manually.')\n",
+    "\n",
+    "    check_columns = [col for col in [YEAR_COL, METRIC_COL] if col]\n",
+    "    if check_columns:\n",
+    "        null_expr = ', '.join([f\"AVG(CASE WHEN {col} IS NULL THEN 1 ELSE 0 END) AS {col}_null_rate\" for col in check_columns])\n",
+    "        null_rates = con.execute(f\"SELECT {null_expr} FROM read_parquet('{mart_path}')\").df()\n",
+    "        display(null_rates)\n",
+    "    else:\n",
+    "        print('No suitable columns found for null-rate sanity checks.')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/01_explore_mart.ipynb b/notebooks/01_explore_mart.ipynb
new file mode 100644
index 0000000..7ad159b
--- /dev/null
+++ b/notebooks/01_explore_mart.ipynb
@@ -0,0 +1,140 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 01 Explore MART\n",
+    "\n",
+    "## Cosa fa / Cosa NON fa\n",
+    "\n",
+    "- apre una tabella mart e ne mostra una lettura iniziale\n",
+    "- prova a scegliere automaticamente una colonna anno e una metrica numerica\n",
+    "- se non trova colonne adatte, salta le query dipendenti e mostra istruzioni"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "import duckdb\n",
+    "\n",
+    "ROOT = Path('.').resolve()\n",
+    "TABLE_NAME = 'project_summary'\n",
+    "MART_GLOBS = [\n",
+    "    (ROOT / '..' / 'data' / 'mart').resolve(),\n",
+    "    (ROOT / '..' / '_runs').resolve(),\n",
+    "]\n",
+    "\n",
+    "def first_match(table_name):\n",
+    "    \"\"\"Return the first parquet (in sorted order) under MART_GLOBS whose file name contains table_name, or None.\"\"\"\n",
+    "    for base in (candidate for candidate in MART_GLOBS if candidate.exists()):\n",
+    "        hits = sorted(base.glob(f'**/*{table_name}*.parquet'))\n",
+    "        if hits:\n",
+    "            return hits[0]\n",
+    "    return None\n",
+    "\n",
+    "mart_path = first_match(TABLE_NAME)\n",
+    "mart_path"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Descrizione breve:\n",
+    "\n",
+    "- questo notebook serve a capire cosa c'è nella tabella finale\n",
+    "- aggiorna `TABLE_NAME` se il dataset usa un altro mart principale"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "con = duckdb.connect()\n",
+    "\n",
+    "def read_schema(path):\n",
+    "    \"\"\"Run DESCRIBE on the parquet file at `path` and return the result with lower-cased column labels.\"\"\"\n",
+    "    described = con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{path}')\").df()\n",
+    "    return described.rename(columns=lambda label: str(label).lower())\n",
+    "\n",
+    "def choose_columns(schema):\n",
+    "    \"\"\"Guess (year_col, metric_col) from a DESCRIBE result with lower-cased labels; either may be None.\"\"\"\n",
+    "    name_col = 'column_name' if 'column_name' in schema.columns else schema.columns[0]\n",
+    "    type_col = 'column_type' if 'column_type' in schema.columns else schema.columns[1]\n",
+    "    rows = [{'name': str(row[name_col]), 'type': str(row[type_col]).upper()} for _, row in schema.iterrows()]\n",
+    "    year_col = next((r['name'] for r in rows if r['name'].lower() == 'year' or 'anno' in r['name'].lower()), None)\n",
+    "    # Match the base type name exactly (a bare 'INT' substring also hits INTERVAL and INTEGER[]) and never reuse the year column as the metric.\n",
+    "    numeric_types = {'TINYINT', 'SMALLINT', 'INTEGER', 'BIGINT', 'HUGEINT', 'UTINYINT', 'USMALLINT', 'UINTEGER', 'UBIGINT', 'UHUGEINT', 'DECIMAL', 'DOUBLE', 'FLOAT', 'REAL'}\n",
+    "    numeric_rows = [r for r in rows if r['name'] != year_col and r['type'].split('(')[0].strip() in numeric_types]\n",
+    "    metric_col = next((r['name'] for r in numeric_rows if any(token in r['name'].lower() for token in ['value', 'tot', 'importo', 'ammontare', 'pct', 'percent'])), None)\n",
+    "    if metric_col is None and numeric_rows:\n",
+    "        metric_col = numeric_rows[0]['name']\n",
+    "    return year_col, metric_col\n",
+    "\n",
+    "if mart_path:\n",
+    "    schema_df = read_schema(str(mart_path))\n",
+    "    YEAR_COL, METRIC_COL = choose_columns(schema_df)\n",
+    "    preview = con.execute(f\"SELECT * FROM read_parquet('{mart_path}') LIMIT 20\").df()\n",
+    "    display(schema_df)\n",
+    "    display(preview)\n",
+    "    print({'YEAR_COL': YEAR_COL, 'METRIC_COL': METRIC_COL})\n",
+    "else:\n",
+    "    print('No mart parquet found. Run the pipeline first or update TABLE_NAME.')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if mart_path and YEAR_COL and METRIC_COL:\n",
+    "    by_year = con.execute(\n",
+    "        f\"SELECT {YEAR_COL} AS year_like, COUNT(*) AS rows, SUM({METRIC_COL}) AS metric_total FROM read_parquet('{mart_path}') GROUP BY 1 ORDER BY 1\"\n",
+    "    ).df()\n",
+    "    display(by_year)\n",
+    "else:\n",
+    "    print('No year-like column or metric column detected. Update TABLE_NAME or inspect schema_df manually.')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if mart_path and METRIC_COL:\n",
+    "    top_rows = con.execute(\n",
+    "        f\"SELECT * FROM read_parquet('{mart_path}') ORDER BY {METRIC_COL} DESC NULLS LAST LIMIT 10\"\n",
+    "    ).df()\n",
+    "    bottom_rows = con.execute(\n",
+    "        f\"SELECT * FROM read_parquet('{mart_path}') ORDER BY {METRIC_COL} ASC NULLS LAST LIMIT 10\"\n",
+    "    ).df()\n",
+    "    display(top_rows)\n",
+    "    display(bottom_rows)\n",
+    "else:\n",
+    "    print('No metric column detected. Update the helper or inspect schema_df manually.')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/02_quality_checks.ipynb b/notebooks/02_quality_checks.ipynb
new file mode 100644
index 0000000..ab512df
--- /dev/null
+++ b/notebooks/02_quality_checks.ipynb
@@ -0,0 +1,134 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 02 Quality checks\n",
+    "\n",
+    "## Cosa fa / Cosa NON fa\n",
+    "\n",
+    "- esegue controlli riutilizzabili su duplicati, missingness e range\n",
+    "- prova a rilevare colonne numeriche in modo automatico\n",
+    "- se mancano chiavi o colonne adatte, stampa istruzioni invece di fallire"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "import duckdb\n",
+    "\n",
+    "ROOT = Path('.').resolve()\n",
+    "TABLE_NAME = 'project_summary'\n",
+    "KEY_COLUMNS = []\n",
+    "NUMERIC_COLUMNS = []\n",
+    "\n",
+    "def find_mart(table_name):\n",
+    "    for base in [(ROOT / '..' / 'data' / 'mart').resolve(), (ROOT / '..' / '_runs').resolve()]:\n",
+    "        if not base.exists():\n",
+    "            continue\n",
+    "        matches = sorted(base.glob(f'**/*{table_name}*.parquet'))\n",
+    "        if matches:\n",
+    "            return matches[0]\n",
+    "    return None\n",
+    "\n",
+    "mart_path = find_mart(TABLE_NAME)\n",
+    "mart_path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "con = duckdb.connect()\n",
+    "\n",
+    "def read_schema(path):\n",
+    "    schema = con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{path}')\").df()\n",
+    "    schema.columns = [str(col).lower() for col in schema.columns]\n",
+    "    return schema\n",
+    "\n",
+    "def detect_numeric_columns(schema):\n",
+    "    name_col = 'column_name' if 'column_name' in schema.columns else schema.columns[0]\n",
+    "    type_col = 'column_type' if 'column_type' in schema.columns else schema.columns[1]\n",
+    "    numeric = []\n",
+    "    for _, row in schema.iterrows():\n",
+    "        dtype = str(row[type_col]).upper()\n",
+    "        if any(token in dtype for token in ['INT', 'DECIMAL', 'DOUBLE', 'FLOAT', 'REAL', 'BIGINT']):\n",
+    "            numeric.append(str(row[name_col]))\n",
+    "    return numeric\n",
+    "\n",
+    "def duplicate_key_report(path, key_columns):\n",
+    "    keys = ', '.join(key_columns)\n",
+    "    sql = f\"\"\"\n",
+    "        SELECT {keys}, COUNT(*) AS dup_count\n",
+    "        FROM read_parquet('{path}')\n",
+    "        GROUP BY {keys}\n",
+    "        HAVING COUNT(*) > 1\n",
+    "        ORDER BY dup_count DESC\n",
+    "        LIMIT 20\n",
+    "    \"\"\"\n",
+    "    return con.execute(sql).df()\n",
+    "\n",
+    "def missingness_report(path):\n",
+    "    columns = [row[0] for row in con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{path}')\").fetchall()]\n",
+    "    expr = ', '.join([f\"AVG(CASE WHEN {col} IS NULL THEN 1 ELSE 0 END) AS {col}_null_rate\" for col in columns])\n",
+    "    return con.execute(f\"SELECT {expr} FROM read_parquet('{path}')\").df().T.reset_index()\n",
+    "\n",
+    "def range_report(path, numeric_columns):\n",
+    "    expr = ', '.join([f\"MIN({col}) AS {col}_min, MAX({col}) AS {col}_max\" for col in numeric_columns])\n",
+    "    return con.execute(f\"SELECT {expr} FROM read_parquet('{path}')\").df().T.reset_index()\n",
+    "\n",
+    "if mart_path:\n",
+    "    schema_df = read_schema(str(mart_path))\n",
+    "    detected_numeric = detect_numeric_columns(schema_df)\n",
+    "    numeric_for_range = [col for col in NUMERIC_COLUMNS if col in detected_numeric]\n",
+    "    if not numeric_for_range:\n",
+    "        numeric_for_range = detected_numeric[:3]\n",
+    "    display(schema_df)\n",
+    "    print({'KEY_COLUMNS': KEY_COLUMNS, 'NUMERIC_COLUMNS': numeric_for_range})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if mart_path:\n",
+    "    if KEY_COLUMNS:\n",
+    "        dup_df = duplicate_key_report(str(mart_path), KEY_COLUMNS)\n",
+    "        display(dup_df)\n",
+    "    else:\n",
+    "        print('Duplicate-key check skipped: imposta KEY_COLUMNS.')\n",
+    "\n",
+    "    missing_df = missingness_report(str(mart_path))\n",
+    "    display(missing_df)\n",
+    "\n",
+    "    if numeric_for_range:\n",
+    "        range_df = range_report(str(mart_path), numeric_for_range)\n",
+    "        display(range_df)\n",
+    "    else:\n",
+    "        print('Range check skipped: no numeric columns detected.')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/03_dashboard_export.ipynb b/notebooks/03_dashboard_export.ipynb
new file mode 100644
index 0000000..0b052b9
--- /dev/null
+++ b/notebooks/03_dashboard_export.ipynb
@@ -0,0 +1,131 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 03 Dashboard export\n",
+    "\n",
+    "## Cosa fa / Cosa NON fa\n",
+    "\n",
+    "- prepara un export di esempio partendo da un mart disponibile\n",
+    "- non scrive file finché `EXPORT = False`\n",
+    "- se non trova colonne adatte, esporta un campione generico come fallback"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "import duckdb\n",
+    "\n",
+    "ROOT = Path('.').resolve()\n",
+    "EXPORT = False\n",
+    "OUT_DIR = (ROOT / '..' / '_tmp').resolve()\n",
+    "TABLE_NAME = 'project_summary'\n",
+    "\n",
+    "def find_mart(table_name):\n",
+    "    for base in [(ROOT / '..' / 'data' / 'mart').resolve(), (ROOT / '..' / '_runs').resolve()]:\n",
+    "        if not base.exists():\n",
+    "            continue\n",
+    "        matches = sorted(base.glob(f'**/*{table_name}*.parquet'))\n",
+    "        if matches:\n",
+    "            return matches[0]\n",
+    "    return None\n",
+    "\n",
+    "mart_path = find_mart(TABLE_NAME)\n",
+    "OUT_DIR"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "con = duckdb.connect()\n",
+    "\n",
+    "def read_schema(path):\n",
+    "    schema = con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{path}')\").df()\n",
+    "    schema.columns = [str(col).lower() for col in schema.columns]\n",
+    "    return schema\n",
+    "\n",
+    "def choose_columns(schema):\n",
+    "    name_col = 'column_name' if 'column_name' in schema.columns else schema.columns[0]\n",
+    "    type_col = 'column_type' if 'column_type' in schema.columns else schema.columns[1]\n",
+    "    rows = [\n",
+    "        {'name': str(row[name_col]), 'type': str(row[type_col]).upper()}\n",
+    "        for _, row in schema.iterrows()\n",
+    "    ]\n",
+    "    year_col = next((r['name'] for r in rows if r['name'].lower() == 'year' or 'anno' in r['name'].lower()), None)\n",
+    "    numeric_rows = [r for r in rows if any(token in r['type'] for token in ['INT', 'DECIMAL', 'DOUBLE', 'FLOAT', 'REAL', 'BIGINT'])]\n",
+    "    metric_col = next((r['name'] for r in numeric_rows if any(token in r['name'].lower() for token in ['value', 'tot', 'importo', 'ammontare', 'pct', 'percent'])), None)\n",
+    "    if metric_col is None and numeric_rows:\n",
+    "        metric_col = numeric_rows[0]['name']\n",
+    "    return year_col, metric_col\n",
+    "\n",
+    "if mart_path:\n",
+    "    schema_df = read_schema(str(mart_path))\n",
+    "    YEAR_COL, METRIC_COL = choose_columns(schema_df)\n",
+    "    print({'YEAR_COL': YEAR_COL, 'METRIC_COL': METRIC_COL})\n",
+    "else:\n",
+    "    print('No mart parquet found. Run the pipeline first or update TABLE_NAME.')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if mart_path and YEAR_COL and METRIC_COL:\n",
+    "    export_df = con.execute(\n",
+    "        f\"SELECT {YEAR_COL} AS year_like, {METRIC_COL} AS metric_value FROM read_parquet('{mart_path}') ORDER BY 1\"\n",
+    "    ).df()\n",
+    "elif mart_path:\n",
+    "    print('Using generic fallback export: no year-like or metric column detected.')\n",
+    "    export_df = con.execute(f\"SELECT * FROM read_parquet('{mart_path}') LIMIT 1000\").df()\n",
+    "else:\n",
+    "    export_df = None\n",
+    "\n",
+    "if export_df is not None:\n",
+    "    display(export_df.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if EXPORT and export_df is not None:\n",
+    "    OUT_DIR.mkdir(parents=True, exist_ok=True)\n",
+    "    csv_path = OUT_DIR / f'{TABLE_NAME}_dashboard.csv'\n",
+    "    parquet_path = OUT_DIR / f'{TABLE_NAME}_dashboard.parquet'\n",
+    "    export_df.to_csv(csv_path, index=False)\n",
+    "    con.register('export_df_view', export_df)\n",
+    "    con.execute(f\"COPY (SELECT * FROM export_df_view) TO '{parquet_path}' (FORMAT PARQUET)\")\n",
+    "    print(csv_path)\n",
+    "    print(parquet_path)\n",
+    "else:\n",
+    "    print('Export disabled. Set EXPORT = True to write files into ../_tmp/.')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/README.md b/notebooks/README.md
index 8d0842d..a5d1986 100644
--- a/notebooks/README.md
+++ b/notebooks/README.md
@@ -1,32 +1,18 @@
-# 📓 /notebooks – Pipeline e analisi
+# /notebooks - notebook standard per il dataset
 
-Questa cartella contiene notebook (Colab / Jupyter) per:
-- ingestione dati (raw)
-- pulizia/normalizzazione (clean)
-- aggregazioni e KPI (mart)
-- analisi esplorative (quando servono)
+Questa cartella contiene notebook leggeri e clonabili per avvio rapido, esplorazione dei mart e controlli di qualita.
+Usano solo Python standard, `duckdb`, path relativi e il file `../dataset.yml` come riferimento di progetto.
 
----
+## Notebook inclusi
 
-## ✅ Regole minime
+- `00_quickstart.ipynb` - esegue la pipeline e controlla che esistano tabelle mart leggibili
+- `01_explore_mart.ipynb` - esplorazione public-first dei dati finali
+- `02_quality_checks.ipynb` - controlli ripetibili su duplicati, missingness e range
+- `03_dashboard_export.ipynb` - export opzionali in `../_tmp/`, disattivati di default
 
-- notebook numerati: `01_...`, `02_...`, `03_...`
-- eseguibili dall’inizio alla fine (no “celle magiche”)
-- commenti brevi: **cosa** fai e **perché**
-- niente path locali: usare riferimenti chiari al Drive / cartelle di progetto
+## Regole
 
----
-
-## 🔁 Collegamento con `/data`
-
-Ogni notebook dovrebbe aggiornare (o citare) i README di:
-- `/data/raw`
-- `/data/clean`
-- `/data/mart`
-
-Così chi arriva dopo capisce:
-- da dove arrivano i dati
-- cosa è stato fatto
-- dove trovare i file su Drive
-
----
+- non salvare output pesanti nel repo
+- se serve esportare file, usa `../_tmp/`
+- mantieni i notebook generici: aggiorna nomi tabella e chiavi senza introdurre logica dataset-specifica
+- per dettagli tecnici della pipeline, vedi il repository Toolkit DataCivicLab
diff --git a/queries/README.md b/queries/README.md
deleted file mode 100644
index ae558a0..0000000
--- a/queries/README.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# 🧮 Query
-
-Questa cartella contiene le query utilizzate per:
-- pulizia
-- aggregazione
-- calcolo metriche
-
-Ogni query deve:
-- avere un nome descrittivo
-- essere commentata
-- indicare input e output
diff --git a/scripts/smoke.sh b/scripts/smoke.sh
new file mode 100644
index 0000000..4b056e6
--- /dev/null
+++ b/scripts/smoke.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env sh
+
+set -eu
+
+DATASET_FILE="${DATASET_FILE:-dataset.yml}"
+TOOLKIT_BIN="${TOOLKIT_BIN:-toolkit}"
+DCL_ROOT="${DCL_ROOT:-$(pwd)}"
+
+export DCL_ROOT
+
+detect_python() {
+  if command -v python >/dev/null 2>&1; then
+    echo python
+    return 0
+  fi
+  if command -v python3 >/dev/null 2>&1; then
+    echo python3
+    return 0
+  fi
+  return 1
+}
+
+detect_toolkit_module() {
+  if [ -z "${PYTHON_BIN:-}" ]; then
+    return 1
+  fi
+  if "${PYTHON_BIN}" -c "import importlib.util, sys; sys.exit(0 if importlib.util.find_spec('toolkit') else 1)" >/dev/null 2>&1; then
+    echo toolkit
+    return 0
+  fi
+  if "${PYTHON_BIN}" -c "import importlib.util, sys; sys.exit(0 if importlib.util.find_spec('dataciviclab_toolkit') else 1)" >/dev/null 2>&1; then
+    echo dataciviclab_toolkit
+    return 0
+  fi
+  return 1
+}
+
+detect_year() {
+  if [ -n "${YEAR:-}" ]; then
+    echo "${YEAR}"
+    return 0
+  fi
+  if [ -n "${1:-}" ]; then
+    echo "${1}"
+    return 0
+  fi
+  if [ -f "${DATASET_FILE}" ]; then
+    parsed_year="$(sed -n 's/^[[:space:]]*years:[[:space:]]*\[\([0-9][0-9][0-9][0-9]\).*/\1/p' "${DATASET_FILE}" | head -n 1)"
+    if [ -n "${parsed_year}" ]; then
+      echo "${parsed_year}"
+      return 0
+    fi
+  fi
+  echo 2023
+}
+
+run_toolkit() {
+  if [ -n "${TOOLKIT_COMMAND:-}" ]; then
+    "${TOOLKIT_COMMAND}" "$@"
+    return 0
+  fi
+  if [ -n "${TOOLKIT_MODULE:-}" ]; then
+    "${PYTHON_BIN}" -m "${TOOLKIT_MODULE}" "$@"
+    return 0
+  fi
+  echo "Toolkit non disponibile: imposta TOOLKIT_BIN oppure installa un modulo Python 'toolkit' o 'dataciviclab_toolkit'." >&2
+  exit 2
+}
+
+PYTHON_BIN="$(detect_python || true)"
+TOOLKIT_COMMAND=""
+TOOLKIT_MODULE=""
+
+if command -v "${TOOLKIT_BIN}" >/dev/null 2>&1; then
+  TOOLKIT_COMMAND="${TOOLKIT_BIN}"
+else
+  TOOLKIT_MODULE="$(detect_toolkit_module || true)"
+fi
+
+if [ -z "${TOOLKIT_COMMAND}" ] && [ -z "${TOOLKIT_MODULE}" ]; then
+  echo "Toolkit non trovato. Provati: comando '${TOOLKIT_BIN}', modulo 'toolkit', modulo 'dataciviclab_toolkit'." >&2
+  exit 2
+fi
+
+YEAR="$(detect_year "${1:-}")"
+
+echo "DCL_ROOT=${DCL_ROOT}"
+echo "DATASET_FILE=${DATASET_FILE}"
+echo "TOOLKIT_BIN=${TOOLKIT_BIN}"
+echo "TOOLKIT_COMMAND=${TOOLKIT_COMMAND:-<none>}"
+echo "TOOLKIT_MODULE=${TOOLKIT_MODULE:-<none>}"
+echo "YEAR=${YEAR}"
+
+run_toolkit run raw --config "${DATASET_FILE}" --year "${YEAR}"
+run_toolkit run clean --config "${DATASET_FILE}" --year "${YEAR}"
+run_toolkit run mart --config "${DATASET_FILE}" --year "${YEAR}"
+run_toolkit validate --config "${DATASET_FILE}" --year "${YEAR}"
diff --git a/sql/README.md b/sql/README.md
new file mode 100644
index 0000000..a97570e
--- /dev/null
+++ b/sql/README.md
@@ -0,0 +1,15 @@
+# Query
+
+Questa cartella (`sql/`) contiene le query canoniche della pipeline: `clean.sql` per il layer CLEAN e le query MART in `mart/`.
+
+## Regole
+
+- usare nomi descrittivi
+- commentare input e output
+- evitare dipendenze implicite da path assoluti
+
+## Nota
+
+La pipeline toolkit-first usa come riferimento canonico la cartella `sql/`.
+
+Mantieni `queries/` solo per analisi non canoniche o work-in-progress.
diff --git a/sql/clean.sql b/sql/clean.sql
new file mode 100644
index 0000000..51afa63
--- /dev/null
+++ b/sql/clean.sql
@@ -0,0 +1,23 @@
+-- clean.sql
+-- Purpose: placeholder transformation for the CLEAN layer.
+-- Contract: read from the source configured in dataset.yml and keep the query portable.
+
+with source_rows as (
+    select
+        year,
+        entity_id,
+        metric_value
+    from raw_input
+),
+normalized as (
+    select
+        cast(year as integer) as year,
+        cast(entity_id as varchar) as entity_id,
+        cast(metric_value as double) as metric_value
+    from source_rows
+)
+select
+    year,
+    entity_id,
+    metric_value
+from normalized;
diff --git a/sql/mart/project_summary.sql b/sql/mart/project_summary.sql
new file mode 100644
index 0000000..eaa4488
--- /dev/null
+++ b/sql/mart/project_summary.sql
@@ -0,0 +1,25 @@
+-- project_summary.sql
+-- Purpose: placeholder MART query aligned with the toolkit `mart.tables[]` contract.
+-- Contract: consume `clean_input` and expose a dashboard-ready table.
+
+with clean_rows as (
+    select
+        year,
+        entity_id,
+        metric_value
+    from clean_input
+),
+project_summary as (
+    select
+        year,
+        count(*) as rows_in_year,
+        sum(metric_value) as total_metric_value
+    from clean_rows
+    group by year
+)
+select
+    year,
+    rows_in_year,
+    total_metric_value
+from project_summary
+order by year;
diff --git a/tests/test_contract.py b/tests/test_contract.py
new file mode 100644
index 0000000..06c79bf
--- /dev/null
+++ b/tests/test_contract.py
@@ -0,0 +1,121 @@
+from __future__ import annotations
+
+import re
+from pathlib import Path
+
+import yaml
+
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+DATASET_FILE = REPO_ROOT / "dataset.yml"
+DATA_DIR = REPO_ROOT / "data"
+BLOCKED_DATA_EXTENSIONS = {".parquet", ".csv", ".jsonl", ".zip", ".xlsx", ".tsv"}
+REQUIRED_FILES = [
+    REPO_ROOT / "dataset.yml",
+    REPO_ROOT / "sql" / "clean.sql",
+    REPO_ROOT / "sql" / "mart" / "project_summary.sql",
+    REPO_ROOT / "docs" / "sources.md",
+    REPO_ROOT / "docs" / "decisions.md",
+    REPO_ROOT / "docs" / "data_dictionary.md",
+    REPO_ROOT / "scripts" / "smoke.sh",
+    REPO_ROOT / ".github" / "workflows" / "ci.yml",
+]
+
+
+def _iter_path_values(node: object):
+    if isinstance(node, dict):
+        for key, value in node.items():
+            if isinstance(value, str) and key in {"root", "source", "sql", "target", "dir", "path", "filename"}:
+                yield key, value
+            yield from _iter_path_values(value)
+    elif isinstance(node, list):
+        for item in node:
+            yield from _iter_path_values(item)
+
+
+def test_required_files_exist() -> None:
+    missing = [str(path.relative_to(REPO_ROOT)) for path in REQUIRED_FILES if not path.exists()]
+    assert not missing, f"Missing required template files: {missing}"
+
+
+def test_dataset_uses_supported_contract_keys() -> None:
+    dataset = yaml.safe_load(DATASET_FILE.read_text(encoding="utf-8"))
+    clean_read = dataset["clean"]["read"]
+
+    assert "dataset" in dataset
+    assert "name" in dataset["dataset"]
+    assert "years" in dataset["dataset"]
+    assert dataset["validation"]["fail_on_error"] is True
+    assert "source" in clean_read
+    assert "header" in clean_read
+    assert "columns" in clean_read
+    assert "csv" not in clean_read
+    assert dataset["clean"]["required_columns"]
+    assert dataset["clean"]["validate"]["primary_key"]
+    assert dataset["clean"]["validate"]["not_null"]
+    assert "mart" in dataset
+    assert "tables" in dataset["mart"]
+    assert isinstance(dataset["mart"]["tables"], list)
+    assert dataset["mart"]["tables"]
+    assert dataset["mart"]["required_tables"]
+    assert dataset["mart"]["validate"]["table_rules"]["project_summary"]["required_columns"]
+
+
+def test_dataset_matches_smoke_contract_shape() -> None:
+    dataset = yaml.safe_load(DATASET_FILE.read_text(encoding="utf-8"))
+
+    assert dataset["output"]["artifacts"] == "minimal"
+    assert "csv" not in dataset["clean"]["read"]
+
+
+def test_dataset_paths_are_relative_and_posix() -> None:
+    dataset = yaml.safe_load(DATASET_FILE.read_text(encoding="utf-8"))
+
+    for key, value in _iter_path_values(dataset):
+        if value.startswith("http://") or value.startswith("https://"):
+            continue
+        assert value, f"Empty path value for key '{key}'"
+        assert "\\" not in value, f"Path for key '{key}' must use POSIX separators: {value}"
+        assert not value.startswith("/"), f"Absolute POSIX path found for key '{key}': {value}"
+        assert not value.startswith("~"), f"Home-relative path found for key '{key}': {value}"
+        assert not re.match(r"^[A-Za-z]:[\\/]", value), f"Absolute Windows path found for key '{key}': {value}"
+
+
+def test_yaml_sql_paths_match_template_files() -> None:
+    dataset = yaml.safe_load(DATASET_FILE.read_text(encoding="utf-8"))
+
+    assert dataset["clean"]["sql"] == "sql/clean.sql"
+
+    mart_tables = dataset["mart"]["tables"]
+    project_summary = next((table for table in mart_tables if table["name"] == "project_summary"), None)
+    assert project_summary is not None, "Missing mart table 'project_summary'"
+    assert project_summary["sql"] == "sql/mart/project_summary.sql"
+
+
+def test_output_artifacts_is_configured() -> None:
+    dataset = yaml.safe_load(DATASET_FILE.read_text(encoding="utf-8"))
+
+    assert dataset["output"]["artifacts"] == "minimal"
+
+
+def test_data_directory_does_not_contain_committed_outputs() -> None:
+    offenders: list[str] = []
+
+    if not DATA_DIR.exists():
+        return
+
+    for path in DATA_DIR.rglob("*"):
+        if not path.is_file():
+            continue
+        if "_examples" in path.parts:
+            continue
+        if path.name == "README.md":
+            continue
+        if path.suffix.lower() not in BLOCKED_DATA_EXTENSIONS:
+            continue
+        offenders.append(str(path.relative_to(REPO_ROOT)).replace("\\", "/"))
+
+    assert not offenders, (
+        "Non committare output in data/: usa data/_examples per sample piccoli. "
+        f"Found: {offenders}"
+    )

From 1639c14650d7de9fe6a1e5f09f4178f8165499f8 Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Mon, 2 Mar 2026 09:19:05 +0000
Subject: [PATCH 02/11] cambi finali integrazione toolkit

---
 .github/seed-issues/00_kickoff.md             |   5 +
 .github/seed-issues/01_civic_questions.md     |   5 +
 .github/seed-issues/02_sources.md             |   9 +-
 .github/seed-issues/03_raw.md                 |  13 +-
 .github/seed-issues/04_clean.md               |   9 +-
 .github/seed-issues/05_mart.md                |  11 +-
 .github/seed-issues/06_validation_qa.md       |   9 +-
 .github/seed-issues/07_dashboard.md           |   4 +
 .github/seed-issues/08_release.md             |   5 +
 .../09_docs_decisions_dictionary.md           |   4 +
 .github/seed-issues/10_maintenance.md         |   7 +-
 .github/workflows/ci.yml                      |   9 +-
 .gitignore                                    |   4 +
 README.md                                     |  59 ++++++-
 WORKFLOW.md                                   |  15 ++
 dashboard/README.md                           |   1 +
 dataset.yml                                   |  92 +++++-----
 docs/README.md                                |   3 +
 docs/contributing.md                          |  74 ++++++++-
 notebooks/00_quickstart.ipynb                 | 157 +++++-------------
 notebooks/01_explore_mart.ipynb               | 140 ----------------
 notebooks/01_inspect_raw.ipynb                | 119 +++++++++++++
 notebooks/02_inspect_clean.ipynb              | 102 ++++++++++++
 notebooks/02_quality_checks.ipynb             | 134 ---------------
 notebooks/03_dashboard_export.ipynb           | 131 ---------------
 notebooks/03_explore_mart.ipynb               | 100 +++++++++++
 notebooks/04_quality_checks.ipynb             | 106 ++++++++++++
 notebooks/05_dashboard_export.ipynb           | 122 ++++++++++++++
 notebooks/README.md                           |  17 +-
 scripts/publish_to_drive.py                   | 155 +++++++++++++++++
 scripts/smoke.sh                              |  36 ++--
 sql/clean.sql                                 |  52 +++---
 sql/mart/mart_ok.sql                          |  15 ++
 sql/mart/project_summary.sql                  |  25 ---
 tests/test_contract.py                        |  77 ++++++---
 35 files changed, 1149 insertions(+), 677 deletions(-)
 delete mode 100644 notebooks/01_explore_mart.ipynb
 create mode 100644 notebooks/01_inspect_raw.ipynb
 create mode 100644 notebooks/02_inspect_clean.ipynb
 delete mode 100644 notebooks/02_quality_checks.ipynb
 delete mode 100644 notebooks/03_dashboard_export.ipynb
 create mode 100644 notebooks/03_explore_mart.ipynb
 create mode 100644 notebooks/04_quality_checks.ipynb
 create mode 100644 notebooks/05_dashboard_export.ipynb
 create mode 100644 scripts/publish_to_drive.py
 create mode 100644 sql/mart/mart_ok.sql
 delete mode 100644 sql/mart/project_summary.sql

diff --git a/.github/seed-issues/00_kickoff.md b/.github/seed-issues/00_kickoff.md
index 282ac78..5671614 100644
--- a/.github/seed-issues/00_kickoff.md
+++ b/.github/seed-issues/00_kickoff.md
@@ -29,6 +29,11 @@ Avviare il progetto dataset con perimetro chiaro, domanda civica misurabile e co
 
 Progetto inizializzato con contratto di base valido e documentazione minima pronta per il source onboarding.
 
+## Supporto operativo
+
+- notebook consigliato: `notebooks/00_quickstart.ipynb`
+- comando minimo: `python -m pytest tests/test_contract.py`
+
 ## File da toccare
 
 - `dataset.yml`
diff --git a/.github/seed-issues/01_civic_questions.md b/.github/seed-issues/01_civic_questions.md
index 9487f1f..3a0aeaf 100644
--- a/.github/seed-issues/01_civic_questions.md
+++ b/.github/seed-issues/01_civic_questions.md
@@ -27,6 +27,11 @@ Definire le domande civiche che guideranno fonti, metriche, unità di analisi e
 
 Una base pubblica e metodologica chiara da cui far discendere le scelte su fonti, CLEAN, MART e output finali.
 
+## Supporto operativo
+
+- notebook consigliato: nessuno, il lavoro e principalmente metodologico e documentale
+- file guida: `README.md`, `docs/overview.md`, `docs/decisions.md`
+
 ## File da toccare
 
 - `README.md`
diff --git a/.github/seed-issues/02_sources.md b/.github/seed-issues/02_sources.md
index 2373e62..ce12dc8 100644
--- a/.github/seed-issues/02_sources.md
+++ b/.github/seed-issues/02_sources.md
@@ -19,7 +19,7 @@ Qualificare la fonte e codificare in modo riproducibile come il toolkit deve leg
 ## Checklist
 
 - [ ] Identificare fonte primaria, URL canonico e frequenza di aggiornamento
-- [ ] Aggiornare `raw.source.type` e `raw.source.args` in `dataset.yml`
+- [ ] Aggiornare `raw.sources[].type` e `raw.sources[].args` in `dataset.yml`
 - [ ] Documentare licenza, coverage, refresh cadence e note in `docs/sources.md`
 - [ ] Registrare trade-off e assunzioni di ingestione in `docs/decisions.md`
 - [ ] Verificare che non esistano path assoluti o riferimenti locali
@@ -29,6 +29,11 @@ Qualificare la fonte e codificare in modo riproducibile come il toolkit deve leg
 
 Fonte verificata e configurata in `dataset.yml`, con documentazione sufficiente per procedere al layer RAW.
 
+## Supporto operativo
+
+- notebook consigliato: `notebooks/01_inspect_raw.ipynb`
+- comando minimo: `toolkit run raw --config dataset.yml`
+
 ## File da toccare
 
 - `dataset.yml`
@@ -38,6 +43,6 @@ Fonte verificata e configurata in `dataset.yml`, con documentazione sufficiente
 ## Acceptance criteria
 
 - la fonte e verificabile e documentata
-- `raw.source` e compilato con campi sufficienti all'esecuzione
+- `raw.sources` e compilato con campi sufficienti all'esecuzione
 - `docs/sources.md` contiene note su licenza, refresh e limiti noti
 - `pytest tests/test_contract.py` passa
diff --git a/.github/seed-issues/03_raw.md b/.github/seed-issues/03_raw.md
index fd27aa8..52adf76 100644
--- a/.github/seed-issues/03_raw.md
+++ b/.github/seed-issues/03_raw.md
@@ -17,9 +17,9 @@ Ottenere un layer RAW eseguibile e ripetibile, senza committare output in repo.
 
 ## Checklist
 
-- [ ] Verificare `raw.source` e eventuale extractor in `dataset.yml`
-- [ ] Eseguire `toolkit run raw --config dataset.yml --year <year>`
-- [ ] Eseguire `toolkit validate --config dataset.yml --year <year>` oppure documentare il blocco
+- [ ] Verificare `raw.sources[]`, `primary` ed eventuale extractor in `dataset.yml`
+- [ ] Eseguire `toolkit run raw --config dataset.yml`
+- [ ] Controllare `manifest.json`, `metadata.json` e `raw_validation.json`
 - [ ] Controllare metadata, manifest e validation report del RAW
 - [ ] Confermare che `data/` non contenga output committati
 - [ ] Aggiornare `docs/decisions.md` con eventuali eccezioni o failure modes
@@ -28,6 +28,11 @@ Ottenere un layer RAW eseguibile e ripetibile, senza committare output in repo.
 
 RAW eseguibile con report minimi di validazione e metadata disponibili negli artifact di run.
 
+## Supporto operativo
+
+- notebook consigliato: `notebooks/01_inspect_raw.ipynb`
+- path attesi: `<root>/data/raw/<dataset>/<year>/`
+
 ## File da toccare
 
 - `dataset.yml`
@@ -38,5 +43,5 @@ RAW eseguibile con report minimi di validazione e metadata disponibili negli art
 
 - il run RAW completa o il blocco e documentato in modo riproducibile
 - nessun output RAW viene aggiunto sotto `data/`
-- gli artifact minimi del RAW sono attesi sotto `_runs/`
+- gli artifact minimi del RAW sono attesi sotto `<root>/data/raw/<dataset>/<year>/`
 - il progetto puo passare a CLEAN con input RAW deterministico
diff --git a/.github/seed-issues/04_clean.md b/.github/seed-issues/04_clean.md
index d6a3ba4..6fb4487 100644
--- a/.github/seed-issues/04_clean.md
+++ b/.github/seed-issues/04_clean.md
@@ -21,8 +21,8 @@ Portare il dataset da RAW a CLEAN con SQL esplicita, schema documentato e valida
 - [ ] Implementare o aggiornare `sql/clean.sql`
 - [ ] Allineare `clean.read`, `clean.required_columns` e `clean.validate` in `dataset.yml`
 - [ ] Verificare chiavi logiche, `not_null`, `min_rows` e duplicati
-- [ ] Eseguire `toolkit run clean --config dataset.yml --year <year>`
-- [ ] Eseguire `toolkit validate --config dataset.yml --year <year>`
+- [ ] Eseguire `toolkit run clean --config dataset.yml`
+- [ ] Eseguire `toolkit validate clean --config dataset.yml`
 - [ ] Aggiornare `docs/data_dictionary.md` per il layer CLEAN
 - [ ] Loggare assunzioni e mapping in `docs/decisions.md`
 
@@ -30,6 +30,11 @@ Portare il dataset da RAW a CLEAN con SQL esplicita, schema documentato e valida
 
 Layer CLEAN riproducibile, con schema e regole di validazione sufficienti per alimentare i mart.
 
+## Supporto operativo
+
+- notebook consigliato: `notebooks/02_inspect_clean.ipynb`
+- path attesi: `<root>/data/clean/<dataset>/<year>/`
+
 ## File da toccare
 
 - `sql/clean.sql`
diff --git a/.github/seed-issues/05_mart.md b/.github/seed-issues/05_mart.md
index 9f8574e..9bac54a 100644
--- a/.github/seed-issues/05_mart.md
+++ b/.github/seed-issues/05_mart.md
@@ -19,7 +19,7 @@ Produrre uno o piu mart orientati a KPI e output finali, con tabella/e e validat
 ## Checklist
 
 - [ ] Creare o aggiornare `sql/mart/<table>.sql` per ogni tabella dichiarata
-- [ ] Allineare `mart.tables`, `mart.required_tables` e `mart.validate` in `dataset.yml`
+- [ ] Allineare `mart.tables` in `dataset.yml` e aggiungere eventuali regole di validazione supportate dal toolkit
 - [ ] Eseguire `toolkit run mart --config dataset.yml --year <year>`
 - [ ] Eseguire `toolkit validate --config dataset.yml --year <year>`
 - [ ] Verificare required columns, chiavi, `not_null`, `min_rows` e KPI sanity
@@ -29,9 +29,14 @@ Produrre uno o piu mart orientati a KPI e output finali, con tabella/e e validat
 
 Mart pronti per dashboard o report, con SQL separata per tabella e regole di validazione chiare.
 
+## Supporto operativo
+
+- notebook consigliato: `notebooks/03_explore_mart.ipynb`
+- comando minimo: `toolkit run mart --config dataset.yml`
+
 ## File da toccare
 
-- `sql/mart/project_summary.sql`
+- `sql/mart/<table>.sql`
 - `dataset.yml`
 - `docs/data_dictionary.md`
 - `docs/decisions.md`
@@ -39,6 +44,6 @@ Mart pronti per dashboard o report, con SQL separata per tabella e regole di val
 ## Acceptance criteria
 
 - ogni tabella dichiarata in `mart.tables[]` ha un file SQL dedicato
-- `mart.required_tables` e `mart.validate.table_rules` sono coerenti con le tabelle pubblicate
+- eventuali regole MART dichiarate in `dataset.yml` sono coerenti con le tabelle pubblicate
 - rowcount sanity e duplicate check sono stati eseguiti
 - il progetto puo passare a QA con mart leggibili e validabili
diff --git a/.github/seed-issues/06_validation_qa.md b/.github/seed-issues/06_validation_qa.md
index 564aca9..50232e1 100644
--- a/.github/seed-issues/06_validation_qa.md
+++ b/.github/seed-issues/06_validation_qa.md
@@ -18,10 +18,10 @@ Chiudere il gate tecnico di qualita con contract tests verdi, validazioni datase
 
 ## Checklist
 
-- [ ] Eseguire `pytest tests/test_contract.py`
+- [ ] Eseguire `py -m pytest tests/test_contract.py`
 - [ ] Verificare che la CI `contract` sia verde
 - [ ] Verificare che la CI `smoke` sia documentata e attivabile con `RUN_SMOKE=1`
-- [ ] Rieseguire `toolkit validate --config dataset.yml --year <year>` se disponibile
+- [ ] Rieseguire `toolkit validate all --config dataset.yml`
 - [ ] Controllare outlier, rowcount sanity, duplicates e coerenza dei KPI
 - [ ] Aprire issue residue per anomalie non bloccanti
 
@@ -29,6 +29,11 @@ Chiudere il gate tecnico di qualita con contract tests verdi, validazioni datase
 
 Gate QA superato, con standard minimo del Lab rispettato e stato di qualita esplicito.
 
+## Supporto operativo
+
+- notebook consigliato: `notebooks/04_quality_checks.ipynb`
+- comando di stato: `toolkit status --dataset <dataset> --year <year> --latest --config dataset.yml`
+
 ## File da toccare
 
 - `tests/test_contract.py`
diff --git a/.github/seed-issues/07_dashboard.md b/.github/seed-issues/07_dashboard.md
index c7b6e4e..f0cd351 100644
--- a/.github/seed-issues/07_dashboard.md
+++ b/.github/seed-issues/07_dashboard.md
@@ -28,6 +28,10 @@ Preparare un output pubblico che consumi i mart prodotti dal progetto.
 
 Dashboard, report o pagina pubblica leggibile e coerente con i mart del progetto.
 
+## Supporto operativo
+
+- notebook consigliato: `notebooks/05_dashboard_export.ipynb`
+
 ## File da toccare
 
 - `dashboard/README.md`
diff --git a/.github/seed-issues/08_release.md b/.github/seed-issues/08_release.md
index be37dd2..2b8b517 100644
--- a/.github/seed-issues/08_release.md
+++ b/.github/seed-issues/08_release.md
@@ -42,6 +42,11 @@ Portare il progetto a una release riproducibile, spiegabile e pronta per handoff
 
 Release interna o pubblica con documentazione finale coerente con i dati e con i mart prodotti.
 
+## Supporto operativo
+
+- notebook consigliato: `notebooks/00_quickstart.ipynb`
+- comandi minimi: `py -m pytest tests/test_contract.py` e `toolkit validate all --config dataset.yml`
+
 ## File da toccare
 
 - `README.md`
diff --git a/.github/seed-issues/09_docs_decisions_dictionary.md b/.github/seed-issues/09_docs_decisions_dictionary.md
index 1a56619..ed4e8b5 100644
--- a/.github/seed-issues/09_docs_decisions_dictionary.md
+++ b/.github/seed-issues/09_docs_decisions_dictionary.md
@@ -28,6 +28,10 @@ Tenere allineata la documentazione strutturata del dataset durante tutto il life
 
 Decision log e data dictionary completi, utili per review, handoff e manutenzione futura.
 
+## Supporto operativo
+
+- notebook consigliati: `notebooks/02_inspect_clean.ipynb` e `notebooks/03_explore_mart.ipynb`
+
 ## File da toccare
 
 - `docs/decisions.md`
diff --git a/.github/seed-issues/10_maintenance.md b/.github/seed-issues/10_maintenance.md
index c1df356..5fffb28 100644
--- a/.github/seed-issues/10_maintenance.md
+++ b/.github/seed-issues/10_maintenance.md
@@ -29,11 +29,16 @@ Definire il lavoro necessario per mantenere il dataset nel tempo quando cambiano
 
 Piano di manutenzione chiaro e procedimento ripetibile per evolvere il dataset senza rompere il contratto del template.
 
+## Supporto operativo
+
+- notebook consigliati: `notebooks/01_inspect_raw.ipynb`, `notebooks/02_inspect_clean.ipynb`, `notebooks/03_explore_mart.ipynb`
+- comandi minimi: `py -m pytest tests/test_contract.py`, `toolkit run all --config dataset.yml`, `toolkit validate all --config dataset.yml`
+
 ## File da toccare
 
 - `dataset.yml`
 - `sql/clean.sql`
-- `sql/mart/project_summary.sql`
+- `sql/mart/<table>.sql`
 - `docs/sources.md`
 - `docs/data_dictionary.md`
 - `docs/decisions.md`
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a78e7ee..36f5faf 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -39,7 +39,6 @@ jobs:
         run: |
           test -f dataset.yml
           test -f sql/clean.sql
-          test -f sql/mart/project_summary.sql
           test -f scripts/smoke.sh
 
       - name: Run contract tests
@@ -75,7 +74,7 @@ jobs:
       - name: Export DCL_ROOT
         run: echo "DCL_ROOT=${GITHUB_WORKSPACE}" >> "${GITHUB_ENV}"
 
-      - name: Smoke run raw clean mart validate
+      - name: Smoke run all validate all status
         run: sh scripts/smoke.sh
 
       - name: Upload minimal artifacts
@@ -84,6 +83,8 @@ jobs:
         with:
           name: toolkit-artifacts
           path: |
-            _runs/**/*.json
-            _runs/**/logs/**
+            _smoke_out/data/_runs/**/*.json
+            _smoke_out/data/raw/**/*.json
+            _smoke_out/data/clean/**/*.json
+            _smoke_out/data/mart/**/*.json
           if-no-files-found: warn
diff --git a/.gitignore b/.gitignore
index 2d1ba3f..32a1477 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,10 @@ data/**
 _runs/
 _runs/**
 !_runs/.gitkeep
+_smoke_out/
+_smoke_out/**
+_test_out/
+_test_out/**
 
 # eventuali export temporanei generati dal toolkit
 _runs/**/*.csv
diff --git a/README.md b/README.md
index f5e87c7..f3edb3b 100644
--- a/README.md
+++ b/README.md
@@ -73,6 +73,24 @@ spiega il contesto, il territorio o l’anno che ti interessa e cosa vuoi capire
 * `docs/contributing.md` — come contribuire
 
 
+## Confine con il toolkit
+
+Questo repository contiene il contratto del dataset:
+
+* configurazione in `dataset.yml`
+* trasformazioni SQL in `sql/`
+* test di contratto e documentazione locale
+* notebook leggeri per ispezione degli output
+
+Il motore della pipeline vive nel repository **Toolkit DataCivicLab**.
+Questa repo non replica la logica di esecuzione del toolkit: definisce input, regole e output attesi per questo dataset.
+
+In pratica:
+
+* bug o feature della CLI, runner, validazioni runtime e metadata di run → repo `toolkit`
+* bug o modifiche a fonti, mapping, SQL, mart, docs e notebook di dataset → questa repo
+
+
 ## 🧭 Roadmap
 
 La roadmap è gestita con **issue + milestone**.
@@ -82,6 +100,9 @@ La roadmap è gestita con **issue + milestone**.
 
 Questo repository è un modello per progetti dataset DataCivicLab.
 
+`dataset.yml` in root è un esempio eseguibile completo, utile per smoke e onboarding.
+Chi clona questo template deve adattarlo al dataset reale, non copiarlo come contratto finale immutabile.
+
 Per adattarlo a un nuovo dataset:
 
 1. aggiorna la domanda civica e gli esempi di insight
@@ -96,13 +117,47 @@ La struttura resta invariata.
 
 ```bash
 pip install dataciviclab-toolkit
-toolkit run --dataset dataset.yml
+toolkit run all --config dataset.yml
+toolkit validate all --config dataset.yml
 ```
 
-Per dettagli tecnici (CLI, configurazione, validazioni, run metadata)
+Se lavori con un checkout locale del toolkit, installalo in modalità editable (`pip install -e <path-del-toolkit>`) e poi esegui i comandi da questa repo.
+
+Per dettagli tecnici su CLI, configurazione supportata, validazioni runtime e run metadata,
 vedi il repository **Toolkit DataCivicLab**.
 
 
+## Archivio Pubblico
+
+Se il progetto pubblica artifact in un archivio pubblico DataCivicLab su Drive, il flusso consigliato è:
+
+1. eseguire e validare la pipeline in locale
+2. verificare gli output sotto `root/data/...`
+3. pubblicare solo gli artifact pubblici con uno script separato
+
+Esempio:
+
+```bash
+py scripts/publish_to_drive.py --config dataset.yml --drive-root "G:\\DataCivicLab" --dry-run
+py scripts/publish_to_drive.py --config dataset.yml --drive-root "G:\\DataCivicLab" --year 2022
+```
+
+Per default lo script pubblica:
+
+* payload RAW
+* metadata, manifest e validation di `raw`, `clean`, `mart`
+* parquet CLEAN
+* parquet MART
+* ultimo run record
+
+La destinazione su Drive mantiene lo stesso path relativo degli output del toolkit sotto `root`.
+
+Esempio:
+
+* locale: `root/data/mart/<dataset>/<year>/mart_ok.parquet`
+* Drive: `<drive-root>/data/mart/<dataset>/<year>/mart_ok.parquet`
+
+
 ## 🌍 DataCivicLab
 
 Parte del progetto DataCivicLab.
diff --git a/WORKFLOW.md b/WORKFLOW.md
index ac62d54..2695a4b 100644
--- a/WORKFLOW.md
+++ b/WORKFLOW.md
@@ -14,9 +14,24 @@ Come contribuire in modo semplice a un progetto dataset DataCivicLab.
 - standard Lab, DoD e release policy: [docs/lab_links.md](docs/lab_links.md)
 - indice docs locali: [docs/README.md](docs/README.md)
 
+## Confine tecnico
+
+- questa repo contiene config dataset, SQL, docs, test di contratto e notebook
+- il motore di esecuzione della pipeline sta nel repo toolkit
+- se il problema riguarda run, CLI o comportamento interno del motore, aprilo nel toolkit
+- se il problema riguarda fonti, mapping, mart o documentazione del dataset, aprilo qui
+
 ## Flusso minimo
 
 1. apri una domanda, un feedback o una correzione
 2. scegli una issue o aprine una nuova
 3. lavora su branch dedicato
 4. apri una PR piccola e leggibile
+
+## Flusso tecnico minimo
+
+1. valida la config con `py -m pytest tests/test_contract.py`
+2. esegui `toolkit run all --config dataset.yml`
+3. esegui `toolkit validate all --config dataset.yml`
+4. usa i notebook per ispezionare RAW, CLEAN, MART e QA
+5. se il progetto ha un archivio pubblico, pubblica gli artifact con `py scripts/publish_to_drive.py`
diff --git a/dashboard/README.md b/dashboard/README.md
index 9512581..b4277ed 100644
--- a/dashboard/README.md
+++ b/dashboard/README.md
@@ -14,3 +14,4 @@ Qui vanno link, note di lettura, screenshot e limiti dell'output, non i dati.
 ## Coerenza con i mart
 
 Ogni dashboard dovrebbe essere collegata ai mart documentati e aggiornati del progetto.
+Se il progetto usa un archivio pubblico su Drive, documenta qui quali file pubblicati alimentano la dashboard.
diff --git a/dataset.yml b/dataset.yml
index ecd4266..07b4636 100644
--- a/dataset.yml
+++ b/dataset.yml
@@ -1,70 +1,70 @@
-# Toolkit-first dataset contract aligned with the current smoke suite.
-# Path policy:
-# - use POSIX separators
-# - keep every path root-relative
-# - no absolute paths
-# Root resolution:
-# 1. dataset.yml root
-# 2. DCL_ROOT
-# 3. base_dir
-
-root: "./_runs"
+schema_version: 1
+root: "./_smoke_out"
 
 dataset:
-  name: "project_template"
-  years: [2024]
-
-validation:
-  fail_on_error: true
+  name: "bdap_http_csv"
+  years: [2022]
 
 raw:
-  source:
-    type: "http_file"
-    args:
-      url: "https://example.org/datasets/project_template_2024.csv"
-      filename: "project_template_{year}.csv"
+  output_policy: "versioned"
+  sources:
+    - name: "bdap_csv"
+      type: "http_file"
+      args:
+        url: "https://bdap-opendata.rgs.mef.gov.it/export/csv/Rendiconto-Pubblicato---Serie-storica---Saldi.csv"
+        filename: "bdap_rendiconto_saldi_{year}.csv"
+      primary: true
 
 clean:
   sql: "sql/clean.sql"
-  required_columns:
-    - year
-    - entity_id
-    - metric_value
+  read_mode: "fallback"
   read:
-    source: config_only
-    mode: latest
-    delim: ","
-    header: true
+    source: "config_only"
+    mode: "explicit"
+    include:
+      - "bdap_rendiconto_saldi_*.csv"
+    delim: ";"
+    decimal: "."
     encoding: "utf-8"
-    trim_whitespace: true
+    header: true
     columns: null
+  required_columns:
+    - "anno"
+    - "saldo_netto"
+    - "indebitamento_netto"
+    - "avanzo_primario"
+    - "entrate_finali"
+    - "spese_finali"
   validate:
     primary_key:
-      - entity_id
-      - year
+      - "anno"
     not_null:
-      - year
-      - entity_id
+      - "anno"
     min_rows: 1
 
 mart:
-  required_tables:
-    - project_summary
   tables:
-    - name: "project_summary"
-      sql: "sql/mart/project_summary.sql"
+    - name: "mart_ok"
+      sql: "sql/mart/mart_ok.sql"
+  required_tables:
+    - "mart_ok"
   validate:
     table_rules:
-      project_summary:
+      mart_ok:
         required_columns:
-          - year
-          - rows_in_year
-          - total_metric_value
-        not_null:
-          - year
+          - "anno"
+          - "saldo_netto"
+          - "entrate_finali"
+          - "spese_finali"
         primary_key:
-          - year
+          - "anno"
+        not_null:
+          - "anno"
         min_rows: 1
 
+validation:
+  fail_on_error: true
+
 output:
-  artifacts: minimal
+  artifacts: "minimal"
+  legacy_aliases: true
diff --git a/docs/README.md b/docs/README.md
index 91e5184..abcaf67 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -3,6 +3,9 @@
 Questa cartella contiene i documenti locali, specifici di questo dataset.
 Per standard del Lab vedi [lab_links.md](lab_links.md).
 
+Il motore della pipeline non vive qui: questa documentazione descrive il dataset e il suo contratto verso il toolkit.
+Le decisioni operative sul run reale devono restare coerenti con la CLI e con lo schema config del toolkit.
+
 ## Essenziali
 
 - [overview.md](overview.md)
diff --git a/docs/contributing.md b/docs/contributing.md
index 1f9094a..2d54479 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -10,23 +10,90 @@ Prerequisiti:
 - avere Python e il toolkit disponibili nel proprio ambiente, oppure un checkout locale del toolkit
 - lavorare sempre dalla root del progetto
 
+Questa repo contiene configurazione dataset, SQL, documentazione e test di contratto.
+Il motore della pipeline sta nel repo toolkit.
+
 ## Contract tests
 
 Esegui sempre prima:
 
 ```sh
-pytest tests/test_contract.py
+py -m pytest tests/test_contract.py
 ```
 
+Questi test non verificano il motore del toolkit.
+Verificano che questa repo esponga un contratto coerente per il dataset: file dichiarati, path, tabelle mart e struttura minima della config.
+
 ## Smoke locale
 
 Per uno smoke test end-to-end:
 
 ```sh
-sh scripts/smoke.sh 2024
+sh scripts/smoke.sh
 ```
 
 Se il toolkit non è nel `PATH`, usa il fallback documentato nello script.
+Se lo smoke fallisce per un problema del motore, apri il bug nel repo toolkit.
+Se fallisce per config, SQL o assunzioni sul dato, correggi questa repo.
+
+Su Windows, se `sh` non è disponibile nel `PATH`, usa una shell POSIX come Git Bash oppure esegui i comandi toolkit equivalenti:
+
+```powershell
+toolkit run all --config dataset.yml
+toolkit validate all --config dataset.yml
+toolkit status --dataset <dataset> --year <year> --latest --config dataset.yml
+```
+
+## Publish su Drive
+
+Se il progetto usa un archivio pubblico su Drive, la pubblicazione va fatta dopo `run all` e `validate all`, non durante il run.
+
+Dry-run:
+
+```powershell
+py scripts/publish_to_drive.py --config dataset.yml --drive-root "G:\DataCivicLab" --dry-run
+```
+
+Publish di un anno:
+
+```powershell
+py scripts/publish_to_drive.py --config dataset.yml --drive-root "G:\DataCivicLab" --year 2022
+```
+
+Lo script pubblica per default payload RAW, metadata, manifest e validation di `raw`, `clean`, `mart`, i parquet CLEAN/MART e l'ultimo run record.
+La destinazione su Drive mantiene gli stessi path relativi sotto `root`, quindi pubblica sotto `<drive-root>/data/...`.
+
+## Comandi canonici toolkit
+
+```sh
+toolkit run all --config dataset.yml
+toolkit run raw --config dataset.yml
+toolkit run clean --config dataset.yml
+toolkit run mart --config dataset.yml
+toolkit validate all --config dataset.yml
+toolkit status --dataset <dataset> --year <year> --latest --config dataset.yml
+```
+
+## Fasi operative
+
+- kickoff e contratto: `dataset.yml`, `README.md`, `tests/test_contract.py`
+- sources e raw: `dataset.yml`, `docs/sources.md`, `docs/decisions.md`, `notebooks/01_inspect_raw.ipynb`
+- clean: `sql/clean.sql`, `dataset.yml`, `notebooks/02_inspect_clean.ipynb`
+- mart: `sql/mart/*.sql`, `dataset.yml`, `notebooks/03_explore_mart.ipynb`
+- qa: `tests/test_contract.py`, `.github/workflows/ci.yml`, `notebooks/04_quality_checks.ipynb`
+- dashboard/export: `dashboard/`, `README.md`, `notebooks/05_dashboard_export.ipynb`
+
+## Checklist lifecycle
+
+| Fase | File principali | Comando minimo | Notebook |
+|---|---|---|---|
+| Kickoff | `dataset.yml`, `README.md` | `py -m pytest tests/test_contract.py` | `00_quickstart.ipynb` |
+| Sources/RAW | `dataset.yml`, `docs/sources.md`, `docs/decisions.md` | `toolkit run raw --config dataset.yml` | `01_inspect_raw.ipynb` |
+| CLEAN | `sql/clean.sql`, `dataset.yml`, `docs/data_dictionary.md` | `toolkit run clean --config dataset.yml` | `02_inspect_clean.ipynb` |
+| MART | `sql/mart/*.sql`, `dataset.yml` | `toolkit run mart --config dataset.yml` | `03_explore_mart.ipynb` |
+| QA | `tests/test_contract.py`, `.github/workflows/ci.yml` | `toolkit validate all --config dataset.yml` | `04_quality_checks.ipynb` |
+| Output pubblico | `dashboard/`, `README.md`, `scripts/publish_to_drive.py` | `py scripts/publish_to_drive.py --config dataset.yml --drive-root "<drive>" --dry-run` | `05_dashboard_export.ipynb` |
+| Release | `README.md`, `docs/overview.md`, `docs/data_dictionary.md` | `toolkit status --dataset <dataset> --year <year> --latest --config dataset.yml` | `00_quickstart.ipynb` |
 
 ## Regole veloci
 
@@ -39,7 +106,8 @@ Se il toolkit non è nel `PATH`, usa il fallback documentato nello script.
 
 - controlla che `docs/sources.md` e `docs/overview.md` siano coerenti
 - migliora una descrizione in `docs/data_dictionary.md`
-- esegui `pytest tests/test_contract.py` e segnala eventuali problemi
+- esegui `py -m pytest tests/test_contract.py` e segnala eventuali problemi
+- apri i notebook per ispezionare gli output generati dal toolkit, senza aggiungere logica di pipeline qui
 
 ## Poi dove vado?
 
diff --git a/notebooks/00_quickstart.ipynb b/notebooks/00_quickstart.ipynb
index 261f93f..0f1f20f 100644
--- a/notebooks/00_quickstart.ipynb
+++ b/notebooks/00_quickstart.ipynb
@@ -6,11 +6,9 @@
    "source": [
     "# 00 Quickstart\n",
     "\n",
-    "## Cosa fa / Cosa NON fa\n",
-    "\n",
-    "- esegue la pipeline solo se abiliti esplicitamente `RUN_TOOLKIT = True`\n",
-    "- cerca un mart leggibile e mostra schema, anteprima e controlli minimi\n",
-    "- non scrive file e non assume output già presenti in una cartella specifica"
+    "- legge `../dataset.yml`\n",
+    "- mostra i path reali attesi dal toolkit\n",
+    "- esegue il run solo se abiliti esplicitamente `RUN_TOOLKIT = True`"
    ]
   },
   {
@@ -20,81 +18,35 @@
    "outputs": [],
    "source": [
     "from pathlib import Path\n",
+    "import shutil\n",
     "import subprocess\n",
-    "import duckdb\n",
+    "import yaml\n",
     "\n",
     "ROOT = Path('.').resolve()\n",
-    "DATASET_YML = (ROOT / '..' / 'dataset.yml').resolve()\n",
-    "MART_DIRS = [\n",
-    "    (ROOT / '..' / 'data' / 'mart').resolve(),\n",
-    "    (ROOT / '..' / '_runs').resolve(),\n",
-    "]\n",
-    "TABLE_NAME = 'project_summary'\n",
+    "DATASET_YML = (ROOT / 'dataset.yml').resolve() if (ROOT / 'dataset.yml').exists() else (ROOT / '..' / 'dataset.yml').resolve()\n",
+    "CFG = yaml.safe_load(DATASET_YML.read_text(encoding='utf-8'))\n",
+    "BASE_DIR = DATASET_YML.parent\n",
+    "OUT_ROOT = (BASE_DIR / CFG.get('root', '.')).resolve()\n",
+    "DATASET = CFG['dataset']['name']\n",
+    "YEARS = CFG['dataset']['years']\n",
+    "YEAR_INDEX = 0\n",
+    "YEAR = YEARS[YEAR_INDEX] if YEARS and 0 <= YEAR_INDEX < len(YEARS) else YEARS[0]\n",
+    "MART_TABLES = [table['name'] for table in CFG.get('mart', {}).get('tables', [])]\n",
     "RUN_TOOLKIT = False\n",
-    "ROOT, DATASET_YML"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cmd = ['toolkit', 'run', '--dataset', str(DATASET_YML)]\n",
-    "if RUN_TOOLKIT:\n",
-    "    print('Running:', ' '.join(cmd))\n",
-    "    subprocess.run(cmd, check=True)\n",
-    "else:\n",
-    "    print('Toolkit run disabled.')\n",
-    "    print('Set RUN_TOOLKIT = True to execute the pipeline from this notebook.')\n",
-    "    print('Expected command:', ' '.join(cmd))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def find_mart_candidates(table_name):\n",
-    "    patterns = [\n",
-    "        f'**/{table_name}.parquet',\n",
-    "        f'**/*{table_name}*.parquet',\n",
-    "    ]\n",
-    "    found = []\n",
-    "    for base in MART_DIRS:\n",
-    "        if not base.exists():\n",
-    "            continue\n",
-    "        for pattern in patterns:\n",
-    "            found.extend(sorted(base.glob(pattern)))\n",
-    "    unique = []\n",
-    "    seen = set()\n",
-    "    for path in found:\n",
-    "        key = str(path)\n",
-    "        if key not in seen:\n",
-    "            seen.add(key)\n",
-    "            unique.append(path)\n",
-    "    return unique\n",
+    "CLI_PREFIX = ['toolkit'] if shutil.which('toolkit') else ['py', '-m', 'toolkit.cli.app']\n",
+    "RUN_CMD = CLI_PREFIX + ['run', 'all', '--config', str(DATASET_YML)]\n",
+    "VALIDATE_CMD = CLI_PREFIX + ['validate', 'all', '--config', str(DATASET_YML)]\n",
     "\n",
-    "mart_files = find_mart_candidates(TABLE_NAME)\n",
-    "mart_files[:5]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con = duckdb.connect()\n",
-    "mart_path = str(mart_files[0]) if mart_files else None\n",
-    "if mart_path:\n",
-    "    schema_df = con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{mart_path}')\").df()\n",
-    "    head_df = con.execute(f\"SELECT * FROM read_parquet('{mart_path}') LIMIT 10\").df()\n",
-    "    display(schema_df)\n",
-    "    display(head_df)\n",
-    "else:\n",
-    "    print('No mart parquet found. Run the pipeline first or update TABLE_NAME.')"
+    "{\n",
+    "    'DATASET_YML': str(DATASET_YML),\n",
+    "    'OUT_ROOT': str(OUT_ROOT),\n",
+    "    'DATASET': DATASET,\n",
+    "    'YEARS': YEARS,\n",
+    "    'YEAR_INDEX': YEAR_INDEX,\n",
+    "    'YEAR': YEAR,\n",
+    "    'MART_TABLES': MART_TABLES,\n",
+    "    'CLI_PREFIX': CLI_PREFIX,\n",
+    "}"
    ]
   },
   {
@@ -103,29 +55,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def read_schema(path):\n",
-    "    schema = con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{path}')\").df()\n",
-    "    schema.columns = [str(col).lower() for col in schema.columns]\n",
-    "    return schema\n",
-    "\n",
-    "def choose_columns(schema):\n",
-    "    name_col = 'column_name' if 'column_name' in schema.columns else schema.columns[0]\n",
-    "    type_col = 'column_type' if 'column_type' in schema.columns else schema.columns[1]\n",
-    "    rows = [\n",
-    "        {'name': str(row[name_col]), 'type': str(row[type_col]).upper()}\n",
-    "        for _, row in schema.iterrows()\n",
-    "    ]\n",
-    "    year_col = next((r['name'] for r in rows if r['name'].lower() == 'year' or 'anno' in r['name'].lower()), None)\n",
-    "    numeric_rows = [r for r in rows if any(token in r['type'] for token in ['INT', 'DECIMAL', 'DOUBLE', 'FLOAT', 'REAL', 'BIGINT'])]\n",
-    "    metric_col = next((r['name'] for r in numeric_rows if any(token in r['name'].lower() for token in ['value', 'tot', 'importo', 'ammontare', 'pct', 'percent'])), None)\n",
-    "    if metric_col is None and numeric_rows:\n",
-    "        metric_col = numeric_rows[0]['name']\n",
-    "    return year_col, metric_col\n",
-    "\n",
-    "if mart_path:\n",
-    "    schema_df = read_schema(mart_path)\n",
-    "    YEAR_COL, METRIC_COL = choose_columns(schema_df)\n",
-    "    print({'YEAR_COL': YEAR_COL, 'METRIC_COL': METRIC_COL})"
+    "expected_paths = {\n",
+    "    'raw_dir': str(OUT_ROOT / 'data' / 'raw' / DATASET / str(YEAR)),\n",
+    "    'clean_dir': str(OUT_ROOT / 'data' / 'clean' / DATASET / str(YEAR)),\n",
+    "    'mart_dir': str(OUT_ROOT / 'data' / 'mart' / DATASET / str(YEAR)),\n",
+    "    'run_dir': str(OUT_ROOT / 'data' / '_runs' / DATASET / str(YEAR)),\n",
+    "}\n",
+    "expected_paths"
    ]
   },
   {
@@ -134,25 +70,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "if mart_path:\n",
-    "    row_count = con.execute(f\"SELECT COUNT(*) AS row_count FROM read_parquet('{mart_path}')\").df()\n",
-    "    display(row_count)\n",
-    "\n",
-    "    if YEAR_COL:\n",
-    "        distinct_count = con.execute(\n",
-    "            f\"SELECT COUNT(DISTINCT {YEAR_COL}) AS distinct_key_count FROM read_parquet('{mart_path}')\"\n",
-    "        ).df()\n",
-    "        display(distinct_count)\n",
-    "    else:\n",
-    "        print('No year-like key detected. Update the helper or inspect schema_df manually.')\n",
+    "print('Run command:', ' '.join(RUN_CMD))\n",
+    "print('Validate command:', ' '.join(VALIDATE_CMD))\n",
     "\n",
-    "    check_columns = [col for col in [YEAR_COL, METRIC_COL] if col]\n",
-    "    if check_columns:\n",
-    "        null_expr = ', '.join([f\"AVG(CASE WHEN {col} IS NULL THEN 1 ELSE 0 END) AS {col}_null_rate\" for col in check_columns])\n",
-    "        null_rates = con.execute(f\"SELECT {null_expr} FROM read_parquet('{mart_path}')\").df()\n",
-    "        display(null_rates)\n",
-    "    else:\n",
-    "        print('No suitable columns found for null-rate sanity checks.')"
+    "if RUN_TOOLKIT:\n",
+    "    subprocess.run(RUN_CMD, check=True)\n",
+    "    subprocess.run(VALIDATE_CMD, check=True)\n",
+    "else:\n",
+    "    print('Toolkit run disabled. Set RUN_TOOLKIT = True to execute the pipeline.')"
    ]
   }
  ],
diff --git a/notebooks/01_explore_mart.ipynb b/notebooks/01_explore_mart.ipynb
deleted file mode 100644
index 7ad159b..0000000
--- a/notebooks/01_explore_mart.ipynb
+++ /dev/null
@@ -1,140 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 01 Explore MART\n",
-    "\n",
-    "## Cosa fa / Cosa NON fa\n",
-    "\n",
-    "- apre una tabella mart e ne mostra una lettura iniziale\n",
-    "- prova a scegliere automaticamente una colonna anno e una metrica numerica\n",
-    "- se non trova colonne adatte, salta le query dipendenti e mostra istruzioni"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from pathlib import Path\n",
-    "import duckdb\n",
-    "\n",
-    "ROOT = Path('.').resolve()\n",
-    "TABLE_NAME = 'project_summary'\n",
-    "MART_GLOBS = [\n",
-    "    (ROOT / '..' / 'data' / 'mart').resolve(),\n",
-    "    (ROOT / '..' / '_runs').resolve(),\n",
-    "]\n",
-    "\n",
-    "def first_match(table_name):\n",
-    "    for base in MART_GLOBS:\n",
-    "        if not base.exists():\n",
-    "            continue\n",
-    "        for path in sorted(base.glob(f'**/*{table_name}*.parquet')):\n",
-    "            return path\n",
-    "    return None\n",
-    "\n",
-    "mart_path = first_match(TABLE_NAME)\n",
-    "mart_path"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Descrizione breve:\n",
-    "\n",
-    "- questo notebook serve a capire cosa c'è nella tabella finale\n",
-    "- aggiorna `TABLE_NAME` se il dataset usa un altro mart principale"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con = duckdb.connect()\n",
-    "\n",
-    "def read_schema(path):\n",
-    "    schema = con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{path}')\").df()\n",
-    "    schema.columns = [str(col).lower() for col in schema.columns]\n",
-    "    return schema\n",
-    "\n",
-    "def choose_columns(schema):\n",
-    "    name_col = 'column_name' if 'column_name' in schema.columns else schema.columns[0]\n",
-    "    type_col = 'column_type' if 'column_type' in schema.columns else schema.columns[1]\n",
-    "    rows = [\n",
-    "        {'name': str(row[name_col]), 'type': str(row[type_col]).upper()}\n",
-    "        for _, row in schema.iterrows()\n",
-    "    ]\n",
-    "    year_col = next((r['name'] for r in rows if r['name'].lower() == 'year' or 'anno' in r['name'].lower()), None)\n",
-    "    numeric_rows = [r for r in rows if any(token in r['type'] for token in ['INT', 'DECIMAL', 'DOUBLE', 'FLOAT', 'REAL', 'BIGINT'])]\n",
-    "    metric_col = next((r['name'] for r in numeric_rows if any(token in r['name'].lower() for token in ['value', 'tot', 'importo', 'ammontare', 'pct', 'percent'])), None)\n",
-    "    if metric_col is None and numeric_rows:\n",
-    "        metric_col = numeric_rows[0]['name']\n",
-    "    return year_col, metric_col\n",
-    "\n",
-    "if mart_path:\n",
-    "    schema_df = read_schema(str(mart_path))\n",
-    "    YEAR_COL, METRIC_COL = choose_columns(schema_df)\n",
-    "    preview = con.execute(f\"SELECT * FROM read_parquet('{mart_path}') LIMIT 20\").df()\n",
-    "    display(schema_df)\n",
-    "    display(preview)\n",
-    "    print({'YEAR_COL': YEAR_COL, 'METRIC_COL': METRIC_COL})\n",
-    "else:\n",
-    "    print('No mart parquet found. Run the pipeline first or update TABLE_NAME.')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "if mart_path and YEAR_COL and METRIC_COL:\n",
-    "    by_year = con.execute(\n",
-    "        f\"SELECT {YEAR_COL} AS year_like, COUNT(*) AS rows, SUM({METRIC_COL}) AS metric_total FROM read_parquet('{mart_path}') GROUP BY 1 ORDER BY 1\"\n",
-    "    ).df()\n",
-    "    display(by_year)\n",
-    "else:\n",
-    "    print('No year-like column or metric column detected. Update TABLE_NAME or inspect schema_df manually.')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "if mart_path and METRIC_COL:\n",
-    "    top_rows = con.execute(\n",
-    "        f\"SELECT * FROM read_parquet('{mart_path}') ORDER BY {METRIC_COL} DESC NULLS LAST LIMIT 10\"\n",
-    "    ).df()\n",
-    "    bottom_rows = con.execute(\n",
-    "        f\"SELECT * FROM read_parquet('{mart_path}') ORDER BY {METRIC_COL} ASC NULLS LAST LIMIT 10\"\n",
-    "    ).df()\n",
-    "    display(top_rows)\n",
-    "    display(bottom_rows)\n",
-    "else:\n",
-    "    print('No metric column detected. Update the helper or inspect schema_df manually.')"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "name": "python",
-   "version": "3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/notebooks/01_inspect_raw.ipynb b/notebooks/01_inspect_raw.ipynb
new file mode 100644
index 0000000..d17e873
--- /dev/null
+++ b/notebooks/01_inspect_raw.ipynb
@@ -0,0 +1,119 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 01 Inspect RAW\n",
+    "\n",
+    "- apre il layer RAW reale del toolkit\n",
+    "- legge `manifest.json`, `metadata.json` e `raw_validation.json`\n",
+    "- prova a mostrare un sample del file primario dichiarato nel manifest"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "import json\n",
+    "import duckdb\n",
+    "import yaml\n",
+    "\n",
+    "ROOT = Path('.').resolve()\n",
+    "DATASET_YML = (ROOT / 'dataset.yml').resolve() if (ROOT / 'dataset.yml').exists() else (ROOT / '..' / 'dataset.yml').resolve()\n",
+    "CFG = yaml.safe_load(DATASET_YML.read_text(encoding='utf-8'))\n",
+    "BASE_DIR = DATASET_YML.parent\n",
+    "OUT_ROOT = (BASE_DIR / CFG.get('root', '.')).resolve()\n",
+    "DATASET = CFG['dataset']['name']\n",
+    "YEARS = CFG['dataset']['years']\n",
+    "YEAR_INDEX = 0\n",
+    "YEAR = YEARS[YEAR_INDEX] if YEARS and 0 <= YEAR_INDEX < len(YEARS) else YEARS[0]\n",
+    "RAW_DIR = OUT_ROOT / 'data' / 'raw' / DATASET / str(YEAR)\n",
+    "MANIFEST_PATH = RAW_DIR / 'manifest.json'\n",
+    "METADATA_PATH = RAW_DIR / 'metadata.json'\n",
+    "VALIDATION_PATH = RAW_DIR / 'raw_validation.json'\n",
+    "PROFILE_DIR = RAW_DIR / '_profile'\n",
+    "\n",
+    "{\n",
+    "    'YEARS': YEARS,\n",
+    "    'YEAR_INDEX': YEAR_INDEX,\n",
+    "    'RAW_DIR': str(RAW_DIR),\n",
+    "    'MANIFEST_EXISTS': MANIFEST_PATH.exists(),\n",
+    "    'METADATA_EXISTS': METADATA_PATH.exists(),\n",
+    "    'VALIDATION_EXISTS': VALIDATION_PATH.exists(),\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "manifest = json.loads(MANIFEST_PATH.read_text(encoding='utf-8')) if MANIFEST_PATH.exists() else {}\n",
+    "metadata = json.loads(METADATA_PATH.read_text(encoding='utf-8')) if METADATA_PATH.exists() else {}\n",
+    "validation = json.loads(VALIDATION_PATH.read_text(encoding='utf-8')) if VALIDATION_PATH.exists() else {}\n",
+    "\n",
+    "display(manifest)\n",
+    "display(metadata)\n",
+    "display(validation)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "primary_rel = manifest.get('primary_output_file') if manifest else None\n",
+    "primary_path = (RAW_DIR / primary_rel).resolve() if primary_rel else None\n",
+    "primary_path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if primary_path and primary_path.exists():\n",
+    "    suffix = primary_path.suffix.lower()\n",
+    "    if suffix == '.csv':\n",
+    "        con = duckdb.connect()\n",
+    "        preview = con.execute(\n",
+    "            f\"SELECT * FROM read_csv_auto('{primary_path.as_posix()}', SAMPLE_SIZE=-1) LIMIT 20\"\n",
+    "        ).df()\n",
+    "        display(preview)\n",
+    "    elif suffix == '.parquet':\n",
+    "        con = duckdb.connect()\n",
+    "        preview = con.execute(f\"SELECT * FROM read_parquet('{primary_path.as_posix()}') LIMIT 20\").df()\n",
+    "        display(preview)\n",
+    "    else:\n",
+    "        print({'primary_output_file': str(primary_path), 'bytes': primary_path.stat().st_size, 'suffix': suffix})\n",
+    "else:\n",
+    "    print('Primary output file not found. Inspect manifest manually.')\n",
+    "\n",
+    "if PROFILE_DIR.exists():\n",
+    "    display(sorted(path.name for path in PROFILE_DIR.iterdir()))\n",
+    "else:\n",
+    "    print('No _profile directory found for this RAW run.')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/02_inspect_clean.ipynb b/notebooks/02_inspect_clean.ipynb
new file mode 100644
index 0000000..fb6d826
--- /dev/null
+++ b/notebooks/02_inspect_clean.ipynb
@@ -0,0 +1,102 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 02 Inspect CLEAN\n",
+    "\n",
+    "- apre il parquet CLEAN prodotto dal toolkit\n",
+    "- mostra schema, preview e sanity checks minimi\n",
+    "- aiuta a verificare `clean.required_columns` e `clean.validate`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "import duckdb\n",
+    "import yaml\n",
+    "\n",
+    "ROOT = Path('.').resolve()\n",
+    "DATASET_YML = (ROOT / 'dataset.yml').resolve() if (ROOT / 'dataset.yml').exists() else (ROOT / '..' / 'dataset.yml').resolve()\n",
+    "CFG = yaml.safe_load(DATASET_YML.read_text(encoding='utf-8'))\n",
+    "BASE_DIR = DATASET_YML.parent\n",
+    "OUT_ROOT = (BASE_DIR / CFG.get('root', '.')).resolve()\n",
+    "DATASET = CFG['dataset']['name']\n",
+    "YEARS = CFG['dataset']['years']\n",
+    "YEAR_INDEX = 0\n",
+    "YEAR = YEARS[YEAR_INDEX] if YEARS and 0 <= YEAR_INDEX < len(YEARS) else YEARS[0]\n",
+    "CLEAN_DIR = OUT_ROOT / 'data' / 'clean' / DATASET / str(YEAR)\n",
+    "CLEAN_PATH = CLEAN_DIR / f'{DATASET}_{YEAR}_clean.parquet'\n",
+    "REQUIRED_COLUMNS = CFG.get('clean', {}).get('required_columns', [])\n",
+    "PRIMARY_KEY = CFG.get('clean', {}).get('validate', {}).get('primary_key', [])\n",
+    "\n",
+    "{\n",
+    "    'YEARS': YEARS,\n",
+    "    'YEAR_INDEX': YEAR_INDEX,\n",
+    "    'CLEAN_PATH': str(CLEAN_PATH),\n",
+    "    'REQUIRED_COLUMNS': REQUIRED_COLUMNS,\n",
+    "    'PRIMARY_KEY': PRIMARY_KEY,\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "con = duckdb.connect()\n",
+    "\n",
+    "if CLEAN_PATH.exists():\n",
+    "    schema_df = con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{CLEAN_PATH.as_posix()}')\").df()\n",
+    "    preview_df = con.execute(f\"SELECT * FROM read_parquet('{CLEAN_PATH.as_posix()}') LIMIT 20\").df()\n",
+    "    display(schema_df)\n",
+    "    display(preview_df)\n",
+    "else:\n",
+    "    print('CLEAN parquet not found. Run toolkit run clean --config dataset.yml first.')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if CLEAN_PATH.exists():\n",
+    "    row_count = con.execute(f\"SELECT COUNT(*) AS row_count FROM read_parquet('{CLEAN_PATH.as_posix()}')\").df()\n",
+    "    display(row_count)\n",
+    "\n",
+    "    if REQUIRED_COLUMNS:\n",
+    "        available = {row[0] for row in con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{CLEAN_PATH.as_posix()}')\").fetchall()}\n",
+    "        print({'missing_required_columns': [col for col in REQUIRED_COLUMNS if col not in available]})\n",
+    "\n",
+    "    if PRIMARY_KEY:\n",
+    "        keys = ', '.join(PRIMARY_KEY)\n",
+    "        dup_df = con.execute(\n",
+    "            f\"SELECT {keys}, COUNT(*) AS dup_count FROM read_parquet('{CLEAN_PATH.as_posix()}') GROUP BY {keys} HAVING COUNT(*) > 1 LIMIT 20\"\n",
+    "        ).df()\n",
+    "        display(dup_df)\n",
+    "else:\n",
+    "    print('No CLEAN output available.')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/02_quality_checks.ipynb b/notebooks/02_quality_checks.ipynb
deleted file mode 100644
index ab512df..0000000
--- a/notebooks/02_quality_checks.ipynb
+++ /dev/null
@@ -1,134 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 02 Quality checks\n",
-    "\n",
-    "## Cosa fa / Cosa NON fa\n",
-    "\n",
-    "- esegue controlli riutilizzabili su duplicati, missingness e range\n",
-    "- prova a rilevare colonne numeriche in modo automatico\n",
-    "- se mancano chiavi o colonne adatte, stampa istruzioni invece di fallire"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from pathlib import Path\n",
-    "import duckdb\n",
-    "\n",
-    "ROOT = Path('.').resolve()\n",
-    "TABLE_NAME = 'project_summary'\n",
-    "KEY_COLUMNS = []\n",
-    "NUMERIC_COLUMNS = []\n",
-    "\n",
-    "def find_mart(table_name):\n",
-    "    for base in [(ROOT / '..' / 'data' / 'mart').resolve(), (ROOT / '..' / '_runs').resolve()]:\n",
-    "        if not base.exists():\n",
-    "            continue\n",
-    "        matches = sorted(base.glob(f'**/*{table_name}*.parquet'))\n",
-    "        if matches:\n",
-    "            return matches[0]\n",
-    "    return None\n",
-    "\n",
-    "mart_path = find_mart(TABLE_NAME)\n",
-    "mart_path"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con = duckdb.connect()\n",
-    "\n",
-    "def read_schema(path):\n",
-    "    schema = con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{path}')\").df()\n",
-    "    schema.columns = [str(col).lower() for col in schema.columns]\n",
-    "    return schema\n",
-    "\n",
-    "def detect_numeric_columns(schema):\n",
-    "    name_col = 'column_name' if 'column_name' in schema.columns else schema.columns[0]\n",
-    "    type_col = 'column_type' if 'column_type' in schema.columns else schema.columns[1]\n",
-    "    numeric = []\n",
-    "    for _, row in schema.iterrows():\n",
-    "        dtype = str(row[type_col]).upper()\n",
-    "        if any(token in dtype for token in ['INT', 'DECIMAL', 'DOUBLE', 'FLOAT', 'REAL', 'BIGINT']):\n",
-    "            numeric.append(str(row[name_col]))\n",
-    "    return numeric\n",
-    "\n",
-    "def duplicate_key_report(path, key_columns):\n",
-    "    keys = ', '.join(key_columns)\n",
-    "    sql = f\"\"\"\n",
-    "        SELECT {keys}, COUNT(*) AS dup_count\n",
-    "        FROM read_parquet('{path}')\n",
-    "        GROUP BY {keys}\n",
-    "        HAVING COUNT(*) > 1\n",
-    "        ORDER BY dup_count DESC\n",
-    "        LIMIT 20\n",
-    "    \"\"\"\n",
-    "    return con.execute(sql).df()\n",
-    "\n",
-    "def missingness_report(path):\n",
-    "    columns = [row[0] for row in con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{path}')\").fetchall()]\n",
-    "    expr = ', '.join([f\"AVG(CASE WHEN {col} IS NULL THEN 1 ELSE 0 END) AS {col}_null_rate\" for col in columns])\n",
-    "    return con.execute(f\"SELECT {expr} FROM read_parquet('{path}')\").df().T.reset_index()\n",
-    "\n",
-    "def range_report(path, numeric_columns):\n",
-    "    expr = ', '.join([f\"MIN({col}) AS {col}_min, MAX({col}) AS {col}_max\" for col in numeric_columns])\n",
-    "    return con.execute(f\"SELECT {expr} FROM read_parquet('{path}')\").df().T.reset_index()\n",
-    "\n",
-    "if mart_path:\n",
-    "    schema_df = read_schema(str(mart_path))\n",
-    "    detected_numeric = detect_numeric_columns(schema_df)\n",
-    "    numeric_for_range = [col for col in NUMERIC_COLUMNS if col in detected_numeric]\n",
-    "    if not numeric_for_range:\n",
-    "        numeric_for_range = detected_numeric[:3]\n",
-    "    display(schema_df)\n",
-    "    print({'KEY_COLUMNS': KEY_COLUMNS, 'NUMERIC_COLUMNS': numeric_for_range})"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "if mart_path:\n",
-    "    if KEY_COLUMNS:\n",
-    "        dup_df = duplicate_key_report(str(mart_path), KEY_COLUMNS)\n",
-    "        display(dup_df)\n",
-    "    else:\n",
-    "        print('Duplicate-key check skipped: imposta KEY_COLUMNS.')\n",
-    "\n",
-    "    missing_df = missingness_report(str(mart_path))\n",
-    "    display(missing_df)\n",
-    "\n",
-    "    if numeric_for_range:\n",
-    "        range_df = range_report(str(mart_path), numeric_for_range)\n",
-    "        display(range_df)\n",
-    "    else:\n",
-    "        print('Range check skipped: no numeric columns detected.')"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "name": "python",
-   "version": "3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/notebooks/03_dashboard_export.ipynb b/notebooks/03_dashboard_export.ipynb
deleted file mode 100644
index 0b052b9..0000000
--- a/notebooks/03_dashboard_export.ipynb
+++ /dev/null
@@ -1,131 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 03 Dashboard export\n",
-    "\n",
-    "## Cosa fa / Cosa NON fa\n",
-    "\n",
-    "- prepara un export di esempio partendo da un mart disponibile\n",
-    "- non scrive file finché `EXPORT = False`\n",
-    "- se non trova colonne adatte, esporta un campione generico come fallback"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from pathlib import Path\n",
-    "import duckdb\n",
-    "\n",
-    "ROOT = Path('.').resolve()\n",
-    "EXPORT = False\n",
-    "OUT_DIR = (ROOT / '..' / '_tmp').resolve()\n",
-    "TABLE_NAME = 'project_summary'\n",
-    "\n",
-    "def find_mart(table_name):\n",
-    "    for base in [(ROOT / '..' / 'data' / 'mart').resolve(), (ROOT / '..' / '_runs').resolve()]:\n",
-    "        if not base.exists():\n",
-    "            continue\n",
-    "        matches = sorted(base.glob(f'**/*{table_name}*.parquet'))\n",
-    "        if matches:\n",
-    "            return matches[0]\n",
-    "    return None\n",
-    "\n",
-    "mart_path = find_mart(TABLE_NAME)\n",
-    "OUT_DIR"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con = duckdb.connect()\n",
-    "\n",
-    "def read_schema(path):\n",
-    "    schema = con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{path}')\").df()\n",
-    "    schema.columns = [str(col).lower() for col in schema.columns]\n",
-    "    return schema\n",
-    "\n",
-    "def choose_columns(schema):\n",
-    "    name_col = 'column_name' if 'column_name' in schema.columns else schema.columns[0]\n",
-    "    type_col = 'column_type' if 'column_type' in schema.columns else schema.columns[1]\n",
-    "    rows = [\n",
-    "        {'name': str(row[name_col]), 'type': str(row[type_col]).upper()}\n",
-    "        for _, row in schema.iterrows()\n",
-    "    ]\n",
-    "    year_col = next((r['name'] for r in rows if r['name'].lower() == 'year' or 'anno' in r['name'].lower()), None)\n",
-    "    numeric_rows = [r for r in rows if any(token in r['type'] for token in ['INT', 'DECIMAL', 'DOUBLE', 'FLOAT', 'REAL', 'BIGINT'])]\n",
-    "    metric_col = next((r['name'] for r in numeric_rows if any(token in r['name'].lower() for token in ['value', 'tot', 'importo', 'ammontare', 'pct', 'percent'])), None)\n",
-    "    if metric_col is None and numeric_rows:\n",
-    "        metric_col = numeric_rows[0]['name']\n",
-    "    return year_col, metric_col\n",
-    "\n",
-    "if mart_path:\n",
-    "    schema_df = read_schema(str(mart_path))\n",
-    "    YEAR_COL, METRIC_COL = choose_columns(schema_df)\n",
-    "    print({'YEAR_COL': YEAR_COL, 'METRIC_COL': METRIC_COL})\n",
-    "else:\n",
-    "    print('No mart parquet found. Run the pipeline first or update TABLE_NAME.')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "if mart_path and YEAR_COL and METRIC_COL:\n",
-    "    export_df = con.execute(\n",
-    "        f\"SELECT {YEAR_COL} AS year_like, {METRIC_COL} AS metric_value FROM read_parquet('{mart_path}') ORDER BY 1\"\n",
-    "    ).df()\n",
-    "elif mart_path:\n",
-    "    print('Using generic fallback export: no year-like or metric column detected.')\n",
-    "    export_df = con.execute(f\"SELECT * FROM read_parquet('{mart_path}') LIMIT 1000\").df()\n",
-    "else:\n",
-    "    export_df = None\n",
-    "\n",
-    "if export_df is not None:\n",
-    "    display(export_df.head())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "if EXPORT and export_df is not None:\n",
-    "    OUT_DIR.mkdir(parents=True, exist_ok=True)\n",
-    "    csv_path = OUT_DIR / f'{TABLE_NAME}_dashboard.csv'\n",
-    "    parquet_path = OUT_DIR / f'{TABLE_NAME}_dashboard.parquet'\n",
-    "    export_df.to_csv(csv_path, index=False)\n",
-    "    con.register('export_df_view', export_df)\n",
-    "    con.execute(f\"COPY (SELECT * FROM export_df_view) TO '{parquet_path}' (FORMAT PARQUET)\")\n",
-    "    print(csv_path)\n",
-    "    print(parquet_path)\n",
-    "else:\n",
-    "    print('Export disabled. Set EXPORT = True to write files into ../_tmp/.')"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "name": "python",
-   "version": "3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/notebooks/03_explore_mart.ipynb b/notebooks/03_explore_mart.ipynb
new file mode 100644
index 0000000..df2d7e7
--- /dev/null
+++ b/notebooks/03_explore_mart.ipynb
@@ -0,0 +1,100 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 03 Explore MART\n",
+    "\n",
+    "- apre il primo mart dichiarato in `dataset.yml`\n",
+    "- mostra schema, preview e aggregazioni esplorative\n",
+    "- aiuta a collegare i mart alle domande civiche"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "import duckdb\n",
+    "import yaml\n",
+    "\n",
+    "ROOT = Path('.').resolve()\n",
+    "DATASET_YML = (ROOT / 'dataset.yml').resolve() if (ROOT / 'dataset.yml').exists() else (ROOT / '..' / 'dataset.yml').resolve()\n",
+    "CFG = yaml.safe_load(DATASET_YML.read_text(encoding='utf-8'))\n",
+    "BASE_DIR = DATASET_YML.parent\n",
+    "OUT_ROOT = (BASE_DIR / CFG.get('root', '.')).resolve()\n",
+    "DATASET = CFG['dataset']['name']\n",
+    "YEARS = CFG['dataset']['years']\n",
+    "YEAR_INDEX = 0\n",
+    "YEAR = YEARS[YEAR_INDEX] if YEARS and 0 <= YEAR_INDEX < len(YEARS) else YEARS[0]\n",
+    "TABLES = CFG.get('mart', {}).get('tables', [])\n",
+    "TABLE_INDEX = 0\n",
+    "SELECTED_TABLE = TABLES[TABLE_INDEX] if TABLES and 0 <= TABLE_INDEX < len(TABLES) else (TABLES[0] if TABLES else {'name': 'mart_ok'})\n",
+    "TABLE_NAME = SELECTED_TABLE['name']\n",
+    "MART_PATH = OUT_ROOT / 'data' / 'mart' / DATASET / str(YEAR) / f'{TABLE_NAME}.parquet'\n",
+    "{'YEARS': YEARS, 'YEAR_INDEX': YEAR_INDEX, 'TABLES': [table['name'] for table in TABLES], 'TABLE_INDEX': TABLE_INDEX, 'TABLE_NAME': TABLE_NAME, 'MART_PATH': str(MART_PATH)}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "con = duckdb.connect()\n",
+    "YEAR_COL = None\n",
+    "METRIC_COL = None\n",
+    "\n",
+    "def choose_columns(schema_rows):\n",
+    "    year_col = next((row[0] for row in schema_rows if str(row[0]).lower() == 'year' or 'anno' in str(row[0]).lower()), None)\n",
+    "    numeric_rows = [row for row in schema_rows if any(token in str(row[1]).upper() for token in ['INT', 'DECIMAL', 'DOUBLE', 'FLOAT', 'REAL', 'BIGINT'])]\n",
+    "    metric_col = next((row[0] for row in numeric_rows if any(token in str(row[0]).lower() for token in ['value', 'tot', 'importo', 'ammontare', 'saldo', 'spese', 'entrate', 'pct', 'percent'])), None)\n",
+    "    if metric_col is None and numeric_rows:\n",
+    "        metric_col = numeric_rows[0][0]\n",
+    "    return year_col, metric_col\n",
+    "\n",
+    "if MART_PATH.exists():\n",
+    "    schema_rows = con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{MART_PATH.as_posix()}')\").fetchall()\n",
+    "    schema_df = con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{MART_PATH.as_posix()}')\").df()\n",
+    "    preview_df = con.execute(f\"SELECT * FROM read_parquet('{MART_PATH.as_posix()}') LIMIT 20\").df()\n",
+    "    YEAR_COL, METRIC_COL = choose_columns(schema_rows)\n",
+    "    display(schema_df)\n",
+    "    display(preview_df)\n",
+    "    print({'YEAR_COL': YEAR_COL, 'METRIC_COL': METRIC_COL})\n",
+    "else:\n",
+    "    print('MART parquet not found. Run toolkit run mart --config dataset.yml first.')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if MART_PATH.exists() and YEAR_COL and METRIC_COL:\n",
+    "    by_year = con.execute(\n",
+    "        f\"SELECT {YEAR_COL} AS year_like, COUNT(*) AS rows, SUM({METRIC_COL}) AS metric_total FROM read_parquet('{MART_PATH.as_posix()}') GROUP BY 1 ORDER BY 1\"\n",
+    "    ).df()\n",
+    "    display(by_year)\n",
+    "else:\n",
+    "    print('No year-like column or metric column detected.')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/04_quality_checks.ipynb b/notebooks/04_quality_checks.ipynb
new file mode 100644
index 0000000..f4df43e
--- /dev/null
+++ b/notebooks/04_quality_checks.ipynb
@@ -0,0 +1,106 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 04 Quality Checks\n",
+    "\n",
+    "- esegue controlli ripetibili sul primo mart dichiarato in config\n",
+    "- usa le chiavi di validazione del mart quando disponibili\n",
+    "- aiuta a investigare anomalie residue dopo `toolkit validate all`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "import duckdb\n",
+    "import yaml\n",
+    "\n",
+    "ROOT = Path('.').resolve()\n",
+    "DATASET_YML = (ROOT / 'dataset.yml').resolve() if (ROOT / 'dataset.yml').exists() else (ROOT / '..' / 'dataset.yml').resolve()\n",
+    "CFG = yaml.safe_load(DATASET_YML.read_text(encoding='utf-8'))\n",
+    "BASE_DIR = DATASET_YML.parent\n",
+    "OUT_ROOT = (BASE_DIR / CFG.get('root', '.')).resolve()\n",
+    "DATASET = CFG['dataset']['name']\n",
+    "YEARS = CFG['dataset']['years']\n",
+    "YEAR_INDEX = 0\n",
+    "YEAR = YEARS[YEAR_INDEX] if YEARS and 0 <= YEAR_INDEX < len(YEARS) else YEARS[0]\n",
+    "TABLES = CFG.get('mart', {}).get('tables', [])\n",
+    "TABLE_INDEX = 0\n",
+    "SELECTED_TABLE = TABLES[TABLE_INDEX] if TABLES and 0 <= TABLE_INDEX < len(TABLES) else (TABLES[0] if TABLES else {'name': 'mart_ok'})\n",
+    "TABLE_NAME = SELECTED_TABLE['name']\n",
+    "TABLE_RULES = CFG.get('mart', {}).get('validate', {}).get('table_rules', {}).get(TABLE_NAME, {})\n",
+    "KEY_COLUMNS = TABLE_RULES.get('primary_key', [])\n",
+    "MART_PATH = OUT_ROOT / 'data' / 'mart' / DATASET / str(YEAR) / f'{TABLE_NAME}.parquet'\n",
+    "{'YEARS': YEARS, 'YEAR_INDEX': YEAR_INDEX, 'TABLES': [table['name'] for table in TABLES], 'TABLE_INDEX': TABLE_INDEX, 'TABLE_NAME': TABLE_NAME, 'MART_PATH': str(MART_PATH)}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "con = duckdb.connect()\n",
+    "NUMERIC_COLUMNS = []\n",
+    "\n",
+    "def detect_numeric_columns(path):\n",
+    "    rows = con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{path.as_posix()}')\").fetchall()\n",
+    "    return [row[0] for row in rows if any(token in str(row[1]).upper() for token in ['INT', 'DECIMAL', 'DOUBLE', 'FLOAT', 'REAL', 'BIGINT'])]\n",
+    "\n",
+    "if MART_PATH.exists():\n",
+    "    NUMERIC_COLUMNS = detect_numeric_columns(MART_PATH)[:3]\n",
+    "    print({'KEY_COLUMNS': KEY_COLUMNS, 'NUMERIC_COLUMNS': NUMERIC_COLUMNS})\n",
+    "else:\n",
+    "    print('MART parquet not found.')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if MART_PATH.exists():\n",
+    "    if KEY_COLUMNS:\n",
+    "        keys = ', '.join(KEY_COLUMNS)\n",
+    "        dup_df = con.execute(\n",
+    "            f\"SELECT {keys}, COUNT(*) AS dup_count FROM read_parquet('{MART_PATH.as_posix()}') GROUP BY {keys} HAVING COUNT(*) > 1 ORDER BY dup_count DESC LIMIT 20\"\n",
+    "        ).df()\n",
+    "        display(dup_df)\n",
+    "    else:\n",
+    "        print('Duplicate-key check skipped: no primary key declared for this mart.')\n",
+    "\n",
+    "    columns = [row[0] for row in con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{MART_PATH.as_posix()}')\").fetchall()]\n",
+    "    null_expr = ', '.join([f\"AVG(CASE WHEN {col} IS NULL THEN 1 ELSE 0 END) AS {col}_null_rate\" for col in columns])\n",
+    "    null_df = con.execute(f\"SELECT {null_expr} FROM read_parquet('{MART_PATH.as_posix()}')\").df().T.reset_index()\n",
+    "    display(null_df)\n",
+    "\n",
+    "    if NUMERIC_COLUMNS:\n",
+    "        range_expr = ', '.join([f\"MIN({col}) AS {col}_min, MAX({col}) AS {col}_max\" for col in NUMERIC_COLUMNS])\n",
+    "        range_df = con.execute(f\"SELECT {range_expr} FROM read_parquet('{MART_PATH.as_posix()}')\").df().T.reset_index()\n",
+    "        display(range_df)\n",
+    "else:\n",
+    "    print('No MART output available.')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/05_dashboard_export.ipynb b/notebooks/05_dashboard_export.ipynb
new file mode 100644
index 0000000..9bb1d7d
--- /dev/null
+++ b/notebooks/05_dashboard_export.ipynb
@@ -0,0 +1,122 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 05 Dashboard Export\n",
+    "\n",
+    "- prepara un export leggero partendo dal primo mart dichiarato in config\n",
+    "- non scrive file finché `EXPORT = False`\n",
+    "- usa `_tmp/` per evitare output committati nel repo"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "import duckdb\n",
+    "import yaml\n",
+    "\n",
+    "ROOT = Path('.').resolve()\n",
+    "DATASET_YML = (ROOT / 'dataset.yml').resolve() if (ROOT / 'dataset.yml').exists() else (ROOT / '..' / 'dataset.yml').resolve()\n",
+    "CFG = yaml.safe_load(DATASET_YML.read_text(encoding='utf-8'))\n",
+    "BASE_DIR = DATASET_YML.parent\n",
+    "OUT_ROOT = (BASE_DIR / CFG.get('root', '.')).resolve()\n",
+    "DATASET = CFG['dataset']['name']\n",
+    "YEARS = CFG['dataset']['years']\n",
+    "YEAR_INDEX = 0\n",
+    "YEAR = YEARS[YEAR_INDEX] if YEARS and 0 <= YEAR_INDEX < len(YEARS) else YEARS[0]\n",
+    "TABLES = CFG.get('mart', {}).get('tables', [])\n",
+    "TABLE_INDEX = 0\n",
+    "SELECTED_TABLE = TABLES[TABLE_INDEX] if TABLES and 0 <= TABLE_INDEX < len(TABLES) else (TABLES[0] if TABLES else {'name': 'mart_ok'})\n",
+    "TABLE_NAME = SELECTED_TABLE['name']\n",
+    "MART_PATH = OUT_ROOT / 'data' / 'mart' / DATASET / str(YEAR) / f'{TABLE_NAME}.parquet'\n",
+    "OUT_DIR = (BASE_DIR / '_tmp').resolve()\n",
+    "EXPORT = False\n",
+    "{'YEARS': YEARS, 'YEAR_INDEX': YEAR_INDEX, 'TABLES': [table['name'] for table in TABLES], 'TABLE_INDEX': TABLE_INDEX, 'TABLE_NAME': TABLE_NAME, 'MART_PATH': str(MART_PATH)}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "con = duckdb.connect()\n",
+    "YEAR_COL = None\n",
+    "METRIC_COL = None\n",
+    "export_df = None\n",
+    "\n",
+    "def choose_columns(path):\n",
+    "    rows = con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{path.as_posix()}')\").fetchall()\n",
+    "    year_col = next((row[0] for row in rows if str(row[0]).lower() == 'year' or 'anno' in str(row[0]).lower()), None)\n",
+    "    numeric_rows = [row[0] for row in rows if any(token in str(row[1]).upper() for token in ['INT', 'DECIMAL', 'DOUBLE', 'FLOAT', 'REAL', 'BIGINT'])]\n",
+    "    metric_col = next((col for col in numeric_rows if any(token in col.lower() for token in ['value', 'tot', 'importo', 'ammontare', 'saldo', 'spese', 'entrate', 'pct', 'percent'])), None)\n",
+    "    if metric_col is None and numeric_rows:\n",
+    "        metric_col = numeric_rows[0]\n",
+    "    return year_col, metric_col\n",
+    "\n",
+    "if MART_PATH.exists():\n",
+    "    YEAR_COL, METRIC_COL = choose_columns(MART_PATH)\n",
+    "    print({'YEAR_COL': YEAR_COL, 'METRIC_COL': METRIC_COL})\n",
+    "else:\n",
+    "    print('MART parquet not found.')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if MART_PATH.exists() and YEAR_COL and METRIC_COL:\n",
+    "    export_df = con.execute(\n",
+    "        f\"SELECT {YEAR_COL} AS year_like, {METRIC_COL} AS metric_value FROM read_parquet('{MART_PATH.as_posix()}') ORDER BY 1\"\n",
+    "    ).df()\n",
+    "elif MART_PATH.exists():\n",
+    "    export_df = con.execute(f\"SELECT * FROM read_parquet('{MART_PATH.as_posix()}') LIMIT 1000\").df()\n",
+    "else:\n",
+    "    export_df = None\n",
+    "\n",
+    "if export_df is not None:\n",
+    "    display(export_df.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if EXPORT and export_df is not None:\n",
+    "    OUT_DIR.mkdir(parents=True, exist_ok=True)\n",
+    "    csv_path = OUT_DIR / f'{TABLE_NAME}_dashboard.csv'\n",
+    "    parquet_path = OUT_DIR / f'{TABLE_NAME}_dashboard.parquet'\n",
+    "    export_df.to_csv(csv_path, index=False)\n",
+    "    con.register('export_df_view', export_df)\n",
+    "    con.execute(f\"COPY (SELECT * FROM export_df_view) TO '{parquet_path.as_posix()}' (FORMAT PARQUET)\")\n",
+    "    print(csv_path)\n",
+    "    print(parquet_path)\n",
+    "else:\n",
+    "    print('Export disabled. Set EXPORT = True to write files into ../_tmp/.')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/README.md b/notebooks/README.md
index a5d1986..6fd0746 100644
--- a/notebooks/README.md
+++ b/notebooks/README.md
@@ -1,18 +1,23 @@
 # /notebooks - notebook standard per il dataset
 
-Questa cartella contiene notebook leggeri e clonabili per avvio rapido, esplorazione dei mart e controlli di qualita.
+Questa cartella contiene notebook leggeri e clonabili per tutto il lifecycle operativo del dataset.
 Usano solo Python standard, `duckdb`, path relativi e il file `../dataset.yml` come riferimento di progetto.
 
+I notebook non reimplementano il motore della pipeline.
+Assumono che il toolkit produca output leggibili e servono a ispezionare RAW, CLEAN o MART dal punto di vista del dataset.
+
 ## Notebook inclusi
 
-- `00_quickstart.ipynb` - esegue la pipeline e controlla che esistano tabelle mart leggibili
-- `01_explore_mart.ipynb` - esplorazione public-first dei dati finali
-- `02_quality_checks.ipynb` - controlli ripetibili su duplicati, missingness e range
-- `03_dashboard_export.ipynb` - export opzionali in `../_tmp/`, disattivati di default
+- `00_quickstart.ipynb` - setup, command preview, run opzionale e localizzazione output reali del toolkit
+- `01_inspect_raw.ipynb` - ispezione del layer RAW tramite `manifest.json`, file primario e sample di output
+- `02_inspect_clean.ipynb` - ispezione del parquet CLEAN con schema e sanity checks minimi
+- `03_explore_mart.ipynb` - esplorazione del mart selezionato nella config
+- `04_quality_checks.ipynb` - controlli ripetibili su chiavi, missingness e range del mart selezionato
+- `05_dashboard_export.ipynb` - export opzionali derivati dal mart selezionato nella config
 
 ## Regole
 
 - non salvare output pesanti nel repo
 - se serve esportare file, usa `../_tmp/`
-- mantieni i notebook generici: aggiorna nomi tabella e chiavi senza introdurre logica dataset-specifica
+- mantieni i notebook generici: preferisci leggere `dataset.yml` e usa i parametri iniziali per scegliere anno/tabella
 - per dettagli tecnici della pipeline, vedi il repository Toolkit DataCivicLab
diff --git a/scripts/publish_to_drive.py b/scripts/publish_to_drive.py
new file mode 100644
index 0000000..8da2614
--- /dev/null
+++ b/scripts/publish_to_drive.py
@@ -0,0 +1,203 @@
+"""Publish toolkit output artifacts to a Drive folder.
+
+Copies RAW, CLEAN and MART artifacts of the dataset described by ``dataset.yml``
+into a destination root, keeping each path relative to the configured ``root``.
+"""
+
+from __future__ import annotations
+
+import argparse
+import shutil
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+
+import yaml
+
+
+@dataclass(frozen=True)
+class PublishItem:
+    """One file to publish: where it is read from and where it is copied to."""
+
+    source: Path
+    destination: Path
+
+
+def load_config(config_path: Path) -> dict:
+    """Parse ``dataset.yml`` with the safe YAML loader.
+
+    Raises:
+        ValueError: if the file is empty or its top level is not a mapping.
+    """
+    cfg = yaml.safe_load(config_path.read_text(encoding="utf-8"))
+    if not isinstance(cfg, dict):
+        raise ValueError(f"{config_path} must contain a YAML mapping at the top level")
+    return cfg
+
+
+def resolve_output_root(config_path: Path, cfg: dict) -> Path:
+    """Resolve the ``root`` key against the directory that holds the config file."""
+    # A missing or empty ``root`` means "next to dataset.yml".
+    root_value = cfg.get("root") or "."
+    return (config_path.parent / root_value).resolve()
+
+
+def year_values(cfg: dict, selected_year: int | None) -> list[int]:
+    """Return the years to publish.
+
+    Without ``selected_year`` every year under ``dataset.years`` is returned;
+    otherwise only that year, after checking that it is declared.
+
+    Raises:
+        ValueError: if ``selected_year`` is not listed in ``dataset.years``.
+    """
+    # Copy so callers never alias (and mutate) the parsed configuration.
+    years = list(cfg["dataset"]["years"])
+    if selected_year is None:
+        return years
+    if selected_year not in years:
+        raise ValueError(f"Year {selected_year} not found in dataset.yml years: {years}")
+    return [selected_year]
+
+
+def latest_run_record(run_dir: Path) -> Path | None:
+    """Return the last ``*.json`` file of ``run_dir`` in name order, or ``None``."""
+    if not run_dir.exists():
+        return None
+    # NOTE(review): "latest" relies on file names sorting chronologically - confirm
+    # against the toolkit's run record naming.
+    candidates = sorted(run_dir.glob("*.json"))
+    return candidates[-1] if candidates else None
+
+
+def build_publish_items(
+    *,
+    output_root: Path,
+    dataset: str,
+    years: list[int],
+    drive_root: Path,
+) -> list[PublishItem]:
+    """Collect the artifacts found on disk for each year.
+
+    Per year: every file directly under the RAW directory, the MART
+    ``*.parquet`` files, the CLEAN parquet, each layer's ``metadata.json``,
+    ``manifest.json`` and validation report, and the last run record.
+    Destinations keep the source path relative to ``output_root``.
+
+    Each source is listed once: the RAW listing already includes the RAW
+    metadata, manifest and validation files that the per-layer pass looks up.
+    """
+    items: list[PublishItem] = []
+    seen: set[Path] = set()
+
+    def add(path: Path) -> None:
+        # shutil.copy2 only handles regular files, so directories are skipped.
+        if path in seen or not path.is_file():
+            return
+        seen.add(path)
+        items.append(PublishItem(path, drive_root / path.relative_to(output_root)))
+
+    for year in years:
+        year_text = str(year)
+        mart_dir = output_root / "data" / "mart" / dataset / year_text
+        clean_dir = output_root / "data" / "clean" / dataset / year_text
+        raw_dir = output_root / "data" / "raw" / dataset / year_text
+        runs_dir = output_root / "data" / "_runs" / dataset / year_text
+
+        for path in sorted(raw_dir.glob("*")):
+            add(path)
+
+        for path in sorted(mart_dir.glob("*.parquet")):
+            add(path)
+
+        add(clean_dir / f"{dataset}_{year_text}_clean.parquet")
+
+        for layer_dir, validation_rel in (
+            (raw_dir, "raw_validation.json"),
+            (clean_dir, "_validate/clean_validation.json"),
+            (mart_dir, "_validate/mart_validation.json"),
+        ):
+            for name in ("metadata.json", "manifest.json"):
+                add(layer_dir / name)
+            add(layer_dir / validation_rel)
+
+        latest_run = latest_run_record(runs_dir)
+        if latest_run is not None:
+            add(latest_run)
+
+    return items
+
+
+def copy_items(items: list[PublishItem], *, dry_run: bool) -> tuple[int, int]:
+    """Copy each item to its destination, creating parent directories.
+
+    Returns:
+        ``(copied, missing)``. ``copied`` also counts the items that a dry run
+        would have copied; ``missing`` counts sources that no longer exist.
+    """
+    copied = 0
+    missing = 0
+
+    for item in items:
+        if not item.source.exists():
+            print(f"MISSING {item.source}")
+            missing += 1
+            continue
+
+        print(f"{'DRY-RUN' if dry_run else 'COPY'} {item.source} -> {item.destination}")
+        if not dry_run:
+            item.destination.parent.mkdir(parents=True, exist_ok=True)
+            shutil.copy2(item.source, item.destination)
+        copied += 1
+
+    return copied, missing
+
+
+def build_parser() -> argparse.ArgumentParser:
+    """Build the command-line parser for the publish script."""
+    parser = argparse.ArgumentParser(
+        description="Publish dataset artifacts to Drive preserving toolkit output paths under root."
+    )
+    parser.add_argument("--config", default="dataset.yml", help="Path to dataset.yml")
+    parser.add_argument("--drive-root", required=True, help="Destination root in Drive")
+    parser.add_argument("--year", type=int, help="Publish a single year")
+    parser.add_argument("--dry-run", action="store_true", help="Show what would be copied without writing")
+    return parser
+
+
+def main() -> int:
+    """Run the publish; return 0 when nothing was missing, 1 otherwise."""
+    args = build_parser().parse_args()
+    config_path = Path(args.config).resolve()
+    cfg = load_config(config_path)
+
+    dataset = cfg["dataset"]["name"]
+    years = year_values(cfg, args.year)
+    output_root = resolve_output_root(config_path, cfg)
+    drive_root = Path(args.drive_root).resolve()
+
+    items = build_publish_items(
+        output_root=output_root,
+        dataset=dataset,
+        years=years,
+        drive_root=drive_root,
+    )
+
+    print(
+        {
+            "dataset": dataset,
+            "years": years,
+            "output_root": str(output_root),
+            "drive_root": str(drive_root),
+            "dry_run": args.dry_run,
+            "items": len(items),
+        }
+    )
+
+    copied, missing = copy_items(items, dry_run=args.dry_run)
+    print({"copied": copied, "missing": missing})
+    return 0 if missing == 0 else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/smoke.sh b/scripts/smoke.sh
index 4b056e6..ec02853 100644
--- a/scripts/smoke.sh
+++ b/scripts/smoke.sh
@@ -24,12 +24,8 @@ detect_toolkit_module() {
   if [ -z "${PYTHON_BIN:-}" ]; then
     return 1
   fi
-  if "${PYTHON_BIN}" -c "import importlib.util, sys; sys.exit(0 if importlib.util.find_spec('toolkit') else 1)" >/dev/null 2>&1; then
-    echo toolkit
-    return 0
-  fi
-  if "${PYTHON_BIN}" -c "import importlib.util, sys; sys.exit(0 if importlib.util.find_spec('dataciviclab_toolkit') else 1)" >/dev/null 2>&1; then
-    echo dataciviclab_toolkit
+  if "${PYTHON_BIN}" -c "import importlib.util, sys; sys.exit(0 if importlib.util.find_spec('toolkit.cli.app') else 1)" >/dev/null 2>&1; then
+    echo toolkit.cli.app
     return 0
   fi
   return 1
@@ -54,6 +50,21 @@ detect_year() {
   echo 2023
 }
 
+# Print the dataset name: DATASET_NAME when set, otherwise the first `name:` value
+# found in DATASET_FILE, otherwise the placeholder "dataset_unknown".
+detect_dataset() {
+  if [ -n "${DATASET_NAME:-}" ]; then
+    echo "${DATASET_NAME}"
+    return 0
+  fi
+  parsed_dataset=""
+  if [ -f "${DATASET_FILE}" ]; then
+    # The value may be unquoted, double-quoted or single-quoted.
+    parsed_dataset="$(sed -n "s/^[[:space:]]*name:[[:space:]]*[\"']\{0,1\}\([^\"']*\)[\"']\{0,1\}[[:space:]]*\$/\1/p" "${DATASET_FILE}" | head -n 1)"
+  fi
+  echo "${parsed_dataset:-dataset_unknown}"
+}
+
 run_toolkit() {
   if [ -n "${TOOLKIT_COMMAND:-}" ]; then
     "${TOOLKIT_COMMAND}" "$@"
@@ -63,7 +74,7 @@ run_toolkit() {
     "${PYTHON_BIN}" -m "${TOOLKIT_MODULE}" "$@"
     return 0
   fi
-  echo "Toolkit non disponibile: imposta TOOLKIT_BIN oppure installa un modulo Python 'toolkit' o 'dataciviclab_toolkit'." >&2
+  echo "Toolkit non disponibile: imposta TOOLKIT_BIN oppure installa il modulo Python del toolkit." >&2
   exit 2
 }
 
@@ -78,20 +89,21 @@ else
 fi
 
 if [ -z "${TOOLKIT_COMMAND}" ] && [ -z "${TOOLKIT_MODULE}" ]; then
-  echo "Toolkit non trovato. Provati: comando '${TOOLKIT_BIN}', modulo 'toolkit', modulo 'dataciviclab_toolkit'." >&2
+  echo "Toolkit non trovato. Provati: comando '${TOOLKIT_BIN}', modulo 'toolkit.cli.app'." >&2
   exit 2
 fi
 
 YEAR="$(detect_year "${1:-}")"
+DATASET_NAME="$(detect_dataset)"
 
 echo "DCL_ROOT=${DCL_ROOT}"
 echo "DATASET_FILE=${DATASET_FILE}"
 echo "TOOLKIT_BIN=${TOOLKIT_BIN}"
 echo "TOOLKIT_COMMAND=${TOOLKIT_COMMAND:-<none>}"
 echo "TOOLKIT_MODULE=${TOOLKIT_MODULE:-<none>}"
+echo "DATASET_NAME=${DATASET_NAME}"
 echo "YEAR=${YEAR}"
 
-run_toolkit run raw --config "${DATASET_FILE}" --year "${YEAR}"
-run_toolkit run clean --config "${DATASET_FILE}" --year "${YEAR}"
-run_toolkit run mart --config "${DATASET_FILE}" --year "${YEAR}"
-run_toolkit validate --config "${DATASET_FILE}" --year "${YEAR}"
+run_toolkit run all --config "${DATASET_FILE}"
+run_toolkit validate all --config "${DATASET_FILE}"
+run_toolkit status --dataset "${DATASET_NAME}" --year "${YEAR}" --latest --config "${DATASET_FILE}"
diff --git a/sql/clean.sql b/sql/clean.sql
index 51afa63..22585a2 100644
--- a/sql/clean.sql
+++ b/sql/clean.sql
@@ -1,23 +1,33 @@
--- clean.sql
--- Purpose: placeholder transformation for the CLEAN layer.
--- Contract: read from the source configured in dataset.yml and keep the query portable.
+WITH base AS (
+  SELECT
+    TRY_CAST(TRIM(CAST("ANNO" AS VARCHAR)) AS INTEGER) AS anno,
 
-with source_rows as (
-    select
-        year,
-        entity_id,
-        metric_value
-    from raw_input
-),
-normalized as (
-    select
-        cast(year as integer) as year,
-        cast(entity_id as varchar) as entity_id,
-        cast(metric_value as double) as metric_value
-    from source_rows
+    TRY_CAST(TRIM(CAST("RISPARMIO_PUBBLICO" AS VARCHAR)) AS DOUBLE) AS risparmio_pubblico,
+    TRY_CAST(TRIM(CAST("SALDO_NETTO" AS VARCHAR)) AS DOUBLE) AS saldo_netto,
+    TRY_CAST(TRIM(CAST("INDEBITAMENTO_NETTO" AS VARCHAR)) AS DOUBLE) AS indebitamento_netto,
+    TRY_CAST(TRIM(CAST("RICORSO_MERCATO" AS VARCHAR)) AS DOUBLE) AS ricorso_mercato,
+    TRY_CAST(TRIM(CAST("AVANZO_PRIMARIO" AS VARCHAR)) AS DOUBLE) AS avanzo_primario,
+
+    TRY_CAST(TRIM(CAST("SPESE_CORRENTI" AS VARCHAR)) AS DOUBLE) AS spese_correnti,
+    TRY_CAST(TRIM(CAST("SPESE_INTERESSI" AS VARCHAR)) AS DOUBLE) AS spese_interessi,
+    TRY_CAST(TRIM(CAST("SPESE_CONTO_CAPITALE" AS VARCHAR)) AS DOUBLE) AS spese_conto_capitale,
+    TRY_CAST(TRIM(CAST("SPESE_ACQ_ATT_FINE" AS VARCHAR)) AS DOUBLE) AS spese_acq_att_fin,
+    TRY_CAST(TRIM(CAST("SPESE_RIMBORSO_PRESTITI" AS VARCHAR)) AS DOUBLE) AS spese_rimborso_prestiti,
+    TRY_CAST(TRIM(CAST("SPESE_COMPLESSIVE" AS VARCHAR)) AS DOUBLE) AS spese_complessive,
+    TRY_CAST(TRIM(CAST("SPESE_FINALI" AS VARCHAR)) AS DOUBLE) AS spese_finali,
+    TRY_CAST(TRIM(CAST("SPESE_FIN_NETTO_ATT_FIN" AS VARCHAR)) AS DOUBLE) AS spese_fin_netto_att_fin,
+
+    TRY_CAST(TRIM(CAST("ENTRATE_TRIBUTARIE" AS VARCHAR)) AS DOUBLE) AS entrate_tributarie,
+    TRY_CAST(TRIM(CAST("ENTRATE_EXTRA_TRIBUTARIE" AS VARCHAR)) AS DOUBLE) AS entrate_extra_tributarie,
+    TRY_CAST(TRIM(CAST("ENTR_ALIEN_PATR_RISCOS" AS VARCHAR)) AS DOUBLE) AS entr_alien_patr_riscos,
+    TRY_CAST(TRIM(CAST("RISCOSSIONE_CREDITI" AS VARCHAR)) AS DOUBLE) AS riscossione_crediti,
+    TRY_CAST(TRIM(CAST("ENTR_ACCENSIONE_PRESTITI" AS VARCHAR)) AS DOUBLE) AS entr_accensione_prestiti,
+    TRY_CAST(TRIM(CAST("ENTRATE_FINALI" AS VARCHAR)) AS DOUBLE) AS entrate_finali,
+    TRY_CAST(TRIM(CAST("ENTR_FIN_NETTO_RISCO_CRED" AS VARCHAR)) AS DOUBLE) AS entr_fin_netto_risco_cred,
+    TRY_CAST(TRIM(CAST("ENTRATE_CORRENTI" AS VARCHAR)) AS DOUBLE) AS entrate_correnti
+  FROM raw_input
 )
-select
-    year,
-    entity_id,
-    metric_value
-from normalized;
+
+SELECT *
+FROM base
+WHERE anno IS NOT NULL;
diff --git a/sql/mart/mart_ok.sql b/sql/mart/mart_ok.sql
new file mode 100644
index 0000000..8893846
--- /dev/null
+++ b/sql/mart/mart_ok.sql
@@ -0,0 +1,15 @@
+SELECT
+  anno,
+  saldo_netto,
+  indebitamento_netto,
+  avanzo_primario,
+  entrate_finali,
+  spese_finali,
+  entrate_finali - spese_finali AS differenza_entrate_spese,
+  CASE
+    WHEN spese_finali IS NULL OR spese_finali = 0 THEN NULL
+    ELSE entrate_finali / spese_finali
+  END AS rapporto_entrate_spese
+FROM clean_input
+WHERE anno IS NOT NULL
+ORDER BY anno
diff --git a/sql/mart/project_summary.sql b/sql/mart/project_summary.sql
deleted file mode 100644
index eaa4488..0000000
--- a/sql/mart/project_summary.sql
+++ /dev/null
@@ -1,25 +0,0 @@
--- project_summary.sql
--- Purpose: placeholder MART query aligned with the toolkit `mart.tables[]` contract.
--- Contract: consume `clean_input` and expose a dashboard-ready table.
-
-with clean_rows as (
-    select
-        year,
-        entity_id,
-        metric_value
-    from clean_input
-),
-project_summary as (
-    select
-        year,
-        count(*) as rows_in_year,
-        sum(metric_value) as total_metric_value
-    from clean_rows
-    group by year
-)
-select
-    year,
-    rows_in_year,
-    total_metric_value
-from project_summary
-order by year;
diff --git a/tests/test_contract.py b/tests/test_contract.py
index 06c79bf..e58fc1b 100644
--- a/tests/test_contract.py
+++ b/tests/test_contract.py
@@ -13,7 +13,6 @@
 REQUIRED_FILES = [
     REPO_ROOT / "dataset.yml",
     REPO_ROOT / "sql" / "clean.sql",
-    REPO_ROOT / "sql" / "mart" / "project_summary.sql",
     REPO_ROOT / "docs" / "sources.md",
     REPO_ROOT / "docs" / "decisions.md",
     REPO_ROOT / "docs" / "data_dictionary.md",
@@ -22,6 +21,10 @@
 ]
 
 
+def _load_dataset() -> dict:
+    return yaml.safe_load(DATASET_FILE.read_text(encoding="utf-8"))
+
+
 def _iter_path_values(node: object):
     if isinstance(node, dict):
         for key, value in node.items():
@@ -38,38 +41,52 @@ def test_required_files_exist() -> None:
     assert not missing, f"Missing required template files: {missing}"
 
 
-def test_dataset_uses_supported_contract_keys() -> None:
-    dataset = yaml.safe_load(DATASET_FILE.read_text(encoding="utf-8"))
-    clean_read = dataset["clean"]["read"]
+def test_dataset_declares_minimum_contract() -> None:
+    dataset = _load_dataset()
 
+    assert dataset.get("schema_version") == 1
+    assert "root" in dataset
     assert "dataset" in dataset
     assert "name" in dataset["dataset"]
     assert "years" in dataset["dataset"]
-    assert dataset["validation"]["fail_on_error"] is True
-    assert "source" in clean_read
-    assert "header" in clean_read
-    assert "columns" in clean_read
-    assert "csv" not in clean_read
+    assert isinstance(dataset["dataset"]["years"], list)
+    assert dataset["dataset"]["years"]
+    assert "raw" in dataset
+    assert "sources" in dataset["raw"]
+    assert isinstance(dataset["raw"]["sources"], list)
+    assert dataset["raw"]["sources"]
+    assert dataset["raw"]["sources"][0]["primary"] is True
+    assert "clean" in dataset
+    assert dataset["clean"]["sql"]
+    assert dataset["clean"]["read_mode"] in {"strict", "fallback", "robust"}
+    assert "read" in dataset["clean"]
+    assert isinstance(dataset["clean"]["read"], dict)
+    assert dataset["clean"]["read"]["source"] in {"auto", "config_only"}
+    assert dataset["clean"]["read"]["mode"] in {"explicit", "latest", "largest", "all"}
+    assert "header" in dataset["clean"]["read"]
+    assert "columns" in dataset["clean"]["read"]
     assert dataset["clean"]["required_columns"]
     assert dataset["clean"]["validate"]["primary_key"]
     assert dataset["clean"]["validate"]["not_null"]
+    assert dataset["clean"]["validate"]["min_rows"] == 1
     assert "mart" in dataset
     assert "tables" in dataset["mart"]
     assert isinstance(dataset["mart"]["tables"], list)
     assert dataset["mart"]["tables"]
     assert dataset["mart"]["required_tables"]
-    assert dataset["mart"]["validate"]["table_rules"]["project_summary"]["required_columns"]
+    assert "table_rules" in dataset["mart"]["validate"]
+    assert dataset["validation"]["fail_on_error"] is True
+    assert dataset["output"]["artifacts"] in {"minimal", "standard", "debug"}
 
 
-def test_dataset_matches_smoke_contract_shape() -> None:
-    dataset = yaml.safe_load(DATASET_FILE.read_text(encoding="utf-8"))
+def test_dataset_avoids_legacy_clean_read_shape() -> None:
+    dataset = _load_dataset()
 
-    assert dataset["output"]["artifacts"] == "minimal"
     assert "csv" not in dataset["clean"]["read"]
 
 
 def test_dataset_paths_are_relative_and_posix() -> None:
-    dataset = yaml.safe_load(DATASET_FILE.read_text(encoding="utf-8"))
+    dataset = _load_dataset()
 
     for key, value in _iter_path_values(dataset):
         if value.startswith("http://") or value.startswith("https://"):
@@ -81,21 +98,35 @@ def test_dataset_paths_are_relative_and_posix() -> None:
         assert not re.match(r"^[A-Za-z]:[\\/]", value), f"Absolute Windows path found for key '{key}': {value}"
 
 
-def test_yaml_sql_paths_match_template_files() -> None:
-    dataset = yaml.safe_load(DATASET_FILE.read_text(encoding="utf-8"))
+def test_declared_sql_files_exist() -> None:
+    dataset = _load_dataset()
 
-    assert dataset["clean"]["sql"] == "sql/clean.sql"
+    clean_sql = REPO_ROOT / dataset["clean"]["sql"]
+    assert clean_sql.exists(), f"Missing clean SQL file declared in dataset.yml: {dataset['clean']['sql']}"
 
     mart_tables = dataset["mart"]["tables"]
-    project_summary = next((table for table in mart_tables if table["name"] == "project_summary"), None)
-    assert project_summary is not None, "Missing mart table 'project_summary'"
-    assert project_summary["sql"] == "sql/mart/project_summary.sql"
+    for table in mart_tables:
+        assert "name" in table and table["name"], "Each mart table must declare a non-empty name"
+        assert "sql" in table and table["sql"], f"Mart table '{table['name']}' must declare an SQL path"
+        sql_path = REPO_ROOT / table["sql"]
+        assert sql_path.exists(), f"Missing mart SQL file declared in dataset.yml: {table['sql']}"
+
+
+def test_mart_table_names_are_unique() -> None:
+    dataset = _load_dataset()
+    names = [table["name"] for table in dataset["mart"]["tables"]]
+    assert len(names) == len(set(names)), f"Duplicate mart table names found: {names}"
+
 
+def test_required_tables_and_rules_match_declared_marts() -> None:
+    dataset = _load_dataset()
 
-def test_output_artifacts_is_configured() -> None:
-    dataset = yaml.safe_load(DATASET_FILE.read_text(encoding="utf-8"))
+    names = {table["name"] for table in dataset["mart"]["tables"]}
+    required_tables = set(dataset["mart"]["required_tables"])
+    table_rules = set(dataset["mart"]["validate"]["table_rules"].keys())
 
-    assert dataset["output"]["artifacts"] == "minimal"
+    assert required_tables <= names, "mart.required_tables must reference declared mart.tables"
+    assert table_rules <= names, "mart.validate.table_rules must reference declared mart.tables"
 
 
 def test_data_directory_does_not_contain_committed_outputs() -> None:

From fcc553c781d7ddd47252a2acc5516bcec252b8e3 Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Mon, 2 Mar 2026 10:04:35 +0000
Subject: [PATCH 03/11] docs: link stable toolkit notebook and feature docs

---
 README.md                           | 10 +++++++++
 WORKFLOW.md                         |  7 +++++-
 dashboard/README.md                 |  1 +
 docs/contributing.md                | 21 ++++++++++++------
 notebooks/00_quickstart.ipynb       | 34 +++++++++++++++++++++--------
 notebooks/01_inspect_raw.ipynb      | 16 +++++++++-----
 notebooks/02_inspect_clean.ipynb    | 13 +++++++----
 notebooks/03_explore_mart.ipynb     | 13 +++++++----
 notebooks/04_quality_checks.ipynb   | 13 +++++++----
 notebooks/05_dashboard_export.ipynb | 12 +++++++---
 notebooks/README.md                 |  6 +++--
 11 files changed, 106 insertions(+), 40 deletions(-)

diff --git a/README.md b/README.md
index f3edb3b..35e155c 100644
--- a/README.md
+++ b/README.md
@@ -126,6 +126,14 @@ Se lavori con un checkout locale del toolkit, installalo in editable e poi esegu
 Per dettagli tecnici su CLI, configurazione supportata, validazioni runtime e run metadata,
 vedi il repository **Toolkit DataCivicLab**.
 
+I notebook del template usano `toolkit inspect paths --config dataset.yml --year <year> --json` per localizzare gli output reali della pipeline.
+Il workflow principale del template resta centrato su `run all`, `validate all`, `status` e notebook locali; i flow avanzati del toolkit restano documentati nel repo toolkit.
+Per i contratti stabili del toolkit, vedi in particolare:
+
+* `docs/notebook-contract.md`
+* `docs/feature-stability.md`
+* `docs/advanced-workflows.md`
+
 
 ## Archivio Pubblico
 
@@ -135,6 +143,8 @@ Se il progetto pubblica artifact in un archivio pubblico DataCivicLab su Drive,
 2. verificare gli output sotto `root/data/...`
 3. pubblicare solo gli artifact pubblici con uno script separato
 
+Il publish su Drive e una operazione `maintainer-only`, da eseguire in fase di release o merge, non nel workflow base dei contributor.
+
 Esempio:
 
 ```bash
diff --git a/WORKFLOW.md b/WORKFLOW.md
index 2695a4b..466261b 100644
--- a/WORKFLOW.md
+++ b/WORKFLOW.md
@@ -34,4 +34,9 @@ Come contribuire in modo semplice a un progetto dataset DataCivicLab.
 2. esegui `toolkit run all --config dataset.yml`
 3. esegui `toolkit validate all --config dataset.yml`
 4. usa i notebook per ispezionare RAW, CLEAN, MART e QA
-5. se il progetto ha un archivio pubblico, pubblica gli artifact con `py scripts/publish_to_drive.py`
+
+## Maintainers
+
+1. revisiona PR e stato del dataset
+2. verifica `status` e output finali
+3. se il progetto ha un archivio pubblico, pubblica gli artifact con `py scripts/publish_to_drive.py --config dataset.yml --drive-root "<drive>"`
diff --git a/dashboard/README.md b/dashboard/README.md
index b4277ed..1f64be5 100644
--- a/dashboard/README.md
+++ b/dashboard/README.md
@@ -15,3 +15,4 @@ Qui vanno link, note di lettura, screenshot e limiti dell'output, non i dati.
 
 Ogni dashboard dovrebbe essere collegata ai mart documentati e aggiornati del progetto.
 Se il progetto usa un archivio pubblico su Drive, documenta qui quali file pubblicati alimentano la dashboard.
+La pubblicazione su Drive resta una operazione `maintainer-only`.
diff --git a/docs/contributing.md b/docs/contributing.md
index 2d54479..904c937 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -47,6 +47,7 @@ toolkit status --dataset <dataset> --year <year> --latest --config dataset.yml
 ## Publish su Drive
 
 Se il progetto usa un archivio pubblico su Drive, la pubblicazione va fatta dopo `run all` e `validate all`, non durante il run.
+Questo passaggio e `maintainer-only`: non e richiesto ai contributor per lavorare su SQL, docs, test o notebook.
 
 Dry-run:
 
@@ -67,13 +68,17 @@ La destinazione su Drive mantiene gli stessi path relativi sotto `root`, quindi
 
 ```sh
 toolkit run all --config dataset.yml
-toolkit run raw --config dataset.yml
-toolkit run clean --config dataset.yml
-toolkit run mart --config dataset.yml
 toolkit validate all --config dataset.yml
 toolkit status --dataset <dataset> --year <year> --latest --config dataset.yml
+toolkit inspect paths --config dataset.yml --year <year> --json
 ```
 
+Per workflow avanzati come `run raw|clean|mart`, `resume`, `profile raw` o `gen-sql`, vedi la documentazione advanced del toolkit.
+Per il contratto stabile dei notebook e la matrice di stabilita delle feature, vedi anche, nel repo toolkit:
+
+- `docs/notebook-contract.md`
+- `docs/feature-stability.md`
+
 ## Fasi operative
 
 - kickoff e contratto: `dataset.yml`, `README.md`, `tests/test_contract.py`
@@ -88,13 +93,15 @@ toolkit status --dataset <dataset> --year <year> --latest --config dataset.yml
 | Fase | File principali | Comando minimo | Notebook |
 |---|---|---|---|
 | Kickoff | `dataset.yml`, `README.md` | `py -m pytest tests/test_contract.py` | `00_quickstart.ipynb` |
-| Sources/RAW | `dataset.yml`, `docs/sources.md`, `docs/decisions.md` | `toolkit run raw --config dataset.yml` | `01_inspect_raw.ipynb` |
-| CLEAN | `sql/clean.sql`, `dataset.yml`, `docs/data_dictionary.md` | `toolkit run clean --config dataset.yml` | `02_inspect_clean.ipynb` |
-| MART | `sql/mart/*.sql`, `dataset.yml` | `toolkit run mart --config dataset.yml` | `03_explore_mart.ipynb` |
+| Sources/RAW | `dataset.yml`, `docs/sources.md`, `docs/decisions.md` | `toolkit inspect paths --config dataset.yml --year <year> --json` | `01_inspect_raw.ipynb` |
+| CLEAN | `sql/clean.sql`, `dataset.yml`, `docs/data_dictionary.md` | `toolkit inspect paths --config dataset.yml --year <year> --json` | `02_inspect_clean.ipynb` |
+| MART | `sql/mart/*.sql`, `dataset.yml` | `toolkit inspect paths --config dataset.yml --year <year> --json` | `03_explore_mart.ipynb` |
 | QA | `tests/test_contract.py`, `.github/workflows/ci.yml` | `toolkit validate all --config dataset.yml` | `04_quality_checks.ipynb` |
-| Output pubblico | `dashboard/`, `README.md`, `scripts/publish_to_drive.py` | `py scripts/publish_to_drive.py --config dataset.yml --drive-root "<drive>" --dry-run` | `05_dashboard_export.ipynb` |
+| Output pubblico | `dashboard/`, `README.md`, `scripts/publish_to_drive.py` | `maintainer-only: py scripts/publish_to_drive.py --config dataset.yml --drive-root "<drive>" --dry-run` | `05_dashboard_export.ipynb` |
 | Release | `README.md`, `docs/overview.md`, `docs/data_dictionary.md` | `toolkit status --dataset <dataset> --year <year> --latest --config dataset.yml` | `00_quickstart.ipynb` |
 
+I notebook usano `toolkit inspect paths --config dataset.yml --year <year> --json` come contratto stabile per localizzare gli output.
+
 ## Regole veloci
 
 - non committare output sotto `data/`, salvo sample piccoli in `data/_examples`
diff --git a/notebooks/00_quickstart.ipynb b/notebooks/00_quickstart.ipynb
index 0f1f20f..1592f33 100644
--- a/notebooks/00_quickstart.ipynb
+++ b/notebooks/00_quickstart.ipynb
@@ -18,6 +18,7 @@
    "outputs": [],
    "source": [
     "from pathlib import Path\n",
+    "import json\n",
     "import shutil\n",
     "import subprocess\n",
     "import yaml\n",
@@ -26,7 +27,6 @@
     "DATASET_YML = (ROOT / 'dataset.yml').resolve() if (ROOT / 'dataset.yml').exists() else (ROOT / '..' / 'dataset.yml').resolve()\n",
     "CFG = yaml.safe_load(DATASET_YML.read_text(encoding='utf-8'))\n",
     "BASE_DIR = DATASET_YML.parent\n",
-    "OUT_ROOT = (BASE_DIR / CFG.get('root', '.')).resolve()\n",
     "DATASET = CFG['dataset']['name']\n",
     "YEARS = CFG['dataset']['years']\n",
     "YEAR_INDEX = 0\n",
@@ -36,16 +36,32 @@
     "CLI_PREFIX = ['toolkit'] if shutil.which('toolkit') else ['py', '-m', 'toolkit.cli.app']\n",
     "RUN_CMD = CLI_PREFIX + ['run', 'all', '--config', str(DATASET_YML)]\n",
     "VALIDATE_CMD = CLI_PREFIX + ['validate', 'all', '--config', str(DATASET_YML)]\n",
+    "INSPECT_CMD = CLI_PREFIX + ['inspect', 'paths', '--config', str(DATASET_YML), '--year', str(YEAR), '--json']\n",
+    "try:\n",
+    "    INSPECT = json.loads(subprocess.run(INSPECT_CMD, capture_output=True, text=True, check=True).stdout)\n",
+    "except Exception:\n",
+    "    OUT_ROOT = (BASE_DIR / CFG.get('root', '.')).resolve()\n",
+    "    INSPECT = {\n",
+    "        'root': str(OUT_ROOT),\n",
+    "        'paths': {\n",
+    "            'raw': {'dir': str(OUT_ROOT / 'data' / 'raw' / DATASET / str(YEAR))},\n",
+    "            'clean': {'dir': str(OUT_ROOT / 'data' / 'clean' / DATASET / str(YEAR))},\n",
+    "            'mart': {'dir': str(OUT_ROOT / 'data' / 'mart' / DATASET / str(YEAR))},\n",
+    "            'run_dir': str(OUT_ROOT / 'data' / '_runs' / DATASET / str(YEAR)),\n",
+    "        },\n",
+    "        'latest_run': None,\n",
+    "    }\n",
     "\n",
     "{\n",
     "    'DATASET_YML': str(DATASET_YML),\n",
-    "    'OUT_ROOT': str(OUT_ROOT),\n",
+    "    'ROOT': INSPECT.get('root'),\n",
     "    'DATASET': DATASET,\n",
     "    'YEARS': YEARS,\n",
     "    'YEAR_INDEX': YEAR_INDEX,\n",
     "    'YEAR': YEAR,\n",
     "    'MART_TABLES': MART_TABLES,\n",
     "    'CLI_PREFIX': CLI_PREFIX,\n",
+    "    'INSPECT_CMD': INSPECT_CMD,\n",
     "}"
    ]
   },
@@ -55,13 +71,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "expected_paths = {\n",
-    "    'raw_dir': str(OUT_ROOT / 'data' / 'raw' / DATASET / str(YEAR)),\n",
-    "    'clean_dir': str(OUT_ROOT / 'data' / 'clean' / DATASET / str(YEAR)),\n",
-    "    'mart_dir': str(OUT_ROOT / 'data' / 'mart' / DATASET / str(YEAR)),\n",
-    "    'run_dir': str(OUT_ROOT / 'data' / '_runs' / DATASET / str(YEAR)),\n",
-    "}\n",
-    "expected_paths"
+    "{\n",
+    "    'raw_dir': INSPECT['paths']['raw']['dir'],\n",
+    "    'clean_dir': INSPECT['paths']['clean']['dir'],\n",
+    "    'mart_dir': INSPECT['paths']['mart']['dir'],\n",
+    "    'run_dir': INSPECT['paths']['run_dir'],\n",
+    "    'latest_run': INSPECT.get('latest_run'),\n",
+    "}"
    ]
   },
   {
diff --git a/notebooks/01_inspect_raw.ipynb b/notebooks/01_inspect_raw.ipynb
index d17e873..a3577fd 100644
--- a/notebooks/01_inspect_raw.ipynb
+++ b/notebooks/01_inspect_raw.ipynb
@@ -19,28 +19,32 @@
    "source": [
     "from pathlib import Path\n",
     "import json\n",
+    "import shutil\n",
+    "import subprocess\n",
     "import duckdb\n",
     "import yaml\n",
     "\n",
     "ROOT = Path('.').resolve()\n",
     "DATASET_YML = (ROOT / 'dataset.yml').resolve() if (ROOT / 'dataset.yml').exists() else (ROOT / '..' / 'dataset.yml').resolve()\n",
     "CFG = yaml.safe_load(DATASET_YML.read_text(encoding='utf-8'))\n",
-    "BASE_DIR = DATASET_YML.parent\n",
-    "OUT_ROOT = (BASE_DIR / CFG.get('root', '.')).resolve()\n",
     "DATASET = CFG['dataset']['name']\n",
     "YEARS = CFG['dataset']['years']\n",
     "YEAR_INDEX = 0\n",
     "YEAR = YEARS[YEAR_INDEX] if YEARS and 0 <= YEAR_INDEX < len(YEARS) else YEARS[0]\n",
-    "RAW_DIR = OUT_ROOT / 'data' / 'raw' / DATASET / str(YEAR)\n",
-    "MANIFEST_PATH = RAW_DIR / 'manifest.json'\n",
-    "METADATA_PATH = RAW_DIR / 'metadata.json'\n",
-    "VALIDATION_PATH = RAW_DIR / 'raw_validation.json'\n",
+    "CLI_PREFIX = ['toolkit'] if shutil.which('toolkit') else ['py', '-m', 'toolkit.cli.app']\n",
+    "INSPECT_CMD = CLI_PREFIX + ['inspect', 'paths', '--config', str(DATASET_YML), '--year', str(YEAR), '--json']\n",
+    "INSPECT = json.loads(subprocess.run(INSPECT_CMD, capture_output=True, text=True, check=True).stdout)\n",
+    "RAW_DIR = Path(INSPECT['paths']['raw']['dir'])\n",
+    "MANIFEST_PATH = Path(INSPECT['paths']['raw']['manifest'])\n",
+    "METADATA_PATH = Path(INSPECT['paths']['raw']['metadata'])\n",
+    "VALIDATION_PATH = Path(INSPECT['paths']['raw']['validation'])\n",
     "PROFILE_DIR = RAW_DIR / '_profile'\n",
     "\n",
     "{\n",
     "    'YEARS': YEARS,\n",
     "    'YEAR_INDEX': YEAR_INDEX,\n",
     "    'RAW_DIR': str(RAW_DIR),\n",
+    "    'INSPECT_CMD': INSPECT_CMD,\n",
     "    'MANIFEST_EXISTS': MANIFEST_PATH.exists(),\n",
     "    'METADATA_EXISTS': METADATA_PATH.exists(),\n",
     "    'VALIDATION_EXISTS': VALIDATION_PATH.exists(),\n",
diff --git a/notebooks/02_inspect_clean.ipynb b/notebooks/02_inspect_clean.ipynb
index fb6d826..029fef3 100644
--- a/notebooks/02_inspect_clean.ipynb
+++ b/notebooks/02_inspect_clean.ipynb
@@ -18,20 +18,24 @@
    "outputs": [],
    "source": [
     "from pathlib import Path\n",
+    "import json\n",
+    "import shutil\n",
+    "import subprocess\n",
     "import duckdb\n",
     "import yaml\n",
     "\n",
     "ROOT = Path('.').resolve()\n",
     "DATASET_YML = (ROOT / 'dataset.yml').resolve() if (ROOT / 'dataset.yml').exists() else (ROOT / '..' / 'dataset.yml').resolve()\n",
     "CFG = yaml.safe_load(DATASET_YML.read_text(encoding='utf-8'))\n",
-    "BASE_DIR = DATASET_YML.parent\n",
-    "OUT_ROOT = (BASE_DIR / CFG.get('root', '.')).resolve()\n",
     "DATASET = CFG['dataset']['name']\n",
     "YEARS = CFG['dataset']['years']\n",
     "YEAR_INDEX = 0\n",
     "YEAR = YEARS[YEAR_INDEX] if YEARS and 0 <= YEAR_INDEX < len(YEARS) else YEARS[0]\n",
-    "CLEAN_DIR = OUT_ROOT / 'data' / 'clean' / DATASET / str(YEAR)\n",
-    "CLEAN_PATH = CLEAN_DIR / f'{DATASET}_{YEAR}_clean.parquet'\n",
+    "CLI_PREFIX = ['toolkit'] if shutil.which('toolkit') else ['py', '-m', 'toolkit.cli.app']\n",
+    "INSPECT_CMD = CLI_PREFIX + ['inspect', 'paths', '--config', str(DATASET_YML), '--year', str(YEAR), '--json']\n",
+    "INSPECT = json.loads(subprocess.run(INSPECT_CMD, capture_output=True, text=True, check=True).stdout)\n",
+    "CLEAN_DIR = Path(INSPECT['paths']['clean']['dir'])\n",
+    "CLEAN_PATH = Path(INSPECT['paths']['clean']['output'])\n",
     "REQUIRED_COLUMNS = CFG.get('clean', {}).get('required_columns', [])\n",
     "PRIMARY_KEY = CFG.get('clean', {}).get('validate', {}).get('primary_key', [])\n",
     "\n",
@@ -39,6 +43,7 @@
     "    'YEARS': YEARS,\n",
     "    'YEAR_INDEX': YEAR_INDEX,\n",
     "    'CLEAN_PATH': str(CLEAN_PATH),\n",
+    "    'INSPECT_CMD': INSPECT_CMD,\n",
     "    'REQUIRED_COLUMNS': REQUIRED_COLUMNS,\n",
     "    'PRIMARY_KEY': PRIMARY_KEY,\n",
     "}"
diff --git a/notebooks/03_explore_mart.ipynb b/notebooks/03_explore_mart.ipynb
index df2d7e7..38f63f5 100644
--- a/notebooks/03_explore_mart.ipynb
+++ b/notebooks/03_explore_mart.ipynb
@@ -18,14 +18,15 @@
    "outputs": [],
    "source": [
     "from pathlib import Path\n",
+    "import json\n",
+    "import shutil\n",
+    "import subprocess\n",
     "import duckdb\n",
     "import yaml\n",
     "\n",
     "ROOT = Path('.').resolve()\n",
     "DATASET_YML = (ROOT / 'dataset.yml').resolve() if (ROOT / 'dataset.yml').exists() else (ROOT / '..' / 'dataset.yml').resolve()\n",
     "CFG = yaml.safe_load(DATASET_YML.read_text(encoding='utf-8'))\n",
-    "BASE_DIR = DATASET_YML.parent\n",
-    "OUT_ROOT = (BASE_DIR / CFG.get('root', '.')).resolve()\n",
     "DATASET = CFG['dataset']['name']\n",
     "YEARS = CFG['dataset']['years']\n",
     "YEAR_INDEX = 0\n",
@@ -34,8 +35,12 @@
     "TABLE_INDEX = 0\n",
     "SELECTED_TABLE = TABLES[TABLE_INDEX] if TABLES and 0 <= TABLE_INDEX < len(TABLES) else (TABLES[0] if TABLES else {'name': 'mart_ok'})\n",
     "TABLE_NAME = SELECTED_TABLE['name']\n",
-    "MART_PATH = OUT_ROOT / 'data' / 'mart' / DATASET / str(YEAR) / f'{TABLE_NAME}.parquet'\n",
-    "{'YEARS': YEARS, 'YEAR_INDEX': YEAR_INDEX, 'TABLES': [table['name'] for table in TABLES], 'TABLE_INDEX': TABLE_INDEX, 'TABLE_NAME': TABLE_NAME, 'MART_PATH': str(MART_PATH)}"
+    "CLI_PREFIX = ['toolkit'] if shutil.which('toolkit') else ['py', '-m', 'toolkit.cli.app']\n",
+    "INSPECT_CMD = CLI_PREFIX + ['inspect', 'paths', '--config', str(DATASET_YML), '--year', str(YEAR), '--json']\n",
+    "INSPECT = json.loads(subprocess.run(INSPECT_CMD, capture_output=True, text=True, check=True).stdout)\n",
+    "MART_OUTPUTS = INSPECT['paths']['mart']['outputs']\n",
+    "MART_PATH = Path(MART_OUTPUTS[TABLE_INDEX]) if MART_OUTPUTS and 0 <= TABLE_INDEX < len(MART_OUTPUTS) else (Path(MART_OUTPUTS[0]) if MART_OUTPUTS else Path(INSPECT['paths']['mart']['dir']) / f'{TABLE_NAME}.parquet')\n",
+    "{'YEARS': YEARS, 'YEAR_INDEX': YEAR_INDEX, 'TABLES': [table['name'] for table in TABLES], 'TABLE_INDEX': TABLE_INDEX, 'TABLE_NAME': TABLE_NAME, 'MART_PATH': str(MART_PATH), 'INSPECT_CMD': INSPECT_CMD}"
    ]
   },
   {
diff --git a/notebooks/04_quality_checks.ipynb b/notebooks/04_quality_checks.ipynb
index f4df43e..f5aca77 100644
--- a/notebooks/04_quality_checks.ipynb
+++ b/notebooks/04_quality_checks.ipynb
@@ -18,14 +18,15 @@
    "outputs": [],
    "source": [
     "from pathlib import Path\n",
+    "import json\n",
+    "import shutil\n",
+    "import subprocess\n",
     "import duckdb\n",
     "import yaml\n",
     "\n",
     "ROOT = Path('.').resolve()\n",
     "DATASET_YML = (ROOT / 'dataset.yml').resolve() if (ROOT / 'dataset.yml').exists() else (ROOT / '..' / 'dataset.yml').resolve()\n",
     "CFG = yaml.safe_load(DATASET_YML.read_text(encoding='utf-8'))\n",
-    "BASE_DIR = DATASET_YML.parent\n",
-    "OUT_ROOT = (BASE_DIR / CFG.get('root', '.')).resolve()\n",
     "DATASET = CFG['dataset']['name']\n",
     "YEARS = CFG['dataset']['years']\n",
     "YEAR_INDEX = 0\n",
@@ -36,8 +37,12 @@
     "TABLE_NAME = SELECTED_TABLE['name']\n",
     "TABLE_RULES = CFG.get('mart', {}).get('validate', {}).get('table_rules', {}).get(TABLE_NAME, {})\n",
     "KEY_COLUMNS = TABLE_RULES.get('primary_key', [])\n",
-    "MART_PATH = OUT_ROOT / 'data' / 'mart' / DATASET / str(YEAR) / f'{TABLE_NAME}.parquet'\n",
-    "{'YEARS': YEARS, 'YEAR_INDEX': YEAR_INDEX, 'TABLES': [table['name'] for table in TABLES], 'TABLE_INDEX': TABLE_INDEX, 'TABLE_NAME': TABLE_NAME, 'MART_PATH': str(MART_PATH)}"
+    "CLI_PREFIX = ['toolkit'] if shutil.which('toolkit') else ['py', '-m', 'toolkit.cli.app']\n",
+    "INSPECT_CMD = CLI_PREFIX + ['inspect', 'paths', '--config', str(DATASET_YML), '--year', str(YEAR), '--json']\n",
+    "INSPECT = json.loads(subprocess.run(INSPECT_CMD, capture_output=True, text=True, check=True).stdout)\n",
+    "MART_OUTPUTS = INSPECT['paths']['mart']['outputs']\n",
+    "MART_PATH = Path(MART_OUTPUTS[TABLE_INDEX]) if MART_OUTPUTS and 0 <= TABLE_INDEX < len(MART_OUTPUTS) else (Path(MART_OUTPUTS[0]) if MART_OUTPUTS else Path(INSPECT['paths']['mart']['dir']) / f'{TABLE_NAME}.parquet')\n",
+    "{'YEARS': YEARS, 'YEAR_INDEX': YEAR_INDEX, 'TABLES': [table['name'] for table in TABLES], 'TABLE_INDEX': TABLE_INDEX, 'TABLE_NAME': TABLE_NAME, 'MART_PATH': str(MART_PATH), 'INSPECT_CMD': INSPECT_CMD}"
    ]
   },
   {
diff --git a/notebooks/05_dashboard_export.ipynb b/notebooks/05_dashboard_export.ipynb
index 9bb1d7d..e14c218 100644
--- a/notebooks/05_dashboard_export.ipynb
+++ b/notebooks/05_dashboard_export.ipynb
@@ -18,6 +18,9 @@
    "outputs": [],
    "source": [
     "from pathlib import Path\n",
+    "import json\n",
+    "import shutil\n",
+    "import subprocess\n",
     "import duckdb\n",
     "import yaml\n",
     "\n",
@@ -25,7 +28,6 @@
     "DATASET_YML = (ROOT / 'dataset.yml').resolve() if (ROOT / 'dataset.yml').exists() else (ROOT / '..' / 'dataset.yml').resolve()\n",
     "CFG = yaml.safe_load(DATASET_YML.read_text(encoding='utf-8'))\n",
     "BASE_DIR = DATASET_YML.parent\n",
-    "OUT_ROOT = (BASE_DIR / CFG.get('root', '.')).resolve()\n",
     "DATASET = CFG['dataset']['name']\n",
     "YEARS = CFG['dataset']['years']\n",
     "YEAR_INDEX = 0\n",
@@ -34,10 +36,14 @@
     "TABLE_INDEX = 0\n",
     "SELECTED_TABLE = TABLES[TABLE_INDEX] if TABLES and 0 <= TABLE_INDEX < len(TABLES) else (TABLES[0] if TABLES else {'name': 'mart_ok'})\n",
     "TABLE_NAME = SELECTED_TABLE['name']\n",
-    "MART_PATH = OUT_ROOT / 'data' / 'mart' / DATASET / str(YEAR) / f'{TABLE_NAME}.parquet'\n",
+    "CLI_PREFIX = ['toolkit'] if shutil.which('toolkit') else ['py', '-m', 'toolkit.cli.app']\n",
+    "INSPECT_CMD = CLI_PREFIX + ['inspect', 'paths', '--config', str(DATASET_YML), '--year', str(YEAR), '--json']\n",
+    "INSPECT = json.loads(subprocess.run(INSPECT_CMD, capture_output=True, text=True, check=True).stdout)\n",
+    "MART_OUTPUTS = INSPECT['paths']['mart']['outputs']\n",
+    "MART_PATH = Path(MART_OUTPUTS[TABLE_INDEX]) if MART_OUTPUTS and 0 <= TABLE_INDEX < len(MART_OUTPUTS) else (Path(MART_OUTPUTS[0]) if MART_OUTPUTS else Path(INSPECT['paths']['mart']['dir']) / f'{TABLE_NAME}.parquet')\n",
     "OUT_DIR = (BASE_DIR / '_tmp').resolve()\n",
     "EXPORT = False\n",
-    "{'YEARS': YEARS, 'YEAR_INDEX': YEAR_INDEX, 'TABLES': [table['name'] for table in TABLES], 'TABLE_INDEX': TABLE_INDEX, 'TABLE_NAME': TABLE_NAME, 'MART_PATH': str(MART_PATH)}"
+    "{'YEARS': YEARS, 'YEAR_INDEX': YEAR_INDEX, 'TABLES': [table['name'] for table in TABLES], 'TABLE_INDEX': TABLE_INDEX, 'TABLE_NAME': TABLE_NAME, 'MART_PATH': str(MART_PATH), 'INSPECT_CMD': INSPECT_CMD}"
    ]
   },
   {
diff --git a/notebooks/README.md b/notebooks/README.md
index 6fd0746..f929106 100644
--- a/notebooks/README.md
+++ b/notebooks/README.md
@@ -1,10 +1,12 @@
 # /notebooks - notebook standard per il dataset
 
 Questa cartella contiene notebook leggeri e clonabili per tutto il lifecycle operativo del dataset.
-Usano solo Python standard, `duckdb`, path relativi e il file `../dataset.yml` come riferimento di progetto.
+Usano Python standard, `duckdb` e il contratto stabile `toolkit inspect paths --json` per scoprire gli output reali del progetto.
 
 I notebook non reimplementano il motore della pipeline.
-Assumono che il toolkit produca output leggibili e servono a ispezionare RAW, CLEAN o MART dal punto di vista del dataset.
+Usano `toolkit inspect paths --json` come fonte primaria per localizzare RAW, CLEAN, MART e run record, e servono a ispezionare gli output dal punto di vista del dataset.
+Il comando puo essere disponibile come `toolkit ...` oppure come fallback `py -m toolkit.cli.app ...` (`py` e il launcher Python di Windows; su Linux/macOS usa `python -m toolkit.cli.app ...`).
+Per i dettagli stabili lato toolkit, vedi `docs/notebook-contract.md` e `docs/feature-stability.md` nel repo toolkit.
 
 ## Notebook inclusi
 

From c4b0cf81fcb3bdecf799e7970e09d2ee071e1c11 Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Mon, 2 Mar 2026 14:02:16 +0000
Subject: [PATCH 04/11] docs: clarify layer workflow with inspect paths
 discovery

---
 .github/seed-issues/02_sources.md |  1 +
 .github/seed-issues/03_raw.md     |  2 ++
 .github/seed-issues/04_clean.md   |  2 ++
 .github/seed-issues/05_mart.md    |  2 ++
 .github/seed-issues/08_release.md |  2 +-
 README.md                         |  3 +++
 WORKFLOW.md                       |  2 +-
 docs/README.md                    |  2 +-
 docs/contributing.md              | 12 +++++++++---
 docs/lab_links.md                 | 17 +++++++++--------
 10 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/.github/seed-issues/02_sources.md b/.github/seed-issues/02_sources.md
index ce12dc8..725c0e0 100644
--- a/.github/seed-issues/02_sources.md
+++ b/.github/seed-issues/02_sources.md
@@ -33,6 +33,7 @@ Fonte verificata e configurata in `dataset.yml`, con documentazione sufficiente
 
 - notebook consigliato: `notebooks/01_inspect_raw.ipynb`
 - comando minimo: `toolkit run raw --config dataset.yml`
+- dopo il run usa: `toolkit inspect paths --config dataset.yml --year <year> --json`
 
 ## File da toccare
 
diff --git a/.github/seed-issues/03_raw.md b/.github/seed-issues/03_raw.md
index 52adf76..7f62545 100644
--- a/.github/seed-issues/03_raw.md
+++ b/.github/seed-issues/03_raw.md
@@ -19,6 +19,7 @@ Ottenere un layer RAW eseguibile e ripetibile, senza committare output in repo.
 
 - [ ] Verificare `raw.sources[]`, `primary` ed eventuale extractor in `dataset.yml`
 - [ ] Eseguire `toolkit run raw --config dataset.yml`
+- [ ] Usare `toolkit inspect paths --config dataset.yml --year <year> --json` per localizzare gli artifact RAW
 - [ ] Controllare `manifest.json`, `metadata.json` e `raw_validation.json`
 - [ ] Controllare metadata, manifest e validation report del RAW
 - [ ] Confermare che `data/` non contenga output committati
@@ -32,6 +33,7 @@ RAW eseguibile con report minimi di validazione e metadata disponibili negli art
 
 - notebook consigliato: `notebooks/01_inspect_raw.ipynb`
 - path attesi: `root/data/raw/<dataset>/<year>/`
+- comando di discovery: `toolkit inspect paths --config dataset.yml --year <year> --json`
 
 ## File da toccare
 
diff --git a/.github/seed-issues/04_clean.md b/.github/seed-issues/04_clean.md
index 6fb4487..fc40d99 100644
--- a/.github/seed-issues/04_clean.md
+++ b/.github/seed-issues/04_clean.md
@@ -23,6 +23,7 @@ Portare il dataset da RAW a CLEAN con SQL esplicita, schema documentato e valida
 - [ ] Verificare chiavi logiche, `not_null`, `min_rows` e duplicati
 - [ ] Eseguire `toolkit run clean --config dataset.yml`
 - [ ] Eseguire `toolkit validate clean --config dataset.yml`
+- [ ] Usare `toolkit inspect paths --config dataset.yml --year <year> --json` per localizzare il layer CLEAN
 - [ ] Aggiornare `docs/data_dictionary.md` per il layer CLEAN
 - [ ] Loggare assunzioni e mapping in `docs/decisions.md`
 
@@ -34,6 +35,7 @@ Layer CLEAN riproducibile, con schema e regole di validazione sufficienti per al
 
 - notebook consigliato: `notebooks/02_inspect_clean.ipynb`
 - path attesi: `root/data/clean/<dataset>/<year>/`
+- comando di discovery: `toolkit inspect paths --config dataset.yml --year <year> --json`
 
 ## File da toccare
 
diff --git a/.github/seed-issues/05_mart.md b/.github/seed-issues/05_mart.md
index 9bac54a..ff39aea 100644
--- a/.github/seed-issues/05_mart.md
+++ b/.github/seed-issues/05_mart.md
@@ -22,6 +22,7 @@ Produrre uno o piu mart orientati a KPI e output finali, con tabella/e e validat
 - [ ] Allineare `mart.tables` in `dataset.yml` e aggiungere eventuali regole di validazione supportate dal toolkit
 - [ ] Eseguire `toolkit run mart --config dataset.yml --year <year>`
 - [ ] Eseguire `toolkit validate --config dataset.yml --year <year>`
+- [ ] Usare `toolkit inspect paths --config dataset.yml --year <year> --json` per localizzare i mart
 - [ ] Verificare required columns, chiavi, `not_null`, `min_rows` e KPI sanity
 - [ ] Aggiornare `docs/data_dictionary.md` con granularita, KPI e semantica dei mart
 
@@ -33,6 +34,7 @@ Mart pronti per dashboard o report, con SQL separata per tabella e regole di val
 
 - notebook consigliato: `notebooks/03_explore_mart.ipynb`
 - comando minimo: `toolkit run mart --config dataset.yml`
+- comando di discovery: `toolkit inspect paths --config dataset.yml --year <year> --json`
 
 ## File da toccare
 
diff --git a/.github/seed-issues/08_release.md b/.github/seed-issues/08_release.md
index 2b8b517..2fcf1af 100644
--- a/.github/seed-issues/08_release.md
+++ b/.github/seed-issues/08_release.md
@@ -19,7 +19,7 @@ Portare il progetto a una release riproducibile, spiegabile e pronta per handoff
 ## Checklist
 
 - [ ] Aggiornare `README.md` con scopo, metodo, output e limiti
-- [ ] Verificare `docs/lab_links.md` per release policy, DoD e riferimenti Lab-wide
+- [ ] Verificare `docs/lab_links.md` per hub DataCivicLab, policy comuni e riferimenti al toolkit
 - [ ] Confermare che `output.artifacts` resti su `minimal` o motivare eccezioni
 - [ ] Collegare eventuale dashboard o report ai mart corretti
 - [ ] Verificare che documentazione e artifact minimi siano coerenti
diff --git a/README.md b/README.md
index 35e155c..b3188bb 100644
--- a/README.md
+++ b/README.md
@@ -134,6 +134,8 @@ Per i contratti stabili del toolkit, vedi in particolare:
 * `docs/feature-stability.md`
 * `docs/advanced-workflows.md`
 
+Per il contesto dell'ecosistema DataCivicLab, la mappa delle repo e le policy condivise, usa invece i riferimenti in `docs/lab_links.md`.
+
 
 ## Archivio Pubblico
 
@@ -172,3 +174,4 @@ Esempio:
 
 Parte del progetto DataCivicLab.
 Costruiamo infrastruttura open per analisi pubbliche riproducibili.
+Per capire come si colloca questa repo nell'organizzazione, parti da `docs/lab_links.md`.
diff --git a/WORKFLOW.md b/WORKFLOW.md
index 466261b..fcd06b2 100644
--- a/WORKFLOW.md
+++ b/WORKFLOW.md
@@ -11,7 +11,7 @@ Come contribuire in modo semplice a un progetto dataset DataCivicLab.
 ## Dove andare
 
 - setup e contributo rapido: [docs/contributing.md](docs/contributing.md)
-- standard Lab, DoD e release policy: [docs/lab_links.md](docs/lab_links.md)
+- contesto DataCivicLab, policy comuni e motore tecnico: [docs/lab_links.md](docs/lab_links.md)
 - indice docs locali: [docs/README.md](docs/README.md)
 
 ## Confine tecnico
diff --git a/docs/README.md b/docs/README.md
index abcaf67..4024356 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,7 +1,7 @@
 # Docs
 
 Questa cartella contiene i documenti locali, specifici di questo dataset.
-Per standard del Lab vedi [lab_links.md](lab_links.md).
+Per standard del Lab e riferimenti organizzativi vedi [lab_links.md](lab_links.md).
 
 Il motore della pipeline non vive qui: questa documentazione descrive il dataset e il suo contratto verso il toolkit.
 Le decisioni operative sul run reale devono restare coerenti con la CLI e con lo schema config del toolkit.
diff --git a/docs/contributing.md b/docs/contributing.md
index 904c937..d524af7 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -79,6 +79,12 @@ Per il contratto stabile dei notebook e la matrice di stabilita delle feature, v
 - `docs/notebook-contract.md`
 - `docs/feature-stability.md`
 
+Quando lavori per layer invece che con `run all`, usa questa regola semplice:
+
+- `toolkit run raw|clean|mart ...` produce gli artifact
+- `toolkit inspect paths --config dataset.yml --year <year> --json` ti dice dove leggerli
+- notebook e controlli manuali devono leggere i path restituiti, non ricostruirli a mano
+
 ## Fasi operative
 
 - kickoff e contratto: `dataset.yml`, `README.md`, `tests/test_contract.py`
@@ -93,9 +99,9 @@ Per il contratto stabile dei notebook e la matrice di stabilita delle feature, v
 | Fase | File principali | Comando minimo | Notebook |
 |---|---|---|---|
 | Kickoff | `dataset.yml`, `README.md` | `py -m pytest tests/test_contract.py` | `00_quickstart.ipynb` |
-| Sources/RAW | `dataset.yml`, `docs/sources.md`, `docs/decisions.md` | `toolkit inspect paths --config dataset.yml --year <year> --json` | `01_inspect_raw.ipynb` |
-| CLEAN | `sql/clean.sql`, `dataset.yml`, `docs/data_dictionary.md` | `toolkit inspect paths --config dataset.yml --year <year> --json` | `02_inspect_clean.ipynb` |
-| MART | `sql/mart/*.sql`, `dataset.yml` | `toolkit inspect paths --config dataset.yml --year <year> --json` | `03_explore_mart.ipynb` |
+| Sources/RAW | `dataset.yml`, `docs/sources.md`, `docs/decisions.md` | `toolkit run raw --config dataset.yml`, poi `toolkit inspect paths --config dataset.yml --year <year> --json` | `01_inspect_raw.ipynb` |
+| CLEAN | `sql/clean.sql`, `dataset.yml`, `docs/data_dictionary.md` | `toolkit run clean --config dataset.yml`, poi `toolkit inspect paths --config dataset.yml --year <year> --json` | `02_inspect_clean.ipynb` |
+| MART | `sql/mart/*.sql`, `dataset.yml` | `toolkit run mart --config dataset.yml`, poi `toolkit inspect paths --config dataset.yml --year <year> --json` | `03_explore_mart.ipynb` |
 | QA | `tests/test_contract.py`, `.github/workflows/ci.yml` | `toolkit validate all --config dataset.yml` | `04_quality_checks.ipynb` |
 | Output pubblico | `dashboard/`, `README.md`, `scripts/publish_to_drive.py` | `maintainer-only: py scripts/publish_to_drive.py --config dataset.yml --drive-root "<drive>" --dry-run` | `05_dashboard_export.ipynb` |
 | Release | `README.md`, `docs/overview.md`, `docs/data_dictionary.md` | `toolkit status --dataset <dataset> --year <year> --latest --config dataset.yml` | `00_quickstart.ipynb` |
diff --git a/docs/lab_links.md b/docs/lab_links.md
index 5c75687..6268911 100644
--- a/docs/lab_links.md
+++ b/docs/lab_links.md
@@ -1,15 +1,16 @@
 # Lab Links
 
 Gli standard del Lab sono centralizzati e non vengono duplicati in questo template.
-Usa questa pagina come ponte verso handbook e repository org-wide.
+Usa questa pagina come ponte verso i repository organizzativi corretti.
 
-## Handbook
+## Hub del Lab
 
-- [Handbook: Method](TODO: link repo dataciviclab/handbook)
-- [Handbook: Definition of Done](TODO: link repo dataciviclab/handbook)
-- [Handbook: Release policy](TODO: link repo dataciviclab/handbook)
-- [Handbook: Roles](TODO: link repo dataciviclab/handbook)
+- [dataciviclab](https://github.com/dataciviclab/dataciviclab): hub pubblico del Lab, mappa delle repo, catalogo dataset, governance alta e canali community
 
-## Org-wide
+## Policy organizzative
 
-- [dataciviclab/.github: Issue and PR templates](TODO: link repo dataciviclab/.github)
+- [.github](https://github.com/dataciviclab/.github): policy condivise, issue template, PR template e community health files
+
+## Motore tecnico
+
+- [toolkit](https://github.com/dataciviclab/toolkit): workflow tecnico canonico, CLI, contratti stabili e documentazione del motore

From 336f08ba2682c8e2c03128084e94a353743aaa94 Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Mon, 2 Mar 2026 15:35:51 +0000
Subject: [PATCH 05/11] docs: finalize template framing for cloned dataset
 repos

---
 README.md            | 163 ++++++++++++++++++-------------------------
 WORKFLOW.md          |   6 +-
 docs/README.md       |   3 +
 docs/contributing.md |  22 ++++--
 docs/lab_links.md    |   5 ++
 5 files changed, 97 insertions(+), 102 deletions(-)

diff --git a/README.md b/README.md
index b3188bb..3788222 100644
--- a/README.md
+++ b/README.md
@@ -1,143 +1,138 @@
-# 📊 [Nome dataset] — DataCivicLab
+# [Nome dataset] - DataCivicLab
 
 Questo progetto analizza **[fenomeno pubblico]** per rispondere a una domanda semplice:
 **cosa sta succedendo, dove e con quali differenze nel tempo?**
 
-È pensato per chi vuole orientarsi in fretta:
+E pensato per chi vuole orientarsi in fretta:
 capire cosa mostrano i dati, dove sono solidi, quali limiti hanno e quali domande aiutano ad approfondire.
 
-* **Stato:** [alpha | beta | stable]
-* **Copertura:** [anni], [territorio]
-* **Unità di analisi:** [Comune / ASL / Provincia / …]
+- **Stato:** [alpha | beta | stable]
+- **Copertura:** [anni], [territorio]
+- **Unita di analisi:** [Comune / ASL / Provincia / ...]
 
 ## 🎯 La domanda civica
 
 **[Scrivi qui la domanda chiave in una frase chiara.]**
 
-Esempio:
+Esempi:
 
-* Come varia [fenomeno] tra territori?
-* Dove si osservano miglioramenti o peggioramenti?
-* Il mio territorio è sopra o sotto la media?
+- Come varia [fenomeno] tra territori?
+- Dove si osservano miglioramenti o peggioramenti?
+- Il mio territorio e sopra o sotto la media?
 
 ## 🔎 Cosa puoi capire con questi dati
 
-* come cambia il fenomeno nel tempo
-* quali territori mostrano differenze significative
-* se il tuo territorio è sopra o sotto la media
-* se emergono anomalie o salti improvvisi
-* quali aree meritano un approfondimento mirato
+- come cambia il fenomeno nel tempo
+- quali territori mostrano differenze significative
+- se il tuo territorio e sopra o sotto la media
+- se emergono anomalie o salti improvvisi
+- quali aree meritano un approfondimento mirato
 
-Non è solo un dataset: è una base per confronto e monitoraggio.
+Non e solo un dataset: e una base per confronto e monitoraggio.
 
 ## 📦 Output disponibili
 
 Le tabelle finali sono pronte per dashboard, grafici e analisi.
 
-* `mart.[tabella_1]` — confronti territoriali o temporali
-* `mart.[tabella_2]` — indicatori sintetici, ranking o riepiloghi
+- `mart.[tabella_1]` - confronti territoriali o temporali
+- `mart.[tabella_2]` - indicatori sintetici, ranking o riepiloghi
 
 Definizioni dettagliate di colonne e metriche:
-👉 `docs/data_dictionary.md`
-
+`docs/data_dictionary.md`
 
-## ✅ Perché fidarsi
+## ✅ Perche fidarsi
 
 La fiducia si costruisce su trasparenza e metodo.
 
-* fonti ufficiali o verificabili (`docs/sources.md`)
-* trasformazioni documentate (`docs/decisions.md`)
-* controlli automatici prima della pubblicazione
-* standard condivisi del DataCivicLab
+- fonti ufficiali o verificabili (`docs/sources.md`)
+- trasformazioni documentate (`docs/decisions.md`)
+- controlli automatici prima della pubblicazione
+- standard condivisi del DataCivicLab
 
 Ogni scelta che cambia il significato dei dati viene esplicitata.
 
-
 ## 💬 Partecipa
 
 Questo repository distingue chiaramente:
 
-* **Discussions** → domande civiche, interpretazioni, proposte di metriche
-* **Issues** → bug, problemi tecnici, miglioramenti della pipeline
-
-Se non sei tecnico, parti da una **Discussion**:
-spiega il contesto, il territorio o l’anno che ti interessa e cosa vuoi capire.
+- **Discussions** -> domande civiche, interpretazioni, proposte di metriche
+- **Issues** -> bug, problemi tecnici, miglioramenti della pipeline
 
+Se non sei tecnico, parti da una **Discussion** in questa repo:
+spiega il contesto, il territorio o l'anno che ti interessa e cosa vuoi capire.
 
 ## 📚 Documentazione del dataset
 
-* `docs/overview.md` — contesto, copertura, limiti
-* `docs/sources.md` — fonti ufficiali
-* `docs/data_dictionary.md` — colonne e metriche
-* `docs/decisions.md` — scelte progettuali
-* `docs/contributing.md` — come contribuire
-
-
-## Confine con il toolkit
+- `docs/overview.md` - contesto, copertura, limiti
+- `docs/sources.md` - fonti ufficiali
+- `docs/data_dictionary.md` - colonne e metriche
+- `docs/decisions.md` - scelte progettuali
+- `docs/contributing.md` - come contribuire
 
-Questo repository contiene il contratto del dataset:
+## 🧩 Cos'e questa repo
 
-* configurazione in `dataset.yml`
-* trasformazioni SQL in `sql/`
-* test di contratto e documentazione locale
-* notebook leggeri per ispezione degli output
+Questo repository e il template operativo da cui nascono i repo dataset DataCivicLab.
 
-Il motore della pipeline vive nel repository **Toolkit DataCivicLab**.
-Questa repo non replica la logica di esecuzione del toolkit: definisce input, regole e output attesi per questo dataset.
-
-In pratica:
-
-* bug o feature della CLI, runner, validazioni runtime e metadata di run → repo `toolkit`
-* bug o modifiche a fonti, mapping, SQL, mart, docs e notebook di dataset → questa repo
+Qui trovi il minimo necessario per far partire un progetto concreto:
 
+- `dataset.yml` come contratto del dataset
+- `sql/` per CLEAN e MART
+- `docs/` per documentazione locale del dataset
+- `tests/` per i contract tests minimi
+- `notebooks/` per leggere gli output reali della pipeline
 
-## 🧭 Roadmap
+## 🛠️ Confine con il toolkit
 
-La roadmap è gestita con **issue + milestone**.
+Il motore della pipeline vive nel repository `toolkit`.
+Questa repo non replica la logica di esecuzione del motore: definisce input, regole e output attesi per questo dataset.
 
+In pratica:
 
-## 🔁 Clonabilità
-
-Questo repository è un modello per progetti dataset DataCivicLab.
+- bug o feature di CLI, runner, validazioni runtime e run metadata -> repo `toolkit`
+- bug o modifiche a fonti, mapping, SQL, mart, docs e notebook di dataset -> questa repo
 
-`dataset.yml` in root è un esempio eseguibile completo, utile per smoke e onboarding.
-Chi clona questo template deve adattarlo al dataset reale, non copiarlo come contratto finale immutabile.
+## 🔁 Da dove partire
 
-Per adattarlo a un nuovo dataset:
+Se stai clonando il template per un nuovo progetto:
 
 1. aggiorna la domanda civica e gli esempi di insight
-2. sostituisci fonti, copertura e unità di analisi
+2. sostituisci fonti, copertura e unita di analisi
 3. definisci metriche e tabelle finali
 4. documenta le decisioni specifiche del dataset
+5. esegui `py -m pytest tests/test_contract.py`
 
-La struttura resta invariata.
-
+La struttura resta invariata. Non serve capire tutto subito: qui trovi la base pratica da cui partire.
 
-## 🧪 Esecuzione tecnica (per contributor)
+## 🧪 Esecuzione tecnica
 
 ```bash
 pip install dataciviclab-toolkit
 toolkit run all --config dataset.yml
 toolkit validate all --config dataset.yml
+toolkit status --dataset <dataset> --year <year> --latest --config dataset.yml
 ```
 
-Se lavori con un checkout locale del toolkit, installalo in editable e poi esegui i comandi da questa repo.
+I notebook del template usano anche:
 
-Per dettagli tecnici su CLI, configurazione supportata, validazioni runtime e run metadata,
-vedi il repository **Toolkit DataCivicLab**.
+```bash
+toolkit inspect paths --config dataset.yml --year <year> --json
+```
+
+Per dettagli piu profondi su CLI, contratti stabili, workflow advanced e feature stability, il posto giusto e `toolkit`.
 
-I notebook del template usano `toolkit inspect paths --config dataset.yml --year <year> --json` per localizzare gli output reali della pipeline.
-Il workflow principale del template resta centrato su `run all`, `validate all`, `status` e notebook locali; i flow avanzati del toolkit restano documentati nel repo toolkit.
-Per i contratti stabili del toolkit, vedi in particolare:
+## 🧭 Dove andare per il resto
 
-* `docs/notebook-contract.md`
-* `docs/feature-stability.md`
-* `docs/advanced-workflows.md`
+Questa repo resta focalizzata sul progetto dataset.
 
-Per il contesto dell'ecosistema DataCivicLab, la mappa delle repo e le policy condivise, usa invece i riferimenti in `docs/lab_links.md`.
+Per il resto:
 
+- contesto del Lab, mappa delle repo e catalogo dataset: `dataciviclab`
+- policy comuni, onboarding GitHub, issue/PR template e community health: `.github`
+- motore tecnico della pipeline e documentazione del runtime: `toolkit`
 
-## Archivio Pubblico
+I riferimenti rapidi sono raccolti in `docs/lab_links.md`.
+
+## 🌍 Archivio pubblico
 
 Se il progetto pubblica artifact in un archivio pubblico DataCivicLab su Drive, il flusso consigliato e:
 
@@ -145,7 +140,7 @@ Se il progetto pubblica artifact in un archivio pubblico DataCivicLab su Drive,
 2. verificare gli output sotto `root/data/...`
 3. pubblicare solo gli artifact pubblici con uno script separato
 
-Il publish su Drive e una operazione `maintainer-only`, da eseguire in fase di release o merge, non nel workflow base dei contributor.
+Questo passaggio e `maintainer-only`.
 
 Esempio:
 
@@ -154,24 +149,4 @@ py scripts/publish_to_drive.py --config dataset.yml --drive-root "G:\\DataCivicL
 py scripts/publish_to_drive.py --config dataset.yml --drive-root "G:\\DataCivicLab" --year 2022
 ```
 
-Per default lo script pubblica:
-
-* payload RAW
-* metadata, manifest e validation di `raw`, `clean`, `mart`
-* parquet CLEAN
-* parquet MART
-* ultimo run record
-
 La destinazione su Drive mantiene lo stesso path relativo degli output del toolkit sotto `root`.
-
-Esempio:
-
-* locale: `root/data/mart/<dataset>/<year>/mart_ok.parquet`
-* Drive: `<drive-root>/data/mart/<dataset>/<year>/mart_ok.parquet`
-
-
-## 🌍 DataCivicLab
-
-Parte del progetto DataCivicLab.
-Costruiamo infrastruttura open per analisi pubbliche riproducibili.
-Per capire come si colloca questa repo nell'organizzazione, parti da `docs/lab_links.md`.
diff --git a/WORKFLOW.md b/WORKFLOW.md
index fcd06b2..6091db9 100644
--- a/WORKFLOW.md
+++ b/WORKFLOW.md
@@ -4,8 +4,8 @@ Come contribuire in modo semplice a un progetto dataset DataCivicLab.
 
 ## Percorsi
 
-- feedback o idee: apri una Discussion o una Issue
-- avanzamento: usa gli issues e la Board
+- feedback o idee: usa le Discussions della repo se vuoi lasciare una traccia ragionata
+- avanzamento operativo: usa issue, project board o milestone della repo
 - insight o visual: parti da `sql/` o `dashboard/` se il progetto li prevede
 
 ## Dove andare
@@ -28,6 +28,8 @@ Come contribuire in modo semplice a un progetto dataset DataCivicLab.
 3. lavora su branch dedicato
 4. apri una PR piccola e leggibile
 
+La traccia utile del lavoro deve restare su GitHub.
+
 ## Flusso tecnico minimo
 
 1. valida la config con `py -m pytest tests/test_contract.py`
diff --git a/docs/README.md b/docs/README.md
index 4024356..af43896 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -6,6 +6,9 @@ Per standard del Lab e riferimenti organizzativi vedi [lab_links.md](lab_links.m
 Il motore della pipeline non vive qui: questa documentazione descrive il dataset e il suo contratto verso il toolkit.
 Le decisioni operative sul run reale devono restare coerenti con la CLI e con lo schema config del toolkit.
 
+Se ti serve contesto generale su DataCivicLab, parti da `dataciviclab`.
+Se ti servono policy comuni o istruzioni GitHub valide per tutta l'organizzazione, parti da `.github`.
+
 ## Essenziali
 
 - [overview.md](overview.md)
diff --git a/docs/contributing.md b/docs/contributing.md
index d524af7..7fdf8dd 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -1,6 +1,9 @@
 # Contributing
 
-Guida rapida per contribuire ai dati senza dover leggere tutta la documentazione tecnica del progetto.
+Guida rapida per contribuire a un repo dataset senza dover capire tutto l'ecosistema in un colpo solo.
+
+Le policy comuni dell'organizzazione non vengono duplicate qui: per quelle, il posto giusto e `.github`.
+Questo documento resta pratico e locale al repo dataset.
 
 ## Setup minimo
 
@@ -11,7 +14,7 @@ Prerequisiti:
 - lavorare sempre dalla root del progetto
 
 Questa repo contiene configurazione dataset, SQL, documentazione e test di contratto.
-Il motore della pipeline sta nel repo toolkit.
+Il motore della pipeline sta nel repo `toolkit`.
 
 ## Contract tests
 
@@ -32,11 +35,11 @@ Per uno smoke test end-to-end:
 sh scripts/smoke.sh
 ```
 
-Se il toolkit non è nel `PATH`, usa il fallback documentato nello script.
-Se lo smoke fallisce per un problema del motore, apri il bug nel repo toolkit.
+Se il toolkit non e nel `PATH`, usa il fallback documentato nello script.
+Se lo smoke fallisce per un problema del motore, apri il bug nel repo `toolkit`.
 Se fallisce per config, SQL o assunzioni sul dato, correggi questa repo.
 
-Su Windows, se `sh` non è disponibile nel `PATH`, usa una shell POSIX come Git Bash oppure esegui i comandi toolkit equivalenti:
+Su Windows, se `sh` non e disponibile nel `PATH`, usa una shell POSIX come Git Bash oppure esegui i comandi toolkit equivalenti:
 
 ```powershell
 toolkit run all --config dataset.yml
@@ -44,6 +47,13 @@ toolkit validate all --config dataset.yml
 toolkit status --dataset <dataset> --year <year> --latest --config dataset.yml
 ```
 
+## Dove scrivere cosa
+
+- Discussions della repo: domande, interpretazioni, proposte e contesto
+- Issues della repo: bug, task e blocchi operativi
+- Project board o milestone della repo, se presenti: avanzamento e priorita
+- Discord o altri canali veloci del team: utili per scambio rapido, non come fonte canonica
+
 ## Publish su Drive
 
 Se il progetto usa un archivio pubblico su Drive, la pubblicazione va fatta dopo `run all` e `validate all`, non durante il run.
@@ -126,4 +136,4 @@ I notebook usano `toolkit inspect paths --config dataset.yml --year <year> --jso
 
 - workflow umano: [../WORKFLOW.md](../WORKFLOW.md)
 - docs locali: [README.md](README.md)
-- standard Lab: [lab_links.md](lab_links.md)
+- contesto DataCivicLab, policy comuni e motore: [lab_links.md](lab_links.md)
diff --git a/docs/lab_links.md b/docs/lab_links.md
index 6268911..ed48ca9 100644
--- a/docs/lab_links.md
+++ b/docs/lab_links.md
@@ -14,3 +14,8 @@ Usa questa pagina come ponte verso i repository organizzativi corretti.
 ## Motore tecnico
 
 - [toolkit](https://github.com/dataciviclab/toolkit): workflow tecnico canonico, CLI, contratti stabili e documentazione del motore
+
+## Canali pubblici
+
+- usa i canali della singola repo dataset per lasciare traccia utile del lavoro
+- se ti serve contesto org-wide, parti da `dataciviclab` e da `.github`

From d48b58d84cc7797a3e4b78476a96f0ba64be17bb Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Mon, 2 Mar 2026 23:00:45 +0000
Subject: [PATCH 06/11] Align template notebooks with toolkit path contract

---
 docs/contributing.md                |  8 +++++++-
 notebooks/00_quickstart.ipynb       | 18 ++++--------------
 notebooks/03_explore_mart.ipynb     |  6 +++---
 notebooks/04_quality_checks.ipynb   |  6 +++---
 notebooks/05_dashboard_export.ipynb |  8 ++++----
 notebooks/README.md                 |  7 +++++++
 scripts/smoke.sh                    |  1 +
 7 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/docs/contributing.md b/docs/contributing.md
index 7fdf8dd..80c85a8 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -83,7 +83,7 @@ toolkit status --dataset <dataset> --year <year> --latest --config dataset.yml
 toolkit inspect paths --config dataset.yml --year <year> --json
 ```
 
-Per workflow avanzati come `run raw|clean|mart`, `resume`, `profile raw` o `gen-sql`, vedi la documentazione advanced del toolkit.
+Per workflow avanzati come `run raw|clean|mart`, `resume` o `profile raw`, vedi la documentazione advanced del toolkit.
 Per il contratto stabile dei notebook e la matrice di stabilita delle feature, vedi anche:
 
 - `docs/notebook-contract.md`
@@ -118,6 +118,12 @@ Quando lavori per layer invece che con `run all`, usa questa regola semplice:
 
 I notebook usano `toolkit inspect paths --config dataset.yml --year <year> --json` come contratto stabile per localizzare gli output.
 
+Ruoli minimi da tenere distinti nei notebook:
+
+- `metadata.json` = payload ricco del layer
+- `manifest.json` = summary stabile del layer con puntatori a metadata e validation
+- `data/_runs/.../<run_id>.json` = stato del run usato da `status` e `resume`
+
 ## Regole veloci
 
 - non committare output sotto `data/`, salvo sample piccoli in `data/_examples`
diff --git a/notebooks/00_quickstart.ipynb b/notebooks/00_quickstart.ipynb
index 1592f33..20d11ab 100644
--- a/notebooks/00_quickstart.ipynb
+++ b/notebooks/00_quickstart.ipynb
@@ -37,20 +37,10 @@
     "RUN_CMD = CLI_PREFIX + ['run', 'all', '--config', str(DATASET_YML)]\n",
     "VALIDATE_CMD = CLI_PREFIX + ['validate', 'all', '--config', str(DATASET_YML)]\n",
     "INSPECT_CMD = CLI_PREFIX + ['inspect', 'paths', '--config', str(DATASET_YML), '--year', str(YEAR), '--json']\n",
-    "try:\n",
-    "    INSPECT = json.loads(subprocess.run(INSPECT_CMD, capture_output=True, text=True, check=True).stdout)\n",
-    "except Exception:\n",
-    "    OUT_ROOT = (BASE_DIR / CFG.get('root', '.')).resolve()\n",
-    "    INSPECT = {\n",
-    "        'root': str(OUT_ROOT),\n",
-    "        'paths': {\n",
-    "            'raw': {'dir': str(OUT_ROOT / 'data' / 'raw' / DATASET / str(YEAR))},\n",
-    "            'clean': {'dir': str(OUT_ROOT / 'data' / 'clean' / DATASET / str(YEAR))},\n",
-    "            'mart': {'dir': str(OUT_ROOT / 'data' / 'mart' / DATASET / str(YEAR))},\n",
-    "            'run_dir': str(OUT_ROOT / 'data' / '_runs' / DATASET / str(YEAR)),\n",
-    "        },\n",
-    "        'latest_run': None,\n",
-    "    }\n",
+    "INSPECT_RESULT = subprocess.run(INSPECT_CMD, capture_output=True, text=True)\n",
+    "if INSPECT_RESULT.returncode != 0:\n",
+    "    raise RuntimeError(INSPECT_RESULT.stderr.strip() or INSPECT_RESULT.stdout.strip() or 'toolkit inspect paths failed')\n",
+    "INSPECT = json.loads(INSPECT_RESULT.stdout)\n",
     "\n",
     "{\n",
     "    'DATASET_YML': str(DATASET_YML),\n",
diff --git a/notebooks/03_explore_mart.ipynb b/notebooks/03_explore_mart.ipynb
index 38f63f5..0b698d1 100644
--- a/notebooks/03_explore_mart.ipynb
+++ b/notebooks/03_explore_mart.ipynb
@@ -39,7 +39,7 @@
     "INSPECT_CMD = CLI_PREFIX + ['inspect', 'paths', '--config', str(DATASET_YML), '--year', str(YEAR), '--json']\n",
     "INSPECT = json.loads(subprocess.run(INSPECT_CMD, capture_output=True, text=True, check=True).stdout)\n",
     "MART_OUTPUTS = INSPECT['paths']['mart']['outputs']\n",
-    "MART_PATH = Path(MART_OUTPUTS[TABLE_INDEX]) if MART_OUTPUTS and 0 <= TABLE_INDEX < len(MART_OUTPUTS) else (Path(MART_OUTPUTS[0]) if MART_OUTPUTS else Path(INSPECT['paths']['mart']['dir']) / f'{TABLE_NAME}.parquet')\n",
+    "MART_PATH = Path(MART_OUTPUTS[TABLE_INDEX]) if 0 <= TABLE_INDEX < len(MART_OUTPUTS) else None\n",
     "{'YEARS': YEARS, 'YEAR_INDEX': YEAR_INDEX, 'TABLES': [table['name'] for table in TABLES], 'TABLE_INDEX': TABLE_INDEX, 'TABLE_NAME': TABLE_NAME, 'MART_PATH': str(MART_PATH), 'INSPECT_CMD': INSPECT_CMD}"
    ]
   },
@@ -61,7 +61,7 @@
     "        metric_col = numeric_rows[0][0]\n",
     "    return year_col, metric_col\n",
     "\n",
-    "if MART_PATH.exists():\n",
+    "if MART_PATH and MART_PATH.exists():\n",
     "    schema_rows = con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{MART_PATH.as_posix()}')\").fetchall()\n",
     "    schema_df = con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{MART_PATH.as_posix()}')\").df()\n",
     "    preview_df = con.execute(f\"SELECT * FROM read_parquet('{MART_PATH.as_posix()}') LIMIT 20\").df()\n",
@@ -79,7 +79,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "if MART_PATH.exists() and YEAR_COL and METRIC_COL:\n",
+    "if MART_PATH and MART_PATH.exists() and YEAR_COL and METRIC_COL:\n",
     "    by_year = con.execute(\n",
     "        f\"SELECT {YEAR_COL} AS year_like, COUNT(*) AS rows, SUM({METRIC_COL}) AS metric_total FROM read_parquet('{MART_PATH.as_posix()}') GROUP BY 1 ORDER BY 1\"\n",
     "    ).df()\n",
diff --git a/notebooks/04_quality_checks.ipynb b/notebooks/04_quality_checks.ipynb
index f5aca77..6cb3c89 100644
--- a/notebooks/04_quality_checks.ipynb
+++ b/notebooks/04_quality_checks.ipynb
@@ -41,7 +41,7 @@
     "INSPECT_CMD = CLI_PREFIX + ['inspect', 'paths', '--config', str(DATASET_YML), '--year', str(YEAR), '--json']\n",
     "INSPECT = json.loads(subprocess.run(INSPECT_CMD, capture_output=True, text=True, check=True).stdout)\n",
     "MART_OUTPUTS = INSPECT['paths']['mart']['outputs']\n",
-    "MART_PATH = Path(MART_OUTPUTS[TABLE_INDEX]) if MART_OUTPUTS and 0 <= TABLE_INDEX < len(MART_OUTPUTS) else (Path(MART_OUTPUTS[0]) if MART_OUTPUTS else Path(INSPECT['paths']['mart']['dir']) / f'{TABLE_NAME}.parquet')\n",
+    "MART_PATH = Path(MART_OUTPUTS[TABLE_INDEX]) if 0 <= TABLE_INDEX < len(MART_OUTPUTS) else None\n",
     "{'YEARS': YEARS, 'YEAR_INDEX': YEAR_INDEX, 'TABLES': [table['name'] for table in TABLES], 'TABLE_INDEX': TABLE_INDEX, 'TABLE_NAME': TABLE_NAME, 'MART_PATH': str(MART_PATH), 'INSPECT_CMD': INSPECT_CMD}"
    ]
   },
@@ -58,7 +58,7 @@
     "    rows = con.execute(f\"DESCRIBE SELECT * FROM read_parquet('{path.as_posix()}')\").fetchall()\n",
     "    return [row[0] for row in rows if any(token in str(row[1]).upper() for token in ['INT', 'DECIMAL', 'DOUBLE', 'FLOAT', 'REAL', 'BIGINT'])]\n",
     "\n",
-    "if MART_PATH.exists():\n",
+    "if MART_PATH and MART_PATH.exists():\n",
     "    NUMERIC_COLUMNS = detect_numeric_columns(MART_PATH)[:3]\n",
     "    print({'KEY_COLUMNS': KEY_COLUMNS, 'NUMERIC_COLUMNS': NUMERIC_COLUMNS})\n",
     "else:\n",
@@ -71,7 +71,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "if MART_PATH.exists():\n",
+    "if MART_PATH and MART_PATH.exists():\n",
     "    if KEY_COLUMNS:\n",
     "        keys = ', '.join(KEY_COLUMNS)\n",
     "        dup_df = con.execute(\n",
diff --git a/notebooks/05_dashboard_export.ipynb b/notebooks/05_dashboard_export.ipynb
index e14c218..c29b947 100644
--- a/notebooks/05_dashboard_export.ipynb
+++ b/notebooks/05_dashboard_export.ipynb
@@ -40,7 +40,7 @@
     "INSPECT_CMD = CLI_PREFIX + ['inspect', 'paths', '--config', str(DATASET_YML), '--year', str(YEAR), '--json']\n",
     "INSPECT = json.loads(subprocess.run(INSPECT_CMD, capture_output=True, text=True, check=True).stdout)\n",
     "MART_OUTPUTS = INSPECT['paths']['mart']['outputs']\n",
-    "MART_PATH = Path(MART_OUTPUTS[TABLE_INDEX]) if MART_OUTPUTS and 0 <= TABLE_INDEX < len(MART_OUTPUTS) else (Path(MART_OUTPUTS[0]) if MART_OUTPUTS else Path(INSPECT['paths']['mart']['dir']) / f'{TABLE_NAME}.parquet')\n",
+    "MART_PATH = Path(MART_OUTPUTS[TABLE_INDEX]) if 0 <= TABLE_INDEX < len(MART_OUTPUTS) else None\n",
     "OUT_DIR = (BASE_DIR / '_tmp').resolve()\n",
     "EXPORT = False\n",
     "{'YEARS': YEARS, 'YEAR_INDEX': YEAR_INDEX, 'TABLES': [table['name'] for table in TABLES], 'TABLE_INDEX': TABLE_INDEX, 'TABLE_NAME': TABLE_NAME, 'MART_PATH': str(MART_PATH), 'INSPECT_CMD': INSPECT_CMD}"
@@ -66,7 +66,7 @@
     "        metric_col = numeric_rows[0]\n",
     "    return year_col, metric_col\n",
     "\n",
-    "if MART_PATH.exists():\n",
+    "if MART_PATH and MART_PATH.exists():\n",
     "    YEAR_COL, METRIC_COL = choose_columns(MART_PATH)\n",
     "    print({'YEAR_COL': YEAR_COL, 'METRIC_COL': METRIC_COL})\n",
     "else:\n",
@@ -79,11 +79,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "if MART_PATH.exists() and YEAR_COL and METRIC_COL:\n",
+    "if MART_PATH and MART_PATH.exists() and YEAR_COL and METRIC_COL:\n",
     "    export_df = con.execute(\n",
     "        f\"SELECT {YEAR_COL} AS year_like, {METRIC_COL} AS metric_value FROM read_parquet('{MART_PATH.as_posix()}') ORDER BY 1\"\n",
     "    ).df()\n",
-    "elif MART_PATH.exists():\n",
+    "elif MART_PATH and MART_PATH.exists():\n",
     "    export_df = con.execute(f\"SELECT * FROM read_parquet('{MART_PATH.as_posix()}') LIMIT 1000\").df()\n",
     "else:\n",
     "    export_df = None\n",
diff --git a/notebooks/README.md b/notebooks/README.md
index f929106..0223d3e 100644
--- a/notebooks/README.md
+++ b/notebooks/README.md
@@ -8,6 +8,12 @@ Usano `toolkit inspect paths --json` come fonte primaria per localizzare RAW, CL
 Il comando puo essere disponibile come `toolkit ...` oppure come fallback `py -m toolkit.cli.app ...`.
 Per i dettagli stabili lato toolkit, vedi `docs/notebook-contract.md` e `docs/feature-stability.md` nel repo toolkit.
 
+Contratto minimo degli output:
+
+- `metadata.json` = payload ricco del layer
+- `manifest.json` = summary stabile del layer con puntatori a metadata e validation
+- `data/_runs/.../<run_id>.json` = stato del run letto da `status` e `resume`
+
 ## Notebook inclusi
 
 - `00_quickstart.ipynb` - setup, command preview, run opzionale e localizzazione output reali del toolkit
@@ -22,4 +28,5 @@ Per i dettagli stabili lato toolkit, vedi `docs/notebook-contract.md` e `docs/fe
 - non salvare output pesanti nel repo
 - se serve esportare file, usa `../_tmp/`
 - mantieni i notebook generici: preferisci leggere `dataset.yml` e usa i parametri iniziali per scegliere anno/tabella
+- non ricostruire a mano i path degli output del toolkit: usa sempre i path restituiti da `inspect paths --json`
 - per dettagli tecnici della pipeline, vedi il repository Toolkit DataCivicLab
diff --git a/scripts/smoke.sh b/scripts/smoke.sh
index ec02853..f984a6c 100644
--- a/scripts/smoke.sh
+++ b/scripts/smoke.sh
@@ -107,3 +107,4 @@ echo "YEAR=${YEAR}"
 run_toolkit run all --config "${DATASET_FILE}"
 run_toolkit validate all --config "${DATASET_FILE}"
 run_toolkit status --dataset "${DATASET_NAME}" --year "${YEAR}" --latest --config "${DATASET_FILE}"
+run_toolkit inspect paths --config "${DATASET_FILE}" --year "${YEAR}" --json

From e71c859d8469c7773b2347a19481223eee45f987 Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Mon, 2 Mar 2026 23:01:04 +0000
Subject: [PATCH 07/11] Add notebook path regression guard

---
 tests/test_contract.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/tests/test_contract.py b/tests/test_contract.py
index e58fc1b..d4e2a68 100644
--- a/tests/test_contract.py
+++ b/tests/test_contract.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import json
 import re
 from pathlib import Path
 
@@ -9,6 +10,7 @@
 REPO_ROOT = Path(__file__).resolve().parents[1]
 DATASET_FILE = REPO_ROOT / "dataset.yml"
 DATA_DIR = REPO_ROOT / "data"
+NOTEBOOKS_DIR = REPO_ROOT / "notebooks"
 BLOCKED_DATA_EXTENSIONS = {".parquet", ".csv", ".jsonl", ".zip", ".xlsx", ".tsv"}
 REQUIRED_FILES = [
     REPO_ROOT / "dataset.yml",
@@ -150,3 +152,32 @@ def test_data_directory_does_not_contain_committed_outputs() -> None:
         "Non committare output in data/: usa data/_examples per sample piccoli. "
         f"Found: {offenders}"
     )
+
+
+def test_notebooks_do_not_rebuild_runtime_output_paths() -> None:
+    forbidden_patterns = [
+        "OUT_ROOT =",
+        "/ 'data' / 'raw' /",
+        "/ 'data' / 'clean' /",
+        "/ 'data' / 'mart' /",
+        "/ 'data' / '_runs' /",
+        "Path(INSPECT['paths']['mart']['dir']) /",
+    ]
+
+    offenders: list[str] = []
+
+    for path in sorted(NOTEBOOKS_DIR.glob("*.ipynb")):
+        notebook = json.loads(path.read_text(encoding="utf-8"))
+        for cell in notebook.get("cells", []):
+            if cell.get("cell_type") != "code":
+                continue
+            source = "".join(cell.get("source", []))
+            for pattern in forbidden_patterns:
+                if pattern in source:
+                    offenders.append(f"{path.relative_to(REPO_ROOT)} -> {pattern}")
+
+    assert not offenders, (
+        "I notebook devono usare `toolkit inspect paths --json` come fonte di verita` "
+        "e non ricostruire a mano i path del runtime. "
+        f"Found: {offenders}"
+    )

From cbc1c93d199d9e93d80a12dc1130f3f5d1657fe6 Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Tue, 3 Mar 2026 10:09:22 +0000
Subject: [PATCH 08/11] Clarify toolkit workflow in template docs

---
 README.md   | 8 ++++++++
 WORKFLOW.md | 4 +++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3788222..430bf71 100644
--- a/README.md
+++ b/README.md
@@ -118,6 +118,14 @@ I notebook del template usano anche:
 toolkit inspect paths --config dataset.yml --year <year> --json
 ```
 
+Nota di contratto:
+
+- i path relativi in `dataset.yml` sono risolti rispetto alla directory del file `dataset.yml`, non rispetto al `cwd`
+- i notebook non devono ricostruire a mano `root/data/raw|clean|mart|_runs`
+- `metadata.json` e il payload ricco del layer
+- `manifest.json` e il summary stabile del layer
+- `data/_runs/.../<run_id>.json` e il run record letto da `status` e `resume`
+
 Per dettagli piu profondi su CLI, contratti stabili, workflow advanced e feature stability, il posto giusto e `toolkit`.
 
 ## 🧭 Dove andare per il resto
diff --git a/WORKFLOW.md b/WORKFLOW.md
index 6091db9..77127ab 100644
--- a/WORKFLOW.md
+++ b/WORKFLOW.md
@@ -35,7 +35,9 @@ GitHub resta il posto dove deve restare la traccia utile.
 1. valida la config con `py -m pytest tests/test_contract.py`
 2. esegui `toolkit run all --config dataset.yml`
 3. esegui `toolkit validate all --config dataset.yml`
-4. usa i notebook per ispezionare RAW, CLEAN, MART e QA
+4. esegui `toolkit status --dataset <dataset> --year <year> --latest --config dataset.yml`
+5. esegui `toolkit inspect paths --config dataset.yml --year <year> --json` per localizzare gli output
+6. usa i notebook per ispezionare RAW, CLEAN, MART e QA
 
 ## Maintainers
 

From 190d0bf652132ac7034d0d743490e17a817d66ff Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Tue, 3 Mar 2026 20:41:45 +0000
Subject: [PATCH 09/11] docs: make dataset question model explicit

---
 README.md | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/README.md b/README.md
index 430bf71..eae6b64 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,15 @@ Esempi:
 - Dove si osservano miglioramenti o peggioramenti?
 - Il mio territorio e sopra o sotto la media?
 
+Questa repo dovrebbe avere **una domanda civica principale**.
+
+Dallo stesso dataset possono nascere anche altre domande utili, ma vanno tenute distinte:
+
+- la domanda principale orienta README, notebook e output pubblici
+- le domande secondarie o complementari possono emergere in Discussions e trasformarsi in issue operative
+
+In questo modo il repository resta leggibile e non diventa un contenitore indistinto di analisi.
+
 ## 🔎 Cosa puoi capire con questi dati
 
 - come cambia il fenomeno nel tempo
@@ -58,9 +67,19 @@ Questo repository distingue chiaramente:
 - **Discussions** -> domande civiche, interpretazioni, proposte di metriche
 - **Issues** -> bug, problemi tecnici, miglioramenti della pipeline
 
+Flusso consigliato:
+
+`domanda civica -> Discussion -> Issue -> analisi / notebook / output`
+
 Se non sei tecnico, parti da una **Discussion** in questa repo:
 spiega il contesto, il territorio o l'anno che ti interessa e cosa vuoi capire.
 
+Se la domanda richiede lavoro concreto, va trasformata in una **Issue** nella repo giusta:
+
+- issue dataset-specifiche in questa repo
+- issue di runtime o pipeline nel `toolkit`
+- issue di governance o processo nelle repo di ecosistema
+
 ## 📚 Documentazione del dataset
 
 - `docs/overview.md` - contesto, copertura, limiti
@@ -81,6 +100,9 @@ Qui trovi il minimo necessario per far partire un progetto concreto:
 - `tests/` per i contract tests minimi
 - `notebooks/` per leggere gli output reali della pipeline
 
+Un repo nato da questo template non serve solo a "ospitare dati".
+Serve a rispondere in modo verificabile a una domanda civica centrale, lasciando spazio anche a domande complementari ben tracciate.
+
 ## 🛠️ Confine con il toolkit
 
 Il motore della pipeline vive nel repository `toolkit`.

From ac1c70b89e61c2ed2416b5afb6d4f0e30d413cfd Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Tue, 3 Mar 2026 21:13:44 +0000
Subject: [PATCH 10/11] ci: make smoke workflow self-contained

---
 .github/workflows/ci.yml | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 36f5faf..7933694 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -62,13 +62,9 @@ jobs:
           python -m pip install pytest pyyaml
           if [ -n "${TOOLKIT_PIP_PACKAGE}" ]; then
             python -m pip install "${TOOLKIT_PIP_PACKAGE}"
-          elif [ -d "./toolkit" ]; then
-            python -m pip install -e "./toolkit"
-          elif [ -d "../toolkit" ]; then
-            python -m pip install -e "../toolkit"
           else
-            echo "Unable to install toolkit: set TOOLKIT_PIP_PACKAGE or provide ./toolkit or ../toolkit" >&2
-            exit 1
+            git clone --depth 1 https://github.com/dataciviclab/toolkit.git .toolkit-src
+            python -m pip install -e ./.toolkit-src
           fi
 
       - name: Export DCL_ROOT

From c1e9845ecee1e390bdd85bf3e3e40a1cb90600e4 Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Tue, 3 Mar 2026 21:46:55 +0000
Subject: [PATCH 11/11] docs: add dataset publishing rhythm guidance

---
 README.md | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/README.md b/README.md
index eae6b64..9298498 100644
--- a/README.md
+++ b/README.md
@@ -180,3 +180,21 @@ py scripts/publish_to_drive.py --config dataset.yml --drive-root "G:\\DataCivicL
 ```
 
 La destinazione su Drive mantiene lo stesso path relativo degli output del toolkit sotto `root`.
+
+## Ritmo operativo consigliato
+
+Quando un repo dataset entra in ritmo, conviene mantenere una sequenza semplice:
+
+1. una domanda civica principale sempre visibile nel README
+2. domande complementari che emergono e si chiariscono in Discussions
+3. issue piccole per trasformare le domande mature in lavoro concreto
+4. output condivisibili pubblicati con continuita
+
+L'output non deve essere sempre una dashboard completa.
+Puo essere anche:
+
+- una risposta breve con una tabella
+- un notebook che chiude una domanda precisa
+- un aggiornamento intermedio su limiti, dati mancanti o primi pattern
+
+Questo aiuta a mantenere il repository vivo senza trasformarlo in un backlog confuso.