From 2ea580d108545636910b510090903e76b4e7a446 Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Sun, 1 Mar 2026 22:29:46 +0000
Subject: [PATCH 1/6] allineamento finale con template

---
 CODE_OF_CONDUCT.md              |  31 +++++++
 README.md                       |  10 +++
 SECURITY.md                     |  25 +++---
 tests/test_cli_path_contract.py | 144 ++++++++++++++++++++++++++++++++
 toolkit/clean/run.py            |  11 ++-
 toolkit/cli/cmd_run.py          |   5 +-
 toolkit/mart/run.py             |  11 ++-
 7 files changed, 219 insertions(+), 18 deletions(-)
 create mode 100644 CODE_OF_CONDUCT.md
 create mode 100644 tests/test_cli_path_contract.py

diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..befd34a
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,31 @@
+# Code of Conduct
+
+## Our Pledge
+
+We want this project to be a respectful, constructive, and harassment-free space for everyone involved.
+
+## Our Standards
+
+Examples of behavior that supports a healthy community:
+
+- being respectful in disagreements
+- giving and receiving feedback constructively
+- focusing on the technical problem, not the person
+- showing patience with contributors at different experience levels
+
+Examples of unacceptable behavior:
+
+- harassment, insults, or personal attacks
+- discriminatory or hateful language
+- repeated bad-faith disruption of discussions or reviews
+- publishing private information without permission
+
+## Enforcement
+
+If you experience or witness unacceptable behavior, report it to the maintainers or repository owners.
+
+Project maintainers may remove, edit, or reject comments, issues, commits, code, or other contributions that violate this Code of Conduct.
+
+## Scope
+
+This Code of Conduct applies to repository discussions, issues, pull requests, code review, and other project spaces managed by the maintainers.
diff --git a/README.md b/README.md
index df9c119..6382c7e 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,16 @@ pip install -e .[dev]
 
 Richiede Python 3.10+.
 
+## CLI Naming Note
+
+Il comando CLI canonico del progetto e' `toolkit`.
+
+Se nel tuo ambiente c'e' una collisione di nome o il console script non e' nel `PATH`, puoi usare direttamente il modulo Python:
+
+```bash
+python -m toolkit.cli.app run all --config dataset.yml
+```
+
 ## Quickstart
 
 Giro offline completo con il progetto di esempio, eseguibile in pochi minuti su una macchina pulita.
diff --git a/SECURITY.md b/SECURITY.md
index d9c53d4..7f618eb 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -2,23 +2,18 @@
 
 ## Reporting a Vulnerability
 
-If you discover a security issue in this repository, do not open a public issue first.
+If you believe you found a security issue in this repository, please do not open a public issue first.
 
-Report it privately to the maintainers with:
+Instead:
 
-- a clear description of the problem
-- affected versions or commits, if known
-- reproduction steps or a minimal proof of concept
-- suggested mitigations, if available
+- contact the maintainers privately, if a private contact path is available
+- or ask the repository owners for a private reporting channel before sharing details publicly
 
-If no dedicated private channel is available yet, contact the project maintainers through the repository owners and request a private disclosure path before sharing details publicly.
+When possible, include:
 
-## Response Expectations
+- a short description of the issue
+- affected files, versions, or commits
+- steps to reproduce
+- possible impact
 
-- We will acknowledge receipt of a report as soon as practical.
-- We will evaluate impact and reproduction details.
-- We will coordinate remediation and disclosure timing when the report is valid.
-
-## Scope
-
-This policy applies to the source code and release artifacts of this repository.
+There is currently no bug bounty program for this project.
diff --git a/tests/test_cli_path_contract.py b/tests/test_cli_path_contract.py
new file mode 100644
index 0000000..13965ee
--- /dev/null
+++ b/tests/test_cli_path_contract.py
@@ -0,0 +1,144 @@
+from __future__ import annotations
+
+import json
+import shutil
+from pathlib import Path
+
+from typer.testing import CliRunner
+
+from toolkit.cli.app import app
+
+
+def _copy_project_example(dst: Path) -> Path:
+    src = Path("project-example")
+    shutil.copytree(src, dst)
+    shutil.rmtree(dst / "_smoke_out", ignore_errors=True)
+    return dst / "dataset.yml"
+
+
+def _write_failed_run_record(path: Path, run_id: str) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(
+        json.dumps(
+            {
+                "dataset": "project_example",
+                "year": 2022,
+                "run_id": run_id,
+                "started_at": "2026-03-01T10:00:00+00:00",
+                "finished_at": "2026-03-01T10:01:00+00:00",
+                "status": "FAILED",
+                "layers": {
+                    "raw": {"status": "SUCCESS", "started_at": "2026-03-01T10:00:00+00:00", "finished_at": "2026-03-01T10:00:10+00:00"},
+                    "clean": {"status": "FAILED", "started_at": "2026-03-01T10:00:10+00:00", "finished_at": "2026-03-01T10:00:20+00:00"},
+                    "mart": {"status": "PENDING", "started_at": None, "finished_at": None},
+                },
+                "validations": {"raw": {}, "clean": {}, "mart": {}},
+                "resumed_from": None,
+                "error": "clean failed",
+            },
+            indent=2,
+        ),
+        encoding="utf-8",
+    )
+
+
+def test_cli_dry_run_resolves_sql_from_config_dir_not_cwd(tmp_path: Path, monkeypatch) -> None:
+    project_dir = tmp_path / "project-example"
+    config_path = _copy_project_example(project_dir)
+
+    monkeypatch.chdir(tmp_path)
+    runner = CliRunner()
+    result = runner.invoke(
+        app,
+        ["run", "all", "--config", str(config_path), "--dry-run", "--strict-config"],
+    )
+
+    assert result.exit_code == 0
+    assert "Execution Plan" in result.output
+    assert "steps: raw, clean, mart" in result.output
+
+
+def test_cli_commands_use_dataset_yml_dir_as_path_base(tmp_path: Path, monkeypatch) -> None:
+    project_dir = tmp_path / "project-example"
+    config_path = _copy_project_example(project_dir)
+
+    monkeypatch.chdir(tmp_path)
+    runner = CliRunner()
+
+    run_result = runner.invoke(app, ["run", "all", "--config", str(config_path), "--strict-config"])
+    assert run_result.exit_code == 0, run_result.output
+
+    validate_result = runner.invoke(
+        app,
+        ["validate", "all", "--config", str(config_path), "--strict-config"],
+    )
+    assert validate_result.exit_code == 0, validate_result.output
+
+    profile_result = runner.invoke(
+        app,
+        ["profile", "raw", "--config", str(config_path), "--strict-config"],
+    )
+    assert profile_result.exit_code == 0, profile_result.output
+
+    status_result = runner.invoke(
+        app,
+        [
+            "status",
+            "--dataset",
+            "project_example",
+            "--year",
+            "2022",
+            "--latest",
+            "--config",
+            str(config_path),
+            "--strict-config",
+        ],
+    )
+    assert status_result.exit_code == 0, status_result.output
+    assert "status: SUCCESS" in status_result.output
+
+    root = project_dir / "_smoke_out"
+    raw_dir = root / "data" / "raw" / "project_example" / "2022"
+    clean_dir = root / "data" / "clean" / "project_example" / "2022"
+    mart_dir = root / "data" / "mart" / "project_example" / "2022"
+
+    assert (raw_dir / "ispra_dettaglio_comunale_2022.csv").exists()
+    assert (raw_dir / "_profile" / "suggested_read.yml").exists()
+    assert (clean_dir / "project_example_2022_clean.parquet").exists()
+    assert (mart_dir / "rd_by_regione.parquet").exists()
+    assert (mart_dir / "rd_by_provincia.parquet").exists()
+
+
+def test_cli_resume_from_other_cwd_falls_back_and_reuses_relative_paths(tmp_path: Path, monkeypatch) -> None:
+    project_dir = tmp_path / "project-example"
+    config_path = _copy_project_example(project_dir)
+    runs_dir = project_dir / "_smoke_out" / "data" / "_runs" / "project_example" / "2022"
+    failed_run_id = "failed-run"
+    _write_failed_run_record(runs_dir / f"{failed_run_id}.json", failed_run_id)
+
+    monkeypatch.chdir(tmp_path)
+    runner = CliRunner()
+    result = runner.invoke(
+        app,
+        [
+            "resume",
+            "--dataset",
+            "project_example",
+            "--year",
+            "2022",
+            "--run-id",
+            failed_run_id,
+            "--config",
+            str(config_path),
+            "--strict-config",
+        ],
+    )
+
+    assert result.exit_code == 0, result.output
+    assert "Falling back to 'raw'" in result.output
+    assert "starting at raw" in result.output
+
+    root = project_dir / "_smoke_out"
+    assert (root / "data" / "raw" / "project_example" / "2022" / "ispra_dettaglio_comunale_2022.csv").exists()
+    assert (root / "data" / "clean" / "project_example" / "2022" / "project_example_2022_clean.parquet").exists()
+    assert (root / "data" / "mart" / "project_example" / "2022" / "rd_by_regione.parquet").exists()
diff --git a/toolkit/clean/run.py b/toolkit/clean/run.py
index 7c751c9..cc13afc 100644
--- a/toolkit/clean/run.py
+++ b/toolkit/clean/run.py
@@ -26,6 +26,15 @@ def _serialize_metadata_path(path: Path | None, rel_root: Path | None) -> str |
     return to_root_relative(path, rel_root)
 
 
+def _resolve_sql_path(sql_ref: str | Path, *, base_dir: Path | None) -> Path:
+    path = Path(sql_ref)
+    if path.is_absolute():
+        return path
+    if base_dir is None:
+        return path
+    return base_dir / path
+
+
 def _run_sql(
     input_files: list[Path],
     sql_query: str,
@@ -78,7 +87,7 @@ def run_clean(
     if not sql_rel:
         raise ValueError("clean.sql missing in dataset.yml (expected: clean: { sql: 'sql/clean.sql' })")
 
-    sql_path_obj = Path(sql_rel)
+    sql_path_obj = _resolve_sql_path(sql_rel, base_dir=base_dir)
     if not sql_path_obj.exists():
         raise FileNotFoundError(f"CLEAN SQL file not found: {sql_path_obj}")
 
diff --git a/toolkit/cli/cmd_run.py b/toolkit/cli/cmd_run.py
index b033c93..947b673 100644
--- a/toolkit/cli/cmd_run.py
+++ b/toolkit/cli/cmd_run.py
@@ -39,7 +39,10 @@ def _planned_layers(step: str) -> list[str]:
 def _resolve_sql_path(cfg, rel_path: str | None) -> Path:
     if not rel_path:
         raise ValueError("Missing SQL path in dataset.yml")
-    return Path(rel_path)
+    path = Path(rel_path)
+    if path.is_absolute():
+        return path
+    return Path(cfg.base_dir) / path
 
 
 def _validate_execution_plan(cfg, step: str) -> list[str]:
diff --git a/toolkit/mart/run.py b/toolkit/mart/run.py
index 82e18fd..acd50ec 100644
--- a/toolkit/mart/run.py
+++ b/toolkit/mart/run.py
@@ -19,6 +19,15 @@ def _serialize_metadata_path(path: Path | None, rel_root: Path | None) -> str |
     return to_root_relative(path, rel_root)
 
 
+def _resolve_sql_path(sql_ref: str | Path, *, base_dir: Path | None) -> Path:
+    path = Path(sql_ref)
+    if path.is_absolute():
+        return path
+    if base_dir is None:
+        return path
+    return base_dir / path
+
+
 def run_mart(
     dataset: str,
     year: int,
@@ -78,7 +87,7 @@ def run_mart(
             if not name or not sql_rel:
                 raise ValueError("Each mart.tables entry must include: name, sql")
 
-            sql_path = Path(sql_rel)
+            sql_path = _resolve_sql_path(sql_rel, base_dir=base_dir)
             if not sql_path.exists():
                 raise FileNotFoundError(f"MART SQL file not found: {sql_path}")
 

From f669c8d8ae5396c2a998f2a17aa443e62f69b55a Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Mon, 2 Mar 2026 09:16:44 +0000
Subject: [PATCH 2/6]  riordino documentale

---
 README.md                  | 148 +++++++++----------------------------
 docs/advanced-workflows.md | 102 +++++++++++++++++++++++++
 2 files changed, 137 insertions(+), 113 deletions(-)
 create mode 100644 docs/advanced-workflows.md

diff --git a/README.md b/README.md
index 6382c7e..4bc885e 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,13 @@ python -m toolkit.cli.app run all --config dataset.yml
 
 ## Quickstart
 
+Il percorso canonico per i repo dataset clonati dal template e':
+
+1. `toolkit run all --config dataset.yml`
+2. `toolkit validate all --config dataset.yml`
+3. `toolkit status --dataset <dataset> --year <year> --latest --config dataset.yml`
+4. notebook locali che leggono gli output reali sotto `root/data/...`
+
 Giro offline completo con il progetto di esempio, eseguibile in pochi minuti su una macchina pulita.
 
 Windows PowerShell:
@@ -50,6 +57,7 @@ Windows PowerShell:
 $env:TOOLKIT_OUTDIR = Join-Path $env:TEMP "dataciviclab-toolkit-quickstart"
 py -m pip install -e ".[dev]"
 py -m toolkit.cli.app run all -c project-example/dataset.yml
+py -m toolkit.cli.app validate all -c project-example/dataset.yml
 py -m toolkit.cli.app status --dataset project_example --year 2022 --config project-example/dataset.yml
 ```
 
@@ -59,6 +67,7 @@ Linux/macOS:
 export TOOLKIT_OUTDIR="$(mktemp -d)/dataciviclab-toolkit-quickstart"
 python -m pip install -e ".[dev]"
 python -m toolkit.cli.app run all -c project-example/dataset.yml
+python -m toolkit.cli.app validate all -c project-example/dataset.yml
 python -m toolkit.cli.app status --dataset project_example --year 2022 --config project-example/dataset.yml
 ```
 
@@ -76,6 +85,7 @@ Interpretazione errori config:
 - warning di deprecazione -> config ancora accettata, ma in forma legacy da migrare
 
 Schema completo e legacy supportato: [docs/config-schema.md](docs/config-schema.md)
+Flow avanzati e tooling secondario: [docs/advanced-workflows.md](docs/advanced-workflows.md)
 
 Artefatti attesi:
 
@@ -137,91 +147,47 @@ Convenzioni:
 
 ## CLI
 
-Esecuzione step singolo:
-
-```bash
-toolkit run raw --config dataset.yml
-toolkit run clean --config dataset.yml
-toolkit run mart --config dataset.yml
-```
-
-Esecuzione end-to-end:
+Workflow canonico:
 
 ```bash
 toolkit run all --config dataset.yml
+toolkit validate all --config dataset.yml
 ```
 
-Dry-run:
-
-```bash
-toolkit run all --config dataset.yml --dry-run
-```
-
-Il dry-run:
-
-- valida config e path SQL richiesti
-- stampa l'execution plan per dataset/year
-- crea solo il run record in `data/_runs/...`
-- non scarica RAW, non esegue DuckDB, non scrive artefatti nei layer
-
-Resume:
-
-```bash
-toolkit resume --dataset project_example --year 2022 --config dataset.yml
-toolkit resume --dataset project_example --year 2022 --run-id <old_run_id> --config dataset.yml
-```
-
-`resume`:
-
-- legge un run record esistente
-- trova il primo layer non `SUCCESS`
-- crea un nuovo `run_id`
-- salva `resumed_from=<old_run_id>` nel nuovo record
-
-Status:
+Per il percorso base:
 
-```bash
-toolkit status --dataset project_example --year 2022 --latest --config dataset.yml
-toolkit status --dataset project_example --year 2022 --run-id <run_id> --config dataset.yml
-```
+- `run all` esegue RAW -> CLEAN -> MART
+- `validate all` esegue i quality checks su CLEAN e MART
+- `status` legge il run record e mostra lo stato piu` recente
+- `--dry-run` valida config e SQL senza eseguire la pipeline
 
-Validazione separata:
+Esempi:
 
 ```bash
-toolkit validate clean --config dataset.yml
-toolkit validate mart --config dataset.yml
-toolkit validate all --config dataset.yml
+toolkit run all --config dataset.yml --strict-config
+toolkit validate all --config dataset.yml --strict-config
+toolkit status --dataset my_dataset --year 2024 --latest --config dataset.yml
+toolkit run all --config dataset.yml --dry-run --strict-config
 ```
 
-Profilazione RAW:
+`resume`, `profile raw`, `run raw|clean|mart`, `gen-sql` e la policy completa degli artifacts restano disponibili, ma sono tooling avanzato: vedi [docs/advanced-workflows.md](docs/advanced-workflows.md).
 
-```bash
-toolkit profile raw --config dataset.yml
-```
+## Notebook locali
 
-`toolkit profile raw` scrive sempre hint utilizzabili anche se il parsing DuckDB fallisce.
-Tutti gli artefatti di profiling vivono in `raw/<dataset>/<year>/_profile/`.
-Il nome canonico del profilo JSON e` `raw_profile.json`; `profile.json` resta un alias di compatibilita` opzionale.
-Gli output effettivi dipendono dalla policy `output.artifacts`.
-`suggested_read.yml` usa le stesse chiavi che CLEAN passa a `clean.read`, senza mapping extra.
-Se DuckDB non riesce a sniffare il file, il profiler usa un fallback Python leggero per `header`, `delim`, `decimal`, `encoding` e aggiunge warning espliciti.
-L'output resta quindi consumabile da CLEAN anche su CSV sporchi o irregolari.
+Nei repo dataset clonati dal template, i notebook dovrebbero leggere gli output reali gia` scritti dal toolkit, non ricostruire logica di path.
 
-Artifacts policy:
+In pratica:
 
-```yaml
-output:
-  artifacts: standard   # minimal | standard | debug
-  legacy_aliases: true  # abilita l'alias legacy profile.json
-```
+- RAW: `root/data/raw/<dataset>/<year>/`
+- CLEAN: `root/data/clean/<dataset>/<year>/`
+- MART: `root/data/mart/<dataset>/<year>/`
+- run records: `root/data/_runs/<dataset>/<year>/`
 
-`standard` resta il default compatibile. `minimal` tiene solo gli artefatti di pipeline e salta report/debug SQL. `debug` tiene tutto.
+Questo mantiene il contratto semplice tra toolkit e repo dataset:
 
-Generazione SQL CLEAN da mapping dichiarativo:
-
-```bash
-toolkit gen-sql --config dataset.yml
-```
+- il toolkit produce artefatti e metadata stabili
+- i notebook li ispezionano localmente
+- `dataset.yml` resta la fonte di verita` per dataset, anni e path relativi
 
 ## Run Tracking
 
@@ -254,51 +220,6 @@ Comportamento:
 
 La CLI `validate` resta disponibile per eseguire i check separatamente.
 
-## CLEAN Input Selection
-
-La selezione degli input RAW per CLEAN e` configurabile via `clean.read`.
-
-Opzioni supportate:
-
-- `mode: explicit`
-- `mode: latest`
-- `mode: largest`
-- `mode: all`
-- `glob: "*"`
-- `include: [...]`
-- `prefer_from_raw_run: true`
-- `allow_ambiguous: false`
-
-Note operative:
-
-- `explicit` richiede `include`
-- `latest` seleziona il file con `mtime` piu` recente
-- `largest` seleziona il file piu` grande
-- `all` passa tutti i candidati a DuckDB in ordine deterministico
-- se `mode` non e` specificato, il toolkit usa il fallback legacy su `largest` e logga un warning di deprecazione
-
-CSV read mode:
-
-- `clean.read_mode: strict` usa solo i parametri dichiarati
-- `clean.read_mode: fallback` prova strict e, se fallisce, riprova con preset robusto loggando il fallback
-- `clean.read_mode: robust` usa direttamente il preset robusto
-- il preset robusto mantiene `delim`/`decimal`/`encoding` noti e aggiunge poche opzioni conservative come `ignore_errors`, `null_padding`, `strict_mode: false`, `sample_size: -1`
-- forma canonica:
-
-```yaml
-clean:
-  read:
-    source: auto  # oppure config_only
-```
-
-- `clean.read.source: auto` usa anche i format hints di `raw/<dataset>/<year>/_profile/suggested_read.yml`; `config_only` li ignora
-- da `suggested_read.yml` vengono applicate solo chiavi di formato come `delim`, `decimal`, `encoding`, `header`, `skip`, `quote`, `escape`, `comment`, `nullstr`, `trim_whitespace`, `columns`
-- le opzioni di robustezza presenti nel file suggerito non cambiano la policy di lettura: restano governate da `clean.read_mode` e dal preset robusto
-- il metadata CLEAN salva anche `read_source_used` (`strict` / `robust` / `parquet`)
-- il metadata CLEAN salva `read_params_used` con i parametri finali effettivamente usati dal reader
-- il metadata CLEAN salva `read_params_source` con le sorgenti del merge (`defaults`, `suggested`, `config_overrides`)
-- ogni `metadata.json` include `metadata_schema_version: 1`
-
 ## Layer
 
 ### RAW
@@ -429,6 +350,7 @@ Vedi [docs/conventions.md](docs/conventions.md) per:
 - policy di selezione input CLEAN
 - precedence del read config
 - metadata, manifest e validation contracts
+- workflow avanzati e tooling secondario: [docs/advanced-workflows.md](docs/advanced-workflows.md)
 
 ## Testing
 
diff --git a/docs/advanced-workflows.md b/docs/advanced-workflows.md
new file mode 100644
index 0000000..5bb499a
--- /dev/null
+++ b/docs/advanced-workflows.md
@@ -0,0 +1,102 @@
+# Advanced Workflows
+
+Questa nota raccoglie i flussi e le opzioni del toolkit che restano supportati, ma non fanno parte del percorso canonico dei repo dataset clonati dal template.
+
+Percorso canonico:
+
+- `toolkit run all --config dataset.yml`
+- `toolkit validate all --config dataset.yml`
+- `toolkit status --dataset <dataset> --year <year> --latest --config dataset.yml`
+- notebook locali che leggono output e metadata sotto `root/data/...`
+
+## Step singoli
+
+Utili per debug o per ripetere solo una parte della pipeline:
+
+```bash
+toolkit run raw --config dataset.yml
+toolkit run clean --config dataset.yml
+toolkit run mart --config dataset.yml
+```
+
+Questi comandi non sono il happy path raccomandato per i nuovi repo dataset, ma restano strumenti operativi supportati.
+
+## Resume
+
+`resume` serve quando esiste gia' un run record e vuoi ripartire dal primo layer non `SUCCESS` oppure forzare una ripartenza da `raw|clean|mart`.
+
+Esempi:
+
+```bash
+toolkit resume --dataset my_dataset --year 2024 --latest --config dataset.yml
+toolkit resume --dataset my_dataset --year 2024 --run-id <run_id> --from-layer clean --config dataset.yml
+```
+
+Il comando verifica anche gli artefatti minimi del layer precedente prima di ripartire.
+
+## Profile RAW
+
+`toolkit profile raw --config dataset.yml` genera hint utili per `clean.read` quando il RAW e` sporco, ambiguo o poco noto.
+
+Artefatti principali:
+
+- `raw/<dataset>/<year>/_profile/raw_profile.json`
+- `raw/<dataset>/<year>/_profile/suggested_read.yml`
+
+`profile.json` resta un alias legacy opzionale e non e` il nome canonico da promuovere nei nuovi repo.
+
+## CLEAN read e input selection
+
+Opzioni utili ma avanzate:
+
+- `clean.read.mode`: `explicit | latest | largest | all`
+- `clean.read.include`
+- `clean.read.glob`
+- `clean.read.prefer_from_raw_run`
+- `clean.read.allow_ambiguous`
+- `clean.read.source`: `auto | config_only`
+- `clean.read_mode`: `strict | fallback | robust`
+
+Uso consigliato:
+
+- repo dataset nuovi: configurazione esplicita e `--strict-config`
+- `profile raw` solo se serve capire meglio il formato RAW
+
+## Artifact policy
+
+La policy artifacts resta disponibile per tuning operativo:
+
+```yaml
+output:
+  artifacts: standard   # minimal | standard | debug
+  legacy_aliases: true
+```
+
+Regola pratica:
+
+- `standard`: default consigliato
+- `minimal`: riduce artefatti opzionali
+- `debug`: conserva anche SQL renderizzate e dettagli di debug
+
+`legacy_aliases` resta supportato per compatibilita`, ma non va promosso nei nuovi repo dataset.
+
+## gen-sql
+
+`toolkit gen-sql --config dataset.yml` resta disponibile come bootstrap helper da mapping dichiarativo.
+
+Stato raccomandato:
+
+- supportato
+- utile per bootstrap guidato
+- non parte del workflow operativo standard
+- da considerare congelato: bugfix si`, espansioni solo se emerge uso reale
+
+## Compat legacy
+
+Il toolkit mantiene compatibilita` con alcune forme legacy del config per facilitare la migrazione.
+
+Per i repo nuovi:
+
+- usa la shape canonica documentata in [config-schema.md](./config-schema.md)
+- usa `--strict-config` nei comandi CLI
+- non basarti su alias o campi legacy nei notebook e negli script del repo dataset

From c73296cc707a689831484362c0203b0019541274 Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Mon, 2 Mar 2026 09:42:20 +0000
Subject: [PATCH 3/6] Add inspect paths helper and tighten dataset repo docs

---
 README.md                       |  10 +++
 docs/feature-stability.md       |  27 +++++++++
 docs/notebook-contract.md       |  28 +++++++++
 tests/test_cli_inspect_paths.py |  60 ++++++++++++++++++
 toolkit/cli/app.py              |   2 +
 toolkit/cli/cmd_inspect.py      | 104 ++++++++++++++++++++++++++++++++
 6 files changed, 231 insertions(+)
 create mode 100644 docs/feature-stability.md
 create mode 100644 docs/notebook-contract.md
 create mode 100644 tests/test_cli_inspect_paths.py
 create mode 100644 toolkit/cli/cmd_inspect.py

diff --git a/README.md b/README.md
index 4bc885e..dd7366b 100644
--- a/README.md
+++ b/README.md
@@ -86,6 +86,8 @@ Interpretazione errori config:
 
 Schema completo e legacy supportato: [docs/config-schema.md](docs/config-schema.md)
 Flow avanzati e tooling secondario: [docs/advanced-workflows.md](docs/advanced-workflows.md)
+Matrice di stabilita`: [docs/feature-stability.md](docs/feature-stability.md)
+Contratto notebook/output: [docs/notebook-contract.md](docs/notebook-contract.md)
 
 Artefatti attesi:
 
@@ -159,6 +161,7 @@ Per il percorso base:
 - `run all` esegue RAW -> CLEAN -> MART
 - `validate all` esegue i quality checks su CLEAN e MART
 - `status` legge il run record e mostra lo stato piu` recente
+- `inspect paths` espone i path stabili per notebook e script locali
 - `--dry-run` valida config e SQL senza eseguire la pipeline
 
 Esempi:
@@ -167,6 +170,7 @@ Esempi:
 toolkit run all --config dataset.yml --strict-config
 toolkit validate all --config dataset.yml --strict-config
 toolkit status --dataset my_dataset --year 2024 --latest --config dataset.yml
+toolkit inspect paths --config dataset.yml --year 2024 --json
 toolkit run all --config dataset.yml --dry-run --strict-config
 ```
 
@@ -183,6 +187,12 @@ In pratica:
 - MART: `root/data/mart/<dataset>/<year>/`
 - run records: `root/data/_runs/<dataset>/<year>/`
 
+Helper ufficiale per evitare path logic duplicata nei notebook:
+
+```bash
+toolkit inspect paths --config dataset.yml --year 2024 --json
+```
+
 Questo mantiene il contratto semplice tra toolkit e repo dataset:
 
 - il toolkit produce artefatti e metadata stabili
diff --git a/docs/feature-stability.md b/docs/feature-stability.md
new file mode 100644
index 0000000..b320c43
--- /dev/null
+++ b/docs/feature-stability.md
@@ -0,0 +1,27 @@
+# Feature Stability
+
+Questa matrice serve a chiarire cosa il toolkit considera percorso canonico, cosa resta supportato ma secondario, e cosa non va trattato come parte del quickstart dei repo dataset clonati dal template.
+
+| Area | Stato | Uso raccomandato |
+|---|---|---|
+| `run all` | stable | percorso canonico |
+| `validate all` | stable | percorso canonico |
+| `status` | stable | percorso canonico |
+| path contract di `dataset.yml` | stable | percorso canonico |
+| output `raw/clean/mart/_runs` | stable | percorso canonico |
+| `inspect paths` | stable | helper per notebook e repo dataset |
+| `resume` | supported / advanced | debug operativo e recovery |
+| `profile raw` | supported / advanced | diagnostica su RAW sporchi o ambigui |
+| `run raw|clean|mart` | supported / advanced | debug e re-run parziali |
+| artifact policy `minimal|standard|debug` | supported / advanced | tuning operativo |
+| `legacy_aliases` | compatibility only | non promuovere nei repo nuovi |
+| config legacy | compatibility only | usare `--strict-config` nei repo nuovi |
+| `gen-sql` | frozen helper | bootstrap guidato, non workflow standard |
+| `api_json_paged` | experimental | usare solo con evidenza reale |
+| `html_table` | experimental | usare solo con evidenza reale |
+
+Regola pratica:
+
+- se stai creando o clonando un repo dataset nuovo, resta nel percorso canonico
+- se devi fare recovery, diagnostica o bootstrap, usa i comandi advanced
+- non basarti su compat legacy o helper frozen come parte del contratto stabile
diff --git a/docs/notebook-contract.md b/docs/notebook-contract.md
new file mode 100644
index 0000000..a968b68
--- /dev/null
+++ b/docs/notebook-contract.md
@@ -0,0 +1,28 @@
+# Notebook Contract
+
+Nei repo dataset clonati dal template, i notebook non dovrebbero ricostruire la logica della pipeline. Dovrebbero leggere gli output reali e i metadata stabili prodotti dal toolkit.
+
+Contratto stabile:
+
+- RAW: `root/data/raw/<dataset>/<year>/`
+- CLEAN: `root/data/clean/<dataset>/<year>/`
+- MART: `root/data/mart/<dataset>/<year>/`
+- run records: `root/data/_runs/<dataset>/<year>/`
+
+File utili:
+
+- RAW: `manifest.json`, `metadata.json`, `raw_validation.json`
+- CLEAN: `<dataset>_<year>_clean.parquet`, `manifest.json`, `metadata.json`
+- MART: `<table>.parquet`, `manifest.json`, `metadata.json`
+
+Per evitare duplicazione di path logic nei notebook:
+
+- leggi `dataset.yml`
+- usa `toolkit inspect paths --config dataset.yml --year <year> --json`
+- poi apri parquet, metadata e run record dai path restituiti
+
+Regola pratica:
+
+- il toolkit produce
+- i notebook ispezionano
+- `dataset.yml` resta la fonte di verita` per root, dataset, anni e path relativi
diff --git a/tests/test_cli_inspect_paths.py b/tests/test_cli_inspect_paths.py
new file mode 100644
index 0000000..6dc5e94
--- /dev/null
+++ b/tests/test_cli_inspect_paths.py
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+import json
+import shutil
+from pathlib import Path
+
+from typer.testing import CliRunner
+
+from toolkit.cli.app import app
+
+
+def test_inspect_paths_reports_dataset_repo_layout_from_other_cwd(tmp_path: Path, monkeypatch) -> None:
+    src = Path("project-example")
+    dst = tmp_path / "project-example"
+    shutil.copytree(src, dst)
+    shutil.rmtree(dst / "_smoke_out", ignore_errors=True)
+    config_path = dst / "dataset.yml"
+
+    runner = CliRunner()
+    monkeypatch.chdir(tmp_path)
+
+    run_result = runner.invoke(app, ["run", "all", "--config", str(config_path), "--strict-config"])
+    assert run_result.exit_code == 0, run_result.output
+
+    result = runner.invoke(
+        app,
+        ["inspect", "paths", "--config", str(config_path), "--year", "2022", "--strict-config"],
+    )
+
+    assert result.exit_code == 0, result.output
+    assert f"config_path: {config_path}" in result.output
+    assert f"root: {dst / '_smoke_out'}" in result.output
+    assert f"raw_dir: {dst / '_smoke_out' / 'data' / 'raw' / 'project_example' / '2022'}" in result.output
+    assert f"clean_output: {dst / '_smoke_out' / 'data' / 'clean' / 'project_example' / '2022' / 'project_example_2022_clean.parquet'}" in result.output
+    assert "latest_run_status: SUCCESS" in result.output
+
+
+def test_inspect_paths_json_is_notebook_friendly(tmp_path: Path, monkeypatch) -> None:
+    src = Path("project-example")
+    dst = tmp_path / "project-example"
+    shutil.copytree(src, dst)
+    shutil.rmtree(dst / "_smoke_out", ignore_errors=True)
+    config_path = dst / "dataset.yml"
+
+    runner = CliRunner()
+    monkeypatch.chdir(tmp_path)
+
+    result = runner.invoke(
+        app,
+        ["inspect", "paths", "--config", str(config_path), "--year", "2022", "--json", "--strict-config"],
+    )
+
+    assert result.exit_code == 0, result.output
+    payload = json.loads(result.output)
+    assert payload["dataset"] == "project_example"
+    assert payload["year"] == 2022
+    assert payload["config_path"] == str(config_path)
+    assert payload["paths"]["clean_output"].endswith("project_example_2022_clean.parquet")
+    assert payload["paths"]["mart_outputs"]
+    assert payload["latest_run"] is None
diff --git a/toolkit/cli/app.py b/toolkit/cli/app.py
index cb03ea8..80b4cc0 100644
--- a/toolkit/cli/app.py
+++ b/toolkit/cli/app.py
@@ -8,6 +8,7 @@
 from toolkit.cli.cmd_status import register as register_status
 from toolkit.cli.cmd_validate import register as register_validate
 from toolkit.cli.cmd_gen_sql import register as register_gen_sql
+from toolkit.cli.cmd_inspect import register as register_inspect
 
 app = typer.Typer(no_args_is_help=True, add_completion=False)
 
@@ -18,6 +19,7 @@
 register_status(app)
 register_validate(app)
 register_gen_sql(app)
+register_inspect(app)
 
 def main():
     app()
diff --git a/toolkit/cli/cmd_inspect.py b/toolkit/cli/cmd_inspect.py
new file mode 100644
index 0000000..dab6890
--- /dev/null
+++ b/toolkit/cli/cmd_inspect.py
@@ -0,0 +1,104 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+import typer
+
+from toolkit.cli.common import iter_years
+from toolkit.core.config import load_config
+from toolkit.core.paths import layer_year_dir
+from toolkit.core.run_context import get_run_dir, latest_run
+
+
+def _clean_output_path(root: Path, dataset: str, year: int) -> Path:
+    return layer_year_dir(root, "clean", dataset, year) / f"{dataset}_{year}_clean.parquet"
+
+
+def _mart_output_paths(root: Path, year_dir: Path, tables: list[dict[str, Any]]) -> list[Path]:
+    return [year_dir / f"{table['name']}.parquet" for table in tables if isinstance(table, dict) and table.get("name")]
+
+
+def _payload_for_year(cfg, year: int) -> dict[str, Any]:
+    root = Path(cfg.root)
+    raw_dir = layer_year_dir(root, "raw", cfg.dataset, year)
+    clean_dir = layer_year_dir(root, "clean", cfg.dataset, year)
+    mart_dir = layer_year_dir(root, "mart", cfg.dataset, year)
+    run_dir = get_run_dir(root, cfg.dataset, year)
+    mart_tables = cfg.mart.get("tables") or []
+
+    latest_payload: dict[str, Any] | None = None
+    try:
+        latest_record = latest_run(run_dir)
+        latest_payload = {
+            "run_id": latest_record.get("run_id"),
+            "status": latest_record.get("status"),
+            "started_at": latest_record.get("started_at"),
+            "path": str(run_dir / f"{latest_record.get('run_id')}.json"),
+        }
+    except FileNotFoundError:
+        latest_payload = None
+
+    return {
+        "dataset": cfg.dataset,
+        "year": year,
+        "config_path": str(cfg.base_dir / "dataset.yml"),
+        "root": str(root),
+        "paths": {
+            "raw_dir": str(raw_dir),
+            "clean_dir": str(clean_dir),
+            "clean_output": str(_clean_output_path(root, cfg.dataset, year)),
+            "mart_dir": str(mart_dir),
+            "mart_outputs": [str(path) for path in _mart_output_paths(root, mart_dir, mart_tables)],
+            "run_dir": str(run_dir),
+        },
+        "latest_run": latest_payload,
+    }
+
+
+def paths(
+    config: str = typer.Option(..., "--config", "-c", help="Path to dataset.yml"),
+    year: int | None = typer.Option(None, "--year", help="Dataset year"),
+    as_json: bool = typer.Option(False, "--json", help="Emit JSON output for notebooks/scripts"),
+    strict_config: bool = typer.Option(False, "--strict-config", help="Treat deprecated config forms as errors"),
+):
+    """
+    Mostra i path stabili di output e l'ultimo run record per dataset/year.
+    """
+    strict_config_flag = strict_config if isinstance(strict_config, bool) else False
+    cfg = load_config(config, strict_config=strict_config_flag)
+    years = iter_years(cfg, year)
+    payload = [_payload_for_year(cfg, selected_year) for selected_year in years]
+
+    if as_json:
+        typer.echo(json.dumps(payload if len(payload) > 1 else payload[0], indent=2, ensure_ascii=False))
+        return
+
+    for item in payload:
+        typer.echo(f"dataset: {item['dataset']}")
+        typer.echo(f"year: {item['year']}")
+        typer.echo(f"config_path: {item['config_path']}")
+        typer.echo(f"root: {item['root']}")
+        typer.echo(f"raw_dir: {item['paths']['raw_dir']}")
+        typer.echo(f"clean_dir: {item['paths']['clean_dir']}")
+        typer.echo(f"clean_output: {item['paths']['clean_output']}")
+        typer.echo(f"mart_dir: {item['paths']['mart_dir']}")
+        typer.echo("mart_outputs:")
+        for output in item["paths"]["mart_outputs"]:
+            typer.echo(f"  - {output}")
+        typer.echo(f"run_dir: {item['paths']['run_dir']}")
+        latest_info = item.get("latest_run")
+        if latest_info is None:
+            typer.echo("latest_run: none")
+        else:
+            typer.echo(f"latest_run_id: {latest_info['run_id']}")
+            typer.echo(f"latest_run_status: {latest_info['status']}")
+            typer.echo(f"latest_run_record: {latest_info['path']}")
+        typer.echo("")
+
+
+def register(app: typer.Typer) -> None:
+    inspect_app = typer.Typer(no_args_is_help=True, add_completion=False)
+    inspect_app.command("paths")(paths)
+    app.add_typer(inspect_app, name="inspect")

From b1c08991b89625bf31cebd13d6bf2f51fde77d7a Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Mon, 2 Mar 2026 09:47:48 +0000
Subject: [PATCH 4/6] Refine inspect paths output and trim public README

---
 README.md                       | 160 +++-----------------------------
 docs/notebook-contract.md       |   2 +-
 tests/test_cli_inspect_paths.py |  10 +-
 toolkit/cli/cmd_inspect.py      |  62 ++++++++++---
 4 files changed, 72 insertions(+), 162 deletions(-)

diff --git a/README.md b/README.md
index dd7366b..e6d6d5f 100644
--- a/README.md
+++ b/README.md
@@ -199,158 +199,26 @@ Questo mantiene il contratto semplice tra toolkit e repo dataset:
 - i notebook li ispezionano localmente
 - `dataset.yml` resta la fonte di verita` per dataset, anni e path relativi
 
-## Run Tracking
+## Operative Notes
 
-Ogni comando `toolkit run ...` o `toolkit resume ...` scrive un record JSON in:
+Run tracking:
 
-```text
-data/_runs/<dataset>/<year>/<run_id>.json
-```
-
-Il record contiene almeno:
-
-- `status`: `RUNNING`, `SUCCESS`, `FAILED`, `SUCCESS_WITH_WARNINGS`, `DRY_RUN`
-- `started_at`, `finished_at`
-- `layers.raw|clean|mart.status`
-- `validations.raw|clean|mart`
-- `error` se presente
-- `resumed_from` se il run deriva da una ripresa
-
-Questo file e` la fonte per i comandi `status` e `resume`.
-
-## Validation Gate
-
-`toolkit run ...` esegue automaticamente la validazione dopo ogni layer completato con successo.
-
-Comportamento:
-
-- se la validazione passa, il run prosegue
-- se la validazione fallisce e `validation.fail_on_error: true`, la pipeline si interrompe
-- se la validazione fallisce e `validation.fail_on_error: false`, la pipeline continua e il run termina come `SUCCESS_WITH_WARNINGS`
-
-La CLI `validate` resta disponibile per eseguire i check separatamente.
-
-## Layer
-
-### RAW
-
-Responsabilita`:
-
-- legge o scarica il payload da plugin sorgente
-- applica extractor opzionale
-- scrive file normalizzati nel layer RAW
-- produce metadata, manifest e validation report
-
-Output tipici:
-
-- file sorgente normalizzati
-- `metadata.json`
-- `raw_validation.json`
-- `manifest.json`
-
-`manifest.json` nel RAW dichiara sempre il file primario da usare a valle.
-Campi minimi: `dataset`, `year`, `run_id`, `created_at`, `sources`, `primary_output_file`.
-`primary_output_file` e gli `output_file` delle source sono path relativi al RAW year-dir, in formato posix.
-`raw.output_policy` supporta `versioned` (default, suffix `_1/_2`) e `overwrite` (stesso filename sovrascritto).
-Con piu` source, si puo` fissare il primario con `primary: true`; altrimenti il toolkit usa la prima source e logga un warning.
-
-### CLEAN
-
-Responsabilita`:
-
-- seleziona gli input dal RAW year-dir
-- renderizza `clean.sql`
-- esegue SQL in DuckDB
-- esporta un parquet clean
-
-Output tipici:
-
-- `<dataset>_<year>_clean.parquet`
-- `_run/clean_rendered.sql`
-- `metadata.json`
-- `manifest.json`
-- `_validate/clean_validation.json`
-
-### MART
+- ogni `run` e `resume` scrive un record in `data/_runs/<dataset>/<year>/<run_id>.json`
+- `status` legge questi record
+- `inspect paths` espone i path stabili da usare in notebook e script
 
-Responsabilita`:
+Validation gate:
 
-- legge parquet CLEAN
-- renderizza le SQL delle tabelle finali
-- esporta un parquet per tabella
+- `toolkit run ...` valida automaticamente dopo ogni layer completato
+- con `validation.fail_on_error: true` la pipeline si ferma
+- con `validation.fail_on_error: false` il run puo' terminare come `SUCCESS_WITH_WARNINGS`
 
-Output tipici:
-
-- `<table>.parquet`
-- `_run/*_rendered.sql`
-- `metadata.json`
-- `manifest.json`
-- `_validate/mart_validation.json`
-
-## Validazioni
-
-### CLEAN
-
-- `required_columns`
-- `min_rows`
-- `not_null`
-- `primary_key`
-- `ranges`
-- `max_null_pct`
-
-### MART
-
-- `min_rows`
-- `required_columns`
-- `not_null`
-- `primary_key`
-- `ranges`
-
-Le validazioni vengono eseguite automaticamente da `toolkit run ...` dopo ogni layer completato con successo.
-La CLI `toolkit validate ...` resta disponibile per eseguirle separatamente.
-
-## Plugin sorgente
-
-Plugin registrati:
-
-- `local_file`
-- `http_file`
-- `api_json_paged`
-- `html_table`
-
-Stabilita`:
-
-- core pipeline `raw`, `clean`, `mart`: stable
-- plugin `local_file`, `http_file`: stable
-- plugin `api_json_paged`, `html_table`: experimental
-
-Come aggiungere un plugin:
-
-- definisci una classe plugin in `toolkit/plugins/<nome>.py`
-- contratto minimo:
-  - `__init__(**client)` per ricevere configurazione client
-  - `fetch(...) -> bytes` per restituire il payload RAW
-- registra il plugin in modo esplicito in `toolkit.core.registry.register_builtin_plugins()`
-- se il plugin dipende da librerie opzionali, il fallimento di import deve essere trattato come plugin opzionale non disponibile:
-  - warning `DCLPLUGIN001` in non-strict
-  - errore in strict mode
-
-## Smoke locale
-
-`project-example/` e` pensato per un giro completo locale, senza rete:
-
-```bash
-cd project-example
-py -m toolkit.cli.app run all --config dataset.yml
-py -m toolkit.cli.app status --dataset project_example --year 2022 --latest --config dataset.yml
-```
-
-Artefatti attesi:
+Per dettagli completi su layer, validazioni, plugin, artifact policy e flow avanzati, vedi:
 
-- `project-example/_smoke_out/data/raw/project_example/2022/raw_validation.json`
-- `project-example/_smoke_out/data/clean/project_example/2022/project_example_2022_clean.parquet`
-- `project-example/_smoke_out/data/mart/project_example/2022/rd_by_regione.parquet`
-- `project-example/_smoke_out/data/_runs/project_example/2022/<run_id>.json`
+- [docs/conventions.md](docs/conventions.md)
+- [docs/config-schema.md](docs/config-schema.md)
+- [docs/feature-stability.md](docs/feature-stability.md)
+- [docs/advanced-workflows.md](docs/advanced-workflows.md)
 
 ## Conventions
 
diff --git a/docs/notebook-contract.md b/docs/notebook-contract.md
index a968b68..e1170f7 100644
--- a/docs/notebook-contract.md
+++ b/docs/notebook-contract.md
@@ -19,7 +19,7 @@ Per evitare duplicazione di path logic nei notebook:
 
 - leggi `dataset.yml`
 - usa `toolkit inspect paths --config dataset.yml --year <year> --json`
-- poi apri parquet, metadata e run record dai path restituiti
+- poi apri parquet, metadata, manifest, validation e run record dai path restituiti
 
 Regola pratica:
 
diff --git a/tests/test_cli_inspect_paths.py b/tests/test_cli_inspect_paths.py
index 6dc5e94..46a2f8a 100644
--- a/tests/test_cli_inspect_paths.py
+++ b/tests/test_cli_inspect_paths.py
@@ -31,7 +31,10 @@ def test_inspect_paths_reports_dataset_repo_layout_from_other_cwd(tmp_path: Path
     assert f"config_path: {config_path}" in result.output
     assert f"root: {dst / '_smoke_out'}" in result.output
     assert f"raw_dir: {dst / '_smoke_out' / 'data' / 'raw' / 'project_example' / '2022'}" in result.output
+    assert f"raw_manifest: {dst / '_smoke_out' / 'data' / 'raw' / 'project_example' / '2022' / 'manifest.json'}" in result.output
     assert f"clean_output: {dst / '_smoke_out' / 'data' / 'clean' / 'project_example' / '2022' / 'project_example_2022_clean.parquet'}" in result.output
+    assert f"clean_validation: {dst / '_smoke_out' / 'data' / 'clean' / 'project_example' / '2022' / '_validate' / 'clean_validation.json'}" in result.output
+    assert f"mart_manifest: {dst / '_smoke_out' / 'data' / 'mart' / 'project_example' / '2022' / 'manifest.json'}" in result.output
     assert "latest_run_status: SUCCESS" in result.output
 
 
@@ -55,6 +58,9 @@ def test_inspect_paths_json_is_notebook_friendly(tmp_path: Path, monkeypatch) ->
     assert payload["dataset"] == "project_example"
     assert payload["year"] == 2022
     assert payload["config_path"] == str(config_path)
-    assert payload["paths"]["clean_output"].endswith("project_example_2022_clean.parquet")
-    assert payload["paths"]["mart_outputs"]
+    assert payload["paths"]["clean"]["output"].endswith("project_example_2022_clean.parquet")
+    assert payload["paths"]["clean"]["validation"].endswith("clean_validation.json")
+    assert payload["paths"]["raw"]["manifest"].endswith("manifest.json")
+    assert payload["paths"]["mart"]["outputs"]
+    assert payload["paths"]["mart"]["metadata"].endswith("metadata.json")
     assert payload["latest_run"] is None
diff --git a/toolkit/cli/cmd_inspect.py b/toolkit/cli/cmd_inspect.py
index dab6890..6330344 100644
--- a/toolkit/cli/cmd_inspect.py
+++ b/toolkit/cli/cmd_inspect.py
@@ -12,19 +12,48 @@
 from toolkit.core.run_context import get_run_dir, latest_run
 
 
+def _raw_output_paths(root: Path, dataset: str, year: int) -> dict[str, str]:
+    raw_dir = layer_year_dir(root, "raw", dataset, year)
+    return {
+        "dir": str(raw_dir),
+        "manifest": str(raw_dir / "manifest.json"),
+        "metadata": str(raw_dir / "metadata.json"),
+        "validation": str(raw_dir / "raw_validation.json"),
+    }
+
+
 def _clean_output_path(root: Path, dataset: str, year: int) -> Path:
     return layer_year_dir(root, "clean", dataset, year) / f"{dataset}_{year}_clean.parquet"
 
 
+def _clean_paths(root: Path, dataset: str, year: int) -> dict[str, str]:
+    clean_dir = layer_year_dir(root, "clean", dataset, year)
+    return {
+        "dir": str(clean_dir),
+        "output": str(_clean_output_path(root, dataset, year)),
+        "manifest": str(clean_dir / "manifest.json"),
+        "metadata": str(clean_dir / "metadata.json"),
+        "validation": str(clean_dir / "_validate" / "clean_validation.json"),
+    }
+
+
 def _mart_output_paths(root: Path, year_dir: Path, tables: list[dict[str, Any]]) -> list[Path]:
     return [year_dir / f"{table['name']}.parquet" for table in tables if isinstance(table, dict) and table.get("name")]
 
 
+def _mart_paths(root: Path, dataset: str, year: int, tables: list[dict[str, Any]]) -> dict[str, Any]:
+    mart_dir = layer_year_dir(root, "mart", dataset, year)
+    return {
+        "dir": str(mart_dir),
+        "outputs": [str(path) for path in _mart_output_paths(root, mart_dir, tables)],
+        "manifest": str(mart_dir / "manifest.json"),
+        "metadata": str(mart_dir / "metadata.json"),
+        "validation": str(mart_dir / "_validate" / "mart_validation.json"),
+    }
+
+
 def _payload_for_year(cfg, year: int) -> dict[str, Any]:
     root = Path(cfg.root)
-    raw_dir = layer_year_dir(root, "raw", cfg.dataset, year)
-    clean_dir = layer_year_dir(root, "clean", cfg.dataset, year)
-    mart_dir = layer_year_dir(root, "mart", cfg.dataset, year)
     run_dir = get_run_dir(root, cfg.dataset, year)
     mart_tables = cfg.mart.get("tables") or []
 
@@ -46,11 +75,9 @@ def _payload_for_year(cfg, year: int) -> dict[str, Any]:
         "config_path": str(cfg.base_dir / "dataset.yml"),
         "root": str(root),
         "paths": {
-            "raw_dir": str(raw_dir),
-            "clean_dir": str(clean_dir),
-            "clean_output": str(_clean_output_path(root, cfg.dataset, year)),
-            "mart_dir": str(mart_dir),
-            "mart_outputs": [str(path) for path in _mart_output_paths(root, mart_dir, mart_tables)],
+            "raw": _raw_output_paths(root, cfg.dataset, year),
+            "clean": _clean_paths(root, cfg.dataset, year),
+            "mart": _mart_paths(root, cfg.dataset, year, mart_tables),
             "run_dir": str(run_dir),
         },
         "latest_run": latest_payload,
@@ -80,13 +107,22 @@ def paths(
         typer.echo(f"year: {item['year']}")
         typer.echo(f"config_path: {item['config_path']}")
         typer.echo(f"root: {item['root']}")
-        typer.echo(f"raw_dir: {item['paths']['raw_dir']}")
-        typer.echo(f"clean_dir: {item['paths']['clean_dir']}")
-        typer.echo(f"clean_output: {item['paths']['clean_output']}")
-        typer.echo(f"mart_dir: {item['paths']['mart_dir']}")
+        typer.echo(f"raw_dir: {item['paths']['raw']['dir']}")
+        typer.echo(f"raw_manifest: {item['paths']['raw']['manifest']}")
+        typer.echo(f"raw_metadata: {item['paths']['raw']['metadata']}")
+        typer.echo(f"raw_validation: {item['paths']['raw']['validation']}")
+        typer.echo(f"clean_dir: {item['paths']['clean']['dir']}")
+        typer.echo(f"clean_output: {item['paths']['clean']['output']}")
+        typer.echo(f"clean_manifest: {item['paths']['clean']['manifest']}")
+        typer.echo(f"clean_metadata: {item['paths']['clean']['metadata']}")
+        typer.echo(f"clean_validation: {item['paths']['clean']['validation']}")
+        typer.echo(f"mart_dir: {item['paths']['mart']['dir']}")
         typer.echo("mart_outputs:")
-        for output in item["paths"]["mart_outputs"]:
+        for output in item["paths"]["mart"]["outputs"]:
             typer.echo(f"  - {output}")
+        typer.echo(f"mart_manifest: {item['paths']['mart']['manifest']}")
+        typer.echo(f"mart_metadata: {item['paths']['mart']['metadata']}")
+        typer.echo(f"mart_validation: {item['paths']['mart']['validation']}")
         typer.echo(f"run_dir: {item['paths']['run_dir']}")
         latest_info = item.get("latest_run")
         if latest_info is None:

From 9e904c75b97428a222223da761024fdbdd8b6809 Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Mon, 2 Mar 2026 15:51:53 +0000
Subject: [PATCH 5/6] Clarify toolkit role in the DataCivicLab ecosystem

---
 README.md | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/README.md b/README.md
index e6d6d5f..c5b2661 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,20 @@
 
 Toolkit Python per pipeline dati riproducibili `RAW -> CLEAN -> MART`, con approccio SQL-first, audit degli artefatti e run tracking persistente.
 
+## Ruolo Nell'Ecosistema
+
+Questa repo e' il motore tecnico della pipeline dati di DataCivicLab.
+
+Ruoli delle repo correlate:
+
+- `.github`: policy condivise, community health, template issue/PR, onboarding GitHub
+- `dataciviclab`: hub pubblico e minimale dell'organizzazione
+- `toolkit`: runtime, CLI, contract di config/path/output, documentazione tecnica del motore
+- `project-template`: template operativo dei repo dataset
+- repo dataset: progetti concreti che usano il toolkit
+
+Questa repo non e' l'hub dell'organizzazione e non replica la documentazione org-wide: resta focalizzata sul motore e sul suo contratto tecnico.
+
 ## Obiettivi
 
 - mantenere una struttura progetto semplice: `dataset.yml` + `sql/`
@@ -88,6 +102,7 @@ Schema completo e legacy supportato: [docs/config-schema.md](docs/config-schema.
 Flow avanzati e tooling secondario: [docs/advanced-workflows.md](docs/advanced-workflows.md)
 Matrice di stabilita`: [docs/feature-stability.md](docs/feature-stability.md)
 Contratto notebook/output: [docs/notebook-contract.md](docs/notebook-contract.md)
+Per policy condivise e community health organizzativa, fai riferimento alla repo `.github` dell'organizzazione.
 
 Artefatti attesi:
 

From f86a657a995a3670ba440d9f7b7d13fd485f5c2c Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Mon, 2 Mar 2026 15:52:02 +0000
Subject: [PATCH 6/6] Point repository conduct and security docs to org
 defaults

---
 CODE_OF_CONDUCT.md | 2 ++
 SECURITY.md        | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index befd34a..530cf04 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -1,5 +1,7 @@
 # Code of Conduct
 
+This repository follows the shared collaboration baseline of the DataCivicLab organization. For organization-wide community health and default GitHub policy, refer to the `dataciviclab/.github` repository. This file keeps a short local reference for the toolkit repository.
+
 ## Our Pledge
 
 We want this project to be a respectful, constructive, and harassment-free space for everyone involved.
diff --git a/SECURITY.md b/SECURITY.md
index 7f618eb..6ab4be1 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -4,6 +4,8 @@
 
 If you believe you found a security issue in this repository, please do not open a public issue first.
 
+For shared organizational GitHub policy and community health defaults, see the `dataciviclab/.github` repository. This file only covers the reporting path for this technical repository.
+
 Instead:
 
 - contact the maintainers privately, if a private contact path is available