From 70e16ab5bd79ba0efa6623ec4e468ede8bfecd54 Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Sun, 8 Mar 2026 18:44:01 +0000
Subject: [PATCH 1/5] refactor: rimuovi compat legacy del parser config

---
 docs/advanced-workflows.md    |   2 -
 docs/config-schema.md         |   7 +-
 tests/test_config.py          | 137 ++++++++--------------------------
 toolkit/clean/duckdb_read.py  |   6 +-
 toolkit/core/config_models.py | 109 +--------------------------
 toolkit/core/csv_read.py      |  46 +-----------
 6 files changed, 42 insertions(+), 265 deletions(-)

diff --git a/docs/advanced-workflows.md b/docs/advanced-workflows.md
index fdf1bb3..3fd48f2 100644
--- a/docs/advanced-workflows.md
+++ b/docs/advanced-workflows.md
@@ -95,8 +95,6 @@ Regola pratica:
 
 ## Compat legacy
 
-Il toolkit mantiene compatibilita` con alcune forme legacy del config per facilitare la migrazione.
-
 Per i repo nuovi:
 
 - usa la shape canonica documentata in [config-schema.md](./config-schema.md)
diff --git a/docs/config-schema.md b/docs/config-schema.md
index 112a123..2be7767 100644
--- a/docs/config-schema.md
+++ b/docs/config-schema.md
@@ -222,11 +222,6 @@ Con `config.strict: true` o `--strict-config`, gli stessi casi diventano errori.
 
 | Code | Legacy | Replacement | Status |
 |---|---|---|---|
-| `DCL001` | `raw.source` | `raw.sources` | deprecated |
-| `DCL002` | `raw.sources[].plugin` | `raw.sources[].type` | deprecated |
-| `DCL003` | `raw.sources[].id` | `raw.sources[].name` | deprecated |
-| `DCL004` | `clean.read: "auto"` | `clean.read.source: auto` | deprecated |
-| `DCL005` | `clean.read.csv.*` | `clean.read.*` | deprecated |
 | `DCL006` | `clean.sql_path` | `clean.sql` | ignored |
 | `DCL007` | `mart.sql_dir` | `mart.tables[].sql` | ignored |
 | `DCL008` | `bq` | rimuovere il campo | ignored |
@@ -312,7 +307,7 @@ Esempi tipici:
 - `Config validation failed: output.unknown_flag: Extra inputs are not permitted`
 - `Config validation failed: raw.sources: Input should be a valid list`
 - `Config validation failed: clean.validate.primary_key: clean.validate.primary_key must be a string or a list of strings`
-- `DCL001 raw.source is deprecated, usare raw.sources`
+- `raw.source is no longer supported; use raw.sources`
 
 Regola pratica:
 
diff --git a/tests/test_config.py b/tests/test_config.py
index a4e79c0..508791d 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -64,10 +64,10 @@ def test_load_config_resolves_relative_paths_from_dataset_dir(tmp_path: Path):
   name: demo
   years: [2022]
 raw:
-  source:
-    type: local_file
-    args:
-      path: "data/raw.csv"
+  sources:
+    - type: local_file
+      args:
+        path: "data/raw.csv"
 clean:
   sql: "sql/clean.sql"
 mart:
@@ -88,7 +88,7 @@ def test_load_config_resolves_relative_paths_from_dataset_dir(tmp_path: Path):
     assert cfg.base_dir == project_dir.resolve()
     assert cfg.root == (project_dir / "out").resolve()
     assert cfg.root_source == "yml"
-    assert cfg.raw["source"]["args"]["path"] == (project_dir / "data" / "raw.csv").resolve()
+    assert cfg.raw["sources"][0]["args"]["path"] == (project_dir / "data" / "raw.csv").resolve()
     assert cfg.clean["sql"] == (project_dir / "sql" / "clean.sql").resolve()
     assert cfg.mart["tables"][0]["sql"] == (project_dir / "sql" / "mart" / "demo.sql").resolve()
     assert cfg.cross_year["tables"][0]["sql"] == (project_dir / "sql" / "cross" / "demo_cross.sql").resolve()
@@ -106,11 +106,11 @@ def test_load_config_does_not_transform_non_whitelisted_path_like_fields(tmp_pat
   name: demo
   years: [2022]
 raw:
-  source:
-    type: local_file
-    args:
-      path: "data/raw.csv"
-      filename: "nested/raw.csv"
+  sources:
+    - type: local_file
+      args:
+        path: "data/raw.csv"
+        filename: "nested/raw.csv"
 clean:
   sql: "sql/clean.sql"
   note_path: "docs/clean.md"
@@ -125,8 +125,8 @@ def test_load_config_does_not_transform_non_whitelisted_path_like_fields(tmp_pat
 
     cfg = load_config(yml)
 
-    assert cfg.raw["source"]["args"]["path"] == (project_dir / "data" / "raw.csv").resolve()
-    assert cfg.raw["source"]["args"]["filename"] == "nested/raw.csv"
+    assert cfg.raw["sources"][0]["args"]["path"] == (project_dir / "data" / "raw.csv").resolve()
+    assert cfg.raw["sources"][0]["args"]["filename"] == "nested/raw.csv"
     assert cfg.clean["note_path"] == "docs/clean.md"
     assert cfg.mart["label_path"] == "labels/mart.txt"
 
@@ -289,7 +289,7 @@ def test_load_config_uses_base_dir_when_root_missing_and_dcl_root_missing(tmp_pa
     assert cfg.root_source == "base_dir_fallback"
 
 
-def test_load_config_normalizes_legacy_clean_read_csv_and_warns(tmp_path: Path, caplog):
+def test_load_config_rejects_legacy_clean_read_csv_shape(tmp_path: Path):
     project_dir = tmp_path / "project"
     project_dir.mkdir()
     yml = project_dir / "dataset.yml"
@@ -311,21 +311,10 @@ def test_load_config_normalizes_legacy_clean_read_csv_and_warns(tmp_path: Path,
         encoding="utf-8",
     )
 
-    module_logger = logging.getLogger("toolkit.core.config")
-    module_logger.handlers = [caplog.handler]
-    module_logger.propagate = True
-    module_logger.setLevel(logging.WARNING)
-
-    with caplog.at_level(logging.WARNING, logger="toolkit.core.config"):
-        cfg = load_config(yml)
+    with pytest.raises(ValueError) as exc:
+        load_config(yml)
 
-    assert cfg.clean["read"] == {
-        "source": "auto",
-        "columns": {"amount": "DOUBLE"},
-        "delim": ";",
-    }
-    assert "DCL005" in caplog.text
-    assert "deprecated, usare clean.read.*" in caplog.text
+    assert "clean.read.csv" in str(exc.value)
 
 
 def test_load_config_canonical_clean_read_has_no_deprecation_warning(tmp_path: Path, caplog):
@@ -469,7 +458,7 @@ def test_load_config_warns_on_zombie_fields(tmp_path: Path, caplog):
     assert "deprecated/ignored, usare remove field" in caplog.text
 
 
-def test_load_config_model_normalizes_legacy_aliases_to_canonical_shape(tmp_path: Path):
+def test_load_config_model_rejects_legacy_raw_source_plugin_id_shape(tmp_path: Path):
     yml = tmp_path / "dataset.yml"
     yml.write_text(
         """
@@ -482,52 +471,19 @@ def test_load_config_model_normalizes_legacy_aliases_to_canonical_shape(tmp_path
     plugin: local_file
     args:
       path: data/raw.csv
-clean:
-  read: auto
-mart: {}
-""".strip(),
-        encoding="utf-8",
-    )
-
-    model = load_config_model(yml)
-
-    assert len(model.raw.sources) == 1
-    assert model.raw.sources[0].name == "src_legacy"
-    assert model.raw.sources[0].type == "local_file"
-    assert model.clean.read is not None
-    assert model.clean.read.source == "auto"
-
-
-def test_load_config_logs_deprecation_codes_for_legacy_normalization(tmp_path: Path, caplog):
-    yml = tmp_path / "dataset.yml"
-    yml.write_text(
-        """
-dataset:
-  name: demo
-  years: [2022]
-raw:
-  source:
-    id: src_legacy
-    plugin: local_file
-    args:
-      path: data/raw.csv
-clean:
-  read: auto
+clean: {}
 mart: {}
 """.strip(),
         encoding="utf-8",
     )
 
-    with caplog.at_level(logging.WARNING, logger="toolkit.core.config"):
-        load_config(yml)
+    with pytest.raises(ValueError) as exc:
+        load_config_model(yml)
 
-    assert "DCL001" in caplog.text
-    assert "DCL002" in caplog.text
-    assert "DCL003" in caplog.text
-    assert "DCL004" in caplog.text
+    assert "raw.sources" in str(exc.value) or "raw.source" in str(exc.value)
 
 
-def test_load_config_model_strict_config_rejects_legacy_normalization(tmp_path: Path):
+def test_load_config_model_rejects_legacy_raw_sources_plugin_id_fields(tmp_path: Path):
     yml = tmp_path / "dataset.yml"
     yml.write_text(
         """
@@ -535,10 +491,11 @@ def test_load_config_model_strict_config_rejects_legacy_normalization(tmp_path:
   name: demo
   years: [2022]
 raw:
-  source:
-    type: local_file
-    args:
-      path: data/raw.csv
+  sources:
+    - id: src_legacy
+      plugin: local_file
+      args:
+        path: data/raw.csv
 clean: {}
 mart: {}
 """.strip(),
@@ -546,18 +503,15 @@ def test_load_config_model_strict_config_rejects_legacy_normalization(tmp_path:
     )
 
     with pytest.raises(ValueError) as exc:
-        load_config_model(yml, strict_config=True)
+        load_config_model(yml)
 
-    assert "DCL001" in str(exc.value)
-    assert "raw.source is deprecated, usare raw.sources" in str(exc.value)
+    assert "raw.sources.0" in str(exc.value)
 
 
-def test_load_config_model_config_strict_rejects_legacy_normalization(tmp_path: Path):
+def test_load_config_rejects_legacy_clean_read_scalar_form(tmp_path: Path):
     yml = tmp_path / "dataset.yml"
     yml.write_text(
         """
-config:
-  strict: true
 dataset:
   name: demo
   years: [2022]
@@ -569,36 +523,9 @@ def test_load_config_model_config_strict_rejects_legacy_normalization(tmp_path:
     )
 
     with pytest.raises(ValueError) as exc:
-        load_config_model(yml)
-
-    assert "DCL004" in str(exc.value)
-    assert "clean.read scalar form is deprecated" in str(exc.value)
-
-
-def test_cli_strict_config_rejects_legacy_config(tmp_path: Path):
-    project_dir = tmp_path / "project"
-    project_dir.mkdir()
-    yml = project_dir / "dataset.yml"
-    yml.write_text(
-        """
-dataset:
-  name: demo
-  years: [2022]
-raw:
-  source:
-    type: local_file
-    args:
-      path: data/raw.csv
-clean: {}
-mart: {}
-""".strip(),
-        encoding="utf-8",
-    )
-
-    with pytest.raises(ValueError) as exc:
-        run_cmd(step="raw", config=str(yml), strict_config=True)
+        load_config(yml)
 
-    assert "DCL001" in str(exc.value)
+    assert "clean.read" in str(exc.value)
 
 
 def test_project_example_config_parses_in_strict_mode():
diff --git a/toolkit/clean/duckdb_read.py b/toolkit/clean/duckdb_read.py
index b253b8d..94577dd 100644
--- a/toolkit/clean/duckdb_read.py
+++ b/toolkit/clean/duckdb_read.py
@@ -46,17 +46,13 @@ def _read_source_mode(clean_cfg: dict[str, Any], logger=None) -> tuple[str, dict
 
     if raw_read_cfg is None:
         pass
-    elif isinstance(raw_read_cfg, str):
-        if logger is not None:
-            logger.warning("clean.read scalar form is deprecated; use clean.read.source")
-        read_source = raw_read_cfg
     elif isinstance(raw_read_cfg, dict):
         explicit_cfg = dict(raw_read_cfg)
         nested_source = explicit_cfg.pop("source", None)
         if nested_source is not None:
             read_source = nested_source
     else:
-        raise ValueError("clean.read must be either a mapping (dict) or one of: auto, config_only")
+        raise ValueError("clean.read must be a mapping (dict)")
 
     normalized_source = str(read_source or "auto")
     if normalized_source not in READ_SOURCE_MODES:
diff --git a/toolkit/core/config_models.py b/toolkit/core/config_models.py
index e964347..311ca82 100644
--- a/toolkit/core/config_models.py
+++ b/toolkit/core/config_models.py
@@ -26,41 +26,6 @@ class ConfigDeprecation:
 
 
 _CONFIG_DEPRECATIONS: dict[str, ConfigDeprecation] = {
-    "raw.source": ConfigDeprecation(
-        code="DCL001",
-        legacy="raw.source",
-        replacement="raw.sources",
-        status="deprecated",
-        message="raw.source is deprecated, usare raw.sources",
-    ),
-    "raw.sources[].plugin": ConfigDeprecation(
-        code="DCL002",
-        legacy="raw.sources[].plugin",
-        replacement="raw.sources[].type",
-        status="deprecated",
-        message="raw.sources[].plugin is deprecated, usare raw.sources[].type",
-    ),
-    "raw.sources[].id": ConfigDeprecation(
-        code="DCL003",
-        legacy="raw.sources[].id",
-        replacement="raw.sources[].name",
-        status="deprecated",
-        message="raw.sources[].id is deprecated, usare raw.sources[].name",
-    ),
-    "clean.read": ConfigDeprecation(
-        code="DCL004",
-        legacy="clean.read: <string>",
-        replacement="clean.read.source",
-        status="deprecated",
-        message="clean.read scalar form is deprecated, usare clean.read.source",
-    ),
-    "clean.read.csv": ConfigDeprecation(
-        code="DCL005",
-        legacy="clean.read.csv.*",
-        replacement="clean.read.*",
-        status="deprecated",
-        message="clean.read.csv.* is deprecated, usare clean.read.*",
-    ),
     "clean.sql_path": ConfigDeprecation(
         code="DCL006",
         legacy="clean.sql_path",
@@ -644,19 +609,6 @@ def _resolve_root(root: Any, *, base_dir: Path) -> tuple[Path, str]:
         source = "env:TOOLKIT_OUTDIR" if os.environ.get("TOOLKIT_OUTDIR") else "env:DCL_OUTDIR"
         return Path(managed_outdir).expanduser().resolve(), source
     return _resolve_path_value(root, base_dir=base_dir), "yml"
-
-
-def _normalize_legacy_source(source: dict[str, Any]) -> dict[str, Any]:
-    normalized = dict(source)
-    plugin = normalized.pop("plugin", None)
-    if plugin is not None and "type" not in normalized:
-        normalized["type"] = plugin
-    source_id = normalized.pop("id", None)
-    if source_id is not None and "name" not in normalized:
-        normalized["name"] = source_id
-    return normalized
-
-
 def _emit_deprecation_notice(
     key: str,
     *,
@@ -713,39 +665,6 @@ def _declared_model_keys(model_cls: type[BaseModel]) -> set[str]:
 _CROSS_YEAR_ALLOWED_KEYS = _declared_model_keys(CrossYearConfig)
 
 
-def _normalize_legacy_clean_read(
-    clean: dict[str, Any],
-    *,
-    path: Path,
-    strict_config: bool,
-) -> dict[str, Any]:
-    normalized = dict(clean)
-    read_cfg = normalized.get("read")
-
-    if isinstance(read_cfg, str):
-        _emit_deprecation_notice("clean.read", strict_config=strict_config, path=path)
-        normalized["read"] = {"source": read_cfg}
-        read_cfg = normalized["read"]
-
-    if not isinstance(read_cfg, dict):
-        return normalized
-
-    csv_cfg = read_cfg.get("csv")
-    if csv_cfg is None:
-        return normalized
-    if not isinstance(csv_cfg, dict):
-        raise _err("clean.read.csv deve essere una mappa YAML (oggetto).", path=path)
-
-    merged_read = dict(read_cfg)
-    merged_read.pop("csv", None)
-    for key, value in csv_cfg.items():
-        merged_read.setdefault(key, value)
-
-    _emit_deprecation_notice("clean.read.csv", strict_config=strict_config, path=path)
-    normalized["read"] = merged_read
-    return normalized
-
-
 def _normalize_legacy_payload(
     data: dict[str, Any],
     *,
@@ -756,33 +675,11 @@ def _normalize_legacy_payload(
 
     raw = normalized.get("raw")
     if isinstance(raw, dict):
-        updated_raw = dict(raw)
-        if "source" in updated_raw:
-            source = updated_raw.pop("source")
-            if "sources" in updated_raw:
-                raise _err("Use either raw.source or raw.sources, not both.", path=path)
-            updated_raw["sources"] = [source]
-            _emit_deprecation_notice("raw.source", strict_config=strict_config, path=path)
-        sources = updated_raw.get("sources")
-        if isinstance(sources, list):
-            normalized_sources: list[Any] = []
-            for source in sources:
-                if not isinstance(source, dict):
-                    normalized_sources.append(source)
-                    continue
-                original = dict(source)
-                normalized_source = _normalize_legacy_source(source)
-                if "plugin" in original and "type" not in original:
-                    _emit_deprecation_notice("raw.sources[].plugin", strict_config=strict_config, path=path)
-                if "id" in original and "name" not in original:
-                    _emit_deprecation_notice("raw.sources[].id", strict_config=strict_config, path=path)
-                normalized_sources.append(normalized_source)
-            updated_raw["sources"] = normalized_sources
-        normalized["raw"] = updated_raw
+        normalized["raw"] = dict(raw)
 
     clean = normalized.get("clean")
     if isinstance(clean, dict):
-        updated_clean = _normalize_legacy_clean_read(clean, path=path, strict_config=strict_config)
+        updated_clean = dict(clean)
         if "sql_path" in updated_clean:
             _emit_deprecation_notice("clean.sql_path", strict_config=strict_config, path=path)
         normalized["clean"] = updated_clean
@@ -828,6 +725,8 @@ def _warn_or_reject_unknown_keys(
         if not isinstance(section, dict):
             continue
         extras = [key for key in section.keys() if key not in allowed_keys]
+        if section_name == "raw" and "source" in extras:
+            raise _err("raw.source is no longer supported; use raw.sources", path=path)
         if extras:
             _emit_unknown_keys_notice(
                 notice_key,
diff --git a/toolkit/core/csv_read.py b/toolkit/core/csv_read.py
index f88350a..02ceaef 100644
--- a/toolkit/core/csv_read.py
+++ b/toolkit/core/csv_read.py
@@ -23,33 +23,12 @@
     "prefer_from_raw_run",
     "allow_ambiguous",
     "include",
-    "csv",
     "columns",
     "normalize_rows_to_columns",
     "trim_whitespace",
     "sample_size",
     "sheet_name",
 }
-ALLOWED_NESTED_CSV_KEYS = {
-    "delim",
-    "header",
-    "encoding",
-    "decimal",
-    "skip",
-    "auto_detect",
-    "quote",
-    "escape",
-    "comment",
-    "ignore_errors",
-    "strict_mode",
-    "null_padding",
-    "parallel",
-    "nullstr",
-    "columns",
-    "normalize_rows_to_columns",
-    "trim_whitespace",
-    "sheet_name",
-}
 FORMAT_HINT_KEYS = {
     "delim",
     "header",
@@ -118,12 +97,8 @@ def normalize_columns_spec(columns: object) -> dict[str, str] | None:
 
 def normalize_read_cfg(read_cfg: dict[str, Any] | None) -> dict[str, Any]:
     cfg = dict(read_cfg or {})
-    csv_cfg = cfg.get("csv") or {}
-    if csv_cfg and not isinstance(csv_cfg, dict):
-        raise ValueError(
-            "clean.read must be a mapping (dict) in dataset.yml; "
-            "legacy clean.read.csv must also be a mapping if used"
-        )
+    if "csv" in cfg:
+        raise ValueError("clean.read.csv is no longer supported; use clean.read.* directly")
 
     unknown_top = sorted(set(cfg.keys()) - ALLOWED_READ_CSV_KEYS)
     if unknown_top:
@@ -131,21 +106,8 @@ def normalize_read_cfg(read_cfg: dict[str, Any] | None) -> dict[str, Any]:
             "Unsupported clean.read options for CSV reader: "
             f"{unknown_top}. Allowed keys: {sorted(ALLOWED_READ_CSV_KEYS)}"
         )
-
-    if csv_cfg:
-        unknown_nested = sorted(set(csv_cfg.keys()) - ALLOWED_NESTED_CSV_KEYS)
-        if unknown_nested:
-            raise ValueError(
-                "Unsupported legacy clean.read.csv options: "
-                f"{unknown_nested}. Allowed keys: {sorted(ALLOWED_NESTED_CSV_KEYS)}"
-            )
-
-    merged = dict(csv_cfg)
-    for key in ALLOWED_NESTED_CSV_KEYS:
-        if key in cfg:
-            merged[key] = cfg[key]
-    merged["columns"] = normalize_columns_spec(merged.get("columns"))
-    return merged
+    cfg["columns"] = normalize_columns_spec(cfg.get("columns"))
+    return cfg
 
 
 def filter_suggested_format_keys(cfg: dict[str, Any] | None) -> dict[str, Any]:

From 53aae93060cb4597654f6e5c215d8f9cd916b5e1 Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Sun, 8 Mar 2026 18:48:24 +0000
Subject: [PATCH 2/5] refactor: rimuovi clean.sql_path e mart.sql_dir

---
 CHANGELOG.md                  |  2 --
 docs/config-schema.md         |  2 --
 tests/test_config.py          | 59 ++++++++++++++++++++++++++++-------
 toolkit/core/config_models.py | 31 +++++-------------
 4 files changed, 55 insertions(+), 39 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 13d8039..ef4c248 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -41,6 +41,4 @@ All notable changes to this project will be documented in this file.
 - `raw.sources[].id` in favor of `raw.sources[].name`
 - scalar `clean.read` in favor of `clean.read.source`
 - `clean.read.csv.*` in favor of `clean.read.*`
-- `clean.sql_path`
-- `mart.sql_dir`
 - `bq`
diff --git a/docs/config-schema.md b/docs/config-schema.md
index 2be7767..151e629 100644
--- a/docs/config-schema.md
+++ b/docs/config-schema.md
@@ -222,8 +222,6 @@ Con `config.strict: true` o `--strict-config`, gli stessi casi diventano errori.
 
 | Code | Legacy | Replacement | Status |
 |---|---|---|---|
-| `DCL006` | `clean.sql_path` | `clean.sql` | ignored |
-| `DCL007` | `mart.sql_dir` | `mart.tables[].sql` | ignored |
 | `DCL008` | `bq` | rimuovere il campo | ignored |
 | `DCL013` | `cross_year.* unknown keys` | rimuovere il campo | ignored |
 
diff --git a/tests/test_config.py b/tests/test_config.py
index 508791d..f422679 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -180,7 +180,6 @@ def test_load_config_logs_normalized_whitelist_fields(tmp_path: Path, caplog, mo
 clean:
   sql: "sql/clean.sql"
 mart:
-  sql_dir: "sql/mart"
   tables:
     - name: demo_mart
       sql: "sql/mart/demo.sql"
@@ -200,14 +199,12 @@ def test_load_config_logs_normalized_whitelist_fields(tmp_path: Path, caplog, mo
     assert cfg.root == (project_dir / "out").resolve()
     assert cfg.raw["sources"][0]["args"]["path"] == (project_dir / "data" / "raw_a.csv").resolve()
     assert cfg.clean["sql"] == (project_dir / "sql" / "clean.sql").resolve()
-    assert cfg.mart["sql_dir"] == (project_dir / "sql" / "mart").resolve()
     assert cfg.mart["tables"][0]["sql"] == (project_dir / "sql" / "mart" / "demo.sql").resolve()
 
     assert "Normalized config paths:" in caplog.text
     assert "root=" in caplog.text
     assert "raw.sources[0].args.path=" in caplog.text
     assert "clean.sql=" in caplog.text
-    assert "mart.sql_dir=" in caplog.text
     assert "mart.tables[0].sql=" in caplog.text
 
 
@@ -429,7 +426,30 @@ def test_load_config_normalizes_bool_and_string_list_fields(tmp_path: Path):
     assert cfg.mart["validate"]["table_rules"]["mart_ok"]["primary_key"] == ["key_id"]
 
 
-def test_load_config_warns_on_zombie_fields(tmp_path: Path, caplog):
+def test_load_config_warns_on_zombie_field_bq(tmp_path: Path, caplog):
+    yml = tmp_path / "dataset.yml"
+    yml.write_text(
+        """
+dataset:
+  name: demo
+  years: [2022]
+raw: {}
+bq:
+  dataset: ignored
+clean: {}
+mart: {}
+""".strip(),
+        encoding="utf-8",
+    )
+
+    with caplog.at_level(logging.WARNING, logger="toolkit.core.config"):
+        load_config(yml)
+
+    assert "DCL008" in caplog.text
+    assert "deprecated/ignored, usare remove field" in caplog.text
+
+
+def test_load_config_rejects_clean_sql_path(tmp_path: Path):
     yml = tmp_path / "dataset.yml"
     yml.write_text(
         """
@@ -439,6 +459,28 @@ def test_load_config_warns_on_zombie_fields(tmp_path: Path, caplog):
 raw: {}
 clean:
   sql_path: sql/legacy_clean.sql
+mart: {}
+bq:
+  dataset: ignored
+""".strip(),
+        encoding="utf-8",
+    )
+
+    with pytest.raises(ValueError) as exc:
+        load_config(yml)
+
+    assert "clean.sql_path" in str(exc.value)
+
+
+def test_load_config_rejects_mart_sql_dir(tmp_path: Path):
+    yml = tmp_path / "dataset.yml"
+    yml.write_text(
+        """
+dataset:
+  name: demo
+  years: [2022]
+raw: {}
+clean: {}
 mart:
   sql_dir: sql/mart
 bq:
@@ -447,15 +489,10 @@ def test_load_config_warns_on_zombie_fields(tmp_path: Path, caplog):
         encoding="utf-8",
     )
 
-    with caplog.at_level(logging.WARNING, logger="toolkit.core.config"):
+    with pytest.raises(ValueError) as exc:
         load_config(yml)
 
-    assert "DCL006" in caplog.text
-    assert "DCL007" in caplog.text
-    assert "DCL008" in caplog.text
-    assert "deprecated/ignored, usare clean.sql" in caplog.text
-    assert "deprecated/ignored, usare mart.tables[].sql" in caplog.text
-    assert "deprecated/ignored, usare remove field" in caplog.text
+    assert "mart.sql_dir" in str(exc.value)
 
 
 def test_load_config_model_rejects_legacy_raw_source_plugin_id_shape(tmp_path: Path):
diff --git a/toolkit/core/config_models.py b/toolkit/core/config_models.py
index 311ca82..da2780e 100644
--- a/toolkit/core/config_models.py
+++ b/toolkit/core/config_models.py
@@ -26,20 +26,6 @@ class ConfigDeprecation:
 
 
 _CONFIG_DEPRECATIONS: dict[str, ConfigDeprecation] = {
-    "clean.sql_path": ConfigDeprecation(
-        code="DCL006",
-        legacy="clean.sql_path",
-        replacement="clean.sql",
-        status="ignored",
-        message="clean.sql_path is deprecated/ignored, usare clean.sql",
-    ),
-    "mart.sql_dir": ConfigDeprecation(
-        code="DCL007",
-        legacy="mart.sql_dir",
-        replacement="mart.tables[].sql",
-        status="ignored",
-        message="mart.sql_dir is deprecated/ignored, usare mart.tables[].sql",
-    ),
     "bq": ConfigDeprecation(
         code="DCL008",
         legacy="bq",
@@ -471,10 +457,8 @@ def _resolve_path_value(value: Any, *, base_dir: Path) -> Any:
     ),
     "clean": (
         ("sql",),
-        ("sql_path",),
     ),
     "mart": (
-        ("sql_dir",),
         ("tables", "*", "sql"),
     ),
     "cross_year": (
@@ -660,8 +644,8 @@ def _declared_model_keys(model_cls: type[BaseModel]) -> set[str]:
     "bq",
 }
 _RAW_ALLOWED_KEYS = _declared_model_keys(RawConfig)
-_CLEAN_ALLOWED_KEYS = _declared_model_keys(CleanConfig) | {"sql_path"}
-_MART_ALLOWED_KEYS = _declared_model_keys(MartConfig) | {"sql_dir"}
+_CLEAN_ALLOWED_KEYS = _declared_model_keys(CleanConfig)
+_MART_ALLOWED_KEYS = _declared_model_keys(MartConfig)
 _CROSS_YEAR_ALLOWED_KEYS = _declared_model_keys(CrossYearConfig)
 
 
@@ -679,15 +663,10 @@ def _normalize_legacy_payload(
 
     clean = normalized.get("clean")
     if isinstance(clean, dict):
-        updated_clean = dict(clean)
-        if "sql_path" in updated_clean:
-            _emit_deprecation_notice("clean.sql_path", strict_config=strict_config, path=path)
-        normalized["clean"] = updated_clean
+        normalized["clean"] = dict(clean)
 
     mart = normalized.get("mart")
     if isinstance(mart, dict):
-        if "sql_dir" in mart:
-            _emit_deprecation_notice("mart.sql_dir", strict_config=strict_config, path=path)
         normalized["mart"] = dict(mart)
 
     if "bq" in normalized:
@@ -727,6 +706,10 @@ def _warn_or_reject_unknown_keys(
         extras = [key for key in section.keys() if key not in allowed_keys]
         if section_name == "raw" and "source" in extras:
             raise _err("raw.source is no longer supported; use raw.sources", path=path)
+        if section_name == "clean" and "sql_path" in extras:
+            raise _err("clean.sql_path is no longer supported; use clean.sql", path=path)
+        if section_name == "mart" and "sql_dir" in extras:
+            raise _err("mart.sql_dir is no longer supported; use mart.tables[].sql", path=path)
         if extras:
             _emit_unknown_keys_notice(
                 notice_key,

From d125bdb561c48b25e9617905d2dd055a69a37edd Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Sun, 8 Mar 2026 18:57:30 +0000
Subject: [PATCH 3/5] test: aggiorna la suite al nuovo contract config

---
 tests/test_clean_csv_columns.py | 18 ++++++++----------
 tests/test_clean_duckdb_read.py |  2 +-
 tests/test_config.py            | 19 +++++++++++++++++--
 3 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/tests/test_clean_csv_columns.py b/tests/test_clean_csv_columns.py
index 8bb741c..6d34354 100644
--- a/tests/test_clean_csv_columns.py
+++ b/tests/test_clean_csv_columns.py
@@ -33,16 +33,14 @@ def test_run_clean_csv_columns_reads_trailing_delimiter_csv(tmp_path: Path):
             "sql": str(sql_path),
             "read": {
                 "mode": "latest",
-                "csv": {
-                    "delim": ";",
-                    "header": True,
-                    "ignore_errors": True,
-                    "null_padding": True,
-                    "trim_whitespace": True,
-                    "columns": {
-                        "a": "VARCHAR",
-                        "b": "VARCHAR",
-                    },
+                "delim": ";",
+                "header": True,
+                "ignore_errors": True,
+                "null_padding": True,
+                "trim_whitespace": True,
+                "columns": {
+                    "a": "VARCHAR",
+                    "b": "VARCHAR",
                 },
             },
         },
diff --git a/tests/test_clean_duckdb_read.py b/tests/test_clean_duckdb_read.py
index ad36bc5..be6fffc 100644
--- a/tests/test_clean_duckdb_read.py
+++ b/tests/test_clean_duckdb_read.py
@@ -370,7 +370,7 @@ def test_resolve_clean_read_cfg_config_only_ignores_suggested(tmp_path: Path):
 
     _, relation_cfg, params_source = duckdb_read.resolve_clean_read_cfg(
         raw_dir,
-        {"read": "config_only"},
+        {"read": {"source": "config_only"}},
         logging.getLogger("tests.clean.duckdb_read.config_only"),
     )
 
diff --git a/tests/test_config.py b/tests/test_config.py
index f422679..5919527 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -8,6 +8,14 @@
 from toolkit.core.config_models import load_config_model
 
 
+def _bind_config_logger(caplog, monkeypatch):
+    module_logger = logging.getLogger("toolkit.core.config")
+    monkeypatch.setattr(module_logger, "handlers", [caplog.handler])
+    monkeypatch.setattr(module_logger, "propagate", False)
+    module_logger.setLevel(logging.WARNING)
+    caplog.set_level(logging.WARNING, logger="toolkit.core.config")
+
+
 def test_load_config_ok(tmp_path: Path):
     yml = tmp_path / "dataset.yml"
     yml.write_text(
@@ -426,7 +434,7 @@ def test_load_config_normalizes_bool_and_string_list_fields(tmp_path: Path):
     assert cfg.mart["validate"]["table_rules"]["mart_ok"]["primary_key"] == ["key_id"]
 
 
-def test_load_config_warns_on_zombie_field_bq(tmp_path: Path, caplog):
+def test_load_config_warns_on_zombie_field_bq(tmp_path: Path, caplog, monkeypatch):
     yml = tmp_path / "dataset.yml"
     yml.write_text(
         """
@@ -442,6 +450,8 @@ def test_load_config_warns_on_zombie_field_bq(tmp_path: Path, caplog):
         encoding="utf-8",
     )
 
+    _bind_config_logger(caplog, monkeypatch)
+
     with caplog.at_level(logging.WARNING, logger="toolkit.core.config"):
         load_config(yml)
 
@@ -572,7 +582,7 @@ def test_project_example_config_parses_in_strict_mode():
     assert len(model.raw.sources) == 1
 
 
-def test_load_config_warns_on_unknown_top_level_keys_in_non_strict_mode(tmp_path: Path, caplog):
+def test_load_config_warns_on_unknown_top_level_keys_in_non_strict_mode(tmp_path: Path, caplog, monkeypatch):
     yml = tmp_path / "dataset.yml"
     yml.write_text(
         """
@@ -587,6 +597,8 @@ def test_load_config_warns_on_unknown_top_level_keys_in_non_strict_mode(tmp_path
         encoding="utf-8",
     )
 
+    _bind_config_logger(caplog, monkeypatch)
+
     with caplog.at_level(logging.WARNING, logger="toolkit.core.config"):
         cfg = load_config(yml)
 
@@ -667,6 +679,7 @@ def test_load_config_model_rejects_unknown_top_level_keys_in_strict_mode(tmp_pat
 def test_load_config_warns_on_unknown_section_keys_in_non_strict_mode(
     tmp_path: Path,
     caplog,
+    monkeypatch,
     section: str,
     yaml_text: str,
     code: str,
@@ -675,6 +688,8 @@ def test_load_config_warns_on_unknown_section_keys_in_non_strict_mode(
     yml = tmp_path / "dataset.yml"
     yml.write_text(yaml_text, encoding="utf-8")
 
+    _bind_config_logger(caplog, monkeypatch)
+
     with caplog.at_level(logging.WARNING, logger="toolkit.core.config"):
         cfg = load_config(yml)
 

From 89a9c62e848c6e1d00421a78a4db580241ff19ca Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Sun, 8 Mar 2026 18:58:48 +0000
Subject: [PATCH 4/5] docs: chiarisci le forme config rimosse

---
 CHANGELOG.md          | 13 +++++++++++++
 docs/config-schema.md | 14 ++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ef4c248..3172aef 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,19 @@
 
 All notable changes to this project will be documented in this file.
 
+## [Unreleased]
+
+### Removed
+
+- Legacy config forms below no longer emit deprecation warnings and now fail with explicit config errors:
+  - `raw.source`
+  - `raw.sources[].plugin`
+  - `raw.sources[].id`
+  - scalar `clean.read`
+  - `clean.read.csv.*`
+  - `clean.sql_path`
+  - `mart.sql_dir`
+
 ## [1.0.0] - 2026-02-28
 
 ### Added
diff --git a/docs/config-schema.md b/docs/config-schema.md
index 151e629..651fd2c 100644
--- a/docs/config-schema.md
+++ b/docs/config-schema.md
@@ -225,6 +225,20 @@ Con `config.strict: true` o `--strict-config`, gli stessi casi diventano errori.
 | `DCL008` | `bq` | rimuovere il campo | ignored |
 | `DCL013` | `cross_year.* unknown keys` | rimuovere il campo | ignored |
 
+## Legacy rimosso
+
+Le forme seguenti non sono piu supportate. Non generano warning legacy: falliscono subito con errore di config e va usata la shape canonica.
+
+| Legacy rimosso | Usa invece |
+|---|---|
+| `raw.source` | `raw.sources` |
+| `raw.sources[].plugin` | `raw.sources[].type` |
+| `raw.sources[].id` | `raw.sources[].name` |
+| `clean.read: "auto"` | `clean.read.source: auto` |
+| `clean.read.csv.*` | `clean.read.*` |
+| `clean.sql_path` | `clean.sql` |
+| `mart.sql_dir` | `mart.tables[].sql` |
+
 ## Esempi minimi
 
 ### RAW only

From ab48734d543004a6931508b26113c9ece4a5d662 Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Sun, 8 Mar 2026 19:02:57 +0000
Subject: [PATCH 5/5] test: rimuovi import inutilizzato in test_config

---
 tests/test_config.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_config.py b/tests/test_config.py
index 5919527..88a259a 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -3,7 +3,6 @@
 import logging
 import pytest
 
-from toolkit.cli.cmd_run import run as run_cmd
 from toolkit.core.config import ensure_str_list, load_config, parse_bool
 from toolkit.core.config_models import load_config_model