From 70e16ab5bd79ba0efa6623ec4e468ede8bfecd54 Mon Sep 17 00:00:00 2001 From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com> Date: Sun, 8 Mar 2026 18:44:01 +0000 Subject: [PATCH 1/5] refactor: rimuovi compat legacy del parser config --- docs/advanced-workflows.md | 2 - docs/config-schema.md | 7 +- tests/test_config.py | 137 ++++++++-------------------------- toolkit/clean/duckdb_read.py | 6 +- toolkit/core/config_models.py | 109 +-------------------------- toolkit/core/csv_read.py | 46 +----------- 6 files changed, 42 insertions(+), 265 deletions(-) diff --git a/docs/advanced-workflows.md b/docs/advanced-workflows.md index fdf1bb3..3fd48f2 100644 --- a/docs/advanced-workflows.md +++ b/docs/advanced-workflows.md @@ -95,8 +95,6 @@ Regola pratica: ## Compat legacy -Il toolkit mantiene compatibilita` con alcune forme legacy del config per facilitare la migrazione. - Per i repo nuovi: - usa la shape canonica documentata in [config-schema.md](./config-schema.md) diff --git a/docs/config-schema.md b/docs/config-schema.md index 112a123..2be7767 100644 --- a/docs/config-schema.md +++ b/docs/config-schema.md @@ -222,11 +222,6 @@ Con `config.strict: true` o `--strict-config`, gli stessi casi diventano errori. | Code | Legacy | Replacement | Status | |---|---|---|---| -| `DCL001` | `raw.source` | `raw.sources` | deprecated | -| `DCL002` | `raw.sources[].plugin` | `raw.sources[].type` | deprecated | -| `DCL003` | `raw.sources[].id` | `raw.sources[].name` | deprecated | -| `DCL004` | `clean.read: "auto"` | `clean.read.source: auto` | deprecated | -| `DCL005` | `clean.read.csv.*` | `clean.read.*` | deprecated | | `DCL006` | `clean.sql_path` | `clean.sql` | ignored | | `DCL007` | `mart.sql_dir` | `mart.tables[].sql` | ignored | | `DCL008` | `bq` | rimuovere il campo | ignored | @@ -312,7 +307,7 @@ Esempi tipici: - `Config validation failed: output.unknown_flag: Extra inputs are not permitted` - `Config validation failed: raw.sources: Input should be a valid list` - `Config validation failed: clean.validate.primary_key: clean.validate.primary_key must be a string or a list of strings` -- `DCL001 raw.source is deprecated, usare raw.sources` +- `Config validation failed: raw.sources: Input should be a valid list` Regola pratica: diff --git a/tests/test_config.py b/tests/test_config.py index a4e79c0..508791d 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -64,10 +64,10 @@ def test_load_config_resolves_relative_paths_from_dataset_dir(tmp_path: Path): name: demo years: [2022] raw: - source: - type: local_file - args: - path: "data/raw.csv" + sources: + - type: local_file + args: + path: "data/raw.csv" clean: sql: "sql/clean.sql" mart: @@ -88,7 +88,7 @@ def test_load_config_resolves_relative_paths_from_dataset_dir(tmp_path: Path): assert cfg.base_dir == project_dir.resolve() assert cfg.root == (project_dir / "out").resolve() assert cfg.root_source == "yml" - assert cfg.raw["source"]["args"]["path"] == (project_dir / "data" / "raw.csv").resolve() + assert cfg.raw["sources"][0]["args"]["path"] == (project_dir / "data" / "raw.csv").resolve() assert cfg.clean["sql"] == (project_dir / "sql" / "clean.sql").resolve() assert cfg.mart["tables"][0]["sql"] == (project_dir / "sql" / "mart" / "demo.sql").resolve() assert cfg.cross_year["tables"][0]["sql"] == (project_dir / "sql" / "cross" / "demo_cross.sql").resolve() @@ -106,11 +106,11 @@ def test_load_config_does_not_transform_non_whitelisted_path_like_fields(tmp_pat name: demo years: [2022] raw: - source: - type: local_file - args: - path: "data/raw.csv" - filename: "nested/raw.csv" + sources: + - type: local_file + args: + path: "data/raw.csv" + filename: "nested/raw.csv" clean: sql: "sql/clean.sql" note_path: "docs/clean.md" @@ -125,8 +125,8 @@ def test_load_config_does_not_transform_non_whitelisted_path_like_fields(tmp_pat cfg = load_config(yml) - assert cfg.raw["source"]["args"]["path"] == (project_dir / "data" / "raw.csv").resolve() - assert cfg.raw["source"]["args"]["filename"] == "nested/raw.csv" + assert cfg.raw["sources"][0]["args"]["path"] == (project_dir / "data" / "raw.csv").resolve() + assert cfg.raw["sources"][0]["args"]["filename"] == "nested/raw.csv" assert cfg.clean["note_path"] == "docs/clean.md" assert cfg.mart["label_path"] == "labels/mart.txt" @@ -289,7 +289,7 @@ def test_load_config_uses_base_dir_when_root_missing_and_dcl_root_missing(tmp_pa assert cfg.root_source == "base_dir_fallback" -def test_load_config_normalizes_legacy_clean_read_csv_and_warns(tmp_path: Path, caplog): +def test_load_config_rejects_legacy_clean_read_csv_shape(tmp_path: Path): project_dir = tmp_path / "project" project_dir.mkdir() yml = project_dir / "dataset.yml" @@ -311,21 +311,10 @@ def test_load_config_normalizes_legacy_clean_read_csv_and_warns(tmp_path: Path, encoding="utf-8", ) - module_logger = logging.getLogger("toolkit.core.config") - module_logger.handlers = [caplog.handler] - module_logger.propagate = True - module_logger.setLevel(logging.WARNING) - - with caplog.at_level(logging.WARNING, logger="toolkit.core.config"): - cfg = load_config(yml) + with pytest.raises(ValueError) as exc: + load_config(yml) - assert cfg.clean["read"] == { - "source": "auto", - "columns": {"amount": "DOUBLE"}, - "delim": ";", - } - assert "DCL005" in caplog.text - assert "deprecated, usare clean.read.*" in caplog.text + assert "clean.read.csv" in str(exc.value) def test_load_config_canonical_clean_read_has_no_deprecation_warning(tmp_path: Path, caplog): @@ -469,7 +458,7 @@ def test_load_config_warns_on_zombie_fields(tmp_path: Path, caplog): assert "deprecated/ignored, usare remove field" in caplog.text -def test_load_config_model_normalizes_legacy_aliases_to_canonical_shape(tmp_path: Path): +def test_load_config_model_rejects_legacy_raw_source_plugin_id_shape(tmp_path: Path): yml = tmp_path / "dataset.yml" yml.write_text( """ @@ -482,52 +471,19 @@ def test_load_config_model_normalizes_legacy_aliases_to_canonical_shape(tmp_path plugin: local_file args: path: data/raw.csv -clean: - read: auto -mart: {} -""".strip(), - encoding="utf-8", - ) - - model = load_config_model(yml) - - assert len(model.raw.sources) == 1 - assert model.raw.sources[0].name == "src_legacy" - assert model.raw.sources[0].type == "local_file" - assert model.clean.read is not None - assert model.clean.read.source == "auto" - - -def test_load_config_logs_deprecation_codes_for_legacy_normalization(tmp_path: Path, caplog): - yml = tmp_path / "dataset.yml" - yml.write_text( - """ -dataset: - name: demo - years: [2022] -raw: - source: - id: src_legacy - plugin: local_file - args: - path: data/raw.csv -clean: - read: auto +clean: {} mart: {} """.strip(), encoding="utf-8", ) - with caplog.at_level(logging.WARNING, logger="toolkit.core.config"): - load_config(yml) + with pytest.raises(ValueError) as exc: + load_config_model(yml) - assert "DCL001" in caplog.text - assert "DCL002" in caplog.text - assert "DCL003" in caplog.text - assert "DCL004" in caplog.text + assert "raw.sources" in str(exc.value) or "raw.source" in str(exc.value) -def test_load_config_model_strict_config_rejects_legacy_normalization(tmp_path: Path): +def test_load_config_model_rejects_legacy_raw_sources_plugin_id_fields(tmp_path: Path): yml = tmp_path / "dataset.yml" yml.write_text( """ @@ -535,10 +491,11 @@ def test_load_config_model_strict_config_rejects_legacy_normalization(tmp_path: name: demo years: [2022] raw: - source: - type: local_file - args: - path: data/raw.csv + sources: + - id: src_legacy + plugin: local_file + args: + path: data/raw.csv clean: {} mart: {} """.strip(), @@ -546,18 +503,15 @@ def test_load_config_model_strict_config_rejects_legacy_normalization(tmp_path: ) with pytest.raises(ValueError) as exc: - load_config_model(yml, strict_config=True) + load_config_model(yml) - assert "DCL001" in str(exc.value) - assert "raw.source is deprecated, usare raw.sources" in str(exc.value) + assert "raw.sources.0" in str(exc.value) -def test_load_config_model_config_strict_rejects_legacy_normalization(tmp_path: Path): +def test_load_config_rejects_legacy_clean_read_scalar_form(tmp_path: Path): yml = tmp_path / "dataset.yml" yml.write_text( """ -config: - strict: true dataset: name: demo years: [2022] @@ -569,36 +523,9 @@ def test_load_config_model_config_strict_rejects_legacy_normalization(tmp_path: ) with pytest.raises(ValueError) as exc: - load_config_model(yml) - - assert "DCL004" in str(exc.value) - assert "clean.read scalar form is deprecated" in str(exc.value) - - -def test_cli_strict_config_rejects_legacy_config(tmp_path: Path): - project_dir = tmp_path / "project" - project_dir.mkdir() - yml = project_dir / "dataset.yml" - yml.write_text( - """ -dataset: - name: demo - years: [2022] -raw: - source: - type: local_file - args: - path: data/raw.csv -clean: {} -mart: {} -""".strip(), - encoding="utf-8", - ) - - with pytest.raises(ValueError) as exc: - run_cmd(step="raw", config=str(yml), strict_config=True) + load_config(yml) - assert "DCL001" in str(exc.value) + assert "clean.read" in str(exc.value) def test_project_example_config_parses_in_strict_mode(): diff --git a/toolkit/clean/duckdb_read.py b/toolkit/clean/duckdb_read.py index b253b8d..94577dd 100644 --- a/toolkit/clean/duckdb_read.py +++ b/toolkit/clean/duckdb_read.py @@ -46,17 +46,13 @@ def _read_source_mode(clean_cfg: dict[str, Any], logger=None) -> tuple[str, dict if raw_read_cfg is None: pass - elif isinstance(raw_read_cfg, str): - if logger is not None: - logger.warning("clean.read scalar form is deprecated; use clean.read.source") - read_source = raw_read_cfg elif isinstance(raw_read_cfg, dict): explicit_cfg = dict(raw_read_cfg) nested_source = explicit_cfg.pop("source", None) if nested_source is not None: read_source = nested_source else: - raise ValueError("clean.read must be either a mapping (dict) or one of: auto, config_only") + raise ValueError("clean.read must be a mapping (dict)") normalized_source = str(read_source or "auto") if normalized_source not in READ_SOURCE_MODES: diff --git a/toolkit/core/config_models.py b/toolkit/core/config_models.py index e964347..311ca82 100644 --- a/toolkit/core/config_models.py +++ b/toolkit/core/config_models.py @@ -26,41 +26,6 @@ class ConfigDeprecation: _CONFIG_DEPRECATIONS: dict[str, ConfigDeprecation] = { - "raw.source": ConfigDeprecation( - code="DCL001", - legacy="raw.source", - replacement="raw.sources", - status="deprecated", - message="raw.source is deprecated, usare raw.sources", - ), - "raw.sources[].plugin": ConfigDeprecation( - code="DCL002", - legacy="raw.sources[].plugin", - replacement="raw.sources[].type", - status="deprecated", - message="raw.sources[].plugin is deprecated, usare raw.sources[].type", - ), - "raw.sources[].id": ConfigDeprecation( - code="DCL003", - legacy="raw.sources[].id", - replacement="raw.sources[].name", - status="deprecated", - message="raw.sources[].id is deprecated, usare raw.sources[].name", - ), - "clean.read": ConfigDeprecation( - code="DCL004", - legacy="clean.read: ", - replacement="clean.read.source", - status="deprecated", - message="clean.read scalar form is deprecated, usare clean.read.source", - ), - "clean.read.csv": ConfigDeprecation( - code="DCL005", - legacy="clean.read.csv.*", - replacement="clean.read.*", - status="deprecated", - message="clean.read.csv.* is deprecated, usare clean.read.*", - ), "clean.sql_path": ConfigDeprecation( code="DCL006", legacy="clean.sql_path", @@ -644,19 +609,6 @@ def _resolve_root(root: Any, *, base_dir: Path) -> tuple[Path, str]: source = "env:TOOLKIT_OUTDIR" if os.environ.get("TOOLKIT_OUTDIR") else "env:DCL_OUTDIR" return Path(managed_outdir).expanduser().resolve(), source return _resolve_path_value(root, base_dir=base_dir), "yml" - - -def _normalize_legacy_source(source: dict[str, Any]) -> dict[str, Any]: - normalized = dict(source) - plugin = normalized.pop("plugin", None) - if plugin is not None and "type" not in normalized: - normalized["type"] = plugin - source_id = normalized.pop("id", None) - if source_id is not None and "name" not in normalized: - normalized["name"] = source_id - return normalized - - def _emit_deprecation_notice( key: str, *, @@ -713,39 +665,6 @@ def _declared_model_keys(model_cls: type[BaseModel]) -> set[str]: _CROSS_YEAR_ALLOWED_KEYS = _declared_model_keys(CrossYearConfig) -def _normalize_legacy_clean_read( - clean: dict[str, Any], - *, - path: Path, - strict_config: bool, -) -> dict[str, Any]: - normalized = dict(clean) - read_cfg = normalized.get("read") - - if isinstance(read_cfg, str): - _emit_deprecation_notice("clean.read", strict_config=strict_config, path=path) - normalized["read"] = {"source": read_cfg} - read_cfg = normalized["read"] - - if not isinstance(read_cfg, dict): - return normalized - - csv_cfg = read_cfg.get("csv") - if csv_cfg is None: - return normalized - if not isinstance(csv_cfg, dict): - raise _err("clean.read.csv deve essere una mappa YAML (oggetto).", path=path) - - merged_read = dict(read_cfg) - merged_read.pop("csv", None) - for key, value in csv_cfg.items(): - merged_read.setdefault(key, value) - - _emit_deprecation_notice("clean.read.csv", strict_config=strict_config, path=path) - normalized["read"] = merged_read - return normalized - - def _normalize_legacy_payload( data: dict[str, Any], *, @@ -756,33 +675,11 @@ def _normalize_legacy_payload( raw = normalized.get("raw") if isinstance(raw, dict): - updated_raw = dict(raw) - if "source" in updated_raw: - source = updated_raw.pop("source") - if "sources" in updated_raw: - raise _err("Use either raw.source or raw.sources, not both.", path=path) - updated_raw["sources"] = [source] - _emit_deprecation_notice("raw.source", strict_config=strict_config, path=path) - sources = updated_raw.get("sources") - if isinstance(sources, list): - normalized_sources: list[Any] = [] - for source in sources: - if not isinstance(source, dict): - normalized_sources.append(source) - continue - original = dict(source) - normalized_source = _normalize_legacy_source(source) - if "plugin" in original and "type" not in original: - _emit_deprecation_notice("raw.sources[].plugin", strict_config=strict_config, path=path) - if "id" in original and "name" not in original: - _emit_deprecation_notice("raw.sources[].id", strict_config=strict_config, path=path) - normalized_sources.append(normalized_source) - updated_raw["sources"] = normalized_sources - normalized["raw"] = updated_raw + normalized["raw"] = dict(raw) clean = normalized.get("clean") if isinstance(clean, dict): - updated_clean = _normalize_legacy_clean_read(clean, path=path, strict_config=strict_config) + updated_clean = dict(clean) if "sql_path" in updated_clean: _emit_deprecation_notice("clean.sql_path", strict_config=strict_config, path=path) normalized["clean"] = updated_clean @@ -828,6 +725,8 @@ def _warn_or_reject_unknown_keys( if not isinstance(section, dict): continue extras = [key for key in section.keys() if key not in allowed_keys] + if section_name == "raw" and "source" in extras: + raise _err("raw.source is no longer supported; use raw.sources", path=path) if extras: _emit_unknown_keys_notice( notice_key, diff --git a/toolkit/core/csv_read.py b/toolkit/core/csv_read.py index f88350a..02ceaef 100644 --- a/toolkit/core/csv_read.py +++ b/toolkit/core/csv_read.py @@ -23,33 +23,12 @@ "prefer_from_raw_run", "allow_ambiguous", "include", - "csv", "columns", "normalize_rows_to_columns", "trim_whitespace", "sample_size", "sheet_name", } -ALLOWED_NESTED_CSV_KEYS = { - "delim", - "header", - "encoding", - "decimal", - "skip", - "auto_detect", - "quote", - "escape", - "comment", - "ignore_errors", - "strict_mode", - "null_padding", - "parallel", - "nullstr", - "columns", - "normalize_rows_to_columns", - "trim_whitespace", - "sheet_name", -} FORMAT_HINT_KEYS = { "delim", "header", @@ -118,12 +97,8 @@ def normalize_columns_spec(columns: object) -> dict[str, str] | None: def normalize_read_cfg(read_cfg: dict[str, Any] | None) -> dict[str, Any]: cfg = dict(read_cfg or {}) - csv_cfg = cfg.get("csv") or {} - if csv_cfg and not isinstance(csv_cfg, dict): - raise ValueError( - "clean.read must be a mapping (dict) in dataset.yml; " - "legacy clean.read.csv must also be a mapping if used" - ) + if "csv" in cfg: + raise ValueError("clean.read.csv is no longer supported; use clean.read.* directly") unknown_top = sorted(set(cfg.keys()) - ALLOWED_READ_CSV_KEYS) if unknown_top: @@ -131,21 +106,8 @@ def normalize_read_cfg(read_cfg: dict[str, Any] | None) -> dict[str, Any]: "Unsupported clean.read options for CSV reader: " f"{unknown_top}. Allowed keys: {sorted(ALLOWED_READ_CSV_KEYS)}" ) - - if csv_cfg: - unknown_nested = sorted(set(csv_cfg.keys()) - ALLOWED_NESTED_CSV_KEYS) - if unknown_nested: - raise ValueError( - "Unsupported legacy clean.read.csv options: " - f"{unknown_nested}. Allowed keys: {sorted(ALLOWED_NESTED_CSV_KEYS)}" - ) - - merged = dict(csv_cfg) - for key in ALLOWED_NESTED_CSV_KEYS: - if key in cfg: - merged[key] = cfg[key] - merged["columns"] = normalize_columns_spec(merged.get("columns")) - return merged + cfg["columns"] = normalize_columns_spec(cfg.get("columns")) + return cfg def filter_suggested_format_keys(cfg: dict[str, Any] | None) -> dict[str, Any]: From 53aae93060cb4597654f6e5c215d8f9cd916b5e1 Mon Sep 17 00:00:00 2001 From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com> Date: Sun, 8 Mar 2026 18:48:24 +0000 Subject: [PATCH 2/5] refactor: rimuovi clean.sql_path e mart.sql_dir --- CHANGELOG.md | 2 -- docs/config-schema.md | 2 -- tests/test_config.py | 59 ++++++++++++++++++++++++++++------- toolkit/core/config_models.py | 31 +++++------------- 4 files changed, 55 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 13d8039..ef4c248 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,4 @@ All notable changes to this project will be documented in this file. - `raw.sources[].id` in favor of `raw.sources[].name` - scalar `clean.read` in favor of `clean.read.source` - `clean.read.csv.*` in favor of `clean.read.*` -- `clean.sql_path` -- `mart.sql_dir` - `bq` diff --git a/docs/config-schema.md b/docs/config-schema.md index 2be7767..151e629 100644 --- a/docs/config-schema.md +++ b/docs/config-schema.md @@ -222,8 +222,6 @@ Con `config.strict: true` o `--strict-config`, gli stessi casi diventano errori. | Code | Legacy | Replacement | Status | |---|---|---|---| -| `DCL006` | `clean.sql_path` | `clean.sql` | ignored | -| `DCL007` | `mart.sql_dir` | `mart.tables[].sql` | ignored | | `DCL008` | `bq` | rimuovere il campo | ignored | | `DCL013` | `cross_year.* unknown keys` | rimuovere il campo | ignored | diff --git a/tests/test_config.py b/tests/test_config.py index 508791d..f422679 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -180,7 +180,6 @@ def test_load_config_logs_normalized_whitelist_fields(tmp_path: Path, caplog, mo clean: sql: "sql/clean.sql" mart: - sql_dir: "sql/mart" tables: - name: demo_mart sql: "sql/mart/demo.sql" @@ -200,14 +199,12 @@ def test_load_config_logs_normalized_whitelist_fields(tmp_path: Path, caplog, mo assert cfg.root == (project_dir / "out").resolve() assert cfg.raw["sources"][0]["args"]["path"] == (project_dir / "data" / "raw_a.csv").resolve() assert cfg.clean["sql"] == (project_dir / "sql" / "clean.sql").resolve() - assert cfg.mart["sql_dir"] == (project_dir / "sql" / "mart").resolve() assert cfg.mart["tables"][0]["sql"] == (project_dir / "sql" / "mart" / "demo.sql").resolve() assert "Normalized config paths:" in caplog.text assert "root=" in caplog.text assert "raw.sources[0].args.path=" in caplog.text assert "clean.sql=" in caplog.text - assert "mart.sql_dir=" in caplog.text assert "mart.tables[0].sql=" in caplog.text @@ -429,7 +426,30 @@ def test_load_config_normalizes_bool_and_string_list_fields(tmp_path: Path): assert cfg.mart["validate"]["table_rules"]["mart_ok"]["primary_key"] == ["key_id"] -def test_load_config_warns_on_zombie_fields(tmp_path: Path, caplog): +def test_load_config_warns_on_zombie_field_bq(tmp_path: Path, caplog): + yml = tmp_path / "dataset.yml" + yml.write_text( + """ +dataset: + name: demo + years: [2022] +raw: {} +bq: + dataset: ignored +clean: {} +mart: {} +""".strip(), + encoding="utf-8", + ) + + with caplog.at_level(logging.WARNING, logger="toolkit.core.config"): + load_config(yml) + + assert "DCL008" in caplog.text + assert "deprecated/ignored, usare remove field" in caplog.text + + +def test_load_config_rejects_clean_sql_path(tmp_path: Path): yml = tmp_path / "dataset.yml" yml.write_text( """ @@ -439,6 +459,28 @@ def test_load_config_warns_on_zombie_fields(tmp_path: Path, caplog): raw: {} clean: sql_path: sql/legacy_clean.sql +mart: {} +bq: + dataset: ignored +""".strip(), + encoding="utf-8", + ) + + with pytest.raises(ValueError) as exc: + load_config(yml) + + assert "clean.sql_path" in str(exc.value) + + +def test_load_config_rejects_mart_sql_dir(tmp_path: Path): + yml = tmp_path / "dataset.yml" + yml.write_text( + """ +dataset: + name: demo + years: [2022] +raw: {} +clean: {} mart: sql_dir: sql/mart bq: @@ -447,15 +489,10 @@ def test_load_config_warns_on_zombie_fields(tmp_path: Path, caplog): encoding="utf-8", ) - with caplog.at_level(logging.WARNING, logger="toolkit.core.config"): + with pytest.raises(ValueError) as exc: load_config(yml) - assert "DCL006" in caplog.text - assert "DCL007" in caplog.text - assert "DCL008" in caplog.text - assert "deprecated/ignored, usare clean.sql" in caplog.text - assert "deprecated/ignored, usare mart.tables[].sql" in caplog.text - assert "deprecated/ignored, usare remove field" in caplog.text + assert "mart.sql_dir" in str(exc.value) def test_load_config_model_rejects_legacy_raw_source_plugin_id_shape(tmp_path: Path): diff --git a/toolkit/core/config_models.py b/toolkit/core/config_models.py index 311ca82..da2780e 100644 --- a/toolkit/core/config_models.py +++ b/toolkit/core/config_models.py @@ -26,20 +26,6 @@ class ConfigDeprecation: _CONFIG_DEPRECATIONS: dict[str, ConfigDeprecation] = { - "clean.sql_path": ConfigDeprecation( - code="DCL006", - legacy="clean.sql_path", - replacement="clean.sql", - status="ignored", - message="clean.sql_path is deprecated/ignored, usare clean.sql", - ), - "mart.sql_dir": ConfigDeprecation( - code="DCL007", - legacy="mart.sql_dir", - replacement="mart.tables[].sql", - status="ignored", - message="mart.sql_dir is deprecated/ignored, usare mart.tables[].sql", - ), "bq": ConfigDeprecation( code="DCL008", legacy="bq", @@ -471,10 +457,8 @@ def _resolve_path_value(value: Any, *, base_dir: Path) -> Any: ), "clean": ( ("sql",), - ("sql_path",), ), "mart": ( - ("sql_dir",), ("tables", "*", "sql"), ), "cross_year": ( @@ -660,8 +644,8 @@ def _declared_model_keys(model_cls: type[BaseModel]) -> set[str]: "bq", } _RAW_ALLOWED_KEYS = _declared_model_keys(RawConfig) -_CLEAN_ALLOWED_KEYS = _declared_model_keys(CleanConfig) | {"sql_path"} -_MART_ALLOWED_KEYS = _declared_model_keys(MartConfig) | {"sql_dir"} +_CLEAN_ALLOWED_KEYS = _declared_model_keys(CleanConfig) +_MART_ALLOWED_KEYS = _declared_model_keys(MartConfig) _CROSS_YEAR_ALLOWED_KEYS = _declared_model_keys(CrossYearConfig) @@ -679,15 +663,10 @@ def _normalize_legacy_payload( clean = normalized.get("clean") if isinstance(clean, dict): - updated_clean = dict(clean) - if "sql_path" in updated_clean: - _emit_deprecation_notice("clean.sql_path", strict_config=strict_config, path=path) - normalized["clean"] = updated_clean + normalized["clean"] = dict(clean) mart = normalized.get("mart") if isinstance(mart, dict): - if "sql_dir" in mart: - _emit_deprecation_notice("mart.sql_dir", strict_config=strict_config, path=path) normalized["mart"] = dict(mart) if "bq" in normalized: @@ -727,6 +706,10 @@ def _warn_or_reject_unknown_keys( extras = [key for key in section.keys() if key not in allowed_keys] if section_name == "raw" and "source" in extras: raise _err("raw.source is no longer supported; use raw.sources", path=path) + if section_name == "clean" and "sql_path" in extras: + raise _err("clean.sql_path is no longer supported; use clean.sql", path=path) + if section_name == "mart" and "sql_dir" in extras: + raise _err("mart.sql_dir is no longer supported; use mart.tables[].sql", path=path) if extras: _emit_unknown_keys_notice( notice_key, From d125bdb561c48b25e9617905d2dd055a69a37edd Mon Sep 17 00:00:00 2001 From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com> Date: Sun, 8 Mar 2026 18:57:30 +0000 Subject: [PATCH 3/5] test: aggiorna la suite al nuovo contract config --- tests/test_clean_csv_columns.py | 18 ++++++++---------- tests/test_clean_duckdb_read.py | 2 +- tests/test_config.py | 19 +++++++++++++++++-- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/tests/test_clean_csv_columns.py b/tests/test_clean_csv_columns.py index 8bb741c..6d34354 100644 --- a/tests/test_clean_csv_columns.py +++ b/tests/test_clean_csv_columns.py @@ -33,16 +33,14 @@ def test_run_clean_csv_columns_reads_trailing_delimiter_csv(tmp_path: Path): "sql": str(sql_path), "read": { "mode": "latest", - "csv": { - "delim": ";", - "header": True, - "ignore_errors": True, - "null_padding": True, - "trim_whitespace": True, - "columns": { - "a": "VARCHAR", - "b": "VARCHAR", - }, + "delim": ";", + "header": True, + "ignore_errors": True, + "null_padding": True, + "trim_whitespace": True, + "columns": { + "a": "VARCHAR", + "b": "VARCHAR", }, }, }, diff --git a/tests/test_clean_duckdb_read.py b/tests/test_clean_duckdb_read.py index ad36bc5..be6fffc 100644 --- a/tests/test_clean_duckdb_read.py +++ b/tests/test_clean_duckdb_read.py @@ -370,7 +370,7 @@ def test_resolve_clean_read_cfg_config_only_ignores_suggested(tmp_path: Path): _, relation_cfg, params_source = duckdb_read.resolve_clean_read_cfg( raw_dir, - {"read": "config_only"}, + {"read": {"source": "config_only"}}, logging.getLogger("tests.clean.duckdb_read.config_only"), ) diff --git a/tests/test_config.py b/tests/test_config.py index f422679..5919527 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -8,6 +8,14 @@ from toolkit.core.config_models import load_config_model +def _bind_config_logger(caplog, monkeypatch): + module_logger = logging.getLogger("toolkit.core.config") + monkeypatch.setattr(module_logger, "handlers", [caplog.handler]) + monkeypatch.setattr(module_logger, "propagate", False) + module_logger.setLevel(logging.WARNING) + caplog.set_level(logging.WARNING, logger="toolkit.core.config") + + def test_load_config_ok(tmp_path: Path): yml = tmp_path / "dataset.yml" yml.write_text( @@ -426,7 +434,7 @@ def test_load_config_normalizes_bool_and_string_list_fields(tmp_path: Path): assert cfg.mart["validate"]["table_rules"]["mart_ok"]["primary_key"] == ["key_id"] -def test_load_config_warns_on_zombie_field_bq(tmp_path: Path, caplog): +def test_load_config_warns_on_zombie_field_bq(tmp_path: Path, caplog, monkeypatch): yml = tmp_path / "dataset.yml" yml.write_text( """ @@ -442,6 +450,8 @@ def test_load_config_warns_on_zombie_field_bq(tmp_path: Path, caplog): encoding="utf-8", ) + _bind_config_logger(caplog, monkeypatch) + with caplog.at_level(logging.WARNING, logger="toolkit.core.config"): load_config(yml) @@ -572,7 +582,7 @@ def test_project_example_config_parses_in_strict_mode(): assert len(model.raw.sources) == 1 -def test_load_config_warns_on_unknown_top_level_keys_in_non_strict_mode(tmp_path: Path, caplog): +def test_load_config_warns_on_unknown_top_level_keys_in_non_strict_mode(tmp_path: Path, caplog, monkeypatch): yml = tmp_path / "dataset.yml" yml.write_text( """ @@ -587,6 +597,8 @@ def test_load_config_warns_on_unknown_top_level_keys_in_non_strict_mode(tmp_path encoding="utf-8", ) + _bind_config_logger(caplog, monkeypatch) + with caplog.at_level(logging.WARNING, logger="toolkit.core.config"): cfg = load_config(yml) @@ -667,6 +679,7 @@ def test_load_config_model_rejects_unknown_top_level_keys_in_strict_mode(tmp_pat def test_load_config_warns_on_unknown_section_keys_in_non_strict_mode( tmp_path: Path, caplog, + monkeypatch, section: str, yaml_text: str, code: str, @@ -675,6 +688,8 @@ def test_load_config_warns_on_unknown_section_keys_in_non_strict_mode( yml = tmp_path / "dataset.yml" yml.write_text(yaml_text, encoding="utf-8") + _bind_config_logger(caplog, monkeypatch) + with caplog.at_level(logging.WARNING, logger="toolkit.core.config"): cfg = load_config(yml) From 89a9c62e848c6e1d00421a78a4db580241ff19ca Mon Sep 17 00:00:00 2001 From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com> Date: Sun, 8 Mar 2026 18:58:48 +0000 Subject: [PATCH 4/5] docs: chiarisci le forme config rimosse --- CHANGELOG.md | 13 +++++++++++++ docs/config-schema.md | 14 ++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ef4c248..3172aef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,19 @@ All notable changes to this project will be documented in this file. +## [Unreleased] + +### Removed + +- Legacy config forms below no longer emit deprecation warnings and now fail with explicit config errors: + - `raw.source` + - `raw.sources[].plugin` + - `raw.sources[].id` + - scalar `clean.read` + - `clean.read.csv.*` + - `clean.sql_path` + - `mart.sql_dir` + ## [1.0.0] - 2026-02-28 ### Added diff --git a/docs/config-schema.md b/docs/config-schema.md index 151e629..651fd2c 100644 --- a/docs/config-schema.md +++ b/docs/config-schema.md @@ -225,6 +225,20 @@ Con `config.strict: true` o `--strict-config`, gli stessi casi diventano errori. | `DCL008` | `bq` | rimuovere il campo | ignored | | `DCL013` | `cross_year.* unknown keys` | rimuovere il campo | ignored | +## Legacy rimosso + +Le forme seguenti non sono piu supportate. Non generano warning legacy: falliscono subito con errore di config e va usata la shape canonica. + +| Legacy rimosso | Usa invece | +|---|---| +| `raw.source` | `raw.sources` | +| `raw.sources[].plugin` | `raw.sources[].type` | +| `raw.sources[].id` | `raw.sources[].name` | +| `clean.read: "auto"` | `clean.read.source: auto` | +| `clean.read.csv.*` | `clean.read.*` | +| `clean.sql_path` | `clean.sql` | +| `mart.sql_dir` | `mart.tables[].sql` | + ## Esempi minimi ### RAW only From ab48734d543004a6931508b26113c9ece4a5d662 Mon Sep 17 00:00:00 2001 From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com> Date: Sun, 8 Mar 2026 19:02:57 +0000 Subject: [PATCH 5/5] test: rimuovi import inutilizzato in test_config --- tests/test_config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_config.py b/tests/test_config.py index 5919527..88a259a 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -3,7 +3,6 @@ import logging import pytest -from toolkit.cli.cmd_run import run as run_cmd from toolkit.core.config import ensure_str_list, load_config, parse_bool from toolkit.core.config_models import load_config_model