diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c97835..e7cf057 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file. ### Removed - Legacy config forms below no longer emit deprecation warnings and now fail with explicit config errors: + - `bq` - `raw.source` - `raw.sources[].plugin` - `raw.sources[].id` @@ -59,4 +60,3 @@ All notable changes to this project will be documented in this file. - `raw.sources[].id` in favor of `raw.sources[].name` - scalar `clean.read` in favor of `clean.read.source` - `clean.read.csv.*` in favor of `clean.read.*` -- `bq` diff --git a/docs/config-schema.md b/docs/config-schema.md index 651fd2c..03b750f 100644 --- a/docs/config-schema.md +++ b/docs/config-schema.md @@ -18,7 +18,6 @@ I path relativi sono sempre risolti rispetto alla directory che contiene `datase | `config` | `object` | no | policy parser config | | `validation` | `object` | no | solo opzioni globali del validation gate | | `output` | `object` | no | policy artefatti | -| `bq` | `object \| null` | no | accettato ma ignorato, con warning | ## dataset @@ -222,7 +221,6 @@ Con `config.strict: true` o `--strict-config`, gli stessi casi diventano errori. | Code | Legacy | Replacement | Status | |---|---|---|---| -| `DCL008` | `bq` | rimuovere il campo | ignored | | `DCL013` | `cross_year.* unknown keys` | rimuovere il campo | ignored | ## Legacy rimosso @@ -238,6 +236,7 @@ Le forme seguenti non sono piu supportate. Non generano warning legacy: fallisco | `clean.read.csv.*` | `clean.read.*` | | `clean.sql_path` | `clean.sql` | | `mart.sql_dir` | `mart.tables[].sql` | +| `bq` | rimuovere il campo | ## Esempi minimi diff --git a/tests/test_config.py b/tests/test_config.py index 88a259a..1cf5eaf 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -433,7 +433,7 @@ def test_load_config_normalizes_bool_and_string_list_fields(tmp_path: Path): assert cfg.mart["validate"]["table_rules"]["mart_ok"]["primary_key"] == ["key_id"] -def test_load_config_warns_on_zombie_field_bq(tmp_path: Path, caplog, monkeypatch): +def test_load_config_rejects_removed_bq_field(tmp_path: Path): yml = tmp_path / "dataset.yml" yml.write_text( """ @@ -449,13 +449,10 @@ def test_load_config_warns_on_zombie_field_bq(tmp_path: Path, caplog, monkeypatc encoding="utf-8", ) - _bind_config_logger(caplog, monkeypatch) - - with caplog.at_level(logging.WARNING, logger="toolkit.core.config"): + with pytest.raises(ValueError) as exc: load_config(yml) - assert "DCL008" in caplog.text - assert "deprecated/ignored, usare remove field" in caplog.text + assert "bq is no longer supported; remove field" in str(exc.value) def test_load_config_rejects_clean_sql_path(tmp_path: Path): @@ -469,8 +466,6 @@ def test_load_config_rejects_clean_sql_path(tmp_path: Path): clean: sql_path: sql/legacy_clean.sql mart: {} -bq: - dataset: ignored """.strip(), encoding="utf-8", ) @@ -492,8 +487,6 @@ def test_load_config_rejects_mart_sql_dir(tmp_path: Path): clean: {} mart: sql_dir: sql/mart -bq: - dataset: ignored """.strip(), encoding="utf-8", ) diff --git a/tests/test_raw_ext_inference.py b/tests/test_raw_ext_inference.py index d8eb3d2..4126648 100644 --- a/tests/test_raw_ext_inference.py +++ b/tests/test_raw_ext_inference.py @@ -32,14 +32,16 @@ def _fake_fetch_payload(_stype: str, _client: dict, _formatted_args: dict): monkeypatch.setattr("toolkit.raw.run._fetch_payload", _fake_fetch_payload) raw_cfg = { - "source": { - "name": "my_source", - "type": "http_file", - "args": { - "url": "https://example.org/dataset.csv.php", - "filename": "forced_name.data", - }, - } + "sources": [ + { + "name": "my_source", + "type": "http_file", + "args": { + "url": "https://example.org/dataset.csv.php", + "filename": "forced_name.data", + }, + } + ] } run_raw("demo", 2024, str(tmp_path), raw_cfg, _NoopLogger()) @@ -61,11 +63,13 @@ def _fake_fetch_payload(_stype: str, _client: dict, _formatted_args: dict): existing.write_bytes(b"old-content\n") raw_cfg = { - "source": { - "name": "my_source", - "type": "http_file", - "args": {"url": "https://example.org/file.csv", "filename": "file.csv"}, - } + "sources": [ + { + "name": "my_source", + "type": "http_file", + "args": {"url": "https://example.org/file.csv", "filename": "file.csv"}, + } + ] } run_raw("demo", 2024, str(tmp_path), raw_cfg, _NoopLogger()) @@ -82,11 +86,13 @@ def _fake_fetch_payload(_stype: str, _client: dict, _formatted_args: dict): monkeypatch.setattr("toolkit.raw.run._fetch_payload", _fake_fetch_payload) raw_cfg = { - "source": { - "name": "primary_source", - "type": "http_file", - "args": {"url": "https://example.org/manifest.csv", "filename": "manifest.csv"}, - } + "sources": [ + { + "name": "primary_source", + "type": "http_file", + "args": {"url": "https://example.org/manifest.csv", "filename": "manifest.csv"}, + } + ] } run_raw("demo", 2024, str(tmp_path), raw_cfg, _NoopLogger(), run_id="run-123") @@ -111,11 +117,13 @@ def _fake_fetch_payload(_stype: str, _client: dict, _formatted_args: dict): monkeypatch.setattr("toolkit.raw.run._fetch_payload", _fake_fetch_payload) raw_cfg = { - "source": { - "name": "my_source", - "type": "http_file", - "args": {"url": "https://example.org/file.csv", "filename": "file.csv"}, - } + "sources": [ + { + "name": "my_source", + "type": "http_file", + "args": {"url": "https://example.org/file.csv", "filename": "file.csv"}, + } + ] } run_raw("demo", 2024, str(tmp_path), raw_cfg, _NoopLogger(), run_id="run-1") @@ -140,11 +148,13 @@ def _fake_fetch_payload(_stype: str, _client: dict, _formatted_args: dict): raw_cfg = { "output_policy": "overwrite", - "source": { - "name": "my_source", - "type": "http_file", - "args": {"url": "https://example.org/file.csv", "filename": "file.csv"}, - }, + "sources": [ + { + "name": "my_source", + "type": "http_file", + "args": {"url": "https://example.org/file.csv", "filename": "file.csv"}, + } + ], } run_raw("demo", 2024, str(tmp_path), raw_cfg, _NoopLogger(), run_id="run-1") diff --git a/toolkit/core/config.py b/toolkit/core/config.py index 106712d..693b67b 100644 --- a/toolkit/core/config.py +++ b/toolkit/core/config.py @@ -27,7 +27,6 @@ class ToolkitConfig: config: dict[str, Any] validation: dict[str, Any] output: dict[str, Any] - bq: dict[str, Any] | None def resolve(self, rel_path: str | Path) -> Path: p = Path(rel_path) @@ -45,14 +44,6 @@ def ensure_str_list(value: Any, field_name: str) -> list[str]: return _ensure_str_list(value, field_name) -def _compat_raw(model: ToolkitConfigModel) -> dict[str, Any]: - raw = model.raw.model_dump(mode="python", exclude_none=True, exclude_unset=True) - sources = raw.get("sources") or [] - if sources and "source" not in raw: - raw["source"] = dict(sources[0]) - return raw - - def _compat_clean(model: ToolkitConfigModel) -> dict[str, Any]: return model.clean.model_dump( mode="python", @@ -88,12 +79,11 @@ def load_config(path: str | Path, *, strict_config: bool = False) -> ToolkitConf root_source=model.root_source, dataset=model.dataset.name, years=list(model.dataset.years), - raw=_compat_raw(model), + raw=model.raw.model_dump(mode="python", exclude_none=True, exclude_unset=True), clean=_compat_clean(model), mart=_compat_mart(model), cross_year=_compat_cross_year(model), config=model.config.model_dump(mode="python"), validation=model.validation.model_dump(mode="python"), output=model.output.model_dump(mode="python"), - bq=model.bq, ) diff --git a/toolkit/core/config_models.py b/toolkit/core/config_models.py index da2780e..f0261fc 100644 --- a/toolkit/core/config_models.py +++ b/toolkit/core/config_models.py @@ -26,13 +26,6 @@ class ConfigDeprecation: _CONFIG_DEPRECATIONS: dict[str, ConfigDeprecation] = { - "bq": ConfigDeprecation( - code="DCL008", - legacy="bq", - replacement="remove field", - status="ignored", - message="bq is deprecated/ignored, usare remove field", - ), "unknown.top_level": ConfigDeprecation( code="DCL009", legacy="unknown top-level keys", @@ -417,7 +410,6 @@ class ToolkitConfigModel(BaseModel): config: ConfigPolicy = Field(default_factory=ConfigPolicy) validation: GlobalValidationConfig = Field(default_factory=GlobalValidationConfig) output: OutputConfig = Field(default_factory=OutputConfig) - bq: dict[str, Any] | None = None def _err(msg: str, *, path: Path) -> ValueError: @@ -641,7 +633,6 @@ def _declared_model_keys(model_cls: type[BaseModel]) -> set[str]: "config", "validation", "output", - "bq", } _RAW_ALLOWED_KEYS = _declared_model_keys(RawConfig) _CLEAN_ALLOWED_KEYS = _declared_model_keys(CleanConfig) @@ -669,9 +660,6 @@ def _normalize_legacy_payload( if isinstance(mart, dict): normalized["mart"] = dict(mart) - if "bq" in normalized: - _emit_deprecation_notice("bq", strict_config=strict_config, path=path) - return normalized @@ -684,6 +672,8 @@ def _warn_or_reject_unknown_keys( normalized = dict(data) top_level_extras = [key for key in normalized.keys() if key not in _TOP_LEVEL_ALLOWED_KEYS] + if "bq" in top_level_extras: + raise _err("bq is no longer supported; remove field", path=path) if top_level_extras: _emit_unknown_keys_notice( "unknown.top_level", diff --git a/toolkit/raw/run.py b/toolkit/raw/run.py index e2b5e91..dce6a9c 100644 --- a/toolkit/raw/run.py +++ b/toolkit/raw/run.py @@ -148,18 +148,14 @@ def run_raw( ): """ Supporta: - - legacy: - raw: - source: {type, client, args} - - nuovo: - raw: - extractor: {type, args} # default extractor - sources: - - name: ... - type: ... - client: ... - args: ... - extractor: {type, args} # override per source + raw: + extractor: {type, args} # default extractor + sources: + - name: ... + type: ... + client: ... + args: ... + extractor: {type, args} # override per source """ register_builtin_plugins(strict=strict_plugins) @@ -172,12 +168,9 @@ def run_raw( default_extractor_fn, default_extractor_args = get_extractor(default_extractor_spec) output_policy = str(raw_cfg.get("output_policy", "versioned")) - # -------- build sources list (retrocompat) -------- sources = raw_cfg.get("sources") if not sources: - # fallback legacy - legacy = raw_cfg.get("source", {}) - sources = [legacy] + raise ValueError("raw.sources missing or empty in dataset.yml") files_written: list[dict] = [] inputs: list[dict] = []