dataciviclab · Gabrymi93 · Mar 11, 2026 · Mar 11, 2026 · Mar 11, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file.
 ### Removed
 
 - Legacy config forms below no longer emit deprecation warnings and now fail with explicit config errors:
+  - `bq`
   - `raw.source`
   - `raw.sources[].plugin`
   - `raw.sources[].id`
@@ -59,4 +60,3 @@ All notable changes to this project will be documented in this file.
 - `raw.sources[].id` in favor of `raw.sources[].name`
 - scalar `clean.read` in favor of `clean.read.source`
 - `clean.read.csv.*` in favor of `clean.read.*`
-- `bq`
diff --git a/docs/config-schema.md b/docs/config-schema.md
@@ -18,7 +18,6 @@ I path relativi sono sempre risolti rispetto alla directory che contiene `datase
 | `config` | `object` | no | policy parser config |
 | `validation` | `object` | no | solo opzioni globali del validation gate |
 | `output` | `object` | no | policy artefatti |
-| `bq` | `object \| null` | no | accettato ma ignorato, con warning |
 
 ## dataset
 
@@ -222,7 +221,6 @@ Con `config.strict: true` o `--strict-config`, gli stessi casi diventano errori.
 
 | Code | Legacy | Replacement | Status |
 |---|---|---|---|
-| `DCL008` | `bq` | rimuovere il campo | ignored |
 | `DCL013` | `cross_year.* unknown keys` | rimuovere il campo | ignored |
 
 ## Legacy rimosso
@@ -238,6 +236,7 @@ Le forme seguenti non sono piu supportate. Non generano warning legacy: fallisco
 | `clean.read.csv.*` | `clean.read.*` |
 | `clean.sql_path` | `clean.sql` |
 | `mart.sql_dir` | `mart.tables[].sql` |
+| `bq` | rimuovere il campo |
 
 ## Esempi minimi
 

diff --git a/tests/test_config.py b/tests/test_config.py
@@ -433,7 +433,7 @@ def test_load_config_normalizes_bool_and_string_list_fields(tmp_path: Path):
     assert cfg.mart["validate"]["table_rules"]["mart_ok"]["primary_key"] == ["key_id"]
 
 
-def test_load_config_warns_on_zombie_field_bq(tmp_path: Path, caplog, monkeypatch):
+def test_load_config_rejects_removed_bq_field(tmp_path: Path):
     yml = tmp_path / "dataset.yml"
     yml.write_text(
         """
@@ -449,13 +449,10 @@ def test_load_config_warns_on_zombie_field_bq(tmp_path: Path, caplog, monkeypatc
         encoding="utf-8",
     )
 
-    _bind_config_logger(caplog, monkeypatch)
-
-    with caplog.at_level(logging.WARNING, logger="toolkit.core.config"):
+    with pytest.raises(ValueError) as exc:
         load_config(yml)
 
-    assert "DCL008" in caplog.text
-    assert "deprecated/ignored, usare remove field" in caplog.text
+    assert "bq is no longer supported; remove field" in str(exc.value)
 
 
 def test_load_config_rejects_clean_sql_path(tmp_path: Path):
@@ -469,8 +466,6 @@ def test_load_config_rejects_clean_sql_path(tmp_path: Path):
 clean:
   sql_path: sql/legacy_clean.sql
 mart: {}
-bq:
-  dataset: ignored
 """.strip(),
         encoding="utf-8",
     )
@@ -492,8 +487,6 @@ def test_load_config_rejects_mart_sql_dir(tmp_path: Path):
 clean: {}
 mart:
   sql_dir: sql/mart
-bq:
-  dataset: ignored
 """.strip(),
         encoding="utf-8",
     )

diff --git a/tests/test_raw_ext_inference.py b/tests/test_raw_ext_inference.py
@@ -32,14 +32,16 @@ def _fake_fetch_payload(_stype: str, _client: dict, _formatted_args: dict):
     monkeypatch.setattr("toolkit.raw.run._fetch_payload", _fake_fetch_payload)
 
     raw_cfg = {
-        "source": {
-            "name": "my_source",
-            "type": "http_file",
-            "args": {
-                "url": "https://example.org/dataset.csv.php",
-                "filename": "forced_name.data",
-            },
-        }
+        "sources": [
+            {
+                "name": "my_source",
+                "type": "http_file",
+                "args": {
+                    "url": "https://example.org/dataset.csv.php",
+                    "filename": "forced_name.data",
+                },
+            }
+        ]
     }
 
     run_raw("demo", 2024, str(tmp_path), raw_cfg, _NoopLogger())
@@ -61,11 +63,13 @@ def _fake_fetch_payload(_stype: str, _client: dict, _formatted_args: dict):
     existing.write_bytes(b"old-content\n")
 
     raw_cfg = {
-        "source": {
-            "name": "my_source",
-            "type": "http_file",
-            "args": {"url": "https://example.org/file.csv", "filename": "file.csv"},
-        }
+        "sources": [
+            {
+                "name": "my_source",
+                "type": "http_file",
+                "args": {"url": "https://example.org/file.csv", "filename": "file.csv"},
+            }
+        ]
     }
 
     run_raw("demo", 2024, str(tmp_path), raw_cfg, _NoopLogger())
@@ -82,11 +86,13 @@ def _fake_fetch_payload(_stype: str, _client: dict, _formatted_args: dict):
     monkeypatch.setattr("toolkit.raw.run._fetch_payload", _fake_fetch_payload)
 
     raw_cfg = {
-        "source": {
-            "name": "primary_source",
-            "type": "http_file",
-            "args": {"url": "https://example.org/manifest.csv", "filename": "manifest.csv"},
-        }
+        "sources": [
+            {
+                "name": "primary_source",
+                "type": "http_file",
+                "args": {"url": "https://example.org/manifest.csv", "filename": "manifest.csv"},
+            }
+        ]
     }
 
     run_raw("demo", 2024, str(tmp_path), raw_cfg, _NoopLogger(), run_id="run-123")
@@ -111,11 +117,13 @@ def _fake_fetch_payload(_stype: str, _client: dict, _formatted_args: dict):
     monkeypatch.setattr("toolkit.raw.run._fetch_payload", _fake_fetch_payload)
 
     raw_cfg = {
-        "source": {
-            "name": "my_source",
-            "type": "http_file",
-            "args": {"url": "https://example.org/file.csv", "filename": "file.csv"},
-        }
+        "sources": [
+            {
+                "name": "my_source",
+                "type": "http_file",
+                "args": {"url": "https://example.org/file.csv", "filename": "file.csv"},
+            }
+        ]
     }
 
     run_raw("demo", 2024, str(tmp_path), raw_cfg, _NoopLogger(), run_id="run-1")
@@ -140,11 +148,13 @@ def _fake_fetch_payload(_stype: str, _client: dict, _formatted_args: dict):
 
     raw_cfg = {
         "output_policy": "overwrite",
-        "source": {
-            "name": "my_source",
-            "type": "http_file",
-            "args": {"url": "https://example.org/file.csv", "filename": "file.csv"},
-        },
+        "sources": [
+            {
+                "name": "my_source",
+                "type": "http_file",
+                "args": {"url": "https://example.org/file.csv", "filename": "file.csv"},
+            }
+        ],
     }
 
     run_raw("demo", 2024, str(tmp_path), raw_cfg, _NoopLogger(), run_id="run-1")

diff --git a/toolkit/core/config.py b/toolkit/core/config.py
@@ -27,7 +27,6 @@ class ToolkitConfig:
     config: dict[str, Any]
     validation: dict[str, Any]
     output: dict[str, Any]
-    bq: dict[str, Any] | None
 
     def resolve(self, rel_path: str | Path) -> Path:
         p = Path(rel_path)
@@ -45,14 +44,6 @@ def ensure_str_list(value: Any, field_name: str) -> list[str]:
     return _ensure_str_list(value, field_name)
 
 
-def _compat_raw(model: ToolkitConfigModel) -> dict[str, Any]:
-    raw = model.raw.model_dump(mode="python", exclude_none=True, exclude_unset=True)
-    sources = raw.get("sources") or []
-    if sources and "source" not in raw:
-        raw["source"] = dict(sources[0])
-    return raw
-
-
 def _compat_clean(model: ToolkitConfigModel) -> dict[str, Any]:
     return model.clean.model_dump(
         mode="python",
@@ -88,12 +79,11 @@ def load_config(path: str | Path, *, strict_config: bool = False) -> ToolkitConf
         root_source=model.root_source,
         dataset=model.dataset.name,
         years=list(model.dataset.years),
-        raw=_compat_raw(model),
+        raw=model.raw.model_dump(mode="python", exclude_none=True, exclude_unset=True),
         clean=_compat_clean(model),
         mart=_compat_mart(model),
         cross_year=_compat_cross_year(model),
         config=model.config.model_dump(mode="python"),
         validation=model.validation.model_dump(mode="python"),
         output=model.output.model_dump(mode="python"),
-        bq=model.bq,
     )
diff --git a/toolkit/core/config_models.py b/toolkit/core/config_models.py
@@ -26,13 +26,6 @@ class ConfigDeprecation:
 
 
 _CONFIG_DEPRECATIONS: dict[str, ConfigDeprecation] = {
-    "bq": ConfigDeprecation(
-        code="DCL008",
-        legacy="bq",
-        replacement="remove field",
-        status="ignored",
-        message="bq is deprecated/ignored, usare remove field",
-    ),
     "unknown.top_level": ConfigDeprecation(
         code="DCL009",
         legacy="unknown top-level keys",
@@ -417,7 +410,6 @@ class ToolkitConfigModel(BaseModel):
     config: ConfigPolicy = Field(default_factory=ConfigPolicy)
     validation: GlobalValidationConfig = Field(default_factory=GlobalValidationConfig)
     output: OutputConfig = Field(default_factory=OutputConfig)
-    bq: dict[str, Any] | None = None
 
 
 def _err(msg: str, *, path: Path) -> ValueError:
@@ -641,7 +633,6 @@ def _declared_model_keys(model_cls: type[BaseModel]) -> set[str]:
     "config",
     "validation",
     "output",
-    "bq",
 }
 _RAW_ALLOWED_KEYS = _declared_model_keys(RawConfig)
 _CLEAN_ALLOWED_KEYS = _declared_model_keys(CleanConfig)
@@ -669,9 +660,6 @@ def _normalize_legacy_payload(
     if isinstance(mart, dict):
         normalized["mart"] = dict(mart)
 
-    if "bq" in normalized:
-        _emit_deprecation_notice("bq", strict_config=strict_config, path=path)
-
     return normalized
 
 
@@ -684,6 +672,8 @@ def _warn_or_reject_unknown_keys(
     normalized = dict(data)
 
     top_level_extras = [key for key in normalized.keys() if key not in _TOP_LEVEL_ALLOWED_KEYS]
+    if "bq" in top_level_extras:
+        raise _err("bq is no longer supported; remove field", path=path)
     if top_level_extras:
         _emit_unknown_keys_notice(
             "unknown.top_level",

diff --git a/toolkit/raw/run.py b/toolkit/raw/run.py
@@ -148,18 +148,14 @@ def run_raw(
 ):
     """
     Supporta:
-    - legacy:
-      raw:
-        source: {type, client, args}
-    - nuovo:
-      raw:
-        extractor: {type, args}   # default extractor
-        sources:
-          - name: ...
-            type: ...
-            client: ...
-            args: ...
-            extractor: {type, args}  # override per source
+    raw:
+      extractor: {type, args}   # default extractor
+      sources:
+        - name: ...
+          type: ...
+          client: ...
+          args: ...
+          extractor: {type, args}  # override per source
     """
 
     register_builtin_plugins(strict=strict_plugins)
@@ -172,12 +168,9 @@ def run_raw(
     default_extractor_fn, default_extractor_args = get_extractor(default_extractor_spec)
     output_policy = str(raw_cfg.get("output_policy", "versioned"))
 
-    # -------- build sources list (retrocompat) --------
     sources = raw_cfg.get("sources")
     if not sources:
-        # fallback legacy
-        legacy = raw_cfg.get("source", {})
-        sources = [legacy]
+        raise ValueError("raw.sources missing or empty in dataset.yml")
 
     files_written: list[dict] = []
     inputs: list[dict] = []