Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file.
### Removed

- Legacy config forms below no longer emit deprecation warnings and now fail with explicit config errors:
- `bq`
- `raw.source`
- `raw.sources[].plugin`
- `raw.sources[].id`
Expand Down Expand Up @@ -59,4 +60,3 @@ All notable changes to this project will be documented in this file.
- `raw.sources[].id` in favor of `raw.sources[].name`
- scalar `clean.read` in favor of `clean.read.source`
- `clean.read.csv.*` in favor of `clean.read.*`
- `bq`
3 changes: 1 addition & 2 deletions docs/config-schema.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ I path relativi sono sempre risolti rispetto alla directory che contiene `datase
| `config` | `object` | no | policy parser config |
| `validation` | `object` | no | solo opzioni globali del validation gate |
| `output` | `object` | no | policy artefatti |
| `bq` | `object \| null` | no | accettato ma ignorato, con warning |

## dataset

Expand Down Expand Up @@ -222,7 +221,6 @@ Con `config.strict: true` o `--strict-config`, gli stessi casi diventano errori.

| Code | Legacy | Replacement | Status |
|---|---|---|---|
| `DCL008` | `bq` | rimuovere il campo | ignored |
| `DCL013` | `cross_year.* unknown keys` | rimuovere il campo | ignored |

## Legacy rimosso
Expand All @@ -238,6 +236,7 @@ Le forme seguenti non sono piu supportate. Non generano warning legacy: fallisco
| `clean.read.csv.*` | `clean.read.*` |
| `clean.sql_path` | `clean.sql` |
| `mart.sql_dir` | `mart.tables[].sql` |
| `bq` | rimuovere il campo |

## Esempi minimi

Expand Down
13 changes: 3 additions & 10 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ def test_load_config_normalizes_bool_and_string_list_fields(tmp_path: Path):
assert cfg.mart["validate"]["table_rules"]["mart_ok"]["primary_key"] == ["key_id"]


def test_load_config_warns_on_zombie_field_bq(tmp_path: Path, caplog, monkeypatch):
def test_load_config_rejects_removed_bq_field(tmp_path: Path):
yml = tmp_path / "dataset.yml"
yml.write_text(
"""
Expand All @@ -449,13 +449,10 @@ def test_load_config_warns_on_zombie_field_bq(tmp_path: Path, caplog, monkeypatc
encoding="utf-8",
)

_bind_config_logger(caplog, monkeypatch)

with caplog.at_level(logging.WARNING, logger="toolkit.core.config"):
with pytest.raises(ValueError) as exc:
load_config(yml)

assert "DCL008" in caplog.text
assert "deprecated/ignored, usare remove field" in caplog.text
assert "bq is no longer supported; remove field" in str(exc.value)


def test_load_config_rejects_clean_sql_path(tmp_path: Path):
Expand All @@ -469,8 +466,6 @@ def test_load_config_rejects_clean_sql_path(tmp_path: Path):
clean:
sql_path: sql/legacy_clean.sql
mart: {}
bq:
dataset: ignored
""".strip(),
encoding="utf-8",
)
Expand All @@ -492,8 +487,6 @@ def test_load_config_rejects_mart_sql_dir(tmp_path: Path):
clean: {}
mart:
sql_dir: sql/mart
bq:
dataset: ignored
""".strip(),
encoding="utf-8",
)
Expand Down
66 changes: 38 additions & 28 deletions tests/test_raw_ext_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,16 @@ def _fake_fetch_payload(_stype: str, _client: dict, _formatted_args: dict):
monkeypatch.setattr("toolkit.raw.run._fetch_payload", _fake_fetch_payload)

raw_cfg = {
"source": {
"name": "my_source",
"type": "http_file",
"args": {
"url": "https://example.org/dataset.csv.php",
"filename": "forced_name.data",
},
}
"sources": [
{
"name": "my_source",
"type": "http_file",
"args": {
"url": "https://example.org/dataset.csv.php",
"filename": "forced_name.data",
},
}
]
}

run_raw("demo", 2024, str(tmp_path), raw_cfg, _NoopLogger())
Expand All @@ -61,11 +63,13 @@ def _fake_fetch_payload(_stype: str, _client: dict, _formatted_args: dict):
existing.write_bytes(b"old-content\n")

raw_cfg = {
"source": {
"name": "my_source",
"type": "http_file",
"args": {"url": "https://example.org/file.csv", "filename": "file.csv"},
}
"sources": [
{
"name": "my_source",
"type": "http_file",
"args": {"url": "https://example.org/file.csv", "filename": "file.csv"},
}
]
}

run_raw("demo", 2024, str(tmp_path), raw_cfg, _NoopLogger())
Expand All @@ -82,11 +86,13 @@ def _fake_fetch_payload(_stype: str, _client: dict, _formatted_args: dict):
monkeypatch.setattr("toolkit.raw.run._fetch_payload", _fake_fetch_payload)

raw_cfg = {
"source": {
"name": "primary_source",
"type": "http_file",
"args": {"url": "https://example.org/manifest.csv", "filename": "manifest.csv"},
}
"sources": [
{
"name": "primary_source",
"type": "http_file",
"args": {"url": "https://example.org/manifest.csv", "filename": "manifest.csv"},
}
]
}

run_raw("demo", 2024, str(tmp_path), raw_cfg, _NoopLogger(), run_id="run-123")
Expand All @@ -111,11 +117,13 @@ def _fake_fetch_payload(_stype: str, _client: dict, _formatted_args: dict):
monkeypatch.setattr("toolkit.raw.run._fetch_payload", _fake_fetch_payload)

raw_cfg = {
"source": {
"name": "my_source",
"type": "http_file",
"args": {"url": "https://example.org/file.csv", "filename": "file.csv"},
}
"sources": [
{
"name": "my_source",
"type": "http_file",
"args": {"url": "https://example.org/file.csv", "filename": "file.csv"},
}
]
}

run_raw("demo", 2024, str(tmp_path), raw_cfg, _NoopLogger(), run_id="run-1")
Expand All @@ -140,11 +148,13 @@ def _fake_fetch_payload(_stype: str, _client: dict, _formatted_args: dict):

raw_cfg = {
"output_policy": "overwrite",
"source": {
"name": "my_source",
"type": "http_file",
"args": {"url": "https://example.org/file.csv", "filename": "file.csv"},
},
"sources": [
{
"name": "my_source",
"type": "http_file",
"args": {"url": "https://example.org/file.csv", "filename": "file.csv"},
}
],
}

run_raw("demo", 2024, str(tmp_path), raw_cfg, _NoopLogger(), run_id="run-1")
Expand Down
12 changes: 1 addition & 11 deletions toolkit/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ class ToolkitConfig:
config: dict[str, Any]
validation: dict[str, Any]
output: dict[str, Any]
bq: dict[str, Any] | None

def resolve(self, rel_path: str | Path) -> Path:
p = Path(rel_path)
Expand All @@ -45,14 +44,6 @@ def ensure_str_list(value: Any, field_name: str) -> list[str]:
return _ensure_str_list(value, field_name)


def _compat_raw(model: ToolkitConfigModel) -> dict[str, Any]:
    """Dump ``model.raw`` to a plain dict and mirror the first source as ``source``.

    The dump is taken in Python mode and leaves out fields that are unset or
    ``None``. If the dump has a non-empty ``sources`` list and no ``source``
    key of its own, a shallow copy of the first ``sources`` entry is stored
    under ``source``. An existing ``source`` key is never overwritten.
    """
    dumped = model.raw.model_dump(
        mode="python",
        exclude_none=True,
        exclude_unset=True,
    )

    # An explicit "source" entry always wins; nothing to mirror.
    if "source" in dumped:
        return dumped

    source_list = dumped.get("sources") or []
    if source_list:
        # Copy so that later edits to "source" do not touch sources[0].
        dumped["source"] = dict(source_list[0])
    return dumped


def _compat_clean(model: ToolkitConfigModel) -> dict[str, Any]:
return model.clean.model_dump(
mode="python",
Expand Down Expand Up @@ -88,12 +79,11 @@ def load_config(path: str | Path, *, strict_config: bool = False) -> ToolkitConf
root_source=model.root_source,
dataset=model.dataset.name,
years=list(model.dataset.years),
raw=_compat_raw(model),
raw=model.raw.model_dump(mode="python", exclude_none=True, exclude_unset=True),
clean=_compat_clean(model),
mart=_compat_mart(model),
cross_year=_compat_cross_year(model),
config=model.config.model_dump(mode="python"),
validation=model.validation.model_dump(mode="python"),
output=model.output.model_dump(mode="python"),
bq=model.bq,
)
14 changes: 2 additions & 12 deletions toolkit/core/config_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,6 @@ class ConfigDeprecation:


_CONFIG_DEPRECATIONS: dict[str, ConfigDeprecation] = {
"bq": ConfigDeprecation(
code="DCL008",
legacy="bq",
replacement="remove field",
status="ignored",
message="bq is deprecated/ignored, usare remove field",
),
"unknown.top_level": ConfigDeprecation(
code="DCL009",
legacy="unknown top-level keys",
Expand Down Expand Up @@ -417,7 +410,6 @@ class ToolkitConfigModel(BaseModel):
config: ConfigPolicy = Field(default_factory=ConfigPolicy)
validation: GlobalValidationConfig = Field(default_factory=GlobalValidationConfig)
output: OutputConfig = Field(default_factory=OutputConfig)
bq: dict[str, Any] | None = None


def _err(msg: str, *, path: Path) -> ValueError:
Expand Down Expand Up @@ -641,7 +633,6 @@ def _declared_model_keys(model_cls: type[BaseModel]) -> set[str]:
"config",
"validation",
"output",
"bq",
}
_RAW_ALLOWED_KEYS = _declared_model_keys(RawConfig)
_CLEAN_ALLOWED_KEYS = _declared_model_keys(CleanConfig)
Expand Down Expand Up @@ -669,9 +660,6 @@ def _normalize_legacy_payload(
if isinstance(mart, dict):
normalized["mart"] = dict(mart)

if "bq" in normalized:
_emit_deprecation_notice("bq", strict_config=strict_config, path=path)

return normalized


Expand All @@ -684,6 +672,8 @@ def _warn_or_reject_unknown_keys(
normalized = dict(data)

top_level_extras = [key for key in normalized.keys() if key not in _TOP_LEVEL_ALLOWED_KEYS]
if "bq" in top_level_extras:
raise _err("bq is no longer supported; remove field", path=path)
if top_level_extras:
_emit_unknown_keys_notice(
"unknown.top_level",
Expand Down
25 changes: 9 additions & 16 deletions toolkit/raw/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,18 +148,14 @@ def run_raw(
):
"""
Supporta:
- legacy:
raw:
source: {type, client, args}
- nuovo:
raw:
extractor: {type, args} # default extractor
sources:
- name: ...
type: ...
client: ...
args: ...
extractor: {type, args} # override per source
raw:
extractor: {type, args} # default extractor
sources:
- name: ...
type: ...
client: ...
args: ...
extractor: {type, args} # override per source
"""

register_builtin_plugins(strict=strict_plugins)
Expand All @@ -172,12 +168,9 @@ def run_raw(
default_extractor_fn, default_extractor_args = get_extractor(default_extractor_spec)
output_policy = str(raw_cfg.get("output_policy", "versioned"))

# -------- build sources list (retrocompat) --------
sources = raw_cfg.get("sources")
if not sources:
# fallback legacy
legacy = raw_cfg.get("source", {})
sources = [legacy]
raise ValueError("raw.sources missing or empty in dataset.yml")

files_written: list[dict] = []
inputs: list[dict] = []
Expand Down
Loading