Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions toolkit/core/config_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

# Module-level logger registered under the "toolkit.core.config" name.
logger = logging.getLogger("toolkit.core.config")
# NOTE(review): presumably the output-root directory names that the toolkit
# manages itself; the code consuming this set is not visible here -- confirm.
_MANAGED_OUTPUT_ROOTS = {"_smoke_out", "_test_out"}
# Regex source (a plain string, not a compiled pattern) for a safe SQL
# identifier: an ASCII letter or underscore followed by any number of ASCII
# letters, digits or underscores. Used to validate mart table names.
_SAFE_SQL_IDENTIFIER_RE = r"^[A-Za-z_][A-Za-z0-9_]*$"


@dataclass(frozen=True)
Expand Down Expand Up @@ -319,6 +320,21 @@ class MartTableConfig(BaseModel):
name: str
sql: Path

@field_validator("name")
@classmethod
def _validate_name(cls, value: str) -> str:
    """Normalize and validate a mart table name.

    Surrounding whitespace is stripped before validation, and the stripped
    form is what gets returned.

    Args:
        value: The raw ``name`` value supplied for a mart table entry.

    Returns:
        The stripped name, guaranteed to match ``_SAFE_SQL_IDENTIFIER_RE``.

    Raises:
        ValueError: If the name is empty after stripping, or if it is not a
            safe SQL identifier (letters, digits and underscores only, not
            starting with a digit).
    """
    import re

    candidate = value.strip()
    if not candidate:
        raise ValueError("mart.tables[].name must not be empty")

    # Guard-clause form: accept the well-formed identifier first, so the
    # only remaining path is the rejection below.
    is_safe_identifier = re.fullmatch(_SAFE_SQL_IDENTIFIER_RE, candidate) is not None
    if is_safe_identifier:
        return candidate

    raise ValueError(
        "mart.tables[].name must be a safe SQL identifier "
        "(letters, numbers, underscore; cannot start with a number)"
    )


class CrossYearTableConfig(BaseModel):
model_config = ConfigDict(extra="forbid")
Expand Down
9 changes: 9 additions & 0 deletions toolkit/mart/run.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import re
from pathlib import Path
from typing import Any

Expand All @@ -11,6 +12,9 @@
from toolkit.core.template import build_runtime_template_ctx, public_template_ctx, render_template


# Matches the standalone word "clean_input" (case-insensitive, delimited by
# word boundaries). run_mart searches the rendered MART SQL with it and raises
# a ValueError when the SQL references clean_input but clean.sql is not
# configured.
_CLEAN_INPUT_TOKEN_RE = re.compile(r"\bclean_input\b", re.IGNORECASE)


def _serialize_metadata_path(path: Path | None, rel_root: Path | None) -> str | None:
if path is None:
return None
Expand Down Expand Up @@ -106,6 +110,11 @@ def run_mart(
sql = sql_path.read_text(encoding="utf-8")
sql = render_template(sql, template_ctx)

if not clean_sql_configured and _CLEAN_INPUT_TOKEN_RE.search(sql):
raise ValueError(
"MART SQL references clean_input but clean.sql is not configured in dataset.yml"
)

# Save rendered SQL for audit/debug
rendered_sql_path: Path | None = None
if run_dir is not None:
Expand Down
Loading