From e9ef323f3c36c3d1d381b303051bc0b64015c51d Mon Sep 17 00:00:00 2001 From: Nicholas Karlson Date: Tue, 20 Jan 2026 16:19:53 -0800 Subject: [PATCH] Track D: add BYOD normalize skeleton (tables -> normalized) --- src/pystatsv1/cli.py | 68 +++++++++-- src/pystatsv1/trackd/byod.py | 146 ++++++++++++++++++++++++ tests/test_trackd_byod_normalize_cli.py | 49 ++++++++ 3 files changed, 253 insertions(+), 10 deletions(-) create mode 100644 tests/test_trackd_byod_normalize_cli.py diff --git a/src/pystatsv1/cli.py b/src/pystatsv1/cli.py index 47fbeb9..2f4a4b8 100644 --- a/src/pystatsv1/cli.py +++ b/src/pystatsv1/cli.py @@ -127,7 +127,7 @@ def cmd_workbook_init(args: argparse.Namespace) -> int: _extract_track_d_datasets(dest) next_steps = textwrap.dedent( f"""\ - ✅ Track D workbook starter created at: + OK: Track D workbook starter created at: {dest} @@ -146,7 +146,7 @@ def cmd_workbook_init(args: argparse.Namespace) -> int: print( textwrap.dedent( f"""\ - ✅ Workbook starter created at: + OK: Workbook starter created at: {dest} @@ -281,7 +281,7 @@ def cmd_workbook_run(args: argparse.Namespace) -> int: if not script.exists(): print( - "❌ Could not find the script to run.\n" + "ERROR: Could not find the script to run.\n" f" Looking for: {script}\n\n" "Tip: run this inside your workbook folder (created by `pystatsv1 workbook init`).\n" " Or pass --workdir to point at it." @@ -312,7 +312,7 @@ def cmd_workbook_check(args: argparse.Namespace) -> int: if not test_file.exists(): print( - "❌ Could not find the test file to run.\n" + "ERROR: Could not find the test file to run.\n" f" Looking for: {test_file}\n\n" "Tip: run this inside your workbook folder (created by `pystatsv1 workbook init`).\n" " Or pass --workdir to point at it." @@ -349,7 +349,7 @@ def cmd_doctor(args: argparse.Namespace) -> int: in_venv = _in_venv() if not in_venv: print( - "⚠️ You are NOT in a virtual environment. This is OK, but not recommended.\n" + "WARNING: You are NOT in a virtual environment. 
def cmd_trackd_byod_normalize(args: argparse.Namespace) -> int:
    """Handle ``pystatsv1 trackd byod normalize`` and print a summary.

    Parameters
    ----------
    args:
        Parsed CLI namespace. Reads ``args.project`` (BYOD project folder)
        and ``args.profile`` (optional profile override, may be ``None``).

    Returns
    -------
    int
        ``0`` when normalization succeeds; ``1`` when
        ``normalize_byod_project`` raises ``TrackDDataError`` or
        ``TrackDSchemaError`` (the error text is printed instead).
    """
    # Imported at call time, exactly like the other trackd subcommands do.
    from pystatsv1.trackd import TrackDDataError, TrackDSchemaError
    from pystatsv1.trackd.byod import normalize_byod_project

    try:
        report = normalize_byod_project(args.project, profile=args.profile)
    except (TrackDDataError, TrackDSchemaError) as e:
        print(str(e))
        return 1

    # textwrap.dedent() strips only the whitespace shared by *every* line.
    # The bullet list is indented far less than the template, so interpolating
    # it before dedenting shrank the common margin and left the summary
    # indented. Dedent the fixed template first, then append the bullets.
    summary = textwrap.dedent(
        f"""\
        Track D BYOD normalization complete.

        Profile: {report.get('profile')}
        Project: {report.get('project')}
        Input tables: {report.get('tables_dir')}
        Output normalized: {report.get('normalized_dir')}
        Wrote:
        """
    ).rstrip()

    lines = [summary]
    lines.extend(f"  - {Path(f['dst']).name}" for f in report.get("files", []))
    print("\n".join(lines))
    return 0
def _strip_toml_scalar(raw: str) -> str:
    """Return the text of a simple TOML value, without quotes or a trailing comment."""
    text = raw.strip()
    if text[:1] in ('"', "'"):
        quote = text[0]
        closing = text.find(quote, 1)
        if closing != -1:
            # Anything after the closing quote (e.g. "  # comment") is dropped.
            return text[1:closing]
        # Unterminated quote: keep the lenient behaviour of dropping the quote.
        return text[1:].strip()
    # Bare value: a '#' starts a comment.
    return text.split("#", 1)[0].strip().strip('"').strip("'")


def _read_trackd_config(project_root: Path) -> dict[str, str]:
    """Read a tiny subset of ``config.toml``.

    Only ``profile`` and ``tables_dir`` inside the ``[trackd]`` section are
    returned; every other section and key is ignored. The file is parsed by
    hand, line by line, so no TOML library is required. Inline ``# comments``
    after a section header or a value are tolerated.

    Parameters
    ----------
    project_root:
        Directory expected to contain ``config.toml``.

    Returns
    -------
    dict[str, str]
        Subset of ``{"profile", "tables_dir"}`` found in the file; empty when
        ``config.toml`` does not exist.
    """
    cfg_path = project_root / "config.toml"
    if not cfg_path.exists():
        return {}

    section: str | None = None
    out: dict[str, str] = {}

    for raw in cfg_path.read_text(encoding="utf-8").splitlines():
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        if line.startswith("["):
            # Allow "[trackd]  # comment" as well as a plain "[trackd]".
            header = line.split("#", 1)[0].strip()
            if header.endswith("]"):
                section = header.strip("[]").strip()
                continue
        if section != "trackd" or "=" not in line:
            continue

        k, v = line.split("=", 1)
        key = k.strip()
        if key in {"profile", "tables_dir"}:
            out[key] = _strip_toml_scalar(v)

    return out


def _normalize_csv(
    src: Path, dst: Path, *, required_columns: tuple[str, ...]
) -> dict[str, Any]:
    """Write a normalized copy of ``src`` to ``dst`` with canonical column order.

    - required columns appear first, in the order given by ``required_columns``
    - any extra columns are preserved, appended in their original order
    - missing or empty cells are written as empty strings

    The output is first written to a sibling ``<dst name>.tmp`` file and then
    moved over ``dst`` once the input has been closed. This keeps ``dst`` from
    being left half-written on failure and makes ``src == dst`` safe (opening
    ``dst`` for writing directly would truncate the input while it is read).

    Parameters
    ----------
    src:
        Input CSV (read as UTF-8, a leading BOM is accepted).
    dst:
        Output CSV path; its parent directory is created if needed.
    required_columns:
        Column names that must lead the output header, in this order.

    Returns
    -------
    dict
        ``src``, ``dst`` (as strings), ``written_rows`` and ``written_columns``.

    Raises
    ------
    TrackDDataError
        If ``src`` has no header row.
    """
    tmp = dst.with_name(f"{dst.name}.tmp")

    with src.open("r", newline="", encoding="utf-8-sig") as f_in:
        reader = csv.DictReader(f_in)
        if not reader.fieldnames:
            # This should be caught by validate(), but keep a friendly message.
            raise TrackDDataError(f"CSV appears to have no header row: {src.name}")

        fieldnames = [str(c) for c in reader.fieldnames if c is not None]
        required = list(required_columns)
        required_set = set(required)
        extras = [c for c in fieldnames if c not in required_set]
        out_fields = required + extras

        dst.parent.mkdir(parents=True, exist_ok=True)
        n_rows = 0
        try:
            with tmp.open("w", newline="", encoding="utf-8") as f_out:
                writer = csv.DictWriter(f_out, fieldnames=out_fields)
                writer.writeheader()
                for row in reader:
                    writer.writerow({k: (row.get(k) or "") for k in out_fields})
                    n_rows += 1
        except BaseException:
            # Do not leave a partial temp file behind.
            tmp.unlink(missing_ok=True)
            raise

    # Swap in the finished file only after the input handle is closed
    # (replacing an open file fails on Windows).
    tmp.replace(dst)

    return {
        "src": str(src),
        "dst": str(dst),
        "written_rows": n_rows,
        "written_columns": out_fields,
    }
def normalize_byod_project(project: PathLike, *, profile: str | None = None) -> dict[str, Any]:
    """Normalize BYOD project tables into ``normalized/`` outputs.

    This is a *Phase 2 skeleton* implementation:
    - runs ``validate_dataset`` on the tables directory for the chosen profile
    - re-writes each contract CSV in canonical contract column order

    Parameters
    ----------
    project:
        BYOD project root (created by :func:`init_byod_project`).
    profile:
        Optional override. If omitted, uses ``profile`` from ``config.toml``.

    Returns
    -------
    dict
        Report dict with keys: ok, profile, project, tables_dir, normalized_dir, files.

    Raises
    ------
    TrackDDataError
        If the project or tables directory is missing, no profile can be
        determined, or the tables directory is the ``normalized/`` output
        directory itself. Errors raised by ``validate_dataset`` propagate
        unchanged.
    """

    from .validate import validate_dataset

    root = Path(project).expanduser().resolve()
    if not root.is_dir():
        raise TrackDDataError(f"Project directory not found: {root}")

    cfg = _read_trackd_config(root)
    p = (profile or cfg.get("profile") or "").strip().lower()
    if not p:
        raise TrackDDataError(
            f"Missing profile for BYOD project: {root}\n"
            "Fix: pass --profile or create the project with 'pystatsv1 trackd byod init'."
        )

    # An empty `tables_dir = ""` must not silently turn the project root into
    # the tables directory, so fall back to the default for empty values too.
    tables_rel = cfg.get("tables_dir") or "tables"
    tables_dir = (root / tables_rel).resolve()
    if not tables_dir.is_dir():
        raise TrackDDataError(
            f"Tables directory not found: {tables_dir}\n"
            "Hint: your BYOD project should contain a 'tables/' folder."
        )

    out_dir = root / "normalized"
    # Refuse to normalize in place: writing the outputs would overwrite the
    # very input files that are being read.
    if tables_dir == out_dir.resolve():
        raise TrackDDataError(
            f"Tables directory must not be the output directory: {tables_dir}\n"
            "Fix: point tables_dir in config.toml at a folder other than 'normalized/'."
        )

    # Validate required schema issues first, so normalization can assume headers exist.
    validate_dataset(tables_dir, profile=p)

    schemas = schemas_for_profile(p)
    out_dir.mkdir(parents=True, exist_ok=True)

    files: list[dict[str, Any]] = [
        _normalize_csv(
            tables_dir / schema.name,
            out_dir / schema.name,
            required_columns=schema.required_columns,
        )
        for schema in schemas
    ]

    return {
        "ok": True,
        "profile": p,
        "project": str(root),
        "tables_dir": str(tables_dir),
        "normalized_dir": str(out_dir),
        "files": files,
    }
+ (proj / "tables" / "chart_of_accounts.csv").write_text( + "account_type,account_name,account_id,normal_side,extra\n" + "asset,Cash,1,debit,x\n", + encoding="utf-8", + ) + + (proj / "tables" / "gl_journal.csv").write_text( + "credit,debit,account_id,description,doc_id,date,txn_id,extra\n" + "0,100,1,Example,d1,2025-01-01,t1,y\n", + encoding="utf-8", + ) + + rc = main(["trackd", "byod", "normalize", "--project", str(proj)]) + out = capsys.readouterr().out + + assert rc == 0 + assert "normalization complete" in out.lower() + + coa_out = (proj / "normalized" / "chart_of_accounts.csv").read_text(encoding="utf-8").splitlines()[0] + gl_out = (proj / "normalized" / "gl_journal.csv").read_text(encoding="utf-8").splitlines()[0] + + assert coa_out == "account_id,account_name,account_type,normal_side,extra" + assert gl_out == "txn_id,date,doc_id,description,account_id,debit,credit,extra" + + +def test_trackd_byod_normalize_requires_config_or_profile(tmp_path: Path, capsys) -> None: + proj = tmp_path / "byod" + proj.mkdir() + (proj / "tables").mkdir() + + rc = main(["trackd", "byod", "normalize", "--project", str(proj)]) + out = capsys.readouterr().out + + assert rc == 1 + assert "missing profile" in out.lower()