From e9ef323f3c36c3d1d381b303051bc0b64015c51d Mon Sep 17 00:00:00 2001
From: Nicholas Karlson <nicholaskarlson@gmail.com>
Date: Tue, 20 Jan 2026 16:19:53 -0800
Subject: [PATCH] Track D: add BYOD normalize skeleton (tables -> normalized)

---
 src/pystatsv1/cli.py                    |  68 +++++++++--
 src/pystatsv1/trackd/byod.py            | 146 ++++++++++++++++++++++++
 tests/test_trackd_byod_normalize_cli.py |  49 ++++++++
 3 files changed, 253 insertions(+), 10 deletions(-)
 create mode 100644 tests/test_trackd_byod_normalize_cli.py

diff --git a/src/pystatsv1/cli.py b/src/pystatsv1/cli.py
index 47fbeb9..2f4a4b8 100644
--- a/src/pystatsv1/cli.py
+++ b/src/pystatsv1/cli.py
@@ -127,7 +127,7 @@ def cmd_workbook_init(args: argparse.Namespace) -> int:
         _extract_track_d_datasets(dest)
         next_steps = textwrap.dedent(
             f"""\
-            ✅ Track D workbook starter created at:
+            OK: Track D workbook starter created at:
 
                 {dest}
 
@@ -146,7 +146,7 @@ def cmd_workbook_init(args: argparse.Namespace) -> int:
     print(
         textwrap.dedent(
             f"""\
-            ✅ Workbook starter created at:
+            OK: Workbook starter created at:
 
                 {dest}
 
@@ -281,7 +281,7 @@ def cmd_workbook_run(args: argparse.Namespace) -> int:
 
     if not script.exists():
         print(
-            "❌ Could not find the script to run.\n"
+            "ERROR: Could not find the script to run.\n"
             f"   Looking for: {script}\n\n"
             "Tip: run this inside your workbook folder (created by `pystatsv1 workbook init`).\n"
             "     Or pass --workdir to point at it."
@@ -312,7 +312,7 @@ def cmd_workbook_check(args: argparse.Namespace) -> int:
 
     if not test_file.exists():
         print(
-            "❌ Could not find the test file to run.\n"
+            "ERROR: Could not find the test file to run.\n"
             f"   Looking for: {test_file}\n\n"
             "Tip: run this inside your workbook folder (created by `pystatsv1 workbook init`).\n"
             "     Or pass --workdir to point at it."
@@ -349,7 +349,7 @@ def cmd_doctor(args: argparse.Namespace) -> int:
     in_venv = _in_venv()
     if not in_venv:
         print(
-            "⚠️  You are NOT in a virtual environment. This is OK, but not recommended.\n"
+            "WARNING: You are NOT in a virtual environment. This is OK, but not recommended.\n"
             "Create one and activate it first:\n"
             "  python -m venv .venv\n"
             "  source .venv/Scripts/activate   # Windows Git Bash\n"
@@ -379,7 +379,7 @@ def cmd_doctor(args: argparse.Namespace) -> int:
     if missing:
         ok = False
         print(
-            "\n❌ Missing packages in this environment:\n  - "
+            "\nERROR: Missing packages in this environment:\n  - "
             + "\n  - ".join(missing)
             + "\n\nInstall the student bundle:\n"
             "  python -m pip install -U pip\n"
@@ -388,9 +388,9 @@ def cmd_doctor(args: argparse.Namespace) -> int:
 
     if ok:
         if in_venv:
-            print("✅ Environment looks good.")
+            print("OK: Environment looks good.")
         else:
-            print("✅ Packages look good (consider using a venv).")
+            print("OK: Packages look good (consider using a venv).")
         return 0
 
     return 1
@@ -410,7 +410,7 @@ def cmd_trackd_validate(args: argparse.Namespace) -> int:
     print(
         textwrap.dedent(
             f"""\
-            ✅ Track D dataset looks valid.
+            Track D dataset looks valid.
 
             Profile: {args.profile}
             Data directory: {Path(args.datadir).expanduser()}
@@ -435,12 +435,43 @@ def cmd_trackd_byod_init(args: argparse.Namespace) -> int:
     print(
         textwrap.dedent(
             f"""\
-            ✅ Track D BYOD project created at:\n
+            Track D BYOD project created at:\n
                 {root}\n
             Next steps:\n              1) cd {root}\n              2) Fill in the required CSVs in tables/\n              3) pystatsv1 trackd validate --datadir tables --profile {args.profile}\n            """
         ).rstrip()
     )
     return 0
+
+
+def cmd_trackd_byod_normalize(args: argparse.Namespace) -> int:
+    """Handle ``trackd byod normalize``; return 0, or 1 after printing a Track D error."""
+    from pystatsv1.trackd import TrackDDataError, TrackDSchemaError
+    from pystatsv1.trackd.byod import normalize_byod_project
+
+    try:
+        report = normalize_byod_project(args.project, profile=args.profile)
+    except (TrackDDataError, TrackDSchemaError) as e:
+        print(str(e))
+        return 1
+
+    # Assemble the summary line by line instead of interpolating the file list
+    # into a textwrap.dedent() template: the "  - name" lines have a 2-space
+    # margin, so dedent() left every other line indented by 10 spaces.
+    lines = [
+        "Track D BYOD normalization complete.",
+        "",
+        f"Profile: {report.get('profile')}",
+        f"Project: {report.get('project')}",
+        f"Input tables: {report.get('tables_dir')}",
+        f"Output normalized: {report.get('normalized_dir')}",
+        "Wrote:",
+    ]
+    lines.extend(f"  - {Path(f['dst']).name}" for f in report.get("files", []))
+
+    print("\n".join(lines).rstrip())
+    return 0
+
+
 def build_parser() -> argparse.ArgumentParser:
     p = argparse.ArgumentParser(
         prog="pystatsv1",
@@ -560,6 +591,23 @@ def build_parser() -> argparse.ArgumentParser:
     )
     p_byod_init.set_defaults(func=cmd_trackd_byod_init)
 
+    p_byod_norm = byod_sub.add_parser(
+        "normalize",
+        help="Normalize BYOD tables/ into canonical normalized/ outputs (Phase 2 skeleton).",
+    )
+    p_byod_norm.add_argument(
+        "--project",
+        required=True,
+        help="Path to a BYOD project folder created by 'pystatsv1 trackd byod init'.",
+    )
+    p_byod_norm.add_argument(
+        "--profile",
+        default=None,
+        choices=["core_gl", "ar", "full"],
+        help="Override profile (default: read from config.toml).",
+    )
+    p_byod_norm.set_defaults(func=cmd_trackd_byod_normalize)
+
 
     return p
 
diff --git a/src/pystatsv1/trackd/byod.py b/src/pystatsv1/trackd/byod.py
index 7faa007..74b3557 100644
--- a/src/pystatsv1/trackd/byod.py
+++ b/src/pystatsv1/trackd/byod.py
@@ -15,12 +15,93 @@
 import csv
 import textwrap
 from pathlib import Path
+from typing import Any
 
 from ._errors import TrackDDataError
 from ._types import PathLike
 from .contracts import ALLOWED_PROFILES, schemas_for_profile
 
 
+def _read_trackd_config(project_root: Path) -> dict[str, str]:
+    """Read the ``profile`` / ``tables_dir`` keys of ``[trackd]`` in config.toml.
+
+    A deliberately tiny line-based reader rather than a TOML parser (no TOML
+    dependency on Python 3.10). Unknown sections/keys are ignored, a missing
+    file yields ``{}``, and a trailing ``# comment`` outside quotes is dropped.
+    """
+
+    cfg_path = project_root / "config.toml"
+    if not cfg_path.exists():
+        return {}
+
+    section: str | None = None
+    out: dict[str, str] = {}
+
+    for raw in cfg_path.read_text(encoding="utf-8").splitlines():
+        line = raw.strip()
+        if not line or line.startswith("#"):
+            continue
+        if line.startswith("["):
+            header = line.split("#", 1)[0].strip()
+            if header.endswith("]"):
+                section = header.strip("[]").strip()
+                continue
+        if section != "trackd" or "=" not in line:
+            continue
+
+        k, v = line.split("=", 1)
+        key, val = k.strip(), v.strip()
+        # A leading quote delimits the value; text after its partner is ignored.
+        closing = val.find(val[0], 1) if val[:1] in {'"', "'"} else -1
+        if closing > 0:
+            val = val[1:closing]
+        else:
+            val = val.split("#", 1)[0].strip().strip('"').strip("'")
+        if key in {"profile", "tables_dir"}:
+            out[key] = val
+
+    return out
+
+
+def _normalize_csv(
+    src: Path, dst: Path, *, required_columns: tuple[str, ...]
+) -> dict[str, Any]:
+    """Copy ``src`` to ``dst`` with the columns in canonical order.
+
+    The header is ``required_columns`` in the given order, followed by any
+    other input columns in their original order; missing cells become empty.
+
+    Returns a report dict (``src``, ``dst``, ``written_rows``,
+    ``written_columns``); raises ``TrackDDataError`` if no header row is found.
+    """
+
+    with src.open("r", newline="", encoding="utf-8-sig") as source:
+        rows = csv.DictReader(source)
+        header = rows.fieldnames
+        if not header:
+            # validate() should already have rejected this; keep the message friendly.
+            raise TrackDDataError(f"CSV appears to have no header row: {src.name}")
+
+        contract = set(required_columns)
+        columns = [*required_columns]
+        columns.extend(str(c) for c in header if c is not None and str(c) not in contract)
+
+        dst.parent.mkdir(parents=True, exist_ok=True)
+        with dst.open("w", newline="", encoding="utf-8") as sink:
+            out = csv.DictWriter(sink, fieldnames=columns)
+            out.writeheader()
+            count = 0
+            for count, record in enumerate(rows, start=1):
+                out.writerow({name: record.get(name) or "" for name in columns})
+
+    return {
+        "src": str(src),
+        "dst": str(dst),
+        "written_rows": count,
+        "written_columns": columns,
+    }
+
+
 def init_byod_project(dest: PathLike, *, profile: str = "core_gl", force: bool = False) -> Path:
     """Create a Track D BYOD project folder.
 
@@ -122,3 +203,68 @@ def init_byod_project(dest: PathLike, *, profile: str = "core_gl", force: bool =
 
     (root / "README.md").write_text(readme, encoding="utf-8")
     return root
+
+
+def normalize_byod_project(project: PathLike, *, profile: str | None = None) -> dict[str, Any]:
+    """Normalize BYOD project tables into ``normalized/`` outputs.
+
+    *Phase 2 skeleton*: validates required files + required columns (headers) and
+    re-writes the CSVs in canonical contract column order. ``TrackDDataError`` is
+    raised for a missing directory/profile or when tables_dir is the output directory.
+
+    Parameters
+    ----------
+    project:
+        BYOD project root (created by :func:`init_byod_project`).
+    profile:
+        Optional override. If omitted, uses ``config.toml``.
+
+    Returns
+    -------
+    dict
+        Report dict with keys: ok, profile, project, tables_dir, normalized_dir, files.
+    """
+
+    from .validate import validate_dataset
+
+    root = Path(project).expanduser().resolve()
+    if not root.is_dir():
+        raise TrackDDataError(f"Project directory not found: {root}")
+
+    cfg = _read_trackd_config(root)
+    p = (profile or cfg.get("profile") or "").strip().lower()
+    if not p:
+        raise TrackDDataError(
+            f"Missing profile for BYOD project: {root}\n"
+            "Fix: pass --profile <core_gl|ar|full> or create the project with 'pystatsv1 trackd byod init'."
+        )
+
+    tables_dir = (root / (cfg.get("tables_dir") or "tables")).resolve()  # "" -> default
+    if not tables_dir.is_dir():
+        raise TrackDDataError(
+            f"Tables directory not found: {tables_dir}\n"
+            "Hint: your BYOD project should contain a 'tables/' folder."
+        )
+
+    out_dir = root / "normalized"
+    if tables_dir == out_dir.resolve():  # an in-place rewrite would truncate the inputs
+        raise TrackDDataError(f"Tables directory must not be the output directory: {out_dir}")
+
+    # Validate required schema issues first, so normalization can assume headers exist.
+    validate_dataset(tables_dir, profile=p)
+
+    schemas = schemas_for_profile(p)
+    out_dir.mkdir(parents=True, exist_ok=True)
+    files: list[dict[str, Any]] = []
+    for schema in schemas:
+        src, dst = tables_dir / schema.name, out_dir / schema.name
+        files.append(_normalize_csv(src, dst, required_columns=schema.required_columns))
+
+    return {
+        "ok": True,
+        "profile": p,
+        "project": str(root),
+        "tables_dir": str(tables_dir),
+        "normalized_dir": str(out_dir),
+        "files": files,
+    }
diff --git a/tests/test_trackd_byod_normalize_cli.py b/tests/test_trackd_byod_normalize_cli.py
new file mode 100644
index 0000000..d6528e3
--- /dev/null
+++ b/tests/test_trackd_byod_normalize_cli.py
@@ -0,0 +1,49 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from pystatsv1.cli import main
+
+
+def test_trackd_byod_normalize_writes_canonical_outputs(tmp_path: Path, capsys) -> None:
+    """Normalize reorders headers *and* row values, keeping extra columns last."""
+    proj = tmp_path / "byod"
+    assert main(["trackd", "byod", "init", "--dest", str(proj), "--profile", "core_gl"]) == 0
+
+    # Write valid inputs, but scramble column order and add an extra column.
+    (proj / "tables" / "chart_of_accounts.csv").write_text(
+        "account_type,account_name,account_id,normal_side,extra\n"
+        "asset,Cash,1,debit,x\n",
+        encoding="utf-8",
+    )
+    (proj / "tables" / "gl_journal.csv").write_text(
+        "credit,debit,account_id,description,doc_id,date,txn_id,extra\n"
+        "0,100,1,Example,d1,2025-01-01,t1,y\n",
+        encoding="utf-8",
+    )
+
+    rc = main(["trackd", "byod", "normalize", "--project", str(proj)])
+    out = capsys.readouterr().out
+    assert rc == 0
+    assert "normalization complete" in out.lower()
+
+    coa_out = (proj / "normalized" / "chart_of_accounts.csv").read_text(encoding="utf-8").splitlines()
+    gl_out = (proj / "normalized" / "gl_journal.csv").read_text(encoding="utf-8").splitlines()
+
+    assert coa_out[0] == "account_id,account_name,account_type,normal_side,extra"
+    assert gl_out[0] == "txn_id,date,doc_id,description,account_id,debit,credit,extra"
+    # Row values must move together with their columns, not only the header.
+    assert coa_out[1:] == ["1,Cash,asset,debit,x"]
+    assert gl_out[1:] == ["t1,2025-01-01,d1,Example,1,100,0,y"]
+
+
+def test_trackd_byod_normalize_requires_config_or_profile(tmp_path: Path, capsys) -> None:
+    """Without config.toml and without --profile, normalize exits 1 and says so."""
+    tables = tmp_path / "byod" / "tables"
+    tables.mkdir(parents=True)
+    project_dir = tables.parent
+
+    exit_code = main(["trackd", "byod", "normalize", "--project", str(project_dir)])
+
+    assert exit_code == 1
+    assert "missing profile" in capsys.readouterr().out.lower()