Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 58 additions & 10 deletions src/pystatsv1/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def cmd_workbook_init(args: argparse.Namespace) -> int:
_extract_track_d_datasets(dest)
next_steps = textwrap.dedent(
f"""\
Track D workbook starter created at:
OK: Track D workbook starter created at:

{dest}

Expand All @@ -146,7 +146,7 @@ def cmd_workbook_init(args: argparse.Namespace) -> int:
print(
textwrap.dedent(
f"""\
Workbook starter created at:
OK: Workbook starter created at:

{dest}

Expand Down Expand Up @@ -281,7 +281,7 @@ def cmd_workbook_run(args: argparse.Namespace) -> int:

if not script.exists():
print(
" Could not find the script to run.\n"
"ERROR: Could not find the script to run.\n"
f" Looking for: {script}\n\n"
"Tip: run this inside your workbook folder (created by `pystatsv1 workbook init`).\n"
" Or pass --workdir to point at it."
Expand Down Expand Up @@ -312,7 +312,7 @@ def cmd_workbook_check(args: argparse.Namespace) -> int:

if not test_file.exists():
print(
" Could not find the test file to run.\n"
"ERROR: Could not find the test file to run.\n"
f" Looking for: {test_file}\n\n"
"Tip: run this inside your workbook folder (created by `pystatsv1 workbook init`).\n"
" Or pass --workdir to point at it."
Expand Down Expand Up @@ -349,7 +349,7 @@ def cmd_doctor(args: argparse.Namespace) -> int:
in_venv = _in_venv()
if not in_venv:
print(
"⚠️ You are NOT in a virtual environment. This is OK, but not recommended.\n"
"WARNING: You are NOT in a virtual environment. This is OK, but not recommended.\n"
"Create one and activate it first:\n"
" python -m venv .venv\n"
" source .venv/Scripts/activate # Windows Git Bash\n"
Expand Down Expand Up @@ -379,7 +379,7 @@ def cmd_doctor(args: argparse.Namespace) -> int:
if missing:
ok = False
print(
"\n❌ Missing packages in this environment:\n - "
"\nERROR: Missing packages in this environment:\n - "
+ "\n - ".join(missing)
+ "\n\nInstall the student bundle:\n"
" python -m pip install -U pip\n"
Expand All @@ -388,9 +388,9 @@ def cmd_doctor(args: argparse.Namespace) -> int:

if ok:
if in_venv:
print(" Environment looks good.")
print("OK: Environment looks good.")
else:
print(" Packages look good (consider using a venv).")
print("OK: Packages look good (consider using a venv).")
return 0

return 1
Expand All @@ -410,7 +410,7 @@ def cmd_trackd_validate(args: argparse.Namespace) -> int:
print(
textwrap.dedent(
f"""\
Track D dataset looks valid.
Track D dataset looks valid.

Profile: {args.profile}
Data directory: {Path(args.datadir).expanduser()}
Expand All @@ -435,12 +435,43 @@ def cmd_trackd_byod_init(args: argparse.Namespace) -> int:
print(
textwrap.dedent(
f"""\
Track D BYOD project created at:\n
Track D BYOD project created at:\n
{root}\n
Next steps:\n 1) cd {root}\n 2) Fill in the required CSVs in tables/\n 3) pystatsv1 trackd validate --datadir tables --profile {args.profile}\n """
).rstrip()
)
return 0


def cmd_trackd_byod_normalize(args: argparse.Namespace) -> int:
    """Handle ``pystatsv1 trackd byod normalize``.

    Calls :func:`pystatsv1.trackd.byod.normalize_byod_project` and prints a
    short summary of what was written.

    Parameters
    ----------
    args:
        Parsed CLI namespace; ``args.project`` and ``args.profile`` are read.

    Returns
    -------
    int
        ``0`` on success, ``1`` when normalization raises ``TrackDDataError``
        or ``TrackDSchemaError`` (the error text is printed to stdout).
    """
    from pystatsv1.trackd import TrackDDataError, TrackDSchemaError
    from pystatsv1.trackd.byod import normalize_byod_project

    try:
        report = normalize_byod_project(args.project, profile=args.profile)
    except (TrackDDataError, TrackDSchemaError) as e:
        print(str(e))
        return 1

    files = report.get("files", [])

    # Build the message line by line instead of running textwrap.dedent() over
    # an already-interpolated template: the per-file lines carry a smaller
    # indent than the template text, which would shrink the common margin and
    # leave the heading lines with stray leading spaces.
    lines = [
        "Track D BYOD normalization complete.",
        "",
        f"Profile: {report.get('profile')}",
        f"Project: {report.get('project')}",
        f"Input tables: {report.get('tables_dir')}",
        f"Output normalized: {report.get('normalized_dir')}",
        "Wrote:",
    ]
    lines.extend(f" - {Path(f['dst']).name}" for f in files)

    print("\n".join(lines).rstrip())
    return 0


def build_parser() -> argparse.ArgumentParser:
p = argparse.ArgumentParser(
prog="pystatsv1",
Expand Down Expand Up @@ -560,6 +591,23 @@ def build_parser() -> argparse.ArgumentParser:
)
p_byod_init.set_defaults(func=cmd_trackd_byod_init)

p_byod_norm = byod_sub.add_parser(
"normalize",
help="Normalize BYOD tables/ into canonical normalized/ outputs (Phase 2 skeleton).",
)
p_byod_norm.add_argument(
"--project",
required=True,
help="Path to a BYOD project folder created by 'pystatsv1 trackd byod init'.",
)
p_byod_norm.add_argument(
"--profile",
default=None,
choices=["core_gl", "ar", "full"],
help="Override profile (default: read from config.toml).",
)
p_byod_norm.set_defaults(func=cmd_trackd_byod_normalize)


return p

Expand Down
146 changes: 146 additions & 0 deletions src/pystatsv1/trackd/byod.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,93 @@
import csv
import textwrap
from pathlib import Path
from typing import Any

from ._errors import TrackDDataError
from ._types import PathLike
from .contracts import ALLOWED_PROFILES, schemas_for_profile


def _read_trackd_config(project_root: Path) -> dict[str, str]:
    """Read a tiny subset of config.toml.

    The BYOD config is intentionally minimal (and write-only in the early PRs).
    We parse just enough here to support normalization:

    - [trackd].profile
    - [trackd].tables_dir

    Parameters
    ----------
    project_root:
        Directory expected to contain ``config.toml``.

    Returns
    -------
    dict
        Mapping with at most the keys ``profile`` and ``tables_dir``.
        Empty when ``config.toml`` does not exist.

    Notes
    -----
    - We avoid adding a TOML dependency (Python 3.10).
    - Unknown keys are ignored.
    - A leading UTF-8 BOM and trailing ``# comments`` are tolerated.
    """

    cfg_path = project_root / "config.toml"
    if not cfg_path.exists():
        return {}

    section: str | None = None
    out: dict[str, str] = {}

    # utf-8-sig: a BOM would otherwise glue itself to the first line and hide
    # a leading "[trackd]" header.
    for raw in cfg_path.read_text(encoding="utf-8-sig").splitlines():
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        if line.startswith("["):
            # Allow "[trackd]  # comment" as well as a bare "[trackd]".
            header = line.split("#", 1)[0].strip()
            if header.endswith("]"):
                section = header.strip("[]").strip()
                continue
        if section != "trackd" or "=" not in line:
            continue

        k, v = line.split("=", 1)
        key = k.strip()
        if key in {"profile", "tables_dir"}:
            out[key] = _parse_config_value(v)

    return out


def _parse_config_value(raw: str) -> str:
    """Return the text of a simple config value, minus quotes and trailing comment."""

    text = raw.strip()
    if text[:1] in ('"', "'"):
        # Quoted value: take what lies between the quotes, provided that only
        # whitespace or a comment follows the closing quote. A '#' inside the
        # quotes is part of the value.
        closing = text.find(text[0], 1)
        if closing != -1:
            tail = text[closing + 1 :].strip()
            if not tail or tail.startswith("#"):
                return text[1:closing]
    else:
        # Bare value: everything after '#' is a comment.
        text = text.split("#", 1)[0].strip()

    # Fallback for anything unusual: plain quote stripping.
    return text.strip('"').strip("'")


def _normalize_csv(
    src: Path, dst: Path, *, required_columns: tuple[str, ...]
) -> dict[str, Any]:
    """Copy ``src`` to ``dst`` with columns rearranged into canonical order.

    The output header lists ``required_columns`` first (in the given order),
    followed by every other input column in the order it appeared. Missing or
    empty cells are written as empty strings.

    Returns a small report dict with the keys ``src``, ``dst``,
    ``written_rows`` and ``written_columns``.

    Raises ``TrackDDataError`` when the input has no header row.
    """

    with src.open("r", newline="", encoding="utf-8-sig") as source:
        records = csv.DictReader(source)
        header = records.fieldnames
        if not header:
            # This should be caught by validate(), but keep a friendly message.
            raise TrackDDataError(f"CSV appears to have no header row: {src.name}")

        # Start from the contract columns, then append the leftovers.
        columns = list(required_columns)
        contract = set(columns)
        for name in header:
            if name is None:
                continue
            label = str(name)
            if label not in contract:
                columns.append(label)

        dst.parent.mkdir(parents=True, exist_ok=True)
        with dst.open("w", newline="", encoding="utf-8") as sink:
            writer = csv.DictWriter(sink, fieldnames=columns)
            writer.writeheader()
            row_count = 0
            for row_count, record in enumerate(records, start=1):
                writer.writerow({col: (record.get(col) or "") for col in columns})

    report: dict[str, Any] = {
        "src": str(src),
        "dst": str(dst),
        "written_rows": row_count,
        "written_columns": columns,
    }
    return report


def init_byod_project(dest: PathLike, *, profile: str = "core_gl", force: bool = False) -> Path:
"""Create a Track D BYOD project folder.

Expand Down Expand Up @@ -122,3 +203,68 @@ def init_byod_project(dest: PathLike, *, profile: str = "core_gl", force: bool =

(root / "README.md").write_text(readme, encoding="utf-8")
return root


def normalize_byod_project(project: PathLike, *, profile: str | None = None) -> dict[str, Any]:
    """Normalize BYOD project tables into ``normalized/`` outputs.

    This is a *Phase 2 skeleton* implementation:
    - validates required files + required columns (headers)
    - re-writes CSVs in canonical contract column order

    Parameters
    ----------
    project:
        BYOD project root (created by :func:`init_byod_project`).
    profile:
        Optional override. If omitted, uses ``config.toml``.

    Returns
    -------
    dict
        Report dict with keys: ok, profile, project, tables_dir, normalized_dir, files.

    Raises
    ------
    TrackDDataError
        If the project directory or tables directory is missing, no profile
        can be determined, or the tables directory is the same folder as the
        ``normalized/`` output directory.
    """

    from .validate import validate_dataset

    root = Path(project).expanduser().resolve()
    if not root.is_dir():
        raise TrackDDataError(f"Project directory not found: {root}")

    cfg = _read_trackd_config(root)
    p = (profile or cfg.get("profile") or "").strip().lower()
    if not p:
        raise TrackDDataError(
            f"Missing profile for BYOD project: {root}\n"
            "Fix: pass --profile <core_gl|ar|full> or create the project with 'pystatsv1 trackd byod init'."
        )

    tables_rel = cfg.get("tables_dir", "tables")
    tables_dir = (root / tables_rel).resolve()
    if not tables_dir.is_dir():
        raise TrackDDataError(
            f"Tables directory not found: {tables_dir}\n"
            "Hint: your BYOD project should contain a 'tables/' folder."
        )

    out_dir = root / "normalized"
    # Guard against reading and writing the same files: opening the output for
    # writing would truncate the input while it is still being read.
    if tables_dir == out_dir.resolve():
        raise TrackDDataError(
            f"Tables directory must not be the output directory: {tables_dir}\n"
            "Fix: set tables_dir in config.toml to your input folder (default: 'tables')."
        )

    # Validate required schema issues first, so normalization can assume headers exist.
    validate_dataset(tables_dir, profile=p)

    schemas = schemas_for_profile(p)
    out_dir.mkdir(parents=True, exist_ok=True)

    # One report entry per contract table, in contract order.
    files: list[dict[str, Any]] = [
        _normalize_csv(
            tables_dir / schema.name,
            out_dir / schema.name,
            required_columns=schema.required_columns,
        )
        for schema in schemas
    ]

    return {
        "ok": True,
        "profile": p,
        "project": str(root),
        "tables_dir": str(tables_dir),
        "normalized_dir": str(out_dir),
        "files": files,
    }
49 changes: 49 additions & 0 deletions tests/test_trackd_byod_normalize_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from __future__ import annotations

from pathlib import Path

from pystatsv1.cli import main


def test_trackd_byod_normalize_writes_canonical_outputs(tmp_path: Path, capsys) -> None:
    """Init a core_gl project, normalize scrambled inputs, and check header order."""
    project_dir = tmp_path / "byod"

    init_argv = ["trackd", "byod", "init", "--dest", str(project_dir), "--profile", "core_gl"]
    init_status = main(init_argv)
    assert init_status == 0

    # Valid inputs, but with scrambled column order and one extra column each.
    scrambled_inputs = {
        "chart_of_accounts.csv": (
            "account_type,account_name,account_id,normal_side,extra\n"
            "asset,Cash,1,debit,x\n"
        ),
        "gl_journal.csv": (
            "credit,debit,account_id,description,doc_id,date,txn_id,extra\n"
            "0,100,1,Example,d1,2025-01-01,t1,y\n"
        ),
    }
    tables_dir = project_dir / "tables"
    for filename, content in scrambled_inputs.items():
        (tables_dir / filename).write_text(content, encoding="utf-8")

    status = main(["trackd", "byod", "normalize", "--project", str(project_dir)])
    captured = capsys.readouterr()

    assert status == 0
    assert "normalization complete" in captured.out.lower()

    # Only the header line of each normalized file matters here.
    normalized_dir = project_dir / "normalized"
    headers = {
        filename: (normalized_dir / filename).read_text(encoding="utf-8").splitlines()[0]
        for filename in ("chart_of_accounts.csv", "gl_journal.csv")
    }

    assert headers["chart_of_accounts.csv"] == "account_id,account_name,account_type,normal_side,extra"
    assert headers["gl_journal.csv"] == "txn_id,date,doc_id,description,account_id,debit,credit,extra"


def test_trackd_byod_normalize_requires_config_or_profile(tmp_path: Path, capsys) -> None:
    """Without config.toml and without --profile, normalize exits 1 and says why."""
    project_dir = tmp_path / "byod"
    # A bare project: only the tables/ folder, no config.toml.
    (project_dir / "tables").mkdir(parents=True)

    argv = ["trackd", "byod", "normalize", "--project", str(project_dir)]
    status = main(argv)
    printed = capsys.readouterr().out.lower()

    assert status == 1
    assert "missing profile" in printed