"""Track D workbook helper: peek at the (canonical) datasets.

This script is meant to be run inside a Track D workbook folder created by:

    pystatsv1 workbook init --track d

It looks for the two Track D synthetic datasets under:

    data/synthetic/ledgerlab_ch01/
    data/synthetic/nso_v1/

For the Track D student experience, these datasets are intended to be stable and
repeatable (seed=123).

What it does:
- lists the available CSV tables
- prints shapes + column names
- prints a small preview of each table
- writes a summary report under outputs/track_d/
"""

from __future__ import annotations

import argparse
from pathlib import Path

import pandas as pd


def _preview_csv(path: Path, n: int = 5) -> str:
    """Return a plain-text summary of one CSV table.

    The summary holds the file name, the row and column counts, the column
    names, and the first ``n`` rows rendered without the index.

    A file that pandas cannot parse (empty, malformed, or not decodable) is
    reported as a one-line note instead of raising, so a single bad table does
    not prevent the rest of the report from being written.
    """
    try:
        df = pd.read_csv(path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        return f"{path.name}: could not be read as CSV ({exc})\n"

    head = df.head(n)
    return (
        f"{path.name}: rows={len(df)} cols={len(df.columns)}\n"
        f"columns: {', '.join(map(str, df.columns))}\n"
        f"preview:\n{head.to_string(index=False)}\n"
    )


def _peek_dataset(name: str, folder: Path, preview_rows: int) -> tuple[str, list[str]]:
    """Print a preview of every CSV in ``folder`` and build its report section.

    Args:
        name: Human-readable dataset title used in headings.
        folder: Directory expected to contain the dataset's ``*.csv`` files.
        preview_rows: Number of rows to show per table.

    Returns:
        A ``(status, lines)`` pair. ``status`` is ``"OK"`` when at least one CSV
        was found, otherwise the warning text. ``lines`` are the Markdown
        fragments for this dataset; the caller joins them with newlines.
    """
    # The console header and report heading are emitted for the warning paths
    # too, so each dataset gets its own section even when it is missing.
    print(f"\n== {name} ==")
    heading = f"## {name}\n"

    # is_dir() rather than exists(): a stray regular file with the dataset's
    # name is just as unusable as a missing folder.
    if not folder.is_dir():
        msg = (
            f"⚠️ Missing dataset folder: {folder}\n"
            "If you just created this workbook, you may be on an older PyStatsV1 version.\n"
            "Update, then re-run workbook init:\n\n"
            "    python -m pip install -U pystatsv1\n"
            "    pystatsv1 workbook init --track d --dest pystatsv1_track_d --force\n"
        )
        # Show the warning on the console; otherwise it would only appear in
        # the report file while the run itself looks fully successful.
        print(msg)
        return msg, [heading, msg]

    csv_paths = sorted(folder.glob("*.csv"))
    if not csv_paths:
        msg = (
            f"⚠️ No CSV files found in: {folder}\n"
            "This workbook expects canonical datasets to exist under data/synthetic/.\n"
        )
        print(msg)
        return msg, [heading, msg]

    lines: list[str] = [heading, f"Folder: {folder}\n"]
    for csv_path in csv_paths:
        block = _preview_csv(csv_path, n=preview_rows)
        print(block)
        # Fence markers carry no newline of their own next to the block: the
        # caller joins fragments with "\n", and an extra newline here would put
        # blank lines inside the code fence.
        lines.extend([f"### {csv_path.name}\n", "```", block.rstrip(), "```\n"])

    return "OK", lines


def main(argv: list[str] | None = None) -> int:
    """Preview both Track D datasets and write a Markdown summary report.

    Args:
        argv: Command-line arguments; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit code. It is ``0`` even when a dataset is missing: the
        warning is printed and recorded in the report rather than failing.
    """
    p = argparse.ArgumentParser(description="Peek at Track D datasets (seed=123).")
    p.add_argument(
        "--root",
        default="data/synthetic",
        help="Dataset root (default: data/synthetic).",
    )
    p.add_argument(
        "--outdir",
        default="outputs/track_d",
        help="Where to write the summary report (default: outputs/track_d).",
    )
    p.add_argument(
        "--preview-rows",
        type=int,
        default=5,
        help="Number of rows to preview per table (default: 5).",
    )

    args = p.parse_args(argv)
    # DataFrame.head(-k) means "all rows except the last k", which is never
    # what a preview size is meant to express.
    if args.preview_rows < 0:
        p.error("--preview-rows must be 0 or greater.")

    root = Path(args.root)
    outdir = Path(args.outdir)
    outdir.mkdir(parents=True, exist_ok=True)

    datasets = (
        ("LedgerLab (Ch01)", "ledgerlab_ch01"),
        ("NSO v1 running case", "nso_v1"),
    )

    sections: list[str] = ["# Track D dataset peek (seed=123)\n"]
    for title, subdir in datasets:
        _status, lines = _peek_dataset(title, root / subdir, preview_rows=args.preview_rows)
        sections.extend(lines)

    report = outdir / "d00_peek_data_summary.md"
    report.write_text("\n".join(sections).rstrip() + "\n", encoding="utf-8")

    print(f"\n✅ Wrote summary: {report}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
def _normalize_track(track: str | None) -> str:
    """Map a user-supplied track name to its canonical one-letter key.

    Surrounding whitespace and letter case are ignored. ``None`` or an empty
    string selects the default track ``"c"``. Accepted spellings are ``c``,
    ``track_c``, ``d`` and ``track_d``.

    Raises:
        SystemExit: If the value is not one of the accepted spellings.
    """
    key = (track or "c").strip().lower()
    for canonical in ("c", "d"):
        if key in (canonical, f"track_{canonical}"):
            return canonical
    raise SystemExit(
        "Unknown track. Use one of: c, track_c, d, track_d.\n"
        "Example: pystatsv1 workbook init --track d"
    )


def _workbook_asset_for_track(track: str) -> str:
    """Return the bundled zip file name that holds the given track's workbook.

    The track is normalized first, so any spelling accepted by
    ``_normalize_track`` works here and an unknown one raises ``SystemExit``.
    """
    canonical = _normalize_track(track)
    if canonical == "d":
        return "workbook_track_d.zip"
    return "workbook_starter.zip"
+def cmd_workbook_list(args: argparse.Namespace) -> int: + track = _normalize_track(getattr(args, "track", "c")) + + if track == "d": + chapters = [ + "D00 Peek the Track D datasets (LedgerLab + NSO)", + "Ch01 Accounting as a measurement system", + "Ch02 Double-entry and the general ledger as a database", + "Ch03 Financial statements as summaries", + "Ch04 Assets: inventory + fixed assets", + "Ch05 Liabilities, payroll, taxes, and equity", + "Ch06 Reconciliations and quality control", + "Ch07 Preparing accounting data for analysis", + "Ch08 Descriptive statistics for financial performance", + "Ch09 Reporting style contract", + "Ch10 Probability and risk", + "Ch11 Sampling, estimation, and audit controls", + "Ch12 Hypothesis testing for decisions", + "Ch13 Correlation, causation, and controlled comparisons", + "Ch14 Regression and driver analysis", + "Ch15 Forecasting foundations", + "Ch16 Seasonality and baselines", + "Ch17 Revenue forecasting: segmentation + drivers", + "Ch18 Expense forecasting: fixed/variable/step + payroll", + "Ch19 Cash flow forecasting: direct method (13-week)", + "Ch20 Integrated forecasting: three statements", + "Ch21 Scenario planning: sensitivity + stress", + "Ch22 Financial statement analysis toolkit", + "Ch23 Communicating results and governance", + ] + print("\n".join(chapters)) + return 0 + + # Track C (default): bundled in the starter zip. chapters = [ "Ch10 One-way ANOVA", "Ch11 Repeated measures / mixed designs (problem set)", @@ -323,6 +395,11 @@ def build_parser() -> argparse.ArgumentParser: wb_sub = p_wb.add_subparsers(dest="workbook_cmd", required=True) p_init = wb_sub.add_parser("init", help="Create a local workbook starter folder.") + p_init.add_argument( + "--track", + default="c", + help="Which workbook to create: c (intro/psych) or d (business case). 
"""Smoke tests for the bundled Track D workbook starter zip and its CLI hooks."""

from __future__ import annotations

import zipfile
from argparse import Namespace
from pathlib import Path

import pystatsv1.cli as cli


def test_workbook_track_d_zip_exists_and_has_expected_files() -> None:
    """The Track D starter zip is in the source tree and holds the core files."""
    # Anchor on this test file (tests/ sits directly under the repository
    # root) instead of the current working directory, so the test gives the
    # same result no matter where pytest is launched from.
    repo_root = Path(__file__).resolve().parents[1]
    zip_path = repo_root / "src" / "pystatsv1" / "assets" / "workbook_track_d.zip"
    assert zip_path.exists(), f"Missing Track D workbook zip at: {zip_path}"

    with zipfile.ZipFile(zip_path, "r") as z:
        names = set(z.namelist())

    expected = {
        "README.md",
        "Makefile",
        "scripts/d00_peek_data.py",
        "scripts/_cli.py",
        "scripts/_business_etl.py",
        "scripts/sim_business_nso_v1.py",
        "scripts/sim_business_ledgerlab.py",
        "scripts/business_ch01_accounting_measurement.py",
        "scripts/business_ch23_communicating_results_governance.py",
        "data/synthetic/.gitkeep",
        "data/synthetic/ledgerlab_ch01/.gitkeep",
        "data/synthetic/nso_v1/.gitkeep",
        "outputs/track_d/.gitkeep",
        "tests/test_business_smoke.py",
    }

    missing = expected - names
    assert not missing, f"missing from Track D starter zip: {sorted(missing)}"


def test_extract_workbook_template_track_d_extracts_scripts(tmp_path: Path) -> None:
    """Extracting with track="d" produces the Track D scripts and output folder."""
    dest = tmp_path / "wb_d"
    cli._extract_workbook_template(dest=dest, force=False, track="d")

    assert (dest / "scripts" / "business_ch01_accounting_measurement.py").exists()
    assert (dest / "scripts" / "d00_peek_data.py").exists()
    assert (dest / "outputs" / "track_d").exists()


def test_workbook_list_track_d_mentions_ch01(capsys) -> None:
    """`workbook list --track d` prints the Track D chapter list and returns 0."""
    rc = cli.cmd_workbook_list(Namespace(track="d"))
    assert rc == 0
    out = capsys.readouterr().out
    assert "Ch01" in out
    assert "D00" in out