pystatsv1 · nicholaskarlson · Jan 18, 2026 · Jan 18, 2026
diff --git a/scripts/d00_peek_data.py b/scripts/d00_peek_data.py
@@ -0,0 +1,121 @@
+"""Track D workbook helper: peek at the (canonical) datasets.
+
+This script is meant to be run inside a Track D workbook folder created by:
+
+  pystatsv1 workbook init --track d
+
+It looks for the two Track D synthetic datasets under:
+
+  data/synthetic/ledgerlab_ch01/
+  data/synthetic/nso_v1/
+
+For the Track D student experience, these datasets are intended to be stable and
+repeatable (seed=123).
+
+What it does:
+- lists the available CSV tables
+- prints shapes + column names
+- prints a small preview of each table
+- writes a summary report under outputs/track_d/
+"""
+
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+import pandas as pd
+
+
+def _preview_csv(path: Path, n: int = 5) -> str:
+    """Return a plain-text summary of one CSV table.
+
+    The summary has three parts: a header line with the file name and the
+    row/column counts, a comma-separated list of the column names, and the
+    first ``n`` rows rendered without the index.
+    """
+    table = pd.read_csv(path)
+    n_rows, n_cols = table.shape
+    column_names = ", ".join(str(col) for col in table.columns)
+    first_rows = table.head(n).to_string(index=False)
+    return "".join(
+        [
+            f"{path.name}: rows={n_rows} cols={n_cols}\n",
+            f"columns: {column_names}\n",
+            f"preview:\n{first_rows}\n",
+        ]
+    )
+
+
+def _peek_dataset(name: str, folder: Path, preview_rows: int) -> tuple[str, list[str]]:
+    """Preview every CSV table in one dataset folder.
+
+    Prints a preview of each table and collects matching Markdown fragments
+    for the summary report. When the folder or its CSV files are missing, the
+    warning is printed as well as returned, so it is visible on the console
+    and not only in the report file.
+
+    Args:
+        name: Human-readable dataset title used in the headings.
+        folder: Directory expected to hold the dataset's ``*.csv`` files.
+        preview_rows: Number of rows to show per table.
+
+    Returns:
+        A ``(status, lines)`` pair. ``status`` is ``"OK"`` when at least one
+        CSV was previewed and the warning text otherwise; ``lines`` are the
+        fragments to append to the report.
+    """
+    # is_dir() rather than exists(): a stray file at this path is not a dataset.
+    if not folder.is_dir():
+        msg = (
+            f"⚠️  Missing dataset folder: {folder}\n"
+            "If you just created this workbook, you may be on an older PyStatsV1 version.\n"
+            "Update, then re-run workbook init:\n\n"
+            "  python -m pip install -U pystatsv1\n"
+            "  pystatsv1 workbook init --track d --dest pystatsv1_track_d --force\n"
+        )
+        # Echo the warning so the student sees the fix-up steps immediately.
+        print(f"\n{msg}")
+        return msg, [msg]
+
+    csv_paths = sorted(folder.glob("*.csv"))
+    if not csv_paths:
+        msg = (
+            f"⚠️  No CSV files found in: {folder}\n"
+            "This workbook expects canonical datasets to exist under data/synthetic/.\n"
+        )
+        print(f"\n{msg}")
+        return msg, [msg]
+
+    lines: list[str] = []
+    print(f"\n== {name} ==")
+    lines.append(f"## {name}\n")
+    lines.append(f"Folder: {folder}\n")
+
+    for csv_path in csv_paths:
+        block = _preview_csv(csv_path, n=preview_rows)
+        print(block)
+        lines.append(f"### {csv_path.name}\n")
+        lines.append("```\n")
+        lines.append(block.rstrip())
+        lines.append("\n```\n")
+
+    return "OK", lines
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Run the dataset peek and write the Markdown summary.
+
+    Parses ``--root``, ``--outdir`` and ``--preview-rows``, previews the
+    LedgerLab and NSO dataset folders under the root, and writes
+    ``d00_peek_data_summary.md`` into the output directory.
+
+    Returns:
+        ``0`` once the report has been written.
+    """
+    parser = argparse.ArgumentParser(description="Peek at Track D datasets (seed=123).")
+    parser.add_argument(
+        "--root", default="data/synthetic", help="Dataset root (default: data/synthetic)."
+    )
+    parser.add_argument(
+        "--outdir",
+        default="outputs/track_d",
+        help="Where to write the summary report (default: outputs/track_d).",
+    )
+    parser.add_argument(
+        "--preview-rows",
+        type=int,
+        default=5,
+        help="Number of rows to preview per table (default: 5).",
+    )
+    opts = parser.parse_args(argv)
+
+    dataset_root = Path(opts.root)
+    report_dir = Path(opts.outdir)
+    report_dir.mkdir(parents=True, exist_ok=True)
+
+    # (report heading, sub-folder under the dataset root), in report order.
+    datasets = (
+        ("LedgerLab (Ch01)", "ledgerlab_ch01"),
+        ("NSO v1 running case", "nso_v1"),
+    )
+
+    sections: list[str] = ["# Track D dataset peek (seed=123)\n"]
+    for title, subdir in datasets:
+        _, fragment = _peek_dataset(
+            title, dataset_root / subdir, preview_rows=opts.preview_rows
+        )
+        sections.extend(fragment)
+
+    report = report_dir / "d00_peek_data_summary.md"
+    report.write_text("\n".join(sections).rstrip() + "\n", encoding="utf-8")
+
+    print(f"\n✅ Wrote summary: {report}")
+    return 0
+
+
+# Script entry point: main()'s return value becomes the process exit code.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/src/pystatsv1/assets/workbook_track_d.zip b/src/pystatsv1/assets/workbook_track_d.zip
diff --git a/src/pystatsv1/cli.py b/src/pystatsv1/cli.py
@@ -53,7 +53,27 @@ def cmd_docs(_: argparse.Namespace) -> int:
     return 0
 
 
-def _extract_workbook_template(dest: Path, force: bool) -> None:
+def _normalize_track(track: str | None) -> str:
+    """Map a user-supplied track name to its canonical one-letter code.
+
+    ``None`` or an empty string falls back to Track C. Matching ignores case
+    and surrounding whitespace.
+
+    Raises:
+        SystemExit: If the name is not one of the accepted spellings.
+    """
+    aliases = {"c": "c", "track_c": "c", "d": "d", "track_d": "d"}
+    key = (track or "c").strip().lower()
+    code = aliases.get(key)
+    if code is None:
+        raise SystemExit(
+            "Unknown track. Use one of: c, track_c, d, track_d.\n"
+            "Example: pystatsv1 workbook init --track d"
+        )
+    return code
+
+
+def _workbook_asset_for_track(track: str) -> str:
+    """Return the file name of the bundled starter zip for the given track."""
+    asset_by_code = {"c": "workbook_starter.zip", "d": "workbook_track_d.zip"}
+    code = _normalize_track(track)
+    return asset_by_code[code]
+
+
+def _extract_workbook_template(dest: Path, force: bool, track: str = "c") -> None:
     dest = dest.expanduser().resolve()
 
     if dest.exists():
@@ -65,16 +85,36 @@ def _extract_workbook_template(dest: Path, force: bool) -> None:
     else:
         dest.mkdir(parents=True, exist_ok=True)
 
-    asset = resources.files(f"{PKG}.assets") / "workbook_starter.zip"
+    asset_name = _workbook_asset_for_track(track)
+    asset = resources.files(f"{PKG}.assets") / asset_name
     with resources.as_file(asset) as asset_path:
         with zipfile.ZipFile(asset_path) as zf:
             zf.extractall(dest)
 
 
 def cmd_workbook_init(args: argparse.Namespace) -> int:
-    _extract_workbook_template(Path(args.dest), force=args.force)
+    track = _normalize_track(getattr(args, "track", "c"))
+    _extract_workbook_template(Path(args.dest), force=args.force, track=track)
 
     dest = Path(args.dest).expanduser().resolve()
+
+    if track == "d":
+        next_steps = textwrap.dedent(
+            f"""\
+            ✅ Track D workbook starter created at:
+
+                {dest}
+
+            Next steps:
+              1) cd {dest}
+              2) pystatsv1 workbook run d00_peek_data
+
+            Tip: If you're new to Python, always work inside a virtual environment.
+            """
+        ).rstrip()
+        print(next_steps)
+        return 0
+
     print(
         textwrap.dedent(
             f"""\
@@ -94,8 +134,40 @@ def cmd_workbook_init(args: argparse.Namespace) -> int:
     return 0
 
 
-def cmd_workbook_list(_: argparse.Namespace) -> int:
-    # Just list what is bundled in the starter zip (Track C for now).
+def cmd_workbook_list(args: argparse.Namespace) -> int:
+    track = _normalize_track(getattr(args, "track", "c"))
+
+    if track == "d":
+        chapters = [
+            "D00  Peek the Track D datasets (LedgerLab + NSO)",
+            "Ch01  Accounting as a measurement system",
+            "Ch02  Double-entry and the general ledger as a database",
+            "Ch03  Financial statements as summaries",
+            "Ch04  Assets: inventory + fixed assets",
+            "Ch05  Liabilities, payroll, taxes, and equity",
+            "Ch06  Reconciliations and quality control",
+            "Ch07  Preparing accounting data for analysis",
+            "Ch08  Descriptive statistics for financial performance",
+            "Ch09  Reporting style contract",
+            "Ch10  Probability and risk",
+            "Ch11  Sampling, estimation, and audit controls",
+            "Ch12  Hypothesis testing for decisions",
+            "Ch13  Correlation, causation, and controlled comparisons",
+            "Ch14  Regression and driver analysis",
+            "Ch15  Forecasting foundations",
+            "Ch16  Seasonality and baselines",
+            "Ch17  Revenue forecasting: segmentation + drivers",
+            "Ch18  Expense forecasting: fixed/variable/step + payroll",
+            "Ch19  Cash flow forecasting: direct method (13-week)",
+            "Ch20  Integrated forecasting: three statements",
+            "Ch21  Scenario planning: sensitivity + stress",
+            "Ch22  Financial statement analysis toolkit",
+            "Ch23  Communicating results and governance",
+        ]
+        print("\n".join(chapters))
+        return 0
+
+    # Track C (default): bundled in the starter zip.
     chapters = [
         "Ch10  One-way ANOVA",
         "Ch11  Repeated measures / mixed designs (problem set)",
@@ -323,6 +395,11 @@ def build_parser() -> argparse.ArgumentParser:
     wb_sub = p_wb.add_subparsers(dest="workbook_cmd", required=True)
 
     p_init = wb_sub.add_parser("init", help="Create a local workbook starter folder.")
+    p_init.add_argument(
+        "--track",
+        default="c",
+        help="Which workbook to create: c (intro/psych) or d (business case). Default: c.",
+    )
     p_init.add_argument(
         "--dest",
         default="pystatsv1_workbook",
@@ -335,7 +412,12 @@ def build_parser() -> argparse.ArgumentParser:
     )
     p_init.set_defaults(func=cmd_workbook_init)
 
-    p_list = wb_sub.add_parser("list", help="List chapters included in the starter kit.")
+    p_list = wb_sub.add_parser("list", help="List chapters included in a starter kit.")
+    p_list.add_argument(
+        "--track",
+        default="c",
+        help="Which chapter list to show: c (intro/psych) or d (business case). Default: c.",
+    )
     p_list.set_defaults(func=cmd_workbook_list)
 
     p_run = wb_sub.add_parser("run", help="Run a workbook script (no make required).")

diff --git a/tests/test_workbook_track_d_zip_smoke.py b/tests/test_workbook_track_d_zip_smoke.py
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+import zipfile
+from argparse import Namespace
+from pathlib import Path
+
+import pystatsv1.cli as cli
+
+
+def test_workbook_track_d_zip_exists_and_has_expected_files() -> None:
+    """The bundled Track D zip exists and contains the core starter files."""
+    # Anchor on this test file (tests/ sits directly under the repo root) rather
+    # than the current working directory, so the test passes wherever pytest runs.
+    repo_root = Path(__file__).resolve().parents[1]
+    zip_path = repo_root / "src" / "pystatsv1" / "assets" / "workbook_track_d.zip"
+    assert zip_path.exists(), f"Missing Track D workbook zip at: {zip_path}"
+
+    with zipfile.ZipFile(zip_path, "r") as z:
+        names = set(z.namelist())
+
+    expected = {
+        "README.md",
+        "Makefile",
+        "scripts/d00_peek_data.py",
+        "scripts/_cli.py",
+        "scripts/_business_etl.py",
+        "scripts/sim_business_nso_v1.py",
+        "scripts/sim_business_ledgerlab.py",
+        "scripts/business_ch01_accounting_measurement.py",
+        "scripts/business_ch23_communicating_results_governance.py",
+        "data/synthetic/.gitkeep",
+        "data/synthetic/ledgerlab_ch01/.gitkeep",
+        "data/synthetic/nso_v1/.gitkeep",
+        "outputs/track_d/.gitkeep",
+        "tests/test_business_smoke.py",
+    }
+
+    missing = expected - names
+    assert not missing, f"missing from Track D starter zip: {sorted(missing)}"
+
+
+def test_extract_workbook_template_track_d_extracts_scripts(tmp_path) -> None:
+    """Extracting the Track D template yields its scripts and output folder."""
+    dest = tmp_path / "wb_d"
+    cli._extract_workbook_template(dest=dest, force=False, track="d")
+
+    expected_paths = (
+        dest / "scripts" / "business_ch01_accounting_measurement.py",
+        dest / "scripts" / "d00_peek_data.py",
+        dest / "outputs" / "track_d",
+    )
+    for path in expected_paths:
+        assert path.exists()
+
+
+def test_workbook_list_track_d_mentions_ch01(capsys) -> None:
+    """``workbook list`` for Track D succeeds and prints the D00 and Ch01 entries."""
+    exit_code = cli.cmd_workbook_list(Namespace(track="d"))
+    assert exit_code == 0
+
+    printed = capsys.readouterr().out
+    for label in ("Ch01", "D00"):
+        assert label in printed