Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions scripts/d00_peek_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ def main(argv: list[str] | None = None) -> int:
report.write_text("\n".join(sections).rstrip() + "\n", encoding="utf-8")

print(f"\n✅ Wrote summary: {report}")
print("Tip: If you edited data/synthetic, run: pystatsv1 workbook run d00_setup_data --force")
return 0


Expand Down
102 changes: 102 additions & 0 deletions scripts/d00_setup_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# SPDX-License-Identifier: MIT
"""Track D workbook helper: (re)generate the synthetic datasets.

Normally you do **not** need this because Track D canonical datasets (seed=123)
are shipped and extracted during:

pystatsv1 workbook init --track d

Use this script if you deleted/modified files under data/synthetic and want to
reset them, or if you want to confirm determinism.
"""

from __future__ import annotations

import argparse
import shutil
import subprocess
import sys
from pathlib import Path


def _rm_tree(path: Path) -> None:
if path.exists():
shutil.rmtree(path)


def _run(script_path: Path, args: list[str]) -> None:
subprocess.run([sys.executable, str(script_path), *args], check=True)


def main(argv: list[str] | None = None) -> int:
    """Regenerate the Track D synthetic datasets deterministically.

    Parses CLI flags, optionally wipes the existing dataset folders
    (``--force``), re-runs the two simulator scripts with the chosen
    seed, and — unless ``--no-validate`` is given — validates the NSO
    dataset. Returns 0 on success; a failing subprocess raises.
    """
    parser = argparse.ArgumentParser(
        description="(Re)generate Track D datasets (deterministic)."
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=123,
        help="Random seed (default: 123). Keep 123 to match the canonical datasets.",
    )
    parser.add_argument(
        "--root",
        default="data/synthetic",
        help="Dataset root folder (default: data/synthetic).",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Delete existing dataset folders before regenerating.",
    )
    parser.add_argument(
        "--no-validate",
        action="store_true",
        help="Skip the NSO dataset validation step.",
    )
    opts = parser.parse_args(argv)

    data_root = Path(opts.root)
    ledger = data_root / "ledgerlab_ch01"
    nso = data_root / "nso_v1"

    # --force: wipe first so stale files cannot survive a regeneration.
    if opts.force:
        _rm_tree(ledger)
        _rm_tree(nso)

    for folder in (ledger, nso):
        folder.mkdir(parents=True, exist_ok=True)

    # The simulator scripts live next to this file.
    here = Path(__file__).resolve().parent
    seed_args = ["--seed", str(opts.seed)]
    _run(here / "sim_business_ledgerlab.py", ["--outdir", str(ledger), *seed_args])
    _run(here / "sim_business_nso_v1.py", ["--outdir", str(nso), *seed_args])

    if not opts.no_validate:
        report_dir = Path("outputs/track_d") / "d00_setup_data_validate"
        report_dir.mkdir(parents=True, exist_ok=True)
        _run(
            here / "business_validate_dataset.py",
            [
                "--datadir",
                str(nso),
                "--outdir",
                str(report_dir),
                "--dataset",
                "nso_v1",
                *seed_args,
            ],
        )

    print("\n✅ Datasets ready under:", data_root)
    print(" -", ledger)
    print(" -", nso)
    return 0


if __name__ == "__main__":
raise SystemExit(main())
Binary file modified src/pystatsv1/assets/workbook_track_d.zip
Binary file not shown.
49 changes: 25 additions & 24 deletions src/pystatsv1/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,30 +167,31 @@ def cmd_workbook_list(args: argparse.Namespace) -> int:

if track == "d":
chapters = [
"D00 Peek the Track D datasets (LedgerLab + NSO)",
"Ch01 Accounting as a measurement system",
"Ch02 Double-entry and the general ledger as a database",
"Ch03 Financial statements as summaries",
"Ch04 Assets: inventory + fixed assets",
"Ch05 Liabilities, payroll, taxes, and equity",
"Ch06 Reconciliations and quality control",
"Ch07 Preparing accounting data for analysis",
"Ch08 Descriptive statistics for financial performance",
"Ch09 Reporting style contract",
"Ch10 Probability and risk",
"Ch11 Sampling, estimation, and audit controls",
"Ch12 Hypothesis testing for decisions",
"Ch13 Correlation, causation, and controlled comparisons",
"Ch14 Regression and driver analysis",
"Ch15 Forecasting foundations",
"Ch16 Seasonality and baselines",
"Ch17 Revenue forecasting: segmentation + drivers",
"Ch18 Expense forecasting: fixed/variable/step + payroll",
"Ch19 Cash flow forecasting: direct method (13-week)",
"Ch20 Integrated forecasting: three statements",
"Ch21 Scenario planning: sensitivity + stress",
"Ch22 Financial statement analysis toolkit",
"Ch23 Communicating results and governance",
"D00 Setup/reset Track D datasets (run: d00_setup_data)",
"D00 Peek the Track D datasets (LedgerLab + NSO) (run: d00_peek_data)",
"D01 Ch01 Accounting as a measurement system (run: d01)",
"D02 Ch02 Double-entry and the general ledger as a database (run: d02)",
"D03 Ch03 Financial statements as summaries (run: d03)",
"D04 Ch04 Assets: inventory + fixed assets (run: d04)",
"D05 Ch05 Liabilities, payroll, taxes, and equity (run: d05)",
"D06 Ch06 Reconciliations and quality control (run: d06)",
"D07 Ch07 Preparing accounting data for analysis (run: d07)",
"D08 Ch08 Descriptive statistics for financial performance (run: d08)",
"D09 Ch09 Reporting style contract (run: d09)",
"D10 Ch10 Probability and risk (run: d10)",
"D11 Ch11 Sampling, estimation, and audit controls (run: d11)",
"D12 Ch12 Hypothesis testing for decisions (run: d12)",
"D13 Ch13 Correlation, causation, and controlled comparisons (run: d13)",
"D14 Ch14 Regression and driver analysis (run: d14)",
"D15 Ch15 Forecasting foundations (run: d15)",
"D16 Ch16 Seasonality and baselines (run: d16)",
"D17 Ch17 Revenue forecasting: segmentation + drivers (run: d17)",
"D18 Ch18 Expense forecasting: fixed/variable/step + payroll (run: d18)",
"D19 Ch19 Cash flow forecasting: direct method (13-week) (run: d19)",
"D20 Ch20 Integrated forecasting: three statements (run: d20)",
"D21 Ch21 Scenario planning: sensitivity + stress (run: d21)",
"D22 Ch22 Financial statement analysis toolkit (run: d22)",
"D23 Ch23 Communicating results and governance (run: d23)",
]
print("\n".join(chapters))
return 0
Expand Down
68 changes: 41 additions & 27 deletions tests/test_workbook_track_d_zip_smoke.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,66 @@
# SPDX-License-Identifier: MIT

from __future__ import annotations

import zipfile
from argparse import Namespace
from pathlib import Path

import pystatsv1.cli as cli
import zipfile


def test_workbook_track_d_zip_exists_and_has_expected_files() -> None:
zip_path = Path("src") / "pystatsv1" / "assets" / "workbook_track_d.zip"
assert zip_path.exists(), f"Missing Track D workbook zip at: {zip_path}"
"""Ensure the Track D workbook template zip is present and looks sane."""
zip_path = Path("src/pystatsv1/assets/workbook_track_d.zip")
assert zip_path.exists(), "workbook_track_d.zip missing"

with zipfile.ZipFile(zip_path, "r") as z:
names = set(z.namelist())
with zipfile.ZipFile(zip_path) as zf:
names = set(zf.namelist())

expected = {
"README.md",
"Makefile",
"scripts/d00_peek_data.py",
"requirements.txt",
"scripts/_cli.py",
"scripts/_business_etl.py",
"scripts/sim_business_nso_v1.py",
"scripts/d00_peek_data.py",
"scripts/d00_setup_data.py",
"scripts/sim_business_ledgerlab.py",
"scripts/business_ch01_accounting_measurement.py",
"scripts/business_ch23_communicating_results_governance.py",
"data/synthetic/.gitkeep",
"scripts/sim_business_nso_v1.py",
"scripts/business_validate_dataset.py",
"tests/test_business_smoke.py",
"data/synthetic/ledgerlab_ch01/.gitkeep",
"data/synthetic/nso_v1/.gitkeep",
"outputs/track_d/.gitkeep",
"tests/test_business_smoke.py",
}

# Convenience wrappers: d01.py .. d23.py
expected |= {f"scripts/d{i:02d}.py" for i in range(1, 24)}

missing = expected - names
assert not missing, f"missing from Track D starter zip: {sorted(missing)}"
assert not missing, f"Missing from workbook zip: {sorted(missing)}"


def test_extract_workbook_template_has_expected_layout(tmp_path: Path) -> None:
zip_path = Path("src/pystatsv1/assets/workbook_track_d.zip")

with zipfile.ZipFile(zip_path) as zf:
zf.extractall(tmp_path)

assert (tmp_path / "README.md").exists()
assert (tmp_path / "scripts" / "d00_peek_data.py").exists()
assert (tmp_path / "scripts" / "d00_setup_data.py").exists()

def test_extract_workbook_template_track_d_extracts_scripts(tmp_path) -> None:
dest = tmp_path / "wb_d"
cli._extract_workbook_template(dest=dest, force=False, track="d")
# Spot-check a few wrapper scripts.
assert (tmp_path / "scripts" / "d01.py").exists()
assert (tmp_path / "scripts" / "d14.py").exists()
assert (tmp_path / "scripts" / "d23.py").exists()

assert (dest / "scripts" / "business_ch01_accounting_measurement.py").exists()
assert (dest / "scripts" / "d00_peek_data.py").exists()
assert (dest / "outputs" / "track_d").exists()

def test_workbook_list_track_d_mentions_track_d(capsys) -> None:
# Import and call the list command, then assert key UX strings are present.
from pystatsv1.cli import main

def test_workbook_list_track_d_mentions_ch01(capsys) -> None:
rc = cli.cmd_workbook_list(Namespace(track="d"))
assert rc == 0
code = main(["workbook", "list", "--track", "d"])
assert code == 0
out = capsys.readouterr().out
assert "Ch01" in out
assert "D00" in out
assert "run: d00_setup_data" in out
assert "run: d00_peek_data" in out
assert "run: d01" in out
assert "run: d23" in out