pystatsv1 · nicholaskarlson · Jan 18, 2026 · Jan 18, 2026
diff --git a/src/pystatsv1/assets/ledgerlab_ch01_seed123.zip b/src/pystatsv1/assets/ledgerlab_ch01_seed123.zip
diff --git a/src/pystatsv1/assets/nso_v1_seed123.zip b/src/pystatsv1/assets/nso_v1_seed123.zip
diff --git a/src/pystatsv1/cli.py b/src/pystatsv1/cli.py
@@ -92,13 +92,39 @@ def _extract_workbook_template(dest: Path, force: bool, track: str = "c") -> Non
             zf.extractall(dest)
 
 
+def _extract_asset_zip(asset_name: str, dest: Path) -> None:
+    """Unpack the packaged zip asset *asset_name* into the directory *dest*."""
+    packaged = resources.files(f"{PKG}.assets") / asset_name
+    with resources.as_file(packaged) as zip_path, zipfile.ZipFile(zip_path) as archive:
+        archive.extractall(dest)
+
+
+def _extract_track_d_datasets(dest: Path) -> None:
+    """Extract the canonical Track D datasets (seed=123) into dest/data/synthetic."""
+    synthetic_dir = dest / "data" / "synthetic"
+    synthetic_dir.mkdir(parents=True, exist_ok=True)
+
+    bundled_zips = ("ledgerlab_ch01_seed123.zip", "nso_v1_seed123.zip")
+    for asset_name in bundled_zips:
+        try:
+            _extract_asset_zip(asset_name, synthetic_dir)
+        except Exception as exc:
+            # Exit with one actionable message; the original error stays chained.
+            hint = (
+                "Failed to extract Track D canonical datasets. "
+                "Try upgrading PyStatsV1 (pip install -U pystatsv1) and re-run workbook init. "
+                f"Missing or unreadable asset: {asset_name}"
+            )
+            raise SystemExit(hint) from exc
+
+
 def cmd_workbook_init(args: argparse.Namespace) -> int:
     track = _normalize_track(getattr(args, "track", "c"))
     _extract_workbook_template(Path(args.dest), force=args.force, track=track)
 
     dest = Path(args.dest).expanduser().resolve()
-
     if track == "d":
+        _extract_track_d_datasets(dest)
         next_steps = textwrap.dedent(
             f"""\
             ✅ Track D workbook starter created at:
@@ -109,6 +135,8 @@ def cmd_workbook_init(args: argparse.Namespace) -> int:
               1) cd {dest}
               2) pystatsv1 workbook run d00_peek_data
 
+            (Datasets are pre-installed under data/synthetic/, seed=123.)
+
             Tip: If you're new to Python, always work inside a virtual environment.
             """
         ).rstrip()

diff --git a/tests/test_workbook_track_d_datasets_seed123.py b/tests/test_workbook_track_d_datasets_seed123.py
@@ -0,0 +1,35 @@
+from __future__ import annotations
+
+from argparse import Namespace
+from pathlib import Path
+
+import pystatsv1.cli as cli
+
+
+def test_track_d_dataset_assets_exist() -> None:
+    """Both seed=123 dataset zips sit in the assets dir beside the cli module."""
+    # Resolve from the imported module so the check does not depend on the CWD.
+    assets = Path(cli.__file__).resolve().parent / "assets"
+    for name in ("ledgerlab_ch01_seed123.zip", "nso_v1_seed123.zip"):
+        zip_path = assets / name
+        assert zip_path.exists(), f"Missing dataset asset: {zip_path}"
+
+
+def test_workbook_init_track_d_extracts_seed123_datasets(tmp_path) -> None:
+    """cmd_workbook_init with track "d" unpacks both datasets with non-empty journals."""
+    workbook = tmp_path / "wb_d"
+    init_args = Namespace(track="d", dest=str(workbook), force=False)
+    assert cli.cmd_workbook_init(init_args) == 0
+
+    synthetic = workbook / "data" / "synthetic"
+    expected = {
+        "ledgerlab_ch01": ("chart_of_accounts.csv", "gl_journal.csv"),
+        "nso_v1": ("chart_of_accounts.csv", "gl_journal.csv", "nso_v1_meta.json"),
+    }
+    for dataset, filenames in expected.items():
+        for filename in filenames:
+            assert (synthetic / dataset / filename).exists()
+
+    # Sanity check: the core journal tables must not be empty.
+    for dataset in expected:
+        assert (synthetic / dataset / "gl_journal.csv").stat().st_size > 0