diff --git a/src/pystatsv1/assets/ledgerlab_ch01_seed123.zip b/src/pystatsv1/assets/ledgerlab_ch01_seed123.zip
new file mode 100644
index 0000000..2037e1c
Binary files /dev/null and b/src/pystatsv1/assets/ledgerlab_ch01_seed123.zip differ
diff --git a/src/pystatsv1/assets/nso_v1_seed123.zip b/src/pystatsv1/assets/nso_v1_seed123.zip
new file mode 100644
index 0000000..277c36e
Binary files /dev/null and b/src/pystatsv1/assets/nso_v1_seed123.zip differ
diff --git a/src/pystatsv1/cli.py b/src/pystatsv1/cli.py
index 71ad88c..b105485 100644
--- a/src/pystatsv1/cli.py
+++ b/src/pystatsv1/cli.py
@@ -92,13 +92,45 @@ def _extract_workbook_template(dest: Path, force: bool, track: str = "c") -> Non
             zf.extractall(dest)
 
 
+def _extract_asset_zip(asset_name: str, dest: Path) -> None:
+    """Extract the packaged zip asset *asset_name* into *dest*."""
+    asset = resources.files(f"{PKG}.assets") / asset_name
+    with resources.as_file(asset) as asset_path, zipfile.ZipFile(asset_path) as zf:
+        zf.extractall(dest)
+
+
+def _extract_track_d_datasets(dest: Path) -> None:
+    """Extract canonical Track D datasets (seed=123) into the workbook folder.
+
+    The archives are unpacked under ``dest/data/synthetic``, which is created
+    if needed.  Any extraction failure is re-raised as ``SystemExit`` naming
+    the offending asset.
+    """
+    ds_root = dest / "data" / "synthetic"
+    ds_root.mkdir(parents=True, exist_ok=True)
+
+    for asset_name in (
+        "ledgerlab_ch01_seed123.zip",
+        "nso_v1_seed123.zip",
+    ):
+        try:
+            _extract_asset_zip(asset_name, ds_root)
+        except Exception as e:
+            # Broad catch: every failure gets the same upgrade hint; the cause stays chained.
+            raise SystemExit(
+                "Failed to extract Track D canonical datasets. "
+                "Try upgrading PyStatsV1 (pip install -U pystatsv1) and re-run workbook init.\n"
+                f"Missing or unreadable asset: {asset_name}"
+            ) from e
+
+
 def cmd_workbook_init(args: argparse.Namespace) -> int:
     track = _normalize_track(getattr(args, "track", "c"))
     _extract_workbook_template(Path(args.dest), force=args.force, track=track)
 
     dest = Path(args.dest).expanduser().resolve()
-
     if track == "d":
+        _extract_track_d_datasets(dest)
         next_steps = textwrap.dedent(
             f"""\
             ✅ Track D workbook starter created at:
@@ -109,6 +141,8 @@ def cmd_workbook_init(args: argparse.Namespace) -> int:
             1) cd {dest}
             2) pystatsv1 workbook run d00_peek_data
 
+            (Datasets are pre-installed under data/synthetic/, seed=123.)
+
             Tip: If you're new to Python, always work inside a virtual environment.
             """
         ).rstrip()
diff --git a/tests/test_workbook_track_d_datasets_seed123.py b/tests/test_workbook_track_d_datasets_seed123.py
new file mode 100644
index 0000000..614452b
--- /dev/null
+++ b/tests/test_workbook_track_d_datasets_seed123.py
@@ -0,0 +1,39 @@
+from __future__ import annotations
+
+from argparse import Namespace
+from pathlib import Path
+
+import pystatsv1.cli as cli
+
+
+def test_track_d_dataset_assets_exist() -> None:
+    """The two seed=123 dataset archives exist in the package's assets directory."""
+    # Anchor on the package location rather than the current working directory,
+    # so the test does not depend on where pytest is invoked from.
+    assets = Path(cli.__file__).resolve().parent / "assets"
+    for name in (
+        "ledgerlab_ch01_seed123.zip",
+        "nso_v1_seed123.zip",
+    ):
+        assert (assets / name).exists(), f"Missing dataset asset: {assets / name}"
+
+
+def test_workbook_init_track_d_extracts_seed123_datasets(tmp_path) -> None:
+    """cmd_workbook_init with track="d" unpacks both datasets under data/synthetic/."""
+    dest = tmp_path / "wb_d"
+    rc = cli.cmd_workbook_init(Namespace(track="d", dest=str(dest), force=False))
+    assert rc == 0
+
+    ledgerlab = dest / "data" / "synthetic" / "ledgerlab_ch01"
+    nso = dest / "data" / "synthetic" / "nso_v1"
+
+    assert (ledgerlab / "chart_of_accounts.csv").exists()
+    assert (ledgerlab / "gl_journal.csv").exists()
+
+    assert (nso / "chart_of_accounts.csv").exists()
+    assert (nso / "gl_journal.csv").exists()
+    assert (nso / "nso_v1_meta.json").exists()
+
+    # quick sanity: non-empty core tables
+    assert (ledgerlab / "gl_journal.csv").stat().st_size > 0
+    assert (nso / "gl_journal.csv").stat().st_size > 0