# Reviewer notes. Everything ahead of the first "diff --git" header is
# commentary and is not part of any hunk.
#
# Paths touched by this patch:
#   scripts/d00_peek_data.py
#       main() prints one extra line after the "Wrote summary" message, pointing
#       at `pystatsv1 workbook run d00_setup_data --force`.
#   scripts/d00_setup_data.py   (new file, 102 lines)
#       Flags: --seed (default 123), --root (default data/synthetic), --force,
#       --no-validate. With --force it removes <root>/ledgerlab_ch01 and
#       <root>/nso_v1 first. It then runs sim_business_ledgerlab.py and
#       sim_business_nso_v1.py with sys.executable via subprocess.run(check=True),
#       and, unless --no-validate is given, business_validate_dataset.py against
#       the nso_v1 folder, writing to outputs/track_d/d00_setup_data_validate.
#   src/pystatsv1/assets/workbook_track_d.zip
#       Binary asset changed (contents are not visible in a text diff).
#   src/pystatsv1/cli.py
#       cmd_workbook_list: the Track D chapter strings gain a "Dnn" prefix and a
#       "(run: ...)" hint, and a "Setup/reset" entry is added at the top.
#   tests/test_workbook_track_d_zip_smoke.py
#       Expected zip members updated (adds requirements.txt, d00_setup_data.py,
#       business_validate_dataset.py and d01.py..d23.py). The extract test now
#       unpacks the zip directly, and the list test goes through
#       pystatsv1.cli.main([...]) instead of cmd_workbook_list(Namespace(...)).
#
# NOTE(review): this copy of the patch had lost its line breaks. The line
# structure below is restored from the diff markers and hunk headers. Python
# indentation and the exact position of blank context lines cannot be recovered
# from the collapsed text and are inferred. Confirm with `git apply --check`
# against the parent commit before relying on this file.
diff --git a/scripts/d00_peek_data.py b/scripts/d00_peek_data.py
index 6b337b2..aca760a 100644
--- a/scripts/d00_peek_data.py
+++ b/scripts/d00_peek_data.py
@@ -114,6 +114,7 @@ def main(argv: list[str] | None = None) -> int:
     report.write_text("\n".join(sections).rstrip() + "\n", encoding="utf-8")
 
     print(f"\n✅ Wrote summary: {report}")
+    print("Tip: If you edited data/synthetic, run: pystatsv1 workbook run d00_setup_data --force")
     return 0
 
 
diff --git a/scripts/d00_setup_data.py b/scripts/d00_setup_data.py
new file mode 100644
index 0000000..c7a9c25
--- /dev/null
+++ b/scripts/d00_setup_data.py
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: MIT
+"""Track D workbook helper: (re)generate the synthetic datasets.
+
+Normally you do **not** need this because Track D canonical datasets (seed=123)
+are shipped and extracted during:
+
+    pystatsv1 workbook init --track d
+
+Use this script if you deleted/modified files under data/synthetic and want to
+reset them, or if you want to confirm determinism.
+"""
+
+from __future__ import annotations
+
+import argparse
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+
+def _rm_tree(path: Path) -> None:
+    if path.exists():
+        shutil.rmtree(path)
+
+
+def _run(script_path: Path, args: list[str]) -> None:
+    subprocess.run([sys.executable, str(script_path), *args], check=True)
+
+
+def main(argv: list[str] | None = None) -> int:
+    p = argparse.ArgumentParser(description="(Re)generate Track D datasets (deterministic).")
+    p.add_argument(
+        "--seed",
+        type=int,
+        default=123,
+        help="Random seed (default: 123). Keep 123 to match the canonical datasets.",
+    )
+    p.add_argument(
+        "--root",
+        default="data/synthetic",
+        help="Dataset root folder (default: data/synthetic).",
+    )
+    p.add_argument(
+        "--force",
+        action="store_true",
+        help="Delete existing dataset folders before regenerating.",
+    )
+    p.add_argument(
+        "--no-validate",
+        action="store_true",
+        help="Skip the NSO dataset validation step.",
+    )
+
+    args = p.parse_args(argv)
+
+    root = Path(args.root)
+    ledger_dir = root / "ledgerlab_ch01"
+    nso_dir = root / "nso_v1"
+
+    if args.force:
+        _rm_tree(ledger_dir)
+        _rm_tree(nso_dir)
+
+    ledger_dir.mkdir(parents=True, exist_ok=True)
+    nso_dir.mkdir(parents=True, exist_ok=True)
+
+    scripts_dir = Path(__file__).resolve().parent
+    _run(
+        scripts_dir / "sim_business_ledgerlab.py",
+        ["--outdir", str(ledger_dir), "--seed", str(args.seed)],
+    )
+    _run(
+        scripts_dir / "sim_business_nso_v1.py",
+        ["--outdir", str(nso_dir), "--seed", str(args.seed)],
+    )
+
+    if not args.no_validate:
+        outdir = Path("outputs/track_d") / "d00_setup_data_validate"
+        outdir.mkdir(parents=True, exist_ok=True)
+        _run(
+            scripts_dir / "business_validate_dataset.py",
+            [
+                "--datadir",
+                str(nso_dir),
+                "--outdir",
+                str(outdir),
+                "--dataset",
+                "nso_v1",
+                "--seed",
+                str(args.seed),
+            ],
+        )
+
+    print("\n✅ Datasets ready under:", root)
+    print(" -", ledger_dir)
+    print(" -", nso_dir)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/src/pystatsv1/assets/workbook_track_d.zip b/src/pystatsv1/assets/workbook_track_d.zip
index 68f95d4..6f7ad7d 100644
Binary files a/src/pystatsv1/assets/workbook_track_d.zip and b/src/pystatsv1/assets/workbook_track_d.zip differ
diff --git a/src/pystatsv1/cli.py b/src/pystatsv1/cli.py
index b105485..bec4162 100644
--- a/src/pystatsv1/cli.py
+++ b/src/pystatsv1/cli.py
@@ -167,30 +167,31 @@ def cmd_workbook_list(args: argparse.Namespace) -> int:
 
     if track == "d":
         chapters = [
-            "D00 Peek the Track D datasets (LedgerLab + NSO)",
-            "Ch01 Accounting as a measurement system",
-            "Ch02 Double-entry and the general ledger as a database",
-            "Ch03 Financial statements as summaries",
-            "Ch04 Assets: inventory + fixed assets",
-            "Ch05 Liabilities, payroll, taxes, and equity",
-            "Ch06 Reconciliations and quality control",
-            "Ch07 Preparing accounting data for analysis",
-            "Ch08 Descriptive statistics for financial performance",
-            "Ch09 Reporting style contract",
-            "Ch10 Probability and risk",
-            "Ch11 Sampling, estimation, and audit controls",
-            "Ch12 Hypothesis testing for decisions",
-            "Ch13 Correlation, causation, and controlled comparisons",
-            "Ch14 Regression and driver analysis",
-            "Ch15 Forecasting foundations",
-            "Ch16 Seasonality and baselines",
-            "Ch17 Revenue forecasting: segmentation + drivers",
-            "Ch18 Expense forecasting: fixed/variable/step + payroll",
-            "Ch19 Cash flow forecasting: direct method (13-week)",
-            "Ch20 Integrated forecasting: three statements",
-            "Ch21 Scenario planning: sensitivity + stress",
-            "Ch22 Financial statement analysis toolkit",
-            "Ch23 Communicating results and governance",
+            "D00 Setup/reset Track D datasets (run: d00_setup_data)",
+            "D00 Peek the Track D datasets (LedgerLab + NSO) (run: d00_peek_data)",
+            "D01 Ch01 Accounting as a measurement system (run: d01)",
+            "D02 Ch02 Double-entry and the general ledger as a database (run: d02)",
+            "D03 Ch03 Financial statements as summaries (run: d03)",
+            "D04 Ch04 Assets: inventory + fixed assets (run: d04)",
+            "D05 Ch05 Liabilities, payroll, taxes, and equity (run: d05)",
+            "D06 Ch06 Reconciliations and quality control (run: d06)",
+            "D07 Ch07 Preparing accounting data for analysis (run: d07)",
+            "D08 Ch08 Descriptive statistics for financial performance (run: d08)",
+            "D09 Ch09 Reporting style contract (run: d09)",
+            "D10 Ch10 Probability and risk (run: d10)",
+            "D11 Ch11 Sampling, estimation, and audit controls (run: d11)",
+            "D12 Ch12 Hypothesis testing for decisions (run: d12)",
+            "D13 Ch13 Correlation, causation, and controlled comparisons (run: d13)",
+            "D14 Ch14 Regression and driver analysis (run: d14)",
+            "D15 Ch15 Forecasting foundations (run: d15)",
+            "D16 Ch16 Seasonality and baselines (run: d16)",
+            "D17 Ch17 Revenue forecasting: segmentation + drivers (run: d17)",
+            "D18 Ch18 Expense forecasting: fixed/variable/step + payroll (run: d18)",
+            "D19 Ch19 Cash flow forecasting: direct method (13-week) (run: d19)",
+            "D20 Ch20 Integrated forecasting: three statements (run: d20)",
+            "D21 Ch21 Scenario planning: sensitivity + stress (run: d21)",
+            "D22 Ch22 Financial statement analysis toolkit (run: d22)",
+            "D23 Ch23 Communicating results and governance (run: d23)",
         ]
         print("\n".join(chapters))
         return 0
diff --git a/tests/test_workbook_track_d_zip_smoke.py b/tests/test_workbook_track_d_zip_smoke.py
index dea5317..4a176c0 100644
--- a/tests/test_workbook_track_d_zip_smoke.py
+++ b/tests/test_workbook_track_d_zip_smoke.py
@@ -1,52 +1,66 @@
+# SPDX-License-Identifier: MIT
+
 from __future__ import annotations
 
-import zipfile
-from argparse import Namespace
 from pathlib import Path
-import pystatsv1.cli as cli
+import zipfile
 
 
 def test_workbook_track_d_zip_exists_and_has_expected_files() -> None:
-    zip_path = Path("src") / "pystatsv1" / "assets" / "workbook_track_d.zip"
-    assert zip_path.exists(), f"Missing Track D workbook zip at: {zip_path}"
+    """Ensure the Track D workbook template zip is present and looks sane."""
+    zip_path = Path("src/pystatsv1/assets/workbook_track_d.zip")
+    assert zip_path.exists(), "workbook_track_d.zip missing"
 
-    with zipfile.ZipFile(zip_path, "r") as z:
-        names = set(z.namelist())
+    with zipfile.ZipFile(zip_path) as zf:
+        names = set(zf.namelist())
 
     expected = {
         "README.md",
         "Makefile",
-        "scripts/d00_peek_data.py",
+        "requirements.txt",
         "scripts/_cli.py",
-        "scripts/_business_etl.py",
-        "scripts/sim_business_nso_v1.py",
+        "scripts/d00_peek_data.py",
+        "scripts/d00_setup_data.py",
         "scripts/sim_business_ledgerlab.py",
-        "scripts/business_ch01_accounting_measurement.py",
-        "scripts/business_ch23_communicating_results_governance.py",
-        "data/synthetic/.gitkeep",
+        "scripts/sim_business_nso_v1.py",
+        "scripts/business_validate_dataset.py",
+        "tests/test_business_smoke.py",
         "data/synthetic/ledgerlab_ch01/.gitkeep",
         "data/synthetic/nso_v1/.gitkeep",
-        "outputs/track_d/.gitkeep",
-        "tests/test_business_smoke.py",
     }
 
+    # Convenience wrappers: d01.py .. d23.py
+    expected |= {f"scripts/d{i:02d}.py" for i in range(1, 24)}
+
     missing = expected - names
-    assert not missing, f"missing from Track D starter zip: {sorted(missing)}"
+    assert not missing, f"Missing from workbook zip: {sorted(missing)}"
+
+
+def test_extract_workbook_template_has_expected_layout(tmp_path: Path) -> None:
+    zip_path = Path("src/pystatsv1/assets/workbook_track_d.zip")
+
+    with zipfile.ZipFile(zip_path) as zf:
+        zf.extractall(tmp_path)
 
+    assert (tmp_path / "README.md").exists()
+    assert (tmp_path / "scripts" / "d00_peek_data.py").exists()
+    assert (tmp_path / "scripts" / "d00_setup_data.py").exists()
 
-def test_extract_workbook_template_track_d_extracts_scripts(tmp_path) -> None:
-    dest = tmp_path / "wb_d"
-    cli._extract_workbook_template(dest=dest, force=False, track="d")
+    # Spot-check a few wrapper scripts.
+    assert (tmp_path / "scripts" / "d01.py").exists()
+    assert (tmp_path / "scripts" / "d14.py").exists()
+    assert (tmp_path / "scripts" / "d23.py").exists()
 
-    assert (dest / "scripts" / "business_ch01_accounting_measurement.py").exists()
-    assert (dest / "scripts" / "d00_peek_data.py").exists()
-    assert (dest / "outputs" / "track_d").exists()
 
+def test_workbook_list_track_d_mentions_track_d(capsys) -> None:
+    # Import and call the list command, then assert key UX strings are present.
+    from pystatsv1.cli import main
 
-def test_workbook_list_track_d_mentions_ch01(capsys) -> None:
-    rc = cli.cmd_workbook_list(Namespace(track="d"))
-    assert rc == 0
+    code = main(["workbook", "list", "--track", "d"])
+    assert code == 0
 
     out = capsys.readouterr().out
-    assert "Ch01" in out
-    assert "D00" in out
+    assert "run: d00_setup_data" in out
+    assert "run: d00_peek_data" in out
+    assert "run: d01" in out
+    assert "run: d23" in out