Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions scripts/d00_peek_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ def main(argv: list[str] | None = None) -> int:
report.write_text("\n".join(sections).rstrip() + "\n", encoding="utf-8")

print(f"\n✅ Wrote summary: {report}")
print("Tip: If you edited data/synthetic, run: pystatsv1 workbook run d00_setup_data --force")
return 0


Expand Down
102 changes: 102 additions & 0 deletions scripts/d00_setup_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# SPDX-License-Identifier: MIT
"""Track D workbook helper: (re)generate the synthetic datasets.

Normally you do **not** need this because Track D canonical datasets (seed=123)
are shipped and extracted during:

pystatsv1 workbook init --track d

Use this script if you deleted/modified files under data/synthetic and want to
reset them, or if you want to confirm determinism.
"""

from __future__ import annotations

import argparse
import shutil
import subprocess
import sys
from pathlib import Path


def _rm_tree(path: Path) -> None:
if path.exists():
shutil.rmtree(path)


def _run(script_path: Path, args: list[str]) -> None:
subprocess.run([sys.executable, str(script_path), *args], check=True)


def main(argv: list[str] | None = None) -> int:
    """Regenerate the Track D synthetic datasets deterministically.

    Parses CLI flags, optionally wipes the existing dataset folders
    (``--force``), re-runs the two simulator scripts with the chosen
    seed, and — unless ``--no-validate`` is given — validates the NSO
    dataset. Returns 0 on success; a failing subprocess raises.
    """
    parser = argparse.ArgumentParser(
        description="(Re)generate Track D datasets (deterministic)."
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=123,
        help="Random seed (default: 123). Keep 123 to match the canonical datasets.",
    )
    parser.add_argument(
        "--root",
        default="data/synthetic",
        help="Dataset root folder (default: data/synthetic).",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Delete existing dataset folders before regenerating.",
    )
    parser.add_argument(
        "--no-validate",
        action="store_true",
        help="Skip the NSO dataset validation step.",
    )
    opts = parser.parse_args(argv)

    data_root = Path(opts.root)
    ledger = data_root / "ledgerlab_ch01"
    nso = data_root / "nso_v1"

    # --force: wipe first so stale files cannot survive a regeneration.
    if opts.force:
        _rm_tree(ledger)
        _rm_tree(nso)

    for folder in (ledger, nso):
        folder.mkdir(parents=True, exist_ok=True)

    # The simulator scripts live next to this file.
    here = Path(__file__).resolve().parent
    seed_args = ["--seed", str(opts.seed)]
    _run(here / "sim_business_ledgerlab.py", ["--outdir", str(ledger), *seed_args])
    _run(here / "sim_business_nso_v1.py", ["--outdir", str(nso), *seed_args])

    if not opts.no_validate:
        report_dir = Path("outputs/track_d") / "d00_setup_data_validate"
        report_dir.mkdir(parents=True, exist_ok=True)
        _run(
            here / "business_validate_dataset.py",
            [
                "--datadir",
                str(nso),
                "--outdir",
                str(report_dir),
                "--dataset",
                "nso_v1",
                *seed_args,
            ],
        )

    print("\n✅ Datasets ready under:", data_root)
    print(" -", ledger)
    print(" -", nso)
    return 0


if __name__ == "__main__":
raise SystemExit(main())
Binary file modified src/pystatsv1/assets/workbook_track_d.zip
Binary file not shown.
49 changes: 25 additions & 24 deletions src/pystatsv1/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,30 +167,31 @@ def cmd_workbook_list(args: argparse.Namespace) -> int:

if track == "d":
chapters = [
"D00 Peek the Track D datasets (LedgerLab + NSO)",
"Ch01 Accounting as a measurement system",
"Ch02 Double-entry and the general ledger as a database",
"Ch03 Financial statements as summaries",
"Ch04 Assets: inventory + fixed assets",
"Ch05 Liabilities, payroll, taxes, and equity",
"Ch06 Reconciliations and quality control",
"Ch07 Preparing accounting data for analysis",
"Ch08 Descriptive statistics for financial performance",
"Ch09 Reporting style contract",
"Ch10 Probability and risk",
"Ch11 Sampling, estimation, and audit controls",
"Ch12 Hypothesis testing for decisions",
"Ch13 Correlation, causation, and controlled comparisons",
"Ch14 Regression and driver analysis",
"Ch15 Forecasting foundations",
"Ch16 Seasonality and baselines",
"Ch17 Revenue forecasting: segmentation + drivers",
"Ch18 Expense forecasting: fixed/variable/step + payroll",
"Ch19 Cash flow forecasting: direct method (13-week)",
"Ch20 Integrated forecasting: three statements",
"Ch21 Scenario planning: sensitivity + stress",
"Ch22 Financial statement analysis toolkit",
"Ch23 Communicating results and governance",
"D00 Setup/reset Track D datasets (run: d00_setup_data)",
"D00 Peek the Track D datasets (LedgerLab + NSO) (run: d00_peek_data)",
"D01 Ch01 Accounting as a measurement system (run: d01)",
"D02 Ch02 Double-entry and the general ledger as a database (run: d02)",
"D03 Ch03 Financial statements as summaries (run: d03)",
"D04 Ch04 Assets: inventory + fixed assets (run: d04)",
"D05 Ch05 Liabilities, payroll, taxes, and equity (run: d05)",
"D06 Ch06 Reconciliations and quality control (run: d06)",
"D07 Ch07 Preparing accounting data for analysis (run: d07)",
"D08 Ch08 Descriptive statistics for financial performance (run: d08)",
"D09 Ch09 Reporting style contract (run: d09)",
"D10 Ch10 Probability and risk (run: d10)",
"D11 Ch11 Sampling, estimation, and audit controls (run: d11)",
"D12 Ch12 Hypothesis testing for decisions (run: d12)",
"D13 Ch13 Correlation, causation, and controlled comparisons (run: d13)",
"D14 Ch14 Regression and driver analysis (run: d14)",
"D15 Ch15 Forecasting foundations (run: d15)",
"D16 Ch16 Seasonality and baselines (run: d16)",
"D17 Ch17 Revenue forecasting: segmentation + drivers (run: d17)",
"D18 Ch18 Expense forecasting: fixed/variable/step + payroll (run: d18)",
"D19 Ch19 Cash flow forecasting: direct method (13-week) (run: d19)",
"D20 Ch20 Integrated forecasting: three statements (run: d20)",
"D21 Ch21 Scenario planning: sensitivity + stress (run: d21)",
"D22 Ch22 Financial statement analysis toolkit (run: d22)",
"D23 Ch23 Communicating results and governance (run: d23)",
]
print("\n".join(chapters))
return 0
Expand Down
68 changes: 41 additions & 27 deletions tests/test_workbook_track_d_zip_smoke.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,66 @@
# SPDX-License-Identifier: MIT

from __future__ import annotations

import zipfile
from argparse import Namespace
from pathlib import Path

import pystatsv1.cli as cli
import zipfile


def test_workbook_track_d_zip_exists_and_has_expected_files() -> None:
zip_path = Path("src") / "pystatsv1" / "assets" / "workbook_track_d.zip"
assert zip_path.exists(), f"Missing Track D workbook zip at: {zip_path}"
"""Ensure the Track D workbook template zip is present and looks sane."""
zip_path = Path("src/pystatsv1/assets/workbook_track_d.zip")
assert zip_path.exists(), "workbook_track_d.zip missing"

with zipfile.ZipFile(zip_path, "r") as z:
names = set(z.namelist())
with zipfile.ZipFile(zip_path) as zf:
names = set(zf.namelist())

expected = {
"README.md",
"Makefile",
"scripts/d00_peek_data.py",
"requirements.txt",
"scripts/_cli.py",
"scripts/_business_etl.py",
"scripts/sim_business_nso_v1.py",
"scripts/d00_peek_data.py",
"scripts/d00_setup_data.py",
"scripts/sim_business_ledgerlab.py",
"scripts/business_ch01_accounting_measurement.py",
"scripts/business_ch23_communicating_results_governance.py",
"data/synthetic/.gitkeep",
"scripts/sim_business_nso_v1.py",
"scripts/business_validate_dataset.py",
"tests/test_business_smoke.py",
"data/synthetic/ledgerlab_ch01/.gitkeep",
"data/synthetic/nso_v1/.gitkeep",
"outputs/track_d/.gitkeep",
"tests/test_business_smoke.py",
}

# Convenience wrappers: d01.py .. d23.py
expected |= {f"scripts/d{i:02d}.py" for i in range(1, 24)}

missing = expected - names
assert not missing, f"missing from Track D starter zip: {sorted(missing)}"
assert not missing, f"Missing from workbook zip: {sorted(missing)}"


def test_extract_workbook_template_has_expected_layout(tmp_path: Path) -> None:
zip_path = Path("src/pystatsv1/assets/workbook_track_d.zip")

with zipfile.ZipFile(zip_path) as zf:
zf.extractall(tmp_path)

assert (tmp_path / "README.md").exists()
assert (tmp_path / "scripts" / "d00_peek_data.py").exists()
assert (tmp_path / "scripts" / "d00_setup_data.py").exists()

def test_extract_workbook_template_track_d_extracts_scripts(tmp_path) -> None:
dest = tmp_path / "wb_d"
cli._extract_workbook_template(dest=dest, force=False, track="d")
# Spot-check a few wrapper scripts.
assert (tmp_path / "scripts" / "d01.py").exists()
assert (tmp_path / "scripts" / "d14.py").exists()
assert (tmp_path / "scripts" / "d23.py").exists()

assert (dest / "scripts" / "business_ch01_accounting_measurement.py").exists()
assert (dest / "scripts" / "d00_peek_data.py").exists()
assert (dest / "outputs" / "track_d").exists()

def test_workbook_list_track_d_mentions_track_d(capsys) -> None:
# Import and call the list command, then assert key UX strings are present.
from pystatsv1.cli import main

def test_workbook_list_track_d_mentions_ch01(capsys) -> None:
rc = cli.cmd_workbook_list(Namespace(track="d"))
assert rc == 0
code = main(["workbook", "list", "--track", "d"])
assert code == 0
out = capsys.readouterr().out
assert "Ch01" in out
assert "D00" in out
assert "run: d00_setup_data" in out
assert "run: d00_peek_data" in out
assert "run: d01" in out
assert "run: d23" in out