Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions scripts/d00_peek_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
"""Track D workbook helper: peek at the (canonical) datasets.

This script is meant to be run inside a Track D workbook folder created by:

pystatsv1 workbook init --track d

It looks for the two Track D synthetic datasets under:

data/synthetic/ledgerlab_ch01/
data/synthetic/nso_v1/

For the Track D student experience, these datasets are intended to be stable and
repeatable (seed=123).

What it does:
- lists the available CSV tables
- prints shapes + column names
- prints a small preview of each table
- writes a summary report under outputs/track_d/
"""

from __future__ import annotations

import argparse
from pathlib import Path

import pandas as pd


def _preview_csv(path: Path, n: int = 5) -> str:
    """Summarize one CSV file as a short, human-readable text block.

    The file is loaded with ``pd.read_csv`` using default options.

    Args:
        path: CSV file to load; only its file name appears in the summary.
        n: Number of leading rows to include in the preview (default 5).

    Returns:
        A newline-terminated string with three parts: a header line holding
        the file name plus row/column counts, a ``columns:`` line listing the
        column labels, and a ``preview:`` section with the first ``n`` rows
        rendered without the index column.
    """
    table = pd.read_csv(path)
    row_count, col_count = table.shape
    column_labels = ", ".join(str(label) for label in table.columns)
    sample = table.head(n).to_string(index=False)

    parts = [
        f"{path.name}: rows={row_count} cols={col_count}",
        f"columns: {column_labels}",
        "preview:",
        sample,
    ]
    return "\n".join(parts) + "\n"


def _peek_dataset(name: str, folder: Path, preview_rows: int) -> tuple[str, list[str]]:
    """Preview every CSV table in one dataset folder.

    Each table's summary is printed to stdout and also collected as Markdown
    lines for the report. When the folder is missing or holds no CSV files,
    the warning is printed as well as returned, so the person running the
    script sees it on the console instead of only finding it in the report.

    Args:
        name: Display name used for the console banner and the report heading.
        folder: Directory expected to contain the dataset's ``*.csv`` files.
        preview_rows: Number of rows to preview per table.

    Returns:
        ``(status, lines)``. On success ``status`` is ``"OK"`` and ``lines``
        holds the Markdown section for this dataset. On failure ``status`` is
        the warning text and ``lines`` is a one-element list with that text.
    """
    if not folder.exists():
        msg = (
            f"⚠️ Missing dataset folder: {folder}\n"
            "If you just created this workbook, you may be on an older PyStatsV1 version.\n"
            "Update, then re-run workbook init:\n\n"
            " python -m pip install -U pystatsv1\n"
            " pystatsv1 workbook init --track d --dest pystatsv1_track_d --force\n"
        )
        # Surface the remediation advice immediately; the caller only stores
        # the returned lines in the report file.
        print(msg)
        return msg, [msg]

    # Sorted so console output and the report are stable between runs.
    csv_paths = sorted(folder.glob("*.csv"))
    if not csv_paths:
        msg = (
            f"⚠️ No CSV files found in: {folder}\n"
            "This workbook expects canonical datasets to exist under data/synthetic/.\n"
        )
        print(msg)
        return msg, [msg]

    lines: list[str] = []
    print(f"\n== {name} ==")
    lines.append(f"## {name}\n")
    lines.append(f"Folder: {folder}\n")

    for csv_path in csv_paths:
        block = _preview_csv(csv_path, n=preview_rows)
        print(block)
        # Wrap each table summary in a fenced block so the Markdown report
        # keeps the column alignment.
        lines.append(f"### {csv_path.name}\n")
        lines.append("```\n")
        lines.append(block.rstrip())
        lines.append("\n```\n")

    return "OK", lines


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: preview both Track D datasets and save a report.

    Parses ``--root``, ``--outdir`` and ``--preview-rows``, creates the output
    directory, previews the ``ledgerlab_ch01`` and ``nso_v1`` folders under the
    dataset root, and writes the collected lines to
    ``d00_peek_data_summary.md`` inside the output directory.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` makes argparse
            fall back to the process command line.

    Returns:
        Always ``0``; the per-dataset status is not used for the exit code.
    """
    parser = argparse.ArgumentParser(description="Peek at Track D datasets (seed=123).")
    parser.add_argument(
        "--root",
        default="data/synthetic",
        help="Dataset root (default: data/synthetic).",
    )
    parser.add_argument(
        "--outdir",
        default="outputs/track_d",
        help="Where to write the summary report (default: outputs/track_d).",
    )
    parser.add_argument(
        "--preview-rows",
        type=int,
        default=5,
        help="Number of rows to preview per table (default: 5).",
    )
    options = parser.parse_args(argv)

    dataset_root = Path(options.root)
    report_dir = Path(options.outdir)
    report_dir.mkdir(parents=True, exist_ok=True)

    report_lines: list[str] = ["# Track D dataset peek (seed=123)\n"]

    # (display name, sub-folder under the dataset root), in report order.
    datasets = (
        ("LedgerLab (Ch01)", "ledgerlab_ch01"),
        ("NSO v1 running case", "nso_v1"),
    )
    for title, subfolder in datasets:
        _status, section = _peek_dataset(
            title, dataset_root / subfolder, preview_rows=options.preview_rows
        )
        report_lines.extend(section)

    report_path = report_dir / "d00_peek_data_summary.md"
    report_text = "\n".join(report_lines).rstrip() + "\n"
    report_path.write_text(report_text, encoding="utf-8")

    print(f"\n✅ Wrote summary: {report_path}")
    return 0


# Script entry point: run main() and hand its return value to the interpreter
# as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
Binary file added src/pystatsv1/assets/workbook_track_d.zip
Binary file not shown.
94 changes: 88 additions & 6 deletions src/pystatsv1/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,27 @@ def cmd_docs(_: argparse.Namespace) -> int:
return 0


def _extract_workbook_template(dest: Path, force: bool) -> None:
def _normalize_track(track: str | None) -> str:
    """Map a user-supplied track name to its canonical one-letter form.

    Matching ignores surrounding whitespace and letter case. A missing or
    empty value is treated as Track C.

    Args:
        track: Raw track name such as ``"c"``, ``"track_c"``, ``"d"`` or
            ``"track_d"``; may be ``None``.

    Returns:
        ``"c"`` or ``"d"``.

    Raises:
        SystemExit: If the value is not one of the accepted spellings.
    """
    aliases = {
        "c": "c",
        "track_c": "c",
        "d": "d",
        "track_d": "d",
    }
    key = (track or "c").strip().lower()
    canonical = aliases.get(key)
    if canonical is None:
        raise SystemExit(
            "Unknown track. Use one of: c, track_c, d, track_d.\n"
            "Example: pystatsv1 workbook init --track d"
        )
    return canonical


def _workbook_asset_for_track(track: str) -> str:
    """Return the bundled zip file name for the given workbook track.

    Args:
        track: Track name in any spelling accepted by ``_normalize_track``.

    Returns:
        ``"workbook_starter.zip"`` for Track C or ``"workbook_track_d.zip"``
        for Track D.
    """
    zip_by_track = {
        "c": "workbook_starter.zip",
        "d": "workbook_track_d.zip",
    }
    canonical = _normalize_track(track)
    return zip_by_track[canonical]


def _extract_workbook_template(dest: Path, force: bool, track: str = "c") -> None:
dest = dest.expanduser().resolve()

if dest.exists():
Expand All @@ -65,16 +85,36 @@ def _extract_workbook_template(dest: Path, force: bool) -> None:
else:
dest.mkdir(parents=True, exist_ok=True)

asset = resources.files(f"{PKG}.assets") / "workbook_starter.zip"
asset_name = _workbook_asset_for_track(track)
asset = resources.files(f"{PKG}.assets") / asset_name
with resources.as_file(asset) as asset_path:
with zipfile.ZipFile(asset_path) as zf:
zf.extractall(dest)


def cmd_workbook_init(args: argparse.Namespace) -> int:
_extract_workbook_template(Path(args.dest), force=args.force)
track = _normalize_track(getattr(args, "track", "c"))
_extract_workbook_template(Path(args.dest), force=args.force, track=track)

dest = Path(args.dest).expanduser().resolve()

if track == "d":
next_steps = textwrap.dedent(
f"""\
✅ Track D workbook starter created at:

{dest}

Next steps:
1) cd {dest}
2) pystatsv1 workbook run d00_peek_data

Tip: If you're new to Python, always work inside a virtual environment.
"""
).rstrip()
print(next_steps)
return 0

print(
textwrap.dedent(
f"""\
Expand All @@ -94,8 +134,40 @@ def cmd_workbook_init(args: argparse.Namespace) -> int:
return 0


def cmd_workbook_list(_: argparse.Namespace) -> int:
# Just list what is bundled in the starter zip (Track C for now).
def cmd_workbook_list(args: argparse.Namespace) -> int:
track = _normalize_track(getattr(args, "track", "c"))

if track == "d":
chapters = [
"D00 Peek the Track D datasets (LedgerLab + NSO)",
"Ch01 Accounting as a measurement system",
"Ch02 Double-entry and the general ledger as a database",
"Ch03 Financial statements as summaries",
"Ch04 Assets: inventory + fixed assets",
"Ch05 Liabilities, payroll, taxes, and equity",
"Ch06 Reconciliations and quality control",
"Ch07 Preparing accounting data for analysis",
"Ch08 Descriptive statistics for financial performance",
"Ch09 Reporting style contract",
"Ch10 Probability and risk",
"Ch11 Sampling, estimation, and audit controls",
"Ch12 Hypothesis testing for decisions",
"Ch13 Correlation, causation, and controlled comparisons",
"Ch14 Regression and driver analysis",
"Ch15 Forecasting foundations",
"Ch16 Seasonality and baselines",
"Ch17 Revenue forecasting: segmentation + drivers",
"Ch18 Expense forecasting: fixed/variable/step + payroll",
"Ch19 Cash flow forecasting: direct method (13-week)",
"Ch20 Integrated forecasting: three statements",
"Ch21 Scenario planning: sensitivity + stress",
"Ch22 Financial statement analysis toolkit",
"Ch23 Communicating results and governance",
]
print("\n".join(chapters))
return 0

# Track C (default): bundled in the starter zip.
chapters = [
"Ch10 One-way ANOVA",
"Ch11 Repeated measures / mixed designs (problem set)",
Expand Down Expand Up @@ -323,6 +395,11 @@ def build_parser() -> argparse.ArgumentParser:
wb_sub = p_wb.add_subparsers(dest="workbook_cmd", required=True)

p_init = wb_sub.add_parser("init", help="Create a local workbook starter folder.")
p_init.add_argument(
"--track",
default="c",
help="Which workbook to create: c (intro/psych) or d (business case). Default: c.",
)
p_init.add_argument(
"--dest",
default="pystatsv1_workbook",
Expand All @@ -335,7 +412,12 @@ def build_parser() -> argparse.ArgumentParser:
)
p_init.set_defaults(func=cmd_workbook_init)

p_list = wb_sub.add_parser("list", help="List chapters included in the starter kit.")
p_list = wb_sub.add_parser("list", help="List chapters included in a starter kit.")
p_list.add_argument(
"--track",
default="c",
help="Which chapter list to show: c (intro/psych) or d (business case). Default: c.",
)
p_list.set_defaults(func=cmd_workbook_list)

p_run = wb_sub.add_parser("run", help="Run a workbook script (no make required).")
Expand Down
52 changes: 52 additions & 0 deletions tests/test_workbook_track_d_zip_smoke.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from __future__ import annotations

import zipfile
from argparse import Namespace
from pathlib import Path

import pystatsv1.cli as cli


def test_workbook_track_d_zip_exists_and_has_expected_files() -> None:
    """The Track D starter zip exists and contains the key workbook files.

    The archive path is relative, so it is resolved against the current
    working directory.
    """
    archive = Path("src", "pystatsv1", "assets", "workbook_track_d.zip")
    assert archive.exists(), f"Missing Track D workbook zip at: {archive}"

    with zipfile.ZipFile(archive, "r") as bundle:
        members = frozenset(bundle.namelist())

    # Entries every Track D starter zip must ship with.
    required = (
        "README.md",
        "Makefile",
        "scripts/d00_peek_data.py",
        "scripts/_cli.py",
        "scripts/_business_etl.py",
        "scripts/sim_business_nso_v1.py",
        "scripts/sim_business_ledgerlab.py",
        "scripts/business_ch01_accounting_measurement.py",
        "scripts/business_ch23_communicating_results_governance.py",
        "data/synthetic/.gitkeep",
        "data/synthetic/ledgerlab_ch01/.gitkeep",
        "data/synthetic/nso_v1/.gitkeep",
        "outputs/track_d/.gitkeep",
        "tests/test_business_smoke.py",
    )

    missing = [entry for entry in required if entry not in members]
    assert not missing, f"missing from Track D starter zip: {sorted(missing)}"


def test_extract_workbook_template_track_d_extracts_scripts(tmp_path) -> None:
    """Extracting the Track D template creates its scripts and output folder."""
    target = tmp_path / "wb_d"
    cli._extract_workbook_template(dest=target, force=False, track="d")

    expected_paths = (
        target / "scripts" / "business_ch01_accounting_measurement.py",
        target / "scripts" / "d00_peek_data.py",
        target / "outputs" / "track_d",
    )
    for extracted in expected_paths:
        assert extracted.exists()


def test_workbook_list_track_d_mentions_ch01(capsys) -> None:
    """Listing Track D succeeds and prints both the D00 and Ch01 entries."""
    exit_code = cli.cmd_workbook_list(Namespace(track="d"))
    assert exit_code == 0

    printed = capsys.readouterr().out
    for marker in ("Ch01", "D00"):
        assert marker in printed