Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/workbook/track_d_lab_ta_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ location. You now have a ready-to-run lab workspace.”

Students should see a message like:

- “ Track D workbook starter created at …”
- “[OK] Track D workbook starter created at …”
- “Datasets are pre-installed under ``data/synthetic/``, seed=123.”

4.2 Why seed=123 matters
Expand Down
2 changes: 1 addition & 1 deletion docs/source/workbook/windows11_setup.rst
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ The ``[workbook]`` part is important: it means "include the workbook extras".

``pystatsv1 doctor`` runs a quick health check. If you see::

Environment looks good.
[OK] Environment looks good.

then your install is working.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,8 @@ def analyze_ch13(*, datadir: Path, outdir: Path, seed: int) -> Outputs:
The controlled comparison (partial correlation) helps avoid a misleading story: “Revenue causes payroll taxes.”

## What we can and cannot claim
✅ We can say: Revenue and payroll taxes move together in this dataset, and payroll explains much of that relationship.
❌ We cannot say: Increasing revenue causes payroll taxes to rise (causal claim).
- OK: Revenue and payroll taxes move together in this dataset, and payroll explains much of that relationship.
- NOT OK: Increasing revenue causes payroll taxes to rise (a causal claim).

## Sensitivity check
Leave-one-out naive correlation range: {loo_min:.3f} to {loo_max:.3f}
Expand Down
6 changes: 3 additions & 3 deletions scripts/d00_peek_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def _preview_csv(path: Path, n: int = 5) -> str:
def _peek_dataset(name: str, folder: Path, preview_rows: int) -> tuple[str, list[str]]:
if not folder.exists():
msg = (
f"⚠️ Missing dataset folder: {folder}\n"
f"[WARN] Missing dataset folder: {folder}\n"
"If you just created this workbook, you may be on an older PyStatsV1 version.\n"
"Update, then re-run workbook init:\n\n"
" python -m pip install -U pystatsv1\n"
Expand All @@ -51,7 +51,7 @@ def _peek_dataset(name: str, folder: Path, preview_rows: int) -> tuple[str, list
csvs = sorted(folder.glob("*.csv"))
if not csvs:
msg = (
f"⚠️ No CSV files found in: {folder}\n"
f"[WARN] No CSV files found in: {folder}\n"
"This workbook expects canonical datasets to exist under data/synthetic/.\n"
)
return msg, [msg]
Expand Down Expand Up @@ -113,7 +113,7 @@ def main(argv: list[str] | None = None) -> int:
report = outdir / "d00_peek_data_summary.md"
report.write_text("\n".join(sections).rstrip() + "\n", encoding="utf-8")

print(f"\n Wrote summary: {report}")
print(f"\n[OK] Wrote summary: {report}")
print("Tip: If you edited data/synthetic, run: pystatsv1 workbook run d00_setup_data --force")
return 0

Expand Down
3 changes: 2 additions & 1 deletion scripts/d00_setup_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ def main(argv: list[str] | None = None) -> int:
],
)

print("\n✅ Datasets ready under:", root)
# ASCII-only status marker for Windows consoles that default to cp1252.
print("\n[OK] Datasets ready under:", root)
print(" -", ledger_dir)
print(" -", nso_dir)
return 0
Expand Down
6 changes: 4 additions & 2 deletions scripts/my_data_01_explore.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def print_quick_report(df: pd.DataFrame) -> None:
if ID_COL in df.columns:
dup = df[ID_COL].duplicated().sum()
if dup:
print(f"\n⚠️ Duplicate {ID_COL} values: {dup}")
print(f"\n[WARN] Duplicate {ID_COL} values: {dup}")

if GROUP_COL in df.columns:
k = df[GROUP_COL].nunique(dropna=False)
Expand All @@ -182,7 +182,9 @@ def print_quick_report(df: pd.DataFrame) -> None:

numeric = df.select_dtypes(include=["number"]).columns
if len(numeric) == 0:
print("\n⚠️ No numeric columns detected. If numbers are stored as text, fix your CSV or edit the script.")
print(
"\n[WARN] No numeric columns detected. If numbers are stored as text, fix your CSV or edit the script."
)
else:
print("\nnumeric columns:")
for c in numeric:
Expand Down
Binary file modified src/pystatsv1/assets/workbook_track_d.zip
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,8 @@ def analyze_ch13(*, datadir: Path, outdir: Path, seed: int) -> Outputs:
The controlled comparison (partial correlation) helps avoid a misleading story: “Revenue causes payroll taxes.”

## What we can and cannot claim
✅ We can say: Revenue and payroll taxes move together in this dataset, and payroll explains much of that relationship.
❌ We cannot say: Increasing revenue causes payroll taxes to rise (causal claim).
- OK: Revenue and payroll taxes move together in this dataset, and payroll explains much of that relationship.
- NOT OK: Increasing revenue causes payroll taxes to rise (a causal claim).

## Sensitivity check
Leave-one-out naive correlation range: {loo_min:.3f} to {loo_max:.3f}
Expand Down
7 changes: 4 additions & 3 deletions workbooks/track_d_template/scripts/d00_peek_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def _preview_csv(path: Path, n: int = 5) -> str:
def _peek_dataset(name: str, folder: Path, preview_rows: int) -> tuple[str, list[str]]:
if not folder.exists():
msg = (
f"⚠️ Missing dataset folder: {folder}\n"
f"[WARN] Missing dataset folder: {folder}\n"
"If you just created this workbook, you may be on an older PyStatsV1 version.\n"
"Update, then re-run workbook init:\n\n"
" python -m pip install -U pystatsv1\n"
Expand All @@ -54,7 +54,7 @@ def _peek_dataset(name: str, folder: Path, preview_rows: int) -> tuple[str, list
csvs = sorted(folder.glob("*.csv"))
if not csvs:
msg = (
f"⚠️ No CSV files found in: {folder}\n"
f"[WARN] No CSV files found in: {folder}\n"
"This workbook expects canonical datasets to exist under data/synthetic/.\n"
)
return msg, [msg]
Expand Down Expand Up @@ -116,7 +116,8 @@ def main(argv: list[str] | None = None) -> int:
report = outdir / "d00_peek_data_summary.md"
report.write_text("\n".join(sections).rstrip() + "\n", encoding="utf-8")

print(f"\n✅ Wrote summary: {report}")
# ASCII-only status marker for Windows consoles that default to cp1252.
print(f"\n[OK] Wrote summary: {report}")
print("Tip: If you edited data/synthetic, run: pystatsv1 workbook run d00_setup_data --force")
return 0

Expand Down
3 changes: 2 additions & 1 deletion workbooks/track_d_template/scripts/d00_setup_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ def main(argv: list[str] | None = None) -> int:
],
)

print("\n✅ Datasets ready under:", root)
# ASCII-only status marker for Windows consoles that default to cp1252.
print("\n[OK] Datasets ready under:", root)
print(" -", ledger_dir)
print(" -", nso_dir)
return 0
Expand Down