From dd82dedf1e4b773c14a80404c1065d9b312208d4 Mon Sep 17 00:00:00 2001
From: Nicholas Karlson <nicholaskarlson@gmail.com>
Date: Mon, 10 Nov 2025 23:09:18 -0800
Subject: [PATCH] feat(cli): standardize --outdir/--seed; add smoke tests;
 Makefile & CI updates

---
 .github/workflows/ci.yml      |  40 +++++---
 Makefile                      |  54 +++++++---
 scripts/__init__.py           |   0
 scripts/_cli.py               |  31 ++++++
 scripts/ch13_fitness_mixed.py | 128 ++++++++++++++++--------
 scripts/ch13_stroop_within.py | 115 +++++++++++++++------
 scripts/sim_fitness_2x2.py    | 153 ++++++++++++++++++----------
 scripts/sim_stroop.py         | 183 ++++++++++++++++++++--------------
 tests/test_cli_smoke.py       |  15 +++
 9 files changed, 489 insertions(+), 230 deletions(-)
 create mode 100644 scripts/__init__.py
 create mode 100644 scripts/_cli.py
 create mode 100644 tests/test_cli_smoke.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1836341..2189145 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,4 +1,5 @@
 name: ci
+
 on:
   workflow_dispatch:
   push:
@@ -7,24 +8,33 @@ on:
     branches: [ main ]
 
 jobs:
-  test:
-    runs-on: windows-latest # matches your local environment
+  windows-py310:
+    runs-on: windows-latest
+    timeout-minutes: 20
     env:
       PYTHONIOENCODING: utf-8
+    defaults:
+      run:
+        shell: pwsh
+
     steps:
-      - uses: actions/checkout@v4
+      - name: Checkout
+        uses: actions/checkout@v4
 
-      - uses: actions/setup-python@v5
+      - name: Set up Python
+        uses: actions/setup-python@v5
         with:
-          python-version: "3.10"
-          cache: "pip"
+          python-version: '3.10'
+          cache: pip
+
+      - name: Install make (Chocolatey)
+        run: choco install make -y
 
-      - name: Install deps
-        shell: bash
+      - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          if [ -f requirements.txt ]; then pip install -r requirements.txt; else pip install numpy pandas statsmodels matplotlib scipy; fi
-          if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
+          if (Test-Path requirements.txt) { pip install -r requirements.txt } else { pip install numpy pandas statsmodels matplotlib scipy }
+          if (Test-Path requirements-dev.txt) { pip install -r requirements-dev.txt } else { pip install ruff pytest }
 
       - name: Lint
         run: make lint
@@ -35,10 +45,12 @@ jobs:
       - name: Tiny Chapter 13 smoke
         run: make ch13-ci
 
-      - name: Upload plots (artifact)
+      - name: Upload artifacts (plots & data)
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: ch13-plots
-          path: outputs/
-          if-no-files-found: ignore
\ No newline at end of file
+          name: ch13-artifacts
+          if-no-files-found: ignore
+          path: |
+            data/synthetic/**
+            outputs/**
diff --git a/Makefile b/Makefile
index 3f191b4..3db22a5 100644
--- a/Makefile
+++ b/Makefile
@@ -1,29 +1,53 @@
+# Default target
 .DEFAULT_GOAL := help
 
+# Config
+PYTHON := python
+SEED ?= 123
+OUT_SYN := data/synthetic
+OUT_CH13 := outputs/ch13
+
 .PHONY: help
 help:
 	@echo "Available targets:"
-	@echo "  ch13      - full Chapter 13 run (plots saved)"
-	@echo "  ch13-ci   - tiny smoke (fast) for CI"
+	@echo "  ch13        - full Chapter 13 run (sim + analysis + plots)"
+	@echo "  ch13-ci     - tiny smoke run for CI (fast)"
+	@echo "  lint        - run ruff checks"
+	@echo "  lint-fix    - auto-fix with ruff"
+	@echo "  test        - run pytest"
+	@echo "  clean       - remove generated outputs"
 
+# ---- Fast CI smoke (small n, deterministic) ----
 .PHONY: ch13-ci
 ch13-ci:
-	python scripts/sim_stroop.py --n-subjects 6 --n-trials 10 --seed 2025
-	python scripts/ch13_stroop_within.py --save-plots
-	python scripts/sim_fitness_2x2.py --n-per-group 40 --seed 2025
-	python scripts/ch13_fitness_mixed.py --save-plots
+	$(PYTHON) -m scripts.sim_stroop --n-subjects 6 --n-trials 10 --seed $(SEED) --outdir $(OUT_SYN)
+	$(PYTHON) -m scripts.ch13_stroop_within --data $(OUT_SYN)/psych_stroop_trials.csv --outdir $(OUT_CH13) --save-plots --seed $(SEED)
+	$(PYTHON) -m scripts.sim_fitness_2x2 --n-per-group 10 --seed $(SEED) --outdir $(OUT_SYN)
+	$(PYTHON) -m scripts.ch13_fitness_mixed --data $(OUT_SYN)/fitness_long.csv --outdir $(OUT_CH13) --save-plots --seed $(SEED)
 
+# ---- Full Chapter 13 demo (default sizes) ----
 .PHONY: ch13
 ch13:
-	python scripts/sim_stroop.py
-	python scripts/ch13_stroop_within.py --save-plots
-	python scripts/sim_fitness_2x2.py
-	python scripts/ch13_fitness_mixed.py --save-plots
+	$(PYTHON) -m scripts.sim_stroop --seed $(SEED) --outdir $(OUT_SYN)
+	$(PYTHON) -m scripts.ch13_stroop_within --data $(OUT_SYN)/psych_stroop_trials.csv --outdir $(OUT_CH13) --save-plots --seed $(SEED)
+	$(PYTHON) -m scripts.sim_fitness_2x2 --seed $(SEED) --outdir $(OUT_SYN)
+	$(PYTHON) -m scripts.ch13_fitness_mixed --data $(OUT_SYN)/fitness_long.csv --outdir $(OUT_CH13) --save-plots --seed $(SEED)
 
-.PHONY: lint format test
+# ---- Quality gates ----
+.PHONY: lint
 lint:
-	ruff check tests/
-format:
-	black .
+	ruff check .
+
+.PHONY: lint-fix
+lint-fix:
+	ruff check . --fix
+
+.PHONY: test
 test:
-	pytest -q || true
\ No newline at end of file
+	pytest -q
+
+# ---- Utilities ----
+.PHONY: clean
+clean:
+	@echo "Removing generated outputs in $(OUT_SYN) and $(OUT_CH13)"
+	-@rm -rf $(OUT_SYN) $(OUT_CH13)
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/_cli.py b/scripts/_cli.py
new file mode 100644
index 0000000..524b1d4
--- /dev/null
+++ b/scripts/_cli.py
@@ -0,0 +1,31 @@
+from __future__ import annotations
+
+import argparse
+import pathlib
+import random
+
+import numpy as np
+
+
+def base_parser(description: str) -> argparse.ArgumentParser:
+    p = argparse.ArgumentParser(description=description)
+    p.add_argument(
+        "--outdir",
+        type=pathlib.Path,
+        default=pathlib.Path("outputs"),
+        help="Where to write outputs (plots, csv). Default: ./outputs",
+    )
+    p.add_argument("--seed", type=int, default=None, help="Random seed for reproducibility")
+    return p
+
+
+def apply_seed(seed: int | None) -> None:
+    if seed is None:
+        return
+    np.random.seed(seed)
+    random.seed(seed)
+    try:  # optional: seed torch if present
+        import torch  # pragma: no cover
+        torch.manual_seed(seed)
+    except Exception:
+        pass
diff --git a/scripts/ch13_fitness_mixed.py b/scripts/ch13_fitness_mixed.py
index 5127cf3..bcfd4ce 100644
--- a/scripts/ch13_fitness_mixed.py
+++ b/scripts/ch13_fitness_mixed.py
@@ -1,86 +1,130 @@
 # SPDX-License-Identifier: MIT
-import argparse, os, math
-import numpy as np, pandas as pd
-import statsmodels.formula.api as smf
-import matplotlib; matplotlib.use("Agg")
+"""
+Chapter 13 — Mixed fitness study analysis:
+- LMM with random intercepts
+- Within-group paired tests
+- Between-group Welch t + Hedges g
+
+Usage:
+  python -m scripts.ch13_fitness_mixed \
+      --data data/synthetic/fitness_long.csv \
+      --outdir outputs/ch13 --save-plots --seed 123
+"""
+
+import argparse
+from pathlib import Path
+
+import matplotlib
+matplotlib.use("Agg")  # safe for CI/headless
 import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import statsmodels.api as sm
+from scipy import stats
+
 
 # --- make Windows console UTF-8 friendly ---
 import sys
-if os.name == "nt":
+if sys.platform.startswith("win"):
     try:
         sys.stdout.reconfigure(encoding="utf-8")
     except Exception:
         pass
 
-def hedges_g(a, b):
+
+def hedges_g(a: pd.Series, b: pd.Series) -> float:
     na, nb = len(a), len(b)
     sa, sb = a.var(ddof=1), b.var(ddof=1)
-    s_p = np.sqrt(((na-1)*sa + (nb-1)*sb)/(na+nb-2))
-    d = (a.mean()-b.mean())/s_p
-    J = 1 - 3/(4*(na+nb)-9)
-    return d*J
-
-def main():
-    ap = argparse.ArgumentParser()
-    ap.add_argument("--data", default="data/synthetic/fitness_long.csv")
-    ap.add_argument("--save-plots", action="store_true")
+    s_p = np.sqrt(((na - 1) * sa + (nb - 1) * sb) / (na + nb - 2))
+    d = (a.mean() - b.mean()) / s_p
+    J = 1 - 3 / (4 * (na + nb) - 9)
+    return float(d * J)
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser(description="Analyze mixed-design fitness data")
+    ap.add_argument(
+        "--data",
+        type=Path,
+        default=Path("data/synthetic/fitness_long.csv"),
+        help="Path to long-format CSV.",
+    )
+    ap.add_argument("--save-plots", action="store_true", help="Save summary plot")
+    ap.add_argument(
+        "--outdir",
+        type=Path,
+        default=Path("outputs"),
+        help="Where to write plots (if --save-plots).",
+    )
+    ap.add_argument("--seed", type=int, default=None, help="Optional RNG seed")
     args = ap.parse_args()
 
+    if args.seed is not None:
+        np.random.seed(args.seed)  # kept for CLI consistency; not required by current code
+
     df = pd.read_csv(args.data)
-    df["time"]  = pd.Categorical(df["time"],  categories=["pre","post"], ordered=True)
+    df["time"] = pd.Categorical(df["time"], categories=["pre", "post"], ordered=True)
     df["group"] = pd.Categorical(df["group"])
 
     # Mixed model with random intercept per subject
-    import statsmodels.api as sm
-    md = sm.MixedLM.from_formula("strength ~ time*group + age + sex + bmi",
-                                 groups="id", re_formula="1", data=df)
+    md = sm.MixedLM.from_formula(
+        "strength ~ time*group + age + sex + bmi",
+        groups="id",
+        re_formula="1",
+        data=df,
+    )
     m = md.fit(method="lbfgs")
 
     print("=== LMM: strength ~ time*group + age + sex + bmi + (1|id) ===")
     print(m.summary())
 
     # Within-group pre→post paired tests
-    from scipy import stats
     print("\n=== Within-group pre→post (paired) ===")
-    for g in df["group"].cat.categories:
-        wide = (df[df.group==g]
-                .pivot_table(index="id", columns="time", values="strength", observed=False))
+    for gname in df["group"].cat.categories:
+        wide = (
+            df[df.group == gname]
+            .pivot_table(index="id", columns="time", values="strength", observed=False)
+        )
         pre, post = wide["pre"], wide["post"]
         diff = (post - pre).dropna()
-        mean = diff.mean(); sd = diff.std(ddof=1); n = diff.shape[0]
-        t = mean / (sd/np.sqrt(n))
-        p = 2*stats.t.sf(abs(t), df=n-1)
+        mean = diff.mean()
+        sd = diff.std(ddof=1)
+        n = diff.shape[0]
+        t = mean / (sd / np.sqrt(n))
+        p = 2 * stats.t.sf(abs(t), df=n - 1)
         d_paired = mean / sd
-        print(f"{g:8s} n={n:2d}  Δ={mean:6.2f}  t={t:6.2f}  p={p:.3g}  d_paired={d_paired:.2f}")
+        print(f"{gname:8s} n={n:2d}  Δ={mean:6.2f}  t={t:6.2f}  p={p:.3g}  d_paired={d_paired:.2f}")
 
     # Between-group at post (Welch t) + Hedges g
-    post = df[df.time=="post"]
+    post = df[df.time == "post"]
     cats = post.group.cat.categories
-    gA = post[post.group==cats[0]]["strength"]
-    gB = post[post.group==cats[1]]["strength"]
+    gA = post[post.group == cats[0]]["strength"]
+    gB = post[post.group == cats[1]]["strength"]
     t2, p2 = stats.ttest_ind(gA, gB, equal_var=False)
     g = hedges_g(gA, gB)
     print("\n=== Between groups at post (Welch t) ===")
     print(f"t={t2:.2f}  p={p2:.3g}  Hedges g={g:.2f}")
 
     if args.save_plots:
-        os.makedirs("outputs", exist_ok=True)
-        fig, ax = plt.subplots(figsize=(6,4))
-        # spaghetti
-        for (grp, sid), sub in df.sort_values("time").groupby(["group","id"], observed=False):
-            ax.plot([0,1], sub["strength"].values, alpha=0.2)
+        args.outdir.mkdir(parents=True, exist_ok=True)
+        fig, ax = plt.subplots(figsize=(6, 4))
+        # spaghetti per subject
+        for (_, sid), sub in df.sort_values("time").groupby(["group", "id"], observed=False):
+            ax.plot([0, 1], sub["strength"].values, alpha=0.2)
         # thick means per group
-        mean_pre  = df[df.time=="pre" ].groupby("group", observed=False)["strength"].mean()
-        mean_post = df[df.time=="post"].groupby("group", observed=False)["strength"].mean()
+        mean_pre = df[df.time == "pre"].groupby("group", observed=False)["strength"].mean()
+        mean_post = df[df.time == "post"].groupby("group", observed=False)["strength"].mean()
         for gname in df["group"].cat.categories:
-            ax.plot([0,1], [mean_pre[gname], mean_post[gname]], linewidth=3, label=gname)
-        ax.set_xticks([0,1]); ax.set_xticklabels(["pre","post"])
+            ax.plot([0, 1], [mean_pre[gname], mean_post[gname]], linewidth=3, label=gname)
+        ax.set_xticks([0, 1])
+        ax.set_xticklabels(["pre", "post"])
         ax.set_ylabel("Strength")
         ax.legend()
         fig.tight_layout()
-        fp = "outputs/ch13_fitness_spaghetti.png"; fig.savefig(fp, dpi=150)
-        print("Saved plot ->", fp)
+        out_path = args.outdir / "ch13_fitness_spaghetti.png"
+        fig.savefig(out_path, dpi=150)
+        print(f"Saved plot -> {out_path}")
+
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/scripts/ch13_stroop_within.py b/scripts/ch13_stroop_within.py
index 0826a24..f155de4 100644
--- a/scripts/ch13_stroop_within.py
+++ b/scripts/ch13_stroop_within.py
@@ -1,59 +1,103 @@
 # SPDX-License-Identifier: MIT
 """
 Chapter 13 — Within-Subjects (Stroop): paired t-test vs. mixed model
+
 Usage:
-  python scripts/ch13_stroop_within.py --save-plots
+  python -m scripts.ch13_stroop_within --save-plots \
+      --data data/synthetic/psych_stroop_trials.csv \
+      --outdir outputs/ch13 --seed 123
+
 Options:
-  --data data/synthetic/psych_stroop_trials.csv
+  --data   Path to trial-level CSV (default: data/synthetic/psych_stroop_trials.csv)
+  --outdir Where to write plots (default: outputs)
+  --seed   RNG seed (not required; included for consistency across scripts)
   --min-rt 200 --max-rt 2000    # trial-level trims
 """
-import argparse, os, math, numpy as np, pandas as pd
-import statsmodels.api as sm
-import statsmodels.formula.api as smf
-from scipy import stats
+
+import argparse
+import pathlib
+from pathlib import Path
+
 import matplotlib
-matplotlib.use("Agg")
+matplotlib.use("Agg")  # Safe for CI/headless environments
 import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import statsmodels.api as sm  # noqa: F401 (imported for users who inspect models)
+import statsmodels.formula.api as smf
+from scipy import stats
+
 
-def load_trials(path):
+def load_trials(path: Path) -> pd.DataFrame:
     df = pd.read_csv(path)
     df["condition"] = df["condition"].astype("category")
     return df
 
-def cohen_dz(a, b):
-    # within-subject effect size
+
+def cohen_dz(a: pd.Series, b: pd.Series) -> float:
+    """Within-subject effect size (dz) for paired data."""
     diff = a - b
-    return diff.mean()/diff.std(ddof=1)
-
-def main():
-    ap = argparse.ArgumentParser()
-    ap.add_argument("--data", default="data/synthetic/psych_stroop_trials.csv")
-    ap.add_argument("--min-rt", type=float, default=200.0)
-    ap.add_argument("--max-rt", type=float, default=2000.0)
-    ap.add_argument("--save-plots", action="store_true")
+    return float(diff.mean() / diff.std(ddof=1))
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser(description="Paired t-test vs. mixed model on Stroop data")
+    ap.add_argument(
+        "--data",
+        type=pathlib.Path,
+        default=Path("data/synthetic/psych_stroop_trials.csv"),
+        help="Path to trial-level CSV.",
+    )
+    ap.add_argument("--min-rt", type=float, default=200.0, help="Min RT (ms) to keep")
+    ap.add_argument("--max-rt", type=float, default=2000.0, help="Max RT (ms) to keep")
+    ap.add_argument("--save-plots", action="store_true", help="Save summary plots")
+    ap.add_argument(
+        "--outdir",
+        type=pathlib.Path,
+        default=Path("outputs"),
+        help="Where to write plots (if --save-plots).",
+    )
+    # Included for CLI consistency; not used by the current analyses but harmless.
+    ap.add_argument("--seed", type=int, default=None, help="RNG seed (optional)")
     args = ap.parse_args()
 
-    os.makedirs("outputs", exist_ok=True)
+    # Optional seeding keeps behavior consistent with other scripts
+    if args.seed is not None:
+        np.random.seed(args.seed)
+
+    # Ensure output directory exists when saving plots
+    if args.save_plots:
+        args.outdir.mkdir(parents=True, exist_ok=True)
+
     df = load_trials(args.data)
 
     # Keep correct trials and trim RTs
     before = len(df)
-    df = df[(df["correct"] == 1) & (df["rt_ms"].between(args.min_rt, args.max_rt))]
-    print(f"Filtered: kept {len(df)}/{before} trials (correct & {args.min_rt}-{args.max_rt} ms)")
+    df = df[(df["correct"] == 1) & (df["rt_ms"].between(args.min_rt, args.max_rt))].copy()
+    print(
+        f"Filtered: kept {len(df)}/{before} trials "
+        f"(correct & {args.min_rt}-{args.max_rt} ms)"
+    )
 
     # Log-transform RT (typical for skew)
     df["log_rt"] = np.log(df["rt_ms"])
 
     # Subject means per condition (paired t-test)
-    subj_means = df.groupby(["subject", "condition"], as_index=False, observed=False)["log_rt"].mean()
+    subj_means = (
+        df.groupby(["subject", "condition"], as_index=False, observed=False)["log_rt"]
+        .mean()
+    )
     wide = subj_means.pivot(index="subject", columns="condition", values="log_rt")
+
     # paired t-test (incongruent > congruent expected)
-    tstat, pval = stats.ttest_rel(wide["incongruent"], wide["congruent"], nan_policy="omit")
+    tstat, pval = stats.ttest_rel(
+        wide["incongruent"], wide["congruent"], nan_policy="omit"
+    )
     dz = cohen_dz(wide["incongruent"], wide["congruent"])
 
     print("\n=== Paired t-test on subject means (log RT) ===")
     print(f"t = {tstat:.3f}, p = {pval:.3g}, Cohen's dz = {dz:.3f}")
-    delta_ms = (np.exp(wide["incongruent"].mean()) - np.exp(wide["congruent"].mean()))
+    delta_ms = np.exp(wide["incongruent"].mean()) - np.exp(wide["congruent"].mean())
     print(f"Back-translated mean RT difference ≈ {delta_ms:.1f} ms (incongruent - congruent)")
 
     # Mixed model on trial-level log RT
@@ -72,24 +116,29 @@ def main():
     # Plots
     if args.save_plots:
         # per-subject spaghetti of means
-        plt.figure(figsize=(6,4))
+        plt.figure(figsize=(6, 4))
         for _, row in wide.iterrows():
-            plt.plot([0,1], [row["congruent"], row["incongruent"]], alpha=0.3)
-        plt.xticks([0,1], ["congruent","incongruent"])
+            plt.plot([0, 1], [row["congruent"], row["incongruent"]], alpha=0.3)
+        plt.xticks([0, 1], ["congruent", "incongruent"])
         plt.ylabel("Mean log RT")
         plt.title("Within-subject means (log RT)")
-        plt.tight_layout(); plt.savefig("outputs/ch13_stroop_subject_means.png", dpi=150)
+        plt.tight_layout()
+        p1 = args.outdir / "ch13_stroop_subject_means.png"
+        plt.savefig(p1, dpi=150)
 
         # violin/box of trial-level log RT by condition
-        plt.figure(figsize=(6,4))
-        data_c = [df.loc[df["condition"]==c,"log_rt"].values for c in ["congruent","incongruent"]]
+        plt.figure(figsize=(6, 4))
+        data_c = [df.loc[df["condition"] == c, "log_rt"].values for c in ["congruent", "incongruent"]]
         plt.violinplot(data_c, showmeans=True)
-        plt.xticks([1,2], ["congruent","incongruent"])
+        plt.xticks([1, 2], ["congruent", "incongruent"])
         plt.ylabel("Trial log RT")
         plt.title("Trial-level distribution by condition")
-        plt.tight_layout(); plt.savefig("outputs/ch13_stroop_violins.png", dpi=150)
+        plt.tight_layout()
+        p2 = args.outdir / "ch13_stroop_violins.png"
+        plt.savefig(p2, dpi=150)
+
+        print(f"Saved plots -> {p1}, {p2}")
 
-        print("Saved plots -> outputs/ch13_stroop_subject_means.png, ch13_stroop_violins.png")
 
 if __name__ == "__main__":
     main()
diff --git a/scripts/sim_fitness_2x2.py b/scripts/sim_fitness_2x2.py
index c21aa4f..18010ec 100644
--- a/scripts/sim_fitness_2x2.py
+++ b/scripts/sim_fitness_2x2.py
@@ -1,74 +1,123 @@
 # SPDX-License-Identifier: MIT
-import os, json, time, argparse
-import numpy as np, pandas as pd
+"""
+Simulate a 2x2 mixed design (Group between × Time within) fitness study.
+Creates in --outdir (default: data/synthetic):
+  fitness_subjects.csv
+  fitness_long.csv
+  fitness_meta.json
+"""
 
-# globals configured in main()
-rng = np.random.default_rng()
-N = 80
-GROUPS = ["ProgramA","ProgramB"]
+import argparse
+import json
+import time
+import pathlib
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+# Defaults
 DEFAULT_SEED = 7
+GROUPS = ["ProgramA", "ProgramB"]
+N_PER_GROUP_DEFAULT = 40  # total N = n_per_group * 2
 
-def simulate(n_per_group=None, seed=DEFAULT_SEED):
-    global rng, N
-    # Allow overriding globals via args, primarily for testing
-    if n_per_group is not None:
-        N = n_per_group * len(GROUPS) # Adjust N based on n_per_group
-    
+
+def simulate(
+    n_per_group: int = N_PER_GROUP_DEFAULT,
+    seed: int = DEFAULT_SEED,
+    outdir: Path = Path("data/synthetic"),
+):
+    """Simulates a simple strength-training experiment and writes CSV/JSON to outdir."""
     rng = np.random.default_rng(seed)
+    outdir.mkdir(parents=True, exist_ok=True)
+
+    n_total = n_per_group * len(GROUPS)
+
+    subjects = pd.DataFrame(
+        {
+            "id": np.arange(1, n_total + 1),
+            "group": rng.choice(GROUPS, size=n_total, replace=True),
+            "age": rng.integers(18, 45, size=n_total),  # 18..44
+            "sex": rng.choice(["F", "M"], size=n_total, p=[0.5, 0.5]),
+            "bmi": rng.normal(24, 3.5, size=n_total).clip(17, 38),
+        }
+    )
+
+    # Baseline strength with small covariate effects
+    base = (
+        80
+        + 0.8 * (subjects["age"] - 30)
+        + 1.2 * (subjects["bmi"] - 24)
+        + rng.normal(0, 8, size=n_total)
+    )
 
-    os.makedirs("data/synthetic", exist_ok=True)
-    subjects = pd.DataFrame({
-        "id": np.arange(1, N+1),
-        "group": rng.choice(GROUPS, size=N, replace=True),
-        "age": rng.integers(18, 45, size=N),
-        "sex": rng.choice(["F","M"], size=N, p=[0.5,0.5]),
-        "bmi": rng.normal(24, 3.5, size=N).clip(17, 38),
-    })
-
-    # baseline strength with small covariate effects
-    base = (80
-            + 0.8*(subjects["age"]-30)
-            + 1.2*(subjects["bmi"]-24)
-            + rng.normal(0, 8, size=N))
-
-    # program gains
-    gain_A = rng.normal(8, 4, size=N)
-    gain_B = rng.normal(12, 4, size=N)
-    gain = np.where(subjects["group"].to_numpy()=="ProgramA", gain_A, gain_B)
-
-    long = pd.concat([
-        pd.DataFrame({"id": subjects["id"], "time":"pre",
-                      "strength": base + rng.normal(0, 3, size=N)}),
-        pd.DataFrame({"id": subjects["id"], "time":"post",
-                      "strength": base + gain + rng.normal(0, 3, size=N)})
-    ], ignore_index=True).merge(subjects, on="id")
-
-    subjects.to_csv("data/synthetic/fitness_subjects.csv", index=False)
-    long.to_csv("data/synthetic/fitness_long.csv", index=False)
+    # Program gains (B > A on average)
+    gain_A = rng.normal(8, 4, size=n_total)
+    gain_B = rng.normal(12, 4, size=n_total)
+    gain = np.where(subjects["group"].to_numpy() == "ProgramA", gain_A, gain_B)
+
+    long = pd.concat(
+        [
+            pd.DataFrame(
+                {
+                    "id": subjects["id"],
+                    "time": "pre",
+                    "strength": base + rng.normal(0, 3, size=n_total),
+                }
+            ),
+            pd.DataFrame(
+                {
+                    "id": subjects["id"],
+                    "time": "post",
+                    "strength": base + gain + rng.normal(0, 3, size=n_total),
+                }
+            ),
+        ],
+        ignore_index=True,
+    ).merge(subjects, on="id")
+
+    # Write artifacts
+    subjects_path = outdir / "fitness_subjects.csv"
+    long_path = outdir / "fitness_long.csv"
+    subjects.to_csv(subjects_path, index=False)
+    long.to_csv(long_path, index=False)
 
     meta = dict(
         seed=seed,
-        n=int(N),
+        n=int(n_total),
+        n_per_group=int(n_per_group),
         design="2x2 mixed (Group between × Time within)",
         programs=GROUPS,
         dv="strength_1RM_like",
-        generated_utc=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
+        generated_utc=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
     )
-    with open("data/synthetic/fitness_meta.json","w") as f:
+    meta_path = outdir / "fitness_meta.json"
+    with meta_path.open("w") as f:
         json.dump(meta, f, indent=2)
 
-    print("Wrote fitness_subjects.csv, fitness_long.csv, fitness_meta.json")
-    # Return for testing convenience
+    print(f"Wrote {subjects_path}, {long_path}, {meta_path}")
     return subjects, long
 
+
 def main():
-    ap = argparse.ArgumentParser()
+    ap = argparse.ArgumentParser(description="Simulate mixed-design fitness study data")
     ap.add_argument("--seed", type=int, default=DEFAULT_SEED, help="RNG seed")
-    ap.add_argument("--n-per-group", type=int, default=40, help="Number of subjects per group")
+    ap.add_argument(
+        "--n-per-group",
+        type=int,
+        default=N_PER_GROUP_DEFAULT,
+        help="Number of subjects per program",
+    )
+    ap.add_argument(
+        "--outdir",
+        type=pathlib.Path,
+        default=Path("data/synthetic"),
+        help="Where to write outputs (CSV/JSON).",
+    )
     args = ap.parse_args()
-    
-    # Pass CLI args to simulate
-    simulate(n_per_group=args.n_per_group, seed=args.seed)
+
+    simulate(n_per_group=args.n_per_group, seed=args.seed, outdir=args.outdir)
+
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/scripts/sim_stroop.py b/scripts/sim_stroop.py
index 607b69e..35a22ce 100644
--- a/scripts/sim_stroop.py
+++ b/scripts/sim_stroop.py
@@ -1,34 +1,40 @@
-import argparse
 # SPDX-License-Identifier: MIT
 """
 Simulate trial-level Stroop data (plausible undergrad study).
-Creates:
-  data/synthetic/psych_stroop_trials.csv
-  data/synthetic/psych_stroop_subjects.csv
-  data/synthetic/psych_stroop_meta.json
+Creates in --outdir (default: data/synthetic):
+  psych_stroop_trials.csv
+  psych_stroop_subjects.csv
+  psych_stroop_meta.json
 """
-import os, time, json
+
+import argparse
+import json
+import time
+import pathlib
+from pathlib import Path
+
 import numpy as np
 import pandas as pd
 
-RNG_SEED = 42
-rng = np.random.default_rng(RNG_SEED) # Will be reset by simulate()
-
-N_SUBJ = 60
-N_TRIALS_PER_COND = 100
+# Defaults (kept explicit so meta stays in sync)
+RNG_SEED_DEFAULT = 42
+N_SUBJ_DEFAULT = 60
+N_TRIALS_PER_COND_DEFAULT = 100
 CONDITIONS = ["congruent", "incongruent"]
 
-# Parameters (kept explicit so meta stays in sync)
-BASE_RT_CONGRUENT = 550      # ms
-BASE_RT_INCONGRUENT = 650    # ms (=> 100 ms penalty)
-BETWEEN_PERSON_RT_SD = 80    # ms subject intercept variation
-TRIAL_NOISE_SD = 120         # ms trial-to-trial noise
-BASELINE_SPEED_EFFECT = 25   # ms per +1 SD baseline_speed (faster -> shave time)
+# Parameters
+BASE_RT_CONGRUENT = 550       # ms
+BASE_RT_INCONGRUENT = 650     # ms (=> 100 ms penalty)
+BETWEEN_PERSON_RT_SD = 80     # ms subject intercept variation
+TRIAL_NOISE_SD = 120          # ms trial-to-trial noise
+BASELINE_SPEED_EFFECT = 25    # ms per +1 SD baseline_speed (faster -> shave time)
+
 ACC_BASE_CONGRUENT = 0.97
-ACC_BASE_INCONGRUENT = 0.90  # (=> 0.07 penalty)
-ACC_BSL_SPEED_PENALTY = 0.03 # subtract per +1 SD if positive
+ACC_BASE_INCONGRUENT = 0.90   # (=> 0.07 penalty)
+ACC_BSL_SPEED_PENALTY = 0.03  # subtract per +1 SD if positive
 ACC_ERR_BIAS_SD = 0.05
 ACC_CLIP = (0.75, 0.99)
+
 OUTLIER_RATE_ANTICIP = 0.03
 OUTLIER_RT_ANTICIP_MU = 150
 OUTLIER_RT_ANTICIP_SD = 30
@@ -37,35 +43,37 @@
 OUTLIER_RT_LAPSE_SD = 200
 RT_FLOOR = 80
 
-def simulate(n_subjects=N_SUBJ, n_trials=N_TRIALS_PER_COND, seed=RNG_SEED):
-    global rng, N_SUBJ, N_TRIALS_PER_COND, RNG_SEED
-    
-    # Set globals based on args, primarily for testing
-    N_SUBJ = n_subjects
-    N_TRIALS_PER_COND = n_trials
-    RNG_SEED = seed
-    rng = np.random.default_rng(RNG_SEED)
-    
-    os.makedirs("data/synthetic", exist_ok=True)
+
+def simulate(
+    n_subjects: int = N_SUBJ_DEFAULT,
+    n_trials: int = N_TRIALS_PER_COND_DEFAULT,
+    seed: int = RNG_SEED_DEFAULT,
+    outdir: Path = Path("data/synthetic"),
+):
+    """Simulates Stroop data and writes CSVs to outdir."""
+    rng = np.random.default_rng(seed)
+    outdir.mkdir(parents=True, exist_ok=True)
 
     # subject-level covariates
-    subjects = pd.DataFrame({
-        "subject": np.arange(1, N_SUBJ + 1),
-        "gender": rng.choice(["F", "M"], size=N_SUBJ, p=[0.6, 0.4]),
-        "handedness": rng.choice(["R", "L"], size=N_SUBJ, p=[0.9, 0.1]),
-        # baseline processing speed (lower = faster RT offset)
-        "baseline_speed": rng.normal(loc=0.0, scale=1.0, size=N_SUBJ)
-    })
+    subjects = pd.DataFrame(
+        {
+            "subject": np.arange(1, n_subjects + 1),
+            "gender": rng.choice(["F", "M"], size=n_subjects, p=[0.6, 0.4]),
+            "handedness": rng.choice(["R", "L"], size=n_subjects, p=[0.9, 0.1]),
+            # baseline processing speed (lower = faster RT offset)
+            "baseline_speed": rng.normal(loc=0.0, scale=1.0, size=n_subjects),
+        }
+    )
 
     rows = []
     for _, s in subjects.iterrows():
         # random subject intercept (ms)
-        subj_offset = rng.normal(0, BETWEEN_PERSON_RT_SD)  # between-person variability
+        subj_offset = rng.normal(0, BETWEEN_PERSON_RT_SD)
         # accuracy tendency (fast-but-error-prone archetype)
         err_bias = rng.normal(0, ACC_ERR_BIAS_SD)
 
         for cond in CONDITIONS:
-            for t in range(N_TRIALS_PER_COND):
+            for t in range(n_trials):
                 # base RTs
                 base = BASE_RT_CONGRUENT if cond == "congruent" else BASE_RT_INCONGRUENT
                 # trial noise
@@ -81,37 +89,54 @@ def simulate(n_subjects=N_SUBJ, n_trials=N_TRIALS_PER_COND, seed=RNG_SEED):
 
                 # sprinkle outliers (anticipations & lapses)
                 if rng.random() < OUTLIER_RATE_ANTICIP:
-                    rt = rng.normal(OUTLIER_RT_ANTICIP_MU, OUTLIER_RT_ANTICIP_SD)    # anticipatory
+                    rt = rng.normal(OUTLIER_RT_ANTICIP_MU, OUTLIER_RT_ANTICIP_SD)  # anticipatory
                 if rng.random() < OUTLIER_RATE_LAPSE:
-                    rt = rng.normal(OUTLIER_RT_LAPSE_MU, OUTLIER_RT_LAPSE_SD)      # lapse
+                    rt = rng.normal(OUTLIER_RT_LAPSE_MU, OUTLIER_RT_LAPSE_SD)  # lapse
 
                 rt = max(RT_FLOOR, float(rt))  # keep sane
 
-                rows.append({
-                    "subject": int(s["subject"]),
-                    "condition": cond,
-                    "trial": int(t + 1),
-                    "rt_ms": rt,
-                    "correct": int(correct),
-                })
+                rows.append(
+                    {
+                        "subject": int(s["subject"]),
+                        "condition": cond,
+                        "trial": int(t + 1),
+                        "rt_ms": rt,
+                        "correct": int(correct),
+                    }
+                )
 
     trials = pd.DataFrame(rows)
 
     # write CSVs
-    subjects.to_csv("data/synthetic/psych_stroop_subjects.csv", index=False)
-    trials.to_csv("data/synthetic/psych_stroop_trials.csv", index=False)
-    print("Wrote data/synthetic/psych_stroop_subjects.csv and psych_stroop_trials.csv")
-    
+    subjects_path = outdir / "psych_stroop_subjects.csv"
+    trials_path = outdir / "psych_stroop_trials.csv"
+    subjects.to_csv(subjects_path, index=False)
+    trials.to_csv(trials_path, index=False)
+    print(f"Wrote {subjects_path} and {trials_path}")
+
     # Write meta *after* simulation is done
-    write_meta(subjects, trials)
-    
+    write_meta(
+        subjects=subjects,
+        trials=trials,
+        outdir=outdir,
+        seed=seed,
+        n_trials_per_cond=n_trials,
+    )
+
     return subjects, trials
 
-def write_meta(subjects: pd.DataFrame, trials: pd.DataFrame):
+
+def write_meta(
+    subjects: pd.DataFrame,
+    trials: pd.DataFrame,
+    outdir: Path,
+    seed: int,
+    n_trials_per_cond: int,
+):
     meta = dict(
-        seed=RNG_SEED,
+        seed=seed,
         n_subjects=int(len(subjects)),
-        n_trials_per_cond=int(N_TRIALS_PER_COND),
+        n_trials_per_cond=int(n_trials_per_cond),
         conditions=CONDITIONS,
         rt_ms=dict(
             base_congruent=BASE_RT_CONGRUENT,
@@ -121,10 +146,12 @@ def write_meta(subjects: pd.DataFrame, trials: pd.DataFrame):
             trial_noise_sd=TRIAL_NOISE_SD,
             baseline_speed_effect_ms_per_sd=BASELINE_SPEED_EFFECT,
             outliers=dict(
-                anticipations=dict(rate=OUTLIER_RATE_ANTICIP,
-                                   mu=OUTLIER_RT_ANTICIP_MU, sd=OUTLIER_RT_ANTICIP_SD),
-                lapses=dict(rate=OUTLIER_RATE_LAPSE,
-                            mu=OUTLIER_RT_LAPSE_MU, sd=OUTLIER_RT_LAPSE_SD),
+                anticipations=dict(
+                    rate=OUTLIER_RATE_ANTICIP, mu=OUTLIER_RT_ANTICIP_MU, sd=OUTLIER_RT_ANTICIP_SD
+                ),
+                lapses=dict(
+                    rate=OUTLIER_RATE_LAPSE, mu=OUTLIER_RT_LAPSE_MU, sd=OUTLIER_RT_LAPSE_SD
+                ),
             ),
             floor_ms=RT_FLOOR,
         ),
@@ -137,26 +164,34 @@ def write_meta(subjects: pd.DataFrame, trials: pd.DataFrame):
             clip=list(ACC_CLIP),
         ),
         generated_utc=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
-        file_rows=dict(
-            subjects=int(len(subjects)),
-            trials=int(len(trials)),
-        ),
+        file_rows=dict(subjects=int(len(subjects)), trials=int(len(trials))),
     )
-    with open("data/synthetic/psych_stroop_meta.json", "w") as f:
+    meta_path = outdir / "psych_stroop_meta.json"
+    with meta_path.open("w") as f:
         json.dump(meta, f, indent=2)
-    print("Wrote data/synthetic/psych_stroop_meta.json")
+    print(f"Wrote {meta_path}")
 
 
 def main():
-    ap = argparse.ArgumentParser()
-    ap.add_argument("--seed", type=int, default=RNG_SEED)
-    ap.add_argument("--n-subjects", type=int, default=N_SUBJ)
-    ap.add_argument("--n-trials", type=int, default=N_TRIALS_PER_COND)
+    ap = argparse.ArgumentParser(description="Simulate trial-level Stroop data")
+    ap.add_argument("--seed", type=int, default=RNG_SEED_DEFAULT)
+    ap.add_argument("--n-subjects", type=int, default=N_SUBJ_DEFAULT)
+    ap.add_argument("--n-trials", type=int, default=N_TRIALS_PER_COND_DEFAULT)
+    ap.add_argument(
+        "--outdir",
+        type=pathlib.Path,
+        default=Path("data/synthetic"),
+        help="Where to write outputs (CSV/JSON).",
+    )
     args = ap.parse_args()
-    
-    # Pass CLI args to simulate
-    simulate(n_subjects=args.n_subjects, n_trials=args.n_trials, seed=args.seed)
+
+    simulate(
+        n_subjects=args.n_subjects,
+        n_trials=args.n_trials,
+        seed=args.seed,
+        outdir=args.outdir,
+    )
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/tests/test_cli_smoke.py b/tests/test_cli_smoke.py
new file mode 100644
index 0000000..687a208
--- /dev/null
+++ b/tests/test_cli_smoke.py
@@ -0,0 +1,15 @@
+from __future__ import annotations
+import pathlib, subprocess, sys, tempfile
+
+SCRIPTS = ["ch13_stroop_within","ch13_fitness_mixed","sim_stroop","sim_fitness_2x2"]
+
+def run_module(mod: str) -> None:
+    root = pathlib.Path(__file__).resolve().parents[1]
+    with tempfile.TemporaryDirectory() as tmpd:
+        cmd = [sys.executable, "-m", f"scripts.{mod}", "--outdir", tmpd, "--seed", "42"]
+        res = subprocess.run(cmd, cwd=root, capture_output=True, text=True)
+        assert res.returncode == 0, res.stderr or res.stdout
+
+def test_scripts_run_with_cli():
+    for m in SCRIPTS:
+        run_module(m)
\ No newline at end of file