diff --git a/data/synthetic/fitness_meta.json b/data/synthetic/fitness_meta.json new file mode 100644 index 0000000..f24a6ec --- /dev/null +++ b/data/synthetic/fitness_meta.json @@ -0,0 +1,11 @@ +{ + "seed": 7, + "n": 80, + "design": "2x2 mixed (Group between \u00d7 Time within)", + "programs": [ + "ProgramA", + "ProgramB" + ], + "dv": "strength_1RM_like", + "generated_utc": "2025-11-09T18:46:49Z" +} \ No newline at end of file diff --git a/docs/ch13_fitness.md b/docs/ch13_fitness.md new file mode 100644 index 0000000..a6ab25f --- /dev/null +++ b/docs/ch13_fitness.md @@ -0,0 +1,8 @@ +# Fitness 2×2 Mixed Example (Chapter 13) + +**Data**: `data/synthetic/fitness_subjects.csv`, `data/synthetic/fitness_long.csv`, and `data/synthetic/fitness_meta.json` + +**Run** +```bash +python scripts/sim_fitness_2x2.py +python scripts/ch13_fitness_mixed.py --save-plots \ No newline at end of file diff --git a/scripts/ch13_fitness_mixed.py b/scripts/ch13_fitness_mixed.py new file mode 100644 index 0000000..5eb1491 --- /dev/null +++ b/scripts/ch13_fitness_mixed.py @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: MIT +import argparse, os +import numpy as np, pandas as pd +import statsmodels.formula.api as smf +import matplotlib; matplotlib.use("Agg") +import matplotlib.pyplot as plt + +def hedges_g(a, b): + na, nb = len(a), len(b) + sa, sb = a.var(ddof=1), b.var(ddof=1) + s_p = np.sqrt(((na-1)*sa + (nb-1)*sb)/(na+nb-2)) + d = (a.mean()-b.mean())/s_p + J = 1 - 3/(4*(na+nb)-9) + return d*J + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--data", default="data/synthetic/fitness_long.csv") + ap.add_argument("--save-plots", action="store_true") + args = ap.parse_args() + + df = pd.read_csv(args.data) + df["time"] = pd.Categorical(df["time"], categories=["pre","post"], ordered=True) + df["group"] = pd.Categorical(df["group"]) + + # Mixed model with random intercept per subject + import statsmodels.api as sm + md = sm.MixedLM.from_formula("strength ~ time*group + age + sex + bmi", + groups="id", re_formula="1", data=df) + m = md.fit(method="lbfgs") + + print("=== LMM: strength ~ time*group + age + sex + bmi + (1|id) ===") + print(m.summary()) + + # Within-group pre→post paired tests + from scipy import stats + print("\n=== Within-group pre→post (paired) ===") + for g in df["group"].cat.categories: + wide = (df[df.group==g] + .pivot_table(index="id", columns="time", values="strength", observed=False)) + pre, post = wide["pre"], wide["post"] + diff = (post - pre).dropna() + mean = diff.mean(); sd = diff.std(ddof=1); n = diff.shape[0] + t = mean / (sd/np.sqrt(n)) + p = 2*stats.t.sf(abs(t), df=n-1) + d_paired = mean / sd + print(f"{g:8s} n={n:2d} Δ={mean:6.2f} t={t:6.2f} p={p:.3g} d_paired={d_paired:.2f}") + + # Between-group at post (Welch t) + Hedges g + post = df[df.time=="post"] + cats = post.group.cat.categories + gA = post[post.group==cats[0]]["strength"] + gB = post[post.group==cats[1]]["strength"] + t2, p2 = stats.ttest_ind(gA, gB, equal_var=False) + g = hedges_g(gA, gB) + print("\n=== Between groups at post (Welch t) ===") + print(f"t={t2:.2f} p={p2:.3g} Hedges g={g:.2f}") + + if args.save_plots: + os.makedirs("outputs", exist_ok=True) + fig, ax = plt.subplots(figsize=(6,4)) + # spaghetti + for (grp, sid), sub in df.sort_values("time").groupby(["group","id"], observed=False): + ax.plot([0,1], sub["strength"].values, alpha=0.2) + # thick means per group + mean_pre = df[df.time=="pre" ].groupby("group", observed=False)["strength"].mean() + mean_post = df[df.time=="post"].groupby("group", observed=False)["strength"].mean() + for gname in df["group"].cat.categories: + ax.plot([0,1], [mean_pre[gname], mean_post[gname]], linewidth=3, label=gname) + ax.set_xticks([0,1]); ax.set_xticklabels(["pre","post"]) + ax.set_ylabel("Strength") + ax.legend() + fig.tight_layout() + fp = "outputs/ch13_fitness_spaghetti.png"; fig.savefig(fp, dpi=150) + print("Saved plot ->", fp) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/sim_fitness_2x2.py b/scripts/sim_fitness_2x2.py index 4e0d00f..c4aac84 100644 --- a/scripts/sim_fitness_2x2.py +++ b/scripts/sim_fitness_2x2.py @@ -1,10 +1,11 @@ # SPDX-License-Identifier: MIT import os, json, time import numpy as np, pandas as pd -rng = np.random.default_rng(7) +rng = np.random.default_rng(7) N = 80 GROUPS = ["ProgramA","ProgramB"] + def simulate(): os.makedirs("data/synthetic", exist_ok=True) subjects = pd.DataFrame({ @@ -14,25 +15,40 @@ def simulate(): "sex": rng.choice(["F","M"], size=N, p=[0.5,0.5]), "bmi": rng.normal(24, 3.5, size=N).clip(17, 38), }) - # baseline strength varies by covariates a bit - base = 80 + 0.8*(subjects["age"]-30) + 1.2*(subjects["bmi"]-24) + rng.normal(0, 8, N) - # program effects (post - pre), slightly larger for ProgramB - gain_A = rng.normal(8, 4, N) - gain_B = rng.normal(12, 4, N) - gain = np.where(subjects["group"]=="ProgramA", gain_A, gain_B) + + # baseline strength with small covariate effects + base = (80 + + 0.8*(subjects["age"]-30) + + 1.2*(subjects["bmi"]-24) + + rng.normal(0, 8, size=N)) + + # program gains + gain_A = rng.normal(8, 4, size=N) + gain_B = rng.normal(12, 4, size=N) + gain = np.where(subjects["group"].to_numpy()=="ProgramA", gain_A, gain_B) long = pd.concat([ - pd.DataFrame({"id": subjects["id"], "time":"pre", "strength": base + rng.normal(0, 3, N)}), - pd.DataFrame({"id": subjects["id"], "time":"post", "strength": base + gain + rng.normal(0, 3, N)}) + pd.DataFrame({"id": subjects["id"], "time":"pre", + "strength": base + rng.normal(0, 3, size=N)}), + pd.DataFrame({"id": subjects["id"], "time":"post", + "strength": base + gain + rng.normal(0, 3, size=N)}) ], ignore_index=True).merge(subjects, on="id") subjects.to_csv("data/synthetic/fitness_subjects.csv", index=False) long.to_csv("data/synthetic/fitness_long.csv", index=False) + meta = dict( - seed=7, n= int(N), design="2x2 mixed (Group between × Time within)", - programs=GROUPS, dv="strength_1RM_like", + seed=7, + n=int(N), + design="2x2 mixed (Group between × Time within)", + programs=GROUPS, + dv="strength_1RM_like", generated_utc=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) ) - with open("data/synthetic/fitness_meta.json","w") as f: json.dump(meta, f, indent=2) + with open("data/synthetic/fitness_meta.json","w") as f: + json.dump(meta, f, indent=2) + print("Wrote fitness_subjects.csv, fitness_long.csv, fitness_meta.json") -if __name__ == "__main__": simulate() + +if __name__ == "__main__": + simulate() \ No newline at end of file