Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions data/synthetic/fitness_meta.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"seed": 7,
"n": 80,
"design": "2x2 mixed (Group between \u00d7 Time within)",
"programs": [
"ProgramA",
"ProgramB"
],
"dv": "strength_1RM_like",
"generated_utc": "2025-11-09T18:46:49Z"
}
8 changes: 8 additions & 0 deletions docs/ch13_fitness.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Fitness 2×2 Mixed Example (Chapter 13)

**Data**: `data/synthetic/fitness_subjects.csv`, `data/synthetic/fitness_long.csv`, and `data/synthetic/fitness_meta.json`

**Run**
```bash
python scripts/sim_fitness_2x2.py
python scripts/ch13_fitness_mixed.py --save-plots
78 changes: 78 additions & 0 deletions scripts/ch13_fitness_mixed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# SPDX-License-Identifier: MIT
import argparse, os
import numpy as np, pandas as pd
import statsmodels.formula.api as smf
import matplotlib; matplotlib.use("Agg")
import matplotlib.pyplot as plt

def hedges_g(a, b):
na, nb = len(a), len(b)
sa, sb = a.var(ddof=1), b.var(ddof=1)
s_p = np.sqrt(((na-1)*sa + (nb-1)*sb)/(na+nb-2))
d = (a.mean()-b.mean())/s_p
J = 1 - 3/(4*(na+nb)-9)
return d*J

def main():
ap = argparse.ArgumentParser()
ap.add_argument("--data", default="data/synthetic/fitness_long.csv")
ap.add_argument("--save-plots", action="store_true")
args = ap.parse_args()

df = pd.read_csv(args.data)
df["time"] = pd.Categorical(df["time"], categories=["pre","post"], ordered=True)
df["group"] = pd.Categorical(df["group"])

# Mixed model with random intercept per subject
import statsmodels.api as sm
md = sm.MixedLM.from_formula("strength ~ time*group + age + sex + bmi",
groups="id", re_formula="1", data=df)
m = md.fit(method="lbfgs")

print("=== LMM: strength ~ time*group + age + sex + bmi + (1|id) ===")
print(m.summary())

# Within-group pre→post paired tests
from scipy import stats
print("\n=== Within-group pre→post (paired) ===")
for g in df["group"].cat.categories:
wide = (df[df.group==g]
.pivot_table(index="id", columns="time", values="strength", observed=False))
pre, post = wide["pre"], wide["post"]
diff = (post - pre).dropna()
mean = diff.mean(); sd = diff.std(ddof=1); n = diff.shape[0]
t = mean / (sd/np.sqrt(n))
p = 2*stats.t.sf(abs(t), df=n-1)
d_paired = mean / sd
print(f"{g:8s} n={n:2d} Δ={mean:6.2f} t={t:6.2f} p={p:.3g} d_paired={d_paired:.2f}")

# Between-group at post (Welch t) + Hedges g
post = df[df.time=="post"]
cats = post.group.cat.categories
gA = post[post.group==cats[0]]["strength"]
gB = post[post.group==cats[1]]["strength"]
t2, p2 = stats.ttest_ind(gA, gB, equal_var=False)
g = hedges_g(gA, gB)
print("\n=== Between groups at post (Welch t) ===")
print(f"t={t2:.2f} p={p2:.3g} Hedges g={g:.2f}")

if args.save_plots:
os.makedirs("outputs", exist_ok=True)
fig, ax = plt.subplots(figsize=(6,4))
# spaghetti
for (grp, sid), sub in df.sort_values("time").groupby(["group","id"], observed=False):
ax.plot([0,1], sub["strength"].values, alpha=0.2)
# thick means per group
mean_pre = df[df.time=="pre" ].groupby("group", observed=False)["strength"].mean()
mean_post = df[df.time=="post"].groupby("group", observed=False)["strength"].mean()
for gname in df["group"].cat.categories:
ax.plot([0,1], [mean_pre[gname], mean_post[gname]], linewidth=3, label=gname)
ax.set_xticks([0,1]); ax.set_xticklabels(["pre","post"])
ax.set_ylabel("Strength")
ax.legend()
fig.tight_layout()
fp = "outputs/ch13_fitness_spaghetti.png"; fig.savefig(fp, dpi=150)
print("Saved plot ->", fp)

if __name__ == "__main__":
main()
42 changes: 29 additions & 13 deletions scripts/sim_fitness_2x2.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# SPDX-License-Identifier: MIT
import os, json, time
import numpy as np, pandas as pd
rng = np.random.default_rng(7)

rng = np.random.default_rng(7)
N = 80
GROUPS = ["ProgramA","ProgramB"]

def simulate():
os.makedirs("data/synthetic", exist_ok=True)
subjects = pd.DataFrame({
Expand All @@ -14,25 +15,40 @@ def simulate():
"sex": rng.choice(["F","M"], size=N, p=[0.5,0.5]),
"bmi": rng.normal(24, 3.5, size=N).clip(17, 38),
})
# baseline strength varies by covariates a bit
base = 80 + 0.8*(subjects["age"]-30) + 1.2*(subjects["bmi"]-24) + rng.normal(0, 8, N)
# program effects (post - pre), slightly larger for ProgramB
gain_A = rng.normal(8, 4, N)
gain_B = rng.normal(12, 4, N)
gain = np.where(subjects["group"]=="ProgramA", gain_A, gain_B)

# baseline strength with small covariate effects
base = (80
+ 0.8*(subjects["age"]-30)
+ 1.2*(subjects["bmi"]-24)
+ rng.normal(0, 8, size=N))

# program gains
gain_A = rng.normal(8, 4, size=N)
gain_B = rng.normal(12, 4, size=N)
gain = np.where(subjects["group"].to_numpy()=="ProgramA", gain_A, gain_B)

long = pd.concat([
pd.DataFrame({"id": subjects["id"], "time":"pre", "strength": base + rng.normal(0, 3, N)}),
pd.DataFrame({"id": subjects["id"], "time":"post", "strength": base + gain + rng.normal(0, 3, N)})
pd.DataFrame({"id": subjects["id"], "time":"pre",
"strength": base + rng.normal(0, 3, size=N)}),
pd.DataFrame({"id": subjects["id"], "time":"post",
"strength": base + gain + rng.normal(0, 3, size=N)})
], ignore_index=True).merge(subjects, on="id")

subjects.to_csv("data/synthetic/fitness_subjects.csv", index=False)
long.to_csv("data/synthetic/fitness_long.csv", index=False)

meta = dict(
seed=7, n= int(N), design="2x2 mixed (Group between × Time within)",
programs=GROUPS, dv="strength_1RM_like",
seed=7,
n=int(N),
design="2x2 mixed (Group between × Time within)",
programs=GROUPS,
dv="strength_1RM_like",
generated_utc=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
)
with open("data/synthetic/fitness_meta.json","w") as f: json.dump(meta, f, indent=2)
with open("data/synthetic/fitness_meta.json","w") as f:
json.dump(meta, f, indent=2)

print("Wrote fitness_subjects.csv, fitness_long.csv, fitness_meta.json")
if __name__ == "__main__": simulate()

if __name__ == "__main__":
simulate()