Skip to content

Commit 88ec66d

Browse files
committed
Add comm cost extraction and visualization scripts, update benchmarks and EKS configs
1 parent 5670080 commit 88ec66d

23 files changed

Lines changed: 1080 additions & 18 deletions
137 KB
Loading
136 KB
Loading
138 KB
Loading
136 KB
Loading
139 KB
Loading
137 KB
Loading
137 KB
Loading
Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
import re
2+
3+
import matplotlib.pyplot as plt
4+
import numpy as np
5+
import pandas as pd
6+
import seaborn as sns
7+
8+
9+
def _search_float(pattern, text):
    """Return the first capture group of *pattern* in *text* as a float, or None."""
    match = re.search(pattern, text)
    return float(match.group(1)) if match else None


def _comm_cost_ratio(actual, theoretical):
    """Return ``actual / theoretical``, or a sentinel when that is undefined.

    * NaN when *actual* is missing (``None``).
    * ``inf`` when *theoretical* is not positive but *actual* is.
    * NaN when neither value is positive.
    """
    if actual is None:
        return float("nan")
    if theoretical > 0:
        return actual / theoretical
    return float("inf") if actual > 0 else float("nan")


def extract_comm_costs(logfile):
    """Parse a benchmark log into one row of communication costs per experiment.

    The log is split on the ``Running experiment i/n:`` banner (preceded by a
    line of 80 dashes); everything before the first banner is ignored.  An
    experiment is skipped when its Algorithm/Dataset/Trainers header or either
    theoretical cost line is missing.  When several theoretical cost lines are
    present, the last one is used.

    Actual costs and accuracy are optional.  A missing value is stored as NaN,
    and every figure derived from it (``Actual_Total_MB`` and the ratios) is
    NaN as well, so an incomplete experiment no longer aborts the whole parse.

    Args:
        logfile: Path of the log file to read.

    Returns:
        A ``pandas.DataFrame`` with the columns Algorithm, Dataset, Trainers,
        Theoretical_Pretrain_MB, Theoretical_Train_MB, Actual_Pretrain_MB,
        Actual_Train_MB, Accuracy, Theoretical_Total_MB, Actual_Total_MB,
        Pretrain_Ratio, Train_Ratio and Total_Ratio.  The frame is empty (no
        columns) when no experiment could be parsed.
    """
    with open(logfile, "r") as f:
        log_content = f.read()

    experiments = re.split(r"-{80}\nRunning experiment \d+/\d+:", log_content)
    nan = float("nan")
    results = []

    for exp in experiments[1:]:
        algo_match = re.search(r"Algorithm: (\w+)", exp)
        dataset_match = re.search(r"Dataset: ([A-Z0-9-]+)", exp)
        trainers_match = re.search(r"Trainers: (\d+)", exp)
        if not (algo_match and dataset_match and trainers_match):
            continue

        theoretical_pretrain = re.findall(
            r"//Log Theoretical Pretrain Comm Cost: ([\d.]+) MB //end", exp
        )
        theoretical_train = re.findall(
            r"//Log Theoretical Train Comm Cost: ([\d.]+) MB //end", exp
        )
        if not (theoretical_pretrain and theoretical_train):
            continue

        # The last logged theoretical value is the one that is kept.
        theo_pretrain_mb = float(theoretical_pretrain[-1])
        theo_train_mb = float(theoretical_train[-1])
        theo_total_mb = theo_pretrain_mb + theo_train_mb

        actual_pretrain_mb = _search_float(
            r"//Log Total Actual Pretrain Comm Cost: ([\d.]+) MB //end", exp
        )
        actual_train_mb = _search_float(
            r"//Log Total Actual Train Comm Cost: ([\d.]+) MB //end", exp
        )
        accuracy = _search_float(r"Average test accuracy: ([\d.]+)", exp)

        # A total is only meaningful when both actual parts were logged.
        if actual_pretrain_mb is None or actual_train_mb is None:
            actual_total_mb = None
        else:
            actual_total_mb = actual_pretrain_mb + actual_train_mb

        # The pretrain ratio is left undefined whenever no pretrain traffic
        # was measured (missing or 0 MB), even if the theoretical cost is > 0.
        if actual_pretrain_mb:
            pretrain_ratio = _comm_cost_ratio(actual_pretrain_mb, theo_pretrain_mb)
        else:
            pretrain_ratio = nan

        results.append(
            {
                "Algorithm": algo_match.group(1),
                "Dataset": dataset_match.group(1),
                "Trainers": int(trainers_match.group(1)),
                "Theoretical_Pretrain_MB": theo_pretrain_mb,
                "Theoretical_Train_MB": theo_train_mb,
                "Actual_Pretrain_MB": nan
                if actual_pretrain_mb is None
                else actual_pretrain_mb,
                "Actual_Train_MB": nan if actual_train_mb is None else actual_train_mb,
                "Accuracy": nan if accuracy is None else accuracy,
                "Theoretical_Total_MB": theo_total_mb,
                "Actual_Total_MB": nan if actual_total_mb is None else actual_total_mb,
                "Pretrain_Ratio": pretrain_ratio,
                "Train_Ratio": _comm_cost_ratio(actual_train_mb, theo_train_mb),
                "Total_Ratio": _comm_cost_ratio(actual_total_mb, theo_total_mb),
            }
        )

    return pd.DataFrame(results)
115+
116+
117+
def generate_dataset_comparisons(df, output_prefix="comm_cost"):
    """Write per-dataset comparison tables and bar charts of training costs.

    The rows of *df* are averaged per (Dataset, Algorithm) pair and saved to
    ``{output_prefix}_dataset_algorithm_comparison.csv``.  Then, for every
    dataset in *df*, a formatted table is written to
    ``{output_prefix}_{dataset}_comparison.csv`` and a grouped bar chart of
    theoretical vs. actual training cost is saved to
    ``{output_prefix}_{dataset}_train_comparison.png``.

    Args:
        df: Frame holding the cost columns averaged below plus ``Dataset``
            and ``Algorithm``.
        output_prefix: Prefix of every file that is written.

    Returns:
        A list of ``(dataset, table)`` tuples, one per dataset, where
        ``table`` is the formatted (string-valued) comparison DataFrame.
    """
    averaged_columns = [
        "Theoretical_Pretrain_MB",
        "Theoretical_Train_MB",
        "Theoretical_Total_MB",
        "Actual_Pretrain_MB",
        "Actual_Train_MB",
        "Actual_Total_MB",
        "Train_Ratio",
        "Accuracy",
    ]
    per_pair = df.groupby(["Dataset", "Algorithm"])
    comparison_data = per_pair.agg(
        {column: "mean" for column in averaged_columns}
    ).reset_index()

    comparison_data.to_csv(
        f"{output_prefix}_dataset_algorithm_comparison.csv", index=False
    )

    report_tables = []

    for dataset in df["Dataset"].unique():
        dataset_data = comparison_data[comparison_data["Dataset"] == dataset]

        table_rows = []
        for _, row in dataset_data.iterrows():
            theoretical_mb = row["Theoretical_Train_MB"]
            actual_mb = row["Actual_Train_MB"]
            accuracy = row["Accuracy"]

            # Actual cost and overhead are both reported as "N/A" when no
            # actual training cost is available for this algorithm.
            if pd.notna(actual_mb):
                actual_text = f"{actual_mb:.2f}"
                overhead_text = f"{actual_mb - theoretical_mb:.2f}"
            else:
                actual_text = "N/A"
                overhead_text = "N/A"

            if pd.notna(accuracy):
                accuracy_text = f"{accuracy:.4f}"
            else:
                accuracy_text = "N/A"

            table_rows.append(
                {
                    "Algorithm": row["Algorithm"],
                    "Theoretical Train (MB)": f"{theoretical_mb:.2f}",
                    "Actual Train (MB)": actual_text,
                    "Train Overhead (MB)": overhead_text,
                    "Accuracy": accuracy_text,
                }
            )

        dataset_table = pd.DataFrame(table_rows)
        dataset_table.to_csv(f"{output_prefix}_{dataset}_comparison.csv", index=False)
        report_tables.append((dataset, dataset_table))

        # Grouped bar chart: one bar pair (theoretical, actual) per algorithm.
        plt.figure(figsize=(12, 8))
        plot_data = pd.melt(
            dataset_data,
            id_vars=["Algorithm"],
            value_vars=["Theoretical_Train_MB", "Actual_Train_MB"],
            var_name="Type",
            value_name="Communication Cost (MB)",
        )
        sns.barplot(
            x="Algorithm", y="Communication Cost (MB)", hue="Type", data=plot_data
        )
        plt.title(f"{dataset} - Theoretical vs Actual Training Communication Costs")
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.savefig(f"{output_prefix}_{dataset}_train_comparison.png", dpi=300)
        plt.close()

    return report_tables
185+
186+
187+
def generate_report(logfile, output_prefix="comm_cost"):
    """Build all communication-cost reports for one log file.

    Files written (all prefixed with *output_prefix*):

    * ``_raw.csv``: every parsed experiment, as returned by
      ``extract_comm_costs``.
    * the per-dataset files produced by ``generate_dataset_comparisons``.
    * ``_consolidated_report.csv``: the per-dataset tables stacked together
      with an added ``Dataset`` column.
    * ``_algorithm_summary.csv``: per-algorithm means of theoretical and
      actual training cost and accuracy, plus their difference as
      ``Average Overhead (MB)``.

    Args:
        logfile: Path of the log file to parse.
        output_prefix: Prefix of every file that is written.

    Returns:
        The consolidated report DataFrame, or ``None`` (after printing a
        message) when the log contains no communication cost data.
    """
    df = extract_comm_costs(logfile)
    if df.empty:
        print("No communication cost data found in log file.")
        return None

    df.to_csv(f"{output_prefix}_raw.csv", index=False)

    report_tables = generate_dataset_comparisons(df, output_prefix)

    # Label each table with its dataset and concatenate once: concatenating
    # inside the loop re-copies all accumulated rows on every iteration and
    # starts from an empty frame, which pandas has deprecated.
    labelled_tables = [
        dataset_table.assign(Dataset=dataset)
        for dataset, dataset_table in report_tables
    ]
    if labelled_tables:
        consolidated_report = pd.concat(labelled_tables)
    else:
        consolidated_report = pd.DataFrame()

    consolidated_report.to_csv(f"{output_prefix}_consolidated_report.csv", index=False)

    algorithm_summary = (
        df.groupby("Algorithm")
        .agg(
            {
                "Theoretical_Train_MB": "mean",
                "Actual_Train_MB": "mean",
                "Accuracy": "mean",
            }
        )
        .reset_index()
    )

    algorithm_summary["Average Overhead (MB)"] = (
        algorithm_summary["Actual_Train_MB"] - algorithm_summary["Theoretical_Train_MB"]
    )

    algorithm_summary.to_csv(f"{output_prefix}_algorithm_summary.csv", index=False)

    return consolidated_report
224+
225+
226+
if __name__ == "__main__":
    import sys

    # Usage: script.py [logfile] [output_prefix]; both arguments are optional.
    cli_args = sys.argv[1:]
    logfile = cli_args[0] if len(cli_args) >= 1 else "GC.log"
    output_prefix = cli_args[1] if len(cli_args) >= 2 else "comm_cost"

    consolidated_report = generate_report(logfile, output_prefix)

    # generate_report returns None when the log held no usable data.
    if consolidated_report is not None:
        shown_columns = [
            "Algorithm",
            "Theoretical Train (MB)",
            "Actual Train (MB)",
            "Accuracy",
        ]
        print("\nComparison by Dataset and Algorithm:")
        for dataset in consolidated_report["Dataset"].unique():
            print(f"\n=== Dataset: {dataset} ===")
            belongs_to_dataset = consolidated_report["Dataset"] == dataset
            dataset_data = consolidated_report[belongs_to_dataset]
            print(dataset_data[shown_columns])
118 KB
Loading
120 KB
Loading

0 commit comments

Comments
 (0)