Skip to content

Commit 3ff8466

Browse files
committed
refined NC figures
1 parent 5068165 commit 3ff8466

10 files changed

+40
-29
lines changed

benchmark/figure/NC_comm_costs_old/extract_NC_log.py

Lines changed: 40 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -43,9 +43,9 @@ def extract_nc_data(logfile):
4343
iid_beta = float(iid_beta_match.group(1))
4444
algo_match = re.search(r"method': '([A-Za-z0-9+_]+)'", exp)
4545
if not algo_match:
46-
algo_match = re.search(r"Changing method to ([A-Za-z0-9+_]+)", exp)
46+
algo_match = re.search(r"Changing method to ([A-Za-z0-9+_]+)", exp)
4747
algorithm = algo_match.group(1).strip() if algo_match else "FedAvg"
48-
if dataset not in ["cora", "citeseer", "pubmed", "ogbn-arxiv"]:
48+
if dataset not in ["cora", "citeseer", "pubmed"]: #, "ogbn-arxiv"
4949
continue
5050
result = extract_metrics(exp, algorithm, dataset, trainers, iid_beta)
5151
if result:
@@ -63,7 +63,8 @@ def extract_metrics(exp_text, algorithm, dataset, trainers, iid_beta):
6363
else:
6464
accuracy = float(final_accuracy_match.group(1))
6565
train_time_match = re.search(r"//train_time: ([\d.]+) ms//end", exp_text)
66-
train_time = float(train_time_match.group(1)) if train_time_match else None
66+
train_time_ms = float(train_time_match.group(1)) if train_time_match else None
67+
train_time_s = train_time_ms / 1000.0 if train_time_ms is not None else None
6768
theoretical_pretrain = re.findall(
6869
r"//Log Theoretical Pretrain Comm Cost: ([\d.]+) MB //end", exp_text
6970
)
@@ -91,7 +92,8 @@ def extract_metrics(exp_text, algorithm, dataset, trainers, iid_beta):
9192
"Trainers": trainers,
9293
"IID_Beta": iid_beta,
9394
"Accuracy": accuracy,
94-
"Train_Time_ms": train_time,
95+
"Train_Time_ms": train_time_ms,
96+
"Train_Time_s": train_time_s,
9597
"Theoretical_Pretrain_MB": float(theoretical_pretrain[-1])
9698
if theoretical_pretrain
9799
else 0,
@@ -119,12 +121,12 @@ def extract_metrics(exp_text, algorithm, dataset, trainers, iid_beta):
119121

120122

121123
def plot_metric(df, metric, ylabel, filename_prefix):
122-
datasets = ["cora", "citeseer", "pubmed", "ogbn-arxiv"]
123-
algorithms = ["FedAvg", "fedgcn"]
124-
colors = {"FedAvg": "#1f77b4", "fedgcn": "#ff7f0e"}
124+
datasets = ["cora", "citeseer", "pubmed"] #, "ogbn-arxiv"
125+
algorithms = ["FedAvg", "FedGCN"]
126+
colors = {"FedAvg": "#1f77b4", "FedGCN": "#ff7f0e"}
125127
target_betas = [10000.0, 100.0, 10.0]
126128
for beta in target_betas:
127-
plt.figure(figsize=(10, 6))
129+
plt.figure(figsize=(12, 6))
128130
df_beta = df[df["IID_Beta"] == beta]
129131
x_positions = np.arange(len(datasets))
130132
width = 0.35
@@ -134,7 +136,8 @@ def plot_metric(df, metric, ylabel, filename_prefix):
134136
for dataset in datasets:
135137
temp = df_algo[df_algo["Dataset"] == dataset]
136138
if not temp.empty and not pd.isna(temp[metric].values[0]):
137-
values.append(temp[metric].values[0])
139+
val = temp[metric].values[0]
140+
values.append(val)
138141
else:
139142
values.append(0)
140143
plt.bar(
@@ -144,33 +147,36 @@ def plot_metric(df, metric, ylabel, filename_prefix):
144147
label=algo,
145148
color=colors[algo],
146149
)
147-
if metric == "Train_Time_ms":
148-
plt.yscale("log")
149-
plt.title(f"{ylabel} (IID Beta={beta})", fontsize=26)
150-
plt.xlabel("Dataset", fontsize=26)
150+
# plt.title(f"{ylabel} (IID Beta={beta})", fontsize=26)
151+
# plt.xlabel("Dataset", fontsize=26)
151152
plt.ylabel(ylabel, fontsize=24)
152-
plt.xticks(x_positions + width / 2, datasets, rotation=45, fontsize=24)
153+
pretty_names = ["Cora", "Citeseer", "Pubmed"]
154+
plt.xticks(x_positions + width / 2, pretty_names, rotation=0, fontsize=24)
153155
plt.yticks(fontsize=24)
154-
plt.legend(fontsize=24)
156+
plt.legend(
157+
loc="upper left",
158+
bbox_to_anchor=(1, 1),
159+
fontsize=24,
160+
)
155161
plt.tight_layout()
156162
plt.savefig(f"{filename_prefix}_beta{int(beta)}.pdf", dpi=300)
157163
plt.close()
158164

159165

160166
def plot_comm_cost(df):
161-
datasets = ["cora", "citeseer", "pubmed", "ogbn-arxiv"]
162-
algorithms = ["FedAvg", "fedgcn"]
163-
actual_colors = {"FedAvg": "#1f77b4", "fedgcn": "#ff7f0e"}
167+
datasets = ["cora", "citeseer", "pubmed"] #, "ogbn-arxiv"
168+
algorithms = ["FedAvg", "FedGCN"]
169+
actual_colors = {"FedAvg": "#1f77b4", "FedGCN": "#ff7f0e"}
164170
theoretical_colors = {
165171
"FedAvg": "#aec7e8",
166-
"fedgcn_pretrain": "#c5b0d5",
167-
"fedgcn_train": "#98df8a",
172+
"FedGCN_Pretrain": "#c5b0d5",
173+
"FedGCN_Train": "#98df8a",
168174
}
169175
pretrain_colors_actual = "#2ca02c"
170176
target_betas = [10000.0, 100.0, 10.0]
171177

172178
for beta in target_betas:
173-
plt.figure(figsize=(10, 6))
179+
plt.figure(figsize=(12, 6))
174180
df_beta = df[df["IID_Beta"] == beta]
175181
x_positions = np.arange(len(datasets))
176182
width = 0.18
@@ -246,20 +252,21 @@ def plot_comm_cost(df):
246252
xpos_theo,
247253
pretrain_theo,
248254
width=width,
249-
color=theoretical_colors["fedgcn_pretrain"],
255+
color=theoretical_colors["FedGCN_Pretrain"],
250256
)
251257
plt.bar(
252258
xpos_theo,
253259
train_theo,
254260
width=width,
255261
bottom=pretrain_theo,
256-
color=theoretical_colors["fedgcn_train"],
262+
color=theoretical_colors["FedGCN_Train"],
257263
)
258264

259-
plt.title(f"Communication Cost (IID Beta={beta})", fontsize=22)
260-
plt.xlabel("Dataset", fontsize=22)
265+
# plt.title(f"Communication Cost (IID Beta={beta})", fontsize=22)
266+
# plt.xlabel("Dataset", fontsize=22)
261267
plt.ylabel("Communication Cost (MB)", fontsize=22)
262-
plt.xticks(x_positions, datasets, rotation=45, fontsize=22)
268+
pretty_names = ["Cora", "Citeseer", "Pubmed"]
269+
plt.xticks(x_positions, pretty_names, rotation=0, fontsize=22)
263270
plt.yticks(fontsize=24)
264271
plt.grid(axis="y", linestyle="--", alpha=0.5)
265272

@@ -283,7 +290,7 @@ def plot_comm_cost(df):
283290
],
284291
loc="upper left",
285292
bbox_to_anchor=(1, 1),
286-
fontsize=16,
293+
fontsize=14,
287294
)
288295

289296
plt.tight_layout()
@@ -325,9 +332,13 @@ def process_all_log_files(log_folder):
325332
else:
326333
df = process_all_log_files(os.getcwd())
327334
if not df.empty:
328-
df.to_csv("nc_data_raw.csv", index=False)
335+
# Only save ms to CSV
336+
df_csv = df.copy()
337+
if "Train_Time_s" in df_csv.columns:
338+
df_csv = df_csv.drop(columns=["Train_Time_s"])
339+
df_csv.to_csv("nc_data_raw.csv", index=False)
329340
plot_metric(df, "Accuracy", "Accuracy", "nc_accuracy_comparison")
330341
plot_metric(
331-
df, "Train_Time_ms", "Training Time (ms)", "nc_train_time_comparison"
342+
df, "Train_Time_s", "Training Time (s)", "nc_train_time_comparison"
332343
)
333344
plot_comm_cost(df)
-868 Bytes
Binary file not shown.
-878 Bytes
Binary file not shown.
Binary file not shown.
-1014 Bytes
Binary file not shown.
-1012 Bytes
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)