@@ -43,9 +43,9 @@ def extract_nc_data(logfile):
4343 iid_beta = float (iid_beta_match .group (1 ))
4444 algo_match = re .search (r"method': '([A-Za-z0-9+_]+)'" , exp )
4545 if not algo_match :
46- algo_match = re .search (r"Changing method to ([A-Za-z0-9+_]+)" , exp )
46+ algo_match = re .search (r"Changing method to ([A-Za-z0-9+_]+)" , exp )
4747 algorithm = algo_match .group (1 ).strip () if algo_match else "FedAvg"
48- if dataset not in ["cora" , "citeseer" , "pubmed" , "ogbn-arxiv" ]:
48+ if dataset not in ["cora" , "citeseer" , "pubmed" ]: # , "ogbn-arxiv"
4949 continue
5050 result = extract_metrics (exp , algorithm , dataset , trainers , iid_beta )
5151 if result :
@@ -63,7 +63,8 @@ def extract_metrics(exp_text, algorithm, dataset, trainers, iid_beta):
6363 else :
6464 accuracy = float (final_accuracy_match .group (1 ))
6565 train_time_match = re .search (r"//train_time: ([\d.]+) ms//end" , exp_text )
66- train_time = float (train_time_match .group (1 )) if train_time_match else None
66+ train_time_ms = float (train_time_match .group (1 )) if train_time_match else None
67+ train_time_s = train_time_ms / 1000.0 if train_time_ms is not None else None
6768 theoretical_pretrain = re .findall (
6869 r"//Log Theoretical Pretrain Comm Cost: ([\d.]+) MB //end" , exp_text
6970 )
@@ -91,7 +92,8 @@ def extract_metrics(exp_text, algorithm, dataset, trainers, iid_beta):
9192 "Trainers" : trainers ,
9293 "IID_Beta" : iid_beta ,
9394 "Accuracy" : accuracy ,
94- "Train_Time_ms" : train_time ,
95+ "Train_Time_ms" : train_time_ms ,
96+ "Train_Time_s" : train_time_s ,
9597 "Theoretical_Pretrain_MB" : float (theoretical_pretrain [- 1 ])
9698 if theoretical_pretrain
9799 else 0 ,
@@ -119,12 +121,12 @@ def extract_metrics(exp_text, algorithm, dataset, trainers, iid_beta):
119121
120122
121123def plot_metric (df , metric , ylabel , filename_prefix ):
122- datasets = ["cora" , "citeseer" , "pubmed" , "ogbn-arxiv" ]
123- algorithms = ["FedAvg" , "fedgcn " ]
124- colors = {"FedAvg" : "#1f77b4" , "fedgcn " : "#ff7f0e" }
124+ datasets = ["cora" , "citeseer" , "pubmed" ] # , "ogbn-arxiv"
125+ algorithms = ["FedAvg" , "FedGCN " ]
126+ colors = {"FedAvg" : "#1f77b4" , "FedGCN " : "#ff7f0e" }
125127 target_betas = [10000.0 , 100.0 , 10.0 ]
126128 for beta in target_betas :
127- plt .figure (figsize = (10 , 6 ))
129+ plt .figure (figsize = (12 , 6 ))
128130 df_beta = df [df ["IID_Beta" ] == beta ]
129131 x_positions = np .arange (len (datasets ))
130132 width = 0.35
@@ -134,7 +136,8 @@ def plot_metric(df, metric, ylabel, filename_prefix):
134136 for dataset in datasets :
135137 temp = df_algo [df_algo ["Dataset" ] == dataset ]
136138 if not temp .empty and not pd .isna (temp [metric ].values [0 ]):
137- values .append (temp [metric ].values [0 ])
139+ val = temp [metric ].values [0 ]
140+ values .append (val )
138141 else :
139142 values .append (0 )
140143 plt .bar (
@@ -144,33 +147,36 @@ def plot_metric(df, metric, ylabel, filename_prefix):
144147 label = algo ,
145148 color = colors [algo ],
146149 )
147- if metric == "Train_Time_ms" :
148- plt .yscale ("log" )
149- plt .title (f"{ ylabel } (IID Beta={ beta } )" , fontsize = 26 )
150- plt .xlabel ("Dataset" , fontsize = 26 )
150+ # plt.title(f"{ylabel} (IID Beta={beta})", fontsize=26)
151+ # plt.xlabel("Dataset", fontsize=26)
151152 plt .ylabel (ylabel , fontsize = 24 )
152- plt .xticks (x_positions + width / 2 , datasets , rotation = 45 , fontsize = 24 )
153+ pretty_names = ["Cora" , "Citeseer" , "Pubmed" ]
154+ plt .xticks (x_positions + width / 2 , pretty_names , rotation = 0 , fontsize = 24 )
153155 plt .yticks (fontsize = 24 )
154- plt .legend (fontsize = 24 )
156+ plt .legend (
157+ loc = "upper left" ,
158+ bbox_to_anchor = (1 , 1 ),
159+ fontsize = 24 ,
160+ )
155161 plt .tight_layout ()
156162 plt .savefig (f"{ filename_prefix } _beta{ int (beta )} .pdf" , dpi = 300 )
157163 plt .close ()
158164
159165
160166def plot_comm_cost (df ):
161- datasets = ["cora" , "citeseer" , "pubmed" , "ogbn-arxiv" ]
162- algorithms = ["FedAvg" , "fedgcn " ]
163- actual_colors = {"FedAvg" : "#1f77b4" , "fedgcn " : "#ff7f0e" }
167+ datasets = ["cora" , "citeseer" , "pubmed" ] # , "ogbn-arxiv"
168+ algorithms = ["FedAvg" , "FedGCN " ]
169+ actual_colors = {"FedAvg" : "#1f77b4" , "FedGCN " : "#ff7f0e" }
164170 theoretical_colors = {
165171 "FedAvg" : "#aec7e8" ,
166- "fedgcn_pretrain " : "#c5b0d5" ,
167- "fedgcn_train " : "#98df8a" ,
172+ "FedGCN_Pretrain " : "#c5b0d5" ,
173+ "FedGCN_Train " : "#98df8a" ,
168174 }
169175 pretrain_colors_actual = "#2ca02c"
170176 target_betas = [10000.0 , 100.0 , 10.0 ]
171177
172178 for beta in target_betas :
173- plt .figure (figsize = (10 , 6 ))
179+ plt .figure (figsize = (12 , 6 ))
174180 df_beta = df [df ["IID_Beta" ] == beta ]
175181 x_positions = np .arange (len (datasets ))
176182 width = 0.18
@@ -246,20 +252,21 @@ def plot_comm_cost(df):
246252 xpos_theo ,
247253 pretrain_theo ,
248254 width = width ,
249- color = theoretical_colors ["fedgcn_pretrain " ],
255+ color = theoretical_colors ["FedGCN_Pretrain " ],
250256 )
251257 plt .bar (
252258 xpos_theo ,
253259 train_theo ,
254260 width = width ,
255261 bottom = pretrain_theo ,
256- color = theoretical_colors ["fedgcn_train " ],
262+ color = theoretical_colors ["FedGCN_Train " ],
257263 )
258264
259- plt .title (f"Communication Cost (IID Beta={ beta } )" , fontsize = 22 )
260- plt .xlabel ("Dataset" , fontsize = 22 )
265+ # plt.title(f"Communication Cost (IID Beta={beta})", fontsize=22)
266+ # plt.xlabel("Dataset", fontsize=22)
261267 plt .ylabel ("Communication Cost (MB)" , fontsize = 22 )
262- plt .xticks (x_positions , datasets , rotation = 45 , fontsize = 22 )
268+ pretty_names = ["Cora" , "Citeseer" , "Pubmed" ]
269+ plt .xticks (x_positions , pretty_names , rotation = 0 , fontsize = 22 )
263270 plt .yticks (fontsize = 24 )
264271 plt .grid (axis = "y" , linestyle = "--" , alpha = 0.5 )
265272
@@ -283,7 +290,7 @@ def plot_comm_cost(df):
283290 ],
284291 loc = "upper left" ,
285292 bbox_to_anchor = (1 , 1 ),
286- fontsize = 16 ,
293+ fontsize = 14 ,
287294 )
288295
289296 plt .tight_layout ()
@@ -325,9 +332,13 @@ def process_all_log_files(log_folder):
325332 else :
326333 df = process_all_log_files (os .getcwd ())
327334 if not df .empty :
328- df .to_csv ("nc_data_raw.csv" , index = False )
335+ # Only save ms to CSV
336+ df_csv = df .copy ()
337+ if "Train_Time_s" in df_csv .columns :
338+ df_csv = df_csv .drop (columns = ["Train_Time_s" ])
339+ df_csv .to_csv ("nc_data_raw.csv" , index = False )
329340 plot_metric (df , "Accuracy" , "Accuracy" , "nc_accuracy_comparison" )
330341 plot_metric (
331- df , "Train_Time_ms " , "Training Time (ms )" , "nc_train_time_comparison"
342+ df , "Train_Time_s " , "Training Time (s )" , "nc_train_time_comparison"
332343 )
333344 plot_comm_cost (df )
0 commit comments