# Use seaborn's "talk" context: larger fonts/line widths suited to figures.
sns.set_context("talk")
def extract_nc_scalability_data(logfile, expected_trainers=None):
    """Extract training/communication timing rows from one NC benchmark log.

    Scans the log for "CSV FORMAT RESULT" sections whose header is
    ``DS,IID,BS,TotalTime[s],PureTrainingTime[s],CommTime[s],FinalAcc[%],
    CommCost[MB],PeakMem[MB],AvgRoundTime[s],ModelSize[MB],TotalParams``
    and parses the data line that follows each header.

    Args:
        logfile: Path to the log file to parse.
        expected_trainers: Known trainer count for this run. When given it
            overrides whatever "Trainers: N" lines appear in the log; when
            None, the i-th "Trainers: N" match is used (default 10 if absent).

    Returns:
        pd.DataFrame with one row per parsed result (may be empty).
    """
    with open(logfile, "r", encoding="utf-8", errors="replace") as f:
        log_content = f.read()

    results = []

    # One capture group per result section: the CSV data line after the header.
    csv_sections = re.findall(
        r"CSV FORMAT RESULT:.*?DS,IID,BS,TotalTime\[s\],PureTrainingTime\[s\],CommTime\[s\],FinalAcc\[%\],CommCost\[MB\],PeakMem\[MB\],AvgRoundTime\[s\],ModelSize\[MB\],TotalParams\n(.*?)\n",
        log_content,
        re.DOTALL,
    )

    # Trainer counts logged by the experiment configuration, in order.
    trainer_matches = re.findall(r"Trainers: (\d+)", log_content)

    for csv_idx, csv_line in enumerate(csv_sections):
        parts = csv_line.strip().split(",")
        if len(parts) >= 12:
            try:
                # `is not None` (not truthiness) so an explicit 0 would still
                # count as a caller-supplied override.
                if expected_trainers is not None:
                    num_trainers = expected_trainers
                elif csv_idx < len(trainer_matches):
                    num_trainers = int(trainer_matches[csv_idx])
                else:
                    num_trainers = 10  # fallback when the log omits "Trainers:"

                result = {
                    "Dataset": parts[0],
                    "IID_Beta": float(parts[1]),
                    # int("-1") == -1, so no special-casing of the "-1"
                    # (full-batch) sentinel is needed.
                    "Batch_Size": int(parts[2]),
                    "Total_Time": float(parts[3]),
                    "Training_Time": float(parts[4]),
                    "Communication_Time": float(parts[5]),
                    "Final_Accuracy": float(parts[6]),
                    "Communication_Cost": float(parts[7]),
                    "Peak_Memory": float(parts[8]),
                    "Avg_Round_Time": float(parts[9]),
                    "Model_Size": float(parts[10]),
                    # Params may be logged in scientific notation; go via float.
                    "Total_Params": int(float(parts[11])),
                    "Num_Trainers": num_trainers,
                }
                results.append(result)
            except (ValueError, IndexError):
                # Skip malformed rows rather than aborting the whole parse.
                # NOTE(review): the except clause was elided in the diff view;
                # reconstructed — confirm against the original file.
                continue

    return pd.DataFrame(results)
6560
6661
# NOTE(review): the surrounding diff deletes this helper (CommTime is now
# parsed directly from the log); kept here as it appears in the pre-diff file.
def estimate_communication_time(comm_cost_mb, num_trainers):
    """Estimate communication time from total communication cost.

    Assumes a 100 Mbps link (12.5 MB/s), a reasonable figure for federated
    learning scenarios.

    Args:
        comm_cost_mb: Total communication cost in megabytes.
        num_trainers: Accepted for interface compatibility; not used.

    Returns:
        Estimated communication time in seconds.
    """
    bandwidth_mb_per_s = 100 / 8  # 100 Mbps expressed as MB/s
    return comm_cost_mb / bandwidth_mb_per_s
77-
78-
def load_all_nc_logs():
    """Load and combine scalability records from the per-size NC log files.

    Looks for NC5.log / NC10.log / NC15.log / NC20.log in the working
    directory and tags each file's rows with that run's known trainer count.

    Returns:
        pd.DataFrame: all records concatenated, or an empty frame when no
        log file yields data.
    """
    # File name -> trainer count used when that benchmark was run.
    runs = {"NC5.log": 5, "NC10.log": 10, "NC15.log": 15, "NC20.log": 20}

    frames = []
    for path, n_trainers in runs.items():
        if not os.path.exists(path):
            continue
        frame = extract_nc_scalability_data(path, n_trainers)
        if frame.empty:
            continue
        # Force the trainer count to the value known for this file.
        frame["Num_Trainers"] = n_trainers
        frames.append(frame)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
10480
10581
def create_scalability_plot(df):
    """Plot mean training vs. communication time against client count.

    Filters to the IID_Beta == 10.0 configuration, averages metrics per
    trainer count, and writes two artifacts to the working directory:
    ``federated_learning_scalability.pdf`` and ``scalability_analysis.csv``.

    Args:
        df: DataFrame with at least the columns Num_Trainers, IID_Beta,
            Training_Time, Communication_Time, Total_Time, Final_Accuracy,
            Communication_Cost, Peak_Memory.
    """
    if df.empty:
        return

    # Only the IID_Beta == 10.0 benchmark configuration is analyzed.
    df_filtered = df[df["IID_Beta"] == 10.0].copy()
    if df_filtered.empty:
        return

    # Average each metric over all runs with the same client count.
    scalability_data = (
        df_filtered.groupby("Num_Trainers")
        .agg(
            {
                "Training_Time": "mean",
                "Communication_Time": "mean",
                "Total_Time": "mean",
                "Final_Accuracy": "mean",
                "Communication_Cost": "mean",
                "Peak_Memory": "mean",
            }
        )
        .reset_index()
        .sort_values("Num_Trainers")
    )

    plt.figure(figsize=(12, 8))

    # NOTE(review): marker/line-style kwargs for both plot calls were elided
    # in the diff view; the values below are plausible reconstructions —
    # confirm against the original file.
    plt.plot(
        scalability_data["Num_Trainers"],
        scalability_data["Training_Time"],
        marker="o",
        linewidth=3,
        markersize=10,
        color="#1f77b4",
        label="Training Time",
    )
    plt.plot(
        scalability_data["Num_Trainers"],
        scalability_data["Communication_Time"],
        marker="s",
        linewidth=3,
        markersize=10,
        color="#ff7f0e",
        label="Communication Time",
    )

    # Annotate every point with its value; offsets keep labels off the lines.
    # NOTE(review): annotate kwargs for the training-time labels were elided
    # in the diff view — reconstructed to mirror the communication-time ones.
    for _, row in scalability_data.iterrows():
        plt.annotate(
            f'{row["Training_Time"]:.1f}s',
            (row["Num_Trainers"], row["Training_Time"]),
            textcoords="offset points",
            xytext=(0, 12),
            ha="center",
            fontsize=12,
            color="#1f77b4",
        )
        plt.annotate(
            f'{row["Communication_Time"]:.1f}s',
            (row["Num_Trainers"], row["Communication_Time"]),
            textcoords="offset points",
            xytext=(0, -18),
            ha="center",
            fontsize=12,
            color="#ff7f0e",
        )

    plt.xlabel("Number of Clients", fontsize=16)
    plt.ylabel("Time (seconds)", fontsize=16)
    plt.title("Federated Learning Scalability Analysis", fontsize=18, fontweight="bold")
    plt.legend(fontsize=14, loc="upper left")
    plt.grid(True, alpha=0.3)

    # Put a tick at every measured client count.
    client_numbers = sorted(scalability_data["Num_Trainers"].unique())
    plt.xticks(client_numbers, fontsize=14)
    plt.yticks(fontsize=14)

    # Pad the y-axis so annotations above the top point remain visible.
    y_max = max(
        scalability_data["Training_Time"].max(),
        scalability_data["Communication_Time"].max(),
    )
    # NOTE(review): the ylim line was elided in the diff view — padding
    # factor reconstructed; confirm against the original.
    plt.ylim(0, y_max * 1.15)

    plt.savefig("federated_learning_scalability.pdf", dpi=300, bbox_inches="tight")
    plt.close()

    scalability_data.to_csv("scalability_analysis.csv", index=False)
248172
249173
def main():
    """Entry point: load the NC logs and generate the scalability analysis."""
    df = load_all_nc_logs()

    # Nothing to plot when no log files were found or none contained data.
    if df.empty:
        return

    create_scalability_plot(df)
276179
277180
if __name__ == "__main__":
    main()