From df49062f976131da24eed59eb8c555a48e30e47d Mon Sep 17 00:00:00 2001 From: "Agrawal, Kush" Date: Tue, 18 Feb 2025 02:27:08 -0800 Subject: [PATCH 01/16] Upgrade OpenFL Signed-off-by: Agrawal, Kush --- Task_1/FeTS_Challenge.py | 4 +-- .../custom_aggregation_wrapper.py | 2 +- Task_1/fets_challenge/experiment.py | 26 +++++++++++++++---- Task_1/generate_predictions.py | 2 +- .../fets_challenge_workspace/plan/plan.yaml | 8 +++--- .../src/fets_challenge_model.py | 16 +++++++++--- Task_1/setup.py | 14 +++++----- 7 files changed, 50 insertions(+), 22 deletions(-) diff --git a/Task_1/FeTS_Challenge.py b/Task_1/FeTS_Challenge.py index 94d7598..b36a97b 100644 --- a/Task_1/FeTS_Challenge.py +++ b/Task_1/FeTS_Challenge.py @@ -526,7 +526,7 @@ def FedAvgM_Selection(local_tensors, institution_split_csv_filename = 'small_split.csv' # change this to point to the parent directory of the data -brats_training_data_parent_dir = '/raid/datasets/FeTS22/MICCAI_FeTS2022_TrainingData' +brats_training_data_parent_dir = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_TrainingData' # increase this if you need a longer history for your algorithms # decrease this if you need to reduce system RAM consumption @@ -589,7 +589,7 @@ def FedAvgM_Selection(local_tensors, #checkpoint_folder='experiment_1' #data_path = -data_path = '/home/brats/MICCAI_FeTS2022_ValidationData' +data_path = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_ValidationData' validation_csv_filename = 'validation.csv' # you can keep these the same if you wish diff --git a/Task_1/fets_challenge/custom_aggregation_wrapper.py b/Task_1/fets_challenge/custom_aggregation_wrapper.py index ae7abc4..63472a6 100644 --- a/Task_1/fets_challenge/custom_aggregation_wrapper.py +++ b/Task_1/fets_challenge/custom_aggregation_wrapper.py @@ -1,4 +1,4 @@ -from openfl.component.aggregation_functions.experimental import PrivilegedAggregationFunction +from openfl.interface.aggregation_functions.experimental import PrivilegedAggregationFunction # extends the openfl agg 
func interface to include challenge-relevant information diff --git a/Task_1/fets_challenge/experiment.py b/Task_1/fets_challenge/experiment.py index f561e66..1896ace 100644 --- a/Task_1/fets_challenge/experiment.py +++ b/Task_1/fets_challenge/experiment.py @@ -13,7 +13,8 @@ import numpy as np import pandas as pd -from openfl.utilities import split_tensor_dict_for_holdouts, TensorKey +from openfl.utilities.split import split_tensor_dict_for_holdouts +from openfl.utilities import TensorKey from openfl.protocols import utils import openfl.native as fx import torch @@ -231,7 +232,7 @@ def run_challenge_experiment(aggregation_function, save_checkpoints=True, restore_from_checkpoint_folder=None, include_validation_with_hausdorff=True, - use_pretrained_model=True): + use_pretrained_model=False): fx.init('fets_challenge_workspace') @@ -241,6 +242,8 @@ def run_challenge_experiment(aggregation_function, root = file.parent.resolve() # interface root, containing command modules work = Path.cwd().resolve() + print(f"TESTING ->>>>>> Work directory: {work}") + path.append(str(root)) path.insert(0, str(work)) @@ -251,6 +254,8 @@ def run_challenge_experiment(aggregation_function, institution_split_csv_filename, 0.8, gandlf_csv_path) + + print(f'TESTING ->>>>>> Collaborator names: {collaborator_names}') aggregation_wrapper = CustomAggregationWrapper(aggregation_function) @@ -279,16 +284,27 @@ def run_challenge_experiment(aggregation_function, transformed_csv_dict = extract_csv_partitions(os.path.join(work, 'gandlf_paths.csv')) # get the task runner, passing the first data loader + print('TESTING ->>>>>> Fetching TaskRunner ...') for col in collaborator_data_loaders: #Insert logic to serialize train / val CSVs here - transformed_csv_dict[col]['train'].to_csv(os.path.join(work, 'seg_test_train.csv')) - transformed_csv_dict[col]['val'].to_csv(os.path.join(work, 'seg_test_val.csv')) + # transformed_csv_dict[col]['train'].to_csv(os.path.join(work, 'seg_test_train.csv')) + # 
transformed_csv_dict[col]['val'].to_csv(os.path.join(work, 'seg_test_val.csv')) + transformed_csv_dict[col]['train'].to_csv(os.path.join(work, 'train.csv')) + transformed_csv_dict[col]['val'].to_csv(os.path.join(work, 'valid.csv')) task_runner = copy(plan).get_task_runner(collaborator_data_loaders[col]) if use_pretrained_model: - print('Loading pretrained model...') + print('TESTING ->>>>>> Loading pretrained model...') if device == 'cpu': checkpoint = torch.load(f'{root}/pretrained_model/resunet_pretrained.pth',map_location=torch.device('cpu')) + print('TESTING ->>>>>> Loading checkpoint model...') + print(checkpoint.keys()) + print('TESTING ->>>>>> Loading checkpoint state dict...') + model_state = checkpoint['model_state_dict'] + for name, tensor in model_state.items(): + print(f"Priting {name}: {tensor.shape}") + print('TESTING ->>>>>> Loading taskrunner model') + print(task_runner.model) task_runner.model.load_state_dict(checkpoint['model_state_dict']) task_runner.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) else: diff --git a/Task_1/generate_predictions.py b/Task_1/generate_predictions.py index 872a62a..4c5a570 100644 --- a/Task_1/generate_predictions.py +++ b/Task_1/generate_predictions.py @@ -23,7 +23,7 @@ # the data you want to run inference over checkpoint_folder='experiment_1' #data_path = -data_path = '/raid/datasets/FeTS22/MICCAI_FeTS2022_ValidationData' +data_path = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_ValidationData' # you can keep these the same if you wish best_model_path = os.path.join(home, '.local/workspace/checkpoint', checkpoint_folder, 'best_model.pkl') diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/plan/plan.yaml b/Task_1/openfl-workspace/fets_challenge_workspace/plan/plan.yaml index ca4476c..2e35cee 100644 --- a/Task_1/openfl-workspace/fets_challenge_workspace/plan/plan.yaml +++ b/Task_1/openfl-workspace/fets_challenge_workspace/plan/plan.yaml @@ -21,9 +21,10 @@ collaborator : data_loader : defaults : 
plan/defaults/data_loader.yaml - template : openfl.federated.data.loader_fets_challenge.FeTSChallengeDataLoaderWrapper + template : openfl.federated.data.loader_gandlf.GaNDLFDataLoaderWrapper settings : feature_shape : [32, 32, 32] + data_path : /home/ad_kagrawa2/.local/workspace task_runner : template : src.fets_challenge_model.FeTSChallengeModel @@ -31,7 +32,7 @@ task_runner : train_csv : seg_test_train.csv val_csv : seg_test_val.csv device : cpu - fets_config_dict : + gandlf_config : batch_size: 1 clip_grad: null clip_mode: null @@ -92,9 +93,10 @@ task_runner : track_memory_usage: false verbose: false version: - maximum: 0.0.14 + maximum: 0.1.0 minimum: 0.0.14 weighted_loss: true + modality: rad network : diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/src/fets_challenge_model.py b/Task_1/openfl-workspace/fets_challenge_workspace/src/fets_challenge_model.py index 3794be6..6af8c6e 100644 --- a/Task_1/openfl-workspace/fets_challenge_workspace/src/fets_challenge_model.py +++ b/Task_1/openfl-workspace/fets_challenge_workspace/src/fets_challenge_model.py @@ -8,10 +8,10 @@ import numpy as np import torch as pt -from openfl.utilities import split_tensor_dict_for_holdouts +from openfl.utilities.split import split_tensor_dict_for_holdouts from openfl.utilities import TensorKey -from openfl.federated.task.runner_fets_challenge import * +from openfl.federated.task.runner_gandlf import * from GANDLF.compute.generic import create_pytorch_objects from GANDLF.compute.training_loop import train_network @@ -19,7 +19,7 @@ from . 
import TRAINING_HPARAMS -class FeTSChallengeModel(FeTSChallengeTaskRunner): +class FeTSChallengeModel(GaNDLFTaskRunner): """FeTSChallenge Model class for Federated Learning.""" def validate(self, col_name, round_num, input_tensor_dict, @@ -61,6 +61,10 @@ def validate(self, col_name, round_num, input_tensor_dict, output_tensor_dict = {} output_tensor_dict[TensorKey('valid_loss', origin, round_num, True, tags)] = np.array(epoch_valid_loss) for k, v in epoch_valid_metric.items(): + print(f"Testing ->>>> Metric Key {k} Value {v}") + if isinstance(v, str): + v = list(map(float, v.split('_'))) + if np.array(v).size == 1: output_tensor_dict[TensorKey(f'valid_{k}', origin, round_num, True, tags)] = np.array(v) else: @@ -105,6 +109,9 @@ def inference(self, col_name, round_num, input_tensor_dict, output_tensor_dict = {} output_tensor_dict[TensorKey('valid_loss', origin, round_num, True, tags)] = np.array(epoch_valid_loss) for k, v in epoch_valid_metric.items(): + print(f"Testing ->>>> Metric Key {k} Value {v}") + if isinstance(v, str): + v = list(map(float, v.split('_'))) if np.array(v).size == 1: output_tensor_dict[TensorKey(f'valid_{k}', origin, round_num, True, tags)] = np.array(v) else: @@ -163,6 +170,9 @@ def train(self, col_name, round_num, input_tensor_dict, use_tqdm=False, epochs=1 metric_dict = {'loss': epoch_train_loss} for k, v in epoch_train_metric.items(): + print(f"Testing ->>>> Metric Key {k} Value {v}") + if isinstance(v, str): + v = list(map(float, v.split('_'))) if np.array(v).size == 1: metric_dict[f'train_{k}'] = np.array(v) else: diff --git a/Task_1/setup.py b/Task_1/setup.py index 1ff561d..2ea1e59 100644 --- a/Task_1/setup.py +++ b/Task_1/setup.py @@ -28,11 +28,11 @@ ], include_package_data=True, install_requires=[ - 'openfl @ git+https://github.com/intel/openfl.git@f4b28d710e2be31cdfa7487fdb4e8cb3a1387a5f', - 'GANDLF @ git+https://github.com/CBICA/GaNDLF.git@e4d0d4bfdf4076130817001a98dfb90189956278', + 'openfl @ 
git+https://github.com/securefederatedai/openfl.git@6bbf9b62f97f50a06a9956eefacebf6d0a6cba4e', + 'GANDLF @ git+https://github.com/CBICA/GaNDLF.git@4d614fe1de550ea4035b543b4c712ad564248106', 'fets @ git+https://github.com/FETS-AI/Algorithms.git@fets_challenge', ], - python_requires='>=3.6, <3.9', + python_requires='>=3.10, <3.13', classifiers=[ 'Environment :: Console', # How mature is this project? Common values are @@ -46,9 +46,9 @@ 'License :: OSI Approved :: FETS UI License', # Specify the Python versions you support here. In particular, ensure # that you indicate whether you support Python 2, Python 3 or both. - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', ] ) From 9c1dabae95c7839edeab8f23041d2e249676b6ba Mon Sep 17 00:00:00 2001 From: "Agrawal, Kush" Date: Sun, 2 Mar 2025 23:17:49 -0800 Subject: [PATCH 02/16] Updated Federated FLow Signed-off-by: Agrawal, Kush --- Task_1/FeTS_Challenge.py | 2 +- Task_1/fets_challenge/experiment.py | 701 ++++++++++------- Task_1/fets_challenge/fets_challenge_model.py | 728 ++++++++++++++++++ Task_1/fets_challenge/fets_flow.py | 114 +++ Task_1/fets_challenge/gandlf_config.yaml | 63 ++ Task_1/fets_challenge/inference.py | 8 +- 6 files changed, 1320 insertions(+), 296 deletions(-) create mode 100644 Task_1/fets_challenge/fets_challenge_model.py create mode 100644 Task_1/fets_challenge/fets_flow.py create mode 100644 Task_1/fets_challenge/gandlf_config.yaml diff --git a/Task_1/FeTS_Challenge.py b/Task_1/FeTS_Challenge.py index b36a97b..22dc67c 100644 --- a/Task_1/FeTS_Challenge.py +++ b/Task_1/FeTS_Challenge.py @@ -537,7 +537,7 @@ def FedAvgM_Selection(local_tensors, # you'll want to increase this most likely. 
You can set it as high as you like, # however, the experiment will exit once the simulated time exceeds one week. -rounds_to_train = 5 +rounds_to_train = 1 # (bool) Determines whether checkpoints should be saved during the experiment. # The checkpoints can grow quite large (5-10GB) so only the latest will be saved when this parameter is enabled diff --git a/Task_1/fets_challenge/experiment.py b/Task_1/fets_challenge/experiment.py index 1896ace..3136b36 100644 --- a/Task_1/fets_challenge/experiment.py +++ b/Task_1/fets_challenge/experiment.py @@ -23,6 +23,12 @@ from .custom_aggregation_wrapper import CustomAggregationWrapper from .checkpoint_utils import setup_checkpoint_folder, save_checkpoint, load_checkpoint +from .fets_flow import FeTSFederatedFlow +from .fets_challenge_model import FeTSChallengeModel + +from openfl.experimental.workflow.interface import FLSpec, Aggregator, Collaborator +from openfl.experimental.workflow.runtime import LocalRuntime + # one week # MINUTE = 60 # HOUR = 60 * MINUTE @@ -221,6 +227,16 @@ def get_metric(metric, fl_round, tensor_db): target_tags = ('metric', 'validate_agg') return float(tensor_db.tensor_db.query("tensor_name == @metric_name and round == @fl_round and tags == @target_tags").nparray) +def collaborator_private_attributes( + index, n_collaborators, train_csv, valid_csv, gandlf_config, device + ): + return { + "train_csv": train_csv, + "val_csv": valid_csv, + "gandlf_config": gandlf_config, + "device": device + } + def run_challenge_experiment(aggregation_function, choose_training_collaborators, training_hyper_parameters_for_round, @@ -234,7 +250,7 @@ def run_challenge_experiment(aggregation_function, include_validation_with_hausdorff=True, use_pretrained_model=False): - fx.init('fets_challenge_workspace') + #fx.init('fets_challenge_workspace') from sys import path, exit @@ -242,6 +258,10 @@ def run_challenge_experiment(aggregation_function, root = file.parent.resolve() # interface root, containing command modules work = 
Path.cwd().resolve() + gandlf_config_path = os.path.join(root, 'gandlf_config.yaml') + + print(f"TESTING ->>>>>> Gandlf Config Path: {gandlf_config_path}") + print(f"TESTING ->>>>>> Work directory: {work}") path.append(str(root)) @@ -257,7 +277,7 @@ def run_challenge_experiment(aggregation_function, print(f'TESTING ->>>>>> Collaborator names: {collaborator_names}') - aggregation_wrapper = CustomAggregationWrapper(aggregation_function) + aggregation_wrapper = CustomAggregationWrapper(aggregation_function) # ---> [TODO] Set the aggregation function in the workflow overrides = { 'aggregator.settings.rounds_to_train': rounds_to_train, @@ -268,312 +288,411 @@ def run_challenge_experiment(aggregation_function, # Update the plan if necessary - plan = fx.update_plan(overrides) + # # [Kush - Flow] -> Update the Plan with the overrides + # ---> Not required in workflow + #plan = fx.update_plan(overrides) - if not include_validation_with_hausdorff: - plan.config['task_runner']['settings']['fets_config_dict']['metrics'] = ['dice','dice_per_label'] + # if not include_validation_with_hausdorff: + # plan.config['task_runner']['settings']['fets_config_dict']['metrics'] = ['dice','dice_per_label'] - # Overwrite collaborator names - plan.authorized_cols = collaborator_names - # overwrite datapath values with the collaborator name itself - for col in collaborator_names: - plan.cols_data_paths[col] = col + # # Overwrite collaborator names + # plan.authorized_cols = collaborator_names + # # overwrite datapath values with the collaborator name itself + # for col in collaborator_names: + # # [Kush - Flow] -> Collaborator data path dictionary + # plan.cols_data_paths[col] = col # get the data loaders for each collaborator - collaborator_data_loaders = {col: copy(plan).get_data_loader(col) for col in collaborator_names} + # [Kush - Flow] -> def get_data_loader(self, collaborator_name): Builds the DataLoader for the collaborator based on plan + # --> Not required for workflow + # 
collaborator_data_loaders = {col: copy(plan).get_data_loader(col) for col in collaborator_names} transformed_csv_dict = extract_csv_partitions(os.path.join(work, 'gandlf_paths.csv')) # get the task runner, passing the first data loader print('TESTING ->>>>>> Fetching TaskRunner ...') - for col in collaborator_data_loaders: - #Insert logic to serialize train / val CSVs here - # transformed_csv_dict[col]['train'].to_csv(os.path.join(work, 'seg_test_train.csv')) - # transformed_csv_dict[col]['val'].to_csv(os.path.join(work, 'seg_test_val.csv')) - transformed_csv_dict[col]['train'].to_csv(os.path.join(work, 'train.csv')) - transformed_csv_dict[col]['val'].to_csv(os.path.join(work, 'valid.csv')) - task_runner = copy(plan).get_task_runner(collaborator_data_loaders[col]) - - if use_pretrained_model: - print('TESTING ->>>>>> Loading pretrained model...') - if device == 'cpu': - checkpoint = torch.load(f'{root}/pretrained_model/resunet_pretrained.pth',map_location=torch.device('cpu')) - print('TESTING ->>>>>> Loading checkpoint model...') - print(checkpoint.keys()) - print('TESTING ->>>>>> Loading checkpoint state dict...') - model_state = checkpoint['model_state_dict'] - for name, tensor in model_state.items(): - print(f"Priting {name}: {tensor.shape}") - print('TESTING ->>>>>> Loading taskrunner model') - print(task_runner.model) - task_runner.model.load_state_dict(checkpoint['model_state_dict']) - task_runner.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) - else: - checkpoint = torch.load(f'{root}/pretrained_model/resunet_pretrained.pth') - task_runner.model.load_state_dict(checkpoint['model_state_dict']) - task_runner.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) - - tensor_pipe = plan.get_tensor_pipe() - - # Initialize model weights - init_state_path = plan.config['aggregator']['settings']['init_state_path'] - tensor_dict, _ = split_tensor_dict_for_holdouts(logger, task_runner.get_tensor_dict(False)) - - model_snap = 
utils.construct_model_proto(tensor_dict=tensor_dict, - round_number=0, - tensor_pipe=tensor_pipe) - - utils.dump_proto(model_proto=model_snap, fpath=init_state_path) - - # get the aggregator, now that we have the initial weights file set up - logger.info('Creating aggregator...') - aggregator = plan.get_aggregator() - # manually override the aggregator UUID (for checkpoint resume when rounds change) - aggregator.uuid = 'aggregator' - aggregator._load_initial_tensors() - - # create our collaborators - logger.info('Creating collaborators...') - collaborators = {col: copy(plan).get_collaborator(col, task_runner=task_runner, client=aggregator) for col in collaborator_names} - - collaborator_time_stats = gen_collaborator_time_stats(plan.authorized_cols) - - collaborators_chosen_each_round = {} - collaborator_times_per_round = {} - - logger.info('Starting experiment') - - total_simulated_time = 0 - best_dice = -1.0 - best_dice_over_time_auc = 0 - - # results dataframe data - experiment_results = { - 'round':[], - 'time': [], - 'convergence_score': [], - 'round_dice': [], - 'dice_label_0': [], - 'dice_label_1': [], - 'dice_label_2': [], - 'dice_label_4': [], - } - if include_validation_with_hausdorff: - experiment_results.update({ - 'hausdorff95_label_0': [], - 'hausdorff95_label_1': [], - 'hausdorff95_label_2': [], - 'hausdorff95_label_4': [], - }) + # for col in collaborator_data_loaders: + # #Insert logic to serialize train / val CSVs here + # # transformed_csv_dict[col]['train'].to_csv(os.path.join(work, 'seg_test_train.csv')) + # # transformed_csv_dict[col]['val'].to_csv(os.path.join(work, 'seg_test_val.csv')) + # transformed_csv_dict[col]['train'].to_csv(os.path.join(work, 'train.csv')) + # transformed_csv_dict[col]['val'].to_csv(os.path.join(work, 'valid.csv')) + # # [Kush - Flow] -> def get_task_runner(self, data_loader): Builds the TaskRunner and returns returns the taskrunner instance for the collaborator based on plan + # # ---> [[TODO]] Create coll 
priv_attributes as per csv dictionary. + # task_runner = copy(plan).get_task_runner(collaborator_data_loaders[col]) + + aggregator = Aggregator() + + collaborators = [] + for idx, col in enumerate(collaborator_names): + col_dir = os.path.join(work, 'data', str(col)) + os.makedirs(col_dir, exist_ok=True) + + train_csv_path = os.path.join(col_dir, 'train.csv') + val_csv_path = os.path.join(col_dir, 'valid.csv') + + transformed_csv_dict[col]['train'].to_csv(train_csv_path) + transformed_csv_dict[col]['val'].to_csv(val_csv_path) + collaborators.append( + Collaborator( + name=col, + private_attributes_callable=collaborator_private_attributes, + # If 1 GPU is available in the machine + # Set `num_gpus=0.0` to `num_gpus=0.3` to run on GPU + # with ray backend with 2 collaborators + num_cpus=0.0, + num_gpus=0.0, + # arguments required to pass to callable + index=idx, + n_collaborators=len(collaborator_names), + train_csv=train_csv_path, + valid_csv=val_csv_path, + gandlf_config=gandlf_config_path, + device=device + ) + ) + + local_runtime = LocalRuntime( + aggregator=aggregator, collaborators=collaborators, backend="single_process" + ) + + logger.info(f"Local runtime collaborators = {local_runtime.collaborators}") + + model = FeTSChallengeModel(gandlf_config_path) + top_model_accuracy = 0 + # optimizers = { + # collaborator.name: default_optimizer(model, optimizer_type=args.optimizer_type) + # for collaborator in collaborators + # } + # flflow = FederatedFlow( + # model, + # optimizers, + # device, + # args.comm_round, + # top_model_accuracy, + # args.flow_internal_loop_test, + # ) + + flflow = FeTSFederatedFlow( + model, + 1 + ) + + flflow.runtime = local_runtime + flflow.run() + + # [Kush - Flow] -> Commenting as pretrained model is not used. 
+ # ---> Define a new step in federated flow before training to load the pretrained model + # if use_pretrained_model: + # print('TESTING ->>>>>> Loading pretrained model...') + # if device == 'cpu': + # checkpoint = torch.load(f'{root}/pretrained_model/resunet_pretrained.pth',map_location=torch.device('cpu')) + # print('TESTING ->>>>>> Loading checkpoint model...') + # print(checkpoint.keys()) + # print('TESTING ->>>>>> Loading checkpoint state dict...') + # model_state = checkpoint['model_state_dict'] + # for name, tensor in model_state.items(): + # print(f"Priting {name}: {tensor.shape}") + # print('TESTING ->>>>>> Loading taskrunner model') + # print(task_runner.model) + # task_runner.model.load_state_dict(checkpoint['model_state_dict']) + # task_runner.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) + # else: + # checkpoint = torch.load(f'{root}/pretrained_model/resunet_pretrained.pth') + # task_runner.model.load_state_dict(checkpoint['model_state_dict']) + # task_runner.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) + + # [Kush - Flow] -> [TODO] Compression Pipeline + # tensor_pipe = plan.get_tensor_pipe() + + # # Initialize model weights + # # [Kush - FLow] - [TODO] How to set the initial state in the workflow + # init_state_path = plan.config['aggregator']['settings']['init_state_path'] + # tensor_dict, _ = split_tensor_dict_for_holdouts(logger, task_runner.get_tensor_dict(False)) + + # model_snap = utils.construct_model_proto(tensor_dict=tensor_dict, + # round_number=0, + # tensor_pipe=tensor_pipe) + + # utils.dump_proto(model_proto=model_snap, fpath=init_state_path) + + # # [Kush - Flow] ->Fetch the required aggregator from plan + # # --> [SKIP] Not required for workflow as we will we creating aggregator and setting to runtime + # # get the aggregator, now that we have the initial weights file set up + # logger.info('Creating aggregator...') + # aggregator = plan.get_aggregator() + # # manually override the aggregator UUID 
(for checkpoint resume when rounds change) + # aggregator.uuid = 'aggregator' + # aggregator._load_initial_tensors() + + # # create our collaborators + # # [Kush - Flow] ->Fetch the required COLLABORTAOR from plan + # # --> [SKIP] Not required for workflow as we will we creating COLLABORATORS and setting to runtime + # logger.info('Creating collaborators...') + # collaborators = {col: copy(plan).get_collaborator(col, task_runner=task_runner, client=aggregator) for col in collaborator_names} + + # collaborator_time_stats = gen_collaborator_time_stats(plan.authorized_cols) + + # collaborators_chosen_each_round = {} + # collaborator_times_per_round = {} + + # logger.info('Starting experiment') + + # total_simulated_time = 0 + # best_dice = -1.0 + # best_dice_over_time_auc = 0 + + # # results dataframe data + # experiment_results = { + # 'round':[], + # 'time': [], + # 'convergence_score': [], + # 'round_dice': [], + # 'dice_label_0': [], + # 'dice_label_1': [], + # 'dice_label_2': [], + # 'dice_label_4': [], + # } + # if include_validation_with_hausdorff: + # experiment_results.update({ + # 'hausdorff95_label_0': [], + # 'hausdorff95_label_1': [], + # 'hausdorff95_label_2': [], + # 'hausdorff95_label_4': [], + # }) - - if restore_from_checkpoint_folder is None: - checkpoint_folder = setup_checkpoint_folder() - logger.info(f'\nCreated experiment folder {checkpoint_folder}...') - starting_round_num = 0 - else: - if not Path(f'checkpoint/{restore_from_checkpoint_folder}').exists(): - logger.warning(f'Could not find provided checkpoint folder: {restore_from_checkpoint_folder}. 
Exiting...') - exit(1) - else: - logger.info(f'Attempting to load last completed round from {restore_from_checkpoint_folder}') - state = load_checkpoint(restore_from_checkpoint_folder) - checkpoint_folder = restore_from_checkpoint_folder - - [loaded_collaborator_names, starting_round_num, collaborator_time_stats, - total_simulated_time, best_dice, best_dice_over_time_auc, - collaborators_chosen_each_round, collaborator_times_per_round, - experiment_results, summary, agg_tensor_db] = state - - if loaded_collaborator_names != collaborator_names: - logger.error(f'Collaborator names found in checkpoint ({loaded_collaborator_names}) ' - f'do not match provided collaborators ({collaborator_names})') - exit(1) - - logger.info(f'Previous summary for round {starting_round_num}') - logger.info(summary) - - starting_round_num += 1 - aggregator.tensor_db.tensor_db = agg_tensor_db - aggregator.round_number = starting_round_num - - - for round_num in range(starting_round_num, rounds_to_train): - # pick collaborators to train for the round - training_collaborators = choose_training_collaborators(collaborator_names, - aggregator.tensor_db._iterate(), - round_num, - collaborators_chosen_each_round, - collaborator_times_per_round) + # # [Kush-Flow] [TODO] Will check later + # # if restore_from_checkpoint_folder is None: + # # checkpoint_folder = setup_checkpoint_folder() + # # logger.info(f'\nCreated experiment folder {checkpoint_folder}...') + # # starting_round_num = 0 + # # else: + # # if not Path(f'checkpoint/{restore_from_checkpoint_folder}').exists(): + # # logger.warning(f'Could not find provided checkpoint folder: {restore_from_checkpoint_folder}. 
Exiting...') + # # exit(1) + # # else: + # # logger.info(f'Attempting to load last completed round from {restore_from_checkpoint_folder}') + # # state = load_checkpoint(restore_from_checkpoint_folder) + # # checkpoint_folder = restore_from_checkpoint_folder + + # # [loaded_collaborator_names, starting_round_num, collaborator_time_stats, + # # total_simulated_time, best_dice, best_dice_over_time_auc, + # # collaborators_chosen_each_round, collaborator_times_per_round, + # # experiment_results, summary, agg_tensor_db] = state + + # # if loaded_collaborator_names != collaborator_names: + # # logger.error(f'Collaborator names found in checkpoint ({loaded_collaborator_names}) ' + # # f'do not match provided collaborators ({collaborator_names})') + # # exit(1) + + # # logger.info(f'Previous summary for round {starting_round_num}') + # # logger.info(summary) + + # # starting_round_num += 1 + # # aggregator.tensor_db.tensor_db = agg_tensor_db + # # aggregator.round_number = starting_round_num + + + # for round_num in range(starting_round_num, rounds_to_train): + # # pick collaborators to train for the round + # # [Kush - Flow] -> Choose Training Collaborators + # # ---> [TODO] In flow based API's, in start we can pass as foreach = 'collaborators' + # training_collaborators = choose_training_collaborators(collaborator_names, + # aggregator.tensor_db._iterate(), + # round_num, + # collaborators_chosen_each_round, + # collaborator_times_per_round) - logger.info('Collaborators chosen to train for round {}:\n\t{}'.format(round_num, training_collaborators)) + # logger.info('Collaborators chosen to train for round {}:\n\t{}'.format(round_num, training_collaborators)) - # save the collaborators chosen this round - collaborators_chosen_each_round[round_num] = training_collaborators + # # save the collaborators chosen this round + # collaborators_chosen_each_round[round_num] = training_collaborators - # get the hyper-parameters from the competitor - hparams = 
training_hyper_parameters_for_round(collaborator_names, - aggregator.tensor_db._iterate(), - round_num, - collaborators_chosen_each_round, - collaborator_times_per_round) + # # get the hyper-parameters from the competitor + # # [KUSH - Flow] --> Need to set how to set hyper parameters in the workflow + # # --> [TODO] Set some private attribute for the collaborator + # hparams = training_hyper_parameters_for_round(collaborator_names, + # aggregator.tensor_db._iterate(), + # round_num, + # collaborators_chosen_each_round, + # collaborator_times_per_round) - learning_rate, epochs_per_round = hparams + # learning_rate, epochs_per_round = hparams - if (epochs_per_round is None): - logger.warning('Hyper-parameter function warning: function returned None for "epochs_per_round". Setting "epochs_per_round" to 1') - epochs_per_round = 1 + # if (epochs_per_round is None): + # logger.warning('Hyper-parameter function warning: function returned None for "epochs_per_round". Setting "epochs_per_round" to 1') + # epochs_per_round = 1 - hparam_message = "\n\tlearning rate: {}".format(learning_rate) - - hparam_message += "\n\tepochs_per_round: {}".format(epochs_per_round) - - logger.info("Hyper-parameters for round {}:{}".format(round_num, hparam_message)) - - # cache each tensor in the aggregator tensor_db - hparam_dict = {} - tk = TensorKey(tensor_name='learning_rate', - origin=aggregator.uuid, - round_number=round_num, - report=False, - tags=('hparam', 'model')) - hparam_dict[tk] = np.array(learning_rate) - tk = TensorKey(tensor_name='epochs_per_round', - origin=aggregator.uuid, - round_number=round_num, - report=False, - tags=('hparam', 'model')) - hparam_dict[tk] = np.array(epochs_per_round) - aggregator.tensor_db.cache_tensor(hparam_dict) - - # pre-compute the times for each collaborator - times_per_collaborator = compute_times_per_collaborator(collaborator_names, - training_collaborators, - epochs_per_round, - collaborator_data_loaders, - collaborator_time_stats, - round_num) 
- collaborator_times_per_round[round_num] = times_per_collaborator - - aggregator.assigner.set_training_collaborators(training_collaborators) - - # update the state in the aggregation wrapper - aggregation_wrapper.set_state_data_for_round(collaborators_chosen_each_round, collaborator_times_per_round) - - # turn the times list into a list of tuples and sort it - times_list = [(t, col) for col, t in times_per_collaborator.items()] - times_list = sorted(times_list) - - # now call each collaborator in order of time - # FIXME: this doesn't break up each task. We need this if we're doing straggler handling - for t, col in times_list: - # set the task_runner data loader - task_runner.data_loader = collaborator_data_loaders[col] - - # run the collaborator - collaborators[col].run_simulation() + # hparam_message = "\n\tlearning rate: {}".format(learning_rate) + + # hparam_message += "\n\tepochs_per_round: {}".format(epochs_per_round) + + # logger.info("Hyper-parameters for round {}:{}".format(round_num, hparam_message)) + + # # cache each tensor in the aggregator tensor_db + # hparam_dict = {} + # tk = TensorKey(tensor_name='learning_rate', + # origin=aggregator.uuid, + # round_number=round_num, + # report=False, + # tags=('hparam', 'model')) + # hparam_dict[tk] = np.array(learning_rate) + # tk = TensorKey(tensor_name='epochs_per_round', + # origin=aggregator.uuid, + # round_number=round_num, + # report=False, + # tags=('hparam', 'model')) + # hparam_dict[tk] = np.array(epochs_per_round) + # # [Kush - FLow] -> [TODO] How to cache the tensor in the workflow ? + # aggregator.tensor_db.cache_tensor(hparam_dict) + + # # pre-compute the times for each collaborator + # # [Kush - Flow] [TODO] What is the use of this ? 
+ # times_per_collaborator = compute_times_per_collaborator(collaborator_names, + # training_collaborators, + # epochs_per_round, + # collaborator_data_loaders, + # collaborator_time_stats, + # round_num) + # collaborator_times_per_round[round_num] = times_per_collaborator + + # # [Kush - Flow] -> Not required in workflow + # aggregator.assigner.set_training_collaborators(training_collaborators) + + # # update the state in the aggregation wrapper + # # [Kush - Flow] -> [TODO] See how to pass this in the workflow as aggregation function and use in JOIN step + # aggregation_wrapper.set_state_data_for_round(collaborators_chosen_each_round, collaborator_times_per_round) + + # # turn the times list into a list of tuples and sort it + # times_list = [(t, col) for col, t in times_per_collaborator.items()] + # times_list = sorted(times_list) + + # # now call each collaborator in order of time + # # FIXME: this doesn't break up each task. We need this if we're doing straggler handling + # # [Kush - Flow] -> Below codeblock is not required in workflow as below two lines will be handled by the workflow + # # ---> [TODO] Create LocalRunTime using ray bakcend and do flow.run() to start the training + # for t, col in times_list: + # # set the task_runner data loader + # task_runner.data_loader = collaborator_data_loaders[col] + + # # run the collaborator + # collaborators[col].run_simulation() - logger.info("Collaborator {} took simulated time: {} minutes".format(col, round(t / 60, 2))) - - # the round time is the max of the times_list - round_time = max([t for t, _ in times_list]) - total_simulated_time += round_time - - - # get the performace validation scores for the round - round_dice = get_metric('valid_dice', round_num, aggregator.tensor_db) - dice_label_0 = get_metric('valid_dice_per_label_0', round_num, aggregator.tensor_db) - dice_label_1 = get_metric('valid_dice_per_label_1', round_num, aggregator.tensor_db) - dice_label_2 = get_metric('valid_dice_per_label_2', 
round_num, aggregator.tensor_db) - dice_label_4 = get_metric('valid_dice_per_label_4', round_num, aggregator.tensor_db) - if include_validation_with_hausdorff: - hausdorff95_label_0 = get_metric('valid_hd95_per_label_0', round_num, aggregator.tensor_db) - hausdorff95_label_1 = get_metric('valid_hd95_per_label_1', round_num, aggregator.tensor_db) - hausdorff95_label_2 = get_metric('valid_hd95_per_label_2', round_num, aggregator.tensor_db) - hausdorff95_label_4 = get_metric('valid_hd95_per_label_4', round_num, aggregator.tensor_db) - - # update best score - if best_dice < round_dice: - best_dice = round_dice - # Set the weights for the final model - if round_num == 0: - # here the initial model was validated (temp model does not exist) - logger.info(f'Skipping best model saving to disk as it is a random initialization.') - elif not os.path.exists(f'checkpoint/{checkpoint_folder}/temp_model.pkl'): - raise ValueError(f'Expected temporary model at: checkpoint/{checkpoint_folder}/temp_model.pkl to exist but it was not found.') - else: - # here the temp model was the one validated - shutil.copyfile(src=f'checkpoint/{checkpoint_folder}/temp_model.pkl',dst=f'checkpoint/{checkpoint_folder}/best_model.pkl') - logger.info(f'Saved model with best average binary DICE: {best_dice} to ~/.local/workspace/checkpoint/{checkpoint_folder}/best_model.pkl') - - ## RUN VALIDATION ON INTERMEDIATE CONSENSUS MODEL - # set the task_runner data loader - # task_runner.data_loader = collaborator_data_loaders[col] - - ## CONVERGENCE METRIC COMPUTATION - # update the auc score - best_dice_over_time_auc += best_dice * round_time - - # project the auc score as remaining time * best dice - # this projection assumes that the current best score is carried forward for the entire week - projected_auc = (MAX_SIMULATION_TIME - total_simulated_time) * best_dice + best_dice_over_time_auc - projected_auc /= MAX_SIMULATION_TIME - - # End of round summary - summary = '"**** END OF ROUND {} SUMMARY 
*****"'.format(round_num) - summary += "\n\tSimulation Time: {} minutes".format(round(total_simulated_time / 60, 2)) - summary += "\n\t(Projected) Convergence Score: {}".format(projected_auc) - summary += "\n\tDICE Label 0: {}".format(dice_label_0) - summary += "\n\tDICE Label 1: {}".format(dice_label_1) - summary += "\n\tDICE Label 2: {}".format(dice_label_2) - summary += "\n\tDICE Label 4: {}".format(dice_label_4) - if include_validation_with_hausdorff: - summary += "\n\tHausdorff95 Label 0: {}".format(hausdorff95_label_0) - summary += "\n\tHausdorff95 Label 1: {}".format(hausdorff95_label_1) - summary += "\n\tHausdorff95 Label 2: {}".format(hausdorff95_label_2) - summary += "\n\tHausdorff95 Label 4: {}".format(hausdorff95_label_4) - - - experiment_results['round'].append(round_num) - experiment_results['time'].append(total_simulated_time) - experiment_results['convergence_score'].append(projected_auc) - experiment_results['round_dice'].append(round_dice) - experiment_results['dice_label_0'].append(dice_label_0) - experiment_results['dice_label_1'].append(dice_label_1) - experiment_results['dice_label_2'].append(dice_label_2) - experiment_results['dice_label_4'].append(dice_label_4) - if include_validation_with_hausdorff: - experiment_results['hausdorff95_label_0'].append(hausdorff95_label_0) - experiment_results['hausdorff95_label_1'].append(hausdorff95_label_1) - experiment_results['hausdorff95_label_2'].append(hausdorff95_label_2) - experiment_results['hausdorff95_label_4'].append(hausdorff95_label_4) - logger.info(summary) - - if save_checkpoints: - logger.info(f'Saving checkpoint for round {round_num}') - logger.info(f'To resume from this checkpoint, set the restore_from_checkpoint_folder parameter to \'{checkpoint_folder}\'') - save_checkpoint(checkpoint_folder, aggregator, - collaborator_names, collaborators, - round_num, collaborator_time_stats, - total_simulated_time, best_dice, - best_dice_over_time_auc, - collaborators_chosen_each_round, - 
collaborator_times_per_round, - experiment_results, - summary) - - # if the total_simulated_time has exceeded the maximum time, we break - # in practice, this means that the previous round's model is the last model scored, - # so a long final round should not actually benefit the competitor, since that final - # model is never globally validated - if total_simulated_time > MAX_SIMULATION_TIME: - logger.info("Simulation time exceeded. Ending Experiment") - break - - # save the most recent aggregated model in native format to be copied over as best when appropriate - # (note this model has not been validated by the collaborators yet) - task_runner.rebuild_model(round_num, aggregator.last_tensor_dict, validation=True) - task_runner.save_native(f'checkpoint/{checkpoint_folder}/temp_model.pkl') + # logger.info("Collaborator {} took simulated time: {} minutes".format(col, round(t / 60, 2))) + + # # the round time is the max of the times_list + # round_time = max([t for t, _ in times_list]) + # total_simulated_time += round_time + + # # [Kush - Flow] --> [TODO] How to set these metrics in the workflow and save the checkpoint ?? 
+ # # get the performace validation scores for the round + # round_dice = get_metric('valid_dice', round_num, aggregator.tensor_db) + # dice_label_0 = get_metric('valid_dice_per_label_0', round_num, aggregator.tensor_db) + # dice_label_1 = get_metric('valid_dice_per_label_1', round_num, aggregator.tensor_db) + # dice_label_2 = get_metric('valid_dice_per_label_2', round_num, aggregator.tensor_db) + # dice_label_4 = get_metric('valid_dice_per_label_4', round_num, aggregator.tensor_db) + # if include_validation_with_hausdorff: + # hausdorff95_label_0 = get_metric('valid_hd95_per_label_0', round_num, aggregator.tensor_db) + # hausdorff95_label_1 = get_metric('valid_hd95_per_label_1', round_num, aggregator.tensor_db) + # hausdorff95_label_2 = get_metric('valid_hd95_per_label_2', round_num, aggregator.tensor_db) + # hausdorff95_label_4 = get_metric('valid_hd95_per_label_4', round_num, aggregator.tensor_db) + + # # update best score + # if best_dice < round_dice: + # best_dice = round_dice + # # Set the weights for the final model + # if round_num == 0: + # # here the initial model was validated (temp model does not exist) + # logger.info(f'Skipping best model saving to disk as it is a random initialization.') + # elif not os.path.exists(f'checkpoint/{checkpoint_folder}/temp_model.pkl'): + # raise ValueError(f'Expected temporary model at: checkpoint/{checkpoint_folder}/temp_model.pkl to exist but it was not found.') + # else: + # # here the temp model was the one validated + # shutil.copyfile(src=f'checkpoint/{checkpoint_folder}/temp_model.pkl',dst=f'checkpoint/{checkpoint_folder}/best_model.pkl') + # logger.info(f'Saved model with best average binary DICE: {best_dice} to ~/.local/workspace/checkpoint/{checkpoint_folder}/best_model.pkl') + + # ## RUN VALIDATION ON INTERMEDIATE CONSENSUS MODEL + # # set the task_runner data loader + # # task_runner.data_loader = collaborator_data_loaders[col] + + # ## CONVERGENCE METRIC COMPUTATION + # # update the auc score + # 
best_dice_over_time_auc += best_dice * round_time + + # # project the auc score as remaining time * best dice + # # this projection assumes that the current best score is carried forward for the entire week + # projected_auc = (MAX_SIMULATION_TIME - total_simulated_time) * best_dice + best_dice_over_time_auc + # projected_auc /= MAX_SIMULATION_TIME + + # # End of round summary + # summary = '"**** END OF ROUND {} SUMMARY *****"'.format(round_num) + # summary += "\n\tSimulation Time: {} minutes".format(round(total_simulated_time / 60, 2)) + # summary += "\n\t(Projected) Convergence Score: {}".format(projected_auc) + # summary += "\n\tDICE Label 0: {}".format(dice_label_0) + # summary += "\n\tDICE Label 1: {}".format(dice_label_1) + # summary += "\n\tDICE Label 2: {}".format(dice_label_2) + # summary += "\n\tDICE Label 4: {}".format(dice_label_4) + # if include_validation_with_hausdorff: + # summary += "\n\tHausdorff95 Label 0: {}".format(hausdorff95_label_0) + # summary += "\n\tHausdorff95 Label 1: {}".format(hausdorff95_label_1) + # summary += "\n\tHausdorff95 Label 2: {}".format(hausdorff95_label_2) + # summary += "\n\tHausdorff95 Label 4: {}".format(hausdorff95_label_4) + + + # experiment_results['round'].append(round_num) + # experiment_results['time'].append(total_simulated_time) + # experiment_results['convergence_score'].append(projected_auc) + # experiment_results['round_dice'].append(round_dice) + # experiment_results['dice_label_0'].append(dice_label_0) + # experiment_results['dice_label_1'].append(dice_label_1) + # experiment_results['dice_label_2'].append(dice_label_2) + # experiment_results['dice_label_4'].append(dice_label_4) + # if include_validation_with_hausdorff: + # experiment_results['hausdorff95_label_0'].append(hausdorff95_label_0) + # experiment_results['hausdorff95_label_1'].append(hausdorff95_label_1) + # experiment_results['hausdorff95_label_2'].append(hausdorff95_label_2) + # 
experiment_results['hausdorff95_label_4'].append(hausdorff95_label_4) + # logger.info(summary) + + # if save_checkpoints: + # logger.info(f'Saving checkpoint for round {round_num}') + # logger.info(f'To resume from this checkpoint, set the restore_from_checkpoint_folder parameter to \'{checkpoint_folder}\'') + # save_checkpoint(checkpoint_folder, aggregator, + # collaborator_names, collaborators, + # round_num, collaborator_time_stats, + # total_simulated_time, best_dice, + # best_dice_over_time_auc, + # collaborators_chosen_each_round, + # collaborator_times_per_round, + # experiment_results, + # summary) + + # # if the total_simulated_time has exceeded the maximum time, we break + # # in practice, this means that the previous round's model is the last model scored, + # # so a long final round should not actually benefit the competitor, since that final + # # model is never globally validated + # if total_simulated_time > MAX_SIMULATION_TIME: + # logger.info("Simulation time exceeded. Ending Experiment") + # break + + # # save the most recent aggregated model in native format to be copied over as best when appropriate + # # (note this model has not been validated by the collaborators yet) + # task_runner.rebuild_model(round_num, aggregator.last_tensor_dict, validation=True) + # task_runner.save_native(f'checkpoint/{checkpoint_folder}/temp_model.pkl') return pd.DataFrame.from_dict(experiment_results), checkpoint_folder + + +# High Level Things Required +# 1. Create Aggregator and see is private attributes setting required. +# 2. Create Collaborators and pass training/valid csv files as private attributes.(See what else can be passed as private attributes) - Done +# 3. How to set the hyperparameters for each round. +# 4. How to set the aggregation function. +# 5. Create FederatedFlow and what all steps are required. Define the functions for the steps. +# 6. Create Federated Model Class +# 7. Check how to update metrics after each round. +# 8. 
Check the requirement for setting times per collaborator. +# 9. How to store checkpoint and restore from checkpoint. +# 10. How to set the initial state of the model. +# \ No newline at end of file diff --git a/Task_1/fets_challenge/fets_challenge_model.py b/Task_1/fets_challenge/fets_challenge_model.py new file mode 100644 index 0000000..d3d6393 --- /dev/null +++ b/Task_1/fets_challenge/fets_challenge_model.py @@ -0,0 +1,728 @@ +# Copyright 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +"""GaNDLFTaskRunner module.""" + +import os +from copy import deepcopy +from typing import Union + +import numpy as np +import torch as pt +import yaml +from GANDLF.compute.forward_pass import validate_network +from GANDLF.compute.generic import create_pytorch_objects +from GANDLF.compute.training_loop import train_network +from GANDLF.config_manager import ConfigManager + +from openfl.federated.task.runner import TaskRunner +from openfl.utilities import TensorKey +from openfl.utilities.split import split_tensor_dict_for_holdouts +from logging import getLogger + +class FeTSChallengeModel(): + """GaNDLF Model class for Federated Learning. + + This class provides methods to manage and manipulate GaNDLF models in a + federated learning context. + + Attributes: + build_model (function or class): Function or Class to build the model. + lambda_opt (function): Lambda function for the optimizer. + model (Model): The built model. + optimizer (Optimizer): Optimizer for the model. + scheduler (Scheduler): Scheduler for the model. + params (Parameters): Parameters for the model. + device (str): Device for the model. + training_round_completed (bool): Whether the training round has been + completed. + required_tensorkeys_for_function (dict): Required tensorkeys for + function. + tensor_dict_split_fn_kwargs (dict): Keyword arguments for the tensor + dict split function. + """ + + def __init__( + self, gandlf_config_path + ): + """Initializes the GaNDLFTaskRunner object. 
+ + Sets up the initial state of the GaNDLFTaskRunner object, initializing + various components needed for the federated model. + Args: + gandlf_config (Union[str, dict], optional): GaNDLF configuration. + Can be a string (file path) or a dictionary. Defaults to None. + device (str, optional): Compute device. Defaults to None + (default="cpu"). + **kwargs: Additional parameters to pass to the function. + """ + + if isinstance(gandlf_config_path, str) and os.path.exists(gandlf_config_path): + gandlf_conf = yaml.safe_load(open(gandlf_config_path, "r")) + + gandlf_conf = ConfigManager(gandlf_config_path) + + ( + model, + optimizer, + train_loader, + val_loader, + scheduler, + params, + ) = create_pytorch_objects( + gandlf_conf, device="cpu" + ) + self.model = model + self.optimizer = optimizer + self.scheduler = scheduler + self.params = params + + self.training_round_completed = False + + self.required_tensorkeys_for_function = {} + + self.logger = getLogger(__name__) + + # FIXME: why isn't this initial call in runner_pt? + #self.initialize_tensorkeys_for_functions(with_opt_vars=False) + + # overwrite attribute to account for one optimizer param (in every + # child model that does not overwrite get and set tensordict) that is + # not a numpy array + self.tensor_dict_split_fn_kwargs = {} + self.tensor_dict_split_fn_kwargs.update({"holdout_tensor_names": ["__opt_state_needed"]}) + + def rebuild_model(self, round_num, input_tensor_dict, validation=False): + """Parse tensor names and update weights of model. Handles the + optimizer treatment. + + Args: + round_num: The current round number. + input_tensor_dict (dict): The input tensor dictionary used to + update the weights of the model. + validation (bool, optional): A flag indicating whether the model + is in validation. Defaults to False. 
+ + Returns: + None + """ + + if self.opt_treatment == "RESET": + self.reset_opt_vars() + self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) + elif ( + self.training_round_completed + and self.opt_treatment == "CONTINUE_GLOBAL" + and not validation + ): + self.set_tensor_dict(input_tensor_dict, with_opt_vars=True) + else: + self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) + + def validate(self, model, col_name, round_num, val_dataloader, params, scheduler, use_tqdm=False, **kwargs): + """Validate. + Run validation of the model on the local data. + Args: + col_name (str): Name of the collaborator. + round_num (int): Current round number. + input_tensor_dict (dict): Required input tensors (for model). + use_tqdm (bool, optional): Use tqdm to print a progress bar. + Defaults to False. + **kwargs: Key word arguments passed to GaNDLF main_run. + + Returns: + output_tensor_dict (dict): Tensors to send back to the aggregator. + {} (dict): Tensors to maintain in the local TensorDB. 
+ """ + #self.rebuild_model(round_num, input_tensor_dict, validation=True) + model.eval() + + epoch_valid_loss, epoch_valid_metric = validate_network( + model, + val_dataloader, + scheduler, + params, + round_num, + mode="validation", + ) + + #self.logger.info(epoch_valid_loss) + #self.logger.info(epoch_valid_metric) + + print(f"Validation loss: {epoch_valid_loss}") + print(f"Validation metric: {epoch_valid_metric}") + + origin = col_name + suffix = "validate" + # if kwargs["apply"] == "local": + # suffix += "_local" + # else: + # suffix += "_agg" + tags = ("metric", suffix) + + output_tensor_dict = {} + valid_loss_tensor_key = TensorKey("valid_loss", origin, round_num, True, tags) + output_tensor_dict[valid_loss_tensor_key] = np.array(epoch_valid_loss) + for k, v in epoch_valid_metric.items(): + tensor_key = TensorKey(f"valid_{k}", origin, round_num, True, tags) + output_tensor_dict[tensor_key] = np.array(v) + + # Empty list represents metrics that should only be stored locally + return output_tensor_dict, {} + + def train(self, model, col_name, round_num, train_loader, params, optimizer, use_tqdm=False, epochs=1, **kwargs): + """Train batches. + Train the model on the requested number of batches. + Args: + col_name (str): Name of the collaborator. + round_num (int): Current round number. + input_tensor_dict (dict): Required input tensors (for model). + use_tqdm (bool, optional): Use tqdm to print a progress bar. + Defaults to False. + epochs (int, optional): The number of epochs to train. Defaults to 1. + **kwargs: Key word arguments passed to GaNDLF main_run. + + Returns: + global_tensor_dict (dict): Tensors to send back to the aggregator. + local_tensor_dict (dict): Tensors to maintain in the local + TensorDB. 
+ """ + #self.rebuild_model(round_num, input_tensor_dict) + # set to "training" mode + self.model.train() + for epoch in range(epochs): + print(f"Run %s epoch of %s round", epoch, round_num) + # FIXME: do we want to capture these in an array + # rather than simply taking the last value? + epoch_train_loss, epoch_train_metric = train_network( + model, + train_loader, + optimizer, + params, + ) + + # output model tensors (Doesn't include TensorKey) + tensor_dict = self.get_tensor_dict(with_opt_vars=True) + + metric_dict = {"loss": epoch_train_loss} + for k, v in epoch_train_metric.items(): + metric_dict[f"train_{k}"] = v + + # Return global_tensor_dict, local_tensor_dict + # is this even pt-specific really? + global_tensor_dict, local_tensor_dict = create_tensorkey_dicts( + tensor_dict, + metric_dict, + col_name, + round_num, + self.logger, + self.tensor_dict_split_fn_kwargs, + ) + + # Update the required tensors if they need to be pulled from the + # aggregator + # TODO this logic can break if different collaborators have different + # roles between rounds. + # For example, if a collaborator only performs validation in the first + # round but training in the second, it has no way of knowing the + # optimizer state tensor names to request from the aggregator because + # these are only created after training occurs. A work around could + # involve doing a single epoch of training on random data to get the + # optimizer names, and then throwing away the model. + #if self.opt_treatment == "CONTINUE_GLOBAL": + # self.initialize_tensorkeys_for_functions(with_opt_vars=True) + + # This will signal that the optimizer values are now present, + # and can be loaded when the model is rebuilt + self.training_round_completed = True + + # Return global_tensor_dict, local_tensor_dict + return global_tensor_dict, local_tensor_dict + + def get_tensor_dict(self, with_opt_vars=False): + """Return the tensor dictionary. 
+ + Args: + with_opt_vars (bool): Return the tensor dictionary including the + optimizer tensors (Default=False). + + Returns: + state (dict): Tensor dictionary {**dict, **optimizer_dict} + """ + # Gets information regarding tensor model layers and optimizer state. + # FIXME: self.parameters() instead? Unclear if load_state_dict() or + # simple assignment is better + # for now, state dict gives us names which is good + # FIXME: do both and sanity check each time? + + state = to_cpu_numpy(self.model.state_dict()) + + if with_opt_vars: + opt_state = _get_optimizer_state(self.optimizer) + state = {**state, **opt_state} + + return state + + def _get_weights_names(self, with_opt_vars=False): + """Get the names of the weights. + + Args: + with_opt_vars (bool, optional): Include the optimizer variables. + Defaults to False. + + Returns: + list: List of weight names. + """ + # Gets information regarding tensor model layers and optimizer state. + # FIXME: self.parameters() instead? Unclear if load_state_dict() or + # simple assignment is better + # for now, state dict gives us names which is good + # FIXME: do both and sanity check each time? + + state = self.model.state_dict().keys() + + if with_opt_vars: + opt_state = _get_optimizer_state(self.model.optimizer) + state += opt_state.keys() + + return state + + def set_tensor_dict(self, tensor_dict, with_opt_vars=False): + """Set the tensor dictionary. + + Args: + tensor_dict (dict): The tensor dictionary. + with_opt_vars (bool, optional): Include the optimizer tensors. + Defaults to False. + """ + set_pt_model_from_tensor_dict(self.model, tensor_dict, self.device, with_opt_vars) + + def get_optimizer(self): + """Get the optimizer of this instance. + + Returns: + Optimizer: The optimizer of this instance. + """ + return self.optimizer + + def get_required_tensorkeys_for_function(self, func_name, **kwargs): + """Get the required tensors for specified function that could be called + as part of a task. 
+ + By default, this is just all of the layers and optimizer of the model. + + Args: + func_name (str): Function name. + **kwargs: Additional keyword arguments. + + Returns: + required_tensorkeys_for_function (list): List of required + TensorKey. + """ + if func_name == "validate": + local_model = "apply=" + str(kwargs["apply"]) + return self.required_tensorkeys_for_function[func_name][local_model] + else: + return self.required_tensorkeys_for_function[func_name] + + def initialize_tensorkeys_for_functions(self, with_opt_vars=False): + """Set the required tensors for all publicly accessible task methods. + + By default, this is just all of the layers and optimizer of the model. + Custom tensors should be added to this function. + + Args: + with_opt_vars (bool, optional): Include the optimizer tensors. + Defaults to False. + """ + # TODO there should be a way to programmatically iterate through + # all of the methods in the class and declare the tensors. + # For now this is done manually + + output_model_dict = self.get_tensor_dict(with_opt_vars=with_opt_vars) + global_model_dict, local_model_dict = split_tensor_dict_for_holdouts( + self.logger, output_model_dict, **self.tensor_dict_split_fn_kwargs + ) + if not with_opt_vars: + global_model_dict_val = global_model_dict + local_model_dict_val = local_model_dict + else: + output_model_dict = self.get_tensor_dict(with_opt_vars=False) + global_model_dict_val, local_model_dict_val = split_tensor_dict_for_holdouts( + self.logger, + output_model_dict, + **self.tensor_dict_split_fn_kwargs, + ) + + self.required_tensorkeys_for_function["train"] = [ + TensorKey(tensor_name, "GLOBAL", 0, False, ("model",)) + for tensor_name in global_model_dict + ] + self.required_tensorkeys_for_function["train"] += [ + TensorKey(tensor_name, "LOCAL", 0, False, ("model",)) + for tensor_name in local_model_dict + ] + + # Validation may be performed on local or aggregated (global) model, + # so there is an extra lookup dimension for kwargs + 
self.required_tensorkeys_for_function["validate"] = {} + # TODO This is not stateless. The optimizer will not be + self.required_tensorkeys_for_function["validate"]["apply=local"] = [ + TensorKey(tensor_name, "LOCAL", 0, False, ("trained",)) + for tensor_name in {**global_model_dict_val, **local_model_dict_val} + ] + self.required_tensorkeys_for_function["validate"]["apply=global"] = [ + TensorKey(tensor_name, "GLOBAL", 0, False, ("model",)) + for tensor_name in global_model_dict_val + ] + self.required_tensorkeys_for_function["validate"]["apply=global"] += [ + TensorKey(tensor_name, "LOCAL", 0, False, ("model",)) + for tensor_name in local_model_dict_val + ] + + def load_native( + self, + filepath, + model_state_dict_key="model_state_dict", + optimizer_state_dict_key="optimizer_state_dict", + **kwargs, + ): + """ + Load model and optimizer states from a pickled file specified by \ + filepath. model_/optimizer_state_dict args can be specified if needed. \ + Uses pt.load(). + + Args: + filepath (str): Path to pickle file created by pt.save(). + model_state_dict_key (str, optional): Key for model state dict in + pickled file. Defaults to 'model_state_dict'. + optimizer_state_dict_key (str, optional): Key for optimizer state + dict in picked file. Defaults to 'optimizer_state_dict'. + **kwargs: Additional keyword arguments. + """ + pickle_dict = pt.load(filepath) + self.model.load_state_dict(pickle_dict[model_state_dict_key]) + self.optimizer.load_state_dict(pickle_dict[optimizer_state_dict_key]) + + def save_native( + self, + filepath, + model_state_dict_key="model_state_dict", + optimizer_state_dict_key="optimizer_state_dict", + **kwargs, + ): + """ + Save model and optimizer states in a picked file specified by the \ + filepath. model_/optimizer_state_dicts are stored in the keys provided. \ + Uses pt.save(). + + Args: + filepath (str): Path to pickle file to be created by pt.save(). 
+ model_state_dict_key (str, optional): Key for model state dict in + pickled file. Defaults to 'model_state_dict'. + optimizer_state_dict_key (str, optional): Key for optimizer state + dict in picked file. Defaults to 'optimizer_state_dict'. + **kwargs: Additional keyword arguments. + """ + pickle_dict = { + model_state_dict_key: self.model.state_dict(), + optimizer_state_dict_key: self.optimizer.state_dict(), + } + pt.save(pickle_dict, filepath) + + def reset_opt_vars(self): + """Reset optimizer variables.""" + pass + + +def create_tensorkey_dicts( + tensor_dict, + metric_dict, + col_name, + round_num, + logger, + tensor_dict_split_fn_kwargs, +): + """Create dictionaries of TensorKeys for global and local tensors. + + Args: + tensor_dict (dict): Dictionary of tensors. + metric_dict (dict): Dictionary of metrics. + col_name (str): Name of the collaborator. + round_num (int): Current round number. + logger (Logger): Logger instance. + tensor_dict_split_fn_kwargs (dict): Keyword arguments for the tensor + dict split function. + + Returns: + global_tensor_dict (dict): Dictionary of global TensorKeys. + local_tensor_dict (dict): Dictionary of local TensorKeys. 
+ """ + origin = col_name + tags = ("trained",) + output_metric_dict = {} + for k, v in metric_dict.items(): + tk = TensorKey(k, origin, round_num, True, ("metric",)) + output_metric_dict[tk] = np.array(v) + + global_model_dict, local_model_dict = split_tensor_dict_for_holdouts( + logger, tensor_dict, **tensor_dict_split_fn_kwargs + ) + + # Create global tensorkeys + global_tensorkey_model_dict = { + TensorKey(tensor_name, origin, round_num, False, tags): nparray + for tensor_name, nparray in global_model_dict.items() + } + # Create tensorkeys that should stay local + local_tensorkey_model_dict = { + TensorKey(tensor_name, origin, round_num, False, tags): nparray + for tensor_name, nparray in local_model_dict.items() + } + # The train/validate aggregated function of the next round will look + # for the updated model parameters. + # This ensures they will be resolved locally + next_local_tensorkey_model_dict = { + TensorKey(tensor_name, origin, round_num + 1, False, ("model",)): nparray + for tensor_name, nparray in local_model_dict.items() + } + + global_tensor_dict = {**output_metric_dict, **global_tensorkey_model_dict} + local_tensor_dict = { + **local_tensorkey_model_dict, + **next_local_tensorkey_model_dict, + } + + return global_tensor_dict, local_tensor_dict + + +def set_pt_model_from_tensor_dict(model, tensor_dict, device, with_opt_vars=False): + """Set the tensor dictionary for the PyTorch model. + + Args: + model (Model): The PyTorch model. + tensor_dict (dict): Tensor dictionary. + device (str): Device for the model. + with_opt_vars (bool, optional): Include the optimizer tensors. + Defaults to False. + """ + # Sets tensors for model layers and optimizer state. + # FIXME: model.parameters() instead? Unclear if load_state_dict() or + # simple assignment is better + # for now, state dict gives us names, which is good + # FIXME: do both and sanity check each time? 
+ + new_state = {} + # Grabbing keys from model's state_dict helps to confirm we have + # everything + for k in model.state_dict(): + new_state[k] = pt.from_numpy(tensor_dict.pop(k)).to(device) + + # set model state + model.load_state_dict(new_state) + + if with_opt_vars: + # see if there is state to restore first + if tensor_dict.pop("__opt_state_needed") == "true": + _set_optimizer_state(model.get_optimizer(), device, tensor_dict) + + # sanity check that we did not record any state that was not used + assert len(tensor_dict) == 0 + + +def _derive_opt_state_dict(opt_state_dict): + """Separate optimizer tensors from the tensor dictionary. + + Flattens the optimizer state dict so as to have key, value pairs with + values as numpy arrays. + The keys have sufficient info to restore opt_state_dict using + expand_derived_opt_state_dict. + + Args: + opt_state_dict (dict): Optimizer state dictionary. + + Returns: + derived_opt_state_dict (dict): Optimizer state dictionary. + """ + derived_opt_state_dict = {} + + # Determine if state is needed for this optimizer. + if len(opt_state_dict["state"]) == 0: + derived_opt_state_dict["__opt_state_needed"] = "false" + return derived_opt_state_dict + + derived_opt_state_dict["__opt_state_needed"] = "true" + + # Using one example state key, we collect keys for the corresponding + # dictionary value. + example_state_key = opt_state_dict["param_groups"][0]["params"][0] + example_state_subkeys = set(opt_state_dict["state"][example_state_key].keys()) + + # We assume that the state collected for all params in all param groups is + # the same. + # We also assume that whether or not the associated values to these state + # subkeys is a tensor depends only on the subkey. + # Using assert statements to break the routine if these assumptions are + # incorrect. 
+ for state_key in opt_state_dict["state"].keys(): + assert example_state_subkeys == set(opt_state_dict["state"][state_key].keys()) + for state_subkey in example_state_subkeys: + assert isinstance( + opt_state_dict["state"][example_state_key][state_subkey], + pt.Tensor, + ) == isinstance(opt_state_dict["state"][state_key][state_subkey], pt.Tensor) + + state_subkeys = list(opt_state_dict["state"][example_state_key].keys()) + + # Tags will record whether the value associated to the subkey is a + # tensor or not. + state_subkey_tags = [] + for state_subkey in state_subkeys: + if isinstance(opt_state_dict["state"][example_state_key][state_subkey], pt.Tensor): + state_subkey_tags.append("istensor") + else: + state_subkey_tags.append("") + state_subkeys_and_tags = list(zip(state_subkeys, state_subkey_tags)) + + # Forming the flattened dict, using a concatenation of group index, + # subindex, tag, and subkey inserted into the flattened dict key - + # needed for reconstruction. + nb_params_per_group = [] + for group_idx, group in enumerate(opt_state_dict["param_groups"]): + for idx, param_id in enumerate(group["params"]): + for subkey, tag in state_subkeys_and_tags: + if tag == "istensor": + new_v = opt_state_dict["state"][param_id][subkey].cpu().numpy() + else: + new_v = np.array([opt_state_dict["state"][param_id][subkey]]) + derived_opt_state_dict[f"__opt_state_{group_idx}_{idx}_{tag}_{subkey}"] = new_v + nb_params_per_group.append(idx + 1) + # group lengths are also helpful for reconstructing + # original opt_state_dict structure + derived_opt_state_dict["__opt_group_lengths"] = np.array(nb_params_per_group) + + return derived_opt_state_dict + + +def expand_derived_opt_state_dict(derived_opt_state_dict, device): + """Expand the optimizer state dictionary. + + Takes a derived opt_state_dict and creates an opt_state_dict suitable as + input for load_state_dict for restoring optimizer state. 
+ Reconstructing state_subkeys_and_tags using the example key prefix, + "__opt_state_0_0_", certain to be present. + + Args: + derived_opt_state_dict (dict): Derived optimizer state dictionary. + device (str): Device for the model. + + Returns: + opt_state_dict (dict): Expanded optimizer state dictionary. + """ + state_subkeys_and_tags = [] + for key in derived_opt_state_dict: + if key.startswith("__opt_state_0_0_"): + stripped_key = key[16:] + if stripped_key.startswith("istensor_"): + this_tag = "istensor" + subkey = stripped_key[9:] + else: + this_tag = "" + subkey = stripped_key[1:] + state_subkeys_and_tags.append((subkey, this_tag)) + + opt_state_dict = {"param_groups": [], "state": {}} + nb_params_per_group = list(derived_opt_state_dict.pop("__opt_group_lengths").astype(np.int32)) + + # Construct the expanded dict. + for group_idx, nb_params in enumerate(nb_params_per_group): + these_group_ids = [f"{group_idx}_{idx}" for idx in range(nb_params)] + opt_state_dict["param_groups"].append({"params": these_group_ids}) + for this_id in these_group_ids: + opt_state_dict["state"][this_id] = {} + for subkey, tag in state_subkeys_and_tags: + flat_key = f"__opt_state_{this_id}_{tag}_{subkey}" + if tag == "istensor": + new_v = pt.from_numpy(derived_opt_state_dict.pop(flat_key)) + else: + # Here (for currrently supported optimizers) the subkey + # should be 'step' and the length of array should be one. + assert subkey == "step" + assert len(derived_opt_state_dict[flat_key]) == 1 + new_v = int(derived_opt_state_dict.pop(flat_key)) + opt_state_dict["state"][this_id][subkey] = new_v + + # sanity check that we did not miss any optimizer state + assert len(derived_opt_state_dict) == 0 + + return opt_state_dict + + +def _get_optimizer_state(optimizer): + """Get the state of the optimizer. + + Args: + optimizer (Optimizer): Optimizer. + + Returns: + derived_opt_state_dict (dict): State of the optimizer. 
+ """ + opt_state_dict = deepcopy(optimizer.state_dict()) + + # Optimizer state might not have some parts representing frozen parameters + # So we do not synchronize them + param_keys_with_state = set(opt_state_dict["state"].keys()) + for group in opt_state_dict["param_groups"]: + local_param_set = set(group["params"]) + params_to_sync = local_param_set & param_keys_with_state + group["params"] = sorted(params_to_sync) + + derived_opt_state_dict = _derive_opt_state_dict(opt_state_dict) + + return derived_opt_state_dict + + +def _set_optimizer_state(optimizer, device, derived_opt_state_dict): + """Set the state of the optimizer. + + Args: + optimizer (Optimizer): Optimizer. + device (str): Device for the model. + derived_opt_state_dict (dict): Derived optimizer state dictionary. + """ + temp_state_dict = expand_derived_opt_state_dict(derived_opt_state_dict, device) + + # FIXME: Figure out whether or not this breaks learning rate + # scheduling and the like. + # Setting default values. + # All optimizer.defaults are considered as not changing over course of + # training. + for group in temp_state_dict["param_groups"]: + for k, v in optimizer.defaults.items(): + group[k] = v + + optimizer.load_state_dict(temp_state_dict) + + +def to_cpu_numpy(state): + """Convert state to CPU as Numpy array. + + Args: + state (State): State to be converted. + + Returns: + state (dict): State as Numpy array. + """ + # deep copy so as to decouple from active model + state = deepcopy(state) + + for k, v in state.items(): + # When restoring, we currently assume all values are tensors. 
+ if not pt.is_tensor(v): + raise ValueError( + "We do not currently support non-tensors " "coming from model.state_dict()" + ) + # get as a numpy array, making sure is on cpu + state[k] = v.cpu().numpy() + return state diff --git a/Task_1/fets_challenge/fets_flow.py b/Task_1/fets_challenge/fets_flow.py new file mode 100644 index 0000000..8ced9c8 --- /dev/null +++ b/Task_1/fets_challenge/fets_flow.py @@ -0,0 +1,114 @@ +import os +from copy import deepcopy +from typing import Union + +import numpy as np +import torch as pt +import yaml + +from sys import path +from openfl.federated import Plan +from pathlib import Path + +from openfl.experimental.workflow.interface import FLSpec +from openfl.experimental.workflow.placement import aggregator, collaborator + +from GANDLF.compute.generic import create_pytorch_objects +from GANDLF.config_manager import ConfigManager + +#from .fets_challenge_model import inference, fedavg + +class FeTSFederatedFlow(FLSpec): + def __init__(self, model, rounds=3, **kwargs): + super().__init__(**kwargs) + self.fets_model = model + self.n_rounds = rounds + self.current_round = 1 + + @aggregator + def start(self): + self.collaborators = self.runtime.collaborators + self.next(self.initialize_collaborators, foreach='collaborators') + + @collaborator + def initialize_collaborators(self): + if isinstance(self.gandlf_config, str) and os.path.exists(self.gandlf_config): + gandlf_conf = yaml.safe_load(open(self.gandlf_config, "r")) + + print(gandlf_conf) + + #gandlf_config_path = "/home/ad_tbanda/code/fedAI/Challenge/Task_1/gandlf_config.yaml" + gandlf_config = Plan.load(Path(self.gandlf_config)) + print(gandlf_config) + print(gandlf_config['weighted_loss']) + + gandlf_conf = ConfigManager(self.gandlf_config) + + ( + model, + optimizer, + train_loader, + val_loader, + scheduler, + params, + ) = create_pytorch_objects( + gandlf_conf, train_csv=self.train_csv, val_csv=self.val_csv, device=self.device + ) + self.model = model + self.optimizer = 
optimizer + self.scheduler = scheduler + self.params = params + self.device = self.device + self.train_loader = train_loader + self.val_loader = val_loader + self.epochs = 1 + self.next(self.aggregated_model_validation) + + @collaborator + def aggregated_model_validation(self): + print(f'Performing aggregated model validation for collaborator {self.input}') + print(f'Val Loader: {self.val_loader}') + self.agg_validation_score = self.fets_model.validate(self.model, self.input, self.current_round, self.val_loader, self.params, self.scheduler) + print(f'{self.input} value of {self.agg_validation_score}') + self.next(self.train) + + @collaborator + def train(self): + print(f'Performing training for collaborator {self.input}') + self.fets_model.train(self.model, self.input, self.current_round, self.train_loader, self.params, self.optimizer, self.epochs) + self.metric = "Test" + self.next(self.local_model_validation) + + @collaborator + def local_model_validation(self): + self.local_validation_score = self.fets_model.validate(self.model, self.input, self.current_round, self.val_loader, self.params, self.scheduler) + print(f'Doing local model validation for collaborator {self.input}:' + + f' {self.local_validation_score}') + self.next(self.join) + + @aggregator + def join(self, inputs): + self.average_loss = sum(input.loss for input in inputs) / len(inputs) + self.aggregated_model_accuracy = sum( + input.agg_validation_score for input in inputs) / len(inputs) + self.local_model_accuracy = sum( + input.local_validation_score for input in inputs) / len(inputs) + print(f'Average aggregated model validation values = {self.aggregated_model_accuracy}') + print(f'Average training loss = {self.average_loss}') + print(f'Average local model validation values = {self.local_model_accuracy}') + print("Taking FedAvg of models of all collaborators") + self.model = fedavg([input.model for input in inputs]) + + self.next(self.internal_loop) + + @aggregator + def internal_loop(self): + if 
self.current_round == self.n_rounds: + self.next(self.end) + else: + self.current_round += 1 + self.next(self.aggregated_model_validation, foreach='collaborators') + + @aggregator + def end(self): + print('This is the end of the flow') \ No newline at end of file diff --git a/Task_1/fets_challenge/gandlf_config.yaml b/Task_1/fets_challenge/gandlf_config.yaml new file mode 100644 index 0000000..d1cd49f --- /dev/null +++ b/Task_1/fets_challenge/gandlf_config.yaml @@ -0,0 +1,63 @@ +batch_size: 1 +clip_grad: null +clip_mode: null +data_augmentation: {} +data_postprocessing: {} +data_preprocessing: + normalize: null +enable_padding: false +in_memory: true +inference_mechanism : + grid_aggregator_overlap: crop + patch_overlap: 0 +learning_rate: 0.001 +loss_function: dc +medcam_enabled: false +output_dir: '.' +metrics: +- dice +model: + amp: true + architecture: unet + base_filters: 32 + batch_norm: false + class_list: + - 0 + - 1 + dimension: 3 + final_layer: sigmoid + ignore_label_validation: null + norm_type: instance + num_channels: 4 +nested_training: + testing: -5 + validation: -5 +num_epochs: 1 +optimizer: + type: adam +parallel_compute_command: '' +patch_sampler: uniform +patch_size: +- 32 +- 32 +- 32 +patience: 1 +pin_memory_dataloader: false +print_rgb_label_warning: true +q_max_length: 1 +q_num_workers: 0 +q_samples_per_volume: 1 +q_verbose: false +save_output: false +save_training: false +scaling_factor: 1 +scheduler: + type: triangle +track_memory_usage: false +verbose: false +version: + maximum: 0.1.0 + minimum: 0.0.13 +weighted_loss: true +modality: rad +problem_type: classification \ No newline at end of file diff --git a/Task_1/fets_challenge/inference.py b/Task_1/fets_challenge/inference.py index 13f0680..48503a6 100644 --- a/Task_1/fets_challenge/inference.py +++ b/Task_1/fets_challenge/inference.py @@ -81,7 +81,7 @@ def generate_validation_csv(data_path, validation_csv_filename, working_dir): 0.0, 'placeholder', training_and_validation=False) - 
validation_csv_dict.to_csv(os.path.join(working_dir, 'validation_paths.csv'),index=False) + validation_csv_dict.to_csv(os.path.join(working_dir, 'valid.csv'),index=False) def replace_initializations(done_replacing, array, mask, replacement_value, initialization_value): """ @@ -222,14 +222,14 @@ def model_outputs_to_disc(data_path, overrides = { 'task_runner.settings.device': device, - 'task_runner.settings.val_csv': 'validation_paths.csv', + 'task_runner.settings.val_csv': 'valid.csv', 'task_runner.settings.train_csv': None, } # Update the plan if necessary plan = fx.update_plan(overrides) - plan.config['task_runner']['settings']['fets_config_dict']['save_output'] = True - plan.config['task_runner']['settings']['fets_config_dict']['output_dir'] = output_path + plan.config['task_runner']['settings']['gandlf_config']['save_output'] = True + plan.config['task_runner']['settings']['gandlf_config']['output_dir'] = output_path # overwrite datapath value for a single 'InferenceCol' collaborator plan.cols_data_paths['InferenceCol'] = data_path From ce2a4c13023ac9ad55b243b3dc26a4ea4a56396b Mon Sep 17 00:00:00 2001 From: "Agrawal, Kush" Date: Mon, 3 Mar 2025 22:06:31 -0800 Subject: [PATCH 03/16] Update metrics Signed-off-by: Agrawal, Kush --- Task_1/fets_challenge/fets_challenge_model.py | 35 +++-- Task_1/fets_challenge/fets_flow.py | 135 +++++++++++++++--- Task_1/fets_challenge/gandlf_config.yaml | 1 + 3 files changed, 141 insertions(+), 30 deletions(-) diff --git a/Task_1/fets_challenge/fets_challenge_model.py b/Task_1/fets_challenge/fets_challenge_model.py index d3d6393..bd70765 100644 --- a/Task_1/fets_challenge/fets_challenge_model.py +++ b/Task_1/fets_challenge/fets_challenge_model.py @@ -154,19 +154,25 @@ def validate(self, model, col_name, round_num, val_dataloader, params, scheduler print(f"Validation metric: {epoch_valid_metric}") origin = col_name - suffix = "validate" - # if kwargs["apply"] == "local": - # suffix += "_local" + suffix = 'validate' + # if 
kwargs['apply'] == 'local': + # suffix += '_local' # else: - # suffix += "_agg" - tags = ("metric", suffix) + # suffix += '_agg' + tags = ('metric', suffix) output_tensor_dict = {} - valid_loss_tensor_key = TensorKey("valid_loss", origin, round_num, True, tags) - output_tensor_dict[valid_loss_tensor_key] = np.array(epoch_valid_loss) + output_tensor_dict[TensorKey('valid_loss', origin, round_num, True, tags)] = np.array(epoch_valid_loss) for k, v in epoch_valid_metric.items(): - tensor_key = TensorKey(f"valid_{k}", origin, round_num, True, tags) - output_tensor_dict[tensor_key] = np.array(v) + print(f"Testing ->>>> Metric Key {k} Value {v}") + if isinstance(v, str): + v = list(map(float, v.split('_'))) + + if np.array(v).size == 1: + output_tensor_dict[TensorKey(f'valid_{k}', origin, round_num, True, tags)] = np.array(v) + else: + for idx,label in enumerate([0,1]): + output_tensor_dict[TensorKey(f'valid_{k}_{label}', origin, round_num, True, tags)] = np.array(v[idx]) # Empty list represents metrics that should only be stored locally return output_tensor_dict, {} @@ -205,9 +211,16 @@ def train(self, model, col_name, round_num, train_loader, params, optimizer, use # output model tensors (Doesn't include TensorKey) tensor_dict = self.get_tensor_dict(with_opt_vars=True) - metric_dict = {"loss": epoch_train_loss} + metric_dict = {'loss': epoch_train_loss} for k, v in epoch_train_metric.items(): - metric_dict[f"train_{k}"] = v + print(f"Testing ->>>> Metric Key {k} Value {v}") + if isinstance(v, str): + v = list(map(float, v.split('_'))) + if np.array(v).size == 1: + metric_dict[f'train_{k}'] = np.array(v) + else: + for idx,label in enumerate([0,1]): + metric_dict[f'train_{k}_{label}'] = np.array(v[idx]) # Return global_tensor_dict, local_tensor_dict # is this even pt-specific really? 
diff --git a/Task_1/fets_challenge/fets_flow.py b/Task_1/fets_challenge/fets_flow.py index 8ced9c8..71f4489 100644 --- a/Task_1/fets_challenge/fets_flow.py +++ b/Task_1/fets_challenge/fets_flow.py @@ -12,16 +12,34 @@ from openfl.experimental.workflow.interface import FLSpec from openfl.experimental.workflow.placement import aggregator, collaborator +from openfl.databases import TensorDB +from openfl.utilities import TaskResultKey, TensorKey, change_tags from GANDLF.compute.generic import create_pytorch_objects from GANDLF.config_manager import ConfigManager #from .fets_challenge_model import inference, fedavg +def get_metric(metric_name, col_name, fl_round, output_tensor_dict): + print(f'Getting metric {metric_name} for collaborator {col_name} at round {fl_round}') + target_tags = ('metric', 'validate') + tensor_key = TensorKey(metric_name, col_name, fl_round, True, target_tags) + + # Check if the key exists in the dictionary + value = None + if tensor_key in output_tensor_dict: + # Retrieve the value associated with the TensorKey + value = output_tensor_dict[tensor_key] + print(value) + else: + print(f"TensorKey {tensor_key} not found in the dictionary") + + return value + class FeTSFederatedFlow(FLSpec): - def __init__(self, model, rounds=3, **kwargs): + def __init__(self, fets_model, rounds=3, **kwargs): super().__init__(**kwargs) - self.fets_model = model + self.fets_model = fets_model self.n_rounds = rounds self.current_round = 1 @@ -62,42 +80,121 @@ def initialize_collaborators(self): self.train_loader = train_loader self.val_loader = val_loader self.epochs = 1 + self.tensor_db = TensorDB() self.next(self.aggregated_model_validation) @collaborator def aggregated_model_validation(self): print(f'Performing aggregated model validation for collaborator {self.input}') print(f'Val Loader: {self.val_loader}') - self.agg_validation_score = self.fets_model.validate(self.model, self.input, self.current_round, self.val_loader, self.params, self.scheduler) - 
print(f'{self.input} value of {self.agg_validation_score}') + self.agg_output_dict, _ = self.fets_model.validate(self.model, self.input, self.current_round, self.val_loader, self.params, self.scheduler) + print(f'{self.input} value of {self.agg_output_dict}') self.next(self.train) @collaborator def train(self): print(f'Performing training for collaborator {self.input}') - self.fets_model.train(self.model, self.input, self.current_round, self.train_loader, self.params, self.optimizer, self.epochs) - self.metric = "Test" + global_output_tensor_dict, local_output_tensor_dict = self.fets_model.train(self.model, self.input, self.current_round, self.train_loader, self.params, self.optimizer, self.epochs) + self.tensor_db.cache_tensor(global_output_tensor_dict) + self.tensor_db.cache_tensor(local_output_tensor_dict) self.next(self.local_model_validation) @collaborator def local_model_validation(self): - self.local_validation_score = self.fets_model.validate(self.model, self.input, self.current_round, self.val_loader, self.params, self.scheduler) + self.local_output_dict, _ = self.fets_model.validate(self.model, self.input, self.current_round, self.val_loader, self.params, self.scheduler) print(f'Doing local model validation for collaborator {self.input}:' - + f' {self.local_validation_score}') + + f' {self.local_output_dict}') self.next(self.join) @aggregator - def join(self, inputs): - self.average_loss = sum(input.loss for input in inputs) / len(inputs) - self.aggregated_model_accuracy = sum( - input.agg_validation_score for input in inputs) / len(inputs) - self.local_model_accuracy = sum( - input.local_validation_score for input in inputs) / len(inputs) - print(f'Average aggregated model validation values = {self.aggregated_model_accuracy}') - print(f'Average training loss = {self.average_loss}') - print(f'Average local model validation values = {self.local_model_accuracy}') - print("Taking FedAvg of models of all collaborators") - self.model = fedavg([input.model for 
input in inputs]) + def join(self, inputs): + + total_loss = 0.0 + total_dice = 0.0 + num_inputs = len(inputs) + + for idx, col in enumerate(inputs): + print(f'Aggregating results for {idx}') + round_loss = get_metric('valid_loss', str(idx + 1), self.current_round, col.agg_output_dict) + round_dice = get_metric('valid_dice', str(idx + 1), self.current_round, col.agg_output_dict) + dice_label_0 = get_metric('valid_dice_per_label_0', str(idx + 1), self.current_round, col.agg_output_dict) + dice_label_1 = get_metric('valid_dice_per_label_1', str(idx + 1), self.current_round, col.agg_output_dict) + + print(f'Round loss: {round_loss}') + print(f'Round dice: {round_dice}') + print(f'Dice label 0: {dice_label_0}') + print(f'Dice label 1: {dice_label_1}') + + total_loss += round_loss + total_dice += round_dice + # dice_label_0 = get_metric('valid_dice_per_label_0', self.current_round, aggregator.tensor_db) + # dice_label_1 = get_metric('valid_dice_per_label_1', self.current_round, aggregator.tensor_db) + # dice_label_2 = get_metric('valid_dice_per_label_2', self.current_round, aggregator.tensor_db) + # dice_label_4 = get_metric('valid_dice_per_label_4', self.current_round, aggregator.tensor_db) + #self.model = fedavg([input.model for input in inputs]) + + average_round_loss = total_loss / num_inputs + average_round_dice = total_dice / num_inputs + + print(f'Average round loss: {average_round_loss}') + print(f'Average round dice: {average_round_dice}') + + # times_per_collaborator = compute_times_per_collaborator(collaborator_names, + # training_collaborators, + # epochs_per_round, + # collaborator_data_loaders, + # collaborator_time_stats, + # round_num) + # collaborator_times_per_round[round_num] = times_per_collaborator + + total_simulated_time = 0 + best_dice = -1.0 + best_dice_over_time_auc = 0 + + # times_list = [(t, col) for col, t in times_per_collaborator.items()] + # times_list = sorted(times_list) + + # the round time is the max of the times_list + # round_time = 
max([t for t, _ in times_list]) + # total_simulated_time += round_time + + if best_dice < average_round_dice: + best_dice = average_round_dice + # Set the weights for the final model + # if round_num == 0: + # # here the initial model was validated (temp model does not exist) + # logger.info(f'Skipping best model saving to disk as it is a random initialization.') + # elif not os.path.exists(f'checkpoint/{checkpoint_folder}/temp_model.pkl'): + # raise ValueError(f'Expected temporary model at: checkpoint/{checkpoint_folder}/temp_model.pkl to exist but it was not found.') + # else: + # # here the temp model was the one validated + # shutil.copyfile(src=f'checkpoint/{checkpoint_folder}/temp_model.pkl',dst=f'checkpoint/{checkpoint_folder}/best_model.pkl') + # logger.info(f'Saved model with best average binary DICE: {best_dice} to ~/.local/workspace/checkpoint/{checkpoint_folder}/best_model.pkl') + + ## CONVERGENCE METRIC COMPUTATION + # update the auc score + # best_dice_over_time_auc += best_dice * round_time + + # project the auc score as remaining time * best dice + # this projection assumes that the current best score is carried forward for the entire week + # projected_auc = (MAX_SIMULATION_TIME - total_simulated_time) * best_dice + best_dice_over_time_auc + # projected_auc /= MAX_SIMULATION_TIME + + # # End of round summary + # summary = '"**** END OF ROUND {} SUMMARY *****"'.format(self.current_round) + # summary += "\n\tSimulation Time: {} minutes".format(round(total_simulated_time / 60, 2)) + # summary += "\n\t(Projected) Convergence Score: {}".format(projected_auc) + # summary += "\n\tDICE Label 0: {}".format(dice_label_0) + # summary += "\n\tDICE Label 1: {}".format(dice_label_1) + # summary += "\n\tDICE Label 2: {}".format(dice_label_2) + # summary += "\n\tDICE Label 4: {}".format(dice_label_4) + # if include_validation_with_hausdorff: + # summary += "\n\tHausdorff95 Label 0: {}".format(hausdorff95_label_0) + # summary += "\n\tHausdorff95 Label 1: 
{}".format(hausdorff95_label_1) + # summary += "\n\tHausdorff95 Label 2: {}".format(hausdorff95_label_2) + # summary += "\n\tHausdorff95 Label 4: {}".format(hausdorff95_label_4) + + # [TODO] : Aggregation Function self.next(self.internal_loop) diff --git a/Task_1/fets_challenge/gandlf_config.yaml b/Task_1/fets_challenge/gandlf_config.yaml index d1cd49f..867b870 100644 --- a/Task_1/fets_challenge/gandlf_config.yaml +++ b/Task_1/fets_challenge/gandlf_config.yaml @@ -16,6 +16,7 @@ medcam_enabled: false output_dir: '.' metrics: - dice +- dice_per_label model: amp: true architecture: unet From 78141e71b7352e134c9f37ad8afbdc65e6d70460 Mon Sep 17 00:00:00 2001 From: "Agrawal, Kush" Date: Thu, 6 Mar 2025 03:29:50 -0800 Subject: [PATCH 04/16] Added aggregation function Signed-off-by: Agrawal, Kush --- Task_1/FeTS_Challenge.py | 53 +-- Task_1/fets_challenge/experiment.py | 305 +++----------- Task_1/fets_challenge/fets_challenge_model.py | 27 +- Task_1/fets_challenge/fets_flow.py | 375 +++++++++++++----- Task_1/fets_challenge/gandlf_config.yaml | 34 +- 5 files changed, 400 insertions(+), 394 deletions(-) diff --git a/Task_1/FeTS_Challenge.py b/Task_1/FeTS_Challenge.py index 22dc67c..a098683 100644 --- a/Task_1/FeTS_Challenge.py +++ b/Task_1/FeTS_Challenge.py @@ -552,7 +552,8 @@ def FedAvgM_Selection(local_tensors, # the scores are returned in a Pandas dataframe -scores_dataframe, checkpoint_folder = run_challenge_experiment( +#scores_dataframe, +checkpoint_folder = run_challenge_experiment( aggregation_function=aggregation_function, choose_training_collaborators=choose_training_collaborators, training_hyper_parameters_for_round=training_hyper_parameters_for_round, @@ -566,7 +567,7 @@ def FedAvgM_Selection(local_tensors, restore_from_checkpoint_folder = restore_from_checkpoint_folder) -scores_dataframe +#scores_dataframe # ## Produce NIfTI files for best model outputs on the validation set @@ -578,36 +579,36 @@ def FedAvgM_Selection(local_tensors, # experiment (look for the 
log entry: "Created experiment folder experiment_##..." above). -from fets_challenge import model_outputs_to_disc -from pathlib import Path +# from fets_challenge import model_outputs_to_disc +# from pathlib import Path -# infer participant home folder -home = str(Path.home()) +# # infer participant home folder +# home = str(Path.home()) -# you will need to specify the correct experiment folder and the parent directory for -# the data you want to run inference over (assumed to be the experiment that just completed) +# # you will need to specify the correct experiment folder and the parent directory for +# # the data you want to run inference over (assumed to be the experiment that just completed) -#checkpoint_folder='experiment_1' -#data_path = -data_path = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_ValidationData' -validation_csv_filename = 'validation.csv' +# #checkpoint_folder='experiment_1' +# #data_path = +# data_path = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_ValidationData' +# validation_csv_filename = 'validation.csv' -# you can keep these the same if you wish -final_model_path = os.path.join(home, '.local/workspace/checkpoint', checkpoint_folder, 'best_model.pkl') +# # you can keep these the same if you wish +# final_model_path = os.path.join(home, '.local/workspace/checkpoint', checkpoint_folder, 'best_model.pkl') -# If the experiment is only run for a single round, use the temp model instead -if not Path(final_model_path).exists(): - final_model_path = os.path.join(home, '.local/workspace/checkpoint', checkpoint_folder, 'temp_model.pkl') +# # If the experiment is only run for a single round, use the temp model instead +# if not Path(final_model_path).exists(): +# final_model_path = os.path.join(home, '.local/workspace/checkpoint', checkpoint_folder, 'temp_model.pkl') -outputs_path = os.path.join(home, '.local/workspace/checkpoint', checkpoint_folder, 'model_outputs') +# outputs_path = os.path.join(home, '.local/workspace/checkpoint', checkpoint_folder, 
'model_outputs') -# Using this best model, we can now produce NIfTI files for model outputs -# using a provided data directory +# # Using this best model, we can now produce NIfTI files for model outputs +# # using a provided data directory -model_outputs_to_disc(data_path=data_path, - validation_csv=validation_csv_filename, - output_path=outputs_path, - native_model_path=final_model_path, - outputtag='', - device=device) +# model_outputs_to_disc(data_path=data_path, +# validation_csv=validation_csv_filename, +# output_path=outputs_path, +# native_model_path=final_model_path, +# outputtag='', +# device=device) diff --git a/Task_1/fets_challenge/experiment.py b/Task_1/fets_challenge/experiment.py index 3136b36..bab475e 100644 --- a/Task_1/fets_challenge/experiment.py +++ b/Task_1/fets_challenge/experiment.py @@ -17,6 +17,7 @@ from openfl.utilities import TensorKey from openfl.protocols import utils import openfl.native as fx +from openfl.databases import TensorDB import torch from .gandlf_csv_adapter import construct_fedsim_csv, extract_csv_partitions @@ -227,16 +228,36 @@ def get_metric(metric, fl_round, tensor_db): target_tags = ('metric', 'validate_agg') return float(tensor_db.tensor_db.query("tensor_name == @metric_name and round == @fl_round and tags == @target_tags").nparray) +def aggregator_private_attributes( + uuid, aggregation_type, round_number, collaborator_names, include_validation_with_hausdorff, choose_training_collaborators, training_hyper_parameters_for_round): + print(f'Tarun inside aggregator_private_attributes ->>>>>> Aggregation Type: {aggregation_type}') + print(f'Tarun inside aggregator_private_attributes ->>>>>> Round Number: {round_number}') + print(f'Tarun inside aggregator_private_attributes ->>>>>> Collaborator Names: {collaborator_names}') + print(f'Tarun inside aggregator_private_attributes ->>>>>> Choose Training Collaborators: {choose_training_collaborators}') + print(f'Tarun inside aggregator_private_attributes ->>>>>> Training Hyper 
Parameters for Round: {training_hyper_parameters_for_round}') + return {"uuid": uuid, + "aggregation_type" : aggregation_type, + "round_number": round_number, + "collaborator_names": collaborator_names, + "include_validation_with_hausdorff": include_validation_with_hausdorff, + "choose_training_collaborators": choose_training_collaborators, + "training_hyper_parameters_for_round": training_hyper_parameters_for_round, + "max_simulation_time": MAX_SIMULATION_TIME + } + + def collaborator_private_attributes( - index, n_collaborators, train_csv, valid_csv, gandlf_config, device + index, n_collaborators, train_csv, valid_csv, gandlf_config, device, training_hyper_parameters_for_round ): return { "train_csv": train_csv, "val_csv": valid_csv, "gandlf_config": gandlf_config, - "device": device + "device": device, + "training_hyper_parameters_for_round": training_hyper_parameters_for_round } + def run_challenge_experiment(aggregation_function, choose_training_collaborators, training_hyper_parameters_for_round, @@ -279,48 +300,32 @@ def run_challenge_experiment(aggregation_function, aggregation_wrapper = CustomAggregationWrapper(aggregation_function) # ---> [TODO] Set the aggregation function in the workflow - overrides = { - 'aggregator.settings.rounds_to_train': rounds_to_train, - 'aggregator.settings.db_store_rounds': db_store_rounds, - 'tasks.train.aggregation_type': aggregation_wrapper, - 'task_runner.settings.device': device, - } - - # Update the plan if necessary - # # [Kush - Flow] -> Update the Plan with the overrides - # ---> Not required in workflow - #plan = fx.update_plan(overrides) + # [TODO] [Workflow - API] Need to check db_store rounds + # overrides = { + # 'aggregator.settings.rounds_to_train': rounds_to_train, + # 'aggregator.settings.db_store_rounds': db_store_rounds, + # 'tasks.train.aggregation_type': aggregation_wrapper, + # 'task_runner.settings.device': device, + # } + # [TODO] [Workflow - API] How to update the gandfl_config runtime # if not 
include_validation_with_hausdorff: # plan.config['task_runner']['settings']['fets_config_dict']['metrics'] = ['dice','dice_per_label'] - # # Overwrite collaborator names - # plan.authorized_cols = collaborator_names - # # overwrite datapath values with the collaborator name itself - # for col in collaborator_names: - # # [Kush - Flow] -> Collaborator data path dictionary - # plan.cols_data_paths[col] = col - - # get the data loaders for each collaborator - # [Kush - Flow] -> def get_data_loader(self, collaborator_name): Builds the DataLoader for the collaborator based on plan - # --> Not required for workflow - # collaborator_data_loaders = {col: copy(plan).get_data_loader(col) for col in collaborator_names} - transformed_csv_dict = extract_csv_partitions(os.path.join(work, 'gandlf_paths.csv')) - # get the task runner, passing the first data loader - print('TESTING ->>>>>> Fetching TaskRunner ...') - # for col in collaborator_data_loaders: - # #Insert logic to serialize train / val CSVs here - # # transformed_csv_dict[col]['train'].to_csv(os.path.join(work, 'seg_test_train.csv')) - # # transformed_csv_dict[col]['val'].to_csv(os.path.join(work, 'seg_test_val.csv')) - # transformed_csv_dict[col]['train'].to_csv(os.path.join(work, 'train.csv')) - # transformed_csv_dict[col]['val'].to_csv(os.path.join(work, 'valid.csv')) - # # [Kush - Flow] -> def get_task_runner(self, data_loader): Builds the TaskRunner and returns returns the taskrunner instance for the collaborator based on plan - # # ---> [[TODO]] Create coll priv_attributes as per csv dictionary. 
- # task_runner = copy(plan).get_task_runner(collaborator_data_loaders[col]) - - aggregator = Aggregator() + + aggregator = Aggregator(name="aggregator", + private_attributes_callable=aggregator_private_attributes, + num_cpus=0.0, + num_gpus=0.0, + uuid='aggregator', + round_number=rounds_to_train, + collaborator_names=collaborator_names, + include_validation_with_hausdorff=include_validation_with_hausdorff, + aggregation_type=aggregation_wrapper, + choose_training_collaborators=choose_training_collaborators, + training_hyper_parameters_for_round=training_hyper_parameters_for_round) collaborators = [] for idx, col in enumerate(collaborator_names): @@ -347,7 +352,8 @@ def run_challenge_experiment(aggregation_function, train_csv=train_csv_path, valid_csv=val_csv_path, gandlf_config=gandlf_config_path, - device=device + device=device, + training_hyper_parameters_for_round=training_hyper_parameters_for_round ) ) @@ -380,7 +386,7 @@ def run_challenge_experiment(aggregation_function, flflow.runtime = local_runtime flflow.run() - # [Kush - Flow] -> Commenting as pretrained model is not used. + # [TODO] [Workflow - API] -> Commenting as pretrained model is not used. 
# ---> Define a new step in federated flow before training to load the pretrained model # if use_pretrained_model: # print('TESTING ->>>>>> Loading pretrained model...') @@ -401,11 +407,11 @@ def run_challenge_experiment(aggregation_function, # task_runner.model.load_state_dict(checkpoint['model_state_dict']) # task_runner.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) - # [Kush - Flow] -> [TODO] Compression Pipeline + # [TODO] [Workflow - API] Compression Pipeline # tensor_pipe = plan.get_tensor_pipe() # # Initialize model weights - # # [Kush - FLow] - [TODO] How to set the initial state in the workflow + # # [TODO] [Workflow - API] How to set the initial state in the workflow # init_state_path = plan.config['aggregator']['settings']['init_state_path'] # tensor_dict, _ = split_tensor_dict_for_holdouts(logger, task_runner.get_tensor_dict(False)) @@ -415,52 +421,18 @@ def run_challenge_experiment(aggregation_function, # utils.dump_proto(model_proto=model_snap, fpath=init_state_path) - # # [Kush - Flow] ->Fetch the required aggregator from plan - # # --> [SKIP] Not required for workflow as we will we creating aggregator and setting to runtime + # # [TODO] [Workflow - API] ->Fetch the required aggregator from plan # # get the aggregator, now that we have the initial weights file set up - # logger.info('Creating aggregator...') - # aggregator = plan.get_aggregator() # # manually override the aggregator UUID (for checkpoint resume when rounds change) # aggregator.uuid = 'aggregator' # aggregator._load_initial_tensors() - # # create our collaborators - # # [Kush - Flow] ->Fetch the required COLLABORTAOR from plan - # # --> [SKIP] Not required for workflow as we will we creating COLLABORATORS and setting to runtime - # logger.info('Creating collaborators...') - # collaborators = {col: copy(plan).get_collaborator(col, task_runner=task_runner, client=aggregator) for col in collaborator_names} - - # collaborator_time_stats = 
gen_collaborator_time_stats(plan.authorized_cols) - # collaborators_chosen_each_round = {} # collaborator_times_per_round = {} # logger.info('Starting experiment') - # total_simulated_time = 0 - # best_dice = -1.0 - # best_dice_over_time_auc = 0 - - # # results dataframe data - # experiment_results = { - # 'round':[], - # 'time': [], - # 'convergence_score': [], - # 'round_dice': [], - # 'dice_label_0': [], - # 'dice_label_1': [], - # 'dice_label_2': [], - # 'dice_label_4': [], - # } - # if include_validation_with_hausdorff: - # experiment_results.update({ - # 'hausdorff95_label_0': [], - # 'hausdorff95_label_1': [], - # 'hausdorff95_label_2': [], - # 'hausdorff95_label_4': [], - # }) - - # # [Kush-Flow] [TODO] Will check later + # # [TODO] [Workflow - API] Restore from checkpoint # # if restore_from_checkpoint_folder is None: # # checkpoint_folder = setup_checkpoint_folder() # # logger.info(f'\nCreated experiment folder {checkpoint_folder}...') @@ -494,8 +466,7 @@ def run_challenge_experiment(aggregation_function, # for round_num in range(starting_round_num, rounds_to_train): # # pick collaborators to train for the round - # # [Kush - Flow] -> Choose Training Collaborators - # # ---> [TODO] In flow based API's, in start we can pass as foreach = 'collaborators' + # # ---> [TODO] [Workflow - API] In flow based API's, in start we can pass as foreach = 'collaborators' # training_collaborators = choose_training_collaborators(collaborator_names, # aggregator.tensor_db._iterate(), # round_num, @@ -507,46 +478,12 @@ def run_challenge_experiment(aggregation_function, # # save the collaborators chosen this round # collaborators_chosen_each_round[round_num] = training_collaborators - # # get the hyper-parameters from the competitor - # # [KUSH - Flow] --> Need to set how to set hyper parameters in the workflow - # # --> [TODO] Set some private attribute for the collaborator - # hparams = training_hyper_parameters_for_round(collaborator_names, - # 
aggregator.tensor_db._iterate(), - # round_num, - # collaborators_chosen_each_round, - # collaborator_times_per_round) - - # learning_rate, epochs_per_round = hparams - - # if (epochs_per_round is None): - # logger.warning('Hyper-parameter function warning: function returned None for "epochs_per_round". Setting "epochs_per_round" to 1') - # epochs_per_round = 1 - - # hparam_message = "\n\tlearning rate: {}".format(learning_rate) - - # hparam_message += "\n\tepochs_per_round: {}".format(epochs_per_round) - - # logger.info("Hyper-parameters for round {}:{}".format(round_num, hparam_message)) - - # # cache each tensor in the aggregator tensor_db - # hparam_dict = {} - # tk = TensorKey(tensor_name='learning_rate', - # origin=aggregator.uuid, - # round_number=round_num, - # report=False, - # tags=('hparam', 'model')) - # hparam_dict[tk] = np.array(learning_rate) - # tk = TensorKey(tensor_name='epochs_per_round', - # origin=aggregator.uuid, - # round_number=round_num, - # report=False, - # tags=('hparam', 'model')) - # hparam_dict[tk] = np.array(epochs_per_round) - # # [Kush - FLow] -> [TODO] How to cache the tensor in the workflow ? + # + # # [TODO] [Workflow - API] How to cache the tensor in the workflow ? do we need to cache h-params ? # aggregator.tensor_db.cache_tensor(hparam_dict) # # pre-compute the times for each collaborator - # # [Kush - Flow] [TODO] What is the use of this ? + # # [TODO] [Workflow - API] What is the use of this ? 
# times_per_collaborator = compute_times_per_collaborator(collaborator_names, # training_collaborators, # epochs_per_round, @@ -555,144 +492,18 @@ def run_challenge_experiment(aggregation_function, # round_num) # collaborator_times_per_round[round_num] = times_per_collaborator - # # [Kush - Flow] -> Not required in workflow - # aggregator.assigner.set_training_collaborators(training_collaborators) # # update the state in the aggregation wrapper - # # [Kush - Flow] -> [TODO] See how to pass this in the workflow as aggregation function and use in JOIN step + # # [TODO] [Workflow - API] See how to pass this in the workflow as aggregation function and use in JOIN step # aggregation_wrapper.set_state_data_for_round(collaborators_chosen_each_round, collaborator_times_per_round) # # turn the times list into a list of tuples and sort it # times_list = [(t, col) for col, t in times_per_collaborator.items()] # times_list = sorted(times_list) - # # now call each collaborator in order of time - # # FIXME: this doesn't break up each task. We need this if we're doing straggler handling - # # [Kush - Flow] -> Below codeblock is not required in workflow as below two lines will be handled by the workflow - # # ---> [TODO] Create LocalRunTime using ray bakcend and do flow.run() to start the training + # # [TODO] [Workflow - API] Create LocalRunTime using ray bakcend and do flow.run() to start the training # for t, col in times_list: - # # set the task_runner data loader - # task_runner.data_loader = collaborator_data_loaders[col] - - # # run the collaborator - # collaborators[col].run_simulation() - # logger.info("Collaborator {} took simulated time: {} minutes".format(col, round(t / 60, 2))) - - # # the round time is the max of the times_list - # round_time = max([t for t, _ in times_list]) - # total_simulated_time += round_time - - # # [Kush - Flow] --> [TODO] How to set these metrics in the workflow and save the checkpoint ?? 
- # # get the performace validation scores for the round - # round_dice = get_metric('valid_dice', round_num, aggregator.tensor_db) - # dice_label_0 = get_metric('valid_dice_per_label_0', round_num, aggregator.tensor_db) - # dice_label_1 = get_metric('valid_dice_per_label_1', round_num, aggregator.tensor_db) - # dice_label_2 = get_metric('valid_dice_per_label_2', round_num, aggregator.tensor_db) - # dice_label_4 = get_metric('valid_dice_per_label_4', round_num, aggregator.tensor_db) - # if include_validation_with_hausdorff: - # hausdorff95_label_0 = get_metric('valid_hd95_per_label_0', round_num, aggregator.tensor_db) - # hausdorff95_label_1 = get_metric('valid_hd95_per_label_1', round_num, aggregator.tensor_db) - # hausdorff95_label_2 = get_metric('valid_hd95_per_label_2', round_num, aggregator.tensor_db) - # hausdorff95_label_4 = get_metric('valid_hd95_per_label_4', round_num, aggregator.tensor_db) - - # # update best score - # if best_dice < round_dice: - # best_dice = round_dice - # # Set the weights for the final model - # if round_num == 0: - # # here the initial model was validated (temp model does not exist) - # logger.info(f'Skipping best model saving to disk as it is a random initialization.') - # elif not os.path.exists(f'checkpoint/{checkpoint_folder}/temp_model.pkl'): - # raise ValueError(f'Expected temporary model at: checkpoint/{checkpoint_folder}/temp_model.pkl to exist but it was not found.') - # else: - # # here the temp model was the one validated - # shutil.copyfile(src=f'checkpoint/{checkpoint_folder}/temp_model.pkl',dst=f'checkpoint/{checkpoint_folder}/best_model.pkl') - # logger.info(f'Saved model with best average binary DICE: {best_dice} to ~/.local/workspace/checkpoint/{checkpoint_folder}/best_model.pkl') - - # ## RUN VALIDATION ON INTERMEDIATE CONSENSUS MODEL - # # set the task_runner data loader - # # task_runner.data_loader = collaborator_data_loaders[col] - - # ## CONVERGENCE METRIC COMPUTATION - # # update the auc score - # 
best_dice_over_time_auc += best_dice * round_time - - # # project the auc score as remaining time * best dice - # # this projection assumes that the current best score is carried forward for the entire week - # projected_auc = (MAX_SIMULATION_TIME - total_simulated_time) * best_dice + best_dice_over_time_auc - # projected_auc /= MAX_SIMULATION_TIME - - # # End of round summary - # summary = '"**** END OF ROUND {} SUMMARY *****"'.format(round_num) - # summary += "\n\tSimulation Time: {} minutes".format(round(total_simulated_time / 60, 2)) - # summary += "\n\t(Projected) Convergence Score: {}".format(projected_auc) - # summary += "\n\tDICE Label 0: {}".format(dice_label_0) - # summary += "\n\tDICE Label 1: {}".format(dice_label_1) - # summary += "\n\tDICE Label 2: {}".format(dice_label_2) - # summary += "\n\tDICE Label 4: {}".format(dice_label_4) - # if include_validation_with_hausdorff: - # summary += "\n\tHausdorff95 Label 0: {}".format(hausdorff95_label_0) - # summary += "\n\tHausdorff95 Label 1: {}".format(hausdorff95_label_1) - # summary += "\n\tHausdorff95 Label 2: {}".format(hausdorff95_label_2) - # summary += "\n\tHausdorff95 Label 4: {}".format(hausdorff95_label_4) - - - # experiment_results['round'].append(round_num) - # experiment_results['time'].append(total_simulated_time) - # experiment_results['convergence_score'].append(projected_auc) - # experiment_results['round_dice'].append(round_dice) - # experiment_results['dice_label_0'].append(dice_label_0) - # experiment_results['dice_label_1'].append(dice_label_1) - # experiment_results['dice_label_2'].append(dice_label_2) - # experiment_results['dice_label_4'].append(dice_label_4) - # if include_validation_with_hausdorff: - # experiment_results['hausdorff95_label_0'].append(hausdorff95_label_0) - # experiment_results['hausdorff95_label_1'].append(hausdorff95_label_1) - # experiment_results['hausdorff95_label_2'].append(hausdorff95_label_2) - # 
experiment_results['hausdorff95_label_4'].append(hausdorff95_label_4) - # logger.info(summary) - - # if save_checkpoints: - # logger.info(f'Saving checkpoint for round {round_num}') - # logger.info(f'To resume from this checkpoint, set the restore_from_checkpoint_folder parameter to \'{checkpoint_folder}\'') - # save_checkpoint(checkpoint_folder, aggregator, - # collaborator_names, collaborators, - # round_num, collaborator_time_stats, - # total_simulated_time, best_dice, - # best_dice_over_time_auc, - # collaborators_chosen_each_round, - # collaborator_times_per_round, - # experiment_results, - # summary) - - # # if the total_simulated_time has exceeded the maximum time, we break - # # in practice, this means that the previous round's model is the last model scored, - # # so a long final round should not actually benefit the competitor, since that final - # # model is never globally validated - # if total_simulated_time > MAX_SIMULATION_TIME: - # logger.info("Simulation time exceeded. Ending Experiment") - # break - - # # save the most recent aggregated model in native format to be copied over as best when appropriate - # # (note this model has not been validated by the collaborators yet) - # task_runner.rebuild_model(round_num, aggregator.last_tensor_dict, validation=True) - # task_runner.save_native(f'checkpoint/{checkpoint_folder}/temp_model.pkl') - - - return pd.DataFrame.from_dict(experiment_results), checkpoint_folder - - -# High Level Things Required -# 1. Create Aggregator and see is private attributes setting required. -# 2. Create Collaborators and pass training/valid csv files as private attributes.(See what else can be passed as private attributes) - Done -# 3. How to set the hyperparameters for each round. -# 4. How to set the aggregation function. -# 5. Create FederatedFlow and what all steps are required. Define the functions for the steps. -# 6. Create Federated Model Class -# 7. Check how to update metrics after each round. -# 8. 
Check the requirement for setting times per collaborator. -# 9. How to store checkpoint and restore from checkpoint. -# 10. How to set the initial state of the model. -# \ No newline at end of file + #return pd.DataFrame.from_dict(experiment_results), checkpoint_folder + return None \ No newline at end of file diff --git a/Task_1/fets_challenge/fets_challenge_model.py b/Task_1/fets_challenge/fets_challenge_model.py index bd70765..df633e7 100644 --- a/Task_1/fets_challenge/fets_challenge_model.py +++ b/Task_1/fets_challenge/fets_challenge_model.py @@ -63,6 +63,7 @@ def __init__( gandlf_conf = ConfigManager(gandlf_config_path) + # TODO -> CHECK HOW TO CREATE A MODEL HERE ( model, optimizer, @@ -155,10 +156,10 @@ def validate(self, model, col_name, round_num, val_dataloader, params, scheduler origin = col_name suffix = 'validate' - # if kwargs['apply'] == 'local': - # suffix += '_local' - # else: - # suffix += '_agg' + if kwargs['apply'] == 'local': + suffix += '_local' + else: + suffix += '_agg' tags = ('metric', suffix) output_tensor_dict = {} @@ -171,13 +172,13 @@ def validate(self, model, col_name, round_num, val_dataloader, params, scheduler if np.array(v).size == 1: output_tensor_dict[TensorKey(f'valid_{k}', origin, round_num, True, tags)] = np.array(v) else: - for idx,label in enumerate([0,1]): + for idx,label in enumerate([0,1,2,4]): output_tensor_dict[TensorKey(f'valid_{k}_{label}', origin, round_num, True, tags)] = np.array(v[idx]) # Empty list represents metrics that should only be stored locally return output_tensor_dict, {} - def train(self, model, col_name, round_num, train_loader, params, optimizer, use_tqdm=False, epochs=1, **kwargs): + def train(self, model, col_name, round_num, train_loader, params, optimizer, hparams_dict, use_tqdm=False, epochs=1, **kwargs): """Train batches. Train the model on the requested number of batches. 
Args: @@ -194,9 +195,18 @@ def train(self, model, col_name, round_num, train_loader, params, optimizer, use local_tensor_dict (dict): Tensors to maintain in the local TensorDB. """ + # handle the hparams + #epochs_per_round = int(input_tensor_dict.pop('epochs_per_round')) + #learning_rate = float(input_tensor_dict.pop('learning_rate')) + #self.rebuild_model(round_num, input_tensor_dict) # set to "training" mode self.model.train() + + # Set the learning rate + #for group in optimizer.param_groups: + # group['lr'] = learning_rate + for epoch in range(epochs): print(f"Run %s epoch of %s round", epoch, round_num) # FIXME: do we want to capture these in an array @@ -219,7 +229,7 @@ def train(self, model, col_name, round_num, train_loader, params, optimizer, use if np.array(v).size == 1: metric_dict[f'train_{k}'] = np.array(v) else: - for idx,label in enumerate([0,1]): + for idx,label in enumerate([0,1,2,4]): metric_dict[f'train_{k}_{label}'] = np.array(v[idx]) # Return global_tensor_dict, local_tensor_dict @@ -483,6 +493,9 @@ def create_tensorkey_dicts( logger, tensor_dict, **tensor_dict_split_fn_kwargs ) + # global_model_dict : [{x: np1}, {x1: np2}] + # global_tensorkey_model_dict : [{tk1: np1}, {tk2: np2}] + # Create global tensorkeys global_tensorkey_model_dict = { TensorKey(tensor_name, origin, round_num, False, tags): nparray diff --git a/Task_1/fets_challenge/fets_flow.py b/Task_1/fets_challenge/fets_flow.py index 71f4489..6cdfb89 100644 --- a/Task_1/fets_challenge/fets_flow.py +++ b/Task_1/fets_challenge/fets_flow.py @@ -2,6 +2,8 @@ from copy import deepcopy from typing import Union +import logging +import pandas as pd import numpy as np import torch as pt import yaml @@ -14,27 +16,34 @@ from openfl.experimental.workflow.placement import aggregator, collaborator from openfl.databases import TensorDB from openfl.utilities import TaskResultKey, TensorKey, change_tags +from .checkpoint_utils import setup_checkpoint_folder, save_checkpoint, load_checkpoint from 
GANDLF.compute.generic import create_pytorch_objects from GANDLF.config_manager import ConfigManager -#from .fets_challenge_model import inference, fedavg - -def get_metric(metric_name, col_name, fl_round, output_tensor_dict): - print(f'Getting metric {metric_name} for collaborator {col_name} at round {fl_round}') - target_tags = ('metric', 'validate') - tensor_key = TensorKey(metric_name, col_name, fl_round, True, target_tags) - - # Check if the key exists in the dictionary - value = None - if tensor_key in output_tensor_dict: - # Retrieve the value associated with the TensorKey - value = output_tensor_dict[tensor_key] - print(value) - else: - print(f"TensorKey {tensor_key} not found in the dictionary") - - return value +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +def get_metric(metric_name, fl_round, agg_tensor_db): + target_tags = ('metric', 'validate_agg') + metric_tensor_key = TensorKey(metric_name, 'aggregator', fl_round, True, target_tags) + logger.info(f'Getting metric {metric_name} at round {fl_round} tensor key: {metric_tensor_key}') + nparray = agg_tensor_db.get_tensor_from_cache(metric_tensor_key) + logger.info(f'nparray for {metric_name} at round {fl_round}: {nparray.item()}') + return nparray.item() + +def cache_tensor_dict(tensor_dict, agg_tensor_db, idx, agg_out_dict): + for key, value in tensor_dict.items(): + new_tags = change_tags(key.tags, add_field=str(idx + 1)) + modified_key = TensorKey( + tensor_name=key.tensor_name, + origin="aggregator", + round_number=key.round_number, + report=key.report, + tags=new_tags + ) + agg_out_dict[modified_key] = value + agg_tensor_db.cache_tensor(agg_out_dict) class FeTSFederatedFlow(FLSpec): def __init__(self, fets_model, rounds=3, **kwargs): @@ -46,19 +55,127 @@ def __init__(self, fets_model, rounds=3, **kwargs): @aggregator def start(self): self.collaborators = self.runtime.collaborators - self.next(self.initialize_collaborators, foreach='collaborators') + 
logger.info(f'Collaborators: {self.collaborators}') + #self.agg_tensor_db = TensorDB() + #self.next(self.initialize_collaborators, foreach='collaborators', exclude='agg_tensor_db') + self.next(self.fetch_hyper_parameters) + + self.experiment_results = { + 'round':[], + 'time': [], + 'convergence_score': [], + 'round_dice': [], + 'dice_label_0': [], + 'dice_label_1': [], + 'dice_label_2': [], + 'dice_label_4': [], + } + if self.include_validation_with_hausdorff: + self.experiment_results.update({ + 'hausdorff95_label_0': [], + 'hausdorff95_label_1': [], + 'hausdorff95_label_2': [], + 'hausdorff95_label_4': [], + }) + + self.total_simulated_time = 0 + self.best_dice = -1.0 + self.best_dice_over_time_auc = 0 + + # if self.restore_from_checkpoint_folder is None: + # checkpoint_folder = setup_checkpoint_folder() + # logger.info(f'\nCreated experiment folder {checkpoint_folder}...') + # starting_round_num = 0 + # else: + # if not Path(f'checkpoint/{restore_from_checkpoint_folder}').exists(): + # logger.warning(f'Could not find provided checkpoint folder: {restore_from_checkpoint_folder}. 
Exiting...') + # exit(1) + # else: + # logger.info(f'Attempting to load last completed round from {restore_from_checkpoint_folder}') + # state = load_checkpoint(restore_from_checkpoint_folder) + # checkpoint_folder = restore_from_checkpoint_folder + + # [loaded_collaborator_names, starting_round_num, collaborator_time_stats, + # total_simulated_time, best_dice, self.best_dice_over_time_auc, + # collaborators_chosen_each_round, collaborator_times_per_round, + # experiment_results, summary, agg_tensor_db] = state + + # if loaded_collaborator_names != self.collaborator_names: + # logger.error(f'Collaborator names found in checkpoint ({loaded_collaborator_names}) ' + # f'do not match provided collaborators ({self.collaborator_names})') + # exit(1) + + # logger.info(f'Previous summary for round {starting_round_num}') + # logger.info(summary) + + # starting_round_num += 1 + # self.tensor_db.tensor_db = agg_tensor_db + # self.round_number = starting_round_num + + @aggregator + def fetch_hyper_parameters(self): + logger.info('Fetching hyperparameters') + tensrdb = TensorDB() + collaborators_chosen_each_round = {} + collaborator_times_per_round = {} + hparams = self.training_hyper_parameters_for_round(self.collaborators, + tensrdb._iterate(), + self.current_round, + collaborators_chosen_each_round, + collaborator_times_per_round) + + learning_rate, epochs_per_round = hparams + + if (epochs_per_round is None): + logger.warning('Hyper-parameter function warning: function returned None for "epochs_per_round". 
Setting "epochs_per_round" to 1') + epochs_per_round = 1 + + hparam_message = "\n\tlearning rate: {}".format(learning_rate) + + hparam_message += "\n\tepochs_per_round: {}".format(epochs_per_round) + + logger.info("Hyper-parameters for round {}:{}".format(self.current_round, hparam_message)) + + # cache each tensor in the aggregator tensor_db + self.hparam_dict = {} + tk = TensorKey(tensor_name='learning_rate', + origin=self.uuid, + round_number=self.current_round, + report=False, + tags=('hparam', 'model')) + self.hparam_dict[tk] = np.array(learning_rate) + tk = TensorKey(tensor_name='epochs_per_round', + origin=self.uuid, + round_number=self.current_round, + report=False, + tags=('hparam', 'model')) + self.hparam_dict[tk] = np.array(epochs_per_round) + + + + # times_per_collaborator = compute_times_per_collaborator(collaborator_names, + # training_collaborators, + # epochs_per_round, + # collaborator_data_loaders, + # collaborator_time_stats, + # round_num) + + + if self.current_round == 1: + self.next(self.initialize_collaborators, foreach='collaborators') + else: + self.next(self.aggregated_model_validation, foreach='collaborators') + @collaborator def initialize_collaborators(self): if isinstance(self.gandlf_config, str) and os.path.exists(self.gandlf_config): gandlf_conf = yaml.safe_load(open(self.gandlf_config, "r")) - print(gandlf_conf) + logger.info(gandlf_conf) #gandlf_config_path = "/home/ad_tbanda/code/fedAI/Challenge/Task_1/gandlf_config.yaml" - gandlf_config = Plan.load(Path(self.gandlf_config)) - print(gandlf_config) - print(gandlf_config['weighted_loss']) + #gandlf_config = Plan.load(Path(self.gandlf_config)) gandlf_conf = ConfigManager(self.gandlf_config) @@ -80,86 +197,103 @@ def initialize_collaborators(self): self.train_loader = train_loader self.val_loader = val_loader self.epochs = 1 - self.tensor_db = TensorDB() + self.coll_tensor_db = TensorDB() self.next(self.aggregated_model_validation) @collaborator def aggregated_model_validation(self): 
- print(f'Performing aggregated model validation for collaborator {self.input}') - print(f'Val Loader: {self.val_loader}') - self.agg_output_dict, _ = self.fets_model.validate(self.model, self.input, self.current_round, self.val_loader, self.params, self.scheduler) - print(f'{self.input} value of {self.agg_output_dict}') + logger.info(f'Performing aggregated model validation for collaborator {self.input}') + self.agg_output_dict, _ = self.fets_model.validate(self.model, self.input, self.current_round, self.val_loader, self.params, self.scheduler, apply="global") + logger.info(f'{self.input} value of {self.agg_output_dict.keys()}') self.next(self.train) @collaborator def train(self): - print(f'Performing training for collaborator {self.input}') - global_output_tensor_dict, local_output_tensor_dict = self.fets_model.train(self.model, self.input, self.current_round, self.train_loader, self.params, self.optimizer, self.epochs) - self.tensor_db.cache_tensor(global_output_tensor_dict) - self.tensor_db.cache_tensor(local_output_tensor_dict) + logger.info(f'Performing training for collaborator {self.input}') + self.global_output_tensor_dict, local_output_tensor_dict = self.fets_model.train(self.model, self.input, self.current_round, self.train_loader, self.params, self.optimizer, self.hparam_dict, self.epochs) + #logger.info(f'{self.input} value of {self.global_output_tensor_dict.keys()}') self.next(self.local_model_validation) @collaborator def local_model_validation(self): - self.local_output_dict, _ = self.fets_model.validate(self.model, self.input, self.current_round, self.val_loader, self.params, self.scheduler) - print(f'Doing local model validation for collaborator {self.input}:' - + f' {self.local_output_dict}') + self.local_output_dict, _ = self.fets_model.validate(self.model, self.input, self.current_round, self.val_loader, self.params, self.scheduler, apply="local") + logger.info(f'Doing local model validation for collaborator {self.input}:' + f' 
{self.local_output_dict}') self.next(self.join) @aggregator def join(self, inputs): - - total_loss = 0.0 - total_dice = 0.0 - num_inputs = len(inputs) - + agg_tensor_db = TensorDB() + tensor_keys_per_col = {} for idx, col in enumerate(inputs): - print(f'Aggregating results for {idx}') - round_loss = get_metric('valid_loss', str(idx + 1), self.current_round, col.agg_output_dict) - round_dice = get_metric('valid_dice', str(idx + 1), self.current_round, col.agg_output_dict) - dice_label_0 = get_metric('valid_dice_per_label_0', str(idx + 1), self.current_round, col.agg_output_dict) - dice_label_1 = get_metric('valid_dice_per_label_1', str(idx + 1), self.current_round, col.agg_output_dict) - - print(f'Round loss: {round_loss}') - print(f'Round dice: {round_dice}') - print(f'Dice label 0: {dice_label_0}') - print(f'Dice label 1: {dice_label_1}') - - total_loss += round_loss - total_dice += round_dice - # dice_label_0 = get_metric('valid_dice_per_label_0', self.current_round, aggregator.tensor_db) - # dice_label_1 = get_metric('valid_dice_per_label_1', self.current_round, aggregator.tensor_db) - # dice_label_2 = get_metric('valid_dice_per_label_2', self.current_round, aggregator.tensor_db) - # dice_label_4 = get_metric('valid_dice_per_label_4', self.current_round, aggregator.tensor_db) - #self.model = fedavg([input.model for input in inputs]) - - average_round_loss = total_loss / num_inputs - average_round_dice = total_dice / num_inputs - - print(f'Average round loss: {average_round_loss}') - print(f'Average round dice: {average_round_dice}') - - # times_per_collaborator = compute_times_per_collaborator(collaborator_names, - # training_collaborators, - # epochs_per_round, - # collaborator_data_loaders, - # collaborator_time_stats, - # round_num) - # collaborator_times_per_round[round_num] = times_per_collaborator - - total_simulated_time = 0 - best_dice = -1.0 - best_dice_over_time_auc = 0 + logger.info(f'Aggregating results for {idx}') + agg_out_dict = {} + 
cache_tensor_dict(col.local_output_dict, agg_tensor_db, idx, agg_out_dict) + cache_tensor_dict(col.agg_output_dict, agg_tensor_db, idx, agg_out_dict) + #cache_tensor_dict(col.global_output_tensor_dict, agg_tensor_db, idx, agg_out_dict) + + # Store the keys for each collaborator + tensor_keys = [] + for tensor_key in agg_out_dict.keys(): + logger.info(f'Adding tensor key {tensor_key} to the dict of tensor keys') + tensor_keys.append(tensor_key) + tensor_keys_per_col[str(idx + 1)] = tensor_keys + + # [TODO] : Aggregation Function -> Collaborator Weight Dict + collaborator_weight_dict = {"1":0.33, "2":0.33, "3":0.34} + aggrgegated_tensor_dict = {} + for col,tensor_keys in tensor_keys_per_col.items(): + for tensor_key in tensor_keys: + tensor_name, origin, round_number, report, tags = tensor_key + logger.info(f'Aggregating tensor {tensor_name} from collaborator {origin} for round {round_number}') + new_tags = change_tags(tags, remove_field=col) + agg_tensor_key = TensorKey(tensor_name, origin, round_number, report, new_tags) + # returns the list of 2 elements if already processed otherwise 1 + agg_results = agg_tensor_db.get_aggregated_tensor( + agg_tensor_key, + collaborator_weight_dict, + aggregation_function=self.aggregation_type, + ) + logger.info(f'Aggregated tensor value for tensor key {agg_tensor_key}: {agg_results}') + + # if agg_results.size == 1: + # value = agg_results[0] + # if report: + # value = float(agg_results) + # new_aggregated_tags = change_tags(new_tags, add_field='aggregated') + # new_tensor_key = TensorKey(tensor_name, origin, round_number, report, new_aggregated_tags) + # logger.info(f'Stroing aggregated tensor key {new_tensor_key} with value {value}') + # aggrgegated_tensor_dict[new_tensor_key] = value + # else: + # logger.info(f'Aggregated tensor key {agg_tensor_key} already exists in the tensor database') + + # for input in inputs: + # # Add some logic to get the aggregated tensors from tensor dict -> aggrgegated_tensor_dict + # 
self.fets_model.rebuild_model(input.model, input, self.current_round, self.train_loader, self.params, self.optimizer, self.hparam_dict, self.epochs) + + # Cache the aggregated tensor dictionary in the tensor database + # agg_tensor_db.cache_tensor(aggrgegated_tensor_dict) + + round_loss = get_metric('valid_loss', self.current_round, agg_tensor_db) + round_dice = get_metric('valid_dice', self.current_round, agg_tensor_db) + dice_label_0 = get_metric('valid_dice_per_label_0', self.current_round, agg_tensor_db) + dice_label_1 = get_metric('valid_dice_per_label_1', self.current_round, agg_tensor_db) + dice_label_2 = get_metric('valid_dice_per_label_2', self.current_round, agg_tensor_db) + dice_label_4 = get_metric('valid_dice_per_label_4', self.current_round, agg_tensor_db) + if self.include_validation_with_hausdorff: + hausdorff95_label_0 = get_metric('valid_hd95_per_label_0', self.current_round, agg_tensor_db) + hausdorff95_label_1 = get_metric('valid_hd95_per_label_1', self.current_round, agg_tensor_db) + hausdorff95_label_2 = get_metric('valid_hd95_per_label_2', self.current_round, agg_tensor_db) + hausdorff95_label_4 = get_metric('valid_hd95_per_label_4', self.current_round, agg_tensor_db) # times_list = [(t, col) for col, t in times_per_collaborator.items()] # times_list = sorted(times_list) # the round time is the max of the times_list # round_time = max([t for t, _ in times_list]) - # total_simulated_time += round_time + # self.total_simulated_time += round_time - if best_dice < average_round_dice: - best_dice = average_round_dice + if self.best_dice < round_dice: + self.best_dice = round_dice # Set the weights for the final model # if round_num == 0: # # here the initial model was validated (temp model does not exist) @@ -169,43 +303,88 @@ def join(self, inputs): # else: # # here the temp model was the one validated # shutil.copyfile(src=f'checkpoint/{checkpoint_folder}/temp_model.pkl',dst=f'checkpoint/{checkpoint_folder}/best_model.pkl') - # 
logger.info(f'Saved model with best average binary DICE: {best_dice} to ~/.local/workspace/checkpoint/{checkpoint_folder}/best_model.pkl') + # logger.info(f'Saved model with best average binary DICE: {self.best_dice} to ~/.local/workspace/checkpoint/{checkpoint_folder}/best_model.pkl') ## CONVERGENCE METRIC COMPUTATION # update the auc score - # best_dice_over_time_auc += best_dice * round_time + # self.best_dice_over_time_auc += self.best_dice * round_time # project the auc score as remaining time * best dice # this projection assumes that the current best score is carried forward for the entire week - # projected_auc = (MAX_SIMULATION_TIME - total_simulated_time) * best_dice + best_dice_over_time_auc - # projected_auc /= MAX_SIMULATION_TIME + projected_auc = (self.max_simulation_time - self.total_simulated_time) * self.best_dice + self.best_dice_over_time_auc + projected_auc /= self.max_simulation_time # # End of round summary - # summary = '"**** END OF ROUND {} SUMMARY *****"'.format(self.current_round) - # summary += "\n\tSimulation Time: {} minutes".format(round(total_simulated_time / 60, 2)) - # summary += "\n\t(Projected) Convergence Score: {}".format(projected_auc) - # summary += "\n\tDICE Label 0: {}".format(dice_label_0) - # summary += "\n\tDICE Label 1: {}".format(dice_label_1) - # summary += "\n\tDICE Label 2: {}".format(dice_label_2) - # summary += "\n\tDICE Label 4: {}".format(dice_label_4) - # if include_validation_with_hausdorff: - # summary += "\n\tHausdorff95 Label 0: {}".format(hausdorff95_label_0) - # summary += "\n\tHausdorff95 Label 1: {}".format(hausdorff95_label_1) - # summary += "\n\tHausdorff95 Label 2: {}".format(hausdorff95_label_2) - # summary += "\n\tHausdorff95 Label 4: {}".format(hausdorff95_label_4) - - # [TODO] : Aggregation Function + summary = '"**** END OF ROUND {} SUMMARY *****"'.format(self.current_round) + summary += "\n\tSimulation Time: {} minutes".format(round(self.total_simulated_time / 60, 2)) + summary += 
"\n\t(Projected) Convergence Score: {}".format(projected_auc) + summary += "\n\tRound Loss: {}".format(round_loss) + summary += "\n\Round Dice: {}".format(round_dice) + summary += "\n\tDICE Label 0: {}".format(dice_label_0) + summary += "\n\tDICE Label 1: {}".format(dice_label_1) + summary += "\n\tDICE Label 2: {}".format(dice_label_2) + summary += "\n\tDICE Label 4: {}".format(dice_label_4) + if self.include_validation_with_hausdorff: + summary += "\n\tHausdorff95 Label 0: {}".format(hausdorff95_label_0) + summary += "\n\tHausdorff95 Label 1: {}".format(hausdorff95_label_1) + summary += "\n\tHausdorff95 Label 2: {}".format(hausdorff95_label_2) + summary += "\n\tHausdorff95 Label 4: {}".format(hausdorff95_label_4) + logger.info(summary) + + self.experiment_results['round'].append(self.current_round) + self.experiment_results['time'].append(self.total_simulated_time) + self.experiment_results['convergence_score'].append(projected_auc) + self.experiment_results['round_dice'].append(round_dice) + self.experiment_results['dice_label_0'].append(dice_label_0) + self.experiment_results['dice_label_1'].append(dice_label_1) + self.experiment_results['dice_label_2'].append(dice_label_2) + self.experiment_results['dice_label_4'].append(dice_label_4) + if self.include_validation_with_hausdorff: + self.experiment_results['hausdorff95_label_0'].append(hausdorff95_label_0) + self.experiment_results['hausdorff95_label_1'].append(hausdorff95_label_1) + self.experiment_results['hausdorff95_label_2'].append(hausdorff95_label_2) + self.experiment_results['hausdorff95_label_4'].append(hausdorff95_label_4) + logger.info(summary) + + # if save_checkpoints: + # logger.info(f'Saving checkpoint for round {round_num}') + # logger.info(f'To resume from this checkpoint, set the restore_from_checkpoint_folder parameter to \'{checkpoint_folder}\'') + # save_checkpoint(checkpoint_folder, aggregator, + # collaborator_names, collaborators, + # round_num, collaborator_time_stats, + # 
self.total_simulated_time, self.best_dice, + # self.best_dice_over_time_auc, + # collaborators_chosen_each_round, + # collaborator_times_per_round, + # experiment_results, + # summary) + + # if the total_simulated_time has exceeded the maximum time, we break + # in practice, this means that the previous round's model is the last model scored, + # so a long final round should not actually benefit the competitor, since that final + # model is never globally validated + if self.total_simulated_time > self.max_simulation_time: + logger.info("Simulation time exceeded. Ending Experiment") + self.next(self.end) + + # save the most recent aggregated model in native format to be copied over as best when appropriate + # (note this model has not been validated by the collaborators yet) + # self.fets_model.rebuild_model(round_num, aggregator.last_tensor_dict, validation=True) + # self.fets_model.save_native(f'checkpoint/{checkpoint_folder}/temp_model.pkl') self.next(self.internal_loop) @aggregator def internal_loop(self): if self.current_round == self.n_rounds: + logger.info('************* EXPERIMENT COMPLETED *************') + logger.info('Experiment results:') + logger.info(pd.DataFrame.from_dict(self.experiment_results)) self.next(self.end) else: self.current_round += 1 - self.next(self.aggregated_model_validation, foreach='collaborators') + self.next(self.fetch_hyper_parameters) @aggregator def end(self): - print('This is the end of the flow') \ No newline at end of file + logger.info('This is the end of the flow') \ No newline at end of file diff --git a/Task_1/fets_challenge/gandlf_config.yaml b/Task_1/fets_challenge/gandlf_config.yaml index 867b870..b9fbd1e 100644 --- a/Task_1/fets_challenge/gandlf_config.yaml +++ b/Task_1/fets_challenge/gandlf_config.yaml @@ -6,7 +6,7 @@ data_postprocessing: {} data_preprocessing: normalize: null enable_padding: false -in_memory: true +in_memory: false inference_mechanism : grid_aggregator_overlap: crop patch_overlap: 0 @@ -17,48 
+17,50 @@ output_dir: '.' metrics: - dice - dice_per_label +- hd95_per_label model: amp: true - architecture: unet + architecture: resunet base_filters: 32 - batch_norm: false class_list: - 0 - 1 + - 2 + - 4 dimension: 3 - final_layer: sigmoid + final_layer: softmax ignore_label_validation: null norm_type: instance num_channels: 4 nested_training: - testing: -5 + testing: 1 validation: -5 num_epochs: 1 -optimizer: - type: adam +optimizer: + type: sgd parallel_compute_command: '' -patch_sampler: uniform +patch_sampler: label patch_size: -- 32 -- 32 -- 32 -patience: 1 +- 64 +- 64 +- 64 +patience: 100 pin_memory_dataloader: false print_rgb_label_warning: true -q_max_length: 1 +q_max_length: 100 q_num_workers: 0 -q_samples_per_volume: 1 +q_samples_per_volume: 40 q_verbose: false save_output: false save_training: false scaling_factor: 1 scheduler: - type: triangle + type: triangle_modified track_memory_usage: false verbose: false version: maximum: 0.1.0 - minimum: 0.0.13 + minimum: 0.0.14 weighted_loss: true modality: rad problem_type: classification \ No newline at end of file From 0cea1036911d124ab7a8f2005679a4592b481e58 Mon Sep 17 00:00:00 2001 From: "Agrawal, Kush" Date: Fri, 7 Mar 2025 00:51:42 -0800 Subject: [PATCH 05/16] Updated rebuilding model logic Signed-off-by: Agrawal, Kush --- Task_1/fets_challenge/fets_challenge_model.py | 42 ++++---- Task_1/fets_challenge/fets_flow.py | 97 +++++++++++-------- 2 files changed, 78 insertions(+), 61 deletions(-) diff --git a/Task_1/fets_challenge/fets_challenge_model.py b/Task_1/fets_challenge/fets_challenge_model.py index df633e7..2071c9c 100644 --- a/Task_1/fets_challenge/fets_challenge_model.py +++ b/Task_1/fets_challenge/fets_challenge_model.py @@ -94,7 +94,7 @@ def __init__( self.tensor_dict_split_fn_kwargs = {} self.tensor_dict_split_fn_kwargs.update({"holdout_tensor_names": ["__opt_state_needed"]}) - def rebuild_model(self, round_num, input_tensor_dict, validation=False): + def rebuild_model(self, model, round_num, 
input_tensor_dict, device, validation=False): """Parse tensor names and update weights of model. Handles the optimizer treatment. @@ -108,18 +108,22 @@ def rebuild_model(self, round_num, input_tensor_dict, validation=False): Returns: None """ + self.device = device # [TODO] - FIX ME + self.model = model - if self.opt_treatment == "RESET": - self.reset_opt_vars() - self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) - elif ( - self.training_round_completed - and self.opt_treatment == "CONTINUE_GLOBAL" - and not validation - ): - self.set_tensor_dict(input_tensor_dict, with_opt_vars=True) - else: - self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) + self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) + + # if self.opt_treatment == "RESET": + # self.reset_opt_vars() + # self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) + # elif ( + # self.training_round_completed + # and self.opt_treatment == "CONTINUE_GLOBAL" + # and not validation + # ): + # self.set_tensor_dict(input_tensor_dict, with_opt_vars=True) + # else: + # self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) def validate(self, model, col_name, round_num, val_dataloader, params, scheduler, use_tqdm=False, **kwargs): """Validate. @@ -137,7 +141,7 @@ def validate(self, model, col_name, round_num, val_dataloader, params, scheduler {} (dict): Tensors to maintain in the local TensorDB. 
""" #self.rebuild_model(round_num, input_tensor_dict, validation=True) - model.eval() + #model.eval() epoch_valid_loss, epoch_valid_metric = validate_network( model, @@ -201,7 +205,7 @@ def train(self, model, col_name, round_num, train_loader, params, optimizer, hpa #self.rebuild_model(round_num, input_tensor_dict) # set to "training" mode - self.model.train() + model.train() # Set the learning rate #for group in optimizer.param_groups: @@ -219,7 +223,7 @@ def train(self, model, col_name, round_num, train_loader, params, optimizer, hpa ) # output model tensors (Doesn't include TensorKey) - tensor_dict = self.get_tensor_dict(with_opt_vars=True) + tensor_dict = self.get_tensor_dict(model, with_opt_vars=True) metric_dict = {'loss': epoch_train_loss} for k, v in epoch_train_metric.items(): @@ -263,7 +267,7 @@ def train(self, model, col_name, round_num, train_loader, params, optimizer, hpa # Return global_tensor_dict, local_tensor_dict return global_tensor_dict, local_tensor_dict - def get_tensor_dict(self, with_opt_vars=False): + def get_tensor_dict(self, model, with_opt_vars=False): """Return the tensor dictionary. Args: @@ -279,7 +283,7 @@ def get_tensor_dict(self, with_opt_vars=False): # for now, state dict gives us names which is good # FIXME: do both and sanity check each time? - state = to_cpu_numpy(self.model.state_dict()) + state = to_cpu_numpy(model.state_dict()) if with_opt_vars: opt_state = _get_optimizer_state(self.optimizer) @@ -363,7 +367,7 @@ def initialize_tensorkeys_for_functions(self, with_opt_vars=False): # all of the methods in the class and declare the tensors. 
# For now this is done manually - output_model_dict = self.get_tensor_dict(with_opt_vars=with_opt_vars) + output_model_dict = self.get_tensor_dict(self.model, with_opt_vars=with_opt_vars) global_model_dict, local_model_dict = split_tensor_dict_for_holdouts( self.logger, output_model_dict, **self.tensor_dict_split_fn_kwargs ) @@ -371,7 +375,7 @@ def initialize_tensorkeys_for_functions(self, with_opt_vars=False): global_model_dict_val = global_model_dict local_model_dict_val = local_model_dict else: - output_model_dict = self.get_tensor_dict(with_opt_vars=False) + output_model_dict = self.get_tensor_dict(self.model, with_opt_vars=False) global_model_dict_val, local_model_dict_val = split_tensor_dict_for_holdouts( self.logger, output_model_dict, diff --git a/Task_1/fets_challenge/fets_flow.py b/Task_1/fets_challenge/fets_flow.py index 6cdfb89..b6cefb9 100644 --- a/Task_1/fets_challenge/fets_flow.py +++ b/Task_1/fets_challenge/fets_flow.py @@ -29,7 +29,7 @@ def get_metric(metric_name, fl_round, agg_tensor_db): metric_tensor_key = TensorKey(metric_name, 'aggregator', fl_round, True, target_tags) logger.info(f'Getting metric {metric_name} at round {fl_round} tensor key: {metric_tensor_key}') nparray = agg_tensor_db.get_tensor_from_cache(metric_tensor_key) - logger.info(f'nparray for {metric_name} at round {fl_round}: {nparray.item()}') + #logger.info(f'nparray for {metric_name} at round {fl_round}: {nparray.item()}') return nparray.item() def cache_tensor_dict(tensor_dict, agg_tensor_db, idx, agg_out_dict): @@ -114,6 +114,9 @@ def start(self): @aggregator def fetch_hyper_parameters(self): + print("*" * 40) + print("Starting round {}".format(self.current_round)) + print("*" * 40) logger.info('Fetching hyperparameters') tensrdb = TensorDB() collaborators_chosen_each_round = {} @@ -162,21 +165,20 @@ def fetch_hyper_parameters(self): if self.current_round == 1: - self.next(self.initialize_collaborators, foreach='collaborators') + logger.info('[Next Step] : Initializing 
collaborators') + self.next(self.initialize_colls, foreach='collaborators') else: + logger.info('[Next Step] : Aggregated model validation') self.next(self.aggregated_model_validation, foreach='collaborators') @collaborator - def initialize_collaborators(self): + def initialize_colls(self): + logger.info(f'Initializing collaborator {self.input}') if isinstance(self.gandlf_config, str) and os.path.exists(self.gandlf_config): gandlf_conf = yaml.safe_load(open(self.gandlf_config, "r")) logger.info(gandlf_conf) - - #gandlf_config_path = "/home/ad_tbanda/code/fedAI/Challenge/Task_1/gandlf_config.yaml" - #gandlf_config = Plan.load(Path(self.gandlf_config)) - gandlf_conf = ConfigManager(self.gandlf_config) ( @@ -197,14 +199,24 @@ def initialize_collaborators(self): self.train_loader = train_loader self.val_loader = val_loader self.epochs = 1 - self.coll_tensor_db = TensorDB() self.next(self.aggregated_model_validation) + # @collaborator + # def init_tensors(self): + # logger.info(f'Initializing tensors for collaborator {self.input}') + # coll_tensor_dict = self.fets_model.get_tensor_dict(self.model) + # # for key, value in coll_tensor_dict.items(): + # # print(f'Adding tensor {key}') + # # print(f'Value of tensor {key} is {value}') + + # self.fets_model.rebuild_model(self.model, self.current_round, coll_tensor_dict, "cpu") + # self.next(self.aggregated_model_validation) + @collaborator def aggregated_model_validation(self): logger.info(f'Performing aggregated model validation for collaborator {self.input}') - self.agg_output_dict, _ = self.fets_model.validate(self.model, self.input, self.current_round, self.val_loader, self.params, self.scheduler, apply="global") - logger.info(f'{self.input} value of {self.agg_output_dict.keys()}') + self.agg_valid_dict, _ = self.fets_model.validate(self.model, self.input, self.current_round, self.val_loader, self.params, self.scheduler, apply="global") + #logger.info(f'{self.input} value of {self.agg_valid_dict.keys()}') 
self.next(self.train) @collaborator @@ -216,8 +228,9 @@ def train(self): @collaborator def local_model_validation(self): - self.local_output_dict, _ = self.fets_model.validate(self.model, self.input, self.current_round, self.val_loader, self.params, self.scheduler, apply="local") - logger.info(f'Doing local model validation for collaborator {self.input}:' + f' {self.local_output_dict}') + logger.info(f'Performing local model validation for collaborator {self.input}') + self.local_valid_dict, _ = self.fets_model.validate(self.model, self.input, self.current_round, self.val_loader, self.params, self.scheduler, apply="local") + #logger.info(f'Doing local model validation for collaborator {self.input}:' + f' {self.local_output_dict}') self.next(self.join) @aggregator @@ -227,51 +240,52 @@ def join(self, inputs): for idx, col in enumerate(inputs): logger.info(f'Aggregating results for {idx}') agg_out_dict = {} - cache_tensor_dict(col.local_output_dict, agg_tensor_db, idx, agg_out_dict) - cache_tensor_dict(col.agg_output_dict, agg_tensor_db, idx, agg_out_dict) - #cache_tensor_dict(col.global_output_tensor_dict, agg_tensor_db, idx, agg_out_dict) + cache_tensor_dict(col.local_valid_dict, agg_tensor_db, idx, agg_out_dict) + cache_tensor_dict(col.agg_valid_dict, agg_tensor_db, idx, agg_out_dict) + cache_tensor_dict(col.global_output_tensor_dict, agg_tensor_db, idx, agg_out_dict) # Store the keys for each collaborator tensor_keys = [] for tensor_key in agg_out_dict.keys(): - logger.info(f'Adding tensor key {tensor_key} to the dict of tensor keys') + #logger.info(f'Adding tensor key {tensor_key} to the dict of tensor keys') tensor_keys.append(tensor_key) tensor_keys_per_col[str(idx + 1)] = tensor_keys # [TODO] : Aggregation Function -> Collaborator Weight Dict collaborator_weight_dict = {"1":0.33, "2":0.33, "3":0.34} - aggrgegated_tensor_dict = {} for col,tensor_keys in tensor_keys_per_col.items(): for tensor_key in tensor_keys: tensor_name, origin, round_number, report, tags 
= tensor_key - logger.info(f'Aggregating tensor {tensor_name} from collaborator {origin} for round {round_number}') + #logger.info(f'Aggregating tensor {tensor_name} from collaborator {origin} for round {round_number}') new_tags = change_tags(tags, remove_field=col) agg_tensor_key = TensorKey(tensor_name, origin, round_number, report, new_tags) - # returns the list of 2 elements if already processed otherwise 1 + # Aggregates the tensor values for the tensor key and stores it in tensor_db agg_results = agg_tensor_db.get_aggregated_tensor( agg_tensor_key, collaborator_weight_dict, aggregation_function=self.aggregation_type, ) - logger.info(f'Aggregated tensor value for tensor key {agg_tensor_key}: {agg_results}') - - # if agg_results.size == 1: - # value = agg_results[0] - # if report: - # value = float(agg_results) - # new_aggregated_tags = change_tags(new_tags, add_field='aggregated') - # new_tensor_key = TensorKey(tensor_name, origin, round_number, report, new_aggregated_tags) - # logger.info(f'Stroing aggregated tensor key {new_tensor_key} with value {value}') - # aggrgegated_tensor_dict[new_tensor_key] = value - # else: - # logger.info(f'Aggregated tensor key {agg_tensor_key} already exists in the tensor database') - - # for input in inputs: - # # Add some logic to get the aggregated tensors from tensor dict -> aggrgegated_tensor_dict - # self.fets_model.rebuild_model(input.model, input, self.current_round, self.train_loader, self.params, self.optimizer, self.hparam_dict, self.epochs) - - # Cache the aggregated tensor dictionary in the tensor database - # agg_tensor_db.cache_tensor(aggrgegated_tensor_dict) + #logger.info(f'Aggregated tensor value for tensor key {agg_tensor_key}') + + agg_tensor_dict = {} + for col,tensor_keys in tensor_keys_per_col.items(): + for tensor_key in tensor_keys: + tensor_name, origin, round_number, report, tags = tensor_key + #logger.info(f'Training tensor_key {tensor_key}') + if 'trained' in tags: + #logger.info(f'Fetching tensor 
{tensor_name} from tensor_db for round {round_number}') + new_tags = change_tags(tags, remove_field=col) + new_tensor_key = TensorKey(tensor_name, origin, round_number, report, new_tags) + if tensor_name not in agg_tensor_dict: + agg_tensor_dict[tensor_name] = agg_tensor_db.get_tensor_from_cache(new_tensor_key) + #logger.info(f'Fetched tensor {tensor_name} from tensor_db for round {round_number}') + + # Rebuild the model with the aggregated tensor_dict + for input in inputs: + logger.info(f'Updating model for collaborator {input}') + local_tensor_dict = deepcopy(agg_tensor_dict) + self.fets_model.rebuild_model(input.model, self.current_round, local_tensor_dict, "cpu") + local_tensor_dict = None round_loss = get_metric('valid_loss', self.current_round, agg_tensor_db) round_dice = get_metric('valid_dice', self.current_round, agg_tensor_db) @@ -344,7 +358,6 @@ def join(self, inputs): self.experiment_results['hausdorff95_label_1'].append(hausdorff95_label_1) self.experiment_results['hausdorff95_label_2'].append(hausdorff95_label_2) self.experiment_results['hausdorff95_label_4'].append(hausdorff95_label_4) - logger.info(summary) # if save_checkpoints: # logger.info(f'Saving checkpoint for round {round_num}') @@ -377,9 +390,9 @@ def join(self, inputs): @aggregator def internal_loop(self): if self.current_round == self.n_rounds: - logger.info('************* EXPERIMENT COMPLETED *************') - logger.info('Experiment results:') - logger.info(pd.DataFrame.from_dict(self.experiment_results)) + print('************* EXPERIMENT COMPLETED *************') + print('Experiment results:') + print(pd.DataFrame.from_dict(self.experiment_results)) self.next(self.end) else: self.current_round += 1 From ddb11fd5567ec6c867309b19f803b79dbda93d42 Mon Sep 17 00:00:00 2001 From: Tarunkumar Banda Date: Fri, 7 Mar 2025 03:14:29 -0800 Subject: [PATCH 06/16] Added checkpointing --- Task_1/FeTS_Challenge.py | 2 +- Task_1/fets_challenge/checkpoint_utils.py | 4 +- 
Task_1/fets_challenge/experiment.py | 57 +++------- Task_1/fets_challenge/fets_flow.py | 125 +++++++++++----------- 4 files changed, 84 insertions(+), 104 deletions(-) diff --git a/Task_1/FeTS_Challenge.py b/Task_1/FeTS_Challenge.py index a098683..d1f52b9 100644 --- a/Task_1/FeTS_Challenge.py +++ b/Task_1/FeTS_Challenge.py @@ -526,7 +526,7 @@ def FedAvgM_Selection(local_tensors, institution_split_csv_filename = 'small_split.csv' # change this to point to the parent directory of the data -brats_training_data_parent_dir = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_TrainingData' +brats_training_data_parent_dir = '/home/ad_kagrawa2/Data/fedAI/MICCAI_FeTS2022_TrainingData' # increase this if you need a longer history for your algorithms # decrease this if you need to reduce system RAM consumption diff --git a/Task_1/fets_challenge/checkpoint_utils.py b/Task_1/fets_challenge/checkpoint_utils.py index 30d5706..9ee8c00 100644 --- a/Task_1/fets_challenge/checkpoint_utils.py +++ b/Task_1/fets_challenge/checkpoint_utils.py @@ -21,7 +21,7 @@ def setup_checkpoint_folder(): Path(checkpoint_folder).mkdir(parents=True, exist_ok=False) return experiment_folder -def save_checkpoint(checkpoint_folder, aggregator, +def save_checkpoint(checkpoint_folder, agg_tensor_db, collaborator_names, collaborators, round_num, collaborator_time_stats, total_simulated_time, best_dice, @@ -34,7 +34,7 @@ def save_checkpoint(checkpoint_folder, aggregator, Save latest checkpoint """ # Save aggregator tensor_db - aggregator.tensor_db.tensor_db.to_pickle(f'checkpoint/{checkpoint_folder}/aggregator_tensor_db.pkl') + agg_tensor_db.tensor_db.to_pickle(f'checkpoint/{checkpoint_folder}/aggregator_tensor_db.pkl') with open(f'checkpoint/{checkpoint_folder}/state.pkl', 'wb') as f: pickle.dump([collaborator_names, round_num, collaborator_time_stats, total_simulated_time, best_dice, best_dice_over_time_auc, collaborators_chosen_each_round, diff --git a/Task_1/fets_challenge/experiment.py 
b/Task_1/fets_challenge/experiment.py index bab475e..55824cd 100644 --- a/Task_1/fets_challenge/experiment.py +++ b/Task_1/fets_challenge/experiment.py @@ -229,12 +229,13 @@ def get_metric(metric, fl_round, tensor_db): return float(tensor_db.tensor_db.query("tensor_name == @metric_name and round == @fl_round and tags == @target_tags").nparray) def aggregator_private_attributes( - uuid, aggregation_type, round_number, collaborator_names, include_validation_with_hausdorff, choose_training_collaborators, training_hyper_parameters_for_round): - print(f'Tarun inside aggregator_private_attributes ->>>>>> Aggregation Type: {aggregation_type}') - print(f'Tarun inside aggregator_private_attributes ->>>>>> Round Number: {round_number}') - print(f'Tarun inside aggregator_private_attributes ->>>>>> Collaborator Names: {collaborator_names}') - print(f'Tarun inside aggregator_private_attributes ->>>>>> Choose Training Collaborators: {choose_training_collaborators}') - print(f'Tarun inside aggregator_private_attributes ->>>>>> Training Hyper Parameters for Round: {training_hyper_parameters_for_round}') + uuid, aggregation_type, round_number, collaborator_names, include_validation_with_hausdorff, choose_training_collaborators, training_hyper_parameters_for_round, restore_from_checkpoint_folder, save_checkpoints, collaborator_time_stats): + print(f'aggregator_private_attributes ->>>>>> Aggregation Type: {aggregation_type}') + print(f'aggregator_private_attributes ->>>>>> Round Number: {round_number}') + print(f'aggregator_private_attributes ->>>>>> Collaborator Names: {collaborator_names}') + print(f'aggregator_private_attributes ->>>>>> Choose Training Collaborators: {choose_training_collaborators}') + print(f'aggregator_private_attributes ->>>>>> Training Hyper Parameters for Round: {training_hyper_parameters_for_round}') + print(f'aggregator_private_attributes ->>>>>> restore_from_checkpoint_folder: {restore_from_checkpoint_folder}') return {"uuid": uuid, "aggregation_type" : 
aggregation_type, "round_number": round_number, @@ -242,7 +243,10 @@ def aggregator_private_attributes( "include_validation_with_hausdorff": include_validation_with_hausdorff, "choose_training_collaborators": choose_training_collaborators, "training_hyper_parameters_for_round": training_hyper_parameters_for_round, - "max_simulation_time": MAX_SIMULATION_TIME + "max_simulation_time": MAX_SIMULATION_TIME, + "restore_from_checkpoint_folder": restore_from_checkpoint_folder, + "save_checkpoints":save_checkpoints, + "collaborator_time_stats": collaborator_time_stats } @@ -299,7 +303,8 @@ def run_challenge_experiment(aggregation_function, print(f'TESTING ->>>>>> Collaborator names: {collaborator_names}') aggregation_wrapper = CustomAggregationWrapper(aggregation_function) # ---> [TODO] Set the aggregation function in the workflow - + + collaborator_time_stats = gen_collaborator_time_stats(collaborator_names) # [TODO] [Workflow - API] Need to check db_store rounds # overrides = { @@ -325,7 +330,10 @@ def run_challenge_experiment(aggregation_function, include_validation_with_hausdorff=include_validation_with_hausdorff, aggregation_type=aggregation_wrapper, choose_training_collaborators=choose_training_collaborators, - training_hyper_parameters_for_round=training_hyper_parameters_for_round) + training_hyper_parameters_for_round=training_hyper_parameters_for_round, + restore_from_checkpoint_folder=restore_from_checkpoint_folder, + save_checkpoints=save_checkpoints, + collaborator_time_stats=collaborator_time_stats) collaborators = [] for idx, col in enumerate(collaborator_names): @@ -432,37 +440,6 @@ def run_challenge_experiment(aggregation_function, # logger.info('Starting experiment') - # # [TODO] [Workflow - API] Restore from checkpoint - # # if restore_from_checkpoint_folder is None: - # # checkpoint_folder = setup_checkpoint_folder() - # # logger.info(f'\nCreated experiment folder {checkpoint_folder}...') - # # starting_round_num = 0 - # # else: - # # if not 
Path(f'checkpoint/{restore_from_checkpoint_folder}').exists(): - # # logger.warning(f'Could not find provided checkpoint folder: {restore_from_checkpoint_folder}. Exiting...') - # # exit(1) - # # else: - # # logger.info(f'Attempting to load last completed round from {restore_from_checkpoint_folder}') - # # state = load_checkpoint(restore_from_checkpoint_folder) - # # checkpoint_folder = restore_from_checkpoint_folder - - # # [loaded_collaborator_names, starting_round_num, collaborator_time_stats, - # # total_simulated_time, best_dice, best_dice_over_time_auc, - # # collaborators_chosen_each_round, collaborator_times_per_round, - # # experiment_results, summary, agg_tensor_db] = state - - # # if loaded_collaborator_names != collaborator_names: - # # logger.error(f'Collaborator names found in checkpoint ({loaded_collaborator_names}) ' - # # f'do not match provided collaborators ({collaborator_names})') - # # exit(1) - - # # logger.info(f'Previous summary for round {starting_round_num}') - # # logger.info(summary) - - # # starting_round_num += 1 - # # aggregator.tensor_db.tensor_db = agg_tensor_db - # # aggregator.round_number = starting_round_num - # for round_num in range(starting_round_num, rounds_to_train): # # pick collaborators to train for the round diff --git a/Task_1/fets_challenge/fets_flow.py b/Task_1/fets_challenge/fets_flow.py index b6cefb9..98a6372 100644 --- a/Task_1/fets_challenge/fets_flow.py +++ b/Task_1/fets_challenge/fets_flow.py @@ -7,6 +7,7 @@ import numpy as np import torch as pt import yaml +import shutil from sys import path from openfl.federated import Plan @@ -56,9 +57,9 @@ def __init__(self, fets_model, rounds=3, **kwargs): def start(self): self.collaborators = self.runtime.collaborators logger.info(f'Collaborators: {self.collaborators}') - #self.agg_tensor_db = TensorDB() - #self.next(self.initialize_collaborators, foreach='collaborators', exclude='agg_tensor_db') - self.next(self.fetch_hyper_parameters) + logger.info(f'save_checkpoints: 
{self.save_checkpoints}') + logger.info(f'collaborator_time_stats: {self.collaborator_time_stats}') + logger.info(f'restore_from_checkpoint_folder: {self.restore_from_checkpoint_folder}') self.experiment_results = { 'round':[], @@ -82,35 +83,39 @@ def start(self): self.best_dice = -1.0 self.best_dice_over_time_auc = 0 - # if self.restore_from_checkpoint_folder is None: - # checkpoint_folder = setup_checkpoint_folder() - # logger.info(f'\nCreated experiment folder {checkpoint_folder}...') - # starting_round_num = 0 - # else: - # if not Path(f'checkpoint/{restore_from_checkpoint_folder}').exists(): - # logger.warning(f'Could not find provided checkpoint folder: {restore_from_checkpoint_folder}. Exiting...') - # exit(1) - # else: - # logger.info(f'Attempting to load last completed round from {restore_from_checkpoint_folder}') - # state = load_checkpoint(restore_from_checkpoint_folder) - # checkpoint_folder = restore_from_checkpoint_folder - - # [loaded_collaborator_names, starting_round_num, collaborator_time_stats, - # total_simulated_time, best_dice, self.best_dice_over_time_auc, - # collaborators_chosen_each_round, collaborator_times_per_round, - # experiment_results, summary, agg_tensor_db] = state - - # if loaded_collaborator_names != self.collaborator_names: - # logger.error(f'Collaborator names found in checkpoint ({loaded_collaborator_names}) ' - # f'do not match provided collaborators ({self.collaborator_names})') - # exit(1) - - # logger.info(f'Previous summary for round {starting_round_num}') - # logger.info(summary) - - # starting_round_num += 1 - # self.tensor_db.tensor_db = agg_tensor_db - # self.round_number = starting_round_num + self.checkpoint_folder = "" + self.collaborators_chosen_each_round = {} + self.collaborator_times_per_round = {} + if self.restore_from_checkpoint_folder is None: + self.checkpoint_folder = setup_checkpoint_folder() + logger.info(f'\nCreated experiment folder {self.checkpoint_folder}...') + starting_round_num = 0 + else: + if 
not Path(f'checkpoint/{self.restore_from_checkpoint_folder}').exists(): + logger.warning(f'Could not find provided checkpoint folder: {self.restore_from_checkpoint_folder}. Exiting...') + exit(1) + else: + logger.info(f'Attempting to load last completed round from {self.restore_from_checkpoint_folder}') + state = load_checkpoint(self.restore_from_checkpoint_folder) + self.checkpoint_folder = self.restore_from_checkpoint_folder + + [loaded_collaborator_names, starting_round_num, self.collaborator_time_stats, + self.total_simulated_time, self.best_dice, self.best_dice_over_time_auc, + self.collaborators_chosen_each_round, self.collaborator_times_per_round, + self.experiment_results, summary, agg_tensor_db] = state + + if loaded_collaborator_names != self.collaborator_names: + logger.error(f'Collaborator names found in checkpoint ({loaded_collaborator_names}) ' + f'do not match provided collaborators ({self.collaborator_names})') + exit(1) + + logger.info(f'Previous summary for round {starting_round_num}') + logger.info(summary) + + starting_round_num += 1 + #self.tensor_db.tensor_db = agg_tensor_db + self.round_number = starting_round_num + self.next(self.fetch_hyper_parameters) @aggregator def fetch_hyper_parameters(self): @@ -119,13 +124,11 @@ def fetch_hyper_parameters(self): print("*" * 40) logger.info('Fetching hyperparameters') tensrdb = TensorDB() - collaborators_chosen_each_round = {} - collaborator_times_per_round = {} hparams = self.training_hyper_parameters_for_round(self.collaborators, tensrdb._iterate(), self.current_round, - collaborators_chosen_each_round, - collaborator_times_per_round) + self.collaborators_chosen_each_round, + self.collaborator_times_per_round) learning_rate, epochs_per_round = hparams @@ -252,7 +255,7 @@ def join(self, inputs): tensor_keys_per_col[str(idx + 1)] = tensor_keys # [TODO] : Aggregation Function -> Collaborator Weight Dict - collaborator_weight_dict = {"1":0.33, "2":0.33, "3":0.34} + collaborator_weight_dict = {'1': 
0.3333333333333333, '2': 0.3333333333333333, '3': 0.3333333333333333} for col,tensor_keys in tensor_keys_per_col.items(): for tensor_key in tensor_keys: tensor_name, origin, round_number, report, tags = tensor_key @@ -309,19 +312,19 @@ def join(self, inputs): if self.best_dice < round_dice: self.best_dice = round_dice # Set the weights for the final model - # if round_num == 0: - # # here the initial model was validated (temp model does not exist) - # logger.info(f'Skipping best model saving to disk as it is a random initialization.') - # elif not os.path.exists(f'checkpoint/{checkpoint_folder}/temp_model.pkl'): - # raise ValueError(f'Expected temporary model at: checkpoint/{checkpoint_folder}/temp_model.pkl to exist but it was not found.') - # else: - # # here the temp model was the one validated - # shutil.copyfile(src=f'checkpoint/{checkpoint_folder}/temp_model.pkl',dst=f'checkpoint/{checkpoint_folder}/best_model.pkl') - # logger.info(f'Saved model with best average binary DICE: {self.best_dice} to ~/.local/workspace/checkpoint/{checkpoint_folder}/best_model.pkl') + if self.current_round == 0: + # here the initial model was validated (temp model does not exist) + logger.info(f'Skipping best model saving to disk as it is a random initialization.') + elif not os.path.exists(f'checkpoint/{self.checkpoint_folder}/temp_model.pkl'): + raise ValueError(f'Expected temporary model at: checkpoint/{self.checkpoint_folder}/temp_model.pkl to exist but it was not found.') + else: + # here the temp model was the one validated + shutil.copyfile(src=f'checkpoint/{self.checkpoint_folder}/temp_model.pkl',dst=f'checkpoint/{self.checkpoint_folder}/best_model.pkl') + logger.info(f'Saved model with best average binary DICE: {self.best_dice} to ~/.local/workspace/checkpoint/{self.checkpoint_folder}/best_model.pkl') ## CONVERGENCE METRIC COMPUTATION # update the auc score - # self.best_dice_over_time_auc += self.best_dice * round_time + self.best_dice_over_time_auc += self.best_dice * 
self.current_round # project the auc score as remaining time * best dice # this projection assumes that the current best score is carried forward for the entire week @@ -359,18 +362,18 @@ def join(self, inputs): self.experiment_results['hausdorff95_label_2'].append(hausdorff95_label_2) self.experiment_results['hausdorff95_label_4'].append(hausdorff95_label_4) - # if save_checkpoints: - # logger.info(f'Saving checkpoint for round {round_num}') - # logger.info(f'To resume from this checkpoint, set the restore_from_checkpoint_folder parameter to \'{checkpoint_folder}\'') - # save_checkpoint(checkpoint_folder, aggregator, - # collaborator_names, collaborators, - # round_num, collaborator_time_stats, - # self.total_simulated_time, self.best_dice, - # self.best_dice_over_time_auc, - # collaborators_chosen_each_round, - # collaborator_times_per_round, - # experiment_results, - # summary) + if self.save_checkpoints: + logger.info(f'Saving checkpoint for round {self.current_round} : checkpoint folder {self.checkpoint_folder}') + logger.info(f'To resume from this checkpoint, set the restore_from_checkpoint_folder parameter to \'{self.checkpoint_folder}\'') + save_checkpoint(self.checkpoint_folder, agg_tensor_db, + self.collaborator_names, self.runtime.collaborators, + self.current_round, self.collaborator_time_stats, + self.total_simulated_time, self.best_dice, + self.best_dice_over_time_auc, + self.collaborators_chosen_each_round, + self.collaborator_times_per_round, + self.experiment_results, + summary) # if the total_simulated_time has exceeded the maximum time, we break # in practice, this means that the previous round's model is the last model scored, @@ -382,8 +385,8 @@ def join(self, inputs): # save the most recent aggregated model in native format to be copied over as best when appropriate # (note this model has not been validated by the collaborators yet) - # self.fets_model.rebuild_model(round_num, aggregator.last_tensor_dict, validation=True) - # 
self.fets_model.save_native(f'checkpoint/{checkpoint_folder}/temp_model.pkl') + # self.fets_model.rebuild_model(round_num, aggregator.last_tensor_dict, validation=True) + self.fets_model.save_native(f'checkpoint/{self.checkpoint_folder}/temp_model.pkl') self.next(self.internal_loop) From 4e61045f406d17ef36b075e8aaede6a58255bf49 Mon Sep 17 00:00:00 2001 From: "Agrawal, Kush" Date: Sun, 9 Mar 2025 23:55:57 -0700 Subject: [PATCH 07/16] Added tensor dictionary for handling persistent storage Signed-off-by: Agrawal, Kush --- Task_1/FeTS_Challenge.py | 4 +- Task_1/fets_challenge/experiment.py | 361 ++---------------- Task_1/fets_challenge/fets_challenge_model.py | 151 ++------ Task_1/fets_challenge/fets_data_loader.py | 55 +++ Task_1/fets_challenge/fets_flow.py | 280 ++++++++------ Task_1/fets_challenge/gandlf_config.yaml | 5 +- Task_1/fets_challenge/time_utils.py | 192 ++++++++++ 7 files changed, 486 insertions(+), 562 deletions(-) create mode 100644 Task_1/fets_challenge/fets_data_loader.py create mode 100644 Task_1/fets_challenge/time_utils.py diff --git a/Task_1/FeTS_Challenge.py b/Task_1/FeTS_Challenge.py index d1f52b9..3cd2578 100644 --- a/Task_1/FeTS_Challenge.py +++ b/Task_1/FeTS_Challenge.py @@ -518,7 +518,7 @@ def FedAvgM_Selection(local_tensors, # to those you specify immediately above. Changing the below value to False will change # this fact, excluding the three hausdorff measurements. As hausdorff distance is # expensive to compute, excluding them will speed up your experiments. -include_validation_with_hausdorff=True +include_validation_with_hausdorff=False # We encourage participants to experiment with partitioning_1 and partitioning_2, as well as to create # other partitionings to test your changes for generalization to multiple partitionings. 
@@ -526,7 +526,7 @@ def FedAvgM_Selection(local_tensors, institution_split_csv_filename = 'small_split.csv' # change this to point to the parent directory of the data -brats_training_data_parent_dir = '/home/ad_kagrawa2/Data/fedAI/MICCAI_FeTS2022_TrainingData' +brats_training_data_parent_dir = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_TrainingData' # increase this if you need a longer history for your algorithms # decrease this if you need to reduce system RAM consumption diff --git a/Task_1/fets_challenge/experiment.py b/Task_1/fets_challenge/experiment.py index 55824cd..9a1142f 100644 --- a/Task_1/fets_challenge/experiment.py +++ b/Task_1/fets_challenge/experiment.py @@ -30,6 +30,10 @@ from openfl.experimental.workflow.interface import FLSpec, Aggregator, Collaborator from openfl.experimental.workflow.runtime import LocalRuntime +logger = getLogger(__name__) +# This catches PyTorch UserWarnings for CPU +warnings.filterwarnings("ignore", category=UserWarning) + # one week # MINUTE = 60 # HOUR = 60 * MINUTE @@ -37,228 +41,29 @@ # WEEK = 7 * DAY MAX_SIMULATION_TIME = 7 * 24 * 60 * 60 -## COLLABORATOR TIMING DISTRIBUTIONS -# These data are derived from the actual timing information in the real-world FeTS information -# They reflect a subset of the institutions involved. 
-# Tuples are (mean, stddev) in seconds - -# time to train one patient -TRAINING_TIMES = [(6.710741331207654, 0.8726112813698301), - (2.7343911917098445, 0.023976155580152165), - (3.173076923076923, 0.04154320960517865), - (6.580379746835443, 0.22461890673025595), - (3.452046783625731, 0.47136389322749656), - (6.090788461700995, 0.08541499003440205), - (3.206933911159263, 0.1927067498514361), - (3.3358208955223883, 0.2950567549663471), - (4.391304347826087, 0.37464538999161057), - (6.324805129494594, 0.1413885448869165), - (7.415133477633478, 1.1198881747151301), - (5.806410256410255, 0.029926699295169234), - (6.300204918032787, 0.24932319729777577), - (5.886317567567567, 0.018627858809133223), - (5.478184991273998, 0.04902740607167421), - (6.32440159574468, 0.15838847558954935), - (20.661918328585003, 6.085405543890793), - (3.197901325478645, 0.07049966132127056), - (6.523963730569948, 0.2533266757118492), - (2.6540077569489338, 0.025503099659276184), - (1.8025746183640918, 0.06805805332403576)] - -# time to validate one patient -VALIDATION_TIMES = [(23.129135113591072, 2.5975116854269507), - (12.965544041450777, 0.3476297824941513), - (14.782051282051283, 0.5262660449172765), - (16.444936708860762, 0.42613177203005187), - (15.728654970760235, 4.327559980390658), - (12.946098012884802, 0.2449927822869217), - (15.335950126991456, 1.1587597276712558), - (24.024875621890544, 3.087348297794285), - (38.361702127659576, 2.240113332190875), - (16.320970580839827, 0.4995108101783225), - (30.805555555555554, 3.1836337269688237), - (12.100899742930592, 0.41122386959584895), - (13.099897540983607, 0.6693132795197584), - (9.690202702702702, 0.17513593019922968), - (10.06980802792321, 0.7947848617875114), - (14.605333333333334, 0.6012305898922827), - (36.30294396961064, 9.24123672148819), - (16.9130060292851, 0.7452868131028928), - (40.244078460399706, 3.7700993678269037), - (13.161603102779575, 0.1975347910041472), - (11.222161868549701, 0.7021223062972527)] - -# time to 
download the model -DOWNLOAD_TIMES = [(112.42869743589742, 14.456734719659513), - (117.26870618556701, 12.549951446132013), - (13.059666666666667, 4.8700489616521185), - (47.50220338983051, 14.92128656898884), - (162.27864210526315, 32.562113378948396), - (99.46072058823529, 13.808785580783224), - (33.6347090909091, 25.00299299660141), - (216.25489393939392, 19.176465340447848), - (217.4117230769231, 20.757673955585453), - (98.38857297297298, 13.205048376808929), - (88.87509473684209, 23.152936862511545), - (66.96994262295081, 16.682497150763503), - (36.668852040816326, 13.759109844677598), - (149.31716326530614, 26.018185409516104), - (139.847, 80.04755583050091), - (54.97624444444445, 16.645170929316794)] - -# time to upload the model -UPLOAD_TIMES = [(192.28497409326425, 21.537450985376967), - (194.60103626943004, 24.194406902237056), - (20.0, 0.0), - (52.43859649122807, 5.047207127169352), - (182.82417582417582, 14.793519078918195), - (143.38059701492537, 7.910690646792151), - (30.695652173913043, 9.668122350904568), - (430.95360824742266, 54.97790476867727), - (348.3174603174603, 30.14347985347738), - (141.43715846994536, 5.271340868190727), - (158.7433155080214, 64.87526819391198), - (81.06086956521739, 7.003461202082419), - (32.60621761658031, 5.0418315093016615), - (281.5388601036269, 90.60338778706557), - (194.34065934065933, 36.6519776778435), - (66.53787878787878, 16.456280602190606)] - -logger = getLogger(__name__) -# This catches PyTorch UserWarnings for CPU -warnings.filterwarnings("ignore", category=UserWarning) - -CollaboratorTimeStats = namedtuple('CollaboratorTimeStats', - [ - 'validation_mean', - 'training_mean', - 'download_speed_mean', - 'upload_speed_mean', - 'validation_std', - 'training_std', - 'download_speed_std', - 'upload_speed_std', - ] - ) - -def gen_collaborator_time_stats(collaborator_names, seed=0xFEEDFACE): - - np.random.seed(seed) - - stats = {} - for col in collaborator_names: - ml_index = np.random.randint(len(VALIDATION_TIMES)) 
- validation = VALIDATION_TIMES[ml_index] - training = TRAINING_TIMES[ml_index] - net_index = np.random.randint(len(DOWNLOAD_TIMES)) - download = DOWNLOAD_TIMES[net_index] - upload = UPLOAD_TIMES[net_index] - - stats[col] = CollaboratorTimeStats(validation_mean=validation[0], - training_mean=training[0], - download_speed_mean=download[0], - upload_speed_mean=upload[0], - validation_std=validation[1], - training_std=training[1], - download_speed_std=download[1], - upload_speed_std=upload[1]) - return stats - -def compute_times_per_collaborator(collaborator_names, - training_collaborators, - epochs_per_round, - collaborator_data, - collaborator_time_stats, - round_num): - np.random.seed(round_num) - times = {} - for col in collaborator_names: - time = 0 - - # stats - stats = collaborator_time_stats[col] - - # download time - download_time = np.random.normal(loc=stats.download_speed_mean, - scale=stats.download_speed_std) - download_time = max(1, download_time) - time += download_time - - # data loader - data = collaborator_data[col] - - # validation time - data_size = data.get_valid_data_size() - validation_time_per = np.random.normal(loc=stats.validation_mean, - scale=stats.validation_std) - validation_time_per = max(1, validation_time_per) - time += data_size * validation_time_per - - # only if training - if col in training_collaborators: - # training time - data_size = data.get_train_data_size() - training_time_per = np.random.normal(loc=stats.training_mean, - scale=stats.training_std) - training_time_per = max(1, training_time_per) - - # training data size depends on the hparams - data_size *= epochs_per_round - time += data_size * training_time_per - - # if training, we also validate the locally updated model - data_size = data.get_valid_data_size() - validation_time_per = np.random.normal(loc=stats.validation_mean, - scale=stats.validation_std) - validation_time_per = max(1, validation_time_per) - time += data_size * validation_time_per - - # upload time - 
upload_time = np.random.normal(loc=stats.upload_speed_mean, - scale=stats.upload_speed_std) - upload_time = max(1, upload_time) - time += upload_time - - times[col] = time - return times - - -def get_metric(metric, fl_round, tensor_db): - metric_name = metric - target_tags = ('metric', 'validate_agg') - return float(tensor_db.tensor_db.query("tensor_name == @metric_name and round == @fl_round and tags == @target_tags").nparray) - def aggregator_private_attributes( - uuid, aggregation_type, round_number, collaborator_names, include_validation_with_hausdorff, choose_training_collaborators, training_hyper_parameters_for_round, restore_from_checkpoint_folder, save_checkpoints, collaborator_time_stats): - print(f'aggregator_private_attributes ->>>>>> Aggregation Type: {aggregation_type}') - print(f'aggregator_private_attributes ->>>>>> Round Number: {round_number}') - print(f'aggregator_private_attributes ->>>>>> Collaborator Names: {collaborator_names}') - print(f'aggregator_private_attributes ->>>>>> Choose Training Collaborators: {choose_training_collaborators}') - print(f'aggregator_private_attributes ->>>>>> Training Hyper Parameters for Round: {training_hyper_parameters_for_round}') - print(f'aggregator_private_attributes ->>>>>> restore_from_checkpoint_folder: {restore_from_checkpoint_folder}') - return {"uuid": uuid, + uuid, aggregation_type, collaborator_names, include_validation_with_hausdorff, choose_training_collaborators, + training_hyper_parameters_for_round, restore_from_checkpoint_folder, save_checkpoints): + return {"uuid": uuid, "aggregation_type" : aggregation_type, - "round_number": round_number, - "collaborator_names": collaborator_names, - "include_validation_with_hausdorff": include_validation_with_hausdorff, - "choose_training_collaborators": choose_training_collaborators, - "training_hyper_parameters_for_round": training_hyper_parameters_for_round, - "max_simulation_time": MAX_SIMULATION_TIME, - "restore_from_checkpoint_folder": 
restore_from_checkpoint_folder, - "save_checkpoints":save_checkpoints, - "collaborator_time_stats": collaborator_time_stats - } + "collaborator_names": collaborator_names, + "include_validation_with_hausdorff": include_validation_with_hausdorff, + "choose_training_collaborators": choose_training_collaborators, + "training_hyper_parameters_for_round": training_hyper_parameters_for_round, + "max_simulation_time": MAX_SIMULATION_TIME, + "restore_from_checkpoint_folder": restore_from_checkpoint_folder, + "save_checkpoints":save_checkpoints +} def collaborator_private_attributes( - index, n_collaborators, train_csv, valid_csv, gandlf_config, device, training_hyper_parameters_for_round - ): + index, n_collaborators, gandlf_config, train_csv_path, val_csv_path): return { - "train_csv": train_csv, - "val_csv": valid_csv, + "index": index, + "n_collaborators": n_collaborators, "gandlf_config": gandlf_config, - "device": device, - "training_hyper_parameters_for_round": training_hyper_parameters_for_round + "train_csv_path": train_csv_path, + "val_csv_path": val_csv_path } @@ -275,8 +80,6 @@ def run_challenge_experiment(aggregation_function, include_validation_with_hausdorff=True, use_pretrained_model=False): - #fx.init('fets_challenge_workspace') - from sys import path, exit file = Path(__file__).resolve() @@ -284,11 +87,6 @@ def run_challenge_experiment(aggregation_function, work = Path.cwd().resolve() gandlf_config_path = os.path.join(root, 'gandlf_config.yaml') - - print(f"TESTING ->>>>>> Gandlf Config Path: {gandlf_config_path}") - - print(f"TESTING ->>>>>> Work directory: {work}") - path.append(str(root)) path.insert(0, str(work)) @@ -300,18 +98,13 @@ def run_challenge_experiment(aggregation_function, 0.8, gandlf_csv_path) - print(f'TESTING ->>>>>> Collaborator names: {collaborator_names}') + print(f'Collaborator names for experiment : {collaborator_names}') aggregation_wrapper = CustomAggregationWrapper(aggregation_function) # ---> [TODO] Set the aggregation function 
in the workflow - - collaborator_time_stats = gen_collaborator_time_stats(collaborator_names) # [TODO] [Workflow - API] Need to check db_store rounds # overrides = { - # 'aggregator.settings.rounds_to_train': rounds_to_train, # 'aggregator.settings.db_store_rounds': db_store_rounds, - # 'tasks.train.aggregation_type': aggregation_wrapper, - # 'task_runner.settings.device': device, # } # [TODO] [Workflow - API] How to update the gandfl_config runtime @@ -320,21 +113,6 @@ def run_challenge_experiment(aggregation_function, transformed_csv_dict = extract_csv_partitions(os.path.join(work, 'gandlf_paths.csv')) - aggregator = Aggregator(name="aggregator", - private_attributes_callable=aggregator_private_attributes, - num_cpus=0.0, - num_gpus=0.0, - uuid='aggregator', - round_number=rounds_to_train, - collaborator_names=collaborator_names, - include_validation_with_hausdorff=include_validation_with_hausdorff, - aggregation_type=aggregation_wrapper, - choose_training_collaborators=choose_training_collaborators, - training_hyper_parameters_for_round=training_hyper_parameters_for_round, - restore_from_checkpoint_folder=restore_from_checkpoint_folder, - save_checkpoints=save_checkpoints, - collaborator_time_stats=collaborator_time_stats) - collaborators = [] for idx, col in enumerate(collaborator_names): col_dir = os.path.join(work, 'data', str(col)) @@ -345,6 +123,7 @@ def run_challenge_experiment(aggregation_function, transformed_csv_dict[col]['train'].to_csv(train_csv_path) transformed_csv_dict[col]['val'].to_csv(val_csv_path) + collaborators.append( Collaborator( name=col, @@ -352,43 +131,41 @@ def run_challenge_experiment(aggregation_function, # If 1 GPU is available in the machine # Set `num_gpus=0.0` to `num_gpus=0.3` to run on GPU # with ray backend with 2 collaborators - num_cpus=0.0, + num_cpus=4.0, num_gpus=0.0, # arguments required to pass to callable index=idx, n_collaborators=len(collaborator_names), - train_csv=train_csv_path, - valid_csv=val_csv_path, 
gandlf_config=gandlf_config_path, - device=device, - training_hyper_parameters_for_round=training_hyper_parameters_for_round + train_csv_path=train_csv_path, + val_csv_path=val_csv_path ) ) + aggregator = Aggregator(name="aggregator", + private_attributes_callable=aggregator_private_attributes, + num_cpus=4.0, + num_gpus=0.0, + uuid='aggregator', + collaborator_names=collaborator_names, + include_validation_with_hausdorff=include_validation_with_hausdorff, + aggregation_type=aggregation_wrapper, + choose_training_collaborators=choose_training_collaborators, + training_hyper_parameters_for_round=training_hyper_parameters_for_round, + restore_from_checkpoint_folder=restore_from_checkpoint_folder, + save_checkpoints=save_checkpoints) + local_runtime = LocalRuntime( - aggregator=aggregator, collaborators=collaborators, backend="single_process" + aggregator=aggregator, collaborators=collaborators, backend="single_process", num_actors=1 ) logger.info(f"Local runtime collaborators = {local_runtime.collaborators}") model = FeTSChallengeModel(gandlf_config_path) - top_model_accuracy = 0 - # optimizers = { - # collaborator.name: default_optimizer(model, optimizer_type=args.optimizer_type) - # for collaborator in collaborators - # } - # flflow = FederatedFlow( - # model, - # optimizers, - # device, - # args.comm_round, - # top_model_accuracy, - # args.flow_internal_loop_test, - # ) - flflow = FeTSFederatedFlow( model, - 1 + rounds_to_train, + device, ) flflow.runtime = local_runtime @@ -415,9 +192,6 @@ def run_challenge_experiment(aggregation_function, # task_runner.model.load_state_dict(checkpoint['model_state_dict']) # task_runner.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) - # [TODO] [Workflow - API] Compression Pipeline - # tensor_pipe = plan.get_tensor_pipe() - # # Initialize model weights # # [TODO] [Workflow - API] How to set the initial state in the workflow # init_state_path = plan.config['aggregator']['settings']['init_state_path'] @@ -429,58 
+203,5 @@ def run_challenge_experiment(aggregation_function, # utils.dump_proto(model_proto=model_snap, fpath=init_state_path) - # # [TODO] [Workflow - API] ->Fetch the required aggregator from plan - # # get the aggregator, now that we have the initial weights file set up - # # manually override the aggregator UUID (for checkpoint resume when rounds change) - # aggregator.uuid = 'aggregator' - # aggregator._load_initial_tensors() - - # collaborators_chosen_each_round = {} - # collaborator_times_per_round = {} - - # logger.info('Starting experiment') - - - # for round_num in range(starting_round_num, rounds_to_train): - # # pick collaborators to train for the round - # # ---> [TODO] [Workflow - API] In flow based API's, in start we can pass as foreach = 'collaborators' - # training_collaborators = choose_training_collaborators(collaborator_names, - # aggregator.tensor_db._iterate(), - # round_num, - # collaborators_chosen_each_round, - # collaborator_times_per_round) - - # logger.info('Collaborators chosen to train for round {}:\n\t{}'.format(round_num, training_collaborators)) - - # # save the collaborators chosen this round - # collaborators_chosen_each_round[round_num] = training_collaborators - - # - # # [TODO] [Workflow - API] How to cache the tensor in the workflow ? do we need to cache h-params ? - # aggregator.tensor_db.cache_tensor(hparam_dict) - - # # pre-compute the times for each collaborator - # # [TODO] [Workflow - API] What is the use of this ? 
- # times_per_collaborator = compute_times_per_collaborator(collaborator_names, - # training_collaborators, - # epochs_per_round, - # collaborator_data_loaders, - # collaborator_time_stats, - # round_num) - # collaborator_times_per_round[round_num] = times_per_collaborator - - - # # update the state in the aggregation wrapper - # # [TODO] [Workflow - API] See how to pass this in the workflow as aggregation function and use in JOIN step - # aggregation_wrapper.set_state_data_for_round(collaborators_chosen_each_round, collaborator_times_per_round) - - # # turn the times list into a list of tuples and sort it - # times_list = [(t, col) for col, t in times_per_collaborator.items()] - # times_list = sorted(times_list) - - # # [TODO] [Workflow - API] Create LocalRunTime using ray bakcend and do flow.run() to start the training - # for t, col in times_list: - # logger.info("Collaborator {} took simulated time: {} minutes".format(col, round(t / 60, 2))) - #return pd.DataFrame.from_dict(experiment_results), checkpoint_folder return None \ No newline at end of file diff --git a/Task_1/fets_challenge/fets_challenge_model.py b/Task_1/fets_challenge/fets_challenge_model.py index 2071c9c..13a1d2f 100644 --- a/Task_1/fets_challenge/fets_challenge_model.py +++ b/Task_1/fets_challenge/fets_challenge_model.py @@ -58,31 +58,14 @@ def __init__( **kwargs: Additional parameters to pass to the function. 
""" - if isinstance(gandlf_config_path, str) and os.path.exists(gandlf_config_path): - gandlf_conf = yaml.safe_load(open(gandlf_config_path, "r")) - - gandlf_conf = ConfigManager(gandlf_config_path) - - # TODO -> CHECK HOW TO CREATE A MODEL HERE - ( - model, - optimizer, - train_loader, - val_loader, - scheduler, - params, - ) = create_pytorch_objects( - gandlf_conf, device="cpu" - ) - self.model = model - self.optimizer = optimizer - self.scheduler = scheduler - self.params = params + self.model = None + self.optimizer = None + self.scheduler = None + self.params = None + self.device = None self.training_round_completed = False - self.required_tensorkeys_for_function = {} - self.logger = getLogger(__name__) # FIXME: why isn't this initial call in runner_pt? @@ -94,7 +77,7 @@ def __init__( self.tensor_dict_split_fn_kwargs = {} self.tensor_dict_split_fn_kwargs.update({"holdout_tensor_names": ["__opt_state_needed"]}) - def rebuild_model(self, model, round_num, input_tensor_dict, device, validation=False): + def rebuild_model(self, round_num, input_tensor_dict, validation=False): """Parse tensor names and update weights of model. Handles the optimizer treatment. @@ -108,8 +91,6 @@ def rebuild_model(self, model, round_num, input_tensor_dict, device, validation= Returns: None """ - self.device = device # [TODO] - FIX ME - self.model = model self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) @@ -125,7 +106,7 @@ def rebuild_model(self, model, round_num, input_tensor_dict, device, validation= # else: # self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) - def validate(self, model, col_name, round_num, val_dataloader, params, scheduler, use_tqdm=False, **kwargs): + def validate(self, col_name, round_num, val_loader, use_tqdm=False, **kwargs): """Validate. Run validation of the model on the local data. 
Args: @@ -141,13 +122,13 @@ def validate(self, model, col_name, round_num, val_dataloader, params, scheduler {} (dict): Tensors to maintain in the local TensorDB. """ #self.rebuild_model(round_num, input_tensor_dict, validation=True) - #model.eval() + self.model.eval() epoch_valid_loss, epoch_valid_metric = validate_network( - model, - val_dataloader, - scheduler, - params, + self.model, + val_loader, + self.scheduler, + self.params, round_num, mode="validation", ) @@ -169,7 +150,6 @@ def validate(self, model, col_name, round_num, val_dataloader, params, scheduler output_tensor_dict = {} output_tensor_dict[TensorKey('valid_loss', origin, round_num, True, tags)] = np.array(epoch_valid_loss) for k, v in epoch_valid_metric.items(): - print(f"Testing ->>>> Metric Key {k} Value {v}") if isinstance(v, str): v = list(map(float, v.split('_'))) @@ -182,7 +162,7 @@ def validate(self, model, col_name, round_num, val_dataloader, params, scheduler # Empty list represents metrics that should only be stored locally return output_tensor_dict, {} - def train(self, model, col_name, round_num, train_loader, params, optimizer, hparams_dict, use_tqdm=False, epochs=1, **kwargs): + def train(self, col_name, round_num, hparams_dict, train_loader, use_tqdm=False, **kwargs): """Train batches. Train the model on the requested number of batches. Args: @@ -200,34 +180,34 @@ def train(self, model, col_name, round_num, train_loader, params, optimizer, hpa TensorDB. 
""" # handle the hparams - #epochs_per_round = int(input_tensor_dict.pop('epochs_per_round')) - #learning_rate = float(input_tensor_dict.pop('learning_rate')) + epochs_per_round = int(hparams_dict.pop('epochs_per_round')) + learning_rate = float(hparams_dict.pop('learning_rate')) #self.rebuild_model(round_num, input_tensor_dict) # set to "training" mode - model.train() + self.model.train() # Set the learning rate - #for group in optimizer.param_groups: - # group['lr'] = learning_rate + self.logger.info(f"Setting learning rate to {learning_rate}") + for group in self.optimizer.param_groups: + group['lr'] = learning_rate - for epoch in range(epochs): - print(f"Run %s epoch of %s round", epoch, round_num) + for epoch in range(epochs_per_round): + print(f"Run {epoch} of {round_num}") # FIXME: do we want to capture these in an array # rather than simply taking the last value? epoch_train_loss, epoch_train_metric = train_network( - model, + self.model, train_loader, - optimizer, - params, + self.optimizer, + self.params, ) # output model tensors (Doesn't include TensorKey) - tensor_dict = self.get_tensor_dict(model, with_opt_vars=True) + tensor_dict = self.get_tensor_dict(self.model, with_opt_vars=True) metric_dict = {'loss': epoch_train_loss} for k, v in epoch_train_metric.items(): - print(f"Testing ->>>> Metric Key {k} Value {v}") if isinstance(v, str): v = list(map(float, v.split('_'))) if np.array(v).size == 1: @@ -267,7 +247,7 @@ def train(self, model, col_name, round_num, train_loader, params, optimizer, hpa # Return global_tensor_dict, local_tensor_dict return global_tensor_dict, local_tensor_dict - def get_tensor_dict(self, model, with_opt_vars=False): + def get_tensor_dict(self, model=None, with_opt_vars=False): """Return the tensor dictionary. Args: @@ -283,6 +263,9 @@ def get_tensor_dict(self, model, with_opt_vars=False): # for now, state dict gives us names which is good # FIXME: do both and sanity check each time? 
+ if model is None: + model = self.model + state = to_cpu_numpy(model.state_dict()) if with_opt_vars: @@ -333,81 +316,6 @@ def get_optimizer(self): """ return self.optimizer - def get_required_tensorkeys_for_function(self, func_name, **kwargs): - """Get the required tensors for specified function that could be called - as part of a task. - - By default, this is just all of the layers and optimizer of the model. - - Args: - func_name (str): Function name. - **kwargs: Additional keyword arguments. - - Returns: - required_tensorkeys_for_function (list): List of required - TensorKey. - """ - if func_name == "validate": - local_model = "apply=" + str(kwargs["apply"]) - return self.required_tensorkeys_for_function[func_name][local_model] - else: - return self.required_tensorkeys_for_function[func_name] - - def initialize_tensorkeys_for_functions(self, with_opt_vars=False): - """Set the required tensors for all publicly accessible task methods. - - By default, this is just all of the layers and optimizer of the model. - Custom tensors should be added to this function. - - Args: - with_opt_vars (bool, optional): Include the optimizer tensors. - Defaults to False. - """ - # TODO there should be a way to programmatically iterate through - # all of the methods in the class and declare the tensors. 
- # For now this is done manually - - output_model_dict = self.get_tensor_dict(self.model, with_opt_vars=with_opt_vars) - global_model_dict, local_model_dict = split_tensor_dict_for_holdouts( - self.logger, output_model_dict, **self.tensor_dict_split_fn_kwargs - ) - if not with_opt_vars: - global_model_dict_val = global_model_dict - local_model_dict_val = local_model_dict - else: - output_model_dict = self.get_tensor_dict(self.model, with_opt_vars=False) - global_model_dict_val, local_model_dict_val = split_tensor_dict_for_holdouts( - self.logger, - output_model_dict, - **self.tensor_dict_split_fn_kwargs, - ) - - self.required_tensorkeys_for_function["train"] = [ - TensorKey(tensor_name, "GLOBAL", 0, False, ("model",)) - for tensor_name in global_model_dict - ] - self.required_tensorkeys_for_function["train"] += [ - TensorKey(tensor_name, "LOCAL", 0, False, ("model",)) - for tensor_name in local_model_dict - ] - - # Validation may be performed on local or aggregated (global) model, - # so there is an extra lookup dimension for kwargs - self.required_tensorkeys_for_function["validate"] = {} - # TODO This is not stateless. The optimizer will not be - self.required_tensorkeys_for_function["validate"]["apply=local"] = [ - TensorKey(tensor_name, "LOCAL", 0, False, ("trained",)) - for tensor_name in {**global_model_dict_val, **local_model_dict_val} - ] - self.required_tensorkeys_for_function["validate"]["apply=global"] = [ - TensorKey(tensor_name, "GLOBAL", 0, False, ("model",)) - for tensor_name in global_model_dict_val - ] - self.required_tensorkeys_for_function["validate"]["apply=global"] += [ - TensorKey(tensor_name, "LOCAL", 0, False, ("model",)) - for tensor_name in local_model_dict_val - ] - def load_native( self, filepath, @@ -452,6 +360,7 @@ def save_native( dict in picked file. Defaults to 'optimizer_state_dict'. **kwargs: Additional keyword arguments. 
""" + pickle_dict = { model_state_dict_key: self.model.state_dict(), optimizer_state_dict_key: self.optimizer.state_dict(), diff --git a/Task_1/fets_challenge/fets_data_loader.py b/Task_1/fets_challenge/fets_data_loader.py new file mode 100644 index 0000000..e69e004 --- /dev/null +++ b/Task_1/fets_challenge/fets_data_loader.py @@ -0,0 +1,55 @@ +class FeTSDataLoader(): + """ + A data loader class for the FeTS challenge that handles training and validation data loaders. + + Attributes: + train_loader (DataLoader): The data loader for the training dataset. + valid_loader (DataLoader): The data loader for the validation dataset. + """ + + def __init__(self, train_loader, valid_loader): + """ + Initializes the FeTSDataLoader with training and validation data loaders. + + Args: + train_loader (DataLoader): The data loader for the training dataset. + valid_loader (DataLoader): The data loader for the validation dataset. + """ + self.train_loader = train_loader + self.valid_loader = valid_loader + + def get_train_loader(self): + """ + Returns the data loader for the training dataset. + + Returns: + DataLoader: The data loader for the training dataset. + """ + return self.train_loader + + def get_valid_loader(self): + """ + Returns the data loader for the validation dataset. + + Returns: + DataLoader: The data loader for the validation dataset. + """ + return self.valid_loader + + def get_train_data_size(self): + """ + Returns the size of the training dataset. + + Returns: + int: The number of samples in the training dataset. + """ + return len(self.train_loader.dataset) + + def get_valid_data_size(self): + """ + Returns the size of the validation dataset. + + Returns: + int: The number of samples in the validation dataset. 
+ """ + return len(self.valid_loader.dataset) \ No newline at end of file diff --git a/Task_1/fets_challenge/fets_flow.py b/Task_1/fets_challenge/fets_flow.py index 98a6372..748fd38 100644 --- a/Task_1/fets_challenge/fets_flow.py +++ b/Task_1/fets_challenge/fets_flow.py @@ -8,6 +8,7 @@ import torch as pt import yaml import shutil +import time from sys import path from openfl.federated import Plan @@ -17,7 +18,9 @@ from openfl.experimental.workflow.placement import aggregator, collaborator from openfl.databases import TensorDB from openfl.utilities import TaskResultKey, TensorKey, change_tags +from .fets_data_loader import FeTSDataLoader from .checkpoint_utils import setup_checkpoint_folder, save_checkpoint, load_checkpoint +from .time_utils import gen_collaborator_time_stats, compute_times_per_collaborator from GANDLF.compute.generic import create_pytorch_objects from GANDLF.config_manager import ConfigManager @@ -25,6 +28,13 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) +# one week +# MINUTE = 60 +# HOUR = 60 * MINUTE +# DAY = 24 * HOUR +# WEEK = 7 * DAY +MAX_SIMULATION_TIME = 7 * 24 * 60 * 60 + def get_metric(metric_name, fl_round, agg_tensor_db): target_tags = ('metric', 'validate_agg') metric_tensor_key = TensorKey(metric_name, 'aggregator', fl_round, True, target_tags) @@ -46,20 +56,25 @@ def cache_tensor_dict(tensor_dict, agg_tensor_db, idx, agg_out_dict): agg_out_dict[modified_key] = value agg_tensor_db.cache_tensor(agg_out_dict) +collaborator_data_loaders = {} + class FeTSFederatedFlow(FLSpec): - def __init__(self, fets_model, rounds=3, **kwargs): + def __init__(self, fets_model, rounds=3 , device="cpu", **kwargs): super().__init__(**kwargs) self.fets_model = fets_model self.n_rounds = rounds + self.device = device self.current_round = 1 - - @aggregator - def start(self): - self.collaborators = self.runtime.collaborators - logger.info(f'Collaborators: {self.collaborators}') - logger.info(f'save_checkpoints: 
{self.save_checkpoints}') - logger.info(f'collaborator_time_stats: {self.collaborator_time_stats}') - logger.info(f'restore_from_checkpoint_folder: {self.restore_from_checkpoint_folder}') + self.total_simulated_time = 0 + self.best_dice = -1.0 + self.best_dice_over_time_auc = 0 + self.collaborators_chosen_each_round = {} + self.collaborator_times_per_round = {} + self.times_per_collaborator = {} + self.agg_tensor_dict = {} + self.tensor_keys_per_col = {} + self.restored = False + self.checkpoint_folder = "" self.experiment_results = { 'round':[], @@ -71,6 +86,10 @@ def start(self): 'dice_label_2': [], 'dice_label_4': [], } + + @aggregator + def start(self): + if self.include_validation_with_hausdorff: self.experiment_results.update({ 'hausdorff95_label_0': [], @@ -79,13 +98,7 @@ def start(self): 'hausdorff95_label_4': [], }) - self.total_simulated_time = 0 - self.best_dice = -1.0 - self.best_dice_over_time_auc = 0 - - self.checkpoint_folder = "" - self.collaborators_chosen_each_round = {} - self.collaborator_times_per_round = {} + self.collaborators = self.runtime.collaborators if self.restore_from_checkpoint_folder is None: self.checkpoint_folder = setup_checkpoint_folder() logger.info(f'\nCreated experiment folder {self.checkpoint_folder}...') @@ -109,21 +122,33 @@ def start(self): f'do not match provided collaborators ({self.collaborator_names})') exit(1) + self.restored = True logger.info(f'Previous summary for round {starting_round_num}') logger.info(summary) + #Updating the agg_tensor_dict from stored tensor_db starting_round_num += 1 - #self.tensor_db.tensor_db = agg_tensor_db - self.round_number = starting_round_num - self.next(self.fetch_hyper_parameters) + self.current_round = starting_round_num + for col,tensor_keys in self.tensor_keys_per_col.items(): + for tensor_key in tensor_keys: + tensor_name, origin, round_number, report, tags = tensor_key + if 'trained' in tags: + new_tags = change_tags(tags, remove_field=col) + new_tensor_key = 
TensorKey(tensor_name, origin, round_number, report, new_tags) + if tensor_name not in self.agg_tensor_dict: + self.agg_tensor_dict[tensor_name] = agg_tensor_db.get_tensor_from_cache(new_tensor_key) + #logger.info(f'Fetched tensor {tensor_name} from tensor_db for round {round_number}') + + self.collaborator_time_stats = gen_collaborator_time_stats(self.collaborator_names) + self.next(self.fetch_parameters_for_colls) @aggregator - def fetch_hyper_parameters(self): + def fetch_parameters_for_colls(self): print("*" * 40) print("Starting round {}".format(self.current_round)) print("*" * 40) logger.info('Fetching hyperparameters') - tensrdb = TensorDB() + tensrdb = TensorDB() # [TODO] Check is it required ? hparams = self.training_hyper_parameters_for_round(self.collaborators, tensrdb._iterate(), self.current_round, @@ -136,54 +161,34 @@ def fetch_hyper_parameters(self): logger.warning('Hyper-parameter function warning: function returned None for "epochs_per_round". Setting "epochs_per_round" to 1') epochs_per_round = 1 - hparam_message = "\n\tlearning rate: {}".format(learning_rate) - - hparam_message += "\n\tepochs_per_round: {}".format(epochs_per_round) + self.hparam_dict = {} + self.hparam_dict['learning_rate'] = learning_rate + self.hparam_dict['epochs_per_round'] = epochs_per_round - logger.info("Hyper-parameters for round {}:{}".format(self.current_round, hparam_message)) + print(f'Hyperparameters for round {self.current_round}: {self.hparam_dict}') - # cache each tensor in the aggregator tensor_db - self.hparam_dict = {} - tk = TensorKey(tensor_name='learning_rate', - origin=self.uuid, - round_number=self.current_round, - report=False, - tags=('hparam', 'model')) - self.hparam_dict[tk] = np.array(learning_rate) - tk = TensorKey(tensor_name='epochs_per_round', - origin=self.uuid, - round_number=self.current_round, - report=False, - tags=('hparam', 'model')) - self.hparam_dict[tk] = np.array(epochs_per_round) - - - - # times_per_collaborator = 
compute_times_per_collaborator(collaborator_names, - # training_collaborators, - # epochs_per_round, - # collaborator_data_loaders, - # collaborator_time_stats, - # round_num) - - - if self.current_round == 1: - logger.info('[Next Step] : Initializing collaborators') - self.next(self.initialize_colls, foreach='collaborators') - else: - logger.info('[Next Step] : Aggregated model validation') - self.next(self.aggregated_model_validation, foreach='collaborators') + # pick collaborators to train for the round + self.training_collaborators = self.choose_training_collaborators(self.collaborator_names, + tensrdb._iterate(), + self.current_round, + self.collaborators_chosen_each_round, + self.collaborator_times_per_round) + logger.info('Collaborators chosen to train for round {}:\n\t{}'.format(self.current_round, self.training_collaborators)) + # save the collaborators chosen this round + self.collaborators_chosen_each_round[self.current_round] = self.training_collaborators + self.next(self.initialize_colls, foreach='training_collaborators') @collaborator def initialize_colls(self): - logger.info(f'Initializing collaborator {self.input}') if isinstance(self.gandlf_config, str) and os.path.exists(self.gandlf_config): gandlf_conf = yaml.safe_load(open(self.gandlf_config, "r")) logger.info(gandlf_conf) gandlf_conf = ConfigManager(self.gandlf_config) + self.fets_model.device = self.device + logger.info(f'Initializing collaborator {self.input}') ( model, optimizer, @@ -192,54 +197,84 @@ def initialize_colls(self): scheduler, params, ) = create_pytorch_objects( - gandlf_conf, train_csv=self.train_csv, val_csv=self.val_csv, device=self.device + gandlf_conf, train_csv=self.train_csv_path, val_csv=self.val_csv_path, device=self.device ) - self.model = model - self.optimizer = optimizer - self.scheduler = scheduler - self.params = params - self.device = self.device - self.train_loader = train_loader - self.val_loader = val_loader - self.epochs = 1 - 
self.next(self.aggregated_model_validation) + self.fets_model.model = model + self.fets_model.optimizer = optimizer + self.fets_model.scheduler = scheduler + self.fets_model.params = params + + logger.info(f'Initializing dataloaders for collaborator {self.input}') + collaborator_data_loaders[self.input] = FeTSDataLoader(train_loader, val_loader) - # @collaborator - # def init_tensors(self): - # logger.info(f'Initializing tensors for collaborator {self.input}') - # coll_tensor_dict = self.fets_model.get_tensor_dict(self.model) - # # for key, value in coll_tensor_dict.items(): - # # print(f'Adding tensor {key}') - # # print(f'Value of tensor {key} is {value}') + self.times_per_collaborator[self.input] = compute_times_per_collaborator(self.input, + self.training_collaborators, + self.hparam_dict['epochs_per_round'], + collaborator_data_loaders[self.input], + self.collaborator_time_stats, + self.current_round) + + print(f'Times per collaborator for round {self.current_round}: {self.times_per_collaborator[self.input]}') - # self.fets_model.rebuild_model(self.model, self.current_round, coll_tensor_dict, "cpu") - # self.next(self.aggregated_model_validation) + if self.restored is False: + self.agg_tensor_dict = self.fets_model.get_tensor_dict() + self.next(self.aggregated_model_validation) @collaborator def aggregated_model_validation(self): + validation_start_time = time.time() + logger.info(f'Performing aggregated model validation for collaborator {self.input}') - self.agg_valid_dict, _ = self.fets_model.validate(self.model, self.input, self.current_round, self.val_loader, self.params, self.scheduler, apply="global") - #logger.info(f'{self.input} value of {self.agg_valid_dict.keys()}') + input_tensor_dict = deepcopy(self.agg_tensor_dict) + val_loader = collaborator_data_loaders[self.input].get_valid_loader() + self.fets_model.rebuild_model(self.current_round, input_tensor_dict) + self.agg_valid_dict, _ = self.fets_model.validate(self.input, self.current_round, 
val_loader, apply="global") + + validation_end_time = time.time() + self.aggregated_model_validation_time = validation_end_time - validation_start_time + print(f'Collaborator {self.input} took {self.aggregated_model_validation_time} seconds for agg validation') self.next(self.train) @collaborator def train(self): + training_start_time = time.time() + logger.info(f'Performing training for collaborator {self.input}') - self.global_output_tensor_dict, local_output_tensor_dict = self.fets_model.train(self.model, self.input, self.current_round, self.train_loader, self.params, self.optimizer, self.hparam_dict, self.epochs) - #logger.info(f'{self.input} value of {self.global_output_tensor_dict.keys()}') + train_loader = collaborator_data_loaders[self.input].get_train_loader() + self.global_output_tensor_dict, _ = self.fets_model.train(self.input, self.current_round, self.hparam_dict, train_loader) + + training_end_time = time.time() + self.training_time = training_end_time - training_start_time + print(f'Collaborator {self.input} took {self.training_time} seconds for training') self.next(self.local_model_validation) @collaborator def local_model_validation(self): + validation_start_time = time.time() + logger.info(f'Performing local model validation for collaborator {self.input}') - self.local_valid_dict, _ = self.fets_model.validate(self.model, self.input, self.current_round, self.val_loader, self.params, self.scheduler, apply="local") - #logger.info(f'Doing local model validation for collaborator {self.input}:' + f' {self.local_output_dict}') + val_loader = collaborator_data_loaders[self.input].get_valid_loader() + self.local_valid_dict, _ = self.fets_model.validate(self.input, self.current_round, val_loader, apply="local") + + validation_end_time = time.time() + self.local_model_validation_time = validation_end_time - validation_start_time + print(f'Collaborator {self.input} took {self.local_model_validation_time} seconds for local validation') self.next(self.join) + # 
@collaborator + # def testing_collaborator(self): + # logger.info(f'Testing collaborator {self.input}') + # self.next(self.join) + + # @aggregator + # def join(self, inputs): + # self.next(self.internal_loop) + @aggregator def join(self, inputs): - agg_tensor_db = TensorDB() - tensor_keys_per_col = {} + join_start_time = time.time() + self.aggregation_type.set_state_data_for_round(self.collaborators_chosen_each_round, self.collaborator_times_per_round) + agg_tensor_db = TensorDB() # [TODO] As tensordb cannot be used as FLSpec Attribute, should we load this tensor_db from agg_tensor_dict before checkpointing ? for idx, col in enumerate(inputs): logger.info(f'Aggregating results for {idx}') agg_out_dict = {} @@ -250,16 +285,15 @@ def join(self, inputs): # Store the keys for each collaborator tensor_keys = [] for tensor_key in agg_out_dict.keys(): - #logger.info(f'Adding tensor key {tensor_key} to the dict of tensor keys') tensor_keys.append(tensor_key) - tensor_keys_per_col[str(idx + 1)] = tensor_keys + self.tensor_keys_per_col[str(idx + 1)] = tensor_keys # [TODO] : Aggregation Function -> Collaborator Weight Dict + self.agg_tensor_dict = {} collaborator_weight_dict = {'1': 0.3333333333333333, '2': 0.3333333333333333, '3': 0.3333333333333333} - for col,tensor_keys in tensor_keys_per_col.items(): + for col,tensor_keys in self.tensor_keys_per_col.items(): for tensor_key in tensor_keys: tensor_name, origin, round_number, report, tags = tensor_key - #logger.info(f'Aggregating tensor {tensor_name} from collaborator {origin} for round {round_number}') new_tags = change_tags(tags, remove_field=col) agg_tensor_key = TensorKey(tensor_name, origin, round_number, report, new_tags) # Aggregates the tensor values for the tensor key and stores it in tensor_db @@ -268,27 +302,16 @@ def join(self, inputs): collaborator_weight_dict, aggregation_function=self.aggregation_type, ) + if 'trained' in tags and tensor_name not in self.agg_tensor_dict: + logger.info(f'Fetched tensor 
{tensor_name} from tensor_db for round {round_number}') + self.agg_tensor_dict[tensor_name] = agg_tensor_db.get_tensor_from_cache(agg_tensor_key) #logger.info(f'Aggregated tensor value for tensor key {agg_tensor_key}') - agg_tensor_dict = {} - for col,tensor_keys in tensor_keys_per_col.items(): - for tensor_key in tensor_keys: - tensor_name, origin, round_number, report, tags = tensor_key - #logger.info(f'Training tensor_key {tensor_key}') - if 'trained' in tags: - #logger.info(f'Fetching tensor {tensor_name} from tensor_db for round {round_number}') - new_tags = change_tags(tags, remove_field=col) - new_tensor_key = TensorKey(tensor_name, origin, round_number, report, new_tags) - if tensor_name not in agg_tensor_dict: - agg_tensor_dict[tensor_name] = agg_tensor_db.get_tensor_from_cache(new_tensor_key) - #logger.info(f'Fetched tensor {tensor_name} from tensor_db for round {round_number}') - # Rebuild the model with the aggregated tensor_dict - for input in inputs: - logger.info(f'Updating model for collaborator {input}') - local_tensor_dict = deepcopy(agg_tensor_dict) - self.fets_model.rebuild_model(input.model, self.current_round, local_tensor_dict, "cpu") - local_tensor_dict = None + # for input in inputs: + # if self.agg_tensor_dict is not None: + # local_tensor_dict = deepcopy(self.agg_tensor_dict) + # input.fets_model.rebuild_model(self.current_round, local_tensor_dict) round_loss = get_metric('valid_loss', self.current_round, agg_tensor_db) round_dice = get_metric('valid_dice', self.current_round, agg_tensor_db) @@ -302,17 +325,18 @@ def join(self, inputs): hausdorff95_label_2 = get_metric('valid_hd95_per_label_2', self.current_round, agg_tensor_db) hausdorff95_label_4 = get_metric('valid_hd95_per_label_4', self.current_round, agg_tensor_db) - # times_list = [(t, col) for col, t in times_per_collaborator.items()] - # times_list = sorted(times_list) + #times_list = [(t, col) for col, t in self.times_per_collaborator.items()] + #times_list = sorted(times_list) 
# the round time is the max of the times_list - # round_time = max([t for t, _ in times_list]) - # self.total_simulated_time += round_time + round_time = 1 + #round_time = max([t for t, _ in times_list]) + #self.total_simulated_time += round_time if self.best_dice < round_dice: self.best_dice = round_dice # Set the weights for the final model - if self.current_round == 0: + if self.current_round == 1: # here the initial model was validated (temp model does not exist) logger.info(f'Skipping best model saving to disk as it is a random initialization.') elif not os.path.exists(f'checkpoint/{self.checkpoint_folder}/temp_model.pkl'): @@ -324,7 +348,7 @@ def join(self, inputs): ## CONVERGENCE METRIC COMPUTATION # update the auc score - self.best_dice_over_time_auc += self.best_dice * self.current_round + self.best_dice_over_time_auc += self.best_dice * round_time # project the auc score as remaining time * best dice # this projection assumes that the current best score is carried forward for the entire week @@ -336,7 +360,7 @@ def join(self, inputs): summary += "\n\tSimulation Time: {} minutes".format(round(self.total_simulated_time / 60, 2)) summary += "\n\t(Projected) Convergence Score: {}".format(projected_auc) summary += "\n\tRound Loss: {}".format(round_loss) - summary += "\n\Round Dice: {}".format(round_dice) + summary += "\n\tRound Dice: {}".format(round_dice) summary += "\n\tDICE Label 0: {}".format(dice_label_0) summary += "\n\tDICE Label 1: {}".format(dice_label_1) summary += "\n\tDICE Label 2: {}".format(dice_label_2) @@ -385,9 +409,31 @@ def join(self, inputs): # save the most recent aggregated model in native format to be copied over as best when appropriate # (note this model has not been validated by the collaborators yet) - # self.fets_model.rebuild_model(round_num, aggregator.last_tensor_dict, validation=True) + # Global FeTS Model may be unititialized in the first round + if self.fets_model.model is None: + logger.info(f'Global model is not initialized. 
Initializing with the first round model') + self.fets_model.model = inputs[0].fets_model.model + self.fets_model.optimizer = inputs[0].fets_model.optimizer + self.fets_model.scheduler = inputs[0].fets_model.scheduler + self.fets_model.params = inputs[0].fets_model.params + + # Rebuild the model with the aggregated tensor_dict + local_tensor_dict = deepcopy(self.agg_tensor_dict) + self.fets_model.rebuild_model(self.current_round, local_tensor_dict) self.fets_model.save_native(f'checkpoint/{self.checkpoint_folder}/temp_model.pkl') + # [TOOD] : Remove below logging + join_end_time = time.time() + self.join_time = join_end_time - join_start_time + print(f'took {self.join_time} seconds for join_time') + + total_time = 0 + for input in inputs: + print(f"TIme taken by {input} : {input.aggregated_model_validation_time + input.training_time + input.local_model_validation_time + self.join_time}") + total_time += input.aggregated_model_validation_time + input.training_time + input.local_model_validation_time + self.join_time + + print(f'took {total_time} seconds for total training and valid') + self.next(self.internal_loop) @aggregator @@ -399,8 +445,10 @@ def internal_loop(self): self.next(self.end) else: self.current_round += 1 - self.next(self.fetch_hyper_parameters) + self.next(self.fetch_parameters_for_colls) @aggregator def end(self): - logger.info('This is the end of the flow') \ No newline at end of file + logger.info('********************************') + logger.info('End of flow') + logger.info('********************************') \ No newline at end of file diff --git a/Task_1/fets_challenge/gandlf_config.yaml b/Task_1/fets_challenge/gandlf_config.yaml index b9fbd1e..e4845e5 100644 --- a/Task_1/fets_challenge/gandlf_config.yaml +++ b/Task_1/fets_challenge/gandlf_config.yaml @@ -17,7 +17,7 @@ output_dir: '.' 
metrics: - dice - dice_per_label -- hd95_per_label +#- hd95_per_label model: amp: true architecture: resunet @@ -31,7 +31,6 @@ model: final_layer: softmax ignore_label_validation: null norm_type: instance - num_channels: 4 nested_training: testing: 1 validation: -5 @@ -63,4 +62,4 @@ version: minimum: 0.0.14 weighted_loss: true modality: rad -problem_type: classification \ No newline at end of file +problem_type: segmentation \ No newline at end of file diff --git a/Task_1/fets_challenge/time_utils.py b/Task_1/fets_challenge/time_utils.py new file mode 100644 index 0000000..2fc1288 --- /dev/null +++ b/Task_1/fets_challenge/time_utils.py @@ -0,0 +1,192 @@ +from collections import namedtuple +from logging import getLogger +import warnings + +import numpy as np +import pandas as pd + +## COLLABORATOR TIMING DISTRIBUTIONS +# These data are derived from the actual timing information in the real-world FeTS information +# They reflect a subset of the institutions involved. +# Tuples are (mean, stddev) in seconds + +# time to train one patient +TRAINING_TIMES = [(6.710741331207654, 0.8726112813698301), + (2.7343911917098445, 0.023976155580152165), + (3.173076923076923, 0.04154320960517865), + (6.580379746835443, 0.22461890673025595), + (3.452046783625731, 0.47136389322749656), + (6.090788461700995, 0.08541499003440205), + (3.206933911159263, 0.1927067498514361), + (3.3358208955223883, 0.2950567549663471), + (4.391304347826087, 0.37464538999161057), + (6.324805129494594, 0.1413885448869165), + (7.415133477633478, 1.1198881747151301), + (5.806410256410255, 0.029926699295169234), + (6.300204918032787, 0.24932319729777577), + (5.886317567567567, 0.018627858809133223), + (5.478184991273998, 0.04902740607167421), + (6.32440159574468, 0.15838847558954935), + (20.661918328585003, 6.085405543890793), + (3.197901325478645, 0.07049966132127056), + (6.523963730569948, 0.2533266757118492), + (2.6540077569489338, 0.025503099659276184), + (1.8025746183640918, 0.06805805332403576)] + +# 
time to validate one patient +VALIDATION_TIMES = [(23.129135113591072, 2.5975116854269507), + (12.965544041450777, 0.3476297824941513), + (14.782051282051283, 0.5262660449172765), + (16.444936708860762, 0.42613177203005187), + (15.728654970760235, 4.327559980390658), + (12.946098012884802, 0.2449927822869217), + (15.335950126991456, 1.1587597276712558), + (24.024875621890544, 3.087348297794285), + (38.361702127659576, 2.240113332190875), + (16.320970580839827, 0.4995108101783225), + (30.805555555555554, 3.1836337269688237), + (12.100899742930592, 0.41122386959584895), + (13.099897540983607, 0.6693132795197584), + (9.690202702702702, 0.17513593019922968), + (10.06980802792321, 0.7947848617875114), + (14.605333333333334, 0.6012305898922827), + (36.30294396961064, 9.24123672148819), + (16.9130060292851, 0.7452868131028928), + (40.244078460399706, 3.7700993678269037), + (13.161603102779575, 0.1975347910041472), + (11.222161868549701, 0.7021223062972527)] + +# time to download the model +DOWNLOAD_TIMES = [(112.42869743589742, 14.456734719659513), + (117.26870618556701, 12.549951446132013), + (13.059666666666667, 4.8700489616521185), + (47.50220338983051, 14.92128656898884), + (162.27864210526315, 32.562113378948396), + (99.46072058823529, 13.808785580783224), + (33.6347090909091, 25.00299299660141), + (216.25489393939392, 19.176465340447848), + (217.4117230769231, 20.757673955585453), + (98.38857297297298, 13.205048376808929), + (88.87509473684209, 23.152936862511545), + (66.96994262295081, 16.682497150763503), + (36.668852040816326, 13.759109844677598), + (149.31716326530614, 26.018185409516104), + (139.847, 80.04755583050091), + (54.97624444444445, 16.645170929316794)] + +# time to upload the model +UPLOAD_TIMES = [(192.28497409326425, 21.537450985376967), + (194.60103626943004, 24.194406902237056), + (20.0, 0.0), + (52.43859649122807, 5.047207127169352), + (182.82417582417582, 14.793519078918195), + (143.38059701492537, 7.910690646792151), + (30.695652173913043, 
9.668122350904568), + (430.95360824742266, 54.97790476867727), + (348.3174603174603, 30.14347985347738), + (141.43715846994536, 5.271340868190727), + (158.7433155080214, 64.87526819391198), + (81.06086956521739, 7.003461202082419), + (32.60621761658031, 5.0418315093016615), + (281.5388601036269, 90.60338778706557), + (194.34065934065933, 36.6519776778435), + (66.53787878787878, 16.456280602190606)] + +logger = getLogger(__name__) +# This catches PyTorch UserWarnings for CPU +warnings.filterwarnings("ignore", category=UserWarning) + +CollaboratorTimeStats = namedtuple('CollaboratorTimeStats', + [ + 'validation_mean', + 'training_mean', + 'download_speed_mean', + 'upload_speed_mean', + 'validation_std', + 'training_std', + 'download_speed_std', + 'upload_speed_std', + ] + ) + +def gen_collaborator_time_stats(collaborator_names, seed=0xFEEDFACE): + + np.random.seed(seed) + + stats = {} + for col in collaborator_names: + ml_index = np.random.randint(len(VALIDATION_TIMES)) + validation = VALIDATION_TIMES[ml_index] + training = TRAINING_TIMES[ml_index] + net_index = np.random.randint(len(DOWNLOAD_TIMES)) + download = DOWNLOAD_TIMES[net_index] + upload = UPLOAD_TIMES[net_index] + + stats[col] = CollaboratorTimeStats(validation_mean=validation[0], + training_mean=training[0], + download_speed_mean=download[0], + upload_speed_mean=upload[0], + validation_std=validation[1], + training_std=training[1], + download_speed_std=download[1], + upload_speed_std=upload[1]) + return stats + +def compute_times_per_collaborator(collaborator_name, + training_collaborators, + epochs_per_round, + collaborator_data, + collaborator_time_stats, + round_num): + np.random.seed(round_num) + #times = {} + #for col in collaborator_names: + time = 0 + + print(f'Computing time for collaborator {collaborator_name}') + # stats + stats = collaborator_time_stats[collaborator_name] + + # download time + download_time = np.random.normal(loc=stats.download_speed_mean, + scale=stats.download_speed_std) + 
download_time = max(1, download_time) + time += download_time + + # data loader + #data = collaborator_data[collaborator_name] + + # validation time + data_size = collaborator_data.get_valid_data_size() + validation_time_per = np.random.normal(loc=stats.validation_mean, + scale=stats.validation_std) + validation_time_per = max(1, validation_time_per) + time += data_size * validation_time_per + + # only if training + if collaborator_name in training_collaborators: + # training time + data_size = collaborator_data.get_train_data_size() + training_time_per = np.random.normal(loc=stats.training_mean, + scale=stats.training_std) + training_time_per = max(1, training_time_per) + + # training data size depends on the hparams + data_size *= epochs_per_round + time += data_size * training_time_per + + # if training, we also validate the locally updated model + data_size = collaborator_data.get_valid_data_size() + validation_time_per = np.random.normal(loc=stats.validation_mean, + scale=stats.validation_std) + validation_time_per = max(1, validation_time_per) + time += data_size * validation_time_per + + # upload time + upload_time = np.random.normal(loc=stats.upload_speed_mean, + scale=stats.upload_speed_std) + upload_time = max(1, upload_time) + time += upload_time + + #times[col] = time + return time \ No newline at end of file From f2f4e003ef90c59b098e755913a8be06794c5d03 Mon Sep 17 00:00:00 2001 From: Tarunkumar Banda Date: Thu, 13 Mar 2025 08:06:28 -0700 Subject: [PATCH 08/16] Added support for inference Added workspace directory changes --- Task_1/FeTS_Challenge.py | 88 +++++++++++-------- Task_1/fets_challenge/experiment.py | 15 ++-- Task_1/fets_challenge/fets_challenge_model.py | 53 ++++++++++- Task_1/fets_challenge/fets_flow.py | 20 +++-- Task_1/fets_challenge/inference.py | 57 +++++++----- 5 files changed, 155 insertions(+), 78 deletions(-) diff --git a/Task_1/FeTS_Challenge.py b/Task_1/FeTS_Challenge.py index 3cd2578..5183bae 100644 --- a/Task_1/FeTS_Challenge.py 
+++ b/Task_1/FeTS_Challenge.py @@ -14,7 +14,10 @@ import os import numpy as np - +from fets_challenge import model_outputs_to_disc +from pathlib import Path +import shutil +import glob from fets_challenge import run_challenge_experiment @@ -526,7 +529,7 @@ def FedAvgM_Selection(local_tensors, institution_split_csv_filename = 'small_split.csv' # change this to point to the parent directory of the data -brats_training_data_parent_dir = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_TrainingData' +brats_training_data_parent_dir = '/home/ad_tbanda/code/fedAI/MICCAI_FeTS2022_TrainingData' # increase this if you need a longer history for your algorithms # decrease this if you need to reduce system RAM consumption @@ -544,15 +547,37 @@ def FedAvgM_Selection(local_tensors, save_checkpoints = True # path to previous checkpoint folder for experiment that was stopped before completion. -# Checkpoints are stored in ~/.local/workspace/checkpoint, and you should provide the experiment directory +# Checkpoints are stored in checkpoint, and you should provide the experiment directory # relative to this path (i.e. 'experiment_1'). Please note that if you restore from a checkpoint, # and save checkpoint is set to True, then the checkpoint you restore from will be subsequently overwritten. 
# restore_from_checkpoint_folder = 'experiment_1' restore_from_checkpoint_folder = None +# infer participant home folder +home = str(Path.home()) + +#Creating working directory and copying the required csv files +working_directory= os.path.join(home, '.local/workspace/') +Path(working_directory).mkdir(parents=True, exist_ok=True) +source_dir=f'{Path.cwd()}/openfl-workspace/fets_challenge_workspace/' +pattern = "*.csv" +source_pattern = os.path.join(source_dir, pattern) +files_to_copy = glob.glob(source_pattern) + +if not files_to_copy: + print(f"No files found matching pattern: {pattern}") + +for source_file in files_to_copy: + destination_file = os.path.join(working_directory, os.path.basename(source_file)) + shutil.copy2(source_file, destination_file) +try: + os.chdir(working_directory) + print("Directory changed to:", os.getcwd()) +except FileNotFoundError: + print("Error: Directory not found.") +except PermissionError: + print("Error: Permission denied") -# the scores are returned in a Pandas dataframe -#scores_dataframe, checkpoint_folder = run_challenge_experiment( aggregation_function=aggregation_function, choose_training_collaborators=choose_training_collaborators, @@ -567,48 +592,37 @@ def FedAvgM_Selection(local_tensors, restore_from_checkpoint_folder = restore_from_checkpoint_folder) -#scores_dataframe - - # ## Produce NIfTI files for best model outputs on the validation set # Now we will produce model outputs to submit to the leader board. # # At the end of every experiment, the best model (according to average ET, TC, WT DICE) -# is saved to disk at: ~/.local/workspace/checkpoint/\/best_model.pkl, +# is saved to disk at: checkpoint/\/best_model.pkl, # where \ is the one printed to stdout during the start of the # experiment (look for the log entry: "Created experiment folder experiment_##..." above). 
+# you will need to specify the correct experiment folder and the parent directory for +# the data you want to run inference over (assumed to be the experiment that just completed) -# from fets_challenge import model_outputs_to_disc -# from pathlib import Path - -# # infer participant home folder -# home = str(Path.home()) - -# # you will need to specify the correct experiment folder and the parent directory for -# # the data you want to run inference over (assumed to be the experiment that just completed) - -# #checkpoint_folder='experiment_1' -# #data_path = -# data_path = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_ValidationData' -# validation_csv_filename = 'validation.csv' - -# # you can keep these the same if you wish -# final_model_path = os.path.join(home, '.local/workspace/checkpoint', checkpoint_folder, 'best_model.pkl') +#checkpoint_folder='experiment_1' +#data_path = +data_path = '/home/ad_tbanda/code/fedAI/MICCAI_FeTS2022_ValidationData' +validation_csv_filename = 'validation.csv' -# # If the experiment is only run for a single round, use the temp model instead -# if not Path(final_model_path).exists(): -# final_model_path = os.path.join(home, '.local/workspace/checkpoint', checkpoint_folder, 'temp_model.pkl') +# you can keep these the same if you wish +final_model_path = os.path.join(working_directory, 'checkpoint', checkpoint_folder, 'best_model.pkl') -# outputs_path = os.path.join(home, '.local/workspace/checkpoint', checkpoint_folder, 'model_outputs') +# If the experiment is only run for a single round, use the temp model instead +if not Path(final_model_path).exists(): + final_model_path = os.path.join(working_directory, 'checkpoint', checkpoint_folder, 'temp_model.pkl') +outputs_path = os.path.join(working_directory, 'checkpoint', checkpoint_folder, 'model_outputs') -# # Using this best model, we can now produce NIfTI files for model outputs -# # using a provided data directory +# Using this best model, we can now produce NIfTI files for model 
outputs +# using a provided data directory -# model_outputs_to_disc(data_path=data_path, -# validation_csv=validation_csv_filename, -# output_path=outputs_path, -# native_model_path=final_model_path, -# outputtag='', -# device=device) +model_outputs_to_disc(data_path=data_path, + validation_csv=validation_csv_filename, + output_path=outputs_path, + native_model_path=final_model_path, + outputtag='', + device=device) \ No newline at end of file diff --git a/Task_1/fets_challenge/experiment.py b/Task_1/fets_challenge/experiment.py index 9a1142f..310c46a 100644 --- a/Task_1/fets_challenge/experiment.py +++ b/Task_1/fets_challenge/experiment.py @@ -17,7 +17,6 @@ from openfl.utilities import TensorKey from openfl.protocols import utils import openfl.native as fx -from openfl.databases import TensorDB import torch from .gandlf_csv_adapter import construct_fedsim_csv, extract_csv_partitions @@ -52,7 +51,8 @@ def aggregator_private_attributes( "training_hyper_parameters_for_round": training_hyper_parameters_for_round, "max_simulation_time": MAX_SIMULATION_TIME, "restore_from_checkpoint_folder": restore_from_checkpoint_folder, - "save_checkpoints":save_checkpoints + "save_checkpoints":save_checkpoints, + "checkpoint_folder":"" } @@ -85,22 +85,21 @@ def run_challenge_experiment(aggregation_function, file = Path(__file__).resolve() root = file.parent.resolve() # interface root, containing command modules work = Path.cwd().resolve() - gandlf_config_path = os.path.join(root, 'gandlf_config.yaml') path.append(str(root)) path.insert(0, str(work)) # create gandlf_csv and get collaborator names gandlf_csv_path = os.path.join(work, 'gandlf_paths.csv') - # split_csv_path = os.path.join(work, institution_split_csv_filename) + split_csv_path = os.path.join(work, institution_split_csv_filename) collaborator_names = construct_fedsim_csv(brats_training_data_parent_dir, - institution_split_csv_filename, + split_csv_path, 0.8, gandlf_csv_path) print(f'Collaborator names for experiment : 
{collaborator_names}') - aggregation_wrapper = CustomAggregationWrapper(aggregation_function) # ---> [TODO] Set the aggregation function in the workflow + aggregation_wrapper = CustomAggregationWrapper(aggregation_function) # [TODO] [Workflow - API] Need to check db_store rounds # overrides = { @@ -202,6 +201,4 @@ def run_challenge_experiment(aggregation_function, # tensor_pipe=tensor_pipe) # utils.dump_proto(model_proto=model_snap, fpath=init_state_path) - - #return pd.DataFrame.from_dict(experiment_results), checkpoint_folder - return None \ No newline at end of file + return aggregator.private_attributes["checkpoint_folder"] \ No newline at end of file diff --git a/Task_1/fets_challenge/fets_challenge_model.py b/Task_1/fets_challenge/fets_challenge_model.py index 13a1d2f..dcd8c5e 100644 --- a/Task_1/fets_challenge/fets_challenge_model.py +++ b/Task_1/fets_challenge/fets_challenge_model.py @@ -133,9 +133,6 @@ def validate(self, col_name, round_num, val_loader, use_tqdm=False, **kwargs): mode="validation", ) - #self.logger.info(epoch_valid_loss) - #self.logger.info(epoch_valid_metric) - print(f"Validation loss: {epoch_valid_loss}") print(f"Validation metric: {epoch_valid_metric}") @@ -162,6 +159,56 @@ def validate(self, col_name, round_num, val_loader, use_tqdm=False, **kwargs): # Empty list represents metrics that should only be stored locally return output_tensor_dict, {} + def inference(self, col_name, round_num, val_loader, use_tqdm=False, **kwargs): + """Inference. + Run validation of the model on the local data. + Args: + col_name (str): Name of the collaborator. + round_num (int): Current round number. + input_tensor_dict (dict): Required input tensors (for model). + use_tqdm (bool, optional): Use tqdm to print a progress bar. + Defaults to False. + **kwargs: Key word arguments passed to GaNDLF main_run. + + Returns: + output_tensor_dict (dict): Tensors to send back to the aggregator. + {} (dict): Tensors to maintain in the local TensorDB. 
+ """ + #self.rebuild_model(round_num, input_tensor_dict, validation=True) + self.model.eval() + + epoch_inference_loss, epoch_inference_metric = validate_network( + self.model, + val_loader, + self.scheduler, + self.params, + round_num, + mode="inference", + ) + + origin = col_name + suffix = 'inference' + if kwargs['apply'] == 'local': + suffix += '_local' + else: + suffix += '_agg' + tags = ('metric', suffix) + + output_tensor_dict = {} + output_tensor_dict[TensorKey('inference_loss', origin, round_num, True, tags)] = np.array(epoch_inference_loss) + for k, v in epoch_inference_metric.items(): + if isinstance(v, str): + v = list(map(float, v.split('_'))) + + if np.array(v).size == 1: + output_tensor_dict[TensorKey(f'inference_{k}', origin, round_num, True, tags)] = np.array(v) + else: + for idx,label in enumerate([0,1,2,4]): + output_tensor_dict[TensorKey(f'inference_{k}_{label}', origin, round_num, True, tags)] = np.array(v[idx]) + + # Empty list represents metrics that should only be stored locally + return output_tensor_dict, {} + def train(self, col_name, round_num, hparams_dict, train_loader, use_tqdm=False, **kwargs): """Train batches. Train the model on the requested number of batches. 
diff --git a/Task_1/fets_challenge/fets_flow.py b/Task_1/fets_challenge/fets_flow.py index 748fd38..d1a5d95 100644 --- a/Task_1/fets_challenge/fets_flow.py +++ b/Task_1/fets_challenge/fets_flow.py @@ -74,7 +74,6 @@ def __init__(self, fets_model, rounds=3 , device="cpu", **kwargs): self.agg_tensor_dict = {} self.tensor_keys_per_col = {} self.restored = False - self.checkpoint_folder = "" self.experiment_results = { 'round':[], @@ -203,7 +202,6 @@ def initialize_colls(self): self.fets_model.optimizer = optimizer self.fets_model.scheduler = scheduler self.fets_model.params = params - logger.info(f'Initializing dataloaders for collaborator {self.input}') collaborator_data_loaders[self.input] = FeTSDataLoader(train_loader, val_loader) @@ -274,23 +272,33 @@ def local_model_validation(self): def join(self, inputs): join_start_time = time.time() self.aggregation_type.set_state_data_for_round(self.collaborators_chosen_each_round, self.collaborator_times_per_round) - agg_tensor_db = TensorDB() # [TODO] As tensordb cannot be used as FLSpec Attribute, should we load this tensor_db from agg_tensor_dict before checkpointing ? + agg_tensor_db = TensorDB() # [TODO] As tensordb cannot be used as FLSpec Attribute, should we load this tensor_db from agg_tensor_dict before checkpointing ? 
+ collaborator_task_weight = {} for idx, col in enumerate(inputs): logger.info(f'Aggregating results for {idx}') agg_out_dict = {} cache_tensor_dict(col.local_valid_dict, agg_tensor_db, idx, agg_out_dict) cache_tensor_dict(col.agg_valid_dict, agg_tensor_db, idx, agg_out_dict) cache_tensor_dict(col.global_output_tensor_dict, agg_tensor_db, idx, agg_out_dict) + collaborator_task_weight[col.input] = collaborator_data_loaders[col.input].get_train_data_size() # Store the keys for each collaborator tensor_keys = [] for tensor_key in agg_out_dict.keys(): tensor_keys.append(tensor_key) self.tensor_keys_per_col[str(idx + 1)] = tensor_keys - # [TODO] : Aggregation Function -> Collaborator Weight Dict self.agg_tensor_dict = {} - collaborator_weight_dict = {'1': 0.3333333333333333, '2': 0.3333333333333333, '3': 0.3333333333333333} + # The collaborator data sizes for that task + collaborator_weights_unnormalized = { + col.input: collaborator_task_weight[col.input] + for _, col in enumerate(inputs) + } + weight_total = sum(collaborator_task_weight.values()) + collaborator_weight_dict = { + k: v / weight_total for k, v in collaborator_weights_unnormalized.items() + } + print(f'Calculated Collaborator weights: {collaborator_weight_dict}') for col,tensor_keys in self.tensor_keys_per_col.items(): for tensor_key in tensor_keys: tensor_name, origin, round_number, report, tags = tensor_key @@ -344,7 +352,7 @@ def join(self, inputs): else: # here the temp model was the one validated shutil.copyfile(src=f'checkpoint/{self.checkpoint_folder}/temp_model.pkl',dst=f'checkpoint/{self.checkpoint_folder}/best_model.pkl') - logger.info(f'Saved model with best average binary DICE: {self.best_dice} to ~/.local/workspace/checkpoint/{self.checkpoint_folder}/best_model.pkl') + logger.info(f'Saved model with best average binary DICE: {self.best_dice} to checkpoint/{self.checkpoint_folder}/best_model.pkl') ## CONVERGENCE METRIC COMPUTATION # update the auc score diff --git 
a/Task_1/fets_challenge/inference.py b/Task_1/fets_challenge/inference.py index 48503a6..4cab1b5 100644 --- a/Task_1/fets_challenge/inference.py +++ b/Task_1/fets_challenge/inference.py @@ -15,6 +15,9 @@ import openfl.native as fx from .gandlf_csv_adapter import construct_fedsim_csv +from GANDLF.compute.generic import create_pytorch_objects +from GANDLF.config_manager import ConfigManager +from .fets_challenge_model import FeTSChallengeModel logger = getLogger(__name__) @@ -81,7 +84,7 @@ def generate_validation_csv(data_path, validation_csv_filename, working_dir): 0.0, 'placeholder', training_and_validation=False) - validation_csv_dict.to_csv(os.path.join(working_dir, 'valid.csv'),index=False) + validation_csv_dict.to_csv(os.path.join(working_dir, 'validation_paths.csv'),index=False) def replace_initializations(done_replacing, array, mask, replacement_value, initialization_value): """ @@ -206,8 +209,6 @@ def model_outputs_to_disc(data_path, native_model_path, outputtag='', device='cpu'): - - fx.init('fets_challenge_workspace') from sys import path, exit @@ -217,35 +218,45 @@ def model_outputs_to_disc(data_path, path.append(str(root)) path.insert(0, str(work)) - generate_validation_csv(data_path,validation_csv, working_dir=work) - overrides = { - 'task_runner.settings.device': device, - 'task_runner.settings.val_csv': 'valid.csv', - 'task_runner.settings.train_csv': None, - } + # # overwrite datapath value for a single 'InferenceCol' collaborator + # plan.cols_data_paths['InferenceCol'] = data_path - # Update the plan if necessary - plan = fx.update_plan(overrides) - plan.config['task_runner']['settings']['gandlf_config']['save_output'] = True - plan.config['task_runner']['settings']['gandlf_config']['output_dir'] = output_path + # # get the inference data loader + # data_loader = copy(plan).get_data_loader('InferenceCol') - # overwrite datapath value for a single 'InferenceCol' collaborator - plan.cols_data_paths['InferenceCol'] = data_path + # # get the task 
runner, passing the data loader + # task_runner = copy(plan).get_task_runner(data_loader) - # get the inference data loader - data_loader = copy(plan).get_data_loader('InferenceCol') + gandlf_config_path = os.path.join(root, 'gandlf_config.yaml') + fets_model = FeTSChallengeModel(gandlf_config_path) + val_csv_path = os.path.join(work, 'validation_paths.csv') + gandlf_conf = ConfigManager(gandlf_config_path) + ( + model, + optimizer, + train_loader, + val_loader, + scheduler, + params, + ) = create_pytorch_objects( + gandlf_conf, train_csv=None, val_csv=val_csv_path, device=device + ) + gandlf_conf['output_dir'] = output_path + gandlf_conf['save_output'] = True + fets_model.model = model + fets_model.optimizer = optimizer + fets_model.scheduler = scheduler + fets_model.params = params + fets_model.device = device - # get the task runner, passing the data loader - task_runner = copy(plan).get_task_runner(data_loader) - # Populate model weights device = torch.device(device) - task_runner.load_native(filepath=native_model_path, map_location=device) - task_runner.opt_treatment = 'RESET' + fets_model.load_native(filepath=native_model_path, map_location=device) + #task_runner.opt_treatment = 'RESET' logger.info('Starting inference using data from {}\n'.format(data_path)) - task_runner.inference('aggregator',-1,task_runner.get_tensor_dict(),apply='global') + fets_model.inference('aggregator',-1,val_loader,apply='global') logger.info(f"\nFinished generating predictions to output folder {output_path}") From 8350df921c46e36b3f4be25e602926e4b9b848f9 Mon Sep 17 00:00:00 2001 From: "Agrawal, Kush" Date: Wed, 19 Mar 2025 11:40:10 -0700 Subject: [PATCH 09/16] Update dataloaders and params Signed-off-by: Agrawal, Kush --- Task_1/FeTS_Challenge.py | 32 +- .../{ => config}/gandlf_config.yaml | 0 Task_1/fets_challenge/experiment.py | 77 +- Task_1/fets_challenge/fets_flow.py | 164 ++- Task_1/fets_challenge/inference.py | 5 +- Task_1/fets_challenge/time_utils.py | 7 + 
.../partitioning_1.csv | 1252 ----------------- .../partitioning_2.csv | 1252 ----------------- .../fets_challenge_workspace/plan/cols.yaml | 3 - .../fets_challenge_workspace/plan/data.yaml | 4 - .../fets_challenge_workspace/plan/defaults | 2 - .../fets_challenge_workspace/plan/plan.yaml | 142 -- .../fets_challenge_workspace/requirements.txt | 2 - .../fets_challenge_workspace/small_split.csv | 11 - .../fets_challenge_workspace/src/__init__.py | 10 - .../src/challenge_assigner.py | 40 - .../src/fets_challenge_model.py | 218 --- .../fets_challenge_workspace/validation.csv | 220 --- Task_1/setup.py | 1 - 19 files changed, 129 insertions(+), 3313 deletions(-) rename Task_1/fets_challenge/{ => config}/gandlf_config.yaml (100%) delete mode 100644 Task_1/openfl-workspace/fets_challenge_workspace/partitioning_1.csv delete mode 100644 Task_1/openfl-workspace/fets_challenge_workspace/partitioning_2.csv delete mode 100644 Task_1/openfl-workspace/fets_challenge_workspace/plan/cols.yaml delete mode 100644 Task_1/openfl-workspace/fets_challenge_workspace/plan/data.yaml delete mode 100644 Task_1/openfl-workspace/fets_challenge_workspace/plan/defaults delete mode 100644 Task_1/openfl-workspace/fets_challenge_workspace/plan/plan.yaml delete mode 100644 Task_1/openfl-workspace/fets_challenge_workspace/requirements.txt delete mode 100644 Task_1/openfl-workspace/fets_challenge_workspace/small_split.csv delete mode 100644 Task_1/openfl-workspace/fets_challenge_workspace/src/__init__.py delete mode 100644 Task_1/openfl-workspace/fets_challenge_workspace/src/challenge_assigner.py delete mode 100644 Task_1/openfl-workspace/fets_challenge_workspace/src/fets_challenge_model.py delete mode 100644 Task_1/openfl-workspace/fets_challenge_workspace/validation.csv diff --git a/Task_1/FeTS_Challenge.py b/Task_1/FeTS_Challenge.py index 5183bae..e4e45ae 100644 --- a/Task_1/FeTS_Challenge.py +++ b/Task_1/FeTS_Challenge.py @@ -521,7 +521,7 @@ def FedAvgM_Selection(local_tensors, # to those you specify 
immediately above. Changing the below value to False will change # this fact, excluding the three hausdorff measurements. As hausdorff distance is # expensive to compute, excluding them will speed up your experiments. -include_validation_with_hausdorff=False +include_validation_with_hausdorff=False #TODO change it to True # We encourage participants to experiment with partitioning_1 and partitioning_2, as well as to create # other partitionings to test your changes for generalization to multiple partitionings. @@ -529,25 +529,25 @@ def FedAvgM_Selection(local_tensors, institution_split_csv_filename = 'small_split.csv' # change this to point to the parent directory of the data -brats_training_data_parent_dir = '/home/ad_tbanda/code/fedAI/MICCAI_FeTS2022_TrainingData' +brats_training_data_parent_dir = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_TrainingData' #TODO revert to '/raid/datasets/FeTS22/MICCAI_FeTS2022_TrainingData' before raising the PR # increase this if you need a longer history for your algorithms # decrease this if you need to reduce system RAM consumption -db_store_rounds = 5 +db_store_rounds = 5 #TODO store the tensor db for these many rounds # this is passed to PyTorch, so set it accordingly for your system device = 'cpu' # you'll want to increase this most likely. You can set it as high as you like, # however, the experiment will exit once the simulated time exceeds one week. -rounds_to_train = 1 +rounds_to_train = 2 #TODO change it to 5 before merging # (bool) Determines whether checkpoints should be saved during the experiment. # The checkpoints can grow quite large (5-10GB) so only the latest will be saved when this parameter is enabled save_checkpoints = True # path to previous checkpoint folder for experiment that was stopped before completion. 
-# Checkpoints are stored in checkpoint, and you should provide the experiment directory +# Checkpoints are stored in ~/.local/workspace/checkpoint, and you should provide the experiment directory # relative to this path (i.e. 'experiment_1'). Please note that if you restore from a checkpoint, # and save checkpoint is set to True, then the checkpoint you restore from will be subsequently overwritten. # restore_from_checkpoint_folder = 'experiment_1' @@ -557,9 +557,9 @@ def FedAvgM_Selection(local_tensors, home = str(Path.home()) #Creating working directory and copying the required csv files -working_directory= os.path.join(home, '.local/workspace/') +working_directory= os.path.join(home, '.local/tarunNew/') Path(working_directory).mkdir(parents=True, exist_ok=True) -source_dir=f'{Path.cwd()}/openfl-workspace/fets_challenge_workspace/' +source_dir=f'{Path.cwd()}/partitioning_data/' pattern = "*.csv" source_pattern = os.path.join(source_dir, pattern) files_to_copy = glob.glob(source_pattern) @@ -596,7 +596,7 @@ def FedAvgM_Selection(local_tensors, # Now we will produce model outputs to submit to the leader board. # # At the end of every experiment, the best model (according to average ET, TC, WT DICE) -# is saved to disk at: checkpoint/\/best_model.pkl, +# is saved to disk at: ~/.local/workspace/checkpoint/checkpoint/\/best_model.pkl, # where \ is the one printed to stdout during the start of the # experiment (look for the log entry: "Created experiment folder experiment_##..." above). 
@@ -605,24 +605,30 @@ def FedAvgM_Selection(local_tensors, #checkpoint_folder='experiment_1' #data_path = -data_path = '/home/ad_tbanda/code/fedAI/MICCAI_FeTS2022_ValidationData' +data_path = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_ValidationData' #TODO revert to '/home/brats/MICCAI_FeTS2022_ValidationData' before raising the PR validation_csv_filename = 'validation.csv' # you can keep these the same if you wish -final_model_path = os.path.join(working_directory, 'checkpoint', checkpoint_folder, 'best_model.pkl') +if checkpoint_folder is not None: + final_model_path = os.path.join(working_directory, 'checkpoint', checkpoint_folder, 'best_model.pkl') +else: + exit("No checkpoint folder found. Please provide a valid checkpoint folder. Exiting the experiment without inferencing") # If the experiment is only run for a single round, use the temp model instead if not Path(final_model_path).exists(): final_model_path = os.path.join(working_directory, 'checkpoint', checkpoint_folder, 'temp_model.pkl') +if not Path(final_model_path).exists(): + exit("No model found. Please provide a valid checkpoint folder. 
Exiting the experiment without inferencing") + outputs_path = os.path.join(working_directory, 'checkpoint', checkpoint_folder, 'model_outputs') -# Using this best model, we can now produce NIfTI files for model outputs +# Using this best model, we can now produce NIfTI files for model outputs # using a provided data directory -model_outputs_to_disc(data_path=data_path, +model_outputs_to_disc(data_path=data_path, validation_csv=validation_csv_filename, - output_path=outputs_path, + output_path=outputs_path, native_model_path=final_model_path, outputtag='', device=device) \ No newline at end of file diff --git a/Task_1/fets_challenge/gandlf_config.yaml b/Task_1/fets_challenge/config/gandlf_config.yaml similarity index 100% rename from Task_1/fets_challenge/gandlf_config.yaml rename to Task_1/fets_challenge/config/gandlf_config.yaml diff --git a/Task_1/fets_challenge/experiment.py b/Task_1/fets_challenge/experiment.py index 310c46a..1a4f18a 100644 --- a/Task_1/fets_challenge/experiment.py +++ b/Task_1/fets_challenge/experiment.py @@ -4,63 +4,39 @@ # Patrick Foley (Intel), Micah Sheller (Intel) import os +from copy import deepcopy import warnings -from collections import namedtuple -from copy import copy -import shutil from logging import getLogger from pathlib import Path - -import numpy as np -import pandas as pd -from openfl.utilities.split import split_tensor_dict_for_holdouts -from openfl.utilities import TensorKey -from openfl.protocols import utils -import openfl.native as fx -import torch +from torch.utils.data import DataLoader from .gandlf_csv_adapter import construct_fedsim_csv, extract_csv_partitions from .custom_aggregation_wrapper import CustomAggregationWrapper -from .checkpoint_utils import setup_checkpoint_folder, save_checkpoint, load_checkpoint from .fets_flow import FeTSFederatedFlow from .fets_challenge_model import FeTSChallengeModel +from .fets_data_loader import FeTSDataLoader -from openfl.experimental.workflow.interface import FLSpec, 
Aggregator, Collaborator +from openfl.experimental.workflow.interface import Aggregator, Collaborator from openfl.experimental.workflow.runtime import LocalRuntime logger = getLogger(__name__) # This catches PyTorch UserWarnings for CPU warnings.filterwarnings("ignore", category=UserWarning) -# one week -# MINUTE = 60 -# HOUR = 60 * MINUTE -# DAY = 24 * HOUR -# WEEK = 7 * DAY -MAX_SIMULATION_TIME = 7 * 24 * 60 * 60 - def aggregator_private_attributes( - uuid, aggregation_type, collaborator_names, include_validation_with_hausdorff, choose_training_collaborators, - training_hyper_parameters_for_round, restore_from_checkpoint_folder, save_checkpoints): - return {"uuid": uuid, - "aggregation_type" : aggregation_type, + aggregation_type, collaborator_names, db_store_rounds): + return {"aggregation_type" : aggregation_type, "collaborator_names": collaborator_names, - "include_validation_with_hausdorff": include_validation_with_hausdorff, - "choose_training_collaborators": choose_training_collaborators, - "training_hyper_parameters_for_round": training_hyper_parameters_for_round, - "max_simulation_time": MAX_SIMULATION_TIME, - "restore_from_checkpoint_folder": restore_from_checkpoint_folder, - "save_checkpoints":save_checkpoints, - "checkpoint_folder":"" + "checkpoint_folder":None, + "db_store_rounds":db_store_rounds } def collaborator_private_attributes( - index, n_collaborators, gandlf_config, train_csv_path, val_csv_path): + index, gandlf_config, train_csv_path, val_csv_path): return { "index": index, - "n_collaborators": n_collaborators, "gandlf_config": gandlf_config, "train_csv_path": train_csv_path, "val_csv_path": val_csv_path @@ -80,14 +56,10 @@ def run_challenge_experiment(aggregation_function, include_validation_with_hausdorff=True, use_pretrained_model=False): - from sys import path, exit - file = Path(__file__).resolve() root = file.parent.resolve() # interface root, containing command modules work = Path.cwd().resolve() - gandlf_config_path = 
os.path.join(root, 'gandlf_config.yaml') - path.append(str(root)) - path.insert(0, str(work)) + gandlf_config_path = os.path.join(root, 'config', 'gandlf_config.yaml') # create gandlf_csv and get collaborator names gandlf_csv_path = os.path.join(work, 'gandlf_paths.csv') @@ -101,15 +73,11 @@ def run_challenge_experiment(aggregation_function, aggregation_wrapper = CustomAggregationWrapper(aggregation_function) - # [TODO] [Workflow - API] Need to check db_store rounds + # [TODO] Handle the storing of data in the fets flow (add db_sotre_rounds aggregator private attribute) # overrides = { # 'aggregator.settings.db_store_rounds': db_store_rounds, # } - # [TODO] [Workflow - API] How to update the gandfl_config runtime - # if not include_validation_with_hausdorff: - # plan.config['task_runner']['settings']['fets_config_dict']['metrics'] = ['dice','dice_per_label'] - transformed_csv_dict = extract_csv_partitions(os.path.join(work, 'gandlf_paths.csv')) collaborators = [] @@ -134,7 +102,6 @@ def run_challenge_experiment(aggregation_function, num_gpus=0.0, # arguments required to pass to callable index=idx, - n_collaborators=len(collaborator_names), gandlf_config=gandlf_config_path, train_csv_path=train_csv_path, val_csv_path=val_csv_path @@ -145,14 +112,9 @@ def run_challenge_experiment(aggregation_function, private_attributes_callable=aggregator_private_attributes, num_cpus=4.0, num_gpus=0.0, - uuid='aggregator', collaborator_names=collaborator_names, - include_validation_with_hausdorff=include_validation_with_hausdorff, aggregation_type=aggregation_wrapper, - choose_training_collaborators=choose_training_collaborators, - training_hyper_parameters_for_round=training_hyper_parameters_for_round, - restore_from_checkpoint_folder=restore_from_checkpoint_folder, - save_checkpoints=save_checkpoints) + db_store_rounds=db_store_rounds) local_runtime = LocalRuntime( aggregator=aggregator, collaborators=collaborators, backend="single_process", num_actors=1 @@ -160,17 +122,24 @@ def 
run_challenge_experiment(aggregation_function, logger.info(f"Local runtime collaborators = {local_runtime.collaborators}") + params_dict = {"include_validation_with_hausdorff": include_validation_with_hausdorff, + "choose_training_collaborators": choose_training_collaborators, #TODO verify with different collaborators and check if works? + "training_hyper_parameters_for_round": training_hyper_parameters_for_round, + "restore_from_checkpoint_folder": restore_from_checkpoint_folder, + "save_checkpoints": save_checkpoints} + model = FeTSChallengeModel(gandlf_config_path) flflow = FeTSFederatedFlow( model, + params_dict, rounds_to_train, - device, + device ) flflow.runtime = local_runtime flflow.run() - # [TODO] [Workflow - API] -> Commenting as pretrained model is not used. + # #TODO [Workflow - API] -> Commenting as pretrained model is not used. # ---> Define a new step in federated flow before training to load the pretrained model # if use_pretrained_model: # print('TESTING ->>>>>> Loading pretrained model...') @@ -192,13 +161,11 @@ def run_challenge_experiment(aggregation_function, # task_runner.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) # # Initialize model weights - # # [TODO] [Workflow - API] How to set the initial state in the workflow + # #TODO [Workflow - API] How to set the initial state in the workflow -> check if it needed to be done in workflow # init_state_path = plan.config['aggregator']['settings']['init_state_path'] # tensor_dict, _ = split_tensor_dict_for_holdouts(logger, task_runner.get_tensor_dict(False)) - # model_snap = utils.construct_model_proto(tensor_dict=tensor_dict, # round_number=0, # tensor_pipe=tensor_pipe) - # utils.dump_proto(model_proto=model_snap, fpath=init_state_path) return aggregator.private_attributes["checkpoint_folder"] \ No newline at end of file diff --git a/Task_1/fets_challenge/fets_flow.py b/Task_1/fets_challenge/fets_flow.py index d1a5d95..469134b 100644 --- a/Task_1/fets_challenge/fets_flow.py +++ 
b/Task_1/fets_challenge/fets_flow.py @@ -1,26 +1,20 @@ import os -from copy import deepcopy -from typing import Union - -import logging -import pandas as pd -import numpy as np -import torch as pt -import yaml import shutil import time - -from sys import path -from openfl.federated import Plan +import logging +from copy import deepcopy +import pandas as pd from pathlib import Path from openfl.experimental.workflow.interface import FLSpec from openfl.experimental.workflow.placement import aggregator, collaborator from openfl.databases import TensorDB -from openfl.utilities import TaskResultKey, TensorKey, change_tags +from openfl.utilities import TensorKey, change_tags + from .fets_data_loader import FeTSDataLoader + from .checkpoint_utils import setup_checkpoint_folder, save_checkpoint, load_checkpoint -from .time_utils import gen_collaborator_time_stats, compute_times_per_collaborator +from .time_utils import gen_collaborator_time_stats, compute_times_per_collaborator, MAX_SIMULATION_TIME from GANDLF.compute.generic import create_pytorch_objects from GANDLF.config_manager import ConfigManager @@ -28,19 +22,10 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -# one week -# MINUTE = 60 -# HOUR = 60 * MINUTE -# DAY = 24 * HOUR -# WEEK = 7 * DAY -MAX_SIMULATION_TIME = 7 * 24 * 60 * 60 - def get_metric(metric_name, fl_round, agg_tensor_db): target_tags = ('metric', 'validate_agg') metric_tensor_key = TensorKey(metric_name, 'aggregator', fl_round, True, target_tags) - logger.info(f'Getting metric {metric_name} at round {fl_round} tensor key: {metric_tensor_key}') nparray = agg_tensor_db.get_tensor_from_cache(metric_tensor_key) - #logger.info(f'nparray for {metric_name} at round {fl_round}: {nparray.item()}') return nparray.item() def cache_tensor_dict(tensor_dict, agg_tensor_db, idx, agg_out_dict): @@ -56,10 +41,17 @@ def cache_tensor_dict(tensor_dict, agg_tensor_db, idx, agg_out_dict): agg_out_dict[modified_key] = value 
agg_tensor_db.cache_tensor(agg_out_dict) +def get_aggregated_dict_with_tensorname(agg_tensor_dict): + agg_dict_with_tensornames = {} + for tensor_key, value in agg_tensor_dict.items(): + tensor_name, origin, round_number, report, tags = tensor_key + agg_dict_with_tensornames[tensor_name] = value + return agg_dict_with_tensornames + collaborator_data_loaders = {} class FeTSFederatedFlow(FLSpec): - def __init__(self, fets_model, rounds=3 , device="cpu", **kwargs): + def __init__(self, fets_model, params_dict, rounds=5 , device="cpu", **kwargs): super().__init__(**kwargs) self.fets_model = fets_model self.n_rounds = rounds @@ -70,11 +62,16 @@ def __init__(self, fets_model, rounds=3 , device="cpu", **kwargs): self.best_dice_over_time_auc = 0 self.collaborators_chosen_each_round = {} self.collaborator_times_per_round = {} - self.times_per_collaborator = {} self.agg_tensor_dict = {} self.tensor_keys_per_col = {} self.restored = False + self.include_validation_with_hausdorff = params_dict.get('include_validation_with_hausdorff', False) + self.choose_training_collaborators = params_dict.get('choose_training_collaborators', None) + self.training_hyper_parameters_for_round = params_dict.get('training_hyper_parameters_for_round', None) + self.restore_from_checkpoint_folder = params_dict.get('restore_from_checkpoint_folder', None) + self.save_checkpoints = params_dict.get('save_checkpoints', False) + self.experiment_results = { 'round':[], 'time': [], @@ -135,8 +132,7 @@ def start(self): new_tags = change_tags(tags, remove_field=col) new_tensor_key = TensorKey(tensor_name, origin, round_number, report, new_tags) if tensor_name not in self.agg_tensor_dict: - self.agg_tensor_dict[tensor_name] = agg_tensor_db.get_tensor_from_cache(new_tensor_key) - #logger.info(f'Fetched tensor {tensor_name} from tensor_db for round {round_number}') + self.agg_tensor_dict[new_tensor_key] = agg_tensor_db.get_tensor_from_cache(new_tensor_key) self.collaborator_time_stats = 
gen_collaborator_time_stats(self.collaborator_names) self.next(self.fetch_parameters_for_colls) @@ -146,10 +142,8 @@ def fetch_parameters_for_colls(self): print("*" * 40) print("Starting round {}".format(self.current_round)) print("*" * 40) - logger.info('Fetching hyperparameters') - tensrdb = TensorDB() # [TODO] Check is it required ? hparams = self.training_hyper_parameters_for_round(self.collaborators, - tensrdb._iterate(), + None, self.current_round, self.collaborators_chosen_each_round, self.collaborator_times_per_round) @@ -168,25 +162,32 @@ def fetch_parameters_for_colls(self): # pick collaborators to train for the round self.training_collaborators = self.choose_training_collaborators(self.collaborator_names, - tensrdb._iterate(), + None, self.current_round, self.collaborators_chosen_each_round, self.collaborator_times_per_round) logger.info('Collaborators chosen to train for round {}:\n\t{}'.format(self.current_round, self.training_collaborators)) - # save the collaborators chosen this round self.collaborators_chosen_each_round[self.current_round] = self.training_collaborators - self.next(self.initialize_colls, foreach='training_collaborators') + if self.current_round == 1 or self.restored is True: + self.next(self.initialize_colls, foreach='collaborators') + else: + self.next(self.aggregated_model_validation, foreach='training_collaborators') @collaborator def initialize_colls(self): + + gandlf_conf = {} if isinstance(self.gandlf_config, str) and os.path.exists(self.gandlf_config): - gandlf_conf = yaml.safe_load(open(self.gandlf_config, "r")) + gandlf_conf = ConfigManager(self.gandlf_config) + elif isinstance(self.gandlf_config, dict): + gandlf_conf = self.gandlf_config + else: + exit("GANDLF config file not found. 
Exiting...") - logger.info(gandlf_conf) - gandlf_conf = ConfigManager(self.gandlf_config) + if not self.include_validation_with_hausdorff: + gandlf_conf['metrics'] = ['dice','dice_per_label'] - self.fets_model.device = self.device logger.info(f'Initializing collaborator {self.input}') ( model, @@ -198,6 +199,8 @@ def initialize_colls(self): ) = create_pytorch_objects( gandlf_conf, train_csv=self.train_csv_path, val_csv=self.val_csv_path, device=self.device ) + + self.fets_model.device = self.device self.fets_model.model = model self.fets_model.optimizer = optimizer self.fets_model.scheduler = scheduler @@ -205,15 +208,16 @@ def initialize_colls(self): logger.info(f'Initializing dataloaders for collaborator {self.input}') collaborator_data_loaders[self.input] = FeTSDataLoader(train_loader, val_loader) - self.times_per_collaborator[self.input] = compute_times_per_collaborator(self.input, - self.training_collaborators, - self.hparam_dict['epochs_per_round'], - collaborator_data_loaders[self.input], - self.collaborator_time_stats, - self.current_round) - print(f'Times per collaborator for round {self.current_round}: {self.times_per_collaborator[self.input]}') + #TODO the times per collaborator is calculated based on the random values, it doesn't look like the actual time taken by the collaborator + self.times_per_collaborator = compute_times_per_collaborator(self.input, + self.training_collaborators, + self.hparam_dict['epochs_per_round'], + collaborator_data_loaders[self.input], + self.collaborator_time_stats, + self.current_round) + print(f'Times per collaborator for round {self.current_round}: {self.times_per_collaborator}') if self.restored is False: self.agg_tensor_dict = self.fets_model.get_tensor_dict() self.next(self.aggregated_model_validation) @@ -223,7 +227,7 @@ def aggregated_model_validation(self): validation_start_time = time.time() logger.info(f'Performing aggregated model validation for collaborator {self.input}') - input_tensor_dict = 
deepcopy(self.agg_tensor_dict) + input_tensor_dict = get_aggregated_dict_with_tensorname(self.agg_tensor_dict) val_loader = collaborator_data_loaders[self.input].get_valid_loader() self.fets_model.rebuild_model(self.current_round, input_tensor_dict) self.agg_valid_dict, _ = self.fets_model.validate(self.input, self.current_round, val_loader, apply="global") @@ -241,6 +245,9 @@ def train(self): train_loader = collaborator_data_loaders[self.input].get_train_loader() self.global_output_tensor_dict, _ = self.fets_model.train(self.input, self.current_round, self.hparam_dict, train_loader) + self.collaborator_task_weight = collaborator_data_loaders[self.input].get_train_data_size() + + print(f'Collaborator task weight in training: {self.collaborator_task_weight}') training_end_time = time.time() self.training_time = training_end_time - training_start_time print(f'Collaborator {self.input} took {self.training_time} seconds for training') @@ -259,42 +266,38 @@ def local_model_validation(self): print(f'Collaborator {self.input} took {self.local_model_validation_time} seconds for local validation') self.next(self.join) - # @collaborator - # def testing_collaborator(self): - # logger.info(f'Testing collaborator {self.input}') - # self.next(self.join) - - # @aggregator - # def join(self, inputs): - # self.next(self.internal_loop) + @aggregator + def join_task(self, inputs): + self.next(self.internal_loop) @aggregator def join(self, inputs): join_start_time = time.time() self.aggregation_type.set_state_data_for_round(self.collaborators_chosen_each_round, self.collaborator_times_per_round) - agg_tensor_db = TensorDB() # [TODO] As tensordb cannot be used as FLSpec Attribute, should we load this tensor_db from agg_tensor_dict before checkpointing ? 
- collaborator_task_weight = {} + agg_tensor_db = TensorDB() + cache_tensor_dict(self.agg_tensor_dict, agg_tensor_db, 0, {}) + collaborator_weights_unnormalized = {} + times_per_collaborator = {} for idx, col in enumerate(inputs): logger.info(f'Aggregating results for {idx}') agg_out_dict = {} cache_tensor_dict(col.local_valid_dict, agg_tensor_db, idx, agg_out_dict) cache_tensor_dict(col.agg_valid_dict, agg_tensor_db, idx, agg_out_dict) - cache_tensor_dict(col.global_output_tensor_dict, agg_tensor_db, idx, agg_out_dict) - collaborator_task_weight[col.input] = collaborator_data_loaders[col.input].get_train_data_size() + cache_tensor_dict(col.global_output_tensor_dict, agg_tensor_db, idx, agg_out_dict) # Store the keys for each collaborator tensor_keys = [] for tensor_key in agg_out_dict.keys(): tensor_keys.append(tensor_key) self.tensor_keys_per_col[str(idx + 1)] = tensor_keys - # [TODO] : Aggregation Function -> Collaborator Weight Dict - self.agg_tensor_dict = {} - # The collaborator data sizes for that task - collaborator_weights_unnormalized = { - col.input: collaborator_task_weight[col.input] - for _, col in enumerate(inputs) - } - weight_total = sum(collaborator_task_weight.values()) + #TODO : Compare the weight from the old expermient, we saw three different sets of weights while running the experiment for single round + # The collaborator data sizes for that task + collaborator_weights_unnormalized[col.input] = col.collaborator_task_weight + times_per_collaborator[col.input] = col.times_per_collaborator + + print(f'Collaborator task weights: {collaborator_weights_unnormalized}') + print(f'Collaborator times: {times_per_collaborator}') + weight_total = sum(collaborator_weights_unnormalized.values()) collaborator_weight_dict = { k: v / weight_total for k, v in collaborator_weights_unnormalized.items() } @@ -311,15 +314,7 @@ def join(self, inputs): aggregation_function=self.aggregation_type, ) if 'trained' in tags and tensor_name not in self.agg_tensor_dict: - 
logger.info(f'Fetched tensor {tensor_name} from tensor_db for round {round_number}') - self.agg_tensor_dict[tensor_name] = agg_tensor_db.get_tensor_from_cache(agg_tensor_key) - #logger.info(f'Aggregated tensor value for tensor key {agg_tensor_key}') - - # Rebuild the model with the aggregated tensor_dict - # for input in inputs: - # if self.agg_tensor_dict is not None: - # local_tensor_dict = deepcopy(self.agg_tensor_dict) - # input.fets_model.rebuild_model(self.current_round, local_tensor_dict) + self.agg_tensor_dict[agg_tensor_key] = agg_tensor_db.get_tensor_from_cache(agg_tensor_key) round_loss = get_metric('valid_loss', self.current_round, agg_tensor_db) round_dice = get_metric('valid_dice', self.current_round, agg_tensor_db) @@ -333,13 +328,12 @@ def join(self, inputs): hausdorff95_label_2 = get_metric('valid_hd95_per_label_2', self.current_round, agg_tensor_db) hausdorff95_label_4 = get_metric('valid_hd95_per_label_4', self.current_round, agg_tensor_db) - #times_list = [(t, col) for col, t in self.times_per_collaborator.items()] - #times_list = sorted(times_list) + times_list = [(t, col) for col, t in times_per_collaborator.items()] + times_list = sorted(times_list) # the round time is the max of the times_list - round_time = 1 - #round_time = max([t for t, _ in times_list]) - #self.total_simulated_time += round_time + round_time = max([t for t, _ in times_list]) + self.total_simulated_time += round_time if self.best_dice < round_dice: self.best_dice = round_dice @@ -360,8 +354,8 @@ def join(self, inputs): # project the auc score as remaining time * best dice # this projection assumes that the current best score is carried forward for the entire week - projected_auc = (self.max_simulation_time - self.total_simulated_time) * self.best_dice + self.best_dice_over_time_auc - projected_auc /= self.max_simulation_time + projected_auc = (MAX_SIMULATION_TIME - self.total_simulated_time) * self.best_dice + self.best_dice_over_time_auc + projected_auc /= 
MAX_SIMULATION_TIME # # End of round summary summary = '"**** END OF ROUND {} SUMMARY *****"'.format(self.current_round) @@ -411,7 +405,8 @@ def join(self, inputs): # in practice, this means that the previous round's model is the last model scored, # so a long final round should not actually benefit the competitor, since that final # model is never globally validated - if self.total_simulated_time > self.max_simulation_time: + # TODO : Added total time taken by running the experiment till join per round + if self.total_simulated_time > MAX_SIMULATION_TIME: logger.info("Simulation time exceeded. Ending Experiment") self.next(self.end) @@ -426,11 +421,11 @@ def join(self, inputs): self.fets_model.params = inputs[0].fets_model.params # Rebuild the model with the aggregated tensor_dict - local_tensor_dict = deepcopy(self.agg_tensor_dict) + local_tensor_dict = get_aggregated_dict_with_tensorname(self.agg_tensor_dict) self.fets_model.rebuild_model(self.current_round, local_tensor_dict) self.fets_model.save_native(f'checkpoint/{self.checkpoint_folder}/temp_model.pkl') - # [TOOD] : Remove below logging + #TODO : Remove below logging join_end_time = time.time() self.join_time = join_end_time - join_start_time print(f'took {self.join_time} seconds for join_time') @@ -441,13 +436,14 @@ def join(self, inputs): total_time += input.aggregated_model_validation_time + input.training_time + input.local_model_validation_time + self.join_time print(f'took {total_time} seconds for total training and valid') - + #TODO cleaup aggreated tensor dict based on db store rounds, get the round number of data to be deleted, by finding round number from the dictioinary keys self.next(self.internal_loop) @aggregator def internal_loop(self): if self.current_round == self.n_rounds: print('************* EXPERIMENT COMPLETED *************') + # TODO : Add the average time taken for completing n_rounds print('Experiment results:') print(pd.DataFrame.from_dict(self.experiment_results)) 
self.next(self.end) diff --git a/Task_1/fets_challenge/inference.py b/Task_1/fets_challenge/inference.py index 4cab1b5..3400b08 100644 --- a/Task_1/fets_challenge/inference.py +++ b/Task_1/fets_challenge/inference.py @@ -216,8 +216,6 @@ def model_outputs_to_disc(data_path, root = file.parent.resolve() # interface root, containing command modules work = Path.cwd().resolve() - path.append(str(root)) - path.insert(0, str(work)) generate_validation_csv(data_path,validation_csv, working_dir=work) # # overwrite datapath value for a single 'InferenceCol' collaborator @@ -228,8 +226,7 @@ def model_outputs_to_disc(data_path, # # get the task runner, passing the data loader # task_runner = copy(plan).get_task_runner(data_loader) - - gandlf_config_path = os.path.join(root, 'gandlf_config.yaml') + gandlf_config_path = os.path.join(root, 'config', 'gandlf_config.yaml') fets_model = FeTSChallengeModel(gandlf_config_path) val_csv_path = os.path.join(work, 'validation_paths.csv') gandlf_conf = ConfigManager(gandlf_config_path) diff --git a/Task_1/fets_challenge/time_utils.py b/Task_1/fets_challenge/time_utils.py index 2fc1288..4b43014 100644 --- a/Task_1/fets_challenge/time_utils.py +++ b/Task_1/fets_challenge/time_utils.py @@ -96,6 +96,13 @@ # This catches PyTorch UserWarnings for CPU warnings.filterwarnings("ignore", category=UserWarning) +# one week +# MINUTE = 60 +# HOUR = 60 * MINUTE +# DAY = 24 * HOUR +# WEEK = 7 * DAY +MAX_SIMULATION_TIME = 7 * 24 * 60 * 60 #TODO check if this can be move to time_utils.py file + CollaboratorTimeStats = namedtuple('CollaboratorTimeStats', [ 'validation_mean', diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/partitioning_1.csv b/Task_1/openfl-workspace/fets_challenge_workspace/partitioning_1.csv deleted file mode 100644 index a5a8fb3..0000000 --- a/Task_1/openfl-workspace/fets_challenge_workspace/partitioning_1.csv +++ /dev/null @@ -1,1252 +0,0 @@ -Partition_ID,Subject_ID -1,FeTS2022_01341 -1,FeTS2022_01333 -1,FeTS2022_01077 
-1,FeTS2022_01054 -1,FeTS2022_00285 -1,FeTS2022_01308 -1,FeTS2022_01363 -1,FeTS2022_01091 -1,FeTS2022_01273 -1,FeTS2022_01108 -1,FeTS2022_01255 -1,FeTS2022_01301 -1,FeTS2022_00219 -1,FeTS2022_00380 -1,FeTS2022_01349 -1,FeTS2022_00251 -1,FeTS2022_01276 -1,FeTS2022_01407 -1,FeTS2022_01344 -1,FeTS2022_01405 -1,FeTS2022_00218 -1,FeTS2022_01327 -1,FeTS2022_01252 -1,FeTS2022_01132 -1,FeTS2022_01036 -1,FeTS2022_01039 -1,FeTS2022_01366 -1,FeTS2022_00262 -1,FeTS2022_01279 -1,FeTS2022_00839 -1,FeTS2022_01322 -1,FeTS2022_00389 -1,FeTS2022_00390 -1,FeTS2022_00431 -1,FeTS2022_00222 -1,FeTS2022_00373 -1,FeTS2022_00288 -1,FeTS2022_00284 -1,FeTS2022_01088 -1,FeTS2022_00311 -1,FeTS2022_00387 -1,FeTS2022_00258 -1,FeTS2022_01389 -1,FeTS2022_00321 -1,FeTS2022_01249 -1,FeTS2022_01230 -1,FeTS2022_00836 -1,FeTS2022_00348 -1,FeTS2022_01205 -1,FeTS2022_00246 -1,FeTS2022_00314 -1,FeTS2022_01404 -1,FeTS2022_01102 -1,FeTS2022_00379 -1,FeTS2022_01395 -1,FeTS2022_00155 -1,FeTS2022_00170 -1,FeTS2022_01264 -1,FeTS2022_00837 -1,FeTS2022_01372 -1,FeTS2022_00341 -1,FeTS2022_01257 -1,FeTS2022_00329 -1,FeTS2022_00425 -1,FeTS2022_01350 -1,FeTS2022_01247 -1,FeTS2022_01234 -1,FeTS2022_00331 -1,FeTS2022_01128 -1,FeTS2022_01365 -1,FeTS2022_00221 -1,FeTS2022_00298 -1,FeTS2022_00227 -1,FeTS2022_01204 -1,FeTS2022_00204 -1,FeTS2022_01399 -1,FeTS2022_00377 -1,FeTS2022_00343 -1,FeTS2022_00280 -1,FeTS2022_01347 -1,FeTS2022_00210 -1,FeTS2022_01117 -1,FeTS2022_01275 -1,FeTS2022_01034 -1,FeTS2022_00162 -1,FeTS2022_01340 -1,FeTS2022_01212 -1,FeTS2022_01220 -1,FeTS2022_00419 -1,FeTS2022_00340 -1,FeTS2022_00296 -1,FeTS2022_01208 -1,FeTS2022_01064 -1,FeTS2022_00433 -1,FeTS2022_01050 -1,FeTS2022_01278 -1,FeTS2022_00293 -1,FeTS2022_00206 -1,FeTS2022_00356 -1,FeTS2022_00376 -1,FeTS2022_00316 -1,FeTS2022_00403 -1,FeTS2022_01348 -1,FeTS2022_00192 -1,FeTS2022_00313 -1,FeTS2022_01240 -1,FeTS2022_01222 -1,FeTS2022_00344 -1,FeTS2022_00332 -1,FeTS2022_00292 -1,FeTS2022_01392 -1,FeTS2022_00220 -1,FeTS2022_00378 -1,FeTS2022_01130 
-1,FeTS2022_01106 -1,FeTS2022_01295 -1,FeTS2022_01409 -1,FeTS2022_01057 -1,FeTS2022_01068 -1,FeTS2022_00320 -1,FeTS2022_00346 -1,FeTS2022_01001 -1,FeTS2022_01207 -1,FeTS2022_01137 -1,FeTS2022_01318 -1,FeTS2022_00289 -1,FeTS2022_00157 -1,FeTS2022_01224 -1,FeTS2022_01367 -1,FeTS2022_01382 -1,FeTS2022_00309 -1,FeTS2022_01008 -1,FeTS2022_01059 -1,FeTS2022_01271 -1,FeTS2022_01110 -1,FeTS2022_01398 -1,FeTS2022_01119 -1,FeTS2022_00209 -1,FeTS2022_01274 -1,FeTS2022_01124 -1,FeTS2022_00241 -1,FeTS2022_00152 -1,FeTS2022_01131 -1,FeTS2022_01353 -1,FeTS2022_00352 -1,FeTS2022_01133 -1,FeTS2022_01084 -1,FeTS2022_00441 -1,FeTS2022_01268 -1,FeTS2022_01210 -1,FeTS2022_01375 -1,FeTS2022_00236 -1,FeTS2022_01218 -1,FeTS2022_01408 -1,FeTS2022_00274 -1,FeTS2022_01118 -1,FeTS2022_01213 -1,FeTS2022_01310 -1,FeTS2022_00194 -1,FeTS2022_00392 -1,FeTS2022_00334 -1,FeTS2022_00270 -1,FeTS2022_01359 -1,FeTS2022_01364 -1,FeTS2022_01336 -1,FeTS2022_01272 -1,FeTS2022_01090 -1,FeTS2022_00412 -1,FeTS2022_00228 -1,FeTS2022_00410 -1,FeTS2022_01239 -1,FeTS2022_01010 -1,FeTS2022_01394 -1,FeTS2022_00282 -1,FeTS2022_00237 -1,FeTS2022_01390 -1,FeTS2022_00382 -1,FeTS2022_00188 -1,FeTS2022_01211 -1,FeTS2022_01376 -1,FeTS2022_01243 -1,FeTS2022_01330 -1,FeTS2022_00253 -1,FeTS2022_01329 -1,FeTS2022_01306 -1,FeTS2022_01081 -1,FeTS2022_01369 -1,FeTS2022_01048 -1,FeTS2022_00328 -1,FeTS2022_00291 -1,FeTS2022_01049 -1,FeTS2022_01263 -1,FeTS2022_00317 -1,FeTS2022_00305 -1,FeTS2022_01265 -1,FeTS2022_00238 -1,FeTS2022_00423 -1,FeTS2022_01127 -1,FeTS2022_01379 -1,FeTS2022_01258 -1,FeTS2022_00299 -1,FeTS2022_01334 -1,FeTS2022_00350 -1,FeTS2022_01109 -1,FeTS2022_01352 -1,FeTS2022_01055 -1,FeTS2022_00167 -1,FeTS2022_01354 -1,FeTS2022_01231 -1,FeTS2022_00185 -1,FeTS2022_00306 -1,FeTS2022_00171 -1,FeTS2022_01261 -1,FeTS2022_01345 -1,FeTS2022_01397 -1,FeTS2022_00399 -1,FeTS2022_01319 -1,FeTS2022_01250 -1,FeTS2022_01097 -1,FeTS2022_01229 -1,FeTS2022_01393 -1,FeTS2022_00430 -1,FeTS2022_01203 -1,FeTS2022_01309 -1,FeTS2022_01342 
-1,FeTS2022_01223 -1,FeTS2022_00239 -1,FeTS2022_00275 -1,FeTS2022_00406 -1,FeTS2022_01116 -1,FeTS2022_01380 -1,FeTS2022_00214 -1,FeTS2022_00195 -1,FeTS2022_01314 -1,FeTS2022_01113 -1,FeTS2022_00193 -1,FeTS2022_01259 -1,FeTS2022_00386 -1,FeTS2022_00834 -1,FeTS2022_01227 -1,FeTS2022_01277 -1,FeTS2022_00283 -1,FeTS2022_01099 -1,FeTS2022_00212 -1,FeTS2022_00165 -1,FeTS2022_01332 -1,FeTS2022_00364 -1,FeTS2022_01129 -1,FeTS2022_00301 -1,FeTS2022_01402 -1,FeTS2022_00199 -1,FeTS2022_01066 -1,FeTS2022_01107 -1,FeTS2022_01337 -1,FeTS2022_00230 -1,FeTS2022_01114 -1,FeTS2022_01294 -1,FeTS2022_01370 -1,FeTS2022_01269 -1,FeTS2022_01043 -1,FeTS2022_00359 -1,FeTS2022_01004 -1,FeTS2022_00286 -1,FeTS2022_01038 -1,FeTS2022_00370 -1,FeTS2022_00184 -1,FeTS2022_00360 -1,FeTS2022_01123 -1,FeTS2022_01237 -1,FeTS2022_01086 -1,FeTS2022_00231 -1,FeTS2022_00353 -1,FeTS2022_01254 -1,FeTS2022_01373 -1,FeTS2022_01100 -1,FeTS2022_01214 -1,FeTS2022_01242 -1,FeTS2022_01115 -1,FeTS2022_01331 -1,FeTS2022_00391 -1,FeTS2022_01312 -1,FeTS2022_00324 -1,FeTS2022_01080 -1,FeTS2022_00371 -1,FeTS2022_01396 -1,FeTS2022_00339 -1,FeTS2022_00260 -1,FeTS2022_00243 -1,FeTS2022_00233 -1,FeTS2022_01323 -1,FeTS2022_01248 -1,FeTS2022_00263 -1,FeTS2022_00347 -1,FeTS2022_01233 -1,FeTS2022_00367 -1,FeTS2022_01051 -1,FeTS2022_01126 -1,FeTS2022_01267 -1,FeTS2022_00383 -1,FeTS2022_01357 -1,FeTS2022_00413 -1,FeTS2022_01287 -1,FeTS2022_00349 -1,FeTS2022_01244 -1,FeTS2022_01041 -1,FeTS2022_01236 -1,FeTS2022_01245 -1,FeTS2022_01383 -1,FeTS2022_00196 -1,FeTS2022_01387 -1,FeTS2022_00297 -1,FeTS2022_01103 -1,FeTS2022_01098 -1,FeTS2022_01410 -1,FeTS2022_00440 -1,FeTS2022_01305 -1,FeTS2022_01304 -1,FeTS2022_01074 -1,FeTS2022_01046 -1,FeTS2022_01226 -1,FeTS2022_01253 -1,FeTS2022_01040 -1,FeTS2022_00269 -1,FeTS2022_00310 -1,FeTS2022_01056 -1,FeTS2022_01311 -1,FeTS2022_01338 -1,FeTS2022_00166 -1,FeTS2022_00327 -1,FeTS2022_00254 -1,FeTS2022_01000 -1,FeTS2022_00259 -1,FeTS2022_01134 -1,FeTS2022_01104 -1,FeTS2022_01232 -1,FeTS2022_01286 
-1,FeTS2022_01052 -1,FeTS2022_01217 -1,FeTS2022_01238 -1,FeTS2022_00154 -1,FeTS2022_00395 -1,FeTS2022_00267 -1,FeTS2022_00366 -1,FeTS2022_00351 -1,FeTS2022_00159 -1,FeTS2022_00131 -1,FeTS2022_01246 -1,FeTS2022_01060 -1,FeTS2022_01087 -1,FeTS2022_00250 -1,FeTS2022_00234 -1,FeTS2022_01058 -1,FeTS2022_00235 -1,FeTS2022_00203 -1,FeTS2022_00414 -1,FeTS2022_01285 -1,FeTS2022_01071 -1,FeTS2022_01111 -1,FeTS2022_01377 -1,FeTS2022_01355 -1,FeTS2022_01384 -1,FeTS2022_01120 -1,FeTS2022_01082 -1,FeTS2022_01076 -1,FeTS2022_01072 -1,FeTS2022_00303 -1,FeTS2022_00436 -1,FeTS2022_01361 -1,FeTS2022_01073 -1,FeTS2022_00338 -1,FeTS2022_01351 -1,FeTS2022_00273 -1,FeTS2022_00186 -1,FeTS2022_00290 -1,FeTS2022_01381 -1,FeTS2022_01083 -1,FeTS2022_00409 -1,FeTS2022_00281 -1,FeTS2022_00840 -1,FeTS2022_00407 -1,FeTS2022_01094 -1,FeTS2022_01328 -1,FeTS2022_01078 -1,FeTS2022_00312 -1,FeTS2022_01235 -1,FeTS2022_01288 -1,FeTS2022_01391 -1,FeTS2022_01215 -1,FeTS2022_00160 -1,FeTS2022_00421 -1,FeTS2022_01317 -1,FeTS2022_01216 -1,FeTS2022_00178 -1,FeTS2022_00838 -1,FeTS2022_01321 -1,FeTS2022_01037 -1,FeTS2022_00176 -1,FeTS2022_01293 -1,FeTS2022_01219 -1,FeTS2022_01260 -1,FeTS2022_01339 -1,FeTS2022_01325 -1,FeTS2022_00249 -1,FeTS2022_01241 -1,FeTS2022_00211 -1,FeTS2022_01105 -1,FeTS2022_01138 -1,FeTS2022_00261 -1,FeTS2022_01316 -1,FeTS2022_01315 -1,FeTS2022_01256 -1,FeTS2022_00191 -1,FeTS2022_01069 -1,FeTS2022_01062 -1,FeTS2022_01135 -1,FeTS2022_00207 -1,FeTS2022_00401 -1,FeTS2022_00172 -1,FeTS2022_01085 -1,FeTS2022_00247 -1,FeTS2022_01206 -1,FeTS2022_01356 -1,FeTS2022_00325 -1,FeTS2022_00429 -1,FeTS2022_01122 -1,FeTS2022_01374 -1,FeTS2022_00156 -1,FeTS2022_01075 -1,FeTS2022_01362 -1,FeTS2022_01251 -1,FeTS2022_00405 -1,FeTS2022_01047 -1,FeTS2022_00240 -1,FeTS2022_00336 -1,FeTS2022_01092 -1,FeTS2022_01403 -1,FeTS2022_01385 -1,FeTS2022_01096 -1,FeTS2022_00426 -1,FeTS2022_00201 -1,FeTS2022_01335 -1,FeTS2022_00404 -1,FeTS2022_00322 -1,FeTS2022_00294 -1,FeTS2022_01070 -1,FeTS2022_01225 -1,FeTS2022_01067 
-1,FeTS2022_00375 -1,FeTS2022_00158 -1,FeTS2022_00177 -1,FeTS2022_00271 -1,FeTS2022_01388 -1,FeTS2022_01053 -1,FeTS2022_01042 -1,FeTS2022_01400 -1,FeTS2022_00183 -1,FeTS2022_01358 -1,FeTS2022_01266 -1,FeTS2022_01360 -1,FeTS2022_00304 -1,FeTS2022_01065 -1,FeTS2022_01093 -1,FeTS2022_00397 -1,FeTS2022_01262 -1,FeTS2022_00217 -1,FeTS2022_01401 -1,FeTS2022_01125 -1,FeTS2022_01406 -1,FeTS2022_01343 -1,FeTS2022_01346 -1,FeTS2022_01089 -1,FeTS2022_00216 -1,FeTS2022_01061 -1,FeTS2022_01299 -1,FeTS2022_00242 -1,FeTS2022_01112 -1,FeTS2022_00300 -1,FeTS2022_01280 -1,FeTS2022_00187 -1,FeTS2022_00318 -1,FeTS2022_01371 -1,FeTS2022_01378 -1,FeTS2022_00418 -1,FeTS2022_01121 -1,FeTS2022_01136 -1,FeTS2022_00266 -1,FeTS2022_01221 -1,FeTS2022_01307 -1,FeTS2022_01386 -1,FeTS2022_00432 -1,FeTS2022_01101 -1,FeTS2022_01228 -1,FeTS2022_01313 -1,FeTS2022_01209 -1,FeTS2022_00388 -1,FeTS2022_01270 -1,FeTS2022_01044 -1,FeTS2022_00417 -1,FeTS2022_01063 -1,FeTS2022_01368 -1,FeTS2022_00369 -1,FeTS2022_01095 -1,FeTS2022_00416 -1,FeTS2022_00400 -1,FeTS2022_01045 -1,FeTS2022_01202 -1,FeTS2022_01326 -1,FeTS2022_01079 -1,FeTS2022_00402 -1,FeTS2022_01320 -1,FeTS2022_01324 -2,FeTS2022_01412 -2,FeTS2022_01415 -2,FeTS2022_01411 -2,FeTS2022_01414 -2,FeTS2022_01413 -2,FeTS2022_01416 -3,FeTS2022_01439 -3,FeTS2022_01435 -3,FeTS2022_01434 -3,FeTS2022_01440 -3,FeTS2022_01431 -3,FeTS2022_01437 -3,FeTS2022_01436 -3,FeTS2022_01433 -3,FeTS2022_01438 -3,FeTS2022_01426 -3,FeTS2022_01427 -3,FeTS2022_01428 -3,FeTS2022_01429 -3,FeTS2022_01432 -3,FeTS2022_01430 -4,FeTS2022_01152 -4,FeTS2022_01178 -4,FeTS2022_01186 -4,FeTS2022_01184 -4,FeTS2022_01181 -4,FeTS2022_01187 -4,FeTS2022_01168 -4,FeTS2022_01196 -4,FeTS2022_01173 -4,FeTS2022_01176 -4,FeTS2022_01200 -4,FeTS2022_00565 -4,FeTS2022_01193 -4,FeTS2022_01174 -4,FeTS2022_01662 -4,FeTS2022_01660 -4,FeTS2022_01201 -4,FeTS2022_01167 -4,FeTS2022_01170 -4,FeTS2022_01179 -4,FeTS2022_01185 -4,FeTS2022_01197 -4,FeTS2022_01172 -4,FeTS2022_01189 -4,FeTS2022_00563 -4,FeTS2022_01180 
-4,FeTS2022_01198 -4,FeTS2022_01183 -4,FeTS2022_01151 -4,FeTS2022_01195 -4,FeTS2022_01657 -4,FeTS2022_01194 -4,FeTS2022_01191 -4,FeTS2022_01169 -4,FeTS2022_01171 -4,FeTS2022_00561 -4,FeTS2022_01659 -4,FeTS2022_01661 -4,FeTS2022_01190 -4,FeTS2022_01188 -4,FeTS2022_01199 -4,FeTS2022_01658 -4,FeTS2022_01192 -4,FeTS2022_01175 -4,FeTS2022_01182 -4,FeTS2022_01537 -4,FeTS2022_01177 -5,FeTS2022_00102 -5,FeTS2022_00149 -5,FeTS2022_01290 -5,FeTS2022_00113 -5,FeTS2022_01009 -5,FeTS2022_01007 -5,FeTS2022_01002 -5,FeTS2022_00139 -5,FeTS2022_01292 -5,FeTS2022_00100 -5,FeTS2022_01289 -5,FeTS2022_01291 -5,FeTS2022_01005 -5,FeTS2022_01282 -5,FeTS2022_01003 -5,FeTS2022_00109 -5,FeTS2022_01283 -5,FeTS2022_00999 -5,FeTS2022_01281 -5,FeTS2022_01284 -5,FeTS2022_00151 -5,FeTS2022_00123 -6,FeTS2022_01451 -6,FeTS2022_01453 -6,FeTS2022_01452 -6,FeTS2022_00831 -6,FeTS2022_01448 -6,FeTS2022_01300 -6,FeTS2022_01443 -6,FeTS2022_00136 -6,FeTS2022_01454 -6,FeTS2022_00144 -6,FeTS2022_00121 -6,FeTS2022_01297 -6,FeTS2022_00133 -6,FeTS2022_01447 -6,FeTS2022_00142 -6,FeTS2022_01450 -6,FeTS2022_00120 -6,FeTS2022_01298 -6,FeTS2022_01449 -6,FeTS2022_01442 -6,FeTS2022_01446 -6,FeTS2022_01303 -6,FeTS2022_01296 -6,FeTS2022_00132 -6,FeTS2022_01441 -6,FeTS2022_01445 -6,FeTS2022_01302 -6,FeTS2022_00143 -6,FeTS2022_00105 -6,FeTS2022_01444 -6,FeTS2022_00147 -6,FeTS2022_01455 -6,FeTS2022_00146 -6,FeTS2022_00137 -7,FeTS2022_01459 -7,FeTS2022_01464 -7,FeTS2022_01458 -7,FeTS2022_01457 -7,FeTS2022_01461 -7,FeTS2022_01456 -7,FeTS2022_01460 -7,FeTS2022_01462 -7,FeTS2022_01466 -7,FeTS2022_01465 -7,FeTS2022_01463 -7,FeTS2022_01467 -8,FeTS2022_00140 -8,FeTS2022_01469 -8,FeTS2022_01468 -8,FeTS2022_01470 -8,FeTS2022_00104 -8,FeTS2022_00110 -8,FeTS2022_00112 -8,FeTS2022_00128 -9,FeTS2022_00134 -9,FeTS2022_00150 -9,FeTS2022_00116 -9,FeTS2022_01471 -10,FeTS2022_01472 -10,FeTS2022_00117 -10,FeTS2022_00130 -10,FeTS2022_00138 -10,FeTS2022_01473 -10,FeTS2022_00111 -10,FeTS2022_00124 -10,FeTS2022_00106 -11,FeTS2022_00122 
-11,FeTS2022_00148 -11,FeTS2022_01474 -11,FeTS2022_00108 -11,FeTS2022_01144 -11,FeTS2022_00107 -11,FeTS2022_01140 -11,FeTS2022_01146 -11,FeTS2022_01145 -11,FeTS2022_01139 -11,FeTS2022_01141 -11,FeTS2022_01142 -11,FeTS2022_01143 -11,FeTS2022_01475 -12,FeTS2022_01482 -12,FeTS2022_01480 -12,FeTS2022_01485 -12,FeTS2022_01476 -12,FeTS2022_01481 -12,FeTS2022_01483 -12,FeTS2022_01486 -12,FeTS2022_01484 -12,FeTS2022_01479 -12,FeTS2022_01477 -12,FeTS2022_01478 -13,FeTS2022_01491 -13,FeTS2022_01500 -13,FeTS2022_01519 -13,FeTS2022_01516 -13,FeTS2022_01509 -13,FeTS2022_01520 -13,FeTS2022_01508 -13,FeTS2022_01503 -13,FeTS2022_01488 -13,FeTS2022_01492 -13,FeTS2022_01502 -13,FeTS2022_01493 -13,FeTS2022_01497 -13,FeTS2022_01499 -13,FeTS2022_01487 -13,FeTS2022_01505 -13,FeTS2022_01504 -13,FeTS2022_01490 -13,FeTS2022_01507 -13,FeTS2022_01510 -13,FeTS2022_01512 -13,FeTS2022_01514 -13,FeTS2022_01517 -13,FeTS2022_01501 -13,FeTS2022_01518 -13,FeTS2022_01506 -13,FeTS2022_01515 -13,FeTS2022_01511 -13,FeTS2022_01494 -13,FeTS2022_01489 -13,FeTS2022_01513 -13,FeTS2022_01496 -13,FeTS2022_01495 -13,FeTS2022_01521 -13,FeTS2022_01498 -14,FeTS2022_01522 -14,FeTS2022_01525 -14,FeTS2022_01526 -14,FeTS2022_01527 -14,FeTS2022_01524 -14,FeTS2022_01523 -15,FeTS2022_01530 -15,FeTS2022_01536 -15,FeTS2022_01535 -15,FeTS2022_01663 -15,FeTS2022_01534 -15,FeTS2022_01529 -15,FeTS2022_01531 -15,FeTS2022_01666 -15,FeTS2022_01665 -15,FeTS2022_01532 -15,FeTS2022_01664 -15,FeTS2022_01528 -15,FeTS2022_01533 -16,FeTS2022_00584 -16,FeTS2022_00567 -16,FeTS2022_00571 -16,FeTS2022_00582 -16,FeTS2022_00570 -16,FeTS2022_00594 -16,FeTS2022_00597 -16,FeTS2022_00596 -16,FeTS2022_00576 -16,FeTS2022_00572 -16,FeTS2022_00115 -16,FeTS2022_00593 -16,FeTS2022_00588 -16,FeTS2022_00598 -16,FeTS2022_00589 -16,FeTS2022_00574 -16,FeTS2022_00586 -16,FeTS2022_00579 -16,FeTS2022_00590 -16,FeTS2022_00599 -16,FeTS2022_00577 -16,FeTS2022_00575 -16,FeTS2022_00581 -16,FeTS2022_00591 -16,FeTS2022_00569 -16,FeTS2022_00587 -16,FeTS2022_00580 
-16,FeTS2022_00583 -16,FeTS2022_00578 -16,FeTS2022_00568 -17,FeTS2022_01423 -17,FeTS2022_01420 -17,FeTS2022_01422 -17,FeTS2022_01417 -17,FeTS2022_01421 -17,FeTS2022_01424 -17,FeTS2022_01418 -17,FeTS2022_01425 -17,FeTS2022_01419 -18,FeTS2022_01628 -18,FeTS2022_01615 -18,FeTS2022_01035 -18,FeTS2022_00732 -18,FeTS2022_00753 -18,FeTS2022_01620 -18,FeTS2022_01637 -18,FeTS2022_01594 -18,FeTS2022_00530 -18,FeTS2022_00772 -18,FeTS2022_01580 -18,FeTS2022_00731 -18,FeTS2022_00540 -18,FeTS2022_00464 -18,FeTS2022_01622 -18,FeTS2022_01154 -18,FeTS2022_01559 -18,FeTS2022_00729 -18,FeTS2022_00708 -18,FeTS2022_00044 -18,FeTS2022_00705 -18,FeTS2022_00645 -18,FeTS2022_01640 -18,FeTS2022_00008 -18,FeTS2022_00746 -18,FeTS2022_01551 -18,FeTS2022_01610 -18,FeTS2022_00061 -18,FeTS2022_00642 -18,FeTS2022_00675 -18,FeTS2022_01651 -18,FeTS2022_00651 -18,FeTS2022_00626 -18,FeTS2022_00028 -18,FeTS2022_01557 -18,FeTS2022_01616 -18,FeTS2022_00684 -18,FeTS2022_01538 -18,FeTS2022_01647 -18,FeTS2022_00688 -18,FeTS2022_00737 -18,FeTS2022_00063 -18,FeTS2022_00758 -18,FeTS2022_01159 -18,FeTS2022_00615 -18,FeTS2022_00621 -18,FeTS2022_01543 -18,FeTS2022_01560 -18,FeTS2022_00058 -18,FeTS2022_00009 -18,FeTS2022_00544 -18,FeTS2022_01611 -18,FeTS2022_00485 -18,FeTS2022_00735 -18,FeTS2022_00659 -18,FeTS2022_00025 -18,FeTS2022_00550 -18,FeTS2022_01599 -18,FeTS2022_00636 -18,FeTS2022_01644 -18,FeTS2022_00716 -18,FeTS2022_00641 -18,FeTS2022_01624 -18,FeTS2022_00547 -18,FeTS2022_00046 -18,FeTS2022_00728 -18,FeTS2022_00045 -18,FeTS2022_00493 -18,FeTS2022_00089 -18,FeTS2022_00622 -18,FeTS2022_01643 -18,FeTS2022_00602 -18,FeTS2022_00035 -18,FeTS2022_01545 -18,FeTS2022_00014 -18,FeTS2022_01566 -18,FeTS2022_00066 -18,FeTS2022_01614 -18,FeTS2022_01591 -18,FeTS2022_00514 -18,FeTS2022_01588 -18,FeTS2022_00520 -18,FeTS2022_01556 -18,FeTS2022_00097 -18,FeTS2022_00555 -18,FeTS2022_00736 -18,FeTS2022_00639 -18,FeTS2022_00479 -18,FeTS2022_01550 -18,FeTS2022_01592 -18,FeTS2022_01626 -18,FeTS2022_00557 -18,FeTS2022_00496 
-18,FeTS2022_00778 -18,FeTS2022_01561 -18,FeTS2022_00690 -18,FeTS2022_00750 -18,FeTS2022_01586 -18,FeTS2022_01549 -18,FeTS2022_01555 -18,FeTS2022_01612 -18,FeTS2022_01600 -18,FeTS2022_01629 -18,FeTS2022_01656 -18,FeTS2022_00500 -18,FeTS2022_00529 -18,FeTS2022_00628 -18,FeTS2022_00775 -18,FeTS2022_00523 -18,FeTS2022_00488 -18,FeTS2022_00518 -18,FeTS2022_00000 -18,FeTS2022_00020 -18,FeTS2022_01646 -18,FeTS2022_01638 -18,FeTS2022_00630 -18,FeTS2022_01590 -18,FeTS2022_01613 -18,FeTS2022_01571 -18,FeTS2022_00519 -18,FeTS2022_01617 -18,FeTS2022_01623 -18,FeTS2022_00691 -18,FeTS2022_01027 -18,FeTS2022_00704 -18,FeTS2022_00098 -18,FeTS2022_01558 -18,FeTS2022_00715 -18,FeTS2022_00757 -18,FeTS2022_00084 -18,FeTS2022_00692 -18,FeTS2022_00078 -18,FeTS2022_00747 -18,FeTS2022_01607 -18,FeTS2022_00751 -18,FeTS2022_00011 -18,FeTS2022_00610 -18,FeTS2022_00694 -18,FeTS2022_00026 -18,FeTS2022_00658 -18,FeTS2022_01544 -18,FeTS2022_01583 -18,FeTS2022_00680 -18,FeTS2022_01028 -18,FeTS2022_01636 -18,FeTS2022_00545 -18,FeTS2022_00072 -18,FeTS2022_00016 -18,FeTS2022_01548 -18,FeTS2022_00624 -18,FeTS2022_00676 -18,FeTS2022_00533 -18,FeTS2022_01574 -18,FeTS2022_01582 -18,FeTS2022_00085 -18,FeTS2022_00613 -18,FeTS2022_01593 -18,FeTS2022_00730 -18,FeTS2022_01585 -18,FeTS2022_00524 -18,FeTS2022_00081 -18,FeTS2022_00472 -18,FeTS2022_00478 -18,FeTS2022_00469 -18,FeTS2022_00682 -18,FeTS2022_00733 -18,FeTS2022_00723 -18,FeTS2022_00099 -18,FeTS2022_00744 -18,FeTS2022_00048 -18,FeTS2022_00480 -18,FeTS2022_00650 -18,FeTS2022_00601 -18,FeTS2022_00542 -18,FeTS2022_00667 -18,FeTS2022_00505 -18,FeTS2022_01539 -18,FeTS2022_00764 -18,FeTS2022_00506 -18,FeTS2022_01649 -18,FeTS2022_00032 -18,FeTS2022_00021 -18,FeTS2022_00685 -18,FeTS2022_00611 -18,FeTS2022_00511 -18,FeTS2022_01584 -18,FeTS2022_01635 -18,FeTS2022_00607 -18,FeTS2022_00071 -18,FeTS2022_00687 -18,FeTS2022_00767 -18,FeTS2022_00537 -18,FeTS2022_01630 -18,FeTS2022_00740 -18,FeTS2022_00525 -18,FeTS2022_00725 -18,FeTS2022_00502 -18,FeTS2022_01562 
-18,FeTS2022_01577 -18,FeTS2022_01576 -18,FeTS2022_01595 -18,FeTS2022_00654 -18,FeTS2022_00090 -18,FeTS2022_01645 -18,FeTS2022_01564 -18,FeTS2022_01567 -18,FeTS2022_00703 -18,FeTS2022_00043 -18,FeTS2022_00003 -18,FeTS2022_00495 -18,FeTS2022_00017 -18,FeTS2022_00491 -18,FeTS2022_00054 -18,FeTS2022_00510 -18,FeTS2022_00618 -18,FeTS2022_00064 -18,FeTS2022_00024 -18,FeTS2022_00709 -18,FeTS2022_01653 -18,FeTS2022_01579 -18,FeTS2022_01572 -18,FeTS2022_01156 -18,FeTS2022_00707 -18,FeTS2022_01540 -18,FeTS2022_00056 -18,FeTS2022_00620 -18,FeTS2022_00470 -18,FeTS2022_00499 -18,FeTS2022_00640 -18,FeTS2022_00549 -18,FeTS2022_01601 -18,FeTS2022_00608 -18,FeTS2022_00727 -18,FeTS2022_00773 -18,FeTS2022_00504 -18,FeTS2022_01604 -18,FeTS2022_01158 -18,FeTS2022_00051 -18,FeTS2022_00768 -18,FeTS2022_01161 -18,FeTS2022_00765 -18,FeTS2022_00068 -18,FeTS2022_00551 -18,FeTS2022_01605 -18,FeTS2022_00674 -18,FeTS2022_01157 -18,FeTS2022_01631 -18,FeTS2022_00022 -18,FeTS2022_00777 -18,FeTS2022_01609 -18,FeTS2022_01633 -18,FeTS2022_01652 -18,FeTS2022_00759 -18,FeTS2022_01655 -18,FeTS2022_01639 -18,FeTS2022_01563 -18,FeTS2022_00661 -18,FeTS2022_00087 -18,FeTS2022_00030 -18,FeTS2022_00556 -18,FeTS2022_01597 -18,FeTS2022_00724 -18,FeTS2022_00096 -18,FeTS2022_00049 -18,FeTS2022_00683 -18,FeTS2022_00059 -18,FeTS2022_01596 -18,FeTS2022_00498 -18,FeTS2022_00543 -18,FeTS2022_01641 -18,FeTS2022_01542 -18,FeTS2022_00062 -18,FeTS2022_00005 -18,FeTS2022_00646 -18,FeTS2022_00088 -18,FeTS2022_00656 -18,FeTS2022_01589 -18,FeTS2022_01160 -18,FeTS2022_01547 -18,FeTS2022_01606 -18,FeTS2022_00631 -18,FeTS2022_00756 -18,FeTS2022_00619 -18,FeTS2022_00698 -18,FeTS2022_01541 -18,FeTS2022_00539 -18,FeTS2022_00053 -18,FeTS2022_01618 -18,FeTS2022_00693 -18,FeTS2022_00616 -18,FeTS2022_01642 -18,FeTS2022_01632 -18,FeTS2022_00718 -18,FeTS2022_00006 -18,FeTS2022_00466 -18,FeTS2022_01565 -18,FeTS2022_01621 -18,FeTS2022_00697 -18,FeTS2022_00689 -18,FeTS2022_00554 -18,FeTS2022_00638 -18,FeTS2022_00517 -18,FeTS2022_00019 
-18,FeTS2022_01650 -18,FeTS2022_01602 -18,FeTS2022_01570 -18,FeTS2022_00655 -18,FeTS2022_00552 -18,FeTS2022_00706 -18,FeTS2022_01654 -18,FeTS2022_00481 -18,FeTS2022_00604 -18,FeTS2022_00612 -18,FeTS2022_00774 -18,FeTS2022_00625 -18,FeTS2022_00070 -18,FeTS2022_00649 -18,FeTS2022_00036 -18,FeTS2022_01546 -18,FeTS2022_00559 -18,FeTS2022_00018 -18,FeTS2022_00507 -18,FeTS2022_00760 -18,FeTS2022_01568 -18,FeTS2022_00094 -18,FeTS2022_00526 -18,FeTS2022_01575 -18,FeTS2022_00512 -18,FeTS2022_00033 -18,FeTS2022_01648 -18,FeTS2022_00052 -18,FeTS2022_01625 -18,FeTS2022_01573 -18,FeTS2022_00623 -18,FeTS2022_01153 -18,FeTS2022_00532 -18,FeTS2022_00516 -18,FeTS2022_00679 -18,FeTS2022_00468 -18,FeTS2022_00494 -18,FeTS2022_00483 -18,FeTS2022_01552 -18,FeTS2022_00606 -18,FeTS2022_00742 -18,FeTS2022_00677 -18,FeTS2022_00652 -18,FeTS2022_00074 -18,FeTS2022_00513 -18,FeTS2022_01581 -18,FeTS2022_00663 -18,FeTS2022_00734 -18,FeTS2022_01619 -18,FeTS2022_00668 -18,FeTS2022_00558 -18,FeTS2022_00002 -18,FeTS2022_01598 -18,FeTS2022_00477 -18,FeTS2022_01634 -18,FeTS2022_00501 -18,FeTS2022_01155 -18,FeTS2022_00077 -18,FeTS2022_01578 -18,FeTS2022_01569 -18,FeTS2022_01603 -18,FeTS2022_00538 -18,FeTS2022_00714 -18,FeTS2022_00031 -18,FeTS2022_01627 -18,FeTS2022_01553 -18,FeTS2022_00548 -18,FeTS2022_00739 -18,FeTS2022_00103 -18,FeTS2022_00528 -18,FeTS2022_01608 -18,FeTS2022_00095 -18,FeTS2022_00060 -18,FeTS2022_01554 -18,FeTS2022_00657 -18,FeTS2022_01587 -18,FeTS2022_00605 -18,FeTS2022_00686 -18,FeTS2022_00012 -19,FeTS2022_01166 -19,FeTS2022_01163 -19,FeTS2022_01165 -19,FeTS2022_01164 -20,FeTS2022_00444 -20,FeTS2022_01014 -20,FeTS2022_00442 -20,FeTS2022_01025 -20,FeTS2022_01024 -20,FeTS2022_00101 -20,FeTS2022_00453 -20,FeTS2022_01013 -20,FeTS2022_01011 -20,FeTS2022_00459 -20,FeTS2022_00457 -20,FeTS2022_01016 -20,FeTS2022_00448 -20,FeTS2022_01023 -20,FeTS2022_01017 -20,FeTS2022_00443 -20,FeTS2022_00455 -20,FeTS2022_00127 -20,FeTS2022_01012 -20,FeTS2022_01018 -20,FeTS2022_01022 -20,FeTS2022_00451 
-20,FeTS2022_00445 -20,FeTS2022_00452 -20,FeTS2022_00454 -20,FeTS2022_01019 -20,FeTS2022_01021 -20,FeTS2022_01020 -20,FeTS2022_01026 -20,FeTS2022_00456 -20,FeTS2022_00446 -20,FeTS2022_00449 -20,FeTS2022_01015 -21,FeTS2022_00802 -21,FeTS2022_00788 -21,FeTS2022_00795 -21,FeTS2022_00820 -21,FeTS2022_00782 -21,FeTS2022_00800 -21,FeTS2022_00830 -21,FeTS2022_00824 -21,FeTS2022_00805 -21,FeTS2022_00796 -21,FeTS2022_00823 -21,FeTS2022_00828 -21,FeTS2022_00811 -21,FeTS2022_00789 -21,FeTS2022_00801 -21,FeTS2022_00780 -21,FeTS2022_00781 -21,FeTS2022_00814 -21,FeTS2022_00806 -21,FeTS2022_00810 -21,FeTS2022_00807 -21,FeTS2022_00818 -21,FeTS2022_00791 -21,FeTS2022_00787 -21,FeTS2022_00808 -21,FeTS2022_00809 -21,FeTS2022_00803 -21,FeTS2022_00816 -21,FeTS2022_00819 -21,FeTS2022_00793 -21,FeTS2022_00799 -21,FeTS2022_00797 -21,FeTS2022_00784 -21,FeTS2022_00804 -21,FeTS2022_00792 -22,FeTS2022_01031 -22,FeTS2022_01033 -22,FeTS2022_01030 -22,FeTS2022_00118 -22,FeTS2022_01029 -22,FeTS2022_00126 -22,FeTS2022_01032 -23,FeTS2022_01147 -23,FeTS2022_01149 -23,FeTS2022_01150 -23,FeTS2022_01148 -23,FeTS2022_01162 diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/partitioning_2.csv b/Task_1/openfl-workspace/fets_challenge_workspace/partitioning_2.csv deleted file mode 100644 index 798f651..0000000 --- a/Task_1/openfl-workspace/fets_challenge_workspace/partitioning_2.csv +++ /dev/null @@ -1,1252 +0,0 @@ -Partition_ID,Subject_ID -1,FeTS2022_01341 -1,FeTS2022_01333 -1,FeTS2022_01077 -1,FeTS2022_01054 -1,FeTS2022_00285 -1,FeTS2022_01308 -1,FeTS2022_01363 -1,FeTS2022_01091 -1,FeTS2022_01273 -1,FeTS2022_01108 -1,FeTS2022_01255 -1,FeTS2022_01301 -1,FeTS2022_00219 -1,FeTS2022_00380 -1,FeTS2022_01349 -1,FeTS2022_00251 -1,FeTS2022_01276 -1,FeTS2022_01407 -1,FeTS2022_01344 -1,FeTS2022_01405 -1,FeTS2022_00218 -1,FeTS2022_01327 -1,FeTS2022_01252 -1,FeTS2022_01132 -1,FeTS2022_01036 -1,FeTS2022_01039 -1,FeTS2022_01366 -1,FeTS2022_00262 -1,FeTS2022_01279 -1,FeTS2022_00839 -1,FeTS2022_01322 
-1,FeTS2022_00389 -1,FeTS2022_00390 -1,FeTS2022_00431 -1,FeTS2022_00222 -1,FeTS2022_00373 -1,FeTS2022_00288 -1,FeTS2022_00284 -1,FeTS2022_01088 -1,FeTS2022_00311 -1,FeTS2022_00387 -1,FeTS2022_00258 -1,FeTS2022_01389 -1,FeTS2022_00321 -1,FeTS2022_01249 -1,FeTS2022_01230 -1,FeTS2022_00836 -1,FeTS2022_00348 -1,FeTS2022_01205 -1,FeTS2022_00246 -1,FeTS2022_00314 -1,FeTS2022_01404 -1,FeTS2022_01102 -1,FeTS2022_00379 -1,FeTS2022_01395 -1,FeTS2022_00155 -1,FeTS2022_00170 -1,FeTS2022_01264 -1,FeTS2022_00837 -1,FeTS2022_01372 -1,FeTS2022_00341 -1,FeTS2022_01257 -1,FeTS2022_00329 -1,FeTS2022_00425 -1,FeTS2022_01350 -1,FeTS2022_01247 -1,FeTS2022_01234 -1,FeTS2022_00331 -1,FeTS2022_01128 -1,FeTS2022_01365 -1,FeTS2022_00221 -1,FeTS2022_00298 -1,FeTS2022_00227 -1,FeTS2022_01204 -1,FeTS2022_00204 -1,FeTS2022_01399 -1,FeTS2022_00377 -1,FeTS2022_00343 -1,FeTS2022_00280 -1,FeTS2022_01347 -1,FeTS2022_00210 -1,FeTS2022_01117 -1,FeTS2022_01275 -1,FeTS2022_01034 -1,FeTS2022_00162 -1,FeTS2022_01340 -1,FeTS2022_01212 -1,FeTS2022_01220 -1,FeTS2022_00419 -1,FeTS2022_00340 -1,FeTS2022_00296 -1,FeTS2022_01208 -1,FeTS2022_01064 -1,FeTS2022_00433 -1,FeTS2022_01050 -1,FeTS2022_01278 -1,FeTS2022_00293 -1,FeTS2022_00206 -1,FeTS2022_00356 -1,FeTS2022_00376 -1,FeTS2022_00316 -1,FeTS2022_00403 -1,FeTS2022_01348 -1,FeTS2022_00192 -1,FeTS2022_00313 -1,FeTS2022_01240 -1,FeTS2022_01222 -1,FeTS2022_00344 -1,FeTS2022_00332 -1,FeTS2022_00292 -1,FeTS2022_01392 -1,FeTS2022_00220 -1,FeTS2022_00378 -1,FeTS2022_01130 -1,FeTS2022_01106 -1,FeTS2022_01295 -1,FeTS2022_01409 -1,FeTS2022_01057 -1,FeTS2022_01068 -1,FeTS2022_00320 -1,FeTS2022_00346 -1,FeTS2022_01001 -1,FeTS2022_01207 -1,FeTS2022_01137 -1,FeTS2022_01318 -1,FeTS2022_00289 -1,FeTS2022_00157 -1,FeTS2022_01224 -1,FeTS2022_01367 -1,FeTS2022_01382 -1,FeTS2022_00309 -1,FeTS2022_01008 -1,FeTS2022_01059 -1,FeTS2022_01271 -1,FeTS2022_01110 -1,FeTS2022_01398 -1,FeTS2022_01119 -1,FeTS2022_00209 -1,FeTS2022_01274 -1,FeTS2022_01124 -1,FeTS2022_00241 -1,FeTS2022_00152 
-1,FeTS2022_01131 -1,FeTS2022_01353 -1,FeTS2022_00352 -1,FeTS2022_01133 -1,FeTS2022_01084 -1,FeTS2022_00441 -1,FeTS2022_01268 -1,FeTS2022_01210 -1,FeTS2022_01375 -1,FeTS2022_00236 -1,FeTS2022_01218 -1,FeTS2022_01408 -1,FeTS2022_00274 -1,FeTS2022_01118 -1,FeTS2022_01213 -1,FeTS2022_01310 -1,FeTS2022_00194 -1,FeTS2022_00392 -1,FeTS2022_00334 -1,FeTS2022_00270 -1,FeTS2022_01359 -1,FeTS2022_01364 -1,FeTS2022_01336 -1,FeTS2022_01272 -1,FeTS2022_01090 -1,FeTS2022_00412 -1,FeTS2022_00228 -1,FeTS2022_00410 -2,FeTS2022_01239 -2,FeTS2022_01010 -2,FeTS2022_01394 -2,FeTS2022_00282 -2,FeTS2022_00237 -2,FeTS2022_01390 -2,FeTS2022_00382 -2,FeTS2022_00188 -2,FeTS2022_01211 -2,FeTS2022_01376 -2,FeTS2022_01243 -2,FeTS2022_01330 -2,FeTS2022_00253 -2,FeTS2022_01329 -2,FeTS2022_01306 -2,FeTS2022_01081 -2,FeTS2022_01369 -2,FeTS2022_01048 -2,FeTS2022_00328 -2,FeTS2022_00291 -2,FeTS2022_01049 -2,FeTS2022_01263 -2,FeTS2022_00317 -2,FeTS2022_00305 -2,FeTS2022_01265 -2,FeTS2022_00238 -2,FeTS2022_00423 -2,FeTS2022_01127 -2,FeTS2022_01379 -2,FeTS2022_01258 -2,FeTS2022_00299 -2,FeTS2022_01334 -2,FeTS2022_00350 -2,FeTS2022_01109 -2,FeTS2022_01352 -2,FeTS2022_01055 -2,FeTS2022_00167 -2,FeTS2022_01354 -2,FeTS2022_01231 -2,FeTS2022_00185 -2,FeTS2022_00306 -2,FeTS2022_00171 -2,FeTS2022_01261 -2,FeTS2022_01345 -2,FeTS2022_01397 -2,FeTS2022_00399 -2,FeTS2022_01319 -2,FeTS2022_01250 -2,FeTS2022_01097 -2,FeTS2022_01229 -2,FeTS2022_01393 -2,FeTS2022_00430 -2,FeTS2022_01203 -2,FeTS2022_01309 -2,FeTS2022_01342 -2,FeTS2022_01223 -2,FeTS2022_00239 -2,FeTS2022_00275 -2,FeTS2022_00406 -2,FeTS2022_01116 -2,FeTS2022_01380 -2,FeTS2022_00214 -2,FeTS2022_00195 -2,FeTS2022_01314 -2,FeTS2022_01113 -2,FeTS2022_00193 -2,FeTS2022_01259 -2,FeTS2022_00386 -2,FeTS2022_00834 -2,FeTS2022_01227 -2,FeTS2022_01277 -2,FeTS2022_00283 -2,FeTS2022_01099 -2,FeTS2022_00212 -2,FeTS2022_00165 -2,FeTS2022_01332 -2,FeTS2022_00364 -2,FeTS2022_01129 -2,FeTS2022_00301 -2,FeTS2022_01402 -2,FeTS2022_00199 -2,FeTS2022_01066 -2,FeTS2022_01107 
-2,FeTS2022_01337 -2,FeTS2022_00230 -2,FeTS2022_01114 -2,FeTS2022_01294 -2,FeTS2022_01370 -2,FeTS2022_01269 -2,FeTS2022_01043 -2,FeTS2022_00359 -2,FeTS2022_01004 -2,FeTS2022_00286 -2,FeTS2022_01038 -2,FeTS2022_00370 -2,FeTS2022_00184 -2,FeTS2022_00360 -2,FeTS2022_01123 -2,FeTS2022_01237 -2,FeTS2022_01086 -2,FeTS2022_00231 -2,FeTS2022_00353 -2,FeTS2022_01254 -2,FeTS2022_01373 -2,FeTS2022_01100 -2,FeTS2022_01214 -2,FeTS2022_01242 -2,FeTS2022_01115 -2,FeTS2022_01331 -2,FeTS2022_00391 -2,FeTS2022_01312 -2,FeTS2022_00324 -2,FeTS2022_01080 -2,FeTS2022_00371 -2,FeTS2022_01396 -2,FeTS2022_00339 -2,FeTS2022_00260 -2,FeTS2022_00243 -2,FeTS2022_00233 -2,FeTS2022_01323 -2,FeTS2022_01248 -2,FeTS2022_00263 -2,FeTS2022_00347 -2,FeTS2022_01233 -2,FeTS2022_00367 -2,FeTS2022_01051 -2,FeTS2022_01126 -2,FeTS2022_01267 -2,FeTS2022_00383 -2,FeTS2022_01357 -2,FeTS2022_00413 -2,FeTS2022_01287 -2,FeTS2022_00349 -2,FeTS2022_01244 -2,FeTS2022_01041 -2,FeTS2022_01236 -2,FeTS2022_01245 -2,FeTS2022_01383 -2,FeTS2022_00196 -2,FeTS2022_01387 -2,FeTS2022_00297 -2,FeTS2022_01103 -2,FeTS2022_01098 -2,FeTS2022_01410 -2,FeTS2022_00440 -2,FeTS2022_01305 -2,FeTS2022_01304 -2,FeTS2022_01074 -2,FeTS2022_01046 -2,FeTS2022_01226 -2,FeTS2022_01253 -2,FeTS2022_01040 -2,FeTS2022_00269 -2,FeTS2022_00310 -2,FeTS2022_01056 -2,FeTS2022_01311 -2,FeTS2022_01338 -2,FeTS2022_00166 -2,FeTS2022_00327 -2,FeTS2022_00254 -2,FeTS2022_01000 -2,FeTS2022_00259 -2,FeTS2022_01134 -2,FeTS2022_01104 -2,FeTS2022_01232 -2,FeTS2022_01286 -2,FeTS2022_01052 -2,FeTS2022_01217 -2,FeTS2022_01238 -2,FeTS2022_00154 -3,FeTS2022_00395 -3,FeTS2022_00267 -3,FeTS2022_00366 -3,FeTS2022_00351 -3,FeTS2022_00159 -3,FeTS2022_00131 -3,FeTS2022_01246 -3,FeTS2022_01060 -3,FeTS2022_01087 -3,FeTS2022_00250 -3,FeTS2022_00234 -3,FeTS2022_01058 -3,FeTS2022_00235 -3,FeTS2022_00203 -3,FeTS2022_00414 -3,FeTS2022_01285 -3,FeTS2022_01071 -3,FeTS2022_01111 -3,FeTS2022_01377 -3,FeTS2022_01355 -3,FeTS2022_01384 -3,FeTS2022_01120 -3,FeTS2022_01082 -3,FeTS2022_01076 
-3,FeTS2022_01072 -3,FeTS2022_00303 -3,FeTS2022_00436 -3,FeTS2022_01361 -3,FeTS2022_01073 -3,FeTS2022_00338 -3,FeTS2022_01351 -3,FeTS2022_00273 -3,FeTS2022_00186 -3,FeTS2022_00290 -3,FeTS2022_01381 -3,FeTS2022_01083 -3,FeTS2022_00409 -3,FeTS2022_00281 -3,FeTS2022_00840 -3,FeTS2022_00407 -3,FeTS2022_01094 -3,FeTS2022_01328 -3,FeTS2022_01078 -3,FeTS2022_00312 -3,FeTS2022_01235 -3,FeTS2022_01288 -3,FeTS2022_01391 -3,FeTS2022_01215 -3,FeTS2022_00160 -3,FeTS2022_00421 -3,FeTS2022_01317 -3,FeTS2022_01216 -3,FeTS2022_00178 -3,FeTS2022_00838 -3,FeTS2022_01321 -3,FeTS2022_01037 -3,FeTS2022_00176 -3,FeTS2022_01293 -3,FeTS2022_01219 -3,FeTS2022_01260 -3,FeTS2022_01339 -3,FeTS2022_01325 -3,FeTS2022_00249 -3,FeTS2022_01241 -3,FeTS2022_00211 -3,FeTS2022_01105 -3,FeTS2022_01138 -3,FeTS2022_00261 -3,FeTS2022_01316 -3,FeTS2022_01315 -3,FeTS2022_01256 -3,FeTS2022_00191 -3,FeTS2022_01069 -3,FeTS2022_01062 -3,FeTS2022_01135 -3,FeTS2022_00207 -3,FeTS2022_00401 -3,FeTS2022_00172 -3,FeTS2022_01085 -3,FeTS2022_00247 -3,FeTS2022_01206 -3,FeTS2022_01356 -3,FeTS2022_00325 -3,FeTS2022_00429 -3,FeTS2022_01122 -3,FeTS2022_01374 -3,FeTS2022_00156 -3,FeTS2022_01075 -3,FeTS2022_01362 -3,FeTS2022_01251 -3,FeTS2022_00405 -3,FeTS2022_01047 -3,FeTS2022_00240 -3,FeTS2022_00336 -3,FeTS2022_01092 -3,FeTS2022_01403 -3,FeTS2022_01385 -3,FeTS2022_01096 -3,FeTS2022_00426 -3,FeTS2022_00201 -3,FeTS2022_01335 -3,FeTS2022_00404 -3,FeTS2022_00322 -3,FeTS2022_00294 -3,FeTS2022_01070 -3,FeTS2022_01225 -3,FeTS2022_01067 -3,FeTS2022_00375 -3,FeTS2022_00158 -3,FeTS2022_00177 -3,FeTS2022_00271 -3,FeTS2022_01388 -3,FeTS2022_01053 -3,FeTS2022_01042 -3,FeTS2022_01400 -3,FeTS2022_00183 -3,FeTS2022_01358 -3,FeTS2022_01266 -3,FeTS2022_01360 -3,FeTS2022_00304 -3,FeTS2022_01065 -3,FeTS2022_01093 -3,FeTS2022_00397 -3,FeTS2022_01262 -3,FeTS2022_00217 -3,FeTS2022_01401 -3,FeTS2022_01125 -3,FeTS2022_01406 -3,FeTS2022_01343 -3,FeTS2022_01346 -3,FeTS2022_01089 -3,FeTS2022_00216 -3,FeTS2022_01061 -3,FeTS2022_01299 -3,FeTS2022_00242 
-3,FeTS2022_01112 -3,FeTS2022_00300 -3,FeTS2022_01280 -3,FeTS2022_00187 -3,FeTS2022_00318 -3,FeTS2022_01371 -3,FeTS2022_01378 -3,FeTS2022_00418 -3,FeTS2022_01121 -3,FeTS2022_01136 -3,FeTS2022_00266 -3,FeTS2022_01221 -3,FeTS2022_01307 -3,FeTS2022_01386 -3,FeTS2022_00432 -3,FeTS2022_01101 -3,FeTS2022_01228 -3,FeTS2022_01313 -3,FeTS2022_01209 -3,FeTS2022_00388 -3,FeTS2022_01270 -3,FeTS2022_01044 -3,FeTS2022_00417 -3,FeTS2022_01063 -3,FeTS2022_01368 -3,FeTS2022_00369 -3,FeTS2022_01095 -3,FeTS2022_00416 -3,FeTS2022_00400 -3,FeTS2022_01045 -3,FeTS2022_01202 -3,FeTS2022_01326 -3,FeTS2022_01079 -3,FeTS2022_00402 -3,FeTS2022_01320 -3,FeTS2022_01324 -4,FeTS2022_01412 -4,FeTS2022_01415 -4,FeTS2022_01411 -4,FeTS2022_01414 -4,FeTS2022_01413 -4,FeTS2022_01416 -5,FeTS2022_01439 -5,FeTS2022_01435 -5,FeTS2022_01434 -5,FeTS2022_01440 -5,FeTS2022_01431 -5,FeTS2022_01437 -5,FeTS2022_01436 -5,FeTS2022_01433 -5,FeTS2022_01438 -5,FeTS2022_01426 -5,FeTS2022_01427 -5,FeTS2022_01428 -5,FeTS2022_01429 -5,FeTS2022_01432 -5,FeTS2022_01430 -6,FeTS2022_01152 -6,FeTS2022_01178 -6,FeTS2022_01186 -6,FeTS2022_01184 -6,FeTS2022_01181 -6,FeTS2022_01187 -6,FeTS2022_01168 -6,FeTS2022_01196 -6,FeTS2022_01173 -6,FeTS2022_01176 -6,FeTS2022_01200 -6,FeTS2022_00565 -6,FeTS2022_01193 -6,FeTS2022_01174 -6,FeTS2022_01662 -6,FeTS2022_01660 -7,FeTS2022_01201 -7,FeTS2022_01167 -7,FeTS2022_01170 -7,FeTS2022_01179 -7,FeTS2022_01185 -7,FeTS2022_01197 -7,FeTS2022_01172 -7,FeTS2022_01189 -7,FeTS2022_00563 -7,FeTS2022_01180 -7,FeTS2022_01198 -7,FeTS2022_01183 -7,FeTS2022_01151 -7,FeTS2022_01195 -7,FeTS2022_01657 -8,FeTS2022_01194 -8,FeTS2022_01191 -8,FeTS2022_01169 -8,FeTS2022_01171 -8,FeTS2022_00561 -8,FeTS2022_01659 -8,FeTS2022_01661 -8,FeTS2022_01190 -8,FeTS2022_01188 -8,FeTS2022_01199 -8,FeTS2022_01658 -8,FeTS2022_01192 -8,FeTS2022_01175 -8,FeTS2022_01182 -8,FeTS2022_01537 -8,FeTS2022_01177 -9,FeTS2022_00102 -9,FeTS2022_00149 -9,FeTS2022_01290 -9,FeTS2022_00113 -9,FeTS2022_01009 -9,FeTS2022_01007 -9,FeTS2022_01002 
-9,FeTS2022_00139 -9,FeTS2022_01292 -9,FeTS2022_00100 -9,FeTS2022_01289 -9,FeTS2022_01291 -9,FeTS2022_01005 -9,FeTS2022_01282 -9,FeTS2022_01003 -9,FeTS2022_00109 -9,FeTS2022_01283 -9,FeTS2022_00999 -9,FeTS2022_01281 -9,FeTS2022_01284 -9,FeTS2022_00151 -9,FeTS2022_00123 -10,FeTS2022_01451 -10,FeTS2022_01453 -10,FeTS2022_01452 -10,FeTS2022_00831 -10,FeTS2022_01448 -10,FeTS2022_01300 -10,FeTS2022_01443 -10,FeTS2022_00136 -10,FeTS2022_01454 -10,FeTS2022_00144 -10,FeTS2022_00121 -10,FeTS2022_01297 -10,FeTS2022_00133 -10,FeTS2022_01447 -10,FeTS2022_00142 -10,FeTS2022_01450 -10,FeTS2022_00120 -10,FeTS2022_01298 -10,FeTS2022_01449 -10,FeTS2022_01442 -10,FeTS2022_01446 -10,FeTS2022_01303 -10,FeTS2022_01296 -10,FeTS2022_00132 -10,FeTS2022_01441 -10,FeTS2022_01445 -10,FeTS2022_01302 -10,FeTS2022_00143 -10,FeTS2022_00105 -10,FeTS2022_01444 -10,FeTS2022_00147 -10,FeTS2022_01455 -10,FeTS2022_00146 -10,FeTS2022_00137 -11,FeTS2022_01459 -11,FeTS2022_01464 -11,FeTS2022_01458 -11,FeTS2022_01457 -11,FeTS2022_01461 -11,FeTS2022_01456 -11,FeTS2022_01460 -11,FeTS2022_01462 -11,FeTS2022_01466 -11,FeTS2022_01465 -11,FeTS2022_01463 -11,FeTS2022_01467 -12,FeTS2022_00140 -12,FeTS2022_01469 -12,FeTS2022_01468 -12,FeTS2022_01470 -12,FeTS2022_00104 -12,FeTS2022_00110 -12,FeTS2022_00112 -12,FeTS2022_00128 -13,FeTS2022_00134 -13,FeTS2022_00150 -13,FeTS2022_00116 -13,FeTS2022_01471 -14,FeTS2022_01472 -14,FeTS2022_00117 -14,FeTS2022_00130 -14,FeTS2022_00138 -14,FeTS2022_01473 -14,FeTS2022_00111 -14,FeTS2022_00124 -14,FeTS2022_00106 -15,FeTS2022_00122 -15,FeTS2022_00148 -15,FeTS2022_01474 -15,FeTS2022_00108 -15,FeTS2022_01144 -15,FeTS2022_00107 -15,FeTS2022_01140 -15,FeTS2022_01146 -15,FeTS2022_01145 -15,FeTS2022_01139 -15,FeTS2022_01141 -15,FeTS2022_01142 -15,FeTS2022_01143 -15,FeTS2022_01475 -16,FeTS2022_01482 -16,FeTS2022_01480 -16,FeTS2022_01485 -16,FeTS2022_01476 -16,FeTS2022_01481 -16,FeTS2022_01483 -16,FeTS2022_01486 -16,FeTS2022_01484 -16,FeTS2022_01479 -16,FeTS2022_01477 -16,FeTS2022_01478 
-17,FeTS2022_01491 -17,FeTS2022_01500 -17,FeTS2022_01519 -17,FeTS2022_01516 -17,FeTS2022_01509 -17,FeTS2022_01520 -17,FeTS2022_01508 -17,FeTS2022_01503 -17,FeTS2022_01488 -17,FeTS2022_01492 -17,FeTS2022_01502 -17,FeTS2022_01493 -18,FeTS2022_01497 -18,FeTS2022_01499 -18,FeTS2022_01487 -18,FeTS2022_01505 -18,FeTS2022_01504 -18,FeTS2022_01490 -18,FeTS2022_01507 -18,FeTS2022_01510 -18,FeTS2022_01512 -18,FeTS2022_01514 -18,FeTS2022_01517 -19,FeTS2022_01501 -19,FeTS2022_01518 -19,FeTS2022_01506 -19,FeTS2022_01515 -19,FeTS2022_01511 -19,FeTS2022_01494 -19,FeTS2022_01489 -19,FeTS2022_01513 -19,FeTS2022_01496 -19,FeTS2022_01495 -19,FeTS2022_01521 -19,FeTS2022_01498 -20,FeTS2022_01522 -20,FeTS2022_01525 -20,FeTS2022_01526 -20,FeTS2022_01527 -20,FeTS2022_01524 -20,FeTS2022_01523 -21,FeTS2022_01530 -21,FeTS2022_01536 -21,FeTS2022_01535 -21,FeTS2022_01663 -21,FeTS2022_01534 -21,FeTS2022_01529 -21,FeTS2022_01531 -21,FeTS2022_01666 -21,FeTS2022_01665 -21,FeTS2022_01532 -21,FeTS2022_01664 -21,FeTS2022_01528 -21,FeTS2022_01533 -22,FeTS2022_00584 -22,FeTS2022_00567 -22,FeTS2022_00571 -22,FeTS2022_00582 -22,FeTS2022_00570 -22,FeTS2022_00594 -22,FeTS2022_00597 -22,FeTS2022_00596 -22,FeTS2022_00576 -22,FeTS2022_00572 -22,FeTS2022_00115 -22,FeTS2022_00593 -22,FeTS2022_00588 -22,FeTS2022_00598 -22,FeTS2022_00589 -22,FeTS2022_00574 -22,FeTS2022_00586 -22,FeTS2022_00579 -22,FeTS2022_00590 -22,FeTS2022_00599 -22,FeTS2022_00577 -22,FeTS2022_00575 -22,FeTS2022_00581 -22,FeTS2022_00591 -22,FeTS2022_00569 -22,FeTS2022_00587 -22,FeTS2022_00580 -22,FeTS2022_00583 -22,FeTS2022_00578 -22,FeTS2022_00568 -23,FeTS2022_01423 -23,FeTS2022_01420 -23,FeTS2022_01422 -23,FeTS2022_01417 -23,FeTS2022_01421 -23,FeTS2022_01424 -23,FeTS2022_01418 -23,FeTS2022_01425 -23,FeTS2022_01419 -24,FeTS2022_01628 -24,FeTS2022_01615 -24,FeTS2022_01035 -24,FeTS2022_00732 -24,FeTS2022_00753 -24,FeTS2022_01620 -24,FeTS2022_01637 -24,FeTS2022_01594 -24,FeTS2022_00530 -24,FeTS2022_00772 -24,FeTS2022_01580 -24,FeTS2022_00731 
-24,FeTS2022_00540 -24,FeTS2022_00464 -24,FeTS2022_01622 -24,FeTS2022_01154 -24,FeTS2022_01559 -24,FeTS2022_00729 -24,FeTS2022_00708 -24,FeTS2022_00044 -24,FeTS2022_00705 -24,FeTS2022_00645 -24,FeTS2022_01640 -24,FeTS2022_00008 -24,FeTS2022_00746 -24,FeTS2022_01551 -24,FeTS2022_01610 -24,FeTS2022_00061 -24,FeTS2022_00642 -24,FeTS2022_00675 -24,FeTS2022_01651 -24,FeTS2022_00651 -24,FeTS2022_00626 -24,FeTS2022_00028 -24,FeTS2022_01557 -24,FeTS2022_01616 -24,FeTS2022_00684 -24,FeTS2022_01538 -24,FeTS2022_01647 -24,FeTS2022_00688 -24,FeTS2022_00737 -24,FeTS2022_00063 -24,FeTS2022_00758 -24,FeTS2022_01159 -24,FeTS2022_00615 -24,FeTS2022_00621 -24,FeTS2022_01543 -24,FeTS2022_01560 -24,FeTS2022_00058 -24,FeTS2022_00009 -24,FeTS2022_00544 -24,FeTS2022_01611 -24,FeTS2022_00485 -24,FeTS2022_00735 -24,FeTS2022_00659 -24,FeTS2022_00025 -24,FeTS2022_00550 -24,FeTS2022_01599 -24,FeTS2022_00636 -24,FeTS2022_01644 -24,FeTS2022_00716 -24,FeTS2022_00641 -24,FeTS2022_01624 -24,FeTS2022_00547 -24,FeTS2022_00046 -24,FeTS2022_00728 -24,FeTS2022_00045 -24,FeTS2022_00493 -24,FeTS2022_00089 -24,FeTS2022_00622 -24,FeTS2022_01643 -24,FeTS2022_00602 -24,FeTS2022_00035 -24,FeTS2022_01545 -24,FeTS2022_00014 -24,FeTS2022_01566 -24,FeTS2022_00066 -24,FeTS2022_01614 -24,FeTS2022_01591 -24,FeTS2022_00514 -24,FeTS2022_01588 -24,FeTS2022_00520 -24,FeTS2022_01556 -24,FeTS2022_00097 -24,FeTS2022_00555 -24,FeTS2022_00736 -24,FeTS2022_00639 -24,FeTS2022_00479 -24,FeTS2022_01550 -24,FeTS2022_01592 -24,FeTS2022_01626 -24,FeTS2022_00557 -24,FeTS2022_00496 -24,FeTS2022_00778 -24,FeTS2022_01561 -24,FeTS2022_00690 -24,FeTS2022_00750 -24,FeTS2022_01586 -24,FeTS2022_01549 -24,FeTS2022_01555 -24,FeTS2022_01612 -24,FeTS2022_01600 -24,FeTS2022_01629 -24,FeTS2022_01656 -24,FeTS2022_00500 -24,FeTS2022_00529 -24,FeTS2022_00628 -24,FeTS2022_00775 -24,FeTS2022_00523 -24,FeTS2022_00488 -24,FeTS2022_00518 -24,FeTS2022_00000 -24,FeTS2022_00020 -24,FeTS2022_01646 -24,FeTS2022_01638 -24,FeTS2022_00630 -24,FeTS2022_01590 
-24,FeTS2022_01613 -24,FeTS2022_01571 -24,FeTS2022_00519 -24,FeTS2022_01617 -24,FeTS2022_01623 -24,FeTS2022_00691 -24,FeTS2022_01027 -24,FeTS2022_00704 -24,FeTS2022_00098 -24,FeTS2022_01558 -25,FeTS2022_00715 -25,FeTS2022_00757 -25,FeTS2022_00084 -25,FeTS2022_00692 -25,FeTS2022_00078 -25,FeTS2022_00747 -25,FeTS2022_01607 -25,FeTS2022_00751 -25,FeTS2022_00011 -25,FeTS2022_00610 -25,FeTS2022_00694 -25,FeTS2022_00026 -25,FeTS2022_00658 -25,FeTS2022_01544 -25,FeTS2022_01583 -25,FeTS2022_00680 -25,FeTS2022_01028 -25,FeTS2022_01636 -25,FeTS2022_00545 -25,FeTS2022_00072 -25,FeTS2022_00016 -25,FeTS2022_01548 -25,FeTS2022_00624 -25,FeTS2022_00676 -25,FeTS2022_00533 -25,FeTS2022_01574 -25,FeTS2022_01582 -25,FeTS2022_00085 -25,FeTS2022_00613 -25,FeTS2022_01593 -25,FeTS2022_00730 -25,FeTS2022_01585 -25,FeTS2022_00524 -25,FeTS2022_00081 -25,FeTS2022_00472 -25,FeTS2022_00478 -25,FeTS2022_00469 -25,FeTS2022_00682 -25,FeTS2022_00733 -25,FeTS2022_00723 -25,FeTS2022_00099 -25,FeTS2022_00744 -25,FeTS2022_00048 -25,FeTS2022_00480 -25,FeTS2022_00650 -25,FeTS2022_00601 -25,FeTS2022_00542 -25,FeTS2022_00667 -25,FeTS2022_00505 -25,FeTS2022_01539 -25,FeTS2022_00764 -25,FeTS2022_00506 -25,FeTS2022_01649 -25,FeTS2022_00032 -25,FeTS2022_00021 -25,FeTS2022_00685 -25,FeTS2022_00611 -25,FeTS2022_00511 -25,FeTS2022_01584 -25,FeTS2022_01635 -25,FeTS2022_00607 -25,FeTS2022_00071 -25,FeTS2022_00687 -25,FeTS2022_00767 -25,FeTS2022_00537 -25,FeTS2022_01630 -25,FeTS2022_00740 -25,FeTS2022_00525 -25,FeTS2022_00725 -25,FeTS2022_00502 -25,FeTS2022_01562 -25,FeTS2022_01577 -25,FeTS2022_01576 -25,FeTS2022_01595 -25,FeTS2022_00654 -25,FeTS2022_00090 -25,FeTS2022_01645 -25,FeTS2022_01564 -25,FeTS2022_01567 -25,FeTS2022_00703 -25,FeTS2022_00043 -25,FeTS2022_00003 -25,FeTS2022_00495 -25,FeTS2022_00017 -25,FeTS2022_00491 -25,FeTS2022_00054 -25,FeTS2022_00510 -25,FeTS2022_00618 -25,FeTS2022_00064 -25,FeTS2022_00024 -25,FeTS2022_00709 -25,FeTS2022_01653 -25,FeTS2022_01579 -25,FeTS2022_01572 -25,FeTS2022_01156 
-25,FeTS2022_00707 -25,FeTS2022_01540 -25,FeTS2022_00056 -25,FeTS2022_00620 -25,FeTS2022_00470 -25,FeTS2022_00499 -25,FeTS2022_00640 -25,FeTS2022_00549 -25,FeTS2022_01601 -25,FeTS2022_00608 -25,FeTS2022_00727 -25,FeTS2022_00773 -25,FeTS2022_00504 -25,FeTS2022_01604 -25,FeTS2022_01158 -25,FeTS2022_00051 -25,FeTS2022_00768 -25,FeTS2022_01161 -25,FeTS2022_00765 -25,FeTS2022_00068 -25,FeTS2022_00551 -25,FeTS2022_01605 -25,FeTS2022_00674 -25,FeTS2022_01157 -25,FeTS2022_01631 -25,FeTS2022_00022 -25,FeTS2022_00777 -25,FeTS2022_01609 -25,FeTS2022_01633 -25,FeTS2022_01652 -25,FeTS2022_00759 -25,FeTS2022_01655 -26,FeTS2022_01639 -26,FeTS2022_01563 -26,FeTS2022_00661 -26,FeTS2022_00087 -26,FeTS2022_00030 -26,FeTS2022_00556 -26,FeTS2022_01597 -26,FeTS2022_00724 -26,FeTS2022_00096 -26,FeTS2022_00049 -26,FeTS2022_00683 -26,FeTS2022_00059 -26,FeTS2022_01596 -26,FeTS2022_00498 -26,FeTS2022_00543 -26,FeTS2022_01641 -26,FeTS2022_01542 -26,FeTS2022_00062 -26,FeTS2022_00005 -26,FeTS2022_00646 -26,FeTS2022_00088 -26,FeTS2022_00656 -26,FeTS2022_01589 -26,FeTS2022_01160 -26,FeTS2022_01547 -26,FeTS2022_01606 -26,FeTS2022_00631 -26,FeTS2022_00756 -26,FeTS2022_00619 -26,FeTS2022_00698 -26,FeTS2022_01541 -26,FeTS2022_00539 -26,FeTS2022_00053 -26,FeTS2022_01618 -26,FeTS2022_00693 -26,FeTS2022_00616 -26,FeTS2022_01642 -26,FeTS2022_01632 -26,FeTS2022_00718 -26,FeTS2022_00006 -26,FeTS2022_00466 -26,FeTS2022_01565 -26,FeTS2022_01621 -26,FeTS2022_00697 -26,FeTS2022_00689 -26,FeTS2022_00554 -26,FeTS2022_00638 -26,FeTS2022_00517 -26,FeTS2022_00019 -26,FeTS2022_01650 -26,FeTS2022_01602 -26,FeTS2022_01570 -26,FeTS2022_00655 -26,FeTS2022_00552 -26,FeTS2022_00706 -26,FeTS2022_01654 -26,FeTS2022_00481 -26,FeTS2022_00604 -26,FeTS2022_00612 -26,FeTS2022_00774 -26,FeTS2022_00625 -26,FeTS2022_00070 -26,FeTS2022_00649 -26,FeTS2022_00036 -26,FeTS2022_01546 -26,FeTS2022_00559 -26,FeTS2022_00018 -26,FeTS2022_00507 -26,FeTS2022_00760 -26,FeTS2022_01568 -26,FeTS2022_00094 -26,FeTS2022_00526 -26,FeTS2022_01575 
-26,FeTS2022_00512 -26,FeTS2022_00033 -26,FeTS2022_01648 -26,FeTS2022_00052 -26,FeTS2022_01625 -26,FeTS2022_01573 -26,FeTS2022_00623 -26,FeTS2022_01153 -26,FeTS2022_00532 -26,FeTS2022_00516 -26,FeTS2022_00679 -26,FeTS2022_00468 -26,FeTS2022_00494 -26,FeTS2022_00483 -26,FeTS2022_01552 -26,FeTS2022_00606 -26,FeTS2022_00742 -26,FeTS2022_00677 -26,FeTS2022_00652 -26,FeTS2022_00074 -26,FeTS2022_00513 -26,FeTS2022_01581 -26,FeTS2022_00663 -26,FeTS2022_00734 -26,FeTS2022_01619 -26,FeTS2022_00668 -26,FeTS2022_00558 -26,FeTS2022_00002 -26,FeTS2022_01598 -26,FeTS2022_00477 -26,FeTS2022_01634 -26,FeTS2022_00501 -26,FeTS2022_01155 -26,FeTS2022_00077 -26,FeTS2022_01578 -26,FeTS2022_01569 -26,FeTS2022_01603 -26,FeTS2022_00538 -26,FeTS2022_00714 -26,FeTS2022_00031 -26,FeTS2022_01627 -26,FeTS2022_01553 -26,FeTS2022_00548 -26,FeTS2022_00739 -26,FeTS2022_00103 -26,FeTS2022_00528 -26,FeTS2022_01608 -26,FeTS2022_00095 -26,FeTS2022_00060 -26,FeTS2022_01554 -26,FeTS2022_00657 -26,FeTS2022_01587 -26,FeTS2022_00605 -26,FeTS2022_00686 -26,FeTS2022_00012 -27,FeTS2022_01166 -27,FeTS2022_01163 -27,FeTS2022_01165 -27,FeTS2022_01164 -28,FeTS2022_00444 -28,FeTS2022_01014 -28,FeTS2022_00442 -28,FeTS2022_01025 -28,FeTS2022_01024 -28,FeTS2022_00101 -28,FeTS2022_00453 -28,FeTS2022_01013 -28,FeTS2022_01011 -28,FeTS2022_00459 -28,FeTS2022_00457 -28,FeTS2022_01016 -28,FeTS2022_00448 -28,FeTS2022_01023 -28,FeTS2022_01017 -28,FeTS2022_00443 -28,FeTS2022_00455 -28,FeTS2022_00127 -28,FeTS2022_01012 -28,FeTS2022_01018 -28,FeTS2022_01022 -28,FeTS2022_00451 -28,FeTS2022_00445 -28,FeTS2022_00452 -28,FeTS2022_00454 -28,FeTS2022_01019 -28,FeTS2022_01021 -28,FeTS2022_01020 -28,FeTS2022_01026 -28,FeTS2022_00456 -28,FeTS2022_00446 -28,FeTS2022_00449 -28,FeTS2022_01015 -29,FeTS2022_00802 -29,FeTS2022_00788 -29,FeTS2022_00795 -29,FeTS2022_00820 -29,FeTS2022_00782 -29,FeTS2022_00800 -29,FeTS2022_00830 -29,FeTS2022_00824 -29,FeTS2022_00805 -29,FeTS2022_00796 -29,FeTS2022_00823 -29,FeTS2022_00828 -30,FeTS2022_00811 
-30,FeTS2022_00789 -30,FeTS2022_00801 -30,FeTS2022_00780 -30,FeTS2022_00781 -30,FeTS2022_00814 -30,FeTS2022_00806 -30,FeTS2022_00810 -30,FeTS2022_00807 -30,FeTS2022_00818 -30,FeTS2022_00791 -31,FeTS2022_00787 -31,FeTS2022_00808 -31,FeTS2022_00809 -31,FeTS2022_00803 -31,FeTS2022_00816 -31,FeTS2022_00819 -31,FeTS2022_00793 -31,FeTS2022_00799 -31,FeTS2022_00797 -31,FeTS2022_00784 -31,FeTS2022_00804 -31,FeTS2022_00792 -32,FeTS2022_01031 -32,FeTS2022_01033 -32,FeTS2022_01030 -32,FeTS2022_00118 -32,FeTS2022_01029 -32,FeTS2022_00126 -32,FeTS2022_01032 -33,FeTS2022_01147 -33,FeTS2022_01149 -33,FeTS2022_01150 -33,FeTS2022_01148 -33,FeTS2022_01162 diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/plan/cols.yaml b/Task_1/openfl-workspace/fets_challenge_workspace/plan/cols.yaml deleted file mode 100644 index ebd5bec..0000000 --- a/Task_1/openfl-workspace/fets_challenge_workspace/plan/cols.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Provided by the FeTS Initiative (www.fets.ai) as part of the FeTS Challenge 2021 - -collaborators: \ No newline at end of file diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/plan/data.yaml b/Task_1/openfl-workspace/fets_challenge_workspace/plan/data.yaml deleted file mode 100644 index 93c8816..0000000 --- a/Task_1/openfl-workspace/fets_challenge_workspace/plan/data.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Provided by the FeTS Initiative (www.fets.ai) as part of the FeTS Challenge 2021 - -one,1 -two,2 diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/plan/defaults b/Task_1/openfl-workspace/fets_challenge_workspace/plan/defaults deleted file mode 100644 index fb82f9c..0000000 --- a/Task_1/openfl-workspace/fets_challenge_workspace/plan/defaults +++ /dev/null @@ -1,2 +0,0 @@ -../../workspace/plan/defaults - diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/plan/plan.yaml b/Task_1/openfl-workspace/fets_challenge_workspace/plan/plan.yaml deleted file mode 100644 index 2e35cee..0000000 --- 
a/Task_1/openfl-workspace/fets_challenge_workspace/plan/plan.yaml +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (C) 2022 Intel Corporation -# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. - -aggregator : - defaults : plan/defaults/aggregator.yaml - template : openfl.component.Aggregator - settings : - init_state_path : save/fets_seg_test_init.pbuf - best_state_path : save/fets_seg_test_best.pbuf - last_state_path : save/fets_seg_test_last.pbuf - rounds_to_train : 3 - write_logs : true - - -collaborator : - defaults : plan/defaults/collaborator.yaml - template : openfl.component.Collaborator - settings : - delta_updates : false - opt_treatment : RESET - -data_loader : - defaults : plan/defaults/data_loader.yaml - template : openfl.federated.data.loader_gandlf.GaNDLFDataLoaderWrapper - settings : - feature_shape : [32, 32, 32] - data_path : /home/ad_kagrawa2/.local/workspace - -task_runner : - template : src.fets_challenge_model.FeTSChallengeModel - settings : - train_csv : seg_test_train.csv - val_csv : seg_test_val.csv - device : cpu - gandlf_config : - batch_size: 1 - clip_grad: null - clip_mode: null - data_augmentation: {} - data_postprocessing: {} - data_preprocessing: - normalize: null - enable_padding: false - in_memory: false - inference_mechanism : - grid_aggregator_overlap: crop - patch_overlap: 0 - learning_rate: 0.001 - loss_function: dc - medcam_enabled: false - output_dir: '.' 
- metrics: - - dice - - dice_per_label - - hd95_per_label - model: - amp: true - architecture: resunet - base_filters: 32 - class_list: - - 0 - - 1 - - 2 - - 4 - dimension: 3 - final_layer: softmax - ignore_label_validation: null - norm_type: instance - nested_training: - testing: 1 - validation: -5 - num_epochs: 1 - optimizer: - type: sgd - parallel_compute_command: '' - patch_sampler: label - patch_size: - - 64 - - 64 - - 64 - patience: 100 - pin_memory_dataloader: false - print_rgb_label_warning: true - q_max_length: 100 - q_num_workers: 0 - q_samples_per_volume: 40 - q_verbose: false - save_output: false - save_training: false - scaling_factor: 1 - scheduler: - type: triangle_modified - track_memory_usage: false - verbose: false - version: - maximum: 0.1.0 - minimum: 0.0.14 - weighted_loss: true - modality: rad - - -network : - defaults : plan/defaults/network.yaml - -assigner: - template : src.challenge_assigner.FeTSChallengeAssigner - settings : - training_tasks : - - aggregated_model_validation - - train - - locally_tuned_model_validation - validation_tasks : - - aggregated_model_validation - -tasks : - aggregated_model_validation: - function : validate - kwargs : - apply : global - metrics : - - valid_loss - - valid_dice - - locally_tuned_model_validation: - function : validate - kwargs : - apply: local - metrics : - - valid_loss - - valid_dice - - train: - function : train - kwargs : - metrics : - - loss - - train_dice - epochs : 1 - - -compression_pipeline : - defaults : plan/defaults/compression_pipeline.yaml diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/requirements.txt b/Task_1/openfl-workspace/fets_challenge_workspace/requirements.txt deleted file mode 100644 index 9a7d57c..0000000 --- a/Task_1/openfl-workspace/fets_challenge_workspace/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -torchvision -torch diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/small_split.csv 
b/Task_1/openfl-workspace/fets_challenge_workspace/small_split.csv deleted file mode 100644 index 50baaca..0000000 --- a/Task_1/openfl-workspace/fets_challenge_workspace/small_split.csv +++ /dev/null @@ -1,11 +0,0 @@ -Partition_ID,Subject_ID -1,FeTS2022_01341 -1,FeTS2022_01333 -1,FeTS2022_01077 -1,FeTS2022_01324 -2,FeTS2022_01412 -2,FeTS2022_01415 -2,FeTS2022_01411 -3,FeTS2022_01439 -3,FeTS2022_01435 -3,FeTS2022_01434 diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/src/__init__.py b/Task_1/openfl-workspace/fets_challenge_workspace/src/__init__.py deleted file mode 100644 index 1c5a549..0000000 --- a/Task_1/openfl-workspace/fets_challenge_workspace/src/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# Provided by the FeTS Initiative (www.fets.ai) as part of the FeTS Challenge 2021 - -# Contributing Authors (alphabetical): -# Patrick Foley (Intel) -# Micah Sheller (Intel) - -TRAINING_HPARAMS = [ - 'epochs_per_round', - 'learning_rate', -] diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/src/challenge_assigner.py b/Task_1/openfl-workspace/fets_challenge_workspace/src/challenge_assigner.py deleted file mode 100644 index 46e847f..0000000 --- a/Task_1/openfl-workspace/fets_challenge_workspace/src/challenge_assigner.py +++ /dev/null @@ -1,40 +0,0 @@ -# Provided by the FeTS Initiative (www.fets.ai) as part of the FeTS Challenge 2022 - -# Contributing Authors (alphabetical): -# Micah Sheller (Intel) - -class FeTSChallengeAssigner: - def __init__(self, tasks, authorized_cols, training_tasks, validation_tasks, **kwargs): - """Initialize.""" - self.training_collaborators = [] - self.tasks = tasks - self.training_tasks = training_tasks - self.validation_tasks = validation_tasks - self.collaborators = authorized_cols - - def set_training_collaborators(self, training_collaborators): - self.training_collaborators = training_collaborators - - - def get_tasks_for_collaborator(self, collaborator_name, round_number): - """Get tasks for the collaborator 
specified.""" - if collaborator_name in self.training_collaborators: - return self.training_tasks - else: - return self.validation_tasks - - def get_collaborators_for_task(self, task_name, round_number): - """Get collaborators for the task specified.""" - if task_name in self.validation_tasks: - return self.collaborators - else: - return self.training_collaborators - - def get_all_tasks_for_round(self, round_number): - return self.training_tasks - - def get_aggregation_type_for_task(self, task_name): - """Extract aggregation type from self.tasks.""" - if 'aggregation_type' not in self.tasks[task_name]: - return None - return self.tasks[task_name]['aggregation_type'] diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/src/fets_challenge_model.py b/Task_1/openfl-workspace/fets_challenge_workspace/src/fets_challenge_model.py deleted file mode 100644 index 6af8c6e..0000000 --- a/Task_1/openfl-workspace/fets_challenge_workspace/src/fets_challenge_model.py +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""GaNDLFTaskRunner module.""" - -from copy import deepcopy - -import numpy as np -import torch as pt - -from openfl.utilities.split import split_tensor_dict_for_holdouts -from openfl.utilities import TensorKey - -from openfl.federated.task.runner_gandlf import * - -from GANDLF.compute.generic import create_pytorch_objects -from GANDLF.compute.training_loop import train_network -from GANDLF.compute.forward_pass import validate_network - -from . import TRAINING_HPARAMS - -class FeTSChallengeModel(GaNDLFTaskRunner): - """FeTSChallenge Model class for Federated Learning.""" - - def validate(self, col_name, round_num, input_tensor_dict, - use_tqdm=False, **kwargs): - """Validate. - Run validation of the model on the local data. 
- Args: - col_name: Name of the collaborator - round_num: What round is it - input_tensor_dict: Required input tensors (for model) - use_tqdm (bool): Use tqdm to print a progress bar (Default=True) - kwargs: Key word arguments passed to GaNDLF main_run - Returns: - global_output_dict: Tensors to send back to the aggregator - local_output_dict: Tensors to maintain in the local TensorDB - """ - self.rebuild_model(round_num, input_tensor_dict, validation=True) - self.model.eval() - # self.model.to(self.device) - - epoch_valid_loss, epoch_valid_metric = validate_network(self.model, - self.data_loader.val_dataloader, - self.scheduler, - self.params, - round_num, - mode="validation") - - self.logger.info(epoch_valid_loss) - self.logger.info(epoch_valid_metric) - - origin = col_name - suffix = 'validate' - if kwargs['apply'] == 'local': - suffix += '_local' - else: - suffix += '_agg' - tags = ('metric', suffix) - - output_tensor_dict = {} - output_tensor_dict[TensorKey('valid_loss', origin, round_num, True, tags)] = np.array(epoch_valid_loss) - for k, v in epoch_valid_metric.items(): - print(f"Testing ->>>> Metric Key {k} Value {v}") - if isinstance(v, str): - v = list(map(float, v.split('_'))) - - if np.array(v).size == 1: - output_tensor_dict[TensorKey(f'valid_{k}', origin, round_num, True, tags)] = np.array(v) - else: - for idx,label in enumerate([0,1,2,4]): - output_tensor_dict[TensorKey(f'valid_{k}_{label}', origin, round_num, True, tags)] = np.array(v[idx]) - - return output_tensor_dict, {} - - def inference(self, col_name, round_num, input_tensor_dict, - use_tqdm=False, **kwargs): - """Inference. 
- Run inference of the model on the local data (used for final validation) - Args: - col_name: Name of the collaborator - round_num: What round is it - input_tensor_dict: Required input tensors (for model) - use_tqdm (bool): Use tqdm to print a progress bar (Default=True) - kwargs: Key word arguments passed to GaNDLF main_run - Returns: - global_output_dict: Tensors to send back to the aggregator - local_output_dict: Tensors to maintain in the local TensorDB - """ - self.rebuild_model(round_num, input_tensor_dict, validation=True) - self.model.eval() - # self.model.to(self.device) - - epoch_valid_loss, epoch_valid_metric = validate_network(self.model, - self.data_loader.val_dataloader, - self.scheduler, - self.params, - round_num, - mode="inference") - - origin = col_name - suffix = 'validate' - if kwargs['apply'] == 'local': - suffix += '_local' - else: - suffix += '_agg' - tags = ('metric', suffix) - - output_tensor_dict = {} - output_tensor_dict[TensorKey('valid_loss', origin, round_num, True, tags)] = np.array(epoch_valid_loss) - for k, v in epoch_valid_metric.items(): - print(f"Testing ->>>> Metric Key {k} Value {v}") - if isinstance(v, str): - v = list(map(float, v.split('_'))) - if np.array(v).size == 1: - output_tensor_dict[TensorKey(f'valid_{k}', origin, round_num, True, tags)] = np.array(v) - else: - for idx,label in enumerate([0,1,2,4]): - output_tensor_dict[TensorKey(f'valid_{k}_{label}', origin, round_num, True, tags)] = np.array(v[idx]) - - return output_tensor_dict, {} - - - def train(self, col_name, round_num, input_tensor_dict, use_tqdm=False, epochs=1, **kwargs): - """Train batches. - Train the model on the requested number of batches. 
- Args: - col_name : Name of the collaborator - round_num : What round is it - input_tensor_dict : Required input tensors (for model) - use_tqdm (bool) : Use tqdm to print a progress bar (Default=True) - epochs : The number of epochs to train - crossfold_test : Whether or not to use cross fold trainval/test - to evaluate the quality of the model under fine tuning - (this uses a separate prameter to pass in the data and - config used) - crossfold_test_data_csv : Data csv used to define data used in crossfold test. - This csv does not itself define the folds, just - defines the total data to be used. - crossfold_val_n : number of folds to use for the train,val level of the nested crossfold. - corssfold_test_n : number of folds to use for the trainval,test level of the nested crossfold. - kwargs : Key word arguments passed to GaNDLF main_run - Returns: - global_output_dict : Tensors to send back to the aggregator - local_output_dict : Tensors to maintain in the local TensorDB - """ - - # handle the hparams - epochs_per_round = int(input_tensor_dict.pop('epochs_per_round')) - learning_rate = float(input_tensor_dict.pop('learning_rate')) - - self.rebuild_model(round_num, input_tensor_dict) - # set to "training" mode - self.model.train() - - # Set the learning rate - for group in self.optimizer.param_groups: - group['lr'] = learning_rate - - for epoch in range(epochs_per_round): - self.logger.info(f'Run {epoch} epoch of {round_num} round') - # FIXME: do we want to capture these in an array rather than simply taking the last value? 
- epoch_train_loss, epoch_train_metric = train_network(self.model, - self.data_loader.train_dataloader, - self.optimizer, - self.params) - - # output model tensors (Doesn't include TensorKey) - tensor_dict = self.get_tensor_dict(with_opt_vars=True) - - metric_dict = {'loss': epoch_train_loss} - for k, v in epoch_train_metric.items(): - print(f"Testing ->>>> Metric Key {k} Value {v}") - if isinstance(v, str): - v = list(map(float, v.split('_'))) - if np.array(v).size == 1: - metric_dict[f'train_{k}'] = np.array(v) - else: - for idx,label in enumerate([0,1,2,4]): - metric_dict[f'train_{k}_{label}'] = np.array(v[idx]) - - - # Return global_tensor_dict, local_tensor_dict - # is this even pt-specific really? - global_tensor_dict, local_tensor_dict = create_tensorkey_dicts(tensor_dict, - metric_dict, - col_name, - round_num, - self.logger, - self.tensor_dict_split_fn_kwargs) - - # Update the required tensors if they need to be pulled from the - # aggregator - # TODO this logic can break if different collaborators have different - # roles between rounds. - # For example, if a collaborator only performs validation in the first - # round but training in the second, it has no way of knowing the - # optimizer state tensor names to request from the aggregator because - # these are only created after training occurs. A work around could - # involve doing a single epoch of training on random data to get the - # optimizer names, and then throwing away the model. 
- if self.opt_treatment == 'CONTINUE_GLOBAL': - self.initialize_tensorkeys_for_functions(with_opt_vars=True) - - # This will signal that the optimizer values are now present, - # and can be loaded when the model is rebuilt - self.train_round_completed = True - - # Return global_tensor_dict, local_tensor_dict - return global_tensor_dict, local_tensor_dict - - def get_required_tensorkeys_for_function(self, func_name, **kwargs): - required = super().get_required_tensorkeys_for_function(func_name, **kwargs) - if func_name == 'train': - round_number = required[0].round_number - for hparam in TRAINING_HPARAMS: - required.append(TensorKey(tensor_name=hparam, origin='GLOBAL', round_number=round_number, report=False, tags=('hparam', 'model'))) - return required diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/validation.csv b/Task_1/openfl-workspace/fets_challenge_workspace/validation.csv deleted file mode 100644 index d0d78f1..0000000 --- a/Task_1/openfl-workspace/fets_challenge_workspace/validation.csv +++ /dev/null @@ -1,220 +0,0 @@ -Partition_ID,Subject_ID --1,FeTS2022_01718 --1,FeTS2022_01719 --1,FeTS2022_00573 --1,FeTS2022_00592 --1,FeTS2022_00585 --1,FeTS2022_00595 --1,FeTS2022_01678 --1,FeTS2022_01679 --1,FeTS2022_01680 --1,FeTS2022_01681 --1,FeTS2022_01682 --1,FeTS2022_01683 --1,FeTS2022_01684 --1,FeTS2022_01685 --1,FeTS2022_01686 --1,FeTS2022_00833 --1,FeTS2022_01687 --1,FeTS2022_00384 --1,FeTS2022_01688 --1,FeTS2022_01689 --1,FeTS2022_01690 --1,FeTS2022_01691 --1,FeTS2022_00434 --1,FeTS2022_00333 --1,FeTS2022_00337 --1,FeTS2022_01692 --1,FeTS2022_00229 --1,FeTS2022_01667 --1,FeTS2022_01668 --1,FeTS2022_01669 --1,FeTS2022_01670 --1,FeTS2022_01671 --1,FeTS2022_01672 --1,FeTS2022_01673 --1,FeTS2022_01674 --1,FeTS2022_01675 --1,FeTS2022_01676 --1,FeTS2022_01677 --1,FeTS2022_01720 --1,FeTS2022_01724 --1,FeTS2022_00145 --1,FeTS2022_01693 --1,FeTS2022_01006 --1,FeTS2022_01727 --1,FeTS2022_01694 --1,FeTS2022_01729 --1,FeTS2022_01695 --1,FeTS2022_00997 
--1,FeTS2022_01696 --1,FeTS2022_01732 --1,FeTS2022_01697 --1,FeTS2022_01698 --1,FeTS2022_01699 --1,FeTS2022_01700 --1,FeTS2022_01701 --1,FeTS2022_01702 --1,FeTS2022_01703 --1,FeTS2022_00135 --1,FeTS2022_01704 --1,FeTS2022_01705 --1,FeTS2022_01706 --1,FeTS2022_01707 --1,FeTS2022_01708 --1,FeTS2022_01709 --1,FeTS2022_01710 --1,FeTS2022_01711 --1,FeTS2022_01712 --1,FeTS2022_01713 --1,FeTS2022_00129 --1,FeTS2022_00125 --1,FeTS2022_01714 --1,FeTS2022_00119 --1,FeTS2022_01736 --1,FeTS2022_00114 --1,FeTS2022_01738 --1,FeTS2022_00141 --1,FeTS2022_01715 --1,FeTS2022_01716 --1,FeTS2022_01717 --1,FeTS2022_01721 --1,FeTS2022_01756 --1,FeTS2022_01757 --1,FeTS2022_01758 --1,FeTS2022_01759 --1,FeTS2022_01760 --1,FeTS2022_01761 --1,FeTS2022_01762 --1,FeTS2022_01763 --1,FeTS2022_01764 --1,FeTS2022_01765 --1,FeTS2022_01766 --1,FeTS2022_01767 --1,FeTS2022_01768 --1,FeTS2022_01770 --1,FeTS2022_01769 --1,FeTS2022_01772 --1,FeTS2022_01771 --1,FeTS2022_00001 --1,FeTS2022_00013 --1,FeTS2022_00015 --1,FeTS2022_00027 --1,FeTS2022_00037 --1,FeTS2022_00047 --1,FeTS2022_00079 --1,FeTS2022_00080 --1,FeTS2022_00082 --1,FeTS2022_00091 --1,FeTS2022_01722 --1,FeTS2022_01723 --1,FeTS2022_01741 --1,FeTS2022_01725 --1,FeTS2022_01726 --1,FeTS2022_01728 --1,FeTS2022_01742 --1,FeTS2022_01743 --1,FeTS2022_01730 --1,FeTS2022_01744 --1,FeTS2022_01731 --1,FeTS2022_01733 --1,FeTS2022_01734 --1,FeTS2022_01735 --1,FeTS2022_01747 --1,FeTS2022_01737 --1,FeTS2022_01739 --1,FeTS2022_01740 --1,FeTS2022_01745 --1,FeTS2022_01746 --1,FeTS2022_01748 --1,FeTS2022_00153 --1,FeTS2022_01749 --1,FeTS2022_01750 --1,FeTS2022_01751 --1,FeTS2022_01752 --1,FeTS2022_01753 --1,FeTS2022_01754 --1,FeTS2022_01755 --1,FeTS2022_01773 --1,FeTS2022_01774 --1,FeTS2022_01775 --1,FeTS2022_01776 --1,FeTS2022_00161 --1,FeTS2022_00163 --1,FeTS2022_00174 --1,FeTS2022_00181 --1,FeTS2022_00182 --1,FeTS2022_00190 --1,FeTS2022_00200 --1,FeTS2022_00208 --1,FeTS2022_00213 --1,FeTS2022_00252 --1,FeTS2022_00256 --1,FeTS2022_00264 --1,FeTS2022_00287 
--1,FeTS2022_00307 --1,FeTS2022_00323 --1,FeTS2022_00335 --1,FeTS2022_00355 --1,FeTS2022_00372 --1,FeTS2022_00381 --1,FeTS2022_00422 --1,FeTS2022_00428 --1,FeTS2022_00438 --1,FeTS2022_00447 --1,FeTS2022_00450 --1,FeTS2022_00458 --1,FeTS2022_00460 --1,FeTS2022_00462 --1,FeTS2022_00463 --1,FeTS2022_00467 --1,FeTS2022_00474 --1,FeTS2022_00489 --1,FeTS2022_00492 --1,FeTS2022_00503 --1,FeTS2022_00521 --1,FeTS2022_00535 --1,FeTS2022_00553 --1,FeTS2022_00560 --1,FeTS2022_00603 --1,FeTS2022_01777 --1,FeTS2022_01778 --1,FeTS2022_01779 --1,FeTS2022_01780 --1,FeTS2022_01781 --1,FeTS2022_01782 --1,FeTS2022_01783 --1,FeTS2022_01784 --1,FeTS2022_01785 --1,FeTS2022_01786 --1,FeTS2022_01787 --1,FeTS2022_01788 --1,FeTS2022_01789 --1,FeTS2022_01790 --1,FeTS2022_00644 --1,FeTS2022_00647 --1,FeTS2022_00662 --1,FeTS2022_00671 --1,FeTS2022_00681 --1,FeTS2022_00699 --1,FeTS2022_00702 --1,FeTS2022_00712 --1,FeTS2022_00719 --1,FeTS2022_00721 --1,FeTS2022_00749 --1,FeTS2022_00762 --1,FeTS2022_00769 --1,FeTS2022_00779 --1,FeTS2022_00821 --1,FeTS2022_00822 --1,FeTS2022_00825 --1,FeTS2022_00826 --1,FeTS2022_00829 --1,FeTS2022_01791 --1,FeTS2022_01792 --1,FeTS2022_01793 --1,FeTS2022_01794 --1,FeTS2022_01795 --1,FeTS2022_01796 --1,FeTS2022_01797 --1,FeTS2022_01798 diff --git a/Task_1/setup.py b/Task_1/setup.py index 2ea1e59..98ac63f 100644 --- a/Task_1/setup.py +++ b/Task_1/setup.py @@ -24,7 +24,6 @@ url='https://github.com/FETS-AI/Challenge', packages=[ 'fets_challenge', - 'openfl-workspace', ], include_package_data=True, install_requires=[ From e5b5180bbc652449dd942e286b85d05487d82eaa Mon Sep 17 00:00:00 2001 From: "Agrawal, Kush" Date: Thu, 20 Mar 2025 23:15:08 -0700 Subject: [PATCH 10/16] Updated folder Signed-off-by: Agrawal, Kush --- Task_1/fets_challenge/fets_flow.py | 10 +- Task_1/partitioning_data/partitioning_1.csv | 1252 +++++++++++++++++++ Task_1/partitioning_data/partitioning_2.csv | 1252 +++++++++++++++++++ Task_1/partitioning_data/small_split.csv | 11 + 
Task_1/partitioning_data/validation.csv | 220 ++++ 5 files changed, 2744 insertions(+), 1 deletion(-) create mode 100644 Task_1/partitioning_data/partitioning_1.csv create mode 100644 Task_1/partitioning_data/partitioning_2.csv create mode 100644 Task_1/partitioning_data/small_split.csv create mode 100644 Task_1/partitioning_data/validation.csv diff --git a/Task_1/fets_challenge/fets_flow.py b/Task_1/fets_challenge/fets_flow.py index 469134b..9392b25 100644 --- a/Task_1/fets_challenge/fets_flow.py +++ b/Task_1/fets_challenge/fets_flow.py @@ -219,7 +219,15 @@ def initialize_colls(self): print(f'Times per collaborator for round {self.current_round}: {self.times_per_collaborator}') if self.restored is False: - self.agg_tensor_dict = self.fets_model.get_tensor_dict() + tensor_dict = self.fets_model.get_tensor_dict() + for key, value in tensor_dict.items(): + origin = 'collaborator' + round_number = self.current_round + report = False + tags = ('trained') + agg_tensor_key = TensorKey(key, origin, round_number, report, tags) + + self.agg_tensor_dict[agg_tensor_key] = value self.next(self.aggregated_model_validation) @collaborator diff --git a/Task_1/partitioning_data/partitioning_1.csv b/Task_1/partitioning_data/partitioning_1.csv new file mode 100644 index 0000000..a5a8fb3 --- /dev/null +++ b/Task_1/partitioning_data/partitioning_1.csv @@ -0,0 +1,1252 @@ +Partition_ID,Subject_ID +1,FeTS2022_01341 +1,FeTS2022_01333 +1,FeTS2022_01077 +1,FeTS2022_01054 +1,FeTS2022_00285 +1,FeTS2022_01308 +1,FeTS2022_01363 +1,FeTS2022_01091 +1,FeTS2022_01273 +1,FeTS2022_01108 +1,FeTS2022_01255 +1,FeTS2022_01301 +1,FeTS2022_00219 +1,FeTS2022_00380 +1,FeTS2022_01349 +1,FeTS2022_00251 +1,FeTS2022_01276 +1,FeTS2022_01407 +1,FeTS2022_01344 +1,FeTS2022_01405 +1,FeTS2022_00218 +1,FeTS2022_01327 +1,FeTS2022_01252 +1,FeTS2022_01132 +1,FeTS2022_01036 +1,FeTS2022_01039 +1,FeTS2022_01366 +1,FeTS2022_00262 +1,FeTS2022_01279 +1,FeTS2022_00839 +1,FeTS2022_01322 +1,FeTS2022_00389 +1,FeTS2022_00390 
+1,FeTS2022_00431 +1,FeTS2022_00222 +1,FeTS2022_00373 +1,FeTS2022_00288 +1,FeTS2022_00284 +1,FeTS2022_01088 +1,FeTS2022_00311 +1,FeTS2022_00387 +1,FeTS2022_00258 +1,FeTS2022_01389 +1,FeTS2022_00321 +1,FeTS2022_01249 +1,FeTS2022_01230 +1,FeTS2022_00836 +1,FeTS2022_00348 +1,FeTS2022_01205 +1,FeTS2022_00246 +1,FeTS2022_00314 +1,FeTS2022_01404 +1,FeTS2022_01102 +1,FeTS2022_00379 +1,FeTS2022_01395 +1,FeTS2022_00155 +1,FeTS2022_00170 +1,FeTS2022_01264 +1,FeTS2022_00837 +1,FeTS2022_01372 +1,FeTS2022_00341 +1,FeTS2022_01257 +1,FeTS2022_00329 +1,FeTS2022_00425 +1,FeTS2022_01350 +1,FeTS2022_01247 +1,FeTS2022_01234 +1,FeTS2022_00331 +1,FeTS2022_01128 +1,FeTS2022_01365 +1,FeTS2022_00221 +1,FeTS2022_00298 +1,FeTS2022_00227 +1,FeTS2022_01204 +1,FeTS2022_00204 +1,FeTS2022_01399 +1,FeTS2022_00377 +1,FeTS2022_00343 +1,FeTS2022_00280 +1,FeTS2022_01347 +1,FeTS2022_00210 +1,FeTS2022_01117 +1,FeTS2022_01275 +1,FeTS2022_01034 +1,FeTS2022_00162 +1,FeTS2022_01340 +1,FeTS2022_01212 +1,FeTS2022_01220 +1,FeTS2022_00419 +1,FeTS2022_00340 +1,FeTS2022_00296 +1,FeTS2022_01208 +1,FeTS2022_01064 +1,FeTS2022_00433 +1,FeTS2022_01050 +1,FeTS2022_01278 +1,FeTS2022_00293 +1,FeTS2022_00206 +1,FeTS2022_00356 +1,FeTS2022_00376 +1,FeTS2022_00316 +1,FeTS2022_00403 +1,FeTS2022_01348 +1,FeTS2022_00192 +1,FeTS2022_00313 +1,FeTS2022_01240 +1,FeTS2022_01222 +1,FeTS2022_00344 +1,FeTS2022_00332 +1,FeTS2022_00292 +1,FeTS2022_01392 +1,FeTS2022_00220 +1,FeTS2022_00378 +1,FeTS2022_01130 +1,FeTS2022_01106 +1,FeTS2022_01295 +1,FeTS2022_01409 +1,FeTS2022_01057 +1,FeTS2022_01068 +1,FeTS2022_00320 +1,FeTS2022_00346 +1,FeTS2022_01001 +1,FeTS2022_01207 +1,FeTS2022_01137 +1,FeTS2022_01318 +1,FeTS2022_00289 +1,FeTS2022_00157 +1,FeTS2022_01224 +1,FeTS2022_01367 +1,FeTS2022_01382 +1,FeTS2022_00309 +1,FeTS2022_01008 +1,FeTS2022_01059 +1,FeTS2022_01271 +1,FeTS2022_01110 +1,FeTS2022_01398 +1,FeTS2022_01119 +1,FeTS2022_00209 +1,FeTS2022_01274 +1,FeTS2022_01124 +1,FeTS2022_00241 +1,FeTS2022_00152 +1,FeTS2022_01131 +1,FeTS2022_01353 
+1,FeTS2022_00352 +1,FeTS2022_01133 +1,FeTS2022_01084 +1,FeTS2022_00441 +1,FeTS2022_01268 +1,FeTS2022_01210 +1,FeTS2022_01375 +1,FeTS2022_00236 +1,FeTS2022_01218 +1,FeTS2022_01408 +1,FeTS2022_00274 +1,FeTS2022_01118 +1,FeTS2022_01213 +1,FeTS2022_01310 +1,FeTS2022_00194 +1,FeTS2022_00392 +1,FeTS2022_00334 +1,FeTS2022_00270 +1,FeTS2022_01359 +1,FeTS2022_01364 +1,FeTS2022_01336 +1,FeTS2022_01272 +1,FeTS2022_01090 +1,FeTS2022_00412 +1,FeTS2022_00228 +1,FeTS2022_00410 +1,FeTS2022_01239 +1,FeTS2022_01010 +1,FeTS2022_01394 +1,FeTS2022_00282 +1,FeTS2022_00237 +1,FeTS2022_01390 +1,FeTS2022_00382 +1,FeTS2022_00188 +1,FeTS2022_01211 +1,FeTS2022_01376 +1,FeTS2022_01243 +1,FeTS2022_01330 +1,FeTS2022_00253 +1,FeTS2022_01329 +1,FeTS2022_01306 +1,FeTS2022_01081 +1,FeTS2022_01369 +1,FeTS2022_01048 +1,FeTS2022_00328 +1,FeTS2022_00291 +1,FeTS2022_01049 +1,FeTS2022_01263 +1,FeTS2022_00317 +1,FeTS2022_00305 +1,FeTS2022_01265 +1,FeTS2022_00238 +1,FeTS2022_00423 +1,FeTS2022_01127 +1,FeTS2022_01379 +1,FeTS2022_01258 +1,FeTS2022_00299 +1,FeTS2022_01334 +1,FeTS2022_00350 +1,FeTS2022_01109 +1,FeTS2022_01352 +1,FeTS2022_01055 +1,FeTS2022_00167 +1,FeTS2022_01354 +1,FeTS2022_01231 +1,FeTS2022_00185 +1,FeTS2022_00306 +1,FeTS2022_00171 +1,FeTS2022_01261 +1,FeTS2022_01345 +1,FeTS2022_01397 +1,FeTS2022_00399 +1,FeTS2022_01319 +1,FeTS2022_01250 +1,FeTS2022_01097 +1,FeTS2022_01229 +1,FeTS2022_01393 +1,FeTS2022_00430 +1,FeTS2022_01203 +1,FeTS2022_01309 +1,FeTS2022_01342 +1,FeTS2022_01223 +1,FeTS2022_00239 +1,FeTS2022_00275 +1,FeTS2022_00406 +1,FeTS2022_01116 +1,FeTS2022_01380 +1,FeTS2022_00214 +1,FeTS2022_00195 +1,FeTS2022_01314 +1,FeTS2022_01113 +1,FeTS2022_00193 +1,FeTS2022_01259 +1,FeTS2022_00386 +1,FeTS2022_00834 +1,FeTS2022_01227 +1,FeTS2022_01277 +1,FeTS2022_00283 +1,FeTS2022_01099 +1,FeTS2022_00212 +1,FeTS2022_00165 +1,FeTS2022_01332 +1,FeTS2022_00364 +1,FeTS2022_01129 +1,FeTS2022_00301 +1,FeTS2022_01402 +1,FeTS2022_00199 +1,FeTS2022_01066 +1,FeTS2022_01107 +1,FeTS2022_01337 +1,FeTS2022_00230 
+1,FeTS2022_01114 +1,FeTS2022_01294 +1,FeTS2022_01370 +1,FeTS2022_01269 +1,FeTS2022_01043 +1,FeTS2022_00359 +1,FeTS2022_01004 +1,FeTS2022_00286 +1,FeTS2022_01038 +1,FeTS2022_00370 +1,FeTS2022_00184 +1,FeTS2022_00360 +1,FeTS2022_01123 +1,FeTS2022_01237 +1,FeTS2022_01086 +1,FeTS2022_00231 +1,FeTS2022_00353 +1,FeTS2022_01254 +1,FeTS2022_01373 +1,FeTS2022_01100 +1,FeTS2022_01214 +1,FeTS2022_01242 +1,FeTS2022_01115 +1,FeTS2022_01331 +1,FeTS2022_00391 +1,FeTS2022_01312 +1,FeTS2022_00324 +1,FeTS2022_01080 +1,FeTS2022_00371 +1,FeTS2022_01396 +1,FeTS2022_00339 +1,FeTS2022_00260 +1,FeTS2022_00243 +1,FeTS2022_00233 +1,FeTS2022_01323 +1,FeTS2022_01248 +1,FeTS2022_00263 +1,FeTS2022_00347 +1,FeTS2022_01233 +1,FeTS2022_00367 +1,FeTS2022_01051 +1,FeTS2022_01126 +1,FeTS2022_01267 +1,FeTS2022_00383 +1,FeTS2022_01357 +1,FeTS2022_00413 +1,FeTS2022_01287 +1,FeTS2022_00349 +1,FeTS2022_01244 +1,FeTS2022_01041 +1,FeTS2022_01236 +1,FeTS2022_01245 +1,FeTS2022_01383 +1,FeTS2022_00196 +1,FeTS2022_01387 +1,FeTS2022_00297 +1,FeTS2022_01103 +1,FeTS2022_01098 +1,FeTS2022_01410 +1,FeTS2022_00440 +1,FeTS2022_01305 +1,FeTS2022_01304 +1,FeTS2022_01074 +1,FeTS2022_01046 +1,FeTS2022_01226 +1,FeTS2022_01253 +1,FeTS2022_01040 +1,FeTS2022_00269 +1,FeTS2022_00310 +1,FeTS2022_01056 +1,FeTS2022_01311 +1,FeTS2022_01338 +1,FeTS2022_00166 +1,FeTS2022_00327 +1,FeTS2022_00254 +1,FeTS2022_01000 +1,FeTS2022_00259 +1,FeTS2022_01134 +1,FeTS2022_01104 +1,FeTS2022_01232 +1,FeTS2022_01286 +1,FeTS2022_01052 +1,FeTS2022_01217 +1,FeTS2022_01238 +1,FeTS2022_00154 +1,FeTS2022_00395 +1,FeTS2022_00267 +1,FeTS2022_00366 +1,FeTS2022_00351 +1,FeTS2022_00159 +1,FeTS2022_00131 +1,FeTS2022_01246 +1,FeTS2022_01060 +1,FeTS2022_01087 +1,FeTS2022_00250 +1,FeTS2022_00234 +1,FeTS2022_01058 +1,FeTS2022_00235 +1,FeTS2022_00203 +1,FeTS2022_00414 +1,FeTS2022_01285 +1,FeTS2022_01071 +1,FeTS2022_01111 +1,FeTS2022_01377 +1,FeTS2022_01355 +1,FeTS2022_01384 +1,FeTS2022_01120 +1,FeTS2022_01082 +1,FeTS2022_01076 +1,FeTS2022_01072 +1,FeTS2022_00303 
+1,FeTS2022_00436 +1,FeTS2022_01361 +1,FeTS2022_01073 +1,FeTS2022_00338 +1,FeTS2022_01351 +1,FeTS2022_00273 +1,FeTS2022_00186 +1,FeTS2022_00290 +1,FeTS2022_01381 +1,FeTS2022_01083 +1,FeTS2022_00409 +1,FeTS2022_00281 +1,FeTS2022_00840 +1,FeTS2022_00407 +1,FeTS2022_01094 +1,FeTS2022_01328 +1,FeTS2022_01078 +1,FeTS2022_00312 +1,FeTS2022_01235 +1,FeTS2022_01288 +1,FeTS2022_01391 +1,FeTS2022_01215 +1,FeTS2022_00160 +1,FeTS2022_00421 +1,FeTS2022_01317 +1,FeTS2022_01216 +1,FeTS2022_00178 +1,FeTS2022_00838 +1,FeTS2022_01321 +1,FeTS2022_01037 +1,FeTS2022_00176 +1,FeTS2022_01293 +1,FeTS2022_01219 +1,FeTS2022_01260 +1,FeTS2022_01339 +1,FeTS2022_01325 +1,FeTS2022_00249 +1,FeTS2022_01241 +1,FeTS2022_00211 +1,FeTS2022_01105 +1,FeTS2022_01138 +1,FeTS2022_00261 +1,FeTS2022_01316 +1,FeTS2022_01315 +1,FeTS2022_01256 +1,FeTS2022_00191 +1,FeTS2022_01069 +1,FeTS2022_01062 +1,FeTS2022_01135 +1,FeTS2022_00207 +1,FeTS2022_00401 +1,FeTS2022_00172 +1,FeTS2022_01085 +1,FeTS2022_00247 +1,FeTS2022_01206 +1,FeTS2022_01356 +1,FeTS2022_00325 +1,FeTS2022_00429 +1,FeTS2022_01122 +1,FeTS2022_01374 +1,FeTS2022_00156 +1,FeTS2022_01075 +1,FeTS2022_01362 +1,FeTS2022_01251 +1,FeTS2022_00405 +1,FeTS2022_01047 +1,FeTS2022_00240 +1,FeTS2022_00336 +1,FeTS2022_01092 +1,FeTS2022_01403 +1,FeTS2022_01385 +1,FeTS2022_01096 +1,FeTS2022_00426 +1,FeTS2022_00201 +1,FeTS2022_01335 +1,FeTS2022_00404 +1,FeTS2022_00322 +1,FeTS2022_00294 +1,FeTS2022_01070 +1,FeTS2022_01225 +1,FeTS2022_01067 +1,FeTS2022_00375 +1,FeTS2022_00158 +1,FeTS2022_00177 +1,FeTS2022_00271 +1,FeTS2022_01388 +1,FeTS2022_01053 +1,FeTS2022_01042 +1,FeTS2022_01400 +1,FeTS2022_00183 +1,FeTS2022_01358 +1,FeTS2022_01266 +1,FeTS2022_01360 +1,FeTS2022_00304 +1,FeTS2022_01065 +1,FeTS2022_01093 +1,FeTS2022_00397 +1,FeTS2022_01262 +1,FeTS2022_00217 +1,FeTS2022_01401 +1,FeTS2022_01125 +1,FeTS2022_01406 +1,FeTS2022_01343 +1,FeTS2022_01346 +1,FeTS2022_01089 +1,FeTS2022_00216 +1,FeTS2022_01061 +1,FeTS2022_01299 +1,FeTS2022_00242 +1,FeTS2022_01112 +1,FeTS2022_00300 
+1,FeTS2022_01280 +1,FeTS2022_00187 +1,FeTS2022_00318 +1,FeTS2022_01371 +1,FeTS2022_01378 +1,FeTS2022_00418 +1,FeTS2022_01121 +1,FeTS2022_01136 +1,FeTS2022_00266 +1,FeTS2022_01221 +1,FeTS2022_01307 +1,FeTS2022_01386 +1,FeTS2022_00432 +1,FeTS2022_01101 +1,FeTS2022_01228 +1,FeTS2022_01313 +1,FeTS2022_01209 +1,FeTS2022_00388 +1,FeTS2022_01270 +1,FeTS2022_01044 +1,FeTS2022_00417 +1,FeTS2022_01063 +1,FeTS2022_01368 +1,FeTS2022_00369 +1,FeTS2022_01095 +1,FeTS2022_00416 +1,FeTS2022_00400 +1,FeTS2022_01045 +1,FeTS2022_01202 +1,FeTS2022_01326 +1,FeTS2022_01079 +1,FeTS2022_00402 +1,FeTS2022_01320 +1,FeTS2022_01324 +2,FeTS2022_01412 +2,FeTS2022_01415 +2,FeTS2022_01411 +2,FeTS2022_01414 +2,FeTS2022_01413 +2,FeTS2022_01416 +3,FeTS2022_01439 +3,FeTS2022_01435 +3,FeTS2022_01434 +3,FeTS2022_01440 +3,FeTS2022_01431 +3,FeTS2022_01437 +3,FeTS2022_01436 +3,FeTS2022_01433 +3,FeTS2022_01438 +3,FeTS2022_01426 +3,FeTS2022_01427 +3,FeTS2022_01428 +3,FeTS2022_01429 +3,FeTS2022_01432 +3,FeTS2022_01430 +4,FeTS2022_01152 +4,FeTS2022_01178 +4,FeTS2022_01186 +4,FeTS2022_01184 +4,FeTS2022_01181 +4,FeTS2022_01187 +4,FeTS2022_01168 +4,FeTS2022_01196 +4,FeTS2022_01173 +4,FeTS2022_01176 +4,FeTS2022_01200 +4,FeTS2022_00565 +4,FeTS2022_01193 +4,FeTS2022_01174 +4,FeTS2022_01662 +4,FeTS2022_01660 +4,FeTS2022_01201 +4,FeTS2022_01167 +4,FeTS2022_01170 +4,FeTS2022_01179 +4,FeTS2022_01185 +4,FeTS2022_01197 +4,FeTS2022_01172 +4,FeTS2022_01189 +4,FeTS2022_00563 +4,FeTS2022_01180 +4,FeTS2022_01198 +4,FeTS2022_01183 +4,FeTS2022_01151 +4,FeTS2022_01195 +4,FeTS2022_01657 +4,FeTS2022_01194 +4,FeTS2022_01191 +4,FeTS2022_01169 +4,FeTS2022_01171 +4,FeTS2022_00561 +4,FeTS2022_01659 +4,FeTS2022_01661 +4,FeTS2022_01190 +4,FeTS2022_01188 +4,FeTS2022_01199 +4,FeTS2022_01658 +4,FeTS2022_01192 +4,FeTS2022_01175 +4,FeTS2022_01182 +4,FeTS2022_01537 +4,FeTS2022_01177 +5,FeTS2022_00102 +5,FeTS2022_00149 +5,FeTS2022_01290 +5,FeTS2022_00113 +5,FeTS2022_01009 +5,FeTS2022_01007 +5,FeTS2022_01002 +5,FeTS2022_00139 +5,FeTS2022_01292 
+5,FeTS2022_00100 +5,FeTS2022_01289 +5,FeTS2022_01291 +5,FeTS2022_01005 +5,FeTS2022_01282 +5,FeTS2022_01003 +5,FeTS2022_00109 +5,FeTS2022_01283 +5,FeTS2022_00999 +5,FeTS2022_01281 +5,FeTS2022_01284 +5,FeTS2022_00151 +5,FeTS2022_00123 +6,FeTS2022_01451 +6,FeTS2022_01453 +6,FeTS2022_01452 +6,FeTS2022_00831 +6,FeTS2022_01448 +6,FeTS2022_01300 +6,FeTS2022_01443 +6,FeTS2022_00136 +6,FeTS2022_01454 +6,FeTS2022_00144 +6,FeTS2022_00121 +6,FeTS2022_01297 +6,FeTS2022_00133 +6,FeTS2022_01447 +6,FeTS2022_00142 +6,FeTS2022_01450 +6,FeTS2022_00120 +6,FeTS2022_01298 +6,FeTS2022_01449 +6,FeTS2022_01442 +6,FeTS2022_01446 +6,FeTS2022_01303 +6,FeTS2022_01296 +6,FeTS2022_00132 +6,FeTS2022_01441 +6,FeTS2022_01445 +6,FeTS2022_01302 +6,FeTS2022_00143 +6,FeTS2022_00105 +6,FeTS2022_01444 +6,FeTS2022_00147 +6,FeTS2022_01455 +6,FeTS2022_00146 +6,FeTS2022_00137 +7,FeTS2022_01459 +7,FeTS2022_01464 +7,FeTS2022_01458 +7,FeTS2022_01457 +7,FeTS2022_01461 +7,FeTS2022_01456 +7,FeTS2022_01460 +7,FeTS2022_01462 +7,FeTS2022_01466 +7,FeTS2022_01465 +7,FeTS2022_01463 +7,FeTS2022_01467 +8,FeTS2022_00140 +8,FeTS2022_01469 +8,FeTS2022_01468 +8,FeTS2022_01470 +8,FeTS2022_00104 +8,FeTS2022_00110 +8,FeTS2022_00112 +8,FeTS2022_00128 +9,FeTS2022_00134 +9,FeTS2022_00150 +9,FeTS2022_00116 +9,FeTS2022_01471 +10,FeTS2022_01472 +10,FeTS2022_00117 +10,FeTS2022_00130 +10,FeTS2022_00138 +10,FeTS2022_01473 +10,FeTS2022_00111 +10,FeTS2022_00124 +10,FeTS2022_00106 +11,FeTS2022_00122 +11,FeTS2022_00148 +11,FeTS2022_01474 +11,FeTS2022_00108 +11,FeTS2022_01144 +11,FeTS2022_00107 +11,FeTS2022_01140 +11,FeTS2022_01146 +11,FeTS2022_01145 +11,FeTS2022_01139 +11,FeTS2022_01141 +11,FeTS2022_01142 +11,FeTS2022_01143 +11,FeTS2022_01475 +12,FeTS2022_01482 +12,FeTS2022_01480 +12,FeTS2022_01485 +12,FeTS2022_01476 +12,FeTS2022_01481 +12,FeTS2022_01483 +12,FeTS2022_01486 +12,FeTS2022_01484 +12,FeTS2022_01479 +12,FeTS2022_01477 +12,FeTS2022_01478 +13,FeTS2022_01491 +13,FeTS2022_01500 +13,FeTS2022_01519 +13,FeTS2022_01516 +13,FeTS2022_01509 
+13,FeTS2022_01520 +13,FeTS2022_01508 +13,FeTS2022_01503 +13,FeTS2022_01488 +13,FeTS2022_01492 +13,FeTS2022_01502 +13,FeTS2022_01493 +13,FeTS2022_01497 +13,FeTS2022_01499 +13,FeTS2022_01487 +13,FeTS2022_01505 +13,FeTS2022_01504 +13,FeTS2022_01490 +13,FeTS2022_01507 +13,FeTS2022_01510 +13,FeTS2022_01512 +13,FeTS2022_01514 +13,FeTS2022_01517 +13,FeTS2022_01501 +13,FeTS2022_01518 +13,FeTS2022_01506 +13,FeTS2022_01515 +13,FeTS2022_01511 +13,FeTS2022_01494 +13,FeTS2022_01489 +13,FeTS2022_01513 +13,FeTS2022_01496 +13,FeTS2022_01495 +13,FeTS2022_01521 +13,FeTS2022_01498 +14,FeTS2022_01522 +14,FeTS2022_01525 +14,FeTS2022_01526 +14,FeTS2022_01527 +14,FeTS2022_01524 +14,FeTS2022_01523 +15,FeTS2022_01530 +15,FeTS2022_01536 +15,FeTS2022_01535 +15,FeTS2022_01663 +15,FeTS2022_01534 +15,FeTS2022_01529 +15,FeTS2022_01531 +15,FeTS2022_01666 +15,FeTS2022_01665 +15,FeTS2022_01532 +15,FeTS2022_01664 +15,FeTS2022_01528 +15,FeTS2022_01533 +16,FeTS2022_00584 +16,FeTS2022_00567 +16,FeTS2022_00571 +16,FeTS2022_00582 +16,FeTS2022_00570 +16,FeTS2022_00594 +16,FeTS2022_00597 +16,FeTS2022_00596 +16,FeTS2022_00576 +16,FeTS2022_00572 +16,FeTS2022_00115 +16,FeTS2022_00593 +16,FeTS2022_00588 +16,FeTS2022_00598 +16,FeTS2022_00589 +16,FeTS2022_00574 +16,FeTS2022_00586 +16,FeTS2022_00579 +16,FeTS2022_00590 +16,FeTS2022_00599 +16,FeTS2022_00577 +16,FeTS2022_00575 +16,FeTS2022_00581 +16,FeTS2022_00591 +16,FeTS2022_00569 +16,FeTS2022_00587 +16,FeTS2022_00580 +16,FeTS2022_00583 +16,FeTS2022_00578 +16,FeTS2022_00568 +17,FeTS2022_01423 +17,FeTS2022_01420 +17,FeTS2022_01422 +17,FeTS2022_01417 +17,FeTS2022_01421 +17,FeTS2022_01424 +17,FeTS2022_01418 +17,FeTS2022_01425 +17,FeTS2022_01419 +18,FeTS2022_01628 +18,FeTS2022_01615 +18,FeTS2022_01035 +18,FeTS2022_00732 +18,FeTS2022_00753 +18,FeTS2022_01620 +18,FeTS2022_01637 +18,FeTS2022_01594 +18,FeTS2022_00530 +18,FeTS2022_00772 +18,FeTS2022_01580 +18,FeTS2022_00731 +18,FeTS2022_00540 +18,FeTS2022_00464 +18,FeTS2022_01622 +18,FeTS2022_01154 +18,FeTS2022_01559 
+18,FeTS2022_00729 +18,FeTS2022_00708 +18,FeTS2022_00044 +18,FeTS2022_00705 +18,FeTS2022_00645 +18,FeTS2022_01640 +18,FeTS2022_00008 +18,FeTS2022_00746 +18,FeTS2022_01551 +18,FeTS2022_01610 +18,FeTS2022_00061 +18,FeTS2022_00642 +18,FeTS2022_00675 +18,FeTS2022_01651 +18,FeTS2022_00651 +18,FeTS2022_00626 +18,FeTS2022_00028 +18,FeTS2022_01557 +18,FeTS2022_01616 +18,FeTS2022_00684 +18,FeTS2022_01538 +18,FeTS2022_01647 +18,FeTS2022_00688 +18,FeTS2022_00737 +18,FeTS2022_00063 +18,FeTS2022_00758 +18,FeTS2022_01159 +18,FeTS2022_00615 +18,FeTS2022_00621 +18,FeTS2022_01543 +18,FeTS2022_01560 +18,FeTS2022_00058 +18,FeTS2022_00009 +18,FeTS2022_00544 +18,FeTS2022_01611 +18,FeTS2022_00485 +18,FeTS2022_00735 +18,FeTS2022_00659 +18,FeTS2022_00025 +18,FeTS2022_00550 +18,FeTS2022_01599 +18,FeTS2022_00636 +18,FeTS2022_01644 +18,FeTS2022_00716 +18,FeTS2022_00641 +18,FeTS2022_01624 +18,FeTS2022_00547 +18,FeTS2022_00046 +18,FeTS2022_00728 +18,FeTS2022_00045 +18,FeTS2022_00493 +18,FeTS2022_00089 +18,FeTS2022_00622 +18,FeTS2022_01643 +18,FeTS2022_00602 +18,FeTS2022_00035 +18,FeTS2022_01545 +18,FeTS2022_00014 +18,FeTS2022_01566 +18,FeTS2022_00066 +18,FeTS2022_01614 +18,FeTS2022_01591 +18,FeTS2022_00514 +18,FeTS2022_01588 +18,FeTS2022_00520 +18,FeTS2022_01556 +18,FeTS2022_00097 +18,FeTS2022_00555 +18,FeTS2022_00736 +18,FeTS2022_00639 +18,FeTS2022_00479 +18,FeTS2022_01550 +18,FeTS2022_01592 +18,FeTS2022_01626 +18,FeTS2022_00557 +18,FeTS2022_00496 +18,FeTS2022_00778 +18,FeTS2022_01561 +18,FeTS2022_00690 +18,FeTS2022_00750 +18,FeTS2022_01586 +18,FeTS2022_01549 +18,FeTS2022_01555 +18,FeTS2022_01612 +18,FeTS2022_01600 +18,FeTS2022_01629 +18,FeTS2022_01656 +18,FeTS2022_00500 +18,FeTS2022_00529 +18,FeTS2022_00628 +18,FeTS2022_00775 +18,FeTS2022_00523 +18,FeTS2022_00488 +18,FeTS2022_00518 +18,FeTS2022_00000 +18,FeTS2022_00020 +18,FeTS2022_01646 +18,FeTS2022_01638 +18,FeTS2022_00630 +18,FeTS2022_01590 +18,FeTS2022_01613 +18,FeTS2022_01571 +18,FeTS2022_00519 +18,FeTS2022_01617 +18,FeTS2022_01623 
+18,FeTS2022_00691 +18,FeTS2022_01027 +18,FeTS2022_00704 +18,FeTS2022_00098 +18,FeTS2022_01558 +18,FeTS2022_00715 +18,FeTS2022_00757 +18,FeTS2022_00084 +18,FeTS2022_00692 +18,FeTS2022_00078 +18,FeTS2022_00747 +18,FeTS2022_01607 +18,FeTS2022_00751 +18,FeTS2022_00011 +18,FeTS2022_00610 +18,FeTS2022_00694 +18,FeTS2022_00026 +18,FeTS2022_00658 +18,FeTS2022_01544 +18,FeTS2022_01583 +18,FeTS2022_00680 +18,FeTS2022_01028 +18,FeTS2022_01636 +18,FeTS2022_00545 +18,FeTS2022_00072 +18,FeTS2022_00016 +18,FeTS2022_01548 +18,FeTS2022_00624 +18,FeTS2022_00676 +18,FeTS2022_00533 +18,FeTS2022_01574 +18,FeTS2022_01582 +18,FeTS2022_00085 +18,FeTS2022_00613 +18,FeTS2022_01593 +18,FeTS2022_00730 +18,FeTS2022_01585 +18,FeTS2022_00524 +18,FeTS2022_00081 +18,FeTS2022_00472 +18,FeTS2022_00478 +18,FeTS2022_00469 +18,FeTS2022_00682 +18,FeTS2022_00733 +18,FeTS2022_00723 +18,FeTS2022_00099 +18,FeTS2022_00744 +18,FeTS2022_00048 +18,FeTS2022_00480 +18,FeTS2022_00650 +18,FeTS2022_00601 +18,FeTS2022_00542 +18,FeTS2022_00667 +18,FeTS2022_00505 +18,FeTS2022_01539 +18,FeTS2022_00764 +18,FeTS2022_00506 +18,FeTS2022_01649 +18,FeTS2022_00032 +18,FeTS2022_00021 +18,FeTS2022_00685 +18,FeTS2022_00611 +18,FeTS2022_00511 +18,FeTS2022_01584 +18,FeTS2022_01635 +18,FeTS2022_00607 +18,FeTS2022_00071 +18,FeTS2022_00687 +18,FeTS2022_00767 +18,FeTS2022_00537 +18,FeTS2022_01630 +18,FeTS2022_00740 +18,FeTS2022_00525 +18,FeTS2022_00725 +18,FeTS2022_00502 +18,FeTS2022_01562 +18,FeTS2022_01577 +18,FeTS2022_01576 +18,FeTS2022_01595 +18,FeTS2022_00654 +18,FeTS2022_00090 +18,FeTS2022_01645 +18,FeTS2022_01564 +18,FeTS2022_01567 +18,FeTS2022_00703 +18,FeTS2022_00043 +18,FeTS2022_00003 +18,FeTS2022_00495 +18,FeTS2022_00017 +18,FeTS2022_00491 +18,FeTS2022_00054 +18,FeTS2022_00510 +18,FeTS2022_00618 +18,FeTS2022_00064 +18,FeTS2022_00024 +18,FeTS2022_00709 +18,FeTS2022_01653 +18,FeTS2022_01579 +18,FeTS2022_01572 +18,FeTS2022_01156 +18,FeTS2022_00707 +18,FeTS2022_01540 +18,FeTS2022_00056 +18,FeTS2022_00620 +18,FeTS2022_00470 
+18,FeTS2022_00499 +18,FeTS2022_00640 +18,FeTS2022_00549 +18,FeTS2022_01601 +18,FeTS2022_00608 +18,FeTS2022_00727 +18,FeTS2022_00773 +18,FeTS2022_00504 +18,FeTS2022_01604 +18,FeTS2022_01158 +18,FeTS2022_00051 +18,FeTS2022_00768 +18,FeTS2022_01161 +18,FeTS2022_00765 +18,FeTS2022_00068 +18,FeTS2022_00551 +18,FeTS2022_01605 +18,FeTS2022_00674 +18,FeTS2022_01157 +18,FeTS2022_01631 +18,FeTS2022_00022 +18,FeTS2022_00777 +18,FeTS2022_01609 +18,FeTS2022_01633 +18,FeTS2022_01652 +18,FeTS2022_00759 +18,FeTS2022_01655 +18,FeTS2022_01639 +18,FeTS2022_01563 +18,FeTS2022_00661 +18,FeTS2022_00087 +18,FeTS2022_00030 +18,FeTS2022_00556 +18,FeTS2022_01597 +18,FeTS2022_00724 +18,FeTS2022_00096 +18,FeTS2022_00049 +18,FeTS2022_00683 +18,FeTS2022_00059 +18,FeTS2022_01596 +18,FeTS2022_00498 +18,FeTS2022_00543 +18,FeTS2022_01641 +18,FeTS2022_01542 +18,FeTS2022_00062 +18,FeTS2022_00005 +18,FeTS2022_00646 +18,FeTS2022_00088 +18,FeTS2022_00656 +18,FeTS2022_01589 +18,FeTS2022_01160 +18,FeTS2022_01547 +18,FeTS2022_01606 +18,FeTS2022_00631 +18,FeTS2022_00756 +18,FeTS2022_00619 +18,FeTS2022_00698 +18,FeTS2022_01541 +18,FeTS2022_00539 +18,FeTS2022_00053 +18,FeTS2022_01618 +18,FeTS2022_00693 +18,FeTS2022_00616 +18,FeTS2022_01642 +18,FeTS2022_01632 +18,FeTS2022_00718 +18,FeTS2022_00006 +18,FeTS2022_00466 +18,FeTS2022_01565 +18,FeTS2022_01621 +18,FeTS2022_00697 +18,FeTS2022_00689 +18,FeTS2022_00554 +18,FeTS2022_00638 +18,FeTS2022_00517 +18,FeTS2022_00019 +18,FeTS2022_01650 +18,FeTS2022_01602 +18,FeTS2022_01570 +18,FeTS2022_00655 +18,FeTS2022_00552 +18,FeTS2022_00706 +18,FeTS2022_01654 +18,FeTS2022_00481 +18,FeTS2022_00604 +18,FeTS2022_00612 +18,FeTS2022_00774 +18,FeTS2022_00625 +18,FeTS2022_00070 +18,FeTS2022_00649 +18,FeTS2022_00036 +18,FeTS2022_01546 +18,FeTS2022_00559 +18,FeTS2022_00018 +18,FeTS2022_00507 +18,FeTS2022_00760 +18,FeTS2022_01568 +18,FeTS2022_00094 +18,FeTS2022_00526 +18,FeTS2022_01575 +18,FeTS2022_00512 +18,FeTS2022_00033 +18,FeTS2022_01648 +18,FeTS2022_00052 +18,FeTS2022_01625 
+18,FeTS2022_01573 +18,FeTS2022_00623 +18,FeTS2022_01153 +18,FeTS2022_00532 +18,FeTS2022_00516 +18,FeTS2022_00679 +18,FeTS2022_00468 +18,FeTS2022_00494 +18,FeTS2022_00483 +18,FeTS2022_01552 +18,FeTS2022_00606 +18,FeTS2022_00742 +18,FeTS2022_00677 +18,FeTS2022_00652 +18,FeTS2022_00074 +18,FeTS2022_00513 +18,FeTS2022_01581 +18,FeTS2022_00663 +18,FeTS2022_00734 +18,FeTS2022_01619 +18,FeTS2022_00668 +18,FeTS2022_00558 +18,FeTS2022_00002 +18,FeTS2022_01598 +18,FeTS2022_00477 +18,FeTS2022_01634 +18,FeTS2022_00501 +18,FeTS2022_01155 +18,FeTS2022_00077 +18,FeTS2022_01578 +18,FeTS2022_01569 +18,FeTS2022_01603 +18,FeTS2022_00538 +18,FeTS2022_00714 +18,FeTS2022_00031 +18,FeTS2022_01627 +18,FeTS2022_01553 +18,FeTS2022_00548 +18,FeTS2022_00739 +18,FeTS2022_00103 +18,FeTS2022_00528 +18,FeTS2022_01608 +18,FeTS2022_00095 +18,FeTS2022_00060 +18,FeTS2022_01554 +18,FeTS2022_00657 +18,FeTS2022_01587 +18,FeTS2022_00605 +18,FeTS2022_00686 +18,FeTS2022_00012 +19,FeTS2022_01166 +19,FeTS2022_01163 +19,FeTS2022_01165 +19,FeTS2022_01164 +20,FeTS2022_00444 +20,FeTS2022_01014 +20,FeTS2022_00442 +20,FeTS2022_01025 +20,FeTS2022_01024 +20,FeTS2022_00101 +20,FeTS2022_00453 +20,FeTS2022_01013 +20,FeTS2022_01011 +20,FeTS2022_00459 +20,FeTS2022_00457 +20,FeTS2022_01016 +20,FeTS2022_00448 +20,FeTS2022_01023 +20,FeTS2022_01017 +20,FeTS2022_00443 +20,FeTS2022_00455 +20,FeTS2022_00127 +20,FeTS2022_01012 +20,FeTS2022_01018 +20,FeTS2022_01022 +20,FeTS2022_00451 +20,FeTS2022_00445 +20,FeTS2022_00452 +20,FeTS2022_00454 +20,FeTS2022_01019 +20,FeTS2022_01021 +20,FeTS2022_01020 +20,FeTS2022_01026 +20,FeTS2022_00456 +20,FeTS2022_00446 +20,FeTS2022_00449 +20,FeTS2022_01015 +21,FeTS2022_00802 +21,FeTS2022_00788 +21,FeTS2022_00795 +21,FeTS2022_00820 +21,FeTS2022_00782 +21,FeTS2022_00800 +21,FeTS2022_00830 +21,FeTS2022_00824 +21,FeTS2022_00805 +21,FeTS2022_00796 +21,FeTS2022_00823 +21,FeTS2022_00828 +21,FeTS2022_00811 +21,FeTS2022_00789 +21,FeTS2022_00801 +21,FeTS2022_00780 +21,FeTS2022_00781 +21,FeTS2022_00814 
+21,FeTS2022_00806 +21,FeTS2022_00810 +21,FeTS2022_00807 +21,FeTS2022_00818 +21,FeTS2022_00791 +21,FeTS2022_00787 +21,FeTS2022_00808 +21,FeTS2022_00809 +21,FeTS2022_00803 +21,FeTS2022_00816 +21,FeTS2022_00819 +21,FeTS2022_00793 +21,FeTS2022_00799 +21,FeTS2022_00797 +21,FeTS2022_00784 +21,FeTS2022_00804 +21,FeTS2022_00792 +22,FeTS2022_01031 +22,FeTS2022_01033 +22,FeTS2022_01030 +22,FeTS2022_00118 +22,FeTS2022_01029 +22,FeTS2022_00126 +22,FeTS2022_01032 +23,FeTS2022_01147 +23,FeTS2022_01149 +23,FeTS2022_01150 +23,FeTS2022_01148 +23,FeTS2022_01162 diff --git a/Task_1/partitioning_data/partitioning_2.csv b/Task_1/partitioning_data/partitioning_2.csv new file mode 100644 index 0000000..798f651 --- /dev/null +++ b/Task_1/partitioning_data/partitioning_2.csv @@ -0,0 +1,1252 @@ +Partition_ID,Subject_ID +1,FeTS2022_01341 +1,FeTS2022_01333 +1,FeTS2022_01077 +1,FeTS2022_01054 +1,FeTS2022_00285 +1,FeTS2022_01308 +1,FeTS2022_01363 +1,FeTS2022_01091 +1,FeTS2022_01273 +1,FeTS2022_01108 +1,FeTS2022_01255 +1,FeTS2022_01301 +1,FeTS2022_00219 +1,FeTS2022_00380 +1,FeTS2022_01349 +1,FeTS2022_00251 +1,FeTS2022_01276 +1,FeTS2022_01407 +1,FeTS2022_01344 +1,FeTS2022_01405 +1,FeTS2022_00218 +1,FeTS2022_01327 +1,FeTS2022_01252 +1,FeTS2022_01132 +1,FeTS2022_01036 +1,FeTS2022_01039 +1,FeTS2022_01366 +1,FeTS2022_00262 +1,FeTS2022_01279 +1,FeTS2022_00839 +1,FeTS2022_01322 +1,FeTS2022_00389 +1,FeTS2022_00390 +1,FeTS2022_00431 +1,FeTS2022_00222 +1,FeTS2022_00373 +1,FeTS2022_00288 +1,FeTS2022_00284 +1,FeTS2022_01088 +1,FeTS2022_00311 +1,FeTS2022_00387 +1,FeTS2022_00258 +1,FeTS2022_01389 +1,FeTS2022_00321 +1,FeTS2022_01249 +1,FeTS2022_01230 +1,FeTS2022_00836 +1,FeTS2022_00348 +1,FeTS2022_01205 +1,FeTS2022_00246 +1,FeTS2022_00314 +1,FeTS2022_01404 +1,FeTS2022_01102 +1,FeTS2022_00379 +1,FeTS2022_01395 +1,FeTS2022_00155 +1,FeTS2022_00170 +1,FeTS2022_01264 +1,FeTS2022_00837 +1,FeTS2022_01372 +1,FeTS2022_00341 +1,FeTS2022_01257 +1,FeTS2022_00329 +1,FeTS2022_00425 +1,FeTS2022_01350 +1,FeTS2022_01247 
+1,FeTS2022_01234 +1,FeTS2022_00331 +1,FeTS2022_01128 +1,FeTS2022_01365 +1,FeTS2022_00221 +1,FeTS2022_00298 +1,FeTS2022_00227 +1,FeTS2022_01204 +1,FeTS2022_00204 +1,FeTS2022_01399 +1,FeTS2022_00377 +1,FeTS2022_00343 +1,FeTS2022_00280 +1,FeTS2022_01347 +1,FeTS2022_00210 +1,FeTS2022_01117 +1,FeTS2022_01275 +1,FeTS2022_01034 +1,FeTS2022_00162 +1,FeTS2022_01340 +1,FeTS2022_01212 +1,FeTS2022_01220 +1,FeTS2022_00419 +1,FeTS2022_00340 +1,FeTS2022_00296 +1,FeTS2022_01208 +1,FeTS2022_01064 +1,FeTS2022_00433 +1,FeTS2022_01050 +1,FeTS2022_01278 +1,FeTS2022_00293 +1,FeTS2022_00206 +1,FeTS2022_00356 +1,FeTS2022_00376 +1,FeTS2022_00316 +1,FeTS2022_00403 +1,FeTS2022_01348 +1,FeTS2022_00192 +1,FeTS2022_00313 +1,FeTS2022_01240 +1,FeTS2022_01222 +1,FeTS2022_00344 +1,FeTS2022_00332 +1,FeTS2022_00292 +1,FeTS2022_01392 +1,FeTS2022_00220 +1,FeTS2022_00378 +1,FeTS2022_01130 +1,FeTS2022_01106 +1,FeTS2022_01295 +1,FeTS2022_01409 +1,FeTS2022_01057 +1,FeTS2022_01068 +1,FeTS2022_00320 +1,FeTS2022_00346 +1,FeTS2022_01001 +1,FeTS2022_01207 +1,FeTS2022_01137 +1,FeTS2022_01318 +1,FeTS2022_00289 +1,FeTS2022_00157 +1,FeTS2022_01224 +1,FeTS2022_01367 +1,FeTS2022_01382 +1,FeTS2022_00309 +1,FeTS2022_01008 +1,FeTS2022_01059 +1,FeTS2022_01271 +1,FeTS2022_01110 +1,FeTS2022_01398 +1,FeTS2022_01119 +1,FeTS2022_00209 +1,FeTS2022_01274 +1,FeTS2022_01124 +1,FeTS2022_00241 +1,FeTS2022_00152 +1,FeTS2022_01131 +1,FeTS2022_01353 +1,FeTS2022_00352 +1,FeTS2022_01133 +1,FeTS2022_01084 +1,FeTS2022_00441 +1,FeTS2022_01268 +1,FeTS2022_01210 +1,FeTS2022_01375 +1,FeTS2022_00236 +1,FeTS2022_01218 +1,FeTS2022_01408 +1,FeTS2022_00274 +1,FeTS2022_01118 +1,FeTS2022_01213 +1,FeTS2022_01310 +1,FeTS2022_00194 +1,FeTS2022_00392 +1,FeTS2022_00334 +1,FeTS2022_00270 +1,FeTS2022_01359 +1,FeTS2022_01364 +1,FeTS2022_01336 +1,FeTS2022_01272 +1,FeTS2022_01090 +1,FeTS2022_00412 +1,FeTS2022_00228 +1,FeTS2022_00410 +2,FeTS2022_01239 +2,FeTS2022_01010 +2,FeTS2022_01394 +2,FeTS2022_00282 +2,FeTS2022_00237 +2,FeTS2022_01390 +2,FeTS2022_00382 
+2,FeTS2022_00188 +2,FeTS2022_01211 +2,FeTS2022_01376 +2,FeTS2022_01243 +2,FeTS2022_01330 +2,FeTS2022_00253 +2,FeTS2022_01329 +2,FeTS2022_01306 +2,FeTS2022_01081 +2,FeTS2022_01369 +2,FeTS2022_01048 +2,FeTS2022_00328 +2,FeTS2022_00291 +2,FeTS2022_01049 +2,FeTS2022_01263 +2,FeTS2022_00317 +2,FeTS2022_00305 +2,FeTS2022_01265 +2,FeTS2022_00238 +2,FeTS2022_00423 +2,FeTS2022_01127 +2,FeTS2022_01379 +2,FeTS2022_01258 +2,FeTS2022_00299 +2,FeTS2022_01334 +2,FeTS2022_00350 +2,FeTS2022_01109 +2,FeTS2022_01352 +2,FeTS2022_01055 +2,FeTS2022_00167 +2,FeTS2022_01354 +2,FeTS2022_01231 +2,FeTS2022_00185 +2,FeTS2022_00306 +2,FeTS2022_00171 +2,FeTS2022_01261 +2,FeTS2022_01345 +2,FeTS2022_01397 +2,FeTS2022_00399 +2,FeTS2022_01319 +2,FeTS2022_01250 +2,FeTS2022_01097 +2,FeTS2022_01229 +2,FeTS2022_01393 +2,FeTS2022_00430 +2,FeTS2022_01203 +2,FeTS2022_01309 +2,FeTS2022_01342 +2,FeTS2022_01223 +2,FeTS2022_00239 +2,FeTS2022_00275 +2,FeTS2022_00406 +2,FeTS2022_01116 +2,FeTS2022_01380 +2,FeTS2022_00214 +2,FeTS2022_00195 +2,FeTS2022_01314 +2,FeTS2022_01113 +2,FeTS2022_00193 +2,FeTS2022_01259 +2,FeTS2022_00386 +2,FeTS2022_00834 +2,FeTS2022_01227 +2,FeTS2022_01277 +2,FeTS2022_00283 +2,FeTS2022_01099 +2,FeTS2022_00212 +2,FeTS2022_00165 +2,FeTS2022_01332 +2,FeTS2022_00364 +2,FeTS2022_01129 +2,FeTS2022_00301 +2,FeTS2022_01402 +2,FeTS2022_00199 +2,FeTS2022_01066 +2,FeTS2022_01107 +2,FeTS2022_01337 +2,FeTS2022_00230 +2,FeTS2022_01114 +2,FeTS2022_01294 +2,FeTS2022_01370 +2,FeTS2022_01269 +2,FeTS2022_01043 +2,FeTS2022_00359 +2,FeTS2022_01004 +2,FeTS2022_00286 +2,FeTS2022_01038 +2,FeTS2022_00370 +2,FeTS2022_00184 +2,FeTS2022_00360 +2,FeTS2022_01123 +2,FeTS2022_01237 +2,FeTS2022_01086 +2,FeTS2022_00231 +2,FeTS2022_00353 +2,FeTS2022_01254 +2,FeTS2022_01373 +2,FeTS2022_01100 +2,FeTS2022_01214 +2,FeTS2022_01242 +2,FeTS2022_01115 +2,FeTS2022_01331 +2,FeTS2022_00391 +2,FeTS2022_01312 +2,FeTS2022_00324 +2,FeTS2022_01080 +2,FeTS2022_00371 +2,FeTS2022_01396 +2,FeTS2022_00339 +2,FeTS2022_00260 +2,FeTS2022_00243 
+2,FeTS2022_00233 +2,FeTS2022_01323 +2,FeTS2022_01248 +2,FeTS2022_00263 +2,FeTS2022_00347 +2,FeTS2022_01233 +2,FeTS2022_00367 +2,FeTS2022_01051 +2,FeTS2022_01126 +2,FeTS2022_01267 +2,FeTS2022_00383 +2,FeTS2022_01357 +2,FeTS2022_00413 +2,FeTS2022_01287 +2,FeTS2022_00349 +2,FeTS2022_01244 +2,FeTS2022_01041 +2,FeTS2022_01236 +2,FeTS2022_01245 +2,FeTS2022_01383 +2,FeTS2022_00196 +2,FeTS2022_01387 +2,FeTS2022_00297 +2,FeTS2022_01103 +2,FeTS2022_01098 +2,FeTS2022_01410 +2,FeTS2022_00440 +2,FeTS2022_01305 +2,FeTS2022_01304 +2,FeTS2022_01074 +2,FeTS2022_01046 +2,FeTS2022_01226 +2,FeTS2022_01253 +2,FeTS2022_01040 +2,FeTS2022_00269 +2,FeTS2022_00310 +2,FeTS2022_01056 +2,FeTS2022_01311 +2,FeTS2022_01338 +2,FeTS2022_00166 +2,FeTS2022_00327 +2,FeTS2022_00254 +2,FeTS2022_01000 +2,FeTS2022_00259 +2,FeTS2022_01134 +2,FeTS2022_01104 +2,FeTS2022_01232 +2,FeTS2022_01286 +2,FeTS2022_01052 +2,FeTS2022_01217 +2,FeTS2022_01238 +2,FeTS2022_00154 +3,FeTS2022_00395 +3,FeTS2022_00267 +3,FeTS2022_00366 +3,FeTS2022_00351 +3,FeTS2022_00159 +3,FeTS2022_00131 +3,FeTS2022_01246 +3,FeTS2022_01060 +3,FeTS2022_01087 +3,FeTS2022_00250 +3,FeTS2022_00234 +3,FeTS2022_01058 +3,FeTS2022_00235 +3,FeTS2022_00203 +3,FeTS2022_00414 +3,FeTS2022_01285 +3,FeTS2022_01071 +3,FeTS2022_01111 +3,FeTS2022_01377 +3,FeTS2022_01355 +3,FeTS2022_01384 +3,FeTS2022_01120 +3,FeTS2022_01082 +3,FeTS2022_01076 +3,FeTS2022_01072 +3,FeTS2022_00303 +3,FeTS2022_00436 +3,FeTS2022_01361 +3,FeTS2022_01073 +3,FeTS2022_00338 +3,FeTS2022_01351 +3,FeTS2022_00273 +3,FeTS2022_00186 +3,FeTS2022_00290 +3,FeTS2022_01381 +3,FeTS2022_01083 +3,FeTS2022_00409 +3,FeTS2022_00281 +3,FeTS2022_00840 +3,FeTS2022_00407 +3,FeTS2022_01094 +3,FeTS2022_01328 +3,FeTS2022_01078 +3,FeTS2022_00312 +3,FeTS2022_01235 +3,FeTS2022_01288 +3,FeTS2022_01391 +3,FeTS2022_01215 +3,FeTS2022_00160 +3,FeTS2022_00421 +3,FeTS2022_01317 +3,FeTS2022_01216 +3,FeTS2022_00178 +3,FeTS2022_00838 +3,FeTS2022_01321 +3,FeTS2022_01037 +3,FeTS2022_00176 +3,FeTS2022_01293 +3,FeTS2022_01219 
+3,FeTS2022_01260 +3,FeTS2022_01339 +3,FeTS2022_01325 +3,FeTS2022_00249 +3,FeTS2022_01241 +3,FeTS2022_00211 +3,FeTS2022_01105 +3,FeTS2022_01138 +3,FeTS2022_00261 +3,FeTS2022_01316 +3,FeTS2022_01315 +3,FeTS2022_01256 +3,FeTS2022_00191 +3,FeTS2022_01069 +3,FeTS2022_01062 +3,FeTS2022_01135 +3,FeTS2022_00207 +3,FeTS2022_00401 +3,FeTS2022_00172 +3,FeTS2022_01085 +3,FeTS2022_00247 +3,FeTS2022_01206 +3,FeTS2022_01356 +3,FeTS2022_00325 +3,FeTS2022_00429 +3,FeTS2022_01122 +3,FeTS2022_01374 +3,FeTS2022_00156 +3,FeTS2022_01075 +3,FeTS2022_01362 +3,FeTS2022_01251 +3,FeTS2022_00405 +3,FeTS2022_01047 +3,FeTS2022_00240 +3,FeTS2022_00336 +3,FeTS2022_01092 +3,FeTS2022_01403 +3,FeTS2022_01385 +3,FeTS2022_01096 +3,FeTS2022_00426 +3,FeTS2022_00201 +3,FeTS2022_01335 +3,FeTS2022_00404 +3,FeTS2022_00322 +3,FeTS2022_00294 +3,FeTS2022_01070 +3,FeTS2022_01225 +3,FeTS2022_01067 +3,FeTS2022_00375 +3,FeTS2022_00158 +3,FeTS2022_00177 +3,FeTS2022_00271 +3,FeTS2022_01388 +3,FeTS2022_01053 +3,FeTS2022_01042 +3,FeTS2022_01400 +3,FeTS2022_00183 +3,FeTS2022_01358 +3,FeTS2022_01266 +3,FeTS2022_01360 +3,FeTS2022_00304 +3,FeTS2022_01065 +3,FeTS2022_01093 +3,FeTS2022_00397 +3,FeTS2022_01262 +3,FeTS2022_00217 +3,FeTS2022_01401 +3,FeTS2022_01125 +3,FeTS2022_01406 +3,FeTS2022_01343 +3,FeTS2022_01346 +3,FeTS2022_01089 +3,FeTS2022_00216 +3,FeTS2022_01061 +3,FeTS2022_01299 +3,FeTS2022_00242 +3,FeTS2022_01112 +3,FeTS2022_00300 +3,FeTS2022_01280 +3,FeTS2022_00187 +3,FeTS2022_00318 +3,FeTS2022_01371 +3,FeTS2022_01378 +3,FeTS2022_00418 +3,FeTS2022_01121 +3,FeTS2022_01136 +3,FeTS2022_00266 +3,FeTS2022_01221 +3,FeTS2022_01307 +3,FeTS2022_01386 +3,FeTS2022_00432 +3,FeTS2022_01101 +3,FeTS2022_01228 +3,FeTS2022_01313 +3,FeTS2022_01209 +3,FeTS2022_00388 +3,FeTS2022_01270 +3,FeTS2022_01044 +3,FeTS2022_00417 +3,FeTS2022_01063 +3,FeTS2022_01368 +3,FeTS2022_00369 +3,FeTS2022_01095 +3,FeTS2022_00416 +3,FeTS2022_00400 +3,FeTS2022_01045 +3,FeTS2022_01202 +3,FeTS2022_01326 +3,FeTS2022_01079 +3,FeTS2022_00402 +3,FeTS2022_01320 
+3,FeTS2022_01324 +4,FeTS2022_01412 +4,FeTS2022_01415 +4,FeTS2022_01411 +4,FeTS2022_01414 +4,FeTS2022_01413 +4,FeTS2022_01416 +5,FeTS2022_01439 +5,FeTS2022_01435 +5,FeTS2022_01434 +5,FeTS2022_01440 +5,FeTS2022_01431 +5,FeTS2022_01437 +5,FeTS2022_01436 +5,FeTS2022_01433 +5,FeTS2022_01438 +5,FeTS2022_01426 +5,FeTS2022_01427 +5,FeTS2022_01428 +5,FeTS2022_01429 +5,FeTS2022_01432 +5,FeTS2022_01430 +6,FeTS2022_01152 +6,FeTS2022_01178 +6,FeTS2022_01186 +6,FeTS2022_01184 +6,FeTS2022_01181 +6,FeTS2022_01187 +6,FeTS2022_01168 +6,FeTS2022_01196 +6,FeTS2022_01173 +6,FeTS2022_01176 +6,FeTS2022_01200 +6,FeTS2022_00565 +6,FeTS2022_01193 +6,FeTS2022_01174 +6,FeTS2022_01662 +6,FeTS2022_01660 +7,FeTS2022_01201 +7,FeTS2022_01167 +7,FeTS2022_01170 +7,FeTS2022_01179 +7,FeTS2022_01185 +7,FeTS2022_01197 +7,FeTS2022_01172 +7,FeTS2022_01189 +7,FeTS2022_00563 +7,FeTS2022_01180 +7,FeTS2022_01198 +7,FeTS2022_01183 +7,FeTS2022_01151 +7,FeTS2022_01195 +7,FeTS2022_01657 +8,FeTS2022_01194 +8,FeTS2022_01191 +8,FeTS2022_01169 +8,FeTS2022_01171 +8,FeTS2022_00561 +8,FeTS2022_01659 +8,FeTS2022_01661 +8,FeTS2022_01190 +8,FeTS2022_01188 +8,FeTS2022_01199 +8,FeTS2022_01658 +8,FeTS2022_01192 +8,FeTS2022_01175 +8,FeTS2022_01182 +8,FeTS2022_01537 +8,FeTS2022_01177 +9,FeTS2022_00102 +9,FeTS2022_00149 +9,FeTS2022_01290 +9,FeTS2022_00113 +9,FeTS2022_01009 +9,FeTS2022_01007 +9,FeTS2022_01002 +9,FeTS2022_00139 +9,FeTS2022_01292 +9,FeTS2022_00100 +9,FeTS2022_01289 +9,FeTS2022_01291 +9,FeTS2022_01005 +9,FeTS2022_01282 +9,FeTS2022_01003 +9,FeTS2022_00109 +9,FeTS2022_01283 +9,FeTS2022_00999 +9,FeTS2022_01281 +9,FeTS2022_01284 +9,FeTS2022_00151 +9,FeTS2022_00123 +10,FeTS2022_01451 +10,FeTS2022_01453 +10,FeTS2022_01452 +10,FeTS2022_00831 +10,FeTS2022_01448 +10,FeTS2022_01300 +10,FeTS2022_01443 +10,FeTS2022_00136 +10,FeTS2022_01454 +10,FeTS2022_00144 +10,FeTS2022_00121 +10,FeTS2022_01297 +10,FeTS2022_00133 +10,FeTS2022_01447 +10,FeTS2022_00142 +10,FeTS2022_01450 +10,FeTS2022_00120 +10,FeTS2022_01298 +10,FeTS2022_01449 
+10,FeTS2022_01442 +10,FeTS2022_01446 +10,FeTS2022_01303 +10,FeTS2022_01296 +10,FeTS2022_00132 +10,FeTS2022_01441 +10,FeTS2022_01445 +10,FeTS2022_01302 +10,FeTS2022_00143 +10,FeTS2022_00105 +10,FeTS2022_01444 +10,FeTS2022_00147 +10,FeTS2022_01455 +10,FeTS2022_00146 +10,FeTS2022_00137 +11,FeTS2022_01459 +11,FeTS2022_01464 +11,FeTS2022_01458 +11,FeTS2022_01457 +11,FeTS2022_01461 +11,FeTS2022_01456 +11,FeTS2022_01460 +11,FeTS2022_01462 +11,FeTS2022_01466 +11,FeTS2022_01465 +11,FeTS2022_01463 +11,FeTS2022_01467 +12,FeTS2022_00140 +12,FeTS2022_01469 +12,FeTS2022_01468 +12,FeTS2022_01470 +12,FeTS2022_00104 +12,FeTS2022_00110 +12,FeTS2022_00112 +12,FeTS2022_00128 +13,FeTS2022_00134 +13,FeTS2022_00150 +13,FeTS2022_00116 +13,FeTS2022_01471 +14,FeTS2022_01472 +14,FeTS2022_00117 +14,FeTS2022_00130 +14,FeTS2022_00138 +14,FeTS2022_01473 +14,FeTS2022_00111 +14,FeTS2022_00124 +14,FeTS2022_00106 +15,FeTS2022_00122 +15,FeTS2022_00148 +15,FeTS2022_01474 +15,FeTS2022_00108 +15,FeTS2022_01144 +15,FeTS2022_00107 +15,FeTS2022_01140 +15,FeTS2022_01146 +15,FeTS2022_01145 +15,FeTS2022_01139 +15,FeTS2022_01141 +15,FeTS2022_01142 +15,FeTS2022_01143 +15,FeTS2022_01475 +16,FeTS2022_01482 +16,FeTS2022_01480 +16,FeTS2022_01485 +16,FeTS2022_01476 +16,FeTS2022_01481 +16,FeTS2022_01483 +16,FeTS2022_01486 +16,FeTS2022_01484 +16,FeTS2022_01479 +16,FeTS2022_01477 +16,FeTS2022_01478 +17,FeTS2022_01491 +17,FeTS2022_01500 +17,FeTS2022_01519 +17,FeTS2022_01516 +17,FeTS2022_01509 +17,FeTS2022_01520 +17,FeTS2022_01508 +17,FeTS2022_01503 +17,FeTS2022_01488 +17,FeTS2022_01492 +17,FeTS2022_01502 +17,FeTS2022_01493 +18,FeTS2022_01497 +18,FeTS2022_01499 +18,FeTS2022_01487 +18,FeTS2022_01505 +18,FeTS2022_01504 +18,FeTS2022_01490 +18,FeTS2022_01507 +18,FeTS2022_01510 +18,FeTS2022_01512 +18,FeTS2022_01514 +18,FeTS2022_01517 +19,FeTS2022_01501 +19,FeTS2022_01518 +19,FeTS2022_01506 +19,FeTS2022_01515 +19,FeTS2022_01511 +19,FeTS2022_01494 +19,FeTS2022_01489 +19,FeTS2022_01513 +19,FeTS2022_01496 +19,FeTS2022_01495 
+19,FeTS2022_01521 +19,FeTS2022_01498 +20,FeTS2022_01522 +20,FeTS2022_01525 +20,FeTS2022_01526 +20,FeTS2022_01527 +20,FeTS2022_01524 +20,FeTS2022_01523 +21,FeTS2022_01530 +21,FeTS2022_01536 +21,FeTS2022_01535 +21,FeTS2022_01663 +21,FeTS2022_01534 +21,FeTS2022_01529 +21,FeTS2022_01531 +21,FeTS2022_01666 +21,FeTS2022_01665 +21,FeTS2022_01532 +21,FeTS2022_01664 +21,FeTS2022_01528 +21,FeTS2022_01533 +22,FeTS2022_00584 +22,FeTS2022_00567 +22,FeTS2022_00571 +22,FeTS2022_00582 +22,FeTS2022_00570 +22,FeTS2022_00594 +22,FeTS2022_00597 +22,FeTS2022_00596 +22,FeTS2022_00576 +22,FeTS2022_00572 +22,FeTS2022_00115 +22,FeTS2022_00593 +22,FeTS2022_00588 +22,FeTS2022_00598 +22,FeTS2022_00589 +22,FeTS2022_00574 +22,FeTS2022_00586 +22,FeTS2022_00579 +22,FeTS2022_00590 +22,FeTS2022_00599 +22,FeTS2022_00577 +22,FeTS2022_00575 +22,FeTS2022_00581 +22,FeTS2022_00591 +22,FeTS2022_00569 +22,FeTS2022_00587 +22,FeTS2022_00580 +22,FeTS2022_00583 +22,FeTS2022_00578 +22,FeTS2022_00568 +23,FeTS2022_01423 +23,FeTS2022_01420 +23,FeTS2022_01422 +23,FeTS2022_01417 +23,FeTS2022_01421 +23,FeTS2022_01424 +23,FeTS2022_01418 +23,FeTS2022_01425 +23,FeTS2022_01419 +24,FeTS2022_01628 +24,FeTS2022_01615 +24,FeTS2022_01035 +24,FeTS2022_00732 +24,FeTS2022_00753 +24,FeTS2022_01620 +24,FeTS2022_01637 +24,FeTS2022_01594 +24,FeTS2022_00530 +24,FeTS2022_00772 +24,FeTS2022_01580 +24,FeTS2022_00731 +24,FeTS2022_00540 +24,FeTS2022_00464 +24,FeTS2022_01622 +24,FeTS2022_01154 +24,FeTS2022_01559 +24,FeTS2022_00729 +24,FeTS2022_00708 +24,FeTS2022_00044 +24,FeTS2022_00705 +24,FeTS2022_00645 +24,FeTS2022_01640 +24,FeTS2022_00008 +24,FeTS2022_00746 +24,FeTS2022_01551 +24,FeTS2022_01610 +24,FeTS2022_00061 +24,FeTS2022_00642 +24,FeTS2022_00675 +24,FeTS2022_01651 +24,FeTS2022_00651 +24,FeTS2022_00626 +24,FeTS2022_00028 +24,FeTS2022_01557 +24,FeTS2022_01616 +24,FeTS2022_00684 +24,FeTS2022_01538 +24,FeTS2022_01647 +24,FeTS2022_00688 +24,FeTS2022_00737 +24,FeTS2022_00063 +24,FeTS2022_00758 +24,FeTS2022_01159 +24,FeTS2022_00615 
+24,FeTS2022_00621 +24,FeTS2022_01543 +24,FeTS2022_01560 +24,FeTS2022_00058 +24,FeTS2022_00009 +24,FeTS2022_00544 +24,FeTS2022_01611 +24,FeTS2022_00485 +24,FeTS2022_00735 +24,FeTS2022_00659 +24,FeTS2022_00025 +24,FeTS2022_00550 +24,FeTS2022_01599 +24,FeTS2022_00636 +24,FeTS2022_01644 +24,FeTS2022_00716 +24,FeTS2022_00641 +24,FeTS2022_01624 +24,FeTS2022_00547 +24,FeTS2022_00046 +24,FeTS2022_00728 +24,FeTS2022_00045 +24,FeTS2022_00493 +24,FeTS2022_00089 +24,FeTS2022_00622 +24,FeTS2022_01643 +24,FeTS2022_00602 +24,FeTS2022_00035 +24,FeTS2022_01545 +24,FeTS2022_00014 +24,FeTS2022_01566 +24,FeTS2022_00066 +24,FeTS2022_01614 +24,FeTS2022_01591 +24,FeTS2022_00514 +24,FeTS2022_01588 +24,FeTS2022_00520 +24,FeTS2022_01556 +24,FeTS2022_00097 +24,FeTS2022_00555 +24,FeTS2022_00736 +24,FeTS2022_00639 +24,FeTS2022_00479 +24,FeTS2022_01550 +24,FeTS2022_01592 +24,FeTS2022_01626 +24,FeTS2022_00557 +24,FeTS2022_00496 +24,FeTS2022_00778 +24,FeTS2022_01561 +24,FeTS2022_00690 +24,FeTS2022_00750 +24,FeTS2022_01586 +24,FeTS2022_01549 +24,FeTS2022_01555 +24,FeTS2022_01612 +24,FeTS2022_01600 +24,FeTS2022_01629 +24,FeTS2022_01656 +24,FeTS2022_00500 +24,FeTS2022_00529 +24,FeTS2022_00628 +24,FeTS2022_00775 +24,FeTS2022_00523 +24,FeTS2022_00488 +24,FeTS2022_00518 +24,FeTS2022_00000 +24,FeTS2022_00020 +24,FeTS2022_01646 +24,FeTS2022_01638 +24,FeTS2022_00630 +24,FeTS2022_01590 +24,FeTS2022_01613 +24,FeTS2022_01571 +24,FeTS2022_00519 +24,FeTS2022_01617 +24,FeTS2022_01623 +24,FeTS2022_00691 +24,FeTS2022_01027 +24,FeTS2022_00704 +24,FeTS2022_00098 +24,FeTS2022_01558 +25,FeTS2022_00715 +25,FeTS2022_00757 +25,FeTS2022_00084 +25,FeTS2022_00692 +25,FeTS2022_00078 +25,FeTS2022_00747 +25,FeTS2022_01607 +25,FeTS2022_00751 +25,FeTS2022_00011 +25,FeTS2022_00610 +25,FeTS2022_00694 +25,FeTS2022_00026 +25,FeTS2022_00658 +25,FeTS2022_01544 +25,FeTS2022_01583 +25,FeTS2022_00680 +25,FeTS2022_01028 +25,FeTS2022_01636 +25,FeTS2022_00545 +25,FeTS2022_00072 +25,FeTS2022_00016 +25,FeTS2022_01548 +25,FeTS2022_00624 
+25,FeTS2022_00676 +25,FeTS2022_00533 +25,FeTS2022_01574 +25,FeTS2022_01582 +25,FeTS2022_00085 +25,FeTS2022_00613 +25,FeTS2022_01593 +25,FeTS2022_00730 +25,FeTS2022_01585 +25,FeTS2022_00524 +25,FeTS2022_00081 +25,FeTS2022_00472 +25,FeTS2022_00478 +25,FeTS2022_00469 +25,FeTS2022_00682 +25,FeTS2022_00733 +25,FeTS2022_00723 +25,FeTS2022_00099 +25,FeTS2022_00744 +25,FeTS2022_00048 +25,FeTS2022_00480 +25,FeTS2022_00650 +25,FeTS2022_00601 +25,FeTS2022_00542 +25,FeTS2022_00667 +25,FeTS2022_00505 +25,FeTS2022_01539 +25,FeTS2022_00764 +25,FeTS2022_00506 +25,FeTS2022_01649 +25,FeTS2022_00032 +25,FeTS2022_00021 +25,FeTS2022_00685 +25,FeTS2022_00611 +25,FeTS2022_00511 +25,FeTS2022_01584 +25,FeTS2022_01635 +25,FeTS2022_00607 +25,FeTS2022_00071 +25,FeTS2022_00687 +25,FeTS2022_00767 +25,FeTS2022_00537 +25,FeTS2022_01630 +25,FeTS2022_00740 +25,FeTS2022_00525 +25,FeTS2022_00725 +25,FeTS2022_00502 +25,FeTS2022_01562 +25,FeTS2022_01577 +25,FeTS2022_01576 +25,FeTS2022_01595 +25,FeTS2022_00654 +25,FeTS2022_00090 +25,FeTS2022_01645 +25,FeTS2022_01564 +25,FeTS2022_01567 +25,FeTS2022_00703 +25,FeTS2022_00043 +25,FeTS2022_00003 +25,FeTS2022_00495 +25,FeTS2022_00017 +25,FeTS2022_00491 +25,FeTS2022_00054 +25,FeTS2022_00510 +25,FeTS2022_00618 +25,FeTS2022_00064 +25,FeTS2022_00024 +25,FeTS2022_00709 +25,FeTS2022_01653 +25,FeTS2022_01579 +25,FeTS2022_01572 +25,FeTS2022_01156 +25,FeTS2022_00707 +25,FeTS2022_01540 +25,FeTS2022_00056 +25,FeTS2022_00620 +25,FeTS2022_00470 +25,FeTS2022_00499 +25,FeTS2022_00640 +25,FeTS2022_00549 +25,FeTS2022_01601 +25,FeTS2022_00608 +25,FeTS2022_00727 +25,FeTS2022_00773 +25,FeTS2022_00504 +25,FeTS2022_01604 +25,FeTS2022_01158 +25,FeTS2022_00051 +25,FeTS2022_00768 +25,FeTS2022_01161 +25,FeTS2022_00765 +25,FeTS2022_00068 +25,FeTS2022_00551 +25,FeTS2022_01605 +25,FeTS2022_00674 +25,FeTS2022_01157 +25,FeTS2022_01631 +25,FeTS2022_00022 +25,FeTS2022_00777 +25,FeTS2022_01609 +25,FeTS2022_01633 +25,FeTS2022_01652 +25,FeTS2022_00759 +25,FeTS2022_01655 +26,FeTS2022_01639 
+26,FeTS2022_01563 +26,FeTS2022_00661 +26,FeTS2022_00087 +26,FeTS2022_00030 +26,FeTS2022_00556 +26,FeTS2022_01597 +26,FeTS2022_00724 +26,FeTS2022_00096 +26,FeTS2022_00049 +26,FeTS2022_00683 +26,FeTS2022_00059 +26,FeTS2022_01596 +26,FeTS2022_00498 +26,FeTS2022_00543 +26,FeTS2022_01641 +26,FeTS2022_01542 +26,FeTS2022_00062 +26,FeTS2022_00005 +26,FeTS2022_00646 +26,FeTS2022_00088 +26,FeTS2022_00656 +26,FeTS2022_01589 +26,FeTS2022_01160 +26,FeTS2022_01547 +26,FeTS2022_01606 +26,FeTS2022_00631 +26,FeTS2022_00756 +26,FeTS2022_00619 +26,FeTS2022_00698 +26,FeTS2022_01541 +26,FeTS2022_00539 +26,FeTS2022_00053 +26,FeTS2022_01618 +26,FeTS2022_00693 +26,FeTS2022_00616 +26,FeTS2022_01642 +26,FeTS2022_01632 +26,FeTS2022_00718 +26,FeTS2022_00006 +26,FeTS2022_00466 +26,FeTS2022_01565 +26,FeTS2022_01621 +26,FeTS2022_00697 +26,FeTS2022_00689 +26,FeTS2022_00554 +26,FeTS2022_00638 +26,FeTS2022_00517 +26,FeTS2022_00019 +26,FeTS2022_01650 +26,FeTS2022_01602 +26,FeTS2022_01570 +26,FeTS2022_00655 +26,FeTS2022_00552 +26,FeTS2022_00706 +26,FeTS2022_01654 +26,FeTS2022_00481 +26,FeTS2022_00604 +26,FeTS2022_00612 +26,FeTS2022_00774 +26,FeTS2022_00625 +26,FeTS2022_00070 +26,FeTS2022_00649 +26,FeTS2022_00036 +26,FeTS2022_01546 +26,FeTS2022_00559 +26,FeTS2022_00018 +26,FeTS2022_00507 +26,FeTS2022_00760 +26,FeTS2022_01568 +26,FeTS2022_00094 +26,FeTS2022_00526 +26,FeTS2022_01575 +26,FeTS2022_00512 +26,FeTS2022_00033 +26,FeTS2022_01648 +26,FeTS2022_00052 +26,FeTS2022_01625 +26,FeTS2022_01573 +26,FeTS2022_00623 +26,FeTS2022_01153 +26,FeTS2022_00532 +26,FeTS2022_00516 +26,FeTS2022_00679 +26,FeTS2022_00468 +26,FeTS2022_00494 +26,FeTS2022_00483 +26,FeTS2022_01552 +26,FeTS2022_00606 +26,FeTS2022_00742 +26,FeTS2022_00677 +26,FeTS2022_00652 +26,FeTS2022_00074 +26,FeTS2022_00513 +26,FeTS2022_01581 +26,FeTS2022_00663 +26,FeTS2022_00734 +26,FeTS2022_01619 +26,FeTS2022_00668 +26,FeTS2022_00558 +26,FeTS2022_00002 +26,FeTS2022_01598 +26,FeTS2022_00477 +26,FeTS2022_01634 +26,FeTS2022_00501 +26,FeTS2022_01155 
+26,FeTS2022_00077 +26,FeTS2022_01578 +26,FeTS2022_01569 +26,FeTS2022_01603 +26,FeTS2022_00538 +26,FeTS2022_00714 +26,FeTS2022_00031 +26,FeTS2022_01627 +26,FeTS2022_01553 +26,FeTS2022_00548 +26,FeTS2022_00739 +26,FeTS2022_00103 +26,FeTS2022_00528 +26,FeTS2022_01608 +26,FeTS2022_00095 +26,FeTS2022_00060 +26,FeTS2022_01554 +26,FeTS2022_00657 +26,FeTS2022_01587 +26,FeTS2022_00605 +26,FeTS2022_00686 +26,FeTS2022_00012 +27,FeTS2022_01166 +27,FeTS2022_01163 +27,FeTS2022_01165 +27,FeTS2022_01164 +28,FeTS2022_00444 +28,FeTS2022_01014 +28,FeTS2022_00442 +28,FeTS2022_01025 +28,FeTS2022_01024 +28,FeTS2022_00101 +28,FeTS2022_00453 +28,FeTS2022_01013 +28,FeTS2022_01011 +28,FeTS2022_00459 +28,FeTS2022_00457 +28,FeTS2022_01016 +28,FeTS2022_00448 +28,FeTS2022_01023 +28,FeTS2022_01017 +28,FeTS2022_00443 +28,FeTS2022_00455 +28,FeTS2022_00127 +28,FeTS2022_01012 +28,FeTS2022_01018 +28,FeTS2022_01022 +28,FeTS2022_00451 +28,FeTS2022_00445 +28,FeTS2022_00452 +28,FeTS2022_00454 +28,FeTS2022_01019 +28,FeTS2022_01021 +28,FeTS2022_01020 +28,FeTS2022_01026 +28,FeTS2022_00456 +28,FeTS2022_00446 +28,FeTS2022_00449 +28,FeTS2022_01015 +29,FeTS2022_00802 +29,FeTS2022_00788 +29,FeTS2022_00795 +29,FeTS2022_00820 +29,FeTS2022_00782 +29,FeTS2022_00800 +29,FeTS2022_00830 +29,FeTS2022_00824 +29,FeTS2022_00805 +29,FeTS2022_00796 +29,FeTS2022_00823 +29,FeTS2022_00828 +30,FeTS2022_00811 +30,FeTS2022_00789 +30,FeTS2022_00801 +30,FeTS2022_00780 +30,FeTS2022_00781 +30,FeTS2022_00814 +30,FeTS2022_00806 +30,FeTS2022_00810 +30,FeTS2022_00807 +30,FeTS2022_00818 +30,FeTS2022_00791 +31,FeTS2022_00787 +31,FeTS2022_00808 +31,FeTS2022_00809 +31,FeTS2022_00803 +31,FeTS2022_00816 +31,FeTS2022_00819 +31,FeTS2022_00793 +31,FeTS2022_00799 +31,FeTS2022_00797 +31,FeTS2022_00784 +31,FeTS2022_00804 +31,FeTS2022_00792 +32,FeTS2022_01031 +32,FeTS2022_01033 +32,FeTS2022_01030 +32,FeTS2022_00118 +32,FeTS2022_01029 +32,FeTS2022_00126 +32,FeTS2022_01032 +33,FeTS2022_01147 +33,FeTS2022_01149 +33,FeTS2022_01150 +33,FeTS2022_01148 
+33,FeTS2022_01162 diff --git a/Task_1/partitioning_data/small_split.csv b/Task_1/partitioning_data/small_split.csv new file mode 100644 index 0000000..50baaca --- /dev/null +++ b/Task_1/partitioning_data/small_split.csv @@ -0,0 +1,11 @@ +Partition_ID,Subject_ID +1,FeTS2022_01341 +1,FeTS2022_01333 +1,FeTS2022_01077 +1,FeTS2022_01324 +2,FeTS2022_01412 +2,FeTS2022_01415 +2,FeTS2022_01411 +3,FeTS2022_01439 +3,FeTS2022_01435 +3,FeTS2022_01434 diff --git a/Task_1/partitioning_data/validation.csv b/Task_1/partitioning_data/validation.csv new file mode 100644 index 0000000..d0d78f1 --- /dev/null +++ b/Task_1/partitioning_data/validation.csv @@ -0,0 +1,220 @@ +Partition_ID,Subject_ID +-1,FeTS2022_01718 +-1,FeTS2022_01719 +-1,FeTS2022_00573 +-1,FeTS2022_00592 +-1,FeTS2022_00585 +-1,FeTS2022_00595 +-1,FeTS2022_01678 +-1,FeTS2022_01679 +-1,FeTS2022_01680 +-1,FeTS2022_01681 +-1,FeTS2022_01682 +-1,FeTS2022_01683 +-1,FeTS2022_01684 +-1,FeTS2022_01685 +-1,FeTS2022_01686 +-1,FeTS2022_00833 +-1,FeTS2022_01687 +-1,FeTS2022_00384 +-1,FeTS2022_01688 +-1,FeTS2022_01689 +-1,FeTS2022_01690 +-1,FeTS2022_01691 +-1,FeTS2022_00434 +-1,FeTS2022_00333 +-1,FeTS2022_00337 +-1,FeTS2022_01692 +-1,FeTS2022_00229 +-1,FeTS2022_01667 +-1,FeTS2022_01668 +-1,FeTS2022_01669 +-1,FeTS2022_01670 +-1,FeTS2022_01671 +-1,FeTS2022_01672 +-1,FeTS2022_01673 +-1,FeTS2022_01674 +-1,FeTS2022_01675 +-1,FeTS2022_01676 +-1,FeTS2022_01677 +-1,FeTS2022_01720 +-1,FeTS2022_01724 +-1,FeTS2022_00145 +-1,FeTS2022_01693 +-1,FeTS2022_01006 +-1,FeTS2022_01727 +-1,FeTS2022_01694 +-1,FeTS2022_01729 +-1,FeTS2022_01695 +-1,FeTS2022_00997 +-1,FeTS2022_01696 +-1,FeTS2022_01732 +-1,FeTS2022_01697 +-1,FeTS2022_01698 +-1,FeTS2022_01699 +-1,FeTS2022_01700 +-1,FeTS2022_01701 +-1,FeTS2022_01702 +-1,FeTS2022_01703 +-1,FeTS2022_00135 +-1,FeTS2022_01704 +-1,FeTS2022_01705 +-1,FeTS2022_01706 +-1,FeTS2022_01707 +-1,FeTS2022_01708 +-1,FeTS2022_01709 +-1,FeTS2022_01710 +-1,FeTS2022_01711 +-1,FeTS2022_01712 +-1,FeTS2022_01713 +-1,FeTS2022_00129 
+-1,FeTS2022_00125 +-1,FeTS2022_01714 +-1,FeTS2022_00119 +-1,FeTS2022_01736 +-1,FeTS2022_00114 +-1,FeTS2022_01738 +-1,FeTS2022_00141 +-1,FeTS2022_01715 +-1,FeTS2022_01716 +-1,FeTS2022_01717 +-1,FeTS2022_01721 +-1,FeTS2022_01756 +-1,FeTS2022_01757 +-1,FeTS2022_01758 +-1,FeTS2022_01759 +-1,FeTS2022_01760 +-1,FeTS2022_01761 +-1,FeTS2022_01762 +-1,FeTS2022_01763 +-1,FeTS2022_01764 +-1,FeTS2022_01765 +-1,FeTS2022_01766 +-1,FeTS2022_01767 +-1,FeTS2022_01768 +-1,FeTS2022_01770 +-1,FeTS2022_01769 +-1,FeTS2022_01772 +-1,FeTS2022_01771 +-1,FeTS2022_00001 +-1,FeTS2022_00013 +-1,FeTS2022_00015 +-1,FeTS2022_00027 +-1,FeTS2022_00037 +-1,FeTS2022_00047 +-1,FeTS2022_00079 +-1,FeTS2022_00080 +-1,FeTS2022_00082 +-1,FeTS2022_00091 +-1,FeTS2022_01722 +-1,FeTS2022_01723 +-1,FeTS2022_01741 +-1,FeTS2022_01725 +-1,FeTS2022_01726 +-1,FeTS2022_01728 +-1,FeTS2022_01742 +-1,FeTS2022_01743 +-1,FeTS2022_01730 +-1,FeTS2022_01744 +-1,FeTS2022_01731 +-1,FeTS2022_01733 +-1,FeTS2022_01734 +-1,FeTS2022_01735 +-1,FeTS2022_01747 +-1,FeTS2022_01737 +-1,FeTS2022_01739 +-1,FeTS2022_01740 +-1,FeTS2022_01745 +-1,FeTS2022_01746 +-1,FeTS2022_01748 +-1,FeTS2022_00153 +-1,FeTS2022_01749 +-1,FeTS2022_01750 +-1,FeTS2022_01751 +-1,FeTS2022_01752 +-1,FeTS2022_01753 +-1,FeTS2022_01754 +-1,FeTS2022_01755 +-1,FeTS2022_01773 +-1,FeTS2022_01774 +-1,FeTS2022_01775 +-1,FeTS2022_01776 +-1,FeTS2022_00161 +-1,FeTS2022_00163 +-1,FeTS2022_00174 +-1,FeTS2022_00181 +-1,FeTS2022_00182 +-1,FeTS2022_00190 +-1,FeTS2022_00200 +-1,FeTS2022_00208 +-1,FeTS2022_00213 +-1,FeTS2022_00252 +-1,FeTS2022_00256 +-1,FeTS2022_00264 +-1,FeTS2022_00287 +-1,FeTS2022_00307 +-1,FeTS2022_00323 +-1,FeTS2022_00335 +-1,FeTS2022_00355 +-1,FeTS2022_00372 +-1,FeTS2022_00381 +-1,FeTS2022_00422 +-1,FeTS2022_00428 +-1,FeTS2022_00438 +-1,FeTS2022_00447 +-1,FeTS2022_00450 +-1,FeTS2022_00458 +-1,FeTS2022_00460 +-1,FeTS2022_00462 +-1,FeTS2022_00463 +-1,FeTS2022_00467 +-1,FeTS2022_00474 +-1,FeTS2022_00489 +-1,FeTS2022_00492 +-1,FeTS2022_00503 +-1,FeTS2022_00521 
+-1,FeTS2022_00535 +-1,FeTS2022_00553 +-1,FeTS2022_00560 +-1,FeTS2022_00603 +-1,FeTS2022_01777 +-1,FeTS2022_01778 +-1,FeTS2022_01779 +-1,FeTS2022_01780 +-1,FeTS2022_01781 +-1,FeTS2022_01782 +-1,FeTS2022_01783 +-1,FeTS2022_01784 +-1,FeTS2022_01785 +-1,FeTS2022_01786 +-1,FeTS2022_01787 +-1,FeTS2022_01788 +-1,FeTS2022_01789 +-1,FeTS2022_01790 +-1,FeTS2022_00644 +-1,FeTS2022_00647 +-1,FeTS2022_00662 +-1,FeTS2022_00671 +-1,FeTS2022_00681 +-1,FeTS2022_00699 +-1,FeTS2022_00702 +-1,FeTS2022_00712 +-1,FeTS2022_00719 +-1,FeTS2022_00721 +-1,FeTS2022_00749 +-1,FeTS2022_00762 +-1,FeTS2022_00769 +-1,FeTS2022_00779 +-1,FeTS2022_00821 +-1,FeTS2022_00822 +-1,FeTS2022_00825 +-1,FeTS2022_00826 +-1,FeTS2022_00829 +-1,FeTS2022_01791 +-1,FeTS2022_01792 +-1,FeTS2022_01793 +-1,FeTS2022_01794 +-1,FeTS2022_01795 +-1,FeTS2022_01796 +-1,FeTS2022_01797 +-1,FeTS2022_01798 From fb7c0f5cb94a67b5fd6b073a58af2fdea1ca0066 Mon Sep 17 00:00:00 2001 From: Tarunkumar Banda Date: Fri, 21 Mar 2025 07:06:50 -0700 Subject: [PATCH 11/16] fixed few TODOs --- Task_1/FeTS_Challenge.py | 8 ++--- Task_1/fets_challenge/experiment.py | 15 -------- Task_1/fets_challenge/fets_flow.py | 54 +++++++++++++++++------------ 3 files changed, 35 insertions(+), 42 deletions(-) diff --git a/Task_1/FeTS_Challenge.py b/Task_1/FeTS_Challenge.py index e4e45ae..826f1c8 100644 --- a/Task_1/FeTS_Challenge.py +++ b/Task_1/FeTS_Challenge.py @@ -529,11 +529,11 @@ def FedAvgM_Selection(local_tensors, institution_split_csv_filename = 'small_split.csv' # change this to point to the parent directory of the data -brats_training_data_parent_dir = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_TrainingData' #TODO revert to '/raid/datasets/FeTS22/MICCAI_FeTS2022_TrainingData' before raising the PR +brats_training_data_parent_dir = '/home/ad_tbanda/code/fedAI/MICCAI_FeTS2022_TrainingData' #TODO revert to '/raid/datasets/FeTS22/MICCAI_FeTS2022_TrainingData' before raising the PR # increase this if you need a longer history for your algorithms # decrease 
this if you need to reduce system RAM consumption -db_store_rounds = 5 #TODO store the tensor db for these many rounds +db_store_rounds = 1 #TODO store the tensor db for these many rounds # this is passed to PyTorch, so set it accordingly for your system device = 'cpu' @@ -557,7 +557,7 @@ def FedAvgM_Selection(local_tensors, home = str(Path.home()) #Creating working directory and copying the required csv files -working_directory= os.path.join(home, '.local/tarunNew/') +working_directory= os.path.join(home, '.local/workspace/') Path(working_directory).mkdir(parents=True, exist_ok=True) source_dir=f'{Path.cwd()}/partitioning_data/' pattern = "*.csv" @@ -605,7 +605,7 @@ def FedAvgM_Selection(local_tensors, #checkpoint_folder='experiment_1' #data_path = -data_path = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_ValidationData' #TODO revert to '/home/brats/MICCAI_FeTS2022_ValidationData' before raising the PR +data_path = '/home/ad_tbanda/code/fedAI/MICCAI_FeTS2022_ValidationData' #TODO revert to '/home/brats/MICCAI_FeTS2022_ValidationData' before raising the PR validation_csv_filename = 'validation.csv' # you can keep these the same if you wish diff --git a/Task_1/fets_challenge/experiment.py b/Task_1/fets_challenge/experiment.py index 1a4f18a..31c34b0 100644 --- a/Task_1/fets_challenge/experiment.py +++ b/Task_1/fets_challenge/experiment.py @@ -73,11 +73,6 @@ def run_challenge_experiment(aggregation_function, aggregation_wrapper = CustomAggregationWrapper(aggregation_function) - # [TODO] Handle the storing of data in the fets flow (add db_sotre_rounds aggregator private attribute) - # overrides = { - # 'aggregator.settings.db_store_rounds': db_store_rounds, - # } - transformed_csv_dict = extract_csv_partitions(os.path.join(work, 'gandlf_paths.csv')) collaborators = [] @@ -140,19 +135,9 @@ def run_challenge_experiment(aggregation_function, flflow.run() # #TODO [Workflow - API] -> Commenting as pretrained model is not used. 
- # ---> Define a new step in federated flow before training to load the pretrained model # if use_pretrained_model: - # print('TESTING ->>>>>> Loading pretrained model...') # if device == 'cpu': # checkpoint = torch.load(f'{root}/pretrained_model/resunet_pretrained.pth',map_location=torch.device('cpu')) - # print('TESTING ->>>>>> Loading checkpoint model...') - # print(checkpoint.keys()) - # print('TESTING ->>>>>> Loading checkpoint state dict...') - # model_state = checkpoint['model_state_dict'] - # for name, tensor in model_state.items(): - # print(f"Priting {name}: {tensor.shape}") - # print('TESTING ->>>>>> Loading taskrunner model') - # print(task_runner.model) # task_runner.model.load_state_dict(checkpoint['model_state_dict']) # task_runner.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) # else: diff --git a/Task_1/fets_challenge/fets_flow.py b/Task_1/fets_challenge/fets_flow.py index 9392b25..6aaa269 100644 --- a/Task_1/fets_challenge/fets_flow.py +++ b/Task_1/fets_challenge/fets_flow.py @@ -41,6 +41,17 @@ def cache_tensor_dict(tensor_dict, agg_tensor_db, idx, agg_out_dict): agg_out_dict[modified_key] = value agg_tensor_db.cache_tensor(agg_out_dict) +def return_cleanup_key(tensor_key, col, round_data_to_delete): + new_tags = change_tags(tensor_key.tags, remove_field=col) + modified_key = TensorKey( + tensor_name=tensor_key.tensor_name, + origin=col, + round_number=round_data_to_delete, + report=tensor_key.report, + tags=new_tags + ) + return modified_key + def get_aggregated_dict_with_tensorname(agg_tensor_dict): agg_dict_with_tensornames = {} for tensor_key, value in agg_tensor_dict.items(): @@ -104,6 +115,7 @@ def start(self): logger.warning(f'Could not find provided checkpoint folder: {self.restore_from_checkpoint_folder}. 
Exiting...') exit(1) else: + #TODO : Validate load from checkpoint logic logger.info(f'Attempting to load last completed round from {self.restore_from_checkpoint_folder}') state = load_checkpoint(self.restore_from_checkpoint_folder) self.checkpoint_folder = self.restore_from_checkpoint_folder @@ -209,7 +221,7 @@ def initialize_colls(self): collaborator_data_loaders[self.input] = FeTSDataLoader(train_loader, val_loader) - #TODO the times per collaborator is calculated based on the random values, it doesn't look like the actual time taken by the collaborator + #TODO Validate the times per collaborator is calculated based on the random values, it doesn't look like the actual time taken by the collaborator self.times_per_collaborator = compute_times_per_collaborator(self.input, self.training_collaborators, self.hparam_dict['epochs_per_round'], @@ -280,10 +292,11 @@ def join_task(self, inputs): @aggregator def join(self, inputs): - join_start_time = time.time() + round_data_to_delete = 0 + if self.current_round > self.db_store_rounds: + round_data_to_delete = self.current_round - self.db_store_rounds self.aggregation_type.set_state_data_for_round(self.collaborators_chosen_each_round, self.collaborator_times_per_round) agg_tensor_db = TensorDB() - cache_tensor_dict(self.agg_tensor_dict, agg_tensor_db, 0, {}) collaborator_weights_unnormalized = {} times_per_collaborator = {} for idx, col in enumerate(inputs): @@ -292,7 +305,9 @@ def join(self, inputs): cache_tensor_dict(col.local_valid_dict, agg_tensor_db, idx, agg_out_dict) cache_tensor_dict(col.agg_valid_dict, agg_tensor_db, idx, agg_out_dict) cache_tensor_dict(col.global_output_tensor_dict, agg_tensor_db, idx, agg_out_dict) - + self.agg_tensor_dict.update(col.local_valid_dict) + self.agg_tensor_dict.update(col.agg_valid_dict) + self.agg_tensor_dict.update(col.global_output_tensor_dict) # Store the keys for each collaborator tensor_keys = [] for tensor_key in agg_out_dict.keys(): @@ -302,7 +317,6 @@ def join(self, 
inputs): # The collaborator data sizes for that task collaborator_weights_unnormalized[col.input] = col.collaborator_task_weight times_per_collaborator[col.input] = col.times_per_collaborator - print(f'Collaborator task weights: {collaborator_weights_unnormalized}') print(f'Collaborator times: {times_per_collaborator}') weight_total = sum(collaborator_weights_unnormalized.values()) @@ -310,6 +324,7 @@ def join(self, inputs): k: v / weight_total for k, v in collaborator_weights_unnormalized.items() } print(f'Calculated Collaborator weights: {collaborator_weight_dict}') + print("=" * 40) for col,tensor_keys in self.tensor_keys_per_col.items(): for tensor_key in tensor_keys: tensor_name, origin, round_number, report, tags = tensor_key @@ -321,9 +336,15 @@ def join(self, inputs): collaborator_weight_dict, aggregation_function=self.aggregation_type, ) - if 'trained' in tags and tensor_name not in self.agg_tensor_dict: - self.agg_tensor_dict[agg_tensor_key] = agg_tensor_db.get_tensor_from_cache(agg_tensor_key) - + #cleaningup aggregated tensor dict based on db store rounds + if self.current_round > self.db_store_rounds: + col_tensor_key_to_be_deleted = return_cleanup_key(tensor_key, col, round_data_to_delete) + agg_tensor_key_to_be_deleted = TensorKey(tensor_name, origin, round_data_to_delete, report, new_tags) + if col_tensor_key_to_be_deleted in self.agg_tensor_dict: + self.agg_tensor_dict.pop(col_tensor_key_to_be_deleted) + if agg_tensor_key_to_be_deleted in self.agg_tensor_dict: + self.agg_tensor_dict.pop(agg_tensor_key_to_be_deleted) + self.agg_tensor_dict[agg_tensor_key] = agg_tensor_db.get_tensor_from_cache(agg_tensor_key) round_loss = get_metric('valid_loss', self.current_round, agg_tensor_db) round_dice = get_metric('valid_dice', self.current_round, agg_tensor_db) dice_label_0 = get_metric('valid_dice_per_label_0', self.current_round, agg_tensor_db) @@ -396,6 +417,8 @@ def join(self, inputs): 
self.experiment_results['hausdorff95_label_2'].append(hausdorff95_label_2) self.experiment_results['hausdorff95_label_4'].append(hausdorff95_label_4) + cache_tensor_dict(self.agg_tensor_dict, agg_tensor_db, 0, {}) + if self.save_checkpoints: logger.info(f'Saving checkpoint for round {self.current_round} : checkpoint folder {self.checkpoint_folder}') logger.info(f'To resume from this checkpoint, set the restore_from_checkpoint_folder parameter to \'{self.checkpoint_folder}\'') @@ -413,7 +436,6 @@ def join(self, inputs): # in practice, this means that the previous round's model is the last model scored, # so a long final round should not actually benefit the competitor, since that final # model is never globally validated - # TODO : Added total time taken by running the experiment till join per round if self.total_simulated_time > MAX_SIMULATION_TIME: logger.info("Simulation time exceeded. Ending Experiment") self.next(self.end) @@ -432,26 +454,12 @@ def join(self, inputs): local_tensor_dict = get_aggregated_dict_with_tensorname(self.agg_tensor_dict) self.fets_model.rebuild_model(self.current_round, local_tensor_dict) self.fets_model.save_native(f'checkpoint/{self.checkpoint_folder}/temp_model.pkl') - - #TODO : Remove below logging - join_end_time = time.time() - self.join_time = join_end_time - join_start_time - print(f'took {self.join_time} seconds for join_time') - - total_time = 0 - for input in inputs: - print(f"TIme taken by {input} : {input.aggregated_model_validation_time + input.training_time + input.local_model_validation_time + self.join_time}") - total_time += input.aggregated_model_validation_time + input.training_time + input.local_model_validation_time + self.join_time - - print(f'took {total_time} seconds for total training and valid') - #TODO cleaup aggreated tensor dict based on db store rounds, get the round number of data to be deleted, by finding round number from the dictioinary keys self.next(self.internal_loop) @aggregator def 
internal_loop(self): if self.current_round == self.n_rounds: print('************* EXPERIMENT COMPLETED *************') - # TODO : Add the average time taken for completing n_rounds print('Experiment results:') print(pd.DataFrame.from_dict(self.experiment_results)) self.next(self.end) From 846245be4382c0595175b3b869352578ab3d1bb6 Mon Sep 17 00:00:00 2001 From: "Agrawal, Kush" Date: Mon, 24 Mar 2025 08:27:05 -0700 Subject: [PATCH 12/16] Fixed tensor keys restoration from checkpoint Signed-off-by: Agrawal, Kush --- Task_1/FeTS_Challenge.py | 14 +- Task_1/fets_challenge/checkpoint_utils.py | 3 +- .../fets_challenge/config/gandlf_config.yaml | 2 +- Task_1/fets_challenge/fets_challenge_model.py | 1 - Task_1/fets_challenge/fets_flow.py | 208 +++++++++--------- 5 files changed, 115 insertions(+), 113 deletions(-) diff --git a/Task_1/FeTS_Challenge.py b/Task_1/FeTS_Challenge.py index 826f1c8..fb0af22 100644 --- a/Task_1/FeTS_Challenge.py +++ b/Task_1/FeTS_Challenge.py @@ -340,7 +340,7 @@ def clipped_aggregation(local_tensors, previous_tensor_value = tensor_db.search(tensor_name=tensor_name, fl_round=fl_round, tags=('model',), origin='aggregator') if previous_tensor_value.shape[0] > 1: - print(previous_tensor_value) + logger.info(previous_tensor_value) raise ValueError(f'found multiple matching tensors for {tensor_name}, tags=(model,), origin=aggregator') if previous_tensor_value.shape[0] < 1: @@ -529,7 +529,7 @@ def FedAvgM_Selection(local_tensors, institution_split_csv_filename = 'small_split.csv' # change this to point to the parent directory of the data -brats_training_data_parent_dir = '/home/ad_tbanda/code/fedAI/MICCAI_FeTS2022_TrainingData' #TODO revert to '/raid/datasets/FeTS22/MICCAI_FeTS2022_TrainingData' before raising the PR +brats_training_data_parent_dir = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_TrainingData' #TODO revert to '/raid/datasets/FeTS22/MICCAI_FeTS2022_TrainingData' before raising the PR # increase this if you need a longer history for your 
algorithms # decrease this if you need to reduce system RAM consumption @@ -565,18 +565,18 @@ def FedAvgM_Selection(local_tensors, files_to_copy = glob.glob(source_pattern) if not files_to_copy: - print(f"No files found matching pattern: {pattern}") + logger.info(f"No files found matching pattern: {pattern}") for source_file in files_to_copy: destination_file = os.path.join(working_directory, os.path.basename(source_file)) shutil.copy2(source_file, destination_file) try: os.chdir(working_directory) - print("Directory changed to:", os.getcwd()) + logger.info("Directory changed to:", os.getcwd()) except FileNotFoundError: - print("Error: Directory not found.") + logger.info("Error: Directory not found.") except PermissionError: - print("Error: Permission denied") + logger.info("Error: Permission denied") checkpoint_folder = run_challenge_experiment( aggregation_function=aggregation_function, @@ -605,7 +605,7 @@ def FedAvgM_Selection(local_tensors, #checkpoint_folder='experiment_1' #data_path = -data_path = '/home/ad_tbanda/code/fedAI/MICCAI_FeTS2022_ValidationData' #TODO revert to '/home/brats/MICCAI_FeTS2022_ValidationData' before raising the PR +data_path = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_ValidationData' #TODO revert to '/home/brats/MICCAI_FeTS2022_ValidationData' before raising the PR validation_csv_filename = 'validation.csv' # you can keep these the same if you wish diff --git a/Task_1/fets_challenge/checkpoint_utils.py b/Task_1/fets_challenge/checkpoint_utils.py index 9ee8c00..4742674 100644 --- a/Task_1/fets_challenge/checkpoint_utils.py +++ b/Task_1/fets_challenge/checkpoint_utils.py @@ -28,6 +28,7 @@ def save_checkpoint(checkpoint_folder, agg_tensor_db, best_dice_over_time_auc, collaborators_chosen_each_round, collaborator_times_per_round, + tensor_keys_per_col, experiment_results, summary): """ @@ -38,7 +39,7 @@ def save_checkpoint(checkpoint_folder, agg_tensor_db, with open(f'checkpoint/{checkpoint_folder}/state.pkl', 'wb') as f: 
pickle.dump([collaborator_names, round_num, collaborator_time_stats, total_simulated_time, best_dice, best_dice_over_time_auc, collaborators_chosen_each_round, - collaborator_times_per_round, experiment_results, summary], f) + collaborator_times_per_round, tensor_keys_per_col, experiment_results, summary], f) def load_checkpoint(checkpoint_folder): """ diff --git a/Task_1/fets_challenge/config/gandlf_config.yaml b/Task_1/fets_challenge/config/gandlf_config.yaml index e4845e5..eb03963 100644 --- a/Task_1/fets_challenge/config/gandlf_config.yaml +++ b/Task_1/fets_challenge/config/gandlf_config.yaml @@ -17,7 +17,7 @@ output_dir: '.' metrics: - dice - dice_per_label -#- hd95_per_label +- hd95_per_label model: amp: true architecture: resunet diff --git a/Task_1/fets_challenge/fets_challenge_model.py b/Task_1/fets_challenge/fets_challenge_model.py index dcd8c5e..77c3c63 100644 --- a/Task_1/fets_challenge/fets_challenge_model.py +++ b/Task_1/fets_challenge/fets_challenge_model.py @@ -230,7 +230,6 @@ def train(self, col_name, round_num, hparams_dict, train_loader, use_tqdm=False, epochs_per_round = int(hparams_dict.pop('epochs_per_round')) learning_rate = float(hparams_dict.pop('learning_rate')) - #self.rebuild_model(round_num, input_tensor_dict) # set to "training" mode self.model.train() diff --git a/Task_1/fets_challenge/fets_flow.py b/Task_1/fets_challenge/fets_flow.py index 6aaa269..d951f0e 100644 --- a/Task_1/fets_challenge/fets_flow.py +++ b/Task_1/fets_challenge/fets_flow.py @@ -59,6 +59,52 @@ def get_aggregated_dict_with_tensorname(agg_tensor_dict): agg_dict_with_tensornames[tensor_name] = value return agg_dict_with_tensornames +def update_metrics(current_round, agg_tensor_db, summary, experiment_results, include_validation_with_hausdorff, + total_simulated_time, round_dice, projected_auc): + + round_loss = get_metric('valid_loss', current_round, agg_tensor_db) + round_dice = get_metric('valid_dice', current_round, agg_tensor_db) + dice_label_0 = 
get_metric('valid_dice_per_label_0', current_round, agg_tensor_db) + dice_label_1 = get_metric('valid_dice_per_label_1', current_round, agg_tensor_db) + dice_label_2 = get_metric('valid_dice_per_label_2', current_round, agg_tensor_db) + dice_label_4 = get_metric('valid_dice_per_label_4', current_round, agg_tensor_db) + if include_validation_with_hausdorff: + hausdorff95_label_0 = get_metric('valid_hd95_per_label_0', current_round, agg_tensor_db) + hausdorff95_label_1 = get_metric('valid_hd95_per_label_1', current_round, agg_tensor_db) + hausdorff95_label_2 = get_metric('valid_hd95_per_label_2', current_round, agg_tensor_db) + hausdorff95_label_4 = get_metric('valid_hd95_per_label_4', current_round, agg_tensor_db) + + # # End of round summary + summary = '"**** END OF ROUND {} SUMMARY *****"'.format(current_round) + summary += "\n\tSimulation Time: {} minutes".format(round(total_simulated_time / 60, 2)) + summary += "\n\t(Projected) Convergence Score: {}".format(projected_auc) + summary += "\n\tRound Loss: {}".format(round_loss) + summary += "\n\tRound Dice: {}".format(round_dice) + summary += "\n\tDICE Label 0: {}".format(dice_label_0) + summary += "\n\tDICE Label 1: {}".format(dice_label_1) + summary += "\n\tDICE Label 2: {}".format(dice_label_2) + summary += "\n\tDICE Label 4: {}".format(dice_label_4) + if include_validation_with_hausdorff: + summary += "\n\tHausdorff95 Label 0: {}".format(hausdorff95_label_0) + summary += "\n\tHausdorff95 Label 1: {}".format(hausdorff95_label_1) + summary += "\n\tHausdorff95 Label 2: {}".format(hausdorff95_label_2) + summary += "\n\tHausdorff95 Label 4: {}".format(hausdorff95_label_4) + logger.info(summary) + + experiment_results['round'].append(current_round) + experiment_results['time'].append(total_simulated_time) + experiment_results['convergence_score'].append(projected_auc) + experiment_results['round_dice'].append(round_dice) + experiment_results['dice_label_0'].append(dice_label_0) + 
experiment_results['dice_label_1'].append(dice_label_1) + experiment_results['dice_label_2'].append(dice_label_2) + experiment_results['dice_label_4'].append(dice_label_4) + if include_validation_with_hausdorff: + experiment_results['hausdorff95_label_0'].append(hausdorff95_label_0) + experiment_results['hausdorff95_label_1'].append(hausdorff95_label_1) + experiment_results['hausdorff95_label_2'].append(hausdorff95_label_2) + experiment_results['hausdorff95_label_4'].append(hausdorff95_label_4) + collaborator_data_loaders = {} class FeTSFederatedFlow(FLSpec): @@ -123,7 +169,7 @@ def start(self): [loaded_collaborator_names, starting_round_num, self.collaborator_time_stats, self.total_simulated_time, self.best_dice, self.best_dice_over_time_auc, self.collaborators_chosen_each_round, self.collaborator_times_per_round, - self.experiment_results, summary, agg_tensor_db] = state + self.tensor_keys_per_col, self.experiment_results, summary, agg_tensor_db] = state if loaded_collaborator_names != self.collaborator_names: logger.error(f'Collaborator names found in checkpoint ({loaded_collaborator_names}) ' @@ -134,18 +180,24 @@ def start(self): logger.info(f'Previous summary for round {starting_round_num}') logger.info(summary) + aggregator_tensor_db = TensorDB() + aggregator_tensor_db.tensor_db = agg_tensor_db + #Updating the agg_tensor_dict from stored tensor_db starting_round_num += 1 self.current_round = starting_round_num + logger.info(f'Loading checkpoint from round {self.tensor_keys_per_col}') for col,tensor_keys in self.tensor_keys_per_col.items(): for tensor_key in tensor_keys: - tensor_name, origin, round_number, report, tags = tensor_key - if 'trained' in tags: - new_tags = change_tags(tags, remove_field=col) - new_tensor_key = TensorKey(tensor_name, origin, round_number, report, new_tags) - if tensor_name not in self.agg_tensor_dict: - self.agg_tensor_dict[new_tensor_key] = agg_tensor_db.get_tensor_from_cache(new_tensor_key) + tensor_name, _, _, _, _ = tensor_key 
+ if tensor_name not in self.agg_tensor_dict: + self.agg_tensor_dict[tensor_key] = aggregator_tensor_db.get_tensor_from_cache(tensor_key) + logger.info(f'Loaded tensor key {tensor_key}') + if self.current_round >= self.n_rounds: + logger.info("Experiment already completed. Exiting...") + self.next(self.end) + self.collaborator_time_stats = gen_collaborator_time_stats(self.collaborator_names) self.next(self.fetch_parameters_for_colls) @@ -170,7 +222,7 @@ def fetch_parameters_for_colls(self): self.hparam_dict['learning_rate'] = learning_rate self.hparam_dict['epochs_per_round'] = epochs_per_round - print(f'Hyperparameters for round {self.current_round}: {self.hparam_dict}') + logger.info(f'Hyperparameters for round {self.current_round}: {self.hparam_dict}') # pick collaborators to train for the round self.training_collaborators = self.choose_training_collaborators(self.collaborator_names, @@ -183,6 +235,7 @@ def fetch_parameters_for_colls(self): self.collaborators_chosen_each_round[self.current_round] = self.training_collaborators if self.current_round == 1 or self.restored is True: self.next(self.initialize_colls, foreach='collaborators') + self.restored = False else: self.next(self.aggregated_model_validation, foreach='training_collaborators') @@ -229,7 +282,6 @@ def initialize_colls(self): self.collaborator_time_stats, self.current_round) - print(f'Times per collaborator for round {self.current_round}: {self.times_per_collaborator}') if self.restored is False: tensor_dict = self.fets_model.get_tensor_dict() for key, value in tensor_dict.items(): @@ -244,46 +296,26 @@ def initialize_colls(self): @collaborator def aggregated_model_validation(self): - validation_start_time = time.time() - logger.info(f'Performing aggregated model validation for collaborator {self.input}') input_tensor_dict = get_aggregated_dict_with_tensorname(self.agg_tensor_dict) val_loader = collaborator_data_loaders[self.input].get_valid_loader() self.fets_model.rebuild_model(self.current_round, 
input_tensor_dict) self.agg_valid_dict, _ = self.fets_model.validate(self.input, self.current_round, val_loader, apply="global") - - validation_end_time = time.time() - self.aggregated_model_validation_time = validation_end_time - validation_start_time - print(f'Collaborator {self.input} took {self.aggregated_model_validation_time} seconds for agg validation') self.next(self.train) @collaborator def train(self): - training_start_time = time.time() - logger.info(f'Performing training for collaborator {self.input}') train_loader = collaborator_data_loaders[self.input].get_train_loader() self.global_output_tensor_dict, _ = self.fets_model.train(self.input, self.current_round, self.hparam_dict, train_loader) - self.collaborator_task_weight = collaborator_data_loaders[self.input].get_train_data_size() - - print(f'Collaborator task weight in training: {self.collaborator_task_weight}') - training_end_time = time.time() - self.training_time = training_end_time - training_start_time - print(f'Collaborator {self.input} took {self.training_time} seconds for training') self.next(self.local_model_validation) @collaborator - def local_model_validation(self): - validation_start_time = time.time() - + def local_model_validation(self): logger.info(f'Performing local model validation for collaborator {self.input}') val_loader = collaborator_data_loaders[self.input].get_valid_loader() self.local_valid_dict, _ = self.fets_model.validate(self.input, self.current_round, val_loader, apply="local") - - validation_end_time = time.time() - self.local_model_validation_time = validation_end_time - validation_start_time - print(f'Collaborator {self.input} took {self.local_model_validation_time} seconds for local validation') self.next(self.join) @aggregator @@ -305,37 +337,37 @@ def join(self, inputs): cache_tensor_dict(col.local_valid_dict, agg_tensor_db, idx, agg_out_dict) cache_tensor_dict(col.agg_valid_dict, agg_tensor_db, idx, agg_out_dict) cache_tensor_dict(col.global_output_tensor_dict, 
agg_tensor_db, idx, agg_out_dict) - self.agg_tensor_dict.update(col.local_valid_dict) - self.agg_tensor_dict.update(col.agg_valid_dict) - self.agg_tensor_dict.update(col.global_output_tensor_dict) + self.agg_tensor_dict.update(agg_out_dict) + # Store the keys for each collaborator - tensor_keys = [] - for tensor_key in agg_out_dict.keys(): - tensor_keys.append(tensor_key) - self.tensor_keys_per_col[str(idx + 1)] = tensor_keys + self.tensor_keys_per_col[str(idx + 1)] = list(agg_out_dict.keys()) + #TODO : Compare the weight from the old expermient, we saw three different sets of weights while running the experiment for single round - # The collaborator data sizes for that task collaborator_weights_unnormalized[col.input] = col.collaborator_task_weight times_per_collaborator[col.input] = col.times_per_collaborator - print(f'Collaborator task weights: {collaborator_weights_unnormalized}') - print(f'Collaborator times: {times_per_collaborator}') + weight_total = sum(collaborator_weights_unnormalized.values()) collaborator_weight_dict = { k: v / weight_total for k, v in collaborator_weights_unnormalized.items() } - print(f'Calculated Collaborator weights: {collaborator_weight_dict}') - print("=" * 40) + logger.info(f'Calculated Collaborator weights: {collaborator_weight_dict} and and times: {times_per_collaborator}') + agg_tensor_keys = [] for col,tensor_keys in self.tensor_keys_per_col.items(): for tensor_key in tensor_keys: tensor_name, origin, round_number, report, tags = tensor_key - new_tags = change_tags(tags, remove_field=col) - agg_tensor_key = TensorKey(tensor_name, origin, round_number, report, new_tags) - # Aggregates the tensor values for the tensor key and stores it in tensor_db - agg_results = agg_tensor_db.get_aggregated_tensor( - agg_tensor_key, - collaborator_weight_dict, - aggregation_function=self.aggregation_type, - ) + if col in tags: + new_tags = change_tags(tags, remove_field=col) + agg_tensor_key = TensorKey(tensor_name, origin, round_number, 
report, new_tags) + # Aggregates the tensor values for the tensor key and stores it in tensor_db + if agg_tensor_key not in self.agg_tensor_dict: + agg_results = agg_tensor_db.get_aggregated_tensor( + agg_tensor_key, + collaborator_weight_dict, + aggregation_function=self.aggregation_type, + ) + self.agg_tensor_dict[agg_tensor_key] = agg_tensor_db.get_tensor_from_cache(agg_tensor_key) + agg_tensor_keys.append(agg_tensor_key) + #cleaningup aggregated tensor dict based on db store rounds if self.current_round > self.db_store_rounds: col_tensor_key_to_be_deleted = return_cleanup_key(tensor_key, col, round_data_to_delete) @@ -344,18 +376,11 @@ def join(self, inputs): self.agg_tensor_dict.pop(col_tensor_key_to_be_deleted) if agg_tensor_key_to_be_deleted in self.agg_tensor_dict: self.agg_tensor_dict.pop(agg_tensor_key_to_be_deleted) - self.agg_tensor_dict[agg_tensor_key] = agg_tensor_db.get_tensor_from_cache(agg_tensor_key) - round_loss = get_metric('valid_loss', self.current_round, agg_tensor_db) - round_dice = get_metric('valid_dice', self.current_round, agg_tensor_db) - dice_label_0 = get_metric('valid_dice_per_label_0', self.current_round, agg_tensor_db) - dice_label_1 = get_metric('valid_dice_per_label_1', self.current_round, agg_tensor_db) - dice_label_2 = get_metric('valid_dice_per_label_2', self.current_round, agg_tensor_db) - dice_label_4 = get_metric('valid_dice_per_label_4', self.current_round, agg_tensor_db) - if self.include_validation_with_hausdorff: - hausdorff95_label_0 = get_metric('valid_hd95_per_label_0', self.current_round, agg_tensor_db) - hausdorff95_label_1 = get_metric('valid_hd95_per_label_1', self.current_round, agg_tensor_db) - hausdorff95_label_2 = get_metric('valid_hd95_per_label_2', self.current_round, agg_tensor_db) - hausdorff95_label_4 = get_metric('valid_hd95_per_label_4', self.current_round, agg_tensor_db) + + self.tensor_keys_per_col['aggregator'] = agg_tensor_keys + + for key in self.agg_tensor_dict.keys(): + print(f'[Kush Aggregated 
Tensor Dictionary] Keys : {key}') times_list = [(t, col) for col, t in times_per_collaborator.items()] times_list = sorted(times_list) @@ -364,6 +389,21 @@ def join(self, inputs): round_time = max([t for t, _ in times_list]) self.total_simulated_time += round_time + ## CONVERGENCE METRIC COMPUTATION + # update the auc score + self.best_dice_over_time_auc += self.best_dice * round_time + + # project the auc score as remaining time * best dice + # this projection assumes that the current best score is carried forward for the entire week + projected_auc = (MAX_SIMULATION_TIME - self.total_simulated_time) * self.best_dice + self.best_dice_over_time_auc + projected_auc /= MAX_SIMULATION_TIME + + # update metrics and results + summary = "" + round_dice = 0 + update_metrics(self.current_round, agg_tensor_db, summary, self.experiment_results, + self.include_validation_with_hausdorff, self.total_simulated_time, round_dice, projected_auc) + if self.best_dice < round_dice: self.best_dice = round_dice # Set the weights for the final model @@ -377,46 +417,7 @@ def join(self, inputs): shutil.copyfile(src=f'checkpoint/{self.checkpoint_folder}/temp_model.pkl',dst=f'checkpoint/{self.checkpoint_folder}/best_model.pkl') logger.info(f'Saved model with best average binary DICE: {self.best_dice} to checkpoint/{self.checkpoint_folder}/best_model.pkl') - ## CONVERGENCE METRIC COMPUTATION - # update the auc score - self.best_dice_over_time_auc += self.best_dice * round_time - - # project the auc score as remaining time * best dice - # this projection assumes that the current best score is carried forward for the entire week - projected_auc = (MAX_SIMULATION_TIME - self.total_simulated_time) * self.best_dice + self.best_dice_over_time_auc - projected_auc /= MAX_SIMULATION_TIME - - # # End of round summary - summary = '"**** END OF ROUND {} SUMMARY *****"'.format(self.current_round) - summary += "\n\tSimulation Time: {} minutes".format(round(self.total_simulated_time / 60, 2)) - summary += 
"\n\t(Projected) Convergence Score: {}".format(projected_auc) - summary += "\n\tRound Loss: {}".format(round_loss) - summary += "\n\tRound Dice: {}".format(round_dice) - summary += "\n\tDICE Label 0: {}".format(dice_label_0) - summary += "\n\tDICE Label 1: {}".format(dice_label_1) - summary += "\n\tDICE Label 2: {}".format(dice_label_2) - summary += "\n\tDICE Label 4: {}".format(dice_label_4) - if self.include_validation_with_hausdorff: - summary += "\n\tHausdorff95 Label 0: {}".format(hausdorff95_label_0) - summary += "\n\tHausdorff95 Label 1: {}".format(hausdorff95_label_1) - summary += "\n\tHausdorff95 Label 2: {}".format(hausdorff95_label_2) - summary += "\n\tHausdorff95 Label 4: {}".format(hausdorff95_label_4) - logger.info(summary) - - self.experiment_results['round'].append(self.current_round) - self.experiment_results['time'].append(self.total_simulated_time) - self.experiment_results['convergence_score'].append(projected_auc) - self.experiment_results['round_dice'].append(round_dice) - self.experiment_results['dice_label_0'].append(dice_label_0) - self.experiment_results['dice_label_1'].append(dice_label_1) - self.experiment_results['dice_label_2'].append(dice_label_2) - self.experiment_results['dice_label_4'].append(dice_label_4) - if self.include_validation_with_hausdorff: - self.experiment_results['hausdorff95_label_0'].append(hausdorff95_label_0) - self.experiment_results['hausdorff95_label_1'].append(hausdorff95_label_1) - self.experiment_results['hausdorff95_label_2'].append(hausdorff95_label_2) - self.experiment_results['hausdorff95_label_4'].append(hausdorff95_label_4) - + # cache the aggregated tensor_dict cache_tensor_dict(self.agg_tensor_dict, agg_tensor_db, 0, {}) if self.save_checkpoints: @@ -429,6 +430,7 @@ def join(self, inputs): self.best_dice_over_time_auc, self.collaborators_chosen_each_round, self.collaborator_times_per_round, + self.tensor_keys_per_col, self.experiment_results, summary) @@ -458,7 +460,7 @@ def join(self, inputs): 
@aggregator def internal_loop(self): - if self.current_round == self.n_rounds: + if self.current_round >= self.n_rounds: print('************* EXPERIMENT COMPLETED *************') print('Experiment results:') print(pd.DataFrame.from_dict(self.experiment_results)) From 7f6ad419ce71ec412d28ee339a936f1e6477f9be Mon Sep 17 00:00:00 2001 From: "Agrawal, Kush" Date: Mon, 24 Mar 2025 08:32:46 -0700 Subject: [PATCH 13/16] Updated requirements.txt file Signed-off-by: Agrawal, Kush --- Task_1/FeTS_Challenge.py | 6 +++--- Task_1/README.md | 13 ++++++------- Task_1/fets_challenge/fets_flow.py | 6 ++++-- Task_1/generate_predictions.py | 2 +- Task_1/requirements.txt | 12 ++++++++++++ 5 files changed, 26 insertions(+), 13 deletions(-) create mode 100644 Task_1/requirements.txt diff --git a/Task_1/FeTS_Challenge.py b/Task_1/FeTS_Challenge.py index fb0af22..91ba572 100644 --- a/Task_1/FeTS_Challenge.py +++ b/Task_1/FeTS_Challenge.py @@ -529,7 +529,7 @@ def FedAvgM_Selection(local_tensors, institution_split_csv_filename = 'small_split.csv' # change this to point to the parent directory of the data -brats_training_data_parent_dir = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_TrainingData' #TODO revert to '/raid/datasets/FeTS22/MICCAI_FeTS2022_TrainingData' before raising the PR +brats_training_data_parent_dir = '/home/ad_tbanda/code/fedAI/MICCAI_FeTS2022_TrainingData' #TODO revert to '/raid/datasets/FeTS22/MICCAI_FeTS2022_TrainingData' before raising the PR # increase this if you need a longer history for your algorithms # decrease this if you need to reduce system RAM consumption @@ -572,7 +572,7 @@ def FedAvgM_Selection(local_tensors, shutil.copy2(source_file, destination_file) try: os.chdir(working_directory) - logger.info("Directory changed to:", os.getcwd()) + logger.info(f"Directory changed to : {os.getcwd()}") except FileNotFoundError: logger.info("Error: Directory not found.") except PermissionError: @@ -605,7 +605,7 @@ def FedAvgM_Selection(local_tensors, 
#checkpoint_folder='experiment_1' #data_path = -data_path = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_ValidationData' #TODO revert to '/home/brats/MICCAI_FeTS2022_ValidationData' before raising the PR +data_path = '/home/ad_tbanda/code/fedAI/MICCAI_FeTS2022_ValidationData' #TODO revert to '/home/brats/MICCAI_FeTS2022_ValidationData' before raising the PR validation_csv_filename = 'validation.csv' # you can keep these the same if you wish diff --git a/Task_1/README.md b/Task_1/README.md index ab80043..858a19c 100644 --- a/Task_1/README.md +++ b/Task_1/README.md @@ -20,17 +20,16 @@ Please ask any additional questions in our discussion pages on our github site a 2. ```git clone https://github.com/FETS-AI/Challenge.git``` 3. ```cd Challenge/Task_1``` 4. ```git lfs pull``` -5. Create virtual environment (python 3.6-3.8): using Anaconda, a new environment can be created and activated using the following commands: +5. Create virtual environment (python 3.8-3.13): using python venv, a new environment can be created and activated using the following commands: ```sh ## create venv in specific path - conda create -p ./venv python=3.7 -y - conda activate ./venv + python -m venv venv + source venv/bin/activate ``` 6. ```pip install --upgrade pip``` -7. Install Pytorch LTS (1.8.2) for your system (use CUDA 11): - ```pip3 install torch==1.8.2 torchvision==0.9.2 torchaudio==0.8.2 --extra-index-url https://download.pytorch.org/whl/lts/1.8/cu111``` -*Note all previous versions of pytorch can be found in [these instructions]([https://pytorch.org/get-started/locally/](https://pytorch.org/get-started/previous-versions/)) -9. Set the environment variable `SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True` (to avoid sklearn deprecation error) +7. Install dependent pip libraries: + ```pip install -r requirements.txt``` +9. Set the environment variable `export SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True` (to avoid sklearn deprecation error) 10. 
```pip install .``` > * _Note: if you run into ```ERROR: Failed building wheel for SimpleITK```, try running ```pip install SimpleITK --only-binary :all:``` then rerunning ```pip install .```_ 10. ```python FeTS_Challenge.py``` diff --git a/Task_1/fets_challenge/fets_flow.py b/Task_1/fets_challenge/fets_flow.py index d951f0e..8b26f2a 100644 --- a/Task_1/fets_challenge/fets_flow.py +++ b/Task_1/fets_challenge/fets_flow.py @@ -42,7 +42,9 @@ def cache_tensor_dict(tensor_dict, agg_tensor_db, idx, agg_out_dict): agg_tensor_db.cache_tensor(agg_out_dict) def return_cleanup_key(tensor_key, col, round_data_to_delete): - new_tags = change_tags(tensor_key.tags, remove_field=col) + new_tags = tensor_key.tags + if col in tensor_key.tags: + new_tags = change_tags(tensor_key.tags, remove_field=col) modified_key = TensorKey( tensor_name=tensor_key.tensor_name, origin=col, @@ -401,7 +403,7 @@ def join(self, inputs): # update metrics and results summary = "" round_dice = 0 - update_metrics(self.current_round, agg_tensor_db, summary. 
self.experiment_results, + update_metrics(self.current_round, agg_tensor_db, summary, self.experiment_results, self.include_validation_with_hausdorff, self.total_simulated_time, round_dice, projected_auc) if self.best_dice < round_dice: diff --git a/Task_1/generate_predictions.py b/Task_1/generate_predictions.py index 4c5a570..c4de46d 100644 --- a/Task_1/generate_predictions.py +++ b/Task_1/generate_predictions.py @@ -23,7 +23,7 @@ # the data you want to run inference over checkpoint_folder='experiment_1' #data_path = -data_path = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_ValidationData' +data_path = '/home/ad_tbanda/code/fedAI/MICCAI_FeTS2022_ValidationData' #TODO revert to '/home/brats/MICCAI_FeTS2022_ValidationData' before raising the PR # you can keep these the same if you wish best_model_path = os.path.join(home, '.local/workspace/checkpoint', checkpoint_folder, 'best_model.pkl') diff --git a/Task_1/requirements.txt b/Task_1/requirements.txt new file mode 100644 index 0000000..cd27f40 --- /dev/null +++ b/Task_1/requirements.txt @@ -0,0 +1,12 @@ +chardet +charset-normalizer +dill==0.3.6 +matplotlib>=2.0.0 +metaflow==2.7.15 +nbdev==2.3.12 +nbformat==5.10.4 +ray==2.9.2 +tabulate==0.9.0 +torch==2.3.1 +torchvision==0.18.1 +fastcore==1.5.29 \ No newline at end of file From 8f80b5470b937f54d3e670a1692cfe887d1263f0 Mon Sep 17 00:00:00 2001 From: "Agrawal, Kush" Date: Mon, 24 Mar 2025 11:24:36 -0700 Subject: [PATCH 14/16] Update comments and typos Signed-off-by: Tarunkumar, Banda --- Task_1/FeTS_Challenge.py | 24 ++- Task_1/README.md | 2 +- .../fets_challenge/config/gandlf_config.yaml | 2 +- Task_1/fets_challenge/experiment.py | 18 +- Task_1/fets_challenge/fets_challenge_model.py | 14 +- Task_1/fets_challenge/fets_flow.py | 194 +++++++++++------- Task_1/fets_challenge/inference.py | 13 +- Task_1/generate_predictions.py | 33 ++- 8 files changed, 168 insertions(+), 132 deletions(-) diff --git a/Task_1/FeTS_Challenge.py b/Task_1/FeTS_Challenge.py index 91ba572..3b73db3 
100644 --- a/Task_1/FeTS_Challenge.py +++ b/Task_1/FeTS_Challenge.py @@ -337,7 +337,9 @@ def clipped_aggregation(local_tensors, # first, we need to determine how much each local update has changed the tensor from the previous value # we'll use the tensor_db search function to find the - previous_tensor_value = tensor_db.search(tensor_name=tensor_name, fl_round=fl_round, tags=('model',), origin='aggregator') + previous_tensor_value = tensor_db.search(tensor_name=tensor_name, fl_round=fl_round, tags=('trained',), origin='aggregator') + logger.info(f"Tensor Values {previous_tensor_value}") + logger.info(f"Tensor Values Shape {previous_tensor_value.shape[0]}") if previous_tensor_value.shape[0] > 1: logger.info(previous_tensor_value) @@ -436,6 +438,8 @@ def FedAvgM_Selection(local_tensors, # Calculate aggregator's last value previous_tensor_value = None for _, record in tensor_db.iterrows(): + print(f'record tags {record["tags"]} record round {record["round"]} record tensor_name {record["tensor_name"]}') + print(f'fl_round {fl_round} tensor_name {tensor_name}') if (record['round'] == fl_round and record["tensor_name"] == tensor_name and record["tags"] == ("aggregated",)): @@ -512,7 +516,7 @@ def FedAvgM_Selection(local_tensors, # change any of these you wish to your custom functions. You may leave defaults if you wish. -aggregation_function = weighted_average_aggregation +aggregation_function = FedAvgM_Selection choose_training_collaborators = all_collaborators_train training_hyper_parameters_for_round = constant_hyper_parameters @@ -521,7 +525,7 @@ def FedAvgM_Selection(local_tensors, # to those you specify immediately above. Changing the below value to False will change # this fact, excluding the three hausdorff measurements. As hausdorff distance is # expensive to compute, excluding them will speed up your experiments. 
-include_validation_with_hausdorff=False #TODO change it to True +include_validation_with_hausdorff=True #TODO change it to True # We encourage participants to experiment with partitioning_1 and partitioning_2, as well as to create # other partitionings to test your changes for generalization to multiple partitionings. @@ -529,7 +533,7 @@ def FedAvgM_Selection(local_tensors, institution_split_csv_filename = 'small_split.csv' # change this to point to the parent directory of the data -brats_training_data_parent_dir = '/home/ad_tbanda/code/fedAI/MICCAI_FeTS2022_TrainingData' #TODO revert to '/raid/datasets/FeTS22/MICCAI_FeTS2022_TrainingData' before raising the PR +brats_training_data_parent_dir = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_TrainingData' # increase this if you need a longer history for your algorithms # decrease this if you need to reduce system RAM consumption @@ -546,6 +550,9 @@ def FedAvgM_Selection(local_tensors, # The checkpoints can grow quite large (5-10GB) so only the latest will be saved when this parameter is enabled save_checkpoints = True +# (str) Determines the backend process to use for the experiment.(single_process, ray) +backend_process = 'single_process' + # path to previous checkpoint folder for experiment that was stopped before completion. # Checkpoints are stored in ~/.local/workspace/checkpoint, and you should provide the experiment directory # relative to this path (i.e. 'experiment_1'). 
Please note that if you restore from a checkpoint, @@ -582,14 +589,15 @@ def FedAvgM_Selection(local_tensors, aggregation_function=aggregation_function, choose_training_collaborators=choose_training_collaborators, training_hyper_parameters_for_round=training_hyper_parameters_for_round, - include_validation_with_hausdorff=include_validation_with_hausdorff, institution_split_csv_filename=institution_split_csv_filename, brats_training_data_parent_dir=brats_training_data_parent_dir, db_store_rounds=db_store_rounds, rounds_to_train=rounds_to_train, device=device, save_checkpoints=save_checkpoints, - restore_from_checkpoint_folder = restore_from_checkpoint_folder) + restore_from_checkpoint_folder = restore_from_checkpoint_folder, + include_validation_with_hausdorff=include_validation_with_hausdorff, + backend_process = backend_process) # ## Produce NIfTI files for best model outputs on the validation set @@ -603,9 +611,8 @@ def FedAvgM_Selection(local_tensors, # you will need to specify the correct experiment folder and the parent directory for # the data you want to run inference over (assumed to be the experiment that just completed) -#checkpoint_folder='experiment_1' #data_path = -data_path = '/home/ad_tbanda/code/fedAI/MICCAI_FeTS2022_ValidationData' #TODO revert to '/home/brats/MICCAI_FeTS2022_ValidationData' before raising the PR +data_path = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_ValidationData' validation_csv_filename = 'validation.csv' # you can keep these the same if you wish @@ -625,7 +632,6 @@ def FedAvgM_Selection(local_tensors, # Using this best model, we can now produce NIfTI files for model outputs # using a provided data directory - model_outputs_to_disc(data_path=data_path, validation_csv=validation_csv_filename, output_path=outputs_path, diff --git a/Task_1/README.md b/Task_1/README.md index 858a19c..96972d4 100644 --- a/Task_1/README.md +++ b/Task_1/README.md @@ -20,7 +20,7 @@ Please ask any additional questions in our discussion pages on our github 
site a 2. ```git clone https://github.com/FETS-AI/Challenge.git``` 3. ```cd Challenge/Task_1``` 4. ```git lfs pull``` -5. Create virtual environment (python 3.8-3.13): using python venv, a new environment can be created and activated using the following commands: +5. Create virtual environment (python 3.10-3.13): using python venv, a new environment can be created and activated using the following commands: ```sh ## create venv in specific path python -m venv venv diff --git a/Task_1/fets_challenge/config/gandlf_config.yaml b/Task_1/fets_challenge/config/gandlf_config.yaml index eb03963..1cecae5 100644 --- a/Task_1/fets_challenge/config/gandlf_config.yaml +++ b/Task_1/fets_challenge/config/gandlf_config.yaml @@ -56,7 +56,7 @@ scaling_factor: 1 scheduler: type: triangle_modified track_memory_usage: false -verbose: false +verbose: True version: maximum: 0.1.0 minimum: 0.0.14 diff --git a/Task_1/fets_challenge/experiment.py b/Task_1/fets_challenge/experiment.py index 31c34b0..f3d846b 100644 --- a/Task_1/fets_challenge/experiment.py +++ b/Task_1/fets_challenge/experiment.py @@ -54,7 +54,8 @@ def run_challenge_experiment(aggregation_function, save_checkpoints=True, restore_from_checkpoint_folder=None, include_validation_with_hausdorff=True, - use_pretrained_model=False): + use_pretrained_model=False, + backend_process='single_process'): file = Path(__file__).resolve() root = file.parent.resolve() # interface root, containing command modules @@ -112,18 +113,18 @@ def run_challenge_experiment(aggregation_function, db_store_rounds=db_store_rounds) local_runtime = LocalRuntime( - aggregator=aggregator, collaborators=collaborators, backend="single_process", num_actors=1 + aggregator=aggregator, collaborators=collaborators, backend=backend_process, num_actors=1 ) logger.info(f"Local runtime collaborators = {local_runtime.collaborators}") params_dict = {"include_validation_with_hausdorff": include_validation_with_hausdorff, - "choose_training_collaborators": 
choose_training_collaborators, #TODO verify with different collaborators and check if works? + "choose_training_collaborators": choose_training_collaborators, "training_hyper_parameters_for_round": training_hyper_parameters_for_round, "restore_from_checkpoint_folder": restore_from_checkpoint_folder, "save_checkpoints": save_checkpoints} - model = FeTSChallengeModel(gandlf_config_path) + model = FeTSChallengeModel() flflow = FeTSFederatedFlow( model, params_dict, @@ -144,13 +145,4 @@ def run_challenge_experiment(aggregation_function, # checkpoint = torch.load(f'{root}/pretrained_model/resunet_pretrained.pth') # task_runner.model.load_state_dict(checkpoint['model_state_dict']) # task_runner.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) - - # # Initialize model weights - # #TODO [Workflow - API] How to set the initial state in the workflow -> check if it needed to be done in workflow - # init_state_path = plan.config['aggregator']['settings']['init_state_path'] - # tensor_dict, _ = split_tensor_dict_for_holdouts(logger, task_runner.get_tensor_dict(False)) - # model_snap = utils.construct_model_proto(tensor_dict=tensor_dict, - # round_number=0, - # tensor_pipe=tensor_pipe) - # utils.dump_proto(model_proto=model_snap, fpath=init_state_path) return aggregator.private_attributes["checkpoint_folder"] \ No newline at end of file diff --git a/Task_1/fets_challenge/fets_challenge_model.py b/Task_1/fets_challenge/fets_challenge_model.py index 77c3c63..044641a 100644 --- a/Task_1/fets_challenge/fets_challenge_model.py +++ b/Task_1/fets_challenge/fets_challenge_model.py @@ -37,15 +37,11 @@ class FeTSChallengeModel(): device (str): Device for the model. training_round_completed (bool): Whether the training round has been completed. - required_tensorkeys_for_function (dict): Required tensorkeys for - function. tensor_dict_split_fn_kwargs (dict): Keyword arguments for the tensor dict split function. 
""" - def __init__( - self, gandlf_config_path - ): + def __init__(self): """Initializes the GaNDLFTaskRunner object. Sets up the initial state of the GaNDLFTaskRunner object, initializing @@ -65,7 +61,6 @@ def __init__( self.device = None self.training_round_completed = False - self.required_tensorkeys_for_function = {} self.logger = getLogger(__name__) # FIXME: why isn't this initial call in runner_pt? @@ -133,9 +128,6 @@ def validate(self, col_name, round_num, val_loader, use_tqdm=False, **kwargs): mode="validation", ) - print(f"Validation loss: {epoch_valid_loss}") - print(f"Validation metric: {epoch_valid_metric}") - origin = col_name suffix = 'validate' if kwargs['apply'] == 'local': @@ -452,9 +444,6 @@ def create_tensorkey_dicts( logger, tensor_dict, **tensor_dict_split_fn_kwargs ) - # global_model_dict : [{x: np1}, {x1: np2}] - # global_tensorkey_model_dict : [{tk1: np1}, {tk2: np2}] - # Create global tensorkeys global_tensorkey_model_dict = { TensorKey(tensor_name, origin, round_num, False, tags): nparray @@ -502,6 +491,7 @@ def set_pt_model_from_tensor_dict(model, tensor_dict, device, with_opt_vars=Fals # Grabbing keys from model's state_dict helps to confirm we have # everything for k in model.state_dict(): + #print(f" Fetching state for key = {k} Value : {tensor_dict[k]}") new_state[k] = pt.from_numpy(tensor_dict.pop(k)).to(device) # set model state diff --git a/Task_1/fets_challenge/fets_flow.py b/Task_1/fets_challenge/fets_flow.py index 8b26f2a..cf83680 100644 --- a/Task_1/fets_challenge/fets_flow.py +++ b/Task_1/fets_challenge/fets_flow.py @@ -54,58 +54,76 @@ def return_cleanup_key(tensor_key, col, round_data_to_delete): ) return modified_key -def get_aggregated_dict_with_tensorname(agg_tensor_dict): +def get_aggregated_dict_with_tensorname(agg_tensor_dict, current_round=0): agg_dict_with_tensornames = {} for tensor_key, value in agg_tensor_dict.items(): - tensor_name, origin, round_number, report, tags = tensor_key - 
agg_dict_with_tensornames[tensor_name] = value + print(f"Tags : {tensor_key.tags}") + if 'aggregated' in tensor_key.tags or current_round == 0: + tensor_name, origin, round_number, report, tags = tensor_key + print(f"Tensor Name : {tensor_name}") + agg_dict_with_tensornames[tensor_name] = value return agg_dict_with_tensornames def update_metrics(current_round, agg_tensor_db, summary, experiment_results, include_validation_with_hausdorff, +<<<<<<< HEAD + total_simulated_time, projected_auc): +======= total_simulated_time, round_dice, projected_auc): - - round_loss = get_metric('valid_loss', current_round, agg_tensor_db) - round_dice = get_metric('valid_dice', current_round, agg_tensor_db) - dice_label_0 = get_metric('valid_dice_per_label_0', current_round, agg_tensor_db) - dice_label_1 = get_metric('valid_dice_per_label_1', current_round, agg_tensor_db) - dice_label_2 = get_metric('valid_dice_per_label_2', current_round, agg_tensor_db) - dice_label_4 = get_metric('valid_dice_per_label_4', current_round, agg_tensor_db) - if include_validation_with_hausdorff: - hausdorff95_label_0 = get_metric('valid_hd95_per_label_0', current_round, agg_tensor_db) - hausdorff95_label_1 = get_metric('valid_hd95_per_label_1', current_round, agg_tensor_db) - hausdorff95_label_2 = get_metric('valid_hd95_per_label_2', current_round, agg_tensor_db) - hausdorff95_label_4 = get_metric('valid_hd95_per_label_4', current_round, agg_tensor_db) +>>>>>>> 74cc059... 
Fixed typo + + round_loss = get_metric('valid_loss', current_round, agg_tensor_db) + round_dice = get_metric('valid_dice', current_round, agg_tensor_db) + dice_label_0 = get_metric('valid_dice_per_label_0', current_round, agg_tensor_db) + dice_label_1 = get_metric('valid_dice_per_label_1', current_round, agg_tensor_db) + dice_label_2 = get_metric('valid_dice_per_label_2', current_round, agg_tensor_db) + dice_label_4 = get_metric('valid_dice_per_label_4', current_round, agg_tensor_db) + if include_validation_with_hausdorff: + hausdorff95_label_0 = get_metric('valid_hd95_per_label_0', current_round, agg_tensor_db) + hausdorff95_label_1 = get_metric('valid_hd95_per_label_1', current_round, agg_tensor_db) + hausdorff95_label_2 = get_metric('valid_hd95_per_label_2', current_round, agg_tensor_db) + hausdorff95_label_4 = get_metric('valid_hd95_per_label_4', current_round, agg_tensor_db) +<<<<<<< HEAD + + # # End of round summary +======= - # # End of round summary - summary = '"**** END OF ROUND {} SUMMARY *****"'.format(current_round) - summary += "\n\tSimulation Time: {} minutes".format(round(total_simulated_time / 60, 2)) - summary += "\n\t(Projected) Convergence Score: {}".format(projected_auc) - summary += "\n\tRound Loss: {}".format(round_loss) - summary += "\n\tRound Dice: {}".format(round_dice) - summary += "\n\tDICE Label 0: {}".format(dice_label_0) - summary += "\n\tDICE Label 1: {}".format(dice_label_1) - summary += "\n\tDICE Label 2: {}".format(dice_label_2) - summary += "\n\tDICE Label 4: {}".format(dice_label_4) - if include_validation_with_hausdorff: - summary += "\n\tHausdorff95 Label 0: {}".format(hausdorff95_label_0) - summary += "\n\tHausdorff95 Label 1: {}".format(hausdorff95_label_1) - summary += "\n\tHausdorff95 Label 2: {}".format(hausdorff95_label_2) - summary += "\n\tHausdorff95 Label 4: {}".format(hausdorff95_label_4) - logger.info(summary) - - experiment_results['round'].append(current_round) - 
experiment_results['time'].append(total_simulated_time) - experiment_results['convergence_score'].append(projected_auc) - experiment_results['round_dice'].append(round_dice) - experiment_results['dice_label_0'].append(dice_label_0) - experiment_results['dice_label_1'].append(dice_label_1) - experiment_results['dice_label_2'].append(dice_label_2) - experiment_results['dice_label_4'].append(dice_label_4) - if include_validation_with_hausdorff: - experiment_results['hausdorff95_label_0'].append(hausdorff95_label_0) - experiment_results['hausdorff95_label_1'].append(hausdorff95_label_1) - experiment_results['hausdorff95_label_2'].append(hausdorff95_label_2) - experiment_results['hausdorff95_label_4'].append(hausdorff95_label_4) + # End of round summary +>>>>>>> 74cc059... Fixed typo + summary = '"**** END OF ROUND {} SUMMARY *****"'.format(current_round) + summary += "\n\tSimulation Time: {} minutes".format(round(total_simulated_time / 60, 2)) + summary += "\n\t(Projected) Convergence Score: {}".format(projected_auc) + summary += "\n\tRound Loss: {}".format(round_loss) + summary += "\n\tRound Dice: {}".format(round_dice) + summary += "\n\tDICE Label 0: {}".format(dice_label_0) + summary += "\n\tDICE Label 1: {}".format(dice_label_1) + summary += "\n\tDICE Label 2: {}".format(dice_label_2) + summary += "\n\tDICE Label 4: {}".format(dice_label_4) + if include_validation_with_hausdorff: + summary += "\n\tHausdorff95 Label 0: {}".format(hausdorff95_label_0) + summary += "\n\tHausdorff95 Label 1: {}".format(hausdorff95_label_1) + summary += "\n\tHausdorff95 Label 2: {}".format(hausdorff95_label_2) + summary += "\n\tHausdorff95 Label 4: {}".format(hausdorff95_label_4) + logger.info(summary) + + experiment_results['round'].append(current_round) + experiment_results['time'].append(total_simulated_time) + experiment_results['convergence_score'].append(projected_auc) + experiment_results['round_dice'].append(round_dice) + experiment_results['dice_label_0'].append(dice_label_0) + 
experiment_results['dice_label_1'].append(dice_label_1) + experiment_results['dice_label_2'].append(dice_label_2) + experiment_results['dice_label_4'].append(dice_label_4) + if include_validation_with_hausdorff: + experiment_results['hausdorff95_label_0'].append(hausdorff95_label_0) + experiment_results['hausdorff95_label_1'].append(hausdorff95_label_1) + experiment_results['hausdorff95_label_2'].append(hausdorff95_label_2) + experiment_results['hausdorff95_label_4'].append(hausdorff95_label_4) +<<<<<<< HEAD + + return summary, round_dice +======= + return round_dice +>>>>>>> 74cc059... Fixed typo collaborator_data_loaders = {} @@ -115,7 +133,7 @@ def __init__(self, fets_model, params_dict, rounds=5 , device="cpu", **kwargs): self.fets_model = fets_model self.n_rounds = rounds self.device = device - self.current_round = 1 + self.current_round = 0 self.total_simulated_time = 0 self.best_dice = -1.0 self.best_dice_over_time_auc = 0 @@ -163,7 +181,6 @@ def start(self): logger.warning(f'Could not find provided checkpoint folder: {self.restore_from_checkpoint_folder}. Exiting...') exit(1) else: - #TODO : Validate load from checkpoint logic logger.info(f'Attempting to load last completed round from {self.restore_from_checkpoint_folder}') state = load_checkpoint(self.restore_from_checkpoint_folder) self.checkpoint_folder = self.restore_from_checkpoint_folder @@ -188,20 +205,18 @@ def start(self): #Updating the agg_tensor_dict from stored tensor_db starting_round_num += 1 self.current_round = starting_round_num - logger.info(f'Loading checkpoint from round {self.tensor_keys_per_col}') for col,tensor_keys in self.tensor_keys_per_col.items(): for tensor_key in tensor_keys: tensor_name, _, _, _, _ = tensor_key if tensor_name not in self.agg_tensor_dict: self.agg_tensor_dict[tensor_key] = aggregator_tensor_db.get_tensor_from_cache(tensor_key) - logger.info(f'Loaded tensor key {tensor_key}') if self.current_round >= self.n_rounds: logger.info("Experiment already completed. 
Exiting...") self.next(self.end) - - self.collaborator_time_stats = gen_collaborator_time_stats(self.collaborator_names) - self.next(self.fetch_parameters_for_colls) + else: + self.collaborator_time_stats = gen_collaborator_time_stats(self.collaborator_names) + self.next(self.fetch_parameters_for_colls) @aggregator def fetch_parameters_for_colls(self): @@ -235,7 +250,7 @@ def fetch_parameters_for_colls(self): logger.info('Collaborators chosen to train for round {}:\n\t{}'.format(self.current_round, self.training_collaborators)) self.collaborators_chosen_each_round[self.current_round] = self.training_collaborators - if self.current_round == 1 or self.restored is True: + if self.current_round == 0 or self.restored is True: self.next(self.initialize_colls, foreach='collaborators') self.restored = False else: @@ -275,8 +290,6 @@ def initialize_colls(self): logger.info(f'Initializing dataloaders for collaborator {self.input}') collaborator_data_loaders[self.input] = FeTSDataLoader(train_loader, val_loader) - - #TODO Validate the times per collaborator is calculated based on the random values, it doesn't look like the actual time taken by the collaborator self.times_per_collaborator = compute_times_per_collaborator(self.input, self.training_collaborators, self.hparam_dict['epochs_per_round'], @@ -299,7 +312,7 @@ def initialize_colls(self): @collaborator def aggregated_model_validation(self): logger.info(f'Performing aggregated model validation for collaborator {self.input}') - input_tensor_dict = get_aggregated_dict_with_tensorname(self.agg_tensor_dict) + input_tensor_dict = get_aggregated_dict_with_tensorname(self.agg_tensor_dict, self.current_round) val_loader = collaborator_data_loaders[self.input].get_valid_loader() self.fets_model.rebuild_model(self.current_round, input_tensor_dict) self.agg_valid_dict, _ = self.fets_model.validate(self.input, self.current_round, val_loader, apply="global") @@ -330,9 +343,11 @@ def join(self, inputs): if self.current_round > 
self.db_store_rounds: round_data_to_delete = self.current_round - self.db_store_rounds self.aggregation_type.set_state_data_for_round(self.collaborators_chosen_each_round, self.collaborator_times_per_round) + agg_tensor_db = TensorDB() collaborator_weights_unnormalized = {} times_per_collaborator = {} + cache_tensor_dict(self.agg_tensor_dict, agg_tensor_db, 0, {}) for idx, col in enumerate(inputs): logger.info(f'Aggregating results for {idx}') agg_out_dict = {} @@ -343,8 +358,6 @@ def join(self, inputs): # Store the keys for each collaborator self.tensor_keys_per_col[str(idx + 1)] = list(agg_out_dict.keys()) - - #TODO : Compare the weight from the old expermient, we saw three different sets of weights while running the experiment for single round collaborator_weights_unnormalized[col.input] = col.collaborator_task_weight times_per_collaborator[col.input] = col.times_per_collaborator @@ -360,30 +373,35 @@ def join(self, inputs): if col in tags: new_tags = change_tags(tags, remove_field=col) agg_tensor_key = TensorKey(tensor_name, origin, round_number, report, new_tags) - # Aggregates the tensor values for the tensor key and stores it in tensor_db - if agg_tensor_key not in self.agg_tensor_dict: + # Aggregates the tensor values for the tensor key and stores it in tensor_db. 
Checks if the tensor is already in tensor_db + if agg_tensor_db.get_tensor_from_cache(agg_tensor_key) is None: agg_results = agg_tensor_db.get_aggregated_tensor( agg_tensor_key, collaborator_weight_dict, aggregation_function=self.aggregation_type, ) - self.agg_tensor_dict[agg_tensor_key] = agg_tensor_db.get_tensor_from_cache(agg_tensor_key) - agg_tensor_keys.append(agg_tensor_key) + agg_tag_tk = TensorKey(tensor_name, origin, round_number, report, ("aggregated",)) + agg_tensor_db.cache_tensor({agg_tag_tk: agg_results}) + # self.agg_tensor_dict[agg_tag_tk] = agg_results + # self.agg_tensor_dict[agg_tensor_key] = agg_results + # agg_tensor_keys.append(agg_tensor_key) + # agg_tensor_keys.append(agg_tag_tk) #cleaningup aggregated tensor dict based on db store rounds - if self.current_round > self.db_store_rounds: - col_tensor_key_to_be_deleted = return_cleanup_key(tensor_key, col, round_data_to_delete) - agg_tensor_key_to_be_deleted = TensorKey(tensor_name, origin, round_data_to_delete, report, new_tags) - if col_tensor_key_to_be_deleted in self.agg_tensor_dict: - self.agg_tensor_dict.pop(col_tensor_key_to_be_deleted) - if agg_tensor_key_to_be_deleted in self.agg_tensor_dict: - self.agg_tensor_dict.pop(agg_tensor_key_to_be_deleted) - + # if (self.current_round + 1) > self.db_store_rounds: + # col_tensor_key_to_be_deleted = return_cleanup_key(tensor_key, col, round_data_to_delete) + # agg_tensor_key_to_be_deleted = TensorKey(tensor_name, origin, round_data_to_delete, report, new_tags) + # agg_tag_key = TensorKey(tensor_name, origin, round_number, report, ("aggregated",)) + # if col_tensor_key_to_be_deleted in self.agg_tensor_dict: + # self.agg_tensor_dict.pop(col_tensor_key_to_be_deleted) + # if agg_tensor_key_to_be_deleted in self.agg_tensor_dict: + # self.agg_tensor_dict.pop(agg_tensor_key_to_be_deleted) + # if agg_tag_key in self.agg_tensor_dict: + # self.agg_tensor_dict.pop(agg_tag_key) + + agg_tensor_db.clean_up(self.db_store_rounds) 
self.tensor_keys_per_col['aggregator'] = agg_tensor_keys - for key in self.agg_tensor_dict.keys(): - print(f'[Kush Aggregated Tensor Dictionary] Keys : {key}') - times_list = [(t, col) for col, t in times_per_collaborator.items()] times_list = sorted(times_list) @@ -401,15 +419,21 @@ def join(self, inputs): projected_auc /= MAX_SIMULATION_TIME # update metrics and results +<<<<<<< HEAD + #round_dice = 0 + summary, round_dice = update_metrics(self.current_round, agg_tensor_db, self.experiment_results, + self.include_validation_with_hausdorff, self.total_simulated_time, projected_auc) +======= summary = "" round_dice = 0 - update_metrics(self.current_round, agg_tensor_db, summary, self.experiment_results, + round_dice = update_metrics(self.current_round, agg_tensor_db, summary, self.experiment_results, self.include_validation_with_hausdorff, self.total_simulated_time, round_dice, projected_auc) +>>>>>>> 74cc059... Fixed typo if self.best_dice < round_dice: self.best_dice = round_dice # Set the weights for the final model - if self.current_round == 1: + if self.current_round == 0: # here the initial model was validated (temp model does not exist) logger.info(f'Skipping best model saving to disk as it is a random initialization.') elif not os.path.exists(f'checkpoint/{self.checkpoint_folder}/temp_model.pkl'): @@ -419,12 +443,23 @@ def join(self, inputs): shutil.copyfile(src=f'checkpoint/{self.checkpoint_folder}/temp_model.pkl',dst=f'checkpoint/{self.checkpoint_folder}/best_model.pkl') logger.info(f'Saved model with best average binary DICE: {self.best_dice} to checkpoint/{self.checkpoint_folder}/best_model.pkl') - # cache the aggregated tensor_dict - cache_tensor_dict(self.agg_tensor_dict, agg_tensor_db, 0, {}) + # Update the agg_tensor_dict for subsequent rounds with the aggregated tensor_db + self.agg_tensor_dict = {} + for _, record in agg_tensor_db.tensor_db.iterrows(): + print(f'Record tensor_name {record["tensor_name"]}') + print(f'Record origin 
{record["origin"]}') + print(f'Record round {record["round"]}') + print(f'Record report {record["report"]}') + print(f'Record tags {record["tags"]}') + + tensor_key = TensorKey(record["tensor_name"], record["origin"], record["round"], record["report"], record["tags"]) + self.agg_tensor_dict[tensor_key] = record["nparray"] + + print(f'************************') if self.save_checkpoints: logger.info(f'Saving checkpoint for round {self.current_round} : checkpoint folder {self.checkpoint_folder}') - logger.info(f'To resume from this checkpoint, set the restore_from_checkpoint_folder parameter to \'{self.checkpoint_folder}\'') + logger.info(f'To resume from this checkpoint, set the restore_from_checkpoint_folder parameter to {self.checkpoint_folder}') save_checkpoint(self.checkpoint_folder, agg_tensor_db, self.collaborator_names, self.runtime.collaborators, self.current_round, self.collaborator_time_stats, @@ -455,7 +490,8 @@ def join(self, inputs): self.fets_model.params = inputs[0].fets_model.params # Rebuild the model with the aggregated tensor_dict - local_tensor_dict = get_aggregated_dict_with_tensorname(self.agg_tensor_dict) + local_tensor_dict = get_aggregated_dict_with_tensorname(self.agg_tensor_dict, self.current_round) + print(f'Local Tensor Dict : {local_tensor_dict.keys()}') self.fets_model.rebuild_model(self.current_round, local_tensor_dict) self.fets_model.save_native(f'checkpoint/{self.checkpoint_folder}/temp_model.pkl') self.next(self.internal_loop) diff --git a/Task_1/fets_challenge/inference.py b/Task_1/fets_challenge/inference.py index 3400b08..35b0706 100644 --- a/Task_1/fets_challenge/inference.py +++ b/Task_1/fets_challenge/inference.py @@ -217,23 +217,14 @@ def model_outputs_to_disc(data_path, work = Path.cwd().resolve() generate_validation_csv(data_path,validation_csv, working_dir=work) - - # # overwrite datapath value for a single 'InferenceCol' collaborator - # plan.cols_data_paths['InferenceCol'] = data_path - - # # get the inference data 
loader - # data_loader = copy(plan).get_data_loader('InferenceCol') - - # # get the task runner, passing the data loader - # task_runner = copy(plan).get_task_runner(data_loader) gandlf_config_path = os.path.join(root, 'config', 'gandlf_config.yaml') - fets_model = FeTSChallengeModel(gandlf_config_path) + fets_model = FeTSChallengeModel() val_csv_path = os.path.join(work, 'validation_paths.csv') gandlf_conf = ConfigManager(gandlf_config_path) ( model, optimizer, - train_loader, + _, val_loader, scheduler, params, diff --git a/Task_1/generate_predictions.py b/Task_1/generate_predictions.py index c4de46d..b6d1900 100644 --- a/Task_1/generate_predictions.py +++ b/Task_1/generate_predictions.py @@ -12,10 +12,11 @@ from pathlib import Path import os from sys import path +from logging import getLogger from fets_challenge.gandlf_csv_adapter import construct_fedsim_csv, extract_csv_partitions device='cpu' - +logger = getLogger(__name__) # infer participant home folder home = str(Path.home()) @@ -23,13 +24,33 @@ # the data you want to run inference over checkpoint_folder='experiment_1' #data_path = -data_path = '/home/ad_tbanda/code/fedAI/MICCAI_FeTS2022_ValidationData' #TODO revert to '/home/brats/MICCAI_FeTS2022_ValidationData' before raising the PR +data_path = '/home/brats/MICCAI_FeTS2022_ValidationData' + +working_directory= os.path.join(home, '.local/workspace/') + +try: + os.chdir(working_directory) + logger.info(f"Directory changed to : {os.getcwd()}") +except FileNotFoundError: + logger.info("Error: Directory not found.") +except PermissionError: + logger.info("Error: Permission denied") + +if checkpoint_folder is not None: + best_model_path = os.path.join(working_directory, 'checkpoint', checkpoint_folder, 'best_model.pkl') +else: + exit("No checkpoint folder found. Please provide a valid checkpoint folder. 
Exiting the experiment without inferencing") + +# If the experiment is only run for a single round, use the temp model instead +if not Path(best_model_path).exists(): + best_model_path = os.path.join(working_directory, 'checkpoint', checkpoint_folder, 'temp_model.pkl') + +if not Path(best_model_path).exists(): + exit("No model found. Please provide a valid checkpoint folder. Exiting the experiment without inferencing") -# you can keep these the same if you wish -best_model_path = os.path.join(home, '.local/workspace/checkpoint', checkpoint_folder, 'best_model.pkl') -outputs_path = os.path.join(home, '.local/workspace/checkpoint', checkpoint_folder, 'model_outputs') +outputs_path = os.path.join(working_directory, 'checkpoint', checkpoint_folder, 'model_outputs') -validation_csv_filename='validation.csv' +validation_csv_filename=os.path.join(home, '.local/workspace/', 'validation.csv') # Using this best model, we can now produce NIfTI files for model outputs From 91f00beaf59777ea669240d7429ce89ea91854a6 Mon Sep 17 00:00:00 2001 From: "Agrawal, Kush" Date: Wed, 26 Mar 2025 22:05:05 -0700 Subject: [PATCH 15/16] Refactor tensors storing logic Signed-off-by: Agrawal, Kush --- Task_1/FeTS_Challenge.py | 40 +- Task_1/fets_challenge/checkpoint_utils.py | 3 +- .../fets_challenge/config/gandlf_config.yaml | 3 +- Task_1/fets_challenge/experiment.py | 70 +-- Task_1/fets_challenge/fets_challenge_model.py | 67 +- Task_1/fets_challenge/fets_flow.py | 571 +++++++++--------- Task_1/fets_challenge/time_utils.py | 8 - 7 files changed, 364 insertions(+), 398 deletions(-) diff --git a/Task_1/FeTS_Challenge.py b/Task_1/FeTS_Challenge.py index 3b73db3..e29cdb1 100644 --- a/Task_1/FeTS_Challenge.py +++ b/Task_1/FeTS_Challenge.py @@ -336,17 +336,12 @@ def clipped_aggregation(local_tensors, clip_to_percentile = 80 # first, we need to determine how much each local update has changed the tensor from the previous value - # we'll use the tensor_db search function to find the - 
previous_tensor_value = tensor_db.search(tensor_name=tensor_name, fl_round=fl_round, tags=('trained',), origin='aggregator') - logger.info(f"Tensor Values {previous_tensor_value}") - logger.info(f"Tensor Values Shape {previous_tensor_value.shape[0]}") + # we'll use the tensor_db retrieve function to find the previous tensor value + previous_tensor_value = tensor_db.retrieve(tensor_name=tensor_name, origin='aggregator', fl_round=fl_round - 1, tags=('aggregated',)) - if previous_tensor_value.shape[0] > 1: - logger.info(previous_tensor_value) - raise ValueError(f'found multiple matching tensors for {tensor_name}, tags=(model,), origin=aggregator') - - if previous_tensor_value.shape[0] < 1: + if previous_tensor_value is None: # no previous tensor, so just return the weighted average + logger.info(f"previous_tensor_value is None") return weighted_average_aggregation(local_tensors, tensor_db, tensor_name, @@ -354,8 +349,6 @@ def clipped_aggregation(local_tensors, collaborators_chosen_each_round, collaborator_times_per_round) - previous_tensor_value = previous_tensor_value.nparray.iloc[0] - # compute the deltas for each collaborator deltas = [t.tensor - previous_tensor_value for t in local_tensors] @@ -428,21 +421,20 @@ def FedAvgM_Selection(local_tensors, if tensor_name not in tensor_db.search(tags=('weight_speeds',))['tensor_name']: #weight_speeds[tensor_name] = np.zeros_like(local_tensors[0].tensor) # weight_speeds[tensor_name] = np.zeros(local_tensors[0].tensor.shape) tensor_db.store( - tensor_name=tensor_name, + tensor_name=tensor_name, tags=('weight_speeds',), nparray=np.zeros_like(local_tensors[0].tensor), ) + return new_tensor_weight else: if tensor_name.endswith("weight") or tensor_name.endswith("bias"): # Calculate aggregator's last value previous_tensor_value = None for _, record in tensor_db.iterrows(): - print(f'record tags {record["tags"]} record round {record["round"]} record tensor_name {record["tensor_name"]}') - print(f'fl_round {fl_round} tensor_name 
{tensor_name}') - if (record['round'] == fl_round + if (record['round'] == fl_round - 1 # Fetching aggregated value for previous round and record["tensor_name"] == tensor_name - and record["tags"] == ("aggregated",)): + and record["tags"] == ('aggregated',)): previous_tensor_value = record['nparray'] break @@ -457,7 +449,7 @@ def FedAvgM_Selection(local_tensors, if tensor_name not in tensor_db.search(tags=('weight_speeds',))['tensor_name']: tensor_db.store( - tensor_name=tensor_name, + tensor_name=tensor_name, tags=('weight_speeds',), nparray=np.zeros_like(local_tensors[0].tensor), ) @@ -481,7 +473,7 @@ def FedAvgM_Selection(local_tensors, new_tensor_weight_speed = momentum * tensor_weight_speed + average_deltas # fix delete (1-momentum) tensor_db.store( - tensor_name=tensor_name, + tensor_name=tensor_name, tags=('weight_speeds',), nparray=new_tensor_weight_speed ) @@ -516,7 +508,7 @@ def FedAvgM_Selection(local_tensors, # change any of these you wish to your custom functions. You may leave defaults if you wish. -aggregation_function = FedAvgM_Selection +aggregation_function = weighted_average_aggregation choose_training_collaborators = all_collaborators_train training_hyper_parameters_for_round = constant_hyper_parameters @@ -525,7 +517,7 @@ def FedAvgM_Selection(local_tensors, # to those you specify immediately above. Changing the below value to False will change # this fact, excluding the three hausdorff measurements. As hausdorff distance is # expensive to compute, excluding them will speed up your experiments. -include_validation_with_hausdorff=True #TODO change it to True +include_validation_with_hausdorff=True # We encourage participants to experiment with partitioning_1 and partitioning_2, as well as to create # other partitionings to test your changes for generalization to multiple partitionings. 
@@ -533,18 +525,18 @@ def FedAvgM_Selection(local_tensors, institution_split_csv_filename = 'small_split.csv' # change this to point to the parent directory of the data -brats_training_data_parent_dir = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_TrainingData' +brats_training_data_parent_dir = '/raid/datasets/FeTS22/MICCAI_FeTS2022_TrainingData' # increase this if you need a longer history for your algorithms # decrease this if you need to reduce system RAM consumption -db_store_rounds = 1 #TODO store the tensor db for these many rounds +db_store_rounds = 1 # this is passed to PyTorch, so set it accordingly for your system device = 'cpu' # you'll want to increase this most likely. You can set it as high as you like, # however, the experiment will exit once the simulated time exceeds one week. -rounds_to_train = 2 #TODO change it to 5 before merging +rounds_to_train = 5 # (bool) Determines whether checkpoints should be saved during the experiment. # The checkpoints can grow quite large (5-10GB) so only the latest will be saved when this parameter is enabled @@ -612,7 +604,7 @@ def FedAvgM_Selection(local_tensors, # the data you want to run inference over (assumed to be the experiment that just completed) #data_path = -data_path = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_ValidationData' +data_path = '/raid/datasets/FeTS22/MICCAI_FeTS2022_ValidationData' validation_csv_filename = 'validation.csv' # you can keep these the same if you wish diff --git a/Task_1/fets_challenge/checkpoint_utils.py b/Task_1/fets_challenge/checkpoint_utils.py index 4742674..9ee8c00 100644 --- a/Task_1/fets_challenge/checkpoint_utils.py +++ b/Task_1/fets_challenge/checkpoint_utils.py @@ -28,7 +28,6 @@ def save_checkpoint(checkpoint_folder, agg_tensor_db, best_dice_over_time_auc, collaborators_chosen_each_round, collaborator_times_per_round, - tensor_keys_per_col, experiment_results, summary): """ @@ -39,7 +38,7 @@ def save_checkpoint(checkpoint_folder, agg_tensor_db, with 
open(f'checkpoint/{checkpoint_folder}/state.pkl', 'wb') as f: pickle.dump([collaborator_names, round_num, collaborator_time_stats, total_simulated_time, best_dice, best_dice_over_time_auc, collaborators_chosen_each_round, - collaborator_times_per_round, tensor_keys_per_col, experiment_results, summary], f) + collaborator_times_per_round, experiment_results, summary], f) def load_checkpoint(checkpoint_folder): """ diff --git a/Task_1/fets_challenge/config/gandlf_config.yaml b/Task_1/fets_challenge/config/gandlf_config.yaml index 1cecae5..4be0b2a 100644 --- a/Task_1/fets_challenge/config/gandlf_config.yaml +++ b/Task_1/fets_challenge/config/gandlf_config.yaml @@ -31,6 +31,7 @@ model: final_layer: softmax ignore_label_validation: null norm_type: instance + num_channels: 4 nested_training: testing: 1 validation: -5 @@ -56,7 +57,7 @@ scaling_factor: 1 scheduler: type: triangle_modified track_memory_usage: false -verbose: True +verbose: False version: maximum: 0.1.0 minimum: 0.0.14 diff --git a/Task_1/fets_challenge/experiment.py b/Task_1/fets_challenge/experiment.py index f3d846b..8d9a7f1 100644 --- a/Task_1/fets_challenge/experiment.py +++ b/Task_1/fets_challenge/experiment.py @@ -20,27 +20,28 @@ from openfl.experimental.workflow.interface import Aggregator, Collaborator from openfl.experimental.workflow.runtime import LocalRuntime +from GANDLF.config_manager import ConfigManager + logger = getLogger(__name__) # This catches PyTorch UserWarnings for CPU warnings.filterwarnings("ignore", category=UserWarning) -def aggregator_private_attributes( - aggregation_type, collaborator_names, db_store_rounds): - return {"aggregation_type" : aggregation_type, - "collaborator_names": collaborator_names, - "checkpoint_folder":None, - "db_store_rounds":db_store_rounds -} - - -def collaborator_private_attributes( - index, gandlf_config, train_csv_path, val_csv_path): - return { - "index": index, - "gandlf_config": gandlf_config, - "train_csv_path": train_csv_path, - "val_csv_path": 
val_csv_path - } +def aggregator_private_attributes(aggregation_type, collaborator_names, db_store_rounds): + return { + "aggregation_type" : aggregation_type, + "collaborator_names": collaborator_names, + "checkpoint_folder":None, + "db_store_rounds":db_store_rounds, + "agg_tensor_dict":{} + } + + +def collaborator_private_attributes(index, train_csv_path, val_csv_path): + return { + "index": index, + "train_csv_path": train_csv_path, + "val_csv_path": val_csv_path + } def run_challenge_experiment(aggregation_function, @@ -70,12 +71,20 @@ def run_challenge_experiment(aggregation_function, 0.8, gandlf_csv_path) - print(f'Collaborator names for experiment : {collaborator_names}') + logger.info(f'Collaborator names for experiment : {collaborator_names}') aggregation_wrapper = CustomAggregationWrapper(aggregation_function) transformed_csv_dict = extract_csv_partitions(os.path.join(work, 'gandlf_paths.csv')) + gandlf_conf = {} + if isinstance(gandlf_config_path, str) and os.path.exists(gandlf_config_path): + gandlf_conf = ConfigManager(gandlf_config_path) + elif isinstance(gandlf_config_path, dict): + gandlf_conf = gandlf_config_path + else: + exit("GANDLF config file not found. 
Exiting...") + collaborators = [] for idx, col in enumerate(collaborator_names): col_dir = os.path.join(work, 'data', str(col)) @@ -96,9 +105,8 @@ def run_challenge_experiment(aggregation_function, # with ray backend with 2 collaborators num_cpus=4.0, num_gpus=0.0, - # arguments required to pass to callable + # private arguments required to pass to callable index=idx, - gandlf_config=gandlf_config_path, train_csv_path=train_csv_path, val_csv_path=val_csv_path ) @@ -108,6 +116,7 @@ def run_challenge_experiment(aggregation_function, private_attributes_callable=aggregator_private_attributes, num_cpus=4.0, num_gpus=0.0, + # private arguments required to pass to callable collaborator_names=collaborator_names, aggregation_type=aggregation_wrapper, db_store_rounds=db_store_rounds) @@ -119,10 +128,12 @@ def run_challenge_experiment(aggregation_function, logger.info(f"Local runtime collaborators = {local_runtime.collaborators}") params_dict = {"include_validation_with_hausdorff": include_validation_with_hausdorff, - "choose_training_collaborators": choose_training_collaborators, - "training_hyper_parameters_for_round": training_hyper_parameters_for_round, - "restore_from_checkpoint_folder": restore_from_checkpoint_folder, - "save_checkpoints": save_checkpoints} + "use_pretrained_model": use_pretrained_model, + "gandlf_config": gandlf_conf, + "choose_training_collaborators": choose_training_collaborators, + "training_hyper_parameters_for_round": training_hyper_parameters_for_round, + "restore_from_checkpoint_folder": restore_from_checkpoint_folder, + "save_checkpoints": save_checkpoints} model = FeTSChallengeModel() flflow = FeTSFederatedFlow( @@ -134,15 +145,4 @@ def run_challenge_experiment(aggregation_function, flflow.runtime = local_runtime flflow.run() - - # #TODO [Workflow - API] -> Commenting as pretrained model is not used. 
- # if use_pretrained_model: - # if device == 'cpu': - # checkpoint = torch.load(f'{root}/pretrained_model/resunet_pretrained.pth',map_location=torch.device('cpu')) - # task_runner.model.load_state_dict(checkpoint['model_state_dict']) - # task_runner.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) - # else: - # checkpoint = torch.load(f'{root}/pretrained_model/resunet_pretrained.pth') - # task_runner.model.load_state_dict(checkpoint['model_state_dict']) - # task_runner.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) return aggregator.private_attributes["checkpoint_folder"] \ No newline at end of file diff --git a/Task_1/fets_challenge/fets_challenge_model.py b/Task_1/fets_challenge/fets_challenge_model.py index 044641a..5767e10 100644 --- a/Task_1/fets_challenge/fets_challenge_model.py +++ b/Task_1/fets_challenge/fets_challenge_model.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 -"""GaNDLFTaskRunner module.""" +"""FeTS Challenge Model class for Federated Learning.""" import os from copy import deepcopy @@ -22,14 +22,12 @@ from logging import getLogger class FeTSChallengeModel(): - """GaNDLF Model class for Federated Learning. + """FeTS Challenge Model class for Federated Learning. This class provides methods to manage and manipulate GaNDLF models in a federated learning context. Attributes: - build_model (function or class): Function or Class to build the model. - lambda_opt (function): Lambda function for the optimizer. model (Model): The built model. optimizer (Optimizer): Optimizer for the model. scheduler (Scheduler): Scheduler for the model. @@ -46,12 +44,6 @@ def __init__(self): Sets up the initial state of the GaNDLFTaskRunner object, initializing various components needed for the federated model. - Args: - gandlf_config (Union[str, dict], optional): GaNDLF configuration. - Can be a string (file path) or a dictionary. Defaults to None. - device (str, optional): Compute device. Defaults to None - (default="cpu"). 
- **kwargs: Additional parameters to pass to the function. """ self.model = None @@ -60,24 +52,22 @@ def __init__(self): self.params = None self.device = None + self.opt_treatment = "RESET" + self.training_round_completed = False self.logger = getLogger(__name__) - # FIXME: why isn't this initial call in runner_pt? - #self.initialize_tensorkeys_for_functions(with_opt_vars=False) - # overwrite attribute to account for one optimizer param (in every # child model that does not overwrite get and set tensordict) that is # not a numpy array self.tensor_dict_split_fn_kwargs = {} self.tensor_dict_split_fn_kwargs.update({"holdout_tensor_names": ["__opt_state_needed"]}) - def rebuild_model(self, round_num, input_tensor_dict, validation=False): + def rebuild_model(self, input_tensor_dict, validation=False): """Parse tensor names and update weights of model. Handles the optimizer treatment. Args: - round_num: The current round number. input_tensor_dict (dict): The input tensor dictionary used to update the weights of the model. 
validation (bool, optional): A flag indicating whether the model @@ -86,28 +76,26 @@ def rebuild_model(self, round_num, input_tensor_dict, validation=False): Returns: None """ + if self.opt_treatment == "RESET": + self.reset_opt_vars() + self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) + elif ( + self.training_round_completed + and self.opt_treatment == "CONTINUE_GLOBAL" + and not validation + ): + self.set_tensor_dict(input_tensor_dict, with_opt_vars=True) + else: + self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) - self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) - - # if self.opt_treatment == "RESET": - # self.reset_opt_vars() - # self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) - # elif ( - # self.training_round_completed - # and self.opt_treatment == "CONTINUE_GLOBAL" - # and not validation - # ): - # self.set_tensor_dict(input_tensor_dict, with_opt_vars=True) - # else: - # self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) - - def validate(self, col_name, round_num, val_loader, use_tqdm=False, **kwargs): + def validate(self, col_name, round_num, input_tensor_dict, val_loader, use_tqdm=False, **kwargs): """Validate. Run validation of the model on the local data. Args: col_name (str): Name of the collaborator. round_num (int): Current round number. input_tensor_dict (dict): Required input tensors (for model). + val_loader (DataLoader): Validation data loader. use_tqdm (bool, optional): Use tqdm to print a progress bar. Defaults to False. **kwargs: Key word arguments passed to GaNDLF main_run. @@ -116,7 +104,7 @@ def validate(self, col_name, round_num, val_loader, use_tqdm=False, **kwargs): output_tensor_dict (dict): Tensors to send back to the aggregator. {} (dict): Tensors to maintain in the local TensorDB. 
""" - #self.rebuild_model(round_num, input_tensor_dict, validation=True) + self.rebuild_model(input_tensor_dict, validation=True) self.model.eval() epoch_valid_loss, epoch_valid_metric = validate_network( @@ -166,7 +154,6 @@ def inference(self, col_name, round_num, val_loader, use_tqdm=False, **kwargs): output_tensor_dict (dict): Tensors to send back to the aggregator. {} (dict): Tensors to maintain in the local TensorDB. """ - #self.rebuild_model(round_num, input_tensor_dict, validation=True) self.model.eval() epoch_inference_loss, epoch_inference_metric = validate_network( @@ -201,7 +188,7 @@ def inference(self, col_name, round_num, val_loader, use_tqdm=False, **kwargs): # Empty list represents metrics that should only be stored locally return output_tensor_dict, {} - def train(self, col_name, round_num, hparams_dict, train_loader, use_tqdm=False, **kwargs): + def train(self, col_name, round_num, input_tensor_dict, hparams_dict, train_loader, use_tqdm=False, **kwargs): """Train batches. Train the model on the requested number of batches. Args: @@ -224,6 +211,7 @@ def train(self, col_name, round_num, hparams_dict, train_loader, use_tqdm=False, # set to "training" mode self.model.train() + self.rebuild_model(input_tensor_dict) # Set the learning rate self.logger.info(f"Setting learning rate to {learning_rate}") @@ -265,19 +253,6 @@ def train(self, col_name, round_num, hparams_dict, train_loader, use_tqdm=False, self.tensor_dict_split_fn_kwargs, ) - # Update the required tensors if they need to be pulled from the - # aggregator - # TODO this logic can break if different collaborators have different - # roles between rounds. - # For example, if a collaborator only performs validation in the first - # round but training in the second, it has no way of knowing the - # optimizer state tensor names to request from the aggregator because - # these are only created after training occurs. 
A work around could - # involve doing a single epoch of training on random data to get the - # optimizer names, and then throwing away the model. - #if self.opt_treatment == "CONTINUE_GLOBAL": - # self.initialize_tensorkeys_for_functions(with_opt_vars=True) - # This will signal that the optimizer values are now present, # and can be loaded when the model is rebuilt self.training_round_completed = True diff --git a/Task_1/fets_challenge/fets_flow.py b/Task_1/fets_challenge/fets_flow.py index cf83680..208753d 100644 --- a/Task_1/fets_challenge/fets_flow.py +++ b/Task_1/fets_challenge/fets_flow.py @@ -1,3 +1,6 @@ + +"""FeTS Federated Flow.""" + import os import shutil import time @@ -5,6 +8,7 @@ from copy import deepcopy import pandas as pd from pathlib import Path +import torch from openfl.experimental.workflow.interface import FLSpec from openfl.experimental.workflow.placement import aggregator, collaborator @@ -17,114 +21,12 @@ from .time_utils import gen_collaborator_time_stats, compute_times_per_collaborator, MAX_SIMULATION_TIME from GANDLF.compute.generic import create_pytorch_objects -from GANDLF.config_manager import ConfigManager logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -def get_metric(metric_name, fl_round, agg_tensor_db): - target_tags = ('metric', 'validate_agg') - metric_tensor_key = TensorKey(metric_name, 'aggregator', fl_round, True, target_tags) - nparray = agg_tensor_db.get_tensor_from_cache(metric_tensor_key) - return nparray.item() - -def cache_tensor_dict(tensor_dict, agg_tensor_db, idx, agg_out_dict): - for key, value in tensor_dict.items(): - new_tags = change_tags(key.tags, add_field=str(idx + 1)) - modified_key = TensorKey( - tensor_name=key.tensor_name, - origin="aggregator", - round_number=key.round_number, - report=key.report, - tags=new_tags - ) - agg_out_dict[modified_key] = value - agg_tensor_db.cache_tensor(agg_out_dict) - -def return_cleanup_key(tensor_key, col, round_data_to_delete): - new_tags = 
tensor_key.tags - if col in tensor_key.tags: - new_tags = change_tags(tensor_key.tags, remove_field=col) - modified_key = TensorKey( - tensor_name=tensor_key.tensor_name, - origin=col, - round_number=round_data_to_delete, - report=tensor_key.report, - tags=new_tags - ) - return modified_key - -def get_aggregated_dict_with_tensorname(agg_tensor_dict, current_round=0): - agg_dict_with_tensornames = {} - for tensor_key, value in agg_tensor_dict.items(): - print(f"Tags : {tensor_key.tags}") - if 'aggregated' in tensor_key.tags or current_round == 0: - tensor_name, origin, round_number, report, tags = tensor_key - print(f"Tensor Name : {tensor_name}") - agg_dict_with_tensornames[tensor_name] = value - return agg_dict_with_tensornames - -def update_metrics(current_round, agg_tensor_db, summary, experiment_results, include_validation_with_hausdorff, -<<<<<<< HEAD - total_simulated_time, projected_auc): -======= - total_simulated_time, round_dice, projected_auc): ->>>>>>> 74cc059... Fixed typo - - round_loss = get_metric('valid_loss', current_round, agg_tensor_db) - round_dice = get_metric('valid_dice', current_round, agg_tensor_db) - dice_label_0 = get_metric('valid_dice_per_label_0', current_round, agg_tensor_db) - dice_label_1 = get_metric('valid_dice_per_label_1', current_round, agg_tensor_db) - dice_label_2 = get_metric('valid_dice_per_label_2', current_round, agg_tensor_db) - dice_label_4 = get_metric('valid_dice_per_label_4', current_round, agg_tensor_db) - if include_validation_with_hausdorff: - hausdorff95_label_0 = get_metric('valid_hd95_per_label_0', current_round, agg_tensor_db) - hausdorff95_label_1 = get_metric('valid_hd95_per_label_1', current_round, agg_tensor_db) - hausdorff95_label_2 = get_metric('valid_hd95_per_label_2', current_round, agg_tensor_db) - hausdorff95_label_4 = get_metric('valid_hd95_per_label_4', current_round, agg_tensor_db) -<<<<<<< HEAD - - # # End of round summary -======= - - # End of round summary ->>>>>>> 74cc059... 
Fixed typo - summary = '"**** END OF ROUND {} SUMMARY *****"'.format(current_round) - summary += "\n\tSimulation Time: {} minutes".format(round(total_simulated_time / 60, 2)) - summary += "\n\t(Projected) Convergence Score: {}".format(projected_auc) - summary += "\n\tRound Loss: {}".format(round_loss) - summary += "\n\tRound Dice: {}".format(round_dice) - summary += "\n\tDICE Label 0: {}".format(dice_label_0) - summary += "\n\tDICE Label 1: {}".format(dice_label_1) - summary += "\n\tDICE Label 2: {}".format(dice_label_2) - summary += "\n\tDICE Label 4: {}".format(dice_label_4) - if include_validation_with_hausdorff: - summary += "\n\tHausdorff95 Label 0: {}".format(hausdorff95_label_0) - summary += "\n\tHausdorff95 Label 1: {}".format(hausdorff95_label_1) - summary += "\n\tHausdorff95 Label 2: {}".format(hausdorff95_label_2) - summary += "\n\tHausdorff95 Label 4: {}".format(hausdorff95_label_4) - logger.info(summary) - - experiment_results['round'].append(current_round) - experiment_results['time'].append(total_simulated_time) - experiment_results['convergence_score'].append(projected_auc) - experiment_results['round_dice'].append(round_dice) - experiment_results['dice_label_0'].append(dice_label_0) - experiment_results['dice_label_1'].append(dice_label_1) - experiment_results['dice_label_2'].append(dice_label_2) - experiment_results['dice_label_4'].append(dice_label_4) - if include_validation_with_hausdorff: - experiment_results['hausdorff95_label_0'].append(hausdorff95_label_0) - experiment_results['hausdorff95_label_1'].append(hausdorff95_label_1) - experiment_results['hausdorff95_label_2'].append(hausdorff95_label_2) - experiment_results['hausdorff95_label_4'].append(hausdorff95_label_4) -<<<<<<< HEAD - - return summary, round_dice -======= - return round_dice ->>>>>>> 74cc059... Fixed typo - +# [TODO] - FixMe Dataloaders cannot be passed as private attributes of collaborator. +# This is a temporary workaround to store the dataloaders in a global variable. 
collaborator_data_loaders = {} class FeTSFederatedFlow(FLSpec): @@ -140,15 +42,18 @@ def __init__(self, fets_model, params_dict, rounds=5 , device="cpu", **kwargs): self.collaborators_chosen_each_round = {} self.collaborator_times_per_round = {} self.agg_tensor_dict = {} - self.tensor_keys_per_col = {} self.restored = False self.include_validation_with_hausdorff = params_dict.get('include_validation_with_hausdorff', False) + self.use_pretrained_model = params_dict.get('use_pretrained_model', False) self.choose_training_collaborators = params_dict.get('choose_training_collaborators', None) self.training_hyper_parameters_for_round = params_dict.get('training_hyper_parameters_for_round', None) self.restore_from_checkpoint_folder = params_dict.get('restore_from_checkpoint_folder', None) self.save_checkpoints = params_dict.get('save_checkpoints', False) + # GaNDLF config + self.gandlf_config = params_dict.get('gandlf_config', None) + self.experiment_results = { 'round':[], 'time': [], @@ -160,63 +65,246 @@ def __init__(self, fets_model, params_dict, rounds=5 , device="cpu", **kwargs): 'dice_label_4': [], } + def _get_metric(self, metric_name, fl_round, agg_tensor_db): + tensor_key = TensorKey(metric_name, 'aggregator', fl_round, True, ('metric', 'validate_agg')) + return agg_tensor_db.get_tensor_from_cache(tensor_key).item() + + def _cache_tensor_dict(self, tensor_dict, agg_tensor_db, idx, agg_out_dict): + agg_out_dict.update({ + TensorKey( + tensor_name=key.tensor_name, + origin="aggregator", + round_number=key.round_number, + report=key.report, + tags=change_tags(key.tags, add_field=str(idx + 1)) + ): value + for key, value in tensor_dict.items() + }) + # Cache the updated dictionary in agg_tensor_db + agg_tensor_db.cache_tensor(agg_out_dict) + + def _get_aggregated_dict_with_tensorname(self, agg_tensor_dict, current_round=0, lookup_tags='aggregated'): + return { + tensor_key.tensor_name: value + for tensor_key, value in agg_tensor_dict.items() + if lookup_tags in 
tensor_key.tags + } + + def _update_metrics(self, current_round, agg_tensor_db, experiment_results, include_validation_with_hausdorff, + total_simulated_time, projected_auc): + + dice_metrics = [ + 'valid_loss', 'valid_dice', + 'valid_dice_per_label_0', 'valid_dice_per_label_1', + 'valid_dice_per_label_2', 'valid_dice_per_label_4' + ] + hausdorff_metrics = [ + 'valid_hd95_per_label_0', 'valid_hd95_per_label_1', + 'valid_hd95_per_label_2', 'valid_hd95_per_label_4' + ] + + # Fetch dice metrics + dice_values = {metric: self._get_metric(metric, current_round, agg_tensor_db) for metric in dice_metrics} + + # Fetch Hausdorff metrics if required + hausdorff_values = {} + if include_validation_with_hausdorff: + hausdorff_values = {metric: self._get_metric(metric, current_round, agg_tensor_db) for metric in hausdorff_metrics} + + # # End of round summary + summary = '"**** END OF ROUND {} SUMMARY *****"'.format(current_round) + summary += "\n\tSimulation Time: {} minutes".format(round(total_simulated_time / 60, 2)) + summary += "\n\t(Projected) Convergence Score: {}".format(projected_auc) + summary += "\n\tRound Loss: {}".format(dice_values['valid_loss']) + summary += "\n\tRound Dice: {}".format(dice_values['valid_dice']) + summary += "\n\tDICE Label 0: {}".format(dice_values['valid_dice_per_label_0']) + summary += "\n\tDICE Label 1: {}".format(dice_values['valid_dice_per_label_1']) + summary += "\n\tDICE Label 2: {}".format(dice_values['valid_dice_per_label_2']) + summary += "\n\tDICE Label 4: {}".format(dice_values['valid_dice_per_label_4']) + if include_validation_with_hausdorff: + summary += "\n\tHausdorff95 Label 0: {}".format(hausdorff_values['valid_hd95_per_label_0']) + summary += "\n\tHausdorff95 Label 1: {}".format(hausdorff_values['valid_hd95_per_label_1']) + summary += "\n\tHausdorff95 Label 2: {}".format(hausdorff_values['valid_hd95_per_label_2']) + summary += "\n\tHausdorff95 Label 4: {}".format(hausdorff_values['valid_hd95_per_label_4']) + logger.info(summary) 
+ + experiment_results['round'].append(current_round) + experiment_results['time'].append(total_simulated_time) + experiment_results['convergence_score'].append(projected_auc) + experiment_results['round_dice'].append(dice_values['valid_dice']) + experiment_results['dice_label_0'].append(dice_values['valid_dice_per_label_0']) + experiment_results['dice_label_1'].append(dice_values['valid_dice_per_label_1']) + experiment_results['dice_label_2'].append(dice_values['valid_dice_per_label_2']) + experiment_results['dice_label_4'].append(dice_values['valid_dice_per_label_4']) + if include_validation_with_hausdorff: + experiment_results['hausdorff95_label_0'].append(hausdorff_values['valid_hd95_per_label_0']) + experiment_results['hausdorff95_label_1'].append(hausdorff_values['valid_hd95_per_label_1']) + experiment_results['hausdorff95_label_2'].append(hausdorff_values['valid_hd95_per_label_2']) + experiment_results['hausdorff95_label_4'].append(hausdorff_values['valid_hd95_per_label_4']) + + return summary, dice_values['valid_dice'] + + def _initialize_aggregator_model(self): + """Initialize the aggregator model and its components.""" + model, optimizer, _, _, scheduler, params = create_pytorch_objects( + self.gandlf_config, None, None, device=self.device + ) + self.fets_model.model = model + self.fets_model.optimizer = optimizer + self.fets_model.scheduler = scheduler + self.fets_model.params = params + + def _restore_from_checkpoint(self): + """Restore the experiment state from a checkpoint.""" + checkpoint_path = Path(f'checkpoint/{self.restore_from_checkpoint_folder}') + if not checkpoint_path.exists(): + logger.warning(f'Could not find provided checkpoint folder: {self.restore_from_checkpoint_folder}. 
Exiting...') + exit(1) + + logger.info(f'Attempting to load last completed round from {self.restore_from_checkpoint_folder}') + state = load_checkpoint(self.restore_from_checkpoint_folder) + self.checkpoint_folder = self.restore_from_checkpoint_folder + + ( + loaded_collaborator_names, starting_round_num, self.collaborator_time_stats, + self.total_simulated_time, self.best_dice, self.best_dice_over_time_auc, + self.collaborators_chosen_each_round, self.collaborator_times_per_round, + self.experiment_results, summary, agg_tensor_db + ) = state + + if loaded_collaborator_names != self.collaborator_names: + logger.error(f'Collaborator names found in checkpoint ({loaded_collaborator_names}) ' + f'do not match provided collaborators ({self.collaborator_names})') + exit(1) + + self.restored = True + logger.info(f'Previous summary for round {starting_round_num}') + logger.info(summary) + + # Update the agg_tensor_dict from stored tensor_db + self.current_round = starting_round_num + self._load_agg_tensor_dict(agg_tensor_db) + + def _setup_new_experiment(self): + """Set up a new experiment folder and initialize the tensor dictionary.""" + self.checkpoint_folder = setup_checkpoint_folder() + logger.info(f'\nCreated experiment folder {self.checkpoint_folder}...') + self.current_round = 0 + + # Initialize the tensor dictionary for the first round + tensor_dict = self.fets_model.get_tensor_dict() + self.agg_tensor_dict.update({ + TensorKey( + tensor_name=key, + origin='aggregator', + round_number=self.current_round, + report=False, + tags=('aggregated',) + ): value + for key, value in tensor_dict.items() + }) + + def _load_agg_tensor_dict(self, agg_tensor_db): + """Load the agg_tensor_dict from the stored tensor_db.""" + for _, record in agg_tensor_db.iterrows(): + tensor_key = TensorKey( + record["tensor_name"], record["origin"], record["round"], + record["report"], record["tags"] + ) + self.agg_tensor_dict[tensor_key] = record["nparray"] + + def _aggregate_tensors(self, 
agg_tensor_db, tensor_keys_per_col, collaborator_weight_dict): + """Aggregate tensors and cache the results.""" + self.aggregation_type.set_state_data_for_round(self.collaborators_chosen_each_round, self.collaborator_times_per_round) + for col, tensor_keys in tensor_keys_per_col.items(): + for tensor_key in tensor_keys: + tensor_name, origin, round_number, report, tags = tensor_key + if col in tags: + new_tags = change_tags(tags, remove_field=col) + agg_tensor_key = TensorKey(tensor_name, origin, round_number, report, new_tags) + if agg_tensor_db.get_tensor_from_cache(agg_tensor_key) is None: + agg_results = agg_tensor_db.get_aggregated_tensor( + agg_tensor_key, + collaborator_weight_dict, + aggregation_function=self.aggregation_type, + ) + agg_tag_tk = TensorKey(tensor_name, origin, round_number, report, ('aggregated',)) + agg_tensor_db.cache_tensor({agg_tag_tk: agg_results}) + + def _process_collaborators(self, inputs, agg_tensor_db, collaborator_weights_unnormalized, times_per_collaborator): + """Process tensors for each collaborator and cache them.""" + tensor_keys_per_col = {} + for idx, col in enumerate(inputs): + agg_out_dict = {} + self._cache_tensor_dict(col.local_valid_dict, agg_tensor_db, idx, agg_out_dict) + self._cache_tensor_dict(col.agg_valid_dict, agg_tensor_db, idx, agg_out_dict) + self._cache_tensor_dict(col.global_output_tensor_dict, agg_tensor_db, idx, agg_out_dict) + + # Store the keys for each collaborator + tensor_keys_per_col[str(idx + 1)] = list(agg_out_dict.keys()) + collaborator_weights_unnormalized[col.input] = col.collaborator_task_weight + times_per_collaborator[col.input] = col.times_per_collaborator + return tensor_keys_per_col + + def _update_best_model(self, round_dice): + """Update the best model if the current round's dice score is better.""" + if self.best_dice < round_dice: + self.best_dice = round_dice + if self.current_round == 0: + logger.info(f'Skipping best model saving to disk as it is a random initialization.') + elif 
not os.path.exists(f'checkpoint/{self.checkpoint_folder}/temp_model.pkl'): + raise ValueError(f'Expected temporary model at: checkpoint/{self.checkpoint_folder}/temp_model.pkl to exist but it was not found.') + else: + shutil.copyfile( + src=f'checkpoint/{self.checkpoint_folder}/temp_model.pkl', + dst=f'checkpoint/{self.checkpoint_folder}/best_model.pkl' + ) + logger.info(f'Saved model with best average binary DICE: {self.best_dice} to checkpoint/{self.checkpoint_folder}/best_model.pkl') + + + def _update_aggregator_model(self, inputs): + """Update the aggregator model with the aggregated tensors.""" + logger.info(f'Aggregator Model updated for round {self.current_round}') + self.fets_model.model = inputs[0].fets_model.model + self.fets_model.optimizer = inputs[0].fets_model.optimizer + self.fets_model.scheduler = inputs[0].fets_model.scheduler + self.fets_model.params = inputs[0].fets_model.params + + # Rebuild the model with the aggregated tensor_dict + local_tensor_dict = self._get_aggregated_dict_with_tensorname(self.agg_tensor_dict, self.current_round) + self.fets_model.rebuild_model(local_tensor_dict) + self.fets_model.save_native(f'checkpoint/{self.checkpoint_folder}/temp_model.pkl') + @aggregator def start(self): - + # Update experiment results if validation with Hausdorff is included if self.include_validation_with_hausdorff: self.experiment_results.update({ - 'hausdorff95_label_0': [], - 'hausdorff95_label_1': [], - 'hausdorff95_label_2': [], - 'hausdorff95_label_4': [], + f'hausdorff95_label_{label}': [] for label in [0, 1, 2, 4] }) + # Initialize the aggregator model + self._initialize_aggregator_model() + self.collaborators = self.runtime.collaborators - if self.restore_from_checkpoint_folder is None: - self.checkpoint_folder = setup_checkpoint_folder() - logger.info(f'\nCreated experiment folder {self.checkpoint_folder}...') - starting_round_num = 0 + # Handle checkpoint restoration or setup a new experiment folder + if 
self.restore_from_checkpoint_folder: + self._restore_from_checkpoint() else: - if not Path(f'checkpoint/{self.restore_from_checkpoint_folder}').exists(): - logger.warning(f'Could not find provided checkpoint folder: {self.restore_from_checkpoint_folder}. Exiting...') - exit(1) - else: - logger.info(f'Attempting to load last completed round from {self.restore_from_checkpoint_folder}') - state = load_checkpoint(self.restore_from_checkpoint_folder) - self.checkpoint_folder = self.restore_from_checkpoint_folder - - [loaded_collaborator_names, starting_round_num, self.collaborator_time_stats, - self.total_simulated_time, self.best_dice, self.best_dice_over_time_auc, - self.collaborators_chosen_each_round, self.collaborator_times_per_round, - self.tensor_keys_per_col, self.experiment_results, summary, agg_tensor_db] = state - - if loaded_collaborator_names != self.collaborator_names: - logger.error(f'Collaborator names found in checkpoint ({loaded_collaborator_names}) ' - f'do not match provided collaborators ({self.collaborator_names})') - exit(1) - - self.restored = True - logger.info(f'Previous summary for round {starting_round_num}') - logger.info(summary) - - aggregator_tensor_db = TensorDB() - aggregator_tensor_db.tensor_db = agg_tensor_db - - #Updating the agg_tensor_dict from stored tensor_db - starting_round_num += 1 - self.current_round = starting_round_num - for col,tensor_keys in self.tensor_keys_per_col.items(): - for tensor_key in tensor_keys: - tensor_name, _, _, _, _ = tensor_key - if tensor_name not in self.agg_tensor_dict: - self.agg_tensor_dict[tensor_key] = aggregator_tensor_db.get_tensor_from_cache(tensor_key) + self._setup_new_experiment() + # Check if the experiment is already completed if self.current_round >= self.n_rounds: logger.info("Experiment already completed. 
Exiting...") - self.next(self.end) - else: - self.collaborator_time_stats = gen_collaborator_time_stats(self.collaborator_names) - self.next(self.fetch_parameters_for_colls) + self.next(self.internal_loop) + return + + if self.restore_from_checkpoint_folder: + self.current_round += 1 + + # Proceed to the next step + self.collaborator_time_stats = gen_collaborator_time_stats(self.collaborator_names) + self.next(self.fetch_parameters_for_colls) @aggregator def fetch_parameters_for_colls(self): @@ -250,6 +338,9 @@ def fetch_parameters_for_colls(self): logger.info('Collaborators chosen to train for round {}:\n\t{}'.format(self.current_round, self.training_collaborators)) self.collaborators_chosen_each_round[self.current_round] = self.training_collaborators + + # Fetch the aggregated tensor dict for the current round + self.input_tensor_dict = self._get_aggregated_dict_with_tensorname(self.agg_tensor_dict, self.current_round) if self.current_round == 0 or self.restored is True: self.next(self.initialize_colls, foreach='collaborators') self.restored = False @@ -258,17 +349,8 @@ def fetch_parameters_for_colls(self): @collaborator def initialize_colls(self): - - gandlf_conf = {} - if isinstance(self.gandlf_config, str) and os.path.exists(self.gandlf_config): - gandlf_conf = ConfigManager(self.gandlf_config) - elif isinstance(self.gandlf_config, dict): - gandlf_conf = self.gandlf_config - else: - exit("GANDLF config file not found. 
Exiting...") - if not self.include_validation_with_hausdorff: - gandlf_conf['metrics'] = ['dice','dice_per_label'] + self.gandlf_config['metrics'] = ['dice','dice_per_label'] logger.info(f'Initializing collaborator {self.input}') ( @@ -279,7 +361,7 @@ def initialize_colls(self): scheduler, params, ) = create_pytorch_objects( - gandlf_conf, train_csv=self.train_csv_path, val_csv=self.val_csv_path, device=self.device + self.gandlf_config, train_csv=self.train_csv_path, val_csv=self.val_csv_path, device=self.device ) self.fets_model.device = self.device @@ -297,32 +379,33 @@ def initialize_colls(self): self.collaborator_time_stats, self.current_round) - if self.restored is False: - tensor_dict = self.fets_model.get_tensor_dict() - for key, value in tensor_dict.items(): - origin = 'collaborator' - round_number = self.current_round - report = False - tags = ('trained') - agg_tensor_key = TensorKey(key, origin, round_number, report, tags) + # [TODO] - FIX using Pretrained model + if self.use_pretrained_model: + if self.device == 'cpu': + checkpoint = torch.load(f'checkpoint/pretrained_model/resunet_pretrained.pth',map_location=torch.device('cpu')) + self.fets_model.model.load_state_dict(checkpoint['model_state_dict']) + self.fets_model.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) + else: + checkpoint = torch.load(f'checkpoint/pretrained_model/resunet_pretrained.pth') + self.fets_model.model.load_state_dict(checkpoint['model_state_dict']) + self.fets_model.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) - self.agg_tensor_dict[agg_tensor_key] = value self.next(self.aggregated_model_validation) @collaborator def aggregated_model_validation(self): logger.info(f'Performing aggregated model validation for collaborator {self.input}') - input_tensor_dict = get_aggregated_dict_with_tensorname(self.agg_tensor_dict, self.current_round) + input_tensor_dict = deepcopy(self.input_tensor_dict) val_loader = 
collaborator_data_loaders[self.input].get_valid_loader() - self.fets_model.rebuild_model(self.current_round, input_tensor_dict) - self.agg_valid_dict, _ = self.fets_model.validate(self.input, self.current_round, val_loader, apply="global") + self.agg_valid_dict, _ = self.fets_model.validate(self.input, self.current_round, input_tensor_dict, val_loader, apply="global") self.next(self.train) @collaborator def train(self): logger.info(f'Performing training for collaborator {self.input}') train_loader = collaborator_data_loaders[self.input].get_train_loader() - self.global_output_tensor_dict, _ = self.fets_model.train(self.input, self.current_round, self.hparam_dict, train_loader) + input_tensor_dict = deepcopy(self.input_tensor_dict) + self.global_output_tensor_dict, _ = self.fets_model.train(self.input, self.current_round, input_tensor_dict, self.hparam_dict, train_loader) self.collaborator_task_weight = collaborator_data_loaders[self.input].get_train_data_size() self.next(self.local_model_validation) @@ -330,77 +413,37 @@ def train(self): def local_model_validation(self): logger.info(f'Performing local model validation for collaborator {self.input}') val_loader = collaborator_data_loaders[self.input].get_valid_loader() - self.local_valid_dict, _ = self.fets_model.validate(self.input, self.current_round, val_loader, apply="local") + # Update the model with the trained tensors for local validation of this round. 
+ input_tensor_dict = self._get_aggregated_dict_with_tensorname(self.global_output_tensor_dict, self.current_round, 'trained') + self.local_valid_dict, _ = self.fets_model.validate(self.input, self.current_round, input_tensor_dict, val_loader, apply="local") self.next(self.join) - @aggregator - def join_task(self, inputs): - self.next(self.internal_loop) - @aggregator def join(self, inputs): - round_data_to_delete = 0 - if self.current_round > self.db_store_rounds: - round_data_to_delete = self.current_round - self.db_store_rounds - self.aggregation_type.set_state_data_for_round(self.collaborators_chosen_each_round, self.collaborator_times_per_round) - - agg_tensor_db = TensorDB() + logger.info(f'Aggregating results for round {self.current_round}') + agg_tensor_db = TensorDB() # Used for aggregating and persisting tensors collaborator_weights_unnormalized = {} times_per_collaborator = {} - cache_tensor_dict(self.agg_tensor_dict, agg_tensor_db, 0, {}) - for idx, col in enumerate(inputs): - logger.info(f'Aggregating results for {idx}') - agg_out_dict = {} - cache_tensor_dict(col.local_valid_dict, agg_tensor_db, idx, agg_out_dict) - cache_tensor_dict(col.agg_valid_dict, agg_tensor_db, idx, agg_out_dict) - cache_tensor_dict(col.global_output_tensor_dict, agg_tensor_db, idx, agg_out_dict) - self.agg_tensor_dict.update(agg_out_dict) - - # Store the keys for each collaborator - self.tensor_keys_per_col[str(idx + 1)] = list(agg_out_dict.keys()) - collaborator_weights_unnormalized[col.input] = col.collaborator_task_weight - times_per_collaborator[col.input] = col.times_per_collaborator + tensor_keys_per_col = () + # Cache the aggregator tensor dict in tensor_db so that tensor_db has updated tensor values. 
+ agg_tensor_db.cache_tensor(self.agg_tensor_dict) + + # Process each collaborator's tensors + tensor_keys_per_col = self._process_collaborators(inputs, agg_tensor_db, collaborator_weights_unnormalized, times_per_collaborator) + + self.collaborator_times_per_round[self.current_round] = times_per_collaborator weight_total = sum(collaborator_weights_unnormalized.values()) collaborator_weight_dict = { k: v / weight_total for k, v in collaborator_weights_unnormalized.items() } logger.info(f'Calculated Collaborator weights: {collaborator_weight_dict} and and times: {times_per_collaborator}') - agg_tensor_keys = [] - for col,tensor_keys in self.tensor_keys_per_col.items(): - for tensor_key in tensor_keys: - tensor_name, origin, round_number, report, tags = tensor_key - if col in tags: - new_tags = change_tags(tags, remove_field=col) - agg_tensor_key = TensorKey(tensor_name, origin, round_number, report, new_tags) - # Aggregates the tensor values for the tensor key and stores it in tensor_db. Checks if the tensor is already in tensor_db - if agg_tensor_db.get_tensor_from_cache(agg_tensor_key) is None: - agg_results = agg_tensor_db.get_aggregated_tensor( - agg_tensor_key, - collaborator_weight_dict, - aggregation_function=self.aggregation_type, - ) - agg_tag_tk = TensorKey(tensor_name, origin, round_number, report, ("aggregated",)) - agg_tensor_db.cache_tensor({agg_tag_tk: agg_results}) - # self.agg_tensor_dict[agg_tag_tk] = agg_results - # self.agg_tensor_dict[agg_tensor_key] = agg_results - # agg_tensor_keys.append(agg_tensor_key) - # agg_tensor_keys.append(agg_tag_tk) - - #cleaningup aggregated tensor dict based on db store rounds - # if (self.current_round + 1) > self.db_store_rounds: - # col_tensor_key_to_be_deleted = return_cleanup_key(tensor_key, col, round_data_to_delete) - # agg_tensor_key_to_be_deleted = TensorKey(tensor_name, origin, round_data_to_delete, report, new_tags) - # agg_tag_key = TensorKey(tensor_name, origin, round_number, report, ("aggregated",)) - 
# if col_tensor_key_to_be_deleted in self.agg_tensor_dict: - # self.agg_tensor_dict.pop(col_tensor_key_to_be_deleted) - # if agg_tensor_key_to_be_deleted in self.agg_tensor_dict: - # self.agg_tensor_dict.pop(agg_tensor_key_to_be_deleted) - # if agg_tag_key in self.agg_tensor_dict: - # self.agg_tensor_dict.pop(agg_tag_key) + # Perform aggregation + self._aggregate_tensors(agg_tensor_db, tensor_keys_per_col, collaborator_weight_dict) + + # Clean up the tensor_db for the round_data_to_delete rounds agg_tensor_db.clean_up(self.db_store_rounds) - self.tensor_keys_per_col['aggregator'] = agg_tensor_keys times_list = [(t, col) for col, t in times_per_collaborator.items()] times_list = sorted(times_list) @@ -419,43 +462,20 @@ def join(self, inputs): projected_auc /= MAX_SIMULATION_TIME # update metrics and results -<<<<<<< HEAD - #round_dice = 0 - summary, round_dice = update_metrics(self.current_round, agg_tensor_db, self.experiment_results, - self.include_validation_with_hausdorff, self.total_simulated_time, projected_auc) -======= - summary = "" - round_dice = 0 - round_dice = update_metrics(self.current_round, agg_tensor_db, summary, self.experiment_results, - self.include_validation_with_hausdorff, self.total_simulated_time, round_dice, projected_auc) ->>>>>>> 74cc059... 
Fixed typo + summary, round_dice = self._update_metrics( + self.current_round, agg_tensor_db, self.experiment_results, + self.include_validation_with_hausdorff, self.total_simulated_time, projected_auc + ) - if self.best_dice < round_dice: - self.best_dice = round_dice - # Set the weights for the final model - if self.current_round == 0: - # here the initial model was validated (temp model does not exist) - logger.info(f'Skipping best model saving to disk as it is a random initialization.') - elif not os.path.exists(f'checkpoint/{self.checkpoint_folder}/temp_model.pkl'): - raise ValueError(f'Expected temporary model at: checkpoint/{self.checkpoint_folder}/temp_model.pkl to exist but it was not found.') - else: - # here the temp model was the one validated - shutil.copyfile(src=f'checkpoint/{self.checkpoint_folder}/temp_model.pkl',dst=f'checkpoint/{self.checkpoint_folder}/best_model.pkl') - logger.info(f'Saved model with best average binary DICE: {self.best_dice} to checkpoint/{self.checkpoint_folder}/best_model.pkl') + # Update the best model if necessary + self._update_best_model(round_dice) # Update the agg_tensor_dict for subsequent rounds with the aggregated tensor_db - self.agg_tensor_dict = {} - for _, record in agg_tensor_db.tensor_db.iterrows(): - print(f'Record tensor_name {record["tensor_name"]}') - print(f'Record origin {record["origin"]}') - print(f'Record round {record["round"]}') - print(f'Record report {record["report"]}') - print(f'Record tags {record["tags"]}') - - tensor_key = TensorKey(record["tensor_name"], record["origin"], record["round"], record["report"], record["tags"]) - self.agg_tensor_dict[tensor_key] = record["nparray"] - - print(f'************************') + self.agg_tensor_dict.clear() + self.agg_tensor_dict = { + TensorKey(record["tensor_name"], record["origin"], record["round"], record["report"], record["tags"]): record["nparray"] + for _, record in agg_tensor_db.tensor_db.iterrows() + } if self.save_checkpoints: 
logger.info(f'Saving checkpoint for round {self.current_round} : checkpoint folder {self.checkpoint_folder}') @@ -467,7 +487,6 @@ def join(self, inputs): self.best_dice_over_time_auc, self.collaborators_chosen_each_round, self.collaborator_times_per_round, - self.tensor_keys_per_col, self.experiment_results, summary) @@ -478,22 +497,10 @@ def join(self, inputs): if self.total_simulated_time > MAX_SIMULATION_TIME: logger.info("Simulation time exceeded. Ending Experiment") self.next(self.end) + return - # save the most recent aggregated model in native format to be copied over as best when appropriate - # (note this model has not been validated by the collaborators yet) - # Global FeTS Model may be unititialized in the first round - if self.fets_model.model is None: - logger.info(f'Global model is not initialized. Initializing with the first round model') - self.fets_model.model = inputs[0].fets_model.model - self.fets_model.optimizer = inputs[0].fets_model.optimizer - self.fets_model.scheduler = inputs[0].fets_model.scheduler - self.fets_model.params = inputs[0].fets_model.params - - # Rebuild the model with the aggregated tensor_dict - local_tensor_dict = get_aggregated_dict_with_tensorname(self.agg_tensor_dict, self.current_round) - print(f'Local Tensor Dict : {local_tensor_dict.keys()}') - self.fets_model.rebuild_model(self.current_round, local_tensor_dict) - self.fets_model.save_native(f'checkpoint/{self.checkpoint_folder}/temp_model.pkl') + # Update the aggregator model and rebuild it with aggregated tensors + self._update_aggregator_model(inputs) self.next(self.internal_loop) @aggregator diff --git a/Task_1/fets_challenge/time_utils.py b/Task_1/fets_challenge/time_utils.py index 4b43014..95428ba 100644 --- a/Task_1/fets_challenge/time_utils.py +++ b/Task_1/fets_challenge/time_utils.py @@ -146,11 +146,8 @@ def compute_times_per_collaborator(collaborator_name, collaborator_time_stats, round_num): np.random.seed(round_num) - #times = {} - #for col in 
collaborator_names: time = 0 - print(f'Computing time for collaborator {collaborator_name}') # stats stats = collaborator_time_stats[collaborator_name] @@ -160,9 +157,6 @@ def compute_times_per_collaborator(collaborator_name, download_time = max(1, download_time) time += download_time - # data loader - #data = collaborator_data[collaborator_name] - # validation time data_size = collaborator_data.get_valid_data_size() validation_time_per = np.random.normal(loc=stats.validation_mean, @@ -194,6 +188,4 @@ def compute_times_per_collaborator(collaborator_name, scale=stats.upload_speed_std) upload_time = max(1, upload_time) time += upload_time - - #times[col] = time return time \ No newline at end of file From f29bd32f2166468606e5881b8765bb3ce72c107d Mon Sep 17 00:00:00 2001 From: "Agrawal, Kush" Date: Thu, 27 Mar 2025 00:30:49 -0700 Subject: [PATCH 16/16] Upgraded to OpenFL 1.7.1 Signed-off-by: Agrawal, Kush --- Task_1/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Task_1/setup.py b/Task_1/setup.py index 98ac63f..0f38375 100644 --- a/Task_1/setup.py +++ b/Task_1/setup.py @@ -27,7 +27,7 @@ ], include_package_data=True, install_requires=[ - 'openfl @ git+https://github.com/securefederatedai/openfl.git@6bbf9b62f97f50a06a9956eefacebf6d0a6cba4e', + 'openfl @ git+https://github.com/securefederatedai/openfl.git@v1.7.1', 'GANDLF @ git+https://github.com/CBICA/GaNDLF.git@4d614fe1de550ea4035b543b4c712ad564248106', 'fets @ git+https://github.com/FETS-AI/Algorithms.git@fets_challenge', ],