From 32c7e364f85487d0f4f41749e6f84767937c2852 Mon Sep 17 00:00:00 2001
From: tgiani
Date: Tue, 23 Sep 2025 12:08:50 +0200
Subject: [PATCH 1/8] reading different hyperparameters for each replica

---
 n3fit/src/n3fit/model_trainer.py | 47 +++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py
index a8b7b95bee..0d377b04fe 100644
--- a/n3fit/src/n3fit/model_trainer.py
+++ b/n3fit/src/n3fit/model_trainer.py
@@ -12,6 +12,7 @@
 from collections import namedtuple
 from itertools import zip_longest
 import logging
+import json

 import numpy as np

@@ -859,6 +860,12 @@ def hyperparametrizable(self, params):
             for key in self._hyperkeys:
                 log.info(" > > Testing %s = %s", key, params[key])
             params = self._hyperopt_override(params)
+
+        # if not doing hyperopt, read the input hyperopt file containing
+        # different samples
+        else:
+            with open(params['hyperopt_res'], 'r') as file:
+                hyperopt_params = json.load(file)

         # Preprocess some hyperparameters
         epochs = int(params["epochs"])
@@ -905,19 +912,33 @@

         # Prepare the settings for all replica
         replicas_settings = []
-        for seed in self._nn_seeds:
-            # WIP here the sampling will happen when necessary
-            tmp = model_gen.ReplicaSettings(
-                seed=seed,
-                nodes=params["nodes_per_layer"],
-                activations=params["activation_per_layer"],
-                initializer=params["initializer"],
-                architecture=params["layer_type"],
-                dropout_rate=params["dropout"],
-                regularizer=params.get("regularizer"),
-                regularizer_args=params.get("regularizer_args"),
-            )
-            replicas_settings.append(tmp)
+        if self.mode_hyperopt:
+            for seed in self._nn_seeds:
+                tmp = model_gen.ReplicaSettings(
+                    seed=seed,
+                    nodes=params["nodes_per_layer"],
+                    activations=params["activation_per_layer"],
+                    initializer=params["initializer"],
+                    architecture=params["layer_type"],
+                    dropout_rate=params["dropout"],
+                    regularizer=params.get("regularizer"),
+                    regularizer_args=params.get("regularizer_args"),
+                )
+                replicas_settings.append(tmp)
+        else:
+            # read hyperparameter values from hyperopt results
+            for rep, seed in zip(self.replicas, self._nn_seeds):
+                tmp = model_gen.ReplicaSettings(
+                    seed=seed,
+                    nodes=hyperopt_params["nodes_per_layer"][rep],
+                    activations=[hyperopt_params["activation_per_layer"][rep]] * len(hyperopt_params["nodes_per_layer"][rep]),
+                    initializer=hyperopt_params["initializer"][rep],
+                    architecture=hyperopt_params["layer_type"][rep],
+                    dropout_rate=hyperopt_params["dropout"][rep],
+                    regularizer=params.get("regularizer"),
+                    regularizer_args=params.get("regularizer_args"),
+                )
+                replicas_settings.append(tmp)

         ### Training loop
         for k, partition in enumerate(self.kpartitions):

From d09953602b14792e8c483ed4a05701983dfffdbc Mon Sep 17 00:00:00 2001
From: tgiani
Date: Fri, 26 Sep 2025 11:06:37 +0200
Subject: [PATCH 2/8] setting activation function for last layer to be linear

---
 n3fit/src/n3fit/model_trainer.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py
index 0d377b04fe..45b74e7e64 100644
--- a/n3fit/src/n3fit/model_trainer.py
+++ b/n3fit/src/n3fit/model_trainer.py
@@ -928,10 +928,15 @@ def hyperparametrizable(self, params):
         else:
             # read hyperparameter values from hyperopt results
             for rep, seed in zip(self.replicas, self._nn_seeds):
+
+                activations = [hyperopt_params["activation_per_layer"][rep]] * (len(hyperopt_params["nodes_per_layer"][rep])-1)
+                # last layer activation is always linear
+                activations.append('linear')
+
                 tmp = model_gen.ReplicaSettings(
                     seed=seed,
                     nodes=hyperopt_params["nodes_per_layer"][rep],
-                    activations=[hyperopt_params["activation_per_layer"][rep]] * len(hyperopt_params["nodes_per_layer"][rep]),
+                    activations=activations,
                     initializer=hyperopt_params["initializer"][rep],
                     architecture=hyperopt_params["layer_type"][rep],
                     dropout_rate=hyperopt_params["dropout"][rep],

From 341aae48727871a2745ff88d9c4e00f96ab61f22 Mon Sep 17 00:00:00 2001
From: tgiani
Date: Mon, 29 Sep 2025 08:39:13 +0200
Subject: [PATCH 3/8] adding the sampling for optimizer hyperparameters. Probably wrong

---
 n3fit/src/n3fit/model_trainer.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py
index 45b74e7e64..aa12e56e69 100644
--- a/n3fit/src/n3fit/model_trainer.py
+++ b/n3fit/src/n3fit/model_trainer.py
@@ -1012,10 +1012,21 @@ def hyperparametrizable(self, params):
             threshold_positivity=threshold_pos,
             threshold_chi2=threshold_chi2,
         )
+
+        if self.mode_hyperopt:
+            # Compile each of the models with the right parameters
+            for model in models.values():
+                model.compile(**params["optimizer"])
+        else:
+            # Proper way of doing this? Not sure how optimizer parameters should be treated
+            optimizer_params = {}
+            optimizer_params["clipnorm"] = hyperopt_params['clipnorm'][self.replicas[0]]
+            optimizer_params["learning_rate"] = hyperopt_params['learning_rate'][self.replicas[0]]
+            optimizer_params["optimizer_name"] = hyperopt_params['optimizer'][self.replicas[0]]
+
+            for model in models.values():
+                model.compile(**optimizer_params)

-        # Compile each of the models with the right parameters
-        for model in models.values():
-            model.compile(**params["optimizer"])

         self._train_and_fit(models["training"], stopping_object, epochs=epochs)

From 5cbf953bfe57f33063ec8bf89485706452e8696f Mon Sep 17 00:00:00 2001
From: tgiani
Date: Fri, 24 Oct 2025 10:27:36 +0200
Subject: [PATCH 4/8] no optimizer hyperparams in the sampling for the replicas

---
 n3fit/src/n3fit/model_trainer.py | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py
index aa12e56e69..bdc6cd23c0 100644
--- a/n3fit/src/n3fit/model_trainer.py
+++ b/n3fit/src/n3fit/model_trainer.py
@@ -1013,21 +1013,10 @@ def hyperparametrizable(self, params):
             threshold_chi2=threshold_chi2,
         )

-        if self.mode_hyperopt:
-            # Compile each of the models with the right parameters
-            for model in models.values():
-                model.compile(**params["optimizer"])
-        else:
-            # Proper way of doing this? Not sure how optimizer parameters should be treated
-            optimizer_params = {}
-            optimizer_params["clipnorm"] = hyperopt_params['clipnorm'][self.replicas[0]]
-            optimizer_params["learning_rate"] = hyperopt_params['learning_rate'][self.replicas[0]]
-            optimizer_params["optimizer_name"] = hyperopt_params['optimizer'][self.replicas[0]]
-
-            for model in models.values():
-                model.compile(**optimizer_params)
-
-
+        # Compile each of the models with the right parameters
+        for model in models.values():
+            model.compile(**params["optimizer"])
+
         self._train_and_fit(models["training"], stopping_object, epochs=epochs)

         if self.mode_hyperopt:

From 3c8d0e515ba058ae2fa734007ac296bea1977c8a Mon Sep 17 00:00:00 2001
From: tgiani
Date: Fri, 24 Oct 2025 10:38:14 +0200
Subject: [PATCH 5/8] small fix

---
 n3fit/src/n3fit/model_trainer.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py
index bdc6cd23c0..c590ec6a16 100644
--- a/n3fit/src/n3fit/model_trainer.py
+++ b/n3fit/src/n3fit/model_trainer.py
@@ -928,18 +928,18 @@ def hyperparametrizable(self, params):
         else:
             # read hyperparameter values from hyperopt results
             for rep, seed in zip(self.replicas, self._nn_seeds):
-
-                activations = [hyperopt_params["activation_per_layer"][rep]] * (len(hyperopt_params["nodes_per_layer"][rep])-1)
+                import pdb; pdb.set_trace()
+                activations = [hyperopt_params["activation_per_layer"][rep-1]] * (len(hyperopt_params["nodes_per_layer"][rep-1])-1)
                 # last layer activation is always linear
                 activations.append('linear')

                 tmp = model_gen.ReplicaSettings(
                     seed=seed,
-                    nodes=hyperopt_params["nodes_per_layer"][rep],
+                    nodes=hyperopt_params["nodes_per_layer"][rep-1],
                     activations=activations,
-                    initializer=hyperopt_params["initializer"][rep],
-                    architecture=hyperopt_params["layer_type"][rep],
-                    dropout_rate=hyperopt_params["dropout"][rep],
+                    initializer=hyperopt_params["initializer"][rep-1],
+                    architecture=hyperopt_params["layer_type"][rep-1],
+                    dropout_rate=hyperopt_params["dropout"][rep-1],
                     regularizer=params.get("regularizer"),
                     regularizer_args=params.get("regularizer_args"),
                 )

From b9b92f486b2e74291d06714f58e14994993c2098 Mon Sep 17 00:00:00 2001
From: tgiani
Date: Fri, 7 Nov 2025 08:56:03 +0100
Subject: [PATCH 6/8] removing pdb

---
 n3fit/src/n3fit/model_trainer.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py
index c590ec6a16..c9e6ec5860 100644
--- a/n3fit/src/n3fit/model_trainer.py
+++ b/n3fit/src/n3fit/model_trainer.py
@@ -928,7 +928,6 @@ def hyperparametrizable(self, params):
         else:
             # read hyperparameter values from hyperopt results
             for rep, seed in zip(self.replicas, self._nn_seeds):
-                import pdb; pdb.set_trace()
                 activations = [hyperopt_params["activation_per_layer"][rep-1]] * (len(hyperopt_params["nodes_per_layer"][rep-1])-1)
                 # last layer activation is always linear
                 activations.append('linear')

From 66bf666a9ea9d8126bdb93f16809e6845081158f Mon Sep 17 00:00:00 2001
From: tgiani
Date: Thu, 20 Nov 2025 10:40:03 +0100
Subject: [PATCH 7/8] change also optimizer hyperparameters when running a fit

---
 n3fit/src/n3fit/model_trainer.py | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py
index c9e6ec5860..92e73e3e17 100644
--- a/n3fit/src/n3fit/model_trainer.py
+++ b/n3fit/src/n3fit/model_trainer.py
@@ -868,9 +868,15 @@ def hyperparametrizable(self, params):
                 hyperopt_params = json.load(file)

         # Preprocess some hyperparameters
-        epochs = int(params["epochs"])
-        stopping_patience = params["stopping_patience"]
-        stopping_epochs = int(epochs * stopping_patience)
+        if self.mode_hyperopt:
+            epochs = int(params["epochs"])
+            stopping_patience = params["stopping_patience"]
+            stopping_epochs = int(epochs * stopping_patience)
+        else:
+            epochs = int(hyperopt_params["epochs"][self.replicas[0]-1])
+            stopping_patience = hyperopt_params["stopping_patience"][self.replicas[0]-1]
+            stopping_epochs = int(epochs * stopping_patience)
+

         # Fill the 3 dictionaries (training, validation, experimental) with the layers and losses
         # when k-folding, these are the same for all folds
@@ -1012,9 +1018,19 @@ def hyperparametrizable(self, params):
             threshold_chi2=threshold_chi2,
         )

-        # Compile each of the models with the right parameters
-        for model in models.values():
-            model.compile(**params["optimizer"])
+        if self.mode_hyperopt:
+            # Compile each of the models with the right parameters
+            for model in models.values():
+                model.compile(**params["optimizer"])
+        else:
+            # Proper way of doing this? Not sure how optimizer parameters should be treated
+            optimizer_params = {}
+            optimizer_params["clipnorm"] = hyperopt_params['clipnorm'][self.replicas[0]-1]
+            optimizer_params["learning_rate"] = hyperopt_params['learning_rate'][self.replicas[0]-1]
+            optimizer_params["optimizer_name"] = hyperopt_params['optimizer'][self.replicas[0]-1]
+
+            for model in models.values():
+                model.compile(**optimizer_params)

         self._train_and_fit(models["training"], stopping_object, epochs=epochs)

From 2e3137d1114fb152bee0253f4013be3662e0ff57 Mon Sep 17 00:00:00 2001
From: tgiani
Date: Thu, 4 Dec 2025 11:06:54 +0100
Subject: [PATCH 8/8] temporary way to account for 10 best trials

---
 n3fit/src/n3fit/model_trainer.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py
index 92e73e3e17..9b31a8f537 100644
--- a/n3fit/src/n3fit/model_trainer.py
+++ b/n3fit/src/n3fit/model_trainer.py
@@ -873,8 +873,9 @@ def hyperparametrizable(self, params):
             stopping_patience = params["stopping_patience"]
             stopping_epochs = int(epochs * stopping_patience)
         else:
-            epochs = int(hyperopt_params["epochs"][self.replicas[0]-1])
-            stopping_patience = hyperopt_params["stopping_patience"][self.replicas[0]-1]
+            idx_hyperparameters = self.replicas[0]%10
+            epochs = int(hyperopt_params["epochs"][idx_hyperparameters])
+            stopping_patience = hyperopt_params["stopping_patience"][idx_hyperparameters]
             stopping_epochs = int(epochs * stopping_patience)

@@ -934,17 +935,18 @@ def hyperparametrizable(self, params):
         else:
             # read hyperparameter values from hyperopt results
             for rep, seed in zip(self.replicas, self._nn_seeds):
-                activations = [hyperopt_params["activation_per_layer"][rep-1]] * (len(hyperopt_params["nodes_per_layer"][rep-1])-1)
+                idx_hyperparameters = rep%10
+                activations = [hyperopt_params["activation_per_layer"][idx_hyperparameters]] * (len(hyperopt_params["nodes_per_layer"][idx_hyperparameters])-1)
                 # last layer activation is always linear
                 activations.append('linear')

                 tmp = model_gen.ReplicaSettings(
                     seed=seed,
-                    nodes=hyperopt_params["nodes_per_layer"][rep-1],
+                    nodes=hyperopt_params["nodes_per_layer"][idx_hyperparameters],
                     activations=activations,
-                    initializer=hyperopt_params["initializer"][rep-1],
-                    architecture=hyperopt_params["layer_type"][rep-1],
-                    dropout_rate=hyperopt_params["dropout"][rep-1],
+                    initializer=hyperopt_params["initializer"][idx_hyperparameters],
+                    architecture=hyperopt_params["layer_type"][idx_hyperparameters],
+                    dropout_rate=hyperopt_params["dropout"][idx_hyperparameters],
                     regularizer=params.get("regularizer"),
                     regularizer_args=params.get("regularizer_args"),
                 )
@@ -1024,11 +1026,11 @@ def hyperparametrizable(self, params):
                 model.compile(**params["optimizer"])
         else:
             # Proper way of doing this? Not sure how optimizer parameters should be treated
+            idx_hyperparameters = self.replicas[0]%10
             optimizer_params = {}
-            optimizer_params["clipnorm"] = hyperopt_params['clipnorm'][self.replicas[0]-1]
-            optimizer_params["learning_rate"] = hyperopt_params['learning_rate'][self.replicas[0]-1]
-            optimizer_params["optimizer_name"] = hyperopt_params['optimizer'][self.replicas[0]-1]
-
+            optimizer_params["clipnorm"] = hyperopt_params['clipnorm'][idx_hyperparameters]
+            optimizer_params["learning_rate"] = hyperopt_params['learning_rate'][idx_hyperparameters]
+            optimizer_params["optimizer_name"] = hyperopt_params['optimizer'][idx_hyperparameters]
             for model in models.values():
                 model.compile(**optimizer_params)
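
Note (illustration only, not part of the patches above): the non-hyperopt branch assumes that params['hyperopt_res'] points to a JSON file holding one list per hyperparameter, with one entry per selected trial (ten of them, hence the modulo-10 indexing). The file name "best_trials.json" and all values in the sketch below are hypothetical; it only shows the layout the new code seems to expect and how a replica index is mapped onto a trial.

import json

# Hypothetical contents of the file passed as params['hyperopt_res']:
# a dict of lists, one element per selected trial (two trials here for brevity,
# the patches assume ten, hence the % 10).
best_trials = {
    "nodes_per_layer": [[25, 20, 8], [30, 25, 8]],
    "activation_per_layer": ["tanh", "sigmoid"],
    "initializer": ["glorot_normal", "glorot_uniform"],
    "layer_type": ["dense", "dense"],
    "dropout": [0.0, 0.0],
    "epochs": [17000, 45000],
    "stopping_patience": [0.1, 0.3],
    "optimizer": ["Nadam", "Adam"],
    "learning_rate": [2.6e-3, 1.0e-3],
    "clipnorm": [6.0e-6, 1.0e-5],
}

with open("best_trials.json", "w") as file:
    json.dump(best_trials, file, indent=2)

# Read it back the way the patched model_trainer does and build the
# architecture settings for one replica.
with open("best_trials.json", "r") as file:
    hyperopt_params = json.load(file)

replica = 3
idx = replica % len(hyperopt_params["nodes_per_layer"])  # the patches hard-code % 10

nodes = hyperopt_params["nodes_per_layer"][idx]
# one activation name per trial, broadcast to all layers except the last,
# which is forced to be linear (patch 2)
activations = [hyperopt_params["activation_per_layer"][idx]] * (len(nodes) - 1) + ["linear"]
print(idx, nodes, activations)

With ten trials in the file, replicas 1-9 pick trials 1-9 and replica 10 wraps around to the first trial, which appears to be the intent of the temporary % 10 mapping.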