From ec60c3a2fe1b8e267c2a65dbd11aea080e349c28 Mon Sep 17 00:00:00 2001 From: Florian Huber <36473328+florian-huber@users.noreply.github.com> Date: Thu, 17 Aug 2023 22:07:01 +0200 Subject: [PATCH 1/2] Update hyperparameters for larger MS2DeepScore model --- ms2query/create_new_library/train_ms2deepscore.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ms2query/create_new_library/train_ms2deepscore.py b/ms2query/create_new_library/train_ms2deepscore.py index e279e760..82626488 100644 --- a/ms2query/create_new_library/train_ms2deepscore.py +++ b/ms2query/create_new_library/train_ms2deepscore.py @@ -43,7 +43,7 @@ def train_ms2ds_model(training_spectra, reference_scores_df=tanimoto_df, dim=len(spectrum_binner.known_bins), # The number of bins created same_prob_bins=same_prob_bins, - num_turns=2, + num_turns=1, augment_noise_max=10, augment_noise_intensity=0.01) @@ -58,9 +58,9 @@ def train_ms2ds_model(training_spectra, augment_removal_max=0, augment_removal_intensity=0, augment_intensity=0, augment_noise_max=0, use_fixed_set=True ) - model = SiameseModel(spectrum_binner, base_dims=(500, 500), embedding_dim=200, dropout_rate=0.2) + model = SiameseModel(spectrum_binner, base_dims=(1000, 1000, 1000), embedding_dim=500, dropout_rate=0.2) - model.compile(loss='mse', optimizer=Adam(lr=0.001), metrics=["mae", tf.keras.metrics.RootMeanSquaredError()]) + model.compile(loss='mse', optimizer=Adam(lr=0.0005), metrics=["mae", tf.keras.metrics.RootMeanSquaredError()]) # Save best model and include early stopping checkpointer = ModelCheckpoint(filepath=output_model_file_name, monitor='val_loss', mode="min", verbose=1, save_best_only=True) From d7eb20229194b0777c7304b300692978ff72f857 Mon Sep 17 00:00:00 2001 From: Florian Huber <36473328+florian-huber@users.noreply.github.com> Date: Sun, 20 Aug 2023 08:30:28 +0200 Subject: [PATCH 2/2] Update calculate_tanimoto_scores.py --- ms2query/create_new_library/calculate_tanimoto_scores.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ms2query/create_new_library/calculate_tanimoto_scores.py b/ms2query/create_new_library/calculate_tanimoto_scores.py index 38ed59c6..c604930f 100644 --- a/ms2query/create_new_library/calculate_tanimoto_scores.py +++ b/ms2query/create_new_library/calculate_tanimoto_scores.py @@ -15,7 +15,11 @@ def get_fingerprint(smiles: str): - fingerprint = np.array(Chem.RDKFingerprint(Chem.MolFromSmiles(smiles), fpSize=2048)) + try: + fingerprint = np.array(Chem.RDKFingerprint(Chem.MolFromSmiles(smiles), fpSize=2048)) + except: + # TODO: this is to avoid workflows breaking because of an incorrect smiles. Should be handled better. + fingerprint = np.zeros(2048) assert isinstance(fingerprint, np.ndarray), \ f"Fingerprint for 1 spectrum could not be set smiles is {smiles}" return fingerprint