From 55c72f5eec7a0858aa15daee5289a76df6aa3210 Mon Sep 17 00:00:00 2001
From: Monimu <monimu@gmx.de>
Date: Wed, 4 Feb 2026 09:56:02 +0100
Subject: [PATCH 1/5] Change in clustering,  handling NANs for calculating SI
 and CH

handling NANs when averaging SI and CH and saving the SI and CH values of the null distribution
---
 jale/core/utils/hierarchical.py | 40 +++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/jale/core/utils/hierarchical.py b/jale/core/utils/hierarchical.py
index 6279683..5b026ef 100644
--- a/jale/core/utils/hierarchical.py
+++ b/jale/core/utils/hierarchical.py
@@ -89,8 +89,10 @@ def hierarchical_clustering_pipeline(
         project_path=project_path,
         meta_name=meta_name,
         silhouette_scores=silhouette_scores,
+        null_silhouette_scores=null_silhouette_scores,
         silhouette_scores_z=silhouette_scores_z,
         calinski_harabasz_scores=calinski_harabasz_scores,
+        null_calinski_harabasz_scores=null_calinski_harabasz_scores,
         calinski_harabasz_scores_z=calinski_harabasz_scores_z,
         exp_separation_density=exp_separation_density,
         correlation_type=correlation_type,
@@ -299,8 +301,8 @@ def compute_hierarchical_clustering(correlation_matrix, k, linkage_method):
     Z = linkage(condensed_distance, method=linkage_method)
     cluster_labels = fcluster(Z, k, criterion="maxclust")
 
-    # Safeguard: If only one unique cluster label, set metrics to np.nan
-    if len(np.unique(cluster_labels)) < 2:
+    # Safeguard: If number of cluster labels is smaller than k, set metrics to np.nan
+    if len(np.unique(cluster_labels)) < k:
         silhouette = np.nan
         calinski_harabasz = np.nan
     else:
@@ -328,21 +330,21 @@ def compute_hc_metrics_z(
     def pooled_std(sample1, sample2):
         """Compute the pooled standard deviation of two samples."""
         n1, n2 = sample1.shape[1], sample2.shape[1]
-        var1, var2 = np.var(sample1, axis=1, ddof=1), np.var(sample2, axis=1, ddof=1)
+        var1, var2 = np.nanvar(sample1, axis=1, ddof=1), np.nanvar(sample2, axis=1, ddof=1)
         return np.sqrt(((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2))
 
-    silhouette_scores_avg = np.average(silhouette_scores, axis=1)
-    null_silhouette_scores_avg = np.average(null_silhouette_scores, axis=1)
+    silhouette_scores_avg = np.nanmean(silhouette_scores, axis=1)
+    null_silhouette_scores_avg = np.nanmean(null_silhouette_scores, axis=1)
 
     if use_pooled_std:
         silhouette_std = pooled_std(silhouette_scores, null_silhouette_scores)
     else:
-        silhouette_std = np.std(null_silhouette_scores, axis=1, ddof=1)
+        silhouette_std = np.nanstd(null_silhouette_scores, axis=1, ddof=1)
 
     silhouette_z = (silhouette_scores_avg - null_silhouette_scores_avg) / silhouette_std
 
-    calinski_harabasz_scores_avg = np.average(calinski_harabasz_scores, axis=1)
-    null_calinski_harabasz_scores_avg = np.average(
+    calinski_harabasz_scores_avg = np.nanmean(calinski_harabasz_scores, axis=1)
+    null_calinski_harabasz_scores_avg = np.nanmean(
         null_calinski_harabasz_scores, axis=1
     )
 
@@ -351,7 +353,7 @@ def pooled_std(sample1, sample2):
             calinski_harabasz_scores, null_calinski_harabasz_scores
         )
     else:
-        calinski_harabasz_std = np.std(null_calinski_harabasz_scores, axis=1, ddof=1)
+        calinski_harabasz_std = np.nanstd(null_calinski_harabasz_scores, axis=1, ddof=1)
 
     calinski_harabasz_z = (
         calinski_harabasz_scores_avg - null_calinski_harabasz_scores_avg
@@ -435,8 +437,10 @@ def save_hc_metrics(
     project_path,
     meta_name,
     silhouette_scores,
+    null_silhouette_scores,
     silhouette_scores_z,
     calinski_harabasz_scores,
+    null_calinski_harabasz_scores,
     calinski_harabasz_scores_z,
     exp_separation_density,
     correlation_type,
@@ -448,9 +452,13 @@ def save_hc_metrics(
             "Number of Clusters": range(2, max_k + 1),
             "Silhouette Scores": np.average(silhouette_scores, axis=1),
             "Silhouette Scores SD": np.std(silhouette_scores, axis=1),
+            "Silhouette Scores NULL": np.nanmean(null_silhouette_scores, axis=1),
+            "Silhouette Scores NULL SD": np.nanstd(null_silhouette_scores, axis=1),
             "Silhouette Scores Z": silhouette_scores_z,
             "Calinski-Harabasz Scores": np.average(calinski_harabasz_scores, axis=1),
             "Calinski-Harabasz Scores SD": np.std(calinski_harabasz_scores, axis=1),
+            "Calinski-Harabasz Scores NULL": np.nanmean(null_calinski_harabasz_scores, axis=1),
+            "Calinski-Harabasz Scores NULL SD": np.nanstd(null_calinski_harabasz_scores, axis=1),
             "Calinski-Harabasz Scores Z": calinski_harabasz_scores_z,
             # Pad with NaN for k=2 as metrics start at k=3
             "Experiment Separation Density": np.concatenate(
@@ -471,6 +479,13 @@ def save_hc_metrics(
         header=[f"k={k}" for k in range(2, max_k + 1)],
     )
 
+     pd.DataFrame(null_silhouette_scores.T).to_csv(
+        project_path
+        / f"Results/MA_Clustering/metrics/{meta_name}_NULLsilhouette_scores_{correlation_type}_hc_{linkage_method}.csv",
+        index=False,
+        header=[f"k={k}" for k in range(2, max_k + 1)],
+    )
+
     pd.DataFrame(calinski_harabasz_scores.T).to_csv(
         project_path
         / f"Results/MA_Clustering/metrics/{meta_name}_calinski_harabasz_scores_{correlation_type}_hc_{linkage_method}.csv",
@@ -478,6 +493,13 @@ def save_hc_metrics(
         header=[f"k={k}" for k in range(2, max_k + 1)],
     )
 
+     pd.DataFrame(null_calinski_harabasz_scores.T).to_csv(
+        project_path
+        / f"Results/MA_Clustering/metrics/{meta_name}_NULLcalinski_harabasz_scores_{correlation_type}_hc_{linkage_method}.csv",
+        index=False,
+        header=[f"k={k}" for k in range(2, max_k + 1)],
+    )
+
     pd.DataFrame(exp_separation_density.T).to_csv(
         project_path
         / f"Results/MA_Clustering/metrics/{meta_name}_exp_separation_density_{correlation_type}_hc_{linkage_method}.csv",

From 9e094628c1760199312ea59a1ca2eeb8c4c26e0f Mon Sep 17 00:00:00 2001
From: Monimu <monimu@gmx.de>
Date: Thu, 5 Feb 2026 17:40:44 +0100
Subject: [PATCH 2/5] fixed indent errors

---
 jale/core/utils/hierarchical.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/jale/core/utils/hierarchical.py b/jale/core/utils/hierarchical.py
index 5b026ef..91db4da 100644
--- a/jale/core/utils/hierarchical.py
+++ b/jale/core/utils/hierarchical.py
@@ -479,7 +479,7 @@ def save_hc_metrics(
         header=[f"k={k}" for k in range(2, max_k + 1)],
     )
 
-     pd.DataFrame(null_silhouette_scores.T).to_csv(
+    pd.DataFrame(null_silhouette_scores.T).to_csv(
         project_path
         / f"Results/MA_Clustering/metrics/{meta_name}_NULLsilhouette_scores_{correlation_type}_hc_{linkage_method}.csv",
         index=False,
@@ -493,7 +493,7 @@ def save_hc_metrics(
         header=[f"k={k}" for k in range(2, max_k + 1)],
     )
 
-     pd.DataFrame(null_calinski_harabasz_scores.T).to_csv(
+    pd.DataFrame(null_calinski_harabasz_scores.T).to_csv(
         project_path
         / f"Results/MA_Clustering/metrics/{meta_name}_NULLcalinski_harabasz_scores_{correlation_type}_hc_{linkage_method}.csv",
         index=False,

From 8e4eba35fe74afd5c9c9d551b15798a5cc29bf64 Mon Sep 17 00:00:00 2001
From: Monimu <monimu@gmx.de>
Date: Wed, 11 Feb 2026 16:30:07 +0100
Subject: [PATCH 3/5] Update hierarchical.py

---
 jale/core/utils/hierarchical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jale/core/utils/hierarchical.py b/jale/core/utils/hierarchical.py
index 91db4da..09592fb 100644
--- a/jale/core/utils/hierarchical.py
+++ b/jale/core/utils/hierarchical.py
@@ -302,7 +302,7 @@ def compute_hierarchical_clustering(correlation_matrix, k, linkage_method):
     cluster_labels = fcluster(Z, k, criterion="maxclust")
 
     # Safeguard: If number of cluster labels is smaller than k, set metrics to np.nan
-    if len(np.unique(cluster_labels)) < k:
+    if len(np.unique(cluster_labels)) < 2:
         silhouette = np.nan
         calinski_harabasz = np.nan
     else:

From cd2160dd88905c284f243a99499f217e28467ddf Mon Sep 17 00:00:00 2001
From: Monimu <monimu@gmx.de>
Date: Thu, 12 Feb 2026 09:01:59 +0100
Subject: [PATCH 4/5] change of programming language pyproject.toml to python
 3.11 only

---
 pyproject.toml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index e127593..05c4bc3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,13 +7,12 @@ name = "jale"
 version = "0.2.9"
 description = "Package allowing users to run Activation Likelihood Estimation Meta-Analysis"
 readme = "README.md"
-requires-python = ">=3.9, <3.13"
+requires-python = ">=3.9, <3.12"
 license = "MIT"
 authors = [{ name = "Lennart Frahm", email = "l.frahm@mailbox.org" }]
 classifiers = [
     "Programming Language :: Python :: 3",
     "Programming Language :: Python :: 3.11",
-    "Programming Language :: Python :: 3.12",
     "Operating System :: OS Independent",
 ]
 dependencies = [

From cd445a9c725c5393444583749eeb2b29d75ac5fa Mon Sep 17 00:00:00 2001
From: Monimu <monimu@gmx.de>
Date: Thu, 12 Feb 2026 10:54:45 +0100
Subject: [PATCH 5/5] Update ci.yml remove python 3.12

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a14bda4..175c399 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -16,7 +16,7 @@ jobs:
 
     strategy:
       matrix:
-        python-version: [3.11, 3.12]
+        python-version: [3.11]
 
     steps:
       - uses: actions/checkout@v3