Skip to content

Commit e12c43b

Browse files
correct neighborhood masking and utils for adding regulons
1 parent 32724cd commit e12c43b

2 files changed

Lines changed: 45 additions & 5 deletions

File tree

src/netmap/masking/internal.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def create_pairwise_binary_mask(binary_matrix, gene_list):
7777

7878
gene_pairs_indices = list(itertools.combinations(range(num_genes), 2))
7979
for g1_idx, g2_idx in gene_pairs_indices:
80-
mask = binary_matrix[:, g1_idx] * binary_matrix[:, g2_idx]
80+
mask = np.multiply(binary_matrix[:, g1_idx] , binary_matrix[:, g2_idx])
8181
key_fwd = f"{gene_list[g1_idx]}_{gene_list[g2_idx]}"
8282
pairwise_mask_dict[key_fwd] = mask
8383
key_rev = f"{gene_list[g2_idx]}_{gene_list[g1_idx]}"
@@ -102,13 +102,14 @@ def dict_to_dataframe(mask_dict, column_order_list):
102102
pd.DataFrame: A DataFrame with masks as columns, in the specified order.
103103
"""
104104
# 1. Create a dictionary with only the ordered columns
105-
ordered_data = {col: mask_dict[col] for col in column_order_list if col in mask_dict}
105+
ordered_data = {col: np.asarray(mask_dict[col]).squeeze() for col in column_order_list if col in mask_dict}
106106

107107
# 2. Check if all specified columns were found
108108
if len(ordered_data) != len(column_order_list):
109109
missing_columns = set(column_order_list) - set(ordered_data.keys())
110110
print(f"Warning: The following columns were not found in the mask dictionary: {missing_columns}")
111111

112+
print(ordered_data)
112113
# 3. Create the DataFrame from the ordered dictionary
113114
df = pd.DataFrame(ordered_data)
114115

@@ -117,7 +118,7 @@ def dict_to_dataframe(mask_dict, column_order_list):
117118
def binarize_adata(adata, expression_threshold=0):
    """Binarize the expression matrix of an AnnData object.

    Parameters
    ----------
    adata : anndata.AnnData
        Object whose ``.X`` holds the expression values (sparse or dense).
    expression_threshold : numeric, default 0
        Entries strictly greater than this threshold become 1, all others 0.

    Returns
    -------
    numpy.ndarray
        Dense 0/1 integer matrix with the same shape as ``adata.X``.
    """
    if issparse(adata.X):
        # toarray() yields a plain ndarray; todense() would return the
        # deprecated np.matrix type, whose column slices stay 2-D and
        # surprise downstream consumers (e.g. mask/DataFrame builders).
        binary_expression = (adata.X.toarray() > expression_threshold).astype(int)
    else:
        binary_expression = (adata.X > expression_threshold).astype(int)
    return binary_expression
@@ -140,9 +141,11 @@ def add_neighbourhood_expression_mask(adata, grn_adata, strict=False):
140141
ne = get_neighborhood_expression(adata, required_neighbours=5)
141142
else:
142143
ne = binarize_adata(adata)
143-
mask = create_pairwise_binary_mask(ne, adata.var.index)
144+
mask = create_pairwise_binary_mask(ne, list(adata.var.index))
145+
144146
mask = dict_to_dataframe(mask, column_order_list = grn_adata.var.index)
145147
grn_adata.layers['mask'] = mask
148+
grn_adata.var['count_nonzero'] = np.sum(grn_adata.layers['mask'], axis =0)
146149
return grn_adata
147150

148151

src/netmap/utils/data_utils.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
import os
33
import os.path as op
44

5+
import pandas as pd
6+
import scipy.sparse
7+
58
def attribution_to_anndata(attribution_list, var = None, obs = None)-> anndata.AnnData:
69

710
"""
@@ -29,4 +32,38 @@ def create_output_directory(result_params):
2932

3033

3134
def save_anndata(adobj, result_params):
    """Write an AnnData-like object to the configured output location.

    Parameters
    ----------
    adobj:
        Object exposing ``write(filename=...)`` (e.g. an AnnData instance).
    result_params : dict
        Must contain 'output_directory' and 'adata_filename'.
    """
    target_path = op.join(
        result_params['output_directory'],
        result_params['adata_filename'],
    )
    adobj.write(filename=target_path)
36+
37+
38+
39+
def merge_all_to_obs(target_adata, source_adata, replace=True):
    """Append every variable of ``source_adata`` as a column of ``target_adata.obs``.

    Intended for copying regulon activities stored in a separate AnnData
    next to the cell metadata of the main object, for easy plotting.

    Parameters
    ----------
    target_adata : anndata.AnnData
        Object whose ``.obs`` receives the new columns (modified in place).
    source_adata : anndata.AnnData
        Object whose ``.X`` columns (named by ``.var_names``) are appended.
        Must have the same number of observations as ``target_adata``.
    replace : bool, default True
        When columns of ``source_adata`` are already present in
        ``target_adata.obs``, drop all 'regulon' columns first and re-add
        them. When False, leave ``target_adata.obs`` untouched in that case.

    Returns
    -------
    anndata.AnnData
        ``target_adata`` with the merged ``.obs``.

    Raises
    ------
    ValueError
        If the observation counts of the two objects differ.
    """
    if target_adata.n_obs != source_adata.n_obs:
        raise ValueError("Cell counts do not match between objects.")

    # Densify if needed so the values can live in a plain DataFrame.
    if scipy.sparse.issparse(source_adata.X):
        source_data = source_adata.X.toarray()
    else:
        source_data = source_adata.X

    source_df = pd.DataFrame(
        source_data,
        index=source_adata.obs_names,
        columns=source_adata.var_names,
    )

    overlap = set(target_adata.obs.columns).intersection(source_df.columns)
    if overlap:
        if not replace:
            print('Regulon columns were present and not replaced.')
            return target_adata
        # Drop previously merged regulon columns before re-adding them.
        # NOTE(review): dropping selects by the 'regulon' substring, not by
        # the actual overlap — assumes merged columns are named '*regulon*'.
        spike_cols = [col for col in target_adata.obs.columns if 'regulon' in col]
        target_adata.obs = target_adata.obs.drop(columns=spike_cols)

    # Bug fix: the concat used to run only inside the overlap branch, so a
    # first merge (no pre-existing regulon columns) silently added nothing.
    target_adata.obs = pd.concat([target_adata.obs, source_df], axis=1)
    return target_adata

0 commit comments

Comments
 (0)