1111import itertools
1212
1313
14- def get_neighborhood_expression (adata , knn_neighbours = 10 , required_neighbours = 1 ):
14+ def get_neighborhood_expression (adata , knn_neighbours = 10 , required_neighbours = 1 , expression_threshold = 0 ):
1515 """
1616 Checks if each gene is expressed in the k-nearest neighbors (kNN) of each cell.
1717
@@ -33,12 +33,7 @@ def get_neighborhood_expression(adata, knn_neighbours =10, required_neighbours =
3333 connectivities = adata .obsp ['connectivities' ].copy ()
3434 connectivities .data = np .ones (connectivities .data .shape ) # Binarize the graph
3535
36- # Get the binary gene expression matrix (cells x genes)
37- if issparse (adata .X ):
38- binary_expression = (adata .X > 10 ).astype (int ).tocsr ()
39- else :
40- binary_expression = (adata .X > 10 ).astype (int )
41-
36+ binary_expression = binarize_adata (adata , expression_threshold = expression_threshold )
4237 # Perform matrix multiplication to check for neighbor expression
4338 # connectivities (cells x cells) @ binary_expression (cells x genes)
4439 # The result is a matrix where each value is the number of neighbors
@@ -90,6 +85,8 @@ def create_pairwise_binary_mask(binary_matrix, gene_list):
9085
9186 return pairwise_mask_dict
9287
88+
89+
9390def dict_to_dataframe (mask_dict , column_order_list ):
9491 """
9592 Converts a dictionary of binary masks into a pandas DataFrame,
@@ -117,37 +114,17 @@ def dict_to_dataframe(mask_dict, column_order_list):
117114
118115 return df
119116
def binarize_adata(adata, expression_threshold=0):
    """
    Builds a 0/1 matrix flagging the entries of adata.X that exceed a threshold.

    Args:
        adata: Object whose `.X` attribute holds the expression matrix,
            either a scipy sparse matrix or a dense array.
        expression_threshold: Entries strictly greater than this value are
            marked 1, every other entry is marked 0. Defaults to 0.

    Returns:
        An integer matrix with one 0/1 flag per entry of adata.X. When
        adata.X is sparse the result is converted to CSR format; otherwise
        the result of the comparison is returned as-is.
    """
    # The comparison is strict, so entries equal to the threshold map to 0.
    above_threshold = adata.X > expression_threshold
    as_integers = above_threshold.astype(int)

    # Only sparse input is normalised to CSR; dense results are left untouched.
    return as_integers.tocsr() if issparse(adata.X) else as_integers
121124
122- def dict_to_dataframe (mask_dict , column_order_list ):
123- """
124- Converts a dictionary of binary masks into a pandas DataFrame,
125- respecting a specified column order.
126-
127- Args:
128- mask_dict (dict): A dictionary where keys are gene pair strings and
129- values are 1D numpy arrays (the masks).
130- column_order_list (list): A list of gene pair strings specifying the
131- desired order of the DataFrame columns.
132-
133- Returns:
134- pd.DataFrame: A DataFrame with masks as columns, in the specified order.
135- """
136- # 1. Create a dictionary with only the ordered columns
137- ordered_data = {col : mask_dict [col ] for col in column_order_list if col in mask_dict }
138-
139- # 2. Check if all specified columns were found
140- if len (ordered_data ) != len (column_order_list ):
141- missing_columns = set (column_order_list ) - set (ordered_data .keys ())
142- print (f"Warning: The following columns were not found in the mask dictionary: { missing_columns } " )
143-
144- # 3. Create the DataFrame from the ordered dictionary
145- df = pd .DataFrame (ordered_data )
146-
147- return df
148125
149126
150- def add_neighbourhood_expression_mask (adata , grn_adata ):
127+ def add_neighbourhood_expression_mask (adata , grn_adata , strict = False ):
151128 """ Create a mask indicating whether the edge is likely actually
152129 expressed or not.
153130
@@ -160,7 +137,11 @@ def add_neighbourhood_expression_mask(adata, grn_adata):
160137 """
161138 counts = pd .DataFrame (adata .X )
162139 counts .columns = adata .var .index
163- ne = get_neighborhood_expression (adata , required_neighbours = 5 )
140+ # binarized matrix = ne
141+ if not strict :
142+ ne = get_neighborhood_expression (adata , required_neighbours = 5 )
143+ else :
144+ ne = binarize_adata (adata )
164145 mask = create_pairwise_binary_mask (ne , counts .columns )
165146 mask = dict_to_dataframe (mask , column_order_list = grn_adata .var .index )
166147 grn_adata .layers ['mask' ] = mask
0 commit comments