Skip to content

Commit ee93617

Browse files
fix internal masking expression threshold
1 parent f69606b commit ee93617

1 file changed

Lines changed: 16 additions & 35 deletions

File tree

src/netmap/masking/internal.py

Lines changed: 16 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import itertools
1212

1313

14-
def get_neighborhood_expression(adata, knn_neighbours =10, required_neighbours = 1):
14+
def get_neighborhood_expression(adata, knn_neighbours =10, required_neighbours = 1, expression_threshold = 0):
1515
"""
1616
Checks if each gene is expressed in the k-nearest neighbors (kNN) of each cell.
1717
@@ -33,12 +33,7 @@ def get_neighborhood_expression(adata, knn_neighbours =10, required_neighbours =
3333
connectivities = adata.obsp['connectivities'].copy()
3434
connectivities.data = np.ones(connectivities.data.shape) # Binarize the graph
3535

36-
# Get the binary gene expression matrix (cells x genes)
37-
if issparse(adata.X):
38-
binary_expression = (adata.X > 10).astype(int).tocsr()
39-
else:
40-
binary_expression = (adata.X > 10).astype(int)
41-
36+
binary_expression = binarize_adata(adata, expression_threshold = expression_threshold)
4237
# Perform matrix multiplication to check for neighbor expression
4338
# connectivities (cells x cells) @ binary_expression (cells x genes)
4439
# The result is a matrix where each value is the number of neighbors
@@ -90,6 +85,8 @@ def create_pairwise_binary_mask(binary_matrix, gene_list):
9085

9186
return pairwise_mask_dict
9287

88+
89+
9390
def dict_to_dataframe(mask_dict, column_order_list):
9491
"""
9592
Converts a dictionary of binary masks into a pandas DataFrame,
@@ -117,37 +114,17 @@ def dict_to_dataframe(mask_dict, column_order_list):
117114

118115
return df
119116

117+
def binarize_adata(adata, expression_threshold = 0):
    """
    Binarizes the expression matrix of an AnnData-like object.

    Every entry of ``adata.X`` that is strictly greater than
    ``expression_threshold`` becomes 1, every other entry becomes 0.

    Args:
        adata: Object exposing the expression matrix as ``adata.X``
            (dense array or scipy sparse matrix).
        expression_threshold (int or float): Values strictly above this
            threshold count as expressed. Defaults to 0.

    Returns:
        The binarized matrix with integer dtype. Sparse input is returned
        in CSR format; dense input is returned as produced by the
        comparison on ``adata.X``.

    Raises:
        TypeError: If ``adata.X`` is None.
    """
    matrix = adata.X
    if matrix is None:
        # Fail with an explicit message instead of the opaque
        # "'>' not supported" error the comparison below would raise.
        raise TypeError("adata.X is None; an expression matrix is required for binarization")

    # The comparison and cast are the same for dense and sparse input.
    binary_expression = (matrix > expression_threshold).astype(int)

    # Only sparse input needs normalising to CSR.
    if issparse(matrix):
        binary_expression = binary_expression.tocsr()
    return binary_expression
121124

122-
def dict_to_dataframe(mask_dict, column_order_list):
123-
"""
124-
Converts a dictionary of binary masks into a pandas DataFrame,
125-
respecting a specified column order.
126-
127-
Args:
128-
mask_dict (dict): A dictionary where keys are gene pair strings and
129-
values are 1D numpy arrays (the masks).
130-
column_order_list (list): A list of gene pair strings specifying the
131-
desired order of the DataFrame columns.
132-
133-
Returns:
134-
pd.DataFrame: A DataFrame with masks as columns, in the specified order.
135-
"""
136-
# 1. Create a dictionary with only the ordered columns
137-
ordered_data = {col: mask_dict[col] for col in column_order_list if col in mask_dict}
138-
139-
# 2. Check if all specified columns were found
140-
if len(ordered_data) != len(column_order_list):
141-
missing_columns = set(column_order_list) - set(ordered_data.keys())
142-
print(f"Warning: The following columns were not found in the mask dictionary: {missing_columns}")
143-
144-
# 3. Create the DataFrame from the ordered dictionary
145-
df = pd.DataFrame(ordered_data)
146-
147-
return df
148125

149126

150-
def add_neighbourhood_expression_mask(adata, grn_adata):
127+
def add_neighbourhood_expression_mask(adata, grn_adata, strict=False):
151128
""" Create a mask indicating whether the edge is likely actually
152129
expressed or not.
153130
@@ -160,7 +137,11 @@ def add_neighbourhood_expression_mask(adata, grn_adata):
160137
"""
161138
counts = pd.DataFrame(adata.X)
162139
counts.columns =adata.var.index
163-
ne = get_neighborhood_expression(adata, required_neighbours=5)
140+
# binarized matrix = ne
141+
if not strict:
142+
ne = get_neighborhood_expression(adata, required_neighbours=5)
143+
else:
144+
ne = binarize_adata(adata)
164145
mask = create_pairwise_binary_mask(ne, counts.columns)
165146
mask = dict_to_dataframe(mask, column_order_list = grn_adata.var.index)
166147
grn_adata.layers['mask'] = mask

0 commit comments

Comments
 (0)