-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcsv_inference.py
More file actions
119 lines (97 loc) · 4.85 KB
/
csv_inference.py
File metadata and controls
119 lines (97 loc) · 4.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# workflow_step_wise_supervised_inference
# tested and running 2026-03-05
print('load scripts and data...')
import os
import numpy as np
from flagx.io import FlowDataManager, export_to_fcs
from flagx.gating import SomClassifier, MLPClassifier
from flagx.dimred import UMAP, TSNE
# --- Destination directory for all inference results (created if absent).
save_path = './results/workflow_supervised_inference'
os.makedirs(save_path, exist_ok=True)
# Directory holding the samples to run inference on.
inference_data_path = './data/testing'
# Collect the CSV sample files from that directory, in deterministic (sorted) order.
inference_files = sorted(
    fn for fn in os.listdir(inference_data_path) if fn.endswith('.csv')
)
# --- Data loading and processing
# Set up the data manager for the inference samples.
fdm = FlowDataManager(
    data_file_names=inference_files,
    # File type is inferred from the filename ending of the first listed file.
    data_file_type=None,
    data_file_path=inference_data_path,
    verbosity=1
)
# Read every sample file into an in-memory AnnData object.
fdm.load_data_files_to_anndata()
# --- Preprocess each sample exactly as the training data was preprocessed:
# arcsinh transform with cofactor 150. The untransformed values are kept in
# a separate AnnData layer named 'no_trafo' for later export.
fdm.sample_wise_preprocessing(
    flavour='arcsinh',
    save_raw_to_layer='no_trafo',
    cofactor=150,
)
# Channels the classifiers were trained on; order must match training.
channels = [
    'FS INT', 'SS INT',
    '15-FITC', '13-PE', '34-ECD', '117-PC5.5', '33-PC7', '2-APC',
    '7-APC-AF700', 'HLADR-PB', '45-CO',
]
# Pull the preprocessed matrix (selected channels only) out of each sample.
data_matrices = [adata[:, channels].X for adata in fdm.anndata_list_]
# Per-sample event counts, and the offset of each sample inside the
# concatenated matrix (sample i spans starting_indices[i]:starting_indices[i + 1]).
num_events = [mat.shape[0] for mat in data_matrices]
starting_indices = np.cumsum([0] + num_events)
# Stack all samples into a single matrix for batch prediction.
x_test = np.concatenate(data_matrices, axis=0)
# --- Load the classifiers fitted in the training workflow.
som_clf = SomClassifier.load(
    filename='som_classifier.pkl',
    filepath='./results/workflow_step_wise_supervised_training'
)
mlp_clf = MLPClassifier.load(
    filename='mlp_classifier.pkl',
    filepath='./results/workflow_step_wise_supervised_training'
)
# --- Predict labels for the concatenated test events.
print('make predictions for test data...')
y_pred_som = som_clf.predict(x_test)
y_pred_mlp = mlp_clf.predict(x_test)


def _per_sample(flat):
    # Slice a flat per-event vector back into one chunk per sample
    # (the layout required by the FCS export function).
    return [
        flat[start:stop]
        for start, stop in zip(starting_indices[:-1], starting_indices[1:])
    ]


y_pred_som = _per_sample(y_pred_som)
y_pred_mlp = _per_sample(y_pred_mlp)
add_columns = [y_pred_som, y_pred_mlp]
add_columns_names = ['pred_som', 'pred_mlp']
# If dimensionality reductions should be computed as well, set: compute_dim_red = True
compute_dim_red = True
if compute_dim_red:
    # NOTE(review): the body of this `if` had lost its indentation in the
    # extracted copy; restored here so the script is syntactically valid.

    # --- SOM: project events onto the trained self-organising map.
    # transform() returns a 4-tuple; the second element holds the per-event
    # 2-D coordinates used below (the other elements are unused here).
    print('compute SOM...')
    _, x_som, _, _ = som_clf.transform(x_test)
    # --- UMAP embedding into 2 dimensions, using all CPU cores.
    print('compute UMAP...')
    umap_model = UMAP(n_components=2, n_jobs=-1)
    x_umap = umap_model.fit_transform(x_test)
    # --- t-SNE embedding into 2 dimensions, using all CPU cores.
    print('compute t-SNE...')
    tsne_model = TSNE(n_components=2, n_jobs=-1)
    x_tsne = tsne_model.fit_transform(x_test)

    def _split_dim(embedding, dim):
        """Return per-sample slices of column `dim` of a 2-D embedding.

        This converts the concatenated embedding back into the sample-wise
        layout required by the FCS export function.
        """
        return [
            embedding[starting_indices[i]: starting_indices[i + 1], dim]
            for i in range(len(num_events))
        ]

    # Append both dimensions of every embedding as extra export columns.
    add_columns += [
        _split_dim(x_som, 0), _split_dim(x_som, 1),
        _split_dim(x_umap, 0), _split_dim(x_umap, 1),
        _split_dim(x_tsne, 0), _split_dim(x_tsne, 1),
    ]
    add_columns_names += ['SOM_1', 'SOM_2', 'UMAP_1', 'UMAP_2', 'TSNE_1', 'TSNE_2']
# --- Write the annotated test data out as a single FCS file.
export_kwargs = dict(
    data_list=fdm.anndata_list_,          # the test samples to export
    layer_key='no_trafo',                 # export the non-transformed data layer
    sample_wise=False,                    # concatenate all samples into one FCS
    add_columns=add_columns,              # predictions (and embedding dims, if computed)
    add_columns_names=add_columns_names,  # names for the added columns
    scale_columns=add_columns_names,      # scale every column that was added
    val_range=(0, 2 ** 20),               # range the scaled columns are mapped to
    save_path=save_path,
    save_filenames='annotated_test_data.fcs',
)
export_to_fcs(**export_kwargs)