-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmimosca.wdl
More file actions
78 lines (59 loc) · 1.98 KB
/
mimosca.wdl
File metadata and controls
78 lines (59 loc) · 1.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
version 1.0
# Workflow entry point: runs the single run_mimosca task and re-exports its
# result file under the workflow-level name `mimosca_output`.
workflow mimosca {
    # run_mimosca is called without an input block, so none of its inputs
    # are set from within this workflow.
    call run_mimosca

    output {
        # The coefficient file declared in the task's output section.
        File mimosca_output = run_mimosca.mimosca_coeffs
    }
}
# Fits a ridge regression of gene expression on guide assignment and saves
# the coefficient matrix as a pickled pandas DataFrame.
#
# Inputs:
#   output_dir                destination bucket/prefix for the uploaded result
#   perturb_gex_anndata_file  .h5ad file whose `.X` matrix is the response (Y)
#   cell_by_guide_csv_file    CSV (first column used as index) used as the
#                             design matrix (X)
#   cpu / memory / docker / preemptible / disk_space
#                             runtime settings; memory is in GB (a "G" suffix
#                             is appended), disk_space is the local-disk size
# Output:
#   mimosca_coeffs            mimosca_output_wdl/mimosca_coeffs.pkl
task run_mimosca {
    input {
        String output_dir                # gbucket (a trailing / is tolerated and stripped)
        File perturb_gex_anndata_file    # al_ld_073_processed_deepika.h5ad
        File cell_by_guide_csv_file      # cell_by_guide_df.csv

        Int cpu = 24
        Int memory = 256
        String docker = "dyeramosu/mimosca:1.0.0"
        Int preemptible = 2
        Int disk_space = 128
    }

    command <<<
        set -e

        # -p keeps these from aborting the task (under set -e) if the
        # directories already exist.
        mkdir -p tmpdir
        mkdir -p mimosca_output_wdl

        python << CODE
        # imports
        import matplotlib.pyplot as plt
        import numpy as np
        import pandas as pd
        import scanpy as sc
        import sklearn
        from sklearn import linear_model

        print('loading in anndata', flush=True)

        # load files
        adata = sc.read_h5ad('~{perturb_gex_anndata_file}') # Y = adata.X
        cell_by_guide = pd.read_csv('~{cell_by_guide_csv_file}', index_col=0) # X
        # NOTE(review): rows of cell_by_guide are assumed to be in the same cell
        # order as adata; nothing here re-aligns them -- confirm upstream.

        print('loaded in anndata, starting regression', flush=True)

        # The response is handed to scikit-learn as a dense array; a sparse
        # expression matrix is converted first.
        Y = adata.X.toarray() if hasattr(adata.X, 'toarray') else adata.X

        # fit regression model
        # scikit-learn's signature is fit(X, y): the guide design matrix is X
        # and the expression matrix is the multi-target y.
        lm = linear_model.Ridge(fit_intercept=True, max_iter=10000)
        lm.fit(cell_by_guide.values, Y)

        # coef_ has shape (n_targets, n_features) = (num_genes, num_guides)
        B = pd.DataFrame(lm.coef_) # 32659 rows (num_genes)

        print('finished regression, saving coefficients', flush=True)

        # save coefficients
        #B.to_csv('mimosca_output_wdl/mimosca_coeffs.csv')
        B.to_pickle("mimosca_output_wdl/mimosca_coeffs.pkl")

        print('saved coefficients', flush=True)
        CODE

        # Strip any trailing slashes, then append exactly one, so the file is
        # always copied INTO output_dir rather than to an object named after it.
        gsutil -m cp mimosca_output_wdl/mimosca_coeffs.pkl "~{sub(output_dir, "/+$", "")}/"
    >>>

    output {
        File mimosca_coeffs = 'mimosca_output_wdl/mimosca_coeffs.pkl'
    }

    runtime {
        docker: docker
        memory: memory + "G"
        bootDiskSizeGb: 100
        disks: "local-disk " + disk_space + " HDD"
        cpu: cpu
        preemptible: preemptible
    }
}