-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathupdate_serverfiles.py
More file actions
96 lines (74 loc) · 3.48 KB
/
update_serverfiles.py
File metadata and controls
96 lines (74 loc) · 3.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import re
import requests
from plumbum import local
from plumbum.cmd import wget
def fetch_html(url: str) -> str:
    """Download ``url`` and return the response body decoded as UTF-8.

    The request is sent with a desktop-browser User-Agent header.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as text.

    Raises:
        requests.RequestException: If the connection fails, the request
            times out, or the server answers with a 4xx/5xx status.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}
    # requests has no default timeout; without one a stalled server would
    # block the whole update run indefinitely.
    response = requests.get(url, headers=headers, timeout=60)
    # Fail loudly on an HTTP error instead of returning the error page as if
    # it were the requested document.
    response.raise_for_status()
    return response.content.decode('utf-8')
def panglao_db_filename() -> str:
    """Return the marker file name linked from the PanglaoDB markers page.

    Fetches https://panglaodb.se/markers.html and looks for the first
    ``<a href="markers/...">`` link; the part of the href that follows
    ``markers/`` is returned.

    Returns:
        The captured file name (the text between ``markers/`` and the
        closing quote of the href).

    Raises:
        RuntimeError: If the page contains no link matching the pattern.
    """
    panglao_url = 'https://panglaodb.se/markers.html'
    re_pattern = r'<a href=\"markers/(.*?)\"'
    m = re.search(re_pattern, fetch_html(panglao_url))
    if m is None:
        # re.search returns None when nothing matches; report that clearly
        # rather than failing with an AttributeError on m.group().
        raise RuntimeError(
            f'no marker file link matching {re_pattern!r} found on {panglao_url}'
        )
    return m.group(1)
# File name of the PanglaoDB marker file, resolved at run time from the
# PanglaoDB markers page.
panglao_fn = panglao_db_filename()

# Download links
gene_info = 'ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz'
gene_history = 'ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_history.gz'
gene_to_go = 'ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene2go.gz'
homolog_genes = 'ftp://ftp.ncbi.nlm.nih.gov/pub/HomoloGene/current/homologene.data'
gene_ontology = 'http://purl.obolibrary.org/obo/go/go-basic.obo'
cellmarker = 'http://xteam.xbio.top/CellMarker/download/all_cell_markers.txt'
panglaodb = f"https://panglaodb.se/markers/{panglao_fn}"

# dictybase: the file name is substituted into the ID query parameter.
base_url = 'http://dictybase.org/db/cgi-bin/dictyBase/download/download.pl?area=mutant_phenotypes&ID={}'
all_mutants = 'all-mutants.txt'
null_mutants = 'null-mutants.txt'
overexpression_mutants = 'overexpression-mutants.txt'
developmental_mutants = 'developmental-mutants.txt'
multiple_mutants = 'multiple-mutants.txt'
other_mutants = 'other-mutants.txt'

python = local['python']

# plumbum hands every argument to wget verbatim (no shell is involved), so
# the User-Agent value must NOT be wrapped in quotes: they would be sent to
# the server as part of the header value.
# NOTE(review): without -N/-nc wget keeps an existing file and saves the new
# copy as "<name>.1", so a leftover temp/ directory from an earlier run would
# make the update scripts below read the old files -- confirm that temp/ is
# always empty when this script starts.
download_process = wget[
    '-P', 'temp/',
    '--user-agent=Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0',
]

# (label used in the progress message, URL) in download order.
downloads = (
    ('gene info', gene_info),
    ('gene history', gene_history),
    ('gene to go', gene_to_go),
    ('homolog genes', homolog_genes),
    ('gene ontology', gene_ontology),
    ('cellmarker gene markers', cellmarker),
    ('panglao gene markers', panglaodb),
)
for label, url in downloads:
    print(f'downloading {label} from {url}')
    download_process(url)

print('Download mutants from dictybase ...')
# These URLs are CGI queries, so the output path is given explicitly with -O
# (including the temp/ directory) instead of being derived from the URL.
dictybase_files = (
    all_mutants,
    null_mutants,
    overexpression_mutants,
    developmental_mutants,
    multiple_mutants,
    other_mutants,
)
for mutants_file in dictybase_files:
    download_process['-O', f'temp/{mutants_file}'](base_url.format(mutants_file))

# Run the individual update scripts on the downloaded files.
print('Updating homologs ...')
python('update_scripts/homologene.py', 'temp/homologene.data', 'temp/gene_history.gz')

print('Updating genes ...')
python('update_scripts/gene.py', 'temp/gene_info.gz')

print('Updating marker genes ...')
python('update_scripts/marker_genes.py', f'temp/{panglao_fn}', 'temp/all_cell_markers.txt')

print('Updating GO ...')
python('update_scripts/go.py', 'temp/gene2go.gz', 'temp/go-basic.obo')

print('Updating dictybase phenotypes ...')
# Argument order is positional for dictybase.py and intentionally differs
# from the download order above (multiple before developmental).
python(
    'update_scripts/dictybase.py',
    f'temp/{all_mutants}',
    f'temp/{null_mutants}',
    f'temp/{overexpression_mutants}',
    f'temp/{multiple_mutants}',
    f'temp/{developmental_mutants}',
    f'temp/{other_mutants}',
)

print('Updating gene sets ...')
python('update_scripts/gene_sets.py')

print('... All done!')