-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsim_setup_af.py
More file actions
52 lines (40 loc) · 2.55 KB
/
sim_setup_af.py
File metadata and controls
52 lines (40 loc) · 2.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
from helpers import *
# things that we only need to do once across all simulations
prefix = 'subset_302_homozygotes'
tag_inds = [int(x) for x in open(prefix + '_tag_inds.txt', 'r').readline().split()]
ped_lines = [line.split() for line in open(prefix + '.ped', 'r').readlines()]
map_lines = [line.split() for line in open(prefix + '.map', 'r').readlines()]
# find allele frequencies
#find_minor_allele_freqs(prefix, ped_lines)
#find_derived_allele_freqs(prefix, ped_lines, ancestral)
# make ped/bed file with only tag snps to go with tag map file
print('making ped/bed files with only tag snps')
#extract_ped_indices(ped_lines, map_lines, prefix + '_tag', tag_inds)
# filter ped file by MAF and biallecism to determine which snps we can
# pick as causal
print('filtering all snps by allele frequency')
#filter_ped(prefix, ped_lines, .05)
print('generating eigen decompositions of kinship matrices for af bins')
af = [float(line.split()[0]) for line in open(prefix + '_maf.txt', 'r').readlines()]
#gen_eigen_decomps_af(prefix, ped_lines, map_lines, af, 10)
#gen_eigen_decomps(prefix, ped_lines, map_lines, tag_inds, 0)
#gen_eigen_decomps_exclude_chr(prefix, ped_lines, map_lines)
print('generating bed files for tag snps')
gen_bed_af(prefix, ped_lines, map_lines, tag_inds, af, 10)
#gen_marker_beds(prefix, ped_lines, map_lines, tag_inds)
#gen_bed_chr(prefix, ped_lines, map_lines, tag_inds)
# eigen decomposition of kinship matrix for whole genome (along with
# bed/bim files)
print('generating eigen decomposition of whole-genome kinship matrix')
#os.system('plink --noweb --file ' + prefix + ' --make-bed --out ' + prefix)
#copy_fam_nonmissing(prefix, prefix)
#os.system('/net/gs/vol1/home/aclark4/software/gemma -bfile ' + prefix + ' -gk 1 -notsnp -o ' + prefix)
#os.system('/net/gs/vol1/home/aclark4/software/gemma -bfile ' + prefix + ' -k output/' + prefix + '.cXX.txt -eigen -notsnp -o ' + prefix)
#os.system('cp ' + prefix + '.map ' + prefix + '_shuffled.map')
#os.system('plink --noweb --file ' + prefix + '_shuffled --make-bed --out ' + prefix + '_shuffled')
#copy_fam_nonmissing(prefix, prefix + '_shuffled')
#os.system('/net/gs/vol1/home/aclark4/software/gemma -bfile ' + prefix + '_shuffled' + ' -gk 1 -notsnp -o ' + prefix + '_shuffled')
#os.system('/net/gs/vol1/home/aclark4/software/gemma -bfile ' + prefix + '_shuffled -k output/' + prefix + '_shuffled.cXX.txt -eigen -notsnp -o ' + prefix + '_shuffled')
#ped_lines = [line.split() for line in open(prefix + '_shuffled.ped', 'r').readlines()]
#extract_ped_indices(ped_lines, map_lines, prefix + '_tag_shuffled', tag_inds)