From bc15752602b333778fa185ba5a239e5eb49fa057 Mon Sep 17 00:00:00 2001 From: ajw2329 Date: Wed, 13 Mar 2024 23:11:32 -0700 Subject: [PATCH 1/2] Change HS (haplotype) field from Integer to character in VCFs heterogenesis_varincorp.py Haplotypes are represented by the HS field in the VCFs generated by Heterogenesis, and are indicated by letters (A, B, ...). Currently, the VCF header specifies that this field is numeric. This causes certain VCF parsers to fail to correctly read the HS field. --- heterogenesis_varincorp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heterogenesis_varincorp.py b/heterogenesis_varincorp.py index e5c0dcb..97ce667 100644 --- a/heterogenesis_varincorp.py +++ b/heterogenesis_varincorp.py @@ -146,7 +146,7 @@ def writevcffile(directory,prefix,clo,combvcfs,prochro): file.write('##INFO=\n') file.write('##FORMAT=\n') file.write('##FORMAT=\n') - file.write('##FORMAT=\n') + file.write('##FORMAT=\n') file.write('##FORMAT=\n') file.write('##FORMAT=\n') file.write('#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t'+clo+'\n') From a797f38690a1f3f4a26c5b8794d89eed2d97ee05 Mon Sep 17 00:00:00 2001 From: ajw2329 Date: Mon, 10 Mar 2025 21:09:27 -0700 Subject: [PATCH 2/2] Update heterogenesis_varincorp.py -- augment substitutions with all IUPAC codes --- heterogenesis_varincorp.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/heterogenesis_varincorp.py b/heterogenesis_varincorp.py index 97ce667..cac2c1d 100644 --- a/heterogenesis_varincorp.py +++ b/heterogenesis_varincorp.py @@ -252,10 +252,26 @@ def invert(forward): substitutions['g']='c' substitutions['N']='N' substitutions['n']='n' - substitutions['R']='R' - substitutions['r']='r' - substitutions['M']='M' - substitutions['m']='m' + substitutions['R']='Y' + substitutions['Y']='R' + substitutions['y']='r' + substitutions['r']='y' + substitutions['S']='S' + substitutions['s']='s' + substitutions['W']='W' + substitutions['w']='w' + 
substitutions['K']='M' + substitutions['M']='K' + substitutions['k']='m' + substitutions['m']='k' + substitutions['B']='V' + substitutions['V']='B' + substitutions['b']='v' + substitutions['v']='b' + substitutions['D']='H' + substitutions['H']='D' + substitutions['d']='h' + substitutions['h']='d' for i in forward[::-1]: reverse.append(substitutions[i]) return reverse