Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/brca.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
##contig=<ID=chrY,length=59373566>
##INFO=<ID=AF_ESP,Number=1,Type=Float,Description="allele frequencies from GO-ESP">
#CHROM POS ID REF ALT QUAL FILTER INFO
chr17 41276135 . TTAA GTCT . . .
chr17 41276135 . TTAA GTCT . . .
chr17 41276135 . T G . . .
chr17 41276135 . T C . . .
chr17 41276135 . T A . . .
Expand Down
57 changes: 38 additions & 19 deletions pangolin/pangolin.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import argparse
from pkg_resources import resource_filename
from pangolin.model import *
import vcf

import gffutils
import pandas as pd
import pyfastx
# import time
# startTime = time.time()
import pysam
from pkg_resources import resource_filename

from pangolin.model import *


IN_MAP = np.asarray([[0, 0, 0, 0],
[1, 0, 0, 0],
Expand Down Expand Up @@ -240,20 +241,38 @@ def main():
if line[0] != '#':
break

variants = vcf.Reader(filename=variants)
variants.infos["Pangolin"] = vcf.parser._Info(
"Pangolin",'.',"String","Pangolin splice scores. "
"Format: gene|pos:score_change|pos:score_change|warnings,...",'.','.')
fout = vcf.Writer(open(args.output_file+".vcf", 'w'), variants)

for i, variant in enumerate(variants):
scores = process_variant(lnum+i, str(variant.CHROM), int(variant.POS), variant.REF, str(variant.ALT[0]), gtf, models, args)
if scores != -1:
variant.INFO["Pangolin"] = scores
fout.write_record(variant)
fout.flush()

fout.close()
with pysam.VariantFile(variants) as variant_file, pysam.VariantFile(
args.output_file+".vcf", "w", header=variant_file.header
) as out_variant_file:
out_variant_file.header.add_meta(
key="INFO",
items=[
("ID", "Pangolin"),
("Number", "."),
("Type", "String"),
(
"Description",
"Pangolin splice scores. Format: gene|pos:score_change|pos:score_change|warnings,..."
),
]
)
for i, variant_record in enumerate(variant_file):
variant_record.translate(out_variant_file.header)
assert variant_record.ref, f"Empty REF field in variant record {variant_record}"
assert variant_record.alts, f"Empty ALT field in variant record {variant_record}"
scores = process_variant(
lnum + i,
str(variant_record.contig),
int(variant_record.pos),
variant_record.ref,
str(variant_record.alts[0]),
gtf,
models,
args,
)
if scores != -1:
variant_record.info["Pangolin"] = scores
out_variant_file.write(variant_record)

elif variants.endswith(".csv"):
col_ids = args.column_ids.split(',')
Expand Down