-
Notifications
You must be signed in to change notification settings - Fork 15
Assertion failure #132
Copy link
Copy link
Open
Description
Hi Oxbow.
Thanks for your work on this project!
Some of my VCF inputs cause an assertion failure. The VCFs may not conform to the specification, but it would be better to receive an error than to have the program panic on a failed assertion.
To reproduce:
With the files below, I get the following behaviour:
$ ./target/debug/oxbow-read-vcf
thread 'main' panicked at /Users/tom.conway/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrow-array-54.3.1/src/builder/fixed_size_list_builder.rs:174:9:
assertion `left == right` failed: Length of the child array (6) must be the multiple of the value length (2) and the array length (4).
left: 6
right: 8
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
Cargo.toml:
[package]
name = "oxbow-read-vcf"
version = "0.1.0"
edition = "2024"
[dependencies]
oxbow = "0.4.0"
noodles = { version = "0.90.0", features = ["core", "vcf"] }
main.rs:
use std::fs::File;
use std::io::BufReader;
use oxbow::variant::format::vcf::Scanner;
// Minimal reproduction program: reads `x.vcf` through oxbow's VCF `Scanner`
// and prints the schema of every record batch to stderr.
// Every fallible step uses `unwrap()`, so any I/O or parse error panics.
fn main() {
// Open the VCF file from the current working directory and buffer the reads.
let inner = File::open("x.vcf").map(BufReader::new).unwrap();
let mut fmt_reader = noodles::vcf::io::Reader::new(inner);
// The header is read first and handed to the scanner; the reader is then
// passed on to `scan` below.
let header = fmt_reader.read_header().unwrap();
let scanner = Scanner::new(header);
// NOTE(review): the six `None` arguments leave optional scan parameters unset;
// `Some(1000)` is presumably a batch-size or record limit — confirm against
// the oxbow 0.4.0 `Scanner::scan` signature.
let batches = scanner.scan(fmt_reader, None, None, None, None, None, None, Some(1000)).unwrap();
// Each item yielded by `batches` is unwrapped before use, so a failing batch
// aborts the program.
for batch in batches {
let batch = batch.unwrap();
// Only the batch schema is printed (Debug format, to stderr), not the data.
eprintln!("{:?}", batch.schema().as_ref());
}
}x.vcf:
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##ALT=<ID=NON_REF,Description="Represents any possible alternative allele not already represented at this location by REF and ALT">
##FILTER=<ID=LowQual,Description="Low quality">
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=GP,Number=G,Type=Float,Description="genotype posterior in Phred Scale">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block">
##FORMAT=<ID=PG,Number=G,Type=Float,Description="genotype priors in Phred Scale">
##FORMAT=<ID=PGT,Number=1,Type=String,Description="Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another; will always be heterozygous and is not intended to describe called alleles">
##FORMAT=<ID=PID,Number=1,Type=String,Description="Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
##FORMAT=<ID=PS,Number=1,Type=Integer,Description="Phasing set (typically the position of the first variant in the set)">
##FORMAT=<ID=RGQ,Number=1,Type=Integer,Description="Unconditional reference genotype confidence, encoded as a phred quality -10*log10 p(genotype call is wrong)">
##FORMAT=<ID=SB,Number=4,Type=Integer,Description="Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias.">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
##INFO=<ID=DRAGstrInfo,Number=2,Type=Integer,Description="Indicates the period and repeat count">
##INFO=<ID=DRAGstrParams,Number=3,Type=Float,Description="Parameters used (GOP, GCP, API)">
##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
##INFO=<ID=ExcessHet,Number=1,Type=Float,Description="Phred-scaled p-value for exact test of excess heterozygosity">
##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
##INFO=<ID=RAW_MQandDP,Number=2,Type=Integer,Description="Raw data (sum of squared MQ and total depth) for improved RMS Mapping Quality calculation. Incompatible with deprecated RAW_MQ formulation.">
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
##INFO=<ID=SOR,Number=1,Type=Float,Description="Symmetric Odds Ratio of 2x2 contingency table to detect strand bias">
##contig=<ID=chr1,length=248956422>
##contig=<ID=chr2,length=242193529>
##contig=<ID=chr3,length=198295559>
##contig=<ID=chr4,length=190214555>
##contig=<ID=chr5,length=181538259>
##contig=<ID=chr6,length=170805979>
##contig=<ID=chr7,length=159345973>
##contig=<ID=chr8,length=145138636>
##contig=<ID=chr9,length=138394717>
##contig=<ID=chr10,length=133797422>
##contig=<ID=chr11,length=135086622>
##contig=<ID=chr12,length=133275309>
##contig=<ID=chr13,length=114364328>
##contig=<ID=chr14,length=107043718>
##contig=<ID=chr15,length=101991189>
##contig=<ID=chr16,length=90338345>
##contig=<ID=chr17,length=83257441>
##contig=<ID=chr18,length=80373285>
##contig=<ID=chr19,length=58617616>
##contig=<ID=chr20,length=64444167>
##contig=<ID=chr21,length=46709983>
##contig=<ID=chr22,length=50818468>
##contig=<ID=chrX,length=156040895>
##contig=<ID=chrY,length=57227415>
##contig=<ID=chrM,length=16569>
##INFO=<ID=ANN,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|ALLELE_NUM|DISTANCE|STRAND|FLAGS|VARIANT_CLASS|SYMBOL_SOURCE|HGNC_ID|CANONICAL|MANE|ENSP|REFSEQ_MATCH|REFSEQ_OFFSET|GIVEN_REF|USED_REF|BAM_EDIT|SIFT|PolyPhen|HGVS_OFFSET|AFR_AF|AMR_AF|EAS_AF|EUR_AF|SAS_AF|AA_AF|EA_AF|gnomAD_AF|gnomAD_AFR_AF|gnomAD_AMR_AF|gnomAD_ASJ_AF|gnomAD_EAS_AF|gnomAD_FIN_AF|gnomAD_NFE_AF|gnomAD_OTH_AF|gnomAD_SAS_AF|MAX_AF|MAX_AF_POPS|CLIN_SIG|SOMATIC|PHENO|PUBMED">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA24143 NA24149 NA24385
chr1 10421 . ACCCTAACCCTAACCCTAAC A 13.58 . AC=1;AF=0.167;AN=6;BaseQRankSum=0.366;DP=141;DRAGstrInfo=1;DRAGstrParams=36;ExcessHet=0;FS=6.419;MLEAC=1;MLEAF=0.167;MQ=20.57;MQRankSum=-1.534;QD=1.7;ReadPosRankSum=0.366;SOR=2.833;ANN=-|downstream_gene_variant|MODIFIER|WASH7P|653635|Transcript|NR_024540.1|transcribed_pseudogene||||||||||rs1557426865|1|3922|-1||deletion|EntrezGene||YES|||||CCCTAACCCTAACCCTAAC|CCCTAACCCTAACCCTAAC|OK|||||||||||||||||||||||||,-|upstream_gene_variant|MODIFIER|DDX11L1|100287102|Transcript|NR_046018.2|transcribed_pseudogene||||||||||rs1557426865|1|1434|1||deletion|EntrezGene||YES|||||CCCTAACCCTAACCCTAAC|CCCTAACCCTAACCCTAAC|||||||||||||||||||||||||| GT:AD:DP:GQ:PGT:PID:PL:PS 0/1:7,1:8:21:.:.:21,0,34:. 0/0:14,0:14:0:.:.:0,0,248:. 0|0:9,0:11:53:0|1:10418_CT_C:0,53,93:10418
chr1 10439 rs112766696 AC A,* 30.89 . AC=2,1;AF=0.333,0.167;AN=6;BaseQRankSum=1.09;DB;DP=153;DRAGstrInfo=1;DRAGstrParams=32;ExcessHet=3.9794;FS=0;MLEAC=2,1;MLEAF=0.333,0.167;MQ=24.89;MQRankSum=-1.068;QD=1.72;ReadPosRankSum=0.536;SOR=1.075;ANN=-|downstream_gene_variant|MODIFIER|WASH7P|653635|Transcript|NR_024540.1|transcribed_pseudogene||||||||||rs112766696|1|3922|-1||sequence_alteration|EntrezGene||YES|||||C|C|OK|||||||||||||||||||||||||,-|upstream_gene_variant|MODIFIER|DDX11L1|100287102|Transcript|NR_046018.2|transcribed_pseudogene||||||||||rs112766696|1|1434|1||sequence_alteration|EntrezGene||YES|||||C|C|||||||||||||||||||||||||| GT:AD:DP:GQ:PL 0/2:7,0,1:8:21:21,42,76,0,206,34 0/1:6,2,0:8:34:34,0,40,52,179,92 0/1:7,2,1:10:7:7,0,40,30,212,70
chr1 10492 rs55998931 C T 101.58 . AC=2;AF=0.333;AN=6;BaseQRankSum=-0.319;DB;DP=47;ExcessHet=1.7609;FS=7.16;MLEAC=2;MLEAF=0.333;MQ=31.71;MQRankSum=0;QD=7.26;ReadPosRankSum=-0.414;SOR=3.737;ANN=T|downstream_gene_variant|MODIFIER|WASH7P|653635|Transcript|NR_024540.1|transcribed_pseudogene||||||||||rs55998931|1|3870|-1||SNV|EntrezGene||YES|||||C|C|OK|||||||||||||||||||||||||,T|upstream_gene_variant|MODIFIER|DDX11L1|100287102|Transcript|NR_046018.2|transcribed_pseudogene||||||||||rs55998931|1|1382|1||SNV|EntrezGene||YES|||||C|C|||||||||||||||||||||||||| GT:AD:DP:GQ:PL 0/1:2,4:6:30:57,0,30 0/0:6,0:6:0:0,0,149 0/1:5,3:8:40:53,0,40
chr1 13273 . G C 91.88 . AC=2;AF=0.5;AN=4;DP=38;ExcessHet=0;FS=0;MLEAC=2;MLEAF=0.5;MQ=5.35;QD=5.1;SOR=1.179;ANN=C|downstream_gene_variant|MODIFIER|WASH7P|653635|Transcript|NR_024540.1|transcribed_pseudogene||||||||||rs531730856|1|1089|-1||SNV|EntrezGene||YES|||||G|G|OK||||0.0204|0.1455|0.0625|0.1471|0.1401||||||||||||0.1471|EUR||||,C|non_coding_transcript_exon_variant|MODIFIER|DDX11L1|100287102|Transcript|NR_046018.2|transcribed_pseudogene|3/3||NR_046018.2:n.516G>C||516|||||rs531730856|1||1||SNV|EntrezGene||YES|||||G|G|||||0.0204|0.1455|0.0625|0.1471|0.1401||||||||||||0.1471|EUR||||,C|downstream_gene_variant|MODIFIER|MIR6859-1|102466751|Transcript|NR_106918.1|miRNA||||||||||rs531730856|1|4096|-1||SNV|EntrezGene||YES|||||G|G|||||0.0204|0.1455|0.0625|0.1471|0.1401||||||||||||0.1471|EUR|||| GT:AD:DP:GQ:PL 1/1:0,18:18:54:106,54,0 0/0:17,0:17:51:0,51,540 ./.:0,0:0:0:0,0,0
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels