Skip to content
This repository was archived by the owner on Jan 31, 2020. It is now read-only.
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 23 additions & 19 deletions src/pindel2vcf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,10 @@ void createHeader(ofstream &outFile, const string& sourceProgram, const string&
outFile << "##INFO=<ID=SVLEN,Number=1,Type=Integer,Description=\"Difference in length between REF and ALT alleles\">" << endl;
outFile << "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">" << endl;
outFile << "##INFO=<ID=NTLEN,Number=.,Type=Integer,Description=\"Number of bases inserted in place of deleted code\">" << endl;
if ( g_par.somatic && pindel024uOrLater ){ {
// The field name of VarScan's similar statistic is used here.
outFile << "##INFO=<ID=SPV,Number=1,Type=Float,Description=\"Fisher's p-value for somatic events between the first two samples\">" << endl;
}
outFile << "##FORMAT=<ID=PL,Number=3,Type=Integer,Description=\"Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification\">" << endl;
//outFile << "##ALT=<ID=DEL,Description=\"Deletion\">" << endl; /*EWL040311: probably not needed, as our calls are precise so we should rather give the exact replacing sequence instead of a label. */
//outFile << "##ALT=<ID=DUP,Description=\"Duplication\">" << endl;
Expand Down Expand Up @@ -1516,32 +1520,29 @@ int FACT(int n)
return fact;
}

double fisher_test(int a, int c, int b, int d)
{
double p = 0.0;
int n = a + b + c + d;
p = (FACT(a+b)*FACT(c+d)*FACT(a+c)*FACT(d+b)) / (double)(FACT(a)*FACT(b)*FACT(c)*FACT(d)*FACT(n));
std::cout << a << " " << c << " " << b << " " << d << " " << p << std::endl;
return p;
}
//double fisher_test(int a, int c, int b, int d, double *_left, double *_right, double *_two)
//{
// double p = 0.0, left = 0.0, right = 0.0, two = 0.0;
// int n = a + b + c + d;
// //p = (FACT(a+b)*FACT(c+d)*FACT(a+c)*FACT(d+b)) / (double)(FACT(a)*FACT(b)*FACT(c)*FACT(d)*FACT(n));
// p = kt_fisher_exact (a, b, c, d, &left, &right, &two);
// std::cout << a << "|" << c << "|" << b << "|" << d << "|" << p << "|" << two << std::endl;
// return p;
//}


ostream& operator<<(ostream& os, const SVData& svd)
{
double somatic_p_value = 0.0;


double somatic_p_value, left, right, twotail;

os << svd.d_chromosome << "\t";
os << svd.getPosition() << "\t";
os << svd.d_id << "\t";
os << svd.getOutputFormattedReference() << "\t";
os << svd.getOutputFormattedAlternative() << "\t";
os << svd.d_quality << "\t";
if (svd.d_format.size() == 2 && g_par.somatic) {

somatic_p_value = fisher_test(svd.d_format[0].getTotalReads(), svd.d_format[0].getTotalRefSupport(), svd.d_format[1].getTotalReads(), svd.d_format[1].getTotalRefSupport());
//if (somatic_p_value < 0.05) svd.d_filter = "PASS";
if (svd.d_format.size() == 2 && g_par.somatic && pindel024uOrLater) {
somatic_p_value = kt_fisher_exact (svd.d_format[0].getTotalReads(), svd.d_format[0].getTotalRefSupport(), svd.d_format[1].getTotalReads(), svd.d_format[1].getTotalRefSupport(), &left, &right, &twotail);
}
if (somatic_p_value < 0.05) {
os << "PASS\t";
Expand Down Expand Up @@ -1571,8 +1572,8 @@ ostream& operator<<(ostream& os, const SVData& svd)
os << "," << svd.d_replaceLenTwo;
}

if (svd.d_format.size() == 2 && g_par.somatic) {
os << ";" << somatic_p_value;
if (svd.d_format.size() == 2 && g_par.somatic && pindel024uOrLater ) {
os << ";SPV=" << twotail;
}


Expand Down Expand Up @@ -2035,8 +2036,8 @@ void createParameters()
new IntParameter( &g_par.maxPostRepeatLength, "-pl", "--max_postindel_repeatlength", "Filters out all indels where the inserted/deleted sequence is followed by a repetition of the fundamental repeat unit of the inserted/deleted sequence; the maximum size of that 'fundamental unit' given by the value of -pl (default infinite) For example: TCAG->TCAGCAG has insertion CAG and post-insertion sequence CAG. This insertion would be filtered out if -pl has been set to 3 or above, but would be deemed 'sufficiently unrepetitive' if -pl is 2", false, -1 ) );
parameters.push_back(
new BoolParameter( &g_par.onlyBalancedSamples, "-sb", "--only_balanced_samples", "Only count a sample as supporting an event if it is supported by reads on both strands, minimum reads per strand given by the -ss parameter. (default false)", false, 0 ) );
// parameters.push_back(
// new BoolParameter( &g_par.somatic, "-so", "--somatic_p", "compute somatic p value when two samples are present, assume the order is normal and tumor. (default false)", false, 0 ) );
parameters.push_back(
new BoolParameter( &g_par.somatic, "-so", "--somatic_p", "compute somatic p value when two samples are present, assume the order is normal and tumor. (default false)", false, 0 ) );

parameters.push_back(
new IntParameter( &g_par.minimumStrandSupport, "-ss", "--minimum_strand_support", "Only count a sample as supporting an event if at least one of its strands is supported by X reads (default 1)", false, 1 ) );
Expand Down Expand Up @@ -2350,6 +2351,9 @@ int main(int argc, char* argv[])
showSet( sampleNames );
cout << "Chromosomes in which SVs have been found:\n";
showSet( chromosomeNames );
if ( g_par.somatic && ! pindel024uOrLater){
cout << "P-values for somatic variations can only be calculated for pindel outputs after 0.2.4u.\n"
}

map< string, int > sampleMap;
makeSampleMap( sampleNames, sampleMap );
Expand Down