Skip to content

Commit e38f6f7

Browse files
committed
cleanup of code, bugfixes
1 parent da738d9 commit e38f6f7

22 files changed

+1217
-709
lines changed

halvade/src/be/ugent/intec/halvade/HalvadeOptions.java

Lines changed: 102 additions & 194 deletions
Large diffs are not rendered by default.

halvade/src/be/ugent/intec/halvade/MapReduceRunner.java

Lines changed: 20 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
3232
import org.apache.hadoop.util.Tool;
3333
import be.ugent.intec.halvade.utils.Logger;
34-
import be.ugent.intec.halvade.utils.MyConf;
34+
import be.ugent.intec.halvade.utils.HalvadeConf;
3535
import be.ugent.intec.halvade.utils.Timer;
3636
import fi.tkk.ics.hadoop.bam.VCFInputFormat;
3737
import java.net.URI;
@@ -54,23 +54,14 @@ public int run(String[] strings) throws Exception {
5454
halvadeOpts = new HalvadeOptions();
5555
int optR = halvadeOpts.GetOptions(strings, halvadeConf);
5656
if (optR != 0) return optR;
57-
// initialise MapReduce - copy ref to each node!
57+
// initialise MapReduce - copy ref to each node??
5858

5959

60-
// only put files or continue?
61-
if(halvadeOpts.justPut)
62-
return 0;
63-
64-
65-
Job halvadeJob = new Job(halvadeConf, "Halvade");
66-
// add to dist cache with job
67-
halvadeJob.addCacheArchive(new URI(halvadeOpts.halvadeDir + "bin.tar.gz"));
68-
60+
Job halvadeJob = Job.getInstance(halvadeConf, "Halvade");
61+
halvadeJob.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
6962

70-
halvadeJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.BWAMemMapper.class);
71-
// specify input and output dirs
72-
// check if input is a file or directory
7363

64+
halvadeJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);
7465
FileSystem fs = FileSystem.get(new URI(halvadeOpts.in), halvadeConf);
7566
try {
7667
if (fs.getFileStatus(new Path(halvadeOpts.in)).isDirectory()) {
@@ -90,32 +81,32 @@ public int run(String[] strings) throws Exception {
9081
}
9182
FileOutputFormat.setOutputPath(halvadeJob, new Path(halvadeOpts.out));
9283

93-
// specify a mapper
94-
if (halvadeOpts.aln) halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.BWAAlnMapper.class);
95-
else halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.BWAMemMapper.class);
84+
if(halvadeOpts.rnaPipeline)
85+
halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignMapper.class);
86+
else {
87+
if (halvadeOpts.aln) halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.BWAAlnMapper.class);
88+
else halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.BWAMemMapper.class); }
9689
halvadeJob.setMapOutputKeyClass(ChromosomeRegion.class);
9790
halvadeJob.setMapOutputValueClass(SAMRecordWritable.class);
98-
halvadeJob.setInputFormatClass(TextInputFormat.class);
99-
// halvadeJob.setInputFormatClass(FastqInputFormat.class);
100-
101-
// per chromosome && region
91+
halvadeJob.setInputFormatClass(TextInputFormat.class);
10292
halvadeJob.setPartitionerClass(ChrRgPartitioner.class);
10393
halvadeJob.setSortComparatorClass(ChrRgPositionComparator.class);
10494
halvadeJob.setGroupingComparatorClass(ChrRgRegionComparator.class);
10595

106-
// # reducers
10796
if(halvadeOpts.justAlign)
10897
halvadeJob.setNumReduceTasks(0);
10998
else
110-
halvadeJob.setNumReduceTasks(halvadeOpts.reducers);
111-
// specify a reducer
112-
halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.GATKReducer.class);
99+
halvadeJob.setNumReduceTasks(halvadeOpts.reducers);
100+
if(halvadeOpts.rnaPipeline)
101+
halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RnaGATKReducer.class);
102+
else
103+
halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.DnaGATKReducer.class);
113104
halvadeJob.setOutputKeyClass(Text.class);
114105
halvadeJob.setOutputValueClass(VariantContextWritable.class);
115-
// job.setOutputFormatClass(VCFOutputFormat.class);
116106

117107
if(halvadeOpts.dryRun)
118108
return 0;
109+
119110
Timer timer = new Timer();
120111
timer.start();
121112
ret = halvadeJob.waitForCompletion(true) ? 0 : 1;
@@ -127,9 +118,9 @@ public int run(String[] strings) throws Exception {
127118
Logger.DEBUG("combining output");
128119
Configuration combineConf = getConf();
129120
if(!halvadeOpts.out.endsWith("/")) halvadeOpts.out += "/";
130-
MyConf.setInputDir(combineConf, halvadeOpts.out);
131-
MyConf.setOutDir(combineConf, halvadeOpts.out + "combinedVCF/");
132-
MyConf.setReportAllVariant(combineConf, halvadeOpts.reportAll);
121+
HalvadeConf.setInputDir(combineConf, halvadeOpts.out);
122+
HalvadeConf.setOutDir(combineConf, halvadeOpts.out + "combinedVCF/");
123+
HalvadeConf.setReportAllVariant(combineConf, halvadeOpts.reportAll);
133124
Job combineJob = new Job(combineConf, "HalvadeCombineVCF");
134125
combineJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class);
135126

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

package be.ugent.intec.halvade.hadoop.mapreduce;

import be.ugent.intec.halvade.tools.GATKTools;
import be.ugent.intec.halvade.tools.PreprocessingTools;
import be.ugent.intec.halvade.tools.QualityException;
import be.ugent.intec.halvade.utils.ChromosomeRange;
import be.ugent.intec.halvade.utils.Logger;
import be.ugent.intec.halvade.utils.HalvadeConf;
import be.ugent.intec.halvade.utils.SAMRecordIterator;
import fi.tkk.ics.hadoop.bam.SAMRecordWritable;
import java.io.IOException;
import java.net.URISyntaxException;

/**
 * DNA-specific GATK reduce step: runs the per-region DNA variant-calling
 * pipeline (preprocessing, indel realignment, base quality score
 * recalibration, variant calling) on the aligned records of one region.
 *
 * @author ddecap
 */
public class DnaGATKReducer extends GATKReducer {

    /**
     * Processes the aligned reads of a single region through the DNA pipeline.
     * Intermediate BAM/interval/VCF files are derived from {@code tmpFileBase}
     * (inherited from GATKReducer); the resulting VCF is registered in
     * {@code variantFiles}.
     *
     * @param values  aligned reads for this region, as emitted by the mappers
     * @param context Hadoop reduce context (also supplies the job configuration)
     * @param tools   wrapper around the preprocessing tools (elPrep/Picard)
     * @param gatk    wrapper around the GATK command-line tools
     */
    @Override
    protected void processAlignments(Iterable<SAMRecordWritable> values, Context context, PreprocessingTools tools, GATKTools gatk) throws IOException, InterruptedException, URISyntaxException, QualityException {
        final long start = System.currentTimeMillis();

        // Names of the intermediate files produced by each pipeline stage.
        String regionFile = tmpFileBase + "-region.intervals";
        final String preprocessedBam = tmpFileBase + ".bam";
        final String realignedBam = tmpFileBase + "-2.bam";
        final String recalibratedBam = tmpFileBase + "-3.bam";
        final String vcfOut = tmpFileBase + ".vcf";

        // Wrap the incoming records; the iterator also fills in the
        // chromosome range covered by this reduce group.
        final ChromosomeRange range = new ChromosomeRange();
        final SAMRecordIterator recordIt = new SAMRecordIterator(values.iterator(), header, range);

        // Preprocess with elPrep or Picard, depending on the job configuration.
        final boolean runElPrep = HalvadeConf.getUseIPrep(context.getConfiguration());
        if (runElPrep) {
            elPrepPreprocess(context, tools, recordIt, preprocessedBam);
        } else {
            PicardPreprocess(context, tools, recordIt, preprocessedBam);
        }

        // Build the intervals file for this region; nothing to do if absent.
        regionFile = makeRegionFile(context, range, tools, regionFile);
        if (regionFile == null) {
            return;
        }

        // GATK DNA pipeline: realignment -> BQSR -> variant calling.
        indelRealignment(context, regionFile, gatk, preprocessedBam, realignedBam);
        baseQualityScoreRecalibration(context, regionFile, range, tools, gatk, realignedBam, recalibratedBam);
        DnaVariantCalling(context, regionFile, gatk, recalibratedBam, vcfOut);
        variantFiles.add(vcfOut);

        removeLocalFile(regionFile);

        final long elapsed = System.currentTimeMillis() - start;
        Logger.DEBUG("total estimated time: " + elapsed / 1000);
    }

}

0 commit comments

Comments
 (0)