From 3f5b91ee8301053c743ea9922ac6f595bf9283b6 Mon Sep 17 00:00:00 2001
From: Panciera <michael.panciera@amedrhcwrair001.amed.ds.army.mil>
Date: Thu, 28 Jul 2016 14:38:39 -0400
Subject: [PATCH 01/14] makefile for doing freebayes-based consensus on
 ngs_mapper output directory

---
 bioframework/Makefile | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 bioframework/Makefile
diff --git a/bioframework/Makefile b/bioframework/Makefile
new file mode 100644
index 0000000..255239f
--- /dev/null
+++ b/bioframework/Makefile
@@ -0,0 +1,21 @@
+
+BAM=$(NGSDIR)/$(notdir $(NGSDIR)).bam
+REF=$(NGSDIR)/$(notdir $(NGSDIR)).ref.fasta
+TAGGED=$(NGSDIR)/tagged.bam
+FBVCF=$(NGSDIR)/freebayes.vcf
+FBCONSENSUS=$(NGSDIR)/freebayes_consensus.fasta
+
+all: $(FBCONSENSUS)
+
+$(TAGGED): $(BAM)
+	python tagreads.py $< | samtools view  - -Sbh | samtools sort - $(TAGGED:.bam=)
+
+$(TAGGED).bai: $(TAGGED)
+	samtools index $<
+
+$(FBVCF): $(TAGGED) $(NGSDIR)/tagged.bam.bai $(REF)
+	freebayes -f $(REF) $< > $@
+
+$(FBCONSENSUS): $(BAM) $(REF) $(FBVCF) 
+	python consensus.py --vcf $< --ref $(REF) > $@
+	#cd ~/bioframes && python bioframes/consensus.py $^ > $@

From f8132a0e8786e24b7ced2e21a31f91210f56fd12 Mon Sep 17 00:00:00 2001
From: Panciera <michael.panciera@amedrhcwrair001.amed.ds.army.mil>
Date: Fri, 29 Jul 2016 16:08:25 -0400
Subject: [PATCH 02/14] add consensus for freebayes

---
 bioframework/consensus.py          | 10 +++++-----
 install.sh                         |  3 +++
 requirements-conda.txt             |  1 +
 {bioframework => scripts}/Makefile |  0
 scripts/consensus.sh               |  7 +++++++
 setup.py                           |  6 ++++++
 6 files changed, 22 insertions(+), 5 deletions(-)
 create mode 100644 install.sh
 rename {bioframework => scripts}/Makefile (100%)
 create mode 100644 scripts/consensus.sh

diff --git a/bioframework/consensus.py b/bioframework/consensus.py
index 8dd433c..f443d03 100755
--- a/bioframework/consensus.py
+++ b/bioframework/consensus.py
@@ -22,7 +22,7 @@
 from Bio.SeqRecord import SeqRecord #done
 import vcf #done
 from vcf.model import _Record
-import sh #todo
+# import sh #todo
 #from toolz import compose
 from toolz.dicttoolz import merge, dissoc, merge_with, valfilter, keyfilter #done
 from docopt import docopt #ignore
@@ -200,10 +200,10 @@ def single_consensus(muts, ref):
 def consensus_str(ref, consensus): # type: (SeqRecord, str) -> str
     return ">{0}:Consensus\n{1}".format(ref.id, consensus)
 
-def zero_coverage_positions(bam_file, ref_file): # type: (str, str) -> Iterable[int]
-    pileup = sh.Command('mpileup')(bam_file, f=ref_file, _iter=True)
-    get_pos = lambda x: int(x.split()[1]) # type: Callable[[str],int]
-    return imap(get_pos, pileup)
+#def zero_coverage_positions(bam_file, ref_file): # type: (str, str) -> Iterable[int]
+#    pileup = sh.Command('mpileup')(bam_file, f=ref_file, _iter=True)
+#    get_pos = lambda x: int(x.split()[1]) # type: Callable[[str],int]
+#    return imap(get_pos, pileup)
 
 #TODO: is pileup 0-based or 1-based index?
 def trim_ref(ref, positions): # type: (str, Iterator[int]) -> str
diff --git a/install.sh b/install.sh
new file mode 100644
index 0000000..8886a67
--- /dev/null
+++ b/install.sh
@@ -0,0 +1,3 @@
+conda install --file requirements-conda.txt
+pip install -r requirements.txt
+python setup.py install
diff --git a/requirements-conda.txt b/requirements-conda.txt
index d2b04c0..62509a0 100644
--- a/requirements-conda.txt
+++ b/requirements-conda.txt
@@ -1,5 +1,6 @@
 # Dependencies that are software and/or large such as scikit-learn...
 bwa
 samtools
+freebayes
 cutadapt
 biopython
diff --git a/bioframework/Makefile b/scripts/Makefile
similarity index 100%
rename from bioframework/Makefile
rename to scripts/Makefile
diff --git a/scripts/consensus.sh b/scripts/consensus.sh
new file mode 100644
index 0000000..38c47ef
--- /dev/null
+++ b/scripts/consensus.sh
@@ -0,0 +1,7 @@
+if [ -e  $boo ]; 
+then 
+    echo "Usage: $(basename $0) <ngsdir>";
+    exit 1
+fi;
+
+make NGSDIR=$1
diff --git a/setup.py b/setup.py
index 1d09b17..4fcda06 100755
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,13 @@ def jip_modules(path=bioframework.JIP_PATH):
     description = bioframework.__description__,
     license = "GPLv2",
     keywords = bioframework.__keywords__,
+    # scripts = glob('scripts/*'),
+    scripts = ['scripts/consensus.sh'],
     entry_points = {
+        'console_scripts': [
+            'tagreads = bioframework.tagreads:main',
+            'fb_consensus = bioframework.consensus:main'
+        ]
     },
     install_requires = [
         'pyjip',

From 8904f94c95fdaf0de2f02c86784f3fe6c34cb9f0 Mon Sep 17 00:00:00 2001
From: Panciera <michael.panciera@amedrhcwrair001.amed.ds.army.mil>
Date: Fri, 29 Jul 2016 16:28:13 -0400
Subject: [PATCH 03/14] now calls makefile correctly

---
 scripts/consensus.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/consensus.sh b/scripts/consensus.sh
index 38c47ef..eb09908 100644
--- a/scripts/consensus.sh
+++ b/scripts/consensus.sh
@@ -1,7 +1,7 @@
-if [ -e  $boo ]; 
+if [ -z  "$1" ]; 
 then 
     echo "Usage: $(basename $0) <ngsdir>";
     exit 1
 fi;
 
-make NGSDIR=$1
+make -f $(dirname $0)/Makefile NGSDIR=$1

From ac80fa0faad6b1d81210e51655fc8d5140068317 Mon Sep 17 00:00:00 2001
From: Panciera <michael.panciera@amedrhcwrair001.amed.ds.army.mil>
Date: Tue, 13 Sep 2016 16:51:26 -0400
Subject: [PATCH 04/14] missing imports (samtools, ngs_mapper) also fixed

---
 bioframework/tagreads.py | 13 +++++++++----
 scripts/Makefile         |  8 ++++----
 scripts/consensus.sh     |  4 +++-
 setup.py                 |  2 +-
 4 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/bioframework/tagreads.py b/bioframework/tagreads.py
index 42fa565..5acd7d1 100644
--- a/bioframework/tagreads.py
+++ b/bioframework/tagreads.py
@@ -155,14 +155,19 @@ def get_bam_header( bam ):
     return hdr.rstrip()
 
 def parse_args( args=sys.argv[1:] ):
-    from ngs_mapper import config
-    conf_parser, args, config, configfile = config.get_config_argparse(args)
-    defaults = config['tagreads']
+    defaults = { 'SM' : { 'default' : None,
+                         'help' : 'Sets the SM tag value inside of each read group. Default is the portion of the filname that preceeds the .bam[Default: %(default)s]'
+                         },
+                 'CN' : { 'default' : None,
+                         'help' : 'Sets the CN tag inside of each read group to the value specified.[Default: %(default)s]'
+                         }
+                }
+
+                #         config['tagreads']
 
     parser = argparse.ArgumentParser(
         description='''Adds Sanger, MiSeq, 454 and IonTorrent read groups to a bam file.
         Will tag all reads in the alignment for each of the platforms based on the read name''',
-        parents=[conf_parser]
     )
 
     parser.add_argument(
diff --git a/scripts/Makefile b/scripts/Makefile
index 255239f..c025104 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -8,7 +8,7 @@ FBCONSENSUS=$(NGSDIR)/freebayes_consensus.fasta
 all: $(FBCONSENSUS)
 
 $(TAGGED): $(BAM)
-	python tagreads.py $< | samtools view  - -Sbh | samtools sort - $(TAGGED:.bam=)
+	fb_tagreads $< | samtools view  - -Sbh | samtools sort - $(TAGGED:.bam=)
 
 $(TAGGED).bai: $(TAGGED)
 	samtools index $<
@@ -16,6 +16,6 @@ $(TAGGED).bai: $(TAGGED)
 $(FBVCF): $(TAGGED) $(NGSDIR)/tagged.bam.bai $(REF)
 	freebayes -f $(REF) $< > $@
 
-$(FBCONSENSUS): $(BAM) $(REF) $(FBVCF) 
-	python consensus.py --vcf $< --ref $(REF) > $@
-	#cd ~/bioframes && python bioframes/consensus.py $^ > $@
+$(FBCONSENSUS):  $(FBVCF) $(REF)
+	fb_consensus --vcf $< --ref $(REF) > $@
+#cd ~/bioframes && python bioframes/consensus.py $^ > $@
diff --git a/scripts/consensus.sh b/scripts/consensus.sh
index eb09908..263f86c 100644
--- a/scripts/consensus.sh
+++ b/scripts/consensus.sh
@@ -3,5 +3,7 @@ then
     echo "Usage: $(basename $0) <ngsdir>";
     exit 1
 fi;
+makefile=/media/VD_Research/Admin/PBS/Software/bioframework/scripts/Makefile
 
-make -f $(dirname $0)/Makefile NGSDIR=$1
+# make -f $(dirname $0)/Makefile NGSDIR=$1
+make -f $makefile NGSDIR=$1
diff --git a/setup.py b/setup.py
index 4fcda06..c56d402 100755
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,7 @@ def jip_modules(path=bioframework.JIP_PATH):
     scripts = ['scripts/consensus.sh'],
     entry_points = {
         'console_scripts': [
-            'tagreads = bioframework.tagreads:main',
+            'fb_tagreads = bioframework.tagreads:main',
             'fb_consensus = bioframework.consensus:main'
         ]
     },

From 09fb5148f48c1ef341b0378409c0c0a854e581e1 Mon Sep 17 00:00:00 2001
From: Panciera <michael.panciera@amedrhcwrair001.amed.ds.army.mil>
Date: Thu, 15 Sep 2016 14:24:40 -0400
Subject: [PATCH 05/14] fix so consensus includes reference as argument to
 match Makefile

---
 scripts/consensus.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/consensus.sh b/scripts/consensus.sh
index 263f86c..dbba369 100644
--- a/scripts/consensus.sh
+++ b/scripts/consensus.sh
@@ -1,9 +1,9 @@
-if [ -z  "$1" ]; 
+if [ -z  "$1" ] || [ -z "$2" ]; 
 then 
-    echo "Usage: $(basename $0) <ngsdir>";
+    echo "Usage: $(basename $0) <ngsdir> <reference>";
     exit 1
 fi;
 makefile=/media/VD_Research/Admin/PBS/Software/bioframework/scripts/Makefile
 
 # make -f $(dirname $0)/Makefile NGSDIR=$1
-make -f $makefile NGSDIR=$1
+make -f $makefile NGSDIR=$1 REF=$2

From f2922a2ffc89aea6737d07955e7e1ec5ccf34625 Mon Sep 17 00:00:00 2001
From: Panciera <michael.panciera@amedrhcwrair001.amed.ds.army.mil>
Date: Thu, 15 Sep 2016 14:25:15 -0400
Subject: [PATCH 06/14] freebayes consnesus now includes sample name in
 filename

---
 scripts/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/Makefile b/scripts/Makefile
index c025104..7953f0b 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -3,7 +3,8 @@ BAM=$(NGSDIR)/$(notdir $(NGSDIR)).bam
 REF=$(NGSDIR)/$(notdir $(NGSDIR)).ref.fasta
 TAGGED=$(NGSDIR)/tagged.bam
 FBVCF=$(NGSDIR)/freebayes.vcf
-FBCONSENSUS=$(NGSDIR)/freebayes_consensus.fasta
+FBCONSENSUS=$(NGSDIR)/$(SAMPLE).freebayes_consensus.fasta
+SAMPLE=$(basename $(NGSDIR))
 
 all: $(FBCONSENSUS)
 

From 8fb4ac62eb3c4bfec327b4d9ca793b52b7dabcca Mon Sep 17 00:00:00 2001
From: Panciera <michael.panciera@amedrhcwrair001.amed.ds.army.mil>
Date: Thu, 15 Sep 2016 14:44:06 -0400
Subject: [PATCH 07/14] consensus now accepts samplename argument to prepend to
 fasta IDs

---
 bioframework/consensus.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/bioframework/consensus.py b/bioframework/consensus.py
index f443d03..9b2cbbc 100755
--- a/bioframework/consensus.py
+++ b/bioframework/consensus.py
@@ -1,12 +1,13 @@
 """
 Usage:
-     consensus --ref <ref> --vcf <vcf> [--mind <mind>] [--majority <majority>] [-o <output>]
+     consensus --ref <ref> --vcf <vcf> [--mind <mind>] [--majority <majority>] [-o <output>] [--sample <sample>]
 
 Options:
     --ref=<ref>             Reference fasta file
     --vcf=<vcf>             VCF output
     --majority=<majority>   Percentage required [default: 80]
     --mind=<mind>           minimum depth to call base non-N [default: 10]
+    --sample=<sample>       sample name
     --output,-o=<output>       output file [default: ]
 """
 #stdlib
@@ -26,7 +27,7 @@
 #from toolz import compose
 from toolz.dicttoolz import merge, dissoc, merge_with, valfilter, keyfilter #done
 from docopt import docopt #ignore
-from schema import Schema, Use #ignore
+from schema import Schema, Use, Optional #ignore
 #from contracts import contract, new_contract #can ignore
 #from mypy.types import VCFRow
 #############
@@ -197,8 +198,8 @@ def single_consensus(muts, ref):
 ##########
 # I/O    #
 ##########
-def consensus_str(ref, consensus): # type: (SeqRecord, str) -> str
-    return ">{0}:Consensus\n{1}".format(ref.id, consensus)
+def consensus_str(sample, ref, consensus): # type: (SeqRecord, str) -> str
+    return ">{0}_{1}:Consensus\n{2}".format(sample if sample else '', ref.id, consensus)
 
 #def zero_coverage_positions(bam_file, ref_file): # type: (str, str) -> Iterable[int]
 #    pileup = sh.Command('mpileup')(bam_file, f=ref_file, _iter=True)
@@ -213,14 +214,14 @@ def trim_ref(ref, positions): # type: (str, Iterator[int]) -> str
 
 
 #@contract(ref_fasta=str, vcf=str, mind=int, majority=int)
-def run(ref_fasta, freebayes_vcf, outfile, mind, majority):
+def run(ref_fasta, freebayes_vcf, outfile, mind, majority, sample):
     # type: (str, str, BinaryIO, int, int) -> int
     _refs = SeqIO.parse(ref_fasta, 'fasta')
     with open(freebayes_vcf, 'r') as vcf_handle:
         _muts = map(flatten_vcf_record, vcf.Reader(vcf_handle))
         refs, muts = list(_refs), list(_muts)
         the_refs, seqs_and_muts = all_consensuses(refs, muts, mind, majority)
-        strings = imap(consensus_str, the_refs, imap(get(0), seqs_and_muts))
+        strings = imap(partial(consensus_str, sample), the_refs, imap(get(0), seqs_and_muts))
         result = '\n'.join(strings)
         outfile.write(result)
         outfile.close()
@@ -230,13 +231,14 @@ def main(): # type: () -> None
     scheme = Schema(
         { '--vcf' : os.path.isfile,
           '--ref' : os.path.isfile,
+         Optional('--sample') : lambda x: True,
           '--majority' : Use(int),
           '--mind' : Use(int),
           '--output' : Use(lambda x: sys.stdout if not x else open(x, 'w'))})
     raw_args = docopt(__doc__, version='Version 1.0')
     args = scheme.validate(raw_args)
     run(args['--ref'], args['--vcf'], args['--output'],
-        args['--mind'], args['--output'])
+        args['--mind'], args['--output'], args['--sample'])
 
 if __name__ == '__main__':
     main()

From 65abc69d39bfc6acc079e8bf009ccb5a9b9600e5 Mon Sep 17 00:00:00 2001
From: Panciera <michael.panciera@amedrhcwrair001.amed.ds.army.mil>
Date: Thu, 15 Sep 2016 14:44:30 -0400
Subject: [PATCH 08/14] Makefile now uses consensus.py sample argument

---
 scripts/Makefile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/Makefile b/scripts/Makefile
index 7953f0b..3ac1f02 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -1,10 +1,10 @@
 
-BAM=$(NGSDIR)/$(notdir $(NGSDIR)).bam
-REF=$(NGSDIR)/$(notdir $(NGSDIR)).ref.fasta
+SAMPLE=$(notdir $(NGSDIR))
+BAM=$(NGSDIR)/$(SAMPLE).bam
+REF=$(NGSDIR)/$(SAMPLE).ref.fasta
 TAGGED=$(NGSDIR)/tagged.bam
 FBVCF=$(NGSDIR)/freebayes.vcf
 FBCONSENSUS=$(NGSDIR)/$(SAMPLE).freebayes_consensus.fasta
-SAMPLE=$(basename $(NGSDIR))
 
 all: $(FBCONSENSUS)
 
@@ -18,5 +18,5 @@ $(FBVCF): $(TAGGED) $(NGSDIR)/tagged.bam.bai $(REF)
 	freebayes -f $(REF) $< > $@
 
 $(FBCONSENSUS):  $(FBVCF) $(REF)
-	fb_consensus --vcf $< --ref $(REF) > $@
+	fb_consensus --vcf $< --ref $(REF) --sample $(SAMPLE) > $@
 #cd ~/bioframes && python bioframes/consensus.py $^ > $@

From d414a08fd84a3196f9b6e86620e4818774aa1f4f Mon Sep 17 00:00:00 2001
From: Panciera <michael.panciera@amedrhcwrair001.amed.ds.army.mil>
Date: Thu, 15 Sep 2016 15:47:30 -0400
Subject: [PATCH 09/14] functionality for coverage checking

---
 bioframework/consensus.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/bioframework/consensus.py b/bioframework/consensus.py
index 9b2cbbc..b6ecf5e 100755
--- a/bioframework/consensus.py
+++ b/bioframework/consensus.py
@@ -28,6 +28,7 @@
 from toolz.dicttoolz import merge, dissoc, merge_with, valfilter, keyfilter #done
 from docopt import docopt #ignore
 from schema import Schema, Use, Optional #ignore
+from plumbum.cmd import samtools
 #from contracts import contract, new_contract #can ignore
 #from mypy.types import VCFRow
 #############
@@ -211,6 +212,16 @@ def trim_ref(ref, positions): # type: (str, Iterator[int]) -> str
     start, end = next(positions), collections.deque(positions, 1)[0]
     return '-'*start + ref[:start:end] + '-'*(len(ref) - end)
 
+def samtoolsDepth(bam):
+    lines = samtools['depth'][bam]().split('\n')
+    lines = map(str.split, lines)
+    stats = map(lambda x: { 'pos' : int(x[1]), 'depth' : int(x[2]) }, lines)
+    return stats
+
+def uncoveredPositions(reqDepth, bam):
+    depthStats = samtoolsDepth(bam)
+    underStats = filter(lambda x: x['depth'] < reqDepth, depthStats)
+    return map(lambda x: x['pos'], underStats)
 
 
 #@contract(ref_fasta=str, vcf=str, mind=int, majority=int)

From 58de882683d24f6cf21ae95febba4e0814ceaf6a Mon Sep 17 00:00:00 2001
From: Panciera <michael.panciera@amedrhcwrair001.amed.ds.army.mil>
Date: Fri, 16 Sep 2016 16:43:45 -0400
Subject: [PATCH 10/14] consensus now considers coverage

---
 bioframework/consensus.py | 62 ++++++++++++++++++++++++++++++++-------
 setup.py                  |  3 +-
 2 files changed, 53 insertions(+), 12 deletions(-)

diff --git a/bioframework/consensus.py b/bioframework/consensus.py
index b6ecf5e..60eb541 100755
--- a/bioframework/consensus.py
+++ b/bioframework/consensus.py
@@ -1,6 +1,6 @@
 """
 Usage:
-     consensus --ref <ref> --vcf <vcf> [--mind <mind>] [--majority <majority>] [-o <output>] [--sample <sample>]
+     consensus --ref <ref> --vcf <vcf> [--mind <mind>] [--majority <majority>] [-o <output>] [--sample <sample>] [--bam <bam>]
 
 Options:
     --ref=<ref>             Reference fasta file
@@ -8,9 +8,15 @@
     --majority=<majority>   Percentage required [default: 80]
     --mind=<mind>           minimum depth to call base non-N [default: 10]
     --sample=<sample>       sample name
-    --output,-o=<output>       output file [default: ]
+    --bam=<bam>             bam file, required to check for coverage
+    --output,-o=<output>    output file [default: ]
 """
 #stdlib
+# fb_consensus --ref
+# /media/VD_Research/Analysis/ProjectBased_Analysis/melanie/share/Issue_11973_freeBayes_MP/Dengue/Issue_12657_80-20/Projects/2195/Den3__Thailand__FJ744727__2001.fasta
+# --bam $dir/tagged.bam --mind 10 --vcf
+# /media/VD_Research/Analysis/ProjectBased_Analysis/melanie/share/Issue_11973_freeBayes_MP/Dengue/Issue_12657_80-20/Projects/2195/freebayes.vcf
+
 from operator import itemgetter as get
 from functools import partial
 from itertools import ifilter, imap, groupby, takewhile, repeat, starmap, izip_longest
@@ -21,6 +27,8 @@
 
 from Bio import SeqIO #done
 from Bio.SeqRecord import SeqRecord #done
+from Bio.Seq import Seq
+
 import vcf #done
 from vcf.model import _Record
 # import sh #todo
@@ -212,25 +220,56 @@ def trim_ref(ref, positions): # type: (str, Iterator[int]) -> str
     start, end = next(positions), collections.deque(positions, 1)[0]
     return '-'*start + ref[:start:end] + '-'*(len(ref) - end)
 
-def samtoolsDepth(bam):
-    lines = samtools['depth'][bam]().split('\n')
-    lines = map(str.split, lines)
+# def samtoolsDepth(bam):
+#     lines = samtools['depth'][bam]().split('\n')
+#     lines = map(str.split, lines)
+#     stats = map(lambda x: { 'pos' : int(x[1]), 'depth' : int(x[2]) }, lines)
+#     return stats
+
+def samtoolsDepth(ref_id, bam):
+    lines = samtools['depth'][bam, '-r', ref_id]().split('\n')
+    lines = filter(lambda x: x.strip(), lines)
+    lines = map(lambda x: x.split('\t'), lines)
     stats = map(lambda x: { 'pos' : int(x[1]), 'depth' : int(x[2]) }, lines)
     return stats
 
-def uncoveredPositions(reqDepth, bam):
-    depthStats = samtoolsDepth(bam)
-    underStats = filter(lambda x: x['depth'] < reqDepth, depthStats)
-    return map(lambda x: x['pos'], underStats)
+#def uncoveredPositions(mind, bam):
+#    depthStats = samtoolsDepth(bam)
+#    underStats = filter(lambda x: x['depth'] < mind, depthStats)
+#    return map(lambda x: x['pos'], underStats)
+
+def uncoveredPositions(mind, bam, ref):
+    depthStats = samtoolsDepth(str(ref.id), bam) # use ref string
+    allPositions = range(1, len(ref.seq)+1)
+    underStats = filter(lambda x: x['depth'] < mind, depthStats)
+    underPositions = map(lambda x: x['pos'], underStats)
+    allDepthPositions = map(lambda x: x['pos'], depthStats)
+    zeroPositions = set(allPositions) - set(allDepthPositions)
+    return set(underPositions) | zeroPositions
+    # return map(lambda x: x['pos'], underStats)
+
+#def addNsAtUncovered(positions, ref):
+#    new_ref = ref
+#    for pos in positions:
+#        new_ref[pos-1] = 'N'
+#    return new_ref
+
 
+def addNsAtUncovered(mind, bam, ref_seq):
+    badPositions = uncoveredPositions(mind, bam, ref_seq)
+    new_ref = list(str(ref_seq.seq))
+    for pos in badPositions:
+        new_ref[pos-1] = 'N'
+    return SeqRecord(seq=Seq(''.join(new_ref)), id=ref_seq.id)
 
 #@contract(ref_fasta=str, vcf=str, mind=int, majority=int)
-def run(ref_fasta, freebayes_vcf, outfile, mind, majority, sample):
+def run(ref_fasta, freebayes_vcf, outfile, mind, majority, sample, bam):
     # type: (str, str, BinaryIO, int, int) -> int
     _refs = SeqIO.parse(ref_fasta, 'fasta')
     with open(freebayes_vcf, 'r') as vcf_handle:
         _muts = map(flatten_vcf_record, vcf.Reader(vcf_handle))
         refs, muts = list(_refs), list(_muts)
+        refs = map(partial(addNsAtUncovered, mind, bam), refs)
         the_refs, seqs_and_muts = all_consensuses(refs, muts, mind, majority)
         strings = imap(partial(consensus_str, sample), the_refs, imap(get(0), seqs_and_muts))
         result = '\n'.join(strings)
@@ -243,13 +282,14 @@ def main(): # type: () -> None
         { '--vcf' : os.path.isfile,
           '--ref' : os.path.isfile,
          Optional('--sample') : lambda x: True,
+         Optional('--bam') : lambda x: True,
           '--majority' : Use(int),
           '--mind' : Use(int),
           '--output' : Use(lambda x: sys.stdout if not x else open(x, 'w'))})
     raw_args = docopt(__doc__, version='Version 1.0')
     args = scheme.validate(raw_args)
     run(args['--ref'], args['--vcf'], args['--output'],
-        args['--mind'], args['--output'], args['--sample'])
+        args['--mind'], args['--output'], args['--sample'], args['--bam'])
 
 if __name__ == '__main__':
     main()
diff --git a/setup.py b/setup.py
index c56d402..aaaac3f 100755
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,8 @@ def jip_modules(path=bioframework.JIP_PATH):
         'docopt',
         'schema',
         'pyvcf',
-        'typing'
+        'typing',
+        'plumbum'
     ],
     data_files=[
         (join(sys.prefix,'bin'), jip_modules()),

From 2ad9e55af2b29bccf7a4aa0f6d7b2965c374a982 Mon Sep 17 00:00:00 2001
From: Panciera <michael.panciera@amedrhcwrair001.amed.ds.army.mil>
Date: Thu, 27 Oct 2016 15:26:32 -0400
Subject: [PATCH 11/14] add minimum basequal parameter

---
 bioframework/consensus.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/bioframework/consensus.py b/bioframework/consensus.py
index 60eb541..7dbcc41 100755
--- a/bioframework/consensus.py
+++ b/bioframework/consensus.py
@@ -238,7 +238,7 @@ def samtoolsDepth(ref_id, bam):
 #    underStats = filter(lambda x: x['depth'] < mind, depthStats)
 #    return map(lambda x: x['pos'], underStats)
 
-def uncoveredPositions(mind, bam, ref):
+def uncoveredPositions(mind, minbq, bam, ref):
     depthStats = samtoolsDepth(str(ref.id), bam) # use ref string
     allPositions = range(1, len(ref.seq)+1)
     underStats = filter(lambda x: x['depth'] < mind, depthStats)
@@ -255,21 +255,21 @@ def uncoveredPositions(mind, bam, ref):
 #    return new_ref
 
 
-def addNsAtUncovered(mind, bam, ref_seq):
-    badPositions = uncoveredPositions(mind, bam, ref_seq)
+def addNsAtUncovered(mind, minbq, bam, ref_seq):
+    badPositions = uncoveredPositions(mind, minbq, bam, ref_seq)
     new_ref = list(str(ref_seq.seq))
     for pos in badPositions:
         new_ref[pos-1] = 'N'
     return SeqRecord(seq=Seq(''.join(new_ref)), id=ref_seq.id)
 
 #@contract(ref_fasta=str, vcf=str, mind=int, majority=int)
-def run(ref_fasta, freebayes_vcf, outfile, mind, majority, sample, bam):
+def run(ref_fasta, freebayes_vcf, outfile, mind, minbq, majority, sample, bam):
     # type: (str, str, BinaryIO, int, int) -> int
     _refs = SeqIO.parse(ref_fasta, 'fasta')
     with open(freebayes_vcf, 'r') as vcf_handle:
         _muts = map(flatten_vcf_record, vcf.Reader(vcf_handle))
         refs, muts = list(_refs), list(_muts)
-        refs = map(partial(addNsAtUncovered, mind, bam), refs)
+        refs = map(partial(addNsAtUncovered, mind, minbq, bam), refs)
         the_refs, seqs_and_muts = all_consensuses(refs, muts, mind, majority)
         strings = imap(partial(consensus_str, sample), the_refs, imap(get(0), seqs_and_muts))
         result = '\n'.join(strings)
@@ -285,11 +285,12 @@ def main(): # type: () -> None
          Optional('--bam') : lambda x: True,
           '--majority' : Use(int),
           '--mind' : Use(int),
+          '--minbq' : Use(int),
           '--output' : Use(lambda x: sys.stdout if not x else open(x, 'w'))})
     raw_args = docopt(__doc__, version='Version 1.0')
     args = scheme.validate(raw_args)
     run(args['--ref'], args['--vcf'], args['--output'],
-        args['--mind'], args['--output'], args['--sample'], args['--bam'])
+        args['--mind'], args['--minbq'], args['--output'], args['--sample'], args['--bam'])
 
 if __name__ == '__main__':
     main()

From 5153042a3e69d4c4991c7af9b9897903633f812f Mon Sep 17 00:00:00 2001
From: Panciera <michael.panciera@amedrhcwrair001.amed.ds.army.mil>
Date: Thu, 27 Oct 2016 15:35:29 -0400
Subject: [PATCH 12/14] minbq now filters samtools depth call

---
 bioframework/consensus.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bioframework/consensus.py b/bioframework/consensus.py
index 7dbcc41..84fdb01 100755
--- a/bioframework/consensus.py
+++ b/bioframework/consensus.py
@@ -7,6 +7,7 @@
     --vcf=<vcf>             VCF output
     --majority=<majority>   Percentage required [default: 80]
     --mind=<mind>           minimum depth to call base non-N [default: 10]
+    --minbq=<mind>          bases below this quality are ignored when computing depth [default: 0]
     --sample=<sample>       sample name
     --bam=<bam>             bam file, required to check for coverage
     --output,-o=<output>    output file [default: ]
@@ -226,8 +227,8 @@ def trim_ref(ref, positions): # type: (str, Iterator[int]) -> str
 #     stats = map(lambda x: { 'pos' : int(x[1]), 'depth' : int(x[2]) }, lines)
 #     return stats
 
-def samtoolsDepth(ref_id, bam):
-    lines = samtools['depth'][bam, '-r', ref_id]().split('\n')
+def samtoolsDepth(ref_id, bam, minbq):
+    lines = samtools['depth'][bam, '-r', ref_id, '-q', minbq]().split('\n')
     lines = filter(lambda x: x.strip(), lines)
     lines = map(lambda x: x.split('\t'), lines)
     stats = map(lambda x: { 'pos' : int(x[1]), 'depth' : int(x[2]) }, lines)
@@ -239,7 +240,7 @@ def samtoolsDepth(ref_id, bam):
 #    return map(lambda x: x['pos'], underStats)
 
 def uncoveredPositions(mind, minbq, bam, ref):
-    depthStats = samtoolsDepth(str(ref.id), bam) # use ref string
+    depthStats = samtoolsDepth(str(ref.id), bam, minbq) # use ref string
     allPositions = range(1, len(ref.seq)+1)
     underStats = filter(lambda x: x['depth'] < mind, depthStats)
     underPositions = map(lambda x: x['pos'], underStats)

From 8c09f79dfc0092a2b2e84f4ce0c721a4c3a29070 Mon Sep 17 00:00:00 2001
From: Panciera <michael.panciera@amedrhcwrair001.amed.ds.army.mil>
Date: Thu, 27 Oct 2016 16:29:26 -0400
Subject: [PATCH 13/14] fixed missing minbq in cmd args doc

---
 bioframework/consensus.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bioframework/consensus.py b/bioframework/consensus.py
index 84fdb01..746e915 100755
--- a/bioframework/consensus.py
+++ b/bioframework/consensus.py
@@ -1,6 +1,6 @@
 """
 Usage:
-     consensus --ref <ref> --vcf <vcf> [--mind <mind>] [--majority <majority>] [-o <output>] [--sample <sample>] [--bam <bam>]
+     consensus --ref <ref> --vcf <vcf> [--mind <mind>] [--majority <majority>] [-o <output>] [--sample <sample>] [--bam <bam>] [--minbq <minbq>]
 
 Options:
     --ref=<ref>             Reference fasta file

From 720dd6be628d159062bd3b0c230ee7137263ff02 Mon Sep 17 00:00:00 2001
From: Panciera <michael.panciera@amedrhcwrair001.amed.ds.army.mil>
Date: Thu, 27 Oct 2016 16:29:46 -0400
Subject: [PATCH 14/14] Makefile now uses minbq and mind.

---
 scripts/Makefile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scripts/Makefile b/scripts/Makefile
index 3ac1f02..1f85921 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -5,6 +5,8 @@ REF=$(NGSDIR)/$(SAMPLE).ref.fasta
 TAGGED=$(NGSDIR)/tagged.bam
 FBVCF=$(NGSDIR)/freebayes.vcf
 FBCONSENSUS=$(NGSDIR)/$(SAMPLE).freebayes_consensus.fasta
+# MIND=0
+# MINBQ=0
 
 all: $(FBCONSENSUS)
 
@@ -18,5 +20,6 @@ $(FBVCF): $(TAGGED) $(NGSDIR)/tagged.bam.bai $(REF)
 	freebayes -f $(REF) $< > $@
 
 $(FBCONSENSUS):  $(FBVCF) $(REF)
-	fb_consensus --vcf $< --ref $(REF) --sample $(SAMPLE) > $@
+	#fb_consensus --vcf $< --ref $(REF) --sample $(SAMPLE) > $@
+	fb_consensus --vcf $< --ref $(REF) --sample $(SAMPLE) --bam $(TAGGED) --mind $(MIND) --minbq $(MINBQ) > $@
 #cd ~/bioframes && python bioframes/consensus.py $^ > $@