From 6c66ad1a9b129d4e5b0895940fbb6074174b11a1 Mon Sep 17 00:00:00 2001 From: averagehat Date: Wed, 10 Feb 2016 12:25:46 -0500 Subject: [PATCH 01/41] added tagreads (tags all reads, doesn't skip any) --- bioframework/tagreads.py | 186 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100644 bioframework/tagreads.py diff --git a/bioframework/tagreads.py b/bioframework/tagreads.py new file mode 100644 index 0000000..d717080 --- /dev/null +++ b/bioframework/tagreads.py @@ -0,0 +1,186 @@ +import argparse +import sys +import samtools +import re +import os.path +#from ngs_mapper.bam import sortbam, indexbam +#import shutil + +import log +logger = log.setup_logger('tagreads',log.get_config()) + +# Exception for when headers exist +class HeaderExists(Exception): pass + +# The next 3 tuples have to be the same length and each index in each is related the same index in each tuple +# AKA zip( IDS, PLATFORMS, ID_MAP ) should work as expected +# Read group ID list +IDS = ('Roche454', 'IonTorrent', 'MiSeq', 'Sanger') +# Valid platforms for read groups +PLATFORMS = ('L454', 'IONTORRENT', 'ILLUMINA', 'CAPILLARY') +# Read name map to ID name +ID_MAP = ( + re.compile( '[0-9A-Z]{14}' ), + re.compile( '[A-Z0-9]{5}:\d{1,}:\d{1,}' ), + re.compile( 'M[0-9]{5}:\d+:\d{9}-[A-Z0-9]{5}:\d:\d{4}:\d{4,5}:\d{4,5}' ), + re.compile( '.*' ) +) +# Read Group Template +RG_TEMPLATE = { + 'SM': None, + 'ID': None, + 'PL': None, + 'CN': None +} + +def main(): + args = parse_args() + for bam in args.bamfiles: + tag_bam( bam, args.SM, args.CN ) + +def tag_bam( bam, SM, CN ): + logger.info( "Gathering existing header for {0}".format(bam) ) + hdr = get_rg_headers( bam, SM, CN ) + tag_reads( bam, hdr ) + +def tag_read( untagged_read, tags ): + ''' + Tags a given samtools.SamRow with various tags + Does not replace existing tags + + @param untagged_read - samtools.SamRow + @param tags - List of valid samspec optional field strings(aka ['RG:Z:Value']) + + @returns a tagged read + ''' + #if untagged_read.FLAG >= 2048: + # # Skip supplementary + # logger.debug( "Skipping read {0} because it is supplementary".format(untagged_read.QNAME) ) + # return untagged_read + # Append the new tags + if untagged_read._tags and untagged_read._tags[-1] != '\t': + untagged_read._tags += '\t' + # Append the tags that do not already exist + # tag+'\t' is used to make it an exact match + tags = [tag for tag in tags if tag+'\t' not in untagged_read._tags] + untagged_read._tags += '\t'.join( tags ) + # Remove trailing tab if all the tags were duplicate + untagged_read._tags = untagged_read._tags.rstrip() + # Return the tagged read + return untagged_read + +def tag_readgroup( read ): + ''' + Tags a given read with the readgroup that the qname + belongs to depending on how it maps to ID_MAP + + @param read - samtools.SamRow + + @returns SamRow that is tagged with the appropriate read group + ''' + rg = get_rg_for_read( read ) + #logger.debug( "Tagging {0} with Read group {0}".format(read.qname,rg) ) + return tag_read( read, ['RG:Z:'+rg] ) + +def tag_reads( bam, hdr ): + ''' + Sets header of bam and tags all reads appropriately for each platform + Overwrites existing header + + @param bam - Bam file to tag reads in + @param hdr - Header string to set in the bam(needs newline at the end) + ''' + # Open the existing bam to fetch the reads to modify from + untagged_bam = samtools.view( bam ) + # Open a file to write the sam output to with the tagged reads + samf = bam.replace('.bam','.sam') + with open( samf, 'w' ) as sam: + # 
Write the hdr to the file first + sam.write( hdr ) + # Tag the reads + logger.info( "Tagging reads for {0}".format(bam) ) + for read in untagged_bam: + samrow = samtools.SamRow(read) + read = tag_readgroup( samrow ) + sam.write( str(read) + '\n' ) + # Close stdout + untagged_bam.close() + logger.info( "Finished tagging reads for {0}".format(bam) ) + logger.info( "Sorting {0}".format(bam) ) +# b = samtools.view( samf, h=True, S=True, b=True ) +# sortbam( b, bam ) +# # Close the fh +# b.close() +# # Remove temp sam file +# # maybe some day could even just use pipes all the way through :) +# os.unlink( samf ) +# logger.info( "Indexing {0}".format(bam) ) +# indexbam( bam ) + +def get_rg_for_read( aread ): + ''' Gets the read group name for the given samtools.SamRow ''' + rname = aread.QNAME + for i, p in enumerate( ID_MAP ): + if p.match( rname ): + return IDS[i] + raise ValueError( "{0} is from an unknown platform and cannot be tagged".format(rname) ) + +def get_rg_headers( bam, SM=None, CN=None ): + old_header = get_bam_header( bam ) + '\n' + + for id, pl in zip( IDS, PLATFORMS ): + # Skip headers that exist already + if 'ID:{0}\t'.format(id) in old_header: + continue + + rg = '@RG\tID:{0}\tSM:{1}\t' + if SM is None: + SM = os.path.basename(bam).replace( '.bam', '' ) + + if CN is not None: + rg += 'CN:{2}\tPL:{3}' + old_header += rg.format( id, SM, CN, pl ) + '\n' + else: + rg += 'PL:{2}' + old_header += rg.format( id, SM, pl ) + '\n' + + return old_header + +def get_bam_header( bam ): + r = samtools.view( bam, H=True ) + hdr = r.read() + r.close() + return hdr.rstrip() + +def parse_args( args=sys.argv[1:] ): + from ngs_mapper import config + conf_parser, args, config, configfile = config.get_config_argparse(args) + defaults = config['tagreads'] + + parser = argparse.ArgumentParser( + description='''Adds Sanger, MiSeq, 454 and IonTorrent read groups to a bam file. + Will tag all reads in the alignment for each of the platforms based on the read name''', + parents=[conf_parser] + ) + + parser.add_argument( + dest='bamfiles', + nargs='+', + help='Bamfile to add read groups to for platforms' + ) + + parser.add_argument( + '-SM', + dest='SM', + default=defaults['SM']['default'], + help=defaults['SM']['help'] + ) + + parser.add_argument( + '-CN', + dest='CN', + default=defaults['CN']['default'], + help=defaults['CN']['help'] + ) + + return parser.parse_args( args ) From ed0ac2b7db840cbffcbfbce90f5239691127c8cb Mon Sep 17 00:00:00 2001 From: averagehat Date: Thu, 11 Feb 2016 10:49:16 -0500 Subject: [PATCH 02/41] added tagreads --- bioframework/tagreads.py | 5 +++++ jip_modules/tag_bam.jip | 9 +++++++++ 2 files changed, 14 insertions(+) create mode 100644 jip_modules/tag_bam.jip diff --git a/bioframework/tagreads.py b/bioframework/tagreads.py index d717080..6d285ed 100644 --- a/bioframework/tagreads.py +++ b/bioframework/tagreads.py @@ -183,4 +183,9 @@ def parse_args( args=sys.argv[1:] ): help=defaults['CN']['help'] ) + parser.add_argument( + '--output', + '-o', + dest='output' + ) return parser.parse_args( args ) diff --git a/jip_modules/tag_bam.jip b/jip_modules/tag_bam.jip new file mode 100644 index 0000000..f05a72f --- /dev/null +++ b/jip_modules/tag_bam.jip @@ -0,0 +1,9 @@ +#!/usr/bin/env jip +# Usage: +# tagreads [] [-h] [--config CONFIG] [--version] [-SM ] [-CN ] [-o ] +# Options: +# -SM A tag +# -CN Another tag +# -o, --output= Output fastq. 
If input was interleave, output will be as well + +tagreads ${input|else('/dev/stdin')} ${SM|arg} ${CN|arg} ${output|else('/dev/stdout')} From 4aa14ca20ab4d85a2b53a0922fc91f421532b65e Mon Sep 17 00:00:00 2001 From: averagehat Date: Thu, 11 Feb 2016 11:35:16 -0500 Subject: [PATCH 03/41] tag_bam now sorts and indexes afterwards --- jip_modules/tag_bam.jip | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/jip_modules/tag_bam.jip b/jip_modules/tag_bam.jip index f05a72f..2857ef2 100644 --- a/jip_modules/tag_bam.jip +++ b/jip_modules/tag_bam.jip @@ -6,4 +6,7 @@ # -CN Another tag # -o, --output= Output fastq. If input was interleave, output will be as well -tagreads ${input|else('/dev/stdin')} ${SM|arg} ${CN|arg} ${output|else('/dev/stdout')} +#tagreads ${input|else('/dev/stdin')} ${SM|arg} ${CN|arg} ${output|else('/dev/stdout')} + +tagreads ${input|else('/dev/stdin')} ${SM|arg} ${CN|arg} |\ +samtools view -Sbh - | samtools sort - ${output} | samtools index ${output} From 4d46b24a0464139bcf52231e7320fc484ee00083 Mon Sep 17 00:00:00 2001 From: averagehat Date: Thu, 11 Feb 2016 11:42:49 -0500 Subject: [PATCH 04/41] added freebayes.jip --- jip_modules/freebayes.jip | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 jip_modules/freebayes.jip diff --git a/jip_modules/freebayes.jip b/jip_modules/freebayes.jip new file mode 100644 index 0000000..5159839 --- /dev/null +++ b/jip_modules/freebayes.jip @@ -0,0 +1,16 @@ + +#!/usr/bin/env jip +# +# run freebayes +# +# Usage: +# run_freebayes [] -f [-o ] +# +# Options: +# -o, --output= Output fastq. If input was interleave, output will be as well +# -f, The reference +# the input is assumed to be unpaired fastq +# Help: +# Requires a bam file () and a reference (-f) + +freebayes ${input|else("-")} ${reference|arg} ${output|arg(">")} From 4c9933917ddb30955255a28de7f9ecab5ec42d73 Mon Sep 17 00:00:00 2001 From: averagehat Date: Thu, 11 Feb 2016 11:55:20 -0500 Subject: [PATCH 05/41] tagreads now writes to separate file or stdout --- bioframework/tagreads.py | 45 +++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/bioframework/tagreads.py b/bioframework/tagreads.py index 6d285ed..1f8b15a 100644 --- a/bioframework/tagreads.py +++ b/bioframework/tagreads.py @@ -6,8 +6,8 @@ #from ngs_mapper.bam import sortbam, indexbam #import shutil -import log -logger = log.setup_logger('tagreads',log.get_config()) +#import log +#logger = log.setup_logger('tagreads',log.get_config()) # Exception for when headers exist class HeaderExists(Exception): pass @@ -36,12 +36,12 @@ class HeaderExists(Exception): pass def main(): args = parse_args() for bam in args.bamfiles: - tag_bam( bam, args.SM, args.CN ) + tag_bam( bam, args.SM, args.CN, args.output ) -def tag_bam( bam, SM, CN ): - logger.info( "Gathering existing header for {0}".format(bam) ) +def tag_bam( bam, SM, CN, output ): + #logger.info( "Gathering existing header for {0}".format(bam) ) hdr = get_rg_headers( bam, SM, CN ) - tag_reads( bam, hdr ) + tag_reads( bam, hdr, output ) def tag_read( untagged_read, tags ): ''' @@ -55,7 +55,7 @@ def tag_read( untagged_read, tags ): ''' #if untagged_read.FLAG >= 2048: # # Skip supplementary - # logger.debug( "Skipping read {0} because it is supplementary".format(untagged_read.QNAME) ) + # #logger.debug( "Skipping read {0} because it is supplementary".format(untagged_read.QNAME) ) # return untagged_read # Append the new tags if untagged_read._tags and untagged_read._tags[-1] != '\t': @@ -79,10 
+79,10 @@ def tag_readgroup( read ): @returns SamRow that is tagged with the appropriate read group ''' rg = get_rg_for_read( read ) - #logger.debug( "Tagging {0} with Read group {0}".format(read.qname,rg) ) + ##logger.debug( "Tagging {0} with Read group {0}".format(read.qname,rg) ) return tag_read( read, ['RG:Z:'+rg] ) -def tag_reads( bam, hdr ): +def tag_reads( bam, hdr, output ): ''' Sets header of bam and tags all reads appropriately for each platform Overwrites existing header @@ -93,20 +93,21 @@ def tag_reads( bam, hdr ): # Open the existing bam to fetch the reads to modify from untagged_bam = samtools.view( bam ) # Open a file to write the sam output to with the tagged reads - samf = bam.replace('.bam','.sam') - with open( samf, 'w' ) as sam: + if output: sam = open(output, 'w') + else: sam = sys.stdout # Write the hdr to the file first - sam.write( hdr ) - # Tag the reads - logger.info( "Tagging reads for {0}".format(bam) ) - for read in untagged_bam: - samrow = samtools.SamRow(read) - read = tag_readgroup( samrow ) - sam.write( str(read) + '\n' ) + sam.write( hdr ) + # Tag the reads + #logger.info( "Tagging reads for {0}".format(bam) ) + for read in untagged_bam: + samrow = samtools.SamRow(read) + read = tag_readgroup( samrow ) + sam.write( str(read) + '\n' ) # Close stdout untagged_bam.close() - logger.info( "Finished tagging reads for {0}".format(bam) ) - logger.info( "Sorting {0}".format(bam) ) + sam.close() + #logger.info( "Finished tagging reads for {0}".format(bam) ) + #logger.info( "Sorting {0}".format(bam) ) # b = samtools.view( samf, h=True, S=True, b=True ) # sortbam( b, bam ) # # Close the fh @@ -114,7 +115,7 @@ def tag_reads( bam, hdr ): # # Remove temp sam file # # maybe some day could even just use pipes all the way through :) # os.unlink( samf ) -# logger.info( "Indexing {0}".format(bam) ) +# #logger.info( "Indexing {0}".format(bam) ) # indexbam( bam ) def get_rg_for_read( aread ): @@ -189,3 +190,5 @@ def parse_args( args=sys.argv[1:] ): dest='output' ) return parser.parse_args( args ) + +if __name__ == '__main__': main() From 6244e292e2f1affc06a433932a7d777cbd70f986 Mon Sep 17 00:00:00 2001 From: averagehat Date: Thu, 11 Feb 2016 12:58:22 -0500 Subject: [PATCH 06/41] foo --- bioframework/tagreads.py | 3 ++- jip_modules/tag_bam.jip | 18 ++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) mode change 100644 => 100755 jip_modules/tag_bam.jip diff --git a/bioframework/tagreads.py b/bioframework/tagreads.py index 1f8b15a..42fa565 100644 --- a/bioframework/tagreads.py +++ b/bioframework/tagreads.py @@ -5,7 +5,8 @@ import os.path #from ngs_mapper.bam import sortbam, indexbam #import shutil - +from signal import signal, SIGPIPE, SIG_DFL +signal(SIGPIPE,SIG_DFL) #http://stackoverflow.com/questions/14207708/ioerror-errno-32-broken-pipe-python #import log #logger = log.setup_logger('tagreads',log.get_config()) diff --git a/jip_modules/tag_bam.jip b/jip_modules/tag_bam.jip old mode 100644 new mode 100755 index 2857ef2..1c988ae --- a/jip_modules/tag_bam.jip +++ b/jip_modules/tag_bam.jip @@ -1,12 +1,18 @@ #!/usr/bin/env jip # Usage: -# tagreads [] [-h] [--config CONFIG] [--version] [-SM ] [-CN ] [-o ] +# tag_bam [] [--config CONFIG] [--version] [--SM ] [--CN ] -o +# # Options: -# -SM A tag -# -CN Another tag +# --SM A tag +# --CN Another tag # -o, --output= Output fastq. 
If input was interleave, output will be as well -#tagreads ${input|else('/dev/stdin')} ${SM|arg} ${CN|arg} ${output|else('/dev/stdout')} -tagreads ${input|else('/dev/stdin')} ${SM|arg} ${CN|arg} |\ -samtools view -Sbh - | samtools sort - ${output} | samtools index ${output} +#%begin command bash +# +python bioframework/tagreads.py ${input|else('/dev/stdin')} ${SM|arg('-SM')} ${CN|arg('-CN')} | samtools view - -Sbh | samtools sort - ${output} +samtools index ${output}.bam + +#%end + + From eb79428696c39756c4d3316384273f566b777fbe Mon Sep 17 00:00:00 2001 From: averagehat Date: Thu, 11 Feb 2016 12:58:34 -0500 Subject: [PATCH 07/41] fixing freebayes --- jip_modules/freebayes.jip | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 jip_modules/freebayes.jip diff --git a/jip_modules/freebayes.jip b/jip_modules/freebayes.jip old mode 100644 new mode 100755 From 44fbf7439a4a3637dc894c8d9d24a7d5a31db991 Mon Sep 17 00:00:00 2001 From: averagehat Date: Thu, 11 Feb 2016 15:24:55 -0500 Subject: [PATCH 08/41] added bioframes dependency from git url --- .travis.yml | 1 + requirements.txt | 1 + 2 files changed, 2 insertions(+) create mode 100644 requirements.txt diff --git a/.travis.yml b/.travis.yml index d759796..81c8819 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,6 +23,7 @@ install: - conda config --add channels bioconda - conda install --file requirements-conda.txt - pip install -r tests/requirements-dev.txt + - pip install -r requirements.txt - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install unittest2; fi - if [[ $TRAVIS_PYTHON_VERSION == '3.4' ]]; then pip install robotframework-python3; else pip install robotframework; fi - python setup.py install diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..676cf9f --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +git+https://github.com/VDBWRAIR/bioframes.git From 2d7c592cdc51d8c5516d0281b89aecb5046983bc Mon Sep 17 00:00:00 2001 From: averagehat Date: Thu, 11 Feb 2016 16:07:27 -0500 Subject: [PATCH 09/41] added jip consensus --- jip_modules/consensus.jip | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 jip_modules/consensus.jip diff --git a/jip_modules/consensus.jip b/jip_modules/consensus.jip new file mode 100644 index 0000000..0ceba21 --- /dev/null +++ b/jip_modules/consensus.jip @@ -0,0 +1,19 @@ +#!/usr/bin/env jip +# +# Usage: +# index_filter_quality -r -v [-o ] --minimum +# +# Options: +# -o, --output= Output files [default: stdout] +# -r, --reference= Reference fasta +# -v, --vcf= VCF file generated by freebayes + + +#%begin command python + +from bioframes import consensus +result = consensus.make_consensus("${bam}", "${ref}", "${vcf}") +with open("${output}", 'w') as out: + out.write(result) + +#%end From e21e14d8cab45b496af600b161ef616234ae5fc4 Mon Sep 17 00:00:00 2001 From: averagehat Date: Fri, 12 Feb 2016 16:11:11 -0500 Subject: [PATCH 10/41] added new implementation of consensus --- bioframework/consensus.py | 131 ++++++++++++++++++++++++++++++++++++++ jip_modules/consensus.jip | 6 +- requirements.txt | 4 +- 3 files changed, 136 insertions(+), 5 deletions(-) create mode 100644 bioframework/consensus.py diff --git a/bioframework/consensus.py b/bioframework/consensus.py new file mode 100644 index 0000000..a6b3c4d --- /dev/null +++ b/bioframework/consensus.py @@ -0,0 +1,131 @@ +from toolz.itertoolz import first, rest, peek +from operator import itemgetter as get +from toolz import compose, curry +from functools import partial +from 
toolz.dicttoolz import merge +from itertools import ifilter, imap, groupby, takewhile +from contracts import contract +from Bio import SeqIO, SeqRecord +import vcf + +############# +# Constants # +############# + +AMBIGUITY_TABLE = { 'A': 'A', 'T': 'T', 'G': 'G', 'C': 'C', 'N': 'N', + 'AC': 'M', 'AG': 'R', 'AT': 'W', 'CG': 'S', 'CT': + 'Y', 'GT': 'K', 'ACG': 'V', 'ACT': 'H', 'AGT': 'D', + 'CGT': 'B', 'ACGT': 'N' } + +MAJORITY_PERCENTAGE = 80 +MIN_DEPTH = 10 + +######################## +# Function combintaors # +######################## + +''' just a way to compose filters and mapping functions together ''' +def compose_transormer(transformer, combinator, *funcs): + return partial(transformer, reduce(combinator, funcs)) +_and = lambda f,g: lambda x: f(x) and g(x) +compose_filters = partial(compose_transormer, ifilter, _and) +compose_mappers = partial(compose_transormer, imap, compose) + + +########### +# Reducer # +########### +@contract(reference=str, muts='seq(tuple(str, str, int))' ) +def make_consensus(reference, muts): + ''' Actually builds a consensus string by recursively applying + the mutations.''' + def _do_build((accString, string, lastPos), (x, y, bigPos)): + pos = bigPos - lastPos + return (accString + (string[:pos] + y), string[pos+len(x):], bigPos+len(x)) + return reduce(_do_build, muts, ('', reference, 0)) + + +############## +# Mappers # +############## +@contract(min_depth=int, majoirty_percentage=int, rec=dict, returns='tuple(str, str, int)') +def call_base(min_depth, majoirty_percentage, rec): + '''if a base is under the min depth, it gets called as an `N`. + If the alternate base is >= `majoirty_percentage`, it will be + converted to an ambiguous base if it includes multiple bases. + Otherwise, the reference base replaces itself (gets ignored). + Returns a tuple of the form: + (referene_base, alternate_base, position) + where reference_base == alternate_base if the reference was preferred.''' + get_ambiguous = compose(AMBIGUITY_TABLE.__getitem__, ''.join, sorted) + if rec['DP'] < min_depth: alt = 'N'# (rec['ref'], 'N', rec['POS']) + elif alt_over_percent(majoirty_percentage)(rec): + alt = get_ambiguous(rec['alt']) + else: alt = rec['ref'] + return (rec['ref'], alt, rec['pos']) + +def flatten_vcf_record(rec): + return merge({ + 'alt' : rec.ALT, 'ref' : rec.REF, + 'pos' : rec.POS, 'chrom' : rec.CHROM}, + rec.INFO) + +############## +# Filters # +############## +ref_and_alt_differ = lambda x: x[0] != x[1] +alt_over_percent = lambda n: lambda rec: (n/float(100)) > rec['AO']/float(rec['DP']) + + +############## +# Group By # +############## +def group_muts_by_refs(references, muts): + '''group and sort the mutations so that they match the order of the references.''' + mut_groups = groupby(muts, get('CHROM')) + def index_of_ref(key): + return sum(1 for _ in takewhile(lambda x: x.id != key, references)) + muts_by_ref = sorted(mut_groups, key=index_of_ref) + return muts_by_ref + + + +############### +# Runner # +############### +@contract(references='SeqRecord', muts='seq(dict)', mind=int, majority=int) +def all_consensuses(references, muts, mind, majority): + ''' generates conesnsuses, including for flu and other mult-reference VCFs. + applies filters and base callers to the mutations. 
+ then builds the consensus using these calls and `make_consensus`''' + mappers = [partial(call_base, mind, majority)] + filters = [ref_and_alt_differ] + muts_by_ref = group_muts_by_refs(references, muts) + muts = imap(get(0), muts_by_ref) + def single_ref_consensus(recs, ref): + fix_recs = compose(compose_filters(filters), compose_mappers(mappers)) + ref_str = str(ref.seq) + return make_consensus(ref_str, fix_recs(recs)) + return references, imap(single_ref_consensus, muts, references) + + + +########## +# I/O # +########## + +def consensus_str(ref, consensus): + return ">{0}:Consensus\n{1}".format(ref.id, consensus) + +make_fasta_str = compose('\n'.join, + partial(imap, consensus_str), all_consensuses) + +@contract(ref_fasta=str, vcf=str, mind=int, majority=int) +def run(ref_fasta, freebayes_vcf, outpath, mind, majority): + refs = SeqIO.parse(ref_fasta, 'fasta') + with open(freebayes_vcf, 'r') as vcf_handle, open(outpath, 'w') as out: + muts = imap(flatten_vcf_record, vcf.Reader(vcf_handle)) + result = make_fasta_str(refs, muts, mind, majority) + out.write(result) + return 0 + diff --git a/jip_modules/consensus.jip b/jip_modules/consensus.jip index 0ceba21..c2ad138 100644 --- a/jip_modules/consensus.jip +++ b/jip_modules/consensus.jip @@ -11,9 +11,7 @@ #%begin command python -from bioframes import consensus -result = consensus.make_consensus("${bam}", "${ref}", "${vcf}") -with open("${output}", 'w') as out: - out.write(result) +from bioframework import consensus +consens.run("${bam}", "${ref}", "${vcf}", "${out}") #%end diff --git a/requirements.txt b/requirements.txt index 676cf9f..f0346dc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,3 @@ -git+https://github.com/VDBWRAIR/bioframes.git +contracts +toolz +pyvcf From 44be3d0ae77d79bcde0060756808077497af70dc Mon Sep 17 00:00:00 2001 From: michaelpanciera Date: Mon, 22 Feb 2016 17:01:31 -0500 Subject: [PATCH 11/41] update --- bioframework/consensus.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/bioframework/consensus.py b/bioframework/consensus.py index a6b3c4d..2c365a9 100644 --- a/bioframework/consensus.py +++ b/bioframework/consensus.py @@ -48,6 +48,28 @@ def _do_build((accString, string, lastPos), (x, y, bigPos)): ############## # Mappers # ############## + +#when there are multiple alts, zip through with each of them +#zip(*alts), character by character. compare the percentages, and +#sum the percentages for each base. (groupby, sum) pick each character (call each base) based on the given rules (using call_base). +def call_many(min_depth, majoirty_percentage, rec): + if hasattr(rec['alt'], '__iter__'): + fields = ['AO', 'DP', 'alt'] + vals = zip(*get(*fields)(rec)) + muts = map(lambda items: dict(zip(fields, items)), vals) + def extract(rec): + AO, DP, alt = get(*fields)(rec) + #length = len(alt) + #return repeat(AO, length), repeat(DP, length), alt + return repeat(AO), repeat(DP), alt + + def call_multiple(*recs): + for (ao, dp, alt) in zip(*recs): + groups = groupby(recs, get('alt')) + #map() + pass #prep and map using call_base + else: + return call_base(min_depth, majoirty_percentage, rec) @contract(min_depth=int, majoirty_percentage=int, rec=dict, returns='tuple(str, str, int)') def call_base(min_depth, majoirty_percentage, rec): '''if a base is under the min depth, it gets called as an `N`. 
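The calling rule that consensus.py converges on over this patch and the next can be summarized as: call N when coverage is below the minimum depth, call a single alternate base outright when it exceeds the majority threshold, and otherwise collapse every base seen in more than 20% of reads (counting the leftover depth as support for the reference) into an IUPAC ambiguity code. The following is a minimal standalone sketch of that rule, assuming the AMBIGUITY_TABLE defined in consensus.py and ignoring the insertion/deletion handling the real call_base_multi_alts performs; the name sketch_call_base and the hard-coded 0.8/0.2 thresholds are illustrative assumptions, not part of the committed code.

AMBIGUITY_TABLE = {'A': 'A', 'C': 'C', 'G': 'G', 'T': 'T', 'N': 'N',
                   'AC': 'M', 'AG': 'R', 'AT': 'W', 'CG': 'S', 'CT': 'Y',
                   'GT': 'K', 'ACG': 'V', 'ACT': 'H', 'AGT': 'D',
                   'CGT': 'B', 'ACGT': 'N'}

def sketch_call_base(depth, alt_counts, ref, min_depth=10):
    """alt_counts maps each alternate base to its read count at one position;
    reads not accounted for by any alternate are treated as supporting ref."""
    if depth < min_depth:
        return 'N'                                # too little coverage to call anything
    for base, count in alt_counts.items():
        if count / float(depth) > 0.8:            # a clear majority alternate wins outright
            return base
    ref_count = depth - sum(alt_counts.values())  # remaining reads support the reference
    candidates = dict(alt_counts, **({ref: ref_count} if ref else {}))
    over20 = ''.join(sorted(b for b, c in candidates.items()
                            if c / float(depth) > 0.2))
    return AMBIGUITY_TABLE[over20] if over20 else ''

# e.g. sketch_call_base(100, {'G': 55, 'T': 30}, 'A') -> 'K'  (G and T both over 20%)
#      sketch_call_base(100, {'G': 85}, 'A')          -> 'G'  (majority alternate)
#      sketch_call_base(5,   {'G': 5},  'A')          -> 'N'  (under minimum depth)

The patch below extends this idea to multiple alternate alleles of differing lengths by aligning the alts column by column and applying the same per-position rule.
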
From 549a244a15730c4c188852c4727a30b977bd7904 Mon Sep 17 00:00:00 2001 From: averagehat Date: Tue, 23 Feb 2016 17:58:38 -0500 Subject: [PATCH 12/41] consensus now handles multiple alts --- bioframework/consensus.py | 113 +++++++++++++++++++++++++++++--------- 1 file changed, 87 insertions(+), 26 deletions(-) mode change 100644 => 100755 bioframework/consensus.py diff --git a/bioframework/consensus.py b/bioframework/consensus.py old mode 100644 new mode 100755 index 2c365a9..15003c7 --- a/bioframework/consensus.py +++ b/bioframework/consensus.py @@ -2,8 +2,8 @@ from operator import itemgetter as get from toolz import compose, curry from functools import partial -from toolz.dicttoolz import merge -from itertools import ifilter, imap, groupby, takewhile +from toolz.dicttoolz import merge, dissoc +from itertools import ifilter, imap, groupby, takewhile, repeat, starmap, izip_longest from contracts import contract from Bio import SeqIO, SeqRecord import vcf @@ -35,7 +35,7 @@ def compose_transormer(transformer, combinator, *funcs): ########### # Reducer # ########### -@contract(reference=str, muts='seq(tuple(str, str, int))' ) +#@contract(reference=str, muts='seq(tuple(str, str, int))' ) def make_consensus(reference, muts): ''' Actually builds a consensus string by recursively applying the mutations.''' @@ -52,26 +52,87 @@ def _do_build((accString, string, lastPos), (x, y, bigPos)): #when there are multiple alts, zip through with each of them #zip(*alts), character by character. compare the percentages, and #sum the percentages for each base. (groupby, sum) pick each character (call each base) based on the given rules (using call_base). -def call_many(min_depth, majoirty_percentage, rec): - if hasattr(rec['alt'], '__iter__'): - fields = ['AO', 'DP', 'alt'] - vals = zip(*get(*fields)(rec)) - muts = map(lambda items: dict(zip(fields, items)), vals) - def extract(rec): - AO, DP, alt = get(*fields)(rec) - #length = len(alt) - #return repeat(AO, length), repeat(DP, length), alt - return repeat(AO), repeat(DP), alt - - def call_multiple(*recs): - for (ao, dp, alt) in zip(*recs): - groups = groupby(recs, get('alt')) - #map() - pass #prep and map using call_base +from toolz.dicttoolz import merge_with +#@contract(dp=int,ref=str,alts='dict(string,int)') +from toolz.dicttoolz import valfilter +def find_val(pred, d): + res = valfilter(pred, d) + return res if res else None +def find(pred, xs): + res = ifilter(pred, xs) + try: + return next(res) + except: + return None +#TODO: Failing Case: +# a = {'ref': u'AT', 'pos': 2, 'AO': (51771, 41537, 42398, 9342), 'alt': [u'A', +# u'TT', u'AATTG', u'AAGAA'], 'chrom': u'o', 'DP': 87288} +# bioframework.consensus.call_many(10, 80, a) + + +def call_base_multi_alts(min_depth, majority_percentage, dp, alts, ref): + #TODO: majority_percentage gets ignored, so replace constants + #TODO: behavior is undefined if sum(AO) > dp. 
+ import ipdb; ipdb.set_trace() + if dp < min_depth: #could call REF here sometimes + return 'N' + total_ao = lambda: sum(alts.values()) # avoid evaluating unless necessary + + if ref is None: # this is an insert + if total_ao()/float(dp) < .50: + return ref + # if the insert is above threshold, keep going and call the insert like a normal base + if '-' in alts: + if alts['-']/float(dp) > .50: # a deletion + return '' + dp -= alts['-'] + alts_without_insert = dissoc(alts, '-') else: - return call_base(min_depth, majoirty_percentage, rec) -@contract(min_depth=int, majoirty_percentage=int, rec=dict, returns='tuple(str, str, int)') + alts_without_insert = alts + over_depth = lambda x: lambda depth: depth/float(dp) > x + picked_alt = valfilter(over_depth(0.8), alts_without_insert) + if picked_alt: + return picked_alt.keys()[0] + #add ref so that it will be considered in creating ambiguous base + alts_with_ref = merge(alts_without_insert, ({ref : (dp - total_ao()) } if ref else {})) + over20 = valfilter(over_depth(0.2), alts_with_ref) + as_ambiguous = ''.join(sorted(over20.keys())) + # this could return a single base, (including the reference), becuase i.e. A => A in the ambiguity table + return AMBIGUITY_TABLE[as_ambiguous] + + +def call_many(min_depth, majority_percentage, _rec): + #TODO: switch to generators + if not hasattr(_rec['alt'], '__iter__'): #TODO: put this somewhere else + rec = merge(_rec, dict(alt=[_rec['alt']], AO=[_rec['AO']])) + else: rec = _rec + muts = zip(rec['AO'], rec['alt']) + ref, dp, pos = rec['ref'], rec['DP'], rec['pos'] + longest_len = max(map(lambda x: len(x[-1]), muts)) + longest_len = max(longest_len, len(ref)) + def fill_gap(r): + ao, s = r + return (ao, s + (longest_len - len(s)) * '-') + xs = map(fill_gap, muts) # fill in the shorter alts with '-'. + def merge_sum(x,y): + return x if y is None else (y if x is None else merge_with(sum, x, y)) + def seq_count(acc, ao_and_nts): + ao, nts = ao_and_nts + return map(merge_sum, acc, [{nt:ao} for nt in nts]) + # create a list of {base : count}, where the index matches the position + mut_dicts = reduce(seq_count, xs, [{}]) + base_caller = partial(call_base_multi_alts, min_depth, majority_percentage, dp) + res = map(base_caller, mut_dicts, ref) + # trim None values at the end, (which indicate deletion) + result = takewhile(bool, res) + return (ref, ''.join(result), pos) + # return call_base(min_depth, majority_percentage, rec) + + +#@contract(min_depth=int, majoirty_percentage=int, rec=dict, returns='tuple(str, str, int)') +#DEPRECATED def call_base(min_depth, majoirty_percentage, rec): +#DEPRECATED '''if a base is under the min depth, it gets called as an `N`. If the alternate base is >= `majoirty_percentage`, it will be converted to an ambiguous base if it includes multiple bases. 
@@ -79,7 +140,7 @@ def call_base(min_depth, majoirty_percentage, rec): Returns a tuple of the form: (referene_base, alternate_base, position) where reference_base == alternate_base if the reference was preferred.''' - get_ambiguous = compose(AMBIGUITY_TABLE.__getitem__, ''.join, sorted) + get_ambiguous = compose(lambda x: AMBIGUITY_TABLE.get(x, x), ''.join, sorted) if rec['DP'] < min_depth: alt = 'N'# (rec['ref'], 'N', rec['POS']) elif alt_over_percent(majoirty_percentage)(rec): alt = get_ambiguous(rec['alt']) @@ -96,7 +157,7 @@ def flatten_vcf_record(rec): # Filters # ############## ref_and_alt_differ = lambda x: x[0] != x[1] -alt_over_percent = lambda n: lambda rec: (n/float(100)) > rec['AO']/float(rec['DP']) +alt_over_percent = lambda n: lambda rec: rec['AO']/float(rec['DP']) > (n/float(100)) ############## @@ -115,12 +176,12 @@ def index_of_ref(key): ############### # Runner # ############### -@contract(references='SeqRecord', muts='seq(dict)', mind=int, majority=int) +#@contract(references='SeqRecord', muts='seq(dict)', mind=int, majority=int) def all_consensuses(references, muts, mind, majority): ''' generates conesnsuses, including for flu and other mult-reference VCFs. applies filters and base callers to the mutations. then builds the consensus using these calls and `make_consensus`''' - mappers = [partial(call_base, mind, majority)] + mappers = [partial(call_many, mind, majority)] filters = [ref_and_alt_differ] muts_by_ref = group_muts_by_refs(references, muts) muts = imap(get(0), muts_by_ref) @@ -142,7 +203,7 @@ def consensus_str(ref, consensus): make_fasta_str = compose('\n'.join, partial(imap, consensus_str), all_consensuses) -@contract(ref_fasta=str, vcf=str, mind=int, majority=int) +#@contract(ref_fasta=str, vcf=str, mind=int, majority=int) def run(ref_fasta, freebayes_vcf, outpath, mind, majority): refs = SeqIO.parse(ref_fasta, 'fasta') with open(freebayes_vcf, 'r') as vcf_handle, open(outpath, 'w') as out: From aefe452d01ebe8e7892c3a51fa638b03ca3b653f Mon Sep 17 00:00:00 2001 From: Panciera Date: Wed, 24 Feb 2016 13:02:00 -0500 Subject: [PATCH 13/41] various fixes --- bioframework/consensus.py | 84 ++++++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 33 deletions(-) diff --git a/bioframework/consensus.py b/bioframework/consensus.py index 15003c7..184668d 100755 --- a/bioframework/consensus.py +++ b/bioframework/consensus.py @@ -1,10 +1,26 @@ +""" +Usage: + consensus --ref --vcf [--mind ] [--majority ] [-o ] + +Options: + --ref= Reference fasta file + --vcf= VCF output + --majority= Percentage required [default: 80] + --mind= minimum depth to call base non-N [default: 10] + --output,-o= output file [default: ] +""" + + from toolz.itertoolz import first, rest, peek from operator import itemgetter as get from toolz import compose, curry from functools import partial -from toolz.dicttoolz import merge, dissoc +from toolz.dicttoolz import merge, dissoc, merge_with, valfilter from itertools import ifilter, imap, groupby, takewhile, repeat, starmap, izip_longest -from contracts import contract +#from contracts import contract +import os, sys +from docopt import docopt +from schema import Schema, Use from Bio import SeqIO, SeqRecord import vcf @@ -42,38 +58,27 @@ def make_consensus(reference, muts): def _do_build((accString, string, lastPos), (x, y, bigPos)): pos = bigPos - lastPos return (accString + (string[:pos] + y), string[pos+len(x):], bigPos+len(x)) - return reduce(_do_build, muts, ('', reference, 0)) + return reduce(_do_build, muts, ('', reference, 
0))[0] ############## # Mappers # ############## -#when there are multiple alts, zip through with each of them -#zip(*alts), character by character. compare the percentages, and -#sum the percentages for each base. (groupby, sum) pick each character (call each base) based on the given rules (using call_base). -from toolz.dicttoolz import merge_with -#@contract(dp=int,ref=str,alts='dict(string,int)') -from toolz.dicttoolz import valfilter -def find_val(pred, d): - res = valfilter(pred, d) - return res if res else None -def find(pred, xs): - res = ifilter(pred, xs) - try: - return next(res) - except: - return None #TODO: Failing Case: # a = {'ref': u'AT', 'pos': 2, 'AO': (51771, 41537, 42398, 9342), 'alt': [u'A', # u'TT', u'AATTG', u'AAGAA'], 'chrom': u'o', 'DP': 87288} # bioframework.consensus.call_many(10, 80, a) - +#@contract(dp=int,ref=str,alts='dict(string,int)') def call_base_multi_alts(min_depth, majority_percentage, dp, alts, ref): + """when there are multiple alts, zip through with each of them + zip(*alts), character by character. compare the percentages, and + sum the percentages for each base. (groupby, sum) pick each character + (call each base) based on the given rules (using call_base).""" #TODO: majority_percentage gets ignored, so replace constants #TODO: behavior is undefined if sum(AO) > dp. - import ipdb; ipdb.set_trace() + #import ipdb; ipdb.set_trace() if dp < min_depth: #could call REF here sometimes return 'N' total_ao = lambda: sum(alts.values()) # avoid evaluating unless necessary @@ -112,7 +117,7 @@ def call_many(min_depth, majority_percentage, _rec): longest_len = max(longest_len, len(ref)) def fill_gap(r): ao, s = r - return (ao, s + (longest_len - len(s)) * '-') + return (ao, str(s) + (longest_len - len(s)) * '-') xs = map(fill_gap, muts) # fill in the shorter alts with '-'. 
def merge_sum(x,y): return x if y is None else (y if x is None else merge_with(sum, x, y)) @@ -126,7 +131,6 @@ def seq_count(acc, ao_and_nts): # trim None values at the end, (which indicate deletion) result = takewhile(bool, res) return (ref, ''.join(result), pos) - # return call_base(min_depth, majority_percentage, rec) #@contract(min_depth=int, majoirty_percentage=int, rec=dict, returns='tuple(str, str, int)') @@ -165,7 +169,7 @@ def flatten_vcf_record(rec): ############## def group_muts_by_refs(references, muts): '''group and sort the mutations so that they match the order of the references.''' - mut_groups = groupby(muts, get('CHROM')) + mut_groups = groupby(muts, get('chrom')) def index_of_ref(key): return sum(1 for _ in takewhile(lambda x: x.id != key, references)) muts_by_ref = sorted(mut_groups, key=index_of_ref) @@ -184,31 +188,45 @@ def all_consensuses(references, muts, mind, majority): mappers = [partial(call_many, mind, majority)] filters = [ref_and_alt_differ] muts_by_ref = group_muts_by_refs(references, muts) - muts = imap(get(0), muts_by_ref) + muts = map(compose(list, get(1)), muts_by_ref) def single_ref_consensus(recs, ref): - fix_recs = compose(compose_filters(filters), compose_mappers(mappers)) + fix_recs = compose(compose_filters(*filters), compose_mappers(*mappers)) ref_str = str(ref.seq) return make_consensus(ref_str, fix_recs(recs)) return references, imap(single_ref_consensus, muts, references) - ########## # I/O # ########## - def consensus_str(ref, consensus): return ">{0}:Consensus\n{1}".format(ref.id, consensus) -make_fasta_str = compose('\n'.join, - partial(imap, consensus_str), all_consensuses) #@contract(ref_fasta=str, vcf=str, mind=int, majority=int) -def run(ref_fasta, freebayes_vcf, outpath, mind, majority): +def run(ref_fasta, freebayes_vcf, outfile, mind, majority): refs = SeqIO.parse(ref_fasta, 'fasta') - with open(freebayes_vcf, 'r') as vcf_handle, open(outpath, 'w') as out: + with open(freebayes_vcf, 'r') as vcf_handle: muts = imap(flatten_vcf_record, vcf.Reader(vcf_handle)) - result = make_fasta_str(refs, muts, mind, majority) - out.write(result) + refs, muts = list(refs), list(muts) + refs, seqs = all_consensuses(refs, muts, mind, majority) + strings = imap(consensus_str, refs, seqs) + result = '\n'.join(strings) + outfile.write(result) + outfile.close() return 0 +def main(): + scheme = Schema( + { '--vcf' : os.path.isfile, + '--ref' : os.path.isfile, + '--majority' : Use(int), + '--mind' : Use(int), + '--output' : Use(lambda x: sys.stdout if not x else open(x, 'w'))}) + raw_args = docopt(__doc__, version='Version 1.0') + args = scheme.validate(raw_args) + run(args['--ref'], args['--vcf'], args['--output'], + args['--mind'], args['--output']) + +if __name__ == '__main__': + main() From e36a4c1720593ec30452ed578770561d1baad030 Mon Sep 17 00:00:00 2001 From: Panciera Date: Wed, 24 Feb 2016 13:22:58 -0500 Subject: [PATCH 14/41] refactoring to simplify and remove deprecated funcs --- bioframework/consensus.py | 56 ++++++--------------------------------- 1 file changed, 8 insertions(+), 48 deletions(-) diff --git a/bioframework/consensus.py b/bioframework/consensus.py index 184668d..cd2c060 100755 --- a/bioframework/consensus.py +++ b/bioframework/consensus.py @@ -9,8 +9,6 @@ --mind= minimum depth to call base non-N [default: 10] --output,-o= output file [default: ] """ - - from toolz.itertoolz import first, rest, peek from operator import itemgetter as get from toolz import compose, curry @@ -36,18 +34,6 @@ MAJORITY_PERCENTAGE = 80 MIN_DEPTH = 10 
-######################## -# Function combintaors # -######################## - -''' just a way to compose filters and mapping functions together ''' -def compose_transormer(transformer, combinator, *funcs): - return partial(transformer, reduce(combinator, funcs)) -_and = lambda f,g: lambda x: f(x) and g(x) -compose_filters = partial(compose_transormer, ifilter, _and) -compose_mappers = partial(compose_transormer, imap, compose) - - ########### # Reducer # ########### @@ -132,38 +118,12 @@ def seq_count(acc, ao_and_nts): result = takewhile(bool, res) return (ref, ''.join(result), pos) - -#@contract(min_depth=int, majoirty_percentage=int, rec=dict, returns='tuple(str, str, int)') -#DEPRECATED -def call_base(min_depth, majoirty_percentage, rec): -#DEPRECATED - '''if a base is under the min depth, it gets called as an `N`. - If the alternate base is >= `majoirty_percentage`, it will be - converted to an ambiguous base if it includes multiple bases. - Otherwise, the reference base replaces itself (gets ignored). - Returns a tuple of the form: - (referene_base, alternate_base, position) - where reference_base == alternate_base if the reference was preferred.''' - get_ambiguous = compose(lambda x: AMBIGUITY_TABLE.get(x, x), ''.join, sorted) - if rec['DP'] < min_depth: alt = 'N'# (rec['ref'], 'N', rec['POS']) - elif alt_over_percent(majoirty_percentage)(rec): - alt = get_ambiguous(rec['alt']) - else: alt = rec['ref'] - return (rec['ref'], alt, rec['pos']) - def flatten_vcf_record(rec): return merge({ 'alt' : rec.ALT, 'ref' : rec.REF, 'pos' : rec.POS, 'chrom' : rec.CHROM}, rec.INFO) -############## -# Filters # -############## -ref_and_alt_differ = lambda x: x[0] != x[1] -alt_over_percent = lambda n: lambda rec: rec['AO']/float(rec['DP']) > (n/float(100)) - - ############## # Group By # ############## @@ -180,20 +140,20 @@ def index_of_ref(key): ############### # Runner # ############### + #@contract(references='SeqRecord', muts='seq(dict)', mind=int, majority=int) def all_consensuses(references, muts, mind, majority): ''' generates conesnsuses, including for flu and other mult-reference VCFs. applies filters and base callers to the mutations. 
then builds the consensus using these calls and `make_consensus`''' - mappers = [partial(call_many, mind, majority)] - filters = [ref_and_alt_differ] muts_by_ref = group_muts_by_refs(references, muts) - muts = map(compose(list, get(1)), muts_by_ref) - def single_ref_consensus(recs, ref): - fix_recs = compose(compose_filters(*filters), compose_mappers(*mappers)) - ref_str = str(ref.seq) - return make_consensus(ref_str, fix_recs(recs)) - return references, imap(single_ref_consensus, muts, references) + mut_groups = map(compose(list, get(1)), muts_by_ref) + def single_consensus(muts, ref): + the_muts = map(partial(call_many, mind, majority), muts) + ref_and_alt_differ = lambda x: x[0] != x[1] + real_muts = filter(ref_and_alt_differ, the_muts) + return make_consensus(str(ref.seq), real_muts) + return references, imap(single_consensus, mut_groups, references) ########## From 46f2df9cb23951e771a37a3b27b6adcb6aad0cc8 Mon Sep 17 00:00:00 2001 From: Panciera Date: Fri, 26 Feb 2016 22:20:18 -0500 Subject: [PATCH 15/41] added tests,refactored vcf dict flattening --- bioframework/consensus.py | 10 +++---- tests/test_consensus.py | 62 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 5 deletions(-) create mode 100644 tests/test_consensus.py diff --git a/bioframework/consensus.py b/bioframework/consensus.py index cd2c060..ce433e0 100755 --- a/bioframework/consensus.py +++ b/bioframework/consensus.py @@ -92,11 +92,8 @@ def call_base_multi_alts(min_depth, majority_percentage, dp, alts, ref): return AMBIGUITY_TABLE[as_ambiguous] -def call_many(min_depth, majority_percentage, _rec): +def call_many(min_depth, majority_percentage, rec): #TODO: switch to generators - if not hasattr(_rec['alt'], '__iter__'): #TODO: put this somewhere else - rec = merge(_rec, dict(alt=[_rec['alt']], AO=[_rec['AO']])) - else: rec = _rec muts = zip(rec['AO'], rec['alt']) ref, dp, pos = rec['ref'], rec['DP'], rec['pos'] longest_len = max(map(lambda x: len(x[-1]), muts)) @@ -119,10 +116,13 @@ def seq_count(acc, ao_and_nts): return (ref, ''.join(result), pos) def flatten_vcf_record(rec): - return merge({ + _rec = merge({ 'alt' : rec.ALT, 'ref' : rec.REF, 'pos' : rec.POS, 'chrom' : rec.CHROM}, rec.INFO) + if not hasattr(_rec['alt'], '__iter__'): #TODO: put this somewhere else + return merge(_rec, dict(alt=[_rec['alt']], AO=[_rec['AO']])) + else: return _rec ############## # Group By # diff --git a/tests/test_consensus.py b/tests/test_consensus.py new file mode 100644 index 0000000..5f78e67 --- /dev/null +++ b/tests/test_consensus.py @@ -0,0 +1,62 @@ +from biottest.biohypothesis import ref_with_vcf_dicts_strategy_factory +from hypothesis import given +from hypothesis import strategies as st +from hypothesis import given, assume +from bioframework.consensus import call_many, all_consensuses +import unittest + +simple_vcf_dict_strategy = st.tuples(st.text(string.ascii_letters), + st.integers(min_value=1), + st.text(string.ascii_letters)) \ + .flatmap(lambda tup: + vcf_dict_strategy_factory(*tup)) +pos_int = st.integers(min_value=0) +def just_ref(*args): + return next(all_consensuses(*args)[1]) +class CallBaseHypothesisTest(unittest.TestCase): + @given(simple_vcf_dict_strategy, pos_int) + def test_under_mind_is_N(self, mut, mind): + assume(mut['DP'] < mind) + result = call_many(mind, 80, mut)[1] + self.assertTrue(all(map(lambda x: x == 'N', result))) + + @given(simple_vcf_dict_strategy) + def test_ao_under_minority_is_ref(self, mut): + assume(sum(mut['AO']) / mut['DP'] < 0.2) + result = call_many(0, 80, mut)[1] + 
self.assertEquals(result == mut['ref']) + + @given(simple_vcf_dict_strategy) + def test_over_minoriy_is_alt(self, mut): + assume(sum(mut['AO']) / mut['DP'] > 0.2) + assume(len(mut['alt']) == 1) + result = call_many(0, 80, mut)[1] + self.assertEquals(result == mut['alt']) + +class ConsesusExampleTest(unittest.TestCase): + def test_single_example(self): + muts = [{ + 'pos' : 2, + 'ref' : 'CG', + 'alt' : 'TT', + 'AO' : 99, + 'DP' : 100, + 'chrom' : 'X' + }, + { + 'pos' : 6, + 'ref' : 'C', + 'alt' : ['G', 'A'], + 'AO' : [15, 120], + 'DP' : 150, + 'chrom' : 'X' + }] + ref = 'ACGTACGT' + expected = 'ATTTAAGT' + result = just_ref([ref], muts, 10, 80) + self.assertEquals(expected, result) + +class ConsensusHypothesisTest(unittest.TestCase): + from collections import Counter + + From 1894e0a92704f3992f68bb2371f745a3080c62c6 Mon Sep 17 00:00:00 2001 From: Panciera Date: Wed, 2 Mar 2016 12:47:55 -0500 Subject: [PATCH 16/41] add missing dependencies --- setup.py | 1 + tests/requirements-dev.txt | 1 + 2 files changed, 2 insertions(+) mode change 100644 => 100755 setup.py mode change 100644 => 100755 tests/requirements-dev.txt diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 index 292f24b..0c97ac2 --- a/setup.py +++ b/setup.py @@ -23,6 +23,7 @@ def jip_modules(path=bioframework.JIP_PATH): install_requires = [ 'pyjip', 'toolz', + 'docopt', ], data_files=[ (join(sys.prefix,'bin'), jip_modules()), diff --git a/tests/requirements-dev.txt b/tests/requirements-dev.txt old mode 100644 new mode 100755 index d96de86..f0fa163 --- a/tests/requirements-dev.txt +++ b/tests/requirements-dev.txt @@ -4,3 +4,4 @@ python-coveralls testfixtures sh hypothesis +https://github.com/VDBWRAIR/biotest From 4633cc6f832fca1ab7449a2973341b4ea8ac129c Mon Sep 17 00:00:00 2001 From: Panciera Date: Wed, 2 Mar 2016 12:48:55 -0500 Subject: [PATCH 17/41] fixed ordering bug --- bioframework/consensus.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bioframework/consensus.py b/bioframework/consensus.py index ce433e0..7dc0387 100755 --- a/bioframework/consensus.py +++ b/bioframework/consensus.py @@ -131,7 +131,10 @@ def group_muts_by_refs(references, muts): '''group and sort the mutations so that they match the order of the references.''' mut_groups = groupby(muts, get('chrom')) def index_of_ref(key): - return sum(1 for _ in takewhile(lambda x: x.id != key, references)) + chrom=key[0] + assert(type(chrom) == str) + index_of_chrom = map(lambda x: x.id, references).index(chrom) + return index_of_chrom muts_by_ref = sorted(mut_groups, key=index_of_ref) return muts_by_ref From 05b04e875c2b90ff2d7ab0498cba8e08ab86cb87 Mon Sep 17 00:00:00 2001 From: Panciera Date: Wed, 2 Mar 2016 13:20:52 -0500 Subject: [PATCH 18/41] fixed groupby bug --- bioframework/consensus.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bioframework/consensus.py b/bioframework/consensus.py index 7dc0387..8f39e07 100755 --- a/bioframework/consensus.py +++ b/bioframework/consensus.py @@ -129,13 +129,16 @@ def flatten_vcf_record(rec): ############## def group_muts_by_refs(references, muts): '''group and sort the mutations so that they match the order of the references.''' - mut_groups = groupby(muts, get('chrom')) + #NOTE: muts will already be "sorted" in that they are grouped together in the vcf + #fix the groupby so it doesn't incidentally drain the first object of the group + unzip = lambda x: zip(*x) + chroms, groups = unzip(map(lambda kv: (kv[0], list(kv[1])), groupby(muts, get('chrom')))) def 
index_of_ref(key): chrom=key[0] assert(type(chrom) == str) index_of_chrom = map(lambda x: x.id, references).index(chrom) return index_of_chrom - muts_by_ref = sorted(mut_groups, key=index_of_ref) + _, muts_by_ref = unzip(sorted(zip(chroms, groups), key=index_of_ref)) return muts_by_ref From 8eb0c38cc6242aec8775bb66e794c1a28b7e72ea Mon Sep 17 00:00:00 2001 From: Panciera Date: Wed, 2 Mar 2016 16:32:18 -0500 Subject: [PATCH 19/41] fixed make_consensus --- bioframework/consensus.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/bioframework/consensus.py b/bioframework/consensus.py index 8f39e07..d9cc462 100755 --- a/bioframework/consensus.py +++ b/bioframework/consensus.py @@ -44,7 +44,8 @@ def make_consensus(reference, muts): def _do_build((accString, string, lastPos), (x, y, bigPos)): pos = bigPos - lastPos return (accString + (string[:pos] + y), string[pos+len(x):], bigPos+len(x)) - return reduce(_do_build, muts, ('', reference, 0))[0] + result, remaining, _ = reduce(_do_build, muts, ('', reference, 0)) + return result + remaining ############## @@ -89,7 +90,7 @@ def call_base_multi_alts(min_depth, majority_percentage, dp, alts, ref): over20 = valfilter(over_depth(0.2), alts_with_ref) as_ambiguous = ''.join(sorted(over20.keys())) # this could return a single base, (including the reference), becuase i.e. A => A in the ambiguity table - return AMBIGUITY_TABLE[as_ambiguous] + return AMBIGUITY_TABLE[as_ambiguous] if as_ambiguous != '' else '' def call_many(min_depth, majority_percentage, rec): @@ -153,13 +154,15 @@ def all_consensuses(references, muts, mind, majority): applies filters and base callers to the mutations. then builds the consensus using these calls and `make_consensus`''' muts_by_ref = group_muts_by_refs(references, muts) - mut_groups = map(compose(list, get(1)), muts_by_ref) + #mut_groups = map(compose(list, get(1)), muts_by_ref) + #real_muts = filter(ref_and_alt_differ, the_muts) def single_consensus(muts, ref): the_muts = map(partial(call_many, mind, majority), muts) ref_and_alt_differ = lambda x: x[0] != x[1] - real_muts = filter(ref_and_alt_differ, the_muts) + # vcf is index-starting-at-1 + real_muts = map(lambda (a,b,pos): (a,b,pos-1), filter(ref_and_alt_differ, the_muts)) return make_consensus(str(ref.seq), real_muts) - return references, imap(single_consensus, mut_groups, references) + return references, imap(single_consensus, muts_by_ref, references) ########## From 8e3062c2917b14f031d55559729878a4d5932f32 Mon Sep 17 00:00:00 2001 From: Panciera Date: Wed, 2 Mar 2016 16:32:56 -0500 Subject: [PATCH 20/41] more tests --- tests/test_consensus.py | 71 ++++++++++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/tests/test_consensus.py b/tests/test_consensus.py index 5f78e67..30910ce 100644 --- a/tests/test_consensus.py +++ b/tests/test_consensus.py @@ -1,14 +1,18 @@ -from biottest.biohypothesis import ref_with_vcf_dicts_strategy_factory +from __future__ import division +from biotest.biohypothesis import ref_with_vcf_dicts_strategy_factory, \ + make_seqrec, vcf_dict_strategy_factory from hypothesis import given +from fn import _ from hypothesis import strategies as st from hypothesis import given, assume -from bioframework.consensus import call_many, all_consensuses +from bioframework.consensus import call_many, all_consensuses, make_consensus +import string import unittest simple_vcf_dict_strategy = st.tuples(st.text(string.ascii_letters), st.integers(min_value=1), - st.text(string.ascii_letters)) \ 
- .flatmap(lambda tup: + st.text(alphabet='ACTGN', min_size=1, max_size=6)) \ + .flatmap(lambda tup:\ vcf_dict_strategy_factory(*tup)) pos_int = st.integers(min_value=0) def just_ref(*args): @@ -24,22 +28,41 @@ def test_under_mind_is_N(self, mut, mind): def test_ao_under_minority_is_ref(self, mut): assume(sum(mut['AO']) / mut['DP'] < 0.2) result = call_many(0, 80, mut)[1] - self.assertEquals(result == mut['ref']) + self.assertEquals(result, mut['ref']) @given(simple_vcf_dict_strategy) - def test_over_minoriy_is_alt(self, mut): - assume(sum(mut['AO']) / mut['DP'] > 0.2) + def test_over_majority_is_alt(self, mut): + #TODO: this is slow + assume(sum(mut['AO']) / mut['DP'] > 0.8) assume(len(mut['alt']) == 1) result = call_many(0, 80, mut)[1] - self.assertEquals(result == mut['alt']) + self.assertEquals(result, mut['alt'][0]) + +#Commented out because it's not actually always true, +# e.g. mut={'ref': u'AA', 'pos': 1, 'AO': [784313725491], 'alt': [u'A'], +# 'chrom': u'', 'DP': 3921568627454}) +# should result in AA +# @given(simple_vcf_dict_strategy) +# def test_over_minoriy_is_not_ref(self, mut): +# assume(sum(mut['AO']) / mut['DP'] > 0.2) +# result = call_many(0, 80, mut)[1] +# self.assertNotEquals(result, mut['ref']) class ConsesusExampleTest(unittest.TestCase): + def test_make_consensus_example(self): + muts = [('CG', 'TT', 1), + ('C', 'A', 5)] + ref = 'ACGTACGT' + expected = 'ATTTAAGT' + actual = make_consensus(ref, muts) + self.assertEquals(expected, actual) + def test_single_example(self): muts = [{ 'pos' : 2, 'ref' : 'CG', - 'alt' : 'TT', - 'AO' : 99, + 'alt' : ['TT'], + 'AO' : [99], 'DP' : 100, 'chrom' : 'X' }, @@ -51,12 +74,24 @@ def test_single_example(self): 'DP' : 150, 'chrom' : 'X' }] - ref = 'ACGTACGT' - expected = 'ATTTAAGT' - result = just_ref([ref], muts, 10, 80) - self.assertEquals(expected, result) - -class ConsensusHypothesisTest(unittest.TestCase): - from collections import Counter - + ref = make_seqrec('X', 'ACGTACGT') + expected = 'ATTTAAGT' + result = just_ref([ref], muts, 10, 80) + self.assertEquals(expected, result) +from collections import Counter +countof = lambda c: lambda x: Counter(x).get(c, 0) +#class ConsensusHypothesisTest(unittest.TestCase): +# @st.random_module +# @given(ref_with_vcf_dicts_strategy_factory()) +# def test_n_count(self, ref_and_muts): +# ref, muts = ref_and_muts +# originalNs = countof('N')(ref) +# assume(not filter(lambda x: 'N' in x, muts)) +# expectedNs = len(filter(_['DP'] < 10, muts)) + originalNs +# result = just_ref([ref], muts, 10, 80) +# self.assertEquals(countof('N')(result), expectedNs) +# +# +# +# From 33d7db7a7728d7fb594279ee21e73581345f60bc Mon Sep 17 00:00:00 2001 From: Panciera Date: Wed, 2 Mar 2016 16:40:08 -0500 Subject: [PATCH 21/41] add failing test --- tests/test_consensus.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/test_consensus.py b/tests/test_consensus.py index 30910ce..3ba6d9c 100644 --- a/tests/test_consensus.py +++ b/tests/test_consensus.py @@ -81,17 +81,17 @@ def test_single_example(self): from collections import Counter countof = lambda c: lambda x: Counter(x).get(c, 0) -#class ConsensusHypothesisTest(unittest.TestCase): -# @st.random_module -# @given(ref_with_vcf_dicts_strategy_factory()) -# def test_n_count(self, ref_and_muts): -# ref, muts = ref_and_muts -# originalNs = countof('N')(ref) -# assume(not filter(lambda x: 'N' in x, muts)) -# expectedNs = len(filter(_['DP'] < 10, muts)) + originalNs -# result = just_ref([ref], muts, 10, 80) -# 
self.assertEquals(countof('N')(result), expectedNs) -# -# -# -# +class ConsensusHypothesisTest(unittest.TestCase): + #@st.random_module + @given(ref_with_vcf_dicts_strategy_factory(), st.random_module()) + def test_n_count(self, ref_and_muts, rand): + ref, muts = ref_and_muts + originalNs = countof('N')(ref) + assume(not filter(lambda x: 'N' in x, muts)) + expectedNs = len(filter(_['DP'] < 10, muts)) + originalNs + result = just_ref([ref], muts, 10, 80) + self.assertEquals(countof('N')(result), expectedNs) + + + + From d3bfe633fb7c05bb2e831e3ca7f4ea078e4ca425 Mon Sep 17 00:00:00 2001 From: michaelpanciera Date: Thu, 3 Mar 2016 09:12:05 -0500 Subject: [PATCH 22/41] fixed git+ protocol of biotest dep. --- tests/requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/requirements-dev.txt b/tests/requirements-dev.txt index f0fa163..b139172 100755 --- a/tests/requirements-dev.txt +++ b/tests/requirements-dev.txt @@ -4,4 +4,4 @@ python-coveralls testfixtures sh hypothesis -https://github.com/VDBWRAIR/biotest +git+https://github.com/VDBWRAIR/biotest From 2faecbfaeea69ef3d8b3a291bc7359e40c87193b Mon Sep 17 00:00:00 2001 From: michaelpanciera Date: Thu, 3 Mar 2016 09:40:59 -0500 Subject: [PATCH 23/41] fixed contracts to actual pypy name --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f0346dc..dbdcd86 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -contracts +pycontracts toolz pyvcf From 613e825729a4b598bc1789ee9db7f4202873ba91 Mon Sep 17 00:00:00 2001 From: michaelpanciera Date: Thu, 3 Mar 2016 18:17:51 -0500 Subject: [PATCH 24/41] 10/10 working --- bioframework/consensus.py | 61 ++++++++++++++++---------- tests/test_consensus.py | 92 +++++++++++++++++++++++++++++++++++---- 2 files changed, 120 insertions(+), 33 deletions(-) diff --git a/bioframework/consensus.py b/bioframework/consensus.py index d9cc462..05a1970 100755 --- a/bioframework/consensus.py +++ b/bioframework/consensus.py @@ -9,19 +9,22 @@ --mind= minimum depth to call base non-N [default: 10] --output,-o= output file [default: ] """ -from toolz.itertoolz import first, rest, peek +#stdlib from operator import itemgetter as get -from toolz import compose, curry from functools import partial -from toolz.dicttoolz import merge, dissoc, merge_with, valfilter from itertools import ifilter, imap, groupby, takewhile, repeat, starmap, izip_longest -#from contracts import contract import os, sys -from docopt import docopt -from schema import Schema, Use -from Bio import SeqIO, SeqRecord -import vcf - +from typing import Tuple, Dict, List, Iterator, Iterable, Any, Callable + +from Bio import SeqIO #done +from Bio.SeqRecord import SeqRecord #done +import vcf #done +from vcf.model import _Record +#from toolz import compose +from toolz.dicttoolz import merge, dissoc, merge_with, valfilter #todo +from docopt import docopt #ignore +from schema import Schema, Use #ignore +from contracts import contract, new_contract #can ignore ############# # Constants # ############# @@ -33,19 +36,20 @@ MAJORITY_PERCENTAGE = 80 MIN_DEPTH = 10 - +Mut = Tuple[str, str, int] ########### # Reducer # ########### -#@contract(reference=str, muts='seq(tuple(str, str, int))' ) +@contract(reference='string', muts='list(tuple(string, string, int))' ) def make_consensus(reference, muts): + # type: (str, List[Mut]) -> Tuple[str, List[Mut]] ''' Actually builds a consensus string by recursively applying the mutations.''' def 
_do_build((accString, string, lastPos), (x, y, bigPos)): pos = bigPos - lastPos return (accString + (string[:pos] + y), string[pos+len(x):], bigPos+len(x)) result, remaining, _ = reduce(_do_build, muts, ('', reference, 0)) - return result + remaining + return result + remaining, muts ############## @@ -57,15 +61,15 @@ def _do_build((accString, string, lastPos), (x, y, bigPos)): # u'TT', u'AATTG', u'AAGAA'], 'chrom': u'o', 'DP': 87288} # bioframework.consensus.call_many(10, 80, a) -#@contract(dp=int,ref=str,alts='dict(string,int)') +#@contract(min_depth='number,>=0', majority_percentage='number,>=0,<=100',dp='number,>=0', ref='string|None',alts='dict(string: number)') def call_base_multi_alts(min_depth, majority_percentage, dp, alts, ref): + # type: (int, int, int, Dict[str, int], str) -> str """when there are multiple alts, zip through with each of them zip(*alts), character by character. compare the percentages, and sum the percentages for each base. (groupby, sum) pick each character (call each base) based on the given rules (using call_base).""" #TODO: majority_percentage gets ignored, so replace constants #TODO: behavior is undefined if sum(AO) > dp. - #import ipdb; ipdb.set_trace() if dp < min_depth: #could call REF here sometimes return 'N' total_ao = lambda: sum(alts.values()) # avoid evaluating unless necessary @@ -92,8 +96,9 @@ def call_base_multi_alts(min_depth, majority_percentage, dp, alts, ref): # this could return a single base, (including the reference), becuase i.e. A => A in the ambiguity table return AMBIGUITY_TABLE[as_ambiguous] if as_ambiguous != '' else '' - +#@contract(min_depth='number,>=0', majority_percentage='number,>=0,<=100', rec='dict', returns='tuple(string, string, int)') def call_many(min_depth, majority_percentage, rec): + # type: (int, int, Dict) -> Mut #TODO: switch to generators muts = zip(rec['AO'], rec['alt']) ref, dp, pos = rec['ref'], rec['DP'], rec['pos'] @@ -110,13 +115,15 @@ def seq_count(acc, ao_and_nts): return map(merge_sum, acc, [{nt:ao} for nt in nts]) # create a list of {base : count}, where the index matches the position mut_dicts = reduce(seq_count, xs, [{}]) - base_caller = partial(call_base_multi_alts, min_depth, majority_percentage, dp) + base_caller = partial(call_base_multi_alts, min_depth, majority_percentage, dp) # type: Callable[[Dict[Any,Any], str], str] res = map(base_caller, mut_dicts, ref) # trim None values at the end, (which indicate deletion) result = takewhile(bool, res) return (ref, ''.join(result), pos) +@contract(rec='dict',returns='dict') def flatten_vcf_record(rec): + # type: (_Record) -> Dict[str, Any] _rec = merge({ 'alt' : rec.ALT, 'ref' : rec.REF, 'pos' : rec.POS, 'chrom' : rec.CHROM}, @@ -128,15 +135,20 @@ def flatten_vcf_record(rec): ############## # Group By # ############## +#NOTE: could possibly drop lists, use fn.Stream all the time, +# and write a Stream instance for contracts like: +# https://github.com/AndreaCensi/contracts/blob/831ec7a5260ceb8960540ba0cb6cc26370cf2d82/src/contracts/library/lists.py +@contract(references='list[N]($SeqRecord),N>0', muts='list(dict)',returns='tuple(list(dict))') def group_muts_by_refs(references, muts): + # type: (List[SeqRecord], List[Dict[Any, Any]]) -> Iterable[List[Dict]] '''group and sort the mutations so that they match the order of the references.''' #NOTE: muts will already be "sorted" in that they are grouped together in the vcf #fix the groupby so it doesn't incidentally drain the first object of the group unzip = lambda x: zip(*x) chroms, groups = unzip(map(lambda 
kv: (kv[0], list(kv[1])), groupby(muts, get('chrom')))) + @contract(key='tuple(string,list)') def index_of_ref(key): chrom=key[0] - assert(type(chrom) == str) index_of_chrom = map(lambda x: x.id, references).index(chrom) return index_of_chrom _, muts_by_ref = unzip(sorted(zip(chroms, groups), key=index_of_ref)) @@ -150,17 +162,17 @@ def index_of_ref(key): #@contract(references='SeqRecord', muts='seq(dict)', mind=int, majority=int) def all_consensuses(references, muts, mind, majority): + # type: (Iterable[SeqRecord], Iterable[Dict[Any,Any]], int, int) -> Tuple[List[str], Iterator[Tuple[str, List[Mut]]]] ''' generates conesnsuses, including for flu and other mult-reference VCFs. applies filters and base callers to the mutations. then builds the consensus using these calls and `make_consensus`''' muts_by_ref = group_muts_by_refs(references, muts) - #mut_groups = map(compose(list, get(1)), muts_by_ref) - #real_muts = filter(ref_and_alt_differ, the_muts) def single_consensus(muts, ref): the_muts = map(partial(call_many, mind, majority), muts) ref_and_alt_differ = lambda x: x[0] != x[1] # vcf is index-starting-at-1 - real_muts = map(lambda (a,b,pos): (a,b,pos-1), filter(ref_and_alt_differ, the_muts)) + #real_muts = map(lambda (a,b,pos): (a,b,pos-1), filter(ref_and_alt_differ, the_muts)) + real_muts = map(lambda x: (x[0], x[1], x[2] - 1), filter(ref_and_alt_differ, the_muts)) return make_consensus(str(ref.seq), real_muts) return references, imap(single_consensus, muts_by_ref, references) @@ -168,24 +180,25 @@ def single_consensus(muts, ref): ########## # I/O # ########## -def consensus_str(ref, consensus): +def consensus_str(ref, consensus): # type: (SeqRecord, str) -> str return ">{0}:Consensus\n{1}".format(ref.id, consensus) #@contract(ref_fasta=str, vcf=str, mind=int, majority=int) def run(ref_fasta, freebayes_vcf, outfile, mind, majority): + # type: (str, str, str, int, int) -> int refs = SeqIO.parse(ref_fasta, 'fasta') with open(freebayes_vcf, 'r') as vcf_handle: muts = imap(flatten_vcf_record, vcf.Reader(vcf_handle)) refs, muts = list(refs), list(muts) - refs, seqs = all_consensuses(refs, muts, mind, majority) - strings = imap(consensus_str, refs, seqs) + refs, seqs_and_muts = all_consensuses(refs, muts, mind, majority) + strings = imap(consensus_str, refs, imap(get(0), seqs_and_muts)) result = '\n'.join(strings) outfile.write(result) outfile.close() return 0 -def main(): +def main(): # type () -> None scheme = Schema( { '--vcf' : os.path.isfile, '--ref' : os.path.isfile, diff --git a/tests/test_consensus.py b/tests/test_consensus.py index 3ba6d9c..55f81f5 100644 --- a/tests/test_consensus.py +++ b/tests/test_consensus.py @@ -2,11 +2,13 @@ from biotest.biohypothesis import ref_with_vcf_dicts_strategy_factory, \ make_seqrec, vcf_dict_strategy_factory from hypothesis import given -from fn import _ +#from fn import _ from hypothesis import strategies as st from hypothesis import given, assume +from operator import itemgetter as get from bioframework.consensus import call_many, all_consensuses, make_consensus import string +import itertools import unittest simple_vcf_dict_strategy = st.tuples(st.text(string.ascii_letters), @@ -15,8 +17,10 @@ .flatmap(lambda tup:\ vcf_dict_strategy_factory(*tup)) pos_int = st.integers(min_value=0) + +#TODO: these 10, 80 for trhesh and majority_percentage should be factored out and possibly be strategies themselves def just_ref(*args): - return next(all_consensuses(*args)[1]) + return next(all_consensuses(*args)[1])[0] class 
CallBaseHypothesisTest(unittest.TestCase): @given(simple_vcf_dict_strategy, pos_int) def test_under_mind_is_N(self, mut, mind): @@ -54,7 +58,7 @@ def test_make_consensus_example(self): ('C', 'A', 5)] ref = 'ACGTACGT' expected = 'ATTTAAGT' - actual = make_consensus(ref, muts) + actual = make_consensus(ref, muts)[0] self.assertEquals(expected, actual) def test_single_example(self): @@ -78,20 +82,90 @@ def test_single_example(self): expected = 'ATTTAAGT' result = just_ref([ref], muts, 10, 80) self.assertEquals(expected, result) - +ref_with_vcf_dicts_strategy = ref_with_vcf_dicts_strategy_factory().map( + lambda (r, muts): (make_seqrec(muts[0]['chrom'], r), muts)) from collections import Counter countof = lambda c: lambda x: Counter(x).get(c, 0) -class ConsensusHypothesisTest(unittest.TestCase): - #@st.random_module - @given(ref_with_vcf_dicts_strategy_factory(), st.random_module()) +def run_cons(*args): + _, alt_and_cons = all_consensuses(*args) + cons, alts = zip(*alt_and_cons) + return cons[0], alts[0] +class ConsensusHypothesisTest(unittest.TestCase): + #ref_and_muts=(SeqRecord(seq=Seq(u'AAAAAAAAAA', IUPACAmbiguousDNA()), id=u'', name='', description='', dbxrefs=[]), [ +# {'ref': u'A', 'pos': 1, 'AO': [479, 777, 119, 604], 'alt': [u'G', u'C', u'G', u'TG'], 'chrom': u'', 'DP': 2635}, +# {'ref': u'A', 'pos': 3, 'AO': [291, 241, 583, 420], 'alt': [u'CTG', u'C', u'G', u'C'], 'chrom': u'', 'DP': 1627}]), rand=random.seed(0)) +# AssertionError: 1 != 0 + + @given(ref_with_vcf_dicts_strategy, st.random_module()) def test_n_count(self, ref_and_muts, rand): ref, muts = ref_and_muts originalNs = countof('N')(ref) - assume(not filter(lambda x: 'N' in x, muts)) - expectedNs = len(filter(_['DP'] < 10, muts)) + originalNs + alts = map(get('alt'), muts) + assume(not any(map(lambda x: 'N' in x, itertools.chain(*alts)))) + # needed because ACGT -> N + assume(not filter(lambda x: len(x) > 3, alts)) + expectedNs = len(filter(lambda x: x['DP'] < 10, muts)) + originalNs result = just_ref([ref], muts, 10, 80) self.assertEquals(countof('N')(result), expectedNs) + @given(ref_with_vcf_dicts_strategy) + def test_less_or_equal_length_when_no_inserts(self, ref_and_muts): + ref, muts = ref_and_muts + cons, alts = run_cons([ref], muts, 10, 80) + assume(not any(map(lambda x: len(x[0]) < len(x[1]), alts))) + self.assertGreaterEqual(len(ref), len(cons)) + + @given(ref_with_vcf_dicts_strategy) + def assume_greater_or_equal_length_when_no_deletions(self, ref_and_muts): + ref, muts = ref_and_muts + def has_deletion(mut): + filter(lambda x: len(x) < mut['ref'], mut['alt']) + assume(not any(map(has_deletion, muts))) + result = just_ref([ref], muts, 10, 80) + self.assertLesserEqual(len(ref), len(result)) + +class ConsensusMetamorphicTests(unittest.TestCase): + '''paper on metamorphic testing: http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-10-24 ''' + @given(ref_with_vcf_dicts_strategy, st.integers(), st.integers()) + def test_more_or_equal_ns_with_lower_threshold(self, ref_and_muts, n1, n2): + ref, muts = ref_and_muts + assume(n1 < n2) + cons1 = just_ref([ref], muts, n1, 80) + cons2 = just_ref([ref], muts, n2, 80) + nsCount1, nsCount2 = countof('N')(cons1), countof('N')(cons2) + self.assertLessEqual(nsCount1, nsCount2) + + @given(ref_with_vcf_dicts_strategy) + def test_consensus_from_consensus_contains_more_alts(self, ref_and_muts): + ref, muts = ref_and_muts + assume(not any(map(lambda x: len(x['alt']) > 1, muts))) + n1 = 10 + cons1, alts = run_cons([ref], muts, n1, 80) + assume(not any(map(lambda x: 
len(x[0]) > len(x[1]), alts))) + cons2, _ = run_cons([make_seqrec(muts[0]['chrom'], cons1)], muts, n1, 80) + picked_alts = map(get(1), alts) + altCounts1 = sum(map(lambda f: f(cons1), map(countof, picked_alts))) + altCounts2 = sum(map(lambda f: f(cons2), map(countof, picked_alts))) + self.assertLessEqual(altCounts1, altCounts2) + #NOTE: the below test appears to be meaningless, + # ass the values are always equal + percent = st.integers(min_value=0, max_value=100) + @given(ref_with_vcf_dicts_strategy, percent, percent) + def test_lower_majority_required_contains_more_alts(self, ref_and_muts, p1, p2): + ref, muts = ref_and_muts + assume(p1 < p2) + assume(not any(map(lambda x: len(x['alt']) > 1, muts))) + n1 = 10 + cons1, alts = run_cons([ref], muts, n1, p1) + assume(not any(map(lambda x: len(x[0]) > len(x[1]), alts))) + cons2, _ = run_cons([ref], muts, n1, p2) + picked_alts = map(get(1), alts) + altCounts1 = sum(map(lambda f: f(cons1), map(countof, picked_alts))) + altCounts2 = sum(map(lambda f: f(cons2), map(countof, picked_alts))) + self.assertLessEqual(altCounts1, altCounts2) + #TODO: could test for exceptions when e.g., + # 1. POS is repeated + # 2. POS is greater than the size of sequence From 0de7577847ee06821a34874a575ecfe499516eaa Mon Sep 17 00:00:00 2001 From: michaelpanciera Date: Thu, 3 Mar 2016 21:20:21 -0500 Subject: [PATCH 25/41] baic types and examples --- MyPy/Bio/SeqIO.pyi | 5 +++ MyPy/Bio/SeqRecord.py | 6 ++++ MyPy/Bio/__init__.py | 0 MyPy/example.py | 44 ++++++++++++++++++++++++ MyPy/toolz/__init__.py | 0 MyPy/toolz/dicttoolz.py | 32 +++++++++++++++++ MyPy/types.py | 76 +++++++++++++++++++++++++++++++++++++++++ MyPy/vcf/__init__.py | 10 ++++++ MyPy/vcf/model.py | 3 ++ 9 files changed, 176 insertions(+) create mode 100644 MyPy/Bio/SeqIO.pyi create mode 100644 MyPy/Bio/SeqRecord.py create mode 100644 MyPy/Bio/__init__.py create mode 100644 MyPy/example.py create mode 100644 MyPy/toolz/__init__.py create mode 100644 MyPy/toolz/dicttoolz.py create mode 100644 MyPy/types.py create mode 100644 MyPy/vcf/__init__.py create mode 100644 MyPy/vcf/model.py diff --git a/MyPy/Bio/SeqIO.pyi b/MyPy/Bio/SeqIO.pyi new file mode 100644 index 0000000..baaf8a9 --- /dev/null +++ b/MyPy/Bio/SeqIO.pyi @@ -0,0 +1,5 @@ +from Bio.SeqRecord import SeqRecord +from typing import Generator, Any, Iterator + +def parse(*anything): # type: (*Any) -> Iterator[SeqRecord] + pass diff --git a/MyPy/Bio/SeqRecord.py b/MyPy/Bio/SeqRecord.py new file mode 100644 index 0000000..8bdc6c6 --- /dev/null +++ b/MyPy/Bio/SeqRecord.py @@ -0,0 +1,6 @@ +# from Bio.SeqIO import SeqIO +from typing import NamedTuple +class Stringable(object): + def __str__(self): # type: () -> str + pass +SeqRecord = NamedTuple('SeqRecord', [('id', str), ('seq', Stringable)]) diff --git a/MyPy/Bio/__init__.py b/MyPy/Bio/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/MyPy/example.py b/MyPy/example.py new file mode 100644 index 0000000..4f9bd9f --- /dev/null +++ b/MyPy/example.py @@ -0,0 +1,44 @@ +from typing import List, Dict, Generator, Iterator, Iterable, Tuple +from Bio import SeqIO +from itertools import imap +from Bio.SeqRecord import SeqRecord +def test_long(): # type: () -> int + return 11999999L +def test_seqIO_map_fails(s): # type: (str) -> List[SeqRecord] + return map(lambda x: x.id, SeqIO.parse(s)) + +#def test_seqIO_map_fails2(s): # type: (str) -> Iterator[SeqRecord] +# return map(lambda x: x.id, SeqIO.parse(s)) +def test_seqIO_map_passes(s): # type: (str) -> Iterable[str] + return imap(lambda x: x.id, 
SeqIO.parse(s)) + +def test_seqIO(s): # type: (str) -> Iterator[SeqRecord] + return SeqIO.parse(s) +def test_list_seqIO(s): # type: (str) -> List[SeqRecord] + return list(SeqIO.parse(s)) +def test_seqIO_fails(s): # type: (str) -> List[str] + return SeqIO.parse(s) +def test_should_pass(s): # type: (SeqRecord) -> str + return s.id +def test_should_fail(s): # type: (SeqRecord) -> int + return s.id +#def test_should_fail(): # type: () -> List[SeqRecord] +# return 3 + +#a = test_should_fail() +def test_ordered_dict(od): # type: (Dict[str,int]) -> Dict[str,int] + return 1 #type error 1 +# +#a = test_ordered_dict(1) #type error 2 +# +#def test_me(): +# a = test_ordered_dict(1) # type error 3 is not reported + +####def test_ordered_dict(od: typing.Dict[str,int]) -> typing.Dict[str,int]: +#### return 1 #type error 1 +#### +####a = test_ordered_dict(1) #type error 2 +#### +####def test_me(): +#### a = test_ordered_dict(1) # type error 3 is not reported +### diff --git a/MyPy/toolz/__init__.py b/MyPy/toolz/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/MyPy/toolz/dicttoolz.py b/MyPy/toolz/dicttoolz.py new file mode 100644 index 0000000..6f7178e --- /dev/null +++ b/MyPy/toolz/dicttoolz.py @@ -0,0 +1,32 @@ +from typing import Dict, Any, Callable, TypeVar +K = TypeVar('K') +V = TypeVar('V') +V2 = TypeVar('V2') +V3 = TypeVar('V3') +def merge(d1, d2): # type: (Dict[K,V], Dict[K,V]) -> Dict[K,V] + pass + +def dissoc(d, k): # type: (Dict[K,V], K) -> Dict[K,V] + pass + +def merge_with(f, d1, d2): # type: (Callable[[V,V2], V3], Dict[K,V], Dict[K,V2]) -> Dict[K,V3] + pass + +def valfilter(f, d): # type: (Callable[[V], bool], Dict[K,V]) -> Dict[K,V] + pass + + + +#from typing import Dict, Any, Callable, TypeVar +#T = TypeVar('T') +#def merge(d1, d2): # type: (Dict[Any,Any], Dict[Any,Any]) -> Dict[Any,Any] +# pass +# +#def dissoc(d, k): # type: (Dict[Any,Any], Any) -> Dict[Any,Any] +# pass +# +#def merge_with(f, d1, d2): # type: (Callable, Dict[Any,Any], Dict[Any,Any]) -> Dict[Any,Any] +# pass +# +#def valfilter(f, d): # type: (Callable, Dict[Any,Any]) -> Dict[Any,Any] +# pass diff --git a/MyPy/types.py b/MyPy/types.py new file mode 100644 index 0000000..9c40861 --- /dev/null +++ b/MyPy/types.py @@ -0,0 +1,76 @@ +from hypothesis import strategies as st +from typing import Dict, Tuple, List, Iterator, Set, Union, Optional, TypingMeta, NamedTuple +import re +import operator +from functools import partial +import string +from collections import namedtuple, OrderedDict +compose = lambda f,g: lambda *x: f(g(*x)) +''' +support: +- [x] NamedTuple +- [ ] Automatic function arguments +could also say, "given a function, generate random return values that it might give" because functions are also annotated with return values +''' + +# Just an exmaple of a named tuple +VCFRow = NamedTuple("VCFRow", + [('ref', str), + ('AO', List[int]), + ('DP', int), + ('chrom',str), + ('pos', int), + ('alt', List[str])]) + +primitives = { + str : st.text(), + int : st.integers(), + bool : st.booleans(), + float : st.floats(), + type(None) : st.none(), + unicode : st.characters(), + bytes : st.binary() # this is weird because str == bytes in py2 +} # missing: fractions, decimal + +#TODO: add Iterable, handle Sequence, etc. +def resolve(x): # type: (TypingMeta) -> hypothesis.strategies.SearchStrategy + if x in primitives: + strat = primitives[x] + elif hasattr(x, '_fields'): + # NamedTuple isn't a type, so this can't be a subclass check + try: + #Only way I know how to extract the name so it's pretty... 
+ name = re.compile("([^\.]+)'>$").search(str(x)).groups()[0] + except: + name = str(x) + fts = OrderedDict(x._field_types) + nt = namedtuple(name, fts.keys()) + vals = map(resolve, fts.values()) + strat = st.tuples(*vals).map(lambda x: nt(*x)) + elif issubclass(x, Dict): + strat = st.dictionaries(*map(resolve, x.__parameters__)) + elif issubclass(x, Tuple): + strat = st.tuples(*map(resolve, x.__tuple_params__)) + elif issubclass(x, Union): + strat = operator.ior(*map(resolve, x.__union_params__)) + elif issubclass(x, Optional): + # Optional[X] is equivalent to Union[X, type(None)]. second param is always Nonetype. + value = x.__union_params__[0] + strat = (resolve(value) | st.none()) + else: # a list-type-ish + collections = { + Iterator : lambda x: st.lists(x).map(iter), + List : st.lists, + Set : st.sets + } #TODO: missing: Iterable , etc. + # For some reason List[T] not a subclass of List: issubclass(x, List) == False. So do these hijinks + params = x.__parameters__ + assert len(params) == 1, "Wrong type %s, not a list-like" % x + matches = filter(lambda k: k == x.__origin__, collections.keys()) + assert len(matches) == 1, "Should have exactly one match. %s matched with %s" % (x, matches) + collection_strat = collections[matches[0]] + strat = collection_strat(resolve(params[0])) + return strat +# see https://docs.python.org/3/library/typing.html +# not Generics +# not Callables diff --git a/MyPy/vcf/__init__.py b/MyPy/vcf/__init__.py new file mode 100644 index 0000000..fbb79f9 --- /dev/null +++ b/MyPy/vcf/__init__.py @@ -0,0 +1,10 @@ +from typing import Union, Dict, List, NamedTuple, Iterator + +#fields = [("ALT", Union[str, List[str]]), ("REF", str), ("POS", int), ("CHROM", str), ("INFO", Dict[str, Union[int, List[int]]])] +# +#VCFRecord = NamedTuple('VCFRecord', fields) + +VCFRecord = NamedTuple('VCFRecord', [("ALT", Union[str, List[str]]), ("REF", str), ("POS", int), ("CHROM", str), ("INFO", Dict[str, Union[int, List[int]]])] +) +def Reader(s): # type: (str) -> Iterator[VCFRecord] + pass diff --git a/MyPy/vcf/model.py b/MyPy/vcf/model.py new file mode 100644 index 0000000..02b1e9a --- /dev/null +++ b/MyPy/vcf/model.py @@ -0,0 +1,3 @@ +from typing import Union, Dict, List, NamedTuple, Iterator +_Record = NamedTuple('VCFRecord', [("ALT", Union[str, List[str]]), ("REF", str), ("POS", int), ("CHROM", str), ("INFO", Dict[str, Union[int, List[int]]])] +) From bf1b67bbd4f7f889b1267883119f014187340b39 Mon Sep 17 00:00:00 2001 From: michaelpanciera Date: Thu, 3 Mar 2016 22:13:49 -0500 Subject: [PATCH 26/41] typechecking works. 
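The checks rely on PEP 484 comment-style annotations, which stay Python 2 compatible. A minimal illustration of the form used throughout consensus.py (longest_alt is an invented helper, not part of the codebase):

    from typing import List, Tuple

    Mut = Tuple[str, str, int]  # (reference, called bases, position), as in consensus.py

    def longest_alt(muts): # type: (List[Mut]) -> int
        # mypy --py2 reads the comment annotation and flags callers that
        # pass anything other than a list of (str, str, int) triples.
        return max([len(called) for _, called, _ in muts] or [0])
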
--- MyPy/vcf/model.py | 3 -- bioframework/consensus.py | 63 ++++++++++++++----------- {MyPy => mypy}/Bio/SeqIO.pyi | 0 {MyPy => mypy}/Bio/SeqRecord.py | 0 {MyPy => mypy}/Bio/__init__.py | 0 {MyPy/toolz => mypy}/__init__.py | 0 {MyPy => mypy}/example.py | 0 mypy/out/docopt.pyi | 76 +++++++++++++++++++++++++++++++ mypy/out/hypothesis/__init__.pyi | 8 ++++ mypy/out/schema.pyi | 31 +++++++++++++ mypy/toolz/__init__.py | 0 {MyPy => mypy}/toolz/dicttoolz.py | 0 {MyPy => mypy}/types.py | 0 {MyPy => mypy}/vcf/__init__.py | 5 +- mypy/vcf/model.py | 3 ++ 15 files changed, 157 insertions(+), 32 deletions(-) delete mode 100644 MyPy/vcf/model.py rename {MyPy => mypy}/Bio/SeqIO.pyi (100%) rename {MyPy => mypy}/Bio/SeqRecord.py (100%) rename {MyPy => mypy}/Bio/__init__.py (100%) rename {MyPy/toolz => mypy}/__init__.py (100%) rename {MyPy => mypy}/example.py (100%) create mode 100644 mypy/out/docopt.pyi create mode 100644 mypy/out/hypothesis/__init__.pyi create mode 100644 mypy/out/schema.pyi create mode 100644 mypy/toolz/__init__.py rename {MyPy => mypy}/toolz/dicttoolz.py (100%) rename {MyPy => mypy}/types.py (100%) rename {MyPy => mypy}/vcf/__init__.py (69%) create mode 100644 mypy/vcf/model.py diff --git a/MyPy/vcf/model.py b/MyPy/vcf/model.py deleted file mode 100644 index 02b1e9a..0000000 --- a/MyPy/vcf/model.py +++ /dev/null @@ -1,3 +0,0 @@ -from typing import Union, Dict, List, NamedTuple, Iterator -_Record = NamedTuple('VCFRecord', [("ALT", Union[str, List[str]]), ("REF", str), ("POS", int), ("CHROM", str), ("INFO", Dict[str, Union[int, List[int]]])] -) diff --git a/bioframework/consensus.py b/bioframework/consensus.py index 05a1970..8c0da19 100755 --- a/bioframework/consensus.py +++ b/bioframework/consensus.py @@ -14,7 +14,7 @@ from functools import partial from itertools import ifilter, imap, groupby, takewhile, repeat, starmap, izip_longest import os, sys -from typing import Tuple, Dict, List, Iterator, Iterable, Any, Callable +from typing import Tuple, Dict, List, Iterator, Iterable, Any, Callable, NamedTuple, BinaryIO from Bio import SeqIO #done from Bio.SeqRecord import SeqRecord #done @@ -24,15 +24,21 @@ from toolz.dicttoolz import merge, dissoc, merge_with, valfilter #todo from docopt import docopt #ignore from schema import Schema, Use #ignore -from contracts import contract, new_contract #can ignore +#from contracts import contract, new_contract #can ignore +#from mypy.types import VCFRow ############# # Constants # ############# +VCFRow = NamedTuple("VCFRow", + [('ref', str), + ('AO', List[int]), + ('DP', int), + ('chrom',str), + ('pos', int), + ('alt', List[str])]) +#VcfRow = namedtuple("VcfRow", VCFRow._fields) # type: (*Any) -> VCFRow -AMBIGUITY_TABLE = { 'A': 'A', 'T': 'T', 'G': 'G', 'C': 'C', 'N': 'N', - 'AC': 'M', 'AG': 'R', 'AT': 'W', 'CG': 'S', 'CT': - 'Y', 'GT': 'K', 'ACG': 'V', 'ACT': 'H', 'AGT': 'D', - 'CGT': 'B', 'ACGT': 'N' } +AMBIGUITY_TABLE = { 'A': 'A', 'T': 'T', 'G': 'G', 'C': 'C', 'N': 'N', 'AC': 'M', 'AG': 'R', 'AT': 'W', 'CG': 'S', 'CT': 'Y', 'GT': 'K', 'ACG': 'V', 'ACT': 'H', 'AGT': 'D', 'CGT': 'B', 'ACGT': 'N' } MAJORITY_PERCENTAGE = 80 MIN_DEPTH = 10 @@ -40,7 +46,7 @@ ########### # Reducer # ########### -@contract(reference='string', muts='list(tuple(string, string, int))' ) +#@contract(reference='string', muts='list(tuple(string, string, int))' ) def make_consensus(reference, muts): # type: (str, List[Mut]) -> Tuple[str, List[Mut]] ''' Actually builds a consensus string by recursively applying @@ -98,10 +104,10 @@ def call_base_multi_alts(min_depth, 
majority_percentage, dp, alts, ref): #@contract(min_depth='number,>=0', majority_percentage='number,>=0,<=100', rec='dict', returns='tuple(string, string, int)') def call_many(min_depth, majority_percentage, rec): - # type: (int, int, Dict) -> Mut + # type: (int, int, VCFRow) -> Mut #TODO: switch to generators - muts = zip(rec['AO'], rec['alt']) - ref, dp, pos = rec['ref'], rec['DP'], rec['pos'] + muts = zip(rec.AO, rec.alt) + ref, dp, pos = rec.ref, rec.DP, rec.pos longest_len = max(map(lambda x: len(x[-1]), muts)) longest_len = max(longest_len, len(ref)) def fill_gap(r): @@ -115,22 +121,23 @@ def seq_count(acc, ao_and_nts): return map(merge_sum, acc, [{nt:ao} for nt in nts]) # create a list of {base : count}, where the index matches the position mut_dicts = reduce(seq_count, xs, [{}]) - base_caller = partial(call_base_multi_alts, min_depth, majority_percentage, dp) # type: Callable[[Dict[Any,Any], str], str] + base_caller = lambda m,r: call_base_multi_alts(min_depth, majority_percentage, dp, m, r) # # # ?Callable[[Dict[Any,Any], str], str] res = map(base_caller, mut_dicts, ref) # trim None values at the end, (which indicate deletion) result = takewhile(bool, res) return (ref, ''.join(result), pos) -@contract(rec='dict',returns='dict') +#@contract(rec='dict',returns='dict') def flatten_vcf_record(rec): - # type: (_Record) -> Dict[str, Any] + # type: (_Record) -> VCFRow _rec = merge({ 'alt' : rec.ALT, 'ref' : rec.REF, 'pos' : rec.POS, 'chrom' : rec.CHROM}, rec.INFO) if not hasattr(_rec['alt'], '__iter__'): #TODO: put this somewhere else - return merge(_rec, dict(alt=[_rec['alt']], AO=[_rec['AO']])) - else: return _rec + d = merge(_rec, dict(alt=[_rec['alt']], AO=[_rec['AO']])) + else: d = _rec + return VCFRow(**d) ############## # Group By # @@ -138,16 +145,16 @@ def flatten_vcf_record(rec): #NOTE: could possibly drop lists, use fn.Stream all the time, # and write a Stream instance for contracts like: # https://github.com/AndreaCensi/contracts/blob/831ec7a5260ceb8960540ba0cb6cc26370cf2d82/src/contracts/library/lists.py -@contract(references='list[N]($SeqRecord),N>0', muts='list(dict)',returns='tuple(list(dict))') +#@contract(references='list[N]($SeqRecord),N>0', muts='list(dict)',returns='tuple(list(dict))') def group_muts_by_refs(references, muts): - # type: (List[SeqRecord], List[Dict[Any, Any]]) -> Iterable[List[Dict]] + # type: (List[SeqRecord], List[VCFRow]) -> List[List[VCFRow]] '''group and sort the mutations so that they match the order of the references.''' #NOTE: muts will already be "sorted" in that they are grouped together in the vcf #fix the groupby so it doesn't incidentally drain the first object of the group unzip = lambda x: zip(*x) chroms, groups = unzip(map(lambda kv: (kv[0], list(kv[1])), groupby(muts, get('chrom')))) - @contract(key='tuple(string,list)') - def index_of_ref(key): + #@contract(key='tuple(string,list)') + def index_of_ref(key): # type: (Tuple[str, List[SeqRecord]]) -> int chrom=key[0] index_of_chrom = map(lambda x: x.id, references).index(chrom) return index_of_chrom @@ -162,13 +169,15 @@ def index_of_ref(key): #@contract(references='SeqRecord', muts='seq(dict)', mind=int, majority=int) def all_consensuses(references, muts, mind, majority): - # type: (Iterable[SeqRecord], Iterable[Dict[Any,Any]], int, int) -> Tuple[List[str], Iterator[Tuple[str, List[Mut]]]] + # type: (List[SeqRecord], List[VCFRow], int, int) -> Tuple[List[SeqRecord], Iterable[Tuple[str, List[Mut]]]] ''' generates conesnsuses, including for flu and other mult-reference VCFs. 
applies filters and base callers to the mutations. then builds the consensus using these calls and `make_consensus`''' muts_by_ref = group_muts_by_refs(references, muts) def single_consensus(muts, ref): - the_muts = map(partial(call_many, mind, majority), muts) + # type: (List[VCFRow], SeqRecord) -> Tuple[str, List[Mut]] + #the_muts = map(partial(call_many, mind, majority), muts) + the_muts = map(lambda x: call_many(mind, majority, x), muts) ref_and_alt_differ = lambda x: x[0] != x[1] # vcf is index-starting-at-1 #real_muts = map(lambda (a,b,pos): (a,b,pos-1), filter(ref_and_alt_differ, the_muts)) @@ -186,13 +195,13 @@ def consensus_str(ref, consensus): # type: (SeqRecord, str) -> str #@contract(ref_fasta=str, vcf=str, mind=int, majority=int) def run(ref_fasta, freebayes_vcf, outfile, mind, majority): - # type: (str, str, str, int, int) -> int - refs = SeqIO.parse(ref_fasta, 'fasta') + # type: (str, str, BinaryIO, int, int) -> int + _refs = SeqIO.parse(ref_fasta, 'fasta') with open(freebayes_vcf, 'r') as vcf_handle: - muts = imap(flatten_vcf_record, vcf.Reader(vcf_handle)) - refs, muts = list(refs), list(muts) - refs, seqs_and_muts = all_consensuses(refs, muts, mind, majority) - strings = imap(consensus_str, refs, imap(get(0), seqs_and_muts)) + _muts = map(flatten_vcf_record, vcf.Reader(vcf_handle)) + refs, muts = list(_refs), list(_muts) + the_refs, seqs_and_muts = all_consensuses(refs, muts, mind, majority) + strings = imap(consensus_str, the_refs, imap(get(0), seqs_and_muts)) result = '\n'.join(strings) outfile.write(result) outfile.close() diff --git a/MyPy/Bio/SeqIO.pyi b/mypy/Bio/SeqIO.pyi similarity index 100% rename from MyPy/Bio/SeqIO.pyi rename to mypy/Bio/SeqIO.pyi diff --git a/MyPy/Bio/SeqRecord.py b/mypy/Bio/SeqRecord.py similarity index 100% rename from MyPy/Bio/SeqRecord.py rename to mypy/Bio/SeqRecord.py diff --git a/MyPy/Bio/__init__.py b/mypy/Bio/__init__.py similarity index 100% rename from MyPy/Bio/__init__.py rename to mypy/Bio/__init__.py diff --git a/MyPy/toolz/__init__.py b/mypy/__init__.py similarity index 100% rename from MyPy/toolz/__init__.py rename to mypy/__init__.py diff --git a/MyPy/example.py b/mypy/example.py similarity index 100% rename from MyPy/example.py rename to mypy/example.py diff --git a/mypy/out/docopt.pyi b/mypy/out/docopt.pyi new file mode 100644 index 0000000..6f9431c --- /dev/null +++ b/mypy/out/docopt.pyi @@ -0,0 +1,76 @@ +# Stubs for docopt (Python 2) +# +# NOTE: This dynamically typed stub was automatically generated by stubgen. + +from typing import Any + +class DocoptLanguageError(Exception): ... + +class DocoptExit(SystemExit): + usage = ... # type: Any + def __init__(self, message=''): ... + +class Pattern: + def __eq__(self, other): ... + def __hash__(self): ... + def fix(self): ... + def fix_identities(self, uniq=None): ... + def fix_repeating_arguments(self): ... + @property + def either(self): ... + +class ChildPattern(Pattern): + name = ... # type: Any + value = ... # type: Any + def __init__(self, name, value=None): ... + def flat(self, *types): ... + def match(self, left, collected=None): ... + +class ParentPattern(Pattern): + children = ... # type: Any + def __init__(self, *children): ... + def flat(self, *types): ... + +class Argument(ChildPattern): + def single_match(self, left): ... + @classmethod + def parse(class_, source): ... + +class Command(Argument): + name = ... # type: Any + value = ... # type: Any + def __init__(self, name, value=False): ... + def single_match(self, left): ... 
+ +class Option(ChildPattern): + value = ... # type: Any + def __init__(self, short=None, long=None, argcount=0, value=False): ... + @classmethod + def parse(class_, option_description): ... + def single_match(self, left): ... + @property + def name(self): ... + +class Required(ParentPattern): + def match(self, left, collected=None): ... + +class Optional(ParentPattern): + def match(self, left, collected=None): ... + +class AnyOptions(Optional): ... + +class OneOrMore(ParentPattern): + def match(self, left, collected=None): ... + +class Either(ParentPattern): + def match(self, left, collected=None): ... + +class TokenStream(list): + error = ... # type: Any + def __init__(self, source, error): ... + def move(self): ... + def current(self): ... + +class Dict(dict): ... + +def docopt(doc, argv=None, help=True, version=None, options_first=False): ... diff --git a/mypy/out/hypothesis/__init__.pyi b/mypy/out/hypothesis/__init__.pyi new file mode 100644 index 0000000..764f9f7 --- /dev/null +++ b/mypy/out/hypothesis/__init__.pyi @@ -0,0 +1,8 @@ +# Stubs for hypothesis (Python 2) +# +# NOTE: This dynamically typed stub was automatically generated by stubgen. + +from hypothesis._settings import settings as settings, Verbosity as Verbosity +from hypothesis.version import __version_info__ as __version_info__, __version__ as __version__ +from hypothesis.control import assume as assume, note as note, reject as reject +from hypothesis.core import given as given, find as find, example as example, seed as seed diff --git a/mypy/out/schema.pyi b/mypy/out/schema.pyi new file mode 100644 index 0000000..3eb2140 --- /dev/null +++ b/mypy/out/schema.pyi @@ -0,0 +1,31 @@ +# Stubs for schema (Python 2) +# +# NOTE: This dynamically typed stub was automatically generated by stubgen. + +from typing import Any + +class SchemaError(Exception): + autos = ... # type: Any + errors = ... # type: Any + def __init__(self, autos, errors): ... + @property + def code(self): ... + +class And: + def __init__(self, *args, **kw): ... + def validate(self, data): ... + +class Or(And): + def validate(self, data): ... + +class Use: + def __init__(self, callable_, error=None): ... + def validate(self, data): ... + +def priority(s): ... + +class Schema: + def __init__(self, schema, error=None): ... + def validate(self, data): ... + +class Optional(Schema): ... 
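The stub files above are type skeletons only. The two-dict merge_with signature stubbed for toolz in PATCH 25 mirrors how consensus.py combines per-base counts; a usage sketch with invented values (the real toolz.merge_with is variadic, the stub narrows it to the two-dict case used here):

    from toolz.dicttoolz import merge_with

    counts_a = {'A': 3, 'C': 1}
    counts_b = {'A': 2, 'G': 4}
    merged = merge_with(sum, counts_a, counts_b)  # {'A': 5, 'C': 1, 'G': 4}
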
diff --git a/mypy/toolz/__init__.py b/mypy/toolz/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/MyPy/toolz/dicttoolz.py b/mypy/toolz/dicttoolz.py similarity index 100% rename from MyPy/toolz/dicttoolz.py rename to mypy/toolz/dicttoolz.py diff --git a/MyPy/types.py b/mypy/types.py similarity index 100% rename from MyPy/types.py rename to mypy/types.py diff --git a/MyPy/vcf/__init__.py b/mypy/vcf/__init__.py similarity index 69% rename from MyPy/vcf/__init__.py rename to mypy/vcf/__init__.py index fbb79f9..29c9cc8 100644 --- a/MyPy/vcf/__init__.py +++ b/mypy/vcf/__init__.py @@ -1,4 +1,5 @@ -from typing import Union, Dict, List, NamedTuple, Iterator +from typing import Union, Dict, List, NamedTuple, Iterator, BinaryIO +from vcf.model import _Record #fields = [("ALT", Union[str, List[str]]), ("REF", str), ("POS", int), ("CHROM", str), ("INFO", Dict[str, Union[int, List[int]]])] # @@ -6,5 +7,5 @@ VCFRecord = NamedTuple('VCFRecord', [("ALT", Union[str, List[str]]), ("REF", str), ("POS", int), ("CHROM", str), ("INFO", Dict[str, Union[int, List[int]]])] ) -def Reader(s): # type: (str) -> Iterator[VCFRecord] +def Reader(s): # type: (BinaryIO) -> Iterator[_Record] pass diff --git a/mypy/vcf/model.py b/mypy/vcf/model.py new file mode 100644 index 0000000..9266869 --- /dev/null +++ b/mypy/vcf/model.py @@ -0,0 +1,3 @@ +from typing import Union, Dict, List, NamedTuple, Iterator +_Record = NamedTuple('_Record', [("ALT", Union[str, List[str]]), ("REF", str), ("POS", int), ("CHROM", str), ("INFO", Dict[str, Union[int, List[int]]])] +) From c2aecb8c613804931daec26c82810665c8d7ee95 Mon Sep 17 00:00:00 2001 From: michaelpanciera Date: Thu, 3 Mar 2016 22:17:27 -0500 Subject: [PATCH 27/41] added readme for types --- mypy/README.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 mypy/README.md diff --git a/mypy/README.md b/mypy/README.md new file mode 100644 index 0000000..aec5641 --- /dev/null +++ b/mypy/README.md @@ -0,0 +1,6 @@ +1. Install [mypy](https://github.com/python/mypy#quick-start) + +2. Run mypy MYPYPATH=$PWD/mypy:$PWD/mypy/out mypy --py2 bioframework/consensus.py + +If needed, uses `stubgen` to generate more stub files for other libraries. 
+ From c09b94bfbb5c46087a16a57428e3678ebc175cbc Mon Sep 17 00:00:00 2001 From: michaelpanciera Date: Thu, 3 Mar 2016 22:28:22 -0500 Subject: [PATCH 28/41] fixed tests, now passing --- bioframework/consensus.py | 2 +- tests/test_consensus.py | 37 +++++++++++++++++++------------------ 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/bioframework/consensus.py b/bioframework/consensus.py index 8c0da19..c6384ea 100755 --- a/bioframework/consensus.py +++ b/bioframework/consensus.py @@ -152,7 +152,7 @@ def group_muts_by_refs(references, muts): #NOTE: muts will already be "sorted" in that they are grouped together in the vcf #fix the groupby so it doesn't incidentally drain the first object of the group unzip = lambda x: zip(*x) - chroms, groups = unzip(map(lambda kv: (kv[0], list(kv[1])), groupby(muts, get('chrom')))) + chroms, groups = unzip(map(lambda kv: (kv[0], list(kv[1])), groupby(muts, lambda x: x.chrom))) #@contract(key='tuple(string,list)') def index_of_ref(key): # type: (Tuple[str, List[SeqRecord]]) -> int chrom=key[0] diff --git a/tests/test_consensus.py b/tests/test_consensus.py index 55f81f5..47076c1 100644 --- a/tests/test_consensus.py +++ b/tests/test_consensus.py @@ -6,7 +6,7 @@ from hypothesis import strategies as st from hypothesis import given, assume from operator import itemgetter as get -from bioframework.consensus import call_many, all_consensuses, make_consensus +from bioframework.consensus import call_many, all_consensuses, make_consensus, VCFRow import string import itertools import unittest @@ -15,7 +15,7 @@ st.integers(min_value=1), st.text(alphabet='ACTGN', min_size=1, max_size=6)) \ .flatmap(lambda tup:\ - vcf_dict_strategy_factory(*tup)) + vcf_dict_strategy_factory(*tup)).map(lambda d: VCFRow(**d)) pos_int = st.integers(min_value=0) #TODO: these 10, 80 for trhesh and majority_percentage should be factored out and possibly be strategies themselves @@ -24,23 +24,23 @@ def just_ref(*args): class CallBaseHypothesisTest(unittest.TestCase): @given(simple_vcf_dict_strategy, pos_int) def test_under_mind_is_N(self, mut, mind): - assume(mut['DP'] < mind) + assume(mut.DP < mind) result = call_many(mind, 80, mut)[1] self.assertTrue(all(map(lambda x: x == 'N', result))) @given(simple_vcf_dict_strategy) def test_ao_under_minority_is_ref(self, mut): - assume(sum(mut['AO']) / mut['DP'] < 0.2) + assume(sum(mut.AO) / mut.DP < 0.2) result = call_many(0, 80, mut)[1] - self.assertEquals(result, mut['ref']) + self.assertEquals(result, mut.ref) @given(simple_vcf_dict_strategy) def test_over_majority_is_alt(self, mut): #TODO: this is slow - assume(sum(mut['AO']) / mut['DP'] > 0.8) - assume(len(mut['alt']) == 1) + assume(sum(mut.AO) / mut.DP > 0.8) + assume(len(mut.alt) == 1) result = call_many(0, 80, mut)[1] - self.assertEquals(result, mut['alt'][0]) + self.assertEquals(result, mut.alt[0]) #Commented out because it's not actually always true, # e.g. 
mut={'ref': u'AA', 'pos': 1, 'AO': [784313725491], 'alt': [u'A'], @@ -48,9 +48,9 @@ def test_over_majority_is_alt(self, mut): # should result in AA # @given(simple_vcf_dict_strategy) # def test_over_minoriy_is_not_ref(self, mut): -# assume(sum(mut['AO']) / mut['DP'] > 0.2) +# assume(sum(mut.AO) / mut.DP > 0.2) # result = call_many(0, 80, mut)[1] -# self.assertNotEquals(result, mut['ref']) +# self.assertNotEquals(result, mut.ref) class ConsesusExampleTest(unittest.TestCase): def test_make_consensus_example(self): @@ -62,7 +62,7 @@ def test_make_consensus_example(self): self.assertEquals(expected, actual) def test_single_example(self): - muts = [{ + raw_muts = [{ 'pos' : 2, 'ref' : 'CG', 'alt' : ['TT'], @@ -78,12 +78,13 @@ def test_single_example(self): 'DP' : 150, 'chrom' : 'X' }] + muts = map(lambda d: VCFRow(**d), raw_muts) ref = make_seqrec('X', 'ACGTACGT') expected = 'ATTTAAGT' result = just_ref([ref], muts, 10, 80) self.assertEquals(expected, result) ref_with_vcf_dicts_strategy = ref_with_vcf_dicts_strategy_factory().map( - lambda (r, muts): (make_seqrec(muts[0]['chrom'], r), muts)) + lambda (r, muts): (make_seqrec(muts[0]['chrom'], r), map(lambda d: VCFRow(**d), muts))) from collections import Counter countof = lambda c: lambda x: Counter(x).get(c, 0) def run_cons(*args): @@ -100,11 +101,11 @@ class ConsensusHypothesisTest(unittest.TestCase): def test_n_count(self, ref_and_muts, rand): ref, muts = ref_and_muts originalNs = countof('N')(ref) - alts = map(get('alt'), muts) + alts = map(lambda x: x.alt, muts) assume(not any(map(lambda x: 'N' in x, itertools.chain(*alts)))) # needed because ACGT -> N assume(not filter(lambda x: len(x) > 3, alts)) - expectedNs = len(filter(lambda x: x['DP'] < 10, muts)) + originalNs + expectedNs = len(filter(lambda x: x.DP < 10, muts)) + originalNs result = just_ref([ref], muts, 10, 80) self.assertEquals(countof('N')(result), expectedNs) @@ -119,7 +120,7 @@ def test_less_or_equal_length_when_no_inserts(self, ref_and_muts): def assume_greater_or_equal_length_when_no_deletions(self, ref_and_muts): ref, muts = ref_and_muts def has_deletion(mut): - filter(lambda x: len(x) < mut['ref'], mut['alt']) + filter(lambda x: len(x) < mut.ref, mut.alt) assume(not any(map(has_deletion, muts))) result = just_ref([ref], muts, 10, 80) self.assertLesserEqual(len(ref), len(result)) @@ -138,11 +139,11 @@ def test_more_or_equal_ns_with_lower_threshold(self, ref_and_muts, n1, n2): @given(ref_with_vcf_dicts_strategy) def test_consensus_from_consensus_contains_more_alts(self, ref_and_muts): ref, muts = ref_and_muts - assume(not any(map(lambda x: len(x['alt']) > 1, muts))) + assume(not any(map(lambda x: len(x.alt) > 1, muts))) n1 = 10 cons1, alts = run_cons([ref], muts, n1, 80) assume(not any(map(lambda x: len(x[0]) > len(x[1]), alts))) - cons2, _ = run_cons([make_seqrec(muts[0]['chrom'], cons1)], muts, n1, 80) + cons2, _ = run_cons([make_seqrec(muts[0].chrom, cons1)], muts, n1, 80) picked_alts = map(get(1), alts) altCounts1 = sum(map(lambda f: f(cons1), map(countof, picked_alts))) altCounts2 = sum(map(lambda f: f(cons2), map(countof, picked_alts))) @@ -156,7 +157,7 @@ def test_consensus_from_consensus_contains_more_alts(self, ref_and_muts): def test_lower_majority_required_contains_more_alts(self, ref_and_muts, p1, p2): ref, muts = ref_and_muts assume(p1 < p2) - assume(not any(map(lambda x: len(x['alt']) > 1, muts))) + assume(not any(map(lambda x: len(x.alt) > 1, muts))) n1 = 10 cons1, alts = run_cons([ref], muts, n1, p1) assume(not any(map(lambda x: len(x[0]) > len(x[1]), alts))) 
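A concrete reading of the helpers the property tests above lean on, with invented values: countof is a curried Counter lookup, and the depth-threshold check in ConsensusMetamorphicTests asserts that raising the minimum depth can only keep or increase the number of N calls.

    from collections import Counter

    countof = lambda c: lambda s: Counter(s).get(c, 0)  # same shape as in the tests
    assert countof('N')('ACNNGT') == 2

    # The metamorphic relation then reads: for fixed ref and muts,
    # countof('N')(consensus at mind=n1) <= countof('N')(consensus at mind=n2)
    # whenever n1 < n2.
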
From 6a0bd962f9d3f96907705cd959d1e41a15c97cd5 Mon Sep 17 00:00:00 2001 From: michaelpanciera Date: Thu, 3 Mar 2016 22:31:54 -0500 Subject: [PATCH 29/41] removing old types-hypothesis generator file --- mypy/types.py | 76 --------------------------------------------------- 1 file changed, 76 deletions(-) delete mode 100644 mypy/types.py diff --git a/mypy/types.py b/mypy/types.py deleted file mode 100644 index 9c40861..0000000 --- a/mypy/types.py +++ /dev/null @@ -1,76 +0,0 @@ -from hypothesis import strategies as st -from typing import Dict, Tuple, List, Iterator, Set, Union, Optional, TypingMeta, NamedTuple -import re -import operator -from functools import partial -import string -from collections import namedtuple, OrderedDict -compose = lambda f,g: lambda *x: f(g(*x)) -''' -support: -- [x] NamedTuple -- [ ] Automatic function arguments -could also say, "given a function, generate random return values that it might give" because functions are also annotated with return values -''' - -# Just an exmaple of a named tuple -VCFRow = NamedTuple("VCFRow", - [('ref', str), - ('AO', List[int]), - ('DP', int), - ('chrom',str), - ('pos', int), - ('alt', List[str])]) - -primitives = { - str : st.text(), - int : st.integers(), - bool : st.booleans(), - float : st.floats(), - type(None) : st.none(), - unicode : st.characters(), - bytes : st.binary() # this is weird because str == bytes in py2 -} # missing: fractions, decimal - -#TODO: add Iterable, handle Sequence, etc. -def resolve(x): # type: (TypingMeta) -> hypothesis.strategies.SearchStrategy - if x in primitives: - strat = primitives[x] - elif hasattr(x, '_fields'): - # NamedTuple isn't a type, so this can't be a subclass check - try: - #Only way I know how to extract the name so it's pretty... - name = re.compile("([^\.]+)'>$").search(str(x)).groups()[0] - except: - name = str(x) - fts = OrderedDict(x._field_types) - nt = namedtuple(name, fts.keys()) - vals = map(resolve, fts.values()) - strat = st.tuples(*vals).map(lambda x: nt(*x)) - elif issubclass(x, Dict): - strat = st.dictionaries(*map(resolve, x.__parameters__)) - elif issubclass(x, Tuple): - strat = st.tuples(*map(resolve, x.__tuple_params__)) - elif issubclass(x, Union): - strat = operator.ior(*map(resolve, x.__union_params__)) - elif issubclass(x, Optional): - # Optional[X] is equivalent to Union[X, type(None)]. second param is always Nonetype. - value = x.__union_params__[0] - strat = (resolve(value) | st.none()) - else: # a list-type-ish - collections = { - Iterator : lambda x: st.lists(x).map(iter), - List : st.lists, - Set : st.sets - } #TODO: missing: Iterable , etc. - # For some reason List[T] not a subclass of List: issubclass(x, List) == False. So do these hijinks - params = x.__parameters__ - assert len(params) == 1, "Wrong type %s, not a list-like" % x - matches = filter(lambda k: k == x.__origin__, collections.keys()) - assert len(matches) == 1, "Should have exactly one match. 
%s matched with %s" % (x, matches) - collection_strat = collections[matches[0]] - strat = collection_strat(resolve(params[0])) - return strat -# see https://docs.python.org/3/library/typing.html -# not Generics -# not Callables From 6cbd2e16d83ed64f278cb1bb088f3c7dd165a4c9 Mon Sep 17 00:00:00 2001 From: michaelpanciera Date: Thu, 3 Mar 2016 22:32:18 -0500 Subject: [PATCH 30/41] added missing typing requirement --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index dbdcd86..26a06bb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ pycontracts toolz pyvcf +typing From 4427de8e70b997a8b6a262342f1dda796c5912ed Mon Sep 17 00:00:00 2001 From: michaelpanciera Date: Thu, 3 Mar 2016 22:44:40 -0500 Subject: [PATCH 31/41] missing deps. --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 26a06bb..8e01b72 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,5 @@ pycontracts toolz pyvcf typing +docopt +schema From 50cf3c9273760ff8d0b702ff34d4c46c5e744e57 Mon Sep 17 00:00:00 2001 From: Panciera Date: Fri, 4 Mar 2016 10:18:25 -0500 Subject: [PATCH 32/41] add typing module dep. --- setup.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/setup.py b/setup.py index 0c97ac2..1d09b17 100755 --- a/setup.py +++ b/setup.py @@ -24,8 +24,12 @@ def jip_modules(path=bioframework.JIP_PATH): 'pyjip', 'toolz', 'docopt', + 'schema', + 'pyvcf', + 'typing' ], data_files=[ (join(sys.prefix,'bin'), jip_modules()), ] ) + From 23afece1421c36dcc1fb2cb09c21250e016cb87c Mon Sep 17 00:00:00 2001 From: Panciera Date: Fri, 4 Mar 2016 10:47:59 -0500 Subject: [PATCH 33/41] fix failing test --- tests/test_consensus.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/test_consensus.py b/tests/test_consensus.py index 47076c1..955e086 100644 --- a/tests/test_consensus.py +++ b/tests/test_consensus.py @@ -91,7 +91,7 @@ def run_cons(*args): _, alt_and_cons = all_consensuses(*args) cons, alts = zip(*alt_and_cons) return cons[0], alts[0] -class ConsensusHypothesisTest(unittest.TestCase): +class ConsensusHypothesisTest(unittest.TestCase): #ref_and_muts=(SeqRecord(seq=Seq(u'AAAAAAAAAA', IUPACAmbiguousDNA()), id=u'', name='', description='', dbxrefs=[]), [ # {'ref': u'A', 'pos': 1, 'AO': [479, 777, 119, 604], 'alt': [u'G', u'C', u'G', u'TG'], 'chrom': u'', 'DP': 2635}, # {'ref': u'A', 'pos': 3, 'AO': [291, 241, 583, 420], 'alt': [u'CTG', u'C', u'G', u'C'], 'chrom': u'', 'DP': 1627}]), rand=random.seed(0)) @@ -102,9 +102,11 @@ def test_n_count(self, ref_and_muts, rand): ref, muts = ref_and_muts originalNs = countof('N')(ref) alts = map(lambda x: x.alt, muts) - assume(not any(map(lambda x: 'N' in x, itertools.chain(*alts)))) + refs = map(lambda x: x.ref, muts) + assume(not filter(lambda x: 'N' in x, itertools.chain(*alts))) + assume(not filter(lambda x: len(x) > 1, itertools.chain(*alts))) + assume(not filter(lambda x: len(x) > 1, refs)) # needed because ACGT -> N - assume(not filter(lambda x: len(x) > 3, alts)) expectedNs = len(filter(lambda x: x.DP < 10, muts)) + originalNs result = just_ref([ref], muts, 10, 80) self.assertEquals(countof('N')(result), expectedNs) @@ -134,7 +136,7 @@ def test_more_or_equal_ns_with_lower_threshold(self, ref_and_muts, n1, n2): cons1 = just_ref([ref], muts, n1, 80) cons2 = just_ref([ref], muts, n2, 80) nsCount1, nsCount2 = countof('N')(cons1), countof('N')(cons2) - self.assertLessEqual(nsCount1, nsCount2) + 
self.assertLessEqual(nsCount1, nsCount2) @given(ref_with_vcf_dicts_strategy) def test_consensus_from_consensus_contains_more_alts(self, ref_and_muts): @@ -147,7 +149,7 @@ def test_consensus_from_consensus_contains_more_alts(self, ref_and_muts): picked_alts = map(get(1), alts) altCounts1 = sum(map(lambda f: f(cons1), map(countof, picked_alts))) altCounts2 = sum(map(lambda f: f(cons2), map(countof, picked_alts))) - self.assertLessEqual(altCounts1, altCounts2) + self.assertLessEqual(altCounts1, altCounts2) #NOTE: the below test appears to be meaningless, From bb7f93734e53e714b3a2057094dab1d587894268 Mon Sep 17 00:00:00 2001 From: Panciera Date: Fri, 4 Mar 2016 13:51:19 -0500 Subject: [PATCH 34/41] add support for trimming reference --- bioframework/consensus.py | 18 +++++++++++++++--- mypy/sh.py | 3 +++ 2 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 mypy/sh.py diff --git a/bioframework/consensus.py b/bioframework/consensus.py index c6384ea..84bfdf4 100755 --- a/bioframework/consensus.py +++ b/bioframework/consensus.py @@ -14,14 +14,17 @@ from functools import partial from itertools import ifilter, imap, groupby, takewhile, repeat, starmap, izip_longest import os, sys +import collections + from typing import Tuple, Dict, List, Iterator, Iterable, Any, Callable, NamedTuple, BinaryIO from Bio import SeqIO #done from Bio.SeqRecord import SeqRecord #done import vcf #done from vcf.model import _Record +import sh #todo #from toolz import compose -from toolz.dicttoolz import merge, dissoc, merge_with, valfilter #todo +from toolz.dicttoolz import merge, dissoc, merge_with, valfilter #done from docopt import docopt #ignore from schema import Schema, Use #ignore #from contracts import contract, new_contract #can ignore @@ -36,8 +39,6 @@ ('chrom',str), ('pos', int), ('alt', List[str])]) -#VcfRow = namedtuple("VcfRow", VCFRow._fields) # type: (*Any) -> VCFRow - AMBIGUITY_TABLE = { 'A': 'A', 'T': 'T', 'G': 'G', 'C': 'C', 'N': 'N', 'AC': 'M', 'AG': 'R', 'AT': 'W', 'CG': 'S', 'CT': 'Y', 'GT': 'K', 'ACG': 'V', 'ACT': 'H', 'AGT': 'D', 'CGT': 'B', 'ACGT': 'N' } MAJORITY_PERCENTAGE = 80 @@ -192,6 +193,17 @@ def single_consensus(muts, ref): def consensus_str(ref, consensus): # type: (SeqRecord, str) -> str return ">{0}:Consensus\n{1}".format(ref.id, consensus) +def zero_coverage_positions(bam_file, ref_file): # type: (str, str) -> Iterable[int] + pileup = sh.Command('mpileup')(bam_file, f=ref_file, _iter=True) + get_pos = lambda x: int(x.split()[1]) # type: Callable[[str],int] + return imap(get_pos, pileup) + +#TODO: is pileup 0-based or 1-based index? 
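# Illustration only (not part of the patch): samtools mpileup rows are
# tab-separated "chrom  pos  ref  depth  bases  quals", and the position
# column is 1-based, which answers the TODO above.  Two made-up rows:
_pileup_rows = ["ref1\t4\tA\t2\t..\tFF", "ref1\t5\tC\t3\t...\tFFF"]
assert [int(r.split()[1]) for r in _pileup_rows] == [4, 5]
# By default mpileup reports only covered positions, so the function above
# yields the positions that are present in the pileup output.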
+def trim_ref(ref, positions): # type: (str, Iterator[int]) -> str + start, end = next(positions), collections.deque(positions, 1)[0] + return '-'*start + ref[:start:end] + '-'*(len(ref) - end) + + #@contract(ref_fasta=str, vcf=str, mind=int, majority=int) def run(ref_fasta, freebayes_vcf, outfile, mind, majority): diff --git a/mypy/sh.py b/mypy/sh.py new file mode 100644 index 0000000..ee8e4e9 --- /dev/null +++ b/mypy/sh.py @@ -0,0 +1,3 @@ +from typing import Callable, Any, Union, List, Iterator +def Command(s): # type: (str) -> Callable[...,Union[List[str],Iterator[str]]] + pass From 272a115ed63eaf7faa8452bf359ea081639c1eed Mon Sep 17 00:00:00 2001 From: Mike Panciera Date: Fri, 4 Mar 2016 14:14:32 -0500 Subject: [PATCH 35/41] fix readme formatting --- mypy/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypy/README.md b/mypy/README.md index aec5641..26906fb 100644 --- a/mypy/README.md +++ b/mypy/README.md @@ -1,6 +1,6 @@ 1. Install [mypy](https://github.com/python/mypy#quick-start) -2. Run mypy MYPYPATH=$PWD/mypy:$PWD/mypy/out mypy --py2 bioframework/consensus.py +2. Run mypy: `MYPYPATH=$PWD/mypy:$PWD/mypy/out mypy --py2 bioframework/consensus.py` If needed, uses `stubgen` to generate more stub files for other libraries. From bbd8af8ee784e0578550deedff94138ae5a30f03 Mon Sep 17 00:00:00 2001 From: Panciera Date: Tue, 22 Mar 2016 16:03:40 -0400 Subject: [PATCH 36/41] fix VCFRow instantiation --- bioframework/consensus.py | 21 ++++++++++++++------- mypy/toolz/dicttoolz.py | 2 ++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/bioframework/consensus.py b/bioframework/consensus.py index 84bfdf4..8dd433c 100755 --- a/bioframework/consensus.py +++ b/bioframework/consensus.py @@ -24,7 +24,7 @@ from vcf.model import _Record import sh #todo #from toolz import compose -from toolz.dicttoolz import merge, dissoc, merge_with, valfilter #done +from toolz.dicttoolz import merge, dissoc, merge_with, valfilter, keyfilter #done from docopt import docopt #ignore from schema import Schema, Use #ignore #from contracts import contract, new_contract #can ignore @@ -36,6 +36,8 @@ [('ref', str), ('AO', List[int]), ('DP', int), + ('QA', List[int]), + ('QR', int), ('chrom',str), ('pos', int), ('alt', List[str])]) @@ -43,6 +45,7 @@ MAJORITY_PERCENTAGE = 80 MIN_DEPTH = 10 +#PMut = NamedTuple('PMut', [('AO', int), ('DP', int), ('QA', int), ('QR', int)]) Mut = Tuple[str, str, int] ########### # Reducer # @@ -52,7 +55,8 @@ def make_consensus(reference, muts): # type: (str, List[Mut]) -> Tuple[str, List[Mut]] ''' Actually builds a consensus string by recursively applying the mutations.''' - def _do_build((accString, string, lastPos), (x, y, bigPos)): + def _do_build(t1, t2): # type: (Tuple[int,str,int], Tuple[str,str,int]) -> Tuple[str,str,int] + (accString, string, lastPos), (x, y, bigPos) = t1, t2 pos = bigPos - lastPos return (accString + (string[:pos] + y), string[pos+len(x):], bigPos+len(x)) result, remaining, _ = reduce(_do_build, muts, ('', reference, 0)) @@ -77,8 +81,8 @@ def call_base_multi_alts(min_depth, majority_percentage, dp, alts, ref): (call each base) based on the given rules (using call_base).""" #TODO: majority_percentage gets ignored, so replace constants #TODO: behavior is undefined if sum(AO) > dp. 
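# Worked illustration of the thresholds described above, with invented counts:
# at DP=100 and A=55, G=30, C=10, no single alt clears the 80% call, A and G
# clear the 20% ambiguity cutoff, and the joined, sorted key 'AG' maps to 'R'
# in AMBIGUITY_TABLE.
_counts, _dp = {'A': 55, 'G': 30, 'C': 10}, 100
_key = ''.join(sorted(b for b, n in _counts.items() if n / float(_dp) > 0.2))
assert _key == 'AG'  # AMBIGUITY_TABLE['AG'] == 'R'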
- if dp < min_depth: #could call REF here sometimes - return 'N' +# if dp < min_depth: #could call REF here sometimes +# return 'N' total_ao = lambda: sum(alts.values()) # avoid evaluating unless necessary if ref is None: # this is an insert @@ -121,7 +125,7 @@ def seq_count(acc, ao_and_nts): ao, nts = ao_and_nts return map(merge_sum, acc, [{nt:ao} for nt in nts]) # create a list of {base : count}, where the index matches the position - mut_dicts = reduce(seq_count, xs, [{}]) + mut_dicts = reduce(seq_count, xs, [{}]) # type: Iterable[Dict[str,int]] base_caller = lambda m,r: call_base_multi_alts(min_depth, majority_percentage, dp, m, r) # # # ?Callable[[Dict[Any,Any], str], str] res = map(base_caller, mut_dicts, ref) # trim None values at the end, (which indicate deletion) @@ -136,8 +140,11 @@ def flatten_vcf_record(rec): 'pos' : rec.POS, 'chrom' : rec.CHROM}, rec.INFO) if not hasattr(_rec['alt'], '__iter__'): #TODO: put this somewhere else - d = merge(_rec, dict(alt=[_rec['alt']], AO=[_rec['AO']])) + d = merge(_rec, dict(alt=[_rec['alt']], + AO=[_rec['AO']], + QA=[_rec['QA']])) else: d = _rec + d = keyfilter(VCFRow._fields.__contains__, d) return VCFRow(**d) ############## @@ -219,7 +226,7 @@ def run(ref_fasta, freebayes_vcf, outfile, mind, majority): outfile.close() return 0 -def main(): # type () -> None +def main(): # type: () -> None scheme = Schema( { '--vcf' : os.path.isfile, '--ref' : os.path.isfile, diff --git a/mypy/toolz/dicttoolz.py b/mypy/toolz/dicttoolz.py index 6f7178e..22feb1c 100644 --- a/mypy/toolz/dicttoolz.py +++ b/mypy/toolz/dicttoolz.py @@ -15,6 +15,8 @@ def merge_with(f, d1, d2): # type: (Callable[[V,V2], V3], Dict[K,V], Dict[K,V2]) def valfilter(f, d): # type: (Callable[[V], bool], Dict[K,V]) -> Dict[K,V] pass +def keyfilter(f, d): # type: (Callable[[K], bool], Dict[K,V]) -> Dict[K,V] + pass #from typing import Dict, Any, Callable, TypeVar From c556a76c34d7aa5c8d721443deec0b7b3742562c Mon Sep 17 00:00:00 2001 From: Panciera Date: Wed, 23 Mar 2016 16:52:08 -0400 Subject: [PATCH 37/41] more info in consensus's stuff --- bioframework/consensus.py | 128 +++++++++++++++++++++++--------------- tests/test_consensus.py | 6 +- 2 files changed, 83 insertions(+), 51 deletions(-) diff --git a/bioframework/consensus.py b/bioframework/consensus.py index 8dd433c..da29f64 100755 --- a/bioframework/consensus.py +++ b/bioframework/consensus.py @@ -10,9 +10,12 @@ --output,-o= output file [default: ] """ #stdlib +#TO type-check: (requires python3) +# $ MYPYPATH=$HOME/bioframework/mypy mypy --py2 --disallow-untyped-calls bioframework/consensus.py + from operator import itemgetter as get from functools import partial -from itertools import ifilter, imap, groupby, takewhile, repeat, starmap, izip_longest +from itertools import ifilter, ifilterfalse, imap, groupby, takewhile, repeat, starmap, izip_longest import os, sys import collections @@ -25,8 +28,8 @@ import sh #todo #from toolz import compose from toolz.dicttoolz import merge, dissoc, merge_with, valfilter, keyfilter #done -from docopt import docopt #ignore -from schema import Schema, Use #ignore +#from docopt import docopt #ignore +#from schema import Schema, Use #ignore #from contracts import contract, new_contract #can ignore #from mypy.types import VCFRow ############# @@ -45,7 +48,7 @@ MAJORITY_PERCENTAGE = 80 MIN_DEPTH = 10 -#PMut = NamedTuple('PMut', [('AO', int), ('DP', int), ('QA', int), ('QR', int)]) +PMut = NamedTuple('PMut', [('ALT', str), ('AO', int), ('DP', int), ('QA', int), ('QR', int)]) Mut = Tuple[str, str, int] 
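# Quick illustration of the two shapes used below, with invented values: a
# PMut records one candidate base in a single alignment column together with
# its counts and qualities, and a Mut is the (reference, called string,
# position) triple returned by call_many; all_consensuses shifts the 1-based
# VCF position to 0-based before handing Muts to make_consensus.
_example_pmut = PMut(ALT='T', AO=42, DP=50, QA=1400, QR=300)
_example_mut = ('CG', 'TT', 1)  # type: Mut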
########### # Reducer # @@ -55,7 +58,7 @@ def make_consensus(reference, muts): # type: (str, List[Mut]) -> Tuple[str, List[Mut]] ''' Actually builds a consensus string by recursively applying the mutations.''' - def _do_build(t1, t2): # type: (Tuple[int,str,int], Tuple[str,str,int]) -> Tuple[str,str,int] + def _do_build(t1, t2): # type: (Tuple[str,str,int], Tuple[str,str,int]) -> Tuple[str,str,int] (accString, string, lastPos), (x, y, bigPos) = t1, t2 pos = bigPos - lastPos return (accString + (string[:pos] + y), string[pos+len(x):], bigPos+len(x)) @@ -74,7 +77,7 @@ def _do_build(t1, t2): # type: (Tuple[int,str,int], Tuple[str,str,int]) -> Tuple #@contract(min_depth='number,>=0', majority_percentage='number,>=0,<=100',dp='number,>=0', ref='string|None',alts='dict(string: number)') def call_base_multi_alts(min_depth, majority_percentage, dp, alts, ref): - # type: (int, int, int, Dict[str, int], str) -> str + # type: (int, int, int, List[PMut], str) -> str """when there are multiple alts, zip through with each of them zip(*alts), character by character. compare the percentages, and sum the percentages for each base. (groupby, sum) pick each character @@ -83,58 +86,81 @@ def call_base_multi_alts(min_depth, majority_percentage, dp, alts, ref): #TODO: behavior is undefined if sum(AO) > dp. # if dp < min_depth: #could call REF here sometimes # return 'N' - total_ao = lambda: sum(alts.values()) # avoid evaluating unless necessary + get_ao = lambda x: x.AO + total_ao = sum(map(get_ao, alts)) if ref is None: # this is an insert - if total_ao()/float(dp) < .50: + if total_ao/float(dp) < .50: return ref # if the insert is above threshold, keep going and call the insert like a normal base - if '-' in alts: - if alts['-']/float(dp) > .50: # a deletion + is_insert = lambda x: x.ALT == '-' # type: Callable[[PMut],bool] + inserts = list(filter(is_insert, alts)) + if inserts: + assert len(inserts) == 1 + ins = inserts[0] + if ins.AO/float(dp) > .50: # a deletion return '' - dp -= alts['-'] - alts_without_insert = dissoc(alts, '-') + dp -= ins.AO + alts_without_insert = list(ifilterfalse(is_insert, alts)) else: alts_without_insert = alts - over_depth = lambda x: lambda depth: depth/float(dp) > x - picked_alt = valfilter(over_depth(0.8), alts_without_insert) + over_depth = lambda x: lambda y: y.AO/float(dp) > x + picked_alt = filter(over_depth(0.8), alts_without_insert) if picked_alt: - return picked_alt.keys()[0] + return picked_alt[0].ALT #add ref so that it will be considered in creating ambiguous base - alts_with_ref = merge(alts_without_insert, ({ref : (dp - total_ao()) } if ref else {})) - over20 = valfilter(over_depth(0.2), alts_with_ref) - as_ambiguous = ''.join(sorted(over20.keys())) + if ref: + alts.append(PMut(ALT=ref, AO=(dp-total_ao), DP=dp, QA=-1, QR=-1)) + over20 = filter(over_depth(0.2), alts) + as_ambiguous = ''.join(sorted(map(lambda x: x.ALT, over20))) # this could return a single base, (including the reference), becuase i.e. 
A => A in the ambiguity table return AMBIGUITY_TABLE[as_ambiguous] if as_ambiguous != '' else '' -#@contract(min_depth='number,>=0', majority_percentage='number,>=0,<=100', rec='dict', returns='tuple(string, string, int)') -def call_many(min_depth, majority_percentage, rec): - # type: (int, int, VCFRow) -> Mut - #TODO: switch to generators - muts = zip(rec.AO, rec.alt) - ref, dp, pos = rec.ref, rec.DP, rec.pos +def pmuts_from_row(row): + # type: (VCFRow) -> List[PMut] + muts = zip(row.QA, row.AO, row.alt) longest_len = max(map(lambda x: len(x[-1]), muts)) - longest_len = max(longest_len, len(ref)) + longest_len = max(longest_len, len(row.ref)) def fill_gap(r): - ao, s = r - return (ao, str(s) + (longest_len - len(s)) * '-') - xs = map(fill_gap, muts) # fill in the shorter alts with '-'. + qa, ao, s = r + return (qa, ao, str(s) + (longest_len - len(s)) * '-') def merge_sum(x,y): - return x if y is None else (y if x is None else merge_with(sum, x, y)) + def combine(tups): + return sum(map(get(0), tups)), sum(map(get(1), tups)) + #return x if y is None else (y if x is None else merge_with(sum, x, y)) + return x if y is None else (y if x is None else merge_with(combine, x, y)) def seq_count(acc, ao_and_nts): - ao, nts = ao_and_nts - return map(merge_sum, acc, [{nt:ao} for nt in nts]) + qa, ao, nts = ao_and_nts + #return map(merge_sum, acc, [{nt:ao, 'qa': qa} for nt in nts]) + return map(merge_sum, acc, [{nt:(ao, qa)} for nt in nts]) + xs = map(fill_gap, muts) # fill in the shorter alts with '-'. # create a list of {base : count}, where the index matches the position mut_dicts = reduce(seq_count, xs, [{}]) # type: Iterable[Dict[str,int]] - base_caller = lambda m,r: call_base_multi_alts(min_depth, majority_percentage, dp, m, r) # # # ?Callable[[Dict[Any,Any], str], str] - res = map(base_caller, mut_dicts, ref) + def to_pmuts(d): # type: (Dict[str,int]) -> List[PMut] + #qa = d.pop('qa') + #assert len(d.values()) == 1 + make = lambda k,v: PMut(k, AO=v[0], QA=v[1], DP=row.DP,QR=row.QR) + return list(starmap(make, d.items())) +# res = PMut(d.keys()[0], d.values()[0], row.DP, qa, row.QR) +# return res + res = list(map(to_pmuts, mut_dicts)) + return res + +def call_many(min_depth, majority_percentage, rec): + # type: (int, int, VCFRow) -> Mut + muts_by_column = pmuts_from_row(rec) + #TODO: switch to generators + base_caller = lambda m,r: call_base_multi_alts( + min_depth, majority_percentage, rec.DP, m, r) # # # ?Callable[[Dict[Any,Any], str], str] + res = map(base_caller, muts_by_column, rec.ref) # trim None values at the end, (which indicate deletion) result = takewhile(bool, res) - return (ref, ''.join(result), pos) + return (rec.ref, ''.join(result), rec.pos) #@contract(rec='dict',returns='dict') def flatten_vcf_record(rec): # type: (_Record) -> VCFRow + fields = ['ref', 'AO', 'DP', 'QA', 'QR', 'chrom' 'pos', 'alt'] _rec = merge({ 'alt' : rec.ALT, 'ref' : rec.REF, 'pos' : rec.POS, 'chrom' : rec.CHROM}, @@ -144,7 +170,7 @@ def flatten_vcf_record(rec): AO=[_rec['AO']], QA=[_rec['QA']])) else: d = _rec - d = keyfilter(VCFRow._fields.__contains__, d) + d = keyfilter(fields.__contains__, d) return VCFRow(**d) ############## @@ -226,17 +252,21 @@ def run(ref_fasta, freebayes_vcf, outfile, mind, majority): outfile.close() return 0 -def main(): # type: () -> None - scheme = Schema( - { '--vcf' : os.path.isfile, - '--ref' : os.path.isfile, - '--majority' : Use(int), - '--mind' : Use(int), - '--output' : Use(lambda x: sys.stdout if not x else open(x, 'w'))}) - raw_args = docopt(__doc__, version='Version 
1.0') - args = scheme.validate(raw_args) - run(args['--ref'], args['--vcf'], args['--output'], - args['--mind'], args['--output']) - -if __name__ == '__main__': - main() + + + + +#def main(): # type: () -> None +# scheme = Schema( +# { '--vcf' : os.path.isfile, +# '--ref' : os.path.isfile, +# '--majority' : Use(int), +# '--mind' : Use(int), +# '--output' : Use(lambda x: sys.stdout if not x else open(x, 'w'))}) +# raw_args = docopt(__doc__, version='Version 1.0') +# args = scheme.validate(raw_args) +# run(args['--ref'], args['--vcf'], args['--output'], +# args['--mind'], args['--output']) + +#if __name__ == '__main__': +# main() diff --git a/tests/test_consensus.py b/tests/test_consensus.py index 955e086..54ec1a1 100644 --- a/tests/test_consensus.py +++ b/tests/test_consensus.py @@ -2,6 +2,7 @@ from biotest.biohypothesis import ref_with_vcf_dicts_strategy_factory, \ make_seqrec, vcf_dict_strategy_factory from hypothesis import given +from collections import Counter #from fn import _ from hypothesis import strategies as st from hypothesis import given, assume @@ -84,8 +85,9 @@ def test_single_example(self): result = just_ref([ref], muts, 10, 80) self.assertEquals(expected, result) ref_with_vcf_dicts_strategy = ref_with_vcf_dicts_strategy_factory().map( - lambda (r, muts): (make_seqrec(muts[0]['chrom'], r), map(lambda d: VCFRow(**d), muts))) -from collections import Counter + #lambda (r, muts): (make_seqrec(muts[0]['chrom'], r), map(lambda d: VCFRow(**d), muts))) + lambda tup: (make_seqrec(tup[1][0]['chrom'], tup[0]), map(lambda d: VCFRow(QA=pos_int,QR=pos_int,**d), tup[1]))) + countof = lambda c: lambda x: Counter(x).get(c, 0) def run_cons(*args): _, alt_and_cons = all_consensuses(*args) From e770b20902469183baafceb710684675a6004e31 Mon Sep 17 00:00:00 2001 From: Panciera Date: Wed, 23 Mar 2016 17:35:09 -0400 Subject: [PATCH 38/41] add doc for low-depth handling --- bioframework/consensus.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/bioframework/consensus.py b/bioframework/consensus.py index da29f64..fc40b91 100755 --- a/bioframework/consensus.py +++ b/bioframework/consensus.py @@ -86,6 +86,21 @@ def call_base_multi_alts(min_depth, majority_percentage, dp, alts, ref): #TODO: behavior is undefined if sum(AO) > dp. # if dp < min_depth: #could call REF here sometimes # return 'N' + +# """ +# 1) "if the total quality of the alternates is < 200 (25 * 8), don't call it an alternate." +# 2) "if the total quality of the references is < 200, don't call the reference." +# 3) "if neither the reference nor the alternate can be called, call an N." +# """ +# altsTooLow = sum(map(lambda x: x.QA)) < 200 +# refTooLow = alts[0].QR < 200 +# +# if altsTooLow and refTooLow: +# return 'N' +# +# elif altsTooLow: +# return ref + get_ao = lambda x: x.AO total_ao = sum(map(get_ao, alts)) From c77a809b82b5f8f1ab56fac99fea82077e45f076 Mon Sep 17 00:00:00 2001 From: Panciera Date: Wed, 23 Mar 2016 17:35:54 -0400 Subject: [PATCH 39/41] restore lowdepth->N so tests can maybe pass --- bioframework/consensus.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bioframework/consensus.py b/bioframework/consensus.py index fc40b91..2ebcb58 100755 --- a/bioframework/consensus.py +++ b/bioframework/consensus.py @@ -84,8 +84,8 @@ def call_base_multi_alts(min_depth, majority_percentage, dp, alts, ref): (call each base) based on the given rules (using call_base).""" #TODO: majority_percentage gets ignored, so replace constants #TODO: behavior is undefined if sum(AO) > dp. 
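+    # e.g. with MIN_DEPTH = 10, a column covered by only dp=6 reads is called 'N'
+    # by the restored check below, whatever the alternate counts are (hypothetical dp).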
-#    if dp < min_depth: #could call REF here sometimes
-#        return 'N'
+    if dp < min_depth: #could call REF here sometimes
+        return 'N'

From 101f5aa51040491927e09dbb9849cb06b4f7cbfb Mon Sep 17 00:00:00 2001
From: Panciera
Date: Wed, 23 Mar 2016 17:42:07 -0400
Subject: [PATCH 40/41] fixed duplicate args error

---
 tests/test_consensus.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_consensus.py b/tests/test_consensus.py
index 54ec1a1..932bf81 100644
--- a/tests/test_consensus.py
+++ b/tests/test_consensus.py
@@ -86,7 +86,7 @@ def test_single_example(self):
         self.assertEquals(expected, result)
 ref_with_vcf_dicts_strategy = ref_with_vcf_dicts_strategy_factory().map(
     #lambda (r, muts): (make_seqrec(muts[0]['chrom'], r), map(lambda d: VCFRow(**d), muts)))
-    lambda tup: (make_seqrec(tup[1][0]['chrom'], tup[0]), map(lambda d: VCFRow(QA=pos_int,QR=pos_int,**d), tup[1])))
+    lambda tup: (make_seqrec(tup[1][0]['chrom'], tup[0]), map(lambda d: VCFRow(**d), tup[1])))
 
 countof = lambda c: lambda x: Counter(x).get(c, 0)
 def run_cons(*args):

From 97958974b59542fc96b10dc26a91a8dc3ed50d8b Mon Sep 17 00:00:00 2001
From: Panciera
Date: Mon, 17 Apr 2017 13:52:06 -0400
Subject: [PATCH 41/41] ignore

---
 newfile | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 newfile

diff --git a/newfile b/newfile
new file mode 100644
index 0000000..e69de29
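The per-column calling rules touched by patches 37-39 can be summarized in a small
standalone sketch. Everything below is illustrative only: PMut is re-declared locally,
the IUPAC table is a hand-written subset, and the 80%/20%/MIN_DEPTH thresholds simply
mirror the constants used in consensus.py.

    from collections import namedtuple

    PMut = namedtuple('PMut', 'ALT AO DP QA QR')
    AMBIGUITY = {'A': 'A', 'C': 'C', 'G': 'G', 'T': 'T',
                 'AG': 'R', 'CT': 'Y', 'ACG': 'V', 'ACGT': 'N'}

    def call_column(min_depth, dp, alts, ref):
        # low coverage: call 'N' outright (the check restored in PATCH 39)
        if dp < min_depth:
            return 'N'
        # a single dominant alternate (>80% of depth) is called as-is
        for a in alts:
            if a.AO / float(dp) > 0.8:
                return a.ALT
        # otherwise build an ambiguity code from every base over 20%, reference included
        counts = {a.ALT: a.AO for a in alts}
        counts[ref] = counts.get(ref, 0) + (dp - sum(a.AO for a in alts))
        over20 = ''.join(sorted(b for b, n in counts.items() if n / float(dp) > 0.2))
        return AMBIGUITY.get(over20, 'N')

    print(call_column(10, 100, [PMut('A', 85, 100, 3000, 400)], 'C'))   # 'A'
    print(call_column(10, 100, [PMut('A', 40, 100, 1500, 900),
                                PMut('G', 35, 100, 1200, 900)], 'C'))   # 'V'
    print(call_column(10, 6,   [PMut('A', 6, 6, 200, 0)], 'C'))         # 'N'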