diff --git a/.gitignore b/.gitignore
index 3bbebb7..c71be5a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,6 @@
 /*/*/__pycache__
 /*/*/*/__pycache__
 doc/sphinx/_build
+/test/tmp.*
+/test/hg38.fa
+/test/hg38.fa.fai
diff --git a/README b/README.md
similarity index 87%
rename from README
rename to README.md
index 89f1fcf..1af04f3 100644
--- a/README
+++ b/README.md
@@ -1,3 +1,15 @@
+<!-- vim-markdown-toc GFM -->
+
+* [CAVA v1.2.3 README](#cava-v123-readme)
+    * [1 INTRODUCTION](#1-introduction)
+    * [2 PUBLICATION](#2-publication)
+    * [3 DEPENDENCIES](#3-dependencies)
+    * [4 INSTALLATION ON LINUX OR MAC](#4-installation-on-linux-or-mac)
+    * [5 RUNNING CAVA](#5-running-cava)
+    * [6 DOCUMENTATION](#6-documentation)
+    * [7 LICENCE](#7-licence)
+
+<!-- vim-markdown-toc -->
 
 CAVA v1.2.3 README
 ==================
@@ -22,7 +34,7 @@ Márton Münz, Elise Ruark, Anthony Renwick, Emma Ramsay, Matthew Clarke, Shazia
 --------------
 
 To install and run CAVA v1.2.3 you will need the following dependencies installed:
-- Python 2.7.9 or later (Python2 series)
+- Python 3
 - GCC and GNU make
 - virtualenv
 
diff --git a/bin/CAVA.py b/bin/CAVA.py
index 102b625..7ddb738 100755
--- a/bin/CAVA.py
+++ b/bin/CAVA.py
@@ -1,11 +1,11 @@
-#!env/bin/python
+#!env/bin/python3
 
 from optparse import OptionParser
 from cava_ import main
 from cava_ import helper
 
 # Version
-version = 'v1.2.3'
+version = 'v2.0.0'
 
 # Read default configuration file name from the default_config_path file
 default_config_file = helper.defaultConfigPath()
diff --git a/bin/EnsemblDB.py b/bin/EnsemblDB.py
index 69fac2b..0dfadaa 100644
--- a/bin/EnsemblDB.py
+++ b/bin/EnsemblDB.py
@@ -1,10 +1,10 @@
-#!env/bin/python
+#!env/bin/python3
 
 from optparse import OptionParser
 from ensembldb import main
 
 # Version
-version = '1.2.3'
+version = '2.0.0'
 
 # Command line argument parsing
 descr = 'CAVA ensembl_db v' + version
diff --git a/bin/dbSNPDB.py b/bin/dbSNPDB.py
index 62893fe..47d3be5 100755
--- a/bin/dbSNPDB.py
+++ b/bin/dbSNPDB.py
@@ -1,4 +1,4 @@
-#!env/bin/python
+#!env/bin/python3
 
 import os
 import sys
@@ -54,7 +54,7 @@ def processData(options, datafile, IDs):
 
     sys.stdout.write('\rProcessing SNPs on all chrs ... OK')
     sys.stdout.flush()
-    print ''
+    print('')
     datafile.close()
     outfile.close()
     return counter
@@ -83,16 +83,16 @@ def printMetaData(datafile):
         line = line.strip()
         if line.startswith('#'):
             if line.startswith('##dbSNP_BUILD_ID='):
-                print 'Input file build: '+line[17:]
+                print('Input file build: '+line[17:])
                 ret = int(line[17:])
-            if line.startswith('##reference='): print 'Reference: '+line[12:]+'\n'
+            if line.startswith('##reference='): print('Reference: '+line[12:]+'\n')
         else:
             return ret
 
 #######################################################################################################################
 
 # Version
-version = '1.2.3'
+version = '2.0.0'
 
 descr =  'CAVA dbsnp_db v' + version
 epilog = '\nExample usage: CAVA-{}/dbsnp_db -d 00-All.vcf.gz -s 138 -o out\n\n'.format(version)
@@ -106,39 +106,39 @@ def printMetaData(datafile):
 
 # Check options
 if options.release is None:
-    print '\nError: no dbSNP release specified'
-    print 'Please use option -s to specify dbSNP release version\n'
+    print('\nError: no dbSNP release specified')
+    print('Please use option -s to specify dbSNP release version\n')
     quit()
 if options.data is None:
-    print '\nError: no 00-All.vcf.gz data file specified'
-    print 'Please use option -d to specify path to data file\n'
+    print('\nError: no 00-All.vcf.gz data file specified')
+    print('Please use option -d to specify path to data file\n')
     quit()
 if not os.path.isfile(options.data):
-    print '\nError: 00-All.vcf.gz file (' + options.data + ') cannot be found.\n'
+    print('\nError: 00-All.vcf.gz file (' + options.data + ') cannot be found.\n')
     quit()
 
 # Print out version information
-print "\n---------------------------------------------------------------------------------------"
-print 'CAVA ' + version + ' dbSNP database preparation tool (dbsnp_db) is now running.'
-print 'Started: ', datetime.datetime.now(), '\n'
+print("\n---------------------------------------------------------------------------------------")
+print('CAVA ' + version + ' dbSNP database preparation tool (dbsnp_db) is now running.')
+print('Started: ', datetime.datetime.now(), '\n')
 
 # Open data fie for reading
-datafile = gzip.open(options.data, 'r')
+datafile = gzip.open(options.data, 'rt')
 
 # Print out meta data
 build = printMetaData(datafile)
 datafile.seek(0)
 if int(options.release) > build:
-    print 'Error: requested release must be <=' + str(build) + '\n'
+    print('Error: requested release must be <=' + str(build) + '\n')
     quit()
 
 # Print out info
-print 'Requested dbSNP release: ' + str(options.release)
+print('Requested dbSNP release: ' + str(options.release))
 IDs = []
 if options.input is not None:
     IDs = readIDs(options.input)
-    print '\nInput file contains ' + str(len(IDs)) + ' dbSNP IDs to be included in the database'
-print ''
+    print('\nInput file contains ' + str(len(IDs)) + ' dbSNP IDs to be included in the database')
+print('')
 
 # Create compressed output file
 N = processData(options, datafile, IDs)
@@ -150,13 +150,13 @@ def printMetaData(datafile):
 os.remove(options.output)
 
 # Print out summary information
-print '\nA total of ' + str(N) + ' SNPs have been retrieved\n'
-print '---------------------'
-print 'Output files created:'
-print '---------------------'
-print options.output + '.gz (SNP database)'
-print options.output + '.gz.tbi (index file)'
-print ''
-print 'CAVA dbsnp_db successfully finished: ', datetime.datetime.now()
-print "---------------------------------------------------------------------------------------\n"
+print('\nA total of ' + str(N) + ' SNPs have been retrieved\n')
+print('---------------------')
+print('Output files created:')
+print('---------------------')
+print(options.output + '.gz (SNP database)')
+print(options.output + '.gz.tbi (index file)')
+print('')
+print('CAVA dbsnp_db successfully finished: ', datetime.datetime.now())
+print("---------------------------------------------------------------------------------------\n")
 
diff --git a/cava_/conseq.py b/cava_/conseq.py
index 2b10616..01b5d64 100755
--- a/cava_/conseq.py
+++ b/cava_/conseq.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 
 # CLASS annotation
@@ -219,4 +219,4 @@ def checkUTR(transcript, variant):
         y = variant.pos + len(variant.ref) - 1
     if transcript.isPositionOutsideCDS_5prime(x) or transcript.isPositionOutsideCDS_5prime(y): return 'UTR5'
     if transcript.isPositionOutsideCDS_3prime(x) or transcript.isPositionOutsideCDS_3prime(y): return 'UTR3'
-    return ''
\ No newline at end of file
+    return ''
diff --git a/cava_/core.py b/cava_/core.py
index 9049b54..024285d 100755
--- a/cava_/core.py
+++ b/cava_/core.py
@@ -1,11 +1,11 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 
 # Collection of basic classes and functions
 #######################################################################################################################
 
 
-from __future__ import division
+
 import os
 import logging
 import gzip
@@ -342,7 +342,7 @@ def output(self, outformat, outfile, options, genelist, transcriptlist, snplist,
 
             # Writing record to the output file
             if stdout:
-                print '\t'.join(record)
+                print('\t'.join(record))
             else:
                 outfile.write('\t'.join(record) + '\n')
 
@@ -381,7 +381,7 @@ def output(self, outformat, outfile, options, genelist, transcriptlist, snplist,
 
                     # Writing record to the output file
                     if stdout:
-                        print record + rest
+                        print(record + rest)
                     else:
                         outfile.write(record + rest + '\n')
 
@@ -748,13 +748,13 @@ def read(self):
             line = line.strip()
             if line.startswith('@'):
                 key = line[1:line.index('=')].strip()
-                if key in self.defs.keys():
+                if key in list(self.defs.keys()):
                     (typeofvar, default) = self.defs[key]
                     if typeofvar == 'string': self.args[key] = line[line.find('=') + 1:].strip()
                     if typeofvar == 'list': self.args[key] = line[line.find('=') + 1:].strip().split(',')
                     if typeofvar == 'boolean': self.args[key] = (line[line.find('=') + 1:].strip().upper() == 'TRUE')
-        for key, (typeofvar, default) in self.defs.iteritems():
-            if not key in self.args.keys(): self.args[key] = default
+        for key, (typeofvar, default) in self.defs.items():
+            if not key in list(self.args.keys()): self.args[key] = default
 
 
 #######################################################################################################################            
@@ -764,7 +764,7 @@ def read(self):
 # Reading gene, transcript or snp list from file
 def readSet(options, tag):
     ret = set()
-    if tag in options.args.keys() and not (options.args[tag] == '' or options.args[tag] == '.'):
+    if tag in list(options.args.keys()) and not (options.args[tag] == '' or options.args[tag] == '.'):
         for line in open(options.args[tag]):
             line = line.strip()
             if line == '' or line == '.': continue
@@ -807,7 +807,7 @@ def writeHeader(options, header, outfile, stdout):
     if options.args['outputformat'] == 'VCF':
         if header == '':
             if stdout:
-                print '##fileformat=VCFv4.1\n' + dateline + '\n' + headerinfo + '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO'
+                print('##fileformat=VCFv4.1\n' + dateline + '\n' + headerinfo + '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO')
             else:
                 outfile.write('##fileformat=VCFv4.1\n' + dateline + '\n' + headerinfo + '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n')
         else:
@@ -821,7 +821,7 @@ def writeHeader(options, header, outfile, stdout):
                     continue
                 headerm.append(x)
             if stdout:
-                print startline + '\n' + dateline + '\n' + headerinfo + '\n'.join(headerm)+'\n'
+                print(startline + '\n' + dateline + '\n' + headerinfo + '\n'.join(headerm)+'\n')
             else:
                 outfile.write(startline + '\n' + dateline + '\n' + headerinfo + '\n'.join(headerm)+'\n')
 
@@ -845,7 +845,7 @@ def writeHeader(options, header, outfile, stdout):
             str += '\tDBSNP'
 
         if stdout:
-            print str
+            print(str)
         else:
             outfile.write(str + '\n')
 
@@ -854,7 +854,7 @@ def writeHeader(options, header, outfile, stdout):
 def countRecords(filename):
     ret = 0
     if filename.endswith('.gz'):
-        inputf = gzip.open(filename, 'r')
+        inputf = gzip.open(filename, 'rt')
     else:
         inputf = open(filename)
     for line in inputf:
@@ -868,10 +868,10 @@ def checkOptions(options):
     # Checking if @inputformat was given correct value
     str = options.args['inputformat'].upper()
     if not (str == 'VCF' or str == 'TXT'):
-        print 'ERROR: incorrect value of the tag @inputformat.'
-        print '(Allowed values: \'VCF\' or \'TXT\')'
-        print '\nNo output file written. CAVA quit.'
-        print "--------------------------------------------------------------------\n"
+        print('ERROR: incorrect value of the tag @inputformat.')
+        print('(Allowed values: \'VCF\' or \'TXT\')')
+        print('\nNo output file written. CAVA quit.')
+        print("--------------------------------------------------------------------\n")
         if options.args['logfile']:
             logging.error('Incorrect value of the tag @inputformat.')
             logging.info('No output file written. CAVA quit.')
@@ -880,10 +880,10 @@ def checkOptions(options):
     # Checking if @outputformat was given correct value
     str = options.args['outputformat'].upper()
     if not (str == 'VCF' or str == 'TSV'):
-        print 'ERROR: incorrect value of the tag @outputformat.'
-        print '(Allowed values: \'VCF\' or \'TSV\')'
-        print '\nNo output file written. CAVA quit.'
-        print "--------------------------------------------------------------------\n"
+        print('ERROR: incorrect value of the tag @outputformat.')
+        print('(Allowed values: \'VCF\' or \'TSV\')')
+        print('\nNo output file written. CAVA quit.')
+        print("--------------------------------------------------------------------\n")
         if options.args['logfile']:
             logging.error('Incorrect value of the tag @outputformat.')
             logging.info('No output file written. CAVA quit.')
@@ -893,10 +893,10 @@ def checkOptions(options):
     str = options.args['type'].upper()
     if not (
                                     str == 'ALL' or str == 'SUBSTITUTION' or str == 'INDEL' or str == 'INSERTION' or str == 'DELETION' or str == 'COMPLEX'):
-        print 'ERROR: incorrect value of the tag @type.'
-        print '(Allowed values: \'all\', \'substitution\', \'indel\', \'insertion\', \'deletion\' or \'complex\')'
-        print '\nNo output file written. CAVA quit.'
-        print "--------------------------------------------------------------------\n"
+        print('ERROR: incorrect value of the tag @type.')
+        print('(Allowed values: \'all\', \'substitution\', \'indel\', \'insertion\', \'deletion\' or \'complex\')')
+        print('\nNo output file written. CAVA quit.')
+        print("--------------------------------------------------------------------\n")
         if options.args['logfile']:
             logging.error('Incorrect value of the tag @type.')
             logging.info('No output file written. CAVA quit.')
@@ -905,10 +905,10 @@ def checkOptions(options):
     # Checking if @ssrange was given correct value
     ssrange = int(options.args['ssrange'])
     if not ssrange >= 6:
-        print 'ERROR: incorrect value of the tag @ssrange.'
-        print '(Minimum value allowed is 6.)'
-        print '\nNo output file written. CAVA quit.'
-        print "--------------------------------------------------------------------\n"
+        print('ERROR: incorrect value of the tag @ssrange.')
+        print('(Minimum value allowed is 6.)')
+        print('\nNo output file written. CAVA quit.')
+        print("--------------------------------------------------------------------\n")
         if options.args['logfile']:
             logging.error('Incorrect value of the tag @ssrange.')
             logging.info('No output file written. CAVA quit.')
@@ -917,10 +917,10 @@ def checkOptions(options):
     # Checking if @ontology was given correct value
     str = options.args['ontology'].upper()
     if not (str == 'CLASS' or str == 'SO' or str == 'BOTH'):
-        print 'ERROR: incorrect value of the tag @ontology.'
-        print '(Allowed values: \'CLASS\' or \'SO\' or \'both\')'
-        print '\nNo output file written. CAVA quit.'
-        print "--------------------------------------------------------------------\n"
+        print('ERROR: incorrect value of the tag @ontology.')
+        print('(Allowed values: \'CLASS\' or \'SO\' or \'both\')')
+        print('\nNo output file written. CAVA quit.')
+        print("--------------------------------------------------------------------\n")
         if options.args['logfile']:
             logging.error('Incorrect value of the tag @ontology.')
             logging.info('No output file written. CAVA quit.')
@@ -928,9 +928,9 @@ def checkOptions(options):
 
     # Checking if @reference file exists
     if not os.path.isfile(options.args['reference']):
-        print 'ERROR: the file given as @reference does not exist.'
-        print '\nNo output file written. CAVA quit.'
-        print "--------------------------------------------------------------------\n"
+        print('ERROR: the file given as @reference does not exist.')
+        print('\nNo output file written. CAVA quit.')
+        print("--------------------------------------------------------------------\n")
         if options.args['logfile']:
             logging.error('The file given as @reference does not exist.')
             logging.info('No output file written. CAVA quit.')
@@ -938,9 +938,9 @@ def checkOptions(options):
 
     # Checking if @reference index file exists
     if not os.path.isfile(options.args['reference'] + '.fai'):
-        print 'ERROR: the .fa.fai index file for @reference is not found.'
-        print '\nNo output file written. CAVA quit.'
-        print "--------------------------------------------------------------------\n"
+        print('ERROR: the .fa.fai index file for @reference is not found.')
+        print('\nNo output file written. CAVA quit.')
+        print("--------------------------------------------------------------------\n")
         if options.args['logfile']:
             logging.error('The .fa.fai index file for @reference is not found.')
             logging.info('No output file written. CAVA quit.')
@@ -949,9 +949,9 @@ def checkOptions(options):
     # Checking if @ensembl file exists
     if not (options.args['ensembl'] == '.' or options.args['ensembl'] == '') and not os.path.isfile(
             options.args['ensembl']):
-        print 'ERROR: the file given as @ensembl does not exist.'
-        print '\nNo output file written. CAVA quit.'
-        print "--------------------------------------------------------------------\n"
+        print('ERROR: the file given as @ensembl does not exist.')
+        print('\nNo output file written. CAVA quit.')
+        print("--------------------------------------------------------------------\n")
         if options.args['logfile']:
             logging.error('The file given as @ensembl does not exist.')
             logging.info('No output file written. CAVA quit.')
@@ -960,9 +960,9 @@ def checkOptions(options):
     # Checking if @ensembl index file exists
     if not (options.args['ensembl'] == '.' or options.args['ensembl'] == '') and not os.path.isfile(
                     options.args['ensembl'] + '.tbi'):
-        print 'ERROR: the .gz.tbi index file for @ensembl is not found.'
-        print '\nNo output file written. CAVA quit.'
-        print "--------------------------------------------------------------------\n"
+        print('ERROR: the .gz.tbi index file for @ensembl is not found.')
+        print('\nNo output file written. CAVA quit.')
+        print("--------------------------------------------------------------------\n")
         if options.args['logfile']:
             logging.error('The .gz.tbi index file for @ensembl is not found.')
             logging.info('No output file written. CAVA quit.')
@@ -970,9 +970,9 @@ def checkOptions(options):
 
     # Checking if @dbsnp file exists
     if not (options.args['dbsnp'] == '.' or options.args['dbsnp'] == '') and not os.path.isfile(options.args['dbsnp']):
-        print 'ERROR: the file given as @dbsnp does not exist.'
-        print '\nNo output file written. CAVA quit.'
-        print "--------------------------------------------------------------------\n"
+        print('ERROR: the file given as @dbsnp does not exist.')
+        print('\nNo output file written. CAVA quit.')
+        print("--------------------------------------------------------------------\n")
         if options.args['logfile']:
             logging.error('The file given as @dbsnp does not exist.')
             logging.info('No output file written. CAVA quit.')
@@ -981,9 +981,9 @@ def checkOptions(options):
     # Checking if @dbsnp index file exists
     if not (options.args['dbsnp'] == '.' or options.args['dbsnp'] == '') and not os.path.isfile(
                     options.args['dbsnp'] + '.tbi'):
-        print 'ERROR: the .gz.tbi index file for @dbsnp is not found.'
-        print '\nNo output file written. CAVA quit.'
-        print "--------------------------------------------------------------------\n"
+        print('ERROR: the .gz.tbi index file for @dbsnp is not found.')
+        print('\nNo output file written. CAVA quit.')
+        print("--------------------------------------------------------------------\n")
         if options.args['logfile']:
             logging.error('The .gz.tbi index file for @dbsnp is not found.')
             logging.info('No output file written. CAVA quit.')
@@ -992,9 +992,9 @@ def checkOptions(options):
     # Checking if @target file exists
     if not (options.args['target'] == '.' or options.args['target'] == '') and not os.path.isfile(
             options.args['target']):
-        print 'ERROR: the file given as @target does not exist.'
-        print '\nNo output file written. CAVA quit.'
-        print "--------------------------------------------------------------------\n"
+        print('ERROR: the file given as @target does not exist.')
+        print('\nNo output file written. CAVA quit.')
+        print("--------------------------------------------------------------------\n")
         if options.args['logfile']:
             logging.error('The file given as @target does not exist.')
             logging.info('No output file written. CAVA quit.')
@@ -1003,9 +1003,9 @@ def checkOptions(options):
     # Checking if @target index file exists
     if not (options.args['target'] == '.' or options.args['target'] == '') and not os.path.isfile(
                     options.args['target'] + '.tbi'):
-        print 'ERROR: the .bed.tbi index file for @target is not found.'
-        print '\nNo output file written. CAVA quit.'
-        print "--------------------------------------------------------------------\n"
+        print('ERROR: the .bed.tbi index file for @target is not found.')
+        print('\nNo output file written. CAVA quit.')
+        print("--------------------------------------------------------------------\n")
         if options.args['logfile']:
             logging.error('The .bed.tbi index file for @target is not found.')
             logging.info('No output file written. CAVA quit.')
@@ -1014,9 +1014,9 @@ def checkOptions(options):
     # Checking if @genelist file exists
     if not (options.args['genelist'] == '.' or options.args['genelist'] == '') and not os.path.isfile(
             options.args['genelist']):
-        print 'ERROR: the file given as @genelist does not exist.'
-        print '\nNo output file written. CAVA quit.'
-        print "--------------------------------------------------------------------\n"
+        print('ERROR: the file given as @genelist does not exist.')
+        print('\nNo output file written. CAVA quit.')
+        print("--------------------------------------------------------------------\n")
         if options.args['logfile']:
             logging.error('The file given as @genelist does not exist.')
             logging.info('No output file written. CAVA quit.')
@@ -1025,9 +1025,9 @@ def checkOptions(options):
     # Checking if @transcriptlist file exists
     if not (options.args['transcriptlist'] == '.' or options.args['transcriptlist'] == '') and not os.path.isfile(
             options.args['transcriptlist']):
-        print 'ERROR: the file given as @transcriptlist does not exist.'
-        print '\nNo output file written. CAVA quit.'
-        print "--------------------------------------------------------------------\n"
+        print('ERROR: the file given as @transcriptlist does not exist.')
+        print('\nNo output file written. CAVA quit.')
+        print("--------------------------------------------------------------------\n")
         if options.args['logfile']:
             logging.error('The file given as @transcriptlist does not exist.')
             logging.info('No output file written. CAVA quit.')
@@ -1036,9 +1036,9 @@ def checkOptions(options):
     # Checking if @snplist file exists
     if not (options.args['snplist'] == '.' or options.args['snplist'] == '') and not os.path.isfile(
             options.args['snplist']):
-        print 'ERROR: the file given as @snplist does not exist.'
-        print '\nNo output file written. CAVA quit.'
-        print "--------------------------------------------------------------------\n"
+        print('ERROR: the file given as @snplist does not exist.')
+        print('\nNo output file written. CAVA quit.')
+        print("--------------------------------------------------------------------\n")
         if options.args['logfile']:
             logging.error('The file given as @snplist does not exist.')
             logging.info('No output file written. CAVA quit.')
diff --git a/cava_/csn.py b/cava_/csn.py
index 89fc1e5..77ea7ca 100755
--- a/cava_/csn.py
+++ b/cava_/csn.py
@@ -1,10 +1,10 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 
 # CSN annotation
 #######################################################################################################################
 
-import core
+from . import core
 
 
 # Class representing a CSN annotation
@@ -175,7 +175,7 @@ def makeProteinString(variant, transcript, reference, prot, mutprot, coord1):
 
     # Checking if there was no change in protein sequence
     if prot == mutprot:
-        idx = coord1 / 3
+        idx = int(coord1 / 3)
         if coord1 % 3 > 0: idx += 1
         return '_p.=', (str(idx), prot[idx-1], prot[idx-1])
 
@@ -281,7 +281,7 @@ def transformToCSNCoordinate(pos, transcript):
                 if transcript.strand == 1:
                     # Checking if genomic position is within intron
                     if prevExonEnd < pos < exon.start + 1:
-                        if pos <= (exon.start + 1 - prevExonEnd) / 2 + prevExonEnd:
+                        if pos <= int((exon.start + 1 - prevExonEnd) / 2) + prevExonEnd:
                             x, y = transformToCSNCoordinate(prevExonEnd, transcript)
                             return x, pos - prevExonEnd
                         else:
@@ -290,7 +290,7 @@ def transformToCSNCoordinate(pos, transcript):
                 else:
                     # Checking if genomic position is within intron
                     if exon.end < pos < prevExonEnd:
-                        if pos >= (prevExonEnd - exon.end + 1) / 2 + exon.end:
+                        if pos >= int((prevExonEnd - exon.end + 1) / 2) + exon.end:
                             x, y = transformToCSNCoordinate(prevExonEnd, transcript)
                             return x, prevExonEnd - pos
                         else:
@@ -320,7 +320,7 @@ def transformToCSNCoordinate(pos, transcript):
                 if transcript.strand == 1:
                     # Checking if genomic position is within intron
                     if prevExonEnd < pos < exon.start + 1:
-                        if pos <= (exon.start + 1 - prevExonEnd) / 2 + prevExonEnd:
+                        if pos <= int((exon.start + 1 - prevExonEnd) / 2) + prevExonEnd:
                             x, y = transformToCSNCoordinate(prevExonEnd, transcript)
                             return x, pos - prevExonEnd
                         else:
@@ -329,7 +329,7 @@ def transformToCSNCoordinate(pos, transcript):
                 else:
                     # Checking if genomic position is within intron
                     if exon.end < pos < prevExonEnd:
-                        if pos >= (prevExonEnd - exon.end + 1) / 2 + exon.end:
+                        if pos >= int((prevExonEnd - exon.end + 1) / 2) + exon.end:
                             x, y = transformToCSNCoordinate(prevExonEnd, transcript)
                             return x, prevExonEnd - pos
                         else:
diff --git a/cava_/data.py b/cava_/data.py
index 5fa95ef..7d03f72 100755
--- a/cava_/data.py
+++ b/cava_/data.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 
 # Classes providing interfaces with annotation databases and the reference genome
@@ -7,9 +7,9 @@
 import os
 import sys
 
-import conseq
-import core
-import csn
+from . import conseq
+from . import core
+from . import csn
 
 sys.path.insert(0, os.path.dirname(os.path.realpath(__file__)) + '/pysamdir')
 import pysam
@@ -69,22 +69,22 @@ def findTranscripts(self, variant, strand, reference):
                 hitdict2[transcript.TRANSCRIPT] = transcript
 
             # Find transcripts with which the variant fully or partially overlaps
-            for key, transcript in hitdict1.iteritems():
+            for key, transcript in hitdict1.items():
 
                 if len(self.genelist) > 0 and transcript.geneSymbol not in self.genelist: continue
                 if len(self.transcriptlist) > 0 and transcript.TRANSCRIPT not in self.transcriptlist: continue
 
-                if key in hitdict2.keys():
+                if key in list(hitdict2.keys()):
                     ret[key] = transcript
                 else:
                     if not variant.isInsertion(): retOUT[key] = transcript
 
             if not variant.isInsertion():
-                for key, transcript in hitdict2.iteritems():
+                for key, transcript in hitdict2.items():
                     if len(self.genelist) > 0 and transcript.geneSymbol not in self.genelist: continue
                     if len(self.transcriptlist) > 0 and transcript.TRANSCRIPT not in self.transcriptlist: continue
 
-                    if not key in hitdict1.keys():
+                    if not key in list(hitdict1.keys()):
                         retOUT[key] = transcript
 
         else:
@@ -190,8 +190,8 @@ def annotate(self, variant, reference, impactdir):
         transcripts_plus, transcriptsOUT_plus = self.findTranscripts(variant_plus, 1, reference)
         transcripts_minus, transcriptsOUT_minus = self.findTranscripts(variant_minus, -1, reference)
 
-        transcripts = set(transcripts_plus.keys() + transcripts_minus.keys())
-        transcriptsOUT = set(transcriptsOUT_plus.keys() + transcriptsOUT_minus.keys())
+        transcripts = set(list(transcripts_plus.keys()) + list(transcripts_minus.keys()))
+        transcriptsOUT = set(list(transcriptsOUT_plus.keys()) + list(transcriptsOUT_minus.keys()))
 
         transcripts = sorted(list(transcripts))
         transcriptsOUT = sorted(list(transcriptsOUT))
@@ -201,7 +201,7 @@ def annotate(self, variant, reference, impactdir):
 
             if TRANSCRIPT in transcripts: continue
 
-            if TRANSCRIPT in transcriptsOUT_plus.keys(): transcript = transcriptsOUT_plus[TRANSCRIPT]
+            if TRANSCRIPT in list(transcriptsOUT_plus.keys()): transcript = transcriptsOUT_plus[TRANSCRIPT]
             else: transcript = transcriptsOUT_minus[TRANSCRIPT]
 
             if len(TRANSCRIPTstring) > 0:
@@ -228,10 +228,10 @@ def annotate(self, variant, reference, impactdir):
             TRINFOstring += transcript.TRINFO
 
             if transcript.strand == 1:
-                if TRANSCRIPT in transcriptsOUT_plus.keys(): LOCstring += 'OUT'
+                if TRANSCRIPT in list(transcriptsOUT_plus.keys()): LOCstring += 'OUT'
                 else: LOCstring += '.'
             else:
-                if TRANSCRIPT in transcriptsOUT_minus.keys(): LOCstring += 'OUT'
+                if TRANSCRIPT in list(transcriptsOUT_minus.keys()): LOCstring += 'OUT'
                 else: LOCstring += '.'
 
             CSNstring += '.'
@@ -250,7 +250,7 @@ def annotate(self, variant, reference, impactdir):
         # Iterating through the list of transcripts
         for TRANSCRIPT in transcripts:
 
-            if TRANSCRIPT in transcripts_plus.keys(): transcript = transcripts_plus[TRANSCRIPT]
+            if TRANSCRIPT in list(transcripts_plus.keys()): transcript = transcripts_plus[TRANSCRIPT]
             else: transcript = transcripts_minus[TRANSCRIPT]
 
             # Separating annotations by different transcripts with colon
@@ -279,13 +279,13 @@ def annotate(self, variant, reference, impactdir):
             TRINFOstring += transcript.TRINFO
 
             # Creating the LOC annotation
-            if TRANSCRIPT in transcripts_plus.keys(): loc_plus = transcript.whereIsThisVariant(variant_plus)
-            elif TRANSCRIPT in transcriptsOUT_plus.keys(): loc_plus = 'OUT'
+            if TRANSCRIPT in list(transcripts_plus.keys()): loc_plus = transcript.whereIsThisVariant(variant_plus)
+            elif TRANSCRIPT in list(transcriptsOUT_plus.keys()): loc_plus = 'OUT'
             else: loc_plus = '.'
 
             if difference:
-                if TRANSCRIPT in transcripts_minus.keys(): loc_minus = transcript.whereIsThisVariant(variant_minus)
-                elif TRANSCRIPT in transcriptsOUT_minus.keys(): loc_minus = 'OUT'
+                if TRANSCRIPT in list(transcripts_minus.keys()): loc_minus = transcript.whereIsThisVariant(variant_minus)
+                elif TRANSCRIPT in list(transcriptsOUT_minus.keys()): loc_minus = 'OUT'
                 else: loc_minus = '.'
             else:
                 loc_minus = loc_plus
@@ -299,7 +299,7 @@ def annotate(self, variant, reference, impactdir):
             if notexonic_plus and notexonic_minus:
                 protein = ''
             else:
-                if not transcript.TRANSCRIPT in self.proteinSeqs.keys():
+                if not transcript.TRANSCRIPT in list(self.proteinSeqs.keys()):
                     protein, exonseqs = transcript.getProteinSequence(reference, None, None)
 
                     if len(self.proteinSeqs) > 5:
@@ -327,13 +327,13 @@ def annotate(self, variant, reference, impactdir):
 
 
             # Creating the CSN annotations both for left and right aligned variant
-            if TRANSCRIPT in transcripts_plus.keys():
+            if TRANSCRIPT in list(transcripts_plus.keys()):
                 csn_plus, protchange_plus = csn.getAnnotation(variant_plus, transcript, reference, protein, mutprotein_plus)
                 csn_plus_str = csn_plus.getAsString()
             else: csn_plus_str, protchange_plus = '.', ('.','.','.')
 
             if difference:
-                if TRANSCRIPT in transcripts_minus.keys():
+                if TRANSCRIPT in list(transcripts_minus.keys()):
                     csn_minus, protchange_minus = csn.getAnnotation(variant_minus, transcript, reference, protein, mutprotein_minus)
                     csn_minus_str = csn_minus.getAsString()
                 else: csn_minus_str, protchange_minus = '.', ('.','.','.')
@@ -353,12 +353,12 @@ def annotate(self, variant, reference, impactdir):
             if not impactdir == None or self.options.args['ontology'].upper() in ['CLASS', 'BOTH']:
 
                 # Creating the CLASS annotations both for left and right aligned variant
-                if TRANSCRIPT in transcripts_plus.keys():
+                if TRANSCRIPT in list(transcripts_plus.keys()):
                     class_plus = conseq.getClassAnnotation(variant_plus, transcript, protein, mutprotein_plus, loc_plus, int(self.options.args['ssrange']))
                 else: class_plus = '.'
 
                 if difference:
-                    if TRANSCRIPT in transcripts_minus.keys():
+                    if TRANSCRIPT in list(transcripts_minus.keys()):
                         class_minus = conseq.getClassAnnotation(variant_minus, transcript, protein, mutprotein_minus, loc_minus, int(self.options.args['ssrange']))
                     else: class_minus = '.'
                 else: class_minus = class_plus
@@ -367,14 +367,14 @@ def annotate(self, variant, reference, impactdir):
             # Determining the IMPACT flag
             if not impactdir == None:
 
-                if TRANSCRIPT in transcripts_plus.keys():
-                    if class_plus in impactdir.keys(): impact_plus = impactdir[class_plus]
+                if TRANSCRIPT in list(transcripts_plus.keys()):
+                    if class_plus in list(impactdir.keys()): impact_plus = impactdir[class_plus]
                     else: impact_plus = 'None'
                 else:
                     impact_plus = '.'
 
-                if TRANSCRIPT in transcripts_minus.keys():
-                    if class_minus in impactdir.keys(): impact_minus = impactdir[class_minus]
+                if TRANSCRIPT in list(transcripts_minus.keys()):
+                    if class_minus in list(impactdir.keys()): impact_minus = impactdir[class_minus]
                     else: impact_minus = 'None'
                 else:
                     impact_minus = '.'
@@ -383,12 +383,12 @@ def annotate(self, variant, reference, impactdir):
             if self.options.args['ontology'].upper() in ['SO', 'BOTH']:
                 # Creating the SO annotations both for left and right aligned variant
 
-                if TRANSCRIPT in transcripts_plus.keys():
+                if TRANSCRIPT in list(transcripts_plus.keys()):
                     so_plus = conseq.getSequenceOntologyAnnotation(variant_plus, transcript, protein, mutprotein_plus, loc_plus)
                 else: so_plus = '.'
 
                 if difference:
-                    if TRANSCRIPT in transcripts_minus.keys():
+                    if TRANSCRIPT in list(transcripts_minus.keys()):
                         so_minus = conseq.getSequenceOntologyAnnotation(variant_minus, transcript, protein, mutprotein_minus, loc_minus)
                     else: so_minus = '.'
 
diff --git a/cava_/main.py b/cava_/main.py
index 6669ce3..15b164b 100644
--- a/cava_/main.py
+++ b/cava_/main.py
@@ -1,6 +1,3 @@
-
-from __future__ import division
-
 import datetime
 import gzip
 import logging
@@ -9,20 +6,20 @@
 import sys
 import pysam
 
-import core
+from . import core
 from cava_.data import Ensembl
 from cava_.data import Reference
 from cava_.data import dbSNP
-from core import Options
-from core import Record
+from .core import Options
+from .core import Record
 
 
 # Printing out welcome meassage
 def printStartInfo(ver):
     starttime = datetime.datetime.now()
-    print "\n-----------------------------------------------------------------------"
-    print 'CAVA (Clinical Annotation of VAriants) ' + ver + ' is now running.'
-    print 'Started: ', str(starttime), '\n'
+    print("\n-----------------------------------------------------------------------")
+    print('CAVA (Clinical Annotation of VAriants) ' + ver + ' is now running.')
+    print('Started: ', str(starttime), '\n')
     return starttime
 
 
@@ -32,12 +29,12 @@ def printInputFileNames(copts, options):
         outfn = copts.output + '.vcf'
     else:
         outfn = copts.output + '.txt'
-    print 'Configuration file:  ' + copts.conf
-    print 'Input file (' + options.args['inputformat'] + '):    ' + copts.input
-    print 'Output file (' + options.args['outputformat'] + '):   ' + outfn
-    if options.args['logfile']:  print 'Log file:            ' + copts.output + '.log'
+    print('Configuration file:  ' + copts.conf)
+    print('Input file (' + options.args['inputformat'] + '):    ' + copts.input)
+    print('Output file (' + options.args['outputformat'] + '):   ' + outfn)
+    if options.args['logfile']:  print('Log file:            ' + copts.output + '.log')
     if copts.threads > 1:
-        print '\nMultithreading:      ' + str(copts.threads) + ' threads'
+        print('\nMultithreading:      ' + str(copts.threads) + ' threads')
 
     if options.args['logfile']:
         logging.info('Configuration file - ' + copts.conf)
@@ -52,7 +49,7 @@ def printInputFileNames(copts, options):
 
 # Printing out number of records in the input file
 def printNumOfRecords(numOfRecords):
-    print '\nInput file contains ' + str(numOfRecords) + ' records to annotate.\n'
+    print('\nInput file contains ' + str(numOfRecords) + ' records to annotate.\n')
 
 
 # Initializing progress information
@@ -73,7 +70,7 @@ def printProgressInfo(counter, numOfRecords):
 def finalizeProgressInfo():
     sys.stdout.write('\rAnnotating variants ... 100.0%')
     sys.stdout.flush()
-    print ' - Done.'
+    print(' - Done.')
 
 
 # Printing out goodbye message
@@ -83,11 +80,11 @@ def printEndInfo(options, copts, starttime):
         outfn = copts.output + '.vcf'
     else:
         outfn = copts.output + '.txt'
-    print '\n(Size of output file: ' + str(round(os.stat(outfn).st_size / 1000, 1)) + ' Kbyte)'
-    print '\nCAVA (Clinical Annotation of VAriants) successfully finished.'
-    print 'Ended: ', str(endtime)
-    print 'Total runtime: '+str(endtime-starttime)
-    print "-----------------------------------------------------------------------\n"
+    print('\n(Size of output file: ' + str(round(os.stat(outfn).st_size / 1000, 1)) + ' Kbyte)')
+    print('\nCAVA (Clinical Annotation of VAriants) successfully finished.')
+    print('Ended: ', str(endtime))
+    print('Total runtime: '+str(endtime-starttime))
+    print("-----------------------------------------------------------------------\n")
     if options.args['logfile']:
         logging.info('100% of records annotated.')
         if not copts.stdout: logging.info('Output file = ' + str(round(os.stat(outfn).st_size / 1000, 1)) + ' Kbyte')
@@ -100,7 +97,7 @@ def findFileBreaks(inputf, threads):
     started = False
     counter = 0
 
-    if inputf.endswith('.gz'): infile = gzip.open(inputf, 'r')
+    if inputf.endswith('.gz'): infile = gzip.open(inputf, 'rt')
     else: infile = open(inputf)
 
     for line in infile:
@@ -123,7 +120,7 @@ def findFileBreaks(inputf, threads):
 def readHeader(inputfn):
     ret = []
 
-    if inputfn.endswith('.gz'): infile = gzip.open(inputfn, 'r')
+    if inputfn.endswith('.gz'): infile = gzip.open(inputfn, 'rt')
     else: infile = open(inputfn)
 
     for line in infile:
@@ -197,7 +194,7 @@ def __init__(self, threadidx, options, copts, startline, endline, genelist, tran
 
         # Input file
         if copts.input.endswith('.gz'):
-            self.infile = gzip.open(copts.input, 'r')
+            self.infile = gzip.open(copts.input, 'rt')
         else:
             self.infile = open(copts.input)
 
@@ -310,14 +307,14 @@ def run(copts, version, default_config_file):
 
     # Check if input and configuration files exist
     if copts.conf is None:
-        print '\nError: no configuration file specified.'
-        print 'Please use option -c or add the absolute path to the default_config_path file.\n'
+        print('\nError: no configuration file specified.')
+        print('Please use option -c or add the absolute path to the default_config_path file.\n')
         quit()
     if not os.path.isfile(copts.conf):
-        print '\nError: configuration file (' + copts.conf + ') cannot be found.\n'
+        print('\nError: configuration file (' + copts.conf + ') cannot be found.\n')
         quit()
     if not os.path.isfile(copts.input):
-        print '\nError: input file (' + copts.input + ') cannot be found.\n'
+        print('\nError: input file (' + copts.input + ') cannot be found.\n')
         quit()
 
     # Reading options from configuration file
@@ -393,4 +390,4 @@ def run(copts, version, default_config_file):
 
     # Printing out summary information and end time
     if not copts.stdout:
-        printEndInfo(options, copts, starttime)
\ No newline at end of file
+        printEndInfo(options, copts, starttime)
diff --git a/ensembldb/main.py b/ensembldb/main.py
index 29ba886..be22077 100644
--- a/ensembldb/main.py
+++ b/ensembldb/main.py
@@ -1,13 +1,11 @@
-from __future__ import division
 import os
 import datetime
 import sys
-import urllib
+import urllib.request, urllib.parse, urllib.error
 import gzip
 import pysam
 from operator import itemgetter
 
-
 # Class representing a transcript
 class Transcript(object):
 
@@ -22,8 +20,8 @@ def __init__(self):
         self.POSEND = None
         self.GENETYPE = None
         self.TRANSTYPE = None
-        self.CODING_START = None
-        self.CODING_END = None
+        self.CODING_START = -1
+        self.CODING_END = -1
         self.CODING_START_RELATIVE = None
         self.CCDS = None
         self.EXONS = []
@@ -37,7 +35,7 @@ def getInfoString(self):
         if self.STRAND == '1': ret = '+/'
         else: ret = '-/'
         cdna = self.getcDNALength()
-        return ret+str(round((self.POSEND-self.POS+1)/1000,1))+'kb/'+str(len(self.EXONS))+'/'+str(round(cdna/1000,1))+'kb/'+str(self.getProteinLength())
+        return ret+str(self.POSEND-self.POS+1)+'bp/'+str(len(self.EXONS))+'/'+str(cdna)+'bp/'+str(self.getProteinLength())
 
 
     # Get cDNA length of the transcript
@@ -54,10 +52,14 @@ def getProteinLength(self):
             for exon in self.EXONS:
                 if exon.END < self.CODING_START: continue
                 if exon.START > self.CODING_END: continue
-                if exon.START <= self.CODING_START <= exon.END: start = self.CODING_START
-                else: start = exon.START + 1
-                if exon.START <= self.CODING_END <= exon.END: end = self.CODING_END
-                else: end = exon.END
+                if exon.START <= self.CODING_START <= exon.END: 
+                    start = self.CODING_START
+                else: 
+                    start = exon.START + 1
+                if exon.START <= self.CODING_END <= exon.END: 
+                    end = self.CODING_END
+                else: 
+                    end = exon.END
                 codingdna += end - start + 1
         else:
             for exon in self.EXONS:
@@ -73,8 +75,10 @@ def getProteinLength(self):
 
     # Check if it is a candidate transcript
     def isCandidate(self):
-        if not (self.GENETYPE=='protein_coding' and self.TRANSTYPE=='protein_coding'): return False
-        return (self.CODING_START is not None and self.CODING_END is not None) and self.isComplete
+        if not (self.GENETYPE=='protein_coding' and self.TRANSTYPE=='protein_coding'): 
+            return False
+        # return (self.CODING_START is not None and self.CODING_END is not None) and self.isComplete
+        return (self.CODING_START > -1 and self.CODING_END > -1) and self.isComplete
 
 
     # Output transcript
@@ -138,18 +142,27 @@ def __init__(self, symbol, ensg):
     def selectTranscript(self):
         ccds_set = []
         nonccds_set = []
-        for enst,transcript in self.TRANSCRIPTS.iteritems():
+        for enst,transcript in self.TRANSCRIPTS.items():
             if transcript.CCDS: ccds_set.append(transcript)
             else: nonccds_set.append(transcript)
 
         if len(ccds_set) > 0: candidates = ccds_set
         else: candidates = nonccds_set
 
+        # Sort candidates by ENST. In case there multiple selectable
+        # transcripts, the selected one does not depend on the order they are
+        # returned by .items()
+        candidates.sort(key=lambda x: x.ENST)
+        
         selected = Transcript()
         selected.PROTL = selected.CDNAL = -1
         for t in candidates:
-            if t.PROTL > selected.PROTL: selected = t
-            elif t.PROTL == selected.PROTL and t.CDNAL > selected.CDNAL: selected = t
+            # Note that we return the *last* selected candidate since we
+            # overwrite the variable `selected`.
+            if t.PROTL > selected.PROTL: 
+                selected = t
+            elif t.PROTL == selected.PROTL and t.CDNAL > selected.CDNAL: 
+                selected = t
 
         return selected
 
@@ -157,17 +170,17 @@ def selectTranscript(self):
     # Output all or selected transcripts
     def output(self, outfile, outfile_list, select, mcg_transcripts):
         if select:
-            if self.SYMBOL in mcg_transcripts.keys():
+            if self.SYMBOL in list(mcg_transcripts.keys()):
                 ok = False
-                for _,transcript in self.TRANSCRIPTS.iteritems():
+                for _,transcript in self.TRANSCRIPTS.items():
                     if transcript.ENST in mcg_transcripts[self.SYMBOL]:
                         transcript.output(outfile,outfile_list)
                         ok = True
-            if self.SYMBOL not in mcg_transcripts.keys() or not ok:
+            if self.SYMBOL not in list(mcg_transcripts.keys()) or not ok:
                 transcript = self.selectTranscript()
                 transcript.output(outfile,outfile_list)
         else:
-            for _,transcript in self.TRANSCRIPTS.iteritems():
+            for _,transcript in self.TRANSCRIPTS.items():
                 transcript.output(outfile,outfile_list)
 
 
@@ -209,11 +222,11 @@ def sortRecords(records, idx1, idx2):
     chroms = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', 'MT', 'X', 'Y']
     for i in range(len(chroms)):
         chrom = chroms[i]
-        if chrom in records.keys():
+        if chrom in list(records.keys()):
             records[chrom] = sorted(records[chrom], key=itemgetter(idx1,idx2))
     for i in range(len(chroms)):
         chrom = chroms[i]
-        if chrom in records.keys():
+        if chrom in list(records.keys()):
             for record in records[chrom]: ret.append(record)
     return ret
 
@@ -245,7 +258,7 @@ def process_data(options, genome_build, version):
         line = line.strip()
         if line == '': continue
         cols = line.split('\t')
-        if cols[0] not in mcg_transcripts.keys(): mcg_transcripts[cols[0]] = set()
+        if cols[0] not in list(mcg_transcripts.keys()): mcg_transcripts[cols[0]] = set()
         mcg_transcripts[cols[0]].add(cols[1])
     # Changing transcript for certain releases
     if int(options.ensembl)>=71: mcg_transcripts['BMPR1A'] = {'ENST00000372037'}
@@ -261,11 +274,11 @@ def process_data(options, genome_build, version):
     transIDs = set()
     if options.input is not None:
         transIDs = readTranscriptIDs(options.input)
-        print '\nOnly ' + str(len(transIDs)) + ' transcripts read from ' + options.input + ' are considered\n'
-    else: print '\nAll transcripts from the Ensembl release are considered\n'
+        print('\nOnly ' + str(len(transIDs)) + ' transcripts read from ' + options.input + ' are considered\n')
+    else: print('\nAll transcripts from the Ensembl release are considered\n')
 
     # Print out info
-    if options.select: print 'Transcript selection switched on\n'
+    if options.select: print('Transcript selection switched on\n')
 
     # Load candidate and CCDS data for Ensembl <75
     candidates = dict()
@@ -275,7 +288,7 @@ def process_data(options, genome_build, version):
             line=line.strip()
             if line=='': continue
             cols = line.split('\t')
-            if cols[0] not in candidates.keys(): candidates[cols[0]]=dict()
+            if cols[0] not in list(candidates.keys()): candidates[cols[0]]=dict()
             candidates[cols[0]][cols[1]]=int(cols[2])
 
     # Download Ensembl data
@@ -283,9 +296,9 @@ def process_data(options, genome_build, version):
     sys.stdout.flush()
     url = 'ftp://ftp.ensembl.org/pub/release-' + options.ensembl + '/gtf/homo_sapiens/Homo_sapiens.' + genome_build + '.' + options.ensembl + '.gtf.gz'
     try:
-        urllib.urlretrieve(url, 'ensembl_data.gz')
+        urllib.request.urlretrieve(url, 'ensembl_data.gz')
     except:
-        print '\n\nCannot connect to Ensembl FTP site. No internet connection?\n'
+        print('\n\nCannot connect to Ensembl FTP site. No internet connection?\n')
         quit()
 
     sys.stdout.write('OK\n')
@@ -296,7 +309,7 @@ def process_data(options, genome_build, version):
     first = True
     prevenst = ''
     transcript = None
-    for line in gzip.open('ensembl_data.gz', 'r'):
+    for line in gzip.open('ensembl_data.gz', 'rt'):
         line = line.strip()
         if line.startswith('#'): continue
         cols = line.split('\t')
@@ -322,7 +335,8 @@ def process_data(options, genome_build, version):
             if not first:
                 transcript.finalize()
                 if transcript.isCandidate():
-                    if transcript.ENSG not in genesdata.keys(): genesdata[transcript.ENSG] = Gene(transcript.GENE, transcript.ENSG)
+                    if transcript.ENSG not in list(genesdata.keys()): 
+                        genesdata[transcript.ENSG] = Gene(transcript.GENE, transcript.ENSG)
                     genesdata[transcript.ENSG].TRANSCRIPTS[transcript.ENST] = transcript
 
             # Initialize new Transcript object
@@ -358,20 +372,20 @@ def process_data(options, genome_build, version):
 
         if cols[2] == 'start_codon':
             if transcript.STRAND == '1':
-                if transcript.CODING_START is None or int(cols[3]) < transcript.CODING_START: transcript.CODING_START = int(cols[3])
+                if transcript.CODING_START < 0 or int(cols[3]) < transcript.CODING_START: transcript.CODING_START = int(cols[3])
             else:
-                if transcript.CODING_START is None or int(cols[4]) > transcript.CODING_START: transcript.CODING_START = int(cols[4])
+                if transcript.CODING_START < 0 or int(cols[4]) > transcript.CODING_START: transcript.CODING_START = int(cols[4])
 
         if cols[2] == 'stop_codon':
             if transcript.STRAND == '1':
-                if transcript.CODING_END is None or int(cols[4]) > transcript.CODING_END: transcript.CODING_END = int(cols[4])
+                if transcript.CODING_END < 0 or int(cols[4]) > transcript.CODING_END: transcript.CODING_END = int(cols[4])
             else:
-                if transcript.CODING_END is None or int(cols[3]) < transcript.CODING_END: transcript.CODING_END = int(cols[3])
+                if transcript.CODING_END < 0 or int(cols[3]) < transcript.CODING_END: transcript.CODING_END = int(cols[3])
 
         # Check if transcript is complete and is a CCDS transcript
         if transcript.isComplete is None:
             if int(options.ensembl) < 75:
-                if transcript.ENST in candidates[transcript.CHROM].keys():
+                if transcript.ENST in list(candidates[transcript.CHROM].keys()):
                     transcript.CCDS = (candidates[transcript.CHROM][transcript.ENST] == 1)
                     transcript.isComplete = True
                 else:
@@ -388,14 +402,14 @@ def process_data(options, genome_build, version):
     if transcript is not None:
         transcript.finalize()
         if transcript.isCandidate():
-            if transcript.ENSG not in genesdata.keys(): genesdata[transcript.ENSG] = Gene(transcript.GENE, transcript.ENSG)
+            if transcript.ENSG not in list(genesdata.keys()): genesdata[transcript.ENSG] = Gene(transcript.GENE, transcript.ENSG)
             genesdata[transcript.ENSG].TRANSCRIPTS[transcript.ENST] = transcript
 
     # If no transcript ID from the input file was found in the Ensembl release
     if len(genesdata) == 0:
-        print '\n\nNo transcripts from '+options.input+' found in Ensembl release.'
-        print '\nNo transcript database created.'
-        print "-----------------------------------------------------------------\n"
+        print('\n\nNo transcripts from '+options.input+' found in Ensembl release.')
+        print('\nNo transcript database created.')
+        print("-----------------------------------------------------------------\n")
         os.remove('ensembl_data.gz')
         quit()
 
@@ -408,7 +422,7 @@ def process_data(options, genome_build, version):
     outfile_list.write('ENSG\tGENE\tENST\n')
 
     # Output transcripts of each gene
-    for ensg, gene in genesdata.iteritems(): gene.output(outfile,outfile_list,options.select,mcg_transcripts)
+    for ensg, gene in genesdata.items(): gene.output(outfile,outfile_list,options.select,mcg_transcripts)
 
     # Close temporary output files
     outfile.close()
@@ -423,7 +437,7 @@ def process_data(options, genome_build, version):
         line.rstrip()
         record = line.split('\t')
         record[6] = int(record[6])
-        if record[4] in data.keys():
+        if record[4] in list(data.keys()):
             data[record[4]].append(record)
         else:
             data[record[4]] = []
@@ -472,18 +486,18 @@ def is_number(s):
 def run(options, version):
     # Checking if all required options specified
     if options.ensembl is None:
-        print '\nError: no Ensembl release specified. Use option -h to get help!\n'
+        print('\nError: no Ensembl release specified. Use option -h to get help!\n')
         quit()
     if not is_number(options.ensembl):
-        print '\nError: Ensembl release specified is not an integer. Use option -h to get help!\n'
+        print('\nError: Ensembl release specified is not an integer. Use option -h to get help!\n')
         quit()
     if options.output is None:
-        print '\nError: no output file name specified. Use option -h to get help!\n'
+        print('\nError: no output file name specified. Use option -h to get help!\n')
         quit()
 
     # Must use Ensembl release >= 70
     if not (int(options.ensembl) >= 70 or int(options.ensembl) == 65):
-        print '\nError: This version works with Ensembl v65 or >= v70.\n'
+        print('\nError: This version works with Ensembl v65 or >= v70.\n')
         quit()
 
     # Genome build
@@ -491,17 +505,17 @@ def run(options, version):
     genome_build = 'GRCh37' if int(options.ensembl) <= 75 else 'GRCh38'
 
     # Printing out version information
-    print "\n---------------------------------------------------------------------------------------"
-    print 'CAVA ' + version + ' transcript database preparation tool (ensembl_db) is now running'
-    print 'Started: ', datetime.datetime.now(), '\n'
+    print("\n---------------------------------------------------------------------------------------")
+    print('CAVA ' + version + ' transcript database preparation tool (ensembl_db) is now running')
+    print('Started: ', datetime.datetime.now(), '\n')
 
     # Print info
-    print 'Ensembl version:  ' + options.ensembl
-    print 'Reference genome: ' + genome_build
+    print('Ensembl version:  ' + options.ensembl)
+    print('Reference genome: ' + genome_build)
 
     # Creating compressed output file
     Nretrieved = process_data(options, genome_build, version)
-    print '\nA total of ' + str(Nretrieved) + ' transcripts have been retrieved\n'
+    print('\nA total of ' + str(Nretrieved) + ' transcripts have been retrieved\n')
 
     # Indexing output file with Tabix
     indexFile(options)
@@ -510,14 +524,14 @@ def run(options, version):
     os.remove(options.output)
 
     # Printing out summary information
-    print ''
-    print '---------------------'
-    print 'Output files created:'
-    print '---------------------'
-    print options.output + '.gz (transcript database)'
-    print options.output + '.gz.tbi (index file)'
-    print options.output + '.txt (list of transcripts)'
-
-    print ''
-    print 'CAVA ensembl_db successfully finished: ', datetime.datetime.now()
-    print "---------------------------------------------------------------------------------------\n"
+    print('')
+    print('---------------------')
+    print('Output files created:')
+    print('---------------------')
+    print(options.output + '.gz (transcript database)')
+    print(options.output + '.gz.tbi (index file)')
+    print(options.output + '.txt (list of transcripts)')
+
+    print('')
+    print('CAVA ensembl_db successfully finished: ', datetime.datetime.now())
+    print("---------------------------------------------------------------------------------------\n")
diff --git a/install.sh b/install.sh
index 53797aa..f1ff5a3 100755
--- a/install.sh
+++ b/install.sh
@@ -4,7 +4,7 @@ unset PYTHONPATH
 ABSOLUTE_PATH=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 PIP_ARGS='--no-cache-dir --ignore-installed --force-reinstall'
 
-virtualenv -p python2.7 env
+virtualenv -p python3 env
 
 source ${ABSOLUTE_PATH}/env/bin/activate
 
diff --git a/requirements.txt b/requirements.txt
index 99b373c..a890ffd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ pep8==1.7.0
 py==1.4.32
 pylint==1.7.2
 pyparsing==2.2.0
-pysam==0.7.7
+pysam==0.15.0
 pytest==3.0.7
 pytest-cov>=2.2.0
 radon==2.0.2
diff --git a/setup.py b/setup.py
index 44dc8d8..c384b48 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name= 'CAVA',
-    version = '1.2.3',
+    version = '2.0.0',
     description = 'CAVA (Clinical Annotation of VAriants)',
     url = 'https://github.com/RahmanTeam/CAVA',
     author = 'RahmanTeam',
diff --git a/test.sh b/test.sh
new file mode 100644
index 0000000..70f18b6
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+# Test CAVA suite. After having installed CAVA, execute this script as:
+#
+#   bash test.sh
+
+set -e 
+set -o pipefail
+
+# Set up
+
+if [ ! -f test/tmp.input.vcf.gz ]
+then
+    curl ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/VCF/common_all_20180418.vcf.gz \
+        | gunzip \
+        | awk '$1 ~ "^#" || NR % 1000 == 0' \
+        | gzip > test/tmp.input.vcf.gz
+fi
+
+if [ ! -f test/tmp.hg38.fa ]
+then
+    curl http://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz | gunzip > test/tmp.hg38.fa
+    samtools faidx test/tmp.hg38.fa
+fi
+
+# Test: prepare database
+./ensembl_db -e 75 -o test/tmp.db
+
+# Lengths in bp not kb:
+gunzip -c test/tmp.db.gz | grep -F '+/919bp/1/918bp/305' > /dev/null
+
+# Test: annotate
+./cava -i test/tmp.input.vcf.gz -o test/tmp. -c test/CAVA_config.txt -t 8
+
+# Tear down
+# =========
+
+rm test/tmp.*
diff --git a/test/CAVA_config.txt b/test/CAVA_config.txt
new file mode 100644
index 0000000..8250d73
--- /dev/null
+++ b/test/CAVA_config.txt
@@ -0,0 +1,77 @@
+# Input file format
+# Possible values: VCF or TXT | Optional: yes | Default value: VCF
+@inputformat = VCF
+
+# Output file format
+# Possible values: VCF or TSV | Optional: yes | Default value: VCF
+@outputformat =  VCF
+
+# Absolute path to reference genome file
+# Possible values: string | Optional: no
+@reference = test/tmp.hg38.fa
+
+# Absolute path to Ensembl transcript database file
+# Possible values: string | Optional: yes (if not specified, default transcript database will be used)
+@ensembl = test/tmp.db.gz
+
+# Absolute path to dbSNP database file
+# Possible values: string | Optional: yes
+##@dbsnp = absolute/path/to/dbsnp/database
+
+# Are variants with neither transcript nor dbSNP annotation to be included in the output?
+# Possible values: TRUE or FALSE | Optional: yes | Default value: TRUE
+@nonannot = TRUE
+
+# Are only records with PASS filter value included in the output?
+# Possible values: TRUE or FALSE | Optional: yes | Default value: FALSE
+@filter = FALSE
+
+# Types of variants to be annotated and outputted
+# Possible values: ALL, SUBSTITUTION, INDEL, INSERTION, DELETION or COMPLEX | Optional: yes | Default value: ALL
+@type = ALL
+
+# Name of compressed BED file specifying genomic regions variant annotation is restricted to
+# Possible values: string | Optional: yes
+##@target = .
+
+# Name of file providing a list of the gene identifiers variant annotation is restricted to
+# Note: gene identifiers need to be given on separate lines in the file
+# Possible values: string | Optional: yes  
+##@genelist = .
+
+# Name of file providing a list of transcript identifiers variant annotation is restricted to 
+# Note: transcript identifiers need to be given on separate lines in the file
+# Possible values: string | Optional: yes 
+##@transcriptlist = .
+
+# Name of file providing a list of the dbSNP identifiers variant annotation is restricted to
+# Note: dbSNP identifiers need to be given on separate lines in the file
+# Possible values: string | Optional: yes  
+##@snplist = .
+
+# Is a log file to be written?
+# Possible values: TRUE or FALSE | Optional: yes | Default value: FALSE
+@logfile = TRUE
+
+# Which ontology is used for reporting consequence type?
+# Possible values: CLASS, SO or BOTH | Optional: yes | Default value: BOTH
+@ontology = BOTH
+
+# Definition of variant impact levels (reported by the IMPACT annotation flag)
+# Different impact levels are separated by | and a comma-separated list of CLASS terms must be given for each level 
+# Possible values: string | Optional: yes
+# Default value: SG,ESS,FS | SS5,IM,SL,EE,IF,NSY | SY,SS,INT,5PU,3PU
+@impactdef = SG,ESS,FS | SS5,IM,SL,EE,IF,NSY | SY,SS,INT,5PU,3PU
+
+# Are alternative annotations outputted?
+# If TRUE, the ALTANN and ALTCLASS/ALTSO annotation flags are reported instead of the ALTFLAG flag
+# Possible values: TRUE or FALSE | Optional: yes | Default value: TRUE
+@givealt = TRUE
+
+# Number of bases into the intron used as the splice site region 
+# Possible values: integer >= 6 | Optional: yes | Default value: 8
+@ssrange = 8
+
+# Is the prefix CAVA_ added to annotation flag names in VCF output?
+# Possible values: TRUE or FALSE | Optional: yes | Default value: FALSE
+@prefix = FALSE