diff --git a/License.txt b/License.txt old mode 100755 new mode 100644 diff --git a/Readme.txt b/Readme.txt old mode 100755 new mode 100644 diff --git a/SURPI.sh b/SURPI.sh index 293e7f5..fe9a19c 100755 --- a/SURPI.sh +++ b/SURPI.sh @@ -13,25 +13,28 @@ # SURPI_version="1.0.22" -optspec=":f:hvz:" -bold=$(tput bold) -normal=$(tput sgr0) +optspec=":f:d:hvz:" +bold=$(tput bold 2> /dev/null || true) +normal=$(tput sgr0 2> /dev/null || true) green='\e[0;32m' red='\e[0;31m' endColor='\e[0m' host=$(hostname) scriptname=${0##*/} +source debug.sh +source logging.sh while getopts "$optspec" option; do case "${option}" in f) config_file=${OPTARG};; # get parameters from config file if specified + d) ref_path=${OPTARG:-/reference};; # Path to reference db directory. h) HELP=1;; v) VERIFICATION=1;; - z) create_config_file=${OPTARG} + z) create_config_file=${OPTARG} configprefix=${create_config_file%.fastq} ;; - :) echo "Option -$OPTARG requires an argument." >&2 + :) echo "Option -$OPTARG requires an argument." 
>&2 exit 1 ;; esac @@ -39,58 +42,59 @@ done if [[ $HELP -eq 1 || $# -lt 1 ]] then - cat < go_$configprefix + echo "PATH=$PATH:/usr/local/bin/surpi:/usr/local/bin:/usr/bin/:/bin" > go_$configprefix echo "nohup $scriptname -f $configprefix.config > SURPI.$configprefix.log 2> SURPI.$configprefix.err" >> go_$configprefix chmod +x go_$configprefix @@ -152,7 +156,8 @@ run_mode="Comprehensive" # $basef.cutadapt.fastq # $basef.preprocessed.fastq #preprocess="skip" - +#human_mapping="skip" +#alignment="skip" ########################## # Preprocessing @@ -177,7 +182,7 @@ quality_cutoff=18 ########################## #SNAP executable -snap="/usr/local/bin/snap-dev" +snap="snap" #SNAP edit distance for Computational Subtraction of host genome [Highly recommended default: d_human=12] #see Section 3.1.2 MaxDist description: http://snap.cs.berkeley.edu/downloads/snap-1.0beta-manual.pdf @@ -246,32 +251,32 @@ eBLASTn="1e-15" # SURPI will subtract all SNAP databases found in this directory from the input sequence # Useful if you want to subtract multiple genomes (without combining SNAP databases) # or, if you need to split a db if it is larger than available RAM. 
-SNAP_subtraction_folder="/reference/hg19" +SNAP_subtraction_folder="$ref_path/Subtraction_SNAP" # directory for SNAP-indexed databases of NCBI NT (for mapping phase in comprehensive mode) # directory must ONLY contain snap indexed databases -SNAP_COMPREHENSIVE_db_dir="/reference/COMP_SNAP" +SNAP_COMPREHENSIVE_db_dir="$ref_path/COMP_SNAP" # directory for SNAP-indexed databases for mapping phase in FAST mode # directory must ONLY contain snap indexed databases -SNAP_FAST_db_dir="/reference/FAST_SNAP" +SNAP_FAST_db_dir="$ref_path/FAST_SNAP" #Taxonomy Reference data directory #This folder should contain the 3 SQLite files created by the script "create_taxonomy_db.sh" #gi_taxid_nucl.db - nucleotide db of gi/taxonid #gi_taxid_prot.db - protein db of gi/taxonid #names_nodes_scientific.db - db of taxonid/taxonomy -taxonomy_db_directory="/reference/taxonomy" +taxonomy_db_directory="$ref_path/taxonomy" #RAPSearch viral database name: indexed protein dataset (all of Viruses) -#make sure that directory also includes the .info file -RAPSearch_VIRUS_db="/reference/RAPSearch/rapsearch_viral_db" +#make sure that directory also includes the .info file +RAPSearch_VIRUS_db="$ref_path/RAPSearch/rapsearch_viral_aa_130628_db_v2.12" #RAPSearch nr database name: indexed protein dataset (all of NR) -#make sure that directory also includes the .info file -RAPSearch_NR_db="/reference/RAPSearch/rapsearch_nr_db" +#make sure that directory also includes the .info file +RAPSearch_NR_db="$ref_path/RAPSearch/rapsearch_nr_db_v2.12" -ribo_snap_bac_euk_directory="/reference/RiboClean_SNAP" +ribo_snap_bac_euk_directory="$ref_path/RiboClean_SNAP" ########################## # Server related values @@ -286,17 +291,12 @@ ribo_snap_bac_euk_directory="/reference/RiboClean_SNAP" #This folder will not be created by SURPI, so be sure it already exists with proper permissions. temporary_files_directory="/tmp/" -#This parameter controls whether dropcache is used throughout the pipeline. 
If free RAM is less than cache_reset, -# then dropcache. If cache_reset=0, then dropcache will never be used. -cache_reset="0" - - ########################## # AWS related values ########################## # AWS_master_slave will start up a slave instance on AWS for each division of the nt database -# It will be more costly, but should run significantly faster than the solo method, which +# It will be more costly, but should run significantly faster than the solo method, which # runs each NT division through SNAP serially on a single machine. # If using the "AWS_master_slave" option, be sure that all parameters in the AWS section below are # set properly. @@ -306,7 +306,7 @@ cache_reset="0" #Which method to use for SNAP to nt [AWS_master_slave/solo] # AWS_master_slave will start up a slave instance on AWS for each division of the nt database -# It will be more costly, but should run significantly faster than the solo method, which +# It will be more costly, but should run significantly faster than the solo method, which # runs each NT division through SNAP serially on a single machine. # If using the "AWS_master_slave" option, be sure that all parameters in the AWS section below are # set properly. @@ -318,7 +318,7 @@ snap_nt_procedure="solo" ami_id="ami-5ef61936" #Number of slave instances will not exceed this value. Useful for testing, in order to restrict instance count. -#Otherwise, number of instances should be equal to number of SNAP-NT database divisions. This value is +#Otherwise, number of instances should be equal to number of SNAP-NT database divisions. This value is #automatically calculated by SURPI. max_slave_instances=29 @@ -402,25 +402,6 @@ then exit 65 fi -#set cache_reset. if none specified: -# >500GB -> 200GB -# >200GB -> 150GB -# otherwise -> 50GB -# note: this may cause problems on a machine with <50GB RAM -if [ ! 
$cache_reset ] -then - total_ram=$(grep MemTotal /proc/meminfo | awk '{print $2}') - if [ "$total_ram" -gt "500000000" ] #500GiB - then - cache_reset=200 # This is 200GB - elif [ "$total_ram" -gt "200000000" ] #200 GiB - then - cache_reset=150 - else - cache_reset=50 - fi -fi - #these 2 parameters are for cropping prior to snap in the preprocessing stage if [ ! $start_nt ] then @@ -486,7 +467,7 @@ nopathf=${FASTQ_file##*/} # remove the path to file basef=${nopathf%.fastq} #verify that all software dependencies are properly installed -declare -a dependency_list=("gt" "seqtk" "fastq" "fqextract" "cutadapt" "prinseq-lite.pl" "dropcache" "$snap" "rapsearch" "fastQValidator" "abyss-pe" "ABYSS-P" "Minimo") +declare -a dependency_list=("gt" "seqtk" "fastq" "fqextract" "cutadapt" "prinseq-lite.pl" "dropcache" "$snap" "rapsearch" "fastQValidator" "abyss-pe" "Minimo") echo "-----------------------------------------------------------------------------------------" echo "DEPENDENCY VERIFICATION" echo "-----------------------------------------------------------------------------------------" @@ -511,10 +492,8 @@ seqtk_version=$(seqtk 2>&1 | head -3 | tail -1 | awk '{print $2}') cutadapt_version=$(cutadapt --version) prinseqlite_version=$(prinseq-lite.pl --version 2>&1 | awk '{print $2}') snap_version=$(snap 2>&1 | grep version | awk '{print $5}') -snap_dev_version=$(snap-dev 2>&1 | grep version | awk '{print $5}') rapsearch_version=$(rapsearch 2>&1 | head -2 | tail -1 | awk '{print $2}') abyss_pe_version=$(abyss-pe version | head -2 | tail -1 | awk '{print $3}') -ABYSS_P_version=$(ABYSS-P --version | head -1 | awk '{print $3}') Minimo_version=$(Minimo -h | tail -2 | awk '{print $2}') echo -e "SURPI version: $SURPI_version" echo -e "config file version: $config_file_version" @@ -523,10 +502,8 @@ echo -e "seqtk: $seqtk_version" echo -e "cutadapt: $cutadapt_version" echo -e "prinseq-lite: $prinseqlite_version${endColor}" echo -e "snap: $snap_version${endColor}" -echo -e "snap-dev: 
$snap_dev_version${endColor}" echo -e "RAPSearch: $rapsearch_version" echo -e "abyss-pe: $abyss_pe_version" -echo -e "ABYSS-P: $ABYSS_P_version" echo -e "Minimo: $Minimo_version" echo "-----------------------------------------------------------------------------------------" @@ -546,9 +523,10 @@ do done echo -e "SNAP Comprehensive Mode database" -for f in $SNAP_COMPREHENSIVE_db_dir/* +for f_basename in $(ls -1v "$SNAP_COMPREHENSIVE_db_dir") do - if [ -f $f/Genome ] + f = "$SNAP_COMPREHENSIVE_db_dir"/"$f_basename" + if [ -f $f/Genome ] then echo -e "\t$f: ${green}OK${endColor}" else @@ -577,7 +555,8 @@ done #verify taxonomy is functioning properly result=$( taxonomy_lookup_embedded.pl -d nucl -q $taxonomy_db_directory 149408158 ) -if [ $result = "149408158" ] +echo "taxonomy result: ${result}" +if [ $result == "149408158" ] then echo -e "taxonomy: ${green}OK${endColor}" else @@ -678,7 +657,6 @@ echo "contigcutoff for abyss assembly unitigs: $contigcutoff" echo "abysskmer length: $abysskmer" echo "Ignore barcodes for assembly? $ignore_barcodes_for_de_novo" -echo "cache_reset (if 0, then dropcache will never be used): $cache_reset" echo "start_nt: $start_nt" echo "crop_length: $crop_length" @@ -735,10 +713,10 @@ then exit fi ########################################################### -echo -e "$(date)\t$scriptname\t########## STARTING SURPI PIPELINE ##########" +log "########## STARTING SURPI PIPELINE ##########" START_PIPELINE=$(date +%s) -echo -e "$(date)\t$scriptname\tFound file $FASTQ_file" -echo -e "$(date)\t$scriptname\tAfter removing path: $nopathf" +log "Found file $FASTQ_file" +log "After removing path: $nopathf" ############ Start up AWS slave machines ################## file_with_slave_ips="slave_list.txt" @@ -746,49 +724,51 @@ file_with_slave_ips="slave_list.txt" if [ "$snap_nt_procedure" = "AWS_master_slave" ] then # start the slaves as a background process. They should be ready to run at the SNAP to NT step in the pipeline. 
- start_slaves.sh $ami_id $actual_slave_instances $instance_type $keypair $security_group $availability_zone $file_with_slave_ips $placement_group & # > $basef.AWS.log 2>&1 + start_slaves.sh $ami_id $actual_slave_instances $instance_type $keypair $security_group $availability_zone $file_with_slave_ips $placement_group & # > $basef.AWS.log "2>&1 fi ############ PREPROCESSING ################## -if [ "$preprocess" != "skip" ] +if grep -qs preprocess_complete "$basef.checkpoint"; then + preprocess_complete=1 +fi +if [[ "$preprocess" != "skip" ]] && [[ "$preprocess_complete" != 1 ]] then - echo -e "$(date)\t$scriptname\t############### PREPROCESSING ###############" - echo -e "$(date)\t$scriptname\tStarting: preprocessing using $cores cores " + if [[ -f $basef.cutadapt.fastq.done ]]; then + cp $basef.cutadapt.fastq + fi + log "############### PREPROCESSING ###############" + log "Starting: preprocessing using $cores cores " START_PREPROC=$(date +%s) - echo -e "$(date)\t$scriptname\tParameters: preprocess_ncores.sh $basef.fastq $quality N $length_cutoff $cores Y N $adapter_set $start_nt $crop_length $temporary_files_directory >& $basef.preprocess.log" - preprocess_ncores.sh $basef.fastq $quality N $length_cutoff $cores $cache_reset N $adapter_set $start_nt $crop_length $temporary_files_directory $quality_cutoff >& $basef.preprocess.log - echo -e "$(date)\t$scriptname\tDone: preprocessing " + log "Parameters: preprocess_ncores.sh $basef.fastq $quality N $length_cutoff $cores Y N $adapter_set $start_nt $crop_length $temporary_files_directory >& $basef.preprocess.log" + preprocess_ncores.sh $basef.fastq $quality N $length_cutoff $cores N $adapter_set $start_nt $crop_length $temporary_files_directory $quality_cutoff >& $basef.preprocess.log + log "Done: preprocessing " END_PREPROC=$(date +%s) diff_PREPROC=$(( END_PREPROC - START_PREPROC )) - echo -e "$(date)\t$scriptname\tPreprocessing took $diff_PREPROC seconds" | tee timing.$basef.log -fi -# verify preprocessing step -if 
[ ! -s $basef.cutadapt.fastq ] || [ ! -s $basef.preprocessed.fastq ] -then - echo -e "$(date)\t$scriptname\t${red}Preprocessing appears to have failed. One of the following files does not exist, or is of 0 size:${endColor}" - echo "$basef.cutadapt.fastq" - echo "$basef.preprocessed.fastq" - exit + log "Preprocessing took $diff_PREPROC seconds" | tee timing.$basef.log + + # verify preprocessing step + if [ ! -s $basef.cutadapt.fastq ] || [ ! -s $basef.preprocessed.fastq ] + then + log "${red}Preprocessing appears to have failed. One of the following files does not exist, or is of 0 size:${endColor}" + echo "$basef.cutadapt.fastq" + echo "$basef.preprocessed.fastq" + exit + else + echo "preprocess_complete" >> "$basef.checkpoint" + fi fi ############# BEGIN SNAP PIPELINE ################# -freemem=$(free -g | awk '{print $4}' | head -n 2 | tail -1) -echo -e "$(date)\t$scriptname\tThere is $freemem GB available free memory...[cutoff=$cache_reset GB]" -if [[ $dropcache == "Y" ]] -then - if [ "$freemem" -lt "$cache_reset" ] - then - echo -e "$(date)\t$scriptname\tClearing cache..." 
- dropcache - fi -fi +basef_h=${nopathf%.fastq}.preprocessed.s20.h250n25d${d_human}xfu # remove fastq extension ############# HUMAN MAPPING ################# -if [ "$human_mapping" != "skip" ] +if grep -qs human_mapping_complete "$basef.checkpoint"; then + human_mapping_complete=1 +fi +if [ "$human_mapping" != "skip" ] && [[ "$human_mapping_complete" != 1 ]] then - echo -e "$(date)\t$scriptname\t############### SNAP TO HUMAN ###############" - basef_h=${nopathf%.fastq}.preprocessed.s20.h250n25d${d_human}xfu # remove fastq extension - echo -e "$(date)\t$scriptname\tBase file: $basef_h" - echo -e "$(date)\t$scriptname\tStarting: $basef_h human mapping" + log "############### SNAP TO HUMAN ###############" + log "Base file: $basef_h" + log "Starting: $basef_h human mapping" file_to_subtract="$basef.preprocessed.fastq" subtracted_output_file="$basef_h.human.snap.unmatched.sam" @@ -798,50 +778,43 @@ then for SNAP_subtraction_db in $SNAP_subtraction_folder/*; do SUBTRACTION_COUNTER=$[$SUBTRACTION_COUNTER +1] # check if SNAP db is cached in RAM, use optimal parameters depending on result - SNAP_db_cached=$(vmtouch -m500G -f "$SNAP_subtraction_db" | grep 'Resident Pages' | awk '{print $5}') - if [[ "$SNAP_db_cached" == "100%" ]] - then - echo -e "$(date)\t$scriptname\tSNAP database is cached ($SNAP_db_cached)." - SNAP_cache_option=" -map " - else - echo -e "$(date)\t$scriptname\tSNAP database is not cached ($SNAP_db_cached)." - SNAP_cache_option=" -pre -map " - fi - echo -e "$(date)\t$scriptname\tParameters: snap-dev single $SNAP_subtraction_db $file_to_subtract -o -sam $subtracted_output_file.$SUBTRACTION_COUNTER.sam -t $cores -x -f -h 250 -d ${d_human} -n 25 -F u $SNAP_cache_option" + #SNAP_db_cached=$(vmtouch -m500G -f "$SNAP_subtraction_db" | grep 'Resident Pages' | awk '{print $5}') + #if [[ "$SNAP_db_cached" == "100%" ]] + #then + #log "SNAP database is cached ($SNAP_db_cached)." 
+ #SNAP_cache_option=" -map " + #else + #log "SNAP database is not cached ($SNAP_db_cached)." + #SNAP_cache_option=" -pre -map " + #fi + log "Parameters: snap single $SNAP_subtraction_db $file_to_subtract -o -sam $subtracted_output_file.$SUBTRACTION_COUNTER.sam -t $cores -x -f -h 250 -d ${d_human} -n 25 -F u $SNAP_cache_option" START_SUBTRACTION_STEP=$(date +%s) - snap-dev single "$SNAP_subtraction_db" "$file_to_subtract" -o -sam "$subtracted_output_file.$SUBTRACTION_COUNTER.sam" -t $cores -x -f -h 250 -d ${d_human} -n 25 -F u $SNAP_cache_option + snap single "$SNAP_subtraction_db" "$file_to_subtract" -o -sam "$subtracted_output_file.$SUBTRACTION_COUNTER.sam" -t $cores -x -f -h 250 -d ${d_human} -n 25 -F u $SNAP_cache_option END_SUBTRACTION_STEP=$(date +%s) - echo -e "$(date)\t$scriptname\tDone: SNAP to human" + log "Done: SNAP to human" diff_SUBTRACTION_STEP=$(( END_SUBTRACTION_STEP - START_SUBTRACTION_STEP )) - echo -e "$(date)\t$scriptname\tSubtraction step: $SUBTRACTION_COUNTER took $diff_SUBTRACTION_STEP seconds" + log "Subtraction step: $SUBTRACTION_COUNTER took $diff_SUBTRACTION_STEP seconds" file_to_subtract="$subtracted_output_file.$SUBTRACTION_COUNTER.sam" done egrep -v "^@" "$subtracted_output_file.$SUBTRACTION_COUNTER.sam" | awk '{if($3 == "*") print "@"$1"\n"$10"\n""+"$1"\n"$11}' > $(echo "$basef_h".human.snap.unmatched.sam | sed 's/\(.*\)\..*/\1/').fastq END_SUBTRACTION=$(date +%s) diff_SUBTRACTION=$(( END_SUBTRACTION - START_SUBTRACTION )) - rm $subtracted_output_file.*.sam - echo -e "$(date)\t$scriptname\tSubtraction took $diff_SUBTRACTION seconds" | tee -a timing.$basef.log -fi -######dropcache?############# -freemem=$(free -g | awk '{print $4}' | head -n 2 | tail -1) -echo -e "$(date)\t$scriptname\tThere is $freemem GB available free memory...[cutoff=$cache_reset GB]" -if [[ $dropcache == "Y" ]] -then - if [ "$freemem" -lt "$cache_reset" ] - then - echo -e "$(date)\t$scriptname\tClearing cache..." 
- dropcache - fi + # rm $subtracted_output_file.*.sam + log "Subtraction took $diff_SUBTRACTION seconds" | tee -a timing.$basef.log + echo "human_mapping_complete" >> "$basef.checkpoint" fi + ############################# SNAP TO NT ############################## if [ "$alignment" != "skip" ] then - if [ ! -f $basef.NT.snap.sam ] + if [ -f $basef.NT.snap.sam ] # Cache SNAP NT hits. then - echo -e "$(date)\t$scriptname\t####### SNAP UNMATCHED SEQUENCES TO NT ######" - echo -e -n "$(date)\t$scriptname\tCalculating number of sequences to analyze using SNAP to NT: " - echo $(awk 'NR%4==1' "$basef_h".human.snap.unmatched.fastq | wc -l) - echo -e "$(date)\t$scriptname\tStarting: Mapping by SNAP to NT from $basef_h.human.snap.unmatched.fastq" + log "Using cached $basef.NT.snap.sam" + else + log "####### SNAP UNMATCHED SEQUENCES TO NT ######" + num_seq_snap_nt = $(awk 'NR%4==1' "$basef_h".human.snap.unmatched.fastq | wc -l) + log "Calculating number of sequences to analyze using SNAP to NT: $num_seq_snap_nt" + log "Starting: Mapping by SNAP to NT from $basef_h.human.snap.unmatched.fastq" START_SNAPNT=$(date +%s) # SNAP to NT for unmatched reads (d value threshold cutoff = 12) @@ -849,8 +822,8 @@ then then if [ $snap_integrator = "inline" ] then - echo -e "$(date)\t$scriptname\tParameters: snap_nt.sh $basef_h.human.snap.unmatched.fastq ${SNAP_COMPREHENSIVE_db_dir} $cores $cache_reset $d_NT_alignment $snap" - snap_nt.sh "$basef_h.human.snap.unmatched.fastq" "${SNAP_COMPREHENSIVE_db_dir}" "$cores" "$cache_reset" "$d_NT_alignment" "$snap" + log "Parameters: snap_nt.sh $basef_h.human.snap.unmatched.fastq ${SNAP_COMPREHENSIVE_db_dir} $cores $d_NT_alignment $snap" + snap_nt.sh "$basef_h.human.snap.unmatched.fastq" "${SNAP_COMPREHENSIVE_db_dir}" "$cores" "$d_NT_alignment" "$snap" elif [ $snap_integrator = "end" ] then if [ "$snap_nt_procedure" = "AWS_master_slave" ] @@ -865,38 +838,40 @@ then sleep 2 done echo - echo -e "$(date)\t$scriptname\tParameters: snap_on_slave.sh 
$basef_h.human.snap.unmatched.fastq $pemkey $file_with_slave_ips $incoming_dir ${basef}.NT.snap.sam $d_NT_alignment" + log "Parameters: snap_on_slave.sh $basef_h.human.snap.unmatched.fastq $pemkey $file_with_slave_ips $incoming_dir ${basef}.NT.snap.sam $d_NT_alignment" snap_on_slave.sh "$basef_h.human.snap.unmatched.fastq" "$pemkey" "$file_with_slave_ips" "$incoming_dir" "${basef}.NT.snap.sam" "$d_human"> "$basef.AWS.log" 2>&1 elif [ "$snap_nt_procedure" = "solo" ] then - echo -e "$(date)\t$scriptname\tParameters: snap_nt_combine.sh $basef_h.human.snap.unmatched.fastq ${SNAP_COMPREHENSIVE_db_dir} $cores $cache_reset $d_NT_alignment $num_simultaneous_SNAP_runs" - snap_nt_combine.sh "$basef_h.human.snap.unmatched.fastq" "${SNAP_COMPREHENSIVE_db_dir}" "$cores" "$cache_reset" "$d_NT_alignment" "$num_simultaneous_SNAP_runs" + log "Parameters: snap_nt_combine.sh $basef_h.human.snap.unmatched.fastq ${SNAP_COMPREHENSIVE_db_dir} $cores $d_NT_alignment $num_simultaneous_SNAP_runs" + snap_nt_combine.sh "$basef_h.human.snap.unmatched.fastq" "${SNAP_COMPREHENSIVE_db_dir}" "$cores" "$d_NT_alignment" "$num_simultaneous_SNAP_runs" fi fi elif [ $run_mode = "Fast" ] then - echo -e "$(date)\t$scriptname\tParameters: snap_nt.sh $basef_h.human.snap.unmatched.fastq ${SNAP_FAST_db_dir} $cores $cache_reset $d_NT_alignment $snap" - snap_nt.sh "$basef_h.human.snap.unmatched.fastq" "${SNAP_FAST_db_dir}" "$cores" "$cache_reset" "$d_NT_alignment" "$snap" + log "Parameters: snap_nt.sh $basef_h.human.snap.unmatched.fastq ${SNAP_FAST_db_dir} $cores $d_NT_alignment $snap" + snap_nt.sh "$basef_h.human.snap.unmatched.fastq" "${SNAP_FAST_db_dir}" "$cores" "$d_NT_alignment" "$snap" fi - echo -e "$(date)\t$scriptname\tDone: SNAP to NT" + log "Done: SNAP to NT" END_SNAPNT=$(date +%s) diff_SNAPNT=$(( END_SNAPNT - START_SNAPNT )) - echo -e "$(date)\t$scriptname\tSNAP to NT took $diff_SNAPNT seconds." | tee -a timing.$basef.log + log "SNAP to NT took $diff_SNAPNT seconds." 
| tee -a timing.$basef.log mv -f "$basef_h.human.snap.unmatched.NT.sam" "$basef.NT.snap.sam" fi - echo -e "$(date)\t$scriptname\tStarting: parsing $basef.NT.snap.sam" - echo -e "$(date)\t$scriptname\textract matched/unmatched $basef.NT.snap.sam" - egrep -v "^@" $basef.NT.snap.sam | awk '{if($3 != "*") print }' > $basef.NT.snap.matched.sam - egrep -v "^@" $basef.NT.snap.sam | awk '{if($3 == "*") print }' > $basef.NT.snap.unmatched.sam - echo -e "$(date)\t$scriptname\tconvert sam to fastq from $basef.NT.snap.sam" - echo -e "$(date)\t$scriptname\tDone: parsing $basef.NT.snap.unmatched.sam" - if [ ! -f "$basef.NT.snap.matched.all.annotated" ] + + if [ ! -f "$basef.NT.snap.matched.fulllength.all.annotated.sorted" ] then + log "Starting: parsing $basef.NT.snap.sam" + log "extract matched/unmatched $basef.NT.snap.sam" + egrep -v "^@" $basef.NT.snap.sam | awk '{if($3 != "*") print }' > $basef.NT.snap.matched.sam + egrep -v "^@" $basef.NT.snap.sam | awk '{if($3 == "*") print }' > $basef.NT.snap.unmatched.sam + + log "convert sam to fastq from $basef.NT.snap.sam" + log "Done: parsing $basef.NT.snap.unmatched.sam" ## convert to FASTQ and retrieve full-length sequences - echo -e "$(date)\t$scriptname\tconvert to FASTQ and retrieve full-length sequences for SNAP NT matched hits" - echo -e "$(date)\t$scriptname\tParameters: extractHeaderFromFastq_ncores.sh $cores $basef.cutadapt.fastq $basef.NT.snap.matched.sam $basef.NT.snap.matched.fulllength.fastq $basef.NT.snap.unmatched.sam $basef.NT.snap.unmatched.fulllength.fastq" + log "convert to FASTQ and retrieve full-length sequences for SNAP NT matched hits" + log "Parameters: extractHeaderFromFastq_ncores.sh $cores $basef.cutadapt.fastq $basef.NT.snap.matched.sam $basef.NT.snap.matched.fulllength.fastq $basef.NT.snap.unmatched.sam $basef.NT.snap.unmatched.fulllength.fastq" extractHeaderFromFastq_ncores.sh "$cores" "$basef.cutadapt.fastq" "$basef.NT.snap.matched.sam" "$basef.NT.snap.matched.fulllength.fastq" 
"$basef.NT.snap.unmatched.sam" "$basef.NT.snap.unmatched.fulllength.fastq" #SNN140507 sort -k1,1 "$basef.NT.snap.matched.sam" > "$basef.NT.snap.matched.sorted.sam" cut -f1-9 "$basef.NT.snap.matched.sorted.sam" > "$basef.NT.snap.matched.sorted.sam.tmp1" @@ -904,72 +879,77 @@ then awk '(NR%4==1) {printf("%s\t",$0)} (NR%4==2) {printf("%s\t", $0)} (NR%4==0) {printf("%s\n",$0)}' "$basef.NT.snap.matched.fulllength.fastq" | sort -k1,1 | awk '{print $2 "\t" $3}' > "$basef.NT.snap.matched.fulllength.sequence.txt" #SNN140507 change this to bring in quality lines as well paste "$basef.NT.snap.matched.sorted.sam.tmp1" "$basef.NT.snap.matched.fulllength.sequence.txt" "$basef.NT.snap.matched.sorted.sam.tmp2" > "$basef.NT.snap.matched.fulllength.sam" ###retrieve taxonomy matched to NT ### - echo -e "$(date)\t$scriptname\ttaxonomy retrieval for $basef.NT.snap.matched.fulllength.sam" - echo -e "$(date)\t$scriptname\tParameters: taxonomy_lookup.pl $basef.NT.snap.matched.fulllength.sam sam nucl $cores $taxonomy_db_directory" + log "taxonomy retrieval for $basef.NT.snap.matched.fulllength.sam" + log "Parameters: taxonomy_lookup.pl $basef.NT.snap.matched.fulllength.sam sam nucl $cores $taxonomy_db_directory" taxonomy_lookup.pl "$basef.NT.snap.matched.fulllength.sam" sam nucl $cores $taxonomy_db_directory sort -k 13.7n "$basef.NT.snap.matched.fulllength.all.annotated" > "$basef.NT.snap.matched.fulllength.all.annotated.sorted" # sam format is no longer disturbed rm -f "$basef.NT.snap.matched.fulllength.gi" "$basef.NT.snap.matched.fullength.gi.taxonomy" fi -# adjust filenames for FAST mode - grep "Viruses;" "$basef.NT.snap.matched.fulllength.all.annotated.sorted" > "$basef.NT.snap.matched.fl.Viruses.annotated" - grep "Bacteria;" "$basef.NT.snap.matched.fulllength.all.annotated.sorted" > "$basef.NT.snap.matched.fl.Bacteria.annotated" - - ##SNN140507 cleanup bacterial reads - echo -e "$(date)\t$scriptname\tParameters: ribo_snap_bac_euk.sh $basef.NT.snap.matched.fl.Bacteria.annotated BAC 
$cores $ribo_snap_bac_euk_directory" - ribo_snap_bac_euk.sh $basef.NT.snap.matched.fl.Bacteria.annotated BAC $cores $ribo_snap_bac_euk_directory #SNN140507 - if [ $run_mode = "Comprehensive" ] - then - grep "Primates;" "$basef.NT.snap.matched.fulllength.all.annotated.sorted" > "$basef.NT.snap.matched.fl.Primates.annotated" - grep -v "Primates" "$basef.NT.snap.matched.fulllength.all.annotated.sorted" | grep "Mammalia" > "$basef.NT.snap.matched.fl.nonPrimMammal.annotated" - grep -v "Mammalia" "$basef.NT.snap.matched.fulllength.all.annotated.sorted" | grep "Chordata" > "$basef.NT.snap.matched.fl.nonMammalChordat.annotated" - grep -v "Chordata" "$basef.NT.snap.matched.fulllength.all.annotated.sorted" | grep "Eukaryota" > "$basef.NT.snap.matched.fl.nonChordatEuk.annotated" - ribo_snap_bac_euk.sh $basef.NT.snap.matched.fl.nonChordatEuk.annotated EUK $cores $ribo_snap_bac_euk_directory - fi - echo -e "$(date)\t$scriptname\tDone taxonomy retrieval" - echo -e "$(date)\t$scriptname\tParameters: table_generator.sh $basef.NT.snap.matched.fl.Viruses.annotated SNAP Y Y Y Y>& $basef.table_generator_snap.matched.fl.log" - table_generator.sh "$basef.NT.snap.matched.fl.Viruses.annotated" SNAP Y Y Y Y>& "$basef.table_generator_snap.matched.fl.log" - if [ $run_mode = "Comprehensive" ] - then - ### convert to FASTQ and retrieve full-length sequences to add to unmatched SNAP for viral RAPSearch### - egrep -v "^@" "$basef.NT.snap.matched.fl.Viruses.annotated" | awk '{if($3 != "*") print "@"$1"\n"$10"\n""+"$1"\n"$11}' > $(echo "$basef.NT.snap.matched.fl.Viruses.annotated" | sed 's/\(.*\)\..*/\1/').fastq - echo -e "$(date)\t$scriptname\tDone: convert to FASTQ and retrieve full-length sequences for SNAP NT hits " - fi - echo -e "$(date)\t$scriptname\t############# SORTING unmatched to NT BY LENGTH AND UNIQ AND LOOKUP ORIGINAL SEQUENCES #################" - if [ $run_mode = "Comprehensive" ] - then - #SNN 140507 extractHeaderFromFastq.csh "$basef.NT.snap.unmatched.fastq" FASTQ 
"$basef.cutadapt.fastq" "$basef.NT.snap.unmatched.fulllength.fastq" - sed "n;n;n;d" "$basef.NT.snap.unmatched.fulllength.fastq" | sed "n;n;d" | sed "s/^@/>/g" > "$basef.NT.snap.unmatched.fulllength.fasta" - fi - cat "$basef.NT.snap.unmatched.fulllength.fasta" | perl -e 'while (<>) {$h=$_; $s=<>; $seqs{$h}=$s;} foreach $header (reverse sort {length($seqs{$a}) <=> length($seqs{$b})} keys %seqs) {print $header.$seqs{$header}}' > $basef.NT.snap.unmatched.fulllength.sorted.fasta - if [ $run_mode = "Comprehensive" ] - then - echo -e "$(date)\t$scriptname\twe will be using 50 as the length of the cropped read for removing unique and low-complexity reads" - echo -e "$(date)\t$scriptname\tParameters: crop_reads.csh $basef.NT.snap.unmatched.fulllength.sorted.fasta 25 50 > $basef.NT.snap.unmatched.fulllength.sorted.cropped.fasta" - crop_reads.csh "$basef.NT.snap.unmatched.fulllength.sorted.fasta" 25 50 > "$basef.NT.snap.unmatched.fulllength.sorted.cropped.fasta" - echo -e "$(date)\t$scriptname\t*** reads cropped ***" - echo -e "$(date)\t$scriptname\tParameters: gt sequniq -seqit -force -o $basef.NT.snap.unmatched.fulllength.sorted.cropped.uniq.fasta $basef.NT.snap.unmatched.fulllength.sorted.cropped.fasta" - gt sequniq -seqit -force -o "$basef.NT.snap.unmatched.fulllength.sorted.cropped.uniq.fasta" "$basef.NT.snap.unmatched.fulllength.sorted.cropped.fasta" - echo -e "$(date)\t$scriptname\tParameters: extractAlltoFast.sh $basef.NT.snap.unmatched.fulllength.sorted.cropped.uniq.fasta FASTA $basef.NT.snap.unmatched.fulllength.fasta FASTA $basef.NT.snap.unmatched.uniq.fl.fasta FASTA" - extractAlltoFast.sh "$basef.NT.snap.unmatched.fulllength.sorted.cropped.uniq.fasta" FASTA "$basef.NT.snap.unmatched.fulllength.fasta" FASTA "$basef.NT.snap.unmatched.uniq.fl.fasta" FASTA #SNN140507 - fi - echo -e "$(date)\t$scriptname\tDone uniquing full length sequences of unmatched to NT" + + if [ ! 
-f "$basef.NT.snap.unmatched.uniq.fl.fasta" ] + then + # adjust filenames for FAST mode + grep "Viruses;" "$basef.NT.snap.matched.fulllength.all.annotated.sorted" > "$basef.NT.snap.matched.fl.Viruses.annotated" + grep "Bacteria;" "$basef.NT.snap.matched.fulllength.all.annotated.sorted" > "$basef.NT.snap.matched.fl.Bacteria.annotated" + + ##SNN140507 cleanup bacterial reads + log "Parameters: ribo_snap_bac_euk.sh $basef.NT.snap.matched.fl.Bacteria.annotated BAC $cores $ribo_snap_bac_euk_directory" + ribo_snap_bac_euk.sh $basef.NT.snap.matched.fl.Bacteria.annotated BAC $cores $ribo_snap_bac_euk_directory #SNN140507 + if [ $run_mode = "Comprehensive" ] + then + grep "Primates;" "$basef.NT.snap.matched.fulllength.all.annotated.sorted" > "$basef.NT.snap.matched.fl.Primates.annotated" + grep -v "Primates" "$basef.NT.snap.matched.fulllength.all.annotated.sorted" | grep "Mammalia" > "$basef.NT.snap.matched.fl.nonPrimMammal.annotated" + grep -v "Mammalia" "$basef.NT.snap.matched.fulllength.all.annotated.sorted" | grep "Chordata" > "$basef.NT.snap.matched.fl.nonMammalChordat.annotated" + grep -v "Chordata" "$basef.NT.snap.matched.fulllength.all.annotated.sorted" | grep "Eukaryota" > "$basef.NT.snap.matched.fl.nonChordatEuk.annotated" + ribo_snap_bac_euk.sh $basef.NT.snap.matched.fl.nonChordatEuk.annotated EUK $cores $ribo_snap_bac_euk_directory + fi + log "Done taxonomy retrieval" + log "Parameters: table_generator.sh $basef.NT.snap.matched.fl.Viruses.annotated SNAP Y Y Y Y>& $basef.table_generator_snap.matched.fl.log" + table_generator.sh "$basef.NT.snap.matched.fl.Viruses.annotated" SNAP Y Y Y Y>& "$basef.table_generator_snap.matched.fl.log" + if [ $run_mode = "Comprehensive" ] + then + ### convert to FASTQ and retrieve full-length sequences to add to unmatched SNAP for viral RAPSearch### + egrep -v "^@" "$basef.NT.snap.matched.fl.Viruses.annotated" | awk '{if($3 != "*") print "@"$1"\n"$10"\n""+"$1"\n"$11}' > $(echo "$basef.NT.snap.matched.fl.Viruses.annotated" | sed 
's/\(.*\)\..*/\1/').fastq + log "Done: convert to FASTQ and retrieve full-length sequences for SNAP NT hits " + fi + log "############# SORTING unmatched to NT BY LENGTH AND UNIQ AND LOOKUP ORIGINAL SEQUENCES #################" + if [ $run_mode = "Comprehensive" ] + then + #SNN 140507 extractHeaderFromFastq.csh "$basef.NT.snap.unmatched.fastq" FASTQ "$basef.cutadapt.fastq" "$basef.NT.snap.unmatched.fulllength.fastq" + sed "n;n;n;d" "$basef.NT.snap.unmatched.fulllength.fastq" | sed "n;n;d" | sed "s/^@/>/g" > "$basef.NT.snap.unmatched.fulllength.fasta" + fi + cat "$basef.NT.snap.unmatched.fulllength.fasta" | perl -e 'while (<>) {$h=$_; $s=<>; $seqs{$h}=$s;} foreach $header (reverse sort {length($seqs{$a}) <=> length($seqs{$b})} keys %seqs) {print $header.$seqs{$header}}' > $basef.NT.snap.unmatched.fulllength.sorted.fasta + if [ $run_mode = "Comprehensive" ] + then + log "we will be using 50 as the length of the cropped read for removing unique and low-complexity reads" + log "Parameters: crop_reads.csh $basef.NT.snap.unmatched.fulllength.sorted.fasta 25 50 > $basef.NT.snap.unmatched.fulllength.sorted.cropped.fasta" + crop_reads.csh "$basef.NT.snap.unmatched.fulllength.sorted.fasta" 25 50 > "$basef.NT.snap.unmatched.fulllength.sorted.cropped.fasta" + log "*** reads cropped ***" + log "Parameters: gt sequniq -seqit -force -o $basef.NT.snap.unmatched.fulllength.sorted.cropped.uniq.fasta $basef.NT.snap.unmatched.fulllength.sorted.cropped.fasta" + gt sequniq -seqit -force -o "$basef.NT.snap.unmatched.fulllength.sorted.cropped.uniq.fasta" "$basef.NT.snap.unmatched.fulllength.sorted.cropped.fasta" + log "Parameters: extractAlltoFast.sh $basef.NT.snap.unmatched.fulllength.sorted.cropped.uniq.fasta FASTA $basef.NT.snap.unmatched.fulllength.fasta FASTA $basef.NT.snap.unmatched.uniq.fl.fasta FASTA" + extractAlltoFast.sh "$basef.NT.snap.unmatched.fulllength.sorted.cropped.uniq.fasta" FASTA "$basef.NT.snap.unmatched.fulllength.fasta" FASTA "$basef.NT.snap.unmatched.uniq.fl.fasta" 
FASTA #SNN140507 + fi + log "Done uniquing full length sequences of unmatched to NT" + fi fi + ####################### DENOVO CONTIG ASSEMBLY ##### if [ $run_mode = "Comprehensive" ] then - echo -e "$(date)\t$scriptname\t######### Running ABYSS and Minimus #########" + log "######### Running ABYSS and Minimus #########" START_deNovo=$(date +%s) - echo -e "$(date)\t$scriptname\tAdding matched viruses to NT unmatched" + log "Adding matched viruses to NT unmatched" sed "n;n;n;d" "$basef.NT.snap.matched.fl.Viruses.fastq" | sed "n;n;d" | sed "s/^@/>/g" | sed 's/>/>Vir/g' > "$basef.NT.snap.matched.fl.Viruses.fasta" gt sequniq -seqit -force -o "$basef.NT.snap.matched.fl.Viruses.uniq.fasta" "$basef.NT.snap.matched.fl.Viruses.fasta" cat "$basef.NT.snap.unmatched.uniq.fl.fasta" "$basef.NT.snap.matched.fl.Viruses.uniq.fasta" > "$basef.NT.snap.unmatched_addVir_uniq.fasta" - echo -e "$(date)\t$scriptname\tStarting deNovo assembly" - echo -e "$(date)\t$scriptname\tParameters: abyss_minimus.sh $basef.NT.snap.unmatched_addVir_uniq.fasta $length $contigcutoff $cores $abysskmer $ignore_barcodes_for_de_novo" + log "Starting deNovo assembly" + log "Parameters: abyss_minimus.sh $basef.NT.snap.unmatched_addVir_uniq.fasta $length $contigcutoff $cores $abysskmer $ignore_barcodes_for_de_novo" abyss_minimus.sh "$basef.NT.snap.unmatched_addVir_uniq.fasta" "$length" "$contigcutoff" "$cores" "$abysskmer" "$ignore_barcodes_for_de_novo" - echo -e "$(date)\t$scriptname\tCompleted deNovo assembly: generated all.$basef.NT.snap.unmatched_addVir_uniq.fasta.unitigs.cut${length}.${contigcutoff}-mini.fa" + log "Completed deNovo assembly: generated all.$basef.NT.snap.unmatched_addVir_uniq.fasta.unitigs.cut${length}.${contigcutoff}-mini.fa" END_deNovo=$(date +%s) diff_deNovo=$(( END_deNovo - START_deNovo )) - echo -e "$(date)\t$scriptname\tdeNovo Assembly took $diff_deNovo seconds." | tee -a timing.$basef.log + log "deNovo Assembly took $diff_deNovo seconds." 
| tee -a timing.$basef.log fi #######RAPSearch##### #################### RAPSearch to Vir ########### @@ -979,59 +959,55 @@ then then if [ -f "$basef.NT.snap.unmatched.uniq.fl.fasta" ] then - echo -e "$(date)\t$scriptname\t############# RAPSearch to ${RAPSearch_VIRUS_db} ON NT-UNMATCHED SEQUENCES #################" - if [[ $dropcache == "Y" ]] - then - dropcache - fi - echo -e "$(date)\t$scriptname\tStarting: RAPSearch $basef.NT.snap.unmatched.uniq.fl.fasta " + log "############# RAPSearch to ${RAPSearch_VIRUS_db} ON NT-UNMATCHED SEQUENCES #################" + log "Starting: RAPSearch $basef.NT.snap.unmatched.uniq.fl.fasta " START14=$(date +%s) - echo -e "$(date)\t$scriptname\tParameters: rapsearch -q $basef.NT.snap.unmatched.uniq.fl.fasta -d $RAPSearch_VIRUS_db -o $basef.$rapsearch_database.RAPSearch.e1 -z $cores -e $ecutoff_Vir -v 1 -b 1 -t N >& $basef.$rapsearch_database.RAPSearch.log" + log "Parameters: rapsearch -q $basef.NT.snap.unmatched.uniq.fl.fasta -d $RAPSearch_VIRUS_db -o $basef.$rapsearch_database.RAPSearch.e1 -z $cores -e $ecutoff_Vir -v 1 -b 1 -t N >& $basef.$rapsearch_database.RAPSearch.log" rapsearch -q "$basef.NT.snap.unmatched.uniq.fl.fasta" -d $RAPSearch_VIRUS_db -o $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir} -z "$cores" -e "$ecutoff_Vir" -v 1 -b 1 -t N >& $basef.$rapsearch_database.RAPSearch.log - echo -e "$(date)\t$scriptname\tDone RAPSearch" + log "Done RAPSearch" END14=$(date +%s) diff=$(( END14 - START14 )) - echo -e "$(date)\t$scriptname\tRAPSearch to Vir Took $diff seconds" - echo -e "$(date)\t$scriptname\tStarting: add FASTA sequences to RAPSearch m8 output file " + log "RAPSearch to Vir Took $diff seconds" + log "Starting: add FASTA sequences to RAPSearch m8 output file " START15=$(date +%s) sed -i '/^#/d' $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.m8 seqtk subseq $basef.NT.snap.unmatched.uniq.fl.fasta $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.m8 > 
$basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.m8.fasta sed '/>/d' $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.m8.fasta > $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.m8.fasta.seq paste $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.m8 $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.m8.fasta.seq > $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.addseq.m8 - echo -e "$(date)\t$scriptname\tParameters: taxonomy_lookup.pl $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.addseq.m8 blast prot $cores $taxonomy_db_directory" + log "Parameters: taxonomy_lookup.pl $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.addseq.m8 blast prot $cores $taxonomy_db_directory" taxonomy_lookup.pl $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.addseq.m8 blast prot $cores $taxonomy_db_directory mv $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.addseq.all.annotated $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.annotated - echo -e "$(date)\t$scriptname\tParameters: table_generator.sh $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.annotated RAP N Y N N" + log "Parameters: table_generator.sh $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.annotated RAP N Y N N" table_generator.sh $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.annotated RAP N Y N N - echo -e "$(date)\t$scriptname\tDone: converting RAPSearch Vir output to fasta" + log "Done: converting RAPSearch Vir output to fasta" END15=$(date +%s) diff=$(( END15 - START15 )) - echo -e "$(date)\t$scriptname\tConverting RAPSearch Vir output to fasta sequences took $diff seconds." | tee -a timing.$basef.log + log "Converting RAPSearch Vir output to fasta sequences took $diff seconds." 
| tee -a timing.$basef.log cat "$basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.m8.fasta" "all.$basef.NT.snap.unmatched_addVir_uniq.fasta.unitigs.cut${length}.${contigcutoff}-mini.fa" > "$basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.m8.fasta" else - echo -e "$(date)\t$scriptname\tCannot run viral RAPSearch - necessary input file ($basef.$rapsearch_database.RAPSearch.e$ecutoff_Vir.m8) does not exist" - echo -e "$(date)\t$scriptname\tconcatenating RAPSearchvirus output and Contigs" + log "Cannot run viral RAPSearch - necessary input file ($basef.$rapsearch_database.RAPSearch.e$ecutoff_Vir.m8) does not exist" + log "concatenating RAPSearchvirus output and Contigs" fi - echo -e "$(date)\t$scriptname\t############# Cleanup RAPSearch Vir by RAPSearch to NR #################" + log "############# Cleanup RAPSearch Vir by RAPSearch to NR #################" if [ -f $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.m8.fasta ] then - echo -e "$(date)\t$scriptname\tStarting: RAPSearch to $RAPSearch_NR_db of $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.m8.fasta :" + log "Starting: RAPSearch to $RAPSearch_NR_db of $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.m8.fasta :" START16=$(date +%s) - echo -e "$(date)\t$scriptname\tParameters: rapsearch -q $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.m8.fasta -d $RAPSearch_NR_db -o $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR} -z $cores -e $ecutoff_NR -v 1 -b 1 -t N -a T" + log "Parameters: rapsearch -q $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.m8.fasta -d $RAPSearch_NR_db -o $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR} -z $cores -e $ecutoff_NR -v 1 -b 1 -t N -a T" rapsearch -q 
$basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.m8.fasta -d $RAPSearch_NR_db -o $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR} -z $cores -e $ecutoff_NR -v 1 -b 1 -t N -a T - echo -e "$(date)\t$scriptname\trapsearch to nr done" + log "rapsearch to nr done" sed -i '/^#/d' $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.m8 - echo -e "$(date)\t$scriptname\tremoved extra #" + log "removed extra #" END16=$(date +%s) diff=$(( END16 - START16 )) - echo -e "$(date)\t$scriptname\tRAPSearch to NR took $diff seconds." | tee -a timing.$basef.log - echo -e "$(date)\t$scriptname\tStarting: Seq retrieval and Taxonomy $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}" + log "RAPSearch to NR took $diff seconds." | tee -a timing.$basef.log + log "Starting: Seq retrieval and Taxonomy $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}" START17=$(date +%s) seqtk subseq $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.m8.fasta \ $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.m8 > \ $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.m8.fasta - echo -e "$(date)\t$scriptname\tretrieved sequences" + log "retrieved sequences" cat $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.m8.fasta | \ awk '{if (substr($0,1,1)==">"){if (p){print "\n";} print $0} else printf("%s",$0);p++;}END{print "\n"}' | \ sed '/^$/d' | \ @@ -1039,30 +1015,30 @@ then paste $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e$ecutoff_Vir.NR.e${ecutoff_NR}.m8 \ $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.m8.fasta.seq > \ 
$basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.addseq.m8 - echo -e "$(date)\t$scriptname\tmade addseq file" - echo -e "$(date)\t$scriptname\t############# RAPSearch Taxonomy" - echo -e "$(date)\t$scriptname\tParameters: taxonomy_lookup.pl $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.addseq.m8 blast prot $cores $taxonomy_db_directory" + log "made addseq file" + log "############# RAPSearch Taxonomy" + log "Parameters: taxonomy_lookup.pl $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.addseq.m8 blast prot $cores $taxonomy_db_directory" taxonomy_lookup.pl $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.addseq.m8 blast prot $cores $taxonomy_db_directory cp $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.addseq.all.annotated $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.annotated - echo -e "$(date)\t$scriptname\tretrieved taxonomy" + log "retrieved taxonomy" grep "Viruses" $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.annotated > $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.Viruses.annotated egrep "^contig" $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.annotated > $basef.Contigs.NR.RAPSearch.e${ecutoff_NR}.annotated - echo -e "$(date)\t$scriptname\textracted RAPSearch taxonomy" - echo -e "$(date)\t$scriptname\tStarting Readcount table" - echo -e "$(date)\t$scriptname\tParameters: table_generator.sh $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.Viruses.annotated RAP Y Y Y Y" + log "extracted RAPSearch taxonomy" + log "Starting Readcount table" + log "Parameters: table_generator.sh 
$basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.Viruses.annotated RAP Y Y Y Y" table_generator.sh $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.Viruses.annotated RAP Y Y Y Y - echo -e "$(date)\t$scriptname\tParameters: table_generator.sh $basef.Contigs.NR.RAPSearch.e${ecutoff_NR}.annotated RAP Y Y Y Y" + log "Parameters: table_generator.sh $basef.Contigs.NR.RAPSearch.e${ecutoff_NR}.annotated RAP Y Y Y Y" table_generator.sh $basef.Contigs.NR.RAPSearch.e${ecutoff_NR}.annotated RAP Y Y Y Y #allow contigs to be incorporated into coverage maps by making contig barcodes the same as non-contig barcodes (removing the @) sed 's/@//g' $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.Viruses.annotated > $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.Viruses.annotated.bar.inc - echo -e "$(date)\t$scriptname\tmaking coverage maps" + log "making coverage maps" # coverage_generator_bp.sh (divides each fasta file into $cores cores then runs BLASTn using one core each. 
- echo -e "$(date)\t$scriptname\tParameters: coverage_generator_bp.sh $basef.NT.snap.matched.fl.Viruses.annotated $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.Viruses.annotated.bar.inc $eBLASTn $cores 10 1 $basef" + log "Parameters: coverage_generator_bp.sh $basef.NT.snap.matched.fl.Viruses.annotated $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.Viruses.annotated.bar.inc $eBLASTn $cores 10 1 $basef" coverage_generator_bp.sh $basef.NT.snap.matched.fl.Viruses.annotated $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.Viruses.annotated.bar.inc $eBLASTn $cores 10 1 $basef awk '{print$1}' $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.annotated > $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.annotated.header awk '{print$1}' $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.annotated > $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.annotated.header - # find headers in viral rapsearch that are no longer found in rapsearch to nr + # find headers in viral rapsearch that are no longer found in rapsearch to nr sort $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.annotated.header \ $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.annotated.header | \ uniq -d | \ @@ -1077,38 +1053,38 @@ then cat $basef.not.in.NR.[a-z][a-z][a-z][a-z][a-z][a-z].annotated > $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.annotated.not.in.NR.annotated rm -r $basef.not.in.NR.[a-z][a-z][a-z][a-z][a-z][a-z] rm -r $basef.not.in.NR.[a-z][a-z][a-z][a-z][a-z][a-z].annotated - echo -e "$(date)\t$scriptname\tParameters: table_generator.sh $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.annotated.not.in.NR.annotated RAP N Y N N" + log "Parameters: table_generator.sh 
$basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.annotated.not.in.NR.annotated RAP N Y N N" table_generator.sh $basef.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.annotated.not.in.NR.annotated RAP N Y N N END17=$(date +%s) diff=$(( END17 - START17 )) - echo -e "$(date)\t$scriptname\tRAPSearch seq retrieval, taxonomy and readcount took $diff seconds" | tee -a timing.$basef.log + log "RAPSearch seq retrieval, taxonomy and readcount took $diff seconds" | tee -a timing.$basef.log else - echo -e "$(date)\t$scriptname\tCannot run RAPSearch to NR - necessary input file ($basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.m8.fasta) does not exist" + log "Cannot run RAPSearch to NR - necessary input file ($basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.m8.fasta) does not exist" fi fi ##################RAPSearch to NR ####### if [ "$rapsearch_database" == "NR" ] then - echo -e "$(date)\t$scriptname\t#################### RAPSearch to NR ###########" + log "#################### RAPSearch to NR ###########" cat "$basef.NT.snap.unmatched.uniq.fl.fasta" "all.$basef.NT.snap.unmatched_addVir_uniq.fasta.unitigs.cut${length}.${contigcutoff}-mini.fa" > "$basef.Contigs.NT.snap.unmatched.uniq.fl.fasta" if [ -f $basef.Contigs.NT.snap.unmatched.uniq.fl.fasta ] then - echo -e "$(date)\t$scriptname\t############# RAPSearch to NR #################" - echo -e "$(date)\t$scriptname\tStarting: RAPSearch to NR $basef.Contigs.NT.snap.unmatched.uniq.fl.fasta" + log "############# RAPSearch to NR #################" + log "Starting: RAPSearch to NR $basef.Contigs.NT.snap.unmatched.uniq.fl.fasta" START16=$(date +%s) - echo -e "$(date)\t$scriptname\tParameters:rapsearch -q $basef.Contigs.NT.snap.unmatched.uniq.fl.fasta -d $RAPSearch_NR_db -o $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR} -z $cores -e $ecutoff_NR -v 1 -b 1 -t N -a $RAPSearch_NR_fast_mode" + log "Parameters:rapsearch -q 
$basef.Contigs.NT.snap.unmatched.uniq.fl.fasta -d $RAPSearch_NR_db -o $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR} -z $cores -e $ecutoff_NR -v 1 -b 1 -t N -a $RAPSearch_NR_fast_mode" rapsearch -q "$basef.Contigs.NT.snap.unmatched.uniq.fl.fasta" -d "$RAPSearch_NR_db" -o "$basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}" -z $cores -e $ecutoff_NR -v 1 -b 1 -t N -a $RAPSearch_NR_fast_mode - echo -e "$(date)\t$scriptname\tRAPSearch to NR done" + log "RAPSearch to NR done" sed -i '/^#/d' $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.m8 - echo -e "$(date)\t$scriptname\tremoved extra #" + log "removed extra #" END16=$(date +%s) diff=$(( END16 - START16 )) - echo -e "$(date)\t$scriptname\tRAPSearch to NR took $diff seconds" | tee -a timing.$basef.log - echo -e "$(date)\t$scriptname\tStarting: Seq retrieval and Taxonomy $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}" + log "RAPSearch to NR took $diff seconds" | tee -a timing.$basef.log + log "Starting: Seq retrieval and Taxonomy $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}" START17=$(date +%s) seqtk subseq $basef.Contigs.NT.snap.unmatched.uniq.fl.fasta $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.m8 > $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.m8.fasta - echo -e "$(date)\t$scriptname\tretrieved sequences" + log "retrieved sequences" cat $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.m8.fasta | \ awk '{if (substr($0,1,1)==">"){if (p){print "\n";} print $0} else printf("%s",$0);p++;}END{print "\n"}' | \ sed '/^$/d' | \ @@ -1118,40 +1094,36 @@ then paste $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.m8 \ $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.m8.fasta.seq > \ 
$basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.addseq.m8 - echo -e "$(date)\t$scriptname\tmade addseq file" - echo -e "$(date)\t$scriptname\t############# RAPSearch Taxonomy" - echo -e "$(date)\t$scriptname\tParameters: taxonomy_lookup.pl $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.addseq.m8 blast prot $cores $taxonomy_db_directory" + log "made addseq file" + log "############# RAPSearch Taxonomy" + log "Parameters: taxonomy_lookup.pl $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.addseq.m8 blast prot $cores $taxonomy_db_directory" taxonomy_lookup.pl $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.addseq.m8 blast prot $cores $taxonomy_db_directory cp $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.addseq.all.annotated $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.annotated - echo -e "$(date)\t$scriptname\tretrieved taxonomy" + log "retrieved taxonomy" grep "Viruses" $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.annotated > $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.Viruses.annotated - egrep "^contig" $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.annotated > $basef.Contigs.$rapsearch_database.RAPSearch.e${ecutoff_NR}.annotated - echo -e "$(date)\t$scriptname\textracted RAPSearch taxonomy" - echo -e "$(date)\t$scriptname\tStarting Readcount table" - echo -e "$(date)\t$scriptname\tParameters: table_generator.sh $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.Viruses.annotated RAP Y Y Y Y" + egrep "^contig" $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.annotated > $basef.Contigs.$rapsearch_database.RAPSearch.e${ecutoff_NR}.annotated + log "extracted RAPSearch taxonomy" + log "Starting Readcount table" + log "Parameters: table_generator.sh 
$basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.Viruses.annotated RAP Y Y Y Y" table_generator.sh $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.Viruses.annotated RAP Y Y Y Y grep -v Viruses $basef.Contigs.$rapsearch_database.RAPSearch.e${ecutoff_NR}.annotated > $basef.Contigs.$rapsearch_database.RAPSearch.e${ecutoff_NR}.noVir.annotated - echo -e "$(date)\t$scriptname\tParameters: table_generator.sh $basef.Contigs.$rapsearch_database.RAPSearch.e${ecutoff_NR}.noVir.annotated RAP N Y N N" + log "Parameters: table_generator.sh $basef.Contigs.$rapsearch_database.RAPSearch.e${ecutoff_NR}.noVir.annotated RAP N Y N N" table_generator.sh $basef.Contigs.$rapsearch_database.RAPSearch.e${ecutoff_NR}.noVir.annotated RAP N Y N N sed 's/@//g' $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.Viruses.annotated > $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.Viruses.annotated.bar.inc - echo -e "$(date)\t$scriptname\tmaking coverage maps" - echo -e "$(date)\t$scriptname\tParameters: coverage_generator_bp.sh $basef.NT.snap.matched.fl.Viruses.annotated $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.Viruses.annotated.bar.inc $eBLASTn 10 10 1 $basef" + log "making coverage maps" + log "Parameters: coverage_generator_bp.sh $basef.NT.snap.matched.fl.Viruses.annotated $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.Viruses.annotated.bar.inc $eBLASTn 10 10 1 $basef" coverage_generator_bp.sh $basef.NT.snap.matched.fl.Viruses.annotated $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_NR}.Viruses.annotated.bar.inc $eBLASTn 10 10 1 $basef END17=$(date +%s) diff=$(( END17 - START17 )) - echo -e "$(date)\t$scriptname\tRAPSearch seq retrieval, taxonomy and table readcount and coverage took $diff seconds." 
| tee -a timing.$basef.log + log "RAPSearch seq retrieval, taxonomy and table readcount and coverage took $diff seconds." | tee -a timing.$basef.log else - echo -e "$(date)\t$scriptname\tCannot run RAPSearch to NR - necessary input file ($basef.Contigs.NT.snap.unmatched.uniq.fl.fasta) does not exist" + log "Cannot run RAPSearch to NR - necessary input file ($basef.Contigs.NT.snap.unmatched.uniq.fl.fasta) does not exist" fi fi - if [[ $dropcache == "Y" ]] - then - dropcache - fi fi ############################# OUTPUT FINAL COUNTS ############################# -echo -e "$(date)\t$scriptname\tStarting: generating readcounts.$basef.log report" +log "Starting: generating readcounts.$basef.log report" START_READCOUNT=$(date +%s) headerid_top=$(head -1 $basef.fastq | cut -c1-4) @@ -1162,8 +1134,8 @@ if [ "$headerid_top" == "$headerid_bottom" ] # We should adjust this code to check that all headers are unique, rather than just the first and last then headerid=$(head -1 $basef.fastq | cut -c1-4 | sed 's/@//g') - echo -e "$(date)\t$scriptname\theaderid_top $headerid_top = headerid_bottom $headerid_bottom and headerid = $headerid" - echo -e "$(date)\t$scriptname\tParameters: readcount.sh $basef $headerid Y $basef.fastq $basef.preprocessed.fastq $basef.preprocessed.s20.h250n25d12xfu.human.snap.unmatched.fastq $basef.NT.snap.matched.fulllength.all.annotated.sorted $basef.NT.snap.matched.fl.Viruses.annotated $basef.NT.snap.matched.fl.Bacteria.annotated $basef.NT.snap.matched.fl.nonChordatEuk.annotated $basef.NT.snap.unmatched.sam $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.Viruses.annotated" + log "headerid_top $headerid_top = headerid_bottom $headerid_bottom and headerid = $headerid" + log "Parameters: readcount.sh $basef $headerid Y $basef.fastq $basef.preprocessed.fastq $basef.preprocessed.s20.h250n25d12xfu.human.snap.unmatched.fastq $basef.NT.snap.matched.fulllength.all.annotated.sorted 
$basef.NT.snap.matched.fl.Viruses.annotated $basef.NT.snap.matched.fl.Bacteria.annotated $basef.NT.snap.matched.fl.nonChordatEuk.annotated $basef.NT.snap.unmatched.sam $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.Viruses.annotated" readcount.sh $basef $headerid Y $basef.fastq \ $basef.preprocessed.fastq \ $basef.preprocessed.s20.h250n25d12xfu.human.snap.unmatched.fastq \ @@ -1173,18 +1145,18 @@ then $basef.NT.snap.matched.fl.nonChordatEuk.annotated \ $basef.NT.snap.unmatched.sam \ $basef.Contigs.and.NTunmatched.$rapsearch_database.RAPSearch.e${ecutoff_Vir}.NR.e${ecutoff_NR}.Viruses.annotated - echo -e "$(date)\t$scriptname\tDone: generating readcounts.$basef.log report" + log "Done: generating readcounts.$basef.log report" END_READCOUNT=$(date +%s) diff_READCOUNT=$(( END_READCOUNT - START_READCOUNT )) - echo -e "$(date)\t$scriptname\tGenerating read count report Took $diff_READCOUNT seconds" | tee -a timing.$basef.log + log "Generating read count report Took $diff_READCOUNT seconds" | tee -a timing.$basef.log else - echo -e "$(date)\t$scriptname\treadcount.sh aborted due to non-unique header id" + log "readcount.sh aborted due to non-unique header id" fi -echo -e "$(date)\t$scriptname\t#################### SURPI PIPELINE COMPLETE ##################" +log "#################### SURPI PIPELINE COMPLETE ##################" END_PIPELINE=$(date +%s) diff_PIPELINE=$(( END_PIPELINE - START_PIPELINE )) -echo -e "$(date)\t$scriptname\tTotal run time of pipeline took $diff_PIPELINE seconds" | tee -a timing.$basef.log +log "Total run time of pipeline took $diff_PIPELINE seconds" | tee -a timing.$basef.log echo "Script and Parameters = $0 $@ " > $basef.pipeline_parameters.log echo "Raw Read quality = $quality" >> $basef.pipeline_parameters.log diff --git a/abyss_minimus.sh b/abyss_minimus.sh index ed14d85..99d62d7 100755 --- a/abyss_minimus.sh +++ b/abyss_minimus.sh @@ -15,8 +15,9 @@ # Copyright (C) 2014 Samia N Naccache - All 
Rights Reserved # SURPI has been released under a modified BSD license. # Please see license file for details. - scriptname=${0##*/} +source debug.sh +source logging.sh if [ $# -lt 5 ] then @@ -34,7 +35,7 @@ ignore_barcodes=$6 split_FASTA_size=100000 #### create list of barcodes in the fasta file in the following format: #barcode -echo -e "$(date)\t$scriptname\tDemultiplexing barcodes..." +log "Demultiplexing barcodes..." START_DEMULTIPLEX=$(date +%s) #The sed '/^#$/d' removes a blank #. This is needed in cases where barcodes are present along with reads that have no barcodes @@ -54,10 +55,10 @@ else #$inputfile.barcodes doesn't exist, or size=0, then we only have 1 barcode, echo "#" > $inputfile.barcodes fi -echo -e "$(date)\t$scriptname\tCompleted demultiplexing barcodes." +log "Completed demultiplexing barcodes." END_DEMULTIPLEX=$(date +%s) diff_DEMULTIPLEX=$(( END_DEMULTIPLEX - START_DEMULTIPLEX )) -echo -e "$(date)\t$scriptname\tBarcode demultiplex took $diff_DEMULTIPLEX s." +log "Barcode demultiplex took $diff_DEMULTIPLEX s." ### generate fasta file for every separate barcode (demultiplex) for f in `cat $inputfile.barcodes` ; do @@ -68,30 +69,33 @@ for f in `cat $inputfile.barcodes` ; do grep -E "$f(/|$)" $inputfile -A 1 --no-group-separator > bar$f.$inputfile fi # split demultiplexed fasta file into 100,000 read sub-fastas - echo -e "$(date)\t$scriptname\tSplitting FASTA into chunks of size: $split_FASTA_size." + log "Splitting FASTA into chunks of size: $split_FASTA_size." START_SPLIT=$(date +%s) cp bar$f.$inputfile bar$f.${inputfile}_n # So that the unsplit demultiplexed file is also denovo assembled # split_fasta.pl -i bar$f.$inputfile -o bar$f.$inputfile -n $split_FASTA_size - echo -e "$(date)\t$scriptname\tCompleted splitting FASTA file." + log "Completed splitting FASTA file." END_SPLIT=$(date +%s) diff_SPLIT=$(( $END_SPLIT - $START_SPLIT )) - echo -e "$(date)\t$scriptname\tSplitting FASTA took $diff_SPLIT s." 
- ### run abyss (deBruijn assembler) on each 100,000 read demultiplexed fasta file, including the unsplit demultiplexed file - echo -e "$(date)\t$scriptname\tRunning abyss on each $split_FASTA_size chunk..." + log "Splitting FASTA took $diff_SPLIT s." + ### run abyss (deBruijn assembler) on each 100,000 read demultiplexed fasta file, including the unsplit demultiplexed file + log "Running abyss on each $split_FASTA_size chunk..." START_ABYSS=$(date +%s) - for d in bar$f.${inputfile}_* ; do - abyss-pe k=$kmer name=$d.f se=$d np=$cores >& $d.abyss.log + for d in bar$f.${inputfile}_* ; do + echo $d + log "Command: abyss-pe k=$kmer name=$d.f se=$d np=$cores >& $d.abyss.log" + #abyss-pe k=$kmer name=$d.f se=$d np=$cores >& $d.abyss.log + abyss-pe k=$kmer name=$d.f se=$d >& $d.abyss.log done - echo -e "$(date)\t$scriptname\tCompleted running abyss on each $split_FASTA_size chunk." + log "Completed running abyss on each $split_FASTA_size chunk." END_ABYSS=$(date +%s) diff_ABYSS=$(( END_ABYSS - START_ABYSS )) - echo -e "$(date)\t$scriptname\tAbyss took $diff_ABYSS s." + log "Abyss took $diff_ABYSS s." 
### ### contigs from split files concatenated, after which reads smaller than the cutoff value are eliminated - echo -e "$(date)\t$scriptname\tStarting concatenating and cutoff of abyss output" + log "Starting concatenating and cutoff of abyss output" START_CATCONTIGS=$(date +%s) #concatenating contig files from different fasta splits, and adding kmer infromation and barcode information to contig headers @@ -99,47 +103,48 @@ for f in `cat $inputfile.barcodes` ; do # only contigs larger than $cutoff_post_Abyss are retained cat all.bar$f.$inputfile.unitigs.fa | awk 'NR%2==1 {x = $0} NR%2==0 { if (length($0) >= '$2') printf("%s\n%s\n",x,$0)}' > all.bar$f.$inputfile.unitigs.cut$cutoff_post_Abyss.fa - echo -e "$(date)\t$scriptname\tDone concatenating and cutoff of abyss output" + log "Done concatenating and cutoff of abyss output" END_CATCONTIGS=$(date +%s) diff_CATCONTIGS=$(( END_CATCONTIGS - START_CATCONTIGS )) - echo -e "$(date)\t$scriptname\tConcatenating and cutoff of abyss output took $diff_CATCONTIGS s." + log "Concatenating and cutoff of abyss output took $diff_CATCONTIGS s." ### ### run Minimo (OLC assembler) - echo -e "$(date)\t$scriptname\tStarting Minimo..." + log "Starting Minimo..." START_MINIMO=$(date +%s) + log "Command: Minimo all.bar$f.$inputfile.unitigs.cut$cutoff_post_Abyss.fa -D FASTA_EXP=1" Minimo all.bar$f.$inputfile.unitigs.cut$cutoff_post_Abyss.fa -D FASTA_EXP=1 - echo -e "$(date)\t$scriptname\tCompleted Minimo." + log "Completed Minimo." END_MINIMO=$(date +%s) diff_MINIMO=$(( END_MINIMO - START_MINIMO )) - echo -e "$(date)\t$scriptname\tMinimo took $diff_MINIMO s." + log "Minimo took $diff_MINIMO s." 
########### - echo -e "$(date)\t$scriptname\tStarting cat barcode addition and cutoff of minimo output" + log "Starting cat barcode addition and cutoff of minimo output" START_MIN_PROCESS=$(date +%s) - # Minimo output gives more than one line per sequence, so here we linearize sequences (linearization protocol from here http://seqanswers.com/forums/showthread.php?t=27567 ) + # Minimo output gives more than one line per sequence, so here we linearize sequences (linearization protocol from here http://seqanswers.com/forums/showthread.php?t=27567 ) # then we add the relevant barcode to the end of the header contig. Contigs that survive untouched from abyss already have a barcode at the end of them, so that extra barcode is taken away - cat all-contigs.fa | awk '{if (substr($0,1,1)==">"){if (p){print "\n";} print $0} else printf("%s",$0);p++;}END{print "\n"}' | sed '/^$/d' | sed 's/ /_/g' | sed "s/$/"$f"/g;n" | sed "s/"$f""$f"/"$f"/g" | sed 's/#/#@/g' | sed 's/^>/>contig_/g' > all.bar$f.$inputfile.unitigs.cut$cutoff_post_Abyss-minim.fa - # change generic name all-contigs.fa + cat all-contigs.fa | awk '{if (substr($0,1,1)==">"){if (p){print "\n";} print $0} else printf("%s",$0);p++;}END{print "\n"}' | sed '/^$/d' | sed 's/ /_/g' | sed "s/$/"$f"/g;n" | sed "s/"$f""$f"/"$f"/g" | sed 's/#/#@/g' | sed 's/^>/>contig_/g' > all.bar$f.$inputfile.unitigs.cut$cutoff_post_Abyss-minim.fa + # change generic name all-contigs.fa mv all-contigs.fa all-contigs.fa.$f # only contigs larger than $cutoff_post_Minimo are retained cat all.bar$f.$inputfile.unitigs.cut$cutoff_post_Abyss-minim.fa | awk 'NR%2==1 {x = $0} NR%2==0 { if (length($0) >= '$3') printf("%s\n%s\n",x,$0)}' > all.bar$f.$inputfile.unitigs.cut$cutoff_post_Abyss.${cutoff_post_Minimo}-mini.fa - echo -e "$(date)\t$scriptname\tDone cat barcode addition and cutoff of minimo output" + log "Done cat barcode addition and cutoff of minimo output" END_MIN_PROCESS=$(date +%s) diff_MIN_PROCESS=$(( END_MIN_PROCESS - START_MIN_PROCESS )) - 
echo -e "$(date)\t$scriptname\tcat barcode addition and cutoff of minimo output took $diff_MIN_PROCESS s" + log "cat barcode addition and cutoff of minimo output took $diff_MIN_PROCESS s" done ### ### concatenate deBruijn -> OLC contigs from all barcodes together -echo -e "$(date)\t$scriptname\tConcatenating all barcodes together..." +log "Concatenating all barcodes together..." START_CAT=$(date +%s) cat all.bar*.$inputfile.unitigs.cut$cutoff_post_Abyss.${cutoff_post_Minimo}-mini.fa > all.$inputfile.unitigs.cut$cutoff_post_Abyss.${cutoff_post_Minimo}-mini.fa -echo -e "$(date)\t$scriptname\tCompleted concatenating all barcodes." +log "Completed concatenating all barcodes." END_CAT=$(date +%s) diff_CAT=$(( END_CAT - START_CAT )) -echo -e "$(date)\t$scriptname\tBarcode concatenation took $diff_CAT s." +log "Barcode concatenation took $diff_CAT s." # cleaning up files by organizing directories, moving files into directories, and removing temporary files -mkdir $inputfile.dir +mkdir --parents $inputfile.dir mv bar*$inputfile*.fa $inputfile.dir if [ -e all.$inputfile.contigs.abyssmini.cut$cutoff_post_Abyss.$cutoff_post_Minimo.e1.NR.RAPSearch.m8 ] then @@ -148,7 +153,7 @@ fi rm -f all.$inputfile.contigs.abyssmini.cut$cutoff_post_Abyss.$cutoff_post_Minimo.e1.NR.RAPSearch.aln rm -f all.$inputfile.contigs.abyssmini.e1.NR.RAPSearch.m8.noheader rm -f all.$inputfile.contigs.abyssmini.e1.NR.RAPSearch.m8.noheader.seq -rm -f all.$inputfile.contigs.abyssmini.e1.NR.RAPSearch.m8.noheader +rm -f all.$inputfile.contigs.abyssmini.e1.NR.RAPSearch.m8.noheader rm -f all.$inputfile.contigs.abyssmini.e1.NR.RAPSearch.m8.noheader.ex.fa rm -f all.$inputfile.unitigs.cut$cutoff_post_Abyss-contigs.sortlen.seq rm -f all-contigs* diff --git a/compare_multiple_sam.py b/compare_multiple_sam.py index 1af36c8..ce151ed 100755 --- a/compare_multiple_sam.py +++ b/compare_multiple_sam.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # # compare_multiple_sam.py # @@ -9,7 +9,7 @@ # # Copyright (C) 
2014 Charles Y Chiu - All Rights Reserved # Permission to copy and modify is granted under the BSD license -# Last revised 3/21/2014 +# Last revised 3/21/2014 import sys @@ -61,7 +61,7 @@ lineList = [] for file in fileObjList: lineList.append(file.readline()) - + for file in fileObjList: file.close() outputFile.close() diff --git a/compare_sam.py b/compare_sam.py index 2bd65af..a8feb98 100755 --- a/compare_sam.py +++ b/compare_sam.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # # compare_sam.py # @@ -14,13 +14,13 @@ import os def logHeader(): - import os.path, sys, time - return "%s\t%s\t" % (time.strftime("%a %b %d %H:%M:%S %Z %Y"), os.path.basename(sys.argv[0])) + import os.path, sys, time + return "%s\t%s\t" % (time.strftime("%a %b %d %H:%M:%S %Z %Y"), os.path.basename(sys.argv[0])) usage = "compare_sam.py " if len(sys.argv) != 3: - print usage - sys.exit(0) + print usage + sys.exit(0) SAMfile1 = sys.argv[1] outputFile1 = sys.argv[2] @@ -41,63 +41,66 @@ def logHeader(): linenum += 1 while line1 != '': - data1 = line1.split() - - # check to see whether line is in the SAM header or alignment section - if (data1[0][0]!="@"): - #We are in the SAM alignment section - - edit_distance=int(data1[12].split(":")[2]) - - #using old snap (<=v0.15.4), could use length of line to determine if it was a hit or miss. - #new snap (>=v1.0) instead uses an edit distance of -1 to signify a miss. - - if (edit_distance >= 0): - #this is a hit - new_hits += 1 - sum_edistance_new += int(data1[12].split(":")[2]) - firstentry = data1[0].split("|") - if (len(firstentry)>=2): - # this has been previously flagged as a hit, and is also now a hit. Compare edit distances to find best hit. 
- edit_distance_previous=firstentry[1] - edit_distance_current = data1[12].split(":")[2] - - existing_hits += 1 - sum_edistance_existing += int(edit_distance_previous) - - if (int(edit_distance_current) <= int(edit_distance_previous)): - replacements += 1 - sum_data1 += int(edit_distance_current) - # need to pick the new gi, not the old gi, bug fix 1/27/13 - gi=data1[2].split("|")[1] - replacement_line = line1.replace("|" + firstentry[1] + "|" + firstentry[2],"|" + str(edit_distance_current) + "|" + str(gi),1) - outputFile.write(replacement_line) - else: - sum_data1 += int(edit_distance_previous) - outputFile.write(line1) - else: - edit_distance_current = data1[12].split(":")[2] - existing_hits += 1 - sum_data1 += int(edit_distance_current) - gi=data1[2].split("|")[1] - replacement_line = line1.replace(data1[0],data1[0] + "|" + str(edit_distance_current) + "|" + str(gi),1) - outputFile.write(replacement_line) - replacements+=1 - else: - #this is a miss - outputFile.write(line1) - firstentry = data1[0].split("|") - if (len(firstentry)>=2): - edit_distance_previous=firstentry[1] - existing_hits += 1 - sum_data1 += int(edit_distance_previous) - sum_edistance_existing += int(edit_distance_previous) - else: - #We are in the SAM header section - outputFile.write(line1) - - line1 = file1.readline() - linenum += 1 + data1 = line1.split() + + # check to see whether line is in the SAM header or alignment section + if (data1[0][0]!="@"): + #We are in the SAM alignment section + + edit_distance=int(data1[12].split(":")[2]) + mapped = not bool(int(data1[1]) & 0x4) + + #using old snap (<=v0.15.4), could use length of line to determine if it was a hit or miss. + #new snap (>=v1.0) instead uses an edit distance of -1 to signify a miss. + + # Not sure what above is saying. + # Use 0x4 mask in FLAG field to check for unmapped reads. 
+ if (mapped): + #this is a hit + new_hits += 1 + sum_edistance_new += edit_distance + firstentry = data1[0].split("|") + if (len(firstentry)>=2): + # this has been previously flagged as a hit, and is also now a hit. Compare edit distances to find best hit. + edit_distance_previous=firstentry[1] + edit_distance_current = data1[12].split(":")[2] + + existing_hits += 1 + sum_edistance_existing += int(edit_distance_previous) + + if (int(edit_distance_current) <= int(edit_distance_previous)): + replacements += 1 + sum_data1 += int(edit_distance_current) + # need to pick the new gi, not the old gi, bug fix 1/27/13 + gi=data1[2].split("|")[1] + replacement_line = line1.replace("|" + firstentry[1] + "|" + firstentry[2],"|" + str(edit_distance_current) + "|" + str(gi),1) + outputFile.write(replacement_line) + else: + sum_data1 += int(edit_distance_previous) + outputFile.write(line1) + else: + edit_distance_current = data1[12].split(":")[2] + existing_hits += 1 + sum_data1 += int(edit_distance_current) + gi=data1[2].split("|")[1] + replacement_line = line1.replace(data1[0],data1[0] + "|" + str(edit_distance_current) + "|" + str(gi),1) + outputFile.write(replacement_line) + replacements+=1 + else: + #this is a miss + outputFile.write(line1) + firstentry = data1[0].split("|") + if (len(firstentry)>=2): + edit_distance_previous=firstentry[1] + existing_hits += 1 + sum_data1 += int(edit_distance_previous) + sum_edistance_existing += int(edit_distance_previous) + else: + #We are in the SAM header section + outputFile.write(line1) + + line1 = file1.readline() + linenum += 1 file1.close() outputFile.close() diff --git a/coveragePlot.py b/coveragePlot.py index c95981e..0c48713 100755 --- a/coveragePlot.py +++ b/coveragePlot.py @@ -1,8 +1,8 @@ -#!/usr/bin/python +#!/usr/bin/env python # # coveragePlot.py # -# This program generates genomic coverage plots +# This program generates genomic coverage plots # Chiu Laboratory # University of California, San Francisco # January, 2014 @@ 
-47,7 +47,7 @@ def smart_truncate1(text, max_length=100, suffix='...'): # print "Installed version is: %s." % mpl_version -#load function is deprecated as of matplotlib v1.3.1, replaced with +#load function is deprecated as of matplotlib v1.3.1, replaced with if (LooseVersion(mpl_version) >= LooseVersion('1.3.1') ): data = np.loadtxt(dataFile) else: @@ -78,7 +78,7 @@ def smart_truncate1(text, max_length=100, suffix='...'): plot(data[:,0],data[:,1],color) xlabel("base position",fontsize=8) ylabel("fold coverage",fontsize=8) - title_text = sys.argv[2] + title_text = sys.argv[2] suptitle(title_text,fontsize=9) xMin, xMax, yMin, yMax = min(data[:,0]),max(data[:,0]),min(data[:,1]),max(data[:,1]) # add a 10% buffer to yMax @@ -105,7 +105,7 @@ def smart_truncate1(text, max_length=100, suffix='...'): plot(data[:,0],data[:,1],color) xlabel("base position",fontsize=8) ylabel("fold coverage",fontsize=8) - title_text = sys.argv[2] + title_text = sys.argv[2] suptitle(title_text,fontsize=9) xMin, xMax, yMin, yMax = min(data[:,0]),max(data[:,0]),min(data[:,1]),max(data[:,1]) yMax *= 1.1 @@ -122,7 +122,7 @@ def smart_truncate1(text, max_length=100, suffix='...'): plot(data[:,0],data[:,1],color) xlabel("base position",fontsize=8) ylabel("fold coverage",fontsize=8) - title_text = sys.argv[2] + title_text = sys.argv[2] suptitle(title_text,fontsize=9) xMin, xMax, yMin, yMax = min(data[:,0]),max(data[:,0]),min(data[:,1]),max(data[:,1]) yMax *= 1.1 diff --git a/coverage_generator_bp.sh b/coverage_generator_bp.sh index 9510a8d..6f13feb 100755 --- a/coverage_generator_bp.sh +++ b/coverage_generator_bp.sh @@ -93,7 +93,7 @@ do # create list of curated GIs $SNAP_file.$genus.gi.list.curatedgenome # First retain only gis that are complete genomes if no GIs with "complete genomes" are returned, we'll go to GIs "complete sequences", if no complete sequences, then we just go with the GIs we have - get_genbankfasta.pl -i bar.$bar.$SNAP_file.$genus.gi.list > bar.$bar.$SNAP_file.$genus.gi.headers + 
get_genbankfasta.pl -i bar.$bar.$SNAP_file.$genus.gi.list > bar.$bar.$SNAP_file.$genus.gi.headers egrep ">gi.*[Cc]omplete [Gg]enome" bar.$bar.$SNAP_file.$genus.gi.headers | awk -F "|" '{print$2}' | head -n $top_gis > bar.$bar.$SNAP_file.$genus.gi if [ -s bar.$bar.$SNAP_file.$genus.gi ] diff --git a/create_SURPI_data.sh b/create_SURPI_data.sh index 8e13c42..93a1cd9 100755 --- a/create_SURPI_data.sh +++ b/create_SURPI_data.sh @@ -15,7 +15,7 @@ # Copyright (C) 2014 Scot Federman - All Rights Reserved # SURPI has been released under a modified BSD license. # Please see license file for details. -# Last revised 7/7/2014 +# Last revised 7/7/2014 scriptname=${0##*/} green='\e[0;32m' @@ -23,8 +23,9 @@ red='\e[0;31m' endColor='\e[0m' DATE=$(date +%m%d%Y) -db_dir="NCBI_$DATE" -curated_dir="curated_$DATE" +echo $DATE > DATE.txt +db_dir="NCBI" +curated_dir="curated" cleanup_dir="rawdata" @@ -34,6 +35,7 @@ cleanup_dir="rawdata" #You should likely stick with the default values here. If changing them, then the corresponding #values will also need to be changed within the SURPI config file. reference_dir="reference" +subtraction_dir="Subtraction_SNAP" taxonomy_dir="taxonomy" RAPSearch_dir="RAPSearch" FAST_dir="FAST_SNAP" @@ -41,7 +43,7 @@ SNAP_nt_dir="COMP_SNAP" RiboClean_dir="RiboClean_SNAP" # SNAP_nt_chunks= # of chunks to split nt into, SNAP index will be created for each chunk. -# Currently, the minimum number of chunks is around 17-20 +# Currently, the minimum number of chunks is around 17-20 # SNAP 0.15.4 will not successfully index nt with less than 17 chunks, though this will vary a bit with each NT release. # I picked 20 here as a safe default. # This will likely have to be increased when using SNAP 1.0, which has different indexing characteristics, @@ -53,9 +55,6 @@ if [ ! -d "$reference_dir" ]; then mkdir "$reference_dir" fi -#set SNAP index Ofactor. See SNAP documentation for details. 
-Ofactor=1000 - #download NCBI data to $db_dir, curated data to $curated_dir echo -e "$(date)\t$scriptname\tdownload_SURPI_data.sh -d $db_dir -c $curated_dir" download_SURPI_data.sh -d "$db_dir" -c "$curated_dir" @@ -68,12 +67,10 @@ if [ ! -d "$reference_dir/$taxonomy_dir" ]; then fi create_taxonomy_db.sh -d "$db_dir" -mv gi_taxid_nucl.db "$reference_dir/$taxonomy_dir/gi_taxid_nucl_$DATE.db" -mv gi_taxid_prot.db "$reference_dir/$taxonomy_dir/gi_taxid_prot_$DATE.db" -mv names_nodes_scientific.db "$reference_dir/$taxonomy_dir/names_nodes_scientific_$DATE.db" -ln -s "gi_taxid_nucl_$DATE.db" "$reference_dir/$taxonomy_dir/gi_taxid_nucl.db" -ln -s "gi_taxid_prot_$DATE.db" "$reference_dir/$taxonomy_dir/gi_taxid_prot.db" -ln -s "names_nodes_scientific_$DATE.db" "$reference_dir/$taxonomy_dir/names_nodes_scientific.db" +mv gi_taxid_nucl.db "$reference_dir/$taxonomy_dir/gi_taxid_nucl.db" +mv gi_taxid_prot.db "$reference_dir/$taxonomy_dir/gi_taxid_prot.db" +mv names_nodes_scientific.db "$reference_dir/$taxonomy_dir/names_nodes_scientific.db" + # ##create RAPSearch nr db and move into $RAPSearch_dir # @@ -85,10 +82,10 @@ echo -e "$(date)\t$scriptname\tDecompressing nr..." pigz -dc -k "$db_dir/nr.gz" > nr echo -e "$(date)\t$scriptname\tStarting prerapsearch on nr..." -prerapsearch -d nr -n "rapsearch_nr_${DATE}_db_v2.12" +prerapsearch -d nr -n "rapsearch_nr_db_v2.12" echo -e "$(date)\t$scriptname\tCompleted prerapsearch on nr." -mv rapsearch_nr_${DATE}_db_v2.12 "$reference_dir/$RAPSearch_dir" -mv rapsearch_nr_${DATE}_db_v2.12.info "$reference_dir/$RAPSearch_dir" +mv rapsearch_nr_db_v2.12 "$reference_dir/$RAPSearch_dir" +mv rapsearch_nr_db_v2.12.info "$reference_dir/$RAPSearch_dir" # ## Index curated data @@ -111,13 +108,13 @@ pigz -dc -k "$curated_dir/rdp_typed_iso_goodq_9210seqs.fa.gz" > rdp_typed_iso_go echo -e "$(date)\t$scriptname\tIndexing curated data..." echo -e "$(date)\t$scriptname\tSNAP indexing hg19_rRNA_mito_Hsapiens_rna.fa..." 
-snap index hg19_rRNA_mito_Hsapiens_rna.fa snap_index_hg19_rRNA_mito_Hsapiens_rna -hg19 -O$Ofactor +snap index hg19_rRNA_mito_Hsapiens_rna.fa snap_index_hg19_rRNA_mito_Hsapiens_rna -hg19 -locationSize 5 echo -e "$(date)\t$scriptname\tSNAP indexing Bacterial_Refseq_05172012.CLEAN.LenFiltered.uniq.fa..." -snap index Bacterial_Refseq_05172012.CLEAN.LenFiltered.uniq.fa snap_index_Bacterial_Refseq_05172012.CLEAN.LenFiltered.uniq_s16 -s 16 -O$Ofactor +snap index Bacterial_Refseq_05172012.CLEAN.LenFiltered.uniq.fa snap_index_Bacterial_Refseq_05172012.CLEAN.LenFiltered.uniq_s16 -s 16 -locationSize 5 echo -e "$(date)\t$scriptname\tStarting prerapsearch on rapsearch_viral_aa_130628_db_v2.12.fasta..." prerapsearch -d rapsearch_viral_aa_130628_db_v2.12.fasta -n "rapsearch_viral_aa_130628_db_v2.12" echo -e "$(date)\t$scriptname\tSNAP indexing viruses-5-2012_trimmedgi-MOD_addedgi.fa..." -snap index viruses-5-2012_trimmedgi-MOD_addedgi.fa snap_index_viruses-5-2012_trimmedgi-MOD_addedgi -O$Ofactor +snap index viruses-5-2012_trimmedgi-MOD_addedgi.fa snap_index_viruses-5-2012_trimmedgi-MOD_addedgi -locationSize 5 #RiboClean additions if [ ! -d "$reference_dir/$RiboClean_dir" ]; then @@ -125,20 +122,24 @@ if [ ! -d "$reference_dir/$RiboClean_dir" ]; then fi echo -e "$(date)\t$scriptname\tSNAP indexing 18s_rRNA_gene_not_partial.fa..." -snap index 18s_rRNA_gene_not_partial.fa snap_index_18s_rRNA_gene_not_partial.fa -O$Ofactor +snap index 18s_rRNA_gene_not_partial.fa snap_index_18s_rRNA_gene_not_partial.fa -locationSize 5 echo -e "$(date)\t$scriptname\tSNAP indexing viruses-5-2012_trimmedgi-MOD_addedgi.fa..." -snap index 23s.fa snap_index_23sRNA -O$Ofactor +snap index 23s.fa snap_index_23sRNA -locationSize 5 echo -e "$(date)\t$scriptname\tSNAP indexing rdp_typed_iso_goodq_9210seqs.fa..." 
-snap index 28s_rRNA_gene_NOT_partial_18s_spacer_5.8s.fa snap_index_28s_rRNA_gene_NOT_partial_18s_spacer_5.8s.fa -O$Ofactor +snap index 28s_rRNA_gene_NOT_partial_18s_spacer_5.8s.fa snap_index_28s_rRNA_gene_NOT_partial_18s_spacer_5.8s.fa -locationSize 5 echo -e "$(date)\t$scriptname\tSNAP indexing viruses-5-2012_trimmedgi-MOD_addedgi.fa..." -snap index rdp_typed_iso_goodq_9210seqs.fa snap_index_rdp_typed_iso_goodq_9210seqs -O$Ofactor +snap index rdp_typed_iso_goodq_9210seqs.fa snap_index_rdp_typed_iso_goodq_9210seqs -locationSize 5 if [ ! -d "$reference_dir/$FAST_dir" ]; then mkdir "$reference_dir/$FAST_dir" fi +if [ ! -d "$reference_dir/$subtraction_dir" ]; then + mkdir "$reference_dir/$subtraction_dir" +fi + echo -e "$(date)\t$scriptname\tMoving curated data into place..." -mv snap_index_hg19_rRNA_mito_Hsapiens_rna "$reference_dir" +mv snap_index_hg19_rRNA_mito_Hsapiens_rna "$reference_dir/$subtraction_dir" mv snap_index_Bacterial_Refseq_05172012.CLEAN.LenFiltered.uniq_s16 "$reference_dir/$FAST_dir" mv rapsearch_viral_aa_130628_db_v2.12 "$reference_dir/$RAPSearch_dir" mv rapsearch_viral_aa_130628_db_v2.12.info "$reference_dir/$RAPSearch_dir" @@ -160,14 +161,14 @@ else fi echo -e "$(date)\t$scriptname\tStarting indexing of SNAP-nt..." echo -e "$(date)\t$scriptname\tcreate_snap_to_nt.sh -n $SNAP_nt_chunks -f nt" -create_snap_to_nt.sh -n "$SNAP_nt_chunks" -f "nt" -p "$DATE" +create_snap_to_nt.sh -n "$SNAP_nt_chunks" -f "nt" -p "prefix" echo -e "$(date)\t$scriptname\tCompleted indexing of SNAP-nt." if [ ! 
-d "$reference_dir/$SNAP_nt_dir" ]; then mkdir "$reference_dir/$SNAP_nt_dir" fi -mv "snap_index_${DATE}.nt."* "$reference_dir/$SNAP_nt_dir" +mv "snap_index_prefix.nt."* "$reference_dir/$SNAP_nt_dir" # ## Cleanup @@ -181,6 +182,6 @@ mv *.fa "$cleanup_dir" mv *.fasta "$cleanup_dir" mv nr "$cleanup_dir" mv nt "$cleanup_dir" -mv "$DATE"* "$cleanup_dir" +mv "prefix"* "$cleanup_dir" echo -e "$(date)\t$scriptname\t${green}Completed creation of SURPI reference data.${endColor}" diff --git a/create_snap_to_nt.sh b/create_snap_to_nt.sh index edb297e..3ea42a1 100755 --- a/create_snap_to_nt.sh +++ b/create_snap_to_nt.sh @@ -15,7 +15,7 @@ # Copyright (C) 2014 Scot Federman - All Rights Reserved # SURPI has been released under a modified BSD license. # Please see license file for details. -# Last revised 7/2/2014 +# Last revised 7/2/2014 scriptname=${0##*/} bold=$(tput bold) @@ -25,10 +25,7 @@ red='\e[0;31m' endColor='\e[0m' prefix=$(date +%m%d%Y) -#set SNAP index Ofactor. See SNAP documentation for details -Ofactor=1000 - -while getopts ":n:hs:f:" option; do +while getopts ":n:hp:s:f:" option; do case "${option}" in f) fasta_file_to_index=${OPTARG};; n) num_chunks=${OPTARG};; @@ -44,20 +41,20 @@ done if [[ ${HELP} -eq 1 || $# -lt 1 ]] then cat < gi_taxid_nucl.dmp pigz -dc -k "$db_directory/gi_taxid_prot.dmp.gz" > gi_taxid_prot.dmp # the below grep "fixes" the issue whereby aliases, mispellings, and other alternate names are returned. -# We could simply look for a name that is a "scientific name", +# We could simply look for a name that is a "scientific name", # but this shrinks the db a bit, speeding up lookups, and removes data we do not need at this time. echo -e "$(date)\t$scriptname\tRetaining scientific names..." 
grep "scientific name" names.dmp > names_scientificname.dmp diff --git a/crop_reads.csh b/crop_reads.csh index 4b5a56d..29fc65b 100755 --- a/crop_reads.csh +++ b/crop_reads.csh @@ -9,7 +9,7 @@ # # # will go from position 10 to 85 (or whatever size is available) -# +# # # $1 = file to crop # $2 = position to start crop ($start_nt) @@ -18,11 +18,11 @@ # Copyright (C) 2014 Charles Y Chiu - All Rights Reserved # SURPI has been released under a modified BSD license. # Please see license file for details. -# Last revised 1/26/2014 - +# Last revised 1/26/2014 + if ($#argv != 3) then echo "Usage: crop_reads.csh " exit(1) endif -cat $argv[1] | awk '(NR%2==1){print $0} (NR%2==0){print substr($0,'"$argv[2]"','"$argv[3]"')}' +cat $argv[1] | awk '(NR%2==1){print $0} (NR%2==0){print substr($0,'"$argv[2]"','"$argv[3]"')}' diff --git a/cutadapt_quality.csh b/cutadapt_quality.csh index 9198d9a..f29c2de 100755 --- a/cutadapt_quality.csh +++ b/cutadapt_quality.csh @@ -3,7 +3,7 @@ # cutadapt_quality.csh # # runs cutadapt to remove primer sequences from Illumina files -# also accepts a quality argument for Illumina / Sanger quality +# also accepts a quality argument for Illumina / Sanger quality # user specifies length cutoff # user specifies whether short reads less than length cutoff are kept; if so, they are converted to size=1 # @@ -19,7 +19,7 @@ # Copyright (C) 2014 Charles Chiu - All Rights Reserved # SURPI has been released under a modified BSD license. # Please see license file for details. 
- + if ($#argv != 7) then echo "Usage: cutadapt_quality.csh " exit(1) @@ -113,6 +113,6 @@ endif #set numreads_end = `egrep -c "@HWI|@M00|@SRR" $inputfile:r.cutadapt.fastq` #@ reads_removed = $numreads_start - $numreads_end -#echo $reads_removed" reads removed by cutadapt" +#echo $reads_removed" reads removed by cutadapt" #echo $numreads_end" reads at end of cutadapt" rm -f $TEMPDIR{$$} diff --git a/debug.sh b/debug.sh new file mode 100755 index 0000000..8a07c32 --- /dev/null +++ b/debug.sh @@ -0,0 +1,41 @@ +#!/bin/bash +set -e +trap _exit_trap EXIT +trap _err_trap ERR +_showed_traceback=f + +function _exit_trap +{ + local _ec="$?" + if [[ $_ec != 0 && "${_showed_traceback}" != t ]]; then + traceback 1 + fi +} + +function _err_trap +{ + local _ec="$?" + local _cmd="${BASH_COMMAND:-unknown}" + traceback 1 + _showed_traceback=t + echo "The command ${_cmd} exited with exit code ${_ec}." 1>&2 +} + +function traceback +{ + # Hide the traceback() call. + local -i start=$(( ${1:-0} + 1 )) + local -i end=${#BASH_SOURCE[@]} + local -i i=0 + local -i j=0 + + echo "Traceback (last called is first):" 1>&2 + for ((i=${start}; i < ${end}; i++)); do + j=$(( $i - 1 )) + local function="${FUNCNAME[$i]}" + local file="${BASH_SOURCE[$i]}" + local line="${BASH_LINENO[$j]}" + echo " ${function}() in ${file}:${line}" 1>&2 + done +} + diff --git a/download_SURPI_data.sh b/download_SURPI_data.sh index c2bf6a3..f20f32e 100755 --- a/download_SURPI_data.sh +++ b/download_SURPI_data.sh @@ -12,7 +12,7 @@ # Copyright (C) 2014 Scot Federman - All Rights Reserved # SURPI has been released under a modified BSD license. # Please see license file for details. 
-# Last revised 7/2/2014 +# Last revised 7/2/2014 scriptname=${0##*/} bold=$(tput bold) @@ -38,10 +38,10 @@ done if [[ ${HELP-} -eq 1 ]] then cat < " + echo "Usage: $scriptname " exit 65 fi @@ -36,7 +36,7 @@ if [ $inputfile_type = BLASTN ] then awk '{print$1}' $inputfile > $inputfile.header echo -e "$(date)\t$scriptname\tuniqued blastn file, replaced beginning with @" - + if [ $parentfile_type = FASTA ] then seqtk subseq $parentfile $inputfile.header > $outputfile @@ -59,12 +59,12 @@ then echo -e "$(date)\t$scriptname\tDone preparing input Fasta file " if [ $parentfile_type = FASTA ] - then + then seqtk subseq $parentfile $inputfile.header > $outputfile elif [ $parentfile_type = FASTQ ] then if [ $output_format = FASTQ ] - then + then cat $parentfile | fqextract $inputfile.header > $outputfile elif [ $output_format = FASTA ] then diff --git a/extractHeaderFromFastq.csh b/extractHeaderFromFastq.csh index d441aa2..5945279 100755 --- a/extractHeaderFromFastq.csh +++ b/extractHeaderFromFastq.csh @@ -14,7 +14,7 @@ # Copyright (C) 2014 Charles Y Chiu - All Rights Reserved # SURPI has been released under a modified BSD license. # Please see license file for details. -# Last revised 1/26/2014 +# Last revised 1/26/2014 if ($#argv != 4) then echo "Usage: extractHeaderFromFastq.csh
" diff --git a/extractHeaderFromFastq_ncores.sh b/extractHeaderFromFastq_ncores.sh index c0e63eb..f95e4ee 100755 --- a/extractHeaderFromFastq_ncores.sh +++ b/extractHeaderFromFastq_ncores.sh @@ -50,7 +50,7 @@ split -l $LinesPerCore -a 3 $parentfile $parentfile.SplitXS & awk '{print$1}' $queryfile > $queryfile.header & echo -e "$(date)\t$scriptname\textracting header from $queryfile" -for job in `jobs -p` +for job in $(jobs -p) do wait $job done diff --git a/extractSamFromSam.sh b/extractSamFromSam.sh index 31cfe1a..79b91e6 100755 --- a/extractSamFromSam.sh +++ b/extractSamFromSam.sh @@ -1,7 +1,7 @@ #!/bin/bash -# +# # extactSamFromSam.sh -# +# # extract SAM reads corresponding to a SAM header file from another SAM reference file and writes to # SAM output file # Chiu Laboratory @@ -40,7 +40,7 @@ else let "numlines = `wc -l basef | awk '{print $1}'`" let "LinesPerCore = numlines / $cores" echo -e "$(date)\t$scriptname\twill use $cores cores with $LinesPerCore entries per core" - + split -l $LinesPerCore $basef echo -e "$(date)\t$scriptname\textracting reads from $baseg using headers from $basef" diff --git a/fastq-extractBarcodedSRA.sh b/fastq-extractBarcodedSRA.sh index f990a01..1b2fd58 100755 --- a/fastq-extractBarcodedSRA.sh +++ b/fastq-extractBarcodedSRA.sh @@ -18,7 +18,7 @@ if [ $# -lt 1 ]; then exit fi -# +# # gets absolute path to SRA file # SRA_PATH=$(readlink -e $1) @@ -34,7 +34,7 @@ files=($header*.fastq) # restore barcode and R1/R2 designation to FASTQ headers for f in "${files[@]}" -do +do barcode=$(echo "$f" | sed 's/_[12]//g' | sed 's/.fastq//g' | sed 's/.*_//g') readnum=$(echo "$f" | sed 's/.*_\([12]\).fastq/\1/g' | sed '/'$header'/d') echo -e "$(date)\t$scriptname\tRestoring barcode # $barcode and R1/R2 designation to $f" diff --git a/get_genbankfasta.pl b/get_genbankfasta.pl index c4c2f22..995872e 100755 --- a/get_genbankfasta.pl +++ b/get_genbankfasta.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # # get_genbankfasta.pl # @@ -33,7 +33,7 
@@ if ($opt_h) { print < " + echo "Usage: $scriptname <.fasta> " echo "not uniquing Blastn anymore 9/13/13 given a fasta file and the GI or FASTA of the reference to assemble to -> makes an assembly. blastn e value in 1e-8 format . jedi has 16 cores available" exit fi @@ -59,7 +59,7 @@ done cat $1*.$2.$3.$4.blastn > $1.$2.$3.$4.Blastn -uniq_blastn_nopipes.sh $1.$2.$3.$4.Blastn +uniq_blastn_nopipes.sh $1.$2.$3.$4.Blastn extractAlltoFast.sh $1.$2.$3.$4.Blastn.uniq BLASTN $1 FASTA $1.$2.$3.$4.Blastn.uniq.ex.fa FASTA ####figuring out length of sequence in gi####### @@ -79,7 +79,7 @@ echo -e -n "$(date)\t$scriptname\t%Coverage = " echo "scale=6;100*$coverage/$gilength" | bc let "sumofcolumntwo = `awk '{print$2}' $1.$2.$3.$4.map | awk '{sum += $1} END{print sum}'`" -echo -e -n "$(date)\t$scriptname\tAverage depth of coverage (x) = " +echo -e -n "$(date)\t$scriptname\tAverage depth of coverage (x) = " echo "scale=6;$sumofcolumntwo/$gilength" | bc let "numberBlastnReads = `egrep -c "^SCS|^HWI|gi|^M00|^kmer|^SRR" $1.$2.$3.$4.Blastn.uniq`" echo -e "$(date)\t$scriptname\tNumber of reads contributing to assembly $numberBlastnReads" @@ -94,7 +94,7 @@ echo "Reference sequence length = $gilength bp" >> $1.$2.$3.$4.report echo "Coverage in bp = $coverage" >> $1.$2.$3.$4.report echo -n "%Coverage = " >> $1.$2.$3.$4.report echo "scale=6;100*$coverage/$gilength" | bc >> $1.$2.$3.$4.report -echo -n "Average depth of coverage = " >> $1.$2.$3.$4.report +echo -n "Average depth of coverage = " >> $1.$2.$3.$4.report echo "scale=6;$sumofcolumntwo/$gilength" | bc >> $1.$2.$3.$4.report #echo "Average depth of coverage = $averagedepthcoverage x " >> $1.$2.$3.$4.report echo "Number of reads contributing to assembly = $numberBlastnReads" >> $1.$2.$3.$4.report @@ -106,7 +106,7 @@ ps2pdf14 $1.$2.$3.$4.ps $1.$2.$3.$4.pdf #### Cleanup ######## rm -f $1*$2.$3.$4.blastn rm -f $1*$2.$3.$4.error -rm -f $3.$2.fasta.nhr +rm -f $3.$2.fasta.nhr rm -f $3.$2.fasta.nin rm -f $3.$2.fasta.nsq #rm -f 
$1.$2.$3.$4.Blastn diff --git a/preprocess.sh b/preprocess.sh index 443ba3d..032ba42 100755 --- a/preprocess.sh +++ b/preprocess.sh @@ -8,7 +8,7 @@ # January, 2014 # # cutadapt=>crop->dust removal (no uniq) *** -# +# # 12/20/12 - modified to switch to cutadapt for trimming # 12/31/12 - modified from Cshell to BASH version for timing # @@ -17,6 +17,8 @@ # Please see license file for details. scriptname=${0##*/} +source debug.sh +source logging.sh if [ $# != 10 ]; then echo "Usage: $scriptname " @@ -44,15 +46,15 @@ fi if [ $quality = "S" ] then - echo -e "$(date)\t$scriptname\tselected Sanger quality" + log "selected Sanger quality" else - echo -e "$(date)\t$scriptname\tselected Illumina quality" + log "selected Illumina quality" fi # fix header if space is present s=`head -1 $inputfile | awk '{if ($0 ~ / /) {print "SPACE"} else {print "NOSPACE"}}'` -echo -e "$(date)\t$scriptname\t$s in header" +log "$s in header" nopathf=${1##*/} basef=${nopathf%.fastq} @@ -60,7 +62,7 @@ basef=${nopathf%.fastq} #################### START OF PREPROCESSING, READ1 ######################### # run cutadapt, Read1 -echo -e "$(date)\t$scriptname\t********** running cutadapt, Read1 **********" +log "********** running cutadapt, Read1 **********" if [ $s == "SPACE" ] then sed "s/\([@HWI|@M00135|@SRR][^ ]*\) \(.\):.:0:\(.*\)/\1#\3\/\2/g" $inputfile > $basef.modheader.fastq @@ -76,12 +78,12 @@ fi END1=$(date +%s) diff=$(( $END1 - $START1 )) -echo -e "$(date)\t$scriptname\tDone cutadapt: CUTADAPT took $diff seconds" +log "Done cutadapt: CUTADAPT took $diff seconds" # run uniq, Read1 if [ $run_uniq == "Y" ] then - echo -e "$(date)\t$scriptname\t********** running uniq, Read1 **********" + log "********** running uniq, Read1 **********" START1=$(date +%s) if [ $quality = "S" ] @@ -93,31 +95,31 @@ then END1=$(date +%s) diff=$(( $END1 - $START1 )) - echo -e "$(date)\t$scriptname\tDone uniq: UNIQ took $diff seconds" + log "Done uniq: UNIQ took $diff seconds" fi # run crop, Read 1 -echo -e 
"$(date)\t$scriptname\t********** running crop, Read1 **********" +log "********** running crop, Read1 **********" START1=$(date +%s) -if [ $run_uniq == "Y" ] +if [ $run_uniq == "Y" ] then - echo -e "$(date)\t$scriptname\tWe will be using $crop_length as the length of the cropped read" + log "We will be using $crop_length as the length of the cropped read" crop_reads.csh $basef.cutadapt.uniq.fastq $start_nt $crop_length > $basef.cutadapt.uniq.cropped.fastq else - echo -e "$(date)\t$scriptname\tWe will be using $crop_length as the length of the cropped read" + log "We will be using $crop_length as the length of the cropped read" crop_reads.csh $basef.cutadapt.fastq $start_nt $crop_length > $basef.cutadapt.cropped.fastq fi END1=$(date +%s) diff=$(( $END1 - $START1 )) -echo -e "$(date)\t$scriptname\tDone crop: CROP took $diff seconds" +log "Done crop: CROP took $diff seconds" # run dust, Read1 -echo -e "$(date)\t$scriptname\t********** running dust, Read1 **********" +log "********** running dust, Read1 **********" START1=$(date +%s) -if [ $run_uniq == "Y" ] +if [ $run_uniq == "Y" ] then prinseq-lite.pl -fastq $basef.cutadapt.uniq.cropped.fastq -out_format 3 -out_good $basef.cutadapt.uniq.cropped.dusted -out_bad $basef.cutadapt.uniq.cropped.dusted.bad -log -lc_method dust -lc_threshold 7 mv -f $basef.cutadapt.uniq.cropped.dusted.fastq $basef.preprocessed.fastq @@ -128,4 +130,4 @@ fi END1=$(date +%s) diff=$(( $END1 - $START1 )) -echo -e "$(date)\t$scriptname\tDone dust: DUST took $diff seconds" +log "Done dust: DUST took $diff seconds" diff --git a/preprocess_ncores.sh b/preprocess_ncores.sh index 820e50a..1b8ba03 100755 --- a/preprocess_ncores.sh +++ b/preprocess_ncores.sh @@ -1,5 +1,5 @@ #!/bin/bash -# +# # preprocess_ncores.sh # # This script runs preprocessing across multiple cores (FASTA/FASTQ header modification, quality filtering, adapter trimming, and low-complexity filtering) @@ -13,8 +13,9 @@ # Please see license file for details. 
scriptname=${0##*/} +source logging.sh -if [ $# != 12 ]; then +if [ $# != 11 ]; then echo "Usage: $scriptname <# of cores> " exit fi @@ -25,13 +26,12 @@ quality=$2 run_uniq=$3 length_cutoff=$4 cores=$5 -cache_reset=$6 -keep_short_reads=$7 -adapter_set=$8 -start_nt=$9 -crop_length=${10} -temporary_files_directory=${11} -quality_cutoff=${12} +keep_short_reads=$6 +adapter_set=$7 +start_nt=$8 +crop_length=${9} +temporary_files_directory=${10} +quality_cutoff=${11} ### if [ ! -f $inputfile ] @@ -40,40 +40,33 @@ then exit fi -freemem=$(free -g | awk '{print $4}' | head -n 2 | tail -1) -echo -e "$(date)\t$scriptname\tThere is $freemem GB available free memory...[cutoff=$free_cache_cutoff GB]" -if [ $freemem -lt $free_cache_cutoff ] -then - echo -e "$(date)\t$scriptname\tClearing cache..." - dropcache -fi - START=$(date +%s) -echo -e "$(date)\t$scriptname\tSplitting $inputfile..." +log "Splitting $inputfile..." let "numlines = `wc -l $inputfile | awk '{print $1}'`" let "FASTQentries = numlines / 4" -echo -e "$(date)\t$scriptname\tThere are $FASTQentries FASTQ entries in $inputfile" +log "There are $FASTQentries FASTQ entries in $inputfile" let "LinesPerCore = numlines / $cores" -let "FASTQperCore = LinesPerCore / 4" +# Prevent leaving a small remainder file after split by adding 1. +let "FASTQperCore = LinesPerCore / 4 + 1" let "SplitPerCore = FASTQperCore * 4" -echo -e "$(date)\t$scriptname\twill use $cores cores with $FASTQperCore entries per core" +log "will use $cores cores with $FASTQperCore entries per core" split -l $SplitPerCore $inputfile END_SPLIT=$(date +%s) diff_SPLIT=$(( END_SPLIT - START )) -echo -e "$(date)\t$scriptname\tDone splitting: " -echo -e "$(date)\t$scriptname\tSPLITTING took $diff_SPLIT seconds" +log "Done splitting: " +log "SPLITTING took $diff_SPLIT seconds" -echo -e "$(date)\t$scriptname\tRunning preprocess script for each chunk..." +log "Running preprocess script for each chunk..." for f in x?? 
do mv $f $f.fastq - echo -e "$(date)\t$scriptname\tpreprocess.sh $f.fastq $quality N $length_cutoff $keep_short_reads $adapter_set $start_nt $crop_length $temporary_files_directory >& $f.preprocess.log &" + log "preprocess.sh $f.fastq $quality N $length_cutoff $keep_short_reads $adapter_set $start_nt $crop_length $temporary_files_directory >& $f.preprocess.log &" preprocess.sh $f.fastq $quality N $length_cutoff $keep_short_reads $adapter_set $start_nt $crop_length $temporary_files_directory $quality_cutoff >& $f.preprocess.log & done @@ -82,7 +75,7 @@ do wait $job done -echo -e "$(date)\t$scriptname\tDone preprocessing for each chunk..." +log "Done preprocessing for each chunk..." nopathf2=${1##*/} basef2=${nopathf2%.fastq} @@ -108,20 +101,20 @@ do cat $basef.preprocessed.fastq >> $basef2.preprocessed.fastq rm -f $basef.preprocessed.fastq cat $basef.cutadapt.cropped.dusted.bad.fastq >> $basef2.cutadapt.cropped.dusted.bad.fastq - rm -f $basef.cutadapt.cropped.dusted.bad.fastq + rm -f $basef.cutadapt.cropped.dusted.bad.fastq rm -f $f rm -f $basef.modheader.fastq rm -f $basef.cutadapt.summary.log rm -f $basef.adapterinfo.log - rm -f $basef.cutadapt.cropped.fastq + rm -f $basef.cutadapt.cropped.fastq done -echo -e "$(date)\t$scriptname\tDone concatenating output..." +log "Done concatenating output..." 
if [ $run_uniq == "Y" ]; # selecting unique reads then - echo -e "$(date)\t$scriptname\tSelecting unique reads" + log "Selecting unique reads" START_UNIQ=$(date +%s) # selecting unique reads sed "n;n;n;d" $basef2.preprocessed.fastq | sed "n;n;d" | sed "s/^@/>/g" > $basef2.preprocessed.fasta @@ -130,28 +123,28 @@ then cp -f $basef2.uniq.fastq $basef2.preprocessed.fastq END_UNIQ=$(date +%s) diff_UNIQ=$(( END_UNIQ - START_UNIQ )) - echo -e "$(date)\t$scriptname\tUNIQ took $diff_UNIQ seconds" + log "UNIQ took $diff_UNIQ seconds" else - echo -e "$(date)\t$scriptname\tIncluding duplicates (did not run UNIQ)" + log "Including duplicates (did not run UNIQ)" fi END=$(date +%s) diff_TOTAL=$(( END - START )) let "avgtime1=`grep CUTADAPT $basef2.preprocess.log | awk '{print $12}' | sort -n | awk '{ a[i++]=$1} END {print a[int(i/2)];}'`" -echo -e "$(date)\t$scriptname\tmedian CUTADAPT time per core: $avgtime1 seconds" +log "median CUTADAPT time per core: $avgtime1 seconds" if [ $run_uniq = "Y" ]; then let "avgtime2 = $diff_UNIQ" - echo -e "$(date)\t$scriptname\tUNIQ time: $diff_UNIQ seconds" + log "UNIQ time: $diff_UNIQ seconds" else let "avgtime2=0" fi let "avgtime3=`grep DUST $basef2.preprocess.log | awk '{print $12}' | sort -n | awk '{ a[i++]=$1} END {print a[int(i/2)];}'`" -echo -e "$(date)\t$scriptname\tmedian DUST time per core: $avgtime3 seconds" +log "median DUST time per core: $avgtime3 seconds" let "totaltime = diff_SPLIT + avgtime1 + avgtime2 + avgtime3" -echo -e "$(date)\t$scriptname\tTOTAL TIME: $totaltime seconds" +log "TOTAL TIME: $totaltime seconds" -echo -e "$(date)\t$scriptname\tTOTAL CLOCK TIME (INCLUDING OVERHEAD): $diff_TOTAL seconds" +log "TOTAL CLOCK TIME (INCLUDING OVERHEAD): $diff_TOTAL seconds" diff --git a/ribo_snap_bac_euk.sh b/ribo_snap_bac_euk.sh index 261e3de..5a8a080 100755 --- a/ribo_snap_bac_euk.sh +++ b/ribo_snap_bac_euk.sh @@ -44,12 +44,12 @@ fasta_to_fastq $inputfile.fasta > $inputfile.fakq # if Bacteria.annotated file h crop_reads.csh 
$inputfile.fakq 10 75 > $inputfile.fakq.crop # snap against large ribosomal subunit -snap single $SNAP_index_Large $inputfile.fakq.crop -o $inputfile.noLargeS.unmatched.sam -t $cores -x -f -h 250 -d 18 -n 200 -F u +snap single $SNAP_index_Large -fastq $inputfile.fakq.crop -o $inputfile.noLargeS.unmatched.sam -t $cores -x -f -h 250 -d 18 -n 200 -F u egrep -v "^@" $inputfile.noLargeS.unmatched.sam | awk '{if($3 == "*") print "@"$1"\n"$10"\n""+"$1"\n"$11}' > $(echo "$inputfile".noLargeS.unmatched.sam | sed 's/\(.*\)\..*/\1/').fastq echo -e "$(date)\t$scriptname\tDone: first snap alignment" # snap against small ribosomal subunit -snap single $SNAP_index_Small $inputfile.noLargeS.unmatched.fastq -o $inputfile.noSmallS_LargeS.sam -t $cores -h 250 -d 18 -n 200 -F u +snap single $SNAP_index_Small -fastq $inputfile.noLargeS.unmatched.fastq -o $inputfile.noSmallS_LargeS.sam -t $cores -h 250 -d 18 -n 200 -F u echo -e "$(date)\t$scriptname\tDone: second snap alignment" # convert snap unmatched to ribo output to header format @@ -58,8 +58,8 @@ awk '{print$1}' $inputfile.noSmallS_LargeS.sam | sed '/^@/d' > $inputfile.noSmal # retrieve reads from original $inputfile extractSamFromSam.sh $inputfile.noSmallS_LargeS.header.sam $inputfile $basef.noRibo.annotated -echo -e "$(date)\t$scriptname\tCreated $inputfile.noRibo.annotated" -table_generator.sh $basef.noRibo.annotated SNAP N Y N N +echo -e "$(date)\t$scriptname\tCreated $inputfile.noRibo.annotated" +table_generator.sh $basef.noRibo.annotated SNAP N Y N N rm -f $inputfile.noLargeS.sam rm -f $inputfile.noLargeS.matched.sam diff --git a/slave/setup_slave.sh b/slave/setup_slave.sh index e9a52a8..58a5261 100755 --- a/slave/setup_slave.sh +++ b/slave/setup_slave.sh @@ -22,7 +22,7 @@ if [ ! 
-d $bin_folder ] then mkdir $bin_folder fi - + CWD=$(pwd) # set timezone diff --git a/snap_nt.sh b/snap_nt.sh index 50c31b3..c010cff 100755 --- a/snap_nt.sh +++ b/snap_nt.sh @@ -1,22 +1,23 @@ #!/bin/bash -# +# # snap_nt.sh # # This script runs SNAP against the NT database # Chiu Laboratory # University of California, San Francisco # -# +# # Copyright (C) 2014 Charles Chiu - All Rights Reserved # SURPI has been released under a modified BSD license. # Please see license file for details. - -expected_args=6 scriptname=${0##*/} +source debug.sh +source logging.sh +expected_args=5 if [ $# -lt $expected_args ] then - echo "Usage: $scriptname " + echo "Usage: $scriptname " exit 65 fi @@ -24,76 +25,76 @@ fi inputfile=$1 SNAP_NT_index_directory=$2 cores=$3 -free_cache_cutoff=$4 -SNAP_d_cutoff=$5 -snap=$6 +SNAP_d_cutoff=$4 +snap=$5 ### -echo -e "$(date)\t$scriptname\tStarting SNAP to NT" +log "Starting SNAP to NT" START1=$(date +%s) -echo -e "$(date)\t$scriptname\tInput file: $inputfile" +log "Input file: $inputfile" nopathf=${inputfile##*/} # remove the path to file -echo -e "$(date)\t$scriptname\tAfter removing path: $nopathf" +log "After removing path: $nopathf" basef=${nopathf%.fastq} # remove FASTQextension -echo -e "$(date)\t$scriptname\tAfter removing FASTQ extension: $basef" - -echo -e "$(date)\t$scriptname\tMapping $basef to NT..." - -rm -f $basef.prev.sam -rm -f $basef.tmp.sam -rm -f $basef.tmp2.sam -rm -f $basef.NT.sam - +log "After removing FASTQ extension: $basef" + +log "Mapping $basef to NT..." 
+
+cleanup() {
+	rm -f $basef.prev.sam || true
+	rm -f $basef.tmp.sam || true
+	rm -f $basef.tmp2.sam || true
+	rm -f $basef.NT.sam || true
+	rm -f $basef.snapNT.log || true
+	rm -f $basef.timeNT.log || true
+}
+cleanup
 counter=0
-for snap_index in $SNAP_NT_index_directory/* ; do
-	freemem=$(free -g | awk '{print $4}' | head -n 2 | tail -1)
-	echo -e "$(date)\t$scriptname\tThere is $freemem GB available free memory...[cutoff=$free_cache_cutoff GB]"
-	if [ $freemem -lt $free_cache_cutoff ]
-	then
-		echo -e "$(date)\t$scriptname\tClearing cache..."
-		dropcache
-	fi
-	nopathsnap_index=${snap_index##*/} # remove the path to file
-	echo -e "$(date)\t$scriptname\tFound $snap_index ... processing ..."
+for snap_index_basename in $(ls -1v "$SNAP_NT_index_directory") ; do
+	nopathsnap_index=$snap_index_basename # basename of current index chunk; used in per-chunk .sam names below
+	snap_index="$SNAP_NT_index_directory/$snap_index_basename"
+	log "Found $snap_index_basename ... processing ..."
 	START2=$(date +%s)
 	if [[ $counter -eq 0 ]]
 	then
 		#running first SNAP chunk
-		/usr/bin/time -o $basef.snap.log $snap single $snap_index $basef.fastq -o $basef.tmp.sam -t $cores -x -f -h 250 -d $SNAP_d_cutoff -n 25 > $basef.time.log
+		/usr/bin/time -o $basef.time.log $snap single $snap_index $basef.fastq -o $basef.$nopathsnap_index.sam -t $cores -x -f -h 250 -d $SNAP_d_cutoff -n 25 > $basef.snap.log
+		ln --symbolic --force $basef.$nopathsnap_index.sam $basef.tmp.sam
 #		cp $basef.tmp.sam temp.sam
 	else #running 2nd SNAP chunk through last SNAP chunk
-		/usr/bin/time -o $basef.snap.log $snap single $snap_index $basef.tmp.fastq -o $basef.tmp.sam -t $cores -x -f -h 250 -d $SNAP_d_cutoff -n 25 > $basef.time.log
+		/usr/bin/time -o $basef.time.log $snap single $snap_index $basef.tmp.fastq -o $basef.$nopathsnap_index.sam -t $cores -x -f -h 250 -d $SNAP_d_cutoff -n 25 > $basef.snap.log
+		ln --symbolic --force $basef.$nopathsnap_index.sam $basef.tmp.sam
 	fi
 	cat $basef.snap.log >> $basef.snapNT.log
 	cat $basef.time.log >> $basef.timeNT.log
-	
+
compare_sam.py $basef.tmp.sam $basef.prev.sam cat $basef.prev.sam | egrep -v "^@" | awk '{print "@"$1"\n"$10"\n""+"$1"\n"$11}' > $basef.tmp.fastq counter=1 - + END2=$(date +%s) - echo -e "$(date)\t$scriptname\tDone with $snap_index." + log "Done with $snap_index." diff=$(( $END2 - $START2 )) - echo -e "$(date)\t$scriptname\tMapping of $snap_index took $diff seconds." + log "Mapping of $snap_index took $diff seconds." done # need to restore the hits -update_sam.py $basef.prev.sam $basef.NT.sam +update_sam.py $basef.prev.sam $basef.NT.sam rm -f $basef.tmp.sam rm -f $basef.tmp.fastq rm -f $basef.prev.sam +rm -f $basef.$nopathsnap_index.* END1=$(date +%s) -echo -e "$(date)\t$scriptname\tDone with SNAP_NT" +log "Done with SNAP_NT" diff=$(( $END1 - $START1 )) -echo -e "$(date)\t$scriptname\toutput written to $basef.NT.sam" -echo -e "$(date)\t$scriptname\tSNAP_NT took $diff seconds" +log "output written to $basef.NT.sam" +log "SNAP_NT took $diff seconds" diff --git a/snap_nt_combine.sh b/snap_nt_combine.sh index 08b71d7..cb700aa 100755 --- a/snap_nt_combine.sh +++ b/snap_nt_combine.sh @@ -19,7 +19,7 @@ # Copyright (C) 2014 Charles Chiu - All Rights Reserved # Permission to copy and modify is granted under the BSD license -expected_args=6 +expected_args=5 scriptname=${0##*/} if [ $# -lt $expected_args ] @@ -32,9 +32,8 @@ fi inputfile=$1 SNAP_NT_index_directory=$2 cores=$3 -free_cache_cutoff=$4 -SNAP_d_cutoff=$5 -simultaneous_SNAPs=$6 +SNAP_d_cutoff=$4 +simultaneous_SNAPs=$5 ### echo -e "$(date)\t$scriptname\tStarting SNAP to NT" @@ -55,14 +54,6 @@ for snap_index in $SNAP_NT_index_directory/*; do nopathsnap_index=${snap_index##*/} # remove the path to file echo -e "$(date)\t$scriptname\tStarting SNAP on $nopathsnap_index" - freemem=$(free -g | awk '{print $4}' | head -n 2 | tail -1) - echo -e "$(date)\t$scriptname\tThere is $freemem GB available free memory...[cutoff=$free_cache_cutoff GB]" - if [ $freemem -lt $free_cache_cutoff ] - then - echo -e 
"$(date)\t$scriptname\tClearing cache..." - dropcache - fi - START_SNAP=$(date +%s) /usr/bin/time -o $basef.$nopathsnap_index.snap.log snap single $snap_index $basef.fastq -o $basef.$nopathsnap_index.sam -t $cores -x -f -h 250 -d $SNAP_d_cutoff -n 25 > $basef.$nopathsnap_index.time.log SNAP_DONE=$(date +%s) diff --git a/split_fasta.pl b/split_fasta.pl index 060cb92..f67f660 100755 --- a/split_fasta.pl +++ b/split_fasta.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl #split_fasta.pl version 1.0 #This script accepts a file consisting of multiple FASTA formatted sequence records. #It splits the file into multiple new files, each consisting of a subset of the original records. @@ -80,7 +80,7 @@ #write record to file print (OUTFILE ">$sequenceTitle\n"); print (OUTFILE "$sequenceEntry\n"); - $seqCount++; + $seqCount++; $seqThisFile++; if ($seqThisFile == $numberToCopy) { diff --git a/start_slaves.sh b/start_slaves.sh index 622df47..e428d77 100755 --- a/start_slaves.sh +++ b/start_slaves.sh @@ -33,14 +33,14 @@ file_with_slave_ips=$7 placement_group=$8 ### -#This is the time (in seconds) this script waits after starting up the AWS machines. +#This is the time (in seconds) this script waits after starting up the AWS machines. #It allows the instances time to start up. In practice, 120s appears to be sufficient. WAIT_TIME=120 #folder where slave scripts are located slave_folder="/usr/local/bin/surpi/slave" #script located on slave to run -slave_script="slave_setup.sh" +slave_script="slave_setup.sh" #this parameter is currently tied to the $keypair used during slave_setup.sh. should be cleaned up prior to release pemkey="/home/ubuntu/.ssh/surpi.pem" @@ -106,7 +106,7 @@ do done # instance_id_list=( $result ) # instance_id_size=${#instance_id_list[@]} -# +# # if [ "$instance_id_size" != "$number_of_instances" ]; then # echo "ERROR: could not create $number_of_instances instances" # echo "$number_of_instances instances were created." 
@@ -150,7 +150,7 @@ do echo -e "$(date)\t$scriptname\t$attached" >> slave.$COUNTER.log 2>&1 #verify $attached here to verify that EBS was attached - + echo -e "$(date)\t$scriptname\tconnecting and running $slave_script script..." >> slave.$COUNTER.log 2>&1 ssh -o StrictHostKeyChecking=no -i $pemkey ubuntu@$host "/home/ubuntu/$slave_script" >> slave.$COUNTER.log 2>&1 & let COUNTER=COUNTER+1 diff --git a/table_generator.sh b/table_generator.sh index 240d3b1..4491367 100755 --- a/table_generator.sh +++ b/table_generator.sh @@ -7,7 +7,7 @@ # University of California, San Francisco # January, 2014 # -# input file: annotated sam or annotated -m 8 file with taxonomies provided in the following format at the end of the .sam and -m 8 file: +# input file: annotated sam or annotated -m 8 file with taxonomies provided in the following format at the end of the .sam and -m 8 file: # "gi# --family --genus --species" # Output files are tab delimited files ending in .counttable whereby rows represent taxonomic annotations at various levels (family, genus, species, gi) # Columns represent individual barcodes found in the dataset, and cells contain the number of reads @@ -20,7 +20,7 @@ scriptname=${0##*/} if [ $# -lt 6 ]; then - echo "Usage: $scriptname " + echo "Usage: $scriptname " exit fi @@ -33,7 +33,7 @@ genus=$5 family=$6 ### -###substitute forward slash with @_ because forward slash in species name makes it ungreppable. using @_ because @ is used inside the contig barcode (ie. #@13 is barcode 13, contig generated) +###substitute forward slash with @_ because forward slash in species name makes it ungreppable. using @_ because @ is used inside the contig barcode (ie. 
#@13 is barcode 13, contig generated) create_tab_delimited_table.pl -f $file_type $inputfile | sed 's/ /_/g' | sed 's/,/_/g' | sed 's/\//@_/g' > $inputfile.tempsorted echo -e "$(date)\t$scriptname\tdone creating $inputfile.tempsorted" @@ -61,7 +61,7 @@ then done echo -e "GI\tSpecies\tGenus\tFamily(@=contigbarcode)" > $inputfile.header cat $inputfile.header $inputfile.gi.uniq.columntable > $inputfile.gi.counttable_temp - paste $inputfile.gi.counttable_temp bar*.$inputfile.gi.output > $inputfile.gi.counttable + paste $inputfile.gi.counttable_temp bar*.$inputfile.gi.output > $inputfile.gi.counttable sed -i 's/@_/ /g' $inputfile.gi.counttable echo -e "$(date)\t$scriptname\tdone generating gi counttable" fi @@ -84,7 +84,7 @@ then done echo -e "Species\tGenus\tFamily(@=contigbarcode)" > $inputfile.header cat $inputfile.header $inputfile.species.uniq.columntable > $inputfile.species.counttable_temp - paste $inputfile.species.counttable_temp bar*.$inputfile.species.output > $inputfile.species.counttable + paste $inputfile.species.counttable_temp bar*.$inputfile.species.output > $inputfile.species.counttable sed -i 's/@_/ /g' $inputfile.species.counttable echo -e "$(date)\t$scriptname\tdone generating species counttable" fi @@ -106,7 +106,7 @@ then done echo -e "Genus\tFamily(@=contigbarcode)" > $inputfile.header cat $inputfile.header $inputfile.genus.uniq.columntable > $inputfile.genus.counttable_temp - paste $inputfile.genus.counttable_temp bar*.$inputfile.genus.output > $inputfile.genus.counttable + paste $inputfile.genus.counttable_temp bar*.$inputfile.genus.output > $inputfile.genus.counttable sed -i 's/@_/ /g' $inputfile.genus.counttable echo -e "$(date)\t$scriptname\tdone generating genus counttable" fi diff --git a/taxonomy_lookup.pl b/taxonomy_lookup.pl index 79d0eac..35d7b85 100755 --- a/taxonomy_lookup.pl +++ b/taxonomy_lookup.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl -w +#!/usr/bin/env perl # # taxonomy_lookup.pl # @@ -12,6 +12,7 @@ # Please see license file for 
details. use strict; +use warnings; #use diagnostics; use Time::HiRes qw[gettimeofday tv_interval]; use DBI; @@ -80,12 +81,11 @@ # sort/uniq gi file my $startsort = [gettimeofday()]; -system ("sort --parallel=$cores -u $basef_inputfile.gi > $basef_inputfile.gi.uniq"); - +system ("sort -u $basef_inputfile.gi > $basef_inputfile.gi.uniq"); my $sorttime = tv_interval($startsort); print localtime()."\t$0\tTime to sort -u gi file: $sorttime seconds\n"; -# Parallelization can occur at this point in the code. Since the file in now sorted, it can be split into n chunks +# Parallelization can occur at this point in the code. Since the file in now sorted, it can be split into n chunks # with no overlap. my %taxonomy; @@ -110,19 +110,19 @@ chomp; my ($family, $genus, $species, $lineage) = ("") x 4; my $result = taxonomy_fgsl($_, $seq_type); - + if ($result =~ /family--(.*?)\t/) { $family = $1; } - + if ($result =~ /genus--(.*?)\t/) { $genus = $1; } - + if ($result =~ /species--(.*?)\t/) { $species = $1; } - + if ($result =~ /lineage--(.*)$/) { $lineage = $1; } @@ -228,7 +228,7 @@ sub taxonomy_fgsl { $sth->execute(($taxid)); if (@noderow = $sth->fetchrow_array) { (my $tid, my $parent, my $rank) = @noderow; - + if ($namerow = $name_return->fetchrow_array) { $name = trim($namerow); } diff --git a/taxonomy_lookup_embedded.pl b/taxonomy_lookup_embedded.pl index 5709a89..d8e7a51 100755 --- a/taxonomy_lookup_embedded.pl +++ b/taxonomy_lookup_embedded.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl -w +#!/usr/bin/env perl # # taxonomy_lookup_embedded.pl # @@ -9,9 +9,10 @@ # Copyright (C) 2014 Scot Federman - All Rights Reserved # SURPI has been released under a modified BSD license. # Please see license file for details. -# Last revised 1/26/2014 +# Last revised 1/26/2014 use DBI; +use warnings; use strict; use Getopt::Std; use Time::HiRes qw[gettimeofday tv_interval]; @@ -36,12 +37,12 @@ if ($opt_h) { print <