diff --git a/Changelog b/Changelog index 1998e9a..a8ca514 100644 --- a/Changelog +++ b/Changelog @@ -1,3 +1,8 @@ +2018-11-12 Can Alkan + * Default output file is now (seqFile1)-output.sam(.gz) & default nohit file is (seqFile1)-output.nohit.fastq(.gz) + * No-hit FASTQ output is now also compressed when --outcomp parameter is used. + * mrsFAST now accepts -t as a short form for --threads. + 2014-03-31 Faraz Hach * Added with-sse4 option to Makefile * Bug reported by viktor in dc mode is fixed. diff --git a/CommandLineParser.c b/CommandLineParser.c index fef5785..c78184e 100644 --- a/CommandLineParser.c +++ b/CommandLineParser.c @@ -52,6 +52,7 @@ int bestMappingMode = 0; int SNPMode = 0; int seqCompressed; int outCompressed; +int isCloud; int cropSize = 0; int tailCropSize = 0; int progressRep = 0; @@ -80,14 +81,16 @@ int SNP_QUAL_THRESHOLD = 53; +/* #if (defined(__MACH__) && defined(__APPLE__)) #include #else extern char _binary_HELP_start; extern char _binary_HELP_end; #endif +*/ - +/* void printHelp() { #if (defined(__MACH__) && defined(__APPLE__)) @@ -101,7 +104,7 @@ void printHelp() putchar(*c); #endif exit(EXIT_SUCCESS); -} + }*/ int parseCommandLine (int argc, char *argv[]) { @@ -119,6 +122,7 @@ int parseCommandLine (int argc, char *argv[]) strcpy(unmappedOutput, "output.nohit"); strcpy(concordantStatOutput, "concordant.statistic"); mappingOutputPath[0] = '\0'; + isCloud = 0; static struct option longOptions[] = { @@ -128,6 +132,7 @@ int parseCommandLine (int argc, char *argv[]) {"outcomp", no_argument, &outCompressed, 1}, {"progress", no_argument, &progressRep, 1}, {"best", no_argument, &bestMappingMode, 1}, + {"cloud", no_argument, &isCloud, 1}, {"disable-nohits", no_argument, &nohitDisabled, 1}, {"disable-sam-header", no_argument, &noSamHeader, 1}, {"index", required_argument, 0, 'i'}, @@ -152,7 +157,7 @@ int parseCommandLine (int argc, char *argv[]) - while ( (o = getopt_long ( argc, argv, "f:i:u:o:s:e:n:bhv", longOptions, &index))!= -1 ) + while ( (o = 
getopt_long ( argc, argv, "f:i:u:o:s:e:n:t:bhv", longOptions, &index))!= -1 ) { switch (o) { @@ -207,11 +212,11 @@ int parseCommandLine (int argc, char *argv[]) maxPairEndedDistance = atoi(optarg); break; case 'h': - printHelp(); + printHelp(); return 0; break; case 'v': - fprintf(stdout, "Version: %s\nBuild Date: %s\n", MRSFAST_VERSION, BUILD_DATE); + fprintf(stderr, "Version: %s\nBuild Date: %s\n", MRSFAST_VERSION, BUILD_DATE); return 0; break; case 't': @@ -235,7 +240,7 @@ int parseCommandLine (int argc, char *argv[]) #ifndef MRSFAST_SSE4 if (searchingMode) - fprintf(stdout, "==> This version is compiled without any SSE4 optimization <==\n"); + fprintf(stderr, "==> This version is compiled without any SSE4 optimization <==\n"); #endif if (bestMappingMode) { @@ -244,18 +249,18 @@ int parseCommandLine (int argc, char *argv[]) if (indexingMode + searchingMode != 1) { - fprintf(stdout, "ERROR: Indexing / Searching mode should be selected\n"); + fprintf(stderr, "ERROR: Indexing / Searching mode should be selected\n"); return 0; } if (WINDOW_SIZE > 14 || WINDOW_SIZE < 8) { - fprintf(stdout, "ERROR: Window size should be in [8..14]\n"); + fprintf(stderr, "ERROR: Window size should be in [8..14]\n"); return 0; } if (MAX_MEMORY < 2) - fprintf(stdout, "ERROR: At least 2 GB of memory is required for running mrsFAST\n"); + fprintf(stderr, "ERROR: At least 2 GB of memory is required for running mrsFAST\n"); if ( indexingMode ) @@ -265,7 +270,7 @@ int parseCommandLine (int argc, char *argv[]) if (fastaFile == NULL) { - fprintf(stdout, "ERROR: Reference(s) should be indicated for indexing\n"); + fprintf(stderr, "ERROR: Reference(s) should be indicated for indexing\n"); return 0; } } @@ -274,13 +279,13 @@ int parseCommandLine (int argc, char *argv[]) { if (maxHits < 0) { - fprintf(stdout, "ERROR: Number of maximum hits must be greater than 0\n"); + fprintf(stderr, "ERROR: Number of maximum hits must be greater than 0\n"); return 0; } if (bestMappingMode) { - fprintf(stdout, 
"ERROR: Maximum number of mappings could not be set in best mapping mode. Maximum mappings input ignored\n"); + fprintf(stderr, "ERROR: Maximum number of mappings could not be set in best mapping mode. Maximum mappings input ignored\n"); maxHits = 0; } } @@ -292,7 +297,7 @@ int parseCommandLine (int argc, char *argv[]) if ( cropSize && tailCropSize) { - fprintf(stdout, "ERROR: Sequences can be cropped from only one side\n"); + fprintf(stderr, "ERROR: Sequences can be cropped from only one side\n"); return 0; } @@ -303,19 +308,19 @@ int parseCommandLine (int argc, char *argv[]) if (fastaFile == NULL) { - fprintf(stdout, "ERROR: Index File(s) should be indiciated for searching\n"); + fprintf(stderr, "ERROR: Index File(s) should be indiciated for searching\n"); return 0; } if (seqFile1 == NULL && seqFile2 == NULL) { - fprintf(stdout, "ERROR: Please indicate a sequence file for searching.\n"); + fprintf(stderr, "ERROR: Please indicate a sequence file for searching.\n"); return 0; } if (!pairedEndMode && seqFile2 != NULL) { - fprintf(stdout, "ERROR: Second File can be indicated in pairedend mode\n"); + fprintf(stderr, "ERROR: Second File can be indicated in pairedend mode\n"); return 0; } @@ -327,13 +332,13 @@ int parseCommandLine (int argc, char *argv[]) } else if ( minPairEndedDistance <0 || maxPairEndedDistance < 0 || minPairEndedDistance > maxPairEndedDistance ) { - fprintf(stdout, "ERROR: Please enter a valid range for pairedend sequences.\n"); + fprintf(stderr, "ERROR: Please enter a valid range for pairedend sequences.\n"); return 0; } if (seqFile1 == NULL) { - fprintf(stdout, "ERROR: Please indicate the first file for pairedend search.\n"); + fprintf(stderr, "ERROR: Please indicate the first file for pairedend search.\n"); return 0; } } @@ -348,7 +353,7 @@ int parseCommandLine (int argc, char *argv[]) if (!indexingMode) { - fprintf(stdout, "# Threads: %d\n", THREAD_COUNT); + fprintf(stderr, "# Threads: %d\n", THREAD_COUNT); for (i = 0; i < 255; i++) THREAD_ID[i] 
= i; } @@ -359,6 +364,20 @@ int parseCommandLine (int argc, char *argv[]) char fname4[FILE_NAME_LENGTH]; char fname5[FILE_NAME_LENGTH]; + /* change defaut output filenames */ + if (!strcmp(mappingOutput, "output") && searchingMode) + { + sprintf(mappingOutput, "%s-output", seqFile1); + fprintf(stderr, "seqFile1 is %s\noutput file is %s\n", seqFile1, mappingOutput); + if (!outCompressed) + sprintf(unmappedOutput, "%s-output.nohit.fastq", seqFile1 ); + else + sprintf(unmappedOutput, "%s-output.nohit.fastq.gz", seqFile1 ); + char tmp_fname[FILE_NAME_LENGTH]; + strcpy(tmp_fname, seqFile1); + stripPath (tmp_fname, &mappingOutputPath, &mappingOutput); + } + // Why is this one here? if (pairedEndMode) { @@ -384,3 +403,253 @@ void finalizeCommandParser() freeMem(mappingOutputPath, FILE_NAME_LENGTH); freeMem(concordantStatOutput, FILE_NAME_LENGTH); } + +void printHelp() +{ + fprintf(stderr, "mrsFAST-Ultra(1) mrsfast-Ultra Manual mrsFAST-Ultra(1)\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "NNAAMMEE\n"); + fprintf(stderr, " mrsfast-ultra\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "DDEESSCCRRIIPPTTIIOONN\n"); + fprintf(stderr, " mrsFAST is a cache oblivious read mapping tool. mrsFAST capable of map-\n"); + fprintf(stderr, " ping single and paired end reads to the reference genome. Bisulfite\n"); + fprintf(stderr, " treated sequences are not supported in this version.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "IINNSSTTAALLLLAATTIIOONN\n"); + fprintf(stderr, " To install mrsFAST-ultra, please download the source zip package from\n"); + fprintf(stderr, " http://sourceforge.net/projects/mrsfast/. After unzipping the down-\n"); + fprintf(stderr, " loaded file \"mrsfast-ultra-3.X.X.zip\", change the current directory to\n"); + fprintf(stderr, " the source directory \"mrsfast-ultra-3.X.X\", and run \"make\" in the ter-\n"); + fprintf(stderr, " minal. 
The binary file \"mrsfast\" will be created, which is ready to\n"); + fprintf(stderr, " use.\n"); + fprintf(stderr, " $ unzip mrsfast-ultra-3.X.X.zip\n"); + fprintf(stderr, " $ cd mrsfast-ultra-3.X.X\n"); + fprintf(stderr, " $ make\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "SSYYNNOOPPSSIISS\n"); + fprintf(stderr, " mrsfast --index [file] [OPTIONS]\n"); + fprintf(stderr, " mrsfast --search [index] --seq [file] [OPTIONS]\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "OOPPTTIIOONNSS\n"); + fprintf(stderr, " GGEENNEERRAALL OOPPTTIIOONNSS\n"); + fprintf(stderr, " --hh Prints this help file.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " --vv,, ----vveerrssiioonn\n"); + fprintf(stderr, " Prints the version of software.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " IINNDDEEXXIINNGG OOPPTTIIOONNSS\n"); + fprintf(stderr, " ----wwss _w_i_n_d_o_w___s_i_z_e\n"); + fprintf(stderr, " Index the reference genome with sliding a window of size _w_i_n_-\n"); + fprintf(stderr, " _d_o_w___s_i_z_e (default: 12).\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " MMAAPPPPIINNGG OOPPTTIIOONNSS\n"); + fprintf(stderr, " ----mmeemm _m\n"); + fprintf(stderr, " Use maximum _m GB of memory (default: 4).\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " ----tthhrreeaaddss _t\n"); + fprintf(stderr, " Use _t number of cores for mapping the sequences (default: 1).\n"); + fprintf(stderr, " Use _0 to use all the available cores in the system.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " ----sseeqq _f_i_l_e\n"); + fprintf(stderr, " Set the input sequence to _f_i_l_e_. In paired-end mode, _f_i_l_e should\n"); + fprintf(stderr, " be used if the read sequences are interleaved.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " ----sseeqq11 _f_i_l_e\n"); + fprintf(stderr, " Set the input sequence (left mate) to _f_i_l_e_. 
Paired-end option\n"); + fprintf(stderr, " only.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " ----sseeqq22 _f_i_l_e\n"); + fprintf(stderr, " Set the input sequence (right mate) to _f_i_l_e_. Paired-end option\n"); + fprintf(stderr, " only.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " ----sseeqqccoommpp\n"); + fprintf(stderr, " Input file is compressed through gzip.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " --oo _f_i_l_e\n"); + fprintf(stderr, " Output the mapping record into _f_i_l_e (default: output.sam)\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " ----ddiissaabbllee--ssaamm--hheeaaddeerr\n"); + fprintf(stderr, " Do not generate SAM header.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " --uu _f_i_l_e\n"); + fprintf(stderr, " Output unmapped reads in _f_i_l_e (default: output.nohit). This file\n"); + fprintf(stderr, " will be generated in all mapping mode.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " ----ddiissaabbllee--nnoohhiittss\n"); + fprintf(stderr, " Do not output unmapped reads.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " ----oouuttccoommpp\n"); + fprintf(stderr, " Compress the output _f_i_l_e by gzip.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " --nn _c_u_t_-_o_f_f\n"); + fprintf(stderr, " Output the mapping for the read sequences that have less than\n"); + fprintf(stderr, " _c_u_t_-_o_f_f number of mappings. 
Cannot be used with ----bbeesstt or ----ddiiss--\n"); + fprintf(stderr, " ccoorrddaanntt--vvhh options.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " ----ccrroopp _n\n"); + fprintf(stderr, " Trim the reads to _n base pairs from begining of the read.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " ----ttaaiill--ccrroopp _n\n"); + fprintf(stderr, " Trim the reads to _n base pairs from end of the read.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " --ee _e_r_r_o_r_-_t_h_r_e_s_h_o_l_d\n"); + fprintf(stderr, " Allow up to _e_r_r_o_r_-_t_h_r_e_s_h_o_l_d mismatches in the mappings.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " ----bbeesstt Find the best mapping location of given reads.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " ----ppee Map the reads in Paired-End mode. mmiinn and mmaaxx of template\n"); + fprintf(stderr, " length will be calculated if not provided by corresponding\n"); + fprintf(stderr, " options.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " ----mmiinn _m_i_n_-_d_i_s_c_o_r_d_a_n_t_-_l_e_n_g_t_h\n"); + fprintf(stderr, " Use _m_i_n_-_d_i_s_c_o_r_d_a_n_t_-_l_e_n_g_t_h for minimum length of concordant map-\n"); + fprintf(stderr, " ping. Paired-end option only.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " ----mmaaxx _m_a_x_-_d_i_s_c_o_r_d_a_n_t_-_l_e_n_g_t_h\n"); + fprintf(stderr, " Use _m_a_x_-_d_i_s_c_o_r_d_a_n_t_-_l_e_n_g_t_h for maximum length of concordant map-\n"); + fprintf(stderr, " ping. Paired-end option only.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " ----ddiissccoorrddaanntt--vvhh\n"); + fprintf(stderr, " Map the reads in discordant fashion that can be processed by\n"); + fprintf(stderr, " Variation Hunter / Common Law. 
Output will be generate in DIVET\n"); + fprintf(stderr, " format.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " ----mmaaxx--ddiissccoorrddaanntt--ccuuttooffff _m\n"); + fprintf(stderr, " Allow _m discordant mappings per read. Should be only used with\n"); + fprintf(stderr, " ----ddiissccoorrddaanntt--vvhh option.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " ----ssnnpp _s_n_p_-_f_i_l_e\n"); + fprintf(stderr, " Map the reads in SNP aware mode. In this mode mrsFAST-Ultra tol-\n"); + fprintf(stderr, " erates the mismatches in known SNP locations reported by the\n"); + fprintf(stderr, " provided SNP database. The SNP index _s_n_p_-_f_i_l_e\n"); + fprintf(stderr, " should be created from the dbSNP (.vcf) file using the\n"); + fprintf(stderr, " snp_indexer binary.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " ----ssnnpp--qquuaall _q_u_a_l_i_t_y_-_t_h_r_e_s_h_o_l_d\n"); + fprintf(stderr, " In SNP-aware mode, a mismatch at a reported SNP location will be\n"); + fprintf(stderr, " ignored only if the corresponding read location has a quality\n"); + fprintf(stderr, " higher than or equal to the _q_u_a_l_i_t_y_-_t_h_r_e_s_h_o_l_d _q_u_a_l_i_t_y_-_t_h_r_e_s_h_o_l_d\n"); + fprintf(stderr, " is a Phred-Value base 33. 
The default is 53 (base call error\n"); + fprintf(stderr, " 0.01).\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "EEXXAAMMPPLLEESS\n"); + fprintf(stderr, " Indexing reference genome:\n"); + fprintf(stderr, " $ ./mrsfast --index refgen.fasta\n"); + fprintf(stderr, " $ ./mrsfast --index refgen.fasta --ws 14\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " Single-end mapping:\n"); + fprintf(stderr, " $ ./mrsfast --search refgen.fa --seq reads.fastq\n"); + fprintf(stderr, " $ ./mrsfast --search refgen.fa --seq reads.fastq -e 3 -n 10 --threads 4\n"); + fprintf(stderr, " $ ./mrsfast --search refgen.fa --seq reads.fastq -e 3 --best -o output\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " Paired-end mapping:\n"); + fprintf(stderr, " $ ./mrsfast --search refgen.fasta --pe --seq pe-reads.fastq --min 100\n"); + fprintf(stderr, " --max 400\n"); + fprintf(stderr, " $ ./mrsfast --search refgen.fasta --pe --seq1 first-mates.fastq --seq2\n"); + fprintf(stderr, " second-mates.fastq -e 3 --threads 4\n"); + fprintf(stderr, " $ ./mrsfast --search refgen.fasta --pe --seq1 first-mates.fastq --seq2\n"); + fprintf(stderr, " second-mates.fastq --min 100 --max 400 --best -o output\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " Discordant mapping:\n"); + fprintf(stderr, " $ ./mrsfast --search refgen.fasta --pe --discordant-vh --seq\n"); + fprintf(stderr, " reads.fastq --min 100 --max 400\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "BBUUGGSS\n"); + fprintf(stderr, " Please report the bugs through mrsfast's web page at http://mrs-\n"); + fprintf(stderr, " fast.sourceforge.net\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "AAuutthhoorrss\n"); + fprintf(stderr, " Faraz Hach (fhach@sfu.ca)\n"); + fprintf(stderr, " Iman Sarrafi (isarrafi@sfu.ca)\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "RReeffeerreennccee\n"); + fprintf(stderr, 
" Please cite the following paper for publications if using mrsFAST:\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " Faraz Hach, Fereydoun Hormozdiari, Can Alkan, Farhad Hormozdiari, Inanc\n"); + fprintf(stderr, " Birol, Evan E Eichler and S Cenk Sahinalp, \"mrsFAST: a cache-oblivious\n"); + fprintf(stderr, " algorithm for short-read mapping\", Nature Methods 7, 576-577 (2010)\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " Please cite the following paper for publications if using mrsFAST-\n"); + fprintf(stderr, " Ultra:\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " Faraz Hach, Iman Sarrafi, Farhad Hormozdiari, Can Alkan, Evan E. Eich-\n"); + fprintf(stderr, " ler, S. Cenk Sahinalp, \"mrsFAST-Ultra: a compact, SNP-aware mapper for\n"); + fprintf(stderr, " high performance sequencing applications\", Nucl. Acids Res. (1 July\n"); + fprintf(stderr, " 2014) 42 (W1): W494-W500.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "CCOOPPYYRRIIGGHHTT\n"); + fprintf(stderr, " Copyright (c) <2012-2020>, Simon Fraser University All rights reserved.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " Redistribution and use in source and binary forms, with or without mod-\n"); + fprintf(stderr, " ification, are permitted provided that the following conditions are\n"); + fprintf(stderr, " met:\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " 1 Redistributions of source code must retain the above copyright\n"); + fprintf(stderr, " notice, this list of conditions and the following disclaimer.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " 2 Redistributions in binary form must reproduce the above copy-\n"); + fprintf(stderr, " right notice, thislist of conditions and the following dis-\n"); + fprintf(stderr, " claimer in the documentation and/or other materials provided\n"); + fprintf(stderr, " with the distribution.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " 3 Neither the name of the Simon 
Fraser University nor the names of\n"); + fprintf(stderr, " its contributors may be used to endorse or promote products\n"); + fprintf(stderr, " derived from this software without specific prior written per-\n"); + fprintf(stderr, " mission.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS\n"); + fprintf(stderr, " IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED\n"); + fprintf(stderr, " TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTIC-\n"); + fprintf(stderr, " ULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR\n"); + fprintf(stderr, " CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\n"); + fprintf(stderr, " EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\n"); + fprintf(stderr, " PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\n"); + fprintf(stderr, " PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\n"); + fprintf(stderr, " LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n"); + fprintf(stderr, " NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n"); + fprintf(stderr, " SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "mrsFAST-Ultra Last Updated: December 3, 2018 mrsFAST-Ultra(1)\n"); +} diff --git a/CommandLineParser.h b/CommandLineParser.h index c595897..d60b0ab 100644 --- a/CommandLineParser.h +++ b/CommandLineParser.h @@ -38,5 +38,5 @@ int parseCommandLine (int argc, char *argv[]); void finalizeCommandParser(); - +void printHelp(void); #endif diff --git a/Common.c b/Common.c index e324384..6f74b3b 100644 --- a/Common.c +++ b/Common.c @@ -61,8 +61,8 @@ FILE *fileOpen(char *fileName, char *mode) fp = fopen (fileName, mode); if (fp == NULL) { - 
fprintf(stdout, "Error: Cannot Open the file %s\n", fileName); - fflush(stdout); + fprintf(stderr, "Error: Cannot Open the file %s\n", fileName); + fflush(stderr); exit(EXIT_FAILURE); } return fp; @@ -74,8 +74,8 @@ gzFile fileOpenGZ(char *fileName, char *mode) gzfp = gzopen (fileName, mode); if (gzfp == Z_NULL) { - fprintf(stdout, "Error: Cannot Open the file %s\n", fileName); - fflush(stdout); + fprintf(stderr, "Error: Cannot Open the file %s\n", fileName); + fflush(stderr); exit(EXIT_FAILURE); } return gzfp; diff --git a/Common.h b/Common.h index e33d7cc..fa19810 100644 --- a/Common.h +++ b/Common.h @@ -77,6 +77,7 @@ extern int bestMappingMode; extern int SNPMode; extern int seqCompressed; extern int outCompressed; +extern int isCloud; extern int cropSize; extern int tailCropSize; extern int progressRep; diff --git a/HashTable.c b/HashTable.c index 465f62d..a75d2c7 100644 --- a/HashTable.c +++ b/HashTable.c @@ -232,8 +232,8 @@ int generateHashTable(char *fileName, char *indexName) if (!initLoadingRefGenome(fileName, genomeMetaInfo, &genomeMetaInfoLength)) return 0; initSavingIHashTable(indexName, genomeMetaInfo, genomeMetaInfoLength); - fprintf(stdout, "Generating Index from %s", fileName); - fflush(stdout); + fprintf(stderr, "Generating Index from %s", fileName); + fflush(stderr); do { @@ -244,14 +244,14 @@ int generateHashTable(char *fileName, char *indexName) if ( strcmp(prev, refGenName) != 0) { - fprintf(stdout, "\n - %s ", refGenName); - fflush(stdout); + fprintf(stderr, "\n - %s ", refGenName); + fflush(stderr); sprintf(prev, "%s", refGenName); } else { - fprintf(stdout, "."); - fflush(stdout); + fprintf(stderr, "."); + fflush(stderr); } c = refGen; @@ -296,7 +296,7 @@ int generateHashTable(char *fileName, char *indexName) finalizeLoadingRefGenome(); finalizeSavingIHashTable(); - fprintf(stdout, "\nDONE in %0.2fs!\n", (getTime()-startTime)); + fprintf(stderr, "\nDONE in %0.2fs!\n", (getTime()-startTime)); return 1; } 
/**********************************************/ @@ -315,12 +315,12 @@ int checkHashTable(char *fileName) tmp = fread(&magicNumber, sizeof(magicNumber), 1, _ih_fp); if (magicNumber == 1) { - fprintf(stdout, "Error: Please use version 1.2.6.4 in bisulfite mode.\n"); + fprintf(stderr, "Error: Please use version 1.2.6.4 in bisulfite mode.\n"); return 0; } else if (magicNumber == 0) { - fprintf(stdout, "Error: Please use version 2.x.x.x or upgrade your index.\n"); + fprintf(stderr, "Error: Please use version 2.x.x.x or upgrade your index.\n"); return 0; } diff --git a/Makefile b/Makefile index 2ccb759..d587cc7 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,16 @@ -MRSFAST_VERSION := "3.4.0" +MRSFAST_VERSION := "3.4.8" BUILD_DATE := "$(shell date)" all: OPTIMIZE_FLAGS build debug: DEBUG_FLAGS build profile: PROFILE_FLAGS build -build: clean_executables SSE_FLAGS mrsfast snp_indexer clean_objects +build: clean_executables mrsfast snp_indexer clean_objects LDFLAGS=#-static -LIBS=-lz -lm -pthread -lpthread -CFLAGS=-fno-pic -DMRSFAST_VERSION=\"$(MRSFAST_VERSION)\" -DBUILD_DATE=\"$(BUILD_DATE)\" +LIBS=-lz -lm -pthread -lpthread -DSSE4=1 -msse4.2 +CFLAGS=-DMRSFAST_VERSION=\"$(MRSFAST_VERSION)\" -DBUILD_DATE=\"$(BUILD_DATE)\" -DSSE4=1 -msse4.2 -objects=baseFAST.o Sort.o MrsFAST.o Common.o CommandLineParser.o RefGenome.o HashTable.o Reads.o Output.o SNPReader.o HELP.o +objects=baseFAST.o Sort.o MrsFAST.o Common.o CommandLineParser.o RefGenome.o HashTable.o Reads.o Output.o SNPReader.o mrsfast: clean_executables $(objects) ifeq ($(shell uname -s),Linux) @@ -38,15 +38,6 @@ clean_executables: @rm -f mrsfast @rm -f snp_indexer -HELP.o: - @groff -Tascii -man ./HELP.man > HELP -ifeq ($(shell uname -s),Linux) - @ld -r -b binary -o HELP.o HELP -else - @touch HELPstub.c - gcc -o HELPstub.o -c HELPstub.c - ld -r -o HELP.o -sectcreate binary HELP HELP HELPstub.o -endif DEBUG_FLAGS: $(eval CFLAGS = $(CFLAGS) -ggdb) @@ -59,23 +50,3 @@ PROFILE_FLAGS: $(eval CFLAGS = $(CFLAGS) -pg -g) $(eval 
LIBS = $(LIBS) -pg -g) -SSE_FLAGS: -ifeq ($(shell uname -s),Linux) -ifeq ($(with-sse4),no) - $(shell echo "-DSSE4=0") -else - $(eval CFLAGS = $(CFLAGS) \ - $(shell gv=`gcc -dumpversion`; \ - sc=`grep -c "sse4" /proc/cpuinfo`; \ - echo $$sc.$$gv | awk -F. '{if($$1>0 && $$2>=4 && $$3>=4) print "-DSSE4=1 -msse4.2"; else print "-DSSE4=0"}')) -endif -else -ifeq ($(with-sse4),no) - $(shell echo "-DSSE4=0") -else - $(eval CFLAGS = $(CFLAGS) \ - $(shell gv=`gcc -dumpversion`; \ - sc=`sysctl -n machdep.cpu.features | grep -c "SSE4"` ;\ - echo $$sc.$$gv | awk -F. '{if($$1>0 && $$2>=4 && $$3>=4) print "-DSSE4=1 -msse4.2"; else print "-DSSE4=0"}')) -endif -endif diff --git a/MrsFAST.c b/MrsFAST.c index a546de1..68e8867 100644 --- a/MrsFAST.c +++ b/MrsFAST.c @@ -2692,34 +2692,34 @@ void outputBestPairedEnd() f1 = 1 + 4 + 8 + 64; f2 = 1 + 4 + 8 + 128; optSize1 = optSize2 = 0; - //fprintf(stdout, "unset\n"); + //fprintf(stderr, "unset\n"); break; case first_mate: f1 = 1 + 8 + 64 + ((_msf_bestMappingPE[i].dir1 == -1) ?16 :0); f2 = 1 + 4 + 128 + ((_msf_bestMappingPE[i].dir1 == -1) ?32 :0); optSize2 = 0; - //fprintf(stdout, "1stmate\n"); + //fprintf(stderr, "1stmate\n"); break; case second_mate: f1 = 1 + 4 + 64 + ((_msf_bestMappingPE[i].dir2 == -1) ?32 :0); f2 = 1 + 8 + 128 + ((_msf_bestMappingPE[i].dir2 == -1) ?16 :0); - //fprintf(stdout, "2ndmate\n"); + //fprintf(stderr, "2ndmate\n"); optSize1 = 0; break; case trans_loc: f1 = 1 + 64 + ((_msf_bestMappingPE[i].dir1 == -1) ?16 :0) + ((_msf_bestMappingPE[i].dir2 == -1) ?32 :0); f2 = 1 + 128 + ((_msf_bestMappingPE[i].dir2 == -1) ?16 :0) + ((_msf_bestMappingPE[i].dir1 == -1) ?32 :0); - //fprintf(stdout, "transLoc %d %d\n", f1, f2); + //fprintf(stderr, "transLoc %d %d\n", f1, f2); break; case improper: f1 = 1 + 64 + ((_msf_bestMappingPE[i].dir1 == -1) ?16 :0) + ((_msf_bestMappingPE[i].dir2 == -1) ?32 :0); f2 = 1 + 128 + ((_msf_bestMappingPE[i].dir2 == -1) ?16 :0) + ((_msf_bestMappingPE[i].dir1 == -1) ?32 :0); - //fprintf(stdout, 
"improper\n"); + //fprintf(stderr, "improper\n"); break; case proper: f1 = 1 + 2 + 64 + ((_msf_bestMappingPE[i].dir1 == -1) ?16 :0) + ((_msf_bestMappingPE[i].dir2 == -1) ?32 :0); f2 = 1 + 2 + 128 + ((_msf_bestMappingPE[i].dir2 == -1) ?16 :0) + ((_msf_bestMappingPE[i].dir1 == -1) ?32 :0); - //fprintf(stdout, "proper\n"); + //fprintf(stderr, "proper\n"); break; } //output best @@ -3415,7 +3415,7 @@ void updateBestPairedEnd() } -//fprintf(stdout, "HERE?\n"); +//fprintf(stderr, "HERE?\n"); /* pos = 0; for (j=0; j= maxPairEndedDiscordantDistance ) { event = 'D'; - //fprintf(stdout, "Deletion \n"); + //fprintf(stderr, "Deletion \n"); } else { event = 'I'; - //fprintf(stdout, "Insertion \n"); + //fprintf(stderr, "Insertion \n"); } } else if (loc2 < loc1) { - //fprintf(stdout, "> %d ", loc1-loc2-SEQ_LENGTH); + //fprintf(stderr, "> %d ", loc1-loc2-SEQ_LENGTH); if (dir2 == 'R' && dir1 == 'F') { event = 'E'; - //fprintf(stdout, "Everted \n"); + //fprintf(stderr, "Everted \n"); } else if ( loc1 - loc2 >= maxPairEndedDiscordantDistance ) { event = 'D'; - //fprintf(stdout, "Deletion \n"); + //fprintf(stderr, "Deletion \n"); } else { event = 'I'; - //fprintf(stdout, "Insertion \n"); + //fprintf(stderr, "Insertion \n"); } } } @@ -4080,7 +4080,7 @@ void calculateConcordantDistances() minPairEndedDistance = (int)(mu - 3*sigma); maxPairEndedDistance = (int)(mu + 3*sigma); - //fprintf(stdout, "cnt %d mu %lf sig %lf min %d max %d\n", cnt, mu, sigma, minPairEndedDistance, maxPairEndedDistance); + //fprintf(stderr, "cnt %d mu %lf sig %lf min %d max %d\n", cnt, mu, sigma, minPairEndedDistance, maxPairEndedDistance); FILE *out = fileOpen(concordantStatOutput, "w"); fprintf(out, "TotalNumbReads: 2*%d\n", (_msf_seqListSize / 2)); diff --git a/Output.c b/Output.c index b8358c7..86e2b5d 100644 --- a/Output.c +++ b/Output.c @@ -163,7 +163,11 @@ int initOutput ( char *fileName, int compressed) if (compressed) { char newFileName[strlen(mappingOutputPath)+strlen(fileName)+4]; - 
sprintf(newFileName, "%s%s.sam.gz", mappingOutputPath, fileName); + if (!isCloud) + sprintf(newFileName, "%s%s.sam.gz", mappingOutputPath, fileName); + else + sprintf(newFileName, "%s.sam.gz", fileName); + fprintf(stderr, "mappingOutputPath: %s\nfileName: %s\nnewFileName: %s\n", mappingOutputPath, fileName, newFileName); _out_gzfp = fileOpenGZ(newFileName, "w1f"); if (_out_gzfp == Z_NULL) { @@ -187,11 +191,28 @@ int initOutput ( char *fileName, int compressed) } else { - //sprintf(newFileName, "%s%s.sam", mappingOutputPath, fileName); - sprintf(newFileName, "%s%s", mappingOutputPath, fileName); + if (!isCloud){ + int fnlen = strlen(fileName); + if ( ! (fileName[fnlen-1]=='m' && fileName[fnlen-2]=='a' && fileName[fnlen-3]=='s' ) ) + sprintf(newFileName, "%s%s.sam", mappingOutputPath, fileName); + else + sprintf(newFileName, "%s%s", mappingOutputPath, fileName); + } + else{ + int fnlen = strlen(fileName); + if ( ! (fileName[fnlen-1]=='m' && fileName[fnlen-2]=='a' && fileName[fnlen-3]=='s' ) ) + sprintf(newFileName, "%s.sam", fileName); + else + sprintf(newFileName, "%s", fileName); + //sprintf(newFileName, "%s%s", mappingOutputPath, fileName); + } } - _out_fp = fileOpen(newFileName, "w"); + if (!strstr(newFileName, "/dev/stdout")) + _out_fp = fileOpen(newFileName, "w"); + else + _out_fp = stdout; + if (_out_fp == NULL) { return 0; diff --git a/Reads.c b/Reads.c index 2a7003e..2d91666 100644 --- a/Reads.c +++ b/Reads.c @@ -47,6 +47,7 @@ FILE *_r_fp1; FILE *_r_fp2; FILE *_r_umfp; +gzFile _r_gzumfp; gzFile _r_gzfp1; gzFile _r_gzfp2; Read *_r_seq; @@ -396,24 +397,24 @@ void calculateSamplingLocations() /*int j; for (i=0; i= SEQ_MAX_LENGTH ) { - fprintf(stdout, "ERR: Read Length is greater than the MAX length we can process (Current Max: %d).\n", SEQ_MAX_LENGTH); + fprintf(stderr, "ERR: Read Length is greater than the MAX length we can process (Current Max: %d).\n", SEQ_MAX_LENGTH); exit(EXIT_FAILURE); } @@ -562,12 +563,12 @@ int initRead(char *fileName1, char *fileName2) 
if (errThreshold == -1) { errThreshold = SEQ_LENGTH*6/100; - fprintf(stdout, "# Errors: %d\n", errThreshold); + fprintf(stderr, "# Errors: %d\n", errThreshold); } if (errThreshold > maxErrThreshold && SEQ_LENGTH>0) { errThreshold = maxErrThreshold; - fprintf(stdout, "# Error: %d (full sensitivity)\n", errThreshold); + fprintf(stderr, "# Error: %d (full sensitivity)\n", errThreshold); } @@ -580,7 +581,10 @@ int initRead(char *fileName1, char *fileName2) if (!nohitDisabled) { - _r_umfp = fileOpen(unmappedOutput, "w"); + if (!outCompressed) + _r_umfp = fileOpen(unmappedOutput, "w"); + else + _r_gzumfp = fileOpenGZ(unmappedOutput, "w"); } _r_alphIndex = getMem(128); // used in readChunk() @@ -617,6 +621,8 @@ int readChunk(Read **seqList, unsigned int *seqListSize) int i;//, len; int namelen; + int eliminate = 0; + while( (namelen = readFirstSeq(name1,1)) ) { @@ -643,10 +649,16 @@ int readChunk(Read **seqList, unsigned int *seqListSize) for (i=1; i 0) + eliminate = 1; + else + { + fprintf(stderr, "ERR: Inconsistent read length for %s\n", name1); + exit(EXIT_FAILURE); + } } if ( _r_fastq ) @@ -658,8 +670,11 @@ int readChunk(Read **seqList, unsigned int *seqListSize) { _r_seq[_r_seqCnt].qual = "*"; } - _r_seqCnt++; - + + if (eliminate) + freeMem(_r_seq[_r_seqCnt].hits, size); + else + _r_seqCnt++; if (pairedEndMode) { @@ -681,7 +696,7 @@ int readChunk(Read **seqList, unsigned int *seqListSize) if ( readSecondSeq(_r_seq[_r_seqCnt].seq,2) != SEQ_LENGTH) { - fprintf(stdout, "ERR: Inconsistent read length for %s\n", name1); + fprintf(stderr, "ERR: Inconsistent read length for %s\n", name1); exit(EXIT_FAILURE); } @@ -706,12 +721,12 @@ int readChunk(Read **seqList, unsigned int *seqListSize) if (_r_seqCnt > 0) { preProcessReadsMT(); - fprintf(stdout, "| *Reading Input* | %15.2f | XXXXXXXXXXXXXXX | %15.2f | XXXXXXXXXXXXXXX %15d |\n", (getTime()-startTime), getMemUsage(), _r_seqCnt ); + fprintf(stderr, "| *Reading Input* | %15.2f | XXXXXXXXXXXXXXX | %15.2f | XXXXXXXXXXXXXXX %15d 
|\n", (getTime()-startTime), getMemUsage(), _r_seqCnt ); _r_firstIteration = 0; } else if (_r_firstIteration) { - fprintf(stdout, "ERR: No reads for mapping\n"); + fprintf(stderr, "ERR: No reads for mapping\n"); exit(EXIT_FAILURE); } @@ -736,22 +751,34 @@ void outputUnmapped() { if (_r_seq[2*i].hits[0] == 0 && _r_fastq) { - fprintf(_r_umfp,"@%s/1\n%s\n+\n%s\n@%s/2\n%s\n+\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].qual, _r_seq[i*2].name, _r_seq[i*2+1].seq, _r_seq[i*2+1].qual); + if (!outCompressed) + fprintf(_r_umfp,"@%s/1\n%s\n+\n%s\n@%s/2\n%s\n+\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].qual, _r_seq[i*2].name, _r_seq[i*2+1].seq, _r_seq[i*2+1].qual); + else + gzprintf(_r_gzumfp,"@%s/1\n%s\n+\n%s\n@%s/2\n%s\n+\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].qual, _r_seq[i*2].name, _r_seq[i*2+1].seq, _r_seq[i*2+1].qual); } else if (_r_seq[2*i].hits[0] == 0) { - fprintf(_r_umfp, ">%s/1\n%s\n>%s/2\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].name, _r_seq[i*2+1].seq); + if (!outCompressed) + fprintf(_r_umfp, ">%s/1\n%s\n>%s/2\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].name, _r_seq[i*2+1].seq); + else + gzprintf(_r_gzumfp, ">%s/1\n%s\n>%s/2\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].name, _r_seq[i*2+1].seq); } } else { if (_r_seq[i].hits[0] == 0 && _r_fastq) { - fprintf(_r_umfp,"@%s\n%s\n+\n%s\n", _r_seq[i].name, _r_seq[i].seq, _r_seq[i].qual); + if (!outCompressed) + fprintf(_r_umfp,"@%s\n%s\n+\n%s\n", _r_seq[i].name, _r_seq[i].seq, _r_seq[i].qual); + else + gzprintf(_r_gzumfp,"@%s\n%s\n+\n%s\n", _r_seq[i].name, _r_seq[i].seq, _r_seq[i].qual); } else if (_r_seq[i].hits[0] == 0) { - fprintf(_r_umfp,">%s\n%s\n", _r_seq[i].name, _r_seq[i].seq); + if (!outCompressed) + fprintf(_r_umfp,">%s\n%s\n", _r_seq[i].name, _r_seq[i].seq); + else + gzprintf(_r_gzumfp,">%s\n%s\n", _r_seq[i].name, _r_seq[i].seq); } } } @@ -830,7 +857,10 @@ void finalizeReads() if (!nohitDisabled) { - fclose(_r_umfp); + if (!outCompressed) 
+ fclose(_r_umfp); + else + gzclose(_r_gzumfp); } } @@ -900,7 +930,7 @@ void finalizeReads() } } - fprintf(stdout, "Input check: OK\n"); + fprintf(stderr, "Input check: OK\n"); return 1; } */ diff --git a/RefGenome.c b/RefGenome.c index 8d1a9d2..427f7b4 100644 --- a/RefGenome.c +++ b/RefGenome.c @@ -177,7 +177,7 @@ int getGenomeMetaInfo(char *fileName, char *genomeMetaInfo, int *genomeMetaInfoL { if (ch != '>') { - fprintf(stdout, "Error: Wrong fasta format file\n"); + fprintf(stderr, "Error: Wrong fasta format file\n"); return 0; } } @@ -186,7 +186,7 @@ int getGenomeMetaInfo(char *fileName, char *genomeMetaInfo, int *genomeMetaInfoL rewind(_rg_fp); - fprintf(stdout, "Scanning the fasta file: "); + fprintf(stderr, "Scanning the fasta file: "); while( fscanf(_rg_fp, "%c", &ch) > 0 ) { if (!isspace(ch)) @@ -206,8 +206,8 @@ int getGenomeMetaInfo(char *fileName, char *genomeMetaInfo, int *genomeMetaInfoL genSize = (int *)(genomeMetaInfo + i); i += sizeof(int); *genSize = 0; - fprintf(stdout, "."); - fflush(stdout); + fprintf(stderr, "."); + fflush(stderr); } else { @@ -215,7 +215,7 @@ int getGenomeMetaInfo(char *fileName, char *genomeMetaInfo, int *genomeMetaInfoL } } } - fprintf(stdout, "\n"); + fprintf(stderr, "\n"); *genomeMetaInfoLength = i; rewind(_rg_fp); diff --git a/SNPIndexer.c b/SNPIndexer.c index c6ccfcc..ff188de 100644 --- a/SNPIndexer.c +++ b/SNPIndexer.c @@ -17,8 +17,8 @@ FILE *fileOpen(char *fileName, char *mode) fp = fopen (fileName, mode); if (fp == NULL) { - fprintf(stdout, "Error: Cannot Open file \"%s\"\n", fileName); - fflush(stdout); + fprintf(stderr, "Error: Cannot Open file \"%s\"\n", fileName); + fflush(stderr); exit(EXIT_FAILURE); } return fp; @@ -82,7 +82,7 @@ int main(int argc, char *argv[]) // read file, count number of chromosomes and their locations inFile = fileOpen(inFileName, "r"); - fprintf(stdout, "Pre-processing VCF file ...\n"); + fprintf(stderr, "Pre-processing VCF file ...\n"); while ( fgets(line, MAX_LINE_LENGTH, inFile) ) { @@ 
-113,29 +113,29 @@ int main(int argc, char *argv[]) } } - fprintf(stdout, "Chromosomes: %d\n", chrCount); - fprintf(stdout, "Valid SNP locations: %d\n", snpCount); + fprintf(stderr, "Chromosomes: %d\n", chrCount); + fprintf(stderr, "Valid SNP locations: %d\n", snpCount); // allocate SNPLocs for each chromosome for (i = 0; i < chrCount; i++) { chrInfo[i].snpLocs = malloc(chrInfo[i].locCnt * sizeof(SNPLoc)); chrInfo[i].locCnt = 0; - //fprintf(stdout, "%s\n", chrInfo[i].chrName); + //fprintf(stderr, "%s\n", chrInfo[i].chrName); } // read file again, fill locations rewind(inFile); i = 0; - fprintf(stdout, "Reading SNP locations "); - fflush(stdout); + fprintf(stderr, "Reading SNP locations "); + fflush(stderr); while ( fgets(line, MAX_LINE_LENGTH, inFile) ) { if (++i == PROGRESS_METER_UNIT) { - fprintf(stdout, "."); - fflush(stdout); + fprintf(stderr, "."); + fflush(stderr); i = 0; } if (line[0] == '#') // comment line @@ -157,7 +157,7 @@ int main(int argc, char *argv[]) fclose(inFile); // sort locations for each chromosome - fprintf(stdout, ".\nReformatting data ...\n"); + fprintf(stderr, ".\nReformatting data ...\n"); for (i = 0; i < chrCount; i++) { @@ -166,7 +166,7 @@ int main(int argc, char *argv[]) } // write to output file - fprintf(stdout, "Creating output in %s\n", outFileName); + fprintf(stderr, "Creating output in %s\n", outFileName); outFile = fileOpen(outFileName, "w"); fwrite(&chrCount, sizeof(int), 1, outFile); @@ -181,7 +181,7 @@ int main(int argc, char *argv[]) } fclose(outFile); - fprintf(stdout, "%u SNP locations registered successfully\n", snpCount); + fprintf(stderr, "%u SNP locations registered successfully\n", snpCount); freeMems(chrInfo, chrCount); return 0; diff --git a/SNPReader.c b/SNPReader.c index d7ba61d..e0d1f94 100644 --- a/SNPReader.c +++ b/SNPReader.c @@ -115,7 +115,7 @@ void initLoadingSNPs(char *fileName) else // not found { t = fread(dummy, sizeof(SNPLoc), locCnt, fp); // read dummy - fprintf(stdout, "Warning: chromosome %s is 
present in the SNP database but not found in the reference genome\n", cname); + fprintf(stderr, "Warning: chromosome %s is present in the SNP database but not found in the reference genome\n", cname); } } diff --git a/baseFAST.c b/baseFAST.c index f44eee1..0bfb44b 100644 --- a/baseFAST.c +++ b/baseFAST.c @@ -98,9 +98,9 @@ int main(int argc, char *argv[]) if (SNPMode) initLoadingSNPs(fileName[2]); - fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); - fprintf(stdout, "| %15s | %15s | %15s | %15s | %15s %15s |\n","Genome Name","Loading Time", "Mapping Time", "Memory Usage(M)","Total Mappings","Mapped reads"); - fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); + fprintf(stderr, "-----------------------------------------------------------------------------------------------------------\n"); + fprintf(stderr, "| %15s | %15s | %15s | %15s | %15s %15s |\n","Genome Name","Loading Time", "Mapping Time", "Memory Usage(M)","Total Mappings","Mapped reads"); + fprintf(stderr, "-----------------------------------------------------------------------------------------------------------\n"); mappingTime = 0; loadingTime = 0; @@ -136,9 +136,9 @@ int main(int argc, char *argv[]) totalLoadingTime += loadingTime; - fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", + fprintf(stderr, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", getRefGenomeName(),loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); - fflush(stdout); + fflush(stderr); loadingTime = 0; mappingTime = 0; @@ -146,9 +146,9 @@ int main(int argc, char *argv[]) } else if (progressRep) { - fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", + fprintf(stderr, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", getRefGenomeName(),loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); - 
fflush(stdout); + fflush(stderr); } } while (flag); @@ -165,15 +165,15 @@ int main(int argc, char *argv[]) if (SNPMode) finalizeSNPs(); - fprintf(stdout, "----------------------------------------------------------------------------------------------------------\n"); + fprintf(stderr, "----------------------------------------------------------------------------------------------------------\n"); - fprintf(stdout, "%19s%16.2f%18.2f\n\n", "Total:",totalLoadingTime, totalMappingTime); - fprintf(stdout, "%-30s%10.2f\n","Total Time:", totalMappingTime+totalLoadingTime); - fprintf(stdout, "%-30s%10d\n","Total No. of Reads:", totalNumOfReads); - fprintf(stdout, "%-30s%10lld\n","Total No. of Mappings:", mappingCnt); - //fprintf(stdout, "%-30s%10.0f\n","Avg No. of locations verified:", ceil((float)verificationCnt/totalNumOfReads)); + fprintf(stderr, "%19s%16.2f%18.2f\n\n", "Total:",totalLoadingTime, totalMappingTime); + fprintf(stderr, "%-30s%10.2f\n","Total Time:", totalMappingTime+totalLoadingTime); + fprintf(stderr, "%-30s%10d\n","Total No. of Reads:", totalNumOfReads); + fprintf(stderr, "%-30s%10lld\n","Total No. of Mappings:", mappingCnt); + //fprintf(stderr, "%-30s%10.0f\n","Avg No. of locations verified:", ceil((float)verificationCnt/totalNumOfReads)); if (memUsage > 0) - fprintf(stdout, "Memory Leak: %lld Bytes\n", memUsage); + fprintf(stderr, "Memory Leak: %lld Bytes\n", memUsage); } return 0;