Skip to content

Commit cbf0357

Browse files
committed
Merge branch 'addcode'
2 parents 62634a9 + ac15b79 commit cbf0357

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+160529
-6
lines changed

LICENSE.txt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
The MIT License (MIT)
2+
3+
Copyright (c) 2013,2014 Gregory G. Faust
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in
13+
all copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21+
THE SOFTWARE.

Makefile

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Determine the yaha build number
BUILDNUM := 83

# Decide if we are compiling for user mode, or internal lab usage.
# This almost exclusively determines the options available to the user when running YAHA.
# But also controls timing information.
# NOTE: the test below is `ifdef`, so ANY non-empty value (even FALSE) selects user mode.
# To build the internal lab version, comment this line out or run `make USERMODE=`.
USERMODE := TRUE

# Set up the sub-directory names.
SDIR := src
ODIR := obj
BDIR := bin

# Set up flags depending on mode
ifdef USERMODE
PROG := yaha
CCFLAGS := -Wall -O3 -D COMPILE_USER_MODE -D BUILDNUM=$(BUILDNUM)
else
PROG := yaha$(BUILDNUM)
CCFLAGS := -Wall -Winline -O3 -g -D BUILDNUM=$(BUILDNUM)
endif

# Auto build dependency files.
CCFLAGS += -MMD -MP

# NOTE: in this file CPP and CPPFLAGS name the C++ compiler and its flags,
# not the C preprocessor as in the usual make conventions.
# The names are kept so existing command-line overrides continue to work.
CC := gcc
CPP := g++
CFLAGS := $(CCFLAGS) -std=gnu99
CPPFLAGS := $(CCFLAGS)
LDFLAGS := -pthread

# The list of object files.
OBJfiles := Main.o AlignArgs.o AlignHelpers.o AlignExtFrag.o AlignOutput.o BaseSeq.o Compress.o \
            FileHelpers.o GraphPath.o Index.o Query.o QueryMatch.o QueryState.o SW.o Math.o
# Convert to use the obj dir
OBJS := $(patsubst %,$(ODIR)/%,$(OBJfiles))

# Remove a half-written target if its recipe fails, so it is never mistaken for up to date.
.DELETE_ON_ERROR:

# These targets are commands, not files.
.PHONY: YAHA clean

# Make everything, all of which is defined below.
# The output directories are requested by the rules that write into them (see below),
# so the build is also correct under `make -j`.
YAHA: $(BDIR)/$(PROG)

# Make the directories
$(ODIR):
	mkdir -p $@
$(BDIR):
	mkdir -p $@

# Link the program.
# $(BDIR) is order-only: it must exist, but its timestamp never forces a relink.
$(BDIR)/$(PROG): $(OBJS) | $(BDIR)
	$(CPP) $(LDFLAGS) -o $@ $(OBJS)

# Include the dependencies.
# The actual objects will be built by the below generic rules based on these dependencies.
# The - suppresses warning messages about missing files when building from scratch.
-include $(OBJS:.o=.d)

# Make the object files.
# The built in rules will miss the subdirectories.
# $(ODIR) is order-only so that objects are never compiled before the directory exists.
$(ODIR)/%.o: $(SDIR)/%.c | $(ODIR)
	$(CC) $(CFLAGS) -c $< -o $@

$(ODIR)/%.o: $(SDIR)/%.cpp | $(ODIR)
	$(CPP) $(CPPFLAGS) -c $< -o $@

clean:
	rm -Rf $(ODIR)
	rm -Rf $(BDIR)
67+

README.md

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,18 @@
77
[Faust G.G. and Hall I.M., "*YAHA*: fast and flexible long-read alignment with optimal breakpoint detection,"
88
*Bioinformatics* Oct. 2012; **28**(19): 2417-2424.](http://bioinformatics.oxfordjournals.org/content/28/19/2417)
99

10-
There is an extensive [User Guide](https://www.dropbox.com/s/bmyny13fcsnjuee/YAHA_User_Guide.0.1.82.pdf?dl=0)
10+
There is an extensive [User Guide](https://www.dropbox.com/s/7j758vpbaskcq20/YAHA_User_Guide.0.1.83.pdf?dl=0)
1111
that supplements the command information below.
1212
Click the preceding link or download the file from this repository.
1313

1414
---
1515

16-
**Current version:** 0.1.82
16+
**Current version:** 0.1.83
1717

1818
Current support for Linux only.
1919

2020
##Summary
21-
*yaha* is a flexible, sensitive and accurate DNA aligner designed for single-end reads.
21+
*yaha* is an open source, flexible, sensitive and accurate DNA aligner designed for single-end reads.
2222
It supports three major modes of operation:
2323

2424
1. The default “Optimal Query Coverage” (**-OQC**) mode reports the best set of alignments that cover the length of each
@@ -32,13 +32,12 @@ identify structural variation events (deletions, duplications, insertions or inv
3232
between the subject query and the reference genome.
3333

3434
##Installation
35-
*yaha* is currently released as a executable binary only.
36-
3735
*yaha* can be downloaded from the **_releases_** tab or manually downloaded via *git clone*. For example:
3836
~~~~~~~~~~~~~~~~~~
3937
git clone git://github.com/GregoryFaust/yaha.git
4038
cd yaha
41-
cp yaha /usr/local/bin/.
39+
make
40+
cp bin/yaha /usr/local/bin/
4241
~~~~~~~~~~~~~~~~~~
4342

4443
##Usage

YAHA_User_Guide.0.1.82.pdf

-186 KB
Binary file not shown.

YAHA_User_Guide.0.1.83.pdf

191 KB
Binary file not shown.

src/AlignArgs.c

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
/* -*- Mode: C ; indent-tabs-mode: nil ; c-file-style: "stroustrup" ; column-number-mode: t -*-
2+
3+
Project: YAHA, DNA alignment tool designed to find optimal split-read mappings on single-end queries.
4+
Author: Greg Faust (gf4ea@virginia.edu)
5+
6+
File: AlignArgs.c Includes code for AlignmentArgs Structure.
7+
8+
License Information:
9+
10+
Copyright 2009-2015 Gregory G. Faust
11+
12+
Licensed under the MIT license (the "License");
13+
You may not use this file except in compliance with the License.
14+
You may obtain a copy of the License at http://opensource.org/licenses/MIT
15+
16+
*/
17+
18+
#include <stdlib.h>
19+
#include "Math.h"
20+
21+
/////
22+
// AlignmentArgs Structure.
23+
/////
24+
25+
#define DEFAULT_ARG_VALUE -1
26+
27+
AlignmentArgs_t * makeAlignmentArgs ()
28+
{
29+
AlignmentArgs_t * AAs = (AlignmentArgs_t *) malloc(sizeof(AlignmentArgs_t));
30+
31+
// Set all the default values.
32+
AAs->gfileName = NULL;
33+
AAs->xfileName = NULL;
34+
AAs->qfileName = "stdin";
35+
AAs->ofileName = NULL;
36+
AAs->numThreads = 1;
37+
AAs->fastq = FALSE;
38+
39+
#ifdef QUERYSTATS
40+
AAs->qsfileName = NULL;
41+
AAs->queryStats = FALSE;
42+
#endif
43+
44+
// Any defaults that depend on other parameters must be set after the program args are processed.
45+
// So, mark them as DEFAULT_ARG_VALUE
46+
47+
// Index parameters
48+
AAs->wordLen = 15;
49+
AAs->skipDist = 1;
50+
AAs->maxHits = DEFAULT_ARG_VALUE;
51+
52+
// General Alignment Parameters
53+
AAs->maxGap = 50;
54+
AAs->maxIntron = DEFAULT_ARG_VALUE;
55+
AAs->minMatch = 25;
56+
AAs->minIdentity = 0.9;
57+
AAs->bandWidth = 5;
58+
AAs->maxDesert = 50;
59+
AAs->minRawScore = DEFAULT_ARG_VALUE;
60+
AAs->minNonOverlap = DEFAULT_ARG_VALUE;
61+
62+
// These are the BWASW defaults.
63+
AAs->affineGapScoring = TRUE;
64+
AAs->GOCost = 5;
65+
AAs->GECost = 2;
66+
AAs->RCost = 3;
67+
AAs->MScore = 1;
68+
AAs->XCutoff = 25;
69+
70+
// These are for filtering of alignments against a "best" set of query spanning alignments.
71+
AAs->OQC = TRUE;
72+
AAs->OQCMinNonOverlap = DEFAULT_ARG_VALUE;
73+
AAs->BPCost = 5;
74+
AAs->maxBPLog = 5;
75+
// By default, output only primary alignments.
76+
// AAs->FBSMaxSimilar = 1;
77+
AAs->FBS = FALSE;
78+
AAs->FBS_PSLength = 0.90;
79+
AAs->FBS_PSScore = 0.90;
80+
81+
// For now, just make the default the max.
82+
AAs->maxQueryLength = 32000;
83+
AAs->verbose = FALSE;
84+
85+
AAs->outputBlast8 = FALSE;
86+
AAs->outputSAM = TRUE;
87+
AAs->hardClip = TRUE;
88+
89+
return AAs;
90+
}
91+
92+
// Release an AlignmentArgs_t together with the file name strings it frees.
// Passing NULL is a no-op, mirroring free().
void disposeAlignmentArgs(AlignmentArgs_t * AAs)
{
    if (AAs == NULL) return;

    // Only gfileName and ofileName are released here.
    // qfileName defaults to the string literal "stdin" and so is not freed.
    // NOTE(review): the original comment said the index file and output file names
    // are always generated, but the code frees gfileName (printed as the genome
    // filename), not xfileName -- confirm which strings are heap-owned.
    // free(NULL) is defined to do nothing, so no NULL tests are needed.
    free(AAs->gfileName);
    free(AAs->ofileName);
    free(AAs);
}
99+
100+
// Write the file names held in AAs to out, one per line.
// A name that is NULL produces no output line.
void printAlignmentArgs(AlignmentArgs_t * AAs, FILE * out)
{
    // Each entry pairs a format string with the name it reports, in print order.
    const struct { const char * format; const char * name; } entries[] = {
        { "Genome filename: %s\n", AAs->gfileName },
        { "Index filename: %s\n",  AAs->xfileName },
        { "Query filename: %s\n",  AAs->qfileName },
        { "Output filename: %s\n", AAs->ofileName },
    };
    const int entryCount = (int) (sizeof(entries) / sizeof(entries[0]));

    for (int i = 0; i < entryCount; i++)
    {
        if (entries[i].name == NULL) continue;
        fprintf(out, entries[i].format, entries[i].name);
    }
}
107+
108+
// Resolve every argument still marked DEFAULT_ARG_VALUE and clamp out-of-range values.
// Call this after the program args have been processed.
//   AAs   - the argument structure to finalize; it is modified in place.
//   query - when TRUE the default maxHits is 650, otherwise SUINT_MAX_VALUE - 10.
// Also computes AAs->minExtLength from the scoring parameters.
void postProcessAlignmentArgs(AlignmentArgs_t * AAs, BOOL query)
{
    // Make sure values not specified for AAs have reasonable values.
    if (AAs->maxIntron == DEFAULT_ARG_VALUE)
        AAs->maxIntron = AAs->maxGap;
    if (AAs->minRawScore == DEFAULT_ARG_VALUE)
        AAs->minRawScore = AAs->minMatch;
    if (AAs->OQCMinNonOverlap == DEFAULT_ARG_VALUE)
        AAs->OQCMinNonOverlap = AAs->minMatch;
    if (AAs->OQCMinNonOverlap <= 0)
    {
        fprintf(stderr, "MNO parameter must be >=1.  MNO=1 will be used.\n");
        AAs->OQCMinNonOverlap = 1;
    }
    if (AAs->minNonOverlap == DEFAULT_ARG_VALUE)
    {
        AAs->minNonOverlap = AAs->OQCMinNonOverlap; // 1;
    }
    if (!AAs->affineGapScoring)
    {
        // Simulate edit distance by appropriate setting of the scoring parameters.
        // This is not well tested.
        AAs->MScore = 1;
        AAs->RCost = AAs->GECost = 1;
        AAs->GOCost = 0;
    }
    // The loop below adds MScore until the total exceeds the target cost.
    // With MScore <= 0 it would never terminate, so clamp with a warning,
    // in the same way as the MNO and MGDP parameters.
    if (AAs->MScore < 1)
    {
        fprintf(stderr, "Match score must be >=1.  A match score of 1 will be used.\n");
        AAs->MScore = 1;
    }
    // Calculate the min extension length requiring DP.
    // Since we always dp perfect extensions, the next bp is a mismatch.
    // DP is only needed if the length of the extension is long enough
    // to have enough matches to make up for the cost of the mismatch.
    // For a non-negative target the following yields floor(target/MScore) + 2.
    // We take the min of RCost and a single base gap cost.
    // But for any rational choice of the cost parameters, RCost <= AAs->GOCost + AAs->GECost.
    int len = 1;
    int score = 0;
    int target = MIN(AAs->RCost, AAs->GOCost + AAs->GECost);
    while (score <= target)
    {
        score += AAs->MScore;
        len += 1;
    }
    AAs->minExtLength = len;

    if (AAs->maxHits == DEFAULT_ARG_VALUE)
    {
        if (query) AAs->maxHits = 650;
        else       AAs->maxHits = SUINT_MAX_VALUE - 10;
    }
    else AAs->maxHits = MIN(AAs->maxHits, SUINT_MAX_VALUE - 10);
    // Only values between 1 and 9 make sense for this parameter.
    // For now, we will do the right thing with a warning.
    if (AAs->maxBPLog < 1)
    {
        fprintf(stderr, "MGDP parameter must be between 1 and 9 (inclusive).  MGDP=1 will be used.\n");
        AAs->maxBPLog = 1;
    }
    if (AAs->maxBPLog > 9)
    {
        fprintf(stderr, "MGDP parameter must be between 1 and 9 (inclusive).  MGDP=9 will be used.\n");
        AAs->maxBPLog = 9;
    }
}

0 commit comments

Comments
 (0)