Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
479 changes: 478 additions & 1 deletion alignment.cpp

Large diffs are not rendered by default.

33 changes: 32 additions & 1 deletion alignment.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ const char STATE_INVALID = 127;
const int NUM_CHAR = 256;

enum SeqType {
SEQ_DNA, SEQ_PROTEIN, SEQ_BINARY, SEQ_MORPH, SEQ_MULTISTATE, SEQ_CODON, SEQ_UNKNOWN
SEQ_DNA, SEQ_PROTEIN, SEQ_BINARY, SEQ_MORPH, SEQ_MULTISTATE, SEQ_CODON, SEQ_UNKNOWN, SEQ_GAN
};


Expand Down Expand Up @@ -60,6 +60,15 @@ class Alignment : public vector<Pattern> {
*/
Alignment(char *filename, char *sequence_type, InputType &intype);

/**
constructor
@param filename file name
@param sequence_type type of the sequence, either "BIN", "DNA", "AA", or NULL
@param intype (OUT) input format of the file
@param gap_as_new check if gap character is considered as new character
*/
Alignment(char *filename, char *sequence_type, InputType &intype, bool &gap_as_new);

/**
destructor
*/
Expand Down Expand Up @@ -93,6 +102,8 @@ class Alignment : public vector<Pattern> {

int buildPattern(StrVector &sequences, char *sequence_type, int nseq, int nsite);

int buildPatternGAN(StrVector &sequences, char *sequence_type, int nseq, int nsite, bool gap_as_new);

/**
read the alignment in PHYLIP format
@param filename file name
Expand All @@ -101,6 +112,15 @@ class Alignment : public vector<Pattern> {
*/
int readPhylip(char *filename, char *sequence_type);

/**
read the alignment in PHYLIP format
@param filename file name
@param sequence_type type of the sequence, either "BIN", "DNA", "AA", or NULL
@param gap_as_new check if gap character is considered as new character
@return 1 on success, 0 on failure
*/
int readPhylipGAN(char *filename, char *sequence_type, bool gap_as_new);

/**
read the alignment in FASTA format
@param filename file name
Expand All @@ -109,6 +129,15 @@ class Alignment : public vector<Pattern> {
*/
int readFasta(char *filename, char *sequence_type);

/**
read the alignment in FASTA format
@param filename file name
@param sequence_type type of the sequence, either "BIN", "DNA", "AA", or NULL
@param gap_as_new check if gap character is considered as new character
@return 1 on success, 0 on failure
*/
int readFastaGAN(char *filename, char *sequence_type, bool gap_as_new);

/**
extract the alignment from a nexus data block, called by readNexus()
@param data_block data block of nexus file
Expand Down Expand Up @@ -140,6 +169,8 @@ class Alignment : public vector<Pattern> {
****************************************************************************/
SeqType detectSequenceType(StrVector &sequences);

SeqType detectSequenceTypeGAN(StrVector &sequences, bool gap_as_new);

void computeUnknownState();

void buildStateMap(char *map, SeqType seq_type);
Expand Down
2 changes: 2 additions & 0 deletions iqtree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,8 @@ void IQTree::createPLLPartition(Params &params, ostream &pllPartitionFileHandle)
} else {
model = "WAG";
}
} else if (aln->seq_type == SEQ_GAN) {
model = "GAN";
} else {
model = "WAG";
//outError("PLL currently only supports DNA/protein alignments");
Expand Down
3 changes: 3 additions & 0 deletions maalignment.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ class MaAlignment : public Alignment
MaAlignment() : Alignment() {};

MaAlignment(char *filename, char *sequence_type, InputType &intype) : Alignment(filename, sequence_type, intype){};

MaAlignment(char *filename, char *sequence_type, InputType &intype, bool &gap_as_new) : Alignment(filename, sequence_type, intype, gap_as_new){};


MaAlignment(Alignment &align) : Alignment(align){};

Expand Down
6 changes: 5 additions & 1 deletion mexttree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@ void MExtTree::generateRandomTree(TreeGenType tree_type, Params &params, bool bi
Alignment *alignment = NULL;
if (params.aln_file) {
// generate random tree with leaf sets taken from an alignment
alignment = new Alignment(params.aln_file, params.sequence_type, params.intype);
if (params.gap_as_new) {
alignment = new Alignment(params.aln_file, params.sequence_type, params.intype, params.gap_as_new);
} else {
alignment = new Alignment(params.aln_file, params.sequence_type, params.intype);
}
params.sub_size = alignment->getNSeq();
}
if (params.sub_size < 3) {
Expand Down
14 changes: 9 additions & 5 deletions model/modelfactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ ModelSubst* ModelFactory::createModel(string model_str, StateFreqType freq_type,
model = new ModelCodon(model_str.c_str(), model_params, freq_type, freq_params, tree, count_rates);
} else if (tree->aln->seq_type == SEQ_MORPH) {
model = new ModelMorphology(model_str.c_str(), model_params, freq_type, freq_params, tree);
} else if (tree->aln->seq_type == SEQ_GAN) {
model = new ModelMorphology(model_str.c_str(), model_params, freq_type, freq_params, tree);
} else {
outError("Unsupported model type");
}
Expand All @@ -117,6 +119,7 @@ ModelFactory::ModelFactory(Params &params, PhyloTree *tree) {
else if (tree->aln->seq_type == SEQ_BINARY) model_str = "GTR2";
else if (tree->aln->seq_type == SEQ_CODON) model_str = "GY";
else if (tree->aln->seq_type == SEQ_MORPH) model_str = "MK";
else if (tree->aln->seq_type == SEQ_GAN) model_str = "MK";
else model_str = "JC";
if(!params.maximum_parsimony)
outWarning("Default model may be under-fitting. Use option '-m TEST' to select best-fit model.");
Expand All @@ -129,6 +132,7 @@ ModelFactory::ModelFactory(Params &params, PhyloTree *tree) {
case SEQ_BINARY: freq_type = FREQ_ESTIMATE; break; // default for binary: optimized frequencies
case SEQ_PROTEIN: freq_type = FREQ_USER_DEFINED; break; // default for protein: frequencies of the empirical AA matrix
case SEQ_MORPH: freq_type = FREQ_EQUAL; break;
case SEQ_GAN: freq_type = FREQ_EQUAL; break;
default: freq_type = FREQ_EMPIRICAL; break; // default for DNA and others: counted frequencies from alignment
}
}
Expand Down Expand Up @@ -347,11 +351,11 @@ ModelFactory::ModelFactory(Params &params, PhyloTree *tree) {

}

int ModelFactory::getNParameters() {
int df = model->getNDim() + site_rate->getNDim() + site_rate->phylo_tree->branchNum;
if (model->freq_type == FREQ_EMPIRICAL) df += model->num_states-1;
return df;
}
int ModelFactory::getNParameters() {
int df = model->getNDim() + site_rate->getNDim() + site_rate->phylo_tree->branchNum;
if (model->freq_type == FREQ_EMPIRICAL) df += model->num_states-1;
return df;
}
void ModelFactory::readSiteFreq(Alignment *aln, char* site_freq_file, IntVector &site_model, vector<double*> &freq_vec)
{
cout << "Reading site-specific state frequency file " << site_freq_file << " ..." << endl;
Expand Down
12 changes: 12 additions & 0 deletions parsmultistate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,15 @@ void doParsMultiState(Params &params) {
cout << "Parsimony score ver2 is: " << tree.computeParsimony() << endl;
//tree.printParsimonyStates();
}

void doParsMultiStateGAN(Params &params) {
cout << "Here\n";
Alignment alignment(params.aln_file, params.sequence_type, params.intype, params.gap_as_new);
TinaTree tree;
tree.readTree(params.user_file, params.is_rooted);
tree.setAlignment(&alignment);
tree.drawTree(cout);
cout << "Parsimony score is: " << tree.computeParsimonyScore() << endl;
cout << "Parsimony score ver2 is: " << tree.computeParsimony() << endl;
//tree.printParsimonyStates();
}
1 change: 1 addition & 0 deletions parsmultistate.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,6 @@
#include "tools.h"

void doParsMultiState(Params &params);
void doParsMultiStateGAN(Params &params);

#endif
104 changes: 96 additions & 8 deletions pda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1699,6 +1699,43 @@ void guidedBootstrap(Params &params)
cout << "Log of the probability of the new alignment is printed to: " << outProb_name << endl;
}

void guidedBootstrapGAN(Params &params)
{
MaAlignment inputAlign(params.aln_file,params.sequence_type, params.intype, params.gap_as_new);
inputAlign.readLogLL(params.siteLL_file);

string outFre_name = params.out_prefix;
outFre_name += ".patInfo";

inputAlign.printPatObsExpFre(outFre_name.c_str());

string gboAln_name = params.out_prefix;
gboAln_name += ".gbo";

MaAlignment gboAlign;
double prob;
gboAlign.generateExpectedAlignment(&inputAlign, prob);
gboAlign.printPhylip(gboAln_name.c_str());


string outProb_name = params.out_prefix;
outProb_name += ".gbo.logP";
try {
ofstream outProb;
outProb.exceptions(ios::failbit | ios::badbit);
outProb.open(outProb_name.c_str());
outProb.precision(10);
outProb << prob << endl;
outProb.close();
} catch (ios::failure) {
outError(ERR_WRITE_OUTPUT, outProb_name);
}

cout << "Information about patterns in the input alignment is printed to: " << outFre_name << endl;
cout << "A 'guided bootstrap' alignment is printed to: " << gboAln_name << endl;
cout << "Log of the probability of the new alignment is printed to: " << outProb_name << endl;
}

/**MINH ANH: to compute the probability of an alignment given the multinomial distribution of patterns frequencies derived from a reference alignment*/
void computeMulProb(Params &params)
{
Expand All @@ -1723,6 +1760,29 @@ void computeMulProb(Params &params)
cout << "The probability is printed to: " << outProb_name << endl;
}

void computeMulProbGAN(Params &params)
{
Alignment refAlign(params.second_align, params.sequence_type, params.intype, params.gap_as_new);
Alignment inputAlign(params.aln_file, params.sequence_type, params.intype, params.gap_as_new);
double prob;
inputAlign.multinomialProb(refAlign,prob);
//Printing
string outProb_name = params.out_prefix;
outProb_name += ".mprob";
try {
ofstream outProb;
outProb.exceptions(ios::failbit | ios::badbit);
outProb.open(outProb_name.c_str());
outProb.precision(10);
outProb << prob << endl;
outProb.close();
} catch (ios::failure) {
outError(ERR_WRITE_OUTPUT, outProb_name);
}
cout << "Probability of alignment " << params.aln_file << " given alignment " << params.second_align << " is: " << prob << endl;
cout << "The probability is printed to: " << outProb_name << endl;
}

void processNCBITree(Params &params) {
NCBITree tree;
Node *dad = tree.readNCBITree(params.user_file, params.ncbi_taxid, params.ncbi_taxon_level, params.ncbi_ignore_level);
Expand Down Expand Up @@ -2327,19 +2387,35 @@ int main(int argc, char *argv[])
} else if (params.do_pars_multistate) {
// cout << "Starting the test for computing concensus NOT from file:" << endl;
// testCompConsensus("test.fa.boottrees.treefile", "test.out", &params); // (const char * infile, const char * outfile, Params *params);
doParsMultiState(params);
if (params.gap_as_new) {
doParsMultiStateGAN(params);
} else {
doParsMultiState(params);
}
} else if(params.test_mode){
test(params);
} else if(params.print_site_pars_user_tree){
printSiteParsimonyUserTree(params);
} else if (params.compute_parsimony) {
computeUserTreeParsimomy(params);
if (params.gap_as_new) {
computeUserTreeParsimomyGAN(params);
} else {
computeUserTreeParsimomy(params);
}
}
else if (params.newick_to_tnt) {
convertNewickToTnt(params);
if (params.gap_as_new) {
convertNewickToTntGAN(params);
} else {
convertNewickToTnt(params);
}
}
else if (params.newick_to_nexus) {
convertNewickToNexus(params);
if (params.gap_as_new) {
convertNewickToNexusGAN(params);
} else {
convertNewickToNexus(params);
}
}
else if (params.rf_dist_mode != 0) {
computeRFDist(params);
Expand All @@ -2365,10 +2441,22 @@ int main(int argc, char *argv[])
} else if (params.aln_file || params.partition_file) {
if ((params.siteLL_file || params.second_align) && !params.gbo_replicates)
{
if (params.siteLL_file)
guidedBootstrap(params);
if (params.second_align)
computeMulProb(params);
if (params.siteLL_file) {
if (params.gap_as_new) {
guidedBootstrapGAN(params);
} else {
guidedBootstrap(params);
}
}

if (params.second_align) {
if (params.gap_as_new) {
computeMulProbGAN(params);
} else {
computeMulProb(params);
}
}

} else {
runPhyloAnalysis(params);
}
Expand Down
Loading