Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 80 additions & 85 deletions parstree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <cstring>
#include "parstree.h"
#include "tools.h"
#include "vectorclass/vectorclass.h"

ParsTree::ParsTree(): IQTree(){
cost_matrix = NULL;
Expand Down Expand Up @@ -112,7 +113,12 @@ int ParsTree::computeParsimony(){
int nptn = aln->size();
if(_pattern_pars == NULL) _pattern_pars = aligned_alloc<BootValTypePars>(nptn + VCSIZE_USHORT);

return computeParsimonyBranch((PhyloNeighbor*) root->neighbors[0], (PhyloNode*) root);
#ifdef __AVX
#define VECTORCLASS Vec8ui
#else
#define VECTORCLASS Vec4ui
#endif
return computeParsimonyBranch<VECTORCLASS>((PhyloNeighbor*) root->neighbors[0], (PhyloNode*) root);
}

//inline UINT computeCostFromChild(UINT child_cost, UINT transition_cost){
Expand All @@ -124,6 +130,7 @@ compute partial parsimony score of the subtree rooted at dad
@param dad_branch the branch leading to the subtree
@param dad its dad, used to direct the traversal
*/
template<class VECTORCLASS>
void ParsTree::computePartialParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad){
// don't recompute the parsimony
if (dad_branch->partial_lh_computed & 2)
Expand Down Expand Up @@ -180,14 +187,14 @@ void ParsTree::computePartialParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad
UINT *left = NULL, *right = NULL;

FOR_NEIGHBOR_IT(node, dad, it)if ((*it)->node->name != ROOT_NAME) {
computePartialParsimony((PhyloNeighbor*) (*it), (PhyloNode*) node);
computePartialParsimony<VECTORCLASS>((PhyloNeighbor*) (*it), (PhyloNode*) node);
if (!left)
left = ((PhyloNeighbor*)*it)->partial_pars;
else
right = ((PhyloNeighbor*)*it)->partial_pars;
}


// What is this case lolll ???
if (node->degree() > 3) {
FOR_NEIGHBOR_IT(node, dad, it) if ((*it)->node->name != ROOT_NAME) {
UINT *partial_pars_child = ((PhyloNeighbor*) (*it))->partial_pars;
Expand Down Expand Up @@ -216,62 +223,30 @@ void ParsTree::computePartialParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad
// bifurcating node
assert(node->degree() == 3);

switch (nstates) {
case 4:
for (ptn = 0; ptn < aln->size(); ptn++){
// ignore const ptn because it does not affect pars score
if (aln->at(ptn).is_const) continue;
int ptn_start_index = ptn*4;

if (aln->num_states != 2) {
for(ptn = 0; ptn < aln->size(); ++ptn) {
int ptn_start_index = ptn * aln->num_states;
UINT *left_ptr = &left[ptn_start_index];
UINT *right_ptr = &right[ptn_start_index];
UINT *partial_pars_ptr = &partial_pars[ptn_start_index];
UINT *cost_matrix_ptr = cost_matrix;
UINT left_contrib, right_contrib;

for(i = 0; i < 4; i++){
// min(j->i) from child_branch
left_contrib = left_ptr[0] + cost_matrix_ptr[0];
right_contrib = right_ptr[0] + cost_matrix_ptr[0];
for(j = 1; j < 4; j++) {
UINT value = left_ptr[j] + cost_matrix_ptr[j];
left_contrib = min(value, left_contrib);
value = right_ptr[j] + cost_matrix_ptr[j];
right_contrib = min(value, right_contrib);
}
partial_pars_ptr[i] = left_contrib+right_contrib;
cost_matrix_ptr += 4;
fill(sLeft, sLeft + aln->num_states, INT32_MAX);
fill(sRight, sRight + aln->num_states, INT32_MAX);

for(int i = 0; i < aln->num_states; ++i) {
// transforming from child(i) -> dad(j)
for(int j = 0; j < aln->num_states; j += VECSIZE) {
VECTORCLASS* vLeft = (VECTORCLASS*) &sLeft[j];
VECTORCLASS* vRight = (VECTORCLASS*) &sRight[j];
*vLeft = min(*vLeft, *((VECTORCLASS*) &inversedSankoff[i][j]) + left_ptr[i]);
*vRight = min(*vRight, *((VECTORCLASS*) &inversedSankoff[i][j]) + right_ptr[i]);
}
}
}
break;
case 20:
for (ptn = 0; ptn < aln->size(); ptn++){
// ignore const ptn because it does not affect pars score
if (aln->at(ptn).is_const) continue;
int ptn_start_index = ptn*20;

UINT *left_ptr = &left[ptn_start_index];
UINT *right_ptr = &right[ptn_start_index];
UINT *partial_pars_ptr = &partial_pars[ptn_start_index];
UINT *cost_matrix_ptr = cost_matrix;
UINT left_contrib, right_contrib;

for(i = 0; i < 20; i++){
// min(j->i) from child_branch
left_contrib = left_ptr[0] + cost_matrix_ptr[0];
right_contrib = right_ptr[0] + cost_matrix_ptr[0];
for(j = 1; j < 20; j++) {
UINT value = left_ptr[j] + cost_matrix_ptr[j];
left_contrib = min(value, left_contrib);
value = right_ptr[j] + cost_matrix_ptr[j];
right_contrib = min(value, right_contrib);
}
partial_pars_ptr[i] = left_contrib+right_contrib;
cost_matrix_ptr += 20;
for(int i = 0; i < aln->num_states; ++i) {
partial_pars_ptr[i] = sLeft[i] + sRight[i];
}
}
break;
default:
} else {
for (ptn = 0; ptn < aln->size(); ptn++){
// ignore const ptn because it does not affect pars score
if (aln->at(ptn).is_const) continue;
Expand All @@ -297,9 +272,7 @@ void ParsTree::computePartialParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad
cost_matrix_ptr += nstates;
}
}
break;
}

}
/*

Expand Down Expand Up @@ -436,14 +409,37 @@ compute tree parsimony score based on a particular branch
@param branch_subst (OUT) if not NULL, the number of substitutions on this branch
@return parsimony score of the tree
*/
template<class VECTORCLASS>
int ParsTree::computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) {
PhyloNode *node = (PhyloNode*) dad_branch->node;
PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
assert(node_branch);

if (!central_partial_pars)
if (!central_partial_pars) {
initializeAllPartialPars();

VECSIZE = VECTORCLASS::size();
sitesPerVec = (VECSIZE + aln->num_states - 1) / aln->num_states;
vecsPerSite = (VECSIZE + aln->num_states - 1) / VECSIZE;
assert(sitesPerVec == 1 || vecsPerSite == 1);
extendedSankoff = new UINT* [aln->num_states];
inversedSankoff = new UINT* [aln->num_states];

int sankoff_size = aln->num_states * sitesPerVec;
while(sankoff_size % VECSIZE != 0) sankoff_size++;
sLeft = new UINT[sankoff_size];
sRight = new UINT[sankoff_size];

for(int i = 0; i < aln->num_states; ++i) {
extendedSankoff[i] = new UINT [sankoff_size];
inversedSankoff[i] = new UINT [aln->num_states];
for(int j = 0; j < sankoff_size; ++j) {
extendedSankoff[i][j] = cost_matrix[i * aln->num_states + j % aln->num_states];
}
for(int j = 0; j < aln->num_states; ++j) {
inversedSankoff[i][j] = cost_matrix[j * aln->num_states + i];
}
}
}
// DTH: I don't really understand what this is for. ###########
// swap node and dad if dad is a leaf
if (node->isLeaf()) {
Expand All @@ -461,9 +457,9 @@ int ParsTree::computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad,
memset(_pattern_pars, 0, sizeof(BootValTypePars) * (nptn+VCSIZE_USHORT));

if ((dad_branch->partial_lh_computed & 2) == 0)
computePartialParsimony(dad_branch, dad);
computePartialParsimony<VECTORCLASS>(dad_branch, dad);
if ((node_branch->partial_lh_computed & 2) == 0)
computePartialParsimony(node_branch, node);
computePartialParsimony<VECTORCLASS>(node_branch, node);

// now combine likelihood at the branch
tree_pars = 0;
Expand All @@ -476,35 +472,35 @@ int ParsTree::computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad,
exit(1);
}

switch (nstates) {
case 4:
for (ptn = 0; ptn < aln->size(); ptn++){
_pattern_pars[ptn] = 0;
if (aln->num_states != 2) {
for(ptn = 0; ptn < aln->size(); ++ptn) {
if (aln->at(ptn).is_const) continue;

int ptn_start_index = ptn * 4;
UINT *node_branch_ptr = &node_branch->partial_pars[ptn_start_index];
UINT *dad_branch_ptr = &dad_branch->partial_pars[ptn_start_index];
UINT *cost_matrix_ptr = cost_matrix;
UINT min_ptn_pars = UINT_MAX;
for(i = 0; i < 4; i++){
// min(j->i) from node_branch
UINT min_score = node_branch_ptr[0] + cost_matrix_ptr[0];
for(j = 1; j < 4; j++) {
UINT value = node_branch_ptr[j] + cost_matrix_ptr[j];
min_score = min(value, min_score);

}
ptn_partial_pars[i] = min_score + dad_branch_ptr[i];
min_ptn_pars = min(min_ptn_pars, ptn_partial_pars[i]);
cost_matrix_ptr += 4;
int ptn_start_index = ptn * aln->num_states;
UINT *left_ptr = &node_branch->partial_pars[ptn_start_index];
UINT *right_ptr = &dad_branch->partial_pars[ptn_start_index];

fill(sLeft, sLeft + aln->num_states, UINT_MAX);
fill(sRight, sRight + aln->num_states, UINT_MAX);

for(int i = 0; i < aln->num_states; ++i) {
// transforming from child(i) -> dad(j)
for(int j = 0; j < aln->num_states; j += VECSIZE) {
VECTORCLASS* vLeft = (VECTORCLASS*) &sLeft[j];
VECTORCLASS* vRight = (VECTORCLASS*) &sRight[j];
*vLeft = min(*vLeft, *((VECTORCLASS*) &inversedSankoff[i][j]) + left_ptr[i]);
*vRight = min(*vRight, *((VECTORCLASS*) &inversedSankoff[i][j]) + right_ptr[i]);
}
}
_pattern_pars[ptn] = min_ptn_pars;
tree_pars += min_ptn_pars * aln->at(ptn).frequency;
}
break;

default:
UINT min_pars = INT32_MAX;
for(int i = 0; i < aln->num_states; ++i) {
min_pars = min(min_pars, sLeft[i] + sRight[i]);
}
_pattern_pars[ptn] = min_pars;
tree_pars += min_pars * aln->at(ptn).frequency;
}
} else {
for (ptn = 0; ptn < aln->size(); ptn++){
_pattern_pars[ptn] = 0;
if (aln->at(ptn).is_const) continue;
Expand All @@ -529,7 +525,6 @@ int ParsTree::computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad,
_pattern_pars[ptn] = min_ptn_pars;
tree_pars += min_ptn_pars * aln->at(ptn).frequency;
}
break;
}
if (branch_subst)
*branch_subst = tree_pars;
Expand Down Expand Up @@ -565,7 +560,7 @@ void ParsTree::initializeAllPartialPars(int &index, PhyloNode *node, PhyloNode *
node = (PhyloNode*) root;
// allocate the big central partial pars memory
if (!central_partial_pars) {
int memsize = (aln->getNSeq() - 1) * 4 * pars_block_size; // Important for handling informal NEXUS format (e.g. output of TNT)
int memsize = (aln->getNSeq() - 1) * 4 * pars_block_size + 32; // Important for handling informal NEXUS format (e.g. output of TNT)
if (verbose_mode >= VB_MED)
cout << "Allocating " << memsize * sizeof(UINT) << " bytes for partial parsimony vectors" << endl;
central_partial_pars = new UINT[memsize];
Expand Down
11 changes: 11 additions & 0 deletions parstree.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class ParsTree: public IQTree {
@param dad_branch the branch leading to the subtree
@param dad its dad, used to direct the traversal
*/
template<class VECTORCLASS>
void computePartialParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad);


Expand All @@ -64,6 +65,7 @@ class ParsTree: public IQTree {
@param branch_subst (OUT) if not NULL, the number of substitutions on this branch
@return parsimony score of the tree
*/
template<class VECTORCLASS>
int computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst = NULL);

/**
Expand Down Expand Up @@ -119,6 +121,15 @@ class ParsTree: public IQTree {
// int * cost_matrix; // Sep 2016: store cost matrix in 1D array
// int cost_nstates; // Sep 2016: # of states provided by cost matrix
UINT tree_pars;

UINT VECSIZE;
UINT sitesPerVec;
UINT vecsPerSite;
UINT** extendedSankoff;
UINT** inversedSankoff;

UINT *sLeft;
UINT *sRight;
};

#endif /* PARSTREE_H_ */
41 changes: 24 additions & 17 deletions test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@
void test(Params &params){
testWeightedParsimony(params);
// testTreeConvertTaxaToID(params);
if(params.remove_dup_seq){
testRemoveDuplicateSeq(params);
cout << "\nDONE testRemoveDuplicateSeq(). Check output at " << params.user_file << ".\n\n";
cout << "NOTE: If you want to remove duplicate sequences for MPBoot search algorithm, "
<< "the correct command is: \n"
<< "./mpboot -s " << params.aln_file << " -test_mode -remove_dup_seq " << params.user_file << "\n\n";
cout << "NOTE: If you want to remove duplicate sequences for MPBoot bootstrap approximation algorithm, "
<< "the correct command is: \n"
<< "./mpboot -s " << params.aln_file << " -test_mode -bb 1000 -remove_dup_seq " << params.user_file << "\n\n";
}
// if(params.remove_dup_seq){
// testRemoveDuplicateSeq(params);
// cout << "\nDONE testRemoveDuplicateSeq(). Check output at " << params.user_file << ".\n\n";
// cout << "NOTE: If you want to remove duplicate sequences for MPBoot search algorithm, "
// << "the correct command is: \n"
// << "./mpboot -s " << params.aln_file << " -test_mode -remove_dup_seq " << params.user_file << "\n\n";
// cout << "NOTE: If you want to remove duplicate sequences for MPBoot bootstrap approximation algorithm, "
// << "the correct command is: \n"
// << "./mpboot -s " << params.aln_file << " -test_mode -bb 1000 -remove_dup_seq " << params.user_file << "\n\n";
// }
}

// -s <alnfile> -test_mode <treefile> -cost <costfile>
Expand All @@ -27,24 +27,31 @@ void testWeightedParsimony(Params &params){

// initialize an ParsTree instance connecting with the alignment
ParsTree * ptree = new ParsTree(&alignment);
ParsTree * qtree = new ParsTree(&alignment);

// initialize the cost matrix
dynamic_cast<ParsTree *>(ptree)->initParsData(&params);
dynamic_cast<ParsTree *>(qtree)->initParsData(&params);

// read in a tree from user's file
ptree->readTree(params.user_file, params.is_rooted);
ptree->setAlignment(&alignment); // make BIG difference $$$$$$$$$$$$


qtree->readTree(params.user_file, params.is_rooted);
qtree->setAlignment(&alignment); // make BIG difference $$$$$$$$$$$$

// compute score by Sankoff algorithm
cout << "Score = " << ptree->computeParsimony() << endl;
cout << "Pattern score = ";
ptree->printPatternScore();
cout << "Score old = " << ptree->computeParsimony() << endl;

// cout << "Pattern score = ";
// ptree->printPatternScore();
cout << endl;

cout << "mst score = ";
for(int i = 0; i < ptree->aln->getNPattern(); i++)
// for(int i = 0; i < 6; i++)
cout << ptree->findMstScore(i) << ", ";
// cout << "mst score = ";
// for(int i = 0; i < ptree->aln->getNPattern(); i++)
// // for(int i = 0; i < 6; i++)
// cout << ptree->findMstScore(i) << ", ";
cout << endl;
delete ptree;

Expand Down