Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ pll/libpll.a
pll_test/
workspace.code-workspace
.vscode
build/**
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -516,3 +516,5 @@ set(CPACK_STRIP_FILES TRUE)
include (CPack)

add_custom_target(dist COMMAND ${CMAKE_MAKE_PROGRAM} package_source)

target_link_libraries(mpboot pthread)
27 changes: 27 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,33 @@ MPBoot: Fast phylogenetic maximum parsimony tree inference and bootstrap approxi
* Run **make**
* You will find the executable named **mpboot** once the **make** command is done.

## **PLACEMENT CORE**
### **Parameter**
* **-pp_on**: enable placement.
* **-pp_n**: number of available samples on tree.
* **-pp_k**: number of missing samples.
* **-pp_tree**: tree file.
* **-pp_orig_spr**: run spr without changing origin tree.
* **-pp_test_spr**: enable check correct tree.
* **-pp_origin**: origin tree file.
* **-pp_zip_aln**: alignment's zip file.
* **-pp_zip_tree**: tree's zip file.

### **Command**
* Add missing samples to existing tree:
<br>
``./mpboot -s <vcf file> -pp_tree <tree file> -pp_on -pp_n <existing samples> -pp_k <missing samples>``
<br>
*Example:*
<br>
``./mpboot -s data/test5/1/added5.vcf -pp_tree data/test5/1/origin5.fasta.treefile -pp_on -pp_k 5 -pp_n 5``
* Check if tree unchanged
<br>
``./mpboot -s <vcf file> -pp_tree <tree file> -pp_origin <origin tree file> -pp_on -pp_test_spr -pp_k <missing samples> -pp_orig_spr``
<br>
*Example:*
<br>
``./mpboot -s data/test5/1/added5.vcf -pp_tree addedTree.txt -pp_origin data/test5/1/origin5.fasta.treefile -pp_on -pp_test_spr -pp_k 5 -pp_orig_spr``
<hr>
<br><br><br>

Expand Down
28 changes: 16 additions & 12 deletions phylonode.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#define PHYLONODE_H

#include "node.h"
#include "queue"
#include <mutex>

typedef short int UBYTE;

Expand All @@ -21,7 +23,8 @@ A neighbor in a phylogenetic tree

@author BUI Quang Minh, Steffen Klaere, Arndt von Haeseler <minh.bui@univie.ac.at>
*/
class PhyloNeighbor : public Neighbor {
class PhyloNeighbor : public Neighbor
{
friend class PhyloNode;
friend class PhyloTree;
friend class IQTree;
Expand All @@ -37,7 +40,8 @@ class PhyloNeighbor : public Neighbor {

@param alength length of branch
*/
PhyloNeighbor(Node *anode, double alength) : Neighbor(anode, alength) {
PhyloNeighbor(Node *anode, double alength) : Neighbor(anode, alength)
{
partial_lh = NULL;
partial_lh_computed = 0;
lh_scale_factor = 0.0;
Expand All @@ -50,7 +54,8 @@ class PhyloNeighbor : public Neighbor {
@param alength length of branch
@param aid branch ID
*/
PhyloNeighbor(Node *anode, double alength, int aid) : Neighbor(anode, alength, aid) {
PhyloNeighbor(Node *anode, double alength, int aid) : Neighbor(anode, alength, aid)
{
partial_lh = NULL;
partial_lh_computed = 0;
lh_scale_factor = 0.0;
Expand All @@ -60,14 +65,16 @@ class PhyloNeighbor : public Neighbor {
/**
tell that the partial likelihood vector is not computed
*/
inline void clearPartialLh() {
inline void clearPartialLh()
{
partial_lh_computed = 0;
}

/**
* tell that the partial likelihood vector is computed
*/
inline void unclearPartialLh() {
inline void unclearPartialLh()
{
partial_lh_computed = 1;
}

Expand All @@ -78,7 +85,6 @@ class PhyloNeighbor : public Neighbor {
void clearForwardPartialLh(Node *dad);

private:

/**
true if the partial likelihood was computed
*/
Expand All @@ -104,14 +110,16 @@ class PhyloNeighbor : public Neighbor {
*/
UINT *partial_pars;

std::mutex partial_lh_mutex;
};

/**
A node in a phylogenetic tree

@author BUI Quang Minh, Steffen Klaere, Arndt von Haeseler <minh.bui@univie.ac.at>
*/
class PhyloNode : public Node {
class PhyloNode : public Node
{
friend class PhyloTree;

public:
Expand Down Expand Up @@ -153,8 +161,6 @@ class PhyloNode : public Node {
*/
virtual void addNeighbor(Node *node, double length, int id = -1);



/**
tell that all partial likelihood vectors below this node are not computed
*/
Expand All @@ -166,11 +172,9 @@ class PhyloNode : public Node {
void clearReversePartialLh(PhyloNode *dad);
};


/**
Node vector
*/
typedef vector<PhyloNode*> PhyloNodeVector;

typedef vector<PhyloNode *> PhyloNodeVector;

#endif
196 changes: 195 additions & 1 deletion phylotree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -935,6 +935,200 @@ void PhyloTree::computePartialParsimony(PhyloNeighbor *dad_branch, PhyloNode *da
delete[] bits_entry;
}

void PhyloTree::computePartialParsimonyMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int startPtn, int endPtn) {
// don't recompute the parsimony
if (dad_branch->partial_lh_computed & 2)
return;
if (startPtn > endPtn) {
return;
}
Node *node = dad_branch->node;
//assert(node->degree() <= 3);
int ptn;
int nstates = aln->num_states;
int pars_size = getBitsBlockSize();
int entry_size = getBitsEntrySize();
int nptn = aln->size();
int ptn_pars_start_id = pars_size - nptn - 1;

assert(dad_branch->partial_pars);

if (nstates == 4 && aln->seq_type == SEQ_DNA && (node->isLeaf() || node->degree() == 3)) {
// ULTRAFAST VERSION FOR DNA, assuming that UINT is 32-bit integer
if (node->isLeaf() && dad) {
// external node
for (ptn = startPtn; ptn <= endPtn; ptn+=8) {
UINT states = 0;
int maxi = endPtn - ptn + 1;
if(maxi > 8) maxi = 8;
for (int i = 0; i < maxi; i++) {
UINT bit_state = dna_state_map[(aln->at(ptn+i))[node->id]];
states |= (bit_state << (i*4));
dad_branch->partial_pars[ptn_pars_start_id + ptn + i] = 0;
}
dad_branch->partial_pars[ptn/8] = states;
}
} else {
// internal node
memset(dad_branch->partial_pars + ptn_pars_start_id + startPtn, 0, (endPtn - startPtn + 1) * sizeof(int));
UINT *left = NULL, *right = NULL;
FOR_NEIGHBOR_IT(node, dad, it)if ((*it)->node->name != ROOT_NAME) {
computePartialParsimonyMultiThreads((PhyloNeighbor*) (*it), (PhyloNode*) node, startPtn, endPtn);
if (!left)
left = ((PhyloNeighbor*) (*it))->partial_pars;
else
right = ((PhyloNeighbor*) (*it))->partial_pars;
for(int p = startPtn; p <= endPtn; p++)
dad_branch->partial_pars[ptn_pars_start_id + p] += ((PhyloNeighbor*) (*it))->partial_pars[ptn_pars_start_id + p];
}
int pars_steps = 0;
for (ptn = startPtn; ptn <= endPtn; ptn+=8) {
UINT states_left = left[ptn/8];
UINT states_right = right[ptn/8];
UINT states_dad = 0;
int maxi = endPtn - ptn + 1;
if(maxi > 8) maxi = 8;
for (int i = 0; i < maxi; i++) {
UINT state_left = (states_left >> (i*4)) & 15;
UINT state_right = (states_right >> (i*4)) & 15;
UINT state_both = state_left | (state_right << 4);
states_dad |= dna_fitch_result[state_both] << (i*4);
pars_steps += dna_fitch_step[state_both] * aln->at(ptn+i).frequency;
dad_branch->partial_pars[ptn_pars_start_id + ptn + i] += dna_fitch_step[state_both];
}
dad_branch->partial_pars[ptn/8] = states_dad;
}
dad_branch->partial_lh_mutex.lock();
dad_branch->partial_pars[pars_size - 1] += pars_steps;
dad_branch->partial_lh_mutex.unlock();
}
return;
} // END OF DNA VERSION
}

vector<pair<PhyloNeighbor*, PhyloNode*> > PhyloTree::initializeComputeParsimonyMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad)
{
PhyloNode *node = (PhyloNode*) dad_branch->node;
PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
vector<pair<PhyloNeighbor*, PhyloNode*> > result;
queue<pair<PhyloNeighbor*, PhyloNode*> > q;
int pars_size = getBitsBlockSize();

q.push(make_pair(dad_branch, dad));
q.push(make_pair(node_branch, node));
while(q.size()) {
PhyloNeighbor *cur_dad_branch = q.front().first;
PhyloNode *cur_dad = q.front().second;
q.pop();
if (cur_dad_branch->partial_lh_computed & 2)
continue;
cur_dad_branch->partial_pars[pars_size - 1] = 0;
result.push_back(make_pair(cur_dad_branch, cur_dad));

PhyloNode *cur_node = (PhyloNode*) cur_dad_branch->node;
if (cur_node->isLeaf() && cur_dad)
continue;
FOR_NEIGHBOR_IT(cur_node, cur_dad, it)if ((*it)->node->name != ROOT_NAME) {
q.push(make_pair((PhyloNeighbor*) (*it), cur_node));
}
}
return result;
}

void PhyloTree::finalizeComputeParsimonyMultiThreads(vector<pair<PhyloNeighbor*, PhyloNode*> > topo_sorted_branches) {
int pars_size = getBitsBlockSize();
for (int i = (int)topo_sorted_branches.size() - 1; i >= 0; i--) {
PhyloNeighbor *dad_branch = topo_sorted_branches[i].first;
PhyloNode *dad = topo_sorted_branches[i].second;
dad_branch->partial_lh_computed |= 2;

PhyloNode *node = (PhyloNode*) dad_branch->node;
FOR_NEIGHBOR_IT(node, dad, it) if ((*it)->node->name != ROOT_NAME) {
dad_branch->partial_pars[pars_size - 1] += ((PhyloNeighbor*)(*it))->partial_pars[pars_size - 1];
}
}
}

int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) {
PhyloNode *node = (PhyloNode*) dad_branch->node;
PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
assert(node_branch);
if (!central_partial_pars)
initializeAllPartialPars();
// swap node and dad if dad is a leaf
if (node->isLeaf()) {
PhyloNode *tmp_node = dad;
dad = node;
node = tmp_node;
PhyloNeighbor *tmp_nei = dad_branch;
dad_branch = node_branch;
node_branch = tmp_nei;
//cout << "swapped\n";
}

int nptn = aln->size();
if(!_pattern_pars) _pattern_pars = aligned_alloc<BootValTypePars>(nptn+VCSIZE_USHORT);
memset(_pattern_pars, 0, sizeof(BootValTypePars) * (nptn+VCSIZE_USHORT));

vector<pair<PhyloNeighbor*, PhyloNode*> > topo_sorted_branches = initializeComputeParsimonyMultiThreads(dad_branch, dad);
vector<thread> threads;
int ptnPerThread = (aln->size() + params->pp_thread - 1) / params->pp_thread;
if (ptnPerThread % 8 != 0) ptnPerThread += 8 - (ptnPerThread % 8);
// int ptnPerThread = aln->size();
for (int i = 0; i < params->pp_thread; i++) {
int startPtn = i * ptnPerThread;
int endPtn = min((int)aln->size(), (i + 1) * ptnPerThread) - 1;
if (startPtn > endPtn) continue;
if ((dad_branch->partial_lh_computed & 2) == 0) {
threads.push_back(thread(&PhyloTree::computePartialParsimonyMultiThreads, this, dad_branch, dad, startPtn, endPtn));
}
if ((node_branch->partial_lh_computed & 2) == 0) {
threads.push_back(thread(&PhyloTree::computePartialParsimonyMultiThreads, this, node_branch, node, startPtn, endPtn));
}
}
for (int i = 0; i < (int)threads.size(); ++i) {
threads[i].join();
}
finalizeComputeParsimonyMultiThreads(topo_sorted_branches);
// if ((dad_branch->partial_lh_computed & 2) == 0)
// computePartialParsimony(dad_branch, dad);
// if ((node_branch->partial_lh_computed & 2) == 0)
// computePartialParsimony(node_branch, node);
// // now combine likelihood at the branch

int pars_size = getBitsBlockSize();
int entry_size = getBitsEntrySize();
int ptn_pars_start_id = pars_size - nptn - 1;
//int nstates = aln->num_states;
int i, ptn;
int tree_pars = 0;

if (aln->num_states == 4 && aln->seq_type == SEQ_DNA) {
// ULTRAFAST VERSION FOR DNA
for (ptn = 0; ptn < aln->size(); ptn+=8) {
UINT states_left = node_branch->partial_pars[ptn/8];
UINT states_right = dad_branch->partial_pars[ptn/8];
UINT states_dad = 0;
int maxi = aln->size() - ptn;
if(maxi > 8) maxi = 8;
for (i = 0; i< maxi; i++) {
UINT state_left = (states_left >> (i*4)) & 15;
UINT state_right = (states_right >> (i*4)) & 15;
UINT state_both = state_left | (state_right << 4);
states_dad |= dna_fitch_result[state_both] << (i*4);
tree_pars += dna_fitch_step[state_both] * aln->at(ptn+i).frequency;
_pattern_pars[ptn + i] = node_branch->partial_pars[ptn_pars_start_id + ptn + i] +
dad_branch->partial_pars[ptn_pars_start_id + ptn + i] + dna_fitch_step[state_both];
}
}
}
if (branch_subst)
*branch_subst = tree_pars;

tree_pars += node_branch->partial_pars[pars_size - 1] + dad_branch->partial_pars[pars_size - 1];
return tree_pars;
}

int PhyloTree::computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) {
PhyloNode *node = (PhyloNode*) dad_branch->node;
PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
Expand Down Expand Up @@ -1057,7 +1251,7 @@ int PhyloTree::computeParsimony() {
int nptn = aln->size();
if(_pattern_pars == NULL) _pattern_pars = aligned_alloc<BootValTypePars>(nptn + VCSIZE_USHORT);

return computeParsimonyBranch((PhyloNeighbor*) root->neighbors[0], (PhyloNode*) root);
return computeParsimonyBranchMultiThreads((PhyloNeighbor*) root->neighbors[0], (PhyloNode*) root);
}

void PhyloTree::printParsimonyStates(PhyloNeighbor *dad_branch, PhyloNode *dad) {
Expand Down
Loading