diff --git a/.gitignore b/.gitignore
index 3751d5ea..8b72265c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,4 @@ pll/libpll.a
pll_test/
workspace.code-workspace
.vscode
+build/**
diff --git a/CMakeLists.txt b/CMakeLists.txt
index fce4a26b..16d49c50 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -516,3 +516,5 @@ set(CPACK_STRIP_FILES TRUE)
include (CPack)
add_custom_target(dist COMMAND ${CMAKE_MAKE_PROGRAM} package_source)
+
+target_link_libraries(mpboot pthread)
diff --git a/README.md b/README.md
index d8c5336a..6a825dfc 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,33 @@ MPBoot: Fast phylogenetic maximum parsimony tree inference and bootstrap approxi
* Run **make**
* You will find the executable named **mpboot** once the **make** command is done.
+## **PLACEMENT CORE**
+### **Parameter**
+* **-pp_on**: enable placement.
+* **-pp_n**: number of available samples on tree.
+* **-pp_k**: number of missing samples.
+* **-pp_tree**: tree file.
+* **-pp_orig_spr**: run spr without changing origin tree.
+* **-pp_test_spr**: enable check correct tree.
+* **-pp_origin**: origin tree file.
+* **-pp_zip_aln**: alignment's zip file.
+* **-pp_zip_tree**: tree's zip file.
+
+### **Command**
+* Add missing samples to existing tree:
+
+ ``./mpboot -s -pp_tree -pp_on -pp_n -pp_k ``
+
+ *Example:*
+
+ ``./mpboot -s data/test5/1/added5.vcf -pp_tree data/test5/1/origin5.fasta.treefile -pp_on -pp_k 5 -pp_n 5``
+* Check if tree unchanged
+
+ ``./mpboot -s -pp_tree -pp_origin -pp_on -pp_test_spr -pp_k -pp_orig_spr``
+
+ *Example:*
+
+ ``./mpboot -s data/test5/1/added5.vcf -pp_tree addedTree.txt -pp_origin data/test5/1/origin5.fasta.treefile -pp_on -pp_test_spr -pp_k 5 -pp_orig_spr``
diff --git a/phylonode.h b/phylonode.h
index ef77b22e..8051a788 100644
--- a/phylonode.h
+++ b/phylonode.h
@@ -13,6 +13,8 @@
#define PHYLONODE_H
#include "node.h"
+#include "queue"
+#include
typedef short int UBYTE;
@@ -21,7 +23,8 @@ A neighbor in a phylogenetic tree
@author BUI Quang Minh, Steffen Klaere, Arndt von Haeseler
*/
-class PhyloNeighbor : public Neighbor {
+class PhyloNeighbor : public Neighbor
+{
friend class PhyloNode;
friend class PhyloTree;
friend class IQTree;
@@ -37,7 +40,8 @@ class PhyloNeighbor : public Neighbor {
@param alength length of branch
*/
- PhyloNeighbor(Node *anode, double alength) : Neighbor(anode, alength) {
+ PhyloNeighbor(Node *anode, double alength) : Neighbor(anode, alength)
+ {
partial_lh = NULL;
partial_lh_computed = 0;
lh_scale_factor = 0.0;
@@ -50,7 +54,8 @@ class PhyloNeighbor : public Neighbor {
@param alength length of branch
@param aid branch ID
*/
- PhyloNeighbor(Node *anode, double alength, int aid) : Neighbor(anode, alength, aid) {
+ PhyloNeighbor(Node *anode, double alength, int aid) : Neighbor(anode, alength, aid)
+ {
partial_lh = NULL;
partial_lh_computed = 0;
lh_scale_factor = 0.0;
@@ -60,14 +65,16 @@ class PhyloNeighbor : public Neighbor {
/**
tell that the partial likelihood vector is not computed
*/
- inline void clearPartialLh() {
+ inline void clearPartialLh()
+ {
partial_lh_computed = 0;
}
/**
* tell that the partial likelihood vector is computed
*/
- inline void unclearPartialLh() {
+ inline void unclearPartialLh()
+ {
partial_lh_computed = 1;
}
@@ -78,7 +85,6 @@ class PhyloNeighbor : public Neighbor {
void clearForwardPartialLh(Node *dad);
private:
-
/**
true if the partial likelihood was computed
*/
@@ -104,6 +110,7 @@ class PhyloNeighbor : public Neighbor {
*/
UINT *partial_pars;
+ std::mutex partial_lh_mutex;
};
/**
@@ -111,7 +118,8 @@ A node in a phylogenetic tree
@author BUI Quang Minh, Steffen Klaere, Arndt von Haeseler
*/
-class PhyloNode : public Node {
+class PhyloNode : public Node
+{
friend class PhyloTree;
public:
@@ -153,8 +161,6 @@ class PhyloNode : public Node {
*/
virtual void addNeighbor(Node *node, double length, int id = -1);
-
-
/**
tell that all partial likelihood vectors below this node are not computed
*/
@@ -166,11 +172,9 @@ class PhyloNode : public Node {
void clearReversePartialLh(PhyloNode *dad);
};
-
/**
Node vector
*/
-typedef vector PhyloNodeVector;
-
+typedef vector PhyloNodeVector;
#endif
diff --git a/phylotree.cpp b/phylotree.cpp
index 3eb44929..4f959fd1 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -935,6 +935,200 @@ void PhyloTree::computePartialParsimony(PhyloNeighbor *dad_branch, PhyloNode *da
delete[] bits_entry;
}
+void PhyloTree::computePartialParsimonyMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int startPtn, int endPtn) {
+ // don't recompute the parsimony
+ if (dad_branch->partial_lh_computed & 2)
+ return;
+ if (startPtn > endPtn) {
+ return;
+ }
+ Node *node = dad_branch->node;
+ //assert(node->degree() <= 3);
+ int ptn;
+ int nstates = aln->num_states;
+ int pars_size = getBitsBlockSize();
+ int entry_size = getBitsEntrySize();
+ int nptn = aln->size();
+ int ptn_pars_start_id = pars_size - nptn - 1;
+
+ assert(dad_branch->partial_pars);
+
+ if (nstates == 4 && aln->seq_type == SEQ_DNA && (node->isLeaf() || node->degree() == 3)) {
+ // ULTRAFAST VERSION FOR DNA, assuming that UINT is 32-bit integer
+ if (node->isLeaf() && dad) {
+ // external node
+ for (ptn = startPtn; ptn <= endPtn; ptn+=8) {
+ UINT states = 0;
+ int maxi = endPtn - ptn + 1;
+ if(maxi > 8) maxi = 8;
+ for (int i = 0; i < maxi; i++) {
+ UINT bit_state = dna_state_map[(aln->at(ptn+i))[node->id]];
+ states |= (bit_state << (i*4));
+ dad_branch->partial_pars[ptn_pars_start_id + ptn + i] = 0;
+ }
+ dad_branch->partial_pars[ptn/8] = states;
+ }
+ } else {
+ // internal node
+ memset(dad_branch->partial_pars + ptn_pars_start_id + startPtn, 0, (endPtn - startPtn + 1) * sizeof(int));
+ UINT *left = NULL, *right = NULL;
+ FOR_NEIGHBOR_IT(node, dad, it)if ((*it)->node->name != ROOT_NAME) {
+ computePartialParsimonyMultiThreads((PhyloNeighbor*) (*it), (PhyloNode*) node, startPtn, endPtn);
+ if (!left)
+ left = ((PhyloNeighbor*) (*it))->partial_pars;
+ else
+ right = ((PhyloNeighbor*) (*it))->partial_pars;
+ for(int p = startPtn; p <= endPtn; p++)
+ dad_branch->partial_pars[ptn_pars_start_id + p] += ((PhyloNeighbor*) (*it))->partial_pars[ptn_pars_start_id + p];
+ }
+ int pars_steps = 0;
+ for (ptn = startPtn; ptn <= endPtn; ptn+=8) {
+ UINT states_left = left[ptn/8];
+ UINT states_right = right[ptn/8];
+ UINT states_dad = 0;
+ int maxi = endPtn - ptn + 1;
+ if(maxi > 8) maxi = 8;
+ for (int i = 0; i < maxi; i++) {
+ UINT state_left = (states_left >> (i*4)) & 15;
+ UINT state_right = (states_right >> (i*4)) & 15;
+ UINT state_both = state_left | (state_right << 4);
+ states_dad |= dna_fitch_result[state_both] << (i*4);
+ pars_steps += dna_fitch_step[state_both] * aln->at(ptn+i).frequency;
+ dad_branch->partial_pars[ptn_pars_start_id + ptn + i] += dna_fitch_step[state_both];
+ }
+ dad_branch->partial_pars[ptn/8] = states_dad;
+ }
+ dad_branch->partial_lh_mutex.lock();
+ dad_branch->partial_pars[pars_size - 1] += pars_steps;
+ dad_branch->partial_lh_mutex.unlock();
+ }
+ return;
+ } // END OF DNA VERSION
+}
+
+vector > PhyloTree::initializeComputeParsimonyMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad)
+{
+ PhyloNode *node = (PhyloNode*) dad_branch->node;
+ PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
+ vector > result;
+ queue > q;
+ int pars_size = getBitsBlockSize();
+
+ q.push(make_pair(dad_branch, dad));
+ q.push(make_pair(node_branch, node));
+ while(q.size()) {
+ PhyloNeighbor *cur_dad_branch = q.front().first;
+ PhyloNode *cur_dad = q.front().second;
+ q.pop();
+ if (cur_dad_branch->partial_lh_computed & 2)
+ continue;
+ cur_dad_branch->partial_pars[pars_size - 1] = 0;
+ result.push_back(make_pair(cur_dad_branch, cur_dad));
+
+ PhyloNode *cur_node = (PhyloNode*) cur_dad_branch->node;
+ if (cur_node->isLeaf() && cur_dad)
+ continue;
+ FOR_NEIGHBOR_IT(cur_node, cur_dad, it)if ((*it)->node->name != ROOT_NAME) {
+ q.push(make_pair((PhyloNeighbor*) (*it), cur_node));
+ }
+ }
+ return result;
+}
+
+void PhyloTree::finalizeComputeParsimonyMultiThreads(vector > topo_sorted_branches) {
+ int pars_size = getBitsBlockSize();
+ for (int i = (int)topo_sorted_branches.size() - 1; i >= 0; i--) {
+ PhyloNeighbor *dad_branch = topo_sorted_branches[i].first;
+ PhyloNode *dad = topo_sorted_branches[i].second;
+ dad_branch->partial_lh_computed |= 2;
+
+ PhyloNode *node = (PhyloNode*) dad_branch->node;
+ FOR_NEIGHBOR_IT(node, dad, it) if ((*it)->node->name != ROOT_NAME) {
+ dad_branch->partial_pars[pars_size - 1] += ((PhyloNeighbor*)(*it))->partial_pars[pars_size - 1];
+ }
+ }
+}
+
+int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) {
+ PhyloNode *node = (PhyloNode*) dad_branch->node;
+ PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
+ assert(node_branch);
+ if (!central_partial_pars)
+ initializeAllPartialPars();
+ // swap node and dad if dad is a leaf
+ if (node->isLeaf()) {
+ PhyloNode *tmp_node = dad;
+ dad = node;
+ node = tmp_node;
+ PhyloNeighbor *tmp_nei = dad_branch;
+ dad_branch = node_branch;
+ node_branch = tmp_nei;
+ //cout << "swapped\n";
+ }
+
+ int nptn = aln->size();
+ if(!_pattern_pars) _pattern_pars = aligned_alloc(nptn+VCSIZE_USHORT);
+ memset(_pattern_pars, 0, sizeof(BootValTypePars) * (nptn+VCSIZE_USHORT));
+
+ vector > topo_sorted_branches = initializeComputeParsimonyMultiThreads(dad_branch, dad);
+ vector threads;
+ int ptnPerThread = (aln->size() + params->pp_thread - 1) / params->pp_thread;
+ if (ptnPerThread % 8 != 0) ptnPerThread += 8 - (ptnPerThread % 8);
+ // int ptnPerThread = aln->size();
+ for (int i = 0; i < params->pp_thread; i++) {
+ int startPtn = i * ptnPerThread;
+ int endPtn = min((int)aln->size(), (i + 1) * ptnPerThread) - 1;
+ if (startPtn > endPtn) continue;
+ if ((dad_branch->partial_lh_computed & 2) == 0) {
+ threads.push_back(thread(&PhyloTree::computePartialParsimonyMultiThreads, this, dad_branch, dad, startPtn, endPtn));
+ }
+ if ((node_branch->partial_lh_computed & 2) == 0) {
+ threads.push_back(thread(&PhyloTree::computePartialParsimonyMultiThreads, this, node_branch, node, startPtn, endPtn));
+ }
+ }
+ for (int i = 0; i < (int)threads.size(); ++i) {
+ threads[i].join();
+ }
+ finalizeComputeParsimonyMultiThreads(topo_sorted_branches);
+ // if ((dad_branch->partial_lh_computed & 2) == 0)
+ // computePartialParsimony(dad_branch, dad);
+ // if ((node_branch->partial_lh_computed & 2) == 0)
+ // computePartialParsimony(node_branch, node);
+ // // now combine likelihood at the branch
+
+ int pars_size = getBitsBlockSize();
+ int entry_size = getBitsEntrySize();
+ int ptn_pars_start_id = pars_size - nptn - 1;
+ //int nstates = aln->num_states;
+ int i, ptn;
+ int tree_pars = 0;
+
+ if (aln->num_states == 4 && aln->seq_type == SEQ_DNA) {
+ // ULTRAFAST VERSION FOR DNA
+ for (ptn = 0; ptn < aln->size(); ptn+=8) {
+ UINT states_left = node_branch->partial_pars[ptn/8];
+ UINT states_right = dad_branch->partial_pars[ptn/8];
+ UINT states_dad = 0;
+ int maxi = aln->size() - ptn;
+ if(maxi > 8) maxi = 8;
+ for (i = 0; i< maxi; i++) {
+ UINT state_left = (states_left >> (i*4)) & 15;
+ UINT state_right = (states_right >> (i*4)) & 15;
+ UINT state_both = state_left | (state_right << 4);
+ states_dad |= dna_fitch_result[state_both] << (i*4);
+ tree_pars += dna_fitch_step[state_both] * aln->at(ptn+i).frequency;
+ _pattern_pars[ptn + i] = node_branch->partial_pars[ptn_pars_start_id + ptn + i] +
+ dad_branch->partial_pars[ptn_pars_start_id + ptn + i] + dna_fitch_step[state_both];
+ }
+ }
+ }
+ if (branch_subst)
+ *branch_subst = tree_pars;
+
+ tree_pars += node_branch->partial_pars[pars_size - 1] + dad_branch->partial_pars[pars_size - 1];
+ return tree_pars;
+}
+
int PhyloTree::computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) {
PhyloNode *node = (PhyloNode*) dad_branch->node;
PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
@@ -1057,7 +1251,7 @@ int PhyloTree::computeParsimony() {
int nptn = aln->size();
if(_pattern_pars == NULL) _pattern_pars = aligned_alloc(nptn + VCSIZE_USHORT);
- return computeParsimonyBranch((PhyloNeighbor*) root->neighbors[0], (PhyloNode*) root);
+ return computeParsimonyBranchMultiThreads((PhyloNeighbor*) root->neighbors[0], (PhyloNode*) root);
}
void PhyloTree::printParsimonyStates(PhyloNeighbor *dad_branch, PhyloNode *dad) {
diff --git a/phylotree.h b/phylotree.h
index 58ec5cb0..4a2cca4c 100644
--- a/phylotree.h
+++ b/phylotree.h
@@ -31,6 +31,7 @@
#include "optimization.h"
#include "model/rateheterogeneity.h"
#include "phyloanalysis.h"
+#include "thread"
const double MIN_BRANCH_LEN = 0.000001; // NEVER TOUCH THIS CONSTANT AGAIN PLEASE!
const double MAX_BRANCH_LEN = 100.0;
@@ -438,6 +439,14 @@ class PhyloTree : public MTree, public Optimization {
*/
virtual void computePartialParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad);
+ /**
+ compute partial parsimony score of the subtree rooted at dad from startPtn to endPtn
+ @param dad_branch the branch leading to the subtree
+ @param dad its dad, used to direct the tranversal
+ @param startPtn start pattern
+ @param endPtn end pattern
+ */
+ void computePartialParsimonyMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int startPtn, int endPtn);
/**
compute tree parsimony score on a branch
@@ -448,6 +457,29 @@ class PhyloTree : public MTree, public Optimization {
*/
virtual int computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst = NULL);
+ /**
+ compute tree parsimony score on a branch
+ @param dad_branch the branch leading to the subtree
+ @param dad its dad, used to direct the tranversal
+ @param branch_subst (OUT) if not NULL, the number of substitutions on this branch
+ @return parsimony score of the tree
+ */
+ int computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst = NULL);
+
+ /**
+ initialize information for computing parsimony score with multi-threads
+ @param dad_branch the branch leading to the subtree
+ @param dad its dad, used to direct the tranversal
+ @return a vector of pairs of (branch, node)
+ */
+ vector > initializeComputeParsimonyMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad);
+
+ /**
+ finalize compute parsimony score with multi-threads
+ @param topo_sorted_branches a vector of pairs of (branch, node)
+ */
+ void finalizeComputeParsimonyMultiThreads(vector > topo_sorted_branches);
+
void printParsimonyStates(PhyloNeighbor *dad_branch = NULL, PhyloNode *dad = NULL);
diff --git a/tools.cpp b/tools.cpp
index d58577a6..c8c8bbd9 100644
--- a/tools.cpp
+++ b/tools.cpp
@@ -954,6 +954,13 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
//params.run_mode = BOTH_ALG;
continue;
}
+ if (strcmp(argv[cnt], "-pp_thread") == 0) {
+ cnt++;
+ if (cnt >= argc)
+ throw "Invalid thread number";
+ params.pp_thread = convert_int(argv[cnt]);
+ continue;
+ }
if (strcmp(argv[cnt], "-e") == 0) {
cnt++;
if (cnt >= argc)
diff --git a/tools.h b/tools.h
index 315db2dd..2926bcf2 100644
--- a/tools.h
+++ b/tools.h
@@ -411,6 +411,10 @@ extern int NNI_MAX_NR_STEP;
program parameters, everything is specified here
*/
struct Params {
+ /**
+ * Number of threads
+ */
+ int pp_thread = 1;
/**
* Number of starting parsimony trees