From 5408eb03bcb6ea031ba39e54a0f9dff7a78053fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ph=E1=BA=A1m=20Xu=C3=A2n=20Trung?= <66519569+trungnotchung@users.noreply.github.com> Date: Thu, 25 Jan 2024 10:54:30 +0700 Subject: [PATCH 01/14] Update README.md --- README.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/README.md b/README.md index d8c5336a..6a825dfc 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,33 @@ MPBoot: Fast phylogenetic maximum parsimony tree inference and bootstrap approxi * Run **make** * You will find the executable named **mpboot** once the **make** command is done. +## **PLACEMENT CORE** +### **Parameter** +* **-pp_on**: enable placement. +* **-pp_n**: number of available samples on tree. +* **-pp_k**: number of missing samples. +* **-pp_tree**: tree file. +* **-pp_orig_spr**: run spr without changing origin tree. +* **-pp_test_spr**: enable check correct tree. +* **-pp_origin**: origin tree file. +* **-pp_zip_aln**: alignment's zip file. +* **-pp_zip_tree**: tree's zip file. + +### **Command** +* Add missing samples to existing tree: +
+ ``./mpboot -s -pp_tree -pp_on -pp_n -pp_k `` +
+ *Example:* +
+ ``./mpboot -s data/test5/1/added5.vcf -pp_tree data/test5/1/origin5.fasta.treefile -pp_on -pp_k 5 -pp_n 5`` +* Check if tree unchanged +
+ ``./mpboot -s -pp_tree -pp_origin -pp_on -pp_test_spr -pp_k -pp_orig_spr`` +
+ *Example:* +
+ ``./mpboot -s data/test5/1/added5.vcf -pp_tree addedTree.txt -pp_origin data/test5/1/origin5.fasta.treefile -pp_on -pp_test_spr -pp_k 5 -pp_orig_spr``



From 39140c0c9afe06b6a6b9698c7352eb23e6d09c02 Mon Sep 17 00:00:00 2001 From: trungnotchung Date: Fri, 18 Oct 2024 11:45:18 +0700 Subject: [PATCH 02/14] feat: compute partial parsimony multi threads topo sort --- phylonode.h | 4 +++- phylotree.cpp | 64 +++++++++++++++++++++++++++++++++++++++++++++++---- phylotree.h | 5 ++++ tools.cpp | 7 ++++++ tools.h | 4 ++++ 5 files changed, 79 insertions(+), 5 deletions(-) diff --git a/phylonode.h b/phylonode.h index ef77b22e..a99fd094 100644 --- a/phylonode.h +++ b/phylonode.h @@ -13,6 +13,7 @@ #define PHYLONODE_H #include "node.h" +#include "queue" typedef short int UBYTE; @@ -103,7 +104,6 @@ class PhyloNeighbor : public Neighbor { vector containing the partial parsimony scores */ UINT *partial_pars; - }; /** @@ -164,6 +164,8 @@ class PhyloNode : public Node { tell that all partial likelihood vectors (in reverse direction) below this node are not computed */ void clearReversePartialLh(PhyloNode *dad); + + int dependency; }; diff --git a/phylotree.cpp b/phylotree.cpp index 3eb44929..3f2483b9 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -935,6 +935,60 @@ void PhyloTree::computePartialParsimony(PhyloNeighbor *dad_branch, PhyloNode *da delete[] bits_entry; } +vector > PhyloTree::breadthFirstExpansion(PhyloNeighbor *dad_branch, PhyloNode *dad) +{ + PhyloNode *node = (PhyloNode*) dad_branch->node; + PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad); + vector > result; + queue > q; + + q.push(make_pair(dad_branch, dad)); + q.push(make_pair(node_branch, node)); + node->dependency = 0; + dad->dependency = 0; + while(q.size()) { + PhyloNeighbor *cur_dad_branch = q.front().first; + PhyloNode *cur_dad = q.front().second; + q.pop(); + if (cur_dad_branch->partial_lh_computed & 2) + continue; + cur_dad->dependency++; + result.push_back(make_pair(cur_dad_branch, cur_dad)); + + PhyloNode *cur_node = (PhyloNode*) cur_dad_branch->node; + cur_node->dependency = 0; + if (cur_node->isLeaf() && cur_dad) + continue; + FOR_NEIGHBOR_IT(cur_node, cur_dad, it)if ((*it)->node->name != ROOT_NAME) { + q.push(make_pair((PhyloNeighbor*) (*it), cur_node)); + } + } + return result; +} + +void PhyloTree::computeParsimonyMultiThread(vector > &branch_list) { + while (branch_list.size()) { + vector threads; + for (int i = 0; i < params->pp_thread; ++i) { + if (branch_list.empty()) + break; + PhyloNeighbor *dad_branch = branch_list.back().first; + PhyloNode *dad = branch_list.back().second; + PhyloNode *node = (PhyloNode*) dad_branch->node; + assert(node->dependency >= 0); + if (node->dependency == 0) { + branch_list.pop_back(); + threads.push_back(thread(&PhyloTree::computePartialParsimony, this, dad_branch, dad)); + dad->dependency--; + } else { + break; + } + } + for (auto &thread : threads) + thread.join(); + } +} + int PhyloTree::computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) { PhyloNode *node = (PhyloNode*) dad_branch->node; PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad); @@ -956,10 +1010,12 @@ int PhyloTree::computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, if(!_pattern_pars) _pattern_pars = aligned_alloc(nptn+VCSIZE_USHORT); memset(_pattern_pars, 0, sizeof(BootValTypePars) * (nptn+VCSIZE_USHORT)); - if ((dad_branch->partial_lh_computed & 2) == 0) - computePartialParsimony(dad_branch, dad); - if ((node_branch->partial_lh_computed & 2) == 0) - computePartialParsimony(node_branch, node); + vector > bfs = breadthFirstExpansion(dad_branch, dad); + computeParsimonyMultiThread(bfs); + // if ((dad_branch->partial_lh_computed & 2) == 0) + // computePartialParsimony(dad_branch, dad); + // if ((node_branch->partial_lh_computed & 2) == 0) + // computePartialParsimony(node_branch, node); // now combine likelihood at the branch int pars_size = getBitsBlockSize(); diff --git a/phylotree.h b/phylotree.h index 58ec5cb0..b68c16ea 100644 --- a/phylotree.h +++ b/phylotree.h @@ -31,6 +31,7 @@ #include "optimization.h" #include "model/rateheterogeneity.h" #include "phyloanalysis.h" +#include "thread" const double MIN_BRANCH_LEN = 0.000001; // NEVER TOUCH THIS CONSTANT AGAIN PLEASE! const double MAX_BRANCH_LEN = 100.0; @@ -448,6 +449,10 @@ class PhyloTree : public MTree, public Optimization { */ virtual int computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst = NULL); + void computeParsimonyMultiThread(vector > &branch_list); + + vector > breadthFirstExpansion(PhyloNeighbor *dad_branch, PhyloNode *dad); + void printParsimonyStates(PhyloNeighbor *dad_branch = NULL, PhyloNode *dad = NULL); diff --git a/tools.cpp b/tools.cpp index d58577a6..c8c8bbd9 100644 --- a/tools.cpp +++ b/tools.cpp @@ -954,6 +954,13 @@ void parseArg(int argc, char *argv[], Params ¶ms) { //params.run_mode = BOTH_ALG; continue; } + if (strcmp(argv[cnt], "-pp_thread") == 0) { + cnt++; + if (cnt >= argc) + throw "Invalid thread number"; + params.pp_thread = convert_int(argv[cnt]); + continue; + } if (strcmp(argv[cnt], "-e") == 0) { cnt++; if (cnt >= argc) diff --git a/tools.h b/tools.h index 315db2dd..58e4e7d9 100644 --- a/tools.h +++ b/tools.h @@ -411,6 +411,10 @@ extern int NNI_MAX_NR_STEP; program parameters, everything is specified here */ struct Params { + /** + * Number of threads + */ + int pp_thread; /** * Number of starting parsimony trees From dfa03f15128555598f063b534462976fa657827d Mon Sep 17 00:00:00 2001 From: trungnotchung Date: Fri, 18 Oct 2024 11:51:23 +0700 Subject: [PATCH 03/14] fix: multi threads conflict --- phylotree.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/phylotree.cpp b/phylotree.cpp index 3f2483b9..ecf3a04b 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -969,6 +969,7 @@ vector > PhyloTree::breadthFirstExpansion(Phylo void PhyloTree::computeParsimonyMultiThread(vector > &branch_list) { while (branch_list.size()) { vector threads; + vector save_dads; for (int i = 0; i < params->pp_thread; ++i) { if (branch_list.empty()) break; @@ -979,13 +980,16 @@ void PhyloTree::computeParsimonyMultiThread(vectordependency == 0) { branch_list.pop_back(); threads.push_back(thread(&PhyloTree::computePartialParsimony, this, dad_branch, dad)); - dad->dependency--; + save_dads.push_back(dad); } else { break; } } for (auto &thread : threads) thread.join(); + for (auto &dad : save_dads) { + dad->dependency--; + } } } From fe2b992ecdfafef14668bd17dcd639d14f43affa Mon Sep 17 00:00:00 2001 From: trungnotchung Date: Thu, 27 Mar 2025 16:00:10 +0700 Subject: [PATCH 04/14] initialize before start compute parsimony --- phylotree.cpp | 9 ++------- phylotree.h | 2 +- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/phylotree.cpp b/phylotree.cpp index ecf3a04b..783cd6a6 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -935,7 +935,7 @@ void PhyloTree::computePartialParsimony(PhyloNeighbor *dad_branch, PhyloNode *da delete[] bits_entry; } -vector > PhyloTree::breadthFirstExpansion(PhyloNeighbor *dad_branch, PhyloNode *dad) +vector > PhyloTree::initializeComputeParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad) { PhyloNode *node = (PhyloNode*) dad_branch->node; PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad); @@ -1014,13 +1014,8 @@ int PhyloTree::computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, if(!_pattern_pars) _pattern_pars = aligned_alloc(nptn+VCSIZE_USHORT); memset(_pattern_pars, 0, sizeof(BootValTypePars) * (nptn+VCSIZE_USHORT)); - vector > bfs = breadthFirstExpansion(dad_branch, dad); + vector > bfs = initializeComputeParsimony(dad_branch, dad); computeParsimonyMultiThread(bfs); - // if ((dad_branch->partial_lh_computed & 2) == 0) - // computePartialParsimony(dad_branch, dad); - // if ((node_branch->partial_lh_computed & 2) == 0) - // computePartialParsimony(node_branch, node); - // now combine likelihood at the branch int pars_size = getBitsBlockSize(); int entry_size = getBitsEntrySize(); diff --git a/phylotree.h b/phylotree.h index b68c16ea..35596653 100644 --- a/phylotree.h +++ b/phylotree.h @@ -451,7 +451,7 @@ class PhyloTree : public MTree, public Optimization { void computeParsimonyMultiThread(vector > &branch_list); - vector > breadthFirstExpansion(PhyloNeighbor *dad_branch, PhyloNode *dad); + vector > initializeComputeParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad); void printParsimonyStates(PhyloNeighbor *dad_branch = NULL, PhyloNode *dad = NULL); From f9f0721d31c3c32ff5faec7ecb46a8e46f8b274d Mon Sep 17 00:00:00 2001 From: trungnotchung Date: Thu, 27 Mar 2025 16:00:27 +0700 Subject: [PATCH 05/14] update git ignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 3751d5ea..8b72265c 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ pll/libpll.a pll_test/ workspace.code-workspace .vscode +build/** From ddcd6096c95a54457adf5152274b5ff1188ac5b5 Mon Sep 17 00:00:00 2001 From: trungnotchung Date: Thu, 27 Mar 2025 16:57:20 +0700 Subject: [PATCH 06/14] update code structure --- phylotree.cpp | 135 ++++++++++++++++++++++++++++++++++++++++++++++++-- phylotree.h | 17 ++++++- 2 files changed, 146 insertions(+), 6 deletions(-) diff --git a/phylotree.cpp b/phylotree.cpp index 783cd6a6..dac1f8fd 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -935,6 +935,75 @@ void PhyloTree::computePartialParsimony(PhyloNeighbor *dad_branch, PhyloNode *da delete[] bits_entry; } +void PhyloTree::computePartialParsimonyMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int startPtn, int endPtn) { + // don't recompute the parsimony + if (dad_branch->partial_lh_computed & 2) + return; + Node *node = dad_branch->node; + //assert(node->degree() <= 3); + int ptn; + int nstates = aln->num_states; + int pars_size = getBitsBlockSize(); + int entry_size = getBitsEntrySize(); + int nptn = aln->size(); + int ptn_pars_start_id = pars_size - nptn - 1; + + assert(dad_branch->partial_pars); + dad_branch->partial_lh_computed |= 2; + + if (nstates == 4 && aln->seq_type == SEQ_DNA && (node->isLeaf() || node->degree() == 3)) { + // ULTRAFAST VERSION FOR DNA, assuming that UINT is 32-bit integer + if (node->isLeaf() && dad) { + // external node + for (ptn = 0; ptn < aln->size(); ptn+=8) { + UINT states = 0; + int maxi = aln->size() - ptn; + if(maxi > 8) maxi = 8; + for (int i = 0; i< maxi; i++) { + UINT bit_state = dna_state_map[(aln->at(ptn+i))[node->id]]; + states |= (bit_state << (i*4)); + dad_branch->partial_pars[ptn_pars_start_id + ptn + i] = 0; + } + dad_branch->partial_pars[ptn/8] = states; + } + dad_branch->partial_pars[pars_size - 1] = 0; // set subtree score = 0 + } else { + // internal node + memset(dad_branch->partial_pars + ptn_pars_start_id, 0, nptn * sizeof(int)); + UINT *left = NULL, *right = NULL; + int pars_steps = 0; + FOR_NEIGHBOR_IT(node, dad, it)if ((*it)->node->name != ROOT_NAME) { + computePartialParsimony((PhyloNeighbor*) (*it), (PhyloNode*) node); + if (!left) + left = ((PhyloNeighbor*) (*it))->partial_pars; + else + right = ((PhyloNeighbor*) (*it))->partial_pars; + pars_steps += ((PhyloNeighbor*) (*it))->partial_pars[pars_size-1]; + for(int p = 0; p < nptn; p++) + dad_branch->partial_pars[ptn_pars_start_id + p] += ((PhyloNeighbor*) (*it))->partial_pars[ptn_pars_start_id + p]; + } + for (ptn = 0; ptn < aln->size(); ptn+=8) { + UINT states_left = left[ptn/8]; + UINT states_right = right[ptn/8]; + UINT states_dad = 0; + int maxi = aln->size() - ptn; + if(maxi > 8) maxi = 8; + for (int i = 0; i< maxi; i++) { + UINT state_left = (states_left >> (i*4)) & 15; + UINT state_right = (states_right >> (i*4)) & 15; + UINT state_both = state_left | (state_right << 4); + states_dad |= dna_fitch_result[state_both] << (i*4); + pars_steps += dna_fitch_step[state_both] * aln->at(ptn+i).frequency; + dad_branch->partial_pars[ptn_pars_start_id + ptn + i] += dna_fitch_step[state_both]; + } + dad_branch->partial_pars[ptn/8] = states_dad; + } + dad_branch->partial_pars[pars_size - 1] = pars_steps; + } + return; + } // END OF DNA VERSION +} + vector > PhyloTree::initializeComputeParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad) { PhyloNode *node = (PhyloNode*) dad_branch->node; @@ -966,7 +1035,28 @@ vector > PhyloTree::initializeComputeParsimony( return result; } -void PhyloTree::computeParsimonyMultiThread(vector > &branch_list) { +int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) { + PhyloNode *node = (PhyloNode*) dad_branch->node; + PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad); + assert(node_branch); + if (!central_partial_pars) + initializeAllPartialPars(); + // swap node and dad if dad is a leaf + if (node->isLeaf()) { + PhyloNode *tmp_node = dad; + dad = node; + node = tmp_node; + PhyloNeighbor *tmp_nei = dad_branch; + dad_branch = node_branch; + node_branch = tmp_nei; + //cout << "swapped\n"; + } + + int nptn = aln->size(); + if(!_pattern_pars) _pattern_pars = aligned_alloc(nptn+VCSIZE_USHORT); + memset(_pattern_pars, 0, sizeof(BootValTypePars) * (nptn+VCSIZE_USHORT)); + + vector > branch_list = initializeComputeParsimony(dad_branch, dad); while (branch_list.size()) { vector threads; vector save_dads; @@ -979,7 +1069,7 @@ void PhyloTree::computeParsimonyMultiThread(vectordependency >= 0); if (node->dependency == 0) { branch_list.pop_back(); - threads.push_back(thread(&PhyloTree::computePartialParsimony, this, dad_branch, dad)); + threads.push_back(thread(&PhyloTree::computePartialParsimonyMultiThreads, this, dad_branch, dad, 0, 0)); save_dads.push_back(dad); } else { break; @@ -991,6 +1081,38 @@ void PhyloTree::computeParsimonyMultiThread(vectordependency--; } } + + int pars_size = getBitsBlockSize(); + int entry_size = getBitsEntrySize(); + int ptn_pars_start_id = pars_size - nptn - 1; + //int nstates = aln->num_states; + int i, ptn; + int tree_pars = 0; + + if (aln->num_states == 4 && aln->seq_type == SEQ_DNA) { + // ULTRAFAST VERSION FOR DNA + for (ptn = 0; ptn < aln->size(); ptn+=8) { + UINT states_left = node_branch->partial_pars[ptn/8]; + UINT states_right = dad_branch->partial_pars[ptn/8]; + UINT states_dad = 0; + int maxi = aln->size() - ptn; + if(maxi > 8) maxi = 8; + for (i = 0; i< maxi; i++) { + UINT state_left = (states_left >> (i*4)) & 15; + UINT state_right = (states_right >> (i*4)) & 15; + UINT state_both = state_left | (state_right << 4); + states_dad |= dna_fitch_result[state_both] << (i*4); + tree_pars += dna_fitch_step[state_both] * aln->at(ptn+i).frequency; + _pattern_pars[ptn + i] = node_branch->partial_pars[ptn_pars_start_id + ptn + i] + + dad_branch->partial_pars[ptn_pars_start_id + ptn + i] + dna_fitch_step[state_both]; + } + } + } + if (branch_subst) + *branch_subst = tree_pars; + + tree_pars += node_branch->partial_pars[pars_size - 1] + dad_branch->partial_pars[pars_size - 1]; + return tree_pars; } int PhyloTree::computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) { @@ -1014,8 +1136,11 @@ int PhyloTree::computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, if(!_pattern_pars) _pattern_pars = aligned_alloc(nptn+VCSIZE_USHORT); memset(_pattern_pars, 0, sizeof(BootValTypePars) * (nptn+VCSIZE_USHORT)); - vector > bfs = initializeComputeParsimony(dad_branch, dad); - computeParsimonyMultiThread(bfs); + if ((dad_branch->partial_lh_computed & 2) == 0) + computePartialParsimony(dad_branch, dad); + if ((node_branch->partial_lh_computed & 2) == 0) + computePartialParsimony(node_branch, node); + // now combine likelihood at the branch int pars_size = getBitsBlockSize(); int entry_size = getBitsEntrySize(); @@ -1112,7 +1237,7 @@ int PhyloTree::computeParsimony() { int nptn = aln->size(); if(_pattern_pars == NULL) _pattern_pars = aligned_alloc(nptn + VCSIZE_USHORT); - return computeParsimonyBranch((PhyloNeighbor*) root->neighbors[0], (PhyloNode*) root); + return computeParsimonyBranchMultiThreads((PhyloNeighbor*) root->neighbors[0], (PhyloNode*) root); } void PhyloTree::printParsimonyStates(PhyloNeighbor *dad_branch, PhyloNode *dad) { diff --git a/phylotree.h b/phylotree.h index 35596653..eaba5566 100644 --- a/phylotree.h +++ b/phylotree.h @@ -439,6 +439,14 @@ class PhyloTree : public MTree, public Optimization { */ virtual void computePartialParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad); + /** + compute partial parsimony score of the subtree rooted at dad from startPtn to endPtn + @param dad_branch the branch leading to the subtree + @param dad its dad, used to direct the tranversal + @param startPtn start pattern + @param endPtn end pattern + */ + void computePartialParsimonyMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int startPtn, int endPtn); /** compute tree parsimony score on a branch @@ -449,7 +457,14 @@ class PhyloTree : public MTree, public Optimization { */ virtual int computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst = NULL); - void computeParsimonyMultiThread(vector > &branch_list); + /** + compute tree parsimony score on a branch + @param dad_branch the branch leading to the subtree + @param dad its dad, used to direct the tranversal + @param branch_subst (OUT) if not NULL, the number of substitutions on this branch + @return parsimony score of the tree + */ + int computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst = NULL); vector > initializeComputeParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad); From 9165edce46208d17bed03df9b30f59a1fb1d2f1a Mon Sep 17 00:00:00 2001 From: trungnotchung Date: Thu, 27 Mar 2025 17:05:19 +0700 Subject: [PATCH 07/14] add default thread --- phylonode.h | 1 + tools.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/phylonode.h b/phylonode.h index a99fd094..589d7eae 100644 --- a/phylonode.h +++ b/phylonode.h @@ -104,6 +104,7 @@ class PhyloNeighbor : public Neighbor { vector containing the partial parsimony scores */ UINT *partial_pars; + }; /** diff --git a/tools.h b/tools.h index 58e4e7d9..2926bcf2 100644 --- a/tools.h +++ b/tools.h @@ -414,7 +414,7 @@ struct Params { /** * Number of threads */ - int pp_thread; + int pp_thread = 1; /** * Number of starting parsimony trees From 49a97c3c7c0715cba0e5bca964f24332ffd0df8f Mon Sep 17 00:00:00 2001 From: trungnotchung Date: Thu, 27 Mar 2025 19:07:13 +0700 Subject: [PATCH 08/14] update code structure --- phylotree.cpp | 18 +++++++++++------- phylotree.h | 14 +++++++++++++- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/phylotree.cpp b/phylotree.cpp index dac1f8fd..6cad51fa 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -1004,7 +1004,7 @@ void PhyloTree::computePartialParsimonyMultiThreads(PhyloNeighbor *dad_branch, P } // END OF DNA VERSION } -vector > PhyloTree::initializeComputeParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad) +vector > PhyloTree::initializeComputeParsimonyMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad) { PhyloNode *node = (PhyloNode*) dad_branch->node; PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad); @@ -1035,6 +1035,10 @@ vector > PhyloTree::initializeComputeParsimony( return result; } +void PhyloTree::finalizeComputeParsimonyMultiThreads(vector > topo_sorted_branches) { + +} + int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) { PhyloNode *node = (PhyloNode*) dad_branch->node; PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad); @@ -1056,19 +1060,19 @@ int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, Phy if(!_pattern_pars) _pattern_pars = aligned_alloc(nptn+VCSIZE_USHORT); memset(_pattern_pars, 0, sizeof(BootValTypePars) * (nptn+VCSIZE_USHORT)); - vector > branch_list = initializeComputeParsimony(dad_branch, dad); - while (branch_list.size()) { + vector > topo_sorted_branches = initializeComputeParsimonyMultiThreads(dad_branch, dad); + while (topo_sorted_branches.size()) { vector threads; vector save_dads; for (int i = 0; i < params->pp_thread; ++i) { - if (branch_list.empty()) + if (topo_sorted_branches.empty()) break; - PhyloNeighbor *dad_branch = branch_list.back().first; - PhyloNode *dad = branch_list.back().second; + PhyloNeighbor *dad_branch = topo_sorted_branches.back().first; + PhyloNode *dad = topo_sorted_branches.back().second; PhyloNode *node = (PhyloNode*) dad_branch->node; assert(node->dependency >= 0); if (node->dependency == 0) { - branch_list.pop_back(); + topo_sorted_branches.pop_back(); threads.push_back(thread(&PhyloTree::computePartialParsimonyMultiThreads, this, dad_branch, dad, 0, 0)); save_dads.push_back(dad); } else { diff --git a/phylotree.h b/phylotree.h index eaba5566..4a2cca4c 100644 --- a/phylotree.h +++ b/phylotree.h @@ -466,7 +466,19 @@ class PhyloTree : public MTree, public Optimization { */ int computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst = NULL); - vector > initializeComputeParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad); + /** + initialize information for computing parsimony score with multi-threads + @param dad_branch the branch leading to the subtree + @param dad its dad, used to direct the tranversal + @return a vector of pairs of (branch, node) + */ + vector > initializeComputeParsimonyMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad); + + /** + finalize compute parsimony score with multi-threads + @param topo_sorted_branches a vector of pairs of (branch, node) + */ + void finalizeComputeParsimonyMultiThreads(vector > topo_sorted_branches); void printParsimonyStates(PhyloNeighbor *dad_branch = NULL, PhyloNode *dad = NULL); From 385e272961f406ade859e0db1767c2f887d28949 Mon Sep 17 00:00:00 2001 From: trungnotchung Date: Fri, 28 Mar 2025 20:54:25 +0700 Subject: [PATCH 09/14] compute parsimony score multi threads --- phylonode.h | 2 -- phylotree.cpp | 78 +++++++++++++++++++++++++-------------------------- 2 files changed, 38 insertions(+), 42 deletions(-) diff --git a/phylonode.h b/phylonode.h index 589d7eae..07fbf796 100644 --- a/phylonode.h +++ b/phylonode.h @@ -165,8 +165,6 @@ class PhyloNode : public Node { tell that all partial likelihood vectors (in reverse direction) below this node are not computed */ void clearReversePartialLh(PhyloNode *dad); - - int dependency; }; diff --git a/phylotree.cpp b/phylotree.cpp index 6cad51fa..940c4f38 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -939,6 +939,9 @@ void PhyloTree::computePartialParsimonyMultiThreads(PhyloNeighbor *dad_branch, P // don't recompute the parsimony if (dad_branch->partial_lh_computed & 2) return; + if (startPtn > endPtn) { + return; + } Node *node = dad_branch->node; //assert(node->degree() <= 3); int ptn; @@ -949,46 +952,43 @@ void PhyloTree::computePartialParsimonyMultiThreads(PhyloNeighbor *dad_branch, P int ptn_pars_start_id = pars_size - nptn - 1; assert(dad_branch->partial_pars); - dad_branch->partial_lh_computed |= 2; if (nstates == 4 && aln->seq_type == SEQ_DNA && (node->isLeaf() || node->degree() == 3)) { // ULTRAFAST VERSION FOR DNA, assuming that UINT is 32-bit integer if (node->isLeaf() && dad) { // external node - for (ptn = 0; ptn < aln->size(); ptn+=8) { + for (ptn = startPtn; ptn <= endPtn; ptn+=8) { UINT states = 0; - int maxi = aln->size() - ptn; + int maxi = endPtn - ptn + 1; if(maxi > 8) maxi = 8; - for (int i = 0; i< maxi; i++) { + for (int i = 0; i < maxi; i++) { UINT bit_state = dna_state_map[(aln->at(ptn+i))[node->id]]; states |= (bit_state << (i*4)); dad_branch->partial_pars[ptn_pars_start_id + ptn + i] = 0; } dad_branch->partial_pars[ptn/8] = states; } - dad_branch->partial_pars[pars_size - 1] = 0; // set subtree score = 0 } else { // internal node - memset(dad_branch->partial_pars + ptn_pars_start_id, 0, nptn * sizeof(int)); + memset(dad_branch->partial_pars + ptn_pars_start_id + startPtn, 0, (endPtn - startPtn + 1) * sizeof(int)); UINT *left = NULL, *right = NULL; - int pars_steps = 0; FOR_NEIGHBOR_IT(node, dad, it)if ((*it)->node->name != ROOT_NAME) { - computePartialParsimony((PhyloNeighbor*) (*it), (PhyloNode*) node); + computePartialParsimonyMultiThreads((PhyloNeighbor*) (*it), (PhyloNode*) node, startPtn, endPtn); if (!left) left = ((PhyloNeighbor*) (*it))->partial_pars; else right = ((PhyloNeighbor*) (*it))->partial_pars; - pars_steps += ((PhyloNeighbor*) (*it))->partial_pars[pars_size-1]; - for(int p = 0; p < nptn; p++) + for(int p = startPtn; p <= endPtn; p++) dad_branch->partial_pars[ptn_pars_start_id + p] += ((PhyloNeighbor*) (*it))->partial_pars[ptn_pars_start_id + p]; } - for (ptn = 0; ptn < aln->size(); ptn+=8) { + int pars_steps = 0; + for (ptn = startPtn; ptn <= endPtn; ptn+=8) { UINT states_left = left[ptn/8]; UINT states_right = right[ptn/8]; UINT states_dad = 0; - int maxi = aln->size() - ptn; + int maxi = endPtn - ptn + 1; if(maxi > 8) maxi = 8; - for (int i = 0; i< maxi; i++) { + for (int i = 0; i < maxi; i++) { UINT state_left = (states_left >> (i*4)) & 15; UINT state_right = (states_right >> (i*4)) & 15; UINT state_both = state_left | (state_right << 4); @@ -998,7 +998,7 @@ void PhyloTree::computePartialParsimonyMultiThreads(PhyloNeighbor *dad_branch, P } dad_branch->partial_pars[ptn/8] = states_dad; } - dad_branch->partial_pars[pars_size - 1] = pars_steps; + dad_branch->partial_pars[pars_size - 1] += pars_steps; } return; } // END OF DNA VERSION @@ -1010,22 +1010,20 @@ vector > PhyloTree::initializeComputeParsimonyM PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad); vector > result; queue > q; + int pars_size = getBitsBlockSize(); q.push(make_pair(dad_branch, dad)); q.push(make_pair(node_branch, node)); - node->dependency = 0; - dad->dependency = 0; while(q.size()) { PhyloNeighbor *cur_dad_branch = q.front().first; PhyloNode *cur_dad = q.front().second; q.pop(); if (cur_dad_branch->partial_lh_computed & 2) continue; - cur_dad->dependency++; + cur_dad_branch->partial_pars[pars_size - 1] = 0; result.push_back(make_pair(cur_dad_branch, cur_dad)); PhyloNode *cur_node = (PhyloNode*) cur_dad_branch->node; - cur_node->dependency = 0; if (cur_node->isLeaf() && cur_dad) continue; FOR_NEIGHBOR_IT(cur_node, cur_dad, it)if ((*it)->node->name != ROOT_NAME) { @@ -1036,7 +1034,17 @@ vector > PhyloTree::initializeComputeParsimonyM } void PhyloTree::finalizeComputeParsimonyMultiThreads(vector > topo_sorted_branches) { - + int pars_size = getBitsBlockSize(); + for (int i = (int)topo_sorted_branches.size() - 1; i >= 0; i--) { + PhyloNeighbor *dad_branch = topo_sorted_branches[i].first; + PhyloNode *dad = topo_sorted_branches[i].second; + dad_branch->partial_lh_computed |= 2; + + PhyloNode *node = (PhyloNode*) dad_branch->node; + FOR_NEIGHBOR_IT(node, dad, it) if ((*it)->node->name != ROOT_NAME) { + dad_branch->partial_pars[pars_size - 1] += ((PhyloNeighbor*)(*it))->partial_pars[pars_size - 1]; + } + } } int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) { @@ -1061,30 +1069,20 @@ int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, Phy memset(_pattern_pars, 0, sizeof(BootValTypePars) * (nptn+VCSIZE_USHORT)); vector > topo_sorted_branches = initializeComputeParsimonyMultiThreads(dad_branch, dad); - while (topo_sorted_branches.size()) { - vector threads; - vector save_dads; - for (int i = 0; i < params->pp_thread; ++i) { - if (topo_sorted_branches.empty()) - break; - PhyloNeighbor *dad_branch = topo_sorted_branches.back().first; - PhyloNode *dad = topo_sorted_branches.back().second; - PhyloNode *node = (PhyloNode*) dad_branch->node; - assert(node->dependency >= 0); - if (node->dependency == 0) { - topo_sorted_branches.pop_back(); - threads.push_back(thread(&PhyloTree::computePartialParsimonyMultiThreads, this, dad_branch, dad, 0, 0)); - save_dads.push_back(dad); - } else { - break; - } + vector threads; + int ptnPerThread = (aln->size()) / params->pp_thread; + if (ptnPerThread % 8 != 0) ptnPerThread += 8 - (ptnPerThread % 8); + for (int i = 0; i < params->pp_thread; i++) { + int startPtn = i * ptnPerThread; + int endPtn = min((int)aln->size(), (i + 1) * ptnPerThread) - 1; + if ((dad_branch->partial_lh_computed & 2) == 0) { + threads.push_back(thread(&PhyloTree::computePartialParsimonyMultiThreads, this, dad_branch, dad, startPtn, endPtn)); } - for (auto &thread : threads) - thread.join(); - for (auto &dad : save_dads) { - dad->dependency--; + if ((node_branch->partial_lh_computed & 2) == 0) { + threads.push_back(thread(&PhyloTree::computePartialParsimonyMultiThreads, this, node_branch, node, startPtn, endPtn)); } } + finalizeComputeParsimonyMultiThreads(topo_sorted_branches); int pars_size = getBitsBlockSize(); int entry_size = getBitsEntrySize(); From 210efd12cfeb41a6e5e782ff69e2b3e93397ad22 Mon Sep 17 00:00:00 2001 From: trungnotchung Date: Fri, 28 Mar 2025 21:00:39 +0700 Subject: [PATCH 10/14] fix missing joining threads --- phylotree.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/phylotree.cpp b/phylotree.cpp index 940c4f38..aeb0c199 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -1082,6 +1082,9 @@ int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, Phy threads.push_back(thread(&PhyloTree::computePartialParsimonyMultiThreads, this, node_branch, node, startPtn, endPtn)); } } + for (int i = 0; i < (int)threads.size(); ++i) { + threads[i].join(); + } finalizeComputeParsimonyMultiThreads(topo_sorted_branches); int pars_size = getBitsBlockSize(); From 6bc49baf0d8c37b414dc8d27961e22ddd64a1756 Mon Sep 17 00:00:00 2001 From: trungnotchung Date: Fri, 4 Apr 2025 18:29:52 +0700 Subject: [PATCH 11/14] add mutex --- phylonode.h | 1 + phylotree.cpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/phylonode.h b/phylonode.h index 07fbf796..e0e0a3ba 100644 --- a/phylonode.h +++ b/phylonode.h @@ -105,6 +105,7 @@ class PhyloNeighbor : public Neighbor { */ UINT *partial_pars; + std::mutex partial_lh_mutex; }; /** diff --git a/phylotree.cpp b/phylotree.cpp index aeb0c199..aedb9a2b 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -998,7 +998,9 @@ void PhyloTree::computePartialParsimonyMultiThreads(PhyloNeighbor *dad_branch, P } dad_branch->partial_pars[ptn/8] = states_dad; } + dad_branch->partial_lh_mutex.lock(); dad_branch->partial_pars[pars_size - 1] += pars_steps; + dad_branch->partial_lh_mutex.unlock(); } return; } // END OF DNA VERSION From b15b4f0efa6cb3616b83cffce3e33a0ebe35f7b8 Mon Sep 17 00:00:00 2001 From: trungnotchung Date: Fri, 11 Apr 2025 16:51:06 +0700 Subject: [PATCH 12/14] fix missing column in alignment --- phylotree.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/phylotree.cpp b/phylotree.cpp index aedb9a2b..4b510d91 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -1072,8 +1072,9 @@ int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, Phy vector > topo_sorted_branches = initializeComputeParsimonyMultiThreads(dad_branch, dad); vector threads; - int ptnPerThread = (aln->size()) / params->pp_thread; + int ptnPerThread = (aln->size() + params->pp_thread - 1) / params->pp_thread; if (ptnPerThread % 8 != 0) ptnPerThread += 8 - (ptnPerThread % 8); + // int ptnPerThread = aln->size(); for (int i = 0; i < params->pp_thread; i++) { int startPtn = i * ptnPerThread; int endPtn = min((int)aln->size(), (i + 1) * ptnPerThread) - 1; @@ -1088,6 +1089,11 @@ int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, Phy threads[i].join(); } finalizeComputeParsimonyMultiThreads(topo_sorted_branches); + // if ((dad_branch->partial_lh_computed & 2) == 0) + // computePartialParsimony(dad_branch, dad); + // if ((node_branch->partial_lh_computed & 2) == 0) + // computePartialParsimony(node_branch, node); + // // now combine likelihood at the branch int pars_size = getBitsBlockSize(); int entry_size = getBitsEntrySize(); From a0eeb26dd41c0751262c0148fad3a33adeb0f4df Mon Sep 17 00:00:00 2001 From: trungnotchung Date: Fri, 11 Apr 2025 18:37:15 +0700 Subject: [PATCH 13/14] stop if has no column --- phylotree.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/phylotree.cpp b/phylotree.cpp index 4b510d91..4f959fd1 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -1078,6 +1078,7 @@ int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, Phy for (int i = 0; i < params->pp_thread; i++) { int startPtn = i * ptnPerThread; int endPtn = min((int)aln->size(), (i + 1) * ptnPerThread) - 1; + if (startPtn > endPtn) continue; if ((dad_branch->partial_lh_computed & 2) == 0) { threads.push_back(thread(&PhyloTree::computePartialParsimonyMultiThreads, this, dad_branch, dad, startPtn, endPtn)); } From 447d0c30094f9ef7bcc13bf7660a0ba22155ace8 Mon Sep 17 00:00:00 2001 From: loilon504 Date: Tue, 22 Apr 2025 15:54:31 +0700 Subject: [PATCH 14/14] fix: add pthread and mutex lib --- CMakeLists.txt | 2 ++ phylonode.h | 26 ++++++++++++++------------ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fce4a26b..16d49c50 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -516,3 +516,5 @@ set(CPACK_STRIP_FILES TRUE) include (CPack) add_custom_target(dist COMMAND ${CMAKE_MAKE_PROGRAM} package_source) + +target_link_libraries(mpboot pthread) diff --git a/phylonode.h b/phylonode.h index e0e0a3ba..8051a788 100644 --- a/phylonode.h +++ b/phylonode.h @@ -14,6 +14,7 @@ #include "node.h" #include "queue" +#include typedef short int UBYTE; @@ -22,7 +23,8 @@ A neighbor in a phylogenetic tree @author BUI Quang Minh, Steffen Klaere, Arndt von Haeseler */ -class PhyloNeighbor : public Neighbor { +class PhyloNeighbor : public Neighbor +{ friend class PhyloNode; friend class PhyloTree; friend class IQTree; @@ -38,7 +40,8 @@ class PhyloNeighbor : public Neighbor { @param alength length of branch */ - PhyloNeighbor(Node *anode, double alength) : Neighbor(anode, alength) { + PhyloNeighbor(Node *anode, double alength) : Neighbor(anode, alength) + { partial_lh = NULL; partial_lh_computed = 0; lh_scale_factor = 0.0; @@ -51,7 +54,8 @@ class PhyloNeighbor : public Neighbor { @param alength length of branch @param aid branch ID */ - PhyloNeighbor(Node *anode, double alength, int aid) : Neighbor(anode, alength, aid) { + PhyloNeighbor(Node *anode, double alength, int aid) : Neighbor(anode, alength, aid) + { partial_lh = NULL; partial_lh_computed = 0; lh_scale_factor = 0.0; @@ -61,14 +65,16 @@ class PhyloNeighbor : public Neighbor { /** tell that the partial likelihood vector is not computed */ - inline void clearPartialLh() { + inline void clearPartialLh() + { partial_lh_computed = 0; } /** * tell that the partial likelihood vector is computed */ - inline void unclearPartialLh() { + inline void unclearPartialLh() + { partial_lh_computed = 1; } @@ -79,7 +85,6 @@ class PhyloNeighbor : public Neighbor { void clearForwardPartialLh(Node *dad); private: - /** true if the partial likelihood was computed */ @@ -113,7 +118,8 @@ A node in a phylogenetic tree @author BUI Quang Minh, Steffen Klaere, Arndt von Haeseler */ -class PhyloNode : public Node { +class PhyloNode : public Node +{ friend class PhyloTree; public: @@ -155,8 +161,6 @@ class PhyloNode : public Node { */ virtual void addNeighbor(Node *node, double length, int id = -1); - - /** tell that all partial likelihood vectors below this node are not computed */ @@ -168,11 +172,9 @@ class PhyloNode : public Node { void clearReversePartialLh(PhyloNode *dad); }; - /** Node vector */ -typedef vector PhyloNodeVector; - +typedef vector PhyloNodeVector; #endif