From 5408eb03bcb6ea031ba39e54a0f9dff7a78053fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ph=E1=BA=A1m=20Xu=C3=A2n=20Trung?=
<66519569+trungnotchung@users.noreply.github.com>
Date: Thu, 25 Jan 2024 10:54:30 +0700
Subject: [PATCH 01/14] Update README.md
---
README.md | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
diff --git a/README.md b/README.md
index d8c5336a..6a825dfc 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,33 @@ MPBoot: Fast phylogenetic maximum parsimony tree inference and bootstrap approxi
* Run **make**
* You will find the executable named **mpboot** once the **make** command is done.
+## **PLACEMENT CORE**
+### **Parameter**
+* **-pp_on**: enable placement.
+* **-pp_n**: number of available samples on tree.
+* **-pp_k**: number of missing samples.
+* **-pp_tree**: tree file.
+* **-pp_orig_spr**: run spr without changing origin tree.
+* **-pp_test_spr**: enable check correct tree.
+* **-pp_origin**: origin tree file.
+* **-pp_zip_aln**: alignment's zip file.
+* **-pp_zip_tree**: tree's zip file.
+
+### **Command**
+* Add missing samples to existing tree:
+
+ ``./mpboot -s -pp_tree -pp_on -pp_n -pp_k ``
+
+ *Example:*
+
+ ``./mpboot -s data/test5/1/added5.vcf -pp_tree data/test5/1/origin5.fasta.treefile -pp_on -pp_k 5 -pp_n 5``
+* Check if tree unchanged
+
+ ``./mpboot -s -pp_tree -pp_origin -pp_on -pp_test_spr -pp_k -pp_orig_spr``
+
+ *Example:*
+
+ ``./mpboot -s data/test5/1/added5.vcf -pp_tree addedTree.txt -pp_origin data/test5/1/origin5.fasta.treefile -pp_on -pp_test_spr -pp_k 5 -pp_orig_spr``
From 39140c0c9afe06b6a6b9698c7352eb23e6d09c02 Mon Sep 17 00:00:00 2001
From: trungnotchung
Date: Fri, 18 Oct 2024 11:45:18 +0700
Subject: [PATCH 02/14] feat: compute partial parsimony multi threads topo sort
---
phylonode.h | 4 +++-
phylotree.cpp | 64 +++++++++++++++++++++++++++++++++++++++++++++++----
phylotree.h | 5 ++++
tools.cpp | 7 ++++++
tools.h | 4 ++++
5 files changed, 79 insertions(+), 5 deletions(-)
diff --git a/phylonode.h b/phylonode.h
index ef77b22e..a99fd094 100644
--- a/phylonode.h
+++ b/phylonode.h
@@ -13,6 +13,7 @@
#define PHYLONODE_H
#include "node.h"
+#include "queue"
typedef short int UBYTE;
@@ -103,7 +104,6 @@ class PhyloNeighbor : public Neighbor {
vector containing the partial parsimony scores
*/
UINT *partial_pars;
-
};
/**
@@ -164,6 +164,8 @@ class PhyloNode : public Node {
tell that all partial likelihood vectors (in reverse direction) below this node are not computed
*/
void clearReversePartialLh(PhyloNode *dad);
+
+ int dependency;
};
diff --git a/phylotree.cpp b/phylotree.cpp
index 3eb44929..3f2483b9 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -935,6 +935,60 @@ void PhyloTree::computePartialParsimony(PhyloNeighbor *dad_branch, PhyloNode *da
delete[] bits_entry;
}
+vector > PhyloTree::breadthFirstExpansion(PhyloNeighbor *dad_branch, PhyloNode *dad)
+{
+ PhyloNode *node = (PhyloNode*) dad_branch->node;
+ PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
+ vector > result;
+ queue > q;
+
+ q.push(make_pair(dad_branch, dad));
+ q.push(make_pair(node_branch, node));
+ node->dependency = 0;
+ dad->dependency = 0;
+ while(q.size()) {
+ PhyloNeighbor *cur_dad_branch = q.front().first;
+ PhyloNode *cur_dad = q.front().second;
+ q.pop();
+ if (cur_dad_branch->partial_lh_computed & 2)
+ continue;
+ cur_dad->dependency++;
+ result.push_back(make_pair(cur_dad_branch, cur_dad));
+
+ PhyloNode *cur_node = (PhyloNode*) cur_dad_branch->node;
+ cur_node->dependency = 0;
+ if (cur_node->isLeaf() && cur_dad)
+ continue;
+ FOR_NEIGHBOR_IT(cur_node, cur_dad, it)if ((*it)->node->name != ROOT_NAME) {
+ q.push(make_pair((PhyloNeighbor*) (*it), cur_node));
+ }
+ }
+ return result;
+}
+
+void PhyloTree::computeParsimonyMultiThread(vector > &branch_list) {
+ while (branch_list.size()) {
+ vector threads;
+ for (int i = 0; i < params->pp_thread; ++i) {
+ if (branch_list.empty())
+ break;
+ PhyloNeighbor *dad_branch = branch_list.back().first;
+ PhyloNode *dad = branch_list.back().second;
+ PhyloNode *node = (PhyloNode*) dad_branch->node;
+ assert(node->dependency >= 0);
+ if (node->dependency == 0) {
+ branch_list.pop_back();
+ threads.push_back(thread(&PhyloTree::computePartialParsimony, this, dad_branch, dad));
+ dad->dependency--;
+ } else {
+ break;
+ }
+ }
+ for (auto &thread : threads)
+ thread.join();
+ }
+}
+
int PhyloTree::computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) {
PhyloNode *node = (PhyloNode*) dad_branch->node;
PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
@@ -956,10 +1010,12 @@ int PhyloTree::computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad,
if(!_pattern_pars) _pattern_pars = aligned_alloc(nptn+VCSIZE_USHORT);
memset(_pattern_pars, 0, sizeof(BootValTypePars) * (nptn+VCSIZE_USHORT));
- if ((dad_branch->partial_lh_computed & 2) == 0)
- computePartialParsimony(dad_branch, dad);
- if ((node_branch->partial_lh_computed & 2) == 0)
- computePartialParsimony(node_branch, node);
+ vector > bfs = breadthFirstExpansion(dad_branch, dad);
+ computeParsimonyMultiThread(bfs);
+ // if ((dad_branch->partial_lh_computed & 2) == 0)
+ // computePartialParsimony(dad_branch, dad);
+ // if ((node_branch->partial_lh_computed & 2) == 0)
+ // computePartialParsimony(node_branch, node);
// now combine likelihood at the branch
int pars_size = getBitsBlockSize();
diff --git a/phylotree.h b/phylotree.h
index 58ec5cb0..b68c16ea 100644
--- a/phylotree.h
+++ b/phylotree.h
@@ -31,6 +31,7 @@
#include "optimization.h"
#include "model/rateheterogeneity.h"
#include "phyloanalysis.h"
+#include "thread"
const double MIN_BRANCH_LEN = 0.000001; // NEVER TOUCH THIS CONSTANT AGAIN PLEASE!
const double MAX_BRANCH_LEN = 100.0;
@@ -448,6 +449,10 @@ class PhyloTree : public MTree, public Optimization {
*/
virtual int computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst = NULL);
+ void computeParsimonyMultiThread(vector > &branch_list);
+
+ vector > breadthFirstExpansion(PhyloNeighbor *dad_branch, PhyloNode *dad);
+
void printParsimonyStates(PhyloNeighbor *dad_branch = NULL, PhyloNode *dad = NULL);
diff --git a/tools.cpp b/tools.cpp
index d58577a6..c8c8bbd9 100644
--- a/tools.cpp
+++ b/tools.cpp
@@ -954,6 +954,13 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
//params.run_mode = BOTH_ALG;
continue;
}
+ if (strcmp(argv[cnt], "-pp_thread") == 0) {
+ cnt++;
+ if (cnt >= argc)
+ throw "Invalid thread number";
+ params.pp_thread = convert_int(argv[cnt]);
+ continue;
+ }
if (strcmp(argv[cnt], "-e") == 0) {
cnt++;
if (cnt >= argc)
diff --git a/tools.h b/tools.h
index 315db2dd..58e4e7d9 100644
--- a/tools.h
+++ b/tools.h
@@ -411,6 +411,10 @@ extern int NNI_MAX_NR_STEP;
program parameters, everything is specified here
*/
struct Params {
+ /**
+ * Number of threads
+ */
+ int pp_thread;
/**
* Number of starting parsimony trees
From dfa03f15128555598f063b534462976fa657827d Mon Sep 17 00:00:00 2001
From: trungnotchung
Date: Fri, 18 Oct 2024 11:51:23 +0700
Subject: [PATCH 03/14] fix: multi threads conflict
---
phylotree.cpp | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/phylotree.cpp b/phylotree.cpp
index 3f2483b9..ecf3a04b 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -969,6 +969,7 @@ vector > PhyloTree::breadthFirstExpansion(Phylo
void PhyloTree::computeParsimonyMultiThread(vector > &branch_list) {
while (branch_list.size()) {
vector threads;
+ vector save_dads;
for (int i = 0; i < params->pp_thread; ++i) {
if (branch_list.empty())
break;
@@ -979,13 +980,16 @@ void PhyloTree::computeParsimonyMultiThread(vectordependency == 0) {
branch_list.pop_back();
threads.push_back(thread(&PhyloTree::computePartialParsimony, this, dad_branch, dad));
- dad->dependency--;
+ save_dads.push_back(dad);
} else {
break;
}
}
for (auto &thread : threads)
thread.join();
+ for (auto &dad : save_dads) {
+ dad->dependency--;
+ }
}
}
From fe2b992ecdfafef14668bd17dcd639d14f43affa Mon Sep 17 00:00:00 2001
From: trungnotchung
Date: Thu, 27 Mar 2025 16:00:10 +0700
Subject: [PATCH 04/14] initialize before start compute parsimony
---
phylotree.cpp | 9 ++-------
phylotree.h | 2 +-
2 files changed, 3 insertions(+), 8 deletions(-)
diff --git a/phylotree.cpp b/phylotree.cpp
index ecf3a04b..783cd6a6 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -935,7 +935,7 @@ void PhyloTree::computePartialParsimony(PhyloNeighbor *dad_branch, PhyloNode *da
delete[] bits_entry;
}
-vector > PhyloTree::breadthFirstExpansion(PhyloNeighbor *dad_branch, PhyloNode *dad)
+vector > PhyloTree::initializeComputeParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad)
{
PhyloNode *node = (PhyloNode*) dad_branch->node;
PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
@@ -1014,13 +1014,8 @@ int PhyloTree::computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad,
if(!_pattern_pars) _pattern_pars = aligned_alloc(nptn+VCSIZE_USHORT);
memset(_pattern_pars, 0, sizeof(BootValTypePars) * (nptn+VCSIZE_USHORT));
- vector > bfs = breadthFirstExpansion(dad_branch, dad);
+ vector > bfs = initializeComputeParsimony(dad_branch, dad);
computeParsimonyMultiThread(bfs);
- // if ((dad_branch->partial_lh_computed & 2) == 0)
- // computePartialParsimony(dad_branch, dad);
- // if ((node_branch->partial_lh_computed & 2) == 0)
- // computePartialParsimony(node_branch, node);
- // now combine likelihood at the branch
int pars_size = getBitsBlockSize();
int entry_size = getBitsEntrySize();
diff --git a/phylotree.h b/phylotree.h
index b68c16ea..35596653 100644
--- a/phylotree.h
+++ b/phylotree.h
@@ -451,7 +451,7 @@ class PhyloTree : public MTree, public Optimization {
void computeParsimonyMultiThread(vector > &branch_list);
- vector > breadthFirstExpansion(PhyloNeighbor *dad_branch, PhyloNode *dad);
+ vector > initializeComputeParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad);
void printParsimonyStates(PhyloNeighbor *dad_branch = NULL, PhyloNode *dad = NULL);
From f9f0721d31c3c32ff5faec7ecb46a8e46f8b274d Mon Sep 17 00:00:00 2001
From: trungnotchung
Date: Thu, 27 Mar 2025 16:00:27 +0700
Subject: [PATCH 05/14] update git ignore
---
.gitignore | 1 +
1 file changed, 1 insertion(+)
diff --git a/.gitignore b/.gitignore
index 3751d5ea..8b72265c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,4 @@ pll/libpll.a
pll_test/
workspace.code-workspace
.vscode
+build/**
From ddcd6096c95a54457adf5152274b5ff1188ac5b5 Mon Sep 17 00:00:00 2001
From: trungnotchung
Date: Thu, 27 Mar 2025 16:57:20 +0700
Subject: [PATCH 06/14] update code structure
---
phylotree.cpp | 135 ++++++++++++++++++++++++++++++++++++++++++++++++--
phylotree.h | 17 ++++++-
2 files changed, 146 insertions(+), 6 deletions(-)
diff --git a/phylotree.cpp b/phylotree.cpp
index 783cd6a6..dac1f8fd 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -935,6 +935,75 @@ void PhyloTree::computePartialParsimony(PhyloNeighbor *dad_branch, PhyloNode *da
delete[] bits_entry;
}
+void PhyloTree::computePartialParsimonyMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int startPtn, int endPtn) {
+ // don't recompute the parsimony
+ if (dad_branch->partial_lh_computed & 2)
+ return;
+ Node *node = dad_branch->node;
+ //assert(node->degree() <= 3);
+ int ptn;
+ int nstates = aln->num_states;
+ int pars_size = getBitsBlockSize();
+ int entry_size = getBitsEntrySize();
+ int nptn = aln->size();
+ int ptn_pars_start_id = pars_size - nptn - 1;
+
+ assert(dad_branch->partial_pars);
+ dad_branch->partial_lh_computed |= 2;
+
+ if (nstates == 4 && aln->seq_type == SEQ_DNA && (node->isLeaf() || node->degree() == 3)) {
+ // ULTRAFAST VERSION FOR DNA, assuming that UINT is 32-bit integer
+ if (node->isLeaf() && dad) {
+ // external node
+ for (ptn = 0; ptn < aln->size(); ptn+=8) {
+ UINT states = 0;
+ int maxi = aln->size() - ptn;
+ if(maxi > 8) maxi = 8;
+ for (int i = 0; i< maxi; i++) {
+ UINT bit_state = dna_state_map[(aln->at(ptn+i))[node->id]];
+ states |= (bit_state << (i*4));
+ dad_branch->partial_pars[ptn_pars_start_id + ptn + i] = 0;
+ }
+ dad_branch->partial_pars[ptn/8] = states;
+ }
+ dad_branch->partial_pars[pars_size - 1] = 0; // set subtree score = 0
+ } else {
+ // internal node
+ memset(dad_branch->partial_pars + ptn_pars_start_id, 0, nptn * sizeof(int));
+ UINT *left = NULL, *right = NULL;
+ int pars_steps = 0;
+ FOR_NEIGHBOR_IT(node, dad, it)if ((*it)->node->name != ROOT_NAME) {
+ computePartialParsimony((PhyloNeighbor*) (*it), (PhyloNode*) node);
+ if (!left)
+ left = ((PhyloNeighbor*) (*it))->partial_pars;
+ else
+ right = ((PhyloNeighbor*) (*it))->partial_pars;
+ pars_steps += ((PhyloNeighbor*) (*it))->partial_pars[pars_size-1];
+ for(int p = 0; p < nptn; p++)
+ dad_branch->partial_pars[ptn_pars_start_id + p] += ((PhyloNeighbor*) (*it))->partial_pars[ptn_pars_start_id + p];
+ }
+ for (ptn = 0; ptn < aln->size(); ptn+=8) {
+ UINT states_left = left[ptn/8];
+ UINT states_right = right[ptn/8];
+ UINT states_dad = 0;
+ int maxi = aln->size() - ptn;
+ if(maxi > 8) maxi = 8;
+ for (int i = 0; i< maxi; i++) {
+ UINT state_left = (states_left >> (i*4)) & 15;
+ UINT state_right = (states_right >> (i*4)) & 15;
+ UINT state_both = state_left | (state_right << 4);
+ states_dad |= dna_fitch_result[state_both] << (i*4);
+ pars_steps += dna_fitch_step[state_both] * aln->at(ptn+i).frequency;
+ dad_branch->partial_pars[ptn_pars_start_id + ptn + i] += dna_fitch_step[state_both];
+ }
+ dad_branch->partial_pars[ptn/8] = states_dad;
+ }
+ dad_branch->partial_pars[pars_size - 1] = pars_steps;
+ }
+ return;
+ } // END OF DNA VERSION
+}
+
vector > PhyloTree::initializeComputeParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad)
{
PhyloNode *node = (PhyloNode*) dad_branch->node;
@@ -966,7 +1035,28 @@ vector > PhyloTree::initializeComputeParsimony(
return result;
}
-void PhyloTree::computeParsimonyMultiThread(vector > &branch_list) {
+int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) {
+ PhyloNode *node = (PhyloNode*) dad_branch->node;
+ PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
+ assert(node_branch);
+ if (!central_partial_pars)
+ initializeAllPartialPars();
+ // swap node and dad if dad is a leaf
+ if (node->isLeaf()) {
+ PhyloNode *tmp_node = dad;
+ dad = node;
+ node = tmp_node;
+ PhyloNeighbor *tmp_nei = dad_branch;
+ dad_branch = node_branch;
+ node_branch = tmp_nei;
+ //cout << "swapped\n";
+ }
+
+ int nptn = aln->size();
+ if(!_pattern_pars) _pattern_pars = aligned_alloc(nptn+VCSIZE_USHORT);
+ memset(_pattern_pars, 0, sizeof(BootValTypePars) * (nptn+VCSIZE_USHORT));
+
+ vector > branch_list = initializeComputeParsimony(dad_branch, dad);
while (branch_list.size()) {
vector threads;
vector save_dads;
@@ -979,7 +1069,7 @@ void PhyloTree::computeParsimonyMultiThread(vectordependency >= 0);
if (node->dependency == 0) {
branch_list.pop_back();
- threads.push_back(thread(&PhyloTree::computePartialParsimony, this, dad_branch, dad));
+ threads.push_back(thread(&PhyloTree::computePartialParsimonyMultiThreads, this, dad_branch, dad, 0, 0));
save_dads.push_back(dad);
} else {
break;
@@ -991,6 +1081,38 @@ void PhyloTree::computeParsimonyMultiThread(vectordependency--;
}
}
+
+ int pars_size = getBitsBlockSize();
+ int entry_size = getBitsEntrySize();
+ int ptn_pars_start_id = pars_size - nptn - 1;
+ //int nstates = aln->num_states;
+ int i, ptn;
+ int tree_pars = 0;
+
+ if (aln->num_states == 4 && aln->seq_type == SEQ_DNA) {
+ // ULTRAFAST VERSION FOR DNA
+ for (ptn = 0; ptn < aln->size(); ptn+=8) {
+ UINT states_left = node_branch->partial_pars[ptn/8];
+ UINT states_right = dad_branch->partial_pars[ptn/8];
+ UINT states_dad = 0;
+ int maxi = aln->size() - ptn;
+ if(maxi > 8) maxi = 8;
+ for (i = 0; i< maxi; i++) {
+ UINT state_left = (states_left >> (i*4)) & 15;
+ UINT state_right = (states_right >> (i*4)) & 15;
+ UINT state_both = state_left | (state_right << 4);
+ states_dad |= dna_fitch_result[state_both] << (i*4);
+ tree_pars += dna_fitch_step[state_both] * aln->at(ptn+i).frequency;
+ _pattern_pars[ptn + i] = node_branch->partial_pars[ptn_pars_start_id + ptn + i] +
+ dad_branch->partial_pars[ptn_pars_start_id + ptn + i] + dna_fitch_step[state_both];
+ }
+ }
+ }
+ if (branch_subst)
+ *branch_subst = tree_pars;
+
+ tree_pars += node_branch->partial_pars[pars_size - 1] + dad_branch->partial_pars[pars_size - 1];
+ return tree_pars;
}
int PhyloTree::computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) {
@@ -1014,8 +1136,11 @@ int PhyloTree::computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad,
if(!_pattern_pars) _pattern_pars = aligned_alloc(nptn+VCSIZE_USHORT);
memset(_pattern_pars, 0, sizeof(BootValTypePars) * (nptn+VCSIZE_USHORT));
- vector > bfs = initializeComputeParsimony(dad_branch, dad);
- computeParsimonyMultiThread(bfs);
+ if ((dad_branch->partial_lh_computed & 2) == 0)
+ computePartialParsimony(dad_branch, dad);
+ if ((node_branch->partial_lh_computed & 2) == 0)
+ computePartialParsimony(node_branch, node);
+ // now combine likelihood at the branch
int pars_size = getBitsBlockSize();
int entry_size = getBitsEntrySize();
@@ -1112,7 +1237,7 @@ int PhyloTree::computeParsimony() {
int nptn = aln->size();
if(_pattern_pars == NULL) _pattern_pars = aligned_alloc(nptn + VCSIZE_USHORT);
- return computeParsimonyBranch((PhyloNeighbor*) root->neighbors[0], (PhyloNode*) root);
+ return computeParsimonyBranchMultiThreads((PhyloNeighbor*) root->neighbors[0], (PhyloNode*) root);
}
void PhyloTree::printParsimonyStates(PhyloNeighbor *dad_branch, PhyloNode *dad) {
diff --git a/phylotree.h b/phylotree.h
index 35596653..eaba5566 100644
--- a/phylotree.h
+++ b/phylotree.h
@@ -439,6 +439,14 @@ class PhyloTree : public MTree, public Optimization {
*/
virtual void computePartialParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad);
+ /**
+ compute partial parsimony score of the subtree rooted at dad from startPtn to endPtn
+ @param dad_branch the branch leading to the subtree
+ @param dad its dad, used to direct the tranversal
+ @param startPtn start pattern
+ @param endPtn end pattern
+ */
+ void computePartialParsimonyMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int startPtn, int endPtn);
/**
compute tree parsimony score on a branch
@@ -449,7 +457,14 @@ class PhyloTree : public MTree, public Optimization {
*/
virtual int computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst = NULL);
- void computeParsimonyMultiThread(vector > &branch_list);
+ /**
+ compute tree parsimony score on a branch
+ @param dad_branch the branch leading to the subtree
+ @param dad its dad, used to direct the tranversal
+ @param branch_subst (OUT) if not NULL, the number of substitutions on this branch
+ @return parsimony score of the tree
+ */
+ int computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst = NULL);
vector > initializeComputeParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad);
From 9165edce46208d17bed03df9b30f59a1fb1d2f1a Mon Sep 17 00:00:00 2001
From: trungnotchung
Date: Thu, 27 Mar 2025 17:05:19 +0700
Subject: [PATCH 07/14] add default thread
---
phylonode.h | 1 +
tools.h | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/phylonode.h b/phylonode.h
index a99fd094..589d7eae 100644
--- a/phylonode.h
+++ b/phylonode.h
@@ -104,6 +104,7 @@ class PhyloNeighbor : public Neighbor {
vector containing the partial parsimony scores
*/
UINT *partial_pars;
+
};
/**
diff --git a/tools.h b/tools.h
index 58e4e7d9..2926bcf2 100644
--- a/tools.h
+++ b/tools.h
@@ -414,7 +414,7 @@ struct Params {
/**
* Number of threads
*/
- int pp_thread;
+ int pp_thread = 1;
/**
* Number of starting parsimony trees
From 49a97c3c7c0715cba0e5bca964f24332ffd0df8f Mon Sep 17 00:00:00 2001
From: trungnotchung
Date: Thu, 27 Mar 2025 19:07:13 +0700
Subject: [PATCH 08/14] update code structure
---
phylotree.cpp | 18 +++++++++++-------
phylotree.h | 14 +++++++++++++-
2 files changed, 24 insertions(+), 8 deletions(-)
diff --git a/phylotree.cpp b/phylotree.cpp
index dac1f8fd..6cad51fa 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -1004,7 +1004,7 @@ void PhyloTree::computePartialParsimonyMultiThreads(PhyloNeighbor *dad_branch, P
} // END OF DNA VERSION
}
-vector > PhyloTree::initializeComputeParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad)
+vector > PhyloTree::initializeComputeParsimonyMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad)
{
PhyloNode *node = (PhyloNode*) dad_branch->node;
PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
@@ -1035,6 +1035,10 @@ vector > PhyloTree::initializeComputeParsimony(
return result;
}
+void PhyloTree::finalizeComputeParsimonyMultiThreads(vector > topo_sorted_branches) {
+
+}
+
int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) {
PhyloNode *node = (PhyloNode*) dad_branch->node;
PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
@@ -1056,19 +1060,19 @@ int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, Phy
if(!_pattern_pars) _pattern_pars = aligned_alloc(nptn+VCSIZE_USHORT);
memset(_pattern_pars, 0, sizeof(BootValTypePars) * (nptn+VCSIZE_USHORT));
- vector > branch_list = initializeComputeParsimony(dad_branch, dad);
- while (branch_list.size()) {
+ vector > topo_sorted_branches = initializeComputeParsimonyMultiThreads(dad_branch, dad);
+ while (topo_sorted_branches.size()) {
vector threads;
vector save_dads;
for (int i = 0; i < params->pp_thread; ++i) {
- if (branch_list.empty())
+ if (topo_sorted_branches.empty())
break;
- PhyloNeighbor *dad_branch = branch_list.back().first;
- PhyloNode *dad = branch_list.back().second;
+ PhyloNeighbor *dad_branch = topo_sorted_branches.back().first;
+ PhyloNode *dad = topo_sorted_branches.back().second;
PhyloNode *node = (PhyloNode*) dad_branch->node;
assert(node->dependency >= 0);
if (node->dependency == 0) {
- branch_list.pop_back();
+ topo_sorted_branches.pop_back();
threads.push_back(thread(&PhyloTree::computePartialParsimonyMultiThreads, this, dad_branch, dad, 0, 0));
save_dads.push_back(dad);
} else {
diff --git a/phylotree.h b/phylotree.h
index eaba5566..4a2cca4c 100644
--- a/phylotree.h
+++ b/phylotree.h
@@ -466,7 +466,19 @@ class PhyloTree : public MTree, public Optimization {
*/
int computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst = NULL);
- vector > initializeComputeParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad);
+ /**
+ initialize information for computing parsimony score with multi-threads
+ @param dad_branch the branch leading to the subtree
+ @param dad its dad, used to direct the tranversal
+ @return a vector of pairs of (branch, node)
+ */
+ vector > initializeComputeParsimonyMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad);
+
+ /**
+ finalize compute parsimony score with multi-threads
+ @param topo_sorted_branches a vector of pairs of (branch, node)
+ */
+ void finalizeComputeParsimonyMultiThreads(vector > topo_sorted_branches);
void printParsimonyStates(PhyloNeighbor *dad_branch = NULL, PhyloNode *dad = NULL);
From 385e272961f406ade859e0db1767c2f887d28949 Mon Sep 17 00:00:00 2001
From: trungnotchung
Date: Fri, 28 Mar 2025 20:54:25 +0700
Subject: [PATCH 09/14] compute parsimony score multi threads
---
phylonode.h | 2 --
phylotree.cpp | 78 +++++++++++++++++++++++++--------------------------
2 files changed, 38 insertions(+), 42 deletions(-)
diff --git a/phylonode.h b/phylonode.h
index 589d7eae..07fbf796 100644
--- a/phylonode.h
+++ b/phylonode.h
@@ -165,8 +165,6 @@ class PhyloNode : public Node {
tell that all partial likelihood vectors (in reverse direction) below this node are not computed
*/
void clearReversePartialLh(PhyloNode *dad);
-
- int dependency;
};
diff --git a/phylotree.cpp b/phylotree.cpp
index 6cad51fa..940c4f38 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -939,6 +939,9 @@ void PhyloTree::computePartialParsimonyMultiThreads(PhyloNeighbor *dad_branch, P
// don't recompute the parsimony
if (dad_branch->partial_lh_computed & 2)
return;
+ if (startPtn > endPtn) {
+ return;
+ }
Node *node = dad_branch->node;
//assert(node->degree() <= 3);
int ptn;
@@ -949,46 +952,43 @@ void PhyloTree::computePartialParsimonyMultiThreads(PhyloNeighbor *dad_branch, P
int ptn_pars_start_id = pars_size - nptn - 1;
assert(dad_branch->partial_pars);
- dad_branch->partial_lh_computed |= 2;
if (nstates == 4 && aln->seq_type == SEQ_DNA && (node->isLeaf() || node->degree() == 3)) {
// ULTRAFAST VERSION FOR DNA, assuming that UINT is 32-bit integer
if (node->isLeaf() && dad) {
// external node
- for (ptn = 0; ptn < aln->size(); ptn+=8) {
+ for (ptn = startPtn; ptn <= endPtn; ptn+=8) {
UINT states = 0;
- int maxi = aln->size() - ptn;
+ int maxi = endPtn - ptn + 1;
if(maxi > 8) maxi = 8;
- for (int i = 0; i< maxi; i++) {
+ for (int i = 0; i < maxi; i++) {
UINT bit_state = dna_state_map[(aln->at(ptn+i))[node->id]];
states |= (bit_state << (i*4));
dad_branch->partial_pars[ptn_pars_start_id + ptn + i] = 0;
}
dad_branch->partial_pars[ptn/8] = states;
}
- dad_branch->partial_pars[pars_size - 1] = 0; // set subtree score = 0
} else {
// internal node
- memset(dad_branch->partial_pars + ptn_pars_start_id, 0, nptn * sizeof(int));
+ memset(dad_branch->partial_pars + ptn_pars_start_id + startPtn, 0, (endPtn - startPtn + 1) * sizeof(int));
UINT *left = NULL, *right = NULL;
- int pars_steps = 0;
FOR_NEIGHBOR_IT(node, dad, it)if ((*it)->node->name != ROOT_NAME) {
- computePartialParsimony((PhyloNeighbor*) (*it), (PhyloNode*) node);
+ computePartialParsimonyMultiThreads((PhyloNeighbor*) (*it), (PhyloNode*) node, startPtn, endPtn);
if (!left)
left = ((PhyloNeighbor*) (*it))->partial_pars;
else
right = ((PhyloNeighbor*) (*it))->partial_pars;
- pars_steps += ((PhyloNeighbor*) (*it))->partial_pars[pars_size-1];
- for(int p = 0; p < nptn; p++)
+ for(int p = startPtn; p <= endPtn; p++)
dad_branch->partial_pars[ptn_pars_start_id + p] += ((PhyloNeighbor*) (*it))->partial_pars[ptn_pars_start_id + p];
}
- for (ptn = 0; ptn < aln->size(); ptn+=8) {
+ int pars_steps = 0;
+ for (ptn = startPtn; ptn <= endPtn; ptn+=8) {
UINT states_left = left[ptn/8];
UINT states_right = right[ptn/8];
UINT states_dad = 0;
- int maxi = aln->size() - ptn;
+ int maxi = endPtn - ptn + 1;
if(maxi > 8) maxi = 8;
- for (int i = 0; i< maxi; i++) {
+ for (int i = 0; i < maxi; i++) {
UINT state_left = (states_left >> (i*4)) & 15;
UINT state_right = (states_right >> (i*4)) & 15;
UINT state_both = state_left | (state_right << 4);
@@ -998,7 +998,7 @@ void PhyloTree::computePartialParsimonyMultiThreads(PhyloNeighbor *dad_branch, P
}
dad_branch->partial_pars[ptn/8] = states_dad;
}
- dad_branch->partial_pars[pars_size - 1] = pars_steps;
+ dad_branch->partial_pars[pars_size - 1] += pars_steps;
}
return;
} // END OF DNA VERSION
@@ -1010,22 +1010,20 @@ vector > PhyloTree::initializeComputeParsimonyM
PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
vector > result;
queue > q;
+ int pars_size = getBitsBlockSize();
q.push(make_pair(dad_branch, dad));
q.push(make_pair(node_branch, node));
- node->dependency = 0;
- dad->dependency = 0;
while(q.size()) {
PhyloNeighbor *cur_dad_branch = q.front().first;
PhyloNode *cur_dad = q.front().second;
q.pop();
if (cur_dad_branch->partial_lh_computed & 2)
continue;
- cur_dad->dependency++;
+ cur_dad_branch->partial_pars[pars_size - 1] = 0;
result.push_back(make_pair(cur_dad_branch, cur_dad));
PhyloNode *cur_node = (PhyloNode*) cur_dad_branch->node;
- cur_node->dependency = 0;
if (cur_node->isLeaf() && cur_dad)
continue;
FOR_NEIGHBOR_IT(cur_node, cur_dad, it)if ((*it)->node->name != ROOT_NAME) {
@@ -1036,7 +1034,17 @@ vector > PhyloTree::initializeComputeParsimonyM
}
void PhyloTree::finalizeComputeParsimonyMultiThreads(vector > topo_sorted_branches) {
-
+ int pars_size = getBitsBlockSize();
+ for (int i = (int)topo_sorted_branches.size() - 1; i >= 0; i--) {
+ PhyloNeighbor *dad_branch = topo_sorted_branches[i].first;
+ PhyloNode *dad = topo_sorted_branches[i].second;
+ dad_branch->partial_lh_computed |= 2;
+
+ PhyloNode *node = (PhyloNode*) dad_branch->node;
+ FOR_NEIGHBOR_IT(node, dad, it) if ((*it)->node->name != ROOT_NAME) {
+ dad_branch->partial_pars[pars_size - 1] += ((PhyloNeighbor*)(*it))->partial_pars[pars_size - 1];
+ }
+ }
}
int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) {
@@ -1061,30 +1069,20 @@ int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, Phy
memset(_pattern_pars, 0, sizeof(BootValTypePars) * (nptn+VCSIZE_USHORT));
vector > topo_sorted_branches = initializeComputeParsimonyMultiThreads(dad_branch, dad);
- while (topo_sorted_branches.size()) {
- vector threads;
- vector save_dads;
- for (int i = 0; i < params->pp_thread; ++i) {
- if (topo_sorted_branches.empty())
- break;
- PhyloNeighbor *dad_branch = topo_sorted_branches.back().first;
- PhyloNode *dad = topo_sorted_branches.back().second;
- PhyloNode *node = (PhyloNode*) dad_branch->node;
- assert(node->dependency >= 0);
- if (node->dependency == 0) {
- topo_sorted_branches.pop_back();
- threads.push_back(thread(&PhyloTree::computePartialParsimonyMultiThreads, this, dad_branch, dad, 0, 0));
- save_dads.push_back(dad);
- } else {
- break;
- }
+ vector threads;
+ int ptnPerThread = (aln->size()) / params->pp_thread;
+ if (ptnPerThread % 8 != 0) ptnPerThread += 8 - (ptnPerThread % 8);
+ for (int i = 0; i < params->pp_thread; i++) {
+ int startPtn = i * ptnPerThread;
+ int endPtn = min((int)aln->size(), (i + 1) * ptnPerThread) - 1;
+ if ((dad_branch->partial_lh_computed & 2) == 0) {
+ threads.push_back(thread(&PhyloTree::computePartialParsimonyMultiThreads, this, dad_branch, dad, startPtn, endPtn));
}
- for (auto &thread : threads)
- thread.join();
- for (auto &dad : save_dads) {
- dad->dependency--;
+ if ((node_branch->partial_lh_computed & 2) == 0) {
+ threads.push_back(thread(&PhyloTree::computePartialParsimonyMultiThreads, this, node_branch, node, startPtn, endPtn));
}
}
+ finalizeComputeParsimonyMultiThreads(topo_sorted_branches);
int pars_size = getBitsBlockSize();
int entry_size = getBitsEntrySize();
From 210efd12cfeb41a6e5e782ff69e2b3e93397ad22 Mon Sep 17 00:00:00 2001
From: trungnotchung
Date: Fri, 28 Mar 2025 21:00:39 +0700
Subject: [PATCH 10/14] fix missing joining threads
---
phylotree.cpp | 3 +++
1 file changed, 3 insertions(+)
diff --git a/phylotree.cpp b/phylotree.cpp
index 940c4f38..aeb0c199 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -1082,6 +1082,9 @@ int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, Phy
threads.push_back(thread(&PhyloTree::computePartialParsimonyMultiThreads, this, node_branch, node, startPtn, endPtn));
}
}
+ for (int i = 0; i < (int)threads.size(); ++i) {
+ threads[i].join();
+ }
finalizeComputeParsimonyMultiThreads(topo_sorted_branches);
int pars_size = getBitsBlockSize();
From 6bc49baf0d8c37b414dc8d27961e22ddd64a1756 Mon Sep 17 00:00:00 2001
From: trungnotchung
Date: Fri, 4 Apr 2025 18:29:52 +0700
Subject: [PATCH 11/14] add mutex
---
phylonode.h | 1 +
phylotree.cpp | 2 ++
2 files changed, 3 insertions(+)
diff --git a/phylonode.h b/phylonode.h
index 07fbf796..e0e0a3ba 100644
--- a/phylonode.h
+++ b/phylonode.h
@@ -105,6 +105,7 @@ class PhyloNeighbor : public Neighbor {
*/
UINT *partial_pars;
+ std::mutex partial_lh_mutex;
};
/**
diff --git a/phylotree.cpp b/phylotree.cpp
index aeb0c199..aedb9a2b 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -998,7 +998,9 @@ void PhyloTree::computePartialParsimonyMultiThreads(PhyloNeighbor *dad_branch, P
}
dad_branch->partial_pars[ptn/8] = states_dad;
}
+ dad_branch->partial_lh_mutex.lock();
dad_branch->partial_pars[pars_size - 1] += pars_steps;
+ dad_branch->partial_lh_mutex.unlock();
}
return;
} // END OF DNA VERSION
From b15b4f0efa6cb3616b83cffce3e33a0ebe35f7b8 Mon Sep 17 00:00:00 2001
From: trungnotchung
Date: Fri, 11 Apr 2025 16:51:06 +0700
Subject: [PATCH 12/14] fix missing column in alignment
---
phylotree.cpp | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/phylotree.cpp b/phylotree.cpp
index aedb9a2b..4b510d91 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -1072,8 +1072,9 @@ int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, Phy
vector > topo_sorted_branches = initializeComputeParsimonyMultiThreads(dad_branch, dad);
vector threads;
- int ptnPerThread = (aln->size()) / params->pp_thread;
+ int ptnPerThread = (aln->size() + params->pp_thread - 1) / params->pp_thread;
if (ptnPerThread % 8 != 0) ptnPerThread += 8 - (ptnPerThread % 8);
+ // int ptnPerThread = aln->size();
for (int i = 0; i < params->pp_thread; i++) {
int startPtn = i * ptnPerThread;
int endPtn = min((int)aln->size(), (i + 1) * ptnPerThread) - 1;
@@ -1088,6 +1089,11 @@ int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, Phy
threads[i].join();
}
finalizeComputeParsimonyMultiThreads(topo_sorted_branches);
+ // if ((dad_branch->partial_lh_computed & 2) == 0)
+ // computePartialParsimony(dad_branch, dad);
+ // if ((node_branch->partial_lh_computed & 2) == 0)
+ // computePartialParsimony(node_branch, node);
+ // // now combine likelihood at the branch
int pars_size = getBitsBlockSize();
int entry_size = getBitsEntrySize();
From a0eeb26dd41c0751262c0148fad3a33adeb0f4df Mon Sep 17 00:00:00 2001
From: trungnotchung
Date: Fri, 11 Apr 2025 18:37:15 +0700
Subject: [PATCH 13/14] stop if has no column
---
phylotree.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/phylotree.cpp b/phylotree.cpp
index 4b510d91..4f959fd1 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -1078,6 +1078,7 @@ int PhyloTree::computeParsimonyBranchMultiThreads(PhyloNeighbor *dad_branch, Phy
for (int i = 0; i < params->pp_thread; i++) {
int startPtn = i * ptnPerThread;
int endPtn = min((int)aln->size(), (i + 1) * ptnPerThread) - 1;
+ if (startPtn > endPtn) continue;
if ((dad_branch->partial_lh_computed & 2) == 0) {
threads.push_back(thread(&PhyloTree::computePartialParsimonyMultiThreads, this, dad_branch, dad, startPtn, endPtn));
}
From 447d0c30094f9ef7bcc13bf7660a0ba22155ace8 Mon Sep 17 00:00:00 2001
From: loilon504
Date: Tue, 22 Apr 2025 15:54:31 +0700
Subject: [PATCH 14/14] fix: add pthread and mutex lib
---
CMakeLists.txt | 2 ++
phylonode.h | 26 ++++++++++++++------------
2 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index fce4a26b..16d49c50 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -516,3 +516,5 @@ set(CPACK_STRIP_FILES TRUE)
include (CPack)
add_custom_target(dist COMMAND ${CMAKE_MAKE_PROGRAM} package_source)
+
+target_link_libraries(mpboot pthread)
diff --git a/phylonode.h b/phylonode.h
index e0e0a3ba..8051a788 100644
--- a/phylonode.h
+++ b/phylonode.h
@@ -14,6 +14,7 @@
#include "node.h"
#include "queue"
+#include
typedef short int UBYTE;
@@ -22,7 +23,8 @@ A neighbor in a phylogenetic tree
@author BUI Quang Minh, Steffen Klaere, Arndt von Haeseler
*/
-class PhyloNeighbor : public Neighbor {
+class PhyloNeighbor : public Neighbor
+{
friend class PhyloNode;
friend class PhyloTree;
friend class IQTree;
@@ -38,7 +40,8 @@ class PhyloNeighbor : public Neighbor {
@param alength length of branch
*/
- PhyloNeighbor(Node *anode, double alength) : Neighbor(anode, alength) {
+ PhyloNeighbor(Node *anode, double alength) : Neighbor(anode, alength)
+ {
partial_lh = NULL;
partial_lh_computed = 0;
lh_scale_factor = 0.0;
@@ -51,7 +54,8 @@ class PhyloNeighbor : public Neighbor {
@param alength length of branch
@param aid branch ID
*/
- PhyloNeighbor(Node *anode, double alength, int aid) : Neighbor(anode, alength, aid) {
+ PhyloNeighbor(Node *anode, double alength, int aid) : Neighbor(anode, alength, aid)
+ {
partial_lh = NULL;
partial_lh_computed = 0;
lh_scale_factor = 0.0;
@@ -61,14 +65,16 @@ class PhyloNeighbor : public Neighbor {
/**
tell that the partial likelihood vector is not computed
*/
- inline void clearPartialLh() {
+ inline void clearPartialLh()
+ {
partial_lh_computed = 0;
}
/**
* tell that the partial likelihood vector is computed
*/
- inline void unclearPartialLh() {
+ inline void unclearPartialLh()
+ {
partial_lh_computed = 1;
}
@@ -79,7 +85,6 @@ class PhyloNeighbor : public Neighbor {
void clearForwardPartialLh(Node *dad);
private:
-
/**
true if the partial likelihood was computed
*/
@@ -113,7 +118,8 @@ A node in a phylogenetic tree
@author BUI Quang Minh, Steffen Klaere, Arndt von Haeseler
*/
-class PhyloNode : public Node {
+class PhyloNode : public Node
+{
friend class PhyloTree;
public:
@@ -155,8 +161,6 @@ class PhyloNode : public Node {
*/
virtual void addNeighbor(Node *node, double length, int id = -1);
-
-
/**
tell that all partial likelihood vectors below this node are not computed
*/
@@ -168,11 +172,9 @@ class PhyloNode : public Node {
void clearReversePartialLh(PhyloNode *dad);
};
-
/**
Node vector
*/
-typedef vector PhyloNodeVector;
-
+typedef vector PhyloNodeVector;
#endif