-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtree_analysis.py
More file actions
124 lines (108 loc) · 4.29 KB
/
tree_analysis.py
File metadata and controls
124 lines (108 loc) · 4.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env python3
'''
Script for assignment PathoSense:
Takes a Newick file that includes the sample strain and
visualizes a reduced tree, pruned using an input distance threshold, as a PDF.
Example of how to run the script:
./tree_analysis.py -f full_alignment.fasta.treefile -o pruned_tree.pdf
--sample_strains Porcine_parvovirus_1_1-0004394_BEL_Dec2022_30X Porcine
--type_strains PPV1_IDT_DEU_1964 PPV1_27a_DEU_2001
PPV1_NADL2_USA_1964 PPV1_NADL8_USA_1976
--distance 0.0007
Author: Tristan Vanneste
Date: February 2023
'''
# libraries
from ete3 import Tree, TreeStyle, NodeStyle
import statistics
import argparse
def parse_arguments():
    """Parse the command-line arguments of the script.

    Options:
        -f: input Newick file (required).
        -o: output name for the pdf (required).
        --sample_strains: zero or more sample strain names.
        --type_strains: zero or more type strain names.
        --distance: distance to filter on (default 0.001).

    Returns:
        argparse.Namespace with the attributes ``input_file``,
        ``output_name``, ``sample_strains``, ``type_strains`` and
        ``distance``. Both strain attributes are always lists, also when
        the corresponding option is omitted.
    """
    parser = argparse.ArgumentParser(description='')
    parser.add_argument("-f",
                        dest="input_file",
                        required=True,
                        type=str,
                        help="input Newick file.")
    parser.add_argument("-o",
                        dest="output_name",
                        required=True,
                        type=str,
                        help="Output name for the pdf.")
    # The defaults are empty lists (not ""), so an omitted option has the
    # same type as a supplied one and the two can be concatenated safely.
    parser.add_argument("--sample_strains",
                        dest="sample_strains",
                        nargs="*",
                        default=[],
                        type=str,
                        help="Sample strains.")
    parser.add_argument("--type_strains",
                        dest="type_strains",
                        nargs="*",
                        default=[],
                        type=str,
                        help="Type strains.")
    parser.add_argument("--distance",
                        dest="distance",
                        default=0.001,
                        type=float,
                        help="The distance where you want to filter on.")
    # Parse sys.argv and hand the resulting namespace back to the caller.
    return parser.parse_args()
# Helper function used by main
def _make_leaf_style(color, shape, size, line_type):
    """Build a NodeStyle with the given colour, shape, size and vertical line type."""
    style = NodeStyle()
    style["fgcolor"] = color
    style["shape"] = shape
    style["size"] = size
    style["vt_line_type"] = line_type
    return style


def prune_tree_by_distance(tree: "Tree", dist: float, sample_strains: list, type_strains: list):
    """Prune the tree in place and style the leaves that remain.

    Pruning: the nodes are visited in preorder. For every node the mean of
    the ``dist`` values of the leaves below it is computed; when that mean
    is below ``dist``, each of those leaves is deleted unless its name is a
    type strain or a sample strain.

    Styling: every leaf left in the tree gets a node style that depends on
    whether it is a type strain, a sample strain or neither. A name listed
    as both is styled as a type strain.

    Arguments:
        tree: The loaded tree object (not a filename); it is modified in place
        dist: The distance where you want to filter on
        sample_strains: The names of the sample strains
        type_strains: The names of the type strains
    """
    # Build the lookup sets once instead of concatenating the two
    # collections for every leaf; this also works when one of them is an
    # empty string rather than a list.
    type_names = set(type_strains)
    sample_names = set(sample_strains)
    protected = type_names | sample_names

    for node in tree.traverse("preorder"):
        # Snapshot the leaves first: deleting while the leaf iterator of
        # the same subtree is still running would mutate what it walks.
        leaves = list(node)
        if statistics.mean([leaf.dist for leaf in leaves]) < dist:
            for leaf in leaves:
                if leaf.name not in protected:
                    leaf.delete()

    # A single pass over the leaves of the whole tree gives each leaf its
    # final style; the style depends only on the leaf name.
    for leaf in tree:
        if leaf.name in type_names:
            # Leaf is a type_strain: black circle, dashed line
            leaf.img_style = _make_leaf_style("#000000", "circle", 10, 1)
        elif leaf.name in sample_names:
            # Leaf is a sample_strain: green circle, dotted line
            leaf.img_style = _make_leaf_style("#14e05c", "circle", 15, 2)
        else:
            # Leaf is no sample_strain or type_strain: dark red sphere, solid line
            leaf.img_style = _make_leaf_style("darkred", "sphere", 5, 0)
if __name__ == '__main__':
    args = parse_arguments()
    # Load a tree structure from a newick file.
    t = Tree(args.input_file)
    # Pruning the tree (the strain arguments are normalised to lists so
    # that an omitted option and a supplied one have the same type)
    prune_tree_by_distance(t,
                           args.distance,
                           list(args.sample_strains),
                           list(args.type_strains))
    # The tree style handed to render(); it was referenced before but
    # never created, which raised a NameError at this point.
    ts = TreeStyle()
    # Saving pruned tree to output
    t.render(args.output_name, tree_style=ts, w=200, units="mm")