Skip to content

Commit fd72f9d

Browse files
committed
added apobec_csv, unusual_csv, sdrms_csv and mutation_csv as arguments
1 parent 1d882a9 commit fd72f9d

2 files changed

Lines changed: 59 additions & 7 deletions

File tree

sierralocal/jsonwriter.py

Lines changed: 42 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010

1111

1212
class JSONWriter():
13-
def __init__(self, algorithm):
13+
def __init__(self, algorithm, apobec_csv, unusual_csv, sdrms_csv, mutation_csv):
1414
# possible alternative drug abbrvs
1515
self.names = {'3TC': 'LMV'}
1616

@@ -39,7 +39,16 @@ def __init__(self, algorithm):
3939
self.rt_comments = dict(csv.reader(rt_file, delimiter='\t'))
4040

4141
# make dictionary for isUnusual
42-
dest = str(Path(os.path.dirname(__file__)) / 'data' / 'rx-all_subtype-all.csv')
42+
if unusual_csv is None:
43+
dest = str(Path(os.path.dirname(__file__)) / 'data' / 'rx-all_subtype-all.csv')
44+
else:
45+
if os.path.isfile(unusual_csv): # Ensure is a file
46+
dest = unusual_csv
47+
else:
48+
raise FileNotFoundError(
49+
"Path to CSV file to determine if is unusual cannot be found at user specified "
50+
"path {}".format(unusual_csv))
51+
print("Using unusual file: "+dest)
4352
with open(dest, 'r', encoding='utf-8-sig') as is_unusual_file:
4453
is_unusual_file = csv.DictReader(is_unusual_file)
4554
self.is_unusual_dic = {}
@@ -54,7 +63,16 @@ def __init__(self, algorithm):
5463
self.is_unusual_dic[gene].update({pos: {}})
5564
self.is_unusual_dic[gene][pos].update({aa: unusual})
5665

57-
dest = str(Path(os.path.dirname(__file__)) / 'data' / 'sdrms_hiv1.csv')
66+
if sdrms_csv is None:
67+
dest = str(Path(os.path.dirname(__file__)) / 'data' / 'sdrms_hiv1.csv')
68+
else:
69+
if os.path.isfile(sdrms_csv): # Ensure is a file
70+
dest = sdrms_csv
71+
else:
72+
raise FileNotFoundError(
73+
"Path to CSV file to determine SDRM mutations cannot be found at user specified "
74+
"path {}".format(sdrms_csv))
75+
print("Using SDRM mutations file: "+dest)
5876
with open(dest, 'r', encoding='utf-8-sig') as sdrm_files:
5977
sdrm_files = csv.DictReader(sdrm_files)
6078
self.sdrm_dic = {}
@@ -86,7 +104,17 @@ def __init__(self, algorithm):
86104
self.apobec_drm_dic[gene][position] += aa
87105

88106
# make dictionary for primary type
89-
dest = str(Path(os.path.dirname(__file__)) / 'data' / 'mutation-type-pairs_hiv1.csv')
107+
if mutation_csv is None:
108+
dest = str(Path(os.path.dirname(__file__)) / 'data' / 'mutation-type-pairs_hiv1.csv')
109+
else:
110+
if os.path.isfile(mutation_csv): # Ensure is a file
111+
dest = mutation_csv
112+
else:
113+
raise FileNotFoundError(
114+
"Path to CSV file to determine mutation type cannot be found at user specified "
115+
"path {}".format(mutation_csv))
116+
117+
print("Using mutation type file: "+dest)
90118
with open(dest, 'r', encoding='utf-8-sig') as mut_type_pairs1_files:
91119
mut_type_pairs1_files = csv.DictReader(mut_type_pairs1_files)
92120
self.primary_type_dic = {}
@@ -102,7 +130,16 @@ def __init__(self, algorithm):
102130
self.primary_type_dic[gene][pos].update({aa: mut})
103131

104132
# make dictionary for apobec mutations
105-
dest = str(Path(os.path.dirname(__file__)) / 'data' / 'apobecs.csv')
133+
if apobec_csv is None:
134+
dest = str(Path(os.path.dirname(__file__)) / 'data' / 'apobecs.csv')
135+
else:
136+
if os.path.isfile(apobec_csv): # Ensure is a file
137+
dest = apobec_csv
138+
else:
139+
raise FileNotFoundError(
140+
"Path to CSV file with APOBEC cannot be found at user specified "
141+
"path {}".format(apobec_csv))
142+
print("Using APOBEC file: "+dest)
106143
with open(dest, 'r', encoding='utf-8-sig') as apobec_mutations:
107144
apobec_mutations = csv.DictReader(apobec_mutations)
108145
self.apobec_mutations_dic = {}

sierralocal/main.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ def scorefile(input_file, algorithm, do_subtype=False, program='post'):
123123
file_genes, sequence_lengths, file_trims, subtypes, na_sequence, ambiguous, gene_order
124124

125125
def sierralocal(fasta, outfile, xml=None, json=None, cleanup=False, forceupdate=False,
126+
apobec_csv=None, unusual_csv=None, sdrms_csv=None, mutation_csv=None,
126127
program='post', do_subtype=False): # pragma: no cover
127128
"""
128129
Contains all initializing and processing calls.
@@ -134,13 +135,17 @@ def sierralocal(fasta, outfile, xml=None, json=None, cleanup=False, forceupdate=
134135
@param json: <optional> str, path to local copy of HIVdb algorithm APOBEC DRM file
135136
@param cleanup: <optional> bool, to delete alignment file
136137
@param forceupdate: <optional> bool, forces sierralocal to update its local copy of the HIVdb algorithm
138+
@param apobec_csv: <optional> str, path to the APOBEC CSV file (default: apobecs.csv)
139+
@param unusual_csv: <optional> str, path to the CSV file used for the isUnusual lookup (default: rx-all_subtype-all.csv)
140+
@param sdrms_csv: <optional> str, path to the CSV file used to determine SDRM mutations (default: sdrms_hiv1.csv)
141+
@param mutation_csv: <optional> str, path to the CSV file used to determine mutation type (default: mutation-type-pairs_hiv1.csv)
137142
@return: tuple, a tuple of (number of records processed, time elapsed initializing algorithm)
138143
"""
139144

140145
# initialize algorithm and jsonwriter
141146
time0 = time.time()
142147
algorithm = HIVdb(asi2=xml, apobec=json, forceupdate=forceupdate)
143-
writer = JSONWriter(algorithm)
148+
writer = JSONWriter(algorithm, apobec_csv, unusual_csv, sdrms_csv, mutation_csv)
144149
time_elapsed = time.time() - time0
145150

146151
# accommodate single file path argument
@@ -197,10 +202,18 @@ def parse_args(): # pragma: no cover
197202
help='Forces update of HIVdb algorithm. Requires network connection.')
198203
parser.add_argument('-alignment', default='post', choices=['post', 'nuc'],
199204
help='Alignment program to use, "post" for post align and "nuc" for nucamino')
205+
parser.add_argument('-apobec_csv', default=None,
206+
help='<optional> Path to CSV APOBEC csv file (default: apobecs.csv)')
207+
parser.add_argument('-unusual_csv', default=None,
208+
help='<optional> Path to CSV file to determine if is unusual (default: rx-all_subtype-all.csv)')
209+
parser.add_argument('-sdrms_csv', default=None,
210+
help='<optional> Path to CSV file to determine SDRM mutations (default: sdrms_hiv1.csv)')
211+
parser.add_argument('-mutation_csv', default=None,
212+
help='<optional> Path to CSV file to determine mutation type (default: mutation-type-pairs_hiv1.csv)')
213+
200214
args = parser.parse_args()
201215
return args
202216

203-
204217
def main(): # pragma: no cover
205218
"""
206219
Main function called from CLI.
@@ -216,6 +229,8 @@ def main(): # pragma: no cover
216229
time_start = time.time()
217230
count, time_elapsed = sierralocal(args.fasta, args.outfile, xml=args.xml,
218231
json=args.json, cleanup=args.cleanup, forceupdate=args.forceupdate,
232+
apobec_csv=args.apobec_csv, unusual_csv=args.unusual_csv,
233+
sdrms_csv=args.sdrms_csv, mutation_csv=args.mutation_csv,
219234
program=args.alignment)
220235
time_diff = time.time() - time_start
221236

0 commit comments

Comments
 (0)