|
1 | 1 | import argparse |
| 2 | +import itertools |
| 3 | +import json |
2 | 4 | import logging |
3 | 5 | import os |
4 | 6 | import sys |
5 | 7 | from pathlib import Path |
6 | 8 |
|
7 | 9 | from . import __version__ |
8 | 10 | from .allele_call import allele_call |
9 | | -from .update import update_directory |
| 11 | +from .update import update_directory, get_known_alleles |
10 | 12 | from .tabulate import tabulate_calls |
11 | 13 |
|
12 | 14 | # Ensure numpy, via pandas, doesn't use more than 1 thread. |
@@ -122,19 +124,82 @@ def main(): |
122 | 124 | args.func(args) |
123 | 125 |
|
124 | 126 |
|
def validate_fasta(fasta_path: Path):
    """Check that *fasta_path* is a file that get_known_alleles can read.

    Returns an ``(error_flag, message)`` tuple: ``(0, "")`` on success,
    otherwise ``(1, <description of the problem>)``.
    """
    if fasta_path.is_file():
        try:
            get_known_alleles(fasta_path)
        except (UnboundLocalError, UnicodeDecodeError):
            # Either exception from the parser is reported as "not FASTA".
            # NOTE(review): UnboundLocalError presumably comes from the
            # parser never seeing a header line -- confirm in update.py.
            outcome = (1, f"{fasta_path} is not in FASTA format")
        else:
            outcome = (0, "")
    else:
        outcome = (1, f"{fasta_path} does not exist")

    return outcome
| 142 | + |
| 143 | + |
def validate_json(json_path: Path):
    """Check that *json_path* is an existing file containing valid JSON.

    Returns an ``(error_flag, message)`` tuple: ``(0, "")`` when the file
    parses, otherwise ``(1, <description of the problem>)``.
    """
    if not json_path.is_file():
        return (1, f"{json_path} does not exist")

    try:
        # Decode as UTF-8 explicitly so the result does not depend on the
        # platform's locale encoding.
        with json_path.open("r", encoding="utf-8") as f:
            json.load(f)

    # A file that is not even decodable text (e.g. a stray binary in the
    # directory) must be reported as an input error, not crash validation.
    except (json.JSONDecodeError, UnicodeDecodeError):
        return (1, f"{json_path} is not a valid JSON file")

    return (0, "")
| 155 | + |
| 156 | + |
def validate_directory(dir_path: Path, validation_method):
    """Apply *validation_method* to every entry directly inside *dir_path*.

    Returns a list with one result per entry, in the order the entries are
    globbed. If *dir_path* is not a directory, the list holds a single
    ``(1, message)`` error tuple instead.
    """
    if dir_path.is_dir():
        return list(map(validation_method, dir_path.glob("*")))

    return [(1, f"{dir_path} is not a directory")]
| 165 | + |
| 166 | + |
def validate(*args):
    """Summarise validation results and exit the program if any failed.

    Each positional argument is an iterable of ``(error_flag, message)``
    tuples, as produced by the ``validate_*`` helpers. If the error flags
    sum to more than zero, the non-empty messages are printed and the
    process exits with the error count as its status (capped at 255).
    Returns ``None`` when there are no errors, including when no results
    were supplied at all (e.g. an empty directory was validated).
    """
    # Materialise once; unpacking zip(*...) directly raises ValueError when
    # every argument is empty.
    results = list(itertools.chain.from_iterable(args))

    n_errors = sum(error for error, _ in results)

    if n_errors > 0:

        print(f"Got {n_errors} input errors:")
        print('\n'.join(message for _, message in results if message))
        print("Exiting.")
        # Exit statuses are 8 bits wide on POSIX: an uncapped count of 256
        # would wrap around to 0 and signal success.
        sys.exit(min(n_errors, 255))
| 179 | + |
| 180 | + |
def call_alleles(args):
    """Validate the inputs in *args*, then run allele_call on them.

    ``args.input`` is checked as a single FASTA file and ``args.alleles``
    as a directory of FASTA files before any calling is attempted.
    """
    input_report = [validate_fasta(args.input)]
    alleles_report = validate_directory(args.alleles, validate_fasta)
    validate(input_report, alleles_report)

    allele_call(args.input, args.alleles, args.output)
128 | 187 |
|
129 | 188 |
|
def update_results(args):
    """Validate the inputs in *args*, then run update_directory on them.

    ``args.json_dir`` is checked as a directory of JSON files, and
    ``args.alleles`` and ``args.genome_dir`` as directories of FASTA files.
    """
    json_report = validate_directory(args.json_dir, validate_json)
    alleles_report = validate_directory(args.alleles, validate_fasta)
    genome_report = validate_directory(args.genome_dir, validate_fasta)
    validate(json_report, alleles_report, genome_report)

    update_directory(args.json_dir, args.alleles,
                     args.threshold, args.genome_dir)
134 | 197 |
|
135 | 198 |
|
def tabulate_allele_calls(args):
    """Validate ``args.json_dir`` as a directory of JSON files, then
    run tabulate_calls on it."""
    json_report = validate_directory(args.json_dir, validate_json)
    validate(json_report)

    tabulate_calls(args.json_dir, args.output, args.delimiter)
139 | 204 |
|
140 | 205 |
|
|
0 commit comments