diff --git a/._combine_trips_xml_into_scene.py b/._combine_trips_xml_into_scene.py index 5de11be..2fd018e 100644 Binary files a/._combine_trips_xml_into_scene.py and b/._combine_trips_xml_into_scene.py differ diff --git a/._parse_trips_xml.py b/._parse_trips_xml.py index 68c2cf5..664fdcb 100644 Binary files a/._parse_trips_xml.py and b/._parse_trips_xml.py differ diff --git a/parse_trips_xml.py b/parse_trips_xml.py index 631a4b4..7282e5b 100644 --- a/parse_trips_xml.py +++ b/parse_trips_xml.py @@ -21,11 +21,22 @@ def get_clean_parse(fileName): root_pattern = a.split("" - except IndexError: - out = "---------------------------------------" + except IndexError as iError: + out = "---------------------------------------\n" + out +="Affected file:\n" out += fileName.split("/")[-1] + "\n" + out += "Error Args:\n" + for arg in iError.args: + out+=str(arg)+"\n" + out += "XML error info: \n" + root = ET.parse(fileName).getroot() + out += str(root.get('error'))+'\n' + if args.removeBadFiles != None: + if bool(args.removeBadFiles): + os.remove(fileName) + out += "Removed file because removeBadFiles flag was set to True\n" error.write(out) - return + return 1 pattern = re.compile(r' input=".+\n') if pattern.search(a): @@ -127,6 +138,7 @@ def get_clean_parse(fileName): with open(fileName + '.clean', 'w') as new: new.write(myparse) + return 0 if __name__ == '__main__': @@ -137,7 +149,14 @@ def get_clean_parse(fileName): dest="path", required=True, help='path to the input directory containing all TRIPS XML parses') + parser.add_argument( + "--removeBadFiles", + dest="removeBadFiles", + required=False, + help='set to a True or False value to indicate if bad' + + 'files should be removed when parsing the input directory') args = parser.parse_args() + nFilesAffected=0 with open(time.strftime("%Y%m%d-%H%M") + '.err', 'a') as error: error.write("The following files did not have a parse.\n\n") @@ -148,5 +167,7 @@ def get_clean_parse(fileName): if fileName.endswith(".xml"): with open(os.path.join(args.path, dirName, fileName), 'r') as f: a = f.read() - get_clean_parse(os.path.join(dirName, fileName)) + nFilesAffected += get_clean_parse(os.path.join(dirName, fileName)) + error.write("---------------------------------------\n") + error.write("Number of files affected: "+str(nFilesAffected)+"\n") print("********************\nCleaned parses are in the same directory as the original parse files.\n\nfileName %s in the current directory contains the list of files that did not have parses to be cleaned.\n********************\n" % str(error).split("'")[1])