From d755cf8379c7932978c37830df663c8f9979e2aa Mon Sep 17 00:00:00 2001 From: Michael Date: Wed, 14 Feb 2018 19:04:04 -0600 Subject: [PATCH 1/2] Parse Error Reporting parse_trips_xml.py has some updates to make error file more readable. Also added count of affected files at the end. --- ._parse_trips_xml.py | Bin 4096 -> 4096 bytes parse_trips_xml.py | 16 ++++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/._parse_trips_xml.py b/._parse_trips_xml.py index 68c2cf5dbc35497a579e64295d281f8adaec5a66..07c13ec933c6a1d39380a86ea1386f9d33a888e2 100644 GIT binary patch delta 34 ocmZorXi(S?$H-H3vn7fF2rin12yHH8Easiuz{tM&37-rj0KIJrbN~PV delta 34 ocmZorXi(S?$H;T_R7(^C5XAAH6Wm" - except IndexError: - out = "---------------------------------------" + except IndexError as iError: + out = "---------------------------------------\n" + out +="Affected file:\n" out += fileName.split("/")[-1] + "\n" + out += "Error Args:\n" + for arg in iError.args: + out+=str(arg)+"\n" error.write(out) - return + return 1 pattern = re.compile(r' input=".+\n') if pattern.search(a): @@ -127,6 +131,7 @@ def get_clean_parse(fileName): with open(fileName + '.clean', 'w') as new: new.write(myparse) + return 0 if __name__ == '__main__': @@ -138,6 +143,7 @@ def get_clean_parse(fileName): required=True, help='path to the input directory containing all TRIPS XML parses') args = parser.parse_args() + nFilesAffected=0 with open(time.strftime("%Y%m%d-%H%M") + '.err', 'a') as error: error.write("The following files did not have a parse.\n\n") @@ -148,5 +154,7 @@ def get_clean_parse(fileName): if fileName.endswith(".xml"): with open(os.path.join(args.path, dirName, fileName), 'r') as f: a = f.read() - get_clean_parse(os.path.join(dirName, fileName)) + nFilesAffected += get_clean_parse(os.path.join(dirName, fileName)) + error.write("---------------------------------------\n") + error.write("Number of files affected: "+str(nFilesAffected)+"\n") print("********************\nCleaned parses are 
in the same directory as the original parse files.\n\nfileName %s in the current directory contains the list of files that did not have parses to be cleaned.\n********************\n" % str(error).split("'")[1]) From 22e21e732e41440aee12d3d9cf91f450496f3e54 Mon Sep 17 00:00:00 2001 From: Michael Date: Fri, 16 Feb 2018 00:06:37 -0600 Subject: [PATCH 2/2] Error Reporting Update parse_trips_xml.py now adds reasons files were skipped. Also added feature where skipped files are deleted after being logged in the error file. This feature is intended for use when testing an environment, not for use with production data. --- ._combine_trips_xml_into_scene.py | Bin 4096 -> 4096 bytes ._parse_trips_xml.py | Bin 4096 -> 4096 bytes parse_trips_xml.py | 13 +++++++++++++ 3 files changed, 13 insertions(+) diff --git a/._combine_trips_xml_into_scene.py b/._combine_trips_xml_into_scene.py index 5de11be0905d8a9ee08a0af13111f704c5867d5f..2fd018e44daff92d9ca5e194b8fada682c2bdddc 100644 GIT binary patch delta 25 fcmZorXi(Uo#mI9wyDf?V2)<2l64-3Uc#01IW;F-U delta 25 fcmZorXi(Uo#mI9)y()?U2n>_&>Tfn