Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified ._combine_trips_xml_into_scene.py
Binary file not shown.
Binary file modified ._parse_trips_xml.py
Binary file not shown.
29 changes: 25 additions & 4 deletions parse_trips_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,22 @@ def get_clean_parse(fileName):
root_pattern = a.split("<rdf:RDF")[1].split("<rdf:Description")[0]
rdf_pattern = "<rdf:RDF " + root_pattern \
+ new_rdf_pattern + "</rdf:RDF>"
except IndexError:
out = "---------------------------------------"
except IndexError as iError:
out = "---------------------------------------\n"
out +="Affected file:\n"
out += fileName.split("/")[-1] + "\n"
out += "Error Args:\n"
for arg in iError.args:
out+=str(arg)+"\n"
out += "XML error info: \n"
root = ET.parse(fileName).getroot()
out += str(root.get('error'))+'\n'
if args.removeBadFiles != None:
if bool(args.removeBadFiles):
os.remove(fileName)
out += "Removed file because removeBadFiles flag was set to True\n"
error.write(out)
return
return 1

pattern = re.compile(r' input=".+\n')
if pattern.search(a):
Expand Down Expand Up @@ -127,6 +138,7 @@ def get_clean_parse(fileName):

with open(fileName + '.clean', 'w') as new:
new.write(myparse)
return 0


if __name__ == '__main__':
Expand All @@ -137,7 +149,14 @@ def get_clean_parse(fileName):
dest="path",
required=True,
help='path to the input directory containing all TRIPS XML parses')
parser.add_argument(
"--removeBadFiles",
dest="removeBadFiles",
required=False,
help='set to a True or False value to indicate if bad' +
'files should be removed when parsing the input directory')
args = parser.parse_args()
nFilesAffected=0

with open(time.strftime("%Y%m%d-%H%M") + '.err', 'a') as error:
error.write("The following files did not have a parse.\n\n")
Expand All @@ -148,5 +167,7 @@ def get_clean_parse(fileName):
if fileName.endswith(".xml"):
with open(os.path.join(args.path, dirName, fileName), 'r') as f:
a = f.read()
get_clean_parse(os.path.join(dirName, fileName))
nFilesAffected += get_clean_parse(os.path.join(dirName, fileName))
error.write("---------------------------------------\n")
error.write("Number of files affected: "+str(nFilesAffected)+"\n")
print("********************\nCleaned parses are in the same directory as the original parse files.\n\nfileName %s in the current directory contains the list of files that did not have parses to be cleaned.\n********************\n" % str(error).split("'")[1])