From 327fc34e225f1848144af6504899998a31dc995f Mon Sep 17 00:00:00 2001
From: Jacob Calcutt
Date: Fri, 31 May 2024 11:26:07 -0500
Subject: [PATCH 1/3] Adding tools for making standard metadata

---
 utilities/get_events_for_md.py | 40 +++++++++++++++++++++
 utilities/inherit_metadata.py  | 63 ++++++++++++++++++++++++++++++++++
 utilities/meta_maker.py        | 54 +++++++++++++++++++++++++++++
 utilities/place_metadata.py    | 31 +++++++++++++++++
 4 files changed, 188 insertions(+)
 create mode 100644 utilities/get_events_for_md.py
 create mode 100644 utilities/inherit_metadata.py
 create mode 100644 utilities/meta_maker.py
 create mode 100644 utilities/place_metadata.py

diff --git a/utilities/get_events_for_md.py b/utilities/get_events_for_md.py
new file mode 100644
index 0000000..da4bd2a
--- /dev/null
+++ b/utilities/get_events_for_md.py
@@ -0,0 +1,40 @@
+import ROOT as RT
+import json
+from argparse import ArgumentParser as ap
+
+def get_events(filename):
+    ev = RT.gallery.Event(RT.vector(RT.string)(1, filename))
+    event_numbers = []
+    for i in range(ev.numberOfEventsInFile()):
+        ev.goToEntry(i)
+        event_numbers.append(ev.eventAuxiliary().id().event())
+    return event_numbers
+
+def place_events(events, md):
+    ##Place events in the metadata
+    md |= {
+        'core.events':events,
+        'core.event_count':len(events),
+        'core.first_event_number':events[0],
+        'core.last_event_number':events[-1],
+    }
+
+if __name__ == '__main__':
+    parser = ap()
+    parser.add_argument('-i', type=str, required=True, help='Input File')
+    parser.add_argument('--json', '-j', type=str, required=True, help='Output JSON file')
+    args = parser.parse_args()
+
+    events = get_events(args.i)
+
+    output = json.dumps({
+        'metadata': {
+            'core.events':events,
+            'core.event_count':len(events),
+            'core.first_event_number':events[0],
+            'core.last_event_number':events[-1],
+        }
+    })
+
+    with open(args.json, 'w') as outfile:
+        outfile.write(output)
diff --git a/utilities/inherit_metadata.py b/utilities/inherit_metadata.py
new file mode 100644
index 0000000..26f16ed
--- /dev/null
+++ b/utilities/inherit_metadata.py
@@ -0,0 +1,63 @@
+from metacat.webapi import MetaCatClient
+from argparse import ArgumentParser as ap
+import json
+
+required_keys = [
+    "core.data_stream",
+    "core.file_content_status",
+    "core.file_type",
+    "core.run_type",
+    "core.runs",
+    "core.runs_subruns",
+    "dune.daq_test",
+    "retention.status",
+    "retention.class",
+]
+
+def check_md(req_keys, parent_md, parent_name):
+    bad_keys = [rk for rk in req_keys if rk not in parent_md]
+    if len(bad_keys) > 0:
+        raise Exception( ##TODO -- better exception
+            f'Error! The following required keys are missing from metadata of {parent_name}' +
+            '\n\t' + ', '.join(bad_keys)
+        )
+
+def get_parent_md(parent_name):
+    mc = MetaCatClient()
+    parent_file = mc.get_file(did=parent_name, with_metadata=True,
+                              with_provenance=False)
+
+    #TODO -- check
+
+    parent_md = parent_file['metadata']
+    check_md(required_keys, parent_md, parent_name)
+    inherited_md = {rk:parent_md[rk] for rk in required_keys}
+    return inherited_md
+
+def inherit(parent_name):
+    output = {
+        'parents':[
+            {'did':parent_name}
+        ],
+        'metadata':get_parent_md(parent_name),
+    }
+    return output
+
+if __name__ == '__main__':
+    parser = ap()
+    parser.add_argument('--parent', '-p', type=str, required=True,
+                        help='Parent file did (namespace:name)')
+    parser.add_argument('--json', '-j', type=str, required=True,
+                        help='Output json file')
+    args = parser.parse_args()
+
+    output = inherit(args.parent)
+
+
+    # Serializing json
+    json_object = json.dumps(output, indent=2)
+
+    # Writing to sample.json
+    with open(args.json, "w") as outfile:
+        outfile.write(json_object)
+
diff --git a/utilities/meta_maker.py b/utilities/meta_maker.py
new file mode 100644
index 0000000..c356760
--- /dev/null
+++ b/utilities/meta_maker.py
@@ -0,0 +1,54 @@
+import place_metadata, get_events_for_md, inherit_metadata
+from argparse import ArgumentParser as ap
+import os
+import json
+
+if __name__ == '__main__':
+
+    parser = ap()
+    parser.add_argument('--file', '-f', required=True, type=str,
+                        help="File did for which we're making metadata (namespace:name)",
+    )
+    parser.add_argument('--get_events', action='store_true',
+                        help='Get event numbers from artroot file')
+    place_metadata.base_args(parser)
+    parser.add_argument('--parent', '-p', default=None, type=str,
+                        help='Parent DID to inherit from (namespace:name)')
+    parser.add_argument('--json', '-j', required=True, type=str,
+                        help='Output json name')
+    args = parser.parse_args()
+
+    base_md = place_metadata.make_md_from_args(args)
+
+    output = {
+        'name':args.file.split(':')[1],
+        'namespace':args.file.split(':')[0],
+        'metadata':base_md
+    }
+
+    if args.get_events:
+        #Check that the file exists in this directoy
+        if not os.path.isfile(output['name']):
+            raise Exception(f'Want events but no file exists of name {output["name"]}')
+
+
+        #get events from file
+        events = get_events_for_md.get_events(output['name'])
+        ##Put in metadta
+        get_events_for_md.place_events(events, output['metadata'])
+
+    if args.parent is not None:
+
+        ##Get md from parent
+        results = inherit_metadata.inherit(args.parent)
+
+        #place the inherited info in the output
+        output['metadata'] |= results['metadata']
+
+        #place the parent info
+        output['parents'] = results['parents']
+
+    ## Write the output
+    output_json = json.dumps(output, indent=2)
+    with open(args.json, 'w') as outfile:
+        outfile.write(output_json)
diff --git a/utilities/place_metadata.py b/utilities/place_metadata.py
new file mode 100644
index 0000000..d208ba3
--- /dev/null
+++ b/utilities/place_metadata.py
@@ -0,0 +1,31 @@
+import json
+from argparse import ArgumentParser as ap
+
+def make_md_from_args(args):
+    md = {
+        'core.file_format':args.file_format,
+        'core.application.name':args.app_name,
+        'core.application.family':args.app_family,
+        'core.application.version':args.app_version,
+        'core.data_tier':args.data_tier,
+    }
+
+    if args.start_time is not None:
+        md['core.start_time'] = args.start_time
+        md['core.end_time'] = args.end_time
+    return md
+
+def base_args(parser):
+    parser.add_argument('--start_time', default=None,)
+    parser.add_argument('--end_time', default=None,)
+    parser.add_argument('--file_format', type=str)
+    parser.add_argument('--app_family', type=str)
+    parser.add_argument('--app_name', type=str)
+    parser.add_argument('--app_version', type=str)
+    parser.add_argument('--data_tier', type=str)
+
+if __name__ == '__main__':
+    parser = ap()
+    parser.add_argument('--json', '-j', type=str, help='Output JSON file')
+    base_args(parser)
+    args = parser.parse_args()

From f570ac9808d87765e88ddeb805cb9abe5819569b Mon Sep 17 00:00:00 2001
From: Jacob Calcutt
Date: Fri, 7 Jun 2024 07:15:48 -0500
Subject: [PATCH 2/3] Allowing json inheritance. making start and end time floats

---
Review note: get_parent_md_from_json() passes a short label to check_md()
instead of the metadata dict itself, so a missing-key error names the source
rather than dumping the whole dict. The post-image blob id on the
inherit_metadata.py index line below was not regenerated.

 utilities/inherit_metadata.py | 30 ++++++++++++++++++++++++++++++
 utilities/meta_maker.py       |  8 ++++++--
 utilities/place_metadata.py   |  6 +++---
 3 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/utilities/inherit_metadata.py b/utilities/inherit_metadata.py
index 26f16ed..e6649bb 100644
--- a/utilities/inherit_metadata.py
+++ b/utilities/inherit_metadata.py
@@ -34,6 +34,36 @@ def get_parent_md(parent_name):
     inherited_md = {rk:parent_md[rk] for rk in required_keys}
     return inherited_md
 
+def get_parent_md_from_json(parent_md):
+    check_md(required_keys, parent_md, 'parent json')
+    inherited_md = {rk:parent_md[rk] for rk in required_keys}
+    return inherited_md
+
+def get_name_from_json(json_vals):
+    if 'did' in json_vals:
+        return {'did': json_vals['did']}
+    elif 'name' in json_vals and 'namespace' in json_vals:
+        return {
+            'name':json_vals['name'],
+            'namespace':json_vals['namespace'],
+        }
+    elif 'fid' in json_vals:
+        return {'fid': json_vals['fid']}
+    else:
+        raise Exception(
+            'Error! Tried importing parent name from json but could not find field'
+        )
+
+def inherit_json(parent_json):
+    with open(parent_json, 'r') as f:
+        parent_json_values = json.load(f)
+
+    output = {
+        'parents': [get_name_from_json(parent_json_values)],
+        'metadata': get_parent_md_from_json(parent_json_values['metadata']),
+    }
+    return output
+
 def inherit(parent_name):
     output = {
         'parents':[
diff --git a/utilities/meta_maker.py b/utilities/meta_maker.py
index c356760..efbe3c3 100644
--- a/utilities/meta_maker.py
+++ b/utilities/meta_maker.py
@@ -13,7 +13,8 @@
                         help='Get event numbers from artroot file')
     place_metadata.base_args(parser)
     parser.add_argument('--parent', '-p', default=None, type=str,
-                        help='Parent DID to inherit from (namespace:name)')
+                        help='Parent DID to inherit from (namespace:name)\nOR parent json metadata (requires --parent_as_json)')
+    parser.add_argument('--parent_as_json', action='store_true', help='') #TODO
     parser.add_argument('--json', '-j', required=True, type=str,
                         help='Output json name')
     args = parser.parse_args()
@@ -40,7 +41,10 @@
     if args.parent is not None:
 
         ##Get md from parent
-        results = inherit_metadata.inherit(args.parent)
+        if args.parent_as_json:
+            results = inherit_metadata.inherit_json(args.parent)
+        else:
+            results = inherit_metadata.inherit(args.parent)
 
         #place the inherited info in the output
         output['metadata'] |= results['metadata']
diff --git a/utilities/place_metadata.py b/utilities/place_metadata.py
index d208ba3..a874084 100644
--- a/utilities/place_metadata.py
+++ b/utilities/place_metadata.py
@@ -16,9 +16,9 @@ def make_md_from_args(args):
     return md
 
 def base_args(parser):
-    parser.add_argument('--start_time', default=None,)
-    parser.add_argument('--end_time', default=None,)
-    parser.add_argument('--file_format', type=str)
+    parser.add_argument('--start_time', default=None, type=float)
+    parser.add_argument('--end_time', default=None, type=float)
+    parser.add_argument('--file_format', type=str) ##TODO -- make required
     parser.add_argument('--app_family', type=str)
     parser.add_argument('--app_name', type=str)
     parser.add_argument('--app_version', type=str)

From 58e75df5d80b3bd6b393e5e6591a48a3a608c311 Mon Sep 17 00:00:00 2001
From: Jacob Calcutt
Date: Wed, 10 Jul 2024 08:44:06 -0500
Subject: [PATCH 3/3] Adding origins

---
Review notes: the add_origins.py script entry point calls add_args() (the
helper this file defines) instead of the undefined add_origin_args(), and
add_origins() rejects a missing --past_fcls with the same ValueError rather
than failing on len(None). The post-image blob id on the add_origins.py
index line below was not regenerated.

 utilities/add_origins.py    | 40 +++++++++++++++++++++++++++++++++++++
 utilities/meta_maker.py     |  6 +++++-
 utilities/place_metadata.py |  4 ++++
 3 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 utilities/add_origins.py

diff --git a/utilities/add_origins.py b/utilities/add_origins.py
new file mode 100644
index 0000000..01219be
--- /dev/null
+++ b/utilities/add_origins.py
@@ -0,0 +1,40 @@
+from metacat.webapi import MetaCatClient
+from argparse import ArgumentParser as ap
+import json
+
+def add_args(parser):
+    parser.add_argument('--past_fcls', type=str, nargs='+')
+    parser.add_argument('--past_apps', type=str, nargs='+')
+
+def add_origins(args, version):
+    if args.past_apps is None or args.past_fcls is None or len(args.past_fcls) != len(args.past_apps):
+        raise ValueError('Need to provide same number of past apps and fcls')
+
+    results = {
+        'origin.applications.config_files': {
+            args.past_apps[i]:args.past_fcls[i] for i in range(len(args.past_apps))
+        },
+        'origin.applications.versions': {
+            args.past_apps[i]:version
+            for i in range(len(args.past_apps))
+        },
+        'origin.applications.names':args.past_apps,
+    }
+    return results
+
+if __name__ == '__main__':
+    parser = ap()
+    parser.add_argument('--json', '-j', type=str, required=True,
+                        help='Output json file')
+    add_args(parser)
+    args = parser.parse_args()
+
+    output = add_origins(args, 'v1')
+
+    # Serializing json
+    json_object = json.dumps(output, indent=2)
+
+    # Writing to sample.json
+    with open(args.json, "w") as outfile:
+        outfile.write(json_object)
+
diff --git a/utilities/meta_maker.py b/utilities/meta_maker.py
index efbe3c3..9abebf7 100644
--- a/utilities/meta_maker.py
+++ b/utilities/meta_maker.py
@@ -1,4 +1,4 @@
-import place_metadata, get_events_for_md, inherit_metadata
+import place_metadata, get_events_for_md, inherit_metadata, add_origins
 from argparse import ArgumentParser as ap
 import os
 import json
@@ -12,6 +12,7 @@
     parser.add_argument('--get_events', action='store_true',
                         help='Get event numbers from artroot file')
     place_metadata.base_args(parser)
+    add_origins.add_args(parser)
     parser.add_argument('--parent', '-p', default=None, type=str,
                         help='Parent DID to inherit from (namespace:name)\nOR parent json metadata (requires --parent_as_json)')
     parser.add_argument('--parent_as_json', action='store_true', help='') #TODO
@@ -52,6 +53,9 @@
         #place the parent info
         output['parents'] = results['parents']
 
+    if args.past_apps is not None:
+        output['metadata'] |= add_origins.add_origins(args, args.app_version)
+
     ## Write the output
     output_json = json.dumps(output, indent=2)
     with open(args.json, 'w') as outfile:
diff --git a/utilities/place_metadata.py b/utilities/place_metadata.py
index a874084..4574bcd 100644
--- a/utilities/place_metadata.py
+++ b/utilities/place_metadata.py
@@ -8,6 +8,8 @@ def make_md_from_args(args):
         'core.application.family':args.app_family,
         'core.application.version':args.app_version,
         'core.data_tier':args.data_tier,
+        'dune.config_file':args.fcl,
+        'dune.campaign':args.campaign,
     }
 
     if args.start_time is not None:
@@ -23,6 +25,8 @@ def base_args(parser):
     parser.add_argument('--app_name', type=str)
     parser.add_argument('--app_version', type=str)
     parser.add_argument('--data_tier', type=str)
+    parser.add_argument('--fcl', type=str)
+    parser.add_argument('--campaign', type=str)
 
 if __name__ == '__main__':
     parser = ap()