def add_args(parser):
    """Register the origin-related command-line options on *parser*.

    Adds ``--past_fcls`` and ``--past_apps``. Each takes one or more strings
    and is left as ``None`` when the option is not given on the command line.
    """
    parser.add_argument('--past_fcls', type=str, nargs='+')
    parser.add_argument('--past_apps', type=str, nargs='+')


def add_origins(args, version):
    """Build the ``origin.applications.*`` metadata fields.

    Args:
        args: Parsed arguments exposing ``past_apps`` and ``past_fcls`` (the
            options registered by ``add_args``). The two lists are paired
            positionally.
        version: Value recorded as the version of every entry in
            ``past_apps``.

    Returns:
        A dict with three keys: ``origin.applications.config_files`` (app ->
        fcl), ``origin.applications.versions`` (app -> ``version``) and
        ``origin.applications.names`` (the ``past_apps`` list itself).

    Raises:
        ValueError: If either list is missing or their lengths differ.
    """
    apps = args.past_apps
    fcls = args.past_fcls

    # Both options default to None; test that before calling len() so a
    # missing --past_fcls is reported as the ValueError below rather than
    # surfacing as a TypeError.
    if apps is None or fcls is None or len(apps) != len(fcls):
        raise ValueError('Need to provide same number of past apps and fcls')

    return {
        'origin.applications.config_files': dict(zip(apps, fcls)),
        'origin.applications.versions': {app: version for app in apps},
        'origin.applications.names': apps,
    }


if __name__ == '__main__':
    parser = ap()
    parser.add_argument('--json', '-j', type=str, required=True,
                        help='Output json file')
    # The options are registered by add_args; the previous call went to a
    # name that is not defined in this module.
    add_args(parser)
    args = parser.parse_args()

    # A standalone run records the fixed version string 'v1'.
    output = add_origins(args, 'v1')

    # Serialize and write the result to the requested file.
    with open(args.json, "w") as outfile:
        outfile.write(json.dumps(output, indent=2))
def get_events(filename):
    """Return the event numbers stored in *filename*, in entry order.

    Opens the file through ``RT.gallery.Event`` (given a one-element vector
    of strings holding *filename*), visits every entry reported by
    ``numberOfEventsInFile()`` and collects
    ``eventAuxiliary().id().event()`` for each.
    """
    ev = RT.gallery.Event(RT.vector(RT.string)(1, filename))
    event_numbers = []
    for entry in range(ev.numberOfEventsInFile()):
        ev.goToEntry(entry)
        event_numbers.append(ev.eventAuxiliary().id().event())
    return event_numbers


def place_events(events, md):
    """Place the event information into the metadata dict *md*, in place.

    Always sets ``core.events`` and ``core.event_count``.
    ``core.first_event_number`` and ``core.last_event_number`` are taken
    from the first and last list entries, and are only set when *events* is
    non-empty.

    Args:
        events: List of event numbers, e.g. as returned by ``get_events``.
        md: Metadata dict to update.

    Returns:
        None; *md* is modified in place.
    """
    md.update({
        'core.events': events,
        'core.event_count': len(events),
    })

    # A file without events has no first/last entry to index; leave those
    # two fields out instead of failing with IndexError.
    if events:
        md.update({
            'core.first_event_number': events[0],
            'core.last_event_number': events[-1],
        })


if __name__ == '__main__':
    parser = ap()
    parser.add_argument('-i', type=str, required=True, help='Input File')
    parser.add_argument('--json', '-j', type=str, required=True,
                        help='Output JSON file')
    args = parser.parse_args()

    events = get_events(args.i)

    # Reuse place_events so the standalone output and the in-place helper
    # cannot drift apart.
    metadata = {}
    place_events(events, metadata)
    output = json.dumps({'metadata': metadata})

    with open(args.json, 'w') as outfile:
        outfile.write(output)
# Metadata keys that are copied from a parent file and must be present in
# the parent's metadata.
required_keys = [
    "core.data_stream",
    "core.file_content_status",
    "core.file_type",
    "core.run_type",
    "core.runs",
    "core.runs_subruns",
    "dune.daq_test",
    "retention.status",
    "retention.class",
]


class MissingMetadataError(Exception):
    """Raised when parent metadata lacks one or more required keys."""


def check_md(req_keys, parent_md, parent_name):
    """Verify that every key in *req_keys* is present in *parent_md*.

    Args:
        req_keys: Iterable of key names that must be present.
        parent_md: Mapping holding the parent's metadata.
        parent_name: Label for the parent, used only in the error message.

    Raises:
        MissingMetadataError: Listing every missing key. This subclasses
            ``Exception``, which is what was raised before.
    """
    bad_keys = [rk for rk in req_keys if rk not in parent_md]
    if bad_keys:
        raise MissingMetadataError(
            f'Error! The following required keys are missing from metadata of {parent_name}'
            + '\n\t' + ', '.join(bad_keys)
        )


def get_parent_md(parent_name):
    """Fetch the parent file's metadata through MetaCat and return the
    ``required_keys`` subset.

    Args:
        parent_name: Parent DID, passed as ``did=`` to
            ``MetaCatClient.get_file``.

    Raises:
        Exception: If the lookup returns ``None``.
        MissingMetadataError: If a required key is absent.
    """
    mc = MetaCatClient()
    parent_file = mc.get_file(did=parent_name, with_metadata=True,
                              with_provenance=False)

    # Guard the lookup result before indexing into it.
    # NOTE(review): the MetaCat client documentation describes None as the
    # "file not found" result -- confirm against the installed version.
    if parent_file is None:
        raise Exception(
            f'Error! Could not find parent file {parent_name}'
        )

    parent_md = parent_file['metadata']
    check_md(required_keys, parent_md, parent_name)
    return {rk: parent_md[rk] for rk in required_keys}


def get_parent_md_from_json(parent_md, parent_name='parent json'):
    """Return the ``required_keys`` subset of an already-loaded metadata dict.

    Args:
        parent_md: Mapping holding the parent's metadata.
        parent_name: Label used in the error message when keys are missing.
            Previously the metadata dict itself was interpolated into the
            message.

    Raises:
        MissingMetadataError: If a required key is absent.
    """
    check_md(required_keys, parent_md, parent_name)
    return {rk: parent_md[rk] for rk in required_keys}


def get_name_from_json(json_vals):
    """Pick the identifier of the parent file out of its json description.

    Preference order: ``did``, then the ``name`` + ``namespace`` pair, then
    ``fid``. Only the chosen field(s) are returned.

    Raises:
        Exception: If none of those fields is available.
    """
    if 'did' in json_vals:
        return {'did': json_vals['did']}
    if 'name' in json_vals and 'namespace' in json_vals:
        return {
            'name': json_vals['name'],
            'namespace': json_vals['namespace'],
        }
    if 'fid' in json_vals:
        return {'fid': json_vals['fid']}
    raise Exception(
        'Error! Tried importing parent name from json but could not find field'
    )


def inherit_json(parent_json):
    """Build the ``parents`` / ``metadata`` output from a parent json file.

    Args:
        parent_json: Path of a json file holding the parent identifier
            field(s) and a ``metadata`` dict.

    Raises:
        KeyError: If the file has no ``metadata`` entry.
        MissingMetadataError: If a required key is absent.
    """
    with open(parent_json, 'r') as f:
        parent_json_values = json.load(f)

    return {
        'parents': [get_name_from_json(parent_json_values)],
        # Pass the path along so a missing-key error names the file.
        'metadata': get_parent_md_from_json(parent_json_values['metadata'],
                                            parent_json),
    }


def inherit(parent_name):
    """Build the ``parents`` / ``metadata`` output for a parent DID looked
    up through MetaCat."""
    return {
        'parents': [
            {'did': parent_name}
        ],
        'metadata': get_parent_md(parent_name),
    }


if __name__ == '__main__':
    parser = ap()
    parser.add_argument('--parent', '-p', type=str, required=True,
                        help='Parent file did (namespace:name)')
    parser.add_argument('--json', '-j', type=str, required=True,
                        help='Output json file')
    args = parser.parse_args()

    output = inherit(args.parent)

    # Serialize and write the result to the requested file.
    with open(args.json, "w") as outfile:
        outfile.write(json.dumps(output, indent=2))
def split_did(did):
    """Split a ``namespace:name`` DID into ``(namespace, name)``.

    Only the first colon separates the two parts, so a name that itself
    contains a colon is kept whole.

    Raises:
        ValueError: If there is no colon, or either part is empty.
    """
    namespace, sep, name = did.partition(':')
    if not sep or not namespace or not name:
        raise ValueError(
            f'Expected a DID of the form namespace:name but got {did!r}'
        )
    return namespace, name


def main(argv=None):
    """Assemble the metadata json for one file and write it out.

    Starts from the fields built by ``place_metadata.make_md_from_args``.
    Depending on the options it then adds event numbers read from the file,
    metadata and parentage inherited from a parent, and the
    ``origin.applications.*`` fields.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv``.
    """
    parser = ap()
    parser.add_argument('--file', '-f', required=True, type=str,
                        help="File did for which we're making metadata (namespace:name)",
                        )
    parser.add_argument('--get_events', action='store_true',
                        help='Get event numbers from artroot file')
    place_metadata.base_args(parser)
    add_origins.add_args(parser)
    parser.add_argument('--parent', '-p', default=None, type=str,
                        help='Parent DID to inherit from (namespace:name)\nOR parent json metadata (requires --parent_as_json)')
    parser.add_argument('--parent_as_json', action='store_true',
                        help='Treat --parent as the path to a json file holding the parent metadata')
    parser.add_argument('--json', '-j', required=True, type=str,
                        help='Output json name')
    args = parser.parse_args(argv)

    namespace, name = split_did(args.file)

    output = {
        'name': name,
        'namespace': namespace,
        'metadata': place_metadata.make_md_from_args(args),
    }

    if args.get_events:
        # The file is looked up by its bare name, i.e. relative to the
        # current working directory.
        if not os.path.isfile(name):
            raise Exception(f'Want events but no file exists of name {name}')

        # Read the event numbers and put them in the metadata.
        events = get_events_for_md.get_events(name)
        get_events_for_md.place_events(events, output['metadata'])

    if args.parent is not None:
        # Get the metadata from the parent, either a json file or a DID.
        if args.parent_as_json:
            results = inherit_metadata.inherit_json(args.parent)
        else:
            results = inherit_metadata.inherit(args.parent)

        # Inherited values overwrite same-named fields set so far.
        output['metadata'] |= results['metadata']
        output['parents'] = results['parents']

    if args.past_apps is not None:
        output['metadata'] |= add_origins.add_origins(args, args.app_version)

    # Write the output.
    with open(args.json, 'w') as outfile:
        outfile.write(json.dumps(output, indent=2))


if __name__ == '__main__':
    main()
def make_md_from_args(args):
    """Build the base metadata dict from parsed command-line arguments.

    Args:
        args: Parsed arguments exposing the options registered by
            ``base_args``.

    Returns:
        A dict with the ``core.*`` / ``dune.*`` fields. ``core.start_time``
        and ``core.end_time`` are each included only when the matching
        option was given.
    """
    md = {
        'core.file_format': args.file_format,
        'core.application.name': args.app_name,
        'core.application.family': args.app_family,
        'core.application.version': args.app_version,
        'core.data_tier': args.data_tier,
        'dune.config_file': args.fcl,
        'dune.campaign': args.campaign,
    }

    # Treat the two times independently: a missing --end_time is never
    # written as None, and --end_time alone is no longer dropped.
    if args.start_time is not None:
        md['core.start_time'] = args.start_time
    if args.end_time is not None:
        md['core.end_time'] = args.end_time
    return md


def base_args(parser):
    """Register the base metadata options on *parser*.

    All options are optional; the times are parsed as floats and default to
    ``None``, the rest are strings.
    """
    parser.add_argument('--start_time', default=None, type=float)
    parser.add_argument('--end_time', default=None, type=float)
    parser.add_argument('--file_format', type=str)  ##TODO -- make required
    parser.add_argument('--app_family', type=str)
    parser.add_argument('--app_name', type=str)
    parser.add_argument('--app_version', type=str)
    parser.add_argument('--data_tier', type=str)
    parser.add_argument('--fcl', type=str)
    parser.add_argument('--campaign', type=str)


if __name__ == '__main__':
    parser = ap()
    parser.add_argument('--json', '-j', type=str, help='Output JSON file')
    base_args(parser)
    args = parser.parse_args()

    # The parsed arguments used to be discarded. Write the metadata when an
    # output file was requested.
    # NOTE(review): the {'metadata': ...} wrapper is a chosen layout --
    # confirm it is the one wanted for standalone use.
    if args.json is not None:
        with open(args.json, 'w') as outfile:
            outfile.write(
                json.dumps({'metadata': make_md_from_args(args)}, indent=2)
            )