From c61b1fb9f581a031ad3a6d917b86995f162bca7e Mon Sep 17 00:00:00 2001 From: Eleanor Smith Date: Tue, 15 Jun 2021 12:30:23 +0100 Subject: [PATCH 1/5] update tests --- MANIFEST.in | 1 + dachar/analyse/checks/_base_check.py | 33 +-- dachar/analyse/checks/coord_checks.py | 34 ++- dachar/cli.py | 91 +++++++- dachar/fixes/__init__.py | 10 +- dachar/fixes/_base_fix.py | 3 +- dachar/fixes/array_fixes.py | 20 ++ dachar/fixes/attr_fixes.py | 59 +++++ dachar/fixes/coord_fixes.py | 27 ++- dachar/fixes/fix_api.py | 7 +- dachar/fixes/fix_processor.py | 20 +- dachar/fixes/fix_proposal_store.py | 25 ++- dachar/fixes/generate_proposals.py | 78 +++++++ dachar/fixes/var_fixes.py | 25 +++ dachar/utils/create_index.py | 69 +++--- dachar/utils/json_store.py | 30 +-- environment.yml | 2 +- requirements.txt | 2 +- test/data/test_file.nc | Bin 0 -> 16702 bytes test/data/test_file_2.nc | Bin 0 -> 16702 bytes tests/test_analyse/test_sample_analyser.py | 3 +- .../test_MissingCoordCheck.py | 17 +- tests/test_fixes/esmval_test_fixes/cl.json | 43 ++++ .../esmval_test_fixes/cl_fix_ds_list.txt | 13 ++ .../esmval_test_fixes/cl_template.json | 43 ++++ tests/test_fixes/esmval_test_fixes/gpp.json | 14 ++ .../esmval_test_fixes/gpp_fix_ds_list.txt | 38 ++++ .../esmval_test_fixes/gpp_template.json | 14 ++ tests/test_fixes/esmval_test_fixes/o2.json | 17 ++ .../esmval_test_fixes/o2_fix_ds_list.txt | 23 ++ .../esmval_test_fixes/o2_template.json | 17 ++ tests/test_fixes/test_coord_fixes.py | 44 +++- tests/test_fixes/test_fix_class.py | 30 ++- tests/test_fixes/test_fix_store.py | 4 +- tests/test_fixes/test_process_fixes.py | 104 +++++---- tests/test_fixes/test_propose_fixes.py | 207 ++++++++++++++++++ tests/test_scan/test_scan.py | 4 +- tests/test_scan/test_scan_datasets.py | 29 +-- tests/test_utils/test_base_dirs.py | 86 ++++++-- 39 files changed, 1070 insertions(+), 216 deletions(-) create mode 100644 dachar/fixes/array_fixes.py create mode 100644 dachar/fixes/attr_fixes.py create mode 100644 dachar/fixes/generate_proposals.py create mode 100644 dachar/fixes/var_fixes.py create mode 100644 test/data/test_file.nc create mode 100644 test/data/test_file_2.nc create mode 100644 tests/test_fixes/esmval_test_fixes/cl.json create mode 100644 tests/test_fixes/esmval_test_fixes/cl_fix_ds_list.txt create mode 100644 tests/test_fixes/esmval_test_fixes/cl_template.json create mode 100644 tests/test_fixes/esmval_test_fixes/gpp.json create mode 100644 tests/test_fixes/esmval_test_fixes/gpp_fix_ds_list.txt create mode 100644 tests/test_fixes/esmval_test_fixes/gpp_template.json create mode 100644 tests/test_fixes/esmval_test_fixes/o2.json create mode 100644 tests/test_fixes/esmval_test_fixes/o2_fix_ds_list.txt create mode 100644 tests/test_fixes/esmval_test_fixes/o2_template.json create mode 100644 tests/test_fixes/test_propose_fixes.py diff --git a/MANIFEST.in b/MANIFEST.in index 49bc7b31..fb13caf9 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -10,5 +10,6 @@ recursive-include tests * recursive-include etc * recursive-exclude * __pycache__ recursive-exclude * *.py[co] +recursive-include etc * recursive-include docs *.md conf.py Makefile make.bat *.jpg *.png *.gif diff --git a/dachar/analyse/checks/_base_check.py b/dachar/analyse/checks/_base_check.py index 460ac013..cfb15a1d 100644 --- a/dachar/analyse/checks/_base_check.py +++ b/dachar/analyse/checks/_base_check.py @@ -1,13 +1,13 @@ -import logging import pprint as pp -from dachar.utils import UNDEFINED, nested_lookup, JDict - -from dachar.utils.get_stores import get_fix_prop_store, get_dc_store 
- -from dachar.fixes.fix_api import get_fix - +from dachar import __version__ as version from dachar import logging +from dachar.fixes.fix_api import get_fix +from dachar.utils import JDict +from dachar.utils import nested_lookup +from dachar.utils import UNDEFINED +from dachar.utils.get_stores import get_dc_store +from dachar.utils.get_stores import get_fix_prop_store LOGGER = logging.getLogger(__file__) @@ -53,6 +53,13 @@ class _BaseCheck(object): typical_threshold = 0.41 atypical_threshold = 0.15 + source = { + "name": "dachar", + "version": f"{version}", + "comment": "No specific source provided - link to all fixes in dachar", + "url": "https://github.com/roocs/dachar/tree/master/dachar/fixes", + } + def __init__(self, sample): self.sample = sample self._load() @@ -124,12 +131,10 @@ def _extract_content(self): content = [] for ds_id in self.sample: - items = dict( - [ - (key, nested_lookup(key, self._cache[ds_id], must_exist=True)) - for key in self.characteristics - ] - ) + items = { + key: nested_lookup(key, self._cache[ds_id], must_exist=True) + for key in self.characteristics + } content.append((ds_id, items)) return content @@ -144,5 +149,3 @@ def deduce_fix(self, ds_id, atypical_content, typical_content): def _propose_fix(self, ds_id, fix): get_fix_prop_store().propose(ds_id, fix) - - diff --git a/dachar/analyse/checks/coord_checks.py b/dachar/analyse/checks/coord_checks.py index f66612ae..0f3c9146 100644 --- a/dachar/analyse/checks/coord_checks.py +++ b/dachar/analyse/checks/coord_checks.py @@ -1,17 +1,29 @@ +import pprint +from collections import Counter +from collections import namedtuple +from itertools import chain + +from scipy.stats import mode + +from dachar import __version__ as version from dachar.analyse.checks._base_check import _BaseCheck from dachar.fixes.fix_api import get_fix -from dachar.utils.common import get_extra_items_in_larger_sequence, coord_mappings from dachar.utils import nested_lookup -from scipy.stats import mode -from collections import Counter, namedtuple -from itertools import chain -import pprint +from dachar.utils.common import coord_mappings +from dachar.utils.common import get_extra_items_in_larger_sequence class RankCheck(_BaseCheck): characteristics = ["data.dim_names", "data.shape"] associated_fix = "SqueezeDimensionsFix" + source = { + "name": "dachar", + "version": f"{version}", + "comment": "", + "url": "https://github.com/roocs/dachar/blob/master/dachar/fixes/coord_fixes.py#L8", + } + def deduce_fix(self, ds_id, atypical_content, typical_content): dicts = [] @@ -38,7 +50,7 @@ def deduce_fix(self, ds_id, atypical_content, typical_content): operands = {"dims": extra_coords} - fix = fix_cls(ds_id, **operands) + fix = fix_cls(ds_id, source=self.source, **operands) d = fix.to_dict() dicts.append(d) return dicts @@ -51,10 +63,18 @@ def deduce_fix(self, ds_id, atypical_content, typical_content): # TODO: Need to change this so that characteristic compared is coord_type # But characteristic used to create new variable is id + class MissingCoordCheck(_BaseCheck): characteristics = ["coordinates.*.id"] associated_fix = "AddScalarCoordFix" + source = { + "name": "dachar", + "version": f"{version}", + "comment": "", + "url": "https://github.com/roocs/dachar/blob/master/dachar/fixes/coord_fixes.py#L44", + } + def deduce_fix(self, ds_id, atypical_content, typical_content): dicts = [] @@ -112,7 +132,7 @@ def deduce_fix(self, ds_id, atypical_content, typical_content): operands["attrs"] = operand_dict - fix = fix_cls(ds_id, **operands) + fix = 
fix_cls(ds_id, source=self.source, **operands) d = fix.to_dict() # coordinate isn't scalar - fix isn't suitable diff --git a/dachar/cli.py b/dachar/cli.py index 7ca1b44b..0c9c2dc6 100644 --- a/dachar/cli.py +++ b/dachar/cli.py @@ -2,8 +2,6 @@ import os import shutil -from dachar.utils._stores_for_tests import _TestFixProposalStore - """Console script for dachar.""" __author__ = """Elle Smith""" @@ -18,6 +16,10 @@ from dachar.analyse.sample_analyser import analyse from dachar.fixes import fix_processor from dachar.fixes.fix_processor import process_all_fixes +from dachar.fixes.generate_proposals import ( + generate_fix_proposals, + generate_proposal_from_template, +) from unittest.mock import Mock @@ -41,8 +43,10 @@ def _get_arg_parser_scan(parser): :return: Namespace object built from attributes parsed from command line. """ # parser = argparse.ArgumentParser() - project_options = [_.split(':')[1] for _ in CONFIG.keys() if _.startswith('project:')] - location_options = CONFIG['dachar:settings']['locations'] + project_options = [ + _.split(":")[1] for _ in CONFIG.keys() if _.startswith("project:") + ] + location_options = CONFIG["dachar:settings"]["locations"] parser.add_argument( "project", @@ -130,8 +134,10 @@ def scan_main(args): def _get_arg_parser_analyse(parser): - project_options = [_.split(':')[1] for _ in CONFIG.keys() if _.startswith('project:')] - location_options = CONFIG['dachar:settings']['locations'] + project_options = [ + _.split(":")[1] for _ in CONFIG.keys() if _.startswith("project:") + ] + location_options = CONFIG["dachar:settings"]["locations"] parser.add_argument( "project", @@ -200,7 +206,7 @@ def _get_arg_parser_process_fixes(parser): type=str, default=None, required=True, - help="Action to carry out on fixes: process for proposed fixes, withdraw to withdraw" + help="Action to carry out on fixes: process for proposed fixes, withdraw to withdraw " "existing fixes", ) @@ -218,6 +224,71 @@ def process_fixes_main(args): process_all_fixes(action, ds_ids) +def _get_arg_parser_propose_fixes(parser): + + parser.add_argument( + "-f", + "--files", + type=str, + default=None, + required=False, + help="List of comma-separated json files containing information to generate fix proposals. " + "This option must be used on its own", + ) + + parser.add_argument( + "-d", + "--dataset-list", + type=str, + default=None, + required=False, + help="Text file containing dataset ids for which to propose the fix provided in the template. " + "If using this option you must provide a template using --template (-t) option.", + ) + + parser.add_argument( + "-t", + "--template", + type=str, + default=None, + required=False, + help="Template for fix proposal. " + "If using this option you must provide a list of dataset ids using the --dataset-list (-d) option.", + ) + + return parser + + +def parse_args_propose_fixes(args): + + if args.files: + if args.dataset_list or args.template: + raise Exception( + "The file option must be used on its own. " + "A dataset list and a template must be provided together. 
" + ) + + if args.dataset_list and not args.template: + raise Exception("A dataset list and a template must be provided together.") + + if args.template and not args.dataset_list: + raise Exception("A dataset list and a template must be provided together.") + + files = _to_list(args.files) + ds_list = args.dataset_list + template = args.template + return files, ds_list, template + + +def propose_fixes_main(args): + files, ds_list, template = parse_args_propose_fixes(args) + + if files: + generate_fix_proposals(files) + elif ds_list and template: + generate_proposal_from_template(template, ds_list) + + def main(): """Console script for dachar.""" main_parser = argparse.ArgumentParser() @@ -235,9 +306,13 @@ def main(): _get_arg_parser_process_fixes(fix_parser) fix_parser.set_defaults(func=process_fixes_main) + fix_proposal_parser = subparsers.add_parser("propose-fixes") + _get_arg_parser_propose_fixes(fix_proposal_parser) + fix_proposal_parser.set_defaults(func=propose_fixes_main) + args = main_parser.parse_args() args.func(args) if __name__ == "__main__": - sys.exit(main()) # pragma: no cover + sys.exit(main()) diff --git a/dachar/fixes/__init__.py b/dachar/fixes/__init__.py index 6ac66023..6d278e05 100644 --- a/dachar/fixes/__init__.py +++ b/dachar/fixes/__init__.py @@ -1,2 +1,8 @@ -from .fix_proposal_store import LocalFixProposalStore, ElasticFixProposalStore -from .fix_store import LocalFixStore, ElasticFixStore +from .array_fixes import * +from .attr_fixes import * +from .coord_fixes import * +from .fix_proposal_store import ElasticFixProposalStore +from .fix_proposal_store import LocalFixProposalStore +from .fix_store import ElasticFixStore +from .fix_store import LocalFixStore +from .var_fixes import * diff --git a/dachar/fixes/_base_fix.py b/dachar/fixes/_base_fix.py index 0226ac36..d07f4a52 100644 --- a/dachar/fixes/_base_fix.py +++ b/dachar/fixes/_base_fix.py @@ -1,5 +1,4 @@ from dachar.utils.common import UNDEFINED -from dachar import __version__ as version class FixDetails(object): @@ -51,7 +50,7 @@ class _BaseDatasetFix(object): """ - def __init__(self, ds_id, source=f"dachar version {version}", **operands): + def __init__(self, ds_id, source, **operands): self.ds_id = ds_id self.source = source self.operands = operands diff --git a/dachar/fixes/array_fixes.py b/dachar/fixes/array_fixes.py new file mode 100644 index 00000000..29724381 --- /dev/null +++ b/dachar/fixes/array_fixes.py @@ -0,0 +1,20 @@ +from dachar.fixes._base_fix import _BaseDatasetFix +from dachar.utils.common import UNDEFINED + +__all__ = ["MaskDataFix"] + + +class MaskDataFix(_BaseDatasetFix): + fix_id = "MaskDataFix" + title = "Apply Mask to Data" + description = """ +Masks data equal to a given value. + +For example: + - inputs: + - {'value': '1.0e33'} +""" + category = "array_fixes" + required_operands = ["value"] + ref_implementation = "daops.data_utils.array_utils.mask_data" + process_type = "post_processor" diff --git a/dachar/fixes/attr_fixes.py b/dachar/fixes/attr_fixes.py new file mode 100644 index 00000000..e19308e0 --- /dev/null +++ b/dachar/fixes/attr_fixes.py @@ -0,0 +1,59 @@ +from dachar.fixes._base_fix import _BaseDatasetFix +from dachar.utils.common import UNDEFINED + +__all__ = ["MainVarAttrFix", "AttrFix"] + + +class MainVarAttrFix(_BaseDatasetFix): + fix_id = "MainVarAttrFix" + title = "Apply Fix to Attributes of Main Variable" + description = """ +"Applies metadata fix e.g. fixing standard name or adding missing standard name + for the main variable of the dataset. 
+ +Takes a list of fixes with each fix as a dictionary containing the attribute +to be changed as the key and what the value should be as the value e.g.: + +{"long_name": "Dissolved Oxygen Concentration"}, +{"standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} + +For example: + - inputs: + {"attrs": [ + {"long_name": "Dissolved Oxygen Concentration"}, + {"standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} + ] + }, +""" + category = "attr_fixes" + required_operands = ["attrs"] + ref_implementation = "daops.data_utils.attr_utils.fix_attr_main_var" + process_type = "post_processor" + + +class AttrFix(_BaseDatasetFix): + fix_id = "AttrFix" + title = "Apply Fix to Attributes of any Variable" + description = """ +"Applies metadata fix e.g. fixing standard name or adding missing standard name + for a given variable of the dataset. + +Takes a list of fixes with each fix as a dictionary containing the attribute +to be changed as the key and what the value should be as the value e.g.: + +{"long_name": "Dissolved Oxygen Concentration"}, +{"standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} + +For example: + - inputs: + {"var_id": "lev", + "attrs": [ + {"long_name": "Dissolved Oxygen Concentration"}, + {"standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} + ] + }, +""" + category = "attr_fixes" + required_operands = ["var_id", "attrs"] + ref_implementation = "daops.data_utils.attr_utils.fix_attr" + process_type = "post_processor" diff --git a/dachar/fixes/coord_fixes.py b/dachar/fixes/coord_fixes.py index 81afc319..776c9274 100644 --- a/dachar/fixes/coord_fixes.py +++ b/dachar/fixes/coord_fixes.py @@ -1,8 +1,7 @@ -from dachar.utils.common import UNDEFINED - from dachar.fixes._base_fix import _BaseDatasetFix +from dachar.utils.common import UNDEFINED -__all__ = ["SqueezeDimensionsFix", "AddScalarCoordFix"] +__all__ = ["SqueezeDimensionsFix", "AddScalarCoordFix", "ReverseCoordFix"] class SqueezeDimensionsFix(_BaseDatasetFix): @@ -63,3 +62,25 @@ class AddScalarCoordFix(_BaseDatasetFix): required_operands = ["dtype", "id", "value", "length", "attrs", "coord_type"] ref_implementation = "daops.data_utils.coord_utils.add_scalar_coord" process_type = "post_processor" + + +class ReverseCoordFix(_BaseDatasetFix): + fix_id = "ReverseCoordFix" + title = "Reverse data of Coordinates" + description = """ +"Reverses the order of the data of a given coordinate + +Takes as an input the name of the coordinate/s to be reversed: + +For example: + - inputs: + {"coords": [ + "lat", + "lev" + ] + }, +""" + category = "coord_fixes" + required_operands = ["coords"] + ref_implementation = "daops.data_utils.coord_utils.reverse_coords" + process_type = "post_processor" diff --git a/dachar/fixes/fix_api.py b/dachar/fixes/fix_api.py index 22419ac6..1640c68c 100644 --- a/dachar/fixes/fix_api.py +++ b/dachar/fixes/fix_api.py @@ -1,8 +1,11 @@ -import os import glob +import os from pydoc import locate +import dachar.fixes.array_fixes as array_fixes +import dachar.fixes.attr_fixes as attr_fixes import dachar.fixes.coord_fixes as coord_fixes +import dachar.fixes.var_fixes as var_fixes def get_fix_modules(): @@ -27,7 +30,7 @@ def get_fix_dict(): def get_fix(fix_id): if fix_id: - fix_cls = locate(f"dachar.fixes.coord_fixes.{fix_id}") + fix_cls = locate(f"dachar.fixes.{fix_id}") return fix_cls diff --git a/dachar/fixes/fix_processor.py b/dachar/fixes/fix_processor.py index 1b1d36a4..a2db0680 100644 --- 
a/dachar/fixes/fix_processor.py +++ b/dachar/fixes/fix_processor.py @@ -1,6 +1,8 @@ -from dachar.utils.get_stores import get_fix_prop_store, get_fix_store import pprint +from dachar.utils.get_stores import get_fix_prop_store +from dachar.utils.get_stores import get_fix_store + def get_proposed_fixes(ds_ids=None): if ds_ids is None: @@ -9,9 +11,10 @@ def get_proposed_fixes(ds_ids=None): proposed_fixes = [] for ds_id in ds_ids: - proposed_fix = get_fix_prop_store().get_proposed_fix_by_id(ds_id) - if proposed_fix is not None: - proposed_fixes.append(proposed_fix) + proposed_fix_list = get_fix_prop_store().get_proposed_fix_by_id(ds_id) + if proposed_fix_list is not None: + for fix in proposed_fix_list: + proposed_fixes.append(fix) return proposed_fixes @@ -19,11 +22,12 @@ def get_proposed_fixes(ds_ids=None): def process_proposed_fixes(proposed_fixes): if len(proposed_fixes) > 0: for proposed_fix in proposed_fixes: - fix = proposed_fix["fixes"][0]["fix"] ds_id = proposed_fix["dataset_id"] + fix = proposed_fix["this_fix"]["fix"] # print fix so user can see what they are processing - pprint.pprint(proposed_fix) + pprint.pprint(ds_id) + pprint.pprint(fix) action = input("Enter action for proposed fix: ") @@ -41,7 +45,7 @@ def process_proposed_fixes(proposed_fixes): else: # print('[INFO] You have not selected an action for this fix.') - pass + continue else: raise Exception("No proposed fixes found.") @@ -61,7 +65,7 @@ def get_fixes_to_withdraw(ds_ids): def process_withdraw_fixes(existing_fixes): if len(existing_fixes) > 0: for existing_fix in existing_fixes: - fix = existing_fix["fixes"][0] + # fix = existing_fix["fixes"][0] ds_id = existing_fix["dataset_id"] # print fix so user can see what they are processing diff --git a/dachar/fixes/fix_proposal_store.py b/dachar/fixes/fix_proposal_store.py index 9a7a8a69..c0c53f28 100644 --- a/dachar/fixes/fix_proposal_store.py +++ b/dachar/fixes/fix_proposal_store.py @@ -1,8 +1,9 @@ from copy import deepcopy -from dachar.utils.common import now_string -from dachar.utils.json_store import _LocalBaseJsonStore, _ElasticSearchBaseJsonStore from dachar import CONFIG +from dachar.utils.common import now_string +from dachar.utils.json_store import _ElasticSearchBaseJsonStore +from dachar.utils.json_store import _LocalBaseJsonStore class BaseFixProposalStore(object): @@ -32,9 +33,9 @@ class BaseFixProposalStore(object): 'reason': '', 'status': 'proposed', 'timestamp': '2020-04-29T14:41:52'}]} - - - Are title, description, category, ref_implementation needed here? + + + Are title, description, category, ref_implementation needed here? Should ncml be in fix? 
""" @@ -106,11 +107,15 @@ def _action_fix(self, ds_id, fix, status, reason=""): def get_proposed_fix_by_id(self, ds_id): # go through fixes and return if status is proposed + proposed_fixes = [] + if self.exists(ds_id): content = self.get(ds_id) for this_fix in content["fixes"]: if this_fix["status"] == "proposed": - return content + proposed_fixes.append({"dataset_id": ds_id, "this_fix": this_fix}) + + return proposed_fixes def get_proposed_fixes(self): # go through fixes and return if status is proposed @@ -121,7 +126,9 @@ def get_proposed_fixes(self): content = self.get(ds_id) for this_fix in content["fixes"]: if this_fix["status"] == "proposed": - proposed_fixes.append(content) + proposed_fixes.append( + {"dataset_id": ds_id, "this_fix": this_fix} + ) return proposed_fixes @@ -146,7 +153,7 @@ class ElasticFixProposalStore(BaseFixProposalStore, _ElasticSearchBaseJsonStore) config = { "store_type": "elasticsearch", - "index": CONFIG['elasticsearch']["fix_proposal_store"], - "api_token": CONFIG['dachar:settings']['elastic_api_token'], + "index": CONFIG["elasticsearch"]["fix_proposal_store"], + "api_token": CONFIG["dachar:settings"]["elastic_api_token"], "id_type": "dataset_id", } diff --git a/dachar/fixes/generate_proposals.py b/dachar/fixes/generate_proposals.py new file mode 100644 index 00000000..b0ba67f3 --- /dev/null +++ b/dachar/fixes/generate_proposals.py @@ -0,0 +1,78 @@ +import json +from pydoc import locate + +from dachar.utils.get_stores import get_fix_prop_store + + +def flatten_proposal(d): + keys = [] + for k, v in d.items(): + keys.append(k) + if isinstance(v, list): + for k1, v1 in v[0].items(): + keys.append(k1) + if k1 == "source": + for k2, v2 in v1.items(): + keys.append(k2) + return keys + + +def validate_proposal(proposal): + required = [ + "dataset_id", + "fixes", + "fix_id", + "operands", + "source", + "name", + "version", + "comments", + "url", + ] + + existing = flatten_proposal(proposal) + + missing = set(required).difference(set(existing)) + invalid = set(existing).difference(set(required)) + + if missing: + raise KeyError(f"Required fields not provided: {missing}") + + if invalid: + raise KeyError(f"Invalid fields provided: {invalid}") + + +def generate_fix_proposals(files): + for file in files: + if isinstance(file, dict): + proposal = file + else: + with open(file) as f: + proposal = json.load(f) + + validate_proposal(proposal) + ds_id = proposal.get("dataset_id") + for prop in proposal.get("fixes"): + fix_id = prop.get("fix_id") + fix_cls = locate(f"dachar.fixes.{fix_id}") + + source, operands = prop.get("source"), prop.get("operands") + + fix = fix_cls(ds_id, source=source, **operands) + d = fix.to_dict() + get_fix_prop_store().propose(d["dataset_id"]["ds_id"], d["fix"]) + + +def generate_proposal_from_template(template, ds_list): + with open(template) as f: + proposal_template = json.load(f) + + proposals = [] + with open(ds_list, "r") as f1: + for line in f1: + ds_id = line.strip() + proposal_template = proposal_template.copy() + proposal_template["dataset_id"] = ds_id + proposals.append(proposal_template) + + generate_fix_proposals(proposals) diff --git a/dachar/fixes/var_fixes.py b/dachar/fixes/var_fixes.py new file mode 100644 index 00000000..44e6c912 --- /dev/null +++ b/dachar/fixes/var_fixes.py @@ -0,0 +1,25 @@ +from dachar.fixes._base_fix import _BaseDatasetFix +from dachar.utils.common import UNDEFINED + +__all__ = ["Reverse2DVarFix"] + + +class Reverse2DVarFix(_BaseDatasetFix): + fix_id = "Reverse2DVarFix" + title = "Reverse data of 2D Variables" 
+ description = """ +"Reverses the order of the data of the given 2d variables + +Takes as an input the names of the variables to be reversed +as a list: + +For example: + - inputs: + { + "var_ids": ["a_bnds", "b_bnds"] + }, +""" + category = "var_fixes" + required_operands = ["var_ids"] + ref_implementation = "daops.data_utils.var_utils.reverse_2d_vars" + process_type = "post_processor" diff --git a/dachar/utils/create_index.py b/dachar/utils/create_index.py index ffa22676..55bf538f 100644 --- a/dachar/utils/create_index.py +++ b/dachar/utils/create_index.py @@ -2,51 +2,50 @@ Currently this script produces a index with today's date and creates an alias for it. There is a function to populate the elasticsearch store with the contents of the local store """ - -import sys -import os -import pathlib import hashlib import json +import os +import pathlib +import sys from datetime import datetime -from elasticsearch import Elasticsearch -from ceda_elasticsearch_tools.elasticsearch import CEDAElasticsearchClient -from dachar.config import ELASTIC_API_TOKEN -from dachar import CONFIG -from dachar.utils.get_stores import ( - get_fix_store, - get_fix_prop_store, - get_dc_store, - get_ar_store, -) +from ceda_elasticsearch_tools.elasticsearch import CEDAElasticsearchClient +from elasticsearch import Elasticsearch -from tests._stores_for_tests import ( - _TestFixProposalStore, - _TestFixStore, - _TestAnalysisStore, - _TestDatasetCharacterStore, +from dachar import CONFIG +from dachar.utils.get_stores import get_ar_store +from dachar.utils.get_stores import get_dc_store +from dachar.utils.get_stores import get_fix_prop_store +from dachar.utils.get_stores import get_fix_store + +# from tests._stores_for_tests import ( +# _TestFixProposalStore, +# _TestFixStore, +# _TestAnalysisStore, +# _TestDatasetCharacterStore, +# ) + +es = CEDAElasticsearchClient( + headers={"x-api-key": CONFIG["dachar:settings"]["elastic_api_token"]} ) -es = CEDAElasticsearchClient(headers={"x-api-key": ELASTIC_API_TOKEN}) - -# es.indices.delete(index="roocs-char-test", ignore=[400, 404]) +# es.indices.delete(index="roocs-fix-2020-10-12", ignore=[400, 404]) # print(es.indices.exists("roocs-char-test")) # es.indices.create("roocs-char-test") -date = datetime.today().strftime("%Y-%m-%d") +# date = datetime.today().strftime("%Y-%m-%d") # character store -char_name = CONFIG['elasticsearch']["character_store"] +char_name = CONFIG["elasticsearch"]["character_store"] # analysis store -a_name = CONFIG['elasticsearch']["analysis_store"] +a_name = CONFIG["elasticsearch"]["analysis_store"] # fix store -fix_name = CONFIG['elasticsearch']["fix_store"] +fix_name = CONFIG["elasticsearch"]["fix_store"] # fix proposal store -fix_prop_name = CONFIG['elasticsearch']["fix_proposal_store"] +fix_prop_name = CONFIG["elasticsearch"]["fix_proposal_store"] -def create_index_and_alias(name): +def create_index_and_alias(name, date): exists = es.indices.exists(f"{name}-{date}") if not exists: es.indices.create( @@ -54,7 +53,15 @@ def create_index_and_alias(name): ) # do I need to include a mapping - should be put in here alias_exists = es.indices.exists_alias(name=f"{name}", index=f"{name}-{date}") if not alias_exists: - es.indices.put_alias(index=f"{name}-{date}", name=f"{name}") + es.indices.update_aliases( + body={ + "actions": [ + {"remove": {"alias": f"{name}", "index": "*"}}, + {"add": {"alias": f"{name}", "index": f"{name}-{date}"}}, + ] + } + ) + # es.indices.put_alias(index=f"{name}-{date}", name=f"{name}") def populate_store(local_store, index, 
id_type): @@ -85,9 +92,9 @@ def populate_store(local_store, index, id_type): def main(): # for store in [char_name, a_name, fix_name, fix_prop_name]: - # create_index_and_alias(store) + create_index_and_alias(fix_name, "2020-10-12") - populate_store(_TestDatasetCharacterStore(), "roocs-char-2020-07-08", "dataset_id") + populate_store(get_fix_store(), "roocs-fix-2020-10-12", "dataset_id") if __name__ == "__main__": diff --git a/dachar/utils/json_store.py b/dachar/utils/json_store.py index e51012da..d7072d2d 100644 --- a/dachar/utils/json_store.py +++ b/dachar/utils/json_store.py @@ -1,12 +1,14 @@ -import json import hashlib +import json import os +from ceda_elasticsearch_tools.elasticsearch import CEDAElasticsearchClient +from elasticsearch import Elasticsearch +from elasticsearch import helpers from .common import nested_lookup -from elasticsearch import Elasticsearch, helpers -from ceda_elasticsearch_tools.elasticsearch import CEDAElasticsearchClient -from dachar import CONFIG, logging +from dachar import CONFIG +from dachar import logging LOGGER = logging.getLogger(__file__) @@ -237,7 +239,7 @@ class _ElasticSearchBaseJsonStore(_BaseJsonStore): config = { "store_type": "elasticsearch", "index": "", - "api_token": CONFIG['dachar:settings']['elastic_api_token'], + "api_token": CONFIG["dachar:settings"]["elastic_api_token"], "id_type": "id", } @@ -288,7 +290,7 @@ def _save(self, id, content): self.es.index(index=self.config.get("index"), body=content, id=id) - self._map(drs_id, reverse=True) # + self._map(drs_id, reverse=True) self.es.update( index=self.config.get("index"), id=id, @@ -332,7 +334,7 @@ def _search_fields(self, fields, term, query_type): results.append(each["_source"]) # ensure there are no duplicates of the same result - return list(dict((v[self.config.get("id_type")], v) for v in results).values()) + return list({v[self.config.get("id_type")]: v for v in results}.values()) def _search_all(self, term): @@ -347,7 +349,7 @@ def _search_all(self, term): results.append(each["_source"]) # ensure there are no duplicates of the same result - return list(dict((v[self.config.get("id_type")], v) for v in results).values()) + return list({v[self.config.get("id_type")]: v for v in results}.values()) def _field_requirements(self, fields, term, query_type): @@ -361,14 +363,16 @@ def search(self, term, exact=False, match_ids=True, fields=None): if isinstance(term, float) or isinstance(term, int): exact = True - LOGGER.info(f"Must search for exact value when the search term is a number, " - f"Changing search to exact=True" + LOGGER.info( + f"Must search for exact value when the search term is a number, " + f"Changing search to exact=True" ) if isinstance(term, str) and " " in term and exact is False: - LOGGER.info(f"Ensure the case of your search term is correct as this type of " - f"search is case sensitive. If you are not sure of the correct case change " - f"your search term to a one word search or use exact=True." + LOGGER.info( + f"Ensure the case of your search term is correct as this type of " + f"search is case sensitive. If you are not sure of the correct case change " + f"your search term to a one word search or use exact=True." 
         )
 
         if match_ids is True and exact is True:
diff --git a/environment.yml b/environment.yml
index 45bc22cf..3f24a839 100644
--- a/environment.yml
+++ b/environment.yml
@@ -8,6 +8,6 @@ dependencies:
   - cftime>=1.0.4
   - netCDF4>=1.4
   - dask>=2.6.0
-  - scipy>=1.6.2
+  - scipy>=1.5.4
   - roocs-utils>=0.4.2
   - clisops>=0.6.4
diff --git a/requirements.txt b/requirements.txt
index 5bfd394e..0580e4b6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ xarray>=0.15
 cftime>=1.0.4
 netCDF4>=1.4
 dask[complete]>=2.6
-scipy>=1.6.2
+scipy>=1.5.4
 elasticsearch~=7.6.0
 roocs-utils>=0.4.2
 clisops>=0.6.4
diff --git a/test/data/test_file.nc b/test/data/test_file.nc
new file mode 100644
index 0000000000000000000000000000000000000000..0afe76ae93ee337487656942f382bbf53f469ba1
GIT binary patch
literal 16702
[16702 bytes of base85-encoded binary data omitted]

literal 0
HcmV?d00001

diff --git a/test/data/test_file_2.nc b/test/data/test_file_2.nc
new file mode 100644
index 0000000000000000000000000000000000000000..14b14044772714dbb18038e55e2e985dd1f81018
GIT binary patch
literal 16702
[16702 bytes of base85-encoded binary data omitted; the patch text is truncated here and resumes partway through tests/test_fixes/test_fix_store.py]
z?AFqMjj_!pz39C_@~Pf-$ko!lg!OcHrhhMCxTK^szpyy3EP9Wou3Z1|oH#NM^<;O@=fm4bS&)J@l8y+?2# zWUw7voes7Wwa?gX+qZegRUySYd)~dBzrV7cyk2*G3aYs>y{AE0CuI%F$_{4@@!wM& zy?yUJ<>!^1n)gxm$r+&yH?rzx9|Ap%MW!ngWtrU!S7Y@D&yRzJH2BI z{t#~9BxQ2iy`%FkfB&Uf$L5_eXay`WH*b&Qbm|k1qHdaol|ueGvRU#u_^V zu`}?u&Oop5!2Z#HFuZ@K^$7m)2>!_p?l21e+WqnQ;Kxevd~myx_oKyo1kVSr6Z{(# zJRj`u{iyLC!Slh-T>n|;a;pVj#Tq*Uu`>`m1F`m1F'}, # {'fix_id': 'Fix2', 'operands': {'arg2': '2'}, 'ncml': ''} @@ -72,4 +73,5 @@ def test_withdraw_fix_1(): def teardown_module(): + # pass _clear_store() diff --git a/tests/test_fixes/test_process_fixes.py b/tests/test_fixes/test_process_fixes.py index 32d3adde..3ee80120 100644 --- a/tests/test_fixes/test_process_fixes.py +++ b/tests/test_fixes/test_process_fixes.py @@ -1,15 +1,15 @@ import os import shutil import subprocess +from unittest.mock import Mock + import mock import pytest -from tests._stores_for_tests import _TestFixProposalStore, _TestFixStore from dachar.fixes import fix_processor from dachar.utils.common import now_string - - -from unittest.mock import Mock +from tests._stores_for_tests import _TestFixProposalStore +from tests._stores_for_tests import _TestFixStore ds_ids = [ "ds.1.1.1.1.1.1", @@ -109,44 +109,40 @@ def test_get_2_proposed_fixes(): assert len(proposed_fixes) == 2 - assert (proposed_fixes[0]) == { + assert (proposed_fixes[1]) == { "dataset_id": "ds.1.1.1.1.1.1", - "fixes": [ - { - "fix": { - "category": "test_fixes", - "description": "Applies fix 1", - "fix_id": "Fix1", - "operands": {"arg1": "1"}, - "reference_implementation": "daops.test.test_fix1", - "title": "Apply Fix 1", - }, - "history": [], - "reason": "", - "status": "proposed", - "timestamp": now_string(), - } - ], + "this_fix": { + "fix": { + "category": "test_fixes", + "description": "Applies fix 1", + "fix_id": "Fix1", + "operands": {"arg1": "1"}, + "reference_implementation": "daops.test.test_fix1", + "title": "Apply Fix 1", + }, + "history": [], + "reason": "", + "status": "proposed", + "timestamp": now_string(), + }, } - assert proposed_fixes[1] == { + assert proposed_fixes[0] == { "dataset_id": "ds.2.1.1.1.1.1", - "fixes": [ - { - "fix": { - "category": "test_fixes", - "description": "Applies fix 2", - "fix_id": "Fix2", - "operands": {"arg2": "2"}, - "reference_implementation": "daops.test.test_fix2", - "title": "Apply Fix 2", - }, - "history": [], - "reason": "", - "status": "proposed", - "timestamp": now_string(), - } - ], + "this_fix": { + "fix": { + "category": "test_fixes", + "description": "Applies fix 2", + "fix_id": "Fix2", + "operands": {"arg2": "2"}, + "reference_implementation": "daops.test.test_fix2", + "title": "Apply Fix 2", + }, + "history": [], + "reason": "", + "status": "proposed", + "timestamp": now_string(), + }, } @@ -165,22 +161,20 @@ def test_get_1_proposed_fixes(): assert len(proposed_fixes) == 1 assert proposed_fixes[0] == { "dataset_id": "ds.1.1.1.1.1.1", - "fixes": [ - { - "fix": { - "category": "test_fixes", - "description": "Applies fix 1", - "fix_id": "Fix1", - "operands": {"arg1": "1"}, - "reference_implementation": "daops.test.test_fix1", - "title": "Apply Fix 1", - }, - "history": [], - "reason": "", - "status": "proposed", - "timestamp": now_string(), - } - ], + "this_fix": { + "fix": { + "category": "test_fixes", + "description": "Applies fix 1", + "fix_id": "Fix1", + "operands": {"arg1": "1"}, + "reference_implementation": "daops.test.test_fix1", + "title": "Apply Fix 1", + }, + "history": [], + "reason": "", + "status": "proposed", + "timestamp": now_string(), + }, } @@ -241,7 +235,7 @@ 
def test_withdraw_fix_not_found(): fix_processor.get_fix_store = Mock(return_value=f_store) with pytest.raises(Exception) as exc: fix_processor.process_all_fixes("withdraw", [ds_ids[1]]) - assert exc.value == "A fix could not be found." + assert exc.value.args[0] == "A fix could not be found." def teardown_module(): diff --git a/tests/test_fixes/test_propose_fixes.py b/tests/test_fixes/test_propose_fixes.py new file mode 100644 index 00000000..bb5a1f74 --- /dev/null +++ b/tests/test_fixes/test_propose_fixes.py @@ -0,0 +1,207 @@ +import os +import shutil +import subprocess +from unittest.mock import Mock + +import mock +import pytest + +from dachar.fixes import generate_proposals +from dachar.utils.common import now_string +from tests._stores_for_tests import _TestFixProposalStore + +prop_store = None +cwd = os.getcwd() + + +def clear_store(): + fp_dr = _TestFixProposalStore.config["local.base_dir"] + if os.path.isdir(fp_dr): + shutil.rmtree(fp_dr) + + +def setup_module(): + clear_store() + global prop_store + prop_store = _TestFixProposalStore() + + +def test_generate_proposal_json(): + file = [f"{cwd}/tests/test_fixes/esmval_test_fixes/o2.json"] + + generate_proposals.get_fix_prop_store = Mock(return_value=prop_store) + + generate_proposals.generate_fix_proposals(file) + record = prop_store.get_proposed_fix_by_id( + "cmip5.output1.MOHC.HadGEM2-CC.historical.yr.ocnBgchem.Oyr.r1i1p1.latest.o2" + ) + assert record[0]["this_fix"]["fix"]["fix_id"] == "MainVarAttrFix" + + +def test_generate_proposal_json_2_fixes(): + + file = [ + { + "dataset_id": "cmip5.output1.MOHC.HadGEM2-CC.historical.yr.ocnBgchem.Oyr.r1i1p1.latest.o2", + "fixes": [ + { + "fix_id": "MainVarAttrFix", + "operands": { + "attrs": [ + "long_name,Dissolved Oxygen Concentration", + "standard_name,mole_concentration_of_dissolved_molecular_oxygen_in_sea_water", + ] + }, + "source": { + "name": "", + "version": "", + "comments": "testing 2 fixes proposed externally", + "url": "", + }, + }, + { + "fix_id": "SqueezeDimensionsFix", + "operands": {"dims": ["test"]}, + "source": { + "name": "", + "version": "", + "comments": "testing 2 fixes proposed externally", + "url": "", + }, + }, + ], + } + ] + + generate_proposals.get_fix_prop_store = Mock(return_value=prop_store) + + generate_proposals.generate_fix_proposals(file) + record = prop_store.get_proposed_fix_by_id( + "cmip5.output1.MOHC.HadGEM2-CC.historical.yr.ocnBgchem.Oyr.r1i1p1.latest.o2" + ) + assert record[0]["this_fix"]["fix"]["fix_id"] == "MainVarAttrFix" + assert record[1]["this_fix"]["fix"]["fix_id"] == "SqueezeDimensionsFix" + + +def test_generate_proposal_template(): + ds_list = f"{cwd}/tests/test_fixes/esmval_test_fixes/o2_fix_ds_list.txt" + template = f"{cwd}/tests/test_fixes/esmval_test_fixes/o2_template.json" + + generate_proposals.get_fix_prop_store = Mock(return_value=prop_store) + + generate_proposals.generate_proposal_from_template(template, ds_list) + record = prop_store.get_proposed_fix_by_id( + "cmip5.output1.MOHC.HadGEM2-CC.rcp85.mon.ocnBgchem.Omon.r1i1p1.v20120531.o2" + ) + assert record[0]["this_fix"]["fix"]["fix_id"] == "MainVarAttrFix" + + +def test_generate_proposal_when_one_already_exists(): + file = [ + { + "dataset_id": "cmip5.output1.MOHC.HadGEM2-CC.rcp85.mon.ocnBgchem.Omon.r1i1p1.v20120531.o2", + "fixes": [ + { + "fix_id": "SqueezeDimensionsFix", + "operands": {"dims": ["test"]}, + "source": { + "name": "", + "version": "", + "comments": "testing 2 fixes proposed externally", + "url": "", + }, + } + ], + } + ] + + generate_proposals.get_fix_prop_store = 
Mock(return_value=prop_store) + + generate_proposals.generate_fix_proposals(file) + record = prop_store.get_proposed_fix_by_id( + "cmip5.output1.MOHC.HadGEM2-CC.rcp85.mon.ocnBgchem.Omon.r1i1p1.v20120531.o2" + ) + assert record[0]["this_fix"]["fix"]["fix_id"] == "MainVarAttrFix" + assert record[1]["this_fix"]["fix"]["fix_id"] == "SqueezeDimensionsFix" + + +def test_unexpected_operands(): + file = [ + { + "dataset_id": "cmip5.output1.MOHC.HadGEM2-CC.historical.yr.ocnBgchem.Oyr.r1i1p1.latest.o2", + "fixes": [ + { + "fix_id": "MainVarAttrFix", + "operands": {"test": ["not_real"]}, + "source": { + "name": "esmvaltool", + "version": "2.0.0", + "comments": "", + "url": "https://github.com/ESMValGroup/ESMValCore/blob/master/esmvalcore/cmor/_fixes/cmip5/hadgem2_cc.py#L34-L55", + }, + } + ], + } + ] + + generate_proposals.get_fix_prop_store = Mock(return_value=prop_store) + + with pytest.raises(KeyError) as exc: + generate_proposals.generate_fix_proposals(file) + assert exc.value.args[0] == "Required keyword argument(s) not provided: {'attrs'}" + + +def test_invalid_fields(): + file = [ + { + "dataset_id": "cmip5.output1.MOHC.HadGEM2-CC.historical.yr.ocnBgchem.Oyr.r1i1p1.latest.o2", + "fixes": [ + { + "fox_id": "MainVarAttrFix", + "operands": {"attrs": ["not_real"]}, + "source": { + "name": "esmvaltool", + "version": "2.0.0", + "comments": "", + "url": "https://github.com/ESMValGroup/ESMValCore/blob/master/esmvalcore/cmor/_fixes/cmip5/hadgem2_cc.py#L34-L55", + }, + } + ], + } + ] + + generate_proposals.get_fix_prop_store = Mock(return_value=prop_store) + + with pytest.raises(KeyError) as exc: + generate_proposals.generate_fix_proposals(file) + assert exc.value.args[0] == "Required fields not provided: {'fix_id'}" + + +def test_missing_fields(): + file = [ + { + "dataset_id": "cmip5.output1.MOHC.HadGEM2-CC.historical.yr.ocnBgchem.Oyr.r1i1p1.latest.o2", + "fixes": [ + { + "fix_id": "MainVarAttrFix", + "source": { + "name": "esmvaltool", + "version": "2.0.0", + "comments": "", + "url": "https://github.com/ESMValGroup/ESMValCore/blob/master/esmvalcore/cmor/_fixes/cmip5/hadgem2_cc.py#L34-L55", + }, + } + ], + } + ] + + generate_proposals.get_fix_prop_store = Mock(return_value=prop_store) + + with pytest.raises(KeyError) as exc: + generate_proposals.generate_fix_proposals(file) + assert exc.value.args[0] == "Required fields not provided: {'operands'}" + + +def teardown_module(): + # pass + clear_store() diff --git a/tests/test_scan/test_scan.py b/tests/test_scan/test_scan.py index 1894dc0c..8e60ae25 100644 --- a/tests/test_scan/test_scan.py +++ b/tests/test_scan/test_scan.py @@ -142,7 +142,7 @@ def test_varying_coords_example_fail(create_netcdf_file, create_netcdf_file_2): # seems to keep one variable but joins the coordinate lists together -@pytest.mark.skip(reason="Can't test for this shape when using test data") +@pytest.mark.xfail(reason="Can't test for this shape when using test data") def test_varying_coords_example_succeed(): """ Tests what happens when opening files as mfdataset for which the coordinates vary """ ds = xr.open_mfdataset( @@ -155,7 +155,7 @@ def test_varying_coords_example_succeed(): ) -@pytest.mark.skip( +@pytest.mark.xfail( reason="Exception was: Cannot compare type 'Timestamp' with type 'DatetimeProlepticGregorian'" ) def test_time_axis_types_issue(): diff --git a/tests/test_scan/test_scan_datasets.py b/tests/test_scan/test_scan_datasets.py index d915c052..dab0ae91 100644 --- a/tests/test_scan/test_scan_datasets.py +++ b/tests/test_scan/test_scan_datasets.py @@ -1,12 +1,14 @@ 
 import json
-import pytest
 import os
+
+import pytest
 import xarray as xr
 
+from .test_check_files import make_nc_modify_var_attr
+from dachar import CONFIG
+from dachar import logging
 from dachar.scan.scan import scan_datasets
 from dachar.utils import switch_ds
-from dachar import CONFIG, logging
-from .test_check_files import make_nc_modify_var_attr
 
 LOGGER = logging.getLogger(__file__)
@@ -20,23 +22,6 @@ class TestCorruptJson:
-    @pytest.mark.skip("This ds id no longer creates a corrupt JSON file")
-    def test_corrupt_json_file(self):
-        """ Tests what happens when a JSON file exists but is incomplete due to an issue encoding."""
-        ds_id = [
-            "c3s-cordex.output.EUR-11.IPSL.MOHC-HadGEM2-ES.rcp85.r1i1p1.IPSL-WRF381P.v1.day.psl.v20190212"
-        ]
-        try:
-            scan_datasets(
-                project="c3s-cordex",
-                ds_ids=ds_id,
-                paths=CONFIG['project:c3s-cordex']['base_dir'],
-                mode="quick",
-                location="ceda",
-            )
-        except json.decoder.JSONDecodeError as exc:
-            pass
-
     def test_fake_corrupt_json_file(self, tmpdir):
         """ Creates a bad JSON file and tests the code responds properly"""
         try:
@@ -70,7 +55,7 @@ def test_file_checker(self):
         ds_id = ["cmip5.output1.MOHC.HadGEM2-ES.rcp85.mon.atmos.Amon.r1i1p1.latest.tas"]
         grouped_ds_id = switch_ds.get_grouped_ds_id(ds_id[0])
 
-        CONFIG['project:cmip5']['base_dir'] = "tests/test_outputs/"
+        CONFIG["project:cmip5"]["base_dir"] = "tests/test_outputs/"
 
         failure_file = f"outputs/logs/failure/pre_extract_error/{grouped_ds_id}.log"
         json_file = f"outputs/logs/register/{grouped_ds_id}.json"
@@ -83,7 +68,7 @@
         scan_datasets(
             project="cmip5",
             ds_ids=ds_id,
-            paths=CONFIG['project:cmip5']['base_dir'],
+            paths=CONFIG["project:cmip5"]["base_dir"],
             mode="quick",
             location="ceda",
         )
diff --git a/tests/test_utils/test_base_dirs.py b/tests/test_utils/test_base_dirs.py
index bdaf7cdb..dbe60c6c 100644
--- a/tests/test_utils/test_base_dirs.py
+++ b/tests/test_utils/test_base_dirs.py
@@ -1,61 +1,121 @@
+import configparser
 import os
+import shutil
 import subprocess
+import tempfile
+import warnings
+from unittest.mock import Mock
 
 import pytest
-from dachar.scan.scan import scan_datasets
 
 from dachar import CONFIG
+from dachar.scan import scan
+from tests._stores_for_tests import _TestDatasetCharacterStore
 
 
-@pytest.mark.skip("Fails - not possible locally")
+# Must run with --noconftest flag
+
+char_store = None
+cwd = os.getcwd()
+
+
+def clear_store():
+    dc_dr = _TestDatasetCharacterStore.config["local.base_dir"]
+    if os.path.isdir(dc_dr):
+        shutil.rmtree(dc_dr)
+
+
+def setup_module():
+    clear_store()
+    global char_store
+    char_store = _TestDatasetCharacterStore()
+
+
+@pytest.mark.xfail(
+    reason="conftest overwrites the base dir with the test base dir. Will pass if run with --noconftest flag"
+)
+@pytest.mark.skipif(
+    os.path.isdir("/group_workspaces") is False, reason="data not available"
+)
 def test_c3s_cmip5_base_dir():
     """ Checks definition of c3s cmip5 base dir resolves to a real directory"""
+    scan.get_dc_store = Mock(return_value=char_store)
+
     c3s_cmip5_id = [
         "c3s-cmip5.output1.MOHC.HadGEM2-ES.rcp85.mon.atmos.Amon.r1i1p1.tas.latest"
     ]
-    result = scan_datasets(
+    scan.scan_datasets(
         project="c3s-cmip5",
         ds_ids=c3s_cmip5_id,
-        paths=CONFIG['project:c3s-cmip5']['base_dir'],
+        paths=CONFIG["project:c3s-cmip5"]["base_dir"],
         mode="quick",
         location="ceda",
     )
     assert os.path.exists(
-        "./outputs/register/c3s-cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon.r1i1p1.tas.latest.json"
+        os.path.join(
+            char_store.config.get("local.base_dir"),
+            "c3s-cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon.r1i1p1.tas.latest.json",
+        )
     )
 
 
-@pytest.mark.skip("FAILS - c3s-cmip6 base dir not defined yet")
+@pytest.mark.xfail(
+    reason="conftest overwrites the base dir with the test base dir. Will pass if run with --noconftest flag"
+)
+@pytest.mark.skipif(os.path.isdir("/badc") is False, reason="data not available")
 def test_c3s_cmip6_base_dir():
     """ Checks definition of c3s cmip6 base dir resolves to a real directory"""
+    scan.get_dc_store = Mock(return_value=char_store)
+
     c3s_cmip6_id = [
         "c3s-cmip6.CMIP.MOHC.HadGEM3-GC31-LL.amip.r1i1p1f3.Emon.rls.gn.latest"
     ]
-    result = scan_datasets(
+    scan.scan_datasets(
         project="c3s-cmip6",
         ds_ids=c3s_cmip6_id,
-        paths=CONFIG['project:c3s-cmip6']['base_dir'],
+        paths=CONFIG["project:c3s-cmip6"]["base_dir"],
         mode="quick",
         location="ceda",
     )
+
+    # base dir not defined yet
     assert os.path.exists(
-        "./outputs/register/c3s-cmip6/CMIP/MOHC/HadGEM3-GC31-LL/amip/r1i1p1f3/Emon.rls.gn.latest.json"
+        os.path.join(
+            char_store.config.get("local.base_dir"),
+            "c3s-cmip6/CMIP/MOHC/HadGEM3-GC31-LL/amip/r1i1p1f3/Emon.rls.gn.latest.json",
+        )
    )
 
 
-@pytest.mark.skip("Fails - not possible locally")
+@pytest.mark.xfail(
+    reason="conftest overwrites the base dir with the test base dir. 
Will pass if run with --noconftest flag" +) +@pytest.mark.skipif( + os.path.isdir("/group_workspaces") is False, reason="data not available" +) def test_c3s_cordex_base_dir(): """ Checks definition of c3s cordex base dir resolves to a real directory""" + scan.get_dc_store = Mock(return_value=char_store) + c3s_cordex_id = [ "c3s-cordex.output.EUR-11.CNRM.CNRM-CERFACS-CNRM-CM5.rcp45.r1i1p1.CNRM-ALADIN53.v1.day.tas.v20150127" ] - result = scan_datasets( + scan.scan_datasets( project="c3s-cordex", ds_ids=c3s_cordex_id, - paths=CONFIG['project:c3s-cordex']['base_dir'], + paths=CONFIG["project:c3s-cordex"]["base_dir"], mode="quick", location="ceda", ) assert os.path.exists( - "./outputs/register/c3s-cordex/output/EUR-11/CNRM/CNRM-CERFACS-CNRM-CM5/rcp45/r1i1p1/CNRM-ALADIN53/v1.day.tas.v20150127.json" + os.path.join( + char_store.config.get("local.base_dir"), + "c3s-cordex/output/EUR-11/CNRM/CNRM-CERFACS-CNRM-CM5/rcp45/r1i1p1" + "/CNRM-ALADIN53/v1.day.tas.v20150127.json", + ) ) + + +def teardown_module(): + # pass + clear_store() From 60447cae7d4c8f6411bfd2d52f05d750000156c8 Mon Sep 17 00:00:00 2001 From: Eleanor Smith Date: Wed, 16 Jun 2021 16:11:54 +0100 Subject: [PATCH 2/5] update lists to dicts --- dachar/fixes/attr_fixes.py | 30 +++++++++---------- tests/test_fixes/esmval_test_fixes/cl.json | 8 ++--- .../esmval_test_fixes/cl_template.json | 8 ++--- tests/test_fixes/esmval_test_fixes/o2.json | 8 ++--- .../esmval_test_fixes/o2_template.json | 7 ++--- 5 files changed, 29 insertions(+), 32 deletions(-) diff --git a/dachar/fixes/attr_fixes.py b/dachar/fixes/attr_fixes.py index e19308e0..9b840955 100644 --- a/dachar/fixes/attr_fixes.py +++ b/dachar/fixes/attr_fixes.py @@ -11,18 +11,17 @@ class MainVarAttrFix(_BaseDatasetFix): "Applies metadata fix e.g. fixing standard name or adding missing standard name for the main variable of the dataset. -Takes a list of fixes with each fix as a dictionary containing the attribute -to be changed as the key and what the value should be as the value e.g.: +Takes a dictionary of fixes with each fix as a key and value pair with the attribute +as the key and what the value should be as the value. e.g.: -{"long_name": "Dissolved Oxygen Concentration"}, -{"standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} +{"long_name": "Dissolved Oxygen Concentration", +"standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} For example: - inputs: - {"attrs": [ - {"long_name": "Dissolved Oxygen Concentration"}, - {"standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} - ] + {"attrs": + {"long_name": "Dissolved Oxygen Concentration", + "standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} }, """ category = "attr_fixes" @@ -38,19 +37,18 @@ class AttrFix(_BaseDatasetFix): "Applies metadata fix e.g. fixing standard name or adding missing standard name for a given variable of the dataset. -Takes a list of fixes with each fix as a dictionary containing the attribute -to be changed as the key and what the value should be as the value e.g.: +Takes a dictionary of fixes with each fix as a key and value pair with the attribute +as the key and what the value should be as the value. 
e.g.: -{"long_name": "Dissolved Oxygen Concentration"}, -{"standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} +{"long_name": "Dissolved Oxygen Concentration", +"standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} For example: - inputs: {"var_id": "lev", - "attrs": [ - {"long_name": "Dissolved Oxygen Concentration"}, - {"standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} - ] + "attrs": + {"long_name": "Dissolved Oxygen Concentration", + "standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} }, """ category = "attr_fixes" diff --git a/tests/test_fixes/esmval_test_fixes/cl.json b/tests/test_fixes/esmval_test_fixes/cl.json index acc6bbc0..9391b8ad 100644 --- a/tests/test_fixes/esmval_test_fixes/cl.json +++ b/tests/test_fixes/esmval_test_fixes/cl.json @@ -4,10 +4,10 @@ "fix_id": "AttrFix", "operands": { "var_id": "lev", - "attrs": [ - {"standard_name": "atmosphere_hybrid_sigma_pressure_coordinate"}, - {"formula_terms": "p0: p0 a: a b: b ps: ps"} - ]}, + "attrs": + {"standard_name": "atmosphere_hybrid_sigma_pressure_coordinate", + "formula_terms": "p0: p0 a: a b: b ps: ps"} + }, "source": { "name": "esmvaltool", "version": "2.0.0", diff --git a/tests/test_fixes/esmval_test_fixes/cl_template.json b/tests/test_fixes/esmval_test_fixes/cl_template.json index 32beb1ef..aa5992c1 100644 --- a/tests/test_fixes/esmval_test_fixes/cl_template.json +++ b/tests/test_fixes/esmval_test_fixes/cl_template.json @@ -4,10 +4,10 @@ "fix_id": "AttrFix", "operands": { "var_id": "lev", - "attrs": [ - {"standard_name": "atmosphere_hybrid_sigma_pressure_coordinate"}, - {"formula_terms": "p0: p0 a: a b: b ps: ps"} - ]}, + "attrs": + {"standard_name": "atmosphere_hybrid_sigma_pressure_coordinate", + "formula_terms": "p0: p0 a: a b: b ps: ps"} + }, "source": { "name": "esmvaltool", "version": "2.0.0", diff --git a/tests/test_fixes/esmval_test_fixes/o2.json b/tests/test_fixes/esmval_test_fixes/o2.json index af55f95b..083ea667 100644 --- a/tests/test_fixes/esmval_test_fixes/o2.json +++ b/tests/test_fixes/esmval_test_fixes/o2.json @@ -3,10 +3,10 @@ "fixes": [{ "fix_id": "MainVarAttrFix", "operands": { - "attrs": [ - {"long_name": "Dissolved Oxygen Concentration"}, - {"standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} - ] + "attrs": + {"long_name": "Dissolved Oxygen Concentration", + "standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} + }, "source": { "name": "esmvaltool", diff --git a/tests/test_fixes/esmval_test_fixes/o2_template.json b/tests/test_fixes/esmval_test_fixes/o2_template.json index 862f7182..962f371a 100644 --- a/tests/test_fixes/esmval_test_fixes/o2_template.json +++ b/tests/test_fixes/esmval_test_fixes/o2_template.json @@ -3,10 +3,9 @@ "fixes": [{ "fix_id": "MainVarAttrFix", "operands": { - "attrs": [ - {"long_name": "Dissolved Oxygen Concentration"}, - {"standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} - ] + "attrs": + {"long_name": "Dissolved Oxygen Concentration", + "standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} }, "source": { "name": "esmvaltool", From 896077a9ca199aa2f7a62858b9312cd6a9456c2b Mon Sep 17 00:00:00 2001 From: Eleanor Smith Date: Thu, 17 Jun 2021 09:54:36 +0100 Subject: [PATCH 3/5] update dicts and strings --- dachar/utils/create_index.py | 99 ++++++++++++++++--- tests/test_analyse/test_sample_analyser.py | 1 + tests/test_fixes/esmval_test_fixes/gpp.json | 
2 +-
 .../esmval_test_fixes/gpp_template.json     |  2 +-
 4 files changed, 90 insertions(+), 14 deletions(-)

diff --git a/dachar/utils/create_index.py b/dachar/utils/create_index.py
index 55bf538f..60cc246b 100644
--- a/dachar/utils/create_index.py
+++ b/dachar/utils/create_index.py
@@ -1,6 +1,10 @@
 """
-Currently this script produces a index with today's date and creates an alias for it.
-There is a function to populate the elasticsearch store with the contents of the local store
+This script can produce an index with today's date and update the alias to point to it.
+There is also a function to populate the elasticsearch store with the contents of the local store.
+
+When updating the index:
+- a new index must be created with the new date; the clone_index_and_update_alias function creates it, fills it with all documents from the old index and updates the alias to point to it
+- the new index can then be populated either with all documents in the local store (populate_store) or one document at a time (add_document_to_index)
 """
 import hashlib
 import json
@@ -45,12 +49,42 @@
 fix_prop_name = CONFIG["elasticsearch"]["fix_proposal_store"]
 
 
-def create_index_and_alias(name, date):
+def create_index_and_alias(index_name, date):
+    """
+    Create an empty index and update the alias to point to it.
+    """
+
+    exists = es.indices.exists(f"{index_name}-{date}")
+    if not exists:
+        es.indices.create(f"{index_name}-{date}")
+    alias_exists = es.indices.exists_alias(
+        name=f"{index_name}", index=f"{index_name}-{date}"
+    )
+    if not alias_exists:
+        es.indices.update_aliases(
+            body={
+                "actions": [
+                    {"remove": {"alias": f"{index_name}", "index": "*"}},
+                    {
+                        "add": {
+                            "alias": f"{index_name}",
+                            "index": f"{index_name}-{date}",
+                        }
+                    },
+                ]
+            }
+        )
+    # es.indices.put_alias(index=f"{name}-{date}", name=f"{name}")
+
+
+def clone_index_and_update_alias(index_name, date, index_to_clone):
+    """
+    clone an index and update the alias to point to the new index
+    """
+
     exists = es.indices.exists(f"{name}-{date}")
     if not exists:
-        es.indices.create(
-            f"{name}-{date}"
-        )  # do I need to include a mapping - should be put in here
+        es.indices.clone(index_to_clone, f"{name}-{date}")
     alias_exists = es.indices.exists_alias(name=f"{name}", index=f"{name}-{date}")
     if not alias_exists:
         es.indices.update_aliases(
@@ -65,6 +99,13 @@ def create_index_and_alias(name, date):
 
 
 def populate_store(local_store, index, id_type):
+    """
+    Populate an elasticsearch index from a local store.
+    :param local_store: local store object to populate from
+    :param index: name of the elasticsearch index to populate
+    :param id_type: name of the id field in the given index, i.e. either dataset_id (for the fix, character and fix proposal stores) or sample_id (for the analysis store)
+    """
+
     root = local_store.config.get(
         "local.base_dir"
     )  # change if wanting to use a test store
@@ -81,20 +122,54 @@ def populate_store(local_store, index, id_type):
         print(drs)
         m = hashlib.md5()
         m.update(drs.encode("utf-8"))
-        id = m.hexdigest()
+        doc_id = m.hexdigest()
         doc = json.load(open(fpath))
         # es.delete(index=index, id=id)
-        es.index(index=index, id=id, body=doc)
+        es.index(index=index, id=doc_id, body=doc)
         if id_type is not None:
-            es.update(index=index, id=id, body={"doc": {id_type: drs}})
+            es.update(index=index, id=doc_id, body={"doc": {id_type: drs}})
+
+
+def add_document_to_index(fpath, drs, index, id_type):
+    """
+    Add a single document to an elasticsearch index, using the given path to a JSON file and the dataset id (ds_id/drs).
+ """ + + mapper = {"__ALL__": "*"} + for find_s, replace_s in mapper.items(): + drs = drs.replace(find_s, replace_s) + + print(drs) + m = hashlib.md5() + m.update(drs.encode("utf-8")) + doc_id = m.hexdigest() + doc = json.load(open(fpath)) + # es.delete(index=index, id=id) + print(doc) + + es.index(index=index, id=doc_id, body=doc) + if id_type is not None: + es.update(index=index, id=doc_id, body={"doc": {id_type: drs}}) def main(): # for store in [char_name, a_name, fix_name, fix_prop_name]: - create_index_and_alias(fix_name, "2020-10-12") - - populate_store(get_fix_store(), "roocs-fix-2020-10-12", "dataset_id") + # es.indices.delete(index="roocs-fix-2021-06-16", ignore=[400, 404]) + es.indices.delete(index="roocs-fix-prop-2021-06-16", ignore=[400, 404]) + # create_index_and_alias(fix_name, "2021-06-16") + create_index_and_alias(fix_prop_name, "2021-06-16") + # # clone_index_and_update_alias(fix_name, "2021-06-15", "roocs-fix-2020-10-12")) + + # populate_store(get_fix_store(), "roocs-fix-2021-06-16", "dataset_id") + populate_store(get_fix_prop_store(), "roocs-fix-prop-2021-06-16", "dataset_id") + # add_document_to_index( + # "/home/users/esmith88/roocs/dachar/tests/test_fixes/decadal_fixes/decadal.json", + # "CMIP6.DCPP.MOHC.HadGEM3-GC31-MM.dcppA-hindcast.s2004-r3i1p1f2.Amon.pr.gn.v20200417", + # "roocs-fix-2020-10-12", + # "dataset_id", + # ) + # add_document_to_index("/tmp/fix-store/cmip5/output1/INM/inmcm4/rcp45/mon/ocean/Omon.r1i1p1.latest.zostoga.json", "cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga", "roocs-fix-2021-06-16", "dataset_id") if __name__ == "__main__": diff --git a/tests/test_analyse/test_sample_analyser.py b/tests/test_analyse/test_sample_analyser.py index bc8199ae..0c932d04 100644 --- a/tests/test_analyse/test_sample_analyser.py +++ b/tests/test_analyse/test_sample_analyser.py @@ -92,4 +92,5 @@ def test_analyse(load_esgf_test_data): def teardown_module(): + # pass clear_stores() diff --git a/tests/test_fixes/esmval_test_fixes/gpp.json b/tests/test_fixes/esmval_test_fixes/gpp.json index 0f7d0e21..6dce083c 100644 --- a/tests/test_fixes/esmval_test_fixes/gpp.json +++ b/tests/test_fixes/esmval_test_fixes/gpp.json @@ -3,7 +3,7 @@ "fixes": [{ "fix_id": "MaskDataFix", "operands": { - "value": 1.0e33 + "value": "1.0e33" }, "source": { "name": "esmvaltool", diff --git a/tests/test_fixes/esmval_test_fixes/gpp_template.json b/tests/test_fixes/esmval_test_fixes/gpp_template.json index 2fcb0da7..ae30d5a5 100644 --- a/tests/test_fixes/esmval_test_fixes/gpp_template.json +++ b/tests/test_fixes/esmval_test_fixes/gpp_template.json @@ -3,7 +3,7 @@ "fixes": [{ "fix_id": "MaskDataFix", "operands": { - "value": 1.0e33 + "value": "1.0e33" }, "source": { "name": "esmvaltool", From 36ecbad325804f7c773edb8d7188c83b41057a13 Mon Sep 17 00:00:00 2001 From: Eleanor Smith Date: Thu, 17 Jun 2021 10:57:55 +0100 Subject: [PATCH 4/5] corrected timestamp in fix proposal store --- dachar/fixes/fix_proposal_store.py | 2 +- dachar/utils/create_index.py | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/dachar/fixes/fix_proposal_store.py b/dachar/fixes/fix_proposal_store.py index c0c53f28..b920813f 100644 --- a/dachar/fixes/fix_proposal_store.py +++ b/dachar/fixes/fix_proposal_store.py @@ -61,7 +61,7 @@ def _update_fix_container(self, container, fix, status, reason=""): 0, { "status": container["status"], - "timestamp": container["timestamp"], + "timestamp": now_string(), "reason": container["reason"], }, ) diff --git a/dachar/utils/create_index.py 
b/dachar/utils/create_index.py index 60cc246b..5a6fc3b4 100644 --- a/dachar/utils/create_index.py +++ b/dachar/utils/create_index.py @@ -82,16 +82,23 @@ def clone_index_and_update_alias(index_name, date, index_to_clone): clone an index and update the alias to point to the new index """ - exists = es.indices.exists(f"{name}-{date}") + exists = es.indices.exists(f"{index_name}-{date}") if not exists: - es.indices.clone(index_to_clone, f"{name}-{date}") - alias_exists = es.indices.exists_alias(name=f"{name}", index=f"{name}-{date}") + es.indices.clone(index_to_clone, f"{index_name}-{date}") + alias_exists = es.indices.exists_alias( + name=f"{index_name}", index=f"{index_name}-{date}" + ) if not alias_exists: es.indices.update_aliases( body={ "actions": [ - {"remove": {"alias": f"{name}", "index": "*"}}, - {"add": {"alias": f"{name}", "index": f"{name}-{date}"}}, + {"remove": {"alias": f"{index_name}", "index": "*"}}, + { + "add": { + "alias": f"{index_name}", + "index": f"{index_name}-{date}", + } + }, ] } ) From 2e1a806f4f992e9f00df23956862d762400602bf Mon Sep 17 00:00:00 2001 From: ellesmith88 Date: Mon, 21 Jun 2021 13:00:58 +0100 Subject: [PATCH 5/5] add roocs.ini --- dachar/etc/roocs.ini | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 dachar/etc/roocs.ini diff --git a/dachar/etc/roocs.ini b/dachar/etc/roocs.ini new file mode 100644 index 00000000..f38fe615 --- /dev/null +++ b/dachar/etc/roocs.ini @@ -0,0 +1,44 @@ +[config_data_types] +extra_lists = locations common cmip5 cmip6 cordex +extra_ints = memory_large memory_small dir_grouping_level +extra_floats = concern_threshold + +[dachar:processing] +queue = short-serial +wallclock_large = 23:59 +memory_large = 32000 +wallclock_small = 04:00 +memory_small = 4000 + +[dachar:output_paths] +# reformat these +_base_path = ./outputs +base_log_dir = %(_base_path)s/logs +batch_output_path = %(base_log_dir)s/batch-outputs/{grouped_ds_id} +json_output_path = %(_base_path)s/register/{grouped_ds_id}.json +success_path = %(base_log_dir)s/success/{grouped_ds_id}.log +no_files_path = %(base_log_dir)s/failure/no_files/{grouped_ds_id}.log +pre_extract_error_path = %(base_log_dir)s/failure/pre_extract_error/{grouped_ds_id}.log +extract_error_path = %(base_log_dir)s/failure/extract_error/{grouped_ds_id}.log +write_error_path = %(base_log_dir)s/failure/write_error/{grouped_ds_id}.log +fix_path = %(_base_path)s/fixes/{grouped_ds_id}.json + + +[dachar:checks] +common = coord_checks.RankCheck coord_checks.MissingCoordCheck +cmip5 = +cmip6 = test +cordex = + + +[dachar:settings] +elastic_api_token = +dir_grouping_level = 4 +concern_threshold = 0.2 +locations = ceda dkrz other + + +# From old options file - do we still need these? +# dataset_id = cordex.%(product)s.%(domain)s.%(institute)s.%(driving_model)s.%(experiment)s.%(ensemble)s.%(rcm_name)s.%(rcm_version)s.%(time_frequency)s.%(variable)s + +# directory_format = %(root)s/%(project)s/%(product)s/%(domain)s/%(institute)s/%(driving_model)s/%(experiment)s/%(ensemble)s/%(rcm_model)s/%(rcm_version)s/%(time_frequency)s/%(variable)s/%(version)s \ No newline at end of file
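
For reference when replaying these patches: populate_store and add_document_to_index both derive
the elasticsearch document id from the dataset id (ds_id/drs) by mapping the "__ALL__" token to
"*" and taking the MD5 hex digest of the result. A minimal standalone sketch of that convention
(the helper name es_doc_id is illustrative only, not part of dachar):

    import hashlib

    def es_doc_id(drs):
        # Map the "__ALL__" wildcard token to "*", as create_index.py does.
        mapper = {"__ALL__": "*"}
        for find_s, replace_s in mapper.items():
            drs = drs.replace(find_s, replace_s)
        # The MD5 hex digest of the mapped ds_id is used as the document id.
        m = hashlib.md5()
        m.update(drs.encode("utf-8"))
        return m.hexdigest()

    # Example ds_id taken from the commented-out call in main() above.
    print(es_doc_id("cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga"))

Because the id is a pure function of the ds_id, re-running populate_store against the same index
overwrites each document in place rather than creating duplicates.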