1 change: 1 addition & 0 deletions MANIFEST.in
@@ -10,5 +10,6 @@
 recursive-include tests *
 recursive-include etc *
 recursive-exclude * __pycache__
 recursive-exclude * *.py[co]
 recursive-include etc *
+recursive-include docs *.md conf.py Makefile make.bat *.jpg *.png *.gif
33 changes: 18 additions & 15 deletions dachar/analyse/checks/_base_check.py
@@ -1,13 +1,13 @@
 import logging
 import pprint as pp
 
-from dachar.utils import UNDEFINED, nested_lookup, JDict
-
-from dachar.utils.get_stores import get_fix_prop_store, get_dc_store
-
-from dachar.fixes.fix_api import get_fix
-
-
-
+from dachar import __version__ as version
+from dachar import logging
+from dachar.fixes.fix_api import get_fix
+from dachar.utils import JDict
+from dachar.utils import nested_lookup
+from dachar.utils import UNDEFINED
+from dachar.utils.get_stores import get_dc_store
+from dachar.utils.get_stores import get_fix_prop_store
 
 LOGGER = logging.getLogger(__file__)
@@ -53,6 +53,13 @@ class _BaseCheck(object):
     typical_threshold = 0.41
     atypical_threshold = 0.15
 
+    source = {
+        "name": "dachar",
+        "version": f"{version}",
+        "comment": "No specific source provided - link to all fixes in dachar",
+        "url": "https://github.com/roocs/dachar/tree/master/dachar/fixes",
+    }
+
     def __init__(self, sample):
         self.sample = sample
         self._load()
@@ -124,12 +131,10 @@ def _extract_content(self):
         content = []
 
         for ds_id in self.sample:
-            items = dict(
-                [
-                    (key, nested_lookup(key, self._cache[ds_id], must_exist=True))
-                    for key in self.characteristics
-                ]
-            )
+            items = {
+                key: nested_lookup(key, self._cache[ds_id], must_exist=True)
+                for key in self.characteristics
+            }
             content.append((ds_id, items))
         return content
 
@@ -144,5 +149,3 @@ def deduce_fix(self, ds_id, atypical_content, typical_content):
 
     def _propose_fix(self, ds_id, fix):
         get_fix_prop_store().propose(ds_id, fix)
-
-
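The `_extract_content` rewrite replaces a `dict()` call over a list of tuples with a dict comprehension; the two forms build the same mapping, as this standalone sketch illustrates (the `nested_lookup` stand-in here is simplified and ignores the wildcard and `must_exist` handling of the real `dachar.utils` helper):

```python
# Simplified stand-in for dachar.utils.nested_lookup: follow dot-separated
# keys down a nested dict (no wildcard or must_exist support).
def nested_lookup(key, record):
    for part in key.split("."):
        record = record[part]
    return record

cache = {"data": {"dim_names": ["time", "lat", "lon"], "shape": [120, 90, 180]}}
characteristics = ["data.dim_names", "data.shape"]

old_style = dict(
    [(key, nested_lookup(key, cache)) for key in characteristics]
)
new_style = {key: nested_lookup(key, cache) for key in characteristics}

assert old_style == new_style  # same mapping, less ceremony
```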
34 changes: 27 additions & 7 deletions dachar/analyse/checks/coord_checks.py
@@ -1,17 +1,29 @@
+import pprint
+from collections import Counter
+from collections import namedtuple
+from itertools import chain
+
+from scipy.stats import mode
+
+from dachar import __version__ as version
 from dachar.analyse.checks._base_check import _BaseCheck
 from dachar.fixes.fix_api import get_fix
-from dachar.utils.common import get_extra_items_in_larger_sequence, coord_mappings
 from dachar.utils import nested_lookup
-from scipy.stats import mode
-from collections import Counter, namedtuple
-from itertools import chain
-import pprint
+from dachar.utils.common import coord_mappings
+from dachar.utils.common import get_extra_items_in_larger_sequence
 
 
 class RankCheck(_BaseCheck):
     characteristics = ["data.dim_names", "data.shape"]
     associated_fix = "SqueezeDimensionsFix"
 
+    source = {
+        "name": "dachar",
+        "version": f"{version}",
+        "comment": "",
+        "url": "https://github.com/roocs/dachar/blob/master/dachar/fixes/coord_fixes.py#L8",
+    }
+
     def deduce_fix(self, ds_id, atypical_content, typical_content):
         dicts = []
 
@@ -38,7 +50,7 @@ def deduce_fix(self, ds_id, atypical_content, typical_content):
 
         operands = {"dims": extra_coords}
 
-        fix = fix_cls(ds_id, **operands)
+        fix = fix_cls(ds_id, source=self.source, **operands)
         d = fix.to_dict()
         dicts.append(d)
         return dicts
@@ -51,10 +63,18 @@ def deduce_fix(self, ds_id, atypical_content, typical_content):
 # TODO: Need to change this so that characteristic compared is coord_type
 # But characteristic used to create new variable is id
 
+
 class MissingCoordCheck(_BaseCheck):
     characteristics = ["coordinates.*.id"]
     associated_fix = "AddScalarCoordFix"
 
+    source = {
+        "name": "dachar",
+        "version": f"{version}",
+        "comment": "",
+        "url": "https://github.com/roocs/dachar/blob/master/dachar/fixes/coord_fixes.py#L44",
+    }
+
     def deduce_fix(self, ds_id, atypical_content, typical_content):
         dicts = []
 
@@ -112,7 +132,7 @@ def deduce_fix(self, ds_id, atypical_content, typical_content):
 
         operands["attrs"] = operand_dict
 
-        fix = fix_cls(ds_id, **operands)
+        fix = fix_cls(ds_id, source=self.source, **operands)
         d = fix.to_dict()
 
         # coordinate isn't scalar - fix isn't suitable
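The class-level `source` dicts added in this file shadow the generic one on `_BaseCheck`, so each check records the URL of its specific fix implementation in the proposals it generates. A minimal sketch of the attribute-shadowing pattern (simplified classes, not the real ones):

```python
class _BaseCheck:
    # generic fallback recorded when a check has no specific source
    source = {
        "name": "dachar",
        "url": "https://github.com/roocs/dachar/tree/master/dachar/fixes",
    }

class RankCheck(_BaseCheck):
    # subclass attribute shadows the base class value
    source = {
        "name": "dachar",
        "url": "https://github.com/roocs/dachar/blob/master/dachar/fixes/coord_fixes.py#L8",
    }

class OtherCheck(_BaseCheck):
    pass  # inherits the generic source unchanged

assert RankCheck.source["url"].endswith("#L8")
assert OtherCheck.source is _BaseCheck.source
```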
91 changes: 83 additions & 8 deletions dachar/cli.py
@@ -2,8 +2,6 @@
 import os
 import shutil
 
-from dachar.utils._stores_for_tests import _TestFixProposalStore
-
 """Console script for dachar."""
 
 __author__ = """Elle Smith"""
@@ -18,6 +16,10 @@
 from dachar.analyse.sample_analyser import analyse
 from dachar.fixes import fix_processor
 from dachar.fixes.fix_processor import process_all_fixes
+from dachar.fixes.generate_proposals import (
+    generate_fix_proposals,
+    generate_proposal_from_template,
+)
 from unittest.mock import Mock
 
 
@@ -41,8 +43,10 @@ def _get_arg_parser_scan(parser):
     :return: Namespace object built from attributes parsed from command line.
     """
     # parser = argparse.ArgumentParser()
-    project_options = [_.split(':')[1] for _ in CONFIG.keys() if _.startswith('project:')]
-    location_options = CONFIG['dachar:settings']['locations']
+    project_options = [
+        _.split(":")[1] for _ in CONFIG.keys() if _.startswith("project:")
+    ]
+    location_options = CONFIG["dachar:settings"]["locations"]
 
     parser.add_argument(
         "project",
@@ -130,8 +134,10 @@ def scan_main(args):
 
 
 def _get_arg_parser_analyse(parser):
-    project_options = [_.split(':')[1] for _ in CONFIG.keys() if _.startswith('project:')]
-    location_options = CONFIG['dachar:settings']['locations']
+    project_options = [
+        _.split(":")[1] for _ in CONFIG.keys() if _.startswith("project:")
+    ]
+    location_options = CONFIG["dachar:settings"]["locations"]
 
     parser.add_argument(
         "project",
@@ -200,7 +206,7 @@ def _get_arg_parser_process_fixes(parser):
         type=str,
         default=None,
         required=True,
-        help="Action to carry out on fixes: process for proposed fixes, withdraw to withdraw"
+        help="Action to carry out on fixes: process for proposed fixes, withdraw to withdraw "
         "existing fixes",
     )
 
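The one-character change above fixes a classic implicit-concatenation bug: adjacent string literals join with no separator, so the help text previously rendered as "...withdrawexisting fixes". A quick illustration:

```python
# Adjacent string literals are concatenated at compile time, verbatim.
broken = (
    "withdraw to withdraw"
    "existing fixes"
)
fixed = (
    "withdraw to withdraw "
    "existing fixes"
)
assert broken == "withdraw to withdrawexisting fixes"
assert fixed == "withdraw to withdraw existing fixes"
```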
@@ -218,6 +224,71 @@ def process_fixes_main(args):
     process_all_fixes(action, ds_ids)
 
 
+def _get_arg_parser_propose_fixes(parser):
+
+    parser.add_argument(
+        "-f",
+        "--files",
+        type=str,
+        default=None,
+        required=False,
+        help="List of comma-separated json files containing information to generate fix proposals. "
+        "This option must be used on its own.",
+    )
+
+    parser.add_argument(
+        "-d",
+        "--dataset-list",
+        type=str,
+        default=None,
+        required=False,
+        help="Text file containing dataset ids for which to propose the fix provided in the template. "
+        "If using this option you must provide a template using the --template (-t) option.",
+    )
+
+    parser.add_argument(
+        "-t",
+        "--template",
+        type=str,
+        default=None,
+        required=False,
+        help="Template for fix proposal. "
+        "If using this option you must provide a list of dataset ids using the --dataset-list (-d) option.",
+    )
+
+    return parser
+
+
+def parse_args_propose_fixes(args):
+
+    if args.files:
+        if args.dataset_list or args.template:
+            raise Exception(
+                "The file option must be used on its own. "
+                "A dataset list and a template must be provided together."
+            )
+
+    if args.dataset_list and not args.template:
+        raise Exception("A dataset list and a template must be provided together.")
+
+    if args.template and not args.dataset_list:
+        raise Exception("A dataset list and a template must be provided together.")
+
+    files = _to_list(args.files)
+    ds_list = args.dataset_list
+    template = args.template
+    return files, ds_list, template
+
+
+def propose_fixes_main(args):
+    files, ds_list, template = parse_args_propose_fixes(args)
+
+    if files:
+        generate_fix_proposals(files)
+    elif ds_list and template:
+        generate_proposal_from_template(template, ds_list)
+
+
 def main():
     """Console script for dachar."""
     main_parser = argparse.ArgumentParser()
@@ -235,9 +306,13 @@ def main():
     _get_arg_parser_process_fixes(fix_parser)
     fix_parser.set_defaults(func=process_fixes_main)
 
+    fix_proposal_parser = subparsers.add_parser("propose-fixes")
+    _get_arg_parser_propose_fixes(fix_proposal_parser)
+    fix_proposal_parser.set_defaults(func=propose_fixes_main)
+
     args = main_parser.parse_args()
     args.func(args)
 
 
 if __name__ == "__main__":
-    sys.exit(main())  # pragma: no cover
+    sys.exit(main())
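The new `propose-fixes` subcommand accepts either a list of proposal files or a dataset list plus a template, never a mix. A standalone sketch of that option validation, reimplemented here for illustration rather than imported from dachar (file names are made up):

```python
def validate(files=None, dataset_list=None, template=None):
    # --files must be used on its own
    if files and (dataset_list or template):
        raise ValueError("The file option must be used on its own.")
    # --dataset-list and --template only work as a pair
    if bool(dataset_list) != bool(template):
        raise ValueError("A dataset list and a template must be provided together.")
    return files, dataset_list, template

validate(files="fix1.json,fix2.json")                    # ok: files alone
validate(dataset_list="ds_ids.txt", template="t.json")   # ok: the pair
# validate(template="t.json")  -> raises: no dataset list given
```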
44 changes: 44 additions & 0 deletions dachar/etc/roocs.ini
@@ -0,0 +1,44 @@
+[config_data_types]
+extra_lists = locations common cmip5 cmip6 cordex
+extra_ints = memory_large memory_small dir_grouping_level
+extra_floats = concern_threshold
+
+[dachar:processing]
+queue = short-serial
+wallclock_large = 23:59
+memory_large = 32000
+wallclock_small = 04:00
+memory_small = 4000
+
+[dachar:output_paths]
+# reformat these
+_base_path = ./outputs
+base_log_dir = %(_base_path)s/logs
+batch_output_path = %(base_log_dir)s/batch-outputs/{grouped_ds_id}
+json_output_path = %(_base_path)s/register/{grouped_ds_id}.json
+success_path = %(base_log_dir)s/success/{grouped_ds_id}.log
+no_files_path = %(base_log_dir)s/failure/no_files/{grouped_ds_id}.log
+pre_extract_error_path = %(base_log_dir)s/failure/pre_extract_error/{grouped_ds_id}.log
+extract_error_path = %(base_log_dir)s/failure/extract_error/{grouped_ds_id}.log
+write_error_path = %(base_log_dir)s/failure/write_error/{grouped_ds_id}.log
+fix_path = %(_base_path)s/fixes/{grouped_ds_id}.json
+
+
+[dachar:checks]
+common = coord_checks.RankCheck coord_checks.MissingCoordCheck
+cmip5 =
+cmip6 = test
+cordex =
+
+
+[dachar:settings]
+elastic_api_token =
+dir_grouping_level = 4
+concern_threshold = 0.2
+locations = ceda dkrz other
+
+
+# From old options file - do we still need these?
+# dataset_id = cordex.%(product)s.%(domain)s.%(institute)s.%(driving_model)s.%(experiment)s.%(ensemble)s.%(rcm_name)s.%(rcm_version)s.%(time_frequency)s.%(variable)s
+
+# directory_format = %(root)s/%(project)s/%(product)s/%(domain)s/%(institute)s/%(driving_model)s/%(experiment)s/%(ensemble)s/%(rcm_model)s/%(rcm_version)s/%(time_frequency)s/%(variable)s/%(version)s
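The `%(...)s` references in this file resolve through configparser's basic interpolation at read time, while the `{grouped_ds_id}` placeholders survive for later `str.format`-style substitution. A minimal sketch, assuming the file is read with the standard library's ConfigParser from the repo root:

```python
import configparser

cfg = configparser.ConfigParser()
cfg.read("dachar/etc/roocs.ini")

paths = cfg["dachar:output_paths"]
print(paths["base_log_dir"])   # ./outputs/logs
print(paths["success_path"])   # ./outputs/logs/success/{grouped_ds_id}.log

# the remaining placeholder is filled per dataset (this id is hypothetical)
log_path = paths["success_path"].format(grouped_ds_id="cmip6/grouped/ds/id")
```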
10 changes: 8 additions & 2 deletions dachar/fixes/__init__.py
@@ -1,2 +1,8 @@
-from .fix_proposal_store import LocalFixProposalStore, ElasticFixProposalStore
-from .fix_store import LocalFixStore, ElasticFixStore
+from .array_fixes import *
+from .attr_fixes import *
+from .coord_fixes import *
+from .fix_proposal_store import ElasticFixProposalStore
+from .fix_proposal_store import LocalFixProposalStore
+from .fix_store import ElasticFixStore
+from .fix_store import LocalFixStore
+from .var_fixes import *
3 changes: 1 addition & 2 deletions dachar/fixes/_base_fix.py
@@ -1,5 +1,4 @@
 from dachar.utils.common import UNDEFINED
-from dachar import __version__ as version
 
 
 class FixDetails(object):
@@ -51,7 +50,7 @@ class _BaseDatasetFix(object):
     </variable>
     """
 
-    def __init__(self, ds_id, source=f"dachar version {version}", **operands):
+    def __init__(self, ds_id, source, **operands):
        self.ds_id = ds_id
        self.source = source
        self.operands = operands
20 changes: 20 additions & 0 deletions dachar/fixes/array_fixes.py
@@ -0,0 +1,20 @@
+from dachar.fixes._base_fix import _BaseDatasetFix
+from dachar.utils.common import UNDEFINED
+
+__all__ = ["MaskDataFix"]
+
+
+class MaskDataFix(_BaseDatasetFix):
+    fix_id = "MaskDataFix"
+    title = "Apply Mask to Data"
+    description = """
+Masks data equal to a given value.
+
+For example:
+  - inputs:
+    - {'value': '1.0e33'}
+"""
+    category = "array_fixes"
+    required_operands = ["value"]
+    ref_implementation = "daops.data_utils.array_utils.mask_data"
+    process_type = "post_processor"
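With the default removed from `_BaseDatasetFix.__init__`, every fix is now constructed with an explicit `source` dict, as the checks above do. A minimal sketch of instantiating the new fix (the version string and dataset id are hypothetical, and only the attributes visibly set in `__init__` are inspected):

```python
from dachar.fixes.array_fixes import MaskDataFix

source = {
    "name": "dachar",
    "version": "0.x",  # hypothetical version string
    "comment": "",
    "url": "https://github.com/roocs/dachar/tree/master/dachar/fixes",
}

# the dataset id here is a made-up example
fix = MaskDataFix("cmip6.ScenarioMIP.example.ds.id", source=source, value="1.0e33")

print(fix.fix_id)    # MaskDataFix
print(fix.operands)  # {'value': '1.0e33'}
```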