From 009a3ca6402e0d33506c334558a0559f240ebf12 Mon Sep 17 00:00:00 2001 From: Carolyn Whitlock Date: Fri, 5 Sep 2025 13:30:49 -0400 Subject: [PATCH 1/8] new function for parsing yaml when only a hist_source is given --- fre/app/helpers.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fre/app/helpers.py b/fre/app/helpers.py index 7a1ad0967..95d8ef1fa 100644 --- a/fre/app/helpers.py +++ b/fre/app/helpers.py @@ -61,6 +61,17 @@ def get_variables(yml: dict, pp_comp: str) -> dict: return src_vars +def get_variables_hist_src(yml: dict pp_hist_src: str) -> list: + ''' + Gets the variables associated with a history_source from the yamlfile. + This only works because the history_source elements underneath the pp components + are supposed to be unique, and it is needed because some tasks in flow.cylc + (like split-netcdf) cycle on the flat list of all history_source elements, + not then . + ''' + src_vars = [] + return src_vars + ## NOTE: For python 3.11 - this might be available already as contextlib.chdir() ## Re-asses if our own contextmanager function is needed here @contextmanager From 646a3c62a10d983facd960485f0faba42f82e854 Mon Sep 17 00:00:00 2001 From: Carolyn Whitlock Date: Fri, 5 Sep 2025 15:42:39 -0400 Subject: [PATCH 2/8] adding new parsing function without component info, adding test for function, modifying test file paths to be absolute rather than relative --- fre/app/helpers.py | 46 ++++++++++++++++++++++++++++++++++- fre/app/tests/test_helpers.py | 32 ++++++++++++++++++++++-- 2 files changed, 75 insertions(+), 3 deletions(-) diff --git a/fre/app/helpers.py b/fre/app/helpers.py index 95d8ef1fa..efcc0681d 100644 --- a/fre/app/helpers.py +++ b/fre/app/helpers.py @@ -2,6 +2,7 @@ from pathlib import Path import yaml from contextlib import contextmanager +import sys # set up logging import logging @@ -61,7 +62,7 @@ def get_variables(yml: dict, pp_comp: str) -> dict: return src_vars -def get_variables_hist_src(yml: dict pp_hist_src: str) -> list: +def get_variables_hist_src(yml: dict, hist_src: str) -> list: ''' Gets the variables associated with a history_source from the yamlfile. This only works because the history_source elements underneath the pp components @@ -69,7 +70,42 @@ def get_variables_hist_src(yml: dict pp_hist_src: str) -> list: (like split-netcdf) cycle on the flat list of all history_source elements, not then . ''' + fre_logger.debug(f"Yaml file information: {yml}") + fre_logger.debug(f"history source: {hist_src}") + if not isinstance(yml, dict): + raise TypeError("yml should be of type dict, but was of type " + str(type(yml))) + is_found = False src_vars = [] + for component_info in yml["postprocess"]["components"]: + print(component_info['type']) + #it is assumed that hist_src is pre-filtered for active/inactive pp components + src_info = component_info.get("sources") + #timeseries case + for src in src_info: + print(src) + if src['history_file'] == hist_src: + is_found = True + if src.get("variables"): + src_vars = src.get("variables") + else: + src_vars = "all" + break + #static case + if component_info.get("static"): + static_el = component_info.get("static") + print(static_el[0]) + print(static_el[0].get("source")) + if static_el[0].get("source") == hist_src: + is_found = True + if static_el[0].get("variables"): + src_vars = static_el[0].get("variables") + else: + src_vars = "all" + break + #using is_found rather than a check for length on the off chance that the + #variable list is of length 0 without being caught earlier in the config + if not is_found: + raise ValueError(f"history_file, {hist_src}, not found in pp yaml configuration!") return src_vars ## NOTE: For python 3.11 - this might be available already as contextlib.chdir() @@ -97,3 +133,11 @@ def change_directory(new_path: str): yield finally: os.chdir(original_path) + +if __name__ == '__main__': + print("getting hist_src variables from yaml") + yamlfile = "/home/cew/Code/fre-cli/fre/app/remap_pp_components/tests/test-data/yaml_ex.yaml" + with open(yamlfile, 'r') as yfile: + yml = yaml.safe_load(yfile) + src_list = get_variables_hist_src(yml, sys.argv[1]) + print(src_list) diff --git a/fre/app/tests/test_helpers.py b/fre/app/tests/test_helpers.py index 8f4145705..35a82dc0d 100644 --- a/fre/app/tests/test_helpers.py +++ b/fre/app/tests/test_helpers.py @@ -1,14 +1,20 @@ from pathlib import Path import yaml import pytest +import os +from os import path from fre.app import helpers +print(dir(helpers)) + ## Path to example test data -DATA_DIR = Path("fre/app/remap_pp_components/tests/test-data") +thisfile = Path(os.path.abspath(__file__)) +APP_DIR = thisfile.parent.parent +DATA_DIR = Path(f"{APP_DIR}/remap_pp_components/tests/test-data") ## YAML configuration example file YAML_EX = f"{DATA_DIR}/yaml_ex.yaml" -DIR_CHANGE = Path("fre/app/remap_pp_components") +DIR_CHANGE = Path(f"{APP_DIR}/remap_pp_components") def test_get_variables(): """ @@ -47,6 +53,28 @@ def test_get_variables_err(): with pytest.raises( TypeError ) as execinfo: out = helpers.get_variables(yml = YAML_EX, pp_comp = "test_param") assert execinfo.type is TypeError + + +@pytest.mark.parametrize("hist_src,var_list", + [ + pytest.param("atmos_month", "all", id="tseries-all"), + pytest.param("atmos_static_scalar", "all", id="static-all"), + pytest.param("atmos_scalar_test_vars",["co2mass"], id="tseries-varlist"), + pytest.param("atmos_static_scalar_test_vars", ["bk"] , id="static-varlist"), + pytest.param("atmos_daily", "all", id="fail-nohist_src", marks=pytest.mark.xfail()) + ]) +def test_get_variables_hist_src(hist_src, var_list): + ''' + Test get_variables_hist_src() + - timeseries all, static all + - timeseries varlist, static varlist + - hist_src not found in file (xfail) + ''' + # Load the yaml config + with open(YAML_EX,'r') as f: + yml=yaml.safe_load(f) + new_var_list = helpers.get_variables_hist_src(yml, hist_src) + assert new_var_list == var_list def test_change_directory(): """ From 5fe54ef516734a5813112e3a083bf80a64bf882f Mon Sep 17 00:00:00 2001 From: Carolyn Whitlock Date: Wed, 10 Sep 2025 11:13:58 -0400 Subject: [PATCH 3/8] updating click documentation, checking tool functionality --- docs/tools/pp.rst | 2 +- fre/app/helpers.py | 8 -------- fre/pp/frepp.py | 13 +++++-------- fre/pp/split_netcdf_script.py | 13 ++++++------- 4 files changed, 12 insertions(+), 24 deletions(-) diff --git a/docs/tools/pp.rst b/docs/tools/pp.rst index 5620132fe..4c40eb609 100644 --- a/docs/tools/pp.rst +++ b/docs/tools/pp.rst @@ -63,7 +63,7 @@ ------------------------ * Given a directory structure with netcdf files, calls split-netcdf on individual netcdf files -* Minimal Syntax: ``fre pp split-netcdf-wrapper -i input/ -o output/ [-c yaml_component -s history_source -y yamlfile.yml | --split-all-vars] [--use-subdirs]`` +* Minimal Syntax: ``fre pp split-netcdf-wrapper -i input/ -o output/ [ -s history_source -y yamlfile.yml | --split-all-vars] [--use-subdirs]`` * Module(s) needed: n/a * Example: ``fre pp split-netcdf-wrapper -i input/ -o output/ --split-all-vars --use-subdirs`` diff --git a/fre/app/helpers.py b/fre/app/helpers.py index efcc0681d..959b1b6b8 100644 --- a/fre/app/helpers.py +++ b/fre/app/helpers.py @@ -133,11 +133,3 @@ def change_directory(new_path: str): yield finally: os.chdir(original_path) - -if __name__ == '__main__': - print("getting hist_src variables from yaml") - yamlfile = "/home/cew/Code/fre-cli/fre/app/remap_pp_components/tests/test-data/yaml_ex.yaml" - with open(yamlfile, 'r') as yfile: - yml = yaml.safe_load(yfile) - src_list = get_variables_hist_src(yml, sys.argv[1]) - print(src_list) diff --git a/fre/pp/frepp.py b/fre/pp/frepp.py index 025f88a11..1176e76b7 100644 --- a/fre/pp/frepp.py +++ b/fre/pp/frepp.py @@ -168,8 +168,6 @@ def histval(history,date_string,warn): help='Path to a directory in which to search for netcdf files to split. Files matching the pattern in $history-source will be split.') @click.option('-o', '--outputdir', required=True, help='Path to a directory to which to write split netcdf files.') -@click.option('-c', '--component', required=False, default=None, - help='component specified in yamlfile under postprocess:components. Needs to be the same component that contains the sources:history-file. Conflicts with --split-all-vars.') @click.option('-s', '--history-source', required=True, default=None, help='history-file specification under postprocess:components:type=component:sources in the fre postprocess config yamlfile. Used to match files in inputdir.') @click.option('-y', '--yamlfile', required=False, default=None, @@ -178,10 +176,10 @@ def histval(history,date_string,warn): help="Whether to search subdirs underneath $inputdir for netcdf files. Defaults to false. This option is used in flow.cylc when regridding.") @click.option('--split-all-vars', '-a', is_flag=True, default=False, help="Whether to ignore other config options and split all vars in the file. Defaults to false. Conflicts with -c, -s and -y options.") -def split_netcdf_wrapper(inputdir, outputdir, component, history_source, use_subdirs, yamlfile, split_all_vars): +def split_netcdf_wrapper(inputdir, outputdir,, history_source, use_subdirs, yamlfile, split_all_vars): ''' Splits all netcdf files matching the pattern specified by $history_source in $inputdir into files with a single data variable written to $outputdir. If $yamlfile contains - variable filtering settings under $component, only those variables specified will + variable filtering settings under $history_source, only those variables specified will be split into files for $outdir. If no variables in the variable filtering match vars in the netcdf files, no files will be written to $outdir. If --use-subdirs is set, netcdf files will be searched for in subdirs under $outdir. @@ -189,12 +187,11 @@ def split_netcdf_wrapper(inputdir, outputdir, component, history_source, use_sub This tool is intended for use in fre-workflows and assumes files to split have fre-specific naming conventions. For a more general tool, look at split-netcdf.''' if split_all_vars: - none_args = [component, yamlfile] + none_args = [yamlfile] if any([el is not None for el in none_args]): - fre_logger.error('''Error in split_netcdf_wrapper arg parsing: --split-all-vars was set and one or more of -mutually exclusive options --component and --yamlfile was also set! + fre_logger.error('''Error in split_netcdf_wrapper arg parsing: --split-all-vars was set and --yamlfile was also set! Either unset --split-all-vars or parse the varlist from the yaml - do not try do do both!''') - split_netcdf_script.split_netcdf(inputdir, outputdir, component, history_source, use_subdirs, yamlfile, split_all_vars) + split_netcdf_script.split_netcdf(inputdir, outputdir, history_source, use_subdirs, yamlfile, split_all_vars) #fre pp split-netcdf @pp_cli.command() diff --git a/fre/pp/split_netcdf_script.py b/fre/pp/split_netcdf_script.py index d2336e8c4..d0a5268ea 100644 --- a/fre/pp/split_netcdf_script.py +++ b/fre/pp/split_netcdf_script.py @@ -18,7 +18,8 @@ from itertools import chain import logging -from fre.app.helpers import get_variables +#from fre.app.helpers import get_variables +from fre.app.helpers import get_variables_hist_src fre_logger = logging.getLogger(__name__) @@ -80,12 +81,10 @@ def split_netcdf(inputDir, outputDir, component, history_source, use_subdirs, varlist = "all" else: ydict = yaml.safe_load(Path(yamlfile).read_text()) - vardict = get_variables(ydict, component) - if vardict is None or history_source not in vardict.keys(): - fre_logger.error(f"error: either component {component} not defined or source {history_source} not defined under component {component} in yamlfile {yamlfile}.") - raise ValueError(f"error: either component {component} not defined or source {history_source} not defined under component {component} in yamlfile {yamlfile}.") - else: - varlist = vardict[history_source] + varlist = get_variables_hist_src(ydict, history_source) + if varlist is None: + fre_logger.error(f"error: source {history_source} not defined in yamlfile {yamlfile}.") + raise ValueError(f"error: source {history_source} not defined in yamlfile {yamlfile}.") #extend globbing used to find both tiled and non-tiled files #all files that contain the current source:history_file name, From 6616068c1af0f690ca8ea91d857b23890c4050d3 Mon Sep 17 00:00:00 2001 From: Carolyn Whitlock Date: Wed, 10 Sep 2025 11:37:56 -0400 Subject: [PATCH 4/8] updating documentation, adding more tests --- fre/app/helpers.py | 16 +++++++++++----- fre/app/tests/test_helpers.py | 12 ++++++++++++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/fre/app/helpers.py b/fre/app/helpers.py index 959b1b6b8..0ace483ab 100644 --- a/fre/app/helpers.py +++ b/fre/app/helpers.py @@ -15,6 +15,7 @@ def get_variables(yml: dict, pp_comp: str) -> dict: :type yml: dict :param pp_comp: Active pp component :type pp_comp: str + :raises TypeError: if yml is not a dict (i.e. a path to a yamlfile) :raises ValueError: if the active pp component is not found in the yaml configuration :return: dictionary of {source name: [variables]} - the values are a list of specified variables @@ -69,6 +70,15 @@ def get_variables_hist_src(yml: dict, hist_src: str) -> list: are supposed to be unique, and it is needed because some tasks in flow.cylc (like split-netcdf) cycle on the flat list of all history_source elements, not then . + + :param yml: loaded yaml file + :type yml: dict + :param hist_src: name of a history_source file in that yamlfile + :type hist_src: string + :raises TypeError: if yml is not a dict (i.e. a path to a yamlfile) + :raises ValueError: if the history_source is not found in the yaml configuration + :return: either "all" or a list of variables associated with history_source + :rtype: string or list ''' fre_logger.debug(f"Yaml file information: {yml}") fre_logger.debug(f"history source: {hist_src}") @@ -77,12 +87,10 @@ def get_variables_hist_src(yml: dict, hist_src: str) -> list: is_found = False src_vars = [] for component_info in yml["postprocess"]["components"]: - print(component_info['type']) #it is assumed that hist_src is pre-filtered for active/inactive pp components src_info = component_info.get("sources") #timeseries case for src in src_info: - print(src) if src['history_file'] == hist_src: is_found = True if src.get("variables"): @@ -93,8 +101,6 @@ def get_variables_hist_src(yml: dict, hist_src: str) -> list: #static case if component_info.get("static"): static_el = component_info.get("static") - print(static_el[0]) - print(static_el[0].get("source")) if static_el[0].get("source") == hist_src: is_found = True if static_el[0].get("variables"): @@ -105,7 +111,7 @@ def get_variables_hist_src(yml: dict, hist_src: str) -> list: #using is_found rather than a check for length on the off chance that the #variable list is of length 0 without being caught earlier in the config if not is_found: - raise ValueError(f"history_file, {hist_src}, not found in pp yaml configuration!") + raise ValueError(f"history_file, {hist_src}, not found in pp yaml configuration {yml}!") return src_vars ## NOTE: For python 3.11 - this might be available already as contextlib.chdir() diff --git a/fre/app/tests/test_helpers.py b/fre/app/tests/test_helpers.py index 35a82dc0d..8798f9fb6 100644 --- a/fre/app/tests/test_helpers.py +++ b/fre/app/tests/test_helpers.py @@ -75,6 +75,18 @@ def test_get_variables_hist_src(hist_src, var_list): yml=yaml.safe_load(f) new_var_list = helpers.get_variables_hist_src(yml, hist_src) assert new_var_list == var_list + +def test_yaml_load_wrong_type(): + ''' + Checks that we get the right error raised when we try to read an unloaded yaml + for get_variables and get_variables_hist_src + ''' + with pytest.raises( Type Error ) as execinfo: + out1 = helpers.get_variables(YAML_EX, 'atmos_scalar_test_vars') + assert assert execinfo.type is TypeError + with pytest.raises( Type Error ) as execinfo2: + out1 = helpers.get_variables_hist_src(YAML_EX, 'atmos_scalar_test_vars') + assert assert execinfo2.type is TypeError def test_change_directory(): """ From 88ec37d3c29462ae023830b24aa1e99341c6f8ea Mon Sep 17 00:00:00 2001 From: Carolyn Whitlock Date: Thu, 11 Sep 2025 14:19:58 -0400 Subject: [PATCH 5/8] tweaking documenation, fixing syntax error in frepp --- fre/app/tests/test_helpers.py | 13 +++++-------- fre/pp/frepp.py | 2 +- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/fre/app/tests/test_helpers.py b/fre/app/tests/test_helpers.py index 8798f9fb6..d1f9c0e52 100644 --- a/fre/app/tests/test_helpers.py +++ b/fre/app/tests/test_helpers.py @@ -46,7 +46,7 @@ def test_get_variables(): out2 == expected_dicts[1], out3 == expected_dicts[2]]) -def test_get_variables_err(): +def test_get_variables_load_wrong_type(): """ Test get_variables() returns an error when given inappropriate input """ @@ -66,8 +66,8 @@ def test_get_variables_err(): def test_get_variables_hist_src(hist_src, var_list): ''' Test get_variables_hist_src() - - timeseries all, static all - - timeseries varlist, static varlist + - timeseries getting back all, static getting back all + - timeseries getting back varlist, static getting back varlist - hist_src not found in file (xfail) ''' # Load the yaml config @@ -76,14 +76,11 @@ def test_get_variables_hist_src(hist_src, var_list): new_var_list = helpers.get_variables_hist_src(yml, hist_src) assert new_var_list == var_list -def test_yaml_load_wrong_type(): +def test_get_var_hist_src_load_wrong_type(): ''' Checks that we get the right error raised when we try to read an unloaded yaml - for get_variables and get_variables_hist_src + for get_variables_hist_src ''' - with pytest.raises( Type Error ) as execinfo: - out1 = helpers.get_variables(YAML_EX, 'atmos_scalar_test_vars') - assert assert execinfo.type is TypeError with pytest.raises( Type Error ) as execinfo2: out1 = helpers.get_variables_hist_src(YAML_EX, 'atmos_scalar_test_vars') assert assert execinfo2.type is TypeError diff --git a/fre/pp/frepp.py b/fre/pp/frepp.py index 1176e76b7..09b958fd9 100644 --- a/fre/pp/frepp.py +++ b/fre/pp/frepp.py @@ -176,7 +176,7 @@ def histval(history,date_string,warn): help="Whether to search subdirs underneath $inputdir for netcdf files. Defaults to false. This option is used in flow.cylc when regridding.") @click.option('--split-all-vars', '-a', is_flag=True, default=False, help="Whether to ignore other config options and split all vars in the file. Defaults to false. Conflicts with -c, -s and -y options.") -def split_netcdf_wrapper(inputdir, outputdir,, history_source, use_subdirs, yamlfile, split_all_vars): +def split_netcdf_wrapper(inputdir, outputdir, history_source, use_subdirs, yamlfile, split_all_vars): ''' Splits all netcdf files matching the pattern specified by $history_source in $inputdir into files with a single data variable written to $outputdir. If $yamlfile contains variable filtering settings under $history_source, only those variables specified will From 8a31771cef323d708fd1bfe16a817409e3089c6c Mon Sep 17 00:00:00 2001 From: Carolyn Whitlock Date: Thu, 11 Sep 2025 14:26:57 -0400 Subject: [PATCH 6/8] adding more varnames to ok unknown words --- .cspell/ok-unknown-words.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.cspell/ok-unknown-words.txt b/.cspell/ok-unknown-words.txt index 60ba96b49..22c94293f 100644 --- a/.cspell/ok-unknown-words.txt +++ b/.cspell/ok-unknown-words.txt @@ -314,6 +314,7 @@ nncattr noaa NOAA noarch +nohist noleap notransfer notused @@ -462,6 +463,7 @@ tasmin TAVG testingtestingtestingtesting thefiles +thisfile timavg timavgcsh timeaveraged @@ -479,6 +481,7 @@ topdir tripolar truncatedate truncatedateformat +tseries turb twostep ucomp From 40152012d5e4a100d589994fbea84be2607fe067 Mon Sep 17 00:00:00 2001 From: Carolyn Whitlock Date: Thu, 11 Sep 2025 15:08:15 -0400 Subject: [PATCH 7/8] fixing syntax errors in test functions --- fre/app/tests/test_helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fre/app/tests/test_helpers.py b/fre/app/tests/test_helpers.py index d1f9c0e52..dcbbaab3a 100644 --- a/fre/app/tests/test_helpers.py +++ b/fre/app/tests/test_helpers.py @@ -81,9 +81,9 @@ def test_get_var_hist_src_load_wrong_type(): Checks that we get the right error raised when we try to read an unloaded yaml for get_variables_hist_src ''' - with pytest.raises( Type Error ) as execinfo2: + with pytest.raises( TypeError ) as execinfo2: out1 = helpers.get_variables_hist_src(YAML_EX, 'atmos_scalar_test_vars') - assert assert execinfo2.type is TypeError + assert execinfo2.type is TypeError def test_change_directory(): """ From 8d83684de0faacb61dd5c945df6917b6066a9a0e Mon Sep 17 00:00:00 2001 From: Carolyn Whitlock Date: Thu, 18 Sep 2025 15:32:43 -0400 Subject: [PATCH 8/8] pylint changes --- fre/app/helpers.py | 5 +---- fre/app/tests/test_helpers.py | 9 ++++----- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/fre/app/helpers.py b/fre/app/helpers.py index d4e5d4361..bad59e758 100644 --- a/fre/app/helpers.py +++ b/fre/app/helpers.py @@ -1,8 +1,5 @@ import os -from pathlib import Path -import yaml from contextlib import contextmanager -import sys # set up logging import logging @@ -108,7 +105,7 @@ def get_variables_hist_src(yml: dict, hist_src: str) -> list: else: src_vars = "all" break - #using is_found rather than a check for length on the off chance that the + #using is_found rather than a check for length on the off chance that the #variable list is of length 0 without being caught earlier in the config if not is_found: raise ValueError(f"history_file, {hist_src}, not found in pp yaml configuration {yml}!") diff --git a/fre/app/tests/test_helpers.py b/fre/app/tests/test_helpers.py index dcbbaab3a..42075e5ff 100644 --- a/fre/app/tests/test_helpers.py +++ b/fre/app/tests/test_helpers.py @@ -45,7 +45,7 @@ def test_get_variables(): out1 == expected_dicts[0], out2 == expected_dicts[1], out3 == expected_dicts[2]]) - + def test_get_variables_load_wrong_type(): """ Test get_variables() returns an error when given inappropriate input @@ -53,10 +53,9 @@ def test_get_variables_load_wrong_type(): with pytest.raises( TypeError ) as execinfo: out = helpers.get_variables(yml = YAML_EX, pp_comp = "test_param") assert execinfo.type is TypeError - - -@pytest.mark.parametrize("hist_src,var_list", - [ + +@pytest.mark.parametrize("hist_src,var_list", + [ pytest.param("atmos_month", "all", id="tseries-all"), pytest.param("atmos_static_scalar", "all", id="static-all"), pytest.param("atmos_scalar_test_vars",["co2mass"], id="tseries-varlist"),