diff --git a/fre/pp/checkout_script.py b/fre/pp/checkout_script.py
index f46d77206..c58df1729 100644
--- a/fre/pp/checkout_script.py
+++ b/fre/pp/checkout_script.py
@@ -1,12 +1,20 @@
 '''
-Description: Checkout script which accounts for 4 different scenarios:
-1. branch not given, folder does not exist,
-2. branch given, folder does not exist,
-3. branch not given, folder exists,
-4. branch given and folder exists
+Description: Checkout script which accounts for the following scenarios:
+  default repo:
+    default branch:
+      checkout folder does not exist
+      checkout folder exists
+      checkout folder exists, --force-update set to True
+    non-default branch:
+      ... (same 3 combos)
+  non-default repo
+      ... (same 6 combos)
+Non-default repo in combination with default branch is likely to give you
+an error, but that's arguably a user problem
 '''
 import os
 import subprocess
+import re
 import logging
 
 fre_logger = logging.getLogger(__name__)
@@ -15,7 +23,8 @@ FRE_WORKFLOWS_URL = 'https://github.com/NOAA-GFDL/fre-workflows.git'
 
 
 
-def checkout_template(experiment = None, platform = None, target = None, branch = None):
+def checkout_template(experiment = None, platform = None, target = None,
+                      branch = None, repo = None, force_update = False):
     """
     Checkout the workflow template files from the repo
     """
@@ -29,11 +38,20 @@ def checkout_template(experiment = None, platform = None, target = None, branch
         fre_logger.info(f"default tag is '{default_tag}'")
     else:
         fre_logger.info(f"requested branch/tag is '{branch}'")
+
+    #repo parameters:
+    if repo is None:
+        repo = FRE_WORKFLOWS_URL
+        fre_logger.info(f"default repo is '{FRE_WORKFLOWS_URL}'")
+    else:
+        regex = r".*github\.com/NOAA-GFDL/.*\.git"
+        if re.match(regex, repo) is None:
+            fre_logger.error(f"error in checkout_template: repo {repo} is not under github.com/NOAA-GFDL!")
 
     # check args + set the name of the directory
     if None in [experiment, platform, target]:
         os.chdir(go_back_here)
-        raise ValueError( 'one of these are None: experiment / platform / target = \n'
+        raise ValueError( 'one or more of these are not set: experiment / platform / target = \n'
                           f'{experiment} / {platform} / {target}' )
 
     name = f"{experiment}__{platform}__{target}"
@@ -46,43 +64,84 @@ def checkout_template(experiment = None, platform = None, target = None, branch
             '(checkoutScript) directory {directory} wasnt able to be created. exit!') from exc
     finally:
         os.chdir(go_back_here)
+
+    clonedir = f'{directory}/{name}'
+    checkout_exists = os.path.isdir(clonedir)
 
-    checkout_exists = os.path.isdir(f'{directory}/{name}')
-
-    if not checkout_exists: # scenarios 1+2, checkout doesn't exist, branch specified (or not)
+    if not checkout_exists: #checkout doesn't exist
         fre_logger.info('checkout does not yet exist; will create now')
-        clone_output = subprocess.run( ['git', 'clone','--recursive',
-                                        f'--branch={git_clone_branch_arg}',
-                                        FRE_WORKFLOWS_URL, f'{directory}/{name}'],
-                                       capture_output = True, text = True, check = True)
-        fre_logger.info(f'{clone_output}')
+        clone_fre_workflows(clonedir, repo, git_clone_branch_arg)
 
-    else: # the repo checkout does exist, scenarios 3 and 4.
+    else: #checkout does exist
         os.chdir(f'{directory}/{name}')
 
         # capture the branch and tag
         # if either match git_clone_branch_arg, then success. otherwise, fail.
 
         current_tag = subprocess.run(["git","describe","--tags"],
                                      capture_output = True,
                                      text = True, check = True).stdout.strip()
         current_branch = subprocess.run(["git", "branch", "--show-current"],
                                         capture_output = True,
                                         text = True, check = True).stdout.strip()
 
+
+        #TODO: this also needs a check to make sure that the repo is the same.
         if current_tag == git_clone_branch_arg or current_branch == git_clone_branch_arg:
             fre_logger.info(f"checkout exists ('{directory}/{name}'), and matches '{git_clone_branch_arg}'")
+            if force_update:
+                fre_logger.info(
+                    f"Forcing an update of the branch {git_clone_branch_arg} from repo {repo}")
+                update_fre_workflows(clonedir, repo, git_clone_branch_arg)
         else:
             fre_logger.info(
-                f"ERROR: checkout exists ('{directory}/{name}') and does not match '{git_clone_branch_arg}'")
+                f"ERROR: experiment checkout exists ('{directory}/{name}') and does not match '{git_clone_branch_arg}'")
             fre_logger.info(
                 f"ERROR: current branch is '{current_branch}', current tag-describe is '{current_tag}'")
+            fre_logger.info(
+                "You can fix this by running your config with a new experiment name (-e)")
             os.chdir(go_back_here)
-            raise ValueError('neither tag nor branch matches the git clone branch arg') #exit(1)
+            raise ValueError('neither tag nor branch matches the git clone branch arg')
 
     # make sure we are back where we should be
     if os.getcwd() != go_back_here:
         os.chdir(go_back_here)
+
+def clone_fre_workflows(clone_loc, repo, branch, limit_checkout_size=False):
+    '''
+    Clones fre-workflows into a location $clone_loc from $repo and $branch.
+    clone_loc - directory to clone into
+    repo - url of the git repository to clone from
+    branch - branch or tag name handed to git clone --branch
+    limit_checkout_size - if True, make a depth-1, blob-filtered clone without
+      tags and with shallow submodules
+    Raises subprocess.CalledProcessError if git exits with a non-zero status.
+    '''
+    # argv is built as a list so that paths containing spaces stay one argument
+    git_command = ["git", "clone", "--recursive", "--branch", branch]
+    if limit_checkout_size:
+        git_command += ["--depth", "1", "--shallow-submodules",
+                        "--filter=blob:none", "--no-tags"]
+    git_command += [repo, clone_loc]
+    fre_logger.info(' '.join(git_command))
+    clone_output = subprocess.run(git_command,
+                                  capture_output=True, text=True, check=True)
+    fre_logger.info(f'{clone_output}')
+
+def update_fre_workflows(clone_loc, repo, branch):
+    '''
+    Does a git pull of the current branch of the checkout in $clone_loc; needed
+    if the code is going to update (i.e. if you're testing a bugfix)
+    git -C does a pushd and popd internal to the git command
+    clone_loc - directory of the existing checkout to update
+    repo, branch - not passed to git; only written to the log message
+    Raises subprocess.CalledProcessError if git exits with a non-zero status.
+    '''
+    git_command = ["git", "-C", clone_loc, "pull"]
+    fre_logger.info(f"{' '.join(git_command)} (repo {repo}, branch {branch})")
+    pull_output = subprocess.run(git_command,
+                                 capture_output=True, text=True, check=True)
+    fre_logger.info(f'{pull_output}')
 
 #############################################
 
diff --git a/fre/pp/frepp.py b/fre/pp/frepp.py
index 79588e2c0..82b6f7b30 100644
--- a/fre/pp/frepp.py
+++ b/fre/pp/frepp.py
@@ -131,9 +131,22 @@ def configure_yaml(yamlfile,experiment,platform,target):
 @click.option("-b", "--branch", type =str,
               required=False, default = None,
               help="fre-workflows branch/tag to clone; default is $(fre --version)")
-def checkout(experiment, platform, target, branch=None):
+@click.option("-r", "--repo", type =str,
+              required=False, default = None,
+              help="fre-workflows repository to clone from; default is https://github.com/NOAA-GFDL/fre-workflows.git")
+@click.option("--force-update", "force_update", is_flag=True,
+              required=False, default = False,
+              help="update an already-checked-out fre-workflows clone with git pull")
+def checkout(experiment, platform, target, branch=None, repo=None, force_update=False):
     """
     Execute fre pp checkout
+    -e, -p, -t: experiment, platform, target as used elsewhere in fre pp tools
+    -b, --branch: fre-workflows branch to clone. Default is the version of the
+    fre module you have loaded. This argument is expected to use non-default
+    values for production work.
+    -r, --repo: fre-workflows repository to clone from. Default is
+    https://github.com/NOAA-GFDL/fre-workflows.git
+    --force-update: Whether to force an update of already-checked-out code
     """
-    checkout_script.checkout_template(experiment, platform, target, branch)
+    checkout_script.checkout_template(experiment, platform, target, branch, repo, force_update)
 
diff --git a/fre/pp/split_netcdf_script.py b/fre/pp/split_netcdf_script.py
index 750e2b437..87fb70b56 100644
--- a/fre/pp/split_netcdf_script.py
+++ b/fre/pp/split_netcdf_script.py
@@ -31,13 +31,17 @@ def split_netcdf(inputDir, outputDir, component, history_source, use_subdirs,
     file naming conventions
     Sample infile name convention: "19790101.atmos_tracer.tile6.nc"
     inputDir - directory containg netcdf files
-    outputDir - directory to which to write netcdf files
+    outputDir - directory to which to write netcdf files. Is created if it does
+      not yet exist.
     component - the 'component' element we are currently working with in the yaml
     history_source - a history_file under a 'source' under the 'component' that
       we are working with. Is used to identify the files in inputDir.
     use_subdirs - whether to recursively search through inputDir under the subdirectories.
       used when regridding.
     yamlfile - a .yml config file for fre postprocessing
+    split_all_vars - whether to split all data vars in the file into separate files
+      without parsing a yamlfile for the varlist. Skips parsing any component and
+      yamlfile args; equivalent to passing a varlist of "all" in the yamlfile.
     '''
 
     #Verify input/output dirs exist and are dirs
@@ -45,10 +49,19 @@ def split_netcdf(inputDir, outputDir, component, history_source, use_subdirs,
         fre_logger.error(f"error: input dir {inputDir} does not exist or is not a directory")
         raise OSError(f"error: input dir {inputDir} does not exist or is not a directory")
     if not (os.path.isdir(outputDir)):
-        fre_logger.error(f"error: output dir {outputDir} does not exist or is not a directory")
-        raise OSError(f"error: output dir {outputDir} does not exist or is not a directory")
-
-    #Find files to split
+        if os.path.exists(outputDir):
+            fre_logger.error(f"error: output dir {outputDir} exists but is not a directory. Please specify a directory.")
+            raise OSError(f"error: output dir {outputDir} exists but is not a directory. Please specify a directory.")
+        fre_logger.info(f"output dir {outputDir} does not exist. Creating now.")
+        try:
+            os.makedirs(outputDir)
+        except OSError as err:
+            fre_logger.error(f"error: Can't create {outputDir}: {err}")
+            raise
+    elif not os.access(outputDir, os.W_OK):
+        fre_logger.error(f"error: cannot write to output dir {outputDir}")
+        raise OSError(f"error: cannot write to output dir {outputDir}")
+
     curr_dir = os.getcwd()
     workdir = os.path.abspath(inputDir)
 