# Author: Axel Guinot (axel.guinot.astro@gmail.com)
# config parser
# Some of the ideas are inspired by the Galsim parser

import copy
import os
import re

_main_fields = ['galaxy_catalog', 'star_catalog', 'mask_image']
_catalog_fields = ['type', 'path', 'columns']
_classic_gal_columns = [
    'ra',
    'dec',
    'e1',
    'e2',
    'weights',
    'e1_psf',
    'e2_psf',
    'mag'
]
_classic_star_columns = [
    'ra',
    'dec',
    'x',
    'y',
    'n_ccd',
    'e1_psf',
    'e2_psf',
    'size_psf',
    'e1_star',
    'e2_star',
    'size_star',
]


class ConfigParser():
    """ConfigParser

    Parse the input config file in .yaml format and build a dictionary
    stored in ``self.config``. Variables declared in the config file are
    stored in ``self._var`` (name -> value).

    NOTE: variable values and ``eval`` entries of the config file are passed
    to Python's ``eval``. Only use config files coming from a trusted source.

    Parameters
    ----------
    config_path : str
        Path to the config file

    Raises
    ------
    ValueError
        If the path is missing, not a string, does not exist, or if the
        content of the config file is invalid.
    """

    def __init__(
        self,
        config_path=None,
    ):

        if config_path is None:
            raise ValueError("No config_path have been provided")
        if not isinstance(config_path, str):
            raise ValueError("config_path must be a string")
        if not os.path.exists(config_path):
            raise ValueError(f"No file found at: {config_path}")

        config_raw = self._read_yaml_file(config_path)

        self.parse_config(config_raw)

    def __str__(self):

        # PyYAML is imported where it is used so that the parsing logic does
        # not require it at import time.
        import yaml

        return yaml.dump(self.config)

    def __repr__(self):

        return self.__str__()

    def parse_config(self, config_raw):
        """parse config

        Parse the yaml dictionary and transform the output in something the
        library can understand. The result is stored in ``self.config``.

        Parameters
        ----------
        config_raw : dict
            raw output of the yaml loader
        """

        self.config = {}

        # Set workspace directory
        self._parse_workspace(config_raw)

        # Get the variables first: paths and columns may reference them
        self._parse_variables(config_raw)

        # Galaxy catalogue (must come before the star catalogue, which is
        # checked against it)
        if 'galaxy_catalog' in config_raw:
            self._parse_galaxy_catalog(config_raw)
        # Star catalogue
        if 'star_catalog' in config_raw:
            self._parse_star_catalog(config_raw)

    def _parse_workspace(self, config_raw):
        """parse workspace

        Setup the workspace directory and the name of the run. It also checks
        if the only_plot option is set. The run directory is
        ``<workspace_directory>/<run_name>``; it is created if missing, except
        when only_plot is True, in which case it must already exist.

        Parameters
        ----------
        config_raw : dict
            raw output of the yaml loader

        Raises
        ------
        ValueError
            If run_name / workspace_directory / only_plot are missing or
            invalid, if the run directory is missing while only_plot is set,
            or if the run directory cannot be created.
        """

        # Check run name
        if 'run_name' not in config_raw:
            raise ValueError("No run_name are provided")
        if not isinstance(config_raw['run_name'], str):
            raise ValueError("run_name must be a string")
        run_name = config_raw['run_name']

        # Check workspace
        if 'workspace_directory' not in config_raw:
            raise ValueError("No workspace_directory are provided")
        if not isinstance(config_raw['workspace_directory'], str):
            raise ValueError("workspace_directory must be a string")
        workspace_dir_tmp = config_raw['workspace_directory']
        if not os.path.isdir(workspace_dir_tmp):
            raise ValueError(
                "The workspace path do not exist or is not a directory. "
                f"Got: {workspace_dir_tmp}"
            )
        workspace_dir = os.path.join(workspace_dir_tmp, run_name)

        only_plot = config_raw.get('only_plot', False)
        if not isinstance(only_plot, bool):
            raise ValueError(
                "only_plot must in [True, Fasle]. "
                f"Got: {only_plot}"
            )

        if not os.path.isdir(workspace_dir):
            # Plot-only runs reuse the products of a previous run, so the run
            # directory has to be there already.
            if only_plot:
                raise ValueError(
                    "When using only_plot, the workspace has to be an already "
                    f"existing directory. Got: {workspace_dir}"
                )
            try:
                os.mkdir(workspace_dir)
            except OSError as e:
                raise ValueError(
                    "Error while creating the directory got the following "
                    f"exception:\n{e}"
                ) from e

        config = {
            'path': workspace_dir,
            'run_name': run_name,
            'only_plot': only_plot,
        }

        self.config.update({'workspace': config})

    def _parse_galaxy_catalog(self, config_raw):
        """parse the galaxy catalog

        Read and store informations about the galaxy catalog under
        ``self.config['galaxy_catalog']``.

        Parameters
        ----------
        config_raw : dict
            raw output of the yaml loader

        Raises
        ------
        ValueError
            If a mandatory entry or column is missing or invalid.
        """

        gal_dict = config_raw['galaxy_catalog']
        self._check_catalog_fields(gal_dict, 'galaxy_catalog')

        # Parse the path
        config = {'path': self._parse_path(gal_dict)}

        # Parse the type and the columns
        config.update(self._parse_columns(gal_dict, _classic_gal_columns))

        self.config.update({'galaxy_catalog': config})

    def _parse_star_catalog(self, config_raw):
        """parse the star catalog

        Read and store informations about the star catalog under
        ``self.config['star_catalog']``. If a galaxy catalog has already been
        parsed, the star catalog path must have the same format (and the same
        number of files) and the catalog history settings are copied from the
        galaxy catalog.

        Parameters
        ----------
        config_raw : dict
            raw output of the yaml loader

        Raises
        ------
        ValueError
            If a mandatory entry or column is missing or invalid, or if the
            star catalog is not consistent with the galaxy catalog.
        """

        star_dict = config_raw['star_catalog']
        self._check_catalog_fields(star_dict, 'star_catalog')

        # Parse the path
        config = {'path': self._parse_path(star_dict)}

        # Checks if the star information are consistent with the galaxy
        # catalog if we have one
        if 'galaxy_catalog' in self.config:
            gal_path = self.config['galaxy_catalog']['path']
            star_path = config['path']
            if not isinstance(star_path['path'], type(gal_path['path'])):
                raise ValueError(
                    "The format of the star catalog is not consistant with "
                    "the galaxy catalog"
                )
            if isinstance(star_path['path'], list):
                if len(star_path['path']) != len(gal_path['path']):
                    raise ValueError(
                        "The number of star catalogs is not consistant with "
                        "the number of galaxy catalog"
                    )
            # The history settings of the galaxy catalog take precedence
            star_path['keep_cat_history'] = gal_path['keep_cat_history']
            star_path['var_cat_history'] = gal_path['var_cat_history']

        # Parse the type and the columns
        config.update(self._parse_columns(star_dict, _classic_star_columns))

        self.config.update({'star_catalog': config})

    def _check_catalog_fields(self, cat_dict, cat_name):
        """Make sure a catalog section is a dict with the mandatory entries.

        Parameters
        ----------
        cat_dict : dict
            Dictionary containing information about a catalog
        cat_name : str
            Name of the section, used in the error message

        Raises
        ------
        ValueError
            If the section is not a dict or misses one of `_catalog_fields`.
        """

        if not isinstance(cat_dict, dict) or not all(
            needed_key in cat_dict for needed_key in _catalog_fields
        ):
            raise ValueError(
                f"The {cat_name} neeeds to have at least those entries: "
                f"{_catalog_fields}"
            )

    def _parse_columns(self, cat_dict, classic_columns):
        """Parse the type and the columns of a catalog section.

        Parameters
        ----------
        cat_dict : dict
            Dictionary containing information about a catalog
        classic_columns : list
            Columns that must be provided when the type is 'classic'

        Returns
        -------
        dict
            Dictionary with the key 'columns' and, for a classic catalog, the
            key 'type'. Each column is either a str (name of the column in
            the catalog) or a dict ``{'func': str, 'var': list}`` describing
            an expression to evaluate once the catalog is read.

        Raises
        ------
        ValueError
            If a mandatory column is missing or a column has an invalid
            format.
        """

        raw_columns = cat_dict['columns']
        if not isinstance(raw_columns, dict):
            raise ValueError("columns must be a dictionary")

        parsed = {}
        if cat_dict["type"] == 'classic':
            parsed["type"] = 'classic'
            # Make sure the necessary columns are provided
            for needed_key in classic_columns:
                if needed_key not in raw_columns:
                    raise ValueError(f"Column {needed_key} not provided")

        # Now we go through all columns
        # We cannot do it in the previous loop because more columns could
        # be given
        columns = {}
        for key, column_tmp in raw_columns.items():
            # Assign internal naming to catalog naming
            if isinstance(column_tmp, str):
                columns[key] = column_tmp

            # Create a new column based on eval
            # Note that the evaluation is not done at this stage because
            # we don't have access to the catalog yet. It will be completed
            # later when the catalog is actually read. Only the variables
            # defined in the config file are substituted here.
            elif isinstance(column_tmp, dict):
                func = self._check_eval_spec(column_tmp, key)
                func, var = self._substitute_variables(func)
                columns[key] = {
                    'func': func,
                    'var': var
                }
            else:
                raise ValueError(
                    f"Column {key} must be a string or a dictionary. "
                    f"Got: {column_tmp}"
                )
        parsed["columns"] = columns

        return parsed

    def _read_yaml_file(self, path):
        """read yaml file

        This method reads the input yaml file and return a raw dictionary
        which will be parsed after. The file must contain exactly one yaml
        document whose top level is a mapping. The path is stored in
        ``self._config_path``.

        Parameters
        ----------
        path : str
            path to the config file

        Returns
        -------
        dict
            raw output of the yaml loader

        Raises
        ------
        ValueError
            If the file does not contain exactly one yaml mapping.
        """

        import yaml

        self._config_path = path

        with open(path) as f:
            raw_dict = list(yaml.load_all(f.read(), yaml.SafeLoader))

        if len(raw_dict) != 1 or not isinstance(raw_dict[0], dict):
            raise ValueError(
                f"Error occured while reading config file at {path}"
            )

        return raw_dict[0]

    def _parse_variables(self, config_raw):
        """parse variable

        Parse the variables defined in config file. Every top-level entry
        whose key contains 'var' is treated as a variable definition with a
        'name' and a 'value'. The result is stored in ``self._var``, which is
        always defined (empty if no variable is declared).

        Parameters
        ----------
        config_raw : dict
            raw output of the yaml loader

        Raises
        ------
        ValueError
            If a variable definition is malformed or its value cannot be
            evaluated.
        """

        # Always defined so that later lookups work without any variable
        self._var = {}

        # check if variables are defined
        var_keys = [
            key for key in config_raw
            if isinstance(key, str) and 'var' in key
        ]

        for var_key in var_keys:
            var_entry = config_raw[var_key]
            if not isinstance(var_entry, dict):
                raise ValueError(
                    f"Unroconized format for variable {var_key}"
                )

            # Set the name of the variable
            if 'name' not in var_entry:
                raise ValueError(
                    f"No name found for varible {var_key}"
                )
            if not isinstance(var_entry['name'], str):
                raise ValueError(
                    "Varibale name not of type string for variable "
                    f"{var_key}"
                )
            var_name = var_entry['name']

            # Set the value of the variable
            if 'value' not in var_entry:
                raise ValueError(
                    f"No value found for variable {var_key}"
                )
            raw_value = var_entry['value']
            if isinstance(raw_value, str):
                # NOTE: eval runs arbitrary code from the config file.
                try:
                    var_val = eval(raw_value)
                except Exception as e:
                    raise ValueError(
                        f"Error while evaluating value of variable {var_key}:"
                        f"\n{e}"
                    ) from e
            else:
                # Values already typed by the yaml loader (int, float, list)
                # are used as they are.
                var_val = raw_value

            self._var[var_name] = var_val

    def _parse_path(self, cat_dict):
        """parse path

        Parse the path of a catalog.
        Path can be a string, a list of paths or a function to evaluate. It
        is possible to keep track of which objects belong to which catalog to
        use that in the processing or the plotting.

        Parameters
        ----------
        cat_dict : dict
            Dictionary containing information about a catalog

        Returns
        -------
        path_output: dict
            Output dictionary with the information about the path of the
            catalog(s) following the format:
                - path: str or list
                    Path (or list of paths) of the catalog(s)
                - keep_cat_history: bool
                    Whether to keep track of objects. Only for multiple
                    catalogs
                - var_cat_history: list
                    list of int to keep the history of the catalogs. If not
                    set, a list is defined: [1, n_catalog]

        Raises
        ------
        ValueError
            If the path or the history settings are invalid.
        """

        path_output = {
            'path': '',
            'keep_cat_history': False,
            'var_cat_history': [],
        }

        # Get the path
        raw_path = cat_dict['path']
        if isinstance(raw_path, (str, list)):
            path_output['path'] = raw_path
        elif isinstance(raw_path, dict):
            func = self._check_eval_spec(raw_path, 'path')
            func, missing = self._substitute_variables(func)
            # A path is evaluated now, so every variable has to be known
            if missing:
                raise ValueError(
                    f"Undefined variable(s) used in path: {missing}"
                )
            resolved = self._evaluate(func)
            if isinstance(resolved, tuple):
                resolved = list(resolved)
            if not isinstance(resolved, (str, list)):
                raise ValueError(
                    "path function must return a string or a list. "
                    f"Got: {resolved}"
                )
            path_output['path'] = resolved
        else:
            raise ValueError("path must either a string, list or dict")

        # The history only makes sense for a list of catalogs
        if not isinstance(path_output['path'], list):
            return path_output

        path_output['keep_cat_history'] = \
            cat_dict.get('keep_cat_history', False)

        if not path_output['keep_cat_history']:
            return path_output

        n_cat = len(path_output['path'])

        # We set a default list if not provided
        if 'var_cat_history' not in cat_dict:
            path_output['var_cat_history'] = list(range(1, n_cat+1))
            return path_output

        var_name = cat_dict['var_cat_history']
        if not isinstance(var_name, str):
            raise ValueError(
                f"var_cat_history not a string. Got: {var_name}"
            )
        if var_name not in self._var:
            raise ValueError(
                "Variable for var_cat_history not defined. Got: "
                f"{var_name}"
            )
        var_history = self._var[var_name]
        if not isinstance(var_history, list):
            raise ValueError(
                f"var_cat_history do not link to a list. Got: {var_name}"
            )
        if len(var_history) != n_cat:
            raise ValueError(
                "Lenght of var_cat_history does not match number of "
                "catalogs."
                f"\nGot {len(var_history)} != {n_cat}"
            )
        path_output['var_cat_history'] = var_history
        return path_output

    def _check_eval_spec(self, column, name):
        """Validate an eval description and return its function string.

        Parameters
        ----------
        column : dict
            Dictionary containing information about the eval
        name : str
            Name of variable to evaluate, used in the error messages

        Returns
        -------
        str
            The 'func' entry of the description

        Raises
        ------
        ValueError
            If 'type' is missing or is not 'eval' (case insensitive), or if
            'func' is missing or not a string.
        """

        if 'type' not in column:
            raise ValueError(
                f"Unreconized type for {name}, missing type"
            )
        if (
            not isinstance(column['type'], str)
            or column['type'].lower() != 'eval'
        ):
            raise ValueError(
                f"Unreconized type for {name}, "
                f"got {column['type']}"
            )
        if 'func' not in column:
            raise ValueError(
                f"No function to evaluate for {name}"
            )
        if not isinstance(column['func'], str):
            raise ValueError(
                f"Function to evaluate for {name} must be a string"
            )

        return column['func']

    def _substitute_variables(self, func):
        """Replace the ``$name$`` config variables found in a function.

        Parameters
        ----------
        func : str
            Function in which variables are written ``$name$``

        Returns
        -------
        func : str
            Function where every variable defined in the config file has
            been replaced by the string representation of its value
        remaining : list
            Sorted names of the variables that are not defined in the config
            file (they are expected to be catalog columns)
        """

        remaining = []
        # Iterate over a fixed collection; unknown names are collected in a
        # separate list instead of mutating what is being iterated.
        for var_name in sorted(set(re.findall(r'\$(.*?)\$', func))):
            if var_name in self._var:
                func = func.replace(
                    f"${var_name}$", f"{self._var[var_name]}"
                )
            else:
                remaining.append(var_name)

        return func, remaining

    def _evaluate(self, func):
        """Evaluate a function string and wrap any failure in ValueError.

        NOTE: eval runs arbitrary code from the config file.

        Parameters
        ----------
        func : str
            Python expression to evaluate

        Returns
        -------
        object
            Result of the evaluation

        Raises
        ------
        ValueError
            If the evaluation raises an exception.
        """

        try:
            return eval(func)
        except Exception as e:
            raise ValueError(
                f"Error while evaluating function: \n{func}\n"
                f"Got exception: \n{e}"
            ) from e

    def _parse_eval(self, column, name):
        """parse eval

        Parse column or path that will use the eval function.
        Note that if the function contains variables, we first look for
        general variables defined in the config file and then among the
        catalog columns.

        Parameters
        ----------
        column : dict
            Dictionary containing information about the eval
        name: str
            Name of variable to evaluate

        Returns
        -------
        res : object
            Result of the eval function. Returned when every variable of the
            function is defined in the config file (or when there is none).
        func, var_names : str, list
            Returned instead of `res` when some variables are not defined in
            the config file: the function with the known variables replaced,
            and the sorted list of the remaining names (column names to
            replace in the function once the catalog is read).

        Raises
        ------
        ValueError
            If the description is invalid or the evaluation fails.
        """

        func = self._check_eval_spec(column, name)
        func, var_names = self._substitute_variables(func)

        # Some variables can only be resolved with the catalog
        if var_names:
            return func, var_names

        return self._evaluate(func)
# Author: Axel Guinot (axel.guinot.astro@gmail.com)
# Catalogue

from config_parser import ConfigParser

# This is used with the eval function
import dask.array as da
import vaex
from vaex.convert import convert

import numpy as np

import os
import copy


class Catalog():
    """Catalog

    This class stores a catalog as multiple DaskArrays.
    This class is instantiated from a config file. Subclasses implement
    `read_catalog`, which is called at the end of the initialization.

    Parameters
    ----------
    path : optional
        Not used by this class.
    config : ConfigParser
        Parsed configuration.
    params : optional
        Not used by this class.

    Raises
    ------
    ValueError
        If `config` is not an instance of ConfigParser.
    """

    def __init__(
        self,
        path=None,
        config=None,
        params=None,
    ):

        if not isinstance(config, ConfigParser):
            raise ValueError(
                f"config must be an instance of {ConfigParser}. "
                f"Got: {type(config)}"
            )
        self._config = config.config

        self.read_catalog()

    def __getitem__(self, key):

        if not isinstance(key, str):
            raise KeyError("Key must be a string")
        if key not in self.column_names:
            raise KeyError(f"{key}")
        return self._columns[key]

    def read_catalog(self):
        """read catalogue

        Build the catalog. Has to be implemented by the subclasses.
        """

        raise NotImplementedError

    def _load(self, section):
        """Read the catalog described in a section of the configuration.

        Sets ``self.column_names`` and ``self._columns`` from the 'columns'
        entry of the section.

        Parameters
        ----------
        section : str
            Key of the catalog in the configuration dictionary
        """

        cat_config = self._config[section]

        # First we read the catalog with vaex
        self._read_catalog(cat_config)

        # Now we set the column as dask arrays
        self.column_names = list(cat_config['columns'].keys())
        self._columns = {}
        for column_name in self.column_names:
            self._columns[column_name] = self._get_column(
                cat_config['columns'][column_name],
                self.column_names,
            )

    def _read_catalog(self, cat_config):
        """read catalog

        This method handles the reading of a catalog. It can read multiple
        catalogs and store a single Dataframe in ``self._df``. If requested,
        it will keep track of the origin catalog for every objects in the
        column 'var_cat_history'.

        Parameters
        ----------
        cat_config : dict
            Configuration dictionary

        Raises
        ------
        ValueError
            If a file is neither '.fits' nor '.hdf5', or if no path is given.
        """

        path_config = cat_config['path']

        # A single catalog is given as a plain string; iterating over it
        # directly would loop over its characters.
        paths = path_config['path']
        if isinstance(paths, str):
            paths = [paths]

        workspace = self._config["workspace"]['path']

        # Here we handle the opening of multiple files like vaex.open_many()
        # but we also keep track of the catalog of origin for each objects if
        # requested.
        all_df = []
        for i, path in enumerate(paths):
            # First we check the extension of the file. If not ".hdf5" we
            # convert it. We cannot make memory mapped operations from ".fits"
            # file. The converted file is put in the workspace directory.
            ext = os.path.splitext(path)[1]
            file_name = os.path.basename(path)
            if ext == '.hdf5':
                new_path = os.path.join(workspace, file_name)
                if not os.path.lexists(new_path):
                    # A relative target would be resolved from the directory
                    # of the link, not from the current directory.
                    os.symlink(os.path.abspath(path), new_path)
            elif ext == '.fits':
                new_path = os.path.join(workspace, file_name + '.hdf5')
                # The conversion is skipped if the converted file is already
                # in the workspace
                if not os.path.exists(new_path):
                    convert(
                        path_input=path,
                        fs_options_input={},
                        fs_input=None,
                        path_output=new_path,
                        fs_options_output={},
                        fs_output=None,
                        progress=False,
                    )
            else:
                raise ValueError(f"Unreconized file format. Got: {ext}")

            # Now we handle the history if requested
            df_tmp = vaex.open(new_path)
            if path_config['keep_cat_history']:
                # Check if it has already been added
                if 'var_cat_history' not in df_tmp.column_names:
                    df_tmp['var_cat_history'] = \
                        np.ones(len(df_tmp), dtype=int) \
                        * path_config['var_cat_history'][i]
            all_df.append(df_tmp)

        if not all_df:
            raise ValueError("No catalog path provided")

        self._df = vaex.concat(all_df)

    def _get_column(self, col_name, all_col_names):
        """get column

        Build a column of the catalog and convert it to DaskArray

        Parameters
        ----------
        col_name : str, dict
            Column name in the original catalog or dictionary with a
            description of how to build the column ('func': expression where
            catalog columns are written ``$name$``, 'var': names of those
            columns).
        all_col_names : list
            List of all the column names. Not used by this method.

        Returns
        -------
        dask.array
            DaskArray of the column. Returns None if `col_name` is neither a
            str nor a dict.

        Raises
        ------
        ValueError
            If the evaluation of the function fails.
        """

        if isinstance(col_name, str):
            return self._df[col_name].to_dask_array()
        elif isinstance(col_name, dict):
            # The name `cat_tmp` is referenced by the evaluated expression
            cat_tmp = {}
            func = col_name['func']
            for var_name in col_name['var']:
                cat_tmp[var_name] = self._df[var_name].to_dask_array()
                func = func.replace(
                    f"${var_name}$", f"cat_tmp['{var_name}']"
                )
            # NOTE: eval runs arbitrary code from the config file.
            try:
                res = eval(func)
            except Exception as e:
                raise ValueError(
                    f"Error while evaluating function: \n{func}\n"
                    f"Got exception: \n{e}"
                ) from e
            return res


class GalaxyCatalog(Catalog):
    """GalaxyCatalog

    Catalog built from the 'galaxy_catalog' section of the configuration.

    Parameters
    ----------
    config : ConfigParser
        Parsed configuration.
    """

    def __init__(
        self,
        config=None
    ):

        super().__init__(config=config)

    def read_catalog(self):
        """read catalogue

        This function is called during the initialization and build the galaxy
        catalog.
        """

        self._load('galaxy_catalog')


class StarCatalog(Catalog):
    """StarCatalog

    Catalog built from the 'star_catalog' section of the configuration.

    Parameters
    ----------
    config : ConfigParser
        Parsed configuration.
    """

    def __init__(
        self,
        config=None
    ):

        super().__init__(config=config)

    def read_catalog(self):
        """read catalogue

        This function is called during the initialization and build the star
        catalog.
        """

        self._load('star_catalog')
'weights', - 'e1_psf', - 'e2_psf', - 'mag' -] -_classic_star_columns = [ - 'ra', - 'dec', - 'x', - 'y', - 'n_ccd', - 'e1_psf', - 'e2_psf', - 'size_psf', - 'e1_star', - 'e2_star', - 'size_star', -] - - -class ConfigParser(): - """ConfigParser - - Parse the input config file in .yaml format and build a dictionary. - - Parameters - ---------- - config_path : str - Path to the config file - """ - - def __init__( - self, - config_path=None, - ): - - if isinstance(config_path, type(None)): - raise ValueError("No config_path have been provided") - if not isinstance(config_path, str): - raise ValueError("config_path must be a string") - if not os.path.exists(config_path): - raise ValueError(f"No file found at: {config_path}") - - config_raw = self._read_yaml_file(config_path) - - self.parse_config(config_raw) - - def __str__(self): - - return yaml.dump(self.config) - - def __repr__(self): - - return self.__str__() - - def parse_config(self, config_raw): - """parse config - - Parse the yaml dictionnay and transform the output in something the - library can understand. - - Parameters - ---------- - config_raw : dict - raw output of the yaml loader - """ - - self.config = {} - - # Set workspace directory - self._parse_workspace(config_raw) - - # First get the variables - self._parse_variables(config_raw) - - # Galaxy catalogue - if 'galaxy_catalog' in config_raw.keys(): - self._parse_galaxy_catalog(config_raw) - # Star catalogue - if 'star_catalog' in config_raw.keys(): - self._parse_star_catalog(config_raw) - - def _parse_workspace(self, config_raw): - """parse workspace - - Setup the workspace directory and the name of the run. It also checks - if the only_plot option is set. 
- - Parameters - ---------- - config_raw : dict - raw output of the yaml loader - """ - - # Check run name - if 'run_name' not in config_raw.keys(): - raise ValueError("No run_name are provided") - if not isinstance(config_raw['run_name'], str): - raise ValueError("run_name must be a string") - run_name = config_raw['run_name'] - - # Check workspace - if 'workspace_directory' not in config_raw.keys(): - raise ValueError("No workspace_directory are provided") - if not isinstance(config_raw['workspace_directory'], str): - raise ValueError("workspace_directory must be a string") - workspace_dir_tmp = config_raw['workspace_directory'] - if not os.path.isdir(workspace_dir_tmp): - raise ValueError( - "The workspace path do not exist or is not a directory. " - f"Got: {workspace_dir_tmp}" - ) - workspace_dir = os.path.join(workspace_dir_tmp, run_name) - - if 'only_plot' in config_raw.keys(): - if not isinstance(config_raw['only_plot'], bool): - raise ValueError( - "only_plot must in [True, Fasle]. " - f"Got: {config_raw['only_plot']}" - ) - only_plot = config_raw['only_plot'] - else: - only_plot = False - - if os.path.exists(workspace_dir): - if only_plot: - raise ValueError( - "When using only_plot, the workspace has to an already" - f"existing directory. Got: {workspace_dir}" - ) - else: - try: - os.mkdir(workspace_dir) - except Exception as e: - raise ValueError( - "Error while creating the directory got the following " - f"exception:\n{e}" - ) - - config = { - 'path': workspace_dir, - 'run_name': run_name, - 'only_plot': only_plot, - } - - config = {'workspace': config} - self.config.update(config) - - def _parse_galaxy_catalog(self, config_raw): - """parse the galaxy catalog - - Read and store informations about the galaxy catalog. 
- - Parameters - ---------- - config_raw : dict - raw output of the yaml loader - """ - - config = {} - gal_dict = config_raw['galaxy_catalog'] - - # Make sure the necessary information are provided - if not all( - [ - needed_key in gal_dict.keys() - for needed_key in _catalog_fields - ] - ): - raise ValueError( - "The galaxy_catalog neeeds to have at least those entries: " - f"{_catalog_fields}" - ) - - # Parse the path - config['path'] = self._parse_path(gal_dict) - - if gal_dict["type"] == 'classic': - config["type"] = 'classic' - # Make sure the necessary columns are provided - for needed_key in _classic_gal_columns: - if needed_key not in gal_dict['columns'].keys(): - raise ValueError(f"Column {needed_key} not provided") - - # Now we go through all columns - # We cannot do it in the previous loop because more columns could - # be given - config["columns"] = {} - for key in gal_dict['columns'].keys(): - column_tmp = gal_dict['columns'][key] - - # Assign internal naming to catalog naming - if isinstance(column_tmp, str): - config["columns"][key] = column_tmp - - # Create a new column based on eval - # Note that the evaluation is not done at this stage because - # we don't have acces to the catalog yet. It will be complited - # later when the catalog is actually read. - elif isinstance(column_tmp, dict): - func, var = self._parse_eval(column_tmp, key) - config["columns"][key] = { - 'func': func, - 'var': var - } - config = {'galaxy_catalog': config} - self.config.update(config) - - def _parse_star_catalog(self, config_raw): - """parse the star catalog - - Read and store informations about the star catalog. 
- - Parameters - ---------- - config_raw : dict - raw output of the yaml loader - """ - - config = {} - star_dict = config_raw['star_catalog'] - - # Make sure the necessary information are provided - if not all( - [ - needed_key in star_dict.keys() - for needed_key in _catalog_fields - ] - ): - raise ValueError( - "The star_catalog neeeds to have at least those entries: " - f"{_catalog_fields}" - ) - - # Parse the path - config['path'] = self._parse_path(star_dict) - # Checks if the star information are consitent with the galaxy catalog - # if we have one - if 'galaxy_catalog' in self.config.keys(): - if not isinstance( - config['path']['path'], - type(self.config['galaxy_catalog']['path']['path']) - ): - raise ValueError( - "The format of the star catalog is not consistant with " - "the galaxy catalog" - ) - if isinstance(config['path']['path'], list): - if ( - len(config['path']['path']) != - len(self.config['galaxy_catalog']['path']['path']) - ): - raise ValueError( - "The number of star catalogs is not consistant with " - "the number of galaxy catalog" - ) - config['path']['keep_cat_history'] = \ - self.config['galaxy_catalog']['path']['keep_cat_history'] - config['path']['var_cat_history'] = \ - self.config['galaxy_catalog']['path']['var_cat_history'] - - if star_dict["type"] == 'classic': - config["type"] = 'classic' - # Make sure the necessary columns are provided - for needed_key in _classic_star_columns: - if needed_key not in star_dict['columns'].keys(): - raise ValueError(f"Column {needed_key} not provided") - - # Now we go through all columns - # We cannot do it in the previous loop because more columns could - # be given - config["columns"] = {} - for key in star_dict['columns'].keys(): - column_tmp = star_dict['columns'][key] - - # Assign internal naming to catalog naming - if isinstance(column_tmp, str): - config["columns"][key] = column_tmp - - # Create a new column based on eval - # Note that the evaluation is not done at this stage because - # we 
# NOTE: the source of this section is a whitespace-collapsed diff. The
# definitions below are methods of ``ConfigParser`` (they take ``self``);
# they are written at column 0 because the collapsed text carries no
# indentation and must be re-indented under the class when reassembled.
#
# Tail of the preceding method (its beginning lies before this section),
# kept verbatim so that no content is lost:
#
#   don't have acces to the catalog yet. It will be complited
#   # later when the catalog is actually read.
#   elif isinstance(column_tmp, dict):
#       func, var = self._parse_eval(column_tmp, key)
#       config["columns"][key] = {
#           'func': func,
#           'var': var
#       }
#   config = {'star_catalog': config}
#   self.config.update(config)


def _read_yaml_file(self, path):
    """read yaml file

    Read the input yaml file and return the raw dictionary that is parsed
    afterwards. The path is also stored in ``self._config_path``.

    Parameters
    ----------
    path : str
        path to the config file

    Returns
    -------
    dict
        Content of the single yaml document found in the file

    Raises
    ------
    ValueError
        If the file does not hold exactly one yaml document, or if that
        document is not a mapping
    """

    self._config_path = path

    # Explicit encoding: the result must not depend on the platform locale
    with open(path, encoding="utf-8") as f:
        raw_docs = list(yaml.load_all(f.read(), yaml.SafeLoader))

    # A lone non-mapping document (e.g. an empty one, loaded as None) would
    # only fail later on ``.keys()``; reject it here with the same error
    if len(raw_docs) != 1 or not isinstance(raw_docs[0], dict):
        raise ValueError(
            f"Error occured while reading config file at {path}"
        )

    return raw_docs[0]


def _parse_variables(self, config_raw):
    """parse variable

    Parse the variables defined in config file and store them in
    ``self._var`` as ``{name: value}``. Every top-level key containing
    ``'var'`` is treated as a variable definition.

    Parameters
    ----------
    config_raw : dict
        raw output of the yaml loader

    Raises
    ------
    ValueError
        If a variable entry is not a dict, has no string ``name``, has no
        ``value``, or if its value cannot be evaluated
    """

    # Always create the mapping: _parse_path and _parse_eval read it even
    # when the config file defines no variable at all
    self._var = {}

    # Non-string keys cannot be variable definitions (and would break the
    # substring test)
    var_keys = [
        key for key in config_raw
        if isinstance(key, str) and 'var' in key
    ]

    for var_key in var_keys:
        var_entry = config_raw[var_key]
        if not isinstance(var_entry, dict):
            raise ValueError(
                f"Unroconized format for variable {var_key}"
            )

        # Set the name of the variable
        if 'name' not in var_entry:
            raise ValueError(
                f"No name found for varible {var_key}"
            )
        if not isinstance(var_entry['name'], str):
            raise ValueError(
                "Varibale name not of type string for variable "
                f"{var_key}"
            )
        var_name = var_entry['name']

        # Set the value of the variable
        if 'value' not in var_entry:
            raise ValueError(
                f"No value found for variable {var_key}"
            )
        try:
            # NOTE(security): eval runs arbitrary code taken from the
            # config file; only use config files from a trusted source
            var_val = eval(var_entry['value'])
        except Exception as e:
            raise ValueError(
                f"Error while evaluating value of variable {var_key}:"
                f"\n{e}"
            ) from e

        self._var[var_name] = var_val


def _parse_path(self, cat_dict):
    """parse path

    Parse the path of a catalog.
    Path can be a string, a list of paths or a function to evaluate. It is
    possible to keep track of which objects belong to which catalog to use
    that in the processing or the plotting.

    Parameters
    ----------
    cat_dict : dict
        Dictionary containing information about a catalog

    Returns
    -------
    path_output : dict
        Output dictionary with the information about the path of the
        catalog(s) following the format:
            - path: str or list
                Path (or list of paths) of the catalog(s)
            - keep_cat_history: bool
                Whether to keep track of objects. Only for multiple
                catalogs
            - var_cat_history: list
                list used to keep the history of the catalogs. If not
                set, a list is defined: [1, n_catalog]

    Raises
    ------
    ValueError
        If the path has an unsupported type, or if ``var_cat_history`` is
        not the name of a defined list variable whose length matches the
        number of catalogs
    """

    path_output = {
        'path': '',
        'keep_cat_history': False,
        'var_cat_history': [],
    }

    # Get the path
    raw_path = cat_dict['path']
    if isinstance(raw_path, (str, list)):
        path_output['path'] = raw_path
    elif isinstance(raw_path, dict):
        path_output['path'] = self._parse_eval(raw_path, 'path')
    else:
        raise ValueError("path must either a string, list or dict")

    # The history only applies to several catalogs. The test is done on
    # the resolved path so that an eval giving a single string is handled
    # like a plain string path.
    # NOTE(review): _parse_eval can also hand back an unevaluated
    # (func, var_names) pair; that case is not told apart here.
    if not isinstance(path_output['path'], (list, tuple)):
        return path_output

    path_output['keep_cat_history'] = cat_dict.get('keep_cat_history', False)
    if not path_output['keep_cat_history']:
        return path_output

    n_cat = len(path_output['path'])

    # We set a default list if not provided
    if 'var_cat_history' not in cat_dict:
        path_output['var_cat_history'] = list(range(1, n_cat + 1))
        return path_output

    var_name = cat_dict['var_cat_history']
    if not isinstance(var_name, str):
        raise ValueError(
            f"var_cat_history not a string. Got: {var_name}"
        )
    # _var may be missing if _parse_variables has not been run
    known_vars = getattr(self, '_var', {})
    if var_name not in known_vars:
        raise ValueError(
            "Variable for var_cat_history not defined. Got: "
            f"{var_name}"
        )
    var_history = known_vars[var_name]
    if not isinstance(var_history, list):
        raise ValueError(
            f"var_cat_history do not link to a list. Got: {var_name}"
        )
    if len(var_history) != n_cat:
        raise ValueError(
            "Lenght of var_cat_history does not match number of "
            "catalogs."
            f"\nGot {len(var_history)} != {len(path_output['path'])}"
        )
    path_output['var_cat_history'] = var_history
    return path_output


def _parse_eval(self, column, name):
    """parse eval

    Parse column or path that will use the eval function.
    Names written as ``$name$`` in the function are first looked up among
    the general variables defined in the config file and replaced by their
    value. Names that are not config variables are left in place; they are
    returned to the caller together with the partially substituted
    function.

    Parameters
    ----------
    column : dict
        Dictionary containing information about the eval. Must contain
        ``type`` (equal to ``'eval'``, case insensitive) and ``func``
    name : str
        Name of variable to evaluate (only used in error messages)

    Returns
    -------
    res : object
        Result of the eval function, when every ``$name$`` was a config
        variable (or when there was none)
    (func, var_names) : (str, list)
        Otherwise: the function with the config variables substituted,
        and the list of the names still to be replaced

    Raises
    ------
    ValueError
        If ``type`` or ``func`` is missing or invalid, or if the
        evaluation fails
    """
    if 'type' not in column:
        raise ValueError(
            f"Unreconized type for {name}, missing type"
        )
    if column['type'].lower() != 'eval':
        raise ValueError(
            f"Unreconized type for {name}, "
            f"got {column['type']}"
        )
    if 'func' not in column:
        raise ValueError(
            f"No function to evaluate for {name}"
        )

    func = column["func"]
    if not isinstance(func, str):
        raise ValueError(
            f"Function to evaluate for {name} must be a string"
        )

    # _var may be missing if _parse_variables has not been run
    known_vars = getattr(self, '_var', {})

    # Unique placeholder names, in order of first appearance
    placeholders = list(dict.fromkeys(re.findall(r'\$(.*?)\$', func)))

    # Substitute the config variables. The remaining names are collected in
    # a separate list: removing items from the list being iterated skipped
    # every second variable.
    unresolved = []
    for var_name in placeholders:
        if var_name in known_vars:
            func = func.replace(
                f"${var_name}$", f"{known_vars[var_name]}"
            )
        else:
            unresolved.append(var_name)

    # Some names are not config variables: evaluation is left to the caller
    if unresolved:
        return func, unresolved

    try:
        # NOTE(security): eval runs arbitrary code taken from the config
        # file; only use config files from a trusted source
        res = eval(func)
    except Exception as e:
        raise ValueError(
            f"Error while evaluating function: \n{func}\n"
            f"Got exception: \n{e}"
        ) from e
    return res
ValueError( - f"Error while evaluating function: \n{func}\n" - f"Got exception: \n{e}" - ) - return res - else: - return func, var_names - # Evaluate directly if no variables are found - else: - try: - res = eval(func) - except Exception as e: - raise ValueError( - f"Error while evaluating function: \n{func}\n" - f"Got exception: \n{e}" - ) - return res From ddc1fa18b6609f5cd71717191c0548eddcccf82c Mon Sep 17 00:00:00 2001 From: Axel Guinot <39480528+aguinot@users.noreply.github.com> Date: Tue, 24 Jan 2023 23:36:39 +0100 Subject: [PATCH 4/4] Delete config_lensfit.yaml --- example/config_lensfit.yaml | 81 ------------------------------------- 1 file changed, 81 deletions(-) delete mode 100644 example/config_lensfit.yaml diff --git a/example/config_lensfit.yaml b/example/config_lensfit.yaml deleted file mode 100644 index 63c5420..0000000 --- a/example/config_lensfit.yaml +++ /dev/null @@ -1,81 +0,0 @@ -# Config file for lensfit analysis - -var-1: - name: patch - # Can handle int, float, str, list - # More complex variable would raise errors. The below example could be - # achieve by doing: - value: 'list(range(1, 3))' - # Just calling range will raise an error. - # value: '[1, 2]' - -# Name of the run -run_name: lensfit - -# The directory used for the run and store the results -workspace_directory: /Users/aguinot/Documents/weaklensingvalidation_test/workspace - -# If True, the processing is skiped and only the plot are done. -# For this option to work run as to be done with the same workspace_directory -# and run_name -# plot_only: True - -galaxy_catalog: - # If more than one catalog is provided they all need to have the same format - # path: 'galaxy_catalog_1.fits' - # path: ['galaxy_catalog_1.fits', 'galaxy_catalo_2.fits'] - path: - type: eval - func: "[f'galaxy_catalog_{i}.fits' for i in $patch$]" - # If keep_cat_history == True we keep track of the objects and from which - # catalog they come from. 
A variable, var_cat_history, is defined internally - # and can be used to make plots per catalog (see plotting below). - # var_cat_history can also be set manually. - # This feature only works if more than one catalog is provided. - keep_cat_history: True - var_cat_history: patch - - # Type of catalog - type: classic - - # Define columns and link names in the catalog to internal naming - columns: - ra: ra - dec: dec - e1: e1 - e2: e2 - weights: w - e1_psf: - type: eval - # Variables are first checked among internal variables (defined in the config - # file) and then among the catalog columns - func: "($PSF_Q11$ - $PSF_Q22$) / ($PSF_Q11$ + $PSF_Q22$ + 2. * sqrt($PSF_Q11$ * $PSF_Q22$ - $PSF_Q12$**2))" - e2_psf: e2_psf - size_psf: fwhm_psf - mag: mag - -star_catalog: - path: - type: eval - func: "[f'star_catalog_{i}.fits' for i in $patch$]" - # This part is ignored for the star catalog if a galaxy catalog is provided. - # Everything set here will be updated based on the galaxy catalog. - # keep_cat_history: True - # var_cat_history: patch - - # Type of catalog - type: classic - - columns: - ra: ra - dec: dec - x: x - y: y - n_ccd: ccd_nb - e1_star: e1_s - e2_star: e2_s - size_star: fwhm_s - e1_psf: e1_psf - e2_psf: e2_psf - size_psf: fwhm_psf -