From fe74cac73074e7210b1b73b591baad9a00d53931 Mon Sep 17 00:00:00 2001
From: aguinot <axel.guinot.astro@gmail.com>
Date: Sun, 22 Jan 2023 06:00:15 +0100
Subject: [PATCH 1/4] config parser

First version of the config parser
---
 WeakLensingValidation/config_parser.py | 540 +++++++++++++++++++++++++
 example/config_lensfit.yaml            |  81 ++++
 2 files changed, 621 insertions(+)
 create mode 100644 WeakLensingValidation/config_parser.py
 create mode 100644 example/config_lensfit.yaml

diff --git a/WeakLensingValidation/config_parser.py b/WeakLensingValidation/config_parser.py
new file mode 100644
index 0000000..7470c7e
--- /dev/null
+++ b/WeakLensingValidation/config_parser.py
@@ -0,0 +1,540 @@
+# Author: Axel Guinot (axel.guinot.astro@gmail.com)
+# config parser
+# Some of the ideas are inspired by the Galsim parser
+
+import yaml
+import copy
+import re
+import os
+
+_main_fields = ['galaxy_catalog', 'star_catalog', 'mask_image']
+_catalog_fields = ['type', 'path', 'columns']
+_classic_gal_columns = [
+    'ra',
+    'dec',
+    'e1',
+    'e2',
+    'weights',
+    'e1_psf',
+    'e2_psf',
+    'mag'
+]
+_classic_star_columns = [
+    'ra',
+    'dec',
+    'x',
+    'y',
+    'n_ccd',
+    'e1_psf',
+    'e2_psf',
+    'size_psf',
+    'e1_star',
+    'e2_star',
+    'size_star',
+]
+
+
+class ConfigParser():
+    """ConfigParser
+
+    Parse the input config file in .yaml format and build a dictionary.
+
+    Parameters
+    ----------
+    config_path : str
+        Path to the config file
+    """
+
+    def __init__(
+        self,
+        config_path=None,
+    ):
+
+        if isinstance(config_path, type(None)):
+            raise ValueError("No config_path have been provided")
+        if not isinstance(config_path, str):
+            raise ValueError("config_path must be a string")
+        if not os.path.exists(config_path):
+            raise ValueError(f"No file found at: {config_path}")
+
+        config_raw = self._read_yaml_file(config_path)
+
+        self.parse_config(config_raw)
+
+    def __str__(self):
+
+        return yaml.dump(self.config)
+
+    def __repr__(self):
+
+        return self.__str__()
+
+    def parse_config(self, config_raw):
+        """parse config
+
+        Parse the yaml dictionnay and transform the output in something the
+        library can understand.
+
+        Parameters
+        ----------
+            config_raw : dict
+                raw output of the yaml loader
+        """
+
+        self.config = {}
+
+        # Set workspace directory
+        self._parse_workspace(config_raw)
+
+        # First get the variables
+        self._parse_variables(config_raw)
+
+        # Galaxy catalogue
+        if 'galaxy_catalog' in config_raw.keys():
+            self._parse_galaxy_catalog(config_raw)
+        # Star catalogue
+        if 'star_catalog' in config_raw.keys():
+            self._parse_star_catalog(config_raw)
+
+    def _parse_workspace(self, config_raw):
+        """parse workspace
+
+        Setup the workspace directory and the name of the run. It also checks
+        if the only_plot option is set.
+
+        Parameters
+        ----------
+        config_raw : dict
+            raw output of the yaml loader
+        """
+
+        # Check run name
+        if 'run_name' not in config_raw.keys():
+            raise ValueError("No run_name are provided")
+        if not isinstance(config_raw['run_name'], str):
+            raise ValueError("run_name must be a string")
+        run_name = config_raw['run_name']
+
+        # Check workspace
+        if 'workspace_directory' not in config_raw.keys():
+            raise ValueError("No workspace_directory are provided")
+        if not isinstance(config_raw['workspace_directory'], str):
+            raise ValueError("workspace_directory must be a string")
+        workspace_dir_tmp = config_raw['workspace_directory']
+        if not os.path.isdir(workspace_dir_tmp):
+            raise ValueError(
+                "The workspace path do not exist or is not a directory. "
+                f"Got: {workspace_dir_tmp}"
+            )
+        workspace_dir = os.path.join(workspace_dir_tmp, run_name)
+
+        if 'only_plot' in config_raw.keys():
+            if not isinstance(config_raw['only_plot'], bool):
+                raise ValueError(
+                    "only_plot must in [True, Fasle]. "
+                    f"Got: {config_raw['only_plot']}"
+                )
+            only_plot = config_raw['only_plot']
+        else:
+            only_plot = False
+
+        if os.path.exists(workspace_dir):
+            if only_plot:
+                raise ValueError(
+                    "When using only_plot, the workspace has to an already"
+                    f"existing directory. Got: {workspace_dir}"
+                )
+        else:
+            try:
+                os.mkdir(workspace_dir)
+            except Exception as e:
+                raise ValueError(
+                    "Error while creating the directory got the following "
+                    f"exception:\n{e}"
+                )
+
+        config = {
+            'path': workspace_dir,
+            'run_name': run_name,
+            'only_plot': only_plot,
+        }
+
+        config = {'workspace': config}
+        self.config.update(config)
+
+    def _parse_galaxy_catalog(self, config_raw):
+        """parse the galaxy catalog
+
+        Read and store informations about the galaxy catalog.
+
+        Parameters
+        ----------
+            config_raw : dict
+                raw output of the yaml loader
+        """
+
+        config = {}
+        gal_dict = config_raw['galaxy_catalog']
+
+        # Make sure the necessary information are provided
+        if not all(
+            [
+                needed_key in gal_dict.keys()
+                for needed_key in _catalog_fields
+            ]
+        ):
+            raise ValueError(
+                "The galaxy_catalog neeeds to have at least those entries: "
+                f"{_catalog_fields}"
+                )
+
+        # Parse the path
+        config['path'] = self._parse_path(gal_dict)
+
+        if gal_dict["type"] == 'classic':
+            config["type"] = 'classic'
+            # Make sure the necessary columns are provided
+            for needed_key in _classic_gal_columns:
+                if needed_key not in gal_dict['columns'].keys():
+                    raise ValueError(f"Column {needed_key} not provided")
+
+            # Now we go through all columns
+            # We cannot do it in the previous loop because more columns could
+            # be given
+            config["columns"] = {}
+            for key in gal_dict['columns'].keys():
+                column_tmp = gal_dict['columns'][key]
+
+                # Assign internal naming to catalog naming
+                if isinstance(column_tmp, str):
+                    config["columns"][key] = column_tmp
+
+                # Create a new column based on eval
+                # Note that the evaluation is not done at this stage because
+                # we don't have acces to the catalog yet. It will be complited
+                # later when the catalog is actually read.
+                elif isinstance(column_tmp, dict):
+                    func, var = self._parse_eval(column_tmp, key)
+                    config["columns"][key] = {
+                        'func': func,
+                        'var': var
+                    }
+        config = {'galaxy_catalog': config}
+        self.config.update(config)
+
+    def _parse_star_catalog(self, config_raw):
+        """parse the star catalog
+
+        Read and store informations about the star catalog.
+
+        Parameters
+        ----------
+            config_raw : dict
+                raw output of the yaml loader
+        """
+
+        config = {}
+        star_dict = config_raw['star_catalog']
+
+        # Make sure the necessary information are provided
+        if not all(
+            [
+                needed_key in star_dict.keys()
+                for needed_key in _catalog_fields
+            ]
+        ):
+            raise ValueError(
+                "The star_catalog neeeds to have at least those entries: "
+                f"{_catalog_fields}"
+                )
+
+        # Parse the path
+        config['path'] = self._parse_path(star_dict)
+        # Checks if the star information are consitent with the galaxy catalog
+        # if we have one
+        if 'galaxy_catalog' in self.config.keys():
+            if not isinstance(
+                config['path']['path'],
+                type(self.config['galaxy_catalog']['path']['path'])
+            ):
+                raise ValueError(
+                    "The format of the star catalog is not consistant with "
+                    "the galaxy catalog"
+                )
+            if isinstance(config['path']['path'], list):
+                if (
+                    len(config['path']['path']) !=
+                    len(self.config['galaxy_catalog']['path']['path'])
+                ):
+                    raise ValueError(
+                        "The number of star catalogs is not consistant with "
+                        "the number of galaxy catalog"
+                    )
+            config['path']['keep_cat_history'] = \
+                self.config['galaxy_catalog']['path']['keep_cat_history']
+            config['path']['var_cat_history'] = \
+                self.config['galaxy_catalog']['path']['var_cat_history']
+
+        if star_dict["type"] == 'classic':
+            config["type"] = 'classic'
+            # Make sure the necessary columns are provided
+            for needed_key in _classic_star_columns:
+                if needed_key not in star_dict['columns'].keys():
+                    raise ValueError(f"Column {needed_key} not provided")
+
+            # Now we go through all columns
+            # We cannot do it in the previous loop because more columns could
+            # be given
+            config["columns"] = {}
+            for key in star_dict['columns'].keys():
+                column_tmp = star_dict['columns'][key]
+
+                # Assign internal naming to catalog naming
+                if isinstance(column_tmp, str):
+                    config["columns"][key] = column_tmp
+
+                # Create a new column based on eval
+                # Note that the evaluation is not done at this stage because
+                # we don't have acces to the catalog yet. It will be complited
+                # later when the catalog is actually read.
+                elif isinstance(column_tmp, dict):
+                    func, var = self._parse_eval(column_tmp, key)
+                    config["columns"][key] = {
+                        'func': func,
+                        'var': var
+                    }
+        config = {'star_catalog': config}
+        self.config.update(config)
+
+    def _read_yaml_file(self, path):
+        """read yaml file
+
+        This method reads the input yaml file and return a raw dictionnary
+        which will be parse after.
+
+        Parameters
+        ----------
+            path : str
+                path to the config file
+        """
+
+        self._config_path = path
+
+        with open(path) as f:
+            raw_dict = [c for c in yaml.load_all(f.read(), yaml.SafeLoader)]
+
+        if len(raw_dict) != 1:
+            raise ValueError(
+                f"Error occured while reading config file at {path}"
+            )
+
+        return raw_dict[0]
+
+    def _parse_variables(self, config_raw):
+        """parse variable
+
+        Parse the variables defined in config file.
+
+        Parameters
+        ----------
+        config_raw : dict
+            raw output of the yaml loader
+        """
+
+        # check if variables are defined
+        var_keys = [key for key in config_raw.keys() if 'var' in key]
+
+        if len(var_keys) != 0:
+            self._var = {}
+            for var_key in var_keys:
+                if not isinstance(config_raw[var_key], dict):
+                    raise ValueError(
+                        f"Unroconized format for variable {var_key}"
+                    )
+
+                # Set the name of the variable
+                if 'name' not in config_raw[var_key].keys():
+                    raise ValueError(
+                        f"No name found for varible {var_key}"
+                    )
+                if not isinstance(config_raw[var_key]['name'], str):
+                    raise ValueError(
+                        "Varibale name not of type string  for variable "
+                        f"{var_key}"
+                    )
+                var_name = config_raw[var_key]['name']
+
+                # Set the value fo the variable
+                if 'value' not in config_raw[var_key].keys():
+                    raise ValueError(
+                        f"No value found for variable {var_key}"
+                    )
+                try:
+                    var_val = eval(config_raw[var_key]['value'])
+                except Exception as e:
+                    raise ValueError(
+                        f"Error while evaluating value of variable {var_key}:"
+                        f"\n{e}"
+                    )
+
+                self._var[var_name] = var_val
+
+    def _parse_path(self, cat_dict):
+        """parse path
+
+        Parse the path of a catalog.
+        Path can be a list of paths or a function to evaluate. It is possible
+        to keep track of which objects belong to which catalog to use that in
+        the processing or the ploting.
+
+        Parameters
+        ----------
+        cat_dict : dict
+            Dictionnary containing information about a catalog
+
+        Returns
+        -------
+        path_output: dict
+            Output dictionnay with the information about the path of the
+            catalog(s) folowing the format:
+                - path: str or list
+                    Path (or list of paths) of the catalog(s)
+                - keep_cat_history: bool
+                    Weither to keep track of objects. Only for multiple
+                    catalogs
+                - var_cat_history: list
+                    list of int to keep the history of the catalogs. If not
+                    set, a list is defined: [1, n_catalog]
+        """
+
+        path_output = {
+            'path': '',
+            'keep_cat_history': False,
+            'var_cat_history': [],
+        }
+
+        # Get the path
+        if isinstance(cat_dict['path'], str):
+            path_output['path'] = cat_dict['path']
+        elif isinstance(cat_dict['path'], list):
+            path_output['path'] = cat_dict['path']
+        elif isinstance(cat_dict['path'], dict):
+            path_output['path'] = self._parse_eval(cat_dict['path'], 'path')
+        else:
+            raise ValueError("path must either a string, list or dict")
+
+        # handle keep_cat_history
+        if not (
+            isinstance(cat_dict['path'], list)
+            or isinstance(cat_dict['path'], dict)
+        ):
+            return path_output
+
+        if 'keep_cat_history' in cat_dict.keys():
+            path_output['keep_cat_history'] = \
+                cat_dict['keep_cat_history']
+
+        if not path_output['keep_cat_history']:
+            return path_output
+
+        if 'var_cat_history' in cat_dict.keys():
+            var_name = cat_dict['var_cat_history']
+            if not isinstance(var_name, str):
+                raise ValueError(
+                    f"var_cat_history not a string. Got: {var_name}"
+                )
+            if var_name not in self._var.keys():
+                raise ValueError(
+                    "Variable for var_cat_history not defined. Got: "
+                    f"{var_name}"
+                )
+            var_history = self._var[var_name]
+            if not isinstance(var_history, list):
+                raise ValueError(
+                    f"var_cat_history do not link to a list. Got: {var_name}"
+                )
+            if len(var_history) != len(path_output['path']):
+                raise ValueError(
+                    "Lenght of var_cat_history does not match number of "
+                    "catalogs."
+                    f"\nGot {len(var_history)} != {len(path_output['path'])}"
+                )
+            path_output['var_cat_history'] = var_history
+            return path_output
+        # We set a default list if not provided
+        else:
+            n_cat = len(path_output['path'])
+            path_output['var_cat_history'] = list(range(1, n_cat+1))
+            return path_output
+
+    def _parse_eval(self, column, name):
+        """parse eval
+
+        Parse column or path that will use the eval function.
+        Note that if the function contains variables, we first look for
+        general variables defined in the config file and then among the
+        catalog columns.
+
+        Parameters
+        ----------
+            column : dict
+                Dictionnary containing information about the eval
+            name: str
+                Name of variable to evaluate
+        Returns
+        -------
+            res : float
+                Result of eval fucntion
+            func : str
+                Function to evaluate with the column name to use from the
+                catalog
+            var_names: list
+                List of the column names to replace in the function
+        """
+        if 'type' not in column.keys():
+            raise ValueError(
+                f"Unreconized type for {name}, missing type"
+            )
+        if column['type'].lower() != 'eval':
+            raise ValueError(
+                f"Unreconized type for {name}, "
+                f"got {column['type']}"
+            )
+        if 'func' not in column.keys():
+            raise ValueError(
+                f"No function to evaluate for {name}"
+            )
+
+        # Copy the original string
+        func = copy.copy(column["func"])
+
+        # First check for variables
+        if '$' in func:
+            var_names = list(set(re.findall(r'\$(.*?)\$', func)))
+            for var_name in var_names:
+                if var_name in self._var.keys():
+                    func = func.replace(
+                        f"${var_name}$", f"{self._var[var_name]}"
+                    )
+                    var_names.remove(var_name)
+            if len(var_names) == 0:
+                try:
+                    res = eval(func)
+                except Exception as e:
+                    raise ValueError(
+                        f"Error while evaluating function: \n{func}\n"
+                        f"Got exception: \n{e}"
+                    )
+                return res
+            else:
+                return func, var_names
+        # Evaluate directly if no variables are found
+        else:
+            try:
+                res = eval(func)
+            except Exception as e:
+                raise ValueError(
+                    f"Error while evaluating function: \n{func}\n"
+                    f"Got exception: \n{e}"
+                )
+            return res
diff --git a/example/config_lensfit.yaml b/example/config_lensfit.yaml
new file mode 100644
index 0000000..63c5420
--- /dev/null
+++ b/example/config_lensfit.yaml
@@ -0,0 +1,81 @@
+# Config file for lensfit analysis
+
+var-1:
+  name: patch
+  # Can handle int, float, str, list.
+  # More complex variables would raise errors. The example below could be
+  # achieved by doing:
+  value: 'list(range(1, 3))'
+  # Just calling range will raise an error.
+  # value: '[1, 2]'
+
+# Name of the run
+run_name: lensfit
+
+# The directory used for the run and store the results
+workspace_directory: /Users/aguinot/Documents/weaklensingvalidation_test/workspace
+
+# If True, the processing is skipped and only the plots are done.
+# For this option to work, a run has to have been done with the same
+# workspace_directory and run_name.
+# only_plot: True
+
+galaxy_catalog:
+  # If more than one catalog is provided they all need to have the same format
+  # path: 'galaxy_catalog_1.fits'
+  # path: ['galaxy_catalog_1.fits', 'galaxy_catalog_2.fits']
+  path:
+    type: eval
+    func: "[f'galaxy_catalog_{i}.fits' for i in $patch$]"
+  # If keep_cat_history == True we keep track of the objects and of the
+  # catalog they come from. A variable, var_cat_history, is defined internally
+  # and can be used to make plots per catalog (see plotting below).
+  # var_cat_history can also be set manually.
+  # This feature only works if more than one catalog is provided.
+  keep_cat_history: True
+  var_cat_history: patch
+
+  # Type of catalog
+  type: classic
+
+  # Define columns and link names in the catalog to internal naming
+  columns:
+    ra: ra
+    dec: dec
+    e1: e1
+    e2: e2
+    weights: w
+    e1_psf:
+      type: eval
+      # Variables are first checked among internal variables (defined in the
+      # config file) and then among the catalog columns
+      func: "($PSF_Q11$ - $PSF_Q22$) / ($PSF_Q11$ + $PSF_Q22$ + 2. * sqrt($PSF_Q11$ * $PSF_Q22$ - $PSF_Q12$**2))"
+    e2_psf: e2_psf
+    size_psf: fwhm_psf
+    mag: mag
+
+star_catalog:
+  path:
+    type: eval
+    func: "[f'star_catalog_{i}.fits' for i in $patch$]"
+  # This part is ignored for the star catalog if a galaxy catalog is provided.
+  # Everything set here will be updated based on the galaxy catalog.
+  # keep_cat_history: True
+  # var_cat_history: patch
+
+  # Type of catalog
+  type: classic
+
+  columns:
+    ra: ra
+    dec: dec
+    x: x
+    y: y
+    n_ccd: ccd_nb
+    e1_star: e1_s
+    e2_star: e2_s
+    size_star: fwhm_s
+    e1_psf: e1_psf
+    e2_psf: e2_psf
+    size_psf: fwhm_psf
+

From 51d0e8940e8dcd75ae76a4e434714add60deb820 Mon Sep 17 00:00:00 2001
From: aguinot <axel.guinot.astro@gmail.com>
Date: Tue, 24 Jan 2023 23:32:28 +0100
Subject: [PATCH 2/4] catalog

First version of the Catalog class to deal with the input catalogs
---
 WeakLensingValidation/catalog.py | 209 +++++++++++++++++++++++++++++++
 1 file changed, 209 insertions(+)
 create mode 100644 WeakLensingValidation/catalog.py

diff --git a/WeakLensingValidation/catalog.py b/WeakLensingValidation/catalog.py
new file mode 100644
index 0000000..9d7bbe2
--- /dev/null
+++ b/WeakLensingValidation/catalog.py
@@ -0,0 +1,209 @@
+# Author: Axel Guinot (axel.guinot.astro@gmail.com)
+# Catalogue
+
+from config_parser import ConfigParser
+
+# This is used with the eval function
+import dask.array as da
+import vaex
+from vaex.convert import convert
+
+import numpy as np
+
+import os
+import copy
+
+
+class Catalog():
+    """Catalog
+
+    This class stores a catalog as multiple DaskArrays.
+    This class is instantiated from a config file.
+    """
+
+    def __init__(
+        self,
+        path=None,
+        config=None,
+        params=None,
+    ):
+
+        if isinstance(config, ConfigParser):
+            self._config = config.config
+        else:
+            raise ValueError(
+                f"config must be an instance of {ConfigParser}. "
+                f"Got: {type(config)}"
+            )
+
+        self.read_catalog()
+
+    def __getitem__(self, key):
+
+        if not isinstance(key, str):
+            raise KeyError("Key must be a string")
+        if key not in self.column_names:
+            raise KeyError(f"{key}")
+        return self._columns[key]
+
+    def read_catalog(self):
+
+        raise NotImplementedError
+
+    def _read_catalog(self, cat_config):
+        """read catalog
+
+        This method handles the reading of a catalog. It can read multiple
+        catalogs and concatenate them into a single DataFrame. If requested,
+        it will keep track of the origin catalog for every object.
+
+        Parameters
+        ----------
+        cat_config : dict
+            Configuration dictionary
+        """
+
+        path_config = cat_config['path']
+
+        # Here we handle the opening of multiple files like vaex.open_many()
+        # but we also keep track of the catalog of origin for each object if
+        # requested.
+        all_df = []
+        for i, path in enumerate(path_config['path']):
+            # First we check the extension of the file. If not ".hdf5" we
+            # convert it. We cannot make memory mapped operations from ".fits"
+            # file. The converted file is put in the workspace directory.
+            ext = os.path.splitext(path)[1]
+            file_name = os.path.split(path)[1]
+            if ext != '.hdf5':
+                new_path = \
+                    self._config["workspace"]['path'] + '/' \
+                    + file_name + '.hdf5'
+                if ext != '.fits':
+                    raise ValueError(f"Unreconized file format. Got: {ext}")
+                if not os.path.exists(new_path):
+                    convert(
+                        path_input=path,
+                        fs_options_input={},
+                        fs_input=None,
+                        path_output=new_path,
+                        fs_options_output={},
+                        fs_output=None,
+                        progress=False,
+                    )
+            else:
+                new_path = \
+                    self._config["workspace"]['path'] + '/' + file_name
+                if not os.path.exists(new_path):
+                    os.symlink(path, new_path)
+
+            # Now we handle the history if requested
+            df_tmp = vaex.open(new_path)
+            if path_config['keep_cat_history']:
+                # Check if it has already been added
+                if 'var_cat_history' not in df_tmp.column_names:
+                    df_tmp['var_cat_history'] = \
+                        np.ones(len(df_tmp), dtype=int) \
+                        * path_config['var_cat_history'][i]
+            all_df.append(df_tmp)
+        self._df = vaex.concat(all_df)
+
+    def _get_column(self, col_name, all_col_names):
+        """get column
+
+        Build one column of the catalog and convert it to a DaskArray
+
+        Parameters
+        ----------
+        col_name : str, dict
+            Column name in the original catalog or dictionary with a
+            description of how to build the column.
+        all_col_names : list
+            List of all the column names in the original catalog.
+
+        Returns
+        -------
+        dask.array
+            DaskArray of the column
+        """
+
+        if isinstance(col_name, str):
+            return self._df[col_name].to_dask_array()
+        elif isinstance(col_name, dict):
+            cat_tmp = {}
+            func = copy.copy(col_name['func'])
+            for var_name in col_name['var']:
+                cat_tmp[var_name] = self._df[var_name].to_dask_array()
+                func = func.replace(
+                    f"${var_name}$", f"cat_tmp['{var_name}']"
+                )
+            try:
+                res = eval(func)
+            except Exception as e:
+                raise ValueError(
+                        f"Error while evaluating function: \n{func}\n"
+                        f"Got exception: \n{e}"
+                    )
+            return res
+
+
+class GalaxyCatalog(Catalog):
+
+    def __init__(
+            self,
+            config=None
+    ):
+
+        super().__init__(config=config)
+
+    def read_catalog(self):
+        """read catalogue
+
+        This function is called during the initialization and build the galaxy
+        catalog.
+        """
+
+        cat_config = self._config['galaxy_catalog']
+
+        # First we read the catalog with vaex
+        self._read_catalog(cat_config)
+
+        # Now we set the column as dask arrays
+        self.column_names = list(cat_config['columns'].keys())
+        self._columns = {}
+        for column_name in self.column_names:
+            self._columns[column_name] = self._get_column(
+                cat_config['columns'][column_name],
+                self.column_names,
+            )
+
+
+class StarCatalog(Catalog):
+
+    def __init__(
+            self,
+            config=None
+    ):
+
+        super().__init__(config=config)
+
+    def read_catalog(self):
+        """read catalogue
+
+        This function is called during the initialization and build the star
+        catalog.
+        """
+
+        cat_config = self._config['star_catalog']
+
+        # First we read the catalog with vaex
+        self._read_catalog(cat_config)
+
+        # Now we set the column as dask arrays
+        self.column_names = list(cat_config['columns'].keys())
+        self._columns = {}
+        for column_name in self.column_names:
+            self._columns[column_name] = self._get_column(
+                cat_config['columns'][column_name],
+                self.column_names,
+            )

From 1e9240a9208c254a6a67b46abeef009c87d08c3d Mon Sep 17 00:00:00 2001
From: Axel Guinot <39480528+aguinot@users.noreply.github.com>
Date: Tue, 24 Jan 2023 23:36:30 +0100
Subject: [PATCH 3/4] Delete config_parser.py

---
 WeakLensingValidation/config_parser.py | 540 -------------------------
 1 file changed, 540 deletions(-)
 delete mode 100644 WeakLensingValidation/config_parser.py

diff --git a/WeakLensingValidation/config_parser.py b/WeakLensingValidation/config_parser.py
deleted file mode 100644
index 7470c7e..0000000
--- a/WeakLensingValidation/config_parser.py
+++ /dev/null
@@ -1,540 +0,0 @@
-# Author: Axel Guinot (axel.guinot.astro@gmail.com)
-# config parser
-# Some of the ideas are inspired by the Galsim parser
-
-import yaml
-import copy
-import re
-import os
-
-_main_fields = ['galaxy_catalog', 'star_catalog', 'mask_image']
-_catalog_fields = ['type', 'path', 'columns']
-_classic_gal_columns = [
-    'ra',
-    'dec',
-    'e1',
-    'e2',
-    'weights',
-    'e1_psf',
-    'e2_psf',
-    'mag'
-]
-_classic_star_columns = [
-    'ra',
-    'dec',
-    'x',
-    'y',
-    'n_ccd',
-    'e1_psf',
-    'e2_psf',
-    'size_psf',
-    'e1_star',
-    'e2_star',
-    'size_star',
-]
-
-
-class ConfigParser():
-    """ConfigParser
-
-    Parse the input config file in .yaml format and build a dictionary.
-
-    Parameters
-    ----------
-    config_path : str
-        Path to the config file
-    """
-
-    def __init__(
-        self,
-        config_path=None,
-    ):
-
-        if isinstance(config_path, type(None)):
-            raise ValueError("No config_path have been provided")
-        if not isinstance(config_path, str):
-            raise ValueError("config_path must be a string")
-        if not os.path.exists(config_path):
-            raise ValueError(f"No file found at: {config_path}")
-
-        config_raw = self._read_yaml_file(config_path)
-
-        self.parse_config(config_raw)
-
-    def __str__(self):
-
-        return yaml.dump(self.config)
-
-    def __repr__(self):
-
-        return self.__str__()
-
-    def parse_config(self, config_raw):
-        """parse config
-
-        Parse the yaml dictionnay and transform the output in something the
-        library can understand.
-
-        Parameters
-        ----------
-            config_raw : dict
-                raw output of the yaml loader
-        """
-
-        self.config = {}
-
-        # Set workspace directory
-        self._parse_workspace(config_raw)
-
-        # First get the variables
-        self._parse_variables(config_raw)
-
-        # Galaxy catalogue
-        if 'galaxy_catalog' in config_raw.keys():
-            self._parse_galaxy_catalog(config_raw)
-        # Star catalogue
-        if 'star_catalog' in config_raw.keys():
-            self._parse_star_catalog(config_raw)
-
-    def _parse_workspace(self, config_raw):
-        """parse workspace
-
-        Setup the workspace directory and the name of the run. It also checks
-        if the only_plot option is set.
-
-        Parameters
-        ----------
-        config_raw : dict
-            raw output of the yaml loader
-        """
-
-        # Check run name
-        if 'run_name' not in config_raw.keys():
-            raise ValueError("No run_name are provided")
-        if not isinstance(config_raw['run_name'], str):
-            raise ValueError("run_name must be a string")
-        run_name = config_raw['run_name']
-
-        # Check workspace
-        if 'workspace_directory' not in config_raw.keys():
-            raise ValueError("No workspace_directory are provided")
-        if not isinstance(config_raw['workspace_directory'], str):
-            raise ValueError("workspace_directory must be a string")
-        workspace_dir_tmp = config_raw['workspace_directory']
-        if not os.path.isdir(workspace_dir_tmp):
-            raise ValueError(
-                "The workspace path do not exist or is not a directory. "
-                f"Got: {workspace_dir_tmp}"
-            )
-        workspace_dir = os.path.join(workspace_dir_tmp, run_name)
-
-        if 'only_plot' in config_raw.keys():
-            if not isinstance(config_raw['only_plot'], bool):
-                raise ValueError(
-                    "only_plot must in [True, Fasle]. "
-                    f"Got: {config_raw['only_plot']}"
-                )
-            only_plot = config_raw['only_plot']
-        else:
-            only_plot = False
-
-        if os.path.exists(workspace_dir):
-            if only_plot:
-                raise ValueError(
-                    "When using only_plot, the workspace has to an already"
-                    f"existing directory. Got: {workspace_dir}"
-                )
-        else:
-            try:
-                os.mkdir(workspace_dir)
-            except Exception as e:
-                raise ValueError(
-                    "Error while creating the directory got the following "
-                    f"exception:\n{e}"
-                )
-
-        config = {
-            'path': workspace_dir,
-            'run_name': run_name,
-            'only_plot': only_plot,
-        }
-
-        config = {'workspace': config}
-        self.config.update(config)
-
-    def _parse_galaxy_catalog(self, config_raw):
-        """parse the galaxy catalog
-
-        Read and store informations about the galaxy catalog.
-
-        Parameters
-        ----------
-            config_raw : dict
-                raw output of the yaml loader
-        """
-
-        config = {}
-        gal_dict = config_raw['galaxy_catalog']
-
-        # Make sure the necessary information are provided
-        if not all(
-            [
-                needed_key in gal_dict.keys()
-                for needed_key in _catalog_fields
-            ]
-        ):
-            raise ValueError(
-                "The galaxy_catalog neeeds to have at least those entries: "
-                f"{_catalog_fields}"
-                )
-
-        # Parse the path
-        config['path'] = self._parse_path(gal_dict)
-
-        if gal_dict["type"] == 'classic':
-            config["type"] = 'classic'
-            # Make sure the necessary columns are provided
-            for needed_key in _classic_gal_columns:
-                if needed_key not in gal_dict['columns'].keys():
-                    raise ValueError(f"Column {needed_key} not provided")
-
-            # Now we go through all columns
-            # We cannot do it in the previous loop because more columns could
-            # be given
-            config["columns"] = {}
-            for key in gal_dict['columns'].keys():
-                column_tmp = gal_dict['columns'][key]
-
-                # Assign internal naming to catalog naming
-                if isinstance(column_tmp, str):
-                    config["columns"][key] = column_tmp
-
-                # Create a new column based on eval
-                # Note that the evaluation is not done at this stage because
-                # we don't have acces to the catalog yet. It will be complited
-                # later when the catalog is actually read.
-                elif isinstance(column_tmp, dict):
-                    func, var = self._parse_eval(column_tmp, key)
-                    config["columns"][key] = {
-                        'func': func,
-                        'var': var
-                    }
-        config = {'galaxy_catalog': config}
-        self.config.update(config)
-
-    def _parse_star_catalog(self, config_raw):
-        """parse the star catalog
-
-        Read and store informations about the star catalog.
-
-        Parameters
-        ----------
-            config_raw : dict
-                raw output of the yaml loader
-        """
-
-        config = {}
-        star_dict = config_raw['star_catalog']
-
-        # Make sure the necessary information are provided
-        if not all(
-            [
-                needed_key in star_dict.keys()
-                for needed_key in _catalog_fields
-            ]
-        ):
-            raise ValueError(
-                "The star_catalog neeeds to have at least those entries: "
-                f"{_catalog_fields}"
-                )
-
-        # Parse the path
-        config['path'] = self._parse_path(star_dict)
-        # Checks if the star information are consitent with the galaxy catalog
-        # if we have one
-        if 'galaxy_catalog' in self.config.keys():
-            if not isinstance(
-                config['path']['path'],
-                type(self.config['galaxy_catalog']['path']['path'])
-            ):
-                raise ValueError(
-                    "The format of the star catalog is not consistant with "
-                    "the galaxy catalog"
-                )
-            if isinstance(config['path']['path'], list):
-                if (
-                    len(config['path']['path']) !=
-                    len(self.config['galaxy_catalog']['path']['path'])
-                ):
-                    raise ValueError(
-                        "The number of star catalogs is not consistant with "
-                        "the number of galaxy catalog"
-                    )
-            config['path']['keep_cat_history'] = \
-                self.config['galaxy_catalog']['path']['keep_cat_history']
-            config['path']['var_cat_history'] = \
-                self.config['galaxy_catalog']['path']['var_cat_history']
-
-        if star_dict["type"] == 'classic':
-            config["type"] = 'classic'
-            # Make sure the necessary columns are provided
-            for needed_key in _classic_star_columns:
-                if needed_key not in star_dict['columns'].keys():
-                    raise ValueError(f"Column {needed_key} not provided")
-
-            # Now we go through all columns
-            # We cannot do it in the previous loop because more columns could
-            # be given
-            config["columns"] = {}
-            for key in star_dict['columns'].keys():
-                column_tmp = star_dict['columns'][key]
-
-                # Assign internal naming to catalog naming
-                if isinstance(column_tmp, str):
-                    config["columns"][key] = column_tmp
-
-                # Create a new column based on eval
-                # Note that the evaluation is not done at this stage because
-                # we don't have acces to the catalog yet. It will be complited
-                # later when the catalog is actually read.
-                elif isinstance(column_tmp, dict):
-                    func, var = self._parse_eval(column_tmp, key)
-                    config["columns"][key] = {
-                        'func': func,
-                        'var': var
-                    }
-        config = {'star_catalog': config}
-        self.config.update(config)
-
-    def _read_yaml_file(self, path):
-        """read yaml file
-
-        This method reads the input yaml file and return a raw dictionnary
-        which will be parse after.
-
-        Parameters
-        ----------
-            path : str
-                path to the config file
-        """
-
-        self._config_path = path
-
-        with open(path) as f:
-            raw_dict = [c for c in yaml.load_all(f.read(), yaml.SafeLoader)]
-
-        if len(raw_dict) != 1:
-            raise ValueError(
-                f"Error occured while reading config file at {path}"
-            )
-
-        return raw_dict[0]
-
-    def _parse_variables(self, config_raw):
-        """parse variable
-
-        Parse the variables defined in config file.
-
-        Parameters
-        ----------
-        config_raw : dict
-            raw output of the yaml loader
-        """
-
-        # check if variables are defined
-        var_keys = [key for key in config_raw.keys() if 'var' in key]
-
-        if len(var_keys) != 0:
-            self._var = {}
-            for var_key in var_keys:
-                if not isinstance(config_raw[var_key], dict):
-                    raise ValueError(
-                        f"Unroconized format for variable {var_key}"
-                    )
-
-                # Set the name of the variable
-                if 'name' not in config_raw[var_key].keys():
-                    raise ValueError(
-                        f"No name found for varible {var_key}"
-                    )
-                if not isinstance(config_raw[var_key]['name'], str):
-                    raise ValueError(
-                        "Varibale name not of type string  for variable "
-                        f"{var_key}"
-                    )
-                var_name = config_raw[var_key]['name']
-
-                # Set the value fo the variable
-                if 'value' not in config_raw[var_key].keys():
-                    raise ValueError(
-                        f"No value found for variable {var_key}"
-                    )
-                try:
-                    var_val = eval(config_raw[var_key]['value'])
-                except Exception as e:
-                    raise ValueError(
-                        f"Error while evaluating value of variable {var_key}:"
-                        f"\n{e}"
-                    )
-
-                self._var[var_name] = var_val
-
-    def _parse_path(self, cat_dict):
-        """parse path
-
-        Parse the path of a catalog.
-        Path can be a list of paths or a function to evaluate. It is possible
-        to keep track of which objects belong to which catalog to use that in
-        the processing or the ploting.
-
-        Parameters
-        ----------
-        cat_dict : dict
-            Dictionnary containing information about a catalog
-
-        Returns
-        -------
-        path_output: dict
-            Output dictionnay with the information about the path of the
-            catalog(s) folowing the format:
-                - path: str or list
-                    Path (or list of paths) of the catalog(s)
-                - keep_cat_history: bool
-                    Weither to keep track of objects. Only for multiple
-                    catalogs
-                - var_cat_history: list
-                    list of int to keep the history of the catalogs. If not
-                    set, a list is defined: [1, n_catalog]
-        """
-
-        path_output = {
-            'path': '',
-            'keep_cat_history': False,
-            'var_cat_history': [],
-        }
-
-        # Get the path
-        if isinstance(cat_dict['path'], str):
-            path_output['path'] = cat_dict['path']
-        elif isinstance(cat_dict['path'], list):
-            path_output['path'] = cat_dict['path']
-        elif isinstance(cat_dict['path'], dict):
-            path_output['path'] = self._parse_eval(cat_dict['path'], 'path')
-        else:
-            raise ValueError("path must either a string, list or dict")
-
-        # handle keep_cat_history
-        if not (
-            isinstance(cat_dict['path'], list)
-            or isinstance(cat_dict['path'], dict)
-        ):
-            return path_output
-
-        if 'keep_cat_history' in cat_dict.keys():
-            path_output['keep_cat_history'] = \
-                cat_dict['keep_cat_history']
-
-        if not path_output['keep_cat_history']:
-            return path_output
-
-        if 'var_cat_history' in cat_dict.keys():
-            var_name = cat_dict['var_cat_history']
-            if not isinstance(var_name, str):
-                raise ValueError(
-                    f"var_cat_history not a string. Got: {var_name}"
-                )
-            if var_name not in self._var.keys():
-                raise ValueError(
-                    "Variable for var_cat_history not defined. Got: "
-                    f"{var_name}"
-                )
-            var_history = self._var[var_name]
-            if not isinstance(var_history, list):
-                raise ValueError(
-                    f"var_cat_history do not link to a list. Got: {var_name}"
-                )
-            if len(var_history) != len(path_output['path']):
-                raise ValueError(
-                    "Lenght of var_cat_history does not match number of "
-                    "catalogs."
-                    f"\nGot {len(var_history)} != {len(path_output['path'])}"
-                )
-            path_output['var_cat_history'] = var_history
-            return path_output
-        # We set a default list if not provided
-        else:
-            n_cat = len(path_output['path'])
-            path_output['var_cat_history'] = list(range(1, n_cat+1))
-            return path_output
-
-    def _parse_eval(self, column, name):
-        """parse eval
-
-        Parse column or path that will use the eval function.
-        Note that if the function contains variables, we first look for
-        general variables defined in the config file and then among the
-        catalog columns.
-
-        Parameters
-        ----------
-            column : dict
-                Dictionnary containing information about the eval
-            name: str
-                Name of variable to evaluate
-        Returns
-        -------
-            res : float
-                Result of eval fucntion
-            func : str
-                Function to evaluate with the column name to use from the
-                catalog
-            var_names: list
-                List of the column names to replace in the function
-        """
-        if 'type' not in column.keys():
-            raise ValueError(
-                f"Unreconized type for {name}, missing type"
-            )
-        if column['type'].lower() != 'eval':
-            raise ValueError(
-                f"Unreconized type for {name}, "
-                f"got {column['type']}"
-            )
-        if 'func' not in column.keys():
-            raise ValueError(
-                f"No function to evaluate for {name}"
-            )
-
-        # Copy the original string
-        func = copy.copy(column["func"])
-
-        # First check for variables
-        if '$' in func:
-            var_names = list(set(re.findall(r'\$(.*?)\$', func)))
-            for var_name in var_names:
-                if var_name in self._var.keys():
-                    func = func.replace(
-                        f"${var_name}$", f"{self._var[var_name]}"
-                    )
-                    var_names.remove(var_name)
-            if len(var_names) == 0:
-                try:
-                    res = eval(func)
-                except Exception as e:
-                    raise ValueError(
-                        f"Error while evaluating function: \n{func}\n"
-                        f"Got exception: \n{e}"
-                    )
-                return res
-            else:
-                return func, var_names
-        # Evaluate directly if no variables are found
-        else:
-            try:
-                res = eval(func)
-            except Exception as e:
-                raise ValueError(
-                    f"Error while evaluating function: \n{func}\n"
-                    f"Got exception: \n{e}"
-                )
-            return res

From ddc1fa18b6609f5cd71717191c0548eddcccf82c Mon Sep 17 00:00:00 2001
From: Axel Guinot <39480528+aguinot@users.noreply.github.com>
Date: Tue, 24 Jan 2023 23:36:39 +0100
Subject: [PATCH 4/4] Delete config_lensfit.yaml

---
 example/config_lensfit.yaml | 81 -------------------------------------
 1 file changed, 81 deletions(-)
 delete mode 100644 example/config_lensfit.yaml

diff --git a/example/config_lensfit.yaml b/example/config_lensfit.yaml
deleted file mode 100644
index 63c5420..0000000
--- a/example/config_lensfit.yaml
+++ /dev/null
@@ -1,81 +0,0 @@
-# Config file for lensfit analysis
-
-var-1:
-  name: patch
-  # Can handle int, float, str, list
-  # More complex variable would raise errors. The below example could be
-  # achieve by doing:
-  value: 'list(range(1, 3))'
-  # Just calling range will raise an error.
-  # value: '[1, 2]'
-
-# Name of the run
-run_name: lensfit
-
-# The directory used for the run and store the results
-workspace_directory: /Users/aguinot/Documents/weaklensingvalidation_test/workspace
-
-# If True, the processing is skiped and only the plot are done.
-# For this option to work run as to be done with the same workspace_directory
-# and run_name
-# plot_only: True
-
-galaxy_catalog:
-  # If more than one catalog is provided they all need to have the same format
-  # path: 'galaxy_catalog_1.fits'
-  # path: ['galaxy_catalog_1.fits', 'galaxy_catalo_2.fits']
-  path:
-    type: eval
-    func: "[f'galaxy_catalog_{i}.fits' for i in $patch$]"
-  # If keep_cat_history == True we keep track of the objects and from which
-  # catalog they come from. A variable is define internaly var_cat_history
-  # that can be used to make plot per catalog (see ploting below).
-  # var_cat_history can also be set manually.
-  # This feature only work if more than one catalog is provided.
-  keep_cat_history: True
-  var_cat_history: patch
-
-  # Type of catalog
-  type: classic
-
-  # Define columns and and link name in the catalog to internal naming
-  columns:
-    ra: ra
-    dec: dec
-    e1: e1
-    e2: e2
-    weights: w
-    e1_psf:
-      type: eval
-      # Variable are first checked among internal variable (define in config
-      # file) and then among the catalog columns
-      func: "($PSF_Q11$ - $PSF_Q22$) / ($PSF_Q11$ + $PSF_Q22$ + 2. * sqrt($PSF_Q11$ * $PSF_Q22$ - $PSF_Q12$**2))"
-    e2_psf: e2_psf
-    size_psf: fwhm_psf
-    mag: mag
-
-star_catalog:
-  path:
-    type: eval
-    func: "[f'star_catalog_{i}.fits' for i in $patch$]"
-  # This part is ignored for the star catalog if a galaxy catalog is provided.
-  # Everything set here will be updated based on the galaxy catalog.
-  # keep_cat_history: True
-  # var_cat_history: patch
-
-  # Type of catalog
-  type: classic
-
-  columns:
-    ra: ra
-    dec: dec
-    x: x
-    y: y
-    n_ccd: ccd_nb
-    e1_star: e1_s
-    e2_star: e2_s
-    size_star: fwhm_s
-    e1_psf: e1_psf
-    e2_psf: e2_psf
-    size_psf: fwhm_psf
-