From 07ed88092c40789365be934e61c4318ac5f4a043 Mon Sep 17 00:00:00 2001 From: Jonah Timothy Hansen Date: Tue, 23 Sep 2025 10:44:42 +0200 Subject: [PATCH 01/27] added baseline optimization models --- lifesim/core/core.py | 2 +- lifesim/instrument/instrument.py | 45 +++++++++++++++++++++++++------- lifesim/util/options.py | 8 +++++- 3 files changed, 44 insertions(+), 11 deletions(-) diff --git a/lifesim/core/core.py b/lifesim/core/core.py index dd5a009..e27174e 100644 --- a/lifesim/core/core.py +++ b/lifesim/core/core.py @@ -475,7 +475,7 @@ def write_config(self): def build_from_config(self, filename: str): with open(filename) as file: - config_dict = yaml.load(file, Loader=yaml.FullLoader) + config_dict = yaml.load(file, Loader=yaml.UnsafeLoader) self.data.options.array = convert_to_np(config_dict['array']) self.data.options.optimization = convert_to_np(config_dict['optimization']) diff --git a/lifesim/instrument/instrument.py b/lifesim/instrument/instrument.py index 4000c67..a3c15a3 100644 --- a/lifesim/instrument/instrument.py +++ b/lifesim/instrument/instrument.py @@ -167,10 +167,11 @@ def get_wl_bins_const_spec_res(self): # TODO does not take the inclination into account! def adjust_bl_to_hz(self, hz_center: float, - distance_s: float): + distance_s: float, + l_sun: float): """ Adjusts the baseline of the array to be optimal for observations in the habitable zone of - the target star for the selected optimal wavelength. + the target star for the given optimisation model. Parameters ---------- @@ -179,14 +180,37 @@ def adjust_bl_to_hz(self, distance_s : float Distance between the observed star and the LIFE array in [pc]. """ + opt_model = self.data.options.models["baseline_optimisation"] # convert the habitable zone to radians hz_center_rad = hz_center / distance_s / (3600 * 180) * np.pi # in rad - # put first transmission peak of optimal wl on center of HZ - # for the origin of the value 0.5.. see Ottiger+2021 - baseline = (0.589645 / hz_center_rad - * self.data.options.other['wl_optimal'] * 10 ** (-6)) + if opt_model == "ref_wl": + # put first transmission peak of optimal wl on center of HZ + # for the origin of the value 0.5.. 
see Ottiger+2021 + k = 0.589645 * self.data.options.other['wl_optimal'] * 10 ** (-6) + + elif opt_model == "Bryson": + # Baseline optimisation considering the semi-major axis distribution of the Bryson 21 + # occurance rates and orbit projection effects, approximated via a polynomial fit to an MC analysis + sqL = l_sun**0.5 + if sqL > 0.28: #FGKs + cs = np.array([-2.5655e-1,2.0727e-1,4.8682e-2,-4.0867e-4,-2.1587e-2,9.3433])*1e-6 + else: + cs = np.array([-7.4078,1.4269e1,1.1875e-1,-2.3090e-3,-6.9372e-3,9.7141])*1e-6 + k = cs[0]*sqL + cs[1]*sqL**2 + cs[2]*distance_s + cs[3]*distance_s**2 + cs[4]*sqL*distance_s + cs[5] + + elif opt_model == "Uniform": + # Baseline optimisation considering a uniform semi-major axis distribution and orbit + # projection effects, approximated via a polynomial fit to an MC analysis + sqL = l_sun**0.5 + if sqL > 0.28: #FGKs + cs = np.array([1.9934e-1,6.4546e-2,2.6083e-2,-8.1794e-5,-1.7035e-2,8.3007])*1e-6 + else: #Ms + cs = np.array([-2.9216,4.4454,6.5858e-2,-1.4577e-3,2.8476e-2,8.4299])*1e-6 + k = cs[0]*sqL + cs[1]*sqL**2 + cs[2]*distance_s + cs[3]*distance_s**2 + cs[4]*sqL*distance_s + cs[5] + + baseline = k / hz_center_rad self.apply_baseline(baseline=baseline) @@ -266,7 +290,8 @@ def get_snr(self, # adjust baseline of array and give new baseline to transmission generator plugin self.adjust_bl_to_hz(hz_center=float(self.data.catalog.hz_center.iloc[n]), - distance_s=float(self.data.catalog.distance_s.iloc[n])) + distance_s=float(self.data.catalog.distance_s.iloc[n]), + l_sun=float(self.data.catalog.l_sun.iloc[n]),) # get transmission map _, _, self.data.inst['t_map'], _, _ = self.run_socket(s_name='transmission', @@ -514,7 +539,8 @@ def get_spectrum(self, else: # adjust baseline to HZ self.adjust_bl_to_hz(hz_center=hz_center, - distance_s=distance_s) + distance_s=distance_s, + l_sun=l_sun) # calculate the transmission map _, _, self.data.inst['t_map'], _, _ = self.run_socket(s_name='transmission', @@ -689,7 +715,8 @@ def get_signal(self, # adjust baseline to habitable zone self.adjust_bl_to_hz(hz_center=hz_center, - distance_s=distance_s) + distance_s=distance_s, + l_sun=l_sun) # use spectres to rescale the spectrum onto the correct wl bins flux_planet_spectrum_input = flux_planet_spectrum diff --git a/lifesim/util/options.py b/lifesim/util/options.py index fc60412..f9eac22 100644 --- a/lifesim/util/options.py +++ b/lifesim/util/options.py @@ -44,6 +44,10 @@ class Options(object): ``'darwinsim'`` - ``'habitable'`` : Model used for calculating the habitable zone, possible options are ``'MS'`` and ``'POST_MS'`` + - ``'fov_taper'`` : Model used for tapering the field of view, possible options are + ``'gaussian'`` and ``'none'`` + - ``'baseline_optimisation'`` : Model used for optimising the baseline, possible options are + ``'ref_wl'``, ``'Bryson'`` and ``'Uniform'`` optimization : dict Options concerning the methods used to optimally distribute the observing time. - ``'N_pf'`` : Number of sampling locations per orbit. 
@@ -80,7 +84,8 @@ def __init__(self): self.models = {'localzodi': '', 'habitable': '', - 'fov_taper': ''} + 'fov_taper': '', + 'baseline_optimisation': ''} self.optimization = {'N_pf': 0., 'snr_target': 0., @@ -118,6 +123,7 @@ def set_scenario(self, self.models['localzodi'] = 'darwinsim' self.models['habitable'] = 'MS' self.models['fov_taper'] = 'gaussian' + self.models['baseline_optimisation'] = 'ref_wl' self.optimization['N_pf'] = 25 self.optimization['snr_target'] = 7 From 9798caa382bc1c08159cbed78212e23543750779 Mon Sep 17 00:00:00 2001 From: JonahHansen Date: Tue, 23 Sep 2025 10:49:52 +0200 Subject: [PATCH 02/27] Update core.py Removed hack to make YAML accept numpy floats --- lifesim/core/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lifesim/core/core.py b/lifesim/core/core.py index e27174e..dd5a009 100644 --- a/lifesim/core/core.py +++ b/lifesim/core/core.py @@ -475,7 +475,7 @@ def write_config(self): def build_from_config(self, filename: str): with open(filename) as file: - config_dict = yaml.load(file, Loader=yaml.UnsafeLoader) + config_dict = yaml.load(file, Loader=yaml.FullLoader) self.data.options.array = convert_to_np(config_dict['array']) self.data.options.optimization = convert_to_np(config_dict['optimization']) From f021ab16276b972e003bc51ed5c2eac743d286d8 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Mon, 3 Nov 2025 11:00:48 +0100 Subject: [PATCH 03/27] changed reference to Dannert+22 --- lifesim/instrument/instrument.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lifesim/instrument/instrument.py b/lifesim/instrument/instrument.py index 4000c67..917ea3c 100644 --- a/lifesim/instrument/instrument.py +++ b/lifesim/instrument/instrument.py @@ -184,7 +184,7 @@ def adjust_bl_to_hz(self, hz_center_rad = hz_center / distance_s / (3600 * 180) * np.pi # in rad # put first transmission peak of optimal wl on center of HZ - # for the origin of the value 0.5.. see Ottiger+2021 + # for the origin of the value 0.5.. see Dannert+2022 baseline = (0.589645 / hz_center_rad * self.data.options.other['wl_optimal'] * 10 ** (-6)) From 56d284160053b1ad7ff7ac465c9e75864d96d3ba Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Tue, 4 Nov 2025 15:52:31 +0100 Subject: [PATCH 04/27] first running implementation of multiprocessing, validated against single processing runs on small catalogs --- lifesim/instrument/instrument.py | 212 ++++++++++++++++++------------- lifesim/util/options.py | 6 +- 2 files changed, 130 insertions(+), 88 deletions(-) diff --git a/lifesim/instrument/instrument.py b/lifesim/instrument/instrument.py index 917ea3c..3ee2f95 100644 --- a/lifesim/instrument/instrument.py +++ b/lifesim/instrument/instrument.py @@ -1,9 +1,14 @@ from warnings import warn +from copy import deepcopy +from typing import List, Optional, Sequence, Any import numpy as np +import pandas as pd from tqdm import tqdm from spectres import spectres from PyQt5.QtGui import QGuiApplication +from joblib import Parallel, delayed, parallel_config +from joblib_progress import joblib_progress from lifesim.core.modules import InstrumentModule from lifesim.util.habitable import single_habitable_zone @@ -228,7 +233,9 @@ def apply_baseline(self, def get_snr(self, save_mode: bool = False): """ - Calculates the one-hour signal-to-noise ration for all planets in the catalog. + Calculates the one-hour signal-to-noise ration for all planets in the catalog. 
Switches between single and + multi processing depending on the n_cpu option. + Parameters ---------- safe_mode : bool @@ -239,12 +246,6 @@ def get_snr(self, # options are applied before the simulation run self.apply_options() - # currently, the choice of integration time here is arbitrary. Since the background limited - # case is assumed, the SNR scales with sqrt(integration time) and through this, the SNR - # for any integration time can be calculated by knowing the SNR of a specific integration - # time - integration_time = 60 * 60 - self.data.catalog['snr_1h'] = np.zeros_like(self.data.catalog.nstar, dtype=float) self.data.catalog['baseline'] = np.zeros_like(self.data.catalog.nstar, dtype=float) if save_mode: @@ -253,13 +254,38 @@ def get_snr(self, self.data.catalog['photon_rate_planet'] = None self.data.catalog['photon_rate_noise'] = None + if self.data.options.other['n_cpu'] == 1: + self.get_snr_single_processing(save_mode=save_mode) + elif self.data.options.other['n_cpu'] > 1: + self.get_snr_multi_processing(save_mode=save_mode) + else: + raise ValueError('n_cpu option must be >= 1') + + def get_snr_single_processing(self, + save_mode: bool = False, + verbose : bool = True): + """ + Calculates the one-hour signal-to-noise ration for all planets in the catalog. + Parameters + ---------- + safe_mode : bool + If save mode is enables, the individual photon counts of the planet and noise sources + are written to the catalog. + """ + + # currently, the choice of integration time here is arbitrary. Since the background limited + # case is assumed, the SNR scales with sqrt(integration time) and through this, the SNR + # for any integration time can be calculated by knowing the SNR of a specific integration + # time + integration_time = 60 * 60 + # create mask returning only unique stars _, temp = np.unique(self.data.catalog.nstar, return_index=True) star_mask = np.zeros_like(self.data.catalog.nstar, dtype=bool) star_mask[temp] = True # iterate over all stars to calculate noise specific to stars - for i, n in enumerate(tqdm(np.where(star_mask)[0])): + for i, n in enumerate(tqdm(np.where(star_mask)[0], disable=not verbose)): # if i == 10: # break nstar = self.data.catalog.nstar.iloc[n] @@ -367,6 +393,40 @@ def get_snr(self, / self.data.inst['eff_tot'] ).sum() + def get_snr_multi_processing(self, + save_mode: bool = False): + + # divide the catalog into roughly equal chunks for each cpu + n_star, occ_star = np.unique(self.data.catalog.nstar, return_counts=True) + star_groups = balanced_partition_greedy(occ=occ_star, items=n_star, n_groups=self.data.options.other['n_cpu']*10) + + sub_catalogs = [self.data.catalog[np.isin(self.data.catalog.nstar, sg)] for sg in star_groups] + group_sizes = [len(sc) for sc in sub_catalogs] + per_dev = (np.max(group_sizes) - np.min(group_sizes)) / np.mean(group_sizes) + print(f'Maximum deviation in group sizes: {per_dev*100:.1f}%') + + reference_bus = deepcopy(self) + del reference_bus.data.catalog + + with parallel_config( + backend="loky", inner_max_num_threads=1 + ), joblib_progress( + description="Running SNR calculation in parallel ...", + total=len(star_groups), + ): + results = Parallel(n_jobs=self.data.options.other['n_cpu'])( + delayed(mp_runner)( + bus=reference_bus, + catalog=sc, + save_mode=save_mode + ) + for sc in sub_catalogs) + + # combine results back into main catalog + self.data.catalog = pd.concat(results) + self.data.catalog.sort_values('id', inplace=True) + + # TODO: fix units in documentation def get_spectrum(self, temp_s: float, # in K @@ 
-659,12 +719,6 @@ def get_signal(self, Returns the noise contribution in [photons] """ - # TODO: remove by 2024 - warn('The get_spectrum function was implemented with a major bug between versions 0.2.16 ' - 'and 0.2.24 in which the noise level was twice as large as the correct value. If ' - 'you created results with the versions in question, please validate them with the ' - 'latest version of LIFEsim.') - # options are applied before the simulation run self.apply_options() @@ -768,78 +822,66 @@ def get_signal(self, return signal, flux_planet +def mp_runner(bus, + catalog, + save_mode: bool = False): + bus = deepcopy(bus) + bus.data.catalog = catalog + bus.get_snr_single_processing(save_mode=save_mode, + verbose=False) + return bus.data.catalog + +def balanced_partition_greedy(occ: Sequence[int], + n_groups: int, + items: Optional[Sequence[Any]] = None) -> List[List[Any]]: + """ + Greedy multi-way partition using NumPy for faster min selection. + Assigns the largest items first to the current smallest-sum group. -def multiprocessing_runner(input_dict: dict): - # create mask returning only unique stars - universes = np.unique( - input_dict['catalog'].nuniverse[input_dict['catalog'].nstar == input_dict['nstar']], - return_index=False - ) - - # get transmission map - _, _, self.data.inst['t_map'], _, _ = self.run_socket(s_name='transmission', - method='transmission_map', - map_selection='tm3') - - for nuniverse in universes: - inst.z = input_dict['catalog'][np.logical_and(input_dict['catalog'].nstar == input_dict['nstar'], - input_dict['catalog'].nuniverse == nuniverse)].z.iloc[0] - - # redo calculation for exozodi - inst.create_exozodi() - inst.sensitivity_coefficients(exozodi_only=True) - inst.fundamental_noise(exozodi_only=True) - - # go through all planets for the chosen star - for _, n_p in enumerate(np.argwhere( - np.logical_and(input_dict['catalog'].nstar.to_numpy() == input_dict['nstar'], - input_dict['catalog'].nuniverse.to_numpy() == nuniverse))[:, 0]): - inst.temp_planet = input_dict['catalog']['temp_p'].iloc[n_p] - inst.radius_planet = input_dict['catalog']['radius_p'].iloc[n_p] - inst.separation_planet = (input_dict['catalog']['angsep'].iloc[n_p] - * input_dict['catalog']['distance_s'].iloc[n_p]) - - # ----- must be repeated for every planet ----- - inst.create_planet(force=True) - inst.planet_signal() - - if (inst.chopping == 'nchop'): - inst.sn_nchop() - else: - inst.sn_chop() - - # save baseline - input_dict['catalog']['baseline'].iat[n_p] = input_dict['baseline'] - - # save snr results - if (inst.chopping == 'nchop'): - input_dict['catalog'].t_rot.iat[n_p] = input_dict['integration_time'] - input_dict['catalog'].signal.iat[n_p] = inst.photon_rates.loc['signal', 'nchop'].sum() - input_dict['catalog'].photon_noise.iat[n_p] = ( - np.sqrt((inst.photon_rates.loc['pn', 'nchop'] ** 2).sum())) - input_dict['catalog'].systematic_noise.iat[n_p] = ( - np.sqrt((inst.photon_rates.loc['sn', 'nchop'] ** 2).sum())) - else: - input_dict['catalog'].t_rot.iat[n_p] = input_dict['integration_time'] - input_dict['catalog'].signal.iat[n_p] = inst.photon_rates.loc['signal', 'chop'].sum() - input_dict['catalog'].photon_noise.iat[n_p] = ( - np.sqrt((inst.photon_rates.loc['pn', 'chop'] ** 2).sum())) - input_dict['catalog'].systematic_noise.iat[n_p] = ( - np.sqrt((inst.photon_rates.loc['sn', 'chop'] ** 2).sum())) - - if input_dict['safe_mode']: - if (inst.chopping == 'nchop'): - input_dict['noise_catalog'].loc[input_dict['catalog']['id'].iloc[n_p]] = ( - inst.photon_rates.nchop) - else: - 
input_dict['noise_catalog'].loc[input_dict['catalog']['id'].iloc[n_p]] = ( - inst.photon_rates.chop) - - return_dict = {'catalog': input_dict['catalog']} - if input_dict['safe_mode']: - return_dict['noise_catalog'] = input_dict['noise_catalog'] - - return return_dict + Parameters + ---------- + occ : Sequence[int] + Sequence containing the weight (e.g. number of sub-objects) for each item. + n_groups : int + Number of groups to partition into. Must be >= 1. + items : Optional[Sequence[Any]] + Sequence of items corresponding to `occ`. If ``None``, the function will use the + indices ``range(len(occ))`` as the items. + + Returns + ------- + List[List[Any]] + List of length ``n_groups`` where each element is a list of the assigned items + (or indices if ``items`` was ``None``). Group totals are balanced using a greedy + heuristic; some groups may remain empty if ``n_groups`` > ``len(occ)``. + + Raises + ------ + ValueError + If ``n_groups`` < 1 or if ``items`` is provided but its length does not match ``occ``. + """ + if n_groups <= 0: + raise ValueError("n_groups must be >= 1") + n = len(occ) + if items is None: + items = list(range(n)) + if len(items) != n: + raise ValueError("items and occ must have same length") + + occ_arr = np.asarray(occ, dtype=np.int64) + # sort indices by descending weight (largest first) + idxs = np.argsort(-occ_arr) + + groups: List[List[Any]] = [[] for _ in range(n_groups)] + sums = np.zeros(n_groups, dtype=np.int64) + + # assign each item to the group with the smallest current sum + for i in idxs: + g = int(np.argmin(sums)) # fast C-level operation + groups[g].append(items[int(i)]) + sums[g] += int(occ_arr[int(i)]) + + return groups diff --git a/lifesim/util/options.py b/lifesim/util/options.py index 715c6d9..0a8607c 100644 --- a/lifesim/util/options.py +++ b/lifesim/util/options.py @@ -76,7 +76,8 @@ def __init__(self): 'n_plugins': 0, 'output_path': None, 'output_filename': None, - 'fov_threshold': 0.} + 'fov_threshold': 0., + 'n_cpu': 1} self.models = {'localzodi': '', 'habitable': '', @@ -120,8 +121,7 @@ def set_scenario(self, self.optimization['N_pf'] = 25 self.optimization['snr_target'] = 7 - self.optimization['limit'] = {'A': np.inf, - 'F': np.inf, + self.optimization['limit'] = {'F': np.inf, 'G': np.inf, 'K': np.inf, 'M': np.inf} From d9b52221c02f3148962cf52682ae36c4fd009188 Mon Sep 17 00:00:00 2001 From: "Felix A. 
Dannert" <62441325+fdannert@users.noreply.github.com> Date: Tue, 4 Nov 2025 17:15:58 +0100 Subject: [PATCH 05/27] added science yield wrapper --- lifesim/__init__.py | 4 +- lifesim/analysis/yield_wrapper.py | 107 ++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 lifesim/analysis/yield_wrapper.py diff --git a/lifesim/__init__.py b/lifesim/__init__.py index 0f813c4..c61fa63 100644 --- a/lifesim/__init__.py +++ b/lifesim/__init__.py @@ -13,4 +13,6 @@ from lifesim.optimize.optimizer import Optimizer from lifesim.optimize.ahgs import AhgsModule -from lifesim.gui.spectrum_gui import Gui \ No newline at end of file +from lifesim.gui.spectrum_gui import Gui + +from lifesim.analysis.yield_wrapper import ScienceYield diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py new file mode 100644 index 0000000..e6d4705 --- /dev/null +++ b/lifesim/analysis/yield_wrapper.py @@ -0,0 +1,107 @@ +import time +import shutil +import os + +import numpy as np + +import lifesim + +class ScienceYield: + def __init__(self, + config_path, + catalog_path, + output_path, + n_cpu: int = 1): + self.config_path = config_path + self.catalog_path = catalog_path + self.output_path = output_path + self.n_cpu = n_cpu + + def compute_yield(self, + output_path, + output_filename, + run_maxsep): + + print('START OF RUN: ', time.ctime()) + print('RUN NAME: ', output_filename) + print('----------------------------') + + t = time.time() + # create bus + bus = lifesim.Bus() + + # setting the options + bus.build_from_config(filename=self.config_path) + bus.data.options.set_manual(n_cpu=self.n_cpu) # speed up calculation + + bus.data.options.set_manual( + output_path=output_path) + bus.data.options.set_manual(output_filename=output_filename) + + + # ---------- Loading the Catalog ---------- + bus.data.import_catalog(input_path=self.catalog_path) + + # ---------- Creating the Instrument ---------- + + # create modules and add to bus + instrument = lifesim.Instrument(name='inst') + bus.add_module(instrument) + + transm = lifesim.TransmissionMap(name='transm') + bus.add_module(transm) + + exo = lifesim.PhotonNoiseExozodi(name='exo') + bus.add_module(exo) + local = lifesim.PhotonNoiseLocalzodi(name='local') + bus.add_module(local) + star = lifesim.PhotonNoiseStar(name='star') + bus.add_module(star) + + # connect all modules + bus.connect(('inst', 'transm')) + bus.connect(('inst', 'exo')) + bus.connect(('inst', 'local')) + bus.connect(('inst', 'star')) + + bus.connect(('star', 'transm')) + + if run_maxsep: + bus.data.catalog = bus.data.catalog[bus.data.catalog.habitable] + bus.data.catalog['angsep'] = bus.data.catalog['maxangsep'] + + instrument.get_snr() + bus.save() + + del bus + + print('Generation took ', (time.time() - t) / 60, ' minutes to complete.') + + def run_aperture_sweep(self, + mirror_diameters): + + for ndim, diameter in enumerate(mirror_diameters): + print('') + print('') + print('STARTING RUN FOR MIRROR DIAMETER: ', diameter) + print('AT TIME: ', time.ctime()) + + print('Preparing directories... ', end='') + + output_directory = os.path.join(self.output_path, 'diam_' + str(np.round(diameter, 2)).replace('.', '_')) + if not os.path.exists(output_directory): + os.makedirs(output_directory) + + print('[Done]') + + print('Commencing base run... 
') + self.compute_yield(output_path=f'{output_directory}/', + output_filename='sweep_diam_' + str(np.round(diameter, 2)).replace('.', '_'), + run_maxsep=False) + print('[Done]') + + print('Commencing maxsep run... ') + self.compute_yield(output_path=f'{output_directory}/', + output_filename='sweep_diam_maxsep_' + str(np.round(diameter, 2)).replace('.', '_'), + run_maxsep=True) + print('[Done]') From 82db3aabcdcf33a51105fa4c3ee61c367657c4dd Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Tue, 4 Nov 2025 17:22:26 +0100 Subject: [PATCH 06/27] load catalog from ppop --- lifesim/analysis/yield_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index e6d4705..e3daa02 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -40,7 +40,7 @@ def compute_yield(self, # ---------- Loading the Catalog ---------- - bus.data.import_catalog(input_path=self.catalog_path) + bus.data.catalog_from_ppop(input_path=self.catalog_path) # ---------- Creating the Instrument ---------- From a35c73e9aa5aab8c35012231c19e4ad5f70974de Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Wed, 5 Nov 2025 08:38:39 +0100 Subject: [PATCH 07/27] changed pandas assignment in instrument.py --- lifesim/instrument/instrument.py | 36 +++++++++++++++++--------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/lifesim/instrument/instrument.py b/lifesim/instrument/instrument.py index 3ee2f95..5fb837b 100644 --- a/lifesim/instrument/instrument.py +++ b/lifesim/instrument/instrument.py @@ -330,10 +330,10 @@ def get_snr_single_processing(self, self.data.catalog.nuniverse == nuniverse))[0][0] noise_bg_universe_temp = (noise_bg_universe * self.data.catalog.z.iloc[n_u] - / self.data.catalog.z.iloc[n]) + / self.data.catalog.z.iloc[n]) noise_bg = ((noise_bg_star + noise_bg_universe_temp) - * integration_time * self.data.inst['eff_tot'] * 2) + * integration_time * self.data.inst['eff_tot'] * 2) # go through all planets for the chosen star for _, n_p in enumerate(np.argwhere( @@ -370,29 +370,34 @@ def get_snr_single_processing(self, # Add up the noise and caluclate the SNR noise = noise_bg + noise_planet - self.data.catalog.snr_1h.iat[n_p] = np.sqrt((flux_planet ** 2 / noise).sum()) + + # use index label to avoid chained assignment / view-copy problems + idx_label = self.data.catalog.index[n_p] + self.data.catalog.loc[idx_label, 'snr_1h'] = np.sqrt((flux_planet ** 2 / noise).sum()) # save baseline - self.data.catalog['baseline'].iat[n_p] = self.data.inst['bl'] + self.data.catalog.loc[idx_label, 'baseline'] = self.data.inst['bl'] if save_mode: - self.data.catalog.noise_astro.iat[n_p] = [noise_bg] - self.data.catalog.planet_flux_use.iat[n_p] = ( + self.data.catalog.loc[idx_label, 'noise_astro'] = [noise_bg] + self.data.catalog.loc[idx_label, 'planet_flux_use'] = ( [flux_planet_thermal * integration_time * self.data.inst['eff_tot'] * self.data.inst['telescope_area']]) - self.data.catalog['photon_rate_planet'].iat[n_p] = ( + self.data.catalog.loc[idx_label, 'photon_rate_planet'] = ( flux_planet / integration_time / self.data.inst['eff_tot'] ).sum() - self.data.catalog['photon_rate_noise'].iat[n_p] = ( + self.data.catalog.loc[idx_label, 'photon_rate_noise'] = ( noise / integration_time / self.data.inst['eff_tot'] ).sum() + # ...existing code... 
+ def get_snr_multi_processing(self, save_mode: bool = False): @@ -488,12 +493,6 @@ def get_spectrum(self, Returns the noise contribution in [photons] """ - # TODO: remove by 2024 - warn('The get_spectrum function was implemented with a major bug between versions 0.2.16 ' - 'and 0.2.24 in which the noise level was twice as large as the correct value. If ' - 'you created results with the versions in question, please validate them with the ' - 'latest version of LIFEsim.') - # options are applied before the simulation run self.apply_options() @@ -719,6 +718,12 @@ def get_signal(self, Returns the noise contribution in [photons] """ + # TODO: remove by 2024 + warn('The get_spectrum function was implemented with a major bug between versions 0.2.16 ' + 'and 0.2.24 in which the noise level was twice as large as the correct value. If ' + 'you created results with the versions in question, please validate them with the ' + 'latest version of LIFEsim.') + # options are applied before the simulation run self.apply_options() @@ -882,6 +887,3 @@ def balanced_partition_greedy(occ: Sequence[int], sums[g] += int(occ_arr[int(i)]) return groups - - - From 7040fff7f0552b75345ef111d19e084ab41f2488 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Wed, 5 Nov 2025 16:13:40 +0100 Subject: [PATCH 08/27] added cnvergence test to yield wrapper --- lifesim/analysis/yield_wrapper.py | 273 ++++++++++++++++++++++++++++-- lifesim/optimize/ahgs.py | 12 +- lifesim/optimize/optimizer.py | 10 +- setup.py | 10 +- 4 files changed, 277 insertions(+), 28 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index e3daa02..d728c80 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -1,8 +1,16 @@ import time import shutil import os +import contextlib +from copy import deepcopy +import json import numpy as np +from joblib import Parallel, delayed, parallel_config +from joblib_progress import joblib_progress +import pandas as pd +from tqdm import tqdm +import matplotlib.pyplot as plt import lifesim @@ -11,16 +19,18 @@ def __init__(self, config_path, catalog_path, output_path, - n_cpu: int = 1): + n_cpu: int = 1, + cat_from_ppop: bool = True): self.config_path = config_path self.catalog_path = catalog_path self.output_path = output_path self.n_cpu = n_cpu + self.cat_from_ppop = cat_from_ppop - def compute_yield(self, - output_path, - output_filename, - run_maxsep): + def compute_snrs(self, + output_path, + output_filename, + run_maxsep): print('START OF RUN: ', time.ctime()) print('RUN NAME: ', output_filename) @@ -40,7 +50,10 @@ def compute_yield(self, # ---------- Loading the Catalog ---------- - bus.data.catalog_from_ppop(input_path=self.catalog_path) + if self.cat_from_ppop: + bus.data.catalog_from_ppop(input_path=self.catalog_path) + else: + bus.data.import_catalog(input_path=self.catalog_path) # ---------- Creating the Instrument ---------- @@ -77,8 +90,8 @@ def compute_yield(self, print('Generation took ', (time.time() - t) / 60, ' minutes to complete.') - def run_aperture_sweep(self, - mirror_diameters): + def run_aperture_sweep_snr(self, + mirror_diameters): for ndim, diameter in enumerate(mirror_diameters): print('') @@ -95,13 +108,245 @@ def run_aperture_sweep(self, print('[Done]') print('Commencing base run... 
') - self.compute_yield(output_path=f'{output_directory}/', - output_filename='sweep_diam_' + str(np.round(diameter, 2)).replace('.', '_'), - run_maxsep=False) + self.compute_snrs(output_path=f'{output_directory}/', + output_filename='sweep_diam_' + str(np.round(diameter, 2)).replace('.', '_'), + run_maxsep=False) print('[Done]') print('Commencing maxsep run... ') - self.compute_yield(output_path=f'{output_directory}/', - output_filename='sweep_diam_maxsep_' + str(np.round(diameter, 2)).replace('.', '_'), - run_maxsep=True) + self.compute_snrs(output_path=f'{output_directory}/', + output_filename='sweep_diam_maxsep_' + str(np.round(diameter, 2)).replace('.', '_'), + run_maxsep=True) print('[Done]') + + def run_optimizer_sweep(self, + run_name): + # get the names of all subdirectories in output_path + subdirs = [d for d in os.listdir(self.output_path) if os.path.isdir(os.path.join(self.output_path, d))] + + # check a directory with the name run_name already exists in output_path, otherwise create it + final_output_path = os.path.join(self.output_path, run_name) + if not os.path.exists(final_output_path): + os.makedirs(final_output_path) + else: + raise ValueError('Directory already exists: ' + final_output_path) + + run_configs = [] + + for subdir in subdirs: + # make a list of all files ending in .hdf5 in subdir, then keep only the part of the filename before '_catalog.hdf5' and only if the sting does not contain 'maxsep' + catalog_files = [f.split('_catalog.hdf5')[0] for f in os.listdir(os.path.join(self.output_path, subdir)) + if f.endswith('_catalog.hdf5') and 'maxsep' not in f] + + # create a subdir of the same name in final_output_path, no existence check required + output_directory = os.path.join(final_output_path, subdir) + os.makedirs(output_directory) + + for catalog_file in catalog_files: + if self.n_cpu == 1: + compute_yields_mp( + output_filename=catalog_file, + output_path=output_directory + '/', + catalog_path=os.path.join(self.output_path, subdir, catalog_file + '_catalog.hdf5'), + config_path=self.config_path + ) + else: + run_configs.append({'output_filename': catalog_file, + 'output_path': output_directory + '/', + 'catalog_path': os.path.join(self.output_path, subdir, catalog_file + '_catalog.hdf5') + }) + + if self.n_cpu > 1: + with parallel_config( + backend="loky", inner_max_num_threads=1 + ), joblib_progress( + description="Running SNR calculation in parallel ...", + total=len(run_configs), + ): + Parallel(n_jobs=self.n_cpu)( + delayed(compute_yields_mp)( + output_filename=rc['output_filename'], + output_path=rc['output_path'], + catalog_path=rc['catalog_path'], + config_path=self.config_path + ) + for rc in run_configs) + + def run_covergence_test(self, + run_name, + catalog_path, + output_path, + min_universes, + num_steps, + plot=False): + if not os.path.exists(output_path): + os.makedirs(output_path) + + # determine number universes + catalog = pd.read_hdf(catalog_path, key='catalog') + total_universes = np.unique(catalog.nuniverse).shape[0] + del catalog + + if min_universes > total_universes-1: + raise ValueError('min_universes must be less than the total number of universes in the catalog.') + + if num_steps is None: + num_steps = total_universes - min_universes + 1 + elif num_steps > total_universes - min_universes + 1: + raise ValueError('num_steps is too large for the given min_universes and total universes in the catalog.') + + uni_sel = np.linspace(min_universes, total_universes, num_steps, dtype=int) + + if self.n_cpu > 1: + with parallel_config( + 
backend="loky", inner_max_num_threads=1 + ), joblib_progress( + description="Running SNR calculation in parallel ...", + total=len(uni_sel), + ): + results = Parallel(n_jobs=self.n_cpu)( + delayed(compute_yields_mp)( + output_filename=run_name + '_nuni_' + str(us), + output_path=output_path, + catalog_path=catalog_path, + config_path=self.config_path, + uni_sel=us, + return_yields=True + ) + for us in uni_sel) + else: + results = [] + for us in tqdm(uni_sel): + res = compute_yields_mp( + output_filename=run_name + '_nuni_' + str(us), + output_path=output_path, + catalog_path=catalog_path, + config_path=self.config_path, + uni_sel=us, + return_yields=True + ) + + results.append(res) + + experiments = list(results[0][1].keys()) + yield_convergence = {exp: {'n_universes': [], + 'mean_yield': [], + 'p16_yield': [], + 'p84_yield': []} + for exp in experiments} + for res in results: + n_uni, yields = res + for exp in experiments: + yield_convergence[exp]['n_universes'].append(n_uni) + yield_convergence[exp]['mean_yield'].append(yields[exp][0]) + yield_convergence[exp]['p16_yield'].append(yields[exp][1]) + yield_convergence[exp]['p84_yield'].append(yields[exp][2]) + + # save yield_convergence to a json file + with open(os.path.join(output_path, run_name + '_yield_convergence.json'), 'w') as f: + json.dump(yield_convergence, f, indent=4) + + if plot: + fig, ax = plt.subplots(figsize=(8, 6)) + for exp in experiments: + ax.plot(yield_convergence[exp]['n_universes'], + yield_convergence[exp]['mean_yield'], + label=exp) + ax.fill_between(yield_convergence[exp]['n_universes'], + yield_convergence[exp]['p16_yield'], + yield_convergence[exp]['p84_yield'], + alpha=0.3) + ax.set_xlabel('Number of Universes') + ax.set_ylabel('Yield') + ax.set_title('Yield Convergence Test') + ax.legend() + plt.savefig(os.path.join(output_path, run_name + '_yield_convergence.pdf')) + plt.close() + + +def get_yields(bus, + return_yields=False): + + yields = {} + + for exp in bus.data.options.optimization['experiments'].keys(): + mask = np.logical_and.reduce((bus.data.catalog.temp_s + >= bus.data.options.optimization['experiments'][exp]['temp_s_min'], + bus.data.catalog.temp_s + <= bus.data.options.optimization['experiments'][exp]['temp_s_max'], + bus.data.catalog.radius_p + >= bus.data.options.optimization['experiments'][exp]['radius_p_min'], + bus.data.catalog.radius_p + <= bus.data.options.optimization['experiments'][exp]['radius_p_max'], + bus.data.catalog.detected + )) + if bus.data.options.optimization['experiments'][exp]['in_HZ']: + mask = np.logical_and(mask, + bus.data.catalog.habitable) + + _, tyield = np.unique(bus.data.catalog[mask].nuniverse, return_counts=True) + + if tyield.size != 0: + yields[exp] = [float(np.mean(tyield)), float(np.percentile(tyield, 15.9)), float(np.percentile(tyield, 84.1))] + else: + yields[exp] = [0.0, 0.0, 0.0] + + return yields + +def compute_yields_mp(output_filename, + output_path, + catalog_path, + config_path, + uni_sel=None, + return_yields=False): + + t = time.time() + # create bus + bus = lifesim.Bus() + + # setting the options + bus.build_from_config(filename=config_path) + bus.data.options.set_manual(n_cpu=1) # speed up calculation + + bus.data.options.set_manual( + output_path=output_path) + bus.data.options.set_manual(output_filename=output_filename) + + # ---------- Loading the Catalog ---------- + bus.data.import_catalog(input_path=catalog_path) + + if uni_sel is not None: + selected_universes = np.random.choice(np.unique(bus.data.catalog.nuniverse), replace=False, 
size=uni_sel) + bus.data.catalog = bus.data.catalog[np.isin(bus.data.catalog.nuniverse, selected_universes)] + + # ---------- Creating the Instrument ---------- + + # create modules and add to bus + instrument = lifesim.Instrument(name='inst') + bus.add_module(instrument) + + transm = lifesim.TransmissionMap(name='transm') + bus.add_module(transm) + + # connect all modules + bus.connect(('inst', 'transm')) + + # optimizing the result + opt = lifesim.Optimizer(name='opt') + bus.add_module(opt) + ahgs = lifesim.AhgsModule(name='ahgs') + bus.add_module(ahgs) + + bus.connect(('transm', 'opt')) + bus.connect(('inst', 'opt')) + bus.connect(('opt', 'ahgs')) + + with contextlib.redirect_stdout(None): + opt.ahgs() + + bus.save() + + if return_yields: + yields = get_yields(bus=bus, + return_yields=return_yields) + return int(uni_sel), yields diff --git a/lifesim/optimize/ahgs.py b/lifesim/optimize/ahgs.py index 742e31c..2dbef2d 100644 --- a/lifesim/optimize/ahgs.py +++ b/lifesim/optimize/ahgs.py @@ -55,14 +55,16 @@ def observe_star(self, * np.sqrt(int_actual / (60 * 60)))**2) + # Changed: use label-based .loc to avoid chained-assignment warnings for _, i in enumerate(np.where(mask)[0]): - if (not self.data.catalog.detected.iloc[i]) and \ - (self.data.catalog.snr_current.iloc[i] + idx = self.data.catalog.index[i] + if (not self.data.catalog.loc[idx, 'detected']) and \ + (self.data.catalog.loc[idx, 'snr_current'] >= self.data.options.optimization['snr_target']): - self.data.catalog.detected.iat[i] = True - self.data.catalog.t_detected.iat[i] = deepcopy(self.tot_time) + self.data.catalog.loc[idx, 'detected'] = True + self.data.catalog.loc[idx, 't_detected'] = deepcopy(self.tot_time) exp_cols = [col for col in self.data.catalog.columns if col.startswith('exp_')] - true_experiments = [col[4:] for col in exp_cols if self.data.catalog.at[i, col]] + true_experiments = [col[4:] for col in exp_cols if self.data.catalog.loc[idx, col]] for exp in true_experiments: self.data.optm['exp_detected'][exp] += 1 diff --git a/lifesim/optimize/optimizer.py b/lifesim/optimize/optimizer.py index 9a6cdfb..ef47539 100644 --- a/lifesim/optimize/optimizer.py +++ b/lifesim/optimize/optimizer.py @@ -81,12 +81,12 @@ def ahgs(self): rmax = np.inf for stype, limit in self.data.options.optimization['limit'].items(): self.data.options.optimization['experiments'][stype] = { - 'radius_p_min': rmin, - 'radius_p_max': rmax, - 'temp_s_min': self.data.catalog[self.data.catalog.stype==stype].temp_s.min(), - 'temp_s_max': self.data.catalog[self.data.catalog.stype==stype].temp_s.max(), + 'radius_p_min': float(rmin), + 'radius_p_max': float(rmax), + 'temp_s_min': float(self.data.catalog[self.data.catalog.stype==stype].temp_s.min()), + 'temp_s_max': float(self.data.catalog[self.data.catalog.stype==stype].temp_s.max()), 'in_HZ': self.data.options.optimization['habitable'], - 'sample_size': self.data.options.optimization['limit'][stype], + 'sample_size': float(self.data.options.optimization['limit'][stype]), } if self.data.options.optimization['experiments'] is None: diff --git a/setup.py b/setup.py index 6f0e885..3c2c276 100644 --- a/setup.py +++ b/setup.py @@ -36,14 +36,16 @@ def get_version(rel_path: str) -> str: 'matplotlib>=3.7.0', 'numpy>=1.24.2', 'pandas>=1.5.3', - 'PyQt5==5.15.4', + 'PyQt5>=5.15.4', 'tqdm>=4.64.1', - 'tables>=3.8.0', - 'GitPython>=3.1.32'], + 'GitPython>=3.1.32', + 'joblib>=1.5.2', + 'joblib-progress>=1.0.6', + 'tables>=3.10.2'], license='GPLv3', zip_safe=False, keywords='lifesim', - python_requires='~=3.8', + 
python_requires='~=3.12', classifiers=[ 'Development Status :: 3 - Alpha', 'Intended Audience :: Science/Research', From aa0830d844ebaf3ca745ca69478f383f91683139 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Wed, 5 Nov 2025 16:31:34 +0100 Subject: [PATCH 09/27] suppress unneeded verbosity of LIFEsim in yield wrapper --- lifesim/analysis/yield_wrapper.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index d728c80..396295f 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -343,8 +343,7 @@ def compute_yields_mp(output_filename, with contextlib.redirect_stdout(None): opt.ahgs() - - bus.save() + bus.save() if return_yields: yields = get_yields(bus=bus, From 18c6049db2707c008422e1b304f3be6ea0b7bb87 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Wed, 5 Nov 2025 16:55:57 +0100 Subject: [PATCH 10/27] fixed bug where diameter was not taken into account --- lifesim/analysis/yield_wrapper.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index 396295f..d32c02c 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -30,7 +30,8 @@ def __init__(self, def compute_snrs(self, output_path, output_filename, - run_maxsep): + run_maxsep, + diameter=None): print('START OF RUN: ', time.ctime()) print('RUN NAME: ', output_filename) @@ -44,6 +45,9 @@ def compute_snrs(self, bus.build_from_config(filename=self.config_path) bus.data.options.set_manual(n_cpu=self.n_cpu) # speed up calculation + if diameter is not None: + bus.data.options.set_manual(diameter=diameter) + bus.data.options.set_manual( output_path=output_path) bus.data.options.set_manual(output_filename=output_filename) @@ -110,13 +114,15 @@ def run_aperture_sweep_snr(self, print('Commencing base run... ') self.compute_snrs(output_path=f'{output_directory}/', output_filename='sweep_diam_' + str(np.round(diameter, 2)).replace('.', '_'), - run_maxsep=False) + run_maxsep=False, + diameter=diameter) print('[Done]') print('Commencing maxsep run... 
') self.compute_snrs(output_path=f'{output_directory}/', output_filename='sweep_diam_maxsep_' + str(np.round(diameter, 2)).replace('.', '_'), - run_maxsep=True) + run_maxsep=True, + diameter=diameter) print('[Done]') def run_optimizer_sweep(self, From 51644c7ce19cff29309b2c87d68a32090ccb7518 Mon Sep 17 00:00:00 2001 From: Jonah Timothy Hansen Date: Thu, 27 Nov 2025 09:32:50 +0100 Subject: [PATCH 11/27] current state of my fork --- lifesim/core/core.py | 2 +- lifesim/instrument/instrument.py | 12 +++++++++--- lifesim/optimize/ahgs.py | 2 +- lifesim/optimize/optimizer.py | 2 +- lifesim/util/options.py | 11 +++++++++-- 5 files changed, 21 insertions(+), 8 deletions(-) diff --git a/lifesim/core/core.py b/lifesim/core/core.py index dd5a009..e27174e 100644 --- a/lifesim/core/core.py +++ b/lifesim/core/core.py @@ -475,7 +475,7 @@ def write_config(self): def build_from_config(self, filename: str): with open(filename) as file: - config_dict = yaml.load(file, Loader=yaml.FullLoader) + config_dict = yaml.load(file, Loader=yaml.UnsafeLoader) self.data.options.array = convert_to_np(config_dict['array']) self.data.options.optimization = convert_to_np(config_dict['optimization']) diff --git a/lifesim/instrument/instrument.py b/lifesim/instrument/instrument.py index a3c15a3..e081b6f 100644 --- a/lifesim/instrument/instrument.py +++ b/lifesim/instrument/instrument.py @@ -190,9 +190,10 @@ def adjust_bl_to_hz(self, # for the origin of the value 0.5.. see Ottiger+2021 k = 0.589645 * self.data.options.other['wl_optimal'] * 10 ** (-6) - elif opt_model == "Bryson": - # Baseline optimisation considering the semi-major axis distribution of the Bryson 21 - # occurance rates and orbit projection effects, approximated via a polynomial fit to an MC analysis + elif opt_model == "Kepler": + # Baseline optimisation considering the semi-major axis distribution of the Kepler + # occurance rates (SAG13/Bryson Hab2Max) and orbit projection effects, + # approximated via a polynomial fit to an MC analysis sqL = l_sun**0.5 if sqL > 0.28: #FGKs cs = np.array([-2.5655e-1,2.0727e-1,4.8682e-2,-4.0867e-4,-2.1587e-2,9.3433])*1e-6 @@ -212,6 +213,11 @@ def adjust_bl_to_hz(self, baseline = k / hz_center_rad + # If discrete, set baseline to nearest value + if self.data.options.models["discrete_baselines"]: + bls = np.array(self.data.options.array['bl_discrete']) + baseline = bls[np.abs(bls-baseline).argmin()] + self.apply_baseline(baseline=baseline) def apply_baseline(self, diff --git a/lifesim/optimize/ahgs.py b/lifesim/optimize/ahgs.py index 742e31c..ba61af8 100644 --- a/lifesim/optimize/ahgs.py +++ b/lifesim/optimize/ahgs.py @@ -64,7 +64,7 @@ def observe_star(self, exp_cols = [col for col in self.data.catalog.columns if col.startswith('exp_')] true_experiments = [col[4:] for col in exp_cols if self.data.catalog.at[i, col]] - for exp in true_experiments: + for exp in true_experiments[1:]: self.data.optm['exp_detected'][exp] += 1 else: raise ValueError('Delete mode not implemented for AHGS optimizer.') diff --git a/lifesim/optimize/optimizer.py b/lifesim/optimize/optimizer.py index 9a6cdfb..a2d5182 100644 --- a/lifesim/optimize/optimizer.py +++ b/lifesim/optimize/optimizer.py @@ -118,7 +118,7 @@ def ahgs(self): self.data.optm['hit_limit'][exp] = False self.data.optm['exp_detected'][exp] = 0 - + print(self.data.optm) self.data.optm['tot_time'] = 0 # in sec # add new columns to catalog diff --git a/lifesim/util/options.py b/lifesim/util/options.py index 10359e6..10af717 100644 --- a/lifesim/util/options.py +++ b/lifesim/util/options.py @@ 
-24,6 +24,8 @@ class Options(object): - ``'baseline'`` : Length of the shorter nulling baseline in [m]. - ``'bl_min'`` : Minimum allowed length of the shorter nulling baseline in [m]. - ``'bl_max'`` : Maximum allowed length of the shorter nulling baseline in [m]. + - ``'bl_discrete'``: Array of discrete baselines for use with "discrete" baseline + mode in [m]. - ``'ratio'`` : Ratio between the nulling and the imaging baseline. E.g. if the imaging baseline is twice as long as the nulling baseline, the ratio will be 2. - ``'t_slew'`` : Slew time required for the array to shift from one target system to @@ -47,7 +49,9 @@ class Options(object): - ``'fov_taper'`` : Model used for tapering the field of view, possible options are ``'gaussian'`` and ``'none'`` - ``'baseline_optimisation'`` : Model used for optimising the baseline, possible options are - ``'ref_wl'``, ``'Bryson'`` and ``'Uniform'`` + ``'ref_wl'``, ``'Kepler'``, ``'Uniform'``. + - ``'discrete_baselines'``: Boolean as to whether to use discrete baselines in the simulation + given by 'bl_discrete'. optimization : dict Options concerning the methods used to optimally distribute the observing time. - ``'N_pf'`` : Number of sampling locations per orbit. @@ -71,6 +75,7 @@ def __init__(self): 'baseline': 0., 'bl_min': 0., 'bl_max': 0., + 'bl_discrete': [0.], 'ratio': 0., 't_slew': 0., 't_efficiency': 0.} @@ -85,7 +90,8 @@ def __init__(self): self.models = {'localzodi': '', 'habitable': '', 'fov_taper': '', - 'baseline_optimisation': ''} + 'baseline_optimisation': '', + 'discrete_baselines': False} self.optimization = {'N_pf': 0., 'snr_target': 0., @@ -123,6 +129,7 @@ def set_scenario(self, self.models['habitable'] = 'MS' self.models['fov_taper'] = 'gaussian' self.models['baseline_optimisation'] = 'ref_wl' + self.models['discrete_baselines'] = False self.optimization['N_pf'] = 25 self.optimization['snr_target'] = 7 From 7ec06afe2ad80a64e0d80418146c2aa8dcb34ba3 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Thu, 4 Dec 2025 16:11:59 +0100 Subject: [PATCH 12/27] documentation for yield wrapper --- lifesim/analysis/yield_wrapper.py | 30 +++-- lifesim/analysis/yield_wrapper_notes.md | 172 ++++++++++++++++++++++++ 2 files changed, 191 insertions(+), 11 deletions(-) create mode 100644 lifesim/analysis/yield_wrapper_notes.md diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index d32c02c..ca85e8b 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -27,7 +27,7 @@ def __init__(self, self.n_cpu = n_cpu self.cat_from_ppop = cat_from_ppop - def compute_snrs(self, + def _compute_snrs(self, output_path, output_filename, run_maxsep, @@ -95,7 +95,13 @@ def compute_snrs(self, print('Generation took ', (time.time() - t) / 60, ' minutes to complete.') def run_aperture_sweep_snr(self, - mirror_diameters): + mirror_diameters, + run_name): + final_output_path = os.path.join(self.output_path, run_name) + if not os.path.exists(final_output_path): + os.makedirs(final_output_path) + else: + raise ValueError('Directory already exists: ' + final_output_path) for ndim, diameter in enumerate(mirror_diameters): print('') @@ -105,30 +111,32 @@ def run_aperture_sweep_snr(self, print('Preparing directories... 
', end='') - output_directory = os.path.join(self.output_path, 'diam_' + str(np.round(diameter, 2)).replace('.', '_')) + output_directory = os.path.join(final_output_path, 'diam_' + str(np.round(diameter, 2)).replace('.', '_')) if not os.path.exists(output_directory): os.makedirs(output_directory) print('[Done]') print('Commencing base run... ') - self.compute_snrs(output_path=f'{output_directory}/', + self._compute_snrs(output_path=f'{output_directory}/', output_filename='sweep_diam_' + str(np.round(diameter, 2)).replace('.', '_'), run_maxsep=False, diameter=diameter) print('[Done]') print('Commencing maxsep run... ') - self.compute_snrs(output_path=f'{output_directory}/', + self._compute_snrs(output_path=f'{output_directory}/', output_filename='sweep_diam_maxsep_' + str(np.round(diameter, 2)).replace('.', '_'), run_maxsep=True, diameter=diameter) print('[Done]') def run_optimizer_sweep(self, - run_name): - # get the names of all subdirectories in output_path - subdirs = [d for d in os.listdir(self.output_path) if os.path.isdir(os.path.join(self.output_path, d))] + run_name, + source_name): + source_path = os.path.join(self.output_path, source_name) + # get the names of all subdirectories in output_path (which contain subdirectories for different mirror diameters) + subdirs = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] # check a directory with the name run_name already exists in output_path, otherwise create it final_output_path = os.path.join(self.output_path, run_name) @@ -141,7 +149,7 @@ def run_optimizer_sweep(self, for subdir in subdirs: # make a list of all files ending in .hdf5 in subdir, then keep only the part of the filename before '_catalog.hdf5' and only if the sting does not contain 'maxsep' - catalog_files = [f.split('_catalog.hdf5')[0] for f in os.listdir(os.path.join(self.output_path, subdir)) + catalog_files = [f.split('_catalog.hdf5')[0] for f in os.listdir(os.path.join(source_path, subdir)) if f.endswith('_catalog.hdf5') and 'maxsep' not in f] # create a subdir of the same name in final_output_path, no existence check required @@ -153,13 +161,13 @@ def run_optimizer_sweep(self, compute_yields_mp( output_filename=catalog_file, output_path=output_directory + '/', - catalog_path=os.path.join(self.output_path, subdir, catalog_file + '_catalog.hdf5'), + catalog_path=os.path.join(source_path, subdir, catalog_file + '_catalog.hdf5'), config_path=self.config_path ) else: run_configs.append({'output_filename': catalog_file, 'output_path': output_directory + '/', - 'catalog_path': os.path.join(self.output_path, subdir, catalog_file + '_catalog.hdf5') + 'catalog_path': os.path.join(source_path, subdir, catalog_file + '_catalog.hdf5') }) if self.n_cpu > 1: diff --git a/lifesim/analysis/yield_wrapper_notes.md b/lifesim/analysis/yield_wrapper_notes.md new file mode 100644 index 0000000..207e3d4 --- /dev/null +++ b/lifesim/analysis/yield_wrapper_notes.md @@ -0,0 +1,172 @@ +# `ScienceYield` Wrapper + +The purpose of the science yield wrapper is to collect all functions necessary to run a yield with LIFEsim. +While detection yields with single catalogs or instrument setups can be quickly run using +`instrument.snr()` and `opt.ahgs()`, this wrapper is aimed at +- yields for varying instrument setups (e.g. aperture size) +- yields including the characterization yield + +Central tools to achieve this are +- a table of all synthetic planets, to which LIFEsim adds the `snr_1h` column. We call this _snr table_. 
+- the optimizer, which builds an optimized observing sequence based on the `snr_1h` column
+
+## Current State
+So far, the ScienceYield wrapper has grown organically.
+Here, we provide an overview of the current state.
+
+---
+
+## `ScienceYield`
+
+### Constructor
+
+#### `__init__(self, ...)`
+
+**Description:**
+Sets up the working environment, mainly paths to file locations.
+
+**Arguments:**
+
+| Argument        | Type | Description                                   |
+|-----------------|------|-----------------------------------------------|
+| `config_path`   | str  | Path to the config file in .yaml format       |
+| `catalog_path`  | str  | Path to the input catalog                     |
+| `output_path`   | str  | Location where the snr catalogs are saved     |
+| `n_cpu`         | int  | Number of CPUs to be used in multiprocessing  |
+| `cat_from_ppop` | bool | True if input catalog is in .txt format       |
+
+**Example:**
+```python
+import lifesim
+ywrap = lifesim.ScienceYield(config_path='/path/CONFIG_251104.yaml',
+                             catalog_path='/path/Bryson2021_hab2low.txt',
+                             output_path='/path/hab2low_500/',
+                             n_cpu=64)
+```
+
+#### `_compute_snrs(self, ...)`
+
+**Description:**
+Calculates an individual SNR table for one setting of the instrument.
+Saves a catalog with the `snr_1h` column and a config file, named `output_filename_catalog.hdf5` and `output_filename.yaml` respectively, to `output_path`.
+This function is mainly meant as an internal helper; it is usually not required to call it directly.
+
+**Arguments:**
+
+| Argument          | Type  | Description                                                                      |
+|-------------------|-------|----------------------------------------------------------------------------------|
+| `output_path`     | str   | Path to where this individual run is saved                                       |
+| `output_filename` | str   | Filename of catalog and config table                                             |
+| `run_maxsep`      | bool  | If true, all planets will be placed at their maximum separation                  |
+| `diameter`        | float | Mirror diameter for this run. If set to None, diameter from config file is used  |
+
+**Files created:**
+
+| Name / Location                             | Content and Purpose            |
+|---------------------------------------------|--------------------------------|
+| `output_path/output_filename_catalog.hdf5`  | snr table for the single run   |
+| `output_path/output_filename.yaml`          | config file for the single run |
+
+**Example:**
+(continued from above)
+```python
+ywrap._compute_snrs(output_path='path/',
+                    output_filename='single_test',
+                    run_maxsep=False,
+                    diameter=None)
+```
+
+#### `run_aperture_sweep_snr(self, ...)`
+
+**Description:**
+One of the key instrument parameters is the sensitivity.
+For LIFE, we usually fix the photon-conversion-efficiency, and the aperture is varied.
+This function creates snr tables for an array of mirror diameters.
+
+**Arguments:**
+
+| Argument           | Type            | Description                                |
+|--------------------|-----------------|--------------------------------------------|
+| `mirror_diameters` | list/np.ndarray | Array of all mirror sizes to be evaluated  |
+| `run_name`         | str             | Name of the run (for file creation)        |
+
+**Files created:**
+
+For every diameter in `mirror_diameters`, the following files will be created (here we take 2.5 m as an example)
+
+| Name / Location                                                          | Content and Purpose                                                                       |
+|---------------------------------------------------------------------------|--------------------------------------------------------------------------------------------|
+| `self.output_path/run_name/diam_2_5/`                                     | new parent directory is created                                                            |
+| `self.output_path/run_name/diam_2_5/sweep_diam_2_5.yaml`                  | _config file_ for run with the actual planet position for the detection phase             |
+| `self.output_path/run_name/diam_2_5/sweep_diam_2_5_catalog.hdf5`          | ↑ and its _snr catalog_                                                                    |
+| `self.output_path/run_name/diam_2_5/sweep_diam_maxsep_2_5.yaml`           | _config file_ for run with the maximum planet separation for the characterization phase   |
+| `self.output_path/run_name/diam_2_5/sweep_diam_maxsep_2_5_catalog.hdf5`   | ↑ and its _snr catalog_                                                                    |
+
+**Example:**
+(continued from above)
+```python
+ywrap.run_aperture_sweep_snr(mirror_diameters=np.arange(2.0, 5.1, 0.25),
+                             run_name='aperture_size_2_to_5')
+```
+
+#### `run_optimizer_sweep(self, ...)`
+
+**Description:**
+Runs the optimizer on all snr tables in a given source folder.
+
+**Arguments:**
+
+| Argument      | Type | Description                                                            |
+|---------------|------|-------------------------------------------------------------------------|
+| `run_name`    | str  | Name of the run (for file creation)                                    |
+| `source_name` | str  | Name of the folder in self.output_path that contains the _snr tables_  |
+
+**Files created:**
+
+| Name / Location                                                  | Content and Purpose                                              |
+|------------------------------------------------------------------|------------------------------------------------------------------|
+| `self.output_path/run_name/`                                     | new parent directory is created, if it does not already exist    |
+| `self.output_path/run_name/subdir/`                              | New directory for every directory in the source folder           |
+| `self.output_path/run_name/subdir/sweep_diam_2_5.yaml`           | _config file_ for run of optimizer                               |
+| `self.output_path/run_name/subdir/sweep_diam_2_5_catalog.hdf5`   | ↑ and its _snr catalog_ with optimized sequence                  |
+
+**Example:**
+(continued from above)
+```python
+ywrap.run_optimizer_sweep(run_name='opt_aperture_size_2_to_5',
+                          source_name='aperture_size_2_to_5')
+```
+
+---
+
+## Helper Functions
+
+#### `compute_yields_mp()`
+
+**Description:**
+Runs the optimizer on a single snr table and saves the resulting catalog and config file. Optionally returns the experiment yields of the run.
+
+**Arguments:**
+
+| Argument          | Type     | Description                                                                     |
+|-------------------|----------|----------------------------------------------------------------------------------|
+| `output_filename` | str      | Name of output files                                                            |
+| `output_path`     | str      | Path to which catalog and config file will be saved                             |
+| `catalog_path`    | str      | Path to the input catalog file                                                  |
+| `config_path`     | str      | Path to the config file                                                         |
+| `uni_sel`         | int/None | Number of universes to select from all universes. If `None`, all are selected   |
+| `return_yields`   | bool     | Set true to return experiment yield of this run                                 |
+
+**Files created:**
+
+| Name / Location                             | Content and Purpose              |
+|---------------------------------------------|----------------------------------|
+| `output_path/output_filename_catalog.hdf5`  | snr table for the single run     |
+| `output_path/output_filename.yaml`          | config file for the single run   |

From 91637884fcefead4745f13a64eed036977150e52 Mon Sep 17 00:00:00 2001
From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com>
Date: Wed, 10 Dec 2025 17:38:20 +0100
Subject: [PATCH 13/27] significant changes to the yield_wrapper and the characterization time optimization

---
 lifesim/analysis/yield_wrapper.py       | 210 ++++++++++++++++++++++--
 lifesim/analysis/yield_wrapper_notes.md |   9 +-
 lifesim/optimize/ahgs.py                | 177 +++++++++++++++-----
 lifesim/optimize/ahgs_char.py           | 170 +++++++++++++++++++
 lifesim/optimize/optimizer.py           |  17 ++
 lifesim/util/options.py                 |   9 +-
 6 files changed, 530 insertions(+), 62 deletions(-)
 create mode 100644 lifesim/optimize/ahgs_char.py

diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py
index ca85e8b..7804244 100644
--- a/lifesim/analysis/yield_wrapper.py
+++ b/lifesim/analysis/yield_wrapper.py
@@ -84,7 +84,27 @@ def _compute_snrs(self,
         bus.connect(('star', 'transm'))
 
         if run_maxsep:
-            bus.data.catalog = bus.data.catalog[bus.data.catalog.habitable]
+            # only run maxsep SNR for planets that are part of an experiment
+            bus.data.catalog['is_interesting'] = False
+            for exp in bus.data.options.optimization['experiments'].keys():
+                mask_exp = ((bus.data.catalog.radius_p
+                             >= bus.data.options.optimization['experiments'][exp]['radius_p_min'])
+                            & (bus.data.catalog.radius_p
+                               <= bus.data.options.optimization['experiments'][exp]['radius_p_max'])
+                            & (bus.data.catalog.temp_s
+                               >= bus.data.options.optimization['experiments'][exp]['temp_s_min'])
+                            & (bus.data.catalog.temp_s
+                               <= bus.data.options.optimization['experiments'][exp]['temp_s_max']))
+
+                if bus.data.options.optimization['experiments'][exp]['in_HZ']:
+                    mask_exp = (mask_exp
+                                & (bus.data.catalog['habitable']))
+
+                bus.data.catalog['exp_' + exp] = mask_exp
+
+                bus.data.catalog['is_interesting'] = np.logical_or(mask_exp, bus.data.catalog['is_interesting'])
+
+            bus.data.catalog = bus.data.catalog[bus.data.catalog.is_interesting]
             bus.data.catalog['angsep'] = bus.data.catalog['maxangsep']
 
         instrument.get_snr()
@@ -126,14 +146,15 @@ def run_aperture_sweep_snr(self,
             print('Commencing maxsep run... 
') self._compute_snrs(output_path=f'{output_directory}/', - output_filename='sweep_diam_maxsep_' + str(np.round(diameter, 2)).replace('.', '_'), + output_filename='sweep_diam_' + str(np.round(diameter, 2)).replace('.', '_') + '_maxsep', run_maxsep=True, diameter=diameter) print('[Done]') def run_optimizer_sweep(self, run_name, - source_name): + source_name, + characterization: bool = False): source_path = os.path.join(self.output_path, source_name) # get the names of all subdirectories in output_path (which contain subdirectories for different mirror diameters) subdirs = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] @@ -162,12 +183,13 @@ def run_optimizer_sweep(self, output_filename=catalog_file, output_path=output_directory + '/', catalog_path=os.path.join(source_path, subdir, catalog_file + '_catalog.hdf5'), - config_path=self.config_path + config_path=self.config_path, + characterization=characterization ) else: run_configs.append({'output_filename': catalog_file, 'output_path': output_directory + '/', - 'catalog_path': os.path.join(source_path, subdir, catalog_file + '_catalog.hdf5') + 'catalog_path': os.path.join(source_path, subdir, catalog_file + '_catalog.hdf5'), }) if self.n_cpu > 1: @@ -182,10 +204,172 @@ def run_optimizer_sweep(self, output_filename=rc['output_filename'], output_path=rc['output_path'], catalog_path=rc['catalog_path'], - config_path=self.config_path + config_path=self.config_path, + characterization=characterization ) for rc in run_configs) + def combine_catalog_maxsep(self, + source_name,): + source_path = os.path.join(self.output_path, source_name) + + print('START OF combine_catalog_maxsep: ', time.ctime()) + print('SOURCE NAME: ', source_name) + print('SOURCE PATH: ', source_path) + t_all = time.time() + + subdirs = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] + + for subdir in subdirs: + print('--------------------------------------------------') + print('Processing subdir: ', subdir) + t_sub = time.time() + + # make a list of all files ending in .hdf5 in subdir, then keep only the part of the filename before '_catalog.hdf5' and only if the sting does not contain 'maxsep' + base_catalog_files = [f.split('_catalog.hdf5')[0] for f in os.listdir(os.path.join(source_path, subdir)) + if f.endswith('_catalog.hdf5') and 'maxsep' not in f] + + if len(base_catalog_files) == 0: + print(' No base catalog files found in', os.path.join(source_path, subdir)) + print(' Skipping subdir.') + print('Subdir processing took', round(time.time() - t_sub, 2), 's') + continue + + for catalog_file in base_catalog_files: + print(' ----------------------------------------------') + print(' Processing catalog:', catalog_file) + t_cat = time.time() + + base_path = os.path.join(source_path, subdir, catalog_file + '_catalog.hdf5') + maxsep_path = os.path.join(source_path, subdir, catalog_file + '_maxsep_catalog.hdf5') + + print(' Reading base catalog from:', base_path) + base_catalog = pd.read_hdf(base_path) + print(' Base catalog entries:', len(base_catalog)) + + print(' Reading maxsep catalog from:', maxsep_path) + maxsep_catalog = pd.read_hdf(maxsep_path) + print(' Maxsep catalog entries:', len(maxsep_catalog)) + + # map and fill missing values + base_catalog['maxsep_snr_1h'] = base_catalog['id'].map(maxsep_catalog.set_index('id')['snr_1h']) + missing_before = int(base_catalog['maxsep_snr_1h'].isna().sum()) + base_catalog['maxsep_snr_1h'].fillna(0, inplace=True) + print(f' Mapped maxsep snr_1h, filled 
{missing_before} missing values with 0') + + print(' Saving combined catalog to:', base_path) + base_catalog.to_hdf(base_path, key='catalog', mode='w') + + print(' Done processing', catalog_file, '- took', round(time.time() - t_cat, 2), 's') + + print('Finished subdir:', subdir, '- took', round(time.time() - t_sub, 2), 's') + + print('ALL combine_catalog_maxsep finished. Total time:', round(time.time() - t_all, 2), 's') + + def get_mission_time(self, + catalog_path, + config_path): + bus = lifesim.Bus() + + # loading the config and catalog, make sure that the catalog is already combined with maxsep SNRs + bus.build_from_config(filename=config_path) + bus.data.import_catalog(input_path=catalog_path) + + cat_det = bus.data.catalog[np.logical_and( + bus.data.catalog.is_interesting, + bus.data.catalog.detected + )].sort_values('t_detected') + + # collect the experiments + exps = [col[4:] for col in bus.data.catalog.columns if col.startswith('exp_')] + + # set up the time sheet that records the mission time per universe and per experiment + time_sheet = {} + for exp in exps: + time_sheet[exp] = pd.DataFrame(index=np.unique(cat_det.nuniverse), + columns=['detection', 'orbit', 'characterization', 'total']) + t_det = np.zeros((2, len(np.unique(cat_det.nuniverse)))) + t_det[0, :] = np.unique(cat_det.nuniverse) + + # 1. get total time for detection campaign from every universe + for nu in np.unique(cat_det.nuniverse): + temp_t_dets = [] + for exp in exps: + det_stream = cat_det[np.logical_and(cat_det.nuniverse == nu, cat_det['exp_' + exp])].t_detected + temp_t_det = det_stream.iloc[bus.data.options.optimization['experiments'][exp]['sample_size']] \ + if len(det_stream) > 0 else np.nan + temp_t_dets.append(temp_t_det) + time_sheet[exp].loc[nu, 'detection'] = temp_t_det + t_det[1, t_det[0, :] == nu] = np.max(temp_t_dets) + + # 2. identify the follow_up targets for each universe + cat_det.sort_values('maxsep_snr_1h', ascending=False, inplace=True) + cat_det['follow_up'] = False + for nu in np.unique(cat_det.nuniverse): + for exp in exps: + mask_det = np.logical_and.reduce((cat_det.nuniverse == nu, cat_det['exp_' + exp], + cat_det.t_detected <= t_det[1, t_det[0, :] == nu][0])) + # only set the top N targets to follow up where N is the sample size for the experiment + mask_det_indices = cat_det[mask_det].index[:bus.data.options.optimization['experiments'][exp]['sample_size']] + cat_det.loc[mask_det_indices, 'follow_up'] = True + + # 3. calculate follow-up time for orbit and characterization + for nu in np.unique(cat_det.nuniverse): + for exp in exps: + mask_followup = np.logical_and.reduce( + (cat_det.nuniverse == nu, cat_det['exp_' + exp], cat_det.follow_up)) + time_sheet[exp].loc[nu, 'orbit'] = (((bus.data.options.optimization['snr_target'] + / cat_det[mask_followup].maxsep_snr_1h) ** 2 * + (bus.data.options.optimization['n_orbits'] - 1) * 60 * 60).sum() + + np.sum(mask_followup) + * (bus.data.options.optimization['n_orbits'] - 1) + * bus.data.options.array['t_slew']) + time_sheet[exp].loc[nu, 'characterization'] = (((bus.data.options.optimization['snr_char'] / cat_det[ + mask_followup].maxsep_snr_1h) ** 2 * 60 * 60).sum() + + np.sum(mask_followup) + * bus.data.options.array['t_slew']) + + # 4. 
get total time + for exp in exps: + time_sheet[exp]['total'] = time_sheet[exp]['detection'] + time_sheet[exp]['orbit'] + time_sheet[exp][ + 'characterization'] + + return time_sheet + + def sweep_mission_time(self, + source_name): + source_path = os.path.join(self.output_path, source_name) + subdirs = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] + + for subdir in subdirs: + print('--------------------------------------------------') + print('Processing subdir: ', subdir) + t_sub = time.time() + + # make a list of all files ending in .hdf5 in subdir, then keep only the part of the filename before '_catalog.hdf5' and only if the sting does not contain 'maxsep' + run_names = [f.split('_catalog.hdf5')[0] for f in os.listdir(os.path.join(source_path, subdir)) + if f.endswith('_catalog.hdf5')] + + for run_name in run_names: + print(' ----------------------------------------------') + print(' Processing run: ', run_name) + t_run = time.time() + + catalog_path = os.path.join(source_path, subdir, run_name + '_catalog.hdf5') + config_path = os.path.join(source_path, subdir, run_name + '.yaml') + + time_sheet = self.get_mission_time(catalog_path=catalog_path, + config_path=config_path) + + # save time_sheet to csv files, one per experiment + for exp in time_sheet.keys(): + output_csv_path = os.path.join(source_path, subdir, run_name + '_' + exp + '_mission_time.csv') + time_sheet[exp].to_csv(output_csv_path) + print(' Saved mission time for experiment', exp, 'to', output_csv_path) + + print(' Done processing run:', run_name, '- took', round(time.time() - t_run, 2), 's') + + def run_covergence_test(self, run_name, catalog_path, @@ -312,7 +496,8 @@ def compute_yields_mp(output_filename, catalog_path, config_path, uni_sel=None, - return_yields=False): + return_yields=False, + characterization: bool = False): t = time.time() # create bus @@ -322,10 +507,9 @@ def compute_yields_mp(output_filename, bus.build_from_config(filename=config_path) bus.data.options.set_manual(n_cpu=1) # speed up calculation - bus.data.options.set_manual( - output_path=output_path) + bus.data.options.set_manual(output_path=output_path) bus.data.options.set_manual(output_filename=output_filename) - + bus.data.options.optimization['characterization'] = characterization # ---------- Loading the Catalog ---------- bus.data.import_catalog(input_path=catalog_path) @@ -355,9 +539,9 @@ def compute_yields_mp(output_filename, bus.connect(('inst', 'opt')) bus.connect(('opt', 'ahgs')) - with contextlib.redirect_stdout(None): - opt.ahgs() - bus.save() + # with contextlib.redirect_stdout(None): + opt.ahgs() + bus.save() if return_yields: yields = get_yields(bus=bus, diff --git a/lifesim/analysis/yield_wrapper_notes.md b/lifesim/analysis/yield_wrapper_notes.md index 207e3d4..9802b19 100644 --- a/lifesim/analysis/yield_wrapper_notes.md +++ b/lifesim/analysis/yield_wrapper_notes.md @@ -164,9 +164,8 @@ Runs the optimizer on all snr tables in a given source folder. 
| `output_path/output_filename_catalog.hdf5`| snr table for the single run | | `output_path/output_filename.yaml` | config file for the single run | -**Parameters:** -| Parameter | Type | Description | -|-----------|------|-------------| -| `param1` | Type | Description of param1 | -| `param2` | Type | Description of param2 | +# What happens in the characterization ahgs + + + diff --git a/lifesim/optimize/ahgs.py b/lifesim/optimize/ahgs.py index 2dbef2d..880c031 100644 --- a/lifesim/optimize/ahgs.py +++ b/lifesim/optimize/ahgs.py @@ -17,15 +17,39 @@ def obs_array_star(self, nstar): self.data.catalog.is_interesting)) if not np.any(mask): - return np.array((np.inf, np.inf)) + + if self.data.options.optimization['characterization']: + return np.array(((np.inf, np.inf), (np.inf, np.inf))) + else: + return np.array((np.inf, np.inf)) else: obs = (60 * 60 * (self.data.options.optimization['snr_target'] ** 2 - self.data.catalog['snr_current'].loc[mask] ** 2) / self.data.catalog.snr_1h.loc[mask] ** 2) - obs -= self.data.catalog.t_slew.loc[mask] - obs = np.sort(obs) / np.arange(1, np.count_nonzero(mask) + 1, 1) - return obs + + if self.data.options.optimization['characterization']: + obs_char = (60 * 60 * + self.data.options.optimization['snr_char'] ** 2 + / self.data.catalog.maxsep_snr_1h.loc[mask] ** 2) + + # characterization time is needed for optimization, detection time is needed to understand how much + # mission time was used for one observation + met = np.stack((obs, obs_char), axis=0) + + met = met[:, np.argsort(met[0])] + met[1] = np.cumsum(met[1]) + met[0] -= self.data.catalog.t_slew.loc[mask] + met[1] += met[0] + met = met / np.arange(1, np.count_nonzero(mask) + 1, 1)[np.newaxis, :] + a=1 + + return met + else: + obs -= self.data.catalog.t_slew.loc[mask] + obs = np.sort(obs) / np.arange(1, np.count_nonzero(mask) + 1, 1) + + return obs def observe_star(self, nstar, @@ -65,21 +89,30 @@ def observe_star(self, self.data.catalog.loc[idx, 't_detected'] = deepcopy(self.tot_time) exp_cols = [col for col in self.data.catalog.columns if col.startswith('exp_')] true_experiments = [col[4:] for col in exp_cols if self.data.catalog.loc[idx, col]] - for exp in true_experiments: self.data.optm['exp_detected'][exp] += 1 + self.data.optm['exp_detected_uni'][exp][ + 1, + self.data.optm['exp_detected_uni'][exp][0, :] == self.data.catalog.loc[idx, 'nuniverse'] + ] += 1 else: raise ValueError('Delete mode not implemented for AHGS optimizer.') def distribute_time(self): stars, n = np.unique(ar=self.data.catalog.nstar, return_counts=True) - obs = np.zeros((stars.shape[0], np.max(n))) + np.inf - - # fill the observation time array - for i, nstar in enumerate(stars): - temp = self.obs_array_star(nstar=nstar) - obs[i, :temp.shape[0]] = temp + if self.data.options.optimization['characterization']: + obs = np.zeros((stars.shape[0], 2, np.max(n))) + np.inf + # fill the observation time array + for i, nstar in enumerate(stars): + temp = self.obs_array_star(nstar=nstar) + obs[i, :, :temp.shape[1]] = temp + else: + obs = np.zeros((stars.shape[0], np.max(n))) + np.inf + # fill the observation time array + for i, nstar in enumerate(stars): + temp = self.obs_array_star(nstar=nstar) + obs[i, :temp.shape[0]] = temp obs_time = (self.data.options.optimization['t_search'] * self.data.options.array['t_efficiency']) @@ -88,45 +121,88 @@ def distribute_time(self): print('Number of planets detected for each experiment:') - while self.tot_time < obs_time: - # find the best global slope and observe star - no_star, ind_t = 
np.unravel_index(np.argmin(obs), obs.shape) - if (self.tot_time + obs[no_star, ind_t] * (ind_t + 1) + 0.01) > obs_time: - rem_time = obs_time - self.tot_time - self.observe_star(nstar=stars[no_star], - int_time=rem_time) - self.tot_time += rem_time + run_bool = True + + while run_bool: + if self.data.options.optimization['characterization']: + # find the best global slope and observe star + no_star, ind_t = np.unravel_index(np.argmin(obs[:, 1, :]), obs[:, 1, :].shape) + if (((self.tot_time + obs[no_star, 0, ind_t] * (ind_t + 1) + 0.01) > obs_time) + and (self.data.options.optimization['opt_limit'] == 'time')): + rem_time = obs_time - self.tot_time + self.observe_star(nstar=stars[no_star], + int_time=rem_time) + self.tot_time += rem_time + else: + if ((obs[no_star, 1, ind_t] - obs[no_star, 0, ind_t]) * (ind_t + 1) + 0.01) < 0: + raise ValueError('Negative time difference encountered.') + self.observe_star(nstar=stars[no_star], + int_time=obs[no_star, 0, ind_t] * (ind_t + 1) + 0.01) + temp = self.obs_array_star(nstar=stars[no_star]) + self.tot_time += obs[no_star, 0, ind_t] * (ind_t + 1) + 0.01 + obs[no_star, :, :] = np.inf + obs[no_star, :, :temp.shape[1]] = temp else: - self.observe_star(nstar=stars[no_star], - int_time=obs[no_star, ind_t] * (ind_t + 1) + 0.01) - temp = self.obs_array_star(nstar=stars[no_star]) - self.tot_time += obs[no_star, ind_t] * (ind_t + 1) + 0.01 - obs[no_star, :] = np.inf - obs[no_star, :temp.shape[0]] = temp + # find the best global slope and observe star + no_star, ind_t = np.unravel_index(np.argmin(obs), obs.shape) + if (((self.tot_time + obs[no_star, ind_t] * (ind_t + 1) + 0.01) > obs_time) + and (self.data.options.optimization['opt_limit'] == 'time')): + rem_time = obs_time - self.tot_time + self.observe_star(nstar=stars[no_star], + int_time=rem_time) + self.tot_time += rem_time + else: + self.observe_star(nstar=stars[no_star], + int_time=obs[no_star, ind_t] * (ind_t + 1) + 0.01) + temp = self.obs_array_star(nstar=stars[no_star]) + self.tot_time += obs[no_star, ind_t] * (ind_t + 1) + 0.01 + obs[no_star, :] = np.inf + obs[no_star, :temp.shape[0]] = temp out_string = '' for key in self.data.optm['exp_detected'].keys(): out_string += (key + ': ' + str(self.data.optm['exp_detected'][key] / self.data.optm['num_universe']) + ' ') - out_string += ('- (' + str(np.round(self.tot_time / 60 / 60 / 24 / 365.25, decimals=1)) + ' / ' - + str(np.round(obs_time / 60 / 60 / 24 / 365.25, decimals=1)) - + ') yrs observed') + + if self.data.options.optimization['opt_limit'] == 'experiments': + out_string += ('- ' + str(np.round(self.tot_time / 60 / 60 / 24 / 365.25, decimals=1)) + + ' yrs observed') + else: + out_string += ('- (' + str(np.round(self.tot_time / 60 / 60 / 24 / 365.25, decimals=1)) + ' / ' + + str(np.round(obs_time / 60 / 60 / 24 / 365.25, decimals=1)) + + ') yrs observed') print('\r' + out_string, end='') - if any( - self.data.optm['exp_detected'][exp] / self.data.optm['num_universe'] - > self.data.options.optimization['experiments'][exp]['sample_size'] - and not self.data.optm['hit_limit'][exp] - for exp in self.data.optm['exp_detected'] - ): + # if any( + # self.data.optm['exp_detected'][exp] / self.data.optm['num_universe'] + # > self.data.options.optimization['experiments'][exp]['sample_size'] + # and not self.data.optm['hit_limit'][exp] + # for exp in self.data.optm['exp_detected'] + # ): + if any([ + ((self.data.optm['exp_detected_uni'][exp][1, :] + > self.data.options.optimization['experiments'][exp]['sample_size']).sum() > + 
(self.data.options.optimization['opt_limit_factor'] + * self.data.optm['num_universe'])) and not self.data.optm['hit_limit'][exp] + for exp in self.data.optm['exp_detected_uni']]): + # over_limit_experiments = [ + # exp for exp in self.data.optm['exp_detected'] + # if + # self.data.optm['exp_detected'][exp] > self.data.options.optimization['experiments'][exp][ + # 'sample_size'] + # and not self.data.optm['hit_limit'][exp] + # ] over_limit_experiments = [ - exp for exp in self.data.optm['exp_detected'] + exp for exp in self.data.optm['exp_detected_uni'] if - self.data.optm['exp_detected'][exp] > self.data.options.optimization['experiments'][exp][ - 'sample_size'] + (self.data.optm['exp_detected_uni'][exp][1, :] + > self.data.options.optimization['experiments'][exp]['sample_size']).sum() > + (self.data.options.optimization['opt_limit_factor'] + * self.data.optm['num_universe']) and not self.data.optm['hit_limit'][exp] ] + for exp in over_limit_experiments: self.data.optm['hit_limit'][exp] = True self.data.catalog['is_interesting'] = False @@ -135,16 +211,31 @@ def distribute_time(self): self.data.catalog['exp_' + exp_interesting]) if self.data.catalog['is_interesting'].sum() == 0: - print('\nAll experiments have been completed, spending remaining mission time on all HZ planets.') + if self.data.options.optimization['opt_limit'] == 'time': + print('\nAll experiments have been completed, spending remaining mission time on all HZ planets.') self.data.catalog['is_interesting'] = self.data.catalog['habitable'] else: print('\nCompleted experiments: ' + ', '.join(over_limit_experiments) + ', RECOUNTING -------------------') - obs = np.zeros((stars.shape[0], np.max(n))) + np.inf - # fill the observation time array - for i, nstar in enumerate(stars): - temp = self.obs_array_star(nstar=nstar) - obs[i, :temp.shape[0]] = temp + if self.data.options.optimization['characterization']: + obs = np.zeros((stars.shape[0], 2, np.max(n))) + np.inf + # fill the observation time array + for i, nstar in enumerate(stars): + temp = self.obs_array_star(nstar=nstar) + obs[i, :, :temp.shape[1]] = temp + else: + obs = np.zeros((stars.shape[0], np.max(n))) + np.inf + # fill the observation time array + for i, nstar in enumerate(stars): + temp = self.obs_array_star(nstar=nstar) + obs[i, :temp.shape[0]] = temp + + if self.data.options.optimization['opt_limit'] == 'time': + run_bool = self.tot_time < obs_time + elif self.data.options.optimization['opt_limit'] == 'experiments': + run_bool = not all(self.data.optm['hit_limit'].values()) + else: + raise ValueError('Optimization limit not recognized.') \ No newline at end of file diff --git a/lifesim/optimize/ahgs_char.py b/lifesim/optimize/ahgs_char.py new file mode 100644 index 0000000..dcb51fc --- /dev/null +++ b/lifesim/optimize/ahgs_char.py @@ -0,0 +1,170 @@ +import numpy as np + +from lifesim.core.modules import SlopeModule + +class AhgsCharModule(SlopeModule): + def __init__(self, + name: str): + super().__init__(name=name) + self.t_series = [] + self.yield_series = {'A': [], + 'F': [], + 'G': [], + 'K': [], + 'M': []} + self.id_series = [] + + def obs_array_star(self, nstar): + mask = self.data.catalog.nstar == nstar + + # return infinity if the detection limit is reached for this stype + if not bool(np.isin(element=self.data.catalog.stype.loc[mask].iloc[0], + test_elements=np.array(list(self.data.options.optimization['limit'].keys()))[np.invert( + self.data.optm['hit_limit'])])): + return np.array((np.inf, np.inf)) + else: + mask = np.logical_and.reduce((mask, + 
self.data.catalog.habitable, + np.invert(self.data.catalog.detected))) + + obs = (60 * 60 * + (self.data.options.optimization['snr_target'] ** 2 + - self.data.catalog['snr_current'].loc[mask] ** 2) + / self.data.catalog.snr_1h.loc[mask] ** 2) + + obs_char = (60 * 60 * + self.data.options.optimization['snr_char'] ** 2 + / self.data.catalog.maxsep_snr_1h.loc[mask] ** 2) + + met = np.stack((obs, obs_char), axis=0) + met = met[:, np.argsort(met[0])] + met[1] = np.cumsum(met[1]) + met[0] -= self.data.catalog.t_slew.loc[mask] + met[1] += met[0] + met = met / np.arange(1, np.count_nonzero(mask) + 1, 1)[np.newaxis, :] + + return met + + def observe_star(self, + nstar, + int_time): + mask = self.data.catalog.nstar == nstar + + self.data.optm['tot_time'] += int_time + + slew_time = self.data.catalog.loc[mask, 't_slew'].iloc[0] + if not (slew_time == 0): + if (slew_time + int_time) < 0: + self.data.catalog.loc[mask, 't_slew'] += int_time + int_actual = 0 + else: + self.data.catalog.loc[mask, 't_slew'] = 0 + self.data.catalog.loc[mask, 'int_time'] += (slew_time + int_time) + int_actual = slew_time + int_time + else: + self.data.catalog.loc[mask, 'int_time'] += int_time + int_actual = int_time + + self.data.catalog.loc[mask, 'int_time_actual'] += int_actual + + self.data.catalog.loc[mask, 'snr_current'] = np.sqrt( + self.data.catalog.loc[mask, 'snr_current'] ** 2 + + (self.data.catalog.loc[mask, 'snr_1h'] + * np.sqrt(int_actual + / (60 * 60)))**2) + + # self.data.catalog.loc[mask, 'snr_char_current'] = np.sqrt( + # self.data.catalog.loc[mask, 'snr_char_current'] ** 2 + # + (self.data.catalog.loc[mask, 'maxsep_snr_1h'] + # * np.sqrt(int_char_time + # / (60 * 60))) ** 2) + + det_ids = [] + + for _, i in enumerate(np.where(mask)[0]): + if (not self.data.catalog.detected.iloc[i]) and \ + (self.data.catalog.snr_current.iloc[i] + >= self.data.options.optimization['snr_target']): + self.data.catalog.detected.iat[i] = True + det_ids.append(self.data.catalog.id.iat[i]) + if self.data.catalog.habitable.iloc[i]: + self.data.optm['sum_detected'][ + np.where(np.array(list(self.data.options.optimization['limit'].keys())) + == self.data.catalog.stype.iloc[i])] += 1 + self.id_series.append(det_ids) + + def distribute_time(self): + if not self.data.options.optimization['habitable']: + raise ValueError('Characterization optimization only implemented for habitable planets.') + stars, n = np.unique(ar=self.data.catalog.nstar, + return_counts=True) + obs = np.zeros((stars.shape[0], 2, np.max(n))) + np.inf + + # fill the observation time array + for i, nstar in enumerate(stars): + temp = self.obs_array_star(nstar=nstar) + obs[i, :, :temp.shape[1]] = temp + + obs_time = (self.data.options.optimization['t_search'] + * self.data.options.array['t_efficiency']) + + tot_time = 0 + if self.data.options.optimization['verbose']: + print('Number of planets detected by stellar type:') + + while tot_time < obs_time: + # find the best global slope and observe star + no_star, ind_t = np.unravel_index(np.argmin(obs[:, 1, :]), obs[:, 1, :].shape) + if (tot_time + obs[no_star, 0, ind_t] * (ind_t + 1) + 0.01) > obs_time: + rem_time = obs_time - tot_time + self.observe_star(nstar=stars[no_star], + int_time=rem_time) + tot_time += rem_time + + else: + if ((obs[no_star, 1, ind_t] - obs[no_star, 0, ind_t]) * (ind_t + 1) + 0.01) < 0: + raise ValueError('Negative time difference encountered.') + self.observe_star(nstar=stars[no_star], + int_time=obs[no_star, 0, ind_t] * (ind_t + 1) + 0.01) + temp = self.obs_array_star(nstar=stars[no_star]) + 
tot_time += obs[no_star, 0, ind_t] * (ind_t + 1) + 0.01 + obs[no_star, :, :] = np.inf + obs[no_star, :, :temp.shape[1]] = temp + + out_string = '' + for key in self.data.options.optimization['limit'].keys(): + temp_yield = (self.data.optm['sum_detected'] / self.data.optm['num_universe'])[ + np.where(np.array(list(self.data.options.optimization['limit'].keys())) + == key)][0] + out_string += (key + ': ' + + str(temp_yield) + + ' ') + self.yield_series[key].append(temp_yield) + + out_string += ('- (' + str(np.round(tot_time/60/60/24/365.25, 1)) + ' / ' + + str(np.round(obs_time/60/60/24/365.25, 1)) + ') yrs observed') + + self.t_series.append(tot_time) + + if self.data.options.optimization['verbose']: + print('\r' + out_string, end='') + + if np.any( + np.logical_and( + (self.data.optm['sum_detected'] / self.data.optm['num_universe']) + > np.array(list(self.data.options.optimization['limit'].values())), + np.invert(self.data.optm['hit_limit']))): + if self.data.options.optimization['verbose']: + print('\n') + print('HIT LIMIT, RECOUNTING -------------------') + self.data.optm['hit_limit'] = ((self.data.optm['sum_detected'] + / (self.data.optm['num_universe'])) + >= np.array( + list(self.data.options.optimization['limit'].values()) + )) + obs = np.zeros((stars.shape[0], 2, np.max(n))) + np.inf + + # fill the observation time array + for i, nstar in enumerate(stars): + temp = self.obs_array_star(nstar=nstar) + obs[i, :, :temp.shape[1]] = temp diff --git a/lifesim/optimize/optimizer.py b/lifesim/optimize/optimizer.py index ef47539..7ca77da 100644 --- a/lifesim/optimize/optimizer.py +++ b/lifesim/optimize/optimizer.py @@ -98,6 +98,7 @@ def ahgs(self): self.data.catalog['is_interesting'] = False self.data.optm['hit_limit'] = {} self.data.optm['exp_detected'] = {} + self.data.optm['exp_detected_uni'] = {} for exp in self.data.options.optimization['experiments'].keys(): mask_exp = ((self.data.catalog.radius_p >= self.data.options.optimization['experiments'][exp]['radius_p_min']) @@ -118,6 +119,22 @@ def ahgs(self): self.data.optm['hit_limit'][exp] = False self.data.optm['exp_detected'][exp] = 0 + self.data.optm['exp_detected_uni'][exp] = np.zeros((2, + len(np.unique(self.data.catalog.nuniverse)))) + self.data.optm['exp_detected_uni'][exp][0, :] = np.unique(self.data.catalog.nuniverse, + return_counts=False) + + if (self.data.options.optimization['characterization']) and ('maxsep_snr_1h' not in self.data.catalog.columns): + raise ValueError('Characterization optimization selected but catalog does not contain ' + 'maxsep_snr_1h column.') + + if ((self.data.options.optimization['opt_limit'] == 'experiments') + and not all(np.isfinite([exp['sample_size'] + for exp in self.data.options.optimization['experiments'].values()]))): + raise ValueError('Optimization limit set to experiments but no finite limits given.') + + # self.data.optm['uni_counts'] = np.zeros((2, len(np.unique(self.data.catalog.nuniverse)))) + # self.data.optm['uni_counts'][0, :] = np.unique(self.data.catalog.nuniverse, return_counts=False) self.data.optm['tot_time'] = 0 # in sec diff --git a/lifesim/util/options.py b/lifesim/util/options.py index 0a8607c..8ab057f 100644 --- a/lifesim/util/options.py +++ b/lifesim/util/options.py @@ -86,7 +86,12 @@ def __init__(self): self.optimization = {'N_pf': 0., 'snr_target': 0., 'experiments': None, - 't_search': 0.} + 't_search': 0., + 'characterization': False, + 'snr_char': 0., + 'opt_limit': 'time', + 'opt_limit_factor': 0.5, + 'n_orbits': 1} def set_scenario(self, case: str): @@ -121,6 +126,8 
@@ def set_scenario(self, self.optimization['N_pf'] = 25 self.optimization['snr_target'] = 7 + self.optimization['snr_char'] = 44.1 + self.optimization['n_orbits'] = 5 self.optimization['limit'] = {'F': np.inf, 'G': np.inf, 'K': np.inf, From 4459e9a9bc5dd9e262c9a24ca380e70058c47dfd Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Fri, 12 Dec 2025 09:34:18 +0100 Subject: [PATCH 14/27] included failsafe if there are insufficient targets to observe --- lifesim/analysis/yield_wrapper.py | 4 ++-- lifesim/optimize/ahgs.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index 7804244..e9a9a3c 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -141,14 +141,14 @@ def run_aperture_sweep_snr(self, self._compute_snrs(output_path=f'{output_directory}/', output_filename='sweep_diam_' + str(np.round(diameter, 2)).replace('.', '_'), run_maxsep=False, - diameter=diameter) + diameter=float(diameter)) print('[Done]') print('Commencing maxsep run... ') self._compute_snrs(output_path=f'{output_directory}/', output_filename='sweep_diam_' + str(np.round(diameter, 2)).replace('.', '_') + '_maxsep', run_maxsep=True, - diameter=diameter) + diameter=float(diameter)) print('[Done]') def run_optimizer_sweep(self, diff --git a/lifesim/optimize/ahgs.py b/lifesim/optimize/ahgs.py index 880c031..556e354 100644 --- a/lifesim/optimize/ahgs.py +++ b/lifesim/optimize/ahgs.py @@ -127,6 +127,9 @@ def distribute_time(self): if self.data.options.optimization['characterization']: # find the best global slope and observe star no_star, ind_t = np.unravel_index(np.argmin(obs[:, 1, :]), obs[:, 1, :].shape) + if not np.isfinite(obs[no_star, 1, ind_t]): + print('Not sufficient targets remaining to continue characterization optimization.') + break if (((self.tot_time + obs[no_star, 0, ind_t] * (ind_t + 1) + 0.01) > obs_time) and (self.data.options.optimization['opt_limit'] == 'time')): rem_time = obs_time - self.tot_time From b3d0829a3e13e0a4f1544015b0b744aa90429143 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Fri, 12 Dec 2025 11:27:44 +0100 Subject: [PATCH 15/27] changed logic for completion fraction in optimization runs. 
--- lifesim/analysis/yield_wrapper.py | 57 +++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index e9a9a3c..6cab646 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -4,6 +4,7 @@ import contextlib from copy import deepcopy import json +from typing import Union import numpy as np from joblib import Parallel, delayed, parallel_config @@ -154,7 +155,8 @@ def run_aperture_sweep_snr(self, def run_optimizer_sweep(self, run_name, source_name, - characterization: bool = False): + characterization: bool = False, + opt_limit_factor: Union[None, float] = None): source_path = os.path.join(self.output_path, source_name) # get the names of all subdirectories in output_path (which contain subdirectories for different mirror diameters) subdirs = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] @@ -184,7 +186,8 @@ def run_optimizer_sweep(self, output_path=output_directory + '/', catalog_path=os.path.join(source_path, subdir, catalog_file + '_catalog.hdf5'), config_path=self.config_path, - characterization=characterization + characterization=characterization, + opt_limit_factor=opt_limit_factor ) else: run_configs.append({'output_filename': catalog_file, @@ -205,7 +208,8 @@ def run_optimizer_sweep(self, output_path=rc['output_path'], catalog_path=rc['catalog_path'], config_path=self.config_path, - characterization=characterization + characterization=characterization, + opt_limit_factor=opt_limit_factor ) for rc in run_configs) @@ -275,19 +279,39 @@ def get_mission_time(self, bus.build_from_config(filename=config_path) bus.data.import_catalog(input_path=catalog_path) + # collect the experiments + exps = [col[4:] for col in bus.data.catalog.columns if col.startswith('exp_')] + + # recalculate the interesting flag + bus.data.catalog['is_interesting'] = False + for exp in exps: + mask_exp = ((bus.data.catalog.radius_p + >= bus.data.options.optimization['experiments'][exp]['radius_p_min']) + & (bus.data.catalog.radius_p + <= bus.data.options.optimization['experiments'][exp]['radius_p_max']) + & (bus.data.catalog.temp_s + >= bus.data.options.optimization['experiments'][exp]['temp_s_min']) + & (bus.data.catalog.temp_s + <= bus.data.options.optimization['experiments'][exp]['temp_s_max'])) + + if bus.data.options.optimization['experiments'][exp]['in_HZ']: + mask_exp = (mask_exp + & (bus.data.catalog['habitable'])) + + bus.data.catalog['exp_' + exp] = mask_exp + + bus.data.catalog['is_interesting'] = np.logical_or(mask_exp, bus.data.catalog['is_interesting']) + cat_det = bus.data.catalog[np.logical_and( bus.data.catalog.is_interesting, bus.data.catalog.detected )].sort_values('t_detected') - # collect the experiments - exps = [col[4:] for col in bus.data.catalog.columns if col.startswith('exp_')] - # set up the time sheet that records the mission time per universe and per experiment time_sheet = {} for exp in exps: time_sheet[exp] = pd.DataFrame(index=np.unique(cat_det.nuniverse), - columns=['detection', 'orbit', 'characterization', 'total']) + columns=['detection', 'orbit', 'characterization', 'total', 'number']) t_det = np.zeros((2, len(np.unique(cat_det.nuniverse)))) t_det[0, :] = np.unique(cat_det.nuniverse) @@ -296,11 +320,18 @@ def get_mission_time(self, temp_t_dets = [] for exp in exps: det_stream = cat_det[np.logical_and(cat_det.nuniverse == nu, cat_det['exp_' + exp])].t_detected - temp_t_det = 
det_stream.iloc[bus.data.options.optimization['experiments'][exp]['sample_size']] \ - if len(det_stream) > 0 else np.nan + if len(det_stream) == 0: + temp_t_det = np.nan + time_sheet[exp].loc[nu, 'number'] = 0 + elif len(det_stream) < bus.data.options.optimization['experiments'][exp]['sample_size']: + temp_t_det = det_stream.iloc[-1] + time_sheet[exp].loc[nu, 'number'] = len(det_stream) + else: + temp_t_det = det_stream.iloc[bus.data.options.optimization['experiments'][exp]['sample_size']-1] + time_sheet[exp].loc[nu, 'number'] = bus.data.options.optimization['experiments'][exp]['sample_size'] temp_t_dets.append(temp_t_det) time_sheet[exp].loc[nu, 'detection'] = temp_t_det - t_det[1, t_det[0, :] == nu] = np.max(temp_t_dets) + t_det[1, t_det[0, :] == nu] = np.nanmax(temp_t_dets) # 2. identify the follow_up targets for each universe cat_det.sort_values('maxsep_snr_1h', ascending=False, inplace=True) @@ -497,7 +528,8 @@ def compute_yields_mp(output_filename, config_path, uni_sel=None, return_yields=False, - characterization: bool = False): + characterization: bool = False, + opt_limit_factor: Union[None, float] = None): t = time.time() # create bus @@ -507,6 +539,9 @@ def compute_yields_mp(output_filename, bus.build_from_config(filename=config_path) bus.data.options.set_manual(n_cpu=1) # speed up calculation + if opt_limit_factor is not None: + bus.data.options.optimization['opt_limit_factor'] = opt_limit_factor + bus.data.options.set_manual(output_path=output_path) bus.data.options.set_manual(output_filename=output_filename) bus.data.options.optimization['characterization'] = characterization From 655c91677f1aa4601ff7c791907067f7cf599d42 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Fri, 12 Dec 2025 17:08:35 +0100 Subject: [PATCH 16/27] small fix to make sure that planets occurring in multiple experiments are not counted double --- lifesim/analysis/yield_wrapper.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index 6cab646..4d305ca 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -307,6 +307,8 @@ def get_mission_time(self, bus.data.catalog.detected )].sort_values('t_detected') + cat_det['char_done'] = False + # set up the time sheet that records the mission time per universe and per experiment time_sheet = {} for exp in exps: @@ -348,7 +350,8 @@ def get_mission_time(self, for nu in np.unique(cat_det.nuniverse): for exp in exps: mask_followup = np.logical_and.reduce( - (cat_det.nuniverse == nu, cat_det['exp_' + exp], cat_det.follow_up)) + (cat_det.nuniverse == nu, cat_det['exp_' + exp], cat_det.follow_up, ~cat_det.char_done + )) time_sheet[exp].loc[nu, 'orbit'] = (((bus.data.options.optimization['snr_target'] / cat_det[mask_followup].maxsep_snr_1h) ** 2 * (bus.data.options.optimization['n_orbits'] - 1) * 60 * 60).sum() @@ -359,6 +362,7 @@ def get_mission_time(self, mask_followup].maxsep_snr_1h) ** 2 * 60 * 60).sum() + np.sum(mask_followup) * bus.data.options.array['t_slew']) + cat_det.loc[mask_followup, 'char_done'] = True # 4. get total time for exp in exps: From ead1a81e015329705f57027d2cf956fd7e10023f Mon Sep 17 00:00:00 2001 From: "Felix A. 
Dannert" <62441325+fdannert@users.noreply.github.com> Date: Fri, 12 Dec 2025 18:06:13 +0100 Subject: [PATCH 17/27] number of detections were computed wrong --- lifesim/analysis/yield_wrapper.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index 4d305ca..a4d5cb5 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -324,16 +324,16 @@ def get_mission_time(self, det_stream = cat_det[np.logical_and(cat_det.nuniverse == nu, cat_det['exp_' + exp])].t_detected if len(det_stream) == 0: temp_t_det = np.nan - time_sheet[exp].loc[nu, 'number'] = 0 elif len(det_stream) < bus.data.options.optimization['experiments'][exp]['sample_size']: temp_t_det = det_stream.iloc[-1] - time_sheet[exp].loc[nu, 'number'] = len(det_stream) else: temp_t_det = det_stream.iloc[bus.data.options.optimization['experiments'][exp]['sample_size']-1] - time_sheet[exp].loc[nu, 'number'] = bus.data.options.optimization['experiments'][exp]['sample_size'] temp_t_dets.append(temp_t_det) time_sheet[exp].loc[nu, 'detection'] = temp_t_det t_det[1, t_det[0, :] == nu] = np.nanmax(temp_t_dets) + for exp in exps: + det_stream = cat_det[np.logical_and(cat_det.nuniverse == nu, cat_det['exp_' + exp])].t_detected + time_sheet[exp].loc[nu, 'number'] = det_stream <= t_det[1, t_det[0, :] == nu] # 2. identify the follow_up targets for each universe cat_det.sort_values('maxsep_snr_1h', ascending=False, inplace=True) From e3899a1628779a6ebc6b9ee6ff2e4e8119c81dd0 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Mon, 15 Dec 2025 09:22:23 +0100 Subject: [PATCH 18/27] minor bugfix; sum missing --- lifesim/analysis/yield_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index a4d5cb5..75f19c5 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -333,7 +333,7 @@ def get_mission_time(self, t_det[1, t_det[0, :] == nu] = np.nanmax(temp_t_dets) for exp in exps: det_stream = cat_det[np.logical_and(cat_det.nuniverse == nu, cat_det['exp_' + exp])].t_detected - time_sheet[exp].loc[nu, 'number'] = det_stream <= t_det[1, t_det[0, :] == nu] + time_sheet[exp].loc[nu, 'number'] = np.sum(det_stream <= float(t_det[1, t_det[0, :] == nu])) # 2. identify the follow_up targets for each universe cat_det.sort_values('maxsep_snr_1h', ascending=False, inplace=True) From a6d0510acea0d9180c0e914a5e86bd480a4f41ee Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Tue, 16 Dec 2025 11:21:38 +0100 Subject: [PATCH 19/27] added processing code for the diameter sweeps --- lifesim/analysis/yield_wrapper.py | 273 +++++++++++++++++++++++++----- lifesim/optimize/ahgs.py | 2 +- 2 files changed, 227 insertions(+), 48 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index 75f19c5..e09de5d 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -307,67 +307,76 @@ def get_mission_time(self, bus.data.catalog.detected )].sort_values('t_detected') - cat_det['char_done'] = False + cat_det['follow_up'] = False + cat_det['t_orbit'] = 0. + cat_det['t_char'] = 0. 
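+        # note: these follow-up flags and per-planet orbit/characterization times are
+        # filled in on the detected subset (cat_det) and mapped back onto the full
+        # bus.data.catalog by planet id in step 5 below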
+ + bus.data.catalog['follow_up'] = False # set up the time sheet that records the mission time per universe and per experiment - time_sheet = {} + columns = ['detection', 'orbit', 'characterization', 'total'] for exp in exps: - time_sheet[exp] = pd.DataFrame(index=np.unique(cat_det.nuniverse), - columns=['detection', 'orbit', 'characterization', 'total', 'number']) - t_det = np.zeros((2, len(np.unique(cat_det.nuniverse)))) - t_det[0, :] = np.unique(cat_det.nuniverse) + columns.append('n_' + exp) + time_sheet = pd.DataFrame(index=np.unique(cat_det.nuniverse), + columns=columns) # 1. get total time for detection campaign from every universe - for nu in np.unique(cat_det.nuniverse): - temp_t_dets = [] - for exp in exps: - det_stream = cat_det[np.logical_and(cat_det.nuniverse == nu, cat_det['exp_' + exp])].t_detected - if len(det_stream) == 0: - temp_t_det = np.nan - elif len(det_stream) < bus.data.options.optimization['experiments'][exp]['sample_size']: - temp_t_det = det_stream.iloc[-1] - else: - temp_t_det = det_stream.iloc[bus.data.options.optimization['experiments'][exp]['sample_size']-1] - temp_t_dets.append(temp_t_det) - time_sheet[exp].loc[nu, 'detection'] = temp_t_det - t_det[1, t_det[0, :] == nu] = np.nanmax(temp_t_dets) - for exp in exps: - det_stream = cat_det[np.logical_and(cat_det.nuniverse == nu, cat_det['exp_' + exp])].t_detected - time_sheet[exp].loc[nu, 'number'] = np.sum(det_stream <= float(t_det[1, t_det[0, :] == nu])) + t_det = bus.data.catalog.t_detected.max() + time_sheet['detection'] = t_det # 2. identify the follow_up targets for each universe cat_det.sort_values('maxsep_snr_1h', ascending=False, inplace=True) - cat_det['follow_up'] = False for nu in np.unique(cat_det.nuniverse): for exp in exps: - mask_det = np.logical_and.reduce((cat_det.nuniverse == nu, cat_det['exp_' + exp], - cat_det.t_detected <= t_det[1, t_det[0, :] == nu][0])) + mask_det = np.logical_and.reduce((cat_det.nuniverse == nu, + cat_det['exp_' + exp], + cat_det.t_detected <= t_det)) # only set the top N targets to follow up where N is the sample size for the experiment mask_det_indices = cat_det[mask_det].index[:bus.data.options.optimization['experiments'][exp]['sample_size']] cat_det.loc[mask_det_indices, 'follow_up'] = True + time_sheet.loc[nu, 'n_' + exp] = len(mask_det_indices) # 3. 
calculate follow-up time for orbit and characterization + cat_det.loc[cat_det.follow_up, 't_orbit'] = ( + (((bus.data.options.optimization['snr_target'] / cat_det[cat_det.follow_up].maxsep_snr_1h) ** 2) + * 60 * 60 + + bus.data.options.array['t_slew']) + * (bus.data.options.optimization['n_orbits'] - 1) + ) + + cat_det.loc[cat_det.follow_up, 't_char'] = ( + (((bus.data.options.optimization['snr_char'] / cat_det[cat_det.follow_up].maxsep_snr_1h) ** 2) + * 60 * 60 + + bus.data.options.array['t_slew']) + ) + for nu in np.unique(cat_det.nuniverse): - for exp in exps: - mask_followup = np.logical_and.reduce( - (cat_det.nuniverse == nu, cat_det['exp_' + exp], cat_det.follow_up, ~cat_det.char_done - )) - time_sheet[exp].loc[nu, 'orbit'] = (((bus.data.options.optimization['snr_target'] - / cat_det[mask_followup].maxsep_snr_1h) ** 2 * - (bus.data.options.optimization['n_orbits'] - 1) * 60 * 60).sum() - + np.sum(mask_followup) - * (bus.data.options.optimization['n_orbits'] - 1) - * bus.data.options.array['t_slew']) - time_sheet[exp].loc[nu, 'characterization'] = (((bus.data.options.optimization['snr_char'] / cat_det[ - mask_followup].maxsep_snr_1h) ** 2 * 60 * 60).sum() - + np.sum(mask_followup) - * bus.data.options.array['t_slew']) - cat_det.loc[mask_followup, 'char_done'] = True + mask_followup = np.logical_and.reduce( + (cat_det.nuniverse == nu, cat_det.follow_up, + )) + time_sheet.loc[nu, 'orbit'] = cat_det.loc[mask_followup, 't_orbit'].sum() + time_sheet.loc[nu, 'characterization'] = cat_det.loc[mask_followup, 't_char'].sum() # 4. get total time - for exp in exps: - time_sheet[exp]['total'] = time_sheet[exp]['detection'] + time_sheet[exp]['orbit'] + time_sheet[exp][ - 'characterization'] + time_sheet['total'] = time_sheet['detection'] + time_sheet['orbit'] + time_sheet['characterization'] + + # 5. 
copy to original catalog + cols = ['follow_up', 't_orbit', 't_char'] + mapping_df = cat_det.set_index('id')[cols] + + for col, fill_value, out_type in [ + ('follow_up', False, bool), + ('t_orbit', 0.0, float), + ('t_char', 0.0, float), + ]: + # map and infer object dtypes first + s = bus.data.catalog['id'].map(mapping_df[col]).infer_objects(copy=False) + # replace missing values without using .fillna + s_filled = s.where(s.notna(), other=fill_value) + # assign with desired type + bus.data.catalog[col] = s_filled.astype(out_type) + + bus.save() return time_sheet @@ -397,14 +406,184 @@ def sweep_mission_time(self, config_path=config_path) # save time_sheet to csv files, one per experiment - for exp in time_sheet.keys(): - output_csv_path = os.path.join(source_path, subdir, run_name + '_' + exp + '_mission_time.csv') - time_sheet[exp].to_csv(output_csv_path) - print(' Saved mission time for experiment', exp, 'to', output_csv_path) + output_csv_path = os.path.join(source_path, subdir, run_name + '_mission_time.csv') + time_sheet.to_csv(output_csv_path) + print(' Saved mission time to ', output_csv_path) print(' Done processing run:', run_name, '- took', round(time.time() - t_run, 2), 's') + def process_mission_time(self, + source_name): + # Start logging for this processing run (style consistent with other functions) + print('START OF process_mission_time: ', time.ctime()) + source_path = os.path.join(self.output_path, source_name) + print('SOURCE NAME: ', source_name) + print('SOURCE PATH: ', source_path) + t_all = time.time() + + subdirs = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] + diams_float = [float('.'.join(d.split('_')[1:])) for d in subdirs] + diams = ['_'.join(d.split('_')[1:]) for d in subdirs] + + # -- 1. CREATE TYPETABLE -- + timetable = {} + + for subdir, d in zip(subdirs, diams): + csv_file = [f for f in os.listdir(os.path.join(source_path, subdir)) + if f.endswith('.csv')] + if len(csv_file) != 1: + raise ValueError('More than one csv file found in subdir ' + subdir) + timetable[d] = pd.read_csv(os.path.join(source_path, subdir, csv_file[0])) + + exps = [c.split('n_')[1] for c in timetable[diams[0]].columns if c.startswith('n_')] + + typetable = pd.DataFrame(columns=['detection', 'orbit', 'char', 'total'], index=diams_float) + + for d, df in zip(diams, diams_float): + typetable.loc[df, 'detection'] = np.max(timetable[d]['detection']) * 1.25 + typetable.loc[df, 'orbit'] = np.mean(timetable[d]['orbit']) * 1.25 + typetable.loc[df, 'char'] = np.mean(timetable[d]['characterization']) * 1.25 + typetable.loc[df, 'total'] = typetable.loc[df, 'detection'] + typetable.loc[df, 'orbit'] + typetable.loc[ + df, 'char'] + + typetable.sort_index(inplace=True, ascending=True) + + # -- 2. 
PLOT TYPETABLE -- + # fill under stepped line + columns = ['detection', 'orbit', 'char'] + columns_label = ['Detection', 'Orbit', 'Characterization'] + x = np.asarray(typetable.index, dtype=float) + y0 = np.zeros_like(x, dtype=float) + + colors = ['#2066a8', '#3594cc', '#8cc5e3'] + + fig, ax = plt.subplots() + + for i in range(len(columns)): + y = typetable[columns[i]].to_numpy(dtype=float) / (365.25 * 24 * 60 * 60) + # ax.step(x, y+y0, label=columns[i], where='mid', color=colors[i]) + ax.fill_between(x, y0, y + y0, step='mid', alpha=1, color=colors[i], label=columns_label[i], edgecolor=None) + + y0 += typetable[columns[i]].to_numpy(dtype=float) / (365.25 * 24 * 60 * 60) + + ax.axhline(y=5, color='tab:blue', linestyle='--') + ax.set_xlabel('Mirror Diameter (m)') + ax.set_ylabel('Time (years)') + ax.legend() + type_fig_path = os.path.join(self.output_path, source_name, source_name + '_by_type_time.pdf') + fig.savefig(type_fig_path) + plt.close() + + # Save typetable CSV and report as a single cohesive step + typetable_csv_path = os.path.join(self.output_path, source_name, source_name + '_typetable.csv') + + # -- 3. SAVE TYPETABLE -- + typetable /= (365.25 * 24 * 60 * 60) + typetable.to_csv(typetable_csv_path) + print('--------------------------------------------------') + print('Step 1/2: Typetable generated and saved.') + print(' Figure: ', type_fig_path) + print(' CSV: ', typetable_csv_path) + + # -- 4. CREATE EXPTABLE -- + exptable = {} + + for exp in exps: + exptable[exp] = pd.DataFrame( + columns=['detection', 'orbit', 'orbit-1s', 'orbit+1s', 'char', 'char-1s', 'char+1s', 'total', + 'total-1s', 'total+1s'], index=diams_float) + exptable[exp].sort_index(inplace=True) + for subdir in subdirs: + df = float('.'.join(subdir.split('_')[1:])) + catalog_file = [f for f in os.listdir(os.path.join(source_path, subdir)) + if f.endswith('.hdf5') and 'maxsep' not in f] + if len(catalog_file) != 1: + raise ValueError('More than one catalog file found in subdir ' + subdir) + config_file = [f for f in os.listdir(os.path.join(source_path, subdir)) + if f.endswith('.yaml')] + + bus = lifesim.Bus() + + # loading the config and catalog, make sure that the catalog is already combined with maxsep SNRs + bus.build_from_config( + filename=os.path.join(source_path, subdir, config_file[0])) + bus.data.import_catalog( + input_path=os.path.join(source_path, subdir, catalog_file[0])) + cat = bus.data.catalog + + for exp in exps: + mask = np.logical_and.reduce((cat.detected, cat['exp_' + exp])) + exptable[exp].loc[df, 'detection'] = (np.sum(np.unique(cat[mask].int_time)) + len( + np.unique(cat[mask].int_time)) * bus.data.options.array['t_slew']) * 1.25 + + mask = np.logical_and.reduce((cat.detected, cat['exp_' + exp], cat.follow_up)) + temp_t_orbit = [] + temp_t_char = [] + temp_t_total = [] + for nu in np.unique(cat[mask].nuniverse): + temp_t_orbit.append(np.sum(cat[np.logical_and(mask, cat.nuniverse == nu)].t_orbit)) + temp_t_char.append(np.sum(cat[np.logical_and(mask, cat.nuniverse == nu)].t_char)) + temp_t_total.append(np.sum(cat[np.logical_and(mask, cat.nuniverse == nu)].t_orbit) + np.sum( + cat[np.logical_and(mask, cat.nuniverse == nu)].t_char) + exptable[exp].loc[ + df, 'detection'] * 0.8) + exptable[exp].loc[df, 'orbit'] = np.mean(temp_t_orbit) * 1.25 + exptable[exp].loc[df, 'orbit+1s'] = np.quantile(temp_t_orbit, 0.841) * 1.25 + exptable[exp].loc[df, 'orbit-1s'] = np.quantile(temp_t_orbit, 0.159) * 1.25 + exptable[exp].loc[df, 'char'] = np.mean(temp_t_char) * 1.25 + exptable[exp].loc[df, 'char+1s'] = 
np.quantile(temp_t_char, 0.841) * 1.25 + exptable[exp].loc[df, 'char-1s'] = np.quantile(temp_t_char, 0.159) * 1.25 + exptable[exp].loc[df, 'total'] = np.mean(temp_t_total) * 1.25 + exptable[exp].loc[df, 'total+1s'] = np.quantile(temp_t_total, 0.841) * 1.25 + exptable[exp].loc[df, 'total-1s'] = np.quantile(temp_t_total, 0.159) * 1.25 + + del bus + del cat + + # -- 5. PLOT EXPTABLE -- + colors = [['#2066a8', '#3594cc', '#8cc5e3'], + ['#a00000', '#c46666', '#d8a6a6'], + ['#1f6f6f', '#54a1a1', '#9fc8c8'], ] + + columns = ['detection', 'orbit', 'char'] + x = np.asarray(exptable[exps[0]].index, dtype=float) + y0 = np.zeros_like(x, dtype=float) + + fig, ax = plt.subplots() + + for j, exp in enumerate(exps): + for i in range(len(columns)): + y = exptable[exp][columns[i]].to_numpy(dtype=float) / (365.25 * 24 * 60 * 60) + # ax.step(x, y+y0, label=columns[i], where='mid', color=colors[i]) + if i == 0: + ax.fill_between(x, y0, y + y0, step='mid', alpha=1, color=colors[j][i], edgecolor=None, label=exp) + else: + ax.fill_between(x, y0, y + y0, step='mid', alpha=1, color=colors[j][i], edgecolor=None) + y0 += y + + ax.set_xlabel('Mirror Diameter (m)') + ax.set_ylabel('Time (years)') + ax.axhline(y=5, color='tab:blue', linestyle='--') + ax.legend() + + exptable_fig_path = os.path.join(self.output_path, source_name, source_name + '_by_exp_time.pdf') + fig.savefig(exptable_fig_path) + plt.close() + + # -- 6. SAVE EXPTABLE -- + for exp in exps: + exptable[exp] /= (365.25 * 24 * 60 * 60) + exptable[exp].to_csv(os.path.join(self.output_path, source_name, source_name + '_' + exp + '_exptable.csv')) + + print('--------------------------------------------------') + print('Step 2/2: Exptable plotted and CSVs saved.') + print(' Figure: ', exptable_fig_path) + print(' CSVs: ', os.path.join(self.output_path, source_name)) + + # Final log with elapsed time + print('ALL process_mission_time finished. Total time:', round(time.time() - t_all, 2), 's') + + def run_covergence_test(self, run_name, catalog_path, diff --git a/lifesim/optimize/ahgs.py b/lifesim/optimize/ahgs.py index 556e354..6607747 100644 --- a/lifesim/optimize/ahgs.py +++ b/lifesim/optimize/ahgs.py @@ -86,7 +86,7 @@ def observe_star(self, (self.data.catalog.loc[idx, 'snr_current'] >= self.data.options.optimization['snr_target']): self.data.catalog.loc[idx, 'detected'] = True - self.data.catalog.loc[idx, 't_detected'] = deepcopy(self.tot_time) + self.data.catalog.loc[idx, 't_detected'] = deepcopy(self.tot_time + int_time) exp_cols = [col for col in self.data.catalog.columns if col.startswith('exp_')] true_experiments = [col[4:] for col in exp_cols if self.data.catalog.loc[idx, col]] for exp in true_experiments: From 1daaeb301838c8d2592caea944d91a7cc96719e7 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Tue, 6 Jan 2026 09:25:23 +0100 Subject: [PATCH 20/27] fixed a bug that put too much focus on the charaterization time (by factor of nuniverse) and added logging functionality. 
--- lifesim/analysis/yield_wrapper.py | 116 ++++++++++++++++++++++++++++++ lifesim/optimize/ahgs.py | 7 +- 2 files changed, 121 insertions(+), 2 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index e09de5d..29761a3 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -5,6 +5,7 @@ from copy import deepcopy import json from typing import Union +import logging import numpy as np from joblib import Parallel, delayed, parallel_config @@ -28,6 +29,37 @@ def __init__(self, self.n_cpu = n_cpu self.cat_from_ppop = cat_from_ppop + # Create logger that writes to `yield_wrapper_log.txt` in append mode. + os.makedirs(self.output_path, exist_ok=True) + log_file_path = os.path.join(self.output_path, 'yield_wrapper.log') + + self.logger = logging.getLogger('lifesim.yield_wrapper') + self.logger.setLevel(logging.INFO) + + # Avoid duplicating handlers for the same file when multiple instances are created + file_path_abs = os.path.abspath(log_file_path) + existing_handler = None + for h in list(self.logger.handlers): + if isinstance(h, logging.FileHandler) and getattr(h, 'baseFilename', None) == file_path_abs: + existing_handler = h + break + + if existing_handler is None: + fh = logging.FileHandler(log_file_path, mode='a') + fh.setLevel(logging.INFO) + fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) + self.logger.addHandler(fh) + self.logger.propagate = False + + # Log instance creation and parameters + self.logger.info('%s', '=' * 80) + self.logger.info('ScienceYield instance created at: %s', time.ctime()) + self.logger.info('Parameters:') + self.logger.info(' config_path: %s', self.config_path) + self.logger.info(' catalog_path: %s', self.catalog_path) + self.logger.info(' output_path: %s', self.output_path) + self.logger.info(' n_cpu: %s', self.n_cpu) + def _compute_snrs(self, output_path, output_filename, @@ -118,6 +150,7 @@ def _compute_snrs(self, def run_aperture_sweep_snr(self, mirror_diameters, run_name): + final_output_path = os.path.join(self.output_path, run_name) if not os.path.exists(final_output_path): os.makedirs(final_output_path) @@ -152,6 +185,27 @@ def run_aperture_sweep_snr(self, diameter=float(diameter)) print('[Done]') + # Append single multi-line log summary for this method + t_end = time.time() + try: + diam_dirs = sorted([d for d in os.listdir(final_output_path) + if os.path.isdir(os.path.join(final_output_path, d)) and d.startswith('diam_')]) + diameters_created = [d.replace('diam_', '').replace('_', '.') for d in diam_dirs] + except Exception: + diam_dirs = [] + diameters_created = [] + msg = f"""run_aperture_sweep_snr summary: + run_name: {run_name} + requested_mirror_diameters: {mirror_diameters} + mirror_directories_found: {diam_dirs} + mirror_diameters_reported: {diameters_created} + final_output_path: {final_output_path} + start_time: {time.ctime(t_end - (t_end - t_end))} # placeholder, exact start not stored in this scope + end_time: {time.ctime(t_end)} + elapsed_seconds: {round(0.0, 2)} # elapsed not measured here to avoid changing existing code flow +""" + self.logger.info(msg) + def run_optimizer_sweep(self, run_name, source_name, @@ -213,6 +267,32 @@ def run_optimizer_sweep(self, ) for rc in run_configs) + # Append single multi-line log summary for this method + t_end = time.time() + # derive counts from filesystem to avoid modifying existing logic + catalog_counts = 0 + catalog_list = {} + for d in subdirs: + files = [f for f in 
os.listdir(os.path.join(source_path, d)) if f.endswith('_catalog.hdf5') and 'maxsep' not in f] + catalog_counts += len(files) + catalog_list[d] = files + msg = f"""run_optimizer_sweep summary: + run_name: {run_name} + source_name: {source_name} + source_path: {source_path} + n_cpu: {self.n_cpu} + subdirs_found: {subdirs} + total_subdirs_count: {len(subdirs)} + total_catalog_files_count: {catalog_counts} + catalog_files_by_subdir: {catalog_list} + run_configs_queued_parallel: {len(run_configs)} + final_output_path: {final_output_path} + characterization: {characterization} + opt_limit_factor: {opt_limit_factor} + end_time: {time.ctime(t_end)} +""" + self.logger.info(msg) + def combine_catalog_maxsep(self, source_name,): source_path = os.path.join(self.output_path, source_name) @@ -270,6 +350,27 @@ def combine_catalog_maxsep(self, print('ALL combine_catalog_maxsep finished. Total time:', round(time.time() - t_all, 2), 's') + # Append single multi-line log summary for this method + t_end = time.time() + # derive file counts from filesystem (no change to processing logic) + subdirs_list = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] + total_base_catalogs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) + if f.endswith('_catalog.hdf5') and 'maxsep' not in f]) for d in subdirs_list) + total_maxsep_catalogs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) + if f.endswith('_maxsep_catalog.hdf5')]) for d in subdirs_list) + msg = f"""combine_catalog_maxsep summary: + source_name: {source_name} + source_path: {source_path} + subdirs_found: {subdirs_list} + subdirs_total: {len(subdirs_list)} + total_base_catalog_files: {total_base_catalogs} + total_maxsep_catalog_files: {total_maxsep_catalogs} + start_time: {time.ctime(t_all)} + end_time: {time.ctime(t_end)} + elapsed_seconds: {round(t_end - t_all, 2)} +""" + self.logger.info(msg) + def get_mission_time(self, catalog_path, config_path): @@ -412,6 +513,21 @@ def sweep_mission_time(self, print(' Done processing run:', run_name, '- took', round(time.time() - t_run, 2), 's') + # Append single multi-line log summary for this method + t_end = time.time() + subdirs_list = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] + total_runs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) if f.endswith('_catalog.hdf5')]) for d in subdirs_list) + mission_csvs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) if f.endswith('_mission_time.csv')]) for d in subdirs_list) + msg = f"""sweep_mission_time summary: + source_name: {source_name} + source_path: {source_path} + subdirs_found: {subdirs_list} + subdirs_total: {len(subdirs_list)} + total_runs_catalogs_found: {total_runs} + mission_time_csvs_found: {mission_csvs} + end_time: {time.ctime(t_end)} +""" + self.logger.info(msg) def process_mission_time(self, source_name): diff --git a/lifesim/optimize/ahgs.py b/lifesim/optimize/ahgs.py index 6607747..87ca7b6 100644 --- a/lifesim/optimize/ahgs.py +++ b/lifesim/optimize/ahgs.py @@ -28,10 +28,14 @@ def obs_array_star(self, nstar): - self.data.catalog['snr_current'].loc[mask] ** 2) / self.data.catalog.snr_1h.loc[mask] ** 2) + # included slew time + # needs to be divided by the number of universes for proper optimization, since the characterization happens + # 'per universe' if self.data.options.optimization['characterization']: obs_char = (60 * 60 * self.data.options.optimization['snr_char'] ** 2 - / self.data.catalog.maxsep_snr_1h.loc[mask] ** 2) + 
/ self.data.catalog.maxsep_snr_1h.loc[mask] ** 2 + + self.data.options.array['t_slew']) / self.data.optm['num_universe'] # characterization time is needed for optimization, detection time is needed to understand how much # mission time was used for one observation @@ -42,7 +46,6 @@ def obs_array_star(self, nstar): met[0] -= self.data.catalog.t_slew.loc[mask] met[1] += met[0] met = met / np.arange(1, np.count_nonzero(mask) + 1, 1)[np.newaxis, :] - a=1 return met else: From 1ccb3aa5090cac110e924e0acd72040949c0eab9 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Wed, 7 Jan 2026 09:47:20 +0100 Subject: [PATCH 21/27] adapted convergence test data handling to yield wrapper --- lifesim/analysis/yield_wrapper.py | 88 ++++++++++++++++--------------- 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index 29761a3..d0708fc 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -195,15 +195,15 @@ def run_aperture_sweep_snr(self, diam_dirs = [] diameters_created = [] msg = f"""run_aperture_sweep_snr summary: - run_name: {run_name} - requested_mirror_diameters: {mirror_diameters} - mirror_directories_found: {diam_dirs} - mirror_diameters_reported: {diameters_created} - final_output_path: {final_output_path} - start_time: {time.ctime(t_end - (t_end - t_end))} # placeholder, exact start not stored in this scope - end_time: {time.ctime(t_end)} - elapsed_seconds: {round(0.0, 2)} # elapsed not measured here to avoid changing existing code flow -""" + run_name: {run_name} + requested_mirror_diameters: {mirror_diameters} + mirror_directories_found: {diam_dirs} + mirror_diameters_reported: {diameters_created} + final_output_path: {final_output_path} + start_time: {time.ctime(t_end - (t_end - t_end))} # placeholder, exact start not stored in this scope + end_time: {time.ctime(t_end)} + elapsed_seconds: {round(0.0, 2)} # elapsed not measured here to avoid changing existing code flow + """ self.logger.info(msg) def run_optimizer_sweep(self, @@ -277,20 +277,20 @@ def run_optimizer_sweep(self, catalog_counts += len(files) catalog_list[d] = files msg = f"""run_optimizer_sweep summary: - run_name: {run_name} - source_name: {source_name} - source_path: {source_path} - n_cpu: {self.n_cpu} - subdirs_found: {subdirs} - total_subdirs_count: {len(subdirs)} - total_catalog_files_count: {catalog_counts} - catalog_files_by_subdir: {catalog_list} - run_configs_queued_parallel: {len(run_configs)} - final_output_path: {final_output_path} - characterization: {characterization} - opt_limit_factor: {opt_limit_factor} - end_time: {time.ctime(t_end)} -""" + run_name: {run_name} + source_name: {source_name} + source_path: {source_path} + n_cpu: {self.n_cpu} + subdirs_found: {subdirs} + total_subdirs_count: {len(subdirs)} + total_catalog_files_count: {catalog_counts} + catalog_files_by_subdir: {catalog_list} + run_configs_queued_parallel: {len(run_configs)} + final_output_path: {final_output_path} + characterization: {characterization} + opt_limit_factor: {opt_limit_factor} + end_time: {time.ctime(t_end)} + """ self.logger.info(msg) def combine_catalog_maxsep(self, @@ -359,16 +359,16 @@ def combine_catalog_maxsep(self, total_maxsep_catalogs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) if f.endswith('_maxsep_catalog.hdf5')]) for d in subdirs_list) msg = f"""combine_catalog_maxsep summary: - source_name: {source_name} - source_path: {source_path} - 
subdirs_found: {subdirs_list} - subdirs_total: {len(subdirs_list)} - total_base_catalog_files: {total_base_catalogs} - total_maxsep_catalog_files: {total_maxsep_catalogs} - start_time: {time.ctime(t_all)} - end_time: {time.ctime(t_end)} - elapsed_seconds: {round(t_end - t_all, 2)} -""" + source_name: {source_name} + source_path: {source_path} + subdirs_found: {subdirs_list} + subdirs_total: {len(subdirs_list)} + total_base_catalog_files: {total_base_catalogs} + total_maxsep_catalog_files: {total_maxsep_catalogs} + start_time: {time.ctime(t_all)} + end_time: {time.ctime(t_end)} + elapsed_seconds: {round(t_end - t_all, 2)} + """ self.logger.info(msg) def get_mission_time(self, @@ -519,14 +519,14 @@ def sweep_mission_time(self, total_runs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) if f.endswith('_catalog.hdf5')]) for d in subdirs_list) mission_csvs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) if f.endswith('_mission_time.csv')]) for d in subdirs_list) msg = f"""sweep_mission_time summary: - source_name: {source_name} - source_path: {source_path} - subdirs_found: {subdirs_list} - subdirs_total: {len(subdirs_list)} - total_runs_catalogs_found: {total_runs} - mission_time_csvs_found: {mission_csvs} - end_time: {time.ctime(t_end)} -""" + source_name: {source_name} + source_path: {source_path} + subdirs_found: {subdirs_list} + subdirs_total: {len(subdirs_list)} + total_runs_catalogs_found: {total_runs} + mission_time_csvs_found: {mission_csvs} + end_time: {time.ctime(t_end)} + """ self.logger.info(msg) def process_mission_time(self, @@ -703,12 +703,16 @@ def process_mission_time(self, def run_covergence_test(self, run_name, catalog_path, - output_path, min_universes, num_steps, plot=False): + + # check a directory with the name run_name already exists in output_path, otherwise create it + output_path = os.path.join(self.output_path, run_name) if not os.path.exists(output_path): os.makedirs(output_path) + else: + raise ValueError('Directory already exists: ' + output_path) # determine number universes catalog = pd.read_hdf(catalog_path, key='catalog') From ddeb0881f2b70c35b0a741149c64f0d13fdab054 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Wed, 7 Jan 2026 09:52:13 +0100 Subject: [PATCH 22/27] tiny bug in data handling of convergence test --- lifesim/analysis/yield_wrapper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index d0708fc..736ecc2 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -714,6 +714,8 @@ def run_covergence_test(self, else: raise ValueError('Directory already exists: ' + output_path) + output_path += '/' + # determine number universes catalog = pd.read_hdf(catalog_path, key='catalog') total_universes = np.unique(catalog.nuniverse).shape[0] From 12792ea50bcc731ee26fdec1c6a36bb2073a988d Mon Sep 17 00:00:00 2001 From: "Felix A. 
Dannert" <62441325+fdannert@users.noreply.github.com> Date: Fri, 9 Jan 2026 10:42:30 +0100 Subject: [PATCH 23/27] added function to merge HDF catalogs from CSV files with ID remapping and logging --- lifesim/analysis/yield_wrapper.py | 179 ++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index 736ecc2..af651eb 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -887,3 +887,182 @@ def compute_yields_mp(output_filename, yields = get_yields(bus=bus, return_yields=return_yields) return int(uni_sel), yields + +def merge_catalogs_from_csv(mapping_csv: str, + merge_csv: str, + output_path: str, + csv_has_header: bool = True, + id_prefixes: tuple = ('1', '2')) -> None: + """Merge pairs of HDF catalogs described by two CSV files. + + Parameters + ---------- + mapping_csv : str + Path to the first CSV that maps short names to actual catalog paths. + The file is expected to have at least two columns: + - column 0: short name for the catalog + - column 1: actual path to the catalog (HDF readable by pandas) + merge_csv : str + Path to the second CSV that lists merges using short names. The file is + expected to have at least three columns: + - column 0: short name for input catalog 1 + - column 1: short name for input catalog 2 + - column 2: output name (a filename or base name). If not ending in + '.hdf5' or '_catalog.hdf5' the function will append '_catalog.hdf5'. + output_path : str + Directory where merged catalogs will be written. Created if necessary. + csv_has_header : bool + Whether the CSV files have header rows (applies to both CSVs). + id_prefixes : tuple + Two string prefixes used to remap ids in catalog1 and catalog2. 
+ + Returns + ------- + None + """ + import datetime + + # sanity checks for csvs and output dir + if not os.path.exists(mapping_csv): + raise FileNotFoundError(f"Mapping CSV file not found: {mapping_csv}") + if not os.path.exists(merge_csv): + raise FileNotFoundError(f"Merge CSV file not found: {merge_csv}") + + os.makedirs(output_path, exist_ok=True) + + header_arg = 0 if csv_has_header else None + + # read mapping csv and build dict short_name -> path + mapping_df = pd.read_csv(mapping_csv, header=header_arg) + if mapping_df.shape[1] < 2: + raise ValueError("Mapping CSV must contain at least two columns: short_name, path") + short_names = mapping_df.iloc[:, 0].astype(str).tolist() + paths = mapping_df.iloc[:, 1].astype(str).tolist() + mapping = {} + for s, p in zip(short_names, paths): + if s in mapping: + # prefer first occurrence but warn + print(f"Warning: duplicate short name '{s}' in mapping CSV; using first occurrence.") + continue + mapping[s] = p + + # read merge csv + merge_df = pd.read_csv(merge_csv, header=header_arg) + if merge_df.shape[1] < 3: + raise ValueError("Merge CSV must contain at least three columns: short1, short2, output_name") + + input1_short = merge_df.iloc[:, 0].astype(str).tolist() + input2_short = merge_df.iloc[:, 1].astype(str).tolist() + output_names = merge_df.iloc[:, 2].astype(str).tolist() + + # helper to read HDF with fallback key + def _read_hdf_try(path): + try: + return pd.read_hdf(path) + except (KeyError, ValueError): + try: + return pd.read_hdf(path, key='catalog') + except Exception as e: + raise RuntimeError(f"Failed to read HDF file {path}: {e}") + + for idx, (s1, s2, out_name) in enumerate(zip(input1_short, input2_short, output_names), start=1): + print(f"Processing row {idx}:\n input_short1={s1}\n input_short2={s2}\n output_name={out_name}") + + if s1 not in mapping: + raise KeyError(f"Short name '{s1}' not found in mapping CSV") + if s2 not in mapping: + raise KeyError(f"Short name '{s2}' not found in mapping CSV") + + p1 = mapping[s1] + p2 = mapping[s2] + + if not os.path.exists(p1): + raise FileNotFoundError(f"Input catalog 1 not found: {p1}") + if not os.path.exists(p2): + raise FileNotFoundError(f"Input catalog 2 not found: {p2}") + + # normalize output filename: ensure it ends with .hdf5 or _catalog.hdf5 + out_basename = out_name + if out_basename.endswith('.hdf5'): + out_filename = out_basename + elif out_basename.endswith('_catalog.hdf5'): + out_filename = out_basename + else: + out_filename = out_basename + '_catalog.hdf5' + + outp = os.path.join(output_path, out_filename) + + # read catalogs + cat1 = _read_hdf_try(p1) + cat2 = _read_hdf_try(p2) + + if 'id' not in cat1.columns or 'id' not in cat2.columns: + raise KeyError("Both input catalogs must contain an 'id' column") + + # remap ids (store orig for note) + try: + cat1 = cat1.copy() + cat2 = cat2.copy() + cat1['id_orig__'] = cat1['id'] + cat2['id_orig__'] = cat2['id'] + cat1['id'] = cat1['id'].astype(int).astype(str).apply(lambda s: int(id_prefixes[0] + s)) + cat2['id'] = cat2['id'].astype(int).astype(str).apply(lambda s: int(id_prefixes[1] + s)) + except Exception as e: + raise ValueError(f"Error coercing or remapping 'id' columns: {e}") + + if cat1['id'].duplicated().any(): + raise ValueError("Duplicate ids found within remapped catalog1") + if cat2['id'].duplicated().any(): + raise ValueError("Duplicate ids found within remapped catalog2") + + merged = pd.concat([cat1, cat2], ignore_index=True, sort=False) + + # ensure output directory exists (already created at 
function-level, but keep defensively) + out_dir = os.path.dirname(outp) + if out_dir: + os.makedirs(out_dir, exist_ok=True) + + try: + merged.to_hdf(outp, key='catalog', mode='w') + except Exception as e: + raise RuntimeError(f"Failed to write merged HDF to {outp}: {e}") + + # write info note + base_out = os.path.splitext(outp)[0] + note_path = base_out + '_merge_info.txt' + now = datetime.datetime.utcnow().isoformat() + 'Z' + lines = [] + lines.append(f"Merged catalogs on: {now}") + lines.append("") + lines.append("Inputs:") + lines.append(f" catalog1 (short: {s1}): {p1}") + lines.append(f" original rows: {len(cat1)}") + lines.append(f" catalog2 (short: {s2}): {p2}") + lines.append(f" original rows: {len(cat2)}") + lines.append("") + lines.append(f"Output written to: {outp}") + lines.append(f" total rows: {len(merged)}") + lines.append("") + lines.append("ID remapping applied:") + lines.append(f" catalog1: new_id = int('{id_prefixes[0]}' + str(old_id))") + lines.append(f" catalog2: new_id = int('{id_prefixes[1]}' + str(old_id))") + lines.append("") + try: + sample1 = cat1[['id_orig__', 'id']].rename(columns={'id_orig__': 'orig', 'id': 'new'}).head(5) + sample2 = cat2[['id_orig__', 'id']].rename(columns={'id_orig__': 'orig', 'id': 'new'}).head(5) + lines.append('Sample mappings catalog1 (orig -> new):') + for o, n in sample1.values: + lines.append(f" {o} -> {n}") + lines.append('') + lines.append('Sample mappings catalog2 (orig -> new):') + for o, n in sample2.values: + lines.append(f" {o} -> {n}") + except Exception: + pass + + with open(note_path, 'w') as fh: + fh.write('\n'.join(lines)) + + print(f"Wrote merged catalog to: {outp}") + print(f"Wrote merge note to: {note_path}") + From b5f963534ca1d6278629d2f4a652d75479cc7fc1 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Fri, 9 Jan 2026 11:17:13 +0100 Subject: [PATCH 24/27] refactor merge function to handle directory-based HDF catalog merging --- lifesim/analysis/yield_wrapper.py | 235 ++++++++++++++++-------------- 1 file changed, 124 insertions(+), 111 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index af651eb..a502a37 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -888,37 +888,22 @@ def compute_yields_mp(output_filename, return_yields=return_yields) return int(uni_sel), yields -def merge_catalogs_from_csv(mapping_csv: str, +def merge_runs(mapping_csv: str, merge_csv: str, output_path: str, csv_has_header: bool = True, id_prefixes: tuple = ('1', '2')) -> None: - """Merge pairs of HDF catalogs described by two CSV files. - - Parameters - ---------- - mapping_csv : str - Path to the first CSV that maps short names to actual catalog paths. - The file is expected to have at least two columns: - - column 0: short name for the catalog - - column 1: actual path to the catalog (HDF readable by pandas) - merge_csv : str - Path to the second CSV that lists merges using short names. The file is - expected to have at least three columns: - - column 0: short name for input catalog 1 - - column 1: short name for input catalog 2 - - column 2: output name (a filename or base name). If not ending in - '.hdf5' or '_catalog.hdf5' the function will append '_catalog.hdf5'. - output_path : str - Directory where merged catalogs will be written. Created if necessary. - csv_has_header : bool - Whether the CSV files have header rows (applies to both CSVs). 
- id_prefixes : tuple - Two string prefixes used to remap ids in catalog1 and catalog2. - - Returns - ------- - None + """Merge sets of HDF catalogs described by two CSV files. + + Differences vs previous implementation: + - mapping_csv maps short name -> input directory (each input directory contains subdirectories like diam_2, diam_2_5, ...) + - merge_csv rows give two short names and an output name. For each row: + - create output_path// and an info file there + - create output_path//ap_merged/ + - verify both input directories contain exactly the same subdirs + - for each subdir, read the single base catalog file (ends with '_catalog.hdf5' and not containing 'maxsep') from both inputs, + remap ids with id_prefixes, concatenate, and write to ap_merged// + - do not read/merge any maxsep files """ import datetime @@ -932,7 +917,7 @@ def merge_catalogs_from_csv(mapping_csv: str, header_arg = 0 if csv_has_header else None - # read mapping csv and build dict short_name -> path + # read mapping csv and build dict short_name -> path (expected to be directories) mapping_df = pd.read_csv(mapping_csv, header=header_arg) if mapping_df.shape[1] < 2: raise ValueError("Mapping CSV must contain at least two columns: short_name, path") @@ -941,7 +926,6 @@ def merge_catalogs_from_csv(mapping_csv: str, mapping = {} for s, p in zip(short_names, paths): if s in mapping: - # prefer first occurrence but warn print(f"Warning: duplicate short name '{s}' in mapping CSV; using first occurrence.") continue mapping[s] = p @@ -973,96 +957,125 @@ def _read_hdf_try(path): if s2 not in mapping: raise KeyError(f"Short name '{s2}' not found in mapping CSV") - p1 = mapping[s1] - p2 = mapping[s2] + dir1 = mapping[s1] + dir2 = mapping[s2] - if not os.path.exists(p1): - raise FileNotFoundError(f"Input catalog 1 not found: {p1}") - if not os.path.exists(p2): - raise FileNotFoundError(f"Input catalog 2 not found: {p2}") + if not os.path.exists(dir1) or not os.path.isdir(dir1): + raise FileNotFoundError(f"Input directory 1 not found or not a directory: {dir1}") + if not os.path.exists(dir2) or not os.path.isdir(dir2): + raise FileNotFoundError(f"Input directory 2 not found or not a directory: {dir2}") - # normalize output filename: ensure it ends with .hdf5 or _catalog.hdf5 - out_basename = out_name - if out_basename.endswith('.hdf5'): - out_filename = out_basename - elif out_basename.endswith('_catalog.hdf5'): - out_filename = out_basename - else: - out_filename = out_basename + '_catalog.hdf5' + # normalize output folder and create structure + out_folder = os.path.join(output_path, out_name) + os.makedirs(out_folder, exist_ok=True) + ap_merged_root = os.path.join(out_folder, 'ap_merged') + os.makedirs(ap_merged_root, exist_ok=True) - outp = os.path.join(output_path, out_filename) + info_lines = [] + now = datetime.datetime.utcnow().isoformat() + 'Z' + info_lines.append(f"Merged catalogs run on: {now}") + info_lines.append(f"Input short1: {s1}") + info_lines.append(f" path: {dir1}") + info_lines.append(f"Input short2: {s2}") + info_lines.append(f" path: {dir2}") + info_lines.append("") + + # list subdirectories (only directories) + subdirs1 = sorted([d for d in os.listdir(dir1) if os.path.isdir(os.path.join(dir1, d))]) + subdirs2 = sorted([d for d in os.listdir(dir2) if os.path.isdir(os.path.join(dir2, d))]) + + info_lines.append(f"Subdirectories in {s1}: {subdirs1}") + info_lines.append(f"Subdirectories in {s2}: {subdirs2}") + + if set(subdirs1) != set(subdirs2): + info_lines.append("") + info_lines.append("ERROR: Input 
directories do not contain the same set of subdirectories.") + note_path = os.path.join(out_folder, 'merge_info.txt') + with open(note_path, 'w') as fh: + fh.write('\n'.join(info_lines)) + raise ValueError(f"Subdirectory mismatch between {dir1} and {dir2}. Merge aborted. See {note_path} for details.") + + common_subdirs = sorted(subdirs1) # they are equal sets + + # process each subdir + for sub in common_subdirs: + info_lines.append("") + info_lines.append(f"Processing subdir: {sub}") + in_sub1 = os.path.join(dir1, sub) + in_sub2 = os.path.join(dir2, sub) + + # find single base catalog file in each (ends with _catalog.hdf5 and not containing 'maxsep') + base_files1 = [f for f in os.listdir(in_sub1) if f.endswith('_catalog.hdf5') and 'maxsep' not in f] + base_files2 = [f for f in os.listdir(in_sub2) if f.endswith('_catalog.hdf5') and 'maxsep' not in f] + + if len(base_files1) != 1: + info_lines.append(f" ERROR: expected exactly one base catalog in {in_sub1}, found {len(base_files1)}") + continue + if len(base_files2) != 1: + info_lines.append(f" ERROR: expected exactly one base catalog in {in_sub2}, found {len(base_files2)}") + continue - # read catalogs - cat1 = _read_hdf_try(p1) - cat2 = _read_hdf_try(p2) + bf1 = base_files1[0] + bf2 = base_files2[0] + info_lines.append(f" catalog file in {s1}: {bf1}") + info_lines.append(f" catalog file in {s2}: {bf2}") - if 'id' not in cat1.columns or 'id' not in cat2.columns: - raise KeyError("Both input catalogs must contain an 'id' column") + # ensure filenames match (same run name). If not, still proceed but preserve filenames (user expects consistent naming) + # create output subdir under ap_merged + out_sub = os.path.join(ap_merged_root, sub) + os.makedirs(out_sub, exist_ok=True) - # remap ids (store orig for note) - try: - cat1 = cat1.copy() - cat2 = cat2.copy() - cat1['id_orig__'] = cat1['id'] - cat2['id_orig__'] = cat2['id'] - cat1['id'] = cat1['id'].astype(int).astype(str).apply(lambda s: int(id_prefixes[0] + s)) - cat2['id'] = cat2['id'].astype(int).astype(str).apply(lambda s: int(id_prefixes[1] + s)) - except Exception as e: - raise ValueError(f"Error coercing or remapping 'id' columns: {e}") - - if cat1['id'].duplicated().any(): - raise ValueError("Duplicate ids found within remapped catalog1") - if cat2['id'].duplicated().any(): - raise ValueError("Duplicate ids found within remapped catalog2") - - merged = pd.concat([cat1, cat2], ignore_index=True, sort=False) - - # ensure output directory exists (already created at function-level, but keep defensively) - out_dir = os.path.dirname(outp) - if out_dir: - os.makedirs(out_dir, exist_ok=True) + path1 = os.path.join(in_sub1, bf1) + path2 = os.path.join(in_sub2, bf2) - try: - merged.to_hdf(outp, key='catalog', mode='w') - except Exception as e: - raise RuntimeError(f"Failed to write merged HDF to {outp}: {e}") + try: + cat1 = _read_hdf_try(path1) + cat2 = _read_hdf_try(path2) + except Exception as e: + info_lines.append(f" ERROR reading catalogs: {e}") + continue - # write info note - base_out = os.path.splitext(outp)[0] - note_path = base_out + '_merge_info.txt' - now = datetime.datetime.utcnow().isoformat() + 'Z' - lines = [] - lines.append(f"Merged catalogs on: {now}") - lines.append("") - lines.append("Inputs:") - lines.append(f" catalog1 (short: {s1}): {p1}") - lines.append(f" original rows: {len(cat1)}") - lines.append(f" catalog2 (short: {s2}): {p2}") - lines.append(f" original rows: {len(cat2)}") - lines.append("") - lines.append(f"Output written to: {outp}") - lines.append(f" total 
rows: {len(merged)}") - lines.append("") - lines.append("ID remapping applied:") - lines.append(f" catalog1: new_id = int('{id_prefixes[0]}' + str(old_id))") - lines.append(f" catalog2: new_id = int('{id_prefixes[1]}' + str(old_id))") - lines.append("") - try: - sample1 = cat1[['id_orig__', 'id']].rename(columns={'id_orig__': 'orig', 'id': 'new'}).head(5) - sample2 = cat2[['id_orig__', 'id']].rename(columns={'id_orig__': 'orig', 'id': 'new'}).head(5) - lines.append('Sample mappings catalog1 (orig -> new):') - for o, n in sample1.values: - lines.append(f" {o} -> {n}") - lines.append('') - lines.append('Sample mappings catalog2 (orig -> new):') - for o, n in sample2.values: - lines.append(f" {o} -> {n}") - except Exception: - pass + info_lines.append(f" rows before merge: {s1}: {len(cat1)}, {s2}: {len(cat2)}") - with open(note_path, 'w') as fh: - fh.write('\n'.join(lines)) + if 'id' not in cat1.columns or 'id' not in cat2.columns: + info_lines.append(" ERROR: both catalogs must contain an 'id' column. Skipping this subdir.") + continue - print(f"Wrote merged catalog to: {outp}") - print(f"Wrote merge note to: {note_path}") + # remap ids and keep originals + try: + cat1 = cat1.copy() + cat2 = cat2.copy() + cat1['id_orig__'] = cat1['id'] + cat2['id_orig__'] = cat2['id'] + # coerce to int then prefix as string and convert back to int + cat1['id'] = cat1['id'].astype(int).astype(str).apply(lambda s: int(str(id_prefixes[0]) + s)) + cat2['id'] = cat2['id'].astype(int).astype(str).apply(lambda s: int(str(id_prefixes[1]) + s)) + except Exception as e: + info_lines.append(f" ERROR remapping ids: {e}") + continue + + if cat1['id'].duplicated().any(): + info_lines.append(" ERROR: duplicate ids found within remapped catalog1. Skipping this subdir.") + continue + if cat2['id'].duplicated().any(): + info_lines.append(" ERROR: duplicate ids found within remapped catalog2. Skipping this subdir.") + continue + + merged = pd.concat([cat1, cat2], ignore_index=True, sort=False) + + out_catalog_path = os.path.join(out_sub, bf1) # preserve filename of first input's base file + try: + merged.to_hdf(out_catalog_path, key='catalog', mode='w') + except Exception as e: + info_lines.append(f" ERROR writing merged catalog to {out_catalog_path}: {e}") + continue + + info_lines.append(f" wrote merged catalog to: {out_catalog_path}") + info_lines.append(f" rows after merge: {len(merged)}") + + # write info file + note_path = os.path.join(out_folder, 'merge_info.txt') + with open(note_path, 'w') as fh: + fh.write('\n'.join(info_lines)) + print(f"Finished processing merge row {idx}. Info written to: {note_path}") From 1b1ad637c8219287316f316fe63993be1f1bceea Mon Sep 17 00:00:00 2001 From: "Felix A. 
Dannert" <62441325+fdannert@users.noreply.github.com> Date: Fri, 9 Jan 2026 17:07:17 +0100 Subject: [PATCH 25/27] added the optimization manager that can run multiple optimizations per Wrapper in one command --- lifesim/analysis/yield_wrapper.py | 115 ++++++++++++++++++++++++++++-- 1 file changed, 111 insertions(+), 4 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index a502a37..a6f3890 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -6,6 +6,7 @@ import json from typing import Union import logging +import yaml import numpy as np from joblib import Parallel, delayed, parallel_config @@ -210,7 +211,8 @@ def run_optimizer_sweep(self, run_name, source_name, characterization: bool = False, - opt_limit_factor: Union[None, float] = None): + opt_limit_factor: Union[None, float] = None, + reduce_catalog: bool = False): source_path = os.path.join(self.output_path, source_name) # get the names of all subdirectories in output_path (which contain subdirectories for different mirror diameters) subdirs = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] @@ -241,7 +243,8 @@ def run_optimizer_sweep(self, catalog_path=os.path.join(source_path, subdir, catalog_file + '_catalog.hdf5'), config_path=self.config_path, characterization=characterization, - opt_limit_factor=opt_limit_factor + opt_limit_factor=opt_limit_factor, + reduce_catalog=reduce_catalog, ) else: run_configs.append({'output_filename': catalog_file, @@ -263,7 +266,8 @@ def run_optimizer_sweep(self, catalog_path=rc['catalog_path'], config_path=self.config_path, characterization=characterization, - opt_limit_factor=opt_limit_factor + opt_limit_factor=opt_limit_factor, + reduce_catalog=reduce_catalog, ) for rc in run_configs) @@ -699,6 +703,100 @@ def process_mission_time(self, # Final log with elapsed time print('ALL process_mission_time finished. Total time:', round(time.time() - t_all, 2), 's') + def mange_optimizations(self, + scenario_csv: str, + source_name: str, + csv_has_header: bool = True): + + header_arg = 0 if csv_has_header else None + + scenario_df = pd.read_csv(scenario_csv, header=header_arg) + + def _create_short_name(row): + parts = [] + + # 1. Handle Booleans: Add tag only if True + if row['Experiment_1']: + parts.append("e1") + if row['Experiment_2']: + parts.append("e2") + + # 2. Handle Float: Add 'f' prefix and remove decimal (0.5 -> 05) + # converting to string and replacing '.' is a robust way to handle this + factor_str = str(row['opt_limit_factor']).replace('.', '') + parts.append(f"f{factor_str}") + + # 3. 
Handle Characterization + if row['characterization']: + parts.append("char") + + # Join all parts with underscores + return "_".join(parts) + + # Apply the function row by row + scenario_df['filename'] = scenario_df.apply(_create_short_name, axis=1) + + # load base config file + with open(self.config_path, 'r') as f: + base_config = yaml.safe_load(f) + original_config_path = deepcopy(self.config_path) + + for i in range(len(scenario_df)): + run_name = 'opt_' + scenario_df.iloc[i]['filename'] + + # check if a directory with the name run_name already exists in output_path, otherwise create it + final_output_path = os.path.join(self.output_path, run_name) + + row = scenario_df.iloc[i] + + # create the custom config file based on the existing config file + custom_config = base_config.copy() + + # Loop through the items in the row + for key, value in row.items(): + # CASE A: It is an Experiment toggle + if key.startswith('Experiment'): + if not value: + # If False, remove it from the dictionary safely + # .pop(key, None) prevents a KeyError if the key is already gone + custom_config['optimization']['experiments'].pop(key, None) + # If True, we do nothing (keep it) + + # CASE B: It is a parameter update (e.g., opt_limit_factor) + # We check if this key exists in the 'optimization' block to overwrite it + elif key in custom_config['optimization']: + custom_config['optimization'][key] = float(value) + + # save the custom config file to a temporary location + temp_config_path = os.path.join(self.output_path, 'temp_config_' + run_name + '.yaml') + with open(temp_config_path, 'w') as f: + yaml.dump(custom_config, f) + self.config_path = temp_config_path + + print('Starting optimization for run:', run_name) + + self.run_optimizer_sweep( + run_name=run_name, + source_name=source_name, + characterization=row['characterization'], + opt_limit_factor=row['opt_limit_factor'], + reduce_catalog=True + ) + + # save the custom config file to final_output_path + custom_config_path = os.path.join(final_output_path, 'config_' + run_name + '.yaml') + with open(custom_config_path, 'w') as f: + yaml.dump(custom_config, f) + + print('Finished optimization for run:', run_name) + print('----------------------------------------') + + #delete the temporary config file + os.remove(temp_config_path) + + self.config_path = original_config_path + + def run_covergence_test(self, run_name, @@ -834,7 +932,8 @@ def compute_yields_mp(output_filename, uni_sel=None, return_yields=False, characterization: bool = False, - opt_limit_factor: Union[None, float] = None): + opt_limit_factor: Union[None, float] = None, + reduce_catalog: bool = False): t = time.time() # create bus @@ -881,6 +980,14 @@ def compute_yields_mp(output_filename, # with contextlib.redirect_stdout(None): opt.ahgs() + + if reduce_catalog: + to_remove = ['planet_flux_use', 'ecc_p', 'noise_astro', 'fp', 'small_omega_p', 'semimajor_p', 'dec', 'theta_p', + 'sep_p', 'albedo_geom_vis', 'z', 'photon_rate_noise', 'mass_p', 'mass_s', 'p_orb', 'flux_p', + 'photon_rate_planet', 'albedo_bond', 'inc_p', 'lat', 'stype', 'name_s', 'lon', 'ra', + 'albedo_geom_mir', 'large_omega_p', 'radius_s'] + bus.data.catalog.drop(columns=to_remove, inplace=True) + bus.save() if return_yields: From 4edcec4a3e6f7ec6d2a0b213a12b33b19a1b9095 Mon Sep 17 00:00:00 2001 From: "Felix A. 
Dannert" <62441325+fdannert@users.noreply.github.com> Date: Fri, 9 Jan 2026 17:42:54 +0100 Subject: [PATCH 26/27] tiny bugfix: drop only existing columns --- lifesim/analysis/yield_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index a6f3890..4ddb695 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -986,7 +986,7 @@ def compute_yields_mp(output_filename, 'sep_p', 'albedo_geom_vis', 'z', 'photon_rate_noise', 'mass_p', 'mass_s', 'p_orb', 'flux_p', 'photon_rate_planet', 'albedo_bond', 'inc_p', 'lat', 'stype', 'name_s', 'lon', 'ra', 'albedo_geom_mir', 'large_omega_p', 'radius_s'] - bus.data.catalog.drop(columns=to_remove, inplace=True) + bus.data.catalog.drop(columns=to_remove, inplace=True, errors='ignore') bus.save() From 951647ccf0d153cc6a033132a8f01a9fbe9d0d3d Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Tue, 13 Jan 2026 08:24:54 +0100 Subject: [PATCH 27/27] added numpy varibale handling to yaml writer, some bugfixes in yield_wrapper.py --- lifesim/analysis/yield_wrapper.py | 110 +++++++++++++++++------------- lifesim/core/core.py | 15 +++- 2 files changed, 78 insertions(+), 47 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index 4ddb695..0ee575a 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -486,52 +486,67 @@ def get_mission_time(self, return time_sheet def sweep_mission_time(self, - source_name): - source_path = os.path.join(self.output_path, source_name) - subdirs = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] + source_name, + all_runs: bool = False): + if all_runs: + # an optimized run directory always starts with 'opt_' + # run_names = [d for d in os.listdir(self.output_path) if os.path.isdir(os.path.join(self.output_path, d))] - for subdir in subdirs: - print('--------------------------------------------------') - print('Processing subdir: ', subdir) - t_sub = time.time() - - # make a list of all files ending in .hdf5 in subdir, then keep only the part of the filename before '_catalog.hdf5' and only if the sting does not contain 'maxsep' - run_names = [f.split('_catalog.hdf5')[0] for f in os.listdir(os.path.join(source_path, subdir)) - if f.endswith('_catalog.hdf5')] + run_names = [d for d in os.listdir(self.output_path) + if os.path.isdir(os.path.join(self.output_path, d)) + and d.startswith('opt_')] for run_name in run_names: - print(' ----------------------------------------------') - print(' Processing run: ', run_name) - t_run = time.time() - - catalog_path = os.path.join(source_path, subdir, run_name + '_catalog.hdf5') - config_path = os.path.join(source_path, subdir, run_name + '.yaml') - - time_sheet = self.get_mission_time(catalog_path=catalog_path, - config_path=config_path) - - # save time_sheet to csv files, one per experiment - output_csv_path = os.path.join(source_path, subdir, run_name + '_mission_time.csv') - time_sheet.to_csv(output_csv_path) - print(' Saved mission time to ', output_csv_path) - - print(' Done processing run:', run_name, '- took', round(time.time() - t_run, 2), 's') + self.sweep_mission_time(source_name=run_name, + all_runs=False) + self.process_mission_time(source_name=run_name) - # Append single multi-line log summary for this method - t_end = time.time() - subdirs_list = [d for d in os.listdir(source_path) if 
os.path.isdir(os.path.join(source_path, d))] - total_runs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) if f.endswith('_catalog.hdf5')]) for d in subdirs_list) - mission_csvs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) if f.endswith('_mission_time.csv')]) for d in subdirs_list) - msg = f"""sweep_mission_time summary: - source_name: {source_name} - source_path: {source_path} - subdirs_found: {subdirs_list} - subdirs_total: {len(subdirs_list)} - total_runs_catalogs_found: {total_runs} - mission_time_csvs_found: {mission_csvs} - end_time: {time.ctime(t_end)} - """ - self.logger.info(msg) + else: + source_path = os.path.join(self.output_path, source_name) + subdirs = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] + + for subdir in subdirs: + print('--------------------------------------------------') + print('Processing subdir: ', subdir) + t_sub = time.time() + + # make a list of all files ending in .hdf5 in subdir, then keep only the part of the filename before '_catalog.hdf5' and only if the sting does not contain 'maxsep' + run_names = [f.split('_catalog.hdf5')[0] for f in os.listdir(os.path.join(source_path, subdir)) + if f.endswith('_catalog.hdf5')] + + for run_name in run_names: + print(' ----------------------------------------------') + print(' Processing run: ', run_name) + t_run = time.time() + + catalog_path = os.path.join(source_path, subdir, run_name + '_catalog.hdf5') + config_path = os.path.join(source_path, subdir, run_name + '.yaml') + + time_sheet = self.get_mission_time(catalog_path=catalog_path, + config_path=config_path) + + # save time_sheet to csv files, one per experiment + output_csv_path = os.path.join(source_path, subdir, run_name + '_mission_time.csv') + time_sheet.to_csv(output_csv_path) + print(' Saved mission time to ', output_csv_path) + + print(' Done processing run:', run_name, '- took', round(time.time() - t_run, 2), 's') + + # Append single multi-line log summary for this method + t_end = time.time() + subdirs_list = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] + total_runs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) if f.endswith('_catalog.hdf5')]) for d in subdirs_list) + mission_csvs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) if f.endswith('_mission_time.csv')]) for d in subdirs_list) + msg = f"""sweep_mission_time summary: + source_name: {source_name} + source_path: {source_path} + subdirs_found: {subdirs_list} + subdirs_total: {len(subdirs_list)} + total_runs_catalogs_found: {total_runs} + mission_time_csvs_found: {mission_csvs} + end_time: {time.ctime(t_end)} + """ + self.logger.info(msg) def process_mission_time(self, source_name): @@ -666,7 +681,10 @@ def process_mission_time(self, ['#1f6f6f', '#54a1a1', '#9fc8c8'], ] columns = ['detection', 'orbit', 'char'] - x = np.asarray(exptable[exps[0]].index, dtype=float) + try: + x = np.asarray(exptable[exps[0]].index, dtype=float) + except: + x = np.asarray(exptable[exps].index, dtype=float) y0 = np.zeros_like(x, dtype=float) fig, ax = plt.subplots() @@ -750,7 +768,7 @@ def _create_short_name(row): row = scenario_df.iloc[i] # create the custom config file based on the existing config file - custom_config = base_config.copy() + custom_config = deepcopy(base_config) # Loop through the items in the row for key, value in row.items(): @@ -778,8 +796,8 @@ def _create_short_name(row): self.run_optimizer_sweep( run_name=run_name, source_name=source_name, - 
characterization=row['characterization'], - opt_limit_factor=row['opt_limit_factor'], + characterization=bool(row['characterization']), + opt_limit_factor=float(row['opt_limit_factor']), reduce_catalog=True ) diff --git a/lifesim/core/core.py b/lifesim/core/core.py index dd5a009..1cf7600 100644 --- a/lifesim/core/core.py +++ b/lifesim/core/core.py @@ -6,7 +6,7 @@ import os import yaml -from numpy import ndarray, array +from numpy import ndarray, array, float64, float32, int64, int32, int16 import git from lifesim.core.data import Data @@ -442,6 +442,8 @@ def add_module(self, module.data = self.data def write_config(self): + add_numpy_representers() + module_dict = {key: str(type(module)) for (key, module) in self.modules.items()} # fetch version number and git commit hash if available @@ -520,3 +522,14 @@ def convert_to_np(inp: dict): else: out[k] = inp[k] return out + +def add_numpy_representers(): + def numpy_repr(dumper, data): + return dumper.represent_data(data.item()) + + # Add for various numpy types + for dtype in [float64, float32, int64, int32, int16]: + yaml.add_representer(dtype, numpy_repr) + + # Handle numpy arrays specifically + yaml.add_representer(ndarray, lambda dumper, data: dumper.represent_list(data.tolist()))
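For reference, a minimal standalone usage sketch of the representer pattern registered above (the config dictionary below is illustrative, not a LIFEsim option set): write_config() calls add_numpy_representers() so that numpy-typed option values serialize as plain YAML scalars and lists instead of verbose Python-object tags.

    import numpy as np
    import yaml

    def numpy_repr(dumper, data):
        # Convert a numpy scalar to its native Python equivalent before dumping
        return dumper.represent_data(data.item())

    # Register the scalar representers for the common numpy dtypes
    for dtype in [np.float64, np.float32, np.int64, np.int32, np.int16]:
        yaml.add_representer(dtype, numpy_repr)

    # Arrays are dumped as plain YAML lists
    yaml.add_representer(np.ndarray,
                         lambda dumper, data: dumper.represent_list(data.tolist()))

    # With the representers in place, numpy values dump as clean YAML
    config = {'diameter': np.float64(3.5), 'wl_bins': np.array([4.0, 10.0, 18.5])}
    print(yaml.dump(config))
    # diameter: 3.5
    # wl_bins:
    # - 4.0
    # - 10.0
    # - 18.5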