From 07ed88092c40789365be934e61c4318ac5f4a043 Mon Sep 17 00:00:00 2001 From: Jonah Timothy Hansen Date: Tue, 23 Sep 2025 10:44:42 +0200 Subject: [PATCH 01/27] added baseline optimization models --- lifesim/core/core.py | 2 +- lifesim/instrument/instrument.py | 45 +++++++++++++++++++++++++------- lifesim/util/options.py | 8 +++++- 3 files changed, 44 insertions(+), 11 deletions(-) diff --git a/lifesim/core/core.py b/lifesim/core/core.py index dd5a009..e27174e 100644 --- a/lifesim/core/core.py +++ b/lifesim/core/core.py @@ -475,7 +475,7 @@ def write_config(self): def build_from_config(self, filename: str): with open(filename) as file: - config_dict = yaml.load(file, Loader=yaml.FullLoader) + config_dict = yaml.load(file, Loader=yaml.UnsafeLoader) self.data.options.array = convert_to_np(config_dict['array']) self.data.options.optimization = convert_to_np(config_dict['optimization']) diff --git a/lifesim/instrument/instrument.py b/lifesim/instrument/instrument.py index 4000c67..a3c15a3 100644 --- a/lifesim/instrument/instrument.py +++ b/lifesim/instrument/instrument.py @@ -167,10 +167,11 @@ def get_wl_bins_const_spec_res(self): # TODO does not take the inclination into account! def adjust_bl_to_hz(self, hz_center: float, - distance_s: float): + distance_s: float, + l_sun: float): """ Adjusts the baseline of the array to be optimal for observations in the habitable zone of - the target star for the selected optimal wavelength. + the target star for the given optimisation model. Parameters ---------- @@ -179,14 +180,37 @@ def adjust_bl_to_hz(self, distance_s : float Distance between the observed star and the LIFE array in [pc]. """ + opt_model = self.data.options.models["baseline_optimisation"] # convert the habitable zone to radians hz_center_rad = hz_center / distance_s / (3600 * 180) * np.pi # in rad - # put first transmission peak of optimal wl on center of HZ - # for the origin of the value 0.5.. see Ottiger+2021 - baseline = (0.589645 / hz_center_rad - * self.data.options.other['wl_optimal'] * 10 ** (-6)) + if opt_model == "ref_wl": + # put first transmission peak of optimal wl on center of HZ + # for the origin of the value 0.5.. 
see Ottiger+2021 + k = 0.589645 * self.data.options.other['wl_optimal'] * 10 ** (-6) + + elif opt_model == "Bryson": + # Baseline optimisation considering the semi-major axis distribution of the Bryson 21 + # occurance rates and orbit projection effects, approximated via a polynomial fit to an MC analysis + sqL = l_sun**0.5 + if sqL > 0.28: #FGKs + cs = np.array([-2.5655e-1,2.0727e-1,4.8682e-2,-4.0867e-4,-2.1587e-2,9.3433])*1e-6 + else: + cs = np.array([-7.4078,1.4269e1,1.1875e-1,-2.3090e-3,-6.9372e-3,9.7141])*1e-6 + k = cs[0]*sqL + cs[1]*sqL**2 + cs[2]*distance_s + cs[3]*distance_s**2 + cs[4]*sqL*distance_s + cs[5] + + elif opt_model == "Uniform": + # Baseline optimisation considering a uniform semi-major axis distribution and orbit + # projection effects, approximated via a polynomial fit to an MC analysis + sqL = l_sun**0.5 + if sqL > 0.28: #FGKs + cs = np.array([1.9934e-1,6.4546e-2,2.6083e-2,-8.1794e-5,-1.7035e-2,8.3007])*1e-6 + else: #Ms + cs = np.array([-2.9216,4.4454,6.5858e-2,-1.4577e-3,2.8476e-2,8.4299])*1e-6 + k = cs[0]*sqL + cs[1]*sqL**2 + cs[2]*distance_s + cs[3]*distance_s**2 + cs[4]*sqL*distance_s + cs[5] + + baseline = k / hz_center_rad self.apply_baseline(baseline=baseline) @@ -266,7 +290,8 @@ def get_snr(self, # adjust baseline of array and give new baseline to transmission generator plugin self.adjust_bl_to_hz(hz_center=float(self.data.catalog.hz_center.iloc[n]), - distance_s=float(self.data.catalog.distance_s.iloc[n])) + distance_s=float(self.data.catalog.distance_s.iloc[n]), + l_sun=float(self.data.catalog.l_sun.iloc[n]),) # get transmission map _, _, self.data.inst['t_map'], _, _ = self.run_socket(s_name='transmission', @@ -514,7 +539,8 @@ def get_spectrum(self, else: # adjust baseline to HZ self.adjust_bl_to_hz(hz_center=hz_center, - distance_s=distance_s) + distance_s=distance_s, + l_sun=l_sun) # calculate the transmission map _, _, self.data.inst['t_map'], _, _ = self.run_socket(s_name='transmission', @@ -689,7 +715,8 @@ def get_signal(self, # adjust baseline to habitable zone self.adjust_bl_to_hz(hz_center=hz_center, - distance_s=distance_s) + distance_s=distance_s, + l_sun=l_sun) # use spectres to rescale the spectrum onto the correct wl bins flux_planet_spectrum_input = flux_planet_spectrum diff --git a/lifesim/util/options.py b/lifesim/util/options.py index fc60412..f9eac22 100644 --- a/lifesim/util/options.py +++ b/lifesim/util/options.py @@ -44,6 +44,10 @@ class Options(object): ``'darwinsim'`` - ``'habitable'`` : Model used for calculating the habitable zone, possible options are ``'MS'`` and ``'POST_MS'`` + - ``'fov_taper'`` : Model used for tapering the field of view, possible options are + ``'gaussian'`` and ``'none'`` + - ``'baseline_optimisation'`` : Model used for optimising the baseline, possible options are + ``'ref_wl'``, ``'Bryson'`` and ``'Uniform'`` optimization : dict Options concerning the methods used to optimally distribute the observing time. - ``'N_pf'`` : Number of sampling locations per orbit. 
@@ -80,7 +84,8 @@ def __init__(self): self.models = {'localzodi': '', 'habitable': '', - 'fov_taper': ''} + 'fov_taper': '', + 'baseline_optimisation': ''} self.optimization = {'N_pf': 0., 'snr_target': 0., @@ -118,6 +123,7 @@ def set_scenario(self, self.models['localzodi'] = 'darwinsim' self.models['habitable'] = 'MS' self.models['fov_taper'] = 'gaussian' + self.models['baseline_optimisation'] = 'ref_wl' self.optimization['N_pf'] = 25 self.optimization['snr_target'] = 7 From 9798caa382bc1c08159cbed78212e23543750779 Mon Sep 17 00:00:00 2001 From: JonahHansen Date: Tue, 23 Sep 2025 10:49:52 +0200 Subject: [PATCH 02/27] Update core.py Removed hack to make YAML accept numpy floats --- lifesim/core/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lifesim/core/core.py b/lifesim/core/core.py index e27174e..dd5a009 100644 --- a/lifesim/core/core.py +++ b/lifesim/core/core.py @@ -475,7 +475,7 @@ def write_config(self): def build_from_config(self, filename: str): with open(filename) as file: - config_dict = yaml.load(file, Loader=yaml.UnsafeLoader) + config_dict = yaml.load(file, Loader=yaml.FullLoader) self.data.options.array = convert_to_np(config_dict['array']) self.data.options.optimization = convert_to_np(config_dict['optimization']) From f021ab16276b972e003bc51ed5c2eac743d286d8 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Mon, 3 Nov 2025 11:00:48 +0100 Subject: [PATCH 03/27] changed reference to Dannert+22 --- lifesim/instrument/instrument.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lifesim/instrument/instrument.py b/lifesim/instrument/instrument.py index 4000c67..917ea3c 100644 --- a/lifesim/instrument/instrument.py +++ b/lifesim/instrument/instrument.py @@ -184,7 +184,7 @@ def adjust_bl_to_hz(self, hz_center_rad = hz_center / distance_s / (3600 * 180) * np.pi # in rad # put first transmission peak of optimal wl on center of HZ - # for the origin of the value 0.5.. see Ottiger+2021 + # for the origin of the value 0.5.. see Dannert+2022 baseline = (0.589645 / hz_center_rad * self.data.options.other['wl_optimal'] * 10 ** (-6)) From 56d284160053b1ad7ff7ac465c9e75864d96d3ba Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Tue, 4 Nov 2025 15:52:31 +0100 Subject: [PATCH 04/27] first running implementation of multiprocessing, validated against single processing runs on small catalogs --- lifesim/instrument/instrument.py | 212 ++++++++++++++++++------------- lifesim/util/options.py | 6 +- 2 files changed, 130 insertions(+), 88 deletions(-) diff --git a/lifesim/instrument/instrument.py b/lifesim/instrument/instrument.py index 917ea3c..3ee2f95 100644 --- a/lifesim/instrument/instrument.py +++ b/lifesim/instrument/instrument.py @@ -1,9 +1,14 @@ from warnings import warn +from copy import deepcopy +from typing import List, Optional, Sequence, Any import numpy as np +import pandas as pd from tqdm import tqdm from spectres import spectres from PyQt5.QtGui import QGuiApplication +from joblib import Parallel, delayed, parallel_config +from joblib_progress import joblib_progress from lifesim.core.modules import InstrumentModule from lifesim.util.habitable import single_habitable_zone @@ -228,7 +233,9 @@ def apply_baseline(self, def get_snr(self, save_mode: bool = False): """ - Calculates the one-hour signal-to-noise ration for all planets in the catalog. + Calculates the one-hour signal-to-noise ration for all planets in the catalog. 
Switches between single and + multi processing depending on the n_cpu option. + Parameters ---------- safe_mode : bool @@ -239,12 +246,6 @@ def get_snr(self, # options are applied before the simulation run self.apply_options() - # currently, the choice of integration time here is arbitrary. Since the background limited - # case is assumed, the SNR scales with sqrt(integration time) and through this, the SNR - # for any integration time can be calculated by knowing the SNR of a specific integration - # time - integration_time = 60 * 60 - self.data.catalog['snr_1h'] = np.zeros_like(self.data.catalog.nstar, dtype=float) self.data.catalog['baseline'] = np.zeros_like(self.data.catalog.nstar, dtype=float) if save_mode: @@ -253,13 +254,38 @@ def get_snr(self, self.data.catalog['photon_rate_planet'] = None self.data.catalog['photon_rate_noise'] = None + if self.data.options.other['n_cpu'] == 1: + self.get_snr_single_processing(save_mode=save_mode) + elif self.data.options.other['n_cpu'] > 1: + self.get_snr_multi_processing(save_mode=save_mode) + else: + raise ValueError('n_cpu option must be >= 1') + + def get_snr_single_processing(self, + save_mode: bool = False, + verbose : bool = True): + """ + Calculates the one-hour signal-to-noise ration for all planets in the catalog. + Parameters + ---------- + safe_mode : bool + If save mode is enables, the individual photon counts of the planet and noise sources + are written to the catalog. + """ + + # currently, the choice of integration time here is arbitrary. Since the background limited + # case is assumed, the SNR scales with sqrt(integration time) and through this, the SNR + # for any integration time can be calculated by knowing the SNR of a specific integration + # time + integration_time = 60 * 60 + # create mask returning only unique stars _, temp = np.unique(self.data.catalog.nstar, return_index=True) star_mask = np.zeros_like(self.data.catalog.nstar, dtype=bool) star_mask[temp] = True # iterate over all stars to calculate noise specific to stars - for i, n in enumerate(tqdm(np.where(star_mask)[0])): + for i, n in enumerate(tqdm(np.where(star_mask)[0], disable=not verbose)): # if i == 10: # break nstar = self.data.catalog.nstar.iloc[n] @@ -367,6 +393,40 @@ def get_snr(self, / self.data.inst['eff_tot'] ).sum() + def get_snr_multi_processing(self, + save_mode: bool = False): + + # divide the catalog into roughly equal chunks for each cpu + n_star, occ_star = np.unique(self.data.catalog.nstar, return_counts=True) + star_groups = balanced_partition_greedy(occ=occ_star, items=n_star, n_groups=self.data.options.other['n_cpu']*10) + + sub_catalogs = [self.data.catalog[np.isin(self.data.catalog.nstar, sg)] for sg in star_groups] + group_sizes = [len(sc) for sc in sub_catalogs] + per_dev = (np.max(group_sizes) - np.min(group_sizes)) / np.mean(group_sizes) + print(f'Maximum deviation in group sizes: {per_dev*100:.1f}%') + + reference_bus = deepcopy(self) + del reference_bus.data.catalog + + with parallel_config( + backend="loky", inner_max_num_threads=1 + ), joblib_progress( + description="Running SNR calculation in parallel ...", + total=len(star_groups), + ): + results = Parallel(n_jobs=self.data.options.other['n_cpu'])( + delayed(mp_runner)( + bus=reference_bus, + catalog=sc, + save_mode=save_mode + ) + for sc in sub_catalogs) + + # combine results back into main catalog + self.data.catalog = pd.concat(results) + self.data.catalog.sort_values('id', inplace=True) + + # TODO: fix units in documentation def get_spectrum(self, temp_s: float, # in K @@ 
-659,12 +719,6 @@ def get_signal(self, Returns the noise contribution in [photons] """ - # TODO: remove by 2024 - warn('The get_spectrum function was implemented with a major bug between versions 0.2.16 ' - 'and 0.2.24 in which the noise level was twice as large as the correct value. If ' - 'you created results with the versions in question, please validate them with the ' - 'latest version of LIFEsim.') - # options are applied before the simulation run self.apply_options() @@ -768,78 +822,66 @@ def get_signal(self, return signal, flux_planet +def mp_runner(bus, + catalog, + save_mode: bool = False): + bus = deepcopy(bus) + bus.data.catalog = catalog + bus.get_snr_single_processing(save_mode=save_mode, + verbose=False) + return bus.data.catalog + +def balanced_partition_greedy(occ: Sequence[int], + n_groups: int, + items: Optional[Sequence[Any]] = None) -> List[List[Any]]: + """ + Greedy multi-way partition using NumPy for faster min selection. + Assigns the largest items first to the current smallest-sum group. -def multiprocessing_runner(input_dict: dict): - # create mask returning only unique stars - universes = np.unique( - input_dict['catalog'].nuniverse[input_dict['catalog'].nstar == input_dict['nstar']], - return_index=False - ) - - # get transmission map - _, _, self.data.inst['t_map'], _, _ = self.run_socket(s_name='transmission', - method='transmission_map', - map_selection='tm3') - - for nuniverse in universes: - inst.z = input_dict['catalog'][np.logical_and(input_dict['catalog'].nstar == input_dict['nstar'], - input_dict['catalog'].nuniverse == nuniverse)].z.iloc[0] - - # redo calculation for exozodi - inst.create_exozodi() - inst.sensitivity_coefficients(exozodi_only=True) - inst.fundamental_noise(exozodi_only=True) - - # go through all planets for the chosen star - for _, n_p in enumerate(np.argwhere( - np.logical_and(input_dict['catalog'].nstar.to_numpy() == input_dict['nstar'], - input_dict['catalog'].nuniverse.to_numpy() == nuniverse))[:, 0]): - inst.temp_planet = input_dict['catalog']['temp_p'].iloc[n_p] - inst.radius_planet = input_dict['catalog']['radius_p'].iloc[n_p] - inst.separation_planet = (input_dict['catalog']['angsep'].iloc[n_p] - * input_dict['catalog']['distance_s'].iloc[n_p]) - - # ----- must be repeated for every planet ----- - inst.create_planet(force=True) - inst.planet_signal() - - if (inst.chopping == 'nchop'): - inst.sn_nchop() - else: - inst.sn_chop() - - # save baseline - input_dict['catalog']['baseline'].iat[n_p] = input_dict['baseline'] - - # save snr results - if (inst.chopping == 'nchop'): - input_dict['catalog'].t_rot.iat[n_p] = input_dict['integration_time'] - input_dict['catalog'].signal.iat[n_p] = inst.photon_rates.loc['signal', 'nchop'].sum() - input_dict['catalog'].photon_noise.iat[n_p] = ( - np.sqrt((inst.photon_rates.loc['pn', 'nchop'] ** 2).sum())) - input_dict['catalog'].systematic_noise.iat[n_p] = ( - np.sqrt((inst.photon_rates.loc['sn', 'nchop'] ** 2).sum())) - else: - input_dict['catalog'].t_rot.iat[n_p] = input_dict['integration_time'] - input_dict['catalog'].signal.iat[n_p] = inst.photon_rates.loc['signal', 'chop'].sum() - input_dict['catalog'].photon_noise.iat[n_p] = ( - np.sqrt((inst.photon_rates.loc['pn', 'chop'] ** 2).sum())) - input_dict['catalog'].systematic_noise.iat[n_p] = ( - np.sqrt((inst.photon_rates.loc['sn', 'chop'] ** 2).sum())) - - if input_dict['safe_mode']: - if (inst.chopping == 'nchop'): - input_dict['noise_catalog'].loc[input_dict['catalog']['id'].iloc[n_p]] = ( - inst.photon_rates.nchop) - else: - 
input_dict['noise_catalog'].loc[input_dict['catalog']['id'].iloc[n_p]] = ( - inst.photon_rates.chop) - - return_dict = {'catalog': input_dict['catalog']} - if input_dict['safe_mode']: - return_dict['noise_catalog'] = input_dict['noise_catalog'] - - return return_dict + Parameters + ---------- + occ : Sequence[int] + Sequence containing the weight (e.g. number of sub-objects) for each item. + n_groups : int + Number of groups to partition into. Must be >= 1. + items : Optional[Sequence[Any]] + Sequence of items corresponding to `occ`. If ``None``, the function will use the + indices ``range(len(occ))`` as the items. + + Returns + ------- + List[List[Any]] + List of length ``n_groups`` where each element is a list of the assigned items + (or indices if ``items`` was ``None``). Group totals are balanced using a greedy + heuristic; some groups may remain empty if ``n_groups`` > ``len(occ)``. + + Raises + ------ + ValueError + If ``n_groups`` < 1 or if ``items`` is provided but its length does not match ``occ``. + """ + if n_groups <= 0: + raise ValueError("n_groups must be >= 1") + n = len(occ) + if items is None: + items = list(range(n)) + if len(items) != n: + raise ValueError("items and occ must have same length") + + occ_arr = np.asarray(occ, dtype=np.int64) + # sort indices by descending weight (largest first) + idxs = np.argsort(-occ_arr) + + groups: List[List[Any]] = [[] for _ in range(n_groups)] + sums = np.zeros(n_groups, dtype=np.int64) + + # assign each item to the group with the smallest current sum + for i in idxs: + g = int(np.argmin(sums)) # fast C-level operation + groups[g].append(items[int(i)]) + sums[g] += int(occ_arr[int(i)]) + + return groups diff --git a/lifesim/util/options.py b/lifesim/util/options.py index 715c6d9..0a8607c 100644 --- a/lifesim/util/options.py +++ b/lifesim/util/options.py @@ -76,7 +76,8 @@ def __init__(self): 'n_plugins': 0, 'output_path': None, 'output_filename': None, - 'fov_threshold': 0.} + 'fov_threshold': 0., + 'n_cpu': 1} self.models = {'localzodi': '', 'habitable': '', @@ -120,8 +121,7 @@ def set_scenario(self, self.optimization['N_pf'] = 25 self.optimization['snr_target'] = 7 - self.optimization['limit'] = {'A': np.inf, - 'F': np.inf, + self.optimization['limit'] = {'F': np.inf, 'G': np.inf, 'K': np.inf, 'M': np.inf} From d9b52221c02f3148962cf52682ae36c4fd009188 Mon Sep 17 00:00:00 2001 From: "Felix A. 
Dannert" <62441325+fdannert@users.noreply.github.com> Date: Tue, 4 Nov 2025 17:15:58 +0100 Subject: [PATCH 05/27] added science yield wrapper --- lifesim/__init__.py | 4 +- lifesim/analysis/yield_wrapper.py | 107 ++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 lifesim/analysis/yield_wrapper.py diff --git a/lifesim/__init__.py b/lifesim/__init__.py index 0f813c4..c61fa63 100644 --- a/lifesim/__init__.py +++ b/lifesim/__init__.py @@ -13,4 +13,6 @@ from lifesim.optimize.optimizer import Optimizer from lifesim.optimize.ahgs import AhgsModule -from lifesim.gui.spectrum_gui import Gui \ No newline at end of file +from lifesim.gui.spectrum_gui import Gui + +from lifesim.analysis.yield_wrapper import ScienceYield diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py new file mode 100644 index 0000000..e6d4705 --- /dev/null +++ b/lifesim/analysis/yield_wrapper.py @@ -0,0 +1,107 @@ +import time +import shutil +import os + +import numpy as np + +import lifesim + +class ScienceYield: + def __init__(self, + config_path, + catalog_path, + output_path, + n_cpu: int = 1): + self.config_path = config_path + self.catalog_path = catalog_path + self.output_path = output_path + self.n_cpu = n_cpu + + def compute_yield(self, + output_path, + output_filename, + run_maxsep): + + print('START OF RUN: ', time.ctime()) + print('RUN NAME: ', output_filename) + print('----------------------------') + + t = time.time() + # create bus + bus = lifesim.Bus() + + # setting the options + bus.build_from_config(filename=self.config_path) + bus.data.options.set_manual(n_cpu=self.n_cpu) # speed up calculation + + bus.data.options.set_manual( + output_path=output_path) + bus.data.options.set_manual(output_filename=output_filename) + + + # ---------- Loading the Catalog ---------- + bus.data.import_catalog(input_path=self.catalog_path) + + # ---------- Creating the Instrument ---------- + + # create modules and add to bus + instrument = lifesim.Instrument(name='inst') + bus.add_module(instrument) + + transm = lifesim.TransmissionMap(name='transm') + bus.add_module(transm) + + exo = lifesim.PhotonNoiseExozodi(name='exo') + bus.add_module(exo) + local = lifesim.PhotonNoiseLocalzodi(name='local') + bus.add_module(local) + star = lifesim.PhotonNoiseStar(name='star') + bus.add_module(star) + + # connect all modules + bus.connect(('inst', 'transm')) + bus.connect(('inst', 'exo')) + bus.connect(('inst', 'local')) + bus.connect(('inst', 'star')) + + bus.connect(('star', 'transm')) + + if run_maxsep: + bus.data.catalog = bus.data.catalog[bus.data.catalog.habitable] + bus.data.catalog['angsep'] = bus.data.catalog['maxangsep'] + + instrument.get_snr() + bus.save() + + del bus + + print('Generation took ', (time.time() - t) / 60, ' minutes to complete.') + + def run_aperture_sweep(self, + mirror_diameters): + + for ndim, diameter in enumerate(mirror_diameters): + print('') + print('') + print('STARTING RUN FOR MIRROR DIAMETER: ', diameter) + print('AT TIME: ', time.ctime()) + + print('Preparing directories... ', end='') + + output_directory = os.path.join(self.output_path, 'diam_' + str(np.round(diameter, 2)).replace('.', '_')) + if not os.path.exists(output_directory): + os.makedirs(output_directory) + + print('[Done]') + + print('Commencing base run... 
') + self.compute_yield(output_path=f'{output_directory}/', + output_filename='sweep_diam_' + str(np.round(diameter, 2)).replace('.', '_'), + run_maxsep=False) + print('[Done]') + + print('Commencing maxsep run... ') + self.compute_yield(output_path=f'{output_directory}/', + output_filename='sweep_diam_maxsep_' + str(np.round(diameter, 2)).replace('.', '_'), + run_maxsep=True) + print('[Done]') From 82db3aabcdcf33a51105fa4c3ee61c367657c4dd Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Tue, 4 Nov 2025 17:22:26 +0100 Subject: [PATCH 06/27] load catalog from ppop --- lifesim/analysis/yield_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index e6d4705..e3daa02 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -40,7 +40,7 @@ def compute_yield(self, # ---------- Loading the Catalog ---------- - bus.data.import_catalog(input_path=self.catalog_path) + bus.data.catalog_from_ppop(input_path=self.catalog_path) # ---------- Creating the Instrument ---------- From a35c73e9aa5aab8c35012231c19e4ad5f70974de Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Wed, 5 Nov 2025 08:38:39 +0100 Subject: [PATCH 07/27] changed pandas assignment in instrument.py --- lifesim/instrument/instrument.py | 36 +++++++++++++++++--------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/lifesim/instrument/instrument.py b/lifesim/instrument/instrument.py index 3ee2f95..5fb837b 100644 --- a/lifesim/instrument/instrument.py +++ b/lifesim/instrument/instrument.py @@ -330,10 +330,10 @@ def get_snr_single_processing(self, self.data.catalog.nuniverse == nuniverse))[0][0] noise_bg_universe_temp = (noise_bg_universe * self.data.catalog.z.iloc[n_u] - / self.data.catalog.z.iloc[n]) + / self.data.catalog.z.iloc[n]) noise_bg = ((noise_bg_star + noise_bg_universe_temp) - * integration_time * self.data.inst['eff_tot'] * 2) + * integration_time * self.data.inst['eff_tot'] * 2) # go through all planets for the chosen star for _, n_p in enumerate(np.argwhere( @@ -370,29 +370,34 @@ def get_snr_single_processing(self, # Add up the noise and caluclate the SNR noise = noise_bg + noise_planet - self.data.catalog.snr_1h.iat[n_p] = np.sqrt((flux_planet ** 2 / noise).sum()) + + # use index label to avoid chained assignment / view-copy problems + idx_label = self.data.catalog.index[n_p] + self.data.catalog.loc[idx_label, 'snr_1h'] = np.sqrt((flux_planet ** 2 / noise).sum()) # save baseline - self.data.catalog['baseline'].iat[n_p] = self.data.inst['bl'] + self.data.catalog.loc[idx_label, 'baseline'] = self.data.inst['bl'] if save_mode: - self.data.catalog.noise_astro.iat[n_p] = [noise_bg] - self.data.catalog.planet_flux_use.iat[n_p] = ( + self.data.catalog.loc[idx_label, 'noise_astro'] = [noise_bg] + self.data.catalog.loc[idx_label, 'planet_flux_use'] = ( [flux_planet_thermal * integration_time * self.data.inst['eff_tot'] * self.data.inst['telescope_area']]) - self.data.catalog['photon_rate_planet'].iat[n_p] = ( + self.data.catalog.loc[idx_label, 'photon_rate_planet'] = ( flux_planet / integration_time / self.data.inst['eff_tot'] ).sum() - self.data.catalog['photon_rate_noise'].iat[n_p] = ( + self.data.catalog.loc[idx_label, 'photon_rate_noise'] = ( noise / integration_time / self.data.inst['eff_tot'] ).sum() + # ...existing code... 
+ def get_snr_multi_processing(self, save_mode: bool = False): @@ -488,12 +493,6 @@ def get_spectrum(self, Returns the noise contribution in [photons] """ - # TODO: remove by 2024 - warn('The get_spectrum function was implemented with a major bug between versions 0.2.16 ' - 'and 0.2.24 in which the noise level was twice as large as the correct value. If ' - 'you created results with the versions in question, please validate them with the ' - 'latest version of LIFEsim.') - # options are applied before the simulation run self.apply_options() @@ -719,6 +718,12 @@ def get_signal(self, Returns the noise contribution in [photons] """ + # TODO: remove by 2024 + warn('The get_spectrum function was implemented with a major bug between versions 0.2.16 ' + 'and 0.2.24 in which the noise level was twice as large as the correct value. If ' + 'you created results with the versions in question, please validate them with the ' + 'latest version of LIFEsim.') + # options are applied before the simulation run self.apply_options() @@ -882,6 +887,3 @@ def balanced_partition_greedy(occ: Sequence[int], sums[g] += int(occ_arr[int(i)]) return groups - - - From 7040fff7f0552b75345ef111d19e084ab41f2488 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Wed, 5 Nov 2025 16:13:40 +0100 Subject: [PATCH 08/27] added cnvergence test to yield wrapper --- lifesim/analysis/yield_wrapper.py | 273 ++++++++++++++++++++++++++++-- lifesim/optimize/ahgs.py | 12 +- lifesim/optimize/optimizer.py | 10 +- setup.py | 10 +- 4 files changed, 277 insertions(+), 28 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index e3daa02..d728c80 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -1,8 +1,16 @@ import time import shutil import os +import contextlib +from copy import deepcopy +import json import numpy as np +from joblib import Parallel, delayed, parallel_config +from joblib_progress import joblib_progress +import pandas as pd +from tqdm import tqdm +import matplotlib.pyplot as plt import lifesim @@ -11,16 +19,18 @@ def __init__(self, config_path, catalog_path, output_path, - n_cpu: int = 1): + n_cpu: int = 1, + cat_from_ppop: bool = True): self.config_path = config_path self.catalog_path = catalog_path self.output_path = output_path self.n_cpu = n_cpu + self.cat_from_ppop = cat_from_ppop - def compute_yield(self, - output_path, - output_filename, - run_maxsep): + def compute_snrs(self, + output_path, + output_filename, + run_maxsep): print('START OF RUN: ', time.ctime()) print('RUN NAME: ', output_filename) @@ -40,7 +50,10 @@ def compute_yield(self, # ---------- Loading the Catalog ---------- - bus.data.catalog_from_ppop(input_path=self.catalog_path) + if self.cat_from_ppop: + bus.data.catalog_from_ppop(input_path=self.catalog_path) + else: + bus.data.import_catalog(input_path=self.catalog_path) # ---------- Creating the Instrument ---------- @@ -77,8 +90,8 @@ def compute_yield(self, print('Generation took ', (time.time() - t) / 60, ' minutes to complete.') - def run_aperture_sweep(self, - mirror_diameters): + def run_aperture_sweep_snr(self, + mirror_diameters): for ndim, diameter in enumerate(mirror_diameters): print('') @@ -95,13 +108,245 @@ def run_aperture_sweep(self, print('[Done]') print('Commencing base run... 
') - self.compute_yield(output_path=f'{output_directory}/', - output_filename='sweep_diam_' + str(np.round(diameter, 2)).replace('.', '_'), - run_maxsep=False) + self.compute_snrs(output_path=f'{output_directory}/', + output_filename='sweep_diam_' + str(np.round(diameter, 2)).replace('.', '_'), + run_maxsep=False) print('[Done]') print('Commencing maxsep run... ') - self.compute_yield(output_path=f'{output_directory}/', - output_filename='sweep_diam_maxsep_' + str(np.round(diameter, 2)).replace('.', '_'), - run_maxsep=True) + self.compute_snrs(output_path=f'{output_directory}/', + output_filename='sweep_diam_maxsep_' + str(np.round(diameter, 2)).replace('.', '_'), + run_maxsep=True) print('[Done]') + + def run_optimizer_sweep(self, + run_name): + # get the names of all subdirectories in output_path + subdirs = [d for d in os.listdir(self.output_path) if os.path.isdir(os.path.join(self.output_path, d))] + + # check a directory with the name run_name already exists in output_path, otherwise create it + final_output_path = os.path.join(self.output_path, run_name) + if not os.path.exists(final_output_path): + os.makedirs(final_output_path) + else: + raise ValueError('Directory already exists: ' + final_output_path) + + run_configs = [] + + for subdir in subdirs: + # make a list of all files ending in .hdf5 in subdir, then keep only the part of the filename before '_catalog.hdf5' and only if the sting does not contain 'maxsep' + catalog_files = [f.split('_catalog.hdf5')[0] for f in os.listdir(os.path.join(self.output_path, subdir)) + if f.endswith('_catalog.hdf5') and 'maxsep' not in f] + + # create a subdir of the same name in final_output_path, no existence check required + output_directory = os.path.join(final_output_path, subdir) + os.makedirs(output_directory) + + for catalog_file in catalog_files: + if self.n_cpu == 1: + compute_yields_mp( + output_filename=catalog_file, + output_path=output_directory + '/', + catalog_path=os.path.join(self.output_path, subdir, catalog_file + '_catalog.hdf5'), + config_path=self.config_path + ) + else: + run_configs.append({'output_filename': catalog_file, + 'output_path': output_directory + '/', + 'catalog_path': os.path.join(self.output_path, subdir, catalog_file + '_catalog.hdf5') + }) + + if self.n_cpu > 1: + with parallel_config( + backend="loky", inner_max_num_threads=1 + ), joblib_progress( + description="Running SNR calculation in parallel ...", + total=len(run_configs), + ): + Parallel(n_jobs=self.n_cpu)( + delayed(compute_yields_mp)( + output_filename=rc['output_filename'], + output_path=rc['output_path'], + catalog_path=rc['catalog_path'], + config_path=self.config_path + ) + for rc in run_configs) + + def run_covergence_test(self, + run_name, + catalog_path, + output_path, + min_universes, + num_steps, + plot=False): + if not os.path.exists(output_path): + os.makedirs(output_path) + + # determine number universes + catalog = pd.read_hdf(catalog_path, key='catalog') + total_universes = np.unique(catalog.nuniverse).shape[0] + del catalog + + if min_universes > total_universes-1: + raise ValueError('min_universes must be less than the total number of universes in the catalog.') + + if num_steps is None: + num_steps = total_universes - min_universes + 1 + elif num_steps > total_universes - min_universes + 1: + raise ValueError('num_steps is too large for the given min_universes and total universes in the catalog.') + + uni_sel = np.linspace(min_universes, total_universes, num_steps, dtype=int) + + if self.n_cpu > 1: + with parallel_config( + 
backend="loky", inner_max_num_threads=1 + ), joblib_progress( + description="Running SNR calculation in parallel ...", + total=len(uni_sel), + ): + results = Parallel(n_jobs=self.n_cpu)( + delayed(compute_yields_mp)( + output_filename=run_name + '_nuni_' + str(us), + output_path=output_path, + catalog_path=catalog_path, + config_path=self.config_path, + uni_sel=us, + return_yields=True + ) + for us in uni_sel) + else: + results = [] + for us in tqdm(uni_sel): + res = compute_yields_mp( + output_filename=run_name + '_nuni_' + str(us), + output_path=output_path, + catalog_path=catalog_path, + config_path=self.config_path, + uni_sel=us, + return_yields=True + ) + + results.append(res) + + experiments = list(results[0][1].keys()) + yield_convergence = {exp: {'n_universes': [], + 'mean_yield': [], + 'p16_yield': [], + 'p84_yield': []} + for exp in experiments} + for res in results: + n_uni, yields = res + for exp in experiments: + yield_convergence[exp]['n_universes'].append(n_uni) + yield_convergence[exp]['mean_yield'].append(yields[exp][0]) + yield_convergence[exp]['p16_yield'].append(yields[exp][1]) + yield_convergence[exp]['p84_yield'].append(yields[exp][2]) + + # save yield_convergence to a json file + with open(os.path.join(output_path, run_name + '_yield_convergence.json'), 'w') as f: + json.dump(yield_convergence, f, indent=4) + + if plot: + fig, ax = plt.subplots(figsize=(8, 6)) + for exp in experiments: + ax.plot(yield_convergence[exp]['n_universes'], + yield_convergence[exp]['mean_yield'], + label=exp) + ax.fill_between(yield_convergence[exp]['n_universes'], + yield_convergence[exp]['p16_yield'], + yield_convergence[exp]['p84_yield'], + alpha=0.3) + ax.set_xlabel('Number of Universes') + ax.set_ylabel('Yield') + ax.set_title('Yield Convergence Test') + ax.legend() + plt.savefig(os.path.join(output_path, run_name + '_yield_convergence.pdf')) + plt.close() + + +def get_yields(bus, + return_yields=False): + + yields = {} + + for exp in bus.data.options.optimization['experiments'].keys(): + mask = np.logical_and.reduce((bus.data.catalog.temp_s + >= bus.data.options.optimization['experiments'][exp]['temp_s_min'], + bus.data.catalog.temp_s + <= bus.data.options.optimization['experiments'][exp]['temp_s_max'], + bus.data.catalog.radius_p + >= bus.data.options.optimization['experiments'][exp]['radius_p_min'], + bus.data.catalog.radius_p + <= bus.data.options.optimization['experiments'][exp]['radius_p_max'], + bus.data.catalog.detected + )) + if bus.data.options.optimization['experiments'][exp]['in_HZ']: + mask = np.logical_and(mask, + bus.data.catalog.habitable) + + _, tyield = np.unique(bus.data.catalog[mask].nuniverse, return_counts=True) + + if tyield.size != 0: + yields[exp] = [float(np.mean(tyield)), float(np.percentile(tyield, 15.9)), float(np.percentile(tyield, 84.1))] + else: + yields[exp] = [0.0, 0.0, 0.0] + + return yields + +def compute_yields_mp(output_filename, + output_path, + catalog_path, + config_path, + uni_sel=None, + return_yields=False): + + t = time.time() + # create bus + bus = lifesim.Bus() + + # setting the options + bus.build_from_config(filename=config_path) + bus.data.options.set_manual(n_cpu=1) # speed up calculation + + bus.data.options.set_manual( + output_path=output_path) + bus.data.options.set_manual(output_filename=output_filename) + + # ---------- Loading the Catalog ---------- + bus.data.import_catalog(input_path=catalog_path) + + if uni_sel is not None: + selected_universes = np.random.choice(np.unique(bus.data.catalog.nuniverse), replace=False, 
size=uni_sel) + bus.data.catalog = bus.data.catalog[np.isin(bus.data.catalog.nuniverse, selected_universes)] + + # ---------- Creating the Instrument ---------- + + # create modules and add to bus + instrument = lifesim.Instrument(name='inst') + bus.add_module(instrument) + + transm = lifesim.TransmissionMap(name='transm') + bus.add_module(transm) + + # connect all modules + bus.connect(('inst', 'transm')) + + # optimizing the result + opt = lifesim.Optimizer(name='opt') + bus.add_module(opt) + ahgs = lifesim.AhgsModule(name='ahgs') + bus.add_module(ahgs) + + bus.connect(('transm', 'opt')) + bus.connect(('inst', 'opt')) + bus.connect(('opt', 'ahgs')) + + with contextlib.redirect_stdout(None): + opt.ahgs() + + bus.save() + + if return_yields: + yields = get_yields(bus=bus, + return_yields=return_yields) + return int(uni_sel), yields diff --git a/lifesim/optimize/ahgs.py b/lifesim/optimize/ahgs.py index 742e31c..2dbef2d 100644 --- a/lifesim/optimize/ahgs.py +++ b/lifesim/optimize/ahgs.py @@ -55,14 +55,16 @@ def observe_star(self, * np.sqrt(int_actual / (60 * 60)))**2) + # Changed: use label-based .loc to avoid chained-assignment warnings for _, i in enumerate(np.where(mask)[0]): - if (not self.data.catalog.detected.iloc[i]) and \ - (self.data.catalog.snr_current.iloc[i] + idx = self.data.catalog.index[i] + if (not self.data.catalog.loc[idx, 'detected']) and \ + (self.data.catalog.loc[idx, 'snr_current'] >= self.data.options.optimization['snr_target']): - self.data.catalog.detected.iat[i] = True - self.data.catalog.t_detected.iat[i] = deepcopy(self.tot_time) + self.data.catalog.loc[idx, 'detected'] = True + self.data.catalog.loc[idx, 't_detected'] = deepcopy(self.tot_time) exp_cols = [col for col in self.data.catalog.columns if col.startswith('exp_')] - true_experiments = [col[4:] for col in exp_cols if self.data.catalog.at[i, col]] + true_experiments = [col[4:] for col in exp_cols if self.data.catalog.loc[idx, col]] for exp in true_experiments: self.data.optm['exp_detected'][exp] += 1 diff --git a/lifesim/optimize/optimizer.py b/lifesim/optimize/optimizer.py index 9a6cdfb..ef47539 100644 --- a/lifesim/optimize/optimizer.py +++ b/lifesim/optimize/optimizer.py @@ -81,12 +81,12 @@ def ahgs(self): rmax = np.inf for stype, limit in self.data.options.optimization['limit'].items(): self.data.options.optimization['experiments'][stype] = { - 'radius_p_min': rmin, - 'radius_p_max': rmax, - 'temp_s_min': self.data.catalog[self.data.catalog.stype==stype].temp_s.min(), - 'temp_s_max': self.data.catalog[self.data.catalog.stype==stype].temp_s.max(), + 'radius_p_min': float(rmin), + 'radius_p_max': float(rmax), + 'temp_s_min': float(self.data.catalog[self.data.catalog.stype==stype].temp_s.min()), + 'temp_s_max': float(self.data.catalog[self.data.catalog.stype==stype].temp_s.max()), 'in_HZ': self.data.options.optimization['habitable'], - 'sample_size': self.data.options.optimization['limit'][stype], + 'sample_size': float(self.data.options.optimization['limit'][stype]), } if self.data.options.optimization['experiments'] is None: diff --git a/setup.py b/setup.py index 6f0e885..3c2c276 100644 --- a/setup.py +++ b/setup.py @@ -36,14 +36,16 @@ def get_version(rel_path: str) -> str: 'matplotlib>=3.7.0', 'numpy>=1.24.2', 'pandas>=1.5.3', - 'PyQt5==5.15.4', + 'PyQt5>=5.15.4', 'tqdm>=4.64.1', - 'tables>=3.8.0', - 'GitPython>=3.1.32'], + 'GitPython>=3.1.32', + 'joblib>=1.5.2', + 'joblib-progress>=1.0.6', + 'tables>=3.10.2'], license='GPLv3', zip_safe=False, keywords='lifesim', - python_requires='~=3.8', + 
python_requires='~=3.12', classifiers=[ 'Development Status :: 3 - Alpha', 'Intended Audience :: Science/Research', From aa0830d844ebaf3ca745ca69478f383f91683139 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Wed, 5 Nov 2025 16:31:34 +0100 Subject: [PATCH 09/27] suppress unneeded verbosity of LIFEsim in yield wrapper --- lifesim/analysis/yield_wrapper.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index d728c80..396295f 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -343,8 +343,7 @@ def compute_yields_mp(output_filename, with contextlib.redirect_stdout(None): opt.ahgs() - - bus.save() + bus.save() if return_yields: yields = get_yields(bus=bus, From 18c6049db2707c008422e1b304f3be6ea0b7bb87 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Wed, 5 Nov 2025 16:55:57 +0100 Subject: [PATCH 10/27] fixed bug where diameter was not taken into account --- lifesim/analysis/yield_wrapper.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index 396295f..d32c02c 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -30,7 +30,8 @@ def __init__(self, def compute_snrs(self, output_path, output_filename, - run_maxsep): + run_maxsep, + diameter=None): print('START OF RUN: ', time.ctime()) print('RUN NAME: ', output_filename) @@ -44,6 +45,9 @@ def compute_snrs(self, bus.build_from_config(filename=self.config_path) bus.data.options.set_manual(n_cpu=self.n_cpu) # speed up calculation + if diameter is not None: + bus.data.options.set_manual(diameter=diameter) + bus.data.options.set_manual( output_path=output_path) bus.data.options.set_manual(output_filename=output_filename) @@ -110,13 +114,15 @@ def run_aperture_sweep_snr(self, print('Commencing base run... ') self.compute_snrs(output_path=f'{output_directory}/', output_filename='sweep_diam_' + str(np.round(diameter, 2)).replace('.', '_'), - run_maxsep=False) + run_maxsep=False, + diameter=diameter) print('[Done]') print('Commencing maxsep run... 
') self.compute_snrs(output_path=f'{output_directory}/', output_filename='sweep_diam_maxsep_' + str(np.round(diameter, 2)).replace('.', '_'), - run_maxsep=True) + run_maxsep=True, + diameter=diameter) print('[Done]') def run_optimizer_sweep(self, From 51644c7ce19cff29309b2c87d68a32090ccb7518 Mon Sep 17 00:00:00 2001 From: Jonah Timothy Hansen Date: Thu, 27 Nov 2025 09:32:50 +0100 Subject: [PATCH 11/27] current state of my fork --- lifesim/core/core.py | 2 +- lifesim/instrument/instrument.py | 12 +++++++++--- lifesim/optimize/ahgs.py | 2 +- lifesim/optimize/optimizer.py | 2 +- lifesim/util/options.py | 11 +++++++++-- 5 files changed, 21 insertions(+), 8 deletions(-) diff --git a/lifesim/core/core.py b/lifesim/core/core.py index dd5a009..e27174e 100644 --- a/lifesim/core/core.py +++ b/lifesim/core/core.py @@ -475,7 +475,7 @@ def write_config(self): def build_from_config(self, filename: str): with open(filename) as file: - config_dict = yaml.load(file, Loader=yaml.FullLoader) + config_dict = yaml.load(file, Loader=yaml.UnsafeLoader) self.data.options.array = convert_to_np(config_dict['array']) self.data.options.optimization = convert_to_np(config_dict['optimization']) diff --git a/lifesim/instrument/instrument.py b/lifesim/instrument/instrument.py index a3c15a3..e081b6f 100644 --- a/lifesim/instrument/instrument.py +++ b/lifesim/instrument/instrument.py @@ -190,9 +190,10 @@ def adjust_bl_to_hz(self, # for the origin of the value 0.5.. see Ottiger+2021 k = 0.589645 * self.data.options.other['wl_optimal'] * 10 ** (-6) - elif opt_model == "Bryson": - # Baseline optimisation considering the semi-major axis distribution of the Bryson 21 - # occurance rates and orbit projection effects, approximated via a polynomial fit to an MC analysis + elif opt_model == "Kepler": + # Baseline optimisation considering the semi-major axis distribution of the Kepler + # occurance rates (SAG13/Bryson Hab2Max) and orbit projection effects, + # approximated via a polynomial fit to an MC analysis sqL = l_sun**0.5 if sqL > 0.28: #FGKs cs = np.array([-2.5655e-1,2.0727e-1,4.8682e-2,-4.0867e-4,-2.1587e-2,9.3433])*1e-6 @@ -212,6 +213,11 @@ def adjust_bl_to_hz(self, baseline = k / hz_center_rad + # If discrete, set baseline to nearest value + if self.data.options.models["discrete_baselines"]: + bls = np.array(self.data.options.array['bl_discrete']) + baseline = bls[np.abs(bls-baseline).argmin()] + self.apply_baseline(baseline=baseline) def apply_baseline(self, diff --git a/lifesim/optimize/ahgs.py b/lifesim/optimize/ahgs.py index 742e31c..ba61af8 100644 --- a/lifesim/optimize/ahgs.py +++ b/lifesim/optimize/ahgs.py @@ -64,7 +64,7 @@ def observe_star(self, exp_cols = [col for col in self.data.catalog.columns if col.startswith('exp_')] true_experiments = [col[4:] for col in exp_cols if self.data.catalog.at[i, col]] - for exp in true_experiments: + for exp in true_experiments[1:]: self.data.optm['exp_detected'][exp] += 1 else: raise ValueError('Delete mode not implemented for AHGS optimizer.') diff --git a/lifesim/optimize/optimizer.py b/lifesim/optimize/optimizer.py index 9a6cdfb..a2d5182 100644 --- a/lifesim/optimize/optimizer.py +++ b/lifesim/optimize/optimizer.py @@ -118,7 +118,7 @@ def ahgs(self): self.data.optm['hit_limit'][exp] = False self.data.optm['exp_detected'][exp] = 0 - + print(self.data.optm) self.data.optm['tot_time'] = 0 # in sec # add new columns to catalog diff --git a/lifesim/util/options.py b/lifesim/util/options.py index 10359e6..10af717 100644 --- a/lifesim/util/options.py +++ b/lifesim/util/options.py @@ 
-24,6 +24,8 @@ class Options(object): - ``'baseline'`` : Length of the shorter nulling baseline in [m]. - ``'bl_min'`` : Minimum allowed length of the shorter nulling baseline in [m]. - ``'bl_max'`` : Maximum allowed length of the shorter nulling baseline in [m]. + - ``'bl_discrete'``: Array of discrete baselines for use with "discrete" baseline + mode in [m]. - ``'ratio'`` : Ratio between the nulling and the imaging baseline. E.g. if the imaging baseline is twice as long as the nulling baseline, the ratio will be 2. - ``'t_slew'`` : Slew time required for the array to shift from one target system to @@ -47,7 +49,9 @@ class Options(object): - ``'fov_taper'`` : Model used for tapering the field of view, possible options are ``'gaussian'`` and ``'none'`` - ``'baseline_optimisation'`` : Model used for optimising the baseline, possible options are - ``'ref_wl'``, ``'Bryson'`` and ``'Uniform'`` + ``'ref_wl'``, ``'Kepler'``, ``'Uniform'``. + - ``'discrete_baselines'``: Boolean as to whether to use discrete baselines in the simulation + given by 'bl_discrete'. optimization : dict Options concerning the methods used to optimally distribute the observing time. - ``'N_pf'`` : Number of sampling locations per orbit. @@ -71,6 +75,7 @@ def __init__(self): 'baseline': 0., 'bl_min': 0., 'bl_max': 0., + 'bl_discrete': [0.], 'ratio': 0., 't_slew': 0., 't_efficiency': 0.} @@ -85,7 +90,8 @@ def __init__(self): self.models = {'localzodi': '', 'habitable': '', 'fov_taper': '', - 'baseline_optimisation': ''} + 'baseline_optimisation': '', + 'discrete_baselines': False} self.optimization = {'N_pf': 0., 'snr_target': 0., @@ -123,6 +129,7 @@ def set_scenario(self, self.models['habitable'] = 'MS' self.models['fov_taper'] = 'gaussian' self.models['baseline_optimisation'] = 'ref_wl' + self.models['discrete_baselines'] = False self.optimization['N_pf'] = 25 self.optimization['snr_target'] = 7 From 7ec06afe2ad80a64e0d80418146c2aa8dcb34ba3 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Thu, 4 Dec 2025 16:11:59 +0100 Subject: [PATCH 12/27] documentation for yield wrapper --- lifesim/analysis/yield_wrapper.py | 30 +++-- lifesim/analysis/yield_wrapper_notes.md | 172 ++++++++++++++++++++++++ 2 files changed, 191 insertions(+), 11 deletions(-) create mode 100644 lifesim/analysis/yield_wrapper_notes.md diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index d32c02c..ca85e8b 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -27,7 +27,7 @@ def __init__(self, self.n_cpu = n_cpu self.cat_from_ppop = cat_from_ppop - def compute_snrs(self, + def _compute_snrs(self, output_path, output_filename, run_maxsep, @@ -95,7 +95,13 @@ def compute_snrs(self, print('Generation took ', (time.time() - t) / 60, ' minutes to complete.') def run_aperture_sweep_snr(self, - mirror_diameters): + mirror_diameters, + run_name): + final_output_path = os.path.join(self.output_path, run_name) + if not os.path.exists(final_output_path): + os.makedirs(final_output_path) + else: + raise ValueError('Directory already exists: ' + final_output_path) for ndim, diameter in enumerate(mirror_diameters): print('') @@ -105,30 +111,32 @@ def run_aperture_sweep_snr(self, print('Preparing directories... 
', end='') - output_directory = os.path.join(self.output_path, 'diam_' + str(np.round(diameter, 2)).replace('.', '_')) + output_directory = os.path.join(final_output_path, 'diam_' + str(np.round(diameter, 2)).replace('.', '_')) if not os.path.exists(output_directory): os.makedirs(output_directory) print('[Done]') print('Commencing base run... ') - self.compute_snrs(output_path=f'{output_directory}/', + self._compute_snrs(output_path=f'{output_directory}/', output_filename='sweep_diam_' + str(np.round(diameter, 2)).replace('.', '_'), run_maxsep=False, diameter=diameter) print('[Done]') print('Commencing maxsep run... ') - self.compute_snrs(output_path=f'{output_directory}/', + self._compute_snrs(output_path=f'{output_directory}/', output_filename='sweep_diam_maxsep_' + str(np.round(diameter, 2)).replace('.', '_'), run_maxsep=True, diameter=diameter) print('[Done]') def run_optimizer_sweep(self, - run_name): - # get the names of all subdirectories in output_path - subdirs = [d for d in os.listdir(self.output_path) if os.path.isdir(os.path.join(self.output_path, d))] + run_name, + source_name): + source_path = os.path.join(self.output_path, source_name) + # get the names of all subdirectories in output_path (which contain subdirectories for different mirror diameters) + subdirs = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] # check a directory with the name run_name already exists in output_path, otherwise create it final_output_path = os.path.join(self.output_path, run_name) @@ -141,7 +149,7 @@ def run_optimizer_sweep(self, for subdir in subdirs: # make a list of all files ending in .hdf5 in subdir, then keep only the part of the filename before '_catalog.hdf5' and only if the sting does not contain 'maxsep' - catalog_files = [f.split('_catalog.hdf5')[0] for f in os.listdir(os.path.join(self.output_path, subdir)) + catalog_files = [f.split('_catalog.hdf5')[0] for f in os.listdir(os.path.join(source_path, subdir)) if f.endswith('_catalog.hdf5') and 'maxsep' not in f] # create a subdir of the same name in final_output_path, no existence check required @@ -153,13 +161,13 @@ def run_optimizer_sweep(self, compute_yields_mp( output_filename=catalog_file, output_path=output_directory + '/', - catalog_path=os.path.join(self.output_path, subdir, catalog_file + '_catalog.hdf5'), + catalog_path=os.path.join(source_path, subdir, catalog_file + '_catalog.hdf5'), config_path=self.config_path ) else: run_configs.append({'output_filename': catalog_file, 'output_path': output_directory + '/', - 'catalog_path': os.path.join(self.output_path, subdir, catalog_file + '_catalog.hdf5') + 'catalog_path': os.path.join(source_path, subdir, catalog_file + '_catalog.hdf5') }) if self.n_cpu > 1: diff --git a/lifesim/analysis/yield_wrapper_notes.md b/lifesim/analysis/yield_wrapper_notes.md new file mode 100644 index 0000000..207e3d4 --- /dev/null +++ b/lifesim/analysis/yield_wrapper_notes.md @@ -0,0 +1,172 @@ +# `ScienceYield` Wrapper + +The purpose of the science yield wrapper is to collect all functions necessary to run a yield with LIFEsim. +While detection yields with single catalogs or instrument setups can be quickly run using +`instrument.snr()` and `opt.ahgs()`, this wrapper is aimed at +- yields for varying instrument setups (e.g. aperture size) +- yields including the characterization yield + +Central tools to achieve this are +- a table of all synthetic planets, to which LIFEsim adds the `snr_1h` column. We call this _snr table_. 
+- the optimizer, which builds an optimized observing sequence based on the `snr_1h` column
+
+## Current State
+So far, the ScienceYield wrapper has grown organically.
+Here, we provide an overview of the current state.
+
+---
+
+## `ScienceYield`
+
+### Constructor
+
+#### `__init__(self, ...)`
+
+**Description:**
+Sets up the working environment, mainly paths to file locations.
+
+**Arguments:**
+
+| Argument        | Type | Description                                   |
+|-----------------|------|-----------------------------------------------|
+| `config_path`   | str  | Path to the config file in .yaml format       |
+| `catalog_path`  | str  | Path to the input catalog                     |
+| `output_path`   | str  | Location where the snr catalogs are saved     |
+| `n_cpu`         | int  | Number of CPUs to be used in multiprocessing  |
+| `cat_from_ppop` | bool | True if input catalog is in .txt format       |
+
+**Example:**
+```python
+import lifesim
+ywrap = lifesim.ScienceYield(config_path='/path/CONFIG_251104.yaml',
+                             catalog_path='/path/Bryson2021_hab2low.txt',
+                             output_path='/path/hab2low_500/',
+                             n_cpu=64)
+```
+
+#### `_compute_snrs(self, ...)`
+
+**Description:**
+Calculates an individual SNR table for one setting of the instrument.
+Saves a catalog with the `snr_1h` column and a config file, named `output_filename_catalog.hdf5` and `output_filename.yaml` respectively, to `output_path`.
+This function is mainly meant as an internal helper; it is usually not required to call it directly.
+
+**Arguments:**
+
+| Argument          | Type  | Description                                                                      |
+|-------------------|-------|----------------------------------------------------------------------------------|
+| `output_path`     | str   | Path to where this individual run is saved                                       |
+| `output_filename` | str   | Filename of catalog and config table                                             |
+| `run_maxsep`      | bool  | If true, all planets will be placed at their maximum separation                  |
+| `diameter`        | float | Mirror diameter for this run. If set to None, diameter from config file is used  |
+
+**Files created:**
+
+| Name / Location                             | Content and Purpose            |
+|---------------------------------------------|--------------------------------|
+| `output_path/output_filename_catalog.hdf5`  | snr table for the single run   |
+| `output_path/output_filename.yaml`          | config file for the single run |
+
+**Example:**
+(continued from above)
+```python
+ywrap._compute_snrs(output_path='path/',
+                    output_filename='single_test',
+                    run_maxsep=False,
+                    diameter=None)
+```
+
+#### `run_aperture_sweep_snr(self, ...)`
+
+**Description:**
+One of the key instrument parameters is the sensitivity.
+For LIFE, we usually fix the photon-conversion-efficiency, and the aperture is varied.
+This function creates snr tables for an array of mirror diameters.
+
+**Arguments:**
+
+| Argument           | Type            | Description                                |
+|--------------------|-----------------|--------------------------------------------|
+| `mirror_diameters` | list/np.ndarray | Array of all mirror sizes to be evaluated  |
+| `run_name`         | str             | Name of the run (for file creation)        |
+
+**Files created:**
+
+For every diameter in `mirror_diameters`, the following files will be created (here we take 2.5 m as an example)
+
+| Name / Location                                                          | Content and Purpose                                                                       |
+|---------------------------------------------------------------------------|--------------------------------------------------------------------------------------------|
+| `self.output_path/run_name/diam_2_5/`                                     | new parent directory is created                                                            |
+| `self.output_path/run_name/diam_2_5/sweep_diam_2_5.yaml`                  | _config file_ for run with the actual planet position for the detection phase             |
+| `self.output_path/run_name/diam_2_5/sweep_diam_2_5_catalog.hdf5`          | ↑ and its _snr catalog_                                                                    |
+| `self.output_path/run_name/diam_2_5/sweep_diam_maxsep_2_5.yaml`           | _config file_ for run with the maximum planet separation for the characterization phase   |
+| `self.output_path/run_name/diam_2_5/sweep_diam_maxsep_2_5_catalog.hdf5`   | ↑ and its _snr catalog_                                                                    |
+
+**Example:**
+(continued from above)
+```python
+ywrap.run_aperture_sweep_snr(mirror_diameters=np.arange(2.0, 5.1, 0.25),
+                             run_name='aperture_size_2_to_5')
+```
+
+#### `run_optimizer_sweep(self, ...)`
+
+**Description:**
+Runs the optimizer on all snr tables in a given source folder.
+
+**Arguments:**
+
+| Argument      | Type | Description                                                            |
+|---------------|------|-------------------------------------------------------------------------|
+| `run_name`    | str  | Name of the run (for file creation)                                    |
+| `source_name` | str  | Name of the folder in self.output_path that contains the _snr tables_  |
+
+**Files created:**
+
+| Name / Location                                                  | Content and Purpose                                              |
+|------------------------------------------------------------------|------------------------------------------------------------------|
+| `self.output_path/run_name/`                                     | new parent directory is created, if it does not already exist    |
+| `self.output_path/run_name/subdir/`                              | New directory for every directory in the source folder           |
+| `self.output_path/run_name/subdir/sweep_diam_2_5.yaml`           | _config file_ for run of optimizer                               |
+| `self.output_path/run_name/subdir/sweep_diam_2_5_catalog.hdf5`   | ↑ and its _snr catalog_ with optimized sequence                  |
+
+**Example:**
+(continued from above)
+```python
+ywrap.run_optimizer_sweep(run_name='opt_aperture_size_2_to_5',
+                          source_name='aperture_size_2_to_5')
+```
+
+---
+
+## Helper Functions
+
+#### `compute_yields_mp()`
+
+**Description:**
+Runs the optimizer on a single snr table and saves the resulting catalog and config file. Optionally returns the experiment yields of the run.
+
+**Arguments:**
+
+| Argument          | Type     | Description                                                                     |
+|-------------------|----------|----------------------------------------------------------------------------------|
+| `output_filename` | str      | Name of output files                                                            |
+| `output_path`     | str      | Path to which catalog and config file will be saved                             |
+| `catalog_path`    | str      | Path to the input catalog file                                                  |
+| `config_path`     | str      | Path to the config file                                                         |
+| `uni_sel`         | int/None | Number of universes to select from all universes. If `None`, all are selected   |
+| `return_yields`   | bool     | Set true to return experiment yield of this run                                 |
+
+**Files created:**
+
+| Name / Location                             | Content and Purpose              |
+|---------------------------------------------|----------------------------------|
+| `output_path/output_filename_catalog.hdf5`  | snr table for the single run     |
+| `output_path/output_filename.yaml`          | config file for the single run   |

From 91637884fcefead4745f13a64eed036977150e52 Mon Sep 17 00:00:00 2001
From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com>
Date: Wed, 10 Dec 2025 17:38:20 +0100
Subject: [PATCH 13/27] significant changes to the yield_wrapper and the characterization time optimization

---
 lifesim/analysis/yield_wrapper.py       | 210 ++++++++++++++++++++++--
 lifesim/analysis/yield_wrapper_notes.md |   9 +-
 lifesim/optimize/ahgs.py                | 177 +++++++++++++++-----
 lifesim/optimize/ahgs_char.py           | 170 +++++++++++++++++++
 lifesim/optimize/optimizer.py           |  17 ++
 lifesim/util/options.py                 |   9 +-
 6 files changed, 530 insertions(+), 62 deletions(-)
 create mode 100644 lifesim/optimize/ahgs_char.py

diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py
index ca85e8b..7804244 100644
--- a/lifesim/analysis/yield_wrapper.py
+++ b/lifesim/analysis/yield_wrapper.py
@@ -84,7 +84,27 @@ def _compute_snrs(self,
         bus.connect(('star', 'transm'))
 
         if run_maxsep:
-            bus.data.catalog = bus.data.catalog[bus.data.catalog.habitable]
+            # only run maxsep SNR for planets that are part of an experiment
+            bus.data.catalog['is_interesting'] = False
+            for exp in bus.data.options.optimization['experiments'].keys():
+                mask_exp = ((bus.data.catalog.radius_p
+                             >= bus.data.options.optimization['experiments'][exp]['radius_p_min'])
+                            & (bus.data.catalog.radius_p
+                               <= bus.data.options.optimization['experiments'][exp]['radius_p_max'])
+                            & (bus.data.catalog.temp_s
+                               >= bus.data.options.optimization['experiments'][exp]['temp_s_min'])
+                            & (bus.data.catalog.temp_s
+                               <= bus.data.options.optimization['experiments'][exp]['temp_s_max']))
+
+                if bus.data.options.optimization['experiments'][exp]['in_HZ']:
+                    mask_exp = (mask_exp
+                                & (bus.data.catalog['habitable']))
+
+                bus.data.catalog['exp_' + exp] = mask_exp
+
+                bus.data.catalog['is_interesting'] = np.logical_or(mask_exp, bus.data.catalog['is_interesting'])
+
+            bus.data.catalog = bus.data.catalog[bus.data.catalog.is_interesting]
             bus.data.catalog['angsep'] = bus.data.catalog['maxangsep']
 
         instrument.get_snr()
@@ -126,14 +146,15 @@ def run_aperture_sweep_snr(self,
             print('Commencing maxsep run... 
') self._compute_snrs(output_path=f'{output_directory}/', - output_filename='sweep_diam_maxsep_' + str(np.round(diameter, 2)).replace('.', '_'), + output_filename='sweep_diam_' + str(np.round(diameter, 2)).replace('.', '_') + '_maxsep', run_maxsep=True, diameter=diameter) print('[Done]') def run_optimizer_sweep(self, run_name, - source_name): + source_name, + characterization: bool = False): source_path = os.path.join(self.output_path, source_name) # get the names of all subdirectories in output_path (which contain subdirectories for different mirror diameters) subdirs = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] @@ -162,12 +183,13 @@ def run_optimizer_sweep(self, output_filename=catalog_file, output_path=output_directory + '/', catalog_path=os.path.join(source_path, subdir, catalog_file + '_catalog.hdf5'), - config_path=self.config_path + config_path=self.config_path, + characterization=characterization ) else: run_configs.append({'output_filename': catalog_file, 'output_path': output_directory + '/', - 'catalog_path': os.path.join(source_path, subdir, catalog_file + '_catalog.hdf5') + 'catalog_path': os.path.join(source_path, subdir, catalog_file + '_catalog.hdf5'), }) if self.n_cpu > 1: @@ -182,10 +204,172 @@ def run_optimizer_sweep(self, output_filename=rc['output_filename'], output_path=rc['output_path'], catalog_path=rc['catalog_path'], - config_path=self.config_path + config_path=self.config_path, + characterization=characterization ) for rc in run_configs) + def combine_catalog_maxsep(self, + source_name,): + source_path = os.path.join(self.output_path, source_name) + + print('START OF combine_catalog_maxsep: ', time.ctime()) + print('SOURCE NAME: ', source_name) + print('SOURCE PATH: ', source_path) + t_all = time.time() + + subdirs = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] + + for subdir in subdirs: + print('--------------------------------------------------') + print('Processing subdir: ', subdir) + t_sub = time.time() + + # make a list of all files ending in .hdf5 in subdir, then keep only the part of the filename before '_catalog.hdf5' and only if the sting does not contain 'maxsep' + base_catalog_files = [f.split('_catalog.hdf5')[0] for f in os.listdir(os.path.join(source_path, subdir)) + if f.endswith('_catalog.hdf5') and 'maxsep' not in f] + + if len(base_catalog_files) == 0: + print(' No base catalog files found in', os.path.join(source_path, subdir)) + print(' Skipping subdir.') + print('Subdir processing took', round(time.time() - t_sub, 2), 's') + continue + + for catalog_file in base_catalog_files: + print(' ----------------------------------------------') + print(' Processing catalog:', catalog_file) + t_cat = time.time() + + base_path = os.path.join(source_path, subdir, catalog_file + '_catalog.hdf5') + maxsep_path = os.path.join(source_path, subdir, catalog_file + '_maxsep_catalog.hdf5') + + print(' Reading base catalog from:', base_path) + base_catalog = pd.read_hdf(base_path) + print(' Base catalog entries:', len(base_catalog)) + + print(' Reading maxsep catalog from:', maxsep_path) + maxsep_catalog = pd.read_hdf(maxsep_path) + print(' Maxsep catalog entries:', len(maxsep_catalog)) + + # map and fill missing values + base_catalog['maxsep_snr_1h'] = base_catalog['id'].map(maxsep_catalog.set_index('id')['snr_1h']) + missing_before = int(base_catalog['maxsep_snr_1h'].isna().sum()) + base_catalog['maxsep_snr_1h'].fillna(0, inplace=True) + print(f' Mapped maxsep snr_1h, filled 
{missing_before} missing values with 0') + + print(' Saving combined catalog to:', base_path) + base_catalog.to_hdf(base_path, key='catalog', mode='w') + + print(' Done processing', catalog_file, '- took', round(time.time() - t_cat, 2), 's') + + print('Finished subdir:', subdir, '- took', round(time.time() - t_sub, 2), 's') + + print('ALL combine_catalog_maxsep finished. Total time:', round(time.time() - t_all, 2), 's') + + def get_mission_time(self, + catalog_path, + config_path): + bus = lifesim.Bus() + + # loading the config and catalog, make sure that the catalog is already combined with maxsep SNRs + bus.build_from_config(filename=config_path) + bus.data.import_catalog(input_path=catalog_path) + + cat_det = bus.data.catalog[np.logical_and( + bus.data.catalog.is_interesting, + bus.data.catalog.detected + )].sort_values('t_detected') + + # collect the experiments + exps = [col[4:] for col in bus.data.catalog.columns if col.startswith('exp_')] + + # set up the time sheet that records the mission time per universe and per experiment + time_sheet = {} + for exp in exps: + time_sheet[exp] = pd.DataFrame(index=np.unique(cat_det.nuniverse), + columns=['detection', 'orbit', 'characterization', 'total']) + t_det = np.zeros((2, len(np.unique(cat_det.nuniverse)))) + t_det[0, :] = np.unique(cat_det.nuniverse) + + # 1. get total time for detection campaign from every universe + for nu in np.unique(cat_det.nuniverse): + temp_t_dets = [] + for exp in exps: + det_stream = cat_det[np.logical_and(cat_det.nuniverse == nu, cat_det['exp_' + exp])].t_detected + temp_t_det = det_stream.iloc[bus.data.options.optimization['experiments'][exp]['sample_size']] \ + if len(det_stream) > 0 else np.nan + temp_t_dets.append(temp_t_det) + time_sheet[exp].loc[nu, 'detection'] = temp_t_det + t_det[1, t_det[0, :] == nu] = np.max(temp_t_dets) + + # 2. identify the follow_up targets for each universe + cat_det.sort_values('maxsep_snr_1h', ascending=False, inplace=True) + cat_det['follow_up'] = False + for nu in np.unique(cat_det.nuniverse): + for exp in exps: + mask_det = np.logical_and.reduce((cat_det.nuniverse == nu, cat_det['exp_' + exp], + cat_det.t_detected <= t_det[1, t_det[0, :] == nu][0])) + # only set the top N targets to follow up where N is the sample size for the experiment + mask_det_indices = cat_det[mask_det].index[:bus.data.options.optimization['experiments'][exp]['sample_size']] + cat_det.loc[mask_det_indices, 'follow_up'] = True + + # 3. calculate follow-up time for orbit and characterization + for nu in np.unique(cat_det.nuniverse): + for exp in exps: + mask_followup = np.logical_and.reduce( + (cat_det.nuniverse == nu, cat_det['exp_' + exp], cat_det.follow_up)) + time_sheet[exp].loc[nu, 'orbit'] = (((bus.data.options.optimization['snr_target'] + / cat_det[mask_followup].maxsep_snr_1h) ** 2 * + (bus.data.options.optimization['n_orbits'] - 1) * 60 * 60).sum() + + np.sum(mask_followup) + * (bus.data.options.optimization['n_orbits'] - 1) + * bus.data.options.array['t_slew']) + time_sheet[exp].loc[nu, 'characterization'] = (((bus.data.options.optimization['snr_char'] / cat_det[ + mask_followup].maxsep_snr_1h) ** 2 * 60 * 60).sum() + + np.sum(mask_followup) + * bus.data.options.array['t_slew']) + + # 4. 
get total time + for exp in exps: + time_sheet[exp]['total'] = time_sheet[exp]['detection'] + time_sheet[exp]['orbit'] + time_sheet[exp][ + 'characterization'] + + return time_sheet + + def sweep_mission_time(self, + source_name): + source_path = os.path.join(self.output_path, source_name) + subdirs = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] + + for subdir in subdirs: + print('--------------------------------------------------') + print('Processing subdir: ', subdir) + t_sub = time.time() + + # make a list of all files ending in .hdf5 in subdir, then keep only the part of the filename before '_catalog.hdf5' and only if the sting does not contain 'maxsep' + run_names = [f.split('_catalog.hdf5')[0] for f in os.listdir(os.path.join(source_path, subdir)) + if f.endswith('_catalog.hdf5')] + + for run_name in run_names: + print(' ----------------------------------------------') + print(' Processing run: ', run_name) + t_run = time.time() + + catalog_path = os.path.join(source_path, subdir, run_name + '_catalog.hdf5') + config_path = os.path.join(source_path, subdir, run_name + '.yaml') + + time_sheet = self.get_mission_time(catalog_path=catalog_path, + config_path=config_path) + + # save time_sheet to csv files, one per experiment + for exp in time_sheet.keys(): + output_csv_path = os.path.join(source_path, subdir, run_name + '_' + exp + '_mission_time.csv') + time_sheet[exp].to_csv(output_csv_path) + print(' Saved mission time for experiment', exp, 'to', output_csv_path) + + print(' Done processing run:', run_name, '- took', round(time.time() - t_run, 2), 's') + + def run_covergence_test(self, run_name, catalog_path, @@ -312,7 +496,8 @@ def compute_yields_mp(output_filename, catalog_path, config_path, uni_sel=None, - return_yields=False): + return_yields=False, + characterization: bool = False): t = time.time() # create bus @@ -322,10 +507,9 @@ def compute_yields_mp(output_filename, bus.build_from_config(filename=config_path) bus.data.options.set_manual(n_cpu=1) # speed up calculation - bus.data.options.set_manual( - output_path=output_path) + bus.data.options.set_manual(output_path=output_path) bus.data.options.set_manual(output_filename=output_filename) - + bus.data.options.optimization['characterization'] = characterization # ---------- Loading the Catalog ---------- bus.data.import_catalog(input_path=catalog_path) @@ -355,9 +539,9 @@ def compute_yields_mp(output_filename, bus.connect(('inst', 'opt')) bus.connect(('opt', 'ahgs')) - with contextlib.redirect_stdout(None): - opt.ahgs() - bus.save() + # with contextlib.redirect_stdout(None): + opt.ahgs() + bus.save() if return_yields: yields = get_yields(bus=bus, diff --git a/lifesim/analysis/yield_wrapper_notes.md b/lifesim/analysis/yield_wrapper_notes.md index 207e3d4..9802b19 100644 --- a/lifesim/analysis/yield_wrapper_notes.md +++ b/lifesim/analysis/yield_wrapper_notes.md @@ -164,9 +164,8 @@ Runs the optimizer on all snr tables in a given source folder. 
| `output_path/output_filename_catalog.hdf5`| snr table for the single run | | `output_path/output_filename.yaml` | config file for the single run | -**Parameters:** -| Parameter | Type | Description | -|-----------|------|-------------| -| `param1` | Type | Description of param1 | -| `param2` | Type | Description of param2 | +# What happens in the characterization ahgs + + + diff --git a/lifesim/optimize/ahgs.py b/lifesim/optimize/ahgs.py index 2dbef2d..880c031 100644 --- a/lifesim/optimize/ahgs.py +++ b/lifesim/optimize/ahgs.py @@ -17,15 +17,39 @@ def obs_array_star(self, nstar): self.data.catalog.is_interesting)) if not np.any(mask): - return np.array((np.inf, np.inf)) + + if self.data.options.optimization['characterization']: + return np.array(((np.inf, np.inf), (np.inf, np.inf))) + else: + return np.array((np.inf, np.inf)) else: obs = (60 * 60 * (self.data.options.optimization['snr_target'] ** 2 - self.data.catalog['snr_current'].loc[mask] ** 2) / self.data.catalog.snr_1h.loc[mask] ** 2) - obs -= self.data.catalog.t_slew.loc[mask] - obs = np.sort(obs) / np.arange(1, np.count_nonzero(mask) + 1, 1) - return obs + + if self.data.options.optimization['characterization']: + obs_char = (60 * 60 * + self.data.options.optimization['snr_char'] ** 2 + / self.data.catalog.maxsep_snr_1h.loc[mask] ** 2) + + # characterization time is needed for optimization, detection time is needed to understand how much + # mission time was used for one observation + met = np.stack((obs, obs_char), axis=0) + + met = met[:, np.argsort(met[0])] + met[1] = np.cumsum(met[1]) + met[0] -= self.data.catalog.t_slew.loc[mask] + met[1] += met[0] + met = met / np.arange(1, np.count_nonzero(mask) + 1, 1)[np.newaxis, :] + a=1 + + return met + else: + obs -= self.data.catalog.t_slew.loc[mask] + obs = np.sort(obs) / np.arange(1, np.count_nonzero(mask) + 1, 1) + + return obs def observe_star(self, nstar, @@ -65,21 +89,30 @@ def observe_star(self, self.data.catalog.loc[idx, 't_detected'] = deepcopy(self.tot_time) exp_cols = [col for col in self.data.catalog.columns if col.startswith('exp_')] true_experiments = [col[4:] for col in exp_cols if self.data.catalog.loc[idx, col]] - for exp in true_experiments: self.data.optm['exp_detected'][exp] += 1 + self.data.optm['exp_detected_uni'][exp][ + 1, + self.data.optm['exp_detected_uni'][exp][0, :] == self.data.catalog.loc[idx, 'nuniverse'] + ] += 1 else: raise ValueError('Delete mode not implemented for AHGS optimizer.') def distribute_time(self): stars, n = np.unique(ar=self.data.catalog.nstar, return_counts=True) - obs = np.zeros((stars.shape[0], np.max(n))) + np.inf - - # fill the observation time array - for i, nstar in enumerate(stars): - temp = self.obs_array_star(nstar=nstar) - obs[i, :temp.shape[0]] = temp + if self.data.options.optimization['characterization']: + obs = np.zeros((stars.shape[0], 2, np.max(n))) + np.inf + # fill the observation time array + for i, nstar in enumerate(stars): + temp = self.obs_array_star(nstar=nstar) + obs[i, :, :temp.shape[1]] = temp + else: + obs = np.zeros((stars.shape[0], np.max(n))) + np.inf + # fill the observation time array + for i, nstar in enumerate(stars): + temp = self.obs_array_star(nstar=nstar) + obs[i, :temp.shape[0]] = temp obs_time = (self.data.options.optimization['t_search'] * self.data.options.array['t_efficiency']) @@ -88,45 +121,88 @@ def distribute_time(self): print('Number of planets detected for each experiment:') - while self.tot_time < obs_time: - # find the best global slope and observe star - no_star, ind_t = 
np.unravel_index(np.argmin(obs), obs.shape) - if (self.tot_time + obs[no_star, ind_t] * (ind_t + 1) + 0.01) > obs_time: - rem_time = obs_time - self.tot_time - self.observe_star(nstar=stars[no_star], - int_time=rem_time) - self.tot_time += rem_time + run_bool = True + + while run_bool: + if self.data.options.optimization['characterization']: + # find the best global slope and observe star + no_star, ind_t = np.unravel_index(np.argmin(obs[:, 1, :]), obs[:, 1, :].shape) + if (((self.tot_time + obs[no_star, 0, ind_t] * (ind_t + 1) + 0.01) > obs_time) + and (self.data.options.optimization['opt_limit'] == 'time')): + rem_time = obs_time - self.tot_time + self.observe_star(nstar=stars[no_star], + int_time=rem_time) + self.tot_time += rem_time + else: + if ((obs[no_star, 1, ind_t] - obs[no_star, 0, ind_t]) * (ind_t + 1) + 0.01) < 0: + raise ValueError('Negative time difference encountered.') + self.observe_star(nstar=stars[no_star], + int_time=obs[no_star, 0, ind_t] * (ind_t + 1) + 0.01) + temp = self.obs_array_star(nstar=stars[no_star]) + self.tot_time += obs[no_star, 0, ind_t] * (ind_t + 1) + 0.01 + obs[no_star, :, :] = np.inf + obs[no_star, :, :temp.shape[1]] = temp else: - self.observe_star(nstar=stars[no_star], - int_time=obs[no_star, ind_t] * (ind_t + 1) + 0.01) - temp = self.obs_array_star(nstar=stars[no_star]) - self.tot_time += obs[no_star, ind_t] * (ind_t + 1) + 0.01 - obs[no_star, :] = np.inf - obs[no_star, :temp.shape[0]] = temp + # find the best global slope and observe star + no_star, ind_t = np.unravel_index(np.argmin(obs), obs.shape) + if (((self.tot_time + obs[no_star, ind_t] * (ind_t + 1) + 0.01) > obs_time) + and (self.data.options.optimization['opt_limit'] == 'time')): + rem_time = obs_time - self.tot_time + self.observe_star(nstar=stars[no_star], + int_time=rem_time) + self.tot_time += rem_time + else: + self.observe_star(nstar=stars[no_star], + int_time=obs[no_star, ind_t] * (ind_t + 1) + 0.01) + temp = self.obs_array_star(nstar=stars[no_star]) + self.tot_time += obs[no_star, ind_t] * (ind_t + 1) + 0.01 + obs[no_star, :] = np.inf + obs[no_star, :temp.shape[0]] = temp out_string = '' for key in self.data.optm['exp_detected'].keys(): out_string += (key + ': ' + str(self.data.optm['exp_detected'][key] / self.data.optm['num_universe']) + ' ') - out_string += ('- (' + str(np.round(self.tot_time / 60 / 60 / 24 / 365.25, decimals=1)) + ' / ' - + str(np.round(obs_time / 60 / 60 / 24 / 365.25, decimals=1)) - + ') yrs observed') + + if self.data.options.optimization['opt_limit'] == 'experiments': + out_string += ('- ' + str(np.round(self.tot_time / 60 / 60 / 24 / 365.25, decimals=1)) + + ' yrs observed') + else: + out_string += ('- (' + str(np.round(self.tot_time / 60 / 60 / 24 / 365.25, decimals=1)) + ' / ' + + str(np.round(obs_time / 60 / 60 / 24 / 365.25, decimals=1)) + + ') yrs observed') print('\r' + out_string, end='') - if any( - self.data.optm['exp_detected'][exp] / self.data.optm['num_universe'] - > self.data.options.optimization['experiments'][exp]['sample_size'] - and not self.data.optm['hit_limit'][exp] - for exp in self.data.optm['exp_detected'] - ): + # if any( + # self.data.optm['exp_detected'][exp] / self.data.optm['num_universe'] + # > self.data.options.optimization['experiments'][exp]['sample_size'] + # and not self.data.optm['hit_limit'][exp] + # for exp in self.data.optm['exp_detected'] + # ): + if any([ + ((self.data.optm['exp_detected_uni'][exp][1, :] + > self.data.options.optimization['experiments'][exp]['sample_size']).sum() > + 
(self.data.options.optimization['opt_limit_factor'] + * self.data.optm['num_universe'])) and not self.data.optm['hit_limit'][exp] + for exp in self.data.optm['exp_detected_uni']]): + # over_limit_experiments = [ + # exp for exp in self.data.optm['exp_detected'] + # if + # self.data.optm['exp_detected'][exp] > self.data.options.optimization['experiments'][exp][ + # 'sample_size'] + # and not self.data.optm['hit_limit'][exp] + # ] over_limit_experiments = [ - exp for exp in self.data.optm['exp_detected'] + exp for exp in self.data.optm['exp_detected_uni'] if - self.data.optm['exp_detected'][exp] > self.data.options.optimization['experiments'][exp][ - 'sample_size'] + (self.data.optm['exp_detected_uni'][exp][1, :] + > self.data.options.optimization['experiments'][exp]['sample_size']).sum() > + (self.data.options.optimization['opt_limit_factor'] + * self.data.optm['num_universe']) and not self.data.optm['hit_limit'][exp] ] + for exp in over_limit_experiments: self.data.optm['hit_limit'][exp] = True self.data.catalog['is_interesting'] = False @@ -135,16 +211,31 @@ def distribute_time(self): self.data.catalog['exp_' + exp_interesting]) if self.data.catalog['is_interesting'].sum() == 0: - print('\nAll experiments have been completed, spending remaining mission time on all HZ planets.') + if self.data.options.optimization['opt_limit'] == 'time': + print('\nAll experiments have been completed, spending remaining mission time on all HZ planets.') self.data.catalog['is_interesting'] = self.data.catalog['habitable'] else: print('\nCompleted experiments: ' + ', '.join(over_limit_experiments) + ', RECOUNTING -------------------') - obs = np.zeros((stars.shape[0], np.max(n))) + np.inf - # fill the observation time array - for i, nstar in enumerate(stars): - temp = self.obs_array_star(nstar=nstar) - obs[i, :temp.shape[0]] = temp + if self.data.options.optimization['characterization']: + obs = np.zeros((stars.shape[0], 2, np.max(n))) + np.inf + # fill the observation time array + for i, nstar in enumerate(stars): + temp = self.obs_array_star(nstar=nstar) + obs[i, :, :temp.shape[1]] = temp + else: + obs = np.zeros((stars.shape[0], np.max(n))) + np.inf + # fill the observation time array + for i, nstar in enumerate(stars): + temp = self.obs_array_star(nstar=nstar) + obs[i, :temp.shape[0]] = temp + + if self.data.options.optimization['opt_limit'] == 'time': + run_bool = self.tot_time < obs_time + elif self.data.options.optimization['opt_limit'] == 'experiments': + run_bool = not all(self.data.optm['hit_limit'].values()) + else: + raise ValueError('Optimization limit not recognized.') \ No newline at end of file diff --git a/lifesim/optimize/ahgs_char.py b/lifesim/optimize/ahgs_char.py new file mode 100644 index 0000000..dcb51fc --- /dev/null +++ b/lifesim/optimize/ahgs_char.py @@ -0,0 +1,170 @@ +import numpy as np + +from lifesim.core.modules import SlopeModule + +class AhgsCharModule(SlopeModule): + def __init__(self, + name: str): + super().__init__(name=name) + self.t_series = [] + self.yield_series = {'A': [], + 'F': [], + 'G': [], + 'K': [], + 'M': []} + self.id_series = [] + + def obs_array_star(self, nstar): + mask = self.data.catalog.nstar == nstar + + # return infinity if the detection limit is reached for this stype + if not bool(np.isin(element=self.data.catalog.stype.loc[mask].iloc[0], + test_elements=np.array(list(self.data.options.optimization['limit'].keys()))[np.invert( + self.data.optm['hit_limit'])])): + return np.array((np.inf, np.inf)) + else: + mask = np.logical_and.reduce((mask, + 
self.data.catalog.habitable, + np.invert(self.data.catalog.detected))) + + obs = (60 * 60 * + (self.data.options.optimization['snr_target'] ** 2 + - self.data.catalog['snr_current'].loc[mask] ** 2) + / self.data.catalog.snr_1h.loc[mask] ** 2) + + obs_char = (60 * 60 * + self.data.options.optimization['snr_char'] ** 2 + / self.data.catalog.maxsep_snr_1h.loc[mask] ** 2) + + met = np.stack((obs, obs_char), axis=0) + met = met[:, np.argsort(met[0])] + met[1] = np.cumsum(met[1]) + met[0] -= self.data.catalog.t_slew.loc[mask] + met[1] += met[0] + met = met / np.arange(1, np.count_nonzero(mask) + 1, 1)[np.newaxis, :] + + return met + + def observe_star(self, + nstar, + int_time): + mask = self.data.catalog.nstar == nstar + + self.data.optm['tot_time'] += int_time + + slew_time = self.data.catalog.loc[mask, 't_slew'].iloc[0] + if not (slew_time == 0): + if (slew_time + int_time) < 0: + self.data.catalog.loc[mask, 't_slew'] += int_time + int_actual = 0 + else: + self.data.catalog.loc[mask, 't_slew'] = 0 + self.data.catalog.loc[mask, 'int_time'] += (slew_time + int_time) + int_actual = slew_time + int_time + else: + self.data.catalog.loc[mask, 'int_time'] += int_time + int_actual = int_time + + self.data.catalog.loc[mask, 'int_time_actual'] += int_actual + + self.data.catalog.loc[mask, 'snr_current'] = np.sqrt( + self.data.catalog.loc[mask, 'snr_current'] ** 2 + + (self.data.catalog.loc[mask, 'snr_1h'] + * np.sqrt(int_actual + / (60 * 60)))**2) + + # self.data.catalog.loc[mask, 'snr_char_current'] = np.sqrt( + # self.data.catalog.loc[mask, 'snr_char_current'] ** 2 + # + (self.data.catalog.loc[mask, 'maxsep_snr_1h'] + # * np.sqrt(int_char_time + # / (60 * 60))) ** 2) + + det_ids = [] + + for _, i in enumerate(np.where(mask)[0]): + if (not self.data.catalog.detected.iloc[i]) and \ + (self.data.catalog.snr_current.iloc[i] + >= self.data.options.optimization['snr_target']): + self.data.catalog.detected.iat[i] = True + det_ids.append(self.data.catalog.id.iat[i]) + if self.data.catalog.habitable.iloc[i]: + self.data.optm['sum_detected'][ + np.where(np.array(list(self.data.options.optimization['limit'].keys())) + == self.data.catalog.stype.iloc[i])] += 1 + self.id_series.append(det_ids) + + def distribute_time(self): + if not self.data.options.optimization['habitable']: + raise ValueError('Characterization optimization only implemented for habitable planets.') + stars, n = np.unique(ar=self.data.catalog.nstar, + return_counts=True) + obs = np.zeros((stars.shape[0], 2, np.max(n))) + np.inf + + # fill the observation time array + for i, nstar in enumerate(stars): + temp = self.obs_array_star(nstar=nstar) + obs[i, :, :temp.shape[1]] = temp + + obs_time = (self.data.options.optimization['t_search'] + * self.data.options.array['t_efficiency']) + + tot_time = 0 + if self.data.options.optimization['verbose']: + print('Number of planets detected by stellar type:') + + while tot_time < obs_time: + # find the best global slope and observe star + no_star, ind_t = np.unravel_index(np.argmin(obs[:, 1, :]), obs[:, 1, :].shape) + if (tot_time + obs[no_star, 0, ind_t] * (ind_t + 1) + 0.01) > obs_time: + rem_time = obs_time - tot_time + self.observe_star(nstar=stars[no_star], + int_time=rem_time) + tot_time += rem_time + + else: + if ((obs[no_star, 1, ind_t] - obs[no_star, 0, ind_t]) * (ind_t + 1) + 0.01) < 0: + raise ValueError('Negative time difference encountered.') + self.observe_star(nstar=stars[no_star], + int_time=obs[no_star, 0, ind_t] * (ind_t + 1) + 0.01) + temp = self.obs_array_star(nstar=stars[no_star]) + 
tot_time += obs[no_star, 0, ind_t] * (ind_t + 1) + 0.01 + obs[no_star, :, :] = np.inf + obs[no_star, :, :temp.shape[1]] = temp + + out_string = '' + for key in self.data.options.optimization['limit'].keys(): + temp_yield = (self.data.optm['sum_detected'] / self.data.optm['num_universe'])[ + np.where(np.array(list(self.data.options.optimization['limit'].keys())) + == key)][0] + out_string += (key + ': ' + + str(temp_yield) + + ' ') + self.yield_series[key].append(temp_yield) + + out_string += ('- (' + str(np.round(tot_time/60/60/24/365.25, 1)) + ' / ' + + str(np.round(obs_time/60/60/24/365.25, 1)) + ') yrs observed') + + self.t_series.append(tot_time) + + if self.data.options.optimization['verbose']: + print('\r' + out_string, end='') + + if np.any( + np.logical_and( + (self.data.optm['sum_detected'] / self.data.optm['num_universe']) + > np.array(list(self.data.options.optimization['limit'].values())), + np.invert(self.data.optm['hit_limit']))): + if self.data.options.optimization['verbose']: + print('\n') + print('HIT LIMIT, RECOUNTING -------------------') + self.data.optm['hit_limit'] = ((self.data.optm['sum_detected'] + / (self.data.optm['num_universe'])) + >= np.array( + list(self.data.options.optimization['limit'].values()) + )) + obs = np.zeros((stars.shape[0], 2, np.max(n))) + np.inf + + # fill the observation time array + for i, nstar in enumerate(stars): + temp = self.obs_array_star(nstar=nstar) + obs[i, :, :temp.shape[1]] = temp diff --git a/lifesim/optimize/optimizer.py b/lifesim/optimize/optimizer.py index ef47539..7ca77da 100644 --- a/lifesim/optimize/optimizer.py +++ b/lifesim/optimize/optimizer.py @@ -98,6 +98,7 @@ def ahgs(self): self.data.catalog['is_interesting'] = False self.data.optm['hit_limit'] = {} self.data.optm['exp_detected'] = {} + self.data.optm['exp_detected_uni'] = {} for exp in self.data.options.optimization['experiments'].keys(): mask_exp = ((self.data.catalog.radius_p >= self.data.options.optimization['experiments'][exp]['radius_p_min']) @@ -118,6 +119,22 @@ def ahgs(self): self.data.optm['hit_limit'][exp] = False self.data.optm['exp_detected'][exp] = 0 + self.data.optm['exp_detected_uni'][exp] = np.zeros((2, + len(np.unique(self.data.catalog.nuniverse)))) + self.data.optm['exp_detected_uni'][exp][0, :] = np.unique(self.data.catalog.nuniverse, + return_counts=False) + + if (self.data.options.optimization['characterization']) and ('maxsep_snr_1h' not in self.data.catalog.columns): + raise ValueError('Characterization optimization selected but catalog does not contain ' + 'maxsep_snr_1h column.') + + if ((self.data.options.optimization['opt_limit'] == 'experiments') + and not all(np.isfinite([exp['sample_size'] + for exp in self.data.options.optimization['experiments'].values()]))): + raise ValueError('Optimization limit set to experiments but no finite limits given.') + + # self.data.optm['uni_counts'] = np.zeros((2, len(np.unique(self.data.catalog.nuniverse)))) + # self.data.optm['uni_counts'][0, :] = np.unique(self.data.catalog.nuniverse, return_counts=False) self.data.optm['tot_time'] = 0 # in sec diff --git a/lifesim/util/options.py b/lifesim/util/options.py index 0a8607c..8ab057f 100644 --- a/lifesim/util/options.py +++ b/lifesim/util/options.py @@ -86,7 +86,12 @@ def __init__(self): self.optimization = {'N_pf': 0., 'snr_target': 0., 'experiments': None, - 't_search': 0.} + 't_search': 0., + 'characterization': False, + 'snr_char': 0., + 'opt_limit': 'time', + 'opt_limit_factor': 0.5, + 'n_orbits': 1} def set_scenario(self, case: str): @@ -121,6 +126,8 
@@ def set_scenario(self, self.optimization['N_pf'] = 25 self.optimization['snr_target'] = 7 + self.optimization['snr_char'] = 44.1 + self.optimization['n_orbits'] = 5 self.optimization['limit'] = {'F': np.inf, 'G': np.inf, 'K': np.inf, From 4459e9a9bc5dd9e262c9a24ca380e70058c47dfd Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Fri, 12 Dec 2025 09:34:18 +0100 Subject: [PATCH 14/27] included failsafe if there are insufficient targets to observe --- lifesim/analysis/yield_wrapper.py | 4 ++-- lifesim/optimize/ahgs.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index 7804244..e9a9a3c 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -141,14 +141,14 @@ def run_aperture_sweep_snr(self, self._compute_snrs(output_path=f'{output_directory}/', output_filename='sweep_diam_' + str(np.round(diameter, 2)).replace('.', '_'), run_maxsep=False, - diameter=diameter) + diameter=float(diameter)) print('[Done]') print('Commencing maxsep run... ') self._compute_snrs(output_path=f'{output_directory}/', output_filename='sweep_diam_' + str(np.round(diameter, 2)).replace('.', '_') + '_maxsep', run_maxsep=True, - diameter=diameter) + diameter=float(diameter)) print('[Done]') def run_optimizer_sweep(self, diff --git a/lifesim/optimize/ahgs.py b/lifesim/optimize/ahgs.py index 880c031..556e354 100644 --- a/lifesim/optimize/ahgs.py +++ b/lifesim/optimize/ahgs.py @@ -127,6 +127,9 @@ def distribute_time(self): if self.data.options.optimization['characterization']: # find the best global slope and observe star no_star, ind_t = np.unravel_index(np.argmin(obs[:, 1, :]), obs[:, 1, :].shape) + if not np.isfinite(obs[no_star, 1, ind_t]): + print('Not sufficient targets remaining to continue characterization optimization.') + break if (((self.tot_time + obs[no_star, 0, ind_t] * (ind_t + 1) + 0.01) > obs_time) and (self.data.options.optimization['opt_limit'] == 'time')): rem_time = obs_time - self.tot_time From b3d0829a3e13e0a4f1544015b0b744aa90429143 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Fri, 12 Dec 2025 11:27:44 +0100 Subject: [PATCH 15/27] changed logic for completion fraction in optimization runs. 
--- lifesim/analysis/yield_wrapper.py | 57 +++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index e9a9a3c..6cab646 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -4,6 +4,7 @@ import contextlib from copy import deepcopy import json +from typing import Union import numpy as np from joblib import Parallel, delayed, parallel_config @@ -154,7 +155,8 @@ def run_aperture_sweep_snr(self, def run_optimizer_sweep(self, run_name, source_name, - characterization: bool = False): + characterization: bool = False, + opt_limit_factor: Union[None, float] = None): source_path = os.path.join(self.output_path, source_name) # get the names of all subdirectories in output_path (which contain subdirectories for different mirror diameters) subdirs = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] @@ -184,7 +186,8 @@ def run_optimizer_sweep(self, output_path=output_directory + '/', catalog_path=os.path.join(source_path, subdir, catalog_file + '_catalog.hdf5'), config_path=self.config_path, - characterization=characterization + characterization=characterization, + opt_limit_factor=opt_limit_factor ) else: run_configs.append({'output_filename': catalog_file, @@ -205,7 +208,8 @@ def run_optimizer_sweep(self, output_path=rc['output_path'], catalog_path=rc['catalog_path'], config_path=self.config_path, - characterization=characterization + characterization=characterization, + opt_limit_factor=opt_limit_factor ) for rc in run_configs) @@ -275,19 +279,39 @@ def get_mission_time(self, bus.build_from_config(filename=config_path) bus.data.import_catalog(input_path=catalog_path) + # collect the experiments + exps = [col[4:] for col in bus.data.catalog.columns if col.startswith('exp_')] + + # recalculate the interesting flag + bus.data.catalog['is_interesting'] = False + for exp in exps: + mask_exp = ((bus.data.catalog.radius_p + >= bus.data.options.optimization['experiments'][exp]['radius_p_min']) + & (bus.data.catalog.radius_p + <= bus.data.options.optimization['experiments'][exp]['radius_p_max']) + & (bus.data.catalog.temp_s + >= bus.data.options.optimization['experiments'][exp]['temp_s_min']) + & (bus.data.catalog.temp_s + <= bus.data.options.optimization['experiments'][exp]['temp_s_max'])) + + if bus.data.options.optimization['experiments'][exp]['in_HZ']: + mask_exp = (mask_exp + & (bus.data.catalog['habitable'])) + + bus.data.catalog['exp_' + exp] = mask_exp + + bus.data.catalog['is_interesting'] = np.logical_or(mask_exp, bus.data.catalog['is_interesting']) + cat_det = bus.data.catalog[np.logical_and( bus.data.catalog.is_interesting, bus.data.catalog.detected )].sort_values('t_detected') - # collect the experiments - exps = [col[4:] for col in bus.data.catalog.columns if col.startswith('exp_')] - # set up the time sheet that records the mission time per universe and per experiment time_sheet = {} for exp in exps: time_sheet[exp] = pd.DataFrame(index=np.unique(cat_det.nuniverse), - columns=['detection', 'orbit', 'characterization', 'total']) + columns=['detection', 'orbit', 'characterization', 'total', 'number']) t_det = np.zeros((2, len(np.unique(cat_det.nuniverse)))) t_det[0, :] = np.unique(cat_det.nuniverse) @@ -296,11 +320,18 @@ def get_mission_time(self, temp_t_dets = [] for exp in exps: det_stream = cat_det[np.logical_and(cat_det.nuniverse == nu, cat_det['exp_' + exp])].t_detected - temp_t_det = 
det_stream.iloc[bus.data.options.optimization['experiments'][exp]['sample_size']] \ - if len(det_stream) > 0 else np.nan + if len(det_stream) == 0: + temp_t_det = np.nan + time_sheet[exp].loc[nu, 'number'] = 0 + elif len(det_stream) < bus.data.options.optimization['experiments'][exp]['sample_size']: + temp_t_det = det_stream.iloc[-1] + time_sheet[exp].loc[nu, 'number'] = len(det_stream) + else: + temp_t_det = det_stream.iloc[bus.data.options.optimization['experiments'][exp]['sample_size']-1] + time_sheet[exp].loc[nu, 'number'] = bus.data.options.optimization['experiments'][exp]['sample_size'] temp_t_dets.append(temp_t_det) time_sheet[exp].loc[nu, 'detection'] = temp_t_det - t_det[1, t_det[0, :] == nu] = np.max(temp_t_dets) + t_det[1, t_det[0, :] == nu] = np.nanmax(temp_t_dets) # 2. identify the follow_up targets for each universe cat_det.sort_values('maxsep_snr_1h', ascending=False, inplace=True) @@ -497,7 +528,8 @@ def compute_yields_mp(output_filename, config_path, uni_sel=None, return_yields=False, - characterization: bool = False): + characterization: bool = False, + opt_limit_factor: Union[None, float] = None): t = time.time() # create bus @@ -507,6 +539,9 @@ def compute_yields_mp(output_filename, bus.build_from_config(filename=config_path) bus.data.options.set_manual(n_cpu=1) # speed up calculation + if opt_limit_factor is not None: + bus.data.options.optimization['opt_limit_factor'] = opt_limit_factor + bus.data.options.set_manual(output_path=output_path) bus.data.options.set_manual(output_filename=output_filename) bus.data.options.optimization['characterization'] = characterization From 655c91677f1aa4601ff7c791907067f7cf599d42 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Fri, 12 Dec 2025 17:08:35 +0100 Subject: [PATCH 16/27] small fix to make sure that planets occurring in multiple experiments are not counted double --- lifesim/analysis/yield_wrapper.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index 6cab646..4d305ca 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -307,6 +307,8 @@ def get_mission_time(self, bus.data.catalog.detected )].sort_values('t_detected') + cat_det['char_done'] = False + # set up the time sheet that records the mission time per universe and per experiment time_sheet = {} for exp in exps: @@ -348,7 +350,8 @@ def get_mission_time(self, for nu in np.unique(cat_det.nuniverse): for exp in exps: mask_followup = np.logical_and.reduce( - (cat_det.nuniverse == nu, cat_det['exp_' + exp], cat_det.follow_up)) + (cat_det.nuniverse == nu, cat_det['exp_' + exp], cat_det.follow_up, ~cat_det.char_done + )) time_sheet[exp].loc[nu, 'orbit'] = (((bus.data.options.optimization['snr_target'] / cat_det[mask_followup].maxsep_snr_1h) ** 2 * (bus.data.options.optimization['n_orbits'] - 1) * 60 * 60).sum() @@ -359,6 +362,7 @@ def get_mission_time(self, mask_followup].maxsep_snr_1h) ** 2 * 60 * 60).sum() + np.sum(mask_followup) * bus.data.options.array['t_slew']) + cat_det.loc[mask_followup, 'char_done'] = True # 4. get total time for exp in exps: From ead1a81e015329705f57027d2cf956fd7e10023f Mon Sep 17 00:00:00 2001 From: "Felix A. 
Dannert" <62441325+fdannert@users.noreply.github.com> Date: Fri, 12 Dec 2025 18:06:13 +0100 Subject: [PATCH 17/27] number of detections were computed wrong --- lifesim/analysis/yield_wrapper.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index 4d305ca..a4d5cb5 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -324,16 +324,16 @@ def get_mission_time(self, det_stream = cat_det[np.logical_and(cat_det.nuniverse == nu, cat_det['exp_' + exp])].t_detected if len(det_stream) == 0: temp_t_det = np.nan - time_sheet[exp].loc[nu, 'number'] = 0 elif len(det_stream) < bus.data.options.optimization['experiments'][exp]['sample_size']: temp_t_det = det_stream.iloc[-1] - time_sheet[exp].loc[nu, 'number'] = len(det_stream) else: temp_t_det = det_stream.iloc[bus.data.options.optimization['experiments'][exp]['sample_size']-1] - time_sheet[exp].loc[nu, 'number'] = bus.data.options.optimization['experiments'][exp]['sample_size'] temp_t_dets.append(temp_t_det) time_sheet[exp].loc[nu, 'detection'] = temp_t_det t_det[1, t_det[0, :] == nu] = np.nanmax(temp_t_dets) + for exp in exps: + det_stream = cat_det[np.logical_and(cat_det.nuniverse == nu, cat_det['exp_' + exp])].t_detected + time_sheet[exp].loc[nu, 'number'] = det_stream <= t_det[1, t_det[0, :] == nu] # 2. identify the follow_up targets for each universe cat_det.sort_values('maxsep_snr_1h', ascending=False, inplace=True) From e3899a1628779a6ebc6b9ee6ff2e4e8119c81dd0 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Mon, 15 Dec 2025 09:22:23 +0100 Subject: [PATCH 18/27] minor bugfix; sum missing --- lifesim/analysis/yield_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index a4d5cb5..75f19c5 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -333,7 +333,7 @@ def get_mission_time(self, t_det[1, t_det[0, :] == nu] = np.nanmax(temp_t_dets) for exp in exps: det_stream = cat_det[np.logical_and(cat_det.nuniverse == nu, cat_det['exp_' + exp])].t_detected - time_sheet[exp].loc[nu, 'number'] = det_stream <= t_det[1, t_det[0, :] == nu] + time_sheet[exp].loc[nu, 'number'] = np.sum(det_stream <= float(t_det[1, t_det[0, :] == nu])) # 2. identify the follow_up targets for each universe cat_det.sort_values('maxsep_snr_1h', ascending=False, inplace=True) From a6d0510acea0d9180c0e914a5e86bd480a4f41ee Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Tue, 16 Dec 2025 11:21:38 +0100 Subject: [PATCH 19/27] added processing code for the diameter sweeps --- lifesim/analysis/yield_wrapper.py | 273 +++++++++++++++++++++++++----- lifesim/optimize/ahgs.py | 2 +- 2 files changed, 227 insertions(+), 48 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index 75f19c5..e09de5d 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -307,67 +307,76 @@ def get_mission_time(self, bus.data.catalog.detected )].sort_values('t_detected') - cat_det['char_done'] = False + cat_det['follow_up'] = False + cat_det['t_orbit'] = 0. + cat_det['t_char'] = 0. 
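+        # note: these follow-up flags and per-planet orbit/characterization times are
+        # filled in on the detected subset (cat_det) and mapped back onto the full
+        # bus.data.catalog by planet id in step 5 below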
+ + bus.data.catalog['follow_up'] = False # set up the time sheet that records the mission time per universe and per experiment - time_sheet = {} + columns = ['detection', 'orbit', 'characterization', 'total'] for exp in exps: - time_sheet[exp] = pd.DataFrame(index=np.unique(cat_det.nuniverse), - columns=['detection', 'orbit', 'characterization', 'total', 'number']) - t_det = np.zeros((2, len(np.unique(cat_det.nuniverse)))) - t_det[0, :] = np.unique(cat_det.nuniverse) + columns.append('n_' + exp) + time_sheet = pd.DataFrame(index=np.unique(cat_det.nuniverse), + columns=columns) # 1. get total time for detection campaign from every universe - for nu in np.unique(cat_det.nuniverse): - temp_t_dets = [] - for exp in exps: - det_stream = cat_det[np.logical_and(cat_det.nuniverse == nu, cat_det['exp_' + exp])].t_detected - if len(det_stream) == 0: - temp_t_det = np.nan - elif len(det_stream) < bus.data.options.optimization['experiments'][exp]['sample_size']: - temp_t_det = det_stream.iloc[-1] - else: - temp_t_det = det_stream.iloc[bus.data.options.optimization['experiments'][exp]['sample_size']-1] - temp_t_dets.append(temp_t_det) - time_sheet[exp].loc[nu, 'detection'] = temp_t_det - t_det[1, t_det[0, :] == nu] = np.nanmax(temp_t_dets) - for exp in exps: - det_stream = cat_det[np.logical_and(cat_det.nuniverse == nu, cat_det['exp_' + exp])].t_detected - time_sheet[exp].loc[nu, 'number'] = np.sum(det_stream <= float(t_det[1, t_det[0, :] == nu])) + t_det = bus.data.catalog.t_detected.max() + time_sheet['detection'] = t_det # 2. identify the follow_up targets for each universe cat_det.sort_values('maxsep_snr_1h', ascending=False, inplace=True) - cat_det['follow_up'] = False for nu in np.unique(cat_det.nuniverse): for exp in exps: - mask_det = np.logical_and.reduce((cat_det.nuniverse == nu, cat_det['exp_' + exp], - cat_det.t_detected <= t_det[1, t_det[0, :] == nu][0])) + mask_det = np.logical_and.reduce((cat_det.nuniverse == nu, + cat_det['exp_' + exp], + cat_det.t_detected <= t_det)) # only set the top N targets to follow up where N is the sample size for the experiment mask_det_indices = cat_det[mask_det].index[:bus.data.options.optimization['experiments'][exp]['sample_size']] cat_det.loc[mask_det_indices, 'follow_up'] = True + time_sheet.loc[nu, 'n_' + exp] = len(mask_det_indices) # 3. 
calculate follow-up time for orbit and characterization + cat_det.loc[cat_det.follow_up, 't_orbit'] = ( + (((bus.data.options.optimization['snr_target'] / cat_det[cat_det.follow_up].maxsep_snr_1h) ** 2) + * 60 * 60 + + bus.data.options.array['t_slew']) + * (bus.data.options.optimization['n_orbits'] - 1) + ) + + cat_det.loc[cat_det.follow_up, 't_char'] = ( + (((bus.data.options.optimization['snr_char'] / cat_det[cat_det.follow_up].maxsep_snr_1h) ** 2) + * 60 * 60 + + bus.data.options.array['t_slew']) + ) + for nu in np.unique(cat_det.nuniverse): - for exp in exps: - mask_followup = np.logical_and.reduce( - (cat_det.nuniverse == nu, cat_det['exp_' + exp], cat_det.follow_up, ~cat_det.char_done - )) - time_sheet[exp].loc[nu, 'orbit'] = (((bus.data.options.optimization['snr_target'] - / cat_det[mask_followup].maxsep_snr_1h) ** 2 * - (bus.data.options.optimization['n_orbits'] - 1) * 60 * 60).sum() - + np.sum(mask_followup) - * (bus.data.options.optimization['n_orbits'] - 1) - * bus.data.options.array['t_slew']) - time_sheet[exp].loc[nu, 'characterization'] = (((bus.data.options.optimization['snr_char'] / cat_det[ - mask_followup].maxsep_snr_1h) ** 2 * 60 * 60).sum() - + np.sum(mask_followup) - * bus.data.options.array['t_slew']) - cat_det.loc[mask_followup, 'char_done'] = True + mask_followup = np.logical_and.reduce( + (cat_det.nuniverse == nu, cat_det.follow_up, + )) + time_sheet.loc[nu, 'orbit'] = cat_det.loc[mask_followup, 't_orbit'].sum() + time_sheet.loc[nu, 'characterization'] = cat_det.loc[mask_followup, 't_char'].sum() # 4. get total time - for exp in exps: - time_sheet[exp]['total'] = time_sheet[exp]['detection'] + time_sheet[exp]['orbit'] + time_sheet[exp][ - 'characterization'] + time_sheet['total'] = time_sheet['detection'] + time_sheet['orbit'] + time_sheet['characterization'] + + # 5. 
copy to original catalog + cols = ['follow_up', 't_orbit', 't_char'] + mapping_df = cat_det.set_index('id')[cols] + + for col, fill_value, out_type in [ + ('follow_up', False, bool), + ('t_orbit', 0.0, float), + ('t_char', 0.0, float), + ]: + # map and infer object dtypes first + s = bus.data.catalog['id'].map(mapping_df[col]).infer_objects(copy=False) + # replace missing values without using .fillna + s_filled = s.where(s.notna(), other=fill_value) + # assign with desired type + bus.data.catalog[col] = s_filled.astype(out_type) + + bus.save() return time_sheet @@ -397,14 +406,184 @@ def sweep_mission_time(self, config_path=config_path) # save time_sheet to csv files, one per experiment - for exp in time_sheet.keys(): - output_csv_path = os.path.join(source_path, subdir, run_name + '_' + exp + '_mission_time.csv') - time_sheet[exp].to_csv(output_csv_path) - print(' Saved mission time for experiment', exp, 'to', output_csv_path) + output_csv_path = os.path.join(source_path, subdir, run_name + '_mission_time.csv') + time_sheet.to_csv(output_csv_path) + print(' Saved mission time to ', output_csv_path) print(' Done processing run:', run_name, '- took', round(time.time() - t_run, 2), 's') + def process_mission_time(self, + source_name): + # Start logging for this processing run (style consistent with other functions) + print('START OF process_mission_time: ', time.ctime()) + source_path = os.path.join(self.output_path, source_name) + print('SOURCE NAME: ', source_name) + print('SOURCE PATH: ', source_path) + t_all = time.time() + + subdirs = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] + diams_float = [float('.'.join(d.split('_')[1:])) for d in subdirs] + diams = ['_'.join(d.split('_')[1:]) for d in subdirs] + + # -- 1. CREATE TYPETABLE -- + timetable = {} + + for subdir, d in zip(subdirs, diams): + csv_file = [f for f in os.listdir(os.path.join(source_path, subdir)) + if f.endswith('.csv')] + if len(csv_file) != 1: + raise ValueError('More than one csv file found in subdir ' + subdir) + timetable[d] = pd.read_csv(os.path.join(source_path, subdir, csv_file[0])) + + exps = [c.split('n_')[1] for c in timetable[diams[0]].columns if c.startswith('n_')] + + typetable = pd.DataFrame(columns=['detection', 'orbit', 'char', 'total'], index=diams_float) + + for d, df in zip(diams, diams_float): + typetable.loc[df, 'detection'] = np.max(timetable[d]['detection']) * 1.25 + typetable.loc[df, 'orbit'] = np.mean(timetable[d]['orbit']) * 1.25 + typetable.loc[df, 'char'] = np.mean(timetable[d]['characterization']) * 1.25 + typetable.loc[df, 'total'] = typetable.loc[df, 'detection'] + typetable.loc[df, 'orbit'] + typetable.loc[ + df, 'char'] + + typetable.sort_index(inplace=True, ascending=True) + + # -- 2. 
PLOT TYPETABLE -- + # fill under stepped line + columns = ['detection', 'orbit', 'char'] + columns_label = ['Detection', 'Orbit', 'Characterization'] + x = np.asarray(typetable.index, dtype=float) + y0 = np.zeros_like(x, dtype=float) + + colors = ['#2066a8', '#3594cc', '#8cc5e3'] + + fig, ax = plt.subplots() + + for i in range(len(columns)): + y = typetable[columns[i]].to_numpy(dtype=float) / (365.25 * 24 * 60 * 60) + # ax.step(x, y+y0, label=columns[i], where='mid', color=colors[i]) + ax.fill_between(x, y0, y + y0, step='mid', alpha=1, color=colors[i], label=columns_label[i], edgecolor=None) + + y0 += typetable[columns[i]].to_numpy(dtype=float) / (365.25 * 24 * 60 * 60) + + ax.axhline(y=5, color='tab:blue', linestyle='--') + ax.set_xlabel('Mirror Diameter (m)') + ax.set_ylabel('Time (years)') + ax.legend() + type_fig_path = os.path.join(self.output_path, source_name, source_name + '_by_type_time.pdf') + fig.savefig(type_fig_path) + plt.close() + + # Save typetable CSV and report as a single cohesive step + typetable_csv_path = os.path.join(self.output_path, source_name, source_name + '_typetable.csv') + + # -- 3. SAVE TYPETABLE -- + typetable /= (365.25 * 24 * 60 * 60) + typetable.to_csv(typetable_csv_path) + print('--------------------------------------------------') + print('Step 1/2: Typetable generated and saved.') + print(' Figure: ', type_fig_path) + print(' CSV: ', typetable_csv_path) + + # -- 4. CREATE EXPTABLE -- + exptable = {} + + for exp in exps: + exptable[exp] = pd.DataFrame( + columns=['detection', 'orbit', 'orbit-1s', 'orbit+1s', 'char', 'char-1s', 'char+1s', 'total', + 'total-1s', 'total+1s'], index=diams_float) + exptable[exp].sort_index(inplace=True) + for subdir in subdirs: + df = float('.'.join(subdir.split('_')[1:])) + catalog_file = [f for f in os.listdir(os.path.join(source_path, subdir)) + if f.endswith('.hdf5') and 'maxsep' not in f] + if len(catalog_file) != 1: + raise ValueError('More than one catalog file found in subdir ' + subdir) + config_file = [f for f in os.listdir(os.path.join(source_path, subdir)) + if f.endswith('.yaml')] + + bus = lifesim.Bus() + + # loading the config and catalog, make sure that the catalog is already combined with maxsep SNRs + bus.build_from_config( + filename=os.path.join(source_path, subdir, config_file[0])) + bus.data.import_catalog( + input_path=os.path.join(source_path, subdir, catalog_file[0])) + cat = bus.data.catalog + + for exp in exps: + mask = np.logical_and.reduce((cat.detected, cat['exp_' + exp])) + exptable[exp].loc[df, 'detection'] = (np.sum(np.unique(cat[mask].int_time)) + len( + np.unique(cat[mask].int_time)) * bus.data.options.array['t_slew']) * 1.25 + + mask = np.logical_and.reduce((cat.detected, cat['exp_' + exp], cat.follow_up)) + temp_t_orbit = [] + temp_t_char = [] + temp_t_total = [] + for nu in np.unique(cat[mask].nuniverse): + temp_t_orbit.append(np.sum(cat[np.logical_and(mask, cat.nuniverse == nu)].t_orbit)) + temp_t_char.append(np.sum(cat[np.logical_and(mask, cat.nuniverse == nu)].t_char)) + temp_t_total.append(np.sum(cat[np.logical_and(mask, cat.nuniverse == nu)].t_orbit) + np.sum( + cat[np.logical_and(mask, cat.nuniverse == nu)].t_char) + exptable[exp].loc[ + df, 'detection'] * 0.8) + exptable[exp].loc[df, 'orbit'] = np.mean(temp_t_orbit) * 1.25 + exptable[exp].loc[df, 'orbit+1s'] = np.quantile(temp_t_orbit, 0.841) * 1.25 + exptable[exp].loc[df, 'orbit-1s'] = np.quantile(temp_t_orbit, 0.159) * 1.25 + exptable[exp].loc[df, 'char'] = np.mean(temp_t_char) * 1.25 + exptable[exp].loc[df, 'char+1s'] = 
np.quantile(temp_t_char, 0.841) * 1.25 + exptable[exp].loc[df, 'char-1s'] = np.quantile(temp_t_char, 0.159) * 1.25 + exptable[exp].loc[df, 'total'] = np.mean(temp_t_total) * 1.25 + exptable[exp].loc[df, 'total+1s'] = np.quantile(temp_t_total, 0.841) * 1.25 + exptable[exp].loc[df, 'total-1s'] = np.quantile(temp_t_total, 0.159) * 1.25 + + del bus + del cat + + # -- 5. PLOT EXPTABLE -- + colors = [['#2066a8', '#3594cc', '#8cc5e3'], + ['#a00000', '#c46666', '#d8a6a6'], + ['#1f6f6f', '#54a1a1', '#9fc8c8'], ] + + columns = ['detection', 'orbit', 'char'] + x = np.asarray(exptable[exps[0]].index, dtype=float) + y0 = np.zeros_like(x, dtype=float) + + fig, ax = plt.subplots() + + for j, exp in enumerate(exps): + for i in range(len(columns)): + y = exptable[exp][columns[i]].to_numpy(dtype=float) / (365.25 * 24 * 60 * 60) + # ax.step(x, y+y0, label=columns[i], where='mid', color=colors[i]) + if i == 0: + ax.fill_between(x, y0, y + y0, step='mid', alpha=1, color=colors[j][i], edgecolor=None, label=exp) + else: + ax.fill_between(x, y0, y + y0, step='mid', alpha=1, color=colors[j][i], edgecolor=None) + y0 += y + + ax.set_xlabel('Mirror Diameter (m)') + ax.set_ylabel('Time (years)') + ax.axhline(y=5, color='tab:blue', linestyle='--') + ax.legend() + + exptable_fig_path = os.path.join(self.output_path, source_name, source_name + '_by_exp_time.pdf') + fig.savefig(exptable_fig_path) + plt.close() + + # -- 6. SAVE EXPTABLE -- + for exp in exps: + exptable[exp] /= (365.25 * 24 * 60 * 60) + exptable[exp].to_csv(os.path.join(self.output_path, source_name, source_name + '_' + exp + '_exptable.csv')) + + print('--------------------------------------------------') + print('Step 2/2: Exptable plotted and CSVs saved.') + print(' Figure: ', exptable_fig_path) + print(' CSVs: ', os.path.join(self.output_path, source_name)) + + # Final log with elapsed time + print('ALL process_mission_time finished. Total time:', round(time.time() - t_all, 2), 's') + + def run_covergence_test(self, run_name, catalog_path, diff --git a/lifesim/optimize/ahgs.py b/lifesim/optimize/ahgs.py index 556e354..6607747 100644 --- a/lifesim/optimize/ahgs.py +++ b/lifesim/optimize/ahgs.py @@ -86,7 +86,7 @@ def observe_star(self, (self.data.catalog.loc[idx, 'snr_current'] >= self.data.options.optimization['snr_target']): self.data.catalog.loc[idx, 'detected'] = True - self.data.catalog.loc[idx, 't_detected'] = deepcopy(self.tot_time) + self.data.catalog.loc[idx, 't_detected'] = deepcopy(self.tot_time + int_time) exp_cols = [col for col in self.data.catalog.columns if col.startswith('exp_')] true_experiments = [col[4:] for col in exp_cols if self.data.catalog.loc[idx, col]] for exp in true_experiments: From 1daaeb301838c8d2592caea944d91a7cc96719e7 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Tue, 6 Jan 2026 09:25:23 +0100 Subject: [PATCH 20/27] fixed a bug that put too much focus on the charaterization time (by factor of nuniverse) and added logging functionality. 
--- lifesim/analysis/yield_wrapper.py | 116 ++++++++++++++++++++++++++++++ lifesim/optimize/ahgs.py | 7 +- 2 files changed, 121 insertions(+), 2 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index e09de5d..29761a3 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -5,6 +5,7 @@ from copy import deepcopy import json from typing import Union +import logging import numpy as np from joblib import Parallel, delayed, parallel_config @@ -28,6 +29,37 @@ def __init__(self, self.n_cpu = n_cpu self.cat_from_ppop = cat_from_ppop + # Create logger that writes to `yield_wrapper_log.txt` in append mode. + os.makedirs(self.output_path, exist_ok=True) + log_file_path = os.path.join(self.output_path, 'yield_wrapper.log') + + self.logger = logging.getLogger('lifesim.yield_wrapper') + self.logger.setLevel(logging.INFO) + + # Avoid duplicating handlers for the same file when multiple instances are created + file_path_abs = os.path.abspath(log_file_path) + existing_handler = None + for h in list(self.logger.handlers): + if isinstance(h, logging.FileHandler) and getattr(h, 'baseFilename', None) == file_path_abs: + existing_handler = h + break + + if existing_handler is None: + fh = logging.FileHandler(log_file_path, mode='a') + fh.setLevel(logging.INFO) + fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) + self.logger.addHandler(fh) + self.logger.propagate = False + + # Log instance creation and parameters + self.logger.info('%s', '=' * 80) + self.logger.info('ScienceYield instance created at: %s', time.ctime()) + self.logger.info('Parameters:') + self.logger.info(' config_path: %s', self.config_path) + self.logger.info(' catalog_path: %s', self.catalog_path) + self.logger.info(' output_path: %s', self.output_path) + self.logger.info(' n_cpu: %s', self.n_cpu) + def _compute_snrs(self, output_path, output_filename, @@ -118,6 +150,7 @@ def _compute_snrs(self, def run_aperture_sweep_snr(self, mirror_diameters, run_name): + final_output_path = os.path.join(self.output_path, run_name) if not os.path.exists(final_output_path): os.makedirs(final_output_path) @@ -152,6 +185,27 @@ def run_aperture_sweep_snr(self, diameter=float(diameter)) print('[Done]') + # Append single multi-line log summary for this method + t_end = time.time() + try: + diam_dirs = sorted([d for d in os.listdir(final_output_path) + if os.path.isdir(os.path.join(final_output_path, d)) and d.startswith('diam_')]) + diameters_created = [d.replace('diam_', '').replace('_', '.') for d in diam_dirs] + except Exception: + diam_dirs = [] + diameters_created = [] + msg = f"""run_aperture_sweep_snr summary: + run_name: {run_name} + requested_mirror_diameters: {mirror_diameters} + mirror_directories_found: {diam_dirs} + mirror_diameters_reported: {diameters_created} + final_output_path: {final_output_path} + start_time: {time.ctime(t_end - (t_end - t_end))} # placeholder, exact start not stored in this scope + end_time: {time.ctime(t_end)} + elapsed_seconds: {round(0.0, 2)} # elapsed not measured here to avoid changing existing code flow +""" + self.logger.info(msg) + def run_optimizer_sweep(self, run_name, source_name, @@ -213,6 +267,32 @@ def run_optimizer_sweep(self, ) for rc in run_configs) + # Append single multi-line log summary for this method + t_end = time.time() + # derive counts from filesystem to avoid modifying existing logic + catalog_counts = 0 + catalog_list = {} + for d in subdirs: + files = [f for f in 
os.listdir(os.path.join(source_path, d)) if f.endswith('_catalog.hdf5') and 'maxsep' not in f] + catalog_counts += len(files) + catalog_list[d] = files + msg = f"""run_optimizer_sweep summary: + run_name: {run_name} + source_name: {source_name} + source_path: {source_path} + n_cpu: {self.n_cpu} + subdirs_found: {subdirs} + total_subdirs_count: {len(subdirs)} + total_catalog_files_count: {catalog_counts} + catalog_files_by_subdir: {catalog_list} + run_configs_queued_parallel: {len(run_configs)} + final_output_path: {final_output_path} + characterization: {characterization} + opt_limit_factor: {opt_limit_factor} + end_time: {time.ctime(t_end)} +""" + self.logger.info(msg) + def combine_catalog_maxsep(self, source_name,): source_path = os.path.join(self.output_path, source_name) @@ -270,6 +350,27 @@ def combine_catalog_maxsep(self, print('ALL combine_catalog_maxsep finished. Total time:', round(time.time() - t_all, 2), 's') + # Append single multi-line log summary for this method + t_end = time.time() + # derive file counts from filesystem (no change to processing logic) + subdirs_list = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] + total_base_catalogs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) + if f.endswith('_catalog.hdf5') and 'maxsep' not in f]) for d in subdirs_list) + total_maxsep_catalogs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) + if f.endswith('_maxsep_catalog.hdf5')]) for d in subdirs_list) + msg = f"""combine_catalog_maxsep summary: + source_name: {source_name} + source_path: {source_path} + subdirs_found: {subdirs_list} + subdirs_total: {len(subdirs_list)} + total_base_catalog_files: {total_base_catalogs} + total_maxsep_catalog_files: {total_maxsep_catalogs} + start_time: {time.ctime(t_all)} + end_time: {time.ctime(t_end)} + elapsed_seconds: {round(t_end - t_all, 2)} +""" + self.logger.info(msg) + def get_mission_time(self, catalog_path, config_path): @@ -412,6 +513,21 @@ def sweep_mission_time(self, print(' Done processing run:', run_name, '- took', round(time.time() - t_run, 2), 's') + # Append single multi-line log summary for this method + t_end = time.time() + subdirs_list = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] + total_runs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) if f.endswith('_catalog.hdf5')]) for d in subdirs_list) + mission_csvs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) if f.endswith('_mission_time.csv')]) for d in subdirs_list) + msg = f"""sweep_mission_time summary: + source_name: {source_name} + source_path: {source_path} + subdirs_found: {subdirs_list} + subdirs_total: {len(subdirs_list)} + total_runs_catalogs_found: {total_runs} + mission_time_csvs_found: {mission_csvs} + end_time: {time.ctime(t_end)} +""" + self.logger.info(msg) def process_mission_time(self, source_name): diff --git a/lifesim/optimize/ahgs.py b/lifesim/optimize/ahgs.py index 6607747..87ca7b6 100644 --- a/lifesim/optimize/ahgs.py +++ b/lifesim/optimize/ahgs.py @@ -28,10 +28,14 @@ def obs_array_star(self, nstar): - self.data.catalog['snr_current'].loc[mask] ** 2) / self.data.catalog.snr_1h.loc[mask] ** 2) + # included slew time + # needs to be divided by the number of universes for proper optimization, since the characterization happens + # 'per universe' if self.data.options.optimization['characterization']: obs_char = (60 * 60 * self.data.options.optimization['snr_char'] ** 2 - / self.data.catalog.maxsep_snr_1h.loc[mask] ** 2) + 
/ self.data.catalog.maxsep_snr_1h.loc[mask] ** 2 + + self.data.options.array['t_slew']) / self.data.optm['num_universe'] # characterization time is needed for optimization, detection time is needed to understand how much # mission time was used for one observation @@ -42,7 +46,6 @@ def obs_array_star(self, nstar): met[0] -= self.data.catalog.t_slew.loc[mask] met[1] += met[0] met = met / np.arange(1, np.count_nonzero(mask) + 1, 1)[np.newaxis, :] - a=1 return met else: From 1ccb3aa5090cac110e924e0acd72040949c0eab9 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Wed, 7 Jan 2026 09:47:20 +0100 Subject: [PATCH 21/27] adapted convergence test data handling to yield wrapper --- lifesim/analysis/yield_wrapper.py | 88 ++++++++++++++++--------------- 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index 29761a3..d0708fc 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -195,15 +195,15 @@ def run_aperture_sweep_snr(self, diam_dirs = [] diameters_created = [] msg = f"""run_aperture_sweep_snr summary: - run_name: {run_name} - requested_mirror_diameters: {mirror_diameters} - mirror_directories_found: {diam_dirs} - mirror_diameters_reported: {diameters_created} - final_output_path: {final_output_path} - start_time: {time.ctime(t_end - (t_end - t_end))} # placeholder, exact start not stored in this scope - end_time: {time.ctime(t_end)} - elapsed_seconds: {round(0.0, 2)} # elapsed not measured here to avoid changing existing code flow -""" + run_name: {run_name} + requested_mirror_diameters: {mirror_diameters} + mirror_directories_found: {diam_dirs} + mirror_diameters_reported: {diameters_created} + final_output_path: {final_output_path} + start_time: {time.ctime(t_end - (t_end - t_end))} # placeholder, exact start not stored in this scope + end_time: {time.ctime(t_end)} + elapsed_seconds: {round(0.0, 2)} # elapsed not measured here to avoid changing existing code flow + """ self.logger.info(msg) def run_optimizer_sweep(self, @@ -277,20 +277,20 @@ def run_optimizer_sweep(self, catalog_counts += len(files) catalog_list[d] = files msg = f"""run_optimizer_sweep summary: - run_name: {run_name} - source_name: {source_name} - source_path: {source_path} - n_cpu: {self.n_cpu} - subdirs_found: {subdirs} - total_subdirs_count: {len(subdirs)} - total_catalog_files_count: {catalog_counts} - catalog_files_by_subdir: {catalog_list} - run_configs_queued_parallel: {len(run_configs)} - final_output_path: {final_output_path} - characterization: {characterization} - opt_limit_factor: {opt_limit_factor} - end_time: {time.ctime(t_end)} -""" + run_name: {run_name} + source_name: {source_name} + source_path: {source_path} + n_cpu: {self.n_cpu} + subdirs_found: {subdirs} + total_subdirs_count: {len(subdirs)} + total_catalog_files_count: {catalog_counts} + catalog_files_by_subdir: {catalog_list} + run_configs_queued_parallel: {len(run_configs)} + final_output_path: {final_output_path} + characterization: {characterization} + opt_limit_factor: {opt_limit_factor} + end_time: {time.ctime(t_end)} + """ self.logger.info(msg) def combine_catalog_maxsep(self, @@ -359,16 +359,16 @@ def combine_catalog_maxsep(self, total_maxsep_catalogs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) if f.endswith('_maxsep_catalog.hdf5')]) for d in subdirs_list) msg = f"""combine_catalog_maxsep summary: - source_name: {source_name} - source_path: {source_path} - 
subdirs_found: {subdirs_list} - subdirs_total: {len(subdirs_list)} - total_base_catalog_files: {total_base_catalogs} - total_maxsep_catalog_files: {total_maxsep_catalogs} - start_time: {time.ctime(t_all)} - end_time: {time.ctime(t_end)} - elapsed_seconds: {round(t_end - t_all, 2)} -""" + source_name: {source_name} + source_path: {source_path} + subdirs_found: {subdirs_list} + subdirs_total: {len(subdirs_list)} + total_base_catalog_files: {total_base_catalogs} + total_maxsep_catalog_files: {total_maxsep_catalogs} + start_time: {time.ctime(t_all)} + end_time: {time.ctime(t_end)} + elapsed_seconds: {round(t_end - t_all, 2)} + """ self.logger.info(msg) def get_mission_time(self, @@ -519,14 +519,14 @@ def sweep_mission_time(self, total_runs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) if f.endswith('_catalog.hdf5')]) for d in subdirs_list) mission_csvs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) if f.endswith('_mission_time.csv')]) for d in subdirs_list) msg = f"""sweep_mission_time summary: - source_name: {source_name} - source_path: {source_path} - subdirs_found: {subdirs_list} - subdirs_total: {len(subdirs_list)} - total_runs_catalogs_found: {total_runs} - mission_time_csvs_found: {mission_csvs} - end_time: {time.ctime(t_end)} -""" + source_name: {source_name} + source_path: {source_path} + subdirs_found: {subdirs_list} + subdirs_total: {len(subdirs_list)} + total_runs_catalogs_found: {total_runs} + mission_time_csvs_found: {mission_csvs} + end_time: {time.ctime(t_end)} + """ self.logger.info(msg) def process_mission_time(self, @@ -703,12 +703,16 @@ def process_mission_time(self, def run_covergence_test(self, run_name, catalog_path, - output_path, min_universes, num_steps, plot=False): + + # check a directory with the name run_name already exists in output_path, otherwise create it + output_path = os.path.join(self.output_path, run_name) if not os.path.exists(output_path): os.makedirs(output_path) + else: + raise ValueError('Directory already exists: ' + output_path) # determine number universes catalog = pd.read_hdf(catalog_path, key='catalog') From ddeb0881f2b70c35b0a741149c64f0d13fdab054 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Wed, 7 Jan 2026 09:52:13 +0100 Subject: [PATCH 22/27] tiny bug in data handling of convergence test --- lifesim/analysis/yield_wrapper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index d0708fc..736ecc2 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -714,6 +714,8 @@ def run_covergence_test(self, else: raise ValueError('Directory already exists: ' + output_path) + output_path += '/' + # determine number universes catalog = pd.read_hdf(catalog_path, key='catalog') total_universes = np.unique(catalog.nuniverse).shape[0] From 12792ea50bcc731ee26fdec1c6a36bb2073a988d Mon Sep 17 00:00:00 2001 From: "Felix A. 
Dannert" <62441325+fdannert@users.noreply.github.com> Date: Fri, 9 Jan 2026 10:42:30 +0100 Subject: [PATCH 23/27] added function to merge HDF catalogs from CSV files with ID remapping and logging --- lifesim/analysis/yield_wrapper.py | 179 ++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index 736ecc2..af651eb 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -887,3 +887,182 @@ def compute_yields_mp(output_filename, yields = get_yields(bus=bus, return_yields=return_yields) return int(uni_sel), yields + +def merge_catalogs_from_csv(mapping_csv: str, + merge_csv: str, + output_path: str, + csv_has_header: bool = True, + id_prefixes: tuple = ('1', '2')) -> None: + """Merge pairs of HDF catalogs described by two CSV files. + + Parameters + ---------- + mapping_csv : str + Path to the first CSV that maps short names to actual catalog paths. + The file is expected to have at least two columns: + - column 0: short name for the catalog + - column 1: actual path to the catalog (HDF readable by pandas) + merge_csv : str + Path to the second CSV that lists merges using short names. The file is + expected to have at least three columns: + - column 0: short name for input catalog 1 + - column 1: short name for input catalog 2 + - column 2: output name (a filename or base name). If not ending in + '.hdf5' or '_catalog.hdf5' the function will append '_catalog.hdf5'. + output_path : str + Directory where merged catalogs will be written. Created if necessary. + csv_has_header : bool + Whether the CSV files have header rows (applies to both CSVs). + id_prefixes : tuple + Two string prefixes used to remap ids in catalog1 and catalog2. 
+ + Returns + ------- + None + """ + import datetime + + # sanity checks for csvs and output dir + if not os.path.exists(mapping_csv): + raise FileNotFoundError(f"Mapping CSV file not found: {mapping_csv}") + if not os.path.exists(merge_csv): + raise FileNotFoundError(f"Merge CSV file not found: {merge_csv}") + + os.makedirs(output_path, exist_ok=True) + + header_arg = 0 if csv_has_header else None + + # read mapping csv and build dict short_name -> path + mapping_df = pd.read_csv(mapping_csv, header=header_arg) + if mapping_df.shape[1] < 2: + raise ValueError("Mapping CSV must contain at least two columns: short_name, path") + short_names = mapping_df.iloc[:, 0].astype(str).tolist() + paths = mapping_df.iloc[:, 1].astype(str).tolist() + mapping = {} + for s, p in zip(short_names, paths): + if s in mapping: + # prefer first occurrence but warn + print(f"Warning: duplicate short name '{s}' in mapping CSV; using first occurrence.") + continue + mapping[s] = p + + # read merge csv + merge_df = pd.read_csv(merge_csv, header=header_arg) + if merge_df.shape[1] < 3: + raise ValueError("Merge CSV must contain at least three columns: short1, short2, output_name") + + input1_short = merge_df.iloc[:, 0].astype(str).tolist() + input2_short = merge_df.iloc[:, 1].astype(str).tolist() + output_names = merge_df.iloc[:, 2].astype(str).tolist() + + # helper to read HDF with fallback key + def _read_hdf_try(path): + try: + return pd.read_hdf(path) + except (KeyError, ValueError): + try: + return pd.read_hdf(path, key='catalog') + except Exception as e: + raise RuntimeError(f"Failed to read HDF file {path}: {e}") + + for idx, (s1, s2, out_name) in enumerate(zip(input1_short, input2_short, output_names), start=1): + print(f"Processing row {idx}:\n input_short1={s1}\n input_short2={s2}\n output_name={out_name}") + + if s1 not in mapping: + raise KeyError(f"Short name '{s1}' not found in mapping CSV") + if s2 not in mapping: + raise KeyError(f"Short name '{s2}' not found in mapping CSV") + + p1 = mapping[s1] + p2 = mapping[s2] + + if not os.path.exists(p1): + raise FileNotFoundError(f"Input catalog 1 not found: {p1}") + if not os.path.exists(p2): + raise FileNotFoundError(f"Input catalog 2 not found: {p2}") + + # normalize output filename: ensure it ends with .hdf5 or _catalog.hdf5 + out_basename = out_name + if out_basename.endswith('.hdf5'): + out_filename = out_basename + elif out_basename.endswith('_catalog.hdf5'): + out_filename = out_basename + else: + out_filename = out_basename + '_catalog.hdf5' + + outp = os.path.join(output_path, out_filename) + + # read catalogs + cat1 = _read_hdf_try(p1) + cat2 = _read_hdf_try(p2) + + if 'id' not in cat1.columns or 'id' not in cat2.columns: + raise KeyError("Both input catalogs must contain an 'id' column") + + # remap ids (store orig for note) + try: + cat1 = cat1.copy() + cat2 = cat2.copy() + cat1['id_orig__'] = cat1['id'] + cat2['id_orig__'] = cat2['id'] + cat1['id'] = cat1['id'].astype(int).astype(str).apply(lambda s: int(id_prefixes[0] + s)) + cat2['id'] = cat2['id'].astype(int).astype(str).apply(lambda s: int(id_prefixes[1] + s)) + except Exception as e: + raise ValueError(f"Error coercing or remapping 'id' columns: {e}") + + if cat1['id'].duplicated().any(): + raise ValueError("Duplicate ids found within remapped catalog1") + if cat2['id'].duplicated().any(): + raise ValueError("Duplicate ids found within remapped catalog2") + + merged = pd.concat([cat1, cat2], ignore_index=True, sort=False) + + # ensure output directory exists (already created at 
function-level, but keep defensively) + out_dir = os.path.dirname(outp) + if out_dir: + os.makedirs(out_dir, exist_ok=True) + + try: + merged.to_hdf(outp, key='catalog', mode='w') + except Exception as e: + raise RuntimeError(f"Failed to write merged HDF to {outp}: {e}") + + # write info note + base_out = os.path.splitext(outp)[0] + note_path = base_out + '_merge_info.txt' + now = datetime.datetime.utcnow().isoformat() + 'Z' + lines = [] + lines.append(f"Merged catalogs on: {now}") + lines.append("") + lines.append("Inputs:") + lines.append(f" catalog1 (short: {s1}): {p1}") + lines.append(f" original rows: {len(cat1)}") + lines.append(f" catalog2 (short: {s2}): {p2}") + lines.append(f" original rows: {len(cat2)}") + lines.append("") + lines.append(f"Output written to: {outp}") + lines.append(f" total rows: {len(merged)}") + lines.append("") + lines.append("ID remapping applied:") + lines.append(f" catalog1: new_id = int('{id_prefixes[0]}' + str(old_id))") + lines.append(f" catalog2: new_id = int('{id_prefixes[1]}' + str(old_id))") + lines.append("") + try: + sample1 = cat1[['id_orig__', 'id']].rename(columns={'id_orig__': 'orig', 'id': 'new'}).head(5) + sample2 = cat2[['id_orig__', 'id']].rename(columns={'id_orig__': 'orig', 'id': 'new'}).head(5) + lines.append('Sample mappings catalog1 (orig -> new):') + for o, n in sample1.values: + lines.append(f" {o} -> {n}") + lines.append('') + lines.append('Sample mappings catalog2 (orig -> new):') + for o, n in sample2.values: + lines.append(f" {o} -> {n}") + except Exception: + pass + + with open(note_path, 'w') as fh: + fh.write('\n'.join(lines)) + + print(f"Wrote merged catalog to: {outp}") + print(f"Wrote merge note to: {note_path}") + From b5f963534ca1d6278629d2f4a652d75479cc7fc1 Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Fri, 9 Jan 2026 11:17:13 +0100 Subject: [PATCH 24/27] refactor merge function to handle directory-based HDF catalog merging --- lifesim/analysis/yield_wrapper.py | 235 ++++++++++++++++-------------- 1 file changed, 124 insertions(+), 111 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index af651eb..a502a37 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -888,37 +888,22 @@ def compute_yields_mp(output_filename, return_yields=return_yields) return int(uni_sel), yields -def merge_catalogs_from_csv(mapping_csv: str, +def merge_runs(mapping_csv: str, merge_csv: str, output_path: str, csv_has_header: bool = True, id_prefixes: tuple = ('1', '2')) -> None: - """Merge pairs of HDF catalogs described by two CSV files. - - Parameters - ---------- - mapping_csv : str - Path to the first CSV that maps short names to actual catalog paths. - The file is expected to have at least two columns: - - column 0: short name for the catalog - - column 1: actual path to the catalog (HDF readable by pandas) - merge_csv : str - Path to the second CSV that lists merges using short names. The file is - expected to have at least three columns: - - column 0: short name for input catalog 1 - - column 1: short name for input catalog 2 - - column 2: output name (a filename or base name). If not ending in - '.hdf5' or '_catalog.hdf5' the function will append '_catalog.hdf5'. - output_path : str - Directory where merged catalogs will be written. Created if necessary. - csv_has_header : bool - Whether the CSV files have header rows (applies to both CSVs). 
- id_prefixes : tuple - Two string prefixes used to remap ids in catalog1 and catalog2. - - Returns - ------- - None + """Merge sets of HDF catalogs described by two CSV files. + + Differences vs previous implementation: + - mapping_csv maps short name -> input directory (each input directory contains subdirectories like diam_2, diam_2_5, ...) + - merge_csv rows give two short names and an output name. For each row: + - create output_path// and an info file there + - create output_path//ap_merged/ + - verify both input directories contain exactly the same subdirs + - for each subdir, read the single base catalog file (ends with '_catalog.hdf5' and not containing 'maxsep') from both inputs, + remap ids with id_prefixes, concatenate, and write to ap_merged// + - do not read/merge any maxsep files """ import datetime @@ -932,7 +917,7 @@ def merge_catalogs_from_csv(mapping_csv: str, header_arg = 0 if csv_has_header else None - # read mapping csv and build dict short_name -> path + # read mapping csv and build dict short_name -> path (expected to be directories) mapping_df = pd.read_csv(mapping_csv, header=header_arg) if mapping_df.shape[1] < 2: raise ValueError("Mapping CSV must contain at least two columns: short_name, path") @@ -941,7 +926,6 @@ def merge_catalogs_from_csv(mapping_csv: str, mapping = {} for s, p in zip(short_names, paths): if s in mapping: - # prefer first occurrence but warn print(f"Warning: duplicate short name '{s}' in mapping CSV; using first occurrence.") continue mapping[s] = p @@ -973,96 +957,125 @@ def _read_hdf_try(path): if s2 not in mapping: raise KeyError(f"Short name '{s2}' not found in mapping CSV") - p1 = mapping[s1] - p2 = mapping[s2] + dir1 = mapping[s1] + dir2 = mapping[s2] - if not os.path.exists(p1): - raise FileNotFoundError(f"Input catalog 1 not found: {p1}") - if not os.path.exists(p2): - raise FileNotFoundError(f"Input catalog 2 not found: {p2}") + if not os.path.exists(dir1) or not os.path.isdir(dir1): + raise FileNotFoundError(f"Input directory 1 not found or not a directory: {dir1}") + if not os.path.exists(dir2) or not os.path.isdir(dir2): + raise FileNotFoundError(f"Input directory 2 not found or not a directory: {dir2}") - # normalize output filename: ensure it ends with .hdf5 or _catalog.hdf5 - out_basename = out_name - if out_basename.endswith('.hdf5'): - out_filename = out_basename - elif out_basename.endswith('_catalog.hdf5'): - out_filename = out_basename - else: - out_filename = out_basename + '_catalog.hdf5' + # normalize output folder and create structure + out_folder = os.path.join(output_path, out_name) + os.makedirs(out_folder, exist_ok=True) + ap_merged_root = os.path.join(out_folder, 'ap_merged') + os.makedirs(ap_merged_root, exist_ok=True) - outp = os.path.join(output_path, out_filename) + info_lines = [] + now = datetime.datetime.utcnow().isoformat() + 'Z' + info_lines.append(f"Merged catalogs run on: {now}") + info_lines.append(f"Input short1: {s1}") + info_lines.append(f" path: {dir1}") + info_lines.append(f"Input short2: {s2}") + info_lines.append(f" path: {dir2}") + info_lines.append("") + + # list subdirectories (only directories) + subdirs1 = sorted([d for d in os.listdir(dir1) if os.path.isdir(os.path.join(dir1, d))]) + subdirs2 = sorted([d for d in os.listdir(dir2) if os.path.isdir(os.path.join(dir2, d))]) + + info_lines.append(f"Subdirectories in {s1}: {subdirs1}") + info_lines.append(f"Subdirectories in {s2}: {subdirs2}") + + if set(subdirs1) != set(subdirs2): + info_lines.append("") + info_lines.append("ERROR: Input 
directories do not contain the same set of subdirectories.") + note_path = os.path.join(out_folder, 'merge_info.txt') + with open(note_path, 'w') as fh: + fh.write('\n'.join(info_lines)) + raise ValueError(f"Subdirectory mismatch between {dir1} and {dir2}. Merge aborted. See {note_path} for details.") + + common_subdirs = sorted(subdirs1) # they are equal sets + + # process each subdir + for sub in common_subdirs: + info_lines.append("") + info_lines.append(f"Processing subdir: {sub}") + in_sub1 = os.path.join(dir1, sub) + in_sub2 = os.path.join(dir2, sub) + + # find single base catalog file in each (ends with _catalog.hdf5 and not containing 'maxsep') + base_files1 = [f for f in os.listdir(in_sub1) if f.endswith('_catalog.hdf5') and 'maxsep' not in f] + base_files2 = [f for f in os.listdir(in_sub2) if f.endswith('_catalog.hdf5') and 'maxsep' not in f] + + if len(base_files1) != 1: + info_lines.append(f" ERROR: expected exactly one base catalog in {in_sub1}, found {len(base_files1)}") + continue + if len(base_files2) != 1: + info_lines.append(f" ERROR: expected exactly one base catalog in {in_sub2}, found {len(base_files2)}") + continue - # read catalogs - cat1 = _read_hdf_try(p1) - cat2 = _read_hdf_try(p2) + bf1 = base_files1[0] + bf2 = base_files2[0] + info_lines.append(f" catalog file in {s1}: {bf1}") + info_lines.append(f" catalog file in {s2}: {bf2}") - if 'id' not in cat1.columns or 'id' not in cat2.columns: - raise KeyError("Both input catalogs must contain an 'id' column") + # ensure filenames match (same run name). If not, still proceed but preserve filenames (user expects consistent naming) + # create output subdir under ap_merged + out_sub = os.path.join(ap_merged_root, sub) + os.makedirs(out_sub, exist_ok=True) - # remap ids (store orig for note) - try: - cat1 = cat1.copy() - cat2 = cat2.copy() - cat1['id_orig__'] = cat1['id'] - cat2['id_orig__'] = cat2['id'] - cat1['id'] = cat1['id'].astype(int).astype(str).apply(lambda s: int(id_prefixes[0] + s)) - cat2['id'] = cat2['id'].astype(int).astype(str).apply(lambda s: int(id_prefixes[1] + s)) - except Exception as e: - raise ValueError(f"Error coercing or remapping 'id' columns: {e}") - - if cat1['id'].duplicated().any(): - raise ValueError("Duplicate ids found within remapped catalog1") - if cat2['id'].duplicated().any(): - raise ValueError("Duplicate ids found within remapped catalog2") - - merged = pd.concat([cat1, cat2], ignore_index=True, sort=False) - - # ensure output directory exists (already created at function-level, but keep defensively) - out_dir = os.path.dirname(outp) - if out_dir: - os.makedirs(out_dir, exist_ok=True) + path1 = os.path.join(in_sub1, bf1) + path2 = os.path.join(in_sub2, bf2) - try: - merged.to_hdf(outp, key='catalog', mode='w') - except Exception as e: - raise RuntimeError(f"Failed to write merged HDF to {outp}: {e}") + try: + cat1 = _read_hdf_try(path1) + cat2 = _read_hdf_try(path2) + except Exception as e: + info_lines.append(f" ERROR reading catalogs: {e}") + continue - # write info note - base_out = os.path.splitext(outp)[0] - note_path = base_out + '_merge_info.txt' - now = datetime.datetime.utcnow().isoformat() + 'Z' - lines = [] - lines.append(f"Merged catalogs on: {now}") - lines.append("") - lines.append("Inputs:") - lines.append(f" catalog1 (short: {s1}): {p1}") - lines.append(f" original rows: {len(cat1)}") - lines.append(f" catalog2 (short: {s2}): {p2}") - lines.append(f" original rows: {len(cat2)}") - lines.append("") - lines.append(f"Output written to: {outp}") - lines.append(f" total 
rows: {len(merged)}") - lines.append("") - lines.append("ID remapping applied:") - lines.append(f" catalog1: new_id = int('{id_prefixes[0]}' + str(old_id))") - lines.append(f" catalog2: new_id = int('{id_prefixes[1]}' + str(old_id))") - lines.append("") - try: - sample1 = cat1[['id_orig__', 'id']].rename(columns={'id_orig__': 'orig', 'id': 'new'}).head(5) - sample2 = cat2[['id_orig__', 'id']].rename(columns={'id_orig__': 'orig', 'id': 'new'}).head(5) - lines.append('Sample mappings catalog1 (orig -> new):') - for o, n in sample1.values: - lines.append(f" {o} -> {n}") - lines.append('') - lines.append('Sample mappings catalog2 (orig -> new):') - for o, n in sample2.values: - lines.append(f" {o} -> {n}") - except Exception: - pass + info_lines.append(f" rows before merge: {s1}: {len(cat1)}, {s2}: {len(cat2)}") - with open(note_path, 'w') as fh: - fh.write('\n'.join(lines)) + if 'id' not in cat1.columns or 'id' not in cat2.columns: + info_lines.append(" ERROR: both catalogs must contain an 'id' column. Skipping this subdir.") + continue - print(f"Wrote merged catalog to: {outp}") - print(f"Wrote merge note to: {note_path}") + # remap ids and keep originals + try: + cat1 = cat1.copy() + cat2 = cat2.copy() + cat1['id_orig__'] = cat1['id'] + cat2['id_orig__'] = cat2['id'] + # coerce to int then prefix as string and convert back to int + cat1['id'] = cat1['id'].astype(int).astype(str).apply(lambda s: int(str(id_prefixes[0]) + s)) + cat2['id'] = cat2['id'].astype(int).astype(str).apply(lambda s: int(str(id_prefixes[1]) + s)) + except Exception as e: + info_lines.append(f" ERROR remapping ids: {e}") + continue + + if cat1['id'].duplicated().any(): + info_lines.append(" ERROR: duplicate ids found within remapped catalog1. Skipping this subdir.") + continue + if cat2['id'].duplicated().any(): + info_lines.append(" ERROR: duplicate ids found within remapped catalog2. Skipping this subdir.") + continue + + merged = pd.concat([cat1, cat2], ignore_index=True, sort=False) + + out_catalog_path = os.path.join(out_sub, bf1) # preserve filename of first input's base file + try: + merged.to_hdf(out_catalog_path, key='catalog', mode='w') + except Exception as e: + info_lines.append(f" ERROR writing merged catalog to {out_catalog_path}: {e}") + continue + + info_lines.append(f" wrote merged catalog to: {out_catalog_path}") + info_lines.append(f" rows after merge: {len(merged)}") + + # write info file + note_path = os.path.join(out_folder, 'merge_info.txt') + with open(note_path, 'w') as fh: + fh.write('\n'.join(info_lines)) + print(f"Finished processing merge row {idx}. Info written to: {note_path}") From 1b1ad637c8219287316f316fe63993be1f1bceea Mon Sep 17 00:00:00 2001 From: "Felix A. 
Dannert" <62441325+fdannert@users.noreply.github.com> Date: Fri, 9 Jan 2026 17:07:17 +0100 Subject: [PATCH 25/27] added the optimization manager that can run multiple optimizations per Wrapper in one command --- lifesim/analysis/yield_wrapper.py | 115 ++++++++++++++++++++++++++++-- 1 file changed, 111 insertions(+), 4 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index a502a37..a6f3890 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -6,6 +6,7 @@ import json from typing import Union import logging +import yaml import numpy as np from joblib import Parallel, delayed, parallel_config @@ -210,7 +211,8 @@ def run_optimizer_sweep(self, run_name, source_name, characterization: bool = False, - opt_limit_factor: Union[None, float] = None): + opt_limit_factor: Union[None, float] = None, + reduce_catalog: bool = False): source_path = os.path.join(self.output_path, source_name) # get the names of all subdirectories in output_path (which contain subdirectories for different mirror diameters) subdirs = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] @@ -241,7 +243,8 @@ def run_optimizer_sweep(self, catalog_path=os.path.join(source_path, subdir, catalog_file + '_catalog.hdf5'), config_path=self.config_path, characterization=characterization, - opt_limit_factor=opt_limit_factor + opt_limit_factor=opt_limit_factor, + reduce_catalog=reduce_catalog, ) else: run_configs.append({'output_filename': catalog_file, @@ -263,7 +266,8 @@ def run_optimizer_sweep(self, catalog_path=rc['catalog_path'], config_path=self.config_path, characterization=characterization, - opt_limit_factor=opt_limit_factor + opt_limit_factor=opt_limit_factor, + reduce_catalog=reduce_catalog, ) for rc in run_configs) @@ -699,6 +703,100 @@ def process_mission_time(self, # Final log with elapsed time print('ALL process_mission_time finished. Total time:', round(time.time() - t_all, 2), 's') + def mange_optimizations(self, + scenario_csv: str, + source_name: str, + csv_has_header: bool = True): + + header_arg = 0 if csv_has_header else None + + scenario_df = pd.read_csv(scenario_csv, header=header_arg) + + def _create_short_name(row): + parts = [] + + # 1. Handle Booleans: Add tag only if True + if row['Experiment_1']: + parts.append("e1") + if row['Experiment_2']: + parts.append("e2") + + # 2. Handle Float: Add 'f' prefix and remove decimal (0.5 -> 05) + # converting to string and replacing '.' is a robust way to handle this + factor_str = str(row['opt_limit_factor']).replace('.', '') + parts.append(f"f{factor_str}") + + # 3. 
Handle Characterization + if row['characterization']: + parts.append("char") + + # Join all parts with underscores + return "_".join(parts) + + # Apply the function row by row + scenario_df['filename'] = scenario_df.apply(_create_short_name, axis=1) + + # load base config file + with open(self.config_path, 'r') as f: + base_config = yaml.safe_load(f) + original_config_path = deepcopy(self.config_path) + + for i in range(len(scenario_df)): + run_name = 'opt_' + scenario_df.iloc[i]['filename'] + + # check if a directory with the name run_name already exists in output_path, otherwise create it + final_output_path = os.path.join(self.output_path, run_name) + + row = scenario_df.iloc[i] + + # create the custom config file based on the existing config file + custom_config = base_config.copy() + + # Loop through the items in the row + for key, value in row.items(): + # CASE A: It is an Experiment toggle + if key.startswith('Experiment'): + if not value: + # If False, remove it from the dictionary safely + # .pop(key, None) prevents a KeyError if the key is already gone + custom_config['optimization']['experiments'].pop(key, None) + # If True, we do nothing (keep it) + + # CASE B: It is a parameter update (e.g., opt_limit_factor) + # We check if this key exists in the 'optimization' block to overwrite it + elif key in custom_config['optimization']: + custom_config['optimization'][key] = float(value) + + # save the custom config file to a temporary location + temp_config_path = os.path.join(self.output_path, 'temp_config_' + run_name + '.yaml') + with open(temp_config_path, 'w') as f: + yaml.dump(custom_config, f) + self.config_path = temp_config_path + + print('Starting optimization for run:', run_name) + + self.run_optimizer_sweep( + run_name=run_name, + source_name=source_name, + characterization=row['characterization'], + opt_limit_factor=row['opt_limit_factor'], + reduce_catalog=True + ) + + # save the custom config file to final_output_path + custom_config_path = os.path.join(final_output_path, 'config_' + run_name + '.yaml') + with open(custom_config_path, 'w') as f: + yaml.dump(custom_config, f) + + print('Finished optimization for run:', run_name) + print('----------------------------------------') + + #delete the temporary config file + os.remove(temp_config_path) + + self.config_path = original_config_path + + def run_covergence_test(self, run_name, @@ -834,7 +932,8 @@ def compute_yields_mp(output_filename, uni_sel=None, return_yields=False, characterization: bool = False, - opt_limit_factor: Union[None, float] = None): + opt_limit_factor: Union[None, float] = None, + reduce_catalog: bool = False): t = time.time() # create bus @@ -881,6 +980,14 @@ def compute_yields_mp(output_filename, # with contextlib.redirect_stdout(None): opt.ahgs() + + if reduce_catalog: + to_remove = ['planet_flux_use', 'ecc_p', 'noise_astro', 'fp', 'small_omega_p', 'semimajor_p', 'dec', 'theta_p', + 'sep_p', 'albedo_geom_vis', 'z', 'photon_rate_noise', 'mass_p', 'mass_s', 'p_orb', 'flux_p', + 'photon_rate_planet', 'albedo_bond', 'inc_p', 'lat', 'stype', 'name_s', 'lon', 'ra', + 'albedo_geom_mir', 'large_omega_p', 'radius_s'] + bus.data.catalog.drop(columns=to_remove, inplace=True) + bus.save() if return_yields: From 4edcec4a3e6f7ec6d2a0b213a12b33b19a1b9095 Mon Sep 17 00:00:00 2001 From: "Felix A. 
Dannert" <62441325+fdannert@users.noreply.github.com> Date: Fri, 9 Jan 2026 17:42:54 +0100 Subject: [PATCH 26/27] tiny bugfix: drop only existing columns --- lifesim/analysis/yield_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index a6f3890..4ddb695 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -986,7 +986,7 @@ def compute_yields_mp(output_filename, 'sep_p', 'albedo_geom_vis', 'z', 'photon_rate_noise', 'mass_p', 'mass_s', 'p_orb', 'flux_p', 'photon_rate_planet', 'albedo_bond', 'inc_p', 'lat', 'stype', 'name_s', 'lon', 'ra', 'albedo_geom_mir', 'large_omega_p', 'radius_s'] - bus.data.catalog.drop(columns=to_remove, inplace=True) + bus.data.catalog.drop(columns=to_remove, inplace=True, errors='ignore') bus.save() From 951647ccf0d153cc6a033132a8f01a9fbe9d0d3d Mon Sep 17 00:00:00 2001 From: "Felix A. Dannert" <62441325+fdannert@users.noreply.github.com> Date: Tue, 13 Jan 2026 08:24:54 +0100 Subject: [PATCH 27/27] added numpy varibale handling to yaml writer, some bugfixes in yield_wrapper.py --- lifesim/analysis/yield_wrapper.py | 110 +++++++++++++++++------------- lifesim/core/core.py | 15 +++- 2 files changed, 78 insertions(+), 47 deletions(-) diff --git a/lifesim/analysis/yield_wrapper.py b/lifesim/analysis/yield_wrapper.py index 4ddb695..0ee575a 100644 --- a/lifesim/analysis/yield_wrapper.py +++ b/lifesim/analysis/yield_wrapper.py @@ -486,52 +486,67 @@ def get_mission_time(self, return time_sheet def sweep_mission_time(self, - source_name): - source_path = os.path.join(self.output_path, source_name) - subdirs = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] + source_name, + all_runs: bool = False): + if all_runs: + # an optimized run directory always starts with 'opt_' + # run_names = [d for d in os.listdir(self.output_path) if os.path.isdir(os.path.join(self.output_path, d))] - for subdir in subdirs: - print('--------------------------------------------------') - print('Processing subdir: ', subdir) - t_sub = time.time() - - # make a list of all files ending in .hdf5 in subdir, then keep only the part of the filename before '_catalog.hdf5' and only if the sting does not contain 'maxsep' - run_names = [f.split('_catalog.hdf5')[0] for f in os.listdir(os.path.join(source_path, subdir)) - if f.endswith('_catalog.hdf5')] + run_names = [d for d in os.listdir(self.output_path) + if os.path.isdir(os.path.join(self.output_path, d)) + and d.startswith('opt_')] for run_name in run_names: - print(' ----------------------------------------------') - print(' Processing run: ', run_name) - t_run = time.time() - - catalog_path = os.path.join(source_path, subdir, run_name + '_catalog.hdf5') - config_path = os.path.join(source_path, subdir, run_name + '.yaml') - - time_sheet = self.get_mission_time(catalog_path=catalog_path, - config_path=config_path) - - # save time_sheet to csv files, one per experiment - output_csv_path = os.path.join(source_path, subdir, run_name + '_mission_time.csv') - time_sheet.to_csv(output_csv_path) - print(' Saved mission time to ', output_csv_path) - - print(' Done processing run:', run_name, '- took', round(time.time() - t_run, 2), 's') + self.sweep_mission_time(source_name=run_name, + all_runs=False) + self.process_mission_time(source_name=run_name) - # Append single multi-line log summary for this method - t_end = time.time() - subdirs_list = [d for d in os.listdir(source_path) if 
os.path.isdir(os.path.join(source_path, d))] - total_runs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) if f.endswith('_catalog.hdf5')]) for d in subdirs_list) - mission_csvs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) if f.endswith('_mission_time.csv')]) for d in subdirs_list) - msg = f"""sweep_mission_time summary: - source_name: {source_name} - source_path: {source_path} - subdirs_found: {subdirs_list} - subdirs_total: {len(subdirs_list)} - total_runs_catalogs_found: {total_runs} - mission_time_csvs_found: {mission_csvs} - end_time: {time.ctime(t_end)} - """ - self.logger.info(msg) + else: + source_path = os.path.join(self.output_path, source_name) + subdirs = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] + + for subdir in subdirs: + print('--------------------------------------------------') + print('Processing subdir: ', subdir) + t_sub = time.time() + + # make a list of all files ending in .hdf5 in subdir, then keep only the part of the filename before '_catalog.hdf5' and only if the sting does not contain 'maxsep' + run_names = [f.split('_catalog.hdf5')[0] for f in os.listdir(os.path.join(source_path, subdir)) + if f.endswith('_catalog.hdf5')] + + for run_name in run_names: + print(' ----------------------------------------------') + print(' Processing run: ', run_name) + t_run = time.time() + + catalog_path = os.path.join(source_path, subdir, run_name + '_catalog.hdf5') + config_path = os.path.join(source_path, subdir, run_name + '.yaml') + + time_sheet = self.get_mission_time(catalog_path=catalog_path, + config_path=config_path) + + # save time_sheet to csv files, one per experiment + output_csv_path = os.path.join(source_path, subdir, run_name + '_mission_time.csv') + time_sheet.to_csv(output_csv_path) + print(' Saved mission time to ', output_csv_path) + + print(' Done processing run:', run_name, '- took', round(time.time() - t_run, 2), 's') + + # Append single multi-line log summary for this method + t_end = time.time() + subdirs_list = [d for d in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, d))] + total_runs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) if f.endswith('_catalog.hdf5')]) for d in subdirs_list) + mission_csvs = sum(len([f for f in os.listdir(os.path.join(source_path, d)) if f.endswith('_mission_time.csv')]) for d in subdirs_list) + msg = f"""sweep_mission_time summary: + source_name: {source_name} + source_path: {source_path} + subdirs_found: {subdirs_list} + subdirs_total: {len(subdirs_list)} + total_runs_catalogs_found: {total_runs} + mission_time_csvs_found: {mission_csvs} + end_time: {time.ctime(t_end)} + """ + self.logger.info(msg) def process_mission_time(self, source_name): @@ -666,7 +681,10 @@ def process_mission_time(self, ['#1f6f6f', '#54a1a1', '#9fc8c8'], ] columns = ['detection', 'orbit', 'char'] - x = np.asarray(exptable[exps[0]].index, dtype=float) + try: + x = np.asarray(exptable[exps[0]].index, dtype=float) + except: + x = np.asarray(exptable[exps].index, dtype=float) y0 = np.zeros_like(x, dtype=float) fig, ax = plt.subplots() @@ -750,7 +768,7 @@ def _create_short_name(row): row = scenario_df.iloc[i] # create the custom config file based on the existing config file - custom_config = base_config.copy() + custom_config = deepcopy(base_config) # Loop through the items in the row for key, value in row.items(): @@ -778,8 +796,8 @@ def _create_short_name(row): self.run_optimizer_sweep( run_name=run_name, source_name=source_name, - 
characterization=row['characterization'], - opt_limit_factor=row['opt_limit_factor'], + characterization=bool(row['characterization']), + opt_limit_factor=float(row['opt_limit_factor']), reduce_catalog=True ) diff --git a/lifesim/core/core.py b/lifesim/core/core.py index dd5a009..1cf7600 100644 --- a/lifesim/core/core.py +++ b/lifesim/core/core.py @@ -6,7 +6,7 @@ import os import yaml -from numpy import ndarray, array +from numpy import ndarray, array, float64, float32, int64, int32, int16 import git from lifesim.core.data import Data @@ -442,6 +442,8 @@ def add_module(self, module.data = self.data def write_config(self): + add_numpy_representers() + module_dict = {key: str(type(module)) for (key, module) in self.modules.items()} # fetch version number and git commit hash if available @@ -520,3 +522,14 @@ def convert_to_np(inp: dict): else: out[k] = inp[k] return out + +def add_numpy_representers(): + def numpy_repr(dumper, data): + return dumper.represent_data(data.item()) + + # Add for various numpy types + for dtype in [float64, float32, int64, int32, int16]: + yaml.add_representer(dtype, numpy_repr) + + # Handle numpy arrays specifically + yaml.add_representer(ndarray, lambda dumper, data: dumper.represent_list(data.tolist()))
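For reference, a minimal standalone usage sketch of the representer pattern registered above (the config dictionary below is illustrative, not a LIFEsim option set): write_config() calls add_numpy_representers() so that numpy-typed option values serialize as plain YAML scalars and lists instead of verbose Python-object tags.

    import numpy as np
    import yaml

    def numpy_repr(dumper, data):
        # Convert a numpy scalar to its native Python equivalent before dumping
        return dumper.represent_data(data.item())

    # Register the scalar representers for the common numpy dtypes
    for dtype in [np.float64, np.float32, np.int64, np.int32, np.int16]:
        yaml.add_representer(dtype, numpy_repr)

    # Arrays are dumped as plain YAML lists
    yaml.add_representer(np.ndarray,
                         lambda dumper, data: dumper.represent_list(data.tolist()))

    # With the representers in place, numpy values dump as clean YAML
    config = {'diameter': np.float64(3.5), 'wl_bins': np.array([4.0, 10.0, 18.5])}
    print(yaml.dump(config))
    # diameter: 3.5
    # wl_bins:
    # - 4.0
    # - 10.0
    # - 18.5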