From e519636652c048232658c67ee9454a7e114a5cd7 Mon Sep 17 00:00:00 2001 From: Amedeo Chiefa <103528316+achiefa@users.noreply.github.com> Date: Tue, 1 Jul 2025 09:28:44 +0000 Subject: [PATCH 01/69] Implementing check for repeated added rules. --- validphys2/src/validphys/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 04f306a858..a96564a8b1 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1494,7 +1494,7 @@ def produce_rules( ) except RuleProcessingError as e: raise ConfigError(f"Error Processing filter rules: {e}") from e - + if added_filter_rules: for i, rule in enumerate(added_filter_rules): try: From d7bb1af1f7e3ee36dbe6fd0d4eb3177e2ce43c0d Mon Sep 17 00:00:00 2001 From: Amedeo Chiefa <103528316+achiefa@users.noreply.github.com> Date: Tue, 1 Jul 2025 11:17:38 +0000 Subject: [PATCH 02/69] Implementing uniqueness parsing logic for filter rules. --- validphys2/src/validphys/filters.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index add8a3bf4b..b01d8ff825 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -125,7 +125,6 @@ def to_dict(self): class FilterRule: """ Dataclass which carries the filter rule information. - """ dataset: str = None @@ -168,10 +167,13 @@ def default_filter_rules_input(): """ Return a tuple of FilterRule objects. These are defined in ``filters.yaml`` in the ``validphys.cuts`` module. - Similarly to `parse_added_filter_rules`, this function checks if the rules - are unique, i.d. if there are no multiple rules for the same dataset of + Similarly to `parse_added_filter_rules`, this function checks if the rules + are unique, i.d. if there are no multiple rules for the same dataset of process with the same rule (`reason` and `local_variables` are not hashed). """ + # TODO: This should be done using a more sophisticated comparison + # that checks if two rules are actually the same, regardless of the + # order in which the cuts are defined. list_rules = yaml_safe.load(read_text(validphys.cuts, "filters.yaml")) unique_rules = set(FilterRule(**rule) for rule in list_rules) if len(unique_rules) != len(list_rules): From 61f6cb434786c14b59efbdc5da775f58637d53ce Mon Sep 17 00:00:00 2001 From: Amedeo Chiefa <103528316+achiefa@users.noreply.github.com> Date: Wed, 2 Jul 2025 09:40:34 +0000 Subject: [PATCH 03/69] Update error message --- validphys2/src/validphys/config.py | 2 +- validphys2/src/validphys/filters.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index a96564a8b1..04f306a858 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1494,7 +1494,7 @@ def produce_rules( ) except RuleProcessingError as e: raise ConfigError(f"Error Processing filter rules: {e}") from e - + if added_filter_rules: for i, rule in enumerate(added_filter_rules): try: diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index b01d8ff825..f02d843cb9 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -167,8 +167,8 @@ def default_filter_rules_input(): """ Return a tuple of FilterRule objects. These are defined in ``filters.yaml`` in the ``validphys.cuts`` module. 
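
The uniqueness checks added in the first two patches hash ``FilterRule`` with ``reason`` and ``local_variables`` left out of the comparison. A minimal standalone sketch of that pattern, using an illustrative dataclass rather than the real validphys one:

    from dataclasses import dataclass, field

    @dataclass(frozen=True)
    class MiniRule:
        dataset: str = None
        rule: str = None
        # excluded from __eq__/__hash__, so rules differing only here count as duplicates
        reason: str = field(default=None, compare=False)

    rules = [MiniRule("DS1", "x > 0.1", reason="low-x cut"),
             MiniRule("DS1", "x > 0.1", reason="same cut, different note")]
    assert len(set(rules)) != len(rules)  # duplicate detected despite different reasons
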
- Similarly to `parse_added_filter_rules`, this function checks if the rules - are unique, i.d. if there are no multiple rules for the same dataset of + Similarly to `parse_added_filter_rules`, this function checks if the rules + are unique, i.d. if there are no multiple rules for the same dataset of process with the same rule (`reason` and `local_variables` are not hashed). """ # TODO: This should be done using a more sophisticated comparison From 7d9e9691e1c75c1636e58f0235234bc82fce2152 Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 15 Jul 2024 09:43:25 +0100 Subject: [PATCH 04/69] Copied fro branch 'HT_thcovmat' --- validphys2/src/validphys/commondata.py | 4 + validphys2/src/validphys/config.py | 50 +++++- validphys2/src/validphys/dataplots.py | 14 +- .../theorycovariance/construction.py | 164 +++++++++++++++++- 4 files changed, 222 insertions(+), 10 deletions(-) diff --git a/validphys2/src/validphys/commondata.py b/validphys2/src/validphys/commondata.py index 7c68c0784f..e59ac5ce2c 100644 --- a/validphys2/src/validphys/commondata.py +++ b/validphys2/src/validphys/commondata.py @@ -38,3 +38,7 @@ def loaded_commondata_with_cuts(commondata, cuts): groups_dataset_inputs_loaded_cd_with_cuts = collect( "loaded_commondata_with_cuts", ("group_dataset_inputs_by_metadata", "data_input") ) + +groups_dataset_inputs_loaded_cd_with_cuts_byprocess = collect( + "loaded_commondata_with_cuts", ("group_dataset_inputs_by_process", "data") + ) \ No newline at end of file diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 04f306a858..51cc3f19d0 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1247,6 +1247,29 @@ def produce_loaded_user_covmat_path(self, user_covmat_path: str = ""): l = self.loader fileloc = l.check_vp_output_file(user_covmat_path) return fileloc + + + @configparser.explicit_node + def produce_covmat_custom(self, use_ht_uncertainties: bool = False, ht_version: int = 1): + if use_ht_uncertainties: + from validphys.theorycovariance.construction import thcov_ht + + return thcov_ht + else: + from validphys.theorycovariance.construction import covs_pt_prescrip + + return covs_pt_prescrip + + @configparser.explicit_node + def produce_combine_custom(self, use_ht_uncertainties: bool = False): + if use_ht_uncertainties: + from validphys.theorycovariance.construction import combine_by_type_ht + + return combine_by_type_ht + else: + from validphys.theorycovariance.construction import combine_by_type + + return combine_by_type @configparser.explicit_node def produce_nnfit_theory_covmat( @@ -1274,8 +1297,33 @@ def produce_nnfit_theory_covmat( from validphys.theorycovariance.construction import user_covmat_fitting f = user_covmat_fitting + elif use_ht_uncertainties: + # NOTE: this covmat is the same as for scale variations, which will result in a clash of + # table names if we wish to use them simultaneously + if use_user_uncertainties: + from validphys.theorycovariance.construction import total_theory_covmat_fitting + + f = total_theory_covmat_fitting + else: + from validphys.theorycovariance.construction import theory_covmat_custom_fitting + + f = theory_covmat_custom_fitting + + @functools.wraps(f) + def res(*args, **kwargs): + return f(*args, **kwargs) + + # Set this to get the same filename regardless of the action. 
+ res.__name__ = "theory_covmat" + return res + + + @configparser.explicit_node + def produce_combine_by_type_custom(self, use_ht_uncertainties: bool = False): + if use_ht_uncertainties: + return validphys.theorycovariance.construction.combine_by_type_ht + return validphys.theorycovariance.construction.combine_by_type - return f def produce_fitthcovmat( self, use_thcovmat_if_present: bool = False, fit: (str, type(None)) = None diff --git a/validphys2/src/validphys/dataplots.py b/validphys2/src/validphys/dataplots.py index 961c272406..bb4d7d7db6 100644 --- a/validphys2/src/validphys/dataplots.py +++ b/validphys2/src/validphys/dataplots.py @@ -1287,7 +1287,7 @@ def _check_display_cuts_requires_use_cuts(display_cuts, use_cuts): @make_argcheck def _check_marker_by(marker_by): - markers = ('process type', 'experiment', 'dataset', 'group') + markers = ('process type', 'experiment', 'dataset', 'group', 'kinematics') if marker_by not in markers: raise CheckError("Unknown marker_by value", marker_by, markers) @@ -1346,7 +1346,8 @@ def plot_xq2( will be displaed and marked. The points are grouped according to the `marker_by` option. The possible - values are: "process type", "experiment", "group" or "dataset". + values are: "process type", "experiment", "group" or "dataset" for discrete + colors, or "kinematics" for coloring by 1/(Q2(1-x)) Some datasets can be made to appear highlighted in the figure: Define a key called ``highlight_datasets`` containing the names of the datasets to be @@ -1477,6 +1478,7 @@ def plot_xq2( xh = defaultdict(list) q2h = defaultdict(list) + cvdict = defaultdict(list) if not highlight_datasets: highlight_datasets = set() @@ -1507,6 +1509,8 @@ def next_options(): elif marker_by == "group": # if group is None then make sure that shows on legend. key = str(group) + elif marker_by == "kinematics": + key = None else: raise ValueError('Unknown marker_by value') @@ -1522,6 +1526,7 @@ def next_options(): xdict = x q2dict = q2 + cvdict[key].append(commondata.load().get_cv()) xdict[key].append(fitted[0]) q2dict[key].append(fitted[1]) if display_cuts: @@ -1536,6 +1541,11 @@ def next_options(): else: # This is to get the label key coords = [], [] + if marker_by == "kinematics": + ht_magnitude = np.concatenate( cvdict[key]) / (coords[1] * (1 - coords[0]) ) + out = ax.scatter(*coords, marker='.', c=ht_magnitude, cmap="viridis", norm=mcolors.LogNorm()) + clb = fig.colorbar(out) + clb.ax.set_title(r'$F_\mathrm{exp}\frac{1}{Q^2(1-x)}$') ax.plot(*coords, label=key, markeredgewidth=1, markeredgecolor=None, **key_options[key]) # Iterate again so highlights are printed on top. 
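
The ``marker_by: kinematics`` option added above colours each (x, Q2) point by the rough higher-twist weight F_exp/(Q2(1-x)) on a log scale. A small self-contained sketch of that colouring, with toy arrays standing in for the validphys commondata:

    import numpy as np
    import matplotlib.pyplot as plt
    import matplotlib.colors as mcolors

    x = np.array([1e-3, 1e-2, 0.1, 0.5])
    q2 = np.array([2.0, 10.0, 100.0, 1000.0])       # GeV^2
    cv = np.array([1.2, 0.9, 0.4, 0.05])            # toy experimental central values
    ht_magnitude = cv / (q2 * (1 - x))               # same combination used in plot_xq2

    fig, ax = plt.subplots()
    out = ax.scatter(x, q2, marker='.', c=ht_magnitude, cmap="viridis", norm=mcolors.LogNorm())
    ax.set_xscale("log")
    ax.set_yscale("log")
    fig.colorbar(out).ax.set_title(r'$F_\mathrm{exp}\frac{1}{Q^2(1-x)}$')
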
diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index e4811c4478..f1ea41c48a 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -19,6 +19,8 @@ check_fit_dataset_order_matches_grouped, process_lookup, ) +import scipy.linalg as la +import scipy.interpolate as scint log = logging.getLogger(__name__) @@ -47,7 +49,7 @@ def theory_covmat_dataset(results, results_central_bytheoryids, point_prescripti return thcovmat -ProcessInfo = namedtuple("ProcessInfo", ("preds", "namelist", "sizes")) +ProcessInfo = namedtuple("ProcessInfo", ("preds", "namelist", "sizes", "data")) def combine_by_type(each_dataset_results_central_bytheory): @@ -78,12 +80,160 @@ def combine_by_type(each_dataset_results_central_bytheory): for key, item in theories_by_process.items(): theories_by_process[key] = np.concatenate(item, axis=1) process_info = ProcessInfo( - preds=theories_by_process, namelist=ordered_names, sizes=dataset_size + preds=theories_by_process, namelist=ordered_names, sizes=dataset_size, data=None ) return process_info -def covmat_3fpt(deltas1, deltas2): +def combine_by_type_ht(each_dataset_results, groups_dataset_inputs_loaded_cd_with_cuts_byprocess): + """same as combine_by_type but now for a single theory and including commondata info""" + dataset_size = defaultdict(list) + theories_by_process = defaultdict(list) + cd_by_process = defaultdict(list) + ordered_names = defaultdict(list) + for dataset, cd in zip( + each_dataset_results, groups_dataset_inputs_loaded_cd_with_cuts_byprocess + ): + name = cd.setname + if name != dataset[0].name: + raise ValueError("The underlying datasets do not match!") + theory_centrals = [x.central_value for x in dataset] + dataset_size[name] = len(theory_centrals[0]) + proc_type = process_lookup(name) + ordered_names[proc_type].append(name) + cd_by_process[proc_type].append(cd.kinematics.values) + theories_by_process[proc_type].append(theory_centrals) + + for key in theories_by_process.keys(): + theories_by_process[key] = np.concatenate(theories_by_process[key], axis=1) + cd_by_process[key] = np.concatenate(cd_by_process[key], axis=0) + process_info = ProcessInfo( + preds=theories_by_process, namelist=ordered_names, sizes=dataset_size, data=cd_by_process + ) + return process_info + + +def thcov_ht(combine_by_type_ht, H2_list, HL_list, reverse=False): + "Same as `thcov_HT` but implementing theory covariance method for each node of the spline." 
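
The body below builds one shift per spline node (all other knots zeroed), divides by Q2, and sums outer products of the shifts into a covariance block. A standalone sketch of that scheme, with toy kinematics in place of the commondata values:

    import numpy as np
    from scipy.interpolate import CubicSpline

    nodes = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0]                   # ABMP-style knots in x
    h2_prior = [0.023, -0.032, -0.005, 0.025, 0.051, 0.003, 0.0]  # example prior per node
    x = np.array([0.05, 0.2, 0.6])                                # toy kinematics
    q2 = np.array([4.0, 10.0, 30.0])

    deltas = []
    for i, prior in enumerate(h2_prior):
        knots = np.zeros(len(nodes))
        knots[i] = prior                                          # shift one node at a time
        deltas.append(CubicSpline(nodes, knots)(x) / q2)          # twist-4 shift ~ H(x)/Q^2

    cov_block = sum(np.outer(d, d) for d in deltas)               # sum of outer products
    assert cov_block.shape == (len(x), len(x))
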
+ process_info = combine_by_type_ht + running_index_tot = 0 + start_proc_by_exp = defaultdict(list) + deltas = defaultdict(list) + included_proc = ["DIS NC"] + excluded_exp = {"DIS NC" : ["NMC_NC_NOTFIXED_DW_EM-F2"]} + included_exp = {} + for proc in included_proc: + aux = [] + for exp in process_info.namelist[proc]: + if exp not in excluded_exp[proc]: + aux.append(exp) + included_exp[proc] = aux + + # ABMP parametrisation + x_abmp = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] + + # Check that H2_list and HL_list have the same size as x + if (len(H2_list) != len(x_abmp)) or (len(HL_list) != len(x_abmp)): + raise ValueError(f"The size of HT parameters does not match the number of nodes in the spline.") + + def wrapper_to_splines(i): + if not reverse: + shifted_H2_list = [0 for k in range(len(x_abmp))] + shifted_HL_list = [0 for k in range(len(x_abmp))] + shifted_H2_list[i] = H2_list[i] + shifted_HL_list[i] = HL_list[i] + else: + shifted_H2_list = H2_list.copy() + shifted_HL_list = HL_list.copy() + shifted_H2_list[i] = 0 + shifted_HL_list[i] = 0 + + H_2 = scint.CubicSpline(x_abmp, shifted_H2_list) + H_L = scint.CubicSpline(x_abmp, shifted_HL_list) + H_2 = np.vectorize(H_2) + H_L = np.vectorize(H_L) + return H_2, H_L + + for proc in process_info.namelist.keys(): + running_index_proc = 0 + x = np.array([]) + Q2 = np.array([]) + y = np.array([]) + + for exp in process_info.namelist[proc]: + # Locate position of the experiment + size = process_info.sizes[exp] + start_proc_by_exp[exp] = running_index_tot + running_index_tot += size + running_index_proc += size + + # Compute shifts only for a subset of processes + if proc in included_proc and exp in included_exp[proc]: + #central = process_info.preds[proc][1][start_proc_by_exp[exp] : size] # Probably this is deprecated + x = process_info.data[proc].T[0][running_index_proc - size : running_index_proc] + Q2 = process_info.data[proc].T[1][running_index_proc - size : running_index_proc] + y = process_info.data[proc].T[2][running_index_proc - size : running_index_proc] + + if "SIGMA" in exp: + N_2, N_L = compute_normalisation_by_experiment(exp, x, y, Q2) + + elif "F2" in exp: + N_2 = np.ones(shape=x.shape) + N_L = np.zeros(shape=x.shape) + + else: + raise ValueError(f"The normalisation for the observable is not known.") + + # Loop over the parameter + for i in range(len(x_abmp)): + H_L, H_2 = wrapper_to_splines(i) + deltas[f"({i+1}+,0)"] += [N_2 * H_2(x) / Q2] + deltas[f"(0,{i+1}+)"] += [N_L * H_L(x) / Q2] + + + # Construct theory covmat + covmats = defaultdict(list) + for proc1 in included_proc: + for proc2 in included_proc: + for i, exp1 in enumerate(included_exp[proc1]): + for j, exp2 in enumerate(included_exp[proc2]): + s = np.zeros(shape=(deltas["(1+,0)"][i].size, deltas["(1+,0)"][j].size)) + for par in deltas.keys(): + s += np.outer(deltas[par][i], deltas[par][j]) + start_locs = (start_proc_by_exp[exp1], start_proc_by_exp[exp2]) + covmats[start_locs] = s + return covmats + + +def compute_normalisation_by_experiment(experiment_name, x, y, Q2): + N_2 = np.zeros(shape=y.shape) + N_L = np.zeros(shape=y.shape) + + if "HERA_NC" in experiment_name or "HERA_CC" in experiment_name or "NMC" in experiment_name: + yp = 1 + np.power(1 - y, 2) + yL = np.power(y, 2) + + if "HERA_NC" in experiment_name or "NMC" in experiment_name: + N_2 = 1 + N_L = - yL / yp + + elif "HERA_CC" in experiment_name: + N_2 = 1 / 4 * yp + N_L = - N_2 * yL / yp + + if "CHORUS_CC" in experiment_name: + yL = np.power(y, 2) + Gf = 1.1663787e-05 + Mh = 0.938 + MW2 = 80.398 ** 2 + yp = 1 + 
np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / Q2 + N_2 = Gf**2 * Mh * yp / ( 2 * np.pi * np.power( 1 + Q2 / MW2, 2) ) + N_L = - N_2 * yL / yp + + return N_2, N_L + + +def covmat_3fpt(name1, name2, deltas1, deltas2): """Returns theory covariance sub-matrix for 3pt factorisation scale variation *only*, given two dataset names and collections of scale variation shifts""" @@ -311,11 +461,11 @@ def covs_pt_prescrip(combine_by_type, point_prescription): @table -def theory_covmat_custom_per_prescription(covs_pt_prescrip, procs_index, combine_by_type): +def theory_covmat_custom(covmat_custom, procs_index, combine_by_type_custom): """Takes the individual sub-covmats between each two processes and assembles them into a full covmat. Then reshuffles the order from ordering by process to ordering by experiment as listed in the runcard""" - process_info = combine_by_type + process_info = combine_by_type_custom # Construct a covmat_index based on the order of experiments as they are in combine_by_type # NOTE: maybe the ordering of covmat_index is always the same as that of procs_index? @@ -329,9 +479,9 @@ def theory_covmat_custom_per_prescription(covs_pt_prescrip, procs_index, combine covmat_index = pd.MultiIndex.from_tuples(indexlist, names=procs_index.names) # Put the covariance matrices between two process into a single covariance matrix - total_datapoints = sum(combine_by_type.sizes.values()) + total_datapoints = sum(process_info.sizes.values()) mat = np.zeros((total_datapoints, total_datapoints), dtype=np.float32) - for locs, cov in covs_pt_prescrip.items(): + for locs, cov in covmat_custom.items(): xsize, ysize = cov.shape mat[locs[0] : locs[0] + xsize, locs[1] : locs[1] + ysize] = cov df = pd.DataFrame(mat, index=covmat_index, columns=covmat_index) From 356cddc08c325a9a2467e4bbe28e1f3d9c097c55 Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 15 Jul 2024 09:45:37 +0100 Subject: [PATCH 05/69] Removed version --- validphys2/src/validphys/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 51cc3f19d0..662a1217b0 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1250,7 +1250,7 @@ def produce_loaded_user_covmat_path(self, user_covmat_path: str = ""): @configparser.explicit_node - def produce_covmat_custom(self, use_ht_uncertainties: bool = False, ht_version: int = 1): + def produce_covmat_custom(self, use_ht_uncertainties: bool = False): if use_ht_uncertainties: from validphys.theorycovariance.construction import thcov_ht From 4bab1dadf711047a10cda8cb0dc4c26d16277c4f Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 15 Jul 2024 19:22:01 +0100 Subject: [PATCH 06/69] Saving progress - not ready --- .../theorycovariance/construction.py | 198 ++++++++++++------ 1 file changed, 133 insertions(+), 65 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index f1ea41c48a..74249eb632 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -5,22 +5,25 @@ from collections import defaultdict, namedtuple import logging +import operator import numpy as np import pandas as pd +import scipy.linalg as la +import scipy.interpolate as scint from reportengine import collect from reportengine.table import table pass from validphys.results import results, results_central +from validphys.convolution import 
central_fk_predictions +from validphys.core import PDF from validphys.theorycovariance.theorycovarianceutils import ( check_correct_theory_combination, check_fit_dataset_order_matches_grouped, process_lookup, ) -import scipy.linalg as la -import scipy.interpolate as scint log = logging.getLogger(__name__) @@ -84,7 +87,6 @@ def combine_by_type(each_dataset_results_central_bytheory): ) return process_info - def combine_by_type_ht(each_dataset_results, groups_dataset_inputs_loaded_cd_with_cuts_byprocess): """same as combine_by_type but now for a single theory and including commondata info""" dataset_size = defaultdict(list) @@ -113,14 +115,14 @@ def combine_by_type_ht(each_dataset_results, groups_dataset_inputs_loaded_cd_wit return process_info -def thcov_ht(combine_by_type_ht, H2_list, HL_list, reverse=False): +def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, reverse=False): "Same as `thcov_HT` but implementing theory covariance method for each node of the spline." process_info = combine_by_type_ht running_index_tot = 0 start_proc_by_exp = defaultdict(list) deltas = defaultdict(list) included_proc = ["DIS NC"] - excluded_exp = {"DIS NC" : ["NMC_NC_NOTFIXED_DW_EM-F2"]} + excluded_exp = {"DIS NC" : []} included_exp = {} for proc in included_proc: aux = [] @@ -129,40 +131,21 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, reverse=False): aux.append(exp) included_exp[proc] = aux - # ABMP parametrisation + # ABMP parametrisation and target masses x_abmp = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] # Check that H2_list and HL_list have the same size as x if (len(H2_list) != len(x_abmp)) or (len(HL_list) != len(x_abmp)): raise ValueError(f"The size of HT parameters does not match the number of nodes in the spline.") - - def wrapper_to_splines(i): - if not reverse: - shifted_H2_list = [0 for k in range(len(x_abmp))] - shifted_HL_list = [0 for k in range(len(x_abmp))] - shifted_H2_list[i] = H2_list[i] - shifted_HL_list[i] = HL_list[i] - else: - shifted_H2_list = H2_list.copy() - shifted_HL_list = HL_list.copy() - shifted_H2_list[i] = 0 - shifted_HL_list[i] = 0 - - H_2 = scint.CubicSpline(x_abmp, shifted_H2_list) - H_L = scint.CubicSpline(x_abmp, shifted_HL_list) - H_2 = np.vectorize(H_2) - H_L = np.vectorize(H_L) - return H_2, H_L - - for proc in process_info.namelist.keys(): + + for i_proc, proc in enumerate(process_info.namelist.keys()): running_index_proc = 0 - x = np.array([]) - Q2 = np.array([]) - y = np.array([]) + kin_dict = {} - for exp in process_info.namelist[proc]: + for i_exp, exp in enumerate(process_info.namelist[proc]): # Locate position of the experiment size = process_info.sizes[exp] + dataset = groups_data_by_process[i_proc].datasets[i_exp] start_proc_by_exp[exp] = running_index_tot running_index_tot += size running_index_proc += size @@ -170,26 +153,33 @@ def wrapper_to_splines(i): # Compute shifts only for a subset of processes if proc in included_proc and exp in included_exp[proc]: #central = process_info.preds[proc][1][start_proc_by_exp[exp] : size] # Probably this is deprecated - x = process_info.data[proc].T[0][running_index_proc - size : running_index_proc] - Q2 = process_info.data[proc].T[1][running_index_proc - size : running_index_proc] - y = process_info.data[proc].T[2][running_index_proc - size : running_index_proc] - - if "SIGMA" in exp: - N_2, N_L = compute_normalisation_by_experiment(exp, x, y, Q2) - - elif "F2" in exp: - N_2 = np.ones(shape=x.shape) - N_L = np.zeros(shape=x.shape) - - else: - raise ValueError(f"The normalisation for the 
observable is not known.") + kin_dict['x'] = process_info.data[proc].T[0][running_index_proc - size : running_index_proc] + kin_dict['Q2'] = process_info.data[proc].T[1][running_index_proc - size : running_index_proc] + kin_dict['y']= process_info.data[proc].T[2][running_index_proc - size : running_index_proc] + kin_size = kin_dict['x'].size + print(exp) + target = extract_target(dataset) # Loop over the parameter for i in range(len(x_abmp)): - H_L, H_2 = wrapper_to_splines(i) - deltas[f"({i+1}+,0)"] += [N_2 * H_2(x) / Q2] - deltas[f"(0,{i+1}+)"] += [N_L * H_L(x) / Q2] - + PC_2, PC_L = compute_ht_parametrisation(i, x_abmp, kin_dict, exp, H2_list, HL_list) + if target == 'proton': + deltas[f"p({i+1}+,0)"] += [PC_2] + deltas[f"p(0,{i+1}+)"] += [PC_L] + deltas[f"d({i+1}+,0)"] += [np.zeros(kin_size)] + deltas[f"d(0,{i+1}+)"] += [np.zeros(kin_size)] + elif target == 'deuterium': + deltas[f"p({i+1}+,0)"] += [np.zeros(kin_size)] + deltas[f"p(0,{i+1}+)"] += [np.zeros(kin_size)] + deltas[f"d({i+1}+,0)"] += [PC_2] + deltas[f"d(0,{i+1}+)"] += [PC_L] + elif target == 'ratio': + + compute_ratio_delta(dataset, pdf, "d") + deltas[f"p({i+1}+,0)"] += [PC_2] + deltas[f"p(0,{i+1}+)"] += [PC_L] + deltas[f"d({i+1}+,0)"] += [PC_2] + deltas[f"d(0,{i+1}+)"] += [PC_L] # Construct theory covmat covmats = defaultdict(list) @@ -197,38 +187,115 @@ def wrapper_to_splines(i): for proc2 in included_proc: for i, exp1 in enumerate(included_exp[proc1]): for j, exp2 in enumerate(included_exp[proc2]): - s = np.zeros(shape=(deltas["(1+,0)"][i].size, deltas["(1+,0)"][j].size)) + s = np.zeros(shape=(deltas["p(1+,0)"][i].size, deltas["p(1+,0)"][j].size)) for par in deltas.keys(): s += np.outer(deltas[par][i], deltas[par][j]) start_locs = (start_proc_by_exp[exp1], start_proc_by_exp[exp2]) covmats[start_locs] = s + import ipdb; ipdb.set_trace() return covmats +def extract_target(dataset): + if dataset.op == "NULL": + if "_P_" in dataset.name or "HERA" in dataset.name: + return "proton" + elif "_D_" in dataset.name: + return "deuteron" + else: + raise ValueError(f"No target detected for {dataset.name}") + elif dataset.op == "RATIO": + return "ratio" + else: + raise ValueError(f"Unexpected operator in {dataset.name}: {dataset.op}") + + +def compute_ratio_delta(dataset, pdf: PDF, target, PC: np.array): + """This function computes the predictions as in validphys.convolution._predictions, + but for ratio and including higher twist terms in bot NUM and """ + opfunc = operator.truediv + cuts = dataset.cuts + all_predictions = [] + for fk in dataset.fkspecs: + fk_w_cuts = fk.load_with_cuts(cuts) + tmp = central_fk_predictions(fk_w_cuts, pdf) + all_predictions.append(np.concatenate(tmp.values)) + import ipdb; ipdb.set_trace() + if target == "d": + all_predictions[0] += PC + if target == "p": + all_predictions[1] += PC + return opfunc(*all_predictions) + + +def compute_ht_parametrisation( + index: int, + nodes: list, + kin_dict: dict, + exp: str, + h2_prior: list, + hl_prior: list, + reverse: bool = False +): + if not reverse: + shifted_H2_list = [0 for k in range(len(nodes))] + shifted_HL_list = [0 for k in range(len(nodes))] + shifted_H2_list[index] = h2_prior[index] + shifted_HL_list[index] = hl_prior[index] + else: + shifted_H2_list = h2_prior.copy() + shifted_HL_list = hl_prior.copy() + shifted_H2_list[index] = 0 + shifted_HL_list[index] = 0 + + H_2 = scint.CubicSpline(nodes, shifted_H2_list) + H_L = scint.CubicSpline(nodes, shifted_HL_list) + H_2 = np.vectorize(H_2) + H_L = np.vectorize(H_L) + + x = kin_dict['x'] + y = kin_dict['y'] + 
Q2 = kin_dict['Q2'] + N2, NL = compute_normalisation_by_experiment(exp, x, y, Q2) + + PC_2 = N2 * H_2(x) / Q2 + PC_L = NL * H_2(x) / Q2 + return PC_2, PC_L + + def compute_normalisation_by_experiment(experiment_name, x, y, Q2): N_2 = np.zeros(shape=y.shape) N_L = np.zeros(shape=y.shape) - if "HERA_NC" in experiment_name or "HERA_CC" in experiment_name or "NMC" in experiment_name: - yp = 1 + np.power(1 - y, 2) - yL = np.power(y, 2) + if "SIGMA" in experiment_name: + + if "HERA_NC" in experiment_name or "HERA_CC" in experiment_name or "NMC" in experiment_name: + yp = 1 + np.power(1 - y, 2) + yL = np.power(y, 2) + + if "HERA_NC" in experiment_name or "NMC" in experiment_name: + N_2 = 1 + N_L = - yL / yp - if "HERA_NC" in experiment_name or "NMC" in experiment_name: - N_2 = 1 - N_L = - yL / yp + elif "HERA_CC" in experiment_name: + N_2 = 1 / 4 * yp + N_L = - N_2 * yL / yp - elif "HERA_CC" in experiment_name: - N_2 = 1 / 4 * yp - N_L = - N_2 * yL / yp + if "CHORUS_CC" in experiment_name: + yL = np.power(y, 2) + Gf = 1.1663787e-05 + Mh = 0.938 + MW2 = 80.398 ** 2 + yp = 1 + np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / Q2 + N_2 = Gf**2 * Mh * yp / ( 2 * np.pi * np.power( 1 + Q2 / MW2, 2) ) + N_L = - N_2 * yL / yp - if "CHORUS_CC" in experiment_name: - yL = np.power(y, 2) - Gf = 1.1663787e-05 - Mh = 0.938 - MW2 = 80.398 ** 2 - yp = 1 + np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / Q2 - N_2 = Gf**2 * Mh * yp / ( 2 * np.pi * np.power( 1 + Q2 / MW2, 2) ) - N_L = - N_2 * yL / yp + elif "F2" in experiment_name: + N_2 = np.ones(shape=x.shape) + N_L = np.zeros(shape=x.shape) + + else: + raise ValueError(f"The normalisation for the observable is not known.") return N_2, N_L @@ -683,3 +750,4 @@ def experimentplustheory_corrmat_custom(procs_covmat, theory_covmat_custom): each_dataset_results = collect(results, ("group_dataset_inputs_by_process", "data")) +groups_data_by_process = collect("data", ("group_dataset_inputs_by_process",)) \ No newline at end of file From 56d60cf5fd8e5ce3a839960b941134068d7222c1 Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 16 Jul 2024 09:12:36 +0100 Subject: [PATCH 07/69] Implemented d/p ratio --- .../theorycovariance/construction.py | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 74249eb632..01b48e458d 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -157,7 +157,6 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, kin_dict['Q2'] = process_info.data[proc].T[1][running_index_proc - size : running_index_proc] kin_dict['y']= process_info.data[proc].T[2][running_index_proc - size : running_index_proc] kin_size = kin_dict['x'].size - print(exp) target = extract_target(dataset) # Loop over the parameter @@ -168,18 +167,18 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, deltas[f"p(0,{i+1}+)"] += [PC_L] deltas[f"d({i+1}+,0)"] += [np.zeros(kin_size)] deltas[f"d(0,{i+1}+)"] += [np.zeros(kin_size)] - elif target == 'deuterium': + elif target == 'deuteron': deltas[f"p({i+1}+,0)"] += [np.zeros(kin_size)] deltas[f"p(0,{i+1}+)"] += [np.zeros(kin_size)] deltas[f"d({i+1}+,0)"] += [PC_2] deltas[f"d(0,{i+1}+)"] += [PC_L] elif target == 'ratio': - - compute_ratio_delta(dataset, pdf, "d") - deltas[f"p({i+1}+,0)"] += [PC_2] - deltas[f"p(0,{i+1}+)"] += [PC_L] - 
deltas[f"d({i+1}+,0)"] += [PC_2] - deltas[f"d(0,{i+1}+)"] += [PC_L] + deltas[f"p({i+1}+,0)"] += [compute_ratio_delta(dataset, pdf, "p", PC_2)] + deltas[f"p(0,{i+1}+)"] += [compute_ratio_delta(dataset, pdf, "p", PC_L)] + deltas[f"d({i+1}+,0)"] += [compute_ratio_delta(dataset, pdf, "d", PC_2)] + deltas[f"d(0,{i+1}+)"] += [compute_ratio_delta(dataset, pdf, "d", PC_L)] + else: + raise ValueError("Could not detect target.") # Construct theory covmat covmats = defaultdict(list) @@ -192,7 +191,6 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, s += np.outer(deltas[par][i], deltas[par][j]) start_locs = (start_proc_by_exp[exp1], start_proc_by_exp[exp2]) covmats[start_locs] = s - import ipdb; ipdb.set_trace() return covmats @@ -210,7 +208,7 @@ def extract_target(dataset): raise ValueError(f"Unexpected operator in {dataset.name}: {dataset.op}") -def compute_ratio_delta(dataset, pdf: PDF, target, PC: np.array): +def compute_ratio_delta(dataset, pdf: PDF, target, PC: np.array) -> np.array: """This function computes the predictions as in validphys.convolution._predictions, but for ratio and including higher twist terms in bot NUM and """ opfunc = operator.truediv @@ -220,11 +218,10 @@ def compute_ratio_delta(dataset, pdf: PDF, target, PC: np.array): fk_w_cuts = fk.load_with_cuts(cuts) tmp = central_fk_predictions(fk_w_cuts, pdf) all_predictions.append(np.concatenate(tmp.values)) - import ipdb; ipdb.set_trace() if target == "d": all_predictions[0] += PC if target == "p": - all_predictions[1] += PC + all_predictions[1] += PC return opfunc(*all_predictions) From c58758ab9df3a673d2195d0b0c0dfa1571174a00 Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 16 Jul 2024 09:34:09 +0100 Subject: [PATCH 08/69] Parsing 'separate_multiplicative' in vp_setupfit --- n3fit/src/n3fit/scripts/vp_setupfit.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index d63429cc70..0d1f60ad4f 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -191,8 +191,10 @@ def from_yaml(cls, o, *args, **kwargs): # Check positivity bound if file_content.get('positivity_bound') is not None: SETUPFIT_FIXED_CONFIG['actions_'].append('positivity_bound check_unpolarized_bc') - - # Sets default values if they are not present in the runcard + if (sam_t0 := file_content.get('sampling')) is not None: + SETUPFIT_FIXED_CONFIG['separate_multiplicative'] = sam_t0.get( + 'separate_multiplicative', False + ) for k, v in SETUPFIT_DEFAULTS.items(): file_content.setdefault(k, v) From e4d9e6013b6fd57074868507bbf72792fd7cad7d Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 16 Jul 2024 14:37:54 +0100 Subject: [PATCH 09/69] Minor adjustments --- .../src/validphys/theorycovariance/construction.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 01b48e458d..9eee16c846 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -116,7 +116,11 @@ def combine_by_type_ht(each_dataset_results, groups_dataset_inputs_loaded_cd_wit def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, reverse=False): - "Same as `thcov_HT` but implementing theory covariance method for each node of the spline." 
+ """ + Same as `thcov_HT` but implementing theory covariance method for each node of the spline. + Note that 'groups_data_by_process' contains the same info as 'combine_by_type_ht'. At some + point we should use only one of them. + """ process_info = combine_by_type_ht running_index_tot = 0 start_proc_by_exp = defaultdict(list) @@ -140,7 +144,6 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, for i_proc, proc in enumerate(process_info.namelist.keys()): running_index_proc = 0 - kin_dict = {} for i_exp, exp in enumerate(process_info.namelist[proc]): # Locate position of the experiment @@ -149,13 +152,14 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, start_proc_by_exp[exp] = running_index_tot running_index_tot += size running_index_proc += size + kin_dict = {} # Compute shifts only for a subset of processes if proc in included_proc and exp in included_exp[proc]: #central = process_info.preds[proc][1][start_proc_by_exp[exp] : size] # Probably this is deprecated - kin_dict['x'] = process_info.data[proc].T[0][running_index_proc - size : running_index_proc] - kin_dict['Q2'] = process_info.data[proc].T[1][running_index_proc - size : running_index_proc] - kin_dict['y']= process_info.data[proc].T[2][running_index_proc - size : running_index_proc] + kin_dict['x'] = process_info.data[proc].T[0][running_index_proc - size : running_index_proc] + kin_dict['Q2'] = process_info.data[proc].T[1][running_index_proc - size : running_index_proc] + kin_dict['y'] = process_info.data[proc].T[2][running_index_proc - size : running_index_proc] kin_size = kin_dict['x'].size target = extract_target(dataset) From f11b466043e9991a5f6be83389863ec361217919 Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 16 Jul 2024 14:48:01 +0100 Subject: [PATCH 10/69] Corrected bug --- .../src/validphys/theorycovariance/construction.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 9eee16c846..4eb02ec4d4 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -177,10 +177,10 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, deltas[f"d({i+1}+,0)"] += [PC_2] deltas[f"d(0,{i+1}+)"] += [PC_L] elif target == 'ratio': - deltas[f"p({i+1}+,0)"] += [compute_ratio_delta(dataset, pdf, "p", PC_2)] - deltas[f"p(0,{i+1}+)"] += [compute_ratio_delta(dataset, pdf, "p", PC_L)] - deltas[f"d({i+1}+,0)"] += [compute_ratio_delta(dataset, pdf, "d", PC_2)] - deltas[f"d(0,{i+1}+)"] += [compute_ratio_delta(dataset, pdf, "d", PC_L)] + deltas[f"p({i+1}+,0)"] += [compute_ratio_delta(dataset, pdf, "p", PC_2) - compute_ratio_delta(dataset, pdf)] + deltas[f"p(0,{i+1}+)"] += [compute_ratio_delta(dataset, pdf, "p", PC_L) - compute_ratio_delta(dataset, pdf)] + deltas[f"d({i+1}+,0)"] += [compute_ratio_delta(dataset, pdf, "d", PC_2) - compute_ratio_delta(dataset, pdf)] + deltas[f"d(0,{i+1}+)"] += [compute_ratio_delta(dataset, pdf, "d", PC_L) - compute_ratio_delta(dataset, pdf)] else: raise ValueError("Could not detect target.") @@ -212,7 +212,7 @@ def extract_target(dataset): raise ValueError(f"Unexpected operator in {dataset.name}: {dataset.op}") -def compute_ratio_delta(dataset, pdf: PDF, target, PC: np.array) -> np.array: +def compute_ratio_delta(dataset, pdf: PDF, target = None, PC: np.array = None) -> np.array: """This function 
computes the predictions as in validphys.convolution._predictions, but for ratio and including higher twist terms in bot NUM and """ opfunc = operator.truediv @@ -224,7 +224,7 @@ def compute_ratio_delta(dataset, pdf: PDF, target, PC: np.array) -> np.array: all_predictions.append(np.concatenate(tmp.values)) if target == "d": all_predictions[0] += PC - if target == "p": + elif target == "p": all_predictions[1] += PC return opfunc(*all_predictions) From ad050da272e2bb3e7eb24d5a59afc75fd6760ffa Mon Sep 17 00:00:00 2001 From: achiefa Date: Wed, 17 Jul 2024 15:24:46 +0100 Subject: [PATCH 11/69] Correcting bug --- validphys2/src/validphys/theorycovariance/construction.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 4eb02ec4d4..874c62138b 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -165,7 +165,7 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, # Loop over the parameter for i in range(len(x_abmp)): - PC_2, PC_L = compute_ht_parametrisation(i, x_abmp, kin_dict, exp, H2_list, HL_list) + PC_2, PC_L = compute_ht_parametrisation(i, x_abmp, kin_dict, exp, H2_list, HL_list, reverse=reverse) if target == 'proton': deltas[f"p({i+1}+,0)"] += [PC_2] deltas[f"p(0,{i+1}+)"] += [PC_L] @@ -201,9 +201,9 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, def extract_target(dataset): if dataset.op == "NULL": if "_P_" in dataset.name or "HERA" in dataset.name: - return "proton" + return "proton" elif "_D_" in dataset.name: - return "deuteron" + return "deuteron" else: raise ValueError(f"No target detected for {dataset.name}") elif dataset.op == "RATIO": @@ -260,7 +260,7 @@ def compute_ht_parametrisation( N2, NL = compute_normalisation_by_experiment(exp, x, y, Q2) PC_2 = N2 * H_2(x) / Q2 - PC_L = NL * H_2(x) / Q2 + PC_L = NL * H_L(x) / Q2 return PC_2, PC_L From be18a4b7b36c871114dfba5113a0fef68954d122 Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 5 Aug 2024 15:20:01 +0100 Subject: [PATCH 12/69] Implemented knots in runcard --- .../validphys/theorycovariance/construction.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 874c62138b..0bcff2d707 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -115,7 +115,7 @@ def combine_by_type_ht(each_dataset_results, groups_dataset_inputs_loaded_cd_wit return process_info -def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, reverse=False): +def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, ht_knots = list(), reverse: bool = False): """ Same as `thcov_HT` but implementing theory covariance method for each node of the spline. Note that 'groups_data_by_process' contains the same info as 'combine_by_type_ht'. 
At some @@ -125,6 +125,7 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, running_index_tot = 0 start_proc_by_exp = defaultdict(list) deltas = defaultdict(list) + x_knots = list() included_proc = ["DIS NC"] excluded_exp = {"DIS NC" : []} included_exp = {} @@ -135,11 +136,14 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, aux.append(exp) included_exp[proc] = aux - # ABMP parametrisation and target masses - x_abmp = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] + if len(ht_knots) == 0: + # ABMP parametrisation + x_knots = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] + else: + x_knots = ht_knots # Check that H2_list and HL_list have the same size as x - if (len(H2_list) != len(x_abmp)) or (len(HL_list) != len(x_abmp)): + if (len(H2_list) != len(x_knots)) or (len(HL_list) != len(x_knots)): raise ValueError(f"The size of HT parameters does not match the number of nodes in the spline.") for i_proc, proc in enumerate(process_info.namelist.keys()): @@ -164,8 +168,8 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, target = extract_target(dataset) # Loop over the parameter - for i in range(len(x_abmp)): - PC_2, PC_L = compute_ht_parametrisation(i, x_abmp, kin_dict, exp, H2_list, HL_list, reverse=reverse) + for i in range(len(x_knots)): + PC_2, PC_L = compute_ht_parametrisation(i, x_knots, kin_dict, exp, H2_list, HL_list, reverse=reverse) if target == 'proton': deltas[f"p({i+1}+,0)"] += [PC_2] deltas[f"p(0,{i+1}+)"] += [PC_L] From c6a1f4ef85336caa42004e044d6150eca330153c Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 20 Aug 2024 11:13:40 +0100 Subject: [PATCH 13/69] Added valiphys card for chi2 report --- .../theory_covariance/chi2table_ht.yaml | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 validphys2/examples/theory_covariance/chi2table_ht.yaml diff --git a/validphys2/examples/theory_covariance/chi2table_ht.yaml b/validphys2/examples/theory_covariance/chi2table_ht.yaml new file mode 100644 index 0000000000..e1327eebd8 --- /dev/null +++ b/validphys2/examples/theory_covariance/chi2table_ht.yaml @@ -0,0 +1,107 @@ +# This is the driver template for vp-comparefits. It consists on a validphys +# runcard where some settings are missing and are to be filled by the +# vp-comparefits script. The settings below are a sample of such settings, kept +# for reference +# +# meta: +# title: The title of the Report +# keywords: [report_template] +# author: NNPDF Collaboration +# +# current: +# fit: {id: id_of_the_base_fit} +# pdf: {id: id_of_the_base_fit, label: "Current Fit"} +# theory: +# from_: fit +# theoryid: +# from_: theory +# speclabel: "Current Fit" +# +# reference: +# fit: {id: id_of_the_reference_fit} +# pdf: {id: id_of_the_reference_fit, label: "Reference Fit" } +# theory: +# from_: fit +# theoryid: +# from_: theory +# speclabel: "Reference Fit" + +pdfs: + - {id: "240816-06-7-01-lc", label: "HT low cuts"} + - {id: "240812-02-ABMP-lnv", label: "HT mid cuts"} + - {id: "240812-04-ABMP-lnv", label: "HT std. cuts"} + - {id: "240819_nnpdf40_lowcuts", label: "no HYT low cuts"} + - {id: "240807-midcuts", label: "no HT mid cuts"} + - {id: "NNPDF40_nnlo_as_01180_qcd", label: "no HT std cuts (NNPDF40)"} + +fits: + - {id: "240816-06-7-01-lc", label: "HT low cuts"} + - {id: "240812-02-ABMP-lnv", label: "HT mid cuts"} + - {id: "240812-04-ABMP-lnv", label: "HT std. 
cuts"} + - {id: "240819_nnpdf40_lowcuts", label: "no HYT low cuts"} + - {id: "240807-midcuts", label: "no HT mid cuts"} + - {id: "NNPDF40_nnlo_as_01180_qcd", label: "no HT std cuts (NNPDF40)"} + +use_cuts: "fromfit" +use_weights_in_covmat: False +use_thcovmat_if_present: True + +Q: 1.651 + +#template: report.md + +description: + from_: fit + +dataset_inputs: + from_: fit + +#dataspecs: +# - theoryid: +# from_: current +# pdf: +# from_: current +# fit: +# from_: current +# speclabel: +# from_: current +# +# - theoryid: +# from_: reference +# pdf: +# from_: reference +# fit: +# from_: reference +# speclabel: +# from_: reference + +Datanorm: + normalize_to: data + +DataGroups: + - metadata_group: nnpdf31_process + - metadata_group: experiment + +ProcessGroup: + metadata_group: nnpdf31_process + +template_text: | + Summary + ------- + {@ summarise_fits @} + + {@with DataGroups@} + $\chi^2$ by {@processed_metadata_group@} + ---------------------------------------- + {@plot_fits_groups_data_chi2@} + {@endwith@} + + $\chi^2$ by dataset + ------------------- + ### Plot + {@plot_fits_datasets_chi2@} + ### Table + {@ProcessGroup fits_chi2_table(show_total=true)@} + +actions_: + - report(main=true) From e0c21cc863a48d2b53a863c53b809384eabdda8c Mon Sep 17 00:00:00 2001 From: achiefa Date: Fri, 23 Aug 2024 15:44:41 +0100 Subject: [PATCH 14/69] First implementation of HT at the level of theory predictions --- n3fit/src/n3fit/layers/DIS.py | 61 ++++++++++++++++++++++++++++++-- n3fit/src/n3fit/model_gen.py | 51 ++++++++++++++++++++++++-- n3fit/src/n3fit/model_trainer.py | 1 + 3 files changed, 108 insertions(+), 5 deletions(-) diff --git a/n3fit/src/n3fit/layers/DIS.py b/n3fit/src/n3fit/layers/DIS.py index 072d423e95..d7f15b24bd 100644 --- a/n3fit/src/n3fit/layers/DIS.py +++ b/n3fit/src/n3fit/layers/DIS.py @@ -23,9 +23,12 @@ """ import numpy as np +from scipy import interpolate as scint from n3fit.backends import operations as op +from validphys.theorycovariance.construction import compute_normalisation_by_experiment + from .observable import Observable @@ -39,6 +42,49 @@ class DIS(Observable): while the input pdf is rank 4 of shape (batch_size, replicas, xgrid, flavours) """ + def __init__(self, fktable_data, fktable_arr, dataset_name, boundary_condition=None, operation_name="NULL", nfl=14, n_replicas=1, exp_kinematics=None, **kwargs): + super().__init__(fktable_data, fktable_arr, dataset_name, boundary_condition, operation_name, nfl, n_replicas, **kwargs) + + self.power_corrections = None + if exp_kinematics is not None: + self.exp_kinematics = exp_kinematics + self.power_corrections = self.compute_abmp_parametrisation() + + def compute_abmp_parametrisation(self): + """ + This function is very similar to `compute_ht_parametrisation` in + validphys.theorycovariance.construction.py. However, the latter + accounts for shifts in the 5pt prescription. As of now, this function + is meant to work only for DIS NC data, using the ABMP16 result. 
+ """ + x_knots = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] + y_h2 = [0.023, -0.032, -0.005, 0.025, 0.051, 0.003, 0.0] + y_ht = [-0.319, -0.134, -0.052, 0.071, 0.030, 0.003, 0.0] + h2_sigma = [0.019, 0.013, 0.009, 0.006, 0.005, 0.004] + ht_sigma = [0.126, 0.040, 0.030, 0.025, 0.012, 0.007] + H_2 = scint.CubicSpline(x_knots, y_h2) + H_T = scint.CubicSpline(x_knots, y_ht) + + # Reconstruct HL from HT and H2 + def H_L(x): + return (H_2(x) - np.power(x, 0.05) * H_T(x)) + + H_2 = np.vectorize(H_2) + H_L = np.vectorize(H_L) + + x = self.exp_kinematics['kin1'] + y = self.exp_kinematics['kin3'] + Q2 = self.exp_kinematics['kin2'] + N2, NL = compute_normalisation_by_experiment(self.dataname, x, y, Q2) + + PC_2 = N2 * H_2(x) / Q2 + PC_L = NL * H_L(x) / Q2 + power_correction = PC_2 + PC_L + power_correction = power_correction.to_numpy() + + return power_correction + + def gen_mask(self, basis): """ Receives a list of active flavours and generates a boolean mask tensor @@ -85,7 +131,11 @@ def build(self, input_shape): if self.num_replicas > 1: self.compute_observable = compute_dis_observable_many_replica else: - self.compute_observable = compute_dis_observable_one_replica + # Currying the function so that the `Observable` does not need + # to get modified + def compute_dis_observable_one_replica_w_pc(pdf, padded_fk): + return compute_dis_observable_one_replica(pdf, padded_fk, power_corrections = self.power_corrections) + self.compute_observable = compute_dis_observable_one_replica_w_pc def compute_dis_observable_many_replica(pdf, padded_fk): @@ -107,9 +157,14 @@ def compute_dis_observable_many_replica(pdf, padded_fk): return op.einsum('brxf, nxf -> brn', pdf[0], padded_fk) -def compute_dis_observable_one_replica(pdf, padded_fk): +def compute_dis_observable_one_replica(pdf, padded_fk, power_corrections = None): """ Same operations as above but a specialized implementation that is more efficient for 1 replica, masking the PDF rather than the fk table. """ - return op.tensor_product(pdf[0], padded_fk, axes=[(2, 3), (1, 2)]) + if power_corrections is None: + + return op.tensor_product(pdf, padded_fk, axes=[(2, 3), (1, 2)]) + else: + + return op.tensor_product(pdf, padded_fk, axes=[(2, 3), (1, 2)]) + power_corrections diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 2e376254d5..27d55c1f1d 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -135,6 +135,7 @@ def observable_generator( positivity_initial=1.0, integrability=False, n_replicas=1, + exp_data=None ): # pylint: disable=too-many-locals """ This function generates the observable models for each experiment. 
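
A condensed, standalone version of the ABMP16 evaluation added to the DIS layer above: cubic splines through the quoted central values, H_L rebuilt from H_2 and H_T, and the resulting twist-4 shift per point. The kinematics and the neutral-current normalisation (N_2 = 1, N_L = -y^2/(1+(1-y)^2)) are illustrative stand-ins for the commondata tables:

    import numpy as np
    from scipy.interpolate import CubicSpline

    x_knots = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1]
    H_2 = CubicSpline(x_knots, [0.023, -0.032, -0.005, 0.025, 0.051, 0.003, 0.0])
    H_T = CubicSpline(x_knots, [-0.319, -0.134, -0.052, 0.071, 0.030, 0.003, 0.0])

    def H_L(x):
        # H_L reconstructed from H_2 and H_T, as in compute_abmp_parametrisation
        return H_2(x) - np.power(x, 0.05) * H_T(x)

    x = np.array([0.05, 0.30])        # toy NC-proton kinematics
    q2 = np.array([10.0, 50.0])
    y = np.array([0.40, 0.60])
    yp, yl = 1 + (1 - y) ** 2, y ** 2
    n2, nl = np.ones_like(x), -yl / yp
    power_correction = n2 * H_2(x) / q2 + nl * H_L(x) / q2
    print(power_correction)
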
@@ -196,10 +197,39 @@ def observable_generator( dataset_xsizes = [] model_inputs = [] model_observables = [] + #print_x_grid = np.array([]) for the NTK + + kin_by_dict = {} + if exp_data is not None: + included_processes = [ + 'DEUTERON', + 'NMC', + 'NUCLEAR' + ] + + for process in exp_data: + commondata = process.load_commondata() + for dataset in commondata: + if process.name in included_processes and "_NC_" in dataset.setname: + kin_by_dict[dataset.setname] = dataset.kinematics + else: + kin_by_dict[dataset.setname] = None + # The first step is to compute the observable for each of the datasets for dataset in spec_dict["datasets"]: # Get the generic information of the dataset dataset_name = dataset.name + kinematics = None + + if exp_data is not None: + if kin_by_dict[dataset_name] is not None: + kinematics = kin_by_dict[dataset_name] + + ########## For the NTK + #import ipdb; ipdb.set_trace() + #fktables_data = dataset.fktables_data[0] + #print_x_grid = np.concatenate((print_x_grid, fktables_data.xgrid)) + #print(print_x_grid.size) # Look at what kind of layer do we need for this dataset if dataset.hadronic: @@ -216,7 +246,9 @@ def observable_generator( # list of validphys.coredata.FKTableData objects # these will then be used to check how many different pdf inputs are needed # (and convolutions if given the case) - obs_layer = Obs_Layer( + # BAD IMPLEMENTATION, but effective + if dataset.hadronic: + obs_layer = Obs_Layer( dataset.fktables_data, dataset.fktables(), dataset_name, @@ -224,7 +256,18 @@ def observable_generator( operation_name, n_replicas=n_replicas, name=f"dat_{dataset_name}", - ) + ) + else: + obs_layer = Obs_Layer( + dataset.fktables_data, + dataset.fktables(), + dataset_name, + boundary_condition, + operation_name, + n_replicas=n_replicas, + name=f"dat_{dataset_name}", + exp_kinematics=kinematics, + ) # If the observable layer found that all input grids are equal, the splitting will be None # otherwise the different xgrids need to be stored separately @@ -240,6 +283,10 @@ def observable_generator( model_observables.append(obs_layer) + # Again, for the NTK + #with open(f'x_grid/xgrid_{spec_name}.npy', 'wb') as f: + # np.save(f, print_x_grid) + # Check whether all xgrids of all observables in this experiment are equal # if so, simplify the model input if is_unique(model_inputs): diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index a8b7b95bee..2352558de4 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -576,6 +576,7 @@ def _generate_observables( invcovmat_tr=experiment_data["invcovmat"][i], invcovmat_vl=experiment_data["invcovmat_vl"][i], n_replicas=len(self.replicas), + exp_data=self.experiments_data ) # Save the input(s) corresponding to this experiment From 6fe197474d60322dc638490560531fe380222716 Mon Sep 17 00:00:00 2001 From: achiefa Date: Fri, 20 Sep 2024 14:18:09 +0100 Subject: [PATCH 15/69] Implemented table for kinematics --- validphys2/src/validphys/results.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index bfc3279782..592a5d6552 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -279,8 +279,30 @@ def groups_index(groups_data, diagonal_basis=False): return df.index -def experiments_index(experiments_data, diagonal_basis=False): - return groups_index(experiments_data, diagonal_basis) +def group_kin_table_no_table(groups_data, 
groups_index): + """Generate a table containing the kinematics.""" + result_records = [] + for group_data in groups_data: + group_cd = group_data.load_commondata() + cd = np.concatenate( + [group_cd[i].get_kintable() for i in range(len(group_cd))], + axis=0 + ) + for index, dataset in enumerate(cd): + result_records.append( + dict([("kin_1", dataset[0]), ("kin_2", dataset[1]), ("kin_3", dataset[2])]) + ) + + if not result_records: + log.warning("Empty records for group results") + return pd.DataFrame() + df = pd.DataFrame(result_records, columns=result_records[0].keys(), index=groups_index) + + return df + + +def experiments_index(experiments_data): + return groups_index(experiments_data) def procs_index(procs_data): From bc58ac6780fb8263a0cb92dcccd1f75a92bd7921 Mon Sep 17 00:00:00 2001 From: achiefa Date: Sat, 21 Sep 2024 11:23:04 +0100 Subject: [PATCH 16/69] Allowed theory HT in runcard - added HERACOMB in HT calculations --- n3fit/src/n3fit/layers/DIS.py | 81 +++++++++++++++++++++++++++++--- n3fit/src/n3fit/model_gen.py | 37 +++++++-------- n3fit/src/n3fit/model_trainer.py | 12 ++++- n3fit/src/n3fit/performfit.py | 10 +++- 4 files changed, 111 insertions(+), 29 deletions(-) diff --git a/n3fit/src/n3fit/layers/DIS.py b/n3fit/src/n3fit/layers/DIS.py index d7f15b24bd..f263155d25 100644 --- a/n3fit/src/n3fit/layers/DIS.py +++ b/n3fit/src/n3fit/layers/DIS.py @@ -27,7 +27,7 @@ from n3fit.backends import operations as op -from validphys.theorycovariance.construction import compute_normalisation_by_experiment +from validphys.theorycovariance.construction import compute_normalisation_by_experiment, extract_target from .observable import Observable @@ -42,13 +42,37 @@ class DIS(Observable): while the input pdf is rank 4 of shape (batch_size, replicas, xgrid, flavours) """ - def __init__(self, fktable_data, fktable_arr, dataset_name, boundary_condition=None, operation_name="NULL", nfl=14, n_replicas=1, exp_kinematics=None, **kwargs): + def __init__(self, fktable_data, + fktable_arr, + dataset_name, + boundary_condition=None, + operation_name="NULL", + nfl=14, + n_replicas=1, + power_corrections=False, + ht_type=None, + exp_kinematics=None, + **kwargs): super().__init__(fktable_data, fktable_arr, dataset_name, boundary_condition, operation_name, nfl, n_replicas, **kwargs) + self.compute_power_corrections = power_corrections self.power_corrections = None - if exp_kinematics is not None: + + import logging + if self.compute_power_corrections and exp_kinematics is not None: self.exp_kinematics = exp_kinematics - self.power_corrections = self.compute_abmp_parametrisation() + if ht_type is None: + self.ht_type = 'ABMP' + else: + self.ht_type = ht_type + + if self.ht_type == 'ABMP': + self.power_corrections = self.compute_abmp_parametrisation() + elif self.ht_type == 'custom': + self.power_corrections = self.compute_custom_parametrisation() + else: + raise Exception(f"HT type {ht_type} is not implemented.") + def compute_abmp_parametrisation(self): """ @@ -60,8 +84,8 @@ def compute_abmp_parametrisation(self): x_knots = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] y_h2 = [0.023, -0.032, -0.005, 0.025, 0.051, 0.003, 0.0] y_ht = [-0.319, -0.134, -0.052, 0.071, 0.030, 0.003, 0.0] - h2_sigma = [0.019, 0.013, 0.009, 0.006, 0.005, 0.004] - ht_sigma = [0.126, 0.040, 0.030, 0.025, 0.012, 0.007] + #h2_sigma = [0.019, 0.013, 0.009, 0.006, 0.005, 0.004] + #ht_sigma = [0.126, 0.040, 0.030, 0.025, 0.012, 0.007] H_2 = scint.CubicSpline(x_knots, y_h2) H_T = scint.CubicSpline(x_knots, y_ht) @@ -83,6 +107,51 @@ def H_L(x): 
power_correction = power_correction.to_numpy() return power_correction + + + def compute_custom_parametrisation(self): + """ + This function is very similar to `compute_ht_parametrisation` in + validphys.theorycovariance.construction.py. However, the latter + accounts for shifts in the 5pt prescription. As of now, this function + is meant to work only for DIS NC data, using the ABMP16 result. + """ + x_knots = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] + y_h2_p = [-0.00441, 0.11169, -0.01632, 0.00000, -0.08742, -0.07279, 0.00000] + y_hl_p = [0.00000, -0.06241, -0.08655, -0.03306, 0.00000, -0.05987, 0.0000] + y_h2_d = [-0.04117, 0.00000, 0.03124, -0.01059, 0.04763, 0.00000, 0.00000] + y_hl_d = [0.00316, 0.00469, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] + + H_2p = scint.CubicSpline(x_knots, y_h2_p) + H_lp = scint.CubicSpline(x_knots, y_hl_p) + H_2d = scint.CubicSpline(x_knots, y_h2_d) + H_ld = scint.CubicSpline(x_knots, y_hl_d) + + H_2p = np.vectorize(H_2p) + H_lp = np.vectorize(H_lp) + H_2d = np.vectorize(H_2d) + H_ld = np.vectorize(H_ld) + + x = self.exp_kinematics['kin1'] + y = self.exp_kinematics['kin3'] + Q2 = self.exp_kinematics['kin2'] + N2, NL = compute_normalisation_by_experiment(self.dataname, x, y, Q2) + + target = extract_target(self.dataname) + import ipnb; ipnb.set_trace() + if target == 'proton': + PC_2 = N2 * H_2p(x) / Q2 + PC_L = NL * H_lp(x) / Q2 + elif target == 'deuteron': + PC_2 = N2 * H_2d(x) / Q2 + PC_L = NL * H_ld(x) / Q2 + else: + raise Exception("Target is not known") + + power_correction = PC_2 + PC_L + power_correction = power_correction.to_numpy() + + return power_correction def gen_mask(self, basis): diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 27d55c1f1d..b45fbbe61e 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -135,7 +135,9 @@ def observable_generator( positivity_initial=1.0, integrability=False, n_replicas=1, - exp_data=None + exp_data=None, + power_corrections=None, + ht_type=None ): # pylint: disable=too-many-locals """ This function generates the observable models for each experiment. 
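
Once the per-point vector is built, the DIS layer only has to add it to the convolution output, as in the compute_dis_observable_one_replica change earlier in this series. A numpy-only sketch of that last step, with toy shapes standing in for the Keras tensors:

    import numpy as np

    ndata, nx, nfl = 4, 10, 14
    fk = np.random.rand(ndata, nx, nfl)           # stand-in for the padded FK table
    pdf = np.random.rand(nx, nfl)                 # stand-in for one PDF replica on the x grid
    power_corrections = np.full(ndata, 1e-3)      # fixed twist-4 shift per data point

    predictions = np.einsum("nxf,xf->n", fk, pdf)         # plain DIS convolution
    predictions_ht = predictions + power_corrections      # returned when power corrections are on
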
@@ -182,6 +184,10 @@ def observable_generator( set the positivity lagrange multiplier for epoch 1 integrability: bool switch on/off the integrability constraints + power_corrections: bool + whether to include HT in theory predictions + ht_type: str + type of HT parametrisation Returns ------ @@ -197,16 +203,15 @@ def observable_generator( dataset_xsizes = [] model_inputs = [] model_observables = [] - #print_x_grid = np.array([]) for the NTK - kin_by_dict = {} + if exp_data is not None: included_processes = [ 'DEUTERON', 'NMC', - 'NUCLEAR' + 'NUCLEAR', + 'HERACOMB', ] - for process in exp_data: commondata = process.load_commondata() for dataset in commondata: @@ -221,21 +226,14 @@ def observable_generator( dataset_name = dataset.name kinematics = None - if exp_data is not None: - if kin_by_dict[dataset_name] is not None: - kinematics = kin_by_dict[dataset_name] - - ########## For the NTK - #import ipdb; ipdb.set_trace() - #fktables_data = dataset.fktables_data[0] - #print_x_grid = np.concatenate((print_x_grid, fktables_data.xgrid)) - #print(print_x_grid.size) - # Look at what kind of layer do we need for this dataset if dataset.hadronic: Obs_Layer = DY else: Obs_Layer = DIS + if power_corrections: + if exp_data is not None and kin_by_dict[dataset_name] is not None: + kinematics = kin_by_dict[dataset_name] # Set the operation (if any) to be applied to the fktables of this dataset operation_name = dataset.operation @@ -246,7 +244,6 @@ def observable_generator( # list of validphys.coredata.FKTableData objects # these will then be used to check how many different pdf inputs are needed # (and convolutions if given the case) - # BAD IMPLEMENTATION, but effective if dataset.hadronic: obs_layer = Obs_Layer( dataset.fktables_data, @@ -266,7 +263,9 @@ def observable_generator( operation_name, n_replicas=n_replicas, name=f"dat_{dataset_name}", - exp_kinematics=kinematics, + power_corrections=power_corrections, + exp_kinematics=kinematics if power_corrections else None, + ht_type=None if not power_corrections else ht_type ) # If the observable layer found that all input grids are equal, the splitting will be None @@ -283,10 +282,6 @@ def observable_generator( model_observables.append(obs_layer) - # Again, for the NTK - #with open(f'x_grid/xgrid_{spec_name}.npy', 'wb') as f: - # np.save(f, print_x_grid) - # Check whether all xgrids of all observables in this experiment are equal # if so, simplify the model input if is_unique(model_inputs): diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 2352558de4..f1261514ea 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -111,6 +111,8 @@ def __init__( theoryid=None, lux_params=None, replicas=None, + power_corrections=False, + ht_type=None ): """ Parameters @@ -151,6 +153,10 @@ def __init__( if not give, the photon is not generated replicas: list list with the replicas ids to be fitted + power_corrections: bool + whether to include HT in theory predictions + ht_type: str + type of HT parametrisation """ # Save all input information self.exp_info = list(exp_info) @@ -167,6 +173,8 @@ def __init__( self.lux_params = lux_params self.replicas = replicas self.experiments_data = experiments_data + self.power_corrections = power_corrections + self.ht_type = ht_type # Initialise internal variables which define behaviour if debug: @@ -576,7 +584,9 @@ def _generate_observables( invcovmat_tr=experiment_data["invcovmat"][i], invcovmat_vl=experiment_data["invcovmat_vl"][i], n_replicas=len(self.replicas), 
- exp_data=self.experiments_data + exp_data=self.experiments_data, + power_corrections=self.power_corrections, + ht_type=self.ht_type ) # Save the input(s) corresponding to this experiment diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index e0fd1af9b5..1c9f9452e3 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -41,7 +41,9 @@ def performfit( debug=False, maxcores=None, double_precision=False, - parallel_models=True, + parallel_models=False, + power_corrections=False, + ht_type=None ): """ This action will (upon having read a validcard) process a full PDF fit @@ -128,6 +130,10 @@ def performfit( whether to use double precision parallel_models: bool whether to run models in parallel + power_corrections: bool + whether to include HT in theory predictions + ht_type: str + Type of HT parametrisation """ from n3fit.backends import set_initial_state @@ -197,6 +203,8 @@ def performfit( theoryid=theoryid, lux_params=fiatlux, replicas=replica_idxs, + power_corrections=power_corrections, + ht_type=ht_type ) # This is just to give a descriptive name to the fit function From 754a9166e43cb065c23c2d91178c38c78528ead2 Mon Sep 17 00:00:00 2001 From: achiefa Date: Sat, 21 Sep 2024 13:26:02 +0100 Subject: [PATCH 17/69] Excluded HERACOMB --- n3fit/src/n3fit/model_gen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index b45fbbe61e..b324727a55 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -210,7 +210,7 @@ def observable_generator( 'DEUTERON', 'NMC', 'NUCLEAR', - 'HERACOMB', + #'HERACOMB', ] for process in exp_data: commondata = process.load_commondata() From 525d23949842f487aa7c7a6b5e0a4f47f5bf527b Mon Sep 17 00:00:00 2001 From: achiefa Date: Sat, 21 Sep 2024 13:26:53 +0100 Subject: [PATCH 18/69] Hacking NMC dataset --- n3fit/src/n3fit/layers/DIS.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/n3fit/src/n3fit/layers/DIS.py b/n3fit/src/n3fit/layers/DIS.py index f263155d25..96e0f67536 100644 --- a/n3fit/src/n3fit/layers/DIS.py +++ b/n3fit/src/n3fit/layers/DIS.py @@ -58,7 +58,6 @@ def __init__(self, fktable_data, self.compute_power_corrections = power_corrections self.power_corrections = None - import logging if self.compute_power_corrections and exp_kinematics is not None: self.exp_kinematics = exp_kinematics if ht_type is None: @@ -116,6 +115,7 @@ def compute_custom_parametrisation(self): accounts for shifts in the 5pt prescription. As of now, this function is meant to work only for DIS NC data, using the ABMP16 result. 
""" + # Posteriors from 240812-01-ABMP-large-prior-7k x_knots = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] y_h2_p = [-0.00441, 0.11169, -0.01632, 0.00000, -0.08742, -0.07279, 0.00000] y_hl_p = [0.00000, -0.06241, -0.08655, -0.03306, 0.00000, -0.05987, 0.0000] @@ -137,16 +137,17 @@ def compute_custom_parametrisation(self): Q2 = self.exp_kinematics['kin2'] N2, NL = compute_normalisation_by_experiment(self.dataname, x, y, Q2) - target = extract_target(self.dataname) - import ipnb; ipnb.set_trace() - if target == 'proton': + if "_P_" in self.dataname or "HERA" in self.dataname: PC_2 = N2 * H_2p(x) / Q2 PC_L = NL * H_lp(x) / Q2 - elif target == 'deuteron': + elif "_D_" in self.dataname: PC_2 = N2 * H_2d(x) / Q2 PC_L = NL * H_ld(x) / Q2 else: - raise Exception("Target is not known") + # TODO + # Need to implement this + PC_2 = 0 / Q2 #N2 * H_2d(x) / Q2 + PC_L = 0 / Q2 #NL * H_ld(x) / Q2 power_correction = PC_2 + PC_L power_correction = power_correction.to_numpy() From ab292f4cd337d5094e7e0681fd20d6c0100c898e Mon Sep 17 00:00:00 2001 From: achiefa Date: Thu, 26 Sep 2024 14:37:59 +0200 Subject: [PATCH 19/69] Grouping kinematics --- validphys2/src/validphys/results.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index 592a5d6552..2b2af3dbba 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -239,6 +239,7 @@ def data_index(data): experiments_data = collect("data", ("group_dataset_inputs_by_experiment",)) +# NOTE: Same a `groups_data_by_process` in `construction.py` procs_data = collect("data", ("group_dataset_inputs_by_process",)) @@ -280,17 +281,24 @@ def groups_index(groups_data, diagonal_basis=False): def group_kin_table_no_table(groups_data, groups_index): - """Generate a table containing the kinematics.""" + """Generate a table containing the kinematics and the process_type.""" result_records = [] for group_data in groups_data: group_cd = group_data.load_commondata() cd = np.concatenate( - [group_cd[i].get_kintable() for i in range(len(group_cd))], + [group_cd[i].commondata_table[['kin1','kin2','kin3','process']] for i in range(len(group_cd))], axis=0 ) for index, dataset in enumerate(cd): + try: + process_name = dataset[3].name + except AttributeError: + process_name = dataset[3] result_records.append( - dict([("kin_1", dataset[0]), ("kin_2", dataset[1]), ("kin_3", dataset[2])]) + dict([("kin_1", dataset[0]), + ("kin_2", dataset[1]), + ("kin_3", dataset[2]), + ("process_type", process_name)]) ) if not result_records: From 576bad186a9558882c0a6a25eb5146b5821f7630 Mon Sep 17 00:00:00 2001 From: achiefa Date: Thu, 26 Sep 2024 14:40:38 +0200 Subject: [PATCH 20/69] Reimplementing thcovmat --- .../theorycovariance/construction.py | 149 +++++++++++++++++- 1 file changed, 147 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 0bcff2d707..3f924d0a44 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -56,7 +56,7 @@ def theory_covmat_dataset(results, results_central_bytheoryids, point_prescripti def combine_by_type(each_dataset_results_central_bytheory): - """Groups the datasets bu process and returns an instance of the ProcessInfo class + """Groups the datasets by process and returns an instance of the ProcessInfo class Parameters ---------- @@ -115,7 +115,152 
@@ def combine_by_type_ht(each_dataset_results, groups_dataset_inputs_loaded_cd_wit return process_info -def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, ht_knots = list(), reverse: bool = False): +def thcov_ht(H2_list, + HL_list, + groups_data_by_process, + pdf, + ht_knots = list()): + """ + Same as `thcov_HT` but implementing theory covariance method for each node of the spline. + Note that 'groups_data_by_process' contains the same info as 'combine_by_type_ht'. At some + point we should use only one of them. + """ + groups_data = groups_data_by_process + x_knots = list() + start_proc_by_exp = defaultdict(list) + ndata_by_exp = defaultdict(list) + running_index_tot = 0 + included_proc = ["DIS NC"] + excluded_exp = {"DIS NC" : []} + deltas = defaultdict(list) + + for i in range(len(x_knots)): + deltas[f"p({i+1}+,0)"] = np.array([]) + deltas[f"p(0,{i+1}+)"] = np.array([]) + deltas[f"d({i+1}+,0)"] = np.array([]) + deltas[f"d(0,{i+1}+)"] = np.array([]) + + if len(ht_knots) == 0: + # ABMP parametrisation + x_knots = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] + else: + x_knots = ht_knots + + # Check that H2_list and HL_list have the same size as x + if (len(H2_list) != len(x_knots)) or (len(HL_list) != len(x_knots)): + raise ValueError(f"The size of HT parameters does not match the number of nodes in the spline.") + + def ht_parametrisation( + index: int, + nodes: list, + x: list, + Q2: list, + h_prior: list, + reverse: bool = False + ): + if not reverse: + shifted_H_list = [0 for k in range(len(nodes))] + shifted_H_list[index] = h_prior[index] + else: + shifted_H_list = h_prior.copy() + shifted_H_list[index] = 0 + + H = scint.CubicSpline(nodes, shifted_H_list) + H = np.vectorize(H) + + PC = H(x) / Q2 + return PC + + for idx_proc, group_proc in enumerate(groups_data): + for idx_exp, exp_set in enumerate(group_proc.datasets): + # For covmat construction + exp_ndata = exp_set.load_commondata().ndata + start_proc_by_exp[exp_set.name] = running_index_tot + running_index_tot += exp_ndata + ndata_by_exp[exp_set.name] = exp_ndata + + if group_proc.name in included_proc and exp_set.name not in excluded_exp[group_proc.name]: + cd_table = exp_set.load_commondata().commondata_table + process_type = cd_table['process'].iloc[0] + x = cd_table['kin1'].to_numpy() + q2 = cd_table['kin2'].to_numpy() + y = cd_table['kin3'].to_numpy() + if x.size != exp_ndata: + raise ValueError("Problem with the number of data.") + + for i in range(len(x_knots)): + if process_type == "DIS_F2R": + cuts = exp_set.cuts + fkspec_F2D, fkspec_F2P = exp_set.fkspecs + fk_F2D = fkspec_F2D.load_with_cuts(cuts) + fk_F2P = fkspec_F2P.load_with_cuts(cuts) + F2D = central_fk_predictions(fk_F2D, pdf) + F2P = central_fk_predictions(fk_F2P, pdf) + + F2D = np.concatenate(F2D.values) + F2P = np.concatenate(F2P.values) + F2_ratio = operator.truediv(F2D, F2P) + PC = ht_parametrisation(i, x_knots, x, q2, H2_list) + + # NOTE + # Find a better way to store deltas + PC_2_p = np.array(operator.truediv(F2D, np.sum([F2P, PC],axis=0)) - F2_ratio) + PC_2_d = np.array(operator.truediv(F2D, np.sum([F2D, PC],axis=0)) - F2_ratio) + PC_L_p = np.zeros(F2_ratio.size) + PC_L_d = np.zeros(F2_ratio.size) + + + elif process_type == "DIS_F2P": + PC_2_p = ht_parametrisation(i, x_knots, x, q2, H2_list) + PC_2_d = np.zeros(exp_ndata) + PC_L_p = np.zeros(exp_ndata) + PC_L_d = np.zeros(exp_ndata) + + elif process_type == "DIS_F2D": + PC_2_p = np.zeros(exp_ndata) + PC_2_d = ht_parametrisation(i, x_knots, x, q2, H2_list) + PC_L_p = np.zeros(exp_ndata) + 
PC_L_d = np.zeros(exp_ndata) + + elif process_type == "DIS_NCE" or "DIS_NCP": + yp = 1 + np.power(1 - y, 2) + yL = np.power(y, 2) + N_L = - yL / yp + PC_2_p = ht_parametrisation(i, x_knots, x, q2, H2_list) + PC_2_d = np.zeros(exp_ndata) + PC_L_p = ht_parametrisation(i, x_knots, x, q2, HL_list) + PC_L_d = np.zeros(exp_ndata) + + else: + raise Exception(f"The process type `{process_type}` has not been implemented.") + + deltas[f"p({i+1}+,0)"] = np.append(deltas[f"p({i+1}+,0)"], PC_2_p) + deltas[f"p(0,{i+1}+)"] = np.append(deltas[f"p(0,{i+1}+)"], PC_2_d) + deltas[f"d({i+1}+,0)"] = np.append(deltas[f"d({i+1}+,0)"], PC_L_p) + deltas[f"d(0,{i+1}+)"] = np.append(deltas[f"d(0,{i+1}+)"], PC_L_d) + + else: + for i in range(len(x_knots)): + deltas[f"p({i+1}+,0)"] = np.append(deltas[f"p({i+1}+,0)"], np.zeros(exp_ndata)) + deltas[f"p(0,{i+1}+)"] = np.append(deltas[f"p(0,{i+1}+)"], np.zeros(exp_ndata)) + deltas[f"d({i+1}+,0)"] = np.append(deltas[f"d({i+1}+,0)"], np.zeros(exp_ndata)) + deltas[f"d(0,{i+1}+)"] = np.append(deltas[f"d(0,{i+1}+)"], np.zeros(exp_ndata)) + + # Construct the covariance matrix + covmats = defaultdict(list) + for exp_name_1, exp_idx_1 in start_proc_by_exp.items(): + for exp_name_2, exp_idx_2 in start_proc_by_exp.items(): + s = np.zeros(shape=(ndata_by_exp[exp_name_1], ndata_by_exp[exp_name_2])) + for shifts in deltas.keys(): + s += np.outer(deltas[shifts][exp_idx_1: exp_idx_1 + ndata_by_exp[exp_name_1]], + deltas[shifts][exp_idx_2: exp_idx_2 + ndata_by_exp[exp_name_2]]) + + start_locs = (exp_idx_1, exp_idx_2) + covmats[start_locs] = s + return covmats + + +def thcov_ht_old(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, ht_knots = list(), reverse: bool = False): """ Same as `thcov_HT` but implementing theory covariance method for each node of the spline. Note that 'groups_data_by_process' contains the same info as 'combine_by_type_ht'. At some From 8b9de4b10330ffba61d3f0bd03fe5b088b45835e Mon Sep 17 00:00:00 2001 From: achiefa Date: Fri, 27 Sep 2024 08:25:38 +0200 Subject: [PATCH 21/69] Added comment in HT for DIS --- n3fit/src/n3fit/layers/DIS.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/n3fit/src/n3fit/layers/DIS.py b/n3fit/src/n3fit/layers/DIS.py index 96e0f67536..7830c1dec8 100644 --- a/n3fit/src/n3fit/layers/DIS.py +++ b/n3fit/src/n3fit/layers/DIS.py @@ -58,6 +58,8 @@ def __init__(self, fktable_data, self.compute_power_corrections = power_corrections self.power_corrections = None + # NOTE + # Ratio of SFs are not implemented yet. Work in progress. 
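
For reference, the covariance blocks assembled at the end of the new thcov_ht above reduce to sums of outer products of shift vectors, with one independent shift per spline knot (the "5pt"-style prescription in which a single node is displaced at a time). A minimal sketch, with random numbers standing in for the actual H(x)/Q^2 shifts:

```python
import numpy as np

rng = np.random.default_rng(0)
n_nodes = 7              # one independent shift per spline knot
ndata_A, ndata_B = 4, 3  # sizes of two hypothetical experiments

# One shift vector per knot and per experiment; in the patch these are the
# `deltas` arrays built from H(x)/Q^2 with only one knot displaced at a time.
deltas_A = [rng.normal(size=ndata_A) for _ in range(n_nodes)]
deltas_B = [rng.normal(size=ndata_B) for _ in range(n_nodes)]

S_AB = np.zeros((ndata_A, ndata_B))
for dA, dB in zip(deltas_A, deltas_B):
    S_AB += np.outer(dA, dB)  # same accumulation as the np.outer loop in the patch

print(S_AB.shape)  # (4, 3)
```
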
if self.compute_power_corrections and exp_kinematics is not None: self.exp_kinematics = exp_kinematics if ht_type is None: From e403c61af5555978c26f3eec00f95841766f3121 Mon Sep 17 00:00:00 2001 From: achiefa Date: Fri, 27 Sep 2024 10:57:25 +0200 Subject: [PATCH 22/69] Corrected normalisation for SIGMARED DIS NC data sets --- validphys2/src/validphys/theorycovariance/construction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 3f924d0a44..d2b9282934 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -228,7 +228,7 @@ def ht_parametrisation( N_L = - yL / yp PC_2_p = ht_parametrisation(i, x_knots, x, q2, H2_list) PC_2_d = np.zeros(exp_ndata) - PC_L_p = ht_parametrisation(i, x_knots, x, q2, HL_list) + PC_L_p = N_L * ht_parametrisation(i, x_knots, x, q2, HL_list) PC_L_d = np.zeros(exp_ndata) else: From 5040fb0aa64cbdbfc46e454367c9ddd84b752b28 Mon Sep 17 00:00:00 2001 From: achiefa Date: Sun, 29 Sep 2024 23:32:00 +0200 Subject: [PATCH 23/69] Removing unused code --- .../theorycovariance/construction.py | 105 +----------------- 1 file changed, 1 insertion(+), 104 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index d2b9282934..0605f5fed4 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -159,7 +159,7 @@ def ht_parametrisation( reverse: bool = False ): if not reverse: - shifted_H_list = [0 for k in range(len(nodes))] + shifted_H_list = [0 for _ in range(len(nodes))] shifted_H_list[index] = h_prior[index] else: shifted_H_list = h_prior.copy() @@ -347,109 +347,6 @@ def thcov_ht_old(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, p return covmats -def extract_target(dataset): - if dataset.op == "NULL": - if "_P_" in dataset.name or "HERA" in dataset.name: - return "proton" - elif "_D_" in dataset.name: - return "deuteron" - else: - raise ValueError(f"No target detected for {dataset.name}") - elif dataset.op == "RATIO": - return "ratio" - else: - raise ValueError(f"Unexpected operator in {dataset.name}: {dataset.op}") - - -def compute_ratio_delta(dataset, pdf: PDF, target = None, PC: np.array = None) -> np.array: - """This function computes the predictions as in validphys.convolution._predictions, - but for ratio and including higher twist terms in bot NUM and """ - opfunc = operator.truediv - cuts = dataset.cuts - all_predictions = [] - for fk in dataset.fkspecs: - fk_w_cuts = fk.load_with_cuts(cuts) - tmp = central_fk_predictions(fk_w_cuts, pdf) - all_predictions.append(np.concatenate(tmp.values)) - if target == "d": - all_predictions[0] += PC - elif target == "p": - all_predictions[1] += PC - return opfunc(*all_predictions) - - -def compute_ht_parametrisation( - index: int, - nodes: list, - kin_dict: dict, - exp: str, - h2_prior: list, - hl_prior: list, - reverse: bool = False -): - if not reverse: - shifted_H2_list = [0 for k in range(len(nodes))] - shifted_HL_list = [0 for k in range(len(nodes))] - shifted_H2_list[index] = h2_prior[index] - shifted_HL_list[index] = hl_prior[index] - else: - shifted_H2_list = h2_prior.copy() - shifted_HL_list = hl_prior.copy() - shifted_H2_list[index] = 0 - shifted_HL_list[index] = 0 - - H_2 = scint.CubicSpline(nodes, shifted_H2_list) - H_L = 
scint.CubicSpline(nodes, shifted_HL_list) - H_2 = np.vectorize(H_2) - H_L = np.vectorize(H_L) - - x = kin_dict['x'] - y = kin_dict['y'] - Q2 = kin_dict['Q2'] - N2, NL = compute_normalisation_by_experiment(exp, x, y, Q2) - - PC_2 = N2 * H_2(x) / Q2 - PC_L = NL * H_L(x) / Q2 - return PC_2, PC_L - - -def compute_normalisation_by_experiment(experiment_name, x, y, Q2): - N_2 = np.zeros(shape=y.shape) - N_L = np.zeros(shape=y.shape) - - if "SIGMA" in experiment_name: - - if "HERA_NC" in experiment_name or "HERA_CC" in experiment_name or "NMC" in experiment_name: - yp = 1 + np.power(1 - y, 2) - yL = np.power(y, 2) - - if "HERA_NC" in experiment_name or "NMC" in experiment_name: - N_2 = 1 - N_L = - yL / yp - - elif "HERA_CC" in experiment_name: - N_2 = 1 / 4 * yp - N_L = - N_2 * yL / yp - - if "CHORUS_CC" in experiment_name: - yL = np.power(y, 2) - Gf = 1.1663787e-05 - Mh = 0.938 - MW2 = 80.398 ** 2 - yp = 1 + np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / Q2 - N_2 = Gf**2 * Mh * yp / ( 2 * np.pi * np.power( 1 + Q2 / MW2, 2) ) - N_L = - N_2 * yL / yp - - elif "F2" in experiment_name: - N_2 = np.ones(shape=x.shape) - N_L = np.zeros(shape=x.shape) - - else: - raise ValueError(f"The normalisation for the observable is not known.") - - return N_2, N_L - - def covmat_3fpt(name1, name2, deltas1, deltas2): """Returns theory covariance sub-matrix for 3pt factorisation scale variation *only*, given two dataset names and collections From 6c04f5fcd75d8f1472688de88a07aac35b9d817d Mon Sep 17 00:00:00 2001 From: achiefa Date: Wed, 2 Oct 2024 18:30:20 +0100 Subject: [PATCH 24/69] Added HT for F2C data (EMC) - removed deprecated function --- .../theorycovariance/construction.py | 94 ++----------------- 1 file changed, 7 insertions(+), 87 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 0605f5fed4..9b4df12058 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -164,6 +164,7 @@ def ht_parametrisation( else: shifted_H_list = h_prior.copy() shifted_H_list[index] = 0 + log.warning("The implemenation of the reverse 5-pt prescription must be checked.") H = scint.CubicSpline(nodes, shifted_H_list) H = np.vectorize(H) @@ -222,6 +223,12 @@ def ht_parametrisation( PC_L_p = np.zeros(exp_ndata) PC_L_d = np.zeros(exp_ndata) + elif process_type == 'DIS_F2C': + PC_2_p = - ht_parametrisation(i, x_knots, x, q2, H2_list) + PC_2_d = 2 * ht_parametrisation(i, x_knots, x, q2, H2_list) + PC_L_p = np.zeros(exp_ndata) + PC_L_d = np.zeros(exp_ndata) + elif process_type == "DIS_NCE" or "DIS_NCP": yp = 1 + np.power(1 - y, 2) yL = np.power(y, 2) @@ -260,93 +267,6 @@ def ht_parametrisation( return covmats -def thcov_ht_old(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, ht_knots = list(), reverse: bool = False): - """ - Same as `thcov_HT` but implementing theory covariance method for each node of the spline. - Note that 'groups_data_by_process' contains the same info as 'combine_by_type_ht'. At some - point we should use only one of them. 
- """ - process_info = combine_by_type_ht - running_index_tot = 0 - start_proc_by_exp = defaultdict(list) - deltas = defaultdict(list) - x_knots = list() - included_proc = ["DIS NC"] - excluded_exp = {"DIS NC" : []} - included_exp = {} - for proc in included_proc: - aux = [] - for exp in process_info.namelist[proc]: - if exp not in excluded_exp[proc]: - aux.append(exp) - included_exp[proc] = aux - - if len(ht_knots) == 0: - # ABMP parametrisation - x_knots = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] - else: - x_knots = ht_knots - - # Check that H2_list and HL_list have the same size as x - if (len(H2_list) != len(x_knots)) or (len(HL_list) != len(x_knots)): - raise ValueError(f"The size of HT parameters does not match the number of nodes in the spline.") - - for i_proc, proc in enumerate(process_info.namelist.keys()): - running_index_proc = 0 - - for i_exp, exp in enumerate(process_info.namelist[proc]): - # Locate position of the experiment - size = process_info.sizes[exp] - dataset = groups_data_by_process[i_proc].datasets[i_exp] - start_proc_by_exp[exp] = running_index_tot - running_index_tot += size - running_index_proc += size - kin_dict = {} - - # Compute shifts only for a subset of processes - if proc in included_proc and exp in included_exp[proc]: - #central = process_info.preds[proc][1][start_proc_by_exp[exp] : size] # Probably this is deprecated - kin_dict['x'] = process_info.data[proc].T[0][running_index_proc - size : running_index_proc] - kin_dict['Q2'] = process_info.data[proc].T[1][running_index_proc - size : running_index_proc] - kin_dict['y'] = process_info.data[proc].T[2][running_index_proc - size : running_index_proc] - kin_size = kin_dict['x'].size - target = extract_target(dataset) - - # Loop over the parameter - for i in range(len(x_knots)): - PC_2, PC_L = compute_ht_parametrisation(i, x_knots, kin_dict, exp, H2_list, HL_list, reverse=reverse) - if target == 'proton': - deltas[f"p({i+1}+,0)"] += [PC_2] - deltas[f"p(0,{i+1}+)"] += [PC_L] - deltas[f"d({i+1}+,0)"] += [np.zeros(kin_size)] - deltas[f"d(0,{i+1}+)"] += [np.zeros(kin_size)] - elif target == 'deuteron': - deltas[f"p({i+1}+,0)"] += [np.zeros(kin_size)] - deltas[f"p(0,{i+1}+)"] += [np.zeros(kin_size)] - deltas[f"d({i+1}+,0)"] += [PC_2] - deltas[f"d(0,{i+1}+)"] += [PC_L] - elif target == 'ratio': - deltas[f"p({i+1}+,0)"] += [compute_ratio_delta(dataset, pdf, "p", PC_2) - compute_ratio_delta(dataset, pdf)] - deltas[f"p(0,{i+1}+)"] += [compute_ratio_delta(dataset, pdf, "p", PC_L) - compute_ratio_delta(dataset, pdf)] - deltas[f"d({i+1}+,0)"] += [compute_ratio_delta(dataset, pdf, "d", PC_2) - compute_ratio_delta(dataset, pdf)] - deltas[f"d(0,{i+1}+)"] += [compute_ratio_delta(dataset, pdf, "d", PC_L) - compute_ratio_delta(dataset, pdf)] - else: - raise ValueError("Could not detect target.") - - # Construct theory covmat - covmats = defaultdict(list) - for proc1 in included_proc: - for proc2 in included_proc: - for i, exp1 in enumerate(included_exp[proc1]): - for j, exp2 in enumerate(included_exp[proc2]): - s = np.zeros(shape=(deltas["p(1+,0)"][i].size, deltas["p(1+,0)"][j].size)) - for par in deltas.keys(): - s += np.outer(deltas[par][i], deltas[par][j]) - start_locs = (start_proc_by_exp[exp1], start_proc_by_exp[exp2]) - covmats[start_locs] = s - return covmats - - def covmat_3fpt(name1, name2, deltas1, deltas2): """Returns theory covariance sub-matrix for 3pt factorisation scale variation *only*, given two dataset names and collections From 054a49277afac3085b7a106308c66d358af8a390 Mon Sep 17 00:00:00 2001 From: achiefa Date: 
Wed, 2 Oct 2024 18:57:47 +0100 Subject: [PATCH 25/69] Corrected EMC data iron target --- validphys2/src/validphys/theorycovariance/construction.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 9b4df12058..a3e3897b77 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -224,8 +224,11 @@ def ht_parametrisation( PC_L_d = np.zeros(exp_ndata) elif process_type == 'DIS_F2C': - PC_2_p = - ht_parametrisation(i, x_knots, x, q2, H2_list) - PC_2_d = 2 * ht_parametrisation(i, x_knots, x, q2, H2_list) + # Iron target + Z = 23.403 + A = 49.618 + PC_2_p = ht_parametrisation(i, x_knots, x, q2, H2_list) + PC_2_d = 2 * (Z - A) / A * ht_parametrisation(i, x_knots, x, q2, H2_list) PC_L_p = np.zeros(exp_ndata) PC_L_d = np.zeros(exp_ndata) From 4a6986bfaa4c0e899980c693968583870cf1aed2 Mon Sep 17 00:00:00 2001 From: achiefa Date: Thu, 3 Oct 2024 09:59:28 +0100 Subject: [PATCH 26/69] Removed deprecated code --- n3fit/src/n3fit/layers/DIS.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/n3fit/src/n3fit/layers/DIS.py b/n3fit/src/n3fit/layers/DIS.py index 7830c1dec8..d0c3bb10d8 100644 --- a/n3fit/src/n3fit/layers/DIS.py +++ b/n3fit/src/n3fit/layers/DIS.py @@ -27,8 +27,6 @@ from n3fit.backends import operations as op -from validphys.theorycovariance.construction import compute_normalisation_by_experiment, extract_target - from .observable import Observable @@ -64,6 +62,7 @@ def __init__(self, fktable_data, self.exp_kinematics = exp_kinematics if ht_type is None: self.ht_type = 'ABMP' + raise NotImplementedError("This part should be reimplemented.") else: self.ht_type = ht_type @@ -100,7 +99,7 @@ def H_L(x): x = self.exp_kinematics['kin1'] y = self.exp_kinematics['kin3'] Q2 = self.exp_kinematics['kin2'] - N2, NL = compute_normalisation_by_experiment(self.dataname, x, y, Q2) + N2, NL = 1#compute_normalisation_by_experiment(self.dataname, x, y, Q2) PC_2 = N2 * H_2(x) / Q2 PC_L = NL * H_L(x) / Q2 From 94e2fe10fde175bb724dac4dfb35c28c30ff80d8 Mon Sep 17 00:00:00 2001 From: achiefa Date: Thu, 3 Oct 2024 18:09:36 +0100 Subject: [PATCH 27/69] Refactoring + DIS CC --- .../theorycovariance/construction.py | 226 +++++++++--------- .../higher_twist_functions.py | 191 +++++++++++++++ 2 files changed, 306 insertions(+), 111 deletions(-) create mode 100644 validphys2/src/validphys/theorycovariance/higher_twist_functions.py diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index a3e3897b77..495663004e 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -17,13 +17,13 @@ pass from validphys.results import results, results_central -from validphys.convolution import central_fk_predictions from validphys.core import PDF from validphys.theorycovariance.theorycovarianceutils import ( check_correct_theory_combination, check_fit_dataset_order_matches_grouped, process_lookup, ) +import validphys.theorycovariance.higher_twist_functions as ht_func log = logging.getLogger(__name__) @@ -87,6 +87,7 @@ def combine_by_type(each_dataset_results_central_bytheory): ) return process_info + def combine_by_type_ht(each_dataset_results, groups_dataset_inputs_loaded_cd_with_cuts_byprocess): """same as combine_by_type but now for a single 
theory and including commondata info""" dataset_size = defaultdict(list) @@ -115,62 +116,40 @@ def combine_by_type_ht(each_dataset_results, groups_dataset_inputs_loaded_cd_wit return process_info -def thcov_ht(H2_list, - HL_list, +def thcov_shifts_ht(ht_parameters, + ht_included_proc, + ht_excluded_exp, groups_data_by_process, - pdf, - ht_knots = list()): + pdf): """ Same as `thcov_HT` but implementing theory covariance method for each node of the spline. Note that 'groups_data_by_process' contains the same info as 'combine_by_type_ht'. At some point we should use only one of them. """ groups_data = groups_data_by_process - x_knots = list() start_proc_by_exp = defaultdict(list) ndata_by_exp = defaultdict(list) - running_index_tot = 0 - included_proc = ["DIS NC"] - excluded_exp = {"DIS NC" : []} deltas = defaultdict(list) + running_index_tot = 0 + + HT = {} + HT_func = {} - for i in range(len(x_knots)): - deltas[f"p({i+1}+,0)"] = np.array([]) - deltas[f"p(0,{i+1}+)"] = np.array([]) - deltas[f"d({i+1}+,0)"] = np.array([]) - deltas[f"d(0,{i+1}+)"] = np.array([]) - - if len(ht_knots) == 0: - # ABMP parametrisation - x_knots = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] - else: - x_knots = ht_knots - - # Check that H2_list and HL_list have the same size as x - if (len(H2_list) != len(x_knots)) or (len(HL_list) != len(x_knots)): - raise ValueError(f"The size of HT parameters does not match the number of nodes in the spline.") - - def ht_parametrisation( - index: int, - nodes: list, - x: list, - Q2: list, - h_prior: list, - reverse: bool = False - ): - if not reverse: - shifted_H_list = [0 for _ in range(len(nodes))] - shifted_H_list[index] = h_prior[index] - else: - shifted_H_list = h_prior.copy() - shifted_H_list[index] = 0 - log.warning("The implemenation of the reverse 5-pt prescription must be checked.") - - H = scint.CubicSpline(nodes, shifted_H_list) - H = np.vectorize(H) - - PC = H(x) / Q2 - return PC + for par in ht_parameters: + if len(par['list']) != len(par['nodes']): + raise ValueError(f"The length of nodes does not match that of the list in {par['ht']}. Check the runcard.\n \ + {len(par['list'])} vs. 
{len(par['nodes'])}") + + HT[par['ht']] = { + "list": par['list'], + "nodes": par['nodes'] + } + + HT_func[par['ht']] = None + + # Initialise shifts according to 5pt + for idx_node, _ in enumerate(par['nodes']): + deltas[par['ht'] + f"({idx_node})"] = np.array([]) for idx_proc, group_proc in enumerate(groups_data): for idx_exp, exp_set in enumerate(group_proc.datasets): @@ -180,7 +159,10 @@ def ht_parametrisation( running_index_tot += exp_ndata ndata_by_exp[exp_set.name] = exp_ndata - if group_proc.name in included_proc and exp_set.name not in excluded_exp[group_proc.name]: + for ht in HT_func.keys(): + HT_func[ht] = ht_func.null_func(exp_ndata) + + if group_proc.name in ht_included_proc and exp_set.name not in ht_excluded_exp: cd_table = exp_set.load_commondata().commondata_table process_type = cd_table['process'].iloc[0] x = cd_table['kin1'].to_numpy() @@ -189,72 +171,74 @@ def ht_parametrisation( if x.size != exp_ndata: raise ValueError("Problem with the number of data.") - for i in range(len(x_knots)): - if process_type == "DIS_F2R": - cuts = exp_set.cuts - fkspec_F2D, fkspec_F2P = exp_set.fkspecs - fk_F2D = fkspec_F2D.load_with_cuts(cuts) - fk_F2P = fkspec_F2P.load_with_cuts(cuts) - F2D = central_fk_predictions(fk_F2D, pdf) - F2P = central_fk_predictions(fk_F2P, pdf) - - F2D = np.concatenate(F2D.values) - F2P = np.concatenate(F2P.values) - F2_ratio = operator.truediv(F2D, F2P) - PC = ht_parametrisation(i, x_knots, x, q2, H2_list) - - # NOTE - # Find a better way to store deltas - PC_2_p = np.array(operator.truediv(F2D, np.sum([F2P, PC],axis=0)) - F2_ratio) - PC_2_d = np.array(operator.truediv(F2D, np.sum([F2D, PC],axis=0)) - F2_ratio) - PC_L_p = np.zeros(F2_ratio.size) - PC_L_d = np.zeros(F2_ratio.size) - - - elif process_type == "DIS_F2P": - PC_2_p = ht_parametrisation(i, x_knots, x, q2, H2_list) - PC_2_d = np.zeros(exp_ndata) - PC_L_p = np.zeros(exp_ndata) - PC_L_d = np.zeros(exp_ndata) - - elif process_type == "DIS_F2D": - PC_2_p = np.zeros(exp_ndata) - PC_2_d = ht_parametrisation(i, x_knots, x, q2, H2_list) - PC_L_p = np.zeros(exp_ndata) - PC_L_d = np.zeros(exp_ndata) - - elif process_type == 'DIS_F2C': + if process_type == "DIS_F2R": + HT_func['H2p'], HT_func['H2d'] = ht_func.DIS_F2R_ht(exp_set, pdf, HT["H2p"], HT["H2d"], x, q2) + + elif process_type == "DIS_F2P": + HT_func['H2p'] = ht_func.DIS_F2_ht(HT["H2p"], x, q2) + + elif process_type == "DIS_F2D": + HT_func['H2d'] = ht_func.DIS_F2_ht(HT["H2d"], x, q2) + + elif process_type == 'DIS_F2C': + HT_func['H2p'], HT_func['H2d'] = ht_func.DIS_F2_ht(HT['H2p'], HT['H2d'], x, q2) + + elif process_type == "DIS_NCE" or "DIS_NCP": + HT_func['H2p'], HT_func["HLp"] = ht_func.DIS_NC_ht(HT['H2p'], HT['HLp'], x, q2, y) + + elif process_type == "DIS_SNU_PB" or "DIS_SNB_PB": #CHORUS + # Lead target + A = 208.0 + Z = 82 + if process_type == "DIS_SNU_PB": + l = 0 + elif process_type == "DIS_SNB_PB": + l = 1 + + DIS_NU = ht_func.DIS_SNU(HT, (A,Z), (x,q2,y), Mh=0.938, Mw=80.398, lepton=l) + HT_func['H2p'] = DIS_NU.PC_2_p + HT_func['H2d'] = DIS_NU.PC_2_d + HT_func["HLp"] = DIS_NU.PC_L_p + HT_func["HLd"] = DIS_NU.PC_L_d + HT_func["H3p"] = DIS_NU.PC_3_p + HT_func["H3d"] = DIS_NU.PC_3_d + + elif process_type == "DIS_DM_NU" or "DIS_DM_NB": #NuTeV # Iron target Z = 23.403 A = 49.618 - PC_2_p = ht_parametrisation(i, x_knots, x, q2, H2_list) - PC_2_d = 2 * (Z - A) / A * ht_parametrisation(i, x_knots, x, q2, H2_list) - PC_L_p = np.zeros(exp_ndata) - PC_L_d = np.zeros(exp_ndata) - - elif process_type == "DIS_NCE" or "DIS_NCP": - yp = 1 + np.power(1 - y, 2) - 
yL = np.power(y, 2) - N_L = - yL / yp - PC_2_p = ht_parametrisation(i, x_knots, x, q2, H2_list) - PC_2_d = np.zeros(exp_ndata) - PC_L_p = N_L * ht_parametrisation(i, x_knots, x, q2, HL_list) - PC_L_d = np.zeros(exp_ndata) - - else: - raise Exception(f"The process type `{process_type}` has not been implemented.") - - deltas[f"p({i+1}+,0)"] = np.append(deltas[f"p({i+1}+,0)"], PC_2_p) - deltas[f"p(0,{i+1}+)"] = np.append(deltas[f"p(0,{i+1}+)"], PC_2_d) - deltas[f"d({i+1}+,0)"] = np.append(deltas[f"d({i+1}+,0)"], PC_L_p) - deltas[f"d(0,{i+1}+)"] = np.append(deltas[f"d(0,{i+1}+)"], PC_L_d) - - else: - for i in range(len(x_knots)): - deltas[f"p({i+1}+,0)"] = np.append(deltas[f"p({i+1}+,0)"], np.zeros(exp_ndata)) - deltas[f"p(0,{i+1}+)"] = np.append(deltas[f"p(0,{i+1}+)"], np.zeros(exp_ndata)) - deltas[f"d({i+1}+,0)"] = np.append(deltas[f"d({i+1}+,0)"], np.zeros(exp_ndata)) - deltas[f"d(0,{i+1}+)"] = np.append(deltas[f"d(0,{i+1}+)"], np.zeros(exp_ndata)) + if process_type == "DIS_SNU_PB": + l = 0 + elif process_type == "DIS_SNB_PB": + l = 1 + + DIS_NuTeV = ht_func.DIS_NUTEV(HT, (A,Z), (x,q2,y), Mh=0.938, Mw=80.398, lepton=l) + HT_func['H2p'] = DIS_NuTeV.PC_2_p + HT_func['H2d'] = DIS_NuTeV.PC_2_d + HT_func["HLp"] = DIS_NuTeV.PC_L_p + HT_func["HLd"] = DIS_NuTeV.PC_L_d + HT_func["H3p"] = DIS_NuTeV.PC_3_p + HT_func["H3d"] = DIS_NuTeV.PC_3_d + + elif process_type == "DIS_CCE" or "DIS_CCP": #HERA_CC + if process_type == "DIS_CCE": + l = 0 + elif process_type == "DIS_CCP": + l = 1 + DIS_CC_HERA = ht_func.DIS_HERA_CC(HT, (x,q2,y), Mh=0.938, Mw=80.398, lepton=l) + HT_func['H2p'] = DIS_CC_HERA.PC_2_p + HT_func['H2d'] = DIS_CC_HERA.PC_2_d + HT_func["HLp"] = DIS_CC_HERA.PC_L_p + HT_func["HLd"] = DIS_CC_HERA.PC_L_d + HT_func["H3p"] = DIS_CC_HERA.PC_3_p + HT_func["H3d"] = DIS_CC_HERA.PC_3_d + else: + raise Exception(f"The process type `{process_type}` has not been implemented.") + + for ht in HT.keys(): + for idx_node in range(len(HT[ht]['nodes'])): + shifted_list = ht_func.beta_tilde_5pt(HT[ht]['list'], idx_node) + deltas[ht + f"({idx_node})"] = np.append(deltas[ht + f"({idx_node})"], HT_func[ht](shifted_list)) # Construct the covariance matrix covmats = defaultdict(list) @@ -267,7 +251,27 @@ def ht_parametrisation( start_locs = (exp_idx_1, exp_idx_2) covmats[start_locs] = s - return covmats + + return covmats, deltas + + +def thcov_ht(thcov_shifts_ht, table_ht_deltas): + covmat, _ = thcov_shifts_ht + return covmat + + +@table +def table_ht_deltas(thcov_shifts_ht, procs_index, combine_by_type_custom): + _, deltas = thcov_shifts_ht + process_info = combine_by_type_custom + indexlist = [] + for procname in process_info.preds: + for datasetname in process_info.namelist[procname]: + slicer = procs_index.get_locs((procname, datasetname)) + indexlist += procs_index[slicer].to_list() + covmat_index = pd.MultiIndex.from_tuples(indexlist, names=procs_index.names) + df = pd.DataFrame(deltas, index=covmat_index, columns=deltas.keys()) + return df def covmat_3fpt(name1, name2, deltas1, deltas2): diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py new file mode 100644 index 0000000000..38155721f1 --- /dev/null +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -0,0 +1,191 @@ +import numpy as np +import pandas as pd +import scipy.linalg as la +import scipy.interpolate as scint +from validphys.convolution import central_fk_predictions + +import operator + +def beta_tilde_5pt(delta_h, idx): + 
shifted_list = [0 for _ in range(len(delta_h))] + shifted_list[idx] = delta_h[idx] + return shifted_list + +def ht_parametrisation( + delta_h: list, + nodes: list, + x: list, + Q2: list, + ): + H = scint.CubicSpline(nodes, delta_h) + H = np.vectorize(H) + + PC = H(x) / Q2 + return PC + + +def null_func(size): + """Auxiliary function used to return arrays of zeros for those datasets + that don't require higher twist.""" + def zeros(list): + return np.zeros(size) + return zeros + + +def DIS_F2R_ht(experiment, + pdf, + H2p_dict, + H2d_dict, + x, + q2): + cuts = experiment.cuts + fkspec_F2D, fkspec_F2P = experiment.fkspecs + fk_F2D = fkspec_F2D.load_with_cuts(cuts) + fk_F2P = fkspec_F2P.load_with_cuts(cuts) + F2D = central_fk_predictions(fk_F2D, pdf) + F2P = central_fk_predictions(fk_F2P, pdf) + + F2D = np.concatenate(F2D.values) + F2P = np.concatenate(F2P.values) + F2_ratio = operator.truediv(F2D, F2P) + + + def PC_2_p(list): + PC_p = ht_parametrisation(list, H2p_dict["nodes"], x, q2) + result = np.array(operator.truediv(F2D, np.sum([F2P, PC_p],axis=0)) - F2_ratio) + return result + + def PC_2_d(list): + PC_d = ht_parametrisation(list, H2d_dict["nodes"], x, q2) + result = np.array(operator.truediv(np.sum([F2D, PC_d],axis=0), F2P) - F2_ratio) + #result = np.array(operator.truediv(F2D, np.sum([F2D, PC_d],axis=0)) - F2_ratio) #old implementation + return result + + return PC_2_p, PC_2_d + + +def DIS_F2_ht(H2_dict, x, q2): + def PC_2(list): + result = ht_parametrisation(list, H2_dict['nodes'], x, q2) + return result + + return PC_2 + + +def DIS_F2C_ht(H2p_dict, + H2d_dict, + x, + q2): + # Iron target + Z = 23.403 + A = 49.618 + + def PC_2_p(list): + result = ht_parametrisation(list, H2p_dict['nodes'], x, q2) + return result + + def PC_2_d(list): + result = 2 * (Z - A) / A * ht_parametrisation(list, H2d_dict['nodes'], x, q2) + return result + + return PC_2_p, PC_2_d + + +def DIS_NC_ht(H2_dict, + HL_dict, + x, + q2, + y): + yp = 1 + np.power(1 - y, 2) + yL = np.power(y, 2) + N_L = - yL / yp + + def PC_2(list): + return ht_parametrisation(list, H2_dict['nodes'], x, q2) + + def PC_L(list): + return N_L * ht_parametrisation(list, HL_dict['nodes'], x, q2) + + return PC_2, PC_L + + +class DIS_SNU: + GF2 = 1.1663787e-05 #GeV^-2 + def __init__(self, + HT_dict, + target_tuple, #(A,Z) + kin_tuple, + Mh, + Mw, + lepton): + + # Lead target + A = target_tuple[0] + Z = target_tuple[1] + x = kin_tuple[0] + q2 = kin_tuple[1] + y = kin_tuple[2] + self.nuclear_target = 2 * (Z - A) / A + self.Mh = Mh #0.938 GeV + self.Mw2 = np.power(Mw, 2) # GeV^2 + self.yp = 1 + np.power(1 - y, 2) - 2 * np.power( x * y * Mh, 2) / q2 + self.yL = np.power(y, 2) + self.ym = 1 - np.power(1 - y, 2) + self.N = self.GF2 * Mh / ( 2 * np.pi * np.power( 1 + q2 / self.Mw2, 2) ) * self.yp + self.H2p_dict = HT_dict['H2p'] + self.H2d_dict = HT_dict['H2d'] + self.HLp_dict = HT_dict['HLp'] + self.HLd_dict = HT_dict['HLd'] + self.H3p_dict = HT_dict['H3p'] + self.H3d_dict = HT_dict['H3d'] + + self.x = x + self.q2 = q2 + self.l = lepton + + def PC_2_p(self, list): + norm = self.N + return norm * ht_parametrisation(list, self.H2p_dict['nodes'], self.x, self.q2) + + def PC_2_d(self, list): + norm = self.N * self.nuclear_target + return norm * ht_parametrisation(list, self.H2d_dict['nodes'], self.x, self.q2) + + def PC_L_p(self, list): + norm = - self.N * self.yL / self.yp + return norm * ht_parametrisation(list, self.HLp_dict['nodes'], self.x, self.q2) + + def PC_L_d(self, list): + norm = - self.N * self.yL / self.yp * self.nuclear_target + return norm 
* ht_parametrisation(list, self.HLd_dict['nodes'], self.x, self.q2) + + def PC_3_p(self, list): + norm = self.N * np.power(-1, self.l) * self.ym / self.yp * self.x + return norm * ht_parametrisation(list, self.H3p_dict['nodes'], self.x, self.q2) + + def PC_3_d(self, list): + norm = self.N * np.power(-1, self.l) * self.ym / self.yp * self.x * self.nuclear_target + return norm * ht_parametrisation(list, self.H3d_dict['nodes'], self.x, self.q2) + + +class DIS_NUTEV(DIS_SNU): + def __init__(self, HT_dict, + target_tuple, #(A,Z) + kin_tuple, + Mh, + Mw, + lepton): + super.__init__(self,HT_dict, target_tuple, kin_tuple, Mh, Mw, lepton) + self.N = 50 * self.yp / np.power( 1 + self.q2 / self.Mw2, 2) + + +class DIS_HERA_CC(DIS_SNU): + def __init__(self, HT_dict, + kin_tuple, + Mh, + Mw, + lepton): + super.__init__(self,HT_dict, (1,1), kin_tuple, Mh, Mw, lepton) + y = kin_tuple[2] + self.yp = 1 + np.power(1 - y, 2) + N = 1 / 4 * self.yp \ No newline at end of file From 5382633aaa10aae34d465d5a2e9c102cb6939df5 Mon Sep 17 00:00:00 2001 From: achiefa Date: Fri, 11 Oct 2024 15:12:46 +0100 Subject: [PATCH 28/69] Corrected bug - ready for cc test --- .../src/validphys/theorycovariance/higher_twist_functions.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 38155721f1..98c4ec51a6 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -58,7 +58,6 @@ def PC_2_p(list): def PC_2_d(list): PC_d = ht_parametrisation(list, H2d_dict["nodes"], x, q2) result = np.array(operator.truediv(np.sum([F2D, PC_d],axis=0), F2P) - F2_ratio) - #result = np.array(operator.truediv(F2D, np.sum([F2D, PC_d],axis=0)) - F2_ratio) #old implementation return result return PC_2_p, PC_2_d @@ -175,7 +174,7 @@ def __init__(self, HT_dict, Mh, Mw, lepton): - super.__init__(self,HT_dict, target_tuple, kin_tuple, Mh, Mw, lepton) + super().__init__(HT_dict, target_tuple, kin_tuple, Mh, Mw, lepton) self.N = 50 * self.yp / np.power( 1 + self.q2 / self.Mw2, 2) @@ -185,7 +184,7 @@ def __init__(self, HT_dict, Mh, Mw, lepton): - super.__init__(self,HT_dict, (1,1), kin_tuple, Mh, Mw, lepton) + super().__init__(HT_dict, (1,1), kin_tuple, Mh, Mw, lepton) y = kin_tuple[2] self.yp = 1 + np.power(1 - y, 2) N = 1 / 4 * self.yp \ No newline at end of file From 43225ada807d4ff69ee2c9a12b51b0567440052f Mon Sep 17 00:00:00 2001 From: achiefa Date: Fri, 11 Oct 2024 15:20:31 +0100 Subject: [PATCH 29/69] Corrected bug - ready --- .../theorycovariance/construction.py | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 495663004e..5b892fcbd4 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -18,6 +18,7 @@ pass from validphys.results import results, results_central from validphys.core import PDF +from validphys.process_options import _Process from validphys.theorycovariance.theorycovarianceutils import ( check_correct_theory_combination, check_fit_dataset_order_matches_grouped, @@ -165,12 +166,18 @@ def thcov_shifts_ht(ht_parameters, if group_proc.name in ht_included_proc and exp_set.name not in ht_excluded_exp: cd_table = 
exp_set.load_commondata().commondata_table process_type = cd_table['process'].iloc[0] + + if isinstance(process_type, _Process): + process_type = process_type.name + x = cd_table['kin1'].to_numpy() q2 = cd_table['kin2'].to_numpy() y = cd_table['kin3'].to_numpy() + if x.size != exp_ndata: raise ValueError("Problem with the number of data.") + # NMC_NC_NOTFIXED_DW_EM-F2 if process_type == "DIS_F2R": HT_func['H2p'], HT_func['H2d'] = ht_func.DIS_F2R_ht(exp_set, pdf, HT["H2p"], HT["H2d"], x, q2) @@ -180,13 +187,16 @@ def thcov_shifts_ht(ht_parameters, elif process_type == "DIS_F2D": HT_func['H2d'] = ht_func.DIS_F2_ht(HT["H2d"], x, q2) - elif process_type == 'DIS_F2C': - HT_func['H2p'], HT_func['H2d'] = ht_func.DIS_F2_ht(HT['H2p'], HT['H2d'], x, q2) + # EMC + elif process_type == "DIS_F2C": + HT_func['H2p'], HT_func['H2d'] = ht_func.DIS_F2C_ht(HT['H2p'], HT['H2d'], x, q2) - elif process_type == "DIS_NCE" or "DIS_NCP": + # HERA NC + elif process_type in ["DIS_NCE", "DIS_NCP", "DIS_NCP_CH", "DIS_NCE_BT"]: HT_func['H2p'], HT_func["HLp"] = ht_func.DIS_NC_ht(HT['H2p'], HT['HLp'], x, q2, y) - elif process_type == "DIS_SNU_PB" or "DIS_SNB_PB": #CHORUS + #CHORUS + elif process_type in ["DIS_SNU_PB", "DIS_SNB_PB"]: # Lead target A = 208.0 Z = 82 @@ -203,7 +213,8 @@ def thcov_shifts_ht(ht_parameters, HT_func["H3p"] = DIS_NU.PC_3_p HT_func["H3d"] = DIS_NU.PC_3_d - elif process_type == "DIS_DM_NU" or "DIS_DM_NB": #NuTeV + #NuTeV + elif process_type in ["DIS_DM_NU", "DIS_DM_NB"]: # Iron target Z = 23.403 A = 49.618 @@ -220,7 +231,8 @@ def thcov_shifts_ht(ht_parameters, HT_func["H3p"] = DIS_NuTeV.PC_3_p HT_func["H3d"] = DIS_NuTeV.PC_3_d - elif process_type == "DIS_CCE" or "DIS_CCP": #HERA_CC + #HERA_CC + elif process_type in ["DIS_CCE", "DIS_CCP"]: if process_type == "DIS_CCE": l = 0 elif process_type == "DIS_CCP": @@ -233,7 +245,7 @@ def thcov_shifts_ht(ht_parameters, HT_func["H3p"] = DIS_CC_HERA.PC_3_p HT_func["H3d"] = DIS_CC_HERA.PC_3_d else: - raise Exception(f"The process type `{process_type}` has not been implemented.") + raise Exception(f"The process type `{process_type}` in `{exp_set.name} has not been implemented.") for ht in HT.keys(): for idx_node in range(len(HT[ht]['nodes'])): From 6ca745c50c95ff28e70a239f3148b72121380247 Mon Sep 17 00:00:00 2001 From: achiefa Date: Fri, 11 Oct 2024 23:44:01 +0100 Subject: [PATCH 30/69] Removing unnecessary code --- n3fit/src/n3fit/scripts/vp_setupfit.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 0d1f60ad4f..0f9ee8e502 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -191,10 +191,6 @@ def from_yaml(cls, o, *args, **kwargs): # Check positivity bound if file_content.get('positivity_bound') is not None: SETUPFIT_FIXED_CONFIG['actions_'].append('positivity_bound check_unpolarized_bc') - if (sam_t0 := file_content.get('sampling')) is not None: - SETUPFIT_FIXED_CONFIG['separate_multiplicative'] = sam_t0.get( - 'separate_multiplicative', False - ) for k, v in SETUPFIT_DEFAULTS.items(): file_content.setdefault(k, v) From 610d7656c79e840cfc54dc4686052e04b774c902 Mon Sep 17 00:00:00 2001 From: achiefa Date: Sat, 12 Oct 2024 12:57:03 +0100 Subject: [PATCH 31/69] Corrected bug after rebase --- n3fit/src/n3fit/layers/DIS.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/n3fit/src/n3fit/layers/DIS.py b/n3fit/src/n3fit/layers/DIS.py index d0c3bb10d8..7ab09c137b 100644 --- a/n3fit/src/n3fit/layers/DIS.py +++ 
b/n3fit/src/n3fit/layers/DIS.py @@ -235,7 +235,7 @@ def compute_dis_observable_one_replica(pdf, padded_fk, power_corrections = None) """ if power_corrections is None: - return op.tensor_product(pdf, padded_fk, axes=[(2, 3), (1, 2)]) + return op.tensor_product(pdf[0], padded_fk, axes=[(2, 3), (1, 2)]) else: - return op.tensor_product(pdf, padded_fk, axes=[(2, 3), (1, 2)]) + power_corrections + return op.tensor_product(pdf[0], padded_fk, axes=[(2, 3), (1, 2)]) + power_corrections From 2dc457c662798ba03e7a29c6e1107935b02ac658 Mon Sep 17 00:00:00 2001 From: achiefa Date: Thu, 17 Oct 2024 14:47:12 +0100 Subject: [PATCH 32/69] Add normalisation in CC x-secs --- .../src/validphys/theorycovariance/higher_twist_functions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 98c4ec51a6..bd74a984a4 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -6,6 +6,8 @@ import operator +GEV_CM2_CONV = 3.893793e10 + def beta_tilde_5pt(delta_h, idx): shifted_list = [0 for _ in range(len(delta_h))] shifted_list[idx] = delta_h[idx] @@ -130,7 +132,7 @@ def __init__(self, self.yp = 1 + np.power(1 - y, 2) - 2 * np.power( x * y * Mh, 2) / q2 self.yL = np.power(y, 2) self.ym = 1 - np.power(1 - y, 2) - self.N = self.GF2 * Mh / ( 2 * np.pi * np.power( 1 + q2 / self.Mw2, 2) ) * self.yp + self.N = GEV_CM2_CONV * self.GF2 * Mh / ( 2 * np.pi * np.power( 1 + q2 / self.Mw2, 2) ) * self.yp self.H2p_dict = HT_dict['H2p'] self.H2d_dict = HT_dict['H2d'] self.HLp_dict = HT_dict['HLp'] From 1400eb49e99d77d3b5d428a394c73e1d383b7812 Mon Sep 17 00:00:00 2001 From: achiefa Date: Thu, 17 Oct 2024 22:23:21 +0100 Subject: [PATCH 33/69] Correct normalisation --- .../src/validphys/theorycovariance/higher_twist_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index bd74a984a4..0e16b62689 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -7,6 +7,7 @@ import operator GEV_CM2_CONV = 3.893793e10 +GF = 1.1663787e-05 # Fermi's constant [GeV^-2] def beta_tilde_5pt(delta_h, idx): shifted_list = [0 for _ in range(len(delta_h))] @@ -111,7 +112,6 @@ def PC_L(list): class DIS_SNU: - GF2 = 1.1663787e-05 #GeV^-2 def __init__(self, HT_dict, target_tuple, #(A,Z) @@ -132,7 +132,7 @@ def __init__(self, self.yp = 1 + np.power(1 - y, 2) - 2 * np.power( x * y * Mh, 2) / q2 self.yL = np.power(y, 2) self.ym = 1 - np.power(1 - y, 2) - self.N = GEV_CM2_CONV * self.GF2 * Mh / ( 2 * np.pi * np.power( 1 + q2 / self.Mw2, 2) ) * self.yp + self.N = GEV_CM2_CONV * (GF ** 2) * Mh / ( 2 * np.pi * np.power( 1 + q2 / self.Mw2, 2) ) * self.yp self.H2p_dict = HT_dict['H2p'] self.H2d_dict = HT_dict['H2d'] self.HLp_dict = HT_dict['HLp'] From 2530ae6c67eef2f20e8d0ba93f74e7b9c29bae8d Mon Sep 17 00:00:00 2001 From: achiefa Date: Thu, 17 Oct 2024 22:35:32 +0100 Subject: [PATCH 34/69] Restore n3fit files from master --- n3fit/src/n3fit/layers/DIS.py | 132 +------------------------------ n3fit/src/n3fit/model_gen.py | 46 +---------- n3fit/src/n3fit/model_trainer.py | 11 --- n3fit/src/n3fit/performfit.py | 8 -- 4 files changed, 5 insertions(+), 192 
deletions(-) diff --git a/n3fit/src/n3fit/layers/DIS.py b/n3fit/src/n3fit/layers/DIS.py index 7ab09c137b..072d423e95 100644 --- a/n3fit/src/n3fit/layers/DIS.py +++ b/n3fit/src/n3fit/layers/DIS.py @@ -23,7 +23,6 @@ """ import numpy as np -from scipy import interpolate as scint from n3fit.backends import operations as op @@ -40,122 +39,6 @@ class DIS(Observable): while the input pdf is rank 4 of shape (batch_size, replicas, xgrid, flavours) """ - def __init__(self, fktable_data, - fktable_arr, - dataset_name, - boundary_condition=None, - operation_name="NULL", - nfl=14, - n_replicas=1, - power_corrections=False, - ht_type=None, - exp_kinematics=None, - **kwargs): - super().__init__(fktable_data, fktable_arr, dataset_name, boundary_condition, operation_name, nfl, n_replicas, **kwargs) - - self.compute_power_corrections = power_corrections - self.power_corrections = None - - # NOTE - # Ratio of SFs are not implemented yet. Work in progress. - if self.compute_power_corrections and exp_kinematics is not None: - self.exp_kinematics = exp_kinematics - if ht_type is None: - self.ht_type = 'ABMP' - raise NotImplementedError("This part should be reimplemented.") - else: - self.ht_type = ht_type - - if self.ht_type == 'ABMP': - self.power_corrections = self.compute_abmp_parametrisation() - elif self.ht_type == 'custom': - self.power_corrections = self.compute_custom_parametrisation() - else: - raise Exception(f"HT type {ht_type} is not implemented.") - - - def compute_abmp_parametrisation(self): - """ - This function is very similar to `compute_ht_parametrisation` in - validphys.theorycovariance.construction.py. However, the latter - accounts for shifts in the 5pt prescription. As of now, this function - is meant to work only for DIS NC data, using the ABMP16 result. - """ - x_knots = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] - y_h2 = [0.023, -0.032, -0.005, 0.025, 0.051, 0.003, 0.0] - y_ht = [-0.319, -0.134, -0.052, 0.071, 0.030, 0.003, 0.0] - #h2_sigma = [0.019, 0.013, 0.009, 0.006, 0.005, 0.004] - #ht_sigma = [0.126, 0.040, 0.030, 0.025, 0.012, 0.007] - H_2 = scint.CubicSpline(x_knots, y_h2) - H_T = scint.CubicSpline(x_knots, y_ht) - - # Reconstruct HL from HT and H2 - def H_L(x): - return (H_2(x) - np.power(x, 0.05) * H_T(x)) - - H_2 = np.vectorize(H_2) - H_L = np.vectorize(H_L) - - x = self.exp_kinematics['kin1'] - y = self.exp_kinematics['kin3'] - Q2 = self.exp_kinematics['kin2'] - N2, NL = 1#compute_normalisation_by_experiment(self.dataname, x, y, Q2) - - PC_2 = N2 * H_2(x) / Q2 - PC_L = NL * H_L(x) / Q2 - power_correction = PC_2 + PC_L - power_correction = power_correction.to_numpy() - - return power_correction - - - def compute_custom_parametrisation(self): - """ - This function is very similar to `compute_ht_parametrisation` in - validphys.theorycovariance.construction.py. However, the latter - accounts for shifts in the 5pt prescription. As of now, this function - is meant to work only for DIS NC data, using the ABMP16 result. 
- """ - # Posteriors from 240812-01-ABMP-large-prior-7k - x_knots = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] - y_h2_p = [-0.00441, 0.11169, -0.01632, 0.00000, -0.08742, -0.07279, 0.00000] - y_hl_p = [0.00000, -0.06241, -0.08655, -0.03306, 0.00000, -0.05987, 0.0000] - y_h2_d = [-0.04117, 0.00000, 0.03124, -0.01059, 0.04763, 0.00000, 0.00000] - y_hl_d = [0.00316, 0.00469, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] - - H_2p = scint.CubicSpline(x_knots, y_h2_p) - H_lp = scint.CubicSpline(x_knots, y_hl_p) - H_2d = scint.CubicSpline(x_knots, y_h2_d) - H_ld = scint.CubicSpline(x_knots, y_hl_d) - - H_2p = np.vectorize(H_2p) - H_lp = np.vectorize(H_lp) - H_2d = np.vectorize(H_2d) - H_ld = np.vectorize(H_ld) - - x = self.exp_kinematics['kin1'] - y = self.exp_kinematics['kin3'] - Q2 = self.exp_kinematics['kin2'] - N2, NL = compute_normalisation_by_experiment(self.dataname, x, y, Q2) - - if "_P_" in self.dataname or "HERA" in self.dataname: - PC_2 = N2 * H_2p(x) / Q2 - PC_L = NL * H_lp(x) / Q2 - elif "_D_" in self.dataname: - PC_2 = N2 * H_2d(x) / Q2 - PC_L = NL * H_ld(x) / Q2 - else: - # TODO - # Need to implement this - PC_2 = 0 / Q2 #N2 * H_2d(x) / Q2 - PC_L = 0 / Q2 #NL * H_ld(x) / Q2 - - power_correction = PC_2 + PC_L - power_correction = power_correction.to_numpy() - - return power_correction - - def gen_mask(self, basis): """ Receives a list of active flavours and generates a boolean mask tensor @@ -202,11 +85,7 @@ def build(self, input_shape): if self.num_replicas > 1: self.compute_observable = compute_dis_observable_many_replica else: - # Currying the function so that the `Observable` does not need - # to get modified - def compute_dis_observable_one_replica_w_pc(pdf, padded_fk): - return compute_dis_observable_one_replica(pdf, padded_fk, power_corrections = self.power_corrections) - self.compute_observable = compute_dis_observable_one_replica_w_pc + self.compute_observable = compute_dis_observable_one_replica def compute_dis_observable_many_replica(pdf, padded_fk): @@ -228,14 +107,9 @@ def compute_dis_observable_many_replica(pdf, padded_fk): return op.einsum('brxf, nxf -> brn', pdf[0], padded_fk) -def compute_dis_observable_one_replica(pdf, padded_fk, power_corrections = None): +def compute_dis_observable_one_replica(pdf, padded_fk): """ Same operations as above but a specialized implementation that is more efficient for 1 replica, masking the PDF rather than the fk table. """ - if power_corrections is None: - - return op.tensor_product(pdf[0], padded_fk, axes=[(2, 3), (1, 2)]) - else: - - return op.tensor_product(pdf[0], padded_fk, axes=[(2, 3), (1, 2)]) + power_corrections + return op.tensor_product(pdf[0], padded_fk, axes=[(2, 3), (1, 2)]) diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index b324727a55..2e376254d5 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -135,9 +135,6 @@ def observable_generator( positivity_initial=1.0, integrability=False, n_replicas=1, - exp_data=None, - power_corrections=None, - ht_type=None ): # pylint: disable=too-many-locals """ This function generates the observable models for each experiment. 
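
For reference, the DIS layer restored above is a contraction of the PDF grid with the FK table over the x-grid and flavour indices; the reverted patches had simply added the precomputed H(x)/Q^2 vector on top of that contraction. A small sketch with np.einsum standing in for the backend operation (all shapes are illustrative):

```python
import numpy as np

batch, replicas, nx, nflav, ndata = 1, 2, 5, 14, 3
pdf = np.random.rand(batch, replicas, nx, nflav)  # (batch, replicas, xgrid, flavours)
fk = np.random.rand(ndata, nx, nflav)             # padded FK table (ndata, xgrid, flavours)

predictions = np.einsum('brxf,nxf->brn', pdf, fk)  # same index pattern as the restored layer
print(predictions.shape)  # (1, 2, 3)

# The reverted HT patches added a per-point power correction to this result:
power_correction = np.random.rand(ndata)  # hypothetical H(x)/Q^2 values
predictions_with_ht = predictions + power_correction
```
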
@@ -184,10 +181,6 @@ def observable_generator( set the positivity lagrange multiplier for epoch 1 integrability: bool switch on/off the integrability constraints - power_corrections: bool - whether to include HT in theory predictions - ht_type: str - type of HT parametrisation Returns ------ @@ -203,37 +196,16 @@ def observable_generator( dataset_xsizes = [] model_inputs = [] model_observables = [] - kin_by_dict = {} - - if exp_data is not None: - included_processes = [ - 'DEUTERON', - 'NMC', - 'NUCLEAR', - #'HERACOMB', - ] - for process in exp_data: - commondata = process.load_commondata() - for dataset in commondata: - if process.name in included_processes and "_NC_" in dataset.setname: - kin_by_dict[dataset.setname] = dataset.kinematics - else: - kin_by_dict[dataset.setname] = None - # The first step is to compute the observable for each of the datasets for dataset in spec_dict["datasets"]: # Get the generic information of the dataset dataset_name = dataset.name - kinematics = None # Look at what kind of layer do we need for this dataset if dataset.hadronic: Obs_Layer = DY else: Obs_Layer = DIS - if power_corrections: - if exp_data is not None and kin_by_dict[dataset_name] is not None: - kinematics = kin_by_dict[dataset_name] # Set the operation (if any) to be applied to the fktables of this dataset operation_name = dataset.operation @@ -244,8 +216,7 @@ def observable_generator( # list of validphys.coredata.FKTableData objects # these will then be used to check how many different pdf inputs are needed # (and convolutions if given the case) - if dataset.hadronic: - obs_layer = Obs_Layer( + obs_layer = Obs_Layer( dataset.fktables_data, dataset.fktables(), dataset_name, @@ -253,20 +224,7 @@ def observable_generator( operation_name, n_replicas=n_replicas, name=f"dat_{dataset_name}", - ) - else: - obs_layer = Obs_Layer( - dataset.fktables_data, - dataset.fktables(), - dataset_name, - boundary_condition, - operation_name, - n_replicas=n_replicas, - name=f"dat_{dataset_name}", - power_corrections=power_corrections, - exp_kinematics=kinematics if power_corrections else None, - ht_type=None if not power_corrections else ht_type - ) + ) # If the observable layer found that all input grids are equal, the splitting will be None # otherwise the different xgrids need to be stored separately diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index f1261514ea..a8b7b95bee 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -111,8 +111,6 @@ def __init__( theoryid=None, lux_params=None, replicas=None, - power_corrections=False, - ht_type=None ): """ Parameters @@ -153,10 +151,6 @@ def __init__( if not give, the photon is not generated replicas: list list with the replicas ids to be fitted - power_corrections: bool - whether to include HT in theory predictions - ht_type: str - type of HT parametrisation """ # Save all input information self.exp_info = list(exp_info) @@ -173,8 +167,6 @@ def __init__( self.lux_params = lux_params self.replicas = replicas self.experiments_data = experiments_data - self.power_corrections = power_corrections - self.ht_type = ht_type # Initialise internal variables which define behaviour if debug: @@ -584,9 +576,6 @@ def _generate_observables( invcovmat_tr=experiment_data["invcovmat"][i], invcovmat_vl=experiment_data["invcovmat_vl"][i], n_replicas=len(self.replicas), - exp_data=self.experiments_data, - power_corrections=self.power_corrections, - ht_type=self.ht_type ) # Save the input(s) corresponding to this 
experiment diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index 1c9f9452e3..8cdc8fd10f 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -42,8 +42,6 @@ def performfit( maxcores=None, double_precision=False, parallel_models=False, - power_corrections=False, - ht_type=None ): """ This action will (upon having read a validcard) process a full PDF fit @@ -130,10 +128,6 @@ def performfit( whether to use double precision parallel_models: bool whether to run models in parallel - power_corrections: bool - whether to include HT in theory predictions - ht_type: str - Type of HT parametrisation """ from n3fit.backends import set_initial_state @@ -203,8 +197,6 @@ def performfit( theoryid=theoryid, lux_params=fiatlux, replicas=replica_idxs, - power_corrections=power_corrections, - ht_type=ht_type ) # This is just to give a descriptive name to the fit function From 4404032a5142a2f18aad3377bb37a244e18ed1ec Mon Sep 17 00:00:00 2001 From: RoyStegeman Date: Thu, 14 Nov 2024 16:20:40 +0000 Subject: [PATCH 35/69] remove _PB suffix from process type --- .../src/validphys/theorycovariance/construction.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 5b892fcbd4..10bcb21dd9 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -196,13 +196,13 @@ def thcov_shifts_ht(ht_parameters, HT_func['H2p'], HT_func["HLp"] = ht_func.DIS_NC_ht(HT['H2p'], HT['HLp'], x, q2, y) #CHORUS - elif process_type in ["DIS_SNU_PB", "DIS_SNB_PB"]: - # Lead target + elif process_type in ["DIS_SNU", "DIS_SNB"]: + # Lead target:: A = 208.0 Z = 82 - if process_type == "DIS_SNU_PB": + if process_type == "DIS_SNU": l = 0 - elif process_type == "DIS_SNB_PB": + elif process_type == "DIS_SNB": l = 1 DIS_NU = ht_func.DIS_SNU(HT, (A,Z), (x,q2,y), Mh=0.938, Mw=80.398, lepton=l) @@ -218,9 +218,9 @@ def thcov_shifts_ht(ht_parameters, # Iron target Z = 23.403 A = 49.618 - if process_type == "DIS_SNU_PB": + if process_type == "DIS_SNU": l = 0 - elif process_type == "DIS_SNB_PB": + elif process_type == "DIS_SNB": l = 1 DIS_NuTeV = ht_func.DIS_NUTEV(HT, (A,Z), (x,q2,y), Mh=0.938, Mw=80.398, lepton=l) From 037d5b3c188ca18cfc9fe59243c4142ca33849cd Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Thu, 14 Nov 2024 16:26:24 +0000 Subject: [PATCH 36/69] format a bit --- validphys2/src/validphys/commondata.py | 4 +- validphys2/src/validphys/config.py | 7 +- validphys2/src/validphys/dataplots.py | 16 +- validphys2/src/validphys/results.py | 37 ++- .../theorycovariance/construction.py | 279 +++++++++-------- .../higher_twist_functions.py | 296 ++++++++---------- 6 files changed, 309 insertions(+), 330 deletions(-) diff --git a/validphys2/src/validphys/commondata.py b/validphys2/src/validphys/commondata.py index e59ac5ce2c..ed8e9053cb 100644 --- a/validphys2/src/validphys/commondata.py +++ b/validphys2/src/validphys/commondata.py @@ -40,5 +40,5 @@ def loaded_commondata_with_cuts(commondata, cuts): ) groups_dataset_inputs_loaded_cd_with_cuts_byprocess = collect( - "loaded_commondata_with_cuts", ("group_dataset_inputs_by_process", "data") - ) \ No newline at end of file + "loaded_commondata_with_cuts", ("group_dataset_inputs_by_process", "data") +) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 662a1217b0..863892934a 100644 --- 
a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1247,8 +1247,7 @@ def produce_loaded_user_covmat_path(self, user_covmat_path: str = ""): l = self.loader fileloc = l.check_vp_output_file(user_covmat_path) return fileloc - - + @configparser.explicit_node def produce_covmat_custom(self, use_ht_uncertainties: bool = False): if use_ht_uncertainties: @@ -1316,15 +1315,13 @@ def res(*args, **kwargs): # Set this to get the same filename regardless of the action. res.__name__ = "theory_covmat" return res - - + @configparser.explicit_node def produce_combine_by_type_custom(self, use_ht_uncertainties: bool = False): if use_ht_uncertainties: return validphys.theorycovariance.construction.combine_by_type_ht return validphys.theorycovariance.construction.combine_by_type - def produce_fitthcovmat( self, use_thcovmat_if_present: bool = False, fit: (str, type(None)) = None ): diff --git a/validphys2/src/validphys/dataplots.py b/validphys2/src/validphys/dataplots.py index bb4d7d7db6..2829fa2f58 100644 --- a/validphys2/src/validphys/dataplots.py +++ b/validphys2/src/validphys/dataplots.py @@ -2,8 +2,6 @@ Plots of relations between data PDFs and fits. """ -from __future__ import generator_stop - from collections import defaultdict from collections.abc import Sequence import itertools @@ -28,7 +26,7 @@ from validphys.core import CutsPolicy, MCStats, cut_mask from validphys.plotoptions.core import get_info, kitable, transform_result from validphys.results import chi2_stat_labels, chi2_stats -from validphys.sumrules import POL_LIMS, partial_polarized_sum_rules +from validphys.sumrules import POL_LIMS from validphys.utils import sane_groupby_iter, scale_from_grid, split_ranges log = logging.getLogger(__name__) @@ -301,9 +299,7 @@ def _plot_fancy_impl( min_vals = [] max_vals = [] fig, ax = plotutils.subplots() - ax.set_title( - "{} {}".format(info.dataset_label, info.group_label(samefig_vals, info.figure_by)) - ) + ax.set_title(f"{info.dataset_label} {info.group_label(samefig_vals, info.figure_by)}") lineby = sane_groupby_iter(fig_data, info.line_by) @@ -1510,7 +1506,7 @@ def next_options(): # if group is None then make sure that shows on legend. 
key = str(group) elif marker_by == "kinematics": - key = None + key = None else: raise ValueError('Unknown marker_by value') @@ -1542,8 +1538,10 @@ def next_options(): # This is to get the label key coords = [], [] if marker_by == "kinematics": - ht_magnitude = np.concatenate( cvdict[key]) / (coords[1] * (1 - coords[0]) ) - out = ax.scatter(*coords, marker='.', c=ht_magnitude, cmap="viridis", norm=mcolors.LogNorm()) + ht_magnitude = np.concatenate(cvdict[key]) / (coords[1] * (1 - coords[0])) + out = ax.scatter( + *coords, marker='.', c=ht_magnitude, cmap="viridis", norm=mcolors.LogNorm() + ) clb = fig.colorbar(out) clb.ax.set_title(r'$F_\mathrm{exp}\frac{1}{Q^2(1-x)}$') ax.plot(*coords, label=key, markeredgewidth=1, markeredgecolor=None, **key_options[key]) diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index 2b2af3dbba..ac670657f9 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -284,22 +284,29 @@ def group_kin_table_no_table(groups_data, groups_index): """Generate a table containing the kinematics and the process_type.""" result_records = [] for group_data in groups_data: - group_cd = group_data.load_commondata() - cd = np.concatenate( - [group_cd[i].commondata_table[['kin1','kin2','kin3','process']] for i in range(len(group_cd))], - axis=0 - ) - for index, dataset in enumerate(cd): - try: - process_name = dataset[3].name - except AttributeError: - process_name = dataset[3] - result_records.append( - dict([("kin_1", dataset[0]), - ("kin_2", dataset[1]), - ("kin_3", dataset[2]), - ("process_type", process_name)]) + group_cd = group_data.load_commondata() + cd = np.concatenate( + [ + group_cd[i].commondata_table[['kin1', 'kin2', 'kin3', 'process']] + for i in range(len(group_cd)) + ], + axis=0, ) + for index, dataset in enumerate(cd): + try: + process_name = dataset[3].name + except AttributeError: + process_name = dataset[3] + result_records.append( + dict( + [ + ("kin_1", dataset[0]), + ("kin_2", dataset[1]), + ("kin_3", dataset[2]), + ("process_type", process_name), + ] + ) + ) if not result_records: log.warning("Empty records for group results") diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 10bcb21dd9..6bd4db09f2 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -5,12 +5,9 @@ from collections import defaultdict, namedtuple import logging -import operator import numpy as np import pandas as pd -import scipy.linalg as la -import scipy.interpolate as scint from reportengine import collect from reportengine.table import table @@ -19,12 +16,13 @@ from validphys.results import results, results_central from validphys.core import PDF from validphys.process_options import _Process +from validphys.results import results, results_central +import validphys.theorycovariance.higher_twist_functions as ht_func from validphys.theorycovariance.theorycovarianceutils import ( check_correct_theory_combination, check_fit_dataset_order_matches_grouped, process_lookup, ) -import validphys.theorycovariance.higher_twist_functions as ht_func log = logging.getLogger(__name__) @@ -117,154 +115,161 @@ def combine_by_type_ht(each_dataset_results, groups_dataset_inputs_loaded_cd_wit return process_info -def thcov_shifts_ht(ht_parameters, - ht_included_proc, - ht_excluded_exp, - groups_data_by_process, - pdf): - """ - Same as `thcov_HT` but implementing 
theory covariance method for each node of the spline. - Note that 'groups_data_by_process' contains the same info as 'combine_by_type_ht'. At some - point we should use only one of them. - """ - groups_data = groups_data_by_process - start_proc_by_exp = defaultdict(list) - ndata_by_exp = defaultdict(list) - deltas = defaultdict(list) - running_index_tot = 0 - - HT = {} - HT_func = {} - - for par in ht_parameters: - if len(par['list']) != len(par['nodes']): - raise ValueError(f"The length of nodes does not match that of the list in {par['ht']}. Check the runcard.\n \ - {len(par['list'])} vs. {len(par['nodes'])}") - - HT[par['ht']] = { - "list": par['list'], - "nodes": par['nodes'] - } - - HT_func[par['ht']] = None - - # Initialise shifts according to 5pt - for idx_node, _ in enumerate(par['nodes']): - deltas[par['ht'] + f"({idx_node})"] = np.array([]) - - for idx_proc, group_proc in enumerate(groups_data): - for idx_exp, exp_set in enumerate(group_proc.datasets): - # For covmat construction - exp_ndata = exp_set.load_commondata().ndata - start_proc_by_exp[exp_set.name] = running_index_tot - running_index_tot += exp_ndata - ndata_by_exp[exp_set.name] = exp_ndata - - for ht in HT_func.keys(): - HT_func[ht] = ht_func.null_func(exp_ndata) - - if group_proc.name in ht_included_proc and exp_set.name not in ht_excluded_exp: - cd_table = exp_set.load_commondata().commondata_table - process_type = cd_table['process'].iloc[0] - - if isinstance(process_type, _Process): - process_type = process_type.name - - x = cd_table['kin1'].to_numpy() - q2 = cd_table['kin2'].to_numpy() - y = cd_table['kin3'].to_numpy() - - if x.size != exp_ndata: - raise ValueError("Problem with the number of data.") - - # NMC_NC_NOTFIXED_DW_EM-F2 - if process_type == "DIS_F2R": - HT_func['H2p'], HT_func['H2d'] = ht_func.DIS_F2R_ht(exp_set, pdf, HT["H2p"], HT["H2d"], x, q2) - - elif process_type == "DIS_F2P": - HT_func['H2p'] = ht_func.DIS_F2_ht(HT["H2p"], x, q2) +def thcov_shifts_ht(ht_parameters, ht_included_proc, ht_excluded_exp, groups_data_by_process, pdf): + """ + Same as `thcov_HT` but implementing theory covariance method for each node of the spline. + Note that 'groups_data_by_process' contains the same info as 'combine_by_type_ht'. At some + point we should use only one of them. + """ + groups_data = groups_data_by_process + start_proc_by_exp = defaultdict(list) + ndata_by_exp = defaultdict(list) + deltas = defaultdict(list) + running_index_tot = 0 + + HT = {} + HT_func = {} + + for par in ht_parameters: + if len(par['list']) != len(par['nodes']): + raise ValueError( + f"The length of nodes does not match that of the list in {par['ht']}. Check the runcard.\n \ + {len(par['list'])} vs. 
{len(par['nodes'])}" + ) - elif process_type == "DIS_F2D": - HT_func['H2d'] = ht_func.DIS_F2_ht(HT["H2d"], x, q2) + HT[par['ht']] = {"list": par['list'], "nodes": par['nodes']} - # EMC - elif process_type == "DIS_F2C": - HT_func['H2p'], HT_func['H2d'] = ht_func.DIS_F2C_ht(HT['H2p'], HT['H2d'], x, q2) + HT_func[par['ht']] = None - # HERA NC - elif process_type in ["DIS_NCE", "DIS_NCP", "DIS_NCP_CH", "DIS_NCE_BT"]: - HT_func['H2p'], HT_func["HLp"] = ht_func.DIS_NC_ht(HT['H2p'], HT['HLp'], x, q2, y) + # Initialise shifts according to 5pt + for idx_node, _ in enumerate(par['nodes']): + deltas[par['ht'] + f"({idx_node})"] = np.array([]) - #CHORUS - elif process_type in ["DIS_SNU", "DIS_SNB"]: - # Lead target:: - A = 208.0 - Z = 82 - if process_type == "DIS_SNU": + for idx_proc, group_proc in enumerate(groups_data): + for idx_exp, exp_set in enumerate(group_proc.datasets): + # For covmat construction + exp_ndata = exp_set.load_commondata().ndata + start_proc_by_exp[exp_set.name] = running_index_tot + running_index_tot += exp_ndata + ndata_by_exp[exp_set.name] = exp_ndata + + for ht in HT_func.keys(): + HT_func[ht] = ht_func.null_func(exp_ndata) + + if group_proc.name in ht_included_proc and exp_set.name not in ht_excluded_exp: + cd_table = exp_set.load_commondata().commondata_table + process_type = cd_table['process'].iloc[0] + + if isinstance(process_type, _Process): + process_type = process_type.name + + x = cd_table['kin1'].to_numpy() + q2 = cd_table['kin2'].to_numpy() + y = cd_table['kin3'].to_numpy() + + if x.size != exp_ndata: + raise ValueError("Problem with the number of data.") + + # NMC_NC_NOTFIXED_DW_EM-F2 + if process_type == "DIS_F2R": + HT_func['H2p'], HT_func['H2d'] = ht_func.DIS_F2R_ht( + exp_set, pdf, HT["H2p"], HT["H2d"], x, q2 + ) + + elif process_type == "DIS_F2P": + HT_func['H2p'] = ht_func.DIS_F2_ht(HT["H2p"], x, q2) + + elif process_type == "DIS_F2D": + HT_func['H2d'] = ht_func.DIS_F2_ht(HT["H2d"], x, q2) + + # EMC + elif process_type == "DIS_F2C": + HT_func['H2p'], HT_func['H2d'] = ht_func.DIS_F2C_ht(HT['H2p'], HT['H2d'], x, q2) + + # HERA NC + elif process_type in ["DIS_NCE", "DIS_NCP", "DIS_NCP_CH", "DIS_NCE_BT"]: + HT_func['H2p'], HT_func["HLp"] = ht_func.DIS_NC_ht( + HT['H2p'], HT['HLp'], x, q2, y + ) + + # CHORUS + elif process_type in ["DIS_SNU", "DIS_SNB"]: + # Lead target:: + A = 208.0 + Z = 82 + if process_type == "DIS_SNU": l = 0 - elif process_type == "DIS_SNB": + elif process_type == "DIS_SNB": l = 1 - DIS_NU = ht_func.DIS_SNU(HT, (A,Z), (x,q2,y), Mh=0.938, Mw=80.398, lepton=l) - HT_func['H2p'] = DIS_NU.PC_2_p - HT_func['H2d'] = DIS_NU.PC_2_d - HT_func["HLp"] = DIS_NU.PC_L_p - HT_func["HLd"] = DIS_NU.PC_L_d - HT_func["H3p"] = DIS_NU.PC_3_p - HT_func["H3d"] = DIS_NU.PC_3_d - - #NuTeV - elif process_type in ["DIS_DM_NU", "DIS_DM_NB"]: - # Iron target - Z = 23.403 - A = 49.618 - if process_type == "DIS_SNU": + DIS_NU = ht_func.DIS_SNU(HT, (A, Z), (x, q2, y), Mh=0.938, Mw=80.398, lepton=l) + HT_func['H2p'] = DIS_NU.PC_2_p + HT_func['H2d'] = DIS_NU.PC_2_d + HT_func["HLp"] = DIS_NU.PC_L_p + HT_func["HLd"] = DIS_NU.PC_L_d + HT_func["H3p"] = DIS_NU.PC_3_p + HT_func["H3d"] = DIS_NU.PC_3_d + + # NuTeV + elif process_type in ["DIS_DM_NU", "DIS_DM_NB"]: + # Iron target + Z = 23.403 + A = 49.618 + if process_type == "DIS_SNU": l = 0 - elif process_type == "DIS_SNB": + elif process_type == "DIS_SNB": l = 1 - DIS_NuTeV = ht_func.DIS_NUTEV(HT, (A,Z), (x,q2,y), Mh=0.938, Mw=80.398, lepton=l) - HT_func['H2p'] = DIS_NuTeV.PC_2_p - HT_func['H2d'] = DIS_NuTeV.PC_2_d - 
HT_func["HLp"] = DIS_NuTeV.PC_L_p - HT_func["HLd"] = DIS_NuTeV.PC_L_d - HT_func["H3p"] = DIS_NuTeV.PC_3_p - HT_func["H3d"] = DIS_NuTeV.PC_3_d - - #HERA_CC - elif process_type in ["DIS_CCE", "DIS_CCP"]: - if process_type == "DIS_CCE": + DIS_NuTeV = ht_func.DIS_NUTEV( + HT, (A, Z), (x, q2, y), Mh=0.938, Mw=80.398, lepton=l + ) + HT_func['H2p'] = DIS_NuTeV.PC_2_p + HT_func['H2d'] = DIS_NuTeV.PC_2_d + HT_func["HLp"] = DIS_NuTeV.PC_L_p + HT_func["HLd"] = DIS_NuTeV.PC_L_d + HT_func["H3p"] = DIS_NuTeV.PC_3_p + HT_func["H3d"] = DIS_NuTeV.PC_3_d + + # HERA_CC + elif process_type in ["DIS_CCE", "DIS_CCP"]: + if process_type == "DIS_CCE": l = 0 - elif process_type == "DIS_CCP": + elif process_type == "DIS_CCP": l = 1 - DIS_CC_HERA = ht_func.DIS_HERA_CC(HT, (x,q2,y), Mh=0.938, Mw=80.398, lepton=l) - HT_func['H2p'] = DIS_CC_HERA.PC_2_p - HT_func['H2d'] = DIS_CC_HERA.PC_2_d - HT_func["HLp"] = DIS_CC_HERA.PC_L_p - HT_func["HLd"] = DIS_CC_HERA.PC_L_d - HT_func["H3p"] = DIS_CC_HERA.PC_3_p - HT_func["H3d"] = DIS_CC_HERA.PC_3_d - else: - raise Exception(f"The process type `{process_type}` in `{exp_set.name} has not been implemented.") - - for ht in HT.keys(): + DIS_CC_HERA = ht_func.DIS_HERA_CC(HT, (x, q2, y), Mh=0.938, Mw=80.398, lepton=l) + HT_func['H2p'] = DIS_CC_HERA.PC_2_p + HT_func['H2d'] = DIS_CC_HERA.PC_2_d + HT_func["HLp"] = DIS_CC_HERA.PC_L_p + HT_func["HLd"] = DIS_CC_HERA.PC_L_d + HT_func["H3p"] = DIS_CC_HERA.PC_3_p + HT_func["H3d"] = DIS_CC_HERA.PC_3_d + else: + raise Exception( + f"The process type `{process_type}` in `{exp_set.name} has not been implemented." + ) + + for ht in HT.keys(): for idx_node in range(len(HT[ht]['nodes'])): - shifted_list = ht_func.beta_tilde_5pt(HT[ht]['list'], idx_node) - deltas[ht + f"({idx_node})"] = np.append(deltas[ht + f"({idx_node})"], HT_func[ht](shifted_list)) + shifted_list = ht_func.beta_tilde_5pt(HT[ht]['list'], idx_node) + deltas[ht + f"({idx_node})"] = np.append( + deltas[ht + f"({idx_node})"], HT_func[ht](shifted_list) + ) - # Construct the covariance matrix - covmats = defaultdict(list) - for exp_name_1, exp_idx_1 in start_proc_by_exp.items(): - for exp_name_2, exp_idx_2 in start_proc_by_exp.items(): - s = np.zeros(shape=(ndata_by_exp[exp_name_1], ndata_by_exp[exp_name_2])) - for shifts in deltas.keys(): - s += np.outer(deltas[shifts][exp_idx_1: exp_idx_1 + ndata_by_exp[exp_name_1]], - deltas[shifts][exp_idx_2: exp_idx_2 + ndata_by_exp[exp_name_2]]) + # Construct the covariance matrix + covmats = defaultdict(list) + for exp_name_1, exp_idx_1 in start_proc_by_exp.items(): + for exp_name_2, exp_idx_2 in start_proc_by_exp.items(): + s = np.zeros(shape=(ndata_by_exp[exp_name_1], ndata_by_exp[exp_name_2])) + for shifts in deltas.keys(): + s += np.outer( + deltas[shifts][exp_idx_1 : exp_idx_1 + ndata_by_exp[exp_name_1]], + deltas[shifts][exp_idx_2 : exp_idx_2 + ndata_by_exp[exp_name_2]], + ) - start_locs = (exp_idx_1, exp_idx_2) - covmats[start_locs] = s + start_locs = (exp_idx_1, exp_idx_2) + covmats[start_locs] = s - return covmats, deltas + return covmats, deltas def thcov_ht(thcov_shifts_ht, table_ht_deltas): @@ -736,4 +741,4 @@ def experimentplustheory_corrmat_custom(procs_covmat, theory_covmat_custom): each_dataset_results = collect(results, ("group_dataset_inputs_by_process", "data")) -groups_data_by_process = collect("data", ("group_dataset_inputs_by_process",)) \ No newline at end of file +groups_data_by_process = collect("data", ("group_dataset_inputs_by_process",)) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py 
b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 0e16b62689..bfdc4bbf2f 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -1,192 +1,164 @@ +import operator + import numpy as np -import pandas as pd -import scipy.linalg as la import scipy.interpolate as scint -from validphys.convolution import central_fk_predictions -import operator +from validphys.convolution import central_fk_predictions GEV_CM2_CONV = 3.893793e10 -GF = 1.1663787e-05 # Fermi's constant [GeV^-2] +GF = 1.1663787e-05 # Fermi's constant [GeV^-2] + def beta_tilde_5pt(delta_h, idx): - shifted_list = [0 for _ in range(len(delta_h))] - shifted_list[idx] = delta_h[idx] - return shifted_list + shifted_list = [0 for _ in range(len(delta_h))] + shifted_list[idx] = delta_h[idx] + return shifted_list + -def ht_parametrisation( - delta_h: list, - nodes: list, - x: list, - Q2: list, - ): - H = scint.CubicSpline(nodes, delta_h) - H = np.vectorize(H) +def ht_parametrisation(delta_h: list, nodes: list, x: list, Q2: list): + H = scint.CubicSpline(nodes, delta_h) + H = np.vectorize(H) - PC = H(x) / Q2 - return PC + PC = H(x) / Q2 + return PC def null_func(size): """Auxiliary function used to return arrays of zeros for those datasets that don't require higher twist.""" + def zeros(list): return np.zeros(size) + return zeros -def DIS_F2R_ht(experiment, - pdf, - H2p_dict, - H2d_dict, - x, - q2): - cuts = experiment.cuts - fkspec_F2D, fkspec_F2P = experiment.fkspecs - fk_F2D = fkspec_F2D.load_with_cuts(cuts) - fk_F2P = fkspec_F2P.load_with_cuts(cuts) - F2D = central_fk_predictions(fk_F2D, pdf) - F2P = central_fk_predictions(fk_F2P, pdf) - - F2D = np.concatenate(F2D.values) - F2P = np.concatenate(F2P.values) - F2_ratio = operator.truediv(F2D, F2P) - - - def PC_2_p(list): - PC_p = ht_parametrisation(list, H2p_dict["nodes"], x, q2) - result = np.array(operator.truediv(F2D, np.sum([F2P, PC_p],axis=0)) - F2_ratio) - return result - - def PC_2_d(list): - PC_d = ht_parametrisation(list, H2d_dict["nodes"], x, q2) - result = np.array(operator.truediv(np.sum([F2D, PC_d],axis=0), F2P) - F2_ratio) - return result - - return PC_2_p, PC_2_d +def DIS_F2R_ht(experiment, pdf, H2p_dict, H2d_dict, x, q2): + cuts = experiment.cuts + fkspec_F2D, fkspec_F2P = experiment.fkspecs + fk_F2D = fkspec_F2D.load_with_cuts(cuts) + fk_F2P = fkspec_F2P.load_with_cuts(cuts) + F2D = central_fk_predictions(fk_F2D, pdf) + F2P = central_fk_predictions(fk_F2P, pdf) + + F2D = np.concatenate(F2D.values) + F2P = np.concatenate(F2P.values) + F2_ratio = operator.truediv(F2D, F2P) + + def PC_2_p(list): + PC_p = ht_parametrisation(list, H2p_dict["nodes"], x, q2) + result = np.array(operator.truediv(F2D, np.sum([F2P, PC_p], axis=0)) - F2_ratio) + return result + + def PC_2_d(list): + PC_d = ht_parametrisation(list, H2d_dict["nodes"], x, q2) + result = np.array(operator.truediv(np.sum([F2D, PC_d], axis=0), F2P) - F2_ratio) + return result + + return PC_2_p, PC_2_d def DIS_F2_ht(H2_dict, x, q2): - def PC_2(list): - result = ht_parametrisation(list, H2_dict['nodes'], x, q2) - return result - - return PC_2 - - -def DIS_F2C_ht(H2p_dict, - H2d_dict, - x, - q2): - # Iron target - Z = 23.403 - A = 49.618 - - def PC_2_p(list): - result = ht_parametrisation(list, H2p_dict['nodes'], x, q2) - return result - - def PC_2_d(list): - result = 2 * (Z - A) / A * ht_parametrisation(list, H2d_dict['nodes'], x, q2) - return result - - return PC_2_p, PC_2_d - - -def 
DIS_NC_ht(H2_dict, - HL_dict, - x, - q2, - y): - yp = 1 + np.power(1 - y, 2) - yL = np.power(y, 2) - N_L = - yL / yp - - def PC_2(list): - return ht_parametrisation(list, H2_dict['nodes'], x, q2) - - def PC_L(list): - return N_L * ht_parametrisation(list, HL_dict['nodes'], x, q2) - - return PC_2, PC_L + def PC_2(list): + result = ht_parametrisation(list, H2_dict['nodes'], x, q2) + return result + + return PC_2 + + +def DIS_F2C_ht(H2p_dict, H2d_dict, x, q2): + # Iron target + Z = 23.403 + A = 49.618 + + def PC_2_p(list): + result = ht_parametrisation(list, H2p_dict['nodes'], x, q2) + return result + + def PC_2_d(list): + result = 2 * (Z - A) / A * ht_parametrisation(list, H2d_dict['nodes'], x, q2) + return result + + return PC_2_p, PC_2_d + + +def DIS_NC_ht(H2_dict, HL_dict, x, q2, y): + yp = 1 + np.power(1 - y, 2) + yL = np.power(y, 2) + N_L = -yL / yp + + def PC_2(list): + return ht_parametrisation(list, H2_dict['nodes'], x, q2) + + def PC_L(list): + return N_L * ht_parametrisation(list, HL_dict['nodes'], x, q2) + + return PC_2, PC_L class DIS_SNU: - def __init__(self, - HT_dict, - target_tuple, #(A,Z) - kin_tuple, - Mh, - Mw, - lepton): - - # Lead target - A = target_tuple[0] - Z = target_tuple[1] - x = kin_tuple[0] - q2 = kin_tuple[1] - y = kin_tuple[2] - self.nuclear_target = 2 * (Z - A) / A - self.Mh = Mh #0.938 GeV - self.Mw2 = np.power(Mw, 2) # GeV^2 - self.yp = 1 + np.power(1 - y, 2) - 2 * np.power( x * y * Mh, 2) / q2 - self.yL = np.power(y, 2) - self.ym = 1 - np.power(1 - y, 2) - self.N = GEV_CM2_CONV * (GF ** 2) * Mh / ( 2 * np.pi * np.power( 1 + q2 / self.Mw2, 2) ) * self.yp - self.H2p_dict = HT_dict['H2p'] - self.H2d_dict = HT_dict['H2d'] - self.HLp_dict = HT_dict['HLp'] - self.HLd_dict = HT_dict['HLd'] - self.H3p_dict = HT_dict['H3p'] - self.H3d_dict = HT_dict['H3d'] - - self.x = x - self.q2 = q2 - self.l = lepton - - def PC_2_p(self, list): - norm = self.N - return norm * ht_parametrisation(list, self.H2p_dict['nodes'], self.x, self.q2) - - def PC_2_d(self, list): - norm = self.N * self.nuclear_target - return norm * ht_parametrisation(list, self.H2d_dict['nodes'], self.x, self.q2) - - def PC_L_p(self, list): - norm = - self.N * self.yL / self.yp - return norm * ht_parametrisation(list, self.HLp_dict['nodes'], self.x, self.q2) - - def PC_L_d(self, list): - norm = - self.N * self.yL / self.yp * self.nuclear_target - return norm * ht_parametrisation(list, self.HLd_dict['nodes'], self.x, self.q2) - - def PC_3_p(self, list): - norm = self.N * np.power(-1, self.l) * self.ym / self.yp * self.x - return norm * ht_parametrisation(list, self.H3p_dict['nodes'], self.x, self.q2) - - def PC_3_d(self, list): - norm = self.N * np.power(-1, self.l) * self.ym / self.yp * self.x * self.nuclear_target - return norm * ht_parametrisation(list, self.H3d_dict['nodes'], self.x, self.q2) - + def __init__(self, HT_dict, target_tuple, kin_tuple, Mh, Mw, lepton): # (A,Z) + + # Lead target + A = target_tuple[0] + Z = target_tuple[1] + x = kin_tuple[0] + q2 = kin_tuple[1] + y = kin_tuple[2] + self.nuclear_target = 2 * (Z - A) / A + self.Mh = Mh # 0.938 GeV + self.Mw2 = np.power(Mw, 2) # GeV^2 + self.yp = 1 + np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / q2 + self.yL = np.power(y, 2) + self.ym = 1 - np.power(1 - y, 2) + self.N = ( + GEV_CM2_CONV * (GF**2) * Mh / (2 * np.pi * np.power(1 + q2 / self.Mw2, 2)) * self.yp + ) + self.H2p_dict = HT_dict['H2p'] + self.H2d_dict = HT_dict['H2d'] + self.HLp_dict = HT_dict['HLp'] + self.HLd_dict = HT_dict['HLd'] + self.H3p_dict = HT_dict['H3p'] + self.H3d_dict = 
HT_dict['H3d'] + + self.x = x + self.q2 = q2 + self.l = lepton + + def PC_2_p(self, list): + norm = self.N + return norm * ht_parametrisation(list, self.H2p_dict['nodes'], self.x, self.q2) + + def PC_2_d(self, list): + norm = self.N * self.nuclear_target + return norm * ht_parametrisation(list, self.H2d_dict['nodes'], self.x, self.q2) + + def PC_L_p(self, list): + norm = -self.N * self.yL / self.yp + return norm * ht_parametrisation(list, self.HLp_dict['nodes'], self.x, self.q2) + + def PC_L_d(self, list): + norm = -self.N * self.yL / self.yp * self.nuclear_target + return norm * ht_parametrisation(list, self.HLd_dict['nodes'], self.x, self.q2) + + def PC_3_p(self, list): + norm = self.N * np.power(-1, self.l) * self.ym / self.yp * self.x + return norm * ht_parametrisation(list, self.H3p_dict['nodes'], self.x, self.q2) + + def PC_3_d(self, list): + norm = self.N * np.power(-1, self.l) * self.ym / self.yp * self.x * self.nuclear_target + return norm * ht_parametrisation(list, self.H3d_dict['nodes'], self.x, self.q2) + class DIS_NUTEV(DIS_SNU): - def __init__(self, HT_dict, - target_tuple, #(A,Z) - kin_tuple, - Mh, - Mw, - lepton): - super().__init__(HT_dict, target_tuple, kin_tuple, Mh, Mw, lepton) - self.N = 50 * self.yp / np.power( 1 + self.q2 / self.Mw2, 2) + def __init__(self, HT_dict, target_tuple, kin_tuple, Mh, Mw, lepton): # (A,Z) + super().__init__(HT_dict, target_tuple, kin_tuple, Mh, Mw, lepton) + self.N = 50 * self.yp / np.power(1 + self.q2 / self.Mw2, 2) class DIS_HERA_CC(DIS_SNU): - def __init__(self, HT_dict, - kin_tuple, - Mh, - Mw, - lepton): - super().__init__(HT_dict, (1,1), kin_tuple, Mh, Mw, lepton) - y = kin_tuple[2] - self.yp = 1 + np.power(1 - y, 2) - N = 1 / 4 * self.yp \ No newline at end of file + def __init__(self, HT_dict, kin_tuple, Mh, Mw, lepton): + super().__init__(HT_dict, (1, 1), kin_tuple, Mh, Mw, lepton) + y = kin_tuple[2] + self.yp = 1 + np.power(1 - y, 2) + N = 1 / 4 * self.yp From 2570a1a39c4148d0a634c123532a0bb733a368c3 Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 14 Jan 2025 14:06:11 +0000 Subject: [PATCH 37/69] Update for new thcovmat construction + refactor + docstrings --- validphys2/src/validphys/config.py | 73 +- .../theorycovariance/construction.py | 333 +++---- .../higher_twist_functions.py | 880 +++++++++++++++--- 3 files changed, 892 insertions(+), 394 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 863892934a..cc13f9ffdc 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1248,28 +1248,6 @@ def produce_loaded_user_covmat_path(self, user_covmat_path: str = ""): fileloc = l.check_vp_output_file(user_covmat_path) return fileloc - @configparser.explicit_node - def produce_covmat_custom(self, use_ht_uncertainties: bool = False): - if use_ht_uncertainties: - from validphys.theorycovariance.construction import thcov_ht - - return thcov_ht - else: - from validphys.theorycovariance.construction import covs_pt_prescrip - - return covs_pt_prescrip - - @configparser.explicit_node - def produce_combine_custom(self, use_ht_uncertainties: bool = False): - if use_ht_uncertainties: - from validphys.theorycovariance.construction import combine_by_type_ht - - return combine_by_type_ht - else: - from validphys.theorycovariance.construction import combine_by_type - - return combine_by_type - @configparser.explicit_node def produce_nnfit_theory_covmat( self, point_prescriptions: list = None, user_covmat_path: str = None @@ -1296,31 +1274,8 @@ def 
produce_nnfit_theory_covmat( from validphys.theorycovariance.construction import user_covmat_fitting f = user_covmat_fitting - elif use_ht_uncertainties: - # NOTE: this covmat is the same as for scale variations, which will result in a clash of - # table names if we wish to use them simultaneously - if use_user_uncertainties: - from validphys.theorycovariance.construction import total_theory_covmat_fitting - - f = total_theory_covmat_fitting - else: - from validphys.theorycovariance.construction import theory_covmat_custom_fitting - - f = theory_covmat_custom_fitting - @functools.wraps(f) - def res(*args, **kwargs): - return f(*args, **kwargs) - - # Set this to get the same filename regardless of the action. - res.__name__ = "theory_covmat" - return res - - @configparser.explicit_node - def produce_combine_by_type_custom(self, use_ht_uncertainties: bool = False): - if use_ht_uncertainties: - return validphys.theorycovariance.construction.combine_by_type_ht - return validphys.theorycovariance.construction.combine_by_type + return f def produce_fitthcovmat( self, use_thcovmat_if_present: bool = False, fit: (str, type(None)) = None @@ -1844,6 +1799,8 @@ def produce_theoryids(self, t0id, point_prescription): prescription. The options for the latter are defined in pointprescriptions.yaml. This hard codes the theories needed for each prescription to avoid user error.""" th = t0id.id + if point_prescription == 'power corrections': + return NSList([t0id], nskey="theoryid") lsv = yaml_safe.load(read_text(validphys.scalevariations, "scalevariationtheoryids.yaml")) @@ -1954,6 +1911,30 @@ def produce_total_phi_data(self, fitthcovmat): return validphys.results.total_phi_data_from_experiments return validphys.results.dataset_inputs_phi_data + # @configparser.explicit_node + def produce_power_corr_dict(self, pc_parameters=None): + """The parameters for the power corrections are given as a list. + This function converts this list into a dictionary with the keys + being the names of the types of power corrections (e.g. `H2p`, `H2d`,...). + """ + if pc_parameters is None: + return None + + pc_parameters_by_type = {} + # Loop over the parameterization for the power corrections in the runcard + for par in pc_parameters: + # Check that the length of shifts matches the length of nodes. + if len(par['yshift']) != len(par['nodes']): + raise ValueError( + f"The length of nodes does not match that of the list in {par['ht']}." + f"Check the runcard. 
Got {len(par['yshift'])} != {len(par['nodes'])}" + ) + + # Store parameters for each power correction + pc_parameters_by_type[par['ht']] = {'yshift': par['yshift'], 'nodes': par['nodes']} + + return pc_parameters_by_type + class Config(report.Config, CoreConfig): """The effective configuration parser class.""" diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 6bd4db09f2..1b44c3ee1d 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -13,11 +13,9 @@ from reportengine.table import table pass -from validphys.results import results, results_central from validphys.core import PDF -from validphys.process_options import _Process from validphys.results import results, results_central -import validphys.theorycovariance.higher_twist_functions as ht_func +from validphys.theorycovariance.higher_twist_functions import compute_deltas_pc from validphys.theorycovariance.theorycovarianceutils import ( check_correct_theory_combination, check_fit_dataset_order_matches_grouped, @@ -51,10 +49,10 @@ def theory_covmat_dataset(results, results_central_bytheoryids, point_prescripti return thcovmat -ProcessInfo = namedtuple("ProcessInfo", ("preds", "namelist", "sizes", "data")) +ProcessInfo = namedtuple("ProcessInfo", ("preds", "namelist", "sizes", "data", "data_spec")) -def combine_by_type(each_dataset_results_central_bytheory): +def combine_by_type(each_dataset_results_central_bytheory, groups_data_by_process): """Groups the datasets by process and returns an instance of the ProcessInfo class Parameters @@ -72,6 +70,7 @@ def combine_by_type(each_dataset_results_central_bytheory): dataset_size = defaultdict(list) theories_by_process = defaultdict(list) ordered_names = defaultdict(list) + data_spec = defaultdict(list) for dataset in each_dataset_results_central_bytheory: name = dataset[0][0].name theory_centrals = [x[1].central_value for x in dataset] @@ -81,217 +80,23 @@ def combine_by_type(each_dataset_results_central_bytheory): theories_by_process[proc_type].append(theory_centrals) for key, item in theories_by_process.items(): theories_by_process[key] = np.concatenate(item, axis=1) - process_info = ProcessInfo( - preds=theories_by_process, namelist=ordered_names, sizes=dataset_size, data=None - ) - return process_info + # Store DataGroupSpecs instances + for group_proc in groups_data_by_process: + for exp_set in group_proc.datasets: + data_spec[exp_set.name] = exp_set -def combine_by_type_ht(each_dataset_results, groups_dataset_inputs_loaded_cd_with_cuts_byprocess): - """same as combine_by_type but now for a single theory and including commondata info""" - dataset_size = defaultdict(list) - theories_by_process = defaultdict(list) - cd_by_process = defaultdict(list) - ordered_names = defaultdict(list) - for dataset, cd in zip( - each_dataset_results, groups_dataset_inputs_loaded_cd_with_cuts_byprocess - ): - name = cd.setname - if name != dataset[0].name: - raise ValueError("The underlying datasets do not match!") - theory_centrals = [x.central_value for x in dataset] - dataset_size[name] = len(theory_centrals[0]) - proc_type = process_lookup(name) - ordered_names[proc_type].append(name) - cd_by_process[proc_type].append(cd.kinematics.values) - theories_by_process[proc_type].append(theory_centrals) - - for key in theories_by_process.keys(): - theories_by_process[key] = np.concatenate(theories_by_process[key], axis=1) - cd_by_process[key] = 
np.concatenate(cd_by_process[key], axis=0) process_info = ProcessInfo( - preds=theories_by_process, namelist=ordered_names, sizes=dataset_size, data=cd_by_process + preds=theories_by_process, + namelist=ordered_names, + sizes=dataset_size, + data=None, + data_spec=data_spec, ) return process_info -def thcov_shifts_ht(ht_parameters, ht_included_proc, ht_excluded_exp, groups_data_by_process, pdf): - """ - Same as `thcov_HT` but implementing theory covariance method for each node of the spline. - Note that 'groups_data_by_process' contains the same info as 'combine_by_type_ht'. At some - point we should use only one of them. - """ - groups_data = groups_data_by_process - start_proc_by_exp = defaultdict(list) - ndata_by_exp = defaultdict(list) - deltas = defaultdict(list) - running_index_tot = 0 - - HT = {} - HT_func = {} - - for par in ht_parameters: - if len(par['list']) != len(par['nodes']): - raise ValueError( - f"The length of nodes does not match that of the list in {par['ht']}. Check the runcard.\n \ - {len(par['list'])} vs. {len(par['nodes'])}" - ) - - HT[par['ht']] = {"list": par['list'], "nodes": par['nodes']} - - HT_func[par['ht']] = None - - # Initialise shifts according to 5pt - for idx_node, _ in enumerate(par['nodes']): - deltas[par['ht'] + f"({idx_node})"] = np.array([]) - - for idx_proc, group_proc in enumerate(groups_data): - for idx_exp, exp_set in enumerate(group_proc.datasets): - # For covmat construction - exp_ndata = exp_set.load_commondata().ndata - start_proc_by_exp[exp_set.name] = running_index_tot - running_index_tot += exp_ndata - ndata_by_exp[exp_set.name] = exp_ndata - - for ht in HT_func.keys(): - HT_func[ht] = ht_func.null_func(exp_ndata) - - if group_proc.name in ht_included_proc and exp_set.name not in ht_excluded_exp: - cd_table = exp_set.load_commondata().commondata_table - process_type = cd_table['process'].iloc[0] - - if isinstance(process_type, _Process): - process_type = process_type.name - - x = cd_table['kin1'].to_numpy() - q2 = cd_table['kin2'].to_numpy() - y = cd_table['kin3'].to_numpy() - - if x.size != exp_ndata: - raise ValueError("Problem with the number of data.") - - # NMC_NC_NOTFIXED_DW_EM-F2 - if process_type == "DIS_F2R": - HT_func['H2p'], HT_func['H2d'] = ht_func.DIS_F2R_ht( - exp_set, pdf, HT["H2p"], HT["H2d"], x, q2 - ) - - elif process_type == "DIS_F2P": - HT_func['H2p'] = ht_func.DIS_F2_ht(HT["H2p"], x, q2) - - elif process_type == "DIS_F2D": - HT_func['H2d'] = ht_func.DIS_F2_ht(HT["H2d"], x, q2) - - # EMC - elif process_type == "DIS_F2C": - HT_func['H2p'], HT_func['H2d'] = ht_func.DIS_F2C_ht(HT['H2p'], HT['H2d'], x, q2) - - # HERA NC - elif process_type in ["DIS_NCE", "DIS_NCP", "DIS_NCP_CH", "DIS_NCE_BT"]: - HT_func['H2p'], HT_func["HLp"] = ht_func.DIS_NC_ht( - HT['H2p'], HT['HLp'], x, q2, y - ) - - # CHORUS - elif process_type in ["DIS_SNU", "DIS_SNB"]: - # Lead target:: - A = 208.0 - Z = 82 - if process_type == "DIS_SNU": - l = 0 - elif process_type == "DIS_SNB": - l = 1 - - DIS_NU = ht_func.DIS_SNU(HT, (A, Z), (x, q2, y), Mh=0.938, Mw=80.398, lepton=l) - HT_func['H2p'] = DIS_NU.PC_2_p - HT_func['H2d'] = DIS_NU.PC_2_d - HT_func["HLp"] = DIS_NU.PC_L_p - HT_func["HLd"] = DIS_NU.PC_L_d - HT_func["H3p"] = DIS_NU.PC_3_p - HT_func["H3d"] = DIS_NU.PC_3_d - - # NuTeV - elif process_type in ["DIS_DM_NU", "DIS_DM_NB"]: - # Iron target - Z = 23.403 - A = 49.618 - if process_type == "DIS_SNU": - l = 0 - elif process_type == "DIS_SNB": - l = 1 - - DIS_NuTeV = ht_func.DIS_NUTEV( - HT, (A, Z), (x, q2, y), Mh=0.938, Mw=80.398, lepton=l - ) - 
HT_func['H2p'] = DIS_NuTeV.PC_2_p - HT_func['H2d'] = DIS_NuTeV.PC_2_d - HT_func["HLp"] = DIS_NuTeV.PC_L_p - HT_func["HLd"] = DIS_NuTeV.PC_L_d - HT_func["H3p"] = DIS_NuTeV.PC_3_p - HT_func["H3d"] = DIS_NuTeV.PC_3_d - - # HERA_CC - elif process_type in ["DIS_CCE", "DIS_CCP"]: - if process_type == "DIS_CCE": - l = 0 - elif process_type == "DIS_CCP": - l = 1 - DIS_CC_HERA = ht_func.DIS_HERA_CC(HT, (x, q2, y), Mh=0.938, Mw=80.398, lepton=l) - HT_func['H2p'] = DIS_CC_HERA.PC_2_p - HT_func['H2d'] = DIS_CC_HERA.PC_2_d - HT_func["HLp"] = DIS_CC_HERA.PC_L_p - HT_func["HLd"] = DIS_CC_HERA.PC_L_d - HT_func["H3p"] = DIS_CC_HERA.PC_3_p - HT_func["H3d"] = DIS_CC_HERA.PC_3_d - else: - raise Exception( - f"The process type `{process_type}` in `{exp_set.name} has not been implemented." - ) - - for ht in HT.keys(): - for idx_node in range(len(HT[ht]['nodes'])): - shifted_list = ht_func.beta_tilde_5pt(HT[ht]['list'], idx_node) - deltas[ht + f"({idx_node})"] = np.append( - deltas[ht + f"({idx_node})"], HT_func[ht](shifted_list) - ) - - # Construct the covariance matrix - covmats = defaultdict(list) - for exp_name_1, exp_idx_1 in start_proc_by_exp.items(): - for exp_name_2, exp_idx_2 in start_proc_by_exp.items(): - s = np.zeros(shape=(ndata_by_exp[exp_name_1], ndata_by_exp[exp_name_2])) - for shifts in deltas.keys(): - s += np.outer( - deltas[shifts][exp_idx_1 : exp_idx_1 + ndata_by_exp[exp_name_1]], - deltas[shifts][exp_idx_2 : exp_idx_2 + ndata_by_exp[exp_name_2]], - ) - - start_locs = (exp_idx_1, exp_idx_2) - covmats[start_locs] = s - - return covmats, deltas - - -def thcov_ht(thcov_shifts_ht, table_ht_deltas): - covmat, _ = thcov_shifts_ht - return covmat - - -@table -def table_ht_deltas(thcov_shifts_ht, procs_index, combine_by_type_custom): - _, deltas = thcov_shifts_ht - process_info = combine_by_type_custom - indexlist = [] - for procname in process_info.preds: - for datasetname in process_info.namelist[procname]: - slicer = procs_index.get_locs((procname, datasetname)) - indexlist += procs_index[slicer].to_list() - covmat_index = pd.MultiIndex.from_tuples(indexlist, names=procs_index.names) - df = pd.DataFrame(deltas, index=covmat_index, columns=deltas.keys()) - return df - - -def covmat_3fpt(name1, name2, deltas1, deltas2): +def covmat_3fpt(deltas1, deltas2): """Returns theory covariance sub-matrix for 3pt factorisation scale variation *only*, given two dataset names and collections of scale variation shifts""" @@ -414,6 +219,40 @@ def covmat_n3lo_ad(name1, name2, deltas1, deltas2): return 1 / norm * s +def covmat_power_corrections(deltas1, deltas2): + """Returns the the theory covariance sub-matrix for power + corrections. The two arguments `deltas1` and `deltas2` contain + the shifts for the firs and second experiment, respectively. + + The shifts are given in this form: + ``` + deltas1 = {shift1_label: array1_of_shifts1, + shift2_label: array1_of_shifts2, + shift3_label: array1_of_shifts3, + ...} + deltas2 = {shift1_label: array2_of_shifts1, + shift2_label: array2_of_shifts2, + shift3_label: array2_of_shifts3, + ...} + ``` + The sub-matrix is computed using the 5-point prescription, thus + + s = array1_of_shifts1 X array2_of_shifts1 + array1_of_shifts2 X array2_of_shifts2 + ... + + where `X` is the outer product. 
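+
+    As an illustration, the accumulation is just a sum of outer products
+    (a schematic sketch, not part of the API; labels and numbers are made up):
+    ```
+    import numpy as np
+
+    deltas1 = {"H2p(0)": np.array([0.1, 0.2]), "H2p(1)": np.array([0.0, 0.3])}
+    deltas2 = {"H2p(0)": np.array([0.4]), "H2p(1)": np.array([0.5])}
+    s = sum(np.outer(deltas1[k], deltas2[k]) for k in deltas1)  # shape (2, 1)
+    ```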
+ """ + # Check that `deltas1` and `deltas2` have the same shifts + if deltas1.keys() != deltas2.keys(): + raise RuntimeError('The two dictionaries do not contain the same shifts.') + + size1 = next(iter(deltas1.values())).size + size2 = next(iter(deltas2.values())).size + s = np.zeros(shape=(size1, size2)) + for shift in deltas1.keys(): + s += np.outer(deltas1[shift], deltas2[shift]) + return s + + def compute_covs_pt_prescrip(point_prescription, name1, deltas1, name2=None, deltas2=None): """Utility to compute the covariance matrix by prescription given the shifts with respect to the central value for a pair of processes. @@ -484,46 +323,82 @@ def compute_covs_pt_prescrip(point_prescription, name1, deltas1, name2=None, del # alphas is correlated for all datapoints and the covmat construction is # therefore equivalent to that of the factorization scale variations s = covmat_3fpt(deltas1, deltas2) + elif point_prescription == 'power corrections': + # Shifts computed from power corrected predictions + s = covmat_power_corrections(deltas1, deltas2) return s @check_correct_theory_combination -def covs_pt_prescrip(combine_by_type, point_prescription): +def covs_pt_prescrip( + combine_by_type, + point_prescription, + pdf: PDF, + power_corr_dict, + pc_included_prosc, + pc_excluded_exps, +): """Produces the sub-matrices of the theory covariance matrix according to a point prescription which matches the number of input theories. - If 5 theories are provided, a scheme 'bar' or 'nobar' must be chosen in the runcard in order to specify the prescription. Sub-matrices correspond to applying the scale variation prescription to each pair of processes in turn, using a different procedure for the case where the processes are the same relative to when they are different.""" - process_info = combine_by_type + datagroup_spec = process_info.data_spec running_index = 0 - start_proc = defaultdict(list) - for name in process_info.preds: - size = len(process_info.preds[name][0]) - start_proc[name] = running_index - running_index += size covmats = defaultdict(list) - for name1 in process_info.preds: - for name2 in process_info.preds: - central1, *others1 = process_info.preds[name1] - deltas1 = list(other - central1 for other in others1) - central2, *others2 = process_info.preds[name2] - deltas2 = list(other - central2 for other in others2) - s = compute_covs_pt_prescrip(point_prescription, name1, deltas1, name2, deltas2) - start_locs = (start_proc[name1], start_proc[name2]) - covmats[start_locs] = s + if point_prescription != 'power corrections': + start_proc = defaultdict(list) + for name in process_info.preds: + size = len(process_info.preds[name][0]) + start_proc[name] = running_index + running_index += size + + for name1 in process_info.preds: + for name2 in process_info.preds: + central1, *others1 = process_info.preds[name1] + deltas1 = list(other - central1 for other in others1) + central2, *others2 = process_info.preds[name2] + deltas2 = list(other - central2 for other in others2) + s = compute_covs_pt_prescrip(point_prescription, name1, deltas1, name2, deltas2) + start_locs = (start_proc[name1], start_proc[name2]) + covmats[start_locs] = s + + # For power corrections, the loops run over experimentes + else: + start_proc_by_exp = defaultdict(list) + for exp_name, data_spec in datagroup_spec.items(): + start_proc_by_exp[exp_name] = running_index + running_index += data_spec.load_commondata().ndata + + for exp_name1, data_spec1 in datagroup_spec.items(): + for exp_name2, data_spec2 in datagroup_spec.items(): + 
process_type1 = process_lookup(exp_name1) + process_type2 = process_lookup(exp_name2) + + is_excluded_exp = any(name in pc_excluded_exps for name in [exp_name1, exp_name2]) + is_included_proc = any( + proc not in pc_included_prosc for proc in [process_type1, process_type2] + ) + if not (is_excluded_exp or is_included_proc): + deltas1 = compute_deltas_pc(data_spec1, pdf, power_corr_dict) + deltas2 = compute_deltas_pc(data_spec2, pdf, power_corr_dict) + s = compute_covs_pt_prescrip( + point_prescription, exp_name1, deltas1, exp_name2, deltas2 + ) + start_locs = (start_proc_by_exp[exp_name1], start_proc_by_exp[exp_name2]) + covmats[start_locs] = s return covmats @table -def theory_covmat_custom(covmat_custom, procs_index, combine_by_type_custom): +def theory_covmat_custom_per_prescription(covs_pt_prescrip, procs_index, combine_by_type): """Takes the individual sub-covmats between each two processes and assembles them into a full covmat. Then reshuffles the order from ordering by process to ordering by experiment as listed in the runcard""" - process_info = combine_by_type_custom + process_info = combine_by_type # Construct a covmat_index based on the order of experiments as they are in combine_by_type # NOTE: maybe the ordering of covmat_index is always the same as that of procs_index? @@ -539,7 +414,7 @@ def theory_covmat_custom(covmat_custom, procs_index, combine_by_type_custom): # Put the covariance matrices between two process into a single covariance matrix total_datapoints = sum(process_info.sizes.values()) mat = np.zeros((total_datapoints, total_datapoints), dtype=np.float32) - for locs, cov in covmat_custom.items(): + for locs, cov in covs_pt_prescrip.items(): xsize, ysize = cov.shape mat[locs[0] : locs[0] + xsize, locs[1] : locs[1] + ysize] = cov df = pd.DataFrame(mat, index=covmat_index, columns=covmat_index) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index bfdc4bbf2f..4a496330b8 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -1,21 +1,85 @@ +""" +This module contains the utilities for the computation of the shifts in +the theoretical predictions due to the effects of power corrections. Contrary +to scale variations, in the case of power corrections the shifts are not +computed using theories. The shifts are computed at "runtime" during vp-setupfit. + +The aim is that, after shifts being computed, the covmat can be constructed using +the same functions implemented for scale variations (e.g. `covs_pt_prescrip`). +The way shifts are computed depends also on the point prescription. In the case of +scale variations, the point prescription specifies the theories whereby shifts are +computed. In the case of power corrections, shifts and covmat constructions are +computed using a 5-point prescription extended to every parameter used to define +the power correction. + +This module comprehends a bunch of ``factory`` functions such as `DIS_F2_pc`. Each +of these functions returns another function that computes the shifts taking as arguments +the values of the parameters used to parametrise the power corrections. In +other words, these factory functions hard-code the dependence on the kinematic +and leave the dependence on the parameters free. In this way, the shifts +can be computed using different combinations of parameters (i.e. different prescriptions) +if needed. 
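+
+As an illustration of the per-node prescription described above (a schematic
+sketch: ``node_shifts`` is not part of this module and the numbers are
+placeholders), shifts are built by displacing one node at a time:
+```
+def node_shifts(y_values):
+    # one vector per node: only that node is displaced, the others stay at zero
+    return [
+        [y if i == j else 0.0 for i, y in enumerate(y_values)]
+        for j in range(len(y_values))
+    ]
+
+node_shifts([0.1, 0.2, 0.3])
+# -> [[0.1, 0.0, 0.0], [0.0, 0.2, 0.0], [0.0, 0.0, 0.3]]
+```
+Each of these vectors can then be passed, one at a time, to the shift
+functions defined below.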
+""" + +from collections import defaultdict import operator import numpy as np import scipy.interpolate as scint from validphys.convolution import central_fk_predictions +from validphys.core import PDF, DataSetSpec +from validphys.process_options import _Process GEV_CM2_CONV = 3.893793e10 GF = 1.1663787e-05 # Fermi's constant [GeV^-2] - - -def beta_tilde_5pt(delta_h, idx): - shifted_list = [0 for _ in range(len(delta_h))] - shifted_list[idx] = delta_h[idx] - return shifted_list - - -def ht_parametrisation(delta_h: list, nodes: list, x: list, Q2: list): +Mh = 0.938 # Proton's mass in GeV/c^2 +MW = 80.398 # W boson mass in GeV/c^2 + +F2P_exps = ['SLAC_NC_NOTFIXED_P_EM-F2', 'BCDMS_NC_NOTFIXED_P_EM-F2'] +F2D_exps = ['SLAC_NC_NOTFIXED_D_EM-F2', 'BCDMS_NC_NOTFIXED_D_EM-F2'] +NC_SIGMARED_P_EM = ['NMC_NC_NOTFIXED_P_EM-SIGMARED', 'HERA_NC_318GEV_EM-SIGMARED'] +NC_SIGMARED_P_EP = [ + 'HERA_NC_225GEV_EP-SIGMARED', + 'HERA_NC_251GEV_EP-SIGMARED', + 'HERA_NC_300GEV_EP-SIGMARED', + 'HERA_NC_318GEV_EP-SIGMARED', +] +NC_SIGMARED_P_EAVG = ['HERA_NC_318GEV_EAVG_CHARM-SIGMARED', 'HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED'] + + +def dis_pc_func(delta_h: list, nodes: list, x: list, Q2: list): + """ + This function defines the parametrization used to model power corrections + for DIS-like processes. Currently, only the cubic spline is supported and + it is hard coded in this function. + + The initialization of the cubic spline requires a list of nodes, which contains + the array of the independent variables (e.g. x-Bjorken), + and a list of shifts that correspond to the dependent variables. Each pair (node, shift) + is a point in the plane. The ensemble of points will be interpolated according to the + cubic spline. + + Parameters + ---------- + delta_h: list + Shifts of the dependent variables at each node listed in `nodes`. These values correspond + to the `amplitude` of the power correction at each node. + nodes: list + List of nodes in the independent variables. For DIS-like processes, these are points + in the x-Bjorken. + x: list + List of x-Bjorken points where the power correction function is evaluated + Q2: list + List of scales where the power correction function is evaluated. Note that this list + is meant to be of the same length as `x`, and the two lists are meant to be considered + as pairs, e.g. (x1, Q2_1), (x2, Q2_2), ... . + + Returns + ------- + A list of power corrections for DIS-like processes where each point is evaluated at the + kinematic pair (x,Q2). + """ H = scint.CubicSpline(nodes, delta_h) H = np.vectorize(H) @@ -23,17 +87,95 @@ def ht_parametrisation(delta_h: list, nodes: list, x: list, Q2: list): return PC -def null_func(size): - """Auxiliary function used to return arrays of zeros for those datasets - that don't require higher twist.""" +# TODO Maybe we want to treat the function that parametrizes the PC +# as argument? +def DIS_F2_pc(pc2_nodes, x, q2): + """ + Returns the function that computes the shift for the ratio of structure + functions F2_d / F2_p. For this observable, power corrections are defined + such that + + F2 -> F2 + PC2, - def zeros(list): - return np.zeros(size) + and the shift is defined as - return zeros + Delta(F2) = (F2 + PC2) - F2 = PC2. + Note that, as in the case of `DIS_F2R_ht`, the shift still depends on the set + of parameters needed to define the parameterization of PC2. Also, this function + can be used to compute the shift for both proton and deuteron, provided that the + correct list of parameters is passed to the **curried** function. 
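+
+    As an illustration (schematic only, with placeholder numbers), the returned
+    function can be used as
+    ```
+    shift = DIS_F2_pc(pc2_nodes=[0.0, 0.1, 0.3, 0.5, 1.0], x=[0.1, 0.3], q2=[4.0, 10.0])
+    delta = shift([0.0, 0.0, 0.02, 0.0, 0.0])  # one node displaced, the others kept at zero
+    ```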
-def DIS_F2R_ht(experiment, pdf, H2p_dict, H2d_dict, x, q2): + The function used to parametrize the the power correction is `dis_pc_func` and + it is hard coded. + + Parameters + ---------- + + """ + + def PC_2(y_values): + result = dis_pc_func(y_values, pc2_nodes, x, q2) + return result + + return PC_2 + + +def DIS_F2R_pc(experiment, pdf, pc_2_p_nodes, pc_2_d_nodes, x, q2): + """ + Returns the function that computes the shift for the ratio of structure + functions F2_d / F2_p. For this observable, power corrections are defined + such that + + F2_d / F2_p -> (F2_d + PC2_d) / (F2_p + PC2_p) , + + and the shift is the defined as + + Delta(F2 ratio) = (F2_d + PC2_d) / (F2_p + PC2_p) - F2_d / F2_p . + + The shift is computed for a given set of kinematic variables specified + by the paris (x,Q2), but it still depends on the set of parameters need by + the power correction terms PC2_d and PC2_p. + + Note that this function does **not** return the power corrections for the + given kinematic points, but rather it returns another function where the + kinematic dependence has been **curried** (i.e. hard coded). This new function + takes as arguments the y-values of the nodes used to compute PC2_d and PC2_p + (see `delta_h` in `dis_pc_func`). Note that these y-values are not necessarily + the values listed in the runcard, as we can apply different point prescription. + For instance, we may want to pass in a set of y-values where the nodes are shifted + one at the time, leaving the others zero. The prescription is thus handled separately. + The returning function allows thus to compute Delta(F2 ratio)({...}_d, {...}_p), where + `{...}_d` and `{...}_p` are the sets of y-values for the parametrisation for the proton + and deuteron terms respectively. + + The function used to parametrize the the power correction is `dis_pc_func` and + it is hard coded. + + Parameters + ---------- + experiment: DataSetSpec + An instance of DataSetSpec used to extract information such as cuts + and fk tables. + pdf: PDF + An instance of the class PDF. This specifies the PDF to bo convoluted + with the FK tables. + pc_2_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for the proton (see `dis_pc_func`). + pc_2_d_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for the deuteron (see `dis_pc_func`). + x: list[float] + Set of points in x-Bjorken where the power corrections will be evaluated. + q2: list[float] + Set of points in Q2 where the power corrections will be evaluated. + + Returns + ------- + The function the computes the shift for this observable. It depends on the + y-values for the parameterization of P2_d and P2_p. 
+ """ cuts = experiment.cuts fkspec_F2D, fkspec_F2P = experiment.fkspecs fk_F2D = fkspec_F2D.load_with_cuts(cuts) @@ -45,120 +187,620 @@ def DIS_F2R_ht(experiment, pdf, H2p_dict, H2d_dict, x, q2): F2P = np.concatenate(F2P.values) F2_ratio = operator.truediv(F2D, F2P) - def PC_2_p(list): - PC_p = ht_parametrisation(list, H2p_dict["nodes"], x, q2) - result = np.array(operator.truediv(F2D, np.sum([F2P, PC_p], axis=0)) - F2_ratio) + def func(y_values_d, y_values_p): + PC_d = dis_pc_func(y_values_d, pc_2_d_nodes, x, q2) + PC_p = dis_pc_func(y_values_p, pc_2_p_nodes, x, q2) + num = np.sum([F2D, PC_d], axis=0) + denom = np.sum([F2P, PC_p], axis=0) + result = np.array(operator.truediv(num, denom) - F2_ratio) return result - def PC_2_d(list): - PC_d = ht_parametrisation(list, H2d_dict["nodes"], x, q2) - result = np.array(operator.truediv(np.sum([F2D, PC_d], axis=0), F2P) - F2_ratio) - return result - - return PC_2_p, PC_2_d - + return func # PC_2_p, PC_2_d + + +def DIS_F2C_pc(pc2_p_nodes, pc2_d_nodes, x, q2): + """ + Builds the function used to compute the shifts for the charm + structure function measured by EMC. The process involved is + + mu^+ + Fe -> mu+^ + c cbar + X . + + This function works exactly as the previous functions used to + compute nuisance shifts. In this case, the constructed function + (`func` below) requires two lists of parameters for the proton + and the deuteron contribution. The reason being that in this process + the muon scatters off an iron target, and the power correction + contribution is a mixture of proton and deuteron nucleons. Hence, proton + and deuteron contribution are weighted by the appropriate atomic factor. + + Note that we are parametrising power corrections as proton and deuteron + targets. If we were to parametrize such contributions using, say, proton + and nucleon, than the weights would change. + + Parameters + ---------- + pc2_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for the proton (see `dis_pc_func`). + pc2_d_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for the deuteron (see `dis_pc_func`). + x: list[float] + Set of points in x-Bjorken where the power corrections will be evaluated. + q2: list[float] + Set of points in Q2 where the power corrections will be evaluated. + + Returns + ------- + The function the computes the shift for this observable. It depends on the + y-values for the parameterization of P2_d and P2_p. + """ + # Iron target + Z = 23.403 + A = 49.618 -def DIS_F2_ht(H2_dict, x, q2): - def PC_2(list): - result = ht_parametrisation(list, H2_dict['nodes'], x, q2) + def func(y_values_d, y_values_p): + PC2_d = dis_pc_func(y_values_d, pc2_d_nodes, x, q2) + PC2_p = dis_pc_func(y_values_p, pc2_p_nodes, x, q2) + result = 2 * (Z - A) / A * PC2_d + Z * PC2_p return result - return PC_2 + return func + + +def DIS_NC_XSEC_pc(pc2_nodes, pcL_nodes, pc3_nodes, lepton, x, q2, y): + """ + Builds the function used to compute the shifts for the DIS NC x-secs + delivered by HERA and NMC. The x-sec is reconstructed as calculated + in Yadism (see https://yadism.readthedocs.io/en/latest/theory/intro.html). + In particular, the x-sec is a linear combination of the structure functions + F_2, F_L, and F_3. The coefficients are also computed appropriately (see + link). 
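To make the curried pattern described above concrete, here is a small runnable sketch of the one-parameter-at-a-time variations that are eventually fed to such shift functions (the actual bookkeeping is done by `construct_pars_combs` further down; the numbers here are illustrative):

```python
import numpy as np

yshift = np.array([0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0])  # prior shifts (placeholder)
variations = []
for i, shift in enumerate(yshift):
    v = np.zeros_like(yshift)
    v[i] = shift                    # vary one node, keep all the others at zero
    variations.append(v)
# Each `v` would be passed to a curried shift function (e.g. the `func` returned by
# DIS_F2R_pc), together with a zero vector for the other target, yielding one vector
# of shifts per varied parameter.
```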
The contribution of the power corrections is then + + Delta(x-sec) = x-sec_w_pc - x-sec_wo_pc = PC_2 + N_L * PC_L + N_3 * PC_3 + + where PC_i are the power corrections relative to the respective structure + functions and the N_i the respective coefficients (as defined in Yadism). + + This function works exactly as the previous functions used to + compute nuisance shifts. In addition, it requires the kinematic + invariant `y` to build the shift-function. + + Note that this function can be used for both proton and deuteron targets, + provided that the appropriate lists of nodes is given. + + Parameters + ---------- + pc2_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_2. + pcL_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_L. + pc3_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_3. + lepton: int + Whether the scattering particle is a lepton (0) or an anti-lepton(1). + x: list[float] + Set of points in x-Bjorken where the power corrections will be evaluated. + q2: list[float] + Set of points in Q2 where the power corrections will be evaluated. + y: list[float] + Set of points in y where the power corrections will be evaluated. + + Returns + ------- + The function the computes the shift for this observable. It depends on the + y-values for the parameterization of P2 and PL and P3. + """ + yp = 1 + np.power(1 - y, 2) + ym = 1 - np.power(1 - y, 2) + yL = np.power(y, 2) + N_L = -yL / yp # Coefficient for F_L + N_3 = np.power(-1, lepton) * ym / yp # Coefficient for F_3 + + def func(y_values_pc2, y_values_pcL, y_values_pc3): + PC_2 = dis_pc_func(y_values_pc2, pc2_nodes, x, q2) + PC_L = dis_pc_func(y_values_pcL, pcL_nodes, x, q2) + PC_3 = dis_pc_func(y_values_pc3, pc3_nodes, x, q2) + result = PC_2 + N_L * PC_L + N_3 * PC_3 + return result + return func + + +def DIS_CC_HERA_XSEC_pc(pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, lepton, x, q2, y): + """ + Builds the function used to compute the shifts for the DIS CC x-secs + delivered by HERA. The x-sec is reconstructed as calculated + in Yadism (see https://yadism.readthedocs.io/en/latest/theory/intro.html). + In particular, the x-sec is a linear combination of the structure functions + F_2, F_L, and F_3. The coefficients are also computed appropriately (see + link). The contribution of the power corrections is then + + Delta(x-sec) = x-sec_w_pc - x-sec_wo_pc = N * (PC_2 + N_L * PC_L + N_3 * PC_3) + + where PC_i are the power corrections relative to the respective structure + functions and the N_i the respective coefficients (as defined in Yadism). + N is the overall normalization factor. + + For the HERA_CC_318GEV dataset, the target is always a proton. However, the + lepton may be either the electron (0) or the positron (1). This information + is needed in order to compute the coefficient N_3. + + Parameters + ---------- + pc2_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_2. + pcL_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_L. + pc3_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_3. + lepton: int + Whether the scattering particle is a lepton (0) or an anti-lepton(1). 
+ x: list[float] + Set of points in x-Bjorken where the power corrections will be evaluated. + q2: list[float] + Set of points in Q2 where the power corrections will be evaluated. + y: list[float] + Set of points in y where the power corrections will be evaluated. + + Returns + ------- + The function the computes the shift for this observable. It depends on the + y-values for the parameterization of P2 and PL and P3. + """ + yp = 1 + np.power(1 - y, 2) + ym = 1 - np.power(1 - y, 2) + yL = np.power(y, 2) + N = 1 / 4 * yp # Overall normalization + N_L = -yL / yp # Coefficient for F_L + N_3 = np.power(-1, lepton) * ym / yp # Coefficient for F_3 + + def func(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): + # Initialize power corrections for each structure function + PC2_p = dis_pc_func(y_values_pc2_p, pc2_p_nodes, x, q2) + PCL_p = dis_pc_func(y_values_pcL_p, pcL_p_nodes, x, q2) + PC3_p = dis_pc_func(y_values_pc3_p, pc3_p_nodes, x, q2) + + # Build the contribution to the x-sec of the power corrections + result = N * (PC2_p + N_L * PCL_p + N_3 * PC3_p) + return result -def DIS_F2C_ht(H2p_dict, H2d_dict, x, q2): + return func + + +def DIS_CC_NUTEV_pc( + pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, pc2_d_nodes, pcL_d_nodes, pc3_d_nodes, lepton, x, q2, y +): + """ + Builds the function used to compute the shifts for the DIS CC x-secs + delivered by NuTeV. The x-sec is reconstructed as calculated + in Yadism (see https://yadism.readthedocs.io/en/latest/theory/intro.html). + In particular, the x-sec is a linear combination of the structure functions + F_2, F_L, and F_3. The coefficients are also computed appropriately (see + link). Note that this experiment uses iron targets, and thus the coefficients + must take into account the nuclear mixture of porton and deuteron. The contribution + of the power corrections is then + + Delta(x-sec) = x-sec_w_pc - x-sec_wo_pc = N * (PC_2 + N_L * PC_L + N_3 * PC_3) + + where PC_i are the power corrections relative to the respective structure + functions (nuclear mixture implicit) and the N_i the respective coefficients (as defined in Yadism). + N is the overall normalization factor. + + For the NuTeV CC dataset, the target is always iron. However, the + lepton may be either the electron (0) or the positron (1). This + information is needed in order to compute the coefficient N_3. + + Parameters + ---------- + pc2_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_2 of the proton. + pcL_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_L of the proton. + pc3_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_3 of the proton. + pc2_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_2 of the deuteron. + pcL_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_L of the deuteron. + pc3_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_3 of the deuteron. + lepton: int + Whether the scattering particle is a lepton (0) or an anti-lepton(1). + x: list[float] + Set of points in x-Bjorken where the power corrections will be evaluated. + q2: list[float] + Set of points in Q2 where the power corrections will be evaluated. 
+ y: list[float] + Set of points in y where the power corrections will be evaluated. + + Returns + ------- + The function the computes the shift for this observable. It depends on the + y-values for the parameterization of P2 and PL and P3 for proton and deuteron. + """ # Iron target Z = 23.403 A = 49.618 - - def PC_2_p(list): - result = ht_parametrisation(list, H2p_dict['nodes'], x, q2) + nuclear_factor = 2 * (Z - A) / A + yp = 1 + np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / q2 + ym = 1 - np.power(1 - y, 2) + yL = np.power(y, 2) + N_L = -yL / yp # Coefficient for F_L + N_3 = np.power(-1, lepton) * ym / yp # Coefficient for F_3 + + MW2 = np.power(MW, 2) + # Overall coefficient + # TODO: cross-check + N = 100 * yp / (2 * np.power(1 + q2 / MW2, 2)) + + def func( + y_values_pc2_p, + y_values_pcL_p, + y_values_pc3_p, + y_values_pc2_d, + y_values_pcL_d, + y_values_pc3_d, + ): + PC2_p = dis_pc_func(y_values_pc2_p, pc2_p_nodes, x, q2) + PCL_p = dis_pc_func(y_values_pcL_p, pcL_p_nodes, x, q2) + PC3_p = dis_pc_func(y_values_pc3_p, pc3_p_nodes, x, q2) + PC2_d = dis_pc_func(y_values_pc2_d, pc2_d_nodes, x, q2) + PCL_d = dis_pc_func(y_values_pcL_d, pcL_d_nodes, x, q2) + PC3_d = dis_pc_func(y_values_pc3_d, pc3_d_nodes, x, q2) + tmp_2 = Z * PC2_p + nuclear_factor * PC2_d + tmp_L = Z * PCL_p + nuclear_factor * PCL_d + tmp_3 = Z * PC3_p + nuclear_factor * PC3_d + result = N * (tmp_2 + N_L * tmp_L + N_3 * tmp_3) return result - def PC_2_d(list): - result = 2 * (Z - A) / A * ht_parametrisation(list, H2d_dict['nodes'], x, q2) + return func + + +# TODO This is function is really similar to the one +# defined for NUTEV CC. Can we reduce code repetitions? +def DIS_CC_CHORUS_pc( + pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, pc2_d_nodes, pcL_d_nodes, pc3_d_nodes, lepton, x, q2, y +): + """ + Same as DIS_CC_NUTEV_pc, but for CHORUS CC. + + Note that the difference here is in the definition of the overall + normalization N. + + Parameters + ---------- + pc2_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_2 of the proton. + pcL_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_L of the proton. + pc3_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_3 of the proton. + pc2_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_2 of the deuteron. + pcL_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_L of the deuteron. + pc3_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_3 of the deuteron. + lepton: int + Whether the scattering particle is a lepton (0) or an anti-lepton(1). + x: list[float] + Set of points in x-Bjorken where the power corrections will be evaluated. + q2: list[float] + Set of points in Q2 where the power corrections will be evaluated. + y: list[float] + Set of points in y where the power corrections will be evaluated. + + Returns + ------- + The function the computes the shift for this observable. It depends on the + y-values for the parameterization of P2 and PL and P3 for proton and deuteron. 
+ """ + # Lead target + A = 208.0 + Z = 82 + nuclear_factor = 2 * (Z - A) / A + yp = 1 + np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / q2 + ym = 1 - np.power(1 - y, 2) + yL = np.power(y, 2) + N_L = -yL / yp # Coefficient for F_L + N_3 = np.power(-1, lepton) * ym / yp # Coefficient for F_3 + + MW2 = np.power(MW, 2) + # Overall coefficient + # TODO: cross-check + N = GEV_CM2_CONV * (GF**2) * Mh / (2 * np.pi * np.power(1 + q2 / MW2, 2)) * yp + + def func( + y_values_pc2_p, + y_values_pcL_p, + y_values_pc3_p, + y_values_pc2_d, + y_values_pcL_d, + y_values_pc3_d, + ): + PC2_p = dis_pc_func(y_values_pc2_p, pc2_p_nodes, x, q2) + PCL_p = dis_pc_func(y_values_pcL_p, pcL_p_nodes, x, q2) + PC3_p = dis_pc_func(y_values_pc3_p, pc3_p_nodes, x, q2) + PC2_d = dis_pc_func(y_values_pc2_d, pc2_d_nodes, x, q2) + PCL_d = dis_pc_func(y_values_pcL_d, pcL_d_nodes, x, q2) + PC3_d = dis_pc_func(y_values_pc3_d, pc3_d_nodes, x, q2) + tmp_2 = Z * PC2_p + nuclear_factor * PC2_d + tmp_L = Z * PCL_p + nuclear_factor * PCL_d + tmp_3 = Z * PC3_p + nuclear_factor * PC3_d + result = N * (tmp_2 + N_L * tmp_L + N_3 * tmp_3) return result - return PC_2_p, PC_2_d - - -def DIS_NC_ht(H2_dict, HL_dict, x, q2, y): - yp = 1 + np.power(1 - y, 2) - yL = np.power(y, 2) - N_L = -yL / yp - - def PC_2(list): - return ht_parametrisation(list, H2_dict['nodes'], x, q2) - - def PC_L(list): - return N_L * ht_parametrisation(list, HL_dict['nodes'], x, q2) - - return PC_2, PC_L - - -class DIS_SNU: - def __init__(self, HT_dict, target_tuple, kin_tuple, Mh, Mw, lepton): # (A,Z) - - # Lead target - A = target_tuple[0] - Z = target_tuple[1] - x = kin_tuple[0] - q2 = kin_tuple[1] - y = kin_tuple[2] - self.nuclear_target = 2 * (Z - A) / A - self.Mh = Mh # 0.938 GeV - self.Mw2 = np.power(Mw, 2) # GeV^2 - self.yp = 1 + np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / q2 - self.yL = np.power(y, 2) - self.ym = 1 - np.power(1 - y, 2) - self.N = ( - GEV_CM2_CONV * (GF**2) * Mh / (2 * np.pi * np.power(1 + q2 / self.Mw2, 2)) * self.yp - ) - self.H2p_dict = HT_dict['H2p'] - self.H2d_dict = HT_dict['H2d'] - self.HLp_dict = HT_dict['HLp'] - self.HLd_dict = HT_dict['HLd'] - self.H3p_dict = HT_dict['H3p'] - self.H3d_dict = HT_dict['H3d'] - - self.x = x - self.q2 = q2 - self.l = lepton - - def PC_2_p(self, list): - norm = self.N - return norm * ht_parametrisation(list, self.H2p_dict['nodes'], self.x, self.q2) - - def PC_2_d(self, list): - norm = self.N * self.nuclear_target - return norm * ht_parametrisation(list, self.H2d_dict['nodes'], self.x, self.q2) - - def PC_L_p(self, list): - norm = -self.N * self.yL / self.yp - return norm * ht_parametrisation(list, self.HLp_dict['nodes'], self.x, self.q2) - - def PC_L_d(self, list): - norm = -self.N * self.yL / self.yp * self.nuclear_target - return norm * ht_parametrisation(list, self.HLd_dict['nodes'], self.x, self.q2) - - def PC_3_p(self, list): - norm = self.N * np.power(-1, self.l) * self.ym / self.yp * self.x - return norm * ht_parametrisation(list, self.H3p_dict['nodes'], self.x, self.q2) - - def PC_3_d(self, list): - norm = self.N * np.power(-1, self.l) * self.ym / self.yp * self.x * self.nuclear_target - return norm * ht_parametrisation(list, self.H3d_dict['nodes'], self.x, self.q2) - - -class DIS_NUTEV(DIS_SNU): - def __init__(self, HT_dict, target_tuple, kin_tuple, Mh, Mw, lepton): # (A,Z) - super().__init__(HT_dict, target_tuple, kin_tuple, Mh, Mw, lepton) - self.N = 50 * self.yp / np.power(1 + self.q2 / self.Mw2, 2) - - -class DIS_HERA_CC(DIS_SNU): - def __init__(self, HT_dict, kin_tuple, Mh, Mw, 
lepton): - super().__init__(HT_dict, (1, 1), kin_tuple, Mh, Mw, lepton) - y = kin_tuple[2] - self.yp = 1 + np.power(1 - y, 2) - N = 1 / 4 * self.yp + return func + + +def construct_pars_combs(parameters_dict): + """Construct the combination of parameters (the ones that parametrize the power + corrections) used to compute the shifts. + + Example + ------- + Given the following dictionary that specifies that power corrections + ``` + pc_dict = { + 'H1': {'list': [1,2], 'nodes': [0,1]} } + 'H2': {'list': [3,4,5], 'nodes': [0,1,2]} } + } + ``` + then this functions constructs a list as follows + ``` + pars_combs = [ + {'label': 'H1(1)', 'comb': {'H1': [1,0], 'H2': [0,0,0]}, + {'label': 'H1(2)', 'comb': {'H1': [0,1], 'H2': [0,0,0]}, + {'label': 'H2(1)', 'comb': {'H1': [0,0], 'H2': [3,0,0]}, + {'label': 'H2(2)', 'comb': {'H1': [0,0], 'H2': [0,4,0]}, + {'label': 'H2(3)', 'comb': {'H1': [0,0], 'H2': [0,0,5]}, + ] + ``` + + Parameters + ---------- + """ + combinations = [] + for key, values in parameters_dict.items(): + for i in range(len(values['yshift'])): + # Create a new dictionary with all keys and zeroed-out values + new_dict = {k: np.zeros_like(v['yshift']) for k, v in parameters_dict.items()} + # Set the specific value for the current index + label = key + f'({i})' + new_dict[key][i] = values['yshift'][i] + new_dict = {'label': label, 'comb': new_dict} + combinations.append(new_dict) + + return combinations + + +def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, power_corr_dict: dict): + """ + Computes the shifts due to power corrections for a single dataset given + the set of parameters that model the power corrections. The result is + a dictionary containing as many arrays of shifts as the number of + combinations of the parameters. For instance, the final dictionary + may look like: + ``` + deltas1 = {comb1_label: array_of_shifts1, + comb2_label: array_of_shifts2, + comb3_label: array_of_shifts3, + ...} + ``` + Note that, as of now, we don't need to specify different prescriptions. + For that reason, the prescription adopted to construct the shifts is hard + coded in the function `construct_pars_combs`, and the prescription used to + compute the sub-matrix is hard-coded in `covmat_power_corrections`. + """ + + exp_name = dataset_sp.name + cd_table = dataset_sp.load_commondata().commondata_table + process_type = cd_table['process'].iloc[0] + if isinstance(process_type, _Process): + process_type = process_type.name + + pars_combs = construct_pars_combs(power_corr_dict) + deltas = defaultdict(list) + + pc_func = None + if process_type.startswith('DIS'): + pc2_p_nodes = power_corr_dict["H2p"]['nodes'] + pcL_p_nodes = power_corr_dict["HLp"]['nodes'] + pc3_p_nodes = power_corr_dict["H3p"]['nodes'] + pc2_d_nodes = power_corr_dict["H2d"]['nodes'] + pcL_d_nodes = power_corr_dict["HLd"]['nodes'] + pc3_d_nodes = power_corr_dict["H3d"]['nodes'] + + # TODO + # AFter the data re-implementation the name of the variables + # in the commondata table will change as indicated in the metadata. + # When this happens, this part must be updated. 
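+    # NOTE: for DIS entries the legacy commondata columns used just below are
+    # assumed to map to kin1 = x (Bjorken x), kin2 = Q2 and kin3 = y (inelasticity).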
+ x = cd_table['kin1'].to_numpy() + q2 = cd_table['kin2'].to_numpy() + y = cd_table['kin3'].to_numpy() + + # F2 ratio + if exp_name == "NMC_NC_NOTFIXED_EM-F2": + pc_func = DIS_F2R_pc(dataset_sp, pdf, pc2_p_nodes, pc2_d_nodes, x, q2) + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2p'], pars_pc['comb']['H2d']) + + # F2 proton traget + elif exp_name in F2P_exps: + pc_func = DIS_F2_pc(pc2_p_nodes, x, q2) + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2p']) + + # F2 deuteron traget + elif exp_name in F2D_exps: + pc_func = DIS_F2_pc(pc2_d_nodes, x, q2) + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2d']) + + # EMC + elif exp_name.startswith('EMC_NC_250GEV'): + pc_func = DIS_F2C_pc(pc2_p_nodes, pc2_d_nodes, x, q2) + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2p'], pars_pc['comb']['H2d']) + + # HERA and NMC SIGMARED NC + elif exp_name in np.concatenate([NC_SIGMARED_P_EM, NC_SIGMARED_P_EP, NC_SIGMARED_P_EAVG]): + # Electron + if exp_name in NC_SIGMARED_P_EM: + pc_func = DIS_NC_XSEC_pc(pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y) + # Positron + elif exp_name in NC_SIGMARED_P_EP: + pc_func = DIS_NC_XSEC_pc(pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y) + # Average positron and electron + # TODO + # Check if this is correct (ach) + elif NC_SIGMARED_P_EAVG: + + def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): + electron = DIS_NC_XSEC_pc(pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y) + positron = DIS_NC_XSEC_pc(pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y) + return ( + np.sum( + electron(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p), + positron(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p), + ) + / 2 + ) + + pc_func = average + else: + raise ValueError(f"{exp_name} not implemented.") + + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func( + pars_pc['comb']['H2p'], pars_pc['comb']['HLp'], pars_pc['comb']['H3p'] + ) + + # CHORUS + elif exp_name.startswith('CHORUS_CC'): + # Nu + if exp_name == 'CHORUS_CC_NOTFIXED_PB_NU-SIGMARED': + pc_func = DIS_CC_CHORUS_pc( + pc2_p_nodes, + pcL_p_nodes, + pc3_p_nodes, + pc2_d_nodes, + pcL_d_nodes, + pc3_d_nodes, + 0, + x, + q2, + y, + ) + # Nu bar + elif exp_name == 'CHORUS_CC_NOTFIXED_PB_NB-SIGMARED': + pc_func = DIS_CC_CHORUS_pc( + pc2_p_nodes, + pcL_p_nodes, + pc3_p_nodes, + pc2_d_nodes, + pcL_d_nodes, + pc3_d_nodes, + 1, + x, + q2, + y, + ) + else: + raise ValueError(f"{exp_name} not implemented.") + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func( + pars_pc['comb']['H2p'], + pars_pc['comb']['HLp'], + pars_pc['comb']['H3p'], + pars_pc['comb']['H2d'], + pars_pc['comb']['HLd'], + pars_pc['comb']['H3d'], + ) + + # NuTeV + elif exp_name.startswith('NUTEV_CC'): + # Nu + if exp_name == 'NUTEV_CC_NOTFIXED_FE_NU-SIGMARED': + pc_func = DIS_CC_NUTEV_pc( + pc2_p_nodes, + pcL_p_nodes, + pc3_p_nodes, + pc2_d_nodes, + pcL_d_nodes, + pc3_d_nodes, + 0, + x, + q2, + y, + ) + # Nu bar + elif exp_name == 'NUTEV_CC_NOTFIXED_FE_NB-SIGMARED': + pc_func = DIS_CC_NUTEV_pc( + pc2_p_nodes, + pcL_p_nodes, + pc3_p_nodes, + pc2_d_nodes, + pcL_d_nodes, + pc3_d_nodes, + 1, + x, + q2, + y, + ) + else: + raise ValueError(f"{exp_name} not implemented.") + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func( + pars_pc['comb']['H2p'], + pars_pc['comb']['HLp'], + pars_pc['comb']['H3p'], + pars_pc['comb']['H2d'], + pars_pc['comb']['HLd'], + pars_pc['comb']['H3d'], + ) + + # 
HERA_CC + elif exp_name.startswith('HERA_CC'): + # electron + if exp_name == 'HERA_CC_318GEV_EM-SIGMARED': + pc_func = DIS_CC_HERA_XSEC_pc(pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y) + # positron + elif exp_name == 'HERA_CC_318GEV_EP-SIGMARED': + pc_func = DIS_CC_HERA_XSEC_pc(pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y) + else: + raise ValueError(f"{exp_name} not implemented.") + + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func( + pars_pc['comb']['H2p'], pars_pc['comb']['HLp'], pars_pc['comb']['H3p'] + ) + + else: + raise ValueError( + f"The {process_type} observable for {exp_name} " "has not been implemented." + ) + + elif process_type == 'JET': + raise NotImplementedError("This part has not been implemented yet.") + + return deltas From ba65cc6e0a4d6bbe9543a7c7b2bad321b38bef47 Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 14 Jan 2025 14:52:19 +0000 Subject: [PATCH 38/69] Correct typo + example runcard --- .../examples/Basic_runcard_pc_covmat.yml | 212 ++++++++++++++++++ .../theorycovariance/construction.py | 4 +- 2 files changed, 214 insertions(+), 2 deletions(-) create mode 100644 n3fit/runcards/examples/Basic_runcard_pc_covmat.yml diff --git a/n3fit/runcards/examples/Basic_runcard_pc_covmat.yml b/n3fit/runcards/examples/Basic_runcard_pc_covmat.yml new file mode 100644 index 0000000000..7daa3d60a6 --- /dev/null +++ b/n3fit/runcards/examples/Basic_runcard_pc_covmat.yml @@ -0,0 +1,212 @@ +# +# Configuration file for n3fit +# +###################################################################################### +description: NNPDF4.0 ht with TCM - DIS (NC & CC) only + +###################################################################################### +dataset_inputs: +- {dataset: NMC_NC_NOTFIXED_EM-F2, frac: 0.75, variant: legacy_dw} +- {dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, frac: 0.75, variant: legacy} +- {dataset: SLAC_NC_NOTFIXED_P_EM-F2, frac: 0.75, variant: legacy_dw} +- {dataset: SLAC_NC_NOTFIXED_D_EM-F2, frac: 0.75, variant: legacy_dw} +- {dataset: BCDMS_NC_NOTFIXED_P_EM-F2, frac: 0.75, variant: legacy_dw} +- {dataset: BCDMS_NC_NOTFIXED_D_EM-F2, frac: 0.75, variant: legacy_dw} +- {dataset: CHORUS_CC_NOTFIXED_PB_NU-SIGMARED, frac: 0.75, variant: legacy_dw} +- {dataset: CHORUS_CC_NOTFIXED_PB_NB-SIGMARED, frac: 0.75, variant: legacy_dw} +- {dataset: NUTEV_CC_NOTFIXED_FE_NU-SIGMARED, cfac: [MAS], frac: 0.75, variant: legacy_dw} +- {dataset: NUTEV_CC_NOTFIXED_FE_NB-SIGMARED, cfac: [MAS], frac: 0.75, variant: legacy_dw} +- {dataset: HERA_NC_318GEV_EM-SIGMARED, frac: 0.75, variant: legacy} +- {dataset: HERA_NC_225GEV_EP-SIGMARED, frac: 0.75, variant: legacy} +- {dataset: HERA_NC_251GEV_EP-SIGMARED, frac: 0.75, variant: legacy} +- {dataset: HERA_NC_300GEV_EP-SIGMARED, frac: 0.75, variant: legacy} +- {dataset: HERA_NC_318GEV_EP-SIGMARED, frac: 0.75, variant: legacy} +- {dataset: HERA_CC_318GEV_EM-SIGMARED, frac: 0.75, variant: legacy} +- {dataset: HERA_CC_318GEV_EP-SIGMARED, frac: 0.75, variant: legacy} +- {dataset: HERA_NC_318GEV_EAVG_CHARM-SIGMARED, frac: 0.75, variant: legacy} +- {dataset: HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED, frac: 0.75, variant: legacy} +- {dataset: DYE866_Z0_800GEV_DW_RATIO_PDXSECRATIO, frac: 0.75, variant: legacy} +- {dataset: DYE866_Z0_800GEV_PXSEC, frac: 0.75, variant: legacy} +- {dataset: DYE605_Z0_38P8GEV_DW_PXSEC, frac: 0.75, variant: legacy} +- {dataset: DYE906_Z0_120GEV_DW_PDXSECRATIO, frac: 0.75, cfac: [ACC], variant: legacy} +- {dataset: CDF_Z0_1P96TEV_ZRAP, frac: 0.75, variant: legacy} +- {dataset: 
D0_Z0_1P96TEV_ZRAP, frac: 0.75, variant: legacy} +- {dataset: D0_WPWM_1P96TEV_ASY, frac: 0.75, variant: legacy} +- {dataset: ATLAS_WPWM_7TEV_36PB_ETA, frac: 0.75, variant: legacy} +- {dataset: ATLAS_Z0_7TEV_36PB_ETA, frac: 0.75, variant: legacy} +- {dataset: ATLAS_Z0_7TEV_49FB_HIMASS, frac: 0.75, variant: legacy} +- {dataset: ATLAS_Z0_7TEV_LOMASS_M, frac: 0.75, variant: legacy} +- {dataset: ATLAS_WPWM_7TEV_46FB_CC-ETA, frac: 0.75, variant: legacy} +- {dataset: ATLAS_Z0_7TEV_46FB_CC-Y, frac: 0.75, variant: legacy} +- {dataset: ATLAS_Z0_7TEV_46FB_CF-Y, frac: 0.75, variant: legacy} +- {dataset: ATLAS_Z0_8TEV_HIMASS_M-Y, frac: 0.75, variant: legacy} +- {dataset: ATLAS_Z0_8TEV_LOWMASS_M-Y, frac: 0.75, variant: legacy} +- {dataset: ATLAS_Z0_13TEV_TOT, frac: 0.75, cfac: [NRM], variant: legacy} +- {dataset: ATLAS_WPWM_13TEV_TOT, frac: 0.75, cfac: [NRM], variant: legacy} +- {dataset: ATLAS_WJ_8TEV_WP-PT, frac: 0.75, variant: legacy} +- {dataset: ATLAS_WJ_8TEV_WM-PT, frac: 0.75, variant: legacy} +- {dataset: ATLAS_Z0J_8TEV_PT-M, frac: 0.75, variant: legacy_10} +- {dataset: ATLAS_Z0J_8TEV_PT-Y, frac: 0.75, variant: legacy_10} +- {dataset: ATLAS_TTBAR_7TEV_TOT_X-SEC, frac: 0.75, variant: legacy} +- {dataset: ATLAS_TTBAR_8TEV_TOT_X-SEC, frac: 0.75, variant: legacy} +- {dataset: ATLAS_TTBAR_13TEV_TOT_X-SEC, frac: 0.75, variant: legacy} +- {dataset: ATLAS_TTBAR_8TEV_LJ_DIF_YT-NORM, frac: 0.75, variant: legacy} +- {dataset: ATLAS_TTBAR_8TEV_LJ_DIF_YTTBAR-NORM, frac: 0.75, variant: legacy} +- {dataset: ATLAS_TTBAR_8TEV_2L_DIF_YTTBAR-NORM, frac: 0.75, variant: legacy} +- {dataset: ATLAS_1JET_8TEV_R06_PTY, frac: 0.75, variant: legacy_decorrelated} +- {dataset: ATLAS_2JET_7TEV_R06_M12Y, frac: 0.75, variant: legacy} +- {dataset: ATLAS_PH_13TEV_XSEC, frac: 0.75, cfac: [EWK], variant: legacy} +- {dataset: ATLAS_SINGLETOP_7TEV_TCHANNEL-XSEC, frac: 0.75, variant: legacy} +- {dataset: ATLAS_SINGLETOP_13TEV_TCHANNEL-XSEC, frac: 0.75, variant: legacy} +- {dataset: ATLAS_SINGLETOP_7TEV_T-Y-NORM, frac: 0.75, variant: legacy} +- {dataset: ATLAS_SINGLETOP_7TEV_TBAR-Y-NORM, frac: 0.75, variant: legacy} +- {dataset: ATLAS_SINGLETOP_8TEV_T-RAP-NORM, frac: 0.75, variant: legacy} +- {dataset: ATLAS_SINGLETOP_8TEV_TBAR-RAP-NORM, frac: 0.75, variant: legacy} +- {dataset: CMS_WPWM_7TEV_ELECTRON_ASY, frac: 0.75} +- {dataset: CMS_WPWM_7TEV_MUON_ASY, frac: 0.75, variant: legacy} +- {dataset: CMS_Z0_7TEV_DIMUON_2D, frac: 0.75} +- {dataset: CMS_WPWM_8TEV_MUON_Y, frac: 0.75, variant: legacy} +- {dataset: CMS_Z0J_8TEV_PT-Y, frac: 0.75, cfac: [NRM], variant: legacy_10} +- {dataset: CMS_2JET_7TEV_M12Y, frac: 0.75} +- {dataset: CMS_1JET_8TEV_PTY, frac: 0.75, variant: legacy} +- {dataset: CMS_TTBAR_7TEV_TOT_X-SEC, frac: 0.75, variant: legacy} +- {dataset: CMS_TTBAR_8TEV_TOT_X-SEC, frac: 0.75, variant: legacy} +- {dataset: CMS_TTBAR_13TEV_TOT_X-SEC, frac: 0.75, variant: legacy} +- {dataset: CMS_TTBAR_8TEV_LJ_DIF_YTTBAR-NORM, frac: 0.75, variant: legacy} +- {dataset: CMS_TTBAR_5TEV_TOT_X-SEC, frac: 0.75, variant: legacy} +- {dataset: CMS_TTBAR_8TEV_2L_DIF_MTTBAR-YT-NORM, frac: 0.75, variant: legacy} +- {dataset: CMS_TTBAR_13TEV_2L_DIF_YT, frac: 0.75, variant: legacy} +- {dataset: CMS_TTBAR_13TEV_LJ_2016_DIF_YTTBAR, frac: 0.75, variant: legacy} +- {dataset: CMS_SINGLETOP_7TEV_TCHANNEL-XSEC, frac: 0.75, variant: legacy} +- {dataset: CMS_SINGLETOP_8TEV_TCHANNEL-XSEC, frac: 0.75, variant: legacy} +- {dataset: CMS_SINGLETOP_13TEV_TCHANNEL-XSEC, frac: 0.75, variant: legacy} +- {dataset: LHCB_Z0_7TEV_DIELECTRON_Y, frac: 0.75} +- {dataset: 
LHCB_Z0_8TEV_DIELECTRON_Y, frac: 0.75} +- {dataset: LHCB_WPWM_7TEV_MUON_Y, frac: 0.75, cfac: [NRM]} +- {dataset: LHCB_Z0_7TEV_MUON_Y, frac: 0.75, cfac: [NRM]} +- {dataset: LHCB_WPWM_8TEV_MUON_Y, frac: 0.75, cfac: [NRM]} +- {dataset: LHCB_Z0_8TEV_MUON_Y, frac: 0.75, cfac: [NRM]} +- {dataset: LHCB_Z0_13TEV_DIMUON-Y, frac: 0.75} +- {dataset: LHCB_Z0_13TEV_DIELECTRON-Y, frac: 0.75} + +################################################################################ +datacuts: + t0pdfset: 240701-02-rs-nnpdf40-baseline + q2min: 2.5 + w2min: 3.24 + +################################################################################ +# NNLO QCD TRN evolution +theory: + theoryid: 708 + +theorycovmatconfig: + point_prescriptions: ["9 point", "power corrections"] + pc_parameters: + - {ht: H2p, yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0], nodes: [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} + - {ht: H2d, yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0], nodes: [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} + - {ht: HLp, yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0], nodes: [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} + - {ht: HLd, yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0], nodes: [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} + - {ht: H3p, yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0], nodes: [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} + - {ht: H3d, yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0], nodes: [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} + pc_included_procs: ["DIS NC", "DIS CC"] + pc_excluded_exps: [HERA_NC_318GEV_EAVG_CHARM-SIGMARED, + HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED,] + pdf: 210619-n3fit-001 + use_thcovmat_in_fitting: true + use_thcovmat_in_sampling: true + +# For fits <= 4.0 multiplicative and additive uncertainties were sampled separately +# and thus the flag `separate_multiplicative` needs to be set to True +# sampling: +# separate_multiplicative: True + +################################################################################ +trvlseed: 591866982 +nnseed: 945709987 +mcseed: 519562661 +genrep: true + +################################################################################ +parameters: # This defines the parameter dictionary that is passed to the Model Trainer + nodes_per_layer: [25, 20, 8] + activation_per_layer: [tanh, tanh, linear] + initializer: glorot_normal + optimizer: + clipnorm: 6.073e-6 + learning_rate: 2.621e-3 + optimizer_name: Nadam + epochs: 17000 + positivity: + initial: 184.8 + multiplier: + integrability: + initial: 10 + multiplier: + stopping_patience: 0.1 + layer_type: dense + dropout: 0.0 + threshold_chi2: 3.5 + +fitting: + fitbasis: EVOL + savepseudodata: True + basis: + - {fl: sng, trainable: false, smallx: [1.089, 1.119], largex: [1.475, 3.119]} + - {fl: g, trainable: false, smallx: [0.7504, 1.098], largex: [2.814, 5.669]} + - {fl: v, trainable: false, smallx: [0.479, 0.7384], largex: [1.549, 3.532]} + - {fl: v3, trainable: false, smallx: [0.1073, 0.4397], largex: [1.733, 3.458]} + - {fl: v8, trainable: false, smallx: [0.5507, 0.7837], largex: [1.516, 3.356]} + - {fl: t3, trainable: false, smallx: [-0.4506, 0.9305], largex: [1.745, 3.424]} + - {fl: t8, trainable: false, smallx: [0.5877, 0.8687], largex: [1.522, 3.515]} + - {fl: t15, trainable: false, smallx: [1.089, 1.141], largex: [1.492, 3.222]} + +################################################################################ +positivity: + posdatasets: + # Positivity Lagrange Multiplier + - {dataset: NNPDF_POS_2P24GEV_F2U, maxlambda: 1e6} + - {dataset: NNPDF_POS_2P24GEV_F2D, maxlambda: 1e6} + - {dataset: NNPDF_POS_2P24GEV_F2S, maxlambda: 1e6} + - {dataset: 
NNPDF_POS_2P24GEV_FLL, maxlambda: 1e6} + - {dataset: NNPDF_POS_2P24GEV_DYU, maxlambda: 1e10} + - {dataset: NNPDF_POS_2P24GEV_DYD, maxlambda: 1e10} + - {dataset: NNPDF_POS_2P24GEV_DYS, maxlambda: 1e10} + - {dataset: NNPDF_POS_2P24GEV_F2C, maxlambda: 1e6} + # Positivity of MSbar PDFs + - {dataset: NNPDF_POS_2P24GEV_XUQ, maxlambda: 1e6} + - {dataset: NNPDF_POS_2P24GEV_XUB, maxlambda: 1e6} + - {dataset: NNPDF_POS_2P24GEV_XDQ, maxlambda: 1e6} + - {dataset: NNPDF_POS_2P24GEV_XDB, maxlambda: 1e6} + - {dataset: NNPDF_POS_2P24GEV_XSQ, maxlambda: 1e6} + - {dataset: NNPDF_POS_2P24GEV_XSB, maxlambda: 1e6} + - {dataset: NNPDF_POS_2P24GEV_XGL, maxlambda: 1e6} + +added_filter_rules: + - dataset: NNPDF_POS_2P24GEV_FLL + rule: "x > 5.0e-7" + - dataset: NNPDF_POS_2P24GEV_F2C + rule: "x < 0.74" + - dataset: NNPDF_POS_2P24GEV_XGL + rule: "x > 0.1" + - dataset: NNPDF_POS_2P24GEV_XUQ + rule: "x > 0.1" + - dataset: NNPDF_POS_2P24GEV_XUB + rule: "x > 0.1" + - dataset: NNPDF_POS_2P24GEV_XDQ + rule: "x > 0.1" + - dataset: NNPDF_POS_2P24GEV_XDB + rule: "x > 0.1" + - dataset: NNPDF_POS_2P24GEV_XSQ + rule: "x > 0.1" + - dataset: NNPDF_POS_2P24GEV_XSB + rule: "x > 0.1" + +integrability: + integdatasets: + - {dataset: NNPDF_INTEG_3GEV_XT8, maxlambda: 1e2} + - {dataset: NNPDF_INTEG_3GEV_XT3, maxlambda: 1e2} + +################################################################################ +debug: false +maxcores: 8 diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 1b44c3ee1d..895cdde20e 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -335,7 +335,7 @@ def covs_pt_prescrip( point_prescription, pdf: PDF, power_corr_dict, - pc_included_prosc, + pc_included_procs, pc_excluded_exps, ): """Produces the sub-matrices of the theory covariance matrix according @@ -380,7 +380,7 @@ def covs_pt_prescrip( is_excluded_exp = any(name in pc_excluded_exps for name in [exp_name1, exp_name2]) is_included_proc = any( - proc not in pc_included_prosc for proc in [process_type1, process_type2] + proc not in pc_included_procs for proc in [process_type1, process_type2] ) if not (is_excluded_exp or is_included_proc): deltas1 = compute_deltas_pc(data_spec1, pdf, power_corr_dict) From 91af93ca24b924aadd284dd0d6fbfef89b779b07 Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 14 Jan 2025 15:30:10 +0000 Subject: [PATCH 39/69] Correct bug --- .../validphys/theorycovariance/higher_twist_functions.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 4a496330b8..566b357455 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -678,13 +678,10 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, power_corr_dict: dict): def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): electron = DIS_NC_XSEC_pc(pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y) positron = DIS_NC_XSEC_pc(pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y) - return ( - np.sum( - electron(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p), - positron(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p), - ) - / 2 + result = electron(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p) + positron( + y_values_pc2_p, y_values_pcL_p, y_values_pc3_p ) + 
return result / 2 pc_func = average else: From 8d43f0d30d8d6647bfe2a3098056d28555ed83da Mon Sep 17 00:00:00 2001 From: achiefa Date: Wed, 15 Jan 2025 13:10:31 +0000 Subject: [PATCH 40/69] Correct nuclear factors for nuclear targets --- .../higher_twist_functions.py | 49 ++++++++++++++----- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 566b357455..e2294d311a 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -195,7 +195,7 @@ def func(y_values_d, y_values_p): result = np.array(operator.truediv(num, denom) - F2_ratio) return result - return func # PC_2_p, PC_2_d + return func def DIS_F2C_pc(pc2_p_nodes, pc2_d_nodes, x, q2): @@ -217,6 +217,27 @@ def DIS_F2C_pc(pc2_p_nodes, pc2_d_nodes, x, q2): targets. If we were to parametrize such contributions using, say, proton and nucleon, than the weights would change. + + Nuclear target + -------------- + The power corrections for nuclear observables, like in this case, are affected + by the pc contribution of the protons and that of the neutrons. + If we allow for the non-iscoscalarity of the target, and combining the two + contributions in accordance with the atomic and mass number (A and Z), the + power correction for the nuclear target can be written as (see eq.(4.2.5) + in https://nnpdf.mi.infn.it/wp-content/uploads/2021/09/thesis_master_RP.pdf) + + PC_N = 1/A (Z * PC_p + (A-Z) * PC_n) . + + The deuteron is obtained using the isoscalarity, namely + + PC_c = 1/2 (PC_p + PC_n) . + + Since we parametrise the power corrections of the proton and the deuteron, + we can combined the above equations and write + + PC_N = 1/A * ( PC_p * (2Z - A) + 2 * PC_d * (A - Z) ) + Parameters ---------- pc2_p_nodes: list[float] @@ -242,7 +263,7 @@ def DIS_F2C_pc(pc2_p_nodes, pc2_d_nodes, x, q2): def func(y_values_d, y_values_p): PC2_d = dis_pc_func(y_values_d, pc2_d_nodes, x, q2) PC2_p = dis_pc_func(y_values_p, pc2_p_nodes, x, q2) - result = 2 * (Z - A) / A * PC2_d + Z * PC2_p + result = (2 * Z - A) / A * PC2_p + 2 * (A - Z) / A * PC2_d return result return func @@ -397,6 +418,10 @@ def DIS_CC_NUTEV_pc( lepton may be either the electron (0) or the positron (1). This information is needed in order to compute the coefficient N_3. + Nuclear target + -------------- + See `DIS_F2C_pc`. 
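As a quick numerical cross-check of the weights derived above (purely illustrative, using the Z and A values quoted in the code):

```python
# Weights multiplying the proton and deuteron power corrections for a nuclear target.
for name, Z, A in [("iron (NuTeV/EMC)", 23.403, 49.618), ("lead (CHORUS)", 82.0, 208.0)]:
    w_p = (2 * Z - A) / A   # proton weight
    w_d = 2 * (A - Z) / A   # deuteron weight
    print(f"{name}: w_p = {w_p:+.3f}, w_d = {w_d:+.3f}, sum = {w_p + w_d:.3f}")
# The two weights always sum to one; in the isoscalar limit Z = A/2 they reduce to
# w_p = 0 and w_d = 1, i.e. a pure deuteron combination.
```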
+ Parameters ---------- pc2_p_nodes: list[float] @@ -434,7 +459,6 @@ def DIS_CC_NUTEV_pc( # Iron target Z = 23.403 A = 49.618 - nuclear_factor = 2 * (Z - A) / A yp = 1 + np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / q2 ym = 1 - np.power(1 - y, 2) yL = np.power(y, 2) @@ -444,7 +468,7 @@ def DIS_CC_NUTEV_pc( MW2 = np.power(MW, 2) # Overall coefficient # TODO: cross-check - N = 100 * yp / (2 * np.power(1 + q2 / MW2, 2)) + N = 100 / 2 / np.power(1 + q2 / MW2, 2) * yp def func( y_values_pc2_p, @@ -460,9 +484,9 @@ def func( PC2_d = dis_pc_func(y_values_pc2_d, pc2_d_nodes, x, q2) PCL_d = dis_pc_func(y_values_pcL_d, pcL_d_nodes, x, q2) PC3_d = dis_pc_func(y_values_pc3_d, pc3_d_nodes, x, q2) - tmp_2 = Z * PC2_p + nuclear_factor * PC2_d - tmp_L = Z * PCL_p + nuclear_factor * PCL_d - tmp_3 = Z * PC3_p + nuclear_factor * PC3_d + tmp_2 = (2 * Z - A) / A * PC2_p + 2 * (A - Z) / A * PC2_d + tmp_L = (2 * Z - A) / A * PCL_p + 2 * (A - Z) / A * PCL_d + tmp_3 = (2 * Z - A) / A * PC3_p + 2 * (A - Z) / A * PC3_d result = N * (tmp_2 + N_L * tmp_L + N_3 * tmp_3) return result @@ -480,6 +504,10 @@ def DIS_CC_CHORUS_pc( Note that the difference here is in the definition of the overall normalization N. + Nuclear target + -------------- + See `DIS_F2C_pc`. + Parameters ---------- pc2_p_nodes: list[float] @@ -517,7 +545,6 @@ def DIS_CC_CHORUS_pc( # Lead target A = 208.0 Z = 82 - nuclear_factor = 2 * (Z - A) / A yp = 1 + np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / q2 ym = 1 - np.power(1 - y, 2) yL = np.power(y, 2) @@ -543,9 +570,9 @@ def func( PC2_d = dis_pc_func(y_values_pc2_d, pc2_d_nodes, x, q2) PCL_d = dis_pc_func(y_values_pcL_d, pcL_d_nodes, x, q2) PC3_d = dis_pc_func(y_values_pc3_d, pc3_d_nodes, x, q2) - tmp_2 = Z * PC2_p + nuclear_factor * PC2_d - tmp_L = Z * PCL_p + nuclear_factor * PCL_d - tmp_3 = Z * PC3_p + nuclear_factor * PC3_d + tmp_2 = (2 * Z - A) / A * PC2_p + 2 * (A - Z) / A * PC2_d + tmp_L = (2 * Z - A) / A * PCL_p + 2 * (A - Z) / A * PCL_d + tmp_3 = (2 * Z - A) / A * PC3_p + 2 * (A - Z) / A * PC3_d result = N * (tmp_2 + N_L * tmp_L + N_3 * tmp_3) return result From a622417c56a68d84d6acf721e8798131d0182641 Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 20 Jan 2025 17:28:05 +0000 Subject: [PATCH 41/69] First implementation of jet data --- .../higher_twist_functions.py | 74 ++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index e2294d311a..b275af7f68 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -87,6 +87,60 @@ def dis_pc_func(delta_h: list, nodes: list, x: list, Q2: list): return PC +def jets_pc_func(delta_h: list, nodes: list, pT: list, Q2: list): + """ + This function defines the parametrization used to model power corrections + for jet and dijet. Currently, only the cubic spline is supported and + it is hard coded in this function. + + Parameters + ---------- + delta_h: list + Shifts of the dependent variables at each node listed in `nodes`. These values correspond + to the `amplitude` of the power correction at each node. + nodes: list + List of nodes in the independent variables. For DIS-like processes, these are points + in pT. + pT: list + List of pT points where the power correction function is evaluated + Q2: list + List of scales where the power correction function is evaluated. 
Note that this list + is meant to be of the same length as `pT`, and the two lists are meant to be considered + as pairs, e.g. (pT_1, Q2_1), (pT_2, Q2_2), ... . + + Returns + ------- + A list of power corrections for DIS-like processes where each point is evaluated at the + kinematic pair (pT,Q2). + """ + H = scint.CubicSpline(nodes, delta_h) + H = np.vectorize(H) + + PC = H(pT) / np.sqrt(Q2) + return PC + + +def JET_pc(pc_nodes, pT, q2): + """ + Returns the function that computes the shift for the ratio for single + jet cross sections. In particular, the shift is computed such that + + xsec -> xsec + PC, + + and the shift is defined as + + Delta(xsec) = (xsec + xsec) - xsec = PC. + + The power correction is a function of the transverse momentum of the jet. + """ + + def func(y_values): + result = jets_pc_func(y_values, pc_nodes, pT, q2) + return result + + return func + + # TODO Maybe we want to treat the function that parametrizes the PC # as argument? def DIS_F2_pc(pc2_nodes, x, q2): @@ -825,6 +879,24 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): ) elif process_type == 'JET': - raise NotImplementedError("This part has not been implemented yet.") + pc_jet_nodes = power_corr_dict["Hj"]['nodes'] + + # TODO + # AFter the data re-implementation the name of the variables + # in the commondata table will change as indicated in the metadata. + # When this happens, this part must be updated. + # eta = cd_table['kin1'].to_numpy() + pT = cd_table['kin2'].to_numpy() + q2 = pT * pT + + pc_func = JET_pc(pc_jet_nodes, pT, q2) + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['Hj']) + + elif process_type == 'DIJET': + raise RuntimeError(f"No implementation for {exp_name} yet.") + + else: + raise RuntimeError(f"{process_type} has not been implemented.") return deltas From 8665ec706bee2121c5bc378f854602fa7fb831d8 Mon Sep 17 00:00:00 2001 From: achiefa Date: Thu, 23 Jan 2025 21:18:40 +0000 Subject: [PATCH 42/69] Change pc jet dependence from pT to eta --- .../src/validphys/theorycovariance/higher_twist_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index b275af7f68..78947a2e35 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -885,11 +885,11 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): # AFter the data re-implementation the name of the variables # in the commondata table will change as indicated in the metadata. # When this happens, this part must be updated. 
- # eta = cd_table['kin1'].to_numpy() + eta = cd_table['kin1'].to_numpy() pT = cd_table['kin2'].to_numpy() q2 = pT * pT - pc_func = JET_pc(pc_jet_nodes, pT, q2) + pc_func = JET_pc(pc_jet_nodes, eta, q2) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['Hj']) From 39e68e4b8f1f29b907b175e608f8e8c3d636795e Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 28 Jan 2025 18:06:39 +0000 Subject: [PATCH 43/69] Allowing step-function for the prior --- validphys2/src/validphys/config.py | 4 +- .../higher_twist_functions.py | 68 ++++++++++++------- 2 files changed, 47 insertions(+), 25 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index cc13f9ffdc..5dbebd4922 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1923,8 +1923,8 @@ def produce_power_corr_dict(self, pc_parameters=None): pc_parameters_by_type = {} # Loop over the parameterization for the power corrections in the runcard for par in pc_parameters: - # Check that the length of shifts matches the length of nodes. - if len(par['yshift']) != len(par['nodes']): + # Check that the length of shifts is one less than the length of nodes. + if len(par['yshift']) != len(par['nodes']) - 1: raise ValueError( f"The length of nodes does not match that of the list in {par['ht']}." f"Check the runcard. Got {len(par['yshift'])} != {len(par['nodes'])}" diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 78947a2e35..f4cb0700bd 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -51,8 +51,7 @@ def dis_pc_func(delta_h: list, nodes: list, x: list, Q2: list): """ This function defines the parametrization used to model power corrections - for DIS-like processes. Currently, only the cubic spline is supported and - it is hard coded in this function. + for DIS-like processes. The initialization of the cubic spline requires a list of nodes, which contains the array of the independent variables (e.g. x-Bjorken), @@ -80,18 +79,31 @@ def dis_pc_func(delta_h: list, nodes: list, x: list, Q2: list): A list of power corrections for DIS-like processes where each point is evaluated at the kinematic pair (x,Q2). """ - H = scint.CubicSpline(nodes, delta_h) - H = np.vectorize(H) + # H = scint.CubicSpline(nodes, delta_h) + # H = np.vectorize(H) - PC = H(x) / Q2 + def step_function(x, y_shift, nodes): + # TODO + # The shift for the last node is still required to parse the config file, + # but nodes are only used to define bins. Hence, the last shift is unnecessary. + shifted_points_pos = np.flatnonzero(y_shift) + for shift_pos in shifted_points_pos: + bin_low = nodes[shift_pos] + bin_high = nodes[shift_pos + 1] + condition = x >= bin_low and x <= bin_high + if condition: + return y_shift[shift_pos] + return 0.0 + + step_function_vec = np.vectorize(lambda xb: step_function(xb, y_shift=delta_h, nodes=nodes)) + PC = step_function_vec(x) / Q2 return PC -def jets_pc_func(delta_h: list, nodes: list, pT: list, Q2: list): +def jets_pc_func(delta_h: list, nodes: list, pT: list, rap: list): """ This function defines the parametrization used to model power corrections - for jet and dijet. Currently, only the cubic spline is supported and - it is hard coded in this function. + for jet and dijet. A step function is used to parametrise the pc. 
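For orientation, a minimal sketch of such a step-function prior, shown here for the DIS case where the rescaling is 1/Q2 (for jets the same construction is applied in rapidity with a 1/pT rescaling). Bin edges and shifts are placeholders, and the treatment of the last bin edge is simplified with respect to the in-tree implementation:

```python
import numpy as np

nodes  = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0]   # bin edges in x-Bjorken
yshift = [0.2, 0.2, 0.2, 0.2, 0.2, 0.2]        # one value per bin, len(nodes) - 1

def step(a, y_shift, bin_edges):
    """Piecewise-constant prior: y_shift[i] inside [bin_edges[i], bin_edges[i+1])."""
    idx = np.digitize(a, bin_edges) - 1
    out = np.zeros(len(a))
    inside = (idx >= 0) & (idx < len(y_shift))
    out[inside] = np.asarray(y_shift)[idx[inside]]
    return out

x  = np.array([0.05, 0.2, 0.6])
Q2 = np.array([4.0, 10.0, 100.0])
PC = step(x, yshift, nodes) / Q2               # DIS rescaling, as in dis_pc_func
```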
Parameters ---------- @@ -99,28 +111,38 @@ def jets_pc_func(delta_h: list, nodes: list, pT: list, Q2: list): Shifts of the dependent variables at each node listed in `nodes`. These values correspond to the `amplitude` of the power correction at each node. nodes: list - List of nodes in the independent variables. For DIS-like processes, these are points - in pT. + List of nodes in rapidity. pT: list - List of pT points where the power correction function is evaluated - Q2: list - List of scales where the power correction function is evaluated. Note that this list - is meant to be of the same length as `pT`, and the two lists are meant to be considered - as pairs, e.g. (pT_1, Q2_1), (pT_2, Q2_2), ... . + The scale of jet processes + rap: list + The rapidity values Returns ------- - A list of power corrections for DIS-like processes where each point is evaluated at the - kinematic pair (pT,Q2). + A list of power corrections for rapidity processes where each point is evaluated at the + kinematic pair (y, pT). """ - H = scint.CubicSpline(nodes, delta_h) - H = np.vectorize(H) - PC = H(pT) / np.sqrt(Q2) + def step_function(x, y_shift, nodes): + # TODO + # The shift for the last node is still required to parse the config file, + # but nodes are only used to define bins. Hence, the last shift is unnecessary. + shifted_points_pos = np.flatnonzero(y_shift) + for shift_pos in shifted_points_pos: + bin_low = nodes[shift_pos] + bin_high = nodes[shift_pos + 1] + condition = x >= bin_low and x <= bin_high + if condition: + return y_shift[shift_pos] + return 0.0 + + step_function_vec = np.vectorize(lambda rap: step_function(rap, y_shift=delta_h, nodes=nodes)) + + PC = step_function_vec(rap) / pT return PC -def JET_pc(pc_nodes, pT, q2): +def JET_pc(pc_nodes, pT, rap): """ Returns the function that computes the shift for the ratio for single jet cross sections. 
In particular, the shift is computed such that @@ -135,7 +157,7 @@ def JET_pc(pc_nodes, pT, q2): """ def func(y_values): - result = jets_pc_func(y_values, pc_nodes, pT, q2) + result = jets_pc_func(y_values, pc_nodes, pT, rap) return result return func @@ -889,7 +911,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): pT = cd_table['kin2'].to_numpy() q2 = pT * pT - pc_func = JET_pc(pc_jet_nodes, eta, q2) + pc_func = JET_pc(pc_jet_nodes, pT, eta) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['Hj']) From 4729dcefd387796c1ac5a5554b55dd098fdbefb9 Mon Sep 17 00:00:00 2001 From: achiefa Date: Wed, 29 Jan 2025 16:00:21 +0000 Subject: [PATCH 44/69] Vectorize step_function + docstring --- .../higher_twist_functions.py | 139 +++++++++--------- 1 file changed, 67 insertions(+), 72 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index f4cb0700bd..bfcb4546dc 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -25,7 +25,7 @@ import operator import numpy as np -import scipy.interpolate as scint +import numpy.typing as npt from validphys.convolution import central_fk_predictions from validphys.core import PDF, DataSetSpec @@ -48,97 +48,92 @@ NC_SIGMARED_P_EAVG = ['HERA_NC_318GEV_EAVG_CHARM-SIGMARED', 'HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED'] -def dis_pc_func(delta_h: list, nodes: list, x: list, Q2: list): +def step_function(a: npt.ArrayLike, y_shift: npt.ArrayLike, bin_edges: npt.ArrayLike) -> np.ndarray: """ - This function defines the parametrization used to model power corrections - for DIS-like processes. + This function defines the step function used to construct the prior. The bins of the step + function are constructed using pairs of consecutive points. For instance, given the set of + points [0.0, 0.1, 0.3, 0.5], there will be three bins with edges [[0.0, 0.1], [0.1, 0.3], + 0.3, 0.5]]. Each bin is coupled with a shift, which correspond to the y-value of the bin. - The initialization of the cubic spline requires a list of nodes, which contains - the array of the independent variables (e.g. x-Bjorken), - and a list of shifts that correspond to the dependent variables. Each pair (node, shift) - is a point in the plane. The ensemble of points will be interpolated according to the - cubic spline. + Parameters + ---------- + a: ArrayLike of float + A one-dimensional array of points at which the function is evaluated. + y_shift: ArrayLike of float + A one-dimensional array whose elements represent the y-value of each bin + bin_edges: ArrayLike of float + A one-dimensional array containing the edges of the bins. The bins are + constructed using pairs of consecutive points. + + Return + ------ + A one-dimensional array containing the function values evaluated at the points + specified in `a`. + """ + res = [] + for shift_pos, shift in enumerate(y_shift): + bin_low = bin_edges[shift_pos] + bin_high = bin_edges[shift_pos + 1] + condition = np.multiply(a >= bin_low, a < bin_high) + res.append([shift for cond in condition if cond]) + res = np.concatenate(res) + return res + + +def dis_pc_func( + delta_h: npt.ArrayLike, nodes: npt.ArrayLike, x: npt.ArrayLike, Q2: npt.ArrayLike +) -> npt.ArrayLike: + """ + This function builds the functional form of the power corrections for DIS-like processes. + Power corrections are modelled using a step-function. 
The edges of the bins used in the + step-function are specified by the list of nodes. The y-values for each bin are given + by the array `delta_h`. The power corrections will be computed for the pairs (xb, Q2), + where `xb` is the Bjorken x. The power correction for DIS processes are rescaled by Q2. Parameters ---------- - delta_h: list - Shifts of the dependent variables at each node listed in `nodes`. These values correspond - to the `amplitude` of the power correction at each node. - nodes: list - List of nodes in the independent variables. For DIS-like processes, these are points - in the x-Bjorken. - x: list - List of x-Bjorken points where the power correction function is evaluated - Q2: list - List of scales where the power correction function is evaluated. Note that this list - is meant to be of the same length as `x`, and the two lists are meant to be considered - as pairs, e.g. (x1, Q2_1), (x2, Q2_2), ... . + delta_h: ArrayLike + One-dimensional array containing the shifts for each bin. + nodes: ArrayLike + One-dimensional array containing the edges of the bins in x-Bjorken. + x: ArrayLike + List of x-Bjorken points at which the power correction function is evaluated. + Q2: ArrayLike + List of scales where the power correction function is evaluated. Returns ------- - A list of power corrections for DIS-like processes where each point is evaluated at the - kinematic pair (x,Q2). + A one-dimensional array of power corrections for DIS-like processes where each point is + evaluated at the kinematic pair (x,Q2). """ - # H = scint.CubicSpline(nodes, delta_h) - # H = np.vectorize(H) - - def step_function(x, y_shift, nodes): - # TODO - # The shift for the last node is still required to parse the config file, - # but nodes are only used to define bins. Hence, the last shift is unnecessary. - shifted_points_pos = np.flatnonzero(y_shift) - for shift_pos in shifted_points_pos: - bin_low = nodes[shift_pos] - bin_high = nodes[shift_pos + 1] - condition = x >= bin_low and x <= bin_high - if condition: - return y_shift[shift_pos] - return 0.0 - - step_function_vec = np.vectorize(lambda xb: step_function(xb, y_shift=delta_h, nodes=nodes)) - PC = step_function_vec(x) / Q2 + PC = step_function(x, delta_h, nodes) / Q2 return PC -def jets_pc_func(delta_h: list, nodes: list, pT: list, rap: list): +def jets_pc_func( + delta_h: npt.ArrayLike, nodes: npt.ArrayLike, pT: npt.ArrayLike, rap: npt.ArrayLike +) -> npt.ArrayLike: """ - This function defines the parametrization used to model power corrections - for jet and dijet. A step function is used to parametrise the pc. + Same as `dis_pc_func`, but for jet data. Here, the kinematic pair consists of the rapidity + `rap` and the transverse momentum `pT`. Parameters ---------- - delta_h: list - Shifts of the dependent variables at each node listed in `nodes`. These values correspond - to the `amplitude` of the power correction at each node. - nodes: list - List of nodes in rapidity. - pT: list - The scale of jet processes - rap: list - The rapidity values + delta_h: ArrayLike + One-dimensional array containing the shifts for each bin. + nodes: ArrayLike + One-dimensional array containing the edges of the bins in rapidity. + rap: ArrayLike + List of rapidity points at which the power correction is evaluated. + pT: ArrayLike + List of pT points at which the power correction is evaluated. Returns ------- - A list of power corrections for rapidity processes where each point is evaluated at the - kinematic pair (y, pT). 
+ A one-dimensional array of power corrections for jet processes where each point is + evaluated at the kinematic pair (y, pT). """ - - def step_function(x, y_shift, nodes): - # TODO - # The shift for the last node is still required to parse the config file, - # but nodes are only used to define bins. Hence, the last shift is unnecessary. - shifted_points_pos = np.flatnonzero(y_shift) - for shift_pos in shifted_points_pos: - bin_low = nodes[shift_pos] - bin_high = nodes[shift_pos + 1] - condition = x >= bin_low and x <= bin_high - if condition: - return y_shift[shift_pos] - return 0.0 - - step_function_vec = np.vectorize(lambda rap: step_function(rap, y_shift=delta_h, nodes=nodes)) - - PC = step_function_vec(rap) / pT + PC = step_function(rap, delta_h, nodes) / pT return PC From a2304950c7e466cb433995391820030083e68753 Mon Sep 17 00:00:00 2001 From: achiefa Date: Wed, 29 Jan 2025 18:00:41 +0000 Subject: [PATCH 45/69] Correct bug in step function --- .../src/validphys/theorycovariance/higher_twist_functions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index bfcb4546dc..acbb174164 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -74,7 +74,9 @@ def step_function(a: npt.ArrayLike, y_shift: npt.ArrayLike, bin_edges: npt.Array for shift_pos, shift in enumerate(y_shift): bin_low = bin_edges[shift_pos] bin_high = bin_edges[shift_pos + 1] - condition = np.multiply(a >= bin_low, a < bin_high) + condition = np.multiply( + a >= bin_low, a < bin_high if shift_pos != len(y_shift) - 1 else a <= bin_high + ) res.append([shift for cond in condition if cond]) res = np.concatenate(res) return res From c61d3f584aaeb31f1f94740af2fdecfac153e724 Mon Sep 17 00:00:00 2001 From: achiefa Date: Wed, 29 Jan 2025 19:13:24 +0000 Subject: [PATCH 46/69] Correct bug in step function --- .../src/validphys/theorycovariance/higher_twist_functions.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index acbb174164..5e494478bf 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -70,15 +70,14 @@ def step_function(a: npt.ArrayLike, y_shift: npt.ArrayLike, bin_edges: npt.Array A one-dimensional array containing the function values evaluated at the points specified in `a`. 
""" - res = [] + res = np.zeros_like(a) for shift_pos, shift in enumerate(y_shift): bin_low = bin_edges[shift_pos] bin_high = bin_edges[shift_pos + 1] condition = np.multiply( a >= bin_low, a < bin_high if shift_pos != len(y_shift) - 1 else a <= bin_high ) - res.append([shift for cond in condition if cond]) - res = np.concatenate(res) + res = np.add(res, [shift if cond else 0.0 for cond in condition]) return res From 2e6a29e531f1b41d4d6fdfa5126342bf192d500b Mon Sep 17 00:00:00 2001 From: achiefa Date: Thu, 30 Jan 2025 12:06:36 +0000 Subject: [PATCH 47/69] Produce covs_pt_prescrip --- validphys2/src/validphys/config.py | 13 ++++ .../theorycovariance/construction.py | 73 +++++++++++-------- 2 files changed, 54 insertions(+), 32 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 5dbebd4922..031c4ec138 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1935,6 +1935,19 @@ def produce_power_corr_dict(self, pc_parameters=None): return pc_parameters_by_type + @configparser.explicit_node + def produce_covs_pt_prescrip(self, point_prescription): + if point_prescription != 'power corrections': + from validphys.theorycovariance.construction import covs_pt_prescrip_mhou + + f = covs_pt_prescrip_mhou + else: + from validphys.theorycovariance.construction import covs_pt_prescrip_pc + + f = covs_pt_prescrip_pc + + return f + class Config(report.Config, CoreConfig): """The effective configuration parser class.""" diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 895cdde20e..1a00714b2f 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -330,14 +330,7 @@ def compute_covs_pt_prescrip(point_prescription, name1, deltas1, name2=None, del @check_correct_theory_combination -def covs_pt_prescrip( - combine_by_type, - point_prescription, - pdf: PDF, - power_corr_dict, - pc_included_procs, - pc_excluded_exps, -): +def covs_pt_prescrip_mhou(combine_by_type, point_prescription): """Produces the sub-matrices of the theory covariance matrix according to a point prescription which matches the number of input theories. chosen in the runcard in order to specify the prescription. 
Sub-matrices @@ -345,7 +338,6 @@ def covs_pt_prescrip( processes in turn, using a different procedure for the case where the processes are the same relative to when they are different.""" process_info = combine_by_type - datagroup_spec = process_info.data_spec running_index = 0 covmats = defaultdict(list) @@ -366,30 +358,47 @@ def covs_pt_prescrip( start_locs = (start_proc[name1], start_proc[name2]) covmats[start_locs] = s - # For power corrections, the loops run over experimentes - else: - start_proc_by_exp = defaultdict(list) - for exp_name, data_spec in datagroup_spec.items(): - start_proc_by_exp[exp_name] = running_index - running_index += data_spec.load_commondata().ndata - - for exp_name1, data_spec1 in datagroup_spec.items(): - for exp_name2, data_spec2 in datagroup_spec.items(): - process_type1 = process_lookup(exp_name1) - process_type2 = process_lookup(exp_name2) - - is_excluded_exp = any(name in pc_excluded_exps for name in [exp_name1, exp_name2]) - is_included_proc = any( - proc not in pc_included_procs for proc in [process_type1, process_type2] + return covmats + + +def covs_pt_prescrip_pc( + combine_by_type, + point_prescription, + pdf: PDF, + power_corr_dict, + pc_included_procs, + pc_excluded_exps, +): + """Produces the sub-matrices of the theory covariance matrix for power + corrections. Sub-matrices correspond to applying power corrected shifts + to each pair of `datasets`.""" + process_info = combine_by_type + datagroup_spec = process_info.data_spec + running_index = 0 + + covmats = defaultdict(list) + start_proc_by_exp = defaultdict(list) + for exp_name, data_spec in datagroup_spec.items(): + start_proc_by_exp[exp_name] = running_index + running_index += data_spec.load_commondata().ndata + + for exp_name1, data_spec1 in datagroup_spec.items(): + for exp_name2, data_spec2 in datagroup_spec.items(): + process_type1 = process_lookup(exp_name1) + process_type2 = process_lookup(exp_name2) + + is_excluded_exp = any(name in pc_excluded_exps for name in [exp_name1, exp_name2]) + is_included_proc = any( + proc not in pc_included_procs for proc in [process_type1, process_type2] + ) + if not (is_excluded_exp or is_included_proc): + deltas1 = compute_deltas_pc(data_spec1, pdf, power_corr_dict) + deltas2 = compute_deltas_pc(data_spec2, pdf, power_corr_dict) + s = compute_covs_pt_prescrip( + point_prescription, exp_name1, deltas1, exp_name2, deltas2 ) - if not (is_excluded_exp or is_included_proc): - deltas1 = compute_deltas_pc(data_spec1, pdf, power_corr_dict) - deltas2 = compute_deltas_pc(data_spec2, pdf, power_corr_dict) - s = compute_covs_pt_prescrip( - point_prescription, exp_name1, deltas1, exp_name2, deltas2 - ) - start_locs = (start_proc_by_exp[exp_name1], start_proc_by_exp[exp_name2]) - covmats[start_locs] = s + start_locs = (start_proc_by_exp[exp_name1], start_proc_by_exp[exp_name2]) + covmats[start_locs] = s return covmats From 7f32963804508bdecc16fd04283a581f787eaf51 Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 10 Feb 2025 11:18:36 +0000 Subject: [PATCH 48/69] Allow different funcs for posterior --- validphys2/src/validphys/config.py | 13 +- .../higher_twist_functions.py | 229 ++++++++++++++---- 2 files changed, 192 insertions(+), 50 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 031c4ec138..d17cc2fc03 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1911,8 +1911,11 @@ def produce_total_phi_data(self, fitthcovmat): return 
validphys.results.total_phi_data_from_experiments return validphys.results.dataset_inputs_phi_data + def produce_pc_func_type(self, theorycovmatconfig=None): + return theorycovmatconfig.get('func_type', 'step') + # @configparser.explicit_node - def produce_power_corr_dict(self, pc_parameters=None): + def produce_power_corr_dict(self, pc_parameters=None, pc_func_type=None): """The parameters for the power corrections are given as a list. This function converts this list into a dictionary with the keys being the names of the types of power corrections (e.g. `H2p`, `H2d`,...). @@ -1924,7 +1927,12 @@ def produce_power_corr_dict(self, pc_parameters=None): # Loop over the parameterization for the power corrections in the runcard for par in pc_parameters: # Check that the length of shifts is one less than the length of nodes. - if len(par['yshift']) != len(par['nodes']) - 1: + if (len(par['yshift']) != len(par['nodes']) - 1) and pc_func_type != 'cubic': + raise ValueError( + f"The length of nodes does not match that of the list in {par['ht']}." + f"Check the runcard. Got {len(par['yshift'])} != {len(par['nodes'])}" + ) + elif (len(par['yshift']) != len(par['nodes'])) and pc_func_type == 'cubic': raise ValueError( f"The length of nodes does not match that of the list in {par['ht']}." f"Check the runcard. Got {len(par['yshift'])} != {len(par['nodes'])}" @@ -1933,6 +1941,7 @@ def produce_power_corr_dict(self, pc_parameters=None): # Store parameters for each power correction pc_parameters_by_type[par['ht']] = {'yshift': par['yshift'], 'nodes': par['nodes']} + pc_parameters_by_type['func_type'] = pc_func_type return pc_parameters_by_type @configparser.explicit_node diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 5e494478bf..9c99406b61 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -81,8 +81,81 @@ def step_function(a: npt.ArrayLike, y_shift: npt.ArrayLike, bin_edges: npt.Array return res +def cubic_spline_function( + a: npt.ArrayLike, y_shift: npt.ArrayLike, nodes: npt.ArrayLike +) -> np.ndarray: + """ + This function defines the cubic spline function used to construct the prior. The spline + is constructed using the nodes specified in `nodes` and the y-values in `y_shift`. The + spline is evaluated at the points specified in `a`. + + Parameters + ---------- + a: ArrayLike of float + A one-dimensional array of points at which the function is evaluated. + y_shift: ArrayLike of float + A one-dimensional array whose elements represent the y-value of each bin + nodes: ArrayLike of float + A one-dimensional array containing the nodes used to construct the spline. + + Return + ------ + A one-dimensional array containing the function values evaluated at the points + specified in `a`. + """ + from scipy.interpolate import CubicSpline + + cs = CubicSpline(nodes, y_shift) + return cs(a) + + +def linear_bin_function( + a: npt.ArrayLike, y_shift: npt.ArrayLike, bin_edges: npt.ArrayLike +) -> np.ndarray: + """ + This function defines the linear bin function used to construct the prior. The bins of the + function are constructed using pairs of consecutive points. For instance, given the set of + points [0.0, 0.1, 0.3, 0.5], there will be three bins with edges [[0.0, 0.1], [0.1, 0.3], + 0.3, 0.5]]. Each bin is coupled with a shift, which correspond to the y-value of the bin. 
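As a quick standalone illustration of the bin convention described above (this snippet is not part of any patch in the series; the nodes, shifts and points are made-up numbers), the lookup below shows which bin each point is assigned to, with the right-most edge treated as inclusive. For the step-function prior defined earlier in this module this lookup is essentially the whole computation; the linear and cubic variants interpolate between nodes instead.

    import numpy as np

    nodes = np.array([0.0, 0.1, 0.3, 0.5])   # consecutive edges -> three bins
    shifts = np.array([1.0, 2.0, 3.0])       # one y-value per bin
    x = np.array([0.05, 0.2, 0.5])
    # digitize gives the bin index of each point; clip keeps x == last edge in the last bin
    idx = np.clip(np.digitize(x, nodes) - 1, 0, len(shifts) - 1)
    print(shifts[idx])                       # [1. 2. 3.]
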
+ + Parameters + ---------- + a: ArrayLike of float + A one-dimensional array of points at which the function is evaluated. + y_shift: ArrayLike of float + A one-dimensional array whose elements represent the y-value of each bin + bin_edges: ArrayLike of float + A one-dimensional array containing the edges of the bins. The bins are + constructed using pairs of consecutive points. + + Return + ------ + A one-dimensional array containing the function values evaluated at the points + specified in `a`. + """ + res = np.zeros_like(a) + for shift_pos, shift in enumerate(y_shift): + bin_low = bin_edges[shift_pos] + bin_high = bin_edges[shift_pos + 1] + bin_mid = 0.5 * (bin_low + bin_high) + cond_low = np.multiply(a >= bin_low, a < bin_mid) + cond_high = np.multiply( + a >= bin_mid, a < bin_high if shift_pos != len(y_shift) - 1 else a <= bin_high + ) + m = 2 * shift / (bin_high - bin_low) + res = np.add(res, [m * (val - bin_low) if cond else 0.0 for val, cond in zip(a, cond_low)]) + res = np.add( + res, [-m * (val - bin_high) if cond else 0.0 for val, cond in zip(a, cond_high)] + ) + return res + + def dis_pc_func( - delta_h: npt.ArrayLike, nodes: npt.ArrayLike, x: npt.ArrayLike, Q2: npt.ArrayLike + delta_h: npt.ArrayLike, + nodes: npt.ArrayLike, + x: npt.ArrayLike, + Q2: npt.ArrayLike, + pc_func_type: str = "step", ) -> npt.ArrayLike: """ This function builds the functional form of the power corrections for DIS-like processes. @@ -107,12 +180,24 @@ def dis_pc_func( A one-dimensional array of power corrections for DIS-like processes where each point is evaluated at the kinematic pair (x,Q2). """ - PC = step_function(x, delta_h, nodes) / Q2 + if pc_func_type == "step": + PC = step_function(x, delta_h, nodes) / Q2 + elif pc_func_type == "linear": + PC = linear_bin_function(x, delta_h, nodes) / Q2 + elif pc_func_type == "cubic": + PC = cubic_spline_function(x, delta_h, nodes) / Q2 + else: + raise ValueError(f"Invalid function type: {pc_func_type} is not supported.") + return PC def jets_pc_func( - delta_h: npt.ArrayLike, nodes: npt.ArrayLike, pT: npt.ArrayLike, rap: npt.ArrayLike + delta_h: npt.ArrayLike, + nodes: npt.ArrayLike, + pT: npt.ArrayLike, + rap: npt.ArrayLike, + pc_func_type: str = "step", ) -> npt.ArrayLike: """ Same as `dis_pc_func`, but for jet data. Here, the kinematic pair consists of the rapidity @@ -134,11 +219,18 @@ def jets_pc_func( A one-dimensional array of power corrections for jet processes where each point is evaluated at the kinematic pair (y, pT). """ - PC = step_function(rap, delta_h, nodes) / pT + if pc_func_type == "step": + PC = step_function(rap, delta_h, nodes) / pT + elif pc_func_type == "linear": + PC = linear_bin_function(rap, delta_h, nodes) / pT + elif pc_func_type == "cubic": + PC = cubic_spline_function(rap, delta_h, nodes) / pT + else: + raise ValueError(f"Invalid function type: {pc_func_type} is not supported.") return PC -def JET_pc(pc_nodes, pT, rap): +def JET_pc(pc_nodes, pT, rap, pc_func_type: str = "step"): """ Returns the function that computes the shift for the ratio for single jet cross sections. In particular, the shift is computed such that @@ -153,7 +245,7 @@ def JET_pc(pc_nodes, pT, rap): """ def func(y_values): - result = jets_pc_func(y_values, pc_nodes, pT, rap) + result = jets_pc_func(y_values, pc_nodes, pT, rap, pc_func_type) return result return func @@ -161,7 +253,7 @@ def func(y_values): # TODO Maybe we want to treat the function that parametrizes the PC # as argument? 
-def DIS_F2_pc(pc2_nodes, x, q2): +def DIS_F2_pc(pc2_nodes, x, q2, pc_func_type: str = "step"): """ Returns the function that computes the shift for the ratio of structure functions F2_d / F2_p. For this observable, power corrections are defined @@ -187,13 +279,13 @@ def DIS_F2_pc(pc2_nodes, x, q2): """ def PC_2(y_values): - result = dis_pc_func(y_values, pc2_nodes, x, q2) + result = dis_pc_func(y_values, pc2_nodes, x, q2, pc_func_type) return result return PC_2 -def DIS_F2R_pc(experiment, pdf, pc_2_p_nodes, pc_2_d_nodes, x, q2): +def DIS_F2R_pc(experiment, pdf, pc_2_p_nodes, pc_2_d_nodes, x, q2, pc_func_type: str = "step"): """ Returns the function that computes the shift for the ratio of structure functions F2_d / F2_p. For this observable, power corrections are defined @@ -260,8 +352,8 @@ def DIS_F2R_pc(experiment, pdf, pc_2_p_nodes, pc_2_d_nodes, x, q2): F2_ratio = operator.truediv(F2D, F2P) def func(y_values_d, y_values_p): - PC_d = dis_pc_func(y_values_d, pc_2_d_nodes, x, q2) - PC_p = dis_pc_func(y_values_p, pc_2_p_nodes, x, q2) + PC_d = dis_pc_func(y_values_d, pc_2_d_nodes, x, q2, pc_func_type) + PC_p = dis_pc_func(y_values_p, pc_2_p_nodes, x, q2, pc_func_type) num = np.sum([F2D, PC_d], axis=0) denom = np.sum([F2P, PC_p], axis=0) result = np.array(operator.truediv(num, denom) - F2_ratio) @@ -270,7 +362,7 @@ def func(y_values_d, y_values_p): return func -def DIS_F2C_pc(pc2_p_nodes, pc2_d_nodes, x, q2): +def DIS_F2C_pc(pc2_p_nodes, pc2_d_nodes, x, q2, pc_func_type: str = "step"): """ Builds the function used to compute the shifts for the charm structure function measured by EMC. The process involved is @@ -333,15 +425,15 @@ def DIS_F2C_pc(pc2_p_nodes, pc2_d_nodes, x, q2): A = 49.618 def func(y_values_d, y_values_p): - PC2_d = dis_pc_func(y_values_d, pc2_d_nodes, x, q2) - PC2_p = dis_pc_func(y_values_p, pc2_p_nodes, x, q2) + PC2_d = dis_pc_func(y_values_d, pc2_d_nodes, x, q2, pc_func_type) + PC2_p = dis_pc_func(y_values_p, pc2_p_nodes, x, q2, pc_func_type) result = (2 * Z - A) / A * PC2_p + 2 * (A - Z) / A * PC2_d return result return func -def DIS_NC_XSEC_pc(pc2_nodes, pcL_nodes, pc3_nodes, lepton, x, q2, y): +def DIS_NC_XSEC_pc(pc2_nodes, pcL_nodes, pc3_nodes, lepton, x, q2, y, pc_func_type: str = "step"): """ Builds the function used to compute the shifts for the DIS NC x-secs delivered by HERA and NMC. The x-sec is reconstructed as calculated @@ -394,16 +486,18 @@ def DIS_NC_XSEC_pc(pc2_nodes, pcL_nodes, pc3_nodes, lepton, x, q2, y): N_3 = np.power(-1, lepton) * ym / yp # Coefficient for F_3 def func(y_values_pc2, y_values_pcL, y_values_pc3): - PC_2 = dis_pc_func(y_values_pc2, pc2_nodes, x, q2) - PC_L = dis_pc_func(y_values_pcL, pcL_nodes, x, q2) - PC_3 = dis_pc_func(y_values_pc3, pc3_nodes, x, q2) + PC_2 = dis_pc_func(y_values_pc2, pc2_nodes, x, q2, pc_func_type) + PC_L = dis_pc_func(y_values_pcL, pcL_nodes, x, q2, pc_func_type) + PC_3 = dis_pc_func(y_values_pc3, pc3_nodes, x, q2, pc_func_type) result = PC_2 + N_L * PC_L + N_3 * PC_3 return result return func -def DIS_CC_HERA_XSEC_pc(pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, lepton, x, q2, y): +def DIS_CC_HERA_XSEC_pc( + pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, lepton, x, q2, y, pc_func_type: str = "step" +): """ Builds the function used to compute the shifts for the DIS CC x-secs delivered by HERA. 
The x-sec is reconstructed as calculated @@ -456,9 +550,9 @@ def DIS_CC_HERA_XSEC_pc(pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, lepton, x, q2, y) def func(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): # Initialize power corrections for each structure function - PC2_p = dis_pc_func(y_values_pc2_p, pc2_p_nodes, x, q2) - PCL_p = dis_pc_func(y_values_pcL_p, pcL_p_nodes, x, q2) - PC3_p = dis_pc_func(y_values_pc3_p, pc3_p_nodes, x, q2) + PC2_p = dis_pc_func(y_values_pc2_p, pc2_p_nodes, x, q2, pc_func_type) + PCL_p = dis_pc_func(y_values_pcL_p, pcL_p_nodes, x, q2, pc_func_type) + PC3_p = dis_pc_func(y_values_pc3_p, pc3_p_nodes, x, q2, pc_func_type) # Build the contribution to the x-sec of the power corrections result = N * (PC2_p + N_L * PCL_p + N_3 * PC3_p) @@ -468,7 +562,17 @@ def func(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): def DIS_CC_NUTEV_pc( - pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, pc2_d_nodes, pcL_d_nodes, pc3_d_nodes, lepton, x, q2, y + pc2_p_nodes, + pcL_p_nodes, + pc3_p_nodes, + pc2_d_nodes, + pcL_d_nodes, + pc3_d_nodes, + lepton, + x, + q2, + y, + pc_func_type: str = "step", ): """ Builds the function used to compute the shifts for the DIS CC x-secs @@ -550,12 +654,12 @@ def func( y_values_pcL_d, y_values_pc3_d, ): - PC2_p = dis_pc_func(y_values_pc2_p, pc2_p_nodes, x, q2) - PCL_p = dis_pc_func(y_values_pcL_p, pcL_p_nodes, x, q2) - PC3_p = dis_pc_func(y_values_pc3_p, pc3_p_nodes, x, q2) - PC2_d = dis_pc_func(y_values_pc2_d, pc2_d_nodes, x, q2) - PCL_d = dis_pc_func(y_values_pcL_d, pcL_d_nodes, x, q2) - PC3_d = dis_pc_func(y_values_pc3_d, pc3_d_nodes, x, q2) + PC2_p = dis_pc_func(y_values_pc2_p, pc2_p_nodes, x, q2, pc_func_type) + PCL_p = dis_pc_func(y_values_pcL_p, pcL_p_nodes, x, q2, pc_func_type) + PC3_p = dis_pc_func(y_values_pc3_p, pc3_p_nodes, x, q2, pc_func_type) + PC2_d = dis_pc_func(y_values_pc2_d, pc2_d_nodes, x, q2, pc_func_type) + PCL_d = dis_pc_func(y_values_pcL_d, pcL_d_nodes, x, q2, pc_func_type) + PC3_d = dis_pc_func(y_values_pc3_d, pc3_d_nodes, x, q2, pc_func_type) tmp_2 = (2 * Z - A) / A * PC2_p + 2 * (A - Z) / A * PC2_d tmp_L = (2 * Z - A) / A * PCL_p + 2 * (A - Z) / A * PCL_d tmp_3 = (2 * Z - A) / A * PC3_p + 2 * (A - Z) / A * PC3_d @@ -568,7 +672,17 @@ def func( # TODO This is function is really similar to the one # defined for NUTEV CC. Can we reduce code repetitions? def DIS_CC_CHORUS_pc( - pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, pc2_d_nodes, pcL_d_nodes, pc3_d_nodes, lepton, x, q2, y + pc2_p_nodes, + pcL_p_nodes, + pc3_p_nodes, + pc2_d_nodes, + pcL_d_nodes, + pc3_d_nodes, + lepton, + x, + q2, + y, + pc_func_type: str = "step", ): """ Same as DIS_CC_NUTEV_pc, but for CHORUS CC. 
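As a standalone sanity check of the proton/deuteron rewriting used by the heavy-target shift functions above (the Z, A and the arrays below are toy numbers chosen for illustration, not taken from any dataset in the patch), the Z/A-weighted proton/neutron combination and its proton/deuteron form agree once deuteron isoscalarity is assumed:

    import numpy as np

    Z, A = 26.0, 56.0                      # hypothetical nucleus, illustration only
    pc_p = np.array([0.10, 0.05, 0.02])    # toy proton power corrections
    pc_n = np.array([0.08, 0.04, 0.01])    # toy neutron power corrections
    pc_d = 0.5 * (pc_p + pc_n)             # deuteron from isoscalarity

    pc_N_from_pn = (Z * pc_p + (A - Z) * pc_n) / A
    pc_N_from_pd = ((2 * Z - A) * pc_p + 2 * (A - Z) * pc_d) / A
    assert np.allclose(pc_N_from_pn, pc_N_from_pd)
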
@@ -636,12 +750,12 @@ def func( y_values_pcL_d, y_values_pc3_d, ): - PC2_p = dis_pc_func(y_values_pc2_p, pc2_p_nodes, x, q2) - PCL_p = dis_pc_func(y_values_pcL_p, pcL_p_nodes, x, q2) - PC3_p = dis_pc_func(y_values_pc3_p, pc3_p_nodes, x, q2) - PC2_d = dis_pc_func(y_values_pc2_d, pc2_d_nodes, x, q2) - PCL_d = dis_pc_func(y_values_pcL_d, pcL_d_nodes, x, q2) - PC3_d = dis_pc_func(y_values_pc3_d, pc3_d_nodes, x, q2) + PC2_p = dis_pc_func(y_values_pc2_p, pc2_p_nodes, x, q2, pc_func_type) + PCL_p = dis_pc_func(y_values_pcL_p, pcL_p_nodes, x, q2, pc_func_type) + PC3_p = dis_pc_func(y_values_pc3_p, pc3_p_nodes, x, q2, pc_func_type) + PC2_d = dis_pc_func(y_values_pc2_d, pc2_d_nodes, x, q2, pc_func_type) + PCL_d = dis_pc_func(y_values_pcL_d, pcL_d_nodes, x, q2, pc_func_type) + PC3_d = dis_pc_func(y_values_pc3_d, pc3_d_nodes, x, q2, pc_func_type) tmp_2 = (2 * Z - A) / A * PC2_p + 2 * (A - Z) / A * PC2_d tmp_L = (2 * Z - A) / A * PCL_p + 2 * (A - Z) / A * PCL_d tmp_3 = (2 * Z - A) / A * PC3_p + 2 * (A - Z) / A * PC3_d @@ -717,7 +831,10 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, power_corr_dict: dict): if isinstance(process_type, _Process): process_type = process_type.name - pars_combs = construct_pars_combs(power_corr_dict) + pc_func_type = power_corr_dict['func_type'] + power_corr_dict_copy = power_corr_dict.copy() + power_corr_dict_copy.pop('func_type', None) + pars_combs = construct_pars_combs(power_corr_dict_copy) deltas = defaultdict(list) pc_func = None @@ -739,25 +856,25 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, power_corr_dict: dict): # F2 ratio if exp_name == "NMC_NC_NOTFIXED_EM-F2": - pc_func = DIS_F2R_pc(dataset_sp, pdf, pc2_p_nodes, pc2_d_nodes, x, q2) + pc_func = DIS_F2R_pc(dataset_sp, pdf, pc2_p_nodes, pc2_d_nodes, x, q2, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2p'], pars_pc['comb']['H2d']) # F2 proton traget elif exp_name in F2P_exps: - pc_func = DIS_F2_pc(pc2_p_nodes, x, q2) + pc_func = DIS_F2_pc(pc2_p_nodes, x, q2, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2p']) # F2 deuteron traget elif exp_name in F2D_exps: - pc_func = DIS_F2_pc(pc2_d_nodes, x, q2) + pc_func = DIS_F2_pc(pc2_d_nodes, x, q2, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2d']) # EMC elif exp_name.startswith('EMC_NC_250GEV'): - pc_func = DIS_F2C_pc(pc2_p_nodes, pc2_d_nodes, x, q2) + pc_func = DIS_F2C_pc(pc2_p_nodes, pc2_d_nodes, x, q2, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2p'], pars_pc['comb']['H2d']) @@ -765,18 +882,26 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, power_corr_dict: dict): elif exp_name in np.concatenate([NC_SIGMARED_P_EM, NC_SIGMARED_P_EP, NC_SIGMARED_P_EAVG]): # Electron if exp_name in NC_SIGMARED_P_EM: - pc_func = DIS_NC_XSEC_pc(pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y) + pc_func = DIS_NC_XSEC_pc( + pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y, pc_func_type + ) # Positron elif exp_name in NC_SIGMARED_P_EP: - pc_func = DIS_NC_XSEC_pc(pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y) + pc_func = DIS_NC_XSEC_pc( + pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y, pc_func_type + ) # Average positron and electron # TODO # Check if this is correct (ach) elif NC_SIGMARED_P_EAVG: def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): - electron = DIS_NC_XSEC_pc(pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y) - positron = 
DIS_NC_XSEC_pc(pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y) + electron = DIS_NC_XSEC_pc( + pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y, pc_func_type + ) + positron = DIS_NC_XSEC_pc( + pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y, pc_func_type + ) result = electron(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p) + positron( y_values_pc2_p, y_values_pcL_p, y_values_pc3_p ) @@ -806,6 +931,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): x, q2, y, + pc_func_type, ) # Nu bar elif exp_name == 'CHORUS_CC_NOTFIXED_PB_NB-SIGMARED': @@ -820,6 +946,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): x, q2, y, + pc_func_type, ) else: raise ValueError(f"{exp_name} not implemented.") @@ -848,6 +975,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): x, q2, y, + pc_func_type, ) # Nu bar elif exp_name == 'NUTEV_CC_NOTFIXED_FE_NB-SIGMARED': @@ -862,6 +990,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): x, q2, y, + pc_func_type, ) else: raise ValueError(f"{exp_name} not implemented.") @@ -879,10 +1008,14 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): elif exp_name.startswith('HERA_CC'): # electron if exp_name == 'HERA_CC_318GEV_EM-SIGMARED': - pc_func = DIS_CC_HERA_XSEC_pc(pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y) + pc_func = DIS_CC_HERA_XSEC_pc( + pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y, pc_func_type + ) # positron elif exp_name == 'HERA_CC_318GEV_EP-SIGMARED': - pc_func = DIS_CC_HERA_XSEC_pc(pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y) + pc_func = DIS_CC_HERA_XSEC_pc( + pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y, pc_func_type + ) else: raise ValueError(f"{exp_name} not implemented.") @@ -907,7 +1040,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): pT = cd_table['kin2'].to_numpy() q2 = pT * pT - pc_func = JET_pc(pc_jet_nodes, pT, eta) + pc_func = JET_pc(pc_jet_nodes, pT, eta, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['Hj']) From 0edb275f24606783eee6d056d4347bd66a1d658d Mon Sep 17 00:00:00 2001 From: achiefa Date: Wed, 12 Feb 2025 14:05:05 +0000 Subject: [PATCH 49/69] Adjusting linear triangular function --- validphys2/src/validphys/config.py | 7 +++-- .../higher_twist_functions.py | 26 ++++++++++++++----- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index d17cc2fc03..405f7c09ba 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1927,12 +1927,15 @@ def produce_power_corr_dict(self, pc_parameters=None, pc_func_type=None): # Loop over the parameterization for the power corrections in the runcard for par in pc_parameters: # Check that the length of shifts is one less than the length of nodes. - if (len(par['yshift']) != len(par['nodes']) - 1) and pc_func_type != 'cubic': + if (len(par['yshift']) != len(par['nodes']) - 1) and pc_func_type not in [ + 'cubic', + 'linear', + ]: raise ValueError( f"The length of nodes does not match that of the list in {par['ht']}." f"Check the runcard. Got {len(par['yshift'])} != {len(par['nodes'])}" ) - elif (len(par['yshift']) != len(par['nodes'])) and pc_func_type == 'cubic': + elif (len(par['yshift']) != len(par['nodes'])) and pc_func_type in ['cubic', 'linear']: raise ValueError( f"The length of nodes does not match that of the list in {par['ht']}." f"Check the runcard. 
Got {len(par['yshift'])} != {len(par['nodes'])}" diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 9c99406b61..7635907944 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -135,17 +135,31 @@ def linear_bin_function( """ res = np.zeros_like(a) for shift_pos, shift in enumerate(y_shift): - bin_low = bin_edges[shift_pos] - bin_high = bin_edges[shift_pos + 1] - bin_mid = 0.5 * (bin_low + bin_high) + if shift_pos > 0 and shift_pos < len(y_shift) - 1: + bin_low = bin_edges[shift_pos - 1] + bin_high = bin_edges[shift_pos + 1] + bin_mid = bin_edges[shift_pos] + m1 = shift / (bin_mid - bin_low) + m2 = shift / (bin_high - bin_mid) + elif shift_pos == 0: # Left-most bin + bin_high = bin_edges[shift_pos + 1] + bin_mid = bin_edges[shift_pos] + bin_low = bin_mid + m1 = 0.0 + m2 = shift / (bin_high - bin_mid) + else: # Right-most bin + bin_low = bin_edges[shift_pos - 1] + bin_mid = bin_edges[shift_pos] + bin_high = bin_mid + m1 = shift / (bin_mid - bin_low) + m2 = 0.0 cond_low = np.multiply(a >= bin_low, a < bin_mid) cond_high = np.multiply( a >= bin_mid, a < bin_high if shift_pos != len(y_shift) - 1 else a <= bin_high ) - m = 2 * shift / (bin_high - bin_low) - res = np.add(res, [m * (val - bin_low) if cond else 0.0 for val, cond in zip(a, cond_low)]) + res = np.add(res, [m1 * (val - bin_low) if cond else 0.0 for val, cond in zip(a, cond_low)]) res = np.add( - res, [-m * (val - bin_high) if cond else 0.0 for val, cond in zip(a, cond_high)] + res, [-m2 * (val - bin_high) if cond else 0.0 for val, cond in zip(a, cond_high)] ) return res From 34fd363b164ed5ce96e6186e5d52d132235753de Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 17 Feb 2025 15:21:33 +0000 Subject: [PATCH 50/69] Correct bug in linear function --- .../theorycovariance/higher_twist_functions.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 7635907944..acf60d80aa 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -113,10 +113,9 @@ def linear_bin_function( a: npt.ArrayLike, y_shift: npt.ArrayLike, bin_edges: npt.ArrayLike ) -> np.ndarray: """ - This function defines the linear bin function used to construct the prior. The bins of the - function are constructed using pairs of consecutive points. For instance, given the set of - points [0.0, 0.1, 0.3, 0.5], there will be three bins with edges [[0.0, 0.1], [0.1, 0.3], - 0.3, 0.5]]. Each bin is coupled with a shift, which correspond to the y-value of the bin. + This function defines the linear bin function used to construct the prior. Specifically, + the prior is constructed using a triangular function whose value at the peak of the node + is linked to the right and left nodes using a straight line. Parameters ---------- @@ -124,7 +123,7 @@ def linear_bin_function( A one-dimensional array of points at which the function is evaluated. y_shift: ArrayLike of float A one-dimensional array whose elements represent the y-value of each bin - bin_edges: ArrayLike of float + bin_nodes: ArrayLike of float A one-dimensional array containing the edges of the bins. 
The bins are constructed using pairs of consecutive points. @@ -153,7 +152,9 @@ def linear_bin_function( bin_high = bin_mid m1 = shift / (bin_mid - bin_low) m2 = 0.0 - cond_low = np.multiply(a >= bin_low, a < bin_mid) + cond_low = np.multiply( + a >= bin_low, a < bin_mid if shift_pos != len(y_shift) - 1 else a <= bin_mid + ) cond_high = np.multiply( a >= bin_mid, a < bin_high if shift_pos != len(y_shift) - 1 else a <= bin_high ) @@ -1052,7 +1053,6 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): # When this happens, this part must be updated. eta = cd_table['kin1'].to_numpy() pT = cd_table['kin2'].to_numpy() - q2 = pT * pT pc_func = JET_pc(pc_jet_nodes, pT, eta, pc_func_type) for pars_pc in pars_combs: From 049baa34552937a470f5c181ef99f1e86c365acb Mon Sep 17 00:00:00 2001 From: achiefa Date: Fri, 21 Feb 2025 12:48:59 +0000 Subject: [PATCH 51/69] Implement multiplicative PC for jet --- .../theorycovariance/higher_twist_functions.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index acf60d80aa..8d930e9355 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -245,23 +245,27 @@ def jets_pc_func( return PC -def JET_pc(pc_nodes, pT, rap, pc_func_type: str = "step"): +def JET_pc(dataset_sp, pdf, pc_nodes, pT, rap, pc_func_type: str = "step"): """ Returns the function that computes the shift for the ratio for single jet cross sections. In particular, the shift is computed such that - xsec -> xsec + PC, + xsec -> xsec * ( 1 + PC ), and the shift is defined as Delta(xsec) = (xsec + xsec) - xsec = PC. - The power correction is a function of the transverse momentum of the jet. + The power correction is a function of the rapidity. """ + cuts = dataset_sp.cuts + (fkspec,) = dataset_sp.fkspecs + fk = fkspec.load_with_cuts(cuts) + xsec = central_fk_predictions(fk, pdf) def func(y_values): result = jets_pc_func(y_values, pc_nodes, pT, rap, pc_func_type) - return result + return np.multiply(result, xsec.to_numpy()[:, 0]) return func @@ -1054,7 +1058,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): eta = cd_table['kin1'].to_numpy() pT = cd_table['kin2'].to_numpy() - pc_func = JET_pc(pc_jet_nodes, pT, eta, pc_func_type) + pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, pT, eta, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['Hj']) From 24204a5d743e9fffd6ac403e75b42efd8f0da0a2 Mon Sep 17 00:00:00 2001 From: achiefa Date: Fri, 28 Feb 2025 12:05:22 +0000 Subject: [PATCH 52/69] Correct docstring --- .../higher_twist_functions.py | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 8d930e9355..26c183d55c 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -383,41 +383,42 @@ def func(y_values_d, y_values_p): def DIS_F2C_pc(pc2_p_nodes, pc2_d_nodes, x, q2, pc_func_type: str = "step"): """ - Builds the function used to compute the shifts for the charm - structure function measured by EMC. 
The process involved is + Builds the function used to compute the shifts for the charm structure + function measured by EMC. The process involved is mu^+ + Fe -> mu+^ + c cbar + X . - This function works exactly as the previous functions used to - compute nuisance shifts. In this case, the constructed function - (`func` below) requires two lists of parameters for the proton - and the deuteron contribution. The reason being that in this process - the muon scatters off an iron target, and the power correction - contribution is a mixture of proton and deuteron nucleons. Hence, proton - and deuteron contribution are weighted by the appropriate atomic factor. + This function works exactly as the previous functions used to compute + nuisance shifts. In this case, the constructed function (`func` below) + requires two lists of parameters for the proton and the deuteron + contribution. The reason being that in this process the muon scatters off an + iron target, and the power correction contribution is a mixture of proton + and deuteron nucleons. Hence, proton and deuteron contribution are weighted + by the appropriate atomic factor. Note that we are parametrising power corrections as proton and deuteron - targets. If we were to parametrize such contributions using, say, proton - and nucleon, than the weights would change. + targets. If we were to parametrize such contributions using, say, proton and + nucleon, than the weights would change. Nuclear target -------------- - The power corrections for nuclear observables, like in this case, are affected - by the pc contribution of the protons and that of the neutrons. - If we allow for the non-iscoscalarity of the target, and combining the two - contributions in accordance with the atomic and mass number (A and Z), the - power correction for the nuclear target can be written as (see eq.(4.2.5) - in https://nnpdf.mi.infn.it/wp-content/uploads/2021/09/thesis_master_RP.pdf) + The power corrections for nuclear observables, like in this case, are + affected by the pc contribution of the protons and that of the neutrons. If + we allow for the non-isoscalarity of the target, and combine the two + contributions in accordance with the atomic and mass number (A and Z + respectively), the power correction for the nuclear target can be written as + (see eq.(4.2.5) in + https://nnpdf.mi.infn.it/wp-content/uploads/2021/09/thesis_master_RP.pdf) PC_N = 1/A (Z * PC_p + (A-Z) * PC_n) . The deuteron is obtained using the isoscalarity, namely - PC_c = 1/2 (PC_p + PC_n) . + PC_d = 1/2 (PC_p + PC_n) . Since we parametrise the power corrections of the proton and the deuteron, - we can combined the above equations and write + we can combine the above equations and write PC_N = 1/A * ( PC_p * (2Z - A) + 2 * PC_d * (A - Z) ) From 26e1e41c2fafe0dfb6e874ce4372ecdfb55e6230 Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 10 Mar 2025 15:06:42 +0000 Subject: [PATCH 53/69] Remove unused vp runcard --- .../theory_covariance/chi2table_ht.yaml | 107 ------------------ 1 file changed, 107 deletions(-) delete mode 100644 validphys2/examples/theory_covariance/chi2table_ht.yaml diff --git a/validphys2/examples/theory_covariance/chi2table_ht.yaml b/validphys2/examples/theory_covariance/chi2table_ht.yaml deleted file mode 100644 index e1327eebd8..0000000000 --- a/validphys2/examples/theory_covariance/chi2table_ht.yaml +++ /dev/null @@ -1,107 +0,0 @@ -# This is the driver template for vp-comparefits. 
It consists on a validphys -# runcard where some settings are missing and are to be filled by the -# vp-comparefits script. The settings below are a sample of such settings, kept -# for reference -# -# meta: -# title: The title of the Report -# keywords: [report_template] -# author: NNPDF Collaboration -# -# current: -# fit: {id: id_of_the_base_fit} -# pdf: {id: id_of_the_base_fit, label: "Current Fit"} -# theory: -# from_: fit -# theoryid: -# from_: theory -# speclabel: "Current Fit" -# -# reference: -# fit: {id: id_of_the_reference_fit} -# pdf: {id: id_of_the_reference_fit, label: "Reference Fit" } -# theory: -# from_: fit -# theoryid: -# from_: theory -# speclabel: "Reference Fit" - -pdfs: - - {id: "240816-06-7-01-lc", label: "HT low cuts"} - - {id: "240812-02-ABMP-lnv", label: "HT mid cuts"} - - {id: "240812-04-ABMP-lnv", label: "HT std. cuts"} - - {id: "240819_nnpdf40_lowcuts", label: "no HYT low cuts"} - - {id: "240807-midcuts", label: "no HT mid cuts"} - - {id: "NNPDF40_nnlo_as_01180_qcd", label: "no HT std cuts (NNPDF40)"} - -fits: - - {id: "240816-06-7-01-lc", label: "HT low cuts"} - - {id: "240812-02-ABMP-lnv", label: "HT mid cuts"} - - {id: "240812-04-ABMP-lnv", label: "HT std. cuts"} - - {id: "240819_nnpdf40_lowcuts", label: "no HYT low cuts"} - - {id: "240807-midcuts", label: "no HT mid cuts"} - - {id: "NNPDF40_nnlo_as_01180_qcd", label: "no HT std cuts (NNPDF40)"} - -use_cuts: "fromfit" -use_weights_in_covmat: False -use_thcovmat_if_present: True - -Q: 1.651 - -#template: report.md - -description: - from_: fit - -dataset_inputs: - from_: fit - -#dataspecs: -# - theoryid: -# from_: current -# pdf: -# from_: current -# fit: -# from_: current -# speclabel: -# from_: current -# -# - theoryid: -# from_: reference -# pdf: -# from_: reference -# fit: -# from_: reference -# speclabel: -# from_: reference - -Datanorm: - normalize_to: data - -DataGroups: - - metadata_group: nnpdf31_process - - metadata_group: experiment - -ProcessGroup: - metadata_group: nnpdf31_process - -template_text: | - Summary - ------- - {@ summarise_fits @} - - {@with DataGroups@} - $\chi^2$ by {@processed_metadata_group@} - ---------------------------------------- - {@plot_fits_groups_data_chi2@} - {@endwith@} - - $\chi^2$ by dataset - ------------------- - ### Plot - {@plot_fits_datasets_chi2@} - ### Table - {@ProcessGroup fits_chi2_table(show_total=true)@} - -actions_: - - report(main=true) From 85188cb3f8b48b3ff5427b66b524c6ae523b357e Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 10 Mar 2025 15:17:43 +0000 Subject: [PATCH 54/69] Dijet + clean-up + checks for pc dict --- validphys2/src/validphys/checks.py | 20 ++++ .../theorycovariance/construction.py | 58 +++++++----- .../higher_twist_functions.py | 91 ++++++++++++------- 3 files changed, 110 insertions(+), 59 deletions(-) diff --git a/validphys2/src/validphys/checks.py b/validphys2/src/validphys/checks.py index 4f958b0dc4..81844c4160 100644 --- a/validphys2/src/validphys/checks.py +++ b/validphys2/src/validphys/checks.py @@ -361,3 +361,23 @@ def check_darwin_single_process(NPROC): """ if platform.system() == "Darwin" and NPROC != 1: raise CheckError("NPROC must be set to 1 on OSX, because multithreading is not supported.") + + +@make_argcheck +def check_pc_parameters(pc_parameters, pc_func_type): + """Check that the parameters for the PC method are set correctly""" + for par in pc_parameters.values(): + # Check that the length of shifts is one less than the length of nodes. 
+ if (len(par['yshift']) != len(par['nodes']) - 1) and pc_func_type not in [ + 'cubic', + 'linear', + ]: + raise ValueError( + f"The length of nodes does not match that of the list in {par['ht']}." + f"Check the runcard. Got {len(par['yshift'])} != {len(par['nodes'])}" + ) + elif (len(par['yshift']) != len(par['nodes'])) and pc_func_type in ['cubic', 'linear']: + raise ValueError( + f"The length of nodes does not match that of the list in {par['ht']}." + f"Check the runcard. Got {len(par['yshift'])} != {len(par['nodes'])}" + ) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 1a00714b2f..e961b9ceab 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -4,6 +4,7 @@ """ from collections import defaultdict, namedtuple +import dataclasses import logging import numpy as np @@ -13,6 +14,7 @@ from reportengine.table import table pass +from validphys.checks import check_pc_parameters from validphys.core import PDF from validphys.results import results, results_central from validphys.theorycovariance.higher_twist_functions import compute_deltas_pc @@ -49,7 +51,14 @@ def theory_covmat_dataset(results, results_central_bytheoryids, point_prescripti return thcovmat -ProcessInfo = namedtuple("ProcessInfo", ("preds", "namelist", "sizes", "data", "data_spec")) +@dataclasses.dataclass(frozen=True) +class ProcessInfo: + """Dataclass containing the information needed to construct the theory covariance matrix.""" + + preds: dict + namelist: dict + sizes: dict + data_spec: dict def combine_by_type(each_dataset_results_central_bytheory, groups_data_by_process): @@ -87,11 +96,7 @@ def combine_by_type(each_dataset_results_central_bytheory, groups_data_by_proces data_spec[exp_set.name] = exp_set process_info = ProcessInfo( - preds=theories_by_process, - namelist=ordered_names, - sizes=dataset_size, - data=None, - data_spec=data_spec, + preds=theories_by_process, namelist=ordered_names, sizes=dataset_size, data_spec=data_spec ) return process_info @@ -341,33 +346,36 @@ def covs_pt_prescrip_mhou(combine_by_type, point_prescription): running_index = 0 covmats = defaultdict(list) - if point_prescription != 'power corrections': - start_proc = defaultdict(list) - for name in process_info.preds: - size = len(process_info.preds[name][0]) - start_proc[name] = running_index - running_index += size - - for name1 in process_info.preds: - for name2 in process_info.preds: - central1, *others1 = process_info.preds[name1] - deltas1 = list(other - central1 for other in others1) - central2, *others2 = process_info.preds[name2] - deltas2 = list(other - central2 for other in others2) - s = compute_covs_pt_prescrip(point_prescription, name1, deltas1, name2, deltas2) - start_locs = (start_proc[name1], start_proc[name2]) - covmats[start_locs] = s + print(point_prescription) + start_proc = defaultdict(list) + for name in process_info.preds: + size = len(process_info.preds[name][0]) + start_proc[name] = running_index + running_index += size + + for name1 in process_info.preds: + for name2 in process_info.preds: + central1, *others1 = process_info.preds[name1] + deltas1 = list(other - central1 for other in others1) + central2, *others2 = process_info.preds[name2] + deltas2 = list(other - central2 for other in others2) + s = compute_covs_pt_prescrip(point_prescription, name1, deltas1, name2, deltas2) + start_locs = (start_proc[name1], start_proc[name2]) + 
covmats[start_locs] = s return covmats +# TODO `pc_func_type`will be removed in the future +@check_pc_parameters def covs_pt_prescrip_pc( combine_by_type, point_prescription, pdf: PDF, - power_corr_dict, + pc_parameters, pc_included_procs, pc_excluded_exps, + pc_func_type, ): """Produces the sub-matrices of the theory covariance matrix for power corrections. Sub-matrices correspond to applying power corrected shifts @@ -392,8 +400,8 @@ def covs_pt_prescrip_pc( proc not in pc_included_procs for proc in [process_type1, process_type2] ) if not (is_excluded_exp or is_included_proc): - deltas1 = compute_deltas_pc(data_spec1, pdf, power_corr_dict) - deltas2 = compute_deltas_pc(data_spec2, pdf, power_corr_dict) + deltas1 = compute_deltas_pc(data_spec1, pdf, pc_parameters, pc_func_type) + deltas2 = compute_deltas_pc(data_spec2, pdf, pc_parameters, pc_func_type) s = compute_covs_pt_prescrip( point_prescription, exp_name1, deltas1, exp_name2, deltas2 ) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 26c183d55c..57befe0f71 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -29,7 +29,6 @@ from validphys.convolution import central_fk_predictions from validphys.core import PDF, DataSetSpec -from validphys.process_options import _Process GEV_CM2_CONV = 3.893793e10 GF = 1.1663787e-05 # Fermi's constant [GeV^-2] @@ -48,6 +47,7 @@ NC_SIGMARED_P_EAVG = ['HERA_NC_318GEV_EAVG_CHARM-SIGMARED', 'HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED'] +# TODO This function will be deleted in the future def step_function(a: npt.ArrayLike, y_shift: npt.ArrayLike, bin_edges: npt.ArrayLike) -> np.ndarray: """ This function defines the step function used to construct the prior. The bins of the step @@ -81,6 +81,7 @@ def step_function(a: npt.ArrayLike, y_shift: npt.ArrayLike, bin_edges: npt.Array return res +# TODO This function will be deleted in the future def cubic_spline_function( a: npt.ArrayLike, y_shift: npt.ArrayLike, nodes: npt.ArrayLike ) -> np.ndarray: @@ -826,7 +827,8 @@ def construct_pars_combs(parameters_dict): return combinations -def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, power_corr_dict: dict): +# TODO `pc_func_type` will be removed +def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_type: str): """ Computes the shifts due to power corrections for a single dataset given the set of parameters that model the power corrections. 
The result is @@ -846,33 +848,23 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, power_corr_dict: dict): """ exp_name = dataset_sp.name - cd_table = dataset_sp.load_commondata().commondata_table - process_type = cd_table['process'].iloc[0] - if isinstance(process_type, _Process): - process_type = process_type.name - - pc_func_type = power_corr_dict['func_type'] - power_corr_dict_copy = power_corr_dict.copy() - power_corr_dict_copy.pop('func_type', None) - pars_combs = construct_pars_combs(power_corr_dict_copy) + process_type = dataset_sp.commondata.metadata.process_type.name + cuts = dataset_sp.cuts.load() + + pars_combs = construct_pars_combs(pc_dict) deltas = defaultdict(list) pc_func = None if process_type.startswith('DIS'): - pc2_p_nodes = power_corr_dict["H2p"]['nodes'] - pcL_p_nodes = power_corr_dict["HLp"]['nodes'] - pc3_p_nodes = power_corr_dict["H3p"]['nodes'] - pc2_d_nodes = power_corr_dict["H2d"]['nodes'] - pcL_d_nodes = power_corr_dict["HLd"]['nodes'] - pc3_d_nodes = power_corr_dict["H3d"]['nodes'] - - # TODO - # AFter the data re-implementation the name of the variables - # in the commondata table will change as indicated in the metadata. - # When this happens, this part must be updated. - x = cd_table['kin1'].to_numpy() - q2 = cd_table['kin2'].to_numpy() - y = cd_table['kin3'].to_numpy() + pc2_p_nodes = pc_dict["H2p"]['nodes'] + pcL_p_nodes = pc_dict["HLp"]['nodes'] + pc3_p_nodes = pc_dict["H3p"]['nodes'] + pc2_d_nodes = pc_dict["H2d"]['nodes'] + pcL_d_nodes = pc_dict["HLd"]['nodes'] + pc3_d_nodes = pc_dict["H3d"]['nodes'] + x = dataset_sp.commondata.metadata.load_kinematics()['x'].to_numpy().reshape(-1)[cuts] + q2 = dataset_sp.commondata.metadata.load_kinematics()['Q2'].to_numpy().reshape(-1)[cuts] + y = dataset_sp.commondata.metadata.load_kinematics()['y'].to_numpy().reshape(-1)[cuts] # F2 ratio if exp_name == "NMC_NC_NOTFIXED_EM-F2": @@ -1050,21 +1042,52 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): ) elif process_type == 'JET': - pc_jet_nodes = power_corr_dict["Hj"]['nodes'] - - # TODO - # AFter the data re-implementation the name of the variables - # in the commondata table will change as indicated in the metadata. - # When this happens, this part must be updated. 
- eta = cd_table['kin1'].to_numpy() - pT = cd_table['kin2'].to_numpy() + pc_jet_nodes = pc_dict["Hj"]['nodes'] + eta = dataset_sp.commondata.metadata.load_kinematics()['y'].to_numpy().reshape(-1)[cuts] + pT = dataset_sp.commondata.metadata.load_kinematics()['pT'].to_numpy().reshape(-1)[cuts] pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, pT, eta, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['Hj']) elif process_type == 'DIJET': - raise RuntimeError(f"No implementation for {exp_name} yet.") + + if dataset_sp.commondata.metadata.experiment == 'ATLAS': + pc_jet_nodes = pc_dict["H2j_ATLAS"]['nodes'] + eta_star = ( + dataset_sp.commondata.metadata.load_kinematics()['ystar'] + .to_numpy() + .reshape(-1)[cuts] + ) + m_jj = ( + dataset_sp.commondata.metadata.load_kinematics()['m_jj'] + .to_numpy() + .reshape(-1)[cuts] + ) + pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, m_jj, eta_star, pc_func_type) + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_ATLAS']) + + elif dataset_sp.commondata.metadata.experiment == 'CMS': + pc_jet_nodes = pc_dict["H2j_CMS"]['nodes'] + eta_diff = ( + dataset_sp.commondata.metadata.load_kinematics()['ydiff'] + .to_numpy() + .reshape(-1)[cuts] + ) + m_jj = ( + dataset_sp.commondata.metadata.load_kinematics()['m_jj'] + .to_numpy() + .reshape(-1)[cuts] + ) + pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, m_jj, eta_diff, pc_func_type) + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_CMS']) + + else: + raise ValueError( + f"{dataset_sp.commondata.metadata.experiment} is not implemented for DIJET." + ) else: raise RuntimeError(f"{process_type} has not been implemented.") From f28807bbe81b8f5ae8a2eb379abea056ba022a08 Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 10 Mar 2025 15:18:35 +0000 Subject: [PATCH 55/69] Remove translation layer for pc parameters --- validphys2/src/validphys/config.py | 35 ++---------------------------- 1 file changed, 2 insertions(+), 33 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 405f7c09ba..38dd402573 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1911,42 +1911,11 @@ def produce_total_phi_data(self, fitthcovmat): return validphys.results.total_phi_data_from_experiments return validphys.results.dataset_inputs_phi_data + # TODO: to be removed once we are sure the the triangular + # function for the prior is the only one of interest def produce_pc_func_type(self, theorycovmatconfig=None): return theorycovmatconfig.get('func_type', 'step') - # @configparser.explicit_node - def produce_power_corr_dict(self, pc_parameters=None, pc_func_type=None): - """The parameters for the power corrections are given as a list. - This function converts this list into a dictionary with the keys - being the names of the types of power corrections (e.g. `H2p`, `H2d`,...). - """ - if pc_parameters is None: - return None - - pc_parameters_by_type = {} - # Loop over the parameterization for the power corrections in the runcard - for par in pc_parameters: - # Check that the length of shifts is one less than the length of nodes. - if (len(par['yshift']) != len(par['nodes']) - 1) and pc_func_type not in [ - 'cubic', - 'linear', - ]: - raise ValueError( - f"The length of nodes does not match that of the list in {par['ht']}." - f"Check the runcard. 
Got {len(par['yshift'])} != {len(par['nodes'])}" - ) - elif (len(par['yshift']) != len(par['nodes'])) and pc_func_type in ['cubic', 'linear']: - raise ValueError( - f"The length of nodes does not match that of the list in {par['ht']}." - f"Check the runcard. Got {len(par['yshift'])} != {len(par['nodes'])}" - ) - - # Store parameters for each power correction - pc_parameters_by_type[par['ht']] = {'yshift': par['yshift'], 'nodes': par['nodes']} - - pc_parameters_by_type['func_type'] = pc_func_type - return pc_parameters_by_type - @configparser.explicit_node def produce_covs_pt_prescrip(self, point_prescription): if point_prescription != 'power corrections': From 43c1666f59ea54862e8cb4b171851d713e17e6c7 Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 10 Mar 2025 15:20:52 +0000 Subject: [PATCH 56/69] Remove copy of the same collection procs_data -> groups_data_by_process + clean-up --- validphys2/src/validphys/results.py | 49 +++---------------- .../theorycovariance/construction.py | 11 ++--- 2 files changed, 12 insertions(+), 48 deletions(-) diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index ac670657f9..596488f90e 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -239,8 +239,7 @@ def data_index(data): experiments_data = collect("data", ("group_dataset_inputs_by_experiment",)) -# NOTE: Same a `groups_data_by_process` in `construction.py` -procs_data = collect("data", ("group_dataset_inputs_by_process",)) +groups_data_by_process = collect("data", ("group_dataset_inputs_by_process",)) def groups_index(groups_data, diagonal_basis=False): @@ -280,48 +279,12 @@ def groups_index(groups_data, diagonal_basis=False): return df.index -def group_kin_table_no_table(groups_data, groups_index): - """Generate a table containing the kinematics and the process_type.""" - result_records = [] - for group_data in groups_data: - group_cd = group_data.load_commondata() - cd = np.concatenate( - [ - group_cd[i].commondata_table[['kin1', 'kin2', 'kin3', 'process']] - for i in range(len(group_cd)) - ], - axis=0, - ) - for index, dataset in enumerate(cd): - try: - process_name = dataset[3].name - except AttributeError: - process_name = dataset[3] - result_records.append( - dict( - [ - ("kin_1", dataset[0]), - ("kin_2", dataset[1]), - ("kin_3", dataset[2]), - ("process_type", process_name), - ] - ) - ) - - if not result_records: - log.warning("Empty records for group results") - return pd.DataFrame() - df = pd.DataFrame(result_records, columns=result_records[0].keys(), index=groups_index) - - return df - - def experiments_index(experiments_data): return groups_index(experiments_data) -def procs_index(procs_data): - return groups_index(procs_data) +def procs_index(groups_data_by_process): + return groups_index(groups_data_by_process) def groups_data_values(group_result_table): @@ -854,10 +817,12 @@ def groups_chi2_table(groups_data, pdf, groups_chi2, groups_each_dataset_chi2): @table -def procs_chi2_table(procs_data, pdf, groups_chi2_by_process, groups_each_dataset_chi2_by_process): +def procs_chi2_table( + groups_data_by_process, pdf, groups_chi2_by_process, groups_each_dataset_chi2_by_process +): """Same as groups_chi2_table but by process""" return groups_chi2_table( - procs_data, pdf, groups_chi2_by_process, groups_each_dataset_chi2_by_process + groups_data_by_process, pdf, groups_chi2_by_process, groups_each_dataset_chi2_by_process ) diff --git a/validphys2/src/validphys/theorycovariance/construction.py 
b/validphys2/src/validphys/theorycovariance/construction.py index e961b9ceab..cfe729470f 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -439,7 +439,7 @@ def theory_covmat_custom_per_prescription(covs_pt_prescrip, procs_index, combine @table -def fromfile_covmat(covmatpath, procs_data, procs_index): +def fromfile_covmat(covmatpath, groups_data_by_process, procs_index): """Reads a general theory covariance matrix from file. Then 1: Applies cuts to match experiment covariance matrix 2: Expands dimensions to match experiment covariance matrix @@ -453,7 +453,7 @@ def fromfile_covmat(covmatpath, procs_data, procs_index): # Reordering covmat to match exp order in runcard # Datasets in exp covmat dslist = [] - for group in procs_data: + for group in groups_data_by_process: for ds in group.datasets: dslist.append(ds.name) # Datasets in filecovmat in exp covmat order @@ -468,7 +468,7 @@ def fromfile_covmat(covmatpath, procs_data, procs_index): # ------------- # # Loading cuts to apply to covariance matrix indextuples = [] - for group in procs_data: + for group in groups_data_by_process: for ds in group.datasets: # Load cuts for each dataset in the covmat if ds.name in filecovmat.index.get_level_values(1): @@ -534,7 +534,7 @@ def fromfile_covmat(covmatpath, procs_data, procs_index): @table -def user_covmat(procs_data, procs_index, loaded_user_covmat_path): +def user_covmat(groups_data_by_process, procs_index, loaded_user_covmat_path): """ General theory covariance matrix provided by the user. Useful for testing the impact of externally produced @@ -544,7 +544,7 @@ def user_covmat(procs_data, procs_index, loaded_user_covmat_path): ``user_covmat_path`` in ``theorycovmatconfig`` in the runcard. For more information see documentation. 
""" - return fromfile_covmat(loaded_user_covmat_path, procs_data, procs_index) + return fromfile_covmat(loaded_user_covmat_path, groups_data_by_process, procs_index) @table @@ -633,4 +633,3 @@ def experimentplustheory_corrmat_custom(procs_covmat, theory_covmat_custom): each_dataset_results = collect(results, ("group_dataset_inputs_by_process", "data")) -groups_data_by_process = collect("data", ("group_dataset_inputs_by_process",)) From b805509f3181bce291b122280c0b281c5b53dd27 Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 10 Mar 2025 15:21:15 +0000 Subject: [PATCH 57/69] Remove unused collect --- validphys2/src/validphys/commondata.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/validphys2/src/validphys/commondata.py b/validphys2/src/validphys/commondata.py index ed8e9053cb..7c68c0784f 100644 --- a/validphys2/src/validphys/commondata.py +++ b/validphys2/src/validphys/commondata.py @@ -38,7 +38,3 @@ def loaded_commondata_with_cuts(commondata, cuts): groups_dataset_inputs_loaded_cd_with_cuts = collect( "loaded_commondata_with_cuts", ("group_dataset_inputs_by_metadata", "data_input") ) - -groups_dataset_inputs_loaded_cd_with_cuts_byprocess = collect( - "loaded_commondata_with_cuts", ("group_dataset_inputs_by_process", "data") -) From fbf60e36eb3a4c3bd4ff3d31a04875b49a3624ce Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 10 Mar 2025 15:38:34 +0000 Subject: [PATCH 58/69] Update basic runcard --- .../examples/Basic_runcard_pc_covmat.yml | 85 +++---------------- validphys2/src/validphys/config.py | 4 +- .../theorycovariance/construction.py | 1 - 3 files changed, 15 insertions(+), 75 deletions(-) diff --git a/n3fit/runcards/examples/Basic_runcard_pc_covmat.yml b/n3fit/runcards/examples/Basic_runcard_pc_covmat.yml index 7daa3d60a6..991e5a7cba 100644 --- a/n3fit/runcards/examples/Basic_runcard_pc_covmat.yml +++ b/n3fit/runcards/examples/Basic_runcard_pc_covmat.yml @@ -8,85 +8,20 @@ description: NNPDF4.0 ht with TCM - DIS (NC & CC) only dataset_inputs: - {dataset: NMC_NC_NOTFIXED_EM-F2, frac: 0.75, variant: legacy_dw} - {dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, frac: 0.75, variant: legacy} -- {dataset: SLAC_NC_NOTFIXED_P_EM-F2, frac: 0.75, variant: legacy_dw} - {dataset: SLAC_NC_NOTFIXED_D_EM-F2, frac: 0.75, variant: legacy_dw} - {dataset: BCDMS_NC_NOTFIXED_P_EM-F2, frac: 0.75, variant: legacy_dw} -- {dataset: BCDMS_NC_NOTFIXED_D_EM-F2, frac: 0.75, variant: legacy_dw} - {dataset: CHORUS_CC_NOTFIXED_PB_NU-SIGMARED, frac: 0.75, variant: legacy_dw} -- {dataset: CHORUS_CC_NOTFIXED_PB_NB-SIGMARED, frac: 0.75, variant: legacy_dw} -- {dataset: NUTEV_CC_NOTFIXED_FE_NU-SIGMARED, cfac: [MAS], frac: 0.75, variant: legacy_dw} - {dataset: NUTEV_CC_NOTFIXED_FE_NB-SIGMARED, cfac: [MAS], frac: 0.75, variant: legacy_dw} -- {dataset: HERA_NC_318GEV_EM-SIGMARED, frac: 0.75, variant: legacy} -- {dataset: HERA_NC_225GEV_EP-SIGMARED, frac: 0.75, variant: legacy} -- {dataset: HERA_NC_251GEV_EP-SIGMARED, frac: 0.75, variant: legacy} -- {dataset: HERA_NC_300GEV_EP-SIGMARED, frac: 0.75, variant: legacy} -- {dataset: HERA_NC_318GEV_EP-SIGMARED, frac: 0.75, variant: legacy} -- {dataset: HERA_CC_318GEV_EM-SIGMARED, frac: 0.75, variant: legacy} - {dataset: HERA_CC_318GEV_EP-SIGMARED, frac: 0.75, variant: legacy} - {dataset: HERA_NC_318GEV_EAVG_CHARM-SIGMARED, frac: 0.75, variant: legacy} - {dataset: HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED, frac: 0.75, variant: legacy} - {dataset: DYE866_Z0_800GEV_DW_RATIO_PDXSECRATIO, frac: 0.75, variant: legacy} -- {dataset: DYE866_Z0_800GEV_PXSEC, frac: 0.75, variant: legacy} -- 
{dataset: DYE605_Z0_38P8GEV_DW_PXSEC, frac: 0.75, variant: legacy} -- {dataset: DYE906_Z0_120GEV_DW_PDXSECRATIO, frac: 0.75, cfac: [ACC], variant: legacy} - {dataset: CDF_Z0_1P96TEV_ZRAP, frac: 0.75, variant: legacy} -- {dataset: D0_Z0_1P96TEV_ZRAP, frac: 0.75, variant: legacy} -- {dataset: D0_WPWM_1P96TEV_ASY, frac: 0.75, variant: legacy} -- {dataset: ATLAS_WPWM_7TEV_36PB_ETA, frac: 0.75, variant: legacy} -- {dataset: ATLAS_Z0_7TEV_36PB_ETA, frac: 0.75, variant: legacy} -- {dataset: ATLAS_Z0_7TEV_49FB_HIMASS, frac: 0.75, variant: legacy} -- {dataset: ATLAS_Z0_7TEV_LOMASS_M, frac: 0.75, variant: legacy} -- {dataset: ATLAS_WPWM_7TEV_46FB_CC-ETA, frac: 0.75, variant: legacy} -- {dataset: ATLAS_Z0_7TEV_46FB_CC-Y, frac: 0.75, variant: legacy} -- {dataset: ATLAS_Z0_7TEV_46FB_CF-Y, frac: 0.75, variant: legacy} -- {dataset: ATLAS_Z0_8TEV_HIMASS_M-Y, frac: 0.75, variant: legacy} -- {dataset: ATLAS_Z0_8TEV_LOWMASS_M-Y, frac: 0.75, variant: legacy} -- {dataset: ATLAS_Z0_13TEV_TOT, frac: 0.75, cfac: [NRM], variant: legacy} -- {dataset: ATLAS_WPWM_13TEV_TOT, frac: 0.75, cfac: [NRM], variant: legacy} -- {dataset: ATLAS_WJ_8TEV_WP-PT, frac: 0.75, variant: legacy} -- {dataset: ATLAS_WJ_8TEV_WM-PT, frac: 0.75, variant: legacy} -- {dataset: ATLAS_Z0J_8TEV_PT-M, frac: 0.75, variant: legacy_10} - {dataset: ATLAS_Z0J_8TEV_PT-Y, frac: 0.75, variant: legacy_10} -- {dataset: ATLAS_TTBAR_7TEV_TOT_X-SEC, frac: 0.75, variant: legacy} -- {dataset: ATLAS_TTBAR_8TEV_TOT_X-SEC, frac: 0.75, variant: legacy} -- {dataset: ATLAS_TTBAR_13TEV_TOT_X-SEC, frac: 0.75, variant: legacy} -- {dataset: ATLAS_TTBAR_8TEV_LJ_DIF_YT-NORM, frac: 0.75, variant: legacy} -- {dataset: ATLAS_TTBAR_8TEV_LJ_DIF_YTTBAR-NORM, frac: 0.75, variant: legacy} -- {dataset: ATLAS_TTBAR_8TEV_2L_DIF_YTTBAR-NORM, frac: 0.75, variant: legacy} - {dataset: ATLAS_1JET_8TEV_R06_PTY, frac: 0.75, variant: legacy_decorrelated} - {dataset: ATLAS_2JET_7TEV_R06_M12Y, frac: 0.75, variant: legacy} -- {dataset: ATLAS_PH_13TEV_XSEC, frac: 0.75, cfac: [EWK], variant: legacy} -- {dataset: ATLAS_SINGLETOP_7TEV_TCHANNEL-XSEC, frac: 0.75, variant: legacy} -- {dataset: ATLAS_SINGLETOP_13TEV_TCHANNEL-XSEC, frac: 0.75, variant: legacy} -- {dataset: ATLAS_SINGLETOP_7TEV_T-Y-NORM, frac: 0.75, variant: legacy} -- {dataset: ATLAS_SINGLETOP_7TEV_TBAR-Y-NORM, frac: 0.75, variant: legacy} -- {dataset: ATLAS_SINGLETOP_8TEV_T-RAP-NORM, frac: 0.75, variant: legacy} -- {dataset: ATLAS_SINGLETOP_8TEV_TBAR-RAP-NORM, frac: 0.75, variant: legacy} -- {dataset: CMS_WPWM_7TEV_ELECTRON_ASY, frac: 0.75} -- {dataset: CMS_WPWM_7TEV_MUON_ASY, frac: 0.75, variant: legacy} -- {dataset: CMS_Z0_7TEV_DIMUON_2D, frac: 0.75} -- {dataset: CMS_WPWM_8TEV_MUON_Y, frac: 0.75, variant: legacy} -- {dataset: CMS_Z0J_8TEV_PT-Y, frac: 0.75, cfac: [NRM], variant: legacy_10} - {dataset: CMS_2JET_7TEV_M12Y, frac: 0.75} - {dataset: CMS_1JET_8TEV_PTY, frac: 0.75, variant: legacy} -- {dataset: CMS_TTBAR_7TEV_TOT_X-SEC, frac: 0.75, variant: legacy} -- {dataset: CMS_TTBAR_8TEV_TOT_X-SEC, frac: 0.75, variant: legacy} -- {dataset: CMS_TTBAR_13TEV_TOT_X-SEC, frac: 0.75, variant: legacy} -- {dataset: CMS_TTBAR_8TEV_LJ_DIF_YTTBAR-NORM, frac: 0.75, variant: legacy} -- {dataset: CMS_TTBAR_5TEV_TOT_X-SEC, frac: 0.75, variant: legacy} -- {dataset: CMS_TTBAR_8TEV_2L_DIF_MTTBAR-YT-NORM, frac: 0.75, variant: legacy} -- {dataset: CMS_TTBAR_13TEV_2L_DIF_YT, frac: 0.75, variant: legacy} -- {dataset: CMS_TTBAR_13TEV_LJ_2016_DIF_YTTBAR, frac: 0.75, variant: legacy} -- {dataset: CMS_SINGLETOP_7TEV_TCHANNEL-XSEC, frac: 0.75, variant: legacy} -- 
{dataset: CMS_SINGLETOP_8TEV_TCHANNEL-XSEC, frac: 0.75, variant: legacy} -- {dataset: CMS_SINGLETOP_13TEV_TCHANNEL-XSEC, frac: 0.75, variant: legacy} -- {dataset: LHCB_Z0_7TEV_DIELECTRON_Y, frac: 0.75} -- {dataset: LHCB_Z0_8TEV_DIELECTRON_Y, frac: 0.75} -- {dataset: LHCB_WPWM_7TEV_MUON_Y, frac: 0.75, cfac: [NRM]} -- {dataset: LHCB_Z0_7TEV_MUON_Y, frac: 0.75, cfac: [NRM]} -- {dataset: LHCB_WPWM_8TEV_MUON_Y, frac: 0.75, cfac: [NRM]} -- {dataset: LHCB_Z0_8TEV_MUON_Y, frac: 0.75, cfac: [NRM]} -- {dataset: LHCB_Z0_13TEV_DIMUON-Y, frac: 0.75} - {dataset: LHCB_Z0_13TEV_DIELECTRON-Y, frac: 0.75} ################################################################################ @@ -103,18 +38,22 @@ theory: theorycovmatconfig: point_prescriptions: ["9 point", "power corrections"] pc_parameters: - - {ht: H2p, yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0], nodes: [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} - - {ht: H2d, yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0], nodes: [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} - - {ht: HLp, yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0], nodes: [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} - - {ht: HLd, yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0], nodes: [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} - - {ht: H3p, yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0], nodes: [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} - - {ht: H3d, yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0], nodes: [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} - pc_included_procs: ["DIS NC", "DIS CC"] + H2p: {yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0], nodes: [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} + H2d: {yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0], nodes: [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} + HLp: {yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0], nodes: [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} + HLd: {yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0], nodes: [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} + H3p: {yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0], nodes: [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} + H3d: {yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0], nodes: [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} + Hj: {yshift: [2.0, 2.0, 2.0, 2.0, 2.0, 2.0], nodes: [0.25, 0.75, 1.25, 1.75, 2.25, 2.75]} + H2j_ATLAS: {yshift: [2.0, 2.0, 2.0, 2.0, 2.0, 2.0], nodes: [0.25, 0.75, 1.25, 1.75, 2.25, 2.75]} + H2j_CMS: {yshift: [2.0, 2.0, 2.0, 2.0, 2.0], nodes: [0.25, 0.75, 1.25, 1.75, 2.25]} + pc_included_procs: ["JETS", "DIJET", "DIS NC", "DIS CC"] pc_excluded_exps: [HERA_NC_318GEV_EAVG_CHARM-SIGMARED, HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED,] pdf: 210619-n3fit-001 use_thcovmat_in_fitting: true use_thcovmat_in_sampling: true +resample_negative_pseudodata: false # For fits <= 4.0 multiplicative and additive uncertainties were sampled separately # and thus the flag `separate_multiplicative` needs to be set to True @@ -136,7 +75,7 @@ parameters: # This defines the parameter dictionary that is passed to the Model clipnorm: 6.073e-6 learning_rate: 2.621e-3 optimizer_name: Nadam - epochs: 17000 + epochs: 3000 positivity: initial: 184.8 multiplier: diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 38dd402573..c6e2f54a9f 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1914,7 +1914,9 @@ def produce_total_phi_data(self, fitthcovmat): # TODO: to be removed once we are sure the the triangular # function for the prior is the only one of interest def produce_pc_func_type(self, theorycovmatconfig=None): - return 
theorycovmatconfig.get('func_type', 'step') + if theorycovmatconfig is None: + raise ValueError("theorycovmatconfig is defined in the runcard.") + return theorycovmatconfig.get('func_type', 'linear') @configparser.explicit_node def produce_covs_pt_prescrip(self, point_prescription): diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index cfe729470f..a796d05d73 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -346,7 +346,6 @@ def covs_pt_prescrip_mhou(combine_by_type, point_prescription): running_index = 0 covmats = defaultdict(list) - print(point_prescription) start_proc = defaultdict(list) for name in process_info.preds: size = len(process_info.preds[name][0]) From b06116601896becbf5f26d8c96c08ac38d34a0c2 Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 10 Mar 2025 17:52:10 +0000 Subject: [PATCH 59/69] Allow generation of L1 data --- validphys2/src/validphys/config.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index c6e2f54a9f..802c4f5773 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1883,7 +1883,18 @@ def produce_filter_data( if not fakedata: return validphys.filters.filter_real_data else: - if inconsistent_fakedata: + # TODO we don't want to sample from the theory covmat for L1 data, + # but we do want to use the theory covmat for L2 data + if theorycovmatconfig is not None and theorycovmatconfig.get( + "use_thcovmat_in_fakedata_sampling" + ): + # NOTE: By the time we run theory covmat closure tests, + # hopefully the generation of pseudodata will be done in python. + raise ConfigError( + "Generating L1 closure test data which samples from the theory " + "covariance matrix has not been implemented yet." + ) + elif inconsistent_fakedata: log.info("Using filter for inconsistent closure data") return validphys.filters.filter_inconsistent_closure_data_by_experiment From 65d858889efea8bd9f8137a2a8a17241ba4e5b2f Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 14 Apr 2025 09:52:18 +0100 Subject: [PATCH 60/69] Jets with single parameters --- .../higher_twist_functions.py | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 57befe0f71..99c91cfaa2 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -246,6 +246,11 @@ def jets_pc_func( return PC +def jet_single_par(delta_h: float, pT: npt.ArrayLike, rap: npt.ArrayLike) -> npt.ArrayLike: + ret = [delta_h for _ in range(rap.size)] + return np.array(ret) / pT + + def JET_pc(dataset_sp, pdf, pc_nodes, pT, rap, pc_func_type: str = "step"): """ Returns the function that computes the shift for the ratio for single @@ -271,6 +276,26 @@ def func(y_values): return func +def JET_pc_single_par(dataset_sp, pdf, pc_nodes, pT, rap, pc_func_type: str = "step"): + """ + As JET_pc, but with one single shift for all rapidity bins. + + This function is meant to be for development purposes only. 
It will either substitute + JET_pc or be deleted in the future.""" + cuts = dataset_sp.cuts + (fkspec,) = dataset_sp.fkspecs + fk = fkspec.load_with_cuts(cuts) + xsec = central_fk_predictions(fk, pdf) + + def func(y_values): + assert y_values.size == 1 + ret = [y_values[0] for _ in range(rap.size)] + ret = np.array(ret) / pT + return np.multiply(ret, xsec.to_numpy()[:, 0]) + + return func + + # TODO Maybe we want to treat the function that parametrizes the PC # as argument? def DIS_F2_pc(pc2_nodes, x, q2, pc_func_type: str = "step"): @@ -1046,7 +1071,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): eta = dataset_sp.commondata.metadata.load_kinematics()['y'].to_numpy().reshape(-1)[cuts] pT = dataset_sp.commondata.metadata.load_kinematics()['pT'].to_numpy().reshape(-1)[cuts] - pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, pT, eta, pc_func_type) + pc_func = JET_pc_single_par(dataset_sp, pdf, pc_jet_nodes, pT, eta, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['Hj']) @@ -1064,7 +1089,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): .to_numpy() .reshape(-1)[cuts] ) - pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, m_jj, eta_star, pc_func_type) + pc_func = JET_pc_single_par(dataset_sp, pdf, pc_jet_nodes, m_jj, eta_star, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_ATLAS']) @@ -1080,7 +1105,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): .to_numpy() .reshape(-1)[cuts] ) - pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, m_jj, eta_diff, pc_func_type) + pc_func = JET_pc_single_par(dataset_sp, pdf, pc_jet_nodes, m_jj, eta_diff, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_CMS']) From 8490ccb1a9a10b584564a13ea33d4cfdbb8dd1b2 Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 14 Apr 2025 10:16:24 +0100 Subject: [PATCH 61/69] Adjust format --- n3fit/src/n3fit/scripts/vp_setupfit.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 0f9ee8e502..3da207cf0e 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -1,8 +1,8 @@ #!/usr/bin/env python """ - setup-fit - prepare and apply data cuts before fit - setup-fit constructs the fit [results] folder where data used by nnfit - will be stored. +setup-fit - prepare and apply data cuts before fit +setup-fit constructs the fit [results] folder where data used by nnfit +will be stored. 
""" # Implementation notes @@ -191,6 +191,8 @@ def from_yaml(cls, o, *args, **kwargs): # Check positivity bound if file_content.get('positivity_bound') is not None: SETUPFIT_FIXED_CONFIG['actions_'].append('positivity_bound check_unpolarized_bc') + + # Sets default values if they are not present in the runcard for k, v in SETUPFIT_DEFAULTS.items(): file_content.setdefault(k, v) From 77b8cce34458b0dcfc14c0f20619fc7e809661f5 Mon Sep 17 00:00:00 2001 From: achiefa Date: Wed, 23 Apr 2025 16:19:22 +0100 Subject: [PATCH 62/69] Restoring nodes for jets and dijets --- .../validphys/theorycovariance/higher_twist_functions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 99c91cfaa2..972576f10d 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -1071,7 +1071,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): eta = dataset_sp.commondata.metadata.load_kinematics()['y'].to_numpy().reshape(-1)[cuts] pT = dataset_sp.commondata.metadata.load_kinematics()['pT'].to_numpy().reshape(-1)[cuts] - pc_func = JET_pc_single_par(dataset_sp, pdf, pc_jet_nodes, pT, eta, pc_func_type) + pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, pT, eta, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['Hj']) @@ -1089,7 +1089,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): .to_numpy() .reshape(-1)[cuts] ) - pc_func = JET_pc_single_par(dataset_sp, pdf, pc_jet_nodes, m_jj, eta_star, pc_func_type) + pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, m_jj, eta_star, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_ATLAS']) @@ -1105,7 +1105,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): .to_numpy() .reshape(-1)[cuts] ) - pc_func = JET_pc_single_par(dataset_sp, pdf, pc_jet_nodes, m_jj, eta_diff, pc_func_type) + pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, m_jj, eta_diff, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_CMS']) From caa8125260a3143426f35b8eeefa1597fb692ef5 Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 10 Jun 2025 09:22:21 +0100 Subject: [PATCH 63/69] Implementation of multiplicative shifts --- .../higher_twist_functions.py | 743 +++--------------- 1 file changed, 93 insertions(+), 650 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 972576f10d..fee68c6c02 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -12,7 +12,7 @@ computed using a 5-point prescription extended to every parameter used to define the power correction. -This module comprehends a bunch of ``factory`` functions such as `DIS_F2_pc`. Each +This module comprehends a bunch of ``factory`` functions such as `mult_dis_pc`. Each of these functions returns another function that computes the shifts taking as arguments the values of the parameters used to parametrise the power corrections. 
In other words, these factory functions hard-code the dependence on the kinematic @@ -30,11 +30,6 @@ from validphys.convolution import central_fk_predictions from validphys.core import PDF, DataSetSpec -GEV_CM2_CONV = 3.893793e10 -GF = 1.1663787e-05 # Fermi's constant [GeV^-2] -Mh = 0.938 # Proton's mass in GeV/c^2 -MW = 80.398 # W boson mass in GeV/c^2 - F2P_exps = ['SLAC_NC_NOTFIXED_P_EM-F2', 'BCDMS_NC_NOTFIXED_P_EM-F2'] F2D_exps = ['SLAC_NC_NOTFIXED_D_EM-F2', 'BCDMS_NC_NOTFIXED_D_EM-F2'] NC_SIGMARED_P_EM = ['NMC_NC_NOTFIXED_P_EM-SIGMARED', 'HERA_NC_318GEV_EM-SIGMARED'] @@ -246,147 +241,99 @@ def jets_pc_func( return PC -def jet_single_par(delta_h: float, pT: npt.ArrayLike, rap: npt.ArrayLike) -> npt.ArrayLike: - ret = [delta_h for _ in range(rap.size)] - return np.array(ret) / pT +# def jet_single_par(delta_h: float, pT: npt.ArrayLike, rap: npt.ArrayLike) -> npt.ArrayLike: +# ret = [delta_h for _ in range(rap.size)] +# return np.array(ret) / pT +# def mult_jet_pc_single_par(dataset_sp, pdf, pc_nodes, pT, rap, pc_func_type: str = "step"): +# """ +# As mult_jet_pc, but with one single shift for all rapidity bins. -def JET_pc(dataset_sp, pdf, pc_nodes, pT, rap, pc_func_type: str = "step"): - """ - Returns the function that computes the shift for the ratio for single - jet cross sections. In particular, the shift is computed such that +# This function is meant to be for development purposes only. It will either substitute +# mult_jet_pc or be deleted in the future.""" +# cuts = dataset_sp.cuts +# (fkspec,) = dataset_sp.fkspecs +# fk = fkspec.load_with_cuts(cuts) +# xsec = central_fk_predictions(fk, pdf) - xsec -> xsec * ( 1 + PC ), +# def func(y_values): +# assert y_values.size == 1 +# ret = [y_values[0] for _ in range(rap.size)] +# ret = np.array(ret) / pT +# return np.multiply(ret, xsec.to_numpy()[:, 0]) - and the shift is defined as +# return func - Delta(xsec) = (xsec + xsec) - xsec = PC. - The power correction is a function of the rapidity. +def mult_dis_pc(nodes, x, q2, dataset_sp, pdf, pc_func_type: str = "step"): """ - cuts = dataset_sp.cuts - (fkspec,) = dataset_sp.fkspecs - fk = fkspec.load_with_cuts(cuts) - xsec = central_fk_predictions(fk, pdf) + Returns the function that computes the shift to observables due to + power corrections. Power corrections are treated as multiplicative + shifts. Hence if `O` is the observable, the prediction is shifted as - def func(y_values): - result = jets_pc_func(y_values, pc_nodes, pT, rap, pc_func_type) - return np.multiply(result, xsec.to_numpy()[:, 0]) + O -> O * (1 + PC), - return func + and the shift is defined as + Delta(O) = O * (1 + PC) - O = O * PC. -def JET_pc_single_par(dataset_sp, pdf, pc_nodes, pT, rap, pc_func_type: str = "step"): + This function returns a function that computes the shift + given the y-values of the nodes used to define the power corrections. + The interpolation between the nodes is specified by `pc_func_type`. """ - As JET_pc, but with one single shift for all rapidity bins. - - This function is meant to be for development purposes only. 
It will either substitute - JET_pc or be deleted in the future.""" cuts = dataset_sp.cuts (fkspec,) = dataset_sp.fkspecs fk = fkspec.load_with_cuts(cuts) - xsec = central_fk_predictions(fk, pdf) + th_preds = central_fk_predictions(fk, pdf) def func(y_values): - assert y_values.size == 1 - ret = [y_values[0] for _ in range(rap.size)] - ret = np.array(ret) / pT - return np.multiply(ret, xsec.to_numpy()[:, 0]) + result = dis_pc_func(y_values, nodes, x, q2, pc_func_type) + return np.multiply(result, th_preds.to_numpy()[:, 0]) return func -# TODO Maybe we want to treat the function that parametrizes the PC -# as argument? -def DIS_F2_pc(pc2_nodes, x, q2, pc_func_type: str = "step"): +def mult_dis_ratio_pc(p_nodes, d_nodes, x, q2, dataset_sp, pdf, pc_func_type: str = "step"): """ Returns the function that computes the shift for the ratio of structure functions F2_d / F2_p. For this observable, power corrections are defined such that - F2 -> F2 + PC2, - - and the shift is defined as - - Delta(F2) = (F2 + PC2) - F2 = PC2. - - Note that, as in the case of `DIS_F2R_ht`, the shift still depends on the set - of parameters needed to define the parameterization of PC2. Also, this function - can be used to compute the shift for both proton and deuteron, provided that the - correct list of parameters is passed to the **curried** function. - - The function used to parametrize the the power correction is `dis_pc_func` and - it is hard coded. - - Parameters - ---------- - - """ - - def PC_2(y_values): - result = dis_pc_func(y_values, pc2_nodes, x, q2, pc_func_type) - return result - - return PC_2 - - -def DIS_F2R_pc(experiment, pdf, pc_2_p_nodes, pc_2_d_nodes, x, q2, pc_func_type: str = "step"): - """ - Returns the function that computes the shift for the ratio of structure - functions F2_d / F2_p. For this observable, power corrections are defined - such that - - F2_d / F2_p -> (F2_d + PC2_d) / (F2_p + PC2_p) , + F2_d / F2_p -> F2_d * (1 + PC2_d) / F2_p * (1 + PC2_p) , and the shift is the defined as - Delta(F2 ratio) = (F2_d + PC2_d) / (F2_p + PC2_p) - F2_d / F2_p . - - The shift is computed for a given set of kinematic variables specified - by the paris (x,Q2), but it still depends on the set of parameters need by - the power correction terms PC2_d and PC2_p. + Delta(F2 ratio) = F2_d / F2_p * (PC2_d - PC2_p) / (1 + PC2_d). - Note that this function does **not** return the power corrections for the - given kinematic points, but rather it returns another function where the - kinematic dependence has been **curried** (i.e. hard coded). This new function - takes as arguments the y-values of the nodes used to compute PC2_d and PC2_p - (see `delta_h` in `dis_pc_func`). Note that these y-values are not necessarily - the values listed in the runcard, as we can apply different point prescription. - For instance, we may want to pass in a set of y-values where the nodes are shifted - one at the time, leaving the others zero. The prescription is thus handled separately. - The returning function allows thus to compute Delta(F2 ratio)({...}_d, {...}_p), where - `{...}_d` and `{...}_p` are the sets of y-values for the parametrisation for the proton - and deuteron terms respectively. - - The function used to parametrize the the power correction is `dis_pc_func` and - it is hard coded. + As for `mult_dis_pc`, this function returns a function that computes the shift + for the ratio of structure functions F2_d / F2_p given a set of y-values. 
Parameters ---------- - experiment: DataSetSpec - An instance of DataSetSpec used to extract information such as cuts - and fk tables. - pdf: PDF - An instance of the class PDF. This specifies the PDF to bo convoluted - with the FK tables. - pc_2_p_nodes: list[float] + p_nodes: list[float] The list of nodes in x-Bjorken used to define the parametrization of the power correction for the proton (see `dis_pc_func`). - pc_2_d_nodes: list[float] + d_nodes: list[float] The list of nodes in x-Bjorken used to define the parametrization of the power correction for the deuteron (see `dis_pc_func`). x: list[float] Set of points in x-Bjorken where the power corrections will be evaluated. q2: list[float] Set of points in Q2 where the power corrections will be evaluated. + dataset_sp: DataSetSpec + An instance of DataSetSpec used to extract information such as cuts + and fk tables. + pdf: PDF + An instance of the class PDF. This specifies the PDF to bo convoluted + with the FK tables. Returns ------- The function the computes the shift for this observable. It depends on the y-values for the parameterization of P2_d and P2_p. """ - cuts = experiment.cuts - fkspec_F2D, fkspec_F2P = experiment.fkspecs + cuts = dataset_sp.cuts + fkspec_F2D, fkspec_F2P = dataset_sp.fkspecs fk_F2D = fkspec_F2D.load_with_cuts(cuts) fk_F2P = fkspec_F2P.load_with_cuts(cuts) F2D = central_fk_predictions(fk_F2D, pdf) @@ -396,421 +343,39 @@ def DIS_F2R_pc(experiment, pdf, pc_2_p_nodes, pc_2_d_nodes, x, q2, pc_func_type: F2P = np.concatenate(F2P.values) F2_ratio = operator.truediv(F2D, F2P) - def func(y_values_d, y_values_p): - PC_d = dis_pc_func(y_values_d, pc_2_d_nodes, x, q2, pc_func_type) - PC_p = dis_pc_func(y_values_p, pc_2_p_nodes, x, q2, pc_func_type) - num = np.sum([F2D, PC_d], axis=0) - denom = np.sum([F2P, PC_p], axis=0) - result = np.array(operator.truediv(num, denom) - F2_ratio) - return result - - return func - - -def DIS_F2C_pc(pc2_p_nodes, pc2_d_nodes, x, q2, pc_func_type: str = "step"): - """ - Builds the function used to compute the shifts for the charm structure - function measured by EMC. The process involved is - - mu^+ + Fe -> mu+^ + c cbar + X . - - This function works exactly as the previous functions used to compute - nuisance shifts. In this case, the constructed function (`func` below) - requires two lists of parameters for the proton and the deuteron - contribution. The reason being that in this process the muon scatters off an - iron target, and the power correction contribution is a mixture of proton - and deuteron nucleons. Hence, proton and deuteron contribution are weighted - by the appropriate atomic factor. - - Note that we are parametrising power corrections as proton and deuteron - targets. If we were to parametrize such contributions using, say, proton and - nucleon, than the weights would change. - - - Nuclear target - -------------- - The power corrections for nuclear observables, like in this case, are - affected by the pc contribution of the protons and that of the neutrons. If - we allow for the non-isoscalarity of the target, and combine the two - contributions in accordance with the atomic and mass number (A and Z - respectively), the power correction for the nuclear target can be written as - (see eq.(4.2.5) in - https://nnpdf.mi.infn.it/wp-content/uploads/2021/09/thesis_master_RP.pdf) - - PC_N = 1/A (Z * PC_p + (A-Z) * PC_n) . - - The deuteron is obtained using the isoscalarity, namely - - PC_d = 1/2 (PC_p + PC_n) . 
- - Since we parametrise the power corrections of the proton and the deuteron, - we can combine the above equations and write - - PC_N = 1/A * ( PC_p * (2Z - A) + 2 * PC_d * (A - Z) ) - - Parameters - ---------- - pc2_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for the proton (see `dis_pc_func`). - pc2_d_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for the deuteron (see `dis_pc_func`). - x: list[float] - Set of points in x-Bjorken where the power corrections will be evaluated. - q2: list[float] - Set of points in Q2 where the power corrections will be evaluated. - - Returns - ------- - The function the computes the shift for this observable. It depends on the - y-values for the parameterization of P2_d and P2_p. - """ - # Iron target - Z = 23.403 - A = 49.618 - - def func(y_values_d, y_values_p): - PC2_d = dis_pc_func(y_values_d, pc2_d_nodes, x, q2, pc_func_type) - PC2_p = dis_pc_func(y_values_p, pc2_p_nodes, x, q2, pc_func_type) - result = (2 * Z - A) / A * PC2_p + 2 * (A - Z) / A * PC2_d - return result - - return func - - -def DIS_NC_XSEC_pc(pc2_nodes, pcL_nodes, pc3_nodes, lepton, x, q2, y, pc_func_type: str = "step"): - """ - Builds the function used to compute the shifts for the DIS NC x-secs - delivered by HERA and NMC. The x-sec is reconstructed as calculated - in Yadism (see https://yadism.readthedocs.io/en/latest/theory/intro.html). - In particular, the x-sec is a linear combination of the structure functions - F_2, F_L, and F_3. The coefficients are also computed appropriately (see - link). The contribution of the power corrections is then - - Delta(x-sec) = x-sec_w_pc - x-sec_wo_pc = PC_2 + N_L * PC_L + N_3 * PC_3 - - where PC_i are the power corrections relative to the respective structure - functions and the N_i the respective coefficients (as defined in Yadism). - - This function works exactly as the previous functions used to - compute nuisance shifts. In addition, it requires the kinematic - invariant `y` to build the shift-function. - - Note that this function can be used for both proton and deuteron targets, - provided that the appropriate lists of nodes is given. - - Parameters - ---------- - pc2_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_2. - pcL_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_L. - pc3_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_3. - lepton: int - Whether the scattering particle is a lepton (0) or an anti-lepton(1). - x: list[float] - Set of points in x-Bjorken where the power corrections will be evaluated. - q2: list[float] - Set of points in Q2 where the power corrections will be evaluated. - y: list[float] - Set of points in y where the power corrections will be evaluated. - - Returns - ------- - The function the computes the shift for this observable. It depends on the - y-values for the parameterization of P2 and PL and P3. 
- """ - yp = 1 + np.power(1 - y, 2) - ym = 1 - np.power(1 - y, 2) - yL = np.power(y, 2) - N_L = -yL / yp # Coefficient for F_L - N_3 = np.power(-1, lepton) * ym / yp # Coefficient for F_3 - - def func(y_values_pc2, y_values_pcL, y_values_pc3): - PC_2 = dis_pc_func(y_values_pc2, pc2_nodes, x, q2, pc_func_type) - PC_L = dis_pc_func(y_values_pcL, pcL_nodes, x, q2, pc_func_type) - PC_3 = dis_pc_func(y_values_pc3, pc3_nodes, x, q2, pc_func_type) - result = PC_2 + N_L * PC_L + N_3 * PC_3 + def func(y_values_p, y_values_d): + h2d = dis_pc_func(y_values_d, d_nodes, x, q2, pc_func_type) + h2p = dis_pc_func(y_values_p, p_nodes, x, q2, pc_func_type) + num = np.sum([h2d, -h2p], axis=0) + denom = np.sum([np.ones_like(h2p), h2p], axis=0) + result = np.multiply(operator.truediv(num, denom), F2_ratio) return result return func - -def DIS_CC_HERA_XSEC_pc( - pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, lepton, x, q2, y, pc_func_type: str = "step" -): +def mult_jet_pc(nodes, pT, rap, dataset_sp, pdf, pc_func_type: str = "step"): """ - Builds the function used to compute the shifts for the DIS CC x-secs - delivered by HERA. The x-sec is reconstructed as calculated - in Yadism (see https://yadism.readthedocs.io/en/latest/theory/intro.html). - In particular, the x-sec is a linear combination of the structure functions - F_2, F_L, and F_3. The coefficients are also computed appropriately (see - link). The contribution of the power corrections is then - - Delta(x-sec) = x-sec_w_pc - x-sec_wo_pc = N * (PC_2 + N_L * PC_L + N_3 * PC_3) - - where PC_i are the power corrections relative to the respective structure - functions and the N_i the respective coefficients (as defined in Yadism). - N is the overall normalization factor. + As `mult_dis_pc`, but for jet data. The power corrections are defined as - For the HERA_CC_318GEV dataset, the target is always a proton. However, the - lepton may be either the electron (0) or the positron (1). This information - is needed in order to compute the coefficient N_3. - - Parameters - ---------- - pc2_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_2. - pcL_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_L. - pc3_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_3. - lepton: int - Whether the scattering particle is a lepton (0) or an anti-lepton(1). - x: list[float] - Set of points in x-Bjorken where the power corrections will be evaluated. - q2: list[float] - Set of points in Q2 where the power corrections will be evaluated. - y: list[float] - Set of points in y where the power corrections will be evaluated. - - Returns - ------- - The function the computes the shift for this observable. It depends on the - y-values for the parameterization of P2 and PL and P3. 
- """ - yp = 1 + np.power(1 - y, 2) - ym = 1 - np.power(1 - y, 2) - yL = np.power(y, 2) - N = 1 / 4 * yp # Overall normalization - N_L = -yL / yp # Coefficient for F_L - N_3 = np.power(-1, lepton) * ym / yp # Coefficient for F_3 - - def func(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): - # Initialize power corrections for each structure function - PC2_p = dis_pc_func(y_values_pc2_p, pc2_p_nodes, x, q2, pc_func_type) - PCL_p = dis_pc_func(y_values_pcL_p, pcL_p_nodes, x, q2, pc_func_type) - PC3_p = dis_pc_func(y_values_pc3_p, pc3_p_nodes, x, q2, pc_func_type) - - # Build the contribution to the x-sec of the power corrections - result = N * (PC2_p + N_L * PCL_p + N_3 * PC3_p) - return result - - return func - - -def DIS_CC_NUTEV_pc( - pc2_p_nodes, - pcL_p_nodes, - pc3_p_nodes, - pc2_d_nodes, - pcL_d_nodes, - pc3_d_nodes, - lepton, - x, - q2, - y, - pc_func_type: str = "step", -): - """ - Builds the function used to compute the shifts for the DIS CC x-secs - delivered by NuTeV. The x-sec is reconstructed as calculated - in Yadism (see https://yadism.readthedocs.io/en/latest/theory/intro.html). - In particular, the x-sec is a linear combination of the structure functions - F_2, F_L, and F_3. The coefficients are also computed appropriately (see - link). Note that this experiment uses iron targets, and thus the coefficients - must take into account the nuclear mixture of porton and deuteron. The contribution - of the power corrections is then - - Delta(x-sec) = x-sec_w_pc - x-sec_wo_pc = N * (PC_2 + N_L * PC_L + N_3 * PC_3) - - where PC_i are the power corrections relative to the respective structure - functions (nuclear mixture implicit) and the N_i the respective coefficients (as defined in Yadism). - N is the overall normalization factor. - - For the NuTeV CC dataset, the target is always iron. However, the - lepton may be either the electron (0) or the positron (1). This - information is needed in order to compute the coefficient N_3. - - Nuclear target - -------------- - See `DIS_F2C_pc`. - - Parameters - ---------- - pc2_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_2 of the proton. - pcL_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_L of the proton. - pc3_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_3 of the proton. - pc2_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_2 of the deuteron. - pcL_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_L of the deuteron. - pc3_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_3 of the deuteron. - lepton: int - Whether the scattering particle is a lepton (0) or an anti-lepton(1). - x: list[float] - Set of points in x-Bjorken where the power corrections will be evaluated. - q2: list[float] - Set of points in Q2 where the power corrections will be evaluated. - y: list[float] - Set of points in y where the power corrections will be evaluated. - - Returns - ------- - The function the computes the shift for this observable. It depends on the - y-values for the parameterization of P2 and PL and P3 for proton and deuteron. 
- """ - # Iron target - Z = 23.403 - A = 49.618 - yp = 1 + np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / q2 - ym = 1 - np.power(1 - y, 2) - yL = np.power(y, 2) - N_L = -yL / yp # Coefficient for F_L - N_3 = np.power(-1, lepton) * ym / yp # Coefficient for F_3 - - MW2 = np.power(MW, 2) - # Overall coefficient - # TODO: cross-check - N = 100 / 2 / np.power(1 + q2 / MW2, 2) * yp - - def func( - y_values_pc2_p, - y_values_pcL_p, - y_values_pc3_p, - y_values_pc2_d, - y_values_pcL_d, - y_values_pc3_d, - ): - PC2_p = dis_pc_func(y_values_pc2_p, pc2_p_nodes, x, q2, pc_func_type) - PCL_p = dis_pc_func(y_values_pcL_p, pcL_p_nodes, x, q2, pc_func_type) - PC3_p = dis_pc_func(y_values_pc3_p, pc3_p_nodes, x, q2, pc_func_type) - PC2_d = dis_pc_func(y_values_pc2_d, pc2_d_nodes, x, q2, pc_func_type) - PCL_d = dis_pc_func(y_values_pcL_d, pcL_d_nodes, x, q2, pc_func_type) - PC3_d = dis_pc_func(y_values_pc3_d, pc3_d_nodes, x, q2, pc_func_type) - tmp_2 = (2 * Z - A) / A * PC2_p + 2 * (A - Z) / A * PC2_d - tmp_L = (2 * Z - A) / A * PCL_p + 2 * (A - Z) / A * PCL_d - tmp_3 = (2 * Z - A) / A * PC3_p + 2 * (A - Z) / A * PC3_d - result = N * (tmp_2 + N_L * tmp_L + N_3 * tmp_3) - return result + xsec -> xsec * ( 1 + PC ), - return func + and the shift is defined as + Delta(xsec) = (xsec + xsec) - xsec = PC. -# TODO This is function is really similar to the one -# defined for NUTEV CC. Can we reduce code repetitions? -def DIS_CC_CHORUS_pc( - pc2_p_nodes, - pcL_p_nodes, - pc3_p_nodes, - pc2_d_nodes, - pcL_d_nodes, - pc3_d_nodes, - lepton, - x, - q2, - y, - pc_func_type: str = "step", -): + The power correction is a function of the rapidity. """ - Same as DIS_CC_NUTEV_pc, but for CHORUS CC. - - Note that the difference here is in the definition of the overall - normalization N. - - Nuclear target - -------------- - See `DIS_F2C_pc`. - - Parameters - ---------- - pc2_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_2 of the proton. - pcL_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_L of the proton. - pc3_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_3 of the proton. - pc2_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_2 of the deuteron. - pcL_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_L of the deuteron. - pc3_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_3 of the deuteron. - lepton: int - Whether the scattering particle is a lepton (0) or an anti-lepton(1). - x: list[float] - Set of points in x-Bjorken where the power corrections will be evaluated. - q2: list[float] - Set of points in Q2 where the power corrections will be evaluated. - y: list[float] - Set of points in y where the power corrections will be evaluated. + cuts = dataset_sp.cuts + (fkspec,) = dataset_sp.fkspecs + fk = fkspec.load_with_cuts(cuts) + xsec = central_fk_predictions(fk, pdf) - Returns - ------- - The function the computes the shift for this observable. It depends on the - y-values for the parameterization of P2 and PL and P3 for proton and deuteron. 
- """ - # Lead target - A = 208.0 - Z = 82 - yp = 1 + np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / q2 - ym = 1 - np.power(1 - y, 2) - yL = np.power(y, 2) - N_L = -yL / yp # Coefficient for F_L - N_3 = np.power(-1, lepton) * ym / yp # Coefficient for F_3 - - MW2 = np.power(MW, 2) - # Overall coefficient - # TODO: cross-check - N = GEV_CM2_CONV * (GF**2) * Mh / (2 * np.pi * np.power(1 + q2 / MW2, 2)) * yp - - def func( - y_values_pc2_p, - y_values_pcL_p, - y_values_pc3_p, - y_values_pc2_d, - y_values_pcL_d, - y_values_pc3_d, - ): - PC2_p = dis_pc_func(y_values_pc2_p, pc2_p_nodes, x, q2, pc_func_type) - PCL_p = dis_pc_func(y_values_pcL_p, pcL_p_nodes, x, q2, pc_func_type) - PC3_p = dis_pc_func(y_values_pc3_p, pc3_p_nodes, x, q2, pc_func_type) - PC2_d = dis_pc_func(y_values_pc2_d, pc2_d_nodes, x, q2, pc_func_type) - PCL_d = dis_pc_func(y_values_pcL_d, pcL_d_nodes, x, q2, pc_func_type) - PC3_d = dis_pc_func(y_values_pc3_d, pc3_d_nodes, x, q2, pc_func_type) - tmp_2 = (2 * Z - A) / A * PC2_p + 2 * (A - Z) / A * PC2_d - tmp_L = (2 * Z - A) / A * PCL_p + 2 * (A - Z) / A * PCL_d - tmp_3 = (2 * Z - A) / A * PC3_p + 2 * (A - Z) / A * PC3_d - result = N * (tmp_2 + N_L * tmp_L + N_3 * tmp_3) - return result + def func(y_values): + result = jets_pc_func(y_values, nodes, pT, rap, pc_func_type) + return np.multiply(result, xsec.to_numpy()[:, 0]) return func - def construct_pars_combs(parameters_dict): """Construct the combination of parameters (the ones that parametrize the power corrections) used to compute the shifts. @@ -851,7 +416,6 @@ def construct_pars_combs(parameters_dict): return combinations - # TODO `pc_func_type` will be removed def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_type: str): """ @@ -881,185 +445,64 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ pc_func = None if process_type.startswith('DIS'): - pc2_p_nodes = pc_dict["H2p"]['nodes'] - pcL_p_nodes = pc_dict["HLp"]['nodes'] - pc3_p_nodes = pc_dict["H3p"]['nodes'] - pc2_d_nodes = pc_dict["H2d"]['nodes'] - pcL_d_nodes = pc_dict["HLd"]['nodes'] - pc3_d_nodes = pc_dict["H3d"]['nodes'] + f2_p_nodes = pc_dict["H2p"]['nodes'] + f2_d_nodes = pc_dict["H2p"]['nodes'] + hera_nc_xsec_nodes = pc_dict["xsec_nc"]['nodes'] + hera_cc_xsec_nodes = pc_dict["hera_cc"]['nodes'] + chorus_cc_xsec_nodes = pc_dict["chorus_cc"]['nodes'] + nutev_cc_xsec_nodes = pc_dict["nutev_cc"]['nodes'] + x = dataset_sp.commondata.metadata.load_kinematics()['x'].to_numpy().reshape(-1)[cuts] q2 = dataset_sp.commondata.metadata.load_kinematics()['Q2'].to_numpy().reshape(-1)[cuts] - y = dataset_sp.commondata.metadata.load_kinematics()['y'].to_numpy().reshape(-1)[cuts] # F2 ratio if exp_name == "NMC_NC_NOTFIXED_EM-F2": - pc_func = DIS_F2R_pc(dataset_sp, pdf, pc2_p_nodes, pc2_d_nodes, x, q2, pc_func_type) + pc_func_ratio = mult_dis_ratio_pc(f2_p_nodes, f2_d_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2p'], pars_pc['comb']['H2d']) + deltas[pars_pc['label']] = pc_func_ratio(pars_pc['comb']['H2p'], pars_pc['comb']['H2d']) # F2 proton traget elif exp_name in F2P_exps: - pc_func = DIS_F2_pc(pc2_p_nodes, x, q2, pc_func_type) + pc_func = mult_dis_pc(f2_p_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2p']) # F2 deuteron traget elif exp_name in F2D_exps: - pc_func = DIS_F2_pc(pc2_d_nodes, x, q2, pc_func_type) + pc_func = mult_dis_pc(f2_d_nodes, x, q2, 
dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2d']) # EMC elif exp_name.startswith('EMC_NC_250GEV'): - pc_func = DIS_F2C_pc(pc2_p_nodes, pc2_d_nodes, x, q2, pc_func_type) - for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2p'], pars_pc['comb']['H2d']) + raise NotImplementedError( + f"The {process_type} observable for {exp_name} " + "has not been implemented." + ) - # HERA and NMC SIGMARED NC + # HERA NC xsec elif exp_name in np.concatenate([NC_SIGMARED_P_EM, NC_SIGMARED_P_EP, NC_SIGMARED_P_EAVG]): - # Electron - if exp_name in NC_SIGMARED_P_EM: - pc_func = DIS_NC_XSEC_pc( - pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y, pc_func_type - ) - # Positron - elif exp_name in NC_SIGMARED_P_EP: - pc_func = DIS_NC_XSEC_pc( - pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y, pc_func_type - ) - # Average positron and electron - # TODO - # Check if this is correct (ach) - elif NC_SIGMARED_P_EAVG: - - def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): - electron = DIS_NC_XSEC_pc( - pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y, pc_func_type - ) - positron = DIS_NC_XSEC_pc( - pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y, pc_func_type - ) - result = electron(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p) + positron( - y_values_pc2_p, y_values_pcL_p, y_values_pc3_p - ) - return result / 2 - - pc_func = average - else: - raise ValueError(f"{exp_name} not implemented.") - + pc_func = mult_dis_pc(hera_nc_xsec_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func( - pars_pc['comb']['H2p'], pars_pc['comb']['HLp'], pars_pc['comb']['H3p'] - ) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['xsec_nc']) # CHORUS elif exp_name.startswith('CHORUS_CC'): - # Nu - if exp_name == 'CHORUS_CC_NOTFIXED_PB_NU-SIGMARED': - pc_func = DIS_CC_CHORUS_pc( - pc2_p_nodes, - pcL_p_nodes, - pc3_p_nodes, - pc2_d_nodes, - pcL_d_nodes, - pc3_d_nodes, - 0, - x, - q2, - y, - pc_func_type, - ) - # Nu bar - elif exp_name == 'CHORUS_CC_NOTFIXED_PB_NB-SIGMARED': - pc_func = DIS_CC_CHORUS_pc( - pc2_p_nodes, - pcL_p_nodes, - pc3_p_nodes, - pc2_d_nodes, - pcL_d_nodes, - pc3_d_nodes, - 1, - x, - q2, - y, - pc_func_type, - ) - else: - raise ValueError(f"{exp_name} not implemented.") + pc_func = mult_dis_pc(chorus_cc_xsec_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func( - pars_pc['comb']['H2p'], - pars_pc['comb']['HLp'], - pars_pc['comb']['H3p'], - pars_pc['comb']['H2d'], - pars_pc['comb']['HLd'], - pars_pc['comb']['H3d'], - ) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['chorus_cc']) # NuTeV elif exp_name.startswith('NUTEV_CC'): - # Nu - if exp_name == 'NUTEV_CC_NOTFIXED_FE_NU-SIGMARED': - pc_func = DIS_CC_NUTEV_pc( - pc2_p_nodes, - pcL_p_nodes, - pc3_p_nodes, - pc2_d_nodes, - pcL_d_nodes, - pc3_d_nodes, - 0, - x, - q2, - y, - pc_func_type, - ) - # Nu bar - elif exp_name == 'NUTEV_CC_NOTFIXED_FE_NB-SIGMARED': - pc_func = DIS_CC_NUTEV_pc( - pc2_p_nodes, - pcL_p_nodes, - pc3_p_nodes, - pc2_d_nodes, - pcL_d_nodes, - pc3_d_nodes, - 1, - x, - q2, - y, - pc_func_type, - ) - else: - raise ValueError(f"{exp_name} not implemented.") + pc_func = mult_dis_pc(nutev_cc_xsec_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func( - pars_pc['comb']['H2p'], - pars_pc['comb']['HLp'], - pars_pc['comb']['H3p'], - pars_pc['comb']['H2d'], - pars_pc['comb']['HLd'], 
- pars_pc['comb']['H3d'], - ) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['nutev_cc']) # HERA_CC elif exp_name.startswith('HERA_CC'): - # electron - if exp_name == 'HERA_CC_318GEV_EM-SIGMARED': - pc_func = DIS_CC_HERA_XSEC_pc( - pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y, pc_func_type - ) - # positron - elif exp_name == 'HERA_CC_318GEV_EP-SIGMARED': - pc_func = DIS_CC_HERA_XSEC_pc( - pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y, pc_func_type - ) - else: - raise ValueError(f"{exp_name} not implemented.") - + pc_func = mult_dis_pc(hera_cc_xsec_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func( - pars_pc['comb']['H2p'], pars_pc['comb']['HLp'], pars_pc['comb']['H3p'] - ) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['hera_cc']) else: raise ValueError( @@ -1071,7 +514,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): eta = dataset_sp.commondata.metadata.load_kinematics()['y'].to_numpy().reshape(-1)[cuts] pT = dataset_sp.commondata.metadata.load_kinematics()['pT'].to_numpy().reshape(-1)[cuts] - pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, pT, eta, pc_func_type) + pc_func = mult_jet_pc(pc_jet_nodes, pT, eta, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['Hj']) @@ -1089,7 +532,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): .to_numpy() .reshape(-1)[cuts] ) - pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, m_jj, eta_star, pc_func_type) + pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_star, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_ATLAS']) @@ -1105,7 +548,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): .to_numpy() .reshape(-1)[cuts] ) - pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, m_jj, eta_diff, pc_func_type) + pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_diff, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_CMS']) From aef98bc53e26fd6210da1f1d76f0866b27fea64c Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 17 Jun 2025 09:16:48 +0100 Subject: [PATCH 64/69] Less functions for PCs --- .../higher_twist_functions.py | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index fee68c6c02..b94590f3de 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -445,12 +445,9 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ pc_func = None if process_type.startswith('DIS'): - f2_p_nodes = pc_dict["H2p"]['nodes'] - f2_d_nodes = pc_dict["H2p"]['nodes'] - hera_nc_xsec_nodes = pc_dict["xsec_nc"]['nodes'] - hera_cc_xsec_nodes = pc_dict["hera_cc"]['nodes'] - chorus_cc_xsec_nodes = pc_dict["chorus_cc"]['nodes'] - nutev_cc_xsec_nodes = pc_dict["nutev_cc"]['nodes'] + f2_p_nodes = pc_dict["f2p"]['nodes'] + f2_d_nodes = pc_dict["f2d"]['nodes'] + dis_cc_nodes = pc_dict["dis_cc"]['nodes'] x = dataset_sp.commondata.metadata.load_kinematics()['x'].to_numpy().reshape(-1)[cuts] q2 = dataset_sp.commondata.metadata.load_kinematics()['Q2'].to_numpy().reshape(-1)[cuts] @@ -459,19 +456,19 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ if exp_name == 
"NMC_NC_NOTFIXED_EM-F2": pc_func_ratio = mult_dis_ratio_pc(f2_p_nodes, f2_d_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func_ratio(pars_pc['comb']['H2p'], pars_pc['comb']['H2d']) + deltas[pars_pc['label']] = pc_func_ratio(pars_pc['comb']['f2p'], pars_pc['comb']['f2d']) # F2 proton traget elif exp_name in F2P_exps: pc_func = mult_dis_pc(f2_p_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2p']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['f2p']) # F2 deuteron traget elif exp_name in F2D_exps: pc_func = mult_dis_pc(f2_d_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2d']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['f2d']) # EMC elif exp_name.startswith('EMC_NC_250GEV'): @@ -482,27 +479,27 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ # HERA NC xsec elif exp_name in np.concatenate([NC_SIGMARED_P_EM, NC_SIGMARED_P_EP, NC_SIGMARED_P_EAVG]): - pc_func = mult_dis_pc(hera_nc_xsec_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(f2_p_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['xsec_nc']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['f2p']) # CHORUS elif exp_name.startswith('CHORUS_CC'): - pc_func = mult_dis_pc(chorus_cc_xsec_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(dis_cc_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['chorus_cc']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['dis_cc']) # NuTeV elif exp_name.startswith('NUTEV_CC'): - pc_func = mult_dis_pc(nutev_cc_xsec_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(dis_cc_nodes , x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['nutev_cc']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['dis_cc']) # HERA_CC elif exp_name.startswith('HERA_CC'): - pc_func = mult_dis_pc(hera_cc_xsec_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(dis_cc_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['hera_cc']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['dis_cc']) else: raise ValueError( From cf277efeecd5460ada024a9ecfdeb003dfbdd13b Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 1 Jul 2025 15:40:18 +0100 Subject: [PATCH 65/69] Combined di-jet --- .../validphys/theorycovariance/higher_twist_functions.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index b94590f3de..8e164a5ad7 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -516,9 +516,10 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ deltas[pars_pc['label']] = pc_func(pars_pc['comb']['Hj']) elif process_type == 'DIJET': + if dataset_sp.commondata.metadata.experiment == 'ATLAS': - pc_jet_nodes = pc_dict["H2j_ATLAS"]['nodes'] + pc_jet_nodes = pc_dict["H2j_ATLAS"]['nodes'] if pc_dict.get("H2j_ATLAS") else 
pc_dict["Hj"]['nodes'] eta_star = ( dataset_sp.commondata.metadata.load_kinematics()['ystar'] .to_numpy() @@ -531,10 +532,10 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ ) pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_star, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_ATLAS']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_ATLAS'] if pc_dict.get("H2j_ATLAS") else pars_pc['comb']['Hj']) elif dataset_sp.commondata.metadata.experiment == 'CMS': - pc_jet_nodes = pc_dict["H2j_CMS"]['nodes'] + pc_jet_nodes = pc_dict["H2j_CMS"]['nodes'] if pc_dict.get("H2j_CMS") else pc_dict["Hj"]['nodes'] eta_diff = ( dataset_sp.commondata.metadata.load_kinematics()['ydiff'] .to_numpy() @@ -547,7 +548,7 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ ) pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_diff, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_CMS']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_CMS'] if pc_dict.get("H2j_CMS") else pars_pc['comb']['Hj']) else: raise ValueError( From b8d380cfb93ea8bf1bca0a9b8b228114e5af0b0b Mon Sep 17 00:00:00 2001 From: achiefa Date: Thu, 3 Jul 2025 09:23:19 +0100 Subject: [PATCH 66/69] Correct combined di-jet --- .../validphys/theorycovariance/higher_twist_functions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 8e164a5ad7..67fb45b9a2 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -519,7 +519,7 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ if dataset_sp.commondata.metadata.experiment == 'ATLAS': - pc_jet_nodes = pc_dict["H2j_ATLAS"]['nodes'] if pc_dict.get("H2j_ATLAS") else pc_dict["Hj"]['nodes'] + pc_jet_nodes = pc_dict["H2j_ATLAS"]['nodes'] if pc_dict.get("H2j_ATLAS") else pc_dict["H2j"]['nodes'] eta_star = ( dataset_sp.commondata.metadata.load_kinematics()['ystar'] .to_numpy() @@ -532,10 +532,10 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ ) pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_star, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_ATLAS'] if pc_dict.get("H2j_ATLAS") else pars_pc['comb']['Hj']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_ATLAS'] if pc_dict.get("H2j_ATLAS") else pars_pc['comb']['H2j']) elif dataset_sp.commondata.metadata.experiment == 'CMS': - pc_jet_nodes = pc_dict["H2j_CMS"]['nodes'] if pc_dict.get("H2j_CMS") else pc_dict["Hj"]['nodes'] + pc_jet_nodes = pc_dict["H2j_CMS"]['nodes'] if pc_dict.get("H2j_CMS") else pc_dict["H2j"]['nodes'] eta_diff = ( dataset_sp.commondata.metadata.load_kinematics()['ydiff'] .to_numpy() @@ -548,7 +548,7 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ ) pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_diff, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_CMS'] if pc_dict.get("H2j_CMS") else pars_pc['comb']['Hj']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_CMS'] if pc_dict.get("H2j_CMS") else pars_pc['comb']['H2j']) else: raise 
ValueError( From 8d7aaf5f959a68bd018ea812326ac1e77aa3e153 Mon Sep 17 00:00:00 2001 From: achiefa Date: Wed, 13 Aug 2025 13:26:45 +0100 Subject: [PATCH 67/69] Remove func_type dependence - default is linear interpolation --- validphys2/src/validphys/checks.py | 14 +- validphys2/src/validphys/config.py | 7 - .../theorycovariance/construction.py | 6 +- .../higher_twist_functions.py | 128 ++++-------------- 4 files changed, 28 insertions(+), 127 deletions(-) diff --git a/validphys2/src/validphys/checks.py b/validphys2/src/validphys/checks.py index 81844c4160..d0d608c7b4 100644 --- a/validphys2/src/validphys/checks.py +++ b/validphys2/src/validphys/checks.py @@ -364,19 +364,11 @@ def check_darwin_single_process(NPROC): @make_argcheck -def check_pc_parameters(pc_parameters, pc_func_type): +def check_pc_parameters(pc_parameters): """Check that the parameters for the PC method are set correctly""" for par in pc_parameters.values(): - # Check that the length of shifts is one less than the length of nodes. - if (len(par['yshift']) != len(par['nodes']) - 1) and pc_func_type not in [ - 'cubic', - 'linear', - ]: - raise ValueError( - f"The length of nodes does not match that of the list in {par['ht']}." - f"Check the runcard. Got {len(par['yshift'])} != {len(par['nodes'])}" - ) - elif (len(par['yshift']) != len(par['nodes'])) and pc_func_type in ['cubic', 'linear']: + # Check that the length of shifts is the same as the length of nodes + if (len(par['yshift']) != len(par['nodes'])): raise ValueError( f"The length of nodes does not match that of the list in {par['ht']}." f"Check the runcard. Got {len(par['yshift'])} != {len(par['nodes'])}" diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 802c4f5773..c80a7ee5cd 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1922,13 +1922,6 @@ def produce_total_phi_data(self, fitthcovmat): return validphys.results.total_phi_data_from_experiments return validphys.results.dataset_inputs_phi_data - # TODO: to be removed once we are sure the the triangular - # function for the prior is the only one of interest - def produce_pc_func_type(self, theorycovmatconfig=None): - if theorycovmatconfig is None: - raise ValueError("theorycovmatconfig is defined in the runcard.") - return theorycovmatconfig.get('func_type', 'linear') - @configparser.explicit_node def produce_covs_pt_prescrip(self, point_prescription): if point_prescription != 'power corrections': diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index a796d05d73..516abde0fb 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -365,7 +365,6 @@ def covs_pt_prescrip_mhou(combine_by_type, point_prescription): return covmats -# TODO `pc_func_type`will be removed in the future @check_pc_parameters def covs_pt_prescrip_pc( combine_by_type, @@ -374,7 +373,6 @@ def covs_pt_prescrip_pc( pc_parameters, pc_included_procs, pc_excluded_exps, - pc_func_type, ): """Produces the sub-matrices of the theory covariance matrix for power corrections. 
Sub-matrices correspond to applying power corrected shifts @@ -399,8 +397,8 @@ def covs_pt_prescrip_pc( proc not in pc_included_procs for proc in [process_type1, process_type2] ) if not (is_excluded_exp or is_included_proc): - deltas1 = compute_deltas_pc(data_spec1, pdf, pc_parameters, pc_func_type) - deltas2 = compute_deltas_pc(data_spec2, pdf, pc_parameters, pc_func_type) + deltas1 = compute_deltas_pc(data_spec1, pdf, pc_parameters) + deltas2 = compute_deltas_pc(data_spec2, pdf, pc_parameters) s = compute_covs_pt_prescrip( point_prescription, exp_name1, deltas1, exp_name2, deltas2 ) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 67fb45b9a2..11876c3cee 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -42,69 +42,6 @@ NC_SIGMARED_P_EAVG = ['HERA_NC_318GEV_EAVG_CHARM-SIGMARED', 'HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED'] -# TODO This function will be deleted in the future -def step_function(a: npt.ArrayLike, y_shift: npt.ArrayLike, bin_edges: npt.ArrayLike) -> np.ndarray: - """ - This function defines the step function used to construct the prior. The bins of the step - function are constructed using pairs of consecutive points. For instance, given the set of - points [0.0, 0.1, 0.3, 0.5], there will be three bins with edges [[0.0, 0.1], [0.1, 0.3], - 0.3, 0.5]]. Each bin is coupled with a shift, which correspond to the y-value of the bin. - - Parameters - ---------- - a: ArrayLike of float - A one-dimensional array of points at which the function is evaluated. - y_shift: ArrayLike of float - A one-dimensional array whose elements represent the y-value of each bin - bin_edges: ArrayLike of float - A one-dimensional array containing the edges of the bins. The bins are - constructed using pairs of consecutive points. - - Return - ------ - A one-dimensional array containing the function values evaluated at the points - specified in `a`. - """ - res = np.zeros_like(a) - for shift_pos, shift in enumerate(y_shift): - bin_low = bin_edges[shift_pos] - bin_high = bin_edges[shift_pos + 1] - condition = np.multiply( - a >= bin_low, a < bin_high if shift_pos != len(y_shift) - 1 else a <= bin_high - ) - res = np.add(res, [shift if cond else 0.0 for cond in condition]) - return res - - -# TODO This function will be deleted in the future -def cubic_spline_function( - a: npt.ArrayLike, y_shift: npt.ArrayLike, nodes: npt.ArrayLike -) -> np.ndarray: - """ - This function defines the cubic spline function used to construct the prior. The spline - is constructed using the nodes specified in `nodes` and the y-values in `y_shift`. The - spline is evaluated at the points specified in `a`. - - Parameters - ---------- - a: ArrayLike of float - A one-dimensional array of points at which the function is evaluated. - y_shift: ArrayLike of float - A one-dimensional array whose elements represent the y-value of each bin - nodes: ArrayLike of float - A one-dimensional array containing the nodes used to construct the spline. - - Return - ------ - A one-dimensional array containing the function values evaluated at the points - specified in `a`. 
- """ - from scipy.interpolate import CubicSpline - - cs = CubicSpline(nodes, y_shift) - return cs(a) - - def linear_bin_function( a: npt.ArrayLike, y_shift: npt.ArrayLike, bin_edges: npt.ArrayLike ) -> np.ndarray: @@ -166,14 +103,13 @@ def dis_pc_func( nodes: npt.ArrayLike, x: npt.ArrayLike, Q2: npt.ArrayLike, - pc_func_type: str = "step", ) -> npt.ArrayLike: """ This function builds the functional form of the power corrections for DIS-like processes. - Power corrections are modelled using a step-function. The edges of the bins used in the - step-function are specified by the list of nodes. The y-values for each bin are given + Power corrections are modelled using a linear function, which interpolates between the nodes + of the parameterisation. The y-values for each node are given by the array `delta_h`. The power corrections will be computed for the pairs (xb, Q2), - where `xb` is the Bjorken x. The power correction for DIS processes are rescaled by Q2. + where `xb` is the Bjorken x. The power correction for DIS processes is divided by Q2. Parameters ---------- @@ -191,15 +127,7 @@ def dis_pc_func( A one-dimensional array of power corrections for DIS-like processes where each point is evaluated at the kinematic pair (x,Q2). """ - if pc_func_type == "step": - PC = step_function(x, delta_h, nodes) / Q2 - elif pc_func_type == "linear": - PC = linear_bin_function(x, delta_h, nodes) / Q2 - elif pc_func_type == "cubic": - PC = cubic_spline_function(x, delta_h, nodes) / Q2 - else: - raise ValueError(f"Invalid function type: {pc_func_type} is not supported.") - + PC = linear_bin_function(x, delta_h, nodes) / Q2 return PC @@ -208,7 +136,6 @@ def jets_pc_func( nodes: npt.ArrayLike, pT: npt.ArrayLike, rap: npt.ArrayLike, - pc_func_type: str = "step", ) -> npt.ArrayLike: """ Same as `dis_pc_func`, but for jet data. Here, the kinematic pair consists of the rapidity @@ -230,14 +157,7 @@ def jets_pc_func( A one-dimensional array of power corrections for jet processes where each point is evaluated at the kinematic pair (y, pT). """ - if pc_func_type == "step": - PC = step_function(rap, delta_h, nodes) / pT - elif pc_func_type == "linear": - PC = linear_bin_function(rap, delta_h, nodes) / pT - elif pc_func_type == "cubic": - PC = cubic_spline_function(rap, delta_h, nodes) / pT - else: - raise ValueError(f"Invalid function type: {pc_func_type} is not supported.") + PC = linear_bin_function(rap, delta_h, nodes) / pT return PC @@ -265,7 +185,7 @@ def jets_pc_func( # return func -def mult_dis_pc(nodes, x, q2, dataset_sp, pdf, pc_func_type: str = "step"): +def mult_dis_pc(nodes, x, q2, dataset_sp, pdf): """ Returns the function that computes the shift to observables due to power corrections. Power corrections are treated as multiplicative @@ -279,7 +199,6 @@ def mult_dis_pc(nodes, x, q2, dataset_sp, pdf, pc_func_type: str = "step"): This function returns a function that computes the shift given the y-values of the nodes used to define the power corrections. - The interpolation between the nodes is specified by `pc_func_type`. 
""" cuts = dataset_sp.cuts (fkspec,) = dataset_sp.fkspecs @@ -287,13 +206,13 @@ def mult_dis_pc(nodes, x, q2, dataset_sp, pdf, pc_func_type: str = "step"): th_preds = central_fk_predictions(fk, pdf) def func(y_values): - result = dis_pc_func(y_values, nodes, x, q2, pc_func_type) + result = dis_pc_func(y_values, nodes, x, q2) return np.multiply(result, th_preds.to_numpy()[:, 0]) return func -def mult_dis_ratio_pc(p_nodes, d_nodes, x, q2, dataset_sp, pdf, pc_func_type: str = "step"): +def mult_dis_ratio_pc(p_nodes, d_nodes, x, q2, dataset_sp, pdf): """ Returns the function that computes the shift for the ratio of structure functions F2_d / F2_p. For this observable, power corrections are defined @@ -344,8 +263,8 @@ def mult_dis_ratio_pc(p_nodes, d_nodes, x, q2, dataset_sp, pdf, pc_func_type: st F2_ratio = operator.truediv(F2D, F2P) def func(y_values_p, y_values_d): - h2d = dis_pc_func(y_values_d, d_nodes, x, q2, pc_func_type) - h2p = dis_pc_func(y_values_p, p_nodes, x, q2, pc_func_type) + h2d = dis_pc_func(y_values_d, d_nodes, x, q2) + h2p = dis_pc_func(y_values_p, p_nodes, x, q2) num = np.sum([h2d, -h2p], axis=0) denom = np.sum([np.ones_like(h2p), h2p], axis=0) result = np.multiply(operator.truediv(num, denom), F2_ratio) @@ -353,7 +272,7 @@ def func(y_values_p, y_values_d): return func -def mult_jet_pc(nodes, pT, rap, dataset_sp, pdf, pc_func_type: str = "step"): +def mult_jet_pc(nodes, pT, rap, dataset_sp, pdf): """ As `mult_dis_pc`, but for jet data. The power corrections are defined as @@ -371,7 +290,7 @@ def mult_jet_pc(nodes, pT, rap, dataset_sp, pdf, pc_func_type: str = "step"): xsec = central_fk_predictions(fk, pdf) def func(y_values): - result = jets_pc_func(y_values, nodes, pT, rap, pc_func_type) + result = jets_pc_func(y_values, nodes, pT, rap) return np.multiply(result, xsec.to_numpy()[:, 0]) return func @@ -416,8 +335,7 @@ def construct_pars_combs(parameters_dict): return combinations -# TODO `pc_func_type` will be removed -def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_type: str): +def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict): """ Computes the shifts due to power corrections for a single dataset given the set of parameters that model the power corrections. 
The result is @@ -454,19 +372,19 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ # F2 ratio if exp_name == "NMC_NC_NOTFIXED_EM-F2": - pc_func_ratio = mult_dis_ratio_pc(f2_p_nodes, f2_d_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func_ratio = mult_dis_ratio_pc(f2_p_nodes, f2_d_nodes, x, q2, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func_ratio(pars_pc['comb']['f2p'], pars_pc['comb']['f2d']) # F2 proton traget elif exp_name in F2P_exps: - pc_func = mult_dis_pc(f2_p_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(f2_p_nodes, x, q2, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['f2p']) # F2 deuteron traget elif exp_name in F2D_exps: - pc_func = mult_dis_pc(f2_d_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(f2_d_nodes, x, q2, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['f2d']) @@ -479,25 +397,25 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ # HERA NC xsec elif exp_name in np.concatenate([NC_SIGMARED_P_EM, NC_SIGMARED_P_EP, NC_SIGMARED_P_EAVG]): - pc_func = mult_dis_pc(f2_p_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(f2_p_nodes, x, q2, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['f2p']) # CHORUS elif exp_name.startswith('CHORUS_CC'): - pc_func = mult_dis_pc(dis_cc_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(dis_cc_nodes, x, q2, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['dis_cc']) # NuTeV elif exp_name.startswith('NUTEV_CC'): - pc_func = mult_dis_pc(dis_cc_nodes , x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(dis_cc_nodes , x, q2, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['dis_cc']) # HERA_CC elif exp_name.startswith('HERA_CC'): - pc_func = mult_dis_pc(dis_cc_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(dis_cc_nodes, x, q2, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['dis_cc']) @@ -511,7 +429,7 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ eta = dataset_sp.commondata.metadata.load_kinematics()['y'].to_numpy().reshape(-1)[cuts] pT = dataset_sp.commondata.metadata.load_kinematics()['pT'].to_numpy().reshape(-1)[cuts] - pc_func = mult_jet_pc(pc_jet_nodes, pT, eta, dataset_sp, pdf, pc_func_type) + pc_func = mult_jet_pc(pc_jet_nodes, pT, eta, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['Hj']) @@ -530,7 +448,7 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ .to_numpy() .reshape(-1)[cuts] ) - pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_star, dataset_sp, pdf, pc_func_type) + pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_star, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_ATLAS'] if pc_dict.get("H2j_ATLAS") else pars_pc['comb']['H2j']) @@ -546,7 +464,7 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ .to_numpy() .reshape(-1)[cuts] ) - pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_diff, dataset_sp, pdf, pc_func_type) + pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_diff, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = 
pc_func(pars_pc['comb']['H2j_CMS'] if pc_dict.get("H2j_CMS") else pars_pc['comb']['H2j'])

From 7e6a97c57dface63d488c3b9e5164efc402bdeb1 Mon Sep 17 00:00:00 2001
From: achiefa
Date: Thu, 25 Sep 2025 18:24:42 +0100
Subject: [PATCH 68/69] Allow multiplicative factor for user covmat

---
 validphys2/src/validphys/config.py              | 16 ++++++++++++++++
 .../validphys/theorycovariance/construction.py  |  4 ++--
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py
index c80a7ee5cd..fc917d6a47 100644
--- a/validphys2/src/validphys/config.py
+++ b/validphys2/src/validphys/config.py
@@ -877,6 +877,7 @@ def produce_loaded_theory_covmat(
         # change ordering according to exp_covmat (so according to runcard order)
         tmp = theory_covmat.droplevel(0, axis=0).droplevel(0, axis=1)
         bb = [str(i) for i in data_input]
+        import ipdb; ipdb.set_trace()
         return tmp.reindex(index=bb, columns=bb, level=0).values
 
     @configparser.explicit_node
@@ -1276,6 +1277,21 @@ def produce_nnfit_theory_covmat(
                 f = user_covmat_fitting
 
         return f
+
+    def produce_mult_factor_user_covmat(self, mult_factor: float = None, user_covmat_path: str = None):
+        """
+        Multiplicative factor for the user covmat, provided by ``mult_factor`` in the runcard.
+        If no factor is given, defaults to 1.0 when a user covmat is in use and to None otherwise.
+        For use in theorycovariance.construction.user_covmat.
+        """
+        # Check that if mult_factor is provided, user_covmat_path is also provided
+        if mult_factor is not None and user_covmat_path is None:
+            raise ConfigError("If mult_factor is provided, user_covmat_path must also be provided.")
+
+        if mult_factor is None:
+            return 1.0 if user_covmat_path is not None else None
+        else:
+            return mult_factor
 
     def produce_fitthcovmat(
         self, use_thcovmat_if_present: bool = False, fit: (str, type(None)) = None
diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py
index 516abde0fb..e2d05513be 100644
--- a/validphys2/src/validphys/theorycovariance/construction.py
+++ b/validphys2/src/validphys/theorycovariance/construction.py
@@ -531,7 +531,7 @@ def fromfile_covmat(covmatpath, groups_data_by_process, procs_index):
 
 
 @table
-def user_covmat(groups_data_by_process, procs_index, loaded_user_covmat_path):
+def user_covmat(groups_data_by_process, procs_index, loaded_user_covmat_path, mult_factor_user_covmat):
     """
     General theory covariance matrix provided by the user.
     Useful for testing the impact of externally produced
     covariance matrices which are not implemented in validphys.
     Path given by ``user_covmat_path`` in ``theorycovmatconfig`` in the
     runcard. For more information see documentation.
""" - return fromfile_covmat(loaded_user_covmat_path, groups_data_by_process, procs_index) + return mult_factor_user_covmat * fromfile_covmat(loaded_user_covmat_path, groups_data_by_process, procs_index) @table From 52997f3c168c255924d04028471afd9b2b4f77fd Mon Sep 17 00:00:00 2001 From: achiefa Date: Thu, 25 Sep 2025 22:34:03 +0100 Subject: [PATCH 69/69] Remove debug trace --- validphys2/src/validphys/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index fc917d6a47..146846b337 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -877,7 +877,6 @@ def produce_loaded_theory_covmat( # change ordering according to exp_covmat (so according to runcard order) tmp = theory_covmat.droplevel(0, axis=0).droplevel(0, axis=1) bb = [str(i) for i in data_input] - import ipdb; ipdb.set_trace() return tmp.reindex(index=bb, columns=bb, level=0).values @configparser.explicit_node