69 commits
e519636
Implementing check for repeated added rules.
achiefa Jul 1, 2025
d7bb1af
Implementing uniqueness parsing logic for filter rules.
achiefa Jul 1, 2025
61f6cb4
Update error message
achiefa Jul 2, 2025
7d9e969
Copied from branch 'HT_thcovmat'
achiefa Jul 15, 2024
356cddc
Removed version
achiefa Jul 15, 2024
4bab1da
Saving progress - not ready
achiefa Jul 15, 2024
56d60cf
Implemented d/p ratio
achiefa Jul 16, 2024
c58758a
Parsing 'separate_multiplicative' in vp_setupfit
achiefa Jul 16, 2024
e4d9e60
Minor adjustments
achiefa Jul 16, 2024
f11b466
Corrected bug
achiefa Jul 16, 2024
ad050da
Correcting bug
achiefa Jul 17, 2024
be18a4b
Implemented knots in runcard
achiefa Aug 5, 2024
c6a1f4e
Added valiphys card for chi2 report
achiefa Aug 20, 2024
e0c21cc
First implementation of HT at the level of theory predictions
achiefa Aug 23, 2024
6fe1974
Implemented table for kinematics
achiefa Sep 20, 2024
bc58ac6
Allowed theory HT in runcard - added HERACOMB in HT calculations
achiefa Sep 21, 2024
754a916
Excluded HERACOMB
achiefa Sep 21, 2024
525d239
Hacking NMC dataset
achiefa Sep 21, 2024
ab292f4
Grouping kinematics
achiefa Sep 26, 2024
576bad1
Reimplementing thcovmat
achiefa Sep 26, 2024
8b9de4b
Added comment in HT for DIS
achiefa Sep 27, 2024
e403c61
Corrected normalisation for SIGMARED DIS NC data sets
achiefa Sep 27, 2024
5040fb0
Removing unused code
achiefa Sep 29, 2024
6c04f5f
Added HT for F2C data (EMC) - removed deprecated function
achiefa Oct 2, 2024
054a492
Corrected EMC data iron target
achiefa Oct 2, 2024
4a6986b
Removed deprecated code
achiefa Oct 3, 2024
94e2fe1
Refactoring + DIS CC
achiefa Oct 3, 2024
5382633
Corrected bug - ready for cc test
achiefa Oct 11, 2024
43225ad
Corrected bug - ready
achiefa Oct 11, 2024
6ca745c
Removing unnecessary code
achiefa Oct 11, 2024
610d765
Corrected bug after rebase
achiefa Oct 12, 2024
2dc457c
Add normalisation in CC x-secs
achiefa Oct 17, 2024
1400eb4
Correct normalisation
achiefa Oct 17, 2024
2530ae6
Restore n3fit files from master
achiefa Oct 17, 2024
4404032
remove _PB suffix from process type
RoyStegeman Nov 14, 2024
037d5b3
format a bit
RoyStegeman Nov 14, 2024
2570a1a
Update for new thcovmat construction + refactor + docstrings
achiefa Jan 14, 2025
ba65cc6
Correct typo + example runcard
achiefa Jan 14, 2025
91af93c
Correct bug
achiefa Jan 14, 2025
8d43f0d
Correct nuclear factors for nuclear targets
achiefa Jan 15, 2025
a622417
First implementation of jet data
achiefa Jan 20, 2025
8665ec7
Change pc jet dependence from pT to eta
achiefa Jan 23, 2025
39e68e4
Allowing step-function for the prior
achiefa Jan 28, 2025
4729dce
Vectorize step_function + docstring
achiefa Jan 29, 2025
a230495
Correct bug in step function
achiefa Jan 29, 2025
c61d3f5
Correct bug in step function
achiefa Jan 29, 2025
2e6a29e
Produce covs_pt_prescrip
achiefa Jan 30, 2025
7f32963
Allow different funcs for posterior
achiefa Feb 10, 2025
0edb275
Adjusting linear triangular function
achiefa Feb 12, 2025
34fd363
Correct bug in linear function
achiefa Feb 17, 2025
049baa3
Implement multiplicative PC for jet
achiefa Feb 21, 2025
24204a5
Correct docstring
achiefa Feb 28, 2025
26e1e41
Remove unused vp runcard
achiefa Mar 10, 2025
85188cb
Dijet + clean-up + checks for pc dict
achiefa Mar 10, 2025
f28807b
Remove translation layer for pc parameters
achiefa Mar 10, 2025
43c1666
Remove copy of the same collection procs_data -> groups_data_by_proce…
achiefa Mar 10, 2025
b805509
Remove unused collect
achiefa Mar 10, 2025
fbf60e3
Update basic runcard
achiefa Mar 10, 2025
b061166
Allow generation of L1 data
achiefa Mar 10, 2025
65d8588
Jets with single parameters
achiefa Apr 14, 2025
8490ccb
Adjust format
achiefa Apr 14, 2025
77b8cce
Restoring nodes for jets and dijets
achiefa Apr 23, 2025
caa8125
Implementation of multiplicative shifts
achiefa Jun 10, 2025
aef98bc
Less functions for PCs
achiefa Jun 17, 2025
cf277ef
Combined di-jet
achiefa Jul 1, 2025
b8d380c
Correct combined di-jet
achiefa Jul 3, 2025
8d7aaf5
Remove func_type dependence - default is linear interpolation
achiefa Aug 13, 2025
7e6a97c
Allow multiplicative factor for user covmat
achiefa Sep 25, 2025
52997f3
Remove debug trace
achiefa Sep 25, 2025
151 changes: 151 additions & 0 deletions n3fit/runcards/examples/Basic_runcard_pc_covmat.yml
@@ -0,0 +1,151 @@
#
# Configuration file for n3fit
#
######################################################################################
description: NNPDF4.0 ht with TCM - DIS (NC & CC) only

######################################################################################
dataset_inputs:
- {dataset: NMC_NC_NOTFIXED_EM-F2, frac: 0.75, variant: legacy_dw}
- {dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, frac: 0.75, variant: legacy}
- {dataset: SLAC_NC_NOTFIXED_D_EM-F2, frac: 0.75, variant: legacy_dw}
- {dataset: BCDMS_NC_NOTFIXED_P_EM-F2, frac: 0.75, variant: legacy_dw}
- {dataset: CHORUS_CC_NOTFIXED_PB_NU-SIGMARED, frac: 0.75, variant: legacy_dw}
- {dataset: NUTEV_CC_NOTFIXED_FE_NB-SIGMARED, cfac: [MAS], frac: 0.75, variant: legacy_dw}
- {dataset: HERA_CC_318GEV_EP-SIGMARED, frac: 0.75, variant: legacy}
- {dataset: HERA_NC_318GEV_EAVG_CHARM-SIGMARED, frac: 0.75, variant: legacy}
- {dataset: HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED, frac: 0.75, variant: legacy}
- {dataset: DYE866_Z0_800GEV_DW_RATIO_PDXSECRATIO, frac: 0.75, variant: legacy}
- {dataset: CDF_Z0_1P96TEV_ZRAP, frac: 0.75, variant: legacy}
- {dataset: ATLAS_Z0J_8TEV_PT-Y, frac: 0.75, variant: legacy_10}
- {dataset: ATLAS_1JET_8TEV_R06_PTY, frac: 0.75, variant: legacy_decorrelated}
- {dataset: ATLAS_2JET_7TEV_R06_M12Y, frac: 0.75, variant: legacy}
- {dataset: CMS_2JET_7TEV_M12Y, frac: 0.75}
- {dataset: CMS_1JET_8TEV_PTY, frac: 0.75, variant: legacy}
- {dataset: LHCB_Z0_13TEV_DIELECTRON-Y, frac: 0.75}

################################################################################
datacuts:
  t0pdfset: 240701-02-rs-nnpdf40-baseline
  q2min: 2.5
  w2min: 3.24

################################################################################
# NNLO QCD TRN evolution
theory:
  theoryid: 708

theorycovmatconfig:
  point_prescriptions: ["9 point", "power corrections"]
  pc_parameters:
    H2p: {yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0], nodes: [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1]}
    H2d: {yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0], nodes: [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1]}
    HLp: {yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0], nodes: [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1]}
    HLd: {yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0], nodes: [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1]}
    H3p: {yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0], nodes: [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1]}
    H3d: {yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0], nodes: [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1]}
    Hj: {yshift: [2.0, 2.0, 2.0, 2.0, 2.0, 2.0], nodes: [0.25, 0.75, 1.25, 1.75, 2.25, 2.75]}
    H2j_ATLAS: {yshift: [2.0, 2.0, 2.0, 2.0, 2.0, 2.0], nodes: [0.25, 0.75, 1.25, 1.75, 2.25, 2.75]}
    H2j_CMS: {yshift: [2.0, 2.0, 2.0, 2.0, 2.0], nodes: [0.25, 0.75, 1.25, 1.75, 2.25]}
  pc_included_procs: ["JETS", "DIJET", "DIS NC", "DIS CC"]
  pc_excluded_exps: [HERA_NC_318GEV_EAVG_CHARM-SIGMARED,
                     HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED]
  pdf: 210619-n3fit-001
  use_thcovmat_in_fitting: true
  use_thcovmat_in_sampling: true
  resample_negative_pseudodata: false

# For fits <= 4.0 multiplicative and additive uncertainties were sampled separately
# and thus the flag `separate_multiplicative` needs to be set to True
# sampling:
# separate_multiplicative: True

################################################################################
trvlseed: 591866982
nnseed: 945709987
mcseed: 519562661
genrep: true

################################################################################
parameters: # This defines the parameter dictionary that is passed to the Model Trainer
  nodes_per_layer: [25, 20, 8]
  activation_per_layer: [tanh, tanh, linear]
  initializer: glorot_normal
  optimizer:
    clipnorm: 6.073e-6
    learning_rate: 2.621e-3
    optimizer_name: Nadam
  epochs: 3000
  positivity:
    initial: 184.8
    multiplier:
  integrability:
    initial: 10
    multiplier:
  stopping_patience: 0.1
  layer_type: dense
  dropout: 0.0
  threshold_chi2: 3.5

fitting:
  fitbasis: EVOL
  savepseudodata: True
  basis:
  - {fl: sng, trainable: false, smallx: [1.089, 1.119], largex: [1.475, 3.119]}
  - {fl: g, trainable: false, smallx: [0.7504, 1.098], largex: [2.814, 5.669]}
  - {fl: v, trainable: false, smallx: [0.479, 0.7384], largex: [1.549, 3.532]}
  - {fl: v3, trainable: false, smallx: [0.1073, 0.4397], largex: [1.733, 3.458]}
  - {fl: v8, trainable: false, smallx: [0.5507, 0.7837], largex: [1.516, 3.356]}
  - {fl: t3, trainable: false, smallx: [-0.4506, 0.9305], largex: [1.745, 3.424]}
  - {fl: t8, trainable: false, smallx: [0.5877, 0.8687], largex: [1.522, 3.515]}
  - {fl: t15, trainable: false, smallx: [1.089, 1.141], largex: [1.492, 3.222]}

################################################################################
positivity:
  posdatasets:
    # Positivity Lagrange Multiplier
    - {dataset: NNPDF_POS_2P24GEV_F2U, maxlambda: 1e6}
    - {dataset: NNPDF_POS_2P24GEV_F2D, maxlambda: 1e6}
    - {dataset: NNPDF_POS_2P24GEV_F2S, maxlambda: 1e6}
    - {dataset: NNPDF_POS_2P24GEV_FLL, maxlambda: 1e6}
    - {dataset: NNPDF_POS_2P24GEV_DYU, maxlambda: 1e10}
    - {dataset: NNPDF_POS_2P24GEV_DYD, maxlambda: 1e10}
    - {dataset: NNPDF_POS_2P24GEV_DYS, maxlambda: 1e10}
    - {dataset: NNPDF_POS_2P24GEV_F2C, maxlambda: 1e6}
    # Positivity of MSbar PDFs
    - {dataset: NNPDF_POS_2P24GEV_XUQ, maxlambda: 1e6}
    - {dataset: NNPDF_POS_2P24GEV_XUB, maxlambda: 1e6}
    - {dataset: NNPDF_POS_2P24GEV_XDQ, maxlambda: 1e6}
    - {dataset: NNPDF_POS_2P24GEV_XDB, maxlambda: 1e6}
    - {dataset: NNPDF_POS_2P24GEV_XSQ, maxlambda: 1e6}
    - {dataset: NNPDF_POS_2P24GEV_XSB, maxlambda: 1e6}
    - {dataset: NNPDF_POS_2P24GEV_XGL, maxlambda: 1e6}

added_filter_rules:
  - dataset: NNPDF_POS_2P24GEV_FLL
    rule: "x > 5.0e-7"
  - dataset: NNPDF_POS_2P24GEV_F2C
    rule: "x < 0.74"
  - dataset: NNPDF_POS_2P24GEV_XGL
    rule: "x > 0.1"
  - dataset: NNPDF_POS_2P24GEV_XUQ
    rule: "x > 0.1"
  - dataset: NNPDF_POS_2P24GEV_XUB
    rule: "x > 0.1"
  - dataset: NNPDF_POS_2P24GEV_XDQ
    rule: "x > 0.1"
  - dataset: NNPDF_POS_2P24GEV_XDB
    rule: "x > 0.1"
  - dataset: NNPDF_POS_2P24GEV_XSQ
    rule: "x > 0.1"
  - dataset: NNPDF_POS_2P24GEV_XSB
    rule: "x > 0.1"

integrability:
  integdatasets:
    - {dataset: NNPDF_INTEG_3GEV_XT8, maxlambda: 1e2}
    - {dataset: NNPDF_INTEG_3GEV_XT3, maxlambda: 1e2}

################################################################################
debug: false
maxcores: 8
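Each entry in `pc_parameters` above pairs a list of prior shifts (`yshift`) with a grid of kinematic `nodes`, and the commit history states that the shift between nodes now defaults to linear interpolation. A minimal standalone sketch of that evaluation, using `numpy.interp` for illustration (this is not the actual validphys implementation):

```python
import numpy as np

# Node grid and prior shifts as in the H2p entry of the runcard above:
# a flat 0.2 prior in the bulk of x, falling linearly to 0 at x = 1.
nodes = [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0]
yshift = [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0]

def pc_shift(x, nodes, yshift):
    """Piecewise-linear power-correction shift evaluated at points x."""
    return np.interp(x, nodes, yshift)

print(pc_shift(np.array([0.05, 0.95]), nodes, yshift))  # [0.2 0.1]
```

At x = 0.95 the shift sits halfway between the 0.9 node (0.2) and the endpoint (0.0), giving 0.1, which is the kind of prior suppression at large x these runcard entries encode.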
2 changes: 1 addition & 1 deletion n3fit/src/n3fit/performfit.py
@@ -41,7 +41,7 @@ def performfit(
    debug=False,
    maxcores=None,
    double_precision=False,
    parallel_models=True,
    parallel_models=False,
):
"""
This action will (upon having read a validcard) process a full PDF fit
6 changes: 3 additions & 3 deletions n3fit/src/n3fit/scripts/vp_setupfit.py
@@ -1,8 +1,8 @@
#!/usr/bin/env python
"""
setup-fit - prepare and apply data cuts before fit
setup-fit constructs the fit [results] folder where data used by nnfit
will be stored.
setup-fit - prepare and apply data cuts before fit
setup-fit constructs the fit [results] folder where data used by nnfit
will be stored.
"""

# Implementation notes
12 changes: 12 additions & 0 deletions validphys2/src/validphys/checks.py
@@ -361,3 +361,15 @@ def check_darwin_single_process(NPROC):
    """
    if platform.system() == "Darwin" and NPROC != 1:
        raise CheckError("NPROC must be set to 1 on OSX, because multithreading is not supported.")


@make_argcheck
def check_pc_parameters(pc_parameters):
    """Check that the parameters for the PC method are set correctly."""
    for name, par in pc_parameters.items():
        # The number of shifts must match the number of nodes
        if len(par['yshift']) != len(par['nodes']):
            raise ValueError(
                f"The length of 'nodes' does not match that of 'yshift' for '{name}'. "
                f"Check the runcard. Got {len(par['yshift'])} != {len(par['nodes'])}"
            )
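The length check above is easy to exercise in isolation; a hypothetical standalone version of the same comparison, decoupled from validphys's `@make_argcheck` machinery:

```python
def check_pc_parameters(pc_parameters):
    """Raise if any parameter has mismatched yshift/nodes lengths."""
    for name, par in pc_parameters.items():
        if len(par['yshift']) != len(par['nodes']):
            raise ValueError(f"Length mismatch for '{name}'")

# A well-formed entry (one shift per node) passes silently...
check_pc_parameters({"Hj": {"yshift": [2.0, 2.0], "nodes": [0.25, 0.75]}})

# ...while a mismatched one raises before the fit is set up.
try:
    check_pc_parameters({"Hj": {"yshift": [2.0], "nodes": [0.25, 0.75]}})
except ValueError as e:
    print(e)
```

Catching this at parse time means a malformed runcard fails immediately rather than deep inside the covariance-matrix construction.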
43 changes: 42 additions & 1 deletion validphys2/src/validphys/config.py
@@ -1276,6 +1276,21 @@ def produce_nnfit_theory_covmat(
        f = user_covmat_fitting

        return f

    def produce_mult_factor_user_covmat(self, mult_factor: float = None, user_covmat_path: str = None):
        """
        Multiplicative factor for the user covmat, provided by ``mult_factor`` in the runcard.
        If no factor is provided, returns None.
        For use in theorycovariance.construction.user_covmat.
        """
        # A multiplicative factor only makes sense when a user covmat is also given
        if mult_factor is not None and user_covmat_path is None:
            raise ConfigError("If mult_factor is provided, user_covmat_path must also be provided.")

        if mult_factor is None:
            return 1.0 if user_covmat_path is not None else None
        return mult_factor

    def produce_fitthcovmat(
        self, use_thcovmat_if_present: bool = False, fit: (str, type(None)) = None
@@ -1799,6 +1814,8 @@ def produce_theoryids(self, t0id, point_prescription):
        prescription. The options for the latter are defined in pointprescriptions.yaml.
        This hard codes the theories needed for each prescription to avoid user error."""
        th = t0id.id
        if point_prescription == 'power corrections':
            return NSList([t0id], nskey="theoryid")

        lsv = yaml_safe.load(read_text(validphys.scalevariations, "scalevariationtheoryids.yaml"))

@@ -1881,7 +1898,18 @@ def produce_filter_data(
        if not fakedata:
            return validphys.filters.filter_real_data
        else:
            if inconsistent_fakedata:
            # TODO we don't want to sample from the theory covmat for L1 data,
            # but we do want to use the theory covmat for L2 data
            if theorycovmatconfig is not None and theorycovmatconfig.get(
                "use_thcovmat_in_fakedata_sampling"
            ):
                # NOTE: By the time we run theory covmat closure tests,
                # hopefully the generation of pseudodata will be done in python.
                raise ConfigError(
                    "Generating L1 closure test data which samples from the theory "
                    "covariance matrix has not been implemented yet."
                )
            elif inconsistent_fakedata:
                log.info("Using filter for inconsistent closure data")
                return validphys.filters.filter_inconsistent_closure_data_by_experiment

@@ -1909,6 +1937,19 @@ def produce_total_phi_data(self, fitthcovmat):
            return validphys.results.total_phi_data_from_experiments
        return validphys.results.dataset_inputs_phi_data

    @configparser.explicit_node
    def produce_covs_pt_prescrip(self, point_prescription):
        if point_prescription != 'power corrections':
            from validphys.theorycovariance.construction import covs_pt_prescrip_mhou

            f = covs_pt_prescrip_mhou
        else:
            from validphys.theorycovariance.construction import covs_pt_prescrip_pc

            f = covs_pt_prescrip_pc

        return f


class Config(report.Config, CoreConfig):
"""The effective configuration parser class."""
24 changes: 16 additions & 8 deletions validphys2/src/validphys/dataplots.py
@@ -2,8 +2,6 @@
Plots of relations between data PDFs and fits.
"""

from __future__ import generator_stop

from collections import defaultdict
from collections.abc import Sequence
import itertools
@@ -28,7 +26,7 @@
from validphys.core import CutsPolicy, MCStats, cut_mask
from validphys.plotoptions.core import get_info, kitable, transform_result
from validphys.results import chi2_stat_labels, chi2_stats
from validphys.sumrules import POL_LIMS, partial_polarized_sum_rules
from validphys.sumrules import POL_LIMS
from validphys.utils import sane_groupby_iter, scale_from_grid, split_ranges

log = logging.getLogger(__name__)
@@ -301,9 +299,7 @@ def _plot_fancy_impl(
        min_vals = []
        max_vals = []
        fig, ax = plotutils.subplots()
        ax.set_title(
            "{} {}".format(info.dataset_label, info.group_label(samefig_vals, info.figure_by))
        )
        ax.set_title(f"{info.dataset_label} {info.group_label(samefig_vals, info.figure_by)}")

        lineby = sane_groupby_iter(fig_data, info.line_by)

@@ -1287,7 +1283,7 @@ def _check_display_cuts_requires_use_cuts(display_cuts, use_cuts):

@make_argcheck
def _check_marker_by(marker_by):
    markers = ('process type', 'experiment', 'dataset', 'group')
    markers = ('process type', 'experiment', 'dataset', 'group', 'kinematics')
    if marker_by not in markers:
        raise CheckError("Unknown marker_by value", marker_by, markers)

@@ -1346,7 +1342,8 @@ def plot_xq2(
will be displayed and marked.

The points are grouped according to the `marker_by` option. The possible
values are: "process type", "experiment", "group" or "dataset".
values are: "process type", "experiment", "group" or "dataset" for discrete
colors, or "kinematics" for coloring by 1/(Q2(1-x)).

Some datasets can be made to appear highlighted in the figure: Define a key
called ``highlight_datasets`` containing the names of the datasets to be
@@ -1477,6 +1474,7 @@

    xh = defaultdict(list)
    q2h = defaultdict(list)
    cvdict = defaultdict(list)

if not highlight_datasets:
highlight_datasets = set()
@@ -1507,6 +1505,8 @@ def next_options():
        elif marker_by == "group":
            # if group is None then make sure that shows on legend.
            key = str(group)
        elif marker_by == "kinematics":
            key = None
        else:
            raise ValueError('Unknown marker_by value')

@@ -1522,6 +1522,7 @@ def next_options():
        xdict = x
        q2dict = q2

        cvdict[key].append(commondata.load().get_cv())
        xdict[key].append(fitted[0])
        q2dict[key].append(fitted[1])
        if display_cuts:
@@ -1536,6 +1537,13 @@
        else:
            # This is to get the label key
            coords = [], []
        if marker_by == "kinematics":
            ht_magnitude = np.concatenate(cvdict[key]) / (coords[1] * (1 - coords[0]))
            out = ax.scatter(
                *coords, marker='.', c=ht_magnitude, cmap="viridis", norm=mcolors.LogNorm()
            )
            clb = fig.colorbar(out)
            clb.ax.set_title(r'$F_\mathrm{exp}\frac{1}{Q^2(1-x)}$')
        ax.plot(*coords, label=key, markeredgewidth=1, markeredgecolor=None, **key_options[key])

# Iterate again so highlights are printed on top.
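The new "kinematics" marker colours each point by the experimental central value scaled by 1/(Q2(1-x)), matching the colorbar label in the hunk above; this highlights the large-x, low-Q2 corner where power corrections are expected to matter. The quantity itself is a one-liner (illustrative numbers only, not real data):

```python
import numpy as np

# Illustrative kinematics: (x, Q2) points with experimental central values.
x = np.array([0.1, 0.5])
q2 = np.array([10.0, 100.0])
cv = np.array([1.2, 0.8])

# Colour scale used when marker_by == "kinematics": largest where
# Q2 is small and x approaches 1, i.e. where higher-twist effects grow.
ht_magnitude = cv / (q2 * (1 - x))
print(ht_magnitude)
```

On a log colour scale (as in the plot's `LogNorm`), this spreads the several orders of magnitude between collider and fixed-target kinematics into a readable gradient.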
4 changes: 3 additions & 1 deletion validphys2/src/validphys/filters.py
@@ -125,7 +125,6 @@ def to_dict(self):
class FilterRule:
    """
    Dataclass which carries the filter rule information.

    """

    dataset: str = None
@@ -172,6 +171,9 @@ def default_filter_rules_input():
    are unique, i.e. that there are no duplicate rules for the same dataset or
    process with the same rule (`reason` and `local_variables` are not hashed).
    """
    # TODO: This should be done using a more sophisticated comparison
    # that checks if two rules are actually the same, regardless of the
    # order in which the cuts are defined.
    list_rules = yaml_safe.load(read_text(validphys.cuts, "filters.yaml"))
    unique_rules = set(FilterRule(**rule) for rule in list_rules)
    if len(unique_rules) != len(list_rules):
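The duplicate-rule detection above relies on `FilterRule` being a hashable dataclass, so that identical rules collapse when poured into a set. A minimal sketch of the mechanism with simplified fields (a stand-in for validphys's `FilterRule`, keeping only the hashed attributes):

```python
from dataclasses import dataclass

@dataclass(frozen=True)  # frozen=True makes the dataclass hashable
class Rule:
    dataset: str = None
    rule: str = None

list_rules = [
    Rule("NNPDF_POS_2P24GEV_XGL", "x > 0.1"),
    Rule("NNPDF_POS_2P24GEV_XGL", "x > 0.1"),  # exact duplicate
    Rule("NNPDF_POS_2P24GEV_F2C", "x < 0.74"),
]
unique_rules = set(list_rules)
print(len(unique_rules) != len(list_rules))  # True: a duplicate was found
```

As the TODO in the hunk notes, this only catches textually identical rules: two cuts that are logically equivalent but written in a different order would still hash differently.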