From 7e7b2efdad2f92369b321fa293b13f8b9b15f928 Mon Sep 17 00:00:00 2001
From: jackaraz
Date: Fri, 6 May 2022 18:08:36 +0200
Subject: [PATCH 01/34] initialize TACO interface

---
 madanalysis/misc/database.py       | 153 +++++++++++++++++++
 madanalysis/misc/taco_interface.py | 233 +++++++++++++++++++++++++++++
 2 files changed, 386 insertions(+)
 create mode 100644 madanalysis/misc/database.py
 create mode 100644 madanalysis/misc/taco_interface.py

diff --git a/madanalysis/misc/database.py b/madanalysis/misc/database.py
new file mode 100644
index 00000000..c9498638
--- /dev/null
+++ b/madanalysis/misc/database.py
@@ -0,0 +1,153 @@
+################################################################################
+#
+# Copyright (C) 2012-2022 Jack Araz, Eric Conte & Benjamin Fuks
+# The MadAnalysis development team, email:
+#
+# This file is part of MadAnalysis 5.
+# Official website:
+#
+# MadAnalysis 5 is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# MadAnalysis 5 is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with MadAnalysis 5. If not, see
+#
+################################################################################
+
+from typing import Text
+from math import sqrt
+
+"""
+TODO: Instead of using `regiondata: dict`, we can change our data structure to
+      be more object oriented, which will give us easier and finer control over
+      the data. These classes are written to replicate `regiondata` in the
+      run_recast.py file. In order not to change too much functionality at
+      once, dict-style access has been implemented as well; this allows the
+      complete transition to be spread over time.
+"""
+
+
+class Region:
+    """
+    Data structure for an analysis region
+
+    :param analysis: name of the analysis
+    :param regionID: name of the region
+    :param nobs: number of observed events
+    :param nb: number of expected background events
+    :param deltanb: total uncertainty on the expected background events
+    :param lumi: luminosity of the analysis [fb^-1]
+    """
+
+    def __init__(
+        self,
+        regionID: Text,
+        nobs: int,
+        nb: float,
+        deltanb: float,
+        lumi: float = 0.0,
+        analysis: Text = "__unknown_analysis__",
+    ) -> None:
+        self.analysis = analysis
+        self.region_id = regionID
+
+        self.nobs = nobs
+        self.nb = nb
+        self.deltanb = deltanb
+
+        self.final_nevt = -1
+        self.initial_nevt = -1
+        self.xsec = -1
+        self.lumi = lumi
+
+        self.s95exp = -1
+        self.s95obs = -1
+        self.cls = -1
+        self.best = 0
+
+    # Define __getitem__ and __setitem__ for backwards compatibility.
+    # This allows us not to change the current development too much.
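+    # A minimal usage sketch (with hypothetical numbers) of the dict-style
+    # access implemented below; "Nf" and "N0" are aliases for final_nevt and
+    # initial_nevt, while any other existing key maps directly onto an attribute:
+    #
+    #     region = Region(regionID="SR1", nobs=10, nb=8.5, deltanb=1.2)
+    #     region["N0"] = 10000.0  # same as region.initial_nevt = 10000.0
+    #     region["Nf"] = 250.0    # same as region.final_nevt = 250.0
+    #     assert region["Nf"] == region.final_nevt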
+
+    def __getitem__(self, item):
+        input_key = item
+        if item == "Nf":
+            input_key = "final_nevt"
+        elif item == "N0":
+            input_key = "initial_nevt"
+        return vars(self)[input_key]
+
+    def __setitem__(self, key, value):
+        input_key = key
+        if key == "Nf":
+            input_key = "final_nevt"
+        elif key == "N0":
+            input_key = "initial_nevt"
+        elif key not in vars(self).keys():
+            return
+
+        setattr(self, input_key, value)
+
+    @property
+    def eff(self) -> float:
+        """
+        Get the efficiency of the region
+        :return: float
+        """
+        return self.final_nevt / self.initial_nevt
+
+    @property
+    def stat(self) -> float:
+        """
+        Statistical uncertainty on the final number of events
+        """
+        return sqrt(self.eff * (1 - self.eff) / abs(self.initial_nevt * self.lumi))
+
+    @property
+    def luminosity(self) -> float:
+        return self.lumi
+
+    @luminosity.setter
+    def luminosity(self, value: float):
+        self.lumi = value
+
+    @property
+    def nsignal(self):
+        return self.xsec * self.lumi * 1000.0 * self.eff
+
+    @property
+    def n95(self) -> float:
+        """
+        Number of expected events for an SR excluded at 95% CL
+        """
+        return self.s95exp * self.lumi * 1000.0 * self.eff
+
+    @property
+    def rSR(self) -> float:
+        """
+        Ratio between the number of signal events and the number of expected
+        events for an SR excluded at 95% CL
+        """
+        return self.nsignal / self.n95
+
+
+class RegionData:
+    def __init__(self):
+        self.regiondata = {}
+
+    def keys(self):
+        return self.regiondata.keys()
+
+    def items(self):
+        return self.regiondata.items()
+
+    def __getitem__(self, item):
+        return self.regiondata.get(item, {})
+
+    def __setitem__(self, key, value):
+        if isinstance(value, dict):
+            self.regiondata[key] = Region(
+                regionID=key, nobs=value["nobs"], nb=value["nb"], deltanb=value["deltanb"]
+            )
diff --git a/madanalysis/misc/taco_interface.py b/madanalysis/misc/taco_interface.py
new file mode 100644
index 00000000..0aa70708
--- /dev/null
+++ b/madanalysis/misc/taco_interface.py
@@ -0,0 +1,233 @@
+################################################################################
+#
+# Copyright (C) 2012-2022 Jack Araz, Eric Conte & Benjamin Fuks
+# The MadAnalysis development team, email:
+#
+# This file is part of MadAnalysis 5.
+# Official website:
+#
+# MadAnalysis 5 is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# MadAnalysis 5 is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with MadAnalysis 5. If not, see
+#
+################################################################################
+
+import warnings
+
+from typing import Text
+
+from database import Region
+
+import numpy as np
+import pyhf
+
+# TODO: remove pyhf dependency from TACO interface
+
+
+class TACORegion(Region):
+    def __init__(
+        self,
+        analysis: Text,
+        regionID: Text,
+        nobs: int,
+        nb: float,
+        deltanb: float,
+        N0: float,
+        Nf: float,
+        xsection: float,
+        lumi: float,
+        s95exp: float,
+        s95obs: float,
+        cls: float,
+    ) -> None:
+        """
+        Initialization of a MadAnalysis signal region
+        :param analysis: analysis name
+        :param regionID: region name
+        :param N0: sum of weights before the analysis
+        :param Nf: sum of weights after the final cut
+        :param xsection: production cross section [pb]
+        :param lumi: luminosity of the analysis [fb^-1]
+        :param s95exp: [pb] excluded xsec at 95% CLs calculated via expected events
+        :param s95obs: [pb] excluded xsec at 95% CLs calculated via observed events
+        :param cls: exclusion CLs
+        """
+        super(TACORegion, self).__init__(
+            regionID=regionID, nobs=nobs, nb=nb, deltanb=deltanb, lumi=lumi, analysis=analysis
+        )
+        self.final_nevt = Nf
+        self.initial_nevt = N0
+
+        # Temporary: SModelS requires xsection to be a class with a `value` attribute
+        class XSEC:
+            def __init__(self, xsec):
+                self.value = xsec
+
+        self.xsection = XSEC(xsection)
+        self.xsec = xsection
+
+        self.s95exp = s95exp
+        self.s95obs = s95obs
+        self.cls = cls
+
+        self.marginalize = False
+
+        self.cachedLlhds = {"exp": {}, "obs": {}}
+
+    @property
+    def analysisId(self):
+        return self.analysis
+
+    @property
+    def dataType(self) -> Text:
+        return "efficiencyMap"
+
+    def getUpperLimit(self, expected: bool = False) -> float:
+        """
+        Get the exclusion cross section at 95% CLs
+        :param expected: if True, return the expected rather than the observed value
+        :return: float, cross section in pb
+        """
+        return getattr(self, "s95exp" if expected else "s95obs")
+
+    def getRValue(self, expected: bool = False) -> float:
+        # r = predicted xsec / excluded xsec, consistent with Region.rSR
+        return self.xsection.value / self.getUpperLimit(expected)
+
+    def likelihood(
+        self,
+        mu: float = 1.0,
+        expected: bool = False,
+        nll: bool = False,
+        useCached=False,
+    ) -> float:
+        """
+        Compute the likelihood for the signal strength mu
+
+        :param mu: signal strength
+        :param expected: if true, compute expected likelihood, else observed.
+                         if "posteriori", compute a posteriori expected likelihood
+                         (FIXME do we need this?)
+ :param nll: if True, return the negative log likelihood instead of the + likelihood + """ + + if useCached: + cached = None + if expected: + cached = self.cachedLlhds["exp"].get(mu, None) + else: + cached = self.cachedLlhds["obs"].get(mu, None) + + if cached is not None: + return cached + + def get_twice_nllh(model, data, par_bounds): + try: + with warnings.catch_warnings(): + # `pyhf.infer.mle.fixed_poi_fit` returns twice negative log likelihood + _, twice_nllh = pyhf.infer.mle.fixed_poi_fit( + mu, data, model, return_fitted_val=True, + par_bounds = par_bounds + ) + return twice_nllh + except ValueError as err: + return "update bounds" + + model = pyhf.simplemodels.uncorrelated_background( + [self.nsignal], [self.nb], [self.deltanb] + ) + par_bounds = model.config.suggested_bounds() + # print(par_bounds) + data = [self.nobs if not expected else self.nb] + model.config.auxdata + + counter = 0 + while True: + twice_nllh = get_twice_nllh(model, data, par_bounds) + if twice_nllh == "update bounds": + par_bounds = [(par_bounds[0][0], par_bounds[0][1] * 2.), + (par_bounds[1][0], par_bounds[1][1] * 2.)] + # print(f"update bounds: {par_bounds}") + else: + break + counter += 1 + if counter > 3: + twice_nllh = -1 + break + + if nll: + return float(twice_nllh / 2.0) + + llhd = float(np.exp(-twice_nllh / 2.0)) + if expected: + cached = self.cachedLlhds["exp"][mu] = llhd + else: + cached = self.cachedLlhds["obs"][mu] = llhd + + return llhd + + +if __name__ == "__main__": + import argparse, json, os + from smodels.tools.theoryPredictionsCombiner import TheoryPredictionsCombiner + + parser = argparse.ArgumentParser(description="Test TACO interface") + path = parser.add_argument_group("Path handling") + path.add_argument("REGIONDATA", type=str, help="Path to the JSON sterilized region data file.") + + param = parser.add_argument_group("Parameter handling") + param.add_argument("-xs", "--xsection", type=float, dest="XSEC", help="Cross section in pb.") + param.add_argument("-lumi", type=float, dest="LUMI", help="Luminosity in fb^-1.") + + args = parser.parse_args() + if not os.path.isfile(args.REGIONDATA): + raise FileNotFoundError + + with open(args.REGIONDATA, "r") as f: + regiondata = json.load(f) + + SRs = [] + for analysis, regdat in regiondata.items(): + + for regionID, data in regdat["regiondata"].items(): + if "nobs" not in data.keys(): + continue + current_region = TACORegion( + analysis, + regionID, + float(data["nobs"]), + float(data["nb"]), + float(data["deltanb"]), + float(data["N0"]), + float(data["Nf"]), + args.XSEC, + args.LUMI, + float(data["s95exp"]), + float(data["s95obs"]), + float(data["CLs"]), + ) + SRs.append(current_region) + break + + print(f"Analysis : {analysis}") + + for region in SRs: + if region.final_nevt > 0.0: + print(f"\nRegion name: {region.region_id}") + print(f"Expected upper limit : {region.s95exp}") + print(f"Obs upper limit : {region.s95obs}") + print(f"R-value : {region.getRValue()}") + print(f"Likelihood mu = 1 : {region.likelihood(1.)}") + print(f"Likelihood mu = 0 : {region.likelihood(0.)}") + + combiner = TheoryPredictionsCombiner(SRs) + print("TheoryPredictionsCombiner: ") + print("ul", combiner.getUpperLimit(expected=False)) + print("expected ul", combiner.getUpperLimit(expected=True)) + print("r-value", combiner.getRValue()) From 461b66e682c0fa8ef4c2f4e872a2e34b06b96df6 Mon Sep 17 00:00:00 2001 From: jackaraz Date: Mon, 9 May 2022 13:58:54 +0200 Subject: [PATCH 02/34] correction on upper limit calculation --- madanalysis/misc/taco_interface.py | 4 ++-- 1 
file changed, 2 insertions(+), 2 deletions(-) diff --git a/madanalysis/misc/taco_interface.py b/madanalysis/misc/taco_interface.py index 0aa70708..9b194bec 100644 --- a/madanalysis/misc/taco_interface.py +++ b/madanalysis/misc/taco_interface.py @@ -228,6 +228,6 @@ def get_twice_nllh(model, data, par_bounds): combiner = TheoryPredictionsCombiner(SRs) print("TheoryPredictionsCombiner: ") - print("ul", combiner.getUpperLimit(expected=False)) - print("expected ul", combiner.getUpperLimit(expected=True)) + print("ul", combiner.getUpperLimitOnMu(expected=False)) + print("expected ul", combiner.getUpperLimitOnMu(expected=True)) print("r-value", combiner.getRValue()) From c877d0e1654ba7be5734bd51dca13efecde511ab Mon Sep 17 00:00:00 2001 From: jackaraz Date: Mon, 9 May 2022 15:46:47 +0200 Subject: [PATCH 03/34] extend the test module of taco_interface.py --- madanalysis/misc/taco_interface.py | 38 +++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/madanalysis/misc/taco_interface.py b/madanalysis/misc/taco_interface.py index 9b194bec..2ea9a4e3 100644 --- a/madanalysis/misc/taco_interface.py +++ b/madanalysis/misc/taco_interface.py @@ -31,6 +31,7 @@ # TODO: remove pyhf dependency from TACO interface + class TACORegion(Region): def __init__( self, @@ -133,16 +134,13 @@ def get_twice_nllh(model, data, par_bounds): with warnings.catch_warnings(): # `pyhf.infer.mle.fixed_poi_fit` returns twice negative log likelihood _, twice_nllh = pyhf.infer.mle.fixed_poi_fit( - mu, data, model, return_fitted_val=True, - par_bounds = par_bounds + mu, data, model, return_fitted_val=True, par_bounds=par_bounds ) return twice_nllh except ValueError as err: return "update bounds" - model = pyhf.simplemodels.uncorrelated_background( - [self.nsignal], [self.nb], [self.deltanb] - ) + model = pyhf.simplemodels.uncorrelated_background([self.nsignal], [self.nb], [self.deltanb]) par_bounds = model.config.suggested_bounds() # print(par_bounds) data = [self.nobs if not expected else self.nb] + model.config.auxdata @@ -151,8 +149,10 @@ def get_twice_nllh(model, data, par_bounds): while True: twice_nllh = get_twice_nllh(model, data, par_bounds) if twice_nllh == "update bounds": - par_bounds = [(par_bounds[0][0], par_bounds[0][1] * 2.), - (par_bounds[1][0], par_bounds[1][1] * 2.)] + par_bounds = [ + (par_bounds[0][0], par_bounds[0][1] * 2.0), + (par_bounds[1][0], par_bounds[1][1] * 2.0), + ] # print(f"update bounds: {par_bounds}") else: break @@ -176,6 +176,7 @@ def get_twice_nllh(model, data, par_bounds): if __name__ == "__main__": import argparse, json, os from smodels.tools.theoryPredictionsCombiner import TheoryPredictionsCombiner + from smodels.tools.statistics import rootFromNLLs parser = argparse.ArgumentParser(description="Test TACO interface") path = parser.add_argument_group("Path handling") @@ -226,8 +227,29 @@ def get_twice_nllh(model, data, par_bounds): print(f"Likelihood mu = 1 : {region.likelihood(1.)}") print(f"Likelihood mu = 0 : {region.likelihood(0.)}") + def clsRoot(mu, combiner, expected=False): + # at - infinity this should be .95, + # at + infinity it should -.05 + # Make sure to always compute the correct llhd value (from theoryPrediction) + # and not used the cached value (which is constant for mu~=1 an mu~=0) + + mu_hat, sigma_mu, lmax = combiner.findMuHat(allowNegativeSignals=True, extended_output=True) + nll0 = combiner.likelihood(mu_hat, expected=expected, nll=True) + # a posteriori expected is needed here + # mu_hat is mu_hat for signal_rel + mu_hatA, _, nll0A = 
combiner.findMuHat( + expected="posteriori", nll=True, extended_output=True + ) + + nll = combiner.likelihood(mu, nll=True, expected=expected, useCached=False) + nllA = combiner.likelihood(mu, expected="posteriori", nll=True, useCached=False) + ret = rootFromNLLs(nllA, nll0A, nll, nll0) + return ret + combiner = TheoryPredictionsCombiner(SRs) - print("TheoryPredictionsCombiner: ") + print("\nTheoryPredictionsCombiner: ") print("ul", combiner.getUpperLimitOnMu(expected=False)) print("expected ul", combiner.getUpperLimitOnMu(expected=True)) print("r-value", combiner.getRValue()) + print(f"expected 1-CLs : {clsRoot(1., combiner, True)}") + print(f"obs 1-CLs : {clsRoot(1., combiner, False)}") From 3cd9ef0c564f6a23adcfd854de36bd4e0316163e Mon Sep 17 00:00:00 2001 From: jackaraz Date: Mon, 9 May 2022 18:38:05 +0200 Subject: [PATCH 04/34] add bests --- madanalysis/misc/taco_interface.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/madanalysis/misc/taco_interface.py b/madanalysis/misc/taco_interface.py index 2ea9a4e3..6de94cf0 100644 --- a/madanalysis/misc/taco_interface.py +++ b/madanalysis/misc/taco_interface.py @@ -47,6 +47,7 @@ def __init__( s95exp: float, s95obs: float, cls: float, + best: bool = None # we might not need this ) -> None: """ Initialization of MadAnalysis Signal Region @@ -77,6 +78,7 @@ def __init__(self, xsec): self.s95exp = s95exp self.s95obs = s95obs self.cls = cls + self.best = best self.marginalize = False @@ -251,5 +253,5 @@ def clsRoot(mu, combiner, expected=False): print("ul", combiner.getUpperLimitOnMu(expected=False)) print("expected ul", combiner.getUpperLimitOnMu(expected=True)) print("r-value", combiner.getRValue()) - print(f"expected 1-CLs : {clsRoot(1., combiner, True)}") - print(f"obs 1-CLs : {clsRoot(1., combiner, False)}") + print(f"expected 1-CLs : {1. - clsRoot(1., combiner, True)}") + print(f"obs 1-CLs : {1. - clsRoot(1., combiner, False)}") From 5cc6213f49340f8a7b9545d178aa46338b07260c Mon Sep 17 00:00:00 2001 From: jackaraz Date: Thu, 12 May 2022 09:45:38 +0200 Subject: [PATCH 05/34] update simplified_likelihood.py according to recent updates in SModelS: see combo_fixes branch --- madanalysis/misc/simplified_likelihood.py | 1687 +++++++++++++-------- 1 file changed, 1024 insertions(+), 663 deletions(-) diff --git a/madanalysis/misc/simplified_likelihood.py b/madanalysis/misc/simplified_likelihood.py index beb0e679..5a7d7413 100644 --- a/madanalysis/misc/simplified_likelihood.py +++ b/madanalysis/misc/simplified_likelihood.py @@ -1,137 +1,234 @@ ################################################################################ -# +# # Copyright (C) 2012-2022 Jack Araz, Eric Conte & Benjamin Fuks # The MadAnalysis development team, email: -# +# # This file is part of MadAnalysis 5. # Official website: -# +# # MadAnalysis 5 is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. -# +# # MadAnalysis 5 is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with MadAnalysis 5. If not, see -# +# ################################################################################ - #!/usr/bin/env python3 + """ -.. 
module:: simplified_likelihood +.. module:: simplifiedLikelihoods :synopsis: Code that implements the simplified likelihoods as presented in CMS-NOTE-2017-001, see https://cds.cern.ch/record/2242860. - Copied from the SModelS v1.2 simplifiedLikelihood.py module (arXiv:1811.10624) + In collaboration with Andy Buckley, Sylvain Fichet and Nickolas Wardle. + Simplified likelihoods v2 (JHEP_021P_1018) are partly implemented. + Adapted from SModelS .. moduleauthor:: Wolfgang Waltenberger """ -from __future__ import print_function -from __future__ import absolute_import -from scipy import stats, optimize, integrate, special -from scipy import __version__ as scipy_version -from numpy import sqrt, exp, log, sign, array, ndarray +from scipy import stats, optimize, integrate, special, linalg +from numpy import sqrt, exp, log, sign, array, ndarray from functools import reduce -import numpy as NP -import math, copy, logging + +# from smodels.tools.statistics import rootFromNLLs, determineBrentBracket + +import numpy as np +import math import copy -from six.moves import range -from six.moves import zip +import logging + +logger = logging.getLogger("MA5") + +# rootFromNLLs, determineBrentBracket are retreived from smodels.tools.statistics +def rootFromNLLs(nllA, nll0A, nll, nll0): + """compute the CLs - alpha from the NLLs""" + qmu = 2 * (nll - nll0) + if qmu < 0.0: + qmu = 0.0 + sqmu = np.sqrt(qmu) + qA = 2 * (nllA - nll0A) + if qA < 0.0: + qA = 0.0 + sqA = np.sqrt(qA) + CLsb = 1.0 - stats.multivariate_normal.cdf(sqmu) + CLb = 0.0 + if qA >= qmu: + CLb = stats.multivariate_normal.cdf(sqA - sqmu) + else: + if qA == 0.0: + CLsb = 1.0 + CLb = 1.0 + else: + CLsb = 1.0 - stats.multivariate_normal.cdf((qmu + qA) / (2 * sqA)) + CLb = 1.0 - stats.multivariate_normal.cdf((qmu - qA) / (2 * sqA)) + CLs = 0.0 + if CLb > 0.0: + CLs = CLsb / CLb + cl = 0.95 + root = CLs - 1.0 + cl + return root + + +def determineBrentBracket(mu_hat, sigma_mu, rootfinder): + """find a, b for brent bracketing + :param mu_hat: mu that maximizes likelihood + :param sigm_mu: error on mu_hat (not too reliable) + :param rootfinder: function that finds the root (usually root_func) + """ + sigma_mu = max(sigma_mu, 0.5) # there is a minimum on sigma_mu + # the root should be roughly at mu_hat + 2*sigma_mu + a = mu_hat + 1.5 * sigma_mu + ntrials = 20 + i = 0 + foundExtra = False + while rootfinder(a) < 0.0: + # if this is negative, we move it to the left + i += 1 + a -= (i**2.0) * sigma_mu + if i > ntrials or a < -10000.0: + for a in [0.0, 1.0, -1.0, 3.0, -3.0, 10.0, -10.0, 0.1, -0.1, 0.01, -0.01]: + if rootfinder(a) > 0.0: + foundExtra = True + break + if not foundExtra: + logger.error( + f"cannot find an a that is left of the root. last attempt, a={a:.2f}, root = {rootfinder(a):.2f}." + ) + logger.error(f"mu_hat={mu_hat:.2f}, sigma_mu={sigma_mu:.2f}") + return 0.0, 0.0 + i = 0 + foundExtra = False + b = mu_hat + 2.5 * sigma_mu + while rootfinder(b) > 0.0: + # if this is positive, we move it to the right + i += 1 + b += (i**2.0) * sigma_mu + closestr, closest = float("inf"), None + if i > ntrials: + for b in [1.0, 0.0, 3.0, -1.0, 10.0, -3.0, 0.1, -0.1, -10.0, 100.0, -100.0, 1000.0]: + root = rootfinder(b) + if root < 0.0: + foundExtra = True + break + if root < closestr: + closestr = root + closest = b + if not foundExtra: + logger.error(f"cannot find a b that is right of the root (i.e. 
rootfinder(b) < 0).") + logger.error(f"closest to zero rootfinder({closest})={closestr}") + logger.error(f"mu_hat was at {mu_hat:.2f} sigma_mu at {sigma_mu:.2f}") + return 0.0, 0.0 + return a, b -logger = logging.getLogger('MA5') class Data: - """ A very simple observed container to collect all the data - needed to fully define a specific statistical model """ - - def __init__(self, observed, backgrounds, covariance, third_moment=None, - nsignal=None, name="model", deltas_rel = 0.2): + """A very simple observed container to collect all the data + needed to fully define a specific statistical model""" + + def __init__( + self, + observed, + backgrounds, + covariance, + third_moment=None, + nsignal=None, + name="model", + deltas_rel=0.2, + lumi=None, + ): """ :param observed: number of observed events per dataset :param backgrounds: expected bg per dataset :param covariance: uncertainty in background, as a covariance matrix :param nsignal: number of signal events in each dataset :param name: give the model a name, just for convenience - :param deltas_rel: the assumed relative error on the signal hypotheses. + :param deltas_rel: the assumed relative error on the signal hypotheses. The default is 20%. + :param lumi: luminosity of dataset in 1/fb, or None """ - self.observed = NP.around(self.convert(observed)) #Make sure observed number of events are integers + self.observed = self.convert(observed) # Make sure observed number of events are integers + ## self.observed = np.around(self.convert(observed)) #Make sure observed number of events are integers self.backgrounds = self.convert(backgrounds) self.n = len(self.observed) self.covariance = self._convertCov(covariance) self.nsignal = self.convert(nsignal) + self.lumi = lumi if self.nsignal is None: - self.signal_rel = self.convert(1.) + if len(self.backgrounds) == 1: + # doesnt matter, does it? + self.nsignal = np.array([1.0]) + self.signal_rel = self.convert(1.0) elif self.nsignal.sum(): - self.signal_rel = self.nsignal/self.nsignal.sum() - else: - self.signal_rel = array([0.]*len(self.nsignal)) - + self.signal_rel = self.nsignal / self.nsignal.sum() + else: + self.signal_rel = array([0.0] * len(self.nsignal)) + self.third_moment = self.convert(third_moment) - if type(self.third_moment) != type(None) and NP.sum([ abs(x) for x in self.third_moment ]) < 1e-10: + if ( + type(self.third_moment) != type(None) + and np.sum([abs(x) for x in self.third_moment]) < 1e-10 + ): self.third_moment = None self.name = name self.deltas_rel = deltas_rel self._computeABC() - # Checking the scip version - v = float(scipy_version.split(".")[0]) - if v < 1.0: - logger.warning("You're using an old version of scipy. This might impact the global CLs calculation.") - - def totalCovariance ( self, nsig ): - """ get the total covariance matrix, taking into account - also signal uncertainty for the signal hypothesis . + + def totalCovariance(self, nsig): + """get the total covariance matrix, taking into account + also signal uncertainty for the signal hypothesis . If nsig is None, the predefined signal hypothesis is taken. """ if self.isLinear(): - cov_tot = self.V + self.var_s( nsig ) + cov_tot = self.V + self.var_s(nsig) else: - cov_tot = self.covariance+ self.var_s(nsig) + cov_tot = self.covariance + self.var_s(nsig) return cov_tot - def zeroSignal(self): """ Is the total number of signal events zero? 
""" - - return len(self.nsignal[self.nsignal>0.]) == 0 + if self.nsignal is None: + return True + return len(self.nsignal[self.nsignal > 0.0]) == 0 - def var_s(self,nsig=None): + def var_s(self, nsig=None): """ The signal variances. Convenience function. - + :param nsig: If None, it will use the model expected number of signal events, otherwise will return the variances for the input value using the relative signal uncertainty defined for the model. - + """ - + if nsig is None: nsig = self.nsignal else: nsig = self.convert(nsig) - return NP.diag((nsig*self.deltas_rel)**2) + return np.diag((nsig * self.deltas_rel) ** 2) def isScalar(self, obj): """ Determine if obj is a scalar (float or int) """ - - if type(obj) == ndarray: + + if isinstance(obj, ndarray): ## need to treat separately since casting array([0.]) to float works return False try: _ = float(obj) return True - except: + except (ValueError, TypeError): pass return False @@ -141,7 +238,7 @@ def convert(self, obj): If object is a float or int, it is converted to a one element array. """ - + if type(obj) == type(None): return obj if self.isScalar(obj): @@ -152,18 +249,18 @@ def __str__(self): return self.name + " (%d dims)" % self.n def _convertCov(self, obj): - + if self.isScalar(obj): - return array ( [ [ obj ] ] ) - if type(obj[0]) == list: - return array ( obj ) - if type(obj[0]) == float: + return array([[obj]]) + if isinstance(obj[0], list): + return array(obj) + if isinstance(obj[0], float): ## if the matrix is flattened, unflatten it. - return array([ obj[self.n*i:self.n*(i+1)] for i in range(self.n)]) - + return array([obj[self.n * i : self.n * (i + 1)] for i in range(self.n)]) + return obj - def _computeABC( self ): + def _computeABC(self): """ Compute the terms A, B, C, rho, V. Corresponds with Eqs. 1.27-1.30 in the second paper. @@ -176,202 +273,330 @@ def _computeABC( self ): return covD = self.diagCov() - C=[] - for m2,m3 in zip(covD, self.third_moment): - if m3 == 0.: + C = [] + for m2, m3 in zip(covD, self.third_moment): + if m3 == 0.0: m3 = 1e-30 - k = -NP.sign(m3)*sqrt(2.*m2 ) - dm = sqrt ( 8.*m2**3/m3**2 - 1. ) - C.append( k*NP.cos ( 4.*NP.pi/3. + NP.arctan(dm) / 3. )) - - self.C=NP.array(C) ## C, as define in Eq. 1.27 (?) in the second paper - self.B = sqrt( covD - 2*self.C**2 ) ## B, as defined in Eq. 1.28(?) - self.A = self.backgrounds - self.C ## A, Eq. 1.30(?) - self.rho = NP.array( [ [0.]*self.n ]*self.n ) ## Eq. 1.29 (?) + k = -np.sign(m3) * sqrt(2.0 * m2) + dm = sqrt(8.0 * m2**3 / m3**2 - 1.0) + C.append(k * np.cos(4.0 * np.pi / 3.0 + np.arctan(dm) / 3.0)) + + self.C = np.array(C) ## C, as define in Eq. 1.27 (?) in the second paper + self.B = sqrt(covD - 2 * self.C**2) ## B, as defined in Eq. 1.28(?) + self.A = self.backgrounds - self.C ## A, Eq. 1.30(?) + self.rho = np.array([[0.0] * self.n] * self.n) ## Eq. 1.29 (?) 
for x in range(self.n): - for y in range(x,self.n): - bxby=self.B[x]*self.B[y] - cxcy=self.C[x]*self.C[y] - e=(4.*cxcy)**(-1)*(sqrt( bxby**2+8*cxcy*self.covariance[x][y])-bxby) - self.rho[x][y]=e - self.rho[y][x]=e + for y in range(x, self.n): + bxby = self.B[x] * self.B[y] + cxcy = self.C[x] * self.C[y] + e = (4.0 * cxcy) ** (-1) * ( + sqrt(bxby**2 + 8 * cxcy * self.covariance[x][y]) - bxby + ) + self.rho[x][y] = e + self.rho[y][x] = e self.sandwich() # self.V = sandwich ( self.B, self.rho ) - def sandwich( self ): + def sandwich(self): """ Sandwich product """ - - ret = NP.array ( [ [0.]*len(self.B) ]*len(self.B) ) + + ret = np.array([[0.0] * len(self.B)] * len(self.B)) for x in range(len(self.B)): - for y in range(x,len(self.B)): - T=self.B[x]*self.B[y]*self.rho[x][y] - ret[x][y]=T - ret[y][x]=T + for y in range(x, len(self.B)): + T = self.B[x] * self.B[y] * self.rho[x][y] + ret[x][y] = T + ret[y][x] = T self.V = ret def isLinear(self): """ Statistical model is linear, i.e. no quadratic term in poissonians """ - + return type(self.C) == type(None) def diagCov(self): """ Diagonal elements of covariance matrix. Convenience function. """ - - return NP.diag( self.covariance ) + + return np.diag(self.covariance) def correlations(self): """ Correlation matrix, computed from covariance matrix. - Convenience function. + Convenience function. """ - + if hasattr(self, "corr"): return self.corr - + self.corr = copy.deepcopy(self.covariance) for x in range(self.n): - self.corr[x][x]=1. - for y in range(x+1,self.n): - rho=self.corr[x][y]/sqrt(self.covariance[x][x]*self.covariance[y][y]) - self.corr[x][y]=rho - self.corr[y][x]=rho + self.corr[x][x] = 1.0 + for y in range(x + 1, self.n): + rho = self.corr[x][y] / sqrt(self.covariance[x][x] * self.covariance[y][y]) + self.corr[x][y] = rho + self.corr[y][x] = rho return self.corr def signals(self, mu): """ Returns the number of expected signal events, for all datasets, given total signal strength mu. - + :param mu: Total number of signal events summed over all datasets. """ - - return (mu*self.nsignal) + + return mu * self.signal_rel + class LikelihoodComputer: - def __init__(self, data, ntoys = 10000 ): + + debug_mode = False + + def __init__(self, data, toys=30000): """ :param data: a Data object. - :param ntoys: number of toys when marginalizing + :param toys: number of toys when marginalizing """ - + self.model = data - self.ntoys = ntoys + self.toys = toys - def dLdMu(self, mu, signal_rel, theta_hat): + def dNLLdMu(self, mu, signal_rel, theta_hat): """ - d (ln L)/d mu, if L is the likelihood. The function + d (- ln L)/d mu, if L is the likelihood. The function whose root gives us muhat, i.e. the mu that maximizes the likelihood. - + :param mu: total number of signal events :param signal_rel: array with the relative signal strengths for each dataset (signal region) :param theta_hat: array with nuisance parameters - - """ - - #Define relative signal strengths: - denominator = mu*signal_rel + self.model.backgrounds + theta_hat - for ctr,d in enumerate(denominator): - if d == 0.: - if (self.model.observed[ctr]*signal_rel[ctr]) == 0.: - # logger.debug("zero denominator, but numerator also zero, so we set denom to 1.") - denominator[ctr]=1. 
+ """ + if not self.model.isLinear(): + logger.debug("implemented only for linear model") + # n_pred^i := mu s_i + b_i + theta_i + # NLL = sum_i [ - n_obs^i * ln ( n_pred^i ) + n_pred^i ] + # d NLL / d mu = sum_i [ - ( n_obs^i * s_ i ) / n_pred_i + s_i ] + + # Define relative signal strengths: + n_pred = mu * signal_rel + self.model.backgrounds + theta_hat + + for ctr, d in enumerate(n_pred): + if d == 0.0: + if (self.model.observed[ctr] * signal_rel[ctr]) == 0.0: + # logger.debug("zero denominator, but numerator also zero, so we set denom to 1.") + n_pred[ctr] = 1e-5 else: - raise Exception("we have a zero value in the denominator at pos %d, with a non-zero numerator. dont know how to handle." % ctr) - ret = self.model.observed*signal_rel/denominator - signal_rel - - if type(ret) in [ array, ndarray, list ]: + # n_pred[ctr]=1e-5 + raise Exception( + "we have a zero value in the denominator at pos %d, with a non-zero numerator. dont know how to handle." + % ctr + ) + ret = -self.model.observed * signal_rel / n_pred + signal_rel + + if type(ret) in [array, ndarray, list]: ret = sum(ret) return ret - def findMuHat(self, signal_rel): + def extendedOutput(self, extended_output, default=None): + if extended_output: + ret = {"muhat": default, "sigma_mu": default, "lmax": default} + return ret + return default + + def findAvgr(self, mu, theta_hat, signal_rel): + """from the difference observed - background, find got inital + values for lower and upper""" + mu_c = self.model.observed - self.model.backgrounds - theta_hat + mu_r, wmu_r = [], [] + n_pred = mu * signal_rel + self.model.backgrounds + theta_hat + obs = self.model.observed + for i, s in enumerate(n_pred): + if s == 0.0: + n_pred[i] = 1e-6 + hessian = self.model.observed * signal_rel**2 / n_pred**2 + wtot = 0.0 + for s in zip(mu_c, signal_rel, hessian): + if s[1] > 1e-10: + w = 1.0 + if s[2] > 0.0: + w = s[2] + wtot += w + mu_r.append(s[0] / s[1]) + wmu_r.append(w * s[0] / s[1]) + ret = min(mu_r), sum(wmu_r) / wtot, max(mu_r) + return ret + + def findMuHat( + self, nsig, allowNegativeSignals=False, extended_output=False, nll=False, marginalize=False + ): """ Find the most likely signal strength mu given the relative signal strengths in each dataset (signal region). - - :param signal_rel: array with relative signal strengths - - :returns: mu_hat, the total signal yield. + + :param nsig: array with relative signal strengths or signal yields + :param allowNegativeSignals: if true, then also allow for negative values + :param extended_output: if true, return also sigma_mu, the estimate of the error of mu_hat, and lmax, the likelihood at mu_hat + :param nll: if true, return nll instead of lmax in the extended output + + :returns: mu_hat, i.e. the maximum likelihood estimate of mu, if extended output is requested, it returns mu_hat, sigma_mu -- the standard deviation around mu_hat, and llhd, the likelihood at mu_hat """ - if (self.model.backgrounds == self.model.observed).all(): - return 0. - - if type(signal_rel) in [list, ndarray]: - signal_rel = array(signal_rel) - - signal_rel[signal_rel==0.] 
= 1e-20 - if sum(signal_rel<0.): + return self.extendedOutput(extended_output, 0.0) + + if type(nsig) in [list, ndarray]: + nsig = array(nsig) + + nsig[nsig == 0.0] = 1e-20 + if sum(nsig < 0.0): raise Exception("Negative relative signal strengths!") ## we need a very rough initial guess for mu(hat), to come ## up with a first theta - self.nsig = array([0.]*len(self.model.observed)) + # self.nsig = array([0.]*len(self.model.observed)) + self.nsig = nsig ## we start with theta_hat being all zeroes - theta_hat = array([0.]*len(self.model.observed)) - mu_hat_old, mu_hat = 0., 1. - ctr=0 - widener=3. - while abs(mu_hat - mu_hat_old)>1e-10 and abs(mu_hat - mu_hat_old )/(mu_hat+mu_hat_old) > .5e-2 and ctr < 20: - ctr+=1 + # theta_hat = array([0.]*len(self.model.observed)) + mu_hat_old, mu_hat = 0.0, 1.0 + ctr = 0 + widener = 3.0 + while ( + abs(mu_hat - mu_hat_old) > 1e-10 + and abs(mu_hat - mu_hat_old) / (mu_hat + mu_hat_old) > 0.5e-2 + and ctr < 20 + ): + theta_hat, _ = self.findThetaHat(mu_hat * nsig) + ctr += 1 mu_hat_old = mu_hat - #logger.info ( "theta hat[%d]=%s" % (ctr,list( theta_hat[:11] ) ) ) - #logger.info ( " mu hat[%d]=%s" % (ctr, mu_hat ) ) - mu_c = NP.abs(self.model.observed - self.model.backgrounds - theta_hat)/signal_rel + minr, avgr, maxr = self.findAvgr(mu_hat, theta_hat, nsig) + # for i,s in enumerate ( signal_rel ): + # if abs(s) < 1e-19: + # mu_c[i]=0. ## find mu_hat by finding the root of 1/L dL/dmu. We know ## that the zero has to be between min(mu_c) and max(mu_c). - lower,upper = 0.,widener*max(mu_c) - lower_v = self.dLdMu(lower, signal_rel, theta_hat) - upper_v = self.dLdMu(upper, signal_rel, theta_hat) - total_sign = NP.sign(lower_v * upper_v) - if total_sign > -.5: - if upper_v < lower_v < 0.: - ## seems like we really want to go for mu_hat = 0. - return 0. - logger.debug ( "weird. cant find a zero in the Brent bracket "\ - "for finding mu(hat). Let me try with a very small" - " value." ) - lower = 1e-4*max(mu_c) - lower_v = self.dLdMu( lower, signal_rel, theta_hat ) - total_sign = NP.sign( lower_v * upper_v ) - if total_sign > -.5: - logger.debug ( "cant find zero in Brentq bracket. l,u,ctr=%s,%s,%s" % \ - ( lower, upper, ctr ) ) - widener=widener*1.5 - continue - mu_hat = optimize.brentq ( self.dLdMu, lower, upper, args=(signal_rel, theta_hat ) ) - theta_hat,_ = self.findThetaHat( mu_hat*signal_rel) - ctr+=1 - + lstarters = [ + avgr - 0.2 * abs(avgr), + minr, + 0.0, + -1.0, + 1.0, + 10.0, + -0.1, + 0.1, + -100.0, + 100.0, + -1000.0, + ] + closestl, closestr = None, float("inf") + for lower in lstarters: + lower_v = self.dNLLdMu(lower, nsig, theta_hat) + if lower_v < 0.0: + break + if lower_v < closestr: + closestl, closestr = lower, lower_v + if lower_v > 0.0: + logger.debug( + f"did not find a lower value with rootfinder(lower) < 0. 
Closest: f({closestl})={closestr}" + ) + return self.extendedOutput(extended_output, 0.0) + ustarters = [ + avgr + 0.2 * abs(avgr), + maxr, + 0.0, + 1.0, + 10.0, + -1.0 - 0.1, + 0.1, + 100.0, + -100.0, + 1000.0, + -1000.0, + 0.01, + -0.01, + ] + closestl, closestr = None, float("inf") + for upper in ustarters: + upper_v = self.dNLLdMu(upper, nsig, theta_hat) + if upper_v > 0.0: + break + if upper_v < closestr: + closestl, closestr = upper, upper_v + if upper_v < 0.0: + logger.debug("did not find an upper value with rootfinder(upper) > 0.") + return self.extendedOutput(extended_output, 0.0) + mu_hat = optimize.brentq(self.dNLLdMu, lower, upper, args=(nsig, theta_hat), rtol=1e-9) + if not allowNegativeSignals and mu_hat < 0.0: + mu_hat = 0.0 + theta_hat, _ = self.findThetaHat(mu_hat * nsig) + if self.debug_mode: + self.theta_hat = theta_hat + + # print ( f">> found mu_hat {mu_hat:.2g} for signal {sum(signal_rel):.2f} allowNeg {allowNegativeSignals} l,u={lower},{upper} lastavgr={avgr}" ) + # print ( f">>>> obs {self.model.observed[:2]} bg {self.model.backgrounds[:2]}" ) + # print ( f">>>> mu_c {np.mean(mu_c)}" ) + if extended_output: + sigma_mu = self.getSigmaMu(mu_hat, nsig, theta_hat) + llhd = self.likelihood(self.model.signals(mu_hat), marginalize=marginalize, nll=nll) + ret = {"muhat": mu_hat, "sigma_mu": sigma_mu, "lmax": llhd} + return ret return mu_hat - def getSigmaMu(self, signal_rel): + def getSigmaMu(self, mu, nsig, theta_hat): """ - Get a rough estimate for the variance of mu around mu_max. - - :param signal_rel: array with relative signal strengths in each dataset (signal region) + Get an estimate for the standard deviation of mu at , from + the inverse hessian + + :param nsig: array with signal yields or relative signal strengths + in each dataset (signal region) + """ + if not self.model.isLinear(): + logger.debug("implemented only for linear model") + # d^2 mu NLL / d mu^2 = sum_i [ n_obs^i * s_i**2 / n_pred^i**2 ] + + # Define relative signal strengths: + n_pred = mu * nsig + self.model.backgrounds + theta_hat + + for ctr, d in enumerate(n_pred): + if d == 0.0: + if (self.model.observed[ctr] * nsig[ctr]) == 0.0: + # logger.debug("zero denominator, but numerator also zero, so we set denom to 1.") + n_pred[ctr] = 1.0 + else: + raise Exception( + "we have a zero value in the denominator at pos %d, with a non-zero numerator. dont know how to handle." 
+ % ctr + ) + hessian = self.model.observed * nsig**2 / n_pred**2 + + if type(hessian) in [array, ndarray, list]: + hessian = sum(hessian) + # the error is the square root of the inverse of the hessian + if hessian == 0.0: + # if all observations are zero, we replace them by the expectations + if sum(self.model.observed) == 0: + hessian = sum(nsig**2 / n_pred) + stderr = float(np.sqrt(1.0 / hessian)) + return stderr """ - if type(signal_rel) in [ list, ndarray ]: - s_effs = sum(signal_rel) - - sgm_mu = sqrt(sum(self.model.observed) + sum(NP.diag(self.model.covariance)))/s_effs - + if type(nsig) in [ list, ndarray ]: + s_effs = sum(nsig) + + sgm_mu = float(sqrt(sum(self.model.observed) + sum(np.diag(self.model.covariance)))/s_effs) + return sgm_mu + """ - #Define integrand (gaussian_(bg+signal)*poisson(nobs)): + # Define integrand (gaussian_(bg+signal)*poisson(nobs)): # def prob(x0, x1 ) - - def probMV(self, nll, theta ): - """ probability, for nuicance parameters theta - :params nll: if True, compute negative log likelihood """ + def probMV(self, nll, theta): + """probability, for nuicance parameters theta + :params nll: if True, compute negative log likelihood""" # theta = array ( thetaA ) # ntot = self.model.backgrounds + self.nsig # lmbda = theta + self.ntot ## the lambda for the Poissonian @@ -379,301 +604,357 @@ def probMV(self, nll, theta ): lmbda = self.model.backgrounds + self.nsig + theta else: lmbda = self.nsig + self.model.A + theta + self.model.C * theta**2 / self.model.B**2 - lmbda[lmbda<=0.] = 1e-30 ## turn zeroes to small values + lmbda[lmbda <= 0.0] = 1e-30 ## turn zeroes to small values + obs = self.model.observed + + def is_integer(x): + if type(x) in [int, np.int64]: + return True + if type(x) in [float]: + return x.is_integer() + return False + + ## not needed for now + allintegers = np.all([is_integer(i) for i in obs]) if nll: - #poisson = self.model.observed * log ( lmbda ) - lmbda #- self.gammaln - poisson = stats.poisson.logpmf( self.model.observed, lmbda ) - #print ( "p",poisson,poisson2 ) + if allintegers: + poisson = stats.poisson.logpmf(obs, lmbda) + else: + poisson = -lmbda + obs * np.log(lmbda) - special.loggamma(obs + 1) else: - poisson = stats.poisson.pmf( self.model.observed, lmbda ) - #print ( "nonll",poisson ) + if allintegers: + poisson = stats.poisson.pmf(obs, lmbda) + else: + # poisson = np.exp(-lmbda)*np.power(lmbda,obs)/special.gamma(obs+1) + logpoiss = -lmbda + obs * np.log(lmbda) - special.loggamma(obs + 1) + poisson = np.exp(logpoiss) try: - M = [0.]*len(theta) + M = [0.0] * len(theta) C = self.model.V - if self.model.n == 1: - C = self.model.totalCovariance(self.nsig) + # if self.model.n == 1: I think not a good idea + # C = self.model.totalCovariance(self.nsig) + dTheta = theta - M + expon = -0.5 * np.dot(np.dot(dTheta, self.weight), dTheta) + self.logcoeff + # print ( "expon", expon, "coeff", self.coeff ) if nll: - gaussian = stats.multivariate_normal.logpdf(theta,mean=M,cov=C) - ret = - gaussian - sum(poisson) + gaussian = expon # np.log ( self.coeff ) + # gaussian2 = stats.multivariate_normal.logpdf(theta,mean=M,cov=C) + ret = -gaussian - sum(poisson) else: - gaussian = stats.multivariate_normal.pdf(theta,mean=M,cov=C) - ret = gaussian * ( reduce(lambda x, y: x*y, poisson) ) + gaussian = np.exp(expon) + # gaussian = self.coeff * np.exp ( expon ) + # gaussian2 = stats.multivariate_normal.pdf(theta,mean=M,cov=C) + ret = gaussian * (reduce(lambda x, y: x * y, poisson)) return ret except ValueError as e: - raise Exception("ValueError %s, %s" % ( 
e, self.model.totalCovariance(self.nsig) )) + raise Exception("ValueError %s, %s" % (e, self.model.V)) + # raise Exception("ValueError %s, %s" % ( e, self.model.totalCovariance(self.nsig) )) # raise Exception("ValueError %s, %s" % ( e, self.model.V )) - def nll( self, theta ): - """ probability, for nuicance parameters theta, - as a negative log likelihood. """ - return self.probMV(True,theta) + def nllOfNuisances(self, theta): + """probability, for nuicance parameters theta, + as a negative log likelihood.""" + return self.probMV(True, theta) - def nllprime( self, theta ): - """ the derivative of nll as a function of the thetas. - Makes it easier to find the maximum likelihood. """ + def dNLLdTheta(self, theta): + """the derivative of nll as a function of the thetas. + Makes it easier to find the maximum likelihood.""" if self.model.isLinear(): xtot = theta + self.model.backgrounds + self.nsig - xtot[xtot<=0.] = 1e-30 ## turn zeroes to small values - nllp_ = self.ones - self.model.observed / xtot + NP.dot( theta , self.weight ) + xtot[xtot <= 0.0] = 1e-30 ## turn zeroes to small values + nllp_ = self.ones - self.model.observed / xtot + np.dot(theta, self.weight) return nllp_ lmbda = self.nsig + self.model.A + theta + self.model.C * theta**2 / self.model.B**2 - lmbda[lmbda<=0.] = 1e-30 ## turn zeroes to small values - # nllp_ = ( self.ones - self.model.observed / lmbda + NP.dot( theta , self.weight ) ) * ( self.ones + 2*self.model.C * theta / self.model.B**2 ) - T=self.ones + 2*self.model.C/self.model.B**2*theta - nllp_ = T - self.model.observed / lmbda * ( T ) + NP.dot( theta , self.weight ) + lmbda[lmbda <= 0.0] = 1e-30 ## turn zeroes to small values + # nllp_ = ( self.ones - self.model.observed / lmbda + np.dot( theta , self.weight ) ) * ( self.ones + 2*self.model.C * theta / self.model.B**2 ) + T = self.ones + 2 * self.model.C / self.model.B**2 * theta + nllp_ = T - self.model.observed / lmbda * (T) + np.dot(theta, self.weight) return nllp_ - def nllHess( self, theta ): - """ the Hessian of nll as a function of the thetas. - Makes it easier to find the maximum likelihood. """ + def d2NLLdTheta2(self, theta): + """the Hessian of nll as a function of the thetas. + Makes it easier to find the maximum likelihood.""" # xtot = theta + self.ntot if self.model.isLinear(): xtot = theta + self.model.backgrounds + self.nsig - xtot[xtot<=0.] = 1e-30 ## turn zeroes to small values - nllh_ = self.weight + NP.diag ( self.model.observed / (xtot**2) ) + xtot[xtot <= 0.0] = 1e-30 ## turn zeroes to small values + nllh_ = self.weight + np.diag(self.model.observed / (xtot**2)) return nllh_ lmbda = self.nsig + self.model.A + theta + self.model.C * theta**2 / self.model.B**2 - lmbda[lmbda<=0.] = 1e-30 ## turn zeroes to small values - T=self.ones + 2*self.model.C/self.model.B**2*theta + lmbda[lmbda <= 0.0] = 1e-30 ## turn zeroes to small values + T = self.ones + 2 * self.model.C / self.model.B**2 * theta # T_i = 1_i + 2*c_i/B_i**2 * theta_i - nllh_ = self.weight + NP.diag ( self.model.observed * T**2 / (lmbda**2) ) - NP.diag ( self.model.observed / lmbda * 2 * self.model.C / self.model.B**2 ) + NP.diag ( 2*self.model.C/self.model.B**2 ) + nllh_ = ( + self.weight + + np.diag(self.model.observed * T**2 / (lmbda**2)) + - np.diag(self.model.observed / lmbda * 2 * self.model.C / self.model.B**2) + + np.diag(2 * self.model.C / self.model.B**2) + ) return nllh_ - def getThetaHat(self, nobs, nb, nsig, covb, max_iterations ): - """ Compute nuisance parameter theta that - maximizes our likelihood (poisson*gauss). 
""" - self.nsig = nsig - sigma2 = covb + self.model.var_s(nsig) ## NP.diag ( (self.model.deltas)**2 ) - ## for now deal with variances only - ntot = nb + nsig - cov = NP.array(sigma2) - weight = NP.linalg.inv(cov) ## weight matrix - diag_cov = NP.diag(cov) - # first: no covariances: - q = diag_cov * ( ntot - nobs ) + def getThetaHat(self, nobs, nb, nsig, covb, max_iterations): + """Compute nuisance parameter theta that + maximizes our likelihood (poisson*gauss).""" + self.nsig = nsig + sigma2 = covb + self.model.var_s(nsig) ## np.diag ( (self.model.deltas)**2 ) + ## for now deal with variances only + ntot = nb + nsig + cov = np.array(sigma2) + # weight = cov**(-1) ## weight matrix + weight = linalg.inv(cov) + diag_cov = np.diag(cov) + # first: no covariances: + q = diag_cov * (ntot - nobs) + p = ntot + diag_cov + thetamaxes = [] + thetamax = -p / 2.0 * (1 - sign(p) * sqrt(1.0 - 4 * q / p**2)) + thetamaxes.append(thetamax) + ndims = len(p) + + def distance(theta1, theta2): + for ctr, i in enumerate(theta1): + if i == 0.0: + theta1[ctr] = 1e-20 + for ctr, i in enumerate(theta2): + if i == 0.0: + theta2[ctr] = 1e-20 + return sum(np.abs(theta1 - theta2) / np.abs(theta1 + theta2)) + + ictr = 0 + while ictr < max_iterations: + ictr += 1 + q = diag_cov * (ntot - nobs) p = ntot + diag_cov - thetamaxes = [] - thetamax = -p/2. * ( 1 - sign(p) * sqrt ( 1. - 4*q / p**2 ) ) - thetamaxes.append ( thetamax ) - ndims = len(p) - def distance ( theta1, theta2 ): - for ctr,i in enumerate ( theta1 ): - if i == 0.: - theta1[ctr]=1e-20 - for ctr,i in enumerate ( theta2 ): - if i == 0.: - theta2[ctr]=1e-20 - return sum ( NP.abs(theta1 - theta2) / NP.abs ( theta1+theta2 ) ) - - ictr = 0 - while ictr < max_iterations: - ictr += 1 - q = diag_cov * ( ntot - nobs ) - p = ntot + diag_cov - for i in range(ndims): - #q[i] = diag_cov[i] * ( ntot[i] - nobs[i] ) - #p[i] = ntot[i] + diag_cov[i] - for j in range(ndims): - if i==j: continue - dq = thetamax[j]*ntot[i]*diag_cov[i]*weight[i,j] - dp = thetamax[j]*weight[i,j]*diag_cov[i] - if abs ( dq / q[i] ) > .3: - #logger.warning ( "too big a change in iteration." ) - dq=NP.abs( .3 * q[i] ) * NP.sign ( dq ) - if abs ( dp / p[i] ) > .3: - #logger.warning ( "too big a change in iteration." ) - dp=NP.abs( .3 * p[i] ) * NP.sign ( dp ) - q[i] += dq - p[i] += dp - thetamax = -p/2. * ( 1 - sign(p) * sqrt ( 1. - 4*q / p**2 ) ) - thetamaxes.append ( thetamax ) - if len(thetamaxes)>2: - d1 = distance ( thetamaxes[-2], thetamax ) - d2 = distance ( thetamaxes[-3], thetamaxes[-2] ) - if d1 > d2: - raise Exception("diverging when computing thetamax: %f > %f" % ( d1, d2 )) - if d1 < 1e-5: - return thetamax - return thetamax + for i in range(ndims): + # q[i] = diag_cov[i] * ( ntot[i] - nobs[i] ) + # p[i] = ntot[i] + diag_cov[i] + for j in range(ndims): + if i == j: + continue + dq = thetamax[j] * ntot[i] * diag_cov[i] * weight[i, j] + dp = thetamax[j] * weight[i, j] * diag_cov[i] + if abs(dq / q[i]) > 0.3: + # logger.warning ( "too big a change in iteration." ) + dq = np.abs(0.3 * q[i]) * np.sign(dq) + if abs(dp / p[i]) > 0.3: + # logger.warning ( "too big a change in iteration." 
) + dp = np.abs(0.3 * p[i]) * np.sign(dp) + q[i] += dq + p[i] += dp + thetamax = -p / 2.0 * (1 - sign(p) * sqrt(1.0 - 4 * q / p**2)) + thetamaxes.append(thetamax) + if len(thetamaxes) > 2: + d1 = distance(thetamaxes[-2], thetamax) + d2 = distance(thetamaxes[-3], thetamaxes[-2]) + if d1 > d2: + raise Exception("diverging when computing thetamax: %f > %f" % (d1, d2)) + if d1 < 1e-5: + return thetamax + return thetamax def findThetaHat(self, nsig): - """ Compute nuisance parameter theta that maximizes our likelihood - (poisson*gauss). - """ - - ## first step is to disregard the covariances and solve the - ## quadratic equations - ini = self.getThetaHat ( self.model.observed, self.model.backgrounds, nsig, self.model.covariance, 0 ) - self.cov_tot = self.model.V - if self.model.n == 1: - self.cov_tot = self.model.totalCovariance ( nsig ) - # self.ntot = self.model.backgrounds + self.nsig - # if not self.model.isLinear(): - # self.cov_tot = self.model.V + self.model.var_s(nsig) - # self.cov_tot = self.model.totalCovariance (nsig) - #self.ntot = None - self.weight = NP.linalg.inv(self.cov_tot) - self.ones = 1. - if type ( self.model.observed) in [ list, ndarray ]: - self.ones = NP.ones ( len (self.model.observed) ) - self.gammaln = special.gammaln(self.model.observed + 1) - try: - ret_c = optimize.fmin_ncg ( self.nll, ini, fprime=self.nllprime, - fhess=self.nllHess, full_output=True, disp=0 ) - # then always continue with TNC - if type ( self.model.observed ) in [ int, float ]: - bounds = [ ( -10*self.model.observed, 10*self.model.observed ) ] - else: - bounds = [ ( -10*x, 10*x ) for x in self.model.observed ] - ini = ret_c - ret_c = optimize.fmin_tnc ( self.nll, ret_c[0], fprime=self.nllprime, - disp=0, bounds=bounds ) - # print ( "[findThetaHat] mu=%s bg=%s observed=%s V=%s, nsig=%s theta=%s, nll=%s" % ( self.nsig[0]/self.model.efficiencies[0], self.model.backgrounds, self.model.observed,self.model.covariance, self.nsig, ret_c[0], self.nll(ret_c[0]) ) ) - if ret_c[-1] not in [ 0, 1, 2 ]: - return ret_c[0],ret_c[-1] - else: - return ret_c[0],0 - logger.debug ( "tnc worked." ) + """Compute nuisance parameter theta that maximizes our likelihood + (poisson*gauss). + """ - ret = ret_c[0] - return ret,-2 - except Exception as e: - logger.error("exception: %s. ini[-3:]=%s" % (e,ini[-3:]) ) - raise Exception("cov-1=%s" % (self.model.covariance+self.model.var_s(nsig))**(-1)) - return ini,-1 + ## first step is to disregard the covariances and solve the + ## quadratic equations + ini = self.getThetaHat( + self.model.observed, self.model.backgrounds, nsig, self.model.covariance, 0 + ) + self.cov_tot = self.model.V + # if self.model.n == 1: + # self.cov_tot = self.model.totalCovariance ( nsig ) + # self.ntot = self.model.backgrounds + self.nsig + # if not self.model.isLinear(): + # self.cov_tot = self.model.V + self.model.var_s(nsig) + # self.cov_tot = self.model.totalCovariance (nsig) + # self.ntot = None + self.weight = np.linalg.inv(self.cov_tot) + # self.coeff = 1. 
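+        # Cache the log-normalisation of the multivariate Gaussian,
+        # log[(2*pi)**(-n/2) * det(V)**(-1/2)], using slogdet for numerical
+        # stability; probMV only ever uses this coefficient in log space.
+        # A rough check of the cached value (a sketch, for a 1D case):
+        #     V = np.array([[4.0]])
+        #     logcoeff = -0.5 * np.log(2 * np.pi) - 0.5 * np.linalg.slogdet(V)[1]
+        #     assert np.isclose(np.exp(logcoeff), stats.norm.pdf(0.0, 0.0, 2.0))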
+ logdet = np.linalg.slogdet(self.cov_tot) + self.logcoeff = -self.model.n / 2 * np.log(2 * np.pi) - 0.5 * logdet[1] + # self.coeff = (2*np.pi)**(-self.model.n/2) * np.exp(-.5* logdet[1] ) + # print ( "coeff", self.coeff, "n", self.model.n, "det", np.linalg.slogdet ( self.cov_tot ) ) + # print ( "cov_tot", self.cov_tot[:10] ) + self.ones = 1.0 + if type(self.model.observed) in [list, ndarray]: + self.ones = np.ones(len(self.model.observed)) + self.gammaln = special.gammaln(self.model.observed + 1) + try: + ret_c = optimize.fmin_ncg( + self.nllOfNuisances, + ini, + fprime=self.dNLLdTheta, + fhess=self.d2NLLdTheta2, + full_output=True, + disp=0, + ) + # then always continue with TNC + if type(self.model.observed) in [int, float]: + bounds = [(-10 * self.model.observed, 10 * self.model.observed)] + else: + bounds = [(-10 * x, 10 * x) for x in self.model.observed] + ini = ret_c + ret_c = optimize.fmin_tnc( + self.nllOfNuisances, ret_c[0], fprime=self.dNLLdTheta, disp=0, bounds=bounds + ) + # print ( "[findThetaHat] mu=%s bg=%s observed=%s V=%s, nsig=%s theta=%s, nll=%s" % ( self.nsig[0]/self.model.efficiencies[0], self.model.backgrounds, self.model.observed,self.model.covariance, self.nsig, ret_c[0], self.nllOfNuisances(ret_c[0]) ) ) + if ret_c[-1] not in [0, 1, 2]: + return ret_c[0], ret_c[-1] + else: + return ret_c[0], 0 + logger.debug("tnc worked.") + + ret = ret_c[0] + return ret, -2 + except (IndexError, ValueError) as e: + logger.error("exception: %s. ini[-3:]=%s" % (e, ini[-3:])) + raise Exception("cov-1=%s" % (self.model.covariance + self.model.var_s(nsig)) ** (-1)) + return ini, -1 def marginalizedLLHD1D(self, nsig, nll): - """ - Return the likelihood (of 1 signal region) to observe nobs events given the - predicted background nb, error on this background (deltab), - expected number of signal events nsig and the relative error on the signal (deltas_rel). - - :param nsig: predicted signal (float) - :param nobs: number of observed events (float) - :param nb: predicted background (float) - :param deltab: uncertainty on background (float) - - :return: likelihood to observe nobs events (float) - - """ - self.sigma2 = self.model.covariance + self.model.var_s(nsig)## (self.model.deltas)**2 - self.sigma_tot = sqrt(self.sigma2) - self.lngamma = math.lgamma(self.model.observed[0] + 1) - # Why not a simple gamma function for the factorial: - # ----------------------------------------------------- - # The scipy.stats.poisson.pmf probability mass function - # for the Poisson distribution only works for discrete - # numbers. The gamma distribution is used to create a - # continuous Poisson distribution. - # - # Why not a simple gamma function for the factorial: - # ----------------------------------------------------- - # The gamma function does not yield results for integers - # larger than 170. Since the expression for the Poisson - # probability mass function as a whole should not be huge, - # the exponent of the log of this expression is calculated - # instead to avoid using large numbers. - - - #Define integrand (gaussian_(bg+signal)*poisson(nobs)): - def prob( x, nsig ): - poisson = exp(self.model.observed*log(x) - x - self.lngamma ) - gaussian = stats.norm.pdf(x,loc=self.model.backgrounds+nsig,scale=self.sigma_tot) - - return poisson*gaussian - - #Compute maximum value for the integrand: - xm = self.model.backgrounds + nsig - self.sigma2 - #If nb + nsig = sigma2, shift the values slightly: - if xm == 0.: - xm = 0.001 - xmax = xm*(1.+sign(xm)*sqrt(1. 
+ 4.*self.model.observed*self.sigma2/xm**2))/2. - - #Define initial integration range: - nrange = 5. - a = max(0.,xmax-nrange*sqrt(self.sigma2)) - b = xmax+nrange*self.sigma_tot - like = integrate.quad(prob,a,b,(nsig), epsabs=0.,epsrel=1e-3)[0] - if like==0.: - return 0. - - #Increase integration range until integral converges - err = 1. - ctr=0 - while err > 0.01: - ctr+=1 - if ctr > 10.: - raise Exception("Could not compute likelihood within required precision") - - like_old = like - nrange = nrange*2 - a = max(0.,(xmax-nrange*self.sigma_tot)[0][0] ) - b = (xmax+nrange*self.sigma_tot)[0][0] - like = integrate.quad(prob,a,b,(nsig), - epsabs=0.,epsrel=1e-3)[0] - if like == 0.: - continue - err = abs(like_old-like)/like - - #Renormalize the likelihood to account for the cut at x = 0. - #The integral of the gaussian from 0 to infinity gives: - #(1/2)*(1 + Erf(mu/sqrt(2*sigma2))), so we need to divide by it - #(for mu - sigma >> 0, the normalization gives 1.) - norm = (1./2.)*(1. + special.erf((self.model.backgrounds+nsig)/sqrt(2.*self.sigma2))) - like = like/norm + """ + Return the likelihood (of 1 signal region) to observe nobs events given the + predicted background nb, error on this background (deltab), + expected number of signal events nsig and the relative error on the signal (deltas_rel). - if nll: - like = - log ( like ) - - return like[0][0] - - def marginalizedLikelihood(self, nsig, nll ): - """ compute the marginalized likelihood of observing nsig signal event""" - if self.model.isLinear() and self.model.n == 1: ## 1-dimensional non-skewed llhds we can integrate analytically - return self.marginalizedLLHD1D ( nsig, nll ) - - vals=[] - self.gammaln = special.gammaln(self.model.observed + 1) - thetas = stats.multivariate_normal.rvs(mean=[0.]*self.model.n, - # cov=(self.model.totalCovariance(nsig)), - cov=self.model.V, - size=self.ntoys ) ## get ntoys values - for theta in thetas : - if self.model.isLinear(): - lmbda = nsig + self.model.backgrounds + theta - else: - lmbda = nsig + self.model.A + theta + self.model.C*theta**2/self.model.B**2 - if self.model.isScalar( lmbda ): - lmbda = array([lmbda]) - for ctr,v in enumerate( lmbda ): - if v<=0.: lmbda[ctr]=1e-30 -# print ( "lmbda=",lmbda ) - poisson = self.model.observed*NP.log(lmbda) - lmbda - self.gammaln - # poisson = NP.exp(self.model.observed*NP.log(lmbda) - lmbda - self.model.backgrounds - self.gammaln) - vals.append( NP.exp ( sum(poisson) ) ) - #vals.append ( reduce(lambda x, y: x*y, poisson) ) - mean = NP.mean( vals ) - if nll: - if mean == 0.: - mean = 1e-100 - mean = - log ( mean ) - return mean + :param nsig: predicted signal (float) + :param nobs: number of observed events (float) + :param nb: predicted background (float) + :param deltab: uncertainty on background (float) + :return: likelihood to observe nobs events (float) + + """ + self.sigma2 = self.model.covariance + self.model.var_s(nsig) ## (self.model.deltas)**2 + self.sigma_tot = sqrt(self.sigma2) + self.lngamma = math.lgamma(self.model.observed[0] + 1) + # Why not a simple gamma function for the factorial: + # ----------------------------------------------------- + # The scipy.stats.poisson.pmf probability mass function + # for the Poisson distribution only works for discrete + # numbers. The gamma distribution is used to create a + # continuous Poisson distribution. + # + # Why not a simple gamma function for the factorial: + # ----------------------------------------------------- + # The gamma function does not yield results for integers + # larger than 170. 
+
+        # Define integrand (gaussian_(bg+signal)*poisson(nobs)):
+        def prob(x, nsig):
+            poisson = exp(self.model.observed * log(x) - x - self.lngamma)
+            gaussian = stats.norm.pdf(x, loc=self.model.backgrounds + nsig, scale=self.sigma_tot)
+
+            return poisson * gaussian
+
+        # Compute maximum value for the integrand:
+        xm = self.model.backgrounds + nsig - self.sigma2
+        # If nb + nsig = sigma2, shift the values slightly:
+        if xm == 0.0:
+            xm = 0.001
+        xmax = (
+            xm
+            * (1.0 + sign(xm) * sqrt(1.0 + 4.0 * self.model.observed * self.sigma2 / xm**2))
+            / 2.0
+        )
+
+        # Define initial integration range:
+        nrange = 5.0
+        a = max(0.0, xmax - nrange * sqrt(self.sigma2))
+        b = xmax + nrange * self.sigma_tot
+        like = integrate.quad(prob, a, b, (nsig), epsabs=0.0, epsrel=1e-3)[0]
+        if like == 0.0:
+            return 0.0
+
+        # Increase integration range until integral converges
+        err = 1.0
+        ctr = 0
+        while err > 0.01:
+            ctr += 1
+            if ctr > 10.0:
+                raise Exception("Could not compute likelihood within required precision")
+
+            like_old = like
+            nrange = nrange * 2
+            a = max(0.0, (xmax - nrange * self.sigma_tot)[0][0])
+            b = (xmax + nrange * self.sigma_tot)[0][0]
+            like = integrate.quad(prob, a, b, (nsig), epsabs=0.0, epsrel=1e-3)[0]
+            if like == 0.0:
+                continue
+            err = abs(like_old - like) / like
+
+        # Renormalize the likelihood to account for the cut at x = 0.
+        # The integral of the gaussian from 0 to infinity gives:
+        # (1/2)*(1 + Erf(mu/sqrt(2*sigma2))), so we need to divide by it
+        # (for mu - sigma >> 0, the normalization gives 1.)
+        norm = (1.0 / 2.0) * (
+            1.0 + special.erf((self.model.backgrounds + nsig) / sqrt(2.0 * self.sigma2))
+        )
+        like = like / norm

-    def profileLikelihood( self, nsig, nll ):
-        """ compute the profiled likelihood for nsig.
-        Warning: not normalized.
-        Returns profile likelihood and error code (0=no error)
+        if nll:
+            like = -log(like)
+
+        return like[0][0]
+
+    def marginalizedLikelihood(self, nsig, nll):
+        """compute the marginalized likelihood of observing nsig signal events"""
+        if (
+            self.model.isLinear() and self.model.n == 1
+        ):  ## 1-dimensional non-skewed llhds we can integrate analytically
+            return self.marginalizedLLHD1D(nsig, nll)
+
+        vals = []
+        self.gammaln = special.gammaln(self.model.observed + 1)
+        thetas = stats.multivariate_normal.rvs(
+            mean=[0.0] * self.model.n,
+            # cov=(self.model.totalCovariance(nsig)),
+            cov=self.model.V,
+            size=self.toys,
+        )  ## get ntoys values
+        for theta in thetas:
+            if self.model.isLinear():
+                lmbda = nsig + self.model.backgrounds + theta
+            else:
+                lmbda = nsig + self.model.A + theta + self.model.C * theta**2 / self.model.B**2
+            if self.model.isScalar(lmbda):
+                lmbda = array([lmbda])
+            for ctr, v in enumerate(lmbda):
+                if v <= 0.0:
+                    lmbda[ctr] = 1e-30
+            # print ( "lmbda=",lmbda )
+            poisson = self.model.observed * np.log(lmbda) - lmbda - self.gammaln
+            # poisson = np.exp(self.model.observed*np.log(lmbda) - lmbda - self.model.backgrounds - self.gammaln)
+            vals.append(np.exp(sum(poisson)))
+            # vals.append ( reduce(lambda x, y: x*y, poisson) )
+        mean = np.mean(vals)
+        if nll:
+            if mean == 0.0:
+                mean = 1e-100
+            mean = -log(mean)
+        return mean
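        # Editor's note (not part of the patch): marginalizedLikelihood above is
        # a Monte-Carlo marginalization. Each of the ntoys nuisance vectors
        # theta drawn from N(0, V) shifts the Poisson expectation to
        # lambda = nsig + nb + theta (linear model), and schematically
        #
        #   L(nsig) ~= (1/ntoys) * sum_k prod_i Pois(n_i | lambda_i^(k))
        #
        # which is what the average of `vals` computes (the products are taken
        # in log space to avoid overflowing the factorials); -log of that
        # average is returned when nll is requested.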
+
+    def profileLikelihood(self, nsig, nll):
+        """compute the profiled likelihood for nsig.
+        Warning: not normalized.
+        Returns profile likelihood and error code (0=no error)
         """
         # compute the profiled (not normalized) likelihood of observing
         # nsig signal events
-        theta_hat,_ = self.findThetaHat ( nsig )
-        ret = self.probMV ( nll, theta_hat )
+        theta_hat, _ = self.findThetaHat(nsig)
+        if self.debug_mode:
+            self.theta_hat = theta_hat
+        ret = self.probMV(nll, theta_hat)
         return ret

-    def likelihood(self, nsig, marginalize=False, nll=False ):
-        """ compute likelihood for nsig, profiling the nuisances
+    def likelihood(self, nsig, marginalize=False, nll=False):
+        """compute likelihood for nsig, profiling the nuisances
         :param marginalize: if true, marginalize, if false, profile
         :param nll: return nll instead of likelihood
         """
@@ -686,270 +967,350 @@ def likelihood(self, nsig, marginalize=False, nll=False ):
         else:
             return self.profileLikelihood(nsig, nll)

+    def lmax(self, nsig=None, marginalize=False, nll=False, allowNegativeSignals=False):
+        """convenience function; computes the likelihood for nsig = nobs - nbg.
+        :param marginalize: if true, marginalize, if false, profile nuisances.
+        :param nsig: number of signal events, needed only for combinations
+                     if None, then it gets replaced with obsN - expBG
+        :param nll: return nll instead of likelihood
+        :param allowNegativeSignals: if False, then negative nsigs are replaced with 0.
+        """
+        if type(nsig) == type(None):
+            nsig = self.model.observed - self.model.backgrounds
+        if len(self.model.observed) == 1:
+            if not allowNegativeSignals and nsig[0] < 0.0:
+                nsig = [0.0]
+            self.muhat = float(nsig[0])
+            if abs(self.model.nsignal) > 1e-100:
+                self.muhat = float(nsig[0] / self.model.nsignal[0])
+            self.sigma_mu = np.sqrt(self.model.observed[0] + self.model.covariance[0][0])
+            return self.likelihood(nsig, marginalize=marginalize, nll=nll)
+        fmh = self.findMuHat(
+            nsig, allowNegativeSignals=allowNegativeSignals, extended_output=True, nll=nll
+        )
+        muhat_, sigma_mu, lmax = fmh["muhat"], fmh["sigma_mu"], fmh["lmax"]
+        self.muhat = muhat_
+        self.sigma_mu = sigma_mu
+        return self.likelihood(muhat_ * nsig, marginalize=marginalize, nll=nll)
+
+    def findMuHatViaGradient(
+        self,
+        signal_rel,
+        allowNegativeSignals=False,
+        extended_output=False,
+        nll=False,
+        marginalize=False,
+    ):
+        """currently not used; finds muhat via gradient descent"""
+
+        def myllhd(mu):
+            return self.likelihood(mu * signal_rel, nll=True, marginalize=marginalize)
+
+        import scipy.optimize
+
+        bounds = None
+        if not allowNegativeSignals:
+            bounds = [(0, 1000.0)]
+        o = scipy.optimize.minimize(myllhd, 0.0, bounds=bounds)
+        llhd = o.fun
+        if not nll:
+            llhd = np.exp(-o.fun)
+        hess = o.hess_inv
+        try:
+            hess = hess.todense()
+        except Exception as e:
+            pass
+        mu_hat = float(o.x[0])
+        if extended_output:
+            sigma_mu = float(np.sqrt(hess[0][0]))
+            return mu_hat, sigma_mu, llhd
+        return mu_hat
+
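    # Editor's note (not part of the patch): a standalone toy of the gradient
    # strategy in findMuHatViaGradient above. The quadratic toy_nll is a
    # made-up stand-in for self.likelihood(mu * signal_rel, nll=True); with
    # bounds given and no method set, scipy falls back to L-BFGS-B, whose
    # inverse-Hessian estimate provides the sigma_mu of the extended output:
    #
    #     import numpy as np
    #     import scipy.optimize
    #     def toy_nll(mu):  # minimum at mu = 2.3, curvature 1/0.4**2
    #         return 0.5 * ((float(mu) - 2.3) / 0.4) ** 2
    #     opt = scipy.optimize.minimize(toy_nll, 0.0, bounds=[(0, 1000.0)])
    #     mu_hat = float(opt.x[0])                                 # ~2.3
    #     sigma_mu = float(np.sqrt(opt.hess_inv.todense()[0][0]))  # ~0.4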
     def chi2(self, nsig, marginalize=False):
-        """
-        Computes the chi2 for a given number of observed events nobs given
-        the predicted background nb, error on this background deltab,
-        expected number of signal events nsig and the relative error on
-        signal (deltas_rel).
-        :param marginalize: if true, marginalize, if false, profile
-        :param nsig: number of signal events
-        :return: chi2 (float)
-
-        """
-        nsig = self.model.convert(nsig)
-
-        # Compute the likelhood for the null hypothesis (signal hypothesis) H0:
-        llhd = self.likelihood(nsig, marginalize=marginalize, nll=True)
-
-        # Compute the maximum likelihood H1, which sits at nsig = nobs - nb
-        # (keeping the same % error on signal):
-        dn = self.model.observed-self.model.backgrounds
-        maxllhd = self.likelihood(dn, marginalize=marginalize, nll=True )
-
-        chi2=2*(llhd-maxllhd)
-
-        if not NP.isfinite ( chi2 ):
-            logger.error("chi2 is not a finite number! %s,%s,%s" % \
-                       (chi2, llhd,maxllhd))
-        # Return the test statistic -2log(H0/H1)
-        return chi2
-
-class CLsComputer:
-    def __init__(self, ntoys=10000, cl=.95):
+        """
+        Computes the chi2 for a given number of observed events nobs given
+        the predicted background nb, error on this background deltab,
+        expected number of signal events nsig and the relative error on
+        signal (deltas_rel).
+        :param marginalize: if true, marginalize, if false, profile
+        :param nsig: number of signal events
+        :return: chi2 (float)
+
+        """
+        nsig = self.model.convert(nsig)
+
+        # Compute the likelihood for the null hypothesis (signal hypothesis) H0:
+        llhd = self.likelihood(nsig, marginalize=marginalize, nll=True)
+
+        # Compute the maximum likelihood H1, which sits at nsig = nobs - nb
+        # (keeping the same % error on signal):
+        if len(nsig) == 1:
+            nsig = self.model.observed - self.model.backgrounds
+            maxllhd = self.likelihood(nsig, marginalize=marginalize, nll=True)
+        else:
+            maxllhd = self.lmax(nsig, marginalize=marginalize, nll=True, allowNegativeSignals=False)
+        chi2 = 2 * (llhd - maxllhd)
+
+        if not np.isfinite(chi2):
+            logger.error("chi2 is not a finite number! %s,%s,%s" % (chi2, llhd, maxllhd))
+        # Return the test statistic -2log(H0/H1)
+        return chi2
+
+
+class UpperLimitComputer:
+    debug_mode = False
+
+    def __init__(self, ntoys=30000, cl=0.95):
         """
         :param ntoys: number of toys when marginalizing
         :param cl: desired quantile for limits
         """
-        self.ntoys = ntoys
+        self.toys = ntoys
         self.cl = cl
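    # Editor's note (not part of the patch): the two upper-limit flavours that
    # follow differ only in units. ulOnYields (further below) limits the number
    # of signal events; ulOnSigmaTimesEff divides that by the luminosity to get
    # a limit on the fiducial cross section (sigma times efficiency). With
    # illustrative numbers only, assuming lumi is given in 1/fb:
    #
    #     ul_yields = 25.0            # events, hypothetical ulOnYields output
    #     lumi = 139.0                # integrated luminosity in 1/fb
    #     ul_fid = ul_yields / lumi   # fb, as returned by ulOnSigmaTimesEff
    #     ul_fid_pb = ul_yields / (1000.0 * lumi)  # pb, as used in run_recast.py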

-    def ulSigma(self, model, marginalize=False, toys=None, expected=False ):
-        """ upper limit obtained from the defined Data (using the signal prediction
+    def ulOnSigmaTimesEff(
+        self, model, marginalize=False, toys=None, expected=False, trylasttime=False
+    ):
+        """upper limit on the fiducial cross section sigma times efficiency,
+        obtained from the defined
+        Data (using the signal prediction
         for each signal region/dataset), by using the q_mu test
         statistic from the CCGV paper (arXiv:1007.1727).

        :params marginalize: if true, marginalize nuisances, else profile them
        :params toys: specify number of toys; use the default if None
-        :params expected: compute the expected value, not the observed.
-        :returns: upper limit on *production* xsec (efficiencies unfolded)
+        :params expected: if false, compute observed,
+                          true: compute a priori expected, "posteriori":
+                          compute a posteriori expected
+        :params trylasttime: if True, then don't try extra
+        :returns: upper limit on fiducial cross section
+        """
+        ul = self.ulOnYields(
+            model, marginalize=marginalize, toys=toys, expected=expected, trylasttime=trylasttime
+        )
+        if ul is None:
+            return ul
+        if model.lumi is None:
+            logger.error("asked for upper limit on fiducial xsec, but no lumi given with the data")
+            return ul
+        return ul / model.lumi
+
+    def _ul_preprocess(
+        self, model, marginalize=False, toys=None, expected=False, trylasttime=False
+    ):
+        """
+        Process the upper limit calculator
+        :param model: statistical model
+        :param marginalize: if true, marginalize nuisances, else profile them
+        :param toys: specify number of toys. Use default is none
+        :param expected: if false, compute observed,
+                         true: compute a priori expected, "posteriori":
+                         compute a posteriori expected
+        :param trylasttime: if True, then dont try extra
+        :return: mu_hat, sigma_mu, root_func
+        """
+        # if expected:
+        #     marginalize = True
         if model.zeroSignal():
-            """ only zeroes in efficiencies? cannot give a limit! """
-            return None
-        if toys==None:
-            toys=self.ntoys
+            """only zeroes in efficiencies? cannot give a limit!"""
+            return None, None, None
+        if toys == None:
+            toys = self.toys
        oldmodel = model
        if expected:
            model = copy.deepcopy(oldmodel)
-            #model.observed = model.backgrounds
-            for i,d in enumerate(model.backgrounds):
-                model.observed[i]=int(NP.round(d))
+            if expected == "posteriori":
+                # print ( "here!" )
+                tempc = LikelihoodComputer(oldmodel, toys)
+                theta_hat_, _ = tempc.findThetaHat(0 * oldmodel.signal_rel)
+            # model.observed = model.backgrounds
+            for i, d in enumerate(model.backgrounds):
+                # model.observed[i]=int(np.ceil(d))
+                # model.observed[i]=int(np.round(d))
+                if expected == "posteriori":
+                    d += theta_hat_[i]
+                model.observed[i] = float(d)
        computer = LikelihoodComputer(model, toys)
-        mu_hat = computer.findMuHat(model.signal_rel)
-        theta_hat0,_ = computer.findThetaHat(0*model.signal_rel)
-        sigma_mu = computer.getSigmaMu(model.signal_rel)
-
+        mu_hat = computer.findMuHat(
+            model.signal_rel, allowNegativeSignals=False, extended_output=False
+        )
+        theta_hat0, _ = computer.findThetaHat(0 * model.signal_rel)
+        sigma_mu = computer.getSigmaMu(mu_hat, model.signal_rel, theta_hat0)
+
+        nll0 = computer.likelihood(model.signals(mu_hat), marginalize=marginalize, nll=True)
+        # print ( f"SL nll0 {nll0:.3f} muhat {mu_hat:.3f} sigma_mu {sigma_mu:.3f}" )
+        if np.isinf(nll0) and marginalize == False and not trylasttime:
+            logger.warning(
+                "nll is infinite in profiling! we switch to marginalization, but only for this one!"
+            )
+            marginalize = True
+            nll0 = computer.likelihood(model.signals(mu_hat), marginalize=True, nll=True)
+            if np.isinf(nll0):
+                logger.warning("marginalization didnt help either. 
switch back.") + marginalize = False + else: + logger.warning("marginalization worked.") aModel = copy.deepcopy(model) - aModel.observed = array([NP.round(x+y) for x,y in zip(model.backgrounds,theta_hat0)]) - #print ( "aModeldata=", aModel.observed ) - #aModel.observed = array ( [ round(x) for x in model.backgrounds ] ) + aModel.observed = array([x + y for x, y in zip(model.backgrounds, theta_hat0)]) aModel.name = aModel.name + "A" + # print ( f"SL finding mu hat with {aModel.signal_rel}: mu_hatA, obs: {aModel.observed}" ) compA = LikelihoodComputer(aModel, toys) ## compute mu_hatA = compA.findMuHat(aModel.signal_rel) - if mu_hat < 0.: - mu_hat = 0. - nll0 = computer.likelihood(model.signals(mu_hat), - marginalize=marginalize, - nll=True) - if NP.isinf(nll0) and marginalize==False: - logger.warning("nll is infinite in profiling! we switch to marginalization, but only for this one!" ) - marginalize=True - nll0 = computer.likelihood(model.signals(mu_hat), - marginalize=True, - nll=True) - if NP.isinf(nll0): - logger.warning("marginalization didnt help either. switch back.") - marginalize=False - else: - logger.warning("marginalization worked.") - nll0A = compA.likelihood(aModel.signals(mu_hatA), - marginalize=marginalize, - nll=True) - + nll0A = compA.likelihood(aModel.signals(mu_hatA), marginalize=marginalize, nll=True) + # print ( f"SL nll0A {nll0A:.3f} mu_hatA {mu_hatA:.3f} bg {aModel.backgrounds[0]:.3f} obs {aModel.observed[0]:.3f}" ) + # return 1. + def root_func(mu): - ## the function to minimize. + ## the function to find the zero of (ie CLs - alpha) nsig = model.signals(mu) computer.ntot = model.backgrounds + nsig - nll = computer.likelihood(nsig, marginalize=marginalize, nll=True ) - nllA = compA.likelihood(nsig, marginalize=marginalize, nll=True ) - qmu = 2*( nll - nll0 ) - if qmu<0.: qmu=0. - sqmu = sqrt (qmu) - qA = 2*( nllA - nll0A ) - # print ( "mu: %s, qMu: %s, qA: %s nll0A: %s nllA: %s" % ( mu, qmu, qA, nll0A, nllA ) ) - if qA<0.: - qA=0. - sqA = sqrt(qA) - CLsb = 1. - stats.multivariate_normal.cdf(sqmu) - CLb = 0. - if qA >= qmu: - CLb = stats.multivariate_normal.cdf(sqA - sqmu) - else: - if qA == 0.: - CLsb = 1. - CLb = 1. - else: - CLsb = 1. - stats.multivariate_normal.cdf( (qmu + qA)/(2*sqA) ) - CLb = 1. - stats.multivariate_normal.cdf( (qmu - qA)/(2*sqA) ) - CLs = 0. - if CLb > 0.: - CLs = CLsb/CLb - root = CLs - 1. + self.cl - return root - - - a,b=1.5*mu_hat,2.5*mu_hat+2*sigma_mu - ctr=0 - while True: - while ( NP.sign ( root_func(a)* root_func(b) ) > -.5 ): - b=1.4*b ## widen bracket FIXME make a linear extrapolation! - a=a-(b-a)*.3 ## widen bracket - if a < 0.: a=0. - ctr+=1 - if ctr>20: ## but stop after 20 trials - if toys > 2000: - logger.error("cannot find brent bracket after 20 trials. a,b=%s(%s),%s(%s)" % ( root_func(a),a,root_func(b),b ) ) - return None - else: - logger.debug("cannot find brent bracket after 20 trials. but very low number of toys") - return self.ulSigma ( model, marginalize, 4*toys ) - try: - mu_lim = optimize.brentq ( root_func, a, b, rtol=1e-03, xtol=1e-06 ) - return mu_lim - except ValueError as e: ## it could still be that the signs arent opposite - # in that case, try again - pass - - """ - .. method:: computeCLs - :synopsis: Method that implements CLs calculation from simplified likelihoods as presented - in CMS-NOTE-2017-001. It basically consist in fixing mu to 1.0 instead of varying it - like it is done in the previous ulSigma method - - .. 
methodauthor:: Gael Alguero + nll = computer.likelihood(nsig, marginalize=marginalize, nll=True) + nllA = compA.likelihood(nsig, marginalize=marginalize, nll=True) + return rootFromNLLs(nllA, nll0A, nll, nll0) - """ + return mu_hat, sigma_mu, root_func - def computeCLs(self, model, marginalize=False, toys=None, expected=False ): - """ exclusion confidence level obtained from the defined Data (using the signal prediction + def ulOnYields(self, model, marginalize=False, toys=None, expected=False, trylasttime=False): + """upper limit on signal yields obtained from the defined + Data (using the signal prediction for each signal regio/dataset), by using the q_mu test statistic from the CCGV paper (arXiv:1007.1727). :params marginalize: if true, marginalize nuisances, else profile them :params toys: specify number of toys. Use default is none - :params expected: compute the expected value, not the observed. - :returns: exclusion confidence level (1-CLs) + :params expected: if false, compute observed, + true: compute a priori expected, "posteriori": + compute a posteriori expected + :params trylasttime: if True, then dont try extra + :returns: upper limit on yields """ - if model.zeroSignal(): - """ only zeroes in efficiencies? cannot give a limit! """ + mu_hat, sigma_mu, root_func = self._ul_preprocess( + model, marginalize, toys, expected, trylasttime + ) + if mu_hat == None: return None - if toys==None: - toys=self.ntoys - oldmodel = model - if expected: - model = copy.deepcopy(oldmodel) - #model.observed = model.backgrounds - for i,d in enumerate(model.backgrounds): - model.observed[i]=int(NP.round(d)) - computer = LikelihoodComputer(model, toys) - mu_hat = computer.findMuHat(model.nsignal) - theta_hat0,_ = computer.findThetaHat(0*model.nsignal) - sigma_mu = computer.getSigmaMu(model.nsignal) + a, b = determineBrentBracket(mu_hat, sigma_mu, root_func) + mu_lim = optimize.brentq(root_func, a, b, rtol=1e-03, xtol=1e-06) + return mu_lim - aModel = copy.deepcopy(model) - aModel.observed = array([NP.round(x+y) for x,y in zip(model.backgrounds,theta_hat0)]) - #print ( "aModeldata=", aModel.observed ) - #aModel.observed = array ( [ round(x) for x in model.backgrounds ] ) - aModel.name = aModel.name + "A" - compA = LikelihoodComputer(aModel, toys) - ## compute - mu_hatA = compA.findMuHat(aModel.nsignal) - # if mu_hat < 0.: - # mu_hat = 0. - # -log L(mu_hat, theta_hat(mu_hat)) - nll0 = computer.likelihood(model.signals(mu_hat), - marginalize=marginalize, - nll=True) - if NP.isinf(nll0) and marginalize==False: - logger.warning("nll is infinite in profiling! we switch to marginalization, but only for this one!" ) - marginalize=True - nll0 = computer.likelihood(model.signals(mu_hat), - marginalize=True, - nll=True) - if NP.isinf(nll0): - logger.warning("marginalization didnt help either. switch back.") - marginalize=False - else: - logger.warning("marginalization worked.") - nll0A = compA.likelihood(aModel.signals(mu_hatA), - marginalize=marginalize, - nll=True) - - # nsig = model.signals(1.) - computer.ntot = model.backgrounds + model.nsignal - # -log L(mu = 1, theta(1)) - nll = computer.likelihood(model.nsignal, marginalize=marginalize, nll=True ) - nllA = compA.likelihood(model.nsignal, marginalize=marginalize, nll=True ) - qmu = 2*( nll - nll0 ) - if qmu<0.: qmu=0. - sqmu = sqrt (qmu) - qA = 2*( nllA - nll0A ) - # print ( "mu: %s, qMu: %s, qA: %s nll0A: %s nllA: %s" % ( mu, qmu, qA, nll0A, nllA ) ) - if qA<0.: - qA=0. - sqA = sqrt(qA) - if qA >= qmu: - CLsb = 1. 
- stats.multivariate_normal.cdf(sqmu) - CLb = stats.multivariate_normal.cdf(sqA - sqmu) - else: - if qA == 0.: - CLsb = 1. - CLb = 1. - else: - CLsb = 1. - stats.multivariate_normal.cdf( (qmu + qA)/(2*sqA) ) - CLb = 1. - stats.multivariate_normal.cdf( (qmu - qA)/(2*sqA) ) - # CLs = 0. - # if CLb > 0.: - CLs = CLsb/CLb if CLb > 0. else 1. - return 1 - CLs + def computeCLs(self, model, marginalize=False, toys=None, expected=False, trylasttime=False): + """ + Compute the confidence level of the model + :param model: statistical model + :param marginalize: if true, marginalize nuisances, else profile them + :param toys: specify number of toys. Use default is none + :param expected: if false, compute observed, + true: compute a priori expected, "posteriori": + compute a posteriori expected + :param trylasttime: if True, then dont try extra + :return: 1 - CLs value + """ + _, _, root_func = self._ul_preprocess(model, marginalize, toys, expected, trylasttime) + + # 1-(CLs+alpha) -> alpha = 0.05 + return 0.95 - root_func(1.0) if __name__ == "__main__": - C = [ 18774.2, -2866.97, -5807.3, -4460.52, -2777.25, -1572.97, -846.653, -442.531, - -2866.97, 496.273, 900.195, 667.591, 403.92, 222.614, 116.779, 59.5958, - -5807.3, 900.195, 1799.56, 1376.77, 854.448, 482.435, 258.92, 134.975, - -4460.52, 667.591, 1376.77, 1063.03, 664.527, 377.714, 203.967, 106.926, - -2777.25, 403.92, 854.448, 664.527, 417.837, 238.76, 129.55, 68.2075, - -1572.97, 222.614, 482.435, 377.714, 238.76, 137.151, 74.7665, 39.5247, - -846.653, 116.779, 258.92, 203.967, 129.55, 74.7665, 40.9423, 21.7285, - -442.531, 59.5958, 134.975, 106.926, 68.2075, 39.5247, 21.7285, 11.5732] - nsignal = [ x/100. for x in [47,29.4,21.1,14.3,9.4,7.1,4.7,4.3] ] - m=Data( observed=[1964,877,354,182,82,36,15,11], - backgrounds=[2006.4,836.4,350.,147.1,62.0,26.2,11.1,4.7], - covariance= C, -# third_moment = [ 0.1, 0.02, 0.1, 0.1, 0.003, 0.0001, 0.0002, 0.0005 ], - third_moment = [ 0. 
] * 8, - nsignal = nsignal, - name="CMS-NOTE-2017-001 model" ) - ulComp = UpperLimitComputer(ntoys=500, cl=.95) - #uls = ulComp.ulSigma ( Data ( 15,17.5,3.2,0.00454755 ) ) - #print ( "uls=", uls ) - ul_old = 131.828*sum(nsignal) #With respect to the older refernece value one must normalize the xsec - print ( "old ul=", ul_old ) - ul = ulComp.ulSigma ( m ) - print ( "ul (marginalized)", ul ) - ul = ulComp.ulSigma ( m, marginalize=False ) - print ( "ul (profiled)", ul ) + C = [ + 18774.2, + -2866.97, + -5807.3, + -4460.52, + -2777.25, + -1572.97, + -846.653, + -442.531, + -2866.97, + 496.273, + 900.195, + 667.591, + 403.92, + 222.614, + 116.779, + 59.5958, + -5807.3, + 900.195, + 1799.56, + 1376.77, + 854.448, + 482.435, + 258.92, + 134.975, + -4460.52, + 667.591, + 1376.77, + 1063.03, + 664.527, + 377.714, + 203.967, + 106.926, + -2777.25, + 403.92, + 854.448, + 664.527, + 417.837, + 238.76, + 129.55, + 68.2075, + -1572.97, + 222.614, + 482.435, + 377.714, + 238.76, + 137.151, + 74.7665, + 39.5247, + -846.653, + 116.779, + 258.92, + 203.967, + 129.55, + 74.7665, + 40.9423, + 21.7285, + -442.531, + 59.5958, + 134.975, + 106.926, + 68.2075, + 39.5247, + 21.7285, + 11.5732, + ] + nsignal = [x / 100.0 for x in [47, 29.4, 21.1, 14.3, 9.4, 7.1, 4.7, 4.3]] + m = Data( + observed=[1964, 877, 354, 182, 82, 36, 15, 11], + backgrounds=[2006.4, 836.4, 350.0, 147.1, 62.0, 26.2, 11.1, 4.7], + covariance=C, + # third_moment = [ 0.1, 0.02, 0.1, 0.1, 0.003, 0.0001, 0.0002, 0.0005 ], + third_moment=[0.0] * 8, + nsignal=nsignal, + name="CMS-NOTE-2017-001 model", + ) + ulComp = UpperLimitComputer(ntoys=500, cl=0.95) + # uls = ulComp.ulSigma ( Data ( 15,17.5,3.2,0.00454755 ) ) + # print ( "uls=", uls ) + ul_old = 131.828 * sum( + nsignal + ) # With respect to the older refernece value one must normalize the xsec + print("old ul=", ul_old) + ul = ulComp.ulOnYields(m, marginalize=True) + cls = ulComp.computeCLs(m, marginalize=True) + print("ul (marginalized)", ul) + print("CLs (marginalized)", cls) + ul = ulComp.ulOnYields(m, marginalize=False) + cls = ulComp.computeCLs(m, marginalize=False) + print("ul (profiled)", ul) + print("CLs (profiled)", cls) + + """ + results: + old ul= 180.999844 + ul (marginalized) 180.84672924414696 + CLs (marginalized) 0.0 + ul (profiled) 180.68039063387553 + CLs (profiled) 0.75 + """ From b5f531b44de44ba56a5ea03f4da4cf90c9724c24 Mon Sep 17 00:00:00 2001 From: jackaraz Date: Thu, 12 May 2022 11:00:23 +0200 Subject: [PATCH 06/34] update to the current status of simplified_likelihood.py --- madanalysis/misc/run_recast.py | 100 ++++++++++++++++++++------------- 1 file changed, 62 insertions(+), 38 deletions(-) diff --git a/madanalysis/misc/run_recast.py b/madanalysis/misc/run_recast.py index d53b5772..e2a54b0d 100644 --- a/madanalysis/misc/run_recast.py +++ b/madanalysis/misc/run_recast.py @@ -827,17 +827,17 @@ def compute_cls(self, analyses, dataset): return False ## Performing the CLS calculation - regiondata=self.extract_sig_cls(regiondata,regions,lumi,"exp") + regiondata=self.extract_sig_cls(regiondata, regions, lumi, "exp") if self.cov_config != {}: - regiondata=self.extract_sig_lhcls(regiondata,lumi,"exp") + regiondata=self.extract_sig_lhcls(regiondata, dataset.xsection, lumi, "exp") # CLs calculation for pyhf regiondata = self.pyhf_sig95Wrapper(lumi, regiondata, "exp") if extrapolated_lumi=='default': if self.cov_config != {}: - regiondata=self.extract_sig_lhcls(regiondata,lumi,"obs") - regiondata = self.extract_sig_cls(regiondata,regions,lumi,"obs") - regiondata = 
self.pyhf_sig95Wrapper(lumi,regiondata,'obs') + regiondata=self.extract_sig_lhcls(regiondata, dataset.xsection, lumi, "obs") + regiondata = self.extract_sig_cls(regiondata, regions, lumi, "obs") + regiondata = self.pyhf_sig95Wrapper(lumi, regiondata, 'obs') else: for reg in regions: regiondata[reg]["nobs"]=regiondata[reg]["nb"] @@ -1471,10 +1471,11 @@ def slhCLs(regiondata,cov_regions,xsection,lumi,covariance,expected=False, ntoys backgrounds.append(regiondata[reg]["nb"]) observed.append(regiondata[reg]["nobs"]) # data - from madanalysis.misc.simplified_likelihood import Data - LHdata = Data(observed, backgrounds, covariance, None, nsignal) - from madanalysis.misc.simplified_likelihood import CLsComputer - computer = CLsComputer(ntoys = ntoys, cl = .95) + from madanalysis.misc.simplified_likelihood import Data, UpperLimitComputer + LHdata = Data( + observed=observed, backgrounds=backgrounds, covariance=covariance, nsignal=nsignal, deltas_rel=0., lumi=lumi + ) + computer = UpperLimitComputer(ntoys = ntoys, cl = .95) # calculation and output try: return computer.computeCLs(LHdata, expected=expected) @@ -1529,7 +1530,7 @@ def sig95(xsection): return regiondata # Calculating the upper limits on sigma with simplified likelihood - def extract_sig_lhcls(self,regiondata,lumi,tag): + def extract_sig_lhcls(self, regiondata: dict, xsection: float, lumi: float, tag: str) -> dict: """ Compute gloabal upper limit on cross section. @@ -1537,6 +1538,8 @@ def extract_sig_lhcls(self,regiondata,lumi,tag): ---------- regiondata : Dict Dictionary including all the information about SR yields + xsection: float + production cross-section of the process lumi : float luminosity tag : str @@ -1546,10 +1549,10 @@ def extract_sig_lhcls(self,regiondata,lumi,tag): if "cov_subset" not in regiondata.keys(): regiondata["cov_subset"] = {} - def get_s95(regs, matrix): - def sig95(xsection): - return self.slhCLs(regiondata,regs,xsection,lumi,matrix,(tag=="exp"), ntoys = self.ntoys)-0.95 - return sig95 + # def get_s95(regs, matrix): + # def sig95(xsection): + # return self.slhCLs(regiondata,regs,xsection,lumi,matrix,(tag=="exp"), ntoys = self.ntoys)-0.95 + # return sig95 for cov_subset in self.cov_config.keys(): cov_regions = self.cov_config[cov_subset]["cov_regions"] @@ -1557,32 +1560,53 @@ def sig95(xsection): if cov_subset not in regiondata["cov_subset"].keys(): regiondata["cov_subset"][cov_subset] = {} if all(s <= 0. for s in [regiondata[reg]["Nf"] for reg in cov_regions]): - regiondata["cov_subset"][cov_subset]["s95"+tag]= "-1" + regiondata["cov_subset"][cov_subset][f"s95{tag}"]= "-1" continue - low, hig = 1., 1. - while self.slhCLs(regiondata,cov_regions,low,lumi,covariance,(tag=="exp"), ntoys = self.ntoys)>0.95: - self.logger.debug('lower bound = ' + str(low)) - low *= 0.1 - if low < 1e-10: break - while self.slhCLs(regiondata,cov_regions,hig,lumi,covariance,(tag=="exp"), ntoys = self.ntoys)<0.95: - self.logger.debug('upper bound = ' + str(hig)) - hig *= 10. - if hig > 1e10: break - - try: - import scipy - sig95 = get_s95(cov_regions, covariance) - s95 = scipy.optimize.brentq(sig95,low,hig,xtol=low/100.) 
- except ImportError as err: - self.logger.debug("Can't import scipy") - s95=-1 - except Exception as err: - self.logger.debug(str(err)) - s95=-1 - - self.logger.debug('s95 = ' + str(s95) + ' pb') - regiondata["cov_subset"][cov_subset]["s95"+tag] = ("%.7f" % s95) + observed, backgrounds, nsignal = [], [], [] + # Collect the input data necessary for the simplified_likelyhood.py method + for reg in cov_regions: + nsignal.append(xsection * lumi * 1000. * regiondata[reg]["Nf"] / regiondata[reg]["N0"]) + backgrounds.append(regiondata[reg]["nb"]) + observed.append(regiondata[reg]["nobs"]) + + from madanalysis.misc.simplified_likelihood import Data, UpperLimitComputer + # Prepare the data structure + LHdata = Data( + observed=observed, + backgrounds=backgrounds, + covariance=covariance, + nsignal=nsignal, + deltas_rel=0., + lumi=lumi + ) + computer = UpperLimitComputer(ntoys=self.ntoys, cl=.95) + is_expected = (tag == "exp") + if self.main.recasting.expectation_assumption == "aposteriori" and is_expected: + is_expected = "aposteriori" + s95 = computer.ulOnYields(model=LHdata, expected=is_expected) / (1000. * lumi) + # low, hig = 1., 1. + # while self.slhCLs(regiondata,cov_regions,low,lumi,covariance,(tag=="exp"), ntoys = self.ntoys)>0.95: + # self.logger.debug('lower bound = ' + str(low)) + # low *= 0.1 + # if low < 1e-10: break + # while self.slhCLs(regiondata,cov_regions,hig,lumi,covariance,(tag=="exp"), ntoys = self.ntoys)<0.95: + # self.logger.debug('upper bound = ' + str(hig)) + # hig *= 10. + # if hig > 1e10: break + # + # try: + # import scipy + # sig95 = get_s95(cov_regions, covariance) + # s95 = scipy.optimize.brentq(sig95,low,hig,xtol=low/100.) + # except ImportError as err: + # self.logger.debug("Can't import scipy") + # s95=-1 + # except Exception as err: + # self.logger.debug(str(err)) + # s95=-1 + self.logger.debug(f"s95{tag} = {s95:.5f} [pb]") + regiondata["cov_subset"][cov_subset][f"s95{tag}"] = f"{s95:.7f}" return regiondata From ccf61476a28e977fbe6aa43c58cd90fb38697114 Mon Sep 17 00:00:00 2001 From: jackaraz Date: Thu, 12 May 2022 12:03:49 +0200 Subject: [PATCH 07/34] s95exp reversed and test on CLs calculation have been added. --- madanalysis/misc/run_recast.py | 71 ++++++++++++++-------------------- 1 file changed, 29 insertions(+), 42 deletions(-) diff --git a/madanalysis/misc/run_recast.py b/madanalysis/misc/run_recast.py index e2a54b0d..60cc7152 100644 --- a/madanalysis/misc/run_recast.py +++ b/madanalysis/misc/run_recast.py @@ -1408,6 +1408,8 @@ def extract_cls(self,regiondata,regions,xsection,lumi): if all(s <= 0. for s in [regiondata[reg]["Nf"] for reg in cov_regions]): regiondata["cov_subset"][cov_subset]["CLs"]= 0. continue + # TODO remove self._cov_subset + self._cov_subset = cov_subset CLs = self.slhCLs(regiondata,cov_regions,xsection,lumi,covariance, ntoys = self.ntoys) s95 = float(regiondata["cov_subset"][cov_subset]["s95exp"]) regiondata["cov_subset"][cov_subset]["CLs"] = CLs @@ -1457,8 +1459,7 @@ def extract_cls(self,regiondata,regions,xsection,lumi): return regiondata - @staticmethod - def slhCLs(regiondata,cov_regions,xsection,lumi,covariance,expected=False, ntoys = 10000): + def slhCLs(self, regiondata, cov_regions, xsection, lumi, covariance, expected=False, ntoys = 10000): """ (slh for simplified likelihood) Compute a global CLs combining the different region yields by using a simplified likelihood method (see CMS-NOTE-2017-001 for more information). 
It relies on the @@ -1475,7 +1476,8 @@ def slhCLs(regiondata,cov_regions,xsection,lumi,covariance,expected=False, ntoys LHdata = Data( observed=observed, backgrounds=backgrounds, covariance=covariance, nsignal=nsignal, deltas_rel=0., lumi=lumi ) - computer = UpperLimitComputer(ntoys = ntoys, cl = .95) + computer = UpperLimitComputer(ntoys = self.ntoys, cl = .95) + regiondata["cov_subset"][self._cov_subset]["mu"] = float(computer.ulOnYields(LHdata, expected=expected)) # calculation and output try: return computer.computeCLs(LHdata, expected=expected) @@ -1549,10 +1551,10 @@ def extract_sig_lhcls(self, regiondata: dict, xsection: float, lumi: float, tag: if "cov_subset" not in regiondata.keys(): regiondata["cov_subset"] = {} - # def get_s95(regs, matrix): - # def sig95(xsection): - # return self.slhCLs(regiondata,regs,xsection,lumi,matrix,(tag=="exp"), ntoys = self.ntoys)-0.95 - # return sig95 + def get_s95(regs, matrix): + def sig95(xsection): + return self.slhCLs(regiondata,regs,xsection,lumi,matrix,(tag=="exp"), ntoys = self.ntoys)-0.95 + return sig95 for cov_subset in self.cov_config.keys(): cov_regions = self.cov_config[cov_subset]["cov_regions"] @@ -1570,41 +1572,26 @@ def extract_sig_lhcls(self, regiondata: dict, xsection: float, lumi: float, tag: backgrounds.append(regiondata[reg]["nb"]) observed.append(regiondata[reg]["nobs"]) - from madanalysis.misc.simplified_likelihood import Data, UpperLimitComputer - # Prepare the data structure - LHdata = Data( - observed=observed, - backgrounds=backgrounds, - covariance=covariance, - nsignal=nsignal, - deltas_rel=0., - lumi=lumi - ) - computer = UpperLimitComputer(ntoys=self.ntoys, cl=.95) - is_expected = (tag == "exp") - if self.main.recasting.expectation_assumption == "aposteriori" and is_expected: - is_expected = "aposteriori" - s95 = computer.ulOnYields(model=LHdata, expected=is_expected) / (1000. * lumi) - # low, hig = 1., 1. - # while self.slhCLs(regiondata,cov_regions,low,lumi,covariance,(tag=="exp"), ntoys = self.ntoys)>0.95: - # self.logger.debug('lower bound = ' + str(low)) - # low *= 0.1 - # if low < 1e-10: break - # while self.slhCLs(regiondata,cov_regions,hig,lumi,covariance,(tag=="exp"), ntoys = self.ntoys)<0.95: - # self.logger.debug('upper bound = ' + str(hig)) - # hig *= 10. - # if hig > 1e10: break - # - # try: - # import scipy - # sig95 = get_s95(cov_regions, covariance) - # s95 = scipy.optimize.brentq(sig95,low,hig,xtol=low/100.) - # except ImportError as err: - # self.logger.debug("Can't import scipy") - # s95=-1 - # except Exception as err: - # self.logger.debug(str(err)) - # s95=-1 + low, hig = 1., 1. + while self.slhCLs(regiondata,cov_regions,low,lumi,covariance,(tag=="exp"), ntoys = self.ntoys)>0.95: + self.logger.debug('lower bound = ' + str(low)) + low *= 0.1 + if low < 1e-10: break + while self.slhCLs(regiondata,cov_regions,hig,lumi,covariance,(tag=="exp"), ntoys = self.ntoys)<0.95: + self.logger.debug('upper bound = ' + str(hig)) + hig *= 10. + if hig > 1e10: break + + try: + import scipy + sig95 = get_s95(cov_regions, covariance) + s95 = scipy.optimize.brentq(sig95,low,hig,xtol=low/100.) 
+ except ImportError as err: + self.logger.debug("Can't import scipy") + s95=-1 + except Exception as err: + self.logger.debug(str(err.args[0])) + s95=-1 self.logger.debug(f"s95{tag} = {s95:.5f} [pb]") regiondata["cov_subset"][cov_subset][f"s95{tag}"] = f"{s95:.7f}" From 62bf0c8e6de73e629f8dfe801f12d9ff92e4ed8a Mon Sep 17 00:00:00 2001 From: jackaraz Date: Thu, 12 May 2022 12:06:13 +0200 Subject: [PATCH 08/34] extend test --- madanalysis/misc/run_recast.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/madanalysis/misc/run_recast.py b/madanalysis/misc/run_recast.py index 60cc7152..47f120fd 100644 --- a/madanalysis/misc/run_recast.py +++ b/madanalysis/misc/run_recast.py @@ -1557,6 +1557,8 @@ def sig95(xsection): return sig95 for cov_subset in self.cov_config.keys(): + # TODO remove self._cov_subset + self._cov_subset = cov_subset cov_regions = self.cov_config[cov_subset]["cov_regions"] covariance = self.cov_config[cov_subset]["covariance" ] if cov_subset not in regiondata["cov_subset"].keys(): From 42824f15318566ce5798f1ac7346eee7e8ec9334 Mon Sep 17 00:00:00 2001 From: jackaraz Date: Thu, 12 May 2022 14:11:03 +0200 Subject: [PATCH 09/34] update CLs calculation --- madanalysis/misc/run_recast.py | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/madanalysis/misc/run_recast.py b/madanalysis/misc/run_recast.py index 47f120fd..311305fc 100644 --- a/madanalysis/misc/run_recast.py +++ b/madanalysis/misc/run_recast.py @@ -35,6 +35,7 @@ from string_tools import StringTools from six.moves import map, range, input import copy, logging, math, os, shutil, time, sys, json +from typing import Text, Dict class RunRecast(): @@ -75,12 +76,12 @@ def init(self): def SetCLsCalculator(self): if self.main.session_info.has_pyhf and self.main.recasting.CLs_calculator_backend == "pyhf": - self.cls_calculator = pyhf_wrapper_py3 + self.cls_calculator = pyhf_wrapper elif not self.main.session_info.has_pyhf: self.main.recasting.CLs_calculator_backend = "native" if self.main.session_info.has_pyhf and self.main.recasting.expectation_assumption == "aposteriori": - self.cls_calculator = pyhf_wrapper_py3 + self.cls_calculator = pyhf_wrapper self.main.recasting.CLs_calculator_backend = "pyhf" self.is_apriori = False elif not self.main.session_info.has_pyhf and self.main.recasting.expectation_assumption == "aposteriori": @@ -829,13 +830,13 @@ def compute_cls(self, analyses, dataset): ## Performing the CLS calculation regiondata=self.extract_sig_cls(regiondata, regions, lumi, "exp") if self.cov_config != {}: - regiondata=self.extract_sig_lhcls(regiondata, dataset.xsection, lumi, "exp") + regiondata=self.extract_sig_lhcls(regiondata, lumi, "exp") # CLs calculation for pyhf regiondata = self.pyhf_sig95Wrapper(lumi, regiondata, "exp") if extrapolated_lumi=='default': if self.cov_config != {}: - regiondata=self.extract_sig_lhcls(regiondata, dataset.xsection, lumi, "obs") + regiondata=self.extract_sig_lhcls(regiondata, lumi, "obs") regiondata = self.extract_sig_cls(regiondata, regions, lumi, "obs") regiondata = self.pyhf_sig95Wrapper(lumi, regiondata, 'obs') else: @@ -1480,7 +1481,7 @@ def slhCLs(self, regiondata, cov_regions, xsection, lumi, covariance, expected=F regiondata["cov_subset"][self._cov_subset]["mu"] = float(computer.ulOnYields(LHdata, expected=expected)) # calculation and output try: - return computer.computeCLs(LHdata, expected=expected) + return 1. 
- (computer.computeCLs(LHdata, expected=expected) + .05) except Exception as err: logging.getLogger('MA5').debug("slhCLs : " + str(err)) return 0.0 @@ -1532,7 +1533,7 @@ def sig95(xsection): return regiondata # Calculating the upper limits on sigma with simplified likelihood - def extract_sig_lhcls(self, regiondata: dict, xsection: float, lumi: float, tag: str) -> dict: + def extract_sig_lhcls(self, regiondata: Dict, lumi: float, tag: Text) -> dict: """ Compute gloabal upper limit on cross section. @@ -1540,8 +1541,6 @@ def extract_sig_lhcls(self, regiondata: dict, xsection: float, lumi: float, tag: ---------- regiondata : Dict Dictionary including all the information about SR yields - xsection: float - production cross-section of the process lumi : float luminosity tag : str @@ -1567,19 +1566,12 @@ def sig95(xsection): regiondata["cov_subset"][cov_subset][f"s95{tag}"]= "-1" continue - observed, backgrounds, nsignal = [], [], [] - # Collect the input data necessary for the simplified_likelyhood.py method - for reg in cov_regions: - nsignal.append(xsection * lumi * 1000. * regiondata[reg]["Nf"] / regiondata[reg]["N0"]) - backgrounds.append(regiondata[reg]["nb"]) - observed.append(regiondata[reg]["nobs"]) - low, hig = 1., 1. - while self.slhCLs(regiondata,cov_regions,low,lumi,covariance,(tag=="exp"), ntoys = self.ntoys)>0.95: + while self.slhCLs(regiondata, cov_regions, low, lumi, covariance, (tag == "exp"), ntoys=self.ntoys) > 0.95: self.logger.debug('lower bound = ' + str(low)) low *= 0.1 if low < 1e-10: break - while self.slhCLs(regiondata,cov_regions,hig,lumi,covariance,(tag=="exp"), ntoys = self.ntoys)<0.95: + while self.slhCLs(regiondata, cov_regions, hig, lumi, covariance, (tag == "exp"), ntoys=self.ntoys) < 0.95: self.logger.debug('upper bound = ' + str(hig)) hig *= 10. if hig > 1e10: break @@ -1587,13 +1579,13 @@ def sig95(xsection): try: import scipy sig95 = get_s95(cov_regions, covariance) - s95 = scipy.optimize.brentq(sig95,low,hig,xtol=low/100.) + s95 = scipy.optimize.brentq(sig95, low, hig, xtol=low / 100.) except ImportError as err: self.logger.debug("Can't import scipy") - s95=-1 + s95 = -1 except Exception as err: self.logger.debug(str(err.args[0])) - s95=-1 + s95 = -1 self.logger.debug(f"s95{tag} = {s95:.5f} [pb]") regiondata["cov_subset"][cov_subset][f"s95{tag}"] = f"{s95:.7f}" From f49d39ada78e2984a1af71455b27bf17253b4ee7 Mon Sep 17 00:00:00 2001 From: jackaraz Date: Thu, 12 May 2022 16:01:25 +0200 Subject: [PATCH 10/34] update simplified_likelihood.py --- madanalysis/misc/run_recast.py | 8 ++-- madanalysis/misc/simplified_likelihood.py | 48 +++++++++++------------ 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/madanalysis/misc/run_recast.py b/madanalysis/misc/run_recast.py index 311305fc..1be0216c 100644 --- a/madanalysis/misc/run_recast.py +++ b/madanalysis/misc/run_recast.py @@ -1469,19 +1469,19 @@ def slhCLs(self, regiondata, cov_regions, xsection, lumi, covariance, expected=F observed, backgrounds, nsignal = [], [], [] # Collect the input data necessary for the simplified_likelyhood.py method for reg in cov_regions: - nsignal.append(xsection*lumi*1000.*regiondata[reg]["Nf"]/regiondata[reg]["N0"]) + nsignal.append(xsection * lumi * 1000. 
* regiondata[reg]["Nf"] / regiondata[reg]["N0"]) backgrounds.append(regiondata[reg]["nb"]) observed.append(regiondata[reg]["nobs"]) # data from madanalysis.misc.simplified_likelihood import Data, UpperLimitComputer LHdata = Data( - observed=observed, backgrounds=backgrounds, covariance=covariance, nsignal=nsignal, deltas_rel=0., lumi=lumi + observed=observed, backgrounds=backgrounds, covariance=covariance, nsignal=nsignal, deltas_rel=0.2, lumi=lumi ) - computer = UpperLimitComputer(ntoys = self.ntoys, cl = .95) + computer = UpperLimitComputer(ntoys=self.ntoys, cl=.95) regiondata["cov_subset"][self._cov_subset]["mu"] = float(computer.ulOnYields(LHdata, expected=expected)) # calculation and output try: - return 1. - (computer.computeCLs(LHdata, expected=expected) + .05) + return computer.computeCLs(LHdata, expected=expected) except Exception as err: logging.getLogger('MA5').debug("slhCLs : " + str(err)) return 0.0 diff --git a/madanalysis/misc/simplified_likelihood.py b/madanalysis/misc/simplified_likelihood.py index 5a7d7413..4e5998ee 100644 --- a/madanalysis/misc/simplified_likelihood.py +++ b/madanalysis/misc/simplified_likelihood.py @@ -49,30 +49,26 @@ logger = logging.getLogger("MA5") # rootFromNLLs, determineBrentBracket are retreived from smodels.tools.statistics -def rootFromNLLs(nllA, nll0A, nll, nll0): +def rootFromNLLs(nllA, nll0A, nll, nll0, get_cls=False): """compute the CLs - alpha from the NLLs""" - qmu = 2 * (nll - nll0) - if qmu < 0.0: - qmu = 0.0 + qmu = 0.0 if 2 * (nll - nll0) < 0.0 else 2 * (nll - nll0) sqmu = np.sqrt(qmu) qA = 2 * (nllA - nll0A) if qA < 0.0: qA = 0.0 sqA = np.sqrt(qA) - CLsb = 1.0 - stats.multivariate_normal.cdf(sqmu) - CLb = 0.0 if qA >= qmu: + CLsb = 1.0 - stats.multivariate_normal.cdf(sqmu) CLb = stats.multivariate_normal.cdf(sqA - sqmu) else: - if qA == 0.0: - CLsb = 1.0 - CLb = 1.0 - else: - CLsb = 1.0 - stats.multivariate_normal.cdf((qmu + qA) / (2 * sqA)) - CLb = 1.0 - stats.multivariate_normal.cdf((qmu - qA) / (2 * sqA)) - CLs = 0.0 - if CLb > 0.0: - CLs = CLsb / CLb + CLsb = 1.0 if qA == 0.0 else 1.0 - stats.multivariate_normal.cdf((qmu + qA) / (2 * sqA)) + CLb = 1.0 if qA == 0.0 else 1.0 - stats.multivariate_normal.cdf((qmu - qA) / (2 * sqA)) + + CLs = CLsb / CLb if CLb > 0 else 0. + + if get_cls: + return 1. - CLs + cl = 0.95 root = CLs - 1.0 + cl return root @@ -1097,7 +1093,7 @@ def ulOnSigmaTimesEff( return ul / model.lumi def _ul_preprocess( - self, model, marginalize=False, toys=None, expected=False, trylasttime=False + self, model, marginalize=False, toys=None, expected=False, trylasttime=False, signal_type="signal_rel" ): """ Process the upper limit calculator @@ -1123,7 +1119,7 @@ def _ul_preprocess( if expected == "posteriori": # print ( "here!" 
) tempc = LikelihoodComputer(oldmodel, toys) - theta_hat_, _ = tempc.findThetaHat(0 * oldmodel.signal_rel) + theta_hat_, _ = tempc.findThetaHat(0 * getattr(oldmodel, signal_type)) # model.observed = model.backgrounds for i, d in enumerate(model.backgrounds): # model.observed[i]=int(np.ceil(d)) @@ -1133,10 +1129,10 @@ def _ul_preprocess( model.observed[i] = float(d) computer = LikelihoodComputer(model, toys) mu_hat = computer.findMuHat( - model.signal_rel, allowNegativeSignals=False, extended_output=False + getattr(model, signal_type), allowNegativeSignals=False, extended_output=False ) - theta_hat0, _ = computer.findThetaHat(0 * model.signal_rel) - sigma_mu = computer.getSigmaMu(mu_hat, model.signal_rel, theta_hat0) + theta_hat0, _ = computer.findThetaHat(0 * getattr(model, signal_type)) + sigma_mu = computer.getSigmaMu(mu_hat, getattr(model, signal_type), theta_hat0) nll0 = computer.likelihood(model.signals(mu_hat), marginalize=marginalize, nll=True) # print ( f"SL nll0 {nll0:.3f} muhat {mu_hat:.3f} sigma_mu {sigma_mu:.3f}" ) @@ -1157,18 +1153,18 @@ def _ul_preprocess( # print ( f"SL finding mu hat with {aModel.signal_rel}: mu_hatA, obs: {aModel.observed}" ) compA = LikelihoodComputer(aModel, toys) ## compute - mu_hatA = compA.findMuHat(aModel.signal_rel) + mu_hatA = compA.findMuHat(getattr(aModel, signal_type)) nll0A = compA.likelihood(aModel.signals(mu_hatA), marginalize=marginalize, nll=True) # print ( f"SL nll0A {nll0A:.3f} mu_hatA {mu_hatA:.3f} bg {aModel.backgrounds[0]:.3f} obs {aModel.observed[0]:.3f}" ) # return 1. - def root_func(mu): + def root_func(mu, get_cls=False): ## the function to find the zero of (ie CLs - alpha) nsig = model.signals(mu) computer.ntot = model.backgrounds + nsig nll = computer.likelihood(nsig, marginalize=marginalize, nll=True) nllA = compA.likelihood(nsig, marginalize=marginalize, nll=True) - return rootFromNLLs(nllA, nll0A, nll, nll0) + return rootFromNLLs(nllA, nll0A, nll, nll0, get_cls=get_cls) return mu_hat, sigma_mu, root_func @@ -1207,10 +1203,12 @@ def computeCLs(self, model, marginalize=False, toys=None, expected=False, trylas :param trylasttime: if True, then dont try extra :return: 1 - CLs value """ - _, _, root_func = self._ul_preprocess(model, marginalize, toys, expected, trylasttime) + _, _, root_func = self._ul_preprocess( + model, marginalize, toys, expected, trylasttime, signal_type="nsignal" + ) # 1-(CLs+alpha) -> alpha = 0.05 - return 0.95 - root_func(1.0) + return root_func(1.0, get_cls=True) if __name__ == "__main__": From 1fed7719a141365bad2828d84b51c9c777341a4b Mon Sep 17 00:00:00 2001 From: jackaraz Date: Thu, 12 May 2022 19:13:14 +0200 Subject: [PATCH 11/34] fix CLs calculation issue --- madanalysis/misc/simplified_likelihood.py | 64 +++++++++++++++++++---- 1 file changed, 53 insertions(+), 11 deletions(-) diff --git a/madanalysis/misc/simplified_likelihood.py b/madanalysis/misc/simplified_likelihood.py index 4e5998ee..03272064 100644 --- a/madanalysis/misc/simplified_likelihood.py +++ b/madanalysis/misc/simplified_likelihood.py @@ -38,6 +38,7 @@ from scipy import stats, optimize, integrate, special, linalg from numpy import sqrt, exp, log, sign, array, ndarray from functools import reduce +from typing import Text, Optional, Union, Tuple # from smodels.tools.statistics import rootFromNLLs, determineBrentBracket @@ -64,10 +65,10 @@ def rootFromNLLs(nllA, nll0A, nll, nll0, get_cls=False): CLsb = 1.0 if qA == 0.0 else 1.0 - stats.multivariate_normal.cdf((qmu + qA) / (2 * sqA)) CLb = 1.0 if qA == 0.0 else 1.0 - 
stats.multivariate_normal.cdf((qmu - qA) / (2 * sqA)) - CLs = CLsb / CLb if CLb > 0 else 0. + CLs = CLsb / CLb if CLb > 0 else 0.0 if get_cls: - return 1. - CLs + return 1.0 - CLs cl = 0.95 root = CLs - 1.0 + cl @@ -349,6 +350,16 @@ def signals(self, mu): return mu * self.signal_rel + def nsignals(self, mu): + """ + Returns the number of expected signal events, for all datasets, + given total signal strength mu. + + :param mu: Total number of signal events summed over all datasets. + """ + + return mu * self.nsignal + class LikelihoodComputer: @@ -1093,8 +1104,14 @@ def ulOnSigmaTimesEff( return ul / model.lumi def _ul_preprocess( - self, model, marginalize=False, toys=None, expected=False, trylasttime=False, signal_type="signal_rel" - ): + self, + model: Data, + marginalize: Optional[bool] = False, + toys: Optional[float] = None, + expected: Optional[Union[bool, Text]] = False, + trylasttime: Optional[bool] = False, + signal_type: Optional[Text] = "signal_rel", + ) -> Tuple: """ Process the upper limit calculator :param model: statistical model @@ -1104,8 +1121,16 @@ def _ul_preprocess( true: compute a priori expected, "posteriori": compute a posteriori expected :param trylasttime: if True, then dont try extra + :param signal_type: signal_type will allow both SModelS and MadAnalysis interface + to use this function simultaneously. For signal_rel upper limit + is calculated for normalised signal events for nsignals upper limit + is calculated for number of signal events. :return: mu_hat, sigma_mu, root_func """ + assert signal_type in [ + "signal_rel", + "nsignal", + ], f"Signal type can only be `signal_rel` or `nsignal`. `{signal_type}` is given" # if expected: # marginalize = True if model.zeroSignal(): @@ -1134,14 +1159,22 @@ def _ul_preprocess( theta_hat0, _ = computer.findThetaHat(0 * getattr(model, signal_type)) sigma_mu = computer.getSigmaMu(mu_hat, getattr(model, signal_type), theta_hat0) - nll0 = computer.likelihood(model.signals(mu_hat), marginalize=marginalize, nll=True) + nll0 = computer.likelihood( + getattr(model, "signals" if signal_type == "signal_rel" else "nsignals")(mu_hat), + marginalize=marginalize, + nll=True, + ) # print ( f"SL nll0 {nll0:.3f} muhat {mu_hat:.3f} sigma_mu {sigma_mu:.3f}" ) if np.isinf(nll0) and marginalize == False and not trylasttime: logger.warning( "nll is infinite in profiling! we switch to marginalization, but only for this one!" ) marginalize = True - nll0 = computer.likelihood(model.signals(mu_hat), marginalize=True, nll=True) + nll0 = computer.likelihood( + getattr(model, "signals" if signal_type == "signal_rel" else "nsignals")(mu_hat), + marginalize=True, + nll=True, + ) if np.isinf(nll0): logger.warning("marginalization didnt help either. switch back.") marginalize = False @@ -1154,13 +1187,22 @@ def _ul_preprocess( compA = LikelihoodComputer(aModel, toys) ## compute mu_hatA = compA.findMuHat(getattr(aModel, signal_type)) - nll0A = compA.likelihood(aModel.signals(mu_hatA), marginalize=marginalize, nll=True) + nll0A = compA.likelihood( + getattr(aModel, "signals" if signal_type == "signal_rel" else "nsignals")(mu_hatA), + marginalize=marginalize, + nll=True, + ) # print ( f"SL nll0A {nll0A:.3f} mu_hatA {mu_hatA:.3f} bg {aModel.backgrounds[0]:.3f} obs {aModel.observed[0]:.3f}" ) # return 1. 
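        # Editor's note (not part of the patch): root_func below encodes the
        # asymptotic CLs construction of arXiv:1007.1727. With Phi the standard
        # normal CDF, q_mu = max(0, 2*(nll - nll0)) and q_A its Asimov
        # counterpart, the branch q_A >= q_mu gives
        #
        #   CLsb = 1 - Phi(sqrt(q_mu)),  CLb = Phi(sqrt(q_A) - sqrt(q_mu)),
        #   CLs  = CLsb / CLb,
        #
        # and rootFromNLLs returns either CLs - alpha (for the Brent root
        # finding, alpha = 1 - cl = 0.05) or 1 - CLs when get_cls is true.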
- def root_func(mu, get_cls=False): + def root_func(mu: float, get_cls: bool = False) -> float: + """ + Calculate the root + :param mu: float POI + :param get_cls: if true returns 1-CLs value + """ ## the function to find the zero of (ie CLs - alpha) - nsig = model.signals(mu) + nsig = getattr(model, "signals" if signal_type == "signal_rel" else "nsignals")(mu) computer.ntot = model.backgrounds + nsig nll = computer.likelihood(nsig, marginalize=marginalize, nll=True) nllA = compA.likelihood(nsig, marginalize=marginalize, nll=True) @@ -1307,8 +1349,8 @@ def computeCLs(self, model, marginalize=False, toys=None, expected=False, trylas """ results: old ul= 180.999844 - ul (marginalized) 180.84672924414696 - CLs (marginalized) 0.0 + ul (marginalized) 184.8081186162269 + CLs (marginalized) 1.0 ul (profiled) 180.68039063387553 CLs (profiled) 0.75 """ From 3ec8a324f421a8789011beccd304456c9657aa23 Mon Sep 17 00:00:00 2001 From: jackaraz Date: Thu, 12 May 2022 20:30:26 +0200 Subject: [PATCH 12/34] initiate pyhf interface --- madanalysis/misc/pyhf_interface.py | 233 +++++++++++++++++++++++++++++ 1 file changed, 233 insertions(+) create mode 100644 madanalysis/misc/pyhf_interface.py diff --git a/madanalysis/misc/pyhf_interface.py b/madanalysis/misc/pyhf_interface.py new file mode 100644 index 00000000..52adb9a7 --- /dev/null +++ b/madanalysis/misc/pyhf_interface.py @@ -0,0 +1,233 @@ +################################################################################ +# +# Copyright (C) 2012-2022 Jack Araz, Eric Conte & Benjamin Fuks +# The MadAnalysis development team, email: +# +# This file is part of MadAnalysis 5. +# Official website: +# +# MadAnalysis 5 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# MadAnalysis 5 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with MadAnalysis 5. 
If not, see +# +################################################################################ + +import logging, sys +from typing import Sequence, Dict, Union, Optional +from numpy import warnings, isnan +import numpy as np + +try: + import pyhf +except ImportError as err: + logging.getLogger("MA5").error("Can't import pyhf.") + sys.exit() + +from pyhf.optimize import mixins + +pyhf.pdf.log.setLevel(logging.CRITICAL) +pyhf.workspace.log.setLevel(logging.CRITICAL) +mixins.log.setLevel(logging.CRITICAL) +pyhf.set_backend("numpy", precision="64b") + + +class PyhfInterface: + """ + pyhf interface for MadAnalysis 5 + :param signal: json patch signal + :param background: json dict for background + """ + + def __init__( + self, + signal: Union[Sequence, float], + background: Union[Dict, float], + nb: Optional[float] = None, + delta_nb: Optional[float] = None, + ): + self.model = None + self.data = None + + self.background = background + self.signal = signal + self.nb = nb + self.delta_nb = delta_nb + + try: + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + if isinstance(signal, float) and isinstance(background, float): + # Create model from uncorrelated region + self.model = pyhf.simplemodels.uncorrelated_background( + [max(signal, 0.0)], [nb], [delta_nb] + ) + self.data = [background] + self.model.config.auxdata + else: + workspace = pyhf.Workspace(background) + self.model = workspace.model( + patches=[signal], + modifier_settings={ + "normsys": {"interpcode": "code4"}, + "histosys": {"interpcode": "code4p"}, + }, + ) + data = workspace.data(model) + except (pyhf.exceptions.InvalidSpecification, KeyError) as err: + logging.getLogger("MA5").error("Invalid JSON file!! " + str(err)) + except Exception as err: + logging.getLogger("MA5").debug("Unknown error, check PyhfInterface " + str(err)) + + def computeCLs( + self, CLs_exp: bool = True, CLs_obs: bool = True, iteration_threshold: int = 3 + ) -> Union[float, Dict]: + """ + Compute 1-CLs values + + Parameters + ---------- + CLs_exp: bool + if true return expected CLs value + CLs_obs: bool + if true return observed CLs value + iteration_threshold: int + maximum number of trials + + Returns + ------- + float or dict: + CLs values {"CLs_obs": xx, "CLs_exp": [xx] * 5} or single CLs value + """ + if self.model is None or self.data is None: + if not CLs_exp or not CLs_obs: + return -1 + else: + return {"CLs_obs": -1, "CLs_exp": [-1] * 5} + + def get_CLs(model, data, **kwargs): + try: + CLs_obs, CLs_exp = pyhf.infer.hypotest( + 1.0, + data, + model, + test_stat=kwargs.get("stats", "qtilde"), + par_bounds=kwargs.get("bounds", model.config.suggested_bounds()), + return_expected_set=True, + ) + + except (AssertionError, pyhf.exceptions.FailedMinimization, ValueError) as err: + logging.getLogger("MA5").debug(str(err)) + # dont use false here 1.-CLs = 0 can be interpreted as false + return "update bounds" + + # if isnan(float(CLs_obs)) or any([isnan(float(x)) for x in CLs_exp]): + # return "update mu" + CLs_obs = float(CLs_obs[0]) if isinstance(CLs_obs, (list, tuple)) else float(CLs_obs) + + return { + "CLs_obs": 1.0 - CLs_obs, + "CLs_exp": list(map(lambda x: float(1.0 - x), CLs_exp)), + } + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + # pyhf can raise an error if the poi_test bounds are too stringent + # they need to be updated dynamically. 
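            # Editor's note (not part of the patch): the retry loop below
            # relaxes the fit settings step by step. A failed minimization
            # doubles the upper POI bound; a NaN CLs from the qtilde statistic
            # switches the test statistic to "q" and lowers the lower POI
            # bound. Sketch of the first branch, with poi_index the position
            # of mu in the bounds list:
            #
            #     lo, hi = bounds[poi_index]
            #     bounds[poi_index] = (lo, 2 * hi)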
+            arguments = dict(bounds=self.model.config.suggested_bounds(), stats="qtilde")
+            it = 0
+            while True:
+                CLs = get_CLs(self.model, self.data, **arguments)
+                if CLs == "update bounds":
+                    arguments["bounds"][self.model.config.poi_index] = (
+                        arguments["bounds"][self.model.config.poi_index][0],
+                        2 * arguments["bounds"][self.model.config.poi_index][1],
+                    )
+                    logging.getLogger("MA5").debug(
+                        "Hypothesis test inference integration bounds have been increased to "
+                        + str(arguments["bounds"][self.model.config.poi_index])
+                    )
+                    it += 1
+                elif isinstance(CLs, dict):
+                    if isnan(CLs["CLs_obs"]) or any([isnan(x) for x in CLs["CLs_exp"]]):
+                        arguments["stats"] = "q"
+                        arguments["bounds"][self.model.config.poi_index] = (
+                            arguments["bounds"][self.model.config.poi_index][0] - 5,
+                            arguments["bounds"][self.model.config.poi_index][1],
+                        )
+                        logging.getLogger("MA5").debug(
+                            "Hypothesis test inference integration bounds have been increased to "
+                            + str(arguments["bounds"][self.model.config.poi_index])
+                        )
+                    else:
+                        break
+                else:
+                    it += 1
+                # hard limit on the number of iterations; exceeding it means
+                # Nsig >>>>> Nobs
+                if it >= iteration_threshold:
+                    if not CLs_exp or not CLs_obs:
+                        return 1
+                    return {"CLs_obs": 1.0, "CLs_exp": [1.0] * 5}
+
+        if CLs_exp and not CLs_obs:
+            return CLs["CLs_exp"][2]
+        elif CLs_obs and not CLs_exp:
+            return CLs["CLs_obs"]
+
+        return CLs
+
+    @staticmethod
+    def exponentiateNLL(twice_nll: float, doIt: bool) -> float:
+        """
+
+        Parameters
+        ----------
+        twice_nll: float
+            twice negative log likelihood
+        doIt: bool
+            if doIt, then compute likelihood from nll, else return nll
+        """
+        if twice_nll is None:
+            return 0.0 if doIt else 9000.0
+
+        return np.exp(-twice_nll / 2.0) if doIt else twice_nll / 2.0
+
+    def likelihood(self, mu: float = 1.0, nll: bool = False, expected: bool = False) -> float:
+        """
+        Returns the value of the likelihood.
+ Inspired by the `pyhf.infer.mle` module but for non-log likelihood + + Parameters + ---------- + mu: float + POI: signal strength + nll: bool + if true, return nll, not llhd + expected: bool + if False, compute expected values, if True, + compute a priori expected, if "posteriori" compute posteriori + expected + """ + if self.model is None or self.data is None: + return -1 + # set a threshold for mu + mu = max(mu, -20.0) + mu = min(mu, 40.0) + + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "Values in x were outside bounds during a minimize step, clipping to bounds", + ) + _, twice_nllh = pyhf.infer.mle.fixed_poi_fit( + mu, self.data, self.model, return_fitted_val=True, maxiter=200 + ) + return np.exp(-twice_nllh / 2.0) if not nll else twice_nllh / 2.0 From 967bf530102d297393e7b0993485267ec6f687f4 Mon Sep 17 00:00:00 2001 From: jackaraz Date: Fri, 13 May 2022 09:29:49 +0200 Subject: [PATCH 13/34] finalize pyhf_interface.py --- madanalysis/misc/pyhf_interface.py | 173 ++++++++++++++++++++++++----- 1 file changed, 147 insertions(+), 26 deletions(-) diff --git a/madanalysis/misc/pyhf_interface.py b/madanalysis/misc/pyhf_interface.py index 52adb9a7..d841dcc0 100644 --- a/madanalysis/misc/pyhf_interface.py +++ b/madanalysis/misc/pyhf_interface.py @@ -25,6 +25,7 @@ from typing import Sequence, Dict, Union, Optional from numpy import warnings, isnan import numpy as np +from histfactory_reader import HF_Background, HF_Signal try: import pyhf @@ -49,8 +50,8 @@ class PyhfInterface: def __init__( self, - signal: Union[Sequence, float], - background: Union[Dict, float], + signal: Union[HF_Signal, float], + background: Union[HF_Background, float], nb: Optional[float] = None, delta_nb: Optional[float] = None, ): @@ -62,18 +63,55 @@ def __init__( self.nb = nb self.delta_nb = delta_nb + @staticmethod + def _initialize_workspace( + signal: Union[Sequence, float], + background: Union[Dict, float], + nb: Optional[float] = None, + delta_nb: Optional[float] = None, + expected: Optional[bool] = False, + ): + """ + Initialize pyhf workspace + + Parameters + ---------- + signal: Union[Sequence, float] + number of signal events or json patch + background: Union[Dict, float] + number of observed events or json dictionary + nb: Optional[float] + number of expected background events (MC) + delta_nb: Optional[float] + uncertainty on expected background events + expected: bool + if true prepare apriori expected workspace, default False + + Returns + ------- + Tuple: + Workspace(can be none in simple case), model, data + """ + try: with warnings.catch_warnings(): warnings.filterwarnings("ignore") if isinstance(signal, float) and isinstance(background, float): + if expected: + background = nb # Create model from uncorrelated region - self.model = pyhf.simplemodels.uncorrelated_background( + model = pyhf.simplemodels.uncorrelated_background( [max(signal, 0.0)], [nb], [delta_nb] ) - self.data = [background] + self.model.config.auxdata - else: - workspace = pyhf.Workspace(background) - self.model = workspace.model( + data = [background] + model.config.auxdata + + return None, model, data + elif isinstance(signal, HF_Signal) and isinstance(background, HF_Background): + HF = background() + if expected: + HF = background.get_expected() + workspace = pyhf.Workspace(HF) + model = workspace.model( patches=[signal], modifier_settings={ "normsys": {"interpcode": "code4"}, @@ -81,11 +119,15 @@ def __init__( }, ) data = workspace.data(model) + + return workspace, model, data except 
(pyhf.exceptions.InvalidSpecification, KeyError) as err: logging.getLogger("MA5").error("Invalid JSON file!! " + str(err)) except Exception as err: logging.getLogger("MA5").debug("Unknown error, check PyhfInterface " + str(err)) + return None, None, None + def computeCLs( self, CLs_exp: bool = True, CLs_obs: bool = True, iteration_threshold: int = 3 ) -> Union[float, Dict]: @@ -106,6 +148,10 @@ def computeCLs( float or dict: CLs values {"CLs_obs": xx, "CLs_exp": [xx] * 5} or single CLs value """ + _, self.model, self.data = self._initialize_workspace( + self.signal, self.background, self.nb, self.delta_nb + ) + if self.model is None or self.data is None: if not CLs_exp or not CLs_obs: return -1 @@ -184,22 +230,6 @@ def get_CLs(model, data, **kwargs): return CLs - @staticmethod - def exponentiateNLL(twice_nll: float, doIt: bool) -> float: - """ - - Parameters - ---------- - twice_nll: float - twice negative log likelihood - doIt: bool - if doIt, then compute likelihood from nll, else return nll - """ - if twice_nll is None: - return 0.0 if doIt else 9000.0 - - return np.exp(-twice_nll / 2.0) if doIt else twice_nll / 2.0 - def likelihood(self, mu: float = 1.0, nll: bool = False, expected: bool = False) -> float: """ Returns the value of the likelihood. @@ -212,10 +242,12 @@ def likelihood(self, mu: float = 1.0, nll: bool = False, expected: bool = False) nll: bool if true, return nll, not llhd expected: bool - if False, compute expected values, if True, - compute a priori expected, if "posteriori" compute posteriori - expected + if true, compute expected likelihood, else observed. """ + _, self.model, self.data = self._initialize_workspace( + self.signal, self.background, self.nb, self.delta_nb, expected + ) + if self.model is None or self.data is None: return -1 # set a threshold for mu @@ -231,3 +263,92 @@ def likelihood(self, mu: float = 1.0, nll: bool = False, expected: bool = False) mu, self.data, self.model, return_fitted_val=True, maxiter=200 ) return np.exp(-twice_nllh / 2.0) if not nll else twice_nllh / 2.0 + + def sigma_mu(self, expected: bool = False): + """ + get an estimate for the standard deviation of mu around mu_hat this is only used for + initialisation of the optimizer, so can be skipped in the first version, + + expected: + get muhat for the expected likelihood, not observed. 
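+
+        In the single-region fallback below this reduces to the rough estimate
+        sqrt(delta_nb**2 + nb) / nsignal; the HistFactory branch instead sums the
+        per-channel "poissonian" and Gaussian background terms (see the inline
+        comments below).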
+ """ + workspace, self.model, self.data = self._initialize_workspace( + self.signal, self.background, self.nb, self.delta_nb, expected + ) + + if workspace is not None: + obss, bgs, bgVars, nsig = {}, {}, {}, {} + channels = workspace.channels + for chdata in workspace["channels"]: + if not chdata["name"] in channels: + continue + bg, var = 0.0, 0.0 + for sample in chdata["samples"]: + if sample["name"] == "Bkg": + tbg = sample["data"][0] + bg += tbg + hi = sample["modifiers"][0]["data"]["hi_data"][0] + lo = sample["modifiers"][0]["data"]["lo_data"][0] + delta = max((hi - bg, bg - lo)) + var += delta**2 + if sample["name"] == "bsm": + ns = sample["data"][0] + nsig[chdata["name"]] = ns + bgs[chdata["name"]] = bg + bgVars[chdata["name"]] = var + for chdata in workspace["observations"]: + if not chdata["name"] in channels: + continue + obss[chdata["name"]] = chdata["data"][0] + vars = [] + for c in channels: + # poissonian error + if nsig[c] == 0.0: + nsig[c] = 1e-5 + poiss = (obss[c] - bgs[c]) / nsig[c] + gauss = bgVars[c] / nsig[c] ** 2 + vars.append(poiss + gauss) + var_mu = np.sum(vars) + n = len(obss) + return float(np.sqrt(var_mu / (n**2))) + + else: + return ( + float(np.sqrt(self.delta_nb**2 + self.nb) / self.signal) + if self.signal > 0.0 + else 0.0 + ) + + def muhat(self, expected: bool = False, allowNegativeSignals:bool = True): + """ + get the value of mu for which the likelihood is maximal. this is only used for + initialisation of the optimizer, so can be skipped in the first version of the code. + + Parameters + ---------- + expected: bool + get muhat for the expected likelihood, not observed. + allowNegativeSignals: bool + if true, allow negative values for mu + """ + workspace, self.model, self.data = self._initialize_workspace( + self.signal, self.background, self.nb, self.delta_nb, expected + ) + + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "Values in x were outside bounds during a minimize step, clipping to bounds", + ) + + try: + bounds = self.model.config.suggested_bounds() + if allowNegativeSignals: + bounds[self.model.config.poi_index] = (-5., 10. 
) + muhat, maxNllh = pyhf.infer.mle.fit( + self.data, self.model, return_fitted_val=True, par_bounds = bounds + ) + return muhat[self.model.config.poi_index] + except (pyhf.exceptions.FailedMinimization, ValueError) as e: + logging.getLogger("MA5").error(f"pyhf mle.fit failed {e}") + return float("nan") From 106e481e200f7e38fe7e6b4e19a87befe54c4269 Mon Sep 17 00:00:00 2001 From: jackaraz Date: Fri, 13 May 2022 10:18:40 +0200 Subject: [PATCH 14/34] update run_recast.py with new pyhf interface --- madanalysis/misc/pyhf_interface.py | 26 +++---- madanalysis/misc/run_recast.py | 121 +++++++++++++++++------------ 2 files changed, 82 insertions(+), 65 deletions(-) diff --git a/madanalysis/misc/pyhf_interface.py b/madanalysis/misc/pyhf_interface.py index d841dcc0..4f03ed31 100644 --- a/madanalysis/misc/pyhf_interface.py +++ b/madanalysis/misc/pyhf_interface.py @@ -128,9 +128,7 @@ def _initialize_workspace( return None, None, None - def computeCLs( - self, CLs_exp: bool = True, CLs_obs: bool = True, iteration_threshold: int = 3 - ) -> Union[float, Dict]: + def computeCLs(self, iteration_threshold: int = 3, **kwargs) -> Union[float, Dict]: """ Compute 1-CLs values @@ -153,19 +151,19 @@ def computeCLs( ) if self.model is None or self.data is None: - if not CLs_exp or not CLs_obs: + if "CLs_exp" in kwargs.keys() or "CLs_obs" in kwargs.keys(): return -1 else: return {"CLs_obs": -1, "CLs_exp": [-1] * 5} - def get_CLs(model, data, **kwargs): + def get_CLs(model, data, **keywordargs): try: CLs_obs, CLs_exp = pyhf.infer.hypotest( 1.0, data, model, - test_stat=kwargs.get("stats", "qtilde"), - par_bounds=kwargs.get("bounds", model.config.suggested_bounds()), + test_stat=keywordargs.get("stats", "qtilde"), + par_bounds=keywordargs.get("bounds", model.config.suggested_bounds()), return_expected_set=True, ) @@ -219,13 +217,13 @@ def get_CLs(model, data, **kwargs): # hard limit on iteration required if it exceeds this value it means # Nsig >>>>> Nobs if it >= iteration_threshold: - if not CLs_exp or not CLs_obs: + if "CLs_exp" in kwargs.keys() or "CLs_obs" in kwargs.keys(): return 1 return {"CLs_obs": 1.0, "CLs_exp": [1.0] * 5} - if CLs_exp and not CLs_obs: + if kwargs.get("CLs_exp", False): return CLs["CLs_exp"][2] - elif CLs_obs and not CLs_exp: + elif kwargs.get("CLs_obs", False): return CLs["CLs_obs"] return CLs @@ -319,7 +317,7 @@ def sigma_mu(self, expected: bool = False): else 0.0 ) - def muhat(self, expected: bool = False, allowNegativeSignals:bool = True): + def muhat(self, expected: bool = False, allowNegativeSignals: bool = True): """ get the value of mu for which the likelihood is maximal. this is only used for initialisation of the optimizer, so can be skipped in the first version of the code. @@ -332,7 +330,7 @@ def muhat(self, expected: bool = False, allowNegativeSignals:bool = True): if true, allow negative values for mu """ workspace, self.model, self.data = self._initialize_workspace( - self.signal, self.background, self.nb, self.delta_nb, expected + self.signal, self.background, self.nb, self.delta_nb, expected ) with warnings.catch_warnings(): @@ -344,9 +342,9 @@ def muhat(self, expected: bool = False, allowNegativeSignals:bool = True): try: bounds = self.model.config.suggested_bounds() if allowNegativeSignals: - bounds[self.model.config.poi_index] = (-5., 10. 
) + bounds[self.model.config.poi_index] = (-5.0, 10.0) muhat, maxNllh = pyhf.infer.mle.fit( - self.data, self.model, return_fitted_val=True, par_bounds = bounds + self.data, self.model, return_fitted_val=True, par_bounds=bounds ) return muhat[self.model.config.poi_index] except (pyhf.exceptions.FailedMinimization, ValueError) as e: diff --git a/madanalysis/misc/run_recast.py b/madanalysis/misc/run_recast.py index 1be0216c..4d8d5f1d 100644 --- a/madanalysis/misc/run_recast.py +++ b/madanalysis/misc/run_recast.py @@ -38,7 +38,7 @@ from typing import Text, Dict -class RunRecast(): +class RunRecast: def __init__(self, main, dirname): self.dirname = dirname @@ -75,6 +75,10 @@ def init(self): def SetCLsCalculator(self): + def pyhf_wrapper(nobs, nb, deltanb, nsignal, ntoys, CLs_obs = True): + from pyhf_interface import PyhfInterface + interface = PyhfInterface(nsignal, nobs, nb, deltanb) + return interface.computeCLs(CLs_obs = CLs_obs) if self.main.session_info.has_pyhf and self.main.recasting.CLs_calculator_backend == "pyhf": self.cls_calculator = pyhf_wrapper elif not self.main.session_info.has_pyhf: @@ -131,15 +135,17 @@ def edit_recasting_card(self): if self.forced or self.main.script: return self.logger.info("Would you like to edit the recasting Card ? (Y/N)") - allowed_answers=['n','no','y','yes'] - answer="" - while answer not in allowed_answers: - answer=input("Answer: ") - answer=answer.lower() - if answer=="no" or answer=="n": + allowed_answers = ['n', 'no', 'y', 'yes'] + answer = "" + while answer not in allowed_answers: + answer = input("Answer: ") + answer = answer.lower() + if answer == "no" or answer == "n": return else: - err = os.system(self.main.session_info.editor+" "+self.dirname+"/Input/recasting_card.dat") + err = os.system( + self.main.session_info.editor + " " + self.dirname + "/Input/recasting_card.dat" + ) return @@ -1376,91 +1382,93 @@ def read_cutflows(self, path, regions, regiondata): def extract_cls(self,regiondata,regions,xsection,lumi): self.logger.debug('Compute CLs...') ## computing fi a region belongs to the best expected ones, and derive the CLs in all cases - bestreg=[] + bestreg = [] rMax = -1 for reg in regions: nsignal = xsection * lumi * 1000. * regiondata[reg]["Nf"] / regiondata[reg]["N0"] - nb = regiondata[reg]["nb"] - nobs = regiondata[reg]["nobs"] + nb = regiondata[reg]["nb"] + nobs = regiondata[reg]["nobs"] deltanb = regiondata[reg]["deltanb"] - if nsignal<=0: - rSR = -1 + if nsignal <= 0: + rSR = -1 myCLs = 0 else: - n95 = float(regiondata[reg]["s95exp"]) * lumi * 1000. * regiondata[reg]["Nf"] / regiondata[reg]["N0"] - rSR = nsignal/n95 - myCLs = self.cls_calculator(nobs, nb, deltanb, nsignal, self.ntoys, CLs_obs = True) + n95 = float(regiondata[reg]["s95exp"]) * lumi * 1000. * regiondata[reg]["Nf"] / \ + regiondata[reg]["N0"] + rSR = nsignal / n95 + myCLs = self.cls_calculator(nobs, nb, deltanb, nsignal, self.ntoys, CLs_obs = True) regiondata[reg]["rSR"] = rSR regiondata[reg]["CLs"] = myCLs if rSR > rMax: - regiondata[reg]["best"]=1 + regiondata[reg]["best"] = 1 for mybr in bestreg: - regiondata[mybr]["best"]=0 + regiondata[mybr]["best"] = 0 bestreg = [reg] rMax = rSR else: - regiondata[reg]["best"]=0 + regiondata[reg]["best"] = 0 if self.cov_config != {}: minsig95, bestreg = 1e99, [] for cov_subset in self.cov_config.keys(): cov_regions = self.cov_config[cov_subset]["cov_regions"] - covariance = self.cov_config[cov_subset]["covariance" ] + covariance = self.cov_config[cov_subset]["covariance"] if all(s <= 0. 
for s in [regiondata[reg]["Nf"] for reg in cov_regions]): - regiondata["cov_subset"][cov_subset]["CLs"]= 0. + regiondata["cov_subset"][cov_subset]["CLs"] = 0. continue - # TODO remove self._cov_subset - self._cov_subset = cov_subset - CLs = self.slhCLs(regiondata,cov_regions,xsection,lumi,covariance, ntoys = self.ntoys) s95 = float(regiondata["cov_subset"][cov_subset]["s95exp"]) - regiondata["cov_subset"][cov_subset]["CLs"] = CLs + regiondata["cov_subset"][cov_subset]["CLs"] = self.slhCLs( + regiondata, cov_regions, xsection, lumi, covariance + ) if 0. < s95 < minsig95: regiondata['cov_subset'][cov_subset]["best"] = 1 for mybr in bestreg: - regiondata['cov_subset'][mybr]["best"]=0 + regiondata['cov_subset'][mybr]["best"] = 0 bestreg = [cov_subset] minsig95 = s95 else: - regiondata['cov_subset'][cov_subset]["best"]=0 + regiondata['cov_subset'][cov_subset]["best"] = 0 #initialize pyhf for cls calculation - iterator = [] if self.pyhf_config=={} else copy.deepcopy(list(self.pyhf_config.items())) + iterator = [] if self.pyhf_config == {} else copy.deepcopy(list(self.pyhf_config.items())) minsig95, bestreg = 1e99, [] for n, (likelihood_profile, config) in enumerate(iterator): - self.logger.debug(' * Running CLs for '+likelihood_profile) + self.logger.debug(' * Running CLs for ' + likelihood_profile) # safety check, just in case - if regiondata.get('pyhf',{}).get(likelihood_profile, False) == False: + if regiondata.get('pyhf', {}).get(likelihood_profile, False) == False: continue background = HF_Background(config) - self.logger.debug('current pyhf Configuration = '+str(config)) - signal = HF_Signal(config,regiondata,xsection=xsection) + self.logger.debug('current pyhf Configuration = ' + str(config)) + signal = HF_Signal(config, regiondata, xsection = xsection) is_not_extrapolated = signal.lumi == lumi - CLs = -1 + CLs = -1 if signal.isAlive(): sig_HF = signal(lumi) bkg_HF = background(lumi) if self.main.developer_mode: setattr(self, "hf_sig_test", sig_HF) setattr(self, "hf_bkg_test", bkg_HF) - CLs = pyhf_wrapper(bkg_HF, sig_HF) + from pyhf_interface import PyhfInterface + interface = PyhfInterface(sig_HF, bkg_HF) + CLs = interface.computeCLs() # Take observed if default lumi used, use expected if extrapolated CLs_out = CLs['CLs_obs'] if is_not_extrapolated else CLs['CLs_exp'][2] regiondata['pyhf'][likelihood_profile]['full_CLs_output'] = CLs if CLs_out >= 0.: - regiondata['pyhf'][likelihood_profile]['CLs'] = CLs_out + regiondata['pyhf'][likelihood_profile]['CLs'] = CLs_out s95 = float(regiondata['pyhf'][likelihood_profile]['s95exp']) if 0. < s95 < minsig95: regiondata['pyhf'][likelihood_profile]["best"] = 1 for mybr in bestreg: - regiondata['pyhf'][mybr]["best"]=0 + regiondata['pyhf'][mybr]["best"] = 0 bestreg = [likelihood_profile] minsig95 = s95 else: - regiondata['pyhf'][likelihood_profile]["best"]=0 + regiondata['pyhf'][likelihood_profile]["best"] = 0 return regiondata - def slhCLs(self, regiondata, cov_regions, xsection, lumi, covariance, expected=False, ntoys = 10000): + def slhCLs(self, regiondata, cov_regions, xsection, lumi, covariance, expected = False): """ (slh for simplified likelihood) Compute a global CLs combining the different region yields by using a simplified likelihood method (see CMS-NOTE-2017-001 for more information). 
It relies on the
@@ -1475,13 +1483,17 @@ def slhCLs(self, regiondata, cov_regions, xsection, lumi, covariance, expected=F
         # data
         from madanalysis.misc.simplified_likelihood import Data, UpperLimitComputer
         LHdata = Data(
-            observed=observed, backgrounds=backgrounds, covariance=covariance, nsignal=nsignal, deltas_rel=0.2, lumi=lumi
+            observed = observed,
+            backgrounds = backgrounds,
+            covariance = covariance,
+            nsignal = nsignal,
+            deltas_rel = 0.0,
+            lumi = lumi,
         )
-        computer = UpperLimitComputer(ntoys=self.ntoys, cl=.95)
-        regiondata["cov_subset"][self._cov_subset]["mu"] = float(computer.ulOnYields(LHdata, expected=expected))
+        computer = UpperLimitComputer(ntoys = self.ntoys, cl = .95)
         # calculation and output
         try:
-            return computer.computeCLs(LHdata, expected=expected)
+            return computer.computeCLs(LHdata, expected = expected)
         except Exception as err:
             logging.getLogger('MA5').debug("slhCLs : " + str(err))
             return 0.0
@@ -1552,26 +1564,31 @@ def extract_sig_lhcls(self, regiondata: Dict, lumi: float, tag: Text) -> dict:
 
         def get_s95(regs, matrix):
             def sig95(xsection):
-                return self.slhCLs(regiondata,regs,xsection,lumi,matrix,(tag=="exp"), ntoys = self.ntoys)-0.95
+                return self.slhCLs(
+                    regiondata, regs, xsection, lumi, matrix, (tag == "exp")
+                ) - 0.95
+
             return sig95
 
         for cov_subset in self.cov_config.keys():
-            # TODO remove self._cov_subset
-            self._cov_subset = cov_subset
             cov_regions = self.cov_config[cov_subset]["cov_regions"]
-            covariance  = self.cov_config[cov_subset]["covariance" ]
+            covariance = self.cov_config[cov_subset]["covariance"]
            if cov_subset not in regiondata["cov_subset"].keys():
                 regiondata["cov_subset"][cov_subset] = {}
             if all(s <= 0. for s in [regiondata[reg]["Nf"] for reg in cov_regions]):
                 regiondata["cov_subset"][cov_subset][f"s95{tag}"] = "-1"
                 continue
             low, hig = 1., 1.
-            while self.slhCLs(regiondata, cov_regions, low, lumi, covariance, (tag == "exp"), ntoys=self.ntoys) > 0.95:
+            while self.slhCLs(
+                regiondata, cov_regions, low, lumi, covariance, (tag == "exp"),
+            ) > 0.95:
                 self.logger.debug('lower bound = ' + str(low))
                 low *= 0.1
                 if low < 1e-10: break
-            while self.slhCLs(regiondata, cov_regions, hig, lumi, covariance, (tag == "exp"), ntoys=self.ntoys) < 0.95:
+            while self.slhCLs(
+                regiondata, cov_regions, hig, lumi, covariance, (tag == "exp"),
+            ) < 0.95:
                 self.logger.debug('upper bound = ' + str(hig))
                 hig *= 10.
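                # note: the 0.1/10 scans above only bracket the CLs = 0.95 crossing
                # for the brentq call further down; the hard caps guard against
                # configurations where the crossing is never reached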
if hig > 1e10: break @@ -1598,8 +1615,10 @@ def pyhf_sig95Wrapper(self, lumi, regiondata, tag): if 'pyhf' not in list(regiondata.keys()): regiondata['pyhf'] = {} - def get_pyhf_result(*args): - rslt = pyhf_wrapper(*args) + def get_pyhf_result(background, signal): + from pyhf_interface import PyhfInterface + interface = PyhfInterface(signal, background) + rslt = interface.computeCLs() if tag == "exp" and not self.is_apriori: return rslt["CLs_exp"][2] return rslt['CLs_obs'] From b5727e60d57b64b111d0cace0466fcb3565485e2 Mon Sep 17 00:00:00 2001 From: jackaraz Date: Fri, 13 May 2022 10:54:46 +0200 Subject: [PATCH 15/34] fix relative import --- madanalysis/misc/run_recast.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/madanalysis/misc/run_recast.py b/madanalysis/misc/run_recast.py index 4d8d5f1d..1cbd6015 100644 --- a/madanalysis/misc/run_recast.py +++ b/madanalysis/misc/run_recast.py @@ -76,7 +76,7 @@ def init(self): def SetCLsCalculator(self): def pyhf_wrapper(nobs, nb, deltanb, nsignal, ntoys, CLs_obs = True): - from pyhf_interface import PyhfInterface + from madanalysis.misc.pyhf_interface import PyhfInterface interface = PyhfInterface(nsignal, nobs, nb, deltanb) return interface.computeCLs(CLs_obs = CLs_obs) if self.main.session_info.has_pyhf and self.main.recasting.CLs_calculator_backend == "pyhf": @@ -1448,7 +1448,7 @@ def extract_cls(self,regiondata,regions,xsection,lumi): if self.main.developer_mode: setattr(self, "hf_sig_test", sig_HF) setattr(self, "hf_bkg_test", bkg_HF) - from pyhf_interface import PyhfInterface + from madanalysis.misc.pyhf_interface import PyhfInterface interface = PyhfInterface(sig_HF, bkg_HF) CLs = interface.computeCLs() # Take observed if default lumi used, use expected if extrapolated @@ -1616,7 +1616,7 @@ def pyhf_sig95Wrapper(self, lumi, regiondata, tag): regiondata['pyhf'] = {} def get_pyhf_result(background, signal): - from pyhf_interface import PyhfInterface + from madanalysis.misc.pyhf_interface import PyhfInterface interface = PyhfInterface(signal, background) rslt = interface.computeCLs() if tag == "exp" and not self.is_apriori: From 103bd3d91e31ba99c8cfaf04867e0cd8be7fad10 Mon Sep 17 00:00:00 2001 From: jackaraz Date: Fri, 13 May 2022 10:56:07 +0200 Subject: [PATCH 16/34] fix relative import --- madanalysis/misc/pyhf_interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/madanalysis/misc/pyhf_interface.py b/madanalysis/misc/pyhf_interface.py index 4f03ed31..1ec71044 100644 --- a/madanalysis/misc/pyhf_interface.py +++ b/madanalysis/misc/pyhf_interface.py @@ -25,7 +25,7 @@ from typing import Sequence, Dict, Union, Optional from numpy import warnings, isnan import numpy as np -from histfactory_reader import HF_Background, HF_Signal +from madanalysis.misc.histfactory_reader import HF_Background, HF_Signal try: import pyhf From 6832e177104333dcbe3b9266ecd311d126b57907 Mon Sep 17 00:00:00 2001 From: jackaraz Date: Fri, 13 May 2022 16:02:09 +0200 Subject: [PATCH 17/34] match simplified_likelihood.py --- madanalysis/misc/simplified_likelihood.py | 81 +++++++++++++++-------- 1 file changed, 52 insertions(+), 29 deletions(-) diff --git a/madanalysis/misc/simplified_likelihood.py b/madanalysis/misc/simplified_likelihood.py index 03272064..1ecbd871 100644 --- a/madanalysis/misc/simplified_likelihood.py +++ b/madanalysis/misc/simplified_likelihood.py @@ -49,9 +49,21 @@ logger = logging.getLogger("MA5") -# rootFromNLLs, determineBrentBracket are retreived from smodels.tools.statistics -def rootFromNLLs(nllA, nll0A, 
nll, nll0, get_cls=False): - """compute the CLs - alpha from the NLLs""" +# CLsfromNLL, determineBrentBracket are retreived from smodels.tools.statistics +def CLsfromNLL( + nllA: float, nll0A: float, nll: float, nll0: float, return_type: Text = "CLs-alpha" +) -> float: + """ + compute the CLs - alpha from the NLLs + TODO: following needs explanation + :param nllA: + :param nll0A: + :param nll: + :param nll0: + :param return_type: (Text) "CLs-alpha" or "1-CLs" + :return: + """ + assert return_type in ["CLs-alpha", "1-CLs"], f"Unknown return type: {return_type}." qmu = 0.0 if 2 * (nll - nll0) < 0.0 else 2 * (nll - nll0) sqmu = np.sqrt(qmu) qA = 2 * (nllA - nll0A) @@ -67,7 +79,7 @@ def rootFromNLLs(nllA, nll0A, nll, nll0, get_cls=False): CLs = CLsb / CLb if CLb > 0 else 0.0 - if get_cls: + if return_type == "1-CLs": return 1.0 - CLs cl = 0.95 @@ -1067,7 +1079,7 @@ def chi2(self, nsig, marginalize=False): class UpperLimitComputer: debug_mode = False - def __init__(self, ntoys=30000, cl=0.95): + def __init__(self, ntoys: float = 30000, cl: float = 0.95): """ :param ntoys: number of toys when marginalizing @@ -1076,12 +1088,12 @@ def __init__(self, ntoys=30000, cl=0.95): self.toys = ntoys self.cl = cl - def ulOnSigmaTimesEff( + def getUpperLimitOnSigmaTimesEff( self, model, marginalize=False, toys=None, expected=False, trylasttime=False ): """upper limit on the fiducial cross section sigma times efficiency, - obtained from the defined - Data (using the signal prediction + summed over all signal regions, i.e. sum_i xsec^prod_i eff_i + obtained from the defined Data (using the signal prediction for each signal regio/dataset), by using the q_mu test statistic from the CCGV paper (arXiv:1007.1727). @@ -1093,9 +1105,10 @@ def ulOnSigmaTimesEff( :params trylasttime: if True, then dont try extra :returns: upper limit on fiducial cross section """ - ul = self.ulOnYields( + ul = self.getUpperLimitOnMu( model, marginalize=marginalize, toys=toys, expected=expected, trylasttime=trylasttime ) + if ul == None: return ul if model.lumi is None: @@ -1130,7 +1143,7 @@ def _ul_preprocess( assert signal_type in [ "signal_rel", "nsignal", - ], f"Signal type can only be `signal_rel` or `nsignal`. `{signal_type}` is given" + ], f"Signal type can only be `signal_rel` or `nsignal`. `{signal_type}` is given." # if expected: # marginalize = True if model.zeroSignal(): @@ -1165,7 +1178,7 @@ def _ul_preprocess( nll=True, ) # print ( f"SL nll0 {nll0:.3f} muhat {mu_hat:.3f} sigma_mu {sigma_mu:.3f}" ) - if np.isinf(nll0) and marginalize == False and not trylasttime: + if np.isinf(nll0) and not marginalize and not trylasttime: logger.warning( "nll is infinite in profiling! we switch to marginalization, but only for this one!" ) @@ -1195,7 +1208,7 @@ def _ul_preprocess( # print ( f"SL nll0A {nll0A:.3f} mu_hatA {mu_hatA:.3f} bg {aModel.backgrounds[0]:.3f} obs {aModel.observed[0]:.3f}" ) # return 1. 
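        # clsRoot below is used in two ways by the callers in this file:
        # getUpperLimitOnMu hands it to optimize.brentq to find the mu where
        # CLs - alpha crosses zero (the 95% CL upper limit on mu), while
        # computeCLs evaluates it once at mu = 1 with return_type="1-CLs"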
- def root_func(mu: float, get_cls: bool = False) -> float: + def clsRoot(mu: float, return_type: Text = "CLs-alpha") -> float: """ Calculate the root :param mu: float POI @@ -1206,13 +1219,16 @@ def root_func(mu: float, get_cls: bool = False) -> float: computer.ntot = model.backgrounds + nsig nll = computer.likelihood(nsig, marginalize=marginalize, nll=True) nllA = compA.likelihood(nsig, marginalize=marginalize, nll=True) - return rootFromNLLs(nllA, nll0A, nll, nll0, get_cls=get_cls) + return CLsfromNLL(nllA, nll0A, nll, nll0, return_type=return_type) - return mu_hat, sigma_mu, root_func + return mu_hat, sigma_mu, clsRoot + + def getUpperLimitOnMu( + self, model, marginalize=False, toys=None, expected=False, trylasttime=False + ): + """upper limit on the signal strength multiplier mu + obtained from the defined Data (using the signal prediction - def ulOnYields(self, model, marginalize=False, toys=None, expected=False, trylasttime=False): - """upper limit on signal yields obtained from the defined - Data (using the signal prediction for each signal regio/dataset), by using the q_mu test statistic from the CCGV paper (arXiv:1007.1727). @@ -1222,20 +1238,27 @@ def ulOnYields(self, model, marginalize=False, toys=None, expected=False, trylas true: compute a priori expected, "posteriori": compute a posteriori expected :params trylasttime: if True, then dont try extra - :returns: upper limit on yields + :returns: upper limit on the signal strength multiplier mu """ - mu_hat, sigma_mu, root_func = self._ul_preprocess( + mu_hat, sigma_mu, clsRoot = self._ul_preprocess( model, marginalize, toys, expected, trylasttime ) if mu_hat == None: return None - a, b = determineBrentBracket(mu_hat, sigma_mu, root_func) - mu_lim = optimize.brentq(root_func, a, b, rtol=1e-03, xtol=1e-06) + a, b = determineBrentBracket(mu_hat, sigma_mu, clsRoot) + mu_lim = optimize.brentq(clsRoot, a, b, rtol=1e-03, xtol=1e-06) return mu_lim - def computeCLs(self, model, marginalize=False, toys=None, expected=False, trylasttime=False): + def computeCLs( + self, + model: Data, + marginalize: bool = False, + toys: float = None, + expected: Union[bool, Text] = False, + trylasttime: bool = False, + ) -> float: """ - Compute the confidence level of the model + Compute the exclusion confidence level of the model (1-CLs) :param model: statistical model :param marginalize: if true, marginalize nuisances, else profile them :param toys: specify number of toys. 
Use default is none @@ -1245,12 +1268,10 @@ def computeCLs(self, model, marginalize=False, toys=None, expected=False, trylas :param trylasttime: if True, then dont try extra :return: 1 - CLs value """ - _, _, root_func = self._ul_preprocess( - model, marginalize, toys, expected, trylasttime, signal_type="nsignal" - ) + _, _, clsRoot = self._ul_preprocess(model, marginalize, toys, expected, trylasttime) # 1-(CLs+alpha) -> alpha = 0.05 - return root_func(1.0, get_cls=True) + return clsRoot(1.0, return_type="1-CLs") if __name__ == "__main__": @@ -1337,11 +1358,13 @@ def computeCLs(self, model, marginalize=False, toys=None, expected=False, trylas nsignal ) # With respect to the older refernece value one must normalize the xsec print("old ul=", ul_old) - ul = ulComp.ulOnYields(m, marginalize=True) + ul = ulComp.getUpperLimitOnMu(m, marginalize=True) + cls = ulComp.computeCLs(m, marginalize=True) print("ul (marginalized)", ul) print("CLs (marginalized)", cls) - ul = ulComp.ulOnYields(m, marginalize=False) + + ul = ulComp.getUpperLimitOnMu(m, marginalize=False) cls = ulComp.computeCLs(m, marginalize=False) print("ul (profiled)", ul) print("CLs (profiled)", cls) From 0d1460dcccb24a4a02c4fdb3d4d1818a3659d498 Mon Sep 17 00:00:00 2001 From: jackaraz Date: Fri, 13 May 2022 16:18:25 +0200 Subject: [PATCH 18/34] adapt new pyhf_interface.py: bugfixes --- madanalysis/misc/pyhf_interface.py | 16 +++++++--- madanalysis/misc/run_recast.py | 48 +++++++++++++++++------------- 2 files changed, 40 insertions(+), 24 deletions(-) diff --git a/madanalysis/misc/pyhf_interface.py b/madanalysis/misc/pyhf_interface.py index 1ec71044..9c5431cd 100644 --- a/madanalysis/misc/pyhf_interface.py +++ b/madanalysis/misc/pyhf_interface.py @@ -92,6 +92,7 @@ def _initialize_workspace( Tuple: Workspace(can be none in simple case), model, data """ + workspace, model, data = None, None, None try: with warnings.catch_warnings(): @@ -105,7 +106,6 @@ def _initialize_workspace( ) data = [background] + model.config.auxdata - return None, model, data elif isinstance(signal, HF_Signal) and isinstance(background, HF_Background): HF = background() if expected: @@ -119,14 +119,22 @@ def _initialize_workspace( }, ) data = workspace.data(model) - - return workspace, model, data + elif isinstance(signal, list) and isinstance(background, dict): + workspace = pyhf.Workspace(background) + model = workspace.model( + patches=[signal], + modifier_settings={ + "normsys": {"interpcode": "code4"}, + "histosys": {"interpcode": "code4p"}, + }, + ) + data = workspace.data(model) except (pyhf.exceptions.InvalidSpecification, KeyError) as err: logging.getLogger("MA5").error("Invalid JSON file!! 
" + str(err)) except Exception as err: logging.getLogger("MA5").debug("Unknown error, check PyhfInterface " + str(err)) - return None, None, None + return workspace, model, data def computeCLs(self, iteration_threshold: int = 3, **kwargs) -> Union[float, Dict]: """ diff --git a/madanalysis/misc/run_recast.py b/madanalysis/misc/run_recast.py index 1cbd6015..169667bc 100644 --- a/madanalysis/misc/run_recast.py +++ b/madanalysis/misc/run_recast.py @@ -1629,42 +1629,50 @@ def CLs(xsec): return get_pyhf_result(bkg(lumi), signal(lumi))-0.95 return CLs - iterator = [] if self.pyhf_config=={} else copy.deepcopy(list(self.pyhf_config.items())) + iterator = [] if self.pyhf_config == {} else copy.deepcopy(list(self.pyhf_config.items())) for n, (likelihood_profile, config) in enumerate(iterator): - self.logger.debug(' * Running sig95'+tag+' for '+likelihood_profile) + self.logger.debug(' * Running sig95' + tag + ' for ' + likelihood_profile) if likelihood_profile not in list(regiondata['pyhf'].keys()): regiondata['pyhf'][likelihood_profile] = {} - background = HF_Background(config, expected=(tag=='exp' and self.is_apriori)) - self.logger.debug('Config : '+str(config)) - if not HF_Signal(config, regiondata, xsection=1., background=background).isAlive(): - self.logger.debug(likelihood_profile+' has no signal event.') - regiondata['pyhf'][likelihood_profile]["s95"+tag] = "-1" + background = HF_Background(config, expected = (tag == 'exp' and self.is_apriori)) + self.logger.debug('Config : ' + str(config)) + if not HF_Signal(config, regiondata, xsection = 1., background = background).isAlive(): + self.logger.debug(likelihood_profile + ' has no signal event.') + regiondata['pyhf'][likelihood_profile]["s95" + tag] = "-1" continue low, hig = 1., 1. - while get_pyhf_result(background(lumi),\ - HF_Signal(config, regiondata,xsection=low)(lumi)) > 0.95: - self.logger.debug(tag+': profile '+likelihood_profile+\ - ', lower bound = '+str(low)) + while get_pyhf_result( + background(lumi), + HF_Signal(config, regiondata, xsection = low)(lumi) + ) > 0.95: + self.logger.debug( + tag + ': profile ' + likelihood_profile + \ + ', lower bound = ' + str(low) + ) low *= 0.1 if low < 1e-10: break - while get_pyhf_result(background(lumi),\ - HF_Signal(config, regiondata,xsection=hig)(lumi)) < 0.95: - self.logger.debug(tag+': profile '+likelihood_profile+\ - ', higher bound = '+str(hig)) + while get_pyhf_result( + background(lumi), + HF_Signal(config, regiondata, xsection = hig)(lumi) + ) < 0.95: + self.logger.debug( + tag + ': profile ' + likelihood_profile + \ + ', higher bound = ' + str(hig) + ) hig *= 10. if hig > 1e10: break try: import scipy s95 = scipy.optimize.brentq( - sig95(config, regiondata, background),low,hig,xtol=low/100. + sig95(config, regiondata, background), low, hig, xtol = low / 100. 
)
             except Exception as err:
                 self.logger.debug(str(err))
-                self.logger.debug('Can not calculate sig95'+tag+' for '+likelihood_profile)
-                s95=-1
-            regiondata['pyhf'][likelihood_profile]["s95"+tag] = "{:.7f}".format(s95)
-            self.logger.debug(likelihood_profile+' sig95'+tag+' = {:.7f} pb'.format(s95))
+                self.logger.debug('Cannot calculate sig95' + tag + ' for ' + likelihood_profile)
+                s95 = -1
+            regiondata['pyhf'][likelihood_profile]["s95" + tag] = "{:.7f}".format(s95)
+            self.logger.debug(likelihood_profile + ' sig95' + tag + ' = {:.7f} pb'.format(s95))
         return regiondata

From f3e47e10d1b54f02b8ecc7797f7d4e807f2034d4 Mon Sep 17 00:00:00 2001
From: jackaraz
Date: Fri, 13 May 2022 16:35:02 +0200
Subject: [PATCH 19/34] add an abstract method for taco interface

---
 madanalysis/misc/taco_base.py | 82 +++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 madanalysis/misc/taco_base.py

diff --git a/madanalysis/misc/taco_base.py b/madanalysis/misc/taco_base.py
new file mode 100644
index 00000000..d7c1321c
--- /dev/null
+++ b/madanalysis/misc/taco_base.py
@@ -0,0 +1,82 @@
+################################################################################
+#
+#  Copyright (C) 2012-2022 Jack Araz, Eric Conte & Benjamin Fuks
+#  The MadAnalysis development team, email:
+#
+#  This file is part of MadAnalysis 5.
+#  Official website:
+#
+#  MadAnalysis 5 is free software: you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation, either version 3 of the License, or
+#  (at your option) any later version.
+#
+#  MadAnalysis 5 is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with MadAnalysis 5. If not, see
+#
+################################################################################
+
+from abc import ABC, abstractmethod
+from typing import Text
+
+
+class TACOBase(ABC):
+    def __init__(self, analysis: Text, regionID: Text):
+        """
+        Abstract Class for TACO interface
+
+        Parameters
+        ----------
+        analysis: (Text) analysis name
+        regionID: (Text) region name
+        """
+        self.analysis = analysis
+        self.regionOD = regionID
+
+    @abstractmethod
+    def likelihood(self, mu: float = 1.0, nll: bool = False, expected: bool = False) -> float:
+        """
+        Returns the value of the likelihood.
+        Inspired by the `pyhf.infer.mle` module but for non-log likelihood
+
+        Parameters
+        ----------
+        mu: float
+            POI: signal strength
+        nll: bool
+            if true, return nll, not llhd
+        expected: bool
+            if true, compute expected likelihood, else observed.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def sigma_mu(self, expected: bool = False) -> float:
+        """
+        Get an estimate for the standard deviation of mu around mu_hat. This is only
+        used for initialisation of the optimizer, so it can be skipped in a first version.
+
+        expected:
+            get sigma_mu for the expected likelihood, not observed.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def muhat(self, expected: bool = False, allowNegativeSignals: bool = True) -> float:
+        """
+        Get the value of mu for which the likelihood is maximal. This is only used for
+        initialisation of the optimizer, so can be skipped in the first version of the code.
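+
+        For a single-bin counting model one expects roughly
+        muhat ~ (nobs - nb) / nsignal, e.g. (10 - 8) / 4 = 0.5 (a rough analytic
+        estimate, not what a backend's optimizer will return exactly).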
+
+        Parameters
+        ----------
+        expected: bool
+            get muhat for the expected likelihood, not observed.
+        allowNegativeSignals: bool
+            if true, allow negative values for mu
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def getUpperLimit(self, expected: bool = False):
+        """
+        code here that retrieves the upper limit, expected or observed
+        """
+        raise NotImplementedError
+
+    def dataType(self):  # a technicality
+        return "efficiencyMap"
+
+    def analysisId(self):
+        """
+        return the analysis id, e.g. "atlas_susy_2018_02"
+        """
+        return self.analysis

From 1bde071b77e6a4ede92ec30591e393d727375604 Mon Sep 17 00:00:00 2001
From: jackaraz
Date: Fri, 13 May 2022 19:06:59 +0200
Subject: [PATCH 20/34] add upper limit calculation for POI

---
 madanalysis/misc/pyhf_interface.py | 33 ++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/madanalysis/misc/pyhf_interface.py b/madanalysis/misc/pyhf_interface.py
index 9c5431cd..c045cf03 100644
--- a/madanalysis/misc/pyhf_interface.py
+++ b/madanalysis/misc/pyhf_interface.py
@@ -21,7 +21,7 @@
 #
 ################################################################################
 
-import logging, sys
+import logging, sys, scipy
 from typing import Sequence, Dict, Union, Optional
 from numpy import warnings, isnan
 import numpy as np
@@ -136,12 +136,16 @@ def _initialize_workspace(
 
         return workspace, model, data
 
-    def computeCLs(self, iteration_threshold: int = 3, **kwargs) -> Union[float, Dict]:
+    def computeCLs(
+        self, mu: float = 1.0, expected: bool = False, iteration_threshold: int = 3, **kwargs
+    ) -> Union[float, Dict]:
         """
         Compute 1-CLs values
 
         Parameters
         ----------
+        mu: float
+            POI signal strength
         CLs_exp: bool
             if true return expected CLs value
         CLs_obs: bool
@@ -155,7 +159,7 @@ def computeCLs(self, iteration_threshold: int = 3, **kwargs) -> Union[float, Dic
             CLs values {"CLs_obs": xx, "CLs_exp": [xx] * 5} or single CLs value
         """
         _, self.model, self.data = self._initialize_workspace(
-            self.signal, self.background, self.nb, self.delta_nb
+            self.signal, self.background, self.nb, self.delta_nb, expected
         )
 
         if self.model is None or self.data is None:
@@ -167,7 +171,7 @@ def computeCLs(self, iteration_threshold: int = 3, **kwargs) -> Union[float, Dic
         def get_CLs(model, data, **keywordargs):
             try:
                 CLs_obs, CLs_exp = pyhf.infer.hypotest(
-                    1.0,
+                    mu,
                     data,
                     model,
                     test_stat=keywordargs.get("stats", "qtilde"),
@@ -358,3 +362,24 @@ def muhat(self, expected: bool = False, allowNegativeSignals: bool = True):
         except (pyhf.exceptions.FailedMinimization, ValueError) as e:
             logging.getLogger("MA5").error(f"pyhf mle.fit failed {e}")
             return float("nan")
+
+    def computeUpperLimitOnMu(self, expected: bool = False):
+        """
+        Compute upper limit on POI (signal strength)
+
+        Parameters
+        ----------
+        expected: bool
+            if true, compute the expected upper limit, else observed
+        """
+        # computer returns (1 - CLs) - 0.95, so bracket its zero crossing for brentq
+        computer = lambda mu: self.computeCLs(mu=mu, expected=expected, CLs_obs=True) - 0.95
+
+        low, hig = 1.0, 1.0
+        while computer(low) > 0.0:
+            low *= 0.1
+            if low < 1e-10:
+                break
+        while computer(hig) < 0.0:
+            hig *= 10.0
+            if hig > 1e10:
+                break
+
+        return scipy.optimize.brentq(computer, low, hig, xtol=low / 100.0)

From 5cf4311ba0f0899dd205ffc40812594f87bb89bd Mon Sep 17 00:00:00 2001
From: jackaraz
Date: Fri, 13 May 2022 19:58:00 +0200
Subject: [PATCH 21/34] initialize TACO with pyhf backend

---
 madanalysis/misc/pyhf_interface.py      | 101 +++++++++---
 madanalysis/misc/taco/__init__.py       |   0
 .../misc/{taco_base.py => taco/base.py} |  50 +++++-
 madanalysis/misc/taco/pyhf_backend.py   | 144 ++++++++++++++++++
 madanalysis/misc/taco/sl_backend.py     | 138 +++++++++++++++++
 5 files changed, 412 insertions(+), 21 deletions(-)
 create mode 100644 madanalysis/misc/taco/__init__.py
 rename madanalysis/misc/{taco_base.py => taco/base.py} (67%)
 create mode 100644 madanalysis/misc/taco/pyhf_backend.py
 create mode 100644 madanalysis/misc/taco/sl_backend.py

diff --git a/madanalysis/misc/pyhf_interface.py
b/madanalysis/misc/pyhf_interface.py index c045cf03..b4153967 100644 --- a/madanalysis/misc/pyhf_interface.py +++ b/madanalysis/misc/pyhf_interface.py @@ -93,11 +93,10 @@ def _initialize_workspace( Workspace(can be none in simple case), model, data """ workspace, model, data = None, None, None - try: with warnings.catch_warnings(): warnings.filterwarnings("ignore") - if isinstance(signal, float) and isinstance(background, float): + if isinstance(signal, float) and isinstance(background, (float, int)): if expected: background = nb # Create model from uncorrelated region @@ -240,7 +239,13 @@ def get_CLs(model, data, **keywordargs): return CLs - def likelihood(self, mu: float = 1.0, nll: bool = False, expected: bool = False) -> float: + def likelihood( + self, + mu: float = 1.0, + nll: bool = False, + expected: bool = False, + iteration_threshold: int = 3, + ) -> float: """ Returns the value of the likelihood. Inspired by the `pyhf.infer.mle` module but for non-log likelihood @@ -264,14 +269,44 @@ def likelihood(self, mu: float = 1.0, nll: bool = False, expected: bool = False) mu = max(mu, -20.0) mu = min(mu, 40.0) + def computellhd(model, data, bounds): + try: + _, twice_nllh = pyhf.infer.mle.fixed_poi_fit( + mu, + self.data, + self.model, + return_fitted_val=True, + maxiter=200, + par_bounds=bounds, + ) + except (AssertionError, pyhf.exceptions.FailedMinimization, ValueError) as err: + logging.getLogger("MA5").debug(str(err)) + return "update bounds" + + return twice_nllh + with warnings.catch_warnings(): warnings.filterwarnings( "ignore", "Values in x were outside bounds during a minimize step, clipping to bounds", ) - _, twice_nllh = pyhf.infer.mle.fixed_poi_fit( - mu, self.data, self.model, return_fitted_val=True, maxiter=200 - ) + + bounds = self.model.config.suggested_bounds() + it = 0 + while True: + twice_nllh = computellhd(self.model, self.data, bounds) + if twice_nllh == "update bounds": + bounds[self.model.config.poi_index] = ( + bounds[self.model.config.poi_index][0] - 5.0, + 2.0 * bounds[self.model.config.poi_index][1], + ) + it += 1 + else: + break + if it >= iteration_threshold: + logging.getLogger("MA5").debug("pyhf mle.fit failed") + return float("nan") + return np.exp(-twice_nllh / 2.0) if not nll else twice_nllh / 2.0 def sigma_mu(self, expected: bool = False): @@ -279,6 +314,8 @@ def sigma_mu(self, expected: bool = False): get an estimate for the standard deviation of mu around mu_hat this is only used for initialisation of the optimizer, so can be skipped in the first version, + adapted from smodels.tools.pyhfInterface + expected: get muhat for the expected likelihood, not observed. """ @@ -329,7 +366,12 @@ def sigma_mu(self, expected: bool = False): else 0.0 ) - def muhat(self, expected: bool = False, allowNegativeSignals: bool = True): + def muhat( + self, + expected: bool = False, + allowNegativeSignals: bool = True, + iteration_threshold: int = 3, + ): """ get the value of mu for which the likelihood is maximal. this is only used for initialisation of the optimizer, so can be skipped in the first version of the code. 
@@ -345,23 +387,46 @@ def muhat(self, expected: bool = False, allowNegativeSignals: bool = True): self.signal, self.background, self.nb, self.delta_nb, expected ) + def computeMu(model, data, **keywordargs): + try: + muhat, maxNllh = pyhf.infer.mle.fit( + data, + model, + return_fitted_val=True, + par_bounds=keywordargs.get("bounds", model.config.suggested_bounds()), + ) + except (AssertionError, pyhf.exceptions.FailedMinimization, ValueError) as err: + logging.getLogger("MA5").debug(str(err)) + # dont use false here 1.-CLs = 0 can be interpreted as false + return "update bounds", None + + return muhat, maxNllh + with warnings.catch_warnings(): warnings.filterwarnings( "ignore", "Values in x were outside bounds during a minimize step, clipping to bounds", ) + bounds = self.model.config.suggested_bounds() + if allowNegativeSignals: + bounds[self.model.config.poi_index] = (-5.0, 10.0) + arguments = dict(bounds=bounds) + it = 0 + while True: + muhat, maxNllh = computeMu(self.model, self.data, **arguments) + if muhat == "update bounds": + arguments["bounds"][self.model.config.poi_index] = ( + 2.0 * arguments["bounds"][self.model.config.poi_index][0], + 2.0 * arguments["bounds"][self.model.config.poi_index][1], + ) + it += 1 + else: + break + if it >= iteration_threshold: + logging.getLogger("MA5").debug("pyhf mle.fit failed") + return float("nan") - try: - bounds = self.model.config.suggested_bounds() - if allowNegativeSignals: - bounds[self.model.config.poi_index] = (-5.0, 10.0) - muhat, maxNllh = pyhf.infer.mle.fit( - self.data, self.model, return_fitted_val=True, par_bounds=bounds - ) - return muhat[self.model.config.poi_index] - except (pyhf.exceptions.FailedMinimization, ValueError) as e: - logging.getLogger("MA5").error(f"pyhf mle.fit failed {e}") - return float("nan") + return muhat[self.model.config.poi_index] def computeUpperLimitOnMu(self, expected: bool = False): """ diff --git a/madanalysis/misc/taco/__init__.py b/madanalysis/misc/taco/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/madanalysis/misc/taco_base.py b/madanalysis/misc/taco/base.py similarity index 67% rename from madanalysis/misc/taco_base.py rename to madanalysis/misc/taco/base.py index d7c1321c..d1d8dd12 100644 --- a/madanalysis/misc/taco_base.py +++ b/madanalysis/misc/taco/base.py @@ -25,8 +25,33 @@ from typing import Text +class MA5DataSet: + def __init__(self, srname): + """param srname: the name of the signal region + ("dataset" is SModelS' name for a signal region)""" + self.srname = srname + + def getID(self): + return self.srname + + +class MA5CrossSection: + def __init__(self, value: float, sqrts: float = 13.0): + """production cross section + :param value: the value of the production cross section + :param sqrts: center of mass energy + """ + self.value = value + self.sqrts = sqrts + + class TACOBase(ABC): - def __init__(self, analysis: Text, regionID: Text): + def __init__( + self, + analysis: Text = "__unknown_analysis__", + regionID: Text = "__unknown_region__", + xsection: float = -1, + ): """ Abstract Class for TACO interface @@ -36,10 +61,13 @@ def __init__(self, analysis: Text, regionID: Text): regionID: (Text) region name """ self.analysis = analysis - self.regionOD = regionID + self.dataset = MA5DataSet(regionID) + self.xsection = MA5CrossSection(xsection, 13.0) @abstractmethod - def likelihood(self, mu: float = 1.0, nll: bool = False, expected: bool = False) -> float: + def likelihood( + self, mu: float = 1.0, nll: bool = False, expected: bool = False, useCached=False + ) -> 
float: """ Returns the value of the likelihood. Inspired by the `pyhf.infer.mle` module but for non-log likelihood @@ -80,3 +108,19 @@ def muhat(self, expected: bool = False, allowNegativeSignals: bool = True) -> fl if true, allow negative values for mu """ raise NotImplementedError + + @abstractmethod + def getUpperLimit(self, expected: bool = False): + """ + code here that retrieves the upper limit, expected or observed + """ + raise NotImplementedError + + def dataType(self): # a technicality + return "efficiencyMap" + + def analysisId(self): + """ + return the analysis id, e.g. "atlas_susy_2018_02" + """ + return self.analysis diff --git a/madanalysis/misc/taco/pyhf_backend.py b/madanalysis/misc/taco/pyhf_backend.py new file mode 100644 index 00000000..a31c3d2c --- /dev/null +++ b/madanalysis/misc/taco/pyhf_backend.py @@ -0,0 +1,144 @@ +################################################################################ +# +# Copyright (C) 2012-2022 Jack Araz, Eric Conte & Benjamin Fuks +# The MadAnalysis development team, email: +# +# This file is part of MadAnalysis 5. +# Official website: +# +# MadAnalysis 5 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# MadAnalysis 5 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with MadAnalysis 5. If not, see +# +################################################################################ + +from madanalysis.misc.taco.base import TACOBase +from madanalysis.misc.pyhf_interface import PyhfInterface +from typing import Text + + +class pyhfSingleRegion(TACOBase): + def __init__( + self, + analysis: Text, + regionID: Text, + nobs: int, + nb: float, + deltanb: float, + N0: float, + Nf: float, + xsection: float, + lumi: float, + sqrts: float = 13.0, + ): + super(pyhfSingleRegion, self).__init__(analysis, regionID, xsection) + self.xsection.sqrts = sqrts + self.Nf = Nf + self.N0 = N0 + self.lumi = lumi + + self.experimental_data = [nobs, nb, deltanb] + + self.marginalize = False + self.cachedLlhds = {"exp": {}, "obs": {}} + + def nsignal(self, mu: float = 1.0): + """ + Number of signal events + + Parameters + ---------- + mu: (float) POI signal strength + """ + return mu * self.xsection.value * 1000.0 * self.lumi * self.Nf / self.N0 + + def computeCLs(self, expected: bool = False) -> float: + """ + for testing purposes + """ + interface = PyhfInterface(self.nsignal(), *self.experimental_data) + if expected: + return interface.computeCLs(CLs_exp=True) + else: + return interface.computeCLs(CLs_obs=True) + + def likelihood( + self, + mu: float = 1.0, + expected: bool = False, + nll: bool = False, + useCached=False, + ) -> float: + """ + + Parameters + ---------- + mu: (float) POI signal strength + expected: if true, compute expected likelihood, else observed. 
+ nll: if True, return the negative log likelihood instead of the likelihood + useCached: if true reuse cached results + + Returns + ------- + likelihood with respect to given POI + """ + + if useCached: + cached = None + if expected: + cached = self.cachedLlhds["exp"].get(mu, None) + else: + cached = self.cachedLlhds["obs"].get(mu, None) + + if cached is not None: + return cached + + interface = PyhfInterface(self.nsignal(), *self.experimental_data) + llhd = interface.likelihood(mu, expected=expected, nll=nll) + + if expected: + cached = self.cachedLlhds["exp"][mu] = llhd + else: + cached = self.cachedLlhds["obs"][mu] = llhd + + return llhd + + def sigma_mu(self, expected: bool = False, **kwargs) -> float: + """ + get an estimate for the standard deviation of mu around mu_hat this is only used for + initialisation of the optimizer, so can be skipped in the first version, it's even + less important than muhat + + Parameters + ---------- + expected: (float) if true, compute expected likelihood, else observed. + if "posteriori", compute a posteriori expected likelihood + (FIXME do we need this?) + """ + interface = PyhfInterface(self.nsignal(), *self.experimental_data) + return interface.sigma_mu(expected=expected) + + def muhat(self, expected: bool = False, allowNegativeSignals: bool = True) -> float: + """ + Find the most likely signal strength mu. + + Parameters + ---------- + expected + allowNegativeSignals: if true, then also allow for negative values + """ + interface = PyhfInterface(self.nsignal(), *self.experimental_data) + return interface.muhat(expected, allowNegativeSignals) + + def getUpperLimit(self, expected: bool = False): + interface = PyhfInterface(self.nsignal(), *self.experimental_data) + return interface.computeUpperLimitOnMu(expected = expected) diff --git a/madanalysis/misc/taco/sl_backend.py b/madanalysis/misc/taco/sl_backend.py new file mode 100644 index 00000000..a5ab03b6 --- /dev/null +++ b/madanalysis/misc/taco/sl_backend.py @@ -0,0 +1,138 @@ +################################################################################ +# +# Copyright (C) 2012-2022 Jack Araz, Eric Conte & Benjamin Fuks +# The MadAnalysis development team, email: +# +# This file is part of MadAnalysis 5. +# Official website: +# +# MadAnalysis 5 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# MadAnalysis 5 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with MadAnalysis 5. 
If not, see +# +################################################################################ + +from madanalysis.misc.taco.base import TACOBase +from madanalysis.misc.simplified_likelihood import Data, LikelihoodComputer, UpperLimitComputer +from typing import Text + + +class SLSingleRegion(TACOBase): + def __init__( + self, + analysis: Text, + regionID: Text, + nobs: int, + nb: float, + deltanb: float, + N0: float, + Nf: float, + xsection: float, + lumi: float, + sqrts: float = 13.0, + ): + super(SLSingleRegion, self).__init__(analysis, regionID, xsection) + self.xsection.sqrts = sqrts + self.Nf = Nf + self.N0 = N0 + self.lumi = lumi + + self.data = Data(nobs, nb, deltanb, nsignal=self.nsignal()) + self.data_exp = Data(nb, nb, deltanb, nsignal=self.nsignal()) + + self.marginalize = False + self.cachedLlhds = {"exp": {}, "obs": {}} + + def nsignal(self, mu: float = 1.0): + """ + Number of signal events + + Parameters + ---------- + mu: (float) POI signal strength + """ + return mu * self.xsection.value * 1000.0 * self.lumi * self.Nf / self.N0 + + def computeCLs(self, expected: bool = False) -> float: + """ + for testing purposes + """ + return UpperLimitComputer().computeCLs(self.data, expected=expected) + + def likelihood( + self, + mu: float = 1.0, + expected: bool = False, + nll: bool = False, + useCached=False, + ) -> float: + """ + + Parameters + ---------- + mu: (float) POI signal strength + expected: if true, compute expected likelihood, else observed. + if "posteriori", compute a posteriori expected likelihood + (FIXME do we need this?) + nll: if True, return the negative log likelihood instead of the likelihood + useCached: if true reuse cached results + + Returns + ------- + likelihood with respect to given POI + """ + if useCached: + cached = None + if expected: + cached = self.cachedLlhds["exp"].get(mu, None) + else: + cached = self.cachedLlhds["obs"].get(mu, None) + + if cached is not None: + return cached + + computer = LikelihoodComputer(self.data) + llhd = computer.likelihood(self.nsignal(mu), marginalize=self.marginalize, nll=nll) + + if expected: + cached = self.cachedLlhds["exp"][mu] = llhd + else: + cached = self.cachedLlhds["obs"][mu] = llhd + + return llhd + + def sigma_mu(self, expected: bool = False, **kwargs) -> float: + """ + get an estimate for the standard deviation of mu around mu_hat this is only used for + initialisation of the optimizer, so can be skipped in the first version, it's even + less important than muhat + + Parameters + ---------- + expected: (float) if true, compute expected likelihood, else observed. + if "posteriori", compute a posteriori expected likelihood + (FIXME do we need this?) + """ + computer = LikelihoodComputer(self.data_exp if expected else self.data) + return computer.getSigmaMu(1.0, self.nsignal(), computer.findThetaHat(self.nsignal())) + + def muhat(self, expected: bool = False, allowNegativeSignals: bool = True) -> float: + """ + Find the most likely signal strength mu. 
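+
+        In this backend the call is delegated to
+        simplified_likelihood.LikelihoodComputer.findMuHat on the observed or,
+        for expected=True, the Asimov-like dataset built in __init__.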
From 0a024c1102182806843f216f7d4c7b25e4a62068 Mon Sep 17 00:00:00 2001
From: jackaraz
Date: Tue, 17 May 2022 13:12:34 +0200
Subject: [PATCH 22/34] update pyhf_interface.py

---
 madanalysis/misc/pyhf_interface.py | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/madanalysis/misc/pyhf_interface.py b/madanalysis/misc/pyhf_interface.py
index b4153967..ff2defb3 100644
--- a/madanalysis/misc/pyhf_interface.py
+++ b/madanalysis/misc/pyhf_interface.py
@@ -22,7 +22,7 @@
 ################################################################################
 
 import logging, sys, scipy
-from typing import Sequence, Dict, Union, Optional
+from typing import Sequence, Dict, Union, Optional, Tuple
 from numpy import warnings, isnan
 import numpy as np
 from madanalysis.misc.histfactory_reader import HF_Background, HF_Signal
@@ -241,10 +241,11 @@ def get_CLs(model, data, **keywordargs):
 
     def likelihood(
         self,
-        mu: float = 1.0,
-        nll: bool = False,
-        expected: bool = False,
-        iteration_threshold: int = 3,
+        mu: Optional[float] = 1.0,
+        nll: Optional[bool] = False,
+        expected: Optional[bool] = False,
+        mu_lim: Optional[Tuple[float, float]] = (-20., 40.),
+        iteration_threshold: Optional[int] = 3,
     ) -> float:
         """
         Returns the value of the likelihood.
@@ -258,6 +259,10 @@ def likelihood(
             if true, return nll, not llhd
         expected: bool
             if true, compute expected likelihood, else observed.
+        mu_lim: Optional[Tuple[float, float]]
+            allowed range for mu (POI); default (-20., 40.)
+        iteration_threshold: int
+            maximum number of trials to find the maximum log-likelihood
         """
         _, self.model, self.data = self._initialize_workspace(
             self.signal, self.background, self.nb, self.delta_nb, expected
@@ -266,8 +271,12 @@ def likelihood(
         if self.model is None or self.data is None:
             return -1
         # clamp mu to the allowed range
-        mu = max(mu, -20.0)
-        mu = min(mu, 40.0)
+        if not isinstance(mu, (int, float)):
+            mu = 1.0
+        if mu < mu_lim[0]:
+            mu = mu_lim[0]
+        elif mu > mu_lim[1]:
+            mu = mu_lim[1]
 
         def computellhd(model, data, bounds):
             try:
@@ -357,14 +366,16 @@ def sigma_mu(self, expected: bool = False):
                 vars.append(poiss + gauss)
             var_mu = np.sum(vars)
             n = len(obss)
+            logging.debug(f"sigma_mu = {float(np.sqrt(var_mu / (n**2)))}")
             return float(np.sqrt(var_mu / (n**2)))
 
         else:
-            return (
+            res = (
                 float(np.sqrt(self.delta_nb**2 + self.nb) / self.signal)
                 if self.signal > 0.0
                 else 0.0
             )
+            return res
 
     def muhat(
         self,

From 0fe62c19f1c7fdffb19658d Mon Sep 17 00:00:00 2001
From: jackaraz
Date: Tue, 17 May 2022 15:50:25 +0200
Subject: [PATCH 23/34] fix TACO writer

---
 madanalysis/misc/run_recast.py | 70 +++++++++++++++++++++-------------
 1 file changed, 43 insertions(+), 27 deletions(-)

diff --git a/madanalysis/misc/run_recast.py b/madanalysis/misc/run_recast.py
index 169667bc..48f03b19 100644
--- a/madanalysis/misc/run_recast.py
+++ b/madanalysis/misc/run_recast.py
@@ -323,10 +323,8 @@ def run_SimplifiedFastSim(self,dataset,card,analysislist):
             self.main.script = script_mode
         old_fastsim = self.main.fastsim.package
         self.main.fastsim.package="fastjet"
+        output_name = "SFS_events.lhe" + self.main.archi_info.has_zlib * ".gz"
         if self.main.recasting.store_events:
-            output_name = "SFS_events.lhe"
-            if self.main.archi_info.has_zlib:
-                output_name += ".gz"
             self.logger.debug(" Setting the
output LHE file :"+output_name) # Initializing the JobWriter @@ -339,7 +337,7 @@ def run_SimplifiedFastSim(self,dataset,card,analysislist): self.logger.info(" Copying 'SampleAnalyzer' source files...") if not jobber.CopyLHEAnalysis(): return False - if not jobber.CreateBldDir(analysisName="SFSRun",outputName="SFSRun.saf"): + if not jobber.CreateBldDir(analysisName = "SFSRun", outputName = "SFSRun.saf"): return False if not jobber.WriteSelectionHeader(self.main): return False @@ -354,14 +352,20 @@ def run_SimplifiedFastSim(self,dataset,card,analysislist): if not jobber.WriteMakefiles(): return False # Copying the analysis files - analysisList = open(self.dirname+'_SFSRun/Build/SampleAnalyzer/User/Analyzer/analysisList.h','w') + analysisList = open( + self.dirname + '_SFSRun/Build/SampleAnalyzer/User/Analyzer/analysisList.h', 'w' + ) for ana in analysislist: - analysisList.write('#include "SampleAnalyzer/User/Analyzer/'+ana+'.h"\n') + analysisList.write('#include "SampleAnalyzer/User/Analyzer/' + ana + '.h"\n') analysisList.write('#include "SampleAnalyzer/Process/Analyzer/AnalyzerManager.h"\n') analysisList.write('#include "SampleAnalyzer/Commons/Service/LogStream.h"\n\n') - analysisList.write('// -----------------------------------------------------------------------------\n') + analysisList.write( + '// -----------------------------------------------------------------------------\n' + ) analysisList.write('// BuildUserTable\n') - analysisList.write('// -----------------------------------------------------------------------------\n') + analysisList.write( + '// -----------------------------------------------------------------------------\n' + ) analysisList.write('void BuildUserTable(MA5::AnalyzerManager& manager)\n') analysisList.write('{\n') analysisList.write(' using namespace MA5;\n') @@ -402,28 +406,39 @@ def run_SimplifiedFastSim(self,dataset,card,analysislist): if self.main.recasting.store_events: newfile.write(' //Getting pointer to the writer\n') newfile.write(' WriterBase* writer1 = \n') - newfile.write(' manager.InitializeWriter("lhe","'+output_name+'");\n') + newfile.write(' manager.InitializeWriter("lhe","' + output_name + '");\n') newfile.write(' if (writer1==0) return 1;\n\n') - elif '// Post initialization (creates the new output directory structure)' in line and self.TACO_output!='': - newfile.write(' std::ofstream out;\n out.open(\"../Output/' + self.TACO_output+'\");\n') - newfile.write('\n manager.HeadSR(out);\n out << std::endl;\n'); + elif 'if(!manager.PostInitialize()) return 1;' in line: + ignore = False + newfile.write(line) + if self.TACO_output != "": + newfile.write(" // Initialize TACO output\n") + newfile.write( + ' std::ofstream TACO_file;\n TACO_file.open(\"../Output/' + + self.TACO_output + '\");\n' + ) + newfile.write('\n manager.HeadSR(TACO_file);\n TACO_file << std::endl;\n') elif '//Getting pointer to the clusterer' in line: ignore=False newfile.write(line) elif '!analyzer1' in line and not ignore: - ignore=True + ignore = True if self.main.recasting.store_events: - newfile.write(' writer1->WriteEvent(myEvent,mySample);\n') + newfile.write(' writer1->WriteEvent(myEvent, mySample);\n') for analysis in analysislist: - newfile.write(' if (!analyzer_'+analysis+'->Execute(mySample,myEvent)) continue;\n') - if self.TACO_output!='': - newfile.write('\n manager.DumpSR(out);\n'); + newfile.write( + ' if (!analyzer_' + analysis + '->Execute(mySample, myEvent)) continue;\n' + ) + if self.TACO_output != '': + newfile.write("\n // Fill TACO file\n") + newfile.write(' 
manager.DumpSR(TACO_file);\n') elif ' }' in line: newfile.write(line) ignore=False - elif 'manager.Finalize(mySamples,myEvent);' in line and self.TACO_output!='': + elif 'manager.Finalize(mySamples, myEvent);' in line and self.TACO_output != '': newfile.write(line); - newfile.write(' out.close();\n'); + newfile.write(" // Close TACO file\n") + newfile.write(' TACO_file.close();\n'); elif not ignore: newfile.write(line) mainfile.close() @@ -646,24 +661,24 @@ def update_pad_main(self,analysislist): newfile.write(' AnalyzerBase* analyzer_'+analysis+'=\n') newfile.write(' manager.InitializeAnalyzer(\"'+analysis+'\",\"'+analysis+'.saf\",'+\ 'prm'+analysis+');\n') - newfile.write( ' if (analyzer_'+analysis+'==0) return 1;\n\n') - elif '// Post initialization (creates the new output directory structure)' in line: - ignore=False + newfile.write(' if (analyzer_'+analysis+'==0) return 1;\n\n') + elif 'if(!manager.PostInitialize()) return 1;' in line: + ignore = False newfile.write(line) if self.TACO_output!='': - newfile.write(' std::ofstream out;\n out.open(\"../Output/' + self.TACO_output+'\");\n') - newfile.write(' manager.HeadSR(out);\n out << std::endl;\n'); + newfile.write(' std::ofstream TACO_file;\n TACO_file.open(\"../Output/' + self.TACO_output+'\");\n') + newfile.write(' manager.HeadSR(TACO_file);\n TACO_file << std::endl;\n'); elif '!analyzer_' in line and not ignore: ignore=True for analysis in analysislist: newfile.write(' if (!analyzer_'+analysis+'->Execute(mySample,myEvent)) continue;\n') elif '!analyzer1' in line: if self.TACO_output!='': - newfile.write('\nmanager.DumpSR(out);\n'); + newfile.write('\nmanager.DumpSR(TACO_file);\n'); ignore=False elif 'manager.Finalize(mySamples,myEvent);' in line and self.TACO_output!='': newfile.write(line); - newfile.write(' out.close();\n'); + newfile.write(' TACO_file.close();\n'); elif not ignore: newfile.write(line) @@ -748,7 +763,8 @@ def save_output(self, eventfile, setname, analyses, card): for analysis in analyses: shutil.move(self.dirname+'_RecastRun/Output/SAF/PADevents/'+analysis+'_0',self.dirname+'/Output/SAF/'+setname+'/'+analysis) if self.TACO_output!='': - filename = '.'.join(self.TACO_output.split('.')[:-1]) + '_' + card.replace('tcl','') + self.TACO_output.split('.')[-1] + filename = '.'.join(self.TACO_output.split('.')[:-1]) + '_' + card.replace('tcl', '') + \ + self.TACO_output.split('.')[-1] shutil.move(self.dirname+'_RecastRun/Output/'+self.TACO_output,self.dirname+'/Output/SAF/'+setname+'/'+filename) return True From 823287c2b846a962bb0784c5e4b10c1c8a6ed363 Mon Sep 17 00:00:00 2001 From: jackaraz Date: Tue, 17 May 2022 16:26:34 +0200 Subject: [PATCH 24/34] update code style --- madanalysis/misc/run_recast.py | 47 +++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/madanalysis/misc/run_recast.py b/madanalysis/misc/run_recast.py index 48f03b19..dc9aac01 100644 --- a/madanalysis/misc/run_recast.py +++ b/madanalysis/misc/run_recast.py @@ -412,12 +412,12 @@ def run_SimplifiedFastSim(self,dataset,card,analysislist): ignore = False newfile.write(line) if self.TACO_output != "": - newfile.write(" // Initialize TACO output\n") - newfile.write( - ' std::ofstream TACO_file;\n TACO_file.open(\"../Output/' + - self.TACO_output + '\");\n' - ) - newfile.write('\n manager.HeadSR(TACO_file);\n TACO_file << std::endl;\n') + if self.TACO_output != '': + newfile.write(" // Initialize TACO output\n") + newfile.write( + ' std::ofstream TACO_file;\n TACO_file.open(\"../Output/' + self.TACO_output + 
'\");\n' + ) + newfile.write(' manager.HeadSR(TACO_file);\n TACO_file << std::endl;\n') elif '//Getting pointer to the clusterer' in line: ignore=False newfile.write(line) @@ -430,15 +430,16 @@ def run_SimplifiedFastSim(self,dataset,card,analysislist): ' if (!analyzer_' + analysis + '->Execute(mySample, myEvent)) continue;\n' ) if self.TACO_output != '': - newfile.write("\n // Fill TACO file\n") + newfile.write(" // Fill TACO file\n") newfile.write(' manager.DumpSR(TACO_file);\n') elif ' }' in line: newfile.write(line) - ignore=False - elif 'manager.Finalize(mySamples, myEvent);' in line and self.TACO_output != '': - newfile.write(line); - newfile.write(" // Close TACO file\n") - newfile.write(' TACO_file.close();\n'); + ignore = False + elif 'manager.Finalize(mySamples,myEvent);' in line and self.TACO_output != '': + newfile.write(line) + if self.TACO_output != '': + newfile.write(" // Close TACO file\n") + newfile.write(' TACO_file.close();\n') elif not ignore: newfile.write(line) mainfile.close() @@ -665,20 +666,26 @@ def update_pad_main(self,analysislist): elif 'if(!manager.PostInitialize()) return 1;' in line: ignore = False newfile.write(line) - if self.TACO_output!='': - newfile.write(' std::ofstream TACO_file;\n TACO_file.open(\"../Output/' + self.TACO_output+'\");\n') - newfile.write(' manager.HeadSR(TACO_file);\n TACO_file << std::endl;\n'); + if self.TACO_output != '': + newfile.write(" // Initialize TACO output\n") + newfile.write(' std::ofstream TACO_file;\n TACO_file.open(\"../Output/' + self.TACO_output+'\");\n') + newfile.write(' manager.HeadSR(TACO_file);\n TACO_file << std::endl;\n') elif '!analyzer_' in line and not ignore: ignore=True for analysis in analysislist: - newfile.write(' if (!analyzer_'+analysis+'->Execute(mySample,myEvent)) continue;\n') + newfile.write( + ' if (!analyzer_' + analysis + '->Execute(mySample, myEvent)) continue;\n' + ) elif '!analyzer1' in line: - if self.TACO_output!='': - newfile.write('\nmanager.DumpSR(TACO_file);\n'); + if self.TACO_output != '': + newfile.write(" // Fill TACO file\n") + newfile.write(' manager.DumpSR(TACO_file);\n') ignore=False elif 'manager.Finalize(mySamples,myEvent);' in line and self.TACO_output!='': - newfile.write(line); - newfile.write(' TACO_file.close();\n'); + newfile.write(line) + if self.TACO_output != '': + newfile.write(" // Close TACO file\n") + newfile.write(' TACO_file.close();\n'); elif not ignore: newfile.write(line) From 971f21fe404da06ced7bcfe6aab9a8fe8d3acfb2 Mon Sep 17 00:00:00 2001 From: jackaraz Date: Tue, 17 May 2022 16:28:27 +0200 Subject: [PATCH 25/34] bugfix in run_recast.py --- madanalysis/misc/run_recast.py | 1 - 1 file changed, 1 deletion(-) diff --git a/madanalysis/misc/run_recast.py b/madanalysis/misc/run_recast.py index dc9aac01..fa3e459c 100644 --- a/madanalysis/misc/run_recast.py +++ b/madanalysis/misc/run_recast.py @@ -676,7 +676,6 @@ def update_pad_main(self,analysislist): newfile.write( ' if (!analyzer_' + analysis + '->Execute(mySample, myEvent)) continue;\n' ) - elif '!analyzer1' in line: if self.TACO_output != '': newfile.write(" // Fill TACO file\n") newfile.write(' manager.DumpSR(TACO_file);\n') From 69a8fbbb259e04906d05b4c9ef91adaaa83f9244 Mon Sep 17 00:00:00 2001 From: jackaraz Date: Tue, 17 May 2022 16:33:34 +0200 Subject: [PATCH 26/34] bugfix in run_recast.py --- madanalysis/misc/run_recast.py | 1 + 1 file changed, 1 insertion(+) diff --git a/madanalysis/misc/run_recast.py b/madanalysis/misc/run_recast.py index fa3e459c..dc9aac01 100644 --- a/madanalysis/misc/run_recast.py 
+++ b/madanalysis/misc/run_recast.py
@@ -676,6 +676,7 @@ def update_pad_main(self,analysislist):
                 newfile.write(
                     '      if (!analyzer_' + analysis + '->Execute(mySample, myEvent)) continue;\n'
                 )
+            elif '!analyzer1' in line:
                 if self.TACO_output != '':
                     newfile.write("      // Fill TACO file\n")
                     newfile.write('      manager.DumpSR(TACO_file);\n')

From 825e3b07896ca53bcee570aabe73f50a5fcafc73 Mon Sep 17 00:00:00 2001
From: jackaraz
Date: Wed, 18 May 2022 18:03:56 +0200
Subject: [PATCH 27/34] add SL backend

---
 madanalysis/misc/taco/sl_backend.py | 58 +++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 16 deletions(-)

diff --git a/madanalysis/misc/taco/sl_backend.py b/madanalysis/misc/taco/sl_backend.py
index a5ab03b6..ee136852 100644
--- a/madanalysis/misc/taco/sl_backend.py
+++ b/madanalysis/misc/taco/sl_backend.py
@@ -26,7 +26,7 @@
 from typing import Text
 
 
-class SLSingleRegion(TACOBase):
+class slSingleRegion(TACOBase):
     def __init__(
         self,
         analysis: Text,
@@ -40,14 +40,14 @@ def __init__(
         lumi: float,
         sqrts: float = 13.0,
     ):
-        super(SLSingleRegion, self).__init__(analysis, regionID, xsection)
+        super(slSingleRegion, self).__init__(analysis, regionID, xsection)
         self.xsection.sqrts = sqrts
         self.Nf = Nf
         self.N0 = N0
         self.lumi = lumi
 
-        self.data = Data(nobs, nb, deltanb, nsignal=self.nsignal())
-        self.data_exp = Data(nb, nb, deltanb, nsignal=self.nsignal())
+        self.experimental_data = [nobs, nb, deltanb]
+        self.data = Data(*self.experimental_data, nsignal=self.nsignal())
 
         self.marginalize = False
         self.cachedLlhds = {"exp": {}, "obs": {}}
@@ -66,7 +66,8 @@ def computeCLs(self, expected: bool = False) -> float:
         """
         For testing purposes.
         """
-        return UpperLimitComputer().computeCLs(self.data, expected=expected)
+        interface = UpperLimitComputer()
+        return interface.computeCLs(self.data, expected=expected)
 
     def likelihood(
         self,
@@ -81,8 +82,6 @@ def likelihood(
         ----------
         mu: (float) POI signal strength
         expected: if true, compute expected likelihood, else observed.
-                  if "posteriori", compute a posteriori expected likelihood
-                  (FIXME do we need this?)
         nll: if True, return the negative log likelihood instead of the likelihood
         useCached: if true, reuse cached results
 
@@ -90,24 +89,27 @@ def likelihood(
         -------
         likelihood with respect to the given POI
         """
+
         if useCached:
             cached = None
             if expected:
                 cached = self.cachedLlhds["exp"].get(mu, None)
             else:
                 cached = self.cachedLlhds["obs"].get(mu, None)
-
             if cached is not None:
                 return cached
 
-        computer = LikelihoodComputer(self.data)
-        llhd = computer.likelihood(self.nsignal(mu), marginalize=self.marginalize, nll=nll)
-
         if expected:
+            data = Data(
+                self.experimental_data[1], *self.experimental_data[1:], nsignal=self.nsignal()
+            )
+            interface = LikelihoodComputer(data)
+            llhd = interface.likelihood(self.nsignal(mu), marginalize=self.marginalize, nll=nll)
             cached = self.cachedLlhds["exp"][mu] = llhd
         else:
+            interface = LikelihoodComputer(self.data)
+            llhd = interface.likelihood(self.nsignal(mu), marginalize=self.marginalize, nll=nll)
             cached = self.cachedLlhds["obs"][mu] = llhd
-
         return llhd
 
     def sigma_mu(self, expected: bool = False, **kwargs) -> float:
@@ -122,8 +124,16 @@ def sigma_mu(self, expected: bool = False, **kwargs) -> float:
                   if "posteriori", compute a posteriori expected likelihood
                   (FIXME do we need this?)
         """
-        computer = LikelihoodComputer(self.data_exp if expected else self.data)
-        return computer.getSigmaMu(1.0, self.nsignal(), computer.findThetaHat(self.nsignal()))
+
+        if expected:
+            data = Data(
+                self.experimental_data[1], *self.experimental_data[1:], nsignal=self.nsignal()
+            )
+        else:
+            data = self.data
+        interface = LikelihoodComputer(data)
+        theta_hat = interface.findThetaHat(self.nsignal())
+        return interface.getSigmaMu(1.0, self.nsignal(), theta_hat)
 
     def muhat(self, expected: bool = False, allowNegativeSignals: bool = True) -> float:
         """
         Find the most likely signal strength mu.
@@ -134,5 +144,21 @@ def muhat(self, expected: bool = False, allowNegativeSignals: bool = True) -> float:
         expected: if true, use the expected dataset, else the observed one
         allowNegativeSignals: if true, also allow for negative values
         """
-        computer = LikelihoodComputer(self.data_exp if expected else self.data)
-        return computer.findMuHat(self.nsignal(), allowNegativeSignals)
+        if expected:
+            data = Data(
+                self.experimental_data[1], *self.experimental_data[1:], nsignal=self.nsignal()
+            )
+        else:
+            data = self.data
+        interface = LikelihoodComputer(data)
+        return interface.findMuHat([self.nsignal()], allowNegativeSignals=allowNegativeSignals)
+
+    def getUpperLimit(self, expected: bool = False):
+        if expected:
+            data = Data(
+                self.experimental_data[1], *self.experimental_data[1:], nsignal=self.nsignal()
+            )
+        else:
+            data = self.data
+        interface = UpperLimitComputer()
+        return interface.getUpperLimitOnMu(data, marginalize=self.marginalize, expected=expected)
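The expected branches introduced in the patch above build the expected dataset by substituting the observed count with the background expectation, i.e. Data(nb, nb, deltanb, ...), so the pseudo-observation coincides with the background-only prediction (an Asimov-style construction). A self-contained sketch of that substitution, with ToyData standing in for the simplified-likelihood Data container:

    from dataclasses import dataclass

    @dataclass
    class ToyData:
        nobs: float     # observed events
        nb: float       # expected background
        deltanb: float  # background uncertainty

    def expected_dataset(observed: ToyData) -> ToyData:
        # Expected data: pretend we observed exactly the background prediction
        return ToyData(nobs=observed.nb, nb=observed.nb, deltanb=observed.deltanb)

    obs = ToyData(nobs=7, nb=4.2, deltanb=1.1)
    print(expected_dataset(obs))  # ToyData(nobs=4.2, nb=4.2, deltanb=1.1)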
""" - computer = LikelihoodComputer(self.data_exp if expected else self.data) - return computer.getSigmaMu(1.0, self.nsignal(), computer.findThetaHat(self.nsignal())) + + if expected: + data = Data( + self.experimental_data[1], *self.experimental_data[1:], nsignal=self.nsignal() + ) + else: + data = self.data + interface = LikelihoodComputer(data) + theta_hat = interface.findThetaHat(self.nsignal()) + return interface.getSigmaMu(1.0, self.nsignal(), theta_hat) def muhat(self, expected: bool = False, allowNegativeSignals: bool = True) -> float: """ @@ -134,5 +144,21 @@ def muhat(self, expected: bool = False, allowNegativeSignals: bool = True) -> fl expected allowNegativeSignals: if true, then also allow for negative values """ - computer = LikelihoodComputer(self.data_exp if expected else self.data) - return computer.findMuHat(self.nsignal(), allowNegativeSignals) + if expected: + data = Data( + self.experimental_data[1], *self.experimental_data[1:], nsignal=self.nsignal() + ) + else: + data = self.data + interface = LikelihoodComputer(data) + return interface.findMuHat([self.nsignal()], allowNegativeSignals=allowNegativeSignals) + + def getUpperLimit(self, expected: bool = False): + if expected: + data = Data( + self.experimental_data[1], *self.experimental_data[1:], nsignal=self.nsignal() + ) + else: + data = self.data + interface = UpperLimitComputer() + return interface.getUpperLimitOnMu(data, marginalize=self.marginalize, expected=expected) From fad87db632a002480c97970ff1533a94cec3cf6b Mon Sep 17 00:00:00 2001 From: jackaraz Date: Wed, 18 May 2022 18:11:58 +0200 Subject: [PATCH 28/34] bugfix in sl_backend.py --- madanalysis/misc/taco/sl_backend.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/madanalysis/misc/taco/sl_backend.py b/madanalysis/misc/taco/sl_backend.py index ee136852..33183d84 100644 --- a/madanalysis/misc/taco/sl_backend.py +++ b/madanalysis/misc/taco/sl_backend.py @@ -154,11 +154,5 @@ def muhat(self, expected: bool = False, allowNegativeSignals: bool = True) -> fl return interface.findMuHat([self.nsignal()], allowNegativeSignals=allowNegativeSignals) def getUpperLimit(self, expected: bool = False): - if expected: - data = Data( - self.experimental_data[1], *self.experimental_data[1:], nsignal=self.nsignal() - ) - else: - data = self.data interface = UpperLimitComputer() - return interface.getUpperLimitOnMu(data, marginalize=self.marginalize, expected=expected) + return interface.getUpperLimitOnMu(self.data, marginalize=self.marginalize, expected=expected) From 348c5ffb7db7444bc49939a7d0cbf560fcb36433 Mon Sep 17 00:00:00 2001 From: jackaraz Date: Thu, 19 May 2022 13:47:13 +0200 Subject: [PATCH 29/34] turn TACO output to CSV format --- .../Process/Core/SampleAnalyzer.cpp | 18 +++++++----------- .../RegionSelection/RegionSelectionManager.cpp | 4 ++-- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/tools/SampleAnalyzer/Process/Core/SampleAnalyzer.cpp b/tools/SampleAnalyzer/Process/Core/SampleAnalyzer.cpp index 79e26dd0..0870ff6f 100644 --- a/tools/SampleAnalyzer/Process/Core/SampleAnalyzer.cpp +++ b/tools/SampleAnalyzer/Process/Core/SampleAnalyzer.cpp @@ -919,21 +919,17 @@ void SampleAnalyzer::UpdateProgressBar() } - +// Write header for the TACO file in CSV format void SampleAnalyzer::HeadSR(std::ostream &outwriter) { - outwriter << "#"; - for(MAuint32 i=0; iManager()->HeadSR(outwriter, analyzers_[i]->name()); - } + for(MAuint32 i=0; iManager()->HeadSR(outwriter, analyzers_[i]->name()); } - +// Write body of the TACO file in CSV 
From 348c5ffb7db7444bc49939a7d0cbf560fcb36433 Mon Sep 17 00:00:00 2001
From: jackaraz
Date: Thu, 19 May 2022 13:47:13 +0200
Subject: [PATCH 29/34] turn TACO output to CSV format

---
 .../Process/Core/SampleAnalyzer.cpp            | 18 +++++++-----------
 .../RegionSelection/RegionSelectionManager.cpp |  4 ++--
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/tools/SampleAnalyzer/Process/Core/SampleAnalyzer.cpp b/tools/SampleAnalyzer/Process/Core/SampleAnalyzer.cpp
index 79e26dd0..0870ff6f 100644
--- a/tools/SampleAnalyzer/Process/Core/SampleAnalyzer.cpp
+++ b/tools/SampleAnalyzer/Process/Core/SampleAnalyzer.cpp
@@ -919,21 +919,17 @@ void SampleAnalyzer::UpdateProgressBar()
 }
 
-
+// Write header for the TACO file in CSV format
 void SampleAnalyzer::HeadSR(std::ostream &outwriter)
 {
-  outwriter << "#";
-  for(MAuint32 i=0; i<analyzers_.size(); i++)
-  {
-    analyzers_[i]->Manager()->HeadSR(outwriter, analyzers_[i]->name());
-  }
+  for(MAuint32 i=0; i<analyzers_.size(); i++)
+    analyzers_[i]->Manager()->HeadSR(outwriter, analyzers_[i]->name());
 }
 
-
+// Write body of the TACO file in CSV format
 void SampleAnalyzer::DumpSR(std::ostream &outwriter)
 {
-  for(MAuint32 i=0; i<analyzers_.size(); i++)
-    analyzers_[i]->Manager()->DumpSR(outwriter);
-  outwriter << std::endl;
+  for(MAuint32 i=0; i<analyzers_.size(); i++)
+    analyzers_[i]->Manager()->DumpSR(outwriter);
+  outwriter << std::endl;
 }

diff --git a/tools/SampleAnalyzer/Process/RegionSelection/RegionSelectionManager.cpp b/tools/SampleAnalyzer/Process/RegionSelection/RegionSelectionManager.cpp
index 429a85b9..42ef7c2a 100644
--- a/tools/SampleAnalyzer/Process/RegionSelection/RegionSelectionManager.cpp
+++ b/tools/SampleAnalyzer/Process/RegionSelection/RegionSelectionManager.cpp
@@ -167,13 +167,13 @@ void RegionSelectionManager::WriteHistoDefinition(SAFWriter& output)
 void RegionSelectionManager::HeadSR(std::ostream &outwriter, const std::string &ananame)
 {
   for (MAuint32 i=0;i<regions_.size();i++)
-    outwriter << " " << ananame << "-" << regions_[i]->GetName();
+    outwriter << "," << ananame << "-" << regions_[i]->GetName();
 }
 
 
 void RegionSelectionManager::DumpSR(std::ostream &outwriter)
 {
   for (MAuint32 i=0;i<regions_.size();i++)
-    outwriter << " " << regions_[i]->IsSurviving();
+    outwriter<< "," << regions_[i]->IsSurviving();
 }

From cdbaa967a7d8ba75e7b163b62e463b87e2a4eaa8 Mon Sep 17 00:00:00 2001
From: jackaraz
Date: Thu, 26 May 2022 09:49:07 +0100
Subject: [PATCH 30/34] bugfix in makedoc

---
 doc/makedoc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/makedoc b/doc/makedoc
index 1fbcf3d8..6c413e12 100755
--- a/doc/makedoc
+++ b/doc/makedoc
@@ -121,7 +121,7 @@ class MakeLatex():
             return False, []
 
         # Splitting the lines
-        msg = out.split('\n')
+        msg = str(out).split('\n')
 
         # Removing irrelevant component
         msg2 = []

From c284267b53d216cde004ebd7f0a536902f61a69b Mon Sep 17 00:00:00 2001
From: jackaraz
Date: Thu, 26 May 2022 09:49:44 +0100
Subject: [PATCH 31/34] add max likelihood function to base.py

---
 madanalysis/misc/taco/base.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/madanalysis/misc/taco/base.py b/madanalysis/misc/taco/base.py
index d1d8dd12..6874332e 100644
--- a/madanalysis/misc/taco/base.py
+++ b/madanalysis/misc/taco/base.py
@@ -64,6 +64,9 @@ def __init__(
         self.dataset = MA5DataSet(regionID)
         self.xsection = MA5CrossSection(xsection, 13.0)
 
+    def __repr__(self):
+        return f"Analysis: {self.analysis}, Region: {self.dataset.srname}"
+
     @abstractmethod
     def likelihood(
         self, mu: float = 1.0, nll: bool = False, expected: bool = False, useCached=False
@@ -83,6 +86,20 @@ def likelihood(
         """
         raise NotImplementedError
 
+    @abstractmethod
+    def lmax(self, expected: bool = False, nll: bool = False) -> float:
+        """
+        Compute the maximum likelihood.
+
+        Parameters
+        ----------
+        expected: bool
+            if true, compute the expected maximum likelihood
+        nll: bool
+            if true, return nll, not llhd
+        """
+        raise NotImplementedError
+
     @abstractmethod
     def sigma_mu(self, expected: bool = False) -> float:
         """
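The lmax interface added above returns the likelihood maximised over the signal strength, evaluated at the best-fit muhat; patch 32 below fixes exactly that evaluation point. A self-contained toy version for a single-bin Poisson counting experiment, with all inputs illustrative:

    import numpy as np
    from scipy.optimize import minimize_scalar
    from scipy.stats import poisson

    def toy_lmax(nobs: int, nb: float, nsig: float, nll: bool = False):
        """Maximise the single-bin Poisson likelihood L(mu) = Pois(nobs | nb + mu*nsig)."""
        def negloglike(mu):
            lam = max(nb + mu * nsig, 1e-10)  # keep the Poisson rate positive
            return -poisson.logpmf(nobs, lam)

        fit = minimize_scalar(negloglike, bounds=(-5.0, 40.0), method="bounded")
        muhat = fit.x
        return (muhat, fit.fun) if nll else (muhat, float(np.exp(-fit.fun)))

    muhat, lmax = toy_lmax(nobs=12, nb=7.0, nsig=2.5)
    print(f"muhat = {muhat:.2f}, lmax = {lmax:.3f}")  # muhat ~ 2, since 7 + 2*2.5 = 12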
From 3927f007dba4017fcd696c3e6f128a188702df5f Mon Sep 17 00:00:00 2001
From: jackaraz
Date: Thu, 26 May 2022 09:50:10 +0100
Subject: [PATCH 32/34] bugfix in simplified_likelihood.py

---
 madanalysis/misc/simplified_likelihood.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/madanalysis/misc/simplified_likelihood.py b/madanalysis/misc/simplified_likelihood.py
index 1ecbd871..3ba26a0b 100644
--- a/madanalysis/misc/simplified_likelihood.py
+++ b/madanalysis/misc/simplified_likelihood.py
@@ -1010,7 +1010,7 @@ def lmax(self, nsig=None, marginalize=False, nll=False, allowNegativeSignals=False):
         muhat_, sigma_mu, lmax = fmh["muhat"], fmh["sigma_mu"], fmh["lmax"]
         self.muhat = muhat_
         self.sigma_mu = sigma_mu
-        return self.likelihood(muhat * nsig, marginalize=marginalize, nll=nll)
+        return self.likelihood(muhat_ * nsig, marginalize=marginalize, nll=nll)
 
     def findMuHatViaGradient(
         self,

From 9a0605a8fe74ea6e3268edc15a7173787abab3e4 Mon Sep 17 00:00:00 2001
From: jackaraz
Date: Thu, 26 May 2022 12:46:50 +0100
Subject: [PATCH 33/34] bugfix for CSV formatting

---
 .../RegionSelection/RegionSelectionManager.cpp | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/tools/SampleAnalyzer/Process/RegionSelection/RegionSelectionManager.cpp b/tools/SampleAnalyzer/Process/RegionSelection/RegionSelectionManager.cpp
index 42ef7c2a..01bd95d0 100644
--- a/tools/SampleAnalyzer/Process/RegionSelection/RegionSelectionManager.cpp
+++ b/tools/SampleAnalyzer/Process/RegionSelection/RegionSelectionManager.cpp
@@ -166,14 +166,21 @@ void RegionSelectionManager::WriteHistoDefinition(SAFWriter& output)
 
 void RegionSelectionManager::HeadSR(std::ostream &outwriter, const std::string &ananame)
 {
-  for (MAuint32 i=0;i<regions_.size();i++)
-    outwriter << "," << ananame << "-" << regions_[i]->GetName();
+  // Set first SR out of the for loop to avoid many if executions
+  // use :: instead of - since - is generally used in SR names
+  if (regions_.size() > 0) outwriter << ananame << "::" << regions_[0]->GetName();
+
+  for (MAuint32 i = 1; i < regions_.size(); i++)
+    outwriter << "," << ananame << "::" << regions_[i]->GetName();
 }
 
 
 void RegionSelectionManager::DumpSR(std::ostream &outwriter)
 {
-  for (MAuint32 i=0;i<regions_.size();i++)
-    outwriter<< "," << regions_[i]->IsSurviving();
+  // Set first SR out of the for loop to avoid many if executions
+  if (regions_.size() > 0) outwriter << regions_[0]->IsSurviving();
+
+  for (MAuint32 i = 1; i < regions_.size(); i++)
+    outwriter << "," << regions_[i]->IsSurviving();
 }
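Emitting the first entry outside the loop, as the patch above does, is the usual way to build a comma-separated line without a leading separator. In Python the equivalent header line would simply be a join; the analysis and region names here are illustrative:

    def taco_header(analysis: str, regions: list) -> str:
        # one "analysis::region" entry per column, comma separated, no leading comma
        return ",".join(f"{analysis}::{name}" for name in regions)

    print(taco_header("atlas_susy_2018_31", ["SRA", "SRB", "SRC"]))
    # atlas_susy_2018_31::SRA,atlas_susy_2018_31::SRB,atlas_susy_2018_31::SRC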
From 1e6585b627b688caf9cc089f5a25d4ef8a65abdc Mon Sep 17 00:00:00 2001
From: jackaraz
Date: Thu, 26 May 2022 14:38:44 +0100
Subject: [PATCH 34/34] bugfix for multi analysis runs

---
 tools/SampleAnalyzer/Process/Core/SampleAnalyzer.cpp | 10 ++++++++--
 .../RegionSelection/RegionSelectionManager.cpp       | 12 ++++++------
 .../Process/RegionSelection/RegionSelectionManager.h |  4 ++--
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/tools/SampleAnalyzer/Process/Core/SampleAnalyzer.cpp b/tools/SampleAnalyzer/Process/Core/SampleAnalyzer.cpp
index 0870ff6f..6f7f2bae 100644
--- a/tools/SampleAnalyzer/Process/Core/SampleAnalyzer.cpp
+++ b/tools/SampleAnalyzer/Process/Core/SampleAnalyzer.cpp
@@ -923,13 +923,19 @@ void SampleAnalyzer::UpdateProgressBar()
 void SampleAnalyzer::HeadSR(std::ostream &outwriter)
 {
   for(MAuint32 i=0; i<analyzers_.size(); i++)
-    analyzers_[i]->Manager()->HeadSR(outwriter, analyzers_[i]->name());
+  {
+    MAbool is_first = (i == 0);
+    analyzers_[i]->Manager()->HeadSR(outwriter, analyzers_[i]->name(), is_first);
+  }
 }
 
 // Write body of the TACO file in CSV format
 void SampleAnalyzer::DumpSR(std::ostream &outwriter)
 {
   for(MAuint32 i=0; i<analyzers_.size(); i++)
-    analyzers_[i]->Manager()->DumpSR(outwriter);
+  {
+    MAbool is_first = (i == 0);
+    analyzers_[i]->Manager()->DumpSR(outwriter, is_first);
+  }
   outwriter << std::endl;
 }

diff --git a/tools/SampleAnalyzer/Process/RegionSelection/RegionSelectionManager.cpp b/tools/SampleAnalyzer/Process/RegionSelection/RegionSelectionManager.cpp
index 01bd95d0..a92cf5f3 100644
--- a/tools/SampleAnalyzer/Process/RegionSelection/RegionSelectionManager.cpp
+++ b/tools/SampleAnalyzer/Process/RegionSelection/RegionSelectionManager.cpp
@@ -164,23 +164,23 @@ void RegionSelectionManager::WriteHistoDefinition(SAFWriter& output)
 
-void RegionSelectionManager::HeadSR(std::ostream &outwriter, const std::string &ananame)
+void RegionSelectionManager::HeadSR(std::ostream &outwriter, const std::string &ananame, MAbool &is_first)
 {
   // Set first SR out of the for loop to avoid many if executions
   // use :: instead of - since - is generally used in SR names
-  if (regions_.size() > 0) outwriter << ananame << "::" << regions_[0]->GetName();
+  if (regions_.size() > 0 && is_first) outwriter << ananame << "::" << regions_[0]->GetName();
 
-  for (MAuint32 i = 1; i < regions_.size(); i++)
+  for (MAuint32 i = is_first ? 1 : 0; i < regions_.size(); i++)
     outwriter << "," << ananame << "::" << regions_[i]->GetName();
 }
 
-void RegionSelectionManager::DumpSR(std::ostream &outwriter)
+void RegionSelectionManager::DumpSR(std::ostream &outwriter, MAbool &is_first)
 {
   // Set first SR out of the for loop to avoid many if executions
-  if (regions_.size() > 0) outwriter << regions_[0]->IsSurviving();
+  if (regions_.size() > 0 && is_first) outwriter << regions_[0]->IsSurviving();
 
-  for (MAuint32 i = 1; i < regions_.size(); i++)
+  for (MAuint32 i = is_first ? 1 : 0; i < regions_.size(); i++)
     outwriter << "," << regions_[i]->IsSurviving();
 }

diff --git a/tools/SampleAnalyzer/Process/RegionSelection/RegionSelectionManager.h b/tools/SampleAnalyzer/Process/RegionSelection/RegionSelectionManager.h
index cf806edf..ac3565dc 100644
--- a/tools/SampleAnalyzer/Process/RegionSelection/RegionSelectionManager.h
+++ b/tools/SampleAnalyzer/Process/RegionSelection/RegionSelectionManager.h
@@ -475,8 +475,8 @@ class RegionSelectionManager
   }
 
   /// Dumping the content of the counters
-  void HeadSR(std::ostream &, const std::string&);
-  void DumpSR(std::ostream &);
+  void HeadSR(std::ostream &, const std::string&, MAbool&);
+  void DumpSR(std::ostream &, MAbool&);
 };
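After this final patch the TACO output is a plain CSV file: one header row of analysis::region column names, followed by one row of 0/1 survival flags per event (DumpSR is called once per event and terminates the row with std::endl). A sketch of how a downstream tool might turn such a file into per-region efficiencies; the file path is illustrative:

    import csv

    def region_efficiencies(taco_csv: str) -> dict:
        """Fraction of events surviving each analysis::region column of a TACO CSV file."""
        with open(taco_csv, newline="") as f:
            reader = csv.reader(f)
            header = next(reader)          # e.g. ana1::SRA, ana1::SRB, ana2::SR1, ...
            passed = [0] * len(header)
            n_events = 0
            for row in reader:
                n_events += 1
                for i, flag in enumerate(row):
                    passed[i] += int(flag)
        if n_events == 0:
            return {sr: 0.0 for sr in header}
        return {sr: p / n_events for sr, p in zip(header, passed)}

    # print(region_efficiencies("SFSRun/Output/taco_output.csv"))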