From fc992c4410c447ee12c16fead9dd6892f4b64628 Mon Sep 17 00:00:00 2001 From: Marco Cipriani Date: Fri, 17 Oct 2025 15:44:19 +0200 Subject: [PATCH 01/24] generalize eta labels better for plot_decorr_params --- scripts/plotting/plot_decorr_params.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/plotting/plot_decorr_params.py b/scripts/plotting/plot_decorr_params.py index 4dd1d18aa..32564cc88 100644 --- a/scripts/plotting/plot_decorr_params.py +++ b/scripts/plotting/plot_decorr_params.py @@ -226,15 +226,15 @@ def get_values_and_impacts_as_panda( if "eta" in axes: # this assumes the 48 eta bins are rebinned uniformly, and 0 is an edge of the central bins nEtaBins = df_p.shape[0] if "charge" not in axes else int(df_p.shape[0] / 2) - nEtaBinsOneSide = int(nEtaBins / 2) + lowest_eta = -2.4 etaWidth = 4.8 / nEtaBins df_p["yticks"] = ( df_p["eta"] - .apply(lambda x: round((x - nEtaBinsOneSide) * etaWidth, 1)) + .apply(lambda x: round(lowest_eta + x * etaWidth, 1)) .astype(str) + r"<\mathit{\eta}^{\mu}<" + df_p["eta"] - .apply(lambda x: round((x - nEtaBinsOneSide) * etaWidth + etaWidth, 1)) + .apply(lambda x: round(lowest_eta + (x + 1) * etaWidth, 1)) .astype(str) ) if "charge" in axes: From d330024bc3343f4a5fdf891ccf8072007a4b1d4d Mon Sep 17 00:00:00 2001 From: Marco Cipriani Date: Thu, 13 Nov 2025 11:11:24 +0100 Subject: [PATCH 02/24] plotting and testing --- scripts/analysisTools/tests/testFakesVsMt.py | 843 +++++++++++------- scripts/analysisTools/tests/testPlots1D.py | 7 +- scripts/histmakers/mw_with_mu_eta_pt.py | 181 +++- scripts/histmakers/mz_wlike_with_mu_eta_pt.py | 2 - scripts/plotting/makeDataMCStackPlot.py | 14 +- utilities/parsing.py | 6 + utilities/styles/styles.py | 3 +- wremnants/include/utils.hpp | 2 +- wremnants/muon_selections.py | 3 +- 9 files changed, 703 insertions(+), 358 deletions(-) diff --git a/scripts/analysisTools/tests/testFakesVsMt.py b/scripts/analysisTools/tests/testFakesVsMt.py index 2f9666b08..49ef70a32 100644 --- a/scripts/analysisTools/tests/testFakesVsMt.py +++ b/scripts/analysisTools/tests/testFakesVsMt.py @@ -87,6 +87,7 @@ def plotProjection1D( mTvalueRange=[], rebinVariable=None, scaleQCDMC=1.0, + noMtABCD=False, ): firstBinMt = 1 @@ -109,8 +110,9 @@ def plotProjection1D( rootHists[d].GetAxis(2).SetRange(chargeBin, chargeBin) rootHists[d].GetAxis(3).SetRange(firstBinMt, lastBinMt) rootHists[d].GetAxis(4).SetRange(isoAxisRange[0], isoAxisRange[1]) - rootHists[d].GetAxis(5).SetRange(jetAxisRange[0], jetAxisRange[1]) - rootHists[d].GetAxis(6).SetRange(1, rootHists[d].GetAxis(6).GetNbins()) + if not noMtABCD: + rootHists[d].GetAxis(5).SetRange(jetAxisRange[0], jetAxisRange[1]) + rootHists[d].GetAxis(6).SetRange(1, rootHists[d].GetAxis(6).GetNbins()) if d == "Data": hdata = rootHists[d].Projection(projectAxisToKeep, "EO") hdata.SetName(f"{plotName}_{d}") @@ -231,6 +233,8 @@ def drawAndFitFRF( # following two are test histograms which make sense for pol1 fits histoChi2diffTest=None, histoPullsPol1Slope=None, + noMtABCD=False, + erfAsMainModel=False, ): # moreTextLatex is used to write text with TLatex, and the syntax is textToWrite::x1,y1,ypass,textsize @@ -321,7 +325,7 @@ def drawAndFitFRF( fitBinLow = h1.GetXaxis().FindFixBin(xMinFit + 0.001) fitBinHigh = h1.GetXaxis().FindFixBin(xMaxFit + 0.001) fitBinEdges = [ - round(h1.GetXaxis().GetBinLowEdge(i), 2) + round(h1.GetXaxis().GetBinLowEdge(i), 3) for i in range(fitBinLow, fitBinHigh + 1) ] # logger.info(fitBinEdges) @@ -335,11 +339,14 @@ def drawAndFitFRF( binOriginalHisto = ib + 
fitBinLow - 1 h1fitRange.SetBinContent(ib, h1.GetBinContent(binOriginalHisto)) h1fitRange.SetBinError(ib, h1.GetBinError(binOriginalHisto)) + # logger.error(h1fitRange.GetXaxis().GetBinLowEdge(ib)) boost_hist = narf.root_to_hist(h1fitRange) + fitModel = ( + "1-Erf[x]" if erfAsMainModel else f"pol{fitPolDegree}" + ) # just for legend, set here to make sure it is changed consistently with line below # FIT WITH TENSORFLOW global polN_scaled_global - fitModel = f"pol{fitPolDegree}" # just for legend, set here to make sure it is changed consistently with line below params = np.array([1.0] + [0.0 for i in range(fitPolDegree)]) if polN_scaled_global == None: polN_scaled_global = partial( @@ -349,10 +356,45 @@ def drawAndFitFRF( degree=fitPolDegree, ) - fitres_tf1 = wums.fitutils.fit_hist(boost_hist, polN_scaled_global, params) - f1 = ROOT.TF1("f1", polN_scaled_global, xMinFit, xMaxFit, len(params)) - status = fitres_tf1["status"] - covstatus = fitres_tf1["covstatus"] + xvalMin = h1.GetXaxis().GetBinLowEdge(1) + xvalMax = h1.GetXaxis().GetBinUpEdge(h1.GetNbinsX()) + + fitres_tf1_antiErf = None + f1_antiErf = None + if erfAsMainModel: + fitres_tf1_main = wums.fitutils.fit_hist( + boost_hist, antiErf_tf, np.array([1.0, 60.0, 5.0]) + ) + f1_main = ROOT.TF1( + "f1_antiErf", + "[0] * (1.0 - TMath::Erf((x-[1])/[2]))", + xMinFit, + xMaxFit, + ) + funcFullRange = ROOT.TF1( + "funcFullRange", "[0] * (1.0 - TMath::Erf((x-[1])/[2]))", xvalMin, 200 + ) # define this one until "infinity" + funcFullRange.SetParameters(np.array(fitres_tf1_main["x"], dtype=np.dtype("d"))) + else: + fitres_tf1_main = wums.fitutils.fit_hist(boost_hist, polN_scaled_global, params) + f1_main = ROOT.TF1("f1_main", polN_scaled_global, xMinFit, xMaxFit, len(params)) + if not noMtABCD: + fitres_tf1_antiErf = wums.fitutils.fit_hist( + boost_hist, antiErf_tf, np.array([1.0, 60.0, 5.0]) + ) + f1_antiErf = ROOT.TF1( + "f1_antiErf", + "[0] * (1.0 - TMath::Erf((x-[1])/[2]))", + xMinFit, + xMaxFit, + ) + funcFullRange = ROOT.TF1( + "funcFullRange", polN_scaled_global, xvalMin, 200, len(params) + ) # define this one until "infinity" + funcFullRange.SetParameters(np.array(fitres_tf1_main["x"], dtype=np.dtype("d"))) + + status = fitres_tf1_main["status"] + covstatus = fitres_tf1_main["covstatus"] if status: logger.warning(f"\n\n-----> STATUS: fit had status {status}\n\n") if status not in badFitsID.keys(): @@ -366,31 +408,21 @@ def drawAndFitFRF( else: badCovMatrixID[covstatus] += 1 - fitres_tf1_antiErf = wums.fitutils.fit_hist( - boost_hist, antiErf_tf, np.array([1.0, 60.0, 5.0]) - ) - f1_antiErf = ROOT.TF1( - "f1_antiErf", - "[0] * (1.0 - TMath::Erf((x-[1])/[2]))", - xMinFit, - xMaxFit, - ) - # get chi 2: - chi2 = fitres_tf1["loss_val"] - ndof = int(len(fitBinEdges) - 1 - f1.GetNpar()) + chi2 = fitres_tf1_main["loss_val"] + ndof = int(len(fitBinEdges) - 1 - f1_main.GetNpar()) chi2prob = ROOT.TMath.Prob(chi2, ndof) chi2text = f"#chi^{{2}} = {round(chi2,1)} / {ndof}" - if chi2prob < 0.05: - perc_chi2prob = 100.0 * chi2prob - sign = "=" - if perc_chi2prob < 0.1: - perc_chi2prob = 0.1 - sign = "<" - chi2text += " (prob {} {}%)".format(sign, round(perc_chi2prob, 1)) + # if chi2prob < 0.05: + # perc_chi2prob = 100.0 * chi2prob + # sign = "=" + # if perc_chi2prob < 0.1: + # perc_chi2prob = 0.1 + # sign = "<" + # chi2text += " (prob {} {}%)".format(sign, round(perc_chi2prob, 1)) global pol0_scaled_global - if fitPolDegree == 1: + if fitPolDegree == 1 and not erfAsMainModel: params0 = np.array([1.0]) if pol0_scaled_global == None: pol0_scaled_global = 
partial( @@ -406,51 +438,56 @@ def drawAndFitFRF( histoChi2diffTest.Fill(chi2diffTest) if histoPullsPol1Slope: histoPullsPol1Slope.Fill( - fitres_tf1["x"][1] / np.sqrt(fitres_tf1["cov"][1][1]) + fitres_tf1_main["x"][1] / np.sqrt(fitres_tf1_main["cov"][1][1]) ) - f1.SetParameters(np.array(fitres_tf1["x"], dtype=np.dtype("d"))) - f1.SetLineWidth(3) - # f1.SetLineStyle(9) # ROOT.kDashed == thin dashes, almost dotted - f1.SetLineColor(ROOT.kBlue + 1) + f1_main.SetParameters(np.array(fitres_tf1_main["x"], dtype=np.dtype("d"))) + f1_main.SetLineWidth(3) + # f1_main.SetLineStyle(9) # ROOT.kDashed == thin dashes, almost dotted + f1_main.SetLineColor(ROOT.kBlue + 1) + # draw extrapolation - f2 = ROOT.TF1( - "f2", - polN_scaled_global, - xMaxFit, - h1.GetXaxis().GetBinLowEdge(1 + h1.GetNbinsX()), - len(params), - ) - for i in range(1 + fitPolDegree): - f2.SetParameter(i, f1.GetParameter(i)) + if erfAsMainModel: + f2 = ROOT.TF1( + "f2_antiErf", + "[0] * (1.0 - TMath::Erf((x-[1])/[2]))", + xMaxFit, + h1.GetXaxis().GetBinLowEdge(1 + h1.GetNbinsX()), + ) + else: + f2 = ROOT.TF1( + "f2", + polN_scaled_global, + xMaxFit, + h1.GetXaxis().GetBinLowEdge(1 + h1.GetNbinsX()), + len(params), + ) + + for i in range(3 if erfAsMainModel else (1 + fitPolDegree)): + f2.SetParameter(i, f1_main.GetParameter(i)) + f2.SetLineColor(ROOT.kRed + 2) - f2.SetLineWidth(3) - f2.SetLineStyle(9) + f2.SetLineWidth(2) + f2.SetLineStyle(2) - # test anti error function - f1_antiErf.SetParameters(np.array(fitres_tf1_antiErf["x"], dtype=np.dtype("d"))) - f1_antiErf.SetLineWidth(3) - # f1_antiErf.SetLineStyle(9) # ROOT.kDashed == thin dashes, almost dotted - f1_antiErf.SetLineColor(ROOT.kRed + 2) + if not noMtABCD and not erfAsMainModel: + # test anti error function + f1_antiErf.SetParameters(np.array(fitres_tf1_antiErf["x"], dtype=np.dtype("d"))) + f1_antiErf.SetLineWidth(3) + f1_antiErf.SetLineColor(ROOT.kRed + 2) - xvalMin = h1.GetXaxis().GetBinLowEdge(1) - xvalMax = h1.GetXaxis().GetBinUpEdge(h1.GetNbinsX()) - funcFullRange = ROOT.TF1( - "funcFullRange", polN_scaled_global, xvalMin, 200, len(params) - ) # define this one until "infinity" - funcFullRange.SetParameters(np.array(fitres_tf1["x"], dtype=np.dtype("d"))) hband = ROOT.TH1D("hband", "", 400, xvalMin, xvalMax) hband.SetStats(0) hband.SetFillColor(ROOT.kGray) # hband.SetFillStyle(3001) for ib in range(1, hband.GetNbinsX() + 1): xval = hband.GetBinCenter(ib) - val = f1.Eval(xval) + val = f1_main.Eval(xval) # logger.info(f"val({xval}) = {val}") hband.SetBinContent(ib, val) - npar = len(params) + npar = 3 if erfAsMainModel else len(params) # diagonalize and get eigenvalues and eigenvectors - e, v = np.linalg.eigh(fitres_tf1["cov"]) + e, v = np.linalg.eigh(fitres_tf1_main["cov"]) # store all variations for faster access below altParameters = np.array( [np.zeros(npar, dtype=np.dtype("d"))] * (npar * 2), dtype=np.dtype("d") @@ -458,8 +495,8 @@ def drawAndFitFRF( # logger.info(altParameters) for ivar in range(npar): shift = np.sqrt(e[ivar]) * v[:, ivar] - altParameters[ivar] = fitres_tf1["x"] + shift - altParameters[ivar + npar] = fitres_tf1["x"] - shift + altParameters[ivar] = fitres_tf1_main["x"] + shift + altParameters[ivar + npar] = fitres_tf1_main["x"] - shift tf1_func_alt = ROOT.TF1() tf1_func_alt.SetName("tf1_func_alt") @@ -480,10 +517,11 @@ def drawAndFitFRF( hband.SetBinError(ib, err) hband.Draw("E2 SAME") - f1.Draw("L SAME") + f1_main.Draw("L SAME") if xMaxFit < xvalMax: f2.Draw("L SAME") - f1_antiErf.Draw("L SAME") + if not noMtABCD and not erfAsMainModel: + 
f1_antiErf.Draw("L SAME") h1.Draw("EP SAME") nColumnsLeg = 1 @@ -501,11 +539,16 @@ def drawAndFitFRF( leg.SetNColumns(nColumnsLeg) leg.AddEntry(h1, "Measurement", "PE") - leg.AddEntry(f1, f"Fit {fitModel} in [{int(xMinFit)}, {int(xMaxFit)}]", "L") + # TODO improve rounding based on variable + leg_xMinFit = int(xMinFit) if xMinFit > 1 else float(round(xMinFit, 3)) + leg_xMaxFit = int(xMaxFit) if xMaxFit > 1 else float(round(xMaxFit, 3)) + leg.AddEntry(f1_main, f"Fit {fitModel}", "L") + # leg.AddEntry(f1_main, f"Fit {fitModel} in [{leg_xMinFit}, {leg_xMaxFit}]", "L") if xMaxFit < xvalMax: leg.AddEntry(f2, f"Extrapolation", "L") leg.AddEntry(hband, "Fit uncertainty", "F") - leg.AddEntry(f1_antiErf, f"Fit f(x) = 1 - Erf[x]", "L") + if not noMtABCD and not erfAsMainModel: + leg.AddEntry(f1_antiErf, f"Fit f(x) = 1 - Erf[x]", "L") leg.Draw("same") canvas.RedrawAxis("sameaxis") @@ -667,13 +710,35 @@ def runStudy(fname, charges, mainOutputFolder, args): histoChi2diffTest = None histoPullsPol1Slope = None - etaLabel = "#eta" if not args.absEta else "|#eta|" + inputHistDict = { + "mt": "mTStudyForFakes", + "oneMinusCosdphi": "mTStudyForFakesAlt", + "mtOverPt": "mTStudyForFakesAlt2", + "altMt": "mTStudyForFakesAlt3", + "dxybs": "mTStudyForFakesAlt4", + } + + etaLabel = "#eta^{#mu}" if not args.absEta else "|#eta^{#mu}|" + ptLabel = "#it{p}_{T}^{#mu}" + mtLabel = "#it{m}_{T}" + metLabel = "#it{p}_{T}^{miss}" + dphiMuMetLabel = "#Delta#phi(#mu,^{ }#it{p}_{T}^{miss})" + + zAxisNames = { + "mt": f"{mtLabel} (GeV)", + "oneMinusCosdphi": f"1 - {dphiMuMetLabel}", + "mtOverPt": "#it{m}_{T} / #it{p}_{T}^{#mu}", + "altMt": f"{metLabel} (1 - {dphiMuMetLabel})", + "dxybs": "Thr - |dxy_{bs}^{#mu}| (cm)", + } + xABCD = args.xABCD + noMtABCD = xABCD != "mt" xAxisName = args.xAxisName - if args.absEta and "|#eta|" not in xAxisName and "#eta" in xAxisName: - xAxisName.replace("#eta", "|#eta|") + if args.absEta and "|#eta" not in xAxisName and "#eta" in xAxisName: + xAxisName = etaLabel yAxisName = args.yAxisName - zAxisName = args.met + " " + args.zAxisName + zAxisName = args.zAxisName if len(args.zAxisName) else zAxisNames[xABCD] adjustSettings_CMS_lumi() canvas = ROOT.TCanvas("canvas", "", 800, 800) @@ -702,7 +767,7 @@ def runStudy(fname, charges, mainOutputFolder, args): datasetsNoFakes = list(filter(lambda x: x != "Fake", datasets)) datasetsNoQCDFakes = list(filter(lambda x: x not in ["QCD", "Fake"], datasets)) logger.info(f"All original datasets available {datasets}") - inputHistName = "mTStudyForFakes" + inputHistName = inputHistDict[xABCD] groups.setNominalName(inputHistName) groups.loadHistsForDatagroups( inputHistName, syst="", procsToRead=datasets, applySelection=False @@ -723,7 +788,7 @@ def runStudy(fname, charges, mainOutputFolder, args): d: None for d in datasets } # for 1D plots, mt rebinned by 2 but no eta-pt rebinning rootHists_asNominal = {d: None for d in datasets} - rootHists_forMtWithDataDrivenFakes = {d: None for d in datasetsNoQCD} + rootHists_forMtWithDataDrivenFakes = {d: None for d in datasets} hnarf_fakerateDeltaPhi = None for d in datasets: s = hist.tag.Slicer() @@ -739,7 +804,11 @@ def runStudy(fname, charges, mainOutputFolder, args): hnarfTMP = hnarfTMP[ {"absdz_cm": s[:: hist.sum]} ] # FIXME: JUST A TEST !!! - # hnarfTMP = hnarfTMP[{"absdz_cm": s[complex(0,0.1)::hist.sum]}] # FIXME: JUST A TEST !!! 
+ if noMtABCD and "passMT" in hnarfTMP.axes.name: + # integrate all mT (unless required differently, + # but to use dphi should not cut on mT) + hnarfTMP = hnarfTMP[{"passMT": s[:: hist.sum]}] + # hnarfTMP = hnarfTMP[{"passMT": False}] hnarf = hnarfTMP.copy() hnarf_forplots = hnarf.copy() hnarf_asNominal = hnarf.copy() @@ -782,142 +851,151 @@ def runStudy(fname, charges, mainOutputFolder, args): hnarf_fakerateDeltaPhi = hnarf_fakerateDeltaPhi[ {"hasJets": s[:: hist.sum]} ] - lowMtUpperBound = int(args.mtNominalRange.split(",")[1]) + lowMtUpperBound = float(args.mtNominalRange.split(",")[1]) hnarf_fakerateDeltaPhi = hnarf_fakerateDeltaPhi[ - {"mt": s[: complex(0, lowMtUpperBound) : hist.sum]} + {xABCD: s[: complex(0, lowMtUpperBound) : hist.sum]} ] chargeIndex = 0 if charge in ["minus", "inclusive"] else 1 hnarf_fakerateDeltaPhi = hnarf_fakerateDeltaPhi[ {"charge": s[chargeIndex : chargeIndex + 1 : hist.sum]} ] - hnarf_fakerateDeltaPhi = hnarf_fakerateDeltaPhi[ - {"DphiMuonMet": s[:: hist.rebin(2)]} - ] - # make all TH of FRF in bins of dphi - histo_fakes_dphiBins = narf.hist_to_root( - hnarf_fakerateDeltaPhi - ) # this is a THnD with eta-pt-passIso-DphiMuonMet - histo_FRFvsDphi = {} - nBinsDphi = histo_fakes_dphiBins.GetAxis(3).GetNbins() - for idp in range(nBinsDphi): - idpBin = idp + 1 - histo_fakes_dphiBins.GetAxis(3).SetRange(idpBin, idpBin) + if "DphiMuonMet" in hnarf_fakerateDeltaPhi.axes.name: + hnarf_fakerateDeltaPhi = hnarf_fakerateDeltaPhi[ + {"DphiMuonMet": s[:: hist.rebin(2)]} + ] + if not noMtABCD: + # make all TH of FRF in bins of dphi + histo_fakes_dphiBins = narf.hist_to_root( + hnarf_fakerateDeltaPhi + ) # this is a THnD with eta-pt-passIso-DphiMuonMet + histo_FRFvsDphi = {} + nBinsDphi = histo_fakes_dphiBins.GetAxis(3).GetNbins() + for idp in range(nBinsDphi): + idpBin = idp + 1 + histo_fakes_dphiBins.GetAxis(3).SetRange(idpBin, idpBin) + histo_fakes_dphiBins.GetAxis(2).SetRange(1, 1) + histo_fakes_dphiBins_failIso = histo_fakes_dphiBins.Projection( + 1, 0, "E" + ) + histo_fakes_dphiBins_failIso.SetName( + f"histo_fakes_dphiBins_failIso_idp{idp}" + ) + histo_fakes_dphiBins.GetAxis(3).SetRange(idpBin, idpBin) + histo_fakes_dphiBins.GetAxis(2).SetRange(2, 2) + histo_fakes_dphiBins_passIso = histo_fakes_dphiBins.Projection( + 1, 0, "E" + ) + histo_fakes_dphiBins_passIso.SetName( + f"histo_fakes_dphiBins_passIso_idp{idp}" + ) + histo_FRFvsDphi[idp] = copy.deepcopy( + histo_fakes_dphiBins_passIso.Clone( + f"histo_FRFvsDphi_idp{idp}" + ) + ) + histo_FRFvsDphi[idp].Divide(histo_fakes_dphiBins_failIso) + ## + histo_fakes_dphiBins.GetAxis(3).SetRange(1, nBinsDphi) histo_fakes_dphiBins.GetAxis(2).SetRange(1, 1) histo_fakes_dphiBins_failIso = histo_fakes_dphiBins.Projection( 1, 0, "E" ) histo_fakes_dphiBins_failIso.SetName( - f"histo_fakes_dphiBins_failIso_idp{idp}" + "histo_fakes_dphiBins_failIso_idpInclusive" ) - histo_fakes_dphiBins.GetAxis(3).SetRange(idpBin, idpBin) + histo_fakes_dphiBins.GetAxis(3).SetRange(1, nBinsDphi) histo_fakes_dphiBins.GetAxis(2).SetRange(2, 2) histo_fakes_dphiBins_passIso = histo_fakes_dphiBins.Projection( 1, 0, "E" ) histo_fakes_dphiBins_passIso.SetName( - f"histo_fakes_dphiBins_passIso_idp{idp}" - ) - histo_FRFvsDphi[idp] = copy.deepcopy( - histo_fakes_dphiBins_passIso.Clone(f"histo_FRFvsDphi_idp{idp}") + "histo_fakes_dphiBins_passIso_idpInclusive" ) - histo_FRFvsDphi[idp].Divide(histo_fakes_dphiBins_failIso) - ## - histo_fakes_dphiBins.GetAxis(3).SetRange(1, nBinsDphi) - histo_fakes_dphiBins.GetAxis(2).SetRange(1, 1) - histo_fakes_dphiBins_failIso = 
histo_fakes_dphiBins.Projection( - 1, 0, "E" - ) - histo_fakes_dphiBins_failIso.SetName( - "histo_fakes_dphiBins_failIso_idpInclusive" - ) - histo_fakes_dphiBins.GetAxis(3).SetRange(1, nBinsDphi) - histo_fakes_dphiBins.GetAxis(2).SetRange(2, 2) - histo_fakes_dphiBins_passIso = histo_fakes_dphiBins.Projection( - 1, 0, "E" - ) - histo_fakes_dphiBins_passIso.SetName( - "histo_fakes_dphiBins_passIso_idpInclusive" - ) - histo_FRFvsDphi["inclusive"] = copy.deepcopy( - histo_fakes_dphiBins_passIso.Clone("histo_FRFvsDphi_idpInclusive") - ) - histo_FRFvsDphi["inclusive"].Divide(histo_fakes_dphiBins_failIso) - ## - # now should plot - hists = [ - unroll2Dto1D( - histo_FRFvsDphi["inclusive"], - newname=f"unrolled_{histo_FRFvsDphi['inclusive'].GetName()}", - cropNegativeBins=False, + histo_FRFvsDphi["inclusive"] = copy.deepcopy( + histo_fakes_dphiBins_passIso.Clone( + "histo_FRFvsDphi_idpInclusive" + ) ) - ] - for idp in range(nBinsDphi): - hists.append( + histo_FRFvsDphi["inclusive"].Divide(histo_fakes_dphiBins_failIso) + ## + # now should plot + hists = [ unroll2Dto1D( - histo_FRFvsDphi[idp], - newname=f"unrolled_{histo_FRFvsDphi[idp].GetName()}", + histo_FRFvsDphi["inclusive"], + newname=f"unrolled_{histo_FRFvsDphi['inclusive'].GetName()}", cropNegativeBins=False, ) - ) + ] + for idp in range(nBinsDphi): + hists.append( + unroll2Dto1D( + histo_FRFvsDphi[idp], + newname=f"unrolled_{histo_FRFvsDphi[idp].GetName()}", + cropNegativeBins=False, + ) + ) - legEntries = ["inclusive"] + [ - f"#Delta#phi bin {idp}" for idp in range(nBinsDphi) - ] - ptBinRanges = [] - for ipt in range(histo_FRFvsDphi["inclusive"].GetNbinsY()): - ptBinRanges.append( - "[{ptmin},{ptmax}] GeV".format( - ptmin=int( - histo_FRFvsDphi["inclusive"] - .GetYaxis() - .GetBinLowEdge(ipt + 1) - ), - ptmax=int( - histo_FRFvsDphi["inclusive"] - .GetYaxis() - .GetBinLowEdge(ipt + 2) - ), + legEntries = ["inclusive"] + [ + f"#Delta#phi bin {idp}" for idp in range(nBinsDphi) + ] + ptBinRanges = [] + for ipt in range(histo_FRFvsDphi["inclusive"].GetNbinsY()): + ptBinRanges.append( + "[{ptmin},{ptmax}] GeV".format( + ptmin=int( + histo_FRFvsDphi["inclusive"] + .GetYaxis() + .GetBinLowEdge(ipt + 1) + ), + ptmax=int( + histo_FRFvsDphi["inclusive"] + .GetYaxis() + .GetBinLowEdge(ipt + 2) + ), + ) ) + drawNTH1( + hists, + legEntries, + "Unrolled (%s, %s) bin" % (etaLabel, ptLabel), + "Fakerate factor", + f"FRFvsDeltaPhi_{charge}", + outfolder, + leftMargin=0.06, + rightMargin=0.01, + labelRatioTmp="BinX/inclusive::0.7,1.3", + legendCoords="0.06,0.99,0.91,0.99;6", + lowerPanelHeight=0.5, + skipLumi=True, + passCanvas=canvas_unroll, + drawVertLines="{a},{b}".format( + a=histo_FRFvsDphi["inclusive"].GetNbinsY(), + b=histo_FRFvsDphi["inclusive"].GetNbinsX(), + ), + textForLines=ptBinRanges, + transparentLegend=False, + drawErrorAll=True, + onlyLineColor=True, + noErrorRatioDen=False, + useLineFirstHistogram=True, + setOnlyLineRatio=True, + lineWidth=1, ) - drawNTH1( - hists, - legEntries, - "Unrolled " + etaLabel + "-p_{{T}} bin", - "Fakerate factor", - f"FRFvsDeltaPhi_{charge}", - outfolder, - leftMargin=0.06, - rightMargin=0.01, - labelRatioTmp="BinX/inclusive::0.7,1.3", - legendCoords="0.06,0.99,0.91,0.99;6", - lowerPanelHeight=0.5, - skipLumi=True, - passCanvas=canvas_unroll, - drawVertLines="{a},{b}".format( - a=histo_FRFvsDphi["inclusive"].GetNbinsY(), - b=histo_FRFvsDphi["inclusive"].GetNbinsX(), - ), - textForLines=ptBinRanges, - transparentLegend=False, - drawErrorAll=True, - onlyLineColor=True, - noErrorRatioDen=False, - 
useLineFirstHistogram=True, - setOnlyLineRatio=True, - lineWidth=1, - ) ## dphiMuonMetCut = args.dphiMuonMetCut * np.pi - hnarf = hnarf[ - {"DphiMuonMet": s[complex(0, 0.01 + dphiMuonMetCut) :: hist.sum]} - ] # test dphi cut + if "DphiMuonMet" in hnarf.axes.name: + hnarf = hnarf[ + {"DphiMuonMet": s[complex(0, 0.01 + dphiMuonMetCut) :: hist.sum]} + ] # test dphi cut rootHists[d] = narf.hist_to_root( hnarf ) # this is a THnD with eta-pt-charge-mt-passIso-hasJets-DphiMuonMet - hnarf_forplots = hnarf_forplots[ - {"DphiMuonMet": s[complex(0, 0.01 + dphiMuonMetCut) :]} - ] # cut but do not integrate - hnarf_forplots = hnarf_forplots[{"mt": s[:: hist.rebin(2)]}] + if "DphiMuonMet" in hnarf_forplots.axes.name: + hnarf_forplots = hnarf_forplots[ + {"DphiMuonMet": s[complex(0, 0.01 + dphiMuonMetCut) :]} + ] # cut but do not integrate + if xABCD == "mt": + hnarf_forplots = hnarf_forplots[{xABCD: s[:: hist.rebin(2)]}] rootHists_forplots[d] = narf.hist_to_root( hnarf_forplots ) # this is a THnD with eta-pt-charge-mt-passIso-hasJets-DphiMuonMet @@ -925,9 +1003,10 @@ def runStudy(fname, charges, mainOutputFolder, args): # integrate njet and Dphi in appropriate range if "hasJets" in hnarf_asNominal.axes.name: hnarf_asNominal = hnarf_asNominal[{"hasJets": s[:: hist.sum]}] - hnarf_asNominal = hnarf_asNominal[ - {"DphiMuonMet": s[complex(0, 0.01 + dphiMuonMetCut) :: hist.sum]} - ] + if "DphiMuonMet" in hnarf_asNominal.axes.name: + hnarf_asNominal = hnarf_asNominal[ + {"DphiMuonMet": s[complex(0, 0.01 + dphiMuonMetCut) :: hist.sum]} + ] hnarf_forMtWithDataDrivenFakes = hnarf_asNominal.copy() mtThreshold = float(args.mtNominalRange.split(",")[-1]) # now select signal region @@ -936,30 +1015,30 @@ def runStudy(fname, charges, mainOutputFolder, args): hnarf_asNominal = sel.fakeHistABCD( hnarf_asNominal, thresholdMT=mtThreshold, - axis_name_mt="mt", + axis_name_mt=xABCD, integrateLowMT=True, integrateHighMT=True, ) hnarf_forMtWithDataDrivenFakes = sel.fakeHistABCD( hnarf_forMtWithDataDrivenFakes, thresholdMT=mtThreshold, - axis_name_mt="mt", + axis_name_mt=xABCD, integrateLowMT=True, integrateHighMT=False, ) else: - nMtBins = hnarf_asNominal.axes["mt"].size + nMtBins = hnarf_asNominal.axes[xABCD].size # just select signal region: pass isolation and mT > mtThreshold with overflow included hnarf_asNominal = hnarf_asNominal[ { "passIso": True, - "mt": s[complex(0, mtThreshold) :: hist.sum], + xABCD: s[complex(0, mtThreshold) :: hist.sum], } ] hnarf_forMtWithDataDrivenFakes = hnarf_forMtWithDataDrivenFakes[ { "passIso": True, - "mt": s[complex(0, mtThreshold) : nMtBins], + xABCD: s[complex(0, mtThreshold) : nMtBins], } ] rootHists_asNominal[d] = narf.hist_to_root(hnarf_asNominal) @@ -972,7 +1051,7 @@ def runStudy(fname, charges, mainOutputFolder, args): "charge": 0 if charge in ["minus", "inclusive"] else 1, } ] - if d in datasetsNoQCD: + if d in datasets: rootHists_forMtWithDataDrivenFakes[d] = narf.hist_to_root( hnarf_forMtWithDataDrivenFakes ) @@ -1002,17 +1081,20 @@ def runStudy(fname, charges, mainOutputFolder, args): axisVar = { 0: ["muon_eta", etaLabel], - 1: ["muon_pt", "p_{T} (GeV)"], - 3: ["mT", args.met + " m_{T} (GeV)"], - 6: ["deltaPhiMuonMET", "#Delta#phi(#mu,MET) (GeV)"], + 1: ["muon_pt", ptLabel + " (GeV)"], } + if noMtABCD: + axisVar[3] = [xABCD, zAxisName] + else: + axisVar[3] = ["mT", args.met + f" {mtLabel} (GeV)"] + axisVar[6] = ["deltaPhiMuonMET", dphiMuMetLabel] # bin number from root histogram chargeBin = 1 if charge in ["minus", "inclusive"] else 2 # plot mT with data-driven fakes, as a test 
(should add the mT correction in fact) hmcMt = {} - for d in datasetsNoQCD: + for d in datasets: if d == "Data": continue hmcMt[d] = rootHists_forMtWithDataDrivenFakes[d] @@ -1020,128 +1102,164 @@ def runStudy(fname, charges, mainOutputFolder, args): if "Data" in rootHists_forMtWithDataDrivenFakes.keys(): hdataMt = rootHists_forMtWithDataDrivenFakes["Data"] + plotName = xABCD + "_passIso_jetInclusive_passCut" plotDistribution1D( hdataMt, hmcMt, - datasetsNoQCD, - outfolder_dataMC, - canvas1Dshapes=canvas1Dshapes, - xAxisName=f"{args.met} m_{{T}} (GeV)", - plotName=f"mT_passIso_jetInclusive_passMT_dataDrivenFakes", - ) - - # plot mT, eta, pt in some regions iso-nJet regions, for checks - # don't plot fakes here - for xbin in axisVar.keys(): - if "Data" not in rootHists_forplots.keys(): - continue - # Dphi in 4 mT bins, for jet inclusive and pass/fail iso - if xbin == 6: - mtRanges = [0, 20, 40, 60, -1] - for imt in range(len(mtRanges) - 1): - mtLow = str(mtRanges[imt]) - mtHigh = "inf" if mtRanges[imt + 1] < 0 else str(mtRanges[imt + 1]) - mtTitle = f"mT{mtLow}to{mtHigh}" - mtRange = [mtRanges[imt], mtRanges[imt + 1]] - plotProjection1D( - rootHists_forplots, - datasetsNoFakes, - outfolder_dataMC, - canvas1Dshapes=canvas1Dshapes, - chargeBin=chargeBin, - projectAxisToKeep=xbin, - xAxisName=axisVar[xbin][1], - plotName=f"{axisVar[xbin][0]}_failIso_jetInclusive_{mtTitle}", - isoAxisRange=[1, 1], - jetAxisRange=[1, 2], - mTvalueRange=mtRange, - scaleQCDMC=args.scaleQCDMC, - ) - plotProjection1D( - rootHists_forplots, - datasetsNoFakes, - outfolder_dataMC, - canvas1Dshapes=canvas1Dshapes, - chargeBin=chargeBin, - projectAxisToKeep=xbin, - xAxisName=axisVar[xbin][1], - plotName=f"{axisVar[xbin][0]}_passIso_jetInclusive_{mtTitle}", - isoAxisRange=[2, 2], - jetAxisRange=[1, 2], - mTvalueRange=mtRange, - scaleQCDMC=args.scaleQCDMC, - ) - continue # no need for the rest of the plots for this variable - ###### - - plotProjection1D( - rootHists_forplots, datasetsNoFakes, outfolder_dataMC, canvas1Dshapes=canvas1Dshapes, - chargeBin=chargeBin, - projectAxisToKeep=xbin, - xAxisName=axisVar[xbin][1], - plotName=f"{axisVar[xbin][0]}_passIso_1orMoreJet", - isoAxisRange=[2, 2], - jetAxisRange=[2, 2], - scaleQCDMC=args.scaleQCDMC, + xAxisName=zAxisName, + plotName=plotName, + scaleProcs={"QCD": args.scaleQCDMC} if args.scaleQCDMC else {}, ) - plotProjection1D( - rootHists_forplots, - datasetsNoFakes, - outfolder_dataMC, - canvas1Dshapes=canvas1Dshapes, - chargeBin=chargeBin, - projectAxisToKeep=xbin, - xAxisName=axisVar[xbin][1], - plotName=f"{axisVar[xbin][0]}_passIso_jetInclusive", - isoAxisRange=[2, 2], - jetAxisRange=[1, 2], - scaleQCDMC=args.scaleQCDMC, - ) - plotProjection1D( - rootHists_forplots, - datasetsNoFakes, - outfolder_dataMC, - canvas1Dshapes=canvas1Dshapes, - chargeBin=chargeBin, - projectAxisToKeep=xbin, - xAxisName=axisVar[xbin][1], - plotName=f"{axisVar[xbin][0]}_failIso_1orMoreJet", - isoAxisRange=[1, 1], - jetAxisRange=[2, 2], - scaleQCDMC=args.scaleQCDMC, - ) - plotProjection1D( - rootHists_forplots, - datasetsNoFakes, + plotDistribution1D( + hdataMt, + hmcMt, + datasetsNoQCD, outfolder_dataMC, canvas1Dshapes=canvas1Dshapes, - chargeBin=chargeBin, - projectAxisToKeep=xbin, - xAxisName=axisVar[xbin][1], - plotName=f"{axisVar[xbin][0]}_failIso_jetInclusive", - isoAxisRange=[1, 1], - jetAxisRange=[1, 2], - scaleQCDMC=args.scaleQCDMC, + xAxisName=zAxisName, + plotName=f"{plotName}_dataDrivenFakes", ) - # signal region adding mT cut too - plotProjection1D( - rootHists_forplots, - 
datasetsNoFakes, + # hmcMt_noFakes = {d: v for d,v in hmcMt.items() if d != "Fake"} + plotDistribution1D( + hdataMt, + hmcMt, # hmcMt_noFakes, + datasetsNoQCDFakes, outfolder_dataMC, canvas1Dshapes=canvas1Dshapes, - chargeBin=chargeBin, - projectAxisToKeep=xbin, - xAxisName=axisVar[xbin][1], - plotName=f"{axisVar[xbin][0]}_passIso_jetInclusive_passMt", - isoAxisRange=[2, 2], - jetAxisRange=[1, 2], - mTvalueRange=[40, -1], - scaleQCDMC=args.scaleQCDMC, + xAxisName=zAxisName, + plotName=f"{plotName}_noFakes", ) + # plot mT, eta, pt in some regions iso-nJet regions, for checks + # don't plot fakes here + for xbin in axisVar.keys(): + if "Data" not in rootHists_forplots.keys(): + continue + if noMtABCD: + plotProjection1D( + rootHists_forplots, + datasetsNoFakes, + outfolder_dataMC, + canvas1Dshapes=canvas1Dshapes, + chargeBin=chargeBin, + projectAxisToKeep=xbin, + xAxisName=axisVar[xbin][1], + plotName=f"{axisVar[xbin][0]}_passIso_jetInclusive", + isoAxisRange=[2, 2], + jetAxisRange=None, + scaleQCDMC=args.scaleQCDMC, + noMtABCD=noMtABCD, + ) + plotProjection1D( + rootHists_forplots, + datasetsNoFakes, + outfolder_dataMC, + canvas1Dshapes=canvas1Dshapes, + chargeBin=chargeBin, + projectAxisToKeep=xbin, + xAxisName=axisVar[xbin][1], + plotName=f"{axisVar[xbin][0]}_failIso_jetInclusive", + isoAxisRange=[1, 1], + jetAxisRange=None, + scaleQCDMC=args.scaleQCDMC, + noMtABCD=noMtABCD, + ) + else: + # Dphi in 4 mT bins, for jet inclusive and pass/fail iso + if xbin == 6: + mtRanges = [0, 20, 40, 60, -1] + for imt in range(len(mtRanges) - 1): + mtLow = str(mtRanges[imt]) + mtHigh = ( + "inf" if mtRanges[imt + 1] < 0 else str(mtRanges[imt + 1]) + ) + mtTitle = f"mT{mtLow}to{mtHigh}" + mtRange = [mtRanges[imt], mtRanges[imt + 1]] + plotProjection1D( + rootHists_forplots, + datasetsNoFakes, + outfolder_dataMC, + canvas1Dshapes=canvas1Dshapes, + chargeBin=chargeBin, + projectAxisToKeep=xbin, + xAxisName=axisVar[xbin][1], + plotName=f"{axisVar[xbin][0]}_failIso_jetInclusive_{mtTitle}", + isoAxisRange=[1, 1], + jetAxisRange=[1, 2], + mTvalueRange=mtRange, + scaleQCDMC=args.scaleQCDMC, + ) + plotProjection1D( + rootHists_forplots, + datasetsNoFakes, + outfolder_dataMC, + canvas1Dshapes=canvas1Dshapes, + chargeBin=chargeBin, + projectAxisToKeep=xbin, + xAxisName=axisVar[xbin][1], + plotName=f"{axisVar[xbin][0]}_passIso_jetInclusive_{mtTitle}", + isoAxisRange=[2, 2], + jetAxisRange=[1, 2], + mTvalueRange=mtRange, + scaleQCDMC=args.scaleQCDMC, + ) + continue # no need for the rest of the plots for this variable + ###### + plotProjection1D( + rootHists_forplots, + datasetsNoFakes, + outfolder_dataMC, + canvas1Dshapes=canvas1Dshapes, + chargeBin=chargeBin, + projectAxisToKeep=xbin, + xAxisName=axisVar[xbin][1], + plotName=f"{axisVar[xbin][0]}_passIso_1orMoreJet", + isoAxisRange=[2, 2], + jetAxisRange=[2, 2], + scaleQCDMC=args.scaleQCDMC, + ) + plotProjection1D( + rootHists_forplots, + datasetsNoFakes, + outfolder_dataMC, + canvas1Dshapes=canvas1Dshapes, + chargeBin=chargeBin, + projectAxisToKeep=xbin, + xAxisName=axisVar[xbin][1], + plotName=f"{axisVar[xbin][0]}_failIso_1orMoreJet", + isoAxisRange=[1, 1], + jetAxisRange=[2, 2], + scaleQCDMC=args.scaleQCDMC, + ) + plotProjection1D( + rootHists_forplots, + datasetsNoFakes, + outfolder_dataMC, + canvas1Dshapes=canvas1Dshapes, + chargeBin=chargeBin, + projectAxisToKeep=xbin, + xAxisName=axisVar[xbin][1], + plotName=f"{axisVar[xbin][0]}_passIso_jetInclusive", + isoAxisRange=[2, 2], + jetAxisRange=[1, 2], + scaleQCDMC=args.scaleQCDMC, + ) + plotProjection1D( + 
rootHists_forplots, + datasetsNoFakes, + outfolder_dataMC, + canvas1Dshapes=canvas1Dshapes, + chargeBin=chargeBin, + projectAxisToKeep=xbin, + xAxisName=axisVar[xbin][1], + plotName=f"{axisVar[xbin][0]}_failIso_jetInclusive", + isoAxisRange=[1, 1], + jetAxisRange=[1, 2], + scaleQCDMC=args.scaleQCDMC, + ) ################################### ################################### ### Now the actual study on fakes @@ -1155,14 +1273,16 @@ def runStudy(fname, charges, mainOutputFolder, args): # set charge from charge axis histo_fakes.GetAxis(2).SetRange(chargeBin, chargeBin) histo_fakes.GetAxis(4).SetRange(2, 2) # passIso, equivalent to lowIso - histo_fakes.GetAxis(5).SetRange(2 if args.jetCut else 1, 2) + if not noMtABCD: + histo_fakes.GetAxis(5).SetRange(2 if args.jetCut else 1, 2) # now get a TH3 histoPassIso = histo_fakes.Projection(0, 1, 3, "E") histoPassIso.SetName("fakes_passIso") histoPassIso.SetTitle("fakes_passIso") histo_fakes.GetAxis(2).SetRange(chargeBin, chargeBin) histo_fakes.GetAxis(4).SetRange(1, 1) # FailIso - histo_fakes.GetAxis(5).SetRange(2 if args.jetCut else 1, 2) + if not noMtABCD: + histo_fakes.GetAxis(5).SetRange(2 if args.jetCut else 1, 2) histoFailIso = histo_fakes.Projection(0, 1, 3, "E") histoFailIso.SetName("fakes_failIso") histoFailIso.SetTitle("fakes_failIso") @@ -1172,11 +1292,12 @@ def runStudy(fname, charges, mainOutputFolder, args): mtThreshold = float(args.mtNominalRange.split(",")[-1]) histo_fakes.GetAxis(2).SetRange(chargeBin, chargeBin) histo_fakes.GetAxis(3).SetRange( - histo_fakes.GetAxis(3).FindFixBin(mtThreshold + 0.001), + histo_fakes.GetAxis(3).FindFixBin(mtThreshold + 0.0005), histo_fakes.GetAxis(3).GetNbins(), ) # high mT histo_fakes.GetAxis(4).SetRange(1, 1) # FailIso - histo_fakes.GetAxis(5).SetRange(1, 2) # jet inclusive + if not noMtABCD: + histo_fakes.GetAxis(5).SetRange(1, 2) # jet inclusive histoPassMtFailIso = histo_fakes.Projection(0, 1, 3, "E") histoPassMtFailIso.SetName("fakes_passMt_failIso_jetInclusive") histoPassMtFailIso.SetTitle("fakes_passMt_failIso_jetInclusive") @@ -1190,10 +1311,11 @@ def runStudy(fname, charges, mainOutputFolder, args): # also integrate mt < mtThreshold histo_fakes.GetAxis(2).SetRange(chargeBin, chargeBin) histo_fakes.GetAxis(3).SetRange( - 1, histo_fakes.GetAxis(3).FindFixBin(mtThreshold - 0.001) + 1, histo_fakes.GetAxis(3).FindFixBin(mtThreshold - 0.0005) ) histo_fakes.GetAxis(4).SetRange(2, 2) # passIso, equivalent to lowIso - histo_fakes.GetAxis(5).SetRange(1, 2) # always integrate jet here + if not noMtABCD: + histo_fakes.GetAxis(5).SetRange(1, 2) # always integrate jet here # now get a TH2 for passIso histoPassIsoLowMt_jetInclusive_vsEtaPt = histo_fakes.Projection(1, 0, "E") histoPassIsoLowMt_jetInclusive_vsEtaPt.SetName( @@ -1228,7 +1350,8 @@ def runStudy(fname, charges, mainOutputFolder, args): ## now redo the same for the >=1 jet region ## could be done from previous TH3 histograms integrating in mT, but they don't always have the same jet cut histo_fakes.GetAxis(4).SetRange(2, 2) # passIso, equivalent to lowIso - histo_fakes.GetAxis(5).SetRange(2, 2) # always integrate jet here + if not noMtABCD: + histo_fakes.GetAxis(5).SetRange(2, 2) # always integrate jet here # now get a TH2 for passIso histoPassIsoLowMt_1orMoreJet_vsEtaPt = histo_fakes.Projection(1, 0, "E") histoPassIsoLowMt_1orMoreJet_vsEtaPt.SetName( @@ -1321,7 +1444,11 @@ def runStudy(fname, charges, mainOutputFolder, args): # histoFailIso.Rebin3D(args.rebinEta, args.rebinPt, 1) # histoPassMtFailIso.Rebin3D(args.rebinEta, args.rebinPt, 1) - 
mtEdges = [round(int(x), 1) for x in args.mtBinEdges.split(",")] + mtEdges = ( + [round(float(x), 3) for x in args.mtBinEdges.strip().split(",")] + if noMtABCD + else [round(int(x), 1) for x in args.mtBinEdges.split(",")] + ) nMtBins = len(mtEdges) - 1 ratio = [] for imt in range(nMtBins): @@ -1345,10 +1472,10 @@ def runStudy(fname, charges, mainOutputFolder, args): ) h2PassIso.SetTitle( - "Low Iso: %s m_{T} #in [%d, %d]" % (args.met, lowEdge, highEdge) + "Low Iso: %s #in [%d, %d]" % (zAxisName, lowEdge, highEdge) ) h2FailIso.SetTitle( - "High Iso: %s m_{T} #in [%d, %d]" % (args.met, lowEdge, highEdge) + "High Iso: %s #in [%d, %d]" % (zAxisName, lowEdge, highEdge) ) if not args.skipPlot2D: drawCorrelationPlot( @@ -1383,7 +1510,7 @@ def runStudy(fname, charges, mainOutputFolder, args): ) ratio.append(h2PassIso.Clone(f"fakerateFactor_mt{lowEdge}to{highEdge}")) - ratio[imt].SetTitle("%s m_{T} #in [%d, %d]" % (args.met, lowEdge, highEdge)) + ratio[imt].SetTitle("%s #in [%d, %d]" % (zAxisName, lowEdge, highEdge)) ratio[imt].Divide(h2FailIso) if not args.skipPlot2D: drawCorrelationPlot( @@ -1403,7 +1530,14 @@ def runStudy(fname, charges, mainOutputFolder, args): ) if args.mtNominalRange: - lowEdge, highEdge = map(int, args.mtNominalRange.split(",")) + lowEdge, highEdge = ( + map(float, args.mtNominalRange.split(",")) + if noMtABCD + else map(int, args.mtNominalRange.split(",")) + ) + lowEdgeStr, highEdgeStr = args.mtNominalRange.split(",") + lowEdgeStr = lowEdgeStr.replace(".", "p") + highEdgeStr = highEdgeStr.replace(".", "p") binStart = histoPassIso.GetZaxis().FindFixBin(lowEdge) binEnd = ( histoPassIso.GetZaxis().FindFixBin(highEdge + 0.001) - 1 @@ -1418,13 +1552,13 @@ def runStudy(fname, charges, mainOutputFolder, args): ) h2PassIso = getTH2fromTH3( histoPassIso, - f"pt_eta_nominalmt{lowEdge}to{highEdge}_passIso", + f"pt_eta_nominalmt{lowEdgeStr}to{highEdgeStr}_passIso", binStart, binEnd, ) h2FailIso = getTH2fromTH3( histoFailIso, - f"pt_eta_nominalmt{lowEdge}to{highEdge}_failIso", + f"pt_eta_nominalmt{lowEdgeStr}to{highEdgeStr}_failIso", binStart, binEnd, ) @@ -1432,10 +1566,10 @@ def runStudy(fname, charges, mainOutputFolder, args): cropNegativeContent(h2FailIso) nominalFakerateFactor = h2PassIso.Clone( - f"nominalFakerateFactor_mt{lowEdge}to{highEdge}" + f"nominalFakerateFactor_mt{lowEdgeStr}to{highEdgeStr}" ) nominalFakerateFactor.SetTitle( - "%s m_{T} #in [%d, %d]" % (args.met, lowEdge, highEdge) + "%s #in [%s, %s]" % (zAxisName, lowEdgeStr, highEdgeStr) ) nominalFakerateFactor.Divide(h2FailIso) drawCorrelationPlot( @@ -1456,22 +1590,22 @@ def runStudy(fname, charges, mainOutputFolder, args): # plot also at high mt, including overflow h2PassIso_highMt = getTH2fromTH3( histoPassIso, - f"pt_eta_mt{highEdge}toInf_passIso", + f"pt_eta_mt{highEdgeStr}toInf_passIso", binEnd, 1 + histoPassIso.GetNbinsZ(), ) h2FailIso_highMt = getTH2fromTH3( histoFailIso, - f"pt_eta_mt{highEdge}toInf_failIso", + f"pt_eta_mt{highEdgeStr}toInf_failIso", binEnd, 1 + histoFailIso.GetNbinsZ(), ) cropNegativeContent(h2PassIso_highMt) cropNegativeContent(h2FailIso_highMt) highMtFakerateFactor = h2PassIso_highMt.Clone( - f"highMtFakerateFactor_mt{highEdge}toInf" + f"highMtFakerateFactor_mt{highEdgeStr}toInf" ) - highMtFakerateFactor.SetTitle("%s m_{T} > %d" % (args.met, highEdge)) + highMtFakerateFactor.SetTitle("%s > %s" % (zAxisName, highEdgeStr)) highMtFakerateFactor.Divide(h2FailIso_highMt) drawCorrelationPlot( highMtFakerateFactor, @@ -1514,13 +1648,13 @@ def runStudy(fname, charges, mainOutputFolder, args): 
drawNTH1( [highMtFRF_unrolled, nomiFRF_unrolled], [highMtFakerateFactor.GetTitle(), nominalFakerateFactor.GetTitle()], - "Unrolled eta-p_{T} bin", + "Unrolled (%s, %s) bin" % (etaLabel, ptLabel), "Fakerate factor", f"FRFnomiAndHighMt_etaPt_{charge}", outfolder, leftMargin=0.06, rightMargin=0.01, - labelRatioTmp="Nomi/highMt", + labelRatioTmp=f"High {zAxisName} / nomi", legendCoords="0.06,0.99,0.91,0.99;2", lowerPanelHeight=0.5, skipLumi=True, @@ -1556,7 +1690,7 @@ def runStudy(fname, charges, mainOutputFolder, args): hFRFcorr = ROOT.TH2D( f"fakerateFactorCorrection_{charge}", - "%s m_{T} > %d GeV" % (args.met, int(args.mtNominalRange.split(",")[1])), + "%s > %s GeV" % (zAxisName, args.mtNominalRange.split(",")[1]), h2PassIso.GetNbinsX(), round(etaLow, 1), round(etaHigh, 1), @@ -1586,7 +1720,7 @@ def runStudy(fname, charges, mainOutputFolder, args): ptBinHigh = int(h2PassIso.GetYaxis().GetBinLowEdge(ipt + 1)) fakerateFactor_vs_etaMt = ROOT.TH2D( "fakerateFactor_vs_etaMt_pt%dto%d" % (ptBinLow, ptBinHigh), - "Muon p_{T} #in [%d, %d] GeV" % (ptBinLow, ptBinHigh), + "%s #in [%d, %d] GeV" % (ptLabel, ptBinLow, ptBinHigh), h2PassIso.GetNbinsX(), round(etaLow, 1), round(etaHigh, 1), @@ -1613,8 +1747,8 @@ def runStudy(fname, charges, mainOutputFolder, args): # logger.info(f"ieta = {ieta} {ptBinLow} < pt < {ptBinHigh} {etaBinLow} < eta < {etaBinHigh} {etaBinLow} < etaNoRound < {etaBinHigh}") hFRfactorVsMt = ROOT.TH1D( f"hFRfactorVsMt_ieta{ieta}_pt{ptBinLow}to{ptBinHigh}", - "%.1f < %s < %.1f, p_{T} #in [%d, %d] GeV" - % (etaBinLow, etaLabel, etaBinHigh, ptBinLow, ptBinHigh), + "%.1f < %s < %.1f, %s #in [%d, %d] GeV" + % (etaBinLow, etaLabel, etaBinHigh, ptLabel, ptBinLow, ptBinHigh), nMtBins, array.array("d", mtEdges), ) @@ -1634,9 +1768,13 @@ def runStudy(fname, charges, mainOutputFolder, args): hTmp[imt - 1].SetBinContent(1, max(0.0, binContent)) hTmp[imt - 1].SetBinError(1, binError) - textLatex = ( - "%.1f < %s < %.1f;%d < p_{T} < %d GeV::0.2,0.88,0.08,0.045" - % (etaBinLow, etaLabel, etaBinHigh, ptBinLow, ptBinHigh) + textLatex = "%.1f < %s < %.1f;%d < %s < %d GeV::0.2,0.88,0.08,0.045" % ( + etaBinLow, + etaLabel, + etaBinHigh, + ptBinLow, + ptLabel, + ptBinHigh, ) if nMtBins > 2: ## get high mt value to evaluate the correction, can use the mean of the mt distribution for each etapt bin @@ -1676,6 +1814,8 @@ def runStudy(fname, charges, mainOutputFolder, args): useBinnedCorr=args.useBinnedCorrection, histoChi2diffTest=histoChi2diffTest, histoPullsPol1Slope=histoPullsPol1Slope, + noMtABCD=noMtABCD, + erfAsMainModel=args.fitErf, ) # logger.info(f"{valFRF}, {valFRFCap}") if valFRF < 0: @@ -1851,7 +1991,7 @@ def runStudy(fname, charges, mainOutputFolder, args): drawNTH1( hList, legEntries, - "Unrolled eta-p_{T} bin", + "Unrolled (%s, %s) bin" % (etaLabel, ptLabel), "FRF correction", f"FRFcorrAndUnc_etaPt_{charge}", outfolder, @@ -1895,7 +2035,7 @@ def runStudy(fname, charges, mainOutputFolder, args): drawNTH1( hListV2, legEntriesV2, - "Unrolled eta-p_{T} bin", + "Unrolled (%s, %s) bin" % (etaLabel, ptLabel), "FRF correction::0.5,1.6", f"FRFcorrAndUnc_etaPt_{charge}_v2", outfolder, @@ -1945,7 +2085,7 @@ def runStudy(fname, charges, mainOutputFolder, args): nominalFakerateFactor.GetTitle(), f"{nominalFakerateFactor.GetTitle()} with FRF corr", ], - "Unrolled eta-p_{T} bin", + "Unrolled (%s, %s) bin" % (etaLabel, ptLabel), "Fakerate factor", f"FRFnomiAndHighMtAndCorr_etaPt_{charge}", outfolder, @@ -2098,7 +2238,8 @@ def runStudy(fname, charges, mainOutputFolder, args): def runStudyVsDphi(fname, 
charges, mainOutputFolder, args): - etaLabel = "#eta" if not args.absEta else "|#eta|" + etaLabel = "#eta^{#mu}" if not args.absEta else "|#eta^{#mu}|" + ptLabel = "#it{p}_{T}^{#mu}" xAxisName = args.xAxisName yAxisName = args.yAxisName @@ -2167,9 +2308,9 @@ def runStudyVsDphi(fname, charges, mainOutputFolder, args): logger.warning(f"Histogram has {nPtBins} pt bins") nPtBins = hnarf.axes["pt"].size - lowMtUpperBound = int(args.mtNominalRange.split(",")[1]) - hnarf = hnarf[{"mt": s[: complex(0, lowMtUpperBound) : hist.sum]}] - # hnarf = hnarf[{"mt" : s[::hist.sum]}] + lowMtUpperBound = float(args.mtNominalRange.split(",")[1]) + hnarf = hnarf[{args.xABCD: s[: complex(0, lowMtUpperBound) : hist.sum]}] + # hnarf = hnarf[{args.xABCD : s[::hist.sum]}] chargeIndex = 0 if charge in ["minus", "inclusive"] else 1 hnarf = hnarf[{"charge": s[chargeIndex : chargeIndex + 1 : hist.sum]}] @@ -2251,13 +2392,13 @@ def runStudyVsDphi(fname, charges, mainOutputFolder, args): for b in etapt_bins: hs.append(hists[b]) etaText = f"{htmp.GetXaxis().GetBinLowEdge(b[0]):.1f} < {etaLabel} < {htmp.GetXaxis().GetBinLowEdge(b[0]+1):.1f}" - ptText = f"{htmp.GetYaxis().GetBinLowEdge(b[1]):.0f} < p_{{T}} < {htmp.GetYaxis().GetBinLowEdge(b[1]+1):.0f}" + ptText = f"{htmp.GetYaxis().GetBinLowEdge(b[1]):.0f} < {ptLabel} < {htmp.GetYaxis().GetBinLowEdge(b[1]+1):.0f}" legEntries.append(f"{etaText} && {ptText} GeV") drawNTH1( hs, legEntries, - "#Delta#phi(#mu,MET)", + "#Delta#phi(#mu, #it{p}_{T}^{miss})", "Fakerate factor", f"FRFvsDeltaPhi_{d}_someBins_ipt{ipt}_{charge}", outfolder, @@ -2285,11 +2426,17 @@ def runStudyVsDphi(fname, charges, mainOutputFolder, args): parser = common_plot_parser() parser.add_argument("inputfile", type=str, nargs=1) parser.add_argument("outputfolder", type=str, nargs=1) - parser.add_argument("-x", "--xAxisName", default="Muon #eta", help="x axis name") parser.add_argument( - "-y", "--yAxisName", default="Muon p_{T} (GeV)", help="y axis name" + "--xABCD", + default="mt", + choices=["mt", "oneMinusCosdphi", "mtOverPt", "altMt", "dxybs"], + help="X variable to use for ABCD method (y is pass isolation)", + ) + parser.add_argument("-x", "--xAxisName", default="#eta^{#mu}", help="x axis name") + parser.add_argument( + "-y", "--yAxisName", default="#it{p}_{T}^{#mu} (GeV)", help="y axis name" ) - parser.add_argument("-z", "--zAxisName", default="m_{T} (GeV)", help="z axis name") + parser.add_argument("-z", "--zAxisName", default="", help="z axis name") parser.add_argument( "--met", default="DeepMET", @@ -2327,6 +2474,11 @@ def runStudyVsDphi(fname, charges, mainOutputFolder, args): type=int, help="Degree for polynomial used in the fits", ) + parser.add_argument( + "--fitErf", + action="store_true", + help="Main fit is an (inverse) error function, only if using mT for ABCD", + ) parser.add_argument( "--maxPt", default=50, @@ -2396,9 +2548,6 @@ def runStudyVsDphi(fname, charges, mainOutputFolder, args): args = parser.parse_args() logger = logging.setup_logger(os.path.basename(__file__), args.verbose) - # if args.absEta: - # logger.error("Option --absolute-eta not implemented correctly yet. 
Abort") - # quit() if args.charge == "both": charges = ["plus", "minus"] @@ -2407,7 +2556,11 @@ def runStudyVsDphi(fname, charges, mainOutputFolder, args): ROOT.TH1.SetDefaultSumw2() - subFolder = f"/{args.integralMtMethod}Integral_fit{args.mtFitRange.replace(',','to')}_pol{args.fitPolDegree}" + fitRangeStr = args.mtFitRange.strip() + fitRangeStr = fitRangeStr.replace(",", "to").replace(".", "p").replace("-", "m") + subFolder = ( + f"/{args.integralMtMethod}Integral_fit{fitRangeStr}_pol{args.fitPolDegree}" + ) if args.rebinEta > 1: subFolder += f"_rebinEta{args.rebinEta}" if args.rebinPt > 1: @@ -2424,6 +2577,10 @@ def runStudyVsDphi(fname, charges, mainOutputFolder, args): subFolder += f"_absEta" if args.postfix: subFolder += f"_{args.postfix}" + if args.xABCD != "mt": + subFolder += f"_{args.xABCD}" + if args.fitErf: + subFolder += f"_fitErf" subFolder += "/" outdir_original = args.outputfolder[0] + subFolder diff --git a/scripts/analysisTools/tests/testPlots1D.py b/scripts/analysisTools/tests/testPlots1D.py index 299aed71a..da80d0c1f 100644 --- a/scripts/analysisTools/tests/testPlots1D.py +++ b/scripts/analysisTools/tests/testPlots1D.py @@ -47,6 +47,7 @@ def plotDistribution1D( ratioPadYaxisTitle="Data/pred::0.9,1.1", scaleToUnitArea=False, noRatioPanel=False, + scaleProcs={}, ): createPlotDirAndCopyPhp(outfolder_dataMC) @@ -71,13 +72,17 @@ def plotDistribution1D( hmc[d].SetLineColor(ROOT.kBlack) hmc[d].SetMarkerSize(0) hmc[d].SetMarkerStyle(0) + if d in scaleProcs.keys(): + hmc[d].Scale(scaleProcs[d]) stackIntegral += hmc[d].Integral() if scaleToUnitArea: hdata.Scale(1.0 / hdata.Integral()) stack_1D = ROOT.THStack("stack_1D", "signal and backgrounds") - hmcSortedKeys = sorted(hmc.keys(), key=lambda x: hmc[x].Integral()) + hmcSortedKeys = sorted( + [x for x in hmc.keys() if x in datasets], key=lambda x: hmc[x].Integral() + ) for i in hmcSortedKeys: if scaleToUnitArea: hmc[i].Scale(1.0 / stackIntegral) diff --git a/scripts/histmakers/mw_with_mu_eta_pt.py b/scripts/histmakers/mw_with_mu_eta_pt.py index 7d9761de9..6de2a6c84 100644 --- a/scripts/histmakers/mw_with_mu_eta_pt.py +++ b/scripts/histmakers/mw_with_mu_eta_pt.py @@ -133,6 +133,11 @@ action="store_true", help="Add another fit axis with the sign of the uT recoil projection", ) +parser.add_argument( + "--addAxisMt", + action="store_true", + help="Add another fit axis with mT", +) parser.add_argument( "--utAngleCosineCut", nargs=2, @@ -279,6 +284,12 @@ overflow=True, ) axis_met = hist.axis.Regular(25, 0.0, 100.0, name="met", underflow=False, overflow=True) +axis_mt_testfit = hist.axis.Variable( + (*np.arange(0, mtw_min + 20, 20), *np.arange(mtw_min + 2, 122, 2)), + name="mt", + underflow=False, + overflow=True, +) # for mt, met, ptW plots, to compute the fakes properly (but FR pretty stable vs pt and also vs eta) # may not exactly reproduce the same pt range as analysis, though @@ -334,6 +345,24 @@ nominal_axes = [axis_eta, axis_pt, axis_charge, axis_ut_analysis, *axes_abcd] nominal_cols = columns_fakerate +elif args.addAxisMt: + axes_fakerate = [ + axis_fakes_eta, + axis_fakes_pt, + axis_charge, + axis_mt_testfit, + axis_isoCat, + ] + columns_fakerate = [ + "goodMuons_eta0", + "goodMuons_pt0", + "goodMuons_charge0", + "transverseMass", + "goodMuons_relIso0", + ] + + nominal_axes = [axis_eta, axis_pt, axis_charge, axis_mt_testfit, axis_isoCat] + nominal_cols = columns_fakerate else: axes_fakerate = [axis_fakes_eta, axis_fakes_pt, axis_charge, *axes_abcd] columns_fakerate = [ @@ -431,6 +460,27 @@ axis_dphi_fakes = hist.axis.Regular( 8, 
0.0, np.pi, name="DphiMuonMet", underflow=False, overflow=False ) +# filled with 1 - cos, as in mT and such that W peaks at +2 +axis_oneMinusCosdphi = hist.axis.Regular( + 20, 0.0, 2.0, name="oneMinusCosdphi", underflow=False, overflow=False +) +axis_metOverPt = hist.axis.Regular( + 40, 0.0, 2.0, name="metOverPt", underflow=False, overflow=True +) +axis_mtOverPt = hist.axis.Regular( + 80, 0.0, 4.0, name="mtOverPt", underflow=False, overflow=True +) +axis_altMt = hist.axis.Regular( + 120, 0.0, 120.0, name="altMt", underflow=False, overflow=True +) +axis_dxybs = hist.axis.Regular( + int(args.dxybs / 0.002), + 0.0, + args.dxybs, + name="dxybs", + underflow=False, + overflow=True, +) axis_hasjet_fakes = hist.axis.Boolean( name="hasJets" ) # only need case with 0 jets or > 0 for now @@ -443,6 +493,38 @@ axis_hasjet_fakes, axis_dphi_fakes, ] +mTStudyForFakesAlt_axes = [ + axis_eta, + axis_pt, + axis_charge, + axis_oneMinusCosdphi, + axis_passIso, + axis_passMT, +] +mTStudyForFakesAlt2_axes = [ + axis_eta, + axis_pt, + axis_charge, + axis_mtOverPt, + axis_passIso, + axis_passMT, +] +mTStudyForFakesAlt3_axes = [ + axis_eta, + axis_pt, + axis_charge, + axis_altMt, + axis_passIso, + axis_passMT, +] +mTStudyForFakesAlt4_axes = [ + axis_eta, + axis_pt, + axis_charge, + axis_dxybs, + axis_passIso, + axis_passMT, +] axis_met = hist.axis.Regular( 100, 0.0, 200.0, name="met", underflow=False, overflow=True @@ -863,6 +945,7 @@ def build_graph(df, dataset): nMuons=1, ptCut=args.vetoRecoPt, etaCut=args.vetoRecoEta, + dxybsCut=args.dxybs, useGlobalOrTrackerVeto=useGlobalOrTrackerVeto, ) df = muon_selections.select_good_muons( @@ -1272,8 +1355,10 @@ def build_graph(df, dataset): df = df.Define("hasCleanJet", "Sum(goodCleanJetsNoPt && Jet_pt > 30) >= 1") df = df.Define( - "deltaPhiMuonMet", "std::abs(wrem::deltaPhi(goodMuons_phi0,MET_corr_rec_phi))" + "goodMuons_dphiMuMet0", + "std::abs(wrem::deltaPhi(goodMuons_phi0,MET_corr_rec_phi))", ) + df = df.Define("passMT", f"transverseMass >= {mtw_min}") if auxiliary_histograms: # would move the following in a function but there are too many dependencies on axes defined in the main loop @@ -1354,7 +1439,7 @@ def build_graph(df, dataset): "passIso", "nJetsClean", "leadjetPt", - "deltaPhiMuonMet", + "goodMuons_dphiMuMet0", "nominal_weight", ], ) @@ -1538,6 +1623,23 @@ def build_graph(df, dataset): # instead of passIso in mTStudyForFakes # df = df.Define("passIsoAlt", "(goodMuons_pfRelIso04_all0 * Muon_pt[goodMuons][0] / goodMuons_jetpt0) < 0.12") # df = df.Define("passIsoAlt", "(Muon_vtxAgnPfRelIso04_chg[goodMuons][0] * Muon_pt[goodMuons][0]) < 5.0") + + df = df.Define( + # fill with 1 - cos so that W signal is at +1 instead of -1, for convenience + "goodMuons_oneMinusCosdphiMuMet0", + "1.0 - std::cos(goodMuons_dphiMuMet0)", + ) + df = df.Define("goodMuons_metOverPt0", "MET_corr_rec_pt/goodMuons_pt0") + df = df.Define("goodMuons_mtOverPt0", "transverseMass/goodMuons_pt0") + df = df.Define( + "goodMuons_altMt0", "MET_corr_rec_pt*goodMuons_oneMinusCosdphiMuMet0" + ) + # Defined as Threshold - |dxybs| so that for signal it peaks at Threshold instead of 0 + # for convenience in the later study + df = df.Define( + "goodMuons_dxybs0", f"{args.dxybs} - abs(Muon_dxybs[goodMuons][0])" + ) + mTStudyForFakes = df.HistoBoost( "mTStudyForFakes", mTStudyForFakes_axes, @@ -1548,21 +1650,79 @@ def build_graph(df, dataset): "transverseMass", "passIso", "hasCleanJet", - "deltaPhiMuonMet", + "goodMuons_dphiMuMet0", "nominal_weight", ], ) results.append(mTStudyForFakes) + mTStudyForFakesAlt 
= df.HistoBoost( + "mTStudyForFakesAlt", + mTStudyForFakesAlt_axes, + [ + "goodMuons_eta0", + "goodMuons_pt0", + "goodMuons_charge0", + "goodMuons_oneMinusCosdphiMuMet0", + "passIso", + "passMT", + "nominal_weight", + ], + ) + results.append(mTStudyForFakesAlt) + + mTStudyForFakesAlt2 = df.HistoBoost( + "mTStudyForFakesAlt2", + mTStudyForFakesAlt2_axes, + [ + "goodMuons_eta0", + "goodMuons_pt0", + "goodMuons_charge0", + "goodMuons_mtOverPt0", + "passIso", + "passMT", + "nominal_weight", + ], + ) + results.append(mTStudyForFakesAlt2) + + mTStudyForFakesAlt3 = df.HistoBoost( + "mTStudyForFakesAlt3", + mTStudyForFakesAlt3_axes, + [ + "goodMuons_eta0", + "goodMuons_pt0", + "goodMuons_charge0", + "goodMuons_altMt0", + "passIso", + "passMT", + "nominal_weight", + ], + ) + results.append(mTStudyForFakesAlt3) + + mTStudyForFakesAlt4 = df.HistoBoost( + "mTStudyForFakesAlt4", + mTStudyForFakesAlt4_axes, + [ + "goodMuons_eta0", + "goodMuons_pt0", + "goodMuons_charge0", + "goodMuons_dxybs0", + "passIso", + "passMT", + "nominal_weight", + ], + ) + results.append(mTStudyForFakesAlt4) + # add filter of deltaPhi(muon,met) before other histograms (but after histogram mTStudyForFakes) if args.dphiMuonMetCut > 0.0 and not args.makeMCefficiency: dphiMuonMetCut = args.dphiMuonMetCut * np.pi df = df.Filter( - f"deltaPhiMuonMet > {dphiMuonMetCut}" + f"goodMuons_dphiMuMet0 > {dphiMuonMetCut}" ) # pi/4 was found to be a good threshold for signal with mT > 40 GeV - df = df.Define("passMT", f"transverseMass >= {mtw_min}") - if auxiliary_histograms: # control plots, lepton, met, to plot them later (need eta-pt to make fakes) @@ -1591,7 +1751,14 @@ def build_graph(df, dataset): df.HistoBoost( "deltaPhiMuonMet", [axis_phi, *axes_fakerate], - ["deltaPhiMuonMet", *columns_fakerate, "nominal_weight"], + ["goodMuons_dphiMuMet0", *columns_fakerate, "nominal_weight"], + ) + ) + results.append( + df.HistoBoost( + "metOverPt", + [axis_metOverPt, *axes_fakerate], + ["goodMuons_metOverPt0", *columns_fakerate, "nominal_weight"], ) ) results.append( diff --git a/scripts/histmakers/mz_wlike_with_mu_eta_pt.py b/scripts/histmakers/mz_wlike_with_mu_eta_pt.py index ee022dfc5..7b7852d4d 100644 --- a/scripts/histmakers/mz_wlike_with_mu_eta_pt.py +++ b/scripts/histmakers/mz_wlike_with_mu_eta_pt.py @@ -227,8 +227,6 @@ [0, 0.15, 0.3, 100], name="relIso", underflow=False, overflow=False ) -nominal_axes = [axis_eta, axis_pt, common.axis_charge] -nominal_cols = ["trigMuons_eta0", "trigMuons_pt0", "trigMuons_charge0"] if args.addIsoMtAxes: nominal_axes.extend([axis_mtCat, axis_isoCat]) nominal_cols.extend(["transverseMass", "trigMuons_relIso0"]) diff --git a/scripts/plotting/makeDataMCStackPlot.py b/scripts/plotting/makeDataMCStackPlot.py index 990f2112e..66118ba5c 100644 --- a/scripts/plotting/makeDataMCStackPlot.py +++ b/scripts/plotting/makeDataMCStackPlot.py @@ -71,7 +71,13 @@ "--procFilters", type=str, nargs="*", - help="Filter to plot (default no filter, only specify if you want a subset", + help="Filter to plot (default no filter, only specify if you want a subset)", +) +parser.add_argument( + "--procExcludes", + type=str, + nargs="*", + help="Exclude some processes", ) parser.add_argument("--noData", action="store_true", help="Don't plot data") parser.add_argument("--noFill", action="store_true", help="Don't fill") @@ -266,7 +272,11 @@ def padArray(ref, matchLength): groups = Datagroups( args.infile, filterGroups=args.procFilters, - excludeGroups=None if args.procFilters else ["QCD"], + excludeGroups=( + args.procExcludes + if 
args.procExcludes + else None if args.procFilters else ["QCD"] + ), ) if not args.fineGroups: diff --git a/utilities/parsing.py b/utilities/parsing.py index 3b2ee11c7..a083e8953 100644 --- a/utilities/parsing.py +++ b/utilities/parsing.py @@ -507,6 +507,12 @@ def __call__(self, parser, namespace, values, option_string=None): type=float, help="Upper threshold for muon absolute eta in the veto definition", ) + parser.add_argument( + "--dxybs", + default=0.05, + type=float, + help="Upper threshold for muon absolute dxy with respect to beamspot", + ) # Options to test splitting of data into subsets parser.add_argument( "--addRunAxis", diff --git a/utilities/styles/styles.py b/utilities/styles/styles.py index 2841d38c0..47ab5b0c2 100644 --- a/utilities/styles/styles.py +++ b/utilities/styles/styles.py @@ -146,7 +146,8 @@ def translate_html_to_latex(n): "MET_pt": {"label": r"$\mathit{p}_{\mathrm{T}}^{miss}$", "unit": "GeV"}, "MET": {"label": r"$\mathit{p}_{\mathrm{T}}^{miss}$", "unit": "GeV"}, "met": {"label": r"$\mathit{p}_{\mathrm{T}}^{miss}$", "unit": "GeV"}, - "mt": {"label": r"$\mathit{m}_{T}^{\mu,MET}$", "unit": "GeV"}, + # "mt": {"label": r"$\mathit{m}_{T}^{\mu,MET}$", "unit": "GeV"}, + "mt": {"label": r"$\mathit{m}_{T}^{W}$", "unit": "GeV"}, "mtfix": {"label": r"$\mathit{m}_{T}^\mathrm{fix}$", "unit": "GeV"}, "etaPlus": r"$\mathit{\eta}^{\mu(+)}$", "etaMinus": r"$\mathit{\eta}^{\mu(-)}$", diff --git a/wremnants/include/utils.hpp b/wremnants/include/utils.hpp index 2508a71e2..5be87e91a 100644 --- a/wremnants/include/utils.hpp +++ b/wremnants/include/utils.hpp @@ -365,7 +365,7 @@ float zqtproj0(const float &goodMuons_pt0, const float &goodMuons_eta0, GenPart_eta[postFSRnusIdx[0]], GenPart_phi[postFSRnusIdx[0]], 0.); TVector2 Muon(muon.X(), muon.Y()), Neutrino(neutrino.X(), neutrino.Y()); - return (Muon * ((Muon + Neutrino))) / sqrt(Muon * Muon); + return (Muon * ((Muon + Neutrino))) / std::sqrt(Muon * Muon); } float zqtproj0(float pt, float phi, float ptOther, float phiOther) { diff --git a/wremnants/muon_selections.py b/wremnants/muon_selections.py index ff1ab2758..9951f5bc2 100644 --- a/wremnants/muon_selections.py +++ b/wremnants/muon_selections.py @@ -66,6 +66,7 @@ def select_veto_muons( ptCut=15.0, staPtCut=15.0, etaCut=2.4, + dxybsCut=0.05, useGlobalOrTrackerVeto=False, tightGlobalOrTracker=True, ): @@ -75,7 +76,7 @@ def select_veto_muons( # tightGlobalOrTracker relevant only when useGlobalOrTrackerVeto = True df = df.Define( "vetoMuonsPre", - "Muon_looseId && abs(Muon_dxybs) < 0.05 && Muon_correctedCharge != -99", + f"Muon_looseId && abs(Muon_dxybs) < {dxybsCut} && Muon_correctedCharge != -99", ) df = df.Define( "Muon_isGoodGlobal", From 09c630f87fec99f59256e30ba30bc17384cd5132 Mon Sep 17 00:00:00 2001 From: Ruben Forti Date: Mon, 10 Nov 2025 17:12:45 +0100 Subject: [PATCH 03/24] Protection on the fakes estimation: fake yield is set to zero for the analysis bins of specific axes (e.g. 
utAngleSign) which have identically zero yield in the A-Ax-B-Bx regions (cherry picked from commit 773c1d92f16d0919c37e2db4168972106919410b) --- wremnants/datasets/datagroups.py | 3 +- wremnants/histselections.py | 200 +++++++++++++++++++++++++------ 2 files changed, 167 insertions(+), 36 deletions(-) diff --git a/wremnants/datasets/datagroups.py b/wremnants/datasets/datagroups.py index fa9e4edd4..d4b6345bf 100644 --- a/wremnants/datasets/datagroups.py +++ b/wremnants/datasets/datagroups.py @@ -79,6 +79,7 @@ def __init__(self, infile, mode=None, **kwargs): self.gen_axes = {} self.fit_axes = [] self.fakerate_axes = ["pt", "eta", "charge"] + self.decorrFakeAxis = "utAngleSign" self.setGenAxes() @@ -314,6 +315,7 @@ def set_histselectors( h, global_scalefactor=scale, fakerate_axes=self.fakerate_axes, + decorrFakeAxis=self.decorrFakeAxis, smoothing_mode=smoothing_mode, smoothing_order_fakerate=smoothingOrderFakerate, smoothing_order_spectrum=smoothingOrderSpectrum, @@ -646,7 +648,6 @@ def loadHistsForDatagroups( if self.rebinOp and self.rebinBeforeSelection: logger.debug(f"Apply rebin operation for process {procName}") group.hists[label] = self.rebinOp(group.hists[label]) - if group.histselector is not None: if not applySelection: logger.warning( diff --git a/wremnants/histselections.py b/wremnants/histselections.py index 95de5cf8a..ba4346e13 100644 --- a/wremnants/histselections.py +++ b/wremnants/histselections.py @@ -123,6 +123,7 @@ def __init__( name_x=None, name_y=None, fakerate_axes=["eta", "pt", "charge"], + decorrFakeAxis="", smoothing_axis_name="pt", rebin_smoothing_axis="automatic", # can be a list of bin edges, "automatic", or None upper_bound_y=None, # using an upper bound on the abcd y-axis (e.g. isolation) @@ -196,17 +197,17 @@ def __init__( self.set_selections_x() self.set_selections_y() + self.decorrFakeAxis = decorrFakeAxis if fakerate_axes is not None: self.fakerate_axes = fakerate_axes self.fakerate_integration_axes = [ n for n in h.axes.name - if n not in [self.name_x, self.name_y, *fakerate_axes] + if n not in [self.name_x, self.name_y, *fakerate_axes, self.decorrFakeAxis] ] logger.debug( f"Setting fakerate integration axes to {self.fakerate_integration_axes}" ) - self.smoothing_axis_name = smoothing_axis_name edges = h.axes[smoothing_axis_name].edges if rebin_smoothing_axis == "automatic": @@ -753,9 +754,14 @@ def smoothen( y = np.moveaxis(y, idx_ax_smoothing, -1) w = np.moveaxis(w, idx_ax_smoothing, -1) + logger.debug("Sorted axes for smoothing: " + ", ".join(axes)) + # smoothen regressor.solve(x, y, w) + + logger.debug("Reduce is " + str(reduce)) + if reduce: # add up parameters from smoothing of individual sideband regions if type(self) == FakeSelectorSimpleABCD: @@ -805,6 +811,45 @@ def smoothen( ) return y_smooth_orig, y_smooth_var_orig + + + def get_smoothed_tensor(self, h, sel, sval, svar, syst_variations=False, flow=True): + # get output shape from original hist axes, but as for result histogram + + hOut = ( + h[{self.name_x: self.sel_x if not self.integrate_x else hist.sum}] + if self.name_x in h.axes.name + else h + ) + out = np.zeros( + [a.extent if flow else a.shape for a in hOut.axes if a.name not in [self.name_y, self.decorrFakeAxis]], + dtype=sval.dtype, + ) + # leave the underflow and overflow unchanged if present + out[*sel[:-1]] = sval + if syst_variations: + outvar = np.zeros_like(out) + outvar = outvar[..., None, None] * np.ones( + (*outvar.shape, *svar.shape[-2:]), dtype=outvar.dtype + ) + + logger.debug("Doing syst_variations... 
something might be wrong here") + logger.error("Shapes of outvar and svar: " + str(outvar.shape) + " " + str(svar.shape)) + + # leave the underflow and overflow unchanged if present + outvar[*sel[:-1], :, :] = svar + + logger.error("Shapes of outvar and svar: " + str(outvar.shape) + " " + str(svar.shape)) + + + else: + # with full smoothing all of the statistical uncertainty is included in the + # explicit variations, so the remaining binned uncertainty is zero + outvar = np.zeros_like(out) + + return out, outvar + + def smoothen_spectrum( self, @@ -823,6 +868,8 @@ def smoothen_spectrum( smoothing_axis = h.axes[self.smoothing_axis_name] nax = sval.ndim + logger.debug("Smoothing spectrum along axis " + self.smoothing_axis_name) + # underflow and overflow are left unchanged along the smoothing axis # so we need to exclude them if they have been otherwise included if flow: @@ -867,6 +914,9 @@ def smoothen_spectrum( sval *= 1.0 / xwidth svar *= 1.0 / xwidth**2 + logger.debug("Sval shape after xwidth scaling: " + str(sval.shape)) + logger.debug("Svar shape after xwidth scaling: " + str(svar.shape)) + goodbin = (sval > 0.0) & (svar > 0.0) if goodbin.size - np.sum(goodbin) > 0: logger.warning( @@ -890,6 +940,9 @@ def smoothen_spectrum( reduce=reduce, ) + logger.debug("Svar shape after smoothing: " + (str(svar.shape) if svar is not None else "None")) + logger.debug("xwidthgt shape: " + str(xwidthtgt.shape)) + sval = np.exp(sval) * xwidthtgt sval = np.where(np.isfinite(sval), sval, 0.0) if syst_variations: @@ -899,30 +952,14 @@ def smoothen_spectrum( svar, sval[..., None, None], ) + - # get output shape from original hist axes, but as for result histogram - hOut = ( - h[{self.name_x: self.sel_x if not self.integrate_x else hist.sum}] - if self.name_x in h.axes.name - else h - ) - out = np.zeros( - [a.extent if flow else a.shape for a in hOut.axes if a.name != self.name_y], - dtype=sval.dtype, - ) - # leave the underflow and overflow unchanged if present - out[*sel[:-1]] = sval - if syst_variations: - outvar = np.zeros_like(out) - outvar = outvar[..., None, None] * np.ones( - (*outvar.shape, *svar.shape[-2:]), dtype=outvar.dtype - ) - # leave the underflow and overflow unchanged if present - outvar[*sel[:-1], :, :] = svar - else: - # with full smoothing all of the statistical uncertainty is included in the - # explicit variations, so the remaining binned uncertainty is zero - outvar = np.zeros_like(out) + logger.debug("Smoothing completed. 
Getting output tensors.") + + + out, outvar = self.get_smoothed_tensor(h, sel, sval, svar, syst_variations, flow=flow) + + logger.debug("Smoothing completed.") return out, outvar @@ -1044,16 +1081,109 @@ def calculate_fullABCD_smoothed( ..., :-1 ] - return self.smoothen_spectrum( - h, - hNew.axes[self.smoothing_axis_name].edges, - sval, - svar, - syst_variations=syst_variations, - use_spline=use_spline, - reduce=not signal_region, - flow=flow, - ) + logger.debug("X and Y axes: " + self.name_x + ", " + self.name_y) + logger.debug("hNew axes: " + ", ".join(hNew.axes.name)) + logger.debug(f"Sval shape after flattening: {sval.shape}") + logger.debug(f"Svar shape after flattening: {svar.shape}") + + baseOutTensor = np.zeros(sval.shape[:-1]) + baseOutVarTensor = ( + np.zeros(svar.shape[:-1]) # if not syst_variations + #else np.zeros((*svar.shape[:-1], svar.shape[-1], svar.shape[-1])) + ) + + logger.debug(f"baseOutTensor shape: {baseOutTensor.shape}") + logger.debug(f"baseOutVarTensor shape: {baseOutVarTensor.shape}") + + if self.decorrFakeAxis!="" and self.decorrFakeAxis in hNew.axes.name: + decorrFake_idx = [n for n in hNew.axes.name].index(self.decorrFakeAxis) + nbins_decorr = hNew.axes[self.decorrFakeAxis].size + logger.debug( + f"Found decorrelation axis {self.decorrFakeAxis} with {nbins_decorr} bins, applying smoothing independently in each bin." + ) + else: + nbins_decorr = 1 + decorrFake_idx = -1 + + logger.debug(f"Decorrelation axis index: {decorrFake_idx}") + + for i in range(nbins_decorr): + smoothing_slices = [slice(None)]*(sval.ndim) + outTensor_slices = [slice(None)]*(baseOutTensor.ndim) + outVarTensor_slices = [slice(None)]*(baseOutVarTensor.ndim) + if decorrFake_idx >= 0: + smoothing_slices[decorrFake_idx] = i + outTensor_slices[decorrFake_idx] = i + outVarTensor_slices[decorrFake_idx] = i + + sval_sliced = sval[tuple(smoothing_slices)] + svar_sliced = svar[tuple(smoothing_slices)] + + logger.debug(f"Shape of sval_sliced: {sval_sliced.shape}") + logger.debug(f"Shape of svar_sliced: {svar_sliced.shape}") + + logger.debug(f"Starting smoothing for decorrelation bin {i}.") + + goodbin = (sval_sliced > 0.0) & (svar_sliced > 0.0) + if goodbin.size - np.sum(goodbin) > 0: + logger.warning( + f"Found {goodbin.size-np.sum(goodbin)} of {goodbin.size} bins with 0 or negative bin content, those will be set to 0 and a large error" + ) + + logd = np.where(goodbin, np.log(sval_sliced), 0.0) + if np.all(logd[..., :4]==0.0): + logger.debug(f"All ABCD values are zeros! 
Returning zero as Fake estimate.")
+                logger.debug(f"Syst variations: {syst_variations}")
+                sval_sliced = np.zeros_like(sval_sliced[..., 0])
+                svar_sliced = np.zeros_like(
+                    svar_sliced[..., 0] if not syst_variations
+                    else svar_sliced[..., 0][..., None, None]
+                )
+
+                out, outvar = self.get_smoothed_tensor(
+                    h, (sval_sliced.ndim)*[slice(None)], sval_sliced, svar_sliced, syst_variations=syst_variations, flow=flow
+                )
+
+            else:
+                out, outvar = self.smoothen_spectrum(
+                    h, hNew.axes[self.smoothing_axis_name].edges, sval_sliced, svar_sliced,
+                    syst_variations=syst_variations, use_spline=use_spline, reduce=not signal_region, flow=flow
+                )
+
+            if syst_variations and i==0:
+                logger.debug("Updating baseOutVarTensor to correctly include syst variations")
+                logger.debug(f"Current outvar shape: {outvar.shape}")
+                logger.debug(f"Current baseOutVarTensor shape: {baseOutVarTensor.shape}")
+                baseOutVarTensor = np.zeros(
+                    (*baseOutVarTensor.shape, *outvar.shape[-2:]), dtype=baseOutVarTensor.dtype
+                )
+                outVarTensor_slices += [slice(None), slice(None)]
+
+
+            logger.debug(f"Smoothing completed for decorrelation bin {i}.")
+            logger.debug(f"Smoothing output shape: {out.shape}")
+            logger.debug(f"Smoothing output var shape: {outvar.shape}")
+            logger.debug(f"Smoothing baseOutTensor shape: {baseOutTensor.shape}")
+            logger.debug(f"Smoothing baseOutVarTensor shape: {baseOutVarTensor.shape}")
+
+
+            if syst_variations and outvar.shape[-2:]!=baseOutVarTensor.shape[-2:]:
+                logger.warning(
+                    f"Shape mismatch in syst variations: existing is {baseOutVarTensor.shape[-2:]}, new is {outvar.shape[-2:]}. Overwriting baseOutVarTensor."
+                )
+                baseOutVarTensor = baseOutVarTensor[..., 0, 0]
+                baseOutVarTensor = np.broadcast_to(baseOutVarTensor[..., None, None], baseOutVarTensor.shape + outvar.shape[-2:]).copy()
+
+                logger.debug(f"New baseOutVarTensor shape: {baseOutVarTensor.shape}")
+
+            baseOutTensor[tuple(outTensor_slices)] = out
+            baseOutVarTensor[tuple(outVarTensor_slices)] = outvar
+
+
+        out = baseOutTensor
+        outvar = baseOutVarTensor
+
+        return out, outvar


 class FakeSelector1DExtendedABCD(FakeSelectorSimpleABCD):
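The protection introduced in the patch above reduces to a simple guard: if the A, Ax, B, and Bx sideband yields are all identically zero for a given bin of the decorrelation axis, the fake estimate for that bin is forced to zero instead of being extrapolated from empty regions. A minimal standalone sketch of that guard (plain numpy, with hypothetical array shapes and a hypothetical smooth_fn callable, not the actual histselections code):

    import numpy as np

    def guarded_fake_estimate(sval, smooth_fn):
        # sval: (..., nRegions) array of sideband yields; entries 0-3 are A, Ax, B, Bx
        if np.all(sval[..., :4] == 0.0):
            # no usable sideband information: return an identically zero estimate
            return np.zeros_like(sval[..., 0])
        # otherwise proceed with the usual smoothed ABCD extrapolation
        return smooth_fn(sval)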
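The per-bin loop added in calculate_fullABCD_smoothed follows a standard slicing pattern: fix the decorrelation axis to one index at a time and write each independently smoothed block into a pre-allocated output. A hedged sketch of that pattern in isolation (assumed axis layout and a hypothetical smooth_fn, not the committed implementation):

    import numpy as np

    def smooth_independently(vals, axis_idx, smooth_fn):
        # vals: N-dimensional array; axis_idx: position of the decorrelation axis
        out = np.zeros_like(vals)
        for i in range(vals.shape[axis_idx]):
            sel = [slice(None)] * vals.ndim
            sel[axis_idx] = i
            # smooth one decorrelation bin at a time, independently of the others
            out[tuple(sel)] = smooth_fn(vals[tuple(sel)])
        return out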
From e5ed649a75f697db4a79a796fcccd444f3b52958 Mon Sep 17 00:00:00 2001
From: Ruben Forti
Date: Fri, 5 Dec 2025 11:40:12 +0100
Subject: [PATCH 05/24] small changes for plotting

---
 scripts/plotting/makeDataMCStackPlot.py | 63 ++++++++++++++++---------
 utilities/styles/styles.py              | 11 +++++
 wremnants/datasets/datagroups.py        |  4 +-
 wremnants/histselections.py             | 13 +++--
 4 files changed, 62 insertions(+), 29 deletions(-)

diff --git a/scripts/plotting/makeDataMCStackPlot.py b/scripts/plotting/makeDataMCStackPlot.py
index 214e63345..184b1ba76 100644
--- a/scripts/plotting/makeDataMCStackPlot.py
+++ b/scripts/plotting/makeDataMCStackPlot.py
@@ -158,6 +158,14 @@
     help="Axes for the fakerate binning",
     default=["eta", "pt", "charge"],
 )
+parser.add_argument(
+    "--decorrFakeAxis",
+    type=str,
+    default="utAngleSign",
+    help="Axis on which to enforce a decorrelation in the fake rate smoothing (e.g., utAngleSign)",
+    # Created to exclude the bin utAngleSign<0, in which any ABCD method cannot
+    # take place, since the four regions at low mt are empty
+)
 parser.add_argument(
     "--fineGroups",
     action="store_true",
@@ -296,9 +304,19 @@ def padArray(ref, matchLength):
     for ps in args.presel:
         if "=" in ps:
             axName, axRange = ps.split("=")
-            axMin, axMax = map(float, axRange.split(","))
-            logger.info(f"{axName} in [{axMin},{axMax}]")
-            presel[axName] = s[complex(0, axMin) : complex(0, axMax) : hist.sum]
+            if "," in ps:
+                #axMin, axMax = 
map(float, axRange.split(",")) + axMin, axMax = [float(p) if p != str() else None for p in axRange.split(",")] + logger.info(f"{axName} in [{axMin},{axMax}]") + presel[axName] = s[complex(0, axMin) : complex(0, axMax) : hist.sum] + else: + logger.info(f"Selecting {axName} {axRange.split('.')[1]}") + if axRange == "hist.overflow": + presel[axName] = hist.overflow + if axRange == "hist.underflow": + presel[axName] = hist.underflow + if axRange == "hist.sum": + presel[axName] = hist.sum else: logger.info(f"Integrating boolean {ps} axis") presel[ps] = s[:: hist.sum] @@ -314,32 +332,35 @@ def padArray(ref, matchLength): args.rebinBeforeSelection, ) -if args.selection: - applySelection = False - if args.selection != "none": - translate = { +if args.selection and args.selection != "none": + translate = { "hist.overflow": hist.overflow, "hist.underflow": hist.underflow, "hist.sum": hist.sum, } - for selection in args.selection.split(","): - axis, value = selection.split("=") - if value.startswith("["): - parts = [ - translate[p] if p in translate else int(p) if p != str() else None - for p in value[1:-1].split(":") - ] - select[axis] = hist.tag.Slicer()[parts[0] : parts[1] : parts[2]] - elif value == "hist.overflow": - select[axis] = hist.overflow - elif value == "hist.underflow": - select[axis] = hist.overflow - else: - select[axis] = int(value) + for selection in args.selection.split(","): + axis, value = selection.split("=") + if value.startswith("["): + parts = [ + translate[p] if p in translate else int(p) if p != str() else None + for p in value[1:-1].split(":") + ] + select[axis] = hist.tag.Slicer()[parts[0] : parts[1] : parts[2]] + elif value == "hist.overflow": + select[axis] = hist.overflow + elif value == "hist.underflow": + select[axis] = hist.underflow + else: + select[axis] = int(value) + applySelection = False if any( # does not trigger ABCD fake estimation if a cut is applied on the variables used to define ABCD regions + abcd_var in [cut_str.split("=")[0] for cut_str in args.selection.split(",")] + for abcd_var in ["relIso", "mt"] + ) else True else: applySelection = True groups.fakerate_axes = args.fakerateAxes +groups.decorrFakeAxis = args.decorrFakeAxis if applySelection: groups.set_histselectors( datasets, diff --git a/utilities/styles/styles.py b/utilities/styles/styles.py index d72c5cd17..387142439 100644 --- a/utilities/styles/styles.py +++ b/utilities/styles/styles.py @@ -38,6 +38,7 @@ def translate_html_to_latex(n): "Ztautau": "#964a8b", "Wmunu": "#E42536", "Wenu": "#E42536", + "WmunuOOA": "#2E9B92E6", "Wtaunu": "#F89C20", "DYlowMass": "deepskyblue", "PhotonInduced": "gold", @@ -74,6 +75,14 @@ def translate_html_to_latex(n): "Fake": ["Fake"], "Rare": ["PhotonInduced", "Top", "Diboson"], }, + "w_mass_ext": { + "Wmunu": ["Wmunu"], + "WmunuOOA": ["WmunuOOA"], + "Wtaunu": ["Wtaunu"], + "Z": ["Ztautau", "Zmumu", "DYlowMass"], + "Fake": ["Fake"], + "Rare": ["PhotonInduced", "Top", "Diboson"], + }, "z_dilepton": { "Zmumu": ["Zmumu"], "Other": ["Other", "Ztautau", "PhotonInduced"], @@ -95,6 +104,7 @@ def translate_html_to_latex(n): "Ztautau": r"Z/$\gamma^{\star}\to\tau\tau$", "Wmunu": r"W$^{\pm}\to\mu\nu$", "Wenu": r"W$^{\pm}\to e\nu$", + "WmunuOOA": r"W$^{\pm}\to\mu\nu$ OOA", "Wtaunu": r"W$^{\pm}\to\tau\nu$", "DYlowMass": r"Z/$\gamma^{\star}\to\mu\mu$, $10 Date: Fri, 5 Dec 2025 11:45:17 +0100 Subject: [PATCH 06/24] consider the polVar as noi - workaround to better interface with the current version of Rabbit, it will be probably reverted when the definitions of 
poi/noi/nuisanceNotConstrained are fixed

---
 wremnants/combine_theoryAgnostic_helper.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/wremnants/combine_theoryAgnostic_helper.py b/wremnants/combine_theoryAgnostic_helper.py
index a7a6a6d87..0e588d170 100644
--- a/wremnants/combine_theoryAgnostic_helper.py
+++ b/wremnants/combine_theoryAgnostic_helper.py
@@ -60,7 +60,8 @@ def add_theoryAgnostic_polVar_uncertainty(self):
             noConstraint=True,
             systAxes=["nPolVarSyst", "downUpVar"],
             labelsByAxis=["v", "downUpVar"],
-            # splitGroup={f"{groupName}_{coeffKey}" : f"{groupName}_{coeffKey}"}
+            # splitGroup={f"{groupName}_{coeffKey}" : f"{groupName}_{coeffKey}"},
+            noi=True,
         )

     def add_muRmuF_polVar_uncertainty(self):

From 5f8ce1fe3705798cf2d13e9c98214fe65d17a2a0 Mon Sep 17 00:00:00 2001
From: Ruben Forti
Date: Fri, 5 Dec 2025 11:54:35 +0100
Subject: [PATCH 07/24] fake prediction for uT<0 is now evaluated by scaling the ABCD prediction on uT>0 with a hardcoded normalization factor

---
 wremnants/histselections.py | 109 ++++++++++++++++++++++++------------
 1 file changed, 73 insertions(+), 36 deletions(-)

diff --git a/wremnants/histselections.py b/wremnants/histselections.py
index ddbf06abd..6f9623de2 100644
--- a/wremnants/histselections.py
+++ b/wremnants/histselections.py
@@ -399,6 +399,7 @@ def __init__(
         super().__init__(h, *args, **kwargs)

         # nominal histogram to be used to transfer variances for systematic variations
+        logger.debug("Setting h_nominal to None")
         self.h_nominal = None
         self.global_scalefactor = global_scalefactor

@@ -544,8 +545,12 @@ def apply_correction(self, y, yvar=None, flow=True):

     def transfer_variances(self, h, set_nominal=False):
         if set_nominal:
+            logger.debug("Setting h_nominal to h")
             self.h_nominal = h.copy()
         elif self.h_nominal is not None:
+            logger.debug("Setting variances in h from h_nominal")
+            logger.debug(f"Shape of h: {h.values().shape}")
+            logger.debug(f"Shape of h_nominal: {self.h_nominal.values().shape}")
             h = hh.transfer_variances(h, self.h_nominal)
         elif h.storage_type == hist.storage.Weight:
             logger.warning(
@@ -629,7 +634,11 @@ def get_hist(
                 dvar = y_frf**2 * cvar

         elif self.smoothing_mode == "full":
-            h = self.transfer_variances(h, set_nominal=is_nominal)
+            if not (variations_frf or variations_smoothing):
+                logger.debug("Transferring variances for full smoothing without systematic variations")
+                h = self.transfer_variances(h, set_nominal=is_nominal)
+            else:
+                logger.warning("Not transferring variances before full smoothing, since systematic variations are requested")
             d, dvar = self.calculate_fullABCD_smoothed(
                 h, flow=flow, syst_variations=variations_smoothing, use_spline=False
             )
@@ -834,12 +843,12 @@ def get_smoothed_tensor(self, h, sel, sval, svar, syst_variations=False, flow=Tr
             )

             logger.debug("Doing syst_variations... something might be wrong here")
-            logger.error("Shapes of outvar and svar: " + str(outvar.shape) + " " + str(svar.shape))
+            logger.debug("Shapes of outvar and svar: " + str(outvar.shape) + " " + str(svar.shape))

             # leave the underflow and overflow unchanged if present
             outvar[*sel[:-1], :, :] = svar

-            logger.error("Shapes of outvar and svar: " + str(outvar.shape) + " " + str(svar.shape))
+            logger.debug("Shapes of outvar and svar: " + str(outvar.shape) + " " + str(svar.shape))


         else:
@@ -850,7 +859,6 @@ def get_smoothed_tensor(self, h, sel, sval, svar, syst_variations=False, flow=Tr

         return out, outvar

-
     def smoothen_spectrum(
         self,
         h,
@@ -954,7 +962,7 @@ def smoothen_spectrum(
             )

         logger.debug("Smoothing completed. 
Getting output tensors.") - + out, outvar = self.get_smoothed_tensor(h, sel, sval, svar, syst_variations, flow=flow) logger.debug("Smoothing completed.") @@ -1094,33 +1102,33 @@ def calculate_fullABCD_smoothed( logger.debug(f"baseOutVarTensor shape: {baseOutVarTensor.shape}") if self.decorrFakeAxis!="" and self.decorrFakeAxis in hNew.axes.name: - decorrFake_idx = [n for n in hNew.axes.name].index(self.decorrFakeAxis) + decorrFakeAxis_idx = [n for n in hNew.axes.name].index(self.decorrFakeAxis) nbins_separateFakes = hNew.axes[self.decorrFakeAxis].size logger.debug( f"Found decorrelation axis {self.decorrFakeAxis} with {nbins_separateFakes} bins, applying smoothing independently in each bin." ) else: nbins_separateFakes = 1 - decorrFake_idx = -1 + decorrFakeAxis_idx = -1 - logger.debug(f"Decorrelation axis index: {decorrFake_idx}") + logger.debug(f"Decorrelation axis index: {decorrFakeAxis_idx}") - for i in range(nbins_separateFakes): - smoothing_slices = [slice(None)]*(sval.ndim) + for idx_fakeSep in range(nbins_separateFakes): + fakeSep_slices = [slice(None)]*(sval.ndim) outTensor_slices = [slice(None)]*(baseOutTensor.ndim) outVarTensor_slices = [slice(None)]*(baseOutVarTensor.ndim) - if decorrFake_idx >= 0: - smoothing_slices[decorrFake_idx] = i - outTensor_slices[decorrFake_idx] = i - outVarTensor_slices[decorrFake_idx] = i + if decorrFakeAxis_idx >= 0: + fakeSep_slices[decorrFakeAxis_idx] = idx_fakeSep + outTensor_slices[decorrFakeAxis_idx] = idx_fakeSep + outVarTensor_slices[decorrFakeAxis_idx] = idx_fakeSep - sval_sliced = sval[tuple(smoothing_slices)] - svar_sliced = svar[tuple(smoothing_slices)] + sval_sliced = sval[tuple(fakeSep_slices)] + svar_sliced = svar[tuple(fakeSep_slices)] logger.debug(f"Shape of sval_sliced: {sval_sliced.shape}") logger.debug(f"Shape of svar_sliced: {svar_sliced.shape}") - - logger.debug(f"Starting smoothing for decorrelation bin {i}.") + logger.debug(f"Number of nonvalid bins in sval AFTER the first SELECTION: {np.sum(sval<=0.0)} out of {sval.size}") + logger.debug(f"Starting smoothing for decorrelation bin {idx_fakeSep}.") goodbin = (sval_sliced > 0.0) & (svar_sliced > 0.0) if goodbin.size - np.sum(goodbin) > 0: @@ -1130,36 +1138,65 @@ def calculate_fullABCD_smoothed( logd = np.where(goodbin, np.log(sval_sliced), 0.0) if np.all(logd[..., :4]==0.0): - logger.debug(f"All ABCD values are zeros! Returning zero as Fake estimate.") - logger.debug(f"Syst variations: {syst_variations}") - sval_sliced = np.zeros_like(sval_sliced[..., 0]) - svar_sliced = np.zeros_like( - svar_sliced[..., 0] if not syst_variations - else svar_sliced[..., 0][..., None, None] + + if decorrFakeAxis_idx <0: + logger.debug(f"All ABCD values are zeros! Returning zero as Fake estimate.") + logger.debug(f"Syst variations: {syst_variations}") + sval_sliced = np.zeros_like(sval_sliced[..., 0]) + svar_sliced = np.zeros_like( + svar_sliced[..., 0] if not syst_variations + else svar_sliced[..., 0][..., None, None] ) - - out, outvar = self.get_smoothed_tensor( - h, (sval_sliced.ndim)*[slice(None)], sval_sliced, svar_sliced, - syst_variations=syst_variations, flow=flow + + out, outvar = self.get_smoothed_tensor( + h, (sval_sliced.ndim)*[slice(None)], sval_sliced, svar_sliced, + syst_variations=syst_variations, flow=flow + ) + + else: + logger.debug(f"All ABCD values are zeros! Returning Fake estimate based on other bin, with an HARDCODED norm. 
factor") + logger.debug(f"Syst variations: {syst_variations}") + compl_mask = np.array([True if j!=idx_fakeSep else False for j in range(nbins_separateFakes)]) + + logger.debug(f"Complement mask: {compl_mask}") + logger.debug(f"APPLIED 'np.where'; {np.where(compl_mask)[0]}") + + sval_slicedCompl = sval.take(indices=np.where(compl_mask)[0], axis=decorrFakeAxis_idx).sum(axis=decorrFakeAxis_idx) + svar_slicedCompl = svar.take(indices=np.where(compl_mask)[0], axis=decorrFakeAxis_idx).sum(axis=decorrFakeAxis_idx) + + logger.debug(f"Sval_slicedCompl shape: {sval_slicedCompl.shape}") + count_nonvalid = np.sum(sval_slicedCompl<=0.0) + logger.debug(f"Number of nonvalid bins in complement: {count_nonvalid} out of {sval_slicedCompl.size}") + + out_other, outvar_other = self.smoothen_spectrum( + h, hNew.axes[self.smoothing_axis_name].edges, sval_slicedCompl, svar_slicedCompl, + syst_variations=syst_variations, use_spline=use_spline, + reduce=not signal_region, flow=flow ) + NORMFACTOR = 0.1196 # Ratio of nonprompt data in SV for uT<0 (num.) to ut>0 (den.) + + out = out_other * NORMFACTOR + outvar = outvar_other * NORMFACTOR**2 + else: out, outvar = self.smoothen_spectrum( h, hNew.axes[self.smoothing_axis_name].edges, sval_sliced, svar_sliced, - syst_variations=syst_variations, use_spline=use_spline, reduce=not signal_region, flow=flow - ) - - if syst_variations and i==0: - logger.debug("Updatng baseOutVarTensor to correctly include syst variations") + syst_variations=syst_variations, use_spline=use_spline, + reduce=not signal_region, flow=flow + ) + + if syst_variations and idx_fakeSep==0: + logger.debug("Updating baseOutVarTensor to correctly include syst variations") logger.debug(f"Current outvar shape: {outvar.shape}") logger.debug(f"Current baseOutVarTensor shape: {baseOutVarTensor.shape}") baseOutVarTensor = np.zeros( (*baseOutVarTensor.shape, *outvar.shape[-2:]), dtype=baseOutVarTensor.dtype ) outVarTensor_slices += [slice(None), slice(None)] - - - logger.debug(f"Smoothing completed for decorrelation bin {i}.") + + + logger.debug(f"Smoothing completed for decorrelation bin {idx_fakeSep}.") logger.debug(f"Smoothing output shape: {out.shape}") logger.debug(f"Smoothing output var shape: {outvar.shape}") logger.debug(f"Smoothing baseOutTensor shape: {baseOutTensor.shape}") From c9354b7dbec7106c41083c5db420d8623fe72969 Mon Sep 17 00:00:00 2001 From: Ruben Forti Date: Fri, 5 Dec 2025 12:18:57 +0100 Subject: [PATCH 08/24] label change for the utAngleSign axis --- scripts/plotting/makeDataMCStackPlot.py | 10 ++++---- wremnants/datasets/datagroups.py | 4 +-- wremnants/histselections.py | 34 ++++++++++++------------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/scripts/plotting/makeDataMCStackPlot.py b/scripts/plotting/makeDataMCStackPlot.py index 184b1ba76..068d365e5 100644 --- a/scripts/plotting/makeDataMCStackPlot.py +++ b/scripts/plotting/makeDataMCStackPlot.py @@ -159,12 +159,12 @@ default=["eta", "pt", "charge"], ) parser.add_argument( - "--decorrFakeAxis", + "--fakeTransferAxis", type=str, default="utAngleSign", - help="Axis on which to enforce a decorrelation the in the fake rate smoothing (e.g., utAngleSign)", - # Created to exclude the bin utAngleSign<0, in which any ABCD method cannot - # take place, since the four regions at low mt are empty + help=""" + Axis where the fake prediction on non-valid bins (i.e. 
where the A-Ax-B-Bx regions are empty) + is estimated by using the other 'valid' bins of this axis, via a normalization or shape reweighting.""", ) parser.add_argument( "--fineGroups", @@ -360,7 +360,7 @@ def padArray(ref, matchLength): applySelection = True groups.fakerate_axes = args.fakerateAxes -groups.decorrFakeAxis = args.decorrFakeAxis +groups.fakeTransferAxis = args.fakeTransferAxis if applySelection: groups.set_histselectors( datasets, diff --git a/wremnants/datasets/datagroups.py b/wremnants/datasets/datagroups.py index 287af1b03..6e2ec9558 100644 --- a/wremnants/datasets/datagroups.py +++ b/wremnants/datasets/datagroups.py @@ -79,7 +79,7 @@ def __init__(self, infile, mode=None, **kwargs): self.gen_axes = {} self.fit_axes = [] self.fakerate_axes = ["pt", "eta", "charge"] - self.decorrFakeAxis = "utAngleSign" + self.fakeTransferAxis = "utAngleSign" self.setGenAxes() @@ -315,7 +315,7 @@ def set_histselectors( h, global_scalefactor=scale, fakerate_axes=self.fakerate_axes, - decorrFakeAxis=self.decorrFakeAxis, + fakeTransferAxis=self.fakeTransferAxis, smoothing_mode=smoothing_mode, smoothing_order_fakerate=smoothingOrderFakerate, smoothing_order_spectrum=smoothingOrderSpectrum, diff --git a/wremnants/histselections.py b/wremnants/histselections.py index 6f9623de2..8f51c4c79 100644 --- a/wremnants/histselections.py +++ b/wremnants/histselections.py @@ -123,7 +123,7 @@ def __init__( name_x=None, name_y=None, fakerate_axes=["eta", "pt", "charge"], - decorrFakeAxis="", + fakeTransferAxis="", smoothing_axis_name="pt", rebin_smoothing_axis="automatic", # can be a list of bin edges, "automatic", or None upper_bound_y=None, # using an upper bound on the abcd y-axis (e.g. isolation) @@ -197,13 +197,13 @@ def __init__( self.set_selections_x() self.set_selections_y() - self.decorrFakeAxis = decorrFakeAxis + self.fakeTransferAxis = fakeTransferAxis if fakerate_axes is not None: self.fakerate_axes = fakerate_axes self.fakerate_integration_axes = [ n for n in h.axes.name - if n not in [self.name_x, self.name_y, *fakerate_axes, self.decorrFakeAxis] + if n not in [self.name_x, self.name_y, *fakerate_axes, self.fakeTransferAxis] ] logger.debug( f"Setting fakerate integration axes to {self.fakerate_integration_axes}" @@ -831,7 +831,7 @@ def get_smoothed_tensor(self, h, sel, sval, svar, syst_variations=False, flow=Tr else h ) out = np.zeros( - [a.extent if flow else a.shape for a in hOut.axes if a.name not in [self.name_y, self.decorrFakeAxis]], + [a.extent if flow else a.shape for a in hOut.axes if a.name not in [self.name_y, self.fakeTransferAxis]], dtype=sval.dtype, ) # leave the underflow and overflow unchanged if present @@ -1101,26 +1101,26 @@ def calculate_fullABCD_smoothed( logger.debug(f"baseOutTensor shape: {baseOutTensor.shape}") logger.debug(f"baseOutVarTensor shape: {baseOutVarTensor.shape}") - if self.decorrFakeAxis!="" and self.decorrFakeAxis in hNew.axes.name: - decorrFakeAxis_idx = [n for n in hNew.axes.name].index(self.decorrFakeAxis) - nbins_separateFakes = hNew.axes[self.decorrFakeAxis].size + if self.fakeTransferAxis!="" and self.fakeTransferAxis in hNew.axes.name: + fakeTransferAxis_idx = [n for n in hNew.axes.name].index(self.fakeTransferAxis) + nbins_separateFakes = hNew.axes[self.fakeTransferAxis].size logger.debug( - f"Found decorrelation axis {self.decorrFakeAxis} with {nbins_separateFakes} bins, applying smoothing independently in each bin." + f"Found decorrelation axis {self.fakeTransferAxis} with {nbins_separateFakes} bins, applying smoothing independently in each bin." 
) else: nbins_separateFakes = 1 - decorrFakeAxis_idx = -1 + fakeTransferAxis_idx = -1 - logger.debug(f"Decorrelation axis index: {decorrFakeAxis_idx}") + logger.debug(f"Decorrelation axis index: {fakeTransferAxis_idx}") for idx_fakeSep in range(nbins_separateFakes): fakeSep_slices = [slice(None)]*(sval.ndim) outTensor_slices = [slice(None)]*(baseOutTensor.ndim) outVarTensor_slices = [slice(None)]*(baseOutVarTensor.ndim) - if decorrFakeAxis_idx >= 0: - fakeSep_slices[decorrFakeAxis_idx] = idx_fakeSep - outTensor_slices[decorrFakeAxis_idx] = idx_fakeSep - outVarTensor_slices[decorrFakeAxis_idx] = idx_fakeSep + if fakeTransferAxis_idx >= 0: + fakeSep_slices[fakeTransferAxis_idx] = idx_fakeSep + outTensor_slices[fakeTransferAxis_idx] = idx_fakeSep + outVarTensor_slices[fakeTransferAxis_idx] = idx_fakeSep sval_sliced = sval[tuple(fakeSep_slices)] svar_sliced = svar[tuple(fakeSep_slices)] @@ -1139,7 +1139,7 @@ def calculate_fullABCD_smoothed( logd = np.where(goodbin, np.log(sval_sliced), 0.0) if np.all(logd[..., :4]==0.0): - if decorrFakeAxis_idx <0: + if fakeTransferAxis_idx <0: logger.debug(f"All ABCD values are zeros! Returning zero as Fake estimate.") logger.debug(f"Syst variations: {syst_variations}") sval_sliced = np.zeros_like(sval_sliced[..., 0]) @@ -1161,8 +1161,8 @@ def calculate_fullABCD_smoothed( logger.debug(f"Complement mask: {compl_mask}") logger.debug(f"APPLIED 'np.where'; {np.where(compl_mask)[0]}") - sval_slicedCompl = sval.take(indices=np.where(compl_mask)[0], axis=decorrFakeAxis_idx).sum(axis=decorrFakeAxis_idx) - svar_slicedCompl = svar.take(indices=np.where(compl_mask)[0], axis=decorrFakeAxis_idx).sum(axis=decorrFakeAxis_idx) + sval_slicedCompl = sval.take(indices=np.where(compl_mask)[0], axis=fakeTransferAxis_idx).sum(axis=fakeTransferAxis_idx) + svar_slicedCompl = svar.take(indices=np.where(compl_mask)[0], axis=fakeTransferAxis_idx).sum(axis=fakeTransferAxis_idx) logger.debug(f"Sval_slicedCompl shape: {sval_slicedCompl.shape}") count_nonvalid = np.sum(sval_slicedCompl<=0.0) From e433ca3600aab46f7fcc6f23a98e6c7d01df5d49 Mon Sep 17 00:00:00 2001 From: Marco Cipriani Date: Mon, 8 Dec 2025 17:38:38 +0100 Subject: [PATCH 09/24] small fix --- scripts/analysisTools/tests/testFakesVsMt.py | 4 ++-- utilities/styles/styles.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/analysisTools/tests/testFakesVsMt.py b/scripts/analysisTools/tests/testFakesVsMt.py index 49ef70a32..5bc17c419 100644 --- a/scripts/analysisTools/tests/testFakesVsMt.py +++ b/scripts/analysisTools/tests/testFakesVsMt.py @@ -293,8 +293,8 @@ def drawAndFitFRF( ymax += diff * 0.45 if ymin < 0: ymin = 0 - if ymax > 5.0: - ymax = 5.0 + if ymax > 4.0: + ymax = 4.0 h1.GetXaxis().SetTitle(xAxisName) h1.GetXaxis().SetTitleOffset(1.2) diff --git a/utilities/styles/styles.py b/utilities/styles/styles.py index 47ab5b0c2..72d549c14 100644 --- a/utilities/styles/styles.py +++ b/utilities/styles/styles.py @@ -164,6 +164,8 @@ def translate_html_to_latex(n): "run": r"Run range", "nRecoVtx": r"Number of reconstructed vertices", "PV_npvsGood": r"Number of reconstructed vertices", + "utmu": {"label": r"$\mathit{u}_{T}^{\mu}$", "unit": "GeV"}, + "utAngleSign": r"sign($\mathit{u}_{T}^{\mu}$)", # "ewPTll": r"$\mathrm{Post\ FSR}\ p_\mathrm{T}^{\mu\mu}$", # "ewMll": r"$\mathrm{Post\ FSR}\ m^{\mu\mu}$", # "ewYll": r"$\mathrm{Post\ FSR}\ Y^{\mu\mu}$", @@ -421,6 +423,7 @@ def translate_html_to_latex(n): "absYVGen": lambda l, h: rf"${round(l,3)} < |Y| < {round(h,3)}$", "helicitySig": lambda x: rf"$\sigma_{{{'UL' 
if x==-1 else int(x)}}}$", "ai": lambda x: rf"$A_{int(x)}$", + "utAngleSign": lambda x: rf"$\mathit{{sign}}(\mathit{{u}}_{{T}}^\mu) = {int(x)}$", } impact_labels = { From c79eca9ac76ff416a053797b822ef655e6a68029 Mon Sep 17 00:00:00 2001 From: Ruben Forti Date: Tue, 9 Dec 2025 11:08:57 +0100 Subject: [PATCH 10/24] moved reference commit of rabbit --- rabbit | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rabbit b/rabbit index 6d6791ac3..febf9a78e 160000 --- a/rabbit +++ b/rabbit @@ -1 +1 @@ -Subproject commit 6d6791ac309536785826946c0cefc38af7b17107 +Subproject commit febf9a78eaf36929ad6519c1061fb6939b33f807 From 4c957892dfb746b368b72a3302bcf94de8ca2180 Mon Sep 17 00:00:00 2001 From: Marco Cipriani Date: Tue, 9 Dec 2025 11:51:13 +0100 Subject: [PATCH 11/24] remove Zmumu OOA for theory agnostic with pol var --- scripts/histmakers/mw_with_mu_eta_pt.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/histmakers/mw_with_mu_eta_pt.py b/scripts/histmakers/mw_with_mu_eta_pt.py index 68101cd87..23d9cae36 100644 --- a/scripts/histmakers/mw_with_mu_eta_pt.py +++ b/scripts/histmakers/mw_with_mu_eta_pt.py @@ -481,7 +481,6 @@ args.theoryAgnosticPolVar and args.theoryAgnosticSplitOOA ): # this splitting is not needed for the normVar version of the theory agnostic datasets = unfolding_tools.add_out_of_acceptance(datasets, group="Wmunu") - datasets = unfolding_tools.add_out_of_acceptance(datasets, group="Zmumu") groups_to_aggregate.append("WmunuOOA") # axes for study of fakes From 488f6cc16c91cee571a0ff7926e82433b35c3f16 Mon Sep 17 00:00:00 2001 From: Ruben Forti Date: Mon, 22 Dec 2025 21:53:26 +0100 Subject: [PATCH 12/24] fakes in uT<0 are evaluated by scaling the ABCD prediction on uT>0 with a pT dependent shape --- .../analysisTools/makeFakeTransferTemplate.py | 254 ++++++++++++++++++ wremnants/datasets/datagroups.py | 2 +- wremnants/histselections.py | 37 ++- 3 files changed, 273 insertions(+), 20 deletions(-) create mode 100644 scripts/analysisTools/makeFakeTransferTemplate.py diff --git a/scripts/analysisTools/makeFakeTransferTemplate.py b/scripts/analysisTools/makeFakeTransferTemplate.py new file mode 100644 index 000000000..a086099ea --- /dev/null +++ b/scripts/analysisTools/makeFakeTransferTemplate.py @@ -0,0 +1,254 @@ +import hist +import ROOT +import os +import argparse +import numpy as np +import hist +import pickle +import tensorflow as tf +from wremnants.datasets.datagroups import Datagroups +from wums import boostHistHelpers as hh +from wums.fitutils import fit_hist +from narf import histutils +from functools import partial +from array import array + +ROOT.gROOT.SetBatch(True) + +def pol4_root(xvals, parms, xLowVal=0.0, xFitRange=1.0): + xscaled = (xvals[0] - xLowVal) / xFitRange + return ( + parms[0] + + parms[1] * xscaled + + parms[2] * xscaled**2 + + parms[3] * xscaled**3 + + parms[4] * xscaled**4 + ) + +def crystal_ball_right_tf(xvals, parms): + # parms: [A, MPV, sigma, alpha, n] + x = tf.convert_to_tensor(xvals[0]) + A = tf.cast(parms[0], x.dtype) + MPV = tf.cast(parms[1], x.dtype) + sigma = tf.maximum(tf.cast(parms[2], x.dtype), 1e-8) + alpha = tf.cast(parms[3], x.dtype) + n = tf.cast(parms[4], x.dtype) + + t = (x - MPV) / sigma + + # constants for continuity + # for right-tail CrystalBall: tail when t > alpha + # tail shape: A * ( (n/alpha)^n * exp(-alpha^2/2) ) / ( (n/alpha) + t )^n + # gaussian part: A * exp(-t^2/2) + prefactor = tf.pow(n/alpha, n) * tf.exp(-0.5 * alpha * alpha) + + gauss = A * tf.exp(-0.5 * t * t) + tail = A * prefactor / tf.pow((n/alpha) 
+ t, n) + + return tf.where(t > alpha, tail, gauss) + + +def convert_binEdges_idx(ed_list, binning): + low, high = 0, 0 + for binEdge in binning: + if binEdge+0.01 < ed_list[0]: low+=1 + if binEdge+0.01 < ed_list[1]: high+=1 + return (low, high) + +parser = argparse.ArgumentParser() + +parser.add_argument("-i", "--infile", type=str, + default = "/scratch/rforti/wremnants_hists/mw_with_mu_eta_pt_scetlib_dyturboCorr_maxFiles_m1_x0p50_y3p00_THAGNV0_fromSV.hdf5", + help="Input file with histograms") +parser.add_argument("-o", "--outdir", type=str, default = "./", help="Output directory") +parser.add_argument( "--nEtaBins", type=int, default=1, choices=[1,2,3,4,6,8], help="Number of eta bins where to evaluate the templates") +parser.add_argument( "--nChargeBins", type=int, default=1, choices=[1,2], help="Number of charge bins where to evaluate the templates") +parser.add_argument( "--doQCD", action="store_true", help="Make templates with QCD instead of nonprompt contribution") + +args = parser.parse_args() + +groupsToConsider = ( + ["Data", "Wmunu", "Wtaunu", "Diboson", "Zmumu", "DYlowMass", "PhotonInduced", "Ztautau", "Top"] + if not args.doQCD + else ["QCD"] +) + +eta_genBinning = array("d", [round(-2.4+0.1*i,1) for i in range(49)]) +charge_genBinning = array("d", [-2, 0, 2]) + +delta_eta = (eta_genBinning[-1] - eta_genBinning[0]) / args.nEtaBins +delta_ch = (charge_genBinning[-1] - charge_genBinning[0]) / args.nChargeBins + +decorrBins_eta = [ + (round((eta_genBinning[0] + i *delta_eta), 1), + round((eta_genBinning[0] + (i+1)*delta_eta), 1)) + for i in range(args.nEtaBins) +] +decorrBins_ch = [ + (round((charge_genBinning[0] + i *delta_ch), 1), + round((charge_genBinning[0] + (i+1)*delta_ch), 1)) + for i in range(args.nChargeBins) +] + +print("Decorrelating in the eta bins: ", decorrBins_eta) +print("Decorrelating in the charge bins: ", decorrBins_ch) + +groups = Datagroups( + args.infile, + filterGroups=groupsToConsider, + excludeGroups=None, +) + +# There is probably a better way to do this but I don't want to deal with it +datasets = groups.getNames() +print(f"Will work on datasets {datasets}") + +exclude = ["Data"] if not args.doQCD else [] +prednames = list(groups.getNames([d for d in datasets if d not in exclude], exclude=False, match_exact=True)) + +print(f"Unstacked processes are {exclude}") +print(f"Stacked processes are {prednames}") + +histInfo = groups.groups + +select_utMinus = {"utAngleSign" : hist.tag.Slicer()[0:1:hist.sum]} +select_utPlus = {"utAngleSign" : hist.tag.Slicer()[1:2:hist.sum]} + +groups.set_histselectors( + datasets, + "nominal", + smoothing_mode="full", + smoothingOrderSpectrum=3, + smoothingPolynomialSpectrum="chebyshev", + integrate_x=all("mt" not in x.split("-") for x in ["pt"]), + mode="extended1D", + forceGlobalScaleFakes=None, + mcCorr=[None], +) + +groups.setNominalName("nominal") +groups.loadHistsForDatagroups("nominal", syst="", procsToRead=datasets, applySelection=True) + +out_hist = ROOT.TH3D(f"fakeRatio_utAngleSign_{'Data' if not args.doQCD else 'QCD'}", "", + len(eta_genBinning)-1, eta_genBinning, + 30, array("d", [round(26.0+1.0*i, 1) for i in range(31)]), + len(charge_genBinning)-1, charge_genBinning) + +for ch_edges in decorrBins_ch: + for eta_edges in decorrBins_eta: + + ch_low_idx, ch_high_idx = convert_binEdges_idx(ch_edges, charge_genBinning) + eta_low_idx, eta_high_idx = convert_binEdges_idx(eta_edges, eta_genBinning) + + print(ch_low_idx, ch_high_idx) + print(eta_low_idx, eta_high_idx) + + select_utMinus["charge"] = 
hist.tag.Slicer()[ch_low_idx:ch_high_idx:hist.sum]
+        select_utMinus["eta"] = hist.tag.Slicer()[eta_low_idx:eta_high_idx:hist.sum]
+
+        select_utPlus["charge"] = hist.tag.Slicer()[ch_low_idx:ch_high_idx:hist.sum]
+        select_utPlus["eta"] = hist.tag.Slicer()[eta_low_idx:eta_high_idx:hist.sum]
+
+        print(f"Processing charge bin [{ch_edges}] and eta bin [{eta_edges}]")
+
+        boost_h_utMinus = histInfo["Data"].copy("Data_utMinus").hists["nominal"]
+        boost_h_utMinus = boost_h_utMinus[select_utMinus]
+        boost_h_utMinus = hh.projectNoFlow(boost_h_utMinus, ["pt"], ["relIso", "mt"])
+        root_h_utMinus = histutils.hist_to_root(boost_h_utMinus)
+
+        boost_h_utPlus = histInfo["Data"].copy("Data_utPlus").hists["nominal"]
+        boost_h_utPlus = boost_h_utPlus[select_utPlus]
+        boost_h_utPlus = hh.projectNoFlow(boost_h_utPlus, ["pt"], ["relIso", "mt"])
+        root_h_utPlus = histutils.hist_to_root(boost_h_utPlus)
+
+        print("Integrals BEFORE prompt subtraction", root_h_utMinus.Integral(), root_h_utPlus.Integral())
+
+        for mcName in prednames:
+            if args.doQCD: continue
+            print(f"Subtracting {mcName} from data")
+            boost_h_mc_utMinus = histInfo[mcName].copy(f"{mcName}_utMinus").hists["nominal"]
+            boost_h_mc_utMinus = boost_h_mc_utMinus[select_utMinus]
+            boost_h_mc_utMinus = hh.projectNoFlow(boost_h_mc_utMinus, ["pt"], ["relIso", "mt"])
+            root_h_mc_utMinus = histutils.hist_to_root(boost_h_mc_utMinus)
+            root_h_utMinus.Add(root_h_mc_utMinus, -1)
+
+            boost_h_mc_utPlus = histInfo[mcName].copy(f"{mcName}_utPlus").hists["nominal"]
+            boost_h_mc_utPlus = boost_h_mc_utPlus[select_utPlus]
+            boost_h_mc_utPlus = hh.projectNoFlow(boost_h_mc_utPlus, ["pt"], ["relIso", "mt"])
+            root_h_mc_utPlus = histutils.hist_to_root(boost_h_mc_utPlus)
+            root_h_utPlus.Add(root_h_mc_utPlus, -1)
+
+        print("Integrals AFTER prompt subtraction", root_h_utMinus.Integral(), root_h_utPlus.Integral())
+
+
+        ratio_h = root_h_utMinus.Clone(f"fakeRatio_utAngleSign")
+        ratio_h.Sumw2()
+        ratio_h.Divide(root_h_utPlus)
+
+        for idx_ch in range(ch_low_idx+1, ch_high_idx+1):
+            for idx_eta in range(eta_low_idx+1, eta_high_idx+1):
+                print(f"Setting weights for chBin={idx_ch}, etaBin={idx_eta}")
+                for idx_pt in range(1, 31):
+                    out_hist.SetBinContent(
+                        idx_eta, idx_pt, idx_ch,
+                        ratio_h.GetBinContent(idx_pt)
+                    )
+                    out_hist.SetBinError(
+                        idx_eta, idx_pt, idx_ch,
+                        ratio_h.GetBinError(idx_pt)
+                    )
+                    if ratio_h.GetBinContent(idx_pt)<=0.0:
+                        print("WARNING - found non-positive value in bin: ", idx_eta, idx_pt, idx_ch)
+
+
+boost_out_hist = histutils.root_to_hist(out_hist)
+
+with open(f"{args.outdir}/fakeTransferTemplates.pkl", "wb") as fout:
+    pickle.dump(boost_out_hist, fout)
+
+fout = ROOT.TFile(f"{args.outdir}/fakeTransferTemplates{'' if not args.doQCD else '_QCD'}.root", "RECREATE")
+fout.cd()
+out_hist.Write()
+
+'''
+x_axis = hist.axis.Regular(30, 26, 56, name="pt", flow=False)
+
+tr_hist = hist.Hist(x_axis, storage=hist.storage.Weight())
+
+for i in range(30):
+    tr_hist[i] = (arr_val[i], arr_var[i])
+
+
+params = np.array([1.0, 0.0, 0.0, 0.0, 0.0])  # Initial parameters for pol4_root
+
+res = fit_hist(
+    tr_hist,
+    partial(pol4_root, xLowVal=26.0, xFitRange=30.0),
+    params,
+)
+
+tr_func = []
+for i in range(len(bincenters)):
+    tr_func.append(
+        float(
+            pol4_root(
+                [bincenters[i]],
+                res["x"],
+                xLowVal=26.0,
+                xFitRange=30.0,
+            )))
+print(tr_func)
+print("Params:", res["x"])
+
+chi2 = res["loss_val"]
+ndof = len(bincenters) - len(res["x"])
+chi2Prob = ROOT.TMath.Prob(chi2, ndof)
+
+print(chi2, ndof, chi2Prob)
+
+'''
+
+
+fout.Close()
+
+
diff --git 
a/wremnants/datasets/datagroups.py b/wremnants/datasets/datagroups.py index 6e2ec9558..dabb83acf 100644 --- a/wremnants/datasets/datagroups.py +++ b/wremnants/datasets/datagroups.py @@ -1373,7 +1373,7 @@ def addSystematic( ) for proc in procs_to_add: - logger.error(f"Now doing syst {name} for proc {proc}!") + logger.debug(f"Now doing syst {name} for proc {proc}!") hvar = self.groups[proc].hists["syst"] logger.debug(f"Actions: {action}, args: {actionArgs}") diff --git a/wremnants/histselections.py b/wremnants/histselections.py index 8f51c4c79..59907241b 100644 --- a/wremnants/histselections.py +++ b/wremnants/histselections.py @@ -1,4 +1,6 @@ import hist +import os +import pickle import numpy as np from scipy import interpolate @@ -199,11 +201,11 @@ def __init__( self.fakeTransferAxis = fakeTransferAxis if fakerate_axes is not None: - self.fakerate_axes = fakerate_axes + self.fakerate_axes = fakerate_axes # list of axes names where to perform independent fakerate computation self.fakerate_integration_axes = [ n for n in h.axes.name - if n not in [self.name_x, self.name_y, *fakerate_axes, self.fakeTransferAxis] + if n not in [self.name_x, self.name_y, *fakerate_axes] ] logger.debug( f"Setting fakerate integration axes to {self.fakerate_integration_axes}" @@ -435,6 +437,16 @@ def __init__( ) else: self.spectrum_regressor = None + + if self.fakeTransferAxis in h.axes.name: + logger.debug("Loaded transfer tensor for fakes") + trTensorPath = os.path.join( + os.environ["WREM_BASE"], + "wremnants-data/data/fakesWmass/fakeTransferTemplates.pkl" + ) + with open(trTensorPath, "rb") as fTens: + self.fakeTransferTensor = pickle.load(fTens) + if self.smoothing_mode in ["binned"]: # rebinning doesn't make sense for binned estimation @@ -1169,15 +1181,14 @@ def calculate_fullABCD_smoothed( logger.debug(f"Number of nonvalid bins in complement: {count_nonvalid} out of {sval_slicedCompl.size}") out_other, outvar_other = self.smoothen_spectrum( - h, hNew.axes[self.smoothing_axis_name].edges, sval_slicedCompl, svar_slicedCompl, + h, hNew.axes[self.smoothing_axis_name].edges, + sval_slicedCompl, svar_slicedCompl, syst_variations=syst_variations, use_spline=use_spline, reduce=not signal_region, flow=flow ) - NORMFACTOR = 0.1196 # Ratio of nonprompt data in SV for uT<0 (num.) to ut>0 (den.) - - out = out_other * NORMFACTOR - outvar = outvar_other * NORMFACTOR**2 + out = out_other * self.fakeTransferTensor.values()[..., *[None]*(out_other.ndim-3)] + outvar = outvar_other * (self.fakeTransferTensor.values()[..., *[None]*(outvar_other.ndim-3)]**2) else: out, outvar = self.smoothen_spectrum( @@ -1195,27 +1206,15 @@ def calculate_fullABCD_smoothed( ) outVarTensor_slices += [slice(None), slice(None)] - logger.debug(f"Smoothing completed for decorrelation bin {idx_fakeSep}.") logger.debug(f"Smoothing output shape: {out.shape}") logger.debug(f"Smoothing output var shape: {outvar.shape}") logger.debug(f"Smoothing baseOutTensor shape: {baseOutTensor.shape}") logger.debug(f"Smoothing baseOutVarTensor shape: {baseOutVarTensor.shape}") - - if syst_variations and outvar.shape[-2:]!=baseOutVarTensor.shape[-2:]: - logger.warning( - f"Shape mismatch in syst variations: existing is {baseOutVarTensor.shape[-2:]}, new is {outvar.shape[-2:]}. Overwriting baseOutVarTensor." 
-                )
-                baseOutVarTensor = baseOutVarTensor[..., 0, 0]
-                baseOutVarTensor = np.broadcast_to(baseOutVarTensor[..., None, None], baseOutVarTensor.shape + outvar.shape[-2:]).copy()
-
-                logger.debug(f"New baseOutVarTensor shape: {baseOutVarTensor.shape}")
-
             baseOutTensor[tuple(outTensor_slices)] = out
             baseOutVarTensor[tuple(outVarTensor_slices)] = outvar
 
-
             out = baseOutTensor
             outvar = baseOutVarTensor

From 92931262c98dbdc83ed88758e35dd07c813317fc Mon Sep 17 00:00:00 2001
From: Ruben Forti
Date: Mon, 22 Dec 2025 21:57:52 +0100
Subject: [PATCH 13/24] decorrelate FakeParam syst in uT bins

---
 scripts/rabbit/setupRabbit.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/scripts/rabbit/setupRabbit.py b/scripts/rabbit/setupRabbit.py
index 4e4a7c71c..aadd31151 100644
--- a/scripts/rabbit/setupRabbit.py
+++ b/scripts/rabbit/setupRabbit.py
@@ -65,7 +65,7 @@ def make_subparsers(parser):
         "--priorNormXsec",
         type=float,
         default=1,
-        help=r"Prior for shape uncertainties on cross sections for theory agnostic or unfolding analysis with POIs as NOIs (1 means 100%). If negative, it will use shapeNoConstraint in the fit",
+        help=r"Prior for shape uncertainties on cross sections for theory agnostic or unfolding analysis with POIs as NOIs (1 means 100%%). If negative, it will use shapeNoConstraint in the fit",
     )
     parser.add_argument(
         "--scaleNormXsecHistYields",
@@ -236,7 +236,7 @@ def make_parser(parser=None):
     parser.add_argument(
         "--lumiUncertainty",
         type=float,
-        help=r"Uncertainty for luminosity in excess to 1 (e.g. 1.012 means 1.2%); automatic by default",
+        help=r"Uncertainty for luminosity in excess of 1 (e.g. 1.012 means 1.2%%); automatic by default",
         default=None,
     )
     parser.add_argument(
@@ -647,7 +647,7 @@ def make_parser(parser=None):
         default=0,
         type=float,
         help=r"""Add normalization uncertainty for W signal.
-        If negative, treat as free floating with the absolute being the size of the variation (e.g. -1.01 means +/-1% of the nominal is varied).
+        If negative, treat as free floating, with the absolute value setting the size of the variation (e.g. -1.01 means +/-1%% of the nominal is varied).
         If 0 nothing is added""",
     )
     parser.add_argument(
@@ -655,7 +655,7 @@ def make_parser(parser=None):
         default=0,
         type=float,
         help=r"""Add normalization uncertainty for W->tau,nu process.
-        If negative, treat as free floating with the absolute being the size of the variation (e.g. -1.01 means +/-1% of the nominal is varied).
+        If negative, treat as free floating, with the absolute value setting the size of the variation (e.g. -1.01 means +/-1%% of the nominal is varied).
If 0 nothing is added""", ) parser.add_argument( @@ -1856,7 +1856,7 @@ def fake_nonclosure( return hvar for axesToDecorrNames in [ - [], + [datagroups.fakeTransferAxis] if True else [], ]: for idx, mag in [ (1, 0.1), From 3f96a4c53bbc0058910f902a6c92b69cb96994d6 Mon Sep 17 00:00:00 2001 From: Ruben Forti Date: Mon, 22 Dec 2025 22:01:03 +0100 Subject: [PATCH 14/24] new syst for eta nonclosure in ut<0 bin --- scripts/rabbit/setupRabbit.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/scripts/rabbit/setupRabbit.py b/scripts/rabbit/setupRabbit.py index aadd31151..d013df9a4 100644 --- a/scripts/rabbit/setupRabbit.py +++ b/scripts/rabbit/setupRabbit.py @@ -1891,6 +1891,41 @@ def fake_nonclosure( ), ) + def fake_nonclosure_utMinus( + h, + axesToDecorrNames=["eta"], + variation_size=0.1, + *args, + **kwargs, + ): + logger.error("Doing decorr nonclosure for utMinus") + hnom = fakeselector.get_hist(h, *args, **kwargs) + hvar = (1+variation_size)*hnom + # applying the variations only on the utAngleSign<0 bin + hvar.values()[..., -1] = hnom.values()[..., -1] + + hvar = syst_tools.decorrelateByAxes(hvar, hnom, axesToDecorrNames) + + return hvar + + datagroups.addSystematic( + inputBaseName, + groups=[subgroup, "Fake", "experiment", "expNoCalib"], + name=f"{datagroups.fakeName}EtaClos_eta", + baseName=f"{datagroups.fakeName}EtaClos", + processes=datagroups.fakeName, + noConstraint=False, + mirror=True, + scale=1, + applySelection=False, # don't apply selection, external parameters need to be added + action=fake_nonclosure_utMinus, + actionArgs=dict( + axesToDecorrNames=["eta"], + variation_size=0.1, + ), + systAxes=["eta_decorr"] + ) + if not args.noEfficiencyUnc: if not lowPU: From fec8acea75abb82d94fcb1d41e3f677271c40ab8 Mon Sep 17 00:00:00 2001 From: Marco Cipriani Date: Wed, 24 Dec 2025 09:46:37 +0100 Subject: [PATCH 15/24] updates for veto (no default change) --- scripts/histmakers/mw_with_mu_eta_pt.py | 3 ++- utilities/parsing.py | 6 ++++++ wremnants/muon_selections.py | 3 ++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/scripts/histmakers/mw_with_mu_eta_pt.py b/scripts/histmakers/mw_with_mu_eta_pt.py index 23d9cae36..1d99de7d0 100644 --- a/scripts/histmakers/mw_with_mu_eta_pt.py +++ b/scripts/histmakers/mw_with_mu_eta_pt.py @@ -1038,6 +1038,7 @@ def build_graph(df, dataset): nMuons=1, ptCut=args.vetoRecoPt, etaCut=args.vetoRecoEta, + staPtCut=args.vetoRecoStaPt, dxybsCut=args.dxybs, useGlobalOrTrackerVeto=useGlobalOrTrackerVeto, ) @@ -1158,7 +1159,7 @@ def build_graph(df, dataset): if args.selectVetoEventsMC: # in principle a gen muon with eta = 2.401 might still be matched to a reco muon with eta < 2.4, same for pt, so this condition is potentially fragile, but it is just for test plots df = df.Filter("Sum(postfsrMuons_inAcc) >= 2") - if not args.noVetoSF: + if not args.noVetoSF or args.scaleDYvetoFraction > 0.0: df = df.Define( "hasMatchDR2idx", "wrem::hasMatchDR2idx_closest(goodMuons_eta0,goodMuons_phi0,GenPart_eta[postfsrMuons_inAcc],GenPart_phi[postfsrMuons_inAcc],0.09)", diff --git a/utilities/parsing.py b/utilities/parsing.py index 47f1f0fff..5918d74b4 100644 --- a/utilities/parsing.py +++ b/utilities/parsing.py @@ -501,6 +501,12 @@ def __call__(self, parser, namespace, values, option_string=None): type=float, help="Lower threshold for muon pt in the veto definition", ) + parser.add_argument( + "--vetoRecoStaPt", + default=15, + type=float, + help="Lower threshold for muon standalone pt in the veto definition (should typically 
match vetoRecoPt, but not necessary)", + ) parser.add_argument( "--vetoRecoEta", default=2.4, diff --git a/wremnants/muon_selections.py b/wremnants/muon_selections.py index 9951f5bc2..1fa3ac528 100644 --- a/wremnants/muon_selections.py +++ b/wremnants/muon_selections.py @@ -78,9 +78,10 @@ def select_veto_muons( "vetoMuonsPre", f"Muon_looseId && abs(Muon_dxybs) < {dxybsCut} && Muon_correctedCharge != -99", ) + nHitsSA = common.muonEfficiency_standaloneNumberOfValidHits df = df.Define( "Muon_isGoodGlobal", - f"Muon_isGlobal && Muon_highPurity && Muon_standalonePt > {staPtCut} && Muon_standaloneNumberOfValidHits > 0 && wrem::vectDeltaR2(Muon_standaloneEta, Muon_standalonePhi, Muon_correctedEta, Muon_correctedPhi) < 0.09", + f"Muon_isGlobal && Muon_highPurity && Muon_standalonePt > {staPtCut} && Muon_standaloneNumberOfValidHits >= {nHitsSA} && wrem::vectDeltaR2(Muon_standaloneEta, Muon_standalonePhi, Muon_correctedEta, Muon_correctedPhi) < 0.09", ) if useGlobalOrTrackerVeto: if tightGlobalOrTracker: From 65201f6bba4d117562e70435a894c62dfc6aa7b2 Mon Sep 17 00:00:00 2001 From: Marco Cipriani Date: Thu, 25 Dec 2025 10:09:54 +0100 Subject: [PATCH 16/24] updates --- scripts/plotting/makeDataMCStackPlot.py | 9 ++++ scripts/rabbit/setupRabbit.py | 72 ++++++++++++++----------- utilities/styles/styles.py | 2 +- wremnants/histselections.py | 51 +----------------- 4 files changed, 51 insertions(+), 83 deletions(-) diff --git a/scripts/plotting/makeDataMCStackPlot.py b/scripts/plotting/makeDataMCStackPlot.py index 4b01d48e5..1d30bc25b 100644 --- a/scripts/plotting/makeDataMCStackPlot.py +++ b/scripts/plotting/makeDataMCStackPlot.py @@ -204,6 +204,14 @@ default=(0.05, 0.8), help="Location in (x,y) for additional text, aligned to upper left", ) +parser.add_argument( + "--vertLineEdges", + type=float, + nargs="*", + default=[], + help="Horizontal axis edges where to plot vertical lines", +) + subparsers = parser.add_subparsers(dest="variation") variation = subparsers.add_parser( "variation", help="Arguments for adding variation hists" @@ -617,6 +625,7 @@ def collapseSyst(h): lowerLegPos=args.lowerLegPos, lower_leg_padding=args.lowerLegPadding, subplotsizes=args.subplotSizes, + x_vertLines_edges=args.vertLineEdges, ) to_join = [f"{h.replace('-','_')}"] diff --git a/scripts/rabbit/setupRabbit.py b/scripts/rabbit/setupRabbit.py index c3cd2b876..6c5a547e6 100644 --- a/scripts/rabbit/setupRabbit.py +++ b/scripts/rabbit/setupRabbit.py @@ -2042,8 +2042,13 @@ def fake_nonclosure( return hvar + fakeParamDecorrAxes = ( + [datagroups.fakeTransferAxis] + if (datagroups.fakeTransferAxis != "" and "utAngleSign" in fitvar) + else [] + ) for axesToDecorrNames in [ - [datagroups.fakeTransferAxis] if True else [], + fakeParamDecorrAxes, ]: for idx, mag in [ (1, 0.1), @@ -2084,40 +2089,43 @@ def fake_nonclosure( ), ) - def fake_nonclosure_utMinus( - h, - axesToDecorrNames=["eta"], - variation_size=0.1, - *args, - **kwargs, - ): - logger.error("Doing decorr nonclosure for utMinus") - hnom = fakeselector.get_hist(h, *args, **kwargs) - hvar = (1 + variation_size) * hnom - # applying the variations only on the utAngleSign<0 bin - hvar.values()[..., -1] = hnom.values()[..., -1] + if "utAngleSign" in fitvar: + # TODO: extend and use previous function fake_nonclosure(...) 
+ def fake_nonclosure_utMinus( + h, + axesToDecorrNames=["eta"], + variation_size=0.1, + *args, + **kwargs, + ): + logger.debug("Doing decorr nonclosure for utMinus") + hnom = fakeselector.get_hist(h, *args, **kwargs) + hvar = (1 + variation_size) * hnom + # applying the variations only on the utAngleSign<0 bin + ## TODO: select axis by name and bin id by input argument + hvar.values()[..., -1] = hnom.values()[..., -1] - hvar = syst_tools.decorrelateByAxes(hvar, hnom, axesToDecorrNames) + hvar = syst_tools.decorrelateByAxes(hvar, hnom, axesToDecorrNames) - return hvar + return hvar - datagroups.addSystematic( - inputBaseName, - groups=[subgroup, "Fake", "experiment", "expNoCalib"], - name=f"{datagroups.fakeName}EtaClos_eta", - baseName=f"{datagroups.fakeName}EtaClos", - processes=datagroups.fakeName, - noConstraint=False, - mirror=True, - scale=1, - applySelection=False, # don't apply selection, external parameters need to be added - action=fake_nonclosure_utMinus, - actionArgs=dict( - axesToDecorrNames=["eta"], - variation_size=0.1, - ), - systAxes=["eta_decorr"], - ) + datagroups.addSystematic( + inputBaseName, + groups=[subgroup, "Fake", "experiment", "expNoCalib"], + name=f"{datagroups.fakeName}EtaClos_eta", + baseName=f"{datagroups.fakeName}EtaClos", + processes=datagroups.fakeName, + noConstraint=False, + mirror=True, + scale=1, + applySelection=False, # don't apply selection, external parameters need to be added + action=fake_nonclosure_utMinus, + actionArgs=dict( + axesToDecorrNames=["eta"], + variation_size=0.1, + ), + systAxes=["eta_decorr"], + ) if not args.noEfficiencyUnc: diff --git a/utilities/styles/styles.py b/utilities/styles/styles.py index 6ad089a95..51306c48d 100644 --- a/utilities/styles/styles.py +++ b/utilities/styles/styles.py @@ -192,7 +192,7 @@ def translate_html_to_latex(n): "nRecoVtx": r"Number of reconstructed vertices", "PV_npvsGood": r"Number of reconstructed vertices", "utmu": {"label": r"$\mathit{u}_{T}^{\mu}$", "unit": "GeV"}, - "utAngleSign": r"$\mathrm{sign}(\mathit{u}_{T}{\mu})$", + "utAngleSign": r"$\mathrm{sign}(\mathit{u}_{T}^{\mu})$", # "ewPTll": r"$\mathrm{Post\ FSR}\ p_\mathrm{T}^{\mu\mu}$", # "ewMll": r"$\mathrm{Post\ FSR}\ m^{\mu\mu}$", # "ewYll": r"$\mathrm{Post\ FSR}\ Y^{\mu\mu}$", diff --git a/wremnants/histselections.py b/wremnants/histselections.py index d9b3cde7d..d080e4801 100644 --- a/wremnants/histselections.py +++ b/wremnants/histselections.py @@ -207,7 +207,7 @@ def __init__( n for n in h.axes.name if n - not in [self.name_x, self.name_y, *fakerate_axes, self.decorrFakeAxis] + not in [self.name_x, self.name_y, *fakerate_axes, self.fakeTransferAxis] ] logger.debug( f"Setting fakerate integration axes to {self.fakerate_integration_axes}" @@ -886,55 +886,6 @@ def get_smoothed_tensor(self, h, sel, sval, svar, syst_variations=False, flow=Tr return out, outvar - def get_smoothed_tensor(self, h, sel, sval, svar, syst_variations=False, flow=True): - # get output shape from original hist axes, but as for result histogram - - hOut = ( - h[{self.name_x: self.sel_x if not self.integrate_x else hist.sum}] - if self.name_x in h.axes.name - else h - ) - out = np.zeros( - [ - a.extent if flow else a.shape - for a in hOut.axes - if a.name not in [self.name_y, self.decorrFakeAxis] - ], - dtype=sval.dtype, - ) - # leave the underflow and overflow unchanged if present - out[*sel[:-1]] = sval - if syst_variations: - outvar = np.zeros_like(out) - outvar = outvar[..., None, None] * np.ones( - (*outvar.shape, *svar.shape[-2:]), dtype=outvar.dtype - ) - - 
logger.debug("Doing syst_variations... something might be wrong here") - logger.error( - "Shapes of outvar and svar: " - + str(outvar.shape) - + " " - + str(svar.shape) - ) - - # leave the underflow and overflow unchanged if present - outvar[*sel[:-1], :, :] = svar - - logger.error( - "Shapes of outvar and svar: " - + str(outvar.shape) - + " " - + str(svar.shape) - ) - - else: - # with full smoothing all of the statistical uncertainty is included in the - # explicit variations, so the remaining binned uncertainty is zero - outvar = np.zeros_like(out) - - return out, outvar - def smoothen_spectrum( self, h, From f1a26ce0d6bc6bf7fc040989b4e156551adf1bac Mon Sep 17 00:00:00 2001 From: Marco Cipriani Date: Fri, 26 Dec 2025 10:09:16 +0100 Subject: [PATCH 17/24] updates for fakes vs uT --- .../w_mass_13TeV/makeFakeTransferTemplate.py | 113 +++++++++++------- scripts/histmakers/mw_with_mu_eta_pt.py | 2 +- scripts/rabbit/setupRabbit.py | 16 +++ wremnants/datasets/datagroups.py | 2 +- wremnants/histselections.py | 17 +-- 5 files changed, 100 insertions(+), 50 deletions(-) diff --git a/scripts/analysisTools/w_mass_13TeV/makeFakeTransferTemplate.py b/scripts/analysisTools/w_mass_13TeV/makeFakeTransferTemplate.py index a71ec44b2..d9c84c161 100644 --- a/scripts/analysisTools/w_mass_13TeV/makeFakeTransferTemplate.py +++ b/scripts/analysisTools/w_mass_13TeV/makeFakeTransferTemplate.py @@ -1,16 +1,33 @@ +#!/usr/bin/env python3 import argparse +import os import pickle +import sys from array import array import hist +import lz4.frame import ROOT import tensorflow as tf -from narf import histutils +# from narf import histutils +import narf +import wums.output_tools +from utilities import common from wremnants.datasets.datagroups import Datagroups from wums import boostHistHelpers as hh +from wums import logging + +args = sys.argv[:] +sys.argv = ["-b"] +sys.argv = args ROOT.gROOT.SetBatch(True) +ROOT.PyConfig.IgnoreCommandLineOptions = True + +from scripts.analysisTools.plotUtils.utility import ( + safeOpenFile, +) def pol4_root(xvals, parms, xLowVal=0.0, xFitRange=1.0): @@ -67,6 +84,13 @@ def convert_binEdges_idx(ed_list, binning): help="Input file with histograms", ) parser.add_argument("-o", "--outdir", type=str, default="./", help="Output directory") +parser.add_argument( + "-p", + "--postfix", + type=str, + default=None, + help="Postfix appended to output file name", +) parser.add_argument( "--nEtaBins", type=int, @@ -89,6 +113,9 @@ def convert_binEdges_idx(ed_list, binning): args = parser.parse_args() +logger = logging.setup_logger(os.path.basename(__file__), 4) +ROOT.TH1.SetDefaultSumw2() + groupsToConsider = ( [ "Data", @@ -126,8 +153,8 @@ def convert_binEdges_idx(ed_list, binning): for i in range(args.nChargeBins) ] -print("Decorrelating in the eta bins: ", decorrBins_eta) -print("Decorrelating in the charge bins: ", decorrBins_ch) +logger.info(f"Decorrelating in the eta bins: {decorrBins_eta}") +logger.info(f"Decorrelating in the charge bins: {decorrBins_ch}") groups = Datagroups( args.infile, @@ -137,7 +164,7 @@ def convert_binEdges_idx(ed_list, binning): # There is probably a better way to do this but I don't want to deal with it datasets = groups.getNames() -print(f"Will work on datasets {datasets}") +logger.info(f"Will work on datasets {datasets}") exclude = ["Data"] if not args.doQCD else [] prednames = list( @@ -146,8 +173,8 @@ def convert_binEdges_idx(ed_list, binning): ) ) -print(f"Unstacked processes are {exclude}") -print(f"Stacked processes are {prednames}") +logger.info(f"Unstacked 
processes are {exclude}")
+logger.info(f"Stacked processes are {prednames}")
 
 histInfo = groups.groups
 
@@ -188,8 +215,8 @@ def convert_binEdges_idx(ed_list, binning):
     ch_low_idx, ch_high_idx = convert_binEdges_idx(ch_edges, charge_genBinning)
     eta_low_idx, eta_high_idx = convert_binEdges_idx(eta_edges, eta_genBinning)
 
-    print(ch_low_idx, ch_high_idx)
-    print(eta_low_idx, eta_high_idx)
+    logger.info(f"{ch_low_idx}, {ch_high_idx}")
+    logger.info(f"{eta_low_idx}, {eta_high_idx}")
 
     select_utMinus["charge"] = hist.tag.Slicer()[
         ch_low_idx : ch_high_idx : hist.sum
@@ -199,28 +226,25 @@ def convert_binEdges_idx(ed_list, binning):
     select_utPlus["charge"] = hist.tag.Slicer()[ch_low_idx : ch_high_idx : hist.sum]
     select_utPlus["eta"] = hist.tag.Slicer()[eta_low_idx : eta_high_idx : hist.sum]
 
-    print(f"Processing charge bin [{ch_edges}] and eta bin [{eta_edges}]")
+    logger.info(f"Processing charge bin [{ch_edges}] and eta bin [{eta_edges}]")
 
     boost_h_utMinus = histInfo["Data"].copy("Data_utMinus").hists["nominal"]
     boost_h_utMinus = boost_h_utMinus[select_utMinus]
     boost_h_utMinus = hh.projectNoFlow(boost_h_utMinus, ["pt"], ["relIso", "mt"])
-    root_h_utMinus = histutils.hist_to_root(boost_h_utMinus)
+    root_h_utMinus = narf.hist_to_root(boost_h_utMinus)
 
     boost_h_utPlus = histInfo["Data"].copy("Data_utPlus").hists["nominal"]
     boost_h_utPlus = boost_h_utPlus[select_utPlus]
     boost_h_utPlus = hh.projectNoFlow(boost_h_utPlus, ["pt"], ["relIso", "mt"])
-    root_h_utPlus = histutils.hist_to_root(boost_h_utPlus)
+    root_h_utPlus = narf.hist_to_root(boost_h_utPlus)
 
-    print(
-        "Integrals BEFORE prompt subtraction",
-        root_h_utMinus.Integral(),
-        root_h_utPlus.Integral(),
-    )
+    logger.info(f"Integrals BEFORE prompt subtraction (uT < 0, uT > 0)")
+    logger.info(f"{root_h_utMinus.Integral()}, {root_h_utPlus.Integral()}")
 
     for mcName in prednames:
         if args.doQCD:
             continue
-        print(f"Subtracting {mcName} from data")
+        logger.info(f"Subtracting {mcName} from data")
         boost_h_mc_utMinus = (
             histInfo[mcName].copy(f"{mcName}_utMinus").hists["nominal"]
         )
         boost_h_mc_utMinus = boost_h_mc_utMinus[select_utMinus]
         boost_h_mc_utMinus = hh.projectNoFlow(
             boost_h_mc_utMinus, ["pt"], ["relIso", "mt"]
         )
-        root_h_mc_utMinus = histutils.hist_to_root(boost_h_mc_utMinus)
+        root_h_mc_utMinus = narf.hist_to_root(boost_h_mc_utMinus)
         root_h_utMinus.Add(root_h_mc_utMinus, -1)
 
         boost_h_mc_utPlus = (
             histInfo[mcName].copy(f"{mcName}_utPlus").hists["nominal"]
         )
         boost_h_mc_utPlus = boost_h_mc_utPlus[select_utPlus]
         boost_h_mc_utPlus = hh.projectNoFlow(
             boost_h_mc_utPlus, ["pt"], ["relIso", "mt"]
         )
-        root_h_mc_utPlus = histutils.hist_to_root(boost_h_mc_utPlus)
+        root_h_mc_utPlus = narf.hist_to_root(boost_h_mc_utPlus)
         root_h_utPlus.Add(root_h_mc_utPlus, -1)
 
-    print(
-        "Integrals AFTER prompt subtraction",
-        root_h_utMinus.Integral(),
-        root_h_utPlus.Integral(),
-    )
+    logger.info(f"Integrals AFTER prompt subtraction (uT < 0, uT > 0)")
+    logger.info(f"{root_h_utMinus.Integral()}, {root_h_utPlus.Integral()}")
 
     ratio_h = root_h_utMinus.Clone(f"fakeRatio_utAngleSign")
     ratio_h.Sumw2()
     ratio_h.Divide(root_h_utPlus)
 
     for idx_ch in range(ch_low_idx + 1, ch_high_idx + 1):
         for idx_eta in range(eta_low_idx + 1, eta_high_idx + 1):
-            print(f"Setting weights for chBin={idx_ch}, etaBin={idx_eta}")
+            # logger.debug(f"Setting weights for chBin={idx_ch}, etaBin={idx_eta}")
             for idx_pt in range(1, 31):
                 out_hist.SetBinContent(
                     idx_eta, idx_pt, idx_ch, ratio_h.GetBinContent(idx_pt)
                 )
                 out_hist.SetBinError(
                     idx_eta, idx_pt, idx_ch, ratio_h.GetBinError(idx_pt)
                 )
if ratio_h.GetBinContent(idx_pt) <= 0.0:
-                    print(
-                        "WARNING - found non-positive value in bin: ",
-                        idx_eta,
-                        idx_pt,
-                        idx_ch,
+                    logger.info(
+                        f"WARNING - found non-positive value in bin: ({idx_eta}, {idx_pt}, {idx_ch})"
                     )
 
 
-boost_out_hist = histutils.root_to_hist(out_hist)
+boost_out_hist = narf.root_to_hist(out_hist)
+resultDict = {"fakeCorr": boost_out_hist}
+base_dir = common.base_dir
+resultDict.update(
+    {"meta_info": wums.output_tools.make_meta_info_dict(args=args, wd=base_dir)}
+)
 
-with open(f"{args.outdir}/fakeTransferTemplates.pkl", "wb") as fout:
-    pickle.dump(boost_out_hist, fout)
+outfileName = "fakeTransferTemplates"
+if args.postfix:
+    outfileName += f"_{args.postfix}"
+if args.doQCD:
+    outfileName += "_QCD"
 
-fout = ROOT.TFile(
-    f"{args.outdir}/fakeTransferTemplates{'' if not args.doQCD else '_QCD'}.root",
-    "RECREATE",
-)
-fout.cd()
+pklfileName = f"{args.outdir}/{outfileName}.pkl.lz4"
+with lz4.frame.open(pklfileName, "wb") as fout:
+    pickle.dump(resultDict, fout, protocol=pickle.HIGHEST_PROTOCOL)
+logger.warning(f"Created file {pklfileName}")
+
+rootfileName = f"{args.outdir}/{outfileName}.root"
+fout = safeOpenFile(rootfileName, mode="RECREATE")
 out_hist.Write()
+fout.Close()
+logger.warning(f"Created file {rootfileName}")
+
 
 """
 x_axis = hist.axis.Regular(30, 26, 56, name="pt", flow=False)
@@ -309,14 +340,14 @@ def convert_binEdges_idx(ed_list, binning):
                 xLowVal=26.0,
                 xFitRange=30.0,
             )))
-print(tr_func)
-print("Params:", res["x"])
+logger.info(tr_func)
+logger.info(f"Params: {res['x']}")
 
 chi2 = res["loss_val"]
 ndof = len(bincenters) - len(res["x"])
 chi2Prob = ROOT.TMath.Prob(chi2, ndof)
 
-print(chi2, ndof, chi2Prob)
+logger.info(f"{chi2}, {ndof}, {chi2Prob}")
 
 """
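
On ratio_h.Sumw2() followed by ratio_h.Divide(root_h_utPlus) above: with Sumw2 storage, TH1::Divide propagates the bin errors as uncorrelated, which is appropriate here since the two recoil hemispheres are statistically independent samples. A short numpy sketch of that propagation, with hypothetical bin contents and variances:

    import numpy as np

    a, var_a = np.array([520.0, 450.0]), np.array([540.0, 470.0])      # uT < 0
    b, var_b = np.array([1980.0, 1760.0]), np.array([2050.0, 1820.0])  # uT > 0

    r = a / b
    # Uncorrelated propagation: (sigma_r / r)^2 = (sigma_a / a)^2 + (sigma_b / b)^2
    var_r = r**2 * (var_a / a**2 + var_b / b**2)
    sigma_r = np.sqrt(var_r)
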
diff --git a/scripts/histmakers/mw_with_mu_eta_pt.py b/scripts/histmakers/mw_with_mu_eta_pt.py
index 1d99de7d0..44bfc3068 100644
--- a/scripts/histmakers/mw_with_mu_eta_pt.py
+++ b/scripts/histmakers/mw_with_mu_eta_pt.py
@@ -284,7 +284,7 @@
 )
 axis_met = hist.axis.Regular(25, 0.0, 100.0, name="met", underflow=False, overflow=True)
 axis_mt_testfit = hist.axis.Variable(
-    (*np.arange(0, mtw_min + 20, 20), *np.arange(mtw_min + 2, 122, 2)),
+    (*np.arange(0, 40, 20), *np.arange(40, 122, 2)),
     name="mt",
     underflow=False,
     overflow=True,
diff --git a/scripts/rabbit/setupRabbit.py b/scripts/rabbit/setupRabbit.py
index 6c5a547e6..093e6b850 100644
--- a/scripts/rabbit/setupRabbit.py
+++ b/scripts/rabbit/setupRabbit.py
@@ -449,6 +449,16 @@ def make_parser(parser=None):
         choices=Regressor.polynomials,
         help="Type of polynomial for the smoothing of the application region or full prediction, depending on the smoothing mode",
     )
+    parser.add_argument(
+        "--fakeTransferCorrFileName",
+        type=str,
+        default="fakeTransferTemplates",
+        help="""
+        Name of pkl.lz4 file (without extension) with pTmu correction for the shape of data-driven fakes.
+        Currently used only when utAngleSign is a fakerate axis (detected automatically), since the shape
+        at negative uTmu must be taken from the positive bin, but a shape correction is needed versus pTmu.
+        """,
+    )
     parser.add_argument(
         "--ABCDedgesByAxis",
         type=str,
@@ -1150,6 +1160,8 @@ def setup(
 
     if wmass and not datagroups.xnorm:
         datagroups.fakerate_axes = args.fakerateAxes
+        # datagroups.fakeTransferAxis = "utAngleSign" if "utAngleSign" in args.fakerateAxes else ""
+        # datagroups.fakeTransferCorrFileName = args.fakeTransferCorrFileName
         histselector_kwargs = dict(
             mode=args.fakeEstimation,
             smoothing_mode=args.fakeSmoothingMode,
@@ -1159,6 +1171,10 @@ def setup(
             integrate_x="mt" not in fitvar,
             forceGlobalScaleFakes=args.forceGlobalScaleFakes,
             abcdExplicitAxisEdges=abcdExplicitAxisEdges,
+            fakeTransferAxis=(
+                "utAngleSign" if "utAngleSign" in args.fakerateAxes else ""
+            ),
+            fakeTransferCorrFileName=args.fakeTransferCorrFileName,
         )
         datagroups.set_histselectors(
             datagroups.getNames(), inputBaseName, **histselector_kwargs
diff --git a/wremnants/datasets/datagroups.py b/wremnants/datasets/datagroups.py
index df660f7d2..94018f406 100644
--- a/wremnants/datasets/datagroups.py
+++ b/wremnants/datasets/datagroups.py
@@ -80,6 +80,7 @@ def __init__(self, infile, mode=None, xnorm=False, **kwargs):
         self.fit_axes = []
         self.fakerate_axes = ["pt", "eta", "charge"]
         self.fakeTransferAxis = "utAngleSign"
+        self.fakeTransferCorrFileName = "fakeTransferTemplates"
 
         self.setGenAxes()
 
@@ -320,7 +321,6 @@ def set_histselectors(
                 h,
                 global_scalefactor=scale,
                 fakerate_axes=self.fakerate_axes,
-                fakeTransferAxis=self.fakeTransferAxis,
                 smoothing_mode=smoothing_mode,
                 smoothing_order_fakerate=smoothingOrderFakerate,
                 smoothing_order_spectrum=smoothingOrderSpectrum,
diff --git a/wremnants/histselections.py b/wremnants/histselections.py
index d080e4801..90b86cb23 100644
--- a/wremnants/histselections.py
+++ b/wremnants/histselections.py
@@ -1,7 +1,7 @@
-import os
 import pickle
 
 import hist
+import lz4.frame
 import numpy as np
 from scipy import interpolate
 
@@ -127,6 +127,7 @@ def __init__(
         name_y=None,
         fakerate_axes=["eta", "pt", "charge"],
         fakeTransferAxis="",
+        fakeTransferCorrFileName="fakeTransferTemplates",  # only with fakeTransferAxis
         smoothing_axis_name="pt",
         rebin_smoothing_axis="automatic",  # can be a list of bin edges, "automatic", or None
         upper_bound_y=None,  # using an upper bound on the abcd y-axis (e.g. 
isolation)
@@ -200,6 +201,7 @@ def __init__(
         self.set_selections_x()
         self.set_selections_y()
         self.fakeTransferAxis = fakeTransferAxis
+        self.fakeTransferCorrFileName = fakeTransferCorrFileName
 
         if fakerate_axes is not None:
             self.fakerate_axes = fakerate_axes # list of axes names where to perform independent fakerate computation
@@ -441,13 +443,14 @@ def __init__(
             self.spectrum_regressor = None
 
         if self.fakeTransferAxis in h.axes.name:
-            logger.debug("Loading transfer tensor for fakes")
-            trTensorPath = os.path.join(
-                os.environ["WREM_BASE"],
-                "wremnants-data/data/fakesWmass/fakeTransferTemplates.pkl",
+            data_dir = common.data_dir
+            trTensorPath = (
+                f"{data_dir}/fakesWmass/{self.fakeTransferCorrFileName}.pkl.lz4"
             )
-            with open(trTensorPath, "rb") as fTens:
-                self.fakeTransferTensor = pickle.load(fTens)
+            logger.warning(f"Loading transfer tensor for fakes from {trTensorPath}")
+            with lz4.frame.open(trTensorPath) as fTens:
+                resultDict = pickle.load(fTens)
+                self.fakeTransferTensor = resultDict["fakeCorr"]
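
For reference, the pkl.lz4 payload written by makeFakeTransferTemplate.py and read back in the hunk above is a plain dictionary. A minimal sketch of loading it outside the framework (the file name shown is the default one; adjust the path as needed):

    import pickle

    import lz4.frame

    with lz4.frame.open("fakeTransferTemplates.pkl.lz4", "rb") as f:
        result = pickle.load(f)

    corr_hist = result["fakeCorr"]  # boost-histogram with the (eta, pt, charge) correction
    meta = result["meta_info"]      # provenance dictionary from wums.output_tools
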
+ """, +) parser.add_argument( "--fineGroups", action="store_true", @@ -386,7 +396,10 @@ def padArray(ref, matchLength): applySelection = True groups.fakerate_axes = args.fakerateAxes -groups.fakeTransferAxis = args.fakeTransferAxis +groups.fakeTransferAxis = ( + args.fakeTransferAxis if "utAngleSign" in args.fakerateAxes else "" +) +groups.fakeTransferCorrFileName = args.fakeTransferCorrFileName if applySelection: groups.set_histselectors( datasets, diff --git a/scripts/rabbit/setupRabbit.py b/scripts/rabbit/setupRabbit.py index 093e6b850..9cbd1ee3a 100644 --- a/scripts/rabbit/setupRabbit.py +++ b/scripts/rabbit/setupRabbit.py @@ -367,7 +367,9 @@ def make_parser(parser=None): choices=[ "run", "phi", + "utAngleSign", "nRecoVtx", + # variables above, systematics below "prefire", "effi", "lumi", From 2fc7884e8bb8ddb608d0e662e379ec22eec23684 Mon Sep 17 00:00:00 2001 From: Marco Cipriani Date: Sun, 28 Dec 2025 18:53:22 +0100 Subject: [PATCH 19/24] set noi=False for polynomial variations --- wremnants/combine_theoryAgnostic_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wremnants/combine_theoryAgnostic_helper.py b/wremnants/combine_theoryAgnostic_helper.py index 0e588d170..b7138bcdd 100644 --- a/wremnants/combine_theoryAgnostic_helper.py +++ b/wremnants/combine_theoryAgnostic_helper.py @@ -61,7 +61,7 @@ def add_theoryAgnostic_polVar_uncertainty(self): systAxes=["nPolVarSyst", "downUpVar"], labelsByAxis=["v", "downUpVar"], # splitGroup={f"{groupName}_{coeffKey}" : f"{groupName}_{coeffKey}"}, - noi=True, + # noi=True, ) def add_muRmuF_polVar_uncertainty(self): From 811a7b0dd52ece1012b5e51661028e6bd217edf6 Mon Sep 17 00:00:00 2001 From: Marco Cipriani Date: Sun, 28 Dec 2025 18:54:56 +0100 Subject: [PATCH 20/24] fixes and updates to fit uT^mu --- .../w_mass_13TeV/makeEfficiencyCorrection.py | 24 ++++++++++--- scripts/histmakers/mz_wlike_with_mu_eta_pt.py | 35 ++++++++++++++++--- scripts/plotting/plot_decorr_params.py | 8 +++++ 3 files changed, 58 insertions(+), 9 deletions(-) diff --git a/scripts/analysisTools/w_mass_13TeV/makeEfficiencyCorrection.py b/scripts/analysisTools/w_mass_13TeV/makeEfficiencyCorrection.py index 2d906bcc8..05a1917dd 100644 --- a/scripts/analysisTools/w_mass_13TeV/makeEfficiencyCorrection.py +++ b/scripts/analysisTools/w_mass_13TeV/makeEfficiencyCorrection.py @@ -58,7 +58,7 @@ "--corrvar", type=str, default="run", - choices=["run", "phi"], + choices=["run", "phi", "utAngleSign"], help="Variable to be used for the correction", ) parser.add_argument( @@ -162,6 +162,11 @@ f"{round(corrEdges[i], 2)} < #phi^{{#mu}} < {round(corrEdges[i+1], 2)}" for i in range(nCorrBins) ] + elif args.corrvar == "utAngleSign": + legEntries = [ + "#it{u}_{T}^{#mu} < 0", + "#it{u}_{T}^{#mu} > 0", + ] else: legEntries = [f"{corrvar} bin {i}" for i in range(nCorrBins)] @@ -209,9 +214,20 @@ f"{corrvar}-eta ID = {i} {j}: {round(corr,3)} +/- {round(corr_unc,3)}" ) - title_var = "Run" if corrvar == "run" else "#phi" if corrvar == "phi" else "CorrVar" - - effRange = "0.85,1.05" if corrvar == "run" else "0.80,1.1" + titles = { + "run": "Run", + "phi": "#phi^{#mu}", + "utAngleSign": "sign(#it{u}_{T}^{#mu})", + } + title_var = "CorrVar" + if corrvar in titles.keys(): + title_var = titles[corrvar] + + effrange = "0.80,1.1" + if corrvar == "run": + effRange = "0.85,1.05" + elif corrvar == "utAngleSign": + effRange = "0.8,1.2" for d in datasets: drawNTH1( diff --git a/scripts/histmakers/mz_wlike_with_mu_eta_pt.py b/scripts/histmakers/mz_wlike_with_mu_eta_pt.py index 
f39abb35e..2ebf0840d 100644 --- a/scripts/histmakers/mz_wlike_with_mu_eta_pt.py +++ b/scripts/histmakers/mz_wlike_with_mu_eta_pt.py @@ -84,6 +84,11 @@ action="store_true", help="Flip even with odd event numbers to consider the positive or negative muon as the W-like muon", ) +parser.add_argument( + "--addAxisSignUt", + action="store_true", + help="Add another fit axis with the sign of the uT recoil projection", +) parser.add_argument( "--useTnpMuonVarForSF", action="store_true", @@ -214,9 +219,21 @@ nominal_axes = [axis_eta, axis_pt, common.axis_charge] nominal_cols = ["trigMuons_eta0", "trigMuons_pt0", "trigMuons_charge0"] +axis_ut_analysis = hist.axis.Regular( + 2, -2, 2, underflow=False, overflow=False, name="utAngleSign" +) # used only to separate positive/negative uT for now +if args.addAxisSignUt: + nominal_axes.append(axis_ut_analysis) + nominal_cols.append( + "nonTrigMuons_angleSignUt0" + if args.fillHistNonTrig + else "trigMuons_angleSignUt0" + ) + # for isoMt region validation and related tests # use very high upper edge as a proxy for infinity (cannot exploit overflow bins in the fit) # can probably use numpy infinity, but this is compatible with the root conversion +# FIXME: now we can probably use overflow bins in the fit axis_mtCat = hist.axis.Variable( [0, int(args.mtCut / 2.0), args.mtCut, 1000], name="mt", @@ -282,7 +299,6 @@ # axes for mT measurement axis_mt = hist.axis.Regular(200, 0.0, 200.0, name="mt", underflow=False, overflow=True) -axis_eta_mT = hist.axis.Variable([-2.4, 2.4], name="eta") # define helpers muon_prefiring_helper, muon_prefiring_helper_stat, muon_prefiring_helper_syst = ( @@ -947,10 +963,12 @@ def build_graph(df, dataset): ########### # utility plots of transverse mass, with or without recoil corrections ########### + muonKey = "nonTrigMuons" if args.fillHistNonTrig else "trigMuons" + muonAntiKey = "trigMuons" if args.fillHistNonTrig else "nonTrigMuons" met_vars = ("MET_pt", "MET_phi") df = df.Define( "transverseMass_uncorr", - f"wrem::get_mt_wlike(trigMuons_pt0, trigMuons_phi0, nonTrigMuons_pt0, nonTrigMuons_phi0, {', '.join(met_vars)})", + f"wrem::get_mt_wlike({muonKey}_pt0, {muonKey}_phi0, {muonAntiKey}_pt0, {muonAntiKey}_phi0, {', '.join(met_vars)})", ) results.append( df.HistoBoost( @@ -963,11 +981,11 @@ def build_graph(df, dataset): met_vars = ("MET_corr_rec_pt", "MET_corr_rec_phi") df = df.Define( "met_wlike_TV2", - f"wrem::get_met_wlike(nonTrigMuons_pt0, nonTrigMuons_phi0, {', '.join(met_vars)})", + f"wrem::get_met_wlike({muonAntiKey}_pt0, {muonAntiKey}_phi0, {', '.join(met_vars)})", ) df = df.Define( "transverseMass", - "wrem::get_mt_wlike(trigMuons_pt0, trigMuons_phi0, met_wlike_TV2)", + f"wrem::get_mt_wlike({muonKey}_pt0, {muonKey}_phi0, met_wlike_TV2)", ) results.append( df.HistoBoost("transverseMass", [axis_mt], ["transverseMass", "nominal_weight"]) @@ -987,6 +1005,13 @@ def build_graph(df, dataset): ["met_wlike_TV2_pt", "nominal_weight"], ) ) + if args.addAxisSignUt: + # use Wlike met instead of only the second lepton, for consistency with the W, + # also because the relevant quantity should be the recoil rather than the boson + df = df.Define( + f"{muonKey}_angleSignUt0", + f"wrem::zqtproj0_angleSign({muonKey}_pt0, {muonKey}_phi0, met_wlike_TV2_pt, met_wlike_TV2.Phi())", + ) ########### df = df.Define("passWlikeMT", f"transverseMass >= {mtw_min}") @@ -1117,7 +1142,7 @@ def build_graph(df, dataset): 6, 0, 2.4, name="abseta", overflow=False, underflow=False ) cols_vertexZstudy = [ - "trigMuons_eta0", + "trigMuons_abseta0", 
"trigMuons_passIso0", "passWlikeMT", "absDiffGenRecoVtx_z", diff --git a/scripts/plotting/plot_decorr_params.py b/scripts/plotting/plot_decorr_params.py index 32564cc88..879bbacc2 100644 --- a/scripts/plotting/plot_decorr_params.py +++ b/scripts/plotting/plot_decorr_params.py @@ -370,6 +370,14 @@ def get_values_and_impacts_as_panda( df_p["yticks"] = ( df_p["phi"].apply(lambda x: str(axis_ranges[x])).astype(str) ) + elif "utAngleSign" in axes: + axis_ranges = { + 0: rf"$\mathit{{u}}_{{T}}^{{\mu}}$ < 0", + 1: rf"$\mathit{{u}}_{{T}}^{{\mu}}$ > 0", + } + df_p["yticks"] = ( + df_p["utAngleSign"].apply(lambda x: str(axis_ranges[x])).astype(str) + ) elif "nRecoVtx" in axes: nRecoVtxBins = df.shape[0] if nRecoVtxBins == 5: From ba08ba70d60d5d134f1a01801bed0871b108607f Mon Sep 17 00:00:00 2001 From: Marco Cipriani Date: Sun, 28 Dec 2025 18:57:19 +0100 Subject: [PATCH 21/24] foo --- scripts/analysisTools/w_mass_13TeV/makeEfficiencyCorrection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/analysisTools/w_mass_13TeV/makeEfficiencyCorrection.py b/scripts/analysisTools/w_mass_13TeV/makeEfficiencyCorrection.py index 05a1917dd..0ed014065 100644 --- a/scripts/analysisTools/w_mass_13TeV/makeEfficiencyCorrection.py +++ b/scripts/analysisTools/w_mass_13TeV/makeEfficiencyCorrection.py @@ -180,7 +180,7 @@ sign = "minus" if args.charge < 0 else "plus" chargePostfix = f"_{sign}" - # create a TH2 and convrt into boost again to save it + # create a TH2 and convert into boost again to save it nx = heffiroot["Data"][0].GetNbinsX() xmin = heffiroot["Data"][0].GetXaxis().GetBinLowEdge(1) xmax = heffiroot["Data"][0].GetXaxis().GetBinLowEdge(1 + nx) From 4d1e7b505d8bddae577d3a77acfa0a72aba005e2 Mon Sep 17 00:00:00 2001 From: Marco Cipriani Date: Wed, 31 Dec 2025 17:28:18 +0100 Subject: [PATCH 22/24] fixes --- scripts/plotting/makeDataMCStackPlot.py | 6 ++++-- scripts/rabbit/setupRabbit.py | 19 +++++++++++++++---- wremnants/datasets/datagroups.py | 7 +++++++ wremnants/datasets/datasetDict_v9.py | 1 + wremnants/histselections.py | 16 +++++++++++++++- 5 files changed, 42 insertions(+), 7 deletions(-) diff --git a/scripts/plotting/makeDataMCStackPlot.py b/scripts/plotting/makeDataMCStackPlot.py index 3e2fba60a..880379ae3 100644 --- a/scripts/plotting/makeDataMCStackPlot.py +++ b/scripts/plotting/makeDataMCStackPlot.py @@ -386,9 +386,10 @@ def padArray(ref, matchLength): select[axis] = int(value) applySelection = ( False - if any( # does not trigger ABCD fake estimation if a cut is applied on the variables used to define ABCD regions + # do not trigger ABCD method if a cut is applied on the variables defining the ABCD regions + if any( abcd_var in [cut_str.split("=")[0] for cut_str in args.selection.split(",")] - for abcd_var in ["relIso", "mt"] + for abcd_var in ["relIso", "mt", "passMT", "passIso"] ) else True ) @@ -400,6 +401,7 @@ def padArray(ref, matchLength): args.fakeTransferAxis if "utAngleSign" in args.fakerateAxes else "" ) groups.fakeTransferCorrFileName = args.fakeTransferCorrFileName +groups.histAxesRemovedBeforeFakes = [str(x.split("=")[0]) for x in args.presel] if applySelection: groups.set_histselectors( datasets, diff --git a/scripts/rabbit/setupRabbit.py b/scripts/rabbit/setupRabbit.py index 9cbd1ee3a..8246f63f0 100644 --- a/scripts/rabbit/setupRabbit.py +++ b/scripts/rabbit/setupRabbit.py @@ -994,7 +994,10 @@ def setup( if massConstraintMode == "automatic": constrainMass = ( - "xsec" in args.noi or (dilepton and not "mll" in fitvar) or genfit + "xsec" in args.noi + or (dilepton 
and not "mll" in fitvar) + or genfit + or (wmass and "wmass" not in args.noi) ) else: constrainMass = True if massConstraintMode == "constrained" else False @@ -1177,6 +1180,9 @@ def setup( "utAngleSign" if "utAngleSign" in args.fakerateAxes else "" ), fakeTransferCorrFileName=args.fakeTransferCorrFileName, + histAxesRemovedBeforeFakes=( + [str(x.split()[0]) for x in args.selection] if args.selection else [] + ), ) datagroups.set_histselectors( datagroups.getNames(), inputBaseName, **histselector_kwargs @@ -1613,15 +1619,20 @@ def setup( if wmass and ("wwidth" in args.noi or (not stat_only and not args.noTheoryUnc)): width_info = dict( - name="WidthW0p6MeV", + # 42, 'widthW2p043GeV', 'widthW2p127GeV' + # 0p6, 'widthW2p09053GeV', 'widthW2p09173GeV' + # name="WidthW0p6MeV", + name="WidthW42MeV", processes=signal_samples_forMass, groups=["widthW", "theory"], mirror=False, noi="wwidth" in args.noi, noConstraint="wwidth" in args.noi, - skipEntries=widthWeightNames(proc="W", exclude=(2.09053, 2.09173)), + # skipEntries=widthWeightNames(proc="W", exclude=(2.09053, 2.09173)), + skipEntries=widthWeightNames(proc="W", exclude=(2.043, 2.127)), systAxes=["width"], - systNameReplace=[["2p09053GeV", "Down"], ["2p09173GeV", "Up"]], + # systNameReplace=[["2p09053GeV", "Down"], ["2p09173GeV", "Up"]], + systNameReplace=[["2p043GeV", "Down"], ["2p127GeV", "Up"]], passToFakes=passSystToFakes, ) widthWeightName = f"widthWeight{label}" diff --git a/wremnants/datasets/datagroups.py b/wremnants/datasets/datagroups.py index 94018f406..91c8ba04c 100644 --- a/wremnants/datasets/datagroups.py +++ b/wremnants/datasets/datagroups.py @@ -81,6 +81,13 @@ def __init__(self, infile, mode=None, xnorm=False, **kwargs): self.fakerate_axes = ["pt", "eta", "charge"] self.fakeTransferAxis = "utAngleSign" self.fakeTransferCorrFileName = "fakeTransferTemplates" + # in case some fit axes are integrated out, to make full smoothing work, + # we need to warn the class that these don't belong to fit axes nor fakerate axes, + # but are also not "integration axes" because they are removed before entering + # the fake estimate, this can happen for example + # with option --select in setupRabbit.py or --presel in makeDataMCStackPlot.py, + # which slice and remove the axis + self.histAxesRemovedBeforeFakes = [] self.setGenAxes() diff --git a/wremnants/datasets/datasetDict_v9.py b/wremnants/datasets/datasetDict_v9.py index ba178746c..0838e35ca 100644 --- a/wremnants/datasets/datasetDict_v9.py +++ b/wremnants/datasets/datasetDict_v9.py @@ -28,6 +28,7 @@ "DYJetsToMuMuMass10to50PostVFP": { "filepaths": [ "{BASE_PATH}/DYJetsToMuMu_M-10to50_H2ErratumFix_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos/NanoV9MCPostVFP_{NANO_PROD_TAG}", + # "{BASE_PATH}/DYJetsToMuMu_M-10to50_H2ErratumFix_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos_ext1/NanoV9MCPostVFP_{NANO_PROD_TAG}", ], "xsec": common.xsec_DYJetsToMuMuMass10to50, "group": "DYlowMass", diff --git a/wremnants/histselections.py b/wremnants/histselections.py index 90b86cb23..0c7336b61 100644 --- a/wremnants/histselections.py +++ b/wremnants/histselections.py @@ -128,6 +128,11 @@ def __init__( fakerate_axes=["eta", "pt", "charge"], fakeTransferAxis="", fakeTransferCorrFileName="fakeTransferTemplates", # only with fakeTransferAxis + # histAxesRemovedBeforeFakes is needed when some axes are in the histograms, + # but are not supposed to be fit or used as fakerate axes, and are sliced/integrated and removed + # before computing the fakes, so they are technically not "fake integration axes", which is + # needed to 
make the full smoothing work (integration axes within the method are not implemented) + histAxesRemovedBeforeFakes=[], smoothing_axis_name="pt", rebin_smoothing_axis="automatic", # can be a list of bin edges, "automatic", or None upper_bound_y=None, # using an upper bound on the abcd y-axis (e.g. isolation) @@ -202,6 +207,7 @@ def __init__( self.set_selections_y() self.fakeTransferAxis = fakeTransferAxis self.fakeTransferCorrFileName = fakeTransferCorrFileName + self.histAxesRemovedBeforeFakes = histAxesRemovedBeforeFakes if fakerate_axes is not None: self.fakerate_axes = fakerate_axes # list of axes names where to perform independent fakerate computation @@ -209,11 +215,19 @@ def __init__( n for n in h.axes.name if n - not in [self.name_x, self.name_y, *fakerate_axes, self.fakeTransferAxis] + not in [ + self.name_x, + self.name_y, + *fakerate_axes, + *histAxesRemovedBeforeFakes, + ] ] logger.debug( f"Setting fakerate integration axes to {self.fakerate_integration_axes}" ) + logger.debug(f"histAxesRemovedBeforeFakes = {histAxesRemovedBeforeFakes}") + logger.debug(f"fakerate_integration_axes = {self.fakerate_integration_axes}") + self.smoothing_axis_name = smoothing_axis_name edges = h.axes[smoothing_axis_name].edges if rebin_smoothing_axis == "automatic": From ea60c341f33d7fb23fe4f95615eaa844c9be5ebc Mon Sep 17 00:00:00 2001 From: Marco Cipriani Date: Tue, 6 Jan 2026 18:46:02 +0100 Subject: [PATCH 23/24] fixes --- scripts/plotting/makeDataMCStackPlot.py | 26 +++++++++++++++++-------- scripts/rabbit/setupRabbit.py | 5 ++++- utilities/parsing.py | 5 +++++ utilities/styles/styles.py | 19 ++++++++++++++++-- wremnants/histselections.py | 5 ++++- 5 files changed, 48 insertions(+), 12 deletions(-) diff --git a/scripts/plotting/makeDataMCStackPlot.py b/scripts/plotting/makeDataMCStackPlot.py index 880379ae3..94f64a9f2 100644 --- a/scripts/plotting/makeDataMCStackPlot.py +++ b/scripts/plotting/makeDataMCStackPlot.py @@ -335,12 +335,12 @@ def padArray(ref, matchLength): if "=" in ps: axName, axRange = ps.split("=") if "," in ps: - # axMin, axMax = map(float, axRange.split(",")) axMin, axMax = [ - float(p) if p != str() else None for p in axRange.split(",") + complex(0, float(p)) if p != str() else None + for p in axRange.split(",") ] logger.info(f"{axName} in [{axMin},{axMax}]") - presel[axName] = s[complex(0, axMin) : complex(0, axMax) : hist.sum] + presel[axName] = s[axMin : axMax : hist.sum] else: logger.info(f"Selecting {axName} {axRange.split('.')[1]}") if axRange == "hist.overflow": @@ -397,11 +397,13 @@ def padArray(ref, matchLength): applySelection = True groups.fakerate_axes = args.fakerateAxes -groups.fakeTransferAxis = ( - args.fakeTransferAxis if "utAngleSign" in args.fakerateAxes else "" +histselector_kwargs = dict( + fakeTransferAxis=( + args.fakeTransferAxis if args.fakeTransferAxis in args.fakerateAxes else "" + ), + fakeTransferCorrFileName=args.fakeTransferCorrFileName, + histAxesRemovedBeforeFakes=[str(x.split("=")[0]) for x in args.presel], ) -groups.fakeTransferCorrFileName = args.fakeTransferCorrFileName -groups.histAxesRemovedBeforeFakes = [str(x.split("=")[0]) for x in args.presel] if applySelection: groups.set_histselectors( datasets, @@ -413,6 +415,7 @@ def padArray(ref, matchLength): mode=args.fakeEstimation, forceGlobalScaleFakes=args.forceGlobalScaleFakes, mcCorr=args.fakeMCCorr, + **histselector_kwargs, ) if not args.nominalRef: @@ -568,6 +571,9 @@ def collapseSyst(h): else: binwnorm = None ylabel = r"$Events\,/\,bin$" + if args.noBinWidthNorm: + binwnorm = None + 
ylabel = r"$Events\,/\,bin$" if args.rlabel is None: if args.noData: @@ -632,7 +638,11 @@ def collapseSyst(h): normalize_to_data=args.normToData, noSci=args.noSciy, logoPos=args.logoPos, - width_scale=1.25 if len(h.split("-")) == 1 else 1, + width_scale=( + args.customFigureWidth + if args.customFigureWidth + else 1.25 if len(h.split("-")) == 1 else 1 + ), legPos=args.legPos, leg_padding=args.legPadding, lowerLeg=not args.noLowerLeg, diff --git a/scripts/rabbit/setupRabbit.py b/scripts/rabbit/setupRabbit.py index 8246f63f0..2b0ccf3ef 100644 --- a/scripts/rabbit/setupRabbit.py +++ b/scripts/rabbit/setupRabbit.py @@ -875,7 +875,7 @@ def make_parser(parser=None): parser.add_argument( "--breitwignerWMassWeights", action="store_true", - help="Use the Breit-Wigner mass wights for mW.", + help="Use the Breit-Wigner mass weights for mW.", ) parser = make_subparsers(parser) @@ -1623,6 +1623,7 @@ def setup( # 0p6, 'widthW2p09053GeV', 'widthW2p09173GeV' # name="WidthW0p6MeV", name="WidthW42MeV", + # name="WidthWm6p36MeV", processes=signal_samples_forMass, groups=["widthW", "theory"], mirror=False, @@ -1630,9 +1631,11 @@ def setup( noConstraint="wwidth" in args.noi, # skipEntries=widthWeightNames(proc="W", exclude=(2.09053, 2.09173)), skipEntries=widthWeightNames(proc="W", exclude=(2.043, 2.127)), + # skipEntries=widthWeightNames(proc="W", exclude=(2.085, 2.127)), systAxes=["width"], # systNameReplace=[["2p09053GeV", "Down"], ["2p09173GeV", "Up"]], systNameReplace=[["2p043GeV", "Down"], ["2p127GeV", "Up"]], + # systNameReplace=[["2p085GeV", "Down"], ["2p127GeV", "Up"]], passToFakes=passSystToFakes, ) widthWeightName = f"widthWeight{label}" diff --git a/utilities/parsing.py b/utilities/parsing.py index 5918d74b4..f1a716f92 100644 --- a/utilities/parsing.py +++ b/utilities/parsing.py @@ -916,5 +916,10 @@ def plot_parser(): default=None, help="Use a custom figure width, otherwise chosen automatic", ) + parser.add_argument( + "--noBinWidthNorm", + action="store_true", + help="Do not normalize bin yields by bin width", + ) return parser diff --git a/utilities/styles/styles.py b/utilities/styles/styles.py index 51306c48d..d5baca537 100644 --- a/utilities/styles/styles.py +++ b/utilities/styles/styles.py @@ -300,6 +300,8 @@ def translate_html_to_latex(n): "angularCoeffs_A6", "angularCoeffs_A7", "pdfCT18Z", + "pdfCT18ZNoAlphaS", + "pdfCT18ZAlphaS", "pTModeling", "muon_eff_syst", "muon_eff_stat", @@ -343,6 +345,18 @@ def translate_html_to_latex(n): "normZ_Helicity3", "normZ_Helicity4", ], + "width": common_groups + + [ + "angularCoeffs", + "pdfCT18Z", + "pTModeling", + "muon_eff_syst", + "muon_eff_stat", + "prefire", + "muonCalibration", + "Fake", + "massShift", + ], "min": common_groups + [ "massShiftW", @@ -525,12 +539,13 @@ def translate_html_to_latex(n): "FakeeLowMT": "FakeLowMT", "massShiftZ": "Z boson mass", "massShiftW": "W boson mass", + "massShift": "W boson mass", "pdfMSHT20": "PDF", "pdfCT18Z": "PDF", - "pdfMSHT20NoAlphaS": "PDF", + "pdfMSHT20NoAlphaS": "PDF no αS", "pdfMSHT20AlphaS": "αS PDF", "pdfCT18ZAlphaS": "αS PDF", - "pdfCT18ZNoAlphaS": "PDF", + "pdfCT18ZNoAlphaS": "PDF no αS", "pTModeling": "pTV modelling", "resum": "Resummation", "resumTNP": "Non pert. 
trans.", diff --git a/wremnants/histselections.py b/wremnants/histselections.py index 0c7336b61..c2a6ec9e1 100644 --- a/wremnants/histselections.py +++ b/wremnants/histselections.py @@ -471,7 +471,10 @@ def __init__( self.rebin_smoothing_axis = None if hasattr(self, "fakerate_integration_axes"): - if smoothing_mode == "full" and self.fakerate_integration_axes: + logger.warning( + f"self.fakerate_integration_axes = {self.fakerate_integration_axes}" + ) + if smoothing_mode == "full" and len(self.fakerate_integration_axes): raise NotImplementedError( "Smoothing of full fake prediction is not currently supported together with integration axes." ) From 1cd1a13ae6a39008fcde8ec855206c6ee63eab64 Mon Sep 17 00:00:00 2001 From: Marco Cipriani Date: Wed, 7 Jan 2026 19:50:59 +0100 Subject: [PATCH 24/24] set default W width variation to original 0.6 MeV --- scripts/rabbit/setupRabbit.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/rabbit/setupRabbit.py b/scripts/rabbit/setupRabbit.py index 2b0ccf3ef..281423d76 100644 --- a/scripts/rabbit/setupRabbit.py +++ b/scripts/rabbit/setupRabbit.py @@ -1621,20 +1621,20 @@ def setup( width_info = dict( # 42, 'widthW2p043GeV', 'widthW2p127GeV' # 0p6, 'widthW2p09053GeV', 'widthW2p09173GeV' - # name="WidthW0p6MeV", - name="WidthW42MeV", + name="WidthW0p6MeV", + # name="WidthW42MeV", # name="WidthWm6p36MeV", processes=signal_samples_forMass, groups=["widthW", "theory"], mirror=False, noi="wwidth" in args.noi, noConstraint="wwidth" in args.noi, - # skipEntries=widthWeightNames(proc="W", exclude=(2.09053, 2.09173)), - skipEntries=widthWeightNames(proc="W", exclude=(2.043, 2.127)), + skipEntries=widthWeightNames(proc="W", exclude=(2.09053, 2.09173)), + # skipEntries=widthWeightNames(proc="W", exclude=(2.043, 2.127)), # skipEntries=widthWeightNames(proc="W", exclude=(2.085, 2.127)), systAxes=["width"], - # systNameReplace=[["2p09053GeV", "Down"], ["2p09173GeV", "Up"]], - systNameReplace=[["2p043GeV", "Down"], ["2p127GeV", "Up"]], + systNameReplace=[["2p09053GeV", "Down"], ["2p09173GeV", "Up"]], + # systNameReplace=[["2p043GeV", "Down"], ["2p127GeV", "Up"]], # systNameReplace=[["2p085GeV", "Down"], ["2p127GeV", "Up"]], passToFakes=passSystToFakes, )