diff --git a/src/HHbbVV/postprocessing/PostProcessRes.ipynb b/src/HHbbVV/postprocessing/PostProcessRes.ipynb index 627e8ab9..22743c16 100644 --- a/src/HHbbVV/postprocessing/PostProcessRes.ipynb +++ b/src/HHbbVV/postprocessing/PostProcessRes.ipynb @@ -12,7 +12,7 @@ "import corrections\n", "\n", "from utils import CUT_MAX_VAL, ShapeVar\n", - "from hh_vars import (\n", + "from HHbbVV.hh_vars import (\n", " years,\n", " data_key,\n", " qcd_key,\n", @@ -92,6 +92,7 @@ "source": [ "MAIN_DIR = Path(\"../../../\")\n", "samples_dir = MAIN_DIR / \"../data/skimmer/24Feb25_update_skimmer\"\n", + "sig_samples_dir = MAIN_DIR / \"../data/skimmer/24Mar5_update_lp\"\n", "# samples_dir = f\"{MAIN_DIR}/../data/skimmer/Feb24\"\n", "# nonres_signal_samples_dir = f\"{MAIN_DIR}/../data/skimmer/Jun10\"\n", "# res_signal_samples_dir = f\"{MAIN_DIR}/../data/skimmer/Apr11\"\n", @@ -100,7 +101,7 @@ "# res_signal_samples_dir = \"/eos/uscms/store/user/rkansal/bbVV/skimmer/Apr11/\"\n", "year = \"2017\"\n", "\n", - "date = \"24Mar2\"\n", + "date = \"24Mar5_update_lp\"\n", "plot_dir = MAIN_DIR / f\"plots/PostProcessing/{date}/\"\n", "templates_dir = Path(f\"templates/{date}/\")\n", "\n", @@ -133,22 +134,34 @@ " index=list(samples.keys()) + list(nonres_samples.keys()) + list(res_samples.keys())\n", ")\n", "\n", - "# hem cleaning in load_samples not implemented yet for res samples\n", - "hem_cleaning = True\n", - "\n", - "# utils.remove_empty_parquets(samples_dir, year)\n", "events_dict = postprocessing.load_samples(\n", + " sig_samples_dir,\n", + " {**nonres_samples, **res_samples},\n", + " year,\n", + " new_filters,\n", + ")\n", + "\n", + "events_dict |= postprocessing.load_samples(\n", " samples_dir,\n", - " {**nonres_samples, **res_samples, **samples},\n", + " samples,\n", " year,\n", " new_filters,\n", - " hem_cleaning=hem_cleaning,\n", ")\n", "\n", "utils.add_to_cutflow(events_dict, \"Preselection\", \"finalWeight\", cutflow)\n", "cutflow" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "events = pd.read_parquet(f\"{sig_samples_dir}/{year}/GluGluToHHTobbVV_node_cHHH1/parquet\")\n", + "events" + ] + }, { "attachments": {}, "cell_type": "markdown", @@ -226,7 +239,7 @@ " bb_masks,\n", " nonres_sig_keys + res_sig_keys,\n", " control_plot_vars,\n", - " f\"{plot_dir}/ControlPlots/{year}/\",\n", + " plot_dir / f\"ControlPlots/{year}\",\n", " year,\n", " bg_keys=bg_keys,\n", " sig_scale_dict={\"HHbbVV\": 1e5, \"VBFHHbbVV\": 2e6} | {key: 2e4 for key in res_sig_keys},\n", @@ -243,6 +256,32 @@ "Overall LP SF" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from postprocessing import Region, nonres_shape_vars\n", + "\n", + "# temp region to check systematics\n", + "selection_regions = {\n", + " \"pass\": Region(\n", + " cuts={\n", + " \"bbFatJetParticleNetMD_Txbb\": [0.97, CUT_MAX_VAL],\n", + " \"VVFatJetParTMD_THWWvsT\": [0.8, CUT_MAX_VAL],\n", + " },\n", + " label=\"Pass\",\n", + " ),\n", + " \"lpsf\": Region(\n", + " cuts={\n", + " \"VVFatJetParTMD_THWWvsT\": [0.8, CUT_MAX_VAL],\n", + " },\n", + " label=\"LP SF\",\n", + " ),\n", + "}" + ] + }, { "cell_type": "code", "execution_count": null, @@ -311,31 +350,12 @@ "metadata": {}, "outputs": [], "source": [ - "from postprocessing import Region, nonres_shape_vars\n", - "\n", - "# temp region to check systematics\n", - "selection_regions = {\n", - " \"pass\": Region(\n", - " cuts={\n", - " \"bbFatJetParticleNetMD_Txbb\": [0.97, CUT_MAX_VAL],\n", - " \"VVFatJetParTMD_THWWvsT\": [0.8, CUT_MAX_VAL],\n", - " },\n", - " label=\"Pass\",\n", - " )\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "h = postprocessing.get_templates(\n", + "ht = postprocessing.get_templates(\n", " events_dict,\n", " bb_masks,\n", " year,\n", - " nonres_sig_keys + res_sig_keys,\n", + " [\"HHbbVV\"],\n", + " # nonres_sig_keys + res_sig_keys,\n", " # res_sig_keys,\n", " selection_regions,\n", " # res_shape_vars[:1],\n", @@ -343,13 +363,13 @@ " systematics,\n", " templates_dir,\n", " # bg_keys=[\"QCD\", \"TT\", \"V+Jets\", \"Diboson\", \"Hbb\"],\n", - " plot_dir=f\"{plot_dir}/templates/\",\n", + " plot_dir=plot_dir / \"templates\",\n", " prev_cutflow=cutflow,\n", " sig_scale_dict={\"HHbbVV\": 1e3, \"VBFHHbbVV\": 1e4} | {key: 1e2 for key in res_sig_keys},\n", " # sig_splits=sig_splits[:2],\n", " weight_shifts={},\n", " jshift=\"\",\n", - " lpsfs=False,\n", + " lpsfs=True,\n", " plot_shifts=False,\n", " pass_ylim=500,\n", " fail_ylim=40000,\n", @@ -379,12 +399,12 @@ " # res_shape_vars,\n", " systematics,\n", " templates_dir,\n", - " plot_dir=f\"{plot_dir}/templates\",\n", + " plot_dir=plot_dir / \"templates\",\n", " prev_cutflow=cutflow,\n", - " sig_scale_dict={\"HHbbVV\": 1e3, \"VBFHHbbVV\": 1e4} | {key: 1e2 for key in res_sig_keys},\n", + " sig_scale_dict={\"HHbbVV\": 1e3, \"VBFHHbbVV\": 2e4} | {key: 1e2 for key in res_sig_keys},\n", " weight_shifts=postprocessing.weight_shifts,\n", " jshift=jshift,\n", - " lpsfs=False,\n", + " lpsfs=True,\n", " pass_ylim=500,\n", " fail_ylim=40000,\n", " # blind_pass=True,\n", @@ -395,33 +415,6 @@ " templates = {**templates, **ttemps}" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ttemps" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.linalg.norm((ttemps[1].values() - ttemps[0])[:, 10]) / " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.linalg.norm([1, 1])" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/src/HHbbVV/postprocessing/corrections.py b/src/HHbbVV/postprocessing/corrections.py index f78566e0..d1872215 100644 --- a/src/HHbbVV/postprocessing/corrections.py +++ b/src/HHbbVV/postprocessing/corrections.py @@ -29,7 +29,7 @@ def _load_txbb_sfs(year: str): """Create 2D lookup tables in [Txbb, pT] for Txbb SFs from given year""" # https://coli.web.cern.ch/coli/.cms/btv/boohft-calib/20221201_bb_ULNanoV9_PNetXbbVsQCD_ak8_ext_2016APV/4_fit/ - with (package_path / f"/corrections/txbb_sfs/txbb_sf_ul_{year}.json").open() as f: + with (package_path / f"corrections/txbb_sfs/txbb_sf_ul_{year}.json").open() as f: txbb_sf = json.load(f) wps = ["LP", "MP", "HP"] @@ -81,9 +81,12 @@ def apply_txbb_sfs( events[f"{weight_key}_txbb_{var}"] = events[weight_key] if len(events[weight_key]): - events[weight_key] = events[weight_key] * txbb_sf_lookups[year]["nom"](bb_txbb, bb_pt) - else: - events[weight_key] = events[weight_key] + txbb_nom = txbb_sf_lookups[year]["nom"](bb_txbb, bb_pt) + for wkey in utils.get_all_weights(events): + if len(events[wkey].shape) > 1: + events[wkey] *= txbb_nom[:, np.newaxis] + else: + events[wkey] *= txbb_nom trig_effs = {} @@ -334,7 +337,7 @@ def get_lpsf( # pt extrapolation uncertainty is the std of all pt param variations uncs["sj_pt_unc"] = ( np.std( - np.sum(weight[:, np.newaxis] * events[f"{jet}_lp_pt_extrap_vars"].to_numpy(), axis=0) + np.sum(weight[:, np.newaxis] * events[f"{jet}_lp_sf_pt_extrap_vars"].to_numpy(), axis=0) ) / tot_post ) diff --git a/src/HHbbVV/postprocessing/plotting.py b/src/HHbbVV/postprocessing/plotting.py index a52ed43d..ca863d02 100644 --- a/src/HHbbVV/postprocessing/plotting.py +++ b/src/HHbbVV/postprocessing/plotting.py @@ -232,6 +232,7 @@ def ratioHistPlot( divide_bin_width: bool = False, plot_significance: bool = False, significance_dir: str = "right", + plot_ratio: bool = True, axrax: tuple = None, ): """ @@ -320,10 +321,12 @@ def ratioHistPlot( gridspec_kw={"height_ratios": [3, 1, 1], "hspace": 0}, sharex=True, ) - else: + elif plot_ratio: fig, (ax, rax) = plt.subplots( 2, 1, figsize=(12, 14), gridspec_kw={"height_ratios": [3, 1], "hspace": 0}, sharex=True ) + else: + fig, ax = plt.subplots(1, 1, figsize=(12, 11)) plt.rcParams.update({"font.size": 24}) @@ -385,6 +388,7 @@ def ratioHistPlot( alpha=0.2, hatch="//", linewidth=0, + label="Total Background Uncertainty", ) # plot data @@ -412,24 +416,27 @@ def ratioHistPlot( ax.set_ylim(y_lowlim) # plot ratio below - if plot_data: - bg_tot = sum([pre_divide_hists[sample, :] for sample in bg_keys]) - yerr = ratio_uncertainty(pre_divide_hists[data_key, :].values(), bg_tot.values(), "poisson") + if plot_ratio: + if plot_data: + bg_tot = sum([pre_divide_hists[sample, :] for sample in bg_keys]) + yerr = ratio_uncertainty( + pre_divide_hists[data_key, :].values(), bg_tot.values(), "poisson" + ) - hep.histplot( - pre_divide_hists[data_key, :] / (bg_tot.values() + 1e-5), - yerr=yerr, - ax=rax, - histtype="errorbar", - color="black", - capsize=4, - ) - else: - rax.set_xlabel(hists.axes[1].label) + hep.histplot( + pre_divide_hists[data_key, :] / (bg_tot.values() + 1e-5), + yerr=yerr, + ax=rax, + histtype="errorbar", + color="black", + capsize=4, + ) + else: + rax.set_xlabel(hists.axes[1].label) - rax.set_ylabel("Data/MC") - rax.set_ylim(ratio_ylims) - rax.grid() + rax.set_ylabel("Data/MC") + rax.set_ylim(ratio_ylims) + rax.grid() if plot_significance: bg_tot = sum([pre_divide_hists[sample, :] for sample in bg_keys]).values() diff --git a/src/HHbbVV/postprocessing/postprocessing.py b/src/HHbbVV/postprocessing/postprocessing.py index c7be1ead..cae78b40 100644 --- a/src/HHbbVV/postprocessing/postprocessing.py +++ b/src/HHbbVV/postprocessing/postprocessing.py @@ -1221,7 +1221,7 @@ def get_lpsf_all_years( bb_masks = bb_VV_assignment(events_dict) - if "finalWeight_noTrigEffs" not in events_dict[sig_key]: + if "weight_noTrigEffs" not in events_dict[sig_key]: apply_weights(events_dict, year) derive_variables(events_dict) @@ -1441,7 +1441,7 @@ def control_plots( if HEM2d and year == "2018": hists["HEM2d"] = hists_HEM2d(events_dict, bb_masks, weight_key, selection) - with Path(plot_dir / "hists.pkl", "wb") as f: + with (plot_dir / "hists.pkl").open("wb") as f: pickle.dump(hists, f) if sig_splits is None: @@ -1636,9 +1636,13 @@ def get_templates( # make selection, taking JEC/JMC variations into account sel, cf = utils.make_selection( - region.cuts, events_dict, bb_masks, prev_cutflow=prev_cutflow, jshift=jshift + region.cuts, + events_dict, + bb_masks, + prev_cutflow=prev_cutflow, + jshift=jshift, + weight_key=weight_key, ) - # print(cf) if template_dir != "": cf.to_csv(f"{template_dir}/cutflows/{year}/{rname}_cutflow{jlabel}.csv") @@ -1658,9 +1662,9 @@ def get_templates( sig_bb_mask = bb_masks[sig_key][sel[sig_key]] if pass_region: - # scale signal by LP SF + # scale all signal weights by LP SF if lpsfs: - for wkey in [weight_key, f"{weight_key}_noTrigEffs"]: + for wkey in utils.get_all_weights(sig_events[sig_key]): sig_events[sig_key][wkey] *= systematics[sig_key]["lp_sf"] corrections.apply_txbb_sfs(sig_events[sig_key], sig_bb_mask, year, weight_key) @@ -1672,7 +1676,7 @@ def get_templates( hist_samples = list(events_dict.keys()) if not do_jshift: - # set up weight-based variations + # add all weight-based variations to histogram axis for shift in ["down", "up"]: if pass_region: for sig_key in sig_keys: @@ -1730,7 +1734,7 @@ def get_templates( nom_vals = h[sample, :].values() abs_unc = np.linalg.norm( (whists.values() - nom_vals), axis=0 - ) / np.sqrt(103) + ) # / np.sqrt(103) # cap at 100% uncertainty rel_unc = np.clip(abs_unc / nom_vals, 0, 1) shape_up = nom_vals * (1 + rel_unc) @@ -1802,7 +1806,6 @@ def get_templates( plot_params = { "hists": h.project(0, i + 1), "sig_keys": p_sig_keys, - "bg_keys": p_bg_keys, "sig_scale_dict": ( {key: sig_scale_dict[key] for key in p_sig_keys} if pass_region @@ -1822,6 +1825,7 @@ def get_templates( plotting.ratioHistPlot( **plot_params, + bg_keys=bg_keys, title=title, name=f"{plot_name}{jlabel}.pdf", ) @@ -1834,6 +1838,7 @@ def get_templates( for wshift, wsyst in weight_shifts.items(): plotting.ratioHistPlot( **plot_params, + bg_keys=bg_keys, syst=(wshift, wsyst.samples), title=f"{region.label} Region {wsyst.label} Unc.", name=f"{plot_name}_{wshift}.pdf", @@ -1842,15 +1847,18 @@ def get_templates( for skey, shift in [("Down", "down"), ("Up", "up")]: plotting.ratioHistPlot( **plot_params, + bg_keys=p_bg_keys, # don't plot QCD syst=(wshift, wsyst.samples), variation=shift, title=f"{region.label} Region {wsyst.label} Unc. {skey} Shapes", name=f"{plot_name}_{wshift}_{shift}.pdf", + plot_ratio=False, ) if pass_region: plotting.ratioHistPlot( **plot_params, + bg_keys=bg_keys, sig_err="txbb", title=rf"{region.label} Region $T_{{Xbb}}$ Shapes", name=f"{plot_name}_txbb.pdf", diff --git a/src/HHbbVV/postprocessing/templates/24Mar5_update_lp/cutflows/2017/pass_cutflow.csv b/src/HHbbVV/postprocessing/templates/24Mar5_update_lp/cutflows/2017/pass_cutflow.csv new file mode 100644 index 00000000..23cc98d9 --- /dev/null +++ b/src/HHbbVV/postprocessing/templates/24Mar5_update_lp/cutflows/2017/pass_cutflow.csv @@ -0,0 +1,22 @@ +,Preselection,QCD SF,bbFatJetParticleNetMD_Txbb >= 0.97,VVFatJetParTMD_THWWvsT >= 0.8 +QCD,979287.6349132347,979287.634913235,210430.09414706996,2269.113453099677 +TT,116428.4156578406,116428.4156578406,24255.315491153477,391.3575052230973 +ST,10133.749871746222,10133.749871746222,2048.2546699879467,44.98804071000517 +W+Jets,14863.589391946809,14863.589391946809,2704.0098010873708,84.0506933145966 +Z+Jets,21810.249247296022,21810.249247296022,9830.13553043351,141.05219142274115 +Diboson,944.5571650558329,944.5571650558329,412.484656415579,8.185628548079201 +ggFHbb,226.95685088670757,226.95685088670757,157.27752402118352,1.6976052291726098 +VBFHbb,66.97275111602431,66.97275111602431,47.449174893882,0.5332721672841487 +ZHbb,63.96246914780236,63.96246914780236,45.379215471990825,0.844136317284847 +WHbb,109.68223866342207,109.68223866342207,73.99520784233742,2.311863067588508 +ggZHbb,9.637138831976987,9.637138831976987,5.872520883724087,0.08202381657508848 +ttHbb,318.24018721235785,318.24018721235785,121.05255814898767,1.8803545549795848 +HWW,153.35211702134148,153.35211702134148,31.224751489102488,2.6994308124911046 +Data,1144417.0,1144417.0,228352.0,4042.0 +HHbbVV,1.676130328580837,1.676130328580837,1.1511317721054473,0.40900950627726057 +VBFHHbbVV,0.03380955089741052,0.03380955089741052,0.022778253615771286,0.007133390360331839 +X[900]->H(bb)Y[80](VV),11.19980821914222,11.19980821914222,7.789032088198821,2.978145363696696 +X[1200]->H(bb)Y[190](VV),20.852666558466492,20.852666558466492,14.663466510313835,5.163467006134891 +X[2000]->H(bb)Y[125](VV),28.53386346388635,28.53386346388635,20.820717608067813,8.851038292379796 +X[3000]->H(bb)Y[250](VV),31.012733160854424,31.012733160854424,23.554959281771087,9.016848443837965 +X[4000]->H(bb)Y[150](VV),31.35180296475921,31.35180296475921,24.33204003610424,11.035967295901486 diff --git a/src/HHbbVV/postprocessing/utils.py b/src/HHbbVV/postprocessing/utils.py index bb9a142f..dabe81ce 100644 --- a/src/HHbbVV/postprocessing/utils.py +++ b/src/HHbbVV/postprocessing/utils.py @@ -680,6 +680,11 @@ def make_selection( return selection, cutflow +def get_all_weights(events: pd.DataFrame): + """Get all weight columns in the events dataframe""" + return np.unique([key for (key, ind) in events.columns if "weight" in key or "Weight" in key]) + + def getSigSidebandBGYields( mass_key: str, sig_key: str,