updates for hhbbvv test
rkansal47 committed Jul 25, 2024
1 parent 24b9096 commit 51c2f67
Showing 5 changed files with 64 additions and 26 deletions.
6 changes: 6 additions & 0 deletions README.md
@@ -321,6 +321,12 @@ In `src/HHbbVV/postprocessing`:
python BDTPreProcessing.py --data-dir "../../../../data/skimmer/Feb24/" --signal-data-dir "../../../../data/skimmer/Jun10/" --plot-dir "../../../plots/BDTPreProcessing/$TAG/" --year "2017" --bdt-data (--control-plots)
```

Running inference with a trained model, e.g.:

```bash
python src/HHbbVV/postprocessing/BDTPreProcessing.py --no-save-data --inference --bdt-preds-dir temp/24_04_05_k2v0_training_eqsig_vbf_vars_rm_deta/ --data-dir temp --year 2016 --sig-samples HHbbVV --bg-keys "" --no-data --no-do-jshifts
```
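
For reference, a minimal sketch of reading the saved predictions back, assuming the `{model_dir}/inferences/{year}/preds.npy` layout written by `do_inference_year` in `TrainBDT.py` (the directory below is a made-up example):

```python
import numpy as np

# hypothetical model directory; substitute your own training output
model_dir = "temp/24_04_05_k2v0_training_eqsig_vbf_vars_rm_deta"
year = "2016"

preds = np.load(f"{model_dir}/inferences/{year}/preds.npy")
# binary training: 1D array of per-event signal probabilities;
# multiclass: 2D array of per-class probabilities
print(preds.shape, preds[:5])
```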

### BDT Trainings

```bash
66 changes: 41 additions & 25 deletions src/HHbbVV/postprocessing/TopAnalysis.ipynb
@@ -376,21 +376,21 @@
"# package_path = Path(__file__).parent.parent.resolve()\n",
"package_path = Path(\"../\").resolve()\n",
"\n",
"for key in [\n",
" \"TTToSemiLeptonic\",\n",
" # \"ST_tW_antitop_5f_NoFullyHadronicDecays\",\n",
" # \"ST_tW_top_5f_NoFullyHadronicDecays\",\n",
" # \"ST_s-channel_4f_leptonDecays\",\n",
" # \"ST_t-channel_antitop_4f_InclusiveDecays\",\n",
" # \"ST_t-channel_top_4f_InclusiveDecays\",\n",
"]:\n",
" sig_lp_hist = utils.get_pickles(f\"{signal_data_dir}/{year}/{key}/pickles\", year, key)[\"lp_hist\"]\n",
"\n",
" # remove negatives\n",
" sig_lp_hist.values()[sig_lp_hist.values() < 0] = 0\n",
"\n",
" with (package_path / f\"corrections/lp_ratios/signals/{year}_{key}.hist\").open(\"wb\") as f:\n",
" pickle.dump(sig_lp_hist, f)"
"# for key in [\n",
"# \"TTToSemiLeptonic\",\n",
"# # \"ST_tW_antitop_5f_NoFullyHadronicDecays\",\n",
"# # \"ST_tW_top_5f_NoFullyHadronicDecays\",\n",
"# # \"ST_s-channel_4f_leptonDecays\",\n",
"# # \"ST_t-channel_antitop_4f_InclusiveDecays\",\n",
"# # \"ST_t-channel_top_4f_InclusiveDecays\",\n",
"# ]:\n",
"# sig_lp_hist = utils.get_pickles(f\"{signal_data_dir}/{year}/{key}/pickles\", year, key)[\"lp_hist\"]\n",
"\n",
"# # remove negatives\n",
"# sig_lp_hist.values()[sig_lp_hist.values() < 0] = 0\n",
"\n",
"# with (package_path / f\"corrections/lp_ratios/signals/{year}_{key}.hist\").open(\"wb\") as f:\n",
"# pickle.dump(sig_lp_hist, f)"
]
},
{
@@ -422,12 +422,15 @@
"import uproot\n",
"\n",
"# initialize lund plane scale factors lookups\n",
"f = uproot.open(package_path / f\"corrections/lp_ratios/ratio_{year[:4]}.root\")\n",
"# f = uproot.open(package_path / f\"corrections/lp_ratios/ratio_{year[:4]}.root\")\n",
"f = uproot.open(package_path / f\"corrections/lp_ratios/ratio_2018.root\")\n",
"\n",
"# 3D histogram: [subjet_pt, ln(0.8/Delta), ln(kT/GeV)]\n",
"mc_nom = f[\"mc_nom\"].to_numpy()\n",
"ratio_edges = mc_nom[1:]\n",
"mc_nom = mc_nom[0]"
"mc_nom = mc_nom[0]\n",
"\n",
"ratio_nom = f[\"ratio_nom\"].to_numpy()[0]"
]
},
{
@@ -436,7 +439,7 @@
"metadata": {},
"outputs": [],
"source": [
"plt.imshow(sig_lp_hist.values()[0])"
"np.min(ratio_nom[ratio_nom > 0])"
]
},
{
@@ -445,7 +448,10 @@
"metadata": {},
"outputs": [],
"source": [
"plt.imshow(mc_nom[0])"
"with (package_path / f\"corrections/lp_ratios/signals/2018_GluGluToHHTobbVV_node_cHHH1.hist\").open(\n",
" \"rb\"\n",
") as f:\n",
" sig_lp_hist = pickle.load(f)"
]
},
{
@@ -454,8 +460,16 @@
"metadata": {},
"outputs": [],
"source": [
"plt.imshow(sig_mc_ratio_pt[5])\n",
"plt.colorbar()"
"plt.imshow(sig_lp_hist.values()[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.imshow(mc_nom[0])"
]
},
{
@@ -465,7 +479,10 @@
"outputs": [],
"source": [
"mc_tot = np.sum(mc_nom)\n",
"sig_tot = sig_lp_hist.sum()"
"sig_tot = sig_lp_hist.sum()\n",
"sig_mc_ratio = np.clip(\n",
" np.nan_to_num((sig_lp_hist.values() / sig_tot) / (mc_nom / mc_tot), nan=1), 0.5, 2.0\n",
")"
]
},
{
Expand All @@ -487,9 +504,8 @@
"metadata": {},
"outputs": [],
"source": [
"sig_mc_ratio = np.clip(\n",
" np.nan_to_num((sig_lp_hist.values() / sig_tot) / (mc_nom / mc_tot), nan=1), 0.5, 2.0\n",
")"
"plt.imshow(sig_mc_ratio_pt[0])\n",
"plt.colorbar()"
]
},
{
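
Taken together, these notebook cells load the 2018 Lund-plane ratio histograms and build a signal/MC shape ratio. A condensed sketch of that flow (paths and key names are taken from the cells above and may differ in your checkout):

```python
import pickle
from pathlib import Path

import numpy as np
import uproot

package_path = Path("../").resolve()  # notebook runs from src/HHbbVV/postprocessing

# 3D histograms binned in [subjet_pt, ln(0.8/Delta), ln(kT/GeV)]
f = uproot.open(package_path / "corrections/lp_ratios/ratio_2018.root")
mc_nom, *ratio_edges = f["mc_nom"].to_numpy()  # (values, then bin edges)
ratio_nom = f["ratio_nom"].to_numpy()[0]

# pickled signal Lund-plane histogram (a `hist` Hist object)
sig_file = package_path / "corrections/lp_ratios/signals/2018_GluGluToHHTobbVV_node_cHHH1.hist"
with sig_file.open("rb") as fp:
    sig_lp_hist = pickle.load(fp)

# normalize both to unit sum, take the ratio, and tame empty/extreme bins
mc_tot = np.sum(mc_nom)
sig_tot = sig_lp_hist.sum()
sig_mc_ratio = np.clip(
    np.nan_to_num((sig_lp_hist.values() / sig_tot) / (mc_nom / mc_tot), nan=1), 0.5, 2.0
)
```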
1 change: 1 addition & 0 deletions src/HHbbVV/postprocessing/TrainBDT.py
@@ -821,6 +821,7 @@ def do_inference_year(
    # preds = preds[:, :-1] if multiclass else preds[:, 1]  # save n-1 probs to save space
    preds = preds if multiclass else preds[:, 1]
    np.save(f"{model_dir}/inferences/{year}/preds.npy", preds)
    print(preds)

    if jec_jmsr_shifts:
        for jshift in jec_shifts:
14 changes: 13 additions & 1 deletion src/HHbbVV/postprocessing/corrections.py
@@ -310,10 +310,20 @@ def postprocess_lpsfs(
    else:
        raise ValueError("LP SF shapes are invalid")

    nom_mean = None
    for key in ["lp_sf_nom", "lp_sf_toys", "lp_sf_pt_extrap_vars"] + sf_vars:
        CLIP = 5.0
        td[key] = np.clip(np.nan_to_num(td[key], nan=1.0), 1.0 / CLIP, CLIP)
        td[key] = td[key] / np.mean(td[key], axis=0)

        if key == "lp_sf_nom":
            nom_mean = np.mean(td[key], axis=0)

        if "unmatched" not in key:
            td[key] = td[key] / np.mean(td[key], axis=0)
        else:
            # normalize by the nominal mean instead: the unmatched variation's own mean
            # is dominated by unmatched jets that never enter the pass regions, which
            # would artificially inflate this uncertainty
            td[key] = td[key] / nom_mean

# add to dataframe
if save_all:
@@ -389,4 +399,6 @@ def get_lpsf(
    # tot_rel_unc = np.mean([tot_rel_unc_up, tot_rel_unc_down])
    tot_unc = (lp_sf * tot_rel_unc_up, lp_sf * tot_rel_unc_down)

    # breakpoint()

    return lp_sf, tot_unc, uncs, uncs_asym
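
The normalization change above clips each per-event SF to [1/CLIP, CLIP] and rescales every variation to unit mean, except the `unmatched` systematics, which are rescaled by the nominal mean. A self-contained sketch with toy arrays (shapes and key names are assumptions based on the diff):

```python
import numpy as np

rng = np.random.default_rng(0)
CLIP = 5.0

# toy stand-ins for the per-event SF columns in `td`
td = {
    "lp_sf_nom": rng.lognormal(0.0, 0.5, size=(1000, 1)),
    "lp_sf_toys": rng.lognormal(0.0, 0.5, size=(1000, 100)),
    "lp_sf_unmatched_up": rng.lognormal(0.5, 1.0, size=(1000, 1)),
}

nom_mean = None
for key in ["lp_sf_nom", "lp_sf_toys", "lp_sf_unmatched_up"]:
    td[key] = np.clip(np.nan_to_num(td[key], nan=1.0), 1.0 / CLIP, CLIP)

    if key == "lp_sf_nom":
        nom_mean = np.mean(td[key], axis=0)

    if "unmatched" not in key:
        # normalize each variation to unit mean over events
        td[key] = td[key] / np.mean(td[key], axis=0)
    else:
        # unmatched variations: divide by the nominal mean, since their own mean
        # is dominated by unmatched jets outside the pass regions
        td[key] = td[key] / nom_mean
```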
3 changes: 3 additions & 0 deletions src/HHbbVV/postprocessing/postprocessing.py
@@ -520,8 +520,11 @@ def _add_nonres_columns(df, bb_mask, vbf_vars=False, ptlabel="", mlabel=""):

if f"vbf_Mass_jj{ptlabel}" not in df.columns:
df[f"vbf_Mass_jj{ptlabel}"] = jj.M
# df[f"vbf_Mass_jj{ptlabel}"] = np.nan_to_num(jj.M)
if "vbf_dEta_jj" not in df.columns:
df["vbf_dEta_jj"] = np.abs(vbf1.eta - vbf2.eta)
# df["vbf_dEta_jj"] = np.nan_to_num(np.abs(vbf1.eta - vbf2.eta))

# print(f"VBF jet vars: {time.time() - start:.2f}")

if not vbf_vars:
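
For context, `vbf1`, `vbf2`, and `jj` here are jet four-vectors; a minimal sketch of the two variables using the scikit-hep `vector` package with made-up kinematics:

```python
import numpy as np
import vector

# hypothetical kinematics for the two VBF-candidate jets
vbf1 = vector.obj(pt=80.0, eta=2.1, phi=0.4, M=8.0)
vbf2 = vector.obj(pt=60.0, eta=-3.0, phi=2.9, M=6.0)

jj = vbf1 + vbf2                       # dijet four-vector
mass_jj = jj.M                         # invariant dijet mass
deta_jj = np.abs(vbf1.eta - vbf2.eta)  # pseudorapidity separation
print(mass_jj, deta_jj)
```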
