From 3f171dbfad4674949d07fb48753e0284cd6b73a1 Mon Sep 17 00:00:00 2001 From: rkansal47 Date: Sat, 27 Jul 2024 01:10:40 -0700 Subject: [PATCH] empty subjet bug fix --- src/HHbbVV/processors/corrections.py | 13 ++- .../scale_factors/top_reweighting.ipynb | 99 ++++--------------- 2 files changed, 31 insertions(+), 81 deletions(-) diff --git a/src/HHbbVV/processors/corrections.py b/src/HHbbVV/processors/corrections.py index 39125144..480b0911 100644 --- a/src/HHbbVV/processors/corrections.py +++ b/src/HHbbVV/processors/corrections.py @@ -903,12 +903,21 @@ def _get_lund_arrays( kt_subjets_pt = kt_subjets_vec.pt * jec_correction # get constituents kt_subjet_consts = kt_clustering.exclusive_jets_constituents(num_prongs) - # breakpoint() kt_subjet_consts = kt_subjet_consts[kt_subjet_consts.pt > min_pt] + kt_subjet_consts = ak.flatten(kt_subjet_consts, axis=1) + + # dummy particle to pad empty subjets. SF for these subjets will be 1 + dummy_particle = ak.Array( + [{kin_key: 0.0 for kin_key in P4}], + with_name="PtEtaPhiMLorentzVector", + ) + + # pad empty subjets + kt_subjet_consts = ak.fill_none(ak.pad_none(kt_subjet_consts, 1, axis=1), dummy_particle[0]) # then re-cluster with CA # won't need to flatten once https://github.com/scikit-hep/fastjet/pull/145 is released - ca_clustering = fastjet.ClusterSequence(ak.flatten(kt_subjet_consts, axis=1), cadef) + ca_clustering = fastjet.ClusterSequence(kt_subjet_consts, cadef) lds = ca_clustering.exclusive_jets_lund_declusterings(1) return lds, kt_subjets_vec, kt_subjets_pt diff --git a/src/HHbbVV/scale_factors/top_reweighting.ipynb b/src/HHbbVV/scale_factors/top_reweighting.ipynb index 8de28262..2be8baa0 100644 --- a/src/HHbbVV/scale_factors/top_reweighting.ipynb +++ b/src/HHbbVV/scale_factors/top_reweighting.ipynb @@ -799,7 +799,20 @@ "cadef = fastjet.JetDefinition(fastjet.cambridge_algorithm, dR)\n", "ktdef = fastjet.JetDefinition(fastjet.kt_algorithm, dR)\n", "\n", - "num_prongs = 3" + "num_prongs = 3\n", + "min_pt = 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dummy_particle = ak.Array(\n", + " [{kin_key: 0.0 for kin_key in skim_vars}],\n", + " with_name=\"PtEtaPhiMLorentzVector\",\n", + ")" ] }, { @@ -815,10 +828,15 @@ " np.linalg.norm((kt_subjets.px, kt_subjets.py), axis=0) * jec_correction[:, np.newaxis]\n", ")\n", "kt_subjet_consts = kt_clustering.exclusive_jets_constituents(3)\n", + "kt_subjet_consts = kt_subjet_consts[kt_subjet_consts.pt > min_pt]\n", + "\n", + "kt_subjet_consts = ak.flatten(kt_subjet_consts, axis=1)\n", + "filled_consts = ak.fill_none(ak.pad_none(kt_subjet_consts, 1, axis=1), dummy_particle[0])\n", "\n", "# then re-cluster with CA\n", "# won't need to flatten once https://github.com/scikit-hep/fastjet/pull/145 is released\n", - "ca_clustering = fastjet.ClusterSequence(ak.flatten(kt_subjet_consts, axis=1), cadef)\n", + "# ca_clustering = fastjet.ClusterSequence(ak.flatten(kt_subjet_consts, axis=1), cadef)\n", + "ca_clustering = fastjet.ClusterSequence(filled_consts, cadef)\n", "lds = ca_clustering.exclusive_jets_lund_declusterings(1)\n", "lds_flat = ak.flatten(lds, axis=1)" ] @@ -883,24 +901,6 @@ " return ld_offsets, flat_logD, flat_logkt, flat_subjet_pt" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kt_subjets_pt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kt_subjets_pt[np.arange(len(kt_subjets_pt)), closest_sjidx]" - ] - }, { "attachments": {}, "cell_type": "markdown", @@ -1185,33 +1185,6 @@ "flat_weight = np.repeat(np.repeat(weight, num_prongs), ak.count(lds_flat.kt, axis=1))" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "type(lds.layout) is ak._ext.ListArray64" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "lp_hist" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sum([lp_hist, []])" - ] - }, { "cell_type": "code", "execution_count": null, @@ -1235,29 +1208,6 @@ "lp_hist[0, ...]" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Testing summing over histograms\n", - "from copy import deepcopy\n", - "\n", - "lp_hist2 = deepcopy(lp_hist)\n", - "lp_hist2.values()[:] = 1\n", - "lp_hist2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sum([lp_hist, lp_hist2])" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -1359,15 +1309,6 @@ "sf_vals = np.array(sf_vals)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sf_vals" - ] - }, { "attachments": {}, "cell_type": "markdown",