From 45a08999693ccb2016f6c239a2a192911523df50 Mon Sep 17 00:00:00 2001
From: Humair Raj Khan
Date: Sat, 20 Nov 2021 17:24:45 +0000
Subject: [PATCH 1/5] Add metrics

---
 heareval/score.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/heareval/score.py b/heareval/score.py
index c71a3077..1da3e2d6 100644
--- a/heareval/score.py
+++ b/heareval/score.py
@@ -239,6 +239,28 @@ class EventBasedScore(SoundEventScore):
             "percentage_of_length": 0.5,
         },
     ),
+    "event_onset_50ms_fms": partial(
+        EventBasedScore,
+        name="event_onset_50ms_fms",
+        score="f_measure",
+        params={
+            "evaluate_onset": True,
+            "evaluate_offset": False,
+            "t_collar": 0.05,
+            "percentage_of_length": 0.5,
+        },
+    ),
+    "event_onset_offset_50ms_20perc_fms": partial(
+        EventBasedScore,
+        name="event_onset_offset_50ms_20perc_fms",
+        score="f_measure",
+        params={
+            "evaluate_onset": True,
+            "evaluate_offset": True,
+            "t_collar": 0.05,
+            "percentage_of_length": 0.2,
+        },
+    ),
     "segment_1s_er": partial(
         SegmentBasedScore,
         name="segment_1s_er",

From 4f4d177efd007ab9bbdc74a264b121419f8c344d Mon Sep 17 00:00:00 2001
From: Humair Raj Khan
Date: Sat, 20 Nov 2021 19:18:28 +0000
Subject: [PATCH 2/5] Add configurable grids

---
 heareval/predictions/task_predictions.py | 37 ++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/heareval/predictions/task_predictions.py b/heareval/predictions/task_predictions.py
index b0009eb8..4af61b41 100755
--- a/heareval/predictions/task_predictions.py
+++ b/heareval/predictions/task_predictions.py
@@ -362,6 +362,7 @@ def __init__(
         scores: List[ScoreFunction],
         validation_target_events: Dict[str, List[Dict[str, Any]]],
         test_target_events: Dict[str, List[Dict[str, Any]]],
+        postprocessing_grid: Dict[str, List[float]],
         conf: Dict,
     ):
         super().__init__(
@@ -378,6 +379,7 @@
         }
         # For each epoch, what postprocessing parameters were best
         self.epoch_best_postprocessing: Dict[int, Tuple[Tuple[str, Any], ...]] = {}
+        self.postprocessing_grid = postprocessing_grid

     def _score_epoch_end(self, name: str, outputs: List[Dict[str, List[Any]]]):
         flat_outputs = self._flatten_batched_outputs(
@@ -410,7 +412,12 @@
         # print("\n\n\n", epoch)

         predicted_events_by_postprocessing = get_events_for_all_files(
-            prediction, filename, timestamp, self.idx_to_label, postprocessing_cached
+            prediction,
+            filename,
+            timestamp,
+            self.idx_to_label,
+            self.postprocessing_grid,
+            postprocessing_cached,
         )

         score_and_postprocessing = []
@@ -617,6 +624,7 @@ def get_events_for_all_files(
     filenames: List[str],
     timestamps: torch.Tensor,
     idx_to_label: Dict[int, str],
+    postprocessing_grid: Dict[str, List[float]],
     postprocessing: Optional[Tuple[Tuple[str, Any], ...]] = None,
 ) -> Dict[Tuple[Tuple[str, Any], ...], Dict[str, List[Dict[str, Union[str, float]]]]]:
     """
@@ -630,7 +638,8 @@ def get_events_for_all_files(

     If no postprocessing is specified (during training), we try a
     variety of ways of postprocessing the predictions into events,
-    including median filtering and minimum event length.
+    drawn from the postprocessing_grid, including median filtering
+    and minimum event length.
     If postprocessing is specified (during test, chosen at the best
     validation epoch), we use this postprocessing.
@@ -679,7 +688,7 @@ def get_events_for_all_files(
             timestamp_predictions, idx_to_label, **dict(postprocess)
         )
     else:
-        postprocessing_confs = list(ParameterGrid(EVENT_POSTPROCESSING_GRID))
+        postprocessing_confs = list(ParameterGrid(postprocessing_grid))
         for postprocess_dict in tqdm(postprocessing_confs):
             postprocess = tuple(postprocess_dict.items())
             event_dict[postprocess] = {}
@@ -865,6 +874,18 @@ def _combine_target_events(split_names: List[str]):
     validation_target_events: Dict = _combine_target_events(data_splits["valid"])
     test_target_events: Dict = _combine_target_events(data_splits["test"])

+    # The search space for finding the best task specific
+    # postprocessing can itself be task specific, supplied in
+    # the task metadata under
+    # evaluation_params.event_postprocessing_grid. If absent, the default
+    # EVENT_POSTPROCESSING_GRID will be used.
+    if "event_postprocessing_grid" in metadata["evaluation_params"]:
+        postprocessing_grid = metadata["evaluation_params"][
+            "event_postprocessing_grid"
+        ]
+    else:
+        postprocessing_grid = EVENT_POSTPROCESSING_GRID
+
     predictor = EventPredictionModel(
         nfeatures=embedding_size,
         label_to_idx=label_to_idx,
@@ -873,6 +894,7 @@
         scores=scores,
         validation_target_events=validation_target_events,
         test_target_events=test_target_events,
+        postprocessing_grid=postprocessing_grid,
         conf=conf,
     )
 elif metadata["embedding_type"] == "scene":
@@ -1212,9 +1234,18 @@
     )

     # Update with task specific grid parameters
+    # From the global TASK_SPECIFIC_PARAM_GRID
     if metadata["task_name"] in TASK_SPECIFIC_PARAM_GRID:
         final_grid.update(TASK_SPECIFIC_PARAM_GRID[metadata["task_name"]])

+    # From the task specific parameter grid in the task metadata.
+    # This option exists so that a task specific param grid can be used
+    # for secret tasks without listing them in the global
+    # TASK_SPECIFIC_PARAM_GRID. Ideally only one of the two options
+    # should be present.
+    if "task_specific_param_grid" in metadata["evaluation_params"]:
+        final_grid.update(metadata["evaluation_params"]["task_specific_param_grid"])
+
     # Model selection
     confs = list(ParameterGrid(final_grid))
     random.shuffle(confs)

From dfa8474612e1c8bca7a7975dbd7dcd824672ce59 Mon Sep 17 00:00:00 2001
From: Humair Raj Khan
Date: Sat, 20 Nov 2021 19:26:37 +0000
Subject: [PATCH 3/5] Fix key

---
 heareval/predictions/task_predictions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/heareval/predictions/task_predictions.py b/heareval/predictions/task_predictions.py
index 4af61b41..bede4d5d 100755
--- a/heareval/predictions/task_predictions.py
+++ b/heareval/predictions/task_predictions.py
@@ -879,7 +879,7 @@ def _combine_target_events(split_names: List[str]):
     # the task metadata under
     # evaluation_params.event_postprocessing_grid. If absent, the default
     # EVENT_POSTPROCESSING_GRID will be used.
-    if "event_postprocessing_grid" in metadata["evaluation_params"]:
+    if "event_postprocessing_grid" in metadata.get("evaluation_params", {}):
         postprocessing_grid = metadata["evaluation_params"][
             "event_postprocessing_grid"
         ]
@@ -1243,7 +1243,7 @@ def task_predictions(
     # for secret tasks without listing them in the global
     # TASK_SPECIFIC_PARAM_GRID. Ideally only one of the two options
     # should be present.
-    if "task_specific_param_grid" in metadata["evaluation_params"]:
+    if "task_specific_param_grid" in metadata.get("evaluation_params"):
         final_grid.update(metadata["evaluation_params"]["task_specific_param_grid"])

     # Model selection

From b56f1daa98805b437d212046d67f3820e1fc6062 Mon Sep 17 00:00:00 2001
From: Humair Raj Khan
Date: Sat, 20 Nov 2021 19:46:13 +0000
Subject: [PATCH 4/5] fix dict

---
 heareval/predictions/task_predictions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/heareval/predictions/task_predictions.py b/heareval/predictions/task_predictions.py
index bede4d5d..fb37eae0 100755
--- a/heareval/predictions/task_predictions.py
+++ b/heareval/predictions/task_predictions.py
@@ -1243,7 +1243,7 @@ def task_predictions(
     # for secret tasks without listing them in the global
     # TASK_SPECIFIC_PARAM_GRID. Ideally only one of the two options
     # should be present.
-    if "task_specific_param_grid" in metadata.get("evaluation_params"):
+    if "task_specific_param_grid" in metadata.get("evaluation_params", {}):
         final_grid.update(metadata["evaluation_params"]["task_specific_param_grid"])

     # Model selection

From 36223105d397b381c4d4e9f31bcce83c7ca4799b Mon Sep 17 00:00:00 2001
From: Humair Raj Khan
Date: Sat, 20 Nov 2021 19:48:53 +0000
Subject: [PATCH 5/5] fix metrics by removing percentage_of_length from
 onset-only metrics

---
 heareval/score.py | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/heareval/score.py b/heareval/score.py
index 1da3e2d6..ea93fe98 100644
--- a/heareval/score.py
+++ b/heareval/score.py
@@ -232,23 +232,13 @@ class EventBasedScore(SoundEventScore):
         EventBasedScore,
         name="event_onset_200ms_fms",
         score="f_measure",
-        params={
-            "evaluate_onset": True,
-            "evaluate_offset": False,
-            "t_collar": 0.2,
-            "percentage_of_length": 0.5,
-        },
+        params={"evaluate_onset": True, "evaluate_offset": False, "t_collar": 0.2},
     ),
     "event_onset_50ms_fms": partial(
         EventBasedScore,
         name="event_onset_50ms_fms",
         score="f_measure",
-        params={
-            "evaluate_onset": True,
-            "evaluate_offset": False,
-            "t_collar": 0.05,
-            "percentage_of_length": 0.5,
-        },
+        params={"evaluate_onset": True, "evaluate_offset": False, "t_collar": 0.05},
     ),
     "event_onset_offset_50ms_20perc_fms": partial(
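
Taken together, patches 2 through 4 let a task's metadata override both search grids. Below is a minimal sketch of how a task could exercise the two hooks and how the patched code expands the grid. The key names "event_postprocessing_grid" and "task_specific_param_grid" come from the patches; the task name, grid keys, and grid values are illustrative assumptions, not values from any real task.

    from sklearn.model_selection import ParameterGrid

    # Hypothetical task metadata, shaped the way the patched code reads it.
    metadata = {
        "task_name": "example_sound_event_task",  # illustrative
        "evaluation_params": {
            # Searched each epoch to pick the best event postprocessing.
            "event_postprocessing_grid": {
                "median_filter_ms": [150, 250],
                "min_duration": [125, 250],
            },
            # Merged into final_grid during model selection.
            "task_specific_param_grid": {"lr": [1e-3, 1e-4]},
        },
    }

    # Mirrors the (post-fix) selection logic from patches 3 and 4.
    evaluation_params = metadata.get("evaluation_params", {})
    if "event_postprocessing_grid" in evaluation_params:
        postprocessing_grid = evaluation_params["event_postprocessing_grid"]
    else:
        postprocessing_grid = {}  # the real code falls back to EVENT_POSTPROCESSING_GRID

    # ParameterGrid expands a dict of lists into every combination, just as
    # get_events_for_all_files does when no postprocessing is pinned.
    for conf in ParameterGrid(postprocessing_grid):
        print(conf)  # e.g. {'median_filter_ms': 150, 'min_duration': 125}

Keeping both grids as plain dicts of lists means ParameterGrid can consume either the default or the metadata-supplied version without any special casing.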
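
On patch 5: in sed_eval, percentage_of_length only enters the offset condition (an offset counts as correct if it lies within t_collar, or within percentage_of_length times the reference event's duration), so the parameter is inert when evaluate_offset is False, and dropping it from the onset-only metrics should leave scores unchanged. A sketch of the equivalent sed_eval configuration, assuming EventBasedScore forwards its params to sed_eval.sound_event.EventBasedMetrics (the diff does not show this plumbing) and using hypothetical labels:

    import sed_eval

    # Onset-only F-measure with a 50 ms collar; percentage_of_length is
    # omitted because it is only consulted when offsets are evaluated.
    metrics = sed_eval.sound_event.EventBasedMetrics(
        event_label_list=["dog_bark", "siren"],  # hypothetical labels
        t_collar=0.05,
        evaluate_onset=True,
        evaluate_offset=False,
    )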