diff --git a/utilization/README.md b/utilization/README.md index ad919746..aee7e905 100644 --- a/utilization/README.md +++ b/utilization/README.md @@ -951,11 +951,15 @@ Then, format the instance by implementing the `instruction` attribute and `forma source str or list - source - str + source + str Source text + source_idx + int + + options list options @@ -963,33 +967,19 @@ Then, format the instance by implementing the `instruction` attribute and `forma Options of MCQ - source_idx - int - source_idx - int - -
The index of source text in options for datasets like winogrande
- - - target str - target - str - Target text. Either return target or target_idx in format_instance + target + str + Target text. Either return target or target_idx in format_instance target_idx int - target_idx - int - target_idx + label int - -
The index of target text in options for general MCQs
- No need to return from format_instance diff --git a/utilization/dataset/dataset.py b/utilization/dataset/dataset.py index 7752d554..0c983314 100644 --- a/utilization/dataset/dataset.py +++ b/utilization/dataset/dataset.py @@ -449,7 +449,7 @@ def construct_instances(self): ) self.prefix_caching = False - if self.model_evaluation_method == "generation" or not self.model.is_local_model(): + if self.model_evaluation_method == "generation": # generation endpoint supports Conversation evaluation_instances = conversations else: @@ -498,10 +498,6 @@ def _format_instance( options = formatted_instance.pop("options", None) options_text = None - if self.model_evaluation_method != "generation" or example_idx >= 0: - msg = "few-shot examples" if example_idx >= 0 else "ranking evaluation" - assert target is not None, f"The target text is required for {msg}." - if self.evaluation_type == "ranking" and target_idx is not None: if self.ranking_with_options: # update options with labels and then append options to source @@ -516,6 +512,10 @@ def _format_instance( elif self.model_evaluation_method == "generation": target = chr(65 + target_idx) + if example_idx >= 0: + msg = "few-shot examples" if example_idx >= 0 else "ranking evaluation" + assert target is not None, f"The target text is missing for {msg}. Return either `target` or `target_idx` in `format_instance`" + # source_idx is used to render the correct answer in few-shot examples if example_idx >= 0 and self.evaluation_type == "ranking" and source_idx is not None: source: str = source[source_idx]