Skip to content

Commit

Permalink
fix format_instance for mcq
Browse files Browse the repository at this point in the history
  • Loading branch information
huyiwen committed May 24, 2024
1 parent 29e986b commit 4c1bae3
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 25 deletions.
30 changes: 10 additions & 20 deletions utilization/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -951,45 +951,35 @@ Then, format the instance by implementing the `instruction` attribute and `forma
<tr>
<td class="s1" dir="ltr">source</td>
<td class="s1" dir="ltr"><code>str</code> or <code>list</code></td>
<td class="s1" dir="ltr">source</td>
<td class="s1" dir="ltr"><code>str</code></td>
<td class="s1" dir="ltr" rowspan="2">source</td>
<td class="s1" dir="ltr" rowspan="2"><code>str</code></td>
<td class="s1" dir="ltr">Source text</td>
</tr>
<tr>
<td class="s1" dir="ltr">source_idx</td>
<td class="s1" dir="ltr"><code>int</code></td>
</tr>
<tr>
<td class="s1" dir="ltr">options</td>
<td class="s1" dir="ltr"><code>list</code></td>
<td class="s1" dir="ltr">options</td>
<td class="s1" dir="ltr"><code>str</code></td>
<td class="s1" dir="ltr">Options of MCQ</td>
</tr>
<tr>
<td class="s1" dir="ltr">source_idx</td>
<td class="s1" dir="ltr"><code>int</code></td>
<td class="s1" dir="ltr">source_idx</td>
<td class="s1" dir="ltr"><code>int</code></td>
<td class="s2 softmerge" dir="ltr">
<div class="softmerge-inner">The index of the source text within the options, for datasets like WinoGrande</div>
</td>
</tr>
<tr>
<td class="s1" dir="ltr">target</td>
<td class="s1" dir="ltr"><code>str</code></td>
<td class="s1" dir="ltr" rowspan="2">target</td>
<td class="s1" dir="ltr" rowspan="2"><code>str</code></td>
<td class="s1" dir="ltr" rowspan="2">Target text. Either return <code>target</code> or <code>target_idx</code> in <code>format_instance</code></td>
<td class="s1" dir="ltr" rowspan="3">target</td>
<td class="s1" dir="ltr" rowspan="3"><code>str</code></td>
<td class="s1" dir="ltr" rowspan="3">Target text. Return either <code>target</code> or <code>target_idx</code> from <code>format_instance</code></td>
</tr>
<tr>
<td class="s1" dir="ltr">target_idx</td>
<td class="s1" dir="ltr"><code>int</code></td>
</tr>
<tr>
<td class="s1" dir="ltr">target_idx</td>
<td class="s1" dir="ltr"><code>int</code></td>
<td class="s1" dir="ltr">target_idx</td>
<td class="s1" dir="ltr">label</td>
<td class="s1" dir="ltr"><code>int</code></td>
<td class="s2 softmerge" dir="ltr">
<div class="softmerge-inner">The index of the target text within the options, for general MCQs</div>
</td>
</tr>
<tr>
<td class="s1" dir="ltr" colspan="2" rowspan="5"><i>No need to return from format_instance</i></td>
Expand Down
10 changes: 5 additions & 5 deletions utilization/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ def construct_instances(self):
)
self.prefix_caching = False

if self.model_evaluation_method == "generation" or not self.model.is_local_model():
if self.model_evaluation_method == "generation":
# generation endpoint supports Conversation
evaluation_instances = conversations
else:
Expand Down Expand Up @@ -498,10 +498,6 @@ def _format_instance(
options = formatted_instance.pop("options", None)
options_text = None

if self.model_evaluation_method != "generation" or example_idx >= 0:
msg = "few-shot examples" if example_idx >= 0 else "ranking evaluation"
assert target is not None, f"The target text is required for {msg}."

if self.evaluation_type == "ranking" and target_idx is not None:
if self.ranking_with_options:
# update options with labels and then append options to source
Expand All @@ -516,6 +512,10 @@ def _format_instance(
elif self.model_evaluation_method == "generation":
target = chr(65 + target_idx)

if example_idx >= 0:
msg = "few-shot examples" if example_idx >= 0 else "ranking evaluation"
assert target is not None, f"The target text is missing for {msg}. Return either `target` or `target_idx` in `format_instance`"

# source_idx is used to render the correct answer in few-shot examples
if example_idx >= 0 and self.evaluation_type == "ranking" and source_idx is not None:
source: str = source[source_idx]
Expand Down

0 comments on commit 4c1bae3

Please sign in to comment.